diff options
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 168 |
1 files changed, 106 insertions, 62 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3a0338b4b179..4ee3abf20ed6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/highmem.h> | ||
26 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
27 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
28 | 27 | ||
@@ -45,38 +44,27 @@ do { \ | |||
45 | #endif | 44 | #endif |
46 | 45 | ||
47 | /* | 46 | /* |
48 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | 47 | * | NHM/WSM | SNB | |
48 | * register ------------------------------- | ||
49 | * | HT | no HT | HT | no HT | | ||
50 | *----------------------------------------- | ||
51 | * offcore | core | core | cpu | core | | ||
52 | * lbr_sel | core | core | cpu | core | | ||
53 | * ld_lat | cpu | core | cpu | core | | ||
54 | *----------------------------------------- | ||
55 | * | ||
56 | * Given that there is a small number of shared regs, | ||
57 | * we can pre-allocate their slot in the per-cpu | ||
58 | * per-core reg tables. | ||
49 | */ | 59 | */ |
50 | static unsigned long | 60 | enum extra_reg_type { |
51 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | 61 | EXTRA_REG_NONE = -1, /* not used */ |
52 | { | ||
53 | unsigned long offset, addr = (unsigned long)from; | ||
54 | unsigned long size, len = 0; | ||
55 | struct page *page; | ||
56 | void *map; | ||
57 | int ret; | ||
58 | |||
59 | do { | ||
60 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
61 | if (!ret) | ||
62 | break; | ||
63 | |||
64 | offset = addr & (PAGE_SIZE - 1); | ||
65 | size = min(PAGE_SIZE - offset, n - len); | ||
66 | |||
67 | map = kmap_atomic(page); | ||
68 | memcpy(to, map+offset, size); | ||
69 | kunmap_atomic(map); | ||
70 | put_page(page); | ||
71 | 62 | ||
72 | len += size; | 63 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ |
73 | to += size; | 64 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ |
74 | addr += size; | ||
75 | 65 | ||
76 | } while (len < n); | 66 | EXTRA_REG_MAX /* number of entries needed */ |
77 | 67 | }; | |
78 | return len; | ||
79 | } | ||
80 | 68 | ||
81 | struct event_constraint { | 69 | struct event_constraint { |
82 | union { | 70 | union { |
@@ -132,11 +120,10 @@ struct cpu_hw_events { | |||
132 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | 120 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; |
133 | 121 | ||
134 | /* | 122 | /* |
135 | * Intel percore register state. | 123 | * manage shared (per-core, per-cpu) registers |
136 | * Coordinate shared resources between HT threads. | 124 | * used on Intel NHM/WSM/SNB |
137 | */ | 125 | */ |
138 | int percore_used; /* Used by this CPU? */ | 126 | struct intel_shared_regs *shared_regs; |
139 | struct intel_percore *per_core; | ||
140 | 127 | ||
141 | /* | 128 | /* |
142 | * AMD specific bits | 129 | * AMD specific bits |
@@ -187,26 +174,45 @@ struct cpu_hw_events { | |||
187 | for ((e) = (c); (e)->weight; (e)++) | 174 | for ((e) = (c); (e)->weight; (e)++) |
188 | 175 | ||
189 | /* | 176 | /* |
177 | * Per register state. | ||
178 | */ | ||
179 | struct er_account { | ||
180 | raw_spinlock_t lock; /* per-core: protect structure */ | ||
181 | u64 config; /* extra MSR config */ | ||
182 | u64 reg; /* extra MSR number */ | ||
183 | atomic_t ref; /* reference count */ | ||
184 | }; | ||
185 | |||
186 | /* | ||
190 | * Extra registers for specific events. | 187 | * Extra registers for specific events. |
188 | * | ||
191 | * Some events need large masks and require external MSRs. | 189 | * Some events need large masks and require external MSRs. |
192 | * Define a mapping to these extra registers. | 190 | * Those extra MSRs end up being shared for all events on |
191 | * a PMU and sometimes between PMU of sibling HT threads. | ||
192 | * In either case, the kernel needs to handle conflicting | ||
193 | * accesses to those extra, shared, regs. The data structure | ||
194 | * to manage those registers is stored in cpu_hw_events. | ||
193 | */ | 195 | */ |
194 | struct extra_reg { | 196 | struct extra_reg { |
195 | unsigned int event; | 197 | unsigned int event; |
196 | unsigned int msr; | 198 | unsigned int msr; |
197 | u64 config_mask; | 199 | u64 config_mask; |
198 | u64 valid_mask; | 200 | u64 valid_mask; |
201 | int idx; /* per_xxx->regs[] reg index */ | ||
199 | }; | 202 | }; |
200 | 203 | ||
201 | #define EVENT_EXTRA_REG(e, ms, m, vm) { \ | 204 | #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ |
202 | .event = (e), \ | 205 | .event = (e), \ |
203 | .msr = (ms), \ | 206 | .msr = (ms), \ |
204 | .config_mask = (m), \ | 207 | .config_mask = (m), \ |
205 | .valid_mask = (vm), \ | 208 | .valid_mask = (vm), \ |
209 | .idx = EXTRA_REG_##i \ | ||
206 | } | 210 | } |
207 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ | 211 | |
208 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) | 212 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ |
209 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) | 213 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) |
214 | |||
215 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) | ||
210 | 216 | ||
211 | union perf_capabilities { | 217 | union perf_capabilities { |
212 | struct { | 218 | struct { |
@@ -252,7 +258,6 @@ struct x86_pmu { | |||
252 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 258 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
253 | struct perf_event *event); | 259 | struct perf_event *event); |
254 | struct event_constraint *event_constraints; | 260 | struct event_constraint *event_constraints; |
255 | struct event_constraint *percore_constraints; | ||
256 | void (*quirks)(void); | 261 | void (*quirks)(void); |
257 | int perfctr_second_write; | 262 | int perfctr_second_write; |
258 | 263 | ||
@@ -286,8 +291,12 @@ struct x86_pmu { | |||
286 | * Extra registers for events | 291 | * Extra registers for events |
287 | */ | 292 | */ |
288 | struct extra_reg *extra_regs; | 293 | struct extra_reg *extra_regs; |
294 | unsigned int er_flags; | ||
289 | }; | 295 | }; |
290 | 296 | ||
297 | #define ERF_NO_HT_SHARING 1 | ||
298 | #define ERF_HAS_RSP_1 2 | ||
299 | |||
291 | static struct x86_pmu x86_pmu __read_mostly; | 300 | static struct x86_pmu x86_pmu __read_mostly; |
292 | 301 | ||
293 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 302 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
@@ -393,10 +402,10 @@ static inline unsigned int x86_pmu_event_addr(int index) | |||
393 | */ | 402 | */ |
394 | static int x86_pmu_extra_regs(u64 config, struct perf_event *event) | 403 | static int x86_pmu_extra_regs(u64 config, struct perf_event *event) |
395 | { | 404 | { |
405 | struct hw_perf_event_extra *reg; | ||
396 | struct extra_reg *er; | 406 | struct extra_reg *er; |
397 | 407 | ||
398 | event->hw.extra_reg = 0; | 408 | reg = &event->hw.extra_reg; |
399 | event->hw.extra_config = 0; | ||
400 | 409 | ||
401 | if (!x86_pmu.extra_regs) | 410 | if (!x86_pmu.extra_regs) |
402 | return 0; | 411 | return 0; |
@@ -406,8 +415,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) | |||
406 | continue; | 415 | continue; |
407 | if (event->attr.config1 & ~er->valid_mask) | 416 | if (event->attr.config1 & ~er->valid_mask) |
408 | return -EINVAL; | 417 | return -EINVAL; |
409 | event->hw.extra_reg = er->msr; | 418 | |
410 | event->hw.extra_config = event->attr.config1; | 419 | reg->idx = er->idx; |
420 | reg->config = event->attr.config1; | ||
421 | reg->reg = er->msr; | ||
411 | break; | 422 | break; |
412 | } | 423 | } |
413 | return 0; | 424 | return 0; |
@@ -706,6 +717,9 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
706 | event->hw.last_cpu = -1; | 717 | event->hw.last_cpu = -1; |
707 | event->hw.last_tag = ~0ULL; | 718 | event->hw.last_tag = ~0ULL; |
708 | 719 | ||
720 | /* mark unused */ | ||
721 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
722 | |||
709 | return x86_pmu.hw_config(event); | 723 | return x86_pmu.hw_config(event); |
710 | } | 724 | } |
711 | 725 | ||
@@ -747,8 +761,8 @@ static void x86_pmu_disable(struct pmu *pmu) | |||
747 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | 761 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
748 | u64 enable_mask) | 762 | u64 enable_mask) |
749 | { | 763 | { |
750 | if (hwc->extra_reg) | 764 | if (hwc->extra_reg.reg) |
751 | wrmsrl(hwc->extra_reg, hwc->extra_config); | 765 | wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); |
752 | wrmsrl(hwc->config_base, hwc->config | enable_mask); | 766 | wrmsrl(hwc->config_base, hwc->config | enable_mask); |
753 | } | 767 | } |
754 | 768 | ||
@@ -1332,7 +1346,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1332 | if (!x86_perf_event_set_period(event)) | 1346 | if (!x86_perf_event_set_period(event)) |
1333 | continue; | 1347 | continue; |
1334 | 1348 | ||
1335 | if (perf_event_overflow(event, 1, &data, regs)) | 1349 | if (perf_event_overflow(event, &data, regs)) |
1336 | x86_pmu_stop(event, 0); | 1350 | x86_pmu_stop(event, 0); |
1337 | } | 1351 | } |
1338 | 1352 | ||
@@ -1637,6 +1651,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu) | |||
1637 | perf_pmu_enable(pmu); | 1651 | perf_pmu_enable(pmu); |
1638 | return 0; | 1652 | return 0; |
1639 | } | 1653 | } |
1654 | /* | ||
1655 | * a fake_cpuc is used to validate event groups. Due to | ||
1656 | * the extra reg logic, we need to also allocate a fake | ||
1657 | * per_core and per_cpu structure. Otherwise, group events | ||
1658 | * using extra reg may conflict without the kernel being | ||
1659 | * able to catch this when the last event gets added to | ||
1660 | * the group. | ||
1661 | */ | ||
1662 | static void free_fake_cpuc(struct cpu_hw_events *cpuc) | ||
1663 | { | ||
1664 | kfree(cpuc->shared_regs); | ||
1665 | kfree(cpuc); | ||
1666 | } | ||
1667 | |||
1668 | static struct cpu_hw_events *allocate_fake_cpuc(void) | ||
1669 | { | ||
1670 | struct cpu_hw_events *cpuc; | ||
1671 | int cpu = raw_smp_processor_id(); | ||
1672 | |||
1673 | cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); | ||
1674 | if (!cpuc) | ||
1675 | return ERR_PTR(-ENOMEM); | ||
1676 | |||
1677 | /* only needed if we have extra_regs */ | ||
1678 | if (x86_pmu.extra_regs) { | ||
1679 | cpuc->shared_regs = allocate_shared_regs(cpu); | ||
1680 | if (!cpuc->shared_regs) | ||
1681 | goto error; | ||
1682 | } | ||
1683 | return cpuc; | ||
1684 | error: | ||
1685 | free_fake_cpuc(cpuc); | ||
1686 | return ERR_PTR(-ENOMEM); | ||
1687 | } | ||
1640 | 1688 | ||
1641 | /* | 1689 | /* |
1642 | * validate that we can schedule this event | 1690 | * validate that we can schedule this event |
@@ -1647,9 +1695,9 @@ static int validate_event(struct perf_event *event) | |||
1647 | struct event_constraint *c; | 1695 | struct event_constraint *c; |
1648 | int ret = 0; | 1696 | int ret = 0; |
1649 | 1697 | ||
1650 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | 1698 | fake_cpuc = allocate_fake_cpuc(); |
1651 | if (!fake_cpuc) | 1699 | if (IS_ERR(fake_cpuc)) |
1652 | return -ENOMEM; | 1700 | return PTR_ERR(fake_cpuc); |
1653 | 1701 | ||
1654 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | 1702 | c = x86_pmu.get_event_constraints(fake_cpuc, event); |
1655 | 1703 | ||
@@ -1659,7 +1707,7 @@ static int validate_event(struct perf_event *event) | |||
1659 | if (x86_pmu.put_event_constraints) | 1707 | if (x86_pmu.put_event_constraints) |
1660 | x86_pmu.put_event_constraints(fake_cpuc, event); | 1708 | x86_pmu.put_event_constraints(fake_cpuc, event); |
1661 | 1709 | ||
1662 | kfree(fake_cpuc); | 1710 | free_fake_cpuc(fake_cpuc); |
1663 | 1711 | ||
1664 | return ret; | 1712 | return ret; |
1665 | } | 1713 | } |
@@ -1679,36 +1727,32 @@ static int validate_group(struct perf_event *event) | |||
1679 | { | 1727 | { |
1680 | struct perf_event *leader = event->group_leader; | 1728 | struct perf_event *leader = event->group_leader; |
1681 | struct cpu_hw_events *fake_cpuc; | 1729 | struct cpu_hw_events *fake_cpuc; |
1682 | int ret, n; | 1730 | int ret = -ENOSPC, n; |
1683 | |||
1684 | ret = -ENOMEM; | ||
1685 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1686 | if (!fake_cpuc) | ||
1687 | goto out; | ||
1688 | 1731 | ||
1732 | fake_cpuc = allocate_fake_cpuc(); | ||
1733 | if (IS_ERR(fake_cpuc)) | ||
1734 | return PTR_ERR(fake_cpuc); | ||
1689 | /* | 1735 | /* |
1690 | * the event is not yet connected with its | 1736 | * the event is not yet connected with its |
1691 | * siblings therefore we must first collect | 1737 | * siblings therefore we must first collect |
1692 | * existing siblings, then add the new event | 1738 | * existing siblings, then add the new event |
1693 | * before we can simulate the scheduling | 1739 | * before we can simulate the scheduling |
1694 | */ | 1740 | */ |
1695 | ret = -ENOSPC; | ||
1696 | n = collect_events(fake_cpuc, leader, true); | 1741 | n = collect_events(fake_cpuc, leader, true); |
1697 | if (n < 0) | 1742 | if (n < 0) |
1698 | goto out_free; | 1743 | goto out; |
1699 | 1744 | ||
1700 | fake_cpuc->n_events = n; | 1745 | fake_cpuc->n_events = n; |
1701 | n = collect_events(fake_cpuc, event, false); | 1746 | n = collect_events(fake_cpuc, event, false); |
1702 | if (n < 0) | 1747 | if (n < 0) |
1703 | goto out_free; | 1748 | goto out; |
1704 | 1749 | ||
1705 | fake_cpuc->n_events = n; | 1750 | fake_cpuc->n_events = n; |
1706 | 1751 | ||
1707 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); | 1752 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1708 | 1753 | ||
1709 | out_free: | ||
1710 | kfree(fake_cpuc); | ||
1711 | out: | 1754 | out: |
1755 | free_fake_cpuc(fake_cpuc); | ||
1712 | return ret; | 1756 | return ret; |
1713 | } | 1757 | } |
1714 | 1758 | ||