author     Linus Torvalds <torvalds@linux-foundation.org>    2014-03-31 14:13:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2014-03-31 14:13:25 -0400
commit     8c292f11744297dfb3a69f4a0bccbe4a6417b50d
tree       f1a89560de25a69b697d459a9b5cf2e738038d9f /arch/x86
parent     d31605dc8a63f1df28443ddb3560b1079417af92
parent     538592ff0b008237ae88f5ce5fb1247127dc3ce5
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf changes from Ingo Molnar:
"Main changes:
Kernel side changes:
- Add SNB/IVB/HSW client uncore memory controller support (Stephane
Eranian)
- Fix various x86/P4 PMU driver bugs (Don Zickus)
Tooling, user visible changes:
- Add several futex 'perf bench' microbenchmarks (Davidlohr Bueso)
- Speed up thread map generation (Don Zickus)
- Introduce 'perf kvm --list-cmds' command line option for use by
scripts (Ramkumar Ramachandra)
- Print the evsel name in the annotate stdio output, prep to fix
support outputting annotation for multiple events, not just for the
first one (Arnaldo Carvalho de Melo)
- Allow setting preferred callchain method in .perfconfig (Jiri Olsa)
- Show in what binaries/modules 'perf probe's are set (Masami
Hiramatsu)
- Support distro-style debuginfo for uprobe in 'perf probe' (Masami
Hiramatsu)
Tooling, internal changes and fixes:
- Use tid in mmap/mmap2 events to find maps (Don Zickus)
- Record the reason for filtering an address_location (Namhyung Kim)
- Apply all filters to an addr_location (Namhyung Kim)
- Merge al->filtered with hist_entry->filtered in report/hists
(Namhyung Kim)
- Fix memory leak when synthesizing thread records (Namhyung Kim)
- Use ui__has_annotation() in 'report' (Namhyung Kim)
- hists browser refactorings to reuse code across UIs (Namhyung Kim)
- Add support for the new DWARF unwinder library in elfutils (Jiri
Olsa)
- Fix build race in the generation of bison files (Jiri Olsa)
- Further streamline the feature detection display, trimming it a bit
to show just the libraries detected, using VF=1 gets a more verbose
output, showing the less interesting feature checks as well (Jiri
Olsa).
- Check compatible symtab type before loading dso (Namhyung Kim)
- Check return value of filename__read_debuglink() (Stephane Eranian)
- Move some hashing and fs related code from tools/perf/util/ to
tools/lib/ so that it can be used by more tools/ living utilities
(Borislav Petkov)
- Prepare DWARF unwinding code for using an elfutils alternative
unwinding library (Jiri Olsa)
- Fix DWARF unwind max_stack processing (Jiri Olsa)
- Add dwarf unwind 'perf test' entry (Jiri Olsa)
- 'perf probe' improvements including memory leak fixes, sharing the
intlist class with other tools, uprobes/kprobes code sharing and
use of ref_reloc_sym (Masami Hiramatsu)
- Shorten sample symbol resolving by adding cpumode to struct
addr_location (Arnaldo Carvalho de Melo)
- Fix synthesizing mmaps for threads (Don Zickus)
- Fix invalid output on event group stdio report (Namhyung Kim)
- Fixup header alignment in 'perf sched latency' output (Ramkumar
Ramachandra)
- Fix off-by-one error in 'perf timechart record' argv handling
(Ramkumar Ramachandra)
Tooling, cleanups:
- Remove unused thread__find_map function (Jiri Olsa)
- Remove unused simple_strtoul() function (Ramkumar Ramachandra)
Tooling, documentation updates:
- Update function names in debug messages (Ramkumar Ramachandra)
- Update some code references in design.txt (Ramkumar Ramachandra)
- Clarify load-latency information in the 'perf mem' docs (Andi
Kleen)
- Clarify x86 register naming in 'perf probe' docs (Andi Kleen)"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (96 commits)
perf tools: Remove unused simple_strtoul() function
perf tools: Update some code references in design.txt
perf evsel: Update function names in debug messages
perf tools: Remove thread__find_map function
perf annotate: Print the evsel name in the stdio output
perf report: Use ui__has_annotation()
perf tools: Fix memory leak when synthesizing thread records
perf tools: Use tid in mmap/mmap2 events to find maps
perf report: Merge al->filtered with hist_entry->filtered
perf symbols: Apply all filters to an addr_location
perf symbols: Record the reason for filtering an address_location
perf sched: Fixup header alignment in 'latency' output
perf timechart: Fix off-by-one error in 'record' argv handling
perf machine: Factor machine__find_thread to take tid argument
perf tools: Speed up thread map generation
perf kvm: introduce --list-cmds for use by scripts
perf ui hists: Pass evsel to hpp->header/width functions explicitly
perf symbols: Introduce thread__find_cpumode_addr_location
perf session: Change header.misc dump from decimal to hex
perf ui/tui: Reuse generic __hpp__fmt() code
...
Diffstat (limited to 'arch/x86')
 arch/x86/include/asm/nmi.h                    |   3
 arch/x86/kernel/cpu/perf_event.c              |  47
 arch/x86/kernel/cpu/perf_event.h              |   8
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 544
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |   5
 arch/x86/kernel/cpu/perf_event_p4.c           |  34
 arch/x86/kernel/nmi.c                         |  37
 7 files changed, 576 insertions(+), 102 deletions(-)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 86f9301903c8..5f2fc4441b11 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _ASM_X86_NMI_H | 1 | #ifndef _ASM_X86_NMI_H |
2 | #define _ASM_X86_NMI_H | 2 | #define _ASM_X86_NMI_H |
3 | 3 | ||
4 | #include <linux/irq_work.h> | ||
4 | #include <linux/pm.h> | 5 | #include <linux/pm.h> |
5 | #include <asm/irq.h> | 6 | #include <asm/irq.h> |
6 | #include <asm/io.h> | 7 | #include <asm/io.h> |
@@ -38,6 +39,8 @@ typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); | |||
38 | struct nmiaction { | 39 | struct nmiaction { |
39 | struct list_head list; | 40 | struct list_head list; |
40 | nmi_handler_t handler; | 41 | nmi_handler_t handler; |
42 | u64 max_duration; | ||
43 | struct irq_work irq_work; | ||
41 | unsigned long flags; | 44 | unsigned long flags; |
42 | const char *name; | 45 | const char *name; |
43 | }; | 46 | }; |
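
The two nmiaction fields added here (max_duration, irq_work) are consumed by the arch/x86/kernel/nmi.c changes at the end of this diff. A minimal sketch of the pattern they enable, using only the generic irq_work API (illustrative code, not part of the patch): data is recorded in NMI context and the printk is deferred to an irq_work callback, since printing directly from an NMI is not safe.

    #include <linux/irq_work.h>
    #include <linux/kernel.h>
    #include <linux/printk.h>
    #include <linux/types.h>

    struct slow_stats {
            u64 max_ns;
            struct irq_work work;   /* set up with init_irq_work(&work, report_slow) */
    };

    static void report_slow(struct irq_work *w)
    {
            struct slow_stats *s = container_of(w, struct slow_stats, work);

            pr_info("slowest run so far: %llu ns\n",
                    (unsigned long long)s->max_ns);
    }

    /* called from NMI context */
    static void note_duration(struct slow_stats *s, u64 delta_ns)
    {
            if (delta_ns <= s->max_ns)
                    return;

            s->max_ns = delta_ns;
            irq_work_queue(&s->work);       /* NMI-safe: runs report_slow() later */
    }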
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 79f9f848bee4..ae407f7226c8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *pmu) | |||
892 | * hw_perf_group_sched_in() or x86_pmu_enable() | 892 | * hw_perf_group_sched_in() or x86_pmu_enable() |
893 | * | 893 | * |
894 | * step1: save events moving to new counters | 894 | * step1: save events moving to new counters |
895 | * step2: reprogram moved events into new counters | ||
896 | */ | 895 | */ |
897 | for (i = 0; i < n_running; i++) { | 896 | for (i = 0; i < n_running; i++) { |
898 | event = cpuc->event_list[i]; | 897 | event = cpuc->event_list[i]; |
@@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *pmu) | |||
918 | x86_pmu_stop(event, PERF_EF_UPDATE); | 917 | x86_pmu_stop(event, PERF_EF_UPDATE); |
919 | } | 918 | } |
920 | 919 | ||
920 | /* | ||
921 | * step2: reprogram moved events into new counters | ||
922 | */ | ||
921 | for (i = 0; i < cpuc->n_events; i++) { | 923 | for (i = 0; i < cpuc->n_events; i++) { |
922 | event = cpuc->event_list[i]; | 924 | event = cpuc->event_list[i]; |
923 | hwc = &event->hw; | 925 | hwc = &event->hw; |
@@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) | |||
1043 | /* | 1045 | /* |
1044 | * If group events scheduling transaction was started, | 1046 | * If group events scheduling transaction was started, |
1045 | * skip the schedulability test here, it will be performed | 1047 | * skip the schedulability test here, it will be performed |
1046 | * at commit time (->commit_txn) as a whole | 1048 | * at commit time (->commit_txn) as a whole. |
1047 | */ | 1049 | */ |
1048 | if (cpuc->group_flag & PERF_EVENT_TXN) | 1050 | if (cpuc->group_flag & PERF_EVENT_TXN) |
1049 | goto done_collect; | 1051 | goto done_collect; |
@@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event *event, int flags) | |||
1058 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1060 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
1059 | 1061 | ||
1060 | done_collect: | 1062 | done_collect: |
1063 | /* | ||
1064 | * Commit the collect_events() state. See x86_pmu_del() and | ||
1065 | * x86_pmu_*_txn(). | ||
1066 | */ | ||
1061 | cpuc->n_events = n; | 1067 | cpuc->n_events = n; |
1062 | cpuc->n_added += n - n0; | 1068 | cpuc->n_added += n - n0; |
1063 | cpuc->n_txn += n - n0; | 1069 | cpuc->n_txn += n - n0; |
@@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_event *event, int flags) | |||
1183 | * If we're called during a txn, we don't need to do anything. | 1189 | * If we're called during a txn, we don't need to do anything. |
1184 | * The events never got scheduled and ->cancel_txn will truncate | 1190 | * The events never got scheduled and ->cancel_txn will truncate |
1185 | * the event_list. | 1191 | * the event_list. |
1192 | * | ||
1193 | * XXX assumes any ->del() called during a TXN will only be on | ||
1194 | * an event added during that same TXN. | ||
1186 | */ | 1195 | */ |
1187 | if (cpuc->group_flag & PERF_EVENT_TXN) | 1196 | if (cpuc->group_flag & PERF_EVENT_TXN) |
1188 | return; | 1197 | return; |
1189 | 1198 | ||
1199 | /* | ||
1200 | * Not a TXN, therefore cleanup properly. | ||
1201 | */ | ||
1190 | x86_pmu_stop(event, PERF_EF_UPDATE); | 1202 | x86_pmu_stop(event, PERF_EF_UPDATE); |
1191 | 1203 | ||
1192 | for (i = 0; i < cpuc->n_events; i++) { | 1204 | for (i = 0; i < cpuc->n_events; i++) { |
1193 | if (event == cpuc->event_list[i]) { | 1205 | if (event == cpuc->event_list[i]) |
1206 | break; | ||
1207 | } | ||
1194 | 1208 | ||
1195 | if (i >= cpuc->n_events - cpuc->n_added) | 1209 | if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */ |
1196 | --cpuc->n_added; | 1210 | return; |
1197 | 1211 | ||
1198 | if (x86_pmu.put_event_constraints) | 1212 | /* If we have a newly added event; make sure to decrease n_added. */ |
1199 | x86_pmu.put_event_constraints(cpuc, event); | 1213 | if (i >= cpuc->n_events - cpuc->n_added) |
1214 | --cpuc->n_added; | ||
1200 | 1215 | ||
1201 | while (++i < cpuc->n_events) | 1216 | if (x86_pmu.put_event_constraints) |
1202 | cpuc->event_list[i-1] = cpuc->event_list[i]; | 1217 | x86_pmu.put_event_constraints(cpuc, event); |
1218 | |||
1219 | /* Delete the array entry. */ | ||
1220 | while (++i < cpuc->n_events) | ||
1221 | cpuc->event_list[i-1] = cpuc->event_list[i]; | ||
1222 | --cpuc->n_events; | ||
1203 | 1223 | ||
1204 | --cpuc->n_events; | ||
1205 | break; | ||
1206 | } | ||
1207 | } | ||
1208 | perf_event_update_userpage(event); | 1224 | perf_event_update_userpage(event); |
1209 | } | 1225 | } |
1210 | 1226 | ||
@@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) | |||
1598 | { | 1614 | { |
1599 | __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); | 1615 | __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); |
1600 | /* | 1616 | /* |
1601 | * Truncate the collected events. | 1617 | * Truncate collected array by the number of events added in this |
1618 | * transaction. See x86_pmu_add() and x86_pmu_*_txn(). | ||
1602 | */ | 1619 | */ |
1603 | __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); | 1620 | __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); |
1604 | __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); | 1621 | __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); |
@@ -1609,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) | |||
1609 | * Commit group events scheduling transaction | 1626 | * Commit group events scheduling transaction |
1610 | * Perform the group schedulability test as a whole | 1627 | * Perform the group schedulability test as a whole |
1611 | * Return 0 if success | 1628 | * Return 0 if success |
1629 | * | ||
1630 | * Does not cancel the transaction on failure; expects the caller to do this. | ||
1612 | */ | 1631 | */ |
1613 | static int x86_pmu_commit_txn(struct pmu *pmu) | 1632 | static int x86_pmu_commit_txn(struct pmu *pmu) |
1614 | { | 1633 | { |
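
The x86_pmu_del() restructuring above separates finding the event from deleting it: the loop now just locates the slot, a WARN_ON_ONCE catches a ->del() without a prior ->add(), and the array is compacted afterwards. A generic, self-contained sketch of that compaction idiom (illustrative only, not the kernel structs):

    /* Remove 'item' from a dense array of 'nr' entries, keeping order. */
    static void array_del(void **list, int *nr, void *item)
    {
            int i;

            for (i = 0; i < *nr; i++)
                    if (list[i] == item)
                            break;

            if (i == *nr)           /* not found: nothing to delete */
                    return;

            while (++i < *nr)       /* shift the tail down by one slot */
                    list[i - 1] = list[i];

            --(*nr);
    }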
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 4972c244d0bc..3b2f9bdd974b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -130,9 +130,11 @@ struct cpu_hw_events { | |||
130 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 130 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
131 | int enabled; | 131 | int enabled; |
132 | 132 | ||
133 | int n_events; | 133 | int n_events; /* the # of events in the below arrays */ |
134 | int n_added; | 134 | int n_added; /* the # last events in the below arrays; |
135 | int n_txn; | 135 | they've never been enabled yet */ |
136 | int n_txn; /* the # last events in the below arrays; | ||
137 | added in the current transaction */ | ||
136 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 138 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
137 | u64 tags[X86_PMC_IDX_MAX]; | 139 | u64 tags[X86_PMC_IDX_MAX]; |
138 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 140 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
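
To make the new field comments concrete, here is a short worked trace of the bookkeeping as it reads from x86_pmu_add(), x86_pmu_del() and the x86_pmu_*_txn() callbacks in this series (illustrative values; it also assumes x86_pmu_start_txn() zeroes n_txn, which is not shown in this hunk):

    /*
     * Start: two events already scheduled and enabled.
     *     n_events = 2, n_added = 0, n_txn = 0
     *
     * x86_pmu_start_txn()                  -> n_txn = 0, TXN flag set
     * x86_pmu_add(A); x86_pmu_add(B)       -> n_events = 4, n_added = 2, n_txn = 2
     *
     * x86_pmu_commit_txn() passes the schedulability test
     *                                      -> state kept, TXN flag cleared
     * ...or x86_pmu_cancel_txn()           -> n_events -= n_txn   (back to 2)
     *                                         n_added  -= n_txn   (back to 0)
     */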
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 047f540cf3f7..bd2253d40cff 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -66,6 +66,47 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); | |||
66 | DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); | 66 | DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); |
67 | DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); | 67 | DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); |
68 | 68 | ||
69 | static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); | ||
70 | static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); | ||
71 | static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); | ||
72 | static void uncore_pmu_event_read(struct perf_event *event); | ||
73 | |||
74 | static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) | ||
75 | { | ||
76 | return container_of(event->pmu, struct intel_uncore_pmu, pmu); | ||
77 | } | ||
78 | |||
79 | static struct intel_uncore_box * | ||
80 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | ||
81 | { | ||
82 | struct intel_uncore_box *box; | ||
83 | |||
84 | box = *per_cpu_ptr(pmu->box, cpu); | ||
85 | if (box) | ||
86 | return box; | ||
87 | |||
88 | raw_spin_lock(&uncore_box_lock); | ||
89 | list_for_each_entry(box, &pmu->box_list, list) { | ||
90 | if (box->phys_id == topology_physical_package_id(cpu)) { | ||
91 | atomic_inc(&box->refcnt); | ||
92 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
93 | break; | ||
94 | } | ||
95 | } | ||
96 | raw_spin_unlock(&uncore_box_lock); | ||
97 | |||
98 | return *per_cpu_ptr(pmu->box, cpu); | ||
99 | } | ||
100 | |||
101 | static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) | ||
102 | { | ||
103 | /* | ||
104 | * perf core schedules event on the basis of cpu, uncore events are | ||
105 | * collected by one of the cpus inside a physical package. | ||
106 | */ | ||
107 | return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); | ||
108 | } | ||
109 | |||
69 | static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) | 110 | static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) |
70 | { | 111 | { |
71 | u64 count; | 112 | u64 count; |
@@ -1639,6 +1680,349 @@ static struct intel_uncore_type *snb_msr_uncores[] = { | |||
1639 | &snb_uncore_cbox, | 1680 | &snb_uncore_cbox, |
1640 | NULL, | 1681 | NULL, |
1641 | }; | 1682 | }; |
1683 | |||
1684 | enum { | ||
1685 | SNB_PCI_UNCORE_IMC, | ||
1686 | }; | ||
1687 | |||
1688 | static struct uncore_event_desc snb_uncore_imc_events[] = { | ||
1689 | INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), | ||
1690 | INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"), | ||
1691 | INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"), | ||
1692 | |||
1693 | INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), | ||
1694 | INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), | ||
1695 | INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), | ||
1696 | |||
1697 | { /* end: all zeroes */ }, | ||
1698 | }; | ||
1699 | |||
1700 | #define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff | ||
1701 | #define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 | ||
1702 | |||
1703 | /* page size multiple covering all config regs */ | ||
1704 | #define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 | ||
1705 | |||
1706 | #define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 | ||
1707 | #define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 | ||
1708 | #define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 | ||
1709 | #define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 | ||
1710 | #define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE | ||
1711 | |||
1712 | static struct attribute *snb_uncore_imc_formats_attr[] = { | ||
1713 | &format_attr_event.attr, | ||
1714 | NULL, | ||
1715 | }; | ||
1716 | |||
1717 | static struct attribute_group snb_uncore_imc_format_group = { | ||
1718 | .name = "format", | ||
1719 | .attrs = snb_uncore_imc_formats_attr, | ||
1720 | }; | ||
1721 | |||
1722 | static void snb_uncore_imc_init_box(struct intel_uncore_box *box) | ||
1723 | { | ||
1724 | struct pci_dev *pdev = box->pci_dev; | ||
1725 | int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; | ||
1726 | resource_size_t addr; | ||
1727 | u32 pci_dword; | ||
1728 | |||
1729 | pci_read_config_dword(pdev, where, &pci_dword); | ||
1730 | addr = pci_dword; | ||
1731 | |||
1732 | #ifdef CONFIG_PHYS_ADDR_T_64BIT | ||
1733 | pci_read_config_dword(pdev, where + 4, &pci_dword); | ||
1734 | addr |= ((resource_size_t)pci_dword << 32); | ||
1735 | #endif | ||
1736 | |||
1737 | addr &= ~(PAGE_SIZE - 1); | ||
1738 | |||
1739 | box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); | ||
1740 | box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; | ||
1741 | } | ||
1742 | |||
1743 | static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) | ||
1744 | {} | ||
1745 | |||
1746 | static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) | ||
1747 | {} | ||
1748 | |||
1749 | static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1750 | {} | ||
1751 | |||
1752 | static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1753 | {} | ||
1754 | |||
1755 | static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) | ||
1756 | { | ||
1757 | struct hw_perf_event *hwc = &event->hw; | ||
1758 | |||
1759 | return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); | ||
1760 | } | ||
1761 | |||
1762 | /* | ||
1763 | * custom event_init() function because we define our own fixed, free | ||
1764 | * running counters, so we do not want to conflict with generic uncore | ||
1765 | * logic. Also simplifies processing | ||
1766 | */ | ||
1767 | static int snb_uncore_imc_event_init(struct perf_event *event) | ||
1768 | { | ||
1769 | struct intel_uncore_pmu *pmu; | ||
1770 | struct intel_uncore_box *box; | ||
1771 | struct hw_perf_event *hwc = &event->hw; | ||
1772 | u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; | ||
1773 | int idx, base; | ||
1774 | |||
1775 | if (event->attr.type != event->pmu->type) | ||
1776 | return -ENOENT; | ||
1777 | |||
1778 | pmu = uncore_event_to_pmu(event); | ||
1779 | /* no device found for this pmu */ | ||
1780 | if (pmu->func_id < 0) | ||
1781 | return -ENOENT; | ||
1782 | |||
1783 | /* Sampling not supported yet */ | ||
1784 | if (hwc->sample_period) | ||
1785 | return -EINVAL; | ||
1786 | |||
1787 | /* unsupported modes and filters */ | ||
1788 | if (event->attr.exclude_user || | ||
1789 | event->attr.exclude_kernel || | ||
1790 | event->attr.exclude_hv || | ||
1791 | event->attr.exclude_idle || | ||
1792 | event->attr.exclude_host || | ||
1793 | event->attr.exclude_guest || | ||
1794 | event->attr.sample_period) /* no sampling */ | ||
1795 | return -EINVAL; | ||
1796 | |||
1797 | /* | ||
1798 | * Place all uncore events for a particular physical package | ||
1799 | * onto a single cpu | ||
1800 | */ | ||
1801 | if (event->cpu < 0) | ||
1802 | return -EINVAL; | ||
1803 | |||
1804 | /* check only supported bits are set */ | ||
1805 | if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) | ||
1806 | return -EINVAL; | ||
1807 | |||
1808 | box = uncore_pmu_to_box(pmu, event->cpu); | ||
1809 | if (!box || box->cpu < 0) | ||
1810 | return -EINVAL; | ||
1811 | |||
1812 | event->cpu = box->cpu; | ||
1813 | |||
1814 | event->hw.idx = -1; | ||
1815 | event->hw.last_tag = ~0ULL; | ||
1816 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
1817 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | ||
1818 | /* | ||
1819 | * check event is known (whitelist, determines counter) | ||
1820 | */ | ||
1821 | switch (cfg) { | ||
1822 | case SNB_UNCORE_PCI_IMC_DATA_READS: | ||
1823 | base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; | ||
1824 | idx = UNCORE_PMC_IDX_FIXED; | ||
1825 | break; | ||
1826 | case SNB_UNCORE_PCI_IMC_DATA_WRITES: | ||
1827 | base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; | ||
1828 | idx = UNCORE_PMC_IDX_FIXED + 1; | ||
1829 | break; | ||
1830 | default: | ||
1831 | return -EINVAL; | ||
1832 | } | ||
1833 | |||
1834 | /* must be done before validate_group */ | ||
1835 | event->hw.event_base = base; | ||
1836 | event->hw.config = cfg; | ||
1837 | event->hw.idx = idx; | ||
1838 | |||
1839 | /* no group validation needed, we have free running counters */ | ||
1840 | |||
1841 | return 0; | ||
1842 | } | ||
1843 | |||
1844 | static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event) | ||
1845 | { | ||
1846 | return 0; | ||
1847 | } | ||
1848 | |||
1849 | static void snb_uncore_imc_event_start(struct perf_event *event, int flags) | ||
1850 | { | ||
1851 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
1852 | u64 count; | ||
1853 | |||
1854 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
1855 | return; | ||
1856 | |||
1857 | event->hw.state = 0; | ||
1858 | box->n_active++; | ||
1859 | |||
1860 | list_add_tail(&event->active_entry, &box->active_list); | ||
1861 | |||
1862 | count = snb_uncore_imc_read_counter(box, event); | ||
1863 | local64_set(&event->hw.prev_count, count); | ||
1864 | |||
1865 | if (box->n_active == 1) | ||
1866 | uncore_pmu_start_hrtimer(box); | ||
1867 | } | ||
1868 | |||
1869 | static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) | ||
1870 | { | ||
1871 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
1872 | struct hw_perf_event *hwc = &event->hw; | ||
1873 | |||
1874 | if (!(hwc->state & PERF_HES_STOPPED)) { | ||
1875 | box->n_active--; | ||
1876 | |||
1877 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
1878 | hwc->state |= PERF_HES_STOPPED; | ||
1879 | |||
1880 | list_del(&event->active_entry); | ||
1881 | |||
1882 | if (box->n_active == 0) | ||
1883 | uncore_pmu_cancel_hrtimer(box); | ||
1884 | } | ||
1885 | |||
1886 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
1887 | /* | ||
1888 | * Drain the remaining delta count out of a event | ||
1889 | * that we are disabling: | ||
1890 | */ | ||
1891 | uncore_perf_event_update(box, event); | ||
1892 | hwc->state |= PERF_HES_UPTODATE; | ||
1893 | } | ||
1894 | } | ||
1895 | |||
1896 | static int snb_uncore_imc_event_add(struct perf_event *event, int flags) | ||
1897 | { | ||
1898 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
1899 | struct hw_perf_event *hwc = &event->hw; | ||
1900 | |||
1901 | if (!box) | ||
1902 | return -ENODEV; | ||
1903 | |||
1904 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
1905 | if (!(flags & PERF_EF_START)) | ||
1906 | hwc->state |= PERF_HES_ARCH; | ||
1907 | |||
1908 | snb_uncore_imc_event_start(event, 0); | ||
1909 | |||
1910 | box->n_events++; | ||
1911 | |||
1912 | return 0; | ||
1913 | } | ||
1914 | |||
1915 | static void snb_uncore_imc_event_del(struct perf_event *event, int flags) | ||
1916 | { | ||
1917 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
1918 | int i; | ||
1919 | |||
1920 | snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); | ||
1921 | |||
1922 | for (i = 0; i < box->n_events; i++) { | ||
1923 | if (event == box->event_list[i]) { | ||
1924 | --box->n_events; | ||
1925 | break; | ||
1926 | } | ||
1927 | } | ||
1928 | } | ||
1929 | |||
1930 | static int snb_pci2phy_map_init(int devid) | ||
1931 | { | ||
1932 | struct pci_dev *dev = NULL; | ||
1933 | int bus; | ||
1934 | |||
1935 | dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); | ||
1936 | if (!dev) | ||
1937 | return -ENOTTY; | ||
1938 | |||
1939 | bus = dev->bus->number; | ||
1940 | |||
1941 | pcibus_to_physid[bus] = 0; | ||
1942 | |||
1943 | pci_dev_put(dev); | ||
1944 | |||
1945 | return 0; | ||
1946 | } | ||
1947 | |||
1948 | static struct pmu snb_uncore_imc_pmu = { | ||
1949 | .task_ctx_nr = perf_invalid_context, | ||
1950 | .event_init = snb_uncore_imc_event_init, | ||
1951 | .add = snb_uncore_imc_event_add, | ||
1952 | .del = snb_uncore_imc_event_del, | ||
1953 | .start = snb_uncore_imc_event_start, | ||
1954 | .stop = snb_uncore_imc_event_stop, | ||
1955 | .read = uncore_pmu_event_read, | ||
1956 | }; | ||
1957 | |||
1958 | static struct intel_uncore_ops snb_uncore_imc_ops = { | ||
1959 | .init_box = snb_uncore_imc_init_box, | ||
1960 | .enable_box = snb_uncore_imc_enable_box, | ||
1961 | .disable_box = snb_uncore_imc_disable_box, | ||
1962 | .disable_event = snb_uncore_imc_disable_event, | ||
1963 | .enable_event = snb_uncore_imc_enable_event, | ||
1964 | .hw_config = snb_uncore_imc_hw_config, | ||
1965 | .read_counter = snb_uncore_imc_read_counter, | ||
1966 | }; | ||
1967 | |||
1968 | static struct intel_uncore_type snb_uncore_imc = { | ||
1969 | .name = "imc", | ||
1970 | .num_counters = 2, | ||
1971 | .num_boxes = 1, | ||
1972 | .fixed_ctr_bits = 32, | ||
1973 | .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, | ||
1974 | .event_descs = snb_uncore_imc_events, | ||
1975 | .format_group = &snb_uncore_imc_format_group, | ||
1976 | .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE, | ||
1977 | .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, | ||
1978 | .ops = &snb_uncore_imc_ops, | ||
1979 | .pmu = &snb_uncore_imc_pmu, | ||
1980 | }; | ||
1981 | |||
1982 | static struct intel_uncore_type *snb_pci_uncores[] = { | ||
1983 | [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, | ||
1984 | NULL, | ||
1985 | }; | ||
1986 | |||
1987 | static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { | ||
1988 | { /* IMC */ | ||
1989 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), | ||
1990 | .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), | ||
1991 | }, | ||
1992 | { /* end: all zeroes */ }, | ||
1993 | }; | ||
1994 | |||
1995 | static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { | ||
1996 | { /* IMC */ | ||
1997 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), | ||
1998 | .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), | ||
1999 | }, | ||
2000 | { /* end: all zeroes */ }, | ||
2001 | }; | ||
2002 | |||
2003 | static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { | ||
2004 | { /* IMC */ | ||
2005 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), | ||
2006 | .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), | ||
2007 | }, | ||
2008 | { /* end: all zeroes */ }, | ||
2009 | }; | ||
2010 | |||
2011 | static struct pci_driver snb_uncore_pci_driver = { | ||
2012 | .name = "snb_uncore", | ||
2013 | .id_table = snb_uncore_pci_ids, | ||
2014 | }; | ||
2015 | |||
2016 | static struct pci_driver ivb_uncore_pci_driver = { | ||
2017 | .name = "ivb_uncore", | ||
2018 | .id_table = ivb_uncore_pci_ids, | ||
2019 | }; | ||
2020 | |||
2021 | static struct pci_driver hsw_uncore_pci_driver = { | ||
2022 | .name = "hsw_uncore", | ||
2023 | .id_table = hsw_uncore_pci_ids, | ||
2024 | }; | ||
2025 | |||
1642 | /* end of Sandy Bridge uncore support */ | 2026 | /* end of Sandy Bridge uncore support */ |
1643 | 2027 | ||
1644 | /* Nehalem uncore support */ | 2028 | /* Nehalem uncore support */ |
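
Two side notes on the IMC counters added above, under the assumption that each count represents one 64-byte line (the scale value strongly suggests this, but it is an inference, not something stated in the patch):

    /*
     * The "6.103515625e-5" scale:  64 / (1024 * 1024) = 6.103515625e-5,
     * i.e. multiplying a raw data_reads/data_writes count by the scale
     * converts 64-byte-line counts into MiB.
     *
     * With fixed_ctr_bits = 32 the counters are free running and wrap,
     * so a delta between two reads is normally taken modulo 2^32:
     */
    static u64 freerun_delta32(u64 prev, u64 now)
    {
            return (now - prev) & 0xffffffffULL;    /* correct across a 32-bit wrap */
    }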
@@ -2789,6 +3173,7 @@ again: | |||
2789 | static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) | 3173 | static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) |
2790 | { | 3174 | { |
2791 | struct intel_uncore_box *box; | 3175 | struct intel_uncore_box *box; |
3176 | struct perf_event *event; | ||
2792 | unsigned long flags; | 3177 | unsigned long flags; |
2793 | int bit; | 3178 | int bit; |
2794 | 3179 | ||
@@ -2801,19 +3186,27 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) | |||
2801 | */ | 3186 | */ |
2802 | local_irq_save(flags); | 3187 | local_irq_save(flags); |
2803 | 3188 | ||
3189 | /* | ||
3190 | * handle boxes with an active event list as opposed to active | ||
3191 | * counters | ||
3192 | */ | ||
3193 | list_for_each_entry(event, &box->active_list, active_entry) { | ||
3194 | uncore_perf_event_update(box, event); | ||
3195 | } | ||
3196 | |||
2804 | for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) | 3197 | for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) |
2805 | uncore_perf_event_update(box, box->events[bit]); | 3198 | uncore_perf_event_update(box, box->events[bit]); |
2806 | 3199 | ||
2807 | local_irq_restore(flags); | 3200 | local_irq_restore(flags); |
2808 | 3201 | ||
2809 | hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL)); | 3202 | hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); |
2810 | return HRTIMER_RESTART; | 3203 | return HRTIMER_RESTART; |
2811 | } | 3204 | } |
2812 | 3205 | ||
2813 | static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) | 3206 | static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) |
2814 | { | 3207 | { |
2815 | __hrtimer_start_range_ns(&box->hrtimer, | 3208 | __hrtimer_start_range_ns(&box->hrtimer, |
2816 | ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0, | 3209 | ns_to_ktime(box->hrtimer_duration), 0, |
2817 | HRTIMER_MODE_REL_PINNED, 0); | 3210 | HRTIMER_MODE_REL_PINNED, 0); |
2818 | } | 3211 | } |
2819 | 3212 | ||
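
The per-box hrtimer_duration introduced above matters mostly for the IMC counters. Rough arithmetic on why a shorter poll interval is needed (an estimate, not from the patch):

    /*
     * A 32-bit counter of 64-byte lines wraps after 2^32 * 64 bytes
     * = 256 GiB of traffic.  At ~20 GB/s of sustained memory bandwidth
     * that is roughly 13 seconds, so the 60 s UNCORE_PMU_HRTIMER_INTERVAL
     * default could miss whole wraps; the 5 s SNB IMC interval keeps every
     * sampled delta comfortably below 2^32.
     */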
@@ -2847,43 +3240,12 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, | |||
2847 | box->cpu = -1; | 3240 | box->cpu = -1; |
2848 | box->phys_id = -1; | 3241 | box->phys_id = -1; |
2849 | 3242 | ||
2850 | return box; | 3243 | /* set default hrtimer timeout */ |
2851 | } | 3244 | box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; |
2852 | 3245 | ||
2853 | static struct intel_uncore_box * | 3246 | INIT_LIST_HEAD(&box->active_list); |
2854 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | ||
2855 | { | ||
2856 | struct intel_uncore_box *box; | ||
2857 | 3247 | ||
2858 | box = *per_cpu_ptr(pmu->box, cpu); | 3248 | return box; |
2859 | if (box) | ||
2860 | return box; | ||
2861 | |||
2862 | raw_spin_lock(&uncore_box_lock); | ||
2863 | list_for_each_entry(box, &pmu->box_list, list) { | ||
2864 | if (box->phys_id == topology_physical_package_id(cpu)) { | ||
2865 | atomic_inc(&box->refcnt); | ||
2866 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
2867 | break; | ||
2868 | } | ||
2869 | } | ||
2870 | raw_spin_unlock(&uncore_box_lock); | ||
2871 | |||
2872 | return *per_cpu_ptr(pmu->box, cpu); | ||
2873 | } | ||
2874 | |||
2875 | static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) | ||
2876 | { | ||
2877 | return container_of(event->pmu, struct intel_uncore_pmu, pmu); | ||
2878 | } | ||
2879 | |||
2880 | static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) | ||
2881 | { | ||
2882 | /* | ||
2883 | * perf core schedules event on the basis of cpu, uncore events are | ||
2884 | * collected by one of the cpus inside a physical package. | ||
2885 | */ | ||
2886 | return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); | ||
2887 | } | 3249 | } |
2888 | 3250 | ||
2889 | static int | 3251 | static int |
@@ -3279,16 +3641,21 @@ static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) | |||
3279 | { | 3641 | { |
3280 | int ret; | 3642 | int ret; |
3281 | 3643 | ||
3282 | pmu->pmu = (struct pmu) { | 3644 | if (!pmu->type->pmu) { |
3283 | .attr_groups = pmu->type->attr_groups, | 3645 | pmu->pmu = (struct pmu) { |
3284 | .task_ctx_nr = perf_invalid_context, | 3646 | .attr_groups = pmu->type->attr_groups, |
3285 | .event_init = uncore_pmu_event_init, | 3647 | .task_ctx_nr = perf_invalid_context, |
3286 | .add = uncore_pmu_event_add, | 3648 | .event_init = uncore_pmu_event_init, |
3287 | .del = uncore_pmu_event_del, | 3649 | .add = uncore_pmu_event_add, |
3288 | .start = uncore_pmu_event_start, | 3650 | .del = uncore_pmu_event_del, |
3289 | .stop = uncore_pmu_event_stop, | 3651 | .start = uncore_pmu_event_start, |
3290 | .read = uncore_pmu_event_read, | 3652 | .stop = uncore_pmu_event_stop, |
3291 | }; | 3653 | .read = uncore_pmu_event_read, |
3654 | }; | ||
3655 | } else { | ||
3656 | pmu->pmu = *pmu->type->pmu; | ||
3657 | pmu->pmu.attr_groups = pmu->type->attr_groups; | ||
3658 | } | ||
3292 | 3659 | ||
3293 | if (pmu->type->num_boxes == 1) { | 3660 | if (pmu->type->num_boxes == 1) { |
3294 | if (strlen(pmu->type->name) > 0) | 3661 | if (strlen(pmu->type->name) > 0) |
@@ -3502,6 +3869,28 @@ static int __init uncore_pci_init(void) | |||
3502 | pci_uncores = ivt_pci_uncores; | 3869 | pci_uncores = ivt_pci_uncores; |
3503 | uncore_pci_driver = &ivt_uncore_pci_driver; | 3870 | uncore_pci_driver = &ivt_uncore_pci_driver; |
3504 | break; | 3871 | break; |
3872 | case 42: /* Sandy Bridge */ | ||
3873 | ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC); | ||
3874 | if (ret) | ||
3875 | return ret; | ||
3876 | pci_uncores = snb_pci_uncores; | ||
3877 | uncore_pci_driver = &snb_uncore_pci_driver; | ||
3878 | break; | ||
3879 | case 58: /* Ivy Bridge */ | ||
3880 | ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC); | ||
3881 | if (ret) | ||
3882 | return ret; | ||
3883 | pci_uncores = snb_pci_uncores; | ||
3884 | uncore_pci_driver = &ivb_uncore_pci_driver; | ||
3885 | break; | ||
3886 | case 60: /* Haswell */ | ||
3887 | case 69: /* Haswell Celeron */ | ||
3888 | ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC); | ||
3889 | if (ret) | ||
3890 | return ret; | ||
3891 | pci_uncores = snb_pci_uncores; | ||
3892 | uncore_pci_driver = &hsw_uncore_pci_driver; | ||
3893 | break; | ||
3505 | default: | 3894 | default: |
3506 | return 0; | 3895 | return 0; |
3507 | } | 3896 | } |
@@ -3773,7 +4162,7 @@ static void __init uncore_cpu_setup(void *dummy) | |||
3773 | 4162 | ||
3774 | static int __init uncore_cpu_init(void) | 4163 | static int __init uncore_cpu_init(void) |
3775 | { | 4164 | { |
3776 | int ret, cpu, max_cores; | 4165 | int ret, max_cores; |
3777 | 4166 | ||
3778 | max_cores = boot_cpu_data.x86_max_cores; | 4167 | max_cores = boot_cpu_data.x86_max_cores; |
3779 | switch (boot_cpu_data.x86_model) { | 4168 | switch (boot_cpu_data.x86_model) { |
@@ -3817,29 +4206,6 @@ static int __init uncore_cpu_init(void) | |||
3817 | if (ret) | 4206 | if (ret) |
3818 | return ret; | 4207 | return ret; |
3819 | 4208 | ||
3820 | get_online_cpus(); | ||
3821 | |||
3822 | for_each_online_cpu(cpu) { | ||
3823 | int i, phys_id = topology_physical_package_id(cpu); | ||
3824 | |||
3825 | for_each_cpu(i, &uncore_cpu_mask) { | ||
3826 | if (phys_id == topology_physical_package_id(i)) { | ||
3827 | phys_id = -1; | ||
3828 | break; | ||
3829 | } | ||
3830 | } | ||
3831 | if (phys_id < 0) | ||
3832 | continue; | ||
3833 | |||
3834 | uncore_cpu_prepare(cpu, phys_id); | ||
3835 | uncore_event_init_cpu(cpu); | ||
3836 | } | ||
3837 | on_each_cpu(uncore_cpu_setup, NULL, 1); | ||
3838 | |||
3839 | register_cpu_notifier(&uncore_cpu_nb); | ||
3840 | |||
3841 | put_online_cpus(); | ||
3842 | |||
3843 | return 0; | 4209 | return 0; |
3844 | } | 4210 | } |
3845 | 4211 | ||
@@ -3868,6 +4234,41 @@ static int __init uncore_pmus_register(void) | |||
3868 | return 0; | 4234 | return 0; |
3869 | } | 4235 | } |
3870 | 4236 | ||
4237 | static void __init uncore_cpumask_init(void) | ||
4238 | { | ||
4239 | int cpu; | ||
4240 | |||
4241 | /* | ||
4242 | * ony invoke once from msr or pci init code | ||
4243 | */ | ||
4244 | if (!cpumask_empty(&uncore_cpu_mask)) | ||
4245 | return; | ||
4246 | |||
4247 | get_online_cpus(); | ||
4248 | |||
4249 | for_each_online_cpu(cpu) { | ||
4250 | int i, phys_id = topology_physical_package_id(cpu); | ||
4251 | |||
4252 | for_each_cpu(i, &uncore_cpu_mask) { | ||
4253 | if (phys_id == topology_physical_package_id(i)) { | ||
4254 | phys_id = -1; | ||
4255 | break; | ||
4256 | } | ||
4257 | } | ||
4258 | if (phys_id < 0) | ||
4259 | continue; | ||
4260 | |||
4261 | uncore_cpu_prepare(cpu, phys_id); | ||
4262 | uncore_event_init_cpu(cpu); | ||
4263 | } | ||
4264 | on_each_cpu(uncore_cpu_setup, NULL, 1); | ||
4265 | |||
4266 | register_cpu_notifier(&uncore_cpu_nb); | ||
4267 | |||
4268 | put_online_cpus(); | ||
4269 | } | ||
4270 | |||
4271 | |||
3871 | static int __init intel_uncore_init(void) | 4272 | static int __init intel_uncore_init(void) |
3872 | { | 4273 | { |
3873 | int ret; | 4274 | int ret; |
@@ -3886,6 +4287,7 @@ static int __init intel_uncore_init(void) | |||
3886 | uncore_pci_exit(); | 4287 | uncore_pci_exit(); |
3887 | goto fail; | 4288 | goto fail; |
3888 | } | 4289 | } |
4290 | uncore_cpumask_init(); | ||
3889 | 4291 | ||
3890 | uncore_pmus_register(); | 4292 | uncore_pmus_register(); |
3891 | return 0; | 4293 | return 0; |
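
For completeness, a hypothetical user-space sketch of opening one of the new IMC events directly with perf_event_open(2). It assumes the PMU is registered as "uncore_imc" (uncore_pmu_register() prefixes the type name with "uncore_") and that its dynamic type id is read from sysfs at runtime; the 42 below is only a placeholder, and config 0x01 is data_reads per snb_uncore_imc_events[]:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            struct perf_event_attr attr;
            uint64_t count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = 42;         /* placeholder: read the real id from
                                       /sys/bus/event_source/devices/uncore_imc/type */
            attr.config = 0x01;     /* data_reads */

            /* counting mode, system-wide on cpu 0, as snb_uncore_imc_event_init()
             * requires (no sampling, event->cpu >= 0); needs sufficient privileges */
            fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
            if (fd < 0)
                    return 1;

            sleep(1);
            if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
                    printf("data_reads raw count: %llu\n",
                           (unsigned long long)count);
            return 0;
    }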
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index a80ab71a883d..90236f0c94a9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -6,6 +6,7 @@ | |||
6 | 6 | ||
7 | #define UNCORE_PMU_NAME_LEN 32 | 7 | #define UNCORE_PMU_NAME_LEN 32 |
8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) | 8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) |
9 | #define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC) | ||
9 | 10 | ||
10 | #define UNCORE_FIXED_EVENT 0xff | 11 | #define UNCORE_FIXED_EVENT 0xff |
11 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 | 12 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 |
@@ -440,6 +441,7 @@ struct intel_uncore_type { | |||
440 | struct intel_uncore_ops *ops; | 441 | struct intel_uncore_ops *ops; |
441 | struct uncore_event_desc *event_descs; | 442 | struct uncore_event_desc *event_descs; |
442 | const struct attribute_group *attr_groups[4]; | 443 | const struct attribute_group *attr_groups[4]; |
444 | struct pmu *pmu; /* for custom pmu ops */ | ||
443 | }; | 445 | }; |
444 | 446 | ||
445 | #define pmu_group attr_groups[0] | 447 | #define pmu_group attr_groups[0] |
@@ -488,8 +490,11 @@ struct intel_uncore_box { | |||
488 | u64 tags[UNCORE_PMC_IDX_MAX]; | 490 | u64 tags[UNCORE_PMC_IDX_MAX]; |
489 | struct pci_dev *pci_dev; | 491 | struct pci_dev *pci_dev; |
490 | struct intel_uncore_pmu *pmu; | 492 | struct intel_uncore_pmu *pmu; |
493 | u64 hrtimer_duration; /* hrtimer timeout for this box */ | ||
491 | struct hrtimer hrtimer; | 494 | struct hrtimer hrtimer; |
492 | struct list_head list; | 495 | struct list_head list; |
496 | struct list_head active_list; | ||
497 | void *io_addr; | ||
493 | struct intel_uncore_extra_reg shared_regs[0]; | 498 | struct intel_uncore_extra_reg shared_regs[0]; |
494 | }; | 499 | }; |
495 | 500 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 3486e6660357..5d466b7d8609 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1257,7 +1257,24 @@ again: | |||
1257 | pass++; | 1257 | pass++; |
1258 | goto again; | 1258 | goto again; |
1259 | } | 1259 | } |
1260 | 1260 | /* | |
1261 | * Perf does test runs to see if a whole group can be assigned | ||
1262 | * together succesfully. There can be multiple rounds of this. | ||
1263 | * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config | ||
1264 | * bits, such that the next round of group assignments will | ||
1265 | * cause the above p4_should_swap_ts to pass instead of fail. | ||
1266 | * This leads to counters exclusive to thread0 being used by | ||
1267 | * thread1. | ||
1268 | * | ||
1269 | * Solve this with a cheap hack, reset the idx back to -1 to | ||
1270 | * force a new lookup (p4_next_cntr) to get the right counter | ||
1271 | * for the right thread. | ||
1272 | * | ||
1273 | * This probably doesn't comply with the general spirit of how | ||
1274 | * perf wants to work, but P4 is special. :-( | ||
1275 | */ | ||
1276 | if (p4_should_swap_ts(hwc->config, cpu)) | ||
1277 | hwc->idx = -1; | ||
1261 | p4_pmu_swap_config_ts(hwc, cpu); | 1278 | p4_pmu_swap_config_ts(hwc, cpu); |
1262 | if (assign) | 1279 | if (assign) |
1263 | assign[i] = cntr_idx; | 1280 | assign[i] = cntr_idx; |
@@ -1322,6 +1339,7 @@ static __initconst const struct x86_pmu p4_pmu = { | |||
1322 | __init int p4_pmu_init(void) | 1339 | __init int p4_pmu_init(void) |
1323 | { | 1340 | { |
1324 | unsigned int low, high; | 1341 | unsigned int low, high; |
1342 | int i, reg; | ||
1325 | 1343 | ||
1326 | /* If we get stripped -- indexing fails */ | 1344 | /* If we get stripped -- indexing fails */ |
1327 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); | 1345 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); |
@@ -1340,5 +1358,19 @@ __init int p4_pmu_init(void) | |||
1340 | 1358 | ||
1341 | x86_pmu = p4_pmu; | 1359 | x86_pmu = p4_pmu; |
1342 | 1360 | ||
1361 | /* | ||
1362 | * Even though the counters are configured to interrupt a particular | ||
1363 | * logical processor when an overflow happens, testing has shown that | ||
1364 | * on kdump kernels (which uses a single cpu), thread1's counter | ||
1365 | * continues to run and will report an NMI on thread0. Due to the | ||
1366 | * overflow bug, this leads to a stream of unknown NMIs. | ||
1367 | * | ||
1368 | * Solve this by zero'ing out the registers to mimic a reset. | ||
1369 | */ | ||
1370 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
1371 | reg = x86_pmu_config_addr(i); | ||
1372 | wrmsrl_safe(reg, 0ULL); | ||
1373 | } | ||
1374 | |||
1343 | return 0; | 1375 | return 0; |
1344 | } | 1376 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 6fcb49ce50a1..b4872b999a71 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -87,6 +87,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | |||
87 | #define nmi_to_desc(type) (&nmi_desc[type]) | 87 | #define nmi_to_desc(type) (&nmi_desc[type]) |
88 | 88 | ||
89 | static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC; | 89 | static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC; |
90 | |||
90 | static int __init nmi_warning_debugfs(void) | 91 | static int __init nmi_warning_debugfs(void) |
91 | { | 92 | { |
92 | debugfs_create_u64("nmi_longest_ns", 0644, | 93 | debugfs_create_u64("nmi_longest_ns", 0644, |
@@ -95,6 +96,20 @@ static int __init nmi_warning_debugfs(void) | |||
95 | } | 96 | } |
96 | fs_initcall(nmi_warning_debugfs); | 97 | fs_initcall(nmi_warning_debugfs); |
97 | 98 | ||
99 | static void nmi_max_handler(struct irq_work *w) | ||
100 | { | ||
101 | struct nmiaction *a = container_of(w, struct nmiaction, irq_work); | ||
102 | int remainder_ns, decimal_msecs; | ||
103 | u64 whole_msecs = ACCESS_ONCE(a->max_duration); | ||
104 | |||
105 | remainder_ns = do_div(whole_msecs, (1000 * 1000)); | ||
106 | decimal_msecs = remainder_ns / 1000; | ||
107 | |||
108 | printk_ratelimited(KERN_INFO | ||
109 | "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n", | ||
110 | a->handler, whole_msecs, decimal_msecs); | ||
111 | } | ||
112 | |||
98 | static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) | 113 | static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) |
99 | { | 114 | { |
100 | struct nmi_desc *desc = nmi_to_desc(type); | 115 | struct nmi_desc *desc = nmi_to_desc(type); |
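
A note on the formatting in nmi_max_handler() above: do_div() divides the 64-bit value in place and returns the remainder, so the "%lld.%03d msecs" output falls out as in this worked example (illustrative values):

    u64 whole_msecs = 1234567;                       /* duration in ns */
    u32 rem_ns = do_div(whole_msecs, 1000 * 1000);   /* whole_msecs = 1, rem_ns = 234567 */
    int decimal_msecs = rem_ns / 1000;               /* 234 */

    /* printed as "... took too long to run: 1.234 msecs" */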
@@ -110,26 +125,20 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 | |||
110 | * to handle those situations. | 125 | * to handle those situations. |
111 | */ | 126 | */ |
112 | list_for_each_entry_rcu(a, &desc->head, list) { | 127 | list_for_each_entry_rcu(a, &desc->head, list) { |
113 | u64 before, delta, whole_msecs; | 128 | int thishandled; |
114 | int remainder_ns, decimal_msecs, thishandled; | 129 | u64 delta; |
115 | 130 | ||
116 | before = sched_clock(); | 131 | delta = sched_clock(); |
117 | thishandled = a->handler(type, regs); | 132 | thishandled = a->handler(type, regs); |
118 | handled += thishandled; | 133 | handled += thishandled; |
119 | delta = sched_clock() - before; | 134 | delta = sched_clock() - delta; |
120 | trace_nmi_handler(a->handler, (int)delta, thishandled); | 135 | trace_nmi_handler(a->handler, (int)delta, thishandled); |
121 | 136 | ||
122 | if (delta < nmi_longest_ns) | 137 | if (delta < nmi_longest_ns || delta < a->max_duration) |
123 | continue; | 138 | continue; |
124 | 139 | ||
125 | nmi_longest_ns = delta; | 140 | a->max_duration = delta; |
126 | whole_msecs = delta; | 141 | irq_work_queue(&a->irq_work); |
127 | remainder_ns = do_div(whole_msecs, (1000 * 1000)); | ||
128 | decimal_msecs = remainder_ns / 1000; | ||
129 | printk_ratelimited(KERN_INFO | ||
130 | "INFO: NMI handler (%ps) took too long to run: " | ||
131 | "%lld.%03d msecs\n", a->handler, whole_msecs, | ||
132 | decimal_msecs); | ||
133 | } | 142 | } |
134 | 143 | ||
135 | rcu_read_unlock(); | 144 | rcu_read_unlock(); |
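
The new skip condition changes what gets reported: nmi_longest_ns is no longer raised on each long run and now acts as a fixed, debugfs-tunable floor, while each handler tracks its own maximum in a->max_duration. A short trace of the resulting behaviour (illustrative values):

    /*
     * floor nmi_longest_ns = 1 ms, a->max_duration starts at 0:
     *
     *   delta = 0.5 ms  ->  skipped   (below the 1 ms floor)
     *   delta = 2 ms    ->  reported, max_duration = 2 ms
     *   delta = 1.5 ms  ->  skipped   (not a new per-handler maximum)
     *   delta = 3 ms    ->  reported, max_duration = 3 ms
     */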
@@ -146,6 +155,8 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) | |||
146 | if (!action->handler) | 155 | if (!action->handler) |
147 | return -EINVAL; | 156 | return -EINVAL; |
148 | 157 | ||
158 | init_irq_work(&action->irq_work, nmi_max_handler); | ||
159 | |||
149 | spin_lock_irqsave(&desc->lock, flags); | 160 | spin_lock_irqsave(&desc->lock, flags); |
150 | 161 | ||
151 | /* | 162 | /* |
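
For context, a hypothetical handler registered through this path picks up the per-action irq_work automatically, so any overlong run is reported later from a safe context. register_nmi_handler(), NMI_LOCAL and NMI_HANDLED are the existing asm/nmi.h API; the handler and init function names below are made up:

    #include <asm/nmi.h>
    #include <linux/init.h>

    static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
    {
            /* keep NMI-context work minimal; heavy reporting is deferred */
            return NMI_HANDLED;
    }

    static int __init my_driver_init(void)
    {
            return register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_nmi");
    }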