diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-01-11 14:47:45 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-01-11 14:47:45 -0500 |
commit | ddb321a8dd158520d97ed1cbade1d4ac36b6af31 (patch) | |
tree | 842f5550c5947d4aebff56dcd1091950b0cc0f82 /arch | |
parent | 1e6c3e8f8fb94a8914a380e02a7e8ad81d47273e (diff) | |
parent | 5306c31c5733cb4a79cc002e0c3ad256fd439614 (diff) |
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
"Mostly tooling fixes, but also some kernel side fixes: uncore PMU
driver fix, user regs sampling fix and an instruction decoder fix that
unbreaks PEBS precise sampling"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes
perf/x86_64: Improve user regs sampling
perf: Move task_pt_regs sampling into arch code
x86: Fix off-by-one in instruction decoder
perf hists browser: Fix segfault when showing callchain
perf callchain: Free callchains when hist entries are deleted
perf hists: Fix children sort key behavior
perf diff: Fix to sort by baseline field by default
perf list: Fix --raw-dump option
perf probe: Fix crash in dwarf_getcfi_elf
perf probe: Fix to fall back to find probe point in symbols
perf callchain: Append callchains only when requested
perf ui/tui: Print backtrace symbols when segfault occurs
perf report: Show progress bar for output resorting
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/kernel/perf_regs.c | 8 | ||||
-rw-r--r-- | arch/arm64/kernel/perf_regs.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_uncore.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/perf_regs.c | 90 | ||||
-rw-r--r-- | arch/x86/lib/insn.c | 2 |
6 files changed, 125 insertions, 2 deletions
diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c
index 6e4379c67cbc..592dda3f21ff 100644
--- a/arch/arm/kernel/perf_regs.c
+++ b/arch/arm/kernel/perf_regs.c
@@ -28,3 +28,11 @@ u64 perf_reg_abi(struct task_struct *task) | |||
28 | { | 28 | { |
29 | return PERF_SAMPLE_REGS_ABI_32; | 29 | return PERF_SAMPLE_REGS_ABI_32; |
30 | } | 30 | } |
31 | |||
32 | void perf_get_regs_user(struct perf_regs *regs_user, | ||
33 | struct pt_regs *regs, | ||
34 | struct pt_regs *regs_user_copy) | ||
35 | { | ||
36 | regs_user->regs = task_pt_regs(current); | ||
37 | regs_user->abi = perf_reg_abi(current); | ||
38 | } | ||
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 6762ad705587..3f62b35fb6f1 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -50,3 +50,11 @@ u64 perf_reg_abi(struct task_struct *task) | |||
50 | else | 50 | else |
51 | return PERF_SAMPLE_REGS_ABI_64; | 51 | return PERF_SAMPLE_REGS_ABI_64; |
52 | } | 52 | } |
53 | |||
54 | void perf_get_regs_user(struct perf_regs *regs_user, | ||
55 | struct pt_regs *regs, | ||
56 | struct pt_regs *regs_user_copy) | ||
57 | { | ||
58 | regs_user->regs = task_pt_regs(current); | ||
59 | regs_user->abi = perf_reg_abi(current); | ||
60 | } | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 18eb78bbdd10..863d9b02563e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -17,7 +17,7 @@ | |||
17 | #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) | 17 | #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) |
18 | #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) | 18 | #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) |
19 | #define UNCORE_EXTRA_PCI_DEV 0xff | 19 | #define UNCORE_EXTRA_PCI_DEV 0xff |
20 | #define UNCORE_EXTRA_PCI_DEV_MAX 2 | 20 | #define UNCORE_EXTRA_PCI_DEV_MAX 3 |
21 | 21 | ||
22 | /* support up to 8 sockets */ | 22 | /* support up to 8 sockets */ |
23 | #define UNCORE_SOCKET_MAX 8 | 23 | #define UNCORE_SOCKET_MAX 8 |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 745b158e9a65..21af6149edf2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -891,6 +891,7 @@ void snbep_uncore_cpu_init(void) | |||
891 | enum { | 891 | enum { |
892 | SNBEP_PCI_QPI_PORT0_FILTER, | 892 | SNBEP_PCI_QPI_PORT0_FILTER, |
893 | SNBEP_PCI_QPI_PORT1_FILTER, | 893 | SNBEP_PCI_QPI_PORT1_FILTER, |
894 | HSWEP_PCI_PCU_3, | ||
894 | }; | 895 | }; |
895 | 896 | ||
896 | static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) | 897 | static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) |
@@ -2026,6 +2027,17 @@ void hswep_uncore_cpu_init(void) | |||
2026 | { | 2027 | { |
2027 | if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) | 2028 | if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) |
2028 | hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; | 2029 | hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; |
2030 | |||
2031 | /* Detect 6-8 core systems with only two SBOXes */ | ||
2032 | if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) { | ||
2033 | u32 capid4; | ||
2034 | |||
2035 | pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3], | ||
2036 | 0x94, &capid4); | ||
2037 | if (((capid4 >> 6) & 0x3) == 0) | ||
2038 | hswep_uncore_sbox.num_boxes = 2; | ||
2039 | } | ||
2040 | |||
2029 | uncore_msr_uncores = hswep_msr_uncores; | 2041 | uncore_msr_uncores = hswep_msr_uncores; |
2030 | } | 2042 | } |
2031 | 2043 | ||
@@ -2287,6 +2299,11 @@ static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = { | |||
2287 | .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, | 2299 | .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, |
2288 | SNBEP_PCI_QPI_PORT1_FILTER), | 2300 | SNBEP_PCI_QPI_PORT1_FILTER), |
2289 | }, | 2301 | }, |
2302 | { /* PCU.3 (for Capability registers) */ | ||
2303 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0), | ||
2304 | .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, | ||
2305 | HSWEP_PCI_PCU_3), | ||
2306 | }, | ||
2290 | { /* end: all zeroes */ } | 2307 | { /* end: all zeroes */ } |
2291 | }; | 2308 | }; |
2292 | 2309 | ||
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index e309cc5c276e..781861cc5ee8 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -78,6 +78,14 @@ u64 perf_reg_abi(struct task_struct *task) | |||
78 | { | 78 | { |
79 | return PERF_SAMPLE_REGS_ABI_32; | 79 | return PERF_SAMPLE_REGS_ABI_32; |
80 | } | 80 | } |
81 | |||
82 | void perf_get_regs_user(struct perf_regs *regs_user, | ||
83 | struct pt_regs *regs, | ||
84 | struct pt_regs *regs_user_copy) | ||
85 | { | ||
86 | regs_user->regs = task_pt_regs(current); | ||
87 | regs_user->abi = perf_reg_abi(current); | ||
88 | } | ||
81 | #else /* CONFIG_X86_64 */ | 89 | #else /* CONFIG_X86_64 */ |
82 | #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ | 90 | #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ |
83 | (1ULL << PERF_REG_X86_ES) | \ | 91 | (1ULL << PERF_REG_X86_ES) | \ |
@@ -102,4 +110,86 @@ u64 perf_reg_abi(struct task_struct *task) | |||
102 | else | 110 | else |
103 | return PERF_SAMPLE_REGS_ABI_64; | 111 | return PERF_SAMPLE_REGS_ABI_64; |
104 | } | 112 | } |
113 | |||
114 | void perf_get_regs_user(struct perf_regs *regs_user, | ||
115 | struct pt_regs *regs, | ||
116 | struct pt_regs *regs_user_copy) | ||
117 | { | ||
118 | struct pt_regs *user_regs = task_pt_regs(current); | ||
119 | |||
120 | /* | ||
121 | * If we're in an NMI that interrupted task_pt_regs setup, then | ||
122 | * we can't sample user regs at all. This check isn't really | ||
123 | * sufficient, though, as we could be in an NMI inside an interrupt | ||
124 | * that happened during task_pt_regs setup. | ||
125 | */ | ||
126 | if (regs->sp > (unsigned long)&user_regs->r11 && | ||
127 | regs->sp <= (unsigned long)(user_regs + 1)) { | ||
128 | regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; | ||
129 | regs_user->regs = NULL; | ||
130 | return; | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * RIP, flags, and the argument registers are usually saved. | ||
135 | * orig_ax is probably okay, too. | ||
136 | */ | ||
137 | regs_user_copy->ip = user_regs->ip; | ||
138 | regs_user_copy->cx = user_regs->cx; | ||
139 | regs_user_copy->dx = user_regs->dx; | ||
140 | regs_user_copy->si = user_regs->si; | ||
141 | regs_user_copy->di = user_regs->di; | ||
142 | regs_user_copy->r8 = user_regs->r8; | ||
143 | regs_user_copy->r9 = user_regs->r9; | ||
144 | regs_user_copy->r10 = user_regs->r10; | ||
145 | regs_user_copy->r11 = user_regs->r11; | ||
146 | regs_user_copy->orig_ax = user_regs->orig_ax; | ||
147 | regs_user_copy->flags = user_regs->flags; | ||
148 | |||
149 | /* | ||
150 | * Don't even try to report the "rest" regs. | ||
151 | */ | ||
152 | regs_user_copy->bx = -1; | ||
153 | regs_user_copy->bp = -1; | ||
154 | regs_user_copy->r12 = -1; | ||
155 | regs_user_copy->r13 = -1; | ||
156 | regs_user_copy->r14 = -1; | ||
157 | regs_user_copy->r15 = -1; | ||
158 | |||
159 | /* | ||
160 | * For this to be at all useful, we need a reasonable guess for | ||
161 | * sp and the ABI. Be careful: we're in NMI context, and we're | ||
162 | * considering current to be the current task, so we should | ||
163 | * be careful not to look at any other percpu variables that might | ||
164 | * change during context switches. | ||
165 | */ | ||
166 | if (IS_ENABLED(CONFIG_IA32_EMULATION) && | ||
167 | task_thread_info(current)->status & TS_COMPAT) { | ||
168 | /* Easy case: we're in a compat syscall. */ | ||
169 | regs_user->abi = PERF_SAMPLE_REGS_ABI_32; | ||
170 | regs_user_copy->sp = user_regs->sp; | ||
171 | regs_user_copy->cs = user_regs->cs; | ||
172 | regs_user_copy->ss = user_regs->ss; | ||
173 | } else if (user_regs->orig_ax != -1) { | ||
174 | /* | ||
175 | * We're probably in a 64-bit syscall. | ||
176 | * Warning: this code is severely racy. At least it's better | ||
177 | * than just blindly copying user_regs. | ||
178 | */ | ||
179 | regs_user->abi = PERF_SAMPLE_REGS_ABI_64; | ||
180 | regs_user_copy->sp = this_cpu_read(old_rsp); | ||
181 | regs_user_copy->cs = __USER_CS; | ||
182 | regs_user_copy->ss = __USER_DS; | ||
183 | regs_user_copy->cx = -1; /* usually contains garbage */ | ||
184 | } else { | ||
185 | /* We're probably in an interrupt or exception. */ | ||
186 | regs_user->abi = user_64bit_mode(user_regs) ? | ||
187 | PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32; | ||
188 | regs_user_copy->sp = user_regs->sp; | ||
189 | regs_user_copy->cs = user_regs->cs; | ||
190 | regs_user_copy->ss = user_regs->ss; | ||
191 | } | ||
192 | |||
193 | regs_user->regs = regs_user_copy; | ||
194 | } | ||
105 | #endif /* CONFIG_X86_32 */ | 195 | #endif /* CONFIG_X86_32 */ |
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 2480978b31cc..1313ae6b478b 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | /* Verify next sizeof(t) bytes can be on the same instruction */ | 29 | /* Verify next sizeof(t) bytes can be on the same instruction */ |
30 | #define validate_next(t, insn, n) \ | 30 | #define validate_next(t, insn, n) \ |
31 | ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr) | 31 | ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) |
32 | 32 | ||
33 | #define __get_next(t, insn) \ | 33 | #define __get_next(t, insn) \ |
34 | ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) | 34 | ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) |