author		Linus Torvalds <torvalds@linux-foundation.org>	2010-04-04 15:13:10 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-04-04 15:13:10 -0400
commit		8ce42c8b7fdf4fc008a6fc7349beb8f4dd5cb774 (patch)
tree		bc05326ed8ade9137e3ce5fb5b1d439dcdce266f /arch
parent		0121b0c771f929bb5298554b70843ab46280c298 (diff)
parent		6e03bb5ad363fdbe4e1e227cfb78f7978c662e18 (diff)
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
perf: Always build the powerpc perf_arch_fetch_caller_regs version
perf: Always build the stub perf_arch_fetch_caller_regs version
perf, probe-finder: Build fix on Debian
perf/scripts: Tuple was set from long in both branches in python_process_event()
perf: Fix 'perf sched record' deadlock
perf, x86: Fix callgraphs of 32-bit processes on 64-bit kernels
perf, x86: Fix AMD hotplug & constraint initialization
x86: Move notify_cpu_starting() callback to a later stage
x86,kgdb: Always initialize the hw breakpoint attribute
perf: Use hot regs with software sched switch/migrate events
perf: Correctly align perf event tracing buffer
Diffstat (limited to 'arch')
-rw-r--r--	arch/powerpc/kernel/misc.S		2
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	54
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd.c	80
-rw-r--r--	arch/x86/kernel/dumpstack.h		5
-rw-r--r--	arch/x86/kernel/kgdb.c			2
-rw-r--r--	arch/x86/kernel/smpboot.c		4
6 files changed, 99 insertions, 48 deletions
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index b485a87c94e1..22e507c8a556 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -128,7 +128,6 @@ _GLOBAL(__restore_cpu_power7)
 	/* place holder */
 	blr
 
-#ifdef CONFIG_EVENT_TRACING
 /*
  * Get a minimal set of registers for our caller's nth caller.
  * r3 = regs pointer, r5 = n.
@@ -154,4 +153,3 @@ _GLOBAL(perf_arch_fetch_caller_regs)
 	PPC_STL	r4,_NIP-STACK_FRAME_OVERHEAD(r3)
 	PPC_STL	r7,_LINK-STACK_FRAME_OVERHEAD(r3)
 	blr
-#endif /* CONFIG_EVENT_TRACING */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 60398a0d947c..53ea4cf1a878 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -28,6 +28,7 @@
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
+#include <asm/compat.h>
 
 static u64 perf_event_mask __read_mostly;
 
@@ -158,7 +159,7 @@ struct x86_pmu {
 					 struct perf_event *event);
 	struct event_constraint *event_constraints;
 
-	void		(*cpu_prepare)(int cpu);
+	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
@@ -1333,11 +1334,12 @@ static int __cpuinit
 x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
+	int ret = NOTIFY_OK;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
 		if (x86_pmu.cpu_prepare)
-			x86_pmu.cpu_prepare(cpu);
+			ret = x86_pmu.cpu_prepare(cpu);
 		break;
 
 	case CPU_STARTING:
@@ -1350,6 +1352,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 			x86_pmu.cpu_dying(cpu);
 		break;
 
+	case CPU_UP_CANCELED:
 	case CPU_DEAD:
 		if (x86_pmu.cpu_dead)
 			x86_pmu.cpu_dead(cpu);
@@ -1359,7 +1362,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		break;
 	}
 
-	return NOTIFY_OK;
+	return ret;
 }
 
 static void __init pmu_check_apic(void)
@@ -1628,14 +1631,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	return len;
 }
 
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+#ifdef CONFIG_COMPAT
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bytes;
+	/* 32-bit process in 64-bit kernel. */
+	struct stack_frame_ia32 frame;
+	const void __user *fp;
 
-	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
+	if (!test_thread_flag(TIF_IA32))
+		return 0;
+
+	fp = compat_ptr(regs->bp);
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		unsigned long bytes;
+		frame.next_frame = 0;
+		frame.return_address = 0;
+
+		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+		if (bytes != sizeof(frame))
+			break;
+
+		if (fp < compat_ptr(regs->sp))
+			break;
 
-	return bytes == sizeof(*frame);
+		callchain_store(entry, frame.return_address);
+		fp = compat_ptr(frame.next_frame);
+	}
+	return 1;
+}
+#else
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	return 0;
 }
+#endif
 
 static void
 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1651,11 +1682,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
+	if (perf_callchain_user32(regs, entry))
+		return;
+
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		unsigned long bytes;
 		frame.next_frame = NULL;
 		frame.return_address = 0;
 
-		if (!copy_stack_frame(fp, &frame))
+		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+		if (bytes != sizeof(frame))
 			break;
 
 		if ((unsigned long)fp < regs->sp)
@@ -1702,7 +1738,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return entry;
 }
 
-#ifdef CONFIG_EVENT_TRACING
 void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
 {
 	regs->ip = ip;
@@ -1714,4 +1749,3 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 	regs->cs = __KERNEL_CS;
 	local_save_flags(regs->flags);
 }
-#endif
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index b87e0b6970cb..db6f7d4056e1 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -137,6 +137,13 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 	return (hwc->config & 0xe0) == 0xe0;
 }
 
+static inline int amd_has_nb(struct cpu_hw_events *cpuc)
+{
+	struct amd_nb *nb = cpuc->amd_nb;
+
+	return nb && nb->nb_id != -1;
+}
+
 static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
 {
@@ -147,7 +154,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 	/*
 	 * only care about NB events
 	 */
-	if (!(nb && amd_is_nb_event(hwc)))
+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
 		return;
 
 	/*
@@ -214,7 +221,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	/*
 	 * if not NB event or no NB, then no constraints
 	 */
-	if (!(nb && amd_is_nb_event(hwc)))
+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
 		return &unconstrained;
 
 	/*
@@ -293,51 +300,55 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 	return nb;
 }
 
-static void amd_pmu_cpu_online(int cpu)
+static int amd_pmu_cpu_prepare(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	WARN_ON_ONCE(cpuc->amd_nb);
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return NOTIFY_OK;
+
+	cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+	if (!cpuc->amd_nb)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+static void amd_pmu_cpu_starting(int cpu)
 {
-	struct cpu_hw_events *cpu1, *cpu2;
-	struct amd_nb *nb = NULL;
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct amd_nb *nb;
 	int i, nb_id;
 
 	if (boot_cpu_data.x86_max_cores < 2)
 		return;
 
-	/*
-	 * function may be called too early in the
-	 * boot process, in which case nb_id is bogus
-	 */
 	nb_id = amd_get_nb_id(cpu);
-	if (nb_id == BAD_APICID)
-		return;
-
-	cpu1 = &per_cpu(cpu_hw_events, cpu);
-	cpu1->amd_nb = NULL;
+	WARN_ON_ONCE(nb_id == BAD_APICID);
 
 	raw_spin_lock(&amd_nb_lock);
 
 	for_each_online_cpu(i) {
-		cpu2 = &per_cpu(cpu_hw_events, i);
-		nb = cpu2->amd_nb;
-		if (!nb)
+		nb = per_cpu(cpu_hw_events, i).amd_nb;
+		if (WARN_ON_ONCE(!nb))
 			continue;
-		if (nb->nb_id == nb_id)
-			goto found;
-	}
 
-	nb = amd_alloc_nb(cpu, nb_id);
-	if (!nb) {
-		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
-		raw_spin_unlock(&amd_nb_lock);
-		return;
-	}
-found:
-	nb->refcnt++;
-	cpu1->amd_nb = nb;
+		if (nb->nb_id == nb_id) {
+			kfree(cpuc->amd_nb);
+			cpuc->amd_nb = nb;
+			break;
+		}
+	}
+
+	cpuc->amd_nb->nb_id = nb_id;
+	cpuc->amd_nb->refcnt++;
 
 	raw_spin_unlock(&amd_nb_lock);
 }
 
-static void amd_pmu_cpu_offline(int cpu)
+static void amd_pmu_cpu_dead(int cpu)
 {
 	struct cpu_hw_events *cpuhw;
 
@@ -349,8 +360,10 @@ static void amd_pmu_cpu_offline(int cpu)
 	raw_spin_lock(&amd_nb_lock);
 
 	if (cpuhw->amd_nb) {
-		if (--cpuhw->amd_nb->refcnt == 0)
-			kfree(cpuhw->amd_nb);
+		struct amd_nb *nb = cpuhw->amd_nb;
+
+		if (nb->nb_id == -1 || --nb->refcnt == 0)
+			kfree(nb);
 
 		cpuhw->amd_nb = NULL;
 	}
@@ -379,8 +392,9 @@ static __initconst struct x86_pmu amd_pmu = {
 	.get_event_constraints	= amd_get_event_constraints,
 	.put_event_constraints	= amd_put_event_constraints,
 
-	.cpu_prepare		= amd_pmu_cpu_online,
-	.cpu_dead		= amd_pmu_cpu_offline,
+	.cpu_prepare		= amd_pmu_cpu_prepare,
+	.cpu_starting		= amd_pmu_cpu_starting,
+	.cpu_dead		= amd_pmu_cpu_dead,
 };
 
 static __init int amd_pmu_init(void)
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 29e5f7c845b2..e39e77168a37 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -30,6 +30,11 @@ struct stack_frame {
 	unsigned long return_address;
 };
 
+struct stack_frame_ia32 {
+	u32 next_frame;
+	u32 return_address;
+};
+
 static inline unsigned long rewind_frame_pointer(int n)
 {
 	struct stack_frame *frame;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index bfba6019d762..b2258ca91003 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -618,8 +618,8 @@ int kgdb_arch_init(void)
 	 * portion of kgdb because this operation requires mutexs to
 	 * complete.
 	 */
+	hw_breakpoint_init(&attr);
 	attr.bp_addr = (unsigned long)kgdb_arch_init;
-	attr.type = PERF_TYPE_BREAKPOINT;
 	attr.bp_len = HW_BREAKPOINT_LEN_1;
 	attr.bp_type = HW_BREAKPOINT_W;
 	attr.disabled = 1;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 06d98ae5a802..6808b934d6c0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -242,8 +242,6 @@ static void __cpuinit smp_callin(void)
 	end_local_APIC_setup();
 	map_cpu_to_logical_apicid();
 
-	notify_cpu_starting(cpuid);
-
 	/*
 	 * Need to setup vector mappings before we enable interrupts.
 	 */
@@ -264,6 +262,8 @@ static void __cpuinit smp_callin(void)
 	 */
 	smp_store_cpu_info(cpuid);
 
+	notify_cpu_starting(cpuid);
+
 	/*
 	 * Allow the master to continue.
 	 */