author		Linus Torvalds <torvalds@linux-foundation.org>	2009-06-20 14:29:32 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-06-20 14:29:32 -0400
commit		12e24f34cb0d55efd08c18b2112507d4bf498008 (patch)
tree		83b07be17b8ef45f42360a3b9159b3aaae3fbad4 /arch/x86/kernel
parent		1eb51c33b21ffa3fceb634d1d6bcd6488c79bc26 (diff)
parent		eadc84cc01e04f9f74ec2de0c9355be035c7b396 (diff)
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
perfcounter: Handle some IO return values
perf_counter: Push perf_sample_data through the swcounter code
perf_counter tools: Define and use our own u64, s64 etc. definitions
perf_counter: Close race in perf_lock_task_context()
perf_counter, x86: Improve interactions with fast-gup
perf_counter: Simplify and fix task migration counting
perf_counter tools: Add a data file header
perf_counter: Update userspace callchain sampling uses
perf_counter: Make callchain samples extensible
perf report: Filter to parent set by default
perf_counter tools: Handle lost events
perf_counter: Add event overflow handling
fs: Provide empty .set_page_dirty() aop for anon inodes
perf_counter: tools: Makefile tweaks for 64-bit powerpc
perf_counter: powerpc: Add processor back-end for MPC7450 family
perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
perf_counter: powerpc: Change how processor-specific back-ends get selected
perf_counter: powerpc: Use unsigned long for register and constraint values
perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
perf_counter tools: Add and use isprint()
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/cpu/perf_counter.c	138
1 file changed, 74 insertions(+), 64 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..76dfef23f789 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/highmem.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
 	return event & CORE_EVNTSEL_MASK;
 }
 
-static const u64 amd_0f_hw_cache_event_ids
+static const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 {
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
 	},
 },
 [ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = -1,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 },
 [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
 },
 [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
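[The three hunks above populate AMD event codes in the generalized cache-event table, indexed by cache level, operation, and access result. As a hedged sketch of how the driver consumes such a table (resolve_cache_event is a hypothetical helper, not a function in this file; the decode mirrors the driver's attribute-setup path, and the config layout type | op << 8 | result << 16 follows the perf_counter ABI of this era):

static u64 resolve_cache_event(u64 config)
{
	unsigned int type   = (config >>  0) & 0xff;
	unsigned int op     = (config >>  8) & 0xff;
	unsigned int result = (config >> 16) & 0xff;
	u64 ev;

	if (type   >= PERF_COUNT_HW_CACHE_MAX ||
	    op     >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return 0;			/* malformed request */

	ev = hw_cache_event_ids[type][op][result];
	if (ev == (u64)-1)
		return 0;			/* combination impossible on this CPU */

	return ev;				/* 0 = nothing wired up; else a raw event code */
}

So a 0 entry in the table means "no event available for this combination", while -1 marks combinations the hardware cannot count at all.]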
@@ -1223,6 +1224,8 @@ again:
 		if (!intel_pmu_save_and_restart(counter))
 			continue;
 
+		data.period = counter->hw.last_period;
+
 		if (perf_counter_overflow(counter, 1, &data))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
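[The added assignment forwards the counter's last sample period into the generic sample record before the overflow handler runs. A minimal sketch of how the surrounding NMI handler fills that record (field names from this era's struct perf_sample_data; the elided portion is the per-counter overflow scan shown above):

	struct perf_sample_data data;

	data.regs = regs;			/* interrupted register state */
	data.addr = 0;

	/* ... per-counter overflow loop ... */

	data.period = counter->hw.last_period;	/* the line added above */
	if (perf_counter_overflow(counter, 1, &data))
		intel_pmu_disable_counter(&counter->hw, bit);
]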
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
 
 static int amd_pmu_init(void)
 {
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
 	x86_pmu = amd_pmu;
 
-	switch (boot_cpu_data.x86) {
-	case 0x0f:
-	case 0x10:
-	case 0x11:
-		memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
 
-		pr_cont("AMD Family 0f/10/11 events, ");
-		break;
-	}
 	return 0;
 }
 
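[amd_pmu_init() now bails out on pre-K7 parts and treats the cache-event table as common to all supported AMD families, replacing the explicit family 0x0f/0x10/0x11 switch. A hypothetical restatement of the new gate, for clarity (the helper name is illustrative only):

/* AMD performance counters, as programmed by this driver, exist
 * from K7 (family 6) onward; older families get no PMU driver. */
static bool amd_pmu_supported(void)
{
	return boot_cpu_data.x86 >= 6;
}

A non-zero return from amd_pmu_init() leaves the hardware PMU unregistered, so such machines fall back to software counters only.]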
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
  */
 
 static inline
-void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-	if (entry->nr < MAX_STACK_DEPTH)
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
 		entry->ip[entry->nr++] = ip;
 }
 
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	/* Don't bother with IRQ stacks for now */
-	return -1;
+	/* Process all stacks: */
+	return 0;
 }
 
 static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
 	.address		= backtrace_address,
 };
 
+#include "../dumpstack.h"
+
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bp;
-	char *stack;
-	int nr = entry->nr;
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
+	callchain_store(entry, regs->ip);
 
-	callchain_store(entry, instruction_pointer(regs));
+	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
 
-	stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
-	bp = frame_pointer(regs);
-#else
-	bp = 0;
-#endif
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-	entry->kernel = entry->nr - nr;
-}
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
-struct stack_frame {
-	const void __user	*next_fp;
-	unsigned long		return_address;
-};
+		len  += size;
+		to   += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
 
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
-	int ret;
+	unsigned long bytes;
 
-	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
-		return 0;
+	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
 
-	ret = 1;
-	pagefault_disable();
-	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
-		ret = 0;
-	pagefault_enable();
-
-	return ret;
+	return bytes == sizeof(*frame);
 }
 
 static void
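[copy_from_user_nmi() above sidesteps the page-fault path entirely: __get_user_pages_fast() pins the user page from NMI/IRQ context, kmap_atomic() maps it, and a plain memcpy() copies out one page-sized segment at a time, returning however many bytes it managed. A hypothetical caller, to show the intended contract:

/* Illustrative only: peek one user-space word from NMI context,
 * where an ordinary copy_from_user() could fault and re-enter
 * the kernel through the page-fault handler. */
static int peek_user_u64(const void __user *uptr, u64 *val)
{
	return copy_from_user_nmi(val, uptr, sizeof(*val)) == sizeof(*val);
}

copy_stack_frame() below is exactly this pattern applied to one saved stack frame.]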
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	struct stack_frame frame;
 	const void __user *fp;
-	int nr = entry->nr;
 
-	regs = (struct pt_regs *)current->thread.sp0 - 1;
-	fp   = (void __user *)regs->bp;
+	if (!user_mode(regs))
+		regs = task_pt_regs(current);
 
+	fp = (void __user *)regs->bp;
+
+	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
-	while (entry->nr < MAX_STACK_DEPTH) {
-		frame.next_fp	     = NULL;
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		frame.next_frame	     = NULL;
 		frame.return_address = 0;
 
 		if (!copy_stack_frame(fp, &frame))
 			break;
 
-		if ((unsigned long)fp < user_stack_pointer(regs))
+		if ((unsigned long)fp < regs->sp)
 			break;
 
 		callchain_store(entry, frame.return_address);
-		fp = frame.next_fp;
+		fp = frame.next_frame;
 	}
-
-	entry->user = entry->nr - nr;
 }
 
 static void
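[The local struct stack_frame definition (with its next_fp member) is dropped in favor of the shared one pulled in through "../dumpstack.h", whose pointer member is named next_frame. For reference, the walked layout is presumably the standard x86 frame-pointer chain, along the lines of:

struct stack_frame {
	struct stack_frame	*next_frame;	/* saved frame pointer */
	unsigned long		return_address;	/* stored just above it */
};

Each saved %ebp/%rbp points at the previous frame, which is why the loop above follows next_frame and records return_address at every step.]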
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 		entry = &__get_cpu_var(irq_entry);
 
 	entry->nr = 0;
-	entry->hv = 0;
-	entry->kernel = 0;
-	entry->user = 0;
 
 	perf_do_callchain(regs, entry);
 
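[With the fixed hv/kernel/user counts removed, a callchain is just one flat array of u64 entries in which special PERF_CONTEXT_* markers announce which domain the following frames belong to; this is the "Make callchain samples extensible" change from the shortlog, and it is why perf_callchain_kernel() and perf_callchain_user() each store their marker before any instruction pointers. A small self-contained sketch of a consumer splitting such a buffer (marker values taken from this era's perf_counter ABI):

#include <stdio.h>
#include <stdint.h>

/* In-band context markers from the perf_counter ABI. */
#define PERF_CONTEXT_HV		((uint64_t)-32)
#define PERF_CONTEXT_KERNEL	((uint64_t)-128)
#define PERF_CONTEXT_USER	((uint64_t)-512)

/* Walk a flat callchain buffer, switching domains at each marker. */
static void print_callchain(const uint64_t *ip, uint64_t nr)
{
	const char *ctx = "unknown";
	uint64_t i;

	for (i = 0; i < nr; i++) {
		if (ip[i] == PERF_CONTEXT_HV)     { ctx = "hv";     continue; }
		if (ip[i] == PERF_CONTEXT_KERNEL) { ctx = "kernel"; continue; }
		if (ip[i] == PERF_CONTEXT_USER)   { ctx = "user";   continue; }
		printf("[%s] %#llx\n", ctx, (unsigned long long)ip[i]);
	}
}
]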