author    Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 14:29:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 14:29:32 -0400
commit    12e24f34cb0d55efd08c18b2112507d4bf498008 (patch)
tree      83b07be17b8ef45f42360a3b9159b3aaae3fbad4 /arch/x86/kernel
parent    1eb51c33b21ffa3fceb634d1d6bcd6488c79bc26 (diff)
parent    eadc84cc01e04f9f74ec2de0c9355be035c7b396 (diff)
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
  perfcounter: Handle some IO return values
  perf_counter: Push perf_sample_data through the swcounter code
  perf_counter tools: Define and use our own u64, s64 etc. definitions
  perf_counter: Close race in perf_lock_task_context()
  perf_counter, x86: Improve interactions with fast-gup
  perf_counter: Simplify and fix task migration counting
  perf_counter tools: Add a data file header
  perf_counter: Update userspace callchain sampling uses
  perf_counter: Make callchain samples extensible
  perf report: Filter to parent set by default
  perf_counter tools: Handle lost events
  perf_counter: Add event overlow handling
  fs: Provide empty .set_page_dirty() aop for anon inodes
  perf_counter: tools: Makefile tweaks for 64-bit powerpc
  perf_counter: powerpc: Add processor back-end for MPC7450 family
  perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
  perf_counter: powerpc: Change how processor-specific back-ends get selected
  perf_counter: powerpc: Use unsigned long for register and constraint values
  perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
  perf_counter tools: Add and use isprint()
  ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/cpu/perf_counter.c | 138
1 file changed, 74 insertions(+), 64 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..76dfef23f789 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/highmem.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
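
The new <linux/highmem.h> include supplies kmap_atomic()/kunmap_atomic(), which the GUP-based copy_from_user_nmi() helper added further down in this diff uses to map pinned user pages while copying stack frames from NMI context.
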
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
 	return event & CORE_EVNTSEL_MASK;
 }
 
-static const u64 amd_0f_hw_cache_event_ids
+static const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 {
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
 	},
 },
 [ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = -1,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 },
 [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
 },
 [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
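
These tables translate perf's generic cache-event triplet (type, op, result) into raw AMD event-select codes; a zero entry means no event is wired up, and -1 means the combination is not expressible on this hardware. User space packs the triplet into attr->config, one byte per field. A minimal sketch of the decode-and-lookup step, modelled on the set_ext_hw_attr() helper elsewhere in this file (the helper name and exact error codes here are reconstructed from memory and may differ in your tree):

	/* config = type | (op << 8) | (result << 16), one byte each */
	static int decode_cache_event(u64 config, u64 *ev)
	{
		unsigned int type   = (config >>  0) & 0xff;
		unsigned int op     = (config >>  8) & 0xff;
		unsigned int result = (config >> 16) & 0xff;

		if (type   >= PERF_COUNT_HW_CACHE_MAX ||
		    op     >= PERF_COUNT_HW_CACHE_OP_MAX ||
		    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
			return -EINVAL;

		*ev = hw_cache_event_ids[type][op][result];
		if (*ev == -1)		/* not expressible on this PMU */
			return -EINVAL;
		if (*ev == 0)		/* table slot left empty above */
			return -ENOENT;

		return 0;		/* *ev is the raw event-select code */
	}
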
@@ -1223,6 +1224,8 @@ again:
 		if (!intel_pmu_save_and_restart(counter))
 			continue;
 
+		data.period = counter->hw.last_period;
+
 		if (perf_counter_overflow(counter, 1, &data))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
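
The added data.period store matters because perf_counter_overflow() now reads the active sampling period out of the perf_sample_data it is handed, so the interrupt handler must fill it in. Condensed from the surrounding intel_pmu_handle_irq() context (not shown in the hunk; field layout per this kernel series):

	struct perf_sample_data data;

	data.regs = regs;
	data.addr = 0;

	/* ... for each overflowed counter ... */
	data.period = counter->hw.last_period;	/* the line added above */

	if (perf_counter_overflow(counter, 1, &data))
		intel_pmu_disable_counter(&counter->hw, bit);
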
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
 
 static int amd_pmu_init(void)
 {
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
 	x86_pmu = amd_pmu;
 
-	switch (boot_cpu_data.x86) {
-	case 0x0f:
-	case 0x10:
-	case 0x11:
-		memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
 
-		pr_cont("AMD Family 0f/10/11 events, ");
-		break;
-	}
 	return 0;
 }
 
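
The new guard is spelled against AMD's family numbering: family 6 is the K7 (Athlon), the first AMD core with the PERFCTR counter MSRs this driver programs, so anything older now bails out with -ENODEV instead of registering a PMU it cannot drive. With the per-family switch gone, the one event table also covers future families rather than needing a new case per release.
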
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
  */
 
 static inline
-void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-	if (entry->nr < MAX_STACK_DEPTH)
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
 		entry->ip[entry->nr++] = ip;
 }
 
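
The switch to a u64 ip matters because callchain slots no longer hold only addresses: the rewritten walkers below store PERF_CONTEXT_KERNEL / PERF_CONTEXT_USER marker values in-band, replacing the fixed per-segment counts (entry->hv, entry->kernel, entry->user) removed at the end of this diff. The markers sit just below the top of the u64 range, so they cannot collide with real addresses, and a consumer can split the chain along the lines of this sketch (illustrative only; constant names per the perf_counter ABI header):

	u64 context = PERF_CONTEXT_MAX;		/* unknown until first marker */
	unsigned int i;

	for (i = 0; i < entry->nr; i++) {
		u64 ip = entry->ip[i];

		if (ip >= PERF_CONTEXT_MAX) {	/* marker, not an address */
			context = ip;
			continue;
		}
		/* ip is a return address in the segment named by context */
	}
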
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	/* Don't bother with IRQ stacks for now */
-	return -1;
+	/* Process all stacks: */
+	return 0;
 }
 
 static void backtrace_address(void *data, unsigned long addr, int reliable)
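
dump_trace() invokes the .stack hook once per stack it is about to walk (process, IRQ, exception) and stops the walk when the hook returns a negative value. Returning 0 instead lets the unwinder cross from the interrupt stack back into the interrupted context, so kernel callchains are no longer truncated at the first stack boundary.
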
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
 	.address		= backtrace_address,
 };
 
+#include "../dumpstack.h"
+
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bp;
-	char *stack;
-	int nr = entry->nr;
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
+	callchain_store(entry, regs->ip);
 
-	callchain_store(entry, instruction_pointer(regs));
+	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
 
-	stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
-	bp = frame_pointer(regs);
-#else
-	bp = 0;
-#endif
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-	entry->kernel = entry->nr - nr;
-}
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
-struct stack_frame {
-	const void __user	*next_fp;
-	unsigned long		return_address;
-};
+		len  += size;
+		to   += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
 
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
-	int ret;
+	unsigned long bytes;
 
-	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
-		return 0;
+	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
 
-	ret = 1;
-	pagefault_disable();
-	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
-		ret = 0;
-	pagefault_enable();
-
-	return ret;
+	return bytes == sizeof(*frame);
 }
 
 static void
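
Two things happen in this hunk. First, the user-stack copy becomes NMI-safe: a page fault taken inside an NMI handler is problematic (the fault's IRET re-enables NMIs mid-handler), and even the pagefault_disable()-based inatomic copy still goes through the trap path on a missing page, so the new helper pins pages with the non-faulting __get_user_pages_fast() and copies through a kmap_atomic() mapping instead. Second, the local struct stack_frame disappears in favour of the one provided by the newly included dumpstack.h, whose link field is named next_frame; that is what the renames in the next hunk track. For reference, the shared definition looks roughly like this (quoted from memory of that header; verify against your tree):

	struct stack_frame {
		struct stack_frame *next_frame;
		unsigned long return_address;
	};
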
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	struct stack_frame frame;
 	const void __user *fp;
-	int nr = entry->nr;
 
-	regs = (struct pt_regs *)current->thread.sp0 - 1;
-	fp   = (void __user *)regs->bp;
+	if (!user_mode(regs))
+		regs = task_pt_regs(current);
 
+	fp = (void __user *)regs->bp;
+
+	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
-	while (entry->nr < MAX_STACK_DEPTH) {
-		frame.next_fp	      = NULL;
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		frame.next_frame      = NULL;
 		frame.return_address = 0;
 
 		if (!copy_stack_frame(fp, &frame))
 			break;
 
-		if ((unsigned long)fp < user_stack_pointer(regs))
+		if ((unsigned long)fp < regs->sp)
 			break;
 
 		callchain_store(entry, frame.return_address);
-		fp = frame.next_fp;
+		fp = frame.next_frame;
 	}
-
-	entry->user = entry->nr - nr;
 }
 
 static void
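
When a counter overflows while the task is in kernel mode, regs describes kernel state, so the user-side walker now recovers the saved user-space registers via task_pt_regs(current), which wraps essentially the (struct pt_regs *)current->thread.sp0 - 1 arithmetic that was open-coded before, while the user_mode() guard keeps the genuinely sampled registers when the overflow hit in user mode. The fp < regs->sp check remains the sanity test that each frame pointer points further up the stack than the stack pointer, i.e. that the walk is making forward progress through a plausible frame chain.
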
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	entry = &__get_cpu_var(irq_entry);
 
 	entry->nr = 0;
-	entry->hv = 0;
-	entry->kernel = 0;
-	entry->user = 0;
 
 	perf_do_callchain(regs, entry);
 