author     Linus Torvalds <torvalds@linux-foundation.org>   2009-06-20 14:29:32 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2009-06-20 14:29:32 -0400
commit     12e24f34cb0d55efd08c18b2112507d4bf498008 (patch)
tree       83b07be17b8ef45f42360a3b9159b3aaae3fbad4 /arch/x86
parent     1eb51c33b21ffa3fceb634d1d6bcd6488c79bc26 (diff)
parent     eadc84cc01e04f9f74ec2de0c9355be035c7b396 (diff)
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
  perfcounter: Handle some IO return values
  perf_counter: Push perf_sample_data through the swcounter code
  perf_counter tools: Define and use our own u64, s64 etc. definitions
  perf_counter: Close race in perf_lock_task_context()
  perf_counter, x86: Improve interactions with fast-gup
  perf_counter: Simplify and fix task migration counting
  perf_counter tools: Add a data file header
  perf_counter: Update userspace callchain sampling uses
  perf_counter: Make callchain samples extensible
  perf report: Filter to parent set by default
  perf_counter tools: Handle lost events
  perf_counter: Add event overflow handling
  fs: Provide empty .set_page_dirty() aop for anon inodes
  perf_counter: tools: Makefile tweaks for 64-bit powerpc
  perf_counter: powerpc: Add processor back-end for MPC7450 family
  perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
  perf_counter: powerpc: Change how processor-specific back-ends get selected
  perf_counter: powerpc: Use unsigned long for register and constraint values
  perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
  perf_counter tools: Add and use isprint()
  ...
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/perf_counter.h     5
-rw-r--r--  arch/x86/include/asm/pgtable_32.h       8
-rw-r--r--  arch/x86/include/asm/uaccess.h          7
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c    138
-rw-r--r--  arch/x86/mm/gup.c                      58
5 files changed, 143 insertions(+), 73 deletions(-)
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 876ed97147b3..5fb33e160ea0 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,11 +84,6 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2          0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES         (X86_PMC_IDX_FIXED + 2)
 
-extern void set_perf_counter_pending(void);
-
-#define clear_perf_counter_pending()  do { } while (0)
-#define test_perf_counter_pending()   (0)
-
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 31bd120cf2a2..01fd9461d323 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
 #endif
 
 #if defined(CONFIG_HIGHPTE)
+#define __KM_PTE                       \
+       (in_nmi() ? KM_NMI_PTE :        \
+        in_irq() ? KM_IRQ_PTE :        \
+        KM_PTE0)
 #define pte_offset_map(dir, address)                                   \
-       ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +          \
+       ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) +         \
         pte_index((address)))
 #define pte_offset_map_nested(dir, address)                            \
        ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +          \
         pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
+#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
 #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
 #else
 #define pte_offset_map(dir, address)                                   \
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index b685ece89d5c..512ee87062c2 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,12 @@
 #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
 
 #define KERNEL_DS      MAKE_MM_SEG(-1UL)
-#define USER_DS        MAKE_MM_SEG(PAGE_OFFSET)
+
+#ifdef CONFIG_X86_32
+# define USER_DS       MAKE_MM_SEG(PAGE_OFFSET)
+#else
+# define USER_DS       MAKE_MM_SEG(__VIRTUAL_MASK)
+#endif
 
 #define get_ds()       (KERNEL_DS)
 #define get_fs()       (current_thread_info()->addr_limit)
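Editor's note on the hunk above: USER_DS/KERNEL_DS feed the addr_limit that access_ok()-style checks compare user pointers against. The sketch below is illustrative only and is not the kernel's access_ok() implementation; the function name range_ok and its parameters are invented for this example. It shows the overflow-safe range check that such a segment limit enables.

/*
 * Illustrative sketch only -- not kernel code.  A user range
 * [addr, addr + size) is acceptable when it neither wraps around
 * nor extends past the current segment limit.
 */
static inline int range_ok(unsigned long addr, unsigned long size,
                           unsigned long limit)
{
        /* reject wrap-around first, then anything ending beyond the limit */
        return size <= limit && addr <= limit - size;
}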
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..76dfef23f789 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/highmem.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
 	return event & CORE_EVNTSEL_MASK;
 }
 
-static const u64 amd_0f_hw_cache_event_ids
+static const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 {
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
 	},
 },
 [ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = -1,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 },
 [ C(LL ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
 },
 [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
@@ -1223,6 +1224,8 @@ again:
 		if (!intel_pmu_save_and_restart(counter))
 			continue;
 
+		data.period = counter->hw.last_period;
+
 		if (perf_counter_overflow(counter, 1, &data))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
 
 static int amd_pmu_init(void)
 {
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
 	x86_pmu = amd_pmu;
 
-	switch (boot_cpu_data.x86) {
-	case 0x0f:
-	case 0x10:
-	case 0x11:
-		memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
 
-		pr_cont("AMD Family 0f/10/11 events, ");
-		break;
-	}
 	return 0;
 }
 
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
  */
 
 static inline
-void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-	if (entry->nr < MAX_STACK_DEPTH)
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
 		entry->ip[entry->nr++] = ip;
 }
 
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	/* Don't bother with IRQ stacks for now */
-	return -1;
+	/* Process all stacks: */
+	return 0;
 }
 
 static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
 	.address		= backtrace_address,
 };
 
+#include "../dumpstack.h"
+
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bp;
-	char *stack;
-	int nr = entry->nr;
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
+	callchain_store(entry, regs->ip);
 
-	callchain_store(entry, instruction_pointer(regs));
+	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
 
-	stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
-	bp = frame_pointer(regs);
-#else
-	bp = 0;
-#endif
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-	entry->kernel = entry->nr - nr;
-}
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
-struct stack_frame {
-	const void __user	*next_fp;
-	unsigned long		return_address;
-};
+		len  += size;
+		to   += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
 
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
-	int ret;
+	unsigned long bytes;
 
-	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
-		return 0;
+	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
 
-	ret = 1;
-	pagefault_disable();
-	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
-		ret = 0;
-	pagefault_enable();
-
-	return ret;
+	return bytes == sizeof(*frame);
 }
 
 static void
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	struct stack_frame frame;
 	const void __user *fp;
-	int nr = entry->nr;
 
-	regs = (struct pt_regs *)current->thread.sp0 - 1;
-	fp = (void __user *)regs->bp;
+	if (!user_mode(regs))
+		regs = task_pt_regs(current);
 
+	fp = (void __user *)regs->bp;
+
+	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
-	while (entry->nr < MAX_STACK_DEPTH) {
-		frame.next_fp = NULL;
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		frame.next_frame = NULL;
 		frame.return_address = 0;
 
 		if (!copy_stack_frame(fp, &frame))
 			break;
 
-		if ((unsigned long)fp < user_stack_pointer(regs))
+		if ((unsigned long)fp < regs->sp)
 			break;
 
 		callchain_store(entry, frame.return_address);
-		fp = frame.next_fp;
+		fp = frame.next_frame;
 	}
-
-	entry->user = entry->nr - nr;
 }
 
 static void
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 		entry = &__get_cpu_var(irq_entry);
 
 	entry->nr = 0;
-	entry->hv = 0;
-	entry->kernel = 0;
-	entry->user = 0;
 
 	perf_do_callchain(regs, entry);
 
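Editor's note on perf_callchain_user() above: it simply chases saved frame pointers through struct stack_frame records (next_frame, return_address). The sketch below is a hypothetical user-space analogue, not part of this commit: it walks the caller's own frame-pointer chain, assuming the binary is built with frame pointers (e.g. -fno-omit-frame-pointer on x86-64), and bounds the depth much as the kernel bounds it with PERF_MAX_STACK_DEPTH. If any frame in the chain was compiled without frame pointers, the walk may stop early.

/* Hypothetical user-space frame-pointer walk; build with -O0 or
 * -fno-omit-frame-pointer, otherwise the chain may be truncated. */
#include <stdio.h>

struct stack_frame {
	struct stack_frame *next_frame;     /* caller's saved frame pointer */
	unsigned long       return_address; /* return address into the caller */
};

static void dump_callchain(void)
{
	struct stack_frame *fp = __builtin_frame_address(0);
	int depth = 0;

	/* stop on a NULL link (outermost frame) or after a fixed depth */
	while (fp && fp->return_address && depth++ < 64) {
		printf("  %#lx\n", fp->return_address);
		if (fp->next_frame <= fp)   /* frames must move up the stack */
			break;
		fp = fp->next_frame;
	}
}

int main(void)
{
	dump_callchain();
	return 0;
}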
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index f97480941269..71da1bca13cb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -14,7 +14,7 @@
 static inline pte_t gup_get_pte(pte_t *ptep)
 {
 #ifndef CONFIG_X86_PAE
-	return *ptep;
+	return ACCESS_ONCE(*ptep);
 #else
 	/*
 	 * With get_user_pages_fast, we walk down the pagetables without taking
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	unsigned long flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (eg. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables and pages from being freed on x86.
+	 *
+	 * So long as we atomically load page table pointers versus teardown
+	 * (which we do on x86, with the above PAE exception), we can follow the
+	 * address down to the page and take a ref on it.
+	 */
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
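Editor's usage note on __get_user_pages_fast() above: it returns the number of pages it actually pinned (possibly fewer than requested, or 0), and each pinned page must be released with put_page(). The sketch below is a hypothetical caller, not code from this commit; it follows the same pin / kmap_atomic / put_page pattern as copy_from_user_nmi() earlier in the merge, the helper name peek_user_byte() is invented for illustration, and the usual mm/highmem/uaccess headers are assumed.

/*
 * Hypothetical caller sketch (not from this commit): read one byte of user
 * memory from IRQ context by pinning the page with __get_user_pages_fast().
 */
static int peek_user_byte(unsigned long uaddr, u8 *out)
{
	struct page *page;
	void *kaddr;

	/* returns the number of pages pinned; 0 means the page is not present */
	if (__get_user_pages_fast(uaddr & PAGE_MASK, 1, 0, &page) != 1)
		return -EFAULT;

	kaddr = kmap_atomic(page, KM_IRQ0);
	*out = *((u8 *)kaddr + (uaddr & ~PAGE_MASK));
	kunmap_atomic(kaddr, KM_IRQ0);
	put_page(page);

	return 0;
}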