author    Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 14:29:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 14:29:32 -0400
commit    12e24f34cb0d55efd08c18b2112507d4bf498008 (patch)
tree      83b07be17b8ef45f42360a3b9159b3aaae3fbad4 /arch/x86
parent    1eb51c33b21ffa3fceb634d1d6bcd6488c79bc26 (diff)
parent    eadc84cc01e04f9f74ec2de0c9355be035c7b396 (diff)
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
perfcounter: Handle some IO return values
perf_counter: Push perf_sample_data through the swcounter code
perf_counter tools: Define and use our own u64, s64 etc. definitions
perf_counter: Close race in perf_lock_task_context()
perf_counter, x86: Improve interactions with fast-gup
perf_counter: Simplify and fix task migration counting
perf_counter tools: Add a data file header
perf_counter: Update userspace callchain sampling uses
perf_counter: Make callchain samples extensible
perf report: Filter to parent set by default
perf_counter tools: Handle lost events
perf_counter: Add event overflow handling
fs: Provide empty .set_page_dirty() aop for anon inodes
perf_counter: tools: Makefile tweaks for 64-bit powerpc
perf_counter: powerpc: Add processor back-end for MPC7450 family
perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
perf_counter: powerpc: Change how processor-specific back-ends get selected
perf_counter: powerpc: Use unsigned long for register and constraint values
perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
perf_counter tools: Add and use isprint()
...
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/perf_counter.h |   5
-rw-r--r--  arch/x86/include/asm/pgtable_32.h   |   8
-rw-r--r--  arch/x86/include/asm/uaccess.h      |   7
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c  | 138
-rw-r--r--  arch/x86/mm/gup.c                   |  58
5 files changed, 143 insertions, 73 deletions
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 876ed97147b3..5fb33e160ea0 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,11 +84,6 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2		0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES		(X86_PMC_IDX_FIXED + 2)
 
-extern void set_perf_counter_pending(void);
-
-#define clear_perf_counter_pending()	do { } while (0)
-#define test_perf_counter_pending()	(0)
-
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 31bd120cf2a2..01fd9461d323 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
 #endif
 
 #if defined(CONFIG_HIGHPTE)
+#define __KM_PTE			\
+	(in_nmi() ? KM_NMI_PTE :	\
+	 in_irq() ? KM_IRQ_PTE :	\
+	 KM_PTE0)
 #define pte_offset_map(dir, address)				\
-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +	\
+	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) +	\
 	 pte_index((address)))
 #define pte_offset_map_nested(dir, address)			\
 	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +	\
 	 pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
+#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
 #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
 #else
 #define pte_offset_map(dir, address)				\
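Note on the hunk above: with CONFIG_HIGHPTE, pte_offset_map()/pte_unmap() now pick their kmap_atomic slot based on the interrupt nesting level, so a PTE mapped from NMI context cannot clobber one already mapped from IRQ or process context. A minimal stand-alone sketch of the slot-selection idea (names here are illustrative, not the kernel's):

    /* illustrative only: a distinct scratch mapping slot per nesting level */
    enum pte_map_slot { SLOT_PROCESS, SLOT_IRQ, SLOT_NMI };

    static inline enum pte_map_slot pick_pte_slot(int nmi_active, int irq_active)
    {
        if (nmi_active)
            return SLOT_NMI;      /* an NMI may interrupt the IRQ mapping */
        if (irq_active)
            return SLOT_IRQ;      /* an IRQ may interrupt the process mapping */
        return SLOT_PROCESS;
    }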
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index b685ece89d5c..512ee87062c2 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,12 @@
 #define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
 
 #define KERNEL_DS	MAKE_MM_SEG(-1UL)
-#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+
+#ifdef CONFIG_X86_32
+# define USER_DS	MAKE_MM_SEG(PAGE_OFFSET)
+#else
+# define USER_DS	MAKE_MM_SEG(__VIRTUAL_MASK)
+#endif
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
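Note on the hunk above: USER_DS is no longer PAGE_OFFSET on both variants; 32-bit keeps PAGE_OFFSET while 64-bit now uses __VIRTUAL_MASK as the user address limit. A rough sketch of the kind of overflow-safe range check such an address limit feeds into (simplified and assumed, not the kernel's actual access_ok() implementation):

    #include <stddef.h>

    /* returns non-zero when [addr, addr + size) stays at or below 'limit' */
    static inline int range_below_limit(unsigned long addr, size_t size,
                                        unsigned long limit)
    {
        /* written to avoid wrapping when addr + size would overflow */
        return size <= limit && addr <= limit - size;
    }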
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..76dfef23f789 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/highmem.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
 	return event & CORE_EVNTSEL_MASK;
 }
 
-static const u64 amd_0f_hw_cache_event_ids
+static const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 {
 [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+		[ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
 		[ C(RESULT_MISS) ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
+		[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
 	},
 },
 [ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
 		[ C(RESULT_MISS) ] = -1,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
 		[ C(RESULT_MISS) ] = 0,
 	},
 },
 [ C(LL ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
 		[ C(RESULT_MISS) ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
 },
 [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+		[ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
@@ -1223,6 +1224,8 @@ again:
 		if (!intel_pmu_save_and_restart(counter))
 			continue;
 
+		data.period = counter->hw.last_period;
+
 		if (perf_counter_overflow(counter, 1, &data))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
 
 static int amd_pmu_init(void)
 {
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
 	x86_pmu = amd_pmu;
 
-	switch (boot_cpu_data.x86) {
-	case 0x0f:
-	case 0x10:
-	case 0x11:
-		memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
 
-		pr_cont("AMD Family 0f/10/11 events, ");
-		break;
-	}
 	return 0;
 }
 
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
  */
 
 static inline
-void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-	if (entry->nr < MAX_STACK_DEPTH)
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
 		entry->ip[entry->nr++] = ip;
 }
 
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	/* Don't bother with IRQ stacks for now */
-	return -1;
+	/* Process all stacks: */
+	return 0;
 }
 
 static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
 	.address	= backtrace_address,
 };
 
+#include "../dumpstack.h"
+
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bp;
-	char *stack;
-	int nr = entry->nr;
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
+	callchain_store(entry, regs->ip);
 
-	callchain_store(entry, instruction_pointer(regs));
+	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
 
-	stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
-	bp = frame_pointer(regs);
-#else
-	bp = 0;
-#endif
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-	entry->kernel = entry->nr - nr;
-}
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
-struct stack_frame {
-	const void __user *next_fp;
-	unsigned long return_address;
-};
+		len += size;
+		to += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
 
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
-	int ret;
+	unsigned long bytes;
 
-	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
-		return 0;
+	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
 
-	ret = 1;
-	pagefault_disable();
-	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
-		ret = 0;
-	pagefault_enable();
-
-	return ret;
+	return bytes == sizeof(*frame);
 }
 
 static void
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	struct stack_frame frame;
 	const void __user *fp;
-	int nr = entry->nr;
 
-	regs = (struct pt_regs *)current->thread.sp0 - 1;
-	fp = (void __user *)regs->bp;
+	if (!user_mode(regs))
+		regs = task_pt_regs(current);
 
+	fp = (void __user *)regs->bp;
+
+	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
-	while (entry->nr < MAX_STACK_DEPTH) {
-		frame.next_fp = NULL;
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		frame.next_frame = NULL;
 		frame.return_address = 0;
 
 		if (!copy_stack_frame(fp, &frame))
 			break;
 
-		if ((unsigned long)fp < user_stack_pointer(regs))
+		if ((unsigned long)fp < regs->sp)
 			break;
 
 		callchain_store(entry, frame.return_address);
-		fp = frame.next_fp;
+		fp = frame.next_frame;
 	}
-
-	entry->user = entry->nr - nr;
 }
 
 static void
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 		entry = &__get_cpu_var(irq_entry);
 
 	entry->nr = 0;
-	entry->hv = 0;
-	entry->kernel = 0;
-	entry->user = 0;
 
 	perf_do_callchain(regs, entry);
 
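Note on the callchain hunks above: the per-entry hv/kernel/user counters are gone; instead, perf_callchain_kernel() and perf_callchain_user() push PERF_CONTEXT_KERNEL / PERF_CONTEXT_USER marker values into the flat ip[] array ahead of their frames. A consumer-side sketch of splitting such a callchain (user-space C; the marker values are assumed here for illustration and should be taken from the perf_counter ABI header):

    #include <stdint.h>
    #include <stdio.h>

    #define CTX_KERNEL ((uint64_t)-128)   /* assumed PERF_CONTEXT_KERNEL */
    #define CTX_USER   ((uint64_t)-512)   /* assumed PERF_CONTEXT_USER   */
    #define CTX_MAX    ((uint64_t)-4095)  /* assumed PERF_CONTEXT_MAX    */

    static void print_callchain(const uint64_t *ip, uint64_t nr)
    {
        const char *ctx = "unknown";
        uint64_t i;

        for (i = 0; i < nr; i++) {
            if (ip[i] >= CTX_MAX) {       /* a context marker, not an address */
                ctx = (ip[i] == CTX_KERNEL) ? "kernel" :
                      (ip[i] == CTX_USER)   ? "user" : "other";
                continue;
            }
            printf("%s: %#llx\n", ctx, (unsigned long long)ip[i]);
        }
    }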
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index f97480941269..71da1bca13cb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -14,7 +14,7 @@
 static inline pte_t gup_get_pte(pte_t *ptep)
 {
 #ifndef CONFIG_X86_PAE
-	return *ptep;
+	return ACCESS_ONCE(*ptep);
 #else
 	/*
 	 * With get_user_pages_fast, we walk down the pagetables without taking
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	unsigned long flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (eg. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables and pages from being freed on x86.
+	 *
+	 * So long as we atomically load page table pointers versus teardown
+	 * (which we do on x86, with the above PAE exception), we can follow the
+	 * address down to the the page and take a ref on it.
+	 */
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start: starting user address
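Note on __get_user_pages_fast() above: it returns the number of pages actually pinned (possibly zero) without sleeping or taking mmap_sem, and every pinned page must be released with put_page(). A minimal caller sketch (assumed, for illustration; it mirrors how copy_from_user_nmi() in the perf_counter.c hunk pins one page at a time from IRQ/NMI context):

    #include <linux/mm.h>
    #include <linux/highmem.h>

    /* returns 1 and stores the byte at 'uaddr' in *out, or 0 on failure */
    static int peek_user_byte(const void __user *uaddr, unsigned char *out)
    {
        unsigned long addr = (unsigned long)uaddr;
        struct page *page;
        void *map;

        if (!__get_user_pages_fast(addr, 1, 0, &page))
            return 0;                     /* nothing pinned */

        map = kmap_atomic(page, KM_IRQ0); /* slot choice assumed: IRQ context */
        *out = *((unsigned char *)map + (addr & (PAGE_SIZE - 1)));
        kunmap_atomic(map, KM_IRQ0);
        put_page(page);

        return 1;
    }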