author    Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 14:29:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 14:29:32 -0400
commit    12e24f34cb0d55efd08c18b2112507d4bf498008 (patch)
tree      83b07be17b8ef45f42360a3b9159b3aaae3fbad4 /arch/x86
parent    1eb51c33b21ffa3fceb634d1d6bcd6488c79bc26 (diff)
parent    eadc84cc01e04f9f74ec2de0c9355be035c7b396 (diff)
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
perfcounter: Handle some IO return values
perf_counter: Push perf_sample_data through the swcounter code
perf_counter tools: Define and use our own u64, s64 etc. definitions
perf_counter: Close race in perf_lock_task_context()
perf_counter, x86: Improve interactions with fast-gup
perf_counter: Simplify and fix task migration counting
perf_counter tools: Add a data file header
perf_counter: Update userspace callchain sampling uses
perf_counter: Make callchain samples extensible
perf report: Filter to parent set by default
perf_counter tools: Handle lost events
perf_counter: Add event overflow handling
fs: Provide empty .set_page_dirty() aop for anon inodes
perf_counter: tools: Makefile tweaks for 64-bit powerpc
perf_counter: powerpc: Add processor back-end for MPC7450 family
perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
perf_counter: powerpc: Change how processor-specific back-ends get selected
perf_counter: powerpc: Use unsigned long for register and constraint values
perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
perf_counter tools: Add and use isprint()
...
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/perf_counter.h |   5
-rw-r--r--  arch/x86/include/asm/pgtable_32.h   |   8
-rw-r--r--  arch/x86/include/asm/uaccess.h      |   7
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c  | 138
-rw-r--r--  arch/x86/mm/gup.c                   |  58
5 files changed, 143 insertions, 73 deletions
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 876ed97147b3..5fb33e160ea0 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,11 +84,6 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2		0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES		(X86_PMC_IDX_FIXED + 2)
 
-extern void set_perf_counter_pending(void);
-
-#define clear_perf_counter_pending()	do { } while (0)
-#define test_perf_counter_pending()	(0)
-
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 31bd120cf2a2..01fd9461d323 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
 #endif
 
 #if defined(CONFIG_HIGHPTE)
+#define __KM_PTE			\
+	(in_nmi() ? KM_NMI_PTE :	\
+	 in_irq() ? KM_IRQ_PTE :	\
+	 KM_PTE0)
 #define pte_offset_map(dir, address)				\
-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +	\
+	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) +	\
 	 pte_index((address)))
 #define pte_offset_map_nested(dir, address)			\
 	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +	\
 	 pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
+#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
 #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
 #else
 #define pte_offset_map(dir, address)				\
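Note on the hunk above: with CONFIG_HIGHPTE, pte_offset_map()/pte_unmap() now pick their kmap_atomic slot based on the interrupt nesting level, so a PTE mapped from NMI context cannot clobber one already mapped from IRQ or process context. A minimal stand-alone sketch of the slot-selection idea (names here are illustrative, not the kernel's):

    /* illustrative only: a distinct scratch mapping slot per nesting level */
    enum pte_map_slot { SLOT_PROCESS, SLOT_IRQ, SLOT_NMI };

    static inline enum pte_map_slot pick_pte_slot(int nmi_active, int irq_active)
    {
        if (nmi_active)
            return SLOT_NMI;      /* an NMI may interrupt the IRQ mapping */
        if (irq_active)
            return SLOT_IRQ;      /* an IRQ may interrupt the process mapping */
        return SLOT_PROCESS;
    }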
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index b685ece89d5c..512ee87062c2 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,12 @@
 #define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
 
 #define KERNEL_DS	MAKE_MM_SEG(-1UL)
-#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+
+#ifdef CONFIG_X86_32
+# define USER_DS	MAKE_MM_SEG(PAGE_OFFSET)
+#else
+# define USER_DS	MAKE_MM_SEG(__VIRTUAL_MASK)
+#endif
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
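Note on the hunk above: USER_DS is no longer PAGE_OFFSET on both variants; 32-bit keeps PAGE_OFFSET while 64-bit now uses __VIRTUAL_MASK as the user address limit. A rough sketch of the kind of overflow-safe range check such an address limit feeds into (simplified and assumed, not the kernel's actual access_ok() implementation):

    #include <stddef.h>

    /* returns non-zero when [addr, addr + size) stays at or below 'limit' */
    static inline int range_below_limit(unsigned long addr, size_t size,
                                        unsigned long limit)
    {
        /* written to avoid wrapping when addr + size would overflow */
        return size <= limit && addr <= limit - size;
    }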
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..76dfef23f789 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/highmem.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
 	return event & CORE_EVNTSEL_MASK;
 }
 
-static const u64 amd_0f_hw_cache_event_ids
+static const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 {
 [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+		[ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
 		[ C(RESULT_MISS) ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
+		[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
 	},
 },
 [ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
 		[ C(RESULT_MISS) ] = -1,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
 		[ C(RESULT_MISS) ] = 0,
 	},
 },
 [ C(LL ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
 		[ C(RESULT_MISS) ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
 },
 [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS) ] = 0,
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+		[ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
@@ -1223,6 +1224,8 @@ again:
 		if (!intel_pmu_save_and_restart(counter))
 			continue;
 
+		data.period = counter->hw.last_period;
+
 		if (perf_counter_overflow(counter, 1, &data))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
 
 static int amd_pmu_init(void)
 {
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
 	x86_pmu = amd_pmu;
 
-	switch (boot_cpu_data.x86) {
-	case 0x0f:
-	case 0x10:
-	case 0x11:
-		memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
 
-		pr_cont("AMD Family 0f/10/11 events, ");
-		break;
-	}
 	return 0;
 }
 
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
  */
 
 static inline
-void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-	if (entry->nr < MAX_STACK_DEPTH)
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
 		entry->ip[entry->nr++] = ip;
 }
 
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	/* Don't bother with IRQ stacks for now */
-	return -1;
+	/* Process all stacks: */
+	return 0;
 }
 
 static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
 	.address	= backtrace_address,
 };
 
+#include "../dumpstack.h"
+
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bp;
-	char *stack;
-	int nr = entry->nr;
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
+	callchain_store(entry, regs->ip);
 
-	callchain_store(entry, instruction_pointer(regs));
+	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
 
-	stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
-	bp = frame_pointer(regs);
-#else
-	bp = 0;
-#endif
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-	entry->kernel = entry->nr - nr;
-}
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
-struct stack_frame {
-	const void __user *next_fp;
-	unsigned long return_address;
-};
+		len += size;
+		to += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
 
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
-	int ret;
+	unsigned long bytes;
 
-	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
-		return 0;
+	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
 
-	ret = 1;
-	pagefault_disable();
-	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
-		ret = 0;
-	pagefault_enable();
-
-	return ret;
+	return bytes == sizeof(*frame);
 }
 
 static void
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	struct stack_frame frame;
 	const void __user *fp;
-	int nr = entry->nr;
 
-	regs = (struct pt_regs *)current->thread.sp0 - 1;
-	fp = (void __user *)regs->bp;
+	if (!user_mode(regs))
+		regs = task_pt_regs(current);
 
+	fp = (void __user *)regs->bp;
+
+	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
-	while (entry->nr < MAX_STACK_DEPTH) {
-		frame.next_fp = NULL;
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		frame.next_frame = NULL;
 		frame.return_address = 0;
 
 		if (!copy_stack_frame(fp, &frame))
 			break;
 
-		if ((unsigned long)fp < user_stack_pointer(regs))
+		if ((unsigned long)fp < regs->sp)
 			break;
 
 		callchain_store(entry, frame.return_address);
-		fp = frame.next_fp;
+		fp = frame.next_frame;
 	}
-
-	entry->user = entry->nr - nr;
 }
 
 static void
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 		entry = &__get_cpu_var(irq_entry);
 
 	entry->nr = 0;
-	entry->hv = 0;
-	entry->kernel = 0;
-	entry->user = 0;
 
 	perf_do_callchain(regs, entry);
 
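Note on the callchain hunks above: the per-entry hv/kernel/user counters are gone; instead, perf_callchain_kernel() and perf_callchain_user() push PERF_CONTEXT_KERNEL / PERF_CONTEXT_USER marker values into the flat ip[] array ahead of their frames. A consumer-side sketch of splitting such a callchain (user-space C; the marker values are assumed here for illustration and should be taken from the perf_counter ABI header):

    #include <stdint.h>
    #include <stdio.h>

    #define CTX_KERNEL ((uint64_t)-128)   /* assumed PERF_CONTEXT_KERNEL */
    #define CTX_USER   ((uint64_t)-512)   /* assumed PERF_CONTEXT_USER   */
    #define CTX_MAX    ((uint64_t)-4095)  /* assumed PERF_CONTEXT_MAX    */

    static void print_callchain(const uint64_t *ip, uint64_t nr)
    {
        const char *ctx = "unknown";
        uint64_t i;

        for (i = 0; i < nr; i++) {
            if (ip[i] >= CTX_MAX) {       /* a context marker, not an address */
                ctx = (ip[i] == CTX_KERNEL) ? "kernel" :
                      (ip[i] == CTX_USER)   ? "user" : "other";
                continue;
            }
            printf("%s: %#llx\n", ctx, (unsigned long long)ip[i]);
        }
    }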
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index f97480941269..71da1bca13cb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -14,7 +14,7 @@
 static inline pte_t gup_get_pte(pte_t *ptep)
 {
 #ifndef CONFIG_X86_PAE
-	return *ptep;
+	return ACCESS_ONCE(*ptep);
 #else
 	/*
 	 * With get_user_pages_fast, we walk down the pagetables without taking
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	unsigned long flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (eg. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables and pages from being freed on x86.
+	 *
+	 * So long as we atomically load page table pointers versus teardown
+	 * (which we do on x86, with the above PAE exception), we can follow the
+	 * address down to the the page and take a ref on it.
+	 */
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start: starting user address
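Note on __get_user_pages_fast() above: it returns the number of pages actually pinned (possibly zero) without sleeping or taking mmap_sem, and every pinned page must be released with put_page(). A minimal caller sketch (assumed, for illustration; it mirrors how copy_from_user_nmi() in the perf_counter.c hunk pins one page at a time from IRQ/NMI context):

    #include <linux/mm.h>
    #include <linux/highmem.h>

    /* returns 1 and stores the byte at 'uaddr' in *out, or 0 on failure */
    static int peek_user_byte(const void __user *uaddr, unsigned char *out)
    {
        unsigned long addr = (unsigned long)uaddr;
        struct page *page;
        void *map;

        if (!__get_user_pages_fast(addr, 1, 0, &page))
            return 0;                     /* nothing pinned */

        map = kmap_atomic(page, KM_IRQ0); /* slot choice assumed: IRQ context */
        *out = *((unsigned char *)map + (addr & (PAGE_SIZE - 1)));
        kunmap_atomic(map, KM_IRQ0);
        put_page(page);

        return 1;
    }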