Diffstat (limited to 'mm')
-rw-r--r--  mm/allocpercpu.c    |  2
-rw-r--r--  mm/memory.c         | 66
-rw-r--r--  mm/mempolicy.c      |  6
-rw-r--r--  mm/migrate.c        |  2
-rw-r--r--  mm/mprotect.c       | 10
-rw-r--r--  mm/page_alloc.c     |  1
-rw-r--r--  mm/slub.c           | 14
-rw-r--r--  mm/sparse-vmemmap.c |  2
8 files changed, 84 insertions, 19 deletions
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index f4026bae6eed..05f2b4009ccc 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -1,7 +1,7 @@
 /*
  * linux/mm/allocpercpu.c
  *
- * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com>
+ * Separated from slab.c August 11, 2006 Christoph Lameter
  */
 #include <linux/mm.h>
 #include <linux/module.h>
diff --git a/mm/memory.c b/mm/memory.c
index 9aefaae46858..2302d228fe04 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1045,6 +1045,26 @@ no_page_table:
         return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+        /*
+         * We don't want to optimize FOLL_ANON for make_pages_present()
+         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+         * we want to get the page from the page tables to make sure
+         * that we serialize and update with any other user of that
+         * mapping.
+         */
+        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+                return 0;
+        /*
+         * And if we have a fault or a nopfn routine, it's not an
+         * anonymous region.
+         */
+        return !vma->vm_ops ||
+                (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 unsigned long start, int len, int write, int force,
                 struct page **pages, struct vm_area_struct **vmas)
@@ -1119,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 foll_flags = FOLL_TOUCH;
                 if (pages)
                         foll_flags |= FOLL_GET;
-                if (!write && !(vma->vm_flags & VM_LOCKED) &&
-                    (!vma->vm_ops || !vma->vm_ops->fault))
+                if (!write && use_zero_page(vma))
                         foll_flags |= FOLL_ANON;
 
                 do {
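Note on the two hunks above: use_zero_page() centralizes the decision of when a read-only get_user_pages() walk may be satisfied with the shared zero page (FOLL_ANON), and newly excludes VM_SHARED mappings in addition to VM_LOCKED ones. A minimal, standalone illustration of that decision follows; the mock types and flag values mirror the kernel's but are assumptions of this sketch, not kernel code.

#include <stdio.h>

#define VM_LOCKED 0x2000u       /* illustrative copies of the kernel values */
#define VM_SHARED 0x0008u

struct vm_ops_mock { void *fault, *nopfn; };
struct vma_mock { unsigned int vm_flags; struct vm_ops_mock *vm_ops; };

static int use_zero_page(const struct vma_mock *vma)
{
        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
                return 0;
        return !vma->vm_ops || (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
}

int main(void)
{
        struct vma_mock anon = { 0, NULL };               /* plain anonymous */
        struct vma_mock locked = { VM_LOCKED, NULL };     /* mlocked */
        struct vm_ops_mock file_ops = { (void *)1, NULL };
        struct vma_mock filemap = { 0, &file_ops };       /* has a fault handler */

        /* Only the plain anonymous VMA qualifies: prints 1 0 0. */
        printf("%d %d %d\n", use_zero_page(&anon),
               use_zero_page(&locked), use_zero_page(&filemap));
        return 0;
}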
@@ -1132,7 +1151,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                          * be processed until returning to user space.
                          */
                         if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))
-                                return -ENOMEM;
+                                return i ? i : -ENOMEM;
 
                         if (write)
                                 foll_flags |= FOLL_WRITE;
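The return change above means a task being OOM-killed no longer throws away the count of pages it had already pinned, so callers can release exactly those references instead of leaking them. A hedged caller-side sketch (illustrative names only; mmap_sem assumed held by the caller, as get_user_pages() requires):

#include <linux/mm.h>
#include <linux/sched.h>

static int pin_user_buffer_sketch(struct task_struct *tsk, struct mm_struct *mm,
                                  unsigned long start, int nr_pages,
                                  struct page **pages)
{
        int got = get_user_pages(tsk, mm, start, nr_pages, 0, 0, pages, NULL);

        if (got < 0)
                return got;             /* nothing was pinned */

        if (got < nr_pages) {           /* partial pin: undo and report */
                while (got--)
                        put_page(pages[got]);
                return -EFAULT;
        }
        return 0;                       /* all nr_pages pinned */
}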
@@ -1678,8 +1697,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         struct page *dirty_page = NULL;
 
         old_page = vm_normal_page(vma, address, orig_pte);
-        if (!old_page)
+        if (!old_page) {
+                /*
+                 * VM_MIXEDMAP !pfn_valid() case
+                 *
+                 * We should not cow pages in a shared writeable mapping.
+                 * Just mark the pages writable as we can't do any dirty
+                 * accounting on raw pfn maps.
+                 */
+                if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+                                     (VM_WRITE|VM_SHARED))
+                        goto reuse;
                 goto gotten;
+        }
 
         /*
          * Take out anonymous pages first, anonymous shared vmas are
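The new !old_page branch only reuses the pte when the mapping is both writable and shared; note the mask-and-compare, which rejects a private writable mapping that still needs COW. A standalone illustration (flag values mirror the kernel's but serve only this sketch):

#include <stdio.h>

#define VM_WRITE  0x0002u
#define VM_SHARED 0x0008u

static int shared_writable(unsigned int vm_flags)
{
        /* Both bits must be set; a plain AND would wrongly accept either. */
        return (vm_flags & (VM_WRITE | VM_SHARED)) == (VM_WRITE | VM_SHARED);
}

int main(void)
{
        printf("%d %d %d\n",
               shared_writable(VM_WRITE | VM_SHARED),   /* 1: reuse, no COW */
               shared_writable(VM_WRITE),               /* 0: private writable */
               shared_writable(VM_SHARED));             /* 0: read-only shared */
        return 0;
}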
@@ -1732,6 +1762,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         }
 
         if (reuse) {
+reuse:
                 flush_cache_page(vma, address, pte_pfn(orig_pte));
                 entry = pte_mkyoung(orig_pte);
                 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1766,7 +1797,6 @@ gotten:
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
         if (likely(pte_same(*page_table, orig_pte))) {
                 if (old_page) {
-                        page_remove_rmap(old_page, vma);
                         if (!PageAnon(old_page)) {
                                 dec_mm_counter(mm, file_rss);
                                 inc_mm_counter(mm, anon_rss);
@@ -1788,6 +1818,32 @@ gotten:
                 lru_cache_add_active(new_page);
                 page_add_new_anon_rmap(new_page, vma, address);
 
+                if (old_page) {
+                        /*
+                         * Only after switching the pte to the new page may
+                         * we remove the mapcount here. Otherwise another
+                         * process may come and find the rmap count decremented
+                         * before the pte is switched to the new page, and
+                         * "reuse" the old page writing into it while our pte
+                         * here still points into it and can be read by other
+                         * threads.
+                         *
+                         * The critical issue is to order this
+                         * page_remove_rmap with the ptp_clear_flush above.
+                         * Those stores are ordered by (if nothing else,)
+                         * the barrier present in the atomic_add_negative
+                         * in page_remove_rmap.
+                         *
+                         * Then the TLB flush in ptep_clear_flush ensures that
+                         * no process can access the old page before the
+                         * decremented mapcount is visible. And the old page
+                         * cannot be reused until after the decremented
+                         * mapcount is visible. So transitively, TLBs to
+                         * old page will be flushed before it can be reused.
+                         */
+                        page_remove_rmap(old_page, vma);
+                }
+
                 /* Free the old page.. */
                 new_page = old_page;
                 ret |= VM_FAULT_WRITE;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a37a5034f63d..c94e58b192c3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
         } else {
                 *policy = pol == &default_policy ? MPOL_DEFAULT :
                                         pol->mode;
-                *policy |= pol->flags;
+                /*
+                 * Internal mempolicy flags must be masked off before exposing
+                 * the policy to userspace.
+                 */
+                *policy |= (pol->flags & MPOL_MODE_FLAGS);
         }
 
         if (vma) {
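With the hunk above, only the user-visible mode flags (MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES, i.e. MPOL_MODE_FLAGS) can reach userspace; purely internal flags stay in the kernel. A hedged userspace sketch of decoding the value returned by get_mempolicy(2), assuming libnuma's <numaif.h> and linking with -lnuma; the fallback constant values are what I expect and should be checked against the installed headers.

#include <numaif.h>
#include <stdio.h>

#ifndef MPOL_F_STATIC_NODES             /* assumed values; verify locally */
#define MPOL_F_STATIC_NODES     (1 << 15)
#endif
#ifndef MPOL_F_RELATIVE_NODES
#define MPOL_F_RELATIVE_NODES   (1 << 14)
#endif
#define MODE_FLAGS (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)

int main(void)
{
        int policy = 0;

        /* Query the calling thread's task policy. */
        if (get_mempolicy(&policy, NULL, 0, NULL, 0) != 0) {
                perror("get_mempolicy");
                return 1;
        }

        /* Thanks to the masking above, only mode flags can appear here. */
        printf("mode=%d flags=%#x\n",
               policy & ~MODE_FLAGS, policy & MODE_FLAGS);
        return 0;
}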
diff --git a/mm/migrate.c b/mm/migrate.c
index 112bcaeaa104..55bd355d170d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -9,7 +9,7 @@
  * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
  * Hirokazu Takahashi <taka@valinux.co.jp>
  * Dave Hansen <haveblue@us.ibm.com>
- * Christoph Lameter <clameter@sgi.com>
+ * Christoph Lameter
  */
 
 #include <linux/migrate.h>
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a5bf31c27375..acfe7c8d72fc 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -47,19 +47,17 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
                 if (pte_present(oldpte)) {
                         pte_t ptent;
 
-                        /* Avoid an SMP race with hardware updated dirty/clean
-                         * bits by wiping the pte and then setting the new pte
-                         * into place.
-                         */
-                        ptent = ptep_get_and_clear(mm, addr, pte);
+                        ptent = ptep_modify_prot_start(mm, addr, pte);
                         ptent = pte_modify(ptent, newprot);
+
                         /*
                          * Avoid taking write faults for pages we know to be
                          * dirty.
                          */
                         if (dirty_accountable && pte_dirty(ptent))
                                 ptent = pte_mkwrite(ptent);
-                        set_pte_at(mm, addr, pte, ptent);
+
+                        ptep_modify_prot_commit(mm, addr, pte, ptent);
 #ifdef CONFIG_MIGRATION
                 } else if (!pte_file(oldpte)) {
                         swp_entry_t entry = pte_to_swp_entry(oldpte);
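For context on the hunk above: the start/commit pair turns the pte update into a small transaction, so a paravirtualized guest (the motivation was Xen) can batch or trap the two halves, while on bare hardware the behaviour matches the old clear-then-set sequence. Below is a sketch of what the generic fallback amounts to, written from memory and hedged accordingly; the real helpers live in the asm-generic pgtable header.

#include <linux/mm.h>

/* Sketch of the generic (non-paravirt) fallback semantics; not the
 * verbatim kernel definitions. */
static inline pte_t sketch_modify_prot_start(struct mm_struct *mm,
                                             unsigned long addr, pte_t *ptep)
{
        /* Atomically fetch and clear the pte so the CPU cannot set
         * dirty/accessed bits in an entry we are about to replace. */
        return ptep_get_and_clear(mm, addr, ptep);
}

static inline void sketch_modify_prot_commit(struct mm_struct *mm,
                                             unsigned long addr,
                                             pte_t *ptep, pte_t pte)
{
        /* Publish the modified entry. */
        set_pte_at(mm, addr, ptep, pte);
}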
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2f552955a02f..f32fae3121f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2328,7 +2328,6 @@ static void build_zonelists(pg_data_t *pgdat)
 static void build_zonelist_cache(pg_data_t *pgdat)
 {
         pgdat->node_zonelists[0].zlcache_ptr = NULL;
-        pgdat->node_zonelists[1].zlcache_ptr = NULL;
 }
 
 #endif  /* CONFIG_NUMA */
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5,7 +5,7 @@
  * The allocator synchronizes using per slab locks and only
  * uses a centralized lock to manage a pool of partial slabs.
  *
- * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
+ * (C) 2007 SGI, Christoph Lameter
  */
 
 #include <linux/mm.h>
@@ -2995,8 +2995,6 @@ void __init kmem_cache_init(void)
                 create_kmalloc_cache(&kmalloc_caches[1],
                                 "kmalloc-96", 96, GFP_KERNEL);
                 caches++;
-        }
-        if (KMALLOC_MIN_SIZE <= 128) {
                 create_kmalloc_cache(&kmalloc_caches[2],
                                 "kmalloc-192", 192, GFP_KERNEL);
                 caches++;
@@ -3026,6 +3024,16 @@ void __init kmem_cache_init(void)
         for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
                 size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
 
+        if (KMALLOC_MIN_SIZE == 128) {
+                /*
+                 * The 192 byte sized cache is not used if the alignment
+                 * is 128 byte. Redirect kmalloc to use the 256 byte cache
+                 * instead.
+                 */
+                for (i = 128 + 8; i <= 192; i += 8)
+                        size_index[(i - 1) / 8] = 8;
+        }
+
         slab_state = UP;
 
         /* Provide the correct kmalloc names now that the caches are up */
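The last hunk works because kmalloc() resolves small sizes through the size_index[] table; with a 128-byte minimum object size the 192-byte cache cannot be used, so those slots are pointed at the 2^8 (256-byte) cache instead. A standalone sketch of just that table manipulation (the array size and the cache-index values mirror slub's scheme but are assumptions of this illustration):

#include <stdio.h>

#define KMALLOC_SHIFT_LOW 7
#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)       /* 128 bytes */

static unsigned char size_index[24];    /* slot (size - 1) / 8 for sizes 1..192 */

int main(void)
{
        int i;

        /* Before the patch: 136..192-byte requests point at kmalloc-192
         * (assumed here to live at kmalloc_caches[2]). */
        for (i = 136; i <= 192; i += 8)
                size_index[(i - 1) / 8] = 2;

        /* The loop added above: with a 128-byte minimum size the 192-byte
         * cache is not used, so redirect to the 256-byte cache (index 8). */
        if (KMALLOC_MIN_SIZE == 128)
                for (i = 128 + 8; i <= 192; i += 8)
                        size_index[(i - 1) / 8] = 8;

        /* Prints 8: a 160-byte request now lands in the 256-byte cache. */
        printf("a 160-byte kmalloc -> kmalloc_caches[%d]\n",
               size_index[(160 - 1) / 8]);
        return 0;
}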
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 99c4f36eb8a3..a91b5f8fcaf6 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -1,7 +1,7 @@
 /*
  * Virtual Memory Map support
  *
- * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>.
+ * (C) 2007 sgi. Christoph Lameter.
  *
  * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
  * virt_to_page, page_address() to be implemented as a base offset