about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/bootmem.c 6
-rw-r--r-- mm/memory.c 67
-rw-r--r-- mm/migrate.c 10
-rw-r--r-- mm/slab.c 5
4 files changed, 78 insertions, 10 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8fb927392b9..8d9f60e06f62 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
442 return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); 442 return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
443} 443}
444 444
445void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 445int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
446 unsigned long size, int flags) 446 unsigned long size, int flags)
447{ 447{
448 int ret; 448 int ret;
449 449
450 ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); 450 ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
451 if (ret < 0) 451 if (ret < 0)
452 return; 452 return -ENOMEM;
453 reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); 453 reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
454
455 return 0;
454} 456}
455 457
456void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 458void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
diff --git a/mm/memory.c b/mm/memory.c
index 19e0ae9beecb..d14b251a25a6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -999,17 +999,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
999 goto no_page_table; 999 goto no_page_table;
1000 1000
1001 ptep = pte_offset_map_lock(mm, pmd, address, &ptl); 1001 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
1002 if (!ptep)
1003 goto out;
1004 1002
1005 pte = *ptep; 1003 pte = *ptep;
1006 if (!pte_present(pte)) 1004 if (!pte_present(pte))
1007 goto unlock; 1005 goto no_page;
1008 if ((flags & FOLL_WRITE) && !pte_write(pte)) 1006 if ((flags & FOLL_WRITE) && !pte_write(pte))
1009 goto unlock; 1007 goto unlock;
1010 page = vm_normal_page(vma, address, pte); 1008 page = vm_normal_page(vma, address, pte);
1011 if (unlikely(!page)) 1009 if (unlikely(!page))
1012 goto unlock; 1010 goto bad_page;
1013 1011
1014 if (flags & FOLL_GET) 1012 if (flags & FOLL_GET)
1015 get_page(page); 1013 get_page(page);
@@ -1024,6 +1022,15 @@ unlock:
1024out: 1022out:
1025 return page; 1023 return page;
1026 1024
1025bad_page:
1026 pte_unmap_unlock(ptep, ptl);
1027 return ERR_PTR(-EFAULT);
1028
1029no_page:
1030 pte_unmap_unlock(ptep, ptl);
1031 if (!pte_none(pte))
1032 return page;
1033 /* Fall through to ZERO_PAGE handling */
1027no_page_table: 1034no_page_table:
1028 /* 1035 /*
1029 * When core dumping an enormous anonymous area that nobody 1036 * When core dumping an enormous anonymous area that nobody
@@ -1038,6 +1045,26 @@ no_page_table:
1038 return page; 1045 return page;
1039} 1046}
1040 1047
1048/* Can we do the FOLL_ANON optimization? */
1049static inline int use_zero_page(struct vm_area_struct *vma)
1050{
1051 /*
1052 * We don't want to optimize FOLL_ANON for make_pages_present()
1053 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
1054 * we want to get the page from the page tables to make sure
1055 * that we serialize and update with any other user of that
1056 * mapping.
1057 */
1058 if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
1059 return 0;
1060 /*
1061 * And if we have a fault or a nopfn routine, it's not an
1062 * anonymous region.
1063 */
1064 return !vma->vm_ops ||
1065 (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
1066}
1067
1041int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1068int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1042 unsigned long start, int len, int write, int force, 1069 unsigned long start, int len, int write, int force,
1043 struct page **pages, struct vm_area_struct **vmas) 1070 struct page **pages, struct vm_area_struct **vmas)
@@ -1112,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1112 foll_flags = FOLL_TOUCH; 1139 foll_flags = FOLL_TOUCH;
1113 if (pages) 1140 if (pages)
1114 foll_flags |= FOLL_GET; 1141 foll_flags |= FOLL_GET;
1115 if (!write && !(vma->vm_flags & VM_LOCKED) && 1142 if (!write && use_zero_page(vma))
1116 (!vma->vm_ops || !vma->vm_ops->fault))
1117 foll_flags |= FOLL_ANON; 1143 foll_flags |= FOLL_ANON;
1118 1144
1119 do { 1145 do {
@@ -1159,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1159 1185
1160 cond_resched(); 1186 cond_resched();
1161 } 1187 }
1188 if (IS_ERR(page))
1189 return i ? i : PTR_ERR(page);
1162 if (pages) { 1190 if (pages) {
1163 pages[i] = page; 1191 pages[i] = page;
1164 1192
@@ -1757,7 +1785,6 @@ gotten:
1757 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 1785 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
1758 if (likely(pte_same(*page_table, orig_pte))) { 1786 if (likely(pte_same(*page_table, orig_pte))) {
1759 if (old_page) { 1787 if (old_page) {
1760 page_remove_rmap(old_page, vma);
1761 if (!PageAnon(old_page)) { 1788 if (!PageAnon(old_page)) {
1762 dec_mm_counter(mm, file_rss); 1789 dec_mm_counter(mm, file_rss);
1763 inc_mm_counter(mm, anon_rss); 1790 inc_mm_counter(mm, anon_rss);
@@ -1779,6 +1806,32 @@ gotten:
1779 lru_cache_add_active(new_page); 1806 lru_cache_add_active(new_page);
1780 page_add_new_anon_rmap(new_page, vma, address); 1807 page_add_new_anon_rmap(new_page, vma, address);
1781 1808
1809 if (old_page) {
1810 /*
1811 * Only after switching the pte to the new page may
1812 * we remove the mapcount here. Otherwise another
1813 * process may come and find the rmap count decremented
1814 * before the pte is switched to the new page, and
1815 * "reuse" the old page writing into it while our pte
1816 * here still points into it and can be read by other
1817 * threads.
1818 *
1819 * The critical issue is to order this
1820 * page_remove_rmap with the ptep_clear_flush above.
1821 * Those stores are ordered by (if nothing else,)
1822 * the barrier present in the atomic_add_negative
1823 * in page_remove_rmap.
1824 *
1825 * Then the TLB flush in ptep_clear_flush ensures that
1826 * no process can access the old page before the
1827 * decremented mapcount is visible. And the old page
1828 * cannot be reused until after the decremented
1829 * mapcount is visible. So transitively, TLBs to
1830 * old page will be flushed before it can be reused.
1831 */
1832 page_remove_rmap(old_page, vma);
1833 }
1834
1782 /* Free the old page.. */ 1835 /* Free the old page.. */
1783 new_page = old_page; 1836 new_page = old_page;
1784 ret |= VM_FAULT_WRITE; 1837 ret |= VM_FAULT_WRITE;
diff --git a/mm/migrate.c b/mm/migrate.c
index 449d77d409f5..112bcaeaa104 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -865,6 +865,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
865 goto set_status; 865 goto set_status;
866 866
867 page = follow_page(vma, pp->addr, FOLL_GET); 867 page = follow_page(vma, pp->addr, FOLL_GET);
868
869 err = PTR_ERR(page);
870 if (IS_ERR(page))
871 goto set_status;
872
868 err = -ENOENT; 873 err = -ENOENT;
869 if (!page) 874 if (!page)
870 goto set_status; 875 goto set_status;
@@ -928,6 +933,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
928 goto set_status; 933 goto set_status;
929 934
930 page = follow_page(vma, pm->addr, 0); 935 page = follow_page(vma, pm->addr, 0);
936
937 err = PTR_ERR(page);
938 if (IS_ERR(page))
939 goto set_status;
940
931 err = -ENOENT; 941 err = -ENOENT;
932 /* Use PageReserved to check for zero page */ 942 /* Use PageReserved to check for zero page */
933 if (!page || PageReserved(page)) 943 if (!page || PageReserved(page))
diff --git a/mm/slab.c b/mm/slab.c
index 06236e4ddc1b..046607f05f3e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3263,9 +3263,12 @@ retry:
3263 3263
3264 if (cpuset_zone_allowed_hardwall(zone, flags) && 3264 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3265 cache->nodelists[nid] && 3265 cache->nodelists[nid] &&
3266 cache->nodelists[nid]->free_objects) 3266 cache->nodelists[nid]->free_objects) {
3267 obj = ____cache_alloc_node(cache, 3267 obj = ____cache_alloc_node(cache,
3268 flags | GFP_THISNODE, nid); 3268 flags | GFP_THISNODE, nid);
3269 if (obj)
3270 break;
3271 }
3269 } 3272 }
3270 3273
3271 if (!obj) { 3274 if (!obj) {