about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2011-03-15 03:29:44 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-03-15 03:29:44 -0400
commit: 8460b3e5bc64955aeefdd8357b3bf7b5ff79b3f2 (patch)
tree: 7e5f6d050b72ab08a4497e82a4a103fefb086e80 /mm
parent: 56396e6823fe9b42fe9cf9403d6ed67756255f70 (diff)
parent: 521cb40b0c44418a4fd36dc633f575813d59a43d (diff)
Merge commit 'v2.6.38' into x86/mm
Conflicts:
	arch/x86/mm/numa_64.c

Merge reason: Resolve the conflict, update the branch to .38.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r-- mm/huge_memory.c 69
-rw-r--r-- mm/memory.c 2
-rw-r--r-- mm/mempolicy.c 16
-rw-r--r-- mm/migrate.c 6
-rw-r--r-- mm/mremap.c 4
-rw-r--r-- mm/page_alloc.c 5
-rw-r--r-- mm/rmap.c 54
-rw-r--r-- mm/swapfile.c 2
-rw-r--r-- mm/truncate.c 2
-rw-r--r-- mm/vmscan.c 32
10 files changed, 116 insertions, 76 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e62ddb8f24b6..113e35c47502 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
650 650
651static inline struct page *alloc_hugepage_vma(int defrag, 651static inline struct page *alloc_hugepage_vma(int defrag,
652 struct vm_area_struct *vma, 652 struct vm_area_struct *vma,
653 unsigned long haddr) 653 unsigned long haddr, int nd)
654{ 654{
655 return alloc_pages_vma(alloc_hugepage_gfpmask(defrag), 655 return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
656 HPAGE_PMD_ORDER, vma, haddr); 656 HPAGE_PMD_ORDER, vma, haddr, nd);
657} 657}
658 658
659#ifndef CONFIG_NUMA 659#ifndef CONFIG_NUMA
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
678 if (unlikely(khugepaged_enter(vma))) 678 if (unlikely(khugepaged_enter(vma)))
679 return VM_FAULT_OOM; 679 return VM_FAULT_OOM;
680 page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), 680 page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
681 vma, haddr); 681 vma, haddr, numa_node_id());
682 if (unlikely(!page)) 682 if (unlikely(!page))
683 goto out; 683 goto out;
684 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { 684 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -799,8 +799,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
799 } 799 }
800 800
801 for (i = 0; i < HPAGE_PMD_NR; i++) { 801 for (i = 0; i < HPAGE_PMD_NR; i++) {
802 pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE, 802 pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE,
803 vma, address); 803 vma, address, page_to_nid(page));
804 if (unlikely(!pages[i] || 804 if (unlikely(!pages[i] ||
805 mem_cgroup_newpage_charge(pages[i], mm, 805 mem_cgroup_newpage_charge(pages[i], mm,
806 GFP_KERNEL))) { 806 GFP_KERNEL))) {
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
902 if (transparent_hugepage_enabled(vma) && 902 if (transparent_hugepage_enabled(vma) &&
903 !transparent_hugepage_debug_cow()) 903 !transparent_hugepage_debug_cow())
904 new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), 904 new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
905 vma, haddr); 905 vma, haddr, numa_node_id());
906 else 906 else
907 new_page = NULL; 907 new_page = NULL;
908 908
@@ -1745,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
1745static void collapse_huge_page(struct mm_struct *mm, 1745static void collapse_huge_page(struct mm_struct *mm,
1746 unsigned long address, 1746 unsigned long address,
1747 struct page **hpage, 1747 struct page **hpage,
1748 struct vm_area_struct *vma) 1748 struct vm_area_struct *vma,
1749 int node)
1749{ 1750{
1750 pgd_t *pgd; 1751 pgd_t *pgd;
1751 pud_t *pud; 1752 pud_t *pud;
@@ -1761,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm,
1761#ifndef CONFIG_NUMA 1762#ifndef CONFIG_NUMA
1762 VM_BUG_ON(!*hpage); 1763 VM_BUG_ON(!*hpage);
1763 new_page = *hpage; 1764 new_page = *hpage;
1765 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
1766 up_read(&mm->mmap_sem);
1767 return;
1768 }
1764#else 1769#else
1765 VM_BUG_ON(*hpage); 1770 VM_BUG_ON(*hpage);
1766 /* 1771 /*
@@ -1773,18 +1778,19 @@ static void collapse_huge_page(struct mm_struct *mm,
1773 * mmap_sem in read mode is good idea also to allow greater 1778 * mmap_sem in read mode is good idea also to allow greater
1774 * scalability. 1779 * scalability.
1775 */ 1780 */
1776 new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address); 1781 new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
1782 node);
1777 if (unlikely(!new_page)) { 1783 if (unlikely(!new_page)) {
1778 up_read(&mm->mmap_sem); 1784 up_read(&mm->mmap_sem);
1779 *hpage = ERR_PTR(-ENOMEM); 1785 *hpage = ERR_PTR(-ENOMEM);
1780 return; 1786 return;
1781 } 1787 }
1782#endif
1783 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { 1788 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
1784 up_read(&mm->mmap_sem); 1789 up_read(&mm->mmap_sem);
1785 put_page(new_page); 1790 put_page(new_page);
1786 return; 1791 return;
1787 } 1792 }
1793#endif
1788 1794
1789 /* after allocating the hugepage upgrade to mmap_sem write mode */ 1795 /* after allocating the hugepage upgrade to mmap_sem write mode */
1790 up_read(&mm->mmap_sem); 1796 up_read(&mm->mmap_sem);
@@ -1811,6 +1817,8 @@ static void collapse_huge_page(struct mm_struct *mm,
1811 /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ 1817 /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
1812 if (!vma->anon_vma || vma->vm_ops || vma->vm_file) 1818 if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
1813 goto out; 1819 goto out;
1820 if (is_vma_temporary_stack(vma))
1821 goto out;
1814 VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); 1822 VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
1815 1823
1816 pgd = pgd_offset(mm, address); 1824 pgd = pgd_offset(mm, address);
@@ -1917,6 +1925,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1917 struct page *page; 1925 struct page *page;
1918 unsigned long _address; 1926 unsigned long _address;
1919 spinlock_t *ptl; 1927 spinlock_t *ptl;
1928 int node = -1;
1920 1929
1921 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 1930 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
1922 1931
@@ -1947,6 +1956,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1947 page = vm_normal_page(vma, _address, pteval); 1956 page = vm_normal_page(vma, _address, pteval);
1948 if (unlikely(!page)) 1957 if (unlikely(!page))
1949 goto out_unmap; 1958 goto out_unmap;
1959 /*
1960 * Chose the node of the first page. This could
1961 * be more sophisticated and look at more pages,
1962 * but isn't for now.
1963 */
1964 if (node == -1)
1965 node = page_to_nid(page);
1950 VM_BUG_ON(PageCompound(page)); 1966 VM_BUG_ON(PageCompound(page));
1951 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) 1967 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
1952 goto out_unmap; 1968 goto out_unmap;
@@ -1963,7 +1979,7 @@ out_unmap:
1963 pte_unmap_unlock(pte, ptl); 1979 pte_unmap_unlock(pte, ptl);
1964 if (ret) 1980 if (ret)
1965 /* collapse_huge_page will return with the mmap_sem released */ 1981 /* collapse_huge_page will return with the mmap_sem released */
1966 collapse_huge_page(mm, address, hpage, vma); 1982 collapse_huge_page(mm, address, hpage, vma, node);
1967out: 1983out:
1968 return ret; 1984 return ret;
1969} 1985}
@@ -2032,32 +2048,27 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
2032 if ((!(vma->vm_flags & VM_HUGEPAGE) && 2048 if ((!(vma->vm_flags & VM_HUGEPAGE) &&
2033 !khugepaged_always()) || 2049 !khugepaged_always()) ||
2034 (vma->vm_flags & VM_NOHUGEPAGE)) { 2050 (vma->vm_flags & VM_NOHUGEPAGE)) {
2051 skip:
2035 progress++; 2052 progress++;
2036 continue; 2053 continue;
2037 } 2054 }
2038
2039 /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ 2055 /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
2040 if (!vma->anon_vma || vma->vm_ops || vma->vm_file) { 2056 if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
2041 khugepaged_scan.address = vma->vm_end; 2057 goto skip;
2042 progress++; 2058 if (is_vma_temporary_stack(vma))
2043 continue; 2059 goto skip;
2044 } 2060
2045 VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); 2061 VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
2046 2062
2047 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; 2063 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
2048 hend = vma->vm_end & HPAGE_PMD_MASK; 2064 hend = vma->vm_end & HPAGE_PMD_MASK;
2049 if (hstart >= hend) { 2065 if (hstart >= hend)
2050 progress++; 2066 goto skip;
2051 continue; 2067 if (khugepaged_scan.address > hend)
2052 } 2068 goto skip;
2053 if (khugepaged_scan.address < hstart) 2069 if (khugepaged_scan.address < hstart)
2054 khugepaged_scan.address = hstart; 2070 khugepaged_scan.address = hstart;
2055 if (khugepaged_scan.address > hend) { 2071 VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
2056 khugepaged_scan.address = hend + HPAGE_PMD_SIZE;
2057 progress++;
2058 continue;
2059 }
2060 BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
2061 2072
2062 while (khugepaged_scan.address < hend) { 2073 while (khugepaged_scan.address < hend) {
2063 int ret; 2074 int ret;
@@ -2086,7 +2097,7 @@ breakouterloop:
2086breakouterloop_mmap_sem: 2097breakouterloop_mmap_sem:
2087 2098
2088 spin_lock(&khugepaged_mm_lock); 2099 spin_lock(&khugepaged_mm_lock);
2089 BUG_ON(khugepaged_scan.mm_slot != mm_slot); 2100 VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
2090 /* 2101 /*
2091 * Release the current mm_slot if this mm is about to die, or 2102 * Release the current mm_slot if this mm is about to die, or
2092 * if we scanned all vmas of this mm. 2103 * if we scanned all vmas of this mm.
@@ -2241,9 +2252,9 @@ static int khugepaged(void *none)
2241 2252
2242 for (;;) { 2253 for (;;) {
2243 mutex_unlock(&khugepaged_mutex); 2254 mutex_unlock(&khugepaged_mutex);
2244 BUG_ON(khugepaged_thread != current); 2255 VM_BUG_ON(khugepaged_thread != current);
2245 khugepaged_loop(); 2256 khugepaged_loop();
2246 BUG_ON(khugepaged_thread != current); 2257 VM_BUG_ON(khugepaged_thread != current);
2247 2258
2248 mutex_lock(&khugepaged_mutex); 2259 mutex_lock(&khugepaged_mutex);
2249 if (!khugepaged_enabled()) 2260 if (!khugepaged_enabled())
diff --git a/mm/memory.c b/mm/memory.c
index 8e8c18324863..5823698c2b71 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2648,6 +2648,7 @@ void unmap_mapping_range(struct address_space *mapping,
2648 details.last_index = ULONG_MAX; 2648 details.last_index = ULONG_MAX;
2649 details.i_mmap_lock = &mapping->i_mmap_lock; 2649 details.i_mmap_lock = &mapping->i_mmap_lock;
2650 2650
2651 mutex_lock(&mapping->unmap_mutex);
2651 spin_lock(&mapping->i_mmap_lock); 2652 spin_lock(&mapping->i_mmap_lock);
2652 2653
2653 /* Protect against endless unmapping loops */ 2654 /* Protect against endless unmapping loops */
@@ -2664,6 +2665,7 @@ void unmap_mapping_range(struct address_space *mapping,
2664 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) 2665 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
2665 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); 2666 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
2666 spin_unlock(&mapping->i_mmap_lock); 2667 spin_unlock(&mapping->i_mmap_lock);
2668 mutex_unlock(&mapping->unmap_mutex);
2667} 2669}
2668EXPORT_SYMBOL(unmap_mapping_range); 2670EXPORT_SYMBOL(unmap_mapping_range);
2669 2671
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 368fc9d23610..b53ec99f1428 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1524,10 +1524,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
1524} 1524}
1525 1525
1526/* Return a zonelist indicated by gfp for node representing a mempolicy */ 1526/* Return a zonelist indicated by gfp for node representing a mempolicy */
1527static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy) 1527static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
1528 int nd)
1528{ 1529{
1529 int nd = numa_node_id();
1530
1531 switch (policy->mode) { 1530 switch (policy->mode) {
1532 case MPOL_PREFERRED: 1531 case MPOL_PREFERRED:
1533 if (!(policy->flags & MPOL_F_LOCAL)) 1532 if (!(policy->flags & MPOL_F_LOCAL))
@@ -1679,7 +1678,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
1679 zl = node_zonelist(interleave_nid(*mpol, vma, addr, 1678 zl = node_zonelist(interleave_nid(*mpol, vma, addr,
1680 huge_page_shift(hstate_vma(vma))), gfp_flags); 1679 huge_page_shift(hstate_vma(vma))), gfp_flags);
1681 } else { 1680 } else {
1682 zl = policy_zonelist(gfp_flags, *mpol); 1681 zl = policy_zonelist(gfp_flags, *mpol, numa_node_id());
1683 if ((*mpol)->mode == MPOL_BIND) 1682 if ((*mpol)->mode == MPOL_BIND)
1684 *nodemask = &(*mpol)->v.nodes; 1683 *nodemask = &(*mpol)->v.nodes;
1685 } 1684 }
@@ -1820,7 +1819,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
1820 */ 1819 */
1821struct page * 1820struct page *
1822alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, 1821alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
1823 unsigned long addr) 1822 unsigned long addr, int node)
1824{ 1823{
1825 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1824 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1826 struct zonelist *zl; 1825 struct zonelist *zl;
@@ -1830,13 +1829,13 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
1830 if (unlikely(pol->mode == MPOL_INTERLEAVE)) { 1829 if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
1831 unsigned nid; 1830 unsigned nid;
1832 1831
1833 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); 1832 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
1834 mpol_cond_put(pol); 1833 mpol_cond_put(pol);
1835 page = alloc_page_interleave(gfp, order, nid); 1834 page = alloc_page_interleave(gfp, order, nid);
1836 put_mems_allowed(); 1835 put_mems_allowed();
1837 return page; 1836 return page;
1838 } 1837 }
1839 zl = policy_zonelist(gfp, pol); 1838 zl = policy_zonelist(gfp, pol, node);
1840 if (unlikely(mpol_needs_cond_ref(pol))) { 1839 if (unlikely(mpol_needs_cond_ref(pol))) {
1841 /* 1840 /*
1842 * slow path: ref counted shared policy 1841 * slow path: ref counted shared policy
@@ -1892,7 +1891,8 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
1892 page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); 1891 page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
1893 else 1892 else
1894 page = __alloc_pages_nodemask(gfp, order, 1893 page = __alloc_pages_nodemask(gfp, order,
1895 policy_zonelist(gfp, pol), policy_nodemask(gfp, pol)); 1894 policy_zonelist(gfp, pol, numa_node_id()),
1895 policy_nodemask(gfp, pol));
1896 put_mems_allowed(); 1896 put_mems_allowed();
1897 return page; 1897 return page;
1898} 1898}
diff --git a/mm/migrate.c b/mm/migrate.c
index 766115253807..352de555626c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1287,14 +1287,14 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1287 return -EPERM; 1287 return -EPERM;
1288 1288
1289 /* Find the mm_struct */ 1289 /* Find the mm_struct */
1290 read_lock(&tasklist_lock); 1290 rcu_read_lock();
1291 task = pid ? find_task_by_vpid(pid) : current; 1291 task = pid ? find_task_by_vpid(pid) : current;
1292 if (!task) { 1292 if (!task) {
1293 read_unlock(&tasklist_lock); 1293 rcu_read_unlock();
1294 return -ESRCH; 1294 return -ESRCH;
1295 } 1295 }
1296 mm = get_task_mm(task); 1296 mm = get_task_mm(task);
1297 read_unlock(&tasklist_lock); 1297 rcu_read_unlock();
1298 1298
1299 if (!mm) 1299 if (!mm)
1300 return -EINVAL; 1300 return -EINVAL;
diff --git a/mm/mremap.c b/mm/mremap.c
index 9925b6391b80..1de98d492ddc 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -94,9 +94,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
94 */ 94 */
95 mapping = vma->vm_file->f_mapping; 95 mapping = vma->vm_file->f_mapping;
96 spin_lock(&mapping->i_mmap_lock); 96 spin_lock(&mapping->i_mmap_lock);
97 if (new_vma->vm_truncate_count && 97 new_vma->vm_truncate_count = 0;
98 new_vma->vm_truncate_count != vma->vm_truncate_count)
99 new_vma->vm_truncate_count = 0;
100 } 98 }
101 99
102 /* 100 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index da0fe32059b3..bd7625676a64 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5371,10 +5371,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
5371 for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { 5371 for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
5372 unsigned long check = pfn + iter; 5372 unsigned long check = pfn + iter;
5373 5373
5374 if (!pfn_valid_within(check)) { 5374 if (!pfn_valid_within(check))
5375 iter++;
5376 continue; 5375 continue;
5377 } 5376
5378 page = pfn_to_page(check); 5377 page = pfn_to_page(check);
5379 if (!page_count(page)) { 5378 if (!page_count(page)) {
5380 if (PageBuddy(page)) 5379 if (PageBuddy(page))
diff --git a/mm/rmap.c b/mm/rmap.c
index f21f4a1d6a1c..941bf82e8961 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -497,41 +497,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
497 struct mm_struct *mm = vma->vm_mm; 497 struct mm_struct *mm = vma->vm_mm;
498 int referenced = 0; 498 int referenced = 0;
499 499
500 /*
501 * Don't want to elevate referenced for mlocked page that gets this far,
502 * in order that it progresses to try_to_unmap and is moved to the
503 * unevictable list.
504 */
505 if (vma->vm_flags & VM_LOCKED) {
506 *mapcount = 0; /* break early from loop */
507 *vm_flags |= VM_LOCKED;
508 goto out;
509 }
510
511 /* Pretend the page is referenced if the task has the
512 swap token and is in the middle of a page fault. */
513 if (mm != current->mm && has_swap_token(mm) &&
514 rwsem_is_locked(&mm->mmap_sem))
515 referenced++;
516
517 if (unlikely(PageTransHuge(page))) { 500 if (unlikely(PageTransHuge(page))) {
518 pmd_t *pmd; 501 pmd_t *pmd;
519 502
520 spin_lock(&mm->page_table_lock); 503 spin_lock(&mm->page_table_lock);
504 /*
505 * rmap might return false positives; we must filter
506 * these out using page_check_address_pmd().
507 */
521 pmd = page_check_address_pmd(page, mm, address, 508 pmd = page_check_address_pmd(page, mm, address,
522 PAGE_CHECK_ADDRESS_PMD_FLAG); 509 PAGE_CHECK_ADDRESS_PMD_FLAG);
523 if (pmd && !pmd_trans_splitting(*pmd) && 510 if (!pmd) {
524 pmdp_clear_flush_young_notify(vma, address, pmd)) 511 spin_unlock(&mm->page_table_lock);
512 goto out;
513 }
514
515 if (vma->vm_flags & VM_LOCKED) {
516 spin_unlock(&mm->page_table_lock);
517 *mapcount = 0; /* break early from loop */
518 *vm_flags |= VM_LOCKED;
519 goto out;
520 }
521
522 /* go ahead even if the pmd is pmd_trans_splitting() */
523 if (pmdp_clear_flush_young_notify(vma, address, pmd))
525 referenced++; 524 referenced++;
526 spin_unlock(&mm->page_table_lock); 525 spin_unlock(&mm->page_table_lock);
527 } else { 526 } else {
528 pte_t *pte; 527 pte_t *pte;
529 spinlock_t *ptl; 528 spinlock_t *ptl;
530 529
530 /*
531 * rmap might return false positives; we must filter
532 * these out using page_check_address().
533 */
531 pte = page_check_address(page, mm, address, &ptl, 0); 534 pte = page_check_address(page, mm, address, &ptl, 0);
532 if (!pte) 535 if (!pte)
533 goto out; 536 goto out;
534 537
538 if (vma->vm_flags & VM_LOCKED) {
539 pte_unmap_unlock(pte, ptl);
540 *mapcount = 0; /* break early from loop */
541 *vm_flags |= VM_LOCKED;
542 goto out;
543 }
544
535 if (ptep_clear_flush_young_notify(vma, address, pte)) { 545 if (ptep_clear_flush_young_notify(vma, address, pte)) {
536 /* 546 /*
537 * Don't treat a reference through a sequentially read 547 * Don't treat a reference through a sequentially read
@@ -546,6 +556,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
546 pte_unmap_unlock(pte, ptl); 556 pte_unmap_unlock(pte, ptl);
547 } 557 }
548 558
559 /* Pretend the page is referenced if the task has the
560 swap token and is in the middle of a page fault. */
561 if (mm != current->mm && has_swap_token(mm) &&
562 rwsem_is_locked(&mm->mmap_sem))
563 referenced++;
564
549 (*mapcount)--; 565 (*mapcount)--;
550 566
551 if (referenced) 567 if (referenced)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 07a458d72fa8..0341c5700e34 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1940,7 +1940,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1940 1940
1941 error = -EINVAL; 1941 error = -EINVAL;
1942 if (S_ISBLK(inode->i_mode)) { 1942 if (S_ISBLK(inode->i_mode)) {
1943 bdev = I_BDEV(inode); 1943 bdev = bdgrab(I_BDEV(inode));
1944 error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, 1944 error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1945 sys_swapon); 1945 sys_swapon);
1946 if (error < 0) { 1946 if (error < 0) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 49feb46e77b8..d64296be00d3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -225,6 +225,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
225 next = start; 225 next = start;
226 while (next <= end && 226 while (next <= end &&
227 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 227 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
228 mem_cgroup_uncharge_start();
228 for (i = 0; i < pagevec_count(&pvec); i++) { 229 for (i = 0; i < pagevec_count(&pvec); i++) {
229 struct page *page = pvec.pages[i]; 230 struct page *page = pvec.pages[i];
230 pgoff_t page_index = page->index; 231 pgoff_t page_index = page->index;
@@ -247,6 +248,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
247 unlock_page(page); 248 unlock_page(page);
248 } 249 }
249 pagevec_release(&pvec); 250 pagevec_release(&pvec);
251 mem_cgroup_uncharge_end();
250 cond_resched(); 252 cond_resched();
251 } 253 }
252 254
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 17497d0cd8b9..6771ea70bfe7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1841,16 +1841,28 @@ static inline bool should_continue_reclaim(struct zone *zone,
1841 if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) 1841 if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
1842 return false; 1842 return false;
1843 1843
1844 /* 1844 /* Consider stopping depending on scan and reclaim activity */
1845 * If we failed to reclaim and have scanned the full list, stop. 1845 if (sc->gfp_mask & __GFP_REPEAT) {
1846 * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far 1846 /*
1847 * faster but obviously would be less likely to succeed 1847 * For __GFP_REPEAT allocations, stop reclaiming if the
1848 * allocation. If this is desirable, use GFP_REPEAT to decide 1848 * full LRU list has been scanned and we are still failing
1849 * if both reclaimed and scanned should be checked or just 1849 * to reclaim pages. This full LRU scan is potentially
1850 * reclaimed 1850 * expensive but a __GFP_REPEAT caller really wants to succeed
1851 */ 1851 */
1852 if (!nr_reclaimed && !nr_scanned) 1852 if (!nr_reclaimed && !nr_scanned)
1853 return false; 1853 return false;
1854 } else {
1855 /*
1856 * For non-__GFP_REPEAT allocations which can presumably
1857 * fail without consequence, stop if we failed to reclaim
1858 * any pages from the last SWAP_CLUSTER_MAX number of
1859 * pages that were scanned. This will return to the
1860 * caller faster at the risk reclaim/compaction and
1861 * the resulting allocation attempt fails
1862 */
1863 if (!nr_reclaimed)
1864 return false;
1865 }
1854 1866
1855 /* 1867 /*
1856 * If we have not reclaimed enough pages for compaction and the 1868 * If we have not reclaimed enough pages for compaction and the