Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |   2
-rw-r--r--  mm/compaction.c      |  11
-rw-r--r--  mm/huge_memory.c     |   5
-rw-r--r--  mm/kmemleak-test.c   |   6
-rw-r--r--  mm/kmemleak.c        |  13
-rw-r--r--  mm/memblock.c        |   8
-rw-r--r--  mm/memcontrol.c      | 199
-rw-r--r--  mm/migrate.c         |   2
-rw-r--r--  mm/page_alloc.c      |  18
-rw-r--r--  mm/pgtable-generic.c |   1
-rw-r--r--  mm/truncate.c        |  11
-rw-r--r--  mm/vmscan.c          |   4
12 files changed, 174 insertions, 106 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 3ad483bdf505..e9c0c61f2ddd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -179,7 +179,7 @@ config SPLIT_PTLOCK_CPUS
 config COMPACTION
 	bool "Allow for memory compaction"
 	select MIGRATION
-	depends on EXPERIMENTAL && HUGETLB_PAGE && MMU
+	depends on MMU
 	help
 	  Allows the compaction of memory for the allocation of huge pages.
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 6d592a021072..8be430b812de 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -406,6 +406,10 @@ static int compact_finished(struct zone *zone,
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
 		return COMPACT_CONTINUE;
 
+	/*
+	 * order == -1 is expected when compacting via
+	 * /proc/sys/vm/compact_memory
+	 */
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
@@ -454,6 +458,13 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 		return COMPACT_SKIPPED;
 
 	/*
+	 * order == -1 is expected when compacting via
+	 * /proc/sys/vm/compact_memory
+	 */
+	if (order == -1)
+		return COMPACT_CONTINUE;
+
+	/*
 	 * fragmentation index determines if allocation failures are due to
 	 * low memory or external fragmentation
 	 *
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 004c9c2aac78..e187454d82f6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1203,6 +1203,8 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
+		mem_cgroup_split_huge_fixup(page, page_tail);
+
 		lru_add_page_tail(zone, page, page_tail);
 	}
 
@@ -1837,9 +1839,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spin_lock(ptl);
 	isolated = __collapse_huge_page_isolate(vma, address, pte);
 	spin_unlock(ptl);
-	pte_unmap(pte);
 
 	if (unlikely(!isolated)) {
+		pte_unmap(pte);
 		spin_lock(&mm->page_table_lock);
 		BUG_ON(!pmd_none(*pmd));
 		set_pmd_at(mm, address, pmd, _pmd);
@@ -1856,6 +1858,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	anon_vma_unlock(vma->anon_vma);
 
 	__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+	pte_unmap(pte);
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
 	VM_BUG_ON(page_count(pgtable) != 1);
diff --git a/mm/kmemleak-test.c b/mm/kmemleak-test.c
index 177a5169bbde..ff0d9779cec8 100644
--- a/mm/kmemleak-test.c
+++ b/mm/kmemleak-test.c
@@ -75,13 +75,11 @@ static int __init kmemleak_test_init(void)
 	 * after the module is removed.
 	 */
 	for (i = 0; i < 10; i++) {
-		elem = kmalloc(sizeof(*elem), GFP_KERNEL);
-		pr_info("kmemleak: kmalloc(sizeof(*elem)) = %p\n", elem);
+		elem = kzalloc(sizeof(*elem), GFP_KERNEL);
+		pr_info("kmemleak: kzalloc(sizeof(*elem)) = %p\n", elem);
 		if (!elem)
 			return -ENOMEM;
-		memset(elem, 0, sizeof(*elem));
 		INIT_LIST_HEAD(&elem->list);
-
 		list_add_tail(&elem->list, &test_list);
 	}
 
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index bd9bc214091b..84225f3b7190 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -113,7 +113,9 @@
 #define BYTES_PER_POINTER	sizeof(void *)
 
 /* GFP bitmask for kmemleak internal allocations */
-#define GFP_KMEMLEAK_MASK	(GFP_KERNEL | GFP_ATOMIC)
+#define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
+				 __GFP_NORETRY | __GFP_NOMEMALLOC | \
+				 __GFP_NOWARN)
 
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
@@ -511,9 +513,10 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	struct kmemleak_object *object;
 	struct prio_tree_node *node;
 
-	object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
+	object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
 	if (!object) {
-		kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
+		pr_warning("Cannot allocate a kmemleak_object structure\n");
+		kmemleak_disable();
 		return NULL;
 	}
 
@@ -734,9 +737,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 		return;
 	}
 
-	area = kmem_cache_alloc(scan_area_cache, gfp & GFP_KMEMLEAK_MASK);
+	area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
 	if (!area) {
-		kmemleak_warn("Cannot allocate a scan area\n");
+		pr_warning("Cannot allocate a scan area\n");
 		goto out;
 	}
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 400dc62697d7..bdba245d8afd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -683,13 +683,13 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
 
 int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
 {
-	int idx = memblock_search(&memblock.reserved, base);
+	int idx = memblock_search(&memblock.memory, base);
 
 	if (idx == -1)
 		return 0;
-	return memblock.reserved.regions[idx].base <= base &&
-		(memblock.reserved.regions[idx].base +
-		 memblock.reserved.regions[idx].size) >= (base + size);
+	return memblock.memory.regions[idx].base <= base &&
+		(memblock.memory.regions[idx].base +
+		 memblock.memory.regions[idx].size) >= (base + size);
 }
 
 int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ab841031436..3878cfe399dc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -600,23 +600,22 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 bool charge)
+					 bool file, int nr_pages)
 {
-	int val = (charge) ? 1 : -1;
-
 	preempt_disable();
 
-	if (PageCgroupCache(pc))
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
+	if (file)
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
 	else
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
 
-	if (charge)
+	/* pagein of a big page is an event. So, ignore page size */
+	if (nr_pages > 0)
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
 	else
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
-	__this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]);
+
+	__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
 	preempt_enable();
 }
@@ -815,7 +814,8 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 	 * removed from global LRU.
 	 */
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	VM_BUG_ON(list_empty(&pc->lru));
@@ -836,13 +836,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	/* unused or root page is not rotated. */
-	if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
+	if (!PageCgroupUsed(pc))
+		return;
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
+	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	mz = page_cgroup_zoneinfo(pc);
 	list_move(&pc->lru, &mz->lists[lru]);
@@ -857,16 +856,13 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 		return;
 	pc = lookup_page_cgroup(page);
 	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
 	SetPageCgroupAcctLRU(pc);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
@@ -1030,14 +1026,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 		return NULL;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return NULL;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
 	if (!mz)
 		return NULL;
@@ -1615,7 +1607,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 	if (unlikely(!mem || !PageCgroupUsed(pc)))
 		goto out;
 	/* pc->mem_cgroup is unstable ? */
-	if (unlikely(mem_cgroup_stealed(mem))) {
+	if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
 		/* take a lock against to access pc->mem_cgroup */
 		move_lock_page_cgroup(pc, &flags);
 		need_unlock = true;
@@ -1840,6 +1832,7 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 		if (likely(!ret))
 			return CHARGE_OK;
 
+		res_counter_uncharge(&mem->res, csize);
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
 		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 	} else
@@ -2084,14 +2077,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	return mem;
 }
 
-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+				       struct page_cgroup *pc,
+				       enum charge_type ctype,
+				       int page_size)
 {
+	int nr_pages = page_size >> PAGE_SHIFT;
+
+	/* try_charge() can return NULL to *memcg, taking care of it. */
+	if (!mem)
+		return;
+
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
+		mem_cgroup_cancel_charge(mem, page_size);
+		return;
+	}
+	/*
+	 * we don't need page_cgroup_lock about tail pages, becase they are not
+	 * accessed by any other context at this point.
+	 */
 	pc->mem_cgroup = mem;
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2115,43 +2121,57 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
 		break;
 	}
 
-	mem_cgroup_charge_statistics(mem, pc, true);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
+	unlock_page_cgroup(pc);
+	/*
+	 * "charge_statistics" updated event counter. Then, check it.
+	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+	 * if they exceeds softlimit.
+	 */
+	memcg_check_events(mem, pc->page);
 }
 
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-				       struct page_cgroup *pc,
-				       enum charge_type ctype,
-				       int page_size)
-{
-	int i;
-	int count = page_size >> PAGE_SHIFT;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-	/* try_charge() can return NULL to *memcg, taking care of it. */
-	if (!mem)
-		return;
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+	struct page_cgroup *head_pc = lookup_page_cgroup(head);
+	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+	unsigned long flags;
 
-	lock_page_cgroup(pc);
-	if (unlikely(PageCgroupUsed(pc))) {
-		unlock_page_cgroup(pc);
-		mem_cgroup_cancel_charge(mem, page_size);
+	if (mem_cgroup_disabled())
 		return;
-	}
-
 	/*
-	 * we don't need page_cgroup_lock about tail pages, becase they are not
-	 * accessed by any other context at this point.
+	 * We have no races with charge/uncharge but will have races with
+	 * page state accounting.
 	 */
-	for (i = 0; i < count; i++)
-		____mem_cgroup_commit_charge(mem, pc + i, ctype);
+	move_lock_page_cgroup(head_pc, &flags);
 
-	unlock_page_cgroup(pc);
-	/*
-	 * "charge_statistics" updated event counter. Then, check it.
-	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-	 * if they exceeds softlimit.
-	 */
-	memcg_check_events(mem, pc->page);
+	tail_pc->mem_cgroup = head_pc->mem_cgroup;
+	smp_wmb(); /* see __commit_charge() */
+	if (PageCgroupAcctLRU(head_pc)) {
+		enum lru_list lru;
+		struct mem_cgroup_per_zone *mz;
+
+		/*
+		 * LRU flags cannot be copied because we need to add tail
+		 *.page to LRU by generic call and our hook will be called.
+		 * We hold lru_lock, then, reduce counter directly.
+		 */
+		lru = page_lru(head);
+		mz = page_cgroup_zoneinfo(head_pc);
+		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	}
+	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+	move_unlock_page_cgroup(head_pc, &flags);
 }
+#endif
 
 /**
  * __mem_cgroup_move_account - move account of the page
@@ -2171,8 +2191,11 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
  */
 
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
+	int charge_size)
 {
+	int nr_pages = charge_size >> PAGE_SHIFT;
+
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!page_is_cgroup_locked(pc));
@@ -2186,14 +2209,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 		__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
 		preempt_enable();
 	}
-	mem_cgroup_charge_statistics(from, pc, false);
+	mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
 	if (uncharge)
 		/* This is not "cancel", but cancel_charge does all we need. */
-		mem_cgroup_cancel_charge(from, PAGE_SIZE);
+		mem_cgroup_cancel_charge(from, charge_size);
 
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
-	mem_cgroup_charge_statistics(to, pc, true);
+	mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
 	 * can be under rmdir(). But in current implementation, caller of
@@ -2208,15 +2231,24 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
  * __mem_cgroup_move_account()
  */
 static int mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to,
+	bool uncharge, int charge_size)
 {
 	int ret = -EINVAL;
 	unsigned long flags;
+	/*
+	 * The page is isolated from LRU. So, collapse function
+	 * will not handle this page. But page splitting can happen.
+	 * Do this check under compound_page_lock(). The caller should
+	 * hold it.
+	 */
+	if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
+		return -EBUSY;
 
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
 		move_lock_page_cgroup(pc, &flags);
-		__mem_cgroup_move_account(pc, from, to, uncharge);
+		__mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
 		move_unlock_page_cgroup(pc, &flags);
 		ret = 0;
 	}
@@ -2241,6 +2273,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	struct cgroup *cg = child->css.cgroup;
 	struct cgroup *pcg = cg->parent;
 	struct mem_cgroup *parent;
+	int page_size = PAGE_SIZE;
+	unsigned long flags;
 	int ret;
 
 	/* Is ROOT ? */
@@ -2253,15 +2287,24 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	if (isolate_lru_page(page))
 		goto put;
 
+	if (PageTransHuge(page))
+		page_size = HPAGE_SIZE;
+
 	parent = mem_cgroup_from_cont(pcg);
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
-				      PAGE_SIZE);
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask,
+				&parent, false, page_size);
 	if (ret || !parent)
 		goto put_back;
 
-	ret = mem_cgroup_move_account(pc, child, parent, true);
+	if (page_size > PAGE_SIZE)
+		flags = compound_lock_irqsave(page);
+
+	ret = mem_cgroup_move_account(pc, child, parent, true, page_size);
 	if (ret)
-		mem_cgroup_cancel_charge(parent, PAGE_SIZE);
+		mem_cgroup_cancel_charge(parent, page_size);
+
+	if (page_size > PAGE_SIZE)
+		compound_unlock_irqrestore(page, flags);
 put_back:
 	putback_lru_page(page);
 put:
@@ -2546,7 +2589,6 @@ direct_uncharge:
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-	int i;
 	int count;
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
@@ -2596,8 +2638,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	for (i = 0; i < count; i++)
-		mem_cgroup_charge_statistics(mem, pc + i, false);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
 
 	ClearPageCgroupUsed(pc);
 	/*
@@ -4844,7 +4885,7 @@ retry:
 			goto put;
 		pc = lookup_page_cgroup(page);
 		if (!mem_cgroup_move_account(pc,
-					mc.from, mc.to, false)) {
+					mc.from, mc.to, false, PAGE_SIZE)) {
 			mc.precharge--;
 			/* we uncharge from mc.from later. */
 			mc.moved_charge++;
diff --git a/mm/migrate.c b/mm/migrate.c
index 46fe8cc13d67..9f29a3b7aac2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -888,7 +888,7 @@ out:
  * are movable anymore because to has become empty
  * or no retryable pages exist anymore.
  * Caller should call putback_lru_pages to return pages to the LRU
- * or free list.
+ * or free list only if ret != 0.
  *
  * Return: Number of pages not migrated or error code.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 90c1439549fd..a873e61e312e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1088,8 +1088,10 @@ static void drain_pages(unsigned int cpu)
 		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		free_pcppages_bulk(zone, pcp->count, pcp);
-		pcp->count = 0;
+		if (pcp->count) {
+			free_pcppages_bulk(zone, pcp->count, pcp);
+			pcp->count = 0;
+		}
 		local_irq_restore(flags);
 	}
 }
@@ -2034,6 +2036,14 @@ restart:
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
+	/*
+	 * Find the true preferred zone if the allocation is unconstrained by
+	 * cpusets.
+	 */
+	if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
+		first_zones_zonelist(zonelist, high_zoneidx, NULL,
+					&preferred_zone);
+
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2192,7 +2202,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	get_mems_allowed();
 	/* The preferred zone is used for statistics later */
-	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
+	first_zones_zonelist(zonelist, high_zoneidx,
+				nodemask ? : &cpuset_current_mems_allowed,
+				&preferred_zone);
 	if (!preferred_zone) {
 		put_mems_allowed();
 		return NULL;
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 0369f5b3ba1b..eb663fb533e0 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2010 Linus Torvalds
  */
 
+#include <linux/pagemap.h>
 #include <asm/tlb.h>
 #include <asm-generic/pgtable.h>
 
diff --git a/mm/truncate.c b/mm/truncate.c
index 3c2d5ddfa0d4..49feb46e77b8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -549,13 +549,12 @@ EXPORT_SYMBOL(truncate_pagecache);
  * @inode: inode
  * @newsize: new file size
  *
- * truncate_setsize updastes i_size update and performs pagecache
- * truncation (if necessary) for a file size updates. It will be
- * typically be called from the filesystem's setattr function when
- * ATTR_SIZE is passed in.
+ * truncate_setsize updates i_size and performs pagecache truncation (if
+ * necessary) to @newsize. It will be typically be called from the filesystem's
+ * setattr function when ATTR_SIZE is passed in.
  *
- * Must be called with inode_mutex held and after all filesystem
- * specific block truncation has been performed.
+ * Must be called with inode_mutex held and before all filesystem specific
+ * block truncation has been performed.
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 47a50962ce81..148c6e630df2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,7 +41,6 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
-#include <linux/compaction.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2084,7 +2083,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			struct zone *preferred_zone;
 
 			first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
-						NULL, &preferred_zone);
+						&cpuset_current_mems_allowed,
+						&preferred_zone);
 			wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
 		}
 	}