Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            |   2
-rw-r--r--  mm/compaction.c       |  11
-rw-r--r--  mm/huge_memory.c      |   5
-rw-r--r--  mm/kmemleak-test.c    |   6
-rw-r--r--  mm/kmemleak.c         |  13
-rw-r--r--  mm/memblock.c         |   8
-rw-r--r--  mm/memcontrol.c       | 199
-rw-r--r--  mm/migrate.c          |   2
-rw-r--r--  mm/page_alloc.c       |  18
-rw-r--r--  mm/pgtable-generic.c  |   1
-rw-r--r--  mm/truncate.c         |  11
-rw-r--r--  mm/vmscan.c           |   4
12 files changed, 174 insertions, 106 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 3ad483bdf505..e9c0c61f2ddd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -179,7 +179,7 @@ config SPLIT_PTLOCK_CPUS
 config COMPACTION
 	bool "Allow for memory compaction"
 	select MIGRATION
-	depends on EXPERIMENTAL && HUGETLB_PAGE && MMU
+	depends on MMU
 	help
 	  Allows the compaction of memory for the allocation of huge pages.
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 6d592a021072..8be430b812de 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -406,6 +406,10 @@ static int compact_finished(struct zone *zone,
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
 		return COMPACT_CONTINUE;
 
+	/*
+	 * order == -1 is expected when compacting via
+	 * /proc/sys/vm/compact_memory
+	 */
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
@@ -454,6 +458,13 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 		return COMPACT_SKIPPED;
 
 	/*
+	 * order == -1 is expected when compacting via
+	 * /proc/sys/vm/compact_memory
+	 */
+	if (order == -1)
+		return COMPACT_CONTINUE;
+
+	/*
 	 * fragmentation index determines if allocation failures are due to
 	 * low memory or external fragmentation
 	 *
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 004c9c2aac78..e187454d82f6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1203,6 +1203,8 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
+		mem_cgroup_split_huge_fixup(page, page_tail);
+
 		lru_add_page_tail(zone, page, page_tail);
 	}
 
@@ -1837,9 +1839,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spin_lock(ptl);
 	isolated = __collapse_huge_page_isolate(vma, address, pte);
 	spin_unlock(ptl);
-	pte_unmap(pte);
 
 	if (unlikely(!isolated)) {
+		pte_unmap(pte);
 		spin_lock(&mm->page_table_lock);
 		BUG_ON(!pmd_none(*pmd));
 		set_pmd_at(mm, address, pmd, _pmd);
@@ -1856,6 +1858,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	anon_vma_unlock(vma->anon_vma);
 
 	__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+	pte_unmap(pte);
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
 	VM_BUG_ON(page_count(pgtable) != 1);
diff --git a/mm/kmemleak-test.c b/mm/kmemleak-test.c
index 177a5169bbde..ff0d9779cec8 100644
--- a/mm/kmemleak-test.c
+++ b/mm/kmemleak-test.c
@@ -75,13 +75,11 @@ static int __init kmemleak_test_init(void)
 	 * after the module is removed.
 	 */
 	for (i = 0; i < 10; i++) {
-		elem = kmalloc(sizeof(*elem), GFP_KERNEL);
-		pr_info("kmemleak: kmalloc(sizeof(*elem)) = %p\n", elem);
+		elem = kzalloc(sizeof(*elem), GFP_KERNEL);
+		pr_info("kmemleak: kzalloc(sizeof(*elem)) = %p\n", elem);
 		if (!elem)
 			return -ENOMEM;
-		memset(elem, 0, sizeof(*elem));
 		INIT_LIST_HEAD(&elem->list);
-
 		list_add_tail(&elem->list, &test_list);
 	}
 
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index bd9bc214091b..84225f3b7190 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -113,7 +113,9 @@
 #define BYTES_PER_POINTER	sizeof(void *)
 
 /* GFP bitmask for kmemleak internal allocations */
-#define GFP_KMEMLEAK_MASK	(GFP_KERNEL | GFP_ATOMIC)
+#define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
+				 __GFP_NORETRY | __GFP_NOMEMALLOC | \
+				 __GFP_NOWARN)
 
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
@@ -511,9 +513,10 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	struct kmemleak_object *object;
 	struct prio_tree_node *node;
 
-	object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
+	object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
 	if (!object) {
-		kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
+		pr_warning("Cannot allocate a kmemleak_object structure\n");
+		kmemleak_disable();
 		return NULL;
 	}
 
@@ -734,9 +737,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 		return;
 	}
 
-	area = kmem_cache_alloc(scan_area_cache, gfp & GFP_KMEMLEAK_MASK);
+	area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
 	if (!area) {
-		kmemleak_warn("Cannot allocate a scan area\n");
+		pr_warning("Cannot allocate a scan area\n");
 		goto out;
 	}
 
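As an aside on the kmemleak hunk above: the new gfp_kmemleak_mask() keeps only the GFP_KERNEL/GFP_ATOMIC portion of the caller's flags and always ORs in __GFP_NORETRY, __GFP_NOMEMALLOC and __GFP_NOWARN, so kmemleak's internal allocations fail fast and quietly instead of pressing the allocator harder. A minimal standalone sketch of that masking, using made-up flag values in place of the real <linux/gfp.h> constants (illustration only, not part of this patch):

/* Standalone model of the gfp_kmemleak_mask() clamping; the flag values below
 * are mock numbers, not the real kernel GFP_* bits. */
#include <stdio.h>

#define GFP_KERNEL        0x01u
#define GFP_ATOMIC        0x02u
#define __GFP_NORETRY     0x10u
#define __GFP_NOMEMALLOC  0x20u
#define __GFP_NOWARN      0x40u

#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
                                __GFP_NORETRY | __GFP_NOMEMALLOC | \
                                __GFP_NOWARN)

int main(void)
{
        unsigned int caller_gfp = GFP_KERNEL | 0x80u; /* plus an unrelated bit */

        /* Only the GFP_KERNEL/GFP_ATOMIC part of the caller's flags survives;
         * the "fail quickly and quietly" bits are always added. */
        printf("0x%x -> 0x%x\n", caller_gfp, gfp_kmemleak_mask(caller_gfp));
        return 0;
}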
diff --git a/mm/memblock.c b/mm/memblock.c
index 400dc62697d7..bdba245d8afd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -683,13 +683,13 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
 
 int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
 {
-	int idx = memblock_search(&memblock.reserved, base);
+	int idx = memblock_search(&memblock.memory, base);
 
 	if (idx == -1)
 		return 0;
-	return memblock.reserved.regions[idx].base <= base &&
-		(memblock.reserved.regions[idx].base +
-		 memblock.reserved.regions[idx].size) >= (base + size);
+	return memblock.memory.regions[idx].base <= base &&
+		(memblock.memory.regions[idx].base +
+		 memblock.memory.regions[idx].size) >= (base + size);
 }
 
 int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
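For context on the memblock fix above: memblock_is_region_memory() is meant to answer "is [base, base+size) fully covered by one region of the memory table?", but it was consulting the reserved table. A simplified standalone sketch of the corrected containment test, using a linear scan and plain integers instead of the real memblock_search()/phys_addr_t machinery (illustration only):

/* Toy model of the containment check against a memory-region table. */
#include <stdio.h>

struct region { unsigned long base, size; };

static const struct region memory[] = {
        { 0x00000000UL, 0x40000000UL },  /* 1GB of RAM at physical 0 */
};

static int is_region_memory(unsigned long base, unsigned long size)
{
        /* The real code binary-searches with memblock_search(); a scan is
         * enough to show the base/size containment condition. */
        for (size_t i = 0; i < sizeof(memory) / sizeof(memory[0]); i++)
                if (memory[i].base <= base &&
                    memory[i].base + memory[i].size >= base + size)
                        return 1;
        return 0;
}

int main(void)
{
        printf("%d\n", is_region_memory(0x1000UL, 0x2000UL));      /* 1: inside RAM */
        printf("%d\n", is_region_memory(0x50000000UL, 0x1000UL));  /* 0: outside RAM */
        return 0;
}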
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ab841031436..3878cfe399dc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -600,23 +600,22 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 bool charge)
+					 bool file, int nr_pages)
 {
-	int val = (charge) ? 1 : -1;
-
 	preempt_disable();
 
-	if (PageCgroupCache(pc))
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
+	if (file)
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
 	else
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
 
-	if (charge)
+	/* pagein of a big page is an event. So, ignore page size */
+	if (nr_pages > 0)
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
 	else
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
-	__this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]);
+
+	__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
 	preempt_enable();
 }
@@ -815,7 +814,8 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 	 * removed from global LRU.
 	 */
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	VM_BUG_ON(list_empty(&pc->lru));
@@ -836,13 +836,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	/* unused or root page is not rotated. */
-	if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
+	if (!PageCgroupUsed(pc))
+		return;
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
+	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	mz = page_cgroup_zoneinfo(pc);
 	list_move(&pc->lru, &mz->lists[lru]);
@@ -857,16 +856,13 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 		return;
 	pc = lookup_page_cgroup(page);
 	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
 	SetPageCgroupAcctLRU(pc);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
@@ -1030,14 +1026,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 		return NULL;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return NULL;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
 	if (!mz)
 		return NULL;
@@ -1615,7 +1607,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 	if (unlikely(!mem || !PageCgroupUsed(pc)))
 		goto out;
 	/* pc->mem_cgroup is unstable ? */
-	if (unlikely(mem_cgroup_stealed(mem))) {
+	if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
 		/* take a lock against to access pc->mem_cgroup */
 		move_lock_page_cgroup(pc, &flags);
 		need_unlock = true;
@@ -1840,6 +1832,7 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 		if (likely(!ret))
 			return CHARGE_OK;
 
+		res_counter_uncharge(&mem->res, csize);
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
 		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 	} else
@@ -2084,14 +2077,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	return mem;
 }
 
-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+				       struct page_cgroup *pc,
+				       enum charge_type ctype,
+				       int page_size)
 {
+	int nr_pages = page_size >> PAGE_SHIFT;
+
+	/* try_charge() can return NULL to *memcg, taking care of it. */
+	if (!mem)
+		return;
+
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
+		mem_cgroup_cancel_charge(mem, page_size);
+		return;
+	}
+	/*
+	 * we don't need page_cgroup_lock about tail pages, becase they are not
+	 * accessed by any other context at this point.
+	 */
 	pc->mem_cgroup = mem;
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2115,43 +2121,57 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
 		break;
 	}
 
-	mem_cgroup_charge_statistics(mem, pc, true);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
+	unlock_page_cgroup(pc);
+	/*
+	 * "charge_statistics" updated event counter. Then, check it.
+	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+	 * if they exceeds softlimit.
+	 */
+	memcg_check_events(mem, pc->page);
 }
 
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-				       struct page_cgroup *pc,
-				       enum charge_type ctype,
-				       int page_size)
-{
-	int i;
-	int count = page_size >> PAGE_SHIFT;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-	/* try_charge() can return NULL to *memcg, taking care of it. */
-	if (!mem)
-		return;
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+	struct page_cgroup *head_pc = lookup_page_cgroup(head);
+	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+	unsigned long flags;
 
-	lock_page_cgroup(pc);
-	if (unlikely(PageCgroupUsed(pc))) {
-		unlock_page_cgroup(pc);
-		mem_cgroup_cancel_charge(mem, page_size);
+	if (mem_cgroup_disabled())
 		return;
-	}
-
 	/*
-	 * we don't need page_cgroup_lock about tail pages, becase they are not
-	 * accessed by any other context at this point.
+	 * We have no races with charge/uncharge but will have races with
+	 * page state accounting.
 	 */
-	for (i = 0; i < count; i++)
-		____mem_cgroup_commit_charge(mem, pc + i, ctype);
+	move_lock_page_cgroup(head_pc, &flags);
 
-	unlock_page_cgroup(pc);
-	/*
-	 * "charge_statistics" updated event counter. Then, check it.
-	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-	 * if they exceeds softlimit.
-	 */
-	memcg_check_events(mem, pc->page);
+	tail_pc->mem_cgroup = head_pc->mem_cgroup;
+	smp_wmb(); /* see __commit_charge() */
+	if (PageCgroupAcctLRU(head_pc)) {
+		enum lru_list lru;
+		struct mem_cgroup_per_zone *mz;
+
+		/*
+		 * LRU flags cannot be copied because we need to add tail
+		 *.page to LRU by generic call and our hook will be called.
+		 * We hold lru_lock, then, reduce counter directly.
+		 */
+		lru = page_lru(head);
+		mz = page_cgroup_zoneinfo(head_pc);
+		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	}
+	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+	move_unlock_page_cgroup(head_pc, &flags);
 }
+#endif
 
 /**
  * __mem_cgroup_move_account - move account of the page
@@ -2171,8 +2191,11 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
  */
 
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
+	int charge_size)
 {
+	int nr_pages = charge_size >> PAGE_SHIFT;
+
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!page_is_cgroup_locked(pc));
@@ -2186,14 +2209,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 		__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
 		preempt_enable();
 	}
-	mem_cgroup_charge_statistics(from, pc, false);
+	mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
 	if (uncharge)
 		/* This is not "cancel", but cancel_charge does all we need. */
-		mem_cgroup_cancel_charge(from, PAGE_SIZE);
+		mem_cgroup_cancel_charge(from, charge_size);
 
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
-	mem_cgroup_charge_statistics(to, pc, true);
+	mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
 	 * can be under rmdir(). But in current implementation, caller of
@@ -2208,15 +2231,24 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 * __mem_cgroup_move_account()
 */
 static int mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to,
+	bool uncharge, int charge_size)
 {
 	int ret = -EINVAL;
 	unsigned long flags;
+	/*
+	 * The page is isolated from LRU. So, collapse function
+	 * will not handle this page. But page splitting can happen.
+	 * Do this check under compound_page_lock(). The caller should
+	 * hold it.
+	 */
+	if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
+		return -EBUSY;
 
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
 		move_lock_page_cgroup(pc, &flags);
-		__mem_cgroup_move_account(pc, from, to, uncharge);
+		__mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
 		move_unlock_page_cgroup(pc, &flags);
 		ret = 0;
 	}
@@ -2241,6 +2273,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	struct cgroup *cg = child->css.cgroup;
 	struct cgroup *pcg = cg->parent;
 	struct mem_cgroup *parent;
+	int page_size = PAGE_SIZE;
+	unsigned long flags;
 	int ret;
 
 	/* Is ROOT ? */
@@ -2253,15 +2287,24 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	if (isolate_lru_page(page))
 		goto put;
 
+	if (PageTransHuge(page))
+		page_size = HPAGE_SIZE;
+
 	parent = mem_cgroup_from_cont(pcg);
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
-				      PAGE_SIZE);
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask,
+				&parent, false, page_size);
 	if (ret || !parent)
 		goto put_back;
 
-	ret = mem_cgroup_move_account(pc, child, parent, true);
+	if (page_size > PAGE_SIZE)
+		flags = compound_lock_irqsave(page);
+
+	ret = mem_cgroup_move_account(pc, child, parent, true, page_size);
 	if (ret)
-		mem_cgroup_cancel_charge(parent, PAGE_SIZE);
+		mem_cgroup_cancel_charge(parent, page_size);
+
+	if (page_size > PAGE_SIZE)
+		compound_unlock_irqrestore(page, flags);
 put_back:
 	putback_lru_page(page);
 put:
@@ -2546,7 +2589,6 @@ direct_uncharge:
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-	int i;
 	int count;
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
@@ -2596,8 +2638,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	for (i = 0; i < count; i++)
-		mem_cgroup_charge_statistics(mem, pc + i, false);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
 
 	ClearPageCgroupUsed(pc);
 	/*
@@ -4844,7 +4885,7 @@ retry:
 			goto put;
 		pc = lookup_page_cgroup(page);
 		if (!mem_cgroup_move_account(pc,
-					mc.from, mc.to, false)) {
+					mc.from, mc.to, false, PAGE_SIZE)) {
 			mc.precharge--;
 			/* we uncharge from mc.from later. */
 			mc.moved_charge++;
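A note on the memcontrol rework above: mem_cgroup_charge_statistics() now takes a signed page count instead of a page_cgroup plus a charge/uncharge flag, so a transparent huge page can be accounted in a single call as +/-(1 << compound_order(page)). A standalone toy model of that convention, with plain globals standing in for the real per-cpu memcg counters (illustration only, not the kernel implementation):

/* Toy model of the signed nr_pages accounting convention. */
#include <stdio.h>

static long stat_rss, stat_cache, pgpgin, pgpgout, events;

static void charge_statistics(int file, int nr_pages)
{
        if (file)
                stat_cache += nr_pages;
        else
                stat_rss += nr_pages;

        /* pagein of a big page counts as one event; page size is ignored */
        if (nr_pages > 0)
                pgpgin++;
        else
                pgpgout++;

        events += nr_pages;
}

int main(void)
{
        charge_statistics(0, 512);   /* charge a 2MB THP (512 base pages) as anon */
        charge_statistics(0, -512);  /* uncharge it again */
        printf("rss=%ld pgpgin=%ld pgpgout=%ld\n", stat_rss, pgpgin, pgpgout);
        return 0;
}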
diff --git a/mm/migrate.c b/mm/migrate.c
index 46fe8cc13d67..9f29a3b7aac2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -888,7 +888,7 @@ out:
 * are movable anymore because to has become empty
 * or no retryable pages exist anymore.
 * Caller should call putback_lru_pages to return pages to the LRU
- * or free list.
+ * or free list only if ret != 0.
 *
 * Return: Number of pages not migrated or error code.
 */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 90c1439549fd..a873e61e312e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1088,8 +1088,10 @@ static void drain_pages(unsigned int cpu)
 		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		free_pcppages_bulk(zone, pcp->count, pcp);
-		pcp->count = 0;
+		if (pcp->count) {
+			free_pcppages_bulk(zone, pcp->count, pcp);
+			pcp->count = 0;
+		}
 		local_irq_restore(flags);
 	}
 }
@@ -2034,6 +2036,14 @@ restart:
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
+	/*
+	 * Find the true preferred zone if the allocation is unconstrained by
+	 * cpusets.
+	 */
+	if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
+		first_zones_zonelist(zonelist, high_zoneidx, NULL,
+					&preferred_zone);
+
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2192,7 +2202,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	get_mems_allowed();
 	/* The preferred zone is used for statistics later */
-	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
+	first_zones_zonelist(zonelist, high_zoneidx,
+				nodemask ? : &cpuset_current_mems_allowed,
+				&preferred_zone);
 	if (!preferred_zone) {
 		put_mems_allowed();
 		return NULL;
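One detail of the page_alloc hunk above: "nodemask ? : &cpuset_current_mems_allowed" uses the GNU C conditional with an omitted middle operand, i.e. fall back to the current cpuset's allowed mask only when no explicit nodemask was passed, without evaluating the first operand twice. A standalone illustration with ordinary pointers rather than the kernel's nodemask_t (requires GNU extensions, so gcc or clang):

/* Demonstrates the "x ? : y" fallback form used by the patch. */
#include <stdio.h>

int main(void)
{
        const char *explicit_mask = NULL;  /* caller passed no nodemask */
        const char *current_allowed = "cpuset_current_mems_allowed";

        /* Equivalent to: explicit_mask ? explicit_mask : current_allowed,
         * but the first operand is evaluated only once. */
        const char *used = explicit_mask ? : current_allowed;

        printf("using: %s\n", used);
        return 0;
}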
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 0369f5b3ba1b..eb663fb533e0 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -6,6 +6,7 @@
 * Copyright (C) 2010 Linus Torvalds
 */
 
+#include <linux/pagemap.h>
 #include <asm/tlb.h>
 #include <asm-generic/pgtable.h>
 
diff --git a/mm/truncate.c b/mm/truncate.c
index 3c2d5ddfa0d4..49feb46e77b8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -549,13 +549,12 @@ EXPORT_SYMBOL(truncate_pagecache);
 * @inode: inode
 * @newsize: new file size
 *
- * truncate_setsize updastes i_size update and performs pagecache
- * truncation (if necessary) for a file size updates. It will be
- * typically be called from the filesystem's setattr function when
- * ATTR_SIZE is passed in.
+ * truncate_setsize updates i_size and performs pagecache truncation (if
+ * necessary) to @newsize. It will be typically be called from the filesystem's
+ * setattr function when ATTR_SIZE is passed in.
 *
- * Must be called with inode_mutex held and after all filesystem
- * specific block truncation has been performed.
+ * Must be called with inode_mutex held and before all filesystem specific
+ * block truncation has been performed.
 */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 47a50962ce81..148c6e630df2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,7 +41,6 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
-#include <linux/compaction.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2084,7 +2083,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		struct zone *preferred_zone;
 
 		first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
-						NULL, &preferred_zone);
+					&cpuset_current_mems_allowed,
+					&preferred_zone);
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
 	}
 }