author		Sascha Hauer <s.hauer@pengutronix.de>	2011-02-11 02:32:18 -0500
committer	Sascha Hauer <s.hauer@pengutronix.de>	2011-02-11 02:33:14 -0500
commit		f19693a17c6705e197eb24d4618060eaac1b535c
tree		fc39dc23297c0e6be730cb0dfd74a34d9c0b8bfd /mm
parent		23b120cdfae4f5c29da69de750d545bad719ead4
parent		100b33c8bd8a3235fd0b7948338d6cbb3db3c63d
Merge commit 'v2.6.38-rc4' into imx-for-2.6.39
Conflicts:
arch/arm/mach-mxs/clock-mx28.c
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Diffstat (limited to 'mm')
-rw-r--r--	mm/Kconfig           |   2
-rw-r--r--	mm/compaction.c      |  11
-rw-r--r--	mm/huge_memory.c     |  12
-rw-r--r--	mm/kmemleak-test.c   |   6
-rw-r--r--	mm/kmemleak.c        |  13
-rw-r--r--	mm/memblock.c        |   8
-rw-r--r--	mm/memcontrol.c      | 266
-rw-r--r--	mm/memory-failure.c  |  94
-rw-r--r--	mm/migrate.c         |   9
-rw-r--r--	mm/mlock.c           |   7
-rw-r--r--	mm/page_alloc.c      |  18
-rw-r--r--	mm/pgtable-generic.c |   1
-rw-r--r--	mm/truncate.c        |  11
-rw-r--r--	mm/vmscan.c          |   4
14 files changed, 312 insertions, 150 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 3ad483bdf505..e9c0c61f2ddd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -179,7 +179,7 @@ config SPLIT_PTLOCK_CPUS
 config COMPACTION
 	bool "Allow for memory compaction"
 	select MIGRATION
-	depends on EXPERIMENTAL && HUGETLB_PAGE && MMU
+	depends on MMU
 	help
 	  Allows the compaction of memory for the allocation of huge pages.

diff --git a/mm/compaction.c b/mm/compaction.c
index 6d592a021072..8be430b812de 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -406,6 +406,10 @@ static int compact_finished(struct zone *zone,
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
 		return COMPACT_CONTINUE;

+	/*
+	 * order == -1 is expected when compacting via
+	 * /proc/sys/vm/compact_memory
+	 */
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;

@@ -454,6 +458,13 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 		return COMPACT_SKIPPED;

 	/*
+	 * order == -1 is expected when compacting via
+	 * /proc/sys/vm/compact_memory
+	 */
+	if (order == -1)
+		return COMPACT_CONTINUE;
+
+	/*
 	 * fragmentation index determines if allocation failures are due to
 	 * low memory or external fragmentation
 	 *
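
For context, the order == -1 convention referenced in the new comments marks compaction that was requested explicitly through /proc/sys/vm/compact_memory (e.g. echo 1 > /proc/sys/vm/compact_memory) rather than on behalf of an allocation, so it must not bail out early on watermark or fragmentation-index checks. A minimal sketch of that convention (the helper name is hypothetical, not part of this patch):

	/*
	 * Illustration only: order == -1 means "compact the whole zone",
	 * as used by the /proc/sys/vm/compact_memory trigger; any other
	 * value is an allocation-driven request for a block of that order.
	 */
	static bool is_explicit_compaction_request(int order)
	{
		return order == -1;
	}
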
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 004c9c2aac78..b6c1ce3c53b5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1162,7 +1162,12 @@ static void __split_huge_page_refcount(struct page *page)
 		/* after clearing PageTail the gup refcount can be released */
 		smp_mb();

-		page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+		/*
+		 * retain hwpoison flag of the poisoned tail page:
+		 *   fix for the unsuitable process killed on Guest Machine(KVM)
+		 *   by the memory-failure.
+		 */
+		page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
 		page_tail->flags |= (page->flags &
 				     ((1L << PG_referenced) |
 				      (1L << PG_swapbacked) |
@@ -1203,6 +1208,8 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));

+		mem_cgroup_split_huge_fixup(page, page_tail);
+
 		lru_add_page_tail(zone, page, page_tail);
 	}

@@ -1837,9 +1844,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spin_lock(ptl);
 	isolated = __collapse_huge_page_isolate(vma, address, pte);
 	spin_unlock(ptl);
-	pte_unmap(pte);

 	if (unlikely(!isolated)) {
+		pte_unmap(pte);
 		spin_lock(&mm->page_table_lock);
 		BUG_ON(!pmd_none(*pmd));
 		set_pmd_at(mm, address, pmd, _pmd);
@@ -1856,6 +1863,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	anon_vma_unlock(vma->anon_vma);

 	__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+	pte_unmap(pte);
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
 	VM_BUG_ON(page_count(pgtable) != 1);
diff --git a/mm/kmemleak-test.c b/mm/kmemleak-test.c
index 177a5169bbde..ff0d9779cec8 100644
--- a/mm/kmemleak-test.c
+++ b/mm/kmemleak-test.c
@@ -75,13 +75,11 @@ static int __init kmemleak_test_init(void)
 	 * after the module is removed.
 	 */
 	for (i = 0; i < 10; i++) {
-		elem = kmalloc(sizeof(*elem), GFP_KERNEL);
-		pr_info("kmemleak: kmalloc(sizeof(*elem)) = %p\n", elem);
+		elem = kzalloc(sizeof(*elem), GFP_KERNEL);
+		pr_info("kmemleak: kzalloc(sizeof(*elem)) = %p\n", elem);
 		if (!elem)
 			return -ENOMEM;
-		memset(elem, 0, sizeof(*elem));
 		INIT_LIST_HEAD(&elem->list);
-
 		list_add_tail(&elem->list, &test_list);
 	}

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index bd9bc214091b..84225f3b7190 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -113,7 +113,9 @@
 #define BYTES_PER_POINTER	sizeof(void *)

 /* GFP bitmask for kmemleak internal allocations */
-#define GFP_KMEMLEAK_MASK	(GFP_KERNEL | GFP_ATOMIC)
+#define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
+				 __GFP_NORETRY | __GFP_NOMEMALLOC | \
+				 __GFP_NOWARN)

 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
@@ -511,9 +513,10 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	struct kmemleak_object *object;
 	struct prio_tree_node *node;

-	object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
+	object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
 	if (!object) {
-		kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
+		pr_warning("Cannot allocate a kmemleak_object structure\n");
+		kmemleak_disable();
 		return NULL;
 	}

@@ -734,9 +737,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 		return;
 	}

-	area = kmem_cache_alloc(scan_area_cache, gfp & GFP_KMEMLEAK_MASK);
+	area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
 	if (!area) {
-		kmemleak_warn("Cannot allocate a scan area\n");
+		pr_warning("Cannot allocate a scan area\n");
 		goto out;
 	}

diff --git a/mm/memblock.c b/mm/memblock.c
index 400dc62697d7..bdba245d8afd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -683,13 +683,13 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)

 int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
 {
-	int idx = memblock_search(&memblock.reserved, base);
+	int idx = memblock_search(&memblock.memory, base);

 	if (idx == -1)
 		return 0;
-	return memblock.reserved.regions[idx].base <= base &&
-		(memblock.reserved.regions[idx].base +
-		 memblock.reserved.regions[idx].size) >= (base + size);
+	return memblock.memory.regions[idx].base <= base &&
+		(memblock.memory.regions[idx].base +
+		 memblock.memory.regions[idx].size) >= (base + size);
 }

 int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
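
For context, memblock.memory lists all RAM known to the system while memblock.reserved lists ranges that have already been claimed; memblock_is_region_memory() is meant to answer "is this range RAM at all?", which is why searching memblock.reserved was wrong. A hedged usage sketch (the wrapper function below is hypothetical, not part of this patch):

	#include <linux/types.h>
	#include <linux/memblock.h>

	/*
	 * Illustration only: true if [base, base + size) lies entirely
	 * inside known RAM and does not overlap anything already reserved.
	 */
	static bool __init range_is_free_ram(phys_addr_t base, phys_addr_t size)
	{
		return memblock_is_region_memory(base, size) &&
		       !memblock_is_region_reserved(base, size);
	}
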
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ab841031436..da53a252b259 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -600,23 +600,24 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 }

 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 bool charge)
+					 bool file, int nr_pages)
 {
-	int val = (charge) ? 1 : -1;
-
 	preempt_disable();

-	if (PageCgroupCache(pc))
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
+	if (file)
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
 	else
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);

-	if (charge)
+	/* pagein of a big page is an event. So, ignore page size */
+	if (nr_pages > 0)
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
-	else
+	else {
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
-		__this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]);
+		nr_pages = -nr_pages; /* for event */
+	}
+
+	__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);

 	preempt_enable();
 }
@@ -815,7 +816,8 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 	 * removed from global LRU.
 	 */
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	VM_BUG_ON(list_empty(&pc->lru));
@@ -836,13 +838,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 		return;

 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	/* unused or root page is not rotated. */
-	if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
+	if (!PageCgroupUsed(pc))
+		return;
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
+	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	mz = page_cgroup_zoneinfo(pc);
 	list_move(&pc->lru, &mz->lists[lru]);
@@ -857,16 +858,13 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 		return;
 	pc = lookup_page_cgroup(page);
 	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
 	SetPageCgroupAcctLRU(pc);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
@@ -1030,14 +1028,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 		return NULL;

 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return NULL;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
 	if (!mz)
 		return NULL;
@@ -1119,6 +1113,23 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
 	return false;
 }

+/**
+ * mem_cgroup_check_margin - check if the memory cgroup allows charging
+ * @mem: memory cgroup to check
+ * @bytes: the number of bytes the caller intends to charge
+ *
+ * Returns a boolean value on whether @mem can be charged @bytes or
+ * whether this would exceed the limit.
+ */
+static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes)
+{
+	if (!res_counter_check_margin(&mem->res, bytes))
+		return false;
+	if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes))
+		return false;
+	return true;
+}
+
 static unsigned int get_swappiness(struct mem_cgroup *memcg)
 {
 	struct cgroup *cgrp = memcg->css.cgroup;
@@ -1615,7 +1626,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 	if (unlikely(!mem || !PageCgroupUsed(pc)))
 		goto out;
 	/* pc->mem_cgroup is unstable ? */
-	if (unlikely(mem_cgroup_stealed(mem))) {
+	if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
 		/* take a lock against to access pc->mem_cgroup */
 		move_lock_page_cgroup(pc, &flags);
 		need_unlock = true;
@@ -1840,27 +1851,39 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 		if (likely(!ret))
 			return CHARGE_OK;

+		res_counter_uncharge(&mem->res, csize);
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
 		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 	} else
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
-
-	if (csize > PAGE_SIZE) /* change csize and retry */
+	/*
+	 * csize can be either a huge page (HPAGE_SIZE), a batch of
+	 * regular pages (CHARGE_SIZE), or a single regular page
+	 * (PAGE_SIZE).
+	 *
+	 * Never reclaim on behalf of optional batching, retry with a
+	 * single page instead.
+	 */
+	if (csize == CHARGE_SIZE)
 		return CHARGE_RETRY;

 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;

 	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
 					      gfp_mask, flags);
+	if (mem_cgroup_check_margin(mem_over_limit, csize))
+		return CHARGE_RETRY;
 	/*
-	 * try_to_free_mem_cgroup_pages() might not give us a full
-	 * picture of reclaim. Some pages are reclaimed and might be
-	 * moved to swap cache or just unmapped from the cgroup.
-	 * Check the limit again to see if the reclaim reduced the
-	 * current usage of the cgroup before giving up
+	 * Even though the limit is exceeded at this point, reclaim
+	 * may have been able to free some pages. Retry the charge
+	 * before killing the task.
+	 *
+	 * Only for regular pages, though: huge pages are rather
+	 * unlikely to succeed so close to the limit, and we fall back
+	 * to regular pages anyway in case of failure.
 	 */
-	if (ret || mem_cgroup_check_under_limit(mem_over_limit))
+	if (csize == PAGE_SIZE && ret)
 		return CHARGE_RETRY;

 	/*
@@ -2084,14 +2107,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	return mem;
 }

-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+				       struct page_cgroup *pc,
+				       enum charge_type ctype,
+				       int page_size)
 {
+	int nr_pages = page_size >> PAGE_SHIFT;
+
+	/* try_charge() can return NULL to *memcg, taking care of it. */
+	if (!mem)
+		return;
+
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
+		mem_cgroup_cancel_charge(mem, page_size);
+		return;
+	}
+	/*
+	 * we don't need page_cgroup_lock about tail pages, becase they are not
+	 * accessed by any other context at this point.
+	 */
 	pc->mem_cgroup = mem;
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2115,43 +2151,57 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
 		break;
 	}

-	mem_cgroup_charge_statistics(mem, pc, true);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
+	unlock_page_cgroup(pc);
+	/*
+	 * "charge_statistics" updated event counter. Then, check it.
+	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+	 * if they exceeds softlimit.
+	 */
+	memcg_check_events(mem, pc->page);
 }

-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-				       struct page_cgroup *pc,
-				       enum charge_type ctype,
-				       int page_size)
-{
-	int i;
-	int count = page_size >> PAGE_SHIFT;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE

-	/* try_charge() can return NULL to *memcg, taking care of it. */
-	if (!mem)
-		return;
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+			      (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+	struct page_cgroup *head_pc = lookup_page_cgroup(head);
+	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+	unsigned long flags;

-	lock_page_cgroup(pc);
-	if (unlikely(PageCgroupUsed(pc))) {
-		unlock_page_cgroup(pc);
-		mem_cgroup_cancel_charge(mem, page_size);
+	if (mem_cgroup_disabled())
 		return;
-	}
-
 	/*
-	 * we don't need page_cgroup_lock about tail pages, becase they are not
-	 * accessed by any other context at this point.
+	 * We have no races with charge/uncharge but will have races with
+	 * page state accounting.
 	 */
-	for (i = 0; i < count; i++)
-		____mem_cgroup_commit_charge(mem, pc + i, ctype);
+	move_lock_page_cgroup(head_pc, &flags);

-	unlock_page_cgroup(pc);
-	/*
-	 * "charge_statistics" updated event counter. Then, check it.
-	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-	 * if they exceeds softlimit.
-	 */
-	memcg_check_events(mem, pc->page);
+	tail_pc->mem_cgroup = head_pc->mem_cgroup;
+	smp_wmb(); /* see __commit_charge() */
+	if (PageCgroupAcctLRU(head_pc)) {
+		enum lru_list lru;
+		struct mem_cgroup_per_zone *mz;
+
+		/*
+		 * LRU flags cannot be copied because we need to add tail
+		 *.page to LRU by generic call and our hook will be called.
+		 * We hold lru_lock, then, reduce counter directly.
+		 */
+		lru = page_lru(head);
+		mz = page_cgroup_zoneinfo(head_pc);
+		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	}
+	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+	move_unlock_page_cgroup(head_pc, &flags);
 }
+#endif

 /**
  * __mem_cgroup_move_account - move account of the page
@@ -2171,8 +2221,11 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
  */

 static void __mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
+	int charge_size)
 {
+	int nr_pages = charge_size >> PAGE_SHIFT;
+
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!page_is_cgroup_locked(pc));
@@ -2186,14 +2239,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 		__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
 		preempt_enable();
 	}
-	mem_cgroup_charge_statistics(from, pc, false);
+	mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
 	if (uncharge)
 		/* This is not "cancel", but cancel_charge does all we need. */
-		mem_cgroup_cancel_charge(from, PAGE_SIZE);
+		mem_cgroup_cancel_charge(from, charge_size);

 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
-	mem_cgroup_charge_statistics(to, pc, true);
+	mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
 	 * can be under rmdir(). But in current implementation, caller of
@@ -2208,15 +2261,24 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
  * __mem_cgroup_move_account()
  */
 static int mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+		struct mem_cgroup *from, struct mem_cgroup *to,
+		bool uncharge, int charge_size)
 {
 	int ret = -EINVAL;
 	unsigned long flags;
+	/*
+	 * The page is isolated from LRU. So, collapse function
+	 * will not handle this page. But page splitting can happen.
+	 * Do this check under compound_page_lock(). The caller should
+	 * hold it.
+	 */
+	if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
+		return -EBUSY;

 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
 		move_lock_page_cgroup(pc, &flags);
-		__mem_cgroup_move_account(pc, from, to, uncharge);
+		__mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
 		move_unlock_page_cgroup(pc, &flags);
 		ret = 0;
 	}
@@ -2241,6 +2303,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	struct cgroup *cg = child->css.cgroup;
 	struct cgroup *pcg = cg->parent;
 	struct mem_cgroup *parent;
+	int page_size = PAGE_SIZE;
+	unsigned long flags;
 	int ret;

 	/* Is ROOT ? */
@@ -2253,15 +2317,24 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	if (isolate_lru_page(page))
 		goto put;

+	if (PageTransHuge(page))
+		page_size = HPAGE_SIZE;
+
 	parent = mem_cgroup_from_cont(pcg);
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
-				      PAGE_SIZE);
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask,
+				&parent, false, page_size);
 	if (ret || !parent)
 		goto put_back;

-	ret = mem_cgroup_move_account(pc, child, parent, true);
+	if (page_size > PAGE_SIZE)
+		flags = compound_lock_irqsave(page);
+
+	ret = mem_cgroup_move_account(pc, child, parent, true, page_size);
 	if (ret)
-		mem_cgroup_cancel_charge(parent, PAGE_SIZE);
+		mem_cgroup_cancel_charge(parent, page_size);
+
+	if (page_size > PAGE_SIZE)
+		compound_unlock_irqrestore(page, flags);
 put_back:
 	putback_lru_page(page);
 put:
@@ -2280,13 +2353,19 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask, enum charge_type ctype)
 {
 	struct mem_cgroup *mem = NULL;
+	int page_size = PAGE_SIZE;
 	struct page_cgroup *pc;
+	bool oom = true;
 	int ret;
-	int page_size = PAGE_SIZE;

 	if (PageTransHuge(page)) {
 		page_size <<= compound_order(page);
 		VM_BUG_ON(!PageTransHuge(page));
+		/*
+		 * Never OOM-kill a process for a huge page. The
+		 * fault handler will fall back to regular pages.
+		 */
+		oom = false;
 	}

 	pc = lookup_page_cgroup(page);
@@ -2295,7 +2374,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		return 0;
 	prefetchw(pc);

-	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
+	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size);
 	if (ret || !mem)
 		return ret;

@@ -2546,7 +2625,6 @@ direct_uncharge:
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-	int i;
 	int count;
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
@@ -2596,8 +2674,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}

-	for (i = 0; i < count; i++)
-		mem_cgroup_charge_statistics(mem, pc + i, false);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);

 	ClearPageCgroupUsed(pc);
 	/*
@@ -4844,7 +4921,7 @@ retry:
 				goto put;
 			pc = lookup_page_cgroup(page);
 			if (!mem_cgroup_move_account(pc,
-						mc.from, mc.to, false)) {
+					mc.from, mc.to, false, PAGE_SIZE)) {
 				mc.precharge--;
 				/* we uncharge from mc.from later. */
 				mc.moved_charge++;
@@ -4983,9 +5060,9 @@ struct cgroup_subsys mem_cgroup_subsys = {
 static int __init enable_swap_account(char *s)
 {
 	/* consider enabled if no parameter or 1 is given */
-	if (!s || !strcmp(s, "1"))
+	if (!(*s) || !strcmp(s, "=1"))
 		really_do_swap_account = 1;
-	else if (!strcmp(s, "0"))
+	else if (!strcmp(s, "=0"))
 		really_do_swap_account = 0;
 	return 1;
 }
@@ -4993,7 +5070,8 @@ __setup("swapaccount", enable_swap_account);

 static int __init disable_swap_account(char *s)
 {
-	enable_swap_account("0");
+	printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");
+	enable_swap_account("=0");
 	return 1;
 }
 __setup("noswapaccount", disable_swap_account);
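
The new "=1"/"=0" comparisons in enable_swap_account() account for how legacy __setup() handlers receive their argument: the match string "swapaccount" contains no '=', so everything after it on the command line, '=' included, is passed to the handler ("swapaccount=0" arrives as "=0", a bare "swapaccount" arrives as ""). A hedged stand-alone illustration of that mechanism (the demo option name is hypothetical, not part of this patch):

	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/string.h>

	/*
	 * Illustration only: with __setup("demo_opt", fn), a boot option
	 * "demo_opt=1" reaches the handler as s == "=1", "demo_opt=0" as
	 * s == "=0", and a bare "demo_opt" as s == "".
	 */
	static int __init demo_opt_setup(char *s)
	{
		if (!*s || !strcmp(s, "=1"))
			pr_info("demo_opt enabled\n");
		else if (!strcmp(s, "=0"))
			pr_info("demo_opt disabled\n");
		return 1;
	}
	__setup("demo_opt", demo_opt_setup);
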
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 548fbd70f026..0207c2f6f8bd 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -233,8 +233,8 @@ void shake_page(struct page *p, int access)
 	}

 	/*
-	 * Only all shrink_slab here (which would also
-	 * shrink other caches) if access is not potentially fatal.
+	 * Only call shrink_slab here (which would also shrink other caches) if
+	 * access is not potentially fatal.
 	 */
 	if (access) {
 		int nr;
@@ -386,8 +386,6 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	struct task_struct *tsk;
 	struct anon_vma *av;

-	if (!PageHuge(page) && unlikely(split_huge_page(page)))
-		return;
 	read_lock(&tasklist_lock);
 	av = page_lock_anon_vma(page);
 	if (av == NULL)	/* Not actually mapped anymore */
@@ -856,6 +854,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	int ret;
 	int kill = 1;
 	struct page *hpage = compound_head(p);
+	struct page *ppage;

 	if (PageReserved(p) || PageSlab(p))
 		return SWAP_SUCCESS;
@@ -897,6 +896,44 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	}

 	/*
+	 * ppage: poisoned page
+	 *   if p is regular page(4k page)
+	 *        ppage == real poisoned page;
+	 *   else p is hugetlb or THP, ppage == head page.
+	 */
+	ppage = hpage;
+
+	if (PageTransHuge(hpage)) {
+		/*
+		 * Verify that this isn't a hugetlbfs head page, the check for
+		 * PageAnon is just for avoid tripping a split_huge_page
+		 * internal debug check, as split_huge_page refuses to deal with
+		 * anything that isn't an anon page. PageAnon can't go away fro
+		 * under us because we hold a refcount on the hpage, without a
+		 * refcount on the hpage. split_huge_page can't be safely called
+		 * in the first place, having a refcount on the tail isn't
+		 * enough * to be safe.
+		 */
+		if (!PageHuge(hpage) && PageAnon(hpage)) {
+			if (unlikely(split_huge_page(hpage))) {
+				/*
+				 * FIXME: if splitting THP is failed, it is
+				 * better to stop the following operation rather
+				 * than causing panic by unmapping. System might
+				 * survive if the page is freed later.
+				 */
+				printk(KERN_INFO
+					"MCE %#lx: failed to split THP\n", pfn);
+
+				BUG_ON(!PageHWPoison(p));
+				return SWAP_FAIL;
+			}
+			/* THP is split, so ppage should be the real poisoned page. */
+			ppage = p;
+		}
+	}
+
+	/*
 	 * First collect all the processes that have the page
 	 * mapped in dirty form. This has to be done before try_to_unmap,
 	 * because ttu takes the rmap data structures down.
@@ -905,12 +942,18 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * there's nothing that can be done.
 	 */
 	if (kill)
-		collect_procs(hpage, &tokill);
+		collect_procs(ppage, &tokill);
+
+	if (hpage != ppage)
+		lock_page_nosync(ppage);

-	ret = try_to_unmap(hpage, ttu);
+	ret = try_to_unmap(ppage, ttu);
 	if (ret != SWAP_SUCCESS)
 		printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
-				pfn, page_mapcount(hpage));
+				pfn, page_mapcount(ppage));
+
+	if (hpage != ppage)
+		unlock_page(ppage);

 	/*
 	 * Now that the dirty bit has been propagated to the
@@ -921,7 +964,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * use a more force-full uncatchable kill to prevent
 	 * any accesses to the poisoned memory.
 	 */
-	kill_procs_ao(&tokill, !!PageDirty(hpage), trapno,
+	kill_procs_ao(&tokill, !!PageDirty(ppage), trapno,
 			ret != SWAP_SUCCESS, p, pfn);

 	return ret;
@@ -1022,19 +1065,22 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * The check (unnecessarily) ignores LRU pages being isolated and
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
-	if (!PageLRU(p) && !PageHuge(p))
-		shake_page(p, 0);
-	if (!PageLRU(p) && !PageHuge(p)) {
-		/*
-		 * shake_page could have turned it free.
-		 */
-		if (is_free_buddy_page(p)) {
-			action_result(pfn, "free buddy, 2nd try", DELAYED);
-			return 0;
+	if (!PageHuge(p) && !PageTransCompound(p)) {
+		if (!PageLRU(p))
+			shake_page(p, 0);
+		if (!PageLRU(p)) {
+			/*
+			 * shake_page could have turned it free.
+			 */
+			if (is_free_buddy_page(p)) {
+				action_result(pfn, "free buddy, 2nd try",
+						DELAYED);
+				return 0;
+			}
+			action_result(pfn, "non LRU", IGNORED);
+			put_page(p);
+			return -EBUSY;
 		}
-		action_result(pfn, "non LRU", IGNORED);
-		put_page(p);
-		return -EBUSY;
 	}

 	/*
@@ -1064,7 +1110,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * For error on the tail page, we should set PG_hwpoison
 	 * on the head page to show that the hugepage is hwpoisoned
 	 */
-	if (PageTail(p) && TestSetPageHWPoison(hpage)) {
+	if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
 		action_result(pfn, "hugepage already hardware poisoned",
 			IGNORED);
 		unlock_page(hpage);
@@ -1295,7 +1341,10 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0,
 				true);
 	if (ret) {
-		putback_lru_pages(&pagelist);
+		struct page *page1, *page2;
+		list_for_each_entry_safe(page1, page2, &pagelist, lru)
+			put_page(page1);
+
 		pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
 			pfn, ret, page->flags);
 		if (ret > 0)
@@ -1419,6 +1468,7 @@ int soft_offline_page(struct page *page, int flags)
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
 								0, true);
 		if (ret) {
+			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
 				pfn, ret, page->flags);
 			if (ret > 0)
diff --git a/mm/migrate.c b/mm/migrate.c
index 46fe8cc13d67..766115253807 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -772,6 +772,7 @@ uncharge:
 unlock:
 	unlock_page(page);

+move_newpage:
 	if (rc != -EAGAIN) {
 		/*
 		 * A page that has been migrated has all references
@@ -785,8 +786,6 @@ unlock:
 		putback_lru_page(page);
 	}

-move_newpage:
-
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
@@ -888,7 +887,7 @@ out:
  * are movable anymore because to has become empty
  * or no retryable pages exist anymore.
  * Caller should call putback_lru_pages to return pages to the LRU
- * or free list.
+ * or free list only if ret != 0.
  *
  * Return: Number of pages not migrated or error code.
  */
@@ -981,10 +980,6 @@ int migrate_huge_pages(struct list_head *from,
 	}
 	rc = 0;
 out:
-
-	list_for_each_entry_safe(page, page2, from, lru)
-		put_page(page);
-
 	if (rc)
 		return rc;

diff --git a/mm/mlock.c b/mm/mlock.c
index 13e81ee8be9d..c3924c7f00be 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -178,6 +178,13 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
 		gup_flags |= FOLL_WRITE;

+	/*
+	 * We want mlock to succeed for regions that have any permissions
+	 * other than PROT_NONE.
+	 */
+	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
+		gup_flags |= FOLL_FORCE;
+
 	if (vma->vm_flags & VM_LOCKED)
 		gup_flags |= FOLL_MLOCK;

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 90c1439549fd..a873e61e312e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1088,8 +1088,10 @@ static void drain_pages(unsigned int cpu)
 		pset = per_cpu_ptr(zone->pageset, cpu);

 		pcp = &pset->pcp;
-		free_pcppages_bulk(zone, pcp->count, pcp);
-		pcp->count = 0;
+		if (pcp->count) {
+			free_pcppages_bulk(zone, pcp->count, pcp);
+			pcp->count = 0;
+		}
 		local_irq_restore(flags);
 	}
 }
@@ -2034,6 +2036,14 @@ restart:
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);

+	/*
+	 * Find the true preferred zone if the allocation is unconstrained by
+	 * cpusets.
+	 */
+	if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
+		first_zones_zonelist(zonelist, high_zoneidx, NULL,
+					&preferred_zone);
+
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2192,7 +2202,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,

 	get_mems_allowed();
 	/* The preferred zone is used for statistics later */
-	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
+	first_zones_zonelist(zonelist, high_zoneidx,
+				nodemask ? : &cpuset_current_mems_allowed,
+				&preferred_zone);
 	if (!preferred_zone) {
 		put_mems_allowed();
 		return NULL;
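
The nodemask ? : &cpuset_current_mems_allowed argument above uses the GNU C "conditional with omitted middle operand" extension common in the kernel: a ?: b evaluates to a when a is non-NULL and to b otherwise, without evaluating a twice. A small stand-alone sketch (hypothetical names and values, not from the patch):

	/* Illustration only: pick a preferred value, falling back to a default. */
	static const int default_val = 42;

	static const int *pick(const int *preferred)
	{
		return preferred ?: &default_val; /* same as: preferred ? preferred : &default_val */
	}
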
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 0369f5b3ba1b..eb663fb533e0 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2010 Linus Torvalds
  */

+#include <linux/pagemap.h>
 #include <asm/tlb.h>
 #include <asm-generic/pgtable.h>

diff --git a/mm/truncate.c b/mm/truncate.c
index 3c2d5ddfa0d4..49feb46e77b8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -549,13 +549,12 @@ EXPORT_SYMBOL(truncate_pagecache);
  * @inode: inode
  * @newsize: new file size
  *
- * truncate_setsize updastes i_size update and performs pagecache
- * truncation (if necessary) for a file size updates. It will be
- * typically be called from the filesystem's setattr function when
- * ATTR_SIZE is passed in.
+ * truncate_setsize updates i_size and performs pagecache truncation (if
+ * necessary) to @newsize. It will be typically be called from the filesystem's
+ * setattr function when ATTR_SIZE is passed in.
  *
- * Must be called with inode_mutex held and after all filesystem
- * specific block truncation has been performed.
+ * Must be called with inode_mutex held and before all filesystem specific
+ * block truncation has been performed.
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 47a50962ce81..148c6e630df2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,7 +41,6 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
-#include <linux/compaction.h>

 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2084,7 +2083,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		struct zone *preferred_zone;

 		first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
-						NULL, &preferred_zone);
+					&cpuset_current_mems_allowed,
+					&preferred_zone);
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
 	}
 }