author		Ingo Molnar <mingo@kernel.org>		2012-12-08 09:25:06 -0500
committer	Ingo Molnar <mingo@kernel.org>		2012-12-08 09:25:06 -0500
commit		f0b9abfb044649bc452fb2fb975ff2fd599cc6a3 (patch)
tree		7800081c5cb16a4dfee1e57a70f3be90f7b50d9a /mm
parent		adc1ef1e37358d3c17d1a74a58b2e104fc0bda15 (diff)
parent		1b3c393cd43f22ead8a6a2f839efc6df8ebd7465 (diff)
Merge branch 'linus' into perf/core
Conflicts:
tools/perf/Makefile
tools/perf/builtin-test.c
tools/perf/perf.h
tools/perf/tests/parse-events.c
tools/perf/util/evsel.h
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/bootmem.c		10
-rw-r--r--	mm/compaction.c		10
-rw-r--r--	mm/highmem.c		2
-rw-r--r--	mm/memblock.c		24
-rw-r--r--	mm/memcontrol.c		67
-rw-r--r--	mm/memory-failure.c	8
-rw-r--r--	mm/memory.c		10
-rw-r--r--	mm/memory_hotplug.c	7
-rw-r--r--	mm/mempolicy.c		22
-rw-r--r--	mm/mmap.c		2
-rw-r--r--	mm/mmu_notifier.c	26
-rw-r--r--	mm/mmzone.c		6
-rw-r--r--	mm/nobootmem.c		3
-rw-r--r--	mm/page_alloc.c		83
-rw-r--r--	mm/rmap.c		20
-rw-r--r--	mm/shmem.c		44
-rw-r--r--	mm/sparse.c		10
-rw-r--r--	mm/swapfile.c		4
-rw-r--r--	mm/vmscan.c		107
19 files changed, 245 insertions, 220 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 434be4ae7a04..f468185b3b28 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -198,8 +198,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 			int order = ilog2(BITS_PER_LONG);
 
 			__free_pages_bootmem(pfn_to_page(start), order);
-			fixup_zone_present_pages(page_to_nid(pfn_to_page(start)),
-					start, start + BITS_PER_LONG);
 			count += BITS_PER_LONG;
 			start += BITS_PER_LONG;
 		} else {
@@ -210,9 +208,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 				if (vec & 1) {
 					page = pfn_to_page(start + off);
 					__free_pages_bootmem(page, 0);
-					fixup_zone_present_pages(
-						page_to_nid(page),
-						start + off, start + off + 1);
 					count++;
 				}
 				vec >>= 1;
@@ -226,11 +221,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	pages = bdata->node_low_pfn - bdata->node_min_pfn;
 	pages = bootmem_bootmap_pages(pages);
 	count += pages;
-	while (pages--) {
-		fixup_zone_present_pages(page_to_nid(page),
-				page_to_pfn(page), page_to_pfn(page) + 1);
+	while (pages--)
 		__free_pages_bootmem(page++, 0);
-	}
 
 	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 9eef55838fca..694eaabaaebd 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -713,7 +713,15 @@ static void isolate_freepages(struct zone *zone,
 
 		/* Found a block suitable for isolating free pages from */
 		isolated = 0;
-		end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
+
+		/*
+		 * As pfn may not start aligned, pfn+pageblock_nr_page
+		 * may cross a MAX_ORDER_NR_PAGES boundary and miss
+		 * a pfn_valid check. Ensure isolate_freepages_block()
+		 * only scans within a pageblock
+		 */
+		end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+		end_pfn = min(end_pfn, zone_end_pfn);
 		isolated = isolate_freepages_block(cc, pfn, end_pfn,
 						   freelist, false);
 		nr_freepages += isolated;
diff --git a/mm/highmem.c b/mm/highmem.c
index d517cd16a6eb..2da13a5c50e2 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -98,7 +98,7 @@ struct page *kmap_to_page(void *vaddr)
 {
 	unsigned long addr = (unsigned long)vaddr;
 
-	if (addr >= PKMAP_ADDR(0) && addr <= PKMAP_ADDR(LAST_PKMAP)) {
+	if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) {
 		int i = (addr - PKMAP_ADDR(0)) >> PAGE_SHIFT;
 		return pte_page(pkmap_page_table[i]);
 	}
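The one-line change above is an off-by-one fix: the pkmap area provides LAST_PKMAP page slots, so valid pkmap virtual addresses form the half-open interval [PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)), and the old "<=" accepted one page past the end. A minimal userspace sketch of the same interval check, using made-up stand-in values for PKMAP_BASE, PAGE_SHIFT and LAST_PKMAP rather than any real architecture's configuration:

#include <stdio.h>

/* Stand-in values for illustration only; the kernel derives these from
 * the architecture (PKMAP_BASE, PAGE_SHIFT, LAST_PKMAP). */
#define PAGE_SHIFT	12UL
#define LAST_PKMAP	512UL
#define PKMAP_BASE	0xfe000000UL
#define PKMAP_ADDR(nr)	(PKMAP_BASE + ((nr) << PAGE_SHIFT))

/* Valid pkmap addresses: [PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)) */
static int in_pkmap_range(unsigned long addr)
{
	return addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP);
}

int main(void)
{
	unsigned long last_byte = PKMAP_ADDR(LAST_PKMAP) - 1;
	unsigned long one_past = PKMAP_ADDR(LAST_PKMAP);

	printf("last mapped byte: %d\n", in_pkmap_range(last_byte)); /* 1 */
	printf("first byte past:  %d\n", in_pkmap_range(one_past));  /* 0; "<=" would wrongly accept it */
	return 0;
}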
diff --git a/mm/memblock.c b/mm/memblock.c
index 931eef145af5..625905523c2a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -930,6 +930,30 @@ int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t si
 	return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
 }
 
+void __init_memblock memblock_trim_memory(phys_addr_t align)
+{
+	int i;
+	phys_addr_t start, end, orig_start, orig_end;
+	struct memblock_type *mem = &memblock.memory;
+
+	for (i = 0; i < mem->cnt; i++) {
+		orig_start = mem->regions[i].base;
+		orig_end = mem->regions[i].base + mem->regions[i].size;
+		start = round_up(orig_start, align);
+		end = round_down(orig_end, align);
+
+		if (start == orig_start && end == orig_end)
+			continue;
+
+		if (start < end) {
+			mem->regions[i].base = start;
+			mem->regions[i].size = end - start;
+		} else {
+			memblock_remove_region(mem, i);
+			i--;
+		}
+	}
+}
 
 void __init_memblock memblock_set_current_limit(phys_addr_t limit)
 {
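memblock_trim_memory(), added above, clips each memory region to the requested alignment and discards regions that round away to nothing, so partially covered pages are never handed to the page allocator. A minimal, self-contained sketch of the same round_up/round_down arithmetic on one region, with illustrative numbers and local stand-ins for the kernel helpers:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_t;

/* Local stand-ins for the kernel's round_up()/round_down(); align must be
 * a power of two, as it is for page-sized trimming. */
static phys_addr_t round_up(phys_addr_t x, phys_addr_t align)
{
	return (x + align - 1) & ~(align - 1);
}

static phys_addr_t round_down(phys_addr_t x, phys_addr_t align)
{
	return x & ~(align - 1);
}

int main(void)
{
	/* Illustrative region that is unaligned at both ends. */
	phys_addr_t orig_start = 0x00100200ULL;
	phys_addr_t orig_end = 0x0010fd00ULL;
	phys_addr_t align = 0x1000;	/* 4K pages */

	phys_addr_t start = round_up(orig_start, align);
	phys_addr_t end = round_down(orig_end, align);

	if (start < end)
		printf("trimmed region: [%#llx, %#llx)\n",
		       (unsigned long long)start, (unsigned long long)end);
	else
		printf("region has no fully aligned block and would be removed\n");
	return 0;
}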
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7acf43bf04a2..dd39ba000b31 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1055,12 +1055,24 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
 				      struct mem_cgroup *memcg)
 {
 	struct mem_cgroup_per_zone *mz;
+	struct lruvec *lruvec;
 
-	if (mem_cgroup_disabled())
-		return &zone->lruvec;
+	if (mem_cgroup_disabled()) {
+		lruvec = &zone->lruvec;
+		goto out;
+	}
 
 	mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
-	return &mz->lruvec;
+	lruvec = &mz->lruvec;
+out:
+	/*
+	 * Since a node can be onlined after the mem_cgroup was created,
+	 * we have to be prepared to initialize lruvec->zone here;
+	 * and if offlined then reonlined, we need to reinitialize it.
+	 */
+	if (unlikely(lruvec->zone != zone))
+		lruvec->zone = zone;
+	return lruvec;
 }
 
 /*
@@ -1087,9 +1099,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
 	struct mem_cgroup_per_zone *mz;
 	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
+	struct lruvec *lruvec;
 
-	if (mem_cgroup_disabled())
-		return &zone->lruvec;
+	if (mem_cgroup_disabled()) {
+		lruvec = &zone->lruvec;
+		goto out;
+	}
 
 	pc = lookup_page_cgroup(page);
 	memcg = pc->mem_cgroup;
@@ -1107,7 +1122,16 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
 		pc->mem_cgroup = memcg = root_mem_cgroup;
 
 	mz = page_cgroup_zoneinfo(memcg, page);
-	return &mz->lruvec;
+	lruvec = &mz->lruvec;
+out:
+	/*
+	 * Since a node can be onlined after the mem_cgroup was created,
+	 * we have to be prepared to initialize lruvec->zone here;
+	 * and if offlined then reonlined, we need to reinitialize it.
+	 */
+	if (unlikely(lruvec->zone != zone))
+		lruvec->zone = zone;
+	return lruvec;
 }
 
 /**
@@ -1452,17 +1476,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
 	u64 limit;
-	u64 memsw;
 
 	limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
-	limit += total_swap_pages << PAGE_SHIFT;
 
-	memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
 	/*
-	 * If memsw is finite and limits the amount of swap space available
-	 * to this memcg, return that limit.
+	 * Do not consider swap space if we cannot swap due to swappiness
 	 */
-	return min(limit, memsw);
+	if (mem_cgroup_swappiness(memcg)) {
+		u64 memsw;
+
+		limit += total_swap_pages << PAGE_SHIFT;
+		memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+
+		/*
+		 * If memsw is finite and limits the amount of swap space
+		 * available to this memcg, return that limit.
+		 */
+		limit = min(limit, memsw);
+	}
+
+	return limit;
 }
 
 void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
@@ -3688,17 +3721,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 				int node, int zid, enum lru_list lru)
 {
-	struct mem_cgroup_per_zone *mz;
+	struct lruvec *lruvec;
 	unsigned long flags, loop;
 	struct list_head *list;
 	struct page *busy;
 	struct zone *zone;
 
 	zone = &NODE_DATA(node)->node_zones[zid];
-	mz = mem_cgroup_zoneinfo(memcg, node, zid);
-	list = &mz->lruvec.lists[lru];
+	lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+	list = &lruvec->lists[lru];
 
-	loop = mz->lru_size[lru];
+	loop = mem_cgroup_get_lru_size(lruvec, lru);
 	/* give some margin against EBUSY etc...*/
 	loop += 256;
 	busy = NULL;
@@ -4736,7 +4769,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
-		lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]);
+		lruvec_init(&mz->lruvec);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
 		mz->memcg = memcg;
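In the reworked mem_cgroup_get_limit() above, swap is only counted toward the effective limit when the group's swappiness is non-zero, and the memory+swap (memsw) limit then caps the sum. A small numeric sketch of that calculation; the function name and the byte figures below are invented purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the new logic: swap headroom is added only if the memcg can
 * actually swap (swappiness != 0), and memsw caps the combined value. */
static uint64_t effective_limit(uint64_t res_limit, uint64_t memsw_limit,
				uint64_t total_swap_bytes, int swappiness)
{
	uint64_t limit = res_limit;

	if (swappiness) {
		limit += total_swap_bytes;
		if (memsw_limit < limit)
			limit = memsw_limit;
	}
	return limit;
}

int main(void)
{
	const uint64_t MiB = 1024 * 1024;

	/* swappiness 60: memory limit plus swap, clamped by memsw -> 768 MiB */
	printf("%llu MiB\n", (unsigned long long)
	       (effective_limit(512 * MiB, 768 * MiB, 1024 * MiB, 60) / MiB));

	/* swappiness 0: swap cannot be used, only the memory limit counts -> 512 MiB */
	printf("%llu MiB\n", (unsigned long long)
	       (effective_limit(512 * MiB, 768 * MiB, 1024 * MiB, 0) / MiB));
	return 0;
}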
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6c5899b9034a..8b20278be6a6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1476,9 +1476,17 @@ int soft_offline_page(struct page *page, int flags)
 {
 	int ret;
 	unsigned long pfn = page_to_pfn(page);
+	struct page *hpage = compound_trans_head(page);
 
 	if (PageHuge(page))
 		return soft_offline_huge_page(page, flags);
+	if (PageTransHuge(hpage)) {
+		if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
+			pr_info("soft offline: %#lx: failed to split THP\n",
+				pfn);
+			return -EBUSY;
+		}
+	}
 
 	ret = get_any_page(page, pfn, flags);
 	if (ret < 0)
diff --git a/mm/memory.c b/mm/memory.c
index fb135ba4aba9..221fc9ffcab1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2527,9 +2527,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int ret = 0;
 	int page_mkwrite = 0;
 	struct page *dirty_page = NULL;
-	unsigned long mmun_start;	/* For mmu_notifiers */
-	unsigned long mmun_end;		/* For mmu_notifiers */
-	bool mmun_called = false;	/* For mmu_notifiers */
+	unsigned long mmun_start = 0;	/* For mmu_notifiers */
+	unsigned long mmun_end = 0;	/* For mmu_notifiers */
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
@@ -2708,8 +2707,7 @@ gotten:
 		goto oom_free_new;
 
 	mmun_start = address & PAGE_MASK;
-	mmun_end = (address & PAGE_MASK) + PAGE_SIZE;
-	mmun_called = true;
+	mmun_end = mmun_start + PAGE_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
 	/*
@@ -2778,7 +2776,7 @@ gotten:
 	page_cache_release(new_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
-	if (mmun_called)
+	if (mmun_end > mmun_start)
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	if (old_page) {
 		/*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 56b758ae57d2..e4eeacae2b91 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -106,7 +106,6 @@ static void get_page_bootmem(unsigned long info, struct page *page,
 void __ref put_page_bootmem(struct page *page)
 {
 	unsigned long type;
-	struct zone *zone;
 
 	type = (unsigned long) page->lru.next;
 	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
@@ -117,12 +116,6 @@ void __ref put_page_bootmem(struct page *page)
 		set_page_private(page, 0);
 		INIT_LIST_HEAD(&page->lru);
 		__free_pages_bootmem(page, 0);
-
-		zone = page_zone(page);
-		zone_span_writelock(zone);
-		zone->present_pages++;
-		zone_span_writeunlock(zone);
-		totalram_pages++;
 	}
 
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d04a8a54c294..4ea600da8940 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2037,28 +2037,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 	return new;
 }
 
-/*
- * If *frompol needs [has] an extra ref, copy *frompol to *tompol ,
- * eliminate the * MPOL_F_* flags that require conditional ref and
- * [NOTE!!!] drop the extra ref. Not safe to reference *frompol directly
- * after return. Use the returned value.
- *
- * Allows use of a mempolicy for, e.g., multiple allocations with a single
- * policy lookup, even if the policy needs/has extra ref on lookup.
- * shmem_readahead needs this.
- */
-struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
-						struct mempolicy *frompol)
-{
-	if (!mpol_needs_cond_ref(frompol))
-		return frompol;
-
-	*tompol = *frompol;
-	tompol->flags &= ~MPOL_F_SHARED;	/* copy doesn't need unref */
-	__mpol_put(frompol);
-	return tompol;
-}
-
 /* Slow path of a mempolicy comparison */
 bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -334,8 +334,10 @@ void validate_mm(struct mm_struct *mm)
 	struct vm_area_struct *vma = mm->mmap;
 	while (vma) {
 		struct anon_vma_chain *avc;
+		vma_lock_anon_vma(vma);
 		list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
 			anon_vma_interval_tree_verify(avc);
+		vma_unlock_anon_vma(vma);
 		vma = vma->vm_next;
 		i++;
 	}
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 479a1e751a73..8a5ac8c686b0 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -196,28 +196,28 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
 	/*
 	 * Verify that mmu_notifier_init() already run and the global srcu is
 	 * initialized.
 	 */
 	BUG_ON(!srcu.per_cpu_ref);
 
+	ret = -ENOMEM;
+	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
+	if (unlikely(!mmu_notifier_mm))
+		goto out;
+
 	if (take_mmap_sem)
 		down_write(&mm->mmap_sem);
 	ret = mm_take_all_locks(mm);
 	if (unlikely(ret))
-		goto out;
+		goto out_clean;
 
 	if (!mm_has_notifiers(mm)) {
-		mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm),
-					GFP_KERNEL);
-		if (unlikely(!mmu_notifier_mm)) {
-			ret = -ENOMEM;
-			goto out_of_mem;
-		}
 		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
 		spin_lock_init(&mmu_notifier_mm->lock);
 
 		mm->mmu_notifier_mm = mmu_notifier_mm;
+		mmu_notifier_mm = NULL;
 	}
 	atomic_inc(&mm->mm_count);
 
@@ -233,12 +233,12 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 	hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
 	spin_unlock(&mm->mmu_notifier_mm->lock);
 
-out_of_mem:
 	mm_drop_all_locks(mm);
-out:
+out_clean:
 	if (take_mmap_sem)
 		up_write(&mm->mmap_sem);
-
+	kfree(mmu_notifier_mm);
+out:
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 	return ret;
 }
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 3cef80f6ac79..4596d81b89b1 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -87,7 +87,7 @@ int memmap_valid_within(unsigned long pfn,
 }
 #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
 
-void lruvec_init(struct lruvec *lruvec, struct zone *zone)
+void lruvec_init(struct lruvec *lruvec)
 {
 	enum lru_list lru;
 
@@ -95,8 +95,4 @@ void lruvec_init(struct lruvec *lruvec, struct zone *zone)
 
 	for_each_lru(lru)
 		INIT_LIST_HEAD(&lruvec->lists[lru]);
-
-#ifdef CONFIG_MEMCG
-	lruvec->zone = zone;
-#endif
 }
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 714d5d650470..bd82f6b31411 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -116,8 +116,6 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
 		return 0;
 
 	__free_pages_memory(start_pfn, end_pfn);
-	fixup_zone_present_pages(pfn_to_nid(start >> PAGE_SHIFT),
-			start_pfn, end_pfn);
 
 	return end_pfn - start_pfn;
 }
@@ -128,7 +126,6 @@ unsigned long __init free_low_memory_core_early(int nodeid)
 	phys_addr_t start, end, size;
 	u64 i;
 
-	reset_zone_present_pages();
 	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
 		count += __free_memory_core(start, end);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bb90971182bd..a8f2c87792c3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1405,7 +1405,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 
 	mt = get_pageblock_migratetype(page);
 	if (unlikely(mt != MIGRATE_ISOLATE))
-		__mod_zone_freepage_state(zone, -(1UL << order), mt);
+		__mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
 
 	if (alloc_order != order)
 		expand(zone, page, alloc_order, order,
@@ -1422,7 +1422,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		}
 	}
 
-	return 1UL << order;
+	return 1UL << alloc_order;
 }
 
 /*
@@ -1809,10 +1809,10 @@ static void __paginginit init_zone_allows_reclaim(int nid)
 	int i;
 
 	for_each_online_node(i)
-		if (node_distance(nid, i) <= RECLAIM_DISTANCE) {
+		if (node_distance(nid, i) <= RECLAIM_DISTANCE)
 			node_set(i, NODE_DATA(nid)->reclaim_nodes);
+		else
 			zone_reclaim_mode = 1;
-		}
 }
 
 #else /* CONFIG_NUMA */
@@ -2378,6 +2378,15 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 	return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
 }
 
+/* Returns true if the allocation is likely for THP */
+static bool is_thp_alloc(gfp_t gfp_mask, unsigned int order)
+{
+	if (order == pageblock_order &&
+	    (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
+		return true;
+	return false;
+}
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2416,7 +2425,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	wake_all_kswapd(order, zonelist, high_zoneidx,
-					zone_idx(preferred_zone));
+	/* The decision whether to wake kswapd for THP is made later */
+	if (!is_thp_alloc(gfp_mask, order))
+		wake_all_kswapd(order, zonelist, high_zoneidx,
+						zone_idx(preferred_zone));
 
 	/*
@@ -2487,15 +2498,21 @@ rebalance:
 		goto got_pg;
 	sync_migration = true;
 
-	/*
-	 * If compaction is deferred for high-order allocations, it is because
-	 * sync compaction recently failed. In this is the case and the caller
-	 * requested a movable allocation that does not heavily disrupt the
-	 * system then fail the allocation instead of entering direct reclaim.
-	 */
-	if ((deferred_compaction || contended_compaction) &&
-	    (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
-		goto nopage;
+	if (is_thp_alloc(gfp_mask, order)) {
+		/*
+		 * If compaction is deferred for high-order allocations, it is
+		 * because sync compaction recently failed. If this is the case
+		 * and the caller requested a movable allocation that does not
+		 * heavily disrupt the system then fail the allocation instead
+		 * of entering direct reclaim.
+		 */
+		if (deferred_compaction || contended_compaction)
+			goto nopage;
+
+		/* If process is willing to reclaim/compact then wake kswapd */
+		wake_all_kswapd(order, zonelist, high_zoneidx,
+					zone_idx(preferred_zone));
+	}
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -4505,7 +4522,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->zone_pgdat = pgdat;
 
 		zone_pcp_init(zone);
-		lruvec_init(&zone->lruvec, zone);
+		lruvec_init(&zone->lruvec);
 		if (!size)
 			continue;
 
@@ -5825,7 +5842,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	ret = start_isolate_page_range(pfn_max_align_down(start),
 				       pfn_max_align_up(end), migratetype);
 	if (ret)
-		goto done;
+		return ret;
 
 	ret = __alloc_contig_migrate_range(&cc, start, end);
 	if (ret)
@@ -6098,37 +6115,3 @@ void dump_page(struct page *page)
 	dump_page_flags(page->flags);
 	mem_cgroup_print_bad_page(page);
 }
-
-/* reset zone->present_pages */
-void reset_zone_present_pages(void)
-{
-	struct zone *z;
-	int i, nid;
-
-	for_each_node_state(nid, N_HIGH_MEMORY) {
-		for (i = 0; i < MAX_NR_ZONES; i++) {
-			z = NODE_DATA(nid)->node_zones + i;
-			z->present_pages = 0;
-		}
-	}
-}
-
-/* calculate zone's present pages in buddy system */
-void fixup_zone_present_pages(int nid, unsigned long start_pfn,
-				unsigned long end_pfn)
-{
-	struct zone *z;
-	unsigned long zone_start_pfn, zone_end_pfn;
-	int i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		z = NODE_DATA(nid)->node_zones + i;
-		zone_start_pfn = z->zone_start_pfn;
-		zone_end_pfn = zone_start_pfn + z->spanned_pages;
-
-		/* if the two regions intersect */
-		if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn))
-			z->present_pages += min(end_pfn, zone_end_pfn) -
-				max(start_pfn, zone_start_pfn);
-	}
-}
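The is_thp_alloc() helper introduced above classifies a request as a likely transparent-hugepage allocation purely from its order and gfp flags: pageblock order, __GFP_MOVABLE set, __GFP_REPEAT clear. Only such requests have the kswapd wakeup deferred until compaction is known not to be deferred or contended. A standalone sketch of that flag test, using stand-in values for the gfp bits and pageblock order rather than the kernel's actual definitions:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;

/* Stand-in bit values and order for illustration; the real __GFP_* flags
 * and pageblock_order come from the kernel headers and configuration. */
#define __GFP_MOVABLE	0x08u
#define __GFP_REPEAT	0x400u
#define PAGEBLOCK_ORDER	9

/* A pageblock-order request that is movable but not __GFP_REPEAT is
 * treated as a THP allocation attempt. */
static bool is_thp_alloc(gfp_t gfp_mask, unsigned int order)
{
	return order == PAGEBLOCK_ORDER &&
	       (gfp_mask & (__GFP_MOVABLE | __GFP_REPEAT)) == __GFP_MOVABLE;
}

int main(void)
{
	printf("%d\n", is_thp_alloc(__GFP_MOVABLE, PAGEBLOCK_ORDER));                /* 1: THP-style */
	printf("%d\n", is_thp_alloc(__GFP_MOVABLE | __GFP_REPEAT, PAGEBLOCK_ORDER)); /* 0: hugetlb-style */
	printf("%d\n", is_thp_alloc(__GFP_MOVABLE, 0));                              /* 0: order-0 */
	return 0;
}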
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -56,6 +56,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
 #include <linux/hugetlb.h>
+#include <linux/backing-dev.h>
 
 #include <asm/tlbflush.h>
 
@@ -926,11 +927,8 @@ int page_mkclean(struct page *page)
 
 	if (page_mapped(page)) {
 		struct address_space *mapping = page_mapping(page);
-		if (mapping) {
+		if (mapping)
 			ret = page_mkclean_file(mapping, page);
-			if (page_test_and_clear_dirty(page_to_pfn(page), 1))
-				ret = 1;
-		}
 	}
 
 	return ret;
@@ -1116,6 +1114,7 @@ void page_add_file_rmap(struct page *page)
  */
 void page_remove_rmap(struct page *page)
 {
+	struct address_space *mapping = page_mapping(page);
 	bool anon = PageAnon(page);
 	bool locked;
 	unsigned long flags;
@@ -1138,8 +1137,19 @@ void page_remove_rmap(struct page *page)
 	 * this if the page is anon, so about to be freed; but perhaps
 	 * not if it's in swapcache - there might be another pte slot
 	 * containing the swap entry, but page not yet written to swap.
+	 *
+	 * And we can skip it on file pages, so long as the filesystem
+	 * participates in dirty tracking; but need to catch shm and tmpfs
+	 * and ramfs pages which have been modified since creation by read
+	 * fault.
+	 *
+	 * Note that mapping must be decided above, before decrementing
+	 * mapcount (which luckily provides a barrier): once page is unmapped,
+	 * it could be truncated and page->mapping reset to NULL at any moment.
+	 * Note also that we are relying on page_mapping(page) to set mapping
+	 * to &swapper_space when PageSwapCache(page).
 	 */
-	if ((!anon || PageSwapCache(page)) &&
+	if (mapping && !mapping_cap_account_dirty(mapping) &&
 	    page_test_and_clear_dirty(page_to_pfn(page), 1))
 		set_page_dirty(page);
 	/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 67afba5117f2..50c5b8f3a359 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -643,7 +643,7 @@ static void shmem_evict_inode(struct inode *inode)
 		kfree(info->symlink);
 
 	simple_xattrs_free(&info->xattrs);
-	BUG_ON(inode->i_blocks);
+	WARN_ON(inode->i_blocks);
 	shmem_free_inode(inode->i_sb);
 	clear_inode(inode);
 }
@@ -910,25 +910,29 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
 			struct shmem_inode_info *info, pgoff_t index)
 {
-	struct mempolicy mpol, *spol;
 	struct vm_area_struct pvma;
-
-	spol = mpol_cond_copy(&mpol,
-			mpol_shared_policy_lookup(&info->policy, index));
+	struct page *page;
 
 	/* Create a pseudo vma that just contains the policy */
 	pvma.vm_start = 0;
 	/* Bias interleave by inode number to distribute better across nodes */
 	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
 	pvma.vm_ops = NULL;
-	pvma.vm_policy = spol;
-	return swapin_readahead(swap, gfp, &pvma, 0);
+	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+
+	page = swapin_readahead(swap, gfp, &pvma, 0);
+
+	/* Drop reference taken by mpol_shared_policy_lookup() */
+	mpol_cond_put(pvma.vm_policy);
+
+	return page;
 }
 
 static struct page *shmem_alloc_page(gfp_t gfp,
 			struct shmem_inode_info *info, pgoff_t index)
 {
 	struct vm_area_struct pvma;
+	struct page *page;
 
 	/* Create a pseudo vma that just contains the policy */
 	pvma.vm_start = 0;
@@ -937,10 +941,12 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 	pvma.vm_ops = NULL;
 	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
 
-	/*
-	 * alloc_page_vma() will drop the shared policy reference
-	 */
-	return alloc_page_vma(gfp, &pvma, 0);
+	page = alloc_page_vma(gfp, &pvma, 0);
+
+	/* Drop reference taken by mpol_shared_policy_lookup() */
+	mpol_cond_put(pvma.vm_policy);
+
+	return page;
 }
 #else /* !CONFIG_NUMA */
 #ifdef CONFIG_TMPFS
@@ -1145,8 +1151,20 @@ repeat:
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,
 						gfp, swp_to_radix_entry(swap));
-			/* We already confirmed swap, and make no allocation */
-			VM_BUG_ON(error);
+			/*
+			 * We already confirmed swap under page lock, and make
+			 * no memory allocation here, so usually no possibility
+			 * of error; but free_swap_and_cache() only trylocks a
+			 * page, so it is just possible that the entry has been
+			 * truncated or holepunched since swap was confirmed.
+			 * shmem_undo_range() will have done some of the
+			 * unaccounting, now delete_from_swap_cache() will do
+			 * the rest (including mem_cgroup_uncharge_swapcache).
+			 * Reset swap.val? No, leave it so "failed" goes back to
+			 * "repeat": reading a hole and writing should succeed.
+			 */
+			if (error)
+				delete_from_swap_cache(page);
 		}
 		if (error)
 			goto failed;
diff --git a/mm/sparse.c b/mm/sparse.c
index fac95f2888f2..a83de2f72b30 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -617,7 +617,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 {
 	return; /* XXX: Not implemented yet */
 }
-static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
 {
 }
 #else
@@ -658,10 +658,11 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 			get_order(sizeof(struct page) * nr_pages));
 }
 
-static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
 {
 	unsigned long maps_section_nr, removing_section_nr, i;
 	unsigned long magic;
+	struct page *page = virt_to_page(memmap);
 
 	for (i = 0; i < nr_pages; i++, page++) {
 		magic = (unsigned long) page->lru.next;
@@ -710,13 +711,10 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
 	 */
 
 	if (memmap) {
-		struct page *memmap_page;
-		memmap_page = virt_to_page(memmap);
-
 		nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
 			>> PAGE_SHIFT;
 
-		free_map_bootmem(memmap_page, nr_pages);
+		free_map_bootmem(memmap, nr_pages);
 	}
 }
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 71cd288b2001..f91a25547ffe 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1494,9 +1494,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	BUG_ON(!current->mm);
 
 	pathname = getname(specialfile);
-	err = PTR_ERR(pathname);
 	if (IS_ERR(pathname))
-		goto out;
+		return PTR_ERR(pathname);
 
 	victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
 	err = PTR_ERR(victim);
@@ -1608,6 +1607,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 out_dput:
 	filp_close(victim, NULL);
 out:
+	putname(pathname);
 	return err;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2624edcfb420..124bbfe5cc52 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1760,28 +1760,6 @@ static bool in_reclaim_compaction(struct scan_control *sc)
 	return false;
 }
 
-#ifdef CONFIG_COMPACTION
-/*
- * If compaction is deferred for sc->order then scale the number of pages
- * reclaimed based on the number of consecutive allocation failures
- */
-static unsigned long scale_for_compaction(unsigned long pages_for_compaction,
-			struct lruvec *lruvec, struct scan_control *sc)
-{
-	struct zone *zone = lruvec_zone(lruvec);
-
-	if (zone->compact_order_failed <= sc->order)
-		pages_for_compaction <<= zone->compact_defer_shift;
-	return pages_for_compaction;
-}
-#else
-static unsigned long scale_for_compaction(unsigned long pages_for_compaction,
-			struct lruvec *lruvec, struct scan_control *sc)
-{
-	return pages_for_compaction;
-}
-#endif
-
 /*
  * Reclaim/compaction is used for high-order allocation requests. It reclaims
  * order-0 pages before compacting the zone. should_continue_reclaim() returns
@@ -1829,9 +1807,6 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-
-	pages_for_compaction = scale_for_compaction(pages_for_compaction,
-						lruvec, sc);
 	inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
 	if (nr_swap_pages > 0)
 		inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
@@ -2232,9 +2207,12 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
 * Throttle direct reclaimers if backing storage is backed by the network
 * and the PFMEMALLOC reserve for the preferred node is getting dangerously
 * depleted. kswapd will continue to make progress and wake the processes
- * when the low watermark is reached
+ * when the low watermark is reached.
+ *
+ * Returns true if a fatal signal was delivered during throttling. If this
+ * happens, the page allocator should not consider triggering the OOM killer.
 */
-static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
+static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 					nodemask_t *nodemask)
 {
 	struct zone *zone;
@@ -2249,13 +2227,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	 * processes to block on log_wait_commit().
 	 */
 	if (current->flags & PF_KTHREAD)
-		return;
+		goto out;
+
+	/*
+	 * If a fatal signal is pending, this process should not throttle.
+	 * It should return quickly so it can exit and free its memory
+	 */
+	if (fatal_signal_pending(current))
+		goto out;
 
 	/* Check if the pfmemalloc reserves are ok */
 	first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
 	pgdat = zone->zone_pgdat;
 	if (pfmemalloc_watermark_ok(pgdat))
-		return;
+		goto out;
 
 	/* Account for the throttling */
 	count_vm_event(PGSCAN_DIRECT_THROTTLE);
@@ -2271,12 +2256,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	if (!(gfp_mask & __GFP_FS)) {
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
 			pfmemalloc_watermark_ok(pgdat), HZ);
-		return;
+
+		goto check_pending;
 	}
 
 	/* Throttle until kswapd wakes the process */
 	wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
 		pfmemalloc_watermark_ok(pgdat));
+
+check_pending:
+	if (fatal_signal_pending(current))
+		return true;
+
+out:
+	return false;
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
@@ -2298,13 +2291,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.gfp_mask = sc.gfp_mask,
 	};
 
-	throttle_direct_reclaim(gfp_mask, zonelist, nodemask);
-
 	/*
-	 * Do not enter reclaim if fatal signal is pending. 1 is returned so
-	 * that the page allocator does not consider triggering OOM
+	 * Do not enter reclaim if fatal signal was delivered while throttled.
+	 * 1 is returned so that the page allocator does not OOM kill at this
+	 * point.
 	 */
-	if (fatal_signal_pending(current))
+	if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask))
 		return 1;
 
 	trace_mm_vmscan_direct_reclaim_begin(order,
@@ -2422,6 +2414,19 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
 	} while (memcg);
 }
 
+static bool zone_balanced(struct zone *zone, int order,
+			  unsigned long balance_gap, int classzone_idx)
+{
+	if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
+				    balance_gap, classzone_idx, 0))
+		return false;
+
+	if (COMPACTION_BUILD && order && !compaction_suitable(zone, order))
+		return false;
+
+	return true;
+}
+
 /*
 * pgdat_balanced is used when checking if a node is balanced for high-order
 * allocations. Only zones that meet watermarks and are in a zone allowed
@@ -2500,8 +2505,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 			continue;
 		}
 
-		if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-							i, 0))
+		if (!zone_balanced(zone, order, 0, i))
 			all_zones_ok = false;
 		else
 			balanced += zone->present_pages;
@@ -2610,8 +2614,7 @@ loop_again:
 				break;
 			}
 
-			if (!zone_watermark_ok_safe(zone, order,
-					high_wmark_pages(zone), 0, 0)) {
+			if (!zone_balanced(zone, order, 0, 0)) {
 				end_zone = i;
 				break;
 			} else {
@@ -2687,9 +2690,8 @@ loop_again:
 				testorder = 0;
 
 			if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
-				    !zone_watermark_ok_safe(zone, testorder,
-					high_wmark_pages(zone) + balance_gap,
-					end_zone, 0)) {
+			    !zone_balanced(zone, testorder,
+					   balance_gap, end_zone)) {
 				shrink_zone(zone, &sc);
 
 				reclaim_state->reclaimed_slab = 0;
@@ -2716,8 +2718,7 @@ loop_again:
 				continue;
 			}
 
-			if (!zone_watermark_ok_safe(zone, testorder,
-					high_wmark_pages(zone), end_zone, 0)) {
+			if (!zone_balanced(zone, testorder, 0, end_zone)) {
 				all_zones_ok = 0;
 				/*
 				 * We are still under min water mark. This
@@ -2822,22 +2823,6 @@ out:
 		if (!populated_zone(zone))
 			continue;
 
-		if (zone->all_unreclaimable &&
-		    sc.priority != DEF_PRIORITY)
-			continue;
-
-		/* Would compaction fail due to lack of free memory? */
-		if (COMPACTION_BUILD &&
-		    compaction_suitable(zone, order) == COMPACT_SKIPPED)
-			goto loop_again;
-
-		/* Confirm the zone is balanced for order-0 */
-		if (!zone_watermark_ok(zone, 0,
-				       high_wmark_pages(zone), 0, 0)) {
-			order = sc.order = 0;
-			goto loop_again;
-		}
-
 		/* Check if the memory needs to be defragmented. */
 		if (zone_watermark_ok(zone, order,
 				low_wmark_pages(zone), *classzone_idx, 0))
@@ -3017,6 +3002,8 @@ static int kswapd(void *p)
 						&balanced_classzone_idx);
 		}
 	}
+
+	current->reclaim_state = NULL;
 	return 0;
 }
 