diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/bootmem.c | 10 | ||||
-rw-r--r-- | mm/compaction.c | 2 | ||||
-rw-r--r-- | mm/fremap.c | 2 | ||||
-rw-r--r-- | mm/highmem.c | 2 | ||||
-rw-r--r-- | mm/huge_memory.c | 1 | ||||
-rw-r--r-- | mm/memblock.c | 24 | ||||
-rw-r--r-- | mm/memcontrol.c | 67 | ||||
-rw-r--r-- | mm/memory.c | 10 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 7 | ||||
-rw-r--r-- | mm/mempolicy.c | 5 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/mmu_notifier.c | 26 | ||||
-rw-r--r-- | mm/mmzone.c | 6 | ||||
-rw-r--r-- | mm/nobootmem.c | 3 | ||||
-rw-r--r-- | mm/page_alloc.c | 51 | ||||
-rw-r--r-- | mm/rmap.c | 20 | ||||
-rw-r--r-- | mm/shmem.c | 18 | ||||
-rw-r--r-- | mm/slob.c | 6 | ||||
-rw-r--r-- | mm/swapfile.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 64 |
20 files changed, 174 insertions, 156 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c index 434be4ae7a04..f468185b3b28 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -198,8 +198,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
198 | int order = ilog2(BITS_PER_LONG); | 198 | int order = ilog2(BITS_PER_LONG); |
199 | 199 | ||
200 | __free_pages_bootmem(pfn_to_page(start), order); | 200 | __free_pages_bootmem(pfn_to_page(start), order); |
201 | fixup_zone_present_pages(page_to_nid(pfn_to_page(start)), | ||
202 | start, start + BITS_PER_LONG); | ||
203 | count += BITS_PER_LONG; | 201 | count += BITS_PER_LONG; |
204 | start += BITS_PER_LONG; | 202 | start += BITS_PER_LONG; |
205 | } else { | 203 | } else { |
@@ -210,9 +208,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
210 | if (vec & 1) { | 208 | if (vec & 1) { |
211 | page = pfn_to_page(start + off); | 209 | page = pfn_to_page(start + off); |
212 | __free_pages_bootmem(page, 0); | 210 | __free_pages_bootmem(page, 0); |
213 | fixup_zone_present_pages( | ||
214 | page_to_nid(page), | ||
215 | start + off, start + off + 1); | ||
216 | count++; | 211 | count++; |
217 | } | 212 | } |
218 | vec >>= 1; | 213 | vec >>= 1; |
@@ -226,11 +221,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
226 | pages = bdata->node_low_pfn - bdata->node_min_pfn; | 221 | pages = bdata->node_low_pfn - bdata->node_min_pfn; |
227 | pages = bootmem_bootmap_pages(pages); | 222 | pages = bootmem_bootmap_pages(pages); |
228 | count += pages; | 223 | count += pages; |
229 | while (pages--) { | 224 | while (pages--) |
230 | fixup_zone_present_pages(page_to_nid(page), | ||
231 | page_to_pfn(page), page_to_pfn(page) + 1); | ||
232 | __free_pages_bootmem(page++, 0); | 225 | __free_pages_bootmem(page++, 0); |
233 | } | ||
234 | 226 | ||
235 | bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); | 227 | bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); |
236 | 228 | ||
diff --git a/mm/compaction.c b/mm/compaction.c index 2c4ce17651d8..9eef55838fca 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -346,7 +346,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, | |||
346 | * pages requested were isolated. If there were any failures, 0 is | 346 | * pages requested were isolated. If there were any failures, 0 is |
347 | * returned and CMA will fail. | 347 | * returned and CMA will fail. |
348 | */ | 348 | */ |
349 | if (strict && nr_strict_required != total_isolated) | 349 | if (strict && nr_strict_required > total_isolated) |
350 | total_isolated = 0; | 350 | total_isolated = 0; |
351 | 351 | ||
352 | if (locked) | 352 | if (locked) |
diff --git a/mm/fremap.c b/mm/fremap.c index 3899a86851ce..a0aaf0e56800 100644 --- a/mm/fremap.c +++ b/mm/fremap.c | |||
@@ -169,7 +169,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, | |||
169 | if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) | 169 | if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) |
170 | goto out; | 170 | goto out; |
171 | 171 | ||
172 | if (!vma->vm_ops->remap_pages) | 172 | if (!vma->vm_ops || !vma->vm_ops->remap_pages) |
173 | goto out; | 173 | goto out; |
174 | 174 | ||
175 | if (start < vma->vm_start || start + size > vma->vm_end) | 175 | if (start < vma->vm_start || start + size > vma->vm_end) |
diff --git a/mm/highmem.c b/mm/highmem.c index d517cd16a6eb..2da13a5c50e2 100644 --- a/mm/highmem.c +++ b/mm/highmem.c | |||
@@ -98,7 +98,7 @@ struct page *kmap_to_page(void *vaddr) | |||
98 | { | 98 | { |
99 | unsigned long addr = (unsigned long)vaddr; | 99 | unsigned long addr = (unsigned long)vaddr; |
100 | 100 | ||
101 | if (addr >= PKMAP_ADDR(0) && addr <= PKMAP_ADDR(LAST_PKMAP)) { | 101 | if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) { |
102 | int i = (addr - PKMAP_ADDR(0)) >> PAGE_SHIFT; | 102 | int i = (addr - PKMAP_ADDR(0)) >> PAGE_SHIFT; |
103 | return pte_page(pkmap_page_table[i]); | 103 | return pte_page(pkmap_page_table[i]); |
104 | } | 104 | } |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a863af26c79c..40f17c34b415 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/khugepaged.h> | 17 | #include <linux/khugepaged.h> |
18 | #include <linux/freezer.h> | 18 | #include <linux/freezer.h> |
19 | #include <linux/mman.h> | 19 | #include <linux/mman.h> |
20 | #include <linux/pagemap.h> | ||
20 | #include <asm/tlb.h> | 21 | #include <asm/tlb.h> |
21 | #include <asm/pgalloc.h> | 22 | #include <asm/pgalloc.h> |
22 | #include "internal.h" | 23 | #include "internal.h" |
diff --git a/mm/memblock.c b/mm/memblock.c index 931eef145af5..625905523c2a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -930,6 +930,30 @@ int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t si | |||
930 | return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; | 930 | return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; |
931 | } | 931 | } |
932 | 932 | ||
933 | void __init_memblock memblock_trim_memory(phys_addr_t align) | ||
934 | { | ||
935 | int i; | ||
936 | phys_addr_t start, end, orig_start, orig_end; | ||
937 | struct memblock_type *mem = &memblock.memory; | ||
938 | |||
939 | for (i = 0; i < mem->cnt; i++) { | ||
940 | orig_start = mem->regions[i].base; | ||
941 | orig_end = mem->regions[i].base + mem->regions[i].size; | ||
942 | start = round_up(orig_start, align); | ||
943 | end = round_down(orig_end, align); | ||
944 | |||
945 | if (start == orig_start && end == orig_end) | ||
946 | continue; | ||
947 | |||
948 | if (start < end) { | ||
949 | mem->regions[i].base = start; | ||
950 | mem->regions[i].size = end - start; | ||
951 | } else { | ||
952 | memblock_remove_region(mem, i); | ||
953 | i--; | ||
954 | } | ||
955 | } | ||
956 | } | ||
933 | 957 | ||
934 | void __init_memblock memblock_set_current_limit(phys_addr_t limit) | 958 | void __init_memblock memblock_set_current_limit(phys_addr_t limit) |
935 | { | 959 | { |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7acf43bf04a2..dd39ba000b31 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1055,12 +1055,24 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, | |||
1055 | struct mem_cgroup *memcg) | 1055 | struct mem_cgroup *memcg) |
1056 | { | 1056 | { |
1057 | struct mem_cgroup_per_zone *mz; | 1057 | struct mem_cgroup_per_zone *mz; |
1058 | struct lruvec *lruvec; | ||
1058 | 1059 | ||
1059 | if (mem_cgroup_disabled()) | 1060 | if (mem_cgroup_disabled()) { |
1060 | return &zone->lruvec; | 1061 | lruvec = &zone->lruvec; |
1062 | goto out; | ||
1063 | } | ||
1061 | 1064 | ||
1062 | mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone)); | 1065 | mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone)); |
1063 | return &mz->lruvec; | 1066 | lruvec = &mz->lruvec; |
1067 | out: | ||
1068 | /* | ||
1069 | * Since a node can be onlined after the mem_cgroup was created, | ||
1070 | * we have to be prepared to initialize lruvec->zone here; | ||
1071 | * and if offlined then reonlined, we need to reinitialize it. | ||
1072 | */ | ||
1073 | if (unlikely(lruvec->zone != zone)) | ||
1074 | lruvec->zone = zone; | ||
1075 | return lruvec; | ||
1064 | } | 1076 | } |
1065 | 1077 | ||
1066 | /* | 1078 | /* |
@@ -1087,9 +1099,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) | |||
1087 | struct mem_cgroup_per_zone *mz; | 1099 | struct mem_cgroup_per_zone *mz; |
1088 | struct mem_cgroup *memcg; | 1100 | struct mem_cgroup *memcg; |
1089 | struct page_cgroup *pc; | 1101 | struct page_cgroup *pc; |
1102 | struct lruvec *lruvec; | ||
1090 | 1103 | ||
1091 | if (mem_cgroup_disabled()) | 1104 | if (mem_cgroup_disabled()) { |
1092 | return &zone->lruvec; | 1105 | lruvec = &zone->lruvec; |
1106 | goto out; | ||
1107 | } | ||
1093 | 1108 | ||
1094 | pc = lookup_page_cgroup(page); | 1109 | pc = lookup_page_cgroup(page); |
1095 | memcg = pc->mem_cgroup; | 1110 | memcg = pc->mem_cgroup; |
@@ -1107,7 +1122,16 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) | |||
1107 | pc->mem_cgroup = memcg = root_mem_cgroup; | 1122 | pc->mem_cgroup = memcg = root_mem_cgroup; |
1108 | 1123 | ||
1109 | mz = page_cgroup_zoneinfo(memcg, page); | 1124 | mz = page_cgroup_zoneinfo(memcg, page); |
1110 | return &mz->lruvec; | 1125 | lruvec = &mz->lruvec; |
1126 | out: | ||
1127 | /* | ||
1128 | * Since a node can be onlined after the mem_cgroup was created, | ||
1129 | * we have to be prepared to initialize lruvec->zone here; | ||
1130 | * and if offlined then reonlined, we need to reinitialize it. | ||
1131 | */ | ||
1132 | if (unlikely(lruvec->zone != zone)) | ||
1133 | lruvec->zone = zone; | ||
1134 | return lruvec; | ||
1111 | } | 1135 | } |
1112 | 1136 | ||
1113 | /** | 1137 | /** |
@@ -1452,17 +1476,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg) | |||
1452 | static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) | 1476 | static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) |
1453 | { | 1477 | { |
1454 | u64 limit; | 1478 | u64 limit; |
1455 | u64 memsw; | ||
1456 | 1479 | ||
1457 | limit = res_counter_read_u64(&memcg->res, RES_LIMIT); | 1480 | limit = res_counter_read_u64(&memcg->res, RES_LIMIT); |
1458 | limit += total_swap_pages << PAGE_SHIFT; | ||
1459 | 1481 | ||
1460 | memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT); | ||
1461 | /* | 1482 | /* |
1462 | * If memsw is finite and limits the amount of swap space available | 1483 | * Do not consider swap space if we cannot swap due to swappiness |
1463 | * to this memcg, return that limit. | ||
1464 | */ | 1484 | */ |
1465 | return min(limit, memsw); | 1485 | if (mem_cgroup_swappiness(memcg)) { |
1486 | u64 memsw; | ||
1487 | |||
1488 | limit += total_swap_pages << PAGE_SHIFT; | ||
1489 | memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT); | ||
1490 | |||
1491 | /* | ||
1492 | * If memsw is finite and limits the amount of swap space | ||
1493 | * available to this memcg, return that limit. | ||
1494 | */ | ||
1495 | limit = min(limit, memsw); | ||
1496 | } | ||
1497 | |||
1498 | return limit; | ||
1466 | } | 1499 | } |
1467 | 1500 | ||
1468 | void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | 1501 | void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, |
@@ -3688,17 +3721,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
3688 | static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, | 3721 | static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, |
3689 | int node, int zid, enum lru_list lru) | 3722 | int node, int zid, enum lru_list lru) |
3690 | { | 3723 | { |
3691 | struct mem_cgroup_per_zone *mz; | 3724 | struct lruvec *lruvec; |
3692 | unsigned long flags, loop; | 3725 | unsigned long flags, loop; |
3693 | struct list_head *list; | 3726 | struct list_head *list; |
3694 | struct page *busy; | 3727 | struct page *busy; |
3695 | struct zone *zone; | 3728 | struct zone *zone; |
3696 | 3729 | ||
3697 | zone = &NODE_DATA(node)->node_zones[zid]; | 3730 | zone = &NODE_DATA(node)->node_zones[zid]; |
3698 | mz = mem_cgroup_zoneinfo(memcg, node, zid); | 3731 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); |
3699 | list = &mz->lruvec.lists[lru]; | 3732 | list = &lruvec->lists[lru]; |
3700 | 3733 | ||
3701 | loop = mz->lru_size[lru]; | 3734 | loop = mem_cgroup_get_lru_size(lruvec, lru); |
3702 | /* give some margin against EBUSY etc...*/ | 3735 | /* give some margin against EBUSY etc...*/ |
3703 | loop += 256; | 3736 | loop += 256; |
3704 | busy = NULL; | 3737 | busy = NULL; |
@@ -4736,7 +4769,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | |||
4736 | 4769 | ||
4737 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | 4770 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { |
4738 | mz = &pn->zoneinfo[zone]; | 4771 | mz = &pn->zoneinfo[zone]; |
4739 | lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]); | 4772 | lruvec_init(&mz->lruvec); |
4740 | mz->usage_in_excess = 0; | 4773 | mz->usage_in_excess = 0; |
4741 | mz->on_tree = false; | 4774 | mz->on_tree = false; |
4742 | mz->memcg = memcg; | 4775 | mz->memcg = memcg; |
diff --git a/mm/memory.c b/mm/memory.c index fb135ba4aba9..221fc9ffcab1 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -2527,9 +2527,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2527 | int ret = 0; | 2527 | int ret = 0; |
2528 | int page_mkwrite = 0; | 2528 | int page_mkwrite = 0; |
2529 | struct page *dirty_page = NULL; | 2529 | struct page *dirty_page = NULL; |
2530 | unsigned long mmun_start; /* For mmu_notifiers */ | 2530 | unsigned long mmun_start = 0; /* For mmu_notifiers */ |
2531 | unsigned long mmun_end; /* For mmu_notifiers */ | 2531 | unsigned long mmun_end = 0; /* For mmu_notifiers */ |
2532 | bool mmun_called = false; /* For mmu_notifiers */ | ||
2533 | 2532 | ||
2534 | old_page = vm_normal_page(vma, address, orig_pte); | 2533 | old_page = vm_normal_page(vma, address, orig_pte); |
2535 | if (!old_page) { | 2534 | if (!old_page) { |
@@ -2708,8 +2707,7 @@ gotten: | |||
2708 | goto oom_free_new; | 2707 | goto oom_free_new; |
2709 | 2708 | ||
2710 | mmun_start = address & PAGE_MASK; | 2709 | mmun_start = address & PAGE_MASK; |
2711 | mmun_end = (address & PAGE_MASK) + PAGE_SIZE; | 2710 | mmun_end = mmun_start + PAGE_SIZE; |
2712 | mmun_called = true; | ||
2713 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | 2711 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
2714 | 2712 | ||
2715 | /* | 2713 | /* |
@@ -2778,7 +2776,7 @@ gotten: | |||
2778 | page_cache_release(new_page); | 2776 | page_cache_release(new_page); |
2779 | unlock: | 2777 | unlock: |
2780 | pte_unmap_unlock(page_table, ptl); | 2778 | pte_unmap_unlock(page_table, ptl); |
2781 | if (mmun_called) | 2779 | if (mmun_end > mmun_start) |
2782 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | 2780 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
2783 | if (old_page) { | 2781 | if (old_page) { |
2784 | /* | 2782 | /* |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 56b758ae57d2..e4eeacae2b91 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -106,7 +106,6 @@ static void get_page_bootmem(unsigned long info, struct page *page, | |||
106 | void __ref put_page_bootmem(struct page *page) | 106 | void __ref put_page_bootmem(struct page *page) |
107 | { | 107 | { |
108 | unsigned long type; | 108 | unsigned long type; |
109 | struct zone *zone; | ||
110 | 109 | ||
111 | type = (unsigned long) page->lru.next; | 110 | type = (unsigned long) page->lru.next; |
112 | BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || | 111 | BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || |
@@ -117,12 +116,6 @@ void __ref put_page_bootmem(struct page *page) | |||
117 | set_page_private(page, 0); | 116 | set_page_private(page, 0); |
118 | INIT_LIST_HEAD(&page->lru); | 117 | INIT_LIST_HEAD(&page->lru); |
119 | __free_pages_bootmem(page, 0); | 118 | __free_pages_bootmem(page, 0); |
120 | |||
121 | zone = page_zone(page); | ||
122 | zone_span_writelock(zone); | ||
123 | zone->present_pages++; | ||
124 | zone_span_writeunlock(zone); | ||
125 | totalram_pages++; | ||
126 | } | 119 | } |
127 | 120 | ||
128 | } | 121 | } |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0b78fb9ea65b..d04a8a54c294 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1536,9 +1536,8 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, | |||
1536 | * | 1536 | * |
1537 | * Returns effective policy for a VMA at specified address. | 1537 | * Returns effective policy for a VMA at specified address. |
1538 | * Falls back to @task or system default policy, as necessary. | 1538 | * Falls back to @task or system default policy, as necessary. |
1539 | * Current or other task's task mempolicy and non-shared vma policies | 1539 | * Current or other task's task mempolicy and non-shared vma policies must be |
1540 | * are protected by the task's mmap_sem, which must be held for read by | 1540 | * protected by task_lock(task) by the caller. |
1541 | * the caller. | ||
1542 | * Shared policies [those marked as MPOL_F_SHARED] require an extra reference | 1541 | * Shared policies [those marked as MPOL_F_SHARED] require an extra reference |
1543 | * count--added by the get_policy() vm_op, as appropriate--to protect against | 1542 | * count--added by the get_policy() vm_op, as appropriate--to protect against |
1544 | * freeing by another task. It is the caller's responsibility to free the | 1543 | * freeing by another task. It is the caller's responsibility to free the |
@@ -334,8 +334,10 @@ void validate_mm(struct mm_struct *mm) | |||
334 | struct vm_area_struct *vma = mm->mmap; | 334 | struct vm_area_struct *vma = mm->mmap; |
335 | while (vma) { | 335 | while (vma) { |
336 | struct anon_vma_chain *avc; | 336 | struct anon_vma_chain *avc; |
337 | vma_lock_anon_vma(vma); | ||
337 | list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) | 338 | list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) |
338 | anon_vma_interval_tree_verify(avc); | 339 | anon_vma_interval_tree_verify(avc); |
340 | vma_unlock_anon_vma(vma); | ||
339 | vma = vma->vm_next; | 341 | vma = vma->vm_next; |
340 | i++; | 342 | i++; |
341 | } | 343 | } |
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 479a1e751a73..8a5ac8c686b0 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c | |||
@@ -196,28 +196,28 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn, | |||
196 | BUG_ON(atomic_read(&mm->mm_users) <= 0); | 196 | BUG_ON(atomic_read(&mm->mm_users) <= 0); |
197 | 197 | ||
198 | /* | 198 | /* |
199 | * Verify that mmu_notifier_init() already run and the global srcu is | 199 | * Verify that mmu_notifier_init() already run and the global srcu is |
200 | * initialized. | 200 | * initialized. |
201 | */ | 201 | */ |
202 | BUG_ON(!srcu.per_cpu_ref); | 202 | BUG_ON(!srcu.per_cpu_ref); |
203 | 203 | ||
204 | ret = -ENOMEM; | ||
205 | mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); | ||
206 | if (unlikely(!mmu_notifier_mm)) | ||
207 | goto out; | ||
208 | |||
204 | if (take_mmap_sem) | 209 | if (take_mmap_sem) |
205 | down_write(&mm->mmap_sem); | 210 | down_write(&mm->mmap_sem); |
206 | ret = mm_take_all_locks(mm); | 211 | ret = mm_take_all_locks(mm); |
207 | if (unlikely(ret)) | 212 | if (unlikely(ret)) |
208 | goto out; | 213 | goto out_clean; |
209 | 214 | ||
210 | if (!mm_has_notifiers(mm)) { | 215 | if (!mm_has_notifiers(mm)) { |
211 | mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), | ||
212 | GFP_KERNEL); | ||
213 | if (unlikely(!mmu_notifier_mm)) { | ||
214 | ret = -ENOMEM; | ||
215 | goto out_of_mem; | ||
216 | } | ||
217 | INIT_HLIST_HEAD(&mmu_notifier_mm->list); | 216 | INIT_HLIST_HEAD(&mmu_notifier_mm->list); |
218 | spin_lock_init(&mmu_notifier_mm->lock); | 217 | spin_lock_init(&mmu_notifier_mm->lock); |
219 | 218 | ||
220 | mm->mmu_notifier_mm = mmu_notifier_mm; | 219 | mm->mmu_notifier_mm = mmu_notifier_mm; |
220 | mmu_notifier_mm = NULL; | ||
221 | } | 221 | } |
222 | atomic_inc(&mm->mm_count); | 222 | atomic_inc(&mm->mm_count); |
223 | 223 | ||
@@ -233,12 +233,12 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn, | |||
233 | hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list); | 233 | hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list); |
234 | spin_unlock(&mm->mmu_notifier_mm->lock); | 234 | spin_unlock(&mm->mmu_notifier_mm->lock); |
235 | 235 | ||
236 | out_of_mem: | ||
237 | mm_drop_all_locks(mm); | 236 | mm_drop_all_locks(mm); |
238 | out: | 237 | out_clean: |
239 | if (take_mmap_sem) | 238 | if (take_mmap_sem) |
240 | up_write(&mm->mmap_sem); | 239 | up_write(&mm->mmap_sem); |
241 | 240 | kfree(mmu_notifier_mm); | |
241 | out: | ||
242 | BUG_ON(atomic_read(&mm->mm_users) <= 0); | 242 | BUG_ON(atomic_read(&mm->mm_users) <= 0); |
243 | return ret; | 243 | return ret; |
244 | } | 244 | } |
diff --git a/mm/mmzone.c b/mm/mmzone.c index 3cef80f6ac79..4596d81b89b1 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c | |||
@@ -87,7 +87,7 @@ int memmap_valid_within(unsigned long pfn, | |||
87 | } | 87 | } |
88 | #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ | 88 | #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ |
89 | 89 | ||
90 | void lruvec_init(struct lruvec *lruvec, struct zone *zone) | 90 | void lruvec_init(struct lruvec *lruvec) |
91 | { | 91 | { |
92 | enum lru_list lru; | 92 | enum lru_list lru; |
93 | 93 | ||
@@ -95,8 +95,4 @@ void lruvec_init(struct lruvec *lruvec, struct zone *zone) | |||
95 | 95 | ||
96 | for_each_lru(lru) | 96 | for_each_lru(lru) |
97 | INIT_LIST_HEAD(&lruvec->lists[lru]); | 97 | INIT_LIST_HEAD(&lruvec->lists[lru]); |
98 | |||
99 | #ifdef CONFIG_MEMCG | ||
100 | lruvec->zone = zone; | ||
101 | #endif | ||
102 | } | 98 | } |
diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 714d5d650470..bd82f6b31411 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c | |||
@@ -116,8 +116,6 @@ static unsigned long __init __free_memory_core(phys_addr_t start, | |||
116 | return 0; | 116 | return 0; |
117 | 117 | ||
118 | __free_pages_memory(start_pfn, end_pfn); | 118 | __free_pages_memory(start_pfn, end_pfn); |
119 | fixup_zone_present_pages(pfn_to_nid(start >> PAGE_SHIFT), | ||
120 | start_pfn, end_pfn); | ||
121 | 119 | ||
122 | return end_pfn - start_pfn; | 120 | return end_pfn - start_pfn; |
123 | } | 121 | } |
@@ -128,7 +126,6 @@ unsigned long __init free_low_memory_core_early(int nodeid) | |||
128 | phys_addr_t start, end, size; | 126 | phys_addr_t start, end, size; |
129 | u64 i; | 127 | u64 i; |
130 | 128 | ||
131 | reset_zone_present_pages(); | ||
132 | for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) | 129 | for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) |
133 | count += __free_memory_core(start, end); | 130 | count += __free_memory_core(start, end); |
134 | 131 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bb90971182bd..92871579cbee 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1405,7 +1405,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) | |||
1405 | 1405 | ||
1406 | mt = get_pageblock_migratetype(page); | 1406 | mt = get_pageblock_migratetype(page); |
1407 | if (unlikely(mt != MIGRATE_ISOLATE)) | 1407 | if (unlikely(mt != MIGRATE_ISOLATE)) |
1408 | __mod_zone_freepage_state(zone, -(1UL << order), mt); | 1408 | __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); |
1409 | 1409 | ||
1410 | if (alloc_order != order) | 1410 | if (alloc_order != order) |
1411 | expand(zone, page, alloc_order, order, | 1411 | expand(zone, page, alloc_order, order, |
@@ -1809,10 +1809,10 @@ static void __paginginit init_zone_allows_reclaim(int nid) | |||
1809 | int i; | 1809 | int i; |
1810 | 1810 | ||
1811 | for_each_online_node(i) | 1811 | for_each_online_node(i) |
1812 | if (node_distance(nid, i) <= RECLAIM_DISTANCE) { | 1812 | if (node_distance(nid, i) <= RECLAIM_DISTANCE) |
1813 | node_set(i, NODE_DATA(nid)->reclaim_nodes); | 1813 | node_set(i, NODE_DATA(nid)->reclaim_nodes); |
1814 | else | ||
1814 | zone_reclaim_mode = 1; | 1815 | zone_reclaim_mode = 1; |
1815 | } | ||
1816 | } | 1816 | } |
1817 | 1817 | ||
1818 | #else /* CONFIG_NUMA */ | 1818 | #else /* CONFIG_NUMA */ |
@@ -2416,8 +2416,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2416 | goto nopage; | 2416 | goto nopage; |
2417 | 2417 | ||
2418 | restart: | 2418 | restart: |
2419 | wake_all_kswapd(order, zonelist, high_zoneidx, | 2419 | if (!(gfp_mask & __GFP_NO_KSWAPD)) |
2420 | zone_idx(preferred_zone)); | 2420 | wake_all_kswapd(order, zonelist, high_zoneidx, |
2421 | zone_idx(preferred_zone)); | ||
2421 | 2422 | ||
2422 | /* | 2423 | /* |
2423 | * OK, we're below the kswapd watermark and have kicked background | 2424 | * OK, we're below the kswapd watermark and have kicked background |
@@ -2494,7 +2495,7 @@ rebalance: | |||
2494 | * system then fail the allocation instead of entering direct reclaim. | 2495 | * system then fail the allocation instead of entering direct reclaim. |
2495 | */ | 2496 | */ |
2496 | if ((deferred_compaction || contended_compaction) && | 2497 | if ((deferred_compaction || contended_compaction) && |
2497 | (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) | 2498 | (gfp_mask & __GFP_NO_KSWAPD)) |
2498 | goto nopage; | 2499 | goto nopage; |
2499 | 2500 | ||
2500 | /* Try direct reclaim and then allocating */ | 2501 | /* Try direct reclaim and then allocating */ |
@@ -4505,7 +4506,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4505 | zone->zone_pgdat = pgdat; | 4506 | zone->zone_pgdat = pgdat; |
4506 | 4507 | ||
4507 | zone_pcp_init(zone); | 4508 | zone_pcp_init(zone); |
4508 | lruvec_init(&zone->lruvec, zone); | 4509 | lruvec_init(&zone->lruvec); |
4509 | if (!size) | 4510 | if (!size) |
4510 | continue; | 4511 | continue; |
4511 | 4512 | ||
@@ -5825,7 +5826,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
5825 | ret = start_isolate_page_range(pfn_max_align_down(start), | 5826 | ret = start_isolate_page_range(pfn_max_align_down(start), |
5826 | pfn_max_align_up(end), migratetype); | 5827 | pfn_max_align_up(end), migratetype); |
5827 | if (ret) | 5828 | if (ret) |
5828 | goto done; | 5829 | return ret; |
5829 | 5830 | ||
5830 | ret = __alloc_contig_migrate_range(&cc, start, end); | 5831 | ret = __alloc_contig_migrate_range(&cc, start, end); |
5831 | if (ret) | 5832 | if (ret) |
@@ -6098,37 +6099,3 @@ void dump_page(struct page *page) | |||
6098 | dump_page_flags(page->flags); | 6099 | dump_page_flags(page->flags); |
6099 | mem_cgroup_print_bad_page(page); | 6100 | mem_cgroup_print_bad_page(page); |
6100 | } | 6101 | } |
6101 | |||
6102 | /* reset zone->present_pages */ | ||
6103 | void reset_zone_present_pages(void) | ||
6104 | { | ||
6105 | struct zone *z; | ||
6106 | int i, nid; | ||
6107 | |||
6108 | for_each_node_state(nid, N_HIGH_MEMORY) { | ||
6109 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
6110 | z = NODE_DATA(nid)->node_zones + i; | ||
6111 | z->present_pages = 0; | ||
6112 | } | ||
6113 | } | ||
6114 | } | ||
6115 | |||
6116 | /* calculate zone's present pages in buddy system */ | ||
6117 | void fixup_zone_present_pages(int nid, unsigned long start_pfn, | ||
6118 | unsigned long end_pfn) | ||
6119 | { | ||
6120 | struct zone *z; | ||
6121 | unsigned long zone_start_pfn, zone_end_pfn; | ||
6122 | int i; | ||
6123 | |||
6124 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
6125 | z = NODE_DATA(nid)->node_zones + i; | ||
6126 | zone_start_pfn = z->zone_start_pfn; | ||
6127 | zone_end_pfn = zone_start_pfn + z->spanned_pages; | ||
6128 | |||
6129 | /* if the two regions intersect */ | ||
6130 | if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn)) | ||
6131 | z->present_pages += min(end_pfn, zone_end_pfn) - | ||
6132 | max(start_pfn, zone_start_pfn); | ||
6133 | } | ||
6134 | } | ||
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/mmu_notifier.h> | 56 | #include <linux/mmu_notifier.h> |
57 | #include <linux/migrate.h> | 57 | #include <linux/migrate.h> |
58 | #include <linux/hugetlb.h> | 58 | #include <linux/hugetlb.h> |
59 | #include <linux/backing-dev.h> | ||
59 | 60 | ||
60 | #include <asm/tlbflush.h> | 61 | #include <asm/tlbflush.h> |
61 | 62 | ||
@@ -926,11 +927,8 @@ int page_mkclean(struct page *page) | |||
926 | 927 | ||
927 | if (page_mapped(page)) { | 928 | if (page_mapped(page)) { |
928 | struct address_space *mapping = page_mapping(page); | 929 | struct address_space *mapping = page_mapping(page); |
929 | if (mapping) { | 930 | if (mapping) |
930 | ret = page_mkclean_file(mapping, page); | 931 | ret = page_mkclean_file(mapping, page); |
931 | if (page_test_and_clear_dirty(page_to_pfn(page), 1)) | ||
932 | ret = 1; | ||
933 | } | ||
934 | } | 932 | } |
935 | 933 | ||
936 | return ret; | 934 | return ret; |
@@ -1116,6 +1114,7 @@ void page_add_file_rmap(struct page *page) | |||
1116 | */ | 1114 | */ |
1117 | void page_remove_rmap(struct page *page) | 1115 | void page_remove_rmap(struct page *page) |
1118 | { | 1116 | { |
1117 | struct address_space *mapping = page_mapping(page); | ||
1119 | bool anon = PageAnon(page); | 1118 | bool anon = PageAnon(page); |
1120 | bool locked; | 1119 | bool locked; |
1121 | unsigned long flags; | 1120 | unsigned long flags; |
@@ -1138,8 +1137,19 @@ void page_remove_rmap(struct page *page) | |||
1138 | * this if the page is anon, so about to be freed; but perhaps | 1137 | * this if the page is anon, so about to be freed; but perhaps |
1139 | * not if it's in swapcache - there might be another pte slot | 1138 | * not if it's in swapcache - there might be another pte slot |
1140 | * containing the swap entry, but page not yet written to swap. | 1139 | * containing the swap entry, but page not yet written to swap. |
1140 | * | ||
1141 | * And we can skip it on file pages, so long as the filesystem | ||
1142 | * participates in dirty tracking; but need to catch shm and tmpfs | ||
1143 | * and ramfs pages which have been modified since creation by read | ||
1144 | * fault. | ||
1145 | * | ||
1146 | * Note that mapping must be decided above, before decrementing | ||
1147 | * mapcount (which luckily provides a barrier): once page is unmapped, | ||
1148 | * it could be truncated and page->mapping reset to NULL at any moment. | ||
1149 | * Note also that we are relying on page_mapping(page) to set mapping | ||
1150 | * to &swapper_space when PageSwapCache(page). | ||
1141 | */ | 1151 | */ |
1142 | if ((!anon || PageSwapCache(page)) && | 1152 | if (mapping && !mapping_cap_account_dirty(mapping) && |
1143 | page_test_and_clear_dirty(page_to_pfn(page), 1)) | 1153 | page_test_and_clear_dirty(page_to_pfn(page), 1)) |
1144 | set_page_dirty(page); | 1154 | set_page_dirty(page); |
1145 | /* | 1155 | /* |
diff --git a/mm/shmem.c b/mm/shmem.c index 67afba5117f2..89341b658bd0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -643,7 +643,7 @@ static void shmem_evict_inode(struct inode *inode) | |||
643 | kfree(info->symlink); | 643 | kfree(info->symlink); |
644 | 644 | ||
645 | simple_xattrs_free(&info->xattrs); | 645 | simple_xattrs_free(&info->xattrs); |
646 | BUG_ON(inode->i_blocks); | 646 | WARN_ON(inode->i_blocks); |
647 | shmem_free_inode(inode->i_sb); | 647 | shmem_free_inode(inode->i_sb); |
648 | clear_inode(inode); | 648 | clear_inode(inode); |
649 | } | 649 | } |
@@ -1145,8 +1145,20 @@ repeat: | |||
1145 | if (!error) { | 1145 | if (!error) { |
1146 | error = shmem_add_to_page_cache(page, mapping, index, | 1146 | error = shmem_add_to_page_cache(page, mapping, index, |
1147 | gfp, swp_to_radix_entry(swap)); | 1147 | gfp, swp_to_radix_entry(swap)); |
1148 | /* We already confirmed swap, and make no allocation */ | 1148 | /* |
1149 | VM_BUG_ON(error); | 1149 | * We already confirmed swap under page lock, and make |
1150 | * no memory allocation here, so usually no possibility | ||
1151 | * of error; but free_swap_and_cache() only trylocks a | ||
1152 | * page, so it is just possible that the entry has been | ||
1153 | * truncated or holepunched since swap was confirmed. | ||
1154 | * shmem_undo_range() will have done some of the | ||
1155 | * unaccounting, now delete_from_swap_cache() will do | ||
1156 | * the rest (including mem_cgroup_uncharge_swapcache). | ||
1157 | * Reset swap.val? No, leave it so "failed" goes back to | ||
1158 | * "repeat": reading a hole and writing should succeed. | ||
1159 | */ | ||
1160 | if (error) | ||
1161 | delete_from_swap_cache(page); | ||
1150 | } | 1162 | } |
1151 | if (error) | 1163 | if (error) |
1152 | goto failed; | 1164 | goto failed; |
diff --git a/mm/slob.c b/mm/slob.c --- a/mm/slob.c +++ b/mm/slob.c | |||
@@ -429,7 +429,7 @@ static __always_inline void * | |||
429 | __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) | 429 | __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) |
430 | { | 430 | { |
431 | unsigned int *m; | 431 | unsigned int *m; |
432 | int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); | 432 | int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); |
433 | void *ret; | 433 | void *ret; |
434 | 434 | ||
435 | gfp &= gfp_allowed_mask; | 435 | gfp &= gfp_allowed_mask; |
@@ -502,7 +502,7 @@ void kfree(const void *block) | |||
502 | 502 | ||
503 | sp = virt_to_page(block); | 503 | sp = virt_to_page(block); |
504 | if (PageSlab(sp)) { | 504 | if (PageSlab(sp)) { |
505 | int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); | 505 | int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); |
506 | unsigned int *m = (unsigned int *)(block - align); | 506 | unsigned int *m = (unsigned int *)(block - align); |
507 | slob_free(m, *m + align); | 507 | slob_free(m, *m + align); |
508 | } else | 508 | } else |
@@ -521,7 +521,7 @@ size_t ksize(const void *block) | |||
521 | 521 | ||
522 | sp = virt_to_page(block); | 522 | sp = virt_to_page(block); |
523 | if (PageSlab(sp)) { | 523 | if (PageSlab(sp)) { |
524 | int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); | 524 | int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); |
525 | unsigned int *m = (unsigned int *)(block - align); | 525 | unsigned int *m = (unsigned int *)(block - align); |
526 | return SLOB_UNITS(*m) * SLOB_UNIT; | 526 | return SLOB_UNITS(*m) * SLOB_UNIT; |
527 | } else | 527 | } else |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 71cd288b2001..f91a25547ffe 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1494,9 +1494,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1494 | BUG_ON(!current->mm); | 1494 | BUG_ON(!current->mm); |
1495 | 1495 | ||
1496 | pathname = getname(specialfile); | 1496 | pathname = getname(specialfile); |
1497 | err = PTR_ERR(pathname); | ||
1498 | if (IS_ERR(pathname)) | 1497 | if (IS_ERR(pathname)) |
1499 | goto out; | 1498 | return PTR_ERR(pathname); |
1500 | 1499 | ||
1501 | victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0); | 1500 | victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0); |
1502 | err = PTR_ERR(victim); | 1501 | err = PTR_ERR(victim); |
@@ -1608,6 +1607,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1608 | out_dput: | 1607 | out_dput: |
1609 | filp_close(victim, NULL); | 1608 | filp_close(victim, NULL); |
1610 | out: | 1609 | out: |
1610 | putname(pathname); | ||
1611 | return err; | 1611 | return err; |
1612 | } | 1612 | } |
1613 | 1613 | ||
diff --git a/mm/vmscan.c b/mm/vmscan.c index 2624edcfb420..cbf84e152f04 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1760,28 +1760,6 @@ static bool in_reclaim_compaction(struct scan_control *sc) | |||
1760 | return false; | 1760 | return false; |
1761 | } | 1761 | } |
1762 | 1762 | ||
1763 | #ifdef CONFIG_COMPACTION | ||
1764 | /* | ||
1765 | * If compaction is deferred for sc->order then scale the number of pages | ||
1766 | * reclaimed based on the number of consecutive allocation failures | ||
1767 | */ | ||
1768 | static unsigned long scale_for_compaction(unsigned long pages_for_compaction, | ||
1769 | struct lruvec *lruvec, struct scan_control *sc) | ||
1770 | { | ||
1771 | struct zone *zone = lruvec_zone(lruvec); | ||
1772 | |||
1773 | if (zone->compact_order_failed <= sc->order) | ||
1774 | pages_for_compaction <<= zone->compact_defer_shift; | ||
1775 | return pages_for_compaction; | ||
1776 | } | ||
1777 | #else | ||
1778 | static unsigned long scale_for_compaction(unsigned long pages_for_compaction, | ||
1779 | struct lruvec *lruvec, struct scan_control *sc) | ||
1780 | { | ||
1781 | return pages_for_compaction; | ||
1782 | } | ||
1783 | #endif | ||
1784 | |||
1785 | /* | 1763 | /* |
1786 | * Reclaim/compaction is used for high-order allocation requests. It reclaims | 1764 | * Reclaim/compaction is used for high-order allocation requests. It reclaims |
1787 | * order-0 pages before compacting the zone. should_continue_reclaim() returns | 1765 | * order-0 pages before compacting the zone. should_continue_reclaim() returns |
@@ -1829,9 +1807,6 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec, | |||
1829 | * inactive lists are large enough, continue reclaiming | 1807 | * inactive lists are large enough, continue reclaiming |
1830 | */ | 1808 | */ |
1831 | pages_for_compaction = (2UL << sc->order); | 1809 | pages_for_compaction = (2UL << sc->order); |
1832 | |||
1833 | pages_for_compaction = scale_for_compaction(pages_for_compaction, | ||
1834 | lruvec, sc); | ||
1835 | inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE); | 1810 | inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE); |
1836 | if (nr_swap_pages > 0) | 1811 | if (nr_swap_pages > 0) |
1837 | inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON); | 1812 | inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON); |
@@ -2232,9 +2207,12 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) | |||
2232 | * Throttle direct reclaimers if backing storage is backed by the network | 2207 | * Throttle direct reclaimers if backing storage is backed by the network |
2233 | * and the PFMEMALLOC reserve for the preferred node is getting dangerously | 2208 | * and the PFMEMALLOC reserve for the preferred node is getting dangerously |
2234 | * depleted. kswapd will continue to make progress and wake the processes | 2209 | * depleted. kswapd will continue to make progress and wake the processes |
2235 | * when the low watermark is reached | 2210 | * when the low watermark is reached. |
2211 | * | ||
2212 | * Returns true if a fatal signal was delivered during throttling. If this | ||
2213 | * happens, the page allocator should not consider triggering the OOM killer. | ||
2236 | */ | 2214 | */ |
2237 | static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, | 2215 | static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, |
2238 | nodemask_t *nodemask) | 2216 | nodemask_t *nodemask) |
2239 | { | 2217 | { |
2240 | struct zone *zone; | 2218 | struct zone *zone; |
@@ -2249,13 +2227,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, | |||
2249 | * processes to block on log_wait_commit(). | 2227 | * processes to block on log_wait_commit(). |
2250 | */ | 2228 | */ |
2251 | if (current->flags & PF_KTHREAD) | 2229 | if (current->flags & PF_KTHREAD) |
2252 | return; | 2230 | goto out; |
2231 | |||
2232 | /* | ||
2233 | * If a fatal signal is pending, this process should not throttle. | ||
2234 | * It should return quickly so it can exit and free its memory | ||
2235 | */ | ||
2236 | if (fatal_signal_pending(current)) | ||
2237 | goto out; | ||
2253 | 2238 | ||
2254 | /* Check if the pfmemalloc reserves are ok */ | 2239 | /* Check if the pfmemalloc reserves are ok */ |
2255 | first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone); | 2240 | first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone); |
2256 | pgdat = zone->zone_pgdat; | 2241 | pgdat = zone->zone_pgdat; |
2257 | if (pfmemalloc_watermark_ok(pgdat)) | 2242 | if (pfmemalloc_watermark_ok(pgdat)) |
2258 | return; | 2243 | goto out; |
2259 | 2244 | ||
2260 | /* Account for the throttling */ | 2245 | /* Account for the throttling */ |
2261 | count_vm_event(PGSCAN_DIRECT_THROTTLE); | 2246 | count_vm_event(PGSCAN_DIRECT_THROTTLE); |
@@ -2271,12 +2256,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, | |||
2271 | if (!(gfp_mask & __GFP_FS)) { | 2256 | if (!(gfp_mask & __GFP_FS)) { |
2272 | wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, | 2257 | wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, |
2273 | pfmemalloc_watermark_ok(pgdat), HZ); | 2258 | pfmemalloc_watermark_ok(pgdat), HZ); |
2274 | return; | 2259 | |
2260 | goto check_pending; | ||
2275 | } | 2261 | } |
2276 | 2262 | ||
2277 | /* Throttle until kswapd wakes the process */ | 2263 | /* Throttle until kswapd wakes the process */ |
2278 | wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, | 2264 | wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, |
2279 | pfmemalloc_watermark_ok(pgdat)); | 2265 | pfmemalloc_watermark_ok(pgdat)); |
2266 | |||
2267 | check_pending: | ||
2268 | if (fatal_signal_pending(current)) | ||
2269 | return true; | ||
2270 | |||
2271 | out: | ||
2272 | return false; | ||
2280 | } | 2273 | } |
2281 | 2274 | ||
2282 | unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | 2275 | unsigned long try_to_free_pages(struct zonelist *zonelist, int order, |
@@ -2298,13 +2291,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
2298 | .gfp_mask = sc.gfp_mask, | 2291 | .gfp_mask = sc.gfp_mask, |
2299 | }; | 2292 | }; |
2300 | 2293 | ||
2301 | throttle_direct_reclaim(gfp_mask, zonelist, nodemask); | ||
2302 | |||
2303 | /* | 2294 | /* |
2304 | * Do not enter reclaim if fatal signal is pending. 1 is returned so | 2295 | * Do not enter reclaim if fatal signal was delivered while throttled. |
2305 | * that the page allocator does not consider triggering OOM | 2296 | * 1 is returned so that the page allocator does not OOM kill at this |
2297 | * point. | ||
2306 | */ | 2298 | */ |
2307 | if (fatal_signal_pending(current)) | 2299 | if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask)) |
2308 | return 1; | 2300 | return 1; |
2309 | 2301 | ||
2310 | trace_mm_vmscan_direct_reclaim_begin(order, | 2302 | trace_mm_vmscan_direct_reclaim_begin(order, |
@@ -3017,6 +3009,8 @@ static int kswapd(void *p) | |||
3017 | &balanced_classzone_idx); | 3009 | &balanced_classzone_idx); |
3018 | } | 3010 | } |
3019 | } | 3011 | } |
3012 | |||
3013 | current->reclaim_state = NULL; | ||
3020 | return 0; | 3014 | return 0; |
3021 | } | 3015 | } |
3022 | 3016 | ||