path: root/mm
Diffstat (limited to 'mm')
-rw-r--r--  mm/compaction.c       10
-rw-r--r--  mm/memory-failure.c    8
-rw-r--r--  mm/mempolicy.c        22
-rw-r--r--  mm/page_alloc.c        9
-rw-r--r--  mm/shmem.c            26
-rw-r--r--  mm/sparse.c           10
-rw-r--r--  mm/vmscan.c           83
7 files changed, 87 insertions, 81 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 9eef55838fca..694eaabaaebd 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -713,7 +713,15 @@ static void isolate_freepages(struct zone *zone,
 
 		/* Found a block suitable for isolating free pages from */
 		isolated = 0;
-		end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
+
+		/*
+		 * As pfn may not start aligned, pfn+pageblock_nr_page
+		 * may cross a MAX_ORDER_NR_PAGES boundary and miss
+		 * a pfn_valid check. Ensure isolate_freepages_block()
+		 * only scans within a pageblock
+		 */
+		end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+		end_pfn = min(end_pfn, zone_end_pfn);
 		isolated = isolate_freepages_block(cc, pfn, end_pfn,
 						   freelist, false);
 		nr_freepages += isolated;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6c5899b9034a..8b20278be6a6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1476,9 +1476,17 @@ int soft_offline_page(struct page *page, int flags)
 {
 	int ret;
 	unsigned long pfn = page_to_pfn(page);
+	struct page *hpage = compound_trans_head(page);
 
 	if (PageHuge(page))
 		return soft_offline_huge_page(page, flags);
+	if (PageTransHuge(hpage)) {
+		if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
+			pr_info("soft offline: %#lx: failed to split THP\n",
+				pfn);
+			return -EBUSY;
+		}
+	}
 
 	ret = get_any_page(page, pfn, flags);
 	if (ret < 0)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d04a8a54c294..4ea600da8940 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2037,28 +2037,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 	return new;
 }
 
-/*
- * If *frompol needs [has] an extra ref, copy *frompol to *tompol ,
- * eliminate the * MPOL_F_* flags that require conditional ref and
- * [NOTE!!!] drop the extra ref.  Not safe to reference *frompol directly
- * after return.  Use the returned value.
- *
- * Allows use of a mempolicy for, e.g., multiple allocations with a single
- * policy lookup, even if the policy needs/has extra ref on lookup.
- * shmem_readahead needs this.
- */
-struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
-						struct mempolicy *frompol)
-{
-	if (!mpol_needs_cond_ref(frompol))
-		return frompol;
-
-	*tompol = *frompol;
-	tompol->flags &= ~MPOL_F_SHARED;	/* copy doesn't need unref */
-	__mpol_put(frompol);
-	return tompol;
-}
-
 /* Slow path of a mempolicy comparison */
 bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bcb72c6e2b2d..7e208f0ad68c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1422,7 +1422,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		}
 	}
 
-	return 1UL << order;
+	return 1UL << alloc_order;
 }
 
 /*
@@ -2416,8 +2416,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	wake_all_kswapd(order, zonelist, high_zoneidx,
-					zone_idx(preferred_zone));
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapd(order, zonelist, high_zoneidx,
+						zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2494,7 +2495,7 @@ rebalance:
 	 * system then fail the allocation instead of entering direct reclaim.
 	 */
 	if ((deferred_compaction || contended_compaction) &&
-						(gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
+						(gfp_mask & __GFP_NO_KSWAPD))
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */
diff --git a/mm/shmem.c b/mm/shmem.c
index 89341b658bd0..50c5b8f3a359 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -910,25 +910,29 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
 			struct shmem_inode_info *info, pgoff_t index)
 {
-	struct mempolicy mpol, *spol;
 	struct vm_area_struct pvma;
-
-	spol = mpol_cond_copy(&mpol,
-			mpol_shared_policy_lookup(&info->policy, index));
+	struct page *page;
 
 	/* Create a pseudo vma that just contains the policy */
 	pvma.vm_start = 0;
 	/* Bias interleave by inode number to distribute better across nodes */
 	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
 	pvma.vm_ops = NULL;
-	pvma.vm_policy = spol;
-	return swapin_readahead(swap, gfp, &pvma, 0);
+	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+
+	page = swapin_readahead(swap, gfp, &pvma, 0);
+
+	/* Drop reference taken by mpol_shared_policy_lookup() */
+	mpol_cond_put(pvma.vm_policy);
+
+	return page;
 }
 
 static struct page *shmem_alloc_page(gfp_t gfp,
 			struct shmem_inode_info *info, pgoff_t index)
 {
 	struct vm_area_struct pvma;
+	struct page *page;
 
 	/* Create a pseudo vma that just contains the policy */
 	pvma.vm_start = 0;
@@ -937,10 +941,12 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 	pvma.vm_ops = NULL;
 	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
 
-	/*
-	 * alloc_page_vma() will drop the shared policy reference
-	 */
-	return alloc_page_vma(gfp, &pvma, 0);
+	page = alloc_page_vma(gfp, &pvma, 0);
+
+	/* Drop reference taken by mpol_shared_policy_lookup() */
+	mpol_cond_put(pvma.vm_policy);
+
+	return page;
 }
 #else /* !CONFIG_NUMA */
 #ifdef CONFIG_TMPFS
diff --git a/mm/sparse.c b/mm/sparse.c
index fac95f2888f2..a83de2f72b30 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -617,7 +617,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 {
 	return; /* XXX: Not implemented yet */
 }
-static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
 {
 }
 #else
@@ -658,10 +658,11 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 			get_order(sizeof(struct page) * nr_pages));
 }
 
-static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
 {
 	unsigned long maps_section_nr, removing_section_nr, i;
 	unsigned long magic;
+	struct page *page = virt_to_page(memmap);
 
 	for (i = 0; i < nr_pages; i++, page++) {
 		magic = (unsigned long) page->lru.next;
@@ -710,13 +711,10 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
 	 */
 
 	if (memmap) {
-		struct page *memmap_page;
-		memmap_page = virt_to_page(memmap);
-
 		nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
 			>> PAGE_SHIFT;
 
-		free_map_bootmem(memmap_page, nr_pages);
+		free_map_bootmem(memmap, nr_pages);
 	}
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 48550c66f1f2..b7ed37675644 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2207,9 +2207,12 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
  * Throttle direct reclaimers if backing storage is backed by the network
  * and the PFMEMALLOC reserve for the preferred node is getting dangerously
  * depleted. kswapd will continue to make progress and wake the processes
- * when the low watermark is reached
+ * when the low watermark is reached.
+ *
+ * Returns true if a fatal signal was delivered during throttling. If this
+ * happens, the page allocator should not consider triggering the OOM killer.
  */
-static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
+static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 					nodemask_t *nodemask)
 {
 	struct zone *zone;
@@ -2224,13 +2227,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	 * processes to block on log_wait_commit().
 	 */
 	if (current->flags & PF_KTHREAD)
-		return;
+		goto out;
+
+	/*
+	 * If a fatal signal is pending, this process should not throttle.
+	 * It should return quickly so it can exit and free its memory
+	 */
+	if (fatal_signal_pending(current))
+		goto out;
 
 	/* Check if the pfmemalloc reserves are ok */
 	first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
 	pgdat = zone->zone_pgdat;
 	if (pfmemalloc_watermark_ok(pgdat))
-		return;
+		goto out;
 
 	/* Account for the throttling */
 	count_vm_event(PGSCAN_DIRECT_THROTTLE);
@@ -2246,12 +2256,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	if (!(gfp_mask & __GFP_FS)) {
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
 			pfmemalloc_watermark_ok(pgdat), HZ);
-		return;
+
+		goto check_pending;
 	}
 
 	/* Throttle until kswapd wakes the process */
 	wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
 		pfmemalloc_watermark_ok(pgdat));
+
+check_pending:
+	if (fatal_signal_pending(current))
+		return true;
+
+out:
+	return false;
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
@@ -2273,13 +2291,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.gfp_mask = sc.gfp_mask,
 	};
 
-	throttle_direct_reclaim(gfp_mask, zonelist, nodemask);
-
 	/*
-	 * Do not enter reclaim if fatal signal is pending. 1 is returned so
-	 * that the page allocator does not consider triggering OOM
+	 * Do not enter reclaim if fatal signal was delivered while throttled.
+	 * 1 is returned so that the page allocator does not OOM kill at this
+	 * point.
 	 */
-	if (fatal_signal_pending(current))
+	if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask))
 		return 1;
 
 	trace_mm_vmscan_direct_reclaim_begin(order,
@@ -2397,6 +2414,19 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
 	} while (memcg);
 }
 
+static bool zone_balanced(struct zone *zone, int order,
+			  unsigned long balance_gap, int classzone_idx)
+{
+	if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
+				    balance_gap, classzone_idx, 0))
+		return false;
+
+	if (COMPACTION_BUILD && order && !compaction_suitable(zone, order))
+		return false;
+
+	return true;
+}
+
 /*
  * pgdat_balanced is used when checking if a node is balanced for high-order
  * allocations. Only zones that meet watermarks and are in a zone allowed
@@ -2475,8 +2505,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 			continue;
 		}
 
-		if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-							i, 0))
+		if (!zone_balanced(zone, order, 0, i))
 			all_zones_ok = false;
 		else
 			balanced += zone->present_pages;
@@ -2585,8 +2614,7 @@ loop_again:
 				break;
 			}
 
-			if (!zone_watermark_ok_safe(zone, order,
-					high_wmark_pages(zone), 0, 0)) {
+			if (!zone_balanced(zone, order, 0, 0)) {
 				end_zone = i;
 				break;
 			} else {
@@ -2662,9 +2690,8 @@ loop_again:
 				testorder = 0;
 
 			if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
-			    !zone_watermark_ok_safe(zone, testorder,
-					high_wmark_pages(zone) + balance_gap,
-					end_zone, 0)) {
+			    !zone_balanced(zone, testorder,
+					   balance_gap, end_zone)) {
 				shrink_zone(zone, &sc);
 
 				reclaim_state->reclaimed_slab = 0;
@@ -2691,8 +2718,7 @@ loop_again:
 				continue;
 			}
 
-			if (!zone_watermark_ok_safe(zone, testorder,
-					high_wmark_pages(zone), end_zone, 0)) {
+			if (!zone_balanced(zone, testorder, 0, end_zone)) {
 				all_zones_ok = 0;
 				/*
 				 * We are still under min water mark. This
@@ -2797,29 +2823,10 @@ out:
 			if (!populated_zone(zone))
 				continue;
 
-			if (zone->all_unreclaimable &&
-			    sc.priority != DEF_PRIORITY)
-				continue;
-
-			/* Would compaction fail due to lack of free memory? */
-			if (COMPACTION_BUILD &&
-			    compaction_suitable(zone, order) == COMPACT_SKIPPED)
-				goto loop_again;
-
-			/* Confirm the zone is balanced for order-0 */
-			if (!zone_watermark_ok(zone, 0,
-					high_wmark_pages(zone), 0, 0)) {
-				order = sc.order = 0;
-				goto loop_again;
-			}
-
 			/* Check if the memory needs to be defragmented. */
 			if (zone_watermark_ok(zone, order,
 				low_wmark_pages(zone), *classzone_idx, 0))
 				zones_need_compaction = 0;
-
-			/* If balanced, clear the congested flag */
-			zone_clear_flag(zone, ZONE_CONGESTED);
 		}
 
 		if (zones_need_compaction)