aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mel Gorman <mgorman@suse.de> 2012-01-10 18:07:15 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-01-10 19:30:42 -0500
commit f90ac3982a78d36f894824636beeef13361d7c59 (patch)
tree 64bbe3b415bdfc151dc44f6b4c216c65351eb53c
parent 938929f14cb595f43cd1a4e63e22d36cab1e4a1f (diff)
mm: avoid livelock on !__GFP_FS allocations
Colin Cross reported:

  Under the following conditions, __alloc_pages_slowpath can loop forever:
    - gfp_mask & __GFP_WAIT is true
    - gfp_mask & __GFP_FS is false
    - reclaim and compaction make no progress
    - order <= PAGE_ALLOC_COSTLY_ORDER

  These conditions happen very often during suspend and resume, when pm_restrict_gfp_mask() effectively converts all GFP_KERNEL allocations into __GFP_WAIT.

  The oom killer is not run because gfp_mask & __GFP_FS is false, but should_alloc_retry will always return true when order is less than PAGE_ALLOC_COSTLY_ORDER.

In his fix, he avoided retrying the allocation if reclaim made no progress and __GFP_FS was not set. The problem is that this would result in GFP_NOIO allocations failing that previously succeeded which would be very unfortunate. The big difference between GFP_NOIO and suspend converting GFP_KERNEL to behave like GFP_NOIO is that normally flushers will be cleaning pages and kswapd reclaims pages allowing GFP_NOIO to succeed after a short delay. The same does not necessarily apply during suspend as the storage device may be suspended.

This patch special cases the suspend case to fail the page allocation if reclaim cannot make progress and adds some documentation on how gfp_allowed_mask is currently used. Failing allocations like this may cause suspend to abort but that is better than a livelock.

[mgorman@suse.de: Rework fix to be suspend specific]
[rientjes@google.com: Move suspended device check to should_alloc_retry]
Reported-by: Colin Cross <ccross@android.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/gfp.h 16
-rw-r--r-- mm/page_alloc.c 30
-rw-r--r-- mm/swapfile.c 6
3 files changed, 41 insertions, 11 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 656295865d58..91812df1351a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -368,9 +368,25 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
368void drain_all_pages(void); 368void drain_all_pages(void);
369void drain_local_pages(void *dummy); 369void drain_local_pages(void *dummy);
370 370
371/*
372 * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
373 * GFP flags are used before interrupts are enabled. Once interrupts are
374 * enabled, it is set to __GFP_BITS_MASK while the system is running. During
375 * hibernation, it is used by PM to avoid I/O during memory allocation while
376 * devices are suspended.
377 */
371extern gfp_t gfp_allowed_mask; 378extern gfp_t gfp_allowed_mask;
372 379
373extern void pm_restrict_gfp_mask(void); 380extern void pm_restrict_gfp_mask(void);
374extern void pm_restore_gfp_mask(void); 381extern void pm_restore_gfp_mask(void);
375 382
383#ifdef CONFIG_PM_SLEEP
384extern bool pm_suspended_storage(void);
385#else
386static inline bool pm_suspended_storage(void)
387{
388 return false;
389}
390#endif /* CONFIG_PM_SLEEP */
391
376#endif /* __LINUX_GFP_H */ 392#endif /* __LINUX_GFP_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 671e6c94fed7..3cba4b67203f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -127,6 +127,13 @@ void pm_restrict_gfp_mask(void)
127 saved_gfp_mask = gfp_allowed_mask; 127 saved_gfp_mask = gfp_allowed_mask;
128 gfp_allowed_mask &= ~GFP_IOFS; 128 gfp_allowed_mask &= ~GFP_IOFS;
129} 129}
130
131bool pm_suspended_storage(void)
132{
133 if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
134 return false;
135 return true;
136}
130#endif /* CONFIG_PM_SLEEP */ 137#endif /* CONFIG_PM_SLEEP */
131 138
132#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 139#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -1786,12 +1793,25 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
1786 1793
1787static inline int 1794static inline int
1788should_alloc_retry(gfp_t gfp_mask, unsigned int order, 1795should_alloc_retry(gfp_t gfp_mask, unsigned int order,
1796 unsigned long did_some_progress,
1789 unsigned long pages_reclaimed) 1797 unsigned long pages_reclaimed)
1790{ 1798{
1791 /* Do not loop if specifically requested */ 1799 /* Do not loop if specifically requested */
1792 if (gfp_mask & __GFP_NORETRY) 1800 if (gfp_mask & __GFP_NORETRY)
1793 return 0; 1801 return 0;
1794 1802
1803 /* Always retry if specifically requested */
1804 if (gfp_mask & __GFP_NOFAIL)
1805 return 1;
1806
1807 /*
1808 * Suspend converts GFP_KERNEL to __GFP_WAIT which can prevent reclaim
1809 * making forward progress without invoking OOM. Suspend also disables
1810 * storage devices so kswapd will not help. Bail if we are suspending.
1811 */
1812 if (!did_some_progress && pm_suspended_storage())
1813 return 0;
1814
1795 /* 1815 /*
1796 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER 1816 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
1797 * means __GFP_NOFAIL, but that may not be true in other 1817 * means __GFP_NOFAIL, but that may not be true in other
@@ -1810,13 +1830,6 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order,
1810 if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order)) 1830 if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order))
1811 return 1; 1831 return 1;
1812 1832
1813 /*
1814 * Don't let big-order allocations loop unless the caller
1815 * explicitly requests that.
1816 */
1817 if (gfp_mask & __GFP_NOFAIL)
1818 return 1;
1819
1820 return 0; 1833 return 0;
1821} 1834}
1822 1835
@@ -2209,7 +2222,8 @@ rebalance:
2209 2222
2210 /* Check if we should retry the allocation */ 2223 /* Check if we should retry the allocation */
2211 pages_reclaimed += did_some_progress; 2224 pages_reclaimed += did_some_progress;
2212 if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { 2225 if (should_alloc_retry(gfp_mask, order, did_some_progress,
2226 pages_reclaimed)) {
2213 /* Wait for some write requests to complete then retry */ 2227 /* Wait for some write requests to complete then retry */
2214 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); 2228 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
2215 goto rebalance; 2229 goto rebalance;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b1cd12060723..9520592d4231 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -667,10 +667,10 @@ int try_to_free_swap(struct page *page)
667 * original page might be freed under memory pressure, then 667 * original page might be freed under memory pressure, then
668 * later read back in from swap, now with the wrong data. 668 * later read back in from swap, now with the wrong data.
669 * 669 *
670 * Hibernation clears bits from gfp_allowed_mask to prevent 670 * Hibernation suspends storage while it is writing the image
671 * memory reclaim from writing to disk, so check that here. 671 * to disk so check that here.
672 */ 672 */
673 if (!(gfp_allowed_mask & __GFP_IO)) 673 if (pm_suspended_storage())
674 return 0; 674 return 0;
675 675
676 delete_from_swap_cache(page); 676 delete_from_swap_cache(page);