author	Mel Gorman <mgorman@techsingularity.net>	2019-01-08 18:23:39 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-01-08 20:15:11 -0500
commit	73444bc4d8f92e46a20cb6bd3342fc2ea75c6787 (patch)
tree	47f9f5b01fc70956e3cd33d1b1375646943ffac4
parent	ddeaab32a89f04b7e2a2df8771583a719c4ac6b7 (diff)
mm, page_alloc: do not wake kswapd with zone lock held
syzbot reported the following regression in the latest merge window and it
was confirmed by Qian Cai that a similar bug was visible from a different
context.

======================================================
WARNING: possible circular locking dependency detected
4.20.0+ #297 Not tainted
------------------------------------------------------
syz-executor0/8529 is trying to acquire lock:
000000005e7fb829 (&pgdat->kswapd_wait){....}, at: __wake_up_common_lock+0x19e/0x330 kernel/sched/wait.c:120

but task is already holding lock:
000000009bb7bae0 (&(&zone->lock)->rlock){-.-.}, at: spin_lock include/linux/spinlock.h:329 [inline]
000000009bb7bae0 (&(&zone->lock)->rlock){-.-.}, at: rmqueue_bulk mm/page_alloc.c:2548 [inline]
000000009bb7bae0 (&(&zone->lock)->rlock){-.-.}, at: __rmqueue_pcplist mm/page_alloc.c:3021 [inline]
000000009bb7bae0 (&(&zone->lock)->rlock){-.-.}, at: rmqueue_pcplist mm/page_alloc.c:3050 [inline]
000000009bb7bae0 (&(&zone->lock)->rlock){-.-.}, at: rmqueue mm/page_alloc.c:3072 [inline]
000000009bb7bae0 (&(&zone->lock)->rlock){-.-.}, at: get_page_from_freelist+0x1bae/0x52a0 mm/page_alloc.c:3491

It appears to be a false positive: the only way the lock ordering could be
inverted is if kswapd is waking itself, and in that case the wakeup would
allocate debugging objects that should already have been allocated if it is
kswapd doing the waking. Nevertheless, the possibility exists, so it is best
to avoid the problem.

This patch flags a zone as needing a kswapd wakeup using the (surprisingly
unused) zone flags field. The flag is read without the lock held when the
wakeup is actually issued. It is possible that the flag-setting context is
not the same as the flag-clearing context, or that small races occur, but
each race is harmless and there is no visible degradation in fragmentation
treatment.

While zone->flags could have remained unused, there is potential for moving
some existing fields into it instead, particularly read-mostly ones like
zone->initialized and zone->contiguous.

Link: http://lkml.kernel.org/r/20190103225712.GJ31517@techsingularity.net
Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs")
Reported-by: syzbot+93d94a001cfbce9e60e1@syzkaller.appspotmail.com
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Qian Cai <cai@lca.pw>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
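To illustrate the mechanism the commit describes, here is a minimal userspace C sketch of the same pattern: set a flag bit while the lock is held, then test and clear it and perform the wakeup only after the lock has been dropped. This is not the kernel code; struct my_zone, note_boosted_watermark(), deferred_wakeup() and fake_wakeup_kswapd() are made-up names, and C11 atomics plus a pthread mutex stand in for the kernel's set_bit()/test_bit()/clear_bit() and zone->lock.

/*
 * Minimal sketch of the deferred-wakeup pattern (illustrative names,
 * not kernel code): record that a wakeup is needed while the lock is
 * held, and issue the wakeup only after the lock has been released.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

#define ZONE_BOOSTED_WATERMARK_BIT (1UL << 0)

struct my_zone {
	pthread_mutex_t lock;	/* stands in for zone->lock */
	atomic_ulong flags;	/* stands in for zone->flags */
};

/* Called with zone->lock held: only record that a wakeup is needed. */
static void note_boosted_watermark(struct my_zone *zone)
{
	atomic_fetch_or(&zone->flags, ZONE_BOOSTED_WATERMARK_BIT);
}

/* Stand-in for wakeup_kswapd(); here it just reports the wakeup. */
static void fake_wakeup_kswapd(struct my_zone *zone)
{
	printf("kswapd woken for zone %p\n", (void *)zone);
}

/* Called after zone->lock has been released. */
static void deferred_wakeup(struct my_zone *zone)
{
	/*
	 * Separate test then clear: the common case (bit not set) does a
	 * plain load and skips the atomic read-modify-write entirely.
	 */
	if (atomic_load(&zone->flags) & ZONE_BOOSTED_WATERMARK_BIT) {
		atomic_fetch_and(&zone->flags, ~ZONE_BOOSTED_WATERMARK_BIT);
		fake_wakeup_kswapd(zone);
	}
}

int main(void)
{
	struct my_zone zone = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.flags = 0,
	};

	pthread_mutex_lock(&zone.lock);
	note_boosted_watermark(&zone);	/* lock held: no wakeup here */
	pthread_mutex_unlock(&zone.lock);

	deferred_wakeup(&zone);		/* lock dropped: safe to wake */
	return 0;
}

Small races are tolerable here for the same reason the commit message gives: a missed or spurious wakeup only affects how promptly reclaim runs, not correctness, so the flag does not need to be manipulated under the lock when it is consumed.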
-rw-r--r--	include/linux/mmzone.h	6
-rw-r--r--	mm/page_alloc.c	8
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index cc4a507d7ca4..842f9189537b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -520,6 +520,12 @@ enum pgdat_flags {
 	PGDAT_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 };
 
+enum zone_flags {
+	ZONE_BOOSTED_WATERMARK,		/* zone recently boosted watermarks.
+					 * Cleared when kswapd is woken.
+					 */
+};
+
 static inline unsigned long zone_managed_pages(struct zone *zone)
 {
 	return (unsigned long)atomic_long_read(&zone->managed_pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cde5dac6229a..d295c9bc01a8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2214,7 +2214,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
 	 */
 	boost_watermark(zone);
 	if (alloc_flags & ALLOC_KSWAPD)
-		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
+		set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
 
 	/* We are not allowed to try stealing from the whole block */
 	if (!whole_block)
@@ -3102,6 +3102,12 @@ struct page *rmqueue(struct zone *preferred_zone,
 	local_irq_restore(flags);
 
 out:
+	/* Separate test+clear to avoid unnecessary atomics */
+	if (test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags)) {
+		clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
+		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
+	}
+
 	VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
 	return page;
 