author    Mel Gorman <mgorman@techsingularity.net>    2015-11-06 19:28:21 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2015-11-06 20:50:42 -0500
commit    d0164adc89f6bb374d304ffcc375c6d2652fe67d (patch)
tree      de1cbe09c86dcd24a4a476f7e0b41af239bbdc29
parent    016c13daa5c9e4827eca703e2f0621c131f2cca3 (diff)
mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd
__GFP_WAIT has been used to identify atomic context in callers that hold
spinlocks or are in interrupts. They are expected to be high priority and
have access to one of two watermarks lower than "min" which can be
referred to as the "atomic reserve". __GFP_HIGH users get access to the
first lower watermark and can be called the "high priority reserve".

Over time, callers had a requirement to not block when fallback options
were available. Some have abused __GFP_WAIT, leading to a situation where
an optimistic allocation with a fallback option can access atomic
reserves.

This patch uses __GFP_ATOMIC to identify callers that are truly atomic,
cannot sleep and have no alternative. High priority users continue to use
__GFP_HIGH. __GFP_DIRECT_RECLAIM identifies callers that can sleep and
are willing to enter direct reclaim. __GFP_KSWAPD_RECLAIM identifies
callers that want to wake kswapd for background reclaim. __GFP_WAIT is
redefined as a caller that is willing to enter direct reclaim and wake
kswapd for background reclaim.

This patch then converts a number of sites:

o __GFP_ATOMIC is used by callers that are high priority and have memory
  pools for those requests. GFP_ATOMIC uses this flag.

o Callers that have a limited mempool to guarantee forward progress clear
  __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall
  into this category where kswapd will still be woken but atomic reserves
  are not used as there is a one-entry mempool to guarantee progress.

o Callers that are checking if they are non-blocking should use the
  helper gfpflags_allow_blocking() where possible. This is because
  checking for __GFP_WAIT as was done historically can now trigger false
  positives. Some exceptions, like dm-crypt.c, exist where the code intent
  is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to
  flag manipulations.

o Callers that built their own GFP flags instead of starting with
  GFP_KERNEL and friends now also need to specify __GFP_KSWAPD_RECLAIM.

The first key hazard to watch out for is callers that removed __GFP_WAIT
and were depending on access to atomic reserves for inconspicuous reasons.
In some cases it may be appropriate for them to use __GFP_HIGH.

The second key hazard is callers that assembled their own combination of
GFP flags instead of starting with something like GFP_KERNEL. They may
now wish to specify __GFP_KSWAPD_RECLAIM. It's almost certainly harmless
if it's missed in most cases, as other activity will wake kswapd.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
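[Annotation, not part of the commit] To make the new conventions concrete, a
minimal sketch of how the three caller categories described above map onto
the reworked flags; example_alloc() and its fallback chain are hypothetical:

#include <linux/gfp.h>
#include <linux/slab.h>

/* Hypothetical helper, not from this patch: one attempt per category. */
static void *example_alloc(size_t size, gfp_t caller_gfp)
{
        /* Truly atomic caller: GFP_ATOMIC now carries __GFP_ATOMIC, may
         * dip into the atomic reserves and still wakes kswapd. */
        void *obj = kmalloc(size, GFP_ATOMIC);

        /* Caller with a fallback: GFP_NOWAIT is now __GFP_KSWAPD_RECLAIM
         * only, so kswapd is woken but atomic reserves are untouched. */
        if (!obj)
                obj = kmalloc(size, GFP_NOWAIT | __GFP_NOWARN);

        /* Blocking check: use the helper rather than testing __GFP_WAIT,
         * which is now a two-bit combination and can false-positive. */
        if (!obj && gfpflags_allow_blocking(caller_gfp))
                obj = kmalloc(size, GFP_KERNEL);

        return obj;
}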
-rw-r--r--  Documentation/vm/balance | 14
-rw-r--r--  arch/arm/mm/dma-mapping.c | 6
-rw-r--r--  arch/arm/xen/mm.c | 2
-rw-r--r--  arch/arm64/mm/dma-mapping.c | 4
-rw-r--r--  arch/x86/kernel/pci-dma.c | 2
-rw-r--r--  block/bio.c | 26
-rw-r--r--  block/blk-core.c | 16
-rw-r--r--  block/blk-ioc.c | 2
-rw-r--r--  block/blk-mq-tag.c | 2
-rw-r--r--  block/blk-mq.c | 6
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 3
-rw-r--r--  drivers/block/osdblk.c | 2
-rw-r--r--  drivers/connector/connector.c | 3
-rw-r--r--  drivers/firewire/core-cdev.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 2
-rw-r--r--  drivers/infiniband/core/sa_query.c | 2
-rw-r--r--  drivers/iommu/amd_iommu.c | 2
-rw-r--r--  drivers/iommu/intel-iommu.c | 2
-rw-r--r--  drivers/md/dm-crypt.c | 6
-rw-r--r--  drivers/md/dm-kcopyd.c | 2
-rw-r--r--  drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c | 2
-rw-r--r--  drivers/media/pci/solo6x10/solo6x10-v4l2.c | 2
-rw-r--r--  drivers/media/pci/tw68/tw68-video.c | 2
-rw-r--r--  drivers/mtd/mtdcore.c | 3
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2
-rw-r--r--  drivers/staging/android/ion/ion_system_heap.c | 2
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_private.h | 2
-rw-r--r--  drivers/usb/host/u132-hcd.c | 2
-rw-r--r--  drivers/video/fbdev/vermilion/vermilion.c | 2
-rw-r--r--  fs/btrfs/disk-io.c | 2
-rw-r--r--  fs/btrfs/extent_io.c | 14
-rw-r--r--  fs/btrfs/volumes.c | 4
-rw-r--r--  fs/ext4/super.c | 2
-rw-r--r--  fs/fscache/cookie.c | 2
-rw-r--r--  fs/fscache/page.c | 6
-rw-r--r--  fs/jbd2/transaction.c | 4
-rw-r--r--  fs/nfs/file.c | 6
-rw-r--r--  fs/xfs/xfs_qm.c | 2
-rw-r--r--  include/linux/gfp.h | 46
-rw-r--r--  include/linux/skbuff.h | 6
-rw-r--r--  include/net/sock.h | 2
-rw-r--r--  include/trace/events/gfpflags.h | 5
-rw-r--r--  kernel/audit.c | 6
-rw-r--r--  kernel/cgroup.c | 2
-rw-r--r--  kernel/locking/lockdep.c | 2
-rw-r--r--  kernel/power/snapshot.c | 2
-rw-r--r--  kernel/smp.c | 2
-rw-r--r--  lib/idr.c | 4
-rw-r--r--  lib/radix-tree.c | 10
-rw-r--r--  mm/backing-dev.c | 2
-rw-r--r--  mm/dmapool.c | 2
-rw-r--r--  mm/memcontrol.c | 6
-rw-r--r--  mm/mempool.c | 10
-rw-r--r--  mm/migrate.c | 2
-rw-r--r--  mm/page_alloc.c | 43
-rw-r--r--  mm/slab.c | 18
-rw-r--r--  mm/slub.c | 10
-rw-r--r--  mm/vmalloc.c | 2
-rw-r--r--  mm/vmscan.c | 4
-rw-r--r--  mm/zswap.c | 5
-rw-r--r--  net/core/skbuff.c | 8
-rw-r--r--  net/core/sock.c | 6
-rw-r--r--  net/netlink/af_netlink.c | 2
-rw-r--r--  net/rds/ib_recv.c | 4
-rw-r--r--  net/rxrpc/ar-connection.c | 2
-rw-r--r--  net/sctp/associola.c | 2
66 files changed, 210 insertions, 172 deletions
diff --git a/Documentation/vm/balance b/Documentation/vm/balance
index c46e68cf9344..964595481af6 100644
--- a/Documentation/vm/balance
+++ b/Documentation/vm/balance
@@ -1,12 +1,14 @@
 Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
 
-Memory balancing is needed for non __GFP_WAIT as well as for non
-__GFP_IO allocations.
+Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as
+well as for non __GFP_IO allocations.
 
-There are two reasons to be requesting non __GFP_WAIT allocations:
-the caller can not sleep (typically intr context), or does not want
-to incur cost overheads of page stealing and possible swap io for
-whatever reasons.
+The first reason why a caller may avoid reclaim is that the caller can not
+sleep due to holding a spinlock or is in interrupt context. The second may
+be that the caller is willing to fail the allocation without incurring the
+overhead of page reclaim. This may happen for opportunistic high-order
+allocation requests that have order-0 fallback options. In such cases,
+the caller may also wish to avoid waking kswapd.
 
 __GFP_IO allocation requests are made to prevent file system deadlocks.
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ad4eb2d26e16..e62400e5fb99 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -651,12 +651,12 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
        if (nommu())
                addr = __alloc_simple_buffer(dev, size, gfp, &page);
-       else if (dev_get_cma_area(dev) && (gfp & __GFP_WAIT))
+       else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM))
                addr = __alloc_from_contiguous(dev, size, prot, &page,
                                               caller, want_vaddr);
        else if (is_coherent)
                addr = __alloc_simple_buffer(dev, size, gfp, &page);
-       else if (!(gfp & __GFP_WAIT))
+       else if (!gfpflags_allow_blocking(gfp))
                addr = __alloc_from_pool(size, &page);
        else
                addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
@@ -1363,7 +1363,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
        *handle = DMA_ERROR_CODE;
        size = PAGE_ALIGN(size);
 
-       if (!(gfp & __GFP_WAIT))
+       if (!gfpflags_allow_blocking(gfp))
                return __iommu_alloc_atomic(dev, size, handle);
 
        /*
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 7c34f7126b04..c5f9a9e3d1f3 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -25,7 +25,7 @@
 unsigned long xen_get_swiotlb_free_pages(unsigned int order)
 {
        struct memblock_region *reg;
-       gfp_t flags = __GFP_NOWARN;
+       gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
 
        for_each_memblock(memory, reg) {
                if (reg->base < (phys_addr_t)0xffffffff) {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 6320361d8d4c..bb4bf6a06ad6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
        if (IS_ENABLED(CONFIG_ZONE_DMA) &&
            dev->coherent_dma_mask <= DMA_BIT_MASK(32))
                flags |= GFP_DMA;
-       if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
+       if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
                struct page *page;
                void *addr;
 
@@ -148,7 +148,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 
        size = PAGE_ALIGN(size);
 
-       if (!coherent && !(flags & __GFP_WAIT)) {
+       if (!coherent && !gfpflags_allow_blocking(flags)) {
                struct page *page = NULL;
                void *addr = __alloc_from_pool(size, &page, flags);
 
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cd99433b8ba1..6ba014c61d62 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -90,7 +90,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 again:
        page = NULL;
        /* CMA can be used only in the context which permits sleeping */
-       if (flag & __GFP_WAIT) {
+       if (gfpflags_allow_blocking(flag)) {
                page = dma_alloc_from_contiguous(dev, count, get_order(size));
                if (page && page_to_phys(page) + size > dma_mask) {
                        dma_release_from_contiguous(dev, page, count);
diff --git a/block/bio.c b/block/bio.c
index ad3f276d74bc..4f184d938942 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -211,7 +211,7 @@ fallback:
                bvl = mempool_alloc(pool, gfp_mask);
        } else {
                struct biovec_slab *bvs = bvec_slabs + *idx;
-               gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+               gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
 
                /*
                 * Make this allocation restricted and don't dump info on
@@ -221,11 +221,11 @@ fallback:
                __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 
                /*
-                * Try a slab allocation. If this fails and __GFP_WAIT
+                * Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
                 * is set, retry with the 1-entry mempool
                 */
                bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
-               if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
+               if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
                        *idx = BIOVEC_MAX_IDX;
                        goto fallback;
                }
@@ -395,12 +395,12 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
  * backed by the @bs's mempool.
  *
- * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
- * able to allocate a bio. This is due to the mempool guarantees. To make this
- * work, callers must never allocate more than 1 bio at a time from this pool.
- * Callers that need to allocate more than 1 bio must always submit the
- * previously allocated bio for IO before attempting to allocate a new one.
- * Failure to do so can cause deadlocks under memory pressure.
+ * When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
+ * always be able to allocate a bio. This is due to the mempool guarantees.
+ * To make this work, callers must never allocate more than 1 bio at a time
+ * from this pool. Callers that need to allocate more than 1 bio must always
+ * submit the previously allocated bio for IO before attempting to allocate
+ * a new one. Failure to do so can cause deadlocks under memory pressure.
  *
  * Note that when running under generic_make_request() (i.e. any block
  * driver), bios are not submitted until after you return - see the code in
@@ -459,13 +459,13 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
         * We solve this, and guarantee forward progress, with a rescuer
         * workqueue per bio_set. If we go to allocate and there are
         * bios on current->bio_list, we first try the allocation
-        * without __GFP_WAIT; if that fails, we punt those bios we
-        * would be blocking to the rescuer workqueue before we retry
-        * with the original gfp_flags.
+        * without __GFP_DIRECT_RECLAIM; if that fails, we punt those
+        * bios we would be blocking to the rescuer workqueue before
+        * we retry with the original gfp_flags.
         */
 
        if (current->bio_list && !bio_list_empty(current->bio_list))
-               gfp_mask &= ~__GFP_WAIT;
+               gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
        p = mempool_alloc(bs->bio_pool, gfp_mask);
        if (!p && gfp_mask != saved_gfp) {
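[Annotation, not part of the commit] The bio.c hunks above illustrate the
second changelog bullet: strip __GFP_DIRECT_RECLAIM for the optimistic
attempt, then fall back to a mempool that guarantees forward progress.
A minimal sketch of that pattern, assuming a cache and a one-entry mempool
set up elsewhere; the function name is hypothetical:

/* Hypothetical sketch: opportunistic slab attempt without direct reclaim,
 * mempool fallback when blocking is permitted. kswapd may still be woken;
 * atomic reserves are never touched. */
static void *alloc_with_pool_fallback(struct kmem_cache *cache,
                                      mempool_t *pool, gfp_t gfp_mask)
{
        void *p = kmem_cache_alloc(cache,
                        gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO));

        if (!p && (gfp_mask & __GFP_DIRECT_RECLAIM))
                p = mempool_alloc(pool, gfp_mask);
        return p;
}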
diff --git a/block/blk-core.c b/block/blk-core.c
index 89eec7965870..9e32f0868e36 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1206,8 +1206,8 @@ rq_starved:
  * @bio: bio to allocate request for (can be %NULL)
  * @gfp_mask: allocation mask
  *
- * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this
- * function keeps retrying under memory pressure and fails iff @q is dead.
+ * Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
+ * this function keeps retrying under memory pressure and fails iff @q is dead.
  *
  * Must be called with @q->queue_lock held and,
  * Returns ERR_PTR on failure, with @q->queue_lock held.
@@ -1227,7 +1227,7 @@ retry:
        if (!IS_ERR(rq))
                return rq;
 
-       if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
+       if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
                blk_put_rl(rl);
                return rq;
        }
@@ -1305,11 +1305,11 @@ EXPORT_SYMBOL(blk_get_request);
  * BUG.
  *
  * WARNING: When allocating/cloning a bio-chain, careful consideration should be
- * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
- * anything but the first bio in the chain. Otherwise you risk waiting for IO
- * completion of a bio that hasn't been submitted yet, thus resulting in a
- * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
- * of bio_alloc(), as that avoids the mempool deadlock.
+ * given to how you allocate bios. In particular, you cannot use
+ * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise
+ * you risk waiting for IO completion of a bio that hasn't been submitted yet,
+ * thus resulting in a deadlock. Alternatively bios should be allocated using
+ * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock.
  * If possible a big IO should be split into smaller parts when allocation
  * fails. Partial allocation should not be an error, or you risk a live-lock.
  */
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 1a27f45ec776..381cb50a673c 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -289,7 +289,7 @@ struct io_context *get_task_io_context(struct task_struct *task,
 {
        struct io_context *ioc;
 
-       might_sleep_if(gfp_flags & __GFP_WAIT);
+       might_sleep_if(gfpflags_allow_blocking(gfp_flags));
 
        do {
                task_lock(task);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 60ac684c8b8c..a07ca3488d96 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
        if (tag != -1)
                return tag;
 
-       if (!(data->gfp & __GFP_WAIT))
+       if (!gfpflags_allow_blocking(data->gfp))
                return -1;
 
        bs = bt_wait_ptr(bt, hctx);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1c27b3eaef64..68c0a3416b34 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -244,11 +244,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 
        ctx = blk_mq_get_ctx(q);
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
-       blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT,
+       blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
                        reserved, ctx, hctx);
 
        rq = __blk_mq_alloc_request(&alloc_data, rw);
-       if (!rq && (gfp & __GFP_WAIT)) {
+       if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
                __blk_mq_run_hw_queue(hctx);
                blk_mq_put_ctx(ctx);
 
@@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
        ctx = blk_mq_get_ctx(q);
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
        blk_mq_set_alloc_data(&alloc_data, q,
-                       __GFP_WAIT|GFP_ATOMIC, false, ctx, hctx);
+                       __GFP_WAIT|__GFP_HIGH, false, ctx, hctx);
        rq = __blk_mq_alloc_request(&alloc_data, rw);
        ctx = alloc_data.ctx;
        hctx = alloc_data.hctx;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c097909c589c..b4b5680ac6ad 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -357,7 +357,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
        }
 
        if (has_payload && data_size) {
-               page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
+               page = drbd_alloc_pages(peer_device, nr_pages,
+                                       gfpflags_allow_blocking(gfp_mask));
                if (!page)
                        goto fail;
        }
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index e22942596207..1b709a4e3b5e 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -271,7 +271,7 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
                goto err_out;
 
        tmp->bi_bdev = NULL;
-       gfpmask &= ~__GFP_WAIT;
+       gfpmask &= ~__GFP_DIRECT_RECLAIM;
        tmp->bi_next = NULL;
 
        if (!new_chain)
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 30f522848c73..d7373ca69c99 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -124,7 +124,8 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
        if (group)
                return netlink_broadcast(dev->nls, skb, portid, group,
                                         gfp_mask);
-       return netlink_unicast(dev->nls, skb, portid, !(gfp_mask&__GFP_WAIT));
+       return netlink_unicast(dev->nls, skb, portid,
+                              !gfpflags_allow_blocking(gfp_mask));
 }
 EXPORT_SYMBOL_GPL(cn_netlink_send_mult);
 
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 2a3973a7c441..36a7c2d89a01 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -486,7 +486,7 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
 static int add_client_resource(struct client *client,
                               struct client_resource *resource, gfp_t gfp_mask)
 {
-       bool preload = !!(gfp_mask & __GFP_WAIT);
+       bool preload = gfpflags_allow_blocking(gfp_mask);
        unsigned long flags;
        int ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4d631a946481..d58cb9e034fe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2215,7 +2215,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
         */
        mapping = file_inode(obj->base.filp)->i_mapping;
        gfp = mapping_gfp_mask(mapping);
-       gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
+       gfp |= __GFP_NORETRY | __GFP_NOWARN;
        gfp &= ~(__GFP_IO | __GFP_WAIT);
        sg = st->sgl;
        st->nents = 0;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8c014b33d8e0..59ab264c99c4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1083,7 +1083,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
 
 static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
 {
-       bool preload = !!(gfp_mask & __GFP_WAIT);
+       bool preload = gfpflags_allow_blocking(gfp_mask);
        unsigned long flags;
        int ret, id;
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0d533bba4ad1..8b2be1e7714f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2668,7 +2668,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 
        page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
        if (!page) {
-               if (!(flag & __GFP_WAIT))
+               if (!gfpflags_allow_blocking(flag))
                        return NULL;
 
                page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 7cf80c1a8a16..f1042daef9ad 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3647,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
                flags |= GFP_DMA32;
        }
 
-       if (flags & __GFP_WAIT) {
+       if (gfpflags_allow_blocking(flags)) {
                unsigned int count = size >> PAGE_SHIFT;
 
                page = dma_alloc_from_contiguous(dev, count, order);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3729b394432c..917d47e290ae 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -994,7 +994,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
        struct bio_vec *bvec;
 
 retry:
-       if (unlikely(gfp_mask & __GFP_WAIT))
+       if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
                mutex_lock(&cc->bio_alloc_lock);
 
        clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
@@ -1010,7 +1010,7 @@ retry:
                if (!page) {
                        crypt_free_buffer_pages(cc, clone);
                        bio_put(clone);
-                       gfp_mask |= __GFP_WAIT;
+                       gfp_mask |= __GFP_DIRECT_RECLAIM;
                        goto retry;
                }
 
@@ -1027,7 +1027,7 @@ retry:
        }
 
 return_clone:
-       if (unlikely(gfp_mask & __GFP_WAIT))
+       if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
                mutex_unlock(&cc->bio_alloc_lock);
 
        return clone;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 3a7cade5e27d..1452ed9aacb4 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -244,7 +244,7 @@ static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
        *pages = NULL;
 
        do {
-               pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY);
+               pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
                if (unlikely(!pl)) {
                        /* Use reserved pages */
                        pl = kc->pages;
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
index 1bd2fd47421f..4432fd69b7cb 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
@@ -1297,7 +1297,7 @@ static struct solo_enc_dev *solo_enc_alloc(struct solo_dev *solo_dev,
        solo_enc->vidq.ops = &solo_enc_video_qops;
        solo_enc->vidq.mem_ops = &vb2_dma_sg_memops;
        solo_enc->vidq.drv_priv = solo_enc;
-       solo_enc->vidq.gfp_flags = __GFP_DMA32;
+       solo_enc->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
        solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
        solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
        solo_enc->vidq.lock = &solo_enc->lock;
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2.c b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
index 26df903585d7..f7ce493b1fee 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
@@ -678,7 +678,7 @@ int solo_v4l2_init(struct solo_dev *solo_dev, unsigned nr)
        solo_dev->vidq.mem_ops = &vb2_dma_contig_memops;
        solo_dev->vidq.drv_priv = solo_dev;
        solo_dev->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-       solo_dev->vidq.gfp_flags = __GFP_DMA32;
+       solo_dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
        solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
        solo_dev->vidq.lock = &solo_dev->lock;
        ret = vb2_queue_init(&solo_dev->vidq);
diff --git a/drivers/media/pci/tw68/tw68-video.c b/drivers/media/pci/tw68/tw68-video.c
index 4c3293dcddbc..46642ef9151b 100644
--- a/drivers/media/pci/tw68/tw68-video.c
+++ b/drivers/media/pci/tw68/tw68-video.c
@@ -979,7 +979,7 @@ int tw68_video_init2(struct tw68_dev *dev, int video_nr)
        dev->vidq.ops = &tw68_video_qops;
        dev->vidq.mem_ops = &vb2_dma_sg_memops;
        dev->vidq.drv_priv = dev;
-       dev->vidq.gfp_flags = __GFP_DMA32;
+       dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
        dev->vidq.buf_struct_size = sizeof(struct tw68_buf);
        dev->vidq.lock = &dev->lock;
        dev->vidq.min_buffers_needed = 2;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 8bbbb751bf45..2dfb291a47c6 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1188,8 +1188,7 @@ EXPORT_SYMBOL_GPL(mtd_writev);
  */
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
 {
-       gfp_t flags = __GFP_NOWARN | __GFP_WAIT |
-                      __GFP_NORETRY | __GFP_NO_KSWAPD;
+       gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY;
        size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
        void *kbuf;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 44173be5cbf0..f8d7a2f06950 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -691,7 +691,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask)
 {
        if (fp->rx_frag_size) {
                /* GFP_KERNEL allocations are used only during initialization */
-               if (unlikely(gfp_mask & __GFP_WAIT))
+               if (unlikely(gfpflags_allow_blocking(gfp_mask)))
                        return (void *)__get_free_page(gfp_mask);
 
                return netdev_alloc_frag(fp->rx_frag_size);
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
index ada724aab3d5..d4c3e5512dd5 100644
--- a/drivers/staging/android/ion/ion_system_heap.c
+++ b/drivers/staging/android/ion/ion_system_heap.c
@@ -27,7 +27,7 @@
 #include "ion_priv.h"
 
 static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN |
-                                    __GFP_NORETRY) & ~__GFP_WAIT;
+                                    __GFP_NORETRY) & ~__GFP_DIRECT_RECLAIM;
 static gfp_t low_order_gfp_flags  = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN);
 static const unsigned int orders[] = {8, 4, 0};
 static const int num_orders = ARRAY_SIZE(orders);
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index 6af733de69ca..f0b0423a716b 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -95,7 +95,7 @@ do { \
 do { \
        LASSERT(!in_interrupt() || \
                ((size) <= LIBCFS_VMALLOC_SIZE && \
-                ((mask) & __GFP_WAIT) == 0)); \
+                !gfpflags_allow_blocking(mask))); \
 } while (0)
 
 #define LIBCFS_ALLOC_POST(ptr, size) \
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 0a94895a358d..692ccc69345e 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -2244,7 +2244,7 @@ static int u132_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
 {
        struct u132 *u132 = hcd_to_u132(hcd);
        if (irqs_disabled()) {
-               if (__GFP_WAIT & mem_flags) {
+               if (gfpflags_allow_blocking(mem_flags)) {
                        printk(KERN_ERR "invalid context for function that might sleep\n");
                        return -EINVAL;
                }
diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 6b70d7f62b2f..1c1e95a0b8fa 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -99,7 +99,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
                 * below the first 16MB.
                 */
 
-               flags = __GFP_DMA | __GFP_HIGH;
+               flags = __GFP_DMA | __GFP_HIGH | __GFP_KSWAPD_RECLAIM;
                va->logical =
                         __get_free_pages(flags, --max_order);
        } while (va->logical == 0 && max_order > min_order);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1e60d00d4ea7..c339d561e596 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2572,7 +2572,7 @@ int open_ctree(struct super_block *sb,
        fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
        fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
        /* readahead state */
-       INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        spin_lock_init(&fs_info->reada_lock);
 
        fs_info->thread_pool_size = min_t(unsigned long,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3915c9473e94..032abfbebe76 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -594,7 +594,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
                clear = 1;
 again:
-       if (!prealloc && (mask & __GFP_WAIT)) {
+       if (!prealloc && gfpflags_allow_blocking(mask)) {
                /*
                 * Don't care for allocation failure here because we might end
                 * up not needing the pre-allocated extent state at all, which
@@ -718,7 +718,7 @@ search_again:
        if (start > end)
                goto out;
        spin_unlock(&tree->lock);
-       if (mask & __GFP_WAIT)
+       if (gfpflags_allow_blocking(mask))
                cond_resched();
        goto again;
 }
@@ -850,7 +850,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 
        bits |= EXTENT_FIRST_DELALLOC;
 again:
-       if (!prealloc && (mask & __GFP_WAIT)) {
+       if (!prealloc && gfpflags_allow_blocking(mask)) {
                prealloc = alloc_extent_state(mask);
                BUG_ON(!prealloc);
        }
@@ -1028,7 +1028,7 @@ search_again:
        if (start > end)
                goto out;
        spin_unlock(&tree->lock);
-       if (mask & __GFP_WAIT)
+       if (gfpflags_allow_blocking(mask))
                cond_resched();
        goto again;
 }
@@ -1076,7 +1076,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        btrfs_debug_check_extent_io_range(tree, start, end);
 
 again:
-       if (!prealloc && (mask & __GFP_WAIT)) {
+       if (!prealloc && gfpflags_allow_blocking(mask)) {
                /*
                 * Best effort, don't worry if extent state allocation fails
                 * here for the first iteration. We might have a cached state
@@ -1253,7 +1253,7 @@ search_again:
        if (start > end)
                goto out;
        spin_unlock(&tree->lock);
-       if (mask & __GFP_WAIT)
+       if (gfpflags_allow_blocking(mask))
                cond_resched();
        first_iteration = false;
        goto again;
@@ -4319,7 +4319,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
        u64 start = page_offset(page);
        u64 end = start + PAGE_CACHE_SIZE - 1;
 
-       if ((mask & __GFP_WAIT) &&
+       if (gfpflags_allow_blocking(mask) &&
            page->mapping->host->i_size > 16 * 1024 * 1024) {
                u64 len;
                while (start <= end) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6fc735869c18..e023919b4470 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -156,8 +156,8 @@ static struct btrfs_device *__alloc_device(void)
        spin_lock_init(&dev->reada_lock);
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
-       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
-       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
        return dev;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a63c7b0a10cf..49f6c78ee3af 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1058,7 +1058,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
                return 0;
        if (journal)
                return jbd2_journal_try_to_free_buffers(journal, page,
-                                               wait & ~__GFP_WAIT);
+                                               wait & ~__GFP_DIRECT_RECLAIM);
        return try_to_free_buffers(page);
 }
 
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index d403c69bee08..4304072161aa 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -111,7 +111,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
 
        /* radix tree insertion won't use the preallocation pool unless it's
         * told it may not wait */
-       INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
        switch (cookie->def->type) {
        case FSCACHE_COOKIE_TYPE_INDEX:
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 483bbc613bf0..79483b3d8c6f 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -58,7 +58,7 @@ bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page)
 
 /*
  * decide whether a page can be released, possibly by cancelling a store to it
- * - we're allowed to sleep if __GFP_WAIT is flagged
+ * - we're allowed to sleep if __GFP_DIRECT_RECLAIM is flagged
  */
 bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
                                  struct page *page,
@@ -122,7 +122,7 @@ page_busy:
         * allocator as the work threads writing to the cache may all end up
         * sleeping on memory allocation, so we may need to impose a timeout
         * too. */
-       if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) {
+       if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) {
                fscache_stat(&fscache_n_store_vmscan_busy);
                return false;
        }
@@ -132,7 +132,7 @@ page_busy:
        _debug("fscache writeout timeout page: %p{%lx}",
               page, page->index);
 
-       gfp &= ~__GFP_WAIT;
+       gfp &= ~__GFP_DIRECT_RECLAIM;
        goto try_again;
 }
 EXPORT_SYMBOL(__fscache_maybe_release_page);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6b8338ec2464..89463eee6791 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1937,8 +1937,8 @@ out:
  * @journal: journal for operation
  * @page: to try and free
  * @gfp_mask: we use the mask to detect how hard should we try to release
- * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
- * release the buffers.
+ * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
+ * code to release the buffers.
  *
  *
  * For all the buffers on this page,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 37f639d50af5..93e236429c5d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -473,8 +473,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
        dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
        /* Always try to initiate a 'commit' if relevant, but only
-        * wait for it if __GFP_WAIT is set. Even then, only wait 1
-        * second and only if the 'bdi' is not congested.
+        * wait for it if the caller allows blocking. Even then,
+        * only wait 1 second and only if the 'bdi' is not congested.
         * Waiting indefinitely can cause deadlocks when the NFS
         * server is on this machine, when a new TCP connection is
         * needed and in other rare cases. There is no particular
@@ -484,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
        if (mapping) {
                struct nfs_server *nfss = NFS_SERVER(mapping->host);
                nfs_commit_inode(mapping->host, 0);
-               if ((gfp & __GFP_WAIT) &&
+               if (gfpflags_allow_blocking(gfp) &&
                    !bdi_write_congested(&nfss->backing_dev_info)) {
                        wait_on_page_bit_killable_timeout(page, PG_private,
                                                          HZ);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index eac9549efd52..587174fd4f2c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -525,7 +525,7 @@ xfs_qm_shrink_scan(
        unsigned long freed;
        int error;
 
-       if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
+       if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
                return 0;
 
        INIT_LIST_HEAD(&isol.buffers);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 440fca3e7e5d..b56e811b6f7c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -29,12 +29,13 @@ struct vm_area_struct;
 #define ___GFP_NOMEMALLOC      0x10000u
 #define ___GFP_HARDWALL        0x20000u
 #define ___GFP_THISNODE        0x40000u
-#define ___GFP_WAIT            0x80000u
+#define ___GFP_ATOMIC          0x80000u
 #define ___GFP_NOACCOUNT       0x100000u
 #define ___GFP_NOTRACK         0x200000u
-#define ___GFP_NO_KSWAPD       0x400000u
+#define ___GFP_DIRECT_RECLAIM  0x400000u
 #define ___GFP_OTHER_NODE      0x800000u
 #define ___GFP_WRITE           0x1000000u
+#define ___GFP_KSWAPD_RECLAIM  0x2000000u
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
@@ -71,7 +72,7 @@ struct vm_area_struct;
  * __GFP_MOVABLE: Flag that this page will be movable by the page migration
  * mechanism or reclaimed
  */
-#define __GFP_WAIT     ((__force gfp_t)___GFP_WAIT)     /* Can wait and reschedule? */
+#define __GFP_ATOMIC   ((__force gfp_t)___GFP_ATOMIC)   /* Caller cannot wait or reschedule */
 #define __GFP_HIGH     ((__force gfp_t)___GFP_HIGH)     /* Should access emergency pools? */
 #define __GFP_IO       ((__force gfp_t)___GFP_IO)       /* Can start physical IO? */
 #define __GFP_FS       ((__force gfp_t)___GFP_FS)       /* Can call down to low-level FS? */
@@ -94,23 +95,37 @@ struct vm_area_struct;
 #define __GFP_NOACCOUNT        ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
 #define __GFP_NOTRACK  ((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
 
-#define __GFP_NO_KSWAPD        ((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
 #define __GFP_WRITE    ((__force gfp_t)___GFP_WRITE)    /* Allocator intends to dirty page */
 
 /*
+ * A caller that is willing to wait may enter direct reclaim and will
+ * wake kswapd to reclaim pages in the background until the high
+ * watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to
+ * avoid unnecessary delays when a fallback option is available but
+ * still allow kswapd to reclaim in the background. The kswapd flag
+ * can be cleared when the reclaiming of pages would cause unnecessary
+ * disruption.
+ */
+#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
+#define __GFP_DIRECT_RECLAIM   ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
+#define __GFP_KSWAPD_RECLAIM   ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
+
+/*
  * This may seem redundant, but it's a way of annotating false positives vs.
  * allocations that simply cannot be supported (e.g. page tables).
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 25    /* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 26    /* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
-/* This equals 0, but use constants in case they ever change */
-#define GFP_NOWAIT     (GFP_ATOMIC & ~__GFP_HIGH)
-/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
-#define GFP_ATOMIC     (__GFP_HIGH)
+/*
+ * GFP_ATOMIC callers can not sleep, need the allocation to succeed.
+ * A lower watermark is applied to allow access to "atomic reserves"
+ */
+#define GFP_ATOMIC     (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_NOWAIT     (__GFP_KSWAPD_RECLAIM)
 #define GFP_NOIO       (__GFP_WAIT)
 #define GFP_NOFS       (__GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL     (__GFP_WAIT | __GFP_IO | __GFP_FS)
@@ -119,10 +134,10 @@ struct vm_area_struct;
 #define GFP_USER       (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
 #define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE   (GFP_HIGHUSER | __GFP_MOVABLE)
-#define GFP_IOFS       (__GFP_IO | __GFP_FS)
-#define GFP_TRANSHUGE  (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-                        __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
-                        __GFP_NO_KSWAPD)
+#define GFP_IOFS       (__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM)
+#define GFP_TRANSHUGE  ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+                        __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
+                        ~__GFP_KSWAPD_RECLAIM)
 
 /* This mask makes up all the page movable related flags */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -164,6 +179,11 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
        return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
 }
 
+static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
+{
+       return gfp_flags & __GFP_DIRECT_RECLAIM;
+}
+
 #ifdef CONFIG_HIGHMEM
 #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
 #else
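[Annotation, not part of the commit] With the definitions above in place, the
remaining hunks in this patch are largely mechanical. A hedged sketch of the
two conversion idioms, assuming a hypothetical driver function (not from the
patch):

/* Illustrative only: how a historical "gfp & __GFP_WAIT" test converts. */
static void *convert_example(size_t size, gfp_t gfp)
{
        /* Before: if (gfp & __GFP_WAIT) ...; after: use the helper. */
        if (gfpflags_allow_blocking(gfp))
                return kmalloc(size, gfp);      /* may direct-reclaim */

        /* Non-blocking path: drop direct reclaim explicitly but keep
         * __GFP_KSWAPD_RECLAIM so background reclaim can still start. */
        return kmalloc(size, (gfp & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN);
}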
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 24f4dfd94c51..4355129fff91 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1224,7 +1224,7 @@ static inline int skb_cloned(const struct sk_buff *skb)
 
 static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
 {
-       might_sleep_if(pri & __GFP_WAIT);
+       might_sleep_if(gfpflags_allow_blocking(pri));
 
        if (skb_cloned(skb))
                return pskb_expand_head(skb, 0, 0, pri);
@@ -1308,7 +1308,7 @@ static inline int skb_shared(const struct sk_buff *skb)
  */
 static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
 {
-       might_sleep_if(pri & __GFP_WAIT);
+       might_sleep_if(gfpflags_allow_blocking(pri));
        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, pri);
 
@@ -1344,7 +1344,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
 static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
                                          gfp_t pri)
 {
-       might_sleep_if(pri & __GFP_WAIT);
+       might_sleep_if(gfpflags_allow_blocking(pri));
        if (skb_cloned(skb)) {
                struct sk_buff *nskb = skb_copy(skb, pri);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index f570e75e3da9..bbf7c2cf15b4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2041,7 +2041,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-       if (sk->sk_allocation & __GFP_WAIT)
+       if (gfpflags_allow_blocking(sk->sk_allocation))
                return &current->task_frag;
 
        return &sk->sk_frag;
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5b14b7..dde6bf092c8a 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -20,7 +20,7 @@
        {(unsigned long)GFP_ATOMIC,             "GFP_ATOMIC"},          \
        {(unsigned long)GFP_NOIO,               "GFP_NOIO"},            \
        {(unsigned long)__GFP_HIGH,             "GFP_HIGH"},            \
-       {(unsigned long)__GFP_WAIT,             "GFP_WAIT"},            \
+       {(unsigned long)__GFP_ATOMIC,           "GFP_ATOMIC"},          \
        {(unsigned long)__GFP_IO,               "GFP_IO"},              \
        {(unsigned long)__GFP_COLD,             "GFP_COLD"},            \
        {(unsigned long)__GFP_NOWARN,           "GFP_NOWARN"},          \
@@ -36,7 +36,8 @@
        {(unsigned long)__GFP_RECLAIMABLE,      "GFP_RECLAIMABLE"},     \
        {(unsigned long)__GFP_MOVABLE,          "GFP_MOVABLE"},         \
        {(unsigned long)__GFP_NOTRACK,          "GFP_NOTRACK"},         \
-       {(unsigned long)__GFP_NO_KSWAPD,        "GFP_NO_KSWAPD"},       \
+       {(unsigned long)__GFP_DIRECT_RECLAIM,   "GFP_DIRECT_RECLAIM"},  \
+       {(unsigned long)__GFP_KSWAPD_RECLAIM,   "GFP_KSWAPD_RECLAIM"},  \
        {(unsigned long)__GFP_OTHER_NODE,       "GFP_OTHER_NODE"}       \
        ) : "GFP_NOWAIT"
 
42 43
diff --git a/kernel/audit.c b/kernel/audit.c
index 8a056a32ded7..5ffcbd354a52 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1371,16 +1371,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 	if (unlikely(audit_filter_type(type)))
 		return NULL;
 
-	if (gfp_mask & __GFP_WAIT) {
+	if (gfp_mask & __GFP_DIRECT_RECLAIM) {
 		if (audit_pid && audit_pid == current->pid)
-			gfp_mask &= ~__GFP_WAIT;
+			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 		else
 			reserve = 0;
 	}
 
 	while (audit_backlog_limit
 	       && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
-		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
+		if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
 			long sleep_time;
 
 			sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b9d0cce3f9ce..f1603c153890 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -299,7 +299,7 @@ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
 
 	idr_preload(gfp_mask);
 	spin_lock_bh(&cgroup_idr_lock);
-	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_WAIT);
+	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
 	spin_unlock_bh(&cgroup_idr_lock);
 	idr_preload_end();
 	return ret;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4e49cc4c9952..deae3907ac1e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2738,7 +2738,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
 		return;
 
 	/* no reclaim without waiting on it */
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return;
 
 	/* this guy won't enter reclaim */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5235dd4e1e2f..3a970604308f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1779,7 +1779,7 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
 	while (to_alloc-- > 0) {
 		struct page *page;
 
-		page = alloc_image_page(__GFP_HIGHMEM);
+		page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
 		memory_bm_set_bit(bm, page_to_pfn(page));
 	}
 	return nr_highmem;
diff --git a/kernel/smp.c b/kernel/smp.c
index 07854477c164..d903c02223af 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -669,7 +669,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 	cpumask_var_t cpus;
 	int cpu, ret;
 
-	might_sleep_if(gfp_flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_flags));
 
 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
 		preempt_disable();
diff --git a/lib/idr.c b/lib/idr.c
index 5335c43adf46..6098336df267 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -399,7 +399,7 @@ void idr_preload(gfp_t gfp_mask)
 	 * allocation guarantee. Disallow usage from those contexts.
 	 */
 	WARN_ON_ONCE(in_interrupt());
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 
 	preempt_disable();
 
@@ -453,7 +453,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
 	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
 	int id;
 
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 
 	/* sanity checks */
 	if (WARN_ON_ONCE(start < 0))
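
cgroup_idr_alloc() above and the idr changes here follow the same preload pattern: any sleeping happens up front in idr_preload(), and the idr_alloc() call under the spinlock runs with direct reclaim masked off. A hedged sketch of the idiom (the lock and idr names are hypothetical):

	static DEFINE_SPINLOCK(example_lock);
	static DEFINE_IDR(example_idr);

	static int example_idr_alloc(void *ptr, gfp_t gfp_mask)
	{
		int id;

		idr_preload(gfp_mask);	/* may sleep if the mask allows blocking */
		spin_lock(&example_lock);
		id = idr_alloc(&example_idr, ptr, 1, 0,
			       gfp_mask & ~__GFP_DIRECT_RECLAIM);
		spin_unlock(&example_lock);
		idr_preload_end();	/* re-enables preemption */

		return id;	/* new id, or -ENOMEM/-ENOSPC on failure */
	}
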
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index f9ebe1c82060..fcf5d98574ce 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -188,7 +188,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 	 * preloading in the interrupt anyway as all the allocations have to
 	 * be atomic. So just do normal allocation when in interrupt.
 	 */
-	if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) {
+	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
 		struct radix_tree_preload *rtp;
 
 		/*
@@ -249,7 +249,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  * with preemption not disabled.
  *
  * To make use of this facility, the radix tree must be initialised without
- * __GFP_WAIT being passed to INIT_RADIX_TREE().
+ * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  */
 static int __radix_tree_preload(gfp_t gfp_mask)
 {
@@ -286,12 +286,12 @@ out:
  * with preemption not disabled.
  *
  * To make use of this facility, the radix tree must be initialised without
- * __GFP_WAIT being passed to INIT_RADIX_TREE().
+ * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  */
 int radix_tree_preload(gfp_t gfp_mask)
 {
 	/* Warn on non-sensical use... */
-	WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT));
+	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
 	return __radix_tree_preload(gfp_mask);
 }
 EXPORT_SYMBOL(radix_tree_preload);
@@ -303,7 +303,7 @@ EXPORT_SYMBOL(radix_tree_preload);
  */
 int radix_tree_maybe_preload(gfp_t gfp_mask)
 {
-	if (gfp_mask & __GFP_WAIT)
+	if (gfpflags_allow_blocking(gfp_mask))
 		return __radix_tree_preload(gfp_mask);
 	/* Preloading doesn't help anything with this gfp mask, skip it */
 	preempt_disable();
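
radix_tree_maybe_preload() keeps its contract symmetrical: even when the mask does not allow blocking and preloading is skipped, it still disables preemption, so callers can pair it unconditionally with radix_tree_preload_end(). A sketch of the expected caller shape (names hypothetical):

	static int example_insert(struct radix_tree_root *root,
				  unsigned long index, void *item, gfp_t gfp)
	{
		int err = radix_tree_maybe_preload(gfp);

		if (err)
			return err;
		/* the tree lock would be taken here in real code */
		err = radix_tree_insert(root, index, item);
		radix_tree_preload_end();	/* matching preempt_enable() */
		return err;
	}
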
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 619984fc07ec..8ed2ffd963c5 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -637,7 +637,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 {
 	struct bdi_writeback *wb;
 
-	might_sleep_if(gfp & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp));
 
 	if (!memcg_css->parent)
 		return &bdi->wb;
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 312a716fa14c..57312b5d6e12 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -326,7 +326,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
 	size_t offset;
 	void *retval;
 
-	might_sleep_if(mem_flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(mem_flags));
 
 	spin_lock_irqsave(&pool->lock, flags);
 	list_for_each_entry(page, &pool->page_list, page_list) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bc502e590366..05374f09339c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2046,7 +2046,7 @@ retry:
 	if (unlikely(task_in_memcg_oom(current)))
 		goto nomem;
 
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!gfpflags_allow_blocking(gfp_mask))
 		goto nomem;
 
 	mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1);
@@ -4364,8 +4364,8 @@ static int mem_cgroup_do_precharge(unsigned long count)
 {
 	int ret;
 
-	/* Try a single bulk charge without reclaim first */
-	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+	/* Try a single bulk charge without reclaim first, kswapd may wake */
+	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
 	if (!ret) {
 		mc.precharge += count;
 		return ret;
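
mem_cgroup_do_precharge() is an example of the optimistic-first idiom the changelog describes: one bulk attempt with direct reclaim masked off (kswapd can still be woken), then a blocking fallback only if that fails. Roughly, with the one-by-one retry paraphrased from the surrounding function (which is not shown in full here):

	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
	if (ret) {
		/* fall back to reclaiming one charge at a time */
		while (count--) {
			ret = try_charge(mc.to, GFP_KERNEL, 1); /* may sleep */
			if (ret)
				break;
			mc.precharge++;
		}
	}
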
diff --git a/mm/mempool.c b/mm/mempool.c
index 4c533bc51d73..004d42b1dfaf 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -320,13 +320,13 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 	gfp_t gfp_temp;
 
 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
 
 	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
 	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
 	gfp_mask |= __GFP_NOWARN;	/* failures are OK */
 
-	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
+	gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
 
 repeat_alloc:
 
@@ -349,7 +349,7 @@ repeat_alloc:
 	}
 
 	/*
-	 * We use gfp mask w/o __GFP_WAIT or IO for the first round. If
+	 * We use gfp mask w/o direct reclaim or IO for the first round. If
 	 * alloc failed with that and @pool was empty, retry immediately.
 	 */
 	if (gfp_temp != gfp_mask) {
@@ -358,8 +358,8 @@ repeat_alloc:
 		goto repeat_alloc;
 	}
 
-	/* We must not sleep if !__GFP_WAIT */
-	if (!(gfp_mask & __GFP_WAIT)) {
+	/* We must not sleep if !__GFP_DIRECT_RECLAIM */
+	if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
 		spin_unlock_irqrestore(&pool->lock, flags);
 		return NULL;
 	}
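
mempool_alloc() above keeps its two-phase strategy, just spelled with the new flag. The first pass must never sleep so the pool can be tried immediately; the hypothetical helper below isolates the mask manipulation the function performs inline:

	static gfp_t mempool_first_round_mask(gfp_t gfp_mask)
	{
		gfp_mask |= __GFP_NOMEMALLOC;	/* don't use emergency reserves */
		gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
		gfp_mask |= __GFP_NOWARN;	/* failures are OK, pool backs us up */

		/* round one: no direct reclaim, no IO */
		return gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
	}

Only when the first round and the pool both fail does a blocking caller retry with the full mask; a non-blocking caller returns NULL rather than waiting for an element to be freed.
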
diff --git a/mm/migrate.c b/mm/migrate.c
index 2834faba719a..e60379eb23f8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1578,7 +1578,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 					  (GFP_HIGHUSER_MOVABLE |
 					   __GFP_THISNODE | __GFP_NOMEMALLOC |
 					   __GFP_NORETRY | __GFP_NOWARN) &
-					  ~GFP_IOFS, 0);
+					  ~(__GFP_IO | __GFP_FS), 0);
 
 	return newpage;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 67390988881a..70461f3e3378 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -169,12 +169,12 @@ void pm_restrict_gfp_mask(void)
 	WARN_ON(!mutex_is_locked(&pm_mutex));
 	WARN_ON(saved_gfp_mask);
 	saved_gfp_mask = gfp_allowed_mask;
-	gfp_allowed_mask &= ~GFP_IOFS;
+	gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
 }
 
 bool pm_suspended_storage(void)
 {
-	if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
+	if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
 		return false;
 	return true;
 }
@@ -2183,7 +2183,7 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 		return false;
 	if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
 		return false;
-	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT))
+	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_DIRECT_RECLAIM))
 		return false;
 
 	return should_fail(&fail_page_alloc.attr, 1 << order);
@@ -2685,7 +2685,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 	if (test_thread_flag(TIF_MEMDIE) ||
 	    (current->flags & (PF_MEMALLOC | PF_EXITING)))
 		filter &= ~SHOW_MEM_FILTER_NODES;
-	if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+	if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
 		filter &= ~SHOW_MEM_FILTER_NODES;
 
 	if (fmt) {
@@ -2945,7 +2945,6 @@ static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
 	int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
-	const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
 
 	/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
 	BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2954,11 +2953,11 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	 * The caller may dip into page reserves a bit more if the caller
 	 * cannot run direct reclaim, or if the caller has realtime scheduling
 	 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
-	 * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
+	 * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
 	 */
 	alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
-	if (atomic) {
+	if (gfp_mask & __GFP_ATOMIC) {
 		/*
 		 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
 		 * if it can't schedule.
@@ -2995,11 +2994,16 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 	return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
 }
 
+static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
+{
+	return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
+}
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 						struct alloc_context *ac)
 {
-	const gfp_t wait = gfp_mask & __GFP_WAIT;
+	bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
 	struct page *page = NULL;
 	int alloc_flags;
 	unsigned long pages_reclaimed = 0;
@@ -3020,15 +3024,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
+	 * We also sanity check to catch abuse of atomic reserves being used by
+	 * callers that are not in atomic context.
+	 */
+	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
+				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
+		gfp_mask &= ~__GFP_ATOMIC;
+
+	/*
 	 * If this allocation cannot block and it is for a specific node, then
 	 * fail early. There's no need to wakeup kswapd or retry for a
 	 * speculative node-specific allocation.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
+	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim)
 		goto nopage;
 
 retry:
-	if (!(gfp_mask & __GFP_NO_KSWAPD))
+	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
 		wake_all_kswapds(order, ac);
 
 	/*
@@ -3071,8 +3083,8 @@ retry:
 		}
 	}
 
-	/* Atomic allocations - we can't balance anything */
-	if (!wait) {
+	/* Caller is not willing to reclaim, we can't balance anything */
+	if (!can_direct_reclaim) {
 		/*
 		 * All existing users of the deprecated __GFP_NOFAIL are
 		 * blockable, so warn of any new users that actually allow this
@@ -3102,7 +3114,7 @@ retry:
 		goto got_pg;
 
 	/* Checks for THP-specific high-order allocations */
-	if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+	if (is_thp_gfp_mask(gfp_mask)) {
 		/*
 		 * If compaction is deferred for high-order allocations, it is
 		 * because sync compaction recently failed. If this is the case
@@ -3137,8 +3149,7 @@ retry:
 	 * fault, so use asynchronous memory compaction for THP unless it is
 	 * khugepaged trying to collapse.
 	 */
-	if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
-						(current->flags & PF_KTHREAD))
+	if (!is_thp_gfp_mask(gfp_mask) || (current->flags & PF_KTHREAD))
 		migration_mode = MIGRATE_SYNC_LIGHT;
 
 	/* Try direct reclaim and then allocating */
@@ -3209,7 +3220,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	lockdep_trace_alloc(gfp_mask);
 
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
 
 	if (should_fail_alloc_page(gfp_mask, order))
 		return NULL;
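
The page_alloc.c changes are easiest to read against the flag relationships the changelog establishes. The authoritative definitions live in include/linux/gfp.h, modified earlier in this patch; illustratively, they are expected to be:

	#define __GFP_WAIT	(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)
	#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
	#define GFP_ATOMIC	(__GFP_HIGH | __GFP_ATOMIC | __GFP_KSWAPD_RECLAIM)

So GFP_NOWAIT callers no longer look "atomic" to gfp_to_alloc_flags(), and the new WARN_ON_ONCE() in the slowpath strips __GFP_ATOMIC from any caller that also claims it can direct-reclaim.
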
diff --git a/mm/slab.c b/mm/slab.c
index 272e809404d5..a9ef77d19a9a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1031,12 +1031,12 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 }
 
 /*
- * Construct gfp mask to allocate from a specific node but do not invoke reclaim
- * or warn about failures.
+ * Construct gfp mask to allocate from a specific node but do not direct reclaim
+ * or warn about failures. kswapd may still wake to reclaim in the background.
  */
 static inline gfp_t gfp_exact_node(gfp_t flags)
 {
-	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT;
+	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
 }
 #endif
 
@@ -2633,7 +2633,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
 	offset *= cachep->colour_off;
 
-	if (local_flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(local_flags))
 		local_irq_enable();
 
 	/*
@@ -2663,7 +2663,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
 	cache_init_objs(cachep, page);
 
-	if (local_flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(local_flags))
 		local_irq_disable();
 	check_irq_off();
 	spin_lock(&n->list_lock);
@@ -2677,7 +2677,7 @@ static int cache_grow(struct kmem_cache *cachep,
 opps1:
 	kmem_freepages(cachep, page);
 failed:
-	if (local_flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(local_flags))
 		local_irq_disable();
 	return 0;
 }
@@ -2869,7 +2869,7 @@ force_grow:
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
 						gfp_t flags)
 {
-	might_sleep_if(flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(flags));
 #if DEBUG
 	kmem_flagcheck(cachep, flags);
 #endif
@@ -3057,11 +3057,11 @@ retry:
 		 */
 		struct page *page;
 
-		if (local_flags & __GFP_WAIT)
+		if (gfpflags_allow_blocking(local_flags))
 			local_irq_enable();
 		kmem_flagcheck(cache, flags);
 		page = kmem_getpages(cache, local_flags, numa_mem_id());
-		if (local_flags & __GFP_WAIT)
+		if (gfpflags_allow_blocking(local_flags))
 			local_irq_disable();
 		if (page) {
 			/*
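
The slab.c conversions all follow one pattern: interrupts are re-enabled around a page allocation only when the caller's flags permit blocking, so GFP_ATOMIC-style callers keep irqs off throughout. Condensed into a hypothetical helper (kmem_getpages() is the slab-internal page allocator seen in the hunk above):

	static struct page *example_getpages(struct kmem_cache *cachep,
					     gfp_t flags)
	{
		struct page *page;

		if (gfpflags_allow_blocking(flags))
			local_irq_enable();	/* allocation below may sleep */
		page = kmem_getpages(cachep, flags, numa_mem_id());
		if (gfpflags_allow_blocking(flags))
			local_irq_disable();	/* restore expected irq state */
		return page;
	}
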
diff --git a/mm/slub.c b/mm/slub.c
index 75a5fa92ac2a..97695622a858 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1265,7 +1265,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 {
 	flags &= gfp_allowed_mask;
 	lockdep_trace_alloc(flags);
-	might_sleep_if(flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(flags));
 
 	if (should_failslab(s->object_size, flags, s->flags))
 		return NULL;
@@ -1353,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	flags &= gfp_allowed_mask;
 
-	if (flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(flags))
 		local_irq_enable();
 
 	flags |= s->allocflags;
@@ -1363,8 +1363,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	 * so we fall-back to the minimum order allocation.
 	 */
 	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
-	if ((alloc_gfp & __GFP_WAIT) && oo_order(oo) > oo_order(s->min))
-		alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_WAIT;
+	if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
+		alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM;
 
 	page = alloc_slab_page(s, alloc_gfp, node, oo);
 	if (unlikely(!page)) {
@@ -1424,7 +1424,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	page->frozen = 1;
 
 out:
-	if (flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(flags))
 		local_irq_disable();
 	if (!page)
 		return NULL;
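
allocate_slab() above shows the opportunistic high-order idiom: the preferred order is tried without direct reclaim so a miss is cheap, and only the cache's minimum order is worth sleeping for. The flag manipulation, pulled out into a hypothetical helper:

	static gfp_t example_oo_gfp(gfp_t alloc_gfp, bool higher_order)
	{
		if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && higher_order)
			alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) &
				    ~__GFP_DIRECT_RECLAIM;
		return alloc_gfp;
	}
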
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9db9ef5e8481..7ee94dc10000 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1617,7 +1617,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			goto fail;
 		}
 		area->pages[i] = page;
-		if (gfp_mask & __GFP_WAIT)
+		if (gfpflags_allow_blocking(gfp_mask))
 			cond_resched();
 	}
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0cd7eed4e38..2aec4241b42a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1476,7 +1476,7 @@ static int too_many_isolated(struct zone *zone, int file,
 	 * won't get blocked by normal direct-reclaimers, forming a circular
 	 * deadlock.
 	 */
-	if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS)
+	if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
 		inactive >>= 3;
 
 	return isolated > inactive;
@@ -3791,7 +3791,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	/*
 	 * Do not scan if the allocation should not be delayed.
 	 */
-	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
+	if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC))
 		return ZONE_RECLAIM_NOSCAN;
 
 	/*
diff --git a/mm/zswap.c b/mm/zswap.c
index 4043df7c672f..e54166d3732e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -571,7 +571,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 {
 	struct zswap_pool *pool;
-	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
 
 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
 	if (!pool) {
@@ -1011,7 +1011,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	/* store */
 	len = dlen + sizeof(struct zswap_header);
 	ret = zpool_malloc(entry->pool->zpool, len,
-			   __GFP_NORETRY | __GFP_NOWARN, &handle);
+			   __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
+			   &handle);
 	if (ret == -ENOSPC) {
 		zswap_reject_compress_poor++;
 		goto put_dstmem;
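
The zswap hunks illustrate the second hazard from the changelog: a mask assembled from bare __GFP_ bits, rather than derived from GFP_KERNEL and friends, no longer wakes kswapd unless __GFP_KSWAPD_RECLAIM is added back explicitly:

	/* before: after this patch, silently means "don't wake kswapd" */
	gfp_t old_gfp = __GFP_NORETRY | __GFP_NOWARN;
	/* after: background reclaim behaviour preserved */
	gfp_t new_gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
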
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599ba8b2..aa41e6dd6429 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 	len += NET_SKB_PAD;
 
 	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
-	    (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 		if (!skb)
 			goto skb_fail;
@@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	len += NET_SKB_PAD + NET_IP_ALIGN;
 
 	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
-	    (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 		if (!skb)
 			goto skb_fail;
@@ -4452,7 +4452,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 		return NULL;
 
 	gfp_head = gfp_mask;
-	if (gfp_head & __GFP_WAIT)
+	if (gfp_head & __GFP_DIRECT_RECLAIM)
 		gfp_head |= __GFP_REPEAT;
 
 	*errcode = -ENOBUFS;
@@ -4467,7 +4467,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
 	while (order) {
 		if (npages >= 1 << order) {
-			page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
+			page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
 					   __GFP_COMP |
 					   __GFP_NOWARN |
 					   __GFP_NORETRY,
diff --git a/net/core/sock.c b/net/core/sock.c
index 7529eb9463be..1e4dd54bfb5a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1944,8 +1944,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
 
 	pfrag->offset = 0;
 	if (SKB_FRAG_PAGE_ORDER) {
-		pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
-					  __GFP_NOWARN | __GFP_NORETRY,
+		/* Avoid direct reclaim but allow kswapd to wake */
+		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
+					  __GFP_COMP | __GFP_NOWARN |
+					  __GFP_NORETRY,
 					  SKB_FRAG_PAGE_ORDER);
 		if (likely(pfrag->page)) {
 			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
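
skb_page_frag_refill() above is the canonical networking fallback: the high-order frag page is purely opportunistic, so direct reclaim is stripped while kswapd (still requested through the caller's gfp) catches up in the background, and callers fall back to an order-0 page on failure. As a sketch:

	static struct page *example_frag_page(gfp_t gfp, unsigned int order)
	{
		/* fail fast: no direct reclaim, no retries, no warnings */
		return alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
				   __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY,
				   order);
	}
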
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fafe33bdb619..59651af8cc27 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2116,7 +2116,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
 	consume_skb(info.skb2);
 
 	if (info.delivered) {
-		if (info.congested && (allocation & __GFP_WAIT))
+		if (info.congested && gfpflags_allow_blocking(allocation))
 			yield();
 		return 0;
 	}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 96744b75db93..977fb86065b7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
 	gfp_t slab_mask = GFP_NOWAIT;
 	gfp_t page_mask = GFP_NOWAIT;
 
-	if (gfp & __GFP_WAIT) {
+	if (gfp & __GFP_DIRECT_RECLAIM) {
 		slab_mask = GFP_KERNEL;
 		page_mask = GFP_HIGHUSER;
 	}
@@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 	struct ib_recv_wr *failed_wr;
 	unsigned int posted = 0;
 	int ret = 0;
-	bool can_wait = !!(gfp & __GFP_WAIT);
+	bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
 	u32 pos;
 
 	/* the goal here is to just make sure that someone, somewhere
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 692b3e67fb54..6c71ed1caf16 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -500,7 +500,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 	if (bundle->num_conns >= 20) {
 		_debug("too many conns");
 
-		if (!(gfp & __GFP_WAIT)) {
+		if (!gfpflags_allow_blocking(gfp)) {
 			_leave(" = -EAGAIN");
 			return -EAGAIN;
 		}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b00f1f9611d6..559afd0ee7de 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1590,7 +1590,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
 /* Set an association id for a given association */
 int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
 {
-	bool preload = !!(gfp & __GFP_WAIT);
+	bool preload = gfpflags_allow_blocking(gfp);
 	int ret;
 
 	/* If the id is already assigned, keep it. */