-rw-r--r--Documentation/printk-formats.txt29
-rw-r--r--Documentation/vm/balance14
-rw-r--r--Documentation/vm/split_page_table_lock4
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/arm/mm/dma-mapping.c6
-rw-r--r--arch/arm/xen/mm.c2
-rw-r--r--arch/arm64/mm/dma-mapping.c4
-rw-r--r--arch/sh/kernel/cpu/sh5/unwind.c2
-rw-r--r--arch/sh/kernel/traps_64.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_msr.c7
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/xtensa/configs/iss_defconfig1
-rw-r--r--block/bio.c26
-rw-r--r--block/blk-core.c20
-rw-r--r--block/blk-ioc.c2
-rw-r--r--block/blk-mq-tag.c2
-rw-r--r--block/blk-mq.c6
-rw-r--r--block/ioprio.c6
-rw-r--r--block/scsi_ioctl.c6
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c3
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c2
-rw-r--r--drivers/block/nbd.c15
-rw-r--r--drivers/block/osdblk.c2
-rw-r--r--drivers/block/paride/pd.c2
-rw-r--r--drivers/block/pktcdvd.c4
-rw-r--r--drivers/block/zram/zram_drv.c28
-rw-r--r--drivers/connector/connector.c3
-rw-r--r--drivers/firewire/core-cdev.c2
-rw-r--r--drivers/gpu/drm/drm_gem.c2
-rw-r--r--drivers/gpu/drm/drm_lock.c41
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c5
-rw-r--r--drivers/ide/ide-atapi.c2
-rw-r--r--drivers/ide/ide-cd.c2
-rw-r--r--drivers/ide/ide-cd_ioctl.c2
-rw-r--r--drivers/ide/ide-devsets.c2
-rw-r--r--drivers/ide/ide-disk.c2
-rw-r--r--drivers/ide/ide-ioctls.c4
-rw-r--r--drivers/ide/ide-park.c2
-rw-r--r--drivers/ide/ide-pm.c4
-rw-r--r--drivers/ide/ide-tape.c4
-rw-r--r--drivers/ide/ide-taskfile.c4
-rw-r--r--drivers/infiniband/core/sa_query.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c2
-rw-r--r--drivers/iommu/amd_iommu.c2
-rw-r--r--drivers/iommu/intel-iommu.c2
-rw-r--r--drivers/md/dm-crypt.c6
-rw-r--r--drivers/md/dm-kcopyd.c2
-rw-r--r--drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c2
-rw-r--r--drivers/media/pci/solo6x10/solo6x10-v4l2.c2
-rw-r--r--drivers/media/pci/tw68/tw68-video.c2
-rw-r--r--drivers/misc/vmw_balloon.c2
-rw-r--r--drivers/mtd/mtdcore.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c2
-rw-r--r--drivers/nvme/host/pci.c6
-rw-r--r--drivers/scsi/scsi_error.c2
-rw-r--r--drivers/scsi/scsi_lib.c4
-rw-r--r--drivers/staging/android/ion/ion_system_heap.c2
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_private.h2
-rw-r--r--drivers/staging/lustre/lnet/lnet/router.c2
-rw-r--r--drivers/staging/lustre/lnet/selftest/conrpc.c2
-rw-r--r--drivers/staging/lustre/lnet/selftest/rpc.c2
-rw-r--r--drivers/staging/lustre/lustre/libcfs/module.c2
-rw-r--r--drivers/staging/lustre/lustre/libcfs/tracefile.c2
-rw-r--r--drivers/staging/lustre/lustre/llite/remote_perm.c2
-rw-r--r--drivers/staging/lustre/lustre/mgc/mgc_request.c8
-rw-r--r--drivers/staging/lustre/lustre/obdecho/echo_client.c2
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cache.c2
-rw-r--r--drivers/staging/rdma/hfi1/init.c2
-rw-r--r--drivers/staging/rdma/ipath/ipath_file_ops.c2
-rw-r--r--drivers/usb/gadget/function/f_mass_storage.c4
-rw-r--r--drivers/usb/host/u132-hcd.c2
-rw-r--r--drivers/video/fbdev/vermilion/vermilion.c2
-rw-r--r--fs/btrfs/compression.c7
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent_io.c14
-rw-r--r--fs/btrfs/free-space-cache.c4
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/cachefiles/internal.h2
-rw-r--r--fs/ceph/addr.c7
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/coredump.c35
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/readpage.c2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fscache/cookie.c2
-rw-r--r--fs/fscache/page.c6
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jffs2/background.c7
-rw-r--r--fs/jffs2/wbuf.c2
-rw-r--r--fs/logfs/segment.c2
-rw-r--r--fs/mpage.c4
-rw-r--r--fs/namei.c2
-rw-r--r--fs/nfs/file.c6
-rw-r--r--fs/nilfs2/alloc.c308
-rw-r--r--fs/nilfs2/alloc.h1
-rw-r--r--fs/nilfs2/btree.c7
-rw-r--r--fs/nilfs2/dat.c2
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/mdt.c6
-rw-r--r--fs/nilfs2/mdt.h2
-rw-r--r--fs/nilfs2/recovery.c4
-rw-r--r--fs/nilfs2/segment.c107
-rw-r--r--fs/nilfs2/segment.h3
-rw-r--r--fs/nilfs2/sufile.c11
-rw-r--r--fs/nilfs2/super.c17
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/proc/array.c10
-rw-r--r--fs/proc/fd.c14
-rw-r--r--fs/seq_file.c51
-rw-r--r--fs/splice.c2
-rw-r--r--fs/sync.c3
-rw-r--r--fs/xfs/xfs_qm.c2
-rw-r--r--include/drm/drmP.h1
-rw-r--r--include/linux/bitops.h13
-rw-r--r--include/linux/compiler-gcc.h5
-rw-r--r--include/linux/cpuset.h6
-rw-r--r--include/linux/dma-mapping.h18
-rw-r--r--include/linux/gfp.h286
-rw-r--r--include/linux/hugetlb_cgroup.h4
-rw-r--r--include/linux/kernel.h2
-rw-r--r--include/linux/mm.h82
-rw-r--r--include/linux/mm_types.h40
-rw-r--r--include/linux/mmzone.h101
-rw-r--r--include/linux/moduleparam.h1
-rw-r--r--include/linux/page-flags.h80
-rw-r--r--include/linux/pageblock-flags.h2
-rw-r--r--include/linux/pagemap.h7
-rw-r--r--include/linux/rbtree.h12
-rw-r--r--include/linux/sched.h28
-rw-r--r--include/linux/skbuff.h6
-rw-r--r--include/linux/zpool.h8
-rw-r--r--include/linux/zsmalloc.h2
-rw-r--r--include/linux/zutil.h4
-rw-r--r--include/net/sock.h2
-rw-r--r--include/trace/events/gfpflags.h5
-rw-r--r--include/trace/events/nilfs2.h224
-rw-r--r--ipc/msgutil.c1
-rw-r--r--kernel/audit.c6
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/kexec_core.c4
-rw-r--r--kernel/kexec_file.c2
-rw-r--r--kernel/locking/lockdep.c2
-rw-r--r--kernel/panic.c10
-rw-r--r--kernel/params.c3
-rw-r--r--kernel/power/snapshot.c2
-rw-r--r--kernel/power/swap.c16
-rw-r--r--kernel/printk/printk.c13
-rw-r--r--kernel/signal.c53
-rw-r--r--kernel/smp.c2
-rw-r--r--kernel/sys.c4
-rw-r--r--lib/Kconfig.debug3
-rw-r--r--lib/Makefile1
-rw-r--r--lib/dma-debug.c8
-rw-r--r--lib/dynamic_debug.c8
-rw-r--r--lib/halfmd4.c3
-rw-r--r--lib/hexdump.c6
-rw-r--r--lib/idr.c4
-rw-r--r--lib/is_single_threaded.c5
-rw-r--r--lib/kasprintf.c16
-rw-r--r--lib/kobject.c30
-rw-r--r--lib/llist.c4
-rw-r--r--lib/percpu_ida.c2
-rw-r--r--lib/radix-tree.c10
-rw-r--r--lib/test-string_helpers.c36
-rw-r--r--lib/test_printf.c362
-rw-r--r--lib/vsprintf.c80
-rw-r--r--mm/Kconfig12
-rw-r--r--mm/backing-dev.c2
-rw-r--r--mm/debug.c5
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/failslab.c8
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/huge_memory.c15
-rw-r--r--mm/hugetlb.c35
-rw-r--r--mm/hugetlb_cgroup.c2
-rw-r--r--mm/internal.h28
-rw-r--r--mm/memcontrol.c10
-rw-r--r--mm/memory-failure.c7
-rw-r--r--mm/mempool.c10
-rw-r--r--mm/migrate.c4
-rw-r--r--mm/oom_kill.c17
-rw-r--r--mm/page_alloc.c703
-rw-r--r--mm/readahead.c4
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/slab.c35
-rw-r--r--mm/slub.c15
-rw-r--r--mm/swap.c4
-rw-r--r--mm/vmalloc.c4
-rw-r--r--mm/vmscan.c8
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/zbud.c2
-rw-r--r--mm/zpool.c18
-rw-r--r--mm/zsmalloc.c49
-rw-r--r--mm/zswap.c87
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/sock.c6
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rxrpc/ar-connection.c2
-rw-r--r--net/sctp/associola.c2
-rwxr-xr-xscripts/checkpatch.pl66
-rwxr-xr-xscripts/get_maintainer.pl37
-rw-r--r--security/integrity/ima/ima_crypto.c2
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/lib/Makefile8
-rw-r--r--tools/testing/selftests/lib/printf.sh10
211 files changed, 2298 insertions, 1632 deletions
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 2216eb187c21..b784c270105f 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -23,6 +23,10 @@ Example:
 
 Reminder: sizeof() result is of type size_t.
 
+The kernel's printf does not support %n. For obvious reasons, floating
+point formats (%e, %f, %g, %a) are also not recognized. Use of any
+unsupported specifier or length qualifier results in a WARN and early
+return from vsnprintf.
 
 Raw pointer value SHOULD be printed with %p. The kernel supports
 the following extended format specifiers for pointer types:
@@ -119,6 +123,7 @@ Raw buffer as an escaped string:
 	If field width is omitted the 1 byte only will be escaped.
 
 Raw buffer as a hex string:
+
 	%*ph	00 01 02 ... 3f
 	%*phC	00:01:02: ... :3f
 	%*phD	00-01-02- ... -3f
@@ -234,6 +239,7 @@ UUID/GUID addresses:
 	Passed by reference.
 
 dentry names:
+
 	%pd{,2,3,4}
 	%pD{,2,3,4}
 
@@ -256,6 +262,8 @@ struct va_format:
 		va_list *va;
 	};
 
+	Implements a "recursive vsnprintf".
+
 	Do not use this feature without some mechanism to verify the
 	correctness of the format string and va_list arguments.
 
@@ -284,6 +292,27 @@ bitmap and its derivatives such as cpumask and nodemask:
 
 	Passed by reference.
 
+Network device features:
+
+	%pNF	0x000000000000c000
+
+	For printing netdev_features_t.
+
+	Passed by reference.
+
+Command from struct task_struct
+
+	%pT	ls
+
+	For printing executable name excluding path from struct
+	task_struct.
+
+	Passed by reference.
+
+If you add other %p extensions, please extend lib/test_printf.c with
+one or more test cases, if at all feasible.
+
+
 Thank you for your cooperation and attention.
 
 
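For reference, a minimal usage sketch of the hex-string specifiers documented
above, placed in any kernel code that already has <linux/printk.h> available
(illustrative only; the buffer and the calling context are made up, not taken
from the patch):

	u8 mac[6] = { 0x00, 0x01, 0x02, 0x3d, 0x3e, 0x3f };

	/* field width passed as an argument, as in the %*ph forms above */
	pr_info("mac bytes: %*ph\n", (int)sizeof(mac), mac);  /* 00 01 02 3d 3e 3f */
	pr_info("mac:       %*phC\n", (int)sizeof(mac), mac); /* 00:01:02:3d:3e:3f */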
diff --git a/Documentation/vm/balance b/Documentation/vm/balance
index c46e68cf9344..964595481af6 100644
--- a/Documentation/vm/balance
+++ b/Documentation/vm/balance
@@ -1,12 +1,14 @@
 Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
 
-Memory balancing is needed for non __GFP_WAIT as well as for non
-__GFP_IO allocations.
+Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as
+well as for non __GFP_IO allocations.
 
-There are two reasons to be requesting non __GFP_WAIT allocations:
-the caller can not sleep (typically intr context), or does not want
-to incur cost overheads of page stealing and possible swap io for
-whatever reasons.
+The first reason why a caller may avoid reclaim is that the caller can not
+sleep due to holding a spinlock or is in interrupt context. The second may
+be that the caller is willing to fail the allocation without incurring the
+overhead of page reclaim. This may happen for opportunistic high-order
+allocation requests that have order-0 fallback options. In such cases,
+the caller may also wish to avoid waking kswapd.
 
 __GFP_IO allocation requests are made to prevent file system deadlocks.
 
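The two situations described in the rewritten text can be sketched roughly as
follows (a sketch only, not code from this series; the order-4 request and the
surrounding variables are invented for illustration):

	struct sk_buff *skb;
	struct page *page;
	unsigned int len = 128;

	/* Case 1: cannot sleep (interrupt context, spinlock held):
	 * no direct reclaim, though kswapd may still be woken.
	 */
	skb = alloc_skb(len, GFP_ATOMIC);

	/* Case 2: opportunistic high-order attempt with an order-0 fallback:
	 * no reclaim at all, and no kswapd wakeup either.
	 */
	page = alloc_pages(__GFP_NORETRY | __GFP_NOWARN, 4);
	if (!page)
		page = alloc_pages(GFP_KERNEL, 0);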
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock
index 6dea4fd5c961..62842a857dab 100644
--- a/Documentation/vm/split_page_table_lock
+++ b/Documentation/vm/split_page_table_lock
@@ -54,8 +54,8 @@ everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
 which must be called on PTE table allocation / freeing.
 
 Make sure the architecture doesn't use slab allocator for page table
-allocation: slab uses page->slab_cache and page->first_page for its pages.
-These fields share storage with page->ptl.
+allocation: slab uses page->slab_cache for its pages.
+This field shares storage with page->ptl.
 
 PMD split lock only makes sense if you have more than two page table
 levels.
diff --git a/MAINTAINERS b/MAINTAINERS
index 4c5446a6a4a2..7af7f4a01f0b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4209,7 +4209,10 @@ L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon.git
 S:	Maintained
 F:	drivers/extcon/
+F:	include/linux/extcon/
+F:	include/linux/extcon.h
 F:	Documentation/extcon/
+F:	Documentation/devicetree/bindings/extcon/
 
 EXYNOS DP DRIVER
 M:	Jingoo Han <jingoohan1@gmail.com>
@@ -7490,6 +7493,7 @@ S:	Supported
 F:	Documentation/filesystems/nilfs2.txt
 F:	fs/nilfs2/
 F:	include/linux/nilfs2_fs.h
+F:	include/trace/events/nilfs2.h
 
 NINJA SCSI-3 / NINJA SCSI-32Bi (16bit/CardBus) PCMCIA SCSI HOST ADAPTER DRIVER
 M:	YOKOTA Hiroshi <yokota@netlab.is.tsukuba.ac.jp>
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ad4eb2d26e16..e62400e5fb99 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -651,12 +651,12 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
651 651
652 if (nommu()) 652 if (nommu())
653 addr = __alloc_simple_buffer(dev, size, gfp, &page); 653 addr = __alloc_simple_buffer(dev, size, gfp, &page);
654 else if (dev_get_cma_area(dev) && (gfp & __GFP_WAIT)) 654 else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM))
655 addr = __alloc_from_contiguous(dev, size, prot, &page, 655 addr = __alloc_from_contiguous(dev, size, prot, &page,
656 caller, want_vaddr); 656 caller, want_vaddr);
657 else if (is_coherent) 657 else if (is_coherent)
658 addr = __alloc_simple_buffer(dev, size, gfp, &page); 658 addr = __alloc_simple_buffer(dev, size, gfp, &page);
659 else if (!(gfp & __GFP_WAIT)) 659 else if (!gfpflags_allow_blocking(gfp))
660 addr = __alloc_from_pool(size, &page); 660 addr = __alloc_from_pool(size, &page);
661 else 661 else
662 addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, 662 addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
@@ -1363,7 +1363,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
1363 *handle = DMA_ERROR_CODE; 1363 *handle = DMA_ERROR_CODE;
1364 size = PAGE_ALIGN(size); 1364 size = PAGE_ALIGN(size);
1365 1365
1366 if (!(gfp & __GFP_WAIT)) 1366 if (!gfpflags_allow_blocking(gfp))
1367 return __iommu_alloc_atomic(dev, size, handle); 1367 return __iommu_alloc_atomic(dev, size, handle);
1368 1368
1369 /* 1369 /*
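gfpflags_allow_blocking() and __GFP_RECLAIM, which stand in for the old
__GFP_WAIT tests throughout this series, boil down to the direct-reclaim and
kswapd-reclaim bits. Roughly, as used in these call sites (a sketch of the
relationship, not a verbatim quote of include/linux/gfp.h):

	/* __GFP_WAIT is split into two independent hints */
	#define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)

	/* "may this allocation sleep?" now means "may it enter direct reclaim?" */
	static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
	{
		return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
	}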
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 7c34f7126b04..c5f9a9e3d1f3 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -25,7 +25,7 @@
25unsigned long xen_get_swiotlb_free_pages(unsigned int order) 25unsigned long xen_get_swiotlb_free_pages(unsigned int order)
26{ 26{
27 struct memblock_region *reg; 27 struct memblock_region *reg;
28 gfp_t flags = __GFP_NOWARN; 28 gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
29 29
30 for_each_memblock(memory, reg) { 30 for_each_memblock(memory, reg) {
31 if (reg->base < (phys_addr_t)0xffffffff) { 31 if (reg->base < (phys_addr_t)0xffffffff) {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 6320361d8d4c..bb4bf6a06ad6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
100 if (IS_ENABLED(CONFIG_ZONE_DMA) && 100 if (IS_ENABLED(CONFIG_ZONE_DMA) &&
101 dev->coherent_dma_mask <= DMA_BIT_MASK(32)) 101 dev->coherent_dma_mask <= DMA_BIT_MASK(32))
102 flags |= GFP_DMA; 102 flags |= GFP_DMA;
103 if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) { 103 if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
104 struct page *page; 104 struct page *page;
105 void *addr; 105 void *addr;
106 106
@@ -148,7 +148,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
148 148
149 size = PAGE_ALIGN(size); 149 size = PAGE_ALIGN(size);
150 150
151 if (!coherent && !(flags & __GFP_WAIT)) { 151 if (!coherent && !gfpflags_allow_blocking(flags)) {
152 struct page *page = NULL; 152 struct page *page = NULL;
153 void *addr = __alloc_from_pool(size, &page, flags); 153 void *addr = __alloc_from_pool(size, &page, flags);
154 154
diff --git a/arch/sh/kernel/cpu/sh5/unwind.c b/arch/sh/kernel/cpu/sh5/unwind.c
index 10aed41757fc..3a4fed406fc6 100644
--- a/arch/sh/kernel/cpu/sh5/unwind.c
+++ b/arch/sh/kernel/cpu/sh5/unwind.c
@@ -159,7 +159,7 @@ static int lookup_prev_stack_frame(unsigned long fp, unsigned long pc,
159 159
160 /* Sign extend */ 160 /* Sign extend */
161 regcache[dest] = 161 regcache[dest] =
162 ((((s64)(u64)op >> 10) & 0xffff) << 54) >> 54; 162 sign_extend64((((u64)op >> 10) & 0xffff), 9);
163 break; 163 break;
164 case (0xd0 >> 2): /* addi */ 164 case (0xd0 >> 2): /* addi */
165 case (0xd4 >> 2): /* addi.l */ 165 case (0xd4 >> 2): /* addi.l */
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index 112ea11c030d..d208c27ccc67 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -101,7 +101,7 @@ static int generate_and_check_address(struct pt_regs *regs,
101 if (displacement_not_indexed) { 101 if (displacement_not_indexed) {
102 __s64 displacement; 102 __s64 displacement;
103 displacement = (opcode >> 10) & 0x3ff; 103 displacement = (opcode >> 10) & 0x3ff;
104 displacement = ((displacement << 54) >> 54); /* sign extend */ 104 displacement = sign_extend64(displacement, 9);
105 addr = (__u64)((__s64)base_address + (displacement << width_shift)); 105 addr = (__u64)((__s64)base_address + (displacement << width_shift));
106 } else { 106 } else {
107 __u64 offset; 107 __u64 offset;
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
index f32ac13934f2..ec863b9a9f78 100644
--- a/arch/x86/kernel/cpu/perf_event_msr.c
+++ b/arch/x86/kernel/cpu/perf_event_msr.c
@@ -163,10 +163,9 @@ again:
163 goto again; 163 goto again;
164 164
165 delta = now - prev; 165 delta = now - prev;
166 if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) { 166 if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
167 delta <<= 32; 167 delta = sign_extend64(delta, 31);
168 delta >>= 32; /* sign extend */ 168
169 }
170 local64_add(now - prev, &event->count); 169 local64_add(now - prev, &event->count);
171} 170}
172 171
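All three conversions above replace the open-coded shift-pair sign-extension
idiom with the sign_extend64() helper added to include/linux/bitops.h in this
series. A stand-alone check of the equivalence, using a local re-statement of
the helper written from its usage here (treat the exact body as an
approximation):

	#include <assert.h>
	#include <stdint.h>

	/* sign-extend 'value', treating bit 'index' as the sign bit */
	static inline int64_t sign_extend64(uint64_t value, int index)
	{
		uint8_t shift = 63 - index;

		return (int64_t)(value << shift) >> shift;
	}

	int main(void)
	{
		uint64_t field = 0x3ff;	/* 10-bit displacement, sign bit set */
		int64_t old_way = (int64_t)(field << 54) >> 54;	/* old idiom */

		assert(sign_extend64(field, 9) == old_way);
		assert(old_way == -1);
		return 0;
	}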
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cd99433b8ba1..6ba014c61d62 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -90,7 +90,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
90again: 90again:
91 page = NULL; 91 page = NULL;
92 /* CMA can be used only in the context which permits sleeping */ 92 /* CMA can be used only in the context which permits sleeping */
93 if (flag & __GFP_WAIT) { 93 if (gfpflags_allow_blocking(flag)) {
94 page = dma_alloc_from_contiguous(dev, count, get_order(size)); 94 page = dma_alloc_from_contiguous(dev, count, get_order(size));
95 if (page && page_to_phys(page) + size > dma_mask) { 95 if (page && page_to_phys(page) + size > dma_mask) {
96 dma_release_from_contiguous(dev, page, count); 96 dma_release_from_contiguous(dev, page, count);
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index f3dfe0d921c2..44c6764d9146 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -169,7 +169,6 @@ CONFIG_FLATMEM_MANUAL=y
169# CONFIG_SPARSEMEM_MANUAL is not set 169# CONFIG_SPARSEMEM_MANUAL is not set
170CONFIG_FLATMEM=y 170CONFIG_FLATMEM=y
171CONFIG_FLAT_NODE_MEM_MAP=y 171CONFIG_FLAT_NODE_MEM_MAP=y
172CONFIG_PAGEFLAGS_EXTENDED=y
173CONFIG_SPLIT_PTLOCK_CPUS=4 172CONFIG_SPLIT_PTLOCK_CPUS=4
174# CONFIG_PHYS_ADDR_T_64BIT is not set 173# CONFIG_PHYS_ADDR_T_64BIT is not set
175CONFIG_ZONE_DMA_FLAG=1 174CONFIG_ZONE_DMA_FLAG=1
diff --git a/block/bio.c b/block/bio.c
index ad3f276d74bc..4f184d938942 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -211,7 +211,7 @@ fallback:
211 bvl = mempool_alloc(pool, gfp_mask); 211 bvl = mempool_alloc(pool, gfp_mask);
212 } else { 212 } else {
213 struct biovec_slab *bvs = bvec_slabs + *idx; 213 struct biovec_slab *bvs = bvec_slabs + *idx;
214 gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); 214 gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
215 215
216 /* 216 /*
217 * Make this allocation restricted and don't dump info on 217 * Make this allocation restricted and don't dump info on
@@ -221,11 +221,11 @@ fallback:
221 __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; 221 __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
222 222
223 /* 223 /*
224 * Try a slab allocation. If this fails and __GFP_WAIT 224 * Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
225 * is set, retry with the 1-entry mempool 225 * is set, retry with the 1-entry mempool
226 */ 226 */
227 bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); 227 bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
228 if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) { 228 if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
229 *idx = BIOVEC_MAX_IDX; 229 *idx = BIOVEC_MAX_IDX;
230 goto fallback; 230 goto fallback;
231 } 231 }
@@ -395,12 +395,12 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
395 * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is 395 * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
396 * backed by the @bs's mempool. 396 * backed by the @bs's mempool.
397 * 397 *
398 * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be 398 * When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
399 * able to allocate a bio. This is due to the mempool guarantees. To make this 399 * always be able to allocate a bio. This is due to the mempool guarantees.
400 * work, callers must never allocate more than 1 bio at a time from this pool. 400 * To make this work, callers must never allocate more than 1 bio at a time
401 * Callers that need to allocate more than 1 bio must always submit the 401 * from this pool. Callers that need to allocate more than 1 bio must always
402 * previously allocated bio for IO before attempting to allocate a new one. 402 * submit the previously allocated bio for IO before attempting to allocate
403 * Failure to do so can cause deadlocks under memory pressure. 403 * a new one. Failure to do so can cause deadlocks under memory pressure.
404 * 404 *
405 * Note that when running under generic_make_request() (i.e. any block 405 * Note that when running under generic_make_request() (i.e. any block
406 * driver), bios are not submitted until after you return - see the code in 406 * driver), bios are not submitted until after you return - see the code in
@@ -459,13 +459,13 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
459 * We solve this, and guarantee forward progress, with a rescuer 459 * We solve this, and guarantee forward progress, with a rescuer
460 * workqueue per bio_set. If we go to allocate and there are 460 * workqueue per bio_set. If we go to allocate and there are
461 * bios on current->bio_list, we first try the allocation 461 * bios on current->bio_list, we first try the allocation
462 * without __GFP_WAIT; if that fails, we punt those bios we 462 * without __GFP_DIRECT_RECLAIM; if that fails, we punt those
463 * would be blocking to the rescuer workqueue before we retry 463 * bios we would be blocking to the rescuer workqueue before
464 * with the original gfp_flags. 464 * we retry with the original gfp_flags.
465 */ 465 */
466 466
467 if (current->bio_list && !bio_list_empty(current->bio_list)) 467 if (current->bio_list && !bio_list_empty(current->bio_list))
468 gfp_mask &= ~__GFP_WAIT; 468 gfp_mask &= ~__GFP_DIRECT_RECLAIM;
469 469
470 p = mempool_alloc(bs->bio_pool, gfp_mask); 470 p = mempool_alloc(bs->bio_pool, gfp_mask);
471 if (!p && gfp_mask != saved_gfp) { 471 if (!p && gfp_mask != saved_gfp) {
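The mempool guarantee spelled out in the comments above only holds if callers
follow the one-bio-in-flight rule; from the caller's side that looks roughly
like the loop below (a sketch only; nr_segments_left() and fill_bio() are
hypothetical helpers standing in for driver-specific code):

	struct bio *bio;

	while (nr_segments_left(ctx)) {			/* hypothetical */
		/* GFP_NOIO includes __GFP_DIRECT_RECLAIM, so with a bio_set
		 * this may sleep but will not fail.
		 */
		bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, bs);
		fill_bio(ctx, bio);			/* hypothetical */

		/* submit before allocating the next bio from the same pool */
		submit_bio(READ, bio);
	}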
diff --git a/block/blk-core.c b/block/blk-core.c
index 89eec7965870..590cca21c24a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
638 if (percpu_ref_tryget_live(&q->q_usage_counter)) 638 if (percpu_ref_tryget_live(&q->q_usage_counter))
639 return 0; 639 return 0;
640 640
641 if (!(gfp & __GFP_WAIT)) 641 if (!gfpflags_allow_blocking(gfp))
642 return -EBUSY; 642 return -EBUSY;
643 643
644 ret = wait_event_interruptible(q->mq_freeze_wq, 644 ret = wait_event_interruptible(q->mq_freeze_wq,
@@ -1206,8 +1206,8 @@ rq_starved:
1206 * @bio: bio to allocate request for (can be %NULL) 1206 * @bio: bio to allocate request for (can be %NULL)
1207 * @gfp_mask: allocation mask 1207 * @gfp_mask: allocation mask
1208 * 1208 *
1209 * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this 1209 * Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
1210 * function keeps retrying under memory pressure and fails iff @q is dead. 1210 * this function keeps retrying under memory pressure and fails iff @q is dead.
1211 * 1211 *
1212 * Must be called with @q->queue_lock held and, 1212 * Must be called with @q->queue_lock held and,
1213 * Returns ERR_PTR on failure, with @q->queue_lock held. 1213 * Returns ERR_PTR on failure, with @q->queue_lock held.
@@ -1227,7 +1227,7 @@ retry:
1227 if (!IS_ERR(rq)) 1227 if (!IS_ERR(rq))
1228 return rq; 1228 return rq;
1229 1229
1230 if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { 1230 if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
1231 blk_put_rl(rl); 1231 blk_put_rl(rl);
1232 return rq; 1232 return rq;
1233 } 1233 }
@@ -1305,11 +1305,11 @@ EXPORT_SYMBOL(blk_get_request);
1305 * BUG. 1305 * BUG.
1306 * 1306 *
1307 * WARNING: When allocating/cloning a bio-chain, careful consideration should be 1307 * WARNING: When allocating/cloning a bio-chain, careful consideration should be
1308 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for 1308 * given to how you allocate bios. In particular, you cannot use
1309 * anything but the first bio in the chain. Otherwise you risk waiting for IO 1309 * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise
1310 * completion of a bio that hasn't been submitted yet, thus resulting in a 1310 * you risk waiting for IO completion of a bio that hasn't been submitted yet,
1311 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead 1311 * thus resulting in a deadlock. Alternatively bios should be allocated using
1312 * of bio_alloc(), as that avoids the mempool deadlock. 1312 * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock.
1313 * If possible a big IO should be split into smaller parts when allocation 1313 * If possible a big IO should be split into smaller parts when allocation
1314 * fails. Partial allocation should not be an error, or you risk a live-lock. 1314 * fails. Partial allocation should not be an error, or you risk a live-lock.
1315 */ 1315 */
@@ -2038,7 +2038,7 @@ void generic_make_request(struct bio *bio)
2038 do { 2038 do {
2039 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 2039 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
2040 2040
2041 if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) { 2041 if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
2042 2042
2043 q->make_request_fn(q, bio); 2043 q->make_request_fn(q, bio);
2044 2044
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 1a27f45ec776..381cb50a673c 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -289,7 +289,7 @@ struct io_context *get_task_io_context(struct task_struct *task,
289{ 289{
290 struct io_context *ioc; 290 struct io_context *ioc;
291 291
292 might_sleep_if(gfp_flags & __GFP_WAIT); 292 might_sleep_if(gfpflags_allow_blocking(gfp_flags));
293 293
294 do { 294 do {
295 task_lock(task); 295 task_lock(task);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 60ac684c8b8c..a07ca3488d96 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
268 if (tag != -1) 268 if (tag != -1)
269 return tag; 269 return tag;
270 270
271 if (!(data->gfp & __GFP_WAIT)) 271 if (!gfpflags_allow_blocking(data->gfp))
272 return -1; 272 return -1;
273 273
274 bs = bt_wait_ptr(bt, hctx); 274 bs = bt_wait_ptr(bt, hctx);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1c27b3eaef64..694f8703f83c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -244,11 +244,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
244 244
245 ctx = blk_mq_get_ctx(q); 245 ctx = blk_mq_get_ctx(q);
246 hctx = q->mq_ops->map_queue(q, ctx->cpu); 246 hctx = q->mq_ops->map_queue(q, ctx->cpu);
247 blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT, 247 blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
248 reserved, ctx, hctx); 248 reserved, ctx, hctx);
249 249
250 rq = __blk_mq_alloc_request(&alloc_data, rw); 250 rq = __blk_mq_alloc_request(&alloc_data, rw);
251 if (!rq && (gfp & __GFP_WAIT)) { 251 if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
252 __blk_mq_run_hw_queue(hctx); 252 __blk_mq_run_hw_queue(hctx);
253 blk_mq_put_ctx(ctx); 253 blk_mq_put_ctx(ctx);
254 254
@@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
1186 ctx = blk_mq_get_ctx(q); 1186 ctx = blk_mq_get_ctx(q);
1187 hctx = q->mq_ops->map_queue(q, ctx->cpu); 1187 hctx = q->mq_ops->map_queue(q, ctx->cpu);
1188 blk_mq_set_alloc_data(&alloc_data, q, 1188 blk_mq_set_alloc_data(&alloc_data, q,
1189 __GFP_WAIT|GFP_ATOMIC, false, ctx, hctx); 1189 __GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx);
1190 rq = __blk_mq_alloc_request(&alloc_data, rw); 1190 rq = __blk_mq_alloc_request(&alloc_data, rw);
1191 ctx = alloc_data.ctx; 1191 ctx = alloc_data.ctx;
1192 hctx = alloc_data.hctx; 1192 hctx = alloc_data.hctx;
diff --git a/block/ioprio.c b/block/ioprio.c
index 31666c92b46a..cc7800e9eb44 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -123,7 +123,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
123 break; 123 break;
124 124
125 do_each_thread(g, p) { 125 do_each_thread(g, p) {
126 if (!uid_eq(task_uid(p), uid)) 126 if (!uid_eq(task_uid(p), uid) ||
127 !task_pid_vnr(p))
127 continue; 128 continue;
128 ret = set_task_ioprio(p, ioprio); 129 ret = set_task_ioprio(p, ioprio);
129 if (ret) 130 if (ret)
@@ -220,7 +221,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
220 break; 221 break;
221 222
222 do_each_thread(g, p) { 223 do_each_thread(g, p) {
223 if (!uid_eq(task_uid(p), user->uid)) 224 if (!uid_eq(task_uid(p), user->uid) ||
225 !task_pid_vnr(p))
224 continue; 226 continue;
225 tmpio = get_task_ioprio(p); 227 tmpio = get_task_ioprio(p);
226 if (tmpio < 0) 228 if (tmpio < 0)
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index dda653ce7b24..0774799942e0 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -444,7 +444,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
444 444
445 } 445 }
446 446
447 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); 447 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_RECLAIM);
448 if (IS_ERR(rq)) { 448 if (IS_ERR(rq)) {
449 err = PTR_ERR(rq); 449 err = PTR_ERR(rq);
450 goto error_free_buffer; 450 goto error_free_buffer;
@@ -495,7 +495,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
495 break; 495 break;
496 } 496 }
497 497
498 if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) { 498 if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) {
499 err = DRIVER_ERROR << 24; 499 err = DRIVER_ERROR << 24;
500 goto error; 500 goto error;
501 } 501 }
@@ -536,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
536 struct request *rq; 536 struct request *rq;
537 int err; 537 int err;
538 538
539 rq = blk_get_request(q, WRITE, __GFP_WAIT); 539 rq = blk_get_request(q, WRITE, __GFP_RECLAIM);
540 if (IS_ERR(rq)) 540 if (IS_ERR(rq))
541 return PTR_ERR(rq); 541 return PTR_ERR(rq);
542 blk_rq_set_block_pc(rq); 542 blk_rq_set_block_pc(rq);
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index d3d73d114a46..9462d2752850 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1007,7 +1007,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
1007 bm_set_page_unchanged(b->bm_pages[page_nr]); 1007 bm_set_page_unchanged(b->bm_pages[page_nr]);
1008 1008
1009 if (ctx->flags & BM_AIO_COPY_PAGES) { 1009 if (ctx->flags & BM_AIO_COPY_PAGES) {
1010 page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); 1010 page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM);
1011 copy_highpage(page, b->bm_pages[page_nr]); 1011 copy_highpage(page, b->bm_pages[page_nr]);
1012 bm_store_page_idx(page, page_nr); 1012 bm_store_page_idx(page, page_nr);
1013 } else 1013 } else
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c097909c589c..b4b5680ac6ad 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -357,7 +357,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
357 } 357 }
358 358
359 if (has_payload && data_size) { 359 if (has_payload && data_size) {
360 page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT)); 360 page = drbd_alloc_pages(peer_device, nr_pages,
361 gfpflags_allow_blocking(gfp_mask));
361 if (!page) 362 if (!page)
362 goto fail; 363 goto fail;
363 } 364 }
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index f504232c1ee7..a28a562f7b7f 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
173{ 173{
174 struct request *rq; 174 struct request *rq;
175 175
176 rq = blk_mq_alloc_request(dd->queue, 0, __GFP_WAIT, true); 176 rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true);
177 return blk_mq_rq_to_pdu(rq); 177 return blk_mq_rq_to_pdu(rq);
178} 178}
179 179
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1b87623381e2..93b3f99b6865 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -444,9 +444,7 @@ static int nbd_thread_recv(struct nbd_device *nbd)
444 spin_unlock_irqrestore(&nbd->tasks_lock, flags); 444 spin_unlock_irqrestore(&nbd->tasks_lock, flags);
445 445
446 if (signal_pending(current)) { 446 if (signal_pending(current)) {
447 siginfo_t info; 447 ret = kernel_dequeue_signal(NULL);
448
449 ret = dequeue_signal_lock(current, &current->blocked, &info);
450 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", 448 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
451 task_pid_nr(current), current->comm, ret); 449 task_pid_nr(current), current->comm, ret);
452 mutex_lock(&nbd->tx_lock); 450 mutex_lock(&nbd->tx_lock);
@@ -560,11 +558,8 @@ static int nbd_thread_send(void *data)
560 !list_empty(&nbd->waiting_queue)); 558 !list_empty(&nbd->waiting_queue));
561 559
562 if (signal_pending(current)) { 560 if (signal_pending(current)) {
563 siginfo_t info; 561 int ret = kernel_dequeue_signal(NULL);
564 int ret;
565 562
566 ret = dequeue_signal_lock(current, &current->blocked,
567 &info);
568 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", 563 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
569 task_pid_nr(current), current->comm, ret); 564 task_pid_nr(current), current->comm, ret);
570 mutex_lock(&nbd->tx_lock); 565 mutex_lock(&nbd->tx_lock);
@@ -592,10 +587,8 @@ static int nbd_thread_send(void *data)
592 spin_unlock_irqrestore(&nbd->tasks_lock, flags); 587 spin_unlock_irqrestore(&nbd->tasks_lock, flags);
593 588
594 /* Clear maybe pending signals */ 589 /* Clear maybe pending signals */
595 if (signal_pending(current)) { 590 if (signal_pending(current))
596 siginfo_t info; 591 kernel_dequeue_signal(NULL);
597 dequeue_signal_lock(current, &current->blocked, &info);
598 }
599 592
600 return 0; 593 return 0;
601} 594}
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index e22942596207..1b709a4e3b5e 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -271,7 +271,7 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
271 goto err_out; 271 goto err_out;
272 272
273 tmp->bi_bdev = NULL; 273 tmp->bi_bdev = NULL;
274 gfpmask &= ~__GFP_WAIT; 274 gfpmask &= ~__GFP_DIRECT_RECLAIM;
275 tmp->bi_next = NULL; 275 tmp->bi_next = NULL;
276 276
277 if (!new_chain) 277 if (!new_chain)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index b9242d78283d..562b5a4ca7b7 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -723,7 +723,7 @@ static int pd_special_command(struct pd_unit *disk,
723 struct request *rq; 723 struct request *rq;
724 int err = 0; 724 int err = 0;
725 725
726 rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT); 726 rq = blk_get_request(disk->gd->queue, READ, __GFP_RECLAIM);
727 if (IS_ERR(rq)) 727 if (IS_ERR(rq))
728 return PTR_ERR(rq); 728 return PTR_ERR(rq);
729 729
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index cd813f9110bf..2f477d45d6cf 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -704,14 +704,14 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
704 int ret = 0; 704 int ret = 0;
705 705
706 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? 706 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
707 WRITE : READ, __GFP_WAIT); 707 WRITE : READ, __GFP_RECLAIM);
708 if (IS_ERR(rq)) 708 if (IS_ERR(rq))
709 return PTR_ERR(rq); 709 return PTR_ERR(rq);
710 blk_rq_set_block_pc(rq); 710 blk_rq_set_block_pc(rq);
711 711
712 if (cgc->buflen) { 712 if (cgc->buflen) {
713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, 713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
714 __GFP_WAIT); 714 __GFP_RECLAIM);
715 if (ret) 715 if (ret)
716 goto out; 716 goto out;
717 } 717 }
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9fa15bb9d118..81a557c33a1f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -106,7 +106,7 @@ static void zram_set_obj_size(struct zram_meta *meta,
106 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; 106 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
107} 107}
108 108
109static inline int is_partial_io(struct bio_vec *bvec) 109static inline bool is_partial_io(struct bio_vec *bvec)
110{ 110{
111 return bvec->bv_len != PAGE_SIZE; 111 return bvec->bv_len != PAGE_SIZE;
112} 112}
@@ -114,25 +114,25 @@ static inline int is_partial_io(struct bio_vec *bvec)
114/* 114/*
115 * Check if request is within bounds and aligned on zram logical blocks. 115 * Check if request is within bounds and aligned on zram logical blocks.
116 */ 116 */
117static inline int valid_io_request(struct zram *zram, 117static inline bool valid_io_request(struct zram *zram,
118 sector_t start, unsigned int size) 118 sector_t start, unsigned int size)
119{ 119{
120 u64 end, bound; 120 u64 end, bound;
121 121
122 /* unaligned request */ 122 /* unaligned request */
123 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 123 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
124 return 0; 124 return false;
125 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 125 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
126 return 0; 126 return false;
127 127
128 end = start + (size >> SECTOR_SHIFT); 128 end = start + (size >> SECTOR_SHIFT);
129 bound = zram->disksize >> SECTOR_SHIFT; 129 bound = zram->disksize >> SECTOR_SHIFT;
130 /* out of range range */ 130 /* out of range range */
131 if (unlikely(start >= bound || end > bound || start > end)) 131 if (unlikely(start >= bound || end > bound || start > end))
132 return 0; 132 return false;
133 133
134 /* I/O request is valid */ 134 /* I/O request is valid */
135 return 1; 135 return true;
136} 136}
137 137
138static void update_position(u32 *index, int *offset, struct bio_vec *bvec) 138static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
@@ -157,7 +157,7 @@ static inline void update_used_max(struct zram *zram,
157 } while (old_max != cur_max); 157 } while (old_max != cur_max);
158} 158}
159 159
160static int page_zero_filled(void *ptr) 160static bool page_zero_filled(void *ptr)
161{ 161{
162 unsigned int pos; 162 unsigned int pos;
163 unsigned long *page; 163 unsigned long *page;
@@ -166,10 +166,10 @@ static int page_zero_filled(void *ptr)
166 166
167 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { 167 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
168 if (page[pos]) 168 if (page[pos])
169 return 0; 169 return false;
170 } 170 }
171 171
172 return 1; 172 return true;
173} 173}
174 174
175static void handle_zero_page(struct bio_vec *bvec) 175static void handle_zero_page(struct bio_vec *bvec)
@@ -365,6 +365,9 @@ static ssize_t comp_algorithm_store(struct device *dev,
365 struct zram *zram = dev_to_zram(dev); 365 struct zram *zram = dev_to_zram(dev);
366 size_t sz; 366 size_t sz;
367 367
368 if (!zcomp_available_algorithm(buf))
369 return -EINVAL;
370
368 down_write(&zram->init_lock); 371 down_write(&zram->init_lock);
369 if (init_done(zram)) { 372 if (init_done(zram)) {
370 up_write(&zram->init_lock); 373 up_write(&zram->init_lock);
@@ -378,9 +381,6 @@ static ssize_t comp_algorithm_store(struct device *dev,
378 if (sz > 0 && zram->compressor[sz - 1] == '\n') 381 if (sz > 0 && zram->compressor[sz - 1] == '\n')
379 zram->compressor[sz - 1] = 0x00; 382 zram->compressor[sz - 1] = 0x00;
380 383
381 if (!zcomp_available_algorithm(zram->compressor))
382 len = -EINVAL;
383
384 up_write(&zram->init_lock); 384 up_write(&zram->init_lock);
385 return len; 385 return len;
386} 386}
@@ -726,14 +726,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
726 } 726 }
727 727
728 alloced_pages = zs_get_total_pages(meta->mem_pool); 728 alloced_pages = zs_get_total_pages(meta->mem_pool);
729 update_used_max(zram, alloced_pages);
730
729 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 731 if (zram->limit_pages && alloced_pages > zram->limit_pages) {
730 zs_free(meta->mem_pool, handle); 732 zs_free(meta->mem_pool, handle);
731 ret = -ENOMEM; 733 ret = -ENOMEM;
732 goto out; 734 goto out;
733 } 735 }
734 736
735 update_used_max(zram, alloced_pages);
736
737 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); 737 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
738 738
739 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { 739 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 30f522848c73..d7373ca69c99 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -124,7 +124,8 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
124 if (group) 124 if (group)
125 return netlink_broadcast(dev->nls, skb, portid, group, 125 return netlink_broadcast(dev->nls, skb, portid, group,
126 gfp_mask); 126 gfp_mask);
127 return netlink_unicast(dev->nls, skb, portid, !(gfp_mask&__GFP_WAIT)); 127 return netlink_unicast(dev->nls, skb, portid,
128 !gfpflags_allow_blocking(gfp_mask));
128} 129}
129EXPORT_SYMBOL_GPL(cn_netlink_send_mult); 130EXPORT_SYMBOL_GPL(cn_netlink_send_mult);
130 131
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 2a3973a7c441..36a7c2d89a01 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -486,7 +486,7 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
486static int add_client_resource(struct client *client, 486static int add_client_resource(struct client *client,
487 struct client_resource *resource, gfp_t gfp_mask) 487 struct client_resource *resource, gfp_t gfp_mask)
488{ 488{
489 bool preload = !!(gfp_mask & __GFP_WAIT); 489 bool preload = gfpflags_allow_blocking(gfp_mask);
490 unsigned long flags; 490 unsigned long flags;
491 int ret; 491 int ret;
492 492
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 3c2d4abd71c5..1d47d2e9487c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -491,7 +491,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
491 * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping) 491 * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping)
492 * so shmem can relocate pages during swapin if required. 492 * so shmem can relocate pages during swapin if required.
493 */ 493 */
494 BUG_ON((mapping_gfp_mask(mapping) & __GFP_DMA32) && 494 BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) &&
495 (page_to_pfn(p) >= 0x00100000UL)); 495 (page_to_pfn(p) >= 0x00100000UL));
496 } 496 }
497 497
diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
index 4924d381b664..daa2ff12101b 100644
--- a/drivers/gpu/drm/drm_lock.c
+++ b/drivers/gpu/drm/drm_lock.c
@@ -38,8 +38,6 @@
38#include "drm_legacy.h" 38#include "drm_legacy.h"
39#include "drm_internal.h" 39#include "drm_internal.h"
40 40
41static int drm_notifier(void *priv);
42
43static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); 41static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context);
44 42
45/** 43/**
@@ -118,14 +116,8 @@ int drm_legacy_lock(struct drm_device *dev, void *data,
118 * really probably not the correct answer but lets us debug xkb 116 * really probably not the correct answer but lets us debug xkb
119 * xserver for now */ 117 * xserver for now */
120 if (!file_priv->is_master) { 118 if (!file_priv->is_master) {
121 sigemptyset(&dev->sigmask);
122 sigaddset(&dev->sigmask, SIGSTOP);
123 sigaddset(&dev->sigmask, SIGTSTP);
124 sigaddset(&dev->sigmask, SIGTTIN);
125 sigaddset(&dev->sigmask, SIGTTOU);
126 dev->sigdata.context = lock->context; 119 dev->sigdata.context = lock->context;
127 dev->sigdata.lock = master->lock.hw_lock; 120 dev->sigdata.lock = master->lock.hw_lock;
128 block_all_signals(drm_notifier, dev, &dev->sigmask);
129 } 121 }
130 122
131 if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) 123 if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT))
@@ -169,7 +161,6 @@ int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_
169 /* FIXME: Should really bail out here. */ 161 /* FIXME: Should really bail out here. */
170 } 162 }
171 163
172 unblock_all_signals();
173 return 0; 164 return 0;
174} 165}
175 166
@@ -288,38 +279,6 @@ int drm_legacy_lock_free(struct drm_lock_data *lock_data, unsigned int context)
288} 279}
289 280
290/** 281/**
291 * If we get here, it means that the process has called DRM_IOCTL_LOCK
292 * without calling DRM_IOCTL_UNLOCK.
293 *
294 * If the lock is not held, then let the signal proceed as usual. If the lock
295 * is held, then set the contended flag and keep the signal blocked.
296 *
297 * \param priv pointer to a drm_device structure.
298 * \return one if the signal should be delivered normally, or zero if the
299 * signal should be blocked.
300 */
301static int drm_notifier(void *priv)
302{
303 struct drm_device *dev = priv;
304 struct drm_hw_lock *lock = dev->sigdata.lock;
305 unsigned int old, new, prev;
306
307 /* Allow signal delivery if lock isn't held */
308 if (!lock || !_DRM_LOCK_IS_HELD(lock->lock)
309 || _DRM_LOCKING_CONTEXT(lock->lock) != dev->sigdata.context)
310 return 1;
311
312 /* Otherwise, set flag to force call to
313 drmUnlock */
314 do {
315 old = lock->lock;
316 new = old | _DRM_LOCK_CONT;
317 prev = cmpxchg(&lock->lock, old, new);
318 } while (prev != old);
319 return 0;
320}
321
322/**
323 * This function returns immediately and takes the hw lock 282 * This function returns immediately and takes the hw lock
324 * with the kernel context if it is free, otherwise it gets the highest priority when and if 283 * with the kernel context if it is free, otherwise it gets the highest priority when and if
325 * it is eventually released. 284 * it is eventually released.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4d631a946481..399aab265db3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2214,9 +2214,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2214 * Fail silently without starting the shrinker 2214 * Fail silently without starting the shrinker
2215 */ 2215 */
2216 mapping = file_inode(obj->base.filp)->i_mapping; 2216 mapping = file_inode(obj->base.filp)->i_mapping;
2217 gfp = mapping_gfp_mask(mapping); 2217 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2218 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2218 gfp |= __GFP_NORETRY | __GFP_NOWARN;
2219 gfp &= ~(__GFP_IO | __GFP_WAIT);
2220 sg = st->sgl; 2219 sg = st->sgl;
2221 st->nents = 0; 2220 st->nents = 0;
2222 for (i = 0; i < page_count; i++) { 2221 for (i = 0; i < page_count; i++) {
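mapping_gfp_constraint(), used above in place of the old mask-then-clear
sequence, is simply mapping_gfp_mask() restricted by the given mask;
conceptually it is (a sketch of the helper this series adds to
include/linux/pagemap.h, possibly not its literal text):

	static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
						   gfp_t gfp_mask)
	{
		return mapping_gfp_mask(mapping) & gfp_mask;
	}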
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 1362ad80a76c..05352f490d60 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
92 struct request *rq; 92 struct request *rq;
93 int error; 93 int error;
94 94
95 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 95 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
96 rq->cmd_type = REQ_TYPE_DRV_PRIV; 96 rq->cmd_type = REQ_TYPE_DRV_PRIV;
97 rq->special = (char *)pc; 97 rq->special = (char *)pc;
98 98
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 64a6b827b3dd..ef907fd5ba98 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -441,7 +441,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
441 struct request *rq; 441 struct request *rq;
442 int error; 442 int error;
443 443
444 rq = blk_get_request(drive->queue, write, __GFP_WAIT); 444 rq = blk_get_request(drive->queue, write, __GFP_RECLAIM);
445 445
446 memcpy(rq->cmd, cmd, BLK_MAX_CDB); 446 memcpy(rq->cmd, cmd, BLK_MAX_CDB);
447 rq->cmd_type = REQ_TYPE_ATA_PC; 447 rq->cmd_type = REQ_TYPE_ATA_PC;
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 066e39036518..474173eb31bb 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -303,7 +303,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
303 struct request *rq; 303 struct request *rq;
304 int ret; 304 int ret;
305 305
306 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 306 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
307 rq->cmd_type = REQ_TYPE_DRV_PRIV; 307 rq->cmd_type = REQ_TYPE_DRV_PRIV;
308 rq->cmd_flags = REQ_QUIET; 308 rq->cmd_flags = REQ_QUIET;
309 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); 309 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index b05a74d78ef5..0dd43b4fcec6 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -165,7 +165,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
165 if (!(setting->flags & DS_SYNC)) 165 if (!(setting->flags & DS_SYNC))
166 return setting->set(drive, arg); 166 return setting->set(drive, arg);
167 167
168 rq = blk_get_request(q, READ, __GFP_WAIT); 168 rq = blk_get_request(q, READ, __GFP_RECLAIM);
169 rq->cmd_type = REQ_TYPE_DRV_PRIV; 169 rq->cmd_type = REQ_TYPE_DRV_PRIV;
170 rq->cmd_len = 5; 170 rq->cmd_len = 5;
171 rq->cmd[0] = REQ_DEVSET_EXEC; 171 rq->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 56b9708894a5..37a8a907febe 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -477,7 +477,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
477 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) 477 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
478 return -EBUSY; 478 return -EBUSY;
479 479
480 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 480 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
481 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 481 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
482 482
483 drive->mult_req = arg; 483 drive->mult_req = arg;
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index aa2e9b77b20d..d05db2469209 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
125 if (NULL == (void *) arg) { 125 if (NULL == (void *) arg) {
126 struct request *rq; 126 struct request *rq;
127 127
128 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 128 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
129 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 129 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
130 err = blk_execute_rq(drive->queue, NULL, rq, 0); 130 err = blk_execute_rq(drive->queue, NULL, rq, 0);
131 blk_put_request(rq); 131 blk_put_request(rq);
@@ -221,7 +221,7 @@ static int generic_drive_reset(ide_drive_t *drive)
221 struct request *rq; 221 struct request *rq;
222 int ret = 0; 222 int ret = 0;
223 223
224 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 224 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
225 rq->cmd_type = REQ_TYPE_DRV_PRIV; 225 rq->cmd_type = REQ_TYPE_DRV_PRIV;
226 rq->cmd_len = 1; 226 rq->cmd_len = 1;
227 rq->cmd[0] = REQ_DRIVE_RESET; 227 rq->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index c80868520488..2d7dca56dd24 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -31,7 +31,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
31 } 31 }
32 spin_unlock_irq(&hwif->lock); 32 spin_unlock_irq(&hwif->lock);
33 33
34 rq = blk_get_request(q, READ, __GFP_WAIT); 34 rq = blk_get_request(q, READ, __GFP_RECLAIM);
35 rq->cmd[0] = REQ_PARK_HEADS; 35 rq->cmd[0] = REQ_PARK_HEADS;
36 rq->cmd_len = 1; 36 rq->cmd_len = 1;
37 rq->cmd_type = REQ_TYPE_DRV_PRIV; 37 rq->cmd_type = REQ_TYPE_DRV_PRIV;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 081e43458d50..e34af488693a 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -18,7 +18,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
18 } 18 }
19 19
20 memset(&rqpm, 0, sizeof(rqpm)); 20 memset(&rqpm, 0, sizeof(rqpm));
21 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 21 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
22 rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND; 22 rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND;
23 rq->special = &rqpm; 23 rq->special = &rqpm;
24 rqpm.pm_step = IDE_PM_START_SUSPEND; 24 rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -88,7 +88,7 @@ int generic_ide_resume(struct device *dev)
88 } 88 }
89 89
90 memset(&rqpm, 0, sizeof(rqpm)); 90 memset(&rqpm, 0, sizeof(rqpm));
91 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 91 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
92 rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; 92 rq->cmd_type = REQ_TYPE_ATA_PM_RESUME;
93 rq->cmd_flags |= REQ_PREEMPT; 93 rq->cmd_flags |= REQ_PREEMPT;
94 rq->special = &rqpm; 94 rq->special = &rqpm;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index f5d51d1d09ee..12fa04997dcc 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -852,7 +852,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
852 BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); 852 BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
853 BUG_ON(size < 0 || size % tape->blk_size); 853 BUG_ON(size < 0 || size % tape->blk_size);
854 854
855 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 855 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
856 rq->cmd_type = REQ_TYPE_DRV_PRIV; 856 rq->cmd_type = REQ_TYPE_DRV_PRIV;
857 rq->cmd[13] = cmd; 857 rq->cmd[13] = cmd;
858 rq->rq_disk = tape->disk; 858 rq->rq_disk = tape->disk;
@@ -860,7 +860,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
860 860
861 if (size) { 861 if (size) {
862 ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, 862 ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
863 __GFP_WAIT); 863 __GFP_RECLAIM);
864 if (ret) 864 if (ret)
865 goto out_put; 865 goto out_put;
866 } 866 }
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 0979e126fff1..a716693417a3 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -430,7 +430,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
430 int error; 430 int error;
431 int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE; 431 int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE;
432 432
433 rq = blk_get_request(drive->queue, rw, __GFP_WAIT); 433 rq = blk_get_request(drive->queue, rw, __GFP_RECLAIM);
434 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 434 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
435 435
436 /* 436 /*
@@ -441,7 +441,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
441 */ 441 */
442 if (nsect) { 442 if (nsect) {
443 error = blk_rq_map_kern(drive->queue, rq, buf, 443 error = blk_rq_map_kern(drive->queue, rq, buf,
444 nsect * SECTOR_SIZE, __GFP_WAIT); 444 nsect * SECTOR_SIZE, __GFP_RECLAIM);
445 if (error) 445 if (error)
446 goto put_req; 446 goto put_req;
447 } 447 }
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index dcdaa79e3f0f..2aba774f835b 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1086,7 +1086,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
1086 1086
1087static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) 1087static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
1088{ 1088{
1089 bool preload = !!(gfp_mask & __GFP_WAIT); 1089 bool preload = gfpflags_allow_blocking(gfp_mask);
1090 unsigned long flags; 1090 unsigned long flags;
1091 int ret, id; 1091 int ret, id;
1092 1092
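
The hunks above show the two recurring shapes of this series: a mechanical spelling change (__GFP_WAIT becomes __GFP_RECLAIM in the ide and block request paths) and a semantic one (send_mad() now asks gfpflags_allow_blocking() instead of testing the mask by hand). The following is a standalone sketch of the flag split, using placeholder bit values rather than the real definitions from include/linux/gfp.h; only the relationships between the macros are meant to mirror the kernel after this change.

/* gfp_split_sketch.c - illustrative model of the __GFP_WAIT split.
 * Placeholder bit values; only the macro relationships mirror the kernel. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;

#define __GFP_KSWAPD_RECLAIM 0x01u /* may wake kswapd (background reclaim) */
#define __GFP_DIRECT_RECLAIM 0x02u /* may enter direct reclaim, i.e. may sleep */
#define __GFP_IO             0x04u
#define __GFP_FS             0x08u

/* The old __GFP_WAIT spelling becomes the union of both reclaim bits. */
#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)
#define GFP_NOIO      (__GFP_RECLAIM)
#define GFP_NOFS      (__GFP_RECLAIM | __GFP_IO)
#define GFP_KERNEL    (__GFP_RECLAIM | __GFP_IO | __GFP_FS)

/* Same shape as the helper send_mad() switches to: blocking is implied
 * only by the direct-reclaim bit, never by the kswapd bit. */
static bool gfpflags_allow_blocking(gfp_t flags)
{
	return !!(flags & __GFP_DIRECT_RECLAIM);
}

int main(void)
{
	printf("GFP_KERNEL may block: %d\n", gfpflags_allow_blocking(GFP_KERNEL));
	printf("GFP_KERNEL minus direct reclaim may block: %d\n",
	       gfpflags_allow_blocking(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM));
	return 0;
}

On this model, masking out __GFP_DIRECT_RECLAIM (as the btrfs and fscache radix-tree initialisers further down do) leaves kswapd wake-ups intact while forbidding the allocator from sleeping.
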
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 7e00470adc30..4ff340fe904f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1680,7 +1680,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
1680 * heavy filesystem activity makes these fail, and we can 1680 * heavy filesystem activity makes these fail, and we can
1681 * use compound pages. 1681 * use compound pages.
1682 */ 1682 */
1683 gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; 1683 gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
1684 1684
1685 egrcnt = rcd->rcvegrcnt; 1685 egrcnt = rcd->rcvegrcnt;
1686 egroff = rcd->rcvegr_tid_base; 1686 egroff = rcd->rcvegr_tid_base;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0d533bba4ad1..8b2be1e7714f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2668,7 +2668,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
2668 2668
2669 page = alloc_pages(flag | __GFP_NOWARN, get_order(size)); 2669 page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
2670 if (!page) { 2670 if (!page) {
2671 if (!(flag & __GFP_WAIT)) 2671 if (!gfpflags_allow_blocking(flag))
2672 return NULL; 2672 return NULL;
2673 2673
2674 page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, 2674 page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 7cf80c1a8a16..f1042daef9ad 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3647,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
3647 flags |= GFP_DMA32; 3647 flags |= GFP_DMA32;
3648 } 3648 }
3649 3649
3650 if (flags & __GFP_WAIT) { 3650 if (gfpflags_allow_blocking(flags)) {
3651 unsigned int count = size >> PAGE_SHIFT; 3651 unsigned int count = size >> PAGE_SHIFT;
3652 3652
3653 page = dma_alloc_from_contiguous(dev, count, order); 3653 page = dma_alloc_from_contiguous(dev, count, order);
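
Both IOMMU hunks gate the contiguous-allocator (CMA) path on whether the caller may block: the AMD driver falls back to it only after a cheap alloc_pages() fails and gfpflags_allow_blocking() says sleeping is acceptable, and the Intel driver only tries it at all for blocking callers. Below is a hedged, self-contained sketch of that fallback pattern; fast_pool_alloc() and slow_contig_alloc() are made-up stand-ins, not the drivers' real helpers.

/* fallback_sketch.c - "cheap first, blocking fallback second" pattern. */
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_DIRECT_RECLAIM 0x02u /* placeholder bit, as in the sketch above */

static bool gfpflags_allow_blocking(gfp_t flags)
{
	return !!(flags & __GFP_DIRECT_RECLAIM);
}

/* Hypothetical stand-ins for alloc_pages() and dma_alloc_from_contiguous(). */
static void *fast_pool_alloc(void)   { return NULL; }       /* pretend it failed */
static void *slow_contig_alloc(void) { return malloc(64); } /* may "sleep" */

static void *alloc_coherent_like(gfp_t flag)
{
	void *buf = fast_pool_alloc();

	if (buf)
		return buf;
	/* Atomic callers get NULL instead of a sleeping fallback. */
	if (!gfpflags_allow_blocking(flag))
		return NULL;
	return slow_contig_alloc();
}

int main(void)
{
	void *p;

	printf("atomic caller:   %p\n", alloc_coherent_like(0));
	p = alloc_coherent_like(__GFP_DIRECT_RECLAIM);
	printf("blocking caller: %p\n", p);
	free(p);
	return 0;
}
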
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3729b394432c..917d47e290ae 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -994,7 +994,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
994 struct bio_vec *bvec; 994 struct bio_vec *bvec;
995 995
996retry: 996retry:
997 if (unlikely(gfp_mask & __GFP_WAIT)) 997 if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
998 mutex_lock(&cc->bio_alloc_lock); 998 mutex_lock(&cc->bio_alloc_lock);
999 999
1000 clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); 1000 clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
@@ -1010,7 +1010,7 @@ retry:
1010 if (!page) { 1010 if (!page) {
1011 crypt_free_buffer_pages(cc, clone); 1011 crypt_free_buffer_pages(cc, clone);
1012 bio_put(clone); 1012 bio_put(clone);
1013 gfp_mask |= __GFP_WAIT; 1013 gfp_mask |= __GFP_DIRECT_RECLAIM;
1014 goto retry; 1014 goto retry;
1015 } 1015 }
1016 1016
@@ -1027,7 +1027,7 @@ retry:
1027 } 1027 }
1028 1028
1029return_clone: 1029return_clone:
1030 if (unlikely(gfp_mask & __GFP_WAIT)) 1030 if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
1031 mutex_unlock(&cc->bio_alloc_lock); 1031 mutex_unlock(&cc->bio_alloc_lock);
1032 1032
1033 return clone; 1033 return clone;
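
crypt_alloc_buffer() keeps its existing strategy, just spelled with the new bit: the per-device mutex is taken only when the mask contains __GFP_DIRECT_RECLAIM, and when an opportunistic (non-blocking) pass runs out of pages it adds that bit and retries under the lock. A minimal standalone model of that retry loop follows; the mutex and page pool are simulated, so this is a sketch of the control flow, not of dm-crypt itself.

/* retry_sketch.c - opportunistic first pass, blocking retry under a lock. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_DIRECT_RECLAIM 0x02u /* placeholder bit */

static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
static int cheap_attempts_left;   /* simulate a pool that runs dry */

static bool try_get_page(gfp_t gfp)
{
	if (gfp & __GFP_DIRECT_RECLAIM)
		return true;                  /* blocking path always succeeds here */
	return cheap_attempts_left-- > 0;     /* non-blocking path may fail */
}

static int alloc_buffer(unsigned nr_pages, gfp_t gfp_mask)
{
	unsigned got;
retry:
	if (gfp_mask & __GFP_DIRECT_RECLAIM)
		pthread_mutex_lock(&alloc_lock);

	for (got = 0; got < nr_pages; got++) {
		if (!try_get_page(gfp_mask)) {
			/* give up this pass and retry with blocking allowed
			 * (dm-crypt also frees the partially built bio here) */
			if (gfp_mask & __GFP_DIRECT_RECLAIM)
				pthread_mutex_unlock(&alloc_lock);
			gfp_mask |= __GFP_DIRECT_RECLAIM;
			goto retry;
		}
	}

	if (gfp_mask & __GFP_DIRECT_RECLAIM)
		pthread_mutex_unlock(&alloc_lock);
	return got;
}

int main(void)
{
	cheap_attempts_left = 2;
	printf("allocated %d pages\n", alloc_buffer(4, 0));
	return 0;
}
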
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 3a7cade5e27d..1452ed9aacb4 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -244,7 +244,7 @@ static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
244 *pages = NULL; 244 *pages = NULL;
245 245
246 do { 246 do {
247 pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY); 247 pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
248 if (unlikely(!pl)) { 248 if (unlikely(!pl)) {
249 /* Use reserved pages */ 249 /* Use reserved pages */
250 pl = kc->pages; 250 pl = kc->pages;
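
kcopyd makes the opposite trade-off: it deliberately asks for a cheap, no-retry allocation (now spelled __GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM, since after the split "no direct reclaim" must still wake kswapd explicitly) and falls back to a pre-reserved pool when that fails. A hedged model of that "opportunistic alloc, else take from reserve" shape, with an invented reserve array standing in for kc->pages:

/* reserve_sketch.c - cheap allocation with a private reserve as fallback. */
#include <stdio.h>
#include <stdlib.h>

#define RESERVE_PAGES 4

static void *reserve[RESERVE_PAGES];
static int reserve_top;

/* Stand-in for alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM):
 * a single cheap attempt that is allowed to fail quietly. */
static void *cheap_alloc(void)
{
	return (rand() & 1) ? malloc(4096) : NULL;
}

static void *get_page_like(void)
{
	void *pl = cheap_alloc();

	if (!pl && reserve_top > 0)
		pl = reserve[--reserve_top];    /* use reserved pages */
	return pl;
}

int main(void)
{
	int i;

	for (reserve_top = 0; reserve_top < RESERVE_PAGES; reserve_top++)
		reserve[reserve_top] = malloc(4096);

	for (i = 0; i < 6; i++)
		printf("page %d -> %p\n", i, get_page_like());
	return 0;
}
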
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
index 1bd2fd47421f..4432fd69b7cb 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
@@ -1297,7 +1297,7 @@ static struct solo_enc_dev *solo_enc_alloc(struct solo_dev *solo_dev,
1297 solo_enc->vidq.ops = &solo_enc_video_qops; 1297 solo_enc->vidq.ops = &solo_enc_video_qops;
1298 solo_enc->vidq.mem_ops = &vb2_dma_sg_memops; 1298 solo_enc->vidq.mem_ops = &vb2_dma_sg_memops;
1299 solo_enc->vidq.drv_priv = solo_enc; 1299 solo_enc->vidq.drv_priv = solo_enc;
1300 solo_enc->vidq.gfp_flags = __GFP_DMA32; 1300 solo_enc->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
1301 solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; 1301 solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
1302 solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf); 1302 solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
1303 solo_enc->vidq.lock = &solo_enc->lock; 1303 solo_enc->vidq.lock = &solo_enc->lock;
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2.c b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
index 26df903585d7..f7ce493b1fee 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
@@ -678,7 +678,7 @@ int solo_v4l2_init(struct solo_dev *solo_dev, unsigned nr)
678 solo_dev->vidq.mem_ops = &vb2_dma_contig_memops; 678 solo_dev->vidq.mem_ops = &vb2_dma_contig_memops;
679 solo_dev->vidq.drv_priv = solo_dev; 679 solo_dev->vidq.drv_priv = solo_dev;
680 solo_dev->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; 680 solo_dev->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
681 solo_dev->vidq.gfp_flags = __GFP_DMA32; 681 solo_dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
682 solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf); 682 solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
683 solo_dev->vidq.lock = &solo_dev->lock; 683 solo_dev->vidq.lock = &solo_dev->lock;
684 ret = vb2_queue_init(&solo_dev->vidq); 684 ret = vb2_queue_init(&solo_dev->vidq);
diff --git a/drivers/media/pci/tw68/tw68-video.c b/drivers/media/pci/tw68/tw68-video.c
index 4c3293dcddbc..46642ef9151b 100644
--- a/drivers/media/pci/tw68/tw68-video.c
+++ b/drivers/media/pci/tw68/tw68-video.c
@@ -979,7 +979,7 @@ int tw68_video_init2(struct tw68_dev *dev, int video_nr)
979 dev->vidq.ops = &tw68_video_qops; 979 dev->vidq.ops = &tw68_video_qops;
980 dev->vidq.mem_ops = &vb2_dma_sg_memops; 980 dev->vidq.mem_ops = &vb2_dma_sg_memops;
981 dev->vidq.drv_priv = dev; 981 dev->vidq.drv_priv = dev;
982 dev->vidq.gfp_flags = __GFP_DMA32; 982 dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
983 dev->vidq.buf_struct_size = sizeof(struct tw68_buf); 983 dev->vidq.buf_struct_size = sizeof(struct tw68_buf);
984 dev->vidq.lock = &dev->lock; 984 dev->vidq.lock = &dev->lock;
985 dev->vidq.min_buffers_needed = 2; 985 dev->vidq.min_buffers_needed = 2;
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 89300870fefb..1e688bfec567 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -75,7 +75,7 @@ MODULE_LICENSE("GPL");
75 75
76/* 76/*
77 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't 77 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
78 * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use 78 * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
79 * __GFP_NOWARN, to suppress page allocation failure warnings. 79 * __GFP_NOWARN, to suppress page allocation failure warnings.
80 */ 80 */
81#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN) 81#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN)
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index a91cee90aef9..95c13b2ffa79 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1216,8 +1216,7 @@ EXPORT_SYMBOL_GPL(mtd_writev);
1216 */ 1216 */
1217void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) 1217void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
1218{ 1218{
1219 gfp_t flags = __GFP_NOWARN | __GFP_WAIT | 1219 gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY;
1220 __GFP_NORETRY | __GFP_NO_KSWAPD;
1221 size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); 1220 size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
1222 void *kbuf; 1221 void *kbuf;
1223 1222
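
mtd_kmalloc_up_to() folds the removed __GFP_NO_KSWAPD into the new scheme by requesting __GFP_DIRECT_RECLAIM without the kswapd bit, alongside __GFP_NOWARN | __GFP_NORETRY, and shrinking the request when that fails. The sketch below models the "ask for less until it fits" loop; the halving policy and the final plain attempt are assumptions about the general shape, not a copy of the mtd helper.

/* shrink_sketch.c - best-effort allocation that shrinks the request size. */
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for kmalloc(size, __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY):
 * pretend anything above 64 KiB fails on the cheap path. */
static void *cheap_kmalloc(size_t size)
{
	return size > 64 * 1024 ? NULL : malloc(size);
}

static void *kmalloc_up_to(size_t *size, size_t min_alloc)
{
	void *buf;

	while (*size > min_alloc) {
		buf = cheap_kmalloc(*size);
		if (buf)
			return buf;
		*size >>= 1;                  /* ask for half as much next time */
	}
	*size = min_alloc;
	return malloc(*size);                 /* last resort: the minimum size */
}

int main(void)
{
	size_t size = 1024 * 1024, min_alloc = 4096;
	void *buf = kmalloc_up_to(&size, min_alloc);

	printf("got %zu bytes at %p\n", size, buf);
	free(buf);
	return 0;
}
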
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 44173be5cbf0..f8d7a2f06950 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -691,7 +691,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask)
691{ 691{
692 if (fp->rx_frag_size) { 692 if (fp->rx_frag_size) {
693 /* GFP_KERNEL allocations are used only during initialization */ 693 /* GFP_KERNEL allocations are used only during initialization */
694 if (unlikely(gfp_mask & __GFP_WAIT)) 694 if (unlikely(gfpflags_allow_blocking(gfp_mask)))
695 return (void *)__get_free_page(gfp_mask); 695 return (void *)__get_free_page(gfp_mask);
696 696
697 return netdev_alloc_frag(fp->rx_frag_size); 697 return netdev_alloc_frag(fp->rx_frag_size);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9f4fe3a5f41e..97b6640a3745 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1025,11 +1025,13 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
1025 req->special = (void *)0; 1025 req->special = (void *)0;
1026 1026
1027 if (buffer && bufflen) { 1027 if (buffer && bufflen) {
1028 ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); 1028 ret = blk_rq_map_kern(q, req, buffer, bufflen,
1029 __GFP_DIRECT_RECLAIM);
1029 if (ret) 1030 if (ret)
1030 goto out; 1031 goto out;
1031 } else if (ubuffer && bufflen) { 1032 } else if (ubuffer && bufflen) {
1032 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT); 1033 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
1034 __GFP_DIRECT_RECLAIM);
1033 if (ret) 1035 if (ret)
1034 goto out; 1036 goto out;
1035 bio = req->bio; 1037 bio = req->bio;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 66a96cd98b97..984ddcb4786d 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1970,7 +1970,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
1970 struct request *req; 1970 struct request *req;
1971 1971
1972 /* 1972 /*
1973 * blk_get_request with GFP_KERNEL (__GFP_WAIT) sleeps until a 1973 * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a
1974 * request becomes available 1974 * request becomes available
1975 */ 1975 */
1976 req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); 1976 req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 126a48c6431e..dd8ad2a44510 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -222,13 +222,13 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
222 int write = (data_direction == DMA_TO_DEVICE); 222 int write = (data_direction == DMA_TO_DEVICE);
223 int ret = DRIVER_ERROR << 24; 223 int ret = DRIVER_ERROR << 24;
224 224
225 req = blk_get_request(sdev->request_queue, write, __GFP_WAIT); 225 req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM);
226 if (IS_ERR(req)) 226 if (IS_ERR(req))
227 return ret; 227 return ret;
228 blk_rq_set_block_pc(req); 228 blk_rq_set_block_pc(req);
229 229
230 if (bufflen && blk_rq_map_kern(sdev->request_queue, req, 230 if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
231 buffer, bufflen, __GFP_WAIT)) 231 buffer, bufflen, __GFP_RECLAIM))
232 goto out; 232 goto out;
233 233
234 req->cmd_len = COMMAND_SIZE(cmd[0]); 234 req->cmd_len = COMMAND_SIZE(cmd[0]);
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
index ada724aab3d5..d4c3e5512dd5 100644
--- a/drivers/staging/android/ion/ion_system_heap.c
+++ b/drivers/staging/android/ion/ion_system_heap.c
@@ -27,7 +27,7 @@
27#include "ion_priv.h" 27#include "ion_priv.h"
28 28
29static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN | 29static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN |
30 __GFP_NORETRY) & ~__GFP_WAIT; 30 __GFP_NORETRY) & ~__GFP_DIRECT_RECLAIM;
31static gfp_t low_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN); 31static gfp_t low_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN);
32static const unsigned int orders[] = {8, 4, 0}; 32static const unsigned int orders[] = {8, 4, 0};
33static const int num_orders = ARRAY_SIZE(orders); 33static const int num_orders = ARRAY_SIZE(orders);
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index 6af733de69ca..f0b0423a716b 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -95,7 +95,7 @@ do { \
95do { \ 95do { \
96 LASSERT(!in_interrupt() || \ 96 LASSERT(!in_interrupt() || \
97 ((size) <= LIBCFS_VMALLOC_SIZE && \ 97 ((size) <= LIBCFS_VMALLOC_SIZE && \
98 ((mask) & __GFP_WAIT) == 0)); \ 98 !gfpflags_allow_blocking(mask))); \
99} while (0) 99} while (0)
100 100
101#define LIBCFS_ALLOC_POST(ptr, size) \ 101#define LIBCFS_ALLOC_POST(ptr, size) \
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index fe49f1b87652..4ea651c6db3a 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -1245,7 +1245,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
1245 for (i = 0; i < npages; i++) { 1245 for (i = 0; i < npages; i++) {
1246 page = alloc_pages_node( 1246 page = alloc_pages_node(
1247 cfs_cpt_spread_node(lnet_cpt_table(), cpt), 1247 cfs_cpt_spread_node(lnet_cpt_table(), cpt),
1248 __GFP_ZERO | GFP_IOFS, 0); 1248 GFP_KERNEL | __GFP_ZERO, 0);
1249 if (page == NULL) { 1249 if (page == NULL) {
1250 while (--i >= 0) 1250 while (--i >= 0)
1251 __free_page(rb->rb_kiov[i].kiov_page); 1251 __free_page(rb->rb_kiov[i].kiov_page);
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index 0060ff64f88e..64a0335934f3 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -860,7 +860,7 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
860 bulk->bk_iovs[i].kiov_offset = 0; 860 bulk->bk_iovs[i].kiov_offset = 0;
861 bulk->bk_iovs[i].kiov_len = len; 861 bulk->bk_iovs[i].kiov_len = len;
862 bulk->bk_iovs[i].kiov_page = 862 bulk->bk_iovs[i].kiov_page =
863 alloc_page(GFP_IOFS); 863 alloc_page(GFP_KERNEL);
864 864
865 if (bulk->bk_iovs[i].kiov_page == NULL) { 865 if (bulk->bk_iovs[i].kiov_page == NULL) {
866 lstcon_rpc_put(*crpc); 866 lstcon_rpc_put(*crpc);
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index 162f9d330496..7005002c15da 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -146,7 +146,7 @@ srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink)
146 int nob; 146 int nob;
147 147
148 pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt), 148 pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt),
149 GFP_IOFS, 0); 149 GFP_KERNEL, 0);
150 if (pg == NULL) { 150 if (pg == NULL) {
151 CERROR("Can't allocate page %d of %d\n", i, bulk_npg); 151 CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
152 srpc_free_bulk(bk); 152 srpc_free_bulk(bk);
diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c
index 50e8fd23fa17..07a68594c279 100644
--- a/drivers/staging/lustre/lustre/libcfs/module.c
+++ b/drivers/staging/lustre/lustre/libcfs/module.c
@@ -319,7 +319,7 @@ static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *a
319 struct libcfs_ioctl_data *data; 319 struct libcfs_ioctl_data *data;
320 int err = 0; 320 int err = 0;
321 321
322 LIBCFS_ALLOC_GFP(buf, 1024, GFP_IOFS); 322 LIBCFS_ALLOC_GFP(buf, 1024, GFP_KERNEL);
323 if (buf == NULL) 323 if (buf == NULL)
324 return -ENOMEM; 324 return -ENOMEM;
325 325
diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.c b/drivers/staging/lustre/lustre/libcfs/tracefile.c
index 973c7c209dfc..f2d018d7823c 100644
--- a/drivers/staging/lustre/lustre/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lustre/libcfs/tracefile.c
@@ -810,7 +810,7 @@ int cfs_trace_allocate_string_buffer(char **str, int nob)
810 if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */ 810 if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */
811 return -EINVAL; 811 return -EINVAL;
812 812
813 *str = kmalloc(nob, GFP_IOFS | __GFP_ZERO); 813 *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
814 if (*str == NULL) 814 if (*str == NULL)
815 return -ENOMEM; 815 return -ENOMEM;
816 816
diff --git a/drivers/staging/lustre/lustre/llite/remote_perm.c b/drivers/staging/lustre/lustre/llite/remote_perm.c
index c902133dfc97..fe4a72268e3a 100644
--- a/drivers/staging/lustre/lustre/llite/remote_perm.c
+++ b/drivers/staging/lustre/lustre/llite/remote_perm.c
@@ -82,7 +82,7 @@ static struct hlist_head *alloc_rmtperm_hash(void)
82 struct hlist_head *hash; 82 struct hlist_head *hash;
83 int i; 83 int i;
84 84
85 hash = kmem_cache_alloc(ll_rmtperm_hash_cachep, GFP_IOFS | __GFP_ZERO); 85 hash = kmem_cache_alloc(ll_rmtperm_hash_cachep, GFP_NOFS | __GFP_ZERO);
86 if (!hash) 86 if (!hash)
87 return NULL; 87 return NULL;
88 88
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index b81efcd997ae..5f53f3b7ceff 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -1112,7 +1112,7 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
1112 LASSERT(cfg->cfg_instance != NULL); 1112 LASSERT(cfg->cfg_instance != NULL);
1113 LASSERT(cfg->cfg_sb == cfg->cfg_instance); 1113 LASSERT(cfg->cfg_sb == cfg->cfg_instance);
1114 1114
1115 inst = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1115 inst = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1116 if (!inst) 1116 if (!inst)
1117 return -ENOMEM; 1117 return -ENOMEM;
1118 1118
@@ -1308,14 +1308,14 @@ static int mgc_process_recover_log(struct obd_device *obd,
1308 if (cfg->cfg_last_idx == 0) /* the first time */ 1308 if (cfg->cfg_last_idx == 0) /* the first time */
1309 nrpages = CONFIG_READ_NRPAGES_INIT; 1309 nrpages = CONFIG_READ_NRPAGES_INIT;
1310 1310
1311 pages = kcalloc(nrpages, sizeof(*pages), GFP_NOFS); 1311 pages = kcalloc(nrpages, sizeof(*pages), GFP_KERNEL);
1312 if (pages == NULL) { 1312 if (pages == NULL) {
1313 rc = -ENOMEM; 1313 rc = -ENOMEM;
1314 goto out; 1314 goto out;
1315 } 1315 }
1316 1316
1317 for (i = 0; i < nrpages; i++) { 1317 for (i = 0; i < nrpages; i++) {
1318 pages[i] = alloc_page(GFP_IOFS); 1318 pages[i] = alloc_page(GFP_KERNEL);
1319 if (pages[i] == NULL) { 1319 if (pages[i] == NULL) {
1320 rc = -ENOMEM; 1320 rc = -ENOMEM;
1321 goto out; 1321 goto out;
@@ -1466,7 +1466,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc,
1466 if (cld->cld_cfg.cfg_sb) 1466 if (cld->cld_cfg.cfg_sb)
1467 lsi = s2lsi(cld->cld_cfg.cfg_sb); 1467 lsi = s2lsi(cld->cld_cfg.cfg_sb);
1468 1468
1469 env = kzalloc(sizeof(*env), GFP_NOFS); 1469 env = kzalloc(sizeof(*env), GFP_KERNEL);
1470 if (!env) 1470 if (!env)
1471 return -ENOMEM; 1471 return -ENOMEM;
1472 1472
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index b6f000bb8c82..f61ef669644c 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -1562,7 +1562,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
1562 (oa->o_valid & OBD_MD_FLFLAGS) != 0 && 1562 (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
1563 (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0); 1563 (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
1564 1564
1565 gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_IOFS : GFP_HIGHUSER; 1565 gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_KERNEL : GFP_HIGHUSER;
1566 1566
1567 LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); 1567 LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
1568 LASSERT(lsm != NULL); 1568 LASSERT(lsm != NULL);
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index cfb83bcfcb17..b1d1a87f05e3 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -346,7 +346,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
346{ 346{
347 struct osc_extent *ext; 347 struct osc_extent *ext;
348 348
349 ext = kmem_cache_alloc(osc_extent_kmem, GFP_IOFS | __GFP_ZERO); 349 ext = kmem_cache_alloc(osc_extent_kmem, GFP_NOFS | __GFP_ZERO);
350 if (ext == NULL) 350 if (ext == NULL)
351 return NULL; 351 return NULL;
352 352
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c
index 47a1202fcbdf..8666f3ad24e9 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c
@@ -1560,7 +1560,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
1560 * heavy filesystem activity makes these fail, and we can 1560 * heavy filesystem activity makes these fail, and we can
1561 * use compound pages. 1561 * use compound pages.
1562 */ 1562 */
1563 gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; 1563 gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
1564 1564
1565 /* 1565 /*
1566 * The minimum size of the eager buffers is a groups of MTU-sized 1566 * The minimum size of the eager buffers is a groups of MTU-sized
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
index 5d9b9dbd8fc4..13c3cd11ab92 100644
--- a/drivers/staging/rdma/ipath/ipath_file_ops.c
+++ b/drivers/staging/rdma/ipath/ipath_file_ops.c
@@ -905,7 +905,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
905 * heavy filesystem activity makes these fail, and we can 905 * heavy filesystem activity makes these fail, and we can
906 * use compound pages. 906 * use compound pages.
907 */ 907 */
908 gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; 908 gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
909 909
910 egrcnt = dd->ipath_rcvegrcnt; 910 egrcnt = dd->ipath_rcvegrcnt;
911 /* TID number offset for this port */ 911 /* TID number offset for this port */
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index cd54e72a6c50..5ec533826621 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -2345,7 +2345,6 @@ static void fsg_disable(struct usb_function *f)
2345 2345
2346static void handle_exception(struct fsg_common *common) 2346static void handle_exception(struct fsg_common *common)
2347{ 2347{
2348 siginfo_t info;
2349 int i; 2348 int i;
2350 struct fsg_buffhd *bh; 2349 struct fsg_buffhd *bh;
2351 enum fsg_state old_state; 2350 enum fsg_state old_state;
@@ -2357,8 +2356,7 @@ static void handle_exception(struct fsg_common *common)
2357 * into a high-priority EXIT exception. 2356 * into a high-priority EXIT exception.
2358 */ 2357 */
2359 for (;;) { 2358 for (;;) {
2360 int sig = 2359 int sig = kernel_dequeue_signal(NULL);
2361 dequeue_signal_lock(current, &current->blocked, &info);
2362 if (!sig) 2360 if (!sig)
2363 break; 2361 break;
2364 if (sig != SIGUSR1) { 2362 if (sig != SIGUSR1) {
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 0a94895a358d..692ccc69345e 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -2244,7 +2244,7 @@ static int u132_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
2244{ 2244{
2245 struct u132 *u132 = hcd_to_u132(hcd); 2245 struct u132 *u132 = hcd_to_u132(hcd);
2246 if (irqs_disabled()) { 2246 if (irqs_disabled()) {
2247 if (__GFP_WAIT & mem_flags) { 2247 if (gfpflags_allow_blocking(mem_flags)) {
2248 printk(KERN_ERR "invalid context for function that might sleep\n"); 2248 printk(KERN_ERR "invalid context for function that might sleep\n");
2249 return -EINVAL; 2249 return -EINVAL;
2250 } 2250 }
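
u132_urb_enqueue() uses the new helper defensively: with interrupts disabled, a caller passing a mask that permits direct reclaim is a bug, so it is rejected with -EINVAL instead of risking a sleep in atomic context. A toy version of that guard, with irqs_disabled() faked by a flag:

/* atomic_guard_sketch.c - reject blocking gfp masks in atomic context. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_DIRECT_RECLAIM 0x02u /* placeholder bit */

static bool fake_irqs_disabled = true;  /* pretend we are in atomic context */

static bool gfpflags_allow_blocking(gfp_t flags)
{
	return !!(flags & __GFP_DIRECT_RECLAIM);
}

static int enqueue(gfp_t mem_flags)
{
	if (fake_irqs_disabled && gfpflags_allow_blocking(mem_flags)) {
		fprintf(stderr, "invalid context for function that might sleep\n");
		return -EINVAL;
	}
	return 0;                            /* proceed with the submission */
}

int main(void)
{
	printf("GFP_ATOMIC-like mask: %d\n", enqueue(0));
	printf("blocking mask:        %d\n", enqueue(__GFP_DIRECT_RECLAIM));
	return 0;
}
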
diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 6b70d7f62b2f..1c1e95a0b8fa 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -99,7 +99,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
99 * below the first 16MB. 99 * below the first 16MB.
100 */ 100 */
101 101
102 flags = __GFP_DMA | __GFP_HIGH; 102 flags = __GFP_DMA | __GFP_HIGH | __GFP_KSWAPD_RECLAIM;
103 va->logical = 103 va->logical =
104 __get_free_pages(flags, --max_order); 104 __get_free_pages(flags, --max_order);
105 } while (va->logical == 0 && max_order > min_order); 105 } while (va->logical == 0 && max_order > min_order);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 97b049ad0594..c473c42d7d6c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -482,13 +482,12 @@ static noinline int add_ra_bio_pages(struct inode *inode,
482 goto next; 482 goto next;
483 } 483 }
484 484
485 page = __page_cache_alloc(mapping_gfp_mask(mapping) & 485 page = __page_cache_alloc(mapping_gfp_constraint(mapping,
486 ~__GFP_FS); 486 ~__GFP_FS));
487 if (!page) 487 if (!page)
488 break; 488 break;
489 489
490 if (add_to_page_cache_lru(page, mapping, pg_index, 490 if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
491 GFP_NOFS)) {
492 page_cache_release(page); 491 page_cache_release(page);
493 goto next; 492 goto next;
494 } 493 }
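
This hunk, and the ceph, cifs, ext4 and mpage hunks further down, replace open-coded "mapping_gfp_mask(mapping) & X" with mapping_gfp_constraint(mapping, X). As far as this series is concerned the helper is just that AND, so the conversions are cosmetic; the sketch below restates the equivalence with the placeholder flag bits from the earlier sketch, and with mapping_gfp_mask() reduced to a stored field.

/* constraint_sketch.c - mapping_gfp_constraint() as a plain mask intersection. */
#include <stdio.h>

typedef unsigned int gfp_t;

#define __GFP_KSWAPD_RECLAIM 0x01u
#define __GFP_DIRECT_RECLAIM 0x02u
#define __GFP_IO             0x04u
#define __GFP_FS             0x08u
#define GFP_KERNEL (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM | __GFP_IO | __GFP_FS)

struct address_space { gfp_t gfp_mask; };   /* reduced to the one field used */

static gfp_t mapping_gfp_mask(const struct address_space *m)
{
	return m->gfp_mask;
}

/* Equivalent of the helper the hunks switch to: constrain the mapping's
 * default mask by the caller-supplied mask. */
static gfp_t mapping_gfp_constraint(const struct address_space *m, gfp_t gfp_mask)
{
	return mapping_gfp_mask(m) & gfp_mask;
}

int main(void)
{
	struct address_space mapping = { .gfp_mask = GFP_KERNEL };

	/* Same value the old spelling produced: GFP_KERNEL without __GFP_FS. */
	printf("old: %#x new: %#x\n",
	       mapping_gfp_mask(&mapping) & ~__GFP_FS,
	       mapping_gfp_constraint(&mapping, ~__GFP_FS));
	return 0;
}
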
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a2e73f6053a8..8c58191249cc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3367,7 +3367,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
3367 3367
3368static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) 3368static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
3369{ 3369{
3370 return mapping_gfp_mask(mapping) & ~__GFP_FS; 3370 return mapping_gfp_constraint(mapping, ~__GFP_FS);
3371} 3371}
3372 3372
3373/* extent-tree.c */ 3373/* extent-tree.c */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2d4667594681..640598c0d0e7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2575,7 +2575,7 @@ int open_ctree(struct super_block *sb,
2575 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; 2575 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
2576 fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ 2576 fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
2577 /* readahead state */ 2577 /* readahead state */
2578 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2578 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
2579 spin_lock_init(&fs_info->reada_lock); 2579 spin_lock_init(&fs_info->reada_lock);
2580 2580
2581 fs_info->thread_pool_size = min_t(unsigned long, 2581 fs_info->thread_pool_size = min_t(unsigned long,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 33a01ea41465..9abe18763a7f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -616,7 +616,7 @@ static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
616 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 616 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
617 clear = 1; 617 clear = 1;
618again: 618again:
619 if (!prealloc && (mask & __GFP_WAIT)) { 619 if (!prealloc && gfpflags_allow_blocking(mask)) {
620 /* 620 /*
621 * Don't care for allocation failure here because we might end 621 * Don't care for allocation failure here because we might end
622 * up not needing the pre-allocated extent state at all, which 622 * up not needing the pre-allocated extent state at all, which
@@ -741,7 +741,7 @@ search_again:
741 if (start > end) 741 if (start > end)
742 goto out; 742 goto out;
743 spin_unlock(&tree->lock); 743 spin_unlock(&tree->lock);
744 if (mask & __GFP_WAIT) 744 if (gfpflags_allow_blocking(mask))
745 cond_resched(); 745 cond_resched();
746 goto again; 746 goto again;
747} 747}
@@ -874,7 +874,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
874 874
875 bits |= EXTENT_FIRST_DELALLOC; 875 bits |= EXTENT_FIRST_DELALLOC;
876again: 876again:
877 if (!prealloc && (mask & __GFP_WAIT)) { 877 if (!prealloc && gfpflags_allow_blocking(mask)) {
878 prealloc = alloc_extent_state(mask); 878 prealloc = alloc_extent_state(mask);
879 BUG_ON(!prealloc); 879 BUG_ON(!prealloc);
880 } 880 }
@@ -1052,7 +1052,7 @@ search_again:
1052 if (start > end) 1052 if (start > end)
1053 goto out; 1053 goto out;
1054 spin_unlock(&tree->lock); 1054 spin_unlock(&tree->lock);
1055 if (mask & __GFP_WAIT) 1055 if (gfpflags_allow_blocking(mask))
1056 cond_resched(); 1056 cond_resched();
1057 goto again; 1057 goto again;
1058} 1058}
@@ -1100,7 +1100,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1100 btrfs_debug_check_extent_io_range(tree, start, end); 1100 btrfs_debug_check_extent_io_range(tree, start, end);
1101 1101
1102again: 1102again:
1103 if (!prealloc && (mask & __GFP_WAIT)) { 1103 if (!prealloc && gfpflags_allow_blocking(mask)) {
1104 /* 1104 /*
1105 * Best effort, don't worry if extent state allocation fails 1105 * Best effort, don't worry if extent state allocation fails
1106 * here for the first iteration. We might have a cached state 1106 * here for the first iteration. We might have a cached state
@@ -1278,7 +1278,7 @@ search_again:
1278 if (start > end) 1278 if (start > end)
1279 goto out; 1279 goto out;
1280 spin_unlock(&tree->lock); 1280 spin_unlock(&tree->lock);
1281 if (mask & __GFP_WAIT) 1281 if (gfpflags_allow_blocking(mask))
1282 cond_resched(); 1282 cond_resched();
1283 first_iteration = false; 1283 first_iteration = false;
1284 goto again; 1284 goto again;
@@ -4386,7 +4386,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
4386 u64 start = page_offset(page); 4386 u64 start = page_offset(page);
4387 u64 end = start + PAGE_CACHE_SIZE - 1; 4387 u64 end = start + PAGE_CACHE_SIZE - 1;
4388 4388
4389 if ((mask & __GFP_WAIT) && 4389 if (gfpflags_allow_blocking(mask) &&
4390 page->mapping->host->i_size > 16 * 1024 * 1024) { 4390 page->mapping->host->i_size > 16 * 1024 * 1024) {
4391 u64 len; 4391 u64 len;
4392 while (start <= end) { 4392 while (start <= end) {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0948d34cb84a..85a1f8621b51 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -85,8 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
85 } 85 }
86 86
87 mapping_set_gfp_mask(inode->i_mapping, 87 mapping_set_gfp_mask(inode->i_mapping,
88 mapping_gfp_mask(inode->i_mapping) & 88 mapping_gfp_constraint(inode->i_mapping,
89 ~(__GFP_FS | __GFP_HIGHMEM)); 89 ~(__GFP_FS | __GFP_HIGHMEM)));
90 90
91 return inode; 91 return inode;
92} 92}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 17ed76d18eb6..9b2dafa5ba59 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -232,8 +232,8 @@ static struct btrfs_device *__alloc_device(void)
232 spin_lock_init(&dev->reada_lock); 232 spin_lock_init(&dev->reada_lock);
233 atomic_set(&dev->reada_in_flight, 0); 233 atomic_set(&dev->reada_in_flight, 0);
234 atomic_set(&dev->dev_stats_ccnt, 0); 234 atomic_set(&dev->dev_stats_ccnt, 0);
235 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); 235 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
236 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); 236 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
237 237
238 return dev; 238 return dev;
239} 239}
diff --git a/fs/buffer.c b/fs/buffer.c
index 82283abb2795..51aff0296ce2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -999,7 +999,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
999 int ret = 0; /* Will call free_more_memory() */ 999 int ret = 0; /* Will call free_more_memory() */
1000 gfp_t gfp_mask; 1000 gfp_t gfp_mask;
1001 1001
1002 gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; 1002 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
1003 1003
1004 /* 1004 /*
1005 * XXX: __getblk_slow() can not really deal with failure and 1005 * XXX: __getblk_slow() can not really deal with failure and
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index aecd0859eacb..9c4b737a54df 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -30,7 +30,7 @@ extern unsigned cachefiles_debug;
30#define CACHEFILES_DEBUG_KLEAVE 2 30#define CACHEFILES_DEBUG_KLEAVE 2
31#define CACHEFILES_DEBUG_KDEBUG 4 31#define CACHEFILES_DEBUG_KDEBUG 4
32 32
33#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) 33#define cachefiles_gfp (__GFP_RECLAIM | __GFP_NORETRY | __GFP_NOMEMALLOC)
34 34
35/* 35/*
36 * node records 36 * node records
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 9d23e788d1df..b7d218a168fb 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1283,8 +1283,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1283 int ret1; 1283 int ret1;
1284 struct address_space *mapping = inode->i_mapping; 1284 struct address_space *mapping = inode->i_mapping;
1285 struct page *page = find_or_create_page(mapping, 0, 1285 struct page *page = find_or_create_page(mapping, 0,
1286 mapping_gfp_mask(mapping) & 1286 mapping_gfp_constraint(mapping,
1287 ~__GFP_FS); 1287 ~__GFP_FS));
1288 if (!page) { 1288 if (!page) {
1289 ret = VM_FAULT_OOM; 1289 ret = VM_FAULT_OOM;
1290 goto out; 1290 goto out;
@@ -1428,7 +1428,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
1428 if (i_size_read(inode) == 0) 1428 if (i_size_read(inode) == 0)
1429 return; 1429 return;
1430 page = find_or_create_page(mapping, 0, 1430 page = find_or_create_page(mapping, 0,
1431 mapping_gfp_mask(mapping) & ~__GFP_FS); 1431 mapping_gfp_constraint(mapping,
1432 ~__GFP_FS));
1432 if (!page) 1433 if (!page)
1433 return; 1434 return;
1434 if (PageUptodate(page)) { 1435 if (PageUptodate(page)) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 47c5c97e2dd3..0068e82217c3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3380,7 +3380,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3380 struct page *page, *tpage; 3380 struct page *page, *tpage;
3381 unsigned int expected_index; 3381 unsigned int expected_index;
3382 int rc; 3382 int rc;
3383 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); 3383 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
3384 3384
3385 INIT_LIST_HEAD(tmplist); 3385 INIT_LIST_HEAD(tmplist);
3386 3386
diff --git a/fs/coredump.c b/fs/coredump.c
index a8f75640ac86..1777331eee76 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -280,23 +280,24 @@ out:
280 return ispipe; 280 return ispipe;
281} 281}
282 282
283static int zap_process(struct task_struct *start, int exit_code) 283static int zap_process(struct task_struct *start, int exit_code, int flags)
284{ 284{
285 struct task_struct *t; 285 struct task_struct *t;
286 int nr = 0; 286 int nr = 0;
287 287
288 /* ignore all signals except SIGKILL, see prepare_signal() */
289 start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
288 start->signal->group_exit_code = exit_code; 290 start->signal->group_exit_code = exit_code;
289 start->signal->group_stop_count = 0; 291 start->signal->group_stop_count = 0;
290 292
291 t = start; 293 for_each_thread(start, t) {
292 do {
293 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 294 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
294 if (t != current && t->mm) { 295 if (t != current && t->mm) {
295 sigaddset(&t->pending.signal, SIGKILL); 296 sigaddset(&t->pending.signal, SIGKILL);
296 signal_wake_up(t, 1); 297 signal_wake_up(t, 1);
297 nr++; 298 nr++;
298 } 299 }
299 } while_each_thread(start, t); 300 }
300 301
301 return nr; 302 return nr;
302} 303}
@@ -311,10 +312,8 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
311 spin_lock_irq(&tsk->sighand->siglock); 312 spin_lock_irq(&tsk->sighand->siglock);
312 if (!signal_group_exit(tsk->signal)) { 313 if (!signal_group_exit(tsk->signal)) {
313 mm->core_state = core_state; 314 mm->core_state = core_state;
314 nr = zap_process(tsk, exit_code);
315 tsk->signal->group_exit_task = tsk; 315 tsk->signal->group_exit_task = tsk;
316 /* ignore all signals except SIGKILL, see prepare_signal() */ 316 nr = zap_process(tsk, exit_code, 0);
317 tsk->signal->flags = SIGNAL_GROUP_COREDUMP;
318 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 317 clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
319 } 318 }
320 spin_unlock_irq(&tsk->sighand->siglock); 319 spin_unlock_irq(&tsk->sighand->siglock);
@@ -360,18 +359,18 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
360 continue; 359 continue;
361 if (g->flags & PF_KTHREAD) 360 if (g->flags & PF_KTHREAD)
362 continue; 361 continue;
363 p = g; 362
364 do { 363 for_each_thread(g, p) {
365 if (p->mm) { 364 if (unlikely(!p->mm))
366 if (unlikely(p->mm == mm)) { 365 continue;
367 lock_task_sighand(p, &flags); 366 if (unlikely(p->mm == mm)) {
368 nr += zap_process(p, exit_code); 367 lock_task_sighand(p, &flags);
369 p->signal->flags = SIGNAL_GROUP_EXIT; 368 nr += zap_process(p, exit_code,
370 unlock_task_sighand(p, &flags); 369 SIGNAL_GROUP_EXIT);
371 } 370 unlock_task_sighand(p, &flags);
372 break;
373 } 371 }
374 } while_each_thread(g, p); 372 break;
373 }
375 } 374 }
376 rcu_read_unlock(); 375 rcu_read_unlock();
377done: 376done:
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3ae0e0427191..18e7554cf94c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -361,7 +361,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
361 361
362 /* 362 /*
363 * bio_alloc() is guaranteed to return a bio when called with 363 * bio_alloc() is guaranteed to return a bio when called with
364 * __GFP_WAIT and we request a valid number of vectors. 364 * __GFP_RECLAIM and we request a valid number of vectors.
365 */ 365 */
366 bio = bio_alloc(GFP_KERNEL, nr_vecs); 366 bio = bio_alloc(GFP_KERNEL, nr_vecs);
367 367
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e8d620a484f6..7d1aad1d9313 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3386,7 +3386,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
3386 int err = 0; 3386 int err = 0;
3387 3387
3388 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, 3388 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3389 mapping_gfp_mask(mapping) & ~__GFP_FS); 3389 mapping_gfp_constraint(mapping, ~__GFP_FS));
3390 if (!page) 3390 if (!page)
3391 return -ENOMEM; 3391 return -ENOMEM;
3392 3392
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index d94af71a4e7f..5dc5e95063de 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -166,7 +166,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
166 page = list_entry(pages->prev, struct page, lru); 166 page = list_entry(pages->prev, struct page, lru);
167 list_del(&page->lru); 167 list_del(&page->lru);
168 if (add_to_page_cache_lru(page, mapping, page->index, 168 if (add_to_page_cache_lru(page, mapping, page->index,
169 GFP_KERNEL & mapping_gfp_mask(mapping))) 169 mapping_gfp_constraint(mapping, GFP_KERNEL)))
170 goto next_page; 170 goto next_page;
171 } 171 }
172 172
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 04d0f1b33409..753f4e68b820 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1061,7 +1061,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1061 return 0; 1061 return 0;
1062 if (journal) 1062 if (journal)
1063 return jbd2_journal_try_to_free_buffers(journal, page, 1063 return jbd2_journal_try_to_free_buffers(journal, page,
1064 wait & ~__GFP_WAIT); 1064 wait & ~__GFP_DIRECT_RECLAIM);
1065 return try_to_free_buffers(page); 1065 return try_to_free_buffers(page);
1066} 1066}
1067 1067
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index d403c69bee08..4304072161aa 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -111,7 +111,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
111 111
112 /* radix tree insertion won't use the preallocation pool unless it's 112 /* radix tree insertion won't use the preallocation pool unless it's
113 * told it may not wait */ 113 * told it may not wait */
114 INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT); 114 INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
115 115
116 switch (cookie->def->type) { 116 switch (cookie->def->type) {
117 case FSCACHE_COOKIE_TYPE_INDEX: 117 case FSCACHE_COOKIE_TYPE_INDEX:
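
The radix-tree initialisers in btrfs above and in fscache here stash GFP_NOFS & ~__GFP_DIRECT_RECLAIM as the tree's allocation mask, so insertion-time node allocations are treated as non-sleeping and can draw on nodes preloaded beforehand, as the comment above the fscache initialiser spells out. A hedged sketch of that idea, with the tree reduced to a struct that remembers its mask:

/* tree_mask_sketch.c - stash a non-blocking gfp mask at init time. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_KSWAPD_RECLAIM 0x01u /* placeholder bits */
#define __GFP_DIRECT_RECLAIM 0x02u
#define __GFP_IO             0x04u
#define GFP_NOFS (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM | __GFP_IO)

struct toy_radix_tree { gfp_t gfp_mask; };

static bool gfpflags_allow_blocking(gfp_t flags)
{
	return !!(flags & __GFP_DIRECT_RECLAIM);
}

static void toy_init(struct toy_radix_tree *t, gfp_t mask)
{
	t->gfp_mask = mask;
}

static void toy_insert(const struct toy_radix_tree *t)
{
	/* Insertion-time node allocations inherit the stored mask. */
	if (gfpflags_allow_blocking(t->gfp_mask))
		printf("insert: node allocation may sleep\n");
	else
		printf("insert: non-sleeping path, preloaded nodes may be used\n");
}

int main(void)
{
	struct toy_radix_tree stores;

	toy_init(&stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	toy_insert(&stores);
	return 0;
}
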
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 483bbc613bf0..79483b3d8c6f 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -58,7 +58,7 @@ bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page)
58 58
59/* 59/*
60 * decide whether a page can be released, possibly by cancelling a store to it 60 * decide whether a page can be released, possibly by cancelling a store to it
61 * - we're allowed to sleep if __GFP_WAIT is flagged 61 * - we're allowed to sleep if __GFP_DIRECT_RECLAIM is flagged
62 */ 62 */
63bool __fscache_maybe_release_page(struct fscache_cookie *cookie, 63bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
64 struct page *page, 64 struct page *page,
@@ -122,7 +122,7 @@ page_busy:
122 * allocator as the work threads writing to the cache may all end up 122 * allocator as the work threads writing to the cache may all end up
123 * sleeping on memory allocation, so we may need to impose a timeout 123 * sleeping on memory allocation, so we may need to impose a timeout
124 * too. */ 124 * too. */
125 if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) { 125 if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) {
126 fscache_stat(&fscache_n_store_vmscan_busy); 126 fscache_stat(&fscache_n_store_vmscan_busy);
127 return false; 127 return false;
128 } 128 }
@@ -132,7 +132,7 @@ page_busy:
132 _debug("fscache writeout timeout page: %p{%lx}", 132 _debug("fscache writeout timeout page: %p{%lx}",
133 page, page->index); 133 page, page->index);
134 134
135 gfp &= ~__GFP_WAIT; 135 gfp &= ~__GFP_DIRECT_RECLAIM;
136 goto try_again; 136 goto try_again;
137} 137}
138EXPORT_SYMBOL(__fscache_maybe_release_page); 138EXPORT_SYMBOL(__fscache_maybe_release_page);
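
__fscache_maybe_release_page() shows the policy side of the split: a page under store is only waited for when the caller both allows direct reclaim and allows filesystem recursion, and if the wait times out the function strips __GFP_DIRECT_RECLAIM and retries the non-sleeping path. A compact model of that decision, with the store and timeout machinery reduced to booleans (so the names here are illustrative, not fscache's):

/* release_policy_sketch.c - gfp-driven "may we wait for this page?" decision. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_DIRECT_RECLAIM 0x02u /* placeholder bits, as in the earlier sketch */
#define __GFP_FS             0x08u

static bool wait_for_store(bool store_finishes_in_time)
{
	return store_finishes_in_time;          /* stand-in for the timed wait */
}

static bool maybe_release_page(bool page_is_being_stored, gfp_t gfp,
			       bool store_finishes_in_time)
{
try_again:
	if (!page_is_being_stored)
		return true;                    /* nothing pending: release it */

	/* Callers that cannot sleep, or cannot recurse into the fs, give up. */
	if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
		return false;

	if (wait_for_store(store_finishes_in_time))
		return true;

	/* Timed out: retry once more, but without sleeping this time. */
	gfp &= ~__GFP_DIRECT_RECLAIM;
	goto try_again;
}

int main(void)
{
	printf("atomic caller, busy page:    %d\n",
	       maybe_release_page(true, 0, true));
	printf("blocking caller, store done: %d\n",
	       maybe_release_page(true, __GFP_DIRECT_RECLAIM | __GFP_FS, true));
	printf("blocking caller, timeout:    %d\n",
	       maybe_release_page(true, __GFP_DIRECT_RECLAIM | __GFP_FS, false));
	return 0;
}
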
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6b8338ec2464..89463eee6791 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1937,8 +1937,8 @@ out:
1937 * @journal: journal for operation 1937 * @journal: journal for operation
1938 * @page: to try and free 1938 * @page: to try and free
1939 * @gfp_mask: we use the mask to detect how hard should we try to release 1939 * @gfp_mask: we use the mask to detect how hard should we try to release
1940 * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to 1940 * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
1941 * release the buffers. 1941 * code to release the buffers.
1942 * 1942 *
1943 * 1943 *
1944 * For all the buffers on this page, 1944 * For all the buffers on this page,
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index bb9cebc9ca8a..e5c1783ab64a 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -80,7 +80,6 @@ static int jffs2_garbage_collect_thread(void *_c)
80 siginitset(&hupmask, sigmask(SIGHUP)); 80 siginitset(&hupmask, sigmask(SIGHUP));
81 allow_signal(SIGKILL); 81 allow_signal(SIGKILL);
82 allow_signal(SIGSTOP); 82 allow_signal(SIGSTOP);
83 allow_signal(SIGCONT);
84 allow_signal(SIGHUP); 83 allow_signal(SIGHUP);
85 84
86 c->gc_task = current; 85 c->gc_task = current;
@@ -121,20 +120,18 @@ static int jffs2_garbage_collect_thread(void *_c)
121 /* Put_super will send a SIGKILL and then wait on the sem. 120 /* Put_super will send a SIGKILL and then wait on the sem.
122 */ 121 */
123 while (signal_pending(current) || freezing(current)) { 122 while (signal_pending(current) || freezing(current)) {
124 siginfo_t info;
125 unsigned long signr; 123 unsigned long signr;
126 124
127 if (try_to_freeze()) 125 if (try_to_freeze())
128 goto again; 126 goto again;
129 127
130 signr = dequeue_signal_lock(current, &current->blocked, &info); 128 signr = kernel_dequeue_signal(NULL);
131 129
132 switch(signr) { 130 switch(signr) {
133 case SIGSTOP: 131 case SIGSTOP:
134 jffs2_dbg(1, "%s(): SIGSTOP received\n", 132 jffs2_dbg(1, "%s(): SIGSTOP received\n",
135 __func__); 133 __func__);
136 set_current_state(TASK_STOPPED); 134 kernel_signal_stop();
137 schedule();
138 break; 135 break;
139 136
140 case SIGKILL: 137 case SIGKILL:
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 63f31c0733c5..f3a4857ff071 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1264,7 +1264,7 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
1264 if ((c->flash_size % c->sector_size) != 0) { 1264 if ((c->flash_size % c->sector_size) != 0) {
1265 c->flash_size = (c->flash_size / c->sector_size) * c->sector_size; 1265 c->flash_size = (c->flash_size / c->sector_size) * c->sector_size;
1266 pr_warn("flash size adjusted to %dKiB\n", c->flash_size); 1266 pr_warn("flash size adjusted to %dKiB\n", c->flash_size);
1267 }; 1267 }
1268 1268
1269 c->wbuf_ofs = 0xFFFFFFFF; 1269 c->wbuf_ofs = 0xFFFFFFFF;
1270 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); 1270 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 7f9b096d8d57..6de0fbfc6c00 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -57,7 +57,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
57 filler_t *filler = super->s_devops->readpage; 57 filler_t *filler = super->s_devops->readpage;
58 struct page *page; 58 struct page *page;
59 59
60 BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); 60 BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS));
61 if (use_filler) 61 if (use_filler)
62 page = read_cache_page(mapping, index, filler, sb); 62 page = read_cache_page(mapping, index, filler, sb);
63 else { 63 else {
diff --git a/fs/mpage.c b/fs/mpage.c
index 09abba7653aa..1480d3a18037 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -361,7 +361,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
361 sector_t last_block_in_bio = 0; 361 sector_t last_block_in_bio = 0;
362 struct buffer_head map_bh; 362 struct buffer_head map_bh;
363 unsigned long first_logical_block = 0; 363 unsigned long first_logical_block = 0;
364 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); 364 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
365 365
366 map_bh.b_state = 0; 366 map_bh.b_state = 0;
367 map_bh.b_size = 0; 367 map_bh.b_size = 0;
@@ -397,7 +397,7 @@ int mpage_readpage(struct page *page, get_block_t get_block)
397 sector_t last_block_in_bio = 0; 397 sector_t last_block_in_bio = 0;
398 struct buffer_head map_bh; 398 struct buffer_head map_bh;
399 unsigned long first_logical_block = 0; 399 unsigned long first_logical_block = 0;
400 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(page->mapping); 400 gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
401 401
402 map_bh.b_state = 0; 402 map_bh.b_state = 0;
403 map_bh.b_size = 0; 403 map_bh.b_size = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 6f567347f14f..d84d7c7515fc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4604,7 +4604,7 @@ EXPORT_SYMBOL(__page_symlink);
4604int page_symlink(struct inode *inode, const char *symname, int len) 4604int page_symlink(struct inode *inode, const char *symname, int len)
4605{ 4605{
4606 return __page_symlink(inode, symname, len, 4606 return __page_symlink(inode, symname, len,
4607 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); 4607 !mapping_gfp_constraint(inode->i_mapping, __GFP_FS));
4608} 4608}
4609EXPORT_SYMBOL(page_symlink); 4609EXPORT_SYMBOL(page_symlink);
4610 4610
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 37f639d50af5..93e236429c5d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -473,8 +473,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
473 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); 473 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
474 474
475 /* Always try to initiate a 'commit' if relevant, but only 475 /* Always try to initiate a 'commit' if relevant, but only
476 * wait for it if __GFP_WAIT is set. Even then, only wait 1 476 * wait for it if the caller allows blocking. Even then,
477 * second and only if the 'bdi' is not congested. 477 * only wait 1 second and only if the 'bdi' is not congested.
478 * Waiting indefinitely can cause deadlocks when the NFS 478 * Waiting indefinitely can cause deadlocks when the NFS
479 * server is on this machine, when a new TCP connection is 479 * server is on this machine, when a new TCP connection is
480 * needed and in other rare cases. There is no particular 480 * needed and in other rare cases. There is no particular
@@ -484,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
484 if (mapping) { 484 if (mapping) {
485 struct nfs_server *nfss = NFS_SERVER(mapping->host); 485 struct nfs_server *nfss = NFS_SERVER(mapping->host);
486 nfs_commit_inode(mapping->host, 0); 486 nfs_commit_inode(mapping->host, 0);
487 if ((gfp & __GFP_WAIT) && 487 if (gfpflags_allow_blocking(gfp) &&
488 !bdi_write_congested(&nfss->backing_dev_info)) { 488 !bdi_write_congested(&nfss->backing_dev_info)) {
489 wait_on_page_bit_killable_timeout(page, PG_private, 489 wait_on_page_bit_killable_timeout(page, PG_private,
490 HZ); 490 HZ);
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 8df0f3b7839b..2ccbf5531554 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -133,38 +133,38 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
133 133
134/** 134/**
135 * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group 135 * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
136 * @inode: inode of metadata file using this allocator
137 * @group: group number
138 * @desc: pointer to descriptor structure for the group 136 * @desc: pointer to descriptor structure for the group
137 * @lock: spin lock protecting @desc
139 */ 138 */
140static unsigned long 139static unsigned long
141nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, 140nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc,
142 const struct nilfs_palloc_group_desc *desc) 141 spinlock_t *lock)
143{ 142{
144 unsigned long nfree; 143 unsigned long nfree;
145 144
146 spin_lock(nilfs_mdt_bgl_lock(inode, group)); 145 spin_lock(lock);
147 nfree = le32_to_cpu(desc->pg_nfrees); 146 nfree = le32_to_cpu(desc->pg_nfrees);
148 spin_unlock(nilfs_mdt_bgl_lock(inode, group)); 147 spin_unlock(lock);
149 return nfree; 148 return nfree;
150} 149}
151 150
152/** 151/**
153 * nilfs_palloc_group_desc_add_entries - adjust count of free entries 152 * nilfs_palloc_group_desc_add_entries - adjust count of free entries
154 * @inode: inode of metadata file using this allocator
155 * @group: group number
156 * @desc: pointer to descriptor structure for the group 153 * @desc: pointer to descriptor structure for the group
154 * @lock: spin lock protecting @desc
157 * @n: delta to be added 155 * @n: delta to be added
158 */ 156 */
159static void 157static u32
160nilfs_palloc_group_desc_add_entries(struct inode *inode, 158nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc,
161 unsigned long group, 159 spinlock_t *lock, u32 n)
162 struct nilfs_palloc_group_desc *desc,
163 u32 n)
164{ 160{
165 spin_lock(nilfs_mdt_bgl_lock(inode, group)); 161 u32 nfree;
162
163 spin_lock(lock);
166 le32_add_cpu(&desc->pg_nfrees, n); 164 le32_add_cpu(&desc->pg_nfrees, n);
167 spin_unlock(nilfs_mdt_bgl_lock(inode, group)); 165 nfree = le32_to_cpu(desc->pg_nfrees);
166 spin_unlock(lock);
167 return nfree;
168} 168}
169 169
170/** 170/**
@@ -240,6 +240,26 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
240} 240}
241 241
242/** 242/**
243 * nilfs_palloc_delete_block - delete a block on the persistent allocator file
244 * @inode: inode of metadata file using this allocator
245 * @blkoff: block offset
246 * @prev: nilfs_bh_assoc struct of the last used buffer
247 * @lock: spin lock protecting @prev
248 */
249static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff,
250 struct nilfs_bh_assoc *prev,
251 spinlock_t *lock)
252{
253 spin_lock(lock);
254 if (prev->bh && blkoff == prev->blkoff) {
255 brelse(prev->bh);
256 prev->bh = NULL;
257 }
258 spin_unlock(lock);
259 return nilfs_mdt_delete_block(inode, blkoff);
260}
261
262/**
243 * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block 263 * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block
244 * @inode: inode of metadata file using this allocator 264 * @inode: inode of metadata file using this allocator
245 * @group: group number 265 * @group: group number
@@ -278,6 +298,22 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode,
278} 298}
279 299
280/** 300/**
301 * nilfs_palloc_delete_bitmap_block - delete a bitmap block
302 * @inode: inode of metadata file using this allocator
303 * @group: group number
304 */
305static int nilfs_palloc_delete_bitmap_block(struct inode *inode,
306 unsigned long group)
307{
308 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
309
310 return nilfs_palloc_delete_block(inode,
311 nilfs_palloc_bitmap_blkoff(inode,
312 group),
313 &cache->prev_bitmap, &cache->lock);
314}
315
316/**
281 * nilfs_palloc_get_entry_block - get buffer head of an entry block 317 * nilfs_palloc_get_entry_block - get buffer head of an entry block
282 * @inode: inode of metadata file using this allocator 318 * @inode: inode of metadata file using this allocator
283 * @nr: serial number of the entry (e.g. inode number) 319 * @nr: serial number of the entry (e.g. inode number)
@@ -296,6 +332,20 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
296} 332}
297 333
298/** 334/**
335 * nilfs_palloc_delete_entry_block - delete an entry block
336 * @inode: inode of metadata file using this allocator
337 * @nr: serial number of the entry
338 */
339static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr)
340{
341 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
342
343 return nilfs_palloc_delete_block(inode,
344 nilfs_palloc_entry_blkoff(inode, nr),
345 &cache->prev_entry, &cache->lock);
346}
347
348/**
299 * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor 349 * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor
300 * @inode: inode of metadata file using this allocator 350 * @inode: inode of metadata file using this allocator
301 * @group: group number 351 * @group: group number
@@ -332,51 +382,40 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
332 382
333/** 383/**
334 * nilfs_palloc_find_available_slot - find available slot in a group 384 * nilfs_palloc_find_available_slot - find available slot in a group
335 * @inode: inode of metadata file using this allocator
336 * @group: group number
337 * @target: offset number of an entry in the group (start point)
338 * @bitmap: bitmap of the group 385 * @bitmap: bitmap of the group
386 * @target: offset number of an entry in the group (start point)
339 * @bsize: size in bits 387 * @bsize: size in bits
388 * @lock: spin lock protecting @bitmap
340 */ 389 */
341static int nilfs_palloc_find_available_slot(struct inode *inode, 390static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
342 unsigned long group,
343 unsigned long target, 391 unsigned long target,
344 unsigned char *bitmap, 392 unsigned bsize,
345 int bsize) 393 spinlock_t *lock)
346{ 394{
347 int curr, pos, end, i; 395 int pos, end = bsize;
348 396
349 if (target > 0) { 397 if (likely(target < bsize)) {
350 end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1); 398 pos = target;
351 if (end > bsize) 399 do {
352 end = bsize; 400 pos = nilfs_find_next_zero_bit(bitmap, end, pos);
353 pos = nilfs_find_next_zero_bit(bitmap, end, target); 401 if (pos >= end)
354 if (pos < end && 402 break;
355 !nilfs_set_bit_atomic( 403 if (!nilfs_set_bit_atomic(lock, pos, bitmap))
356 nilfs_mdt_bgl_lock(inode, group), pos, bitmap))
357 return pos;
358 } else
359 end = 0;
360
361 for (i = 0, curr = end;
362 i < bsize;
363 i += BITS_PER_LONG, curr += BITS_PER_LONG) {
364 /* wrap around */
365 if (curr >= bsize)
366 curr = 0;
367 while (*((unsigned long *)bitmap + curr / BITS_PER_LONG)
368 != ~0UL) {
369 end = curr + BITS_PER_LONG;
370 if (end > bsize)
371 end = bsize;
372 pos = nilfs_find_next_zero_bit(bitmap, end, curr);
373 if ((pos < end) &&
374 !nilfs_set_bit_atomic(
375 nilfs_mdt_bgl_lock(inode, group), pos,
376 bitmap))
377 return pos; 404 return pos;
378 } 405 } while (++pos < end);
406
407 end = target;
408 }
409
410 /* wrap around */
411 for (pos = 0; pos < end; pos++) {
412 pos = nilfs_find_next_zero_bit(bitmap, end, pos);
413 if (pos >= end)
414 break;
415 if (!nilfs_set_bit_atomic(lock, pos, bitmap))
416 return pos;
379 } 417 }
418
380 return -ENOSPC; 419 return -ENOSPC;
381} 420}
382 421
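The rewrite drops the old per-word scan in favour of two straight passes over the bitmap: first [target, bsize), then a wrap-around pass over [0, target), claiming the first zero bit it can set under the group lock. A self-contained, non-atomic model of that search order (plain C with toy stand-ins for nilfs_find_next_zero_bit() and nilfs_set_bit_atomic(); illustrative only):

#include <errno.h>

/* toy stand-ins: scan for a zero bit, and test-and-set a bit */
static unsigned find_zero(const unsigned char *map, unsigned end, unsigned from)
{
	while (from < end && (map[from / 8] & (1u << (from % 8))))
		from++;
	return from;
}

static int test_and_set(unsigned char *map, unsigned pos)
{
	int was_set = (map[pos / 8] >> (pos % 8)) & 1;

	map[pos / 8] |= 1u << (pos % 8);
	return was_set;
}

/* same search order as the new nilfs_palloc_find_available_slot():
 * try [target, bsize) first, then wrap around and try [0, target) */
int find_slot_model(unsigned char *bitmap, unsigned target, unsigned bsize)
{
	unsigned pos, end = bsize;

	if (target < bsize) {
		for (pos = target; pos < end; pos++) {
			pos = find_zero(bitmap, end, pos);
			if (pos >= end)
				break;
			if (!test_and_set(bitmap, pos))
				return pos;
		}
		end = target;		/* wrap point for the second pass */
	}
	for (pos = 0; pos < end; pos++) {
		pos = find_zero(bitmap, end, pos);
		if (pos >= end)
			break;
		if (!test_and_set(bitmap, pos))
			return pos;
	}
	return -ENOSPC;
}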
@@ -475,15 +514,15 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
475 void *desc_kaddr, *bitmap_kaddr; 514 void *desc_kaddr, *bitmap_kaddr;
476 unsigned long group, maxgroup, ngroups; 515 unsigned long group, maxgroup, ngroups;
477 unsigned long group_offset, maxgroup_offset; 516 unsigned long group_offset, maxgroup_offset;
478 unsigned long n, entries_per_group, groups_per_desc_block; 517 unsigned long n, entries_per_group;
479 unsigned long i, j; 518 unsigned long i, j;
519 spinlock_t *lock;
480 int pos, ret; 520 int pos, ret;
481 521
482 ngroups = nilfs_palloc_groups_count(inode); 522 ngroups = nilfs_palloc_groups_count(inode);
483 maxgroup = ngroups - 1; 523 maxgroup = ngroups - 1;
484 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); 524 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
485 entries_per_group = nilfs_palloc_entries_per_group(inode); 525 entries_per_group = nilfs_palloc_entries_per_group(inode);
486 groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode);
487 526
488 for (i = 0; i < ngroups; i += n) { 527 for (i = 0; i < ngroups; i += n) {
489 if (group >= ngroups) { 528 if (group >= ngroups) {
@@ -501,8 +540,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
501 n = nilfs_palloc_rest_groups_in_desc_block(inode, group, 540 n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
502 maxgroup); 541 maxgroup);
503 for (j = 0; j < n; j++, desc++, group++) { 542 for (j = 0; j < n; j++, desc++, group++) {
504 if (nilfs_palloc_group_desc_nfrees(inode, group, desc) 543 lock = nilfs_mdt_bgl_lock(inode, group);
505 > 0) { 544 if (nilfs_palloc_group_desc_nfrees(desc, lock) > 0) {
506 ret = nilfs_palloc_get_bitmap_block( 545 ret = nilfs_palloc_get_bitmap_block(
507 inode, group, 1, &bitmap_bh); 546 inode, group, 1, &bitmap_bh);
508 if (ret < 0) 547 if (ret < 0)
@@ -510,12 +549,12 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
510 bitmap_kaddr = kmap(bitmap_bh->b_page); 549 bitmap_kaddr = kmap(bitmap_bh->b_page);
511 bitmap = bitmap_kaddr + bh_offset(bitmap_bh); 550 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
512 pos = nilfs_palloc_find_available_slot( 551 pos = nilfs_palloc_find_available_slot(
513 inode, group, group_offset, bitmap, 552 bitmap, group_offset,
514 entries_per_group); 553 entries_per_group, lock);
515 if (pos >= 0) { 554 if (pos >= 0) {
516 /* found a free entry */ 555 /* found a free entry */
517 nilfs_palloc_group_desc_add_entries( 556 nilfs_palloc_group_desc_add_entries(
518 inode, group, desc, -1); 557 desc, lock, -1);
519 req->pr_entry_nr = 558 req->pr_entry_nr =
520 entries_per_group * group + pos; 559 entries_per_group * group + pos;
521 kunmap(desc_bh->b_page); 560 kunmap(desc_bh->b_page);
@@ -573,6 +612,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
573 unsigned long group, group_offset; 612 unsigned long group, group_offset;
574 unsigned char *bitmap; 613 unsigned char *bitmap;
575 void *desc_kaddr, *bitmap_kaddr; 614 void *desc_kaddr, *bitmap_kaddr;
615 spinlock_t *lock;
576 616
577 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); 617 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
578 desc_kaddr = kmap(req->pr_desc_bh->b_page); 618 desc_kaddr = kmap(req->pr_desc_bh->b_page);
@@ -580,13 +620,15 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
580 req->pr_desc_bh, desc_kaddr); 620 req->pr_desc_bh, desc_kaddr);
581 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); 621 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
582 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); 622 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
623 lock = nilfs_mdt_bgl_lock(inode, group);
583 624
584 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), 625 if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
585 group_offset, bitmap)) 626 nilfs_warning(inode->i_sb, __func__,
586 printk(KERN_WARNING "%s: entry number %llu already freed\n", 627 "entry number %llu already freed: ino=%lu\n",
587 __func__, (unsigned long long)req->pr_entry_nr); 628 (unsigned long long)req->pr_entry_nr,
629 (unsigned long)inode->i_ino);
588 else 630 else
589 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 631 nilfs_palloc_group_desc_add_entries(desc, lock, 1);
590 632
591 kunmap(req->pr_bitmap_bh->b_page); 633 kunmap(req->pr_bitmap_bh->b_page);
592 kunmap(req->pr_desc_bh->b_page); 634 kunmap(req->pr_desc_bh->b_page);
@@ -611,6 +653,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
611 void *desc_kaddr, *bitmap_kaddr; 653 void *desc_kaddr, *bitmap_kaddr;
612 unsigned char *bitmap; 654 unsigned char *bitmap;
613 unsigned long group, group_offset; 655 unsigned long group, group_offset;
656 spinlock_t *lock;
614 657
615 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); 658 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
616 desc_kaddr = kmap(req->pr_desc_bh->b_page); 659 desc_kaddr = kmap(req->pr_desc_bh->b_page);
@@ -618,12 +661,15 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
618 req->pr_desc_bh, desc_kaddr); 661 req->pr_desc_bh, desc_kaddr);
619 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); 662 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
620 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); 663 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
621 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), 664 lock = nilfs_mdt_bgl_lock(inode, group);
622 group_offset, bitmap)) 665
623 printk(KERN_WARNING "%s: entry number %llu already freed\n", 666 if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
624 __func__, (unsigned long long)req->pr_entry_nr); 667 nilfs_warning(inode->i_sb, __func__,
668 "entry number %llu already freed: ino=%lu\n",
669 (unsigned long long)req->pr_entry_nr,
670 (unsigned long)inode->i_ino);
625 else 671 else
626 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 672 nilfs_palloc_group_desc_add_entries(desc, lock, 1);
627 673
628 kunmap(req->pr_bitmap_bh->b_page); 674 kunmap(req->pr_bitmap_bh->b_page);
629 kunmap(req->pr_desc_bh->b_page); 675 kunmap(req->pr_desc_bh->b_page);
@@ -680,22 +726,6 @@ void nilfs_palloc_abort_free_entry(struct inode *inode,
680} 726}
681 727
682/** 728/**
683 * nilfs_palloc_group_is_in - judge if an entry is in a group
684 * @inode: inode of metadata file using this allocator
685 * @group: group number
686 * @nr: serial number of the entry (e.g. inode number)
687 */
688static int
689nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
690{
691 __u64 first, last;
692
693 first = group * nilfs_palloc_entries_per_group(inode);
694 last = first + nilfs_palloc_entries_per_group(inode) - 1;
695 return (nr >= first) && (nr <= last);
696}
697
698/**
699 * nilfs_palloc_freev - deallocate a set of persistent objects 729 * nilfs_palloc_freev - deallocate a set of persistent objects
700 * @inode: inode of metadata file using this allocator 730 * @inode: inode of metadata file using this allocator
701 * @entry_nrs: array of entry numbers to be deallocated 731 * @entry_nrs: array of entry numbers to be deallocated
@@ -708,9 +738,18 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
708 unsigned char *bitmap; 738 unsigned char *bitmap;
709 void *desc_kaddr, *bitmap_kaddr; 739 void *desc_kaddr, *bitmap_kaddr;
710 unsigned long group, group_offset; 740 unsigned long group, group_offset;
711 int i, j, n, ret; 741 __u64 group_min_nr, last_nrs[8];
742 const unsigned long epg = nilfs_palloc_entries_per_group(inode);
743 const unsigned epb = NILFS_MDT(inode)->mi_entries_per_block;
744 unsigned entry_start, end, pos;
745 spinlock_t *lock;
746 int i, j, k, ret;
747 u32 nfree;
712 748
713 for (i = 0; i < nitems; i = j) { 749 for (i = 0; i < nitems; i = j) {
750 int change_group = false;
751 int nempties = 0, n = 0;
752
714 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); 753 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
715 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); 754 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
716 if (ret < 0) 755 if (ret < 0)
@@ -721,38 +760,89 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
721 brelse(desc_bh); 760 brelse(desc_bh);
722 return ret; 761 return ret;
723 } 762 }
724 desc_kaddr = kmap(desc_bh->b_page); 763
725 desc = nilfs_palloc_block_get_group_desc( 764 /* Get the first entry number of the group */
726 inode, group, desc_bh, desc_kaddr); 765 group_min_nr = (__u64)group * epg;
766
727 bitmap_kaddr = kmap(bitmap_bh->b_page); 767 bitmap_kaddr = kmap(bitmap_bh->b_page);
728 bitmap = bitmap_kaddr + bh_offset(bitmap_bh); 768 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
729 for (j = i, n = 0; 769 lock = nilfs_mdt_bgl_lock(inode, group);
730 (j < nitems) && nilfs_palloc_group_is_in(inode, group, 770
731 entry_nrs[j]); 771 j = i;
732 j++) { 772 entry_start = rounddown(group_offset, epb);
733 nilfs_palloc_group(inode, entry_nrs[j], &group_offset); 773 do {
734 if (!nilfs_clear_bit_atomic( 774 if (!nilfs_clear_bit_atomic(lock, group_offset,
735 nilfs_mdt_bgl_lock(inode, group), 775 bitmap)) {
736 group_offset, bitmap)) { 776 nilfs_warning(inode->i_sb, __func__,
737 printk(KERN_WARNING 777 "entry number %llu already freed: ino=%lu\n",
738 "%s: entry number %llu already freed\n", 778 (unsigned long long)entry_nrs[j],
739 __func__, 779 (unsigned long)inode->i_ino);
740 (unsigned long long)entry_nrs[j]);
741 } else { 780 } else {
742 n++; 781 n++;
743 } 782 }
744 } 783
745 nilfs_palloc_group_desc_add_entries(inode, group, desc, n); 784 j++;
785 if (j >= nitems || entry_nrs[j] < group_min_nr ||
786 entry_nrs[j] >= group_min_nr + epg) {
787 change_group = true;
788 } else {
789 group_offset = entry_nrs[j] - group_min_nr;
790 if (group_offset >= entry_start &&
791 group_offset < entry_start + epb) {
792 /* This entry is in the same block */
793 continue;
794 }
795 }
796
797 /* Test if the entry block is empty or not */
798 end = entry_start + epb;
799 pos = nilfs_find_next_bit(bitmap, end, entry_start);
800 if (pos >= end) {
801 last_nrs[nempties++] = entry_nrs[j - 1];
802 if (nempties >= ARRAY_SIZE(last_nrs))
803 break;
804 }
805
806 if (change_group)
807 break;
808
809 /* Go on to the next entry block */
810 entry_start = rounddown(group_offset, epb);
811 } while (true);
746 812
747 kunmap(bitmap_bh->b_page); 813 kunmap(bitmap_bh->b_page);
748 kunmap(desc_bh->b_page); 814 mark_buffer_dirty(bitmap_bh);
815 brelse(bitmap_bh);
749 816
817 for (k = 0; k < nempties; k++) {
818 ret = nilfs_palloc_delete_entry_block(inode,
819 last_nrs[k]);
820 if (ret && ret != -ENOENT) {
821 nilfs_warning(inode->i_sb, __func__,
822 "failed to delete block of entry %llu: ino=%lu, err=%d\n",
823 (unsigned long long)last_nrs[k],
824 (unsigned long)inode->i_ino, ret);
825 }
826 }
827
828 desc_kaddr = kmap_atomic(desc_bh->b_page);
829 desc = nilfs_palloc_block_get_group_desc(
830 inode, group, desc_bh, desc_kaddr);
831 nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n);
832 kunmap_atomic(desc_kaddr);
750 mark_buffer_dirty(desc_bh); 833 mark_buffer_dirty(desc_bh);
751 mark_buffer_dirty(bitmap_bh);
752 nilfs_mdt_mark_dirty(inode); 834 nilfs_mdt_mark_dirty(inode);
753
754 brelse(bitmap_bh);
755 brelse(desc_bh); 835 brelse(desc_bh);
836
837 if (nfree == nilfs_palloc_entries_per_group(inode)) {
838 ret = nilfs_palloc_delete_bitmap_block(inode, group);
839 if (ret && ret != -ENOENT) {
840 nilfs_warning(inode->i_sb, __func__,
841 "failed to delete bitmap block of group %lu: ino=%lu, err=%d\n",
842 group,
843 (unsigned long)inode->i_ino, ret);
844 }
845 }
756 } 846 }
757 return 0; 847 return 0;
758} 848}
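The freev() rewrite above batches the work per group and per entry block: entries are cleared in request order, and whenever the request moves past the end of an entry block (or leaves the group), that block's slice of the bitmap is checked for surviving bits so empty entry blocks can be deleted afterwards, up to eight per group, the size of last_nrs[]. A group whose free count climbs back to entries-per-group additionally loses its bitmap block. The emptiness test amounts to this (a sketch in kernel C mirroring the hunk, not adding to it):

#include <linux/bitops.h>

/* an entry block covering bits [entry_start, entry_start + epb) of the
 * group bitmap is deletable when none of those bits is still set */
static inline bool entry_block_is_empty(const unsigned long *bitmap,
					unsigned int entry_start,
					unsigned int epb)
{
	unsigned int end = entry_start + epb;

	return find_next_bit(bitmap, end, entry_start) >= end;
}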
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 4bd6451b5703..6e6f49aa53df 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -77,6 +77,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
77#define nilfs_set_bit_atomic ext2_set_bit_atomic 77#define nilfs_set_bit_atomic ext2_set_bit_atomic
78#define nilfs_clear_bit_atomic ext2_clear_bit_atomic 78#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
79#define nilfs_find_next_zero_bit find_next_zero_bit_le 79#define nilfs_find_next_zero_bit find_next_zero_bit_le
80#define nilfs_find_next_bit find_next_bit_le
80 81
81/** 82/**
82 * struct nilfs_bh_assoc - block offset and buffer head association 83 * struct nilfs_bh_assoc - block offset and buffer head association
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 919fd5bb14a8..3a3821b00486 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -919,8 +919,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
919 int level, __u64 *keyp, __u64 *ptrp) 919 int level, __u64 *keyp, __u64 *ptrp)
920{ 920{
921 struct nilfs_btree_node *node, *right; 921 struct nilfs_btree_node *node, *right;
922 __u64 newkey;
923 __u64 newptr;
924 int nchildren, n, move, ncblk; 922 int nchildren, n, move, ncblk;
925 923
926 node = nilfs_btree_get_nonroot_node(path, level); 924 node = nilfs_btree_get_nonroot_node(path, level);
@@ -942,9 +940,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
942 if (!buffer_dirty(path[level].bp_sib_bh)) 940 if (!buffer_dirty(path[level].bp_sib_bh))
943 mark_buffer_dirty(path[level].bp_sib_bh); 941 mark_buffer_dirty(path[level].bp_sib_bh);
944 942
945 newkey = nilfs_btree_node_get_key(right, 0);
946 newptr = path[level].bp_newreq.bpr_ptr;
947
948 if (move) { 943 if (move) {
949 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 944 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
950 nilfs_btree_node_insert(right, path[level].bp_index, 945 nilfs_btree_node_insert(right, path[level].bp_index,
@@ -1856,7 +1851,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1856 __u64 key, __u64 ptr, 1851 __u64 key, __u64 ptr,
1857 const __u64 *keys, const __u64 *ptrs, int n) 1852 const __u64 *keys, const __u64 *ptrs, int n)
1858{ 1853{
1859 struct buffer_head *bh; 1854 struct buffer_head *bh = NULL;
1860 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1855 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1861 struct nilfs_bmap_stats stats; 1856 struct nilfs_bmap_stats stats;
1862 int ret; 1857 int ret;
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 0d5fada91191..7dc23f100e57 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -155,7 +155,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
155int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) 155int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
156{ 156{
157 struct nilfs_dat_entry *entry; 157 struct nilfs_dat_entry *entry;
158 __u64 start;
159 sector_t blocknr; 158 sector_t blocknr;
160 void *kaddr; 159 void *kaddr;
161 int ret; 160 int ret;
@@ -169,7 +168,6 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
169 kaddr = kmap_atomic(req->pr_entry_bh->b_page); 168 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
170 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 169 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
171 req->pr_entry_bh, kaddr); 170 req->pr_entry_bh, kaddr);
172 start = le64_to_cpu(entry->de_start);
173 blocknr = le64_to_cpu(entry->de_blocknr); 171 blocknr = le64_to_cpu(entry->de_blocknr);
174 kunmap_atomic(kaddr); 172 kunmap_atomic(kaddr);
175 173
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 4a73d6dffabf..ac2f64943ff4 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -356,7 +356,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
356 goto failed; 356 goto failed;
357 357
358 mapping_set_gfp_mask(inode->i_mapping, 358 mapping_set_gfp_mask(inode->i_mapping,
359 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 359 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
360 360
361 root = NILFS_I(dir)->i_root; 361 root = NILFS_I(dir)->i_root;
362 ii = NILFS_I(inode); 362 ii = NILFS_I(inode);
@@ -522,7 +522,7 @@ static int __nilfs_read_inode(struct super_block *sb,
522 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 522 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
523 nilfs_set_inode_flags(inode); 523 nilfs_set_inode_flags(inode);
524 mapping_set_gfp_mask(inode->i_mapping, 524 mapping_set_gfp_mask(inode->i_mapping,
525 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 525 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
526 return 0; 526 return 0;
527 527
528 failed_unmap: 528 failed_unmap:
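Both call sites in this file trade the open-coded mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS for mapping_gfp_constraint(); the same substitution shows up in fs/ntfs and fs/splice further down. The helper itself is not part of this hunk; its assumed shape is simply the masked read (a sketch, based on the pagemap.h helper this series relies on):

static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
					   gfp_t gfp_mask)
{
	/* restrict the mapping's allowed GFP flags by the caller's mask */
	return mapping_gfp_mask(mapping) & gfp_mask;
}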
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index dee34d990281..1125f40233ff 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -33,6 +33,7 @@
33#include "page.h" 33#include "page.h"
34#include "mdt.h" 34#include "mdt.h"
35 35
36#include <trace/events/nilfs2.h>
36 37
37#define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) 38#define NILFS_MDT_MAX_RA_BLOCKS (16 - 1)
38 39
@@ -68,6 +69,9 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
68 set_buffer_uptodate(bh); 69 set_buffer_uptodate(bh);
69 mark_buffer_dirty(bh); 70 mark_buffer_dirty(bh);
70 nilfs_mdt_mark_dirty(inode); 71 nilfs_mdt_mark_dirty(inode);
72
73 trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block);
74
71 return 0; 75 return 0;
72} 76}
73 77
@@ -158,6 +162,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
158 get_bh(bh); 162 get_bh(bh);
159 submit_bh(mode, bh); 163 submit_bh(mode, bh);
160 ret = 0; 164 ret = 0;
165
166 trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode);
161 out: 167 out:
162 get_bh(bh); 168 get_bh(bh);
163 *out_bh = bh; 169 *out_bh = bh;
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index fe529a87a208..03246cac3338 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -72,7 +72,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
72} 72}
73 73
74/* Default GFP flags using highmem */ 74/* Default GFP flags using highmem */
75#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) 75#define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM)
76 76
77int nilfs_mdt_get_block(struct inode *, unsigned long, int, 77int nilfs_mdt_get_block(struct inode *, unsigned long, int,
78 void (*init_block)(struct inode *, 78 void (*init_block)(struct inode *,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ff00a0b7acb9..9b4f205d1173 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -582,7 +582,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
582 struct nilfs_recovery_info *ri) 582 struct nilfs_recovery_info *ri)
583{ 583{
584 struct buffer_head *bh_sum = NULL; 584 struct buffer_head *bh_sum = NULL;
585 struct nilfs_segment_summary *sum; 585 struct nilfs_segment_summary *sum = NULL;
586 sector_t pseg_start; 586 sector_t pseg_start;
587 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ 587 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
588 unsigned long nsalvaged_blocks = 0; 588 unsigned long nsalvaged_blocks = 0;
@@ -814,7 +814,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs,
814 struct nilfs_recovery_info *ri) 814 struct nilfs_recovery_info *ri)
815{ 815{
816 struct buffer_head *bh_sum = NULL; 816 struct buffer_head *bh_sum = NULL;
817 struct nilfs_segment_summary *sum; 817 struct nilfs_segment_summary *sum = NULL;
818 sector_t pseg_start, pseg_end, sr_pseg_start = 0; 818 sector_t pseg_start, pseg_end, sr_pseg_start = 0;
819 sector_t seg_start, seg_end; /* range of full segment (block number) */ 819 sector_t seg_start, seg_end; /* range of full segment (block number) */
820 sector_t b, end; 820 sector_t b, end;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index c6abbad9b8e3..3b65adaae7e4 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -77,6 +77,36 @@ enum {
77 NILFS_ST_DONE, 77 NILFS_ST_DONE,
78}; 78};
79 79
80#define CREATE_TRACE_POINTS
81#include <trace/events/nilfs2.h>
82
83/*
84 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are
85 * wrapper functions of stage count (nilfs_sc_info->sc_stage.scnt). Users of
86 * the variable must use them because transition of stage count must involve
87 * trace events (trace_nilfs2_collection_stage_transition).
88 *
89 * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't
90 * produce tracepoint events. It is provided just for making the intention
91 * clear.
92 */
93static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci)
94{
95 sci->sc_stage.scnt++;
96 trace_nilfs2_collection_stage_transition(sci);
97}
98
99static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt)
100{
101 sci->sc_stage.scnt = next_scnt;
102 trace_nilfs2_collection_stage_transition(sci);
103}
104
105static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci)
106{
107 return sci->sc_stage.scnt;
108}
109
80/* State flags of collection */ 110/* State flags of collection */
81#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ 111#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */
82#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ 112#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */
@@ -184,11 +214,18 @@ int nilfs_transaction_begin(struct super_block *sb,
184{ 214{
185 struct the_nilfs *nilfs; 215 struct the_nilfs *nilfs;
186 int ret = nilfs_prepare_segment_lock(ti); 216 int ret = nilfs_prepare_segment_lock(ti);
217 struct nilfs_transaction_info *trace_ti;
187 218
188 if (unlikely(ret < 0)) 219 if (unlikely(ret < 0))
189 return ret; 220 return ret;
190 if (ret > 0) 221 if (ret > 0) {
222 trace_ti = current->journal_info;
223
224 trace_nilfs2_transaction_transition(sb, trace_ti,
225 trace_ti->ti_count, trace_ti->ti_flags,
226 TRACE_NILFS2_TRANSACTION_BEGIN);
191 return 0; 227 return 0;
228 }
192 229
193 sb_start_intwrite(sb); 230 sb_start_intwrite(sb);
194 231
@@ -199,6 +236,11 @@ int nilfs_transaction_begin(struct super_block *sb,
199 ret = -ENOSPC; 236 ret = -ENOSPC;
200 goto failed; 237 goto failed;
201 } 238 }
239
240 trace_ti = current->journal_info;
241 trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count,
242 trace_ti->ti_flags,
243 TRACE_NILFS2_TRANSACTION_BEGIN);
202 return 0; 244 return 0;
203 245
204 failed: 246 failed:
@@ -231,6 +273,8 @@ int nilfs_transaction_commit(struct super_block *sb)
231 ti->ti_flags |= NILFS_TI_COMMIT; 273 ti->ti_flags |= NILFS_TI_COMMIT;
232 if (ti->ti_count > 0) { 274 if (ti->ti_count > 0) {
233 ti->ti_count--; 275 ti->ti_count--;
276 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
277 ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
234 return 0; 278 return 0;
235 } 279 }
236 if (nilfs->ns_writer) { 280 if (nilfs->ns_writer) {
@@ -242,6 +286,9 @@ int nilfs_transaction_commit(struct super_block *sb)
242 nilfs_segctor_do_flush(sci, 0); 286 nilfs_segctor_do_flush(sci, 0);
243 } 287 }
244 up_read(&nilfs->ns_segctor_sem); 288 up_read(&nilfs->ns_segctor_sem);
289 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
290 ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
291
245 current->journal_info = ti->ti_save; 292 current->journal_info = ti->ti_save;
246 293
247 if (ti->ti_flags & NILFS_TI_SYNC) 294 if (ti->ti_flags & NILFS_TI_SYNC)
@@ -260,10 +307,15 @@ void nilfs_transaction_abort(struct super_block *sb)
260 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 307 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
261 if (ti->ti_count > 0) { 308 if (ti->ti_count > 0) {
262 ti->ti_count--; 309 ti->ti_count--;
310 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
311 ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
263 return; 312 return;
264 } 313 }
265 up_read(&nilfs->ns_segctor_sem); 314 up_read(&nilfs->ns_segctor_sem);
266 315
316 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
317 ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
318
267 current->journal_info = ti->ti_save; 319 current->journal_info = ti->ti_save;
268 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 320 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
269 kmem_cache_free(nilfs_transaction_cachep, ti); 321 kmem_cache_free(nilfs_transaction_cachep, ti);
@@ -309,6 +361,9 @@ static void nilfs_transaction_lock(struct super_block *sb,
309 current->journal_info = ti; 361 current->journal_info = ti;
310 362
311 for (;;) { 363 for (;;) {
364 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
365 ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK);
366
312 down_write(&nilfs->ns_segctor_sem); 367 down_write(&nilfs->ns_segctor_sem);
313 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) 368 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
314 break; 369 break;
@@ -320,6 +375,9 @@ static void nilfs_transaction_lock(struct super_block *sb,
320 } 375 }
321 if (gcflag) 376 if (gcflag)
322 ti->ti_flags |= NILFS_TI_GC; 377 ti->ti_flags |= NILFS_TI_GC;
378
379 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
380 ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK);
323} 381}
324 382
325static void nilfs_transaction_unlock(struct super_block *sb) 383static void nilfs_transaction_unlock(struct super_block *sb)
@@ -332,6 +390,9 @@ static void nilfs_transaction_unlock(struct super_block *sb)
332 390
333 up_write(&nilfs->ns_segctor_sem); 391 up_write(&nilfs->ns_segctor_sem);
334 current->journal_info = ti->ti_save; 392 current->journal_info = ti->ti_save;
393
394 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
395 ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK);
335} 396}
336 397
337static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, 398static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
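With the hunks above, every begin/commit/abort and every take/release of the segment-constructor lock reports its ti_count and ti_flags through trace_nilfs2_transaction_transition(). Seen from a caller nothing changes; the usual bracket simply becomes observable (an illustrative caller, not taken from this diff):

static int example_metadata_update(struct super_block *sb, struct inode *inode)
{
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(sb, &ti, 0);	/* ..._TRANSACTION_BEGIN */
	if (err)
		return err;

	mark_inode_dirty(inode);	/* the change being made durable */

	return nilfs_transaction_commit(sb);	/* ..._TRANSACTION_COMMIT */
}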
@@ -1062,7 +1123,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1062 size_t ndone; 1123 size_t ndone;
1063 int err = 0; 1124 int err = 0;
1064 1125
1065 switch (sci->sc_stage.scnt) { 1126 switch (nilfs_sc_cstage_get(sci)) {
1066 case NILFS_ST_INIT: 1127 case NILFS_ST_INIT:
1067 /* Pre-processes */ 1128 /* Pre-processes */
1068 sci->sc_stage.flags = 0; 1129 sci->sc_stage.flags = 0;
@@ -1071,7 +1132,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1071 sci->sc_nblk_inc = 0; 1132 sci->sc_nblk_inc = 0;
1072 sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; 1133 sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
1073 if (mode == SC_LSEG_DSYNC) { 1134 if (mode == SC_LSEG_DSYNC) {
1074 sci->sc_stage.scnt = NILFS_ST_DSYNC; 1135 nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC);
1075 goto dsync_mode; 1136 goto dsync_mode;
1076 } 1137 }
1077 } 1138 }
@@ -1079,10 +1140,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1079 sci->sc_stage.dirty_file_ptr = NULL; 1140 sci->sc_stage.dirty_file_ptr = NULL;
1080 sci->sc_stage.gc_inode_ptr = NULL; 1141 sci->sc_stage.gc_inode_ptr = NULL;
1081 if (mode == SC_FLUSH_DAT) { 1142 if (mode == SC_FLUSH_DAT) {
1082 sci->sc_stage.scnt = NILFS_ST_DAT; 1143 nilfs_sc_cstage_set(sci, NILFS_ST_DAT);
1083 goto dat_stage; 1144 goto dat_stage;
1084 } 1145 }
1085 sci->sc_stage.scnt++; /* Fall through */ 1146 nilfs_sc_cstage_inc(sci); /* Fall through */
1086 case NILFS_ST_GC: 1147 case NILFS_ST_GC:
1087 if (nilfs_doing_gc()) { 1148 if (nilfs_doing_gc()) {
1088 head = &sci->sc_gc_inodes; 1149 head = &sci->sc_gc_inodes;
@@ -1103,7 +1164,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1103 } 1164 }
1104 sci->sc_stage.gc_inode_ptr = NULL; 1165 sci->sc_stage.gc_inode_ptr = NULL;
1105 } 1166 }
1106 sci->sc_stage.scnt++; /* Fall through */ 1167 nilfs_sc_cstage_inc(sci); /* Fall through */
1107 case NILFS_ST_FILE: 1168 case NILFS_ST_FILE:
1108 head = &sci->sc_dirty_files; 1169 head = &sci->sc_dirty_files;
1109 ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, 1170 ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
@@ -1125,10 +1186,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1125 } 1186 }
1126 sci->sc_stage.dirty_file_ptr = NULL; 1187 sci->sc_stage.dirty_file_ptr = NULL;
1127 if (mode == SC_FLUSH_FILE) { 1188 if (mode == SC_FLUSH_FILE) {
1128 sci->sc_stage.scnt = NILFS_ST_DONE; 1189 nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1129 return 0; 1190 return 0;
1130 } 1191 }
1131 sci->sc_stage.scnt++; 1192 nilfs_sc_cstage_inc(sci);
1132 sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; 1193 sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
1133 /* Fall through */ 1194 /* Fall through */
1134 case NILFS_ST_IFILE: 1195 case NILFS_ST_IFILE:
@@ -1136,7 +1197,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1136 &nilfs_sc_file_ops); 1197 &nilfs_sc_file_ops);
1137 if (unlikely(err)) 1198 if (unlikely(err))
1138 break; 1199 break;
1139 sci->sc_stage.scnt++; 1200 nilfs_sc_cstage_inc(sci);
1140 /* Creating a checkpoint */ 1201 /* Creating a checkpoint */
1141 err = nilfs_segctor_create_checkpoint(sci); 1202 err = nilfs_segctor_create_checkpoint(sci);
1142 if (unlikely(err)) 1203 if (unlikely(err))
@@ -1147,7 +1208,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1147 &nilfs_sc_file_ops); 1208 &nilfs_sc_file_ops);
1148 if (unlikely(err)) 1209 if (unlikely(err))
1149 break; 1210 break;
1150 sci->sc_stage.scnt++; /* Fall through */ 1211 nilfs_sc_cstage_inc(sci); /* Fall through */
1151 case NILFS_ST_SUFILE: 1212 case NILFS_ST_SUFILE:
1152 err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, 1213 err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
1153 sci->sc_nfreesegs, &ndone); 1214 sci->sc_nfreesegs, &ndone);
@@ -1163,7 +1224,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1163 &nilfs_sc_file_ops); 1224 &nilfs_sc_file_ops);
1164 if (unlikely(err)) 1225 if (unlikely(err))
1165 break; 1226 break;
1166 sci->sc_stage.scnt++; /* Fall through */ 1227 nilfs_sc_cstage_inc(sci); /* Fall through */
1167 case NILFS_ST_DAT: 1228 case NILFS_ST_DAT:
1168 dat_stage: 1229 dat_stage:
1169 err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, 1230 err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
@@ -1171,10 +1232,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1171 if (unlikely(err)) 1232 if (unlikely(err))
1172 break; 1233 break;
1173 if (mode == SC_FLUSH_DAT) { 1234 if (mode == SC_FLUSH_DAT) {
1174 sci->sc_stage.scnt = NILFS_ST_DONE; 1235 nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1175 return 0; 1236 return 0;
1176 } 1237 }
1177 sci->sc_stage.scnt++; /* Fall through */ 1238 nilfs_sc_cstage_inc(sci); /* Fall through */
1178 case NILFS_ST_SR: 1239 case NILFS_ST_SR:
1179 if (mode == SC_LSEG_SR) { 1240 if (mode == SC_LSEG_SR) {
1180 /* Appending a super root */ 1241 /* Appending a super root */
@@ -1184,7 +1245,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1184 } 1245 }
1185 /* End of a logical segment */ 1246 /* End of a logical segment */
1186 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; 1247 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1187 sci->sc_stage.scnt = NILFS_ST_DONE; 1248 nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1188 return 0; 1249 return 0;
1189 case NILFS_ST_DSYNC: 1250 case NILFS_ST_DSYNC:
1190 dsync_mode: 1251 dsync_mode:
@@ -1197,7 +1258,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1197 if (unlikely(err)) 1258 if (unlikely(err))
1198 break; 1259 break;
1199 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; 1260 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1200 sci->sc_stage.scnt = NILFS_ST_DONE; 1261 nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1201 return 0; 1262 return 0;
1202 case NILFS_ST_DONE: 1263 case NILFS_ST_DONE:
1203 return 0; 1264 return 0;
@@ -1442,7 +1503,8 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1442 goto failed; 1503 goto failed;
1443 1504
1444 /* The current segment is filled up */ 1505 /* The current segment is filled up */
1445 if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) 1506 if (mode != SC_LSEG_SR ||
1507 nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE)
1446 break; 1508 break;
1447 1509
1448 nilfs_clear_logs(&sci->sc_segbufs); 1510 nilfs_clear_logs(&sci->sc_segbufs);
@@ -1946,7 +2008,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
1946 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 2008 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1947 int err; 2009 int err;
1948 2010
1949 sci->sc_stage.scnt = NILFS_ST_INIT; 2011 nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
1950 sci->sc_cno = nilfs->ns_cno; 2012 sci->sc_cno = nilfs->ns_cno;
1951 2013
1952 err = nilfs_segctor_collect_dirty_files(sci, nilfs); 2014 err = nilfs_segctor_collect_dirty_files(sci, nilfs);
@@ -1974,7 +2036,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
1974 goto failed; 2036 goto failed;
1975 2037
1976 /* Avoid empty segment */ 2038 /* Avoid empty segment */
1977 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2039 if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE &&
1978 nilfs_segbuf_empty(sci->sc_curseg)) { 2040 nilfs_segbuf_empty(sci->sc_curseg)) {
1979 nilfs_segctor_abort_construction(sci, nilfs, 1); 2041 nilfs_segctor_abort_construction(sci, nilfs, 1);
1980 goto out; 2042 goto out;
@@ -1988,7 +2050,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
1988 nilfs_segctor_fill_in_file_bmap(sci); 2050 nilfs_segctor_fill_in_file_bmap(sci);
1989 2051
1990 if (mode == SC_LSEG_SR && 2052 if (mode == SC_LSEG_SR &&
1991 sci->sc_stage.scnt >= NILFS_ST_CPFILE) { 2053 nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
1992 err = nilfs_segctor_fill_in_checkpoint(sci); 2054 err = nilfs_segctor_fill_in_checkpoint(sci);
1993 if (unlikely(err)) 2055 if (unlikely(err))
1994 goto failed_to_write; 2056 goto failed_to_write;
@@ -2007,7 +2069,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2007 if (unlikely(err)) 2069 if (unlikely(err))
2008 goto failed_to_write; 2070 goto failed_to_write;
2009 2071
2010 if (sci->sc_stage.scnt == NILFS_ST_DONE || 2072 if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
2011 nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { 2073 nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
2012 /* 2074 /*
2013 * At this point, we avoid double buffering 2075 * At this point, we avoid double buffering
@@ -2020,7 +2082,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2020 if (err) 2082 if (err)
2021 goto failed_to_write; 2083 goto failed_to_write;
2022 } 2084 }
2023 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2085 } while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE);
2024 2086
2025 out: 2087 out:
2026 nilfs_segctor_drop_written_files(sci, nilfs); 2088 nilfs_segctor_drop_written_files(sci, nilfs);
@@ -2430,7 +2492,6 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2430static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) 2492static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2431{ 2493{
2432 int mode = 0; 2494 int mode = 0;
2433 int err;
2434 2495
2435 spin_lock(&sci->sc_state_lock); 2496 spin_lock(&sci->sc_state_lock);
2436 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? 2497 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
@@ -2438,7 +2499,7 @@ static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2438 spin_unlock(&sci->sc_state_lock); 2499 spin_unlock(&sci->sc_state_lock);
2439 2500
2440 if (mode) { 2501 if (mode) {
2441 err = nilfs_segctor_do_construct(sci, mode); 2502 nilfs_segctor_do_construct(sci, mode);
2442 2503
2443 spin_lock(&sci->sc_state_lock); 2504 spin_lock(&sci->sc_state_lock);
2444 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? 2505 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index a48d6de1e02c..0408b9b2814b 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -67,7 +67,8 @@ struct nilfs_recovery_info {
67 67
68/** 68/**
69 * struct nilfs_cstage - Context of collection stage 69 * struct nilfs_cstage - Context of collection stage
70 * @scnt: Stage count 70 * @scnt: Stage count, must be accessed via wrappers:
71 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get()
71 * @flags: State flags 72 * @flags: State flags
72 * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file 73 * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file
73 * @gc_inode_ptr: Pointer on the list of gc-inodes 74 * @gc_inode_ptr: Pointer on the list of gc-inodes
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 2a869c35c362..52821ffc11f4 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -30,6 +30,8 @@
30#include "mdt.h" 30#include "mdt.h"
31#include "sufile.h" 31#include "sufile.h"
32 32
33#include <trace/events/nilfs2.h>
34
33/** 35/**
34 * struct nilfs_sufile_info - on-memory private data of sufile 36 * struct nilfs_sufile_info - on-memory private data of sufile
35 * @mi: on-memory private data of metadata file 37 * @mi: on-memory private data of metadata file
@@ -317,7 +319,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
317 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 319 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
318 __u64 segnum, maxsegnum, last_alloc; 320 __u64 segnum, maxsegnum, last_alloc;
319 void *kaddr; 321 void *kaddr;
320 unsigned long nsegments, ncleansegs, nsus, cnt; 322 unsigned long nsegments, nsus, cnt;
321 int ret, j; 323 int ret, j;
322 324
323 down_write(&NILFS_MDT(sufile)->mi_sem); 325 down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -327,7 +329,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
327 goto out_sem; 329 goto out_sem;
328 kaddr = kmap_atomic(header_bh->b_page); 330 kaddr = kmap_atomic(header_bh->b_page);
329 header = kaddr + bh_offset(header_bh); 331 header = kaddr + bh_offset(header_bh);
330 ncleansegs = le64_to_cpu(header->sh_ncleansegs);
331 last_alloc = le64_to_cpu(header->sh_last_alloc); 332 last_alloc = le64_to_cpu(header->sh_last_alloc);
332 kunmap_atomic(kaddr); 333 kunmap_atomic(kaddr);
333 334
@@ -358,6 +359,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
358 break; /* never happens */ 359 break; /* never happens */
359 } 360 }
360 } 361 }
362 trace_nilfs2_segment_usage_check(sufile, segnum, cnt);
361 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, 363 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
362 &su_bh); 364 &su_bh);
363 if (ret < 0) 365 if (ret < 0)
@@ -388,6 +390,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
388 nilfs_mdt_mark_dirty(sufile); 390 nilfs_mdt_mark_dirty(sufile);
389 brelse(su_bh); 391 brelse(su_bh);
390 *segnump = segnum; 392 *segnump = segnum;
393
394 trace_nilfs2_segment_usage_allocated(sufile, segnum);
395
391 goto out_header; 396 goto out_header;
392 } 397 }
393 398
@@ -490,6 +495,8 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
490 NILFS_SUI(sufile)->ncleansegs++; 495 NILFS_SUI(sufile)->ncleansegs++;
491 496
492 nilfs_mdt_mark_dirty(sufile); 497 nilfs_mdt_mark_dirty(sufile);
498
499 trace_nilfs2_segment_usage_freed(sufile, segnum);
493} 500}
494 501
495/** 502/**
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f47585bfeb01..354013ea22ec 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -361,7 +361,7 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
361 struct nilfs_super_block *nsbp; 361 struct nilfs_super_block *nsbp;
362 sector_t blocknr, newblocknr; 362 sector_t blocknr, newblocknr;
363 unsigned long offset; 363 unsigned long offset;
364 int sb2i = -1; /* array index of the secondary superblock */ 364 int sb2i; /* array index of the secondary superblock */
365 int ret = 0; 365 int ret = 0;
366 366
367 /* nilfs->ns_sem must be locked by the caller. */ 367 /* nilfs->ns_sem must be locked by the caller. */
@@ -372,6 +372,9 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
372 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { 372 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
373 sb2i = 0; 373 sb2i = 0;
374 blocknr = nilfs->ns_sbh[0]->b_blocknr; 374 blocknr = nilfs->ns_sbh[0]->b_blocknr;
375 } else {
376 sb2i = -1;
377 blocknr = 0;
375 } 378 }
376 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) 379 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
377 goto out; /* super block location is unchanged */ 380 goto out; /* super block location is unchanged */
@@ -1405,14 +1408,10 @@ static void nilfs_destroy_cachep(void)
1405 */ 1408 */
1406 rcu_barrier(); 1409 rcu_barrier();
1407 1410
1408 if (nilfs_inode_cachep) 1411 kmem_cache_destroy(nilfs_inode_cachep);
1409 kmem_cache_destroy(nilfs_inode_cachep); 1412 kmem_cache_destroy(nilfs_transaction_cachep);
1410 if (nilfs_transaction_cachep) 1413 kmem_cache_destroy(nilfs_segbuf_cachep);
1411 kmem_cache_destroy(nilfs_transaction_cachep); 1414 kmem_cache_destroy(nilfs_btree_path_cache);
1412 if (nilfs_segbuf_cachep)
1413 kmem_cache_destroy(nilfs_segbuf_cachep);
1414 if (nilfs_btree_path_cache)
1415 kmem_cache_destroy(nilfs_btree_path_cache);
1416} 1415}
1417 1416
1418static int __init nilfs_init_cachep(void) 1417static int __init nilfs_init_cachep(void)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 262561fea923..9d383e5eff0e 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -525,8 +525,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
525 } 525 }
526 } 526 }
527 err = add_to_page_cache_lru(*cached_page, mapping, 527 err = add_to_page_cache_lru(*cached_page, mapping,
528 index, 528 index,
529 GFP_KERNEL & mapping_gfp_mask(mapping)); 529 mapping_gfp_constraint(mapping, GFP_KERNEL));
530 if (unlikely(err)) { 530 if (unlikely(err)) {
531 if (err == -EEXIST) 531 if (err == -EEXIST)
532 continue; 532 continue;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index eed2050db9be..d73291f5f0fc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -91,18 +91,18 @@
91static inline void task_name(struct seq_file *m, struct task_struct *p) 91static inline void task_name(struct seq_file *m, struct task_struct *p)
92{ 92{
93 char *buf; 93 char *buf;
94 size_t size;
94 char tcomm[sizeof(p->comm)]; 95 char tcomm[sizeof(p->comm)];
96 int ret;
95 97
96 get_task_comm(tcomm, p); 98 get_task_comm(tcomm, p);
97 99
98 seq_puts(m, "Name:\t"); 100 seq_puts(m, "Name:\t");
99 buf = m->buf + m->count;
100 101
101 /* Ignore error for now */ 102 size = seq_get_buf(m, &buf);
102 buf += string_escape_str(tcomm, buf, m->size - m->count, 103 ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
103 ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); 104 seq_commit(m, ret < size ? ret : -1);
104 105
105 m->count = buf - m->buf;
106 seq_putc(m, '\n'); 106 seq_putc(m, '\n');
107} 107}
108 108
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 6e5fcd00733e..3c2a915c695a 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -291,11 +291,19 @@ static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
291 */ 291 */
292int proc_fd_permission(struct inode *inode, int mask) 292int proc_fd_permission(struct inode *inode, int mask)
293{ 293{
294 int rv = generic_permission(inode, mask); 294 struct task_struct *p;
295 int rv;
296
297 rv = generic_permission(inode, mask);
295 if (rv == 0) 298 if (rv == 0)
296 return 0; 299 return rv;
297 if (task_tgid(current) == proc_pid(inode)) 300
301 rcu_read_lock();
302 p = pid_task(proc_pid(inode), PIDTYPE_PID);
303 if (p && same_thread_group(p, current))
298 rv = 0; 304 rv = 0;
305 rcu_read_unlock();
306
299 return rv; 307 return rv;
300} 308}
301 309
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 225586e141ca..e85664b7c7d9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -13,6 +13,7 @@
13#include <linux/cred.h> 13#include <linux/cred.h>
14#include <linux/mm.h> 14#include <linux/mm.h>
15#include <linux/printk.h> 15#include <linux/printk.h>
16#include <linux/string_helpers.h>
16 17
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include <asm/page.h> 19#include <asm/page.h>
@@ -25,12 +26,17 @@ static void seq_set_overflow(struct seq_file *m)
25static void *seq_buf_alloc(unsigned long size) 26static void *seq_buf_alloc(unsigned long size)
26{ 27{
27 void *buf; 28 void *buf;
29 gfp_t gfp = GFP_KERNEL;
28 30
29 /* 31 /*
30 * __GFP_NORETRY to avoid oom-killings with high-order allocations - 32 * For high order allocations, use __GFP_NORETRY to avoid oom-killing -
31 * it's better to fall back to vmalloc() than to kill things. 33 * it's better to fall back to vmalloc() than to kill things. For small
34 * allocations, just use GFP_KERNEL which will oom kill, thus no need
35 * for vmalloc fallback.
32 */ 36 */
33 buf = kmalloc(size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); 37 if (size > PAGE_SIZE)
38 gfp |= __GFP_NORETRY | __GFP_NOWARN;
39 buf = kmalloc(size, gfp);
34 if (!buf && size > PAGE_SIZE) 40 if (!buf && size > PAGE_SIZE)
35 buf = vmalloc(size); 41 buf = vmalloc(size);
36 return buf; 42 return buf;
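The allocator above only bothers with the __GFP_NORETRY/__GFP_NOWARN-plus-vmalloc dance for buffers larger than a page. The matching free side has to cope with either origin, which kvfree() does; for symmetry it would look like this (illustrative helper; seq_file.c itself calls kvfree() directly):

static void seq_buf_free(const void *buf)
{
	/* works for both kmalloc()ed and vmalloc()ed buffers */
	kvfree(buf);
}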
@@ -377,26 +383,12 @@ EXPORT_SYMBOL(seq_release);
377 */ 383 */
378void seq_escape(struct seq_file *m, const char *s, const char *esc) 384void seq_escape(struct seq_file *m, const char *s, const char *esc)
379{ 385{
380 char *end = m->buf + m->size; 386 char *buf;
381 char *p; 387 size_t size = seq_get_buf(m, &buf);
382 char c; 388 int ret;
383 389
384 for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { 390 ret = string_escape_str(s, buf, size, ESCAPE_OCTAL, esc);
385 if (!strchr(esc, c)) { 391 seq_commit(m, ret < size ? ret : -1);
386 *p++ = c;
387 continue;
388 }
389 if (p + 3 < end) {
390 *p++ = '\\';
391 *p++ = '0' + ((c & 0300) >> 6);
392 *p++ = '0' + ((c & 070) >> 3);
393 *p++ = '0' + (c & 07);
394 continue;
395 }
396 seq_set_overflow(m);
397 return;
398 }
399 m->count = p - m->buf;
400} 392}
401EXPORT_SYMBOL(seq_escape); 393EXPORT_SYMBOL(seq_escape);
402 394
@@ -773,6 +765,8 @@ void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
773{ 765{
774 const u8 *ptr = buf; 766 const u8 *ptr = buf;
775 int i, linelen, remaining = len; 767 int i, linelen, remaining = len;
768 char *buffer;
769 size_t size;
776 int ret; 770 int ret;
777 771
778 if (rowsize != 16 && rowsize != 32) 772 if (rowsize != 16 && rowsize != 32)
@@ -794,15 +788,12 @@ void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
794 break; 788 break;
795 } 789 }
796 790
791 size = seq_get_buf(m, &buffer);
797 ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, 792 ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
798 m->buf + m->count, m->size - m->count, 793 buffer, size, ascii);
799 ascii); 794 seq_commit(m, ret < size ? ret : -1);
800 if (ret >= m->size - m->count) { 795
801 seq_set_overflow(m); 796 seq_putc(m, '\n');
802 } else {
803 m->count += ret;
804 seq_putc(m, '\n');
805 }
806 } 797 }
807} 798}
808EXPORT_SYMBOL(seq_hex_dump); 799EXPORT_SYMBOL(seq_hex_dump);
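seq_escape(), seq_hex_dump() and the task_name() change in fs/proc/array.c all move to the same idiom: seq_get_buf() lends out the unused tail of the seq_file buffer, the formatter reports the length it needed, and seq_commit() either accepts that length or takes -1 to mark the buffer as overflowed. The idiom generalises to any formatter that returns its would-be length (a sketch with a made-up caller; snprintf() qualifies because it reports the untruncated length):

static void seq_print_point(struct seq_file *m, int x, int y)
{
	char *buf;
	size_t size = seq_get_buf(m, &buf);	/* space left in m->buf */
	int ret;

	ret = snprintf(buf, size, "(%d,%d)\n", x, y);
	seq_commit(m, ret < size ? ret : -1);	/* -1 marks the seq_file full */
}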
diff --git a/fs/splice.c b/fs/splice.c
index 5fc1e50a7f30..801c21cd77fe 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -360,7 +360,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
360 break; 360 break;
361 361
362 error = add_to_page_cache_lru(page, mapping, index, 362 error = add_to_page_cache_lru(page, mapping, index,
363 GFP_KERNEL & mapping_gfp_mask(mapping)); 363 mapping_gfp_constraint(mapping, GFP_KERNEL));
364 if (unlikely(error)) { 364 if (unlikely(error)) {
365 page_cache_release(page); 365 page_cache_release(page);
366 if (error == -EEXIST) 366 if (error == -EEXIST)
diff --git a/fs/sync.c b/fs/sync.c
index 4ec430ae2b0d..dd5d1711c7ac 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -348,7 +348,8 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
348 } 348 }
349 349
350 if (flags & SYNC_FILE_RANGE_WRITE) { 350 if (flags & SYNC_FILE_RANGE_WRITE) {
351 ret = filemap_fdatawrite_range(mapping, offset, endbyte); 351 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
352 WB_SYNC_NONE);
352 if (ret < 0) 353 if (ret < 0)
353 goto out_put; 354 goto out_put;
354 } 355 }
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index eac9549efd52..587174fd4f2c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -525,7 +525,7 @@ xfs_qm_shrink_scan(
525 unsigned long freed; 525 unsigned long freed;
526 int error; 526 int error;
527 527
528 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) 528 if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
529 return 0; 529 return 0;
530 530
531 INIT_LIST_HEAD(&isol.buffers); 531 INIT_LIST_HEAD(&isol.buffers);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 8b5ce7c5d9bb..f56cdcecc1c9 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -822,7 +822,6 @@ struct drm_device {
822 822
823 struct drm_sg_mem *sg; /**< Scatter gather memory */ 823 struct drm_sg_mem *sg; /**< Scatter gather memory */
824 unsigned int num_crtcs; /**< Number of CRTCs on this device */ 824 unsigned int num_crtcs; /**< Number of CRTCs on this device */
825 sigset_t sigmask;
826 825
827 struct { 826 struct {
828 int context; 827 int context;
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index e63553386ae7..2b8ed123ad36 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -164,6 +164,8 @@ static inline __u8 ror8(__u8 word, unsigned int shift)
164 * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit 164 * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit
165 * @value: value to sign extend 165 * @value: value to sign extend
166 * @index: 0 based bit index (0<=index<32) to sign bit 166 * @index: 0 based bit index (0<=index<32) to sign bit
167 *
168 * This is safe to use for 16- and 8-bit types as well.
167 */ 169 */
168static inline __s32 sign_extend32(__u32 value, int index) 170static inline __s32 sign_extend32(__u32 value, int index)
169{ 171{
@@ -171,6 +173,17 @@ static inline __s32 sign_extend32(__u32 value, int index)
171 return (__s32)(value << shift) >> shift; 173 return (__s32)(value << shift) >> shift;
172} 174}
173 175
176/**
177 * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit
178 * @value: value to sign extend
179 * @index: 0 based bit index (0<=index<64) to sign bit
180 */
181static inline __s64 sign_extend64(__u64 value, int index)
182{
183 __u8 shift = 63 - index;
184 return (__s64)(value << shift) >> shift;
185}
186
174static inline unsigned fls_long(unsigned long l) 187static inline unsigned fls_long(unsigned long l)
175{ 188{
176 if (sizeof(l) == 4) 189 if (sizeof(l) == 4)
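sign_extend64() is the 64-bit twin of the helper documented just above it; the usual consumer is a field narrower than the register it was read into. A worked example with made-up values:

	/* a 48-bit two's-complement field packed into the low bits of a u64 */
	__u64 raw = 0x0000ffffffffff9cULL;	/* 48-bit encoding of -100 */
	__s64 val = sign_extend64(raw, 47);	/* bit 47 is the sign bit */

	/* val is now -100; values with bit 47 clear pass through unchanged */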
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 0e3110a0b771..22ab246feed3 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -205,7 +205,10 @@
205 205
206#if GCC_VERSION >= 40600 206#if GCC_VERSION >= 40600
207/* 207/*
208 * Tell the optimizer that something else uses this function or variable. 208 * When used with Link Time Optimization, gcc can optimize away C functions or
209 * variables which are referenced only from assembly code. __visible tells the
210 * optimizer that something else uses this function or variable, thus preventing
211 * this.
209 */ 212 */
210#define __visible __attribute__((externally_visible)) 213#define __visible __attribute__((externally_visible))
211#endif 214#endif
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 5a1311942358..85a868ccb493 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -104,6 +104,9 @@ extern void cpuset_print_current_mems_allowed(void);
104 */ 104 */
105static inline unsigned int read_mems_allowed_begin(void) 105static inline unsigned int read_mems_allowed_begin(void)
106{ 106{
107 if (!cpusets_enabled())
108 return 0;
109
107 return read_seqcount_begin(&current->mems_allowed_seq); 110 return read_seqcount_begin(&current->mems_allowed_seq);
108} 111}
109 112
@@ -115,6 +118,9 @@ static inline unsigned int read_mems_allowed_begin(void)
115 */ 118 */
116static inline bool read_mems_allowed_retry(unsigned int seq) 119static inline bool read_mems_allowed_retry(unsigned int seq)
117{ 120{
121 if (!cpusets_enabled())
122 return false;
123
118 return read_seqcount_retry(&current->mems_allowed_seq, seq); 124 return read_seqcount_retry(&current->mems_allowed_seq, seq);
119} 125}
120 126
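The two early returns above keep the seqcount out of the picture whenever cpusets are not in use; callers keep the familiar optimistic-retry shape, roughly as follows (a sketch of the pattern, not a specific call site; try_alloc() is hypothetical):

	unsigned int cpuset_mems_cookie;
	struct page *page;

	do {
		cpuset_mems_cookie = read_mems_allowed_begin();
		page = try_alloc(gfp_mask, order, nodemask);
	} while (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)));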
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ac07ff090919..2e551e2d2d03 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -1,6 +1,7 @@
1#ifndef _LINUX_DMA_MAPPING_H 1#ifndef _LINUX_DMA_MAPPING_H
2#define _LINUX_DMA_MAPPING_H 2#define _LINUX_DMA_MAPPING_H
3 3
4#include <linux/sizes.h>
4#include <linux/string.h> 5#include <linux/string.h>
5#include <linux/device.h> 6#include <linux/device.h>
6#include <linux/err.h> 7#include <linux/err.h>
@@ -145,7 +146,9 @@ static inline void arch_teardown_dma_ops(struct device *dev) { }
145 146
146static inline unsigned int dma_get_max_seg_size(struct device *dev) 147static inline unsigned int dma_get_max_seg_size(struct device *dev)
147{ 148{
148 return dev->dma_parms ? dev->dma_parms->max_segment_size : 65536; 149 if (dev->dma_parms && dev->dma_parms->max_segment_size)
150 return dev->dma_parms->max_segment_size;
151 return SZ_64K;
149} 152}
150 153
151static inline unsigned int dma_set_max_seg_size(struct device *dev, 154static inline unsigned int dma_set_max_seg_size(struct device *dev,
@@ -154,14 +157,15 @@ static inline unsigned int dma_set_max_seg_size(struct device *dev,
154 if (dev->dma_parms) { 157 if (dev->dma_parms) {
155 dev->dma_parms->max_segment_size = size; 158 dev->dma_parms->max_segment_size = size;
156 return 0; 159 return 0;
157 } else 160 }
158 return -EIO; 161 return -EIO;
159} 162}
160 163
161static inline unsigned long dma_get_seg_boundary(struct device *dev) 164static inline unsigned long dma_get_seg_boundary(struct device *dev)
162{ 165{
163 return dev->dma_parms ? 166 if (dev->dma_parms && dev->dma_parms->segment_boundary_mask)
164 dev->dma_parms->segment_boundary_mask : 0xffffffff; 167 return dev->dma_parms->segment_boundary_mask;
168 return DMA_BIT_MASK(32);
165} 169}
166 170
167static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) 171static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
@@ -169,8 +173,8 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
169 if (dev->dma_parms) { 173 if (dev->dma_parms) {
170 dev->dma_parms->segment_boundary_mask = mask; 174 dev->dma_parms->segment_boundary_mask = mask;
171 return 0; 175 return 0;
172 } else 176 }
173 return -EIO; 177 return -EIO;
174} 178}
175 179
176#ifndef dma_max_pfn 180#ifndef dma_max_pfn
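
With the hunk above, devices that never allocated dev->dma_parms now fall back to SZ_64K and DMA_BIT_MASK(32) instead of bare magic numbers. A hedged sketch of the setter side (the probe function and the limits are illustrative, not taken from this patch):

#include <linux/dma-mapping.h>
#include <linux/sizes.h>

static int my_probe(struct device *dev)
{
	int ret;

	/* The DMA engine merges segments up to 1 MiB... */
	ret = dma_set_max_seg_size(dev, SZ_1M);
	if (ret)
		return ret;	/* -EIO: the bus did not allocate dev->dma_parms */

	/* ...but a segment must not cross a 4 GiB boundary. */
	return dma_set_seg_boundary(dev, DMA_BIT_MASK(32));
}
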
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f92cbd2f4450..6523109e136d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -14,7 +14,7 @@ struct vm_area_struct;
14#define ___GFP_HIGHMEM 0x02u 14#define ___GFP_HIGHMEM 0x02u
15#define ___GFP_DMA32 0x04u 15#define ___GFP_DMA32 0x04u
16#define ___GFP_MOVABLE 0x08u 16#define ___GFP_MOVABLE 0x08u
17#define ___GFP_WAIT 0x10u 17#define ___GFP_RECLAIMABLE 0x10u
18#define ___GFP_HIGH 0x20u 18#define ___GFP_HIGH 0x20u
19#define ___GFP_IO 0x40u 19#define ___GFP_IO 0x40u
20#define ___GFP_FS 0x80u 20#define ___GFP_FS 0x80u
@@ -29,18 +29,17 @@ struct vm_area_struct;
29#define ___GFP_NOMEMALLOC 0x10000u 29#define ___GFP_NOMEMALLOC 0x10000u
30#define ___GFP_HARDWALL 0x20000u 30#define ___GFP_HARDWALL 0x20000u
31#define ___GFP_THISNODE 0x40000u 31#define ___GFP_THISNODE 0x40000u
32#define ___GFP_RECLAIMABLE 0x80000u 32#define ___GFP_ATOMIC 0x80000u
33#define ___GFP_NOACCOUNT 0x100000u 33#define ___GFP_NOACCOUNT 0x100000u
34#define ___GFP_NOTRACK 0x200000u 34#define ___GFP_NOTRACK 0x200000u
35#define ___GFP_NO_KSWAPD 0x400000u 35#define ___GFP_DIRECT_RECLAIM 0x400000u
36#define ___GFP_OTHER_NODE 0x800000u 36#define ___GFP_OTHER_NODE 0x800000u
37#define ___GFP_WRITE 0x1000000u 37#define ___GFP_WRITE 0x1000000u
38#define ___GFP_KSWAPD_RECLAIM 0x2000000u
38/* If the above are modified, __GFP_BITS_SHIFT may need updating */ 39/* If the above are modified, __GFP_BITS_SHIFT may need updating */
39 40
40/* 41/*
41 * GFP bitmasks.. 42 * Physical address zone modifiers (see linux/mmzone.h - low four bits)
42 *
43 * Zone modifiers (see linux/mmzone.h - low three bits)
44 * 43 *
45 * Do not put any conditional on these. If necessary modify the definitions 44 * Do not put any conditional on these. If necessary modify the definitions
46 * without the underscores and use them consistently. The definitions here may 45 * without the underscores and use them consistently. The definitions here may
@@ -50,116 +49,229 @@ struct vm_area_struct;
50#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) 49#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)
51#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) 50#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)
52#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ 51#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */
52#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */
53#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) 53#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
54
55/*
56 * Page mobility and placement hints
57 *
58 * These flags provide hints about how mobile the page is. Pages with similar
59 * mobility are placed within the same pageblocks to minimise problems due
60 * to external fragmentation.
61 *
62 * __GFP_MOVABLE (also a zone modifier) indicates that the page can be
63 * moved by page migration during memory compaction or can be reclaimed.
64 *
65 * __GFP_RECLAIMABLE is used for slab allocations that specify
66 * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
67 *
68 * __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
69 * these pages will be spread between local zones to avoid all the dirty
70 * pages being in one zone (fair zone allocation policy).
71 *
72 * __GFP_HARDWALL enforces the cpuset memory allocation policy.
73 *
74 * __GFP_THISNODE forces the allocation to be satisfied from the requested
75 * node with no fallbacks or placement policy enforcements.
76 */
77#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
78#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
79#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
80#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
81
54/* 82/*
55 * Action modifiers - doesn't change the zoning 83 * Watermark modifiers -- controls access to emergency reserves
84 *
85 * __GFP_HIGH indicates that the caller is high-priority and that granting
86 * the request is necessary before the system can make forward progress.
87 * For example, creating an IO context to clean pages.
88 *
89 * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
90 * high priority. Users are typically interrupt handlers. This may be
91 * used in conjunction with __GFP_HIGH.
92 *
93 * __GFP_MEMALLOC allows access to all memory. This should only be used when
94 * the caller guarantees the allocation will allow more memory to be freed
95 * very shortly, e.g. process exiting or swapping. Users should either be
 96 * the MM or co-ordinate closely with the VM (e.g. swap over NFS).
97 *
98 * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
99 * This takes precedence over the __GFP_MEMALLOC flag if both are set.
100 *
101 * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement.
102 */
103#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC)
104#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH)
105#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)
106#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
107#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT)
108
109/*
110 * Reclaim modifiers
111 *
112 * __GFP_IO can start physical IO.
113 *
114 * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
115 * allocator recursing into the filesystem which might already be holding
116 * locks.
117 *
118 * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
119 * This flag can be cleared to avoid unnecessary delays when a fallback
120 * option is available.
121 *
122 * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
123 * the low watermark is reached and have it reclaim pages until the high
124 * watermark is reached. A caller may wish to clear this flag when fallback
125 * options are available and the reclaim is likely to disrupt the system. The
126 * canonical example is THP allocation where a fallback is cheap but
127 * reclaim/compaction may cause indirect stalls.
128 *
129 * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
56 * 130 *
57 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt 131 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
58 * _might_ fail. This depends upon the particular VM implementation. 132 * _might_ fail. This depends upon the particular VM implementation.
59 * 133 *
60 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller 134 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
61 * cannot handle allocation failures. New users should be evaluated carefully 135 * cannot handle allocation failures. New users should be evaluated carefully
62 * (and the flag should be used only when there is no reasonable failure policy) 136 * (and the flag should be used only when there is no reasonable failure
63 * but it is definitely preferable to use the flag rather than opencode endless 137 * policy) but it is definitely preferable to use the flag rather than
64 * loop around allocator. 138 * opencode endless loop around allocator.
65 * 139 *
66 * __GFP_NORETRY: The VM implementation must not retry indefinitely and will 140 * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
67 * return NULL when direct reclaim and memory compaction have failed to allow 141 * return NULL when direct reclaim and memory compaction have failed to allow
68 * the allocation to succeed. The OOM killer is not called with the current 142 * the allocation to succeed. The OOM killer is not called with the current
69 * implementation. 143 * implementation.
70 *
71 * __GFP_MOVABLE: Flag that this page will be movable by the page migration
72 * mechanism or reclaimed
73 */ 144 */
74#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */ 145#define __GFP_IO ((__force gfp_t)___GFP_IO)
75#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */ 146#define __GFP_FS ((__force gfp_t)___GFP_FS)
76#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ 147#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
77#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ 148#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
78#define __GFP_COLD ((__force gfp_t)___GFP_COLD) /* Cache-cold page required */ 149#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
79#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) /* Suppress page allocation failure warning */ 150#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT)
80#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */ 151#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL)
81#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */ 152#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY)
82#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */
83#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
84#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */
85#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */
86#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves.
87 * This takes precedence over the
88 * __GFP_MEMALLOC flag if both are
89 * set
90 */
91#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
92#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
93#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
94#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
95#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
96
97#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
98#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
99#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
100 153
101/* 154/*
102 * This may seem redundant, but it's a way of annotating false positives vs. 155 * Action modifiers
103 * allocations that simply cannot be supported (e.g. page tables). 156 *
157 * __GFP_COLD indicates that the caller does not expect the page to be used
 158 * in the near future. Where possible, a cache-cold page will be returned.
159 *
160 * __GFP_NOWARN suppresses allocation failure reports.
161 *
162 * __GFP_COMP adds compound page metadata.
163 *
164 * __GFP_ZERO returns a zeroed page on success.
165 *
166 * __GFP_NOTRACK avoids tracking with kmemcheck.
167 *
168 * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
169 * distinguishing in the source between false positives and allocations that
170 * cannot be supported (e.g. page tables).
171 *
172 * __GFP_OTHER_NODE is for allocations that are on a remote node but that
173 * should not be accounted for as a remote allocation in vmstat. A
174 * typical user would be khugepaged collapsing a huge page on a remote
175 * node.
104 */ 176 */
177#define __GFP_COLD ((__force gfp_t)___GFP_COLD)
178#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
179#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
180#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
181#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK)
105#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) 182#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
183#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
106 184
107#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ 185/* Room for N __GFP_FOO bits */
186#define __GFP_BITS_SHIFT 26
108#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) 187#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
109 188
110/* This equals 0, but use constants in case they ever change */ 189/*
111#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) 190 * Useful GFP flag combinations that are commonly used. It is recommended
112/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */ 191 * that subsystems start with one of these combinations and then set/clear
113#define GFP_ATOMIC (__GFP_HIGH) 192 * __GFP_FOO flags as necessary.
114#define GFP_NOIO (__GFP_WAIT) 193 *
115#define GFP_NOFS (__GFP_WAIT | __GFP_IO) 194 * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
116#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) 195 * watermark is applied to allow access to "atomic reserves"
117#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \ 196 *
197 * GFP_KERNEL is typical for kernel-internal allocations. The caller requires
198 * ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
199 *
200 * GFP_NOWAIT is for kernel allocations that should not stall for direct
201 * reclaim, start physical IO or use any filesystem callback.
202 *
203 * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
204 * that do not require the starting of any physical IO.
205 *
206 * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
207 *
208 * GFP_USER is for userspace allocations that also need to be directly
209 * accessible by the kernel or hardware. It is typically used by hardware
210 * for buffers that are mapped to userspace (e.g. graphics) that hardware
211 * still must DMA to. cpuset limits are enforced for these allocations.
212 *
213 * GFP_DMA exists for historical reasons and should be avoided where possible.
214 * The flag indicates that the caller requires that the lowest zone be
215 * used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
216 * it would require careful auditing as some users really require it and
217 * others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
218 * lowest zone as a type of emergency reserve.
219 *
220 * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
221 * address.
222 *
223 * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
224 * do not need to be directly accessible by the kernel but that cannot
225 * move once in use. An example may be a hardware allocation that maps
226 * data directly into userspace but has no addressing limitations.
227 *
228 * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
229 * need direct access to but can use kmap() when access is required. They
230 * are expected to be movable via page reclaim or page migration. Typically,
231 * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
232 *
233 * GFP_TRANSHUGE is used for THP allocations. They are compound allocations
234 * that will fail quickly if memory is not available and will not wake
235 * kswapd on failure.
236 */
237#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
238#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
239#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
240#define GFP_NOIO (__GFP_RECLAIM)
241#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO)
242#define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \
118 __GFP_RECLAIMABLE) 243 __GFP_RECLAIMABLE)
119#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) 244#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
245#define GFP_DMA __GFP_DMA
246#define GFP_DMA32 __GFP_DMA32
120#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) 247#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
121#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) 248#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE)
122#define GFP_IOFS (__GFP_IO | __GFP_FS) 249#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
123#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ 250 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
124 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ 251 ~__GFP_KSWAPD_RECLAIM)
125 __GFP_NO_KSWAPD)
126 252
127/* This mask makes up all the page movable related flags */ 253/* Convert GFP flags to their corresponding migrate type */
128#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) 254#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
255#define GFP_MOVABLE_SHIFT 3
129 256
130/* Control page allocator reclaim behavior */
131#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
132 __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
133 __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
134
135/* Control slab gfp mask during early boot */
136#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))
137
138/* Control allocation constraints */
139#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
140
141/* Do not use these with a slab allocator */
142#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
143
144/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
145 platforms, used as appropriate on others */
146
147#define GFP_DMA __GFP_DMA
148
149/* 4GB DMA on some platforms */
150#define GFP_DMA32 __GFP_DMA32
151
152/* Convert GFP flags to their corresponding migrate type */
153static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) 257static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
154{ 258{
155 WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); 259 VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
260 BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
261 BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
156 262
157 if (unlikely(page_group_by_mobility_disabled)) 263 if (unlikely(page_group_by_mobility_disabled))
158 return MIGRATE_UNMOVABLE; 264 return MIGRATE_UNMOVABLE;
159 265
160 /* Group based on mobility */ 266 /* Group based on mobility */
161 return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | 267 return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
162 ((gfp_flags & __GFP_RECLAIMABLE) != 0); 268}
269#undef GFP_MOVABLE_MASK
270#undef GFP_MOVABLE_SHIFT
271
272static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
273{
274 return gfp_flags & __GFP_DIRECT_RECLAIM;
163} 275}
164 276
165#ifdef CONFIG_HIGHMEM 277#ifdef CONFIG_HIGHMEM
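
Since the gfp.h rework above replaces __GFP_WAIT with __GFP_DIRECT_RECLAIM and __GFP_KSWAPD_RECLAIM, callers are expected to start from one of the documented combinations and then set or clear individual bits. A minimal sketch of that style (my_alloc() is a made-up helper, not something this series adds):

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static void *my_alloc(size_t size, bool can_sleep, bool wake_kswapd)
{
	gfp_t gfp = can_sleep ? GFP_KERNEL : GFP_NOWAIT;

	/* Background reclaim is optional, as in the new GFP_TRANSHUGE. */
	if (!wake_kswapd)
		gfp &= ~__GFP_KSWAPD_RECLAIM;

	/* The helper below replaces the old "gfp & __GFP_WAIT" test. */
	if (gfpflags_allow_blocking(gfp))
		might_sleep();

	return kmalloc(size, gfp);
}
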
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 7edd30515298..24154c26d469 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -32,7 +32,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
32 32
33 if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) 33 if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
34 return NULL; 34 return NULL;
35 return (struct hugetlb_cgroup *)page[2].lru.next; 35 return (struct hugetlb_cgroup *)page[2].private;
36} 36}
37 37
38static inline 38static inline
@@ -42,7 +42,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
42 42
43 if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) 43 if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
44 return -1; 44 return -1;
45 page[2].lru.next = (void *)h_cg; 45 page[2].private = (unsigned long)h_cg;
46 return 0; 46 return 0;
47} 47}
48 48
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5582410727cb..2c13f747ac2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -413,6 +413,8 @@ extern __printf(2, 3)
413char *kasprintf(gfp_t gfp, const char *fmt, ...); 413char *kasprintf(gfp_t gfp, const char *fmt, ...);
414extern __printf(2, 0) 414extern __printf(2, 0)
415char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); 415char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);
416extern __printf(2, 0)
417const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args);
416 418
417extern __scanf(2, 3) 419extern __scanf(2, 3)
418int sscanf(const char *, const char *, ...); 420int sscanf(const char *, const char *, ...);
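
kvasprintf_const() is only declared here; as I understand its contract, it may hand back a pointer into .rodata (for example when the format string needs no expansion), so the result must be released with kfree_const() rather than kfree(). A hedged usage sketch, with set_label() and the static variable invented for illustration:

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>

static const char *label;

static __printf(1, 2) int set_label(const char *fmt, ...)
{
	const char *s;
	va_list args;

	va_start(args, fmt);
	s = kvasprintf_const(GFP_KERNEL, fmt, args);
	va_end(args);

	if (!s)
		return -ENOMEM;

	kfree_const(label);	/* matches however the string was obtained */
	label = s;
	return 0;
}
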
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 906c46a05707..00bad7793788 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -430,46 +430,6 @@ static inline void compound_unlock_irqrestore(struct page *page,
430#endif 430#endif
431} 431}
432 432
433static inline struct page *compound_head_by_tail(struct page *tail)
434{
435 struct page *head = tail->first_page;
436
437 /*
438 * page->first_page may be a dangling pointer to an old
439 * compound page, so recheck that it is still a tail
440 * page before returning.
441 */
442 smp_rmb();
443 if (likely(PageTail(tail)))
444 return head;
445 return tail;
446}
447
448/*
449 * Since either compound page could be dismantled asynchronously in THP
450 * or we access asynchronously arbitrary positioned struct page, there
451 * would be tail flag race. To handle this race, we should call
452 * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
453 */
454static inline struct page *compound_head(struct page *page)
455{
456 if (unlikely(PageTail(page)))
457 return compound_head_by_tail(page);
458 return page;
459}
460
461/*
462 * If we access compound page synchronously such as access to
463 * allocated page, there is no need to handle tail flag race, so we can
464 * check tail flag directly without any synchronization primitive.
465 */
466static inline struct page *compound_head_fast(struct page *page)
467{
468 if (unlikely(PageTail(page)))
469 return page->first_page;
470 return page;
471}
472
473/* 433/*
474 * The atomic page->_mapcount, starts from -1: so that transitions 434 * The atomic page->_mapcount, starts from -1: so that transitions
475 * both from it and to it can be tracked, using atomic_inc_and_test 435 * both from it and to it can be tracked, using atomic_inc_and_test
@@ -518,7 +478,7 @@ static inline void get_huge_page_tail(struct page *page)
518 VM_BUG_ON_PAGE(!PageTail(page), page); 478 VM_BUG_ON_PAGE(!PageTail(page), page);
519 VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); 479 VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
520 VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); 480 VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
521 if (compound_tail_refcounted(page->first_page)) 481 if (compound_tail_refcounted(compound_head(page)))
522 atomic_inc(&page->_mapcount); 482 atomic_inc(&page->_mapcount);
523} 483}
524 484
@@ -541,13 +501,7 @@ static inline struct page *virt_to_head_page(const void *x)
541{ 501{
542 struct page *page = virt_to_page(x); 502 struct page *page = virt_to_page(x);
543 503
544 /* 504 return compound_head(page);
545 * We don't need to worry about synchronization of tail flag
546 * when we call virt_to_head_page() since it is only called for
547 * already allocated page and this page won't be freed until
548 * this virt_to_head_page() is finished. So use _fast variant.
549 */
550 return compound_head_fast(page);
551} 505}
552 506
553/* 507/*
@@ -568,28 +522,42 @@ int split_free_page(struct page *page);
568/* 522/*
569 * Compound pages have a destructor function. Provide a 523 * Compound pages have a destructor function. Provide a
570 * prototype for that function and accessor functions. 524 * prototype for that function and accessor functions.
571 * These are _only_ valid on the head of a PG_compound page. 525 * These are _only_ valid on the head of a compound page.
572 */ 526 */
527typedef void compound_page_dtor(struct page *);
528
529/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */
530enum compound_dtor_id {
531 NULL_COMPOUND_DTOR,
532 COMPOUND_PAGE_DTOR,
533#ifdef CONFIG_HUGETLB_PAGE
534 HUGETLB_PAGE_DTOR,
535#endif
536 NR_COMPOUND_DTORS,
537};
538extern compound_page_dtor * const compound_page_dtors[];
573 539
574static inline void set_compound_page_dtor(struct page *page, 540static inline void set_compound_page_dtor(struct page *page,
575 compound_page_dtor *dtor) 541 enum compound_dtor_id compound_dtor)
576{ 542{
577 page[1].compound_dtor = dtor; 543 VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page);
544 page[1].compound_dtor = compound_dtor;
578} 545}
579 546
580static inline compound_page_dtor *get_compound_page_dtor(struct page *page) 547static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
581{ 548{
582 return page[1].compound_dtor; 549 VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page);
550 return compound_page_dtors[page[1].compound_dtor];
583} 551}
584 552
585static inline int compound_order(struct page *page) 553static inline unsigned int compound_order(struct page *page)
586{ 554{
587 if (!PageHead(page)) 555 if (!PageHead(page))
588 return 0; 556 return 0;
589 return page[1].compound_order; 557 return page[1].compound_order;
590} 558}
591 559
592static inline void set_compound_order(struct page *page, unsigned long order) 560static inline void set_compound_order(struct page *page, unsigned int order)
593{ 561{
594 page[1].compound_order = order; 562 page[1].compound_order = order;
595} 563}
@@ -1572,8 +1540,7 @@ static inline bool ptlock_init(struct page *page)
1572 * with 0. Make sure nobody took it in use in between. 1540 * with 0. Make sure nobody took it in use in between.
1573 * 1541 *
1574 * It can happen if arch try to use slab for page table allocation: 1542 * It can happen if arch try to use slab for page table allocation:
1575 * slab code uses page->slab_cache and page->first_page (for tail 1543 * slab code uses page->slab_cache, which share storage with page->ptl.
1576 * pages), which share storage with page->ptl.
1577 */ 1544 */
1578 VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); 1545 VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);
1579 if (!ptlock_alloc(page)) 1546 if (!ptlock_alloc(page))
@@ -1843,7 +1810,8 @@ extern void si_meminfo(struct sysinfo * val);
1843extern void si_meminfo_node(struct sysinfo *val, int nid); 1810extern void si_meminfo_node(struct sysinfo *val, int nid);
1844 1811
1845extern __printf(3, 4) 1812extern __printf(3, 4)
1846void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); 1813void warn_alloc_failed(gfp_t gfp_mask, unsigned int order,
1814 const char *fmt, ...);
1847 1815
1848extern void setup_per_cpu_pageset(void); 1816extern void setup_per_cpu_pageset(void);
1849 1817
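
The mm.h changes above replace the per-page destructor pointer with a small enum index into compound_page_dtors[]. A sketch of how the two accessors are meant to pair up (the surrounding functions are illustrative, not lifted from mm/page_alloc.c):

#include <linux/mm.h>

static void prep_my_compound(struct page *page, unsigned int order)
{
	set_compound_order(page, order);
	set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);	/* an index, no longer a pointer */
}

static void destroy_my_compound(struct page *page)
{
	compound_page_dtor *dtor = get_compound_page_dtor(page); /* table lookup */

	(*dtor)(page);
}
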
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0a85da25a822..f8d1492a114f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -28,8 +28,6 @@ struct mem_cgroup;
28 IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) 28 IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
29#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) 29#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8)
30 30
31typedef void compound_page_dtor(struct page *);
32
33/* 31/*
34 * Each physical page in the system has a struct page associated with 32 * Each physical page in the system has a struct page associated with
35 * it to keep track of whatever it is we are using the page for at the 33 * it to keep track of whatever it is we are using the page for at the
@@ -113,7 +111,13 @@ struct page {
113 }; 111 };
114 }; 112 };
115 113
116 /* Third double word block */ 114 /*
115 * Third double word block
116 *
117 * WARNING: bit 0 of the first word encodes PageTail(). That means
 118 * the other users of this storage space MUST NOT use that bit, to
 119 * avoid collisions and false-positive PageTail().
120 */
117 union { 121 union {
118 struct list_head lru; /* Pageout list, eg. active_list 122 struct list_head lru; /* Pageout list, eg. active_list
119 * protected by zone->lru_lock ! 123 * protected by zone->lru_lock !
@@ -131,18 +135,37 @@ struct page {
131#endif 135#endif
132 }; 136 };
133 137
134 struct slab *slab_page; /* slab fields */
135 struct rcu_head rcu_head; /* Used by SLAB 138 struct rcu_head rcu_head; /* Used by SLAB
136 * when destroying via RCU 139 * when destroying via RCU
137 */ 140 */
138 /* First tail page of compound page */ 141 /* Tail pages of compound page */
139 struct { 142 struct {
140 compound_page_dtor *compound_dtor; 143 unsigned long compound_head; /* If bit zero is set */
141 unsigned long compound_order; 144
145 /* First tail page only */
146#ifdef CONFIG_64BIT
147 /*
148 * On 64-bit systems we have enough space in struct page
 149 * to encode compound_dtor and compound_order with
 150 * unsigned int. It can help the compiler generate better or
 151 * smaller code on some architectures.
152 */
153 unsigned int compound_dtor;
154 unsigned int compound_order;
155#else
156 unsigned short int compound_dtor;
157 unsigned short int compound_order;
158#endif
142 }; 159 };
143 160
144#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS 161#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
145 pgtable_t pmd_huge_pte; /* protected by page->ptl */ 162 struct {
163 unsigned long __pad; /* do not overlay pmd_huge_pte
164 * with compound_head to avoid
165 * possible bit 0 collision.
166 */
167 pgtable_t pmd_huge_pte; /* protected by page->ptl */
168 };
146#endif 169#endif
147 }; 170 };
148 171
@@ -163,7 +186,6 @@ struct page {
163#endif 186#endif
164#endif 187#endif
165 struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ 188 struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
166 struct page *first_page; /* Compound tail pages */
167 }; 189 };
168 190
169#ifdef CONFIG_MEMCG 191#ifdef CONFIG_MEMCG
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2d7e660cdefe..e23a9e704536 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -37,10 +37,10 @@
37 37
38enum { 38enum {
39 MIGRATE_UNMOVABLE, 39 MIGRATE_UNMOVABLE,
40 MIGRATE_RECLAIMABLE,
41 MIGRATE_MOVABLE, 40 MIGRATE_MOVABLE,
41 MIGRATE_RECLAIMABLE,
42 MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ 42 MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
43 MIGRATE_RESERVE = MIGRATE_PCPTYPES, 43 MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
44#ifdef CONFIG_CMA 44#ifdef CONFIG_CMA
45 /* 45 /*
46 * MIGRATE_CMA migration type is designed to mimic the way 46 * MIGRATE_CMA migration type is designed to mimic the way
@@ -334,13 +334,16 @@ struct zone {
334 /* zone watermarks, access with *_wmark_pages(zone) macros */ 334 /* zone watermarks, access with *_wmark_pages(zone) macros */
335 unsigned long watermark[NR_WMARK]; 335 unsigned long watermark[NR_WMARK];
336 336
337 unsigned long nr_reserved_highatomic;
338
337 /* 339 /*
338 * We don't know if the memory that we're going to allocate will be freeable 340 * We don't know if the memory that we're going to allocate will be
339 * or/and it will be released eventually, so to avoid totally wasting several 341 * freeable or/and it will be released eventually, so to avoid totally
340 * GB of ram we must reserve some of the lower zone memory (otherwise we risk 342 * wasting several GB of ram we must reserve some of the lower zone
341 * to run OOM on the lower zones despite there's tons of freeable ram 343 * memory (otherwise we risk to run OOM on the lower zones despite
342 * on the higher zones). This array is recalculated at runtime if the 344 * there being tons of freeable ram on the higher zones). This array is
343 * sysctl_lowmem_reserve_ratio sysctl changes. 345 * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
346 * changes.
344 */ 347 */
345 long lowmem_reserve[MAX_NR_ZONES]; 348 long lowmem_reserve[MAX_NR_ZONES];
346 349
@@ -429,12 +432,6 @@ struct zone {
429 432
430 const char *name; 433 const char *name;
431 434
432 /*
433 * Number of MIGRATE_RESERVE page block. To maintain for just
434 * optimization. Protected by zone->lock.
435 */
436 int nr_migrate_reserve_block;
437
438#ifdef CONFIG_MEMORY_ISOLATION 435#ifdef CONFIG_MEMORY_ISOLATION
439 /* 436 /*
440 * Number of isolated pageblock. It is used to solve incorrect 437 * Number of isolated pageblock. It is used to solve incorrect
@@ -589,75 +586,8 @@ static inline bool zone_is_empty(struct zone *zone)
589 * [1] : No fallback (__GFP_THISNODE) 586 * [1] : No fallback (__GFP_THISNODE)
590 */ 587 */
591#define MAX_ZONELISTS 2 588#define MAX_ZONELISTS 2
592
593
594/*
595 * We cache key information from each zonelist for smaller cache
596 * footprint when scanning for free pages in get_page_from_freelist().
597 *
598 * 1) The BITMAP fullzones tracks which zones in a zonelist have come
599 * up short of free memory since the last time (last_fullzone_zap)
600 * we zero'd fullzones.
601 * 2) The array z_to_n[] maps each zone in the zonelist to its node
602 * id, so that we can efficiently evaluate whether that node is
603 * set in the current tasks mems_allowed.
604 *
605 * Both fullzones and z_to_n[] are one-to-one with the zonelist,
606 * indexed by a zones offset in the zonelist zones[] array.
607 *
608 * The get_page_from_freelist() routine does two scans. During the
609 * first scan, we skip zones whose corresponding bit in 'fullzones'
610 * is set or whose corresponding node in current->mems_allowed (which
611 * comes from cpusets) is not set. During the second scan, we bypass
612 * this zonelist_cache, to ensure we look methodically at each zone.
613 *
614 * Once per second, we zero out (zap) fullzones, forcing us to
615 * reconsider nodes that might have regained more free memory.
616 * The field last_full_zap is the time we last zapped fullzones.
617 *
618 * This mechanism reduces the amount of time we waste repeatedly
619 * reexaming zones for free memory when they just came up low on
620 * memory momentarilly ago.
621 *
622 * The zonelist_cache struct members logically belong in struct
623 * zonelist. However, the mempolicy zonelists constructed for
624 * MPOL_BIND are intentionally variable length (and usually much
625 * shorter). A general purpose mechanism for handling structs with
626 * multiple variable length members is more mechanism than we want
627 * here. We resort to some special case hackery instead.
628 *
629 * The MPOL_BIND zonelists don't need this zonelist_cache (in good
630 * part because they are shorter), so we put the fixed length stuff
631 * at the front of the zonelist struct, ending in a variable length
632 * zones[], as is needed by MPOL_BIND.
633 *
634 * Then we put the optional zonelist cache on the end of the zonelist
635 * struct. This optional stuff is found by a 'zlcache_ptr' pointer in
636 * the fixed length portion at the front of the struct. This pointer
637 * both enables us to find the zonelist cache, and in the case of
638 * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL)
639 * to know that the zonelist cache is not there.
640 *
641 * The end result is that struct zonelists come in two flavors:
642 * 1) The full, fixed length version, shown below, and
643 * 2) The custom zonelists for MPOL_BIND.
644 * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache.
645 *
646 * Even though there may be multiple CPU cores on a node modifying
647 * fullzones or last_full_zap in the same zonelist_cache at the same
648 * time, we don't lock it. This is just hint data - if it is wrong now
649 * and then, the allocator will still function, perhaps a bit slower.
650 */
651
652
653struct zonelist_cache {
654 unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */
655 DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */
656 unsigned long last_full_zap; /* when last zap'd (jiffies) */
657};
658#else 589#else
659#define MAX_ZONELISTS 1 590#define MAX_ZONELISTS 1
660struct zonelist_cache;
661#endif 591#endif
662 592
663/* 593/*
@@ -675,9 +605,6 @@ struct zoneref {
675 * allocation, the other zones are fallback zones, in decreasing 605 * allocation, the other zones are fallback zones, in decreasing
676 * priority. 606 * priority.
677 * 607 *
678 * If zlcache_ptr is not NULL, then it is just the address of zlcache,
679 * as explained above. If zlcache_ptr is NULL, there is no zlcache.
680 * *
681 * To speed the reading of the zonelist, the zonerefs contain the zone index 608 * To speed the reading of the zonelist, the zonerefs contain the zone index
682 * of the entry being read. Helper functions to access information given 609 * of the entry being read. Helper functions to access information given
683 * a struct zoneref are 610 * a struct zoneref are
@@ -687,11 +614,7 @@ struct zoneref {
687 * zonelist_node_idx() - Return the index of the node for an entry 614 * zonelist_node_idx() - Return the index of the node for an entry
688 */ 615 */
689struct zonelist { 616struct zonelist {
690 struct zonelist_cache *zlcache_ptr; // NULL or &zlcache
691 struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; 617 struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
692#ifdef CONFIG_NUMA
693 struct zonelist_cache zlcache; // optional ...
694#endif
695}; 618};
696 619
697#ifndef CONFIG_DISCONTIGMEM 620#ifndef CONFIG_DISCONTIGMEM
@@ -817,7 +740,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
817bool zone_watermark_ok(struct zone *z, unsigned int order, 740bool zone_watermark_ok(struct zone *z, unsigned int order,
818 unsigned long mark, int classzone_idx, int alloc_flags); 741 unsigned long mark, int classzone_idx, int alloc_flags);
819bool zone_watermark_ok_safe(struct zone *z, unsigned int order, 742bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
820 unsigned long mark, int classzone_idx, int alloc_flags); 743 unsigned long mark, int classzone_idx);
821enum memmap_context { 744enum memmap_context {
822 MEMMAP_EARLY, 745 MEMMAP_EARLY,
823 MEMMAP_HOTPLUG, 746 MEMMAP_HOTPLUG,
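
The MIGRATE_MOVABLE/MIGRATE_RECLAIMABLE swap above is what lets gfpflags_to_migratetype() become a plain shift: with ___GFP_MOVABLE at bit 3 and ___GFP_RECLAIMABLE at bit 4, the two mobility bits shifted right by GFP_MOVABLE_SHIFT land exactly on the enum values. A standalone (userspace) illustration of that arithmetic, using local copies of the constants rather than the kernel headers:

#include <assert.h>

#define MY_GFP_MOVABLE      0x08u	/* mirrors ___GFP_MOVABLE */
#define MY_GFP_RECLAIMABLE  0x10u	/* mirrors ___GFP_RECLAIMABLE */
#define MY_MOVABLE_SHIFT    3		/* mirrors GFP_MOVABLE_SHIFT */

enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE };

static unsigned int to_migratetype(unsigned int gfp)
{
	return (gfp & (MY_GFP_MOVABLE | MY_GFP_RECLAIMABLE)) >> MY_MOVABLE_SHIFT;
}

int main(void)
{
	assert(to_migratetype(0) == MIGRATE_UNMOVABLE);			   /* e.g. GFP_KERNEL */
	assert(to_migratetype(MY_GFP_MOVABLE) == MIGRATE_MOVABLE);	   /* e.g. GFP_HIGHUSER_MOVABLE */
	assert(to_migratetype(MY_GFP_RECLAIMABLE) == MIGRATE_RECLAIMABLE); /* e.g. GFP_TEMPORARY */
	return 0;
}
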
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index c12f2147c350..52666d90ca94 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -386,6 +386,7 @@ extern int param_get_ullong(char *buffer, const struct kernel_param *kp);
386extern const struct kernel_param_ops param_ops_charp; 386extern const struct kernel_param_ops param_ops_charp;
387extern int param_set_charp(const char *val, const struct kernel_param *kp); 387extern int param_set_charp(const char *val, const struct kernel_param *kp);
388extern int param_get_charp(char *buffer, const struct kernel_param *kp); 388extern int param_get_charp(char *buffer, const struct kernel_param *kp);
389extern void param_free_charp(void *arg);
389#define param_check_charp(name, p) __param_check(name, p, char *) 390#define param_check_charp(name, p) __param_check(name, p, char *)
390 391
391/* We used to allow int as well as bool. We're taking that away! */ 392/* We used to allow int as well as bool. We're taking that away! */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a525e5067484..bb53c7b86315 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -86,12 +86,7 @@ enum pageflags {
86 PG_private, /* If pagecache, has fs-private data */ 86 PG_private, /* If pagecache, has fs-private data */
87 PG_private_2, /* If pagecache, has fs aux data */ 87 PG_private_2, /* If pagecache, has fs aux data */
88 PG_writeback, /* Page is under writeback */ 88 PG_writeback, /* Page is under writeback */
89#ifdef CONFIG_PAGEFLAGS_EXTENDED
90 PG_head, /* A head page */ 89 PG_head, /* A head page */
91 PG_tail, /* A tail page */
92#else
93 PG_compound, /* A compound page */
94#endif
95 PG_swapcache, /* Swap page: swp_entry_t in private */ 90 PG_swapcache, /* Swap page: swp_entry_t in private */
96 PG_mappedtodisk, /* Has blocks allocated on-disk */ 91 PG_mappedtodisk, /* Has blocks allocated on-disk */
97 PG_reclaim, /* To be reclaimed asap */ 92 PG_reclaim, /* To be reclaimed asap */
@@ -398,85 +393,46 @@ static inline void set_page_writeback_keepwrite(struct page *page)
398 test_set_page_writeback_keepwrite(page); 393 test_set_page_writeback_keepwrite(page);
399} 394}
400 395
401#ifdef CONFIG_PAGEFLAGS_EXTENDED
402/*
403 * System with lots of page flags available. This allows separate
404 * flags for PageHead() and PageTail() checks of compound pages so that bit
405 * tests can be used in performance sensitive paths. PageCompound is
406 * generally not used in hot code paths except arch/powerpc/mm/init_64.c
407 * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages
408 * and avoid handling those in real mode.
409 */
410__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head) 396__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
411__PAGEFLAG(Tail, tail)
412 397
413static inline int PageCompound(struct page *page) 398static inline int PageTail(struct page *page)
414{
415 return page->flags & ((1L << PG_head) | (1L << PG_tail));
416
417}
418#ifdef CONFIG_TRANSPARENT_HUGEPAGE
419static inline void ClearPageCompound(struct page *page)
420{ 399{
421 BUG_ON(!PageHead(page)); 400 return READ_ONCE(page->compound_head) & 1;
422 ClearPageHead(page);
423} 401}
424#endif
425
426#define PG_head_mask ((1L << PG_head))
427 402
428#else 403static inline void set_compound_head(struct page *page, struct page *head)
429/*
430 * Reduce page flag use as much as possible by overlapping
431 * compound page flags with the flags used for page cache pages. Possible
432 * because PageCompound is always set for compound pages and not for
433 * pages on the LRU and/or pagecache.
434 */
435TESTPAGEFLAG(Compound, compound)
436__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound)
437
438/*
439 * PG_reclaim is used in combination with PG_compound to mark the
440 * head and tail of a compound page. This saves one page flag
441 * but makes it impossible to use compound pages for the page cache.
442 * The PG_reclaim bit would have to be used for reclaim or readahead
443 * if compound pages enter the page cache.
444 *
445 * PG_compound & PG_reclaim => Tail page
446 * PG_compound & ~PG_reclaim => Head page
447 */
448#define PG_head_mask ((1L << PG_compound))
449#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
450
451static inline int PageHead(struct page *page)
452{ 404{
453 return ((page->flags & PG_head_tail_mask) == PG_head_mask); 405 WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
454} 406}
455 407
456static inline int PageTail(struct page *page) 408static inline void clear_compound_head(struct page *page)
457{ 409{
458 return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); 410 WRITE_ONCE(page->compound_head, 0);
459} 411}
460 412
461static inline void __SetPageTail(struct page *page) 413static inline struct page *compound_head(struct page *page)
462{ 414{
463 page->flags |= PG_head_tail_mask; 415 unsigned long head = READ_ONCE(page->compound_head);
416
417 if (unlikely(head & 1))
418 return (struct page *) (head - 1);
419 return page;
464} 420}
465 421
466static inline void __ClearPageTail(struct page *page) 422static inline int PageCompound(struct page *page)
467{ 423{
468 page->flags &= ~PG_head_tail_mask; 424 return PageHead(page) || PageTail(page);
469}
470 425
426}
471#ifdef CONFIG_TRANSPARENT_HUGEPAGE 427#ifdef CONFIG_TRANSPARENT_HUGEPAGE
472static inline void ClearPageCompound(struct page *page) 428static inline void ClearPageCompound(struct page *page)
473{ 429{
474 BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound)); 430 BUG_ON(!PageHead(page));
475 clear_bit(PG_compound, &page->flags); 431 ClearPageHead(page);
476} 432}
477#endif 433#endif
478 434
479#endif /* !PAGEFLAGS_EXTENDED */ 435#define PG_head_mask ((1L << PG_head))
480 436
481#ifdef CONFIG_HUGETLB_PAGE 437#ifdef CONFIG_HUGETLB_PAGE
482int PageHuge(struct page *page); 438int PageHuge(struct page *page);
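
The compound_head() rewrite above relies on pointer tagging: because struct page is word aligned, bit 0 of a page pointer is always clear, so a tail page can store (head | 1) in ->compound_head and a single load plus a bit test replaces both PG_tail and ->first_page. A standalone (userspace) illustration of the same trick, with a stand-in struct:

#include <assert.h>

struct fake_page { unsigned long compound_head; };

static void set_head(struct fake_page *tail, struct fake_page *head)
{
	tail->compound_head = (unsigned long)head + 1;	/* set the tag bit */
}

static struct fake_page *get_head(struct fake_page *page)
{
	unsigned long head = page->compound_head;

	return (head & 1) ? (struct fake_page *)(head - 1) : page;
}

int main(void)
{
	struct fake_page pages[2] = { { 0 }, { 0 } };	/* aligned, so bit 0 is free */

	set_head(&pages[1], &pages[0]);
	assert(get_head(&pages[1]) == &pages[0]);	/* a tail resolves to its head */
	assert(get_head(&pages[0]) == &pages[0]);	/* a head resolves to itself */
	return 0;
}
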
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 2baeee12f48e..e942558b3585 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -44,7 +44,7 @@ enum pageblock_bits {
44#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 44#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
45 45
46/* Huge page sizes are variable */ 46/* Huge page sizes are variable */
47extern int pageblock_order; 47extern unsigned int pageblock_order;
48 48
49#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ 49#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
50 50
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a6c78e00ea96..26eabf5ec718 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -69,6 +69,13 @@ static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
69 return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; 69 return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
70} 70}
71 71
72/* Restricts the given gfp_mask to what the mapping allows. */
73static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
74 gfp_t gfp_mask)
75{
76 return mapping_gfp_mask(mapping) & gfp_mask;
77}
78
72/* 79/*
73 * This is non-atomic. Only to be used before the mapping is activated. 80 * This is non-atomic. Only to be used before the mapping is activated.
74 * Probably needs a barrier... 81 * Probably needs a barrier...
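
mapping_gfp_constraint() above is a convenience for the common "mapping mask AND caller mask" pattern. A hedged sketch of a read path using it (the wrapper is invented; the point is only how the two masks combine):

#include <linux/pagemap.h>

static struct page *my_get_cached_page(struct address_space *mapping,
				       pgoff_t index)
{
	/* Honour the mapping's restrictions and additionally forbid FS recursion. */
	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL & ~__GFP_FS);

	return find_or_create_page(mapping, index, gfp);
}
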
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 830c4992088d..a5aa7ae671f4 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -101,13 +101,21 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
101 }) 101 })
102 102
103/** 103/**
104 * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of 104 * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
105 * given type safe against removal of rb_node entry 105 * given type allowing the backing memory of @pos to be invalidated
106 * 106 *
107 * @pos: the 'type *' to use as a loop cursor. 107 * @pos: the 'type *' to use as a loop cursor.
108 * @n: another 'type *' to use as temporary storage 108 * @n: another 'type *' to use as temporary storage
109 * @root: 'rb_root *' of the rbtree. 109 * @root: 'rb_root *' of the rbtree.
110 * @field: the name of the rb_node field within 'type'. 110 * @field: the name of the rb_node field within 'type'.
111 *
112 * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
113 * list_for_each_entry_safe() and allows the iteration to continue independent
114 * of changes to @pos by the body of the loop.
115 *
116 * Note, however, that it cannot handle other modifications that re-order the
117 * rbtree it is iterating over. This includes calling rb_erase() on @pos, as
118 * rb_erase() may rebalance the tree, causing us to miss some nodes.
111 */ 119 */
112#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ 120#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
113 for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ 121 for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
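
The expanded comment above is easiest to see in its canonical use, tearing down a whole tree: post-order guarantees the iterator has moved past a node before the loop body frees it, so no rb_erase() (and therefore no rebalancing) is involved. struct item is made up for the sketch.

#include <linux/rbtree.h>
#include <linux/slab.h>

struct item {
	struct rb_node node;
	int key;
};

static void free_all_items(struct rb_root *root)
{
	struct item *pos, *n;

	rbtree_postorder_for_each_entry_safe(pos, n, root, node)
		kfree(pos);	/* safe: the cursor already points past pos */

	*root = RB_ROOT;	/* everything is gone; reset the root */
}
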
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eeb5066a44fb..4069febaa34a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1570,9 +1570,7 @@ struct task_struct {
1570 1570
1571 unsigned long sas_ss_sp; 1571 unsigned long sas_ss_sp;
1572 size_t sas_ss_size; 1572 size_t sas_ss_size;
1573 int (*notifier)(void *priv); 1573
1574 void *notifier_data;
1575 sigset_t *notifier_mask;
1576 struct callback_head *task_works; 1574 struct callback_head *task_works;
1577 1575
1578 struct audit_context *audit_context; 1576 struct audit_context *audit_context;
@@ -2464,21 +2462,29 @@ extern void ignore_signals(struct task_struct *);
2464extern void flush_signal_handlers(struct task_struct *, int force_default); 2462extern void flush_signal_handlers(struct task_struct *, int force_default);
2465extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); 2463extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
2466 2464
2467static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) 2465static inline int kernel_dequeue_signal(siginfo_t *info)
2468{ 2466{
2469 unsigned long flags; 2467 struct task_struct *tsk = current;
2468 siginfo_t __info;
2470 int ret; 2469 int ret;
2471 2470
2472 spin_lock_irqsave(&tsk->sighand->siglock, flags); 2471 spin_lock_irq(&tsk->sighand->siglock);
2473 ret = dequeue_signal(tsk, mask, info); 2472 ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
2474 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 2473 spin_unlock_irq(&tsk->sighand->siglock);
2475 2474
2476 return ret; 2475 return ret;
2477} 2476}
2478 2477
2479extern void block_all_signals(int (*notifier)(void *priv), void *priv, 2478static inline void kernel_signal_stop(void)
2480 sigset_t *mask); 2479{
2481extern void unblock_all_signals(void); 2480 spin_lock_irq(&current->sighand->siglock);
2481 if (current->jobctl & JOBCTL_STOP_DEQUEUED)
2482 __set_current_state(TASK_STOPPED);
2483 spin_unlock_irq(&current->sighand->siglock);
2484
2485 schedule();
2486}
2487
2482extern void release_task(struct task_struct * p); 2488extern void release_task(struct task_struct * p);
2483extern int send_sig_info(int, struct siginfo *, struct task_struct *); 2489extern int send_sig_info(int, struct siginfo *, struct task_struct *);
2484extern int force_sigsegv(int, struct task_struct *); 2490extern int force_sigsegv(int, struct task_struct *);
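
kernel_dequeue_signal() above always dequeues from current, using current->blocked as the mask and a throwaway siginfo when the caller passes NULL; kernel_signal_stop() is the companion for callers that must actually stop after dequeueing a stop signal. An illustrative kthread fragment (the daemon and its loop are invented, not part of this patch):

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/signal.h>

static int my_daemon(void *data)
{
	allow_signal(SIGTERM);

	while (!kthread_should_stop()) {
		/* ... wait for work interruptibly, then service it ... */

		if (signal_pending(current))
			kernel_dequeue_signal(NULL);	/* NULL: the siginfo is not needed */
	}
	return 0;
}
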
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 24f4dfd94c51..4355129fff91 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1224,7 +1224,7 @@ static inline int skb_cloned(const struct sk_buff *skb)
1224 1224
1225static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) 1225static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
1226{ 1226{
1227 might_sleep_if(pri & __GFP_WAIT); 1227 might_sleep_if(gfpflags_allow_blocking(pri));
1228 1228
1229 if (skb_cloned(skb)) 1229 if (skb_cloned(skb))
1230 return pskb_expand_head(skb, 0, 0, pri); 1230 return pskb_expand_head(skb, 0, 0, pri);
@@ -1308,7 +1308,7 @@ static inline int skb_shared(const struct sk_buff *skb)
1308 */ 1308 */
1309static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) 1309static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
1310{ 1310{
1311 might_sleep_if(pri & __GFP_WAIT); 1311 might_sleep_if(gfpflags_allow_blocking(pri));
1312 if (skb_shared(skb)) { 1312 if (skb_shared(skb)) {
1313 struct sk_buff *nskb = skb_clone(skb, pri); 1313 struct sk_buff *nskb = skb_clone(skb, pri);
1314 1314
@@ -1344,7 +1344,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
1344static inline struct sk_buff *skb_unshare(struct sk_buff *skb, 1344static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
1345 gfp_t pri) 1345 gfp_t pri)
1346{ 1346{
1347 might_sleep_if(pri & __GFP_WAIT); 1347 might_sleep_if(gfpflags_allow_blocking(pri));
1348 if (skb_cloned(skb)) { 1348 if (skb_cloned(skb)) {
1349 struct sk_buff *nskb = skb_copy(skb, pri); 1349 struct sk_buff *nskb = skb_copy(skb, pri);
1350 1350
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 42f8ec992452..2e97b7707dff 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -38,10 +38,10 @@ enum zpool_mapmode {
38 38
39bool zpool_has_pool(char *type); 39bool zpool_has_pool(char *type);
40 40
41struct zpool *zpool_create_pool(char *type, char *name, 41struct zpool *zpool_create_pool(const char *type, const char *name,
42 gfp_t gfp, const struct zpool_ops *ops); 42 gfp_t gfp, const struct zpool_ops *ops);
43 43
44char *zpool_get_type(struct zpool *pool); 44const char *zpool_get_type(struct zpool *pool);
45 45
46void zpool_destroy_pool(struct zpool *pool); 46void zpool_destroy_pool(struct zpool *pool);
47 47
@@ -83,7 +83,9 @@ struct zpool_driver {
83 atomic_t refcount; 83 atomic_t refcount;
84 struct list_head list; 84 struct list_head list;
85 85
86 void *(*create)(char *name, gfp_t gfp, const struct zpool_ops *ops, 86 void *(*create)(const char *name,
87 gfp_t gfp,
88 const struct zpool_ops *ops,
87 struct zpool *zpool); 89 struct zpool *zpool);
88 void (*destroy)(void *pool); 90 void (*destroy)(void *pool);
89 91
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 6398dfae53f1..34eb16098a33 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -41,7 +41,7 @@ struct zs_pool_stats {
41 41
42struct zs_pool; 42struct zs_pool;
43 43
44struct zs_pool *zs_create_pool(char *name, gfp_t flags); 44struct zs_pool *zs_create_pool(const char *name, gfp_t flags);
45void zs_destroy_pool(struct zs_pool *pool); 45void zs_destroy_pool(struct zs_pool *pool);
46 46
47unsigned long zs_malloc(struct zs_pool *pool, size_t size); 47unsigned long zs_malloc(struct zs_pool *pool, size_t size);
diff --git a/include/linux/zutil.h b/include/linux/zutil.h
index 6adfa9a6ffe9..663689521759 100644
--- a/include/linux/zutil.h
+++ b/include/linux/zutil.h
@@ -68,10 +68,10 @@ typedef uLong (*check_func) (uLong check, const Byte *buf,
68 An Adler-32 checksum is almost as reliable as a CRC32 but can be computed 68 An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
69 much faster. Usage example: 69 much faster. Usage example:
70 70
71 uLong adler = adler32(0L, NULL, 0); 71 uLong adler = zlib_adler32(0L, NULL, 0);
72 72
73 while (read_buffer(buffer, length) != EOF) { 73 while (read_buffer(buffer, length) != EOF) {
74 adler = adler32(adler, buffer, length); 74 adler = zlib_adler32(adler, buffer, length);
75 } 75 }
76 if (adler != original_adler) error(); 76 if (adler != original_adler) error();
77*/ 77*/
diff --git a/include/net/sock.h b/include/net/sock.h
index f570e75e3da9..bbf7c2cf15b4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2041,7 +2041,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
2041 */ 2041 */
2042static inline struct page_frag *sk_page_frag(struct sock *sk) 2042static inline struct page_frag *sk_page_frag(struct sock *sk)
2043{ 2043{
2044 if (sk->sk_allocation & __GFP_WAIT) 2044 if (gfpflags_allow_blocking(sk->sk_allocation))
2045 return &current->task_frag; 2045 return &current->task_frag;
2046 2046
2047 return &sk->sk_frag; 2047 return &sk->sk_frag;
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5b14b7..dde6bf092c8a 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -20,7 +20,7 @@
20 {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ 20 {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \
21 {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ 21 {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \
22 {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ 22 {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \
23 {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \ 23 {(unsigned long)__GFP_ATOMIC, "GFP_ATOMIC"}, \
24 {(unsigned long)__GFP_IO, "GFP_IO"}, \ 24 {(unsigned long)__GFP_IO, "GFP_IO"}, \
25 {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ 25 {(unsigned long)__GFP_COLD, "GFP_COLD"}, \
26 {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ 26 {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \
@@ -36,7 +36,8 @@
36 {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ 36 {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
37 {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ 37 {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
38 {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ 38 {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
39 {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ 39 {(unsigned long)__GFP_DIRECT_RECLAIM, "GFP_DIRECT_RECLAIM"}, \
40 {(unsigned long)__GFP_KSWAPD_RECLAIM, "GFP_KSWAPD_RECLAIM"}, \
40 {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ 41 {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \
41 ) : "GFP_NOWAIT" 42 ) : "GFP_NOWAIT"
42 43
diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h
new file mode 100644
index 000000000000..c7805818fcc6
--- /dev/null
+++ b/include/trace/events/nilfs2.h
@@ -0,0 +1,224 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM nilfs2
3
4#if !defined(_TRACE_NILFS2_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_NILFS2_H
6
7#include <linux/tracepoint.h>
8
9struct nilfs_sc_info;
10
11#define show_collection_stage(type) \
12 __print_symbolic(type, \
13 { NILFS_ST_INIT, "ST_INIT" }, \
14 { NILFS_ST_GC, "ST_GC" }, \
15 { NILFS_ST_FILE, "ST_FILE" }, \
16 { NILFS_ST_IFILE, "ST_IFILE" }, \
17 { NILFS_ST_CPFILE, "ST_CPFILE" }, \
18 { NILFS_ST_SUFILE, "ST_SUFILE" }, \
19 { NILFS_ST_DAT, "ST_DAT" }, \
20 { NILFS_ST_SR, "ST_SR" }, \
21 { NILFS_ST_DSYNC, "ST_DSYNC" }, \
22 { NILFS_ST_DONE, "ST_DONE"})
23
24TRACE_EVENT(nilfs2_collection_stage_transition,
25
26 TP_PROTO(struct nilfs_sc_info *sci),
27
28 TP_ARGS(sci),
29
30 TP_STRUCT__entry(
31 __field(void *, sci)
32 __field(int, stage)
33 ),
34
35 TP_fast_assign(
36 __entry->sci = sci;
37 __entry->stage = sci->sc_stage.scnt;
38 ),
39
40 TP_printk("sci = %p stage = %s",
41 __entry->sci,
42 show_collection_stage(__entry->stage))
43);
44
45#ifndef TRACE_HEADER_MULTI_READ
46enum nilfs2_transaction_transition_state {
47 TRACE_NILFS2_TRANSACTION_BEGIN,
48 TRACE_NILFS2_TRANSACTION_COMMIT,
49 TRACE_NILFS2_TRANSACTION_ABORT,
50 TRACE_NILFS2_TRANSACTION_TRYLOCK,
51 TRACE_NILFS2_TRANSACTION_LOCK,
52 TRACE_NILFS2_TRANSACTION_UNLOCK,
53};
54#endif
55
56#define show_transaction_state(type) \
57 __print_symbolic(type, \
58 { TRACE_NILFS2_TRANSACTION_BEGIN, "BEGIN" }, \
59 { TRACE_NILFS2_TRANSACTION_COMMIT, "COMMIT" }, \
60 { TRACE_NILFS2_TRANSACTION_ABORT, "ABORT" }, \
61 { TRACE_NILFS2_TRANSACTION_TRYLOCK, "TRYLOCK" }, \
62 { TRACE_NILFS2_TRANSACTION_LOCK, "LOCK" }, \
63 { TRACE_NILFS2_TRANSACTION_UNLOCK, "UNLOCK" })
64
65TRACE_EVENT(nilfs2_transaction_transition,
66 TP_PROTO(struct super_block *sb,
67 struct nilfs_transaction_info *ti,
68 int count,
69 unsigned int flags,
70 enum nilfs2_transaction_transition_state state),
71
72 TP_ARGS(sb, ti, count, flags, state),
73
74 TP_STRUCT__entry(
75 __field(void *, sb)
76 __field(void *, ti)
77 __field(int, count)
78 __field(unsigned int, flags)
79 __field(int, state)
80 ),
81
82 TP_fast_assign(
83 __entry->sb = sb;
84 __entry->ti = ti;
85 __entry->count = count;
86 __entry->flags = flags;
87 __entry->state = state;
88 ),
89
90 TP_printk("sb = %p ti = %p count = %d flags = %x state = %s",
91 __entry->sb,
92 __entry->ti,
93 __entry->count,
94 __entry->flags,
95 show_transaction_state(__entry->state))
96);
97
98TRACE_EVENT(nilfs2_segment_usage_check,
99 TP_PROTO(struct inode *sufile,
100 __u64 segnum,
101 unsigned long cnt),
102
103 TP_ARGS(sufile, segnum, cnt),
104
105 TP_STRUCT__entry(
106 __field(struct inode *, sufile)
107 __field(__u64, segnum)
108 __field(unsigned long, cnt)
109 ),
110
111 TP_fast_assign(
112 __entry->sufile = sufile;
113 __entry->segnum = segnum;
114 __entry->cnt = cnt;
115 ),
116
117 TP_printk("sufile = %p segnum = %llu cnt = %lu",
118 __entry->sufile,
119 __entry->segnum,
120 __entry->cnt)
121);
122
123TRACE_EVENT(nilfs2_segment_usage_allocated,
124 TP_PROTO(struct inode *sufile,
125 __u64 segnum),
126
127 TP_ARGS(sufile, segnum),
128
129 TP_STRUCT__entry(
130 __field(struct inode *, sufile)
131 __field(__u64, segnum)
132 ),
133
134 TP_fast_assign(
135 __entry->sufile = sufile;
136 __entry->segnum = segnum;
137 ),
138
139 TP_printk("sufile = %p segnum = %llu",
140 __entry->sufile,
141 __entry->segnum)
142);
143
144TRACE_EVENT(nilfs2_segment_usage_freed,
145 TP_PROTO(struct inode *sufile,
146 __u64 segnum),
147
148 TP_ARGS(sufile, segnum),
149
150 TP_STRUCT__entry(
151 __field(struct inode *, sufile)
152 __field(__u64, segnum)
153 ),
154
155 TP_fast_assign(
156 __entry->sufile = sufile;
157 __entry->segnum = segnum;
158 ),
159
160 TP_printk("sufile = %p segnum = %llu",
161 __entry->sufile,
162 __entry->segnum)
163);
164
165TRACE_EVENT(nilfs2_mdt_insert_new_block,
166 TP_PROTO(struct inode *inode,
167 unsigned long ino,
168 unsigned long block),
169
170 TP_ARGS(inode, ino, block),
171
172 TP_STRUCT__entry(
173 __field(struct inode *, inode)
174 __field(unsigned long, ino)
175 __field(unsigned long, block)
176 ),
177
178 TP_fast_assign(
179 __entry->inode = inode;
180 __entry->ino = ino;
181 __entry->block = block;
182 ),
183
184 TP_printk("inode = %p ino = %lu block = %lu",
185 __entry->inode,
186 __entry->ino,
187 __entry->block)
188);
189
190TRACE_EVENT(nilfs2_mdt_submit_block,
191 TP_PROTO(struct inode *inode,
192 unsigned long ino,
193 unsigned long blkoff,
194 int mode),
195
196 TP_ARGS(inode, ino, blkoff, mode),
197
198 TP_STRUCT__entry(
199 __field(struct inode *, inode)
200 __field(unsigned long, ino)
201 __field(unsigned long, blkoff)
202 __field(int, mode)
203 ),
204
205 TP_fast_assign(
206 __entry->inode = inode;
207 __entry->ino = ino;
208 __entry->blkoff = blkoff;
209 __entry->mode = mode;
210 ),
211
212 TP_printk("inode = %p ino = %lu blkoff = %lu mode = %x",
213 __entry->inode,
214 __entry->ino,
215 __entry->blkoff,
216 __entry->mode)
217);
218
219#endif /* _TRACE_NILFS2_H */
220
221/* This part must be outside protection */
222#undef TRACE_INCLUDE_FILE
223#define TRACE_INCLUDE_FILE nilfs2
224#include <trace/define_trace.h>
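
For context, each TRACE_EVENT above expands into a trace_<name>() function that the filesystem calls at the instrumented spot; the call costs next to nothing while the event is disabled. A hypothetical call-site sketch (the real hooks live in fs/nilfs2 and are not shown in this diff):

/* Hypothetical call site; CREATE_TRACE_POINTS must be defined in exactly
 * one nilfs2 .c file before including the header above. */
#define CREATE_TRACE_POINTS
#include <trace/events/nilfs2.h>

static void example_mark_allocated(struct inode *sufile, __u64 segnum)
{
	trace_nilfs2_segment_usage_allocated(sufile, segnum);
}
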
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 71f448e5e927..ed81aafd2392 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -123,7 +123,6 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
 	size_t len = src->m_ts;
 	size_t alen;
 
-	WARN_ON(dst == NULL);
 	if (src->m_ts > dst->m_ts)
 		return ERR_PTR(-EINVAL);
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 8a056a32ded7..5ffcbd354a52 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1371,16 +1371,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 	if (unlikely(audit_filter_type(type)))
 		return NULL;
 
-	if (gfp_mask & __GFP_WAIT) {
+	if (gfp_mask & __GFP_DIRECT_RECLAIM) {
 		if (audit_pid && audit_pid == current->pid)
-			gfp_mask &= ~__GFP_WAIT;
+			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 		else
 			reserve = 0;
 	}
 
 	while (audit_backlog_limit
 	       && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
-		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
+		if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
 			long sleep_time;
 
 			sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
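
The rule encoded above: when the caller is the audit daemon itself it must never block (it is the one draining the queue), so the direct-reclaim bit is cleared for it, while other callers may still be throttled once the backlog is over the limit. A generic sketch of the same pattern, with placeholder names:

/* Sketch only; am_the_consumer and the allocation size are placeholders. */
static void *alloc_for_queue(gfp_t gfp_mask, bool am_the_consumer)
{
	if (am_the_consumer)
		gfp_mask &= ~__GFP_DIRECT_RECLAIM;	/* never sleep waiting on ourselves */
	return kmalloc(64, gfp_mask);
}
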
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b9d0cce3f9ce..f1603c153890 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -299,7 +299,7 @@ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
 
 	idr_preload(gfp_mask);
 	spin_lock_bh(&cgroup_idr_lock);
-	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_WAIT);
+	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
 	spin_unlock_bh(&cgroup_idr_lock);
 	idr_preload_end();
 	return ret;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4c5edc357923..d873b64fbddc 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index bd9f8a03cefa..11b64a63c0f8 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -6,7 +6,7 @@
  * Version 2.  See the file COPYING for more details.
  */
 
-#define pr_fmt(fmt)	"kexec: " fmt
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/capability.h>
 #include <linux/mm.h>
@@ -1027,7 +1027,7 @@ static int __init crash_notes_memory_init(void)
 
 	crash_notes = __alloc_percpu(size, align);
 	if (!crash_notes) {
-		pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
+		pr_warn("Memory allocation for saving cpu register states failed\n");
 		return -ENOMEM;
 	}
 	return 0;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 6a9a3f2a0e8e..b70ada0028d2 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -9,6 +9,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
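
Setting pr_fmt() to KBUILD_MODNAME makes every pr_*() call in the file prefix its output with the object name, which is why the literal "Kexec: " string could be dropped from the pr_warn() above. Illustrative sketch:

/* pr_fmt() is pasted into the format string by the pr_*() macros. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/printk.h>

static void report_failure(void)
{
	/* With KBUILD_MODNAME "kexec_core" this logs, e.g.,
	 * "kexec_core: Memory allocation for saving cpu register states failed". */
	pr_warn("Memory allocation for saving cpu register states failed\n");
}
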
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4e49cc4c9952..deae3907ac1e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2738,7 +2738,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
 		return;
 
 	/* no reclaim without waiting on it */
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return;
 
 	/* this guy won't enter reclaim */
diff --git a/kernel/panic.c b/kernel/panic.c
index 04e91ff7560b..4579dbb7ed87 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -23,6 +23,7 @@
 #include <linux/sysrq.h>
 #include <linux/init.h>
 #include <linux/nmi.h>
+#include <linux/console.h>
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
@@ -147,6 +148,15 @@ void panic(const char *fmt, ...)
 
 	bust_spinlocks(0);
 
+	/*
+	 * We may have ended up stopping the CPU holding the lock (in
+	 * smp_send_stop()) while still having some valuable data in the console
+	 * buffer.  Try to acquire the lock then release it regardless of the
+	 * result.  The release will also print the buffers out.
+	 */
+	console_trylock();
+	console_unlock();
+
 	if (!panic_blink)
 		panic_blink = no_blink;
 
diff --git a/kernel/params.c b/kernel/params.c
index b6554aa71094..93a380a2345d 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -325,10 +325,11 @@ int param_get_charp(char *buffer, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL(param_get_charp);
 
-static void param_free_charp(void *arg)
+void param_free_charp(void *arg)
 {
 	maybe_kfree_parameter(*((char **)arg));
 }
+EXPORT_SYMBOL(param_free_charp);
 
 const struct kernel_param_ops param_ops_charp = {
 	.set = param_set_charp,
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5235dd4e1e2f..3a970604308f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1779,7 +1779,7 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
 	while (to_alloc-- > 0) {
 		struct page *page;
 
-		page = alloc_image_page(__GFP_HIGHMEM);
+		page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
 		memory_bm_set_bit(bm, page_to_pfn(page));
 	}
 	return nr_highmem;
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b2066fb5b10f..12cd989dadf6 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -257,7 +257,7 @@ static int hib_submit_io(int rw, pgoff_t page_off, void *addr,
257 struct bio *bio; 257 struct bio *bio;
258 int error = 0; 258 int error = 0;
259 259
260 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 260 bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1);
261 bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); 261 bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
262 bio->bi_bdev = hib_resume_bdev; 262 bio->bi_bdev = hib_resume_bdev;
263 263
@@ -356,7 +356,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
356 return -ENOSPC; 356 return -ENOSPC;
357 357
358 if (hb) { 358 if (hb) {
359 src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | 359 src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN |
360 __GFP_NORETRY); 360 __GFP_NORETRY);
361 if (src) { 361 if (src) {
362 copy_page(src, buf); 362 copy_page(src, buf);
@@ -364,7 +364,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
364 ret = hib_wait_io(hb); /* Free pages */ 364 ret = hib_wait_io(hb); /* Free pages */
365 if (ret) 365 if (ret)
366 return ret; 366 return ret;
367 src = (void *)__get_free_page(__GFP_WAIT | 367 src = (void *)__get_free_page(__GFP_RECLAIM |
368 __GFP_NOWARN | 368 __GFP_NOWARN |
369 __GFP_NORETRY); 369 __GFP_NORETRY);
370 if (src) { 370 if (src) {
@@ -672,7 +672,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
672 nr_threads = num_online_cpus() - 1; 672 nr_threads = num_online_cpus() - 1;
673 nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); 673 nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
674 674
675 page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); 675 page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH);
676 if (!page) { 676 if (!page) {
677 printk(KERN_ERR "PM: Failed to allocate LZO page\n"); 677 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
678 ret = -ENOMEM; 678 ret = -ENOMEM;
@@ -975,7 +975,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
975 last = tmp; 975 last = tmp;
976 976
977 tmp->map = (struct swap_map_page *) 977 tmp->map = (struct swap_map_page *)
978 __get_free_page(__GFP_WAIT | __GFP_HIGH); 978 __get_free_page(__GFP_RECLAIM | __GFP_HIGH);
979 if (!tmp->map) { 979 if (!tmp->map) {
980 release_swap_reader(handle); 980 release_swap_reader(handle);
981 return -ENOMEM; 981 return -ENOMEM;
@@ -1242,9 +1242,9 @@ static int load_image_lzo(struct swap_map_handle *handle,
1242 1242
1243 for (i = 0; i < read_pages; i++) { 1243 for (i = 0; i < read_pages; i++) {
1244 page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? 1244 page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
1245 __GFP_WAIT | __GFP_HIGH : 1245 __GFP_RECLAIM | __GFP_HIGH :
1246 __GFP_WAIT | __GFP_NOWARN | 1246 __GFP_RECLAIM | __GFP_NOWARN |
1247 __GFP_NORETRY); 1247 __GFP_NORETRY);
1248 1248
1249 if (!page[i]) { 1249 if (!page[i]) {
1250 if (i < LZO_CMP_PAGES) { 1250 if (i < LZO_CMP_PAGES) {
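
These hibernation paths want allocations that may sleep and reclaim, so plain __GFP_WAIT becomes __GFP_RECLAIM, which covers both reclaim modes. A rough sketch of how the replacement flag is composed (see include/linux/gfp.h for the authoritative definition):

/* Sketch of the composition; the real macro lives in gfp.h. */
#define SKETCH_GFP_RECLAIM	(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)
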
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b16f35487b67..2ce8826f1053 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -269,6 +269,9 @@ static u32 clear_idx;
 #define PREFIX_MAX		32
 #define LOG_LINE_MAX		(1024 - PREFIX_MAX)
 
+#define LOG_LEVEL(v)		((v) & 0x07)
+#define LOG_FACILITY(v)		((v) >> 3 & 0xff)
+
 /* record buffer */
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 #define LOG_ALIGN 4
@@ -612,7 +615,6 @@ struct devkmsg_user {
 static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	char *buf, *line;
-	int i;
 	int level = default_message_loglevel;
 	int facility = 1;	/* LOG_USER */
 	size_t len = iov_iter_count(from);
@@ -642,12 +644,13 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 	line = buf;
 	if (line[0] == '<') {
 		char *endp = NULL;
+		unsigned int u;
 
-		i = simple_strtoul(line+1, &endp, 10);
+		u = simple_strtoul(line + 1, &endp, 10);
 		if (endp && endp[0] == '>') {
-			level = i & 7;
-			if (i >> 3)
-				facility = i >> 3;
+			level = LOG_LEVEL(u);
+			if (LOG_FACILITY(u) != 0)
+				facility = LOG_FACILITY(u);
 			endp++;
 			len -= endp - line;
 			line = endp;
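
The new macros decode the syslog prefix that userspace writes to /dev/kmsg: the low three bits carry the level and the remaining bits the facility. A worked example, assuming the definitions above:

/* "<14>message" parses to u = 14. */
unsigned int u = 14;
int level    = LOG_LEVEL(u);	/* 14 & 0x07        == 6 (KERN_INFO) */
int facility = LOG_FACILITY(u);	/* (14 >> 3) & 0xff == 1 (LOG_USER) */
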
diff --git a/kernel/signal.c b/kernel/signal.c
index 0f6bbbe77b46..c0b01fe24bbd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -503,41 +503,6 @@ int unhandled_signal(struct task_struct *tsk, int sig)
503 return !tsk->ptrace; 503 return !tsk->ptrace;
504} 504}
505 505
506/*
507 * Notify the system that a driver wants to block all signals for this
508 * process, and wants to be notified if any signals at all were to be
509 * sent/acted upon. If the notifier routine returns non-zero, then the
510 * signal will be acted upon after all. If the notifier routine returns 0,
511 * then then signal will be blocked. Only one block per process is
512 * allowed. priv is a pointer to private data that the notifier routine
513 * can use to determine if the signal should be blocked or not.
514 */
515void
516block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
517{
518 unsigned long flags;
519
520 spin_lock_irqsave(&current->sighand->siglock, flags);
521 current->notifier_mask = mask;
522 current->notifier_data = priv;
523 current->notifier = notifier;
524 spin_unlock_irqrestore(&current->sighand->siglock, flags);
525}
526
527/* Notify the system that blocking has ended. */
528
529void
530unblock_all_signals(void)
531{
532 unsigned long flags;
533
534 spin_lock_irqsave(&current->sighand->siglock, flags);
535 current->notifier = NULL;
536 current->notifier_data = NULL;
537 recalc_sigpending();
538 spin_unlock_irqrestore(&current->sighand->siglock, flags);
539}
540
541static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) 506static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
542{ 507{
543 struct sigqueue *q, *first = NULL; 508 struct sigqueue *q, *first = NULL;
@@ -580,19 +545,8 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
580{ 545{
581 int sig = next_signal(pending, mask); 546 int sig = next_signal(pending, mask);
582 547
583 if (sig) { 548 if (sig)
584 if (current->notifier) {
585 if (sigismember(current->notifier_mask, sig)) {
586 if (!(current->notifier)(current->notifier_data)) {
587 clear_thread_flag(TIF_SIGPENDING);
588 return 0;
589 }
590 }
591 }
592
593 collect_signal(sig, pending, info); 549 collect_signal(sig, pending, info);
594 }
595
596 return sig; 550 return sig;
597} 551}
598 552
@@ -834,7 +788,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
834 sigset_t flush; 788 sigset_t flush;
835 789
836 if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { 790 if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
837 if (signal->flags & SIGNAL_GROUP_COREDUMP) 791 if (!(signal->flags & SIGNAL_GROUP_EXIT))
838 return sig == SIGKILL; 792 return sig == SIGKILL;
839 /* 793 /*
840 * The process is in the middle of dying, nothing to do. 794 * The process is in the middle of dying, nothing to do.
@@ -2483,9 +2437,6 @@ EXPORT_SYMBOL(force_sig);
2483EXPORT_SYMBOL(send_sig); 2437EXPORT_SYMBOL(send_sig);
2484EXPORT_SYMBOL(send_sig_info); 2438EXPORT_SYMBOL(send_sig_info);
2485EXPORT_SYMBOL(sigprocmask); 2439EXPORT_SYMBOL(sigprocmask);
2486EXPORT_SYMBOL(block_all_signals);
2487EXPORT_SYMBOL(unblock_all_signals);
2488
2489 2440
2490/* 2441/*
2491 * System call entry points. 2442 * System call entry points.
diff --git a/kernel/smp.c b/kernel/smp.c
index 07854477c164..d903c02223af 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -669,7 +669,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 	cpumask_var_t cpus;
 	int cpu, ret;
 
-	might_sleep_if(gfp_flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_flags));
 
 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
 		preempt_disable();
diff --git a/kernel/sys.c b/kernel/sys.c
index fa2f2f671a5c..6af9212ab5aa 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -222,7 +222,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
 		goto out_unlock;	/* No processes for this user */
 	}
 	do_each_thread(g, p) {
-		if (uid_eq(task_uid(p), uid))
+		if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
 			error = set_one_prio(p, niceval, error);
 	} while_each_thread(g, p);
 	if (!uid_eq(uid, cred->uid))
@@ -290,7 +290,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
 		goto out_unlock;	/* No processes for this user */
 	}
 	do_each_thread(g, p) {
-		if (uid_eq(task_uid(p), uid)) {
+		if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
 			niceval = nice_to_rlimit(task_nice(p));
 			if (niceval > retval)
 				retval = niceval;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1d1521c26302..16bf3bc25e3e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1686,6 +1686,9 @@ config TEST_STRING_HELPERS
1686config TEST_KSTRTOX 1686config TEST_KSTRTOX
1687 tristate "Test kstrto*() family of functions at runtime" 1687 tristate "Test kstrto*() family of functions at runtime"
1688 1688
1689config TEST_PRINTF
1690 tristate "Test printf() family of functions at runtime"
1691
1689config TEST_RHASHTABLE 1692config TEST_RHASHTABLE
1690 tristate "Perform selftest on resizable hash table" 1693 tristate "Perform selftest on resizable hash table"
1691 default n 1694 default n
diff --git a/lib/Makefile b/lib/Makefile
index 8de3b012eac7..7f1de26613d2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
42obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o 42obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
43obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o 43obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
44obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o 44obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
45obj-$(CONFIG_TEST_PRINTF) += test_printf.o
45 46
46ifeq ($(CONFIG_DEBUG_KOBJECT),y) 47ifeq ($(CONFIG_DEBUG_KOBJECT),y)
47CFLAGS_kobject.o += -DDEBUG 48CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index fcb65d2a0b94..8855f019ebe8 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1249,6 +1249,14 @@ static void check_sync(struct device *dev,
1249 dir2name[entry->direction], 1249 dir2name[entry->direction],
1250 dir2name[ref->direction]); 1250 dir2name[ref->direction]);
1251 1251
1252 if (ref->sg_call_ents && ref->type == dma_debug_sg &&
1253 ref->sg_call_ents != entry->sg_call_ents) {
1254 err_printk(ref->dev, entry, "DMA-API: device driver syncs "
1255 "DMA sg list with different entry count "
1256 "[map count=%d] [sync count=%d]\n",
1257 entry->sg_call_ents, ref->sg_call_ents);
1258 }
1259
1252out: 1260out:
1253 put_hash_bucket(bucket, &flags); 1261 put_hash_bucket(bucket, &flags);
1254} 1262}
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e491e02eff54..e3952e9c8ec0 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -42,7 +42,7 @@ extern struct _ddebug __stop___verbose[];
42 42
43struct ddebug_table { 43struct ddebug_table {
44 struct list_head link; 44 struct list_head link;
45 char *mod_name; 45 const char *mod_name;
46 unsigned int num_ddebugs; 46 unsigned int num_ddebugs;
47 struct _ddebug *ddebugs; 47 struct _ddebug *ddebugs;
48}; 48};
@@ -841,12 +841,12 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
841 const char *name) 841 const char *name)
842{ 842{
843 struct ddebug_table *dt; 843 struct ddebug_table *dt;
844 char *new_name; 844 const char *new_name;
845 845
846 dt = kzalloc(sizeof(*dt), GFP_KERNEL); 846 dt = kzalloc(sizeof(*dt), GFP_KERNEL);
847 if (dt == NULL) 847 if (dt == NULL)
848 return -ENOMEM; 848 return -ENOMEM;
849 new_name = kstrdup(name, GFP_KERNEL); 849 new_name = kstrdup_const(name, GFP_KERNEL);
850 if (new_name == NULL) { 850 if (new_name == NULL) {
851 kfree(dt); 851 kfree(dt);
852 return -ENOMEM; 852 return -ENOMEM;
@@ -907,7 +907,7 @@ int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *module)
907static void ddebug_table_free(struct ddebug_table *dt) 907static void ddebug_table_free(struct ddebug_table *dt)
908{ 908{
909 list_del_init(&dt->link); 909 list_del_init(&dt->link);
910 kfree(dt->mod_name); 910 kfree_const(dt->mod_name);
911 kfree(dt); 911 kfree(dt);
912} 912}
913 913
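
kstrdup_const() returns the original pointer when the source string sits in .rodata and only allocates otherwise; kfree_const() frees only what was actually allocated. That pairing is why every free path in this hunk switched to kfree_const(). Minimal sketch of the ownership rule:

/* Anything obtained from kstrdup_const() must go back through kfree_const(). */
const char *mod_name = kstrdup_const(name, GFP_KERNEL);
if (!mod_name)
	return -ENOMEM;
/* ... use mod_name ... */
kfree_const(mod_name);	/* no-op when mod_name still points into .rodata */
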
diff --git a/lib/halfmd4.c b/lib/halfmd4.c
index a8fe6274a13c..137e861d9690 100644
--- a/lib/halfmd4.c
+++ b/lib/halfmd4.c
@@ -1,6 +1,7 @@
 #include <linux/compiler.h>
 #include <linux/export.h>
 #include <linux/cryptohash.h>
+#include <linux/bitops.h>
 
 /* F, G and H are basic MD4 functions: selection, majority, parity */
 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
@@ -14,7 +15,7 @@
  * Rotation is separate from addition to prevent recomputation
  */
 #define ROUND(f, a, b, c, d, x, s)	\
-	(a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s)))
+	(a += f(b, c, d) + x, a = rol32(a, s))
 #define K1  0
 #define K2  013240474631UL
 #define K3  015666365641UL
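
rol32() from <linux/bitops.h> expresses the rotate that the ROUND() macro used to open-code; for 0 < s < 32 the two forms compute the same value. A standalone sketch of the equivalence:

/* Userspace sketch: what rol32(a, s) computes for 0 < s < 32. */
#include <stdint.h>

static inline uint32_t rol32_sketch(uint32_t a, unsigned int s)
{
	return (a << s) | (a >> (32 - s));
}
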
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 8d74c20d8595..992457b1284c 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -169,11 +169,15 @@ int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize,
 		}
 	} else {
 		for (j = 0; j < len; j++) {
-			if (linebuflen < lx + 3)
+			if (linebuflen < lx + 2)
 				goto overflow2;
 			ch = ptr[j];
 			linebuf[lx++] = hex_asc_hi(ch);
+			if (linebuflen < lx + 2)
+				goto overflow2;
 			linebuf[lx++] = hex_asc_lo(ch);
+			if (linebuflen < lx + 2)
+				goto overflow2;
 			linebuf[lx++] = ' ';
 		}
 		if (j)
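
The per-character checks above let hex_dump_to_buffer() fill the output as far as it can before bailing out, instead of demanding room for a full "xx " triple up front; each step only needs space for the next character plus the terminating NUL. A userspace sketch of the same loop shape (simplified, not the kernel function):

/* Simplified userspace sketch of the bounds-checked hex loop. */
#include <stddef.h>

static int dump_bytes(char *linebuf, size_t linebuflen,
		      const unsigned char *ptr, size_t len)
{
	static const char hex[] = "0123456789abcdef";
	size_t j, lx = 0;

	for (j = 0; j < len; j++) {
		if (linebuflen < lx + 2)	/* next char + room for NUL */
			return -1;
		linebuf[lx++] = hex[ptr[j] >> 4];
		if (linebuflen < lx + 2)
			return -1;
		linebuf[lx++] = hex[ptr[j] & 0x0f];
		if (linebuflen < lx + 2)
			return -1;
		linebuf[lx++] = ' ';
	}
	if (lx)
		lx--;				/* drop the trailing space */
	if (linebuflen < lx + 1)
		return -1;
	linebuf[lx] = '\0';
	return 0;
}
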
diff --git a/lib/idr.c b/lib/idr.c
index 5335c43adf46..6098336df267 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -399,7 +399,7 @@ void idr_preload(gfp_t gfp_mask)
 	 * allocation guarantee.  Disallow usage from those contexts.
 	 */
 	WARN_ON_ONCE(in_interrupt());
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 
 	preempt_disable();
 
@@ -453,7 +453,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
 	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
 	int id;
 
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 
 	/* sanity checks */
 	if (WARN_ON_ONCE(start < 0))
diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c
index bd2bea963364..391fd23976a2 100644
--- a/lib/is_single_threaded.c
+++ b/lib/is_single_threaded.c
@@ -36,8 +36,7 @@ bool current_is_single_threaded(void)
36 if (unlikely(p == task->group_leader)) 36 if (unlikely(p == task->group_leader))
37 continue; 37 continue;
38 38
39 t = p; 39 for_each_thread(p, t) {
40 do {
41 if (unlikely(t->mm == mm)) 40 if (unlikely(t->mm == mm))
42 goto found; 41 goto found;
43 if (likely(t->mm)) 42 if (likely(t->mm))
@@ -48,7 +47,7 @@ bool current_is_single_threaded(void)
48 * forked before exiting. 47 * forked before exiting.
49 */ 48 */
50 smp_rmb(); 49 smp_rmb();
51 } while_each_thread(p, t); 50 }
52 } 51 }
53 ret = true; 52 ret = true;
54found: 53found:
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index 32f12150fc4f..f194e6e593e1 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -31,6 +31,22 @@ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
31} 31}
32EXPORT_SYMBOL(kvasprintf); 32EXPORT_SYMBOL(kvasprintf);
33 33
34/*
35 * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt
36 * (or the sole vararg) points to rodata, we will then save a memory
37 * allocation and string copy. In any case, the return value should be
38 * freed using kfree_const().
39 */
40const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap)
41{
42 if (!strchr(fmt, '%'))
43 return kstrdup_const(fmt, gfp);
44 if (!strcmp(fmt, "%s"))
45 return kstrdup_const(va_arg(ap, const char*), gfp);
46 return kvasprintf(gfp, fmt, ap);
47}
48EXPORT_SYMBOL(kvasprintf_const);
49
34char *kasprintf(gfp_t gfp, const char *fmt, ...) 50char *kasprintf(gfp_t gfp, const char *fmt, ...)
35{ 51{
36 va_list ap; 52 va_list ap;
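
kvasprintf_const() may hand back a pointer into .rodata rather than a fresh allocation, so callers must treat the result as const and release it with kfree_const(), exactly like kstrdup_const(). A hypothetical wrapper sketch (the name and signature are illustrative, not kernel API):

/* Hypothetical helper; caller frees the result with kfree_const(). */
static __printf(2, 3) const char *set_label(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	const char *label;

	va_start(ap, fmt);
	label = kvasprintf_const(gfp, fmt, ap);
	va_end(ap);
	return label;
}
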
diff --git a/lib/kobject.c b/lib/kobject.c
index 055407746266..7cbccd2b4c72 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -257,18 +257,32 @@ static int kobject_add_internal(struct kobject *kobj)
257int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, 257int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
258 va_list vargs) 258 va_list vargs)
259{ 259{
260 char *s; 260 const char *s;
261 261
262 if (kobj->name && !fmt) 262 if (kobj->name && !fmt)
263 return 0; 263 return 0;
264 264
265 s = kvasprintf(GFP_KERNEL, fmt, vargs); 265 s = kvasprintf_const(GFP_KERNEL, fmt, vargs);
266 if (!s) 266 if (!s)
267 return -ENOMEM; 267 return -ENOMEM;
268 268
269 /* ewww... some of these buggers have '/' in the name ... */ 269 /*
270 strreplace(s, '/', '!'); 270 * ewww... some of these buggers have '/' in the name ... If
271 kfree(kobj->name); 271 * that's the case, we need to make sure we have an actual
272 * allocated copy to modify, since kvasprintf_const may have
273 * returned something from .rodata.
274 */
275 if (strchr(s, '/')) {
276 char *t;
277
278 t = kstrdup(s, GFP_KERNEL);
279 kfree_const(s);
280 if (!t)
281 return -ENOMEM;
282 strreplace(t, '/', '!');
283 s = t;
284 }
285 kfree_const(kobj->name);
272 kobj->name = s; 286 kobj->name = s;
273 287
274 return 0; 288 return 0;
@@ -466,7 +480,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
466 envp[0] = devpath_string; 480 envp[0] = devpath_string;
467 envp[1] = NULL; 481 envp[1] = NULL;
468 482
469 name = dup_name = kstrdup(new_name, GFP_KERNEL); 483 name = dup_name = kstrdup_const(new_name, GFP_KERNEL);
470 if (!name) { 484 if (!name) {
471 error = -ENOMEM; 485 error = -ENOMEM;
472 goto out; 486 goto out;
@@ -486,7 +500,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
486 kobject_uevent_env(kobj, KOBJ_MOVE, envp); 500 kobject_uevent_env(kobj, KOBJ_MOVE, envp);
487 501
488out: 502out:
489 kfree(dup_name); 503 kfree_const(dup_name);
490 kfree(devpath_string); 504 kfree(devpath_string);
491 kfree(devpath); 505 kfree(devpath);
492 kobject_put(kobj); 506 kobject_put(kobj);
@@ -634,7 +648,7 @@ static void kobject_cleanup(struct kobject *kobj)
634 /* free name if we allocated it */ 648 /* free name if we allocated it */
635 if (name) { 649 if (name) {
636 pr_debug("kobject: '%s': free name\n", name); 650 pr_debug("kobject: '%s': free name\n", name);
637 kfree(name); 651 kfree_const(name);
638 } 652 }
639} 653}
640 654
diff --git a/lib/llist.c b/lib/llist.c
index 0b0e9779d675..ae5872b1df0c 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -66,12 +66,12 @@ struct llist_node *llist_del_first(struct llist_head *head)
 {
 	struct llist_node *entry, *old_entry, *next;
 
-	entry = head->first;
+	entry = smp_load_acquire(&head->first);
 	for (;;) {
 		if (entry == NULL)
 			return NULL;
 		old_entry = entry;
-		next = entry->next;
+		next = READ_ONCE(entry->next);
 		entry = cmpxchg(&head->first, old_entry, next);
 		if (entry == old_entry)
 			break;
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index f75715131f20..6d40944960de 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -135,7 +135,7 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
135 * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). 135 * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course).
136 * 136 *
137 * @gfp indicates whether or not to wait until a free id is available (it's not 137 * @gfp indicates whether or not to wait until a free id is available (it's not
138 * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep 138 * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep
139 * however long it takes until another thread frees an id (same semantics as a 139 * however long it takes until another thread frees an id (same semantics as a
140 * mempool). 140 * mempool).
141 * 141 *
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index f9ebe1c82060..fcf5d98574ce 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -188,7 +188,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
188 * preloading in the interrupt anyway as all the allocations have to 188 * preloading in the interrupt anyway as all the allocations have to
189 * be atomic. So just do normal allocation when in interrupt. 189 * be atomic. So just do normal allocation when in interrupt.
190 */ 190 */
191 if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) { 191 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
192 struct radix_tree_preload *rtp; 192 struct radix_tree_preload *rtp;
193 193
194 /* 194 /*
@@ -249,7 +249,7 @@ radix_tree_node_free(struct radix_tree_node *node)
249 * with preemption not disabled. 249 * with preemption not disabled.
250 * 250 *
251 * To make use of this facility, the radix tree must be initialised without 251 * To make use of this facility, the radix tree must be initialised without
252 * __GFP_WAIT being passed to INIT_RADIX_TREE(). 252 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
253 */ 253 */
254static int __radix_tree_preload(gfp_t gfp_mask) 254static int __radix_tree_preload(gfp_t gfp_mask)
255{ 255{
@@ -286,12 +286,12 @@ out:
286 * with preemption not disabled. 286 * with preemption not disabled.
287 * 287 *
288 * To make use of this facility, the radix tree must be initialised without 288 * To make use of this facility, the radix tree must be initialised without
289 * __GFP_WAIT being passed to INIT_RADIX_TREE(). 289 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
290 */ 290 */
291int radix_tree_preload(gfp_t gfp_mask) 291int radix_tree_preload(gfp_t gfp_mask)
292{ 292{
293 /* Warn on non-sensical use... */ 293 /* Warn on non-sensical use... */
294 WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT)); 294 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
295 return __radix_tree_preload(gfp_mask); 295 return __radix_tree_preload(gfp_mask);
296} 296}
297EXPORT_SYMBOL(radix_tree_preload); 297EXPORT_SYMBOL(radix_tree_preload);
@@ -303,7 +303,7 @@ EXPORT_SYMBOL(radix_tree_preload);
303 */ 303 */
304int radix_tree_maybe_preload(gfp_t gfp_mask) 304int radix_tree_maybe_preload(gfp_t gfp_mask)
305{ 305{
306 if (gfp_mask & __GFP_WAIT) 306 if (gfpflags_allow_blocking(gfp_mask))
307 return __radix_tree_preload(gfp_mask); 307 return __radix_tree_preload(gfp_mask);
308 /* Preloading doesn't help anything with this gfp mask, skip it */ 308 /* Preloading doesn't help anything with this gfp mask, skip it */
309 preempt_disable(); 309 preempt_disable();
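
The comments refer to the usual preload pattern: reserve tree nodes while sleeping is still allowed, then insert under a spinlock where it is not. A condensed sketch of that pattern (names are placeholders; the tree is assumed to be initialised with GFP_ATOMIC, i.e. without __GFP_DIRECT_RECLAIM):

/* Condensed sketch of the preload pattern; my_lock/my_tree/item are placeholders. */
static int insert_item(struct radix_tree_root *my_tree, unsigned long index,
		       void *item, spinlock_t *my_lock)
{
	int err = radix_tree_preload(GFP_KERNEL);	/* may sleep here ... */

	if (err)
		return err;
	spin_lock(my_lock);				/* ... but not here */
	err = radix_tree_insert(my_tree, index, item);
	spin_unlock(my_lock);
	radix_tree_preload_end();
	return err;
}
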
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index 8e376efd88a4..98866a770770 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -326,6 +326,39 @@ out:
326 kfree(out_test); 326 kfree(out_test);
327} 327}
328 328
329#define string_get_size_maxbuf 16
330#define test_string_get_size_one(size, blk_size, units, exp_result) \
331 do { \
332 BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf); \
333 __test_string_get_size((size), (blk_size), (units), \
334 (exp_result)); \
335 } while (0)
336
337
338static __init void __test_string_get_size(const u64 size, const u64 blk_size,
339 const enum string_size_units units,
340 const char *exp_result)
341{
342 char buf[string_get_size_maxbuf];
343
344 string_get_size(size, blk_size, units, buf, sizeof(buf));
345 if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
346 return;
347
348 buf[sizeof(buf) - 1] = '\0';
349 pr_warn("Test 'test_string_get_size_one' failed!\n");
350 pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
351 size, blk_size, units);
352 pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
353}
354
355static __init void test_string_get_size(void)
356{
357 test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
358 test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
359 test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
360}
361
329static int __init test_string_helpers_init(void) 362static int __init test_string_helpers_init(void)
330{ 363{
331 unsigned int i; 364 unsigned int i;
@@ -344,6 +377,9 @@ static int __init test_string_helpers_init(void)
344 for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) 377 for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++)
345 test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); 378 test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1);
346 379
380 /* Test string_get_size() */
381 test_string_get_size();
382
347 return -EINVAL; 383 return -EINVAL;
348} 384}
349module_init(test_string_helpers_init); 385module_init(test_string_helpers_init);
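
The expected strings follow from size * blk_size where the result divides evenly: 16384 * 512 = 8388608 bytes, exactly 8.00 MiB, and 1 * 512 is simply "512 B". The "32.7 MB" case depends on string_get_size()'s internal staged scaling rather than a single exact division, so it is best read as a regression guard for that behaviour. A quick userspace check of the exact cases:

/* Userspace check of the exact-arithmetic cases above. */
#include <assert.h>

int main(void)
{
	assert(16384ULL * 512 == 8ULL * 1024 * 1024);	/* 8.00 MiB */
	assert(1ULL * 512 == 512);			/* "512 B" */
	return 0;
}
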
diff --git a/lib/test_printf.c b/lib/test_printf.c
new file mode 100644
index 000000000000..c5a666af9ba5
--- /dev/null
+++ b/lib/test_printf.c
@@ -0,0 +1,362 @@
1/*
2 * Test cases for printf facility.
3 */
4
5#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6
7#include <linux/init.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/printk.h>
11#include <linux/random.h>
12#include <linux/slab.h>
13#include <linux/string.h>
14
15#include <linux/socket.h>
16#include <linux/in.h>
17
18#define BUF_SIZE 256
19#define FILL_CHAR '$'
20
21#define PTR1 ((void*)0x01234567)
22#define PTR2 ((void*)(long)(int)0xfedcba98)
23
24#if BITS_PER_LONG == 64
25#define PTR1_ZEROES "000000000"
26#define PTR1_SPACES " "
27#define PTR1_STR "1234567"
28#define PTR2_STR "fffffffffedcba98"
29#define PTR_WIDTH 16
30#else
31#define PTR1_ZEROES "0"
32#define PTR1_SPACES " "
33#define PTR1_STR "1234567"
34#define PTR2_STR "fedcba98"
35#define PTR_WIDTH 8
36#endif
37#define PTR_WIDTH_STR stringify(PTR_WIDTH)
38
39static unsigned total_tests __initdata;
40static unsigned failed_tests __initdata;
41static char *test_buffer __initdata;
42
43static int __printf(4, 0) __init
44do_test(int bufsize, const char *expect, int elen,
45 const char *fmt, va_list ap)
46{
47 va_list aq;
48 int ret, written;
49
50 total_tests++;
51
52 memset(test_buffer, FILL_CHAR, BUF_SIZE);
53 va_copy(aq, ap);
54 ret = vsnprintf(test_buffer, bufsize, fmt, aq);
55 va_end(aq);
56
57 if (ret != elen) {
58 pr_warn("vsnprintf(buf, %d, \"%s\", ...) returned %d, expected %d\n",
59 bufsize, fmt, ret, elen);
60 return 1;
61 }
62
63 if (!bufsize) {
64 if (memchr_inv(test_buffer, FILL_CHAR, BUF_SIZE)) {
65 pr_warn("vsnprintf(buf, 0, \"%s\", ...) wrote to buffer\n",
66 fmt);
67 return 1;
68 }
69 return 0;
70 }
71
72 written = min(bufsize-1, elen);
73 if (test_buffer[written]) {
74 pr_warn("vsnprintf(buf, %d, \"%s\", ...) did not nul-terminate buffer\n",
75 bufsize, fmt);
76 return 1;
77 }
78
79 if (memcmp(test_buffer, expect, written)) {
80 pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote '%s', expected '%.*s'\n",
81 bufsize, fmt, test_buffer, written, expect);
82 return 1;
83 }
84 return 0;
85}
86
87static void __printf(3, 4) __init
88__test(const char *expect, int elen, const char *fmt, ...)
89{
90 va_list ap;
91 int rand;
92 char *p;
93
94 BUG_ON(elen >= BUF_SIZE);
95
96 va_start(ap, fmt);
97
98 /*
99 * Every fmt+args is subjected to four tests: Three where we
100 * tell vsnprintf varying buffer sizes (plenty, not quite
101 * enough and 0), and then we also test that kvasprintf would
102 * be able to print it as expected.
103 */
104 failed_tests += do_test(BUF_SIZE, expect, elen, fmt, ap);
105 rand = 1 + prandom_u32_max(elen+1);
106 /* Since elen < BUF_SIZE, we have 1 <= rand <= BUF_SIZE. */
107 failed_tests += do_test(rand, expect, elen, fmt, ap);
108 failed_tests += do_test(0, expect, elen, fmt, ap);
109
110 p = kvasprintf(GFP_KERNEL, fmt, ap);
111 if (p) {
112 if (memcmp(p, expect, elen+1)) {
113 pr_warn("kvasprintf(..., \"%s\", ...) returned '%s', expected '%s'\n",
114 fmt, p, expect);
115 failed_tests++;
116 }
117 kfree(p);
118 }
119 va_end(ap);
120}
121
122#define test(expect, fmt, ...) \
123 __test(expect, strlen(expect), fmt, ##__VA_ARGS__)
124
125static void __init
126test_basic(void)
127{
128 /* Work around annoying "warning: zero-length gnu_printf format string". */
129 char nul = '\0';
130
131 test("", &nul);
132 test("100%", "100%%");
133 test("xxx%yyy", "xxx%cyyy", '%');
134 __test("xxx\0yyy", 7, "xxx%cyyy", '\0');
135}
136
137static void __init
138test_number(void)
139{
140 test("0x1234abcd ", "%#-12x", 0x1234abcd);
141 test(" 0x1234abcd", "%#12x", 0x1234abcd);
142 test("0|001| 12|+123| 1234|-123|-1234", "%d|%03d|%3d|%+d|% d|%+d|% d", 0, 1, 12, 123, 1234, -123, -1234);
143}
144
145static void __init
146test_string(void)
147{
148 test("", "%s%.0s", "", "123");
149 test("ABCD|abc|123", "%s|%.3s|%.*s", "ABCD", "abcdef", 3, "123456");
150 test("1 | 2|3 | 4|5 ", "%-3s|%3s|%-*s|%*s|%*s", "1", "2", 3, "3", 3, "4", -3, "5");
151 /*
152 * POSIX and C99 say that a missing precision should be
153 * treated as a precision of 0. However, the kernel's printf
154 * implementation treats this case as if the . wasn't
155 * present. Let's add a test case documenting the current
156 * behaviour; should anyone ever feel the need to follow the
157 * standards more closely, this can be revisited.
158 */
159 test("a||", "%.s|%.0s|%.*s", "a", "b", 0, "c");
160 test("a | | ", "%-3.s|%-3.0s|%-3.*s", "a", "b", 0, "c");
161}
162
163static void __init
164plain(void)
165{
166 test(PTR1_ZEROES PTR1_STR " " PTR2_STR, "%p %p", PTR1, PTR2);
167 /*
168 * The field width is overloaded for some %p extensions to
169 * pass another piece of information. For plain pointers, the
170 * behaviour is slightly odd: One cannot pass either the 0
171 * flag nor a precision to %p without gcc complaining, and if
172 * one explicitly gives a field width, the number is no longer
173 * zero-padded.
174 */
175 test("|" PTR1_STR PTR1_SPACES " | " PTR1_SPACES PTR1_STR "|",
176 "|%-*p|%*p|", PTR_WIDTH+2, PTR1, PTR_WIDTH+2, PTR1);
177 test("|" PTR2_STR " | " PTR2_STR "|",
178 "|%-*p|%*p|", PTR_WIDTH+2, PTR2, PTR_WIDTH+2, PTR2);
179
180 /*
181 * Unrecognized %p extensions are treated as plain %p, but the
182 * alphanumeric suffix is ignored (that is, does not occur in
183 * the output.)
184 */
185 test("|"PTR1_ZEROES PTR1_STR"|", "|%p0y|", PTR1);
186 test("|"PTR2_STR"|", "|%p0y|", PTR2);
187}
188
189static void __init
190symbol_ptr(void)
191{
192}
193
194static void __init
195kernel_ptr(void)
196{
197}
198
199static void __init
200struct_resource(void)
201{
202}
203
204static void __init
205addr(void)
206{
207}
208
209static void __init
210escaped_str(void)
211{
212}
213
214static void __init
215hex_string(void)
216{
217 const char buf[3] = {0xc0, 0xff, 0xee};
218
219 test("c0 ff ee|c0:ff:ee|c0-ff-ee|c0ffee",
220 "%3ph|%3phC|%3phD|%3phN", buf, buf, buf, buf);
221 test("c0 ff ee|c0:ff:ee|c0-ff-ee|c0ffee",
222 "%*ph|%*phC|%*phD|%*phN", 3, buf, 3, buf, 3, buf, 3, buf);
223}
224
225static void __init
226mac(void)
227{
228 const u8 addr[6] = {0x2d, 0x48, 0xd6, 0xfc, 0x7a, 0x05};
229
230 test("2d:48:d6:fc:7a:05", "%pM", addr);
231 test("05:7a:fc:d6:48:2d", "%pMR", addr);
232 test("2d-48-d6-fc-7a-05", "%pMF", addr);
233 test("2d48d6fc7a05", "%pm", addr);
234 test("057afcd6482d", "%pmR", addr);
235}
236
237static void __init
238ip4(void)
239{
240 struct sockaddr_in sa;
241
242 sa.sin_family = AF_INET;
243 sa.sin_port = cpu_to_be16(12345);
244 sa.sin_addr.s_addr = cpu_to_be32(0x7f000001);
245
246 test("127.000.000.001|127.0.0.1", "%pi4|%pI4", &sa.sin_addr, &sa.sin_addr);
247 test("127.000.000.001|127.0.0.1", "%piS|%pIS", &sa, &sa);
248 sa.sin_addr.s_addr = cpu_to_be32(0x01020304);
249 test("001.002.003.004:12345|1.2.3.4:12345", "%piSp|%pISp", &sa, &sa);
250}
251
252static void __init
253ip6(void)
254{
255}
256
257static void __init
258ip(void)
259{
260 ip4();
261 ip6();
262}
263
264static void __init
265uuid(void)
266{
267 const char uuid[16] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
268 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf};
269
270 test("00010203-0405-0607-0809-0a0b0c0d0e0f", "%pUb", uuid);
271 test("00010203-0405-0607-0809-0A0B0C0D0E0F", "%pUB", uuid);
272 test("03020100-0504-0706-0809-0a0b0c0d0e0f", "%pUl", uuid);
273 test("03020100-0504-0706-0809-0A0B0C0D0E0F", "%pUL", uuid);
274}
275
276static void __init
277dentry(void)
278{
279}
280
281static void __init
282struct_va_format(void)
283{
284}
285
286static void __init
287struct_clk(void)
288{
289}
290
291static void __init
292bitmap(void)
293{
294 DECLARE_BITMAP(bits, 20);
295 const int primes[] = {2,3,5,7,11,13,17,19};
296 int i;
297
298 bitmap_zero(bits, 20);
299 test("00000|00000", "%20pb|%*pb", bits, 20, bits);
300 test("|", "%20pbl|%*pbl", bits, 20, bits);
301
302 for (i = 0; i < ARRAY_SIZE(primes); ++i)
303 set_bit(primes[i], bits);
304 test("a28ac|a28ac", "%20pb|%*pb", bits, 20, bits);
305 test("2-3,5,7,11,13,17,19|2-3,5,7,11,13,17,19", "%20pbl|%*pbl", bits, 20, bits);
306
307 bitmap_fill(bits, 20);
308 test("fffff|fffff", "%20pb|%*pb", bits, 20, bits);
309 test("0-19|0-19", "%20pbl|%*pbl", bits, 20, bits);
310}
311
312static void __init
313netdev_features(void)
314{
315}
316
317static void __init
318test_pointer(void)
319{
320 plain();
321 symbol_ptr();
322 kernel_ptr();
323 struct_resource();
324 addr();
325 escaped_str();
326 hex_string();
327 mac();
328 ip();
329 uuid();
330 dentry();
331 struct_va_format();
332 struct_clk();
333 bitmap();
334 netdev_features();
335}
336
337static int __init
338test_printf_init(void)
339{
340 test_buffer = kmalloc(BUF_SIZE, GFP_KERNEL);
341 if (!test_buffer)
342 return -ENOMEM;
343
344 test_basic();
345 test_number();
346 test_string();
347 test_pointer();
348
349 kfree(test_buffer);
350
351 if (failed_tests == 0)
352 pr_info("all %u tests passed\n", total_tests);
353 else
354 pr_warn("failed %u out of %u tests\n", failed_tests, total_tests);
355
356 return failed_tests ? -EINVAL : 0;
357}
358
359module_init(test_printf_init);
360
361MODULE_AUTHOR("Rasmus Villemoes <linux@rasmusvillemoes.dk>");
362MODULE_LICENSE("GPL");
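
The module is built when the new TEST_PRINTF entry in lib/Kconfig.debug is enabled and reports its results through pr_info()/pr_warn() at load time. New cases follow the test(expect, fmt, ...) convention, where the expected string must match exactly, field widths included. A hypothetical extra case (not part of this patch):

/* Hypothetical additional case following the test() convention above. */
static void __init
test_more_numbers(void)
{
	test("ff|FF|0377|255", "%x|%X|%#o|%u", 255, 255, 255, 255);
}
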
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 95cd63b43b99..f9cee8e1233c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1449,6 +1449,8 @@ int kptr_restrict __read_mostly;
1449 * (legacy clock framework) of the clock 1449 * (legacy clock framework) of the clock
1450 * - 'Cr' For a clock, it prints the current rate of the clock 1450 * - 'Cr' For a clock, it prints the current rate of the clock
1451 * 1451 *
1452 * ** Please update also Documentation/printk-formats.txt when making changes **
1453 *
1452 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 1454 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
1453 * function pointers are really function descriptors, which contain a 1455 * function pointers are really function descriptors, which contain a
1454 * pointer to the real address. 1456 * pointer to the real address.
@@ -1457,7 +1459,7 @@ static noinline_for_stack
1457char *pointer(const char *fmt, char *buf, char *end, void *ptr, 1459char *pointer(const char *fmt, char *buf, char *end, void *ptr,
1458 struct printf_spec spec) 1460 struct printf_spec spec)
1459{ 1461{
1460 int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0); 1462 const int default_width = 2 * sizeof(void *);
1461 1463
1462 if (!ptr && *fmt != 'K') { 1464 if (!ptr && *fmt != 'K') {
1463 /* 1465 /*
@@ -1769,14 +1771,14 @@ qualifier:
1769 1771
1770 case 'n': 1772 case 'n':
1771 /* 1773 /*
1772 * Since %n poses a greater security risk than utility, treat 1774 * Since %n poses a greater security risk than
1773 * it as an invalid format specifier. Warn about its use so 1775 * utility, treat it as any other invalid or
1774 * that new instances don't get added. 1776 * unsupported format specifier.
1775 */ 1777 */
1776 WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", fmt);
1777 /* Fall-through */ 1778 /* Fall-through */
1778 1779
1779 default: 1780 default:
1781 WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt);
1780 spec->type = FORMAT_TYPE_INVALID; 1782 spec->type = FORMAT_TYPE_INVALID;
1781 return fmt - start; 1783 return fmt - start;
1782 } 1784 }
@@ -1811,41 +1813,16 @@ qualifier:
1811 * @fmt: The format string to use 1813 * @fmt: The format string to use
1812 * @args: Arguments for the format string 1814 * @args: Arguments for the format string
1813 * 1815 *
1814 * This function follows C99 vsnprintf, but has some extensions: 1816 * This function generally follows C99 vsnprintf, but has some
1815 * %pS output the name of a text symbol with offset 1817 * extensions and a few limitations:
1816 * %ps output the name of a text symbol without offset 1818 *
1817 * %pF output the name of a function pointer with its offset 1819 * %n is unsupported
1818 * %pf output the name of a function pointer without its offset 1820 * %p* is handled by pointer()
1819 * %pB output the name of a backtrace symbol with its offset
1820 * %pR output the address range in a struct resource with decoded flags
1821 * %pr output the address range in a struct resource with raw flags
1822 * %pb output the bitmap with field width as the number of bits
1823 * %pbl output the bitmap as range list with field width as the number of bits
1824 * %pM output a 6-byte MAC address with colons
1825 * %pMR output a 6-byte MAC address with colons in reversed order
1826 * %pMF output a 6-byte MAC address with dashes
1827 * %pm output a 6-byte MAC address without colons
1828 * %pmR output a 6-byte MAC address without colons in reversed order
1829 * %pI4 print an IPv4 address without leading zeros
1830 * %pi4 print an IPv4 address with leading zeros
1831 * %pI6 print an IPv6 address with colons
1832 * %pi6 print an IPv6 address without colons
1833 * %pI6c print an IPv6 address as specified by RFC 5952
1834 * %pIS depending on sa_family of 'struct sockaddr *' print IPv4/IPv6 address
1835 * %piS depending on sa_family of 'struct sockaddr *' print IPv4/IPv6 address
1836 * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper
1837 * case.
1838 * %*pE[achnops] print an escaped buffer
1839 * %*ph[CDN] a variable-length hex string with a separator (supports up to 64
1840 * bytes of the input)
1841 * %pC output the name (Common Clock Framework) or address (legacy clock
1842 * framework) of a clock
1843 * %pCn output the name (Common Clock Framework) or address (legacy clock
1844 * framework) of a clock
1845 * %pCr output the current rate of a clock
1846 * %n is ignored
1847 * 1821 *
1848 * ** Please update Documentation/printk-formats.txt when making changes ** 1822 * See pointer() or Documentation/printk-formats.txt for more
1823 * extensive description.
1824 *
1825 * ** Please update the documentation in both places when making changes **
1849 * 1826 *
1850 * The return value is the number of characters which would 1827 * The return value is the number of characters which would
1851 * be generated for the given input, excluding the trailing 1828 * be generated for the given input, excluding the trailing
@@ -1944,10 +1921,15 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1944 break; 1921 break;
1945 1922
1946 case FORMAT_TYPE_INVALID: 1923 case FORMAT_TYPE_INVALID:
1947 if (str < end) 1924 /*
1948 *str = '%'; 1925 * Presumably the arguments passed gcc's type
1949 ++str; 1926 * checking, but there is no safe or sane way
1950 break; 1927 * for us to continue parsing the format and
1928 * fetching from the va_list; the remaining
1929 * specifiers and arguments would be out of
1930 * sync.
1931 */
1932 goto out;
1951 1933
1952 default: 1934 default:
1953 switch (spec.type) { 1935 switch (spec.type) {
@@ -1992,6 +1974,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1992 } 1974 }
1993 } 1975 }
1994 1976
1977out:
1995 if (size > 0) { 1978 if (size > 0) {
1996 if (str < end) 1979 if (str < end)
1997 *str = '\0'; 1980 *str = '\0';
@@ -2189,9 +2172,10 @@ do { \
2189 2172
2190 switch (spec.type) { 2173 switch (spec.type) {
2191 case FORMAT_TYPE_NONE: 2174 case FORMAT_TYPE_NONE:
2192 case FORMAT_TYPE_INVALID:
2193 case FORMAT_TYPE_PERCENT_CHAR: 2175 case FORMAT_TYPE_PERCENT_CHAR:
2194 break; 2176 break;
2177 case FORMAT_TYPE_INVALID:
2178 goto out;
2195 2179
2196 case FORMAT_TYPE_WIDTH: 2180 case FORMAT_TYPE_WIDTH:
2197 case FORMAT_TYPE_PRECISION: 2181 case FORMAT_TYPE_PRECISION:
@@ -2253,6 +2237,7 @@ do { \
2253 } 2237 }
2254 } 2238 }
2255 2239
2240out:
2256 return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; 2241 return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
2257#undef save_arg 2242#undef save_arg
2258} 2243}
@@ -2286,7 +2271,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
2286 char *str, *end; 2271 char *str, *end;
2287 const char *args = (const char *)bin_buf; 2272 const char *args = (const char *)bin_buf;
2288 2273
2289 if (WARN_ON_ONCE((int) size < 0)) 2274 if (WARN_ON_ONCE(size > INT_MAX))
2290 return 0; 2275 return 0;
2291 2276
2292 str = buf; 2277 str = buf;
@@ -2375,12 +2360,14 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
2375 break; 2360 break;
2376 2361
2377 case FORMAT_TYPE_PERCENT_CHAR: 2362 case FORMAT_TYPE_PERCENT_CHAR:
2378 case FORMAT_TYPE_INVALID:
2379 if (str < end) 2363 if (str < end)
2380 *str = '%'; 2364 *str = '%';
2381 ++str; 2365 ++str;
2382 break; 2366 break;
2383 2367
2368 case FORMAT_TYPE_INVALID:
2369 goto out;
2370
2384 default: { 2371 default: {
2385 unsigned long long num; 2372 unsigned long long num;
2386 2373
@@ -2423,6 +2410,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
2423 } /* switch(spec.type) */ 2410 } /* switch(spec.type) */
2424 } /* while(*fmt) */ 2411 } /* while(*fmt) */
2425 2412
2413out:
2426 if (size > 0) { 2414 if (size > 0) {
2427 if (str < end) 2415 if (str < end)
2428 *str = '\0'; 2416 *str = '\0';
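
The practical effect of the FORMAT_TYPE_INVALID changes: once a specifier cannot be decoded, the remaining varargs can no longer be consumed safely, so vsnprintf() now stops there and warns once instead of emitting a literal '%' and carrying on with a desynchronised va_list. A hedged sketch of the observable difference (a description, not output captured from a running kernel):

/* Sketch of the behaviour change for an unsupported specifier. */
char buf[32];

snprintf(buf, sizeof(buf), "id %d %q", 7);
/* before: roughly "id 7 %q", with parsing continuing past the bad specifier
 * after:  "id 7 " plus a one-time "Please remove unsupported %q in format
 *         string" warning, and nothing further is formatted */
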
diff --git a/mm/Kconfig b/mm/Kconfig
index 0d9fdcd01e47..97a4e06b15c0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -200,18 +200,6 @@ config MEMORY_HOTREMOVE
200 depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE 200 depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
201 depends on MIGRATION 201 depends on MIGRATION
202 202
203#
204# If we have space for more page flags then we can enable additional
205# optimizations and functionality.
206#
207# Regular Sparsemem takes page flag bits for the sectionid if it does not
208# use a virtual memmap. Disable extended page flags for 32 bit platforms
209# that require the use of a sectionid in the page flags.
210#
211config PAGEFLAGS_EXTENDED
212 def_bool y
213 depends on 64BIT || SPARSEMEM_VMEMMAP || !SPARSEMEM
214
215# Heavily threaded applications may benefit from splitting the mm-wide 203# Heavily threaded applications may benefit from splitting the mm-wide
216# page_table_lock, so that faults on different parts of the user address 204# page_table_lock, so that faults on different parts of the user address
217# space can be handled with less contention: split it at this NR_CPUS. 205# space can be handled with less contention: split it at this NR_CPUS.
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 619984fc07ec..8ed2ffd963c5 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -637,7 +637,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
637{ 637{
638 struct bdi_writeback *wb; 638 struct bdi_writeback *wb;
639 639
640 might_sleep_if(gfp & __GFP_WAIT); 640 might_sleep_if(gfpflags_allow_blocking(gfp));
641 641
642 if (!memcg_css->parent) 642 if (!memcg_css->parent)
643 return &bdi->wb; 643 return &bdi->wb;
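The open-coded gfp & __GFP_WAIT tests here, and in the dmapool and memcontrol hunks below, become gfpflags_allow_blocking(). The helper lives in include/linux/gfp.h (outside these hunks) and presumably amounts to:

    static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
    {
            return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
    }

i.e. only __GFP_DIRECT_RECLAIM, not kswapd-only reclaim, implies the caller may sleep.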
diff --git a/mm/debug.c b/mm/debug.c
index e784110fb51d..668aa35191ca 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -25,12 +25,7 @@ static const struct trace_print_flags pageflag_names[] = {
25 {1UL << PG_private, "private" }, 25 {1UL << PG_private, "private" },
26 {1UL << PG_private_2, "private_2" }, 26 {1UL << PG_private_2, "private_2" },
27 {1UL << PG_writeback, "writeback" }, 27 {1UL << PG_writeback, "writeback" },
28#ifdef CONFIG_PAGEFLAGS_EXTENDED
29 {1UL << PG_head, "head" }, 28 {1UL << PG_head, "head" },
30 {1UL << PG_tail, "tail" },
31#else
32 {1UL << PG_compound, "compound" },
33#endif
34 {1UL << PG_swapcache, "swapcache" }, 29 {1UL << PG_swapcache, "swapcache" },
35 {1UL << PG_mappedtodisk, "mappedtodisk" }, 30 {1UL << PG_mappedtodisk, "mappedtodisk" },
36 {1UL << PG_reclaim, "reclaim" }, 31 {1UL << PG_reclaim, "reclaim" },
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 312a716fa14c..57312b5d6e12 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -326,7 +326,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
326 size_t offset; 326 size_t offset;
327 void *retval; 327 void *retval;
328 328
329 might_sleep_if(mem_flags & __GFP_WAIT); 329 might_sleep_if(gfpflags_allow_blocking(mem_flags));
330 330
331 spin_lock_irqsave(&pool->lock, flags); 331 spin_lock_irqsave(&pool->lock, flags);
332 list_for_each_entry(page, &pool->page_list, page_list) { 332 list_for_each_entry(page, &pool->page_list, page_list) {
diff --git a/mm/failslab.c b/mm/failslab.c
index 98fb490311eb..79171b4a5826 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -3,11 +3,11 @@
3 3
4static struct { 4static struct {
5 struct fault_attr attr; 5 struct fault_attr attr;
6 bool ignore_gfp_wait; 6 bool ignore_gfp_reclaim;
7 bool cache_filter; 7 bool cache_filter;
8} failslab = { 8} failslab = {
9 .attr = FAULT_ATTR_INITIALIZER, 9 .attr = FAULT_ATTR_INITIALIZER,
10 .ignore_gfp_wait = true, 10 .ignore_gfp_reclaim = true,
11 .cache_filter = false, 11 .cache_filter = false,
12}; 12};
13 13
@@ -16,7 +16,7 @@ bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags)
16 if (gfpflags & __GFP_NOFAIL) 16 if (gfpflags & __GFP_NOFAIL)
17 return false; 17 return false;
18 18
19 if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) 19 if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM))
20 return false; 20 return false;
21 21
22 if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) 22 if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB))
@@ -42,7 +42,7 @@ static int __init failslab_debugfs_init(void)
42 return PTR_ERR(dir); 42 return PTR_ERR(dir);
43 43
44 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, 44 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
45 &failslab.ignore_gfp_wait)) 45 &failslab.ignore_gfp_reclaim))
46 goto fail; 46 goto fail;
47 if (!debugfs_create_bool("cache-filter", mode, dir, 47 if (!debugfs_create_bool("cache-filter", mode, dir,
48 &failslab.cache_filter)) 48 &failslab.cache_filter))
diff --git a/mm/filemap.c b/mm/filemap.c
index 58e04e26f996..1bb007624b53 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1722,7 +1722,7 @@ no_cached_page:
1722 goto out; 1722 goto out;
1723 } 1723 }
1724 error = add_to_page_cache_lru(page, mapping, index, 1724 error = add_to_page_cache_lru(page, mapping, index,
1725 GFP_KERNEL & mapping_gfp_mask(mapping)); 1725 mapping_gfp_constraint(mapping, GFP_KERNEL));
1726 if (error) { 1726 if (error) {
1727 page_cache_release(page); 1727 page_cache_release(page);
1728 if (error == -EEXIST) { 1728 if (error == -EEXIST) {
@@ -1824,7 +1824,7 @@ static int page_cache_read(struct file *file, pgoff_t offset)
1824 return -ENOMEM; 1824 return -ENOMEM;
1825 1825
1826 ret = add_to_page_cache_lru(page, mapping, offset, 1826 ret = add_to_page_cache_lru(page, mapping, offset,
1827 GFP_KERNEL & mapping_gfp_mask(mapping)); 1827 mapping_gfp_constraint(mapping, GFP_KERNEL));
1828 if (ret == 0) 1828 if (ret == 0)
1829 ret = mapping->a_ops->readpage(file, page); 1829 ret = mapping->a_ops->readpage(file, page);
1830 else if (ret == -EEXIST) 1830 else if (ret == -EEXIST)
@@ -2713,7 +2713,7 @@ EXPORT_SYMBOL(generic_file_write_iter);
2713 * page is known to the local caching routines. 2713 * page is known to the local caching routines.
2714 * 2714 *
2715 * The @gfp_mask argument specifies whether I/O may be performed to release 2715 * The @gfp_mask argument specifies whether I/O may be performed to release
2716 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). 2716 * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
2717 * 2717 *
2718 */ 2718 */
2719int try_to_release_page(struct page *page, gfp_t gfp_mask) 2719int try_to_release_page(struct page *page, gfp_t gfp_mask)
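mapping_gfp_constraint(), used in the first two hunks above, replaces the open-coded GFP_KERNEL & mapping_gfp_mask(mapping). A sketch of what the helper is assumed to do (it is declared in include/linux/pagemap.h, outside this diff):

    static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
                                               gfp_t gfp_mask)
    {
            return mapping_gfp_mask(mapping) & gfp_mask;
    }

so the result is simply the caller's flags masked by what the mapping allows -- the same AND as before, behind a name that states the intent.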
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00cfd1ae2271..c29ddebc8705 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void)
116 for_each_populated_zone(zone) 116 for_each_populated_zone(zone)
117 nr_zones++; 117 nr_zones++;
118 118
119 /* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */ 119 /* Ensure 2 pageblocks are free to assist fragmentation avoidance */
120 recommended_min = pageblock_nr_pages * nr_zones * 2; 120 recommended_min = pageblock_nr_pages * nr_zones * 2;
121 121
122 /* 122 /*
@@ -786,7 +786,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
786 786
787static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) 787static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
788{ 788{
789 return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp; 789 return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp;
790} 790}
791 791
792/* Caller must hold page table lock. */ 792/* Caller must hold page table lock. */
@@ -1755,8 +1755,7 @@ static void __split_huge_page_refcount(struct page *page,
1755 (1L << PG_unevictable))); 1755 (1L << PG_unevictable)));
1756 page_tail->flags |= (1L << PG_dirty); 1756 page_tail->flags |= (1L << PG_dirty);
1757 1757
1758 /* clear PageTail before overwriting first_page */ 1758 clear_compound_head(page_tail);
1759 smp_wmb();
1760 1759
1761 if (page_is_young(page)) 1760 if (page_is_young(page))
1762 set_page_young(page_tail); 1761 set_page_young(page_tail);
@@ -2413,8 +2412,7 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
2413 2412
2414static struct page * 2413static struct page *
2415khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm, 2414khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
2416 struct vm_area_struct *vma, unsigned long address, 2415 unsigned long address, int node)
2417 int node)
2418{ 2416{
2419 VM_BUG_ON_PAGE(*hpage, *hpage); 2417 VM_BUG_ON_PAGE(*hpage, *hpage);
2420 2418
@@ -2481,8 +2479,7 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
2481 2479
2482static struct page * 2480static struct page *
2483khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm, 2481khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
2484 struct vm_area_struct *vma, unsigned long address, 2482 unsigned long address, int node)
2485 int node)
2486{ 2483{
2487 up_read(&mm->mmap_sem); 2484 up_read(&mm->mmap_sem);
2488 VM_BUG_ON(!*hpage); 2485 VM_BUG_ON(!*hpage);
@@ -2530,7 +2527,7 @@ static void collapse_huge_page(struct mm_struct *mm,
2530 __GFP_THISNODE; 2527 __GFP_THISNODE;
2531 2528
2532 /* release the mmap_sem read lock. */ 2529 /* release the mmap_sem read lock. */
2533 new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node); 2530 new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node);
2534 if (!new_page) 2531 if (!new_page)
2535 return; 2532 return;
2536 2533
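For the alloc_hugepage_gfpmask() change above, a small illustration (values hypothetical) of what the two defrag settings now yield under the renamed flags:

    /* Illustration only: with defrag enabled GFP_TRANSHUGE is returned
     * unchanged; with defrag disabled the __GFP_RECLAIM bits are cleared so
     * the THP allocation will not enter reclaim. */
    gfp_t with_defrag    = alloc_hugepage_gfpmask(1, 0); /* GFP_TRANSHUGE */
    gfp_t without_defrag = alloc_hugepage_gfpmask(0, 0); /* GFP_TRANSHUGE & ~__GFP_RECLAIM */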
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 74ef0c6a25dd..7ce07d681265 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -994,23 +994,22 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
994 994
995#if defined(CONFIG_CMA) && defined(CONFIG_X86_64) 995#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
996static void destroy_compound_gigantic_page(struct page *page, 996static void destroy_compound_gigantic_page(struct page *page,
997 unsigned long order) 997 unsigned int order)
998{ 998{
999 int i; 999 int i;
1000 int nr_pages = 1 << order; 1000 int nr_pages = 1 << order;
1001 struct page *p = page + 1; 1001 struct page *p = page + 1;
1002 1002
1003 for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { 1003 for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
1004 __ClearPageTail(p); 1004 clear_compound_head(p);
1005 set_page_refcounted(p); 1005 set_page_refcounted(p);
1006 p->first_page = NULL;
1007 } 1006 }
1008 1007
1009 set_compound_order(page, 0); 1008 set_compound_order(page, 0);
1010 __ClearPageHead(page); 1009 __ClearPageHead(page);
1011} 1010}
1012 1011
1013static void free_gigantic_page(struct page *page, unsigned order) 1012static void free_gigantic_page(struct page *page, unsigned int order)
1014{ 1013{
1015 free_contig_range(page_to_pfn(page), 1 << order); 1014 free_contig_range(page_to_pfn(page), 1 << order);
1016} 1015}
@@ -1054,7 +1053,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
1054 return zone_spans_pfn(zone, last_pfn); 1053 return zone_spans_pfn(zone, last_pfn);
1055} 1054}
1056 1055
1057static struct page *alloc_gigantic_page(int nid, unsigned order) 1056static struct page *alloc_gigantic_page(int nid, unsigned int order)
1058{ 1057{
1059 unsigned long nr_pages = 1 << order; 1058 unsigned long nr_pages = 1 << order;
1060 unsigned long ret, pfn, flags; 1059 unsigned long ret, pfn, flags;
@@ -1090,7 +1089,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order)
1090} 1089}
1091 1090
1092static void prep_new_huge_page(struct hstate *h, struct page *page, int nid); 1091static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
1093static void prep_compound_gigantic_page(struct page *page, unsigned long order); 1092static void prep_compound_gigantic_page(struct page *page, unsigned int order);
1094 1093
1095static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid) 1094static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
1096{ 1095{
@@ -1123,9 +1122,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h,
1123static inline bool gigantic_page_supported(void) { return true; } 1122static inline bool gigantic_page_supported(void) { return true; }
1124#else 1123#else
1125static inline bool gigantic_page_supported(void) { return false; } 1124static inline bool gigantic_page_supported(void) { return false; }
1126static inline void free_gigantic_page(struct page *page, unsigned order) { } 1125static inline void free_gigantic_page(struct page *page, unsigned int order) { }
1127static inline void destroy_compound_gigantic_page(struct page *page, 1126static inline void destroy_compound_gigantic_page(struct page *page,
1128 unsigned long order) { } 1127 unsigned int order) { }
1129static inline int alloc_fresh_gigantic_page(struct hstate *h, 1128static inline int alloc_fresh_gigantic_page(struct hstate *h,
1130 nodemask_t *nodes_allowed) { return 0; } 1129 nodemask_t *nodes_allowed) { return 0; }
1131#endif 1130#endif
@@ -1146,7 +1145,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
1146 1 << PG_writeback); 1145 1 << PG_writeback);
1147 } 1146 }
1148 VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); 1147 VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
1149 set_compound_page_dtor(page, NULL); 1148 set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
1150 set_page_refcounted(page); 1149 set_page_refcounted(page);
1151 if (hstate_is_gigantic(h)) { 1150 if (hstate_is_gigantic(h)) {
1152 destroy_compound_gigantic_page(page, huge_page_order(h)); 1151 destroy_compound_gigantic_page(page, huge_page_order(h));
@@ -1242,7 +1241,7 @@ void free_huge_page(struct page *page)
1242static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) 1241static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
1243{ 1242{
1244 INIT_LIST_HEAD(&page->lru); 1243 INIT_LIST_HEAD(&page->lru);
1245 set_compound_page_dtor(page, free_huge_page); 1244 set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
1246 spin_lock(&hugetlb_lock); 1245 spin_lock(&hugetlb_lock);
1247 set_hugetlb_cgroup(page, NULL); 1246 set_hugetlb_cgroup(page, NULL);
1248 h->nr_huge_pages++; 1247 h->nr_huge_pages++;
@@ -1251,7 +1250,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
1251 put_page(page); /* free it into the hugepage allocator */ 1250 put_page(page); /* free it into the hugepage allocator */
1252} 1251}
1253 1252
1254static void prep_compound_gigantic_page(struct page *page, unsigned long order) 1253static void prep_compound_gigantic_page(struct page *page, unsigned int order)
1255{ 1254{
1256 int i; 1255 int i;
1257 int nr_pages = 1 << order; 1256 int nr_pages = 1 << order;
@@ -1276,10 +1275,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
1276 */ 1275 */
1277 __ClearPageReserved(p); 1276 __ClearPageReserved(p);
1278 set_page_count(p, 0); 1277 set_page_count(p, 0);
1279 p->first_page = page; 1278 set_compound_head(p, page);
1280 /* Make sure p->first_page is always valid for PageTail() */
1281 smp_wmb();
1282 __SetPageTail(p);
1283 } 1279 }
1284} 1280}
1285 1281
@@ -1294,7 +1290,7 @@ int PageHuge(struct page *page)
1294 return 0; 1290 return 0;
1295 1291
1296 page = compound_head(page); 1292 page = compound_head(page);
1297 return get_compound_page_dtor(page) == free_huge_page; 1293 return page[1].compound_dtor == HUGETLB_PAGE_DTOR;
1298} 1294}
1299EXPORT_SYMBOL_GPL(PageHuge); 1295EXPORT_SYMBOL_GPL(PageHuge);
1300 1296
@@ -1568,7 +1564,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
1568 if (page) { 1564 if (page) {
1569 INIT_LIST_HEAD(&page->lru); 1565 INIT_LIST_HEAD(&page->lru);
1570 r_nid = page_to_nid(page); 1566 r_nid = page_to_nid(page);
1571 set_compound_page_dtor(page, free_huge_page); 1567 set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
1572 set_hugetlb_cgroup(page, NULL); 1568 set_hugetlb_cgroup(page, NULL);
1573 /* 1569 /*
1574 * We incremented the global counters already 1570 * We incremented the global counters already
@@ -1972,7 +1968,8 @@ found:
1972 return 1; 1968 return 1;
1973} 1969}
1974 1970
1975static void __init prep_compound_huge_page(struct page *page, int order) 1971static void __init prep_compound_huge_page(struct page *page,
1972 unsigned int order)
1976{ 1973{
1977 if (unlikely(order > (MAX_ORDER - 1))) 1974 if (unlikely(order > (MAX_ORDER - 1)))
1978 prep_compound_gigantic_page(page, order); 1975 prep_compound_gigantic_page(page, order);
@@ -2683,7 +2680,7 @@ static int __init hugetlb_init(void)
2683module_init(hugetlb_init); 2680module_init(hugetlb_init);
2684 2681
2685/* Should be called on processing a hugepagesz=... option */ 2682/* Should be called on processing a hugepagesz=... option */
2686void __init hugetlb_add_hstate(unsigned order) 2683void __init hugetlb_add_hstate(unsigned int order)
2687{ 2684{
2688 struct hstate *h; 2685 struct hstate *h;
2689 unsigned long i; 2686 unsigned long i;
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 33d59abe91f1..d8fb10de0f14 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -385,7 +385,7 @@ void __init hugetlb_cgroup_file_init(void)
385 /* 385 /*
386 * Add cgroup control files only if the huge page consists 386 * Add cgroup control files only if the huge page consists
387 * of more than two normal pages. This is because we use 387 * of more than two normal pages. This is because we use
388 * page[2].lru.next for storing cgroup details. 388 * page[2].private for storing cgroup details.
389 */ 389 */
390 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) 390 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
391 __hugetlb_cgroup_file_init(hstate_index(h)); 391 __hugetlb_cgroup_file_init(hstate_index(h));
diff --git a/mm/internal.h b/mm/internal.h
index d4b807d6c963..38e24b89e4c4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -14,6 +14,25 @@
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16 16
17/*
18 * The set of flags that only affect watermark checking and reclaim
19 * behaviour. This is used by the MM to obey the caller constraints
20 * about IO, FS and watermark checking while ignoring placement
21 * hints such as HIGHMEM usage.
22 */
23#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
24 __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
25 __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
26
27/* The GFP flags allowed during early boot */
28#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
29
30/* Control allocation cpuset and node placement constraints */
31#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
32
33/* Do not use these with a slab allocator */
34#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
35
17void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, 36void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
18 unsigned long floor, unsigned long ceiling); 37 unsigned long floor, unsigned long ceiling);
19 38
@@ -61,9 +80,9 @@ static inline void __get_page_tail_foll(struct page *page,
61 * speculative page access (like in 80 * speculative page access (like in
62 * page_cache_get_speculative()) on tail pages. 81 * page_cache_get_speculative()) on tail pages.
63 */ 82 */
64 VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page); 83 VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page);
65 if (get_page_head) 84 if (get_page_head)
66 atomic_inc(&page->first_page->_count); 85 atomic_inc(&compound_head(page)->_count);
67 get_huge_page_tail(page); 86 get_huge_page_tail(page);
68} 87}
69 88
@@ -129,6 +148,7 @@ struct alloc_context {
129 int classzone_idx; 148 int classzone_idx;
130 int migratetype; 149 int migratetype;
131 enum zone_type high_zoneidx; 150 enum zone_type high_zoneidx;
151 bool spread_dirty_pages;
132}; 152};
133 153
134/* 154/*
@@ -157,7 +177,7 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
157extern int __isolate_free_page(struct page *page, unsigned int order); 177extern int __isolate_free_page(struct page *page, unsigned int order);
158extern void __free_pages_bootmem(struct page *page, unsigned long pfn, 178extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
159 unsigned int order); 179 unsigned int order);
160extern void prep_compound_page(struct page *page, unsigned long order); 180extern void prep_compound_page(struct page *page, unsigned int order);
161#ifdef CONFIG_MEMORY_FAILURE 181#ifdef CONFIG_MEMORY_FAILURE
162extern bool is_free_buddy_page(struct page *page); 182extern bool is_free_buddy_page(struct page *page);
163#endif 183#endif
@@ -215,7 +235,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
215 * page cannot be allocated or merged in parallel. Alternatively, it must 235 * page cannot be allocated or merged in parallel. Alternatively, it must
216 * handle invalid values gracefully, and use page_order_unsafe() below. 236 * handle invalid values gracefully, and use page_order_unsafe() below.
217 */ 237 */
218static inline unsigned long page_order(struct page *page) 238static inline unsigned int page_order(struct page *page)
219{ 239{
220 /* PageBuddy() must be checked by the caller */ 240 /* PageBuddy() must be checked by the caller */
221 return page_private(page); 241 return page_private(page);
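The GFP mask categories added to mm/internal.h above group flags by what they control. A hypothetical helper, purely to illustrate the intended split (the name and its use are assumptions, not part of the patch):

    /* Hypothetical, for illustration: separate the bits that shape reclaim /
     * watermark behaviour from the bits that constrain placement. */
    static inline void split_gfp_mask(gfp_t gfp_mask, gfp_t *reclaim,
                                      gfp_t *constraint)
    {
            *reclaim    = gfp_mask & GFP_RECLAIM_MASK;     /* IO, FS, watermarks */
            *constraint = gfp_mask & GFP_CONSTRAINT_MASK;  /* cpuset / node placement */
    }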
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bc502e590366..9acfb165eb52 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2046,7 +2046,7 @@ retry:
2046 if (unlikely(task_in_memcg_oom(current))) 2046 if (unlikely(task_in_memcg_oom(current)))
2047 goto nomem; 2047 goto nomem;
2048 2048
2049 if (!(gfp_mask & __GFP_WAIT)) 2049 if (!gfpflags_allow_blocking(gfp_mask))
2050 goto nomem; 2050 goto nomem;
2051 2051
2052 mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1); 2052 mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1);
@@ -2120,7 +2120,7 @@ done_restock:
2120 /* 2120 /*
2121 * If the hierarchy is above the normal consumption range, schedule 2121 * If the hierarchy is above the normal consumption range, schedule
2122 * reclaim on returning to userland. We can perform reclaim here 2122 * reclaim on returning to userland. We can perform reclaim here
2123 * if __GFP_WAIT but let's always punt for simplicity and so that 2123 * if __GFP_RECLAIM but let's always punt for simplicity and so that
2124 * GFP_KERNEL can consistently be used during reclaim. @memcg is 2124 * GFP_KERNEL can consistently be used during reclaim. @memcg is
2125 * not recorded as it most likely matches current's and won't 2125 * not recorded as it most likely matches current's and won't
2126 * change in the meantime. As high limit is checked again before 2126 * change in the meantime. As high limit is checked again before
@@ -2801,7 +2801,7 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
2801 return val; 2801 return val;
2802} 2802}
2803 2803
2804static inline unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) 2804static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
2805{ 2805{
2806 unsigned long val; 2806 unsigned long val;
2807 2807
@@ -4364,8 +4364,8 @@ static int mem_cgroup_do_precharge(unsigned long count)
4364{ 4364{
4365 int ret; 4365 int ret;
4366 4366
4367 /* Try a single bulk charge without reclaim first */ 4367 /* Try a single bulk charge without reclaim first, kswapd may wake */
4368 ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); 4368 ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
4369 if (!ret) { 4369 if (!ret) {
4370 mc.precharge += count; 4370 mc.precharge += count;
4371 return ret; 4371 return ret;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 16a0ec385320..8424b64711ac 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -776,8 +776,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
776#define lru (1UL << PG_lru) 776#define lru (1UL << PG_lru)
777#define swapbacked (1UL << PG_swapbacked) 777#define swapbacked (1UL << PG_swapbacked)
778#define head (1UL << PG_head) 778#define head (1UL << PG_head)
779#define tail (1UL << PG_tail)
780#define compound (1UL << PG_compound)
781#define slab (1UL << PG_slab) 779#define slab (1UL << PG_slab)
782#define reserved (1UL << PG_reserved) 780#define reserved (1UL << PG_reserved)
783 781
@@ -800,12 +798,7 @@ static struct page_state {
800 */ 798 */
801 { slab, slab, MF_MSG_SLAB, me_kernel }, 799 { slab, slab, MF_MSG_SLAB, me_kernel },
802 800
803#ifdef CONFIG_PAGEFLAGS_EXTENDED
804 { head, head, MF_MSG_HUGE, me_huge_page }, 801 { head, head, MF_MSG_HUGE, me_huge_page },
805 { tail, tail, MF_MSG_HUGE, me_huge_page },
806#else
807 { compound, compound, MF_MSG_HUGE, me_huge_page },
808#endif
809 802
810 { sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty }, 803 { sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty },
811 { sc|dirty, sc, MF_MSG_CLEAN_SWAPCACHE, me_swapcache_clean }, 804 { sc|dirty, sc, MF_MSG_CLEAN_SWAPCACHE, me_swapcache_clean },
diff --git a/mm/mempool.c b/mm/mempool.c
index 4c533bc51d73..004d42b1dfaf 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -320,13 +320,13 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
320 gfp_t gfp_temp; 320 gfp_t gfp_temp;
321 321
322 VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); 322 VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
323 might_sleep_if(gfp_mask & __GFP_WAIT); 323 might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
324 324
325 gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ 325 gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
326 gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ 326 gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
327 gfp_mask |= __GFP_NOWARN; /* failures are OK */ 327 gfp_mask |= __GFP_NOWARN; /* failures are OK */
328 328
329 gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO); 329 gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
330 330
331repeat_alloc: 331repeat_alloc:
332 332
@@ -349,7 +349,7 @@ repeat_alloc:
349 } 349 }
350 350
351 /* 351 /*
352 * We use gfp mask w/o __GFP_WAIT or IO for the first round. If 352 * We use gfp mask w/o direct reclaim or IO for the first round. If
353 * alloc failed with that and @pool was empty, retry immediately. 353 * alloc failed with that and @pool was empty, retry immediately.
354 */ 354 */
355 if (gfp_temp != gfp_mask) { 355 if (gfp_temp != gfp_mask) {
@@ -358,8 +358,8 @@ repeat_alloc:
358 goto repeat_alloc; 358 goto repeat_alloc;
359 } 359 }
360 360
361 /* We must not sleep if !__GFP_WAIT */ 361 /* We must not sleep if !__GFP_DIRECT_RECLAIM */
362 if (!(gfp_mask & __GFP_WAIT)) { 362 if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
363 spin_unlock_irqrestore(&pool->lock, flags); 363 spin_unlock_irqrestore(&pool->lock, flags);
364 return NULL; 364 return NULL;
365 } 365 }
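A usage sketch of the mempool_alloc() rules restated above (the wrapper and its pool argument are hypothetical, not from the patch):

    /* Hypothetical wrapper: with __GFP_DIRECT_RECLAIM set the call may sleep
     * waiting for an element but will not fail; with GFP_ATOMIC it never
     * sleeps and returns NULL once both the pool and the page allocator are
     * exhausted. */
    static void *grab_element(mempool_t *pool, bool can_sleep)
    {
            return mempool_alloc(pool, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
    }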
diff --git a/mm/migrate.c b/mm/migrate.c
index 2834faba719a..7890d0bb5e23 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1578,7 +1578,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
1578 (GFP_HIGHUSER_MOVABLE | 1578 (GFP_HIGHUSER_MOVABLE |
1579 __GFP_THISNODE | __GFP_NOMEMALLOC | 1579 __GFP_THISNODE | __GFP_NOMEMALLOC |
1580 __GFP_NORETRY | __GFP_NOWARN) & 1580 __GFP_NORETRY | __GFP_NOWARN) &
1581 ~GFP_IOFS, 0); 1581 ~(__GFP_IO | __GFP_FS), 0);
1582 1582
1583 return newpage; 1583 return newpage;
1584} 1584}
@@ -1752,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
1752 goto out_dropref; 1752 goto out_dropref;
1753 1753
1754 new_page = alloc_pages_node(node, 1754 new_page = alloc_pages_node(node,
1755 (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, 1755 (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
1756 HPAGE_PMD_ORDER); 1756 HPAGE_PMD_ORDER);
1757 if (!new_page) 1757 if (!new_page)
1758 goto out_fail; 1758 goto out_fail;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index e4778285d8d1..d13a33918fa2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -118,6 +118,15 @@ found:
118 return t; 118 return t;
119} 119}
120 120
121/*
122 * order == -1 means the oom kill is required by sysrq, otherwise only
123 * for display purposes.
124 */
125static inline bool is_sysrq_oom(struct oom_control *oc)
126{
127 return oc->order == -1;
128}
129
121/* return true if the task is not adequate as candidate victim task. */ 130/* return true if the task is not adequate as candidate victim task. */
122static bool oom_unkillable_task(struct task_struct *p, 131static bool oom_unkillable_task(struct task_struct *p,
123 struct mem_cgroup *memcg, const nodemask_t *nodemask) 132 struct mem_cgroup *memcg, const nodemask_t *nodemask)
@@ -265,7 +274,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
265 * Don't allow any other task to have access to the reserves. 274 * Don't allow any other task to have access to the reserves.
266 */ 275 */
267 if (test_tsk_thread_flag(task, TIF_MEMDIE)) { 276 if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
268 if (oc->order != -1) 277 if (!is_sysrq_oom(oc))
269 return OOM_SCAN_ABORT; 278 return OOM_SCAN_ABORT;
270 } 279 }
271 if (!task->mm) 280 if (!task->mm)
@@ -278,7 +287,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
278 if (oom_task_origin(task)) 287 if (oom_task_origin(task))
279 return OOM_SCAN_SELECT; 288 return OOM_SCAN_SELECT;
280 289
281 if (task_will_free_mem(task) && oc->order != -1) 290 if (task_will_free_mem(task) && !is_sysrq_oom(oc))
282 return OOM_SCAN_ABORT; 291 return OOM_SCAN_ABORT;
283 292
284 return OOM_SCAN_OK; 293 return OOM_SCAN_OK;
@@ -629,7 +638,7 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
629 return; 638 return;
630 } 639 }
631 /* Do not panic for oom kills triggered by sysrq */ 640 /* Do not panic for oom kills triggered by sysrq */
632 if (oc->order == -1) 641 if (is_sysrq_oom(oc))
633 return; 642 return;
634 dump_header(oc, NULL, memcg); 643 dump_header(oc, NULL, memcg);
635 panic("Out of memory: %s panic_on_oom is enabled\n", 644 panic("Out of memory: %s panic_on_oom is enabled\n",
@@ -709,7 +718,7 @@ bool out_of_memory(struct oom_control *oc)
709 718
710 p = select_bad_process(oc, &points, totalpages); 719 p = select_bad_process(oc, &points, totalpages);
711 /* Found nothing?!?! Either we hang forever, or we panic. */ 720 /* Found nothing?!?! Either we hang forever, or we panic. */
712 if (!p && oc->order != -1) { 721 if (!p && !is_sysrq_oom(oc)) {
713 dump_header(oc, NULL, NULL); 722 dump_header(oc, NULL, NULL);
714 panic("Out of memory and no killable processes...\n"); 723 panic("Out of memory and no killable processes...\n");
715 } 724 }
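is_sysrq_oom() keys off the order == -1 convention used when an OOM kill is forced via sysrq. A hedged sketch of how that caller presumably sets up its oom_control (field values other than .order are illustrative, condensed from the sysrq handler rather than taken from this diff):

    struct oom_control oc = {
            .zonelist = node_zonelist(first_memory_node, GFP_KERNEL),
            .nodemask = NULL,
            .gfp_mask = GFP_KERNEL,
            .order    = -1,            /* what is_sysrq_oom() tests for */
    };

    out_of_memory(&oc);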
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 446bb36ee59d..208e4c7e771b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -169,19 +169,19 @@ void pm_restrict_gfp_mask(void)
169 WARN_ON(!mutex_is_locked(&pm_mutex)); 169 WARN_ON(!mutex_is_locked(&pm_mutex));
170 WARN_ON(saved_gfp_mask); 170 WARN_ON(saved_gfp_mask);
171 saved_gfp_mask = gfp_allowed_mask; 171 saved_gfp_mask = gfp_allowed_mask;
172 gfp_allowed_mask &= ~GFP_IOFS; 172 gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
173} 173}
174 174
175bool pm_suspended_storage(void) 175bool pm_suspended_storage(void)
176{ 176{
177 if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS) 177 if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
178 return false; 178 return false;
179 return true; 179 return true;
180} 180}
181#endif /* CONFIG_PM_SLEEP */ 181#endif /* CONFIG_PM_SLEEP */
182 182
183#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 183#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
184int pageblock_order __read_mostly; 184unsigned int pageblock_order __read_mostly;
185#endif 185#endif
186 186
187static void __free_pages_ok(struct page *page, unsigned int order); 187static void __free_pages_ok(struct page *page, unsigned int order);
@@ -229,6 +229,15 @@ static char * const zone_names[MAX_NR_ZONES] = {
229#endif 229#endif
230}; 230};
231 231
232static void free_compound_page(struct page *page);
233compound_page_dtor * const compound_page_dtors[] = {
234 NULL,
235 free_compound_page,
236#ifdef CONFIG_HUGETLB_PAGE
237 free_huge_page,
238#endif
239};
240
232int min_free_kbytes = 1024; 241int min_free_kbytes = 1024;
233int user_min_free_kbytes = -1; 242int user_min_free_kbytes = -1;
234 243
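compound_page_dtors[] above is indexed by the small ids (NULL_COMPOUND_DTOR, COMPOUND_PAGE_DTOR, HUGETLB_PAGE_DTOR) that the rest of this diff stores in page[1].compound_dtor. The declarations live in include/linux/mm.h, outside this diff, and are presumably along these lines:

    typedef void compound_page_dtor(struct page *);

    enum compound_dtor_id {
            NULL_COMPOUND_DTOR,
            COMPOUND_PAGE_DTOR,
    #ifdef CONFIG_HUGETLB_PAGE
            HUGETLB_PAGE_DTOR,
    #endif
            NR_COMPOUND_DTORS,
    };

    static inline void set_compound_page_dtor(struct page *page,
                    enum compound_dtor_id compound_dtor)
    {
            page[1].compound_dtor = compound_dtor;
    }

Storing an index instead of a raw function pointer is what lets PageHuge() in the hugetlb.c hunk above compare an id rather than a function address.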
@@ -436,15 +445,15 @@ out:
436/* 445/*
437 * Higher-order pages are called "compound pages". They are structured thusly: 446 * Higher-order pages are called "compound pages". They are structured thusly:
438 * 447 *
 439 * The first PAGE_SIZE page is called the "head page".                   448 * The first PAGE_SIZE page is called the "head page" and has PG_head set.
440 * 449 *
441 * The remaining PAGE_SIZE pages are called "tail pages". 450 * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded
                                                                        451 * in bit 0 of page->compound_head. The remaining bits are a pointer to the head page.
442 * 452 *
 443 * All pages have PG_compound set. All tail pages have their ->first_page    453 * The first tail page's ->compound_dtor holds the offset into the array of compound
444 * pointing at the head page. 454 * page destructors. See compound_page_dtors.
445 * 455 *
446 * The first tail page's ->lru.next holds the address of the compound page's 456 * The first tail page's ->compound_order holds the order of allocation.
447 * put_page() function. Its ->lru.prev holds the order of allocation.
448 * This usage means that zero-order pages may not be compound. 457 * This usage means that zero-order pages may not be compound.
449 */ 458 */
450 459
@@ -453,21 +462,18 @@ static void free_compound_page(struct page *page)
453 __free_pages_ok(page, compound_order(page)); 462 __free_pages_ok(page, compound_order(page));
454} 463}
455 464
456void prep_compound_page(struct page *page, unsigned long order) 465void prep_compound_page(struct page *page, unsigned int order)
457{ 466{
458 int i; 467 int i;
459 int nr_pages = 1 << order; 468 int nr_pages = 1 << order;
460 469
461 set_compound_page_dtor(page, free_compound_page); 470 set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
462 set_compound_order(page, order); 471 set_compound_order(page, order);
463 __SetPageHead(page); 472 __SetPageHead(page);
464 for (i = 1; i < nr_pages; i++) { 473 for (i = 1; i < nr_pages; i++) {
465 struct page *p = page + i; 474 struct page *p = page + i;
466 set_page_count(p, 0); 475 set_page_count(p, 0);
467 p->first_page = page; 476 set_compound_head(p, page);
468 /* Make sure p->first_page is always valid for PageTail() */
469 smp_wmb();
470 __SetPageTail(p);
471 } 477 }
472} 478}
473 479
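The set_compound_head()/clear_compound_head()/compound_head() helpers used throughout these hunks are defined in include/linux/mm.h; the encoding is presumably bit 0 of page->compound_head acting as the PageTail() marker, with the remaining bits holding the head pointer, roughly:

    static inline void set_compound_head(struct page *page, struct page *head)
    {
            WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
    }

    static inline void clear_compound_head(struct page *page)
    {
            WRITE_ONCE(page->compound_head, 0);
    }

    static inline struct page *compound_head(struct page *page)
    {
            unsigned long head = READ_ONCE(page->compound_head);

            if (unlikely(head & 1))
                    return (struct page *)(head - 1);
            return page;
    }

A single WRITE_ONCE() of one word is what allows the smp_wmb() pairs that used to order ->first_page against __SetPageTail() to be dropped in the hunks above.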
@@ -656,7 +662,7 @@ static inline void __free_one_page(struct page *page,
656 unsigned long combined_idx; 662 unsigned long combined_idx;
657 unsigned long uninitialized_var(buddy_idx); 663 unsigned long uninitialized_var(buddy_idx);
658 struct page *buddy; 664 struct page *buddy;
659 int max_order = MAX_ORDER; 665 unsigned int max_order = MAX_ORDER;
660 666
661 VM_BUG_ON(!zone_is_initialized(zone)); 667 VM_BUG_ON(!zone_is_initialized(zone));
662 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); 668 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
@@ -669,7 +675,7 @@ static inline void __free_one_page(struct page *page,
669 * pageblock. Without this, pageblock isolation 675 * pageblock. Without this, pageblock isolation
670 * could cause incorrect freepage accounting. 676 * could cause incorrect freepage accounting.
671 */ 677 */
672 max_order = min(MAX_ORDER, pageblock_order + 1); 678 max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
673 } else { 679 } else {
674 __mod_zone_freepage_state(zone, 1 << order, migratetype); 680 __mod_zone_freepage_state(zone, 1 << order, migratetype);
675 } 681 }
@@ -817,7 +823,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
817 if (unlikely(has_isolate_pageblock(zone))) 823 if (unlikely(has_isolate_pageblock(zone)))
818 mt = get_pageblock_migratetype(page); 824 mt = get_pageblock_migratetype(page);
819 825
820 /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
821 __free_one_page(page, page_to_pfn(page), zone, 0, mt); 826 __free_one_page(page, page_to_pfn(page), zone, 0, mt);
822 trace_mm_page_pcpu_drain(page, 0, mt); 827 trace_mm_page_pcpu_drain(page, 0, mt);
823 } while (--to_free && --batch_free && !list_empty(list)); 828 } while (--to_free && --batch_free && !list_empty(list));
@@ -846,17 +851,30 @@ static void free_one_page(struct zone *zone,
846 851
847static int free_tail_pages_check(struct page *head_page, struct page *page) 852static int free_tail_pages_check(struct page *head_page, struct page *page)
848{ 853{
849 if (!IS_ENABLED(CONFIG_DEBUG_VM)) 854 int ret = 1;
850 return 0; 855
856 /*
857 * We rely page->lru.next never has bit 0 set, unless the page
858 * is PageTail(). Let's make sure that's true even for poisoned ->lru.
859 */
860 BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
861
862 if (!IS_ENABLED(CONFIG_DEBUG_VM)) {
863 ret = 0;
864 goto out;
865 }
851 if (unlikely(!PageTail(page))) { 866 if (unlikely(!PageTail(page))) {
852 bad_page(page, "PageTail not set", 0); 867 bad_page(page, "PageTail not set", 0);
853 return 1; 868 goto out;
854 } 869 }
855 if (unlikely(page->first_page != head_page)) { 870 if (unlikely(compound_head(page) != head_page)) {
856 bad_page(page, "first_page not consistent", 0); 871 bad_page(page, "compound_head not consistent", 0);
857 return 1; 872 goto out;
858 } 873 }
859 return 0; 874 ret = 0;
875out:
876 clear_compound_head(page);
877 return ret;
860} 878}
861 879
862static void __meminit __init_single_page(struct page *page, unsigned long pfn, 880static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -923,6 +941,10 @@ void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
923 struct page *page = pfn_to_page(start_pfn); 941 struct page *page = pfn_to_page(start_pfn);
924 942
925 init_reserved_page(start_pfn); 943 init_reserved_page(start_pfn);
944
945 /* Avoid false-positive PageTail() */
946 INIT_LIST_HEAD(&page->lru);
947
926 SetPageReserved(page); 948 SetPageReserved(page);
927 } 949 }
928 } 950 }
@@ -1417,15 +1439,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
1417 * the free lists for the desirable migrate type are depleted 1439 * the free lists for the desirable migrate type are depleted
1418 */ 1440 */
1419static int fallbacks[MIGRATE_TYPES][4] = { 1441static int fallbacks[MIGRATE_TYPES][4] = {
1420 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, 1442 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
1421 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, 1443 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
1422 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, 1444 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
1423#ifdef CONFIG_CMA 1445#ifdef CONFIG_CMA
1424 [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ 1446 [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */
1425#endif 1447#endif
1426 [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */
1427#ifdef CONFIG_MEMORY_ISOLATION 1448#ifdef CONFIG_MEMORY_ISOLATION
1428 [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ 1449 [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */
1429#endif 1450#endif
1430}; 1451};
1431 1452
@@ -1450,7 +1471,7 @@ int move_freepages(struct zone *zone,
1450 int migratetype) 1471 int migratetype)
1451{ 1472{
1452 struct page *page; 1473 struct page *page;
1453 unsigned long order; 1474 unsigned int order;
1454 int pages_moved = 0; 1475 int pages_moved = 0;
1455 1476
1456#ifndef CONFIG_HOLES_IN_ZONE 1477#ifndef CONFIG_HOLES_IN_ZONE
@@ -1563,7 +1584,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
1563static void steal_suitable_fallback(struct zone *zone, struct page *page, 1584static void steal_suitable_fallback(struct zone *zone, struct page *page,
1564 int start_type) 1585 int start_type)
1565{ 1586{
1566 int current_order = page_order(page); 1587 unsigned int current_order = page_order(page);
1567 int pages; 1588 int pages;
1568 1589
1569 /* Take ownership for orders >= pageblock_order */ 1590 /* Take ownership for orders >= pageblock_order */
@@ -1598,7 +1619,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
1598 *can_steal = false; 1619 *can_steal = false;
1599 for (i = 0;; i++) { 1620 for (i = 0;; i++) {
1600 fallback_mt = fallbacks[migratetype][i]; 1621 fallback_mt = fallbacks[migratetype][i];
1601 if (fallback_mt == MIGRATE_RESERVE) 1622 if (fallback_mt == MIGRATE_TYPES)
1602 break; 1623 break;
1603 1624
1604 if (list_empty(&area->free_list[fallback_mt])) 1625 if (list_empty(&area->free_list[fallback_mt]))
@@ -1617,6 +1638,101 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
1617 return -1; 1638 return -1;
1618} 1639}
1619 1640
1641/*
1642 * Reserve a pageblock for exclusive use of high-order atomic allocations if
1643 * there are no empty page blocks that contain a page with a suitable order
1644 */
1645static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
1646 unsigned int alloc_order)
1647{
1648 int mt;
1649 unsigned long max_managed, flags;
1650
1651 /*
1652 * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
1653 * Check is race-prone but harmless.
1654 */
1655 max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
1656 if (zone->nr_reserved_highatomic >= max_managed)
1657 return;
1658
1659 spin_lock_irqsave(&zone->lock, flags);
1660
1661 /* Recheck the nr_reserved_highatomic limit under the lock */
1662 if (zone->nr_reserved_highatomic >= max_managed)
1663 goto out_unlock;
1664
1665 /* Yoink! */
1666 mt = get_pageblock_migratetype(page);
1667 if (mt != MIGRATE_HIGHATOMIC &&
1668 !is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
1669 zone->nr_reserved_highatomic += pageblock_nr_pages;
1670 set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
1671 move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
1672 }
1673
1674out_unlock:
1675 spin_unlock_irqrestore(&zone->lock, flags);
1676}
1677
1678/*
1679 * Used when an allocation is about to fail under memory pressure. This
1680 * potentially hurts the reliability of high-order allocations when under
1681 * intense memory pressure but failed atomic allocations should be easier
1682 * to recover from than an OOM.
1683 */
1684static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
1685{
1686 struct zonelist *zonelist = ac->zonelist;
1687 unsigned long flags;
1688 struct zoneref *z;
1689 struct zone *zone;
1690 struct page *page;
1691 int order;
1692
1693 for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
1694 ac->nodemask) {
1695 /* Preserve at least one pageblock */
1696 if (zone->nr_reserved_highatomic <= pageblock_nr_pages)
1697 continue;
1698
1699 spin_lock_irqsave(&zone->lock, flags);
1700 for (order = 0; order < MAX_ORDER; order++) {
1701 struct free_area *area = &(zone->free_area[order]);
1702
1703 if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
1704 continue;
1705
1706 page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
1707 struct page, lru);
1708
1709 /*
1710 * It should never happen but changes to locking could
1711 * inadvertently allow a per-cpu drain to add pages
1712 * to MIGRATE_HIGHATOMIC while unreserving so be safe
1713 * and watch for underflows.
1714 */
1715 zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
1716 zone->nr_reserved_highatomic);
1717
1718 /*
1719 * Convert to ac->migratetype and avoid the normal
1720 * pageblock stealing heuristics. Minimally, the caller
1721 * is doing the work and needs the pages. More
1722 * importantly, if the block was always converted to
1723 * MIGRATE_UNMOVABLE or another type then the number
1724 * of pageblocks that cannot be completely freed
1725 * may increase.
1726 */
1727 set_pageblock_migratetype(page, ac->migratetype);
1728 move_freepages_block(zone, page, ac->migratetype);
1729 spin_unlock_irqrestore(&zone->lock, flags);
1730 return;
1731 }
1732 spin_unlock_irqrestore(&zone->lock, flags);
1733 }
1734}
1735
1620/* Remove an element from the buddy allocator from the fallback list */ 1736/* Remove an element from the buddy allocator from the fallback list */
1621static inline struct page * 1737static inline struct page *
1622__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) 1738__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
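reserve_highatomic_pageblock() and unreserve_highatomic_pageblock() are wired into the allocator paths changed later in this file. A hypothetical condensation of that policy (the helper name and argument plumbing are illustrative, not from the patch):

    /* Hypothetical summary: grow the reserve after a high-order ALLOC_HARDER
     * request succeeds; give a reserved pageblock back when an allocation
     * still fails even though direct reclaim was attempted. */
    static void manage_highatomic_reserve(struct zone *zone, struct page *page,
                                          unsigned int order, int alloc_flags,
                                          const struct alloc_context *ac)
    {
            if (page && order && (alloc_flags & ALLOC_HARDER))
                    reserve_highatomic_pageblock(page, zone, order);
            else if (!page)
                    unreserve_highatomic_pageblock(ac);
    }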
@@ -1672,29 +1788,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
1672 * Call me with the zone->lock already held. 1788 * Call me with the zone->lock already held.
1673 */ 1789 */
1674static struct page *__rmqueue(struct zone *zone, unsigned int order, 1790static struct page *__rmqueue(struct zone *zone, unsigned int order,
1675 int migratetype) 1791 int migratetype, gfp_t gfp_flags)
1676{ 1792{
1677 struct page *page; 1793 struct page *page;
1678 1794
1679retry_reserve:
1680 page = __rmqueue_smallest(zone, order, migratetype); 1795 page = __rmqueue_smallest(zone, order, migratetype);
1681 1796 if (unlikely(!page)) {
1682 if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
1683 if (migratetype == MIGRATE_MOVABLE) 1797 if (migratetype == MIGRATE_MOVABLE)
1684 page = __rmqueue_cma_fallback(zone, order); 1798 page = __rmqueue_cma_fallback(zone, order);
1685 1799
1686 if (!page) 1800 if (!page)
1687 page = __rmqueue_fallback(zone, order, migratetype); 1801 page = __rmqueue_fallback(zone, order, migratetype);
1688
1689 /*
1690 * Use MIGRATE_RESERVE rather than fail an allocation. goto
1691 * is used because __rmqueue_smallest is an inline function
1692 * and we want just one call site
1693 */
1694 if (!page) {
1695 migratetype = MIGRATE_RESERVE;
1696 goto retry_reserve;
1697 }
1698 } 1802 }
1699 1803
1700 trace_mm_page_alloc_zone_locked(page, order, migratetype); 1804 trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -1714,7 +1818,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
1714 1818
1715 spin_lock(&zone->lock); 1819 spin_lock(&zone->lock);
1716 for (i = 0; i < count; ++i) { 1820 for (i = 0; i < count; ++i) {
1717 struct page *page = __rmqueue(zone, order, migratetype); 1821 struct page *page = __rmqueue(zone, order, migratetype, 0);
1718 if (unlikely(page == NULL)) 1822 if (unlikely(page == NULL))
1719 break; 1823 break;
1720 1824
@@ -2086,7 +2190,7 @@ int split_free_page(struct page *page)
2086static inline 2190static inline
2087struct page *buffered_rmqueue(struct zone *preferred_zone, 2191struct page *buffered_rmqueue(struct zone *preferred_zone,
2088 struct zone *zone, unsigned int order, 2192 struct zone *zone, unsigned int order,
2089 gfp_t gfp_flags, int migratetype) 2193 gfp_t gfp_flags, int alloc_flags, int migratetype)
2090{ 2194{
2091 unsigned long flags; 2195 unsigned long flags;
2092 struct page *page; 2196 struct page *page;
@@ -2129,7 +2233,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
2129 WARN_ON_ONCE(order > 1); 2233 WARN_ON_ONCE(order > 1);
2130 } 2234 }
2131 spin_lock_irqsave(&zone->lock, flags); 2235 spin_lock_irqsave(&zone->lock, flags);
2132 page = __rmqueue(zone, order, migratetype); 2236
2237 page = NULL;
2238 if (alloc_flags & ALLOC_HARDER) {
2239 page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
2240 if (page)
2241 trace_mm_page_alloc_zone_locked(page, order, migratetype);
2242 }
2243 if (!page)
2244 page = __rmqueue(zone, order, migratetype, gfp_flags);
2133 spin_unlock(&zone->lock); 2245 spin_unlock(&zone->lock);
2134 if (!page) 2246 if (!page)
2135 goto failed; 2247 goto failed;
@@ -2160,11 +2272,11 @@ static struct {
2160 struct fault_attr attr; 2272 struct fault_attr attr;
2161 2273
2162 bool ignore_gfp_highmem; 2274 bool ignore_gfp_highmem;
2163 bool ignore_gfp_wait; 2275 bool ignore_gfp_reclaim;
2164 u32 min_order; 2276 u32 min_order;
2165} fail_page_alloc = { 2277} fail_page_alloc = {
2166 .attr = FAULT_ATTR_INITIALIZER, 2278 .attr = FAULT_ATTR_INITIALIZER,
2167 .ignore_gfp_wait = true, 2279 .ignore_gfp_reclaim = true,
2168 .ignore_gfp_highmem = true, 2280 .ignore_gfp_highmem = true,
2169 .min_order = 1, 2281 .min_order = 1,
2170}; 2282};
@@ -2183,7 +2295,8 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
2183 return false; 2295 return false;
2184 if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM)) 2296 if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
2185 return false; 2297 return false;
2186 if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT)) 2298 if (fail_page_alloc.ignore_gfp_reclaim &&
2299 (gfp_mask & __GFP_DIRECT_RECLAIM))
2187 return false; 2300 return false;
2188 2301
2189 return should_fail(&fail_page_alloc.attr, 1 << order); 2302 return should_fail(&fail_page_alloc.attr, 1 << order);
@@ -2202,7 +2315,7 @@ static int __init fail_page_alloc_debugfs(void)
2202 return PTR_ERR(dir); 2315 return PTR_ERR(dir);
2203 2316
2204 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, 2317 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
2205 &fail_page_alloc.ignore_gfp_wait)) 2318 &fail_page_alloc.ignore_gfp_reclaim))
2206 goto fail; 2319 goto fail;
2207 if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir, 2320 if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir,
2208 &fail_page_alloc.ignore_gfp_highmem)) 2321 &fail_page_alloc.ignore_gfp_highmem))
@@ -2232,42 +2345,77 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
2232#endif /* CONFIG_FAIL_PAGE_ALLOC */ 2345#endif /* CONFIG_FAIL_PAGE_ALLOC */
2233 2346
2234/* 2347/*
2235 * Return true if free pages are above 'mark'. This takes into account the order 2348 * Return true if free base pages are above 'mark'. For high-order checks it
 2236 * of the allocation.                                             2349 * will return true if the order-0 watermark is reached and there is at least
2350 * one free page of a suitable size. Checking now avoids taking the zone lock
2351 * to check in the allocation paths if no pages are free.
2237 */ 2352 */
2238static bool __zone_watermark_ok(struct zone *z, unsigned int order, 2353static bool __zone_watermark_ok(struct zone *z, unsigned int order,
2239 unsigned long mark, int classzone_idx, int alloc_flags, 2354 unsigned long mark, int classzone_idx, int alloc_flags,
2240 long free_pages) 2355 long free_pages)
2241{ 2356{
2242 /* free_pages may go negative - that's OK */
2243 long min = mark; 2357 long min = mark;
2244 int o; 2358 int o;
2245 long free_cma = 0; 2359 const int alloc_harder = (alloc_flags & ALLOC_HARDER);
2246 2360
2361 /* free_pages may go negative - that's OK */
2247 free_pages -= (1 << order) - 1; 2362 free_pages -= (1 << order) - 1;
2363
2248 if (alloc_flags & ALLOC_HIGH) 2364 if (alloc_flags & ALLOC_HIGH)
2249 min -= min / 2; 2365 min -= min / 2;
2250 if (alloc_flags & ALLOC_HARDER) 2366
2367 /*
2368 * If the caller does not have rights to ALLOC_HARDER then subtract
2369 * the high-atomic reserves. This will over-estimate the size of the
2370 * atomic reserve but it avoids a search.
2371 */
2372 if (likely(!alloc_harder))
2373 free_pages -= z->nr_reserved_highatomic;
2374 else
2251 min -= min / 4; 2375 min -= min / 4;
2376
2252#ifdef CONFIG_CMA 2377#ifdef CONFIG_CMA
2253 /* If allocation can't use CMA areas don't use free CMA pages */ 2378 /* If allocation can't use CMA areas don't use free CMA pages */
2254 if (!(alloc_flags & ALLOC_CMA)) 2379 if (!(alloc_flags & ALLOC_CMA))
2255 free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); 2380 free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
2256#endif 2381#endif
2257 2382
2258 if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) 2383 /*
2384 * Check watermarks for an order-0 allocation request. If these
2385 * are not met, then a high-order request also cannot go ahead
2386 * even if a suitable page happened to be free.
2387 */
2388 if (free_pages <= min + z->lowmem_reserve[classzone_idx])
2259 return false; 2389 return false;
2260 for (o = 0; o < order; o++) {
2261 /* At the next order, this order's pages become unavailable */
2262 free_pages -= z->free_area[o].nr_free << o;
2263 2390
2264 /* Require fewer higher order pages to be free */ 2391 /* If this is an order-0 request then the watermark is fine */
2265 min >>= 1; 2392 if (!order)
2393 return true;
2394
2395 /* For a high-order request, check at least one suitable page is free */
2396 for (o = order; o < MAX_ORDER; o++) {
2397 struct free_area *area = &z->free_area[o];
2398 int mt;
2399
2400 if (!area->nr_free)
2401 continue;
2402
2403 if (alloc_harder)
2404 return true;
2405
2406 for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
2407 if (!list_empty(&area->free_list[mt]))
2408 return true;
2409 }
2266 2410
2267 if (free_pages <= min) 2411#ifdef CONFIG_CMA
2268 return false; 2412 if ((alloc_flags & ALLOC_CMA) &&
2413 !list_empty(&area->free_list[MIGRATE_CMA])) {
2414 return true;
2415 }
2416#endif
2269 } 2417 }
2270 return true; 2418 return false;
2271} 2419}
2272 2420
2273bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, 2421bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
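With the rewritten __zone_watermark_ok() above, an order-0 request passes on the adjusted free-page count alone, while a high-order request additionally needs one free page of at least the requested order on an allowed migratetype list. A usage sketch (the wrapper name is hypothetical):

    /* Hypothetical wrapper: true only if the order-0 watermark is met and,
     * for order > 0, a suitably sized free page exists, so callers no longer
     * take the zone lock just to discover nothing suitable is free. */
    static bool can_try_zone(struct zone *zone, unsigned int order,
                             int classzone_idx, int alloc_flags)
    {
            return zone_watermark_ok(zone, order, low_wmark_pages(zone),
                                     classzone_idx, alloc_flags);
    }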
@@ -2278,134 +2426,18 @@ bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
2278} 2426}
2279 2427
2280bool zone_watermark_ok_safe(struct zone *z, unsigned int order, 2428bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
2281 unsigned long mark, int classzone_idx, int alloc_flags) 2429 unsigned long mark, int classzone_idx)
2282{ 2430{
2283 long free_pages = zone_page_state(z, NR_FREE_PAGES); 2431 long free_pages = zone_page_state(z, NR_FREE_PAGES);
2284 2432
2285 if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) 2433 if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
2286 free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); 2434 free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
2287 2435
2288 return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, 2436 return __zone_watermark_ok(z, order, mark, classzone_idx, 0,
2289 free_pages); 2437 free_pages);
2290} 2438}
2291 2439
2292#ifdef CONFIG_NUMA 2440#ifdef CONFIG_NUMA
2293/*
2294 * zlc_setup - Setup for "zonelist cache". Uses cached zone data to
2295 * skip over zones that are not allowed by the cpuset, or that have
2296 * been recently (in last second) found to be nearly full. See further
2297 * comments in mmzone.h. Reduces cache footprint of zonelist scans
2298 * that have to skip over a lot of full or unallowed zones.
2299 *
2300 * If the zonelist cache is present in the passed zonelist, then
2301 * returns a pointer to the allowed node mask (either the current
2302 * tasks mems_allowed, or node_states[N_MEMORY].)
2303 *
2304 * If the zonelist cache is not available for this zonelist, does
2305 * nothing and returns NULL.
2306 *
2307 * If the fullzones BITMAP in the zonelist cache is stale (more than
2308 * a second since last zap'd) then we zap it out (clear its bits.)
2309 *
2310 * We hold off even calling zlc_setup, until after we've checked the
2311 * first zone in the zonelist, on the theory that most allocations will
2312 * be satisfied from that first zone, so best to examine that zone as
2313 * quickly as we can.
2314 */
2315static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
2316{
2317 struct zonelist_cache *zlc; /* cached zonelist speedup info */
2318 nodemask_t *allowednodes; /* zonelist_cache approximation */
2319
2320 zlc = zonelist->zlcache_ptr;
2321 if (!zlc)
2322 return NULL;
2323
2324 if (time_after(jiffies, zlc->last_full_zap + HZ)) {
2325 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
2326 zlc->last_full_zap = jiffies;
2327 }
2328
2329 allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
2330 &cpuset_current_mems_allowed :
2331 &node_states[N_MEMORY];
2332 return allowednodes;
2333}
2334
2335/*
2336 * Given 'z' scanning a zonelist, run a couple of quick checks to see
2337 * if it is worth looking at further for free memory:
2338 * 1) Check that the zone isn't thought to be full (doesn't have its
2339 * bit set in the zonelist_cache fullzones BITMAP).
2340 * 2) Check that the zones node (obtained from the zonelist_cache
2341 * z_to_n[] mapping) is allowed in the passed in allowednodes mask.
2342 * Return true (non-zero) if zone is worth looking at further, or
2343 * else return false (zero) if it is not.
2344 *
2345 * This check -ignores- the distinction between various watermarks,
2346 * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ... If a zone is
2347 * found to be full for any variation of these watermarks, it will
2348 * be considered full for up to one second by all requests, unless
2349 * we are so low on memory on all allowed nodes that we are forced
2350 * into the second scan of the zonelist.
2351 *
2352 * In the second scan we ignore this zonelist cache and exactly
2353 * apply the watermarks to all zones, even it is slower to do so.
2354 * We are low on memory in the second scan, and should leave no stone
2355 * unturned looking for a free page.
2356 */
2357static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
2358 nodemask_t *allowednodes)
2359{
2360 struct zonelist_cache *zlc; /* cached zonelist speedup info */
2361 int i; /* index of *z in zonelist zones */
2362 int n; /* node that zone *z is on */
2363
2364 zlc = zonelist->zlcache_ptr;
2365 if (!zlc)
2366 return 1;
2367
2368 i = z - zonelist->_zonerefs;
2369 n = zlc->z_to_n[i];
2370
2371 /* This zone is worth trying if it is allowed but not full */
2372 return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones);
2373}
2374
2375/*
2376 * Given 'z' scanning a zonelist, set the corresponding bit in
2377 * zlc->fullzones, so that subsequent attempts to allocate a page
2378 * from that zone don't waste time re-examining it.
2379 */
2380static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
2381{
2382 struct zonelist_cache *zlc; /* cached zonelist speedup info */
2383 int i; /* index of *z in zonelist zones */
2384
2385 zlc = zonelist->zlcache_ptr;
2386 if (!zlc)
2387 return;
2388
2389 i = z - zonelist->_zonerefs;
2390
2391 set_bit(i, zlc->fullzones);
2392}
2393
2394/*
2395 * clear all zones full, called after direct reclaim makes progress so that
2396 * a zone that was recently full is not skipped over for up to a second
2397 */
2398static void zlc_clear_zones_full(struct zonelist *zonelist)
2399{
2400 struct zonelist_cache *zlc; /* cached zonelist speedup info */
2401
2402 zlc = zonelist->zlcache_ptr;
2403 if (!zlc)
2404 return;
2405
2406 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
2407}
2408
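The comment blocks above describe the zonelist cache that this patch deletes: a per-zonelist bitmap of zones that recently looked full, with the hints thrown away after one second. Below is a minimal user-space sketch of that idea only; the names are invented for illustration, time(NULL) stands in for jiffies, and the allowed-nodes side of zlc_setup() is left out, so treat it as a model of the mechanism rather than the kernel code.

#include <stdbool.h>
#include <string.h>
#include <time.h>

#define MAX_ZONES 8

struct zone_cache {
	unsigned char full[MAX_ZONES];	/* 1 = zone recently looked full */
	time_t last_zap;		/* when the hints were last cleared */
};

/* Throw away hints older than one second, as zlc_setup() did with jiffies. */
static void cache_refresh(struct zone_cache *zc)
{
	time_t now = time(NULL);

	if (now - zc->last_zap >= 1) {
		memset(zc->full, 0, sizeof(zc->full));
		zc->last_zap = now;
	}
}

/* Worth scanning this zone only if it was not marked full recently. */
static bool zone_worth_trying(const struct zone_cache *zc, int zone)
{
	return !zc->full[zone];
}

static void mark_zone_full(struct zone_cache *zc, int zone)
{
	zc->full[zone] = 1;
}

int main(void)
{
	struct zone_cache zc = { {0}, 0 };

	cache_refresh(&zc);
	mark_zone_full(&zc, 0);
	return zone_worth_trying(&zc, 0);	/* 0: zone 0 is now skipped */
}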
2409static bool zone_local(struct zone *local_zone, struct zone *zone) 2441static bool zone_local(struct zone *local_zone, struct zone *zone)
2410{ 2442{
2411 return local_zone->node == zone->node; 2443 return local_zone->node == zone->node;
@@ -2416,28 +2448,7 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
2416 return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < 2448 return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
2417 RECLAIM_DISTANCE; 2449 RECLAIM_DISTANCE;
2418} 2450}
2419
2420#else /* CONFIG_NUMA */ 2451#else /* CONFIG_NUMA */
2421
2422static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
2423{
2424 return NULL;
2425}
2426
2427static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
2428 nodemask_t *allowednodes)
2429{
2430 return 1;
2431}
2432
2433static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
2434{
2435}
2436
2437static void zlc_clear_zones_full(struct zonelist *zonelist)
2438{
2439}
2440
2441static bool zone_local(struct zone *local_zone, struct zone *zone) 2452static bool zone_local(struct zone *local_zone, struct zone *zone)
2442{ 2453{
2443 return true; 2454 return true;
@@ -2447,7 +2458,6 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
2447{ 2458{
2448 return true; 2459 return true;
2449} 2460}
2450
2451#endif /* CONFIG_NUMA */ 2461#endif /* CONFIG_NUMA */
2452 2462
2453static void reset_alloc_batches(struct zone *preferred_zone) 2463static void reset_alloc_batches(struct zone *preferred_zone)
@@ -2474,11 +2484,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
2474 struct zoneref *z; 2484 struct zoneref *z;
2475 struct page *page = NULL; 2485 struct page *page = NULL;
2476 struct zone *zone; 2486 struct zone *zone;
2477 nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
2478 int zlc_active = 0; /* set if using zonelist_cache */
2479 int did_zlc_setup = 0; /* just call zlc_setup() one time */
2480 bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
2481 (gfp_mask & __GFP_WRITE);
2482 int nr_fair_skipped = 0; 2487 int nr_fair_skipped = 0;
2483 bool zonelist_rescan; 2488 bool zonelist_rescan;
2484 2489
@@ -2493,9 +2498,6 @@ zonelist_scan:
2493 ac->nodemask) { 2498 ac->nodemask) {
2494 unsigned long mark; 2499 unsigned long mark;
2495 2500
2496 if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
2497 !zlc_zone_worth_trying(zonelist, z, allowednodes))
2498 continue;
2499 if (cpusets_enabled() && 2501 if (cpusets_enabled() &&
2500 (alloc_flags & ALLOC_CPUSET) && 2502 (alloc_flags & ALLOC_CPUSET) &&
2501 !cpuset_zone_allowed(zone, gfp_mask)) 2503 !cpuset_zone_allowed(zone, gfp_mask))
@@ -2533,14 +2535,14 @@ zonelist_scan:
2533 * 2535 *
2534 * XXX: For now, allow allocations to potentially 2536 * XXX: For now, allow allocations to potentially
2535 * exceed the per-zone dirty limit in the slowpath 2537 * exceed the per-zone dirty limit in the slowpath
2536 * (ALLOC_WMARK_LOW unset) before going into reclaim, 2538 * (spread_dirty_pages unset) before going into reclaim,
2537 * which is important when on a NUMA setup the allowed 2539 * which is important when on a NUMA setup the allowed
2538 * zones are together not big enough to reach the 2540 * zones are together not big enough to reach the
2539 * global limit. The proper fix for these situations 2541 * global limit. The proper fix for these situations
2540 * will require awareness of zones in the 2542 * will require awareness of zones in the
2541 * dirty-throttling and the flusher threads. 2543 * dirty-throttling and the flusher threads.
2542 */ 2544 */
2543 if (consider_zone_dirty && !zone_dirty_ok(zone)) 2545 if (ac->spread_dirty_pages && !zone_dirty_ok(zone))
2544 continue; 2546 continue;
2545 2547
2546 mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; 2548 mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
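The hunk above ties the per-zone dirty check to ac->spread_dirty_pages instead of inferring it from ALLOC_WMARK_LOW plus __GFP_WRITE; the flag is only set in the fast path, so the slow path accepts zones over their dirty share rather than failing. A rough user-space model of that gate, with invented names and a made-up per-zone dirty budget:

#include <stdbool.h>

struct zone_model {
	unsigned long nr_dirty;		/* dirty page-cache pages in the zone */
	unsigned long dirty_limit;	/* zone's share of the global limit */
};

/* Stand-in for zone_dirty_ok(): can this zone absorb more dirtying? */
static bool zone_dirty_ok_model(const struct zone_model *z)
{
	return z->nr_dirty < z->dirty_limit;
}

/*
 * Fast-path zone selection: skip zones over their dirty share only when the
 * caller asked for dirty balancing; the slow path clears the flag and takes
 * any zone rather than going into reclaim for a dirty-limit reason alone.
 */
static bool skip_zone_for_dirty(const struct zone_model *z,
				bool spread_dirty_pages)
{
	return spread_dirty_pages && !zone_dirty_ok_model(z);
}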
@@ -2553,28 +2555,8 @@ zonelist_scan:
2553 if (alloc_flags & ALLOC_NO_WATERMARKS) 2555 if (alloc_flags & ALLOC_NO_WATERMARKS)
2554 goto try_this_zone; 2556 goto try_this_zone;
2555 2557
2556 if (IS_ENABLED(CONFIG_NUMA) &&
2557 !did_zlc_setup && nr_online_nodes > 1) {
2558 /*
2559 * we do zlc_setup if there are multiple nodes
2560 * and before considering the first zone allowed
2561 * by the cpuset.
2562 */
2563 allowednodes = zlc_setup(zonelist, alloc_flags);
2564 zlc_active = 1;
2565 did_zlc_setup = 1;
2566 }
2567
2568 if (zone_reclaim_mode == 0 || 2558 if (zone_reclaim_mode == 0 ||
2569 !zone_allows_reclaim(ac->preferred_zone, zone)) 2559 !zone_allows_reclaim(ac->preferred_zone, zone))
2570 goto this_zone_full;
2571
2572 /*
2573 * As we may have just activated ZLC, check if the first
2574 * eligible zone has failed zone_reclaim recently.
2575 */
2576 if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
2577 !zlc_zone_worth_trying(zonelist, z, allowednodes))
2578 continue; 2560 continue;
2579 2561
2580 ret = zone_reclaim(zone, gfp_mask, order); 2562 ret = zone_reclaim(zone, gfp_mask, order);
@@ -2591,34 +2573,26 @@ zonelist_scan:
2591 ac->classzone_idx, alloc_flags)) 2573 ac->classzone_idx, alloc_flags))
2592 goto try_this_zone; 2574 goto try_this_zone;
2593 2575
2594 /*
2595 * Failed to reclaim enough to meet watermark.
2596 * Only mark the zone full if checking the min
2597 * watermark or if we failed to reclaim just
2598 * 1<<order pages or else the page allocator
2599 * fastpath will prematurely mark zones full
2600 * when the watermark is between the low and
2601 * min watermarks.
2602 */
2603 if (((alloc_flags & ALLOC_WMARK_MASK) == ALLOC_WMARK_MIN) ||
2604 ret == ZONE_RECLAIM_SOME)
2605 goto this_zone_full;
2606
2607 continue; 2576 continue;
2608 } 2577 }
2609 } 2578 }
2610 2579
2611try_this_zone: 2580try_this_zone:
2612 page = buffered_rmqueue(ac->preferred_zone, zone, order, 2581 page = buffered_rmqueue(ac->preferred_zone, zone, order,
2613 gfp_mask, ac->migratetype); 2582 gfp_mask, alloc_flags, ac->migratetype);
2614 if (page) { 2583 if (page) {
2615 if (prep_new_page(page, order, gfp_mask, alloc_flags)) 2584 if (prep_new_page(page, order, gfp_mask, alloc_flags))
2616 goto try_this_zone; 2585 goto try_this_zone;
2586
2587 /*
2588 * If this is a high-order atomic allocation then check
2589 * if the pageblock should be reserved for the future
2590 */
2591 if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
2592 reserve_highatomic_pageblock(page, zone, order);
2593
2617 return page; 2594 return page;
2618 } 2595 }
2619this_zone_full:
2620 if (IS_ENABLED(CONFIG_NUMA) && zlc_active)
2621 zlc_mark_zone_full(zonelist, z);
2622 } 2596 }
2623 2597
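The added reserve_highatomic_pageblock() call sets a pageblock aside after a successful high-order allocation with ALLOC_HARDER, and direct reclaim later gives it back through unreserve_highatomic_pageblock() when ordinary allocations start failing (see the __alloc_pages_direct_reclaim hunk further down). A toy model of that bookkeeping follows; the names are invented and the cap is arbitrary, since the real limit is not visible in this hunk.

#include <stdbool.h>

struct zone_model {
	unsigned long nr_reserved_highatomic;	/* pages currently set aside */
	unsigned long max_reserved;		/* small cap per zone */
	unsigned long pageblock_pages;		/* pages per pageblock */
};

/* After a successful high-order atomic allocation, grow the reserve up to the cap. */
static void reserve_highatomic(struct zone_model *z)
{
	if (z->nr_reserved_highatomic + z->pageblock_pages <= z->max_reserved)
		z->nr_reserved_highatomic += z->pageblock_pages;
}

/* When normal allocations keep failing after reclaim, release one reserved block. */
static bool unreserve_highatomic(struct zone_model *z)
{
	if (z->nr_reserved_highatomic < z->pageblock_pages)
		return false;
	z->nr_reserved_highatomic -= z->pageblock_pages;
	return true;
}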
2624 /* 2598 /*
@@ -2639,12 +2613,6 @@ this_zone_full:
2639 zonelist_rescan = true; 2613 zonelist_rescan = true;
2640 } 2614 }
2641 2615
2642 if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
2643 /* Disable zlc cache for second zonelist scan */
2644 zlc_active = 0;
2645 zonelist_rescan = true;
2646 }
2647
2648 if (zonelist_rescan) 2616 if (zonelist_rescan)
2649 goto zonelist_scan; 2617 goto zonelist_scan;
2650 2618
@@ -2669,7 +2637,7 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
2669 DEFAULT_RATELIMIT_INTERVAL, 2637 DEFAULT_RATELIMIT_INTERVAL,
2670 DEFAULT_RATELIMIT_BURST); 2638 DEFAULT_RATELIMIT_BURST);
2671 2639
2672void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) 2640void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
2673{ 2641{
2674 unsigned int filter = SHOW_MEM_FILTER_NODES; 2642 unsigned int filter = SHOW_MEM_FILTER_NODES;
2675 2643
@@ -2686,7 +2654,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
2686 if (test_thread_flag(TIF_MEMDIE) || 2654 if (test_thread_flag(TIF_MEMDIE) ||
2687 (current->flags & (PF_MEMALLOC | PF_EXITING))) 2655 (current->flags & (PF_MEMALLOC | PF_EXITING)))
2688 filter &= ~SHOW_MEM_FILTER_NODES; 2656 filter &= ~SHOW_MEM_FILTER_NODES;
2689 if (in_interrupt() || !(gfp_mask & __GFP_WAIT)) 2657 if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
2690 filter &= ~SHOW_MEM_FILTER_NODES; 2658 filter &= ~SHOW_MEM_FILTER_NODES;
2691 2659
2692 if (fmt) { 2660 if (fmt) {
@@ -2703,7 +2671,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
2703 va_end(args); 2671 va_end(args);
2704 } 2672 }
2705 2673
2706 pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n", 2674 pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n",
2707 current->comm, order, gfp_mask); 2675 current->comm, order, gfp_mask);
2708 2676
2709 dump_stack(); 2677 dump_stack();
@@ -2889,19 +2857,17 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2889 if (unlikely(!(*did_some_progress))) 2857 if (unlikely(!(*did_some_progress)))
2890 return NULL; 2858 return NULL;
2891 2859
2892 /* After successful reclaim, reconsider all zones for allocation */
2893 if (IS_ENABLED(CONFIG_NUMA))
2894 zlc_clear_zones_full(ac->zonelist);
2895
2896retry: 2860retry:
2897 page = get_page_from_freelist(gfp_mask, order, 2861 page = get_page_from_freelist(gfp_mask, order,
2898 alloc_flags & ~ALLOC_NO_WATERMARKS, ac); 2862 alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
2899 2863
2900 /* 2864 /*
2901 * If an allocation failed after direct reclaim, it could be because 2865 * If an allocation failed after direct reclaim, it could be because
2902 * pages are pinned on the per-cpu lists. Drain them and try again 2866 * pages are pinned on the per-cpu lists or in high alloc reserves.
 2867	 * Shrink them and try again
2903 */ 2868 */
2904 if (!page && !drained) { 2869 if (!page && !drained) {
2870 unreserve_highatomic_pageblock(ac);
2905 drain_all_pages(NULL); 2871 drain_all_pages(NULL);
2906 drained = true; 2872 drained = true;
2907 goto retry; 2873 goto retry;
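The retry above is a one-shot pattern: if the allocation still fails after direct reclaim, release the high-atomic reserve and drain the per-CPU lists exactly once, then try again. Sketched generically below; the three helpers are stand-in stubs, not the kernel functions, and the real calls take arguments.

#include <stdbool.h>
#include <stddef.h>

/* Stand-ins for get_page_from_freelist(), unreserve_highatomic_pageblock()
 * and drain_all_pages(); real signatures differ. */
static void *try_allocate(void) { return NULL; }
static void release_reserves(void) { }
static void drain_percpu_lists(void) { }

static void *alloc_after_reclaim(void)
{
	bool drained = false;
	void *page;

retry:
	page = try_allocate();
	if (!page && !drained) {
		/* Pages may be stranded in reserves or per-CPU lists; flush once. */
		release_reserves();
		drain_percpu_lists();
		drained = true;
		goto retry;
	}
	return page;
}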
@@ -2946,7 +2912,6 @@ static inline int
2946gfp_to_alloc_flags(gfp_t gfp_mask) 2912gfp_to_alloc_flags(gfp_t gfp_mask)
2947{ 2913{
2948 int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; 2914 int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
2949 const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
2950 2915
2951 /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ 2916 /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
2952 BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); 2917 BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2955,11 +2920,11 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
2955 * The caller may dip into page reserves a bit more if the caller 2920 * The caller may dip into page reserves a bit more if the caller
2956 * cannot run direct reclaim, or if the caller has realtime scheduling 2921 * cannot run direct reclaim, or if the caller has realtime scheduling
2957 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will 2922 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
2958 * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH). 2923 * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
2959 */ 2924 */
2960 alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); 2925 alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
2961 2926
2962 if (atomic) { 2927 if (gfp_mask & __GFP_ATOMIC) {
2963 /* 2928 /*
2964 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even 2929 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
2965 * if it can't schedule. 2930 * if it can't schedule.
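gfp_to_alloc_flags() now keys ALLOC_HARDER off __GFP_ATOMIC instead of the old "no __GFP_WAIT" heuristic, and it still copies __GFP_HIGH straight into ALLOC_HIGH because the two bits are defined equal (the BUILD_BUG_ON above enforces that). A simplified model with invented bit values; only the relationships between the flags matter here.

/* Invented bit values standing in for gfp.h / mm/internal.h definitions. */
#define GFPF_HIGH		0x01u
#define GFPF_ATOMIC		0x02u
#define GFPF_NOMEMALLOC		0x04u

#define ALLOCF_WMARK_MIN	0x00u
#define ALLOCF_HIGH		0x01u	/* deliberately equal to GFPF_HIGH */
#define ALLOCF_HARDER		0x10u
#define ALLOCF_CPUSET		0x20u

static unsigned int to_alloc_flags(unsigned int gfp)
{
	unsigned int flags = ALLOCF_WMARK_MIN | ALLOCF_CPUSET;

	/* __GFP_HIGH maps to ALLOC_HIGH with a plain mask, no branch needed. */
	flags |= gfp & GFPF_HIGH;

	/* Atomic callers may dig deeper, unless they opted out of reserves. */
	if ((gfp & GFPF_ATOMIC) && !(gfp & GFPF_NOMEMALLOC))
		flags |= ALLOCF_HARDER;

	return flags;
}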
@@ -2996,11 +2961,16 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
2996 return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS); 2961 return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
2997} 2962}
2998 2963
2964static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
2965{
2966 return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
2967}
2968
2999static inline struct page * 2969static inline struct page *
3000__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, 2970__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
3001 struct alloc_context *ac) 2971 struct alloc_context *ac)
3002{ 2972{
3003 const gfp_t wait = gfp_mask & __GFP_WAIT; 2973 bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
3004 struct page *page = NULL; 2974 struct page *page = NULL;
3005 int alloc_flags; 2975 int alloc_flags;
3006 unsigned long pages_reclaimed = 0; 2976 unsigned long pages_reclaimed = 0;
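The series splits the old __GFP_WAIT into __GFP_DIRECT_RECLAIM (the caller may block and reclaim itself), __GFP_KSWAPD_RECLAIM (kswapd may be woken) and __GFP_ATOMIC (the caller cannot sleep and may dip into reserves). gfpflags_allow_blocking(), used throughout the later slab, slub, vmalloc and vmscan hunks, is a test on the first bit, and the slow path in the next hunk warns and strips __GFP_ATOMIC when it is combined with direct reclaim. A compact model of those predicates, with invented bit values:

#include <stdbool.h>

/* Invented bit values standing in for the real gfp.h definitions. */
#define GFPF_DIRECT_RECLAIM	0x01u
#define GFPF_KSWAPD_RECLAIM	0x02u
#define GFPF_ATOMIC		0x04u

/* May this caller sleep and run direct reclaim? (cf. gfpflags_allow_blocking) */
static bool allow_blocking(unsigned int gfp)
{
	return gfp & GFPF_DIRECT_RECLAIM;
}

/* Only wake kswapd when the caller asked for background reclaim. */
static bool should_wake_kswapd(unsigned int gfp)
{
	return gfp & GFPF_KSWAPD_RECLAIM;
}

/* Atomic access to reserves combined with direct reclaim is a caller bug;
 * drop the atomic bit, mirroring the WARN_ON_ONCE check in the slow path. */
static unsigned int sanitize(unsigned int gfp)
{
	if ((gfp & (GFPF_ATOMIC | GFPF_DIRECT_RECLAIM)) ==
	    (GFPF_ATOMIC | GFPF_DIRECT_RECLAIM))
		gfp &= ~GFPF_ATOMIC;
	return gfp;
}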
@@ -3021,15 +2991,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
3021 } 2991 }
3022 2992
3023 /* 2993 /*
2994 * We also sanity check to catch abuse of atomic reserves being used by
2995 * callers that are not in atomic context.
2996 */
2997 if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
2998 (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
2999 gfp_mask &= ~__GFP_ATOMIC;
3000
3001 /*
3024 * If this allocation cannot block and it is for a specific node, then 3002 * If this allocation cannot block and it is for a specific node, then
3025 * fail early. There's no need to wakeup kswapd or retry for a 3003 * fail early. There's no need to wakeup kswapd or retry for a
3026 * speculative node-specific allocation. 3004 * speculative node-specific allocation.
3027 */ 3005 */
3028 if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait) 3006 if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim)
3029 goto nopage; 3007 goto nopage;
3030 3008
3031retry: 3009retry:
3032 if (!(gfp_mask & __GFP_NO_KSWAPD)) 3010 if (gfp_mask & __GFP_KSWAPD_RECLAIM)
3033 wake_all_kswapds(order, ac); 3011 wake_all_kswapds(order, ac);
3034 3012
3035 /* 3013 /*
@@ -3072,8 +3050,8 @@ retry:
3072 } 3050 }
3073 } 3051 }
3074 3052
3075 /* Atomic allocations - we can't balance anything */ 3053 /* Caller is not willing to reclaim, we can't balance anything */
3076 if (!wait) { 3054 if (!can_direct_reclaim) {
3077 /* 3055 /*
3078 * All existing users of the deprecated __GFP_NOFAIL are 3056 * All existing users of the deprecated __GFP_NOFAIL are
3079 * blockable, so warn of any new users that actually allow this 3057 * blockable, so warn of any new users that actually allow this
@@ -3103,7 +3081,7 @@ retry:
3103 goto got_pg; 3081 goto got_pg;
3104 3082
3105 /* Checks for THP-specific high-order allocations */ 3083 /* Checks for THP-specific high-order allocations */
3106 if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) { 3084 if (is_thp_gfp_mask(gfp_mask)) {
3107 /* 3085 /*
3108 * If compaction is deferred for high-order allocations, it is 3086 * If compaction is deferred for high-order allocations, it is
3109 * because sync compaction recently failed. If this is the case 3087 * because sync compaction recently failed. If this is the case
@@ -3138,8 +3116,7 @@ retry:
3138 * fault, so use asynchronous memory compaction for THP unless it is 3116 * fault, so use asynchronous memory compaction for THP unless it is
3139 * khugepaged trying to collapse. 3117 * khugepaged trying to collapse.
3140 */ 3118 */
3141 if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE || 3119 if (!is_thp_gfp_mask(gfp_mask) || (current->flags & PF_KTHREAD))
3142 (current->flags & PF_KTHREAD))
3143 migration_mode = MIGRATE_SYNC_LIGHT; 3120 migration_mode = MIGRATE_SYNC_LIGHT;
3144 3121
3145 /* Try direct reclaim and then allocating */ 3122 /* Try direct reclaim and then allocating */
@@ -3210,7 +3187,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
3210 3187
3211 lockdep_trace_alloc(gfp_mask); 3188 lockdep_trace_alloc(gfp_mask);
3212 3189
3213 might_sleep_if(gfp_mask & __GFP_WAIT); 3190 might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
3214 3191
3215 if (should_fail_alloc_page(gfp_mask, order)) 3192 if (should_fail_alloc_page(gfp_mask, order))
3216 return NULL; 3193 return NULL;
@@ -3231,6 +3208,10 @@ retry_cpuset:
3231 3208
3232 /* We set it here, as __alloc_pages_slowpath might have changed it */ 3209 /* We set it here, as __alloc_pages_slowpath might have changed it */
3233 ac.zonelist = zonelist; 3210 ac.zonelist = zonelist;
3211
3212 /* Dirty zone balancing only done in the fast path */
3213 ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
3214
3234 /* The preferred zone is used for statistics later */ 3215 /* The preferred zone is used for statistics later */
3235 preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, 3216 preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx,
3236 ac.nodemask ? : &cpuset_current_mems_allowed, 3217 ac.nodemask ? : &cpuset_current_mems_allowed,
@@ -3249,6 +3230,7 @@ retry_cpuset:
3249 * complete. 3230 * complete.
3250 */ 3231 */
3251 alloc_mask = memalloc_noio_flags(gfp_mask); 3232 alloc_mask = memalloc_noio_flags(gfp_mask);
3233 ac.spread_dirty_pages = false;
3252 3234
3253 page = __alloc_pages_slowpath(alloc_mask, order, &ac); 3235 page = __alloc_pages_slowpath(alloc_mask, order, &ac);
3254 } 3236 }
@@ -3467,7 +3449,8 @@ void free_kmem_pages(unsigned long addr, unsigned int order)
3467 } 3449 }
3468} 3450}
3469 3451
3470static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) 3452static void *make_alloc_exact(unsigned long addr, unsigned int order,
3453 size_t size)
3471{ 3454{
3472 if (addr) { 3455 if (addr) {
3473 unsigned long alloc_end = addr + (PAGE_SIZE << order); 3456 unsigned long alloc_end = addr + (PAGE_SIZE << order);
@@ -3517,7 +3500,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
3517 */ 3500 */
3518void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) 3501void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
3519{ 3502{
3520 unsigned order = get_order(size); 3503 unsigned int order = get_order(size);
3521 struct page *p = alloc_pages_node(nid, gfp_mask, order); 3504 struct page *p = alloc_pages_node(nid, gfp_mask, order);
3522 if (!p) 3505 if (!p)
3523 return NULL; 3506 return NULL;
@@ -3666,7 +3649,6 @@ static void show_migration_types(unsigned char type)
3666 [MIGRATE_UNMOVABLE] = 'U', 3649 [MIGRATE_UNMOVABLE] = 'U',
3667 [MIGRATE_RECLAIMABLE] = 'E', 3650 [MIGRATE_RECLAIMABLE] = 'E',
3668 [MIGRATE_MOVABLE] = 'M', 3651 [MIGRATE_MOVABLE] = 'M',
3669 [MIGRATE_RESERVE] = 'R',
3670#ifdef CONFIG_CMA 3652#ifdef CONFIG_CMA
3671 [MIGRATE_CMA] = 'C', 3653 [MIGRATE_CMA] = 'C',
3672#endif 3654#endif
@@ -3819,7 +3801,8 @@ void show_free_areas(unsigned int filter)
3819 } 3801 }
3820 3802
3821 for_each_populated_zone(zone) { 3803 for_each_populated_zone(zone) {
3822 unsigned long nr[MAX_ORDER], flags, order, total = 0; 3804 unsigned int order;
3805 unsigned long nr[MAX_ORDER], flags, total = 0;
3823 unsigned char types[MAX_ORDER]; 3806 unsigned char types[MAX_ORDER];
3824 3807
3825 if (skip_free_areas_node(filter, zone_to_nid(zone))) 3808 if (skip_free_areas_node(filter, zone_to_nid(zone)))
@@ -4168,7 +4151,7 @@ static void build_zonelists(pg_data_t *pgdat)
4168 nodemask_t used_mask; 4151 nodemask_t used_mask;
4169 int local_node, prev_node; 4152 int local_node, prev_node;
4170 struct zonelist *zonelist; 4153 struct zonelist *zonelist;
4171 int order = current_zonelist_order; 4154 unsigned int order = current_zonelist_order;
4172 4155
4173 /* initialize zonelists */ 4156 /* initialize zonelists */
4174 for (i = 0; i < MAX_ZONELISTS; i++) { 4157 for (i = 0; i < MAX_ZONELISTS; i++) {
@@ -4212,20 +4195,6 @@ static void build_zonelists(pg_data_t *pgdat)
4212 build_thisnode_zonelists(pgdat); 4195 build_thisnode_zonelists(pgdat);
4213} 4196}
4214 4197
4215/* Construct the zonelist performance cache - see further mmzone.h */
4216static void build_zonelist_cache(pg_data_t *pgdat)
4217{
4218 struct zonelist *zonelist;
4219 struct zonelist_cache *zlc;
4220 struct zoneref *z;
4221
4222 zonelist = &pgdat->node_zonelists[0];
4223 zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
4224 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
4225 for (z = zonelist->_zonerefs; z->zone; z++)
4226 zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
4227}
4228
4229#ifdef CONFIG_HAVE_MEMORYLESS_NODES 4198#ifdef CONFIG_HAVE_MEMORYLESS_NODES
4230/* 4199/*
4231 * Return node id of node used for "local" allocations. 4200 * Return node id of node used for "local" allocations.
@@ -4286,12 +4255,6 @@ static void build_zonelists(pg_data_t *pgdat)
4286 zonelist->_zonerefs[j].zone_idx = 0; 4255 zonelist->_zonerefs[j].zone_idx = 0;
4287} 4256}
4288 4257
4289/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
4290static void build_zonelist_cache(pg_data_t *pgdat)
4291{
4292 pgdat->node_zonelists[0].zlcache_ptr = NULL;
4293}
4294
4295#endif /* CONFIG_NUMA */ 4258#endif /* CONFIG_NUMA */
4296 4259
4297/* 4260/*
@@ -4332,14 +4295,12 @@ static int __build_all_zonelists(void *data)
4332 4295
4333 if (self && !node_online(self->node_id)) { 4296 if (self && !node_online(self->node_id)) {
4334 build_zonelists(self); 4297 build_zonelists(self);
4335 build_zonelist_cache(self);
4336 } 4298 }
4337 4299
4338 for_each_online_node(nid) { 4300 for_each_online_node(nid) {
4339 pg_data_t *pgdat = NODE_DATA(nid); 4301 pg_data_t *pgdat = NODE_DATA(nid);
4340 4302
4341 build_zonelists(pgdat); 4303 build_zonelists(pgdat);
4342 build_zonelist_cache(pgdat);
4343 } 4304 }
4344 4305
4345 /* 4306 /*
@@ -4499,120 +4460,6 @@ static inline unsigned long wait_table_bits(unsigned long size)
4499} 4460}
4500 4461
4501/* 4462/*
4502 * Check if a pageblock contains reserved pages
4503 */
4504static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
4505{
4506 unsigned long pfn;
4507
4508 for (pfn = start_pfn; pfn < end_pfn; pfn++) {
4509 if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
4510 return 1;
4511 }
4512 return 0;
4513}
4514
4515/*
4516 * Mark a number of pageblocks as MIGRATE_RESERVE. The number
4517 * of blocks reserved is based on min_wmark_pages(zone). The memory within
4518 * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
4519 * higher will lead to a bigger reserve which will get freed as contiguous
4520 * blocks as reclaim kicks in
4521 */
4522static void setup_zone_migrate_reserve(struct zone *zone)
4523{
4524 unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
4525 struct page *page;
4526 unsigned long block_migratetype;
4527 int reserve;
4528 int old_reserve;
4529
4530 /*
4531 * Get the start pfn, end pfn and the number of blocks to reserve
4532 * We have to be careful to be aligned to pageblock_nr_pages to
4533 * make sure that we always check pfn_valid for the first page in
4534 * the block.
4535 */
4536 start_pfn = zone->zone_start_pfn;
4537 end_pfn = zone_end_pfn(zone);
4538 start_pfn = roundup(start_pfn, pageblock_nr_pages);
4539 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
4540 pageblock_order;
4541
4542 /*
4543 * Reserve blocks are generally in place to help high-order atomic
4544 * allocations that are short-lived. A min_free_kbytes value that
4545 * would result in more than 2 reserve blocks for atomic allocations
4546 * is assumed to be in place to help anti-fragmentation for the
4547 * future allocation of hugepages at runtime.
4548 */
4549 reserve = min(2, reserve);
4550 old_reserve = zone->nr_migrate_reserve_block;
4551
4552 /* When memory hot-add, we almost always need to do nothing */
4553 if (reserve == old_reserve)
4554 return;
4555 zone->nr_migrate_reserve_block = reserve;
4556
4557 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
4558 if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
4559 return;
4560
4561 if (!pfn_valid(pfn))
4562 continue;
4563 page = pfn_to_page(pfn);
4564
4565 /* Watch out for overlapping nodes */
4566 if (page_to_nid(page) != zone_to_nid(zone))
4567 continue;
4568
4569 block_migratetype = get_pageblock_migratetype(page);
4570
4571 /* Only test what is necessary when the reserves are not met */
4572 if (reserve > 0) {
4573 /*
4574 * Blocks with reserved pages will never free, skip
4575 * them.
4576 */
4577 block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
4578 if (pageblock_is_reserved(pfn, block_end_pfn))
4579 continue;
4580
4581 /* If this block is reserved, account for it */
4582 if (block_migratetype == MIGRATE_RESERVE) {
4583 reserve--;
4584 continue;
4585 }
4586
4587 /* Suitable for reserving if this block is movable */
4588 if (block_migratetype == MIGRATE_MOVABLE) {
4589 set_pageblock_migratetype(page,
4590 MIGRATE_RESERVE);
4591 move_freepages_block(zone, page,
4592 MIGRATE_RESERVE);
4593 reserve--;
4594 continue;
4595 }
4596 } else if (!old_reserve) {
4597 /*
4598 * At boot time we don't need to scan the whole zone
4599 * for turning off MIGRATE_RESERVE.
4600 */
4601 break;
4602 }
4603
4604 /*
4605 * If the reserve is met and this is a previous reserved block,
4606 * take it back
4607 */
4608 if (block_migratetype == MIGRATE_RESERVE) {
4609 set_pageblock_migratetype(page, MIGRATE_MOVABLE);
4610 move_freepages_block(zone, page, MIGRATE_MOVABLE);
4611 }
4612 }
4613}
4614
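For reference, the deleted setup_zone_migrate_reserve() sized its reserve from the zone's min watermark: round min_wmark_pages(zone) up to whole pageblocks, convert that to a block count, and clamp it to two blocks. A worked example of the arithmetic; the watermark value is made up, and pageblock_order 9 is the common 2 MB pageblock with 4 KB pages.

#include <stdio.h>

#define PAGEBLOCK_ORDER		9
#define PAGEBLOCK_NR_PAGES	(1UL << PAGEBLOCK_ORDER)	/* 512 pages */

static unsigned long roundup_pages(unsigned long x, unsigned long to)
{
	return ((x + to - 1) / to) * to;
}

int main(void)
{
	unsigned long min_wmark = 700;	/* pretend min watermark, in pages */
	unsigned long reserve;

	/* 700 -> 1024 pages -> 2 pageblocks; larger watermarks are clamped to 2. */
	reserve = roundup_pages(min_wmark, PAGEBLOCK_NR_PAGES) >> PAGEBLOCK_ORDER;
	if (reserve > 2)
		reserve = 2;

	printf("MIGRATE_RESERVE blocks: %lu\n", reserve);
	return 0;
}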
4615/*
4616 * Initially all pages are reserved - free ones are freed 4463 * Initially all pages are reserved - free ones are freed
4617 * up by free_all_bootmem() once the early boot process is 4464 * up by free_all_bootmem() once the early boot process is
4618 * done. Non-atomic initialization, single-pass. 4465 * done. Non-atomic initialization, single-pass.
@@ -4651,9 +4498,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
4651 * movable at startup. This will force kernel allocations 4498 * movable at startup. This will force kernel allocations
4652 * to reserve their blocks rather than leaking throughout 4499 * to reserve their blocks rather than leaking throughout
4653 * the address space during boot when many long-lived 4500 * the address space during boot when many long-lived
4654 * kernel allocations are made. Later some blocks near 4501 * kernel allocations are made.
4655 * the start are marked MIGRATE_RESERVE by
4656 * setup_zone_migrate_reserve()
4657 * 4502 *
4658 * bitmap is created for zone's valid pfn range. but memmap 4503 * bitmap is created for zone's valid pfn range. but memmap
4659 * can be created for invalid pages (for alignment) 4504 * can be created for invalid pages (for alignment)
@@ -6214,7 +6059,6 @@ static void __setup_per_zone_wmarks(void)
6214 high_wmark_pages(zone) - low_wmark_pages(zone) - 6059 high_wmark_pages(zone) - low_wmark_pages(zone) -
6215 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); 6060 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
6216 6061
6217 setup_zone_migrate_reserve(zone);
6218 spin_unlock_irqrestore(&zone->lock, flags); 6062 spin_unlock_irqrestore(&zone->lock, flags);
6219 } 6063 }
6220 6064
@@ -6836,7 +6680,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
6836 unsigned migratetype) 6680 unsigned migratetype)
6837{ 6681{
6838 unsigned long outer_start, outer_end; 6682 unsigned long outer_start, outer_end;
6839 int ret = 0, order; 6683 unsigned int order;
6684 int ret = 0;
6840 6685
6841 struct compact_control cc = { 6686 struct compact_control cc = {
6842 .nr_migratepages = 0, 6687 .nr_migratepages = 0,
diff --git a/mm/readahead.c b/mm/readahead.c
index 998ad592f408..ba22d7fe0afb 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -90,7 +90,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
90 page = list_to_page(pages); 90 page = list_to_page(pages);
91 list_del(&page->lru); 91 list_del(&page->lru);
92 if (add_to_page_cache_lru(page, mapping, page->index, 92 if (add_to_page_cache_lru(page, mapping, page->index,
93 GFP_KERNEL & mapping_gfp_mask(mapping))) { 93 mapping_gfp_constraint(mapping, GFP_KERNEL))) {
94 read_cache_pages_invalidate_page(mapping, page); 94 read_cache_pages_invalidate_page(mapping, page);
95 continue; 95 continue;
96 } 96 }
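mapping_gfp_constraint() is read here as the old expression with its operands swapped: it masks the caller's requested gfp flags with whatever the address_space permits. A one-line sketch of the helper as assumed, using a stand-in mapping type rather than the kernel's struct address_space:

typedef unsigned int gfp_t;

struct address_space_model {
	gfp_t gfp_mask;		/* flags the mapping tolerates, e.g. no FS recursion */
};

/* Restrict a requested allocation mask to what the mapping allows. */
static gfp_t mapping_gfp_constraint_model(const struct address_space_model *m,
					  gfp_t gfp)
{
	return m->gfp_mask & gfp;
}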
@@ -128,7 +128,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
128 struct page *page = list_to_page(pages); 128 struct page *page = list_to_page(pages);
129 list_del(&page->lru); 129 list_del(&page->lru);
130 if (!add_to_page_cache_lru(page, mapping, page->index, 130 if (!add_to_page_cache_lru(page, mapping, page->index,
131 GFP_KERNEL & mapping_gfp_mask(mapping))) { 131 mapping_gfp_constraint(mapping, GFP_KERNEL))) {
132 mapping->a_ops->readpage(filp, page); 132 mapping->a_ops->readpage(filp, page);
133 } 133 }
134 page_cache_release(page); 134 page_cache_release(page);
diff --git a/mm/shmem.c b/mm/shmem.c
index 3b8b73928398..9187eee4128b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -73,6 +73,8 @@ static struct vfsmount *shm_mnt;
73#include <asm/uaccess.h> 73#include <asm/uaccess.h>
74#include <asm/pgtable.h> 74#include <asm/pgtable.h>
75 75
76#include "internal.h"
77
76#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) 78#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
77#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) 79#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
78 80
diff --git a/mm/slab.c b/mm/slab.c
index 272e809404d5..e0819fa96559 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1031,12 +1031,12 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1031} 1031}
1032 1032
1033/* 1033/*
1034 * Construct gfp mask to allocate from a specific node but do not invoke reclaim 1034 * Construct gfp mask to allocate from a specific node but do not direct reclaim
1035 * or warn about failures. 1035 * or warn about failures. kswapd may still wake to reclaim in the background.
1036 */ 1036 */
1037static inline gfp_t gfp_exact_node(gfp_t flags) 1037static inline gfp_t gfp_exact_node(gfp_t flags)
1038{ 1038{
1039 return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT; 1039 return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
1040} 1040}
1041#endif 1041#endif
1042 1042
@@ -1889,21 +1889,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
1889 1889
1890 freelist = page->freelist; 1890 freelist = page->freelist;
1891 slab_destroy_debugcheck(cachep, page); 1891 slab_destroy_debugcheck(cachep, page);
1892 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { 1892 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
1893 struct rcu_head *head; 1893 call_rcu(&page->rcu_head, kmem_rcu_free);
1894 1894 else
1895 /*
1896 * RCU free overloads the RCU head over the LRU.
 1897		 * slab_page has been overloaded over the LRU,
1898 * however it is not used from now on so that
1899 * we can use it safely.
1900 */
1901 head = (void *)&page->rcu_head;
1902 call_rcu(head, kmem_rcu_free);
1903
1904 } else {
1905 kmem_freepages(cachep, page); 1895 kmem_freepages(cachep, page);
1906 }
1907 1896
1908 /* 1897 /*
1909 * From now on, we don't use freelist 1898 * From now on, we don't use freelist
@@ -2633,7 +2622,7 @@ static int cache_grow(struct kmem_cache *cachep,
2633 2622
2634 offset *= cachep->colour_off; 2623 offset *= cachep->colour_off;
2635 2624
2636 if (local_flags & __GFP_WAIT) 2625 if (gfpflags_allow_blocking(local_flags))
2637 local_irq_enable(); 2626 local_irq_enable();
2638 2627
2639 /* 2628 /*
@@ -2663,7 +2652,7 @@ static int cache_grow(struct kmem_cache *cachep,
2663 2652
2664 cache_init_objs(cachep, page); 2653 cache_init_objs(cachep, page);
2665 2654
2666 if (local_flags & __GFP_WAIT) 2655 if (gfpflags_allow_blocking(local_flags))
2667 local_irq_disable(); 2656 local_irq_disable();
2668 check_irq_off(); 2657 check_irq_off();
2669 spin_lock(&n->list_lock); 2658 spin_lock(&n->list_lock);
@@ -2677,7 +2666,7 @@ static int cache_grow(struct kmem_cache *cachep,
2677opps1: 2666opps1:
2678 kmem_freepages(cachep, page); 2667 kmem_freepages(cachep, page);
2679failed: 2668failed:
2680 if (local_flags & __GFP_WAIT) 2669 if (gfpflags_allow_blocking(local_flags))
2681 local_irq_disable(); 2670 local_irq_disable();
2682 return 0; 2671 return 0;
2683} 2672}
@@ -2869,7 +2858,7 @@ force_grow:
2869static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, 2858static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
2870 gfp_t flags) 2859 gfp_t flags)
2871{ 2860{
2872 might_sleep_if(flags & __GFP_WAIT); 2861 might_sleep_if(gfpflags_allow_blocking(flags));
2873#if DEBUG 2862#if DEBUG
2874 kmem_flagcheck(cachep, flags); 2863 kmem_flagcheck(cachep, flags);
2875#endif 2864#endif
@@ -3057,11 +3046,11 @@ retry:
3057 */ 3046 */
3058 struct page *page; 3047 struct page *page;
3059 3048
3060 if (local_flags & __GFP_WAIT) 3049 if (gfpflags_allow_blocking(local_flags))
3061 local_irq_enable(); 3050 local_irq_enable();
3062 kmem_flagcheck(cache, flags); 3051 kmem_flagcheck(cache, flags);
3063 page = kmem_getpages(cache, local_flags, numa_mem_id()); 3052 page = kmem_getpages(cache, local_flags, numa_mem_id());
3064 if (local_flags & __GFP_WAIT) 3053 if (gfpflags_allow_blocking(local_flags))
3065 local_irq_disable(); 3054 local_irq_disable();
3066 if (page) { 3055 if (page) {
3067 /* 3056 /*
diff --git a/mm/slub.c b/mm/slub.c
index 75a5fa92ac2a..7cb4bf9ae320 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1265,7 +1265,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
1265{ 1265{
1266 flags &= gfp_allowed_mask; 1266 flags &= gfp_allowed_mask;
1267 lockdep_trace_alloc(flags); 1267 lockdep_trace_alloc(flags);
1268 might_sleep_if(flags & __GFP_WAIT); 1268 might_sleep_if(gfpflags_allow_blocking(flags));
1269 1269
1270 if (should_failslab(s->object_size, flags, s->flags)) 1270 if (should_failslab(s->object_size, flags, s->flags))
1271 return NULL; 1271 return NULL;
@@ -1353,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1353 1353
1354 flags &= gfp_allowed_mask; 1354 flags &= gfp_allowed_mask;
1355 1355
1356 if (flags & __GFP_WAIT) 1356 if (gfpflags_allow_blocking(flags))
1357 local_irq_enable(); 1357 local_irq_enable();
1358 1358
1359 flags |= s->allocflags; 1359 flags |= s->allocflags;
@@ -1363,8 +1363,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1363 * so we fall-back to the minimum order allocation. 1363 * so we fall-back to the minimum order allocation.
1364 */ 1364 */
1365 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; 1365 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1366 if ((alloc_gfp & __GFP_WAIT) && oo_order(oo) > oo_order(s->min)) 1366 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1367 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_WAIT; 1367 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM;
1368 1368
1369 page = alloc_slab_page(s, alloc_gfp, node, oo); 1369 page = alloc_slab_page(s, alloc_gfp, node, oo);
1370 if (unlikely(!page)) { 1370 if (unlikely(!page)) {
@@ -1424,7 +1424,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1424 page->frozen = 1; 1424 page->frozen = 1;
1425 1425
1426out: 1426out:
1427 if (flags & __GFP_WAIT) 1427 if (gfpflags_allow_blocking(flags))
1428 local_irq_disable(); 1428 local_irq_disable();
1429 if (!page) 1429 if (!page)
1430 return NULL; 1430 return NULL;
@@ -1507,10 +1507,7 @@ static void free_slab(struct kmem_cache *s, struct page *page)
1507 VM_BUG_ON(s->reserved != sizeof(*head)); 1507 VM_BUG_ON(s->reserved != sizeof(*head));
1508 head = page_address(page) + offset; 1508 head = page_address(page) + offset;
1509 } else { 1509 } else {
1510 /* 1510 head = &page->rcu_head;
1511 * RCU free overloads the RCU head over the LRU
1512 */
1513 head = (void *)&page->lru;
1514 } 1511 }
1515 1512
1516 call_rcu(head, rcu_free_slab); 1513 call_rcu(head, rcu_free_slab);
diff --git a/mm/swap.c b/mm/swap.c
index 983f692a47fd..39395fb549c0 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -201,7 +201,7 @@ out_put_single:
201 __put_single_page(page); 201 __put_single_page(page);
202 return; 202 return;
203 } 203 }
204 VM_BUG_ON_PAGE(page_head != page->first_page, page); 204 VM_BUG_ON_PAGE(page_head != compound_head(page), page);
205 /* 205 /*
206 * We can release the refcount taken by 206 * We can release the refcount taken by
207 * get_page_unless_zero() now that 207 * get_page_unless_zero() now that
@@ -262,7 +262,7 @@ static void put_compound_page(struct page *page)
262 * Case 3 is possible, as we may race with 262 * Case 3 is possible, as we may race with
263 * __split_huge_page_refcount tearing down a THP page. 263 * __split_huge_page_refcount tearing down a THP page.
264 */ 264 */
265 page_head = compound_head_by_tail(page); 265 page_head = compound_head(page);
266 if (!__compound_tail_refcounted(page_head)) 266 if (!__compound_tail_refcounted(page_head))
267 put_unrefcounted_compound_page(page_head, page); 267 put_unrefcounted_compound_page(page_head, page);
268 else 268 else
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9db9ef5e8481..d04563480c94 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -35,6 +35,8 @@
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include <asm/shmparam.h> 36#include <asm/shmparam.h>
37 37
38#include "internal.h"
39
38struct vfree_deferred { 40struct vfree_deferred {
39 struct llist_head list; 41 struct llist_head list;
40 struct work_struct wq; 42 struct work_struct wq;
@@ -1617,7 +1619,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1617 goto fail; 1619 goto fail;
1618 } 1620 }
1619 area->pages[i] = page; 1621 area->pages[i] = page;
1620 if (gfp_mask & __GFP_WAIT) 1622 if (gfpflags_allow_blocking(gfp_mask))
1621 cond_resched(); 1623 cond_resched();
1622 } 1624 }
1623 1625
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 55721b619aee..2aec4241b42a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1476,7 +1476,7 @@ static int too_many_isolated(struct zone *zone, int file,
1476 * won't get blocked by normal direct-reclaimers, forming a circular 1476 * won't get blocked by normal direct-reclaimers, forming a circular
1477 * deadlock. 1477 * deadlock.
1478 */ 1478 */
1479 if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS) 1479 if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
1480 inactive >>= 3; 1480 inactive >>= 3;
1481 1481
1482 return isolated > inactive; 1482 return isolated > inactive;
@@ -2477,7 +2477,7 @@ static inline bool compaction_ready(struct zone *zone, int order)
2477 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( 2477 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
2478 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); 2478 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
2479 watermark = high_wmark_pages(zone) + balance_gap + (2UL << order); 2479 watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
2480 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); 2480 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0);
2481 2481
2482 /* 2482 /*
2483 * If compaction is deferred, reclaim up to a point where 2483 * If compaction is deferred, reclaim up to a point where
@@ -2960,7 +2960,7 @@ static bool zone_balanced(struct zone *zone, int order,
2960 unsigned long balance_gap, int classzone_idx) 2960 unsigned long balance_gap, int classzone_idx)
2961{ 2961{
2962 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) + 2962 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
2963 balance_gap, classzone_idx, 0)) 2963 balance_gap, classzone_idx))
2964 return false; 2964 return false;
2965 2965
2966 if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone, 2966 if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone,
@@ -3791,7 +3791,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3791 /* 3791 /*
3792 * Do not scan if the allocation should not be delayed. 3792 * Do not scan if the allocation should not be delayed.
3793 */ 3793 */
3794 if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC)) 3794 if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC))
3795 return ZONE_RECLAIM_NOSCAN; 3795 return ZONE_RECLAIM_NOSCAN;
3796 3796
3797 /* 3797 /*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ffcb4f58bf3e..879a2be23325 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -923,7 +923,7 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
923 "Unmovable", 923 "Unmovable",
924 "Reclaimable", 924 "Reclaimable",
925 "Movable", 925 "Movable",
926 "Reserve", 926 "HighAtomic",
927#ifdef CONFIG_CMA 927#ifdef CONFIG_CMA
928 "CMA", 928 "CMA",
929#endif 929#endif
diff --git a/mm/zbud.c b/mm/zbud.c
index fa48bcdff9d5..d8a181fd779b 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -137,7 +137,7 @@ static const struct zbud_ops zbud_zpool_ops = {
137 .evict = zbud_zpool_evict 137 .evict = zbud_zpool_evict
138}; 138};
139 139
140static void *zbud_zpool_create(char *name, gfp_t gfp, 140static void *zbud_zpool_create(const char *name, gfp_t gfp,
141 const struct zpool_ops *zpool_ops, 141 const struct zpool_ops *zpool_ops,
142 struct zpool *zpool) 142 struct zpool *zpool)
143{ 143{
diff --git a/mm/zpool.c b/mm/zpool.c
index 8f670d3e8706..fd3ff719c32c 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -18,8 +18,6 @@
18#include <linux/zpool.h> 18#include <linux/zpool.h>
19 19
20struct zpool { 20struct zpool {
21 char *type;
22
23 struct zpool_driver *driver; 21 struct zpool_driver *driver;
24 void *pool; 22 void *pool;
25 const struct zpool_ops *ops; 23 const struct zpool_ops *ops;
@@ -73,7 +71,8 @@ int zpool_unregister_driver(struct zpool_driver *driver)
73} 71}
74EXPORT_SYMBOL(zpool_unregister_driver); 72EXPORT_SYMBOL(zpool_unregister_driver);
75 73
76static struct zpool_driver *zpool_get_driver(char *type) 74/* this assumes @type is null-terminated. */
75static struct zpool_driver *zpool_get_driver(const char *type)
77{ 76{
78 struct zpool_driver *driver; 77 struct zpool_driver *driver;
79 78
@@ -113,6 +112,8 @@ static void zpool_put_driver(struct zpool_driver *driver)
113 * not be loaded, and calling @zpool_create_pool() with the pool type will 112 * not be loaded, and calling @zpool_create_pool() with the pool type will
114 * fail. 113 * fail.
115 * 114 *
115 * The @type string must be null-terminated.
116 *
116 * Returns: true if @type pool is available, false if not 117 * Returns: true if @type pool is available, false if not
117 */ 118 */
118bool zpool_has_pool(char *type) 119bool zpool_has_pool(char *type)
@@ -145,9 +146,11 @@ EXPORT_SYMBOL(zpool_has_pool);
145 * 146 *
146 * Implementations must guarantee this to be thread-safe. 147 * Implementations must guarantee this to be thread-safe.
147 * 148 *
149 * The @type and @name strings must be null-terminated.
150 *
148 * Returns: New zpool on success, NULL on failure. 151 * Returns: New zpool on success, NULL on failure.
149 */ 152 */
150struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, 153struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp,
151 const struct zpool_ops *ops) 154 const struct zpool_ops *ops)
152{ 155{
153 struct zpool_driver *driver; 156 struct zpool_driver *driver;
@@ -174,7 +177,6 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
174 return NULL; 177 return NULL;
175 } 178 }
176 179
177 zpool->type = driver->type;
178 zpool->driver = driver; 180 zpool->driver = driver;
179 zpool->pool = driver->create(name, gfp, ops, zpool); 181 zpool->pool = driver->create(name, gfp, ops, zpool);
180 zpool->ops = ops; 182 zpool->ops = ops;
@@ -208,7 +210,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
208 */ 210 */
209void zpool_destroy_pool(struct zpool *zpool) 211void zpool_destroy_pool(struct zpool *zpool)
210{ 212{
211 pr_debug("destroying pool type %s\n", zpool->type); 213 pr_debug("destroying pool type %s\n", zpool->driver->type);
212 214
213 spin_lock(&pools_lock); 215 spin_lock(&pools_lock);
214 list_del(&zpool->list); 216 list_del(&zpool->list);
@@ -228,9 +230,9 @@ void zpool_destroy_pool(struct zpool *zpool)
228 * 230 *
229 * Returns: The type of zpool. 231 * Returns: The type of zpool.
230 */ 232 */
231char *zpool_get_type(struct zpool *zpool) 233const char *zpool_get_type(struct zpool *zpool)
232{ 234{
233 return zpool->type; 235 return zpool->driver->type;
234} 236}
235 237
236/** 238/**
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index f135b1b6fcdc..9f15bdd9163c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -16,7 +16,7 @@
16 * struct page(s) to form a zspage. 16 * struct page(s) to form a zspage.
17 * 17 *
18 * Usage of struct page fields: 18 * Usage of struct page fields:
19 * page->first_page: points to the first component (0-order) page 19 * page->private: points to the first component (0-order) page
20 * page->index (union with page->freelist): offset of the first object 20 * page->index (union with page->freelist): offset of the first object
21 * starting in this page. For the first page, this is 21 * starting in this page. For the first page, this is
22 * always 0, so we use this field (aka freelist) to point 22 * always 0, so we use this field (aka freelist) to point
@@ -26,8 +26,7 @@
26 * 26 *
27 * For _first_ page only: 27 * For _first_ page only:
28 * 28 *
29 * page->private (union with page->first_page): refers to the 29 * page->private: refers to the component page after the first page
30 * component page after the first page
31 * If the page is first_page for huge object, it stores handle. 30 * If the page is first_page for huge object, it stores handle.
32 * Look at size_class->huge. 31 * Look at size_class->huge.
33 * page->freelist: points to the first free object in zspage. 32 * page->freelist: points to the first free object in zspage.
@@ -38,6 +37,7 @@
38 * page->lru: links together first pages of various zspages. 37 * page->lru: links together first pages of various zspages.
39 * Basically forming list of zspages in a fullness group. 38 * Basically forming list of zspages in a fullness group.
40 * page->mapping: class index and fullness group of the zspage 39 * page->mapping: class index and fullness group of the zspage
40 * page->inuse: the number of objects that are used in this zspage
41 * 41 *
42 * Usage of struct page flags: 42 * Usage of struct page flags:
43 * PG_private: identifies the first component page 43 * PG_private: identifies the first component page
@@ -58,7 +58,7 @@
58#include <linux/cpumask.h> 58#include <linux/cpumask.h>
59#include <linux/cpu.h> 59#include <linux/cpu.h>
60#include <linux/vmalloc.h> 60#include <linux/vmalloc.h>
61#include <linux/hardirq.h> 61#include <linux/preempt.h>
62#include <linux/spinlock.h> 62#include <linux/spinlock.h>
63#include <linux/types.h> 63#include <linux/types.h>
64#include <linux/debugfs.h> 64#include <linux/debugfs.h>
@@ -166,9 +166,14 @@ enum zs_stat_type {
166 OBJ_USED, 166 OBJ_USED,
167 CLASS_ALMOST_FULL, 167 CLASS_ALMOST_FULL,
168 CLASS_ALMOST_EMPTY, 168 CLASS_ALMOST_EMPTY,
169 NR_ZS_STAT_TYPE,
170}; 169};
171 170
171#ifdef CONFIG_ZSMALLOC_STAT
172#define NR_ZS_STAT_TYPE (CLASS_ALMOST_EMPTY + 1)
173#else
174#define NR_ZS_STAT_TYPE (OBJ_USED + 1)
175#endif
176
172struct zs_size_stat { 177struct zs_size_stat {
173 unsigned long objs[NR_ZS_STAT_TYPE]; 178 unsigned long objs[NR_ZS_STAT_TYPE];
174}; 179};
@@ -237,7 +242,7 @@ struct link_free {
237}; 242};
238 243
239struct zs_pool { 244struct zs_pool {
240 char *name; 245 const char *name;
241 246
242 struct size_class **size_class; 247 struct size_class **size_class;
243 struct kmem_cache *handle_cachep; 248 struct kmem_cache *handle_cachep;
@@ -311,7 +316,7 @@ static void record_obj(unsigned long handle, unsigned long obj)
311 316
312#ifdef CONFIG_ZPOOL 317#ifdef CONFIG_ZPOOL
313 318
314static void *zs_zpool_create(char *name, gfp_t gfp, 319static void *zs_zpool_create(const char *name, gfp_t gfp,
315 const struct zpool_ops *zpool_ops, 320 const struct zpool_ops *zpool_ops,
316 struct zpool *zpool) 321 struct zpool *zpool)
317{ 322{
@@ -447,19 +452,23 @@ static int get_size_class_index(int size)
447static inline void zs_stat_inc(struct size_class *class, 452static inline void zs_stat_inc(struct size_class *class,
448 enum zs_stat_type type, unsigned long cnt) 453 enum zs_stat_type type, unsigned long cnt)
449{ 454{
450 class->stats.objs[type] += cnt; 455 if (type < NR_ZS_STAT_TYPE)
456 class->stats.objs[type] += cnt;
451} 457}
452 458
453static inline void zs_stat_dec(struct size_class *class, 459static inline void zs_stat_dec(struct size_class *class,
454 enum zs_stat_type type, unsigned long cnt) 460 enum zs_stat_type type, unsigned long cnt)
455{ 461{
456 class->stats.objs[type] -= cnt; 462 if (type < NR_ZS_STAT_TYPE)
463 class->stats.objs[type] -= cnt;
457} 464}
458 465
459static inline unsigned long zs_stat_get(struct size_class *class, 466static inline unsigned long zs_stat_get(struct size_class *class,
460 enum zs_stat_type type) 467 enum zs_stat_type type)
461{ 468{
462 return class->stats.objs[type]; 469 if (type < NR_ZS_STAT_TYPE)
470 return class->stats.objs[type];
471 return 0;
463} 472}
464 473
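The zsmalloc change above sizes the per-class stats array by CONFIG_ZSMALLOC_STAT and lets the inc/dec/get helpers silently ignore stat types that were compiled out. The same pattern in a stand-alone form; the FULL_STATS flag is invented for the demo and only mimics the kernel config option.

#include <stdio.h>

enum stat_type { OBJ_ALLOCATED, OBJ_USED, ALMOST_FULL, ALMOST_EMPTY };

/* With full stats enabled the array covers every type, otherwise only the
 * first two; the guards below turn the missing types into no-ops. */
#ifdef FULL_STATS
#define NR_STAT_TYPE	(ALMOST_EMPTY + 1)
#else
#define NR_STAT_TYPE	(OBJ_USED + 1)
#endif

struct size_stats {
	unsigned long objs[NR_STAT_TYPE];
};

static void stat_inc(struct size_stats *s, enum stat_type t, unsigned long n)
{
	if (t < NR_STAT_TYPE)
		s->objs[t] += n;
}

static unsigned long stat_get(const struct size_stats *s, enum stat_type t)
{
	return t < NR_STAT_TYPE ? s->objs[t] : 0;
}

int main(void)
{
	struct size_stats s = { {0} };

	stat_inc(&s, OBJ_USED, 3);
	stat_inc(&s, ALMOST_FULL, 1);	/* dropped unless FULL_STATS is defined */
	printf("used=%lu almost_full=%lu\n",
	       stat_get(&s, OBJ_USED), stat_get(&s, ALMOST_FULL));
	return 0;
}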
465#ifdef CONFIG_ZSMALLOC_STAT 474#ifdef CONFIG_ZSMALLOC_STAT
@@ -548,7 +557,7 @@ static const struct file_operations zs_stat_size_ops = {
548 .release = single_release, 557 .release = single_release,
549}; 558};
550 559
551static int zs_pool_stat_create(char *name, struct zs_pool *pool) 560static int zs_pool_stat_create(const char *name, struct zs_pool *pool)
552{ 561{
553 struct dentry *entry; 562 struct dentry *entry;
554 563
@@ -588,7 +597,7 @@ static void __exit zs_stat_exit(void)
588{ 597{
589} 598}
590 599
591static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) 600static inline int zs_pool_stat_create(const char *name, struct zs_pool *pool)
592{ 601{
593 return 0; 602 return 0;
594} 603}
@@ -764,7 +773,7 @@ static struct page *get_first_page(struct page *page)
764 if (is_first_page(page)) 773 if (is_first_page(page))
765 return page; 774 return page;
766 else 775 else
767 return page->first_page; 776 return (struct page *)page_private(page);
768} 777}
769 778
770static struct page *get_next_page(struct page *page) 779static struct page *get_next_page(struct page *page)
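These hunks drop page->first_page and keep the back-pointer in the page's private word instead, casting it back in get_first_page(). A toy version of that overloading; the types and the "first" flag are invented, and it assumes unsigned long is pointer-sized, as the kernel's page->private is.

#include <assert.h>
#include <stdbool.h>

/* Toy page descriptor: one unsigned long of "private" storage that different
 * users overload, here as a back-pointer to the head of a page group. */
struct toy_page {
	bool first;
	unsigned long private;
};

static void set_page_private(struct toy_page *p, unsigned long v)
{
	p->private = v;
}

static unsigned long page_private(const struct toy_page *p)
{
	return p->private;
}

static struct toy_page *get_first_page(struct toy_page *p)
{
	return p->first ? p : (struct toy_page *)page_private(p);
}

int main(void)
{
	struct toy_page head = { .first = true }, tail = { .first = false };

	set_page_private(&tail, (unsigned long)&head);	/* tail -> head link */
	assert(get_first_page(&tail) == &head);
	assert(get_first_page(&head) == &head);
	return 0;
}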
@@ -824,7 +833,7 @@ static unsigned long obj_to_head(struct size_class *class, struct page *page,
824{ 833{
825 if (class->huge) { 834 if (class->huge) {
826 VM_BUG_ON(!is_first_page(page)); 835 VM_BUG_ON(!is_first_page(page));
827 return *(unsigned long *)page_private(page); 836 return page_private(page);
828 } else 837 } else
829 return *(unsigned long *)obj; 838 return *(unsigned long *)obj;
830} 839}
@@ -949,7 +958,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
949 * Allocate individual pages and link them together as: 958 * Allocate individual pages and link them together as:
950 * 1. first page->private = first sub-page 959 * 1. first page->private = first sub-page
951 * 2. all sub-pages are linked together using page->lru 960 * 2. all sub-pages are linked together using page->lru
952 * 3. each sub-page is linked to the first page using page->first_page 961 * 3. each sub-page is linked to the first page using page->private
953 * 962 *
954 * For each size class, First/Head pages are linked together using 963 * For each size class, First/Head pages are linked together using
955 * page->lru. Also, we set PG_private to identify the first page 964 * page->lru. Also, we set PG_private to identify the first page
@@ -974,7 +983,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
974 if (i == 1) 983 if (i == 1)
975 set_page_private(first_page, (unsigned long)page); 984 set_page_private(first_page, (unsigned long)page);
976 if (i >= 1) 985 if (i >= 1)
977 page->first_page = first_page; 986 set_page_private(page, (unsigned long)first_page);
978 if (i >= 2) 987 if (i >= 2)
979 list_add(&page->lru, &prev_page->lru); 988 list_add(&page->lru, &prev_page->lru);
980 if (i == class->pages_per_zspage - 1) /* last page */ 989 if (i == class->pages_per_zspage - 1) /* last page */
@@ -1428,8 +1437,6 @@ static void obj_free(struct zs_pool *pool, struct size_class *class,
1428 struct page *first_page, *f_page; 1437 struct page *first_page, *f_page;
1429 unsigned long f_objidx, f_offset; 1438 unsigned long f_objidx, f_offset;
1430 void *vaddr; 1439 void *vaddr;
1431 int class_idx;
1432 enum fullness_group fullness;
1433 1440
1434 BUG_ON(!obj); 1441 BUG_ON(!obj);
1435 1442
@@ -1437,7 +1444,6 @@ static void obj_free(struct zs_pool *pool, struct size_class *class,
1437 obj_to_location(obj, &f_page, &f_objidx); 1444 obj_to_location(obj, &f_page, &f_objidx);
1438 first_page = get_first_page(f_page); 1445 first_page = get_first_page(f_page);
1439 1446
1440 get_zspage_mapping(first_page, &class_idx, &fullness);
1441 f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); 1447 f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
1442 1448
1443 vaddr = kmap_atomic(f_page); 1449 vaddr = kmap_atomic(f_page);
@@ -1822,9 +1828,6 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker,
1822 struct zs_pool *pool = container_of(shrinker, struct zs_pool, 1828 struct zs_pool *pool = container_of(shrinker, struct zs_pool,
1823 shrinker); 1829 shrinker);
1824 1830
1825 if (!pool->shrinker_enabled)
1826 return 0;
1827
1828 for (i = zs_size_classes - 1; i >= 0; i--) { 1831 for (i = zs_size_classes - 1; i >= 0; i--) {
1829 class = pool->size_class[i]; 1832 class = pool->size_class[i];
1830 if (!class) 1833 if (!class)
@@ -1866,7 +1869,7 @@ static int zs_register_shrinker(struct zs_pool *pool)
1866 * On success, a pointer to the newly created pool is returned, 1869 * On success, a pointer to the newly created pool is returned,
1867 * otherwise NULL. 1870 * otherwise NULL.
1868 */ 1871 */
1869struct zs_pool *zs_create_pool(char *name, gfp_t flags) 1872struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
1870{ 1873{
1871 int i; 1874 int i;
1872 struct zs_pool *pool; 1875 struct zs_pool *pool;
diff --git a/mm/zswap.c b/mm/zswap.c
index 4043df7c672f..025f8dc723de 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -82,33 +82,27 @@ module_param_named(enabled, zswap_enabled, bool, 0644);
82 82
83/* Crypto compressor to use */ 83/* Crypto compressor to use */
84#define ZSWAP_COMPRESSOR_DEFAULT "lzo" 84#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
85static char zswap_compressor[CRYPTO_MAX_ALG_NAME] = ZSWAP_COMPRESSOR_DEFAULT; 85static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
86static struct kparam_string zswap_compressor_kparam = {
87 .string = zswap_compressor,
88 .maxlen = sizeof(zswap_compressor),
89};
90static int zswap_compressor_param_set(const char *, 86static int zswap_compressor_param_set(const char *,
91 const struct kernel_param *); 87 const struct kernel_param *);
92static struct kernel_param_ops zswap_compressor_param_ops = { 88static struct kernel_param_ops zswap_compressor_param_ops = {
93 .set = zswap_compressor_param_set, 89 .set = zswap_compressor_param_set,
94 .get = param_get_string, 90 .get = param_get_charp,
91 .free = param_free_charp,
95}; 92};
96module_param_cb(compressor, &zswap_compressor_param_ops, 93module_param_cb(compressor, &zswap_compressor_param_ops,
97 &zswap_compressor_kparam, 0644); 94 &zswap_compressor, 0644);
98 95
99/* Compressed storage zpool to use */ 96/* Compressed storage zpool to use */
100#define ZSWAP_ZPOOL_DEFAULT "zbud" 97#define ZSWAP_ZPOOL_DEFAULT "zbud"
101static char zswap_zpool_type[32 /* arbitrary */] = ZSWAP_ZPOOL_DEFAULT; 98static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
102static struct kparam_string zswap_zpool_kparam = {
103 .string = zswap_zpool_type,
104 .maxlen = sizeof(zswap_zpool_type),
105};
106static int zswap_zpool_param_set(const char *, const struct kernel_param *); 99static int zswap_zpool_param_set(const char *, const struct kernel_param *);
107static struct kernel_param_ops zswap_zpool_param_ops = { 100static struct kernel_param_ops zswap_zpool_param_ops = {
108 .set = zswap_zpool_param_set, 101 .set = zswap_zpool_param_set,
109 .get = param_get_string, 102 .get = param_get_charp,
103 .free = param_free_charp,
110}; 104};
111module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_kparam, 0644); 105module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
112 106
113/* The maximum percentage of memory that the compressed pool can occupy */ 107/* The maximum percentage of memory that the compressed pool can occupy */
114static unsigned int zswap_max_pool_percent = 20; 108static unsigned int zswap_max_pool_percent = 20;
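For reference, the charp-backed module parameter pattern that zswap switches to above looks roughly like this in isolation (a minimal sketch with placeholder names; the real setters also validate the value and switch pools):

#include <linux/module.h>
#include <linux/moduleparam.h>

/* Hypothetical string parameter stored as a plain char pointer. */
static char *example_algo = "lzo";

static int example_algo_set(const char *val, const struct kernel_param *kp)
{
	/* Custom validation would go here; the stock charp setter takes
	 * care of storing the new value behind kp->arg. */
	return param_set_charp(val, kp);
}

static struct kernel_param_ops example_algo_ops = {
	.set	= example_algo_set,
	.get	= param_get_charp,
	.free	= param_free_charp,
};
module_param_cb(algo, &example_algo_ops, &example_algo, 0644);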
@@ -342,7 +336,7 @@ static void zswap_entry_put(struct zswap_tree *tree,
342static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, 336static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
343 pgoff_t offset) 337 pgoff_t offset)
344{ 338{
345 struct zswap_entry *entry = NULL; 339 struct zswap_entry *entry;
346 340
347 entry = zswap_rb_search(root, offset); 341 entry = zswap_rb_search(root, offset);
348 if (entry) 342 if (entry)
@@ -571,7 +565,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
571static struct zswap_pool *zswap_pool_create(char *type, char *compressor) 565static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
572{ 566{
573 struct zswap_pool *pool; 567 struct zswap_pool *pool;
574 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; 568 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
575 569
576 pool = kzalloc(sizeof(*pool), GFP_KERNEL); 570 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
577 if (!pool) { 571 if (!pool) {
@@ -615,19 +609,29 @@ error:
615 return NULL; 609 return NULL;
616} 610}
617 611
618static struct zswap_pool *__zswap_pool_create_fallback(void) 612static __init struct zswap_pool *__zswap_pool_create_fallback(void)
619{ 613{
620 if (!crypto_has_comp(zswap_compressor, 0, 0)) { 614 if (!crypto_has_comp(zswap_compressor, 0, 0)) {
615 if (!strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
616 pr_err("default compressor %s not available\n",
617 zswap_compressor);
618 return NULL;
619 }
621 pr_err("compressor %s not available, using default %s\n", 620 pr_err("compressor %s not available, using default %s\n",
622 zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT); 621 zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
623 strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT, 622 param_free_charp(&zswap_compressor);
624 sizeof(zswap_compressor)); 623 zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
625 } 624 }
626 if (!zpool_has_pool(zswap_zpool_type)) { 625 if (!zpool_has_pool(zswap_zpool_type)) {
626 if (!strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
627 pr_err("default zpool %s not available\n",
628 zswap_zpool_type);
629 return NULL;
630 }
627 pr_err("zpool %s not available, using default %s\n", 631 pr_err("zpool %s not available, using default %s\n",
628 zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT); 632 zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
629 strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT, 633 param_free_charp(&zswap_zpool_type);
630 sizeof(zswap_zpool_type)); 634 zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
631 } 635 }
632 636
633 return zswap_pool_create(zswap_zpool_type, zswap_compressor); 637 return zswap_pool_create(zswap_zpool_type, zswap_compressor);
@@ -684,43 +688,39 @@ static void zswap_pool_put(struct zswap_pool *pool)
684* param callbacks 688* param callbacks
685**********************************/ 689**********************************/
686 690
691/* val must be a null-terminated string */
687static int __zswap_param_set(const char *val, const struct kernel_param *kp, 692static int __zswap_param_set(const char *val, const struct kernel_param *kp,
688 char *type, char *compressor) 693 char *type, char *compressor)
689{ 694{
690 struct zswap_pool *pool, *put_pool = NULL; 695 struct zswap_pool *pool, *put_pool = NULL;
691 char str[kp->str->maxlen], *s; 696 char *s = strstrip((char *)val);
692 int ret; 697 int ret;
693 698
694 /* 699 /* no change required */
695 * kp is either zswap_zpool_kparam or zswap_compressor_kparam, defined 700 if (!strcmp(s, *(char **)kp->arg))
696 * at the top of this file, so maxlen is CRYPTO_MAX_ALG_NAME (64) or 701 return 0;
697 * 32 (arbitrary).
698 */
699 strlcpy(str, val, kp->str->maxlen);
700 s = strim(str);
701 702
702 /* if this is load-time (pre-init) param setting, 703 /* if this is load-time (pre-init) param setting,
703 * don't create a pool; that's done during init. 704 * don't create a pool; that's done during init.
704 */ 705 */
705 if (!zswap_init_started) 706 if (!zswap_init_started)
706 return param_set_copystring(s, kp); 707 return param_set_charp(s, kp);
707
708 /* no change required */
709 if (!strncmp(kp->str->string, s, kp->str->maxlen))
710 return 0;
711 708
712 if (!type) { 709 if (!type) {
713 type = s; 710 if (!zpool_has_pool(s)) {
714 if (!zpool_has_pool(type)) { 711 pr_err("zpool %s not available\n", s);
715 pr_err("zpool %s not available\n", type);
716 return -ENOENT; 712 return -ENOENT;
717 } 713 }
714 type = s;
718 } else if (!compressor) { 715 } else if (!compressor) {
719 compressor = s; 716 if (!crypto_has_comp(s, 0, 0)) {
720 if (!crypto_has_comp(compressor, 0, 0)) { 717 pr_err("compressor %s not available\n", s);
721 pr_err("compressor %s not available\n", compressor);
722 return -ENOENT; 718 return -ENOENT;
723 } 719 }
720 compressor = s;
721 } else {
722 WARN_ON(1);
723 return -EINVAL;
724 } 724 }
725 725
726 spin_lock(&zswap_pools_lock); 726 spin_lock(&zswap_pools_lock);
@@ -736,7 +736,7 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
736 } 736 }
737 737
738 if (pool) 738 if (pool)
739 ret = param_set_copystring(s, kp); 739 ret = param_set_charp(s, kp);
740 else 740 else
741 ret = -EINVAL; 741 ret = -EINVAL;
742 742
@@ -1011,7 +1011,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
1011 /* store */ 1011 /* store */
1012 len = dlen + sizeof(struct zswap_header); 1012 len = dlen + sizeof(struct zswap_header);
1013 ret = zpool_malloc(entry->pool->zpool, len, 1013 ret = zpool_malloc(entry->pool->zpool, len,
1014 __GFP_NORETRY | __GFP_NOWARN, &handle); 1014 __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
1015 &handle);
1015 if (ret == -ENOSPC) { 1016 if (ret == -ENOSPC) {
1016 zswap_reject_compress_poor++; 1017 zswap_reject_compress_poor++;
1017 goto put_dstmem; 1018 goto put_dstmem;
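The zpool_malloc() call above now combines __GFP_NORETRY and __GFP_NOWARN with __GFP_KSWAPD_RECLAIM, i.e. the allocation may wake kswapd but will not enter direct reclaim. A hedged sketch of the same pattern in isolation (hypothetical pool and caller):

#include <linux/gfp.h>
#include <linux/zpool.h>

/* Illustrative only: opportunistic allocation that may wake kswapd but
 * never performs direct reclaim itself, mirroring the flags used above. */
static int example_store(struct zpool *zpool, size_t len, unsigned long *handle)
{
	return zpool_malloc(zpool, len,
			    __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
			    handle);
}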
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599ba8b2..aa41e6dd6429 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
414 len += NET_SKB_PAD; 414 len += NET_SKB_PAD;
415 415
416 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || 416 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
417 (gfp_mask & (__GFP_WAIT | GFP_DMA))) { 417 (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
418 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); 418 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
419 if (!skb) 419 if (!skb)
420 goto skb_fail; 420 goto skb_fail;
@@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
481 len += NET_SKB_PAD + NET_IP_ALIGN; 481 len += NET_SKB_PAD + NET_IP_ALIGN;
482 482
483 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || 483 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
484 (gfp_mask & (__GFP_WAIT | GFP_DMA))) { 484 (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
485 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); 485 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
486 if (!skb) 486 if (!skb)
487 goto skb_fail; 487 goto skb_fail;
@@ -4452,7 +4452,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
4452 return NULL; 4452 return NULL;
4453 4453
4454 gfp_head = gfp_mask; 4454 gfp_head = gfp_mask;
4455 if (gfp_head & __GFP_WAIT) 4455 if (gfp_head & __GFP_DIRECT_RECLAIM)
4456 gfp_head |= __GFP_REPEAT; 4456 gfp_head |= __GFP_REPEAT;
4457 4457
4458 *errcode = -ENOBUFS; 4458 *errcode = -ENOBUFS;
@@ -4467,7 +4467,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
4467 4467
4468 while (order) { 4468 while (order) {
4469 if (npages >= 1 << order) { 4469 if (npages >= 1 << order) {
4470 page = alloc_pages((gfp_mask & ~__GFP_WAIT) | 4470 page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
4471 __GFP_COMP | 4471 __GFP_COMP |
4472 __GFP_NOWARN | 4472 __GFP_NOWARN |
4473 __GFP_NORETRY, 4473 __GFP_NORETRY,
diff --git a/net/core/sock.c b/net/core/sock.c
index 7529eb9463be..1e4dd54bfb5a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1944,8 +1944,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1944 1944
1945 pfrag->offset = 0; 1945 pfrag->offset = 0;
1946 if (SKB_FRAG_PAGE_ORDER) { 1946 if (SKB_FRAG_PAGE_ORDER) {
1947 pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | 1947 /* Avoid direct reclaim but allow kswapd to wake */
1948 __GFP_NOWARN | __GFP_NORETRY, 1948 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
1949 __GFP_COMP | __GFP_NOWARN |
1950 __GFP_NORETRY,
1949 SKB_FRAG_PAGE_ORDER); 1951 SKB_FRAG_PAGE_ORDER);
1950 if (likely(pfrag->page)) { 1952 if (likely(pfrag->page)) {
1951 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; 1953 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
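skb_page_frag_refill() above (like alloc_skb_with_frags() earlier) clears __GFP_DIRECT_RECLAIM from the caller's mask for the speculative high-order attempt while leaving any kswapd-wake bit intact. A standalone sketch of that idiom (hypothetical helper and fallback policy):

#include <linux/gfp.h>
#include <linux/mm.h>

/* Illustrative only: try a high-order page without direct reclaim,
 * then fall back to a single page with the caller's original mask. */
static struct page *example_alloc_frag(gfp_t gfp, unsigned int order)
{
	struct page *page;

	page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
			   __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY,
			   order);
	if (page)
		return page;

	return alloc_pages(gfp, 0);
}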
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fafe33bdb619..59651af8cc27 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2116,7 +2116,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
2116 consume_skb(info.skb2); 2116 consume_skb(info.skb2);
2117 2117
2118 if (info.delivered) { 2118 if (info.delivered) {
2119 if (info.congested && (allocation & __GFP_WAIT)) 2119 if (info.congested && gfpflags_allow_blocking(allocation))
2120 yield(); 2120 yield();
2121 return 0; 2121 return 0;
2122 } 2122 }
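gfpflags_allow_blocking(), used here and in the rxrpc and sctp hunks below, replaces open-coded __GFP_WAIT tests with a helper that reports whether the mask allows the caller to sleep. A small illustrative use (hypothetical wrapper):

#include <linux/gfp.h>
#include <linux/sched.h>

/* Illustrative only: yield the CPU when the gfp mask says sleeping is
 * allowed, mirroring the netlink_broadcast_filtered() change above. */
static void example_maybe_yield(gfp_t allocation, bool congested)
{
	if (congested && gfpflags_allow_blocking(allocation))
		yield();
}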
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 96744b75db93..977fb86065b7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
305 gfp_t slab_mask = GFP_NOWAIT; 305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT; 306 gfp_t page_mask = GFP_NOWAIT;
307 307
308 if (gfp & __GFP_WAIT) { 308 if (gfp & __GFP_DIRECT_RECLAIM) {
309 slab_mask = GFP_KERNEL; 309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER; 310 page_mask = GFP_HIGHUSER;
311 } 311 }
@@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
379 struct ib_recv_wr *failed_wr; 379 struct ib_recv_wr *failed_wr;
380 unsigned int posted = 0; 380 unsigned int posted = 0;
381 int ret = 0; 381 int ret = 0;
382 bool can_wait = !!(gfp & __GFP_WAIT); 382 bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
383 u32 pos; 383 u32 pos;
384 384
385 /* the goal here is to just make sure that someone, somewhere 385 /* the goal here is to just make sure that someone, somewhere
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 692b3e67fb54..6c71ed1caf16 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -500,7 +500,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
500 if (bundle->num_conns >= 20) { 500 if (bundle->num_conns >= 20) {
501 _debug("too many conns"); 501 _debug("too many conns");
502 502
503 if (!(gfp & __GFP_WAIT)) { 503 if (!gfpflags_allow_blocking(gfp)) {
504 _leave(" = -EAGAIN"); 504 _leave(" = -EAGAIN");
505 return -EAGAIN; 505 return -EAGAIN;
506 } 506 }
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b00f1f9611d6..559afd0ee7de 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1590,7 +1590,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
1590/* Set an association id for a given association */ 1590/* Set an association id for a given association */
1591int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) 1591int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
1592{ 1592{
1593 bool preload = !!(gfp & __GFP_WAIT); 1593 bool preload = gfpflags_allow_blocking(gfp);
1594 int ret; 1594 int ret;
1595 1595
1596 /* If the id is already assigned, keep it. */ 1596 /* If the id is already assigned, keep it. */
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index f2a1131b2f8b..2b3c22808c3b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -370,6 +370,8 @@ our $typeTypedefs = qr{(?x:
370 $typeKernelTypedefs\b 370 $typeKernelTypedefs\b
371)}; 371)};
372 372
373our $zero_initializer = qr{(?:(?:0[xX])?0+$Int_type?|NULL|false)\b};
374
373our $logFunctions = qr{(?x: 375our $logFunctions = qr{(?x:
374 printk(?:_ratelimited|_once|)| 376 printk(?:_ratelimited|_once|)|
375 (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)| 377 (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)|
@@ -2313,42 +2315,43 @@ sub process {
2313 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr); 2315 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr);
2314 } 2316 }
2315 2317
2318# Check if the commit log is in a possible stack dump
2319 if ($in_commit_log && !$commit_log_possible_stack_dump &&
2320 ($line =~ /^\s*(?:WARNING:|BUG:)/ ||
2321 $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ ||
2322 # timestamp
2323 $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) {
2324 # stack dump address
2325 $commit_log_possible_stack_dump = 1;
2326 }
2327
2316# Check for line lengths > 75 in commit log, warn once 2328# Check for line lengths > 75 in commit log, warn once
2317 if ($in_commit_log && !$commit_log_long_line && 2329 if ($in_commit_log && !$commit_log_long_line &&
2318 length($line) > 75 && 2330 length($line) > 75 &&
2319 !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ || 2331 !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ ||
2320 # file delta changes 2332 # file delta changes
2321 $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ || 2333 $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ ||
2322 # filename then : 2334 # filename then :
2323 $line =~ /^\s*(?:Fixes:|Link:)/i || 2335 $line =~ /^\s*(?:Fixes:|Link:)/i ||
2324 # A Fixes: or Link: line 2336 # A Fixes: or Link: line
2325 $commit_log_possible_stack_dump)) { 2337 $commit_log_possible_stack_dump)) {
2326 WARN("COMMIT_LOG_LONG_LINE", 2338 WARN("COMMIT_LOG_LONG_LINE",
2327 "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr); 2339 "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr);
2328 $commit_log_long_line = 1; 2340 $commit_log_long_line = 1;
2329 } 2341 }
2330 2342
2331# Check if the commit log is in a possible stack dump
2332 if ($in_commit_log && !$commit_log_possible_stack_dump &&
2333 ($line =~ /^\s*(?:WARNING:|BUG:)/ ||
2334 $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ ||
2335 # timestamp
2336 $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) {
2337 # stack dump address
2338 $commit_log_possible_stack_dump = 1;
2339 }
2340
2341# Reset possible stack dump if a blank line is found 2343# Reset possible stack dump if a blank line is found
2342 if ($in_commit_log && $commit_log_possible_stack_dump && 2344 if ($in_commit_log && $commit_log_possible_stack_dump &&
2343 $line =~ /^\s*$/) { 2345 $line =~ /^\s*$/) {
2344 $commit_log_possible_stack_dump = 0; 2346 $commit_log_possible_stack_dump = 0;
2345 } 2347 }
2346 2348
2347# Check for git id commit length and improperly formed commit descriptions 2349# Check for git id commit length and improperly formed commit descriptions
2348 if ($in_commit_log && 2350 if ($in_commit_log && !$commit_log_possible_stack_dump &&
2349 ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i || 2351 ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i ||
2350 ($line =~ /\b[0-9a-f]{12,40}\b/i && 2352 ($line =~ /\b[0-9a-f]{12,40}\b/i &&
2351 $line !~ /\bfixes:\s*[0-9a-f]{12,40}/i))) { 2353 $line !~ /[\<\[][0-9a-f]{12,40}[\>\]]/i &&
2354 $line !~ /\bfixes:\s*[0-9a-f]{12,40}/i))) {
2352 my $init_char = "c"; 2355 my $init_char = "c";
2353 my $orig_commit = ""; 2356 my $orig_commit = "";
2354 my $short = 1; 2357 my $short = 1;
@@ -3333,21 +3336,20 @@ sub process {
3333 } 3336 }
3334 3337
3335# check for global initialisers. 3338# check for global initialisers.
3336 if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*(?:0|NULL|false)\s*;/) { 3339 if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*($zero_initializer)\s*;/) {
3337 if (ERROR("GLOBAL_INITIALISERS", 3340 if (ERROR("GLOBAL_INITIALISERS",
3338 "do not initialise globals to 0 or NULL\n" . 3341 "do not initialise globals to $1\n" . $herecurr) &&
3339 $herecurr) &&
3340 $fix) { 3342 $fix) {
3341 $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*(0|NULL|false)\s*;/$1;/; 3343 $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*$zero_initializer\s*;/$1;/;
3342 } 3344 }
3343 } 3345 }
3344# check for static initialisers. 3346# check for static initialisers.
3345 if ($line =~ /^\+.*\bstatic\s.*=\s*(0|NULL|false)\s*;/) { 3347 if ($line =~ /^\+.*\bstatic\s.*=\s*($zero_initializer)\s*;/) {
3346 if (ERROR("INITIALISED_STATIC", 3348 if (ERROR("INITIALISED_STATIC",
3347 "do not initialise statics to 0 or NULL\n" . 3349 "do not initialise statics to $1\n" .
3348 $herecurr) && 3350 $herecurr) &&
3349 $fix) { 3351 $fix) {
3350 $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/; 3352 $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*$zero_initializer\s*;/$1;/;
3351 } 3353 }
3352 } 3354 }
3353 3355
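In C terms, the new $zero_initializer pattern means hex and suffixed zero forms are now caught by the GLOBAL_INITIALISERS and INITIALISED_STATIC checks as well. Hypothetical declarations for illustration:

#include <linux/stddef.h>
#include <linux/types.h>

/* Newly flagged by the extended pattern (hypothetical declarations): */
static int example_count = 0x00;
static unsigned long example_mask = 0UL;

/* Flagged both before and after the change: */
static void *example_ptr = NULL;
static bool example_flag = false;

/* Preferred: rely on the implicit zero-initialisation of statics. */
static int example_count2;
static unsigned long example_mask2;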
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 98bae869f6d0..cab641a12dd5 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -781,6 +781,7 @@ MAINTAINER field selection options:
781 --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers) 781 --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
782 --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent) 782 --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
783 --git-blame => use git blame to find modified commits for patch or file 783 --git-blame => use git blame to find modified commits for patch or file
784 --git-blame-signatures => when used with --git-blame, also include all commit signers
784 --git-since => git history to use (default: $email_git_since) 785 --git-since => git history to use (default: $email_git_since)
785 --hg-since => hg history to use (default: $email_hg_since) 786 --hg-since => hg history to use (default: $email_hg_since)
786 --interactive => display a menu (mostly useful if used with the --git option) 787 --interactive => display a menu (mostly useful if used with the --git option)
@@ -812,7 +813,7 @@ Other options:
812 --help => show this help information 813 --help => show this help information
813 814
814Default options: 815Default options:
815 [--email --nogit --git-fallback --m --n --l --multiline -pattern-depth=0 816 [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0
816 --remove-duplicates --rolestats] 817 --remove-duplicates --rolestats]
817 818
818Notes: 819Notes:
@@ -844,6 +845,9 @@ Notes:
844 Entries in this file can be any command line argument. 845 Entries in this file can be any command line argument.
845 This file is prepended to any additional command line arguments. 846 This file is prepended to any additional command line arguments.
846 Multiple lines and # comments are allowed. 847 Multiple lines and # comments are allowed.
848 Most options have both positive and negative forms.
849 The negative forms for --<foo> are --no<foo> and --no-<foo>.
850
847EOT 851EOT
848} 852}
849 853
@@ -970,20 +974,29 @@ sub find_ending_index {
970 return $index; 974 return $index;
971} 975}
972 976
973sub get_maintainer_role { 977sub get_subsystem_name {
974 my ($index) = @_; 978 my ($index) = @_;
975 979
976 my $i;
977 my $start = find_starting_index($index); 980 my $start = find_starting_index($index);
978 my $end = find_ending_index($index);
979 981
980 my $role = "unknown";
981 my $subsystem = $typevalue[$start]; 982 my $subsystem = $typevalue[$start];
982 if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) { 983 if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
983 $subsystem = substr($subsystem, 0, $output_section_maxlen - 3); 984 $subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
984 $subsystem =~ s/\s*$//; 985 $subsystem =~ s/\s*$//;
985 $subsystem = $subsystem . "..."; 986 $subsystem = $subsystem . "...";
986 } 987 }
988 return $subsystem;
989}
990
991sub get_maintainer_role {
992 my ($index) = @_;
993
994 my $i;
995 my $start = find_starting_index($index);
996 my $end = find_ending_index($index);
997
998 my $role = "unknown";
999 my $subsystem = get_subsystem_name($index);
987 1000
988 for ($i = $start + 1; $i < $end; $i++) { 1001 for ($i = $start + 1; $i < $end; $i++) {
989 my $tv = $typevalue[$i]; 1002 my $tv = $typevalue[$i];
@@ -1017,16 +1030,7 @@ sub get_maintainer_role {
1017sub get_list_role { 1030sub get_list_role {
1018 my ($index) = @_; 1031 my ($index) = @_;
1019 1032
1020 my $i; 1033 my $subsystem = get_subsystem_name($index);
1021 my $start = find_starting_index($index);
1022 my $end = find_ending_index($index);
1023
1024 my $subsystem = $typevalue[$start];
1025 if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1026 $subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1027 $subsystem =~ s/\s*$//;
1028 $subsystem = $subsystem . "...";
1029 }
1030 1034
1031 if ($subsystem eq "THE REST") { 1035 if ($subsystem eq "THE REST") {
1032 $subsystem = ""; 1036 $subsystem = "";
@@ -1114,7 +1118,8 @@ sub add_categories {
1114 } 1118 }
1115 } 1119 }
1116 if ($email_reviewer) { 1120 if ($email_reviewer) {
1117 push_email_addresses($pvalue, 'reviewer'); 1121 my $subsystem = get_subsystem_name($i);
1122 push_email_addresses($pvalue, "reviewer:$subsystem");
1118 } 1123 }
1119 } elsif ($ptype eq "T") { 1124 } elsif ($ptype eq "T") {
1120 push(@scm, $pvalue); 1125 push(@scm, $pvalue);
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index e24121afb2f2..6eb62936c672 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -126,7 +126,7 @@ static void *ima_alloc_pages(loff_t max_size, size_t *allocated_size,
126{ 126{
127 void *ptr; 127 void *ptr;
128 int order = ima_maxorder; 128 int order = ima_maxorder;
129 gfp_t gfp_mask = __GFP_WAIT | __GFP_NOWARN | __GFP_NORETRY; 129 gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY;
130 130
131 if (order) 131 if (order)
132 order = min(get_order(max_size), order); 132 order = min(get_order(max_size), order);
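Here the old __GFP_WAIT becomes __GFP_RECLAIM rather than __GFP_DIRECT_RECLAIM; assuming __GFP_RECLAIM is the OR of the direct-reclaim and kswapd-reclaim bits (as this series defines it), the resulting mask still allows blocking and still wakes kswapd. A tiny illustrative check:

#include <linux/gfp.h>
#include <linux/types.h>

/* Illustrative only, assuming __GFP_RECLAIM covers both reclaim bits:
 * the mask permits direct reclaim (so the caller may sleep) and also
 * wakes kswapd. */
static bool example_mask_traits(void)
{
	gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY;

	return gfpflags_allow_blocking(gfp_mask) &&
	       (gfp_mask & __GFP_KSWAPD_RECLAIM);
}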
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index cfe121353eec..4b4957b8df4e 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -6,6 +6,7 @@ TARGETS += firmware
6TARGETS += ftrace 6TARGETS += ftrace
7TARGETS += futex 7TARGETS += futex
8TARGETS += kcmp 8TARGETS += kcmp
9TARGETS += lib
9TARGETS += membarrier 10TARGETS += membarrier
10TARGETS += memfd 11TARGETS += memfd
11TARGETS += memory-hotplug 12TARGETS += memory-hotplug
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
new file mode 100644
index 000000000000..47147b968514
--- /dev/null
+++ b/tools/testing/selftests/lib/Makefile
@@ -0,0 +1,8 @@
1# Makefile for lib/ function selftests
2
3# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
4all:
5
6TEST_PROGS := printf.sh
7
8include ../lib.mk
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
new file mode 100644
index 000000000000..4fdc70fe6980
--- /dev/null
+++ b/tools/testing/selftests/lib/printf.sh
@@ -0,0 +1,10 @@
1#!/bin/sh
2# Runs printf infrastructure using test_printf kernel module
3
4if /sbin/modprobe -q test_printf; then
5 /sbin/modprobe -q -r test_printf
6 echo "printf: ok"
7else
8 echo "printf: [FAIL]"
9 exit 1
10fi