-rw-r--r--Documentation/printk-formats.txt29
-rw-r--r--Documentation/vm/balance14
-rw-r--r--Documentation/vm/split_page_table_lock4
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/arm/mm/dma-mapping.c6
-rw-r--r--arch/arm/xen/mm.c2
-rw-r--r--arch/arm64/mm/dma-mapping.c4
-rw-r--r--arch/sh/kernel/cpu/sh5/unwind.c2
-rw-r--r--arch/sh/kernel/traps_64.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_msr.c7
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/xtensa/configs/iss_defconfig1
-rw-r--r--block/bio.c26
-rw-r--r--block/blk-core.c20
-rw-r--r--block/blk-ioc.c2
-rw-r--r--block/blk-mq-tag.c2
-rw-r--r--block/blk-mq.c6
-rw-r--r--block/ioprio.c6
-rw-r--r--block/scsi_ioctl.c6
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c3
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c2
-rw-r--r--drivers/block/nbd.c15
-rw-r--r--drivers/block/osdblk.c2
-rw-r--r--drivers/block/paride/pd.c2
-rw-r--r--drivers/block/pktcdvd.c4
-rw-r--r--drivers/block/zram/zram_drv.c28
-rw-r--r--drivers/connector/connector.c3
-rw-r--r--drivers/firewire/core-cdev.c2
-rw-r--r--drivers/gpu/drm/drm_gem.c2
-rw-r--r--drivers/gpu/drm/drm_lock.c41
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c5
-rw-r--r--drivers/ide/ide-atapi.c2
-rw-r--r--drivers/ide/ide-cd.c2
-rw-r--r--drivers/ide/ide-cd_ioctl.c2
-rw-r--r--drivers/ide/ide-devsets.c2
-rw-r--r--drivers/ide/ide-disk.c2
-rw-r--r--drivers/ide/ide-ioctls.c4
-rw-r--r--drivers/ide/ide-park.c2
-rw-r--r--drivers/ide/ide-pm.c4
-rw-r--r--drivers/ide/ide-tape.c4
-rw-r--r--drivers/ide/ide-taskfile.c4
-rw-r--r--drivers/infiniband/core/sa_query.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c2
-rw-r--r--drivers/iommu/amd_iommu.c2
-rw-r--r--drivers/iommu/intel-iommu.c2
-rw-r--r--drivers/md/dm-crypt.c6
-rw-r--r--drivers/md/dm-kcopyd.c2
-rw-r--r--drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c2
-rw-r--r--drivers/media/pci/solo6x10/solo6x10-v4l2.c2
-rw-r--r--drivers/media/pci/tw68/tw68-video.c2
-rw-r--r--drivers/misc/vmw_balloon.c2
-rw-r--r--drivers/mtd/mtdcore.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c2
-rw-r--r--drivers/nvme/host/pci.c6
-rw-r--r--drivers/scsi/scsi_error.c2
-rw-r--r--drivers/scsi/scsi_lib.c4
-rw-r--r--drivers/staging/android/ion/ion_system_heap.c2
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_private.h2
-rw-r--r--drivers/staging/lustre/lnet/lnet/router.c2
-rw-r--r--drivers/staging/lustre/lnet/selftest/conrpc.c2
-rw-r--r--drivers/staging/lustre/lnet/selftest/rpc.c2
-rw-r--r--drivers/staging/lustre/lustre/libcfs/module.c2
-rw-r--r--drivers/staging/lustre/lustre/libcfs/tracefile.c2
-rw-r--r--drivers/staging/lustre/lustre/llite/remote_perm.c2
-rw-r--r--drivers/staging/lustre/lustre/mgc/mgc_request.c8
-rw-r--r--drivers/staging/lustre/lustre/obdecho/echo_client.c2
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cache.c2
-rw-r--r--drivers/staging/rdma/hfi1/init.c2
-rw-r--r--drivers/staging/rdma/ipath/ipath_file_ops.c2
-rw-r--r--drivers/usb/gadget/function/f_mass_storage.c4
-rw-r--r--drivers/usb/host/u132-hcd.c2
-rw-r--r--drivers/video/fbdev/vermilion/vermilion.c2
-rw-r--r--fs/btrfs/compression.c7
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent_io.c14
-rw-r--r--fs/btrfs/free-space-cache.c4
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/cachefiles/internal.h2
-rw-r--r--fs/ceph/addr.c7
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/coredump.c35
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/readpage.c2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fscache/cookie.c2
-rw-r--r--fs/fscache/page.c6
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jffs2/background.c7
-rw-r--r--fs/jffs2/wbuf.c2
-rw-r--r--fs/logfs/segment.c2
-rw-r--r--fs/mpage.c4
-rw-r--r--fs/namei.c2
-rw-r--r--fs/nfs/file.c6
-rw-r--r--fs/nilfs2/alloc.c308
-rw-r--r--fs/nilfs2/alloc.h1
-rw-r--r--fs/nilfs2/btree.c7
-rw-r--r--fs/nilfs2/dat.c2
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/mdt.c6
-rw-r--r--fs/nilfs2/mdt.h2
-rw-r--r--fs/nilfs2/recovery.c4
-rw-r--r--fs/nilfs2/segment.c107
-rw-r--r--fs/nilfs2/segment.h3
-rw-r--r--fs/nilfs2/sufile.c11
-rw-r--r--fs/nilfs2/super.c17
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/proc/array.c10
-rw-r--r--fs/proc/fd.c14
-rw-r--r--fs/seq_file.c51
-rw-r--r--fs/splice.c2
-rw-r--r--fs/sync.c3
-rw-r--r--fs/xfs/xfs_qm.c2
-rw-r--r--include/drm/drmP.h1
-rw-r--r--include/linux/bitops.h13
-rw-r--r--include/linux/compiler-gcc.h5
-rw-r--r--include/linux/cpuset.h6
-rw-r--r--include/linux/dma-mapping.h18
-rw-r--r--include/linux/gfp.h286
-rw-r--r--include/linux/hugetlb_cgroup.h4
-rw-r--r--include/linux/kernel.h2
-rw-r--r--include/linux/mm.h82
-rw-r--r--include/linux/mm_types.h40
-rw-r--r--include/linux/mmzone.h101
-rw-r--r--include/linux/moduleparam.h1
-rw-r--r--include/linux/page-flags.h80
-rw-r--r--include/linux/pageblock-flags.h2
-rw-r--r--include/linux/pagemap.h7
-rw-r--r--include/linux/rbtree.h12
-rw-r--r--include/linux/sched.h28
-rw-r--r--include/linux/skbuff.h6
-rw-r--r--include/linux/zpool.h8
-rw-r--r--include/linux/zsmalloc.h2
-rw-r--r--include/linux/zutil.h4
-rw-r--r--include/net/sock.h2
-rw-r--r--include/trace/events/gfpflags.h5
-rw-r--r--include/trace/events/nilfs2.h224
-rw-r--r--ipc/msgutil.c1
-rw-r--r--kernel/audit.c6
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/kexec_core.c4
-rw-r--r--kernel/kexec_file.c2
-rw-r--r--kernel/locking/lockdep.c2
-rw-r--r--kernel/panic.c10
-rw-r--r--kernel/params.c3
-rw-r--r--kernel/power/snapshot.c2
-rw-r--r--kernel/power/swap.c16
-rw-r--r--kernel/printk/printk.c13
-rw-r--r--kernel/signal.c53
-rw-r--r--kernel/smp.c2
-rw-r--r--kernel/sys.c4
-rw-r--r--lib/Kconfig.debug3
-rw-r--r--lib/Makefile1
-rw-r--r--lib/dma-debug.c8
-rw-r--r--lib/dynamic_debug.c8
-rw-r--r--lib/halfmd4.c3
-rw-r--r--lib/hexdump.c6
-rw-r--r--lib/idr.c4
-rw-r--r--lib/is_single_threaded.c5
-rw-r--r--lib/kasprintf.c16
-rw-r--r--lib/kobject.c30
-rw-r--r--lib/llist.c4
-rw-r--r--lib/percpu_ida.c2
-rw-r--r--lib/radix-tree.c10
-rw-r--r--lib/test-string_helpers.c36
-rw-r--r--lib/test_printf.c362
-rw-r--r--lib/vsprintf.c80
-rw-r--r--mm/Kconfig12
-rw-r--r--mm/backing-dev.c2
-rw-r--r--mm/debug.c5
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/failslab.c8
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/huge_memory.c15
-rw-r--r--mm/hugetlb.c35
-rw-r--r--mm/hugetlb_cgroup.c2
-rw-r--r--mm/internal.h28
-rw-r--r--mm/memcontrol.c10
-rw-r--r--mm/memory-failure.c7
-rw-r--r--mm/mempool.c10
-rw-r--r--mm/migrate.c4
-rw-r--r--mm/oom_kill.c17
-rw-r--r--mm/page_alloc.c703
-rw-r--r--mm/readahead.c4
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/slab.c35
-rw-r--r--mm/slub.c15
-rw-r--r--mm/swap.c4
-rw-r--r--mm/vmalloc.c4
-rw-r--r--mm/vmscan.c8
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/zbud.c2
-rw-r--r--mm/zpool.c18
-rw-r--r--mm/zsmalloc.c49
-rw-r--r--mm/zswap.c87
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/sock.c6
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rxrpc/ar-connection.c2
-rw-r--r--net/sctp/associola.c2
-rwxr-xr-xscripts/checkpatch.pl66
-rwxr-xr-xscripts/get_maintainer.pl37
-rw-r--r--security/integrity/ima/ima_crypto.c2
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/lib/Makefile8
-rw-r--r--tools/testing/selftests/lib/printf.sh10
211 files changed, 2298 insertions, 1632 deletions
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 2216eb187c21..b784c270105f 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -23,6 +23,10 @@ Example:
 
 Reminder: sizeof() result is of type size_t.
 
+The kernel's printf does not support %n. For obvious reasons, floating
+point formats (%e, %f, %g, %a) are also not recognized. Use of any
+unsupported specifier or length qualifier results in a WARN and early
+return from vsnprintf.
 
 Raw pointer value SHOULD be printed with %p. The kernel supports
 the following extended format specifiers for pointer types:
@@ -119,6 +123,7 @@ Raw buffer as an escaped string:
 	If field width is omitted the 1 byte only will be escaped.
 
 Raw buffer as a hex string:
+
 	%*ph	00 01 02 ... 3f
 	%*phC	00:01:02: ... :3f
 	%*phD	00-01-02- ... -3f
@@ -234,6 +239,7 @@ UUID/GUID addresses:
 	Passed by reference.
 
 dentry names:
+
 	%pd{,2,3,4}
 	%pD{,2,3,4}
 
@@ -256,6 +262,8 @@ struct va_format:
 		va_list *va;
 	};
 
+	Implements a "recursive vsnprintf".
+
 	Do not use this feature without some mechanism to verify the
 	correctness of the format string and va_list arguments.
 
@@ -284,6 +292,27 @@ bitmap and its derivatives such as cpumask and nodemask:
 
 	Passed by reference.
 
+Network device features:
+
+	%pNF	0x000000000000c000
+
+	For printing netdev_features_t.
+
+	Passed by reference.
+
+Command from struct task_struct
+
+	%pT	ls
+
+	For printing executable name excluding path from struct
+	task_struct.
+
+	Passed by reference.
+
+If you add other %p extensions, please extend lib/test_printf.c with
+one or more test cases, if at all feasible.
+
+
 Thank you for your cooperation and attention.
 
 
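For reference, a minimal usage sketch of the hex-string specifiers documented
above, placed in any kernel code that already has <linux/printk.h> available
(illustrative only; the buffer and the calling context are made up, not taken
from the patch):

	u8 mac[6] = { 0x00, 0x01, 0x02, 0x3d, 0x3e, 0x3f };

	/* field width passed as an argument, as in the %*ph forms above */
	pr_info("mac bytes: %*ph\n", (int)sizeof(mac), mac);  /* 00 01 02 3d 3e 3f */
	pr_info("mac:       %*phC\n", (int)sizeof(mac), mac); /* 00:01:02:3d:3e:3f */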
diff --git a/Documentation/vm/balance b/Documentation/vm/balance
index c46e68cf9344..964595481af6 100644
--- a/Documentation/vm/balance
+++ b/Documentation/vm/balance
@@ -1,12 +1,14 @@
 Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
 
-Memory balancing is needed for non __GFP_WAIT as well as for non
-__GFP_IO allocations.
+Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as
+well as for non __GFP_IO allocations.
 
-There are two reasons to be requesting non __GFP_WAIT allocations:
-the caller can not sleep (typically intr context), or does not want
-to incur cost overheads of page stealing and possible swap io for
-whatever reasons.
+The first reason why a caller may avoid reclaim is that the caller can not
+sleep due to holding a spinlock or is in interrupt context. The second may
+be that the caller is willing to fail the allocation without incurring the
+overhead of page reclaim. This may happen for opportunistic high-order
+allocation requests that have order-0 fallback options. In such cases,
+the caller may also wish to avoid waking kswapd.
 
 __GFP_IO allocation requests are made to prevent file system deadlocks.
 
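The two situations described in the rewritten text can be sketched roughly as
follows (a sketch only, not code from this series; the order-4 request and the
surrounding variables are invented for illustration):

	struct sk_buff *skb;
	struct page *page;
	unsigned int len = 128;

	/* Case 1: cannot sleep (interrupt context, spinlock held):
	 * no direct reclaim, though kswapd may still be woken.
	 */
	skb = alloc_skb(len, GFP_ATOMIC);

	/* Case 2: opportunistic high-order attempt with an order-0 fallback:
	 * no reclaim at all, and no kswapd wakeup either.
	 */
	page = alloc_pages(__GFP_NORETRY | __GFP_NOWARN, 4);
	if (!page)
		page = alloc_pages(GFP_KERNEL, 0);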
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock
index 6dea4fd5c961..62842a857dab 100644
--- a/Documentation/vm/split_page_table_lock
+++ b/Documentation/vm/split_page_table_lock
@@ -54,8 +54,8 @@ everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
 which must be called on PTE table allocation / freeing.
 
 Make sure the architecture doesn't use slab allocator for page table
-allocation: slab uses page->slab_cache and page->first_page for its pages.
-These fields share storage with page->ptl.
+allocation: slab uses page->slab_cache for its pages.
+This field shares storage with page->ptl.
 
 PMD split lock only makes sense if you have more than two page table
 levels.
diff --git a/MAINTAINERS b/MAINTAINERS
index 4c5446a6a4a2..7af7f4a01f0b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4209,7 +4209,10 @@ L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon.git
 S:	Maintained
 F:	drivers/extcon/
+F:	include/linux/extcon/
+F:	include/linux/extcon.h
 F:	Documentation/extcon/
+F:	Documentation/devicetree/bindings/extcon/
 
 EXYNOS DP DRIVER
 M:	Jingoo Han <jingoohan1@gmail.com>
@@ -7490,6 +7493,7 @@ S:	Supported
 F:	Documentation/filesystems/nilfs2.txt
 F:	fs/nilfs2/
 F:	include/linux/nilfs2_fs.h
+F:	include/trace/events/nilfs2.h
 
 NINJA SCSI-3 / NINJA SCSI-32Bi (16bit/CardBus) PCMCIA SCSI HOST ADAPTER DRIVER
 M:	YOKOTA Hiroshi <yokota@netlab.is.tsukuba.ac.jp>
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ad4eb2d26e16..e62400e5fb99 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -651,12 +651,12 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
651 651
652 if (nommu()) 652 if (nommu())
653 addr = __alloc_simple_buffer(dev, size, gfp, &page); 653 addr = __alloc_simple_buffer(dev, size, gfp, &page);
654 else if (dev_get_cma_area(dev) && (gfp & __GFP_WAIT)) 654 else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM))
655 addr = __alloc_from_contiguous(dev, size, prot, &page, 655 addr = __alloc_from_contiguous(dev, size, prot, &page,
656 caller, want_vaddr); 656 caller, want_vaddr);
657 else if (is_coherent) 657 else if (is_coherent)
658 addr = __alloc_simple_buffer(dev, size, gfp, &page); 658 addr = __alloc_simple_buffer(dev, size, gfp, &page);
659 else if (!(gfp & __GFP_WAIT)) 659 else if (!gfpflags_allow_blocking(gfp))
660 addr = __alloc_from_pool(size, &page); 660 addr = __alloc_from_pool(size, &page);
661 else 661 else
662 addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, 662 addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
@@ -1363,7 +1363,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
1363 *handle = DMA_ERROR_CODE; 1363 *handle = DMA_ERROR_CODE;
1364 size = PAGE_ALIGN(size); 1364 size = PAGE_ALIGN(size);
1365 1365
1366 if (!(gfp & __GFP_WAIT)) 1366 if (!gfpflags_allow_blocking(gfp))
1367 return __iommu_alloc_atomic(dev, size, handle); 1367 return __iommu_alloc_atomic(dev, size, handle);
1368 1368
1369 /* 1369 /*
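gfpflags_allow_blocking() and __GFP_RECLAIM, which stand in for the old
__GFP_WAIT tests throughout this series, boil down to the direct-reclaim and
kswapd-reclaim bits. Roughly, as used in these call sites (a sketch of the
relationship, not a verbatim quote of include/linux/gfp.h):

	/* __GFP_WAIT is split into two independent hints */
	#define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)

	/* "may this allocation sleep?" now means "may it enter direct reclaim?" */
	static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
	{
		return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
	}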
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 7c34f7126b04..c5f9a9e3d1f3 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -25,7 +25,7 @@
25unsigned long xen_get_swiotlb_free_pages(unsigned int order) 25unsigned long xen_get_swiotlb_free_pages(unsigned int order)
26{ 26{
27 struct memblock_region *reg; 27 struct memblock_region *reg;
28 gfp_t flags = __GFP_NOWARN; 28 gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
29 29
30 for_each_memblock(memory, reg) { 30 for_each_memblock(memory, reg) {
31 if (reg->base < (phys_addr_t)0xffffffff) { 31 if (reg->base < (phys_addr_t)0xffffffff) {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 6320361d8d4c..bb4bf6a06ad6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
100 if (IS_ENABLED(CONFIG_ZONE_DMA) && 100 if (IS_ENABLED(CONFIG_ZONE_DMA) &&
101 dev->coherent_dma_mask <= DMA_BIT_MASK(32)) 101 dev->coherent_dma_mask <= DMA_BIT_MASK(32))
102 flags |= GFP_DMA; 102 flags |= GFP_DMA;
103 if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) { 103 if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
104 struct page *page; 104 struct page *page;
105 void *addr; 105 void *addr;
106 106
@@ -148,7 +148,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
148 148
149 size = PAGE_ALIGN(size); 149 size = PAGE_ALIGN(size);
150 150
151 if (!coherent && !(flags & __GFP_WAIT)) { 151 if (!coherent && !gfpflags_allow_blocking(flags)) {
152 struct page *page = NULL; 152 struct page *page = NULL;
153 void *addr = __alloc_from_pool(size, &page, flags); 153 void *addr = __alloc_from_pool(size, &page, flags);
154 154
diff --git a/arch/sh/kernel/cpu/sh5/unwind.c b/arch/sh/kernel/cpu/sh5/unwind.c
index 10aed41757fc..3a4fed406fc6 100644
--- a/arch/sh/kernel/cpu/sh5/unwind.c
+++ b/arch/sh/kernel/cpu/sh5/unwind.c
@@ -159,7 +159,7 @@ static int lookup_prev_stack_frame(unsigned long fp, unsigned long pc,
159 159
160 /* Sign extend */ 160 /* Sign extend */
161 regcache[dest] = 161 regcache[dest] =
162 ((((s64)(u64)op >> 10) & 0xffff) << 54) >> 54; 162 sign_extend64((((u64)op >> 10) & 0xffff), 9);
163 break; 163 break;
164 case (0xd0 >> 2): /* addi */ 164 case (0xd0 >> 2): /* addi */
165 case (0xd4 >> 2): /* addi.l */ 165 case (0xd4 >> 2): /* addi.l */
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index 112ea11c030d..d208c27ccc67 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -101,7 +101,7 @@ static int generate_and_check_address(struct pt_regs *regs,
101 if (displacement_not_indexed) { 101 if (displacement_not_indexed) {
102 __s64 displacement; 102 __s64 displacement;
103 displacement = (opcode >> 10) & 0x3ff; 103 displacement = (opcode >> 10) & 0x3ff;
104 displacement = ((displacement << 54) >> 54); /* sign extend */ 104 displacement = sign_extend64(displacement, 9);
105 addr = (__u64)((__s64)base_address + (displacement << width_shift)); 105 addr = (__u64)((__s64)base_address + (displacement << width_shift));
106 } else { 106 } else {
107 __u64 offset; 107 __u64 offset;
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
index f32ac13934f2..ec863b9a9f78 100644
--- a/arch/x86/kernel/cpu/perf_event_msr.c
+++ b/arch/x86/kernel/cpu/perf_event_msr.c
@@ -163,10 +163,9 @@ again:
163 goto again; 163 goto again;
164 164
165 delta = now - prev; 165 delta = now - prev;
166 if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) { 166 if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
167 delta <<= 32; 167 delta = sign_extend64(delta, 31);
168 delta >>= 32; /* sign extend */ 168
169 }
170 local64_add(now - prev, &event->count); 169 local64_add(now - prev, &event->count);
171} 170}
172 171
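All three conversions above replace the open-coded shift-pair sign-extension
idiom with the sign_extend64() helper added to include/linux/bitops.h in this
series. A stand-alone check of the equivalence, using a local re-statement of
the helper written from its usage here (treat the exact body as an
approximation):

	#include <assert.h>
	#include <stdint.h>

	/* sign-extend 'value', treating bit 'index' as the sign bit */
	static inline int64_t sign_extend64(uint64_t value, int index)
	{
		uint8_t shift = 63 - index;

		return (int64_t)(value << shift) >> shift;
	}

	int main(void)
	{
		uint64_t field = 0x3ff;	/* 10-bit displacement, sign bit set */
		int64_t old_way = (int64_t)(field << 54) >> 54;	/* old idiom */

		assert(sign_extend64(field, 9) == old_way);
		assert(old_way == -1);
		return 0;
	}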
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cd99433b8ba1..6ba014c61d62 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -90,7 +90,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
90again: 90again:
91 page = NULL; 91 page = NULL;
92 /* CMA can be used only in the context which permits sleeping */ 92 /* CMA can be used only in the context which permits sleeping */
93 if (flag & __GFP_WAIT) { 93 if (gfpflags_allow_blocking(flag)) {
94 page = dma_alloc_from_contiguous(dev, count, get_order(size)); 94 page = dma_alloc_from_contiguous(dev, count, get_order(size));
95 if (page && page_to_phys(page) + size > dma_mask) { 95 if (page && page_to_phys(page) + size > dma_mask) {
96 dma_release_from_contiguous(dev, page, count); 96 dma_release_from_contiguous(dev, page, count);
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index f3dfe0d921c2..44c6764d9146 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -169,7 +169,6 @@ CONFIG_FLATMEM_MANUAL=y
169# CONFIG_SPARSEMEM_MANUAL is not set 169# CONFIG_SPARSEMEM_MANUAL is not set
170CONFIG_FLATMEM=y 170CONFIG_FLATMEM=y
171CONFIG_FLAT_NODE_MEM_MAP=y 171CONFIG_FLAT_NODE_MEM_MAP=y
172CONFIG_PAGEFLAGS_EXTENDED=y
173CONFIG_SPLIT_PTLOCK_CPUS=4 172CONFIG_SPLIT_PTLOCK_CPUS=4
174# CONFIG_PHYS_ADDR_T_64BIT is not set 173# CONFIG_PHYS_ADDR_T_64BIT is not set
175CONFIG_ZONE_DMA_FLAG=1 174CONFIG_ZONE_DMA_FLAG=1
diff --git a/block/bio.c b/block/bio.c
index ad3f276d74bc..4f184d938942 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -211,7 +211,7 @@ fallback:
211 bvl = mempool_alloc(pool, gfp_mask); 211 bvl = mempool_alloc(pool, gfp_mask);
212 } else { 212 } else {
213 struct biovec_slab *bvs = bvec_slabs + *idx; 213 struct biovec_slab *bvs = bvec_slabs + *idx;
214 gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); 214 gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
215 215
216 /* 216 /*
217 * Make this allocation restricted and don't dump info on 217 * Make this allocation restricted and don't dump info on
@@ -221,11 +221,11 @@ fallback:
221 __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; 221 __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
222 222
223 /* 223 /*
224 * Try a slab allocation. If this fails and __GFP_WAIT 224 * Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
225 * is set, retry with the 1-entry mempool 225 * is set, retry with the 1-entry mempool
226 */ 226 */
227 bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); 227 bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
228 if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) { 228 if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
229 *idx = BIOVEC_MAX_IDX; 229 *idx = BIOVEC_MAX_IDX;
230 goto fallback; 230 goto fallback;
231 } 231 }
@@ -395,12 +395,12 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
395 * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is 395 * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
396 * backed by the @bs's mempool. 396 * backed by the @bs's mempool.
397 * 397 *
398 * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be 398 * When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
399 * able to allocate a bio. This is due to the mempool guarantees. To make this 399 * always be able to allocate a bio. This is due to the mempool guarantees.
400 * work, callers must never allocate more than 1 bio at a time from this pool. 400 * To make this work, callers must never allocate more than 1 bio at a time
401 * Callers that need to allocate more than 1 bio must always submit the 401 * from this pool. Callers that need to allocate more than 1 bio must always
402 * previously allocated bio for IO before attempting to allocate a new one. 402 * submit the previously allocated bio for IO before attempting to allocate
403 * Failure to do so can cause deadlocks under memory pressure. 403 * a new one. Failure to do so can cause deadlocks under memory pressure.
404 * 404 *
405 * Note that when running under generic_make_request() (i.e. any block 405 * Note that when running under generic_make_request() (i.e. any block
406 * driver), bios are not submitted until after you return - see the code in 406 * driver), bios are not submitted until after you return - see the code in
@@ -459,13 +459,13 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
459 * We solve this, and guarantee forward progress, with a rescuer 459 * We solve this, and guarantee forward progress, with a rescuer
460 * workqueue per bio_set. If we go to allocate and there are 460 * workqueue per bio_set. If we go to allocate and there are
461 * bios on current->bio_list, we first try the allocation 461 * bios on current->bio_list, we first try the allocation
462 * without __GFP_WAIT; if that fails, we punt those bios we 462 * without __GFP_DIRECT_RECLAIM; if that fails, we punt those
463 * would be blocking to the rescuer workqueue before we retry 463 * bios we would be blocking to the rescuer workqueue before
464 * with the original gfp_flags. 464 * we retry with the original gfp_flags.
465 */ 465 */
466 466
467 if (current->bio_list && !bio_list_empty(current->bio_list)) 467 if (current->bio_list && !bio_list_empty(current->bio_list))
468 gfp_mask &= ~__GFP_WAIT; 468 gfp_mask &= ~__GFP_DIRECT_RECLAIM;
469 469
470 p = mempool_alloc(bs->bio_pool, gfp_mask); 470 p = mempool_alloc(bs->bio_pool, gfp_mask);
471 if (!p && gfp_mask != saved_gfp) { 471 if (!p && gfp_mask != saved_gfp) {
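The mempool guarantee spelled out in the comments above only holds if callers
follow the one-bio-in-flight rule; from the caller's side that looks roughly
like the loop below (a sketch only; nr_segments_left() and fill_bio() are
hypothetical helpers standing in for driver-specific code):

	struct bio *bio;

	while (nr_segments_left(ctx)) {			/* hypothetical */
		/* GFP_NOIO includes __GFP_DIRECT_RECLAIM, so with a bio_set
		 * this may sleep but will not fail.
		 */
		bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, bs);
		fill_bio(ctx, bio);			/* hypothetical */

		/* submit before allocating the next bio from the same pool */
		submit_bio(READ, bio);
	}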
diff --git a/block/blk-core.c b/block/blk-core.c
index 89eec7965870..590cca21c24a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
638 if (percpu_ref_tryget_live(&q->q_usage_counter)) 638 if (percpu_ref_tryget_live(&q->q_usage_counter))
639 return 0; 639 return 0;
640 640
641 if (!(gfp & __GFP_WAIT)) 641 if (!gfpflags_allow_blocking(gfp))
642 return -EBUSY; 642 return -EBUSY;
643 643
644 ret = wait_event_interruptible(q->mq_freeze_wq, 644 ret = wait_event_interruptible(q->mq_freeze_wq,
@@ -1206,8 +1206,8 @@ rq_starved:
1206 * @bio: bio to allocate request for (can be %NULL) 1206 * @bio: bio to allocate request for (can be %NULL)
1207 * @gfp_mask: allocation mask 1207 * @gfp_mask: allocation mask
1208 * 1208 *
1209 * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this 1209 * Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
1210 * function keeps retrying under memory pressure and fails iff @q is dead. 1210 * this function keeps retrying under memory pressure and fails iff @q is dead.
1211 * 1211 *
1212 * Must be called with @q->queue_lock held and, 1212 * Must be called with @q->queue_lock held and,
1213 * Returns ERR_PTR on failure, with @q->queue_lock held. 1213 * Returns ERR_PTR on failure, with @q->queue_lock held.
@@ -1227,7 +1227,7 @@ retry:
1227 if (!IS_ERR(rq)) 1227 if (!IS_ERR(rq))
1228 return rq; 1228 return rq;
1229 1229
1230 if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { 1230 if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
1231 blk_put_rl(rl); 1231 blk_put_rl(rl);
1232 return rq; 1232 return rq;
1233 } 1233 }
@@ -1305,11 +1305,11 @@ EXPORT_SYMBOL(blk_get_request);
1305 * BUG. 1305 * BUG.
1306 * 1306 *
1307 * WARNING: When allocating/cloning a bio-chain, careful consideration should be 1307 * WARNING: When allocating/cloning a bio-chain, careful consideration should be
1308 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for 1308 * given to how you allocate bios. In particular, you cannot use
1309 * anything but the first bio in the chain. Otherwise you risk waiting for IO 1309 * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise
1310 * completion of a bio that hasn't been submitted yet, thus resulting in a 1310 * you risk waiting for IO completion of a bio that hasn't been submitted yet,
1311 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead 1311 * thus resulting in a deadlock. Alternatively bios should be allocated using
1312 * of bio_alloc(), as that avoids the mempool deadlock. 1312 * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock.
1313 * If possible a big IO should be split into smaller parts when allocation 1313 * If possible a big IO should be split into smaller parts when allocation
1314 * fails. Partial allocation should not be an error, or you risk a live-lock. 1314 * fails. Partial allocation should not be an error, or you risk a live-lock.
1315 */ 1315 */
@@ -2038,7 +2038,7 @@ void generic_make_request(struct bio *bio)
2038 do { 2038 do {
2039 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 2039 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
2040 2040
2041 if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) { 2041 if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
2042 2042
2043 q->make_request_fn(q, bio); 2043 q->make_request_fn(q, bio);
2044 2044
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 1a27f45ec776..381cb50a673c 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -289,7 +289,7 @@ struct io_context *get_task_io_context(struct task_struct *task,
289{ 289{
290 struct io_context *ioc; 290 struct io_context *ioc;
291 291
292 might_sleep_if(gfp_flags & __GFP_WAIT); 292 might_sleep_if(gfpflags_allow_blocking(gfp_flags));
293 293
294 do { 294 do {
295 task_lock(task); 295 task_lock(task);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 60ac684c8b8c..a07ca3488d96 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
268 if (tag != -1) 268 if (tag != -1)
269 return tag; 269 return tag;
270 270
271 if (!(data->gfp & __GFP_WAIT)) 271 if (!gfpflags_allow_blocking(data->gfp))
272 return -1; 272 return -1;
273 273
274 bs = bt_wait_ptr(bt, hctx); 274 bs = bt_wait_ptr(bt, hctx);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1c27b3eaef64..694f8703f83c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -244,11 +244,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
244 244
245 ctx = blk_mq_get_ctx(q); 245 ctx = blk_mq_get_ctx(q);
246 hctx = q->mq_ops->map_queue(q, ctx->cpu); 246 hctx = q->mq_ops->map_queue(q, ctx->cpu);
247 blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT, 247 blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
248 reserved, ctx, hctx); 248 reserved, ctx, hctx);
249 249
250 rq = __blk_mq_alloc_request(&alloc_data, rw); 250 rq = __blk_mq_alloc_request(&alloc_data, rw);
251 if (!rq && (gfp & __GFP_WAIT)) { 251 if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
252 __blk_mq_run_hw_queue(hctx); 252 __blk_mq_run_hw_queue(hctx);
253 blk_mq_put_ctx(ctx); 253 blk_mq_put_ctx(ctx);
254 254
@@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
1186 ctx = blk_mq_get_ctx(q); 1186 ctx = blk_mq_get_ctx(q);
1187 hctx = q->mq_ops->map_queue(q, ctx->cpu); 1187 hctx = q->mq_ops->map_queue(q, ctx->cpu);
1188 blk_mq_set_alloc_data(&alloc_data, q, 1188 blk_mq_set_alloc_data(&alloc_data, q,
1189 __GFP_WAIT|GFP_ATOMIC, false, ctx, hctx); 1189 __GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx);
1190 rq = __blk_mq_alloc_request(&alloc_data, rw); 1190 rq = __blk_mq_alloc_request(&alloc_data, rw);
1191 ctx = alloc_data.ctx; 1191 ctx = alloc_data.ctx;
1192 hctx = alloc_data.hctx; 1192 hctx = alloc_data.hctx;
diff --git a/block/ioprio.c b/block/ioprio.c
index 31666c92b46a..cc7800e9eb44 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -123,7 +123,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
123 break; 123 break;
124 124
125 do_each_thread(g, p) { 125 do_each_thread(g, p) {
126 if (!uid_eq(task_uid(p), uid)) 126 if (!uid_eq(task_uid(p), uid) ||
127 !task_pid_vnr(p))
127 continue; 128 continue;
128 ret = set_task_ioprio(p, ioprio); 129 ret = set_task_ioprio(p, ioprio);
129 if (ret) 130 if (ret)
@@ -220,7 +221,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
220 break; 221 break;
221 222
222 do_each_thread(g, p) { 223 do_each_thread(g, p) {
223 if (!uid_eq(task_uid(p), user->uid)) 224 if (!uid_eq(task_uid(p), user->uid) ||
225 !task_pid_vnr(p))
224 continue; 226 continue;
225 tmpio = get_task_ioprio(p); 227 tmpio = get_task_ioprio(p);
226 if (tmpio < 0) 228 if (tmpio < 0)
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index dda653ce7b24..0774799942e0 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -444,7 +444,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
444 444
445 } 445 }
446 446
447 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); 447 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_RECLAIM);
448 if (IS_ERR(rq)) { 448 if (IS_ERR(rq)) {
449 err = PTR_ERR(rq); 449 err = PTR_ERR(rq);
450 goto error_free_buffer; 450 goto error_free_buffer;
@@ -495,7 +495,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
495 break; 495 break;
496 } 496 }
497 497
498 if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) { 498 if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) {
499 err = DRIVER_ERROR << 24; 499 err = DRIVER_ERROR << 24;
500 goto error; 500 goto error;
501 } 501 }
@@ -536,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
536 struct request *rq; 536 struct request *rq;
537 int err; 537 int err;
538 538
539 rq = blk_get_request(q, WRITE, __GFP_WAIT); 539 rq = blk_get_request(q, WRITE, __GFP_RECLAIM);
540 if (IS_ERR(rq)) 540 if (IS_ERR(rq))
541 return PTR_ERR(rq); 541 return PTR_ERR(rq);
542 blk_rq_set_block_pc(rq); 542 blk_rq_set_block_pc(rq);
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index d3d73d114a46..9462d2752850 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1007,7 +1007,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
1007 bm_set_page_unchanged(b->bm_pages[page_nr]); 1007 bm_set_page_unchanged(b->bm_pages[page_nr]);
1008 1008
1009 if (ctx->flags & BM_AIO_COPY_PAGES) { 1009 if (ctx->flags & BM_AIO_COPY_PAGES) {
1010 page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); 1010 page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM);
1011 copy_highpage(page, b->bm_pages[page_nr]); 1011 copy_highpage(page, b->bm_pages[page_nr]);
1012 bm_store_page_idx(page, page_nr); 1012 bm_store_page_idx(page, page_nr);
1013 } else 1013 } else
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c097909c589c..b4b5680ac6ad 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -357,7 +357,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
357 } 357 }
358 358
359 if (has_payload && data_size) { 359 if (has_payload && data_size) {
360 page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT)); 360 page = drbd_alloc_pages(peer_device, nr_pages,
361 gfpflags_allow_blocking(gfp_mask));
361 if (!page) 362 if (!page)
362 goto fail; 363 goto fail;
363 } 364 }
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index f504232c1ee7..a28a562f7b7f 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
173{ 173{
174 struct request *rq; 174 struct request *rq;
175 175
176 rq = blk_mq_alloc_request(dd->queue, 0, __GFP_WAIT, true); 176 rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true);
177 return blk_mq_rq_to_pdu(rq); 177 return blk_mq_rq_to_pdu(rq);
178} 178}
179 179
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1b87623381e2..93b3f99b6865 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -444,9 +444,7 @@ static int nbd_thread_recv(struct nbd_device *nbd)
444 spin_unlock_irqrestore(&nbd->tasks_lock, flags); 444 spin_unlock_irqrestore(&nbd->tasks_lock, flags);
445 445
446 if (signal_pending(current)) { 446 if (signal_pending(current)) {
447 siginfo_t info; 447 ret = kernel_dequeue_signal(NULL);
448
449 ret = dequeue_signal_lock(current, &current->blocked, &info);
450 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", 448 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
451 task_pid_nr(current), current->comm, ret); 449 task_pid_nr(current), current->comm, ret);
452 mutex_lock(&nbd->tx_lock); 450 mutex_lock(&nbd->tx_lock);
@@ -560,11 +558,8 @@ static int nbd_thread_send(void *data)
560 !list_empty(&nbd->waiting_queue)); 558 !list_empty(&nbd->waiting_queue));
561 559
562 if (signal_pending(current)) { 560 if (signal_pending(current)) {
563 siginfo_t info; 561 int ret = kernel_dequeue_signal(NULL);
564 int ret;
565 562
566 ret = dequeue_signal_lock(current, &current->blocked,
567 &info);
568 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", 563 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
569 task_pid_nr(current), current->comm, ret); 564 task_pid_nr(current), current->comm, ret);
570 mutex_lock(&nbd->tx_lock); 565 mutex_lock(&nbd->tx_lock);
@@ -592,10 +587,8 @@ static int nbd_thread_send(void *data)
592 spin_unlock_irqrestore(&nbd->tasks_lock, flags); 587 spin_unlock_irqrestore(&nbd->tasks_lock, flags);
593 588
594 /* Clear maybe pending signals */ 589 /* Clear maybe pending signals */
595 if (signal_pending(current)) { 590 if (signal_pending(current))
596 siginfo_t info; 591 kernel_dequeue_signal(NULL);
597 dequeue_signal_lock(current, &current->blocked, &info);
598 }
599 592
600 return 0; 593 return 0;
601} 594}
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index e22942596207..1b709a4e3b5e 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -271,7 +271,7 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
271 goto err_out; 271 goto err_out;
272 272
273 tmp->bi_bdev = NULL; 273 tmp->bi_bdev = NULL;
274 gfpmask &= ~__GFP_WAIT; 274 gfpmask &= ~__GFP_DIRECT_RECLAIM;
275 tmp->bi_next = NULL; 275 tmp->bi_next = NULL;
276 276
277 if (!new_chain) 277 if (!new_chain)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index b9242d78283d..562b5a4ca7b7 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -723,7 +723,7 @@ static int pd_special_command(struct pd_unit *disk,
723 struct request *rq; 723 struct request *rq;
724 int err = 0; 724 int err = 0;
725 725
726 rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT); 726 rq = blk_get_request(disk->gd->queue, READ, __GFP_RECLAIM);
727 if (IS_ERR(rq)) 727 if (IS_ERR(rq))
728 return PTR_ERR(rq); 728 return PTR_ERR(rq);
729 729
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index cd813f9110bf..2f477d45d6cf 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -704,14 +704,14 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
704 int ret = 0; 704 int ret = 0;
705 705
706 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? 706 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
707 WRITE : READ, __GFP_WAIT); 707 WRITE : READ, __GFP_RECLAIM);
708 if (IS_ERR(rq)) 708 if (IS_ERR(rq))
709 return PTR_ERR(rq); 709 return PTR_ERR(rq);
710 blk_rq_set_block_pc(rq); 710 blk_rq_set_block_pc(rq);
711 711
712 if (cgc->buflen) { 712 if (cgc->buflen) {
713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, 713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
714 __GFP_WAIT); 714 __GFP_RECLAIM);
715 if (ret) 715 if (ret)
716 goto out; 716 goto out;
717 } 717 }
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9fa15bb9d118..81a557c33a1f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -106,7 +106,7 @@ static void zram_set_obj_size(struct zram_meta *meta,
106 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; 106 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
107} 107}
108 108
109static inline int is_partial_io(struct bio_vec *bvec) 109static inline bool is_partial_io(struct bio_vec *bvec)
110{ 110{
111 return bvec->bv_len != PAGE_SIZE; 111 return bvec->bv_len != PAGE_SIZE;
112} 112}
@@ -114,25 +114,25 @@ static inline int is_partial_io(struct bio_vec *bvec)
114/* 114/*
115 * Check if request is within bounds and aligned on zram logical blocks. 115 * Check if request is within bounds and aligned on zram logical blocks.
116 */ 116 */
117static inline int valid_io_request(struct zram *zram, 117static inline bool valid_io_request(struct zram *zram,
118 sector_t start, unsigned int size) 118 sector_t start, unsigned int size)
119{ 119{
120 u64 end, bound; 120 u64 end, bound;
121 121
122 /* unaligned request */ 122 /* unaligned request */
123 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 123 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
124 return 0; 124 return false;
125 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 125 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
126 return 0; 126 return false;
127 127
128 end = start + (size >> SECTOR_SHIFT); 128 end = start + (size >> SECTOR_SHIFT);
129 bound = zram->disksize >> SECTOR_SHIFT; 129 bound = zram->disksize >> SECTOR_SHIFT;
130 /* out of range range */ 130 /* out of range range */
131 if (unlikely(start >= bound || end > bound || start > end)) 131 if (unlikely(start >= bound || end > bound || start > end))
132 return 0; 132 return false;
133 133
134 /* I/O request is valid */ 134 /* I/O request is valid */
135 return 1; 135 return true;
136} 136}
137 137
138static void update_position(u32 *index, int *offset, struct bio_vec *bvec) 138static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
@@ -157,7 +157,7 @@ static inline void update_used_max(struct zram *zram,
157 } while (old_max != cur_max); 157 } while (old_max != cur_max);
158} 158}
159 159
160static int page_zero_filled(void *ptr) 160static bool page_zero_filled(void *ptr)
161{ 161{
162 unsigned int pos; 162 unsigned int pos;
163 unsigned long *page; 163 unsigned long *page;
@@ -166,10 +166,10 @@ static int page_zero_filled(void *ptr)
166 166
167 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { 167 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
168 if (page[pos]) 168 if (page[pos])
169 return 0; 169 return false;
170 } 170 }
171 171
172 return 1; 172 return true;
173} 173}
174 174
175static void handle_zero_page(struct bio_vec *bvec) 175static void handle_zero_page(struct bio_vec *bvec)
@@ -365,6 +365,9 @@ static ssize_t comp_algorithm_store(struct device *dev,
365 struct zram *zram = dev_to_zram(dev); 365 struct zram *zram = dev_to_zram(dev);
366 size_t sz; 366 size_t sz;
367 367
368 if (!zcomp_available_algorithm(buf))
369 return -EINVAL;
370
368 down_write(&zram->init_lock); 371 down_write(&zram->init_lock);
369 if (init_done(zram)) { 372 if (init_done(zram)) {
370 up_write(&zram->init_lock); 373 up_write(&zram->init_lock);
@@ -378,9 +381,6 @@ static ssize_t comp_algorithm_store(struct device *dev,
378 if (sz > 0 && zram->compressor[sz - 1] == '\n') 381 if (sz > 0 && zram->compressor[sz - 1] == '\n')
379 zram->compressor[sz - 1] = 0x00; 382 zram->compressor[sz - 1] = 0x00;
380 383
381 if (!zcomp_available_algorithm(zram->compressor))
382 len = -EINVAL;
383
384 up_write(&zram->init_lock); 384 up_write(&zram->init_lock);
385 return len; 385 return len;
386} 386}
@@ -726,14 +726,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
726 } 726 }
727 727
728 alloced_pages = zs_get_total_pages(meta->mem_pool); 728 alloced_pages = zs_get_total_pages(meta->mem_pool);
729 update_used_max(zram, alloced_pages);
730
729 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 731 if (zram->limit_pages && alloced_pages > zram->limit_pages) {
730 zs_free(meta->mem_pool, handle); 732 zs_free(meta->mem_pool, handle);
731 ret = -ENOMEM; 733 ret = -ENOMEM;
732 goto out; 734 goto out;
733 } 735 }
734 736
735 update_used_max(zram, alloced_pages);
736
737 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); 737 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
738 738
739 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { 739 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 30f522848c73..d7373ca69c99 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -124,7 +124,8 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
124 if (group) 124 if (group)
125 return netlink_broadcast(dev->nls, skb, portid, group, 125 return netlink_broadcast(dev->nls, skb, portid, group,
126 gfp_mask); 126 gfp_mask);
127 return netlink_unicast(dev->nls, skb, portid, !(gfp_mask&__GFP_WAIT)); 127 return netlink_unicast(dev->nls, skb, portid,
128 !gfpflags_allow_blocking(gfp_mask));
128} 129}
129EXPORT_SYMBOL_GPL(cn_netlink_send_mult); 130EXPORT_SYMBOL_GPL(cn_netlink_send_mult);
130 131
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 2a3973a7c441..36a7c2d89a01 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -486,7 +486,7 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
486static int add_client_resource(struct client *client, 486static int add_client_resource(struct client *client,
487 struct client_resource *resource, gfp_t gfp_mask) 487 struct client_resource *resource, gfp_t gfp_mask)
488{ 488{
489 bool preload = !!(gfp_mask & __GFP_WAIT); 489 bool preload = gfpflags_allow_blocking(gfp_mask);
490 unsigned long flags; 490 unsigned long flags;
491 int ret; 491 int ret;
492 492
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 3c2d4abd71c5..1d47d2e9487c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -491,7 +491,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
491 * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping) 491 * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping)
492 * so shmem can relocate pages during swapin if required. 492 * so shmem can relocate pages during swapin if required.
493 */ 493 */
494 BUG_ON((mapping_gfp_mask(mapping) & __GFP_DMA32) && 494 BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) &&
495 (page_to_pfn(p) >= 0x00100000UL)); 495 (page_to_pfn(p) >= 0x00100000UL));
496 } 496 }
497 497
diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
index 4924d381b664..daa2ff12101b 100644
--- a/drivers/gpu/drm/drm_lock.c
+++ b/drivers/gpu/drm/drm_lock.c
@@ -38,8 +38,6 @@
38#include "drm_legacy.h" 38#include "drm_legacy.h"
39#include "drm_internal.h" 39#include "drm_internal.h"
40 40
41static int drm_notifier(void *priv);
42
43static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); 41static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context);
44 42
45/** 43/**
@@ -118,14 +116,8 @@ int drm_legacy_lock(struct drm_device *dev, void *data,
118 * really probably not the correct answer but lets us debug xkb 116 * really probably not the correct answer but lets us debug xkb
119 * xserver for now */ 117 * xserver for now */
120 if (!file_priv->is_master) { 118 if (!file_priv->is_master) {
121 sigemptyset(&dev->sigmask);
122 sigaddset(&dev->sigmask, SIGSTOP);
123 sigaddset(&dev->sigmask, SIGTSTP);
124 sigaddset(&dev->sigmask, SIGTTIN);
125 sigaddset(&dev->sigmask, SIGTTOU);
126 dev->sigdata.context = lock->context; 119 dev->sigdata.context = lock->context;
127 dev->sigdata.lock = master->lock.hw_lock; 120 dev->sigdata.lock = master->lock.hw_lock;
128 block_all_signals(drm_notifier, dev, &dev->sigmask);
129 } 121 }
130 122
131 if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) 123 if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT))
@@ -169,7 +161,6 @@ int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_
169 /* FIXME: Should really bail out here. */ 161 /* FIXME: Should really bail out here. */
170 } 162 }
171 163
172 unblock_all_signals();
173 return 0; 164 return 0;
174} 165}
175 166
@@ -288,38 +279,6 @@ int drm_legacy_lock_free(struct drm_lock_data *lock_data, unsigned int context)
288} 279}
289 280
290/** 281/**
291 * If we get here, it means that the process has called DRM_IOCTL_LOCK
292 * without calling DRM_IOCTL_UNLOCK.
293 *
294 * If the lock is not held, then let the signal proceed as usual. If the lock
295 * is held, then set the contended flag and keep the signal blocked.
296 *
297 * \param priv pointer to a drm_device structure.
298 * \return one if the signal should be delivered normally, or zero if the
299 * signal should be blocked.
300 */
301static int drm_notifier(void *priv)
302{
303 struct drm_device *dev = priv;
304 struct drm_hw_lock *lock = dev->sigdata.lock;
305 unsigned int old, new, prev;
306
307 /* Allow signal delivery if lock isn't held */
308 if (!lock || !_DRM_LOCK_IS_HELD(lock->lock)
309 || _DRM_LOCKING_CONTEXT(lock->lock) != dev->sigdata.context)
310 return 1;
311
312 /* Otherwise, set flag to force call to
313 drmUnlock */
314 do {
315 old = lock->lock;
316 new = old | _DRM_LOCK_CONT;
317 prev = cmpxchg(&lock->lock, old, new);
318 } while (prev != old);
319 return 0;
320}
321
322/**
323 * This function returns immediately and takes the hw lock 282 * This function returns immediately and takes the hw lock
324 * with the kernel context if it is free, otherwise it gets the highest priority when and if 283 * with the kernel context if it is free, otherwise it gets the highest priority when and if
325 * it is eventually released. 284 * it is eventually released.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4d631a946481..399aab265db3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2214,9 +2214,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2214 * Fail silently without starting the shrinker 2214 * Fail silently without starting the shrinker
2215 */ 2215 */
2216 mapping = file_inode(obj->base.filp)->i_mapping; 2216 mapping = file_inode(obj->base.filp)->i_mapping;
2217 gfp = mapping_gfp_mask(mapping); 2217 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2218 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2218 gfp |= __GFP_NORETRY | __GFP_NOWARN;
2219 gfp &= ~(__GFP_IO | __GFP_WAIT);
2220 sg = st->sgl; 2219 sg = st->sgl;
2221 st->nents = 0; 2220 st->nents = 0;
2222 for (i = 0; i < page_count; i++) { 2221 for (i = 0; i < page_count; i++) {
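mapping_gfp_constraint(), used above in place of the old mask-then-clear
sequence, is simply mapping_gfp_mask() restricted by the given mask;
conceptually it is (a sketch of the helper this series adds to
include/linux/pagemap.h, possibly not its literal text):

	static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
						   gfp_t gfp_mask)
	{
		return mapping_gfp_mask(mapping) & gfp_mask;
	}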
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 1362ad80a76c..05352f490d60 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
92 struct request *rq; 92 struct request *rq;
93 int error; 93 int error;
94 94
95 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 95 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
96 rq->cmd_type = REQ_TYPE_DRV_PRIV; 96 rq->cmd_type = REQ_TYPE_DRV_PRIV;
97 rq->special = (char *)pc; 97 rq->special = (char *)pc;
98 98
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 64a6b827b3dd..ef907fd5ba98 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -441,7 +441,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
441 struct request *rq; 441 struct request *rq;
442 int error; 442 int error;
443 443
444 rq = blk_get_request(drive->queue, write, __GFP_WAIT); 444 rq = blk_get_request(drive->queue, write, __GFP_RECLAIM);
445 445
446 memcpy(rq->cmd, cmd, BLK_MAX_CDB); 446 memcpy(rq->cmd, cmd, BLK_MAX_CDB);
447 rq->cmd_type = REQ_TYPE_ATA_PC; 447 rq->cmd_type = REQ_TYPE_ATA_PC;
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 066e39036518..474173eb31bb 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -303,7 +303,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
303 struct request *rq; 303 struct request *rq;
304 int ret; 304 int ret;
305 305
306 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 306 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
307 rq->cmd_type = REQ_TYPE_DRV_PRIV; 307 rq->cmd_type = REQ_TYPE_DRV_PRIV;
308 rq->cmd_flags = REQ_QUIET; 308 rq->cmd_flags = REQ_QUIET;
309 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); 309 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index b05a74d78ef5..0dd43b4fcec6 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -165,7 +165,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
165 if (!(setting->flags & DS_SYNC)) 165 if (!(setting->flags & DS_SYNC))
166 return setting->set(drive, arg); 166 return setting->set(drive, arg);
167 167
168 rq = blk_get_request(q, READ, __GFP_WAIT); 168 rq = blk_get_request(q, READ, __GFP_RECLAIM);
169 rq->cmd_type = REQ_TYPE_DRV_PRIV; 169 rq->cmd_type = REQ_TYPE_DRV_PRIV;
170 rq->cmd_len = 5; 170 rq->cmd_len = 5;
171 rq->cmd[0] = REQ_DEVSET_EXEC; 171 rq->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 56b9708894a5..37a8a907febe 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -477,7 +477,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
477 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) 477 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
478 return -EBUSY; 478 return -EBUSY;
479 479
480 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 480 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
481 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 481 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
482 482
483 drive->mult_req = arg; 483 drive->mult_req = arg;
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index aa2e9b77b20d..d05db2469209 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
125 if (NULL == (void *) arg) { 125 if (NULL == (void *) arg) {
126 struct request *rq; 126 struct request *rq;
127 127
128 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 128 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
129 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 129 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
130 err = blk_execute_rq(drive->queue, NULL, rq, 0); 130 err = blk_execute_rq(drive->queue, NULL, rq, 0);
131 blk_put_request(rq); 131 blk_put_request(rq);
@@ -221,7 +221,7 @@ static int generic_drive_reset(ide_drive_t *drive)
221 struct request *rq; 221 struct request *rq;
222 int ret = 0; 222 int ret = 0;
223 223
224 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 224 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
225 rq->cmd_type = REQ_TYPE_DRV_PRIV; 225 rq->cmd_type = REQ_TYPE_DRV_PRIV;
226 rq->cmd_len = 1; 226 rq->cmd_len = 1;
227 rq->cmd[0] = REQ_DRIVE_RESET; 227 rq->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index c80868520488..2d7dca56dd24 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -31,7 +31,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
31 } 31 }
32 spin_unlock_irq(&hwif->lock); 32 spin_unlock_irq(&hwif->lock);
33 33
34 rq = blk_get_request(q, READ, __GFP_WAIT); 34 rq = blk_get_request(q, READ, __GFP_RECLAIM);
35 rq->cmd[0] = REQ_PARK_HEADS; 35 rq->cmd[0] = REQ_PARK_HEADS;
36 rq->cmd_len = 1; 36 rq->cmd_len = 1;
37 rq->cmd_type = REQ_TYPE_DRV_PRIV; 37 rq->cmd_type = REQ_TYPE_DRV_PRIV;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 081e43458d50..e34af488693a 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -18,7 +18,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
18 } 18 }
19 19
20 memset(&rqpm, 0, sizeof(rqpm)); 20 memset(&rqpm, 0, sizeof(rqpm));
21 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 21 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
22 rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND; 22 rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND;
23 rq->special = &rqpm; 23 rq->special = &rqpm;
24 rqpm.pm_step = IDE_PM_START_SUSPEND; 24 rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -88,7 +88,7 @@ int generic_ide_resume(struct device *dev)
88 } 88 }
89 89
90 memset(&rqpm, 0, sizeof(rqpm)); 90 memset(&rqpm, 0, sizeof(rqpm));
91 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 91 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
92 rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; 92 rq->cmd_type = REQ_TYPE_ATA_PM_RESUME;
93 rq->cmd_flags |= REQ_PREEMPT; 93 rq->cmd_flags |= REQ_PREEMPT;
94 rq->special = &rqpm; 94 rq->special = &rqpm;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index f5d51d1d09ee..12fa04997dcc 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -852,7 +852,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
852 BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); 852 BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
853 BUG_ON(size < 0 || size % tape->blk_size); 853 BUG_ON(size < 0 || size % tape->blk_size);
854 854
855 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 855 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
856 rq->cmd_type = REQ_TYPE_DRV_PRIV; 856 rq->cmd_type = REQ_TYPE_DRV_PRIV;
857 rq->cmd[13] = cmd; 857 rq->cmd[13] = cmd;
858 rq->rq_disk = tape->disk; 858 rq->rq_disk = tape->disk;
@@ -860,7 +860,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
860 860
861 if (size) { 861 if (size) {
862 ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, 862 ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
863 __GFP_WAIT); 863 __GFP_RECLAIM);
864 if (ret) 864 if (ret)
865 goto out_put; 865 goto out_put;
866 } 866 }
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 0979e126fff1..a716693417a3 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -430,7 +430,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
430 int error; 430 int error;
431 int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE; 431 int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE;
432 432
433 rq = blk_get_request(drive->queue, rw, __GFP_WAIT); 433 rq = blk_get_request(drive->queue, rw, __GFP_RECLAIM);
434 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 434 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
435 435
436 /* 436 /*
@@ -441,7 +441,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
441 */ 441 */
442 if (nsect) { 442 if (nsect) {
443 error = blk_rq_map_kern(drive->queue, rq, buf, 443 error = blk_rq_map_kern(drive->queue, rq, buf,
444 nsect * SECTOR_SIZE, __GFP_WAIT); 444 nsect * SECTOR_SIZE, __GFP_RECLAIM);
445 if (error) 445 if (error)
446 goto put_req; 446 goto put_req;
447 } 447 }
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index dcdaa79e3f0f..2aba774f835b 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1086,7 +1086,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
1086 1086
1087static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) 1087static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
1088{ 1088{
1089 bool preload = !!(gfp_mask & __GFP_WAIT); 1089 bool preload = gfpflags_allow_blocking(gfp_mask);
1090 unsigned long flags; 1090 unsigned long flags;
1091 int ret, id; 1091 int ret, id;
1092 1092
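
The hunks above show the two recurring shapes of this series: a mechanical spelling change (__GFP_WAIT becomes __GFP_RECLAIM in the ide and block request paths) and a semantic one (send_mad() now asks gfpflags_allow_blocking() instead of testing the mask by hand). The following is a standalone sketch of the flag split, using placeholder bit values rather than the real definitions from include/linux/gfp.h; only the relationships between the macros are meant to mirror the kernel after this change.

/* gfp_split_sketch.c - illustrative model of the __GFP_WAIT split.
 * Placeholder bit values; only the macro relationships mirror the kernel. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;

#define __GFP_KSWAPD_RECLAIM 0x01u /* may wake kswapd (background reclaim) */
#define __GFP_DIRECT_RECLAIM 0x02u /* may enter direct reclaim, i.e. may sleep */
#define __GFP_IO             0x04u
#define __GFP_FS             0x08u

/* The old __GFP_WAIT spelling becomes the union of both reclaim bits. */
#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)
#define GFP_NOIO      (__GFP_RECLAIM)
#define GFP_NOFS      (__GFP_RECLAIM | __GFP_IO)
#define GFP_KERNEL    (__GFP_RECLAIM | __GFP_IO | __GFP_FS)

/* Same shape as the helper send_mad() switches to: blocking is implied
 * only by the direct-reclaim bit, never by the kswapd bit. */
static bool gfpflags_allow_blocking(gfp_t flags)
{
	return !!(flags & __GFP_DIRECT_RECLAIM);
}

int main(void)
{
	printf("GFP_KERNEL may block: %d\n", gfpflags_allow_blocking(GFP_KERNEL));
	printf("GFP_KERNEL minus direct reclaim may block: %d\n",
	       gfpflags_allow_blocking(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM));
	return 0;
}

On this model, masking out __GFP_DIRECT_RECLAIM (as the btrfs and fscache radix-tree initialisers further down do) leaves kswapd wake-ups intact while forbidding the allocator from sleeping.
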
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 7e00470adc30..4ff340fe904f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1680,7 +1680,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
1680 * heavy filesystem activity makes these fail, and we can 1680 * heavy filesystem activity makes these fail, and we can
1681 * use compound pages. 1681 * use compound pages.
1682 */ 1682 */
1683 gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; 1683 gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
1684 1684
1685 egrcnt = rcd->rcvegrcnt; 1685 egrcnt = rcd->rcvegrcnt;
1686 egroff = rcd->rcvegr_tid_base; 1686 egroff = rcd->rcvegr_tid_base;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0d533bba4ad1..8b2be1e7714f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2668,7 +2668,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
2668 2668
2669 page = alloc_pages(flag | __GFP_NOWARN, get_order(size)); 2669 page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
2670 if (!page) { 2670 if (!page) {
2671 if (!(flag & __GFP_WAIT)) 2671 if (!gfpflags_allow_blocking(flag))
2672 return NULL; 2672 return NULL;
2673 2673
2674 page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, 2674 page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 7cf80c1a8a16..f1042daef9ad 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3647,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
3647 flags |= GFP_DMA32; 3647 flags |= GFP_DMA32;
3648 } 3648 }
3649 3649
3650 if (flags & __GFP_WAIT) { 3650 if (gfpflags_allow_blocking(flags)) {
3651 unsigned int count = size >> PAGE_SHIFT; 3651 unsigned int count = size >> PAGE_SHIFT;
3652 3652
3653 page = dma_alloc_from_contiguous(dev, count, order); 3653 page = dma_alloc_from_contiguous(dev, count, order);
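
Both IOMMU hunks gate the contiguous-allocator (CMA) path on whether the caller may block: the AMD driver falls back to it only after a cheap alloc_pages() fails and gfpflags_allow_blocking() says sleeping is acceptable, and the Intel driver only tries it at all for blocking callers. Below is a hedged, self-contained sketch of that fallback pattern; fast_pool_alloc() and slow_contig_alloc() are made-up stand-ins, not the drivers' real helpers.

/* fallback_sketch.c - "cheap first, blocking fallback second" pattern. */
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_DIRECT_RECLAIM 0x02u /* placeholder bit, as in the sketch above */

static bool gfpflags_allow_blocking(gfp_t flags)
{
	return !!(flags & __GFP_DIRECT_RECLAIM);
}

/* Hypothetical stand-ins for alloc_pages() and dma_alloc_from_contiguous(). */
static void *fast_pool_alloc(void)   { return NULL; }       /* pretend it failed */
static void *slow_contig_alloc(void) { return malloc(64); } /* may "sleep" */

static void *alloc_coherent_like(gfp_t flag)
{
	void *buf = fast_pool_alloc();

	if (buf)
		return buf;
	/* Atomic callers get NULL instead of a sleeping fallback. */
	if (!gfpflags_allow_blocking(flag))
		return NULL;
	return slow_contig_alloc();
}

int main(void)
{
	void *p;

	printf("atomic caller:   %p\n", alloc_coherent_like(0));
	p = alloc_coherent_like(__GFP_DIRECT_RECLAIM);
	printf("blocking caller: %p\n", p);
	free(p);
	return 0;
}
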
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3729b394432c..917d47e290ae 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -994,7 +994,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
994 struct bio_vec *bvec; 994 struct bio_vec *bvec;
995 995
996retry: 996retry:
997 if (unlikely(gfp_mask & __GFP_WAIT)) 997 if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
998 mutex_lock(&cc->bio_alloc_lock); 998 mutex_lock(&cc->bio_alloc_lock);
999 999
1000 clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); 1000 clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
@@ -1010,7 +1010,7 @@ retry:
1010 if (!page) { 1010 if (!page) {
1011 crypt_free_buffer_pages(cc, clone); 1011 crypt_free_buffer_pages(cc, clone);
1012 bio_put(clone); 1012 bio_put(clone);
1013 gfp_mask |= __GFP_WAIT; 1013 gfp_mask |= __GFP_DIRECT_RECLAIM;
1014 goto retry; 1014 goto retry;
1015 } 1015 }
1016 1016
@@ -1027,7 +1027,7 @@ retry:
1027 } 1027 }
1028 1028
1029return_clone: 1029return_clone:
1030 if (unlikely(gfp_mask & __GFP_WAIT)) 1030 if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
1031 mutex_unlock(&cc->bio_alloc_lock); 1031 mutex_unlock(&cc->bio_alloc_lock);
1032 1032
1033 return clone; 1033 return clone;
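
crypt_alloc_buffer() keeps its existing strategy, just spelled with the new bit: the per-device mutex is taken only when the mask contains __GFP_DIRECT_RECLAIM, and when an opportunistic (non-blocking) pass runs out of pages it adds that bit and retries under the lock. A minimal standalone model of that retry loop follows; the mutex and page pool are simulated, so this is a sketch of the control flow, not of dm-crypt itself.

/* retry_sketch.c - opportunistic first pass, blocking retry under a lock. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_DIRECT_RECLAIM 0x02u /* placeholder bit */

static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
static int cheap_attempts_left;   /* simulate a pool that runs dry */

static bool try_get_page(gfp_t gfp)
{
	if (gfp & __GFP_DIRECT_RECLAIM)
		return true;                  /* blocking path always succeeds here */
	return cheap_attempts_left-- > 0;     /* non-blocking path may fail */
}

static int alloc_buffer(unsigned nr_pages, gfp_t gfp_mask)
{
	unsigned got;
retry:
	if (gfp_mask & __GFP_DIRECT_RECLAIM)
		pthread_mutex_lock(&alloc_lock);

	for (got = 0; got < nr_pages; got++) {
		if (!try_get_page(gfp_mask)) {
			/* give up this pass and retry with blocking allowed
			 * (dm-crypt also frees the partially built bio here) */
			if (gfp_mask & __GFP_DIRECT_RECLAIM)
				pthread_mutex_unlock(&alloc_lock);
			gfp_mask |= __GFP_DIRECT_RECLAIM;
			goto retry;
		}
	}

	if (gfp_mask & __GFP_DIRECT_RECLAIM)
		pthread_mutex_unlock(&alloc_lock);
	return got;
}

int main(void)
{
	cheap_attempts_left = 2;
	printf("allocated %d pages\n", alloc_buffer(4, 0));
	return 0;
}
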
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 3a7cade5e27d..1452ed9aacb4 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -244,7 +244,7 @@ static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
244 *pages = NULL; 244 *pages = NULL;
245 245
246 do { 246 do {
247 pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY); 247 pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
248 if (unlikely(!pl)) { 248 if (unlikely(!pl)) {
249 /* Use reserved pages */ 249 /* Use reserved pages */
250 pl = kc->pages; 250 pl = kc->pages;
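
kcopyd makes the opposite trade-off: it deliberately asks for a cheap, no-retry allocation (now spelled __GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM, since after the split "no direct reclaim" must still wake kswapd explicitly) and falls back to a pre-reserved pool when that fails. A hedged model of that "opportunistic alloc, else take from reserve" shape, with an invented reserve array standing in for kc->pages:

/* reserve_sketch.c - cheap allocation with a private reserve as fallback. */
#include <stdio.h>
#include <stdlib.h>

#define RESERVE_PAGES 4

static void *reserve[RESERVE_PAGES];
static int reserve_top;

/* Stand-in for alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM):
 * a single cheap attempt that is allowed to fail quietly. */
static void *cheap_alloc(void)
{
	return (rand() & 1) ? malloc(4096) : NULL;
}

static void *get_page_like(void)
{
	void *pl = cheap_alloc();

	if (!pl && reserve_top > 0)
		pl = reserve[--reserve_top];    /* use reserved pages */
	return pl;
}

int main(void)
{
	int i;

	for (reserve_top = 0; reserve_top < RESERVE_PAGES; reserve_top++)
		reserve[reserve_top] = malloc(4096);

	for (i = 0; i < 6; i++)
		printf("page %d -> %p\n", i, get_page_like());
	return 0;
}
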
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
index 1bd2fd47421f..4432fd69b7cb 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
@@ -1297,7 +1297,7 @@ static struct solo_enc_dev *solo_enc_alloc(struct solo_dev *solo_dev,
1297 solo_enc->vidq.ops = &solo_enc_video_qops; 1297 solo_enc->vidq.ops = &solo_enc_video_qops;
1298 solo_enc->vidq.mem_ops = &vb2_dma_sg_memops; 1298 solo_enc->vidq.mem_ops = &vb2_dma_sg_memops;
1299 solo_enc->vidq.drv_priv = solo_enc; 1299 solo_enc->vidq.drv_priv = solo_enc;
1300 solo_enc->vidq.gfp_flags = __GFP_DMA32; 1300 solo_enc->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
1301 solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; 1301 solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
1302 solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf); 1302 solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
1303 solo_enc->vidq.lock = &solo_enc->lock; 1303 solo_enc->vidq.lock = &solo_enc->lock;
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2.c b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
index 26df903585d7..f7ce493b1fee 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
@@ -678,7 +678,7 @@ int solo_v4l2_init(struct solo_dev *solo_dev, unsigned nr)
678 solo_dev->vidq.mem_ops = &vb2_dma_contig_memops; 678 solo_dev->vidq.mem_ops = &vb2_dma_contig_memops;
679 solo_dev->vidq.drv_priv = solo_dev; 679 solo_dev->vidq.drv_priv = solo_dev;
680 solo_dev->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; 680 solo_dev->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
681 solo_dev->vidq.gfp_flags = __GFP_DMA32; 681 solo_dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
682 solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf); 682 solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
683 solo_dev->vidq.lock = &solo_dev->lock; 683 solo_dev->vidq.lock = &solo_dev->lock;
684 ret = vb2_queue_init(&solo_dev->vidq); 684 ret = vb2_queue_init(&solo_dev->vidq);
diff --git a/drivers/media/pci/tw68/tw68-video.c b/drivers/media/pci/tw68/tw68-video.c
index 4c3293dcddbc..46642ef9151b 100644
--- a/drivers/media/pci/tw68/tw68-video.c
+++ b/drivers/media/pci/tw68/tw68-video.c
@@ -979,7 +979,7 @@ int tw68_video_init2(struct tw68_dev *dev, int video_nr)
979 dev->vidq.ops = &tw68_video_qops; 979 dev->vidq.ops = &tw68_video_qops;
980 dev->vidq.mem_ops = &vb2_dma_sg_memops; 980 dev->vidq.mem_ops = &vb2_dma_sg_memops;
981 dev->vidq.drv_priv = dev; 981 dev->vidq.drv_priv = dev;
982 dev->vidq.gfp_flags = __GFP_DMA32; 982 dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
983 dev->vidq.buf_struct_size = sizeof(struct tw68_buf); 983 dev->vidq.buf_struct_size = sizeof(struct tw68_buf);
984 dev->vidq.lock = &dev->lock; 984 dev->vidq.lock = &dev->lock;
985 dev->vidq.min_buffers_needed = 2; 985 dev->vidq.min_buffers_needed = 2;
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 89300870fefb..1e688bfec567 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -75,7 +75,7 @@ MODULE_LICENSE("GPL");
75 75
76/* 76/*
77 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't 77 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
78 * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use 78 * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
79 * __GFP_NOWARN, to suppress page allocation failure warnings. 79 * __GFP_NOWARN, to suppress page allocation failure warnings.
80 */ 80 */
81#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN) 81#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN)
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index a91cee90aef9..95c13b2ffa79 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1216,8 +1216,7 @@ EXPORT_SYMBOL_GPL(mtd_writev);
1216 */ 1216 */
1217void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) 1217void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
1218{ 1218{
1219 gfp_t flags = __GFP_NOWARN | __GFP_WAIT | 1219 gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY;
1220 __GFP_NORETRY | __GFP_NO_KSWAPD;
1221 size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); 1220 size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
1222 void *kbuf; 1221 void *kbuf;
1223 1222
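
mtd_kmalloc_up_to() folds the removed __GFP_NO_KSWAPD into the new scheme by requesting __GFP_DIRECT_RECLAIM without the kswapd bit, alongside __GFP_NOWARN | __GFP_NORETRY, and shrinking the request when that fails. The sketch below models the "ask for less until it fits" loop; the halving policy and the final plain attempt are assumptions about the general shape, not a copy of the mtd helper.

/* shrink_sketch.c - best-effort allocation that shrinks the request size. */
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for kmalloc(size, __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY):
 * pretend anything above 64 KiB fails on the cheap path. */
static void *cheap_kmalloc(size_t size)
{
	return size > 64 * 1024 ? NULL : malloc(size);
}

static void *kmalloc_up_to(size_t *size, size_t min_alloc)
{
	void *buf;

	while (*size > min_alloc) {
		buf = cheap_kmalloc(*size);
		if (buf)
			return buf;
		*size >>= 1;                  /* ask for half as much next time */
	}
	*size = min_alloc;
	return malloc(*size);                 /* last resort: the minimum size */
}

int main(void)
{
	size_t size = 1024 * 1024, min_alloc = 4096;
	void *buf = kmalloc_up_to(&size, min_alloc);

	printf("got %zu bytes at %p\n", size, buf);
	free(buf);
	return 0;
}
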
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 44173be5cbf0..f8d7a2f06950 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -691,7 +691,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask)
691{ 691{
692 if (fp->rx_frag_size) { 692 if (fp->rx_frag_size) {
693 /* GFP_KERNEL allocations are used only during initialization */ 693 /* GFP_KERNEL allocations are used only during initialization */
694 if (unlikely(gfp_mask & __GFP_WAIT)) 694 if (unlikely(gfpflags_allow_blocking(gfp_mask)))
695 return (void *)__get_free_page(gfp_mask); 695 return (void *)__get_free_page(gfp_mask);
696 696
697 return netdev_alloc_frag(fp->rx_frag_size); 697 return netdev_alloc_frag(fp->rx_frag_size);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9f4fe3a5f41e..97b6640a3745 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1025,11 +1025,13 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
1025 req->special = (void *)0; 1025 req->special = (void *)0;
1026 1026
1027 if (buffer && bufflen) { 1027 if (buffer && bufflen) {
1028 ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); 1028 ret = blk_rq_map_kern(q, req, buffer, bufflen,
1029 __GFP_DIRECT_RECLAIM);
1029 if (ret) 1030 if (ret)
1030 goto out; 1031 goto out;
1031 } else if (ubuffer && bufflen) { 1032 } else if (ubuffer && bufflen) {
1032 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT); 1033 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
1034 __GFP_DIRECT_RECLAIM);
1033 if (ret) 1035 if (ret)
1034 goto out; 1036 goto out;
1035 bio = req->bio; 1037 bio = req->bio;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 66a96cd98b97..984ddcb4786d 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1970,7 +1970,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
1970 struct request *req; 1970 struct request *req;
1971 1971
1972 /* 1972 /*
1973 * blk_get_request with GFP_KERNEL (__GFP_WAIT) sleeps until a 1973 * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a
1974 * request becomes available 1974 * request becomes available
1975 */ 1975 */
1976 req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); 1976 req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 126a48c6431e..dd8ad2a44510 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -222,13 +222,13 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
222 int write = (data_direction == DMA_TO_DEVICE); 222 int write = (data_direction == DMA_TO_DEVICE);
223 int ret = DRIVER_ERROR << 24; 223 int ret = DRIVER_ERROR << 24;
224 224
225 req = blk_get_request(sdev->request_queue, write, __GFP_WAIT); 225 req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM);
226 if (IS_ERR(req)) 226 if (IS_ERR(req))
227 return ret; 227 return ret;
228 blk_rq_set_block_pc(req); 228 blk_rq_set_block_pc(req);
229 229
230 if (bufflen && blk_rq_map_kern(sdev->request_queue, req, 230 if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
231 buffer, bufflen, __GFP_WAIT)) 231 buffer, bufflen, __GFP_RECLAIM))
232 goto out; 232 goto out;
233 233
234 req->cmd_len = COMMAND_SIZE(cmd[0]); 234 req->cmd_len = COMMAND_SIZE(cmd[0]);
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
index ada724aab3d5..d4c3e5512dd5 100644
--- a/drivers/staging/android/ion/ion_system_heap.c
+++ b/drivers/staging/android/ion/ion_system_heap.c
@@ -27,7 +27,7 @@
27#include "ion_priv.h" 27#include "ion_priv.h"
28 28
29static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN | 29static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN |
30 __GFP_NORETRY) & ~__GFP_WAIT; 30 __GFP_NORETRY) & ~__GFP_DIRECT_RECLAIM;
31static gfp_t low_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN); 31static gfp_t low_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN);
32static const unsigned int orders[] = {8, 4, 0}; 32static const unsigned int orders[] = {8, 4, 0};
33static const int num_orders = ARRAY_SIZE(orders); 33static const int num_orders = ARRAY_SIZE(orders);
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index 6af733de69ca..f0b0423a716b 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -95,7 +95,7 @@ do { \
95do { \ 95do { \
96 LASSERT(!in_interrupt() || \ 96 LASSERT(!in_interrupt() || \
97 ((size) <= LIBCFS_VMALLOC_SIZE && \ 97 ((size) <= LIBCFS_VMALLOC_SIZE && \
98 ((mask) & __GFP_WAIT) == 0)); \ 98 !gfpflags_allow_blocking(mask))); \
99} while (0) 99} while (0)
100 100
101#define LIBCFS_ALLOC_POST(ptr, size) \ 101#define LIBCFS_ALLOC_POST(ptr, size) \
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index fe49f1b87652..4ea651c6db3a 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -1245,7 +1245,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
1245 for (i = 0; i < npages; i++) { 1245 for (i = 0; i < npages; i++) {
1246 page = alloc_pages_node( 1246 page = alloc_pages_node(
1247 cfs_cpt_spread_node(lnet_cpt_table(), cpt), 1247 cfs_cpt_spread_node(lnet_cpt_table(), cpt),
1248 __GFP_ZERO | GFP_IOFS, 0); 1248 GFP_KERNEL | __GFP_ZERO, 0);
1249 if (page == NULL) { 1249 if (page == NULL) {
1250 while (--i >= 0) 1250 while (--i >= 0)
1251 __free_page(rb->rb_kiov[i].kiov_page); 1251 __free_page(rb->rb_kiov[i].kiov_page);
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index 0060ff64f88e..64a0335934f3 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -860,7 +860,7 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
860 bulk->bk_iovs[i].kiov_offset = 0; 860 bulk->bk_iovs[i].kiov_offset = 0;
861 bulk->bk_iovs[i].kiov_len = len; 861 bulk->bk_iovs[i].kiov_len = len;
862 bulk->bk_iovs[i].kiov_page = 862 bulk->bk_iovs[i].kiov_page =
863 alloc_page(GFP_IOFS); 863 alloc_page(GFP_KERNEL);
864 864
865 if (bulk->bk_iovs[i].kiov_page == NULL) { 865 if (bulk->bk_iovs[i].kiov_page == NULL) {
866 lstcon_rpc_put(*crpc); 866 lstcon_rpc_put(*crpc);
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index 162f9d330496..7005002c15da 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -146,7 +146,7 @@ srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink)
146 int nob; 146 int nob;
147 147
148 pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt), 148 pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt),
149 GFP_IOFS, 0); 149 GFP_KERNEL, 0);
150 if (pg == NULL) { 150 if (pg == NULL) {
151 CERROR("Can't allocate page %d of %d\n", i, bulk_npg); 151 CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
152 srpc_free_bulk(bk); 152 srpc_free_bulk(bk);
diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c
index 50e8fd23fa17..07a68594c279 100644
--- a/drivers/staging/lustre/lustre/libcfs/module.c
+++ b/drivers/staging/lustre/lustre/libcfs/module.c
@@ -319,7 +319,7 @@ static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *a
319 struct libcfs_ioctl_data *data; 319 struct libcfs_ioctl_data *data;
320 int err = 0; 320 int err = 0;
321 321
322 LIBCFS_ALLOC_GFP(buf, 1024, GFP_IOFS); 322 LIBCFS_ALLOC_GFP(buf, 1024, GFP_KERNEL);
323 if (buf == NULL) 323 if (buf == NULL)
324 return -ENOMEM; 324 return -ENOMEM;
325 325
diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.c b/drivers/staging/lustre/lustre/libcfs/tracefile.c
index 973c7c209dfc..f2d018d7823c 100644
--- a/drivers/staging/lustre/lustre/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lustre/libcfs/tracefile.c
@@ -810,7 +810,7 @@ int cfs_trace_allocate_string_buffer(char **str, int nob)
810 if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */ 810 if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */
811 return -EINVAL; 811 return -EINVAL;
812 812
813 *str = kmalloc(nob, GFP_IOFS | __GFP_ZERO); 813 *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
814 if (*str == NULL) 814 if (*str == NULL)
815 return -ENOMEM; 815 return -ENOMEM;
816 816
diff --git a/drivers/staging/lustre/lustre/llite/remote_perm.c b/drivers/staging/lustre/lustre/llite/remote_perm.c
index c902133dfc97..fe4a72268e3a 100644
--- a/drivers/staging/lustre/lustre/llite/remote_perm.c
+++ b/drivers/staging/lustre/lustre/llite/remote_perm.c
@@ -82,7 +82,7 @@ static struct hlist_head *alloc_rmtperm_hash(void)
82 struct hlist_head *hash; 82 struct hlist_head *hash;
83 int i; 83 int i;
84 84
85 hash = kmem_cache_alloc(ll_rmtperm_hash_cachep, GFP_IOFS | __GFP_ZERO); 85 hash = kmem_cache_alloc(ll_rmtperm_hash_cachep, GFP_NOFS | __GFP_ZERO);
86 if (!hash) 86 if (!hash)
87 return NULL; 87 return NULL;
88 88
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index b81efcd997ae..5f53f3b7ceff 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -1112,7 +1112,7 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
1112 LASSERT(cfg->cfg_instance != NULL); 1112 LASSERT(cfg->cfg_instance != NULL);
1113 LASSERT(cfg->cfg_sb == cfg->cfg_instance); 1113 LASSERT(cfg->cfg_sb == cfg->cfg_instance);
1114 1114
1115 inst = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1115 inst = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1116 if (!inst) 1116 if (!inst)
1117 return -ENOMEM; 1117 return -ENOMEM;
1118 1118
@@ -1308,14 +1308,14 @@ static int mgc_process_recover_log(struct obd_device *obd,
1308 if (cfg->cfg_last_idx == 0) /* the first time */ 1308 if (cfg->cfg_last_idx == 0) /* the first time */
1309 nrpages = CONFIG_READ_NRPAGES_INIT; 1309 nrpages = CONFIG_READ_NRPAGES_INIT;
1310 1310
1311 pages = kcalloc(nrpages, sizeof(*pages), GFP_NOFS); 1311 pages = kcalloc(nrpages, sizeof(*pages), GFP_KERNEL);
1312 if (pages == NULL) { 1312 if (pages == NULL) {
1313 rc = -ENOMEM; 1313 rc = -ENOMEM;
1314 goto out; 1314 goto out;
1315 } 1315 }
1316 1316
1317 for (i = 0; i < nrpages; i++) { 1317 for (i = 0; i < nrpages; i++) {
1318 pages[i] = alloc_page(GFP_IOFS); 1318 pages[i] = alloc_page(GFP_KERNEL);
1319 if (pages[i] == NULL) { 1319 if (pages[i] == NULL) {
1320 rc = -ENOMEM; 1320 rc = -ENOMEM;
1321 goto out; 1321 goto out;
@@ -1466,7 +1466,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc,
1466 if (cld->cld_cfg.cfg_sb) 1466 if (cld->cld_cfg.cfg_sb)
1467 lsi = s2lsi(cld->cld_cfg.cfg_sb); 1467 lsi = s2lsi(cld->cld_cfg.cfg_sb);
1468 1468
1469 env = kzalloc(sizeof(*env), GFP_NOFS); 1469 env = kzalloc(sizeof(*env), GFP_KERNEL);
1470 if (!env) 1470 if (!env)
1471 return -ENOMEM; 1471 return -ENOMEM;
1472 1472
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index b6f000bb8c82..f61ef669644c 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -1562,7 +1562,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
1562 (oa->o_valid & OBD_MD_FLFLAGS) != 0 && 1562 (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
1563 (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0); 1563 (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
1564 1564
1565 gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_IOFS : GFP_HIGHUSER; 1565 gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_KERNEL : GFP_HIGHUSER;
1566 1566
1567 LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); 1567 LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
1568 LASSERT(lsm != NULL); 1568 LASSERT(lsm != NULL);
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index cfb83bcfcb17..b1d1a87f05e3 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -346,7 +346,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
346{ 346{
347 struct osc_extent *ext; 347 struct osc_extent *ext;
348 348
349 ext = kmem_cache_alloc(osc_extent_kmem, GFP_IOFS | __GFP_ZERO); 349 ext = kmem_cache_alloc(osc_extent_kmem, GFP_NOFS | __GFP_ZERO);
350 if (ext == NULL) 350 if (ext == NULL)
351 return NULL; 351 return NULL;
352 352
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c
index 47a1202fcbdf..8666f3ad24e9 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c
@@ -1560,7 +1560,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
1560 * heavy filesystem activity makes these fail, and we can 1560 * heavy filesystem activity makes these fail, and we can
1561 * use compound pages. 1561 * use compound pages.
1562 */ 1562 */
1563 gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; 1563 gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
1564 1564
1565 /* 1565 /*
1566 * The minimum size of the eager buffers is a groups of MTU-sized 1566 * The minimum size of the eager buffers is a groups of MTU-sized
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
index 5d9b9dbd8fc4..13c3cd11ab92 100644
--- a/drivers/staging/rdma/ipath/ipath_file_ops.c
+++ b/drivers/staging/rdma/ipath/ipath_file_ops.c
@@ -905,7 +905,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
905 * heavy filesystem activity makes these fail, and we can 905 * heavy filesystem activity makes these fail, and we can
906 * use compound pages. 906 * use compound pages.
907 */ 907 */
908 gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; 908 gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
909 909
910 egrcnt = dd->ipath_rcvegrcnt; 910 egrcnt = dd->ipath_rcvegrcnt;
911 /* TID number offset for this port */ 911 /* TID number offset for this port */
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index cd54e72a6c50..5ec533826621 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -2345,7 +2345,6 @@ static void fsg_disable(struct usb_function *f)
2345 2345
2346static void handle_exception(struct fsg_common *common) 2346static void handle_exception(struct fsg_common *common)
2347{ 2347{
2348 siginfo_t info;
2349 int i; 2348 int i;
2350 struct fsg_buffhd *bh; 2349 struct fsg_buffhd *bh;
2351 enum fsg_state old_state; 2350 enum fsg_state old_state;
@@ -2357,8 +2356,7 @@ static void handle_exception(struct fsg_common *common)
2357 * into a high-priority EXIT exception. 2356 * into a high-priority EXIT exception.
2358 */ 2357 */
2359 for (;;) { 2358 for (;;) {
2360 int sig = 2359 int sig = kernel_dequeue_signal(NULL);
2361 dequeue_signal_lock(current, &current->blocked, &info);
2362 if (!sig) 2360 if (!sig)
2363 break; 2361 break;
2364 if (sig != SIGUSR1) { 2362 if (sig != SIGUSR1) {
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 0a94895a358d..692ccc69345e 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -2244,7 +2244,7 @@ static int u132_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
2244{ 2244{
2245 struct u132 *u132 = hcd_to_u132(hcd); 2245 struct u132 *u132 = hcd_to_u132(hcd);
2246 if (irqs_disabled()) { 2246 if (irqs_disabled()) {
2247 if (__GFP_WAIT & mem_flags) { 2247 if (gfpflags_allow_blocking(mem_flags)) {
2248 printk(KERN_ERR "invalid context for function that might sleep\n"); 2248 printk(KERN_ERR "invalid context for function that might sleep\n");
2249 return -EINVAL; 2249 return -EINVAL;
2250 } 2250 }
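
u132_urb_enqueue() uses the new helper defensively: with interrupts disabled, a caller passing a mask that permits direct reclaim is a bug, so it is rejected with -EINVAL instead of risking a sleep in atomic context. A toy version of that guard, with irqs_disabled() faked by a flag:

/* atomic_guard_sketch.c - reject blocking gfp masks in atomic context. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_DIRECT_RECLAIM 0x02u /* placeholder bit */

static bool fake_irqs_disabled = true;  /* pretend we are in atomic context */

static bool gfpflags_allow_blocking(gfp_t flags)
{
	return !!(flags & __GFP_DIRECT_RECLAIM);
}

static int enqueue(gfp_t mem_flags)
{
	if (fake_irqs_disabled && gfpflags_allow_blocking(mem_flags)) {
		fprintf(stderr, "invalid context for function that might sleep\n");
		return -EINVAL;
	}
	return 0;                            /* proceed with the submission */
}

int main(void)
{
	printf("GFP_ATOMIC-like mask: %d\n", enqueue(0));
	printf("blocking mask:        %d\n", enqueue(__GFP_DIRECT_RECLAIM));
	return 0;
}
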
diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 6b70d7f62b2f..1c1e95a0b8fa 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -99,7 +99,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
99 * below the first 16MB. 99 * below the first 16MB.
100 */ 100 */
101 101
102 flags = __GFP_DMA | __GFP_HIGH; 102 flags = __GFP_DMA | __GFP_HIGH | __GFP_KSWAPD_RECLAIM;
103 va->logical = 103 va->logical =
104 __get_free_pages(flags, --max_order); 104 __get_free_pages(flags, --max_order);
105 } while (va->logical == 0 && max_order > min_order); 105 } while (va->logical == 0 && max_order > min_order);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 97b049ad0594..c473c42d7d6c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -482,13 +482,12 @@ static noinline int add_ra_bio_pages(struct inode *inode,
482 goto next; 482 goto next;
483 } 483 }
484 484
485 page = __page_cache_alloc(mapping_gfp_mask(mapping) & 485 page = __page_cache_alloc(mapping_gfp_constraint(mapping,
486 ~__GFP_FS); 486 ~__GFP_FS));
487 if (!page) 487 if (!page)
488 break; 488 break;
489 489
490 if (add_to_page_cache_lru(page, mapping, pg_index, 490 if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
491 GFP_NOFS)) {
492 page_cache_release(page); 491 page_cache_release(page);
493 goto next; 492 goto next;
494 } 493 }
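
This hunk, and the ceph, cifs, ext4 and mpage hunks further down, replace open-coded "mapping_gfp_mask(mapping) & X" with mapping_gfp_constraint(mapping, X). As far as this series is concerned the helper is just that AND, so the conversions are cosmetic; the sketch below restates the equivalence with the placeholder flag bits from the earlier sketch, and with mapping_gfp_mask() reduced to a stored field.

/* constraint_sketch.c - mapping_gfp_constraint() as a plain mask intersection. */
#include <stdio.h>

typedef unsigned int gfp_t;

#define __GFP_KSWAPD_RECLAIM 0x01u
#define __GFP_DIRECT_RECLAIM 0x02u
#define __GFP_IO             0x04u
#define __GFP_FS             0x08u
#define GFP_KERNEL (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM | __GFP_IO | __GFP_FS)

struct address_space { gfp_t gfp_mask; };   /* reduced to the one field used */

static gfp_t mapping_gfp_mask(const struct address_space *m)
{
	return m->gfp_mask;
}

/* Equivalent of the helper the hunks switch to: constrain the mapping's
 * default mask by the caller-supplied mask. */
static gfp_t mapping_gfp_constraint(const struct address_space *m, gfp_t gfp_mask)
{
	return mapping_gfp_mask(m) & gfp_mask;
}

int main(void)
{
	struct address_space mapping = { .gfp_mask = GFP_KERNEL };

	/* Same value the old spelling produced: GFP_KERNEL without __GFP_FS. */
	printf("old: %#x new: %#x\n",
	       mapping_gfp_mask(&mapping) & ~__GFP_FS,
	       mapping_gfp_constraint(&mapping, ~__GFP_FS));
	return 0;
}
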
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a2e73f6053a8..8c58191249cc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3367,7 +3367,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
3367 3367
3368static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) 3368static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
3369{ 3369{
3370 return mapping_gfp_mask(mapping) & ~__GFP_FS; 3370 return mapping_gfp_constraint(mapping, ~__GFP_FS);
3371} 3371}
3372 3372
3373/* extent-tree.c */ 3373/* extent-tree.c */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2d4667594681..640598c0d0e7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2575,7 +2575,7 @@ int open_ctree(struct super_block *sb,
2575 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; 2575 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
2576 fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ 2576 fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
2577 /* readahead state */ 2577 /* readahead state */
2578 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2578 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
2579 spin_lock_init(&fs_info->reada_lock); 2579 spin_lock_init(&fs_info->reada_lock);
2580 2580
2581 fs_info->thread_pool_size = min_t(unsigned long, 2581 fs_info->thread_pool_size = min_t(unsigned long,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 33a01ea41465..9abe18763a7f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -616,7 +616,7 @@ static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
616 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 616 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
617 clear = 1; 617 clear = 1;
618again: 618again:
619 if (!prealloc && (mask & __GFP_WAIT)) { 619 if (!prealloc && gfpflags_allow_blocking(mask)) {
620 /* 620 /*
621 * Don't care for allocation failure here because we might end 621 * Don't care for allocation failure here because we might end
622 * up not needing the pre-allocated extent state at all, which 622 * up not needing the pre-allocated extent state at all, which
@@ -741,7 +741,7 @@ search_again:
741 if (start > end) 741 if (start > end)
742 goto out; 742 goto out;
743 spin_unlock(&tree->lock); 743 spin_unlock(&tree->lock);
744 if (mask & __GFP_WAIT) 744 if (gfpflags_allow_blocking(mask))
745 cond_resched(); 745 cond_resched();
746 goto again; 746 goto again;
747} 747}
@@ -874,7 +874,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
874 874
875 bits |= EXTENT_FIRST_DELALLOC; 875 bits |= EXTENT_FIRST_DELALLOC;
876again: 876again:
877 if (!prealloc && (mask & __GFP_WAIT)) { 877 if (!prealloc && gfpflags_allow_blocking(mask)) {
878 prealloc = alloc_extent_state(mask); 878 prealloc = alloc_extent_state(mask);
879 BUG_ON(!prealloc); 879 BUG_ON(!prealloc);
880 } 880 }
@@ -1052,7 +1052,7 @@ search_again:
1052 if (start > end) 1052 if (start > end)
1053 goto out; 1053 goto out;
1054 spin_unlock(&tree->lock); 1054 spin_unlock(&tree->lock);
1055 if (mask & __GFP_WAIT) 1055 if (gfpflags_allow_blocking(mask))
1056 cond_resched(); 1056 cond_resched();
1057 goto again; 1057 goto again;
1058} 1058}
@@ -1100,7 +1100,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1100 btrfs_debug_check_extent_io_range(tree, start, end); 1100 btrfs_debug_check_extent_io_range(tree, start, end);
1101 1101
1102again: 1102again:
1103 if (!prealloc && (mask & __GFP_WAIT)) { 1103 if (!prealloc && gfpflags_allow_blocking(mask)) {
1104 /* 1104 /*
1105 * Best effort, don't worry if extent state allocation fails 1105 * Best effort, don't worry if extent state allocation fails
1106 * here for the first iteration. We might have a cached state 1106 * here for the first iteration. We might have a cached state
@@ -1278,7 +1278,7 @@ search_again:
1278 if (start > end) 1278 if (start > end)
1279 goto out; 1279 goto out;
1280 spin_unlock(&tree->lock); 1280 spin_unlock(&tree->lock);
1281 if (mask & __GFP_WAIT) 1281 if (gfpflags_allow_blocking(mask))
1282 cond_resched(); 1282 cond_resched();
1283 first_iteration = false; 1283 first_iteration = false;
1284 goto again; 1284 goto again;
@@ -4386,7 +4386,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
4386 u64 start = page_offset(page); 4386 u64 start = page_offset(page);
4387 u64 end = start + PAGE_CACHE_SIZE - 1; 4387 u64 end = start + PAGE_CACHE_SIZE - 1;
4388 4388
4389 if ((mask & __GFP_WAIT) && 4389 if (gfpflags_allow_blocking(mask) &&
4390 page->mapping->host->i_size > 16 * 1024 * 1024) { 4390 page->mapping->host->i_size > 16 * 1024 * 1024) {
4391 u64 len; 4391 u64 len;
4392 while (start <= end) { 4392 while (start <= end) {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0948d34cb84a..85a1f8621b51 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -85,8 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
85 } 85 }
86 86
87 mapping_set_gfp_mask(inode->i_mapping, 87 mapping_set_gfp_mask(inode->i_mapping,
88 mapping_gfp_mask(inode->i_mapping) & 88 mapping_gfp_constraint(inode->i_mapping,
89 ~(__GFP_FS | __GFP_HIGHMEM)); 89 ~(__GFP_FS | __GFP_HIGHMEM)));
90 90
91 return inode; 91 return inode;
92} 92}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 17ed76d18eb6..9b2dafa5ba59 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -232,8 +232,8 @@ static struct btrfs_device *__alloc_device(void)
232 spin_lock_init(&dev->reada_lock); 232 spin_lock_init(&dev->reada_lock);
233 atomic_set(&dev->reada_in_flight, 0); 233 atomic_set(&dev->reada_in_flight, 0);
234 atomic_set(&dev->dev_stats_ccnt, 0); 234 atomic_set(&dev->dev_stats_ccnt, 0);
235 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); 235 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
236 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); 236 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
237 237
238 return dev; 238 return dev;
239} 239}
diff --git a/fs/buffer.c b/fs/buffer.c
index 82283abb2795..51aff0296ce2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -999,7 +999,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
999 int ret = 0; /* Will call free_more_memory() */ 999 int ret = 0; /* Will call free_more_memory() */
1000 gfp_t gfp_mask; 1000 gfp_t gfp_mask;
1001 1001
1002 gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; 1002 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
1003 1003
1004 /* 1004 /*
1005 * XXX: __getblk_slow() can not really deal with failure and 1005 * XXX: __getblk_slow() can not really deal with failure and
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index aecd0859eacb..9c4b737a54df 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -30,7 +30,7 @@ extern unsigned cachefiles_debug;
30#define CACHEFILES_DEBUG_KLEAVE 2 30#define CACHEFILES_DEBUG_KLEAVE 2
31#define CACHEFILES_DEBUG_KDEBUG 4 31#define CACHEFILES_DEBUG_KDEBUG 4
32 32
33#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) 33#define cachefiles_gfp (__GFP_RECLAIM | __GFP_NORETRY | __GFP_NOMEMALLOC)
34 34
35/* 35/*
36 * node records 36 * node records
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 9d23e788d1df..b7d218a168fb 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1283,8 +1283,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1283 int ret1; 1283 int ret1;
1284 struct address_space *mapping = inode->i_mapping; 1284 struct address_space *mapping = inode->i_mapping;
1285 struct page *page = find_or_create_page(mapping, 0, 1285 struct page *page = find_or_create_page(mapping, 0,
1286 mapping_gfp_mask(mapping) & 1286 mapping_gfp_constraint(mapping,
1287 ~__GFP_FS); 1287 ~__GFP_FS));
1288 if (!page) { 1288 if (!page) {
1289 ret = VM_FAULT_OOM; 1289 ret = VM_FAULT_OOM;
1290 goto out; 1290 goto out;
@@ -1428,7 +1428,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
1428 if (i_size_read(inode) == 0) 1428 if (i_size_read(inode) == 0)
1429 return; 1429 return;
1430 page = find_or_create_page(mapping, 0, 1430 page = find_or_create_page(mapping, 0,
1431 mapping_gfp_mask(mapping) & ~__GFP_FS); 1431 mapping_gfp_constraint(mapping,
1432 ~__GFP_FS));
1432 if (!page) 1433 if (!page)
1433 return; 1434 return;
1434 if (PageUptodate(page)) { 1435 if (PageUptodate(page)) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 47c5c97e2dd3..0068e82217c3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3380,7 +3380,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3380 struct page *page, *tpage; 3380 struct page *page, *tpage;
3381 unsigned int expected_index; 3381 unsigned int expected_index;
3382 int rc; 3382 int rc;
3383 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); 3383 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
3384 3384
3385 INIT_LIST_HEAD(tmplist); 3385 INIT_LIST_HEAD(tmplist);
3386 3386
diff --git a/fs/coredump.c b/fs/coredump.c
index a8f75640ac86..1777331eee76 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -280,23 +280,24 @@ out:
280 return ispipe; 280 return ispipe;
281} 281}
282 282
283static int zap_process(struct task_struct *start, int exit_code) 283static int zap_process(struct task_struct *start, int exit_code, int flags)
284{ 284{
285 struct task_struct *t; 285 struct task_struct *t;
286 int nr = 0; 286 int nr = 0;
287 287
288 /* ignore all signals except SIGKILL, see prepare_signal() */
289 start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
288 start->signal->group_exit_code = exit_code; 290 start->signal->group_exit_code = exit_code;
289 start->signal->group_stop_count = 0; 291 start->signal->group_stop_count = 0;
290 292
291 t = start; 293 for_each_thread(start, t) {
292 do {
293 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 294 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
294 if (t != current && t->mm) { 295 if (t != current && t->mm) {
295 sigaddset(&t->pending.signal, SIGKILL); 296 sigaddset(&t->pending.signal, SIGKILL);
296 signal_wake_up(t, 1); 297 signal_wake_up(t, 1);
297 nr++; 298 nr++;
298 } 299 }
299 } while_each_thread(start, t); 300 }
300 301
301 return nr; 302 return nr;
302} 303}
@@ -311,10 +312,8 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
311 spin_lock_irq(&tsk->sighand->siglock); 312 spin_lock_irq(&tsk->sighand->siglock);
312 if (!signal_group_exit(tsk->signal)) { 313 if (!signal_group_exit(tsk->signal)) {
313 mm->core_state = core_state; 314 mm->core_state = core_state;
314 nr = zap_process(tsk, exit_code);
315 tsk->signal->group_exit_task = tsk; 315 tsk->signal->group_exit_task = tsk;
316 /* ignore all signals except SIGKILL, see prepare_signal() */ 316 nr = zap_process(tsk, exit_code, 0);
317 tsk->signal->flags = SIGNAL_GROUP_COREDUMP;
318 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 317 clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
319 } 318 }
320 spin_unlock_irq(&tsk->sighand->siglock); 319 spin_unlock_irq(&tsk->sighand->siglock);
@@ -360,18 +359,18 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
360 continue; 359 continue;
361 if (g->flags & PF_KTHREAD) 360 if (g->flags & PF_KTHREAD)
362 continue; 361 continue;
363 p = g; 362
364 do { 363 for_each_thread(g, p) {
365 if (p->mm) { 364 if (unlikely(!p->mm))
366 if (unlikely(p->mm == mm)) { 365 continue;
367 lock_task_sighand(p, &flags); 366 if (unlikely(p->mm == mm)) {
368 nr += zap_process(p, exit_code); 367 lock_task_sighand(p, &flags);
369 p->signal->flags = SIGNAL_GROUP_EXIT; 368 nr += zap_process(p, exit_code,
370 unlock_task_sighand(p, &flags); 369 SIGNAL_GROUP_EXIT);
371 } 370 unlock_task_sighand(p, &flags);
372 break;
373 } 371 }
374 } while_each_thread(g, p); 372 break;
373 }
375 } 374 }
376 rcu_read_unlock(); 375 rcu_read_unlock();
377done: 376done:
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3ae0e0427191..18e7554cf94c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -361,7 +361,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
361 361
362 /* 362 /*
363 * bio_alloc() is guaranteed to return a bio when called with 363 * bio_alloc() is guaranteed to return a bio when called with
364 * __GFP_WAIT and we request a valid number of vectors. 364 * __GFP_RECLAIM and we request a valid number of vectors.
365 */ 365 */
366 bio = bio_alloc(GFP_KERNEL, nr_vecs); 366 bio = bio_alloc(GFP_KERNEL, nr_vecs);
367 367
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e8d620a484f6..7d1aad1d9313 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3386,7 +3386,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
3386 int err = 0; 3386 int err = 0;
3387 3387
3388 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, 3388 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3389 mapping_gfp_mask(mapping) & ~__GFP_FS); 3389 mapping_gfp_constraint(mapping, ~__GFP_FS));
3390 if (!page) 3390 if (!page)
3391 return -ENOMEM; 3391 return -ENOMEM;
3392 3392
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index d94af71a4e7f..5dc5e95063de 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -166,7 +166,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
166 page = list_entry(pages->prev, struct page, lru); 166 page = list_entry(pages->prev, struct page, lru);
167 list_del(&page->lru); 167 list_del(&page->lru);
168 if (add_to_page_cache_lru(page, mapping, page->index, 168 if (add_to_page_cache_lru(page, mapping, page->index,
169 GFP_KERNEL & mapping_gfp_mask(mapping))) 169 mapping_gfp_constraint(mapping, GFP_KERNEL)))
170 goto next_page; 170 goto next_page;
171 } 171 }
172 172
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 04d0f1b33409..753f4e68b820 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1061,7 +1061,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1061 return 0; 1061 return 0;
1062 if (journal) 1062 if (journal)
1063 return jbd2_journal_try_to_free_buffers(journal, page, 1063 return jbd2_journal_try_to_free_buffers(journal, page,
1064 wait & ~__GFP_WAIT); 1064 wait & ~__GFP_DIRECT_RECLAIM);
1065 return try_to_free_buffers(page); 1065 return try_to_free_buffers(page);
1066} 1066}
1067 1067
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index d403c69bee08..4304072161aa 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -111,7 +111,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
111 111
112 /* radix tree insertion won't use the preallocation pool unless it's 112 /* radix tree insertion won't use the preallocation pool unless it's
113 * told it may not wait */ 113 * told it may not wait */
114 INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT); 114 INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
115 115
116 switch (cookie->def->type) { 116 switch (cookie->def->type) {
117 case FSCACHE_COOKIE_TYPE_INDEX: 117 case FSCACHE_COOKIE_TYPE_INDEX:
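
The radix-tree initialisers in btrfs above and in fscache here stash GFP_NOFS & ~__GFP_DIRECT_RECLAIM as the tree's allocation mask, so insertion-time node allocations are treated as non-sleeping and can draw on nodes preloaded beforehand, as the comment above the fscache initialiser spells out. A hedged sketch of that idea, with the tree reduced to a struct that remembers its mask:

/* tree_mask_sketch.c - stash a non-blocking gfp mask at init time. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_KSWAPD_RECLAIM 0x01u /* placeholder bits */
#define __GFP_DIRECT_RECLAIM 0x02u
#define __GFP_IO             0x04u
#define GFP_NOFS (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM | __GFP_IO)

struct toy_radix_tree { gfp_t gfp_mask; };

static bool gfpflags_allow_blocking(gfp_t flags)
{
	return !!(flags & __GFP_DIRECT_RECLAIM);
}

static void toy_init(struct toy_radix_tree *t, gfp_t mask)
{
	t->gfp_mask = mask;
}

static void toy_insert(const struct toy_radix_tree *t)
{
	/* Insertion-time node allocations inherit the stored mask. */
	if (gfpflags_allow_blocking(t->gfp_mask))
		printf("insert: node allocation may sleep\n");
	else
		printf("insert: non-sleeping path, preloaded nodes may be used\n");
}

int main(void)
{
	struct toy_radix_tree stores;

	toy_init(&stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	toy_insert(&stores);
	return 0;
}
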
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 483bbc613bf0..79483b3d8c6f 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -58,7 +58,7 @@ bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page)
58 58
59/* 59/*
60 * decide whether a page can be released, possibly by cancelling a store to it 60 * decide whether a page can be released, possibly by cancelling a store to it
61 * - we're allowed to sleep if __GFP_WAIT is flagged 61 * - we're allowed to sleep if __GFP_DIRECT_RECLAIM is flagged
62 */ 62 */
63bool __fscache_maybe_release_page(struct fscache_cookie *cookie, 63bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
64 struct page *page, 64 struct page *page,
@@ -122,7 +122,7 @@ page_busy:
122 * allocator as the work threads writing to the cache may all end up 122 * allocator as the work threads writing to the cache may all end up
123 * sleeping on memory allocation, so we may need to impose a timeout 123 * sleeping on memory allocation, so we may need to impose a timeout
124 * too. */ 124 * too. */
125 if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) { 125 if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) {
126 fscache_stat(&fscache_n_store_vmscan_busy); 126 fscache_stat(&fscache_n_store_vmscan_busy);
127 return false; 127 return false;
128 } 128 }
@@ -132,7 +132,7 @@ page_busy:
132 _debug("fscache writeout timeout page: %p{%lx}", 132 _debug("fscache writeout timeout page: %p{%lx}",
133 page, page->index); 133 page, page->index);
134 134
135 gfp &= ~__GFP_WAIT; 135 gfp &= ~__GFP_DIRECT_RECLAIM;
136 goto try_again; 136 goto try_again;
137} 137}
138EXPORT_SYMBOL(__fscache_maybe_release_page); 138EXPORT_SYMBOL(__fscache_maybe_release_page);
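
__fscache_maybe_release_page() shows the policy side of the split: a page under store is only waited for when the caller both allows direct reclaim and allows filesystem recursion, and if the wait times out the function strips __GFP_DIRECT_RECLAIM and retries the non-sleeping path. A compact model of that decision, with the store and timeout machinery reduced to booleans (so the names here are illustrative, not fscache's):

/* release_policy_sketch.c - gfp-driven "may we wait for this page?" decision. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_DIRECT_RECLAIM 0x02u /* placeholder bits, as in the earlier sketch */
#define __GFP_FS             0x08u

static bool wait_for_store(bool store_finishes_in_time)
{
	return store_finishes_in_time;          /* stand-in for the timed wait */
}

static bool maybe_release_page(bool page_is_being_stored, gfp_t gfp,
			       bool store_finishes_in_time)
{
try_again:
	if (!page_is_being_stored)
		return true;                    /* nothing pending: release it */

	/* Callers that cannot sleep, or cannot recurse into the fs, give up. */
	if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
		return false;

	if (wait_for_store(store_finishes_in_time))
		return true;

	/* Timed out: retry once more, but without sleeping this time. */
	gfp &= ~__GFP_DIRECT_RECLAIM;
	goto try_again;
}

int main(void)
{
	printf("atomic caller, busy page:    %d\n",
	       maybe_release_page(true, 0, true));
	printf("blocking caller, store done: %d\n",
	       maybe_release_page(true, __GFP_DIRECT_RECLAIM | __GFP_FS, true));
	printf("blocking caller, timeout:    %d\n",
	       maybe_release_page(true, __GFP_DIRECT_RECLAIM | __GFP_FS, false));
	return 0;
}
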
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6b8338ec2464..89463eee6791 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1937,8 +1937,8 @@ out:
1937 * @journal: journal for operation 1937 * @journal: journal for operation
1938 * @page: to try and free 1938 * @page: to try and free
1939 * @gfp_mask: we use the mask to detect how hard should we try to release 1939 * @gfp_mask: we use the mask to detect how hard should we try to release
1940 * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to 1940 * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
1941 * release the buffers. 1941 * code to release the buffers.
1942 * 1942 *
1943 * 1943 *
1944 * For all the buffers on this page, 1944 * For all the buffers on this page,
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index bb9cebc9ca8a..e5c1783ab64a 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -80,7 +80,6 @@ static int jffs2_garbage_collect_thread(void *_c)
80 siginitset(&hupmask, sigmask(SIGHUP)); 80 siginitset(&hupmask, sigmask(SIGHUP));
81 allow_signal(SIGKILL); 81 allow_signal(SIGKILL);
82 allow_signal(SIGSTOP); 82 allow_signal(SIGSTOP);
83 allow_signal(SIGCONT);
84 allow_signal(SIGHUP); 83 allow_signal(SIGHUP);
85 84
86 c->gc_task = current; 85 c->gc_task = current;
@@ -121,20 +120,18 @@ static int jffs2_garbage_collect_thread(void *_c)
121 /* Put_super will send a SIGKILL and then wait on the sem. 120 /* Put_super will send a SIGKILL and then wait on the sem.
122 */ 121 */
123 while (signal_pending(current) || freezing(current)) { 122 while (signal_pending(current) || freezing(current)) {
124 siginfo_t info;
125 unsigned long signr; 123 unsigned long signr;
126 124
127 if (try_to_freeze()) 125 if (try_to_freeze())
128 goto again; 126 goto again;
129 127
130 signr = dequeue_signal_lock(current, &current->blocked, &info); 128 signr = kernel_dequeue_signal(NULL);
131 129
132 switch(signr) { 130 switch(signr) {
133 case SIGSTOP: 131 case SIGSTOP:
134 jffs2_dbg(1, "%s(): SIGSTOP received\n", 132 jffs2_dbg(1, "%s(): SIGSTOP received\n",
135 __func__); 133 __func__);
136 set_current_state(TASK_STOPPED); 134 kernel_signal_stop();
137 schedule();
138 break; 135 break;
139 136
140 case SIGKILL: 137 case SIGKILL:
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 63f31c0733c5..f3a4857ff071 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1264,7 +1264,7 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
1264 if ((c->flash_size % c->sector_size) != 0) { 1264 if ((c->flash_size % c->sector_size) != 0) {
1265 c->flash_size = (c->flash_size / c->sector_size) * c->sector_size; 1265 c->flash_size = (c->flash_size / c->sector_size) * c->sector_size;
1266 pr_warn("flash size adjusted to %dKiB\n", c->flash_size); 1266 pr_warn("flash size adjusted to %dKiB\n", c->flash_size);
1267 }; 1267 }
1268 1268
1269 c->wbuf_ofs = 0xFFFFFFFF; 1269 c->wbuf_ofs = 0xFFFFFFFF;
1270 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); 1270 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 7f9b096d8d57..6de0fbfc6c00 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -57,7 +57,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
57 filler_t *filler = super->s_devops->readpage; 57 filler_t *filler = super->s_devops->readpage;
58 struct page *page; 58 struct page *page;
59 59
60 BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); 60 BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS));
61 if (use_filler) 61 if (use_filler)
62 page = read_cache_page(mapping, index, filler, sb); 62 page = read_cache_page(mapping, index, filler, sb);
63 else { 63 else {
diff --git a/fs/mpage.c b/fs/mpage.c
index 09abba7653aa..1480d3a18037 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -361,7 +361,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
361 sector_t last_block_in_bio = 0; 361 sector_t last_block_in_bio = 0;
362 struct buffer_head map_bh; 362 struct buffer_head map_bh;
363 unsigned long first_logical_block = 0; 363 unsigned long first_logical_block = 0;
364 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); 364 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
365 365
366 map_bh.b_state = 0; 366 map_bh.b_state = 0;
367 map_bh.b_size = 0; 367 map_bh.b_size = 0;
@@ -397,7 +397,7 @@ int mpage_readpage(struct page *page, get_block_t get_block)
397 sector_t last_block_in_bio = 0; 397 sector_t last_block_in_bio = 0;
398 struct buffer_head map_bh; 398 struct buffer_head map_bh;
399 unsigned long first_logical_block = 0; 399 unsigned long first_logical_block = 0;
400 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(page->mapping); 400 gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
401 401
402 map_bh.b_state = 0; 402 map_bh.b_state = 0;
403 map_bh.b_size = 0; 403 map_bh.b_size = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 6f567347f14f..d84d7c7515fc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4604,7 +4604,7 @@ EXPORT_SYMBOL(__page_symlink);
4604int page_symlink(struct inode *inode, const char *symname, int len) 4604int page_symlink(struct inode *inode, const char *symname, int len)
4605{ 4605{
4606 return __page_symlink(inode, symname, len, 4606 return __page_symlink(inode, symname, len,
4607 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); 4607 !mapping_gfp_constraint(inode->i_mapping, __GFP_FS));
4608} 4608}
4609EXPORT_SYMBOL(page_symlink); 4609EXPORT_SYMBOL(page_symlink);
4610 4610
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 37f639d50af5..93e236429c5d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -473,8 +473,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
473 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); 473 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
474 474
475 /* Always try to initiate a 'commit' if relevant, but only 475 /* Always try to initiate a 'commit' if relevant, but only
476 * wait for it if __GFP_WAIT is set. Even then, only wait 1 476 * wait for it if the caller allows blocking. Even then,
477 * second and only if the 'bdi' is not congested. 477 * only wait 1 second and only if the 'bdi' is not congested.
478 * Waiting indefinitely can cause deadlocks when the NFS 478 * Waiting indefinitely can cause deadlocks when the NFS
479 * server is on this machine, when a new TCP connection is 479 * server is on this machine, when a new TCP connection is
480 * needed and in other rare cases. There is no particular 480 * needed and in other rare cases. There is no particular
@@ -484,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
484 if (mapping) { 484 if (mapping) {
485 struct nfs_server *nfss = NFS_SERVER(mapping->host); 485 struct nfs_server *nfss = NFS_SERVER(mapping->host);
486 nfs_commit_inode(mapping->host, 0); 486 nfs_commit_inode(mapping->host, 0);
487 if ((gfp & __GFP_WAIT) && 487 if (gfpflags_allow_blocking(gfp) &&
488 !bdi_write_congested(&nfss->backing_dev_info)) { 488 !bdi_write_congested(&nfss->backing_dev_info)) {
489 wait_on_page_bit_killable_timeout(page, PG_private, 489 wait_on_page_bit_killable_timeout(page, PG_private,
490 HZ); 490 HZ);
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 8df0f3b7839b..2ccbf5531554 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -133,38 +133,38 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
133 133
134/** 134/**
135 * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group 135 * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
136 * @inode: inode of metadata file using this allocator
137 * @group: group number
138 * @desc: pointer to descriptor structure for the group 136 * @desc: pointer to descriptor structure for the group
137 * @lock: spin lock protecting @desc
139 */ 138 */
140static unsigned long 139static unsigned long
141nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, 140nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc,
142 const struct nilfs_palloc_group_desc *desc) 141 spinlock_t *lock)
143{ 142{
144 unsigned long nfree; 143 unsigned long nfree;
145 144
146 spin_lock(nilfs_mdt_bgl_lock(inode, group)); 145 spin_lock(lock);
147 nfree = le32_to_cpu(desc->pg_nfrees); 146 nfree = le32_to_cpu(desc->pg_nfrees);
148 spin_unlock(nilfs_mdt_bgl_lock(inode, group)); 147 spin_unlock(lock);
149 return nfree; 148 return nfree;
150} 149}
151 150
152/** 151/**
153 * nilfs_palloc_group_desc_add_entries - adjust count of free entries 152 * nilfs_palloc_group_desc_add_entries - adjust count of free entries
154 * @inode: inode of metadata file using this allocator
155 * @group: group number
156 * @desc: pointer to descriptor structure for the group 153 * @desc: pointer to descriptor structure for the group
154 * @lock: spin lock protecting @desc
157 * @n: delta to be added 155 * @n: delta to be added
158 */ 156 */
159static void 157static u32
160nilfs_palloc_group_desc_add_entries(struct inode *inode, 158nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc,
161 unsigned long group, 159 spinlock_t *lock, u32 n)
162 struct nilfs_palloc_group_desc *desc,
163 u32 n)
164{ 160{
165 spin_lock(nilfs_mdt_bgl_lock(inode, group)); 161 u32 nfree;
162
163 spin_lock(lock);
166 le32_add_cpu(&desc->pg_nfrees, n); 164 le32_add_cpu(&desc->pg_nfrees, n);
167 spin_unlock(nilfs_mdt_bgl_lock(inode, group)); 165 nfree = le32_to_cpu(desc->pg_nfrees);
166 spin_unlock(lock);
167 return nfree;
168} 168}
169 169
170/** 170/**
@@ -240,6 +240,26 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
240} 240}
241 241
242/** 242/**
243 * nilfs_palloc_delete_block - delete a block on the persistent allocator file
244 * @inode: inode of metadata file using this allocator
245 * @blkoff: block offset
246 * @prev: nilfs_bh_assoc struct of the last used buffer
247 * @lock: spin lock protecting @prev
248 */
249static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff,
250 struct nilfs_bh_assoc *prev,
251 spinlock_t *lock)
252{
253 spin_lock(lock);
254 if (prev->bh && blkoff == prev->blkoff) {
255 brelse(prev->bh);
256 prev->bh = NULL;
257 }
258 spin_unlock(lock);
259 return nilfs_mdt_delete_block(inode, blkoff);
260}
261
262/**
243 * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block 263 * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block
244 * @inode: inode of metadata file using this allocator 264 * @inode: inode of metadata file using this allocator
245 * @group: group number 265 * @group: group number
@@ -278,6 +298,22 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode,
278} 298}
279 299
280/** 300/**
301 * nilfs_palloc_delete_bitmap_block - delete a bitmap block
302 * @inode: inode of metadata file using this allocator
303 * @group: group number
304 */
305static int nilfs_palloc_delete_bitmap_block(struct inode *inode,
306 unsigned long group)
307{
308 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
309
310 return nilfs_palloc_delete_block(inode,
311 nilfs_palloc_bitmap_blkoff(inode,
312 group),
313 &cache->prev_bitmap, &cache->lock);
314}
315
316/**
281 * nilfs_palloc_get_entry_block - get buffer head of an entry block 317 * nilfs_palloc_get_entry_block - get buffer head of an entry block
282 * @inode: inode of metadata file using this allocator 318 * @inode: inode of metadata file using this allocator
283 * @nr: serial number of the entry (e.g. inode number) 319 * @nr: serial number of the entry (e.g. inode number)
@@ -296,6 +332,20 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
296} 332}
297 333
298/** 334/**
335 * nilfs_palloc_delete_entry_block - delete an entry block
336 * @inode: inode of metadata file using this allocator
337 * @nr: serial number of the entry
338 */
339static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr)
340{
341 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
342
343 return nilfs_palloc_delete_block(inode,
344 nilfs_palloc_entry_blkoff(inode, nr),
345 &cache->prev_entry, &cache->lock);
346}
347
348/**
299 * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor 349 * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor
300 * @inode: inode of metadata file using this allocator 350 * @inode: inode of metadata file using this allocator
301 * @group: group number 351 * @group: group number
@@ -332,51 +382,40 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
332 382
333/** 383/**
334 * nilfs_palloc_find_available_slot - find available slot in a group 384 * nilfs_palloc_find_available_slot - find available slot in a group
335 * @inode: inode of metadata file using this allocator
336 * @group: group number
337 * @target: offset number of an entry in the group (start point)
338 * @bitmap: bitmap of the group 385 * @bitmap: bitmap of the group
386 * @target: offset number of an entry in the group (start point)
339 * @bsize: size in bits 387 * @bsize: size in bits
388 * @lock: spin lock protecting @bitmap
340 */ 389 */
341static int nilfs_palloc_find_available_slot(struct inode *inode, 390static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
342 unsigned long group,
343 unsigned long target, 391 unsigned long target,
344 unsigned char *bitmap, 392 unsigned bsize,
345 int bsize) 393 spinlock_t *lock)
346{ 394{
347 int curr, pos, end, i; 395 int pos, end = bsize;
348 396
349 if (target > 0) { 397 if (likely(target < bsize)) {
350 end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1); 398 pos = target;
351 if (end > bsize) 399 do {
352 end = bsize; 400 pos = nilfs_find_next_zero_bit(bitmap, end, pos);
353 pos = nilfs_find_next_zero_bit(bitmap, end, target); 401 if (pos >= end)
354 if (pos < end && 402 break;
355 !nilfs_set_bit_atomic( 403 if (!nilfs_set_bit_atomic(lock, pos, bitmap))
356 nilfs_mdt_bgl_lock(inode, group), pos, bitmap))
357 return pos;
358 } else
359 end = 0;
360
361 for (i = 0, curr = end;
362 i < bsize;
363 i += BITS_PER_LONG, curr += BITS_PER_LONG) {
364 /* wrap around */
365 if (curr >= bsize)
366 curr = 0;
367 while (*((unsigned long *)bitmap + curr / BITS_PER_LONG)
368 != ~0UL) {
369 end = curr + BITS_PER_LONG;
370 if (end > bsize)
371 end = bsize;
372 pos = nilfs_find_next_zero_bit(bitmap, end, curr);
373 if ((pos < end) &&
374 !nilfs_set_bit_atomic(
375 nilfs_mdt_bgl_lock(inode, group), pos,
376 bitmap))
377 return pos; 404 return pos;
378 } 405 } while (++pos < end);
406
407 end = target;
408 }
409
410 /* wrap around */
411 for (pos = 0; pos < end; pos++) {
412 pos = nilfs_find_next_zero_bit(bitmap, end, pos);
413 if (pos >= end)
414 break;
415 if (!nilfs_set_bit_atomic(lock, pos, bitmap))
416 return pos;
379 } 417 }
418
380 return -ENOSPC; 419 return -ENOSPC;
381} 420}
382 421
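The rewrite drops the old per-word scan in favour of two straight passes over the bitmap: first [target, bsize), then a wrap-around pass over [0, target), claiming the first zero bit it can set under the group lock. A self-contained, non-atomic model of that search order (plain C with toy stand-ins for nilfs_find_next_zero_bit() and nilfs_set_bit_atomic(); illustrative only):

#include <errno.h>

/* toy stand-ins: scan for a zero bit, and test-and-set a bit */
static unsigned find_zero(const unsigned char *map, unsigned end, unsigned from)
{
	while (from < end && (map[from / 8] & (1u << (from % 8))))
		from++;
	return from;
}

static int test_and_set(unsigned char *map, unsigned pos)
{
	int was_set = (map[pos / 8] >> (pos % 8)) & 1;

	map[pos / 8] |= 1u << (pos % 8);
	return was_set;
}

/* same search order as the new nilfs_palloc_find_available_slot():
 * try [target, bsize) first, then wrap around and try [0, target) */
int find_slot_model(unsigned char *bitmap, unsigned target, unsigned bsize)
{
	unsigned pos, end = bsize;

	if (target < bsize) {
		for (pos = target; pos < end; pos++) {
			pos = find_zero(bitmap, end, pos);
			if (pos >= end)
				break;
			if (!test_and_set(bitmap, pos))
				return pos;
		}
		end = target;		/* wrap point for the second pass */
	}
	for (pos = 0; pos < end; pos++) {
		pos = find_zero(bitmap, end, pos);
		if (pos >= end)
			break;
		if (!test_and_set(bitmap, pos))
			return pos;
	}
	return -ENOSPC;
}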
@@ -475,15 +514,15 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
475 void *desc_kaddr, *bitmap_kaddr; 514 void *desc_kaddr, *bitmap_kaddr;
476 unsigned long group, maxgroup, ngroups; 515 unsigned long group, maxgroup, ngroups;
477 unsigned long group_offset, maxgroup_offset; 516 unsigned long group_offset, maxgroup_offset;
478 unsigned long n, entries_per_group, groups_per_desc_block; 517 unsigned long n, entries_per_group;
479 unsigned long i, j; 518 unsigned long i, j;
519 spinlock_t *lock;
480 int pos, ret; 520 int pos, ret;
481 521
482 ngroups = nilfs_palloc_groups_count(inode); 522 ngroups = nilfs_palloc_groups_count(inode);
483 maxgroup = ngroups - 1; 523 maxgroup = ngroups - 1;
484 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); 524 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
485 entries_per_group = nilfs_palloc_entries_per_group(inode); 525 entries_per_group = nilfs_palloc_entries_per_group(inode);
486 groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode);
487 526
488 for (i = 0; i < ngroups; i += n) { 527 for (i = 0; i < ngroups; i += n) {
489 if (group >= ngroups) { 528 if (group >= ngroups) {
@@ -501,8 +540,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
501 n = nilfs_palloc_rest_groups_in_desc_block(inode, group, 540 n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
502 maxgroup); 541 maxgroup);
503 for (j = 0; j < n; j++, desc++, group++) { 542 for (j = 0; j < n; j++, desc++, group++) {
504 if (nilfs_palloc_group_desc_nfrees(inode, group, desc) 543 lock = nilfs_mdt_bgl_lock(inode, group);
505 > 0) { 544 if (nilfs_palloc_group_desc_nfrees(desc, lock) > 0) {
506 ret = nilfs_palloc_get_bitmap_block( 545 ret = nilfs_palloc_get_bitmap_block(
507 inode, group, 1, &bitmap_bh); 546 inode, group, 1, &bitmap_bh);
508 if (ret < 0) 547 if (ret < 0)
@@ -510,12 +549,12 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
510 bitmap_kaddr = kmap(bitmap_bh->b_page); 549 bitmap_kaddr = kmap(bitmap_bh->b_page);
511 bitmap = bitmap_kaddr + bh_offset(bitmap_bh); 550 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
512 pos = nilfs_palloc_find_available_slot( 551 pos = nilfs_palloc_find_available_slot(
513 inode, group, group_offset, bitmap, 552 bitmap, group_offset,
514 entries_per_group); 553 entries_per_group, lock);
515 if (pos >= 0) { 554 if (pos >= 0) {
516 /* found a free entry */ 555 /* found a free entry */
517 nilfs_palloc_group_desc_add_entries( 556 nilfs_palloc_group_desc_add_entries(
518 inode, group, desc, -1); 557 desc, lock, -1);
519 req->pr_entry_nr = 558 req->pr_entry_nr =
520 entries_per_group * group + pos; 559 entries_per_group * group + pos;
521 kunmap(desc_bh->b_page); 560 kunmap(desc_bh->b_page);
@@ -573,6 +612,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
573 unsigned long group, group_offset; 612 unsigned long group, group_offset;
574 unsigned char *bitmap; 613 unsigned char *bitmap;
575 void *desc_kaddr, *bitmap_kaddr; 614 void *desc_kaddr, *bitmap_kaddr;
615 spinlock_t *lock;
576 616
577 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); 617 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
578 desc_kaddr = kmap(req->pr_desc_bh->b_page); 618 desc_kaddr = kmap(req->pr_desc_bh->b_page);
@@ -580,13 +620,15 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
580 req->pr_desc_bh, desc_kaddr); 620 req->pr_desc_bh, desc_kaddr);
581 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); 621 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
582 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); 622 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
623 lock = nilfs_mdt_bgl_lock(inode, group);
583 624
584 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), 625 if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
585 group_offset, bitmap)) 626 nilfs_warning(inode->i_sb, __func__,
586 printk(KERN_WARNING "%s: entry number %llu already freed\n", 627 "entry number %llu already freed: ino=%lu\n",
587 __func__, (unsigned long long)req->pr_entry_nr); 628 (unsigned long long)req->pr_entry_nr,
629 (unsigned long)inode->i_ino);
588 else 630 else
589 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 631 nilfs_palloc_group_desc_add_entries(desc, lock, 1);
590 632
591 kunmap(req->pr_bitmap_bh->b_page); 633 kunmap(req->pr_bitmap_bh->b_page);
592 kunmap(req->pr_desc_bh->b_page); 634 kunmap(req->pr_desc_bh->b_page);
@@ -611,6 +653,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
611 void *desc_kaddr, *bitmap_kaddr; 653 void *desc_kaddr, *bitmap_kaddr;
612 unsigned char *bitmap; 654 unsigned char *bitmap;
613 unsigned long group, group_offset; 655 unsigned long group, group_offset;
656 spinlock_t *lock;
614 657
615 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); 658 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
616 desc_kaddr = kmap(req->pr_desc_bh->b_page); 659 desc_kaddr = kmap(req->pr_desc_bh->b_page);
@@ -618,12 +661,15 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
618 req->pr_desc_bh, desc_kaddr); 661 req->pr_desc_bh, desc_kaddr);
619 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); 662 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
620 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); 663 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
621 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), 664 lock = nilfs_mdt_bgl_lock(inode, group);
622 group_offset, bitmap)) 665
623 printk(KERN_WARNING "%s: entry number %llu already freed\n", 666 if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
624 __func__, (unsigned long long)req->pr_entry_nr); 667 nilfs_warning(inode->i_sb, __func__,
668 "entry number %llu already freed: ino=%lu\n",
669 (unsigned long long)req->pr_entry_nr,
670 (unsigned long)inode->i_ino);
625 else 671 else
626 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 672 nilfs_palloc_group_desc_add_entries(desc, lock, 1);
627 673
628 kunmap(req->pr_bitmap_bh->b_page); 674 kunmap(req->pr_bitmap_bh->b_page);
629 kunmap(req->pr_desc_bh->b_page); 675 kunmap(req->pr_desc_bh->b_page);
@@ -680,22 +726,6 @@ void nilfs_palloc_abort_free_entry(struct inode *inode,
680} 726}
681 727
682/** 728/**
683 * nilfs_palloc_group_is_in - judge if an entry is in a group
684 * @inode: inode of metadata file using this allocator
685 * @group: group number
686 * @nr: serial number of the entry (e.g. inode number)
687 */
688static int
689nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
690{
691 __u64 first, last;
692
693 first = group * nilfs_palloc_entries_per_group(inode);
694 last = first + nilfs_palloc_entries_per_group(inode) - 1;
695 return (nr >= first) && (nr <= last);
696}
697
698/**
699 * nilfs_palloc_freev - deallocate a set of persistent objects 729 * nilfs_palloc_freev - deallocate a set of persistent objects
700 * @inode: inode of metadata file using this allocator 730 * @inode: inode of metadata file using this allocator
701 * @entry_nrs: array of entry numbers to be deallocated 731 * @entry_nrs: array of entry numbers to be deallocated
@@ -708,9 +738,18 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
708 unsigned char *bitmap; 738 unsigned char *bitmap;
709 void *desc_kaddr, *bitmap_kaddr; 739 void *desc_kaddr, *bitmap_kaddr;
710 unsigned long group, group_offset; 740 unsigned long group, group_offset;
711 int i, j, n, ret; 741 __u64 group_min_nr, last_nrs[8];
742 const unsigned long epg = nilfs_palloc_entries_per_group(inode);
743 const unsigned epb = NILFS_MDT(inode)->mi_entries_per_block;
744 unsigned entry_start, end, pos;
745 spinlock_t *lock;
746 int i, j, k, ret;
747 u32 nfree;
712 748
713 for (i = 0; i < nitems; i = j) { 749 for (i = 0; i < nitems; i = j) {
750 int change_group = false;
751 int nempties = 0, n = 0;
752
714 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); 753 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
715 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); 754 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
716 if (ret < 0) 755 if (ret < 0)
@@ -721,38 +760,89 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
721 brelse(desc_bh); 760 brelse(desc_bh);
722 return ret; 761 return ret;
723 } 762 }
724 desc_kaddr = kmap(desc_bh->b_page); 763
725 desc = nilfs_palloc_block_get_group_desc( 764 /* Get the first entry number of the group */
726 inode, group, desc_bh, desc_kaddr); 765 group_min_nr = (__u64)group * epg;
766
727 bitmap_kaddr = kmap(bitmap_bh->b_page); 767 bitmap_kaddr = kmap(bitmap_bh->b_page);
728 bitmap = bitmap_kaddr + bh_offset(bitmap_bh); 768 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
729 for (j = i, n = 0; 769 lock = nilfs_mdt_bgl_lock(inode, group);
730 (j < nitems) && nilfs_palloc_group_is_in(inode, group, 770
731 entry_nrs[j]); 771 j = i;
732 j++) { 772 entry_start = rounddown(group_offset, epb);
733 nilfs_palloc_group(inode, entry_nrs[j], &group_offset); 773 do {
734 if (!nilfs_clear_bit_atomic( 774 if (!nilfs_clear_bit_atomic(lock, group_offset,
735 nilfs_mdt_bgl_lock(inode, group), 775 bitmap)) {
736 group_offset, bitmap)) { 776 nilfs_warning(inode->i_sb, __func__,
737 printk(KERN_WARNING 777 "entry number %llu already freed: ino=%lu\n",
738 "%s: entry number %llu already freed\n", 778 (unsigned long long)entry_nrs[j],
739 __func__, 779 (unsigned long)inode->i_ino);
740 (unsigned long long)entry_nrs[j]);
741 } else { 780 } else {
742 n++; 781 n++;
743 } 782 }
744 } 783
745 nilfs_palloc_group_desc_add_entries(inode, group, desc, n); 784 j++;
785 if (j >= nitems || entry_nrs[j] < group_min_nr ||
786 entry_nrs[j] >= group_min_nr + epg) {
787 change_group = true;
788 } else {
789 group_offset = entry_nrs[j] - group_min_nr;
790 if (group_offset >= entry_start &&
791 group_offset < entry_start + epb) {
792 /* This entry is in the same block */
793 continue;
794 }
795 }
796
797 /* Test if the entry block is empty or not */
798 end = entry_start + epb;
799 pos = nilfs_find_next_bit(bitmap, end, entry_start);
800 if (pos >= end) {
801 last_nrs[nempties++] = entry_nrs[j - 1];
802 if (nempties >= ARRAY_SIZE(last_nrs))
803 break;
804 }
805
806 if (change_group)
807 break;
808
809 /* Go on to the next entry block */
810 entry_start = rounddown(group_offset, epb);
811 } while (true);
746 812
747 kunmap(bitmap_bh->b_page); 813 kunmap(bitmap_bh->b_page);
748 kunmap(desc_bh->b_page); 814 mark_buffer_dirty(bitmap_bh);
815 brelse(bitmap_bh);
749 816
817 for (k = 0; k < nempties; k++) {
818 ret = nilfs_palloc_delete_entry_block(inode,
819 last_nrs[k]);
820 if (ret && ret != -ENOENT) {
821 nilfs_warning(inode->i_sb, __func__,
822 "failed to delete block of entry %llu: ino=%lu, err=%d\n",
823 (unsigned long long)last_nrs[k],
824 (unsigned long)inode->i_ino, ret);
825 }
826 }
827
828 desc_kaddr = kmap_atomic(desc_bh->b_page);
829 desc = nilfs_palloc_block_get_group_desc(
830 inode, group, desc_bh, desc_kaddr);
831 nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n);
832 kunmap_atomic(desc_kaddr);
750 mark_buffer_dirty(desc_bh); 833 mark_buffer_dirty(desc_bh);
751 mark_buffer_dirty(bitmap_bh);
752 nilfs_mdt_mark_dirty(inode); 834 nilfs_mdt_mark_dirty(inode);
753
754 brelse(bitmap_bh);
755 brelse(desc_bh); 835 brelse(desc_bh);
836
837 if (nfree == nilfs_palloc_entries_per_group(inode)) {
838 ret = nilfs_palloc_delete_bitmap_block(inode, group);
839 if (ret && ret != -ENOENT) {
840 nilfs_warning(inode->i_sb, __func__,
841 "failed to delete bitmap block of group %lu: ino=%lu, err=%d\n",
842 group,
843 (unsigned long)inode->i_ino, ret);
844 }
845 }
756 } 846 }
757 return 0; 847 return 0;
758} 848}
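The freev() rewrite above batches the work per group and per entry block: entries are cleared in request order, and whenever the request moves past the end of an entry block (or leaves the group), that block's slice of the bitmap is checked for surviving bits so empty entry blocks can be deleted afterwards, up to eight per group, the size of last_nrs[]. A group whose free count climbs back to entries-per-group additionally loses its bitmap block. The emptiness test amounts to this (a sketch in kernel C mirroring the hunk, not adding to it):

#include <linux/bitops.h>

/* an entry block covering bits [entry_start, entry_start + epb) of the
 * group bitmap is deletable when none of those bits is still set */
static inline bool entry_block_is_empty(const unsigned long *bitmap,
					unsigned int entry_start,
					unsigned int epb)
{
	unsigned int end = entry_start + epb;

	return find_next_bit(bitmap, end, entry_start) >= end;
}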
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 4bd6451b5703..6e6f49aa53df 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -77,6 +77,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
77#define nilfs_set_bit_atomic ext2_set_bit_atomic 77#define nilfs_set_bit_atomic ext2_set_bit_atomic
78#define nilfs_clear_bit_atomic ext2_clear_bit_atomic 78#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
79#define nilfs_find_next_zero_bit find_next_zero_bit_le 79#define nilfs_find_next_zero_bit find_next_zero_bit_le
80#define nilfs_find_next_bit find_next_bit_le
80 81
81/** 82/**
82 * struct nilfs_bh_assoc - block offset and buffer head association 83 * struct nilfs_bh_assoc - block offset and buffer head association
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 919fd5bb14a8..3a3821b00486 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -919,8 +919,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
919 int level, __u64 *keyp, __u64 *ptrp) 919 int level, __u64 *keyp, __u64 *ptrp)
920{ 920{
921 struct nilfs_btree_node *node, *right; 921 struct nilfs_btree_node *node, *right;
922 __u64 newkey;
923 __u64 newptr;
924 int nchildren, n, move, ncblk; 922 int nchildren, n, move, ncblk;
925 923
926 node = nilfs_btree_get_nonroot_node(path, level); 924 node = nilfs_btree_get_nonroot_node(path, level);
@@ -942,9 +940,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
942 if (!buffer_dirty(path[level].bp_sib_bh)) 940 if (!buffer_dirty(path[level].bp_sib_bh))
943 mark_buffer_dirty(path[level].bp_sib_bh); 941 mark_buffer_dirty(path[level].bp_sib_bh);
944 942
945 newkey = nilfs_btree_node_get_key(right, 0);
946 newptr = path[level].bp_newreq.bpr_ptr;
947
948 if (move) { 943 if (move) {
949 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 944 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
950 nilfs_btree_node_insert(right, path[level].bp_index, 945 nilfs_btree_node_insert(right, path[level].bp_index,
@@ -1856,7 +1851,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1856 __u64 key, __u64 ptr, 1851 __u64 key, __u64 ptr,
1857 const __u64 *keys, const __u64 *ptrs, int n) 1852 const __u64 *keys, const __u64 *ptrs, int n)
1858{ 1853{
1859 struct buffer_head *bh; 1854 struct buffer_head *bh = NULL;
1860 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1855 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1861 struct nilfs_bmap_stats stats; 1856 struct nilfs_bmap_stats stats;
1862 int ret; 1857 int ret;
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 0d5fada91191..7dc23f100e57 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -155,7 +155,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
155int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) 155int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
156{ 156{
157 struct nilfs_dat_entry *entry; 157 struct nilfs_dat_entry *entry;
158 __u64 start;
159 sector_t blocknr; 158 sector_t blocknr;
160 void *kaddr; 159 void *kaddr;
161 int ret; 160 int ret;
@@ -169,7 +168,6 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
169 kaddr = kmap_atomic(req->pr_entry_bh->b_page); 168 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
170 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 169 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
171 req->pr_entry_bh, kaddr); 170 req->pr_entry_bh, kaddr);
172 start = le64_to_cpu(entry->de_start);
173 blocknr = le64_to_cpu(entry->de_blocknr); 171 blocknr = le64_to_cpu(entry->de_blocknr);
174 kunmap_atomic(kaddr); 172 kunmap_atomic(kaddr);
175 173
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 4a73d6dffabf..ac2f64943ff4 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -356,7 +356,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
356 goto failed; 356 goto failed;
357 357
358 mapping_set_gfp_mask(inode->i_mapping, 358 mapping_set_gfp_mask(inode->i_mapping,
359 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 359 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
360 360
361 root = NILFS_I(dir)->i_root; 361 root = NILFS_I(dir)->i_root;
362 ii = NILFS_I(inode); 362 ii = NILFS_I(inode);
@@ -522,7 +522,7 @@ static int __nilfs_read_inode(struct super_block *sb,
522 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 522 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
523 nilfs_set_inode_flags(inode); 523 nilfs_set_inode_flags(inode);
524 mapping_set_gfp_mask(inode->i_mapping, 524 mapping_set_gfp_mask(inode->i_mapping,
525 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 525 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
526 return 0; 526 return 0;
527 527
528 failed_unmap: 528 failed_unmap:
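Both call sites in this file trade the open-coded mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS for mapping_gfp_constraint(); the same substitution shows up in fs/ntfs and fs/splice further down. The helper itself is not part of this hunk; its assumed shape is simply the masked read (a sketch, based on the pagemap.h helper this series relies on):

static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
					   gfp_t gfp_mask)
{
	/* restrict the mapping's allowed GFP flags by the caller's mask */
	return mapping_gfp_mask(mapping) & gfp_mask;
}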
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index dee34d990281..1125f40233ff 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -33,6 +33,7 @@
33#include "page.h" 33#include "page.h"
34#include "mdt.h" 34#include "mdt.h"
35 35
36#include <trace/events/nilfs2.h>
36 37
37#define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) 38#define NILFS_MDT_MAX_RA_BLOCKS (16 - 1)
38 39
@@ -68,6 +69,9 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
68 set_buffer_uptodate(bh); 69 set_buffer_uptodate(bh);
69 mark_buffer_dirty(bh); 70 mark_buffer_dirty(bh);
70 nilfs_mdt_mark_dirty(inode); 71 nilfs_mdt_mark_dirty(inode);
72
73 trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block);
74
71 return 0; 75 return 0;
72} 76}
73 77
@@ -158,6 +162,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
158 get_bh(bh); 162 get_bh(bh);
159 submit_bh(mode, bh); 163 submit_bh(mode, bh);
160 ret = 0; 164 ret = 0;
165
166 trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode);
161 out: 167 out:
162 get_bh(bh); 168 get_bh(bh);
163 *out_bh = bh; 169 *out_bh = bh;
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index fe529a87a208..03246cac3338 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -72,7 +72,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
72} 72}
73 73
74/* Default GFP flags using highmem */ 74/* Default GFP flags using highmem */
75#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) 75#define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM)
76 76
77int nilfs_mdt_get_block(struct inode *, unsigned long, int, 77int nilfs_mdt_get_block(struct inode *, unsigned long, int,
78 void (*init_block)(struct inode *, 78 void (*init_block)(struct inode *,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ff00a0b7acb9..9b4f205d1173 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -582,7 +582,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
582 struct nilfs_recovery_info *ri) 582 struct nilfs_recovery_info *ri)
583{ 583{
584 struct buffer_head *bh_sum = NULL; 584 struct buffer_head *bh_sum = NULL;
585 struct nilfs_segment_summary *sum; 585 struct nilfs_segment_summary *sum = NULL;
586 sector_t pseg_start; 586 sector_t pseg_start;
587 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ 587 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
588 unsigned long nsalvaged_blocks = 0; 588 unsigned long nsalvaged_blocks = 0;
@@ -814,7 +814,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs,
814 struct nilfs_recovery_info *ri) 814 struct nilfs_recovery_info *ri)
815{ 815{
816 struct buffer_head *bh_sum = NULL; 816 struct buffer_head *bh_sum = NULL;
817 struct nilfs_segment_summary *sum; 817 struct nilfs_segment_summary *sum = NULL;
818 sector_t pseg_start, pseg_end, sr_pseg_start = 0; 818 sector_t pseg_start, pseg_end, sr_pseg_start = 0;
819 sector_t seg_start, seg_end; /* range of full segment (block number) */ 819 sector_t seg_start, seg_end; /* range of full segment (block number) */
820 sector_t b, end; 820 sector_t b, end;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index c6abbad9b8e3..3b65adaae7e4 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -77,6 +77,36 @@ enum {
77 NILFS_ST_DONE, 77 NILFS_ST_DONE,
78}; 78};
79 79
80#define CREATE_TRACE_POINTS
81#include <trace/events/nilfs2.h>
82
83/*
84 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are
85 * wrapper functions of stage count (nilfs_sc_info->sc_stage.scnt). Users of
86 * the variable must use them because transition of stage count must involve
87 * trace events (trace_nilfs2_collection_stage_transition).
88 *
89 * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't
90 * produce tracepoint events. It is provided just for making the intention
91 * clear.
92 */
93static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci)
94{
95 sci->sc_stage.scnt++;
96 trace_nilfs2_collection_stage_transition(sci);
97}
98
99static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt)
100{
101 sci->sc_stage.scnt = next_scnt;
102 trace_nilfs2_collection_stage_transition(sci);
103}
104
105static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci)
106{
107 return sci->sc_stage.scnt;
108}
109
80/* State flags of collection */ 110/* State flags of collection */
81#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ 111#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */
82#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ 112#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */
@@ -184,11 +214,18 @@ int nilfs_transaction_begin(struct super_block *sb,
184{ 214{
185 struct the_nilfs *nilfs; 215 struct the_nilfs *nilfs;
186 int ret = nilfs_prepare_segment_lock(ti); 216 int ret = nilfs_prepare_segment_lock(ti);
217 struct nilfs_transaction_info *trace_ti;
187 218
188 if (unlikely(ret < 0)) 219 if (unlikely(ret < 0))
189 return ret; 220 return ret;
190 if (ret > 0) 221 if (ret > 0) {
222 trace_ti = current->journal_info;
223
224 trace_nilfs2_transaction_transition(sb, trace_ti,
225 trace_ti->ti_count, trace_ti->ti_flags,
226 TRACE_NILFS2_TRANSACTION_BEGIN);
191 return 0; 227 return 0;
228 }
192 229
193 sb_start_intwrite(sb); 230 sb_start_intwrite(sb);
194 231
@@ -199,6 +236,11 @@ int nilfs_transaction_begin(struct super_block *sb,
199 ret = -ENOSPC; 236 ret = -ENOSPC;
200 goto failed; 237 goto failed;
201 } 238 }
239
240 trace_ti = current->journal_info;
241 trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count,
242 trace_ti->ti_flags,
243 TRACE_NILFS2_TRANSACTION_BEGIN);
202 return 0; 244 return 0;
203 245
204 failed: 246 failed:
@@ -231,6 +273,8 @@ int nilfs_transaction_commit(struct super_block *sb)
231 ti->ti_flags |= NILFS_TI_COMMIT; 273 ti->ti_flags |= NILFS_TI_COMMIT;
232 if (ti->ti_count > 0) { 274 if (ti->ti_count > 0) {
233 ti->ti_count--; 275 ti->ti_count--;
276 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
277 ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
234 return 0; 278 return 0;
235 } 279 }
236 if (nilfs->ns_writer) { 280 if (nilfs->ns_writer) {
@@ -242,6 +286,9 @@ int nilfs_transaction_commit(struct super_block *sb)
242 nilfs_segctor_do_flush(sci, 0); 286 nilfs_segctor_do_flush(sci, 0);
243 } 287 }
244 up_read(&nilfs->ns_segctor_sem); 288 up_read(&nilfs->ns_segctor_sem);
289 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
290 ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
291
245 current->journal_info = ti->ti_save; 292 current->journal_info = ti->ti_save;
246 293
247 if (ti->ti_flags & NILFS_TI_SYNC) 294 if (ti->ti_flags & NILFS_TI_SYNC)
@@ -260,10 +307,15 @@ void nilfs_transaction_abort(struct super_block *sb)
260 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 307 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
261 if (ti->ti_count > 0) { 308 if (ti->ti_count > 0) {
262 ti->ti_count--; 309 ti->ti_count--;
310 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
311 ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
263 return; 312 return;
264 } 313 }
265 up_read(&nilfs->ns_segctor_sem); 314 up_read(&nilfs->ns_segctor_sem);
266 315
316 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
317 ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
318
267 current->journal_info = ti->ti_save; 319 current->journal_info = ti->ti_save;
268 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 320 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
269 kmem_cache_free(nilfs_transaction_cachep, ti); 321 kmem_cache_free(nilfs_transaction_cachep, ti);
@@ -309,6 +361,9 @@ static void nilfs_transaction_lock(struct super_block *sb,
309 current->journal_info = ti; 361 current->journal_info = ti;
310 362
311 for (;;) { 363 for (;;) {
364 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
365 ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK);
366
312 down_write(&nilfs->ns_segctor_sem); 367 down_write(&nilfs->ns_segctor_sem);
313 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) 368 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
314 break; 369 break;
@@ -320,6 +375,9 @@ static void nilfs_transaction_lock(struct super_block *sb,
320 } 375 }
321 if (gcflag) 376 if (gcflag)
322 ti->ti_flags |= NILFS_TI_GC; 377 ti->ti_flags |= NILFS_TI_GC;
378
379 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
380 ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK);
323} 381}
324 382
325static void nilfs_transaction_unlock(struct super_block *sb) 383static void nilfs_transaction_unlock(struct super_block *sb)
@@ -332,6 +390,9 @@ static void nilfs_transaction_unlock(struct super_block *sb)
332 390
333 up_write(&nilfs->ns_segctor_sem); 391 up_write(&nilfs->ns_segctor_sem);
334 current->journal_info = ti->ti_save; 392 current->journal_info = ti->ti_save;
393
394 trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
395 ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK);
335} 396}
336 397
337static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, 398static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
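With the hunks above, every begin/commit/abort and every take/release of the segment-constructor lock reports its ti_count and ti_flags through trace_nilfs2_transaction_transition(). Seen from a caller nothing changes; the usual bracket simply becomes observable (an illustrative caller, not taken from this diff):

static int example_metadata_update(struct super_block *sb, struct inode *inode)
{
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(sb, &ti, 0);	/* ..._TRANSACTION_BEGIN */
	if (err)
		return err;

	mark_inode_dirty(inode);	/* the change being made durable */

	return nilfs_transaction_commit(sb);	/* ..._TRANSACTION_COMMIT */
}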
@@ -1062,7 +1123,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1062 size_t ndone; 1123 size_t ndone;
1063 int err = 0; 1124 int err = 0;
1064 1125
1065 switch (sci->sc_stage.scnt) { 1126 switch (nilfs_sc_cstage_get(sci)) {
1066 case NILFS_ST_INIT: 1127 case NILFS_ST_INIT:
1067 /* Pre-processes */ 1128 /* Pre-processes */
1068 sci->sc_stage.flags = 0; 1129 sci->sc_stage.flags = 0;
@@ -1071,7 +1132,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1071 sci->sc_nblk_inc = 0; 1132 sci->sc_nblk_inc = 0;
1072 sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; 1133 sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
1073 if (mode == SC_LSEG_DSYNC) { 1134 if (mode == SC_LSEG_DSYNC) {
1074 sci->sc_stage.scnt = NILFS_ST_DSYNC; 1135 nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC);
1075 goto dsync_mode; 1136 goto dsync_mode;
1076 } 1137 }
1077 } 1138 }
@@ -1079,10 +1140,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1079 sci->sc_stage.dirty_file_ptr = NULL; 1140 sci->sc_stage.dirty_file_ptr = NULL;
1080 sci->sc_stage.gc_inode_ptr = NULL; 1141 sci->sc_stage.gc_inode_ptr = NULL;
1081 if (mode == SC_FLUSH_DAT) { 1142 if (mode == SC_FLUSH_DAT) {
1082 sci->sc_stage.scnt = NILFS_ST_DAT; 1143 nilfs_sc_cstage_set(sci, NILFS_ST_DAT);
1083 goto dat_stage; 1144 goto dat_stage;
1084 } 1145 }
1085 sci->sc_stage.scnt++; /* Fall through */ 1146 nilfs_sc_cstage_inc(sci); /* Fall through */
1086 case NILFS_ST_GC: 1147 case NILFS_ST_GC:
1087 if (nilfs_doing_gc()) { 1148 if (nilfs_doing_gc()) {
1088 head = &sci->sc_gc_inodes; 1149 head = &sci->sc_gc_inodes;
@@ -1103,7 +1164,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1103 } 1164 }
1104 sci->sc_stage.gc_inode_ptr = NULL; 1165 sci->sc_stage.gc_inode_ptr = NULL;
1105 } 1166 }
1106 sci->sc_stage.scnt++; /* Fall through */ 1167 nilfs_sc_cstage_inc(sci); /* Fall through */
1107 case NILFS_ST_FILE: 1168 case NILFS_ST_FILE:
1108 head = &sci->sc_dirty_files; 1169 head = &sci->sc_dirty_files;
1109 ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, 1170 ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
@@ -1125,10 +1186,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1125 } 1186 }
1126 sci->sc_stage.dirty_file_ptr = NULL; 1187 sci->sc_stage.dirty_file_ptr = NULL;
1127 if (mode == SC_FLUSH_FILE) { 1188 if (mode == SC_FLUSH_FILE) {
1128 sci->sc_stage.scnt = NILFS_ST_DONE; 1189 nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1129 return 0; 1190 return 0;
1130 } 1191 }
1131 sci->sc_stage.scnt++; 1192 nilfs_sc_cstage_inc(sci);
1132 sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; 1193 sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
1133 /* Fall through */ 1194 /* Fall through */
1134 case NILFS_ST_IFILE: 1195 case NILFS_ST_IFILE:
@@ -1136,7 +1197,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1136 &nilfs_sc_file_ops); 1197 &nilfs_sc_file_ops);
1137 if (unlikely(err)) 1198 if (unlikely(err))
1138 break; 1199 break;
1139 sci->sc_stage.scnt++; 1200 nilfs_sc_cstage_inc(sci);
1140 /* Creating a checkpoint */ 1201 /* Creating a checkpoint */
1141 err = nilfs_segctor_create_checkpoint(sci); 1202 err = nilfs_segctor_create_checkpoint(sci);
1142 if (unlikely(err)) 1203 if (unlikely(err))
@@ -1147,7 +1208,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1147 &nilfs_sc_file_ops); 1208 &nilfs_sc_file_ops);
1148 if (unlikely(err)) 1209 if (unlikely(err))
1149 break; 1210 break;
1150 sci->sc_stage.scnt++; /* Fall through */ 1211 nilfs_sc_cstage_inc(sci); /* Fall through */
1151 case NILFS_ST_SUFILE: 1212 case NILFS_ST_SUFILE:
1152 err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, 1213 err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
1153 sci->sc_nfreesegs, &ndone); 1214 sci->sc_nfreesegs, &ndone);
@@ -1163,7 +1224,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1163 &nilfs_sc_file_ops); 1224 &nilfs_sc_file_ops);
1164 if (unlikely(err)) 1225 if (unlikely(err))
1165 break; 1226 break;
1166 sci->sc_stage.scnt++; /* Fall through */ 1227 nilfs_sc_cstage_inc(sci); /* Fall through */
1167 case NILFS_ST_DAT: 1228 case NILFS_ST_DAT:
1168 dat_stage: 1229 dat_stage:
1169 err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, 1230 err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
@@ -1171,10 +1232,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1171 if (unlikely(err)) 1232 if (unlikely(err))
1172 break; 1233 break;
1173 if (mode == SC_FLUSH_DAT) { 1234 if (mode == SC_FLUSH_DAT) {
1174 sci->sc_stage.scnt = NILFS_ST_DONE; 1235 nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1175 return 0; 1236 return 0;
1176 } 1237 }
1177 sci->sc_stage.scnt++; /* Fall through */ 1238 nilfs_sc_cstage_inc(sci); /* Fall through */
1178 case NILFS_ST_SR: 1239 case NILFS_ST_SR:
1179 if (mode == SC_LSEG_SR) { 1240 if (mode == SC_LSEG_SR) {
1180 /* Appending a super root */ 1241 /* Appending a super root */
@@ -1184,7 +1245,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1184 } 1245 }
1185 /* End of a logical segment */ 1246 /* End of a logical segment */
1186 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; 1247 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1187 sci->sc_stage.scnt = NILFS_ST_DONE; 1248 nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1188 return 0; 1249 return 0;
1189 case NILFS_ST_DSYNC: 1250 case NILFS_ST_DSYNC:
1190 dsync_mode: 1251 dsync_mode:
@@ -1197,7 +1258,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1197 if (unlikely(err)) 1258 if (unlikely(err))
1198 break; 1259 break;
1199 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; 1260 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1200 sci->sc_stage.scnt = NILFS_ST_DONE; 1261 nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1201 return 0; 1262 return 0;
1202 case NILFS_ST_DONE: 1263 case NILFS_ST_DONE:
1203 return 0; 1264 return 0;
@@ -1442,7 +1503,8 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1442 goto failed; 1503 goto failed;
1443 1504
1444 /* The current segment is filled up */ 1505 /* The current segment is filled up */
1445 if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) 1506 if (mode != SC_LSEG_SR ||
1507 nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE)
1446 break; 1508 break;
1447 1509
1448 nilfs_clear_logs(&sci->sc_segbufs); 1510 nilfs_clear_logs(&sci->sc_segbufs);
@@ -1946,7 +2008,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
1946 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 2008 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1947 int err; 2009 int err;
1948 2010
1949 sci->sc_stage.scnt = NILFS_ST_INIT; 2011 nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
1950 sci->sc_cno = nilfs->ns_cno; 2012 sci->sc_cno = nilfs->ns_cno;
1951 2013
1952 err = nilfs_segctor_collect_dirty_files(sci, nilfs); 2014 err = nilfs_segctor_collect_dirty_files(sci, nilfs);
@@ -1974,7 +2036,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
1974 goto failed; 2036 goto failed;
1975 2037
1976 /* Avoid empty segment */ 2038 /* Avoid empty segment */
1977 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2039 if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE &&
1978 nilfs_segbuf_empty(sci->sc_curseg)) { 2040 nilfs_segbuf_empty(sci->sc_curseg)) {
1979 nilfs_segctor_abort_construction(sci, nilfs, 1); 2041 nilfs_segctor_abort_construction(sci, nilfs, 1);
1980 goto out; 2042 goto out;
@@ -1988,7 +2050,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
1988 nilfs_segctor_fill_in_file_bmap(sci); 2050 nilfs_segctor_fill_in_file_bmap(sci);
1989 2051
1990 if (mode == SC_LSEG_SR && 2052 if (mode == SC_LSEG_SR &&
1991 sci->sc_stage.scnt >= NILFS_ST_CPFILE) { 2053 nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
1992 err = nilfs_segctor_fill_in_checkpoint(sci); 2054 err = nilfs_segctor_fill_in_checkpoint(sci);
1993 if (unlikely(err)) 2055 if (unlikely(err))
1994 goto failed_to_write; 2056 goto failed_to_write;
@@ -2007,7 +2069,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2007 if (unlikely(err)) 2069 if (unlikely(err))
2008 goto failed_to_write; 2070 goto failed_to_write;
2009 2071
2010 if (sci->sc_stage.scnt == NILFS_ST_DONE || 2072 if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
2011 nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { 2073 nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
2012 /* 2074 /*
2013 * At this point, we avoid double buffering 2075 * At this point, we avoid double buffering
@@ -2020,7 +2082,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2020 if (err) 2082 if (err)
2021 goto failed_to_write; 2083 goto failed_to_write;
2022 } 2084 }
2023 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2085 } while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE);
2024 2086
2025 out: 2087 out:
2026 nilfs_segctor_drop_written_files(sci, nilfs); 2088 nilfs_segctor_drop_written_files(sci, nilfs);
@@ -2430,7 +2492,6 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2430static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) 2492static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2431{ 2493{
2432 int mode = 0; 2494 int mode = 0;
2433 int err;
2434 2495
2435 spin_lock(&sci->sc_state_lock); 2496 spin_lock(&sci->sc_state_lock);
2436 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? 2497 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
@@ -2438,7 +2499,7 @@ static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2438 spin_unlock(&sci->sc_state_lock); 2499 spin_unlock(&sci->sc_state_lock);
2439 2500
2440 if (mode) { 2501 if (mode) {
2441 err = nilfs_segctor_do_construct(sci, mode); 2502 nilfs_segctor_do_construct(sci, mode);
2442 2503
2443 spin_lock(&sci->sc_state_lock); 2504 spin_lock(&sci->sc_state_lock);
2444 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? 2505 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index a48d6de1e02c..0408b9b2814b 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -67,7 +67,8 @@ struct nilfs_recovery_info {
67 67
68/** 68/**
69 * struct nilfs_cstage - Context of collection stage 69 * struct nilfs_cstage - Context of collection stage
70 * @scnt: Stage count 70 * @scnt: Stage count, must be accessed via wrappers:
71 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get()
71 * @flags: State flags 72 * @flags: State flags
72 * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file 73 * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file
73 * @gc_inode_ptr: Pointer on the list of gc-inodes 74 * @gc_inode_ptr: Pointer on the list of gc-inodes
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 2a869c35c362..52821ffc11f4 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -30,6 +30,8 @@
30#include "mdt.h" 30#include "mdt.h"
31#include "sufile.h" 31#include "sufile.h"
32 32
33#include <trace/events/nilfs2.h>
34
33/** 35/**
34 * struct nilfs_sufile_info - on-memory private data of sufile 36 * struct nilfs_sufile_info - on-memory private data of sufile
35 * @mi: on-memory private data of metadata file 37 * @mi: on-memory private data of metadata file
@@ -317,7 +319,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
317 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 319 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
318 __u64 segnum, maxsegnum, last_alloc; 320 __u64 segnum, maxsegnum, last_alloc;
319 void *kaddr; 321 void *kaddr;
320 unsigned long nsegments, ncleansegs, nsus, cnt; 322 unsigned long nsegments, nsus, cnt;
321 int ret, j; 323 int ret, j;
322 324
323 down_write(&NILFS_MDT(sufile)->mi_sem); 325 down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -327,7 +329,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
327 goto out_sem; 329 goto out_sem;
328 kaddr = kmap_atomic(header_bh->b_page); 330 kaddr = kmap_atomic(header_bh->b_page);
329 header = kaddr + bh_offset(header_bh); 331 header = kaddr + bh_offset(header_bh);
330 ncleansegs = le64_to_cpu(header->sh_ncleansegs);
331 last_alloc = le64_to_cpu(header->sh_last_alloc); 332 last_alloc = le64_to_cpu(header->sh_last_alloc);
332 kunmap_atomic(kaddr); 333 kunmap_atomic(kaddr);
333 334
@@ -358,6 +359,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
358 break; /* never happens */ 359 break; /* never happens */
359 } 360 }
360 } 361 }
362 trace_nilfs2_segment_usage_check(sufile, segnum, cnt);
361 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, 363 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
362 &su_bh); 364 &su_bh);
363 if (ret < 0) 365 if (ret < 0)
@@ -388,6 +390,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
388 nilfs_mdt_mark_dirty(sufile); 390 nilfs_mdt_mark_dirty(sufile);
389 brelse(su_bh); 391 brelse(su_bh);
390 *segnump = segnum; 392 *segnump = segnum;
393
394 trace_nilfs2_segment_usage_allocated(sufile, segnum);
395
391 goto out_header; 396 goto out_header;
392 } 397 }
393 398
@@ -490,6 +495,8 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
490 NILFS_SUI(sufile)->ncleansegs++; 495 NILFS_SUI(sufile)->ncleansegs++;
491 496
492 nilfs_mdt_mark_dirty(sufile); 497 nilfs_mdt_mark_dirty(sufile);
498
499 trace_nilfs2_segment_usage_freed(sufile, segnum);
493} 500}
494 501
495/** 502/**
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f47585bfeb01..354013ea22ec 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -361,7 +361,7 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
361 struct nilfs_super_block *nsbp; 361 struct nilfs_super_block *nsbp;
362 sector_t blocknr, newblocknr; 362 sector_t blocknr, newblocknr;
363 unsigned long offset; 363 unsigned long offset;
364 int sb2i = -1; /* array index of the secondary superblock */ 364 int sb2i; /* array index of the secondary superblock */
365 int ret = 0; 365 int ret = 0;
366 366
367 /* nilfs->ns_sem must be locked by the caller. */ 367 /* nilfs->ns_sem must be locked by the caller. */
@@ -372,6 +372,9 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
372 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { 372 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
373 sb2i = 0; 373 sb2i = 0;
374 blocknr = nilfs->ns_sbh[0]->b_blocknr; 374 blocknr = nilfs->ns_sbh[0]->b_blocknr;
375 } else {
376 sb2i = -1;
377 blocknr = 0;
375 } 378 }
376 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) 379 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
377 goto out; /* super block location is unchanged */ 380 goto out; /* super block location is unchanged */
@@ -1405,14 +1408,10 @@ static void nilfs_destroy_cachep(void)
1405 */ 1408 */
1406 rcu_barrier(); 1409 rcu_barrier();
1407 1410
1408 if (nilfs_inode_cachep) 1411 kmem_cache_destroy(nilfs_inode_cachep);
1409 kmem_cache_destroy(nilfs_inode_cachep); 1412 kmem_cache_destroy(nilfs_transaction_cachep);
1410 if (nilfs_transaction_cachep) 1413 kmem_cache_destroy(nilfs_segbuf_cachep);
1411 kmem_cache_destroy(nilfs_transaction_cachep); 1414 kmem_cache_destroy(nilfs_btree_path_cache);
1412 if (nilfs_segbuf_cachep)
1413 kmem_cache_destroy(nilfs_segbuf_cachep);
1414 if (nilfs_btree_path_cache)
1415 kmem_cache_destroy(nilfs_btree_path_cache);
1416} 1415}
1417 1416
1418static int __init nilfs_init_cachep(void) 1417static int __init nilfs_init_cachep(void)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 262561fea923..9d383e5eff0e 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -525,8 +525,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
525 } 525 }
526 } 526 }
527 err = add_to_page_cache_lru(*cached_page, mapping, 527 err = add_to_page_cache_lru(*cached_page, mapping,
528 index, 528 index,
529 GFP_KERNEL & mapping_gfp_mask(mapping)); 529 mapping_gfp_constraint(mapping, GFP_KERNEL));
530 if (unlikely(err)) { 530 if (unlikely(err)) {
531 if (err == -EEXIST) 531 if (err == -EEXIST)
532 continue; 532 continue;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index eed2050db9be..d73291f5f0fc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -91,18 +91,18 @@
91static inline void task_name(struct seq_file *m, struct task_struct *p) 91static inline void task_name(struct seq_file *m, struct task_struct *p)
92{ 92{
93 char *buf; 93 char *buf;
94 size_t size;
94 char tcomm[sizeof(p->comm)]; 95 char tcomm[sizeof(p->comm)];
96 int ret;
95 97
96 get_task_comm(tcomm, p); 98 get_task_comm(tcomm, p);
97 99
98 seq_puts(m, "Name:\t"); 100 seq_puts(m, "Name:\t");
99 buf = m->buf + m->count;
100 101
101 /* Ignore error for now */ 102 size = seq_get_buf(m, &buf);
102 buf += string_escape_str(tcomm, buf, m->size - m->count, 103 ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
103 ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); 104 seq_commit(m, ret < size ? ret : -1);
104 105
105 m->count = buf - m->buf;
106 seq_putc(m, '\n'); 106 seq_putc(m, '\n');
107} 107}
108 108
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 6e5fcd00733e..3c2a915c695a 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -291,11 +291,19 @@ static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
291 */ 291 */
292int proc_fd_permission(struct inode *inode, int mask) 292int proc_fd_permission(struct inode *inode, int mask)
293{ 293{
294 int rv = generic_permission(inode, mask); 294 struct task_struct *p;
295 int rv;
296
297 rv = generic_permission(inode, mask);
295 if (rv == 0) 298 if (rv == 0)
296 return 0; 299 return rv;
297 if (task_tgid(current) == proc_pid(inode)) 300
301 rcu_read_lock();
302 p = pid_task(proc_pid(inode), PIDTYPE_PID);
303 if (p && same_thread_group(p, current))
298 rv = 0; 304 rv = 0;
305 rcu_read_unlock();
306
299 return rv; 307 return rv;
300} 308}
301 309
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 225586e141ca..e85664b7c7d9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -13,6 +13,7 @@
13#include <linux/cred.h> 13#include <linux/cred.h>
14#include <linux/mm.h> 14#include <linux/mm.h>
15#include <linux/printk.h> 15#include <linux/printk.h>
16#include <linux/string_helpers.h>
16 17
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include <asm/page.h> 19#include <asm/page.h>
@@ -25,12 +26,17 @@ static void seq_set_overflow(struct seq_file *m)
25static void *seq_buf_alloc(unsigned long size) 26static void *seq_buf_alloc(unsigned long size)
26{ 27{
27 void *buf; 28 void *buf;
29 gfp_t gfp = GFP_KERNEL;
28 30
29 /* 31 /*
30 * __GFP_NORETRY to avoid oom-killings with high-order allocations - 32 * For high order allocations, use __GFP_NORETRY to avoid oom-killing -
31 * it's better to fall back to vmalloc() than to kill things. 33 * it's better to fall back to vmalloc() than to kill things. For small
34 * allocations, just use GFP_KERNEL which will oom kill, thus no need
35 * for vmalloc fallback.
32 */ 36 */
33 buf = kmalloc(size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); 37 if (size > PAGE_SIZE)
38 gfp |= __GFP_NORETRY | __GFP_NOWARN;
39 buf = kmalloc(size, gfp);
34 if (!buf && size > PAGE_SIZE) 40 if (!buf && size > PAGE_SIZE)
35 buf = vmalloc(size); 41 buf = vmalloc(size);
36 return buf; 42 return buf;
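The allocator above only bothers with the __GFP_NORETRY/__GFP_NOWARN-plus-vmalloc dance for buffers larger than a page. The matching free side has to cope with either origin, which kvfree() does; for symmetry it would look like this (illustrative helper; seq_file.c itself calls kvfree() directly):

static void seq_buf_free(const void *buf)
{
	/* works for both kmalloc()ed and vmalloc()ed buffers */
	kvfree(buf);
}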
@@ -377,26 +383,12 @@ EXPORT_SYMBOL(seq_release);
377 */ 383 */
378void seq_escape(struct seq_file *m, const char *s, const char *esc) 384void seq_escape(struct seq_file *m, const char *s, const char *esc)
379{ 385{
380 char *end = m->buf + m->size; 386 char *buf;
381 char *p; 387 size_t size = seq_get_buf(m, &buf);
382 char c; 388 int ret;
383 389
384 for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { 390 ret = string_escape_str(s, buf, size, ESCAPE_OCTAL, esc);
385 if (!strchr(esc, c)) { 391 seq_commit(m, ret < size ? ret : -1);
386 *p++ = c;
387 continue;
388 }
389 if (p + 3 < end) {
390 *p++ = '\\';
391 *p++ = '0' + ((c & 0300) >> 6);
392 *p++ = '0' + ((c & 070) >> 3);
393 *p++ = '0' + (c & 07);
394 continue;
395 }
396 seq_set_overflow(m);
397 return;
398 }
399 m->count = p - m->buf;
400} 392}
401EXPORT_SYMBOL(seq_escape); 393EXPORT_SYMBOL(seq_escape);
402 394
@@ -773,6 +765,8 @@ void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
773{ 765{
774 const u8 *ptr = buf; 766 const u8 *ptr = buf;
775 int i, linelen, remaining = len; 767 int i, linelen, remaining = len;
768 char *buffer;
769 size_t size;
776 int ret; 770 int ret;
777 771
778 if (rowsize != 16 && rowsize != 32) 772 if (rowsize != 16 && rowsize != 32)
@@ -794,15 +788,12 @@ void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
794 break; 788 break;
795 } 789 }
796 790
791 size = seq_get_buf(m, &buffer);
797 ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, 792 ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
798 m->buf + m->count, m->size - m->count, 793 buffer, size, ascii);
799 ascii); 794 seq_commit(m, ret < size ? ret : -1);
800 if (ret >= m->size - m->count) { 795
801 seq_set_overflow(m); 796 seq_putc(m, '\n');
802 } else {
803 m->count += ret;
804 seq_putc(m, '\n');
805 }
806 } 797 }
807} 798}
808EXPORT_SYMBOL(seq_hex_dump); 799EXPORT_SYMBOL(seq_hex_dump);
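seq_escape(), seq_hex_dump() and the task_name() change in fs/proc/array.c all move to the same idiom: seq_get_buf() lends out the unused tail of the seq_file buffer, the formatter reports the length it needed, and seq_commit() either accepts that length or takes -1 to mark the buffer as overflowed. The idiom generalises to any formatter that returns its would-be length (a sketch with a made-up caller; snprintf() qualifies because it reports the untruncated length):

static void seq_print_point(struct seq_file *m, int x, int y)
{
	char *buf;
	size_t size = seq_get_buf(m, &buf);	/* space left in m->buf */
	int ret;

	ret = snprintf(buf, size, "(%d,%d)\n", x, y);
	seq_commit(m, ret < size ? ret : -1);	/* -1 marks the seq_file full */
}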
diff --git a/fs/splice.c b/fs/splice.c
index 5fc1e50a7f30..801c21cd77fe 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -360,7 +360,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
360 break; 360 break;
361 361
362 error = add_to_page_cache_lru(page, mapping, index, 362 error = add_to_page_cache_lru(page, mapping, index,
363 GFP_KERNEL & mapping_gfp_mask(mapping)); 363 mapping_gfp_constraint(mapping, GFP_KERNEL));
364 if (unlikely(error)) { 364 if (unlikely(error)) {
365 page_cache_release(page); 365 page_cache_release(page);
366 if (error == -EEXIST) 366 if (error == -EEXIST)
diff --git a/fs/sync.c b/fs/sync.c
index 4ec430ae2b0d..dd5d1711c7ac 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -348,7 +348,8 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
348 } 348 }
349 349
350 if (flags & SYNC_FILE_RANGE_WRITE) { 350 if (flags & SYNC_FILE_RANGE_WRITE) {
351 ret = filemap_fdatawrite_range(mapping, offset, endbyte); 351 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
352 WB_SYNC_NONE);
352 if (ret < 0) 353 if (ret < 0)
353 goto out_put; 354 goto out_put;
354 } 355 }
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index eac9549efd52..587174fd4f2c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -525,7 +525,7 @@ xfs_qm_shrink_scan(
525 unsigned long freed; 525 unsigned long freed;
526 int error; 526 int error;
527 527
528 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) 528 if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
529 return 0; 529 return 0;
530 530
531 INIT_LIST_HEAD(&isol.buffers); 531 INIT_LIST_HEAD(&isol.buffers);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 8b5ce7c5d9bb..f56cdcecc1c9 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -822,7 +822,6 @@ struct drm_device {
822 822
823 struct drm_sg_mem *sg; /**< Scatter gather memory */ 823 struct drm_sg_mem *sg; /**< Scatter gather memory */
824 unsigned int num_crtcs; /**< Number of CRTCs on this device */ 824 unsigned int num_crtcs; /**< Number of CRTCs on this device */
825 sigset_t sigmask;
826 825
827 struct { 826 struct {
828 int context; 827 int context;
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index e63553386ae7..2b8ed123ad36 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -164,6 +164,8 @@ static inline __u8 ror8(__u8 word, unsigned int shift)
164 * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit 164 * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit
165 * @value: value to sign extend 165 * @value: value to sign extend
166 * @index: 0 based bit index (0<=index<32) to sign bit 166 * @index: 0 based bit index (0<=index<32) to sign bit
167 *
168 * This is safe to use for 16- and 8-bit types as well.
167 */ 169 */
168static inline __s32 sign_extend32(__u32 value, int index) 170static inline __s32 sign_extend32(__u32 value, int index)
169{ 171{
@@ -171,6 +173,17 @@ static inline __s32 sign_extend32(__u32 value, int index)
171 return (__s32)(value << shift) >> shift; 173 return (__s32)(value << shift) >> shift;
172} 174}
173 175
176/**
177 * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit
178 * @value: value to sign extend
179 * @index: 0 based bit index (0<=index<64) to sign bit
180 */
181static inline __s64 sign_extend64(__u64 value, int index)
182{
183 __u8 shift = 63 - index;
184 return (__s64)(value << shift) >> shift;
185}
186
174static inline unsigned fls_long(unsigned long l) 187static inline unsigned fls_long(unsigned long l)
175{ 188{
176 if (sizeof(l) == 4) 189 if (sizeof(l) == 4)
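sign_extend64() is the 64-bit twin of the helper documented just above it; the usual consumer is a field narrower than the register it was read into. A worked example with made-up values:

	/* a 48-bit two's-complement field packed into the low bits of a u64 */
	__u64 raw = 0x0000ffffffffff9cULL;	/* 48-bit encoding of -100 */
	__s64 val = sign_extend64(raw, 47);	/* bit 47 is the sign bit */

	/* val is now -100; values with bit 47 clear pass through unchanged */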
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 0e3110a0b771..22ab246feed3 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -205,7 +205,10 @@
205 205
206#if GCC_VERSION >= 40600 206#if GCC_VERSION >= 40600
207/* 207/*
208 * Tell the optimizer that something else uses this function or variable. 208 * When used with Link Time Optimization, gcc can optimize away C functions or
209 * variables which are referenced only from assembly code. __visible tells the
210 * optimizer that something else uses this function or variable, thus preventing
211 * this.
209 */ 212 */
210#define __visible __attribute__((externally_visible)) 213#define __visible __attribute__((externally_visible))
211#endif 214#endif
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 5a1311942358..85a868ccb493 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -104,6 +104,9 @@ extern void cpuset_print_current_mems_allowed(void);
104 */ 104 */
105static inline unsigned int read_mems_allowed_begin(void) 105static inline unsigned int read_mems_allowed_begin(void)
106{ 106{
107 if (!cpusets_enabled())
108 return 0;
109
107 return read_seqcount_begin(&current->mems_allowed_seq); 110 return read_seqcount_begin(&current->mems_allowed_seq);
108} 111}
109 112
@@ -115,6 +118,9 @@ static inline unsigned int read_mems_allowed_begin(void)
115 */ 118 */
116static inline bool read_mems_allowed_retry(unsigned int seq) 119static inline bool read_mems_allowed_retry(unsigned int seq)
117{ 120{
121 if (!cpusets_enabled())
122 return false;
123
118 return read_seqcount_retry(&current->mems_allowed_seq, seq); 124 return read_seqcount_retry(&current->mems_allowed_seq, seq);
119} 125}
120 126
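The two early returns above keep the seqcount out of the picture whenever cpusets are not in use; callers keep the familiar optimistic-retry shape, roughly as follows (a sketch of the pattern, not a specific call site; try_alloc() is hypothetical):

	unsigned int cpuset_mems_cookie;
	struct page *page;

	do {
		cpuset_mems_cookie = read_mems_allowed_begin();
		page = try_alloc(gfp_mask, order, nodemask);
	} while (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)));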
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ac07ff090919..2e551e2d2d03 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -1,6 +1,7 @@
1#ifndef _LINUX_DMA_MAPPING_H 1#ifndef _LINUX_DMA_MAPPING_H
2#define _LINUX_DMA_MAPPING_H 2#define _LINUX_DMA_MAPPING_H
3 3
4#include <linux/sizes.h>
4#include <linux/string.h> 5#include <linux/string.h>
5#include <linux/device.h> 6#include <linux/device.h>
6#include <linux/err.h> 7#include <linux/err.h>
@@ -145,7 +146,9 @@ static inline void arch_teardown_dma_ops(struct device *dev) { }
145 146
146static inline unsigned int dma_get_max_seg_size(struct device *dev) 147static inline unsigned int dma_get_max_seg_size(struct device *dev)
147{ 148{
148 return dev->dma_parms ? dev->dma_parms->max_segment_size : 65536; 149 if (dev->dma_parms && dev->dma_parms->max_segment_size)
150 return dev->dma_parms->max_segment_size;
151 return SZ_64K;
149} 152}
150 153
151static inline unsigned int dma_set_max_seg_size(struct device *dev, 154static inline unsigned int dma_set_max_seg_size(struct device *dev,
@@ -154,14 +157,15 @@ static inline unsigned int dma_set_max_seg_size(struct device *dev,
154 if (dev->dma_parms) { 157 if (dev->dma_parms) {
155 dev->dma_parms->max_segment_size = size; 158 dev->dma_parms->max_segment_size = size;
156 return 0; 159 return 0;
157 } else 160 }
158 return -EIO; 161 return -EIO;
159} 162}
160 163
161static inline unsigned long dma_get_seg_boundary(struct device *dev) 164static inline unsigned long dma_get_seg_boundary(struct device *dev)
162{ 165{
163 return dev->dma_parms ? 166 if (dev->dma_parms && dev->dma_parms->segment_boundary_mask)
164 dev->dma_parms->segment_boundary_mask : 0xffffffff; 167 return dev->dma_parms->segment_boundary_mask;
168 return DMA_BIT_MASK(32);
165} 169}
166 170
167static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) 171static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
@@ -169,8 +173,8 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
169 if (dev->dma_parms) { 173 if (dev->dma_parms) {
170 dev->dma_parms->segment_boundary_mask = mask; 174 dev->dma_parms->segment_boundary_mask = mask;
171 return 0; 175 return 0;
172 } else 176 }
173 return -EIO; 177 return -EIO;
174} 178}
175 179
176#ifndef dma_max_pfn 180#ifndef dma_max_pfn
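
With the hunk above, devices that never allocated dev->dma_parms now fall back to SZ_64K and DMA_BIT_MASK(32) instead of bare magic numbers. A hedged sketch of the setter side (the probe function and the limits are illustrative, not taken from this patch):

#include <linux/dma-mapping.h>
#include <linux/sizes.h>

static int my_probe(struct device *dev)
{
	int ret;

	/* The DMA engine merges segments up to 1 MiB... */
	ret = dma_set_max_seg_size(dev, SZ_1M);
	if (ret)
		return ret;	/* -EIO: the bus did not allocate dev->dma_parms */

	/* ...but a segment must not cross a 4 GiB boundary. */
	return dma_set_seg_boundary(dev, DMA_BIT_MASK(32));
}
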
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f92cbd2f4450..6523109e136d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -14,7 +14,7 @@ struct vm_area_struct;
14#define ___GFP_HIGHMEM 0x02u 14#define ___GFP_HIGHMEM 0x02u
15#define ___GFP_DMA32 0x04u 15#define ___GFP_DMA32 0x04u
16#define ___GFP_MOVABLE 0x08u 16#define ___GFP_MOVABLE 0x08u
17#define ___GFP_WAIT 0x10u 17#define ___GFP_RECLAIMABLE 0x10u
18#define ___GFP_HIGH 0x20u 18#define ___GFP_HIGH 0x20u
19#define ___GFP_IO 0x40u 19#define ___GFP_IO 0x40u
20#define ___GFP_FS 0x80u 20#define ___GFP_FS 0x80u
@@ -29,18 +29,17 @@ struct vm_area_struct;
29#define ___GFP_NOMEMALLOC 0x10000u 29#define ___GFP_NOMEMALLOC 0x10000u
30#define ___GFP_HARDWALL 0x20000u 30#define ___GFP_HARDWALL 0x20000u
31#define ___GFP_THISNODE 0x40000u 31#define ___GFP_THISNODE 0x40000u
32#define ___GFP_RECLAIMABLE 0x80000u 32#define ___GFP_ATOMIC 0x80000u
33#define ___GFP_NOACCOUNT 0x100000u 33#define ___GFP_NOACCOUNT 0x100000u
34#define ___GFP_NOTRACK 0x200000u 34#define ___GFP_NOTRACK 0x200000u
35#define ___GFP_NO_KSWAPD 0x400000u 35#define ___GFP_DIRECT_RECLAIM 0x400000u
36#define ___GFP_OTHER_NODE 0x800000u 36#define ___GFP_OTHER_NODE 0x800000u
37#define ___GFP_WRITE 0x1000000u 37#define ___GFP_WRITE 0x1000000u
38#define ___GFP_KSWAPD_RECLAIM 0x2000000u
38/* If the above are modified, __GFP_BITS_SHIFT may need updating */ 39/* If the above are modified, __GFP_BITS_SHIFT may need updating */
39 40
40/* 41/*
41 * GFP bitmasks.. 42 * Physical address zone modifiers (see linux/mmzone.h - low four bits)
42 *
43 * Zone modifiers (see linux/mmzone.h - low three bits)
44 * 43 *
45 * Do not put any conditional on these. If necessary modify the definitions 44 * Do not put any conditional on these. If necessary modify the definitions
46 * without the underscores and use them consistently. The definitions here may 45 * without the underscores and use them consistently. The definitions here may
@@ -50,116 +49,229 @@ struct vm_area_struct;
50#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) 49#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)
51#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) 50#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)
52#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ 51#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */
52#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */
53#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) 53#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
54
55/*
56 * Page mobility and placement hints
57 *
58 * These flags provide hints about how mobile the page is. Pages with similar
59 * mobility are placed within the same pageblocks to minimise problems due
60 * to external fragmentation.
61 *
62 * __GFP_MOVABLE (also a zone modifier) indicates that the page can be
63 * moved by page migration during memory compaction or can be reclaimed.
64 *
65 * __GFP_RECLAIMABLE is used for slab allocations that specify
66 * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
67 *
68 * __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
69 * these pages will be spread between local zones to avoid all the dirty
70 * pages being in one zone (fair zone allocation policy).
71 *
72 * __GFP_HARDWALL enforces the cpuset memory allocation policy.
73 *
74 * __GFP_THISNODE forces the allocation to be satisfied from the requested
75 * node with no fallbacks or placement policy enforcements.
76 */
77#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
78#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
79#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
80#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
81
54/* 82/*
55 * Action modifiers - doesn't change the zoning 83 * Watermark modifiers -- controls access to emergency reserves
84 *
85 * __GFP_HIGH indicates that the caller is high-priority and that granting
86 * the request is necessary before the system can make forward progress.
87 * For example, creating an IO context to clean pages.
88 *
89 * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
90 * high priority. Users are typically interrupt handlers. This may be
91 * used in conjunction with __GFP_HIGH.
92 *
93 * __GFP_MEMALLOC allows access to all memory. This should only be used when
94 * the caller guarantees the allocation will allow more memory to be freed
95 * very shortly, e.g. process exiting or swapping. Users should either be
 96 * the MM or co-ordinate closely with the VM (e.g. swap over NFS).
97 *
98 * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
99 * This takes precedence over the __GFP_MEMALLOC flag if both are set.
100 *
101 * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement.
102 */
103#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC)
104#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH)
105#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)
106#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
107#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT)
108
109/*
110 * Reclaim modifiers
111 *
112 * __GFP_IO can start physical IO.
113 *
114 * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
115 * allocator recursing into the filesystem which might already be holding
116 * locks.
117 *
118 * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
119 * This flag can be cleared to avoid unnecessary delays when a fallback
120 * option is available.
121 *
122 * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
123 * the low watermark is reached and have it reclaim pages until the high
124 * watermark is reached. A caller may wish to clear this flag when fallback
125 * options are available and the reclaim is likely to disrupt the system. The
126 * canonical example is THP allocation where a fallback is cheap but
127 * reclaim/compaction may cause indirect stalls.
128 *
129 * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
56 * 130 *
57 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt 131 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
58 * _might_ fail. This depends upon the particular VM implementation. 132 * _might_ fail. This depends upon the particular VM implementation.
59 * 133 *
60 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller 134 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
61 * cannot handle allocation failures. New users should be evaluated carefully 135 * cannot handle allocation failures. New users should be evaluated carefully
62 * (and the flag should be used only when there is no reasonable failure policy) 136 * (and the flag should be used only when there is no reasonable failure
63 * but it is definitely preferable to use the flag rather than opencode endless 137 * policy) but it is definitely preferable to use the flag rather than
64 * loop around allocator. 138 * opencode endless loop around allocator.
65 * 139 *
66 * __GFP_NORETRY: The VM implementation must not retry indefinitely and will 140 * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
67 * return NULL when direct reclaim and memory compaction have failed to allow 141 * return NULL when direct reclaim and memory compaction have failed to allow
68 * the allocation to succeed. The OOM killer is not called with the current 142 * the allocation to succeed. The OOM killer is not called with the current
69 * implementation. 143 * implementation.
70 *
71 * __GFP_MOVABLE: Flag that this page will be movable by the page migration
72 * mechanism or reclaimed
73 */ 144 */
74#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */ 145#define __GFP_IO ((__force gfp_t)___GFP_IO)
75#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */ 146#define __GFP_FS ((__force gfp_t)___GFP_FS)
76#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ 147#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
77#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ 148#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
78#define __GFP_COLD ((__force gfp_t)___GFP_COLD) /* Cache-cold page required */ 149#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
79#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) /* Suppress page allocation failure warning */ 150#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT)
80#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */ 151#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL)
81#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */ 152#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY)
82#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */
83#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
84#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */
85#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */
86#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves.
87 * This takes precedence over the
88 * __GFP_MEMALLOC flag if both are
89 * set
90 */
91#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
92#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
93#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
94#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
95#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
96
97#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
98#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
99#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
100 153
101/* 154/*
102 * This may seem redundant, but it's a way of annotating false positives vs. 155 * Action modifiers
103 * allocations that simply cannot be supported (e.g. page tables). 156 *
157 * __GFP_COLD indicates that the caller does not expect the page to be used
 158 * in the near future. Where possible, a cache-cold page will be returned.
159 *
160 * __GFP_NOWARN suppresses allocation failure reports.
161 *
162 * __GFP_COMP adds compound page metadata.
163 *
164 * __GFP_ZERO returns a zeroed page on success.
165 *
166 * __GFP_NOTRACK avoids tracking with kmemcheck.
167 *
168 * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
169 * distinguishing in the source between false positives and allocations that
170 * cannot be supported (e.g. page tables).
171 *
172 * __GFP_OTHER_NODE is for allocations that are on a remote node but that
173 * should not be accounted for as a remote allocation in vmstat. A
174 * typical user would be khugepaged collapsing a huge page on a remote
175 * node.
104 */ 176 */
177#define __GFP_COLD ((__force gfp_t)___GFP_COLD)
178#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
179#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
180#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
181#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK)
105#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) 182#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
183#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
106 184
107#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ 185/* Room for N __GFP_FOO bits */
186#define __GFP_BITS_SHIFT 26
108#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) 187#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
109 188
110/* This equals 0, but use constants in case they ever change */ 189/*
111#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) 190 * Useful GFP flag combinations that are commonly used. It is recommended
112/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */ 191 * that subsystems start with one of these combinations and then set/clear
113#define GFP_ATOMIC (__GFP_HIGH) 192 * __GFP_FOO flags as necessary.
114#define GFP_NOIO (__GFP_WAIT) 193 *
115#define GFP_NOFS (__GFP_WAIT | __GFP_IO) 194 * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
116#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) 195 * watermark is applied to allow access to "atomic reserves"
117#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \ 196 *
197 * GFP_KERNEL is typical for kernel-internal allocations. The caller requires
198 * ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
199 *
200 * GFP_NOWAIT is for kernel allocations that should not stall for direct
201 * reclaim, start physical IO or use any filesystem callback.
202 *
203 * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
204 * that do not require the starting of any physical IO.
205 *
206 * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
207 *
208 * GFP_USER is for userspace allocations that also need to be directly
209 * accessible by the kernel or hardware. It is typically used by hardware
210 * for buffers that are mapped to userspace (e.g. graphics) that hardware
211 * still must DMA to. cpuset limits are enforced for these allocations.
212 *
213 * GFP_DMA exists for historical reasons and should be avoided where possible.
214 * The flag indicates that the caller requires that the lowest zone be
215 * used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
216 * it would require careful auditing as some users really require it and
217 * others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
218 * lowest zone as a type of emergency reserve.
219 *
220 * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
221 * address.
222 *
223 * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
224 * do not need to be directly accessible by the kernel but that cannot
225 * move once in use. An example may be a hardware allocation that maps
226 * data directly into userspace but has no addressing limitations.
227 *
228 * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
229 * need direct access to but can use kmap() when access is required. They
230 * are expected to be movable via page reclaim or page migration. Typically,
231 * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
232 *
233 * GFP_TRANSHUGE is used for THP allocations. They are compound allocations
234 * that will fail quickly if memory is not available and will not wake
235 * kswapd on failure.
236 */
237#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
238#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
239#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
240#define GFP_NOIO (__GFP_RECLAIM)
241#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO)
242#define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \
118 __GFP_RECLAIMABLE) 243 __GFP_RECLAIMABLE)
119#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) 244#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
245#define GFP_DMA __GFP_DMA
246#define GFP_DMA32 __GFP_DMA32
120#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) 247#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
121#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) 248#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE)
122#define GFP_IOFS (__GFP_IO | __GFP_FS) 249#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
123#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ 250 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
124 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ 251 ~__GFP_KSWAPD_RECLAIM)
125 __GFP_NO_KSWAPD)
126 252
127/* This mask makes up all the page movable related flags */ 253/* Convert GFP flags to their corresponding migrate type */
128#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) 254#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
255#define GFP_MOVABLE_SHIFT 3
129 256
130/* Control page allocator reclaim behavior */
131#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
132 __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
133 __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
134
135/* Control slab gfp mask during early boot */
136#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))
137
138/* Control allocation constraints */
139#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
140
141/* Do not use these with a slab allocator */
142#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
143
144/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
145 platforms, used as appropriate on others */
146
147#define GFP_DMA __GFP_DMA
148
149/* 4GB DMA on some platforms */
150#define GFP_DMA32 __GFP_DMA32
151
152/* Convert GFP flags to their corresponding migrate type */
153static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) 257static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
154{ 258{
155 WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); 259 VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
260 BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
261 BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
156 262
157 if (unlikely(page_group_by_mobility_disabled)) 263 if (unlikely(page_group_by_mobility_disabled))
158 return MIGRATE_UNMOVABLE; 264 return MIGRATE_UNMOVABLE;
159 265
160 /* Group based on mobility */ 266 /* Group based on mobility */
161 return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | 267 return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
162 ((gfp_flags & __GFP_RECLAIMABLE) != 0); 268}
269#undef GFP_MOVABLE_MASK
270#undef GFP_MOVABLE_SHIFT
271
272static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
273{
274 return gfp_flags & __GFP_DIRECT_RECLAIM;
163} 275}
164 276
165#ifdef CONFIG_HIGHMEM 277#ifdef CONFIG_HIGHMEM
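
Since the gfp.h rework above replaces __GFP_WAIT with __GFP_DIRECT_RECLAIM and __GFP_KSWAPD_RECLAIM, callers are expected to start from one of the documented combinations and then set or clear individual bits. A minimal sketch of that style (my_alloc() is a made-up helper, not something this series adds):

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static void *my_alloc(size_t size, bool can_sleep, bool wake_kswapd)
{
	gfp_t gfp = can_sleep ? GFP_KERNEL : GFP_NOWAIT;

	/* Background reclaim is optional, as in the new GFP_TRANSHUGE. */
	if (!wake_kswapd)
		gfp &= ~__GFP_KSWAPD_RECLAIM;

	/* The helper below replaces the old "gfp & __GFP_WAIT" test. */
	if (gfpflags_allow_blocking(gfp))
		might_sleep();

	return kmalloc(size, gfp);
}
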
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 7edd30515298..24154c26d469 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -32,7 +32,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
32 32
33 if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) 33 if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
34 return NULL; 34 return NULL;
35 return (struct hugetlb_cgroup *)page[2].lru.next; 35 return (struct hugetlb_cgroup *)page[2].private;
36} 36}
37 37
38static inline 38static inline
@@ -42,7 +42,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
42 42
43 if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) 43 if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
44 return -1; 44 return -1;
45 page[2].lru.next = (void *)h_cg; 45 page[2].private = (unsigned long)h_cg;
46 return 0; 46 return 0;
47} 47}
48 48
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5582410727cb..2c13f747ac2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -413,6 +413,8 @@ extern __printf(2, 3)
413char *kasprintf(gfp_t gfp, const char *fmt, ...); 413char *kasprintf(gfp_t gfp, const char *fmt, ...);
414extern __printf(2, 0) 414extern __printf(2, 0)
415char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); 415char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);
416extern __printf(2, 0)
417const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args);
416 418
417extern __scanf(2, 3) 419extern __scanf(2, 3)
418int sscanf(const char *, const char *, ...); 420int sscanf(const char *, const char *, ...);
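
kvasprintf_const() is only declared here; as I understand its contract, it may hand back a pointer into .rodata (for example when the format string needs no expansion), so the result must be released with kfree_const() rather than kfree(). A hedged usage sketch, with set_label() and the static variable invented for illustration:

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>

static const char *label;

static __printf(1, 2) int set_label(const char *fmt, ...)
{
	const char *s;
	va_list args;

	va_start(args, fmt);
	s = kvasprintf_const(GFP_KERNEL, fmt, args);
	va_end(args);

	if (!s)
		return -ENOMEM;

	kfree_const(label);	/* matches however the string was obtained */
	label = s;
	return 0;
}
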
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 906c46a05707..00bad7793788 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -430,46 +430,6 @@ static inline void compound_unlock_irqrestore(struct page *page,
430#endif 430#endif
431} 431}
432 432
433static inline struct page *compound_head_by_tail(struct page *tail)
434{
435 struct page *head = tail->first_page;
436
437 /*
438 * page->first_page may be a dangling pointer to an old
439 * compound page, so recheck that it is still a tail
440 * page before returning.
441 */
442 smp_rmb();
443 if (likely(PageTail(tail)))
444 return head;
445 return tail;
446}
447
448/*
449 * Since either compound page could be dismantled asynchronously in THP
450 * or we access asynchronously arbitrary positioned struct page, there
451 * would be tail flag race. To handle this race, we should call
452 * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
453 */
454static inline struct page *compound_head(struct page *page)
455{
456 if (unlikely(PageTail(page)))
457 return compound_head_by_tail(page);
458 return page;
459}
460
461/*
462 * If we access compound page synchronously such as access to
463 * allocated page, there is no need to handle tail flag race, so we can
464 * check tail flag directly without any synchronization primitive.
465 */
466static inline struct page *compound_head_fast(struct page *page)
467{
468 if (unlikely(PageTail(page)))
469 return page->first_page;
470 return page;
471}
472
473/* 433/*
474 * The atomic page->_mapcount, starts from -1: so that transitions 434 * The atomic page->_mapcount, starts from -1: so that transitions
475 * both from it and to it can be tracked, using atomic_inc_and_test 435 * both from it and to it can be tracked, using atomic_inc_and_test
@@ -518,7 +478,7 @@ static inline void get_huge_page_tail(struct page *page)
518 VM_BUG_ON_PAGE(!PageTail(page), page); 478 VM_BUG_ON_PAGE(!PageTail(page), page);
519 VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); 479 VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
520 VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); 480 VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
521 if (compound_tail_refcounted(page->first_page)) 481 if (compound_tail_refcounted(compound_head(page)))
522 atomic_inc(&page->_mapcount); 482 atomic_inc(&page->_mapcount);
523} 483}
524 484
@@ -541,13 +501,7 @@ static inline struct page *virt_to_head_page(const void *x)
541{ 501{
542 struct page *page = virt_to_page(x); 502 struct page *page = virt_to_page(x);
543 503
544 /* 504 return compound_head(page);
545 * We don't need to worry about synchronization of tail flag
546 * when we call virt_to_head_page() since it is only called for
547 * already allocated page and this page won't be freed until
548 * this virt_to_head_page() is finished. So use _fast variant.
549 */
550 return compound_head_fast(page);
551} 505}
552 506
553/* 507/*
@@ -568,28 +522,42 @@ int split_free_page(struct page *page);
568/* 522/*
569 * Compound pages have a destructor function. Provide a 523 * Compound pages have a destructor function. Provide a
570 * prototype for that function and accessor functions. 524 * prototype for that function and accessor functions.
571 * These are _only_ valid on the head of a PG_compound page. 525 * These are _only_ valid on the head of a compound page.
572 */ 526 */
527typedef void compound_page_dtor(struct page *);
528
529/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */
530enum compound_dtor_id {
531 NULL_COMPOUND_DTOR,
532 COMPOUND_PAGE_DTOR,
533#ifdef CONFIG_HUGETLB_PAGE
534 HUGETLB_PAGE_DTOR,
535#endif
536 NR_COMPOUND_DTORS,
537};
538extern compound_page_dtor * const compound_page_dtors[];
573 539
574static inline void set_compound_page_dtor(struct page *page, 540static inline void set_compound_page_dtor(struct page *page,
575 compound_page_dtor *dtor) 541 enum compound_dtor_id compound_dtor)
576{ 542{
577 page[1].compound_dtor = dtor; 543 VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page);
544 page[1].compound_dtor = compound_dtor;
578} 545}
579 546
580static inline compound_page_dtor *get_compound_page_dtor(struct page *page) 547static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
581{ 548{
582 return page[1].compound_dtor; 549 VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page);
550 return compound_page_dtors[page[1].compound_dtor];
583} 551}
584 552
585static inline int compound_order(struct page *page) 553static inline unsigned int compound_order(struct page *page)
586{ 554{
587 if (!PageHead(page)) 555 if (!PageHead(page))
588 return 0; 556 return 0;
589 return page[1].compound_order; 557 return page[1].compound_order;
590} 558}
591 559
592static inline void set_compound_order(struct page *page, unsigned long order) 560static inline void set_compound_order(struct page *page, unsigned int order)
593{ 561{
594 page[1].compound_order = order; 562 page[1].compound_order = order;
595} 563}
@@ -1572,8 +1540,7 @@ static inline bool ptlock_init(struct page *page)
1572 * with 0. Make sure nobody took it in use in between. 1540 * with 0. Make sure nobody took it in use in between.
1573 * 1541 *
1574 * It can happen if arch try to use slab for page table allocation: 1542 * It can happen if arch try to use slab for page table allocation:
1575 * slab code uses page->slab_cache and page->first_page (for tail 1543 * slab code uses page->slab_cache, which share storage with page->ptl.
1576 * pages), which share storage with page->ptl.
1577 */ 1544 */
1578 VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); 1545 VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);
1579 if (!ptlock_alloc(page)) 1546 if (!ptlock_alloc(page))
@@ -1843,7 +1810,8 @@ extern void si_meminfo(struct sysinfo * val);
1843extern void si_meminfo_node(struct sysinfo *val, int nid); 1810extern void si_meminfo_node(struct sysinfo *val, int nid);
1844 1811
1845extern __printf(3, 4) 1812extern __printf(3, 4)
1846void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); 1813void warn_alloc_failed(gfp_t gfp_mask, unsigned int order,
1814 const char *fmt, ...);
1847 1815
1848extern void setup_per_cpu_pageset(void); 1816extern void setup_per_cpu_pageset(void);
1849 1817
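
The mm.h changes above replace the per-page destructor pointer with a small enum index into compound_page_dtors[]. A sketch of how the two accessors are meant to pair up (the surrounding functions are illustrative, not lifted from mm/page_alloc.c):

#include <linux/mm.h>

static void prep_my_compound(struct page *page, unsigned int order)
{
	set_compound_order(page, order);
	set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);	/* an index, no longer a pointer */
}

static void destroy_my_compound(struct page *page)
{
	compound_page_dtor *dtor = get_compound_page_dtor(page); /* table lookup */

	(*dtor)(page);
}
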
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0a85da25a822..f8d1492a114f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -28,8 +28,6 @@ struct mem_cgroup;
28 IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) 28 IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
29#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) 29#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8)
30 30
31typedef void compound_page_dtor(struct page *);
32
33/* 31/*
34 * Each physical page in the system has a struct page associated with 32 * Each physical page in the system has a struct page associated with
35 * it to keep track of whatever it is we are using the page for at the 33 * it to keep track of whatever it is we are using the page for at the
@@ -113,7 +111,13 @@ struct page {
113 }; 111 };
114 }; 112 };
115 113
116 /* Third double word block */ 114 /*
115 * Third double word block
116 *
117 * WARNING: bit 0 of the first word encodes PageTail(). That means
 118 * the other users of this storage space MUST NOT use that bit, to
 119 * avoid collisions and false-positive PageTail().
120 */
117 union { 121 union {
118 struct list_head lru; /* Pageout list, eg. active_list 122 struct list_head lru; /* Pageout list, eg. active_list
119 * protected by zone->lru_lock ! 123 * protected by zone->lru_lock !
@@ -131,18 +135,37 @@ struct page {
131#endif 135#endif
132 }; 136 };
133 137
134 struct slab *slab_page; /* slab fields */
135 struct rcu_head rcu_head; /* Used by SLAB 138 struct rcu_head rcu_head; /* Used by SLAB
136 * when destroying via RCU 139 * when destroying via RCU
137 */ 140 */
138 /* First tail page of compound page */ 141 /* Tail pages of compound page */
139 struct { 142 struct {
140 compound_page_dtor *compound_dtor; 143 unsigned long compound_head; /* If bit zero is set */
141 unsigned long compound_order; 144
145 /* First tail page only */
146#ifdef CONFIG_64BIT
147 /*
148 * On 64-bit systems we have enough space in struct page
 149 * to encode compound_dtor and compound_order with
 150 * unsigned int. It can help the compiler generate better or
 151 * smaller code on some architectures.
152 */
153 unsigned int compound_dtor;
154 unsigned int compound_order;
155#else
156 unsigned short int compound_dtor;
157 unsigned short int compound_order;
158#endif
142 }; 159 };
143 160
144#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS 161#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
145 pgtable_t pmd_huge_pte; /* protected by page->ptl */ 162 struct {
163 unsigned long __pad; /* do not overlay pmd_huge_pte
164 * with compound_head to avoid
165 * possible bit 0 collision.
166 */
167 pgtable_t pmd_huge_pte; /* protected by page->ptl */
168 };
146#endif 169#endif
147 }; 170 };
148 171
@@ -163,7 +186,6 @@ struct page {
163#endif 186#endif
164#endif 187#endif
165 struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ 188 struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
166 struct page *first_page; /* Compound tail pages */
167 }; 189 };
168 190
169#ifdef CONFIG_MEMCG 191#ifdef CONFIG_MEMCG
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2d7e660cdefe..e23a9e704536 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -37,10 +37,10 @@
37 37
38enum { 38enum {
39 MIGRATE_UNMOVABLE, 39 MIGRATE_UNMOVABLE,
40 MIGRATE_RECLAIMABLE,
41 MIGRATE_MOVABLE, 40 MIGRATE_MOVABLE,
41 MIGRATE_RECLAIMABLE,
42 MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ 42 MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
43 MIGRATE_RESERVE = MIGRATE_PCPTYPES, 43 MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
44#ifdef CONFIG_CMA 44#ifdef CONFIG_CMA
45 /* 45 /*
46 * MIGRATE_CMA migration type is designed to mimic the way 46 * MIGRATE_CMA migration type is designed to mimic the way
@@ -334,13 +334,16 @@ struct zone {
334 /* zone watermarks, access with *_wmark_pages(zone) macros */ 334 /* zone watermarks, access with *_wmark_pages(zone) macros */
335 unsigned long watermark[NR_WMARK]; 335 unsigned long watermark[NR_WMARK];
336 336
337 unsigned long nr_reserved_highatomic;
338
337 /* 339 /*
338 * We don't know if the memory that we're going to allocate will be freeable 340 * We don't know if the memory that we're going to allocate will be
339 * or/and it will be released eventually, so to avoid totally wasting several 341 * freeable or/and it will be released eventually, so to avoid totally
340 * GB of ram we must reserve some of the lower zone memory (otherwise we risk 342 * wasting several GB of ram we must reserve some of the lower zone
341 * to run OOM on the lower zones despite there's tons of freeable ram 343 * memory (otherwise we risk to run OOM on the lower zones despite
342 * on the higher zones). This array is recalculated at runtime if the 344 * there being tons of freeable ram on the higher zones). This array is
343 * sysctl_lowmem_reserve_ratio sysctl changes. 345 * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
346 * changes.
344 */ 347 */
345 long lowmem_reserve[MAX_NR_ZONES]; 348 long lowmem_reserve[MAX_NR_ZONES];
346 349
@@ -429,12 +432,6 @@ struct zone {
429 432
430 const char *name; 433 const char *name;
431 434
432 /*
433 * Number of MIGRATE_RESERVE page block. To maintain for just
434 * optimization. Protected by zone->lock.
435 */
436 int nr_migrate_reserve_block;
437
438#ifdef CONFIG_MEMORY_ISOLATION 435#ifdef CONFIG_MEMORY_ISOLATION
439 /* 436 /*
440 * Number of isolated pageblock. It is used to solve incorrect 437 * Number of isolated pageblock. It is used to solve incorrect
@@ -589,75 +586,8 @@ static inline bool zone_is_empty(struct zone *zone)
589 * [1] : No fallback (__GFP_THISNODE) 586 * [1] : No fallback (__GFP_THISNODE)
590 */ 587 */
591#define MAX_ZONELISTS 2 588#define MAX_ZONELISTS 2
592
593
594/*
595 * We cache key information from each zonelist for smaller cache
596 * footprint when scanning for free pages in get_page_from_freelist().
597 *
598 * 1) The BITMAP fullzones tracks which zones in a zonelist have come
599 * up short of free memory since the last time (last_fullzone_zap)
600 * we zero'd fullzones.
601 * 2) The array z_to_n[] maps each zone in the zonelist to its node
602 * id, so that we can efficiently evaluate whether that node is
603 * set in the current tasks mems_allowed.
604 *
605 * Both fullzones and z_to_n[] are one-to-one with the zonelist,
606 * indexed by a zones offset in the zonelist zones[] array.
607 *
608 * The get_page_from_freelist() routine does two scans. During the
609 * first scan, we skip zones whose corresponding bit in 'fullzones'
610 * is set or whose corresponding node in current->mems_allowed (which
611 * comes from cpusets) is not set. During the second scan, we bypass
612 * this zonelist_cache, to ensure we look methodically at each zone.
613 *
614 * Once per second, we zero out (zap) fullzones, forcing us to
615 * reconsider nodes that might have regained more free memory.
616 * The field last_full_zap is the time we last zapped fullzones.
617 *
618 * This mechanism reduces the amount of time we waste repeatedly
619 * reexaming zones for free memory when they just came up low on
620 * memory momentarilly ago.
621 *
622 * The zonelist_cache struct members logically belong in struct
623 * zonelist. However, the mempolicy zonelists constructed for
624 * MPOL_BIND are intentionally variable length (and usually much
625 * shorter). A general purpose mechanism for handling structs with
626 * multiple variable length members is more mechanism than we want
627 * here. We resort to some special case hackery instead.
628 *
629 * The MPOL_BIND zonelists don't need this zonelist_cache (in good
630 * part because they are shorter), so we put the fixed length stuff
631 * at the front of the zonelist struct, ending in a variable length
632 * zones[], as is needed by MPOL_BIND.
633 *
634 * Then we put the optional zonelist cache on the end of the zonelist
635 * struct. This optional stuff is found by a 'zlcache_ptr' pointer in
636 * the fixed length portion at the front of the struct. This pointer
637 * both enables us to find the zonelist cache, and in the case of
638 * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL)
639 * to know that the zonelist cache is not there.
640 *
641 * The end result is that struct zonelists come in two flavors:
642 * 1) The full, fixed length version, shown below, and
643 * 2) The custom zonelists for MPOL_BIND.
644 * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache.
645 *
646 * Even though there may be multiple CPU cores on a node modifying
647 * fullzones or last_full_zap in the same zonelist_cache at the same
648 * time, we don't lock it. This is just hint data - if it is wrong now
649 * and then, the allocator will still function, perhaps a bit slower.
650 */
651
652
653struct zonelist_cache {
654 unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */
655 DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */
656 unsigned long last_full_zap; /* when last zap'd (jiffies) */
657};
658#else 589#else
659#define MAX_ZONELISTS 1 590#define MAX_ZONELISTS 1
660struct zonelist_cache;
661#endif 591#endif
662 592
663/* 593/*
@@ -675,9 +605,6 @@ struct zoneref {
675 * allocation, the other zones are fallback zones, in decreasing 605 * allocation, the other zones are fallback zones, in decreasing
676 * priority. 606 * priority.
677 * 607 *
678 * If zlcache_ptr is not NULL, then it is just the address of zlcache,
679 * as explained above. If zlcache_ptr is NULL, there is no zlcache.
680 * *
681 * To speed the reading of the zonelist, the zonerefs contain the zone index 608 * To speed the reading of the zonelist, the zonerefs contain the zone index
682 * of the entry being read. Helper functions to access information given 609 * of the entry being read. Helper functions to access information given
683 * a struct zoneref are 610 * a struct zoneref are
@@ -687,11 +614,7 @@ struct zoneref {
687 * zonelist_node_idx() - Return the index of the node for an entry 614 * zonelist_node_idx() - Return the index of the node for an entry
688 */ 615 */
689struct zonelist { 616struct zonelist {
690 struct zonelist_cache *zlcache_ptr; // NULL or &zlcache
691 struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; 617 struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
692#ifdef CONFIG_NUMA
693 struct zonelist_cache zlcache; // optional ...
694#endif
695}; 618};
696 619
697#ifndef CONFIG_DISCONTIGMEM 620#ifndef CONFIG_DISCONTIGMEM
@@ -817,7 +740,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
817bool zone_watermark_ok(struct zone *z, unsigned int order, 740bool zone_watermark_ok(struct zone *z, unsigned int order,
818 unsigned long mark, int classzone_idx, int alloc_flags); 741 unsigned long mark, int classzone_idx, int alloc_flags);
819bool zone_watermark_ok_safe(struct zone *z, unsigned int order, 742bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
820 unsigned long mark, int classzone_idx, int alloc_flags); 743 unsigned long mark, int classzone_idx);
821enum memmap_context { 744enum memmap_context {
822 MEMMAP_EARLY, 745 MEMMAP_EARLY,
823 MEMMAP_HOTPLUG, 746 MEMMAP_HOTPLUG,
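
The MIGRATE_MOVABLE/MIGRATE_RECLAIMABLE swap above is what lets gfpflags_to_migratetype() become a plain shift: with ___GFP_MOVABLE at bit 3 and ___GFP_RECLAIMABLE at bit 4, the two mobility bits shifted right by GFP_MOVABLE_SHIFT land exactly on the enum values. A standalone (userspace) illustration of that arithmetic, using local copies of the constants rather than the kernel headers:

#include <assert.h>

#define MY_GFP_MOVABLE      0x08u	/* mirrors ___GFP_MOVABLE */
#define MY_GFP_RECLAIMABLE  0x10u	/* mirrors ___GFP_RECLAIMABLE */
#define MY_MOVABLE_SHIFT    3		/* mirrors GFP_MOVABLE_SHIFT */

enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE };

static unsigned int to_migratetype(unsigned int gfp)
{
	return (gfp & (MY_GFP_MOVABLE | MY_GFP_RECLAIMABLE)) >> MY_MOVABLE_SHIFT;
}

int main(void)
{
	assert(to_migratetype(0) == MIGRATE_UNMOVABLE);			   /* e.g. GFP_KERNEL */
	assert(to_migratetype(MY_GFP_MOVABLE) == MIGRATE_MOVABLE);	   /* e.g. GFP_HIGHUSER_MOVABLE */
	assert(to_migratetype(MY_GFP_RECLAIMABLE) == MIGRATE_RECLAIMABLE); /* e.g. GFP_TEMPORARY */
	return 0;
}
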
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index c12f2147c350..52666d90ca94 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -386,6 +386,7 @@ extern int param_get_ullong(char *buffer, const struct kernel_param *kp);
386extern const struct kernel_param_ops param_ops_charp; 386extern const struct kernel_param_ops param_ops_charp;
387extern int param_set_charp(const char *val, const struct kernel_param *kp); 387extern int param_set_charp(const char *val, const struct kernel_param *kp);
388extern int param_get_charp(char *buffer, const struct kernel_param *kp); 388extern int param_get_charp(char *buffer, const struct kernel_param *kp);
389extern void param_free_charp(void *arg);
389#define param_check_charp(name, p) __param_check(name, p, char *) 390#define param_check_charp(name, p) __param_check(name, p, char *)
390 391
391/* We used to allow int as well as bool. We're taking that away! */ 392/* We used to allow int as well as bool. We're taking that away! */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a525e5067484..bb53c7b86315 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -86,12 +86,7 @@ enum pageflags {
86 PG_private, /* If pagecache, has fs-private data */ 86 PG_private, /* If pagecache, has fs-private data */
87 PG_private_2, /* If pagecache, has fs aux data */ 87 PG_private_2, /* If pagecache, has fs aux data */
88 PG_writeback, /* Page is under writeback */ 88 PG_writeback, /* Page is under writeback */
89#ifdef CONFIG_PAGEFLAGS_EXTENDED
90 PG_head, /* A head page */ 89 PG_head, /* A head page */
91 PG_tail, /* A tail page */
92#else
93 PG_compound, /* A compound page */
94#endif
95 PG_swapcache, /* Swap page: swp_entry_t in private */ 90 PG_swapcache, /* Swap page: swp_entry_t in private */
96 PG_mappedtodisk, /* Has blocks allocated on-disk */ 91 PG_mappedtodisk, /* Has blocks allocated on-disk */
97 PG_reclaim, /* To be reclaimed asap */ 92 PG_reclaim, /* To be reclaimed asap */
@@ -398,85 +393,46 @@ static inline void set_page_writeback_keepwrite(struct page *page)
398 test_set_page_writeback_keepwrite(page); 393 test_set_page_writeback_keepwrite(page);
399} 394}
400 395
401#ifdef CONFIG_PAGEFLAGS_EXTENDED
402/*
403 * System with lots of page flags available. This allows separate
404 * flags for PageHead() and PageTail() checks of compound pages so that bit
405 * tests can be used in performance sensitive paths. PageCompound is
406 * generally not used in hot code paths except arch/powerpc/mm/init_64.c
407 * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages
408 * and avoid handling those in real mode.
409 */
410__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head) 396__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
411__PAGEFLAG(Tail, tail)
412 397
413static inline int PageCompound(struct page *page) 398static inline int PageTail(struct page *page)
414{
415 return page->flags & ((1L << PG_head) | (1L << PG_tail));
416
417}
418#ifdef CONFIG_TRANSPARENT_HUGEPAGE
419static inline void ClearPageCompound(struct page *page)
420{ 399{
421 BUG_ON(!PageHead(page)); 400 return READ_ONCE(page->compound_head) & 1;
422 ClearPageHead(page);
423} 401}
424#endif
425
426#define PG_head_mask ((1L << PG_head))
427 402
428#else 403static inline void set_compound_head(struct page *page, struct page *head)
429/*
430 * Reduce page flag use as much as possible by overlapping
431 * compound page flags with the flags used for page cache pages. Possible
432 * because PageCompound is always set for compound pages and not for
433 * pages on the LRU and/or pagecache.
434 */
435TESTPAGEFLAG(Compound, compound)
436__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound)
437
438/*
439 * PG_reclaim is used in combination with PG_compound to mark the
440 * head and tail of a compound page. This saves one page flag
441 * but makes it impossible to use compound pages for the page cache.
442 * The PG_reclaim bit would have to be used for reclaim or readahead
443 * if compound pages enter the page cache.
444 *
445 * PG_compound & PG_reclaim => Tail page
446 * PG_compound & ~PG_reclaim => Head page
447 */
448#define PG_head_mask ((1L << PG_compound))
449#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
450
451static inline int PageHead(struct page *page)
452{ 404{
453 return ((page->flags & PG_head_tail_mask) == PG_head_mask); 405 WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
454} 406}
455 407
456static inline int PageTail(struct page *page) 408static inline void clear_compound_head(struct page *page)
457{ 409{
458 return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); 410 WRITE_ONCE(page->compound_head, 0);
459} 411}
460 412
461static inline void __SetPageTail(struct page *page) 413static inline struct page *compound_head(struct page *page)
462{ 414{
463 page->flags |= PG_head_tail_mask; 415 unsigned long head = READ_ONCE(page->compound_head);
416
417 if (unlikely(head & 1))
418 return (struct page *) (head - 1);
419 return page;
464} 420}
465 421
466static inline void __ClearPageTail(struct page *page) 422static inline int PageCompound(struct page *page)
467{ 423{
468 page->flags &= ~PG_head_tail_mask; 424 return PageHead(page) || PageTail(page);
469}
470 425
426}
471#ifdef CONFIG_TRANSPARENT_HUGEPAGE 427#ifdef CONFIG_TRANSPARENT_HUGEPAGE
472static inline void ClearPageCompound(struct page *page) 428static inline void ClearPageCompound(struct page *page)
473{ 429{
474 BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound)); 430 BUG_ON(!PageHead(page));
475 clear_bit(PG_compound, &page->flags); 431 ClearPageHead(page);
476} 432}
477#endif 433#endif
478 434
479#endif /* !PAGEFLAGS_EXTENDED */ 435#define PG_head_mask ((1L << PG_head))
480 436
481#ifdef CONFIG_HUGETLB_PAGE 437#ifdef CONFIG_HUGETLB_PAGE
482int PageHuge(struct page *page); 438int PageHuge(struct page *page);
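
The compound_head() rewrite above relies on pointer tagging: because struct page is word aligned, bit 0 of a page pointer is always clear, so a tail page can store (head | 1) in ->compound_head and a single load plus a bit test replaces both PG_tail and ->first_page. A standalone (userspace) illustration of the same trick, with a stand-in struct:

#include <assert.h>

struct fake_page { unsigned long compound_head; };

static void set_head(struct fake_page *tail, struct fake_page *head)
{
	tail->compound_head = (unsigned long)head + 1;	/* set the tag bit */
}

static struct fake_page *get_head(struct fake_page *page)
{
	unsigned long head = page->compound_head;

	return (head & 1) ? (struct fake_page *)(head - 1) : page;
}

int main(void)
{
	struct fake_page pages[2] = { { 0 }, { 0 } };	/* aligned, so bit 0 is free */

	set_head(&pages[1], &pages[0]);
	assert(get_head(&pages[1]) == &pages[0]);	/* a tail resolves to its head */
	assert(get_head(&pages[0]) == &pages[0]);	/* a head resolves to itself */
	return 0;
}
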
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 2baeee12f48e..e942558b3585 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -44,7 +44,7 @@ enum pageblock_bits {
44#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 44#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
45 45
46/* Huge page sizes are variable */ 46/* Huge page sizes are variable */
47extern int pageblock_order; 47extern unsigned int pageblock_order;
48 48
49#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ 49#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
50 50
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a6c78e00ea96..26eabf5ec718 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -69,6 +69,13 @@ static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
69 return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; 69 return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
70} 70}
71 71
72/* Restricts the given gfp_mask to what the mapping allows. */
73static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
74 gfp_t gfp_mask)
75{
76 return mapping_gfp_mask(mapping) & gfp_mask;
77}
78
72/* 79/*
73 * This is non-atomic. Only to be used before the mapping is activated. 80 * This is non-atomic. Only to be used before the mapping is activated.
74 * Probably needs a barrier... 81 * Probably needs a barrier...
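
mapping_gfp_constraint() above is a convenience for the common "mapping mask AND caller mask" pattern. A hedged sketch of a read path using it (the wrapper is invented; the point is only how the two masks combine):

#include <linux/pagemap.h>

static struct page *my_get_cached_page(struct address_space *mapping,
				       pgoff_t index)
{
	/* Honour the mapping's restrictions and additionally forbid FS recursion. */
	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL & ~__GFP_FS);

	return find_or_create_page(mapping, index, gfp);
}
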
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 830c4992088d..a5aa7ae671f4 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -101,13 +101,21 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
101 }) 101 })
102 102
103/** 103/**
104 * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of 104 * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
105 * given type safe against removal of rb_node entry 105 * given type allowing the backing memory of @pos to be invalidated
106 * 106 *
107 * @pos: the 'type *' to use as a loop cursor. 107 * @pos: the 'type *' to use as a loop cursor.
108 * @n: another 'type *' to use as temporary storage 108 * @n: another 'type *' to use as temporary storage
109 * @root: 'rb_root *' of the rbtree. 109 * @root: 'rb_root *' of the rbtree.
110 * @field: the name of the rb_node field within 'type'. 110 * @field: the name of the rb_node field within 'type'.
111 *
112 * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
113 * list_for_each_entry_safe() and allows the iteration to continue independent
114 * of changes to @pos by the body of the loop.
115 *
116 * Note, however, that it cannot handle other modifications that re-order the
117 * rbtree it is iterating over. This includes calling rb_erase() on @pos, as
118 * rb_erase() may rebalance the tree, causing us to miss some nodes.
111 */ 119 */
112#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ 120#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
113 for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ 121 for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
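
The expanded comment above is easiest to see in its canonical use, tearing down a whole tree: post-order guarantees the iterator has moved past a node before the loop body frees it, so no rb_erase() (and therefore no rebalancing) is involved. struct item is made up for the sketch.

#include <linux/rbtree.h>
#include <linux/slab.h>

struct item {
	struct rb_node node;
	int key;
};

static void free_all_items(struct rb_root *root)
{
	struct item *pos, *n;

	rbtree_postorder_for_each_entry_safe(pos, n, root, node)
		kfree(pos);	/* safe: the cursor already points past pos */

	*root = RB_ROOT;	/* everything is gone; reset the root */
}
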
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eeb5066a44fb..4069febaa34a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1570,9 +1570,7 @@ struct task_struct {
1570 1570
1571 unsigned long sas_ss_sp; 1571 unsigned long sas_ss_sp;
1572 size_t sas_ss_size; 1572 size_t sas_ss_size;
1573 int (*notifier)(void *priv); 1573
1574 void *notifier_data;
1575 sigset_t *notifier_mask;
1576 struct callback_head *task_works; 1574 struct callback_head *task_works;
1577 1575
1578 struct audit_context *audit_context; 1576 struct audit_context *audit_context;
@@ -2464,21 +2462,29 @@ extern void ignore_signals(struct task_struct *);
2464extern void flush_signal_handlers(struct task_struct *, int force_default); 2462extern void flush_signal_handlers(struct task_struct *, int force_default);
2465extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); 2463extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
2466 2464
2467static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) 2465static inline int kernel_dequeue_signal(siginfo_t *info)
2468{ 2466{
2469 unsigned long flags; 2467 struct task_struct *tsk = current;
2468 siginfo_t __info;
2470 int ret; 2469 int ret;
2471 2470
2472 spin_lock_irqsave(&tsk->sighand->siglock, flags); 2471 spin_lock_irq(&tsk->sighand->siglock);
2473 ret = dequeue_signal(tsk, mask, info); 2472 ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
2474 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 2473 spin_unlock_irq(&tsk->sighand->siglock);
2475 2474
2476 return ret; 2475 return ret;
2477} 2476}
2478 2477
2479extern void block_all_signals(int (*notifier)(void *priv), void *priv, 2478static inline void kernel_signal_stop(void)
2480 sigset_t *mask); 2479{
2481extern void unblock_all_signals(void); 2480 spin_lock_irq(&current->sighand->siglock);
2481 if (current->jobctl & JOBCTL_STOP_DEQUEUED)
2482 __set_current_state(TASK_STOPPED);
2483 spin_unlock_irq(&current->sighand->siglock);
2484
2485 schedule();
2486}
2487
2482extern void release_task(struct task_struct * p); 2488extern void release_task(struct task_struct * p);
2483extern int send_sig_info(int, struct siginfo *, struct task_struct *); 2489extern int send_sig_info(int, struct siginfo *, struct task_struct *);
2484extern int force_sigsegv(int, struct task_struct *); 2490extern int force_sigsegv(int, struct task_struct *);
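
kernel_dequeue_signal() above always dequeues from current, using current->blocked as the mask and a throwaway siginfo when the caller passes NULL; kernel_signal_stop() is the companion for callers that must actually stop after dequeueing a stop signal. An illustrative kthread fragment (the daemon and its loop are invented, not part of this patch):

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/signal.h>

static int my_daemon(void *data)
{
	allow_signal(SIGTERM);

	while (!kthread_should_stop()) {
		/* ... wait for work interruptibly, then service it ... */

		if (signal_pending(current))
			kernel_dequeue_signal(NULL);	/* NULL: the siginfo is not needed */
	}
	return 0;
}
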
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 24f4dfd94c51..4355129fff91 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1224,7 +1224,7 @@ static inline int skb_cloned(const struct sk_buff *skb)
1224 1224
1225static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) 1225static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
1226{ 1226{
1227 might_sleep_if(pri & __GFP_WAIT); 1227 might_sleep_if(gfpflags_allow_blocking(pri));
1228 1228
1229 if (skb_cloned(skb)) 1229 if (skb_cloned(skb))
1230 return pskb_expand_head(skb, 0, 0, pri); 1230 return pskb_expand_head(skb, 0, 0, pri);
@@ -1308,7 +1308,7 @@ static inline int skb_shared(const struct sk_buff *skb)
1308 */ 1308 */
1309static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) 1309static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
1310{ 1310{
1311 might_sleep_if(pri & __GFP_WAIT); 1311 might_sleep_if(gfpflags_allow_blocking(pri));
1312 if (skb_shared(skb)) { 1312 if (skb_shared(skb)) {
1313 struct sk_buff *nskb = skb_clone(skb, pri); 1313 struct sk_buff *nskb = skb_clone(skb, pri);
1314 1314
@@ -1344,7 +1344,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
1344static inline struct sk_buff *skb_unshare(struct sk_buff *skb, 1344static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
1345 gfp_t pri) 1345 gfp_t pri)
1346{ 1346{
1347 might_sleep_if(pri & __GFP_WAIT); 1347 might_sleep_if(gfpflags_allow_blocking(pri));
1348 if (skb_cloned(skb)) { 1348 if (skb_cloned(skb)) {
1349 struct sk_buff *nskb = skb_copy(skb, pri); 1349 struct sk_buff *nskb = skb_copy(skb, pri);
1350 1350
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 42f8ec992452..2e97b7707dff 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -38,10 +38,10 @@ enum zpool_mapmode {
38 38
39bool zpool_has_pool(char *type); 39bool zpool_has_pool(char *type);
40 40
41struct zpool *zpool_create_pool(char *type, char *name, 41struct zpool *zpool_create_pool(const char *type, const char *name,
42 gfp_t gfp, const struct zpool_ops *ops); 42 gfp_t gfp, const struct zpool_ops *ops);
43 43
44char *zpool_get_type(struct zpool *pool); 44const char *zpool_get_type(struct zpool *pool);
45 45
46void zpool_destroy_pool(struct zpool *pool); 46void zpool_destroy_pool(struct zpool *pool);
47 47
@@ -83,7 +83,9 @@ struct zpool_driver {
83 atomic_t refcount; 83 atomic_t refcount;
84 struct list_head list; 84 struct list_head list;
85 85
86 void *(*create)(char *name, gfp_t gfp, const struct zpool_ops *ops, 86 void *(*create)(const char *name,
87 gfp_t gfp,
88 const struct zpool_ops *ops,
87 struct zpool *zpool); 89 struct zpool *zpool);
88 void (*destroy)(void *pool); 90 void (*destroy)(void *pool);
89 91
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 6398dfae53f1..34eb16098a33 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -41,7 +41,7 @@ struct zs_pool_stats {
41 41
42struct zs_pool; 42struct zs_pool;
43 43
44struct zs_pool *zs_create_pool(char *name, gfp_t flags); 44struct zs_pool *zs_create_pool(const char *name, gfp_t flags);
45void zs_destroy_pool(struct zs_pool *pool); 45void zs_destroy_pool(struct zs_pool *pool);
46 46
47unsigned long zs_malloc(struct zs_pool *pool, size_t size); 47unsigned long zs_malloc(struct zs_pool *pool, size_t size);
diff --git a/include/linux/zutil.h b/include/linux/zutil.h
index 6adfa9a6ffe9..663689521759 100644
--- a/include/linux/zutil.h
+++ b/include/linux/zutil.h
@@ -68,10 +68,10 @@ typedef uLong (*check_func) (uLong check, const Byte *buf,
68 An Adler-32 checksum is almost as reliable as a CRC32 but can be computed 68 An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
69 much faster. Usage example: 69 much faster. Usage example:
70 70
71 uLong adler = adler32(0L, NULL, 0); 71 uLong adler = zlib_adler32(0L, NULL, 0);
72 72
73 while (read_buffer(buffer, length) != EOF) { 73 while (read_buffer(buffer, length) != EOF) {
74 adler = adler32(adler, buffer, length); 74 adler = zlib_adler32(adler, buffer, length);
75 } 75 }
76 if (adler != original_adler) error(); 76 if (adler != original_adler) error();
77*/ 77*/
diff --git a/include/net/sock.h b/include/net/sock.h
index f570e75e3da9..bbf7c2cf15b4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2041,7 +2041,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
2041 */ 2041 */
2042static inline struct page_frag *sk_page_frag(struct sock *sk) 2042static inline struct page_frag *sk_page_frag(struct sock *sk)
2043{ 2043{
2044 if (sk->sk_allocation & __GFP_WAIT) 2044 if (gfpflags_allow_blocking(sk->sk_allocation))
2045 return &current->task_frag; 2045 return &current->task_frag;
2046 2046
2047 return &sk->sk_frag; 2047 return &sk->sk_frag;
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5b14b7..dde6bf092c8a 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -20,7 +20,7 @@
20 {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ 20 {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \
21 {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ 21 {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \
22 {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ 22 {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \
23 {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \ 23 {(unsigned long)__GFP_ATOMIC, "GFP_ATOMIC"}, \
24 {(unsigned long)__GFP_IO, "GFP_IO"}, \ 24 {(unsigned long)__GFP_IO, "GFP_IO"}, \
25 {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ 25 {(unsigned long)__GFP_COLD, "GFP_COLD"}, \
26 {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ 26 {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \
@@ -36,7 +36,8 @@
36 {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ 36 {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
37 {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ 37 {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
38 {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ 38 {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
39 {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ 39 {(unsigned long)__GFP_DIRECT_RECLAIM, "GFP_DIRECT_RECLAIM"}, \
40 {(unsigned long)__GFP_KSWAPD_RECLAIM, "GFP_KSWAPD_RECLAIM"}, \
40 {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ 41 {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \
41 ) : "GFP_NOWAIT" 42 ) : "GFP_NOWAIT"
42 43
diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h
new file mode 100644
index 000000000000..c7805818fcc6
--- /dev/null
+++ b/include/trace/events/nilfs2.h
@@ -0,0 +1,224 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM nilfs2
3
4#if !defined(_TRACE_NILFS2_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_NILFS2_H
6
7#include <linux/tracepoint.h>
8
9struct nilfs_sc_info;
10
11#define show_collection_stage(type) \
12 __print_symbolic(type, \
13 { NILFS_ST_INIT, "ST_INIT" }, \
14 { NILFS_ST_GC, "ST_GC" }, \
15 { NILFS_ST_FILE, "ST_FILE" }, \
16 { NILFS_ST_IFILE, "ST_IFILE" }, \
17 { NILFS_ST_CPFILE, "ST_CPFILE" }, \
18 { NILFS_ST_SUFILE, "ST_SUFILE" }, \
19 { NILFS_ST_DAT, "ST_DAT" }, \
20 { NILFS_ST_SR, "ST_SR" }, \
21 { NILFS_ST_DSYNC, "ST_DSYNC" }, \
22 { NILFS_ST_DONE, "ST_DONE"})
23
24TRACE_EVENT(nilfs2_collection_stage_transition,
25
26 TP_PROTO(struct nilfs_sc_info *sci),
27
28 TP_ARGS(sci),
29
30 TP_STRUCT__entry(
31 __field(void *, sci)
32 __field(int, stage)
33 ),
34
35 TP_fast_assign(
36 __entry->sci = sci;
37 __entry->stage = sci->sc_stage.scnt;
38 ),
39
40 TP_printk("sci = %p stage = %s",
41 __entry->sci,
42 show_collection_stage(__entry->stage))
43);
44
45#ifndef TRACE_HEADER_MULTI_READ
46enum nilfs2_transaction_transition_state {
47 TRACE_NILFS2_TRANSACTION_BEGIN,
48 TRACE_NILFS2_TRANSACTION_COMMIT,
49 TRACE_NILFS2_TRANSACTION_ABORT,
50 TRACE_NILFS2_TRANSACTION_TRYLOCK,
51 TRACE_NILFS2_TRANSACTION_LOCK,
52 TRACE_NILFS2_TRANSACTION_UNLOCK,
53};
54#endif
55
56#define show_transaction_state(type) \
57 __print_symbolic(type, \
58 { TRACE_NILFS2_TRANSACTION_BEGIN, "BEGIN" }, \
59 { TRACE_NILFS2_TRANSACTION_COMMIT, "COMMIT" }, \
60 { TRACE_NILFS2_TRANSACTION_ABORT, "ABORT" }, \
61 { TRACE_NILFS2_TRANSACTION_TRYLOCK, "TRYLOCK" }, \
62 { TRACE_NILFS2_TRANSACTION_LOCK, "LOCK" }, \
63 { TRACE_NILFS2_TRANSACTION_UNLOCK, "UNLOCK" })
64
65TRACE_EVENT(nilfs2_transaction_transition,
66 TP_PROTO(struct super_block *sb,
67 struct nilfs_transaction_info *ti,
68 int count,
69 unsigned int flags,
70 enum nilfs2_transaction_transition_state state),
71
72 TP_ARGS(sb, ti, count, flags, state),
73
74 TP_STRUCT__entry(
75 __field(void *, sb)
76 __field(void *, ti)
77 __field(int, count)
78 __field(unsigned int, flags)
79 __field(int, state)
80 ),
81
82 TP_fast_assign(
83 __entry->sb = sb;
84 __entry->ti = ti;
85 __entry->count = count;
86 __entry->flags = flags;
87 __entry->state = state;
88 ),
89
90 TP_printk("sb = %p ti = %p count = %d flags = %x state = %s",
91 __entry->sb,
92 __entry->ti,
93 __entry->count,
94 __entry->flags,
95 show_transaction_state(__entry->state))
96);
97
98TRACE_EVENT(nilfs2_segment_usage_check,
99 TP_PROTO(struct inode *sufile,
100 __u64 segnum,
101 unsigned long cnt),
102
103 TP_ARGS(sufile, segnum, cnt),
104
105 TP_STRUCT__entry(
106 __field(struct inode *, sufile)
107 __field(__u64, segnum)
108 __field(unsigned long, cnt)
109 ),
110
111 TP_fast_assign(
112 __entry->sufile = sufile;
113 __entry->segnum = segnum;
114 __entry->cnt = cnt;
115 ),
116
117 TP_printk("sufile = %p segnum = %llu cnt = %lu",
118 __entry->sufile,
119 __entry->segnum,
120 __entry->cnt)
121);
122
123TRACE_EVENT(nilfs2_segment_usage_allocated,
124 TP_PROTO(struct inode *sufile,
125 __u64 segnum),
126
127 TP_ARGS(sufile, segnum),
128
129 TP_STRUCT__entry(
130 __field(struct inode *, sufile)
131 __field(__u64, segnum)
132 ),
133
134 TP_fast_assign(
135 __entry->sufile = sufile;
136 __entry->segnum = segnum;
137 ),
138
139 TP_printk("sufile = %p segnum = %llu",
140 __entry->sufile,
141 __entry->segnum)
142);
143
144TRACE_EVENT(nilfs2_segment_usage_freed,
145 TP_PROTO(struct inode *sufile,
146 __u64 segnum),
147
148 TP_ARGS(sufile, segnum),
149
150 TP_STRUCT__entry(
151 __field(struct inode *, sufile)
152 __field(__u64, segnum)
153 ),
154
155 TP_fast_assign(
156 __entry->sufile = sufile;
157 __entry->segnum = segnum;
158 ),
159
160 TP_printk("sufile = %p segnum = %llu",
161 __entry->sufile,
162 __entry->segnum)
163);
164
165TRACE_EVENT(nilfs2_mdt_insert_new_block,
166 TP_PROTO(struct inode *inode,
167 unsigned long ino,
168 unsigned long block),
169
170 TP_ARGS(inode, ino, block),
171
172 TP_STRUCT__entry(
173 __field(struct inode *, inode)
174 __field(unsigned long, ino)
175 __field(unsigned long, block)
176 ),
177
178 TP_fast_assign(
179 __entry->inode = inode;
180 __entry->ino = ino;
181 __entry->block = block;
182 ),
183
184 TP_printk("inode = %p ino = %lu block = %lu",
185 __entry->inode,
186 __entry->ino,
187 __entry->block)
188);
189
190TRACE_EVENT(nilfs2_mdt_submit_block,
191 TP_PROTO(struct inode *inode,
192 unsigned long ino,
193 unsigned long blkoff,
194 int mode),
195
196 TP_ARGS(inode, ino, blkoff, mode),
197
198 TP_STRUCT__entry(
199 __field(struct inode *, inode)
200 __field(unsigned long, ino)
201 __field(unsigned long, blkoff)
202 __field(int, mode)
203 ),
204
205 TP_fast_assign(
206 __entry->inode = inode;
207 __entry->ino = ino;
208 __entry->blkoff = blkoff;
209 __entry->mode = mode;
210 ),
211
212 TP_printk("inode = %p ino = %lu blkoff = %lu mode = %x",
213 __entry->inode,
214 __entry->ino,
215 __entry->blkoff,
216 __entry->mode)
217);
218
219#endif /* _TRACE_NILFS2_H */
220
221/* This part must be outside protection */
222#undef TRACE_INCLUDE_FILE
223#define TRACE_INCLUDE_FILE nilfs2
224#include <trace/define_trace.h>
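
For context, each TRACE_EVENT above expands into a trace_<name>() function that the filesystem calls at the instrumented spot; the call costs next to nothing while the event is disabled. A hypothetical call-site sketch (the real hooks live in fs/nilfs2 and are not shown in this diff):

/* Hypothetical call site; CREATE_TRACE_POINTS must be defined in exactly
 * one nilfs2 .c file before including the header above. */
#define CREATE_TRACE_POINTS
#include <trace/events/nilfs2.h>

static void example_mark_allocated(struct inode *sufile, __u64 segnum)
{
	trace_nilfs2_segment_usage_allocated(sufile, segnum);
}
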
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 71f448e5e927..ed81aafd2392 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -123,7 +123,6 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
 	size_t len = src->m_ts;
 	size_t alen;
 
-	WARN_ON(dst == NULL);
 	if (src->m_ts > dst->m_ts)
 		return ERR_PTR(-EINVAL);
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 8a056a32ded7..5ffcbd354a52 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1371,16 +1371,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 	if (unlikely(audit_filter_type(type)))
 		return NULL;
 
-	if (gfp_mask & __GFP_WAIT) {
+	if (gfp_mask & __GFP_DIRECT_RECLAIM) {
 		if (audit_pid && audit_pid == current->pid)
-			gfp_mask &= ~__GFP_WAIT;
+			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 		else
 			reserve = 0;
 	}
 
 	while (audit_backlog_limit
 	       && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
-		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
+		if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
 			long sleep_time;
 
 			sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
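
The rule encoded above: when the caller is the audit daemon itself it must never block (it is the one draining the queue), so the direct-reclaim bit is cleared for it, while other callers may still be throttled once the backlog is over the limit. A generic sketch of the same pattern, with placeholder names:

/* Sketch only; am_the_consumer and the allocation size are placeholders. */
static void *alloc_for_queue(gfp_t gfp_mask, bool am_the_consumer)
{
	if (am_the_consumer)
		gfp_mask &= ~__GFP_DIRECT_RECLAIM;	/* never sleep waiting on ourselves */
	return kmalloc(64, gfp_mask);
}
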
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b9d0cce3f9ce..f1603c153890 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -299,7 +299,7 @@ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
 
 	idr_preload(gfp_mask);
 	spin_lock_bh(&cgroup_idr_lock);
-	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_WAIT);
+	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
 	spin_unlock_bh(&cgroup_idr_lock);
 	idr_preload_end();
 	return ret;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4c5edc357923..d873b64fbddc 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index bd9f8a03cefa..11b64a63c0f8 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -6,7 +6,7 @@
  * Version 2.  See the file COPYING for more details.
  */
 
-#define pr_fmt(fmt)	"kexec: " fmt
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/capability.h>
 #include <linux/mm.h>
@@ -1027,7 +1027,7 @@ static int __init crash_notes_memory_init(void)
 
 	crash_notes = __alloc_percpu(size, align);
 	if (!crash_notes) {
-		pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
+		pr_warn("Memory allocation for saving cpu register states failed\n");
 		return -ENOMEM;
 	}
 	return 0;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 6a9a3f2a0e8e..b70ada0028d2 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -9,6 +9,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
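
Setting pr_fmt() to KBUILD_MODNAME makes every pr_*() call in the file prefix its output with the object name, which is why the literal "Kexec: " string could be dropped from the pr_warn() above. Illustrative sketch:

/* pr_fmt() is pasted into the format string by the pr_*() macros. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/printk.h>

static void report_failure(void)
{
	/* With KBUILD_MODNAME "kexec_core" this logs, e.g.,
	 * "kexec_core: Memory allocation for saving cpu register states failed". */
	pr_warn("Memory allocation for saving cpu register states failed\n");
}
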
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4e49cc4c9952..deae3907ac1e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2738,7 +2738,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
 		return;
 
 	/* no reclaim without waiting on it */
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return;
 
 	/* this guy won't enter reclaim */
diff --git a/kernel/panic.c b/kernel/panic.c
index 04e91ff7560b..4579dbb7ed87 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -23,6 +23,7 @@
 #include <linux/sysrq.h>
 #include <linux/init.h>
 #include <linux/nmi.h>
+#include <linux/console.h>
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
@@ -147,6 +148,15 @@ void panic(const char *fmt, ...)
 
 	bust_spinlocks(0);
 
+	/*
+	 * We may have ended up stopping the CPU holding the lock (in
+	 * smp_send_stop()) while still having some valuable data in the console
+	 * buffer.  Try to acquire the lock then release it regardless of the
+	 * result.  The release will also print the buffers out.
+	 */
+	console_trylock();
+	console_unlock();
+
 	if (!panic_blink)
 		panic_blink = no_blink;
 
diff --git a/kernel/params.c b/kernel/params.c
index b6554aa71094..93a380a2345d 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -325,10 +325,11 @@ int param_get_charp(char *buffer, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL(param_get_charp);
 
-static void param_free_charp(void *arg)
+void param_free_charp(void *arg)
 {
 	maybe_kfree_parameter(*((char **)arg));
 }
+EXPORT_SYMBOL(param_free_charp);
 
 const struct kernel_param_ops param_ops_charp = {
 	.set = param_set_charp,
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5235dd4e1e2f..3a970604308f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1779,7 +1779,7 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
 	while (to_alloc-- > 0) {
 		struct page *page;
 
-		page = alloc_image_page(__GFP_HIGHMEM);
+		page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
 		memory_bm_set_bit(bm, page_to_pfn(page));
 	}
 	return nr_highmem;
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b2066fb5b10f..12cd989dadf6 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -257,7 +257,7 @@ static int hib_submit_io(int rw, pgoff_t page_off, void *addr,
257 struct bio *bio; 257 struct bio *bio;
258 int error = 0; 258 int error = 0;
259 259
260 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 260 bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1);
261 bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); 261 bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
262 bio->bi_bdev = hib_resume_bdev; 262 bio->bi_bdev = hib_resume_bdev;
263 263
@@ -356,7 +356,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
356 return -ENOSPC; 356 return -ENOSPC;
357 357
358 if (hb) { 358 if (hb) {
359 src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | 359 src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN |
360 __GFP_NORETRY); 360 __GFP_NORETRY);
361 if (src) { 361 if (src) {
362 copy_page(src, buf); 362 copy_page(src, buf);
@@ -364,7 +364,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
364 ret = hib_wait_io(hb); /* Free pages */ 364 ret = hib_wait_io(hb); /* Free pages */
365 if (ret) 365 if (ret)
366 return ret; 366 return ret;
367 src = (void *)__get_free_page(__GFP_WAIT | 367 src = (void *)__get_free_page(__GFP_RECLAIM |
368 __GFP_NOWARN | 368 __GFP_NOWARN |
369 __GFP_NORETRY); 369 __GFP_NORETRY);
370 if (src) { 370 if (src) {
@@ -672,7 +672,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
672 nr_threads = num_online_cpus() - 1; 672 nr_threads = num_online_cpus() - 1;
673 nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); 673 nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
674 674
675 page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); 675 page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH);
676 if (!page) { 676 if (!page) {
677 printk(KERN_ERR "PM: Failed to allocate LZO page\n"); 677 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
678 ret = -ENOMEM; 678 ret = -ENOMEM;
@@ -975,7 +975,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
975 last = tmp; 975 last = tmp;
976 976
977 tmp->map = (struct swap_map_page *) 977 tmp->map = (struct swap_map_page *)
978 __get_free_page(__GFP_WAIT | __GFP_HIGH); 978 __get_free_page(__GFP_RECLAIM | __GFP_HIGH);
979 if (!tmp->map) { 979 if (!tmp->map) {
980 release_swap_reader(handle); 980 release_swap_reader(handle);
981 return -ENOMEM; 981 return -ENOMEM;
@@ -1242,9 +1242,9 @@ static int load_image_lzo(struct swap_map_handle *handle,
1242 1242
1243 for (i = 0; i < read_pages; i++) { 1243 for (i = 0; i < read_pages; i++) {
1244 page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? 1244 page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
1245 __GFP_WAIT | __GFP_HIGH : 1245 __GFP_RECLAIM | __GFP_HIGH :
1246 __GFP_WAIT | __GFP_NOWARN | 1246 __GFP_RECLAIM | __GFP_NOWARN |
1247 __GFP_NORETRY); 1247 __GFP_NORETRY);
1248 1248
1249 if (!page[i]) { 1249 if (!page[i]) {
1250 if (i < LZO_CMP_PAGES) { 1250 if (i < LZO_CMP_PAGES) {
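
These hibernation paths want allocations that may sleep and reclaim, so plain __GFP_WAIT becomes __GFP_RECLAIM, which covers both reclaim modes. A rough sketch of how the replacement flag is composed (see include/linux/gfp.h for the authoritative definition):

/* Sketch of the composition; the real macro lives in gfp.h. */
#define SKETCH_GFP_RECLAIM	(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)
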
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b16f35487b67..2ce8826f1053 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -269,6 +269,9 @@ static u32 clear_idx;
 #define PREFIX_MAX		32
 #define LOG_LINE_MAX		(1024 - PREFIX_MAX)
 
+#define LOG_LEVEL(v)		((v) & 0x07)
+#define LOG_FACILITY(v)		((v) >> 3 & 0xff)
+
 /* record buffer */
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 #define LOG_ALIGN 4
@@ -612,7 +615,6 @@ struct devkmsg_user {
 static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	char *buf, *line;
-	int i;
 	int level = default_message_loglevel;
 	int facility = 1;	/* LOG_USER */
 	size_t len = iov_iter_count(from);
@@ -642,12 +644,13 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 	line = buf;
 	if (line[0] == '<') {
 		char *endp = NULL;
+		unsigned int u;
 
-		i = simple_strtoul(line+1, &endp, 10);
+		u = simple_strtoul(line + 1, &endp, 10);
 		if (endp && endp[0] == '>') {
-			level = i & 7;
-			if (i >> 3)
-				facility = i >> 3;
+			level = LOG_LEVEL(u);
+			if (LOG_FACILITY(u) != 0)
+				facility = LOG_FACILITY(u);
 			endp++;
 			len -= endp - line;
 			line = endp;
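
The new macros decode the syslog prefix that userspace writes to /dev/kmsg: the low three bits carry the level and the remaining bits the facility. A worked example, assuming the definitions above:

/* "<14>message" parses to u = 14. */
unsigned int u = 14;
int level    = LOG_LEVEL(u);	/* 14 & 0x07        == 6 (KERN_INFO) */
int facility = LOG_FACILITY(u);	/* (14 >> 3) & 0xff == 1 (LOG_USER) */
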
diff --git a/kernel/signal.c b/kernel/signal.c
index 0f6bbbe77b46..c0b01fe24bbd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -503,41 +503,6 @@ int unhandled_signal(struct task_struct *tsk, int sig)
503 return !tsk->ptrace; 503 return !tsk->ptrace;
504} 504}
505 505
506/*
507 * Notify the system that a driver wants to block all signals for this
508 * process, and wants to be notified if any signals at all were to be
509 * sent/acted upon. If the notifier routine returns non-zero, then the
510 * signal will be acted upon after all. If the notifier routine returns 0,
511 * then then signal will be blocked. Only one block per process is
512 * allowed. priv is a pointer to private data that the notifier routine
513 * can use to determine if the signal should be blocked or not.
514 */
515void
516block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
517{
518 unsigned long flags;
519
520 spin_lock_irqsave(&current->sighand->siglock, flags);
521 current->notifier_mask = mask;
522 current->notifier_data = priv;
523 current->notifier = notifier;
524 spin_unlock_irqrestore(&current->sighand->siglock, flags);
525}
526
527/* Notify the system that blocking has ended. */
528
529void
530unblock_all_signals(void)
531{
532 unsigned long flags;
533
534 spin_lock_irqsave(&current->sighand->siglock, flags);
535 current->notifier = NULL;
536 current->notifier_data = NULL;
537 recalc_sigpending();
538 spin_unlock_irqrestore(&current->sighand->siglock, flags);
539}
540
541static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) 506static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
542{ 507{
543 struct sigqueue *q, *first = NULL; 508 struct sigqueue *q, *first = NULL;
@@ -580,19 +545,8 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
580{ 545{
581 int sig = next_signal(pending, mask); 546 int sig = next_signal(pending, mask);
582 547
583 if (sig) { 548 if (sig)
584 if (current->notifier) {
585 if (sigismember(current->notifier_mask, sig)) {
586 if (!(current->notifier)(current->notifier_data)) {
587 clear_thread_flag(TIF_SIGPENDING);
588 return 0;
589 }
590 }
591 }
592
593 collect_signal(sig, pending, info); 549 collect_signal(sig, pending, info);
594 }
595
596 return sig; 550 return sig;
597} 551}
598 552
@@ -834,7 +788,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
834 sigset_t flush; 788 sigset_t flush;
835 789
836 if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { 790 if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
837 if (signal->flags & SIGNAL_GROUP_COREDUMP) 791 if (!(signal->flags & SIGNAL_GROUP_EXIT))
838 return sig == SIGKILL; 792 return sig == SIGKILL;
839 /* 793 /*
840 * The process is in the middle of dying, nothing to do. 794 * The process is in the middle of dying, nothing to do.
@@ -2483,9 +2437,6 @@ EXPORT_SYMBOL(force_sig);
2483EXPORT_SYMBOL(send_sig); 2437EXPORT_SYMBOL(send_sig);
2484EXPORT_SYMBOL(send_sig_info); 2438EXPORT_SYMBOL(send_sig_info);
2485EXPORT_SYMBOL(sigprocmask); 2439EXPORT_SYMBOL(sigprocmask);
2486EXPORT_SYMBOL(block_all_signals);
2487EXPORT_SYMBOL(unblock_all_signals);
2488
2489 2440
2490/* 2441/*
2491 * System call entry points. 2442 * System call entry points.
diff --git a/kernel/smp.c b/kernel/smp.c
index 07854477c164..d903c02223af 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -669,7 +669,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 	cpumask_var_t cpus;
 	int cpu, ret;
 
-	might_sleep_if(gfp_flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_flags));
 
 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
 		preempt_disable();
diff --git a/kernel/sys.c b/kernel/sys.c
index fa2f2f671a5c..6af9212ab5aa 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -222,7 +222,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
 		goto out_unlock;	/* No processes for this user */
 	}
 	do_each_thread(g, p) {
-		if (uid_eq(task_uid(p), uid))
+		if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
 			error = set_one_prio(p, niceval, error);
 	} while_each_thread(g, p);
 	if (!uid_eq(uid, cred->uid))
@@ -290,7 +290,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
 		goto out_unlock;	/* No processes for this user */
 	}
 	do_each_thread(g, p) {
-		if (uid_eq(task_uid(p), uid)) {
+		if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
 			niceval = nice_to_rlimit(task_nice(p));
 			if (niceval > retval)
 				retval = niceval;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1d1521c26302..16bf3bc25e3e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1686,6 +1686,9 @@ config TEST_STRING_HELPERS
1686config TEST_KSTRTOX 1686config TEST_KSTRTOX
1687 tristate "Test kstrto*() family of functions at runtime" 1687 tristate "Test kstrto*() family of functions at runtime"
1688 1688
1689config TEST_PRINTF
1690 tristate "Test printf() family of functions at runtime"
1691
1689config TEST_RHASHTABLE 1692config TEST_RHASHTABLE
1690 tristate "Perform selftest on resizable hash table" 1693 tristate "Perform selftest on resizable hash table"
1691 default n 1694 default n
diff --git a/lib/Makefile b/lib/Makefile
index 8de3b012eac7..7f1de26613d2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
42obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o 42obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
43obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o 43obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
44obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o 44obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
45obj-$(CONFIG_TEST_PRINTF) += test_printf.o
45 46
46ifeq ($(CONFIG_DEBUG_KOBJECT),y) 47ifeq ($(CONFIG_DEBUG_KOBJECT),y)
47CFLAGS_kobject.o += -DDEBUG 48CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index fcb65d2a0b94..8855f019ebe8 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1249,6 +1249,14 @@ static void check_sync(struct device *dev,
1249 dir2name[entry->direction], 1249 dir2name[entry->direction],
1250 dir2name[ref->direction]); 1250 dir2name[ref->direction]);
1251 1251
1252 if (ref->sg_call_ents && ref->type == dma_debug_sg &&
1253 ref->sg_call_ents != entry->sg_call_ents) {
1254 err_printk(ref->dev, entry, "DMA-API: device driver syncs "
1255 "DMA sg list with different entry count "
1256 "[map count=%d] [sync count=%d]\n",
1257 entry->sg_call_ents, ref->sg_call_ents);
1258 }
1259
1252out: 1260out:
1253 put_hash_bucket(bucket, &flags); 1261 put_hash_bucket(bucket, &flags);
1254} 1262}
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e491e02eff54..e3952e9c8ec0 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -42,7 +42,7 @@ extern struct _ddebug __stop___verbose[];
42 42
43struct ddebug_table { 43struct ddebug_table {
44 struct list_head link; 44 struct list_head link;
45 char *mod_name; 45 const char *mod_name;
46 unsigned int num_ddebugs; 46 unsigned int num_ddebugs;
47 struct _ddebug *ddebugs; 47 struct _ddebug *ddebugs;
48}; 48};
@@ -841,12 +841,12 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
841 const char *name) 841 const char *name)
842{ 842{
843 struct ddebug_table *dt; 843 struct ddebug_table *dt;
844 char *new_name; 844 const char *new_name;
845 845
846 dt = kzalloc(sizeof(*dt), GFP_KERNEL); 846 dt = kzalloc(sizeof(*dt), GFP_KERNEL);
847 if (dt == NULL) 847 if (dt == NULL)
848 return -ENOMEM; 848 return -ENOMEM;
849 new_name = kstrdup(name, GFP_KERNEL); 849 new_name = kstrdup_const(name, GFP_KERNEL);
850 if (new_name == NULL) { 850 if (new_name == NULL) {
851 kfree(dt); 851 kfree(dt);
852 return -ENOMEM; 852 return -ENOMEM;
@@ -907,7 +907,7 @@ int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *module)
907static void ddebug_table_free(struct ddebug_table *dt) 907static void ddebug_table_free(struct ddebug_table *dt)
908{ 908{
909 list_del_init(&dt->link); 909 list_del_init(&dt->link);
910 kfree(dt->mod_name); 910 kfree_const(dt->mod_name);
911 kfree(dt); 911 kfree(dt);
912} 912}
913 913
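
kstrdup_const() returns the original pointer when the source string sits in .rodata and only allocates otherwise; kfree_const() frees only what was actually allocated. That pairing is why every free path in this hunk switched to kfree_const(). Minimal sketch of the ownership rule:

/* Anything obtained from kstrdup_const() must go back through kfree_const(). */
const char *mod_name = kstrdup_const(name, GFP_KERNEL);
if (!mod_name)
	return -ENOMEM;
/* ... use mod_name ... */
kfree_const(mod_name);	/* no-op when mod_name still points into .rodata */
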
diff --git a/lib/halfmd4.c b/lib/halfmd4.c
index a8fe6274a13c..137e861d9690 100644
--- a/lib/halfmd4.c
+++ b/lib/halfmd4.c
@@ -1,6 +1,7 @@
 #include <linux/compiler.h>
 #include <linux/export.h>
 #include <linux/cryptohash.h>
+#include <linux/bitops.h>
 
 /* F, G and H are basic MD4 functions: selection, majority, parity */
 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
@@ -14,7 +15,7 @@
  * Rotation is separate from addition to prevent recomputation
  */
 #define ROUND(f, a, b, c, d, x, s)	\
-	(a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s)))
+	(a += f(b, c, d) + x, a = rol32(a, s))
 #define K1  0
 #define K2  013240474631UL
 #define K3  015666365641UL
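
rol32() from <linux/bitops.h> expresses the rotate that the ROUND() macro used to open-code; for 0 < s < 32 the two forms compute the same value. A standalone sketch of the equivalence:

/* Userspace sketch: what rol32(a, s) computes for 0 < s < 32. */
#include <stdint.h>

static inline uint32_t rol32_sketch(uint32_t a, unsigned int s)
{
	return (a << s) | (a >> (32 - s));
}
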
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 8d74c20d8595..992457b1284c 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -169,11 +169,15 @@ int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize,
 		}
 	} else {
 		for (j = 0; j < len; j++) {
-			if (linebuflen < lx + 3)
+			if (linebuflen < lx + 2)
 				goto overflow2;
 			ch = ptr[j];
 			linebuf[lx++] = hex_asc_hi(ch);
+			if (linebuflen < lx + 2)
+				goto overflow2;
 			linebuf[lx++] = hex_asc_lo(ch);
+			if (linebuflen < lx + 2)
+				goto overflow2;
 			linebuf[lx++] = ' ';
 		}
 		if (j)
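
The per-character checks above let hex_dump_to_buffer() fill the output as far as it can before bailing out, instead of demanding room for a full "xx " triple up front; each step only needs space for the next character plus the terminating NUL. A userspace sketch of the same loop shape (simplified, not the kernel function):

/* Simplified userspace sketch of the bounds-checked hex loop. */
#include <stddef.h>

static int dump_bytes(char *linebuf, size_t linebuflen,
		      const unsigned char *ptr, size_t len)
{
	static const char hex[] = "0123456789abcdef";
	size_t j, lx = 0;

	for (j = 0; j < len; j++) {
		if (linebuflen < lx + 2)	/* next char + room for NUL */
			return -1;
		linebuf[lx++] = hex[ptr[j] >> 4];
		if (linebuflen < lx + 2)
			return -1;
		linebuf[lx++] = hex[ptr[j] & 0x0f];
		if (linebuflen < lx + 2)
			return -1;
		linebuf[lx++] = ' ';
	}
	if (lx)
		lx--;				/* drop the trailing space */
	if (linebuflen < lx + 1)
		return -1;
	linebuf[lx] = '\0';
	return 0;
}
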
diff --git a/lib/idr.c b/lib/idr.c
index 5335c43adf46..6098336df267 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -399,7 +399,7 @@ void idr_preload(gfp_t gfp_mask)
 	 * allocation guarantee.  Disallow usage from those contexts.
 	 */
 	WARN_ON_ONCE(in_interrupt());
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 
 	preempt_disable();
 
@@ -453,7 +453,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
 	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
 	int id;
 
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 
 	/* sanity checks */
 	if (WARN_ON_ONCE(start < 0))
diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c
index bd2bea963364..391fd23976a2 100644
--- a/lib/is_single_threaded.c
+++ b/lib/is_single_threaded.c
@@ -36,8 +36,7 @@ bool current_is_single_threaded(void)
36 if (unlikely(p == task->group_leader)) 36 if (unlikely(p == task->group_leader))
37 continue; 37 continue;
38 38
39 t = p; 39 for_each_thread(p, t) {
40 do {
41 if (unlikely(t->mm == mm)) 40 if (unlikely(t->mm == mm))
42 goto found; 41 goto found;
43 if (likely(t->mm)) 42 if (likely(t->mm))
@@ -48,7 +47,7 @@ bool current_is_single_threaded(void)
48 * forked before exiting. 47 * forked before exiting.
49 */ 48 */
50 smp_rmb(); 49 smp_rmb();
51 } while_each_thread(p, t); 50 }
52 } 51 }
53 ret = true; 52 ret = true;
54found: 53found:
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index 32f12150fc4f..f194e6e593e1 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -31,6 +31,22 @@ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
31} 31}
32EXPORT_SYMBOL(kvasprintf); 32EXPORT_SYMBOL(kvasprintf);
33 33
34/*
35 * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt
36 * (or the sole vararg) points to rodata, we will then save a memory
37 * allocation and string copy. In any case, the return value should be
38 * freed using kfree_const().
39 */
40const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap)
41{
42 if (!strchr(fmt, '%'))
43 return kstrdup_const(fmt, gfp);
44 if (!strcmp(fmt, "%s"))
45 return kstrdup_const(va_arg(ap, const char*), gfp);
46 return kvasprintf(gfp, fmt, ap);
47}
48EXPORT_SYMBOL(kvasprintf_const);
49
34char *kasprintf(gfp_t gfp, const char *fmt, ...) 50char *kasprintf(gfp_t gfp, const char *fmt, ...)
35{ 51{
36 va_list ap; 52 va_list ap;
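
kvasprintf_const() may hand back a pointer into .rodata rather than a fresh allocation, so callers must treat the result as const and release it with kfree_const(), exactly like kstrdup_const(). A hypothetical wrapper sketch (the name and signature are illustrative, not kernel API):

/* Hypothetical helper; caller frees the result with kfree_const(). */
static __printf(2, 3) const char *set_label(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	const char *label;

	va_start(ap, fmt);
	label = kvasprintf_const(gfp, fmt, ap);
	va_end(ap);
	return label;
}
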
diff --git a/lib/kobject.c b/lib/kobject.c
index 055407746266..7cbccd2b4c72 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -257,18 +257,32 @@ static int kobject_add_internal(struct kobject *kobj)
257int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, 257int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
258 va_list vargs) 258 va_list vargs)
259{ 259{
260 char *s; 260 const char *s;
261 261
262 if (kobj->name && !fmt) 262 if (kobj->name && !fmt)
263 return 0; 263 return 0;
264 264
265 s = kvasprintf(GFP_KERNEL, fmt, vargs); 265 s = kvasprintf_const(GFP_KERNEL, fmt, vargs);
266 if (!s) 266 if (!s)
267 return -ENOMEM; 267 return -ENOMEM;
268 268
269 /* ewww... some of these buggers have '/' in the name ... */ 269 /*
270 strreplace(s, '/', '!'); 270 * ewww... some of these buggers have '/' in the name ... If
271 kfree(kobj->name); 271 * that's the case, we need to make sure we have an actual
272 * allocated copy to modify, since kvasprintf_const may have
273 * returned something from .rodata.
274 */
275 if (strchr(s, '/')) {
276 char *t;
277
278 t = kstrdup(s, GFP_KERNEL);
279 kfree_const(s);
280 if (!t)
281 return -ENOMEM;
282 strreplace(t, '/', '!');
283 s = t;
284 }
285 kfree_const(kobj->name);
272 kobj->name = s; 286 kobj->name = s;
273 287
274 return 0; 288 return 0;
@@ -466,7 +480,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
466 envp[0] = devpath_string; 480 envp[0] = devpath_string;
467 envp[1] = NULL; 481 envp[1] = NULL;
468 482
469 name = dup_name = kstrdup(new_name, GFP_KERNEL); 483 name = dup_name = kstrdup_const(new_name, GFP_KERNEL);
470 if (!name) { 484 if (!name) {
471 error = -ENOMEM; 485 error = -ENOMEM;
472 goto out; 486 goto out;
@@ -486,7 +500,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
486 kobject_uevent_env(kobj, KOBJ_MOVE, envp); 500 kobject_uevent_env(kobj, KOBJ_MOVE, envp);
487 501
488out: 502out:
489 kfree(dup_name); 503 kfree_const(dup_name);
490 kfree(devpath_string); 504 kfree(devpath_string);
491 kfree(devpath); 505 kfree(devpath);
492 kobject_put(kobj); 506 kobject_put(kobj);
@@ -634,7 +648,7 @@ static void kobject_cleanup(struct kobject *kobj)
634 /* free name if we allocated it */ 648 /* free name if we allocated it */
635 if (name) { 649 if (name) {
636 pr_debug("kobject: '%s': free name\n", name); 650 pr_debug("kobject: '%s': free name\n", name);
637 kfree(name); 651 kfree_const(name);
638 } 652 }
639} 653}
640 654
diff --git a/lib/llist.c b/lib/llist.c
index 0b0e9779d675..ae5872b1df0c 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -66,12 +66,12 @@ struct llist_node *llist_del_first(struct llist_head *head)
 {
 	struct llist_node *entry, *old_entry, *next;
 
-	entry = head->first;
+	entry = smp_load_acquire(&head->first);
 	for (;;) {
 		if (entry == NULL)
 			return NULL;
 		old_entry = entry;
-		next = entry->next;
+		next = READ_ONCE(entry->next);
 		entry = cmpxchg(&head->first, old_entry, next);
 		if (entry == old_entry)
 			break;
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index f75715131f20..6d40944960de 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -135,7 +135,7 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
135 * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). 135 * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course).
136 * 136 *
137 * @gfp indicates whether or not to wait until a free id is available (it's not 137 * @gfp indicates whether or not to wait until a free id is available (it's not
138 * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep 138 * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep
139 * however long it takes until another thread frees an id (same semantics as a 139 * however long it takes until another thread frees an id (same semantics as a
140 * mempool). 140 * mempool).
141 * 141 *
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index f9ebe1c82060..fcf5d98574ce 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -188,7 +188,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
188 * preloading in the interrupt anyway as all the allocations have to 188 * preloading in the interrupt anyway as all the allocations have to
189 * be atomic. So just do normal allocation when in interrupt. 189 * be atomic. So just do normal allocation when in interrupt.
190 */ 190 */
191 if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) { 191 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
192 struct radix_tree_preload *rtp; 192 struct radix_tree_preload *rtp;
193 193
194 /* 194 /*
@@ -249,7 +249,7 @@ radix_tree_node_free(struct radix_tree_node *node)
249 * with preemption not disabled. 249 * with preemption not disabled.
250 * 250 *
251 * To make use of this facility, the radix tree must be initialised without 251 * To make use of this facility, the radix tree must be initialised without
252 * __GFP_WAIT being passed to INIT_RADIX_TREE(). 252 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
253 */ 253 */
254static int __radix_tree_preload(gfp_t gfp_mask) 254static int __radix_tree_preload(gfp_t gfp_mask)
255{ 255{
@@ -286,12 +286,12 @@ out:
286 * with preemption not disabled. 286 * with preemption not disabled.
287 * 287 *
288 * To make use of this facility, the radix tree must be initialised without 288 * To make use of this facility, the radix tree must be initialised without
289 * __GFP_WAIT being passed to INIT_RADIX_TREE(). 289 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
290 */ 290 */
291int radix_tree_preload(gfp_t gfp_mask) 291int radix_tree_preload(gfp_t gfp_mask)
292{ 292{
293 /* Warn on non-sensical use... */ 293 /* Warn on non-sensical use... */
294 WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT)); 294 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
295 return __radix_tree_preload(gfp_mask); 295 return __radix_tree_preload(gfp_mask);
296} 296}
297EXPORT_SYMBOL(radix_tree_preload); 297EXPORT_SYMBOL(radix_tree_preload);
@@ -303,7 +303,7 @@ EXPORT_SYMBOL(radix_tree_preload);
303 */ 303 */
304int radix_tree_maybe_preload(gfp_t gfp_mask) 304int radix_tree_maybe_preload(gfp_t gfp_mask)
305{ 305{
306 if (gfp_mask & __GFP_WAIT) 306 if (gfpflags_allow_blocking(gfp_mask))
307 return __radix_tree_preload(gfp_mask); 307 return __radix_tree_preload(gfp_mask);
308 /* Preloading doesn't help anything with this gfp mask, skip it */ 308 /* Preloading doesn't help anything with this gfp mask, skip it */
309 preempt_disable(); 309 preempt_disable();
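
The comments refer to the usual preload pattern: reserve tree nodes while sleeping is still allowed, then insert under a spinlock where it is not. A condensed sketch of that pattern (names are placeholders; the tree is assumed to be initialised with GFP_ATOMIC, i.e. without __GFP_DIRECT_RECLAIM):

/* Condensed sketch of the preload pattern; my_lock/my_tree/item are placeholders. */
static int insert_item(struct radix_tree_root *my_tree, unsigned long index,
		       void *item, spinlock_t *my_lock)
{
	int err = radix_tree_preload(GFP_KERNEL);	/* may sleep here ... */

	if (err)
		return err;
	spin_lock(my_lock);				/* ... but not here */
	err = radix_tree_insert(my_tree, index, item);
	spin_unlock(my_lock);
	radix_tree_preload_end();
	return err;
}
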
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index 8e376efd88a4..98866a770770 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -326,6 +326,39 @@ out:
326 kfree(out_test); 326 kfree(out_test);
327} 327}
328 328
329#define string_get_size_maxbuf 16
330#define test_string_get_size_one(size, blk_size, units, exp_result) \
331 do { \
332 BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf); \
333 __test_string_get_size((size), (blk_size), (units), \
334 (exp_result)); \
335 } while (0)
336
337
338static __init void __test_string_get_size(const u64 size, const u64 blk_size,
339 const enum string_size_units units,
340 const char *exp_result)
341{
342 char buf[string_get_size_maxbuf];
343
344 string_get_size(size, blk_size, units, buf, sizeof(buf));
345 if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
346 return;
347
348 buf[sizeof(buf) - 1] = '\0';
349 pr_warn("Test 'test_string_get_size_one' failed!\n");
350 pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
351 size, blk_size, units);
352 pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
353}
354
355static __init void test_string_get_size(void)
356{
357 test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
358 test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
359 test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
360}
361
329static int __init test_string_helpers_init(void) 362static int __init test_string_helpers_init(void)
330{ 363{
331 unsigned int i; 364 unsigned int i;
@@ -344,6 +377,9 @@ static int __init test_string_helpers_init(void)
344 for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) 377 for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++)
345 test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); 378 test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1);
346 379
380 /* Test string_get_size() */
381 test_string_get_size();
382
347 return -EINVAL; 383 return -EINVAL;
348} 384}
349module_init(test_string_helpers_init); 385module_init(test_string_helpers_init);
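
The expected strings follow from size * blk_size where the result divides evenly: 16384 * 512 = 8388608 bytes, exactly 8.00 MiB, and 1 * 512 is simply "512 B". The "32.7 MB" case depends on string_get_size()'s internal staged scaling rather than a single exact division, so it is best read as a regression guard for that behaviour. A quick userspace check of the exact cases:

/* Userspace check of the exact-arithmetic cases above. */
#include <assert.h>

int main(void)
{
	assert(16384ULL * 512 == 8ULL * 1024 * 1024);	/* 8.00 MiB */
	assert(1ULL * 512 == 512);			/* "512 B" */
	return 0;
}
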
diff --git a/lib/test_printf.c b/lib/test_printf.c
new file mode 100644
index 000000000000..c5a666af9ba5
--- /dev/null
+++ b/lib/test_printf.c
@@ -0,0 +1,362 @@
1/*
2 * Test cases for printf facility.
3 */
4
5#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6
7#include <linux/init.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/printk.h>
11#include <linux/random.h>
12#include <linux/slab.h>
13#include <linux/string.h>
14
15#include <linux/socket.h>
16#include <linux/in.h>
17
18#define BUF_SIZE 256
19#define FILL_CHAR '$'
20
21#define PTR1 ((void*)0x01234567)
22#define PTR2 ((void*)(long)(int)0xfedcba98)
23
24#if BITS_PER_LONG == 64
25#define PTR1_ZEROES "000000000"
26#define PTR1_SPACES " "
27#define PTR1_STR "1234567"
28#define PTR2_STR "fffffffffedcba98"
29#define PTR_WIDTH 16
30#else
31#define PTR1_ZEROES "0"
32#define PTR1_SPACES " "
33#define PTR1_STR "1234567"
34#define PTR2_STR "fedcba98"
35#define PTR_WIDTH 8
36#endif
37#define PTR_WIDTH_STR stringify(PTR_WIDTH)
38
39static unsigned total_tests __initdata;
40static unsigned failed_tests __initdata;
41static char *test_buffer __initdata;
42
43static int __printf(4, 0) __init
44do_test(int bufsize, const char *expect, int elen,
45 const char *fmt, va_list ap)
46{
47 va_list aq;
48 int ret, written;
49
50 total_tests++;
51
52 memset(test_buffer, FILL_CHAR, BUF_SIZE);
53 va_copy(aq, ap);
54 ret = vsnprintf(test_buffer, bufsize, fmt, aq);
55 va_end(aq);
56
57 if (ret != elen) {
58 pr_warn("vsnprintf(buf, %d, \"%s\", ...) returned %d, expected %d\n",
59 bufsize, fmt, ret, elen);
60 return 1;
61 }
62
63 if (!bufsize) {
64 if (memchr_inv(test_buffer, FILL_CHAR, BUF_SIZE)) {
65 pr_warn("vsnprintf(buf, 0, \"%s\", ...) wrote to buffer\n",
66 fmt);
67 return 1;
68 }
69 return 0;
70 }
71
72 written = min(bufsize-1, elen);
73 if (test_buffer[written]) {
74 pr_warn("vsnprintf(buf, %d, \"%s\", ...) did not nul-terminate buffer\n",
75 bufsize, fmt);
76 return 1;
77 }
78
79 if (memcmp(test_buffer, expect, written)) {
80 pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote '%s', expected '%.*s'\n",
81 bufsize, fmt, test_buffer, written, expect);
82 return 1;
83 }
84 return 0;
85}
86
87static void __printf(3, 4) __init
88__test(const char *expect, int elen, const char *fmt, ...)
89{
90 va_list ap;
91 int rand;
92 char *p;
93
94 BUG_ON(elen >= BUF_SIZE);
95
96 va_start(ap, fmt);
97
98 /*
99 * Every fmt+args is subjected to four tests: Three where we
100 * tell vsnprintf varying buffer sizes (plenty, not quite
101 * enough and 0), and then we also test that kvasprintf would
102 * be able to print it as expected.
103 */
104 failed_tests += do_test(BUF_SIZE, expect, elen, fmt, ap);
105 rand = 1 + prandom_u32_max(elen+1);
106 /* Since elen < BUF_SIZE, we have 1 <= rand <= BUF_SIZE. */
107 failed_tests += do_test(rand, expect, elen, fmt, ap);
108 failed_tests += do_test(0, expect, elen, fmt, ap);
109
110 p = kvasprintf(GFP_KERNEL, fmt, ap);
111 if (p) {
112 if (memcmp(p, expect, elen+1)) {
113 pr_warn("kvasprintf(..., \"%s\", ...) returned '%s', expected '%s'\n",
114 fmt, p, expect);
115 failed_tests++;
116 }
117 kfree(p);
118 }
119 va_end(ap);
120}
121
122#define test(expect, fmt, ...) \
123 __test(expect, strlen(expect), fmt, ##__VA_ARGS__)
124
125static void __init
126test_basic(void)
127{
128 /* Work around annoying "warning: zero-length gnu_printf format string". */
129 char nul = '\0';
130
131 test("", &nul);
132 test("100%", "100%%");
133 test("xxx%yyy", "xxx%cyyy", '%');
134 __test("xxx\0yyy", 7, "xxx%cyyy", '\0');
135}
136
137static void __init
138test_number(void)
139{
140 test("0x1234abcd ", "%#-12x", 0x1234abcd);
141 test(" 0x1234abcd", "%#12x", 0x1234abcd);
142 test("0|001| 12|+123| 1234|-123|-1234", "%d|%03d|%3d|%+d|% d|%+d|% d", 0, 1, 12, 123, 1234, -123, -1234);
143}
144
145static void __init
146test_string(void)
147{
148 test("", "%s%.0s", "", "123");
149 test("ABCD|abc|123", "%s|%.3s|%.*s", "ABCD", "abcdef", 3, "123456");
150 test("1 | 2|3 | 4|5 ", "%-3s|%3s|%-*s|%*s|%*s", "1", "2", 3, "3", 3, "4", -3, "5");
151 /*
152 * POSIX and C99 say that a missing precision should be
153 * treated as a precision of 0. However, the kernel's printf
154 * implementation treats this case as if the . wasn't
155 * present. Let's add a test case documenting the current
156 * behaviour; should anyone ever feel the need to follow the
157 * standards more closely, this can be revisited.
158 */
159 test("a||", "%.s|%.0s|%.*s", "a", "b", 0, "c");
160 test("a | | ", "%-3.s|%-3.0s|%-3.*s", "a", "b", 0, "c");
161}
162
163static void __init
164plain(void)
165{
166 test(PTR1_ZEROES PTR1_STR " " PTR2_STR, "%p %p", PTR1, PTR2);
167 /*
168 * The field width is overloaded for some %p extensions to
169 * pass another piece of information. For plain pointers, the
170 * behaviour is slightly odd: One cannot pass either the 0
171 * flag nor a precision to %p without gcc complaining, and if
172 * one explicitly gives a field width, the number is no longer
173 * zero-padded.
174 */
175 test("|" PTR1_STR PTR1_SPACES " | " PTR1_SPACES PTR1_STR "|",
176 "|%-*p|%*p|", PTR_WIDTH+2, PTR1, PTR_WIDTH+2, PTR1);
177 test("|" PTR2_STR " | " PTR2_STR "|",
178 "|%-*p|%*p|", PTR_WIDTH+2, PTR2, PTR_WIDTH+2, PTR2);
179
180 /*
181 * Unrecognized %p extensions are treated as plain %p, but the
182 * alphanumeric suffix is ignored (that is, does not occur in
183 * the output.)
184 */
185 test("|"PTR1_ZEROES PTR1_STR"|", "|%p0y|", PTR1);
186 test("|"PTR2_STR"|", "|%p0y|", PTR2);
187}
188
189static void __init
190symbol_ptr(void)
191{
192}
193
194static void __init
195kernel_ptr(void)
196{
197}
198
199static void __init
200struct_resource(void)
201{
202}
203
204static void __init
205addr(void)
206{
207}
208
209static void __init
210escaped_str(void)
211{
212}
213
214static void __init
215hex_string(void)
216{
217 const char buf[3] = {0xc0, 0xff, 0xee};
218
219 test("c0 ff ee|c0:ff:ee|c0-ff-ee|c0ffee",
220 "%3ph|%3phC|%3phD|%3phN", buf, buf, buf, buf);
221 test("c0 ff ee|c0:ff:ee|c0-ff-ee|c0ffee",
222 "%*ph|%*phC|%*phD|%*phN", 3, buf, 3, buf, 3, buf, 3, buf);
223}
224
225static void __init
226mac(void)
227{
228 const u8 addr[6] = {0x2d, 0x48, 0xd6, 0xfc, 0x7a, 0x05};
229
230 test("2d:48:d6:fc:7a:05", "%pM", addr);
231 test("05:7a:fc:d6:48:2d", "%pMR", addr);
232 test("2d-48-d6-fc-7a-05", "%pMF", addr);
233 test("2d48d6fc7a05", "%pm", addr);
234 test("057afcd6482d", "%pmR", addr);
235}
236
237static void __init
238ip4(void)
239{
240 struct sockaddr_in sa;
241
242 sa.sin_family = AF_INET;
243 sa.sin_port = cpu_to_be16(12345);
244 sa.sin_addr.s_addr = cpu_to_be32(0x7f000001);
245
246 test("127.000.000.001|127.0.0.1", "%pi4|%pI4", &sa.sin_addr, &sa.sin_addr);
247 test("127.000.000.001|127.0.0.1", "%piS|%pIS", &sa, &sa);
248 sa.sin_addr.s_addr = cpu_to_be32(0x01020304);
249 test("001.002.003.004:12345|1.2.3.4:12345", "%piSp|%pISp", &sa, &sa);
250}
251
252static void __init
253ip6(void)
254{
255}
256
257static void __init
258ip(void)
259{
260 ip4();
261 ip6();
262}
263
264static void __init
265uuid(void)
266{
267 const char uuid[16] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
268 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf};
269
270 test("00010203-0405-0607-0809-0a0b0c0d0e0f", "%pUb", uuid);
271 test("00010203-0405-0607-0809-0A0B0C0D0E0F", "%pUB", uuid);
272 test("03020100-0504-0706-0809-0a0b0c0d0e0f", "%pUl", uuid);
273 test("03020100-0504-0706-0809-0A0B0C0D0E0F", "%pUL", uuid);
274}
275
276static void __init
277dentry(void)
278{
279}
280
281static void __init
282struct_va_format(void)
283{
284}
285
286static void __init
287struct_clk(void)
288{
289}
290
291static void __init
292bitmap(void)
293{
294 DECLARE_BITMAP(bits, 20);
295 const int primes[] = {2,3,5,7,11,13,17,19};
296 int i;
297
298 bitmap_zero(bits, 20);
299 test("00000|00000", "%20pb|%*pb", bits, 20, bits);
300 test("|", "%20pbl|%*pbl", bits, 20, bits);
301
302 for (i = 0; i < ARRAY_SIZE(primes); ++i)
303 set_bit(primes[i], bits);
304 test("a28ac|a28ac", "%20pb|%*pb", bits, 20, bits);
305 test("2-3,5,7,11,13,17,19|2-3,5,7,11,13,17,19", "%20pbl|%*pbl", bits, 20, bits);
306
307 bitmap_fill(bits, 20);
308 test("fffff|fffff", "%20pb|%*pb", bits, 20, bits);
309 test("0-19|0-19", "%20pbl|%*pbl", bits, 20, bits);
310}
311
312static void __init
313netdev_features(void)
314{
315}
316
317static void __init
318test_pointer(void)
319{
320 plain();
321 symbol_ptr();
322 kernel_ptr();
323 struct_resource();
324 addr();
325 escaped_str();
326 hex_string();
327 mac();
328 ip();
329 uuid();
330 dentry();
331 struct_va_format();
332 struct_clk();
333 bitmap();
334 netdev_features();
335}
336
337static int __init
338test_printf_init(void)
339{
340 test_buffer = kmalloc(BUF_SIZE, GFP_KERNEL);
341 if (!test_buffer)
342 return -ENOMEM;
343
344 test_basic();
345 test_number();
346 test_string();
347 test_pointer();
348
349 kfree(test_buffer);
350
351 if (failed_tests == 0)
352 pr_info("all %u tests passed\n", total_tests);
353 else
354 pr_warn("failed %u out of %u tests\n", failed_tests, total_tests);
355
356 return failed_tests ? -EINVAL : 0;
357}
358
359module_init(test_printf_init);
360
361MODULE_AUTHOR("Rasmus Villemoes <linux@rasmusvillemoes.dk>");
362MODULE_LICENSE("GPL");
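
The module is built when the new TEST_PRINTF entry in lib/Kconfig.debug is enabled and reports its results through pr_info()/pr_warn() at load time. New cases follow the test(expect, fmt, ...) convention, where the expected string must match exactly, field widths included. A hypothetical extra case (not part of this patch):

/* Hypothetical additional case following the test() convention above. */
static void __init
test_more_numbers(void)
{
	test("ff|FF|0377|255", "%x|%X|%#o|%u", 255, 255, 255, 255);
}
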
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 95cd63b43b99..f9cee8e1233c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1449,6 +1449,8 @@ int kptr_restrict __read_mostly;
1449 * (legacy clock framework) of the clock 1449 * (legacy clock framework) of the clock
1450 * - 'Cr' For a clock, it prints the current rate of the clock 1450 * - 'Cr' For a clock, it prints the current rate of the clock
1451 * 1451 *
1452 * ** Please update also Documentation/printk-formats.txt when making changes **
1453 *
1452 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 1454 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
1453 * function pointers are really function descriptors, which contain a 1455 * function pointers are really function descriptors, which contain a
1454 * pointer to the real address. 1456 * pointer to the real address.
@@ -1457,7 +1459,7 @@ static noinline_for_stack
1457char *pointer(const char *fmt, char *buf, char *end, void *ptr, 1459char *pointer(const char *fmt, char *buf, char *end, void *ptr,
1458 struct printf_spec spec) 1460 struct printf_spec spec)
1459{ 1461{
1460 int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0); 1462 const int default_width = 2 * sizeof(void *);
1461 1463
1462 if (!ptr && *fmt != 'K') { 1464 if (!ptr && *fmt != 'K') {
1463 /* 1465 /*
@@ -1769,14 +1771,14 @@ qualifier:
1769 1771
1770 case 'n': 1772 case 'n':
1771 /* 1773 /*
1772 * Since %n poses a greater security risk than utility, treat 1774 * Since %n poses a greater security risk than
1773 * it as an invalid format specifier. Warn about its use so 1775 * utility, treat it as any other invalid or
1774 * that new instances don't get added. 1776 * unsupported format specifier.
1775 */ 1777 */
1776 WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", fmt);
1777 /* Fall-through */ 1778 /* Fall-through */
1778 1779
1779 default: 1780 default:
1781 WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt);
1780 spec->type = FORMAT_TYPE_INVALID; 1782 spec->type = FORMAT_TYPE_INVALID;
1781 return fmt - start; 1783 return fmt - start;
1782 } 1784 }
@@ -1811,41 +1813,16 @@ qualifier:
1811 * @fmt: The format string to use 1813 * @fmt: The format string to use
1812 * @args: Arguments for the format string 1814 * @args: Arguments for the format string
1813 * 1815 *
1814 * This function follows C99 vsnprintf, but has some extensions: 1816 * This function generally follows C99 vsnprintf, but has some
1815 * %pS output the name of a text symbol with offset 1817 * extensions and a few limitations:
1816 * %ps output the name of a text symbol without offset 1818 *
1817 * %pF output the name of a function pointer with its offset 1819 * %n is unsupported
1818 * %pf output the name of a function pointer without its offset 1820 * %p* is handled by pointer()
1819 * %pB output the name of a backtrace symbol with its offset
1820 * %pR output the address range in a struct resource with decoded flags
1821 * %pr output the address range in a struct resource with raw flags
1822 * %pb output the bitmap with field width as the number of bits
1823 * %pbl output the bitmap as range list with field width as the number of bits
1824 * %pM output a 6-byte MAC address with colons
1825 * %pMR output a 6-byte MAC address with colons in reversed order
1826 * %pMF output a 6-byte MAC address with dashes
1827 * %pm output a 6-byte MAC address without colons
1828 * %pmR output a 6-byte MAC address without colons in reversed order
1829 * %pI4 print an IPv4 address without leading zeros
1830 * %pi4 print an IPv4 address with leading zeros
1831 * %pI6 print an IPv6 address with colons
1832 * %pi6 print an IPv6 address without colons
1833 * %pI6c print an IPv6 address as specified by RFC 5952
1834 * %pIS depending on sa_family of 'struct sockaddr *' print IPv4/IPv6 address
1835 * %piS depending on sa_family of 'struct sockaddr *' print IPv4/IPv6 address
1836 * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper
1837 * case.
1838 * %*pE[achnops] print an escaped buffer
1839 * %*ph[CDN] a variable-length hex string with a separator (supports up to 64
1840 * bytes of the input)
1841 * %pC output the name (Common Clock Framework) or address (legacy clock
1842 * framework) of a clock
1843 * %pCn output the name (Common Clock Framework) or address (legacy clock
1844 * framework) of a clock
1845 * %pCr output the current rate of a clock
1846 * %n is ignored
1847 * 1821 *
1848 * ** Please update Documentation/printk-formats.txt when making changes ** 1822 * See pointer() or Documentation/printk-formats.txt for more
1823 * extensive description.
1824 *
1825 * ** Please update the documentation in both places when making changes **
1849 * 1826 *
1850 * The return value is the number of characters which would 1827 * The return value is the number of characters which would
1851 * be generated for the given input, excluding the trailing 1828 * be generated for the given input, excluding the trailing
@@ -1944,10 +1921,15 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1944 break; 1921 break;
1945 1922
1946 case FORMAT_TYPE_INVALID: 1923 case FORMAT_TYPE_INVALID:
1947 if (str < end) 1924 /*
1948 *str = '%'; 1925 * Presumably the arguments passed gcc's type
1949 ++str; 1926 * checking, but there is no safe or sane way
1950 break; 1927 * for us to continue parsing the format and
1928 * fetching from the va_list; the remaining
1929 * specifiers and arguments would be out of
1930 * sync.
1931 */
1932 goto out;
1951 1933
1952 default: 1934 default:
1953 switch (spec.type) { 1935 switch (spec.type) {
@@ -1992,6 +1974,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1992 } 1974 }
1993 } 1975 }
1994 1976
1977out:
1995 if (size > 0) { 1978 if (size > 0) {
1996 if (str < end) 1979 if (str < end)
1997 *str = '\0'; 1980 *str = '\0';
@@ -2189,9 +2172,10 @@ do { \
2189 2172
2190 switch (spec.type) { 2173 switch (spec.type) {
2191 case FORMAT_TYPE_NONE: 2174 case FORMAT_TYPE_NONE:
2192 case FORMAT_TYPE_INVALID:
2193 case FORMAT_TYPE_PERCENT_CHAR: 2175 case FORMAT_TYPE_PERCENT_CHAR:
2194 break; 2176 break;
2177 case FORMAT_TYPE_INVALID:
2178 goto out;
2195 2179
2196 case FORMAT_TYPE_WIDTH: 2180 case FORMAT_TYPE_WIDTH:
2197 case FORMAT_TYPE_PRECISION: 2181 case FORMAT_TYPE_PRECISION:
@@ -2253,6 +2237,7 @@ do { \
2253 } 2237 }
2254 } 2238 }
2255 2239
2240out:
2256 return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; 2241 return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
2257#undef save_arg 2242#undef save_arg
2258} 2243}
@@ -2286,7 +2271,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
2286 char *str, *end; 2271 char *str, *end;
2287 const char *args = (const char *)bin_buf; 2272 const char *args = (const char *)bin_buf;
2288 2273
2289 if (WARN_ON_ONCE((int) size < 0)) 2274 if (WARN_ON_ONCE(size > INT_MAX))
2290 return 0; 2275 return 0;
2291 2276
2292 str = buf; 2277 str = buf;
@@ -2375,12 +2360,14 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
2375 break; 2360 break;
2376 2361
2377 case FORMAT_TYPE_PERCENT_CHAR: 2362 case FORMAT_TYPE_PERCENT_CHAR:
2378 case FORMAT_TYPE_INVALID:
2379 if (str < end) 2363 if (str < end)
2380 *str = '%'; 2364 *str = '%';
2381 ++str; 2365 ++str;
2382 break; 2366 break;
2383 2367
2368 case FORMAT_TYPE_INVALID:
2369 goto out;
2370
2384 default: { 2371 default: {
2385 unsigned long long num; 2372 unsigned long long num;
2386 2373
@@ -2423,6 +2410,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
2423 } /* switch(spec.type) */ 2410 } /* switch(spec.type) */
2424 } /* while(*fmt) */ 2411 } /* while(*fmt) */
2425 2412
2413out:
2426 if (size > 0) { 2414 if (size > 0) {
2427 if (str < end) 2415 if (str < end)
2428 *str = '\0'; 2416 *str = '\0';
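
The practical effect of the FORMAT_TYPE_INVALID changes: once a specifier cannot be decoded, the remaining varargs can no longer be consumed safely, so vsnprintf() now stops there and warns once instead of emitting a literal '%' and carrying on with a desynchronised va_list. A hedged sketch of the observable difference (a description, not output captured from a running kernel):

/* Sketch of the behaviour change for an unsupported specifier. */
char buf[32];

snprintf(buf, sizeof(buf), "id %d %q", 7);
/* before: roughly "id 7 %q", with parsing continuing past the bad specifier
 * after:  "id 7 " plus a one-time "Please remove unsupported %q in format
 *         string" warning, and nothing further is formatted */
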
diff --git a/mm/Kconfig b/mm/Kconfig
index 0d9fdcd01e47..97a4e06b15c0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -200,18 +200,6 @@ config MEMORY_HOTREMOVE
200 depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE 200 depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
201 depends on MIGRATION 201 depends on MIGRATION
202 202
203#
204# If we have space for more page flags then we can enable additional
205# optimizations and functionality.
206#
207# Regular Sparsemem takes page flag bits for the sectionid if it does not
208# use a virtual memmap. Disable extended page flags for 32 bit platforms
209# that require the use of a sectionid in the page flags.
210#
211config PAGEFLAGS_EXTENDED
212 def_bool y
213 depends on 64BIT || SPARSEMEM_VMEMMAP || !SPARSEMEM
214
215# Heavily threaded applications may benefit from splitting the mm-wide 203# Heavily threaded applications may benefit from splitting the mm-wide
216# page_table_lock, so that faults on different parts of the user address 204# page_table_lock, so that faults on different parts of the user address
217# space can be handled with less contention: split it at this NR_CPUS. 205# space can be handled with less contention: split it at this NR_CPUS.
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 619984fc07ec..8ed2ffd963c5 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -637,7 +637,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
637{ 637{
638 struct bdi_writeback *wb; 638 struct bdi_writeback *wb;
639 639
640 might_sleep_if(gfp & __GFP_WAIT); 640 might_sleep_if(gfpflags_allow_blocking(gfp));
641 641
642 if (!memcg_css->parent) 642 if (!memcg_css->parent)
643 return &bdi->wb; 643 return &bdi->wb;
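The open-coded gfp & __GFP_WAIT tests here, and in the dmapool and memcontrol hunks below, become gfpflags_allow_blocking(). The helper lives in include/linux/gfp.h (outside these hunks) and presumably amounts to:

    static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
    {
            return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
    }

i.e. only __GFP_DIRECT_RECLAIM, not kswapd-only reclaim, implies the caller may sleep.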
diff --git a/mm/debug.c b/mm/debug.c
index e784110fb51d..668aa35191ca 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -25,12 +25,7 @@ static const struct trace_print_flags pageflag_names[] = {
25 {1UL << PG_private, "private" }, 25 {1UL << PG_private, "private" },
26 {1UL << PG_private_2, "private_2" }, 26 {1UL << PG_private_2, "private_2" },
27 {1UL << PG_writeback, "writeback" }, 27 {1UL << PG_writeback, "writeback" },
28#ifdef CONFIG_PAGEFLAGS_EXTENDED
29 {1UL << PG_head, "head" }, 28 {1UL << PG_head, "head" },
30 {1UL << PG_tail, "tail" },
31#else
32 {1UL << PG_compound, "compound" },
33#endif
34 {1UL << PG_swapcache, "swapcache" }, 29 {1UL << PG_swapcache, "swapcache" },
35 {1UL << PG_mappedtodisk, "mappedtodisk" }, 30 {1UL << PG_mappedtodisk, "mappedtodisk" },
36 {1UL << PG_reclaim, "reclaim" }, 31 {1UL << PG_reclaim, "reclaim" },
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 312a716fa14c..57312b5d6e12 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -326,7 +326,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
326 size_t offset; 326 size_t offset;
327 void *retval; 327 void *retval;
328 328
329 might_sleep_if(mem_flags & __GFP_WAIT); 329 might_sleep_if(gfpflags_allow_blocking(mem_flags));
330 330
331 spin_lock_irqsave(&pool->lock, flags); 331 spin_lock_irqsave(&pool->lock, flags);
332 list_for_each_entry(page, &pool->page_list, page_list) { 332 list_for_each_entry(page, &pool->page_list, page_list) {
diff --git a/mm/failslab.c b/mm/failslab.c
index 98fb490311eb..79171b4a5826 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -3,11 +3,11 @@
3 3
4static struct { 4static struct {
5 struct fault_attr attr; 5 struct fault_attr attr;
6 bool ignore_gfp_wait; 6 bool ignore_gfp_reclaim;
7 bool cache_filter; 7 bool cache_filter;
8} failslab = { 8} failslab = {
9 .attr = FAULT_ATTR_INITIALIZER, 9 .attr = FAULT_ATTR_INITIALIZER,
10 .ignore_gfp_wait = true, 10 .ignore_gfp_reclaim = true,
11 .cache_filter = false, 11 .cache_filter = false,
12}; 12};
13 13
@@ -16,7 +16,7 @@ bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags)
16 if (gfpflags & __GFP_NOFAIL) 16 if (gfpflags & __GFP_NOFAIL)
17 return false; 17 return false;
18 18
19 if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) 19 if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM))
20 return false; 20 return false;
21 21
22 if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) 22 if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB))
@@ -42,7 +42,7 @@ static int __init failslab_debugfs_init(void)
42 return PTR_ERR(dir); 42 return PTR_ERR(dir);
43 43
44 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, 44 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
45 &failslab.ignore_gfp_wait)) 45 &failslab.ignore_gfp_reclaim))
46 goto fail; 46 goto fail;
47 if (!debugfs_create_bool("cache-filter", mode, dir, 47 if (!debugfs_create_bool("cache-filter", mode, dir,
48 &failslab.cache_filter)) 48 &failslab.cache_filter))
diff --git a/mm/filemap.c b/mm/filemap.c
index 58e04e26f996..1bb007624b53 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1722,7 +1722,7 @@ no_cached_page:
1722 goto out; 1722 goto out;
1723 } 1723 }
1724 error = add_to_page_cache_lru(page, mapping, index, 1724 error = add_to_page_cache_lru(page, mapping, index,
1725 GFP_KERNEL & mapping_gfp_mask(mapping)); 1725 mapping_gfp_constraint(mapping, GFP_KERNEL));
1726 if (error) { 1726 if (error) {
1727 page_cache_release(page); 1727 page_cache_release(page);
1728 if (error == -EEXIST) { 1728 if (error == -EEXIST) {
@@ -1824,7 +1824,7 @@ static int page_cache_read(struct file *file, pgoff_t offset)
1824 return -ENOMEM; 1824 return -ENOMEM;
1825 1825
1826 ret = add_to_page_cache_lru(page, mapping, offset, 1826 ret = add_to_page_cache_lru(page, mapping, offset,
1827 GFP_KERNEL & mapping_gfp_mask(mapping)); 1827 mapping_gfp_constraint(mapping, GFP_KERNEL));
1828 if (ret == 0) 1828 if (ret == 0)
1829 ret = mapping->a_ops->readpage(file, page); 1829 ret = mapping->a_ops->readpage(file, page);
1830 else if (ret == -EEXIST) 1830 else if (ret == -EEXIST)
@@ -2713,7 +2713,7 @@ EXPORT_SYMBOL(generic_file_write_iter);
2713 * page is known to the local caching routines. 2713 * page is known to the local caching routines.
2714 * 2714 *
2715 * The @gfp_mask argument specifies whether I/O may be performed to release 2715 * The @gfp_mask argument specifies whether I/O may be performed to release
2716 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). 2716 * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
2717 * 2717 *
2718 */ 2718 */
2719int try_to_release_page(struct page *page, gfp_t gfp_mask) 2719int try_to_release_page(struct page *page, gfp_t gfp_mask)
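mapping_gfp_constraint(), used in the first two hunks above, replaces the open-coded GFP_KERNEL & mapping_gfp_mask(mapping). A sketch of what the helper is assumed to do (it is declared in include/linux/pagemap.h, outside this diff):

    static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
                                               gfp_t gfp_mask)
    {
            return mapping_gfp_mask(mapping) & gfp_mask;
    }

so the result is simply the caller's flags masked by what the mapping allows -- the same AND as before, behind a name that states the intent.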
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00cfd1ae2271..c29ddebc8705 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void)
116 for_each_populated_zone(zone) 116 for_each_populated_zone(zone)
117 nr_zones++; 117 nr_zones++;
118 118
119 /* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */ 119 /* Ensure 2 pageblocks are free to assist fragmentation avoidance */
120 recommended_min = pageblock_nr_pages * nr_zones * 2; 120 recommended_min = pageblock_nr_pages * nr_zones * 2;
121 121
122 /* 122 /*
@@ -786,7 +786,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
786 786
787static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) 787static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
788{ 788{
789 return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp; 789 return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp;
790} 790}
791 791
792/* Caller must hold page table lock. */ 792/* Caller must hold page table lock. */
@@ -1755,8 +1755,7 @@ static void __split_huge_page_refcount(struct page *page,
1755 (1L << PG_unevictable))); 1755 (1L << PG_unevictable)));
1756 page_tail->flags |= (1L << PG_dirty); 1756 page_tail->flags |= (1L << PG_dirty);
1757 1757
1758 /* clear PageTail before overwriting first_page */ 1758 clear_compound_head(page_tail);
1759 smp_wmb();
1760 1759
1761 if (page_is_young(page)) 1760 if (page_is_young(page))
1762 set_page_young(page_tail); 1761 set_page_young(page_tail);
@@ -2413,8 +2412,7 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
2413 2412
2414static struct page * 2413static struct page *
2415khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm, 2414khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
2416 struct vm_area_struct *vma, unsigned long address, 2415 unsigned long address, int node)
2417 int node)
2418{ 2416{
2419 VM_BUG_ON_PAGE(*hpage, *hpage); 2417 VM_BUG_ON_PAGE(*hpage, *hpage);
2420 2418
@@ -2481,8 +2479,7 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
2481 2479
2482static struct page * 2480static struct page *
2483khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm, 2481khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
2484 struct vm_area_struct *vma, unsigned long address, 2482 unsigned long address, int node)
2485 int node)
2486{ 2483{
2487 up_read(&mm->mmap_sem); 2484 up_read(&mm->mmap_sem);
2488 VM_BUG_ON(!*hpage); 2485 VM_BUG_ON(!*hpage);
@@ -2530,7 +2527,7 @@ static void collapse_huge_page(struct mm_struct *mm,
2530 __GFP_THISNODE; 2527 __GFP_THISNODE;
2531 2528
2532 /* release the mmap_sem read lock. */ 2529 /* release the mmap_sem read lock. */
2533 new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node); 2530 new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node);
2534 if (!new_page) 2531 if (!new_page)
2535 return; 2532 return;
2536 2533
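For the alloc_hugepage_gfpmask() change above, a small illustration (values hypothetical) of what the two defrag settings now yield under the renamed flags:

    /* Illustration only: with defrag enabled GFP_TRANSHUGE is returned
     * unchanged; with defrag disabled the __GFP_RECLAIM bits are cleared so
     * the THP allocation will not enter reclaim. */
    gfp_t with_defrag    = alloc_hugepage_gfpmask(1, 0); /* GFP_TRANSHUGE */
    gfp_t without_defrag = alloc_hugepage_gfpmask(0, 0); /* GFP_TRANSHUGE & ~__GFP_RECLAIM */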
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 74ef0c6a25dd..7ce07d681265 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -994,23 +994,22 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
994 994
995#if defined(CONFIG_CMA) && defined(CONFIG_X86_64) 995#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
996static void destroy_compound_gigantic_page(struct page *page, 996static void destroy_compound_gigantic_page(struct page *page,
997 unsigned long order) 997 unsigned int order)
998{ 998{
999 int i; 999 int i;
1000 int nr_pages = 1 << order; 1000 int nr_pages = 1 << order;
1001 struct page *p = page + 1; 1001 struct page *p = page + 1;
1002 1002
1003 for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { 1003 for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
1004 __ClearPageTail(p); 1004 clear_compound_head(p);
1005 set_page_refcounted(p); 1005 set_page_refcounted(p);
1006 p->first_page = NULL;
1007 } 1006 }
1008 1007
1009 set_compound_order(page, 0); 1008 set_compound_order(page, 0);
1010 __ClearPageHead(page); 1009 __ClearPageHead(page);
1011} 1010}
1012 1011
1013static void free_gigantic_page(struct page *page, unsigned order) 1012static void free_gigantic_page(struct page *page, unsigned int order)
1014{ 1013{
1015 free_contig_range(page_to_pfn(page), 1 << order); 1014 free_contig_range(page_to_pfn(page), 1 << order);
1016} 1015}
@@ -1054,7 +1053,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
1054 return zone_spans_pfn(zone, last_pfn); 1053 return zone_spans_pfn(zone, last_pfn);
1055} 1054}
1056 1055
1057static struct page *alloc_gigantic_page(int nid, unsigned order) 1056static struct page *alloc_gigantic_page(int nid, unsigned int order)
1058{ 1057{
1059 unsigned long nr_pages = 1 << order; 1058 unsigned long nr_pages = 1 << order;
1060 unsigned long ret, pfn, flags; 1059 unsigned long ret, pfn, flags;
@@ -1090,7 +1089,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order)
1090} 1089}
1091 1090
1092static void prep_new_huge_page(struct hstate *h, struct page *page, int nid); 1091static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
1093static void prep_compound_gigantic_page(struct page *page, unsigned long order); 1092static void prep_compound_gigantic_page(struct page *page, unsigned int order);
1094 1093
1095static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid) 1094static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
1096{ 1095{
@@ -1123,9 +1122,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h,
1123static inline bool gigantic_page_supported(void) { return true; } 1122static inline bool gigantic_page_supported(void) { return true; }
1124#else 1123#else
1125static inline bool gigantic_page_supported(void) { return false; } 1124static inline bool gigantic_page_supported(void) { return false; }
1126static inline void free_gigantic_page(struct page *page, unsigned order) { } 1125static inline void free_gigantic_page(struct page *page, unsigned int order) { }
1127static inline void destroy_compound_gigantic_page(struct page *page, 1126static inline void destroy_compound_gigantic_page(struct page *page,
1128 unsigned long order) { } 1127 unsigned int order) { }
1129static inline int alloc_fresh_gigantic_page(struct hstate *h, 1128static inline int alloc_fresh_gigantic_page(struct hstate *h,
1130 nodemask_t *nodes_allowed) { return 0; } 1129 nodemask_t *nodes_allowed) { return 0; }
1131#endif 1130#endif
@@ -1146,7 +1145,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
1146 1 << PG_writeback); 1145 1 << PG_writeback);
1147 } 1146 }
1148 VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); 1147 VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
1149 set_compound_page_dtor(page, NULL); 1148 set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
1150 set_page_refcounted(page); 1149 set_page_refcounted(page);
1151 if (hstate_is_gigantic(h)) { 1150 if (hstate_is_gigantic(h)) {
1152 destroy_compound_gigantic_page(page, huge_page_order(h)); 1151 destroy_compound_gigantic_page(page, huge_page_order(h));
@@ -1242,7 +1241,7 @@ void free_huge_page(struct page *page)
1242static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) 1241static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
1243{ 1242{
1244 INIT_LIST_HEAD(&page->lru); 1243 INIT_LIST_HEAD(&page->lru);
1245 set_compound_page_dtor(page, free_huge_page); 1244 set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
1246 spin_lock(&hugetlb_lock); 1245 spin_lock(&hugetlb_lock);
1247 set_hugetlb_cgroup(page, NULL); 1246 set_hugetlb_cgroup(page, NULL);
1248 h->nr_huge_pages++; 1247 h->nr_huge_pages++;
@@ -1251,7 +1250,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
1251 put_page(page); /* free it into the hugepage allocator */ 1250 put_page(page); /* free it into the hugepage allocator */
1252} 1251}
1253 1252
1254static void prep_compound_gigantic_page(struct page *page, unsigned long order) 1253static void prep_compound_gigantic_page(struct page *page, unsigned int order)
1255{ 1254{
1256 int i; 1255 int i;
1257 int nr_pages = 1 << order; 1256 int nr_pages = 1 << order;
@@ -1276,10 +1275,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
1276 */ 1275 */
1277 __ClearPageReserved(p); 1276 __ClearPageReserved(p);
1278 set_page_count(p, 0); 1277 set_page_count(p, 0);
1279 p->first_page = page; 1278 set_compound_head(p, page);
1280 /* Make sure p->first_page is always valid for PageTail() */
1281 smp_wmb();
1282 __SetPageTail(p);
1283 } 1279 }
1284} 1280}
1285 1281
@@ -1294,7 +1290,7 @@ int PageHuge(struct page *page)
1294 return 0; 1290 return 0;
1295 1291
1296 page = compound_head(page); 1292 page = compound_head(page);
1297 return get_compound_page_dtor(page) == free_huge_page; 1293 return page[1].compound_dtor == HUGETLB_PAGE_DTOR;
1298} 1294}
1299EXPORT_SYMBOL_GPL(PageHuge); 1295EXPORT_SYMBOL_GPL(PageHuge);
1300 1296
@@ -1568,7 +1564,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
1568 if (page) { 1564 if (page) {
1569 INIT_LIST_HEAD(&page->lru); 1565 INIT_LIST_HEAD(&page->lru);
1570 r_nid = page_to_nid(page); 1566 r_nid = page_to_nid(page);
1571 set_compound_page_dtor(page, free_huge_page); 1567 set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
1572 set_hugetlb_cgroup(page, NULL); 1568 set_hugetlb_cgroup(page, NULL);
1573 /* 1569 /*
1574 * We incremented the global counters already 1570 * We incremented the global counters already
@@ -1972,7 +1968,8 @@ found:
1972 return 1; 1968 return 1;
1973} 1969}
1974 1970
1975static void __init prep_compound_huge_page(struct page *page, int order) 1971static void __init prep_compound_huge_page(struct page *page,
1972 unsigned int order)
1976{ 1973{
1977 if (unlikely(order > (MAX_ORDER - 1))) 1974 if (unlikely(order > (MAX_ORDER - 1)))
1978 prep_compound_gigantic_page(page, order); 1975 prep_compound_gigantic_page(page, order);
@@ -2683,7 +2680,7 @@ static int __init hugetlb_init(void)
2683module_init(hugetlb_init); 2680module_init(hugetlb_init);
2684 2681
2685/* Should be called on processing a hugepagesz=... option */ 2682/* Should be called on processing a hugepagesz=... option */
2686void __init hugetlb_add_hstate(unsigned order) 2683void __init hugetlb_add_hstate(unsigned int order)
2687{ 2684{
2688 struct hstate *h; 2685 struct hstate *h;
2689 unsigned long i; 2686 unsigned long i;
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 33d59abe91f1..d8fb10de0f14 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -385,7 +385,7 @@ void __init hugetlb_cgroup_file_init(void)
385 /* 385 /*
386 * Add cgroup control files only if the huge page consists 386 * Add cgroup control files only if the huge page consists
387 * of more than two normal pages. This is because we use 387 * of more than two normal pages. This is because we use
388 * page[2].lru.next for storing cgroup details. 388 * page[2].private for storing cgroup details.
389 */ 389 */
390 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) 390 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
391 __hugetlb_cgroup_file_init(hstate_index(h)); 391 __hugetlb_cgroup_file_init(hstate_index(h));
diff --git a/mm/internal.h b/mm/internal.h
index d4b807d6c963..38e24b89e4c4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -14,6 +14,25 @@
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16 16
17/*
18 * The set of flags that only affect watermark checking and reclaim
19 * behaviour. This is used by the MM to obey the caller constraints
20 * about IO, FS and watermark checking while ignoring placement
21 * hints such as HIGHMEM usage.
22 */
23#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
24 __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
25 __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
26
27/* The GFP flags allowed during early boot */
28#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
29
30/* Control allocation cpuset and node placement constraints */
31#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
32
33/* Do not use these with a slab allocator */
34#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
35
17void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, 36void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
18 unsigned long floor, unsigned long ceiling); 37 unsigned long floor, unsigned long ceiling);
19 38
@@ -61,9 +80,9 @@ static inline void __get_page_tail_foll(struct page *page,
61 * speculative page access (like in 80 * speculative page access (like in
62 * page_cache_get_speculative()) on tail pages. 81 * page_cache_get_speculative()) on tail pages.
63 */ 82 */
64 VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page); 83 VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page);
65 if (get_page_head) 84 if (get_page_head)
66 atomic_inc(&page->first_page->_count); 85 atomic_inc(&compound_head(page)->_count);
67 get_huge_page_tail(page); 86 get_huge_page_tail(page);
68} 87}
69 88
@@ -129,6 +148,7 @@ struct alloc_context {
129 int classzone_idx; 148 int classzone_idx;
130 int migratetype; 149 int migratetype;
131 enum zone_type high_zoneidx; 150 enum zone_type high_zoneidx;
151 bool spread_dirty_pages;
132}; 152};
133 153
134/* 154/*
@@ -157,7 +177,7 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
157extern int __isolate_free_page(struct page *page, unsigned int order); 177extern int __isolate_free_page(struct page *page, unsigned int order);
158extern void __free_pages_bootmem(struct page *page, unsigned long pfn, 178extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
159 unsigned int order); 179 unsigned int order);
160extern void prep_compound_page(struct page *page, unsigned long order); 180extern void prep_compound_page(struct page *page, unsigned int order);
161#ifdef CONFIG_MEMORY_FAILURE 181#ifdef CONFIG_MEMORY_FAILURE
162extern bool is_free_buddy_page(struct page *page); 182extern bool is_free_buddy_page(struct page *page);
163#endif 183#endif
@@ -215,7 +235,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
215 * page cannot be allocated or merged in parallel. Alternatively, it must 235 * page cannot be allocated or merged in parallel. Alternatively, it must
216 * handle invalid values gracefully, and use page_order_unsafe() below. 236 * handle invalid values gracefully, and use page_order_unsafe() below.
217 */ 237 */
218static inline unsigned long page_order(struct page *page) 238static inline unsigned int page_order(struct page *page)
219{ 239{
220 /* PageBuddy() must be checked by the caller */ 240 /* PageBuddy() must be checked by the caller */
221 return page_private(page); 241 return page_private(page);
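The GFP mask categories added to mm/internal.h above group flags by what they control. A hypothetical helper, purely to illustrate the intended split (the name and its use are assumptions, not part of the patch):

    /* Hypothetical, for illustration: separate the bits that shape reclaim /
     * watermark behaviour from the bits that constrain placement. */
    static inline void split_gfp_mask(gfp_t gfp_mask, gfp_t *reclaim,
                                      gfp_t *constraint)
    {
            *reclaim    = gfp_mask & GFP_RECLAIM_MASK;     /* IO, FS, watermarks */
            *constraint = gfp_mask & GFP_CONSTRAINT_MASK;  /* cpuset / node placement */
    }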
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bc502e590366..9acfb165eb52 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2046,7 +2046,7 @@ retry:
2046 if (unlikely(task_in_memcg_oom(current))) 2046 if (unlikely(task_in_memcg_oom(current)))
2047 goto nomem; 2047 goto nomem;
2048 2048
2049 if (!(gfp_mask & __GFP_WAIT)) 2049 if (!gfpflags_allow_blocking(gfp_mask))
2050 goto nomem; 2050 goto nomem;
2051 2051
2052 mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1); 2052 mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1);
@@ -2120,7 +2120,7 @@ done_restock:
2120 /* 2120 /*
2121 * If the hierarchy is above the normal consumption range, schedule 2121 * If the hierarchy is above the normal consumption range, schedule
2122 * reclaim on returning to userland. We can perform reclaim here 2122 * reclaim on returning to userland. We can perform reclaim here
2123 * if __GFP_WAIT but let's always punt for simplicity and so that 2123 * if __GFP_RECLAIM but let's always punt for simplicity and so that
2124 * GFP_KERNEL can consistently be used during reclaim. @memcg is 2124 * GFP_KERNEL can consistently be used during reclaim. @memcg is
2125 * not recorded as it most likely matches current's and won't 2125 * not recorded as it most likely matches current's and won't
2126 * change in the meantime. As high limit is checked again before 2126 * change in the meantime. As high limit is checked again before
@@ -2801,7 +2801,7 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
2801 return val; 2801 return val;
2802} 2802}
2803 2803
2804static inline unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) 2804static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
2805{ 2805{
2806 unsigned long val; 2806 unsigned long val;
2807 2807
@@ -4364,8 +4364,8 @@ static int mem_cgroup_do_precharge(unsigned long count)
4364{ 4364{
4365 int ret; 4365 int ret;
4366 4366
4367 /* Try a single bulk charge without reclaim first */ 4367 /* Try a single bulk charge without reclaim first, kswapd may wake */
4368 ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); 4368 ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
4369 if (!ret) { 4369 if (!ret) {
4370 mc.precharge += count; 4370 mc.precharge += count;
4371 return ret; 4371 return ret;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 16a0ec385320..8424b64711ac 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -776,8 +776,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
776#define lru (1UL << PG_lru) 776#define lru (1UL << PG_lru)
777#define swapbacked (1UL << PG_swapbacked) 777#define swapbacked (1UL << PG_swapbacked)
778#define head (1UL << PG_head) 778#define head (1UL << PG_head)
779#define tail (1UL << PG_tail)
780#define compound (1UL << PG_compound)
781#define slab (1UL << PG_slab) 779#define slab (1UL << PG_slab)
782#define reserved (1UL << PG_reserved) 780#define reserved (1UL << PG_reserved)
783 781
@@ -800,12 +798,7 @@ static struct page_state {
800 */ 798 */
801 { slab, slab, MF_MSG_SLAB, me_kernel }, 799 { slab, slab, MF_MSG_SLAB, me_kernel },
802 800
803#ifdef CONFIG_PAGEFLAGS_EXTENDED
804 { head, head, MF_MSG_HUGE, me_huge_page }, 801 { head, head, MF_MSG_HUGE, me_huge_page },
805 { tail, tail, MF_MSG_HUGE, me_huge_page },
806#else
807 { compound, compound, MF_MSG_HUGE, me_huge_page },
808#endif
809 802
810 { sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty }, 803 { sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty },
811 { sc|dirty, sc, MF_MSG_CLEAN_SWAPCACHE, me_swapcache_clean }, 804 { sc|dirty, sc, MF_MSG_CLEAN_SWAPCACHE, me_swapcache_clean },
diff --git a/mm/mempool.c b/mm/mempool.c
index 4c533bc51d73..004d42b1dfaf 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -320,13 +320,13 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
320 gfp_t gfp_temp; 320 gfp_t gfp_temp;
321 321
322 VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); 322 VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
323 might_sleep_if(gfp_mask & __GFP_WAIT); 323 might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
324 324
325 gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ 325 gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
326 gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ 326 gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
327 gfp_mask |= __GFP_NOWARN; /* failures are OK */ 327 gfp_mask |= __GFP_NOWARN; /* failures are OK */
328 328
329 gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO); 329 gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
330 330
331repeat_alloc: 331repeat_alloc:
332 332
@@ -349,7 +349,7 @@ repeat_alloc:
349 } 349 }
350 350
351 /* 351 /*
352 * We use gfp mask w/o __GFP_WAIT or IO for the first round. If 352 * We use gfp mask w/o direct reclaim or IO for the first round. If
353 * alloc failed with that and @pool was empty, retry immediately. 353 * alloc failed with that and @pool was empty, retry immediately.
354 */ 354 */
355 if (gfp_temp != gfp_mask) { 355 if (gfp_temp != gfp_mask) {
@@ -358,8 +358,8 @@ repeat_alloc:
358 goto repeat_alloc; 358 goto repeat_alloc;
359 } 359 }
360 360
361 /* We must not sleep if !__GFP_WAIT */ 361 /* We must not sleep if !__GFP_DIRECT_RECLAIM */
362 if (!(gfp_mask & __GFP_WAIT)) { 362 if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
363 spin_unlock_irqrestore(&pool->lock, flags); 363 spin_unlock_irqrestore(&pool->lock, flags);
364 return NULL; 364 return NULL;
365 } 365 }
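A usage sketch of the mempool_alloc() rules restated above (the wrapper and its pool argument are hypothetical, not from the patch):

    /* Hypothetical wrapper: with __GFP_DIRECT_RECLAIM set the call may sleep
     * waiting for an element but will not fail; with GFP_ATOMIC it never
     * sleeps and returns NULL once both the pool and the page allocator are
     * exhausted. */
    static void *grab_element(mempool_t *pool, bool can_sleep)
    {
            return mempool_alloc(pool, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
    }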
diff --git a/mm/migrate.c b/mm/migrate.c
index 2834faba719a..7890d0bb5e23 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1578,7 +1578,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
1578 (GFP_HIGHUSER_MOVABLE | 1578 (GFP_HIGHUSER_MOVABLE |
1579 __GFP_THISNODE | __GFP_NOMEMALLOC | 1579 __GFP_THISNODE | __GFP_NOMEMALLOC |
1580 __GFP_NORETRY | __GFP_NOWARN) & 1580 __GFP_NORETRY | __GFP_NOWARN) &
1581 ~GFP_IOFS, 0); 1581 ~(__GFP_IO | __GFP_FS), 0);
1582 1582
1583 return newpage; 1583 return newpage;
1584} 1584}
@@ -1752,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
1752 goto out_dropref; 1752 goto out_dropref;
1753 1753
1754 new_page = alloc_pages_node(node, 1754 new_page = alloc_pages_node(node,
1755 (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, 1755 (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
1756 HPAGE_PMD_ORDER); 1756 HPAGE_PMD_ORDER);
1757 if (!new_page) 1757 if (!new_page)
1758 goto out_fail; 1758 goto out_fail;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index e4778285d8d1..d13a33918fa2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -118,6 +118,15 @@ found:
118 return t; 118 return t;
119} 119}
120 120
121/*
122 * order == -1 means the oom kill is required by sysrq, otherwise only
123 * for display purposes.
124 */
125static inline bool is_sysrq_oom(struct oom_control *oc)
126{
127 return oc->order == -1;
128}
129
121/* return true if the task is not adequate as candidate victim task. */ 130/* return true if the task is not adequate as candidate victim task. */
122static bool oom_unkillable_task(struct task_struct *p, 131static bool oom_unkillable_task(struct task_struct *p,
123 struct mem_cgroup *memcg, const nodemask_t *nodemask) 132 struct mem_cgroup *memcg, const nodemask_t *nodemask)
@@ -265,7 +274,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
265 * Don't allow any other task to have access to the reserves. 274 * Don't allow any other task to have access to the reserves.
266 */ 275 */
267 if (test_tsk_thread_flag(task, TIF_MEMDIE)) { 276 if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
268 if (oc->order != -1) 277 if (!is_sysrq_oom(oc))
269 return OOM_SCAN_ABORT; 278 return OOM_SCAN_ABORT;
270 } 279 }
271 if (!task->mm) 280 if (!task->mm)
@@ -278,7 +287,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
278 if (oom_task_origin(task)) 287 if (oom_task_origin(task))
279 return OOM_SCAN_SELECT; 288 return OOM_SCAN_SELECT;
280 289
281 if (task_will_free_mem(task) && oc->order != -1) 290 if (task_will_free_mem(task) && !is_sysrq_oom(oc))
282 return OOM_SCAN_ABORT; 291 return OOM_SCAN_ABORT;
283 292
284 return OOM_SCAN_OK; 293 return OOM_SCAN_OK;
@@ -629,7 +638,7 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
629 return; 638 return;
630 } 639 }
631 /* Do not panic for oom kills triggered by sysrq */ 640 /* Do not panic for oom kills triggered by sysrq */
632 if (oc->order == -1) 641 if (is_sysrq_oom(oc))
633 return; 642 return;
634 dump_header(oc, NULL, memcg); 643 dump_header(oc, NULL, memcg);
635 panic("Out of memory: %s panic_on_oom is enabled\n", 644 panic("Out of memory: %s panic_on_oom is enabled\n",
@@ -709,7 +718,7 @@ bool out_of_memory(struct oom_control *oc)
709 718
710 p = select_bad_process(oc, &points, totalpages); 719 p = select_bad_process(oc, &points, totalpages);
711 /* Found nothing?!?! Either we hang forever, or we panic. */ 720 /* Found nothing?!?! Either we hang forever, or we panic. */
712 if (!p && oc->order != -1) { 721 if (!p && !is_sysrq_oom(oc)) {
713 dump_header(oc, NULL, NULL); 722 dump_header(oc, NULL, NULL);
714 panic("Out of memory and no killable processes...\n"); 723 panic("Out of memory and no killable processes...\n");
715 } 724 }
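is_sysrq_oom() keys off the order == -1 convention used when an OOM kill is forced via sysrq. A hedged sketch of how that caller presumably sets up its oom_control (field values other than .order are illustrative, condensed from the sysrq handler rather than taken from this diff):

    struct oom_control oc = {
            .zonelist = node_zonelist(first_memory_node, GFP_KERNEL),
            .nodemask = NULL,
            .gfp_mask = GFP_KERNEL,
            .order    = -1,            /* what is_sysrq_oom() tests for */
    };

    out_of_memory(&oc);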
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 446bb36ee59d..208e4c7e771b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -169,19 +169,19 @@ void pm_restrict_gfp_mask(void)
169 WARN_ON(!mutex_is_locked(&pm_mutex)); 169 WARN_ON(!mutex_is_locked(&pm_mutex));
170 WARN_ON(saved_gfp_mask); 170 WARN_ON(saved_gfp_mask);
171 saved_gfp_mask = gfp_allowed_mask; 171 saved_gfp_mask = gfp_allowed_mask;
172 gfp_allowed_mask &= ~GFP_IOFS; 172 gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
173} 173}
174 174
175bool pm_suspended_storage(void) 175bool pm_suspended_storage(void)
176{ 176{
177 if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS) 177 if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
178 return false; 178 return false;
179 return true; 179 return true;
180} 180}
181#endif /* CONFIG_PM_SLEEP */ 181#endif /* CONFIG_PM_SLEEP */
182 182
183#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 183#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
184int pageblock_order __read_mostly; 184unsigned int pageblock_order __read_mostly;
185#endif 185#endif
186 186
187static void __free_pages_ok(struct page *page, unsigned int order); 187static void __free_pages_ok(struct page *page, unsigned int order);
@@ -229,6 +229,15 @@ static char * const zone_names[MAX_NR_ZONES] = {
229#endif 229#endif
230}; 230};
231 231
232static void free_compound_page(struct page *page);
233compound_page_dtor * const compound_page_dtors[] = {
234 NULL,
235 free_compound_page,
236#ifdef CONFIG_HUGETLB_PAGE
237 free_huge_page,
238#endif
239};
240
232int min_free_kbytes = 1024; 241int min_free_kbytes = 1024;
233int user_min_free_kbytes = -1; 242int user_min_free_kbytes = -1;
234 243
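compound_page_dtors[] above is indexed by the small ids (NULL_COMPOUND_DTOR, COMPOUND_PAGE_DTOR, HUGETLB_PAGE_DTOR) that the rest of this diff stores in page[1].compound_dtor. The declarations live in include/linux/mm.h, outside this diff, and are presumably along these lines:

    typedef void compound_page_dtor(struct page *);

    enum compound_dtor_id {
            NULL_COMPOUND_DTOR,
            COMPOUND_PAGE_DTOR,
    #ifdef CONFIG_HUGETLB_PAGE
            HUGETLB_PAGE_DTOR,
    #endif
            NR_COMPOUND_DTORS,
    };

    static inline void set_compound_page_dtor(struct page *page,
                    enum compound_dtor_id compound_dtor)
    {
            page[1].compound_dtor = compound_dtor;
    }

Storing an index instead of a raw function pointer is what lets PageHuge() in the hugetlb.c hunk above compare an id rather than a function address.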
@@ -436,15 +445,15 @@ out:
436/* 445/*
437 * Higher-order pages are called "compound pages". They are structured thusly: 446 * Higher-order pages are called "compound pages". They are structured thusly:
438 * 447 *
 439 * The first PAGE_SIZE page is called the "head page".                   448 * The first PAGE_SIZE page is called the "head page" and has PG_head set.
440 * 449 *
441 * The remaining PAGE_SIZE pages are called "tail pages". 450 * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded
                                                                        451 * in bit 0 of page->compound_head. The remaining bits are a pointer to the head page.
442 * 452 *
 443 * All pages have PG_compound set. All tail pages have their ->first_page    453 * The first tail page's ->compound_dtor holds the offset into the array of compound
444 * pointing at the head page. 454 * page destructors. See compound_page_dtors.
445 * 455 *
446 * The first tail page's ->lru.next holds the address of the compound page's 456 * The first tail page's ->compound_order holds the order of allocation.
447 * put_page() function. Its ->lru.prev holds the order of allocation.
448 * This usage means that zero-order pages may not be compound. 457 * This usage means that zero-order pages may not be compound.
449 */ 458 */
450 459
@@ -453,21 +462,18 @@ static void free_compound_page(struct page *page)
453 __free_pages_ok(page, compound_order(page)); 462 __free_pages_ok(page, compound_order(page));
454} 463}
455 464
456void prep_compound_page(struct page *page, unsigned long order) 465void prep_compound_page(struct page *page, unsigned int order)
457{ 466{
458 int i; 467 int i;
459 int nr_pages = 1 << order; 468 int nr_pages = 1 << order;
460 469
461 set_compound_page_dtor(page, free_compound_page); 470 set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
462 set_compound_order(page, order); 471 set_compound_order(page, order);
463 __SetPageHead(page); 472 __SetPageHead(page);
464 for (i = 1; i < nr_pages; i++) { 473 for (i = 1; i < nr_pages; i++) {
465 struct page *p = page + i; 474 struct page *p = page + i;
466 set_page_count(p, 0); 475 set_page_count(p, 0);
467 p->first_page = page; 476 set_compound_head(p, page);
468 /* Make sure p->first_page is always valid for PageTail() */
469 smp_wmb();
470 __SetPageTail(p);
471 } 477 }
472} 478}
473 479
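The set_compound_head()/clear_compound_head()/compound_head() helpers used throughout these hunks are defined in include/linux/mm.h; the encoding is presumably bit 0 of page->compound_head acting as the PageTail() marker, with the remaining bits holding the head pointer, roughly:

    static inline void set_compound_head(struct page *page, struct page *head)
    {
            WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
    }

    static inline void clear_compound_head(struct page *page)
    {
            WRITE_ONCE(page->compound_head, 0);
    }

    static inline struct page *compound_head(struct page *page)
    {
            unsigned long head = READ_ONCE(page->compound_head);

            if (unlikely(head & 1))
                    return (struct page *)(head - 1);
            return page;
    }

A single WRITE_ONCE() of one word is what allows the smp_wmb() pairs that used to order ->first_page against __SetPageTail() to be dropped in the hunks above.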
@@ -656,7 +662,7 @@ static inline void __free_one_page(struct page *page,
656 unsigned long combined_idx; 662 unsigned long combined_idx;
657 unsigned long uninitialized_var(buddy_idx); 663 unsigned long uninitialized_var(buddy_idx);
658 struct page *buddy; 664 struct page *buddy;
659 int max_order = MAX_ORDER; 665 unsigned int max_order = MAX_ORDER;
660 666
661 VM_BUG_ON(!zone_is_initialized(zone)); 667 VM_BUG_ON(!zone_is_initialized(zone));
662 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); 668 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
@@ -669,7 +675,7 @@ static inline void __free_one_page(struct page *page,
669 * pageblock. Without this, pageblock isolation 675 * pageblock. Without this, pageblock isolation
670 * could cause incorrect freepage accounting. 676 * could cause incorrect freepage accounting.
671 */ 677 */
672 max_order = min(MAX_ORDER, pageblock_order + 1); 678 max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
673 } else { 679 } else {
674 __mod_zone_freepage_state(zone, 1 << order, migratetype); 680 __mod_zone_freepage_state(zone, 1 << order, migratetype);
675 } 681 }
@@ -817,7 +823,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
817 if (unlikely(has_isolate_pageblock(zone))) 823 if (unlikely(has_isolate_pageblock(zone)))
818 mt = get_pageblock_migratetype(page); 824 mt = get_pageblock_migratetype(page);
819 825
820 /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
821 __free_one_page(page, page_to_pfn(page), zone, 0, mt); 826 __free_one_page(page, page_to_pfn(page), zone, 0, mt);
822 trace_mm_page_pcpu_drain(page, 0, mt); 827 trace_mm_page_pcpu_drain(page, 0, mt);
823 } while (--to_free && --batch_free && !list_empty(list)); 828 } while (--to_free && --batch_free && !list_empty(list));
@@ -846,17 +851,30 @@ static void free_one_page(struct zone *zone,
846 851
847static int free_tail_pages_check(struct page *head_page, struct page *page) 852static int free_tail_pages_check(struct page *head_page, struct page *page)
848{ 853{
849 if (!IS_ENABLED(CONFIG_DEBUG_VM)) 854 int ret = 1;
850 return 0; 855
856 /*
857 * We rely page->lru.next never has bit 0 set, unless the page
858 * is PageTail(). Let's make sure that's true even for poisoned ->lru.
859 */
860 BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
861
862 if (!IS_ENABLED(CONFIG_DEBUG_VM)) {
863 ret = 0;
864 goto out;
865 }
851 if (unlikely(!PageTail(page))) { 866 if (unlikely(!PageTail(page))) {
852 bad_page(page, "PageTail not set", 0); 867 bad_page(page, "PageTail not set", 0);
853 return 1; 868 goto out;
854 } 869 }
855 if (unlikely(page->first_page != head_page)) { 870 if (unlikely(compound_head(page) != head_page)) {
856 bad_page(page, "first_page not consistent", 0); 871 bad_page(page, "compound_head not consistent", 0);
857 return 1; 872 goto out;
858 } 873 }
859 return 0; 874 ret = 0;
875out:
876 clear_compound_head(page);
877 return ret;
860} 878}
861 879
862static void __meminit __init_single_page(struct page *page, unsigned long pfn, 880static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -923,6 +941,10 @@ void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
923 struct page *page = pfn_to_page(start_pfn); 941 struct page *page = pfn_to_page(start_pfn);
924 942
925 init_reserved_page(start_pfn); 943 init_reserved_page(start_pfn);
944
945 /* Avoid false-positive PageTail() */
946 INIT_LIST_HEAD(&page->lru);
947
926 SetPageReserved(page); 948 SetPageReserved(page);
927 } 949 }
928 } 950 }
@@ -1417,15 +1439,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
1417 * the free lists for the desirable migrate type are depleted 1439 * the free lists for the desirable migrate type are depleted
1418 */ 1440 */
1419static int fallbacks[MIGRATE_TYPES][4] = { 1441static int fallbacks[MIGRATE_TYPES][4] = {
1420 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, 1442 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
1421 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, 1443 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
1422 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, 1444 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
1423#ifdef CONFIG_CMA 1445#ifdef CONFIG_CMA
1424 [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ 1446 [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */
1425#endif 1447#endif
1426 [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */
1427#ifdef CONFIG_MEMORY_ISOLATION 1448#ifdef CONFIG_MEMORY_ISOLATION
1428 [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ 1449 [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */
1429#endif 1450#endif
1430}; 1451};
1431 1452
@@ -1450,7 +1471,7 @@ int move_freepages(struct zone *zone,
1450 int migratetype) 1471 int migratetype)
1451{ 1472{
1452 struct page *page; 1473 struct page *page;
1453 unsigned long order; 1474 unsigned int order;
1454 int pages_moved = 0; 1475 int pages_moved = 0;
1455 1476
1456#ifndef CONFIG_HOLES_IN_ZONE 1477#ifndef CONFIG_HOLES_IN_ZONE
@@ -1563,7 +1584,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
1563static void steal_suitable_fallback(struct zone *zone, struct page *page, 1584static void steal_suitable_fallback(struct zone *zone, struct page *page,
1564 int start_type) 1585 int start_type)
1565{ 1586{
1566 int current_order = page_order(page); 1587 unsigned int current_order = page_order(page);
1567 int pages; 1588 int pages;
1568 1589
1569 /* Take ownership for orders >= pageblock_order */ 1590 /* Take ownership for orders >= pageblock_order */
@@ -1598,7 +1619,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
1598 *can_steal = false; 1619 *can_steal = false;
1599 for (i = 0;; i++) { 1620 for (i = 0;; i++) {
1600 fallback_mt = fallbacks[migratetype][i]; 1621 fallback_mt = fallbacks[migratetype][i];
1601 if (fallback_mt == MIGRATE_RESERVE) 1622 if (fallback_mt == MIGRATE_TYPES)
1602 break; 1623 break;
1603 1624
1604 if (list_empty(&area->free_list[fallback_mt])) 1625 if (list_empty(&area->free_list[fallback_mt]))
@@ -1617,6 +1638,101 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
1617 return -1; 1638 return -1;
1618} 1639}
1619 1640
1641/*
1642 * Reserve a pageblock for exclusive use of high-order atomic allocations if
1643 * there are no empty page blocks that contain a page with a suitable order
1644 */
1645static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
1646 unsigned int alloc_order)
1647{
1648 int mt;
1649 unsigned long max_managed, flags;
1650
1651 /*
1652 * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
1653 * Check is race-prone but harmless.
1654 */
1655 max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
1656 if (zone->nr_reserved_highatomic >= max_managed)
1657 return;
1658
1659 spin_lock_irqsave(&zone->lock, flags);
1660
1661 /* Recheck the nr_reserved_highatomic limit under the lock */
1662 if (zone->nr_reserved_highatomic >= max_managed)
1663 goto out_unlock;
1664
1665 /* Yoink! */
1666 mt = get_pageblock_migratetype(page);
1667 if (mt != MIGRATE_HIGHATOMIC &&
1668 !is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
1669 zone->nr_reserved_highatomic += pageblock_nr_pages;
1670 set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
1671 move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
1672 }
1673
1674out_unlock:
1675 spin_unlock_irqrestore(&zone->lock, flags);
1676}
1677
1678/*
1679 * Used when an allocation is about to fail under memory pressure. This
1680 * potentially hurts the reliability of high-order allocations when under
1681 * intense memory pressure but failed atomic allocations should be easier
1682 * to recover from than an OOM.
1683 */
1684static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
1685{
1686 struct zonelist *zonelist = ac->zonelist;
1687 unsigned long flags;
1688 struct zoneref *z;
1689 struct zone *zone;
1690 struct page *page;
1691 int order;
1692
1693 for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
1694 ac->nodemask) {
1695 /* Preserve at least one pageblock */
1696 if (zone->nr_reserved_highatomic <= pageblock_nr_pages)
1697 continue;
1698
1699 spin_lock_irqsave(&zone->lock, flags);
1700 for (order = 0; order < MAX_ORDER; order++) {
1701 struct free_area *area = &(zone->free_area[order]);
1702
1703 if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
1704 continue;
1705
1706 page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
1707 struct page, lru);
1708
1709 /*
1710 * It should never happen but changes to locking could
1711 * inadvertently allow a per-cpu drain to add pages
1712 * to MIGRATE_HIGHATOMIC while unreserving so be safe
1713 * and watch for underflows.
1714 */
1715 zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
1716 zone->nr_reserved_highatomic);
1717
1718 /*
1719 * Convert to ac->migratetype and avoid the normal
1720 * pageblock stealing heuristics. Minimally, the caller
1721 * is doing the work and needs the pages. More
1722 * importantly, if the block was always converted to
1723 * MIGRATE_UNMOVABLE or another type then the number
1724 * of pageblocks that cannot be completely freed
1725 * may increase.
1726 */
1727 set_pageblock_migratetype(page, ac->migratetype);
1728 move_freepages_block(zone, page, ac->migratetype);
1729 spin_unlock_irqrestore(&zone->lock, flags);
1730 return;
1731 }
1732 spin_unlock_irqrestore(&zone->lock, flags);
1733 }
1734}
1735
1620/* Remove an element from the buddy allocator from the fallback list */ 1736/* Remove an element from the buddy allocator from the fallback list */
1621static inline struct page * 1737static inline struct page *
1622__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) 1738__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
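reserve_highatomic_pageblock() and unreserve_highatomic_pageblock() are wired into the allocator paths changed later in this file. A hypothetical condensation of that policy (the helper name and argument plumbing are illustrative, not from the patch):

    /* Hypothetical summary: grow the reserve after a high-order ALLOC_HARDER
     * request succeeds; give a reserved pageblock back when an allocation
     * still fails even though direct reclaim was attempted. */
    static void manage_highatomic_reserve(struct zone *zone, struct page *page,
                                          unsigned int order, int alloc_flags,
                                          const struct alloc_context *ac)
    {
            if (page && order && (alloc_flags & ALLOC_HARDER))
                    reserve_highatomic_pageblock(page, zone, order);
            else if (!page)
                    unreserve_highatomic_pageblock(ac);
    }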
@@ -1672,29 +1788,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
1672 * Call me with the zone->lock already held. 1788 * Call me with the zone->lock already held.
1673 */ 1789 */
1674static struct page *__rmqueue(struct zone *zone, unsigned int order, 1790static struct page *__rmqueue(struct zone *zone, unsigned int order,
1675 int migratetype) 1791 int migratetype, gfp_t gfp_flags)
1676{ 1792{
1677 struct page *page; 1793 struct page *page;
1678 1794
1679retry_reserve:
1680 page = __rmqueue_smallest(zone, order, migratetype); 1795 page = __rmqueue_smallest(zone, order, migratetype);
1681 1796 if (unlikely(!page)) {
1682 if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
1683 if (migratetype == MIGRATE_MOVABLE) 1797 if (migratetype == MIGRATE_MOVABLE)
1684 page = __rmqueue_cma_fallback(zone, order); 1798 page = __rmqueue_cma_fallback(zone, order);
1685 1799
1686 if (!page) 1800 if (!page)
1687 page = __rmqueue_fallback(zone, order, migratetype); 1801 page = __rmqueue_fallback(zone, order, migratetype);
1688
1689 /*
1690 * Use MIGRATE_RESERVE rather than fail an allocation. goto
1691 * is used because __rmqueue_smallest is an inline function
1692 * and we want just one call site
1693 */
1694 if (!page) {
1695 migratetype = MIGRATE_RESERVE;
1696 goto retry_reserve;
1697 }
1698 } 1802 }
1699 1803
1700 trace_mm_page_alloc_zone_locked(page, order, migratetype); 1804 trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -1714,7 +1818,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
1714 1818
1715 spin_lock(&zone->lock); 1819 spin_lock(&zone->lock);
1716 for (i = 0; i < count; ++i) { 1820 for (i = 0; i < count; ++i) {
1717 struct page *page = __rmqueue(zone, order, migratetype); 1821 struct page *page = __rmqueue(zone, order, migratetype, 0);
1718 if (unlikely(page == NULL)) 1822 if (unlikely(page == NULL))
1719 break; 1823 break;
1720 1824
@@ -2086,7 +2190,7 @@ int split_free_page(struct page *page)
2086static inline 2190static inline
2087struct page *buffered_rmqueue(struct zone *preferred_zone, 2191struct page *buffered_rmqueue(struct zone *preferred_zone,
2088 struct zone *zone, unsigned int order, 2192 struct zone *zone, unsigned int order,
2089 gfp_t gfp_flags, int migratetype) 2193 gfp_t gfp_flags, int alloc_flags, int migratetype)
2090{ 2194{
2091 unsigned long flags; 2195 unsigned long flags;
2092 struct page *page; 2196 struct page *page;
@@ -2129,7 +2233,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
2129 WARN_ON_ONCE(order > 1); 2233 WARN_ON_ONCE(order > 1);
2130 } 2234 }
2131 spin_lock_irqsave(&zone->lock, flags); 2235 spin_lock_irqsave(&zone->lock, flags);
2132 page = __rmqueue(zone, order, migratetype); 2236
2237 page = NULL;
2238 if (alloc_flags & ALLOC_HARDER) {
2239 page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
2240 if (page)
2241 trace_mm_page_alloc_zone_locked(page, order, migratetype);
2242 }
2243 if (!page)
2244 page = __rmqueue(zone, order, migratetype, gfp_flags);
2133 spin_unlock(&zone->lock); 2245 spin_unlock(&zone->lock);
2134 if (!page) 2246 if (!page)
2135 goto failed; 2247 goto failed;
@@ -2160,11 +2272,11 @@ static struct {
2160 struct fault_attr attr; 2272 struct fault_attr attr;
2161 2273
2162 bool ignore_gfp_highmem; 2274 bool ignore_gfp_highmem;
2163 bool ignore_gfp_wait; 2275 bool ignore_gfp_reclaim;
2164 u32 min_order; 2276 u32 min_order;
2165} fail_page_alloc = { 2277} fail_page_alloc = {
2166 .attr = FAULT_ATTR_INITIALIZER, 2278 .attr = FAULT_ATTR_INITIALIZER,
2167 .ignore_gfp_wait = true, 2279 .ignore_gfp_reclaim = true,
2168 .ignore_gfp_highmem = true, 2280 .ignore_gfp_highmem = true,
2169 .min_order = 1, 2281 .min_order = 1,
2170}; 2282};
@@ -2183,7 +2295,8 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
2183 return false; 2295 return false;
2184 if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM)) 2296 if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
2185 return false; 2297 return false;
2186 if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT)) 2298 if (fail_page_alloc.ignore_gfp_reclaim &&
2299 (gfp_mask & __GFP_DIRECT_RECLAIM))
2187 return false; 2300 return false;
2188 2301
2189 return should_fail(&fail_page_alloc.attr, 1 << order); 2302 return should_fail(&fail_page_alloc.attr, 1 << order);
@@ -2202,7 +2315,7 @@ static int __init fail_page_alloc_debugfs(void)
2202 return PTR_ERR(dir); 2315 return PTR_ERR(dir);
2203 2316
2204 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, 2317 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
2205 &fail_page_alloc.ignore_gfp_wait)) 2318 &fail_page_alloc.ignore_gfp_reclaim))
2206 goto fail; 2319 goto fail;
2207 if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir, 2320 if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir,
2208 &fail_page_alloc.ignore_gfp_highmem)) 2321 &fail_page_alloc.ignore_gfp_highmem))
@@ -2232,42 +2345,77 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
2232#endif /* CONFIG_FAIL_PAGE_ALLOC */ 2345#endif /* CONFIG_FAIL_PAGE_ALLOC */
2233 2346
2234/* 2347/*
2235 * Return true if free pages are above 'mark'. This takes into account the order 2348 * Return true if free base pages are above 'mark'. For high-order checks it
 2236 * of the allocation.                                             2349 * will return true if the order-0 watermark is reached and there is at least
2350 * one free page of a suitable size. Checking now avoids taking the zone lock
2351 * to check in the allocation paths if no pages are free.
2237 */ 2352 */
2238static bool __zone_watermark_ok(struct zone *z, unsigned int order, 2353static bool __zone_watermark_ok(struct zone *z, unsigned int order,
2239 unsigned long mark, int classzone_idx, int alloc_flags, 2354 unsigned long mark, int classzone_idx, int alloc_flags,
2240 long free_pages) 2355 long free_pages)
2241{ 2356{
2242 /* free_pages may go negative - that's OK */
2243 long min = mark; 2357 long min = mark;
2244 int o; 2358 int o;
2245 long free_cma = 0; 2359 const int alloc_harder = (alloc_flags & ALLOC_HARDER);
2246 2360
2361 /* free_pages may go negative - that's OK */
2247 free_pages -= (1 << order) - 1; 2362 free_pages -= (1 << order) - 1;
2363
2248 if (alloc_flags & ALLOC_HIGH) 2364 if (alloc_flags & ALLOC_HIGH)
2249 min -= min / 2; 2365 min -= min / 2;
2250 if (alloc_flags & ALLOC_HARDER) 2366
2367 /*
2368 * If the caller does not have rights to ALLOC_HARDER then subtract
2369 * the high-atomic reserves. This will over-estimate the size of the
2370 * atomic reserve but it avoids a search.
2371 */
2372 if (likely(!alloc_harder))
2373 free_pages -= z->nr_reserved_highatomic;
2374 else
2251 min -= min / 4; 2375 min -= min / 4;
2376
2252#ifdef CONFIG_CMA 2377#ifdef CONFIG_CMA
2253 /* If allocation can't use CMA areas don't use free CMA pages */ 2378 /* If allocation can't use CMA areas don't use free CMA pages */
2254 if (!(alloc_flags & ALLOC_CMA)) 2379 if (!(alloc_flags & ALLOC_CMA))
2255 free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); 2380 free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
2256#endif 2381#endif
2257 2382
2258 if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) 2383 /*
2384 * Check watermarks for an order-0 allocation request. If these
2385 * are not met, then a high-order request also cannot go ahead
2386 * even if a suitable page happened to be free.
2387 */
2388 if (free_pages <= min + z->lowmem_reserve[classzone_idx])
2259 return false; 2389 return false;
2260 for (o = 0; o < order; o++) {
2261 /* At the next order, this order's pages become unavailable */
2262 free_pages -= z->free_area[o].nr_free << o;
2263 2390
2264 /* Require fewer higher order pages to be free */ 2391 /* If this is an order-0 request then the watermark is fine */
2265 min >>= 1; 2392 if (!order)
2393 return true;
2394
2395 /* For a high-order request, check at least one suitable page is free */
2396 for (o = order; o < MAX_ORDER; o++) {
2397 struct free_area *area = &z->free_area[o];
2398 int mt;
2399
2400 if (!area->nr_free)
2401 continue;
2402
2403 if (alloc_harder)
2404 return true;
2405
2406 for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
2407 if (!list_empty(&area->free_list[mt]))
2408 return true;
2409 }
2266 2410
2267 if (free_pages <= min) 2411#ifdef CONFIG_CMA
2268 return false; 2412 if ((alloc_flags & ALLOC_CMA) &&
2413 !list_empty(&area->free_list[MIGRATE_CMA])) {
2414 return true;
2415 }
2416#endif
2269 } 2417 }
2270 return true; 2418 return false;
2271} 2419}
2272 2420
2273bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, 2421bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
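With the rewritten __zone_watermark_ok() above, an order-0 request passes on the adjusted free-page count alone, while a high-order request additionally needs one free page of at least the requested order on an allowed migratetype list. A usage sketch (the wrapper name is hypothetical):

    /* Hypothetical wrapper: true only if the order-0 watermark is met and,
     * for order > 0, a suitably sized free page exists, so callers no longer
     * take the zone lock just to discover nothing suitable is free. */
    static bool can_try_zone(struct zone *zone, unsigned int order,
                             int classzone_idx, int alloc_flags)
    {
            return zone_watermark_ok(zone, order, low_wmark_pages(zone),
                                     classzone_idx, alloc_flags);
    }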
@@ -2278,134 +2426,18 @@ bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
2278} 2426}
2279 2427
2280bool zone_watermark_ok_safe(struct zone *z, unsigned int order, 2428bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
2281 unsigned long mark, int classzone_idx, int alloc_flags) 2429 unsigned long mark, int classzone_idx)
2282{ 2430{
2283 long free_pages = zone_page_state(z, NR_FREE_PAGES); 2431 long free_pages = zone_page_state(z, NR_FREE_PAGES);
2284 2432
2285 if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) 2433 if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
2286 free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); 2434 free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
2287 2435
2288 return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, 2436 return __zone_watermark_ok(z, order, mark, classzone_idx, 0,
2289 free_pages); 2437 free_pages);
2290} 2438}
2291 2439
2292#ifdef CONFIG_NUMA 2440#ifdef CONFIG_NUMA
2293/*
2294 * zlc_setup - Setup for "zonelist cache". Uses cached zone data to
2295 * skip over zones that are not allowed by the cpuset, or that have
2296 * been recently (in last second) found to be nearly full. See further
2297 * comments in mmzone.h. Reduces cache footprint of zonelist scans
2298 * that have to skip over a lot of full or unallowed zones.
2299 *
2300 * If the zonelist cache is present in the passed zonelist, then
2301 * returns a pointer to the allowed node mask (either the current
2302 * tasks mems_allowed, or node_states[N_MEMORY].)
2303 *
2304 * If the zonelist cache is not available for this zonelist, does
2305 * nothing and returns NULL.
2306 *
2307 * If the fullzones BITMAP in the zonelist cache is stale (more than
2308 * a second since last zap'd) then we zap it out (clear its bits.)
2309 *
2310 * We hold off even calling zlc_setup, until after we've checked the
2311 * first zone in the zonelist, on the theory that most allocations will
2312 * be satisfied from that first zone, so best to examine that zone as
2313 * quickly as we can.
2314 */
2315static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
2316{
2317 struct zonelist_cache *zlc; /* cached zonelist speedup info */
2318 nodemask_t *allowednodes; /* zonelist_cache approximation */
2319
2320 zlc = zonelist->zlcache_ptr;
2321 if (!zlc)
2322 return NULL;
2323
2324 if (time_after(jiffies, zlc->last_full_zap + HZ)) {
2325 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
2326 zlc->last_full_zap = jiffies;
2327 }
2328
2329 allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
2330 &cpuset_current_mems_allowed :
2331 &node_states[N_MEMORY];
2332 return allowednodes;
2333}
2334
2335/*
2336 * Given 'z' scanning a zonelist, run a couple of quick checks to see
2337 * if it is worth looking at further for free memory:
2338 * 1) Check that the zone isn't thought to be full (doesn't have its
2339 * bit set in the zonelist_cache fullzones BITMAP).
2340 * 2) Check that the zones node (obtained from the zonelist_cache
2341 * z_to_n[] mapping) is allowed in the passed in allowednodes mask.
2342 * Return true (non-zero) if zone is worth looking at further, or
2343 * else return false (zero) if it is not.
2344 *
2345 * This check -ignores- the distinction between various watermarks,
2346 * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ... If a zone is
2347 * found to be full for any variation of these watermarks, it will
2348 * be considered full for up to one second by all requests, unless
2349 * we are so low on memory on all allowed nodes that we are forced
2350 * into the second scan of the zonelist.
2351 *
2352 * In the second scan we ignore this zonelist cache and exactly
2353 * apply the watermarks to all zones, even it is slower to do so.
2354 * We are low on memory in the second scan, and should leave no stone
2355 * unturned looking for a free page.
2356 */
2357static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
2358 nodemask_t *allowednodes)
2359{
2360 struct zonelist_cache *zlc; /* cached zonelist speedup info */
2361 int i; /* index of *z in zonelist zones */
2362 int n; /* node that zone *z is on */
2363
2364 zlc = zonelist->zlcache_ptr;
2365 if (!zlc)
2366 return 1;
2367
2368 i = z - zonelist->_zonerefs;
2369 n = zlc->z_to_n[i];
2370
2371 /* This zone is worth trying if it is allowed but not full */
2372 return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones);
2373}
2374
2375/*
2376 * Given 'z' scanning a zonelist, set the corresponding bit in
2377 * zlc->fullzones, so that subsequent attempts to allocate a page
2378 * from that zone don't waste time re-examining it.
2379 */
2380static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
2381{
2382 struct zonelist_cache *zlc; /* cached zonelist speedup info */
2383 int i; /* index of *z in zonelist zones */
2384
2385 zlc = zonelist->zlcache_ptr;
2386 if (!zlc)
2387 return;
2388
2389 i = z - zonelist->_zonerefs;
2390
2391 set_bit(i, zlc->fullzones);
2392}
2393
2394/*
2395 * clear all zones full, called after direct reclaim makes progress so that
2396 * a zone that was recently full is not skipped over for up to a second
2397 */
2398static void zlc_clear_zones_full(struct zonelist *zonelist)
2399{
2400 struct zonelist_cache *zlc; /* cached zonelist speedup info */
2401
2402 zlc = zonelist->zlcache_ptr;
2403 if (!zlc)
2404 return;
2405
2406 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
2407}
2408
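The comment blocks above describe the zonelist cache that this patch deletes: a per-zonelist bitmap of zones that recently looked full, with the hints thrown away after one second. Below is a minimal user-space sketch of that idea only; the names are invented for illustration, time(NULL) stands in for jiffies, and the allowed-nodes side of zlc_setup() is left out, so treat it as a model of the mechanism rather than the kernel code.

#include <stdbool.h>
#include <string.h>
#include <time.h>

#define MAX_ZONES 8

struct zone_cache {
	unsigned char full[MAX_ZONES];	/* 1 = zone recently looked full */
	time_t last_zap;		/* when the hints were last cleared */
};

/* Throw away hints older than one second, as zlc_setup() did with jiffies. */
static void cache_refresh(struct zone_cache *zc)
{
	time_t now = time(NULL);

	if (now - zc->last_zap >= 1) {
		memset(zc->full, 0, sizeof(zc->full));
		zc->last_zap = now;
	}
}

/* Worth scanning this zone only if it was not marked full recently. */
static bool zone_worth_trying(const struct zone_cache *zc, int zone)
{
	return !zc->full[zone];
}

static void mark_zone_full(struct zone_cache *zc, int zone)
{
	zc->full[zone] = 1;
}

int main(void)
{
	struct zone_cache zc = { {0}, 0 };

	cache_refresh(&zc);
	mark_zone_full(&zc, 0);
	return zone_worth_trying(&zc, 0);	/* 0: zone 0 is now skipped */
}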
2409static bool zone_local(struct zone *local_zone, struct zone *zone) 2441static bool zone_local(struct zone *local_zone, struct zone *zone)
2410{ 2442{
2411 return local_zone->node == zone->node; 2443 return local_zone->node == zone->node;
@@ -2416,28 +2448,7 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
2416 return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < 2448 return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
2417 RECLAIM_DISTANCE; 2449 RECLAIM_DISTANCE;
2418} 2450}
2419
2420#else /* CONFIG_NUMA */ 2451#else /* CONFIG_NUMA */
2421
2422static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
2423{
2424 return NULL;
2425}
2426
2427static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
2428 nodemask_t *allowednodes)
2429{
2430 return 1;
2431}
2432
2433static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
2434{
2435}
2436
2437static void zlc_clear_zones_full(struct zonelist *zonelist)
2438{
2439}
2440
2441static bool zone_local(struct zone *local_zone, struct zone *zone) 2452static bool zone_local(struct zone *local_zone, struct zone *zone)
2442{ 2453{
2443 return true; 2454 return true;
@@ -2447,7 +2458,6 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
2447{ 2458{
2448 return true; 2459 return true;
2449} 2460}
2450
2451#endif /* CONFIG_NUMA */ 2461#endif /* CONFIG_NUMA */
2452 2462
2453static void reset_alloc_batches(struct zone *preferred_zone) 2463static void reset_alloc_batches(struct zone *preferred_zone)
@@ -2474,11 +2484,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
2474 struct zoneref *z; 2484 struct zoneref *z;
2475 struct page *page = NULL; 2485 struct page *page = NULL;
2476 struct zone *zone; 2486 struct zone *zone;
2477 nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
2478 int zlc_active = 0; /* set if using zonelist_cache */
2479 int did_zlc_setup = 0; /* just call zlc_setup() one time */
2480 bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
2481 (gfp_mask & __GFP_WRITE);
2482 int nr_fair_skipped = 0; 2487 int nr_fair_skipped = 0;
2483 bool zonelist_rescan; 2488 bool zonelist_rescan;
2484 2489
@@ -2493,9 +2498,6 @@ zonelist_scan:
2493 ac->nodemask) { 2498 ac->nodemask) {
2494 unsigned long mark; 2499 unsigned long mark;
2495 2500
2496 if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
2497 !zlc_zone_worth_trying(zonelist, z, allowednodes))
2498 continue;
2499 if (cpusets_enabled() && 2501 if (cpusets_enabled() &&
2500 (alloc_flags & ALLOC_CPUSET) && 2502 (alloc_flags & ALLOC_CPUSET) &&
2501 !cpuset_zone_allowed(zone, gfp_mask)) 2503 !cpuset_zone_allowed(zone, gfp_mask))
@@ -2533,14 +2535,14 @@ zonelist_scan:
2533 * 2535 *
2534 * XXX: For now, allow allocations to potentially 2536 * XXX: For now, allow allocations to potentially
2535 * exceed the per-zone dirty limit in the slowpath 2537 * exceed the per-zone dirty limit in the slowpath
2536 * (ALLOC_WMARK_LOW unset) before going into reclaim, 2538 * (spread_dirty_pages unset) before going into reclaim,
2537 * which is important when on a NUMA setup the allowed 2539 * which is important when on a NUMA setup the allowed
2538 * zones are together not big enough to reach the 2540 * zones are together not big enough to reach the
2539 * global limit. The proper fix for these situations 2541 * global limit. The proper fix for these situations
2540 * will require awareness of zones in the 2542 * will require awareness of zones in the
2541 * dirty-throttling and the flusher threads. 2543 * dirty-throttling and the flusher threads.
2542 */ 2544 */
2543 if (consider_zone_dirty && !zone_dirty_ok(zone)) 2545 if (ac->spread_dirty_pages && !zone_dirty_ok(zone))
2544 continue; 2546 continue;
2545 2547
2546 mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; 2548 mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
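The hunk above ties the per-zone dirty check to ac->spread_dirty_pages instead of inferring it from ALLOC_WMARK_LOW plus __GFP_WRITE; the flag is only set in the fast path, so the slow path accepts zones over their dirty share rather than failing. A rough user-space model of that gate, with invented names and a made-up per-zone dirty budget:

#include <stdbool.h>

struct zone_model {
	unsigned long nr_dirty;		/* dirty page-cache pages in the zone */
	unsigned long dirty_limit;	/* zone's share of the global limit */
};

/* Stand-in for zone_dirty_ok(): can this zone absorb more dirtying? */
static bool zone_dirty_ok_model(const struct zone_model *z)
{
	return z->nr_dirty < z->dirty_limit;
}

/*
 * Fast-path zone selection: skip zones over their dirty share only when the
 * caller asked for dirty balancing; the slow path clears the flag and takes
 * any zone rather than going into reclaim for a dirty-limit reason alone.
 */
static bool skip_zone_for_dirty(const struct zone_model *z,
				bool spread_dirty_pages)
{
	return spread_dirty_pages && !zone_dirty_ok_model(z);
}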
@@ -2553,28 +2555,8 @@ zonelist_scan:
2553 if (alloc_flags & ALLOC_NO_WATERMARKS) 2555 if (alloc_flags & ALLOC_NO_WATERMARKS)
2554 goto try_this_zone; 2556 goto try_this_zone;
2555 2557
2556 if (IS_ENABLED(CONFIG_NUMA) &&
2557 !did_zlc_setup && nr_online_nodes > 1) {
2558 /*
2559 * we do zlc_setup if there are multiple nodes
2560 * and before considering the first zone allowed
2561 * by the cpuset.
2562 */
2563 allowednodes = zlc_setup(zonelist, alloc_flags);
2564 zlc_active = 1;
2565 did_zlc_setup = 1;
2566 }
2567
2568 if (zone_reclaim_mode == 0 || 2558 if (zone_reclaim_mode == 0 ||
2569 !zone_allows_reclaim(ac->preferred_zone, zone)) 2559 !zone_allows_reclaim(ac->preferred_zone, zone))
2570 goto this_zone_full;
2571
2572 /*
2573 * As we may have just activated ZLC, check if the first
2574 * eligible zone has failed zone_reclaim recently.
2575 */
2576 if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
2577 !zlc_zone_worth_trying(zonelist, z, allowednodes))
2578 continue; 2560 continue;
2579 2561
2580 ret = zone_reclaim(zone, gfp_mask, order); 2562 ret = zone_reclaim(zone, gfp_mask, order);
@@ -2591,34 +2573,26 @@ zonelist_scan:
2591 ac->classzone_idx, alloc_flags)) 2573 ac->classzone_idx, alloc_flags))
2592 goto try_this_zone; 2574 goto try_this_zone;
2593 2575
2594 /*
2595 * Failed to reclaim enough to meet watermark.
2596 * Only mark the zone full if checking the min
2597 * watermark or if we failed to reclaim just
2598 * 1<<order pages or else the page allocator
2599 * fastpath will prematurely mark zones full
2600 * when the watermark is between the low and
2601 * min watermarks.
2602 */
2603 if (((alloc_flags & ALLOC_WMARK_MASK) == ALLOC_WMARK_MIN) ||
2604 ret == ZONE_RECLAIM_SOME)
2605 goto this_zone_full;
2606
2607 continue; 2576 continue;
2608 } 2577 }
2609 } 2578 }
2610 2579
2611try_this_zone: 2580try_this_zone:
2612 page = buffered_rmqueue(ac->preferred_zone, zone, order, 2581 page = buffered_rmqueue(ac->preferred_zone, zone, order,
2613 gfp_mask, ac->migratetype); 2582 gfp_mask, alloc_flags, ac->migratetype);
2614 if (page) { 2583 if (page) {
2615 if (prep_new_page(page, order, gfp_mask, alloc_flags)) 2584 if (prep_new_page(page, order, gfp_mask, alloc_flags))
2616 goto try_this_zone; 2585 goto try_this_zone;
2586
2587 /*
2588 * If this is a high-order atomic allocation then check
2589 * if the pageblock should be reserved for the future
2590 */
2591 if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
2592 reserve_highatomic_pageblock(page, zone, order);
2593
2617 return page; 2594 return page;
2618 } 2595 }
2619this_zone_full:
2620 if (IS_ENABLED(CONFIG_NUMA) && zlc_active)
2621 zlc_mark_zone_full(zonelist, z);
2622 } 2596 }
2623 2597
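The added reserve_highatomic_pageblock() call sets a pageblock aside after a successful high-order allocation with ALLOC_HARDER, and direct reclaim later gives it back through unreserve_highatomic_pageblock() when ordinary allocations start failing (see the __alloc_pages_direct_reclaim hunk further down). A toy model of that bookkeeping follows; the names are invented and the cap is arbitrary, since the real limit is not visible in this hunk.

#include <stdbool.h>

struct zone_model {
	unsigned long nr_reserved_highatomic;	/* pages currently set aside */
	unsigned long max_reserved;		/* small cap per zone */
	unsigned long pageblock_pages;		/* pages per pageblock */
};

/* After a successful high-order atomic allocation, grow the reserve up to the cap. */
static void reserve_highatomic(struct zone_model *z)
{
	if (z->nr_reserved_highatomic + z->pageblock_pages <= z->max_reserved)
		z->nr_reserved_highatomic += z->pageblock_pages;
}

/* When normal allocations keep failing after reclaim, release one reserved block. */
static bool unreserve_highatomic(struct zone_model *z)
{
	if (z->nr_reserved_highatomic < z->pageblock_pages)
		return false;
	z->nr_reserved_highatomic -= z->pageblock_pages;
	return true;
}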
2624 /* 2598 /*
@@ -2639,12 +2613,6 @@ this_zone_full:
2639 zonelist_rescan = true; 2613 zonelist_rescan = true;
2640 } 2614 }
2641 2615
2642 if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
2643 /* Disable zlc cache for second zonelist scan */
2644 zlc_active = 0;
2645 zonelist_rescan = true;
2646 }
2647
2648 if (zonelist_rescan) 2616 if (zonelist_rescan)
2649 goto zonelist_scan; 2617 goto zonelist_scan;
2650 2618
@@ -2669,7 +2637,7 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
2669 DEFAULT_RATELIMIT_INTERVAL, 2637 DEFAULT_RATELIMIT_INTERVAL,
2670 DEFAULT_RATELIMIT_BURST); 2638 DEFAULT_RATELIMIT_BURST);
2671 2639
2672void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) 2640void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
2673{ 2641{
2674 unsigned int filter = SHOW_MEM_FILTER_NODES; 2642 unsigned int filter = SHOW_MEM_FILTER_NODES;
2675 2643
@@ -2686,7 +2654,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
2686 if (test_thread_flag(TIF_MEMDIE) || 2654 if (test_thread_flag(TIF_MEMDIE) ||
2687 (current->flags & (PF_MEMALLOC | PF_EXITING))) 2655 (current->flags & (PF_MEMALLOC | PF_EXITING)))
2688 filter &= ~SHOW_MEM_FILTER_NODES; 2656 filter &= ~SHOW_MEM_FILTER_NODES;
2689 if (in_interrupt() || !(gfp_mask & __GFP_WAIT)) 2657 if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
2690 filter &= ~SHOW_MEM_FILTER_NODES; 2658 filter &= ~SHOW_MEM_FILTER_NODES;
2691 2659
2692 if (fmt) { 2660 if (fmt) {
@@ -2703,7 +2671,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
2703 va_end(args); 2671 va_end(args);
2704 } 2672 }
2705 2673
2706 pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n", 2674 pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n",
2707 current->comm, order, gfp_mask); 2675 current->comm, order, gfp_mask);
2708 2676
2709 dump_stack(); 2677 dump_stack();
@@ -2889,19 +2857,17 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2889 if (unlikely(!(*did_some_progress))) 2857 if (unlikely(!(*did_some_progress)))
2890 return NULL; 2858 return NULL;
2891 2859
2892 /* After successful reclaim, reconsider all zones for allocation */
2893 if (IS_ENABLED(CONFIG_NUMA))
2894 zlc_clear_zones_full(ac->zonelist);
2895
2896retry: 2860retry:
2897 page = get_page_from_freelist(gfp_mask, order, 2861 page = get_page_from_freelist(gfp_mask, order,
2898 alloc_flags & ~ALLOC_NO_WATERMARKS, ac); 2862 alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
2899 2863
2900 /* 2864 /*
2901 * If an allocation failed after direct reclaim, it could be because 2865 * If an allocation failed after direct reclaim, it could be because
2902 * pages are pinned on the per-cpu lists. Drain them and try again 2866 * pages are pinned on the per-cpu lists or in high alloc reserves.
 2867	 * Shrink them and try again
2903 */ 2868 */
2904 if (!page && !drained) { 2869 if (!page && !drained) {
2870 unreserve_highatomic_pageblock(ac);
2905 drain_all_pages(NULL); 2871 drain_all_pages(NULL);
2906 drained = true; 2872 drained = true;
2907 goto retry; 2873 goto retry;
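The retry above is a one-shot pattern: if the allocation still fails after direct reclaim, release the high-atomic reserve and drain the per-CPU lists exactly once, then try again. Sketched generically below; the three helpers are stand-in stubs, not the kernel functions, and the real calls take arguments.

#include <stdbool.h>
#include <stddef.h>

/* Stand-ins for get_page_from_freelist(), unreserve_highatomic_pageblock()
 * and drain_all_pages(); real signatures differ. */
static void *try_allocate(void) { return NULL; }
static void release_reserves(void) { }
static void drain_percpu_lists(void) { }

static void *alloc_after_reclaim(void)
{
	bool drained = false;
	void *page;

retry:
	page = try_allocate();
	if (!page && !drained) {
		/* Pages may be stranded in reserves or per-CPU lists; flush once. */
		release_reserves();
		drain_percpu_lists();
		drained = true;
		goto retry;
	}
	return page;
}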
@@ -2946,7 +2912,6 @@ static inline int
2946gfp_to_alloc_flags(gfp_t gfp_mask) 2912gfp_to_alloc_flags(gfp_t gfp_mask)
2947{ 2913{
2948 int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; 2914 int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
2949 const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
2950 2915
2951 /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ 2916 /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
2952 BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); 2917 BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2955,11 +2920,11 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
2955 * The caller may dip into page reserves a bit more if the caller 2920 * The caller may dip into page reserves a bit more if the caller
2956 * cannot run direct reclaim, or if the caller has realtime scheduling 2921 * cannot run direct reclaim, or if the caller has realtime scheduling
2957 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will 2922 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
2958 * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH). 2923 * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
2959 */ 2924 */
2960 alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); 2925 alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
2961 2926
2962 if (atomic) { 2927 if (gfp_mask & __GFP_ATOMIC) {
2963 /* 2928 /*
2964 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even 2929 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
2965 * if it can't schedule. 2930 * if it can't schedule.
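gfp_to_alloc_flags() now keys ALLOC_HARDER off __GFP_ATOMIC instead of the old "no __GFP_WAIT" heuristic, and it still copies __GFP_HIGH straight into ALLOC_HIGH because the two bits are defined equal (the BUILD_BUG_ON above enforces that). A simplified model with invented bit values; only the relationships between the flags matter here.

/* Invented bit values standing in for gfp.h / mm/internal.h definitions. */
#define GFPF_HIGH		0x01u
#define GFPF_ATOMIC		0x02u
#define GFPF_NOMEMALLOC		0x04u

#define ALLOCF_WMARK_MIN	0x00u
#define ALLOCF_HIGH		0x01u	/* deliberately equal to GFPF_HIGH */
#define ALLOCF_HARDER		0x10u
#define ALLOCF_CPUSET		0x20u

static unsigned int to_alloc_flags(unsigned int gfp)
{
	unsigned int flags = ALLOCF_WMARK_MIN | ALLOCF_CPUSET;

	/* __GFP_HIGH maps to ALLOC_HIGH with a plain mask, no branch needed. */
	flags |= gfp & GFPF_HIGH;

	/* Atomic callers may dig deeper, unless they opted out of reserves. */
	if ((gfp & GFPF_ATOMIC) && !(gfp & GFPF_NOMEMALLOC))
		flags |= ALLOCF_HARDER;

	return flags;
}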
@@ -2996,11 +2961,16 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
2996 return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS); 2961 return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
2997} 2962}
2998 2963
2964static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
2965{
2966 return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
2967}
2968
2999static inline struct page * 2969static inline struct page *
3000__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, 2970__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
3001 struct alloc_context *ac) 2971 struct alloc_context *ac)
3002{ 2972{
3003 const gfp_t wait = gfp_mask & __GFP_WAIT; 2973 bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
3004 struct page *page = NULL; 2974 struct page *page = NULL;
3005 int alloc_flags; 2975 int alloc_flags;
3006 unsigned long pages_reclaimed = 0; 2976 unsigned long pages_reclaimed = 0;
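The series splits the old __GFP_WAIT into __GFP_DIRECT_RECLAIM (the caller may block and reclaim itself), __GFP_KSWAPD_RECLAIM (kswapd may be woken) and __GFP_ATOMIC (the caller cannot sleep and may dip into reserves). gfpflags_allow_blocking(), used throughout the later slab, slub, vmalloc and vmscan hunks, is a test on the first bit, and the slow path in the next hunk warns and strips __GFP_ATOMIC when it is combined with direct reclaim. A compact model of those predicates, with invented bit values:

#include <stdbool.h>

/* Invented bit values standing in for the real gfp.h definitions. */
#define GFPF_DIRECT_RECLAIM	0x01u
#define GFPF_KSWAPD_RECLAIM	0x02u
#define GFPF_ATOMIC		0x04u

/* May this caller sleep and run direct reclaim? (cf. gfpflags_allow_blocking) */
static bool allow_blocking(unsigned int gfp)
{
	return gfp & GFPF_DIRECT_RECLAIM;
}

/* Only wake kswapd when the caller asked for background reclaim. */
static bool should_wake_kswapd(unsigned int gfp)
{
	return gfp & GFPF_KSWAPD_RECLAIM;
}

/* Atomic access to reserves combined with direct reclaim is a caller bug;
 * drop the atomic bit, mirroring the WARN_ON_ONCE check in the slow path. */
static unsigned int sanitize(unsigned int gfp)
{
	if ((gfp & (GFPF_ATOMIC | GFPF_DIRECT_RECLAIM)) ==
	    (GFPF_ATOMIC | GFPF_DIRECT_RECLAIM))
		gfp &= ~GFPF_ATOMIC;
	return gfp;
}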
@@ -3021,15 +2991,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
3021 } 2991 }
3022 2992
3023 /* 2993 /*
2994 * We also sanity check to catch abuse of atomic reserves being used by
2995 * callers that are not in atomic context.
2996 */
2997 if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
2998 (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
2999 gfp_mask &= ~__GFP_ATOMIC;
3000
3001 /*
3024 * If this allocation cannot block and it is for a specific node, then 3002 * If this allocation cannot block and it is for a specific node, then
3025 * fail early. There's no need to wakeup kswapd or retry for a 3003 * fail early. There's no need to wakeup kswapd or retry for a
3026 * speculative node-specific allocation. 3004 * speculative node-specific allocation.
3027 */ 3005 */
3028 if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait) 3006 if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim)
3029 goto nopage; 3007 goto nopage;
3030 3008
3031retry: 3009retry:
3032 if (!(gfp_mask & __GFP_NO_KSWAPD)) 3010 if (gfp_mask & __GFP_KSWAPD_RECLAIM)
3033 wake_all_kswapds(order, ac); 3011 wake_all_kswapds(order, ac);
3034 3012
3035 /* 3013 /*
@@ -3072,8 +3050,8 @@ retry:
3072 } 3050 }
3073 } 3051 }
3074 3052
3075 /* Atomic allocations - we can't balance anything */ 3053 /* Caller is not willing to reclaim, we can't balance anything */
3076 if (!wait) { 3054 if (!can_direct_reclaim) {
3077 /* 3055 /*
3078 * All existing users of the deprecated __GFP_NOFAIL are 3056 * All existing users of the deprecated __GFP_NOFAIL are
3079 * blockable, so warn of any new users that actually allow this 3057 * blockable, so warn of any new users that actually allow this
@@ -3103,7 +3081,7 @@ retry:
3103 goto got_pg; 3081 goto got_pg;
3104 3082
3105 /* Checks for THP-specific high-order allocations */ 3083 /* Checks for THP-specific high-order allocations */
3106 if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) { 3084 if (is_thp_gfp_mask(gfp_mask)) {
3107 /* 3085 /*
3108 * If compaction is deferred for high-order allocations, it is 3086 * If compaction is deferred for high-order allocations, it is
3109 * because sync compaction recently failed. If this is the case 3087 * because sync compaction recently failed. If this is the case
@@ -3138,8 +3116,7 @@ retry:
3138 * fault, so use asynchronous memory compaction for THP unless it is 3116 * fault, so use asynchronous memory compaction for THP unless it is
3139 * khugepaged trying to collapse. 3117 * khugepaged trying to collapse.
3140 */ 3118 */
3141 if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE || 3119 if (!is_thp_gfp_mask(gfp_mask) || (current->flags & PF_KTHREAD))
3142 (current->flags & PF_KTHREAD))
3143 migration_mode = MIGRATE_SYNC_LIGHT; 3120 migration_mode = MIGRATE_SYNC_LIGHT;
3144 3121
3145 /* Try direct reclaim and then allocating */ 3122 /* Try direct reclaim and then allocating */
@@ -3210,7 +3187,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
3210 3187
3211 lockdep_trace_alloc(gfp_mask); 3188 lockdep_trace_alloc(gfp_mask);
3212 3189
3213 might_sleep_if(gfp_mask & __GFP_WAIT); 3190 might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
3214 3191
3215 if (should_fail_alloc_page(gfp_mask, order)) 3192 if (should_fail_alloc_page(gfp_mask, order))
3216 return NULL; 3193 return NULL;
@@ -3231,6 +3208,10 @@ retry_cpuset:
3231 3208
3232 /* We set it here, as __alloc_pages_slowpath might have changed it */ 3209 /* We set it here, as __alloc_pages_slowpath might have changed it */
3233 ac.zonelist = zonelist; 3210 ac.zonelist = zonelist;
3211
3212 /* Dirty zone balancing only done in the fast path */
3213 ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
3214
3234 /* The preferred zone is used for statistics later */ 3215 /* The preferred zone is used for statistics later */
3235 preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, 3216 preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx,
3236 ac.nodemask ? : &cpuset_current_mems_allowed, 3217 ac.nodemask ? : &cpuset_current_mems_allowed,
@@ -3249,6 +3230,7 @@ retry_cpuset:
3249 * complete. 3230 * complete.
3250 */ 3231 */
3251 alloc_mask = memalloc_noio_flags(gfp_mask); 3232 alloc_mask = memalloc_noio_flags(gfp_mask);
3233 ac.spread_dirty_pages = false;
3252 3234
3253 page = __alloc_pages_slowpath(alloc_mask, order, &ac); 3235 page = __alloc_pages_slowpath(alloc_mask, order, &ac);
3254 } 3236 }
@@ -3467,7 +3449,8 @@ void free_kmem_pages(unsigned long addr, unsigned int order)
3467 } 3449 }
3468} 3450}
3469 3451
3470static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) 3452static void *make_alloc_exact(unsigned long addr, unsigned int order,
3453 size_t size)
3471{ 3454{
3472 if (addr) { 3455 if (addr) {
3473 unsigned long alloc_end = addr + (PAGE_SIZE << order); 3456 unsigned long alloc_end = addr + (PAGE_SIZE << order);
@@ -3517,7 +3500,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
3517 */ 3500 */
3518void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) 3501void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
3519{ 3502{
3520 unsigned order = get_order(size); 3503 unsigned int order = get_order(size);
3521 struct page *p = alloc_pages_node(nid, gfp_mask, order); 3504 struct page *p = alloc_pages_node(nid, gfp_mask, order);
3522 if (!p) 3505 if (!p)
3523 return NULL; 3506 return NULL;
@@ -3666,7 +3649,6 @@ static void show_migration_types(unsigned char type)
3666 [MIGRATE_UNMOVABLE] = 'U', 3649 [MIGRATE_UNMOVABLE] = 'U',
3667 [MIGRATE_RECLAIMABLE] = 'E', 3650 [MIGRATE_RECLAIMABLE] = 'E',
3668 [MIGRATE_MOVABLE] = 'M', 3651 [MIGRATE_MOVABLE] = 'M',
3669 [MIGRATE_RESERVE] = 'R',
3670#ifdef CONFIG_CMA 3652#ifdef CONFIG_CMA
3671 [MIGRATE_CMA] = 'C', 3653 [MIGRATE_CMA] = 'C',
3672#endif 3654#endif
@@ -3819,7 +3801,8 @@ void show_free_areas(unsigned int filter)
3819 } 3801 }
3820 3802
3821 for_each_populated_zone(zone) { 3803 for_each_populated_zone(zone) {
3822 unsigned long nr[MAX_ORDER], flags, order, total = 0; 3804 unsigned int order;
3805 unsigned long nr[MAX_ORDER], flags, total = 0;
3823 unsigned char types[MAX_ORDER]; 3806 unsigned char types[MAX_ORDER];
3824 3807
3825 if (skip_free_areas_node(filter, zone_to_nid(zone))) 3808 if (skip_free_areas_node(filter, zone_to_nid(zone)))
@@ -4168,7 +4151,7 @@ static void build_zonelists(pg_data_t *pgdat)
4168 nodemask_t used_mask; 4151 nodemask_t used_mask;
4169 int local_node, prev_node; 4152 int local_node, prev_node;
4170 struct zonelist *zonelist; 4153 struct zonelist *zonelist;
4171 int order = current_zonelist_order; 4154 unsigned int order = current_zonelist_order;
4172 4155
4173 /* initialize zonelists */ 4156 /* initialize zonelists */
4174 for (i = 0; i < MAX_ZONELISTS; i++) { 4157 for (i = 0; i < MAX_ZONELISTS; i++) {
@@ -4212,20 +4195,6 @@ static void build_zonelists(pg_data_t *pgdat)
4212 build_thisnode_zonelists(pgdat); 4195 build_thisnode_zonelists(pgdat);
4213} 4196}
4214 4197
4215/* Construct the zonelist performance cache - see further mmzone.h */
4216static void build_zonelist_cache(pg_data_t *pgdat)
4217{
4218 struct zonelist *zonelist;
4219 struct zonelist_cache *zlc;
4220 struct zoneref *z;
4221
4222 zonelist = &pgdat->node_zonelists[0];
4223 zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
4224 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
4225 for (z = zonelist->_zonerefs; z->zone; z++)
4226 zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
4227}
4228
4229#ifdef CONFIG_HAVE_MEMORYLESS_NODES 4198#ifdef CONFIG_HAVE_MEMORYLESS_NODES
4230/* 4199/*
4231 * Return node id of node used for "local" allocations. 4200 * Return node id of node used for "local" allocations.
@@ -4286,12 +4255,6 @@ static void build_zonelists(pg_data_t *pgdat)
4286 zonelist->_zonerefs[j].zone_idx = 0; 4255 zonelist->_zonerefs[j].zone_idx = 0;
4287} 4256}
4288 4257
4289/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
4290static void build_zonelist_cache(pg_data_t *pgdat)
4291{
4292 pgdat->node_zonelists[0].zlcache_ptr = NULL;
4293}
4294
4295#endif /* CONFIG_NUMA */ 4258#endif /* CONFIG_NUMA */
4296 4259
4297/* 4260/*
@@ -4332,14 +4295,12 @@ static int __build_all_zonelists(void *data)
4332 4295
4333 if (self && !node_online(self->node_id)) { 4296 if (self && !node_online(self->node_id)) {
4334 build_zonelists(self); 4297 build_zonelists(self);
4335 build_zonelist_cache(self);
4336 } 4298 }
4337 4299
4338 for_each_online_node(nid) { 4300 for_each_online_node(nid) {
4339 pg_data_t *pgdat = NODE_DATA(nid); 4301 pg_data_t *pgdat = NODE_DATA(nid);
4340 4302
4341 build_zonelists(pgdat); 4303 build_zonelists(pgdat);
4342 build_zonelist_cache(pgdat);
4343 } 4304 }
4344 4305
4345 /* 4306 /*
@@ -4499,120 +4460,6 @@ static inline unsigned long wait_table_bits(unsigned long size)
4499} 4460}
4500 4461
4501/* 4462/*
4502 * Check if a pageblock contains reserved pages
4503 */
4504static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
4505{
4506 unsigned long pfn;
4507
4508 for (pfn = start_pfn; pfn < end_pfn; pfn++) {
4509 if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
4510 return 1;
4511 }
4512 return 0;
4513}
4514
4515/*
4516 * Mark a number of pageblocks as MIGRATE_RESERVE. The number
4517 * of blocks reserved is based on min_wmark_pages(zone). The memory within
4518 * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
4519 * higher will lead to a bigger reserve which will get freed as contiguous
4520 * blocks as reclaim kicks in
4521 */
4522static void setup_zone_migrate_reserve(struct zone *zone)
4523{
4524 unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
4525 struct page *page;
4526 unsigned long block_migratetype;
4527 int reserve;
4528 int old_reserve;
4529
4530 /*
4531 * Get the start pfn, end pfn and the number of blocks to reserve
4532 * We have to be careful to be aligned to pageblock_nr_pages to
4533 * make sure that we always check pfn_valid for the first page in
4534 * the block.
4535 */
4536 start_pfn = zone->zone_start_pfn;
4537 end_pfn = zone_end_pfn(zone);
4538 start_pfn = roundup(start_pfn, pageblock_nr_pages);
4539 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
4540 pageblock_order;
4541
4542 /*
4543 * Reserve blocks are generally in place to help high-order atomic
4544 * allocations that are short-lived. A min_free_kbytes value that
4545 * would result in more than 2 reserve blocks for atomic allocations
4546 * is assumed to be in place to help anti-fragmentation for the
4547 * future allocation of hugepages at runtime.
4548 */
4549 reserve = min(2, reserve);
4550 old_reserve = zone->nr_migrate_reserve_block;
4551
4552 /* When memory hot-add, we almost always need to do nothing */
4553 if (reserve == old_reserve)
4554 return;
4555 zone->nr_migrate_reserve_block = reserve;
4556
4557 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
4558 if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
4559 return;
4560
4561 if (!pfn_valid(pfn))
4562 continue;
4563 page = pfn_to_page(pfn);
4564
4565 /* Watch out for overlapping nodes */
4566 if (page_to_nid(page) != zone_to_nid(zone))
4567 continue;
4568
4569 block_migratetype = get_pageblock_migratetype(page);
4570
4571 /* Only test what is necessary when the reserves are not met */
4572 if (reserve > 0) {
4573 /*
4574 * Blocks with reserved pages will never free, skip
4575 * them.
4576 */
4577 block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
4578 if (pageblock_is_reserved(pfn, block_end_pfn))
4579 continue;
4580
4581 /* If this block is reserved, account for it */
4582 if (block_migratetype == MIGRATE_RESERVE) {
4583 reserve--;
4584 continue;
4585 }
4586
4587 /* Suitable for reserving if this block is movable */
4588 if (block_migratetype == MIGRATE_MOVABLE) {
4589 set_pageblock_migratetype(page,
4590 MIGRATE_RESERVE);
4591 move_freepages_block(zone, page,
4592 MIGRATE_RESERVE);
4593 reserve--;
4594 continue;
4595 }
4596 } else if (!old_reserve) {
4597 /*
4598 * At boot time we don't need to scan the whole zone
4599 * for turning off MIGRATE_RESERVE.
4600 */
4601 break;
4602 }
4603
4604 /*
4605 * If the reserve is met and this is a previous reserved block,
4606 * take it back
4607 */
4608 if (block_migratetype == MIGRATE_RESERVE) {
4609 set_pageblock_migratetype(page, MIGRATE_MOVABLE);
4610 move_freepages_block(zone, page, MIGRATE_MOVABLE);
4611 }
4612 }
4613}
4614
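For reference, the deleted setup_zone_migrate_reserve() sized its reserve from the zone's min watermark: round min_wmark_pages(zone) up to whole pageblocks, convert that to a block count, and clamp it to two blocks. A worked example of the arithmetic; the watermark value is made up, and pageblock_order 9 is the common 2 MB pageblock with 4 KB pages.

#include <stdio.h>

#define PAGEBLOCK_ORDER		9
#define PAGEBLOCK_NR_PAGES	(1UL << PAGEBLOCK_ORDER)	/* 512 pages */

static unsigned long roundup_pages(unsigned long x, unsigned long to)
{
	return ((x + to - 1) / to) * to;
}

int main(void)
{
	unsigned long min_wmark = 700;	/* pretend min watermark, in pages */
	unsigned long reserve;

	/* 700 -> 1024 pages -> 2 pageblocks; larger watermarks are clamped to 2. */
	reserve = roundup_pages(min_wmark, PAGEBLOCK_NR_PAGES) >> PAGEBLOCK_ORDER;
	if (reserve > 2)
		reserve = 2;

	printf("MIGRATE_RESERVE blocks: %lu\n", reserve);
	return 0;
}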
4615/*
4616 * Initially all pages are reserved - free ones are freed 4463 * Initially all pages are reserved - free ones are freed
4617 * up by free_all_bootmem() once the early boot process is 4464 * up by free_all_bootmem() once the early boot process is
4618 * done. Non-atomic initialization, single-pass. 4465 * done. Non-atomic initialization, single-pass.
@@ -4651,9 +4498,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
4651 * movable at startup. This will force kernel allocations 4498 * movable at startup. This will force kernel allocations
4652 * to reserve their blocks rather than leaking throughout 4499 * to reserve their blocks rather than leaking throughout
4653 * the address space during boot when many long-lived 4500 * the address space during boot when many long-lived
4654 * kernel allocations are made. Later some blocks near 4501 * kernel allocations are made.
4655 * the start are marked MIGRATE_RESERVE by
4656 * setup_zone_migrate_reserve()
4657 * 4502 *
4658 * bitmap is created for zone's valid pfn range. but memmap 4503 * bitmap is created for zone's valid pfn range. but memmap
4659 * can be created for invalid pages (for alignment) 4504 * can be created for invalid pages (for alignment)
@@ -6214,7 +6059,6 @@ static void __setup_per_zone_wmarks(void)
6214 high_wmark_pages(zone) - low_wmark_pages(zone) - 6059 high_wmark_pages(zone) - low_wmark_pages(zone) -
6215 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); 6060 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
6216 6061
6217 setup_zone_migrate_reserve(zone);
6218 spin_unlock_irqrestore(&zone->lock, flags); 6062 spin_unlock_irqrestore(&zone->lock, flags);
6219 } 6063 }
6220 6064
@@ -6836,7 +6680,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
6836 unsigned migratetype) 6680 unsigned migratetype)
6837{ 6681{
6838 unsigned long outer_start, outer_end; 6682 unsigned long outer_start, outer_end;
6839 int ret = 0, order; 6683 unsigned int order;
6684 int ret = 0;
6840 6685
6841 struct compact_control cc = { 6686 struct compact_control cc = {
6842 .nr_migratepages = 0, 6687 .nr_migratepages = 0,
diff --git a/mm/readahead.c b/mm/readahead.c
index 998ad592f408..ba22d7fe0afb 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -90,7 +90,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
90 page = list_to_page(pages); 90 page = list_to_page(pages);
91 list_del(&page->lru); 91 list_del(&page->lru);
92 if (add_to_page_cache_lru(page, mapping, page->index, 92 if (add_to_page_cache_lru(page, mapping, page->index,
93 GFP_KERNEL & mapping_gfp_mask(mapping))) { 93 mapping_gfp_constraint(mapping, GFP_KERNEL))) {
94 read_cache_pages_invalidate_page(mapping, page); 94 read_cache_pages_invalidate_page(mapping, page);
95 continue; 95 continue;
96 } 96 }
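mapping_gfp_constraint() is read here as the old expression with its operands swapped: it masks the caller's requested gfp flags with whatever the address_space permits. A one-line sketch of the helper as assumed, using a stand-in mapping type rather than the kernel's struct address_space:

typedef unsigned int gfp_t;

struct address_space_model {
	gfp_t gfp_mask;		/* flags the mapping tolerates, e.g. no FS recursion */
};

/* Restrict a requested allocation mask to what the mapping allows. */
static gfp_t mapping_gfp_constraint_model(const struct address_space_model *m,
					  gfp_t gfp)
{
	return m->gfp_mask & gfp;
}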
@@ -128,7 +128,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
128 struct page *page = list_to_page(pages); 128 struct page *page = list_to_page(pages);
129 list_del(&page->lru); 129 list_del(&page->lru);
130 if (!add_to_page_cache_lru(page, mapping, page->index, 130 if (!add_to_page_cache_lru(page, mapping, page->index,
131 GFP_KERNEL & mapping_gfp_mask(mapping))) { 131 mapping_gfp_constraint(mapping, GFP_KERNEL))) {
132 mapping->a_ops->readpage(filp, page); 132 mapping->a_ops->readpage(filp, page);
133 } 133 }
134 page_cache_release(page); 134 page_cache_release(page);
diff --git a/mm/shmem.c b/mm/shmem.c
index 3b8b73928398..9187eee4128b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -73,6 +73,8 @@ static struct vfsmount *shm_mnt;
73#include <asm/uaccess.h> 73#include <asm/uaccess.h>
74#include <asm/pgtable.h> 74#include <asm/pgtable.h>
75 75
76#include "internal.h"
77
76#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) 78#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
77#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) 79#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
78 80
diff --git a/mm/slab.c b/mm/slab.c
index 272e809404d5..e0819fa96559 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1031,12 +1031,12 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1031} 1031}
1032 1032
1033/* 1033/*
1034 * Construct gfp mask to allocate from a specific node but do not invoke reclaim 1034 * Construct gfp mask to allocate from a specific node but do not direct reclaim
1035 * or warn about failures. 1035 * or warn about failures. kswapd may still wake to reclaim in the background.
1036 */ 1036 */
1037static inline gfp_t gfp_exact_node(gfp_t flags) 1037static inline gfp_t gfp_exact_node(gfp_t flags)
1038{ 1038{
1039 return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT; 1039 return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
1040} 1040}
1041#endif 1041#endif
1042 1042
@@ -1889,21 +1889,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
1889 1889
1890 freelist = page->freelist; 1890 freelist = page->freelist;
1891 slab_destroy_debugcheck(cachep, page); 1891 slab_destroy_debugcheck(cachep, page);
1892 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { 1892 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
1893 struct rcu_head *head; 1893 call_rcu(&page->rcu_head, kmem_rcu_free);
1894 1894 else
1895 /*
1896 * RCU free overloads the RCU head over the LRU.
 1897		 * slab_page has been overloaded over the LRU,
1898 * however it is not used from now on so that
1899 * we can use it safely.
1900 */
1901 head = (void *)&page->rcu_head;
1902 call_rcu(head, kmem_rcu_free);
1903
1904 } else {
1905 kmem_freepages(cachep, page); 1895 kmem_freepages(cachep, page);
1906 }
1907 1896
1908 /* 1897 /*
1909 * From now on, we don't use freelist 1898 * From now on, we don't use freelist
@@ -2633,7 +2622,7 @@ static int cache_grow(struct kmem_cache *cachep,
2633 2622
2634 offset *= cachep->colour_off; 2623 offset *= cachep->colour_off;
2635 2624
2636 if (local_flags & __GFP_WAIT) 2625 if (gfpflags_allow_blocking(local_flags))
2637 local_irq_enable(); 2626 local_irq_enable();
2638 2627
2639 /* 2628 /*
@@ -2663,7 +2652,7 @@ static int cache_grow(struct kmem_cache *cachep,
2663 2652
2664 cache_init_objs(cachep, page); 2653 cache_init_objs(cachep, page);
2665 2654
2666 if (local_flags & __GFP_WAIT) 2655 if (gfpflags_allow_blocking(local_flags))
2667 local_irq_disable(); 2656 local_irq_disable();
2668 check_irq_off(); 2657 check_irq_off();
2669 spin_lock(&n->list_lock); 2658 spin_lock(&n->list_lock);
@@ -2677,7 +2666,7 @@ static int cache_grow(struct kmem_cache *cachep,
2677opps1: 2666opps1:
2678 kmem_freepages(cachep, page); 2667 kmem_freepages(cachep, page);
2679failed: 2668failed:
2680 if (local_flags & __GFP_WAIT) 2669 if (gfpflags_allow_blocking(local_flags))
2681 local_irq_disable(); 2670 local_irq_disable();
2682 return 0; 2671 return 0;
2683} 2672}
@@ -2869,7 +2858,7 @@ force_grow:
2869static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, 2858static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
2870 gfp_t flags) 2859 gfp_t flags)
2871{ 2860{
2872 might_sleep_if(flags & __GFP_WAIT); 2861 might_sleep_if(gfpflags_allow_blocking(flags));
2873#if DEBUG 2862#if DEBUG
2874 kmem_flagcheck(cachep, flags); 2863 kmem_flagcheck(cachep, flags);
2875#endif 2864#endif
@@ -3057,11 +3046,11 @@ retry:
3057 */ 3046 */
3058 struct page *page; 3047 struct page *page;
3059 3048
3060 if (local_flags & __GFP_WAIT) 3049 if (gfpflags_allow_blocking(local_flags))
3061 local_irq_enable(); 3050 local_irq_enable();
3062 kmem_flagcheck(cache, flags); 3051 kmem_flagcheck(cache, flags);
3063 page = kmem_getpages(cache, local_flags, numa_mem_id()); 3052 page = kmem_getpages(cache, local_flags, numa_mem_id());
3064 if (local_flags & __GFP_WAIT) 3053 if (gfpflags_allow_blocking(local_flags))
3065 local_irq_disable(); 3054 local_irq_disable();
3066 if (page) { 3055 if (page) {
3067 /* 3056 /*
diff --git a/mm/slub.c b/mm/slub.c
index 75a5fa92ac2a..7cb4bf9ae320 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1265,7 +1265,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
1265{ 1265{
1266 flags &= gfp_allowed_mask; 1266 flags &= gfp_allowed_mask;
1267 lockdep_trace_alloc(flags); 1267 lockdep_trace_alloc(flags);
1268 might_sleep_if(flags & __GFP_WAIT); 1268 might_sleep_if(gfpflags_allow_blocking(flags));
1269 1269
1270 if (should_failslab(s->object_size, flags, s->flags)) 1270 if (should_failslab(s->object_size, flags, s->flags))
1271 return NULL; 1271 return NULL;
@@ -1353,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1353 1353
1354 flags &= gfp_allowed_mask; 1354 flags &= gfp_allowed_mask;
1355 1355
1356 if (flags & __GFP_WAIT) 1356 if (gfpflags_allow_blocking(flags))
1357 local_irq_enable(); 1357 local_irq_enable();
1358 1358
1359 flags |= s->allocflags; 1359 flags |= s->allocflags;
@@ -1363,8 +1363,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1363 * so we fall-back to the minimum order allocation. 1363 * so we fall-back to the minimum order allocation.
1364 */ 1364 */
1365 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; 1365 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1366 if ((alloc_gfp & __GFP_WAIT) && oo_order(oo) > oo_order(s->min)) 1366 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1367 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_WAIT; 1367 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM;
1368 1368
1369 page = alloc_slab_page(s, alloc_gfp, node, oo); 1369 page = alloc_slab_page(s, alloc_gfp, node, oo);
1370 if (unlikely(!page)) { 1370 if (unlikely(!page)) {
@@ -1424,7 +1424,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1424 page->frozen = 1; 1424 page->frozen = 1;
1425 1425
1426out: 1426out:
1427 if (flags & __GFP_WAIT) 1427 if (gfpflags_allow_blocking(flags))
1428 local_irq_disable(); 1428 local_irq_disable();
1429 if (!page) 1429 if (!page)
1430 return NULL; 1430 return NULL;
@@ -1507,10 +1507,7 @@ static void free_slab(struct kmem_cache *s, struct page *page)
1507 VM_BUG_ON(s->reserved != sizeof(*head)); 1507 VM_BUG_ON(s->reserved != sizeof(*head));
1508 head = page_address(page) + offset; 1508 head = page_address(page) + offset;
1509 } else { 1509 } else {
1510 /* 1510 head = &page->rcu_head;
1511 * RCU free overloads the RCU head over the LRU
1512 */
1513 head = (void *)&page->lru;
1514 } 1511 }
1515 1512
1516 call_rcu(head, rcu_free_slab); 1513 call_rcu(head, rcu_free_slab);
diff --git a/mm/swap.c b/mm/swap.c
index 983f692a47fd..39395fb549c0 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -201,7 +201,7 @@ out_put_single:
201 __put_single_page(page); 201 __put_single_page(page);
202 return; 202 return;
203 } 203 }
204 VM_BUG_ON_PAGE(page_head != page->first_page, page); 204 VM_BUG_ON_PAGE(page_head != compound_head(page), page);
205 /* 205 /*
206 * We can release the refcount taken by 206 * We can release the refcount taken by
207 * get_page_unless_zero() now that 207 * get_page_unless_zero() now that
@@ -262,7 +262,7 @@ static void put_compound_page(struct page *page)
262 * Case 3 is possible, as we may race with 262 * Case 3 is possible, as we may race with
263 * __split_huge_page_refcount tearing down a THP page. 263 * __split_huge_page_refcount tearing down a THP page.
264 */ 264 */
265 page_head = compound_head_by_tail(page); 265 page_head = compound_head(page);
266 if (!__compound_tail_refcounted(page_head)) 266 if (!__compound_tail_refcounted(page_head))
267 put_unrefcounted_compound_page(page_head, page); 267 put_unrefcounted_compound_page(page_head, page);
268 else 268 else
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9db9ef5e8481..d04563480c94 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -35,6 +35,8 @@
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include <asm/shmparam.h> 36#include <asm/shmparam.h>
37 37
38#include "internal.h"
39
38struct vfree_deferred { 40struct vfree_deferred {
39 struct llist_head list; 41 struct llist_head list;
40 struct work_struct wq; 42 struct work_struct wq;
@@ -1617,7 +1619,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1617 goto fail; 1619 goto fail;
1618 } 1620 }
1619 area->pages[i] = page; 1621 area->pages[i] = page;
1620 if (gfp_mask & __GFP_WAIT) 1622 if (gfpflags_allow_blocking(gfp_mask))
1621 cond_resched(); 1623 cond_resched();
1622 } 1624 }
1623 1625
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 55721b619aee..2aec4241b42a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1476,7 +1476,7 @@ static int too_many_isolated(struct zone *zone, int file,
1476 * won't get blocked by normal direct-reclaimers, forming a circular 1476 * won't get blocked by normal direct-reclaimers, forming a circular
1477 * deadlock. 1477 * deadlock.
1478 */ 1478 */
1479 if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS) 1479 if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
1480 inactive >>= 3; 1480 inactive >>= 3;
1481 1481
1482 return isolated > inactive; 1482 return isolated > inactive;
@@ -2477,7 +2477,7 @@ static inline bool compaction_ready(struct zone *zone, int order)
2477 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( 2477 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
2478 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); 2478 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
2479 watermark = high_wmark_pages(zone) + balance_gap + (2UL << order); 2479 watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
2480 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); 2480 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0);
2481 2481
2482 /* 2482 /*
2483 * If compaction is deferred, reclaim up to a point where 2483 * If compaction is deferred, reclaim up to a point where
@@ -2960,7 +2960,7 @@ static bool zone_balanced(struct zone *zone, int order,
2960 unsigned long balance_gap, int classzone_idx) 2960 unsigned long balance_gap, int classzone_idx)
2961{ 2961{
2962 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) + 2962 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
2963 balance_gap, classzone_idx, 0)) 2963 balance_gap, classzone_idx))
2964 return false; 2964 return false;
2965 2965
2966 if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone, 2966 if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone,
@@ -3791,7 +3791,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3791 /* 3791 /*
3792 * Do not scan if the allocation should not be delayed. 3792 * Do not scan if the allocation should not be delayed.
3793 */ 3793 */
3794 if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC)) 3794 if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC))
3795 return ZONE_RECLAIM_NOSCAN; 3795 return ZONE_RECLAIM_NOSCAN;
3796 3796
3797 /* 3797 /*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ffcb4f58bf3e..879a2be23325 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -923,7 +923,7 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
923 "Unmovable", 923 "Unmovable",
924 "Reclaimable", 924 "Reclaimable",
925 "Movable", 925 "Movable",
926 "Reserve", 926 "HighAtomic",
927#ifdef CONFIG_CMA 927#ifdef CONFIG_CMA
928 "CMA", 928 "CMA",
929#endif 929#endif
diff --git a/mm/zbud.c b/mm/zbud.c
index fa48bcdff9d5..d8a181fd779b 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -137,7 +137,7 @@ static const struct zbud_ops zbud_zpool_ops = {
137 .evict = zbud_zpool_evict 137 .evict = zbud_zpool_evict
138}; 138};
139 139
140static void *zbud_zpool_create(char *name, gfp_t gfp, 140static void *zbud_zpool_create(const char *name, gfp_t gfp,
141 const struct zpool_ops *zpool_ops, 141 const struct zpool_ops *zpool_ops,
142 struct zpool *zpool) 142 struct zpool *zpool)
143{ 143{
diff --git a/mm/zpool.c b/mm/zpool.c
index 8f670d3e8706..fd3ff719c32c 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -18,8 +18,6 @@
18#include <linux/zpool.h> 18#include <linux/zpool.h>
19 19
20struct zpool { 20struct zpool {
21 char *type;
22
23 struct zpool_driver *driver; 21 struct zpool_driver *driver;
24 void *pool; 22 void *pool;
25 const struct zpool_ops *ops; 23 const struct zpool_ops *ops;
@@ -73,7 +71,8 @@ int zpool_unregister_driver(struct zpool_driver *driver)
73} 71}
74EXPORT_SYMBOL(zpool_unregister_driver); 72EXPORT_SYMBOL(zpool_unregister_driver);
75 73
76static struct zpool_driver *zpool_get_driver(char *type) 74/* this assumes @type is null-terminated. */
75static struct zpool_driver *zpool_get_driver(const char *type)
77{ 76{
78 struct zpool_driver *driver; 77 struct zpool_driver *driver;
79 78
@@ -113,6 +112,8 @@ static void zpool_put_driver(struct zpool_driver *driver)
113 * not be loaded, and calling @zpool_create_pool() with the pool type will 112 * not be loaded, and calling @zpool_create_pool() with the pool type will
114 * fail. 113 * fail.
115 * 114 *
115 * The @type string must be null-terminated.
116 *
116 * Returns: true if @type pool is available, false if not 117 * Returns: true if @type pool is available, false if not
117 */ 118 */
118bool zpool_has_pool(char *type) 119bool zpool_has_pool(char *type)
@@ -145,9 +146,11 @@ EXPORT_SYMBOL(zpool_has_pool);
145 * 146 *
146 * Implementations must guarantee this to be thread-safe. 147 * Implementations must guarantee this to be thread-safe.
147 * 148 *
149 * The @type and @name strings must be null-terminated.
150 *
148 * Returns: New zpool on success, NULL on failure. 151 * Returns: New zpool on success, NULL on failure.
149 */ 152 */
150struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, 153struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp,
151 const struct zpool_ops *ops) 154 const struct zpool_ops *ops)
152{ 155{
153 struct zpool_driver *driver; 156 struct zpool_driver *driver;
@@ -174,7 +177,6 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
174 return NULL; 177 return NULL;
175 } 178 }
176 179
177 zpool->type = driver->type;
178 zpool->driver = driver; 180 zpool->driver = driver;
179 zpool->pool = driver->create(name, gfp, ops, zpool); 181 zpool->pool = driver->create(name, gfp, ops, zpool);
180 zpool->ops = ops; 182 zpool->ops = ops;
@@ -208,7 +210,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
208 */ 210 */
209void zpool_destroy_pool(struct zpool *zpool) 211void zpool_destroy_pool(struct zpool *zpool)
210{ 212{
211 pr_debug("destroying pool type %s\n", zpool->type); 213 pr_debug("destroying pool type %s\n", zpool->driver->type);
212 214
213 spin_lock(&pools_lock); 215 spin_lock(&pools_lock);
214 list_del(&zpool->list); 216 list_del(&zpool->list);
@@ -228,9 +230,9 @@ void zpool_destroy_pool(struct zpool *zpool)
228 * 230 *
229 * Returns: The type of zpool. 231 * Returns: The type of zpool.
230 */ 232 */
231char *zpool_get_type(struct zpool *zpool) 233const char *zpool_get_type(struct zpool *zpool)
232{ 234{
233 return zpool->type; 235 return zpool->driver->type;
234} 236}
235 237
236/** 238/**
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index f135b1b6fcdc..9f15bdd9163c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -16,7 +16,7 @@
16 * struct page(s) to form a zspage. 16 * struct page(s) to form a zspage.
17 * 17 *
18 * Usage of struct page fields: 18 * Usage of struct page fields:
19 * page->first_page: points to the first component (0-order) page 19 * page->private: points to the first component (0-order) page
20 * page->index (union with page->freelist): offset of the first object 20 * page->index (union with page->freelist): offset of the first object
21 * starting in this page. For the first page, this is 21 * starting in this page. For the first page, this is
22 * always 0, so we use this field (aka freelist) to point 22 * always 0, so we use this field (aka freelist) to point
@@ -26,8 +26,7 @@
26 * 26 *
27 * For _first_ page only: 27 * For _first_ page only:
28 * 28 *
29 * page->private (union with page->first_page): refers to the 29 * page->private: refers to the component page after the first page
30 * component page after the first page
31 * If the page is first_page for huge object, it stores handle. 30 * If the page is first_page for huge object, it stores handle.
32 * Look at size_class->huge. 31 * Look at size_class->huge.
33 * page->freelist: points to the first free object in zspage. 32 * page->freelist: points to the first free object in zspage.
@@ -38,6 +37,7 @@
38 * page->lru: links together first pages of various zspages. 37 * page->lru: links together first pages of various zspages.
39 * Basically forming list of zspages in a fullness group. 38 * Basically forming list of zspages in a fullness group.
40 * page->mapping: class index and fullness group of the zspage 39 * page->mapping: class index and fullness group of the zspage
40 * page->inuse: the number of objects that are used in this zspage
41 * 41 *
42 * Usage of struct page flags: 42 * Usage of struct page flags:
43 * PG_private: identifies the first component page 43 * PG_private: identifies the first component page
@@ -58,7 +58,7 @@
58#include <linux/cpumask.h> 58#include <linux/cpumask.h>
59#include <linux/cpu.h> 59#include <linux/cpu.h>
60#include <linux/vmalloc.h> 60#include <linux/vmalloc.h>
61#include <linux/hardirq.h> 61#include <linux/preempt.h>
62#include <linux/spinlock.h> 62#include <linux/spinlock.h>
63#include <linux/types.h> 63#include <linux/types.h>
64#include <linux/debugfs.h> 64#include <linux/debugfs.h>
@@ -166,9 +166,14 @@ enum zs_stat_type {
166 OBJ_USED, 166 OBJ_USED,
167 CLASS_ALMOST_FULL, 167 CLASS_ALMOST_FULL,
168 CLASS_ALMOST_EMPTY, 168 CLASS_ALMOST_EMPTY,
169 NR_ZS_STAT_TYPE,
170}; 169};
171 170
171#ifdef CONFIG_ZSMALLOC_STAT
172#define NR_ZS_STAT_TYPE (CLASS_ALMOST_EMPTY + 1)
173#else
174#define NR_ZS_STAT_TYPE (OBJ_USED + 1)
175#endif
176
172struct zs_size_stat { 177struct zs_size_stat {
173 unsigned long objs[NR_ZS_STAT_TYPE]; 178 unsigned long objs[NR_ZS_STAT_TYPE];
174}; 179};
@@ -237,7 +242,7 @@ struct link_free {
237}; 242};
238 243
239struct zs_pool { 244struct zs_pool {
240 char *name; 245 const char *name;
241 246
242 struct size_class **size_class; 247 struct size_class **size_class;
243 struct kmem_cache *handle_cachep; 248 struct kmem_cache *handle_cachep;
@@ -311,7 +316,7 @@ static void record_obj(unsigned long handle, unsigned long obj)
311 316
312#ifdef CONFIG_ZPOOL 317#ifdef CONFIG_ZPOOL
313 318
314static void *zs_zpool_create(char *name, gfp_t gfp, 319static void *zs_zpool_create(const char *name, gfp_t gfp,
315 const struct zpool_ops *zpool_ops, 320 const struct zpool_ops *zpool_ops,
316 struct zpool *zpool) 321 struct zpool *zpool)
317{ 322{
@@ -447,19 +452,23 @@ static int get_size_class_index(int size)
447static inline void zs_stat_inc(struct size_class *class, 452static inline void zs_stat_inc(struct size_class *class,
448 enum zs_stat_type type, unsigned long cnt) 453 enum zs_stat_type type, unsigned long cnt)
449{ 454{
450 class->stats.objs[type] += cnt; 455 if (type < NR_ZS_STAT_TYPE)
456 class->stats.objs[type] += cnt;
451} 457}
452 458
453static inline void zs_stat_dec(struct size_class *class, 459static inline void zs_stat_dec(struct size_class *class,
454 enum zs_stat_type type, unsigned long cnt) 460 enum zs_stat_type type, unsigned long cnt)
455{ 461{
456 class->stats.objs[type] -= cnt; 462 if (type < NR_ZS_STAT_TYPE)
463 class->stats.objs[type] -= cnt;
457} 464}
458 465
459static inline unsigned long zs_stat_get(struct size_class *class, 466static inline unsigned long zs_stat_get(struct size_class *class,
460 enum zs_stat_type type) 467 enum zs_stat_type type)
461{ 468{
462 return class->stats.objs[type]; 469 if (type < NR_ZS_STAT_TYPE)
470 return class->stats.objs[type];
471 return 0;
463} 472}
464 473
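The zsmalloc change above sizes the per-class stats array by CONFIG_ZSMALLOC_STAT and lets the inc/dec/get helpers silently ignore stat types that were compiled out. The same pattern in a stand-alone form; the FULL_STATS flag is invented for the demo and only mimics the kernel config option.

#include <stdio.h>

enum stat_type { OBJ_ALLOCATED, OBJ_USED, ALMOST_FULL, ALMOST_EMPTY };

/* With full stats enabled the array covers every type, otherwise only the
 * first two; the guards below turn the missing types into no-ops. */
#ifdef FULL_STATS
#define NR_STAT_TYPE	(ALMOST_EMPTY + 1)
#else
#define NR_STAT_TYPE	(OBJ_USED + 1)
#endif

struct size_stats {
	unsigned long objs[NR_STAT_TYPE];
};

static void stat_inc(struct size_stats *s, enum stat_type t, unsigned long n)
{
	if (t < NR_STAT_TYPE)
		s->objs[t] += n;
}

static unsigned long stat_get(const struct size_stats *s, enum stat_type t)
{
	return t < NR_STAT_TYPE ? s->objs[t] : 0;
}

int main(void)
{
	struct size_stats s = { {0} };

	stat_inc(&s, OBJ_USED, 3);
	stat_inc(&s, ALMOST_FULL, 1);	/* dropped unless FULL_STATS is defined */
	printf("used=%lu almost_full=%lu\n",
	       stat_get(&s, OBJ_USED), stat_get(&s, ALMOST_FULL));
	return 0;
}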
465#ifdef CONFIG_ZSMALLOC_STAT 474#ifdef CONFIG_ZSMALLOC_STAT
@@ -548,7 +557,7 @@ static const struct file_operations zs_stat_size_ops = {
548 .release = single_release, 557 .release = single_release,
549}; 558};
550 559
551static int zs_pool_stat_create(char *name, struct zs_pool *pool) 560static int zs_pool_stat_create(const char *name, struct zs_pool *pool)
552{ 561{
553 struct dentry *entry; 562 struct dentry *entry;
554 563
@@ -588,7 +597,7 @@ static void __exit zs_stat_exit(void)
588{ 597{
589} 598}
590 599
591static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) 600static inline int zs_pool_stat_create(const char *name, struct zs_pool *pool)
592{ 601{
593 return 0; 602 return 0;
594} 603}
@@ -764,7 +773,7 @@ static struct page *get_first_page(struct page *page)
764 if (is_first_page(page)) 773 if (is_first_page(page))
765 return page; 774 return page;
766 else 775 else
767 return page->first_page; 776 return (struct page *)page_private(page);
768} 777}
769 778
770static struct page *get_next_page(struct page *page) 779static struct page *get_next_page(struct page *page)
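These hunks drop page->first_page and keep the back-pointer in the page's private word instead, casting it back in get_first_page(). A toy version of that overloading; the types and the "first" flag are invented, and it assumes unsigned long is pointer-sized, as the kernel's page->private is.

#include <assert.h>
#include <stdbool.h>

/* Toy page descriptor: one unsigned long of "private" storage that different
 * users overload, here as a back-pointer to the head of a page group. */
struct toy_page {
	bool first;
	unsigned long private;
};

static void set_page_private(struct toy_page *p, unsigned long v)
{
	p->private = v;
}

static unsigned long page_private(const struct toy_page *p)
{
	return p->private;
}

static struct toy_page *get_first_page(struct toy_page *p)
{
	return p->first ? p : (struct toy_page *)page_private(p);
}

int main(void)
{
	struct toy_page head = { .first = true }, tail = { .first = false };

	set_page_private(&tail, (unsigned long)&head);	/* tail -> head link */
	assert(get_first_page(&tail) == &head);
	assert(get_first_page(&head) == &head);
	return 0;
}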
@@ -824,7 +833,7 @@ static unsigned long obj_to_head(struct size_class *class, struct page *page,
824{ 833{
825 if (class->huge) { 834 if (class->huge) {
826 VM_BUG_ON(!is_first_page(page)); 835 VM_BUG_ON(!is_first_page(page));
827 return *(unsigned long *)page_private(page); 836 return page_private(page);
828 } else 837 } else
829 return *(unsigned long *)obj; 838 return *(unsigned long *)obj;
830} 839}
@@ -949,7 +958,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
949 * Allocate individual pages and link them together as: 958 * Allocate individual pages and link them together as:
950 * 1. first page->private = first sub-page 959 * 1. first page->private = first sub-page
951 * 2. all sub-pages are linked together using page->lru 960 * 2. all sub-pages are linked together using page->lru
952 * 3. each sub-page is linked to the first page using page->first_page 961 * 3. each sub-page is linked to the first page using page->private
953 * 962 *
954 * For each size class, First/Head pages are linked together using 963 * For each size class, First/Head pages are linked together using
955 * page->lru. Also, we set PG_private to identify the first page 964 * page->lru. Also, we set PG_private to identify the first page
@@ -974,7 +983,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
974 if (i == 1) 983 if (i == 1)
975 set_page_private(first_page, (unsigned long)page); 984 set_page_private(first_page, (unsigned long)page);
976 if (i >= 1) 985 if (i >= 1)
977 page->first_page = first_page; 986 set_page_private(page, (unsigned long)first_page);
978 if (i >= 2) 987 if (i >= 2)
979 list_add(&page->lru, &prev_page->lru); 988 list_add(&page->lru, &prev_page->lru);
980 if (i == class->pages_per_zspage - 1) /* last page */ 989 if (i == class->pages_per_zspage - 1) /* last page */
@@ -1428,8 +1437,6 @@ static void obj_free(struct zs_pool *pool, struct size_class *class,
1428 struct page *first_page, *f_page; 1437 struct page *first_page, *f_page;
1429 unsigned long f_objidx, f_offset; 1438 unsigned long f_objidx, f_offset;
1430 void *vaddr; 1439 void *vaddr;
1431 int class_idx;
1432 enum fullness_group fullness;
1433 1440
1434 BUG_ON(!obj); 1441 BUG_ON(!obj);
1435 1442
@@ -1437,7 +1444,6 @@ static void obj_free(struct zs_pool *pool, struct size_class *class,
1437 obj_to_location(obj, &f_page, &f_objidx); 1444 obj_to_location(obj, &f_page, &f_objidx);
1438 first_page = get_first_page(f_page); 1445 first_page = get_first_page(f_page);
1439 1446
1440 get_zspage_mapping(first_page, &class_idx, &fullness);
1441 f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); 1447 f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
1442 1448
1443 vaddr = kmap_atomic(f_page); 1449 vaddr = kmap_atomic(f_page);
@@ -1822,9 +1828,6 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker,
1822 struct zs_pool *pool = container_of(shrinker, struct zs_pool, 1828 struct zs_pool *pool = container_of(shrinker, struct zs_pool,
1823 shrinker); 1829 shrinker);
1824 1830
1825 if (!pool->shrinker_enabled)
1826 return 0;
1827
1828 for (i = zs_size_classes - 1; i >= 0; i--) { 1831 for (i = zs_size_classes - 1; i >= 0; i--) {
1829 class = pool->size_class[i]; 1832 class = pool->size_class[i];
1830 if (!class) 1833 if (!class)
@@ -1866,7 +1869,7 @@ static int zs_register_shrinker(struct zs_pool *pool)
1866 * On success, a pointer to the newly created pool is returned, 1869 * On success, a pointer to the newly created pool is returned,
1867 * otherwise NULL. 1870 * otherwise NULL.
1868 */ 1871 */
1869struct zs_pool *zs_create_pool(char *name, gfp_t flags) 1872struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
1870{ 1873{
1871 int i; 1874 int i;
1872 struct zs_pool *pool; 1875 struct zs_pool *pool;
diff --git a/mm/zswap.c b/mm/zswap.c
index 4043df7c672f..025f8dc723de 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -82,33 +82,27 @@ module_param_named(enabled, zswap_enabled, bool, 0644);
82 82
83/* Crypto compressor to use */ 83/* Crypto compressor to use */
84#define ZSWAP_COMPRESSOR_DEFAULT "lzo" 84#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
85static char zswap_compressor[CRYPTO_MAX_ALG_NAME] = ZSWAP_COMPRESSOR_DEFAULT; 85static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
86static struct kparam_string zswap_compressor_kparam = {
87 .string = zswap_compressor,
88 .maxlen = sizeof(zswap_compressor),
89};
90static int zswap_compressor_param_set(const char *, 86static int zswap_compressor_param_set(const char *,
91 const struct kernel_param *); 87 const struct kernel_param *);
92static struct kernel_param_ops zswap_compressor_param_ops = { 88static struct kernel_param_ops zswap_compressor_param_ops = {
93 .set = zswap_compressor_param_set, 89 .set = zswap_compressor_param_set,
94 .get = param_get_string, 90 .get = param_get_charp,
91 .free = param_free_charp,
95}; 92};
96module_param_cb(compressor, &zswap_compressor_param_ops, 93module_param_cb(compressor, &zswap_compressor_param_ops,
97 &zswap_compressor_kparam, 0644); 94 &zswap_compressor, 0644);
98 95
99/* Compressed storage zpool to use */ 96/* Compressed storage zpool to use */
100#define ZSWAP_ZPOOL_DEFAULT "zbud" 97#define ZSWAP_ZPOOL_DEFAULT "zbud"
101static char zswap_zpool_type[32 /* arbitrary */] = ZSWAP_ZPOOL_DEFAULT; 98static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
102static struct kparam_string zswap_zpool_kparam = {
103 .string = zswap_zpool_type,
104 .maxlen = sizeof(zswap_zpool_type),
105};
106static int zswap_zpool_param_set(const char *, const struct kernel_param *); 99static int zswap_zpool_param_set(const char *, const struct kernel_param *);
107static struct kernel_param_ops zswap_zpool_param_ops = { 100static struct kernel_param_ops zswap_zpool_param_ops = {
108 .set = zswap_zpool_param_set, 101 .set = zswap_zpool_param_set,
109 .get = param_get_string, 102 .get = param_get_charp,
103 .free = param_free_charp,
110}; 104};
111module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_kparam, 0644); 105module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
112 106
113/* The maximum percentage of memory that the compressed pool can occupy */ 107/* The maximum percentage of memory that the compressed pool can occupy */
114static unsigned int zswap_max_pool_percent = 20; 108static unsigned int zswap_max_pool_percent = 20;
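For reference, the charp-backed module parameter pattern that zswap switches to above looks roughly like this in isolation (a minimal sketch with placeholder names; the real setters also validate the value and switch pools):

#include <linux/module.h>
#include <linux/moduleparam.h>

/* Hypothetical string parameter stored as a plain char pointer. */
static char *example_algo = "lzo";

static int example_algo_set(const char *val, const struct kernel_param *kp)
{
	/* Custom validation would go here; the stock charp setter takes
	 * care of storing the new value behind kp->arg. */
	return param_set_charp(val, kp);
}

static struct kernel_param_ops example_algo_ops = {
	.set	= example_algo_set,
	.get	= param_get_charp,
	.free	= param_free_charp,
};
module_param_cb(algo, &example_algo_ops, &example_algo, 0644);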
@@ -342,7 +336,7 @@ static void zswap_entry_put(struct zswap_tree *tree,
342static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, 336static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
343 pgoff_t offset) 337 pgoff_t offset)
344{ 338{
345 struct zswap_entry *entry = NULL; 339 struct zswap_entry *entry;
346 340
347 entry = zswap_rb_search(root, offset); 341 entry = zswap_rb_search(root, offset);
348 if (entry) 342 if (entry)
@@ -571,7 +565,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
571static struct zswap_pool *zswap_pool_create(char *type, char *compressor) 565static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
572{ 566{
573 struct zswap_pool *pool; 567 struct zswap_pool *pool;
574 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; 568 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
575 569
576 pool = kzalloc(sizeof(*pool), GFP_KERNEL); 570 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
577 if (!pool) { 571 if (!pool) {
@@ -615,19 +609,29 @@ error:
615 return NULL; 609 return NULL;
616} 610}
617 611
618static struct zswap_pool *__zswap_pool_create_fallback(void) 612static __init struct zswap_pool *__zswap_pool_create_fallback(void)
619{ 613{
620 if (!crypto_has_comp(zswap_compressor, 0, 0)) { 614 if (!crypto_has_comp(zswap_compressor, 0, 0)) {
615 if (!strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
616 pr_err("default compressor %s not available\n",
617 zswap_compressor);
618 return NULL;
619 }
621 pr_err("compressor %s not available, using default %s\n", 620 pr_err("compressor %s not available, using default %s\n",
622 zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT); 621 zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
623 strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT, 622 param_free_charp(&zswap_compressor);
624 sizeof(zswap_compressor)); 623 zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
625 } 624 }
626 if (!zpool_has_pool(zswap_zpool_type)) { 625 if (!zpool_has_pool(zswap_zpool_type)) {
626 if (!strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
627 pr_err("default zpool %s not available\n",
628 zswap_zpool_type);
629 return NULL;
630 }
627 pr_err("zpool %s not available, using default %s\n", 631 pr_err("zpool %s not available, using default %s\n",
628 zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT); 632 zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
629 strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT, 633 param_free_charp(&zswap_zpool_type);
630 sizeof(zswap_zpool_type)); 634 zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
631 } 635 }
632 636
633 return zswap_pool_create(zswap_zpool_type, zswap_compressor); 637 return zswap_pool_create(zswap_zpool_type, zswap_compressor);
@@ -684,43 +688,39 @@ static void zswap_pool_put(struct zswap_pool *pool)
684* param callbacks 688* param callbacks
685**********************************/ 689**********************************/
686 690
691/* val must be a null-terminated string */
687static int __zswap_param_set(const char *val, const struct kernel_param *kp, 692static int __zswap_param_set(const char *val, const struct kernel_param *kp,
688 char *type, char *compressor) 693 char *type, char *compressor)
689{ 694{
690 struct zswap_pool *pool, *put_pool = NULL; 695 struct zswap_pool *pool, *put_pool = NULL;
691 char str[kp->str->maxlen], *s; 696 char *s = strstrip((char *)val);
692 int ret; 697 int ret;
693 698
694 /* 699 /* no change required */
695 * kp is either zswap_zpool_kparam or zswap_compressor_kparam, defined 700 if (!strcmp(s, *(char **)kp->arg))
696 * at the top of this file, so maxlen is CRYPTO_MAX_ALG_NAME (64) or 701 return 0;
697 * 32 (arbitrary).
698 */
699 strlcpy(str, val, kp->str->maxlen);
700 s = strim(str);
701 702
702 /* if this is load-time (pre-init) param setting, 703 /* if this is load-time (pre-init) param setting,
703 * don't create a pool; that's done during init. 704 * don't create a pool; that's done during init.
704 */ 705 */
705 if (!zswap_init_started) 706 if (!zswap_init_started)
706 return param_set_copystring(s, kp); 707 return param_set_charp(s, kp);
707
708 /* no change required */
709 if (!strncmp(kp->str->string, s, kp->str->maxlen))
710 return 0;
711 708
712 if (!type) { 709 if (!type) {
713 type = s; 710 if (!zpool_has_pool(s)) {
714 if (!zpool_has_pool(type)) { 711 pr_err("zpool %s not available\n", s);
715 pr_err("zpool %s not available\n", type);
716 return -ENOENT; 712 return -ENOENT;
717 } 713 }
714 type = s;
718 } else if (!compressor) { 715 } else if (!compressor) {
719 compressor = s; 716 if (!crypto_has_comp(s, 0, 0)) {
720 if (!crypto_has_comp(compressor, 0, 0)) { 717 pr_err("compressor %s not available\n", s);
721 pr_err("compressor %s not available\n", compressor);
722 return -ENOENT; 718 return -ENOENT;
723 } 719 }
720 compressor = s;
721 } else {
722 WARN_ON(1);
723 return -EINVAL;
724 } 724 }
725 725
726 spin_lock(&zswap_pools_lock); 726 spin_lock(&zswap_pools_lock);
@@ -736,7 +736,7 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
736 } 736 }
737 737
738 if (pool) 738 if (pool)
739 ret = param_set_copystring(s, kp); 739 ret = param_set_charp(s, kp);
740 else 740 else
741 ret = -EINVAL; 741 ret = -EINVAL;
742 742
@@ -1011,7 +1011,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
1011 /* store */ 1011 /* store */
1012 len = dlen + sizeof(struct zswap_header); 1012 len = dlen + sizeof(struct zswap_header);
1013 ret = zpool_malloc(entry->pool->zpool, len, 1013 ret = zpool_malloc(entry->pool->zpool, len,
1014 __GFP_NORETRY | __GFP_NOWARN, &handle); 1014 __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
1015 &handle);
1015 if (ret == -ENOSPC) { 1016 if (ret == -ENOSPC) {
1016 zswap_reject_compress_poor++; 1017 zswap_reject_compress_poor++;
1017 goto put_dstmem; 1018 goto put_dstmem;
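The zpool_malloc() call above now combines __GFP_NORETRY and __GFP_NOWARN with __GFP_KSWAPD_RECLAIM, i.e. the allocation may wake kswapd but will not enter direct reclaim. A hedged sketch of the same pattern in isolation (hypothetical pool and caller):

#include <linux/gfp.h>
#include <linux/zpool.h>

/* Illustrative only: opportunistic allocation that may wake kswapd but
 * never performs direct reclaim itself, mirroring the flags used above. */
static int example_store(struct zpool *zpool, size_t len, unsigned long *handle)
{
	return zpool_malloc(zpool, len,
			    __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
			    handle);
}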
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599ba8b2..aa41e6dd6429 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
414 len += NET_SKB_PAD; 414 len += NET_SKB_PAD;
415 415
416 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || 416 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
417 (gfp_mask & (__GFP_WAIT | GFP_DMA))) { 417 (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
418 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); 418 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
419 if (!skb) 419 if (!skb)
420 goto skb_fail; 420 goto skb_fail;
@@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
481 len += NET_SKB_PAD + NET_IP_ALIGN; 481 len += NET_SKB_PAD + NET_IP_ALIGN;
482 482
483 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || 483 if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
484 (gfp_mask & (__GFP_WAIT | GFP_DMA))) { 484 (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
485 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); 485 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
486 if (!skb) 486 if (!skb)
487 goto skb_fail; 487 goto skb_fail;
@@ -4452,7 +4452,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
4452 return NULL; 4452 return NULL;
4453 4453
4454 gfp_head = gfp_mask; 4454 gfp_head = gfp_mask;
4455 if (gfp_head & __GFP_WAIT) 4455 if (gfp_head & __GFP_DIRECT_RECLAIM)
4456 gfp_head |= __GFP_REPEAT; 4456 gfp_head |= __GFP_REPEAT;
4457 4457
4458 *errcode = -ENOBUFS; 4458 *errcode = -ENOBUFS;
@@ -4467,7 +4467,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
4467 4467
4468 while (order) { 4468 while (order) {
4469 if (npages >= 1 << order) { 4469 if (npages >= 1 << order) {
4470 page = alloc_pages((gfp_mask & ~__GFP_WAIT) | 4470 page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
4471 __GFP_COMP | 4471 __GFP_COMP |
4472 __GFP_NOWARN | 4472 __GFP_NOWARN |
4473 __GFP_NORETRY, 4473 __GFP_NORETRY,
diff --git a/net/core/sock.c b/net/core/sock.c
index 7529eb9463be..1e4dd54bfb5a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1944,8 +1944,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1944 1944
1945 pfrag->offset = 0; 1945 pfrag->offset = 0;
1946 if (SKB_FRAG_PAGE_ORDER) { 1946 if (SKB_FRAG_PAGE_ORDER) {
1947 pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | 1947 /* Avoid direct reclaim but allow kswapd to wake */
1948 __GFP_NOWARN | __GFP_NORETRY, 1948 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
1949 __GFP_COMP | __GFP_NOWARN |
1950 __GFP_NORETRY,
1949 SKB_FRAG_PAGE_ORDER); 1951 SKB_FRAG_PAGE_ORDER);
1950 if (likely(pfrag->page)) { 1952 if (likely(pfrag->page)) {
1951 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; 1953 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
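skb_page_frag_refill() above (like alloc_skb_with_frags() earlier) clears __GFP_DIRECT_RECLAIM from the caller's mask for the speculative high-order attempt while leaving any kswapd-wake bit intact. A standalone sketch of that idiom (hypothetical helper and fallback policy):

#include <linux/gfp.h>
#include <linux/mm.h>

/* Illustrative only: try a high-order page without direct reclaim,
 * then fall back to a single page with the caller's original mask. */
static struct page *example_alloc_frag(gfp_t gfp, unsigned int order)
{
	struct page *page;

	page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
			   __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY,
			   order);
	if (page)
		return page;

	return alloc_pages(gfp, 0);
}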
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fafe33bdb619..59651af8cc27 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2116,7 +2116,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
2116 consume_skb(info.skb2); 2116 consume_skb(info.skb2);
2117 2117
2118 if (info.delivered) { 2118 if (info.delivered) {
2119 if (info.congested && (allocation & __GFP_WAIT)) 2119 if (info.congested && gfpflags_allow_blocking(allocation))
2120 yield(); 2120 yield();
2121 return 0; 2121 return 0;
2122 } 2122 }
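gfpflags_allow_blocking(), used here and in the rxrpc and sctp hunks below, replaces open-coded __GFP_WAIT tests with a helper that reports whether the mask allows the caller to sleep. A small illustrative use (hypothetical wrapper):

#include <linux/gfp.h>
#include <linux/sched.h>

/* Illustrative only: yield the CPU when the gfp mask says sleeping is
 * allowed, mirroring the netlink_broadcast_filtered() change above. */
static void example_maybe_yield(gfp_t allocation, bool congested)
{
	if (congested && gfpflags_allow_blocking(allocation))
		yield();
}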
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 96744b75db93..977fb86065b7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
305 gfp_t slab_mask = GFP_NOWAIT; 305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT; 306 gfp_t page_mask = GFP_NOWAIT;
307 307
308 if (gfp & __GFP_WAIT) { 308 if (gfp & __GFP_DIRECT_RECLAIM) {
309 slab_mask = GFP_KERNEL; 309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER; 310 page_mask = GFP_HIGHUSER;
311 } 311 }
@@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
379 struct ib_recv_wr *failed_wr; 379 struct ib_recv_wr *failed_wr;
380 unsigned int posted = 0; 380 unsigned int posted = 0;
381 int ret = 0; 381 int ret = 0;
382 bool can_wait = !!(gfp & __GFP_WAIT); 382 bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
383 u32 pos; 383 u32 pos;
384 384
385 /* the goal here is to just make sure that someone, somewhere 385 /* the goal here is to just make sure that someone, somewhere
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 692b3e67fb54..6c71ed1caf16 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -500,7 +500,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
500 if (bundle->num_conns >= 20) { 500 if (bundle->num_conns >= 20) {
501 _debug("too many conns"); 501 _debug("too many conns");
502 502
503 if (!(gfp & __GFP_WAIT)) { 503 if (!gfpflags_allow_blocking(gfp)) {
504 _leave(" = -EAGAIN"); 504 _leave(" = -EAGAIN");
505 return -EAGAIN; 505 return -EAGAIN;
506 } 506 }
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b00f1f9611d6..559afd0ee7de 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1590,7 +1590,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
1590/* Set an association id for a given association */ 1590/* Set an association id for a given association */
1591int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) 1591int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
1592{ 1592{
1593 bool preload = !!(gfp & __GFP_WAIT); 1593 bool preload = gfpflags_allow_blocking(gfp);
1594 int ret; 1594 int ret;
1595 1595
1596 /* If the id is already assigned, keep it. */ 1596 /* If the id is already assigned, keep it. */
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index f2a1131b2f8b..2b3c22808c3b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -370,6 +370,8 @@ our $typeTypedefs = qr{(?x:
370 $typeKernelTypedefs\b 370 $typeKernelTypedefs\b
371)}; 371)};
372 372
373our $zero_initializer = qr{(?:(?:0[xX])?0+$Int_type?|NULL|false)\b};
374
373our $logFunctions = qr{(?x: 375our $logFunctions = qr{(?x:
374 printk(?:_ratelimited|_once|)| 376 printk(?:_ratelimited|_once|)|
375 (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)| 377 (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)|
@@ -2313,42 +2315,43 @@ sub process {
2313 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr); 2315 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr);
2314 } 2316 }
2315 2317
2318# Check if the commit log is in a possible stack dump
2319 if ($in_commit_log && !$commit_log_possible_stack_dump &&
2320 ($line =~ /^\s*(?:WARNING:|BUG:)/ ||
2321 $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ ||
2322 # timestamp
2323 $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) {
2324 # stack dump address
2325 $commit_log_possible_stack_dump = 1;
2326 }
2327
2316# Check for line lengths > 75 in commit log, warn once 2328# Check for line lengths > 75 in commit log, warn once
2317 if ($in_commit_log && !$commit_log_long_line && 2329 if ($in_commit_log && !$commit_log_long_line &&
2318 length($line) > 75 && 2330 length($line) > 75 &&
2319 !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ || 2331 !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ ||
2320 # file delta changes 2332 # file delta changes
2321 $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ || 2333 $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ ||
2322 # filename then : 2334 # filename then :
2323 $line =~ /^\s*(?:Fixes:|Link:)/i || 2335 $line =~ /^\s*(?:Fixes:|Link:)/i ||
2324 # A Fixes: or Link: line 2336 # A Fixes: or Link: line
2325 $commit_log_possible_stack_dump)) { 2337 $commit_log_possible_stack_dump)) {
2326 WARN("COMMIT_LOG_LONG_LINE", 2338 WARN("COMMIT_LOG_LONG_LINE",
2327 "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr); 2339 "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr);
2328 $commit_log_long_line = 1; 2340 $commit_log_long_line = 1;
2329 } 2341 }
2330 2342
2331# Check if the commit log is in a possible stack dump
2332 if ($in_commit_log && !$commit_log_possible_stack_dump &&
2333 ($line =~ /^\s*(?:WARNING:|BUG:)/ ||
2334 $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ ||
2335 # timestamp
2336 $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) {
2337 # stack dump address
2338 $commit_log_possible_stack_dump = 1;
2339 }
2340
2341# Reset possible stack dump if a blank line is found 2343# Reset possible stack dump if a blank line is found
2342 if ($in_commit_log && $commit_log_possible_stack_dump && 2344 if ($in_commit_log && $commit_log_possible_stack_dump &&
2343 $line =~ /^\s*$/) { 2345 $line =~ /^\s*$/) {
2344 $commit_log_possible_stack_dump = 0; 2346 $commit_log_possible_stack_dump = 0;
2345 } 2347 }
2346 2348
2347# Check for git id commit length and improperly formed commit descriptions 2349# Check for git id commit length and improperly formed commit descriptions
2348 if ($in_commit_log && 2350 if ($in_commit_log && !$commit_log_possible_stack_dump &&
2349 ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i || 2351 ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i ||
2350 ($line =~ /\b[0-9a-f]{12,40}\b/i && 2352 ($line =~ /\b[0-9a-f]{12,40}\b/i &&
2351 $line !~ /\bfixes:\s*[0-9a-f]{12,40}/i))) { 2353 $line !~ /[\<\[][0-9a-f]{12,40}[\>\]]/i &&
2354 $line !~ /\bfixes:\s*[0-9a-f]{12,40}/i))) {
2352 my $init_char = "c"; 2355 my $init_char = "c";
2353 my $orig_commit = ""; 2356 my $orig_commit = "";
2354 my $short = 1; 2357 my $short = 1;
@@ -3333,21 +3336,20 @@ sub process {
3333 } 3336 }
3334 3337
3335# check for global initialisers. 3338# check for global initialisers.
3336 if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*(?:0|NULL|false)\s*;/) { 3339 if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*($zero_initializer)\s*;/) {
3337 if (ERROR("GLOBAL_INITIALISERS", 3340 if (ERROR("GLOBAL_INITIALISERS",
3338 "do not initialise globals to 0 or NULL\n" . 3341 "do not initialise globals to $1\n" . $herecurr) &&
3339 $herecurr) &&
3340 $fix) { 3342 $fix) {
3341 $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*(0|NULL|false)\s*;/$1;/; 3343 $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*$zero_initializer\s*;/$1;/;
3342 } 3344 }
3343 } 3345 }
3344# check for static initialisers. 3346# check for static initialisers.
3345 if ($line =~ /^\+.*\bstatic\s.*=\s*(0|NULL|false)\s*;/) { 3347 if ($line =~ /^\+.*\bstatic\s.*=\s*($zero_initializer)\s*;/) {
3346 if (ERROR("INITIALISED_STATIC", 3348 if (ERROR("INITIALISED_STATIC",
3347 "do not initialise statics to 0 or NULL\n" . 3349 "do not initialise statics to $1\n" .
3348 $herecurr) && 3350 $herecurr) &&
3349 $fix) { 3351 $fix) {
3350 $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/; 3352 $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*$zero_initializer\s*;/$1;/;
3351 } 3353 }
3352 } 3354 }
3353 3355
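In C terms, the new $zero_initializer pattern means hex and suffixed zero forms are now caught by the GLOBAL_INITIALISERS and INITIALISED_STATIC checks as well. Hypothetical declarations for illustration:

#include <linux/stddef.h>
#include <linux/types.h>

/* Newly flagged by the extended pattern (hypothetical declarations): */
static int example_count = 0x00;
static unsigned long example_mask = 0UL;

/* Flagged both before and after the change: */
static void *example_ptr = NULL;
static bool example_flag = false;

/* Preferred: rely on the implicit zero-initialisation of statics. */
static int example_count2;
static unsigned long example_mask2;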
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 98bae869f6d0..cab641a12dd5 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -781,6 +781,7 @@ MAINTAINER field selection options:
781 --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers) 781 --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
782 --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent) 782 --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
783 --git-blame => use git blame to find modified commits for patch or file 783 --git-blame => use git blame to find modified commits for patch or file
784 --git-blame-signatures => when used with --git-blame, also include all commit signers
784 --git-since => git history to use (default: $email_git_since) 785 --git-since => git history to use (default: $email_git_since)
785 --hg-since => hg history to use (default: $email_hg_since) 786 --hg-since => hg history to use (default: $email_hg_since)
786 --interactive => display a menu (mostly useful if used with the --git option) 787 --interactive => display a menu (mostly useful if used with the --git option)
@@ -812,7 +813,7 @@ Other options:
812 --help => show this help information 813 --help => show this help information
813 814
814Default options: 815Default options:
815 [--email --nogit --git-fallback --m --n --l --multiline -pattern-depth=0 816 [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0
816 --remove-duplicates --rolestats] 817 --remove-duplicates --rolestats]
817 818
818Notes: 819Notes:
@@ -844,6 +845,9 @@ Notes:
844 Entries in this file can be any command line argument. 845 Entries in this file can be any command line argument.
845 This file is prepended to any additional command line arguments. 846 This file is prepended to any additional command line arguments.
846 Multiple lines and # comments are allowed. 847 Multiple lines and # comments are allowed.
848 Most options have both positive and negative forms.
849 The negative forms for --<foo> are --no<foo> and --no-<foo>.
850
847EOT 851EOT
848} 852}
849 853
@@ -970,20 +974,29 @@ sub find_ending_index {
970 return $index; 974 return $index;
971} 975}
972 976
973sub get_maintainer_role { 977sub get_subsystem_name {
974 my ($index) = @_; 978 my ($index) = @_;
975 979
976 my $i;
977 my $start = find_starting_index($index); 980 my $start = find_starting_index($index);
978 my $end = find_ending_index($index);
979 981
980 my $role = "unknown";
981 my $subsystem = $typevalue[$start]; 982 my $subsystem = $typevalue[$start];
982 if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) { 983 if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
983 $subsystem = substr($subsystem, 0, $output_section_maxlen - 3); 984 $subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
984 $subsystem =~ s/\s*$//; 985 $subsystem =~ s/\s*$//;
985 $subsystem = $subsystem . "..."; 986 $subsystem = $subsystem . "...";
986 } 987 }
988 return $subsystem;
989}
990
991sub get_maintainer_role {
992 my ($index) = @_;
993
994 my $i;
995 my $start = find_starting_index($index);
996 my $end = find_ending_index($index);
997
998 my $role = "unknown";
999 my $subsystem = get_subsystem_name($index);
987 1000
988 for ($i = $start + 1; $i < $end; $i++) { 1001 for ($i = $start + 1; $i < $end; $i++) {
989 my $tv = $typevalue[$i]; 1002 my $tv = $typevalue[$i];
@@ -1017,16 +1030,7 @@ sub get_maintainer_role {
1017sub get_list_role { 1030sub get_list_role {
1018 my ($index) = @_; 1031 my ($index) = @_;
1019 1032
1020 my $i; 1033 my $subsystem = get_subsystem_name($index);
1021 my $start = find_starting_index($index);
1022 my $end = find_ending_index($index);
1023
1024 my $subsystem = $typevalue[$start];
1025 if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1026 $subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1027 $subsystem =~ s/\s*$//;
1028 $subsystem = $subsystem . "...";
1029 }
1030 1034
1031 if ($subsystem eq "THE REST") { 1035 if ($subsystem eq "THE REST") {
1032 $subsystem = ""; 1036 $subsystem = "";
@@ -1114,7 +1118,8 @@ sub add_categories {
1114 } 1118 }
1115 } 1119 }
1116 if ($email_reviewer) { 1120 if ($email_reviewer) {
1117 push_email_addresses($pvalue, 'reviewer'); 1121 my $subsystem = get_subsystem_name($i);
1122 push_email_addresses($pvalue, "reviewer:$subsystem");
1118 } 1123 }
1119 } elsif ($ptype eq "T") { 1124 } elsif ($ptype eq "T") {
1120 push(@scm, $pvalue); 1125 push(@scm, $pvalue);
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index e24121afb2f2..6eb62936c672 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -126,7 +126,7 @@ static void *ima_alloc_pages(loff_t max_size, size_t *allocated_size,
126{ 126{
127 void *ptr; 127 void *ptr;
128 int order = ima_maxorder; 128 int order = ima_maxorder;
129 gfp_t gfp_mask = __GFP_WAIT | __GFP_NOWARN | __GFP_NORETRY; 129 gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY;
130 130
131 if (order) 131 if (order)
132 order = min(get_order(max_size), order); 132 order = min(get_order(max_size), order);
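Here the old __GFP_WAIT becomes __GFP_RECLAIM rather than __GFP_DIRECT_RECLAIM; assuming __GFP_RECLAIM is the OR of the direct-reclaim and kswapd-reclaim bits (as this series defines it), the resulting mask still allows blocking and still wakes kswapd. A tiny illustrative check:

#include <linux/gfp.h>
#include <linux/types.h>

/* Illustrative only, assuming __GFP_RECLAIM covers both reclaim bits:
 * the mask permits direct reclaim (so the caller may sleep) and also
 * wakes kswapd. */
static bool example_mask_traits(void)
{
	gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY;

	return gfpflags_allow_blocking(gfp_mask) &&
	       (gfp_mask & __GFP_KSWAPD_RECLAIM);
}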
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index cfe121353eec..4b4957b8df4e 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -6,6 +6,7 @@ TARGETS += firmware
6TARGETS += ftrace 6TARGETS += ftrace
7TARGETS += futex 7TARGETS += futex
8TARGETS += kcmp 8TARGETS += kcmp
9TARGETS += lib
9TARGETS += membarrier 10TARGETS += membarrier
10TARGETS += memfd 11TARGETS += memfd
11TARGETS += memory-hotplug 12TARGETS += memory-hotplug
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
new file mode 100644
index 000000000000..47147b968514
--- /dev/null
+++ b/tools/testing/selftests/lib/Makefile
@@ -0,0 +1,8 @@
1# Makefile for lib/ function selftests
2
3# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
4all:
5
6TEST_PROGS := printf.sh
7
8include ../lib.mk
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
new file mode 100644
index 000000000000..4fdc70fe6980
--- /dev/null
+++ b/tools/testing/selftests/lib/printf.sh
@@ -0,0 +1,10 @@
1#!/bin/sh
2# Runs printf infrastructure using test_printf kernel module
3
4if /sbin/modprobe -q test_printf; then
5 /sbin/modprobe -q -r test_printf
6 echo "printf: ok"
7else
8 echo "printf: [FAIL]"
9 exit 1
10fi