-rw-r--r--  Documentation/admin-guide/cgroup-v2.rst | 72
-rw-r--r--  Documentation/blockdev/zram.txt | 25
-rw-r--r--  Documentation/features/vm/pte_special/arch-support.txt | 2
-rw-r--r--  Documentation/filesystems/00-INDEX | 4
-rw-r--r--  Documentation/filesystems/autofs-mount-control.txt (renamed from Documentation/filesystems/autofs4-mount-control.txt) | 9
-rw-r--r--  Documentation/filesystems/autofs.txt (renamed from Documentation/filesystems/autofs4.txt) | 10
-rw-r--r--  Documentation/filesystems/automount-support.txt | 2
-rw-r--r--  Documentation/filesystems/path-lookup.md | 2
-rw-r--r--  MAINTAINERS | 4
-rw-r--r--  arch/arc/Kconfig | 1
-rw-r--r--  arch/arc/include/asm/pgtable.h | 2
-rw-r--r--  arch/arm/Kconfig | 1
-rw-r--r--  arch/arm/include/asm/pgtable-3level.h | 1
-rw-r--r--  arch/arm64/Kconfig | 1
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 2
-rw-r--r--  arch/powerpc/Kconfig | 1
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 3
-rw-r--r--  arch/powerpc/include/asm/pte-common.h | 3
-rw-r--r--  arch/riscv/Kconfig | 1
-rw-r--r--  arch/riscv/include/asm/pgtable-bits.h | 3
-rw-r--r--  arch/s390/Kconfig | 1
-rw-r--r--  arch/s390/include/asm/pgtable.h | 1
-rw-r--r--  arch/s390/mm/pgalloc.c | 21
-rw-r--r--  arch/sh/Kconfig | 1
-rw-r--r--  arch/sh/include/asm/pgtable.h | 2
-rw-r--r--  arch/sparc/Kconfig | 1
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h | 3
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 1
-rw-r--r--  arch/x86/mm/pgtable.c | 5
-rw-r--r--  drivers/block/zram/Kconfig | 14
-rw-r--r--  drivers/block/zram/zram_drv.c | 165
-rw-r--r--  drivers/block/zram/zram_drv.h | 14
-rw-r--r--  fs/9p/v9fs.c | 29
-rw-r--r--  fs/Kconfig | 4
-rw-r--r--  fs/Makefile | 1
-rw-r--r--  fs/autofs/Kconfig | 20
-rw-r--r--  fs/autofs/Makefile | 7
-rw-r--r--  fs/autofs/autofs_i.h (renamed from fs/autofs4/autofs_i.h) | 92
-rw-r--r--  fs/autofs/dev-ioctl.c (renamed from fs/autofs4/dev-ioctl.c) | 31
-rw-r--r--  fs/autofs/expire.c (renamed from fs/autofs4/expire.c) | 133
-rw-r--r--  fs/autofs/init.c (renamed from fs/autofs4/init.c) | 12
-rw-r--r--  fs/autofs/inode.c (renamed from fs/autofs4/inode.c) | 54
-rw-r--r--  fs/autofs/root.c (renamed from fs/autofs4/root.c) | 277
-rw-r--r--  fs/autofs/symlink.c (renamed from fs/autofs4/symlink.c) | 16
-rw-r--r--  fs/autofs/waitq.c (renamed from fs/autofs4/waitq.c) | 59
-rw-r--r--  fs/autofs4/Kconfig | 46
-rw-r--r--  fs/autofs4/Makefile | 4
-rw-r--r--  fs/binfmt_misc.c | 12
-rw-r--r--  fs/compat_ioctl.c | 2
-rw-r--r--  fs/dax.c | 78
-rw-r--r--  fs/fcntl.c | 2
-rw-r--r--  fs/ocfs2/dlmglue.c | 119
-rw-r--r--  fs/ocfs2/dlmglue.h | 1
-rw-r--r--  fs/ocfs2/file.c | 10
-rw-r--r--  fs/ocfs2/file.h | 2
-rw-r--r--  fs/ocfs2/ioctl.c | 2
-rw-r--r--  fs/ocfs2/mmap.c | 44
-rw-r--r--  fs/ocfs2/namei.c | 3
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 8
-rw-r--r--  fs/proc/array.c | 2
-rw-r--r--  fs/proc/base.c | 134
-rw-r--r--  fs/proc/fd.c | 2
-rw-r--r--  fs/proc/internal.h | 2
-rw-r--r--  fs/proc/page.c | 2
-rw-r--r--  fs/proc/task_mmu.c | 26
-rw-r--r--  fs/userfaultfd.c | 22
-rw-r--r--  include/asm-generic/int-ll64.h | 19
-rw-r--r--  include/linux/dax.h | 4
-rw-r--r--  include/linux/gfp.h | 14
-rw-r--r--  include/linux/hmm.h | 8
-rw-r--r--  include/linux/kernel.h | 1
-rw-r--r--  include/linux/ksm.h | 17
-rw-r--r--  include/linux/memcontrol.h | 51
-rw-r--r--  include/linux/memfd.h | 16
-rw-r--r--  include/linux/memory_hotplug.h | 1
-rw-r--r--  include/linux/mm.h | 12
-rw-r--r--  include/linux/mm_types.h | 239
-rw-r--r--  include/linux/mpi.h | 61
-rw-r--r--  include/linux/page-flags.h | 51
-rw-r--r--  include/linux/page_counter.h | 26
-rw-r--r--  include/linux/pfn_t.h | 4
-rw-r--r--  include/linux/sched/mm.h | 4
-rw-r--r--  include/linux/shmem_fs.h | 13
-rw-r--r--  include/linux/slab_def.h | 7
-rw-r--r--  include/linux/slub_def.h | 1
-rw-r--r--  include/linux/types.h | 34
-rw-r--r--  include/linux/userfaultfd_k.h | 6
-rw-r--r--  include/uapi/linux/auto_fs.h | 169
-rw-r--r--  include/uapi/linux/auto_fs4.h | 153
-rw-r--r--  include/uapi/linux/kernel-page-flags.h | 2
-rw-r--r--  kernel/crash_core.c | 1
-rw-r--r--  kernel/fork.c | 1
-rw-r--r--  kernel/hung_task.c | 11
-rw-r--r--  kernel/sys.c | 10
-rw-r--r--  lib/bitmap.c | 5
-rw-r--r--  lib/bucket_locks.c | 5
-rw-r--r--  lib/idr.c | 10
-rw-r--r--  lib/mpi/mpi-internal.h | 75
-rw-r--r--  lib/percpu_ida.c | 63
-rw-r--r--  lib/ucs2_string.c | 2
-rw-r--r--  mm/Kconfig | 3
-rw-r--r--  mm/Makefile | 1
-rw-r--r--  mm/backing-dev.c | 4
-rw-r--r--  mm/filemap.c | 8
-rw-r--r--  mm/gup.c | 42
-rw-r--r--  mm/huge_memory.c | 7
-rw-r--r--  mm/hugetlb.c | 44
-rw-r--r--  mm/hugetlb_cgroup.c | 6
-rw-r--r--  mm/init-mm.c | 1
-rw-r--r--  mm/ksm.c | 11
-rw-r--r--  mm/memblock.c | 27
-rw-r--r--  mm/memcontrol.c | 342
-rw-r--r--  mm/memfd.c | 345
-rw-r--r--  mm/memory.c | 36
-rw-r--r--  mm/memory_hotplug.c | 23
-rw-r--r--  mm/mmap.c | 4
-rw-r--r--  mm/nommu.c | 2
-rw-r--r--  mm/oom_kill.c | 2
-rw-r--r--  mm/page_alloc.c | 74
-rw-r--r--  mm/page_counter.c | 100
-rw-r--r--  mm/shmem.c | 382
-rw-r--r--  mm/slab.c | 4
-rw-r--r--  mm/slob.c | 4
-rw-r--r--  mm/slub.c | 112
-rw-r--r--  mm/sparse.c | 6
-rw-r--r--  mm/swap_slots.c | 10
-rw-r--r--  mm/swap_state.c | 3
-rw-r--r--  mm/userfaultfd.c | 22
-rw-r--r--  mm/util.c | 6
-rw-r--r--  mm/vmalloc.c | 41
-rw-r--r--  mm/vmpressure.c | 35
-rw-r--r--  mm/vmscan.c | 38
-rw-r--r--  net/9p/client.c | 13
-rw-r--r--  net/9p/trans_xen.c | 1
-rwxr-xr-x  scripts/checkpatch.pl | 12
-rwxr-xr-x  scripts/get_maintainer.pl | 17
-rwxr-xr-x  scripts/tags.sh | 6
-rw-r--r--  tools/testing/selftests/proc/.gitignore | 3
-rw-r--r--  tools/testing/selftests/proc/Makefile | 5
-rw-r--r--  tools/testing/selftests/proc/fd-001-lookup.c | 168
-rw-r--r--  tools/testing/selftests/proc/fd-002-posix-eq.c | 57
-rw-r--r--  tools/testing/selftests/proc/fd-003-kthread.c | 178
-rw-r--r--  tools/testing/selftests/proc/proc-uptime.h | 16
-rw-r--r--  tools/testing/selftests/proc/proc.h | 39
-rw-r--r--  tools/testing/selftests/proc/read.c | 17
-rw-r--r--  tools/vm/page-types.c | 1
147 files changed, 2945 insertions(+), 2066 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 74cdeaed9f7a..8a2c52d5c53b 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1001,14 +1001,44 @@ PAGE_SIZE multiple when read back.
 	The total amount of memory currently being used by the cgroup
 	and its descendants.
 
+  memory.min
+	A read-write single value file which exists on non-root
+	cgroups. The default is "0".
+
+	Hard memory protection. If the memory usage of a cgroup
+	is within its effective min boundary, the cgroup's memory
+	won't be reclaimed under any conditions. If there is no
+	unprotected reclaimable memory available, OOM killer
+	is invoked.
+
+	Effective min boundary is limited by memory.min values of
+	all ancestor cgroups. If there is memory.min overcommitment
+	(child cgroup or cgroups are requiring more protected memory
+	than parent will allow), then each child cgroup will get
+	the part of parent's protection proportional to its
+	actual memory usage below memory.min.
+
+	Putting more memory than generally available under this
+	protection is discouraged and may lead to constant OOMs.
+
+	If a memory cgroup is not populated with processes,
+	its memory.min is ignored.
+
   memory.low
 	A read-write single value file which exists on non-root
 	cgroups. The default is "0".
 
-	Best-effort memory protection. If the memory usages of a
-	cgroup and all its ancestors are below their low boundaries,
-	the cgroup's memory won't be reclaimed unless memory can be
-	reclaimed from unprotected cgroups.
+	Best-effort memory protection. If the memory usage of a
+	cgroup is within its effective low boundary, the cgroup's
+	memory won't be reclaimed unless memory can be reclaimed
+	from unprotected cgroups.
+
+	Effective low boundary is limited by memory.low values of
+	all ancestor cgroups. If there is memory.low overcommitment
+	(child cgroup or cgroups are requiring more protected memory
+	than parent will allow), then each child cgroup will get
+	the part of parent's protection proportional to its
+	actual memory usage below memory.low.
 
 	Putting more memory than generally available under this
 	protection is discouraged.
@@ -1199,6 +1229,27 @@ PAGE_SIZE multiple when read back.
 	Swap usage hard limit. If a cgroup's swap usage reaches this
 	limit, anonymous memory of the cgroup will not be swapped out.
 
+  memory.swap.events
+	A read-only flat-keyed file which exists on non-root cgroups.
+	The following entries are defined. Unless specified
+	otherwise, a value change in this file generates a file
+	modified event.
+
+	  max
+		The number of times the cgroup's swap usage was about
+		to go over the max boundary and swap allocation
+		failed.
+
+	  fail
+		The number of times swap allocation failed either
+		because of running out of swap system-wide or max
+		limit.
+
+	When reduced under the current usage, the existing swap
+	entries are reclaimed gradually and the swap usage may stay
+	higher than the limit for an extended period of time. This
+	reduces the impact on the workload and memory management.
+
 
 Usage Guidelines
 ~~~~~~~~~~~~~~~~
@@ -1934,17 +1985,8 @@ system performance due to overreclaim, to the point where the feature
 becomes self-defeating.
 
 The memory.low boundary on the other hand is a top-down allocated
-reserve. A cgroup enjoys reclaim protection when it and all its
-ancestors are below their low boundaries, which makes delegation of
-subtrees possible. Secondly, new cgroups have no reserve per default
-and in the common case most cgroups are eligible for the preferred
-reclaim pass. This allows the new low boundary to be efficiently
-implemented with just a minor addition to the generic reclaim code,
-without the need for out-of-band data structures and reclaim passes.
-Because the generic reclaim code considers all cgroups except for the
-ones running low in the preferred first reclaim pass, overreclaim of
-individual groups is eliminated as well, resulting in much better
-overall workload performance.
+reserve. A cgroup enjoys reclaim protection when it's within its low,
+which makes delegation of subtrees possible.
 
 The original high boundary, the hard limit, is defined as a strict
 limit that can not budge, even if the OOM killer has to be called.
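
The proportional split described for memory.min and memory.low above can be illustrated with a small calculation. The sketch below is a standalone model of that arithmetic, not the kernel's memory-cgroup code; the function name and the sibling-sum parameter are hypothetical.

#include <stdio.h>

/*
 * Model of the documented rule: a child is protected up to what it
 * actually uses, and when children over-commit the parent's reserve,
 * each child gets a share proportional to its protected usage.
 */
static unsigned long effective_protection(unsigned long parent_effective,
					  unsigned long child_min,
					  unsigned long child_usage,
					  unsigned long siblings_protected)
{
	unsigned long protect = child_min < child_usage ? child_min : child_usage;

	if (siblings_protected <= parent_effective)
		return protect;		/* no overcommit, full protection */

	/* overcommit: hand out the parent's reserve proportionally */
	return parent_effective * protect / siblings_protected;
}

int main(void)
{
	/* parent min is 100M effective; children ask for 80M and 60M */
	printf("%lu\n", effective_protection(100, 80, 80, 140));	/* 57 */
	printf("%lu\n", effective_protection(100, 60, 60, 140));	/* 42 */
	return 0;
}
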
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 257e65714c6a..875b2b56b87f 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -218,6 +218,7 @@ line of text and contains the following stats separated by whitespace:
 same_pages	the number of same element filled pages written to this disk.
 		No memory is allocated for such pages.
 pages_compacted	the number of pages freed during compaction
+huge_pages	the number of incompressible pages
 
 9) Deactivate:
 	swapoff /dev/zram0
@@ -242,5 +243,29 @@ to backing storage rather than keeping it in memory.
 User should set up backing device via /sys/block/zramX/backing_dev
 before disksize setting.
 
+= memory tracking
+
+With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
+zram block. It could be useful to catch cold or incompressible
+pages of the process with*pagemap.
+If you enable the feature, you could see block state via
+/sys/kernel/debug/zram/zram0/block_state". The output is as follows,
+
+	  300    75.033841 .wh
+	  301    63.806904 s..
+	  302    63.806919 ..h
+
+First column is zram's block index.
+Second column is access time since the system was booted
+Third column is state of the block.
+(s: same page
+w: written page to backing store
+h: huge page)
+
+First line of above example says 300th block is accessed at 75.033841sec
+and the block's state is huge so it is written back to the backing
+storage. It's a debugging feature so anyone shouldn't rely on it to work
+properly.
+
 Nitin Gupta
 ngupta@vflare.org
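
Because the block_state format documented above is line-oriented (block index, boot-relative access time, and a three-character "swh" flag field), it can be consumed by a trivial user-space reader. The sketch below only assumes that documented format and the zram0 path used in the example; it is illustrative and not part of zram itself.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/zram/zram0/block_state", "r");
	long index;
	double atime;
	char state[4];

	if (!f)
		return 1;
	/* each line: "<index> <seconds.micros> <s|.><w|.><h|.>" */
	while (fscanf(f, "%ld %lf %3s", &index, &atime, state) == 3) {
		if (state[2] == 'h')	/* third flag: huge (incompressible) */
			printf("block %ld is incompressible (seen at %.6f)\n",
			       index, atime);
	}
	fclose(f);
	return 0;
}
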
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 6a608a6dcf71..a8378424bc98 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -1,6 +1,6 @@
 #
 # Feature name:          pte_special
-#         Kconfig:       __HAVE_ARCH_PTE_SPECIAL
+#         Kconfig:       ARCH_HAS_PTE_SPECIAL
 #         description:   arch supports the pte_special()/pte_mkspecial() VM APIs
 #
     -----------------------
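
The conversion this series performs is mechanical: the per-architecture __HAVE_ARCH_PTE_SPECIAL #define goes away and each architecture instead selects the ARCH_HAS_PTE_SPECIAL Kconfig symbol, so generic code can key off a single configuration option. The snippet below is a simplified standalone model of that consumer-side pattern; pte_t, the bit position, and the manually defined CONFIG macro are placeholders, not copied from the mm headers.

#include <stdio.h>

typedef struct { unsigned long val; } pte_t;

#define CONFIG_ARCH_HAS_PTE_SPECIAL 1	/* normally set by the arch's Kconfig "select" */
#define PTE_SPECIAL_BIT (1UL << 1)	/* placeholder bit position */

#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int pte_special(pte_t pte)
{
	return (pte.val & PTE_SPECIAL_BIT) != 0;
}

static pte_t pte_mkspecial(pte_t pte)
{
	pte.val |= PTE_SPECIAL_BIT;
	return pte;
}
#else
/* architectures without the bit fall back to "never special" */
static int pte_special(pte_t pte) { (void)pte; return 0; }
static pte_t pte_mkspecial(pte_t pte) { return pte; }
#endif

int main(void)
{
	pte_t pte = { 0 };

	pte = pte_mkspecial(pte);
	printf("special: %d\n", pte_special(pte));
	return 0;
}
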
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index b7bd6c9009cc..a8bd4af7fbce 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -10,8 +10,8 @@ afs.txt
 	- info and examples for the distributed AFS (Andrew File System) fs.
 affs.txt
 	- info and mount options for the Amiga Fast File System.
-autofs4-mount-control.txt
-	- info on device control operations for autofs4 module.
+autofs-mount-control.txt
+	- info on device control operations for autofs module.
 automount-support.txt
 	- information about filesystem automount support.
 befs.txt
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs-mount-control.txt
index e5177cb31a04..45edad6933cc 100644
--- a/Documentation/filesystems/autofs4-mount-control.txt
+++ b/Documentation/filesystems/autofs-mount-control.txt
@@ -1,5 +1,5 @@
 
-Miscellaneous Device control operations for the autofs4 kernel module
+Miscellaneous Device control operations for the autofs kernel module
 ====================================================================
 
 The problem
@@ -164,7 +164,7 @@ possibility for future development due to the requirements of the
 message bus architecture.
 
 
-autofs4 Miscellaneous Device mount control interface
+autofs Miscellaneous Device mount control interface
 ====================================================
 
 The control interface is opening a device node, typically /dev/autofs.
@@ -244,7 +244,7 @@ The device node ioctl operations implemented by this interface are:
 AUTOFS_DEV_IOCTL_VERSION
 ------------------------
 
-Get the major and minor version of the autofs4 device ioctl kernel module
+Get the major and minor version of the autofs device ioctl kernel module
 implementation. It requires an initialized struct autofs_dev_ioctl as an
 input parameter and sets the version information in the passed in structure.
 It returns 0 on success or the error -EINVAL if a version mismatch is
@@ -254,7 +254,7 @@ detected.
 AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD
 ------------------------------------------------------------------
 
-Get the major and minor version of the autofs4 protocol version understood
+Get the major and minor version of the autofs protocol version understood
 by loaded module. This call requires an initialized struct autofs_dev_ioctl
 with the ioctlfd field set to a valid autofs mount point descriptor
 and sets the requested version number in version field of struct args_protover
@@ -404,4 +404,3 @@ type is also given we are looking for a particular autofs mount and if
 a match isn't found a fail is returned. If the the located path is the
 root of a mount 1 is returned along with the super magic of the mount
 or 0 otherwise.
-
diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs.txt
index f10dd590f69f..373ad25852d3 100644
--- a/Documentation/filesystems/autofs4.txt
+++ b/Documentation/filesystems/autofs.txt
@@ -30,15 +30,15 @@ key advantages:
 Context
 -------
 
-The "autofs4" filesystem module is only one part of an autofs system.
+The "autofs" filesystem module is only one part of an autofs system.
 There also needs to be a user-space program which looks up names
 and mounts filesystems. This will often be the "automount" program,
-though other tools including "systemd" can make use of "autofs4".
+though other tools including "systemd" can make use of "autofs".
 This document describes only the kernel module and the interactions
 required with any user-space program. Subsequent text refers to this
 as the "automount daemon" or simply "the daemon".
 
-"autofs4" is a Linux kernel module with provides the "autofs"
+"autofs" is a Linux kernel module with provides the "autofs"
 filesystem type. Several "autofs" filesystems can be mounted and they
 can each be managed separately, or all managed by the same daemon.
 
@@ -215,7 +215,7 @@ of expiry.
 The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to
 the `umount` system call. Unmounting with MNT_EXPIRE will fail unless
 a previous attempt had been made, and the filesystem has been inactive
-and untouched since that previous attempt. autofs4 does not depend on
+and untouched since that previous attempt. autofs does not depend on
 this but has its own internal tracking of whether filesystems were
 recently used. This allows individual names in the autofs directory
 to expire separately.
@@ -415,7 +415,7 @@ which can be used to communicate directly with the autofs filesystem.
 It requires CAP_SYS_ADMIN for access.
 
 The `ioctl`s that can be used on this device are described in a separate
-document `autofs4-mount-control.txt`, and are summarized briefly here.
+document `autofs-mount-control.txt`, and are summarized briefly here.
 Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:
 
         struct autofs_dev_ioctl {
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
index 7eb762eb3136..b0afd3d55eaf 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.txt
@@ -9,7 +9,7 @@ also be requested by userspace.
 IN-KERNEL AUTOMOUNTING
 ======================
 
-See section "Mount Traps" of Documentation/filesystems/autofs4.txt
+See section "Mount Traps" of Documentation/filesystems/autofs.txt
 
 Then from userspace, you can just do something like:
 
diff --git a/Documentation/filesystems/path-lookup.md b/Documentation/filesystems/path-lookup.md
index 1933ef734e63..e2edd45c4bc0 100644
--- a/Documentation/filesystems/path-lookup.md
+++ b/Documentation/filesystems/path-lookup.md
@@ -460,7 +460,7 @@ this retry process in the next article.
 Automount points are locations in the filesystem where an attempt to
 lookup a name can trigger changes to how that lookup should be
 handled, in particular by mounting a filesystem there. These are
-covered in greater detail in autofs4.txt in the Linux documentation
+covered in greater detail in autofs.txt in the Linux documentation
 tree, but a few notes specifically related to path lookup are in order
 here.
 
diff --git a/MAINTAINERS b/MAINTAINERS
index d325d2dc7600..c9ac159fb023 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7723,11 +7723,11 @@ W: https://linuxtv.org
 S:	Maintained
 F:	drivers/media/radio/radio-keene*
 
-KERNEL AUTOMOUNTER v4 (AUTOFS4)
+KERNEL AUTOMOUNTER
 M:	Ian Kent <raven@themaw.net>
 L:	autofs@vger.kernel.org
 S:	Maintained
-F:	fs/autofs4/
+F:	fs/autofs/
 
 KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
 M:	Masahiro Yamada <yamada.masahiro@socionext.com>
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 89d47eac18b2..e81bcd271be7 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -48,6 +48,7 @@ config ARC
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZMA
+	select ARCH_HAS_PTE_SPECIAL
 
 config MIGHT_HAVE_PCI
 	bool
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 08fe33830d4b..8ec5599a0957 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -320,8 +320,6 @@ PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE));
 PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
 PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
 
-#define __HAVE_ARCH_PTE_SPECIAL
-
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8f460bdd4be1..534563ac7f5f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,6 +8,7 @@ config ARM
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 2a4836087358..6d50a11d7793 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -219,7 +219,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
 	pte_val(pte) |= L_PTE_SPECIAL;
 	return pte;
 }
-#define __HAVE_ARCH_PTE_SPECIAL
 
 #define pmd_write(pmd)		(pmd_isclear((pmd), L_PMD_SECT_RDONLY))
 #define pmd_dirty(pmd)		(pmd_isset((pmd), L_PMD_SECT_DIRTY))
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b25ed7834f6c..4759566a78cb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -17,6 +17,7 @@ config ARM64
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
+	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7c4c8f318ba9..9f82d6b53851 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -306,8 +306,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 
-#define __HAVE_ARCH_PTE_SPECIAL
-
 static inline pte_t pgd_pte(pgd_t pgd)
 {
 	return __pte(pgd_val(pgd));
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 076fe3094856..8f959df2de7a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,6 +135,7 @@ config PPC
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_PMEM_API if PPC64
+	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_MEMBARRIER_CALLBACKS
 	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
 	select ARCH_HAS_SG_CHAIN
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 42fe7c2ff2df..63cee159022b 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -335,9 +335,6 @@ extern unsigned long pci_io_base;
 /* Advertise special mapping type for AGP */
 #define HAVE_PAGE_AGP
 
-/* Advertise support for _PAGE_SPECIAL */
-#define __HAVE_ARCH_PTE_SPECIAL
-
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 050b0d775324..bef56141a549 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -208,9 +208,6 @@ static inline bool pte_user(pte_t pte)
 #define PAGE_AGP		(PAGE_KERNEL_NC)
 #define HAVE_PAGE_AGP
 
-/* Advertise support for _PAGE_SPECIAL */
-#define __HAVE_ARCH_PTE_SPECIAL
-
 #ifndef _PAGE_READ
 /* if not defined, we should not find _PAGE_WRITE too */
 #define _PAGE_READ 0
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 274bc064c41f..17f19e67993b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -42,6 +42,7 @@ config RISCV
 	select THREAD_INFO_IN_TASK
 	select RISCV_TIMER
 	select GENERIC_IRQ_MULTI_HANDLER
+	select ARCH_HAS_PTE_SPECIAL
 
 config MMU
 	def_bool y
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index 997ddbb1d370..2fa2942be221 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -42,7 +42,4 @@
 	 _PAGE_WRITE | _PAGE_EXEC |	\
 	 _PAGE_USER | _PAGE_GLOBAL))
 
-/* Advertise support for _PAGE_SPECIAL */
-#define __HAVE_ARCH_PTE_SPECIAL
-
 #endif /* _ASM_RISCV_PGTABLE_BITS_H */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b7deee7e738f..baed39772c84 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -65,6 +65,7 @@ config S390
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	select ARCH_HAS_KCOV
+	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2d24d33bf188..9809694e1389 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -171,7 +171,6 @@ static inline int is_module_addr(void *addr)
 #define _PAGE_WRITE	0x020		/* SW pte write bit */
 #define _PAGE_SPECIAL	0x040		/* SW associated with special page */
 #define _PAGE_UNUSED	0x080		/* SW bit for pgste usage state */
-#define __HAVE_ARCH_PTE_SPECIAL
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SOFT_DIRTY 0x002		/* SW pte soft dirty bit */
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 562f72955956..84bd6329a88d 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -190,14 +190,15 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	if (!list_empty(&mm->context.pgtable_list)) {
 		page = list_first_entry(&mm->context.pgtable_list,
 					struct page, lru);
-		mask = atomic_read(&page->_mapcount);
+		mask = atomic_read(&page->_refcount) >> 24;
 		mask = (mask | (mask >> 4)) & 3;
 		if (mask != 3) {
 			table = (unsigned long *) page_to_phys(page);
 			bit = mask & 1;		/* =1 -> second 2K */
 			if (bit)
 				table += PTRS_PER_PTE;
-			atomic_xor_bits(&page->_mapcount, 1U << bit);
+			atomic_xor_bits(&page->_refcount,
+					1U << (bit + 24));
 			list_del(&page->lru);
 		}
 	}
@@ -218,12 +219,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	table = (unsigned long *) page_to_phys(page);
 	if (mm_alloc_pgste(mm)) {
 		/* Return 4K page table with PGSTEs */
-		atomic_set(&page->_mapcount, 3);
+		atomic_xor_bits(&page->_refcount, 3 << 24);
 		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
 		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	} else {
 		/* Return the first 2K fragment of the page */
-		atomic_set(&page->_mapcount, 1);
+		atomic_xor_bits(&page->_refcount, 1 << 24);
 		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
 		spin_lock_bh(&mm->context.lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
@@ -242,7 +243,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 		/* Free 2K page table fragment of a 4K page */
 		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
 		spin_lock_bh(&mm->context.lock);
-		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+		mask >>= 24;
 		if (mask & 3)
 			list_add(&page->lru, &mm->context.pgtable_list);
 		else
@@ -253,7 +255,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	}
 
 	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
 	__free_page(page);
 }
 
@@ -274,7 +275,8 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 	}
 	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.lock);
-	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+	mask >>= 24;
 	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
 	else
@@ -296,12 +298,13 @@ static void __tlb_remove_table(void *_table)
 		break;
 	case 1:		/* lower 2K of a 4K page table */
 	case 2:		/* higher 2K of a 4K page table */
-		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
+		mask >>= 24;
+		if (mask != 0)
 			break;
 		/* fallthrough */
 	case 3:		/* 4K page table with pgstes */
 		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
 		__free_page(page);
 		break;
 	}
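
The s390 change above moves the 2K-page-table fragment bookkeeping from page->_mapcount into the upper byte of page->_refcount, which is why every helper now shifts its bit mask up by 24 and shifts the returned value back down before testing it (page_table_free_rcu() additionally flips a second nibble, 0x11U << bit, to mark a fragment as pending removal). The standalone model below mirrors the basic encoding with a plain integer; xor_bits() is only a stand-in for the kernel's atomic_xor_bits(), which, as used above, returns the post-xor value.

#include <assert.h>

static unsigned int refcount;	/* stands in for page->_refcount */

/* toggle bits and return the new value, like the kernel helper above */
static unsigned int xor_bits(unsigned int *v, unsigned int bits)
{
	*v ^= bits;
	return *v;
}

int main(void)
{
	unsigned int mask;

	/* allocate the first 2K fragment: set bit 0 of byte 3 */
	xor_bits(&refcount, 1U << (0 + 24));
	/* allocate the second 2K fragment: set bit 1 of byte 3 */
	xor_bits(&refcount, 1U << (1 + 24));

	mask = refcount >> 24;
	assert((mask & 3) == 3);	/* both fragments in use */

	/* free the first fragment again */
	mask = xor_bits(&refcount, 1U << (0 + 24)) >> 24;
	assert((mask & 3) == 2);	/* one fragment still in use */
	return 0;
}
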
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index ae619d54018c..4d61a085982b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 config SUPERH
 	def_bool y
+	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 89c513a982fc..f6abfe2bca93 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -156,8 +156,6 @@ extern void page_table_range_init(unsigned long start, unsigned long end,
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-#define __HAVE_ARCH_PTE_SPECIAL
-
 #include <asm-generic/pgtable.h>
 
 #endif /* __ASM_SH_PGTABLE_H */
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index b42ba888217d..9a2b8877f174 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -88,6 +88,7 @@ config SPARC64
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select GENERIC_TIME_VSYSCALL
 	select ARCH_CLOCKSOURCE_DATA
+	select ARCH_HAS_PTE_SPECIAL
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 44d6ac47e035..1393a8ac596b 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -117,9 +117,6 @@ bool kern_addr_valid(unsigned long addr);
 #define _PAGE_PMD_HUGE	_AC(0x0100000000000000,UL) /* Huge page */
 #define _PAGE_PUD_HUGE	_PAGE_PMD_HUGE
 
-/* Advertise support for _PAGE_SPECIAL */
-#define __HAVE_ARCH_PTE_SPECIAL
-
 /* SUN4U pte bits... */
 #define _PAGE_SZ4MB_4U	_AC(0x6000000000000000,UL) /* 4MB Page */
 #define _PAGE_SZ512K_4U	_AC(0x4000000000000000,UL) /* 512K Page */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cb6e3a219294..f182a4e8e5bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,7 @@ config X86
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PMEM_API		if X86_64
+	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_UACCESS_MCSAFE		if X86_64
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 1e5a40673953..99fff853c944 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -65,7 +65,6 @@
 #define _PAGE_PKEY_BIT2	(_AT(pteval_t, 0))
 #define _PAGE_PKEY_BIT3	(_AT(pteval_t, 0))
 #endif
-#define __HAVE_ARCH_PTE_SPECIAL
 
 #define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \
 			 _PAGE_PKEY_BIT1 | \
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ffc8c13c50e4..938dbcd46b97 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -114,13 +114,12 @@ static inline void pgd_list_del(pgd_t *pgd)
 
 static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
 {
-	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
-	virt_to_page(pgd)->index = (pgoff_t)mm;
+	virt_to_page(pgd)->pt_mm = mm;
 }
 
 struct mm_struct *pgd_page_get_mm(struct page *page)
 {
-	return (struct mm_struct *)page->index;
+	return page->pt_mm;
 }
 
 static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index ac3a31d433b2..635235759a0a 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -13,7 +13,7 @@ config ZRAM
 	  It has several use cases, for example: /tmp storage, use as swap
 	  disks and maybe many more.
 
-	  See zram.txt for more information.
+	  See Documentation/blockdev/zram.txt for more information.
 
 config ZRAM_WRITEBACK
 	bool "Write back incompressible page to backing device"
@@ -25,4 +25,14 @@ config ZRAM_WRITEBACK
 	 For this feature, admin should set up backing device via
 	 /sys/block/zramX/backing_dev.
 
-	 See zram.txt for more infomration.
+	 See Documentation/blockdev/zram.txt for more information.
+
+config ZRAM_MEMORY_TRACKING
+	bool "Track zRam block status"
+	depends on ZRAM && DEBUG_FS
+	help
+	  With this feature, admin can track the state of allocated blocks
+	  of zRAM. Admin could see the information via
+	  /sys/kernel/debug/zram/zramX/block_state.
+
+	  See Documentation/blockdev/zram.txt for more information.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0f3fadd71230..da51293e7c03 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -31,6 +31,7 @@
 #include <linux/err.h>
 #include <linux/idr.h>
 #include <linux/sysfs.h>
+#include <linux/debugfs.h>
 #include <linux/cpuhotplug.h>
 
 #include "zram_drv.h"
@@ -52,11 +53,28 @@ static size_t huge_class_size;
 
 static void zram_free_page(struct zram *zram, size_t index);
 
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+	bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
+}
+
 static inline bool init_done(struct zram *zram)
 {
 	return zram->disksize;
 }
 
+static inline bool zram_allocated(struct zram *zram, u32 index)
+{
+
+	return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
+		zram->table[index].handle;
+}
+
 static inline struct zram *dev_to_zram(struct device *dev)
 {
 	return (struct zram *)dev_to_disk(dev)->private_data;
@@ -73,7 +91,7 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
 }
 
 /* flag operations require table entry bit_spin_lock() being held */
-static int zram_test_flag(struct zram *zram, u32 index,
+static bool zram_test_flag(struct zram *zram, u32 index,
 			enum zram_pageflags flag)
 {
 	return zram->table[index].value & BIT(flag);
@@ -600,6 +618,114 @@ static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 static void zram_wb_clear(struct zram *zram, u32 index) {}
 #endif
 
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+
+static struct dentry *zram_debugfs_root;
+
+static void zram_debugfs_create(void)
+{
+	zram_debugfs_root = debugfs_create_dir("zram", NULL);
+}
+
+static void zram_debugfs_destroy(void)
+{
+	debugfs_remove_recursive(zram_debugfs_root);
+}
+
+static void zram_accessed(struct zram *zram, u32 index)
+{
+	zram->table[index].ac_time = ktime_get_boottime();
+}
+
+static void zram_reset_access(struct zram *zram, u32 index)
+{
+	zram->table[index].ac_time = 0;
+}
+
+static ssize_t read_block_state(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char *kbuf;
+	ssize_t index, written = 0;
+	struct zram *zram = file->private_data;
+	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+	struct timespec64 ts;
+
+	kbuf = kvmalloc(count, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	down_read(&zram->init_lock);
+	if (!init_done(zram)) {
+		up_read(&zram->init_lock);
+		kvfree(kbuf);
+		return -EINVAL;
+	}
+
+	for (index = *ppos; index < nr_pages; index++) {
+		int copied;
+
+		zram_slot_lock(zram, index);
+		if (!zram_allocated(zram, index))
+			goto next;
+
+		ts = ktime_to_timespec64(zram->table[index].ac_time);
+		copied = snprintf(kbuf + written, count,
+			"%12zd %12lld.%06lu %c%c%c\n",
+			index, (s64)ts.tv_sec,
+			ts.tv_nsec / NSEC_PER_USEC,
+			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
+			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
+			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');
+
+		if (count < copied) {
+			zram_slot_unlock(zram, index);
+			break;
+		}
+		written += copied;
+		count -= copied;
+next:
+		zram_slot_unlock(zram, index);
+		*ppos += 1;
+	}
+
+	up_read(&zram->init_lock);
+	if (copy_to_user(buf, kbuf, written))
+		written = -EFAULT;
+	kvfree(kbuf);
+
+	return written;
+}
+
+static const struct file_operations proc_zram_block_state_op = {
+	.open = simple_open,
+	.read = read_block_state,
+	.llseek = default_llseek,
+};
+
+static void zram_debugfs_register(struct zram *zram)
+{
+	if (!zram_debugfs_root)
+		return;
+
+	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
+						zram_debugfs_root);
+	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
+				zram, &proc_zram_block_state_op);
+}
+
+static void zram_debugfs_unregister(struct zram *zram)
+{
+	debugfs_remove_recursive(zram->debugfs_dir);
+}
+#else
+static void zram_debugfs_create(void) {};
+static void zram_debugfs_destroy(void) {};
+static void zram_accessed(struct zram *zram, u32 index) {};
+static void zram_reset_access(struct zram *zram, u32 index) {};
+static void zram_debugfs_register(struct zram *zram) {};
+static void zram_debugfs_unregister(struct zram *zram) {};
+#endif
 
 /*
  * We switched to per-cpu streams and this attr is not needed anymore.
@@ -719,14 +845,15 @@ static ssize_t mm_stat_show(struct device *dev,
 	max_used = atomic_long_read(&zram->stats.max_used_pages);
 
 	ret = scnprintf(buf, PAGE_SIZE,
-			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
+			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
 			orig_size << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.compr_data_size),
 			mem_used << PAGE_SHIFT,
 			zram->limit_pages << PAGE_SHIFT,
 			max_used << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.same_pages),
-			pool_stats.pages_compacted);
+			pool_stats.pages_compacted,
+			(u64)atomic64_read(&zram->stats.huge_pages));
 	up_read(&zram->init_lock);
 
 	return ret;
@@ -753,16 +880,6 @@ static DEVICE_ATTR_RO(io_stat);
 static DEVICE_ATTR_RO(mm_stat);
 static DEVICE_ATTR_RO(debug_stat);
 
-static void zram_slot_lock(struct zram *zram, u32 index)
-{
-	bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
-}
-
-static void zram_slot_unlock(struct zram *zram, u32 index)
-{
-	bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
-}
-
 static void zram_meta_free(struct zram *zram, u64 disksize)
 {
 	size_t num_pages = disksize >> PAGE_SHIFT;
@@ -805,6 +922,13 @@ static void zram_free_page(struct zram *zram, size_t index)
 {
 	unsigned long handle;
 
+	zram_reset_access(zram, index);
+
+	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
+		zram_clear_flag(zram, index, ZRAM_HUGE);
+		atomic64_dec(&zram->stats.huge_pages);
+	}
+
 	if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
 		zram_wb_clear(zram, index);
 		atomic64_dec(&zram->stats.pages_stored);
@@ -973,6 +1097,7 @@ compress_again:
 	}
 
 	if (unlikely(comp_len >= huge_class_size)) {
+		comp_len = PAGE_SIZE;
 		if (zram_wb_enabled(zram) && allow_wb) {
 			zcomp_stream_put(zram->comp);
 			ret = write_to_bdev(zram, bvec, index, bio, &element);
@@ -984,7 +1109,6 @@ compress_again:
 			allow_wb = false;
 			goto compress_again;
 		}
-		comp_len = PAGE_SIZE;
 	}
 
 	/*
@@ -1046,6 +1170,11 @@ out:
 	zram_slot_lock(zram, index);
 	zram_free_page(zram, index);
 
+	if (comp_len == PAGE_SIZE) {
+		zram_set_flag(zram, index, ZRAM_HUGE);
+		atomic64_inc(&zram->stats.huge_pages);
+	}
+
 	if (flags) {
 		zram_set_flag(zram, index, flags);
 		zram_set_element(zram, index, element);
@@ -1166,6 +1295,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 
 	generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
 
+	zram_slot_lock(zram, index);
+	zram_accessed(zram, index);
+	zram_slot_unlock(zram, index);
+
 	if (unlikely(ret < 0)) {
 		if (!is_write)
 			atomic64_inc(&zram->stats.failed_reads);
@@ -1577,6 +1710,7 @@ static int zram_add(void)
 	}
 	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
 
+	zram_debugfs_register(zram);
 	pr_info("Added device: %s\n", zram->disk->disk_name);
 	return device_id;
 
@@ -1610,6 +1744,7 @@ static int zram_remove(struct zram *zram)
 	zram->claim = true;
 	mutex_unlock(&bdev->bd_mutex);
 
+	zram_debugfs_unregister(zram);
 	/*
 	 * Remove sysfs first, so no one will perform a disksize
 	 * store while we destroy the devices. This also helps during
@@ -1712,6 +1847,7 @@ static void destroy_devices(void)
 {
 	class_unregister(&zram_control_class);
 	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
+	zram_debugfs_destroy();
 	idr_destroy(&zram_index_idr);
 	unregister_blkdev(zram_major, "zram");
 	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
@@ -1733,6 +1869,7 @@ static int __init zram_init(void)
 		return ret;
 	}
 
+	zram_debugfs_create();
 	zram_major = register_blkdev(0, "zram");
 	if (zram_major <= 0) {
 		pr_err("Unable to get major number\n");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 008861220723..72c8584b6dff 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -43,10 +43,11 @@
 
 /* Flags for zram pages (table[page_no].value) */
 enum zram_pageflags {
-	/* Page consists the same element */
-	ZRAM_SAME = ZRAM_FLAG_SHIFT,
-	ZRAM_ACCESS,	/* page is now accessed */
+	/* zram slot is locked */
+	ZRAM_LOCK = ZRAM_FLAG_SHIFT,
+	ZRAM_SAME,	/* Page consists the same element */
 	ZRAM_WB,	/* page is stored on backing_device */
+	ZRAM_HUGE,	/* Incompressible page */
 
 	__NR_ZRAM_PAGEFLAGS,
 };
@@ -60,6 +61,9 @@ struct zram_table_entry {
 		unsigned long element;
 	};
 	unsigned long value;
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+	ktime_t ac_time;
+#endif
 };
 
 struct zram_stats {
@@ -71,6 +75,7 @@ struct zram_stats {
 	atomic64_t invalid_io;	/* non-page-aligned I/O requests */
 	atomic64_t notify_free;	/* no. of swap slot free notifications */
 	atomic64_t same_pages;	/* no. of same element filled pages */
+	atomic64_t huge_pages;	/* no. of huge pages */
 	atomic64_t pages_stored;	/* no. of pages currently stored */
 	atomic_long_t max_used_pages;	/* no. of maximum pages stored */
 	atomic64_t writestall;	/* no. of write slow paths */
@@ -107,5 +112,8 @@ struct zram {
 	unsigned long nr_pages;
 	spinlock_t bitmap_lock;
 #endif
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+	struct dentry *debugfs_dir;
+#endif
 };
 #endif
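
A note on how these flags coexist with the rest of table[index].value: the flags occupy the bits from ZRAM_FLAG_SHIFT upward, with ZRAM_LOCK as the lowest flag, which is why the new zram_allocated() helper in zram_drv.c shifts by ZRAM_FLAG_SHIFT + 1 to ignore the lock bit, while the bits below the shift hold the stored object size. The standalone model below assumes a shift of 24 purely for illustration; the real value and layout are defined in zram_drv.h.

#include <assert.h>

#define ZRAM_FLAG_SHIFT	24	/* assumption for this model */

enum zram_pageflags {
	ZRAM_LOCK = ZRAM_FLAG_SHIFT,	/* slot lock, not an "allocated" marker */
	ZRAM_SAME,
	ZRAM_WB,
	ZRAM_HUGE,
};

int main(void)
{
	unsigned long value = 0;

	value |= 2048;			/* low bits: compressed object size */
	value |= 1UL << ZRAM_HUGE;	/* high bits: per-slot flags */

	assert((value & ((1UL << ZRAM_FLAG_SHIFT) - 1)) == 2048);
	/* "allocated" test skips the ZRAM_LOCK bit by shifting one further */
	assert(value >> (ZRAM_FLAG_SHIFT + 1));
	return 0;
}
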
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index e622f0f10502..0429c8ee58f1 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -210,12 +210,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
210 p9_debug(P9_DEBUG_ERROR, 210 p9_debug(P9_DEBUG_ERROR,
211 "integer field, but no integer?\n"); 211 "integer field, but no integer?\n");
212 ret = r; 212 ret = r;
213 continue; 213 } else {
214 } 214 v9ses->debug = option;
215 v9ses->debug = option;
216#ifdef CONFIG_NET_9P_DEBUG 215#ifdef CONFIG_NET_9P_DEBUG
217 p9_debug_level = option; 216 p9_debug_level = option;
218#endif 217#endif
218 }
219 break; 219 break;
220 220
221 case Opt_dfltuid: 221 case Opt_dfltuid:
@@ -231,7 +231,6 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
231 p9_debug(P9_DEBUG_ERROR, 231 p9_debug(P9_DEBUG_ERROR,
232 "uid field, but not a uid?\n"); 232 "uid field, but not a uid?\n");
233 ret = -EINVAL; 233 ret = -EINVAL;
234 continue;
235 } 234 }
236 break; 235 break;
237 case Opt_dfltgid: 236 case Opt_dfltgid:
@@ -247,7 +246,6 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
247 p9_debug(P9_DEBUG_ERROR, 246 p9_debug(P9_DEBUG_ERROR,
248 "gid field, but not a gid?\n"); 247 "gid field, but not a gid?\n");
249 ret = -EINVAL; 248 ret = -EINVAL;
250 continue;
251 } 249 }
252 break; 250 break;
253 case Opt_afid: 251 case Opt_afid:
@@ -256,9 +254,9 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
256 p9_debug(P9_DEBUG_ERROR, 254 p9_debug(P9_DEBUG_ERROR,
257 "integer field, but no integer?\n"); 255 "integer field, but no integer?\n");
258 ret = r; 256 ret = r;
259 continue; 257 } else {
258 v9ses->afid = option;
260 } 259 }
261 v9ses->afid = option;
262 break; 260 break;
263 case Opt_uname: 261 case Opt_uname:
264 kfree(v9ses->uname); 262 kfree(v9ses->uname);
@@ -306,13 +304,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
306 "problem allocating copy of cache arg\n"); 304 "problem allocating copy of cache arg\n");
307 goto free_and_return; 305 goto free_and_return;
308 } 306 }
309 ret = get_cache_mode(s); 307 r = get_cache_mode(s);
310 if (ret == -EINVAL) { 308 if (r < 0)
311 kfree(s); 309 ret = r;
312 goto free_and_return; 310 else
313 } 311 v9ses->cache = r;
314 312
315 v9ses->cache = ret;
316 kfree(s); 313 kfree(s);
317 break; 314 break;
318 315
@@ -341,14 +338,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
341 pr_info("Unknown access argument %s\n", 338 pr_info("Unknown access argument %s\n",
342 s); 339 s);
343 kfree(s); 340 kfree(s);
344 goto free_and_return; 341 continue;
345 } 342 }
346 v9ses->uid = make_kuid(current_user_ns(), uid); 343 v9ses->uid = make_kuid(current_user_ns(), uid);
347 if (!uid_valid(v9ses->uid)) { 344 if (!uid_valid(v9ses->uid)) {
348 ret = -EINVAL; 345 ret = -EINVAL;
349 pr_info("Uknown uid %s\n", s); 346 pr_info("Uknown uid %s\n", s);
350 kfree(s);
351 goto free_and_return;
352 } 347 }
353 } 348 }
354 349
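[Editor's note] The v9fs_parse_options() hunks above change the error handling shape: the cache, access and uid branches no longer jump to free_and_return on a bad value; instead the error is remembered in ret (or the option is simply skipped) and the loop moves on to the next token. A small userspace sketch of that "remember the first error, keep parsing" pattern; the option names and toy parser here are made up for illustration.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Toy parser: each option is "name=number"; a bad number is an error
     * that is recorded, but later options are still processed. */
    static int parse_int(const char *s, long *out)
    {
            char *end;
            long v = strtol(s, &end, 10);

            if (end == s || *end != '\0')
                    return -22;     /* -EINVAL-style failure */
            *out = v;
            return 0;
    }

    int main(void)
    {
            const char *opts[] = { "debug=3", "afid=notanumber", "uid=1000" };
            long debug = 0, afid = 0, uid = 0;
            int ret = 0, r;

            for (size_t i = 0; i < sizeof(opts) / sizeof(opts[0]); i++) {
                    const char *val = strchr(opts[i], '=') + 1;
                    long v;

                    r = parse_int(val, &v);
                    if (r < 0) {
                            ret = r;        /* keep the error ...        */
                            continue;       /* ... but parse the rest    */
                    }
                    if (!strncmp(opts[i], "debug=", 6))
                            debug = v;
                    else if (!strncmp(opts[i], "afid=", 5))
                            afid = v;
                    else if (!strncmp(opts[i], "uid=", 4))
                            uid = v;
            }
            printf("ret=%d debug=%ld afid=%ld uid=%ld\n", ret, debug, afid, uid);
            return ret ? 1 : 0;
    }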
diff --git a/fs/Kconfig b/fs/Kconfig
index ac4ac908f001..40cdae75e3b4 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -108,6 +108,7 @@ source "fs/notify/Kconfig"
108 108
109source "fs/quota/Kconfig" 109source "fs/quota/Kconfig"
110 110
111source "fs/autofs/Kconfig"
111source "fs/autofs4/Kconfig" 112source "fs/autofs4/Kconfig"
112source "fs/fuse/Kconfig" 113source "fs/fuse/Kconfig"
113source "fs/overlayfs/Kconfig" 114source "fs/overlayfs/Kconfig"
@@ -203,6 +204,9 @@ config HUGETLBFS
203config HUGETLB_PAGE 204config HUGETLB_PAGE
204 def_bool HUGETLBFS 205 def_bool HUGETLBFS
205 206
207config MEMFD_CREATE
208 def_bool TMPFS || HUGETLBFS
209
206config ARCH_HAS_GIGANTIC_PAGE 210config ARCH_HAS_GIGANTIC_PAGE
207 bool 211 bool
208 212
diff --git a/fs/Makefile b/fs/Makefile
index c9375fd2c8c4..2e005525cc19 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_AFFS_FS) += affs/
102obj-$(CONFIG_ROMFS_FS) += romfs/ 102obj-$(CONFIG_ROMFS_FS) += romfs/
103obj-$(CONFIG_QNX4FS_FS) += qnx4/ 103obj-$(CONFIG_QNX4FS_FS) += qnx4/
104obj-$(CONFIG_QNX6FS_FS) += qnx6/ 104obj-$(CONFIG_QNX6FS_FS) += qnx6/
105obj-$(CONFIG_AUTOFS_FS) += autofs/
105obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 106obj-$(CONFIG_AUTOFS4_FS) += autofs4/
106obj-$(CONFIG_ADFS_FS) += adfs/ 107obj-$(CONFIG_ADFS_FS) += adfs/
107obj-$(CONFIG_FUSE_FS) += fuse/ 108obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig
new file mode 100644
index 000000000000..6a2064eb3b27
--- /dev/null
+++ b/fs/autofs/Kconfig
@@ -0,0 +1,20 @@
1config AUTOFS_FS
2 tristate "Kernel automounter support (supports v3, v4 and v5)"
3 default n
4 help
5 The automounter is a tool to automatically mount remote file systems
6 on demand. This implementation is partially kernel-based to reduce
7 overhead in the already-mounted case; this is unlike the BSD
8 automounter (amd), which is a pure user space daemon.
9
10 To use the automounter you need the user-space tools from
11 <https://www.kernel.org/pub/linux/daemons/autofs/>; you also want
12 to answer Y to "NFS file system support", below.
13
14 To compile this support as a module, choose M here: the module will be
15 called autofs.
16
17 If you are not a part of a fairly large, distributed network or
18 don't have a laptop which needs to dynamically reconfigure to the
19 local network, you probably do not need an automounter, and can say
20 N here.
diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile
new file mode 100644
index 000000000000..43fedde15c26
--- /dev/null
+++ b/fs/autofs/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the linux autofs-filesystem routines.
3#
4
5obj-$(CONFIG_AUTOFS_FS) += autofs.o
6
7autofs-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs/autofs_i.h
index 4737615f0eaa..9400a9f6318a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -9,7 +9,7 @@
9 9
10/* Internal header file for autofs */ 10/* Internal header file for autofs */
11 11
12#include <linux/auto_fs4.h> 12#include <linux/auto_fs.h>
13#include <linux/auto_dev-ioctl.h> 13#include <linux/auto_dev-ioctl.h>
14 14
15#include <linux/kernel.h> 15#include <linux/kernel.h>
@@ -25,7 +25,7 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/list.h> 26#include <linux/list.h>
27#include <linux/completion.h> 27#include <linux/completion.h>
28#include <asm/current.h> 28#include <linux/file.h>
29 29
30/* This is the range of ioctl() numbers we claim as ours */ 30/* This is the range of ioctl() numbers we claim as ours */
31#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY 31#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
@@ -122,44 +122,44 @@ struct autofs_sb_info {
122 struct rcu_head rcu; 122 struct rcu_head rcu;
123}; 123};
124 124
125static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) 125static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
126{ 126{
127 return (struct autofs_sb_info *)(sb->s_fs_info); 127 return (struct autofs_sb_info *)(sb->s_fs_info);
128} 128}
129 129
130static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) 130static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
131{ 131{
132 return (struct autofs_info *)(dentry->d_fsdata); 132 return (struct autofs_info *)(dentry->d_fsdata);
133} 133}
134 134
135/* autofs4_oz_mode(): do we see the man behind the curtain? (The 135/* autofs_oz_mode(): do we see the man behind the curtain? (The
136 * processes which do manipulations for us in user space sees the raw 136 * processes which do manipulations for us in user space sees the raw
137 * filesystem without "magic".) 137 * filesystem without "magic".)
138 */ 138 */
139static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) 139static inline int autofs_oz_mode(struct autofs_sb_info *sbi)
140{ 140{
141 return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; 141 return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
142} 142}
143 143
144struct inode *autofs4_get_inode(struct super_block *, umode_t); 144struct inode *autofs_get_inode(struct super_block *, umode_t);
145void autofs4_free_ino(struct autofs_info *); 145void autofs_free_ino(struct autofs_info *);
146 146
147/* Expiration */ 147/* Expiration */
148int is_autofs4_dentry(struct dentry *); 148int is_autofs_dentry(struct dentry *);
149int autofs4_expire_wait(const struct path *path, int rcu_walk); 149int autofs_expire_wait(const struct path *path, int rcu_walk);
150int autofs4_expire_run(struct super_block *, struct vfsmount *, 150int autofs_expire_run(struct super_block *, struct vfsmount *,
151 struct autofs_sb_info *, 151 struct autofs_sb_info *,
152 struct autofs_packet_expire __user *); 152 struct autofs_packet_expire __user *);
153int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, 153int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
154 struct autofs_sb_info *sbi, int when); 154 struct autofs_sb_info *sbi, int when);
155int autofs4_expire_multi(struct super_block *, struct vfsmount *, 155int autofs_expire_multi(struct super_block *, struct vfsmount *,
156 struct autofs_sb_info *, int __user *); 156 struct autofs_sb_info *, int __user *);
157struct dentry *autofs4_expire_direct(struct super_block *sb, 157struct dentry *autofs_expire_direct(struct super_block *sb,
158 struct vfsmount *mnt, 158 struct vfsmount *mnt,
159 struct autofs_sb_info *sbi, int how); 159 struct autofs_sb_info *sbi, int how);
160struct dentry *autofs4_expire_indirect(struct super_block *sb, 160struct dentry *autofs_expire_indirect(struct super_block *sb,
161 struct vfsmount *mnt, 161 struct vfsmount *mnt,
162 struct autofs_sb_info *sbi, int how); 162 struct autofs_sb_info *sbi, int how);
163 163
164/* Device node initialization */ 164/* Device node initialization */
165 165
@@ -168,11 +168,11 @@ void autofs_dev_ioctl_exit(void);
168 168
169/* Operations structures */ 169/* Operations structures */
170 170
171extern const struct inode_operations autofs4_symlink_inode_operations; 171extern const struct inode_operations autofs_symlink_inode_operations;
172extern const struct inode_operations autofs4_dir_inode_operations; 172extern const struct inode_operations autofs_dir_inode_operations;
173extern const struct file_operations autofs4_dir_operations; 173extern const struct file_operations autofs_dir_operations;
174extern const struct file_operations autofs4_root_operations; 174extern const struct file_operations autofs_root_operations;
175extern const struct dentry_operations autofs4_dentry_operations; 175extern const struct dentry_operations autofs_dentry_operations;
176 176
177/* VFS automount flags management functions */ 177/* VFS automount flags management functions */
178static inline void __managed_dentry_set_managed(struct dentry *dentry) 178static inline void __managed_dentry_set_managed(struct dentry *dentry)
@@ -201,9 +201,9 @@ static inline void managed_dentry_clear_managed(struct dentry *dentry)
201 201
202/* Initializing function */ 202/* Initializing function */
203 203
204int autofs4_fill_super(struct super_block *, void *, int); 204int autofs_fill_super(struct super_block *, void *, int);
205struct autofs_info *autofs4_new_ino(struct autofs_sb_info *); 205struct autofs_info *autofs_new_ino(struct autofs_sb_info *);
206void autofs4_clean_ino(struct autofs_info *); 206void autofs_clean_ino(struct autofs_info *);
207 207
208static inline int autofs_prepare_pipe(struct file *pipe) 208static inline int autofs_prepare_pipe(struct file *pipe)
209{ 209{
@@ -218,25 +218,25 @@ static inline int autofs_prepare_pipe(struct file *pipe)
218 218
219/* Queue management functions */ 219/* Queue management functions */
220 220
221int autofs4_wait(struct autofs_sb_info *, 221int autofs_wait(struct autofs_sb_info *,
222 const struct path *, enum autofs_notify); 222 const struct path *, enum autofs_notify);
223int autofs4_wait_release(struct autofs_sb_info *, autofs_wqt_t, int); 223int autofs_wait_release(struct autofs_sb_info *, autofs_wqt_t, int);
224void autofs4_catatonic_mode(struct autofs_sb_info *); 224void autofs_catatonic_mode(struct autofs_sb_info *);
225 225
226static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) 226static inline u32 autofs_get_dev(struct autofs_sb_info *sbi)
227{ 227{
228 return new_encode_dev(sbi->sb->s_dev); 228 return new_encode_dev(sbi->sb->s_dev);
229} 229}
230 230
231static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) 231static inline u64 autofs_get_ino(struct autofs_sb_info *sbi)
232{ 232{
233 return d_inode(sbi->sb->s_root)->i_ino; 233 return d_inode(sbi->sb->s_root)->i_ino;
234} 234}
235 235
236static inline void __autofs4_add_expiring(struct dentry *dentry) 236static inline void __autofs_add_expiring(struct dentry *dentry)
237{ 237{
238 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 238 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
239 struct autofs_info *ino = autofs4_dentry_ino(dentry); 239 struct autofs_info *ino = autofs_dentry_ino(dentry);
240 240
241 if (ino) { 241 if (ino) {
242 if (list_empty(&ino->expiring)) 242 if (list_empty(&ino->expiring))
@@ -244,10 +244,10 @@ static inline void __autofs4_add_expiring(struct dentry *dentry)
244 } 244 }
245} 245}
246 246
247static inline void autofs4_add_expiring(struct dentry *dentry) 247static inline void autofs_add_expiring(struct dentry *dentry)
248{ 248{
249 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 249 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
250 struct autofs_info *ino = autofs4_dentry_ino(dentry); 250 struct autofs_info *ino = autofs_dentry_ino(dentry);
251 251
252 if (ino) { 252 if (ino) {
253 spin_lock(&sbi->lookup_lock); 253 spin_lock(&sbi->lookup_lock);
@@ -257,10 +257,10 @@ static inline void autofs4_add_expiring(struct dentry *dentry)
257 } 257 }
258} 258}
259 259
260static inline void autofs4_del_expiring(struct dentry *dentry) 260static inline void autofs_del_expiring(struct dentry *dentry)
261{ 261{
262 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 262 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
263 struct autofs_info *ino = autofs4_dentry_ino(dentry); 263 struct autofs_info *ino = autofs_dentry_ino(dentry);
264 264
265 if (ino) { 265 if (ino) {
266 spin_lock(&sbi->lookup_lock); 266 spin_lock(&sbi->lookup_lock);
@@ -270,4 +270,4 @@ static inline void autofs4_del_expiring(struct dentry *dentry)
270 } 270 }
271} 271}
272 272
273void autofs4_kill_sb(struct super_block *); 273void autofs_kill_sb(struct super_block *);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index 26f6b4f41ce6..ea4ca1445ab7 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -7,23 +7,10 @@
7 * option, any later version, incorporated herein by reference. 7 * option, any later version, incorporated herein by reference.
8 */ 8 */
9 9
10#include <linux/module.h>
11#include <linux/vmalloc.h>
12#include <linux/miscdevice.h> 10#include <linux/miscdevice.h>
13#include <linux/init.h>
14#include <linux/wait.h>
15#include <linux/namei.h>
16#include <linux/fcntl.h>
17#include <linux/file.h>
18#include <linux/fdtable.h>
19#include <linux/sched.h>
20#include <linux/cred.h>
21#include <linux/compat.h> 11#include <linux/compat.h>
22#include <linux/syscalls.h> 12#include <linux/syscalls.h>
23#include <linux/magic.h> 13#include <linux/magic.h>
24#include <linux/dcache.h>
25#include <linux/uaccess.h>
26#include <linux/slab.h>
27 14
28#include "autofs_i.h" 15#include "autofs_i.h"
29 16
@@ -166,7 +153,7 @@ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
166 153
167 if (f) { 154 if (f) {
168 inode = file_inode(f); 155 inode = file_inode(f);
169 sbi = autofs4_sbi(inode->i_sb); 156 sbi = autofs_sbi(inode->i_sb);
170 } 157 }
171 return sbi; 158 return sbi;
172} 159}
@@ -236,7 +223,7 @@ static int test_by_dev(const struct path *path, void *p)
236 223
237static int test_by_type(const struct path *path, void *p) 224static int test_by_type(const struct path *path, void *p)
238{ 225{
239 struct autofs_info *ino = autofs4_dentry_ino(path->dentry); 226 struct autofs_info *ino = autofs_dentry_ino(path->dentry);
240 227
241 return ino && ino->sbi->type & *(unsigned *)p; 228 return ino && ino->sbi->type & *(unsigned *)p;
242} 229}
@@ -324,7 +311,7 @@ static int autofs_dev_ioctl_ready(struct file *fp,
324 autofs_wqt_t token; 311 autofs_wqt_t token;
325 312
326 token = (autofs_wqt_t) param->ready.token; 313 token = (autofs_wqt_t) param->ready.token;
327 return autofs4_wait_release(sbi, token, 0); 314 return autofs_wait_release(sbi, token, 0);
328} 315}
329 316
330/* 317/*
@@ -340,7 +327,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
340 327
341 token = (autofs_wqt_t) param->fail.token; 328 token = (autofs_wqt_t) param->fail.token;
342 status = param->fail.status < 0 ? param->fail.status : -ENOENT; 329 status = param->fail.status < 0 ? param->fail.status : -ENOENT;
343 return autofs4_wait_release(sbi, token, status); 330 return autofs_wait_release(sbi, token, status);
344} 331}
345 332
346/* 333/*
@@ -412,7 +399,7 @@ static int autofs_dev_ioctl_catatonic(struct file *fp,
412 struct autofs_sb_info *sbi, 399 struct autofs_sb_info *sbi,
413 struct autofs_dev_ioctl *param) 400 struct autofs_dev_ioctl *param)
414{ 401{
415 autofs4_catatonic_mode(sbi); 402 autofs_catatonic_mode(sbi);
416 return 0; 403 return 0;
417} 404}
418 405
@@ -459,10 +446,10 @@ static int autofs_dev_ioctl_requester(struct file *fp,
459 if (err) 446 if (err)
460 goto out; 447 goto out;
461 448
462 ino = autofs4_dentry_ino(path.dentry); 449 ino = autofs_dentry_ino(path.dentry);
463 if (ino) { 450 if (ino) {
464 err = 0; 451 err = 0;
465 autofs4_expire_wait(&path, 0); 452 autofs_expire_wait(&path, 0);
466 spin_lock(&sbi->fs_lock); 453 spin_lock(&sbi->fs_lock);
467 param->requester.uid = 454 param->requester.uid =
468 from_kuid_munged(current_user_ns(), ino->uid); 455 from_kuid_munged(current_user_ns(), ino->uid);
@@ -489,7 +476,7 @@ static int autofs_dev_ioctl_expire(struct file *fp,
489 how = param->expire.how; 476 how = param->expire.how;
490 mnt = fp->f_path.mnt; 477 mnt = fp->f_path.mnt;
491 478
492 return autofs4_do_expire_multi(sbi->sb, mnt, sbi, how); 479 return autofs_do_expire_multi(sbi->sb, mnt, sbi, how);
493} 480}
494 481
495/* Check if autofs mount point is in use */ 482/* Check if autofs mount point is in use */
@@ -686,7 +673,7 @@ static int _autofs_dev_ioctl(unsigned int command,
686 * Admin needs to be able to set the mount catatonic in 673 * Admin needs to be able to set the mount catatonic in
687 * order to be able to perform the re-open. 674 * order to be able to perform the re-open.
688 */ 675 */
689 if (!autofs4_oz_mode(sbi) && 676 if (!autofs_oz_mode(sbi) &&
690 cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) { 677 cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) {
691 err = -EACCES; 678 err = -EACCES;
692 fput(fp); 679 fput(fp);
diff --git a/fs/autofs4/expire.c b/fs/autofs/expire.c
index 57725d4a8c59..b332d3f6e730 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs/expire.c
@@ -13,10 +13,10 @@
13static unsigned long now; 13static unsigned long now;
14 14
15/* Check if a dentry can be expired */ 15/* Check if a dentry can be expired */
16static inline int autofs4_can_expire(struct dentry *dentry, 16static inline int autofs_can_expire(struct dentry *dentry,
17 unsigned long timeout, int do_now) 17 unsigned long timeout, int do_now)
18{ 18{
19 struct autofs_info *ino = autofs4_dentry_ino(dentry); 19 struct autofs_info *ino = autofs_dentry_ino(dentry);
20 20
21 /* dentry in the process of being deleted */ 21 /* dentry in the process of being deleted */
22 if (ino == NULL) 22 if (ino == NULL)
@@ -31,7 +31,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
31} 31}
32 32
33/* Check a mount point for busyness */ 33/* Check a mount point for busyness */
34static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) 34static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
35{ 35{
36 struct dentry *top = dentry; 36 struct dentry *top = dentry;
37 struct path path = {.mnt = mnt, .dentry = dentry}; 37 struct path path = {.mnt = mnt, .dentry = dentry};
@@ -44,8 +44,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
44 if (!follow_down_one(&path)) 44 if (!follow_down_one(&path))
45 goto done; 45 goto done;
46 46
47 if (is_autofs4_dentry(path.dentry)) { 47 if (is_autofs_dentry(path.dentry)) {
48 struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb); 48 struct autofs_sb_info *sbi = autofs_sbi(path.dentry->d_sb);
49 49
50 /* This is an autofs submount, we can't expire it */ 50 /* This is an autofs submount, we can't expire it */
51 if (autofs_type_indirect(sbi->type)) 51 if (autofs_type_indirect(sbi->type))
@@ -56,7 +56,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
56 if (!may_umount_tree(path.mnt)) { 56 if (!may_umount_tree(path.mnt)) {
57 struct autofs_info *ino; 57 struct autofs_info *ino;
58 58
59 ino = autofs4_dentry_ino(top); 59 ino = autofs_dentry_ino(top);
60 ino->last_used = jiffies; 60 ino->last_used = jiffies;
61 goto done; 61 goto done;
62 } 62 }
@@ -74,7 +74,7 @@ done:
74static struct dentry *get_next_positive_subdir(struct dentry *prev, 74static struct dentry *get_next_positive_subdir(struct dentry *prev,
75 struct dentry *root) 75 struct dentry *root)
76{ 76{
77 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); 77 struct autofs_sb_info *sbi = autofs_sbi(root->d_sb);
78 struct list_head *next; 78 struct list_head *next;
79 struct dentry *q; 79 struct dentry *q;
80 80
@@ -121,7 +121,7 @@ cont:
121static struct dentry *get_next_positive_dentry(struct dentry *prev, 121static struct dentry *get_next_positive_dentry(struct dentry *prev,
122 struct dentry *root) 122 struct dentry *root)
123{ 123{
124 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); 124 struct autofs_sb_info *sbi = autofs_sbi(root->d_sb);
125 struct list_head *next; 125 struct list_head *next;
126 struct dentry *p, *ret; 126 struct dentry *p, *ret;
127 127
@@ -184,10 +184,10 @@ again:
184 * The tree is not busy iff no mountpoints are busy and there are no 184 * The tree is not busy iff no mountpoints are busy and there are no
185 * autofs submounts. 185 * autofs submounts.
186 */ 186 */
187static int autofs4_direct_busy(struct vfsmount *mnt, 187static int autofs_direct_busy(struct vfsmount *mnt,
188 struct dentry *top, 188 struct dentry *top,
189 unsigned long timeout, 189 unsigned long timeout,
190 int do_now) 190 int do_now)
191{ 191{
192 pr_debug("top %p %pd\n", top, top); 192 pr_debug("top %p %pd\n", top, top);
193 193
@@ -195,14 +195,14 @@ static int autofs4_direct_busy(struct vfsmount *mnt,
195 if (!may_umount_tree(mnt)) { 195 if (!may_umount_tree(mnt)) {
196 struct autofs_info *ino; 196 struct autofs_info *ino;
197 197
198 ino = autofs4_dentry_ino(top); 198 ino = autofs_dentry_ino(top);
199 if (ino) 199 if (ino)
200 ino->last_used = jiffies; 200 ino->last_used = jiffies;
201 return 1; 201 return 1;
202 } 202 }
203 203
204 /* Timeout of a direct mount is determined by its top dentry */ 204 /* Timeout of a direct mount is determined by its top dentry */
205 if (!autofs4_can_expire(top, timeout, do_now)) 205 if (!autofs_can_expire(top, timeout, do_now))
206 return 1; 206 return 1;
207 207
208 return 0; 208 return 0;
@@ -212,12 +212,12 @@ static int autofs4_direct_busy(struct vfsmount *mnt,
212 * Check a directory tree of mount points for busyness 212 * Check a directory tree of mount points for busyness
213 * The tree is not busy iff no mountpoints are busy 213 * The tree is not busy iff no mountpoints are busy
214 */ 214 */
215static int autofs4_tree_busy(struct vfsmount *mnt, 215static int autofs_tree_busy(struct vfsmount *mnt,
216 struct dentry *top, 216 struct dentry *top,
217 unsigned long timeout, 217 unsigned long timeout,
218 int do_now) 218 int do_now)
219{ 219{
220 struct autofs_info *top_ino = autofs4_dentry_ino(top); 220 struct autofs_info *top_ino = autofs_dentry_ino(top);
221 struct dentry *p; 221 struct dentry *p;
222 222
223 pr_debug("top %p %pd\n", top, top); 223 pr_debug("top %p %pd\n", top, top);
@@ -237,13 +237,13 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
237 * If the fs is busy update the expiry counter. 237 * If the fs is busy update the expiry counter.
238 */ 238 */
239 if (d_mountpoint(p)) { 239 if (d_mountpoint(p)) {
240 if (autofs4_mount_busy(mnt, p)) { 240 if (autofs_mount_busy(mnt, p)) {
241 top_ino->last_used = jiffies; 241 top_ino->last_used = jiffies;
242 dput(p); 242 dput(p);
243 return 1; 243 return 1;
244 } 244 }
245 } else { 245 } else {
246 struct autofs_info *ino = autofs4_dentry_ino(p); 246 struct autofs_info *ino = autofs_dentry_ino(p);
247 unsigned int ino_count = atomic_read(&ino->count); 247 unsigned int ino_count = atomic_read(&ino->count);
248 248
249 /* allow for dget above and top is already dgot */ 249 /* allow for dget above and top is already dgot */
@@ -261,16 +261,16 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
261 } 261 }
262 262
263 /* Timeout of a tree mount is ultimately determined by its top dentry */ 263 /* Timeout of a tree mount is ultimately determined by its top dentry */
264 if (!autofs4_can_expire(top, timeout, do_now)) 264 if (!autofs_can_expire(top, timeout, do_now))
265 return 1; 265 return 1;
266 266
267 return 0; 267 return 0;
268} 268}
269 269
270static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, 270static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
271 struct dentry *parent, 271 struct dentry *parent,
272 unsigned long timeout, 272 unsigned long timeout,
273 int do_now) 273 int do_now)
274{ 274{
275 struct dentry *p; 275 struct dentry *p;
276 276
@@ -282,11 +282,11 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
282 282
283 if (d_mountpoint(p)) { 283 if (d_mountpoint(p)) {
284 /* Can we umount this guy */ 284 /* Can we umount this guy */
285 if (autofs4_mount_busy(mnt, p)) 285 if (autofs_mount_busy(mnt, p))
286 continue; 286 continue;
287 287
288 /* Can we expire this guy */ 288 /* Can we expire this guy */
289 if (autofs4_can_expire(p, timeout, do_now)) 289 if (autofs_can_expire(p, timeout, do_now))
290 return p; 290 return p;
291 } 291 }
292 } 292 }
@@ -294,10 +294,10 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
294} 294}
295 295
296/* Check if we can expire a direct mount (possibly a tree) */ 296/* Check if we can expire a direct mount (possibly a tree) */
297struct dentry *autofs4_expire_direct(struct super_block *sb, 297struct dentry *autofs_expire_direct(struct super_block *sb,
298 struct vfsmount *mnt, 298 struct vfsmount *mnt,
299 struct autofs_sb_info *sbi, 299 struct autofs_sb_info *sbi,
300 int how) 300 int how)
301{ 301{
302 unsigned long timeout; 302 unsigned long timeout;
303 struct dentry *root = dget(sb->s_root); 303 struct dentry *root = dget(sb->s_root);
@@ -310,9 +310,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
310 now = jiffies; 310 now = jiffies;
311 timeout = sbi->exp_timeout; 311 timeout = sbi->exp_timeout;
312 312
313 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 313 if (!autofs_direct_busy(mnt, root, timeout, do_now)) {
314 spin_lock(&sbi->fs_lock); 314 spin_lock(&sbi->fs_lock);
315 ino = autofs4_dentry_ino(root); 315 ino = autofs_dentry_ino(root);
316 /* No point expiring a pending mount */ 316 /* No point expiring a pending mount */
317 if (ino->flags & AUTOFS_INF_PENDING) { 317 if (ino->flags & AUTOFS_INF_PENDING) {
318 spin_unlock(&sbi->fs_lock); 318 spin_unlock(&sbi->fs_lock);
@@ -321,7 +321,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
321 ino->flags |= AUTOFS_INF_WANT_EXPIRE; 321 ino->flags |= AUTOFS_INF_WANT_EXPIRE;
322 spin_unlock(&sbi->fs_lock); 322 spin_unlock(&sbi->fs_lock);
323 synchronize_rcu(); 323 synchronize_rcu();
324 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 324 if (!autofs_direct_busy(mnt, root, timeout, do_now)) {
325 spin_lock(&sbi->fs_lock); 325 spin_lock(&sbi->fs_lock);
326 ino->flags |= AUTOFS_INF_EXPIRING; 326 ino->flags |= AUTOFS_INF_EXPIRING;
327 init_completion(&ino->expire_complete); 327 init_completion(&ino->expire_complete);
@@ -350,7 +350,7 @@ static struct dentry *should_expire(struct dentry *dentry,
350{ 350{
351 int do_now = how & AUTOFS_EXP_IMMEDIATE; 351 int do_now = how & AUTOFS_EXP_IMMEDIATE;
352 int exp_leaves = how & AUTOFS_EXP_LEAVES; 352 int exp_leaves = how & AUTOFS_EXP_LEAVES;
353 struct autofs_info *ino = autofs4_dentry_ino(dentry); 353 struct autofs_info *ino = autofs_dentry_ino(dentry);
354 unsigned int ino_count; 354 unsigned int ino_count;
355 355
356 /* No point expiring a pending mount */ 356 /* No point expiring a pending mount */
@@ -367,11 +367,11 @@ static struct dentry *should_expire(struct dentry *dentry,
367 pr_debug("checking mountpoint %p %pd\n", dentry, dentry); 367 pr_debug("checking mountpoint %p %pd\n", dentry, dentry);
368 368
369 /* Can we umount this guy */ 369 /* Can we umount this guy */
370 if (autofs4_mount_busy(mnt, dentry)) 370 if (autofs_mount_busy(mnt, dentry))
371 return NULL; 371 return NULL;
372 372
373 /* Can we expire this guy */ 373 /* Can we expire this guy */
374 if (autofs4_can_expire(dentry, timeout, do_now)) 374 if (autofs_can_expire(dentry, timeout, do_now))
375 return dentry; 375 return dentry;
376 return NULL; 376 return NULL;
377 } 377 }
@@ -382,7 +382,7 @@ static struct dentry *should_expire(struct dentry *dentry,
382 * A symlink can't be "busy" in the usual sense so 382 * A symlink can't be "busy" in the usual sense so
383 * just check last used for expire timeout. 383 * just check last used for expire timeout.
384 */ 384 */
385 if (autofs4_can_expire(dentry, timeout, do_now)) 385 if (autofs_can_expire(dentry, timeout, do_now))
386 return dentry; 386 return dentry;
387 return NULL; 387 return NULL;
388 } 388 }
@@ -397,7 +397,7 @@ static struct dentry *should_expire(struct dentry *dentry,
397 if (d_count(dentry) > ino_count) 397 if (d_count(dentry) > ino_count)
398 return NULL; 398 return NULL;
399 399
400 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) 400 if (!autofs_tree_busy(mnt, dentry, timeout, do_now))
401 return dentry; 401 return dentry;
402 /* 402 /*
403 * Case 3: pseudo direct mount, expire individual leaves 403 * Case 3: pseudo direct mount, expire individual leaves
@@ -411,7 +411,7 @@ static struct dentry *should_expire(struct dentry *dentry,
411 if (d_count(dentry) > ino_count) 411 if (d_count(dentry) > ino_count)
412 return NULL; 412 return NULL;
413 413
414 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 414 expired = autofs_check_leaves(mnt, dentry, timeout, do_now);
415 if (expired) { 415 if (expired) {
416 if (expired == dentry) 416 if (expired == dentry)
417 dput(dentry); 417 dput(dentry);
@@ -427,10 +427,10 @@ static struct dentry *should_expire(struct dentry *dentry,
427 * - it is unused by any user process 427 * - it is unused by any user process
428 * - it has been unused for exp_timeout time 428 * - it has been unused for exp_timeout time
429 */ 429 */
430struct dentry *autofs4_expire_indirect(struct super_block *sb, 430struct dentry *autofs_expire_indirect(struct super_block *sb,
431 struct vfsmount *mnt, 431 struct vfsmount *mnt,
432 struct autofs_sb_info *sbi, 432 struct autofs_sb_info *sbi,
433 int how) 433 int how)
434{ 434{
435 unsigned long timeout; 435 unsigned long timeout;
436 struct dentry *root = sb->s_root; 436 struct dentry *root = sb->s_root;
@@ -450,7 +450,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
450 int flags = how; 450 int flags = how;
451 451
452 spin_lock(&sbi->fs_lock); 452 spin_lock(&sbi->fs_lock);
453 ino = autofs4_dentry_ino(dentry); 453 ino = autofs_dentry_ino(dentry);
454 if (ino->flags & AUTOFS_INF_WANT_EXPIRE) { 454 if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
455 spin_unlock(&sbi->fs_lock); 455 spin_unlock(&sbi->fs_lock);
456 continue; 456 continue;
@@ -462,7 +462,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
462 continue; 462 continue;
463 463
464 spin_lock(&sbi->fs_lock); 464 spin_lock(&sbi->fs_lock);
465 ino = autofs4_dentry_ino(expired); 465 ino = autofs_dentry_ino(expired);
466 ino->flags |= AUTOFS_INF_WANT_EXPIRE; 466 ino->flags |= AUTOFS_INF_WANT_EXPIRE;
467 spin_unlock(&sbi->fs_lock); 467 spin_unlock(&sbi->fs_lock);
468 synchronize_rcu(); 468 synchronize_rcu();
@@ -498,11 +498,11 @@ found:
498 return expired; 498 return expired;
499} 499}
500 500
501int autofs4_expire_wait(const struct path *path, int rcu_walk) 501int autofs_expire_wait(const struct path *path, int rcu_walk)
502{ 502{
503 struct dentry *dentry = path->dentry; 503 struct dentry *dentry = path->dentry;
504 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 504 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
505 struct autofs_info *ino = autofs4_dentry_ino(dentry); 505 struct autofs_info *ino = autofs_dentry_ino(dentry);
506 int status; 506 int status;
507 int state; 507 int state;
508 508
@@ -529,7 +529,7 @@ retry:
529 529
530 pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); 530 pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
531 531
532 status = autofs4_wait(sbi, path, NFY_NONE); 532 status = autofs_wait(sbi, path, NFY_NONE);
533 wait_for_completion(&ino->expire_complete); 533 wait_for_completion(&ino->expire_complete);
534 534
535 pr_debug("expire done status=%d\n", status); 535 pr_debug("expire done status=%d\n", status);
@@ -545,10 +545,10 @@ retry:
545} 545}
546 546
547/* Perform an expiry operation */ 547/* Perform an expiry operation */
548int autofs4_expire_run(struct super_block *sb, 548int autofs_expire_run(struct super_block *sb,
549 struct vfsmount *mnt, 549 struct vfsmount *mnt,
550 struct autofs_sb_info *sbi, 550 struct autofs_sb_info *sbi,
551 struct autofs_packet_expire __user *pkt_p) 551 struct autofs_packet_expire __user *pkt_p)
552{ 552{
553 struct autofs_packet_expire pkt; 553 struct autofs_packet_expire pkt;
554 struct autofs_info *ino; 554 struct autofs_info *ino;
@@ -560,7 +560,7 @@ int autofs4_expire_run(struct super_block *sb,
560 pkt.hdr.proto_version = sbi->version; 560 pkt.hdr.proto_version = sbi->version;
561 pkt.hdr.type = autofs_ptype_expire; 561 pkt.hdr.type = autofs_ptype_expire;
562 562
563 dentry = autofs4_expire_indirect(sb, mnt, sbi, 0); 563 dentry = autofs_expire_indirect(sb, mnt, sbi, 0);
564 if (!dentry) 564 if (!dentry)
565 return -EAGAIN; 565 return -EAGAIN;
566 566
@@ -573,7 +573,7 @@ int autofs4_expire_run(struct super_block *sb,
573 ret = -EFAULT; 573 ret = -EFAULT;
574 574
575 spin_lock(&sbi->fs_lock); 575 spin_lock(&sbi->fs_lock);
576 ino = autofs4_dentry_ino(dentry); 576 ino = autofs_dentry_ino(dentry);
577 /* avoid rapid-fire expire attempts if expiry fails */ 577 /* avoid rapid-fire expire attempts if expiry fails */
578 ino->last_used = now; 578 ino->last_used = now;
579 ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); 579 ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
@@ -583,25 +583,25 @@ int autofs4_expire_run(struct super_block *sb,
583 return ret; 583 return ret;
584} 584}
585 585
586int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, 586int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
587 struct autofs_sb_info *sbi, int when) 587 struct autofs_sb_info *sbi, int when)
588{ 588{
589 struct dentry *dentry; 589 struct dentry *dentry;
590 int ret = -EAGAIN; 590 int ret = -EAGAIN;
591 591
592 if (autofs_type_trigger(sbi->type)) 592 if (autofs_type_trigger(sbi->type))
593 dentry = autofs4_expire_direct(sb, mnt, sbi, when); 593 dentry = autofs_expire_direct(sb, mnt, sbi, when);
594 else 594 else
595 dentry = autofs4_expire_indirect(sb, mnt, sbi, when); 595 dentry = autofs_expire_indirect(sb, mnt, sbi, when);
596 596
597 if (dentry) { 597 if (dentry) {
598 struct autofs_info *ino = autofs4_dentry_ino(dentry); 598 struct autofs_info *ino = autofs_dentry_ino(dentry);
599 const struct path path = { .mnt = mnt, .dentry = dentry }; 599 const struct path path = { .mnt = mnt, .dentry = dentry };
600 600
601 /* This is synchronous because it makes the daemon a 601 /* This is synchronous because it makes the daemon a
602 * little easier 602 * little easier
603 */ 603 */
604 ret = autofs4_wait(sbi, &path, NFY_EXPIRE); 604 ret = autofs_wait(sbi, &path, NFY_EXPIRE);
605 605
606 spin_lock(&sbi->fs_lock); 606 spin_lock(&sbi->fs_lock);
607 /* avoid rapid-fire expire attempts if expiry fails */ 607 /* avoid rapid-fire expire attempts if expiry fails */
@@ -619,7 +619,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
619 * Call repeatedly until it returns -EAGAIN, meaning there's nothing 619 * Call repeatedly until it returns -EAGAIN, meaning there's nothing
620 * more to be done. 620 * more to be done.
621 */ 621 */
622int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, 622int autofs_expire_multi(struct super_block *sb, struct vfsmount *mnt,
623 struct autofs_sb_info *sbi, int __user *arg) 623 struct autofs_sb_info *sbi, int __user *arg)
624{ 624{
625 int do_now = 0; 625 int do_now = 0;
@@ -627,6 +627,5 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
627 if (arg && get_user(do_now, arg)) 627 if (arg && get_user(do_now, arg))
628 return -EFAULT; 628 return -EFAULT;
629 629
630 return autofs4_do_expire_multi(sb, mnt, sbi, do_now); 630 return autofs_do_expire_multi(sb, mnt, sbi, do_now);
631} 631}
632
diff --git a/fs/autofs4/init.c b/fs/autofs/init.c
index 8cf0e63389ae..16fb61315843 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs/init.c
@@ -13,18 +13,18 @@
13static struct dentry *autofs_mount(struct file_system_type *fs_type, 13static struct dentry *autofs_mount(struct file_system_type *fs_type,
14 int flags, const char *dev_name, void *data) 14 int flags, const char *dev_name, void *data)
15{ 15{
16 return mount_nodev(fs_type, flags, data, autofs4_fill_super); 16 return mount_nodev(fs_type, flags, data, autofs_fill_super);
17} 17}
18 18
19static struct file_system_type autofs_fs_type = { 19static struct file_system_type autofs_fs_type = {
20 .owner = THIS_MODULE, 20 .owner = THIS_MODULE,
21 .name = "autofs", 21 .name = "autofs",
22 .mount = autofs_mount, 22 .mount = autofs_mount,
23 .kill_sb = autofs4_kill_sb, 23 .kill_sb = autofs_kill_sb,
24}; 24};
25MODULE_ALIAS_FS("autofs"); 25MODULE_ALIAS_FS("autofs");
26 26
27static int __init init_autofs4_fs(void) 27static int __init init_autofs_fs(void)
28{ 28{
29 int err; 29 int err;
30 30
@@ -37,12 +37,12 @@ static int __init init_autofs4_fs(void)
37 return err; 37 return err;
38} 38}
39 39
40static void __exit exit_autofs4_fs(void) 40static void __exit exit_autofs_fs(void)
41{ 41{
42 autofs_dev_ioctl_exit(); 42 autofs_dev_ioctl_exit();
43 unregister_filesystem(&autofs_fs_type); 43 unregister_filesystem(&autofs_fs_type);
44} 44}
45 45
46module_init(init_autofs4_fs) 46module_init(init_autofs_fs)
47module_exit(exit_autofs4_fs) 47module_exit(exit_autofs_fs)
48MODULE_LICENSE("GPL"); 48MODULE_LICENSE("GPL");
diff --git a/fs/autofs4/inode.c b/fs/autofs/inode.c
index 09e7d68dff02..b51980fc274e 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs/inode.c
@@ -7,18 +7,14 @@
7 * option, any later version, incorporated herein by reference. 7 * option, any later version, incorporated herein by reference.
8 */ 8 */
9 9
10#include <linux/kernel.h>
11#include <linux/slab.h>
12#include <linux/file.h>
13#include <linux/seq_file.h> 10#include <linux/seq_file.h>
14#include <linux/pagemap.h> 11#include <linux/pagemap.h>
15#include <linux/parser.h> 12#include <linux/parser.h>
16#include <linux/bitops.h>
17#include <linux/magic.h> 13#include <linux/magic.h>
14
18#include "autofs_i.h" 15#include "autofs_i.h"
19#include <linux/module.h>
20 16
21struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) 17struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
22{ 18{
23 struct autofs_info *ino; 19 struct autofs_info *ino;
24 20
@@ -32,21 +28,21 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
32 return ino; 28 return ino;
33} 29}
34 30
35void autofs4_clean_ino(struct autofs_info *ino) 31void autofs_clean_ino(struct autofs_info *ino)
36{ 32{
37 ino->uid = GLOBAL_ROOT_UID; 33 ino->uid = GLOBAL_ROOT_UID;
38 ino->gid = GLOBAL_ROOT_GID; 34 ino->gid = GLOBAL_ROOT_GID;
39 ino->last_used = jiffies; 35 ino->last_used = jiffies;
40} 36}
41 37
42void autofs4_free_ino(struct autofs_info *ino) 38void autofs_free_ino(struct autofs_info *ino)
43{ 39{
44 kfree(ino); 40 kfree(ino);
45} 41}
46 42
47void autofs4_kill_sb(struct super_block *sb) 43void autofs_kill_sb(struct super_block *sb)
48{ 44{
49 struct autofs_sb_info *sbi = autofs4_sbi(sb); 45 struct autofs_sb_info *sbi = autofs_sbi(sb);
50 46
51 /* 47 /*
52 * In the event of a failure in get_sb_nodev the superblock 48 * In the event of a failure in get_sb_nodev the superblock
@@ -56,7 +52,7 @@ void autofs4_kill_sb(struct super_block *sb)
56 */ 52 */
57 if (sbi) { 53 if (sbi) {
58 /* Free wait queues, close pipe */ 54 /* Free wait queues, close pipe */
59 autofs4_catatonic_mode(sbi); 55 autofs_catatonic_mode(sbi);
60 put_pid(sbi->oz_pgrp); 56 put_pid(sbi->oz_pgrp);
61 } 57 }
62 58
@@ -66,9 +62,9 @@ void autofs4_kill_sb(struct super_block *sb)
66 kfree_rcu(sbi, rcu); 62 kfree_rcu(sbi, rcu);
67} 63}
68 64
69static int autofs4_show_options(struct seq_file *m, struct dentry *root) 65static int autofs_show_options(struct seq_file *m, struct dentry *root)
70{ 66{
71 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); 67 struct autofs_sb_info *sbi = autofs_sbi(root->d_sb);
72 struct inode *root_inode = d_inode(root->d_sb->s_root); 68 struct inode *root_inode = d_inode(root->d_sb->s_root);
73 69
74 if (!sbi) 70 if (!sbi)
@@ -101,16 +97,16 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
101 return 0; 97 return 0;
102} 98}
103 99
104static void autofs4_evict_inode(struct inode *inode) 100static void autofs_evict_inode(struct inode *inode)
105{ 101{
106 clear_inode(inode); 102 clear_inode(inode);
107 kfree(inode->i_private); 103 kfree(inode->i_private);
108} 104}
109 105
110static const struct super_operations autofs4_sops = { 106static const struct super_operations autofs_sops = {
111 .statfs = simple_statfs, 107 .statfs = simple_statfs,
112 .show_options = autofs4_show_options, 108 .show_options = autofs_show_options,
113 .evict_inode = autofs4_evict_inode, 109 .evict_inode = autofs_evict_inode,
114}; 110};
115 111
116enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, 112enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
@@ -206,7 +202,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
206 return (*pipefd < 0); 202 return (*pipefd < 0);
207} 203}
208 204
209int autofs4_fill_super(struct super_block *s, void *data, int silent) 205int autofs_fill_super(struct super_block *s, void *data, int silent)
210{ 206{
211 struct inode *root_inode; 207 struct inode *root_inode;
212 struct dentry *root; 208 struct dentry *root;
@@ -246,19 +242,19 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
246 s->s_blocksize = 1024; 242 s->s_blocksize = 1024;
247 s->s_blocksize_bits = 10; 243 s->s_blocksize_bits = 10;
248 s->s_magic = AUTOFS_SUPER_MAGIC; 244 s->s_magic = AUTOFS_SUPER_MAGIC;
249 s->s_op = &autofs4_sops; 245 s->s_op = &autofs_sops;
250 s->s_d_op = &autofs4_dentry_operations; 246 s->s_d_op = &autofs_dentry_operations;
251 s->s_time_gran = 1; 247 s->s_time_gran = 1;
252 248
253 /* 249 /*
254 * Get the root inode and dentry, but defer checking for errors. 250 * Get the root inode and dentry, but defer checking for errors.
255 */ 251 */
256 ino = autofs4_new_ino(sbi); 252 ino = autofs_new_ino(sbi);
257 if (!ino) { 253 if (!ino) {
258 ret = -ENOMEM; 254 ret = -ENOMEM;
259 goto fail_free; 255 goto fail_free;
260 } 256 }
261 root_inode = autofs4_get_inode(s, S_IFDIR | 0755); 257 root_inode = autofs_get_inode(s, S_IFDIR | 0755);
262 root = d_make_root(root_inode); 258 root = d_make_root(root_inode);
263 if (!root) 259 if (!root)
264 goto fail_ino; 260 goto fail_ino;
@@ -305,8 +301,8 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
305 if (autofs_type_trigger(sbi->type)) 301 if (autofs_type_trigger(sbi->type))
306 __managed_dentry_set_managed(root); 302 __managed_dentry_set_managed(root);
307 303
308 root_inode->i_fop = &autofs4_root_operations; 304 root_inode->i_fop = &autofs_root_operations;
309 root_inode->i_op = &autofs4_dir_inode_operations; 305 root_inode->i_op = &autofs_dir_inode_operations;
310 306
311 pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp)); 307 pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp));
312 pipe = fget(pipefd); 308 pipe = fget(pipefd);
@@ -340,14 +336,14 @@ fail_dput:
340 dput(root); 336 dput(root);
341 goto fail_free; 337 goto fail_free;
342fail_ino: 338fail_ino:
343 autofs4_free_ino(ino); 339 autofs_free_ino(ino);
344fail_free: 340fail_free:
345 kfree(sbi); 341 kfree(sbi);
346 s->s_fs_info = NULL; 342 s->s_fs_info = NULL;
347 return ret; 343 return ret;
348} 344}
349 345
350struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode) 346struct inode *autofs_get_inode(struct super_block *sb, umode_t mode)
351{ 347{
352 struct inode *inode = new_inode(sb); 348 struct inode *inode = new_inode(sb);
353 349
@@ -364,10 +360,10 @@ struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
364 360
365 if (S_ISDIR(mode)) { 361 if (S_ISDIR(mode)) {
366 set_nlink(inode, 2); 362 set_nlink(inode, 2);
367 inode->i_op = &autofs4_dir_inode_operations; 363 inode->i_op = &autofs_dir_inode_operations;
368 inode->i_fop = &autofs4_dir_operations; 364 inode->i_fop = &autofs_dir_operations;
369 } else if (S_ISLNK(mode)) { 365 } else if (S_ISLNK(mode)) {
370 inode->i_op = &autofs4_symlink_inode_operations; 366 inode->i_op = &autofs_symlink_inode_operations;
371 } else 367 } else
372 WARN_ON(1); 368 WARN_ON(1);
373 369
diff --git a/fs/autofs4/root.c b/fs/autofs/root.c
index b12e37f27530..a3d414150578 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs/root.c
@@ -9,72 +9,66 @@
9 */ 9 */
10 10
11#include <linux/capability.h> 11#include <linux/capability.h>
12#include <linux/errno.h>
13#include <linux/stat.h>
14#include <linux/slab.h>
15#include <linux/param.h>
16#include <linux/time.h>
17#include <linux/compat.h> 12#include <linux/compat.h>
18#include <linux/mutex.h>
19 13
20#include "autofs_i.h" 14#include "autofs_i.h"
21 15
22static int autofs4_dir_symlink(struct inode *, struct dentry *, const char *); 16static int autofs_dir_symlink(struct inode *, struct dentry *, const char *);
23static int autofs4_dir_unlink(struct inode *, struct dentry *); 17static int autofs_dir_unlink(struct inode *, struct dentry *);
24static int autofs4_dir_rmdir(struct inode *, struct dentry *); 18static int autofs_dir_rmdir(struct inode *, struct dentry *);
25static int autofs4_dir_mkdir(struct inode *, struct dentry *, umode_t); 19static int autofs_dir_mkdir(struct inode *, struct dentry *, umode_t);
26static long autofs4_root_ioctl(struct file *, unsigned int, unsigned long); 20static long autofs_root_ioctl(struct file *, unsigned int, unsigned long);
27#ifdef CONFIG_COMPAT 21#ifdef CONFIG_COMPAT
28static long autofs4_root_compat_ioctl(struct file *, 22static long autofs_root_compat_ioctl(struct file *,
29 unsigned int, unsigned long); 23 unsigned int, unsigned long);
30#endif 24#endif
31static int autofs4_dir_open(struct inode *inode, struct file *file); 25static int autofs_dir_open(struct inode *inode, struct file *file);
32static struct dentry *autofs4_lookup(struct inode *, 26static struct dentry *autofs_lookup(struct inode *,
33 struct dentry *, unsigned int); 27 struct dentry *, unsigned int);
34static struct vfsmount *autofs4_d_automount(struct path *); 28static struct vfsmount *autofs_d_automount(struct path *);
35static int autofs4_d_manage(const struct path *, bool); 29static int autofs_d_manage(const struct path *, bool);
36static void autofs4_dentry_release(struct dentry *); 30static void autofs_dentry_release(struct dentry *);
37 31
38const struct file_operations autofs4_root_operations = { 32const struct file_operations autofs_root_operations = {
39 .open = dcache_dir_open, 33 .open = dcache_dir_open,
40 .release = dcache_dir_close, 34 .release = dcache_dir_close,
41 .read = generic_read_dir, 35 .read = generic_read_dir,
42 .iterate_shared = dcache_readdir, 36 .iterate_shared = dcache_readdir,
43 .llseek = dcache_dir_lseek, 37 .llseek = dcache_dir_lseek,
44 .unlocked_ioctl = autofs4_root_ioctl, 38 .unlocked_ioctl = autofs_root_ioctl,
45#ifdef CONFIG_COMPAT 39#ifdef CONFIG_COMPAT
46 .compat_ioctl = autofs4_root_compat_ioctl, 40 .compat_ioctl = autofs_root_compat_ioctl,
47#endif 41#endif
48}; 42};
49 43
50const struct file_operations autofs4_dir_operations = { 44const struct file_operations autofs_dir_operations = {
51 .open = autofs4_dir_open, 45 .open = autofs_dir_open,
52 .release = dcache_dir_close, 46 .release = dcache_dir_close,
53 .read = generic_read_dir, 47 .read = generic_read_dir,
54 .iterate_shared = dcache_readdir, 48 .iterate_shared = dcache_readdir,
55 .llseek = dcache_dir_lseek, 49 .llseek = dcache_dir_lseek,
56}; 50};
57 51
58const struct inode_operations autofs4_dir_inode_operations = { 52const struct inode_operations autofs_dir_inode_operations = {
59 .lookup = autofs4_lookup, 53 .lookup = autofs_lookup,
60 .unlink = autofs4_dir_unlink, 54 .unlink = autofs_dir_unlink,
61 .symlink = autofs4_dir_symlink, 55 .symlink = autofs_dir_symlink,
62 .mkdir = autofs4_dir_mkdir, 56 .mkdir = autofs_dir_mkdir,
63 .rmdir = autofs4_dir_rmdir, 57 .rmdir = autofs_dir_rmdir,
64}; 58};
65 59
66const struct dentry_operations autofs4_dentry_operations = { 60const struct dentry_operations autofs_dentry_operations = {
67 .d_automount = autofs4_d_automount, 61 .d_automount = autofs_d_automount,
68 .d_manage = autofs4_d_manage, 62 .d_manage = autofs_d_manage,
69 .d_release = autofs4_dentry_release, 63 .d_release = autofs_dentry_release,
70}; 64};
71 65
72static void autofs4_add_active(struct dentry *dentry) 66static void autofs_add_active(struct dentry *dentry)
73{ 67{
74 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 68 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
75 struct autofs_info *ino; 69 struct autofs_info *ino;
76 70
77 ino = autofs4_dentry_ino(dentry); 71 ino = autofs_dentry_ino(dentry);
78 if (ino) { 72 if (ino) {
79 spin_lock(&sbi->lookup_lock); 73 spin_lock(&sbi->lookup_lock);
80 if (!ino->active_count) { 74 if (!ino->active_count) {
@@ -86,12 +80,12 @@ static void autofs4_add_active(struct dentry *dentry)
86 } 80 }
87} 81}
88 82
89static void autofs4_del_active(struct dentry *dentry) 83static void autofs_del_active(struct dentry *dentry)
90{ 84{
91 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 85 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
92 struct autofs_info *ino; 86 struct autofs_info *ino;
93 87
94 ino = autofs4_dentry_ino(dentry); 88 ino = autofs_dentry_ino(dentry);
95 if (ino) { 89 if (ino) {
96 spin_lock(&sbi->lookup_lock); 90 spin_lock(&sbi->lookup_lock);
97 ino->active_count--; 91 ino->active_count--;
@@ -103,14 +97,14 @@ static void autofs4_del_active(struct dentry *dentry)
103 } 97 }
104} 98}
105 99
106static int autofs4_dir_open(struct inode *inode, struct file *file) 100static int autofs_dir_open(struct inode *inode, struct file *file)
107{ 101{
108 struct dentry *dentry = file->f_path.dentry; 102 struct dentry *dentry = file->f_path.dentry;
109 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 103 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
110 104
111 pr_debug("file=%p dentry=%p %pd\n", file, dentry, dentry); 105 pr_debug("file=%p dentry=%p %pd\n", file, dentry, dentry);
112 106
113 if (autofs4_oz_mode(sbi)) 107 if (autofs_oz_mode(sbi))
114 goto out; 108 goto out;
115 109
116 /* 110 /*
@@ -133,10 +127,10 @@ out:
133 return dcache_dir_open(inode, file); 127 return dcache_dir_open(inode, file);
134} 128}
135 129
136static void autofs4_dentry_release(struct dentry *de) 130static void autofs_dentry_release(struct dentry *de)
137{ 131{
138 struct autofs_info *ino = autofs4_dentry_ino(de); 132 struct autofs_info *ino = autofs_dentry_ino(de);
139 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); 133 struct autofs_sb_info *sbi = autofs_sbi(de->d_sb);
140 134
141 pr_debug("releasing %p\n", de); 135 pr_debug("releasing %p\n", de);
142 136
@@ -152,12 +146,12 @@ static void autofs4_dentry_release(struct dentry *de)
152 spin_unlock(&sbi->lookup_lock); 146 spin_unlock(&sbi->lookup_lock);
153 } 147 }
154 148
155 autofs4_free_ino(ino); 149 autofs_free_ino(ino);
156} 150}
157 151
158static struct dentry *autofs4_lookup_active(struct dentry *dentry) 152static struct dentry *autofs_lookup_active(struct dentry *dentry)
159{ 153{
160 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 154 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
161 struct dentry *parent = dentry->d_parent; 155 struct dentry *parent = dentry->d_parent;
162 const struct qstr *name = &dentry->d_name; 156 const struct qstr *name = &dentry->d_name;
163 unsigned int len = name->len; 157 unsigned int len = name->len;
@@ -209,10 +203,10 @@ next:
209 return NULL; 203 return NULL;
210} 204}
211 205
212static struct dentry *autofs4_lookup_expiring(struct dentry *dentry, 206static struct dentry *autofs_lookup_expiring(struct dentry *dentry,
213 bool rcu_walk) 207 bool rcu_walk)
214{ 208{
215 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 209 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
216 struct dentry *parent = dentry->d_parent; 210 struct dentry *parent = dentry->d_parent;
217 const struct qstr *name = &dentry->d_name; 211 const struct qstr *name = &dentry->d_name;
218 unsigned int len = name->len; 212 unsigned int len = name->len;
@@ -269,17 +263,17 @@ next:
269 return NULL; 263 return NULL;
270} 264}
271 265
272static int autofs4_mount_wait(const struct path *path, bool rcu_walk) 266static int autofs_mount_wait(const struct path *path, bool rcu_walk)
273{ 267{
274 struct autofs_sb_info *sbi = autofs4_sbi(path->dentry->d_sb); 268 struct autofs_sb_info *sbi = autofs_sbi(path->dentry->d_sb);
275 struct autofs_info *ino = autofs4_dentry_ino(path->dentry); 269 struct autofs_info *ino = autofs_dentry_ino(path->dentry);
276 int status = 0; 270 int status = 0;
277 271
278 if (ino->flags & AUTOFS_INF_PENDING) { 272 if (ino->flags & AUTOFS_INF_PENDING) {
279 if (rcu_walk) 273 if (rcu_walk)
280 return -ECHILD; 274 return -ECHILD;
281 pr_debug("waiting for mount name=%pd\n", path->dentry); 275 pr_debug("waiting for mount name=%pd\n", path->dentry);
282 status = autofs4_wait(sbi, path, NFY_MOUNT); 276 status = autofs_wait(sbi, path, NFY_MOUNT);
283 pr_debug("mount wait done status=%d\n", status); 277 pr_debug("mount wait done status=%d\n", status);
284 } 278 }
285 ino->last_used = jiffies; 279 ino->last_used = jiffies;
@@ -291,11 +285,11 @@ static int do_expire_wait(const struct path *path, bool rcu_walk)
291 struct dentry *dentry = path->dentry; 285 struct dentry *dentry = path->dentry;
292 struct dentry *expiring; 286 struct dentry *expiring;
293 287
294 expiring = autofs4_lookup_expiring(dentry, rcu_walk); 288 expiring = autofs_lookup_expiring(dentry, rcu_walk);
295 if (IS_ERR(expiring)) 289 if (IS_ERR(expiring))
296 return PTR_ERR(expiring); 290 return PTR_ERR(expiring);
297 if (!expiring) 291 if (!expiring)
298 return autofs4_expire_wait(path, rcu_walk); 292 return autofs_expire_wait(path, rcu_walk);
299 else { 293 else {
300 const struct path this = { .mnt = path->mnt, .dentry = expiring }; 294 const struct path this = { .mnt = path->mnt, .dentry = expiring };
301 /* 295 /*
@@ -303,17 +297,17 @@ static int do_expire_wait(const struct path *path, bool rcu_walk)
303 * be quite complete, but the directory has been removed 297 * be quite complete, but the directory has been removed
304 * so it must have been successful, just wait for it. 298 * so it must have been successful, just wait for it.
305 */ 299 */
306 autofs4_expire_wait(&this, 0); 300 autofs_expire_wait(&this, 0);
307 autofs4_del_expiring(expiring); 301 autofs_del_expiring(expiring);
308 dput(expiring); 302 dput(expiring);
309 } 303 }
310 return 0; 304 return 0;
311} 305}
312 306
313static struct dentry *autofs4_mountpoint_changed(struct path *path) 307static struct dentry *autofs_mountpoint_changed(struct path *path)
314{ 308{
315 struct dentry *dentry = path->dentry; 309 struct dentry *dentry = path->dentry;
316 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 310 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
317 311
318 /* 312 /*
319 * If this is an indirect mount the dentry could have gone away 313 * If this is an indirect mount the dentry could have gone away
@@ -327,7 +321,7 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
327 new = d_lookup(parent, &dentry->d_name); 321 new = d_lookup(parent, &dentry->d_name);
328 if (!new) 322 if (!new)
329 return NULL; 323 return NULL;
330 ino = autofs4_dentry_ino(new); 324 ino = autofs_dentry_ino(new);
331 ino->last_used = jiffies; 325 ino->last_used = jiffies;
332 dput(path->dentry); 326 dput(path->dentry);
333 path->dentry = new; 327 path->dentry = new;
@@ -335,17 +329,17 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
335 return path->dentry; 329 return path->dentry;
336} 330}
337 331
338static struct vfsmount *autofs4_d_automount(struct path *path) 332static struct vfsmount *autofs_d_automount(struct path *path)
339{ 333{
340 struct dentry *dentry = path->dentry; 334 struct dentry *dentry = path->dentry;
341 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 335 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
342 struct autofs_info *ino = autofs4_dentry_ino(dentry); 336 struct autofs_info *ino = autofs_dentry_ino(dentry);
343 int status; 337 int status;
344 338
345 pr_debug("dentry=%p %pd\n", dentry, dentry); 339 pr_debug("dentry=%p %pd\n", dentry, dentry);
346 340
347 /* The daemon never triggers a mount. */ 341 /* The daemon never triggers a mount. */
348 if (autofs4_oz_mode(sbi)) 342 if (autofs_oz_mode(sbi))
349 return NULL; 343 return NULL;
350 344
351 /* 345 /*
@@ -364,7 +358,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
364 spin_lock(&sbi->fs_lock); 358 spin_lock(&sbi->fs_lock);
365 if (ino->flags & AUTOFS_INF_PENDING) { 359 if (ino->flags & AUTOFS_INF_PENDING) {
366 spin_unlock(&sbi->fs_lock); 360 spin_unlock(&sbi->fs_lock);
367 status = autofs4_mount_wait(path, 0); 361 status = autofs_mount_wait(path, 0);
368 if (status) 362 if (status)
369 return ERR_PTR(status); 363 return ERR_PTR(status);
370 goto done; 364 goto done;
@@ -405,7 +399,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
405 } 399 }
406 ino->flags |= AUTOFS_INF_PENDING; 400 ino->flags |= AUTOFS_INF_PENDING;
407 spin_unlock(&sbi->fs_lock); 401 spin_unlock(&sbi->fs_lock);
408 status = autofs4_mount_wait(path, 0); 402 status = autofs_mount_wait(path, 0);
409 spin_lock(&sbi->fs_lock); 403 spin_lock(&sbi->fs_lock);
410 ino->flags &= ~AUTOFS_INF_PENDING; 404 ino->flags &= ~AUTOFS_INF_PENDING;
411 if (status) { 405 if (status) {
@@ -416,24 +410,24 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
416 spin_unlock(&sbi->fs_lock); 410 spin_unlock(&sbi->fs_lock);
417done: 411done:
418 /* Mount succeeded, check if we ended up with a new dentry */ 412 /* Mount succeeded, check if we ended up with a new dentry */
419 dentry = autofs4_mountpoint_changed(path); 413 dentry = autofs_mountpoint_changed(path);
420 if (!dentry) 414 if (!dentry)
421 return ERR_PTR(-ENOENT); 415 return ERR_PTR(-ENOENT);
422 416
423 return NULL; 417 return NULL;
424} 418}
425 419
426static int autofs4_d_manage(const struct path *path, bool rcu_walk) 420static int autofs_d_manage(const struct path *path, bool rcu_walk)
427{ 421{
428 struct dentry *dentry = path->dentry; 422 struct dentry *dentry = path->dentry;
429 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 423 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
430 struct autofs_info *ino = autofs4_dentry_ino(dentry); 424 struct autofs_info *ino = autofs_dentry_ino(dentry);
431 int status; 425 int status;
432 426
433 pr_debug("dentry=%p %pd\n", dentry, dentry); 427 pr_debug("dentry=%p %pd\n", dentry, dentry);
434 428
435 /* The daemon never waits. */ 429 /* The daemon never waits. */
436 if (autofs4_oz_mode(sbi)) { 430 if (autofs_oz_mode(sbi)) {
437 if (!path_is_mountpoint(path)) 431 if (!path_is_mountpoint(path))
438 return -EISDIR; 432 return -EISDIR;
439 return 0; 433 return 0;
@@ -447,7 +441,7 @@ static int autofs4_d_manage(const struct path *path, bool rcu_walk)
447 * This dentry may be under construction so wait on mount 441 * This dentry may be under construction so wait on mount
448 * completion. 442 * completion.
449 */ 443 */
450 status = autofs4_mount_wait(path, rcu_walk); 444 status = autofs_mount_wait(path, rcu_walk);
451 if (status) 445 if (status)
452 return status; 446 return status;
453 447
@@ -500,8 +494,8 @@ static int autofs4_d_manage(const struct path *path, bool rcu_walk)
500} 494}
501 495
502/* Lookups in the root directory */ 496/* Lookups in the root directory */
503static struct dentry *autofs4_lookup(struct inode *dir, 497static struct dentry *autofs_lookup(struct inode *dir,
504 struct dentry *dentry, unsigned int flags) 498 struct dentry *dentry, unsigned int flags)
505{ 499{
506 struct autofs_sb_info *sbi; 500 struct autofs_sb_info *sbi;
507 struct autofs_info *ino; 501 struct autofs_info *ino;
@@ -513,13 +507,13 @@ static struct dentry *autofs4_lookup(struct inode *dir,
513 if (dentry->d_name.len > NAME_MAX) 507 if (dentry->d_name.len > NAME_MAX)
514 return ERR_PTR(-ENAMETOOLONG); 508 return ERR_PTR(-ENAMETOOLONG);
515 509
516 sbi = autofs4_sbi(dir->i_sb); 510 sbi = autofs_sbi(dir->i_sb);
517 511
518 pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", 512 pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n",
519 current->pid, task_pgrp_nr(current), sbi->catatonic, 513 current->pid, task_pgrp_nr(current), sbi->catatonic,
520 autofs4_oz_mode(sbi)); 514 autofs_oz_mode(sbi));
521 515
522 active = autofs4_lookup_active(dentry); 516 active = autofs_lookup_active(dentry);
523 if (active) 517 if (active)
524 return active; 518 return active;
525 else { 519 else {
@@ -529,7 +523,7 @@ static struct dentry *autofs4_lookup(struct inode *dir,
529 * can return fail immediately. The daemon however does need 523 * can return fail immediately. The daemon however does need
530 * to create directories within the file system. 524 * to create directories within the file system.
531 */ 525 */
532 if (!autofs4_oz_mode(sbi) && !IS_ROOT(dentry->d_parent)) 526 if (!autofs_oz_mode(sbi) && !IS_ROOT(dentry->d_parent))
533 return ERR_PTR(-ENOENT); 527 return ERR_PTR(-ENOENT);
534 528
535 /* Mark entries in the root as mount triggers */ 529 /* Mark entries in the root as mount triggers */
@@ -537,24 +531,24 @@ static struct dentry *autofs4_lookup(struct inode *dir,
537 autofs_type_indirect(sbi->type)) 531 autofs_type_indirect(sbi->type))
538 __managed_dentry_set_managed(dentry); 532 __managed_dentry_set_managed(dentry);
539 533
540 ino = autofs4_new_ino(sbi); 534 ino = autofs_new_ino(sbi);
541 if (!ino) 535 if (!ino)
542 return ERR_PTR(-ENOMEM); 536 return ERR_PTR(-ENOMEM);
543 537
544 dentry->d_fsdata = ino; 538 dentry->d_fsdata = ino;
545 ino->dentry = dentry; 539 ino->dentry = dentry;
546 540
547 autofs4_add_active(dentry); 541 autofs_add_active(dentry);
548 } 542 }
549 return NULL; 543 return NULL;
550} 544}
551 545
552static int autofs4_dir_symlink(struct inode *dir, 546static int autofs_dir_symlink(struct inode *dir,
553 struct dentry *dentry, 547 struct dentry *dentry,
554 const char *symname) 548 const char *symname)
555{ 549{
556 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 550 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
557 struct autofs_info *ino = autofs4_dentry_ino(dentry); 551 struct autofs_info *ino = autofs_dentry_ino(dentry);
558 struct autofs_info *p_ino; 552 struct autofs_info *p_ino;
559 struct inode *inode; 553 struct inode *inode;
560 size_t size = strlen(symname); 554 size_t size = strlen(symname);
@@ -562,14 +556,14 @@ static int autofs4_dir_symlink(struct inode *dir,
562 556
563 pr_debug("%s <- %pd\n", symname, dentry); 557 pr_debug("%s <- %pd\n", symname, dentry);
564 558
565 if (!autofs4_oz_mode(sbi)) 559 if (!autofs_oz_mode(sbi))
566 return -EACCES; 560 return -EACCES;
567 561
568 BUG_ON(!ino); 562 BUG_ON(!ino);
569 563
570 autofs4_clean_ino(ino); 564 autofs_clean_ino(ino);
571 565
572 autofs4_del_active(dentry); 566 autofs_del_active(dentry);
573 567
574 cp = kmalloc(size + 1, GFP_KERNEL); 568 cp = kmalloc(size + 1, GFP_KERNEL);
575 if (!cp) 569 if (!cp)
@@ -577,7 +571,7 @@ static int autofs4_dir_symlink(struct inode *dir,
577 571
578 strcpy(cp, symname); 572 strcpy(cp, symname);
579 573
580 inode = autofs4_get_inode(dir->i_sb, S_IFLNK | 0555); 574 inode = autofs_get_inode(dir->i_sb, S_IFLNK | 0555);
581 if (!inode) { 575 if (!inode) {
582 kfree(cp); 576 kfree(cp);
583 return -ENOMEM; 577 return -ENOMEM;
@@ -588,7 +582,7 @@ static int autofs4_dir_symlink(struct inode *dir,
588 582
589 dget(dentry); 583 dget(dentry);
590 atomic_inc(&ino->count); 584 atomic_inc(&ino->count);
591 p_ino = autofs4_dentry_ino(dentry->d_parent); 585 p_ino = autofs_dentry_ino(dentry->d_parent);
592 if (p_ino && !IS_ROOT(dentry)) 586 if (p_ino && !IS_ROOT(dentry))
593 atomic_inc(&p_ino->count); 587 atomic_inc(&p_ino->count);
594 588
@@ -610,20 +604,20 @@ static int autofs4_dir_symlink(struct inode *dir,
610 * If a process is blocked on the dentry waiting for the expire to finish, 604 * If a process is blocked on the dentry waiting for the expire to finish,
611 * it will invalidate the dentry and try to mount with a new one. 605 * it will invalidate the dentry and try to mount with a new one.
612 * 606 *
613 * Also see autofs4_dir_rmdir().. 607 * Also see autofs_dir_rmdir()..
614 */ 608 */
615static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) 609static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
616{ 610{
617 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 611 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
618 struct autofs_info *ino = autofs4_dentry_ino(dentry); 612 struct autofs_info *ino = autofs_dentry_ino(dentry);
619 struct autofs_info *p_ino; 613 struct autofs_info *p_ino;
620 614
621 /* This allows root to remove symlinks */ 615 /* This allows root to remove symlinks */
622 if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) 616 if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
623 return -EPERM; 617 return -EPERM;
624 618
625 if (atomic_dec_and_test(&ino->count)) { 619 if (atomic_dec_and_test(&ino->count)) {
626 p_ino = autofs4_dentry_ino(dentry->d_parent); 620 p_ino = autofs_dentry_ino(dentry->d_parent);
627 if (p_ino && !IS_ROOT(dentry)) 621 if (p_ino && !IS_ROOT(dentry))
628 atomic_dec(&p_ino->count); 622 atomic_dec(&p_ino->count);
629 } 623 }
@@ -635,7 +629,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
635 dir->i_mtime = current_time(dir); 629 dir->i_mtime = current_time(dir);
636 630
637 spin_lock(&sbi->lookup_lock); 631 spin_lock(&sbi->lookup_lock);
638 __autofs4_add_expiring(dentry); 632 __autofs_add_expiring(dentry);
639 d_drop(dentry); 633 d_drop(dentry);
640 spin_unlock(&sbi->lookup_lock); 634 spin_unlock(&sbi->lookup_lock);
641 635
@@ -692,15 +686,15 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
692 managed_dentry_set_managed(parent); 686 managed_dentry_set_managed(parent);
693} 687}
694 688
695static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) 689static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
696{ 690{
697 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 691 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
698 struct autofs_info *ino = autofs4_dentry_ino(dentry); 692 struct autofs_info *ino = autofs_dentry_ino(dentry);
699 struct autofs_info *p_ino; 693 struct autofs_info *p_ino;
700 694
701 pr_debug("dentry %p, removing %pd\n", dentry, dentry); 695 pr_debug("dentry %p, removing %pd\n", dentry, dentry);
702 696
703 if (!autofs4_oz_mode(sbi)) 697 if (!autofs_oz_mode(sbi))
704 return -EACCES; 698 return -EACCES;
705 699
706 spin_lock(&sbi->lookup_lock); 700 spin_lock(&sbi->lookup_lock);
@@ -708,7 +702,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
708 spin_unlock(&sbi->lookup_lock); 702 spin_unlock(&sbi->lookup_lock);
709 return -ENOTEMPTY; 703 return -ENOTEMPTY;
710 } 704 }
711 __autofs4_add_expiring(dentry); 705 __autofs_add_expiring(dentry);
712 d_drop(dentry); 706 d_drop(dentry);
713 spin_unlock(&sbi->lookup_lock); 707 spin_unlock(&sbi->lookup_lock);
714 708
@@ -716,7 +710,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
716 autofs_clear_leaf_automount_flags(dentry); 710 autofs_clear_leaf_automount_flags(dentry);
717 711
718 if (atomic_dec_and_test(&ino->count)) { 712 if (atomic_dec_and_test(&ino->count)) {
719 p_ino = autofs4_dentry_ino(dentry->d_parent); 713 p_ino = autofs_dentry_ino(dentry->d_parent);
720 if (p_ino && dentry->d_parent != dentry) 714 if (p_ino && dentry->d_parent != dentry)
721 atomic_dec(&p_ino->count); 715 atomic_dec(&p_ino->count);
722 } 716 }
@@ -730,26 +724,26 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
730 return 0; 724 return 0;
731} 725}
732 726
733static int autofs4_dir_mkdir(struct inode *dir, 727static int autofs_dir_mkdir(struct inode *dir,
734 struct dentry *dentry, umode_t mode) 728 struct dentry *dentry, umode_t mode)
735{ 729{
736 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 730 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
737 struct autofs_info *ino = autofs4_dentry_ino(dentry); 731 struct autofs_info *ino = autofs_dentry_ino(dentry);
738 struct autofs_info *p_ino; 732 struct autofs_info *p_ino;
739 struct inode *inode; 733 struct inode *inode;
740 734
741 if (!autofs4_oz_mode(sbi)) 735 if (!autofs_oz_mode(sbi))
742 return -EACCES; 736 return -EACCES;
743 737
744 pr_debug("dentry %p, creating %pd\n", dentry, dentry); 738 pr_debug("dentry %p, creating %pd\n", dentry, dentry);
745 739
746 BUG_ON(!ino); 740 BUG_ON(!ino);
747 741
748 autofs4_clean_ino(ino); 742 autofs_clean_ino(ino);
749 743
750 autofs4_del_active(dentry); 744 autofs_del_active(dentry);
751 745
752 inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode); 746 inode = autofs_get_inode(dir->i_sb, S_IFDIR | mode);
753 if (!inode) 747 if (!inode)
754 return -ENOMEM; 748 return -ENOMEM;
755 d_add(dentry, inode); 749 d_add(dentry, inode);
@@ -759,7 +753,7 @@ static int autofs4_dir_mkdir(struct inode *dir,
759 753
760 dget(dentry); 754 dget(dentry);
761 atomic_inc(&ino->count); 755 atomic_inc(&ino->count);
762 p_ino = autofs4_dentry_ino(dentry->d_parent); 756 p_ino = autofs_dentry_ino(dentry->d_parent);
763 if (p_ino && !IS_ROOT(dentry)) 757 if (p_ino && !IS_ROOT(dentry))
764 atomic_inc(&p_ino->count); 758 atomic_inc(&p_ino->count);
765 inc_nlink(dir); 759 inc_nlink(dir);
@@ -770,7 +764,7 @@ static int autofs4_dir_mkdir(struct inode *dir,
770 764
771/* Get/set timeout ioctl() operation */ 765/* Get/set timeout ioctl() operation */
772#ifdef CONFIG_COMPAT 766#ifdef CONFIG_COMPAT
773static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, 767static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
774 compat_ulong_t __user *p) 768 compat_ulong_t __user *p)
775{ 769{
776 unsigned long ntimeout; 770 unsigned long ntimeout;
@@ -795,7 +789,7 @@ error:
795} 789}
796#endif 790#endif
797 791
798static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, 792static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
799 unsigned long __user *p) 793 unsigned long __user *p)
800{ 794{
801 unsigned long ntimeout; 795 unsigned long ntimeout;
@@ -820,14 +814,14 @@ error:
820} 814}
821 815
822/* Return protocol version */ 816/* Return protocol version */
823static inline int autofs4_get_protover(struct autofs_sb_info *sbi, 817static inline int autofs_get_protover(struct autofs_sb_info *sbi,
824 int __user *p) 818 int __user *p)
825{ 819{
826 return put_user(sbi->version, p); 820 return put_user(sbi->version, p);
827} 821}
828 822
829/* Return protocol sub version */ 823/* Return protocol sub version */
830static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, 824static inline int autofs_get_protosubver(struct autofs_sb_info *sbi,
831 int __user *p) 825 int __user *p)
832{ 826{
833 return put_user(sbi->sub_version, p); 827 return put_user(sbi->sub_version, p);
@@ -836,7 +830,7 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi,
836/* 830/*
837* Tells the daemon whether it can umount the autofs mount. 831* Tells the daemon whether it can umount the autofs mount.
838*/ 832*/
839static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) 833static inline int autofs_ask_umount(struct vfsmount *mnt, int __user *p)
840{ 834{
841 int status = 0; 835 int status = 0;
842 836
@@ -850,14 +844,14 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
850 return status; 844 return status;
851} 845}
852 846
853/* Identify autofs4_dentries - this is so we can tell if there's 847/* Identify autofs_dentries - this is so we can tell if there's
854 * an extra dentry refcount or not. We only hold a refcount on the 848 * an extra dentry refcount or not. We only hold a refcount on the
855 * dentry if it's non-negative (ie, d_inode != NULL) 849 * dentry if it's non-negative (ie, d_inode != NULL)
856 */ 850 */
857int is_autofs4_dentry(struct dentry *dentry) 851int is_autofs_dentry(struct dentry *dentry)
858{ 852{
859 return dentry && d_really_is_positive(dentry) && 853 return dentry && d_really_is_positive(dentry) &&
860 dentry->d_op == &autofs4_dentry_operations && 854 dentry->d_op == &autofs_dentry_operations &&
861 dentry->d_fsdata != NULL; 855 dentry->d_fsdata != NULL;
862} 856}
863 857
@@ -865,10 +859,10 @@ int is_autofs4_dentry(struct dentry *dentry)
865 * ioctl()'s on the root directory is the chief method for the daemon to 859 * ioctl()'s on the root directory is the chief method for the daemon to
866 * generate kernel reactions 860 * generate kernel reactions
867 */ 861 */
868static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, 862static int autofs_root_ioctl_unlocked(struct inode *inode, struct file *filp,
869 unsigned int cmd, unsigned long arg) 863 unsigned int cmd, unsigned long arg)
870{ 864{
871 struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb); 865 struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
872 void __user *p = (void __user *)arg; 866 void __user *p = (void __user *)arg;
873 867
874 pr_debug("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n", 868 pr_debug("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",
@@ -878,64 +872,63 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
878 _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) 872 _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
879 return -ENOTTY; 873 return -ENOTTY;
880 874
881 if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) 875 if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
882 return -EPERM; 876 return -EPERM;
883 877
884 switch (cmd) { 878 switch (cmd) {
885 case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ 879 case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */
886 return autofs4_wait_release(sbi, (autofs_wqt_t) arg, 0); 880 return autofs_wait_release(sbi, (autofs_wqt_t) arg, 0);
887 case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ 881 case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */
888 return autofs4_wait_release(sbi, (autofs_wqt_t) arg, -ENOENT); 882 return autofs_wait_release(sbi, (autofs_wqt_t) arg, -ENOENT);
889 case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ 883 case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */
890 autofs4_catatonic_mode(sbi); 884 autofs_catatonic_mode(sbi);
891 return 0; 885 return 0;
892 case AUTOFS_IOC_PROTOVER: /* Get protocol version */ 886 case AUTOFS_IOC_PROTOVER: /* Get protocol version */
893 return autofs4_get_protover(sbi, p); 887 return autofs_get_protover(sbi, p);
894 case AUTOFS_IOC_PROTOSUBVER: /* Get protocol sub version */ 888 case AUTOFS_IOC_PROTOSUBVER: /* Get protocol sub version */
895 return autofs4_get_protosubver(sbi, p); 889 return autofs_get_protosubver(sbi, p);
896 case AUTOFS_IOC_SETTIMEOUT: 890 case AUTOFS_IOC_SETTIMEOUT:
897 return autofs4_get_set_timeout(sbi, p); 891 return autofs_get_set_timeout(sbi, p);
898#ifdef CONFIG_COMPAT 892#ifdef CONFIG_COMPAT
899 case AUTOFS_IOC_SETTIMEOUT32: 893 case AUTOFS_IOC_SETTIMEOUT32:
900 return autofs4_compat_get_set_timeout(sbi, p); 894 return autofs_compat_get_set_timeout(sbi, p);
901#endif 895#endif
902 896
903 case AUTOFS_IOC_ASKUMOUNT: 897 case AUTOFS_IOC_ASKUMOUNT:
904 return autofs4_ask_umount(filp->f_path.mnt, p); 898 return autofs_ask_umount(filp->f_path.mnt, p);
905 899
906 /* return a single thing to expire */ 900 /* return a single thing to expire */
907 case AUTOFS_IOC_EXPIRE: 901 case AUTOFS_IOC_EXPIRE:
908 return autofs4_expire_run(inode->i_sb, 902 return autofs_expire_run(inode->i_sb, filp->f_path.mnt, sbi, p);
909 filp->f_path.mnt, sbi, p);
910 /* same as above, but can send multiple expires through pipe */ 903 /* same as above, but can send multiple expires through pipe */
911 case AUTOFS_IOC_EXPIRE_MULTI: 904 case AUTOFS_IOC_EXPIRE_MULTI:
912 return autofs4_expire_multi(inode->i_sb, 905 return autofs_expire_multi(inode->i_sb,
913 filp->f_path.mnt, sbi, p); 906 filp->f_path.mnt, sbi, p);
914 907
915 default: 908 default:
916 return -EINVAL; 909 return -EINVAL;
917 } 910 }
918} 911}
919 912
920static long autofs4_root_ioctl(struct file *filp, 913static long autofs_root_ioctl(struct file *filp,
921 unsigned int cmd, unsigned long arg) 914 unsigned int cmd, unsigned long arg)
922{ 915{
923 struct inode *inode = file_inode(filp); 916 struct inode *inode = file_inode(filp);
924 917
925 return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); 918 return autofs_root_ioctl_unlocked(inode, filp, cmd, arg);
926} 919}
927 920
928#ifdef CONFIG_COMPAT 921#ifdef CONFIG_COMPAT
929static long autofs4_root_compat_ioctl(struct file *filp, 922static long autofs_root_compat_ioctl(struct file *filp,
930 unsigned int cmd, unsigned long arg) 923 unsigned int cmd, unsigned long arg)
931{ 924{
932 struct inode *inode = file_inode(filp); 925 struct inode *inode = file_inode(filp);
933 int ret; 926 int ret;
934 927
935 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) 928 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
936 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); 929 ret = autofs_root_ioctl_unlocked(inode, filp, cmd, arg);
937 else 930 else
938 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, 931 ret = autofs_root_ioctl_unlocked(inode, filp, cmd,
939 (unsigned long) compat_ptr(arg)); 932 (unsigned long) compat_ptr(arg));
940 933
941 return ret; 934 return ret;
diff --git a/fs/autofs4/symlink.c b/fs/autofs/symlink.c
index ab0b4285a202..aad3902c0cc1 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs/symlink.c
@@ -8,22 +8,22 @@
8 8
9#include "autofs_i.h" 9#include "autofs_i.h"
10 10
11static const char *autofs4_get_link(struct dentry *dentry, 11static const char *autofs_get_link(struct dentry *dentry,
12 struct inode *inode, 12 struct inode *inode,
13 struct delayed_call *done) 13 struct delayed_call *done)
14{ 14{
15 struct autofs_sb_info *sbi; 15 struct autofs_sb_info *sbi;
16 struct autofs_info *ino; 16 struct autofs_info *ino;
17 17
18 if (!dentry) 18 if (!dentry)
19 return ERR_PTR(-ECHILD); 19 return ERR_PTR(-ECHILD);
20 sbi = autofs4_sbi(dentry->d_sb); 20 sbi = autofs_sbi(dentry->d_sb);
21 ino = autofs4_dentry_ino(dentry); 21 ino = autofs_dentry_ino(dentry);
22 if (ino && !autofs4_oz_mode(sbi)) 22 if (ino && !autofs_oz_mode(sbi))
23 ino->last_used = jiffies; 23 ino->last_used = jiffies;
24 return d_inode(dentry)->i_private; 24 return d_inode(dentry)->i_private;
25} 25}
26 26
27const struct inode_operations autofs4_symlink_inode_operations = { 27const struct inode_operations autofs_symlink_inode_operations = {
28 .get_link = autofs4_get_link 28 .get_link = autofs_get_link
29}; 29};
diff --git a/fs/autofs4/waitq.c b/fs/autofs/waitq.c
index be9c3dc048ab..f6385c6ef0a5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs/waitq.c
@@ -7,19 +7,15 @@
7 * option, any later version, incorporated herein by reference. 7 * option, any later version, incorporated herein by reference.
8 */ 8 */
9 9
10#include <linux/slab.h>
11#include <linux/time.h>
12#include <linux/signal.h>
13#include <linux/sched/signal.h> 10#include <linux/sched/signal.h>
14#include <linux/file.h>
15#include "autofs_i.h" 11#include "autofs_i.h"
16 12
17/* We make this a static variable rather than a part of the superblock; it 13/* We make this a static variable rather than a part of the superblock; it
18 * is better if we don't reassign numbers easily even across filesystems 14 * is better if we don't reassign numbers easily even across filesystems
19 */ 15 */
20static autofs_wqt_t autofs4_next_wait_queue = 1; 16static autofs_wqt_t autofs_next_wait_queue = 1;
21 17
22void autofs4_catatonic_mode(struct autofs_sb_info *sbi) 18void autofs_catatonic_mode(struct autofs_sb_info *sbi)
23{ 19{
24 struct autofs_wait_queue *wq, *nwq; 20 struct autofs_wait_queue *wq, *nwq;
25 21
@@ -49,8 +45,8 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
49 mutex_unlock(&sbi->wq_mutex); 45 mutex_unlock(&sbi->wq_mutex);
50} 46}
51 47
52static int autofs4_write(struct autofs_sb_info *sbi, 48static int autofs_write(struct autofs_sb_info *sbi,
53 struct file *file, const void *addr, int bytes) 49 struct file *file, const void *addr, int bytes)
54{ 50{
55 unsigned long sigpipe, flags; 51 unsigned long sigpipe, flags;
56 const char *data = (const char *)addr; 52 const char *data = (const char *)addr;
@@ -82,7 +78,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
82 return bytes == 0 ? 0 : wr < 0 ? wr : -EIO; 78 return bytes == 0 ? 0 : wr < 0 ? wr : -EIO;
83} 79}
84 80
85static void autofs4_notify_daemon(struct autofs_sb_info *sbi, 81static void autofs_notify_daemon(struct autofs_sb_info *sbi,
86 struct autofs_wait_queue *wq, 82 struct autofs_wait_queue *wq,
87 int type) 83 int type)
88{ 84{
@@ -167,23 +163,23 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
167 163
168 mutex_unlock(&sbi->wq_mutex); 164 mutex_unlock(&sbi->wq_mutex);
169 165
170 switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { 166 switch (ret = autofs_write(sbi, pipe, &pkt, pktsz)) {
171 case 0: 167 case 0:
172 break; 168 break;
173 case -ENOMEM: 169 case -ENOMEM:
174 case -ERESTARTSYS: 170 case -ERESTARTSYS:
175 /* Just fail this one */ 171 /* Just fail this one */
176 autofs4_wait_release(sbi, wq->wait_queue_token, ret); 172 autofs_wait_release(sbi, wq->wait_queue_token, ret);
177 break; 173 break;
178 default: 174 default:
179 autofs4_catatonic_mode(sbi); 175 autofs_catatonic_mode(sbi);
180 break; 176 break;
181 } 177 }
182 fput(pipe); 178 fput(pipe);
183} 179}
184 180
185static int autofs4_getpath(struct autofs_sb_info *sbi, 181static int autofs_getpath(struct autofs_sb_info *sbi,
186 struct dentry *dentry, char **name) 182 struct dentry *dentry, char *name)
187{ 183{
188 struct dentry *root = sbi->sb->s_root; 184 struct dentry *root = sbi->sb->s_root;
189 struct dentry *tmp; 185 struct dentry *tmp;
@@ -193,7 +189,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
193 unsigned seq; 189 unsigned seq;
194 190
195rename_retry: 191rename_retry:
196 buf = *name; 192 buf = name;
197 len = 0; 193 len = 0;
198 194
199 seq = read_seqbegin(&rename_lock); 195 seq = read_seqbegin(&rename_lock);
@@ -228,7 +224,7 @@ rename_retry:
228} 224}
229 225
230static struct autofs_wait_queue * 226static struct autofs_wait_queue *
231autofs4_find_wait(struct autofs_sb_info *sbi, const struct qstr *qstr) 227autofs_find_wait(struct autofs_sb_info *sbi, const struct qstr *qstr)
232{ 228{
233 struct autofs_wait_queue *wq; 229 struct autofs_wait_queue *wq;
234 230
@@ -263,7 +259,7 @@ static int validate_request(struct autofs_wait_queue **wait,
263 return -ENOENT; 259 return -ENOENT;
264 260
265 /* Wait in progress, continue; */ 261 /* Wait in progress, continue; */
266 wq = autofs4_find_wait(sbi, qstr); 262 wq = autofs_find_wait(sbi, qstr);
267 if (wq) { 263 if (wq) {
268 *wait = wq; 264 *wait = wq;
269 return 1; 265 return 1;
@@ -272,7 +268,7 @@ static int validate_request(struct autofs_wait_queue **wait,
272 *wait = NULL; 268 *wait = NULL;
273 269
274 /* If we don't yet have any info this is a new request */ 270 /* If we don't yet have any info this is a new request */
275 ino = autofs4_dentry_ino(dentry); 271 ino = autofs_dentry_ino(dentry);
276 if (!ino) 272 if (!ino)
277 return 1; 273 return 1;
278 274
@@ -297,7 +293,7 @@ static int validate_request(struct autofs_wait_queue **wait,
297 if (sbi->catatonic) 293 if (sbi->catatonic)
298 return -ENOENT; 294 return -ENOENT;
299 295
300 wq = autofs4_find_wait(sbi, qstr); 296 wq = autofs_find_wait(sbi, qstr);
301 if (wq) { 297 if (wq) {
302 *wait = wq; 298 *wait = wq;
303 return 1; 299 return 1;
@@ -351,7 +347,7 @@ static int validate_request(struct autofs_wait_queue **wait,
351 return 1; 347 return 1;
352} 348}
353 349
354int autofs4_wait(struct autofs_sb_info *sbi, 350int autofs_wait(struct autofs_sb_info *sbi,
355 const struct path *path, enum autofs_notify notify) 351 const struct path *path, enum autofs_notify notify)
356{ 352{
357 struct dentry *dentry = path->dentry; 353 struct dentry *dentry = path->dentry;
@@ -399,7 +395,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
399 if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type)) 395 if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type))
400 qstr.len = sprintf(name, "%p", dentry); 396 qstr.len = sprintf(name, "%p", dentry);
401 else { 397 else {
402 qstr.len = autofs4_getpath(sbi, dentry, &name); 398 qstr.len = autofs_getpath(sbi, dentry, name);
403 if (!qstr.len) { 399 if (!qstr.len) {
404 kfree(name); 400 kfree(name);
405 return -ENOENT; 401 return -ENOENT;
@@ -430,15 +426,15 @@ int autofs4_wait(struct autofs_sb_info *sbi,
430 return -ENOMEM; 426 return -ENOMEM;
431 } 427 }
432 428
433 wq->wait_queue_token = autofs4_next_wait_queue; 429 wq->wait_queue_token = autofs_next_wait_queue;
434 if (++autofs4_next_wait_queue == 0) 430 if (++autofs_next_wait_queue == 0)
435 autofs4_next_wait_queue = 1; 431 autofs_next_wait_queue = 1;
436 wq->next = sbi->queues; 432 wq->next = sbi->queues;
437 sbi->queues = wq; 433 sbi->queues = wq;
438 init_waitqueue_head(&wq->queue); 434 init_waitqueue_head(&wq->queue);
439 memcpy(&wq->name, &qstr, sizeof(struct qstr)); 435 memcpy(&wq->name, &qstr, sizeof(struct qstr));
440 wq->dev = autofs4_get_dev(sbi); 436 wq->dev = autofs_get_dev(sbi);
441 wq->ino = autofs4_get_ino(sbi); 437 wq->ino = autofs_get_ino(sbi);
442 wq->uid = current_uid(); 438 wq->uid = current_uid();
443 wq->gid = current_gid(); 439 wq->gid = current_gid();
444 wq->pid = pid; 440 wq->pid = pid;
@@ -467,9 +463,9 @@ int autofs4_wait(struct autofs_sb_info *sbi,
467 wq->name.name, notify); 463 wq->name.name, notify);
468 464
469 /* 465 /*
470 * autofs4_notify_daemon() may block; it will unlock ->wq_mutex 466 * autofs_notify_daemon() may block; it will unlock ->wq_mutex
471 */ 467 */
472 autofs4_notify_daemon(sbi, wq, type); 468 autofs_notify_daemon(sbi, wq, type);
473 } else { 469 } else {
474 wq->wait_ctr++; 470 wq->wait_ctr++;
475 pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", 471 pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
@@ -500,12 +496,12 @@ int autofs4_wait(struct autofs_sb_info *sbi,
500 struct dentry *de = NULL; 496 struct dentry *de = NULL;
501 497
502 /* direct mount or browsable map */ 498 /* direct mount or browsable map */
503 ino = autofs4_dentry_ino(dentry); 499 ino = autofs_dentry_ino(dentry);
504 if (!ino) { 500 if (!ino) {
505 /* If not lookup actual dentry used */ 501 /* If not lookup actual dentry used */
506 de = d_lookup(dentry->d_parent, &dentry->d_name); 502 de = d_lookup(dentry->d_parent, &dentry->d_name);
507 if (de) 503 if (de)
508 ino = autofs4_dentry_ino(de); 504 ino = autofs_dentry_ino(de);
509 } 505 }
510 506
511 /* Set mount requester */ 507 /* Set mount requester */
@@ -530,7 +526,8 @@ int autofs4_wait(struct autofs_sb_info *sbi,
530} 526}
531 527
532 528
533int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status) 529int autofs_wait_release(struct autofs_sb_info *sbi,
530 autofs_wqt_t wait_queue_token, int status)
534{ 531{
535 struct autofs_wait_queue *wq, **wql; 532 struct autofs_wait_queue *wq, **wql;
536 533
diff --git a/fs/autofs4/Kconfig b/fs/autofs4/Kconfig
index 44727bf18297..99fda4d6da25 100644
--- a/fs/autofs4/Kconfig
+++ b/fs/autofs4/Kconfig
@@ -1,5 +1,7 @@
1config AUTOFS4_FS 1config AUTOFS4_FS
2 tristate "Kernel automounter version 4 support (also supports v3)" 2 tristate "Kernel automounter version 4 support (also supports v3 and v5)"
3 default n
4 depends on AUTOFS_FS = n
3 help 5 help
4 The automounter is a tool to automatically mount remote file systems 6 The automounter is a tool to automatically mount remote file systems
5 on demand. This implementation is partially kernel-based to reduce 7 on demand. This implementation is partially kernel-based to reduce
@@ -7,14 +9,38 @@ config AUTOFS4_FS
7 automounter (amd), which is a pure user space daemon. 9 automounter (amd), which is a pure user space daemon.
8 10
9 To use the automounter you need the user-space tools from 11 To use the automounter you need the user-space tools from
10 <https://www.kernel.org/pub/linux/daemons/autofs/v4/>; you also 12 <https://www.kernel.org/pub/linux/daemons/autofs/>; you also want
11 want to answer Y to "NFS file system support", below. 13 to answer Y to "NFS file system support", below.
12 14
13 To compile this support as a module, choose M here: the module will be 15 This module is in the process of being renamed from autofs4 to
14 called autofs4. You will need to add "alias autofs autofs4" to your 16 autofs. Since autofs is now the only module that provides the
15 modules configuration file. 17 autofs file system the module is not version 4 specific.
16 18
17 If you are not a part of a fairly large, distributed network or 19 The autofs4 module is now built from the source located in
18 don't have a laptop which needs to dynamically reconfigure to the 20 fs/autofs. The autofs4 directory and its configuration entry
19 local network, you probably do not need an automounter, and can say 21 will be removed two kernel versions from the inclusion of this
20 N here. 22 change.
23
24 Changes that will need to be made should be limited to:
25 - source include statements should be changed from auto_fs4.h to
26 auto_fs.h since these two header files have been merged.
27 - user space scripts that manually load autofs4.ko should be
28 changed to load autofs.ko. But since the module directory name
29 and the module name are the same as the file system name there
30 is no need to manually load the module.
31 - any "alias autofs autofs4" will need to be removed.
32 - due to the autofs4 module directory name not being the same as
33 its file system name, autoloading didn't work properly. Because
34 of this, kernel configurations would often build the module into
35 the kernel. This may have resulted in SELinux policies that will
36 prevent the autofs module from autoloading; these policies will
37 need to be updated.
38
39 Please configure AUTOFS_FS instead of AUTOFS4_FS from now on.
40
41 NOTE: Since the modules autofs and autofs4 use the same file system
42 type name of "autofs", only one can be built. The "depends"
43 above will result in AUTOFS4_FS not appearing in .config for
44 any setting of AUTOFS_FS other than n; otherwise AUTOFS4_FS will
45 appear under the AUTOFS_FS entry, which is intended
46 to draw attention to the module rename change.
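For illustration only: the "alias autofs autofs4" entry mentioned above is normally a single line in a modprobe configuration file. The path below is an example, not taken from this patch.

    # /etc/modprobe.d/autofs.conf  (example location)
    # Remove this line once the renamed autofs module is in use; the
    # module name now matches the file system type name, so it
    # autoloads without an alias.
    alias autofs autofs4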
diff --git a/fs/autofs4/Makefile b/fs/autofs4/Makefile
index a811c1f7d9ab..417dd726d9ef 100644
--- a/fs/autofs4/Makefile
+++ b/fs/autofs4/Makefile
@@ -4,4 +4,6 @@
4 4
5obj-$(CONFIG_AUTOFS4_FS) += autofs4.o 5obj-$(CONFIG_AUTOFS4_FS) += autofs4.o
6 6
7autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o 7autofs4-objs := ../autofs/init.o ../autofs/inode.o ../autofs/root.o \
8 ../autofs/symlink.o ../autofs/waitq.o ../autofs/expire.o \
9 ../autofs/dev-ioctl.o
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a41b48f82a70..4de191563261 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -387,8 +387,13 @@ static Node *create_entry(const char __user *buffer, size_t count)
387 s = strchr(p, del); 387 s = strchr(p, del);
388 if (!s) 388 if (!s)
389 goto einval; 389 goto einval;
390 *s++ = '\0'; 390 *s = '\0';
391 e->offset = simple_strtoul(p, &p, 10); 391 if (p != s) {
392 int r = kstrtoint(p, 10, &e->offset);
393 if (r != 0 || e->offset < 0)
394 goto einval;
395 }
396 p = s;
392 if (*p++) 397 if (*p++)
393 goto einval; 398 goto einval;
394 pr_debug("register: offset: %#x\n", e->offset); 399 pr_debug("register: offset: %#x\n", e->offset);
@@ -428,7 +433,8 @@ static Node *create_entry(const char __user *buffer, size_t count)
428 if (e->mask && 433 if (e->mask &&
429 string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size) 434 string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
430 goto einval; 435 goto einval;
431 if (e->size + e->offset > BINPRM_BUF_SIZE) 436 if (e->size > BINPRM_BUF_SIZE ||
437 BINPRM_BUF_SIZE - e->size < e->offset)
432 goto einval; 438 goto einval;
433 pr_debug("register: magic/mask length: %i\n", e->size); 439 pr_debug("register: magic/mask length: %i\n", e->size);
434 if (USE_DEBUG) { 440 if (USE_DEBUG) {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index ef80085ed564..9907475b4226 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -38,8 +38,6 @@
38#include <linux/ppp-ioctl.h> 38#include <linux/ppp-ioctl.h>
39#include <linux/if_pppox.h> 39#include <linux/if_pppox.h>
40#include <linux/mtio.h> 40#include <linux/mtio.h>
41#include <linux/auto_fs.h>
42#include <linux/auto_fs4.h>
43#include <linux/tty.h> 41#include <linux/tty.h>
44#include <linux/vt_kern.h> 42#include <linux/vt_kern.h>
45#include <linux/fb.h> 43#include <linux/fb.h>
diff --git a/fs/dax.c b/fs/dax.c
index aa86d9f971a4..08656a2f2aa6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -905,12 +905,12 @@ out:
905 * If this page is ever written to we will re-fault and change the mapping to 905 * If this page is ever written to we will re-fault and change the mapping to
906 * point to real DAX storage instead. 906 * point to real DAX storage instead.
907 */ 907 */
908static int dax_load_hole(struct address_space *mapping, void *entry, 908static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
909 struct vm_fault *vmf) 909 struct vm_fault *vmf)
910{ 910{
911 struct inode *inode = mapping->host; 911 struct inode *inode = mapping->host;
912 unsigned long vaddr = vmf->address; 912 unsigned long vaddr = vmf->address;
913 int ret = VM_FAULT_NOPAGE; 913 vm_fault_t ret = VM_FAULT_NOPAGE;
914 struct page *zero_page; 914 struct page *zero_page;
915 void *entry2; 915 void *entry2;
916 pfn_t pfn; 916 pfn_t pfn;
@@ -929,7 +929,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
929 goto out; 929 goto out;
930 } 930 }
931 931
932 vm_insert_mixed(vmf->vma, vaddr, pfn); 932 ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
933out: 933out:
934 trace_dax_load_hole(inode, vmf, ret); 934 trace_dax_load_hole(inode, vmf, ret);
935 return ret; 935 return ret;
@@ -1112,7 +1112,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
1112} 1112}
1113EXPORT_SYMBOL_GPL(dax_iomap_rw); 1113EXPORT_SYMBOL_GPL(dax_iomap_rw);
1114 1114
1115static int dax_fault_return(int error) 1115static vm_fault_t dax_fault_return(int error)
1116{ 1116{
1117 if (error == 0) 1117 if (error == 0)
1118 return VM_FAULT_NOPAGE; 1118 return VM_FAULT_NOPAGE;
@@ -1132,7 +1132,7 @@ static bool dax_fault_is_synchronous(unsigned long flags,
1132 && (iomap->flags & IOMAP_F_DIRTY); 1132 && (iomap->flags & IOMAP_F_DIRTY);
1133} 1133}
1134 1134
1135static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, 1135static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1136 int *iomap_errp, const struct iomap_ops *ops) 1136 int *iomap_errp, const struct iomap_ops *ops)
1137{ 1137{
1138 struct vm_area_struct *vma = vmf->vma; 1138 struct vm_area_struct *vma = vmf->vma;
@@ -1145,18 +1145,18 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1145 int error, major = 0; 1145 int error, major = 0;
1146 bool write = vmf->flags & FAULT_FLAG_WRITE; 1146 bool write = vmf->flags & FAULT_FLAG_WRITE;
1147 bool sync; 1147 bool sync;
1148 int vmf_ret = 0; 1148 vm_fault_t ret = 0;
1149 void *entry; 1149 void *entry;
1150 pfn_t pfn; 1150 pfn_t pfn;
1151 1151
1152 trace_dax_pte_fault(inode, vmf, vmf_ret); 1152 trace_dax_pte_fault(inode, vmf, ret);
1153 /* 1153 /*
1154 * Check whether offset isn't beyond end of file now. Caller is supposed 1154 * Check whether offset isn't beyond end of file now. Caller is supposed
1155 * to hold locks serializing us with truncate / punch hole so this is 1155 * to hold locks serializing us with truncate / punch hole so this is
1156 * a reliable test. 1156 * a reliable test.
1157 */ 1157 */
1158 if (pos >= i_size_read(inode)) { 1158 if (pos >= i_size_read(inode)) {
1159 vmf_ret = VM_FAULT_SIGBUS; 1159 ret = VM_FAULT_SIGBUS;
1160 goto out; 1160 goto out;
1161 } 1161 }
1162 1162
@@ -1165,7 +1165,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1165 1165
1166 entry = grab_mapping_entry(mapping, vmf->pgoff, 0); 1166 entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
1167 if (IS_ERR(entry)) { 1167 if (IS_ERR(entry)) {
1168 vmf_ret = dax_fault_return(PTR_ERR(entry)); 1168 ret = dax_fault_return(PTR_ERR(entry));
1169 goto out; 1169 goto out;
1170 } 1170 }
1171 1171
@@ -1176,7 +1176,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1176 * retried. 1176 * retried.
1177 */ 1177 */
1178 if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) { 1178 if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
1179 vmf_ret = VM_FAULT_NOPAGE; 1179 ret = VM_FAULT_NOPAGE;
1180 goto unlock_entry; 1180 goto unlock_entry;
1181 } 1181 }
1182 1182
@@ -1189,7 +1189,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1189 if (iomap_errp) 1189 if (iomap_errp)
1190 *iomap_errp = error; 1190 *iomap_errp = error;
1191 if (error) { 1191 if (error) {
1192 vmf_ret = dax_fault_return(error); 1192 ret = dax_fault_return(error);
1193 goto unlock_entry; 1193 goto unlock_entry;
1194 } 1194 }
1195 if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { 1195 if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
@@ -1219,9 +1219,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1219 goto error_finish_iomap; 1219 goto error_finish_iomap;
1220 1220
1221 __SetPageUptodate(vmf->cow_page); 1221 __SetPageUptodate(vmf->cow_page);
1222 vmf_ret = finish_fault(vmf); 1222 ret = finish_fault(vmf);
1223 if (!vmf_ret) 1223 if (!ret)
1224 vmf_ret = VM_FAULT_DONE_COW; 1224 ret = VM_FAULT_DONE_COW;
1225 goto finish_iomap; 1225 goto finish_iomap;
1226 } 1226 }
1227 1227
@@ -1257,23 +1257,20 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1257 goto error_finish_iomap; 1257 goto error_finish_iomap;
1258 } 1258 }
1259 *pfnp = pfn; 1259 *pfnp = pfn;
1260 vmf_ret = VM_FAULT_NEEDDSYNC | major; 1260 ret = VM_FAULT_NEEDDSYNC | major;
1261 goto finish_iomap; 1261 goto finish_iomap;
1262 } 1262 }
1263 trace_dax_insert_mapping(inode, vmf, entry); 1263 trace_dax_insert_mapping(inode, vmf, entry);
1264 if (write) 1264 if (write)
1265 error = vm_insert_mixed_mkwrite(vma, vaddr, pfn); 1265 ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
1266 else 1266 else
1267 error = vm_insert_mixed(vma, vaddr, pfn); 1267 ret = vmf_insert_mixed(vma, vaddr, pfn);
1268 1268
1269 /* -EBUSY is fine, somebody else faulted on the same PTE */ 1269 goto finish_iomap;
1270 if (error == -EBUSY)
1271 error = 0;
1272 break;
1273 case IOMAP_UNWRITTEN: 1270 case IOMAP_UNWRITTEN:
1274 case IOMAP_HOLE: 1271 case IOMAP_HOLE:
1275 if (!write) { 1272 if (!write) {
1276 vmf_ret = dax_load_hole(mapping, entry, vmf); 1273 ret = dax_load_hole(mapping, entry, vmf);
1277 goto finish_iomap; 1274 goto finish_iomap;
1278 } 1275 }
1279 /*FALLTHRU*/ 1276 /*FALLTHRU*/
@@ -1284,12 +1281,12 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1284 } 1281 }
1285 1282
1286 error_finish_iomap: 1283 error_finish_iomap:
1287 vmf_ret = dax_fault_return(error) | major; 1284 ret = dax_fault_return(error);
1288 finish_iomap: 1285 finish_iomap:
1289 if (ops->iomap_end) { 1286 if (ops->iomap_end) {
1290 int copied = PAGE_SIZE; 1287 int copied = PAGE_SIZE;
1291 1288
1292 if (vmf_ret & VM_FAULT_ERROR) 1289 if (ret & VM_FAULT_ERROR)
1293 copied = 0; 1290 copied = 0;
1294 /* 1291 /*
1295 * The fault is done by now and there's no way back (other 1292 * The fault is done by now and there's no way back (other
@@ -1302,12 +1299,12 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1302 unlock_entry: 1299 unlock_entry:
1303 put_locked_mapping_entry(mapping, vmf->pgoff); 1300 put_locked_mapping_entry(mapping, vmf->pgoff);
1304 out: 1301 out:
1305 trace_dax_pte_fault_done(inode, vmf, vmf_ret); 1302 trace_dax_pte_fault_done(inode, vmf, ret);
1306 return vmf_ret; 1303 return ret | major;
1307} 1304}
1308 1305
1309#ifdef CONFIG_FS_DAX_PMD 1306#ifdef CONFIG_FS_DAX_PMD
1310static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, 1307static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
1311 void *entry) 1308 void *entry)
1312{ 1309{
1313 struct address_space *mapping = vmf->vma->vm_file->f_mapping; 1310 struct address_space *mapping = vmf->vma->vm_file->f_mapping;
@@ -1348,7 +1345,7 @@ fallback:
1348 return VM_FAULT_FALLBACK; 1345 return VM_FAULT_FALLBACK;
1349} 1346}
1350 1347
1351static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, 1348static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1352 const struct iomap_ops *ops) 1349 const struct iomap_ops *ops)
1353{ 1350{
1354 struct vm_area_struct *vma = vmf->vma; 1351 struct vm_area_struct *vma = vmf->vma;
@@ -1358,7 +1355,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1358 bool sync; 1355 bool sync;
1359 unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; 1356 unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
1360 struct inode *inode = mapping->host; 1357 struct inode *inode = mapping->host;
1361 int result = VM_FAULT_FALLBACK; 1358 vm_fault_t result = VM_FAULT_FALLBACK;
1362 struct iomap iomap = { 0 }; 1359 struct iomap iomap = { 0 };
1363 pgoff_t max_pgoff, pgoff; 1360 pgoff_t max_pgoff, pgoff;
1364 void *entry; 1361 void *entry;
@@ -1509,7 +1506,7 @@ out:
1509 return result; 1506 return result;
1510} 1507}
1511#else 1508#else
1512static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, 1509static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1513 const struct iomap_ops *ops) 1510 const struct iomap_ops *ops)
1514{ 1511{
1515 return VM_FAULT_FALLBACK; 1512 return VM_FAULT_FALLBACK;
@@ -1529,7 +1526,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1529 * has done all the necessary locking for page fault to proceed 1526 * has done all the necessary locking for page fault to proceed
1530 * successfully. 1527 * successfully.
1531 */ 1528 */
1532int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, 1529vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
1533 pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) 1530 pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
1534{ 1531{
1535 switch (pe_size) { 1532 switch (pe_size) {
@@ -1553,14 +1550,14 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
1553 * DAX file. It takes care of marking corresponding radix tree entry as dirty 1550 * DAX file. It takes care of marking corresponding radix tree entry as dirty
1554 * as well. 1551 * as well.
1555 */ 1552 */
1556static int dax_insert_pfn_mkwrite(struct vm_fault *vmf, 1553static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
1557 enum page_entry_size pe_size, 1554 enum page_entry_size pe_size,
1558 pfn_t pfn) 1555 pfn_t pfn)
1559{ 1556{
1560 struct address_space *mapping = vmf->vma->vm_file->f_mapping; 1557 struct address_space *mapping = vmf->vma->vm_file->f_mapping;
1561 void *entry, **slot; 1558 void *entry, **slot;
1562 pgoff_t index = vmf->pgoff; 1559 pgoff_t index = vmf->pgoff;
1563 int vmf_ret, error; 1560 vm_fault_t ret;
1564 1561
1565 xa_lock_irq(&mapping->i_pages); 1562 xa_lock_irq(&mapping->i_pages);
1566 entry = get_unlocked_mapping_entry(mapping, index, &slot); 1563 entry = get_unlocked_mapping_entry(mapping, index, &slot);
@@ -1579,21 +1576,20 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
1579 xa_unlock_irq(&mapping->i_pages); 1576 xa_unlock_irq(&mapping->i_pages);
1580 switch (pe_size) { 1577 switch (pe_size) {
1581 case PE_SIZE_PTE: 1578 case PE_SIZE_PTE:
1582 error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); 1579 ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
1583 vmf_ret = dax_fault_return(error);
1584 break; 1580 break;
1585#ifdef CONFIG_FS_DAX_PMD 1581#ifdef CONFIG_FS_DAX_PMD
1586 case PE_SIZE_PMD: 1582 case PE_SIZE_PMD:
1587 vmf_ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, 1583 ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
1588 pfn, true); 1584 pfn, true);
1589 break; 1585 break;
1590#endif 1586#endif
1591 default: 1587 default:
1592 vmf_ret = VM_FAULT_FALLBACK; 1588 ret = VM_FAULT_FALLBACK;
1593 } 1589 }
1594 put_locked_mapping_entry(mapping, index); 1590 put_locked_mapping_entry(mapping, index);
1595 trace_dax_insert_pfn_mkwrite(mapping->host, vmf, vmf_ret); 1591 trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
1596 return vmf_ret; 1592 return ret;
1597} 1593}
1598 1594
1599/** 1595/**
@@ -1606,8 +1602,8 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
1606 * stored persistently on the media and handles inserting of appropriate page 1602 * stored persistently on the media and handles inserting of appropriate page
1607 * table entry. 1603 * table entry.
1608 */ 1604 */
1609int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, 1605vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
1610 pfn_t pfn) 1606 enum page_entry_size pe_size, pfn_t pfn)
1611{ 1607{
1612 int err; 1608 int err;
1613 loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT; 1609 loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c42169459298..12273b6ea56d 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -23,7 +23,7 @@
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/pid_namespace.h> 24#include <linux/pid_namespace.h>
25#include <linux/user_namespace.h> 25#include <linux/user_namespace.h>
26#include <linux/shmem_fs.h> 26#include <linux/memfd.h>
27#include <linux/compat.h> 27#include <linux/compat.h>
28 28
29#include <linux/poll.h> 29#include <linux/poll.h>
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 97a972efab83..68728de12864 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -788,35 +788,34 @@ static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
788 spin_unlock(&lockres->l_lock); 788 spin_unlock(&lockres->l_lock);
789} 789}
790 790
791static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, 791static struct ocfs2_lock_holder *
792 struct ocfs2_lock_holder *oh) 792ocfs2_pid_holder(struct ocfs2_lock_res *lockres,
793{ 793 struct pid *pid)
794 spin_lock(&lockres->l_lock);
795 list_del(&oh->oh_list);
796 spin_unlock(&lockres->l_lock);
797
798 put_pid(oh->oh_owner_pid);
799}
800
801static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres)
802{ 794{
803 struct ocfs2_lock_holder *oh; 795 struct ocfs2_lock_holder *oh;
804 struct pid *pid;
805 796
806 /* look in the list of holders for one with the current task as owner */
807 spin_lock(&lockres->l_lock); 797 spin_lock(&lockres->l_lock);
808 pid = task_pid(current);
809 list_for_each_entry(oh, &lockres->l_holders, oh_list) { 798 list_for_each_entry(oh, &lockres->l_holders, oh_list) {
810 if (oh->oh_owner_pid == pid) { 799 if (oh->oh_owner_pid == pid) {
811 spin_unlock(&lockres->l_lock); 800 spin_unlock(&lockres->l_lock);
812 return 1; 801 return oh;
813 } 802 }
814 } 803 }
815 spin_unlock(&lockres->l_lock); 804 spin_unlock(&lockres->l_lock);
805 return NULL;
806}
816 807
817 return 0; 808static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
809 struct ocfs2_lock_holder *oh)
810{
811 spin_lock(&lockres->l_lock);
812 list_del(&oh->oh_list);
813 spin_unlock(&lockres->l_lock);
814
815 put_pid(oh->oh_owner_pid);
818} 816}
819 817
818
820static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 819static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
821 int level) 820 int level)
822{ 821{
@@ -2610,34 +2609,93 @@ void ocfs2_inode_unlock(struct inode *inode,
2610 * 2609 *
2611 * return < 0 on error, return == 0 if there's no lock holder on the stack 2610 * return < 0 on error, return == 0 if there's no lock holder on the stack
2612 * before this call, return == 1 if this call would be a recursive locking. 2611 * before this call, return == 1 if this call would be a recursive locking.
2612 * return == -EINVAL if this lock attempt would cause an upgrade, which is forbidden.
2613 *
2614 * When taking lock levels into account, we face some different situations.
2615 *
2616 * 1. no lock is held
2617 * In this case, just lock the inode as requested and return 0
2618 *
2619 * 2. We are holding a lock
2620 * For this situation, things diverge into several cases
2621 *
2622 * wanted       holding         what to do
2623 * ex           ex              see 2.1 below
2624 * ex           pr              see 2.2 below
2625 * pr           ex              see 2.1 below
2626 * pr           pr              see 2.1 below
2627 *
2628 * 2.1 The lock level that is being held is compatible
2629 * with the wanted level, so no lock action will be taken.
2630 *
2631 * 2.2 Otherwise, an upgrade is needed, but it is forbidden.
2632 *
2633 * The reason an upgrade within a process is forbidden is that
2634 * a lock upgrade may cause deadlock. The following illustrates
2635 * how it happens.
2636 *
2637 *        thread on node1                          thread on node2
2638 * ocfs2_inode_lock_tracker(ex=0)
2639 *
2640 *                              <======  ocfs2_inode_lock_tracker(ex=1)
2641 *
2642 * ocfs2_inode_lock_tracker(ex=1)
2613 */ 2643 */
2614int ocfs2_inode_lock_tracker(struct inode *inode, 2644int ocfs2_inode_lock_tracker(struct inode *inode,
2615 struct buffer_head **ret_bh, 2645 struct buffer_head **ret_bh,
2616 int ex, 2646 int ex,
2617 struct ocfs2_lock_holder *oh) 2647 struct ocfs2_lock_holder *oh)
2618{ 2648{
2619 int status; 2649 int status = 0;
2620 int arg_flags = 0, has_locked;
2621 struct ocfs2_lock_res *lockres; 2650 struct ocfs2_lock_res *lockres;
2651 struct ocfs2_lock_holder *tmp_oh;
2652 struct pid *pid = task_pid(current);
2653
2622 2654
2623 lockres = &OCFS2_I(inode)->ip_inode_lockres; 2655 lockres = &OCFS2_I(inode)->ip_inode_lockres;
2624 has_locked = ocfs2_is_locked_by_me(lockres); 2656 tmp_oh = ocfs2_pid_holder(lockres, pid);
2625 /* Just get buffer head if the cluster lock has been taken */
2626 if (has_locked)
2627 arg_flags = OCFS2_META_LOCK_GETBH;
2628 2657
2629 if (likely(!has_locked || ret_bh)) { 2658 if (!tmp_oh) {
2630 status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags); 2659 /*
2660 * This corresponds to case 1.
2661 * We do not hold any lock yet.
2662 */
2663 status = ocfs2_inode_lock_full(inode, ret_bh, ex, 0);
2631 if (status < 0) { 2664 if (status < 0) {
2632 if (status != -ENOENT) 2665 if (status != -ENOENT)
2633 mlog_errno(status); 2666 mlog_errno(status);
2634 return status; 2667 return status;
2635 } 2668 }
2636 } 2669
2637 if (!has_locked) 2670 oh->oh_ex = ex;
2638 ocfs2_add_holder(lockres, oh); 2671 ocfs2_add_holder(lockres, oh);
2672 return 0;
2673 }
2639 2674
2640 return has_locked; 2675 if (unlikely(ex && !tmp_oh->oh_ex)) {
2676 /*
2677 * case 2.2: an upgrade may cause deadlock, so forbid it.
2678 */
2679 mlog(ML_ERROR, "Recursive locking is not permitted to "
2680 "upgrade to EX level from PR level.\n");
2681 dump_stack();
2682 return -EINVAL;
2683 }
2684
2685 /*
2686 * case 2.1: the OCFS2_META_LOCK_GETBH flag makes ocfs2_inode_lock_full()
2687 * ignore the lock level and just return the buffer head.
2688 */
2689 if (ret_bh) {
2690 status = ocfs2_inode_lock_full(inode, ret_bh, ex,
2691 OCFS2_META_LOCK_GETBH);
2692 if (status < 0) {
2693 if (status != -ENOENT)
2694 mlog_errno(status);
2695 return status;
2696 }
2697 }
2698 return tmp_oh ? 1 : 0;
2641} 2699}
2642 2700
2643void ocfs2_inode_unlock_tracker(struct inode *inode, 2701void ocfs2_inode_unlock_tracker(struct inode *inode,
@@ -2649,12 +2707,13 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
2649 2707
2650 lockres = &OCFS2_I(inode)->ip_inode_lockres; 2708 lockres = &OCFS2_I(inode)->ip_inode_lockres;
2651 /* had_lock means that the current process already took the cluster 2709 /* had_lock means that the current process already took the cluster
2652 * lock previously. If had_lock is 1, we have nothing to do here, and 2710 * lock previously.
2653 * it will get unlocked where we got the lock. 2711 * If had_lock is 1, we have nothing to do here.
2712 * If had_lock is 0, we will release the lock.
2654 */ 2713 */
2655 if (!had_lock) { 2714 if (!had_lock) {
2715 ocfs2_inode_unlock(inode, oh->oh_ex);
2656 ocfs2_remove_holder(lockres, oh); 2716 ocfs2_remove_holder(lockres, oh);
2657 ocfs2_inode_unlock(inode, ex);
2658 } 2717 }
2659} 2718}
2660 2719
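A minimal caller sketch of the tracker pair above, assuming the usual ocfs2 pattern: the function name and the EX level choice are hypothetical, and the unlock side is assumed to take the same level plus the had_lock result returned by the lock side.

	/* Hypothetical caller, not part of this patch. */
	static int example_inode_op(struct inode *inode)
	{
		struct ocfs2_lock_holder oh;
		struct buffer_head *bh = NULL;
		int had_lock;

		/* ex=1 requests an EX cluster lock, or reuses one this process
		 * already holds; a negative return covers errors, including the
		 * forbidden PR -> EX upgrade (-EINVAL). */
		had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
		if (had_lock < 0)
			return had_lock;

		/* ... operate on the inode while the cluster lock is held ... */

		/* had_lock == 1 means an outer caller took the lock, so it is
		 * left held; had_lock == 0 drops it here. */
		ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
		brelse(bh);
		return 0;
	}

The tracking exists so that a recursive caller neither deadlocks on its own cluster lock nor drops a lock an outer caller still needs.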
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 256e0a9067b8..4ec1c828f6e0 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -96,6 +96,7 @@ struct ocfs2_trim_fs_info {
96struct ocfs2_lock_holder { 96struct ocfs2_lock_holder {
97 struct list_head oh_list; 97 struct list_head oh_list;
98 struct pid *oh_owner_pid; 98 struct pid *oh_owner_pid;
99 int oh_ex;
99}; 100};
100 101
101/* ocfs2_inode_lock_full() 'arg_flags' flags */ 102/* ocfs2_inode_lock_full() 'arg_flags' flags */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6ee94bc23f5b..a2a8603d27e0 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -563,8 +563,8 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
563 return ret; 563 return ret;
564} 564}
565 565
566static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, 566static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
567 u32 clusters_to_add, int mark_unwritten) 567 u32 clusters_to_add, int mark_unwritten)
568{ 568{
569 int status = 0; 569 int status = 0;
570 int restart_func = 0; 570 int restart_func = 0;
@@ -1035,8 +1035,8 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
1035 clusters_to_add -= oi->ip_clusters; 1035 clusters_to_add -= oi->ip_clusters;
1036 1036
1037 if (clusters_to_add) { 1037 if (clusters_to_add) {
1038 ret = __ocfs2_extend_allocation(inode, oi->ip_clusters, 1038 ret = ocfs2_extend_allocation(inode, oi->ip_clusters,
1039 clusters_to_add, 0); 1039 clusters_to_add, 0);
1040 if (ret) { 1040 if (ret) {
1041 mlog_errno(ret); 1041 mlog_errno(ret);
1042 goto out; 1042 goto out;
@@ -1493,7 +1493,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1493 goto next; 1493 goto next;
1494 } 1494 }
1495 1495
1496 ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); 1496 ret = ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
1497 if (ret) { 1497 if (ret) {
1498 if (ret != -ENOSPC) 1498 if (ret != -ENOSPC)
1499 mlog_errno(ret); 1499 mlog_errno(ret);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 1fdc9839cd93..7eb7f03531f6 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -65,8 +65,6 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
65 u64 new_i_size, u64 zero_to); 65 u64 new_i_size, u64 zero_to);
66int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, 66int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
67 loff_t zero_to); 67 loff_t zero_to);
68int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
69 u32 clusters_to_add, int mark_unwritten);
70int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 68int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
71int ocfs2_getattr(const struct path *path, struct kstat *stat, 69int ocfs2_getattr(const struct path *path, struct kstat *stat,
72 u32 request_mask, unsigned int flags); 70 u32 request_mask, unsigned int flags);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index ab30c005cc4b..994726ada857 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -402,7 +402,7 @@ out_err:
402static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist, 402static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist,
403 unsigned int chunksize) 403 unsigned int chunksize)
404{ 404{
405 int index; 405 u32 index;
406 406
407 index = __ilog2_u32(chunksize); 407 index = __ilog2_u32(chunksize);
408 if (index >= OCFS2_INFO_MAX_HIST) 408 if (index >= OCFS2_INFO_MAX_HIST)
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index fb9a20e3d608..05220b365fb9 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -44,11 +44,11 @@
44#include "ocfs2_trace.h" 44#include "ocfs2_trace.h"
45 45
46 46
47static int ocfs2_fault(struct vm_fault *vmf) 47static vm_fault_t ocfs2_fault(struct vm_fault *vmf)
48{ 48{
49 struct vm_area_struct *vma = vmf->vma; 49 struct vm_area_struct *vma = vmf->vma;
50 sigset_t oldset; 50 sigset_t oldset;
51 int ret; 51 vm_fault_t ret;
52 52
53 ocfs2_block_signals(&oldset); 53 ocfs2_block_signals(&oldset);
54 ret = filemap_fault(vmf); 54 ret = filemap_fault(vmf);
@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_fault *vmf)
59 return ret; 59 return ret;
60} 60}
61 61
62static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, 62static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
63 struct page *page) 63 struct buffer_head *di_bh, struct page *page)
64{ 64{
65 int ret = VM_FAULT_NOPAGE; 65 int err;
66 vm_fault_t ret = VM_FAULT_NOPAGE;
66 struct inode *inode = file_inode(file); 67 struct inode *inode = file_inode(file);
67 struct address_space *mapping = inode->i_mapping; 68 struct address_space *mapping = inode->i_mapping;
68 loff_t pos = page_offset(page); 69 loff_t pos = page_offset(page);
@@ -105,15 +106,12 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
105 if (page->index == last_index) 106 if (page->index == last_index)
106 len = ((size - 1) & ~PAGE_MASK) + 1; 107 len = ((size - 1) & ~PAGE_MASK) + 1;
107 108
108 ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP, 109 err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
109 &locked_page, &fsdata, di_bh, page); 110 &locked_page, &fsdata, di_bh, page);
110 if (ret) { 111 if (err) {
111 if (ret != -ENOSPC) 112 if (err != -ENOSPC)
112 mlog_errno(ret); 113 mlog_errno(err);
113 if (ret == -ENOMEM) 114 ret = vmf_error(err);
114 ret = VM_FAULT_OOM;
115 else
116 ret = VM_FAULT_SIGBUS;
117 goto out; 115 goto out;
118 } 116 }
119 117
@@ -121,20 +119,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
121 ret = VM_FAULT_NOPAGE; 119 ret = VM_FAULT_NOPAGE;
122 goto out; 120 goto out;
123 } 121 }
124 ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata); 122 err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
125 BUG_ON(ret != len); 123 BUG_ON(err != len);
126 ret = VM_FAULT_LOCKED; 124 ret = VM_FAULT_LOCKED;
127out: 125out:
128 return ret; 126 return ret;
129} 127}
130 128
131static int ocfs2_page_mkwrite(struct vm_fault *vmf) 129static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
132{ 130{
133 struct page *page = vmf->page; 131 struct page *page = vmf->page;
134 struct inode *inode = file_inode(vmf->vma->vm_file); 132 struct inode *inode = file_inode(vmf->vma->vm_file);
135 struct buffer_head *di_bh = NULL; 133 struct buffer_head *di_bh = NULL;
136 sigset_t oldset; 134 sigset_t oldset;
137 int ret; 135 int err;
136 vm_fault_t ret;
138 137
139 sb_start_pagefault(inode->i_sb); 138 sb_start_pagefault(inode->i_sb);
140 ocfs2_block_signals(&oldset); 139 ocfs2_block_signals(&oldset);
@@ -144,13 +143,10 @@ static int ocfs2_page_mkwrite(struct vm_fault *vmf)
144 * node. Taking the data lock will also ensure that we don't 143 * node. Taking the data lock will also ensure that we don't
145 * attempt page truncation as part of a downconvert. 144 * attempt page truncation as part of a downconvert.
146 */ 145 */
147 ret = ocfs2_inode_lock(inode, &di_bh, 1); 146 err = ocfs2_inode_lock(inode, &di_bh, 1);
148 if (ret < 0) { 147 if (err < 0) {
149 mlog_errno(ret); 148 mlog_errno(err);
150 if (ret == -ENOMEM) 149 ret = vmf_error(err);
151 ret = VM_FAULT_OOM;
152 else
153 ret = VM_FAULT_SIGBUS;
154 goto out; 150 goto out;
155 } 151 }
156 152
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8dd6f703c819..b7ca84bc3df7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2332,8 +2332,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2332 struct buffer_head *orphan_dir_bh, 2332 struct buffer_head *orphan_dir_bh,
2333 bool dio) 2333 bool dio)
2334{ 2334{
2335 const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN; 2335 char name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1];
2336 char name[namelen + 1];
2337 struct ocfs2_dinode *orphan_fe; 2336 struct ocfs2_dinode *orphan_fe;
2338 int status = 0; 2337 int status = 0;
2339 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2338 struct ocfs2_dir_lookup_result lookup = { NULL, };
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5bb4a89f9045..7071ad0dec90 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -807,11 +807,11 @@ struct ocfs2_dir_block_trailer {
807 * in this block. (unused) */ 807 * in this block. (unused) */
808/*10*/ __u8 db_signature[8]; /* Signature for verification */ 808/*10*/ __u8 db_signature[8]; /* Signature for verification */
809 __le64 db_reserved2; 809 __le64 db_reserved2;
810 __le64 db_free_next; /* Next block in list (unused) */ 810/*20*/ __le64 db_free_next; /* Next block in list (unused) */
811/*20*/ __le64 db_blkno; /* Offset on disk, in blocks */ 811 __le64 db_blkno; /* Offset on disk, in blocks */
812 __le64 db_parent_dinode; /* dinode which owns me, in 812/*30*/ __le64 db_parent_dinode; /* dinode which owns me, in
813 blocks */ 813 blocks */
814/*30*/ struct ocfs2_block_check db_check; /* Error checking */ 814 struct ocfs2_block_check db_check; /* Error checking */
815/*40*/ 815/*40*/
816}; 816};
817 817
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 004077f1a7bf..0ceb3b6b37e7 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -268,7 +268,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
268 unsigned long flags; 268 unsigned long flags;
269 sigset_t pending, shpending, blocked, ignored, caught; 269 sigset_t pending, shpending, blocked, ignored, caught;
270 int num_threads = 0; 270 int num_threads = 0;
271 unsigned long qsize = 0; 271 unsigned int qsize = 0;
272 unsigned long qlim = 0; 272 unsigned long qlim = 0;
273 273
274 sigemptyset(&pending); 274 sigemptyset(&pending);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index af128b374143..44dec22e5e9e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -213,10 +213,14 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
213 char *page; 213 char *page;
214 unsigned long count = _count; 214 unsigned long count = _count;
215 unsigned long arg_start, arg_end, env_start, env_end; 215 unsigned long arg_start, arg_end, env_start, env_end;
216 unsigned long len1, len2, len; 216 unsigned long len1, len2;
217 unsigned long p; 217 char __user *buf0 = buf;
218 struct {
219 unsigned long p;
220 unsigned long len;
221 } cmdline[2];
218 char c; 222 char c;
219 ssize_t rv; 223 int rv;
220 224
221 BUG_ON(*pos < 0); 225 BUG_ON(*pos < 0);
222 226
@@ -239,12 +243,12 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
239 goto out_mmput; 243 goto out_mmput;
240 } 244 }
241 245
242 down_read(&mm->mmap_sem); 246 spin_lock(&mm->arg_lock);
243 arg_start = mm->arg_start; 247 arg_start = mm->arg_start;
244 arg_end = mm->arg_end; 248 arg_end = mm->arg_end;
245 env_start = mm->env_start; 249 env_start = mm->env_start;
246 env_end = mm->env_end; 250 env_end = mm->env_end;
247 up_read(&mm->mmap_sem); 251 spin_unlock(&mm->arg_lock);
248 252
249 BUG_ON(arg_start > arg_end); 253 BUG_ON(arg_start > arg_end);
250 BUG_ON(env_start > env_end); 254 BUG_ON(env_start > env_end);
@@ -253,61 +257,31 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
253 len2 = env_end - env_start; 257 len2 = env_end - env_start;
254 258
255 /* Empty ARGV. */ 259 /* Empty ARGV. */
256 if (len1 == 0) { 260 if (len1 == 0)
257 rv = 0; 261 goto end;
258 goto out_free_page; 262
259 }
260 /* 263 /*
261 * Inherently racy -- command line shares address space 264 * Inherently racy -- command line shares address space
262 * with code and data. 265 * with code and data.
263 */ 266 */
264 rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); 267 if (access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON) != 1)
265 if (rv <= 0) 268 goto end;
266 goto out_free_page;
267
268 rv = 0;
269 269
270 cmdline[0].p = arg_start;
271 cmdline[0].len = len1;
270 if (c == '\0') { 272 if (c == '\0') {
271 /* Command line (set of strings) occupies whole ARGV. */ 273 /* Command line (set of strings) occupies whole ARGV. */
272 if (len1 <= *pos) 274 cmdline[1].len = 0;
273 goto out_free_page;
274
275 p = arg_start + *pos;
276 len = len1 - *pos;
277 while (count > 0 && len > 0) {
278 unsigned int _count;
279 int nr_read;
280
281 _count = min3(count, len, PAGE_SIZE);
282 nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
283 if (nr_read < 0)
284 rv = nr_read;
285 if (nr_read <= 0)
286 goto out_free_page;
287
288 if (copy_to_user(buf, page, nr_read)) {
289 rv = -EFAULT;
290 goto out_free_page;
291 }
292
293 p += nr_read;
294 len -= nr_read;
295 buf += nr_read;
296 count -= nr_read;
297 rv += nr_read;
298 }
299 } else { 275 } else {
300 /* 276 /*
301 * Command line (1 string) occupies ARGV and 277 * Command line (1 string) occupies ARGV and
302 * extends into ENVP. 278 * extends into ENVP.
303 */ 279 */
304 struct { 280 cmdline[1].p = env_start;
305 unsigned long p; 281 cmdline[1].len = len2;
306 unsigned long len; 282 }
307 } cmdline[2] = { 283
308 { .p = arg_start, .len = len1 }, 284 {
309 { .p = env_start, .len = len2 },
310 };
311 loff_t pos1 = *pos; 285 loff_t pos1 = *pos;
312 unsigned int i; 286 unsigned int i;
313 287
@@ -317,44 +291,40 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
317 i++; 291 i++;
318 } 292 }
319 while (i < 2) { 293 while (i < 2) {
294 unsigned long p;
295 unsigned long len;
296
320 p = cmdline[i].p + pos1; 297 p = cmdline[i].p + pos1;
321 len = cmdline[i].len - pos1; 298 len = cmdline[i].len - pos1;
322 while (count > 0 && len > 0) { 299 while (count > 0 && len > 0) {
323 unsigned int _count, l; 300 unsigned int nr_read, nr_write;
324 int nr_read; 301
325 bool final; 302 nr_read = min3(count, len, PAGE_SIZE);
326 303 nr_read = access_remote_vm(mm, p, page, nr_read, FOLL_ANON);
327 _count = min3(count, len, PAGE_SIZE); 304 if (nr_read == 0)
328 nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); 305 goto end;
329 if (nr_read < 0)
330 rv = nr_read;
331 if (nr_read <= 0)
332 goto out_free_page;
333 306
334 /* 307 /*
335 * Command line can be shorter than whole ARGV 308 * Command line can be shorter than whole ARGV
336 * even if last "marker" byte says it is not. 309 * even if last "marker" byte says it is not.
337 */ 310 */
338 final = false; 311 if (c == '\0')
339 l = strnlen(page, nr_read); 312 nr_write = nr_read;
340 if (l < nr_read) { 313 else
341 nr_read = l; 314 nr_write = strnlen(page, nr_read);
342 final = true;
343 }
344 315
345 if (copy_to_user(buf, page, nr_read)) { 316 if (copy_to_user(buf, page, nr_write)) {
346 rv = -EFAULT; 317 rv = -EFAULT;
347 goto out_free_page; 318 goto out_free_page;
348 } 319 }
349 320
350 p += nr_read; 321 p += nr_write;
351 len -= nr_read; 322 len -= nr_write;
352 buf += nr_read; 323 buf += nr_write;
353 count -= nr_read; 324 count -= nr_write;
354 rv += nr_read;
355 325
356 if (final) 326 if (nr_write < nr_read)
357 goto out_free_page; 327 goto end;
358 } 328 }
359 329
360 /* Only first chunk can be read partially. */ 330 /* Only first chunk can be read partially. */
@@ -363,12 +333,13 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
363 } 333 }
364 } 334 }
365 335
336end:
337 *pos += buf - buf0;
338 rv = buf - buf0;
366out_free_page: 339out_free_page:
367 free_page((unsigned long)page); 340 free_page((unsigned long)page);
368out_mmput: 341out_mmput:
369 mmput(mm); 342 mmput(mm);
370 if (rv > 0)
371 *pos += rv;
372 return rv; 343 return rv;
373} 344}
374 345
@@ -430,7 +401,6 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
430 struct stack_trace trace; 401 struct stack_trace trace;
431 unsigned long *entries; 402 unsigned long *entries;
432 int err; 403 int err;
433 int i;
434 404
435 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 405 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
436 if (!entries) 406 if (!entries)
@@ -443,6 +413,8 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
443 413
444 err = lock_trace(task); 414 err = lock_trace(task);
445 if (!err) { 415 if (!err) {
416 unsigned int i;
417
446 save_stack_trace_tsk(task, &trace); 418 save_stack_trace_tsk(task, &trace);
447 419
448 for (i = 0; i < trace.nr_entries; i++) { 420 for (i = 0; i < trace.nr_entries; i++) {
@@ -927,10 +899,10 @@ static ssize_t environ_read(struct file *file, char __user *buf,
927 if (!mmget_not_zero(mm)) 899 if (!mmget_not_zero(mm))
928 goto free; 900 goto free;
929 901
930 down_read(&mm->mmap_sem); 902 spin_lock(&mm->arg_lock);
931 env_start = mm->env_start; 903 env_start = mm->env_start;
932 env_end = mm->env_end; 904 env_end = mm->env_end;
933 up_read(&mm->mmap_sem); 905 spin_unlock(&mm->arg_lock);
934 906
935 while (count > 0) { 907 while (count > 0) {
936 size_t this_len, max_len; 908 size_t this_len, max_len;
@@ -1784,9 +1756,9 @@ int pid_getattr(const struct path *path, struct kstat *stat,
1784 1756
1785 generic_fillattr(inode, stat); 1757 generic_fillattr(inode, stat);
1786 1758
1787 rcu_read_lock();
1788 stat->uid = GLOBAL_ROOT_UID; 1759 stat->uid = GLOBAL_ROOT_UID;
1789 stat->gid = GLOBAL_ROOT_GID; 1760 stat->gid = GLOBAL_ROOT_GID;
1761 rcu_read_lock();
1790 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1762 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1791 if (task) { 1763 if (task) {
1792 if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { 1764 if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
@@ -1875,7 +1847,7 @@ const struct dentry_operations pid_dentry_operations =
1875 * by stat. 1847 * by stat.
1876 */ 1848 */
1877bool proc_fill_cache(struct file *file, struct dir_context *ctx, 1849bool proc_fill_cache(struct file *file, struct dir_context *ctx,
1878 const char *name, int len, 1850 const char *name, unsigned int len,
1879 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1851 instantiate_t instantiate, struct task_struct *task, const void *ptr)
1880{ 1852{
1881 struct dentry *child, *dir = file->f_path.dentry; 1853 struct dentry *child, *dir = file->f_path.dentry;
@@ -3251,7 +3223,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
3251 iter.task; 3223 iter.task;
3252 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3224 iter.tgid += 1, iter = next_tgid(ns, iter)) {
3253 char name[10 + 1]; 3225 char name[10 + 1];
3254 int len; 3226 unsigned int len;
3255 3227
3256 cond_resched(); 3228 cond_resched();
3257 if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) 3229 if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
@@ -3578,7 +3550,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
3578 task; 3550 task;
3579 task = next_tid(task), ctx->pos++) { 3551 task = next_tid(task), ctx->pos++) {
3580 char name[10 + 1]; 3552 char name[10 + 1];
3581 int len; 3553 unsigned int len;
3582 tid = task_pid_nr_ns(task, ns); 3554 tid = task_pid_nr_ns(task, ns);
3583 len = snprintf(name, sizeof(name), "%u", tid); 3555 len = snprintf(name, sizeof(name), "%u", tid);
3584 if (!proc_fill_cache(file, ctx, name, len, 3556 if (!proc_fill_cache(file, ctx, name, len,
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 05b9893e9a22..81882a13212d 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -248,7 +248,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
248 struct file *f; 248 struct file *f;
249 struct fd_data data; 249 struct fd_data data;
250 char name[10 + 1]; 250 char name[10 + 1];
251 int len; 251 unsigned int len;
252 252
253 f = fcheck_files(files, fd); 253 f = fcheck_files(files, fd);
254 if (!f) 254 if (!f)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 93eb1906c28d..50cb22a08c2f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -163,7 +163,7 @@ extern loff_t mem_lseek(struct file *, loff_t, int);
163/* Lookups */ 163/* Lookups */
164typedef struct dentry *instantiate_t(struct dentry *, 164typedef struct dentry *instantiate_t(struct dentry *,
165 struct task_struct *, const void *); 165 struct task_struct *, const void *);
166extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, 166bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int,
167 instantiate_t, struct task_struct *, const void *); 167 instantiate_t, struct task_struct *, const void *);
168 168
169/* 169/*
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 1491918a33c3..792c78a49174 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -154,6 +154,8 @@ u64 stable_page_flags(struct page *page)
154 154
155 if (PageBalloon(page)) 155 if (PageBalloon(page))
156 u |= 1 << KPF_BALLOON; 156 u |= 1 << KPF_BALLOON;
157 if (PageTable(page))
158 u |= 1 << KPF_PGTABLE;
157 159
158 if (page_is_idle(page)) 160 if (page_is_idle(page))
159 u |= 1 << KPF_IDLE; 161 u |= 1 << KPF_IDLE;
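
With PageTable() now set on page-table pages (see the pgtable_page_ctor()/dtor() hunks in include/linux/mm.h later in this diff), those pages become visible to user space through the new KPF_PGTABLE bit in /proc/kpageflags. A hedged sketch that counts them; it needs root, and the bit number 26 is taken from the pagemap documentation updated alongside this series, so treat it as an assumption rather than something shown in this hunk:

/*
 * Count physical pages currently used as page tables, as reported by
 * the KPF_PGTABLE bit in /proc/kpageflags (one u64 of flags per PFN).
 */
#include <stdint.h>
#include <stdio.h>

#define KPF_PGTABLE	26	/* assumption: bit number from pagemap docs */

int main(void)
{
	FILE *f = fopen("/proc/kpageflags", "rb");
	uint64_t flags;
	unsigned long count = 0;

	if (!f) {
		perror("fopen /proc/kpageflags (root required)");
		return 1;
	}
	while (fread(&flags, sizeof(flags), 1, f) == 1)
		if (flags & (1ULL << KPF_PGTABLE))
			count++;
	fclose(f);
	printf("page-table pages: %lu\n", count);
	return 0;
}
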
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7e074138d2f2..597969db9e90 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1259,8 +1259,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
1259 if (pte_swp_soft_dirty(pte)) 1259 if (pte_swp_soft_dirty(pte))
1260 flags |= PM_SOFT_DIRTY; 1260 flags |= PM_SOFT_DIRTY;
1261 entry = pte_to_swp_entry(pte); 1261 entry = pte_to_swp_entry(pte);
1262 frame = swp_type(entry) | 1262 if (pm->show_pfn)
1263 (swp_offset(entry) << MAX_SWAPFILES_SHIFT); 1263 frame = swp_type(entry) |
1264 (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
1264 flags |= PM_SWAP; 1265 flags |= PM_SWAP;
1265 if (is_migration_entry(entry)) 1266 if (is_migration_entry(entry))
1266 page = migration_entry_to_page(entry); 1267 page = migration_entry_to_page(entry);
@@ -1311,11 +1312,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
1311#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION 1312#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1312 else if (is_swap_pmd(pmd)) { 1313 else if (is_swap_pmd(pmd)) {
1313 swp_entry_t entry = pmd_to_swp_entry(pmd); 1314 swp_entry_t entry = pmd_to_swp_entry(pmd);
1314 unsigned long offset = swp_offset(entry); 1315 unsigned long offset;
1315 1316
1316 offset += (addr & ~PMD_MASK) >> PAGE_SHIFT; 1317 if (pm->show_pfn) {
1317 frame = swp_type(entry) | 1318 offset = swp_offset(entry) +
1318 (offset << MAX_SWAPFILES_SHIFT); 1319 ((addr & ~PMD_MASK) >> PAGE_SHIFT);
1320 frame = swp_type(entry) |
1321 (offset << MAX_SWAPFILES_SHIFT);
1322 }
1319 flags |= PM_SWAP; 1323 flags |= PM_SWAP;
1320 if (pmd_swp_soft_dirty(pmd)) 1324 if (pmd_swp_soft_dirty(pmd))
1321 flags |= PM_SOFT_DIRTY; 1325 flags |= PM_SOFT_DIRTY;
@@ -1333,10 +1337,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
1333 err = add_to_pagemap(addr, &pme, pm); 1337 err = add_to_pagemap(addr, &pme, pm);
1334 if (err) 1338 if (err)
1335 break; 1339 break;
1336 if (pm->show_pfn && (flags & PM_PRESENT)) 1340 if (pm->show_pfn) {
1337 frame++; 1341 if (flags & PM_PRESENT)
1338 else if (flags & PM_SWAP) 1342 frame++;
1339 frame += (1 << MAX_SWAPFILES_SHIFT); 1343 else if (flags & PM_SWAP)
1344 frame += (1 << MAX_SWAPFILES_SHIFT);
1345 }
1340 } 1346 }
1341 spin_unlock(ptl); 1347 spin_unlock(ptl);
1342 return err; 1348 return err;
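
The effect of the new pm->show_pfn checks, seen from user space: without CAP_SYS_ADMIN the pagemap entry for a swapped page now has its swap type and offset cleared, just as the PFN already is for present pages, so an unprivileged reader only learns the status bits. A hedged, runnable sketch that reads one entry for the caller's own mapping (not kernel code):

/*
 * Read the /proc/self/pagemap entry for one of our own pages.
 * Format: bit 63 = present, bit 62 = swapped, bits 0-54 = PFN or
 * swap type/offset -- zeroed for unprivileged readers.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	char *p = malloc(page_size);
	uint64_t entry;
	int fd;

	if (!p)
		return 1;
	p[0] = 1;			/* fault the page in */

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* One 64-bit entry per virtual page of our address space. */
	if (pread(fd, &entry, sizeof(entry),
		  (off_t)((uintptr_t)p / page_size) * sizeof(entry)) !=
	    sizeof(entry)) {
		perror("pread");
		return 1;
	}
	close(fd);

	printf("present=%llu swapped=%llu low-bits=%#llx\n",
	       (unsigned long long)((entry >> 63) & 1),
	       (unsigned long long)((entry >> 62) & 1),
	       (unsigned long long)(entry & ((1ULL << 55) - 1)));
	return 0;
}
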
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index cec550c8468f..123bf7d516fc 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -62,6 +62,8 @@ struct userfaultfd_ctx {
62 enum userfaultfd_state state; 62 enum userfaultfd_state state;
63 /* released */ 63 /* released */
64 bool released; 64 bool released;
65 /* memory mappings are changing because of non-cooperative event */
66 bool mmap_changing;
65 /* mm with one or more vmas attached to this userfaultfd_ctx */ 67 /* mm with one or more vmas attached to this userfaultfd_ctx */
66 struct mm_struct *mm; 68 struct mm_struct *mm;
67}; 69};
@@ -641,6 +643,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
641 * already released. 643 * already released.
642 */ 644 */
643out: 645out:
646 WRITE_ONCE(ctx->mmap_changing, false);
644 userfaultfd_ctx_put(ctx); 647 userfaultfd_ctx_put(ctx);
645} 648}
646 649
@@ -686,10 +689,12 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
686 ctx->state = UFFD_STATE_RUNNING; 689 ctx->state = UFFD_STATE_RUNNING;
687 ctx->features = octx->features; 690 ctx->features = octx->features;
688 ctx->released = false; 691 ctx->released = false;
692 ctx->mmap_changing = false;
689 ctx->mm = vma->vm_mm; 693 ctx->mm = vma->vm_mm;
690 mmgrab(ctx->mm); 694 mmgrab(ctx->mm);
691 695
692 userfaultfd_ctx_get(octx); 696 userfaultfd_ctx_get(octx);
697 WRITE_ONCE(octx->mmap_changing, true);
693 fctx->orig = octx; 698 fctx->orig = octx;
694 fctx->new = ctx; 699 fctx->new = ctx;
695 list_add_tail(&fctx->list, fcs); 700 list_add_tail(&fctx->list, fcs);
@@ -732,6 +737,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
732 if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) { 737 if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) {
733 vm_ctx->ctx = ctx; 738 vm_ctx->ctx = ctx;
734 userfaultfd_ctx_get(ctx); 739 userfaultfd_ctx_get(ctx);
740 WRITE_ONCE(ctx->mmap_changing, true);
735 } 741 }
736} 742}
737 743
@@ -772,6 +778,7 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
772 return true; 778 return true;
773 779
774 userfaultfd_ctx_get(ctx); 780 userfaultfd_ctx_get(ctx);
781 WRITE_ONCE(ctx->mmap_changing, true);
775 up_read(&mm->mmap_sem); 782 up_read(&mm->mmap_sem);
776 783
777 msg_init(&ewq.msg); 784 msg_init(&ewq.msg);
@@ -815,6 +822,7 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma,
815 return -ENOMEM; 822 return -ENOMEM;
816 823
817 userfaultfd_ctx_get(ctx); 824 userfaultfd_ctx_get(ctx);
825 WRITE_ONCE(ctx->mmap_changing, true);
818 unmap_ctx->ctx = ctx; 826 unmap_ctx->ctx = ctx;
819 unmap_ctx->start = start; 827 unmap_ctx->start = start;
820 unmap_ctx->end = end; 828 unmap_ctx->end = end;
@@ -1653,6 +1661,10 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
1653 1661
1654 user_uffdio_copy = (struct uffdio_copy __user *) arg; 1662 user_uffdio_copy = (struct uffdio_copy __user *) arg;
1655 1663
1664 ret = -EAGAIN;
1665 if (READ_ONCE(ctx->mmap_changing))
1666 goto out;
1667
1656 ret = -EFAULT; 1668 ret = -EFAULT;
1657 if (copy_from_user(&uffdio_copy, user_uffdio_copy, 1669 if (copy_from_user(&uffdio_copy, user_uffdio_copy,
1658 /* don't copy "copy" last field */ 1670 /* don't copy "copy" last field */
@@ -1674,7 +1686,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
1674 goto out; 1686 goto out;
1675 if (mmget_not_zero(ctx->mm)) { 1687 if (mmget_not_zero(ctx->mm)) {
1676 ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src, 1688 ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
1677 uffdio_copy.len); 1689 uffdio_copy.len, &ctx->mmap_changing);
1678 mmput(ctx->mm); 1690 mmput(ctx->mm);
1679 } else { 1691 } else {
1680 return -ESRCH; 1692 return -ESRCH;
@@ -1705,6 +1717,10 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
1705 1717
1706 user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; 1718 user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
1707 1719
1720 ret = -EAGAIN;
1721 if (READ_ONCE(ctx->mmap_changing))
1722 goto out;
1723
1708 ret = -EFAULT; 1724 ret = -EFAULT;
1709 if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, 1725 if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage,
1710 /* don't copy "zeropage" last field */ 1726 /* don't copy "zeropage" last field */
@@ -1721,7 +1737,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
1721 1737
1722 if (mmget_not_zero(ctx->mm)) { 1738 if (mmget_not_zero(ctx->mm)) {
1723 ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, 1739 ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
1724 uffdio_zeropage.range.len); 1740 uffdio_zeropage.range.len,
1741 &ctx->mmap_changing);
1725 mmput(ctx->mm); 1742 mmput(ctx->mm);
1726 } else { 1743 } else {
1727 return -ESRCH; 1744 return -ESRCH;
@@ -1900,6 +1917,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
1900 ctx->features = 0; 1917 ctx->features = 0;
1901 ctx->state = UFFD_STATE_WAIT_API; 1918 ctx->state = UFFD_STATE_WAIT_API;
1902 ctx->released = false; 1919 ctx->released = false;
1920 ctx->mmap_changing = false;
1903 ctx->mm = current->mm; 1921 ctx->mm = current->mm;
1904 /* prevent the mm struct to be freed */ 1922 /* prevent the mm struct to be freed */
1905 mmgrab(ctx->mm); 1923 mmgrab(ctx->mm);
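
The mmap_changing flag gives UFFDIO_COPY and UFFDIO_ZEROPAGE a way to refuse to resolve faults while a fork, mremap, remove or unmap event is still being delivered: user space sees -EAGAIN and is expected to drain the event queue before retrying. A hedged sketch of the retry side, assuming a uffd descriptor that was created with userfaultfd(2) and registered elsewhere; the helper name is ours, not part of the kernel API:

/*
 * Retry UFFDIO_COPY until it either succeeds or fails for a reason
 * other than the target's mappings being in flux (EAGAIN).
 */
#include <errno.h>
#include <linux/userfaultfd.h>
#include <string.h>
#include <sys/ioctl.h>

int copy_page_retry(int uffd, unsigned long dst, unsigned long src,
		    unsigned long len)
{
	struct uffdio_copy copy;

	for (;;) {
		memset(&copy, 0, sizeof(copy));
		copy.dst = dst;
		copy.src = src;
		copy.len = len;

		if (ioctl(uffd, UFFDIO_COPY, &copy) == 0)
			return 0;
		if (errno != EAGAIN)
			return -1;
		/*
		 * EAGAIN: mmap_changing is set. A real monitor reads
		 * the pending non-cooperative events from uffd here
		 * before retrying, instead of spinning.
		 */
	}
}

In a full monitor this sits next to the read() loop that consumes UFFD_EVENT_FORK/REMAP/REMOVE/UNMAP messages; without that, retrying blindly would spin.
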
diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h
index ffb68d67be5f..a248545f1e18 100644
--- a/include/asm-generic/int-ll64.h
+++ b/include/asm-generic/int-ll64.h
@@ -13,17 +13,14 @@
13 13
14#ifndef __ASSEMBLY__ 14#ifndef __ASSEMBLY__
15 15
16typedef signed char s8; 16typedef __s8 s8;
17typedef unsigned char u8; 17typedef __u8 u8;
18 18typedef __s16 s16;
19typedef signed short s16; 19typedef __u16 u16;
20typedef unsigned short u16; 20typedef __s32 s32;
21 21typedef __u32 u32;
22typedef signed int s32; 22typedef __s64 s64;
23typedef unsigned int u32; 23typedef __u64 u64;
24
25typedef signed long long s64;
26typedef unsigned long long u64;
27 24
28#define S8_C(x) x 25#define S8_C(x) x
29#define U8_C(x) x ## U 26#define U8_C(x) x ## U
diff --git a/include/linux/dax.h b/include/linux/dax.h
index c99692ddd4b5..88504e87cd6c 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -125,8 +125,8 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
125 const struct iomap_ops *ops); 125 const struct iomap_ops *ops);
126int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, 126int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
127 pfn_t *pfnp, int *errp, const struct iomap_ops *ops); 127 pfn_t *pfnp, int *errp, const struct iomap_ops *ops);
128int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, 128vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
129 pfn_t pfn); 129 enum page_entry_size pe_size, pfn_t pfn);
130int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); 130int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
131int dax_invalidate_mapping_entry_sync(struct address_space *mapping, 131int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
132 pgoff_t index); 132 pgoff_t index);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fc5ab85278d5..a6afcec53795 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -24,6 +24,7 @@ struct vm_area_struct;
24#define ___GFP_HIGH 0x20u 24#define ___GFP_HIGH 0x20u
25#define ___GFP_IO 0x40u 25#define ___GFP_IO 0x40u
26#define ___GFP_FS 0x80u 26#define ___GFP_FS 0x80u
27#define ___GFP_WRITE 0x100u
27#define ___GFP_NOWARN 0x200u 28#define ___GFP_NOWARN 0x200u
28#define ___GFP_RETRY_MAYFAIL 0x400u 29#define ___GFP_RETRY_MAYFAIL 0x400u
29#define ___GFP_NOFAIL 0x800u 30#define ___GFP_NOFAIL 0x800u
@@ -36,11 +37,10 @@ struct vm_area_struct;
36#define ___GFP_THISNODE 0x40000u 37#define ___GFP_THISNODE 0x40000u
37#define ___GFP_ATOMIC 0x80000u 38#define ___GFP_ATOMIC 0x80000u
38#define ___GFP_ACCOUNT 0x100000u 39#define ___GFP_ACCOUNT 0x100000u
39#define ___GFP_DIRECT_RECLAIM 0x400000u 40#define ___GFP_DIRECT_RECLAIM 0x200000u
40#define ___GFP_WRITE 0x800000u 41#define ___GFP_KSWAPD_RECLAIM 0x400000u
41#define ___GFP_KSWAPD_RECLAIM 0x1000000u
42#ifdef CONFIG_LOCKDEP 42#ifdef CONFIG_LOCKDEP
43#define ___GFP_NOLOCKDEP 0x2000000u 43#define ___GFP_NOLOCKDEP 0x800000u
44#else 44#else
45#define ___GFP_NOLOCKDEP 0 45#define ___GFP_NOLOCKDEP 0
46#endif 46#endif
@@ -205,7 +205,7 @@ struct vm_area_struct;
205#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) 205#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
206 206
207/* Room for N __GFP_FOO bits */ 207/* Room for N __GFP_FOO bits */
208#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) 208#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
209#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) 209#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
210 210
211/* 211/*
@@ -343,7 +343,7 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
343 * 0x1 => DMA or NORMAL 343 * 0x1 => DMA or NORMAL
344 * 0x2 => HIGHMEM or NORMAL 344 * 0x2 => HIGHMEM or NORMAL
345 * 0x3 => BAD (DMA+HIGHMEM) 345 * 0x3 => BAD (DMA+HIGHMEM)
346 * 0x4 => DMA32 or DMA or NORMAL 346 * 0x4 => DMA32 or NORMAL
347 * 0x5 => BAD (DMA+DMA32) 347 * 0x5 => BAD (DMA+DMA32)
348 * 0x6 => BAD (HIGHMEM+DMA32) 348 * 0x6 => BAD (HIGHMEM+DMA32)
349 * 0x7 => BAD (HIGHMEM+DMA32+DMA) 349 * 0x7 => BAD (HIGHMEM+DMA32+DMA)
@@ -351,7 +351,7 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
351 * 0x9 => DMA or NORMAL (MOVABLE+DMA) 351 * 0x9 => DMA or NORMAL (MOVABLE+DMA)
352 * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) 352 * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too)
353 * 0xb => BAD (MOVABLE+HIGHMEM+DMA) 353 * 0xb => BAD (MOVABLE+HIGHMEM+DMA)
354 * 0xc => DMA32 (MOVABLE+DMA32) 354 * 0xc => DMA32 or NORMAL (MOVABLE+DMA32)
355 * 0xd => BAD (MOVABLE+DMA32+DMA) 355 * 0xd => BAD (MOVABLE+DMA32+DMA)
356 * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) 356 * 0xe => BAD (MOVABLE+DMA32+HIGHMEM)
357 * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) 357 * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 2f1327c37a63..4c92e3ba3e16 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -522,9 +522,7 @@ void hmm_devmem_remove(struct hmm_devmem *devmem);
522static inline void hmm_devmem_page_set_drvdata(struct page *page, 522static inline void hmm_devmem_page_set_drvdata(struct page *page,
523 unsigned long data) 523 unsigned long data)
524{ 524{
525 unsigned long *drvdata = (unsigned long *)&page->pgmap; 525 page->hmm_data = data;
526
527 drvdata[1] = data;
528} 526}
529 527
530/* 528/*
@@ -535,9 +533,7 @@ static inline void hmm_devmem_page_set_drvdata(struct page *page,
535 */ 533 */
536static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page) 534static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page)
537{ 535{
538 const unsigned long *drvdata = (const unsigned long *)&page->pgmap; 536 return page->hmm_data;
539
540 return drvdata[1];
541} 537}
542 538
543 539
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7aed92624531..7c4e8f1f72d8 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -29,6 +29,7 @@
29#define LLONG_MIN (-LLONG_MAX - 1) 29#define LLONG_MIN (-LLONG_MAX - 1)
30#define ULLONG_MAX (~0ULL) 30#define ULLONG_MAX (~0ULL)
31#define SIZE_MAX (~(size_t)0) 31#define SIZE_MAX (~(size_t)0)
32#define PHYS_ADDR_MAX (~(phys_addr_t)0)
32 33
33#define U8_MAX ((u8)~0U) 34#define U8_MAX ((u8)~0U)
34#define S8_MAX ((s8)(U8_MAX>>1)) 35#define S8_MAX ((s8)(U8_MAX>>1))
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 44368b19b27e..161e8164abcf 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -37,17 +37,6 @@ static inline void ksm_exit(struct mm_struct *mm)
37 __ksm_exit(mm); 37 __ksm_exit(mm);
38} 38}
39 39
40static inline struct stable_node *page_stable_node(struct page *page)
41{
42 return PageKsm(page) ? page_rmapping(page) : NULL;
43}
44
45static inline void set_page_stable_node(struct page *page,
46 struct stable_node *stable_node)
47{
48 page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
49}
50
51/* 40/*
52 * When do_swap_page() first faults in from swap what used to be a KSM page, 41 * When do_swap_page() first faults in from swap what used to be a KSM page,
53 * no problem, it will be assigned to this vma's anon_vma; but thereafter, 42 * no problem, it will be assigned to this vma's anon_vma; but thereafter,
@@ -89,12 +78,6 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
89 return page; 78 return page;
90} 79}
91 80
92static inline int page_referenced_ksm(struct page *page,
93 struct mem_cgroup *memcg, unsigned long *vm_flags)
94{
95 return 0;
96}
97
98static inline void rmap_walk_ksm(struct page *page, 81static inline void rmap_walk_ksm(struct page *page,
99 struct rmap_walk_control *rwc) 82 struct rmap_walk_control *rwc)
100{ 83{
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d99b71bc2c66..4f52ec755725 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -53,9 +53,17 @@ enum memcg_memory_event {
53 MEMCG_HIGH, 53 MEMCG_HIGH,
54 MEMCG_MAX, 54 MEMCG_MAX,
55 MEMCG_OOM, 55 MEMCG_OOM,
56 MEMCG_SWAP_MAX,
57 MEMCG_SWAP_FAIL,
56 MEMCG_NR_MEMORY_EVENTS, 58 MEMCG_NR_MEMORY_EVENTS,
57}; 59};
58 60
61enum mem_cgroup_protection {
62 MEMCG_PROT_NONE,
63 MEMCG_PROT_LOW,
64 MEMCG_PROT_MIN,
65};
66
59struct mem_cgroup_reclaim_cookie { 67struct mem_cgroup_reclaim_cookie {
60 pg_data_t *pgdat; 68 pg_data_t *pgdat;
61 int priority; 69 int priority;
@@ -158,6 +166,15 @@ enum memcg_kmem_state {
158 KMEM_ONLINE, 166 KMEM_ONLINE,
159}; 167};
160 168
169#if defined(CONFIG_SMP)
170struct memcg_padding {
171 char x[0];
172} ____cacheline_internodealigned_in_smp;
173#define MEMCG_PADDING(name) struct memcg_padding name;
174#else
175#define MEMCG_PADDING(name)
176#endif
177
161/* 178/*
162 * The memory controller data structure. The memory controller controls both 179 * The memory controller data structure. The memory controller controls both
163 * page cache and RSS per cgroup. We would eventually like to provide 180 * page cache and RSS per cgroup. We would eventually like to provide
@@ -179,8 +196,7 @@ struct mem_cgroup {
179 struct page_counter kmem; 196 struct page_counter kmem;
180 struct page_counter tcpmem; 197 struct page_counter tcpmem;
181 198
182 /* Normal memory consumption range */ 199 /* Upper bound of normal memory consumption range */
183 unsigned long low;
184 unsigned long high; 200 unsigned long high;
185 201
186 /* Range enforcement for interrupt charges */ 202 /* Range enforcement for interrupt charges */
@@ -205,9 +221,11 @@ struct mem_cgroup {
205 int oom_kill_disable; 221 int oom_kill_disable;
206 222
207 /* memory.events */ 223 /* memory.events */
208 atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
209 struct cgroup_file events_file; 224 struct cgroup_file events_file;
210 225
226 /* handle for "memory.swap.events" */
227 struct cgroup_file swap_events_file;
228
211 /* protect arrays of thresholds */ 229 /* protect arrays of thresholds */
212 struct mutex thresholds_lock; 230 struct mutex thresholds_lock;
213 231
@@ -225,19 +243,26 @@ struct mem_cgroup {
225 * mem_cgroup ? And what type of charges should we move ? 243 * mem_cgroup ? And what type of charges should we move ?
226 */ 244 */
227 unsigned long move_charge_at_immigrate; 245 unsigned long move_charge_at_immigrate;
246 /* taken only while moving_account > 0 */
247 spinlock_t move_lock;
248 unsigned long move_lock_flags;
249
250 MEMCG_PADDING(_pad1_);
251
228 /* 252 /*
229 * set > 0 if pages under this cgroup are moving to other cgroup. 253 * set > 0 if pages under this cgroup are moving to other cgroup.
230 */ 254 */
231 atomic_t moving_account; 255 atomic_t moving_account;
232 /* taken only while moving_account > 0 */
233 spinlock_t move_lock;
234 struct task_struct *move_lock_task; 256 struct task_struct *move_lock_task;
235 unsigned long move_lock_flags;
236 257
237 /* memory.stat */ 258 /* memory.stat */
238 struct mem_cgroup_stat_cpu __percpu *stat_cpu; 259 struct mem_cgroup_stat_cpu __percpu *stat_cpu;
260
261 MEMCG_PADDING(_pad2_);
262
239 atomic_long_t stat[MEMCG_NR_STAT]; 263 atomic_long_t stat[MEMCG_NR_STAT];
240 atomic_long_t events[NR_VM_EVENT_ITEMS]; 264 atomic_long_t events[NR_VM_EVENT_ITEMS];
265 atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
241 266
242 unsigned long socket_pressure; 267 unsigned long socket_pressure;
243 268
@@ -285,7 +310,8 @@ static inline bool mem_cgroup_disabled(void)
285 return !cgroup_subsys_enabled(memory_cgrp_subsys); 310 return !cgroup_subsys_enabled(memory_cgrp_subsys);
286} 311}
287 312
288bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); 313enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
314 struct mem_cgroup *memcg);
289 315
290int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, 316int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
291 gfp_t gfp_mask, struct mem_cgroup **memcgp, 317 gfp_t gfp_mask, struct mem_cgroup **memcgp,
@@ -462,7 +488,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
462 488
463void mem_cgroup_handle_over_high(void); 489void mem_cgroup_handle_over_high(void);
464 490
465unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg); 491unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
466 492
467void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 493void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
468 struct task_struct *p); 494 struct task_struct *p);
@@ -730,10 +756,10 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
730{ 756{
731} 757}
732 758
733static inline bool mem_cgroup_low(struct mem_cgroup *root, 759static inline enum mem_cgroup_protection mem_cgroup_protected(
734 struct mem_cgroup *memcg) 760 struct mem_cgroup *root, struct mem_cgroup *memcg)
735{ 761{
736 return false; 762 return MEMCG_PROT_NONE;
737} 763}
738 764
739static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, 765static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
@@ -853,7 +879,7 @@ mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
853 return 0; 879 return 0;
854} 880}
855 881
856static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) 882static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
857{ 883{
858 return 0; 884 return 0;
859} 885}
@@ -1093,7 +1119,6 @@ static inline void dec_lruvec_page_state(struct page *page,
1093 1119
1094#ifdef CONFIG_CGROUP_WRITEBACK 1120#ifdef CONFIG_CGROUP_WRITEBACK
1095 1121
1096struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
1097struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); 1122struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
1098void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, 1123void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
1099 unsigned long *pheadroom, unsigned long *pdirty, 1124 unsigned long *pheadroom, unsigned long *pdirty,
diff --git a/include/linux/memfd.h b/include/linux/memfd.h
new file mode 100644
index 000000000000..4f1600413f91
--- /dev/null
+++ b/include/linux/memfd.h
@@ -0,0 +1,16 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __LINUX_MEMFD_H
3#define __LINUX_MEMFD_H
4
5#include <linux/file.h>
6
7#ifdef CONFIG_MEMFD_CREATE
8extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
9#else
10static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a)
11{
12 return -EINVAL;
13}
14#endif
15
16#endif /* __LINUX_MEMFD_H */
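
The new memfd_fcntl() declaration is the hook used to handle F_ADD_SEALS/F_GET_SEALS on memfd files (memfd sealing). A minimal user-space round trip, assuming glibc 2.27+ for the memfd_create() wrapper:

/* Create a sealable memfd, seal its size, and read the seals back. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = memfd_create("demo", MFD_ALLOW_SEALING);

	if (fd < 0) {
		perror("memfd_create");
		return 1;
	}
	if (ftruncate(fd, 4096) < 0 ||
	    fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK) < 0) {
		perror("seal");
		return 1;
	}
	printf("seals: %#x\n", fcntl(fd, F_GET_SEALS));
	close(fd);
	return 0;
}
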
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 2b0265265c28..4e9828cda7a2 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -107,7 +107,6 @@ static inline bool movable_node_is_enabled(void)
107} 107}
108 108
109#ifdef CONFIG_MEMORY_HOTREMOVE 109#ifdef CONFIG_MEMORY_HOTREMOVE
110extern bool is_pageblock_removable_nolock(struct page *page);
111extern int arch_remove_memory(u64 start, u64 size, 110extern int arch_remove_memory(u64 start, u64 size,
112 struct vmem_altmap *altmap); 111 struct vmem_altmap *altmap);
113extern int __remove_pages(struct zone *zone, unsigned long start_pfn, 112extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 29c5458b4b5e..4c3881b44ef1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1851,6 +1851,7 @@ static inline bool pgtable_page_ctor(struct page *page)
1851{ 1851{
1852 if (!ptlock_init(page)) 1852 if (!ptlock_init(page))
1853 return false; 1853 return false;
1854 __SetPageTable(page);
1854 inc_zone_page_state(page, NR_PAGETABLE); 1855 inc_zone_page_state(page, NR_PAGETABLE);
1855 return true; 1856 return true;
1856} 1857}
@@ -1858,6 +1859,7 @@ static inline bool pgtable_page_ctor(struct page *page)
1858static inline void pgtable_page_dtor(struct page *page) 1859static inline void pgtable_page_dtor(struct page *page)
1859{ 1860{
1860 pte_lock_deinit(page); 1861 pte_lock_deinit(page);
1862 __ClearPageTable(page);
1861 dec_zone_page_state(page, NR_PAGETABLE); 1863 dec_zone_page_state(page, NR_PAGETABLE);
1862} 1864}
1863 1865
@@ -2303,10 +2305,10 @@ extern void truncate_inode_pages_range(struct address_space *,
2303extern void truncate_inode_pages_final(struct address_space *); 2305extern void truncate_inode_pages_final(struct address_space *);
2304 2306
2305/* generic vm_area_ops exported for stackable file systems */ 2307/* generic vm_area_ops exported for stackable file systems */
2306extern int filemap_fault(struct vm_fault *vmf); 2308extern vm_fault_t filemap_fault(struct vm_fault *vmf);
2307extern void filemap_map_pages(struct vm_fault *vmf, 2309extern void filemap_map_pages(struct vm_fault *vmf,
2308 pgoff_t start_pgoff, pgoff_t end_pgoff); 2310 pgoff_t start_pgoff, pgoff_t end_pgoff);
2309extern int filemap_page_mkwrite(struct vm_fault *vmf); 2311extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
2310 2312
2311/* mm/page-writeback.c */ 2313/* mm/page-writeback.c */
2312int __must_check write_one_page(struct page *page); 2314int __must_check write_one_page(struct page *page);
@@ -2431,8 +2433,8 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
2431 unsigned long pfn, pgprot_t pgprot); 2433 unsigned long pfn, pgprot_t pgprot);
2432int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, 2434int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
2433 pfn_t pfn); 2435 pfn_t pfn);
2434int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, 2436vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
2435 pfn_t pfn); 2437 unsigned long addr, pfn_t pfn);
2436int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); 2438int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
2437 2439
2438static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, 2440static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
@@ -2530,12 +2532,10 @@ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
2530#ifdef CONFIG_PAGE_POISONING 2532#ifdef CONFIG_PAGE_POISONING
2531extern bool page_poisoning_enabled(void); 2533extern bool page_poisoning_enabled(void);
2532extern void kernel_poison_pages(struct page *page, int numpages, int enable); 2534extern void kernel_poison_pages(struct page *page, int numpages, int enable);
2533extern bool page_is_poisoned(struct page *page);
2534#else 2535#else
2535static inline bool page_poisoning_enabled(void) { return false; } 2536static inline bool page_poisoning_enabled(void) { return false; }
2536static inline void kernel_poison_pages(struct page *page, int numpages, 2537static inline void kernel_poison_pages(struct page *page, int numpages,
2537 int enable) { } 2538 int enable) { }
2538static inline bool page_is_poisoned(struct page *page) { return false; }
2539#endif 2539#endif
2540 2540
2541#ifdef CONFIG_DEBUG_PAGEALLOC 2541#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 21612347d311..99ce070e7dcb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -33,29 +33,27 @@ struct hmm;
33 * it to keep track of whatever it is we are using the page for at the 33 * it to keep track of whatever it is we are using the page for at the
34 * moment. Note that we have no way to track which tasks are using 34 * moment. Note that we have no way to track which tasks are using
35 * a page, though if it is a pagecache page, rmap structures can tell us 35 * a page, though if it is a pagecache page, rmap structures can tell us
36 * who is mapping it. If you allocate the page using alloc_pages(), you 36 * who is mapping it.
37 * can use some of the space in struct page for your own purposes.
38 * 37 *
39 * Pages that were once in the page cache may be found under the RCU lock 38 * If you allocate the page using alloc_pages(), you can use some of the
40 * even after they have been recycled to a different purpose. The page 39 * space in struct page for your own purposes. The five words in the main
41 * cache reads and writes some of the fields in struct page to pin the 40 * union are available, except for bit 0 of the first word which must be
42 * page before checking that it's still in the page cache. It is vital 41 * kept clear. Many users use this word to store a pointer to an object
43 * that all users of struct page: 42 * which is guaranteed to be aligned. If you use the same storage as
44 * 1. Use the first word as PageFlags. 43 * page->mapping, you must restore it to NULL before freeing the page.
45 * 2. Clear or preserve bit 0 of page->compound_head. It is used as
46 * PageTail for compound pages, and the page cache must not see false
47 * positives. Some users put a pointer here (guaranteed to be at least
48 * 4-byte aligned), other users avoid using the field altogether.
49 * 3. page->_refcount must either not be used, or must be used in such a
50 * way that other CPUs temporarily incrementing and then decrementing the
51 * refcount does not cause problems. On receiving the page from
52 * alloc_pages(), the refcount will be positive.
53 * 4. Either preserve page->_mapcount or restore it to -1 before freeing it.
54 * 44 *
55 * If you allocate pages of order > 0, you can use the fields in the struct 45 * If your page will not be mapped to userspace, you can also use the four
56 * page associated with each page, but bear in mind that the pages may have 46 * bytes in the mapcount union, but you must call page_mapcount_reset()
57 * been inserted individually into the page cache, so you must use the above 47 * before freeing it.
58 * four fields in a compatible way for each struct page. 48 *
49 * If you want to use the refcount field, it must be used in such a way
50 * that other CPUs temporarily incrementing and then decrementing the
51 * refcount does not cause problems. On receiving the page from
52 * alloc_pages(), the refcount will be positive.
53 *
54 * If you allocate pages of order > 0, you can use some of the fields
55 * in each subpage, but you may need to restore some of their values
56 * afterwards.
59 * 57 *
60 * SLUB uses cmpxchg_double() to atomically update its freelist and 58 * SLUB uses cmpxchg_double() to atomically update its freelist and
61 * counters. That requires that freelist & counters be adjacent and 59 * counters. That requires that freelist & counters be adjacent and
@@ -65,135 +63,122 @@ struct hmm;
65 */ 63 */
66#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE 64#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
67#define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) 65#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
68#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
69#define _slub_counter_t unsigned long
70#else 66#else
71#define _slub_counter_t unsigned int
72#endif
73#else /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */
74#define _struct_page_alignment 67#define _struct_page_alignment
75#define _slub_counter_t unsigned int 68#endif
76#endif /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */
77 69
78struct page { 70struct page {
79 /* First double word block */
80 unsigned long flags; /* Atomic flags, some possibly 71 unsigned long flags; /* Atomic flags, some possibly
81 * updated asynchronously */ 72 * updated asynchronously */
82 union {
83 /* See page-flags.h for the definition of PAGE_MAPPING_FLAGS */
84 struct address_space *mapping;
85
86 void *s_mem; /* slab first object */
87 atomic_t compound_mapcount; /* first tail page */
88 /* page_deferred_list().next -- second tail page */
89 };
90
91 /* Second double word */
92 union {
93 pgoff_t index; /* Our offset within mapping. */
94 void *freelist; /* sl[aou]b first free object */
95 /* page_deferred_list().prev -- second tail page */
96 };
97
98 union {
99 _slub_counter_t counters;
100 unsigned int active; /* SLAB */
101 struct { /* SLUB */
102 unsigned inuse:16;
103 unsigned objects:15;
104 unsigned frozen:1;
105 };
106 int units; /* SLOB */
107
108 struct { /* Page cache */
109 /*
110 * Count of ptes mapped in mms, to show when
111 * page is mapped & limit reverse map searches.
112 *
113 * Extra information about page type may be
114 * stored here for pages that are never mapped,
115 * in which case the value MUST BE <= -2.
116 * See page-flags.h for more details.
117 */
118 atomic_t _mapcount;
119
120 /*
121 * Usage count, *USE WRAPPER FUNCTION* when manual
122 * accounting. See page_ref.h
123 */
124 atomic_t _refcount;
125 };
126 };
127
128 /* 73 /*
129 * WARNING: bit 0 of the first word encode PageTail(). That means 74 * Five words (20/40 bytes) are available in this union.
130 * the rest users of the storage space MUST NOT use the bit to 75 * WARNING: bit 0 of the first word is used for PageTail(). That
76 * means the other users of this union MUST NOT use the bit to
131 * avoid collision and false-positive PageTail(). 77 * avoid collision and false-positive PageTail().
132 */ 78 */
133 union { 79 union {
134 struct list_head lru; /* Pageout list, eg. active_list 80 struct { /* Page cache and anonymous pages */
135 * protected by zone_lru_lock ! 81 /**
136 * Can be used as a generic list 82 * @lru: Pageout list, eg. active_list protected by
137 * by the page owner. 83 * zone_lru_lock. Sometimes used as a generic list
138 */ 84 * by the page owner.
139 struct dev_pagemap *pgmap; /* ZONE_DEVICE pages are never on an 85 */
140 * lru or handled by a slab 86 struct list_head lru;
141 * allocator, this points to the 87 /* See page-flags.h for PAGE_MAPPING_FLAGS */
142 * hosting device page map. 88 struct address_space *mapping;
143 */ 89 pgoff_t index; /* Our offset within mapping. */
144 struct { /* slub per cpu partial pages */ 90 /**
145 struct page *next; /* Next partial slab */ 91 * @private: Mapping-private opaque data.
92 * Usually used for buffer_heads if PagePrivate.
93 * Used for swp_entry_t if PageSwapCache.
94 * Indicates order in the buddy system if PageBuddy.
95 */
96 unsigned long private;
97 };
98 struct { /* slab, slob and slub */
99 union {
100 struct list_head slab_list; /* uses lru */
101 struct { /* Partial pages */
102 struct page *next;
146#ifdef CONFIG_64BIT 103#ifdef CONFIG_64BIT
147 int pages; /* Nr of partial slabs left */ 104 int pages; /* Nr of pages left */
148 int pobjects; /* Approximate # of objects */ 105 int pobjects; /* Approximate count */
149#else 106#else
150 short int pages; 107 short int pages;
151 short int pobjects; 108 short int pobjects;
152#endif 109#endif
110 };
111 };
112 struct kmem_cache *slab_cache; /* not slob */
113 /* Double-word boundary */
114 void *freelist; /* first free object */
115 union {
116 void *s_mem; /* slab: first object */
117 unsigned long counters; /* SLUB */
118 struct { /* SLUB */
119 unsigned inuse:16;
120 unsigned objects:15;
121 unsigned frozen:1;
122 };
123 };
153 }; 124 };
154 125 struct { /* Tail pages of compound page */
155 struct rcu_head rcu_head; /* Used by SLAB 126 unsigned long compound_head; /* Bit zero is set */
156 * when destroying via RCU
157 */
158 /* Tail pages of compound page */
159 struct {
160 unsigned long compound_head; /* If bit zero is set */
161 127
162 /* First tail page only */ 128 /* First tail page only */
163 unsigned char compound_dtor; 129 unsigned char compound_dtor;
164 unsigned char compound_order; 130 unsigned char compound_order;
165 /* two/six bytes available here */ 131 atomic_t compound_mapcount;
166 }; 132 };
167 133 struct { /* Second tail page of compound page */
168#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS 134 unsigned long _compound_pad_1; /* compound_head */
169 struct { 135 unsigned long _compound_pad_2;
170 unsigned long __pad; /* do not overlay pmd_huge_pte 136 struct list_head deferred_list;
171 * with compound_head to avoid
172 * possible bit 0 collision.
173 */
174 pgtable_t pmd_huge_pte; /* protected by page->ptl */
175 }; 137 };
138 struct { /* Page table pages */
139 unsigned long _pt_pad_1; /* compound_head */
140 pgtable_t pmd_huge_pte; /* protected by page->ptl */
141 unsigned long _pt_pad_2; /* mapping */
142 struct mm_struct *pt_mm; /* x86 pgds only */
143#if ALLOC_SPLIT_PTLOCKS
144 spinlock_t *ptl;
145#else
146 spinlock_t ptl;
176#endif 147#endif
148 };
149 struct { /* ZONE_DEVICE pages */
150 /** @pgmap: Points to the hosting device page map. */
151 struct dev_pagemap *pgmap;
152 unsigned long hmm_data;
153 unsigned long _zd_pad_1; /* uses mapping */
154 };
155
156 /** @rcu_head: You can use this to free a page by RCU. */
157 struct rcu_head rcu_head;
177 }; 158 };
178 159
179 union { 160 union { /* This union is 4 bytes in size. */
180 /* 161 /*
181 * Mapping-private opaque data: 162 * If the page can be mapped to userspace, encodes the number
182 * Usually used for buffer_heads if PagePrivate 163 * of times this page is referenced by a page table.
183 * Used for swp_entry_t if PageSwapCache
184 * Indicates order in the buddy system if PageBuddy
185 */ 164 */
186 unsigned long private; 165 atomic_t _mapcount;
187#if USE_SPLIT_PTE_PTLOCKS 166
188#if ALLOC_SPLIT_PTLOCKS 167 /*
189 spinlock_t *ptl; 168 * If the page is neither PageSlab nor mappable to userspace,
190#else 169 * the value stored here may help determine what this page
191 spinlock_t ptl; 170 * is used for. See page-flags.h for a list of page types
192#endif 171 * which are currently stored here.
193#endif 172 */
194 struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ 173 unsigned int page_type;
174
175 unsigned int active; /* SLAB */
176 int units; /* SLOB */
195 }; 177 };
196 178
179 /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
180 atomic_t _refcount;
181
197#ifdef CONFIG_MEMCG 182#ifdef CONFIG_MEMCG
198 struct mem_cgroup *mem_cgroup; 183 struct mem_cgroup *mem_cgroup;
199#endif 184#endif
@@ -413,6 +398,8 @@ struct mm_struct {
413 unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ 398 unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
414 unsigned long stack_vm; /* VM_STACK */ 399 unsigned long stack_vm; /* VM_STACK */
415 unsigned long def_flags; 400 unsigned long def_flags;
401
402 spinlock_t arg_lock; /* protect the below fields */
416 unsigned long start_code, end_code, start_data, end_data; 403 unsigned long start_code, end_code, start_data, end_data;
417 unsigned long start_brk, brk, start_stack; 404 unsigned long start_brk, brk, start_stack;
418 unsigned long arg_start, arg_end, env_start, env_end; 405 unsigned long arg_start, arg_end, env_start, env_end;
@@ -627,9 +614,9 @@ struct vm_special_mapping {
627 * If non-NULL, then this is called to resolve page faults 614 * If non-NULL, then this is called to resolve page faults
628 * on the special mapping. If used, .pages is not checked. 615 * on the special mapping. If used, .pages is not checked.
629 */ 616 */
630 int (*fault)(const struct vm_special_mapping *sm, 617 vm_fault_t (*fault)(const struct vm_special_mapping *sm,
631 struct vm_area_struct *vma, 618 struct vm_area_struct *vma,
632 struct vm_fault *vmf); 619 struct vm_fault *vmf);
633 620
634 int (*mremap)(const struct vm_special_mapping *sm, 621 int (*mremap)(const struct vm_special_mapping *sm,
635 struct vm_area_struct *new_vma); 622 struct vm_area_struct *new_vma);
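
The reorganised struct page is hard to follow in side-by-side diff form, so here is a deliberately simplified, userspace-compilable sketch of the layout idea (toy_page and its field choices are invented for illustration, this is not the kernel definition): each user of a page gets its own named struct inside one union, and those structs overlay the same handful of words instead of having their fields interleaved through the whole structure.

	/* Simplified analogue of the patched struct page layout. */
	#include <stdio.h>

	struct toy_page {
		unsigned long flags;
		union {
			struct {			/* page cache and anonymous pages */
				void *lru_next, *lru_prev;
				void *mapping;
				unsigned long index;
				unsigned long private;
			};
			struct {			/* page table pages */
				unsigned long _pt_pad_1;
				void *pmd_huge_pte;
				unsigned long _pt_pad_2;
				void *pt_mm;
			};
		};
		int _mapcount;				/* or page_type when never mapped */
		int _refcount;
	};

	int main(void)
	{
		printf("sizeof(struct toy_page) = %zu\n", sizeof(struct toy_page));
		return 0;
	}
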
diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 1cc5ffb769af..7cd1473c64a4 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -53,93 +53,32 @@ struct gcry_mpi {
53typedef struct gcry_mpi *MPI; 53typedef struct gcry_mpi *MPI;
54 54
55#define mpi_get_nlimbs(a) ((a)->nlimbs) 55#define mpi_get_nlimbs(a) ((a)->nlimbs)
56#define mpi_is_neg(a) ((a)->sign)
57 56
58/*-- mpiutil.c --*/ 57/*-- mpiutil.c --*/
59MPI mpi_alloc(unsigned nlimbs); 58MPI mpi_alloc(unsigned nlimbs);
60MPI mpi_alloc_secure(unsigned nlimbs);
61MPI mpi_alloc_like(MPI a);
62void mpi_free(MPI a); 59void mpi_free(MPI a);
63int mpi_resize(MPI a, unsigned nlimbs); 60int mpi_resize(MPI a, unsigned nlimbs);
64int mpi_copy(MPI *copy, const MPI a);
65void mpi_clear(MPI a);
66int mpi_set(MPI w, MPI u);
67int mpi_set_ui(MPI w, ulong u);
68MPI mpi_alloc_set_ui(unsigned long u);
69void mpi_m_check(MPI a);
70void mpi_swap(MPI a, MPI b);
71 61
72/*-- mpicoder.c --*/ 62/*-- mpicoder.c --*/
73MPI do_encode_md(const void *sha_buffer, unsigned nbits);
74MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); 63MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes);
75MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); 64MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread);
76MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); 65MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len);
77int mpi_fromstr(MPI val, const char *str);
78u32 mpi_get_keyid(MPI a, u32 *keyid);
79void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); 66void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign);
80int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, 67int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
81 int *sign); 68 int *sign);
82void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign);
83int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, 69int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes,
84 int *sign); 70 int *sign);
85 71
86#define log_mpidump g10_log_mpidump
87
88/*-- mpi-add.c --*/
89int mpi_add_ui(MPI w, MPI u, ulong v);
90int mpi_add(MPI w, MPI u, MPI v);
91int mpi_addm(MPI w, MPI u, MPI v, MPI m);
92int mpi_sub_ui(MPI w, MPI u, ulong v);
93int mpi_sub(MPI w, MPI u, MPI v);
94int mpi_subm(MPI w, MPI u, MPI v, MPI m);
95
96/*-- mpi-mul.c --*/
97int mpi_mul_ui(MPI w, MPI u, ulong v);
98int mpi_mul_2exp(MPI w, MPI u, ulong cnt);
99int mpi_mul(MPI w, MPI u, MPI v);
100int mpi_mulm(MPI w, MPI u, MPI v, MPI m);
101
102/*-- mpi-div.c --*/
103ulong mpi_fdiv_r_ui(MPI rem, MPI dividend, ulong divisor);
104int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor);
105int mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor);
106int mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor);
107int mpi_tdiv_r(MPI rem, MPI num, MPI den);
108int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den);
109int mpi_tdiv_q_2exp(MPI w, MPI u, unsigned count);
110int mpi_divisible_ui(const MPI dividend, ulong divisor);
111
112/*-- mpi-gcd.c --*/
113int mpi_gcd(MPI g, const MPI a, const MPI b);
114
115/*-- mpi-pow.c --*/ 72/*-- mpi-pow.c --*/
116int mpi_pow(MPI w, MPI u, MPI v);
117int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); 73int mpi_powm(MPI res, MPI base, MPI exp, MPI mod);
118 74
119/*-- mpi-mpow.c --*/
120int mpi_mulpowm(MPI res, MPI *basearray, MPI *exparray, MPI mod);
121
122/*-- mpi-cmp.c --*/ 75/*-- mpi-cmp.c --*/
123int mpi_cmp_ui(MPI u, ulong v); 76int mpi_cmp_ui(MPI u, ulong v);
124int mpi_cmp(MPI u, MPI v); 77int mpi_cmp(MPI u, MPI v);
125 78
126/*-- mpi-scan.c --*/
127int mpi_getbyte(MPI a, unsigned idx);
128void mpi_putbyte(MPI a, unsigned idx, int value);
129unsigned mpi_trailing_zeros(MPI a);
130
131/*-- mpi-bit.c --*/ 79/*-- mpi-bit.c --*/
132void mpi_normalize(MPI a); 80void mpi_normalize(MPI a);
133unsigned mpi_get_nbits(MPI a); 81unsigned mpi_get_nbits(MPI a);
134int mpi_test_bit(MPI a, unsigned n);
135int mpi_set_bit(MPI a, unsigned n);
136int mpi_set_highbit(MPI a, unsigned n);
137void mpi_clear_highbit(MPI a, unsigned n);
138void mpi_clear_bit(MPI a, unsigned n);
139int mpi_rshift(MPI x, MPI a, unsigned n);
140
141/*-- mpi-inv.c --*/
142int mpi_invm(MPI x, MPI u, MPI v);
143 82
144/* inline functions */ 83/* inline functions */
145 84
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e34a27727b9a..901943e4754b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -642,49 +642,62 @@ PAGEFLAG_FALSE(DoubleMap)
642#endif 642#endif
643 643
644/* 644/*
645 * For pages that are never mapped to userspace, page->mapcount may be 645 * For pages that are never mapped to userspace (and aren't PageSlab),
646 * used for storing extra information about page type. Any value used 646 * page_type may be used. Because it is initialised to -1, we invert the
647 * for this purpose must be <= -2, but it's better start not too close 647 * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
648 * to -2 so that an underflow of the page_mapcount() won't be mistaken 648 * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and
649 * for a special page. 649 * low bits so that an underflow or overflow of page_mapcount() won't be
650 * mistaken for a page type value.
650 */ 651 */
651#define PAGE_MAPCOUNT_OPS(uname, lname) \ 652
653#define PAGE_TYPE_BASE 0xf0000000
654/* Reserve 0x0000007f to catch underflows of page_mapcount */
655#define PG_buddy 0x00000080
656#define PG_balloon 0x00000100
657#define PG_kmemcg 0x00000200
658#define PG_table 0x00000400
659
660#define PageType(page, flag) \
661 ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
662
663#define PAGE_TYPE_OPS(uname, lname) \
652static __always_inline int Page##uname(struct page *page) \ 664static __always_inline int Page##uname(struct page *page) \
653{ \ 665{ \
654 return atomic_read(&page->_mapcount) == \ 666 return PageType(page, PG_##lname); \
655 PAGE_##lname##_MAPCOUNT_VALUE; \
656} \ 667} \
657static __always_inline void __SetPage##uname(struct page *page) \ 668static __always_inline void __SetPage##uname(struct page *page) \
658{ \ 669{ \
659 VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); \ 670 VM_BUG_ON_PAGE(!PageType(page, 0), page); \
660 atomic_set(&page->_mapcount, PAGE_##lname##_MAPCOUNT_VALUE); \ 671 page->page_type &= ~PG_##lname; \
661} \ 672} \
662static __always_inline void __ClearPage##uname(struct page *page) \ 673static __always_inline void __ClearPage##uname(struct page *page) \
663{ \ 674{ \
664 VM_BUG_ON_PAGE(!Page##uname(page), page); \ 675 VM_BUG_ON_PAGE(!Page##uname(page), page); \
665 atomic_set(&page->_mapcount, -1); \ 676 page->page_type |= PG_##lname; \
666} 677}
667 678
668/* 679/*
669 * PageBuddy() indicate that the page is free and in the buddy system 680 * PageBuddy() indicates that the page is free and in the buddy system
670 * (see mm/page_alloc.c). 681 * (see mm/page_alloc.c).
671 */ 682 */
672#define PAGE_BUDDY_MAPCOUNT_VALUE (-128) 683PAGE_TYPE_OPS(Buddy, buddy)
673PAGE_MAPCOUNT_OPS(Buddy, BUDDY)
674 684
675/* 685/*
676 * PageBalloon() is set on pages that are on the balloon page list 686 * PageBalloon() is true for pages that are on the balloon page list
677 * (see mm/balloon_compaction.c). 687 * (see mm/balloon_compaction.c).
678 */ 688 */
679#define PAGE_BALLOON_MAPCOUNT_VALUE (-256) 689PAGE_TYPE_OPS(Balloon, balloon)
680PAGE_MAPCOUNT_OPS(Balloon, BALLOON)
681 690
682/* 691/*
683 * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on 692 * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
684 * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. 693 * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
685 */ 694 */
686#define PAGE_KMEMCG_MAPCOUNT_VALUE (-512) 695PAGE_TYPE_OPS(Kmemcg, kmemcg)
687PAGE_MAPCOUNT_OPS(Kmemcg, KMEMCG) 696
697/*
698 * Marks pages in use as page tables.
699 */
700PAGE_TYPE_OPS(Table, table)
688 701
689extern bool is_free_buddy_page(struct page *page); 702extern bool is_free_buddy_page(struct page *page);
690 703
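
To make the inverted sense described in the comment above concrete, the following standalone snippet replays the PAGE_TYPE_OPS expansion for PageBuddy() (constants copied from the patch; struct toy_page and main() are illustration only, not kernel code): page_type starts at -1, __SetPageBuddy() clears the bit, __ClearPageBuddy() sets it back, and PageType() checks that the reserved high bits are untouched.

	#include <assert.h>

	#define PAGE_TYPE_BASE	0xf0000000
	#define PG_buddy	0x00000080

	struct toy_page { unsigned int page_type; };

	static int PageBuddy(const struct toy_page *page)
	{
		return (page->page_type & (PAGE_TYPE_BASE | PG_buddy)) == PAGE_TYPE_BASE;
	}

	int main(void)
	{
		struct toy_page page = { .page_type = 0xffffffff };	/* initial value */

		assert(!PageBuddy(&page));
		page.page_type &= ~PG_buddy;	/* what __SetPageBuddy() does */
		assert(PageBuddy(&page));
		page.page_type |= PG_buddy;	/* what __ClearPageBuddy() does */
		assert(!PageBuddy(&page));
		return 0;
	}
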
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index c15ab80ad32d..bab7e57f659b 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -7,10 +7,22 @@
7#include <asm/page.h> 7#include <asm/page.h>
8 8
9struct page_counter { 9struct page_counter {
10 atomic_long_t count; 10 atomic_long_t usage;
11 unsigned long limit; 11 unsigned long min;
12 unsigned long low;
13 unsigned long max;
12 struct page_counter *parent; 14 struct page_counter *parent;
13 15
16 /* effective memory.min and memory.min usage tracking */
17 unsigned long emin;
18 atomic_long_t min_usage;
19 atomic_long_t children_min_usage;
20
21 /* effective memory.low and memory.low usage tracking */
22 unsigned long elow;
23 atomic_long_t low_usage;
24 atomic_long_t children_low_usage;
25
14 /* legacy */ 26 /* legacy */
15 unsigned long watermark; 27 unsigned long watermark;
16 unsigned long failcnt; 28 unsigned long failcnt;
@@ -25,14 +37,14 @@ struct page_counter {
25static inline void page_counter_init(struct page_counter *counter, 37static inline void page_counter_init(struct page_counter *counter,
26 struct page_counter *parent) 38 struct page_counter *parent)
27{ 39{
28 atomic_long_set(&counter->count, 0); 40 atomic_long_set(&counter->usage, 0);
29 counter->limit = PAGE_COUNTER_MAX; 41 counter->max = PAGE_COUNTER_MAX;
30 counter->parent = parent; 42 counter->parent = parent;
31} 43}
32 44
33static inline unsigned long page_counter_read(struct page_counter *counter) 45static inline unsigned long page_counter_read(struct page_counter *counter)
34{ 46{
35 return atomic_long_read(&counter->count); 47 return atomic_long_read(&counter->usage);
36} 48}
37 49
38void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages); 50void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
@@ -41,7 +53,9 @@ bool page_counter_try_charge(struct page_counter *counter,
41 unsigned long nr_pages, 53 unsigned long nr_pages,
42 struct page_counter **fail); 54 struct page_counter **fail);
43void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); 55void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages);
44int page_counter_limit(struct page_counter *counter, unsigned long limit); 56void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages);
57void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages);
58int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages);
45int page_counter_memparse(const char *buf, const char *max, 59int page_counter_memparse(const char *buf, const char *max,
46 unsigned long *nr_pages); 60 unsigned long *nr_pages);
47 61
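
For readers who only know the old field names, a hedged sketch of what the renamed and added page_counter fields mean (toy_page_counter and toy_try_charge are invented; the real logic lives in mm/page_counter.c): usage is the charged page count formerly called count, max is the hard limit formerly called limit, and min/low carry the cgroup v2 memory.min / memory.low protection values that reclaim consults.

	#include <stdbool.h>
	#include <stdio.h>

	struct toy_page_counter {
		long usage;			/* was 'count' */
		unsigned long min, low, max;	/* 'max' was 'limit' */
	};

	static bool toy_try_charge(struct toy_page_counter *c, unsigned long nr_pages)
	{
		if (c->usage + (long)nr_pages > (long)c->max)
			return false;		/* over memory.max, charge fails */
		c->usage += nr_pages;
		return true;
	}

	int main(void)
	{
		struct toy_page_counter c = { .min = 10, .low = 20, .max = 100 };

		printf("charge 80 -> %d\n", toy_try_charge(&c, 80));
		printf("charge 30 -> %d\n", toy_try_charge(&c, 30));	/* exceeds max */
		printf("usage below memory.low (protected): %s\n",
		       c.usage < (long)c.low ? "yes" : "no");
		return 0;
	}
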
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index a03c2642a87c..21713dc14ce2 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -122,7 +122,7 @@ pud_t pud_mkdevmap(pud_t pud);
122#endif 122#endif
123#endif /* __HAVE_ARCH_PTE_DEVMAP */ 123#endif /* __HAVE_ARCH_PTE_DEVMAP */
124 124
125#ifdef __HAVE_ARCH_PTE_SPECIAL 125#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
126static inline bool pfn_t_special(pfn_t pfn) 126static inline bool pfn_t_special(pfn_t pfn)
127{ 127{
128 return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; 128 return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
@@ -132,5 +132,5 @@ static inline bool pfn_t_special(pfn_t pfn)
132{ 132{
133 return false; 133 return false;
134} 134}
135#endif /* __HAVE_ARCH_PTE_SPECIAL */ 135#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
136#endif /* _LINUX_PFN_T_H_ */ 136#endif /* _LINUX_PFN_T_H_ */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 76a8cb4ef178..44d356f5e47c 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -163,9 +163,13 @@ static inline gfp_t current_gfp_context(gfp_t flags)
163} 163}
164 164
165#ifdef CONFIG_LOCKDEP 165#ifdef CONFIG_LOCKDEP
166extern void __fs_reclaim_acquire(void);
167extern void __fs_reclaim_release(void);
166extern void fs_reclaim_acquire(gfp_t gfp_mask); 168extern void fs_reclaim_acquire(gfp_t gfp_mask);
167extern void fs_reclaim_release(gfp_t gfp_mask); 169extern void fs_reclaim_release(gfp_t gfp_mask);
168#else 170#else
171static inline void __fs_reclaim_acquire(void) { }
172static inline void __fs_reclaim_release(void) { }
169static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } 173static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
170static inline void fs_reclaim_release(gfp_t gfp_mask) { } 174static inline void fs_reclaim_release(gfp_t gfp_mask) { }
171#endif 175#endif
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 73b5e655a76e..f155dc607112 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -110,19 +110,6 @@ static inline bool shmem_file(struct file *file)
110extern bool shmem_charge(struct inode *inode, long pages); 110extern bool shmem_charge(struct inode *inode, long pages);
111extern void shmem_uncharge(struct inode *inode, long pages); 111extern void shmem_uncharge(struct inode *inode, long pages);
112 112
113#ifdef CONFIG_TMPFS
114
115extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
116
117#else
118
119static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a)
120{
121 return -EINVAL;
122}
123
124#endif
125
126#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE 113#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
127extern bool shmem_huge_enabled(struct vm_area_struct *vma); 114extern bool shmem_huge_enabled(struct vm_area_struct *vma);
128#else 115#else
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index d9228e4d0320..3485c58cfd1c 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -67,9 +67,10 @@ struct kmem_cache {
67 67
68 /* 68 /*
69 * If debugging is enabled, then the allocator can add additional 69 * If debugging is enabled, then the allocator can add additional
70 * fields and/or padding to every object. size contains the total 70 * fields and/or padding to every object. 'size' contains the total
71 * object size including these internal fields, the following two 71 * object size including these internal fields, while 'obj_offset'
72 * variables contain the offset to the user object and its size. 72 * and 'object_size' contain the offset to the user object and its
73 * size.
73 */ 74 */
74 int obj_offset; 75 int obj_offset;
75#endif /* CONFIG_DEBUG_SLAB */ 76#endif /* CONFIG_DEBUG_SLAB */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 3773e26c08c1..09fa2c6f0e68 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -101,7 +101,6 @@ struct kmem_cache {
101 void (*ctor)(void *); 101 void (*ctor)(void *);
102 unsigned int inuse; /* Offset to metadata */ 102 unsigned int inuse; /* Offset to metadata */
103 unsigned int align; /* Alignment */ 103 unsigned int align; /* Alignment */
104 unsigned int reserved; /* Reserved bytes at the end of slabs */
105 unsigned int red_left_pad; /* Left redzone padding size */ 104 unsigned int red_left_pad; /* Left redzone padding size */
106 const char *name; /* Name (only for display!) */ 105 const char *name; /* Name (only for display!) */
107 struct list_head list; /* List of slab caches */ 106 struct list_head list; /* List of slab caches */
diff --git a/include/linux/types.h b/include/linux/types.h
index ec13d02b3481..9834e90aa010 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -10,14 +10,14 @@
10#define DECLARE_BITMAP(name,bits) \ 10#define DECLARE_BITMAP(name,bits) \
11 unsigned long name[BITS_TO_LONGS(bits)] 11 unsigned long name[BITS_TO_LONGS(bits)]
12 12
13typedef __u32 __kernel_dev_t; 13typedef u32 __kernel_dev_t;
14 14
15typedef __kernel_fd_set fd_set; 15typedef __kernel_fd_set fd_set;
16typedef __kernel_dev_t dev_t; 16typedef __kernel_dev_t dev_t;
17typedef __kernel_ino_t ino_t; 17typedef __kernel_ino_t ino_t;
18typedef __kernel_mode_t mode_t; 18typedef __kernel_mode_t mode_t;
19typedef unsigned short umode_t; 19typedef unsigned short umode_t;
20typedef __u32 nlink_t; 20typedef u32 nlink_t;
21typedef __kernel_off_t off_t; 21typedef __kernel_off_t off_t;
22typedef __kernel_pid_t pid_t; 22typedef __kernel_pid_t pid_t;
23typedef __kernel_daddr_t daddr_t; 23typedef __kernel_daddr_t daddr_t;
@@ -95,29 +95,29 @@ typedef unsigned long ulong;
95#ifndef __BIT_TYPES_DEFINED__ 95#ifndef __BIT_TYPES_DEFINED__
96#define __BIT_TYPES_DEFINED__ 96#define __BIT_TYPES_DEFINED__
97 97
98typedef __u8 u_int8_t; 98typedef u8 u_int8_t;
99typedef __s8 int8_t; 99typedef s8 int8_t;
100typedef __u16 u_int16_t; 100typedef u16 u_int16_t;
101typedef __s16 int16_t; 101typedef s16 int16_t;
102typedef __u32 u_int32_t; 102typedef u32 u_int32_t;
103typedef __s32 int32_t; 103typedef s32 int32_t;
104 104
105#endif /* !(__BIT_TYPES_DEFINED__) */ 105#endif /* !(__BIT_TYPES_DEFINED__) */
106 106
107typedef __u8 uint8_t; 107typedef u8 uint8_t;
108typedef __u16 uint16_t; 108typedef u16 uint16_t;
109typedef __u32 uint32_t; 109typedef u32 uint32_t;
110 110
111#if defined(__GNUC__) 111#if defined(__GNUC__)
112typedef __u64 uint64_t; 112typedef u64 uint64_t;
113typedef __u64 u_int64_t; 113typedef u64 u_int64_t;
114typedef __s64 int64_t; 114typedef s64 int64_t;
115#endif 115#endif
116 116
117/* this is a special 64bit data type that is 8-byte aligned */ 117/* this is a special 64bit data type that is 8-byte aligned */
118#define aligned_u64 __u64 __attribute__((aligned(8))) 118#define aligned_u64 __aligned_u64
119#define aligned_be64 __be64 __attribute__((aligned(8))) 119#define aligned_be64 __aligned_be64
120#define aligned_le64 __le64 __attribute__((aligned(8))) 120#define aligned_le64 __aligned_le64
121 121
122/** 122/**
123 * The type used for indexing onto a disc or disc partition. 123 * The type used for indexing onto a disc or disc partition.
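
The aligned_u64 hunk only switches to the uapi-provided __aligned_u64 definition, but since 64-bit member alignment differs between ABIs it may help to see why the aligned variants exist at all (compat_aligned_u64 is a local stand-in, not a kernel type): on i386 the plain 64-bit member lands at offset 4 while the aligned one lands at offset 8; on x86-64 both land at 8.

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t compat_aligned_u64 __attribute__((aligned(8)));

	struct with_plain   { uint32_t a; uint64_t b; };
	struct with_aligned { uint32_t a; compat_aligned_u64 b; };

	int main(void)
	{
		printf("offsetof(b): plain=%zu aligned=%zu\n",
		       offsetof(struct with_plain, b),
		       offsetof(struct with_aligned, b));
		return 0;
	}
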
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f2f3b68ba910..e091f0a11b11 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -31,10 +31,12 @@
31extern int handle_userfault(struct vm_fault *vmf, unsigned long reason); 31extern int handle_userfault(struct vm_fault *vmf, unsigned long reason);
32 32
33extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, 33extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
34 unsigned long src_start, unsigned long len); 34 unsigned long src_start, unsigned long len,
35 bool *mmap_changing);
35extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, 36extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
36 unsigned long dst_start, 37 unsigned long dst_start,
37 unsigned long len); 38 unsigned long len,
39 bool *mmap_changing);
38 40
39/* mm helpers */ 41/* mm helpers */
40static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 42static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 2a4432c7a4b4..e13eec3dfb2f 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -1,6 +1,8 @@
1/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ 1/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2/* 2/*
3 * Copyright 1997 Transmeta Corporation - All Rights Reserved 3 * Copyright 1997 Transmeta Corporation - All Rights Reserved
4 * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
5 * Copyright 2005-2006,2013,2017-2018 Ian Kent <raven@themaw.net>
4 * 6 *
5 * This file is part of the Linux kernel and is made available under 7 * This file is part of the Linux kernel and is made available under
6 * the terms of the GNU General Public License, version 2, or at your 8 * the terms of the GNU General Public License, version 2, or at your
@@ -8,7 +10,6 @@
8 * 10 *
9 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
10 12
11
12#ifndef _UAPI_LINUX_AUTO_FS_H 13#ifndef _UAPI_LINUX_AUTO_FS_H
13#define _UAPI_LINUX_AUTO_FS_H 14#define _UAPI_LINUX_AUTO_FS_H
14 15
@@ -18,13 +19,11 @@
18#include <sys/ioctl.h> 19#include <sys/ioctl.h>
19#endif /* __KERNEL__ */ 20#endif /* __KERNEL__ */
20 21
22#define AUTOFS_PROTO_VERSION 5
23#define AUTOFS_MIN_PROTO_VERSION 3
24#define AUTOFS_MAX_PROTO_VERSION 5
21 25
22/* This file describes autofs v3 */ 26#define AUTOFS_PROTO_SUBVERSION 2
23#define AUTOFS_PROTO_VERSION 3
24
25/* Range of protocol versions defined */
26#define AUTOFS_MAX_PROTO_VERSION AUTOFS_PROTO_VERSION
27#define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION
28 27
29/* 28/*
30 * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed 29 * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
@@ -76,9 +75,155 @@ enum {
76#define AUTOFS_IOC_READY _IO(AUTOFS_IOCTL, AUTOFS_IOC_READY_CMD) 75#define AUTOFS_IOC_READY _IO(AUTOFS_IOCTL, AUTOFS_IOC_READY_CMD)
77#define AUTOFS_IOC_FAIL _IO(AUTOFS_IOCTL, AUTOFS_IOC_FAIL_CMD) 76#define AUTOFS_IOC_FAIL _IO(AUTOFS_IOCTL, AUTOFS_IOC_FAIL_CMD)
78#define AUTOFS_IOC_CATATONIC _IO(AUTOFS_IOCTL, AUTOFS_IOC_CATATONIC_CMD) 77#define AUTOFS_IOC_CATATONIC _IO(AUTOFS_IOCTL, AUTOFS_IOC_CATATONIC_CMD)
79#define AUTOFS_IOC_PROTOVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOVER_CMD, int) 78#define AUTOFS_IOC_PROTOVER _IOR(AUTOFS_IOCTL, \
80#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, compat_ulong_t) 79 AUTOFS_IOC_PROTOVER_CMD, int)
81#define AUTOFS_IOC_SETTIMEOUT _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, unsigned long) 80#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(AUTOFS_IOCTL, \
82#define AUTOFS_IOC_EXPIRE _IOR(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_CMD, struct autofs_packet_expire) 81 AUTOFS_IOC_SETTIMEOUT_CMD, \
82 compat_ulong_t)
83#define AUTOFS_IOC_SETTIMEOUT _IOWR(AUTOFS_IOCTL, \
84 AUTOFS_IOC_SETTIMEOUT_CMD, \
85 unsigned long)
86#define AUTOFS_IOC_EXPIRE _IOR(AUTOFS_IOCTL, \
87 AUTOFS_IOC_EXPIRE_CMD, \
88 struct autofs_packet_expire)
89
90/* autofs version 4 and later definitions */
91
92/* Mask for expire behaviour */
93#define AUTOFS_EXP_IMMEDIATE 1
94#define AUTOFS_EXP_LEAVES 2
95
96#define AUTOFS_TYPE_ANY 0U
97#define AUTOFS_TYPE_INDIRECT 1U
98#define AUTOFS_TYPE_DIRECT 2U
99#define AUTOFS_TYPE_OFFSET 4U
100
101static inline void set_autofs_type_indirect(unsigned int *type)
102{
103 *type = AUTOFS_TYPE_INDIRECT;
104}
105
106static inline unsigned int autofs_type_indirect(unsigned int type)
107{
108 return (type == AUTOFS_TYPE_INDIRECT);
109}
110
111static inline void set_autofs_type_direct(unsigned int *type)
112{
113 *type = AUTOFS_TYPE_DIRECT;
114}
115
116static inline unsigned int autofs_type_direct(unsigned int type)
117{
118 return (type == AUTOFS_TYPE_DIRECT);
119}
120
121static inline void set_autofs_type_offset(unsigned int *type)
122{
123 *type = AUTOFS_TYPE_OFFSET;
124}
125
126static inline unsigned int autofs_type_offset(unsigned int type)
127{
128 return (type == AUTOFS_TYPE_OFFSET);
129}
130
131static inline unsigned int autofs_type_trigger(unsigned int type)
132{
133 return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET);
134}
135
136/*
137 * This isn't really a type as we use it to say "no type set" to
138 * indicate we want to search for "any" mount in the
139 * autofs_dev_ioctl_ismountpoint() device ioctl function.
140 */
141static inline void set_autofs_type_any(unsigned int *type)
142{
143 *type = AUTOFS_TYPE_ANY;
144}
145
146static inline unsigned int autofs_type_any(unsigned int type)
147{
148 return (type == AUTOFS_TYPE_ANY);
149}
150
151/* Daemon notification packet types */
152enum autofs_notify {
153 NFY_NONE,
154 NFY_MOUNT,
155 NFY_EXPIRE
156};
157
158/* Kernel protocol version 4 packet types */
159
160/* Expire entry (umount request) */
161#define autofs_ptype_expire_multi 2
162
163/* Kernel protocol version 5 packet types */
164
165/* Indirect mount missing and expire requests. */
166#define autofs_ptype_missing_indirect 3
167#define autofs_ptype_expire_indirect 4
168
169/* Direct mount missing and expire requests */
170#define autofs_ptype_missing_direct 5
171#define autofs_ptype_expire_direct 6
172
173/* v4 multi expire (via pipe) */
174struct autofs_packet_expire_multi {
175 struct autofs_packet_hdr hdr;
176 autofs_wqt_t wait_queue_token;
177 int len;
178 char name[NAME_MAX+1];
179};
180
181union autofs_packet_union {
182 struct autofs_packet_hdr hdr;
183 struct autofs_packet_missing missing;
184 struct autofs_packet_expire expire;
185 struct autofs_packet_expire_multi expire_multi;
186};
187
188/* autofs v5 common packet struct */
189struct autofs_v5_packet {
190 struct autofs_packet_hdr hdr;
191 autofs_wqt_t wait_queue_token;
192 __u32 dev;
193 __u64 ino;
194 __u32 uid;
195 __u32 gid;
196 __u32 pid;
197 __u32 tgid;
198 __u32 len;
199 char name[NAME_MAX+1];
200};
201
202typedef struct autofs_v5_packet autofs_packet_missing_indirect_t;
203typedef struct autofs_v5_packet autofs_packet_expire_indirect_t;
204typedef struct autofs_v5_packet autofs_packet_missing_direct_t;
205typedef struct autofs_v5_packet autofs_packet_expire_direct_t;
206
207union autofs_v5_packet_union {
208 struct autofs_packet_hdr hdr;
209 struct autofs_v5_packet v5_packet;
210 autofs_packet_missing_indirect_t missing_indirect;
211 autofs_packet_expire_indirect_t expire_indirect;
212 autofs_packet_missing_direct_t missing_direct;
213 autofs_packet_expire_direct_t expire_direct;
214};
215
216enum {
217 AUTOFS_IOC_EXPIRE_MULTI_CMD = 0x66, /* AUTOFS_IOC_EXPIRE_CMD + 1 */
218 AUTOFS_IOC_PROTOSUBVER_CMD,
219 AUTOFS_IOC_ASKUMOUNT_CMD = 0x70, /* AUTOFS_DEV_IOCTL_VERSION_CMD - 1 */
220};
221
222#define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, \
223 AUTOFS_IOC_EXPIRE_MULTI_CMD, int)
224#define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, \
225 AUTOFS_IOC_PROTOSUBVER_CMD, int)
226#define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, \
227 AUTOFS_IOC_ASKUMOUNT_CMD, int)
83 228
84#endif /* _UAPI_LINUX_AUTO_FS_H */ 229#endif /* _UAPI_LINUX_AUTO_FS_H */
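
A short userspace usage sketch of the type helpers that now live in auto_fs.h; the constants and inline functions are copied from the patch so the snippet is self-contained, while main() is illustration only.

	#include <assert.h>

	#define AUTOFS_TYPE_ANY		0U
	#define AUTOFS_TYPE_INDIRECT	1U
	#define AUTOFS_TYPE_DIRECT	2U
	#define AUTOFS_TYPE_OFFSET	4U

	static inline void set_autofs_type_offset(unsigned int *type)
	{
		*type = AUTOFS_TYPE_OFFSET;
	}

	static inline unsigned int autofs_type_trigger(unsigned int type)
	{
		return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET);
	}

	int main(void)
	{
		unsigned int type = AUTOFS_TYPE_ANY;

		set_autofs_type_offset(&type);
		assert(autofs_type_trigger(type));	/* offset mounts act as triggers */
		return 0;
	}
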
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 1f608e27a06f..d01ef0a0189c 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h
@@ -7,156 +7,9 @@
7 * option, any later version, incorporated herein by reference. 7 * option, any later version, incorporated herein by reference.
8 */ 8 */
9 9
10#ifndef _LINUX_AUTO_FS4_H 10#ifndef _UAPI_LINUX_AUTO_FS4_H
11#define _LINUX_AUTO_FS4_H 11#define _UAPI_LINUX_AUTO_FS4_H
12 12
13/* Include common v3 definitions */
14#include <linux/types.h>
15#include <linux/auto_fs.h> 13#include <linux/auto_fs.h>
16 14
17/* autofs v4 definitions */ 15#endif /* _UAPI_LINUX_AUTO_FS4_H */
18#undef AUTOFS_PROTO_VERSION
19#undef AUTOFS_MIN_PROTO_VERSION
20#undef AUTOFS_MAX_PROTO_VERSION
21
22#define AUTOFS_PROTO_VERSION 5
23#define AUTOFS_MIN_PROTO_VERSION 3
24#define AUTOFS_MAX_PROTO_VERSION 5
25
26#define AUTOFS_PROTO_SUBVERSION 2
27
28/* Mask for expire behaviour */
29#define AUTOFS_EXP_IMMEDIATE 1
30#define AUTOFS_EXP_LEAVES 2
31
32#define AUTOFS_TYPE_ANY 0U
33#define AUTOFS_TYPE_INDIRECT 1U
34#define AUTOFS_TYPE_DIRECT 2U
35#define AUTOFS_TYPE_OFFSET 4U
36
37static inline void set_autofs_type_indirect(unsigned int *type)
38{
39 *type = AUTOFS_TYPE_INDIRECT;
40}
41
42static inline unsigned int autofs_type_indirect(unsigned int type)
43{
44 return (type == AUTOFS_TYPE_INDIRECT);
45}
46
47static inline void set_autofs_type_direct(unsigned int *type)
48{
49 *type = AUTOFS_TYPE_DIRECT;
50}
51
52static inline unsigned int autofs_type_direct(unsigned int type)
53{
54 return (type == AUTOFS_TYPE_DIRECT);
55}
56
57static inline void set_autofs_type_offset(unsigned int *type)
58{
59 *type = AUTOFS_TYPE_OFFSET;
60}
61
62static inline unsigned int autofs_type_offset(unsigned int type)
63{
64 return (type == AUTOFS_TYPE_OFFSET);
65}
66
67static inline unsigned int autofs_type_trigger(unsigned int type)
68{
69 return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET);
70}
71
72/*
73 * This isn't really a type as we use it to say "no type set" to
74 * indicate we want to search for "any" mount in the
75 * autofs_dev_ioctl_ismountpoint() device ioctl function.
76 */
77static inline void set_autofs_type_any(unsigned int *type)
78{
79 *type = AUTOFS_TYPE_ANY;
80}
81
82static inline unsigned int autofs_type_any(unsigned int type)
83{
84 return (type == AUTOFS_TYPE_ANY);
85}
86
87/* Daemon notification packet types */
88enum autofs_notify {
89 NFY_NONE,
90 NFY_MOUNT,
91 NFY_EXPIRE
92};
93
94/* Kernel protocol version 4 packet types */
95
96/* Expire entry (umount request) */
97#define autofs_ptype_expire_multi 2
98
99/* Kernel protocol version 5 packet types */
100
101/* Indirect mount missing and expire requests. */
102#define autofs_ptype_missing_indirect 3
103#define autofs_ptype_expire_indirect 4
104
105/* Direct mount missing and expire requests */
106#define autofs_ptype_missing_direct 5
107#define autofs_ptype_expire_direct 6
108
109/* v4 multi expire (via pipe) */
110struct autofs_packet_expire_multi {
111 struct autofs_packet_hdr hdr;
112 autofs_wqt_t wait_queue_token;
113 int len;
114 char name[NAME_MAX+1];
115};
116
117union autofs_packet_union {
118 struct autofs_packet_hdr hdr;
119 struct autofs_packet_missing missing;
120 struct autofs_packet_expire expire;
121 struct autofs_packet_expire_multi expire_multi;
122};
123
124/* autofs v5 common packet struct */
125struct autofs_v5_packet {
126 struct autofs_packet_hdr hdr;
127 autofs_wqt_t wait_queue_token;
128 __u32 dev;
129 __u64 ino;
130 __u32 uid;
131 __u32 gid;
132 __u32 pid;
133 __u32 tgid;
134 __u32 len;
135 char name[NAME_MAX+1];
136};
137
138typedef struct autofs_v5_packet autofs_packet_missing_indirect_t;
139typedef struct autofs_v5_packet autofs_packet_expire_indirect_t;
140typedef struct autofs_v5_packet autofs_packet_missing_direct_t;
141typedef struct autofs_v5_packet autofs_packet_expire_direct_t;
142
143union autofs_v5_packet_union {
144 struct autofs_packet_hdr hdr;
145 struct autofs_v5_packet v5_packet;
146 autofs_packet_missing_indirect_t missing_indirect;
147 autofs_packet_expire_indirect_t expire_indirect;
148 autofs_packet_missing_direct_t missing_direct;
149 autofs_packet_expire_direct_t expire_direct;
150};
151
152enum {
153 AUTOFS_IOC_EXPIRE_MULTI_CMD = 0x66, /* AUTOFS_IOC_EXPIRE_CMD + 1 */
154 AUTOFS_IOC_PROTOSUBVER_CMD,
155 AUTOFS_IOC_ASKUMOUNT_CMD = 0x70, /* AUTOFS_DEV_IOCTL_VERSION_CMD - 1 */
156};
157
158#define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int)
159#define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int)
160#define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int)
161
162#endif /* _LINUX_AUTO_FS4_H */
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index fa139841ec18..21b9113c69da 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -35,6 +35,6 @@
35#define KPF_BALLOON 23 35#define KPF_BALLOON 23
36#define KPF_ZERO_PAGE 24 36#define KPF_ZERO_PAGE 24
37#define KPF_IDLE 25 37#define KPF_IDLE 25
38 38#define KPF_PGTABLE 26
39 39
40#endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ 40#endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index f7674d676889..b66aced5e8c2 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -460,6 +460,7 @@ static int __init crash_save_vmcoreinfo_init(void)
460 VMCOREINFO_NUMBER(PG_hwpoison); 460 VMCOREINFO_NUMBER(PG_hwpoison);
461#endif 461#endif
462 VMCOREINFO_NUMBER(PG_head_mask); 462 VMCOREINFO_NUMBER(PG_head_mask);
463#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
463 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); 464 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
464#ifdef CONFIG_HUGETLB_PAGE 465#ifdef CONFIG_HUGETLB_PAGE
465 VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR); 466 VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
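
A quick numerical check (plain C, not kernel code) of why exporting PAGE_BUDDY_MAPCOUNT_VALUE as ~PG_buddy keeps crash/makedumpfile working after the page_type rework: a page that has just gone through __SetPageBuddy() holds exactly ~PG_buddy in page_type.

	#include <stdio.h>

	#define PG_buddy 0x00000080u

	int main(void)
	{
		unsigned int page_type = 0xffffffffu;	/* page_type initial value */

		page_type &= ~PG_buddy;			/* __SetPageBuddy() */
		printf("page_type = %#x, ~PG_buddy = %#x\n", page_type, ~PG_buddy);
		return 0;
	}
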
diff --git a/kernel/fork.c b/kernel/fork.c
index 80b48a8fb47b..c6d1c1ce9ed7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -899,6 +899,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
899 mm->pinned_vm = 0; 899 mm->pinned_vm = 0;
900 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); 900 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
901 spin_lock_init(&mm->page_table_lock); 901 spin_lock_init(&mm->page_table_lock);
902 spin_lock_init(&mm->arg_lock);
902 mm_init_cpumask(mm); 903 mm_init_cpumask(mm);
903 mm_init_aio(mm); 904 mm_init_aio(mm);
904 mm_init_owner(mm, p); 905 mm_init_owner(mm, p);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 751593ed7c0b..32b479468e4d 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -44,6 +44,7 @@ int __read_mostly sysctl_hung_task_warnings = 10;
44 44
45static int __read_mostly did_panic; 45static int __read_mostly did_panic;
46static bool hung_task_show_lock; 46static bool hung_task_show_lock;
47static bool hung_task_call_panic;
47 48
48static struct task_struct *watchdog_task; 49static struct task_struct *watchdog_task;
49 50
@@ -127,10 +128,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
127 touch_nmi_watchdog(); 128 touch_nmi_watchdog();
128 129
129 if (sysctl_hung_task_panic) { 130 if (sysctl_hung_task_panic) {
130 if (hung_task_show_lock) 131 hung_task_show_lock = true;
131 debug_show_all_locks(); 132 hung_task_call_panic = true;
132 trigger_all_cpu_backtrace();
133 panic("hung_task: blocked tasks");
134 } 133 }
135} 134}
136 135
@@ -193,6 +192,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
193 rcu_read_unlock(); 192 rcu_read_unlock();
194 if (hung_task_show_lock) 193 if (hung_task_show_lock)
195 debug_show_all_locks(); 194 debug_show_all_locks();
195 if (hung_task_call_panic) {
196 trigger_all_cpu_backtrace();
197 panic("hung_task: blocked tasks");
198 }
196} 199}
197 200
198static long hung_timeout_jiffies(unsigned long last_checked, 201static long hung_timeout_jiffies(unsigned long last_checked,
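
A control-flow sketch of the hung_task change (standalone C with printf standing in for the real calls, and the sysctl gating omitted): the per-task check only records that a panic is wanted, and debug_show_all_locks(), trigger_all_cpu_backtrace() and panic() run once after the whole scan, outside the RCU read-side section.

	#include <stdbool.h>
	#include <stdio.h>

	static bool hung_task_show_lock;
	static bool hung_task_call_panic;

	static void check_hung_task(int blocked)	/* simplified stand-in */
	{
		if (blocked) {
			hung_task_show_lock = true;
			hung_task_call_panic = true;	/* defer the panic */
		}
	}

	int main(void)
	{
		int blocked[] = { 0, 1, 0 };

		for (unsigned int i = 0; i < sizeof(blocked) / sizeof(blocked[0]); i++)
			check_hung_task(blocked[i]);

		if (hung_task_show_lock)
			printf("debug_show_all_locks()\n");
		if (hung_task_call_panic)
			printf("trigger_all_cpu_backtrace(); panic(\"hung_task\")\n");
		return 0;
	}
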
diff --git a/kernel/sys.c b/kernel/sys.c
index d1b2b8d934bb..38509dc1f77b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2018,7 +2018,11 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
2018 return error; 2018 return error;
2019 } 2019 }
2020 2020
2021 down_write(&mm->mmap_sem); 2021 /*
 2022 * arg_lock protects concurrent updates but we still need mmap_sem for
2023 * read to exclude races with sys_brk.
2024 */
2025 down_read(&mm->mmap_sem);
2022 2026
2023 /* 2027 /*
2024 * We don't validate if these members are pointing to 2028 * We don't validate if these members are pointing to
@@ -2032,6 +2036,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
2032 * to any problem in kernel itself 2036 * to any problem in kernel itself
2033 */ 2037 */
2034 2038
2039 spin_lock(&mm->arg_lock);
2035 mm->start_code = prctl_map.start_code; 2040 mm->start_code = prctl_map.start_code;
2036 mm->end_code = prctl_map.end_code; 2041 mm->end_code = prctl_map.end_code;
2037 mm->start_data = prctl_map.start_data; 2042 mm->start_data = prctl_map.start_data;
@@ -2043,6 +2048,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
2043 mm->arg_end = prctl_map.arg_end; 2048 mm->arg_end = prctl_map.arg_end;
2044 mm->env_start = prctl_map.env_start; 2049 mm->env_start = prctl_map.env_start;
2045 mm->env_end = prctl_map.env_end; 2050 mm->env_end = prctl_map.env_end;
2051 spin_unlock(&mm->arg_lock);
2046 2052
2047 /* 2053 /*
2048 * Note this update of @saved_auxv is lockless thus 2054 * Note this update of @saved_auxv is lockless thus
@@ -2055,7 +2061,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
2055 if (prctl_map.auxv_size) 2061 if (prctl_map.auxv_size)
2056 memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); 2062 memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
2057 2063
2058 up_write(&mm->mmap_sem); 2064 up_read(&mm->mmap_sem);
2059 return 0; 2065 return 0;
2060} 2066}
2061#endif /* CONFIG_CHECKPOINT_RESTORE */ 2067#endif /* CONFIG_CHECKPOINT_RESTORE */
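
A pthread sketch (not kernel code; the names mirror the patch) of the locking scheme prctl_set_mm_map() switches to: the new arg_lock serialises writers of the arg/env boundaries, and mmap_sem is now only taken for read, which is enough to exclude a racing sys_brk().

	#include <pthread.h>
	#include <stdio.h>

	static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;
	static pthread_mutex_t arg_lock = PTHREAD_MUTEX_INITIALIZER;
	static unsigned long arg_start, arg_end;

	static void set_mm_map(unsigned long start, unsigned long end)
	{
		pthread_rwlock_rdlock(&mmap_sem);	/* was a write lock before */
		pthread_mutex_lock(&arg_lock);		/* serialise concurrent updaters */
		arg_start = start;
		arg_end = end;
		pthread_mutex_unlock(&arg_lock);
		pthread_rwlock_unlock(&mmap_sem);
	}

	int main(void)
	{
		set_mm_map(0x1000, 0x2000);
		printf("args: %#lx-%#lx\n", arg_start, arg_end);
		return 0;
	}
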
diff --git a/lib/bitmap.c b/lib/bitmap.c
index a42eff7e8c48..58f9750e49c6 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -64,12 +64,9 @@ EXPORT_SYMBOL(__bitmap_equal);
64 64
65void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) 65void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
66{ 66{
67 unsigned int k, lim = bits/BITS_PER_LONG; 67 unsigned int k, lim = BITS_TO_LONGS(bits);
68 for (k = 0; k < lim; ++k) 68 for (k = 0; k < lim; ++k)
69 dst[k] = ~src[k]; 69 dst[k] = ~src[k];
70
71 if (bits % BITS_PER_LONG)
72 dst[k] = ~src[k];
73} 70}
74EXPORT_SYMBOL(__bitmap_complement); 71EXPORT_SYMBOL(__bitmap_complement);
75 72
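
The __bitmap_complement() simplification is easy to verify in isolation: rounding up with BITS_TO_LONGS() makes the loop cover the partial last word too, so the separate tail statement was redundant (bits beyond 'bits' are ignored by bitmap users anyway). A standalone copy of the new body:

	#include <stdio.h>

	#define BITS_PER_LONG	(8 * sizeof(long))
	#define BITS_TO_LONGS(nr) (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

	static void bitmap_complement(unsigned long *dst, const unsigned long *src,
				      unsigned int bits)
	{
		unsigned int k, lim = BITS_TO_LONGS(bits);

		for (k = 0; k < lim; ++k)
			dst[k] = ~src[k];
	}

	int main(void)
	{
		unsigned long src[2] = { 0x55555555UL, 0xfUL };
		unsigned long dst[2] = { 0, 0 };

		bitmap_complement(dst, src, 40);	/* 40 bits ends mid-word */
		printf("%lx %lx\n", dst[0], dst[1]);
		return 0;
	}
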
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
index 266a97c5708b..ade3ce6c4af6 100644
--- a/lib/bucket_locks.c
+++ b/lib/bucket_locks.c
@@ -30,10 +30,7 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
30 } 30 }
31 31
32 if (sizeof(spinlock_t) != 0) { 32 if (sizeof(spinlock_t) != 0) {
33 if (gfpflags_allow_blocking(gfp)) 33 tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp);
34 tlocks = kvmalloc(size * sizeof(spinlock_t), gfp);
35 else
36 tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp);
37 if (!tlocks) 34 if (!tlocks)
38 return -ENOMEM; 35 return -ENOMEM;
39 for (i = 0; i < size; i++) 36 for (i = 0; i < size; i++)
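
Beyond dropping the gfpflags_allow_blocking() branch (kvmalloc_array() is expected to behave like a plain kmalloc_array() for non-blocking allocations), the switch also gains the multiply-overflow check that the open-coded size * sizeof(spinlock_t) lacked. A userspace sketch of that check (alloc_array() is invented for illustration):

	#include <stdio.h>
	#include <stdlib.h>

	static void *alloc_array(size_t n, size_t size)
	{
		size_t bytes;

		if (__builtin_mul_overflow(n, size, &bytes))
			return NULL;			/* n * size would wrap */
		return malloc(bytes);
	}

	int main(void)
	{
		printf("overflowing request: %p\n", alloc_array((size_t)-1, 64));
		printf("sane request:        %p\n", alloc_array(1024, 64));
		return 0;
	}
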
diff --git a/lib/idr.c b/lib/idr.c
index 823b813f08f8..ed9c169c12bd 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -4,9 +4,9 @@
4#include <linux/idr.h> 4#include <linux/idr.h>
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/spinlock.h> 6#include <linux/spinlock.h>
7#include <linux/xarray.h>
7 8
8DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); 9DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
9static DEFINE_SPINLOCK(simple_ida_lock);
10 10
11/** 11/**
12 * idr_alloc_u32() - Allocate an ID. 12 * idr_alloc_u32() - Allocate an ID.
@@ -581,7 +581,7 @@ again:
581 if (!ida_pre_get(ida, gfp_mask)) 581 if (!ida_pre_get(ida, gfp_mask))
582 return -ENOMEM; 582 return -ENOMEM;
583 583
584 spin_lock_irqsave(&simple_ida_lock, flags); 584 xa_lock_irqsave(&ida->ida_rt, flags);
585 ret = ida_get_new_above(ida, start, &id); 585 ret = ida_get_new_above(ida, start, &id);
586 if (!ret) { 586 if (!ret) {
587 if (id > max) { 587 if (id > max) {
@@ -591,7 +591,7 @@ again:
591 ret = id; 591 ret = id;
592 } 592 }
593 } 593 }
594 spin_unlock_irqrestore(&simple_ida_lock, flags); 594 xa_unlock_irqrestore(&ida->ida_rt, flags);
595 595
596 if (unlikely(ret == -EAGAIN)) 596 if (unlikely(ret == -EAGAIN))
597 goto again; 597 goto again;
@@ -615,8 +615,8 @@ void ida_simple_remove(struct ida *ida, unsigned int id)
615 unsigned long flags; 615 unsigned long flags;
616 616
617 BUG_ON((int)id < 0); 617 BUG_ON((int)id < 0);
618 spin_lock_irqsave(&simple_ida_lock, flags); 618 xa_lock_irqsave(&ida->ida_rt, flags);
619 ida_remove(ida, id); 619 ida_remove(ida, id);
620 spin_unlock_irqrestore(&simple_ida_lock, flags); 620 xa_unlock_irqrestore(&ida->ida_rt, flags);
621} 621}
622EXPORT_SYMBOL(ida_simple_remove); 622EXPORT_SYMBOL(ida_simple_remove);
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
index 7eceeddb3fb8..c2d6f4efcfbc 100644
--- a/lib/mpi/mpi-internal.h
+++ b/lib/mpi/mpi-internal.h
@@ -65,13 +65,6 @@
65typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ 65typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */
66typedef int mpi_size_t; /* (must be a signed type) */ 66typedef int mpi_size_t; /* (must be a signed type) */
67 67
68static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
69{
70 if (a->alloced < b)
71 return mpi_resize(a, b);
72 return 0;
73}
74
75/* Copy N limbs from S to D. */ 68/* Copy N limbs from S to D. */
76#define MPN_COPY(d, s, n) \ 69#define MPN_COPY(d, s, n) \
77 do { \ 70 do { \
@@ -80,13 +73,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
80 (d)[_i] = (s)[_i]; \ 73 (d)[_i] = (s)[_i]; \
81 } while (0) 74 } while (0)
82 75
83#define MPN_COPY_INCR(d, s, n) \
84 do { \
85 mpi_size_t _i; \
86 for (_i = 0; _i < (n); _i++) \
87 (d)[_i] = (s)[_i]; \
88 } while (0)
89
90#define MPN_COPY_DECR(d, s, n) \ 76#define MPN_COPY_DECR(d, s, n) \
91 do { \ 77 do { \
92 mpi_size_t _i; \ 78 mpi_size_t _i; \
@@ -111,15 +97,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
111 } \ 97 } \
112 } while (0) 98 } while (0)
113 99
114#define MPN_NORMALIZE_NOT_ZERO(d, n) \
115 do { \
116 for (;;) { \
117 if ((d)[(n)-1]) \
118 break; \
119 (n)--; \
120 } \
121 } while (0)
122
123#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ 100#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
124 do { \ 101 do { \
125 if ((size) < KARATSUBA_THRESHOLD) \ 102 if ((size) < KARATSUBA_THRESHOLD) \
@@ -128,46 +105,11 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
128 mul_n(prodp, up, vp, size, tspace); \ 105 mul_n(prodp, up, vp, size, tspace); \
129 } while (0); 106 } while (0);
130 107
131/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
132 * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
133 * If this would yield overflow, DI should be the largest possible number
134 * (i.e., only ones). For correct operation, the most significant bit of D
135 * has to be set. Put the quotient in Q and the remainder in R.
136 */
137#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
138 do { \
139 mpi_limb_t _q, _ql, _r; \
140 mpi_limb_t _xh, _xl; \
141 umul_ppmm(_q, _ql, (nh), (di)); \
142 _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \
143 umul_ppmm(_xh, _xl, _q, (d)); \
144 sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl); \
145 if (_xh) { \
146 sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \
147 _q++; \
148 if (_xh) { \
149 sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \
150 _q++; \
151 } \
152 } \
153 if (_r >= (d)) { \
154 _r -= (d); \
155 _q++; \
156 } \
157 (r) = _r; \
158 (q) = _q; \
159 } while (0)
160
161/*-- mpiutil.c --*/ 108/*-- mpiutil.c --*/
162mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs); 109mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
163void mpi_free_limb_space(mpi_ptr_t a); 110void mpi_free_limb_space(mpi_ptr_t a);
164void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs); 111void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
165 112
166/*-- mpi-bit.c --*/
167void mpi_rshift_limbs(MPI a, unsigned int count);
168int mpi_lshift_limbs(MPI a, unsigned int count);
169
170/*-- mpihelp-add.c --*/
171static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, 113static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
172 mpi_size_t s1_size, mpi_limb_t s2_limb); 114 mpi_size_t s1_size, mpi_limb_t s2_limb);
173mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, 115mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
@@ -175,7 +117,6 @@ mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
175static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, 117static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
176 mpi_ptr_t s2_ptr, mpi_size_t s2_size); 118 mpi_ptr_t s2_ptr, mpi_size_t s2_size);
177 119
178/*-- mpihelp-sub.c --*/
179static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, 120static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
180 mpi_size_t s1_size, mpi_limb_t s2_limb); 121 mpi_size_t s1_size, mpi_limb_t s2_limb);
181mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, 122mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
@@ -183,10 +124,10 @@ mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
183static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, 124static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
184 mpi_ptr_t s2_ptr, mpi_size_t s2_size); 125 mpi_ptr_t s2_ptr, mpi_size_t s2_size);
185 126
186/*-- mpihelp-cmp.c --*/ 127/*-- mpih-cmp.c --*/
187int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size); 128int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
188 129
189/*-- mpihelp-mul.c --*/ 130/*-- mpih-mul.c --*/
190 131
191struct karatsuba_ctx { 132struct karatsuba_ctx {
192 struct karatsuba_ctx *next; 133 struct karatsuba_ctx *next;
@@ -202,7 +143,6 @@ mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
202 mpi_size_t s1_size, mpi_limb_t s2_limb); 143 mpi_size_t s1_size, mpi_limb_t s2_limb);
203mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, 144mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
204 mpi_size_t s1_size, mpi_limb_t s2_limb); 145 mpi_size_t s1_size, mpi_limb_t s2_limb);
205int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
206int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, 146int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
207 mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result); 147 mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
208void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); 148void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
@@ -214,21 +154,16 @@ int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
214 mpi_ptr_t vp, mpi_size_t vsize, 154 mpi_ptr_t vp, mpi_size_t vsize,
215 struct karatsuba_ctx *ctx); 155 struct karatsuba_ctx *ctx);
216 156
217/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/ 157/*-- generic_mpih-mul1.c --*/
218mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, 158mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
219 mpi_size_t s1_size, mpi_limb_t s2_limb); 159 mpi_size_t s1_size, mpi_limb_t s2_limb);
220 160
221/*-- mpihelp-div.c --*/ 161/*-- mpih-div.c --*/
222mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
223 mpi_limb_t divisor_limb);
224mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, 162mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
225 mpi_ptr_t np, mpi_size_t nsize, 163 mpi_ptr_t np, mpi_size_t nsize,
226 mpi_ptr_t dp, mpi_size_t dsize); 164 mpi_ptr_t dp, mpi_size_t dsize);
227mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
228 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
229 mpi_limb_t divisor_limb);
230 165
231/*-- mpihelp-shift.c --*/ 166/*-- generic_mpih-[lr]shift.c --*/
232mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, 167mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
233 unsigned cnt); 168 unsigned cnt);
234mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, 169mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 6016f1deb1f5..9bbd9c5d375a 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -112,18 +112,6 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
112 min(pool->nr_free, pool->percpu_batch_size)); 112 min(pool->nr_free, pool->percpu_batch_size));
113} 113}
114 114
115static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
116{
117 int tag = -ENOSPC;
118
119 spin_lock(&tags->lock);
120 if (tags->nr_free)
121 tag = tags->freelist[--tags->nr_free];
122 spin_unlock(&tags->lock);
123
124 return tag;
125}
126
127/** 115/**
128 * percpu_ida_alloc - allocate a tag 116 * percpu_ida_alloc - allocate a tag
129 * @pool: pool to allocate from 117 * @pool: pool to allocate from
@@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
147 DEFINE_WAIT(wait); 135 DEFINE_WAIT(wait);
148 struct percpu_ida_cpu *tags; 136 struct percpu_ida_cpu *tags;
149 unsigned long flags; 137 unsigned long flags;
150 int tag; 138 int tag = -ENOSPC;
151 139
152 local_irq_save(flags); 140 tags = raw_cpu_ptr(pool->tag_cpu);
153 tags = this_cpu_ptr(pool->tag_cpu); 141 spin_lock_irqsave(&tags->lock, flags);
154 142
155 /* Fastpath */ 143 /* Fastpath */
156 tag = alloc_local_tag(tags); 144 if (likely(tags->nr_free >= 0)) {
157 if (likely(tag >= 0)) { 145 tag = tags->freelist[--tags->nr_free];
158 local_irq_restore(flags); 146 spin_unlock_irqrestore(&tags->lock, flags);
159 return tag; 147 return tag;
160 } 148 }
149 spin_unlock_irqrestore(&tags->lock, flags);
161 150
162 while (1) { 151 while (1) {
163 spin_lock(&pool->lock); 152 spin_lock_irqsave(&pool->lock, flags);
153 tags = this_cpu_ptr(pool->tag_cpu);
164 154
165 /* 155 /*
166 * prepare_to_wait() must come before steal_tags(), in case 156 * prepare_to_wait() must come before steal_tags(), in case
@@ -184,8 +174,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
184 &pool->cpus_have_tags); 174 &pool->cpus_have_tags);
185 } 175 }
186 176
187 spin_unlock(&pool->lock); 177 spin_unlock_irqrestore(&pool->lock, flags);
188 local_irq_restore(flags);
189 178
190 if (tag >= 0 || state == TASK_RUNNING) 179 if (tag >= 0 || state == TASK_RUNNING)
191 break; 180 break;
@@ -196,9 +185,6 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
196 } 185 }
197 186
198 schedule(); 187 schedule();
199
200 local_irq_save(flags);
201 tags = this_cpu_ptr(pool->tag_cpu);
202 } 188 }
203 if (state != TASK_RUNNING) 189 if (state != TASK_RUNNING)
204 finish_wait(&pool->wait, &wait); 190 finish_wait(&pool->wait, &wait);
@@ -222,28 +208,24 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
222 208
223 BUG_ON(tag >= pool->nr_tags); 209 BUG_ON(tag >= pool->nr_tags);
224 210
225 local_irq_save(flags); 211 tags = raw_cpu_ptr(pool->tag_cpu);
226 tags = this_cpu_ptr(pool->tag_cpu);
227 212
228 spin_lock(&tags->lock); 213 spin_lock_irqsave(&tags->lock, flags);
229 tags->freelist[tags->nr_free++] = tag; 214 tags->freelist[tags->nr_free++] = tag;
230 215
231 nr_free = tags->nr_free; 216 nr_free = tags->nr_free;
232 spin_unlock(&tags->lock);
233 217
234 if (nr_free == 1) { 218 if (nr_free == 1) {
235 cpumask_set_cpu(smp_processor_id(), 219 cpumask_set_cpu(smp_processor_id(),
236 &pool->cpus_have_tags); 220 &pool->cpus_have_tags);
237 wake_up(&pool->wait); 221 wake_up(&pool->wait);
238 } 222 }
223 spin_unlock_irqrestore(&tags->lock, flags);
239 224
240 if (nr_free == pool->percpu_max_size) { 225 if (nr_free == pool->percpu_max_size) {
241 spin_lock(&pool->lock); 226 spin_lock_irqsave(&pool->lock, flags);
227 spin_lock(&tags->lock);
242 228
243 /*
244 * Global lock held and irqs disabled, don't need percpu
245 * lock
246 */
247 if (tags->nr_free == pool->percpu_max_size) { 229 if (tags->nr_free == pool->percpu_max_size) {
248 move_tags(pool->freelist, &pool->nr_free, 230 move_tags(pool->freelist, &pool->nr_free,
249 tags->freelist, &tags->nr_free, 231 tags->freelist, &tags->nr_free,
@@ -251,10 +233,9 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
251 233
252 wake_up(&pool->wait); 234 wake_up(&pool->wait);
253 } 235 }
254 spin_unlock(&pool->lock); 236 spin_unlock(&tags->lock);
237 spin_unlock_irqrestore(&pool->lock, flags);
255 } 238 }
256
257 local_irq_restore(flags);
258} 239}
259EXPORT_SYMBOL_GPL(percpu_ida_free); 240EXPORT_SYMBOL_GPL(percpu_ida_free);
260 241
@@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
346 struct percpu_ida_cpu *remote; 327 struct percpu_ida_cpu *remote;
347 unsigned cpu, i, err = 0; 328 unsigned cpu, i, err = 0;
348 329
349 local_irq_save(flags);
350 for_each_possible_cpu(cpu) { 330 for_each_possible_cpu(cpu) {
351 remote = per_cpu_ptr(pool->tag_cpu, cpu); 331 remote = per_cpu_ptr(pool->tag_cpu, cpu);
352 spin_lock(&remote->lock); 332 spin_lock_irqsave(&remote->lock, flags);
353 for (i = 0; i < remote->nr_free; i++) { 333 for (i = 0; i < remote->nr_free; i++) {
354 err = fn(remote->freelist[i], data); 334 err = fn(remote->freelist[i], data);
355 if (err) 335 if (err)
356 break; 336 break;
357 } 337 }
358 spin_unlock(&remote->lock); 338 spin_unlock_irqrestore(&remote->lock, flags);
359 if (err) 339 if (err)
360 goto out; 340 goto out;
361 } 341 }
362 342
363 spin_lock(&pool->lock); 343 spin_lock_irqsave(&pool->lock, flags);
364 for (i = 0; i < pool->nr_free; i++) { 344 for (i = 0; i < pool->nr_free; i++) {
365 err = fn(pool->freelist[i], data); 345 err = fn(pool->freelist[i], data);
366 if (err) 346 if (err)
367 break; 347 break;
368 } 348 }
369 spin_unlock(&pool->lock); 349 spin_unlock_irqrestore(&pool->lock, flags);
370out: 350out:
371 local_irq_restore(flags);
372 return err; 351 return err;
373} 352}
374EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); 353EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
index d7e06b28de38..0a559a42359b 100644
--- a/lib/ucs2_string.c
+++ b/lib/ucs2_string.c
@@ -112,3 +112,5 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
112 return j; 112 return j;
113} 113}
114EXPORT_SYMBOL(ucs2_as_utf8); 114EXPORT_SYMBOL(ucs2_as_utf8);
115
116MODULE_LICENSE("GPL v2");
diff --git a/mm/Kconfig b/mm/Kconfig
index 3e0b6e87f65d..00bffa7a5112 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -754,3 +754,6 @@ config GUP_BENCHMARK
754 performance of get_user_pages_fast(). 754 performance of get_user_pages_fast().
755 755
756 See tools/testing/selftests/vm/gup_benchmark.c 756 See tools/testing/selftests/vm/gup_benchmark.c
757
758config ARCH_HAS_PTE_SPECIAL
759 bool
diff --git a/mm/Makefile b/mm/Makefile
index b4e54a9ae9c5..8716bdabe1e6 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -105,3 +105,4 @@ obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
105obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o 105obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
106obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o 106obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
107obj-$(CONFIG_HMM) += hmm.o 107obj-$(CONFIG_HMM) += hmm.o
108obj-$(CONFIG_MEMFD_CREATE) += memfd.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8fe3ebd6ac00..347cc834c04a 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -557,7 +557,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
557 memcg = mem_cgroup_from_css(memcg_css); 557 memcg = mem_cgroup_from_css(memcg_css);
558 blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); 558 blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
559 blkcg = css_to_blkcg(blkcg_css); 559 blkcg = css_to_blkcg(blkcg_css);
560 memcg_cgwb_list = mem_cgroup_cgwb_list(memcg); 560 memcg_cgwb_list = &memcg->cgwb_list;
561 blkcg_cgwb_list = &blkcg->cgwb_list; 561 blkcg_cgwb_list = &blkcg->cgwb_list;
562 562
563 /* look up again under lock and discard on blkcg mismatch */ 563 /* look up again under lock and discard on blkcg mismatch */
@@ -736,7 +736,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
736 */ 736 */
737void wb_memcg_offline(struct mem_cgroup *memcg) 737void wb_memcg_offline(struct mem_cgroup *memcg)
738{ 738{
739 struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg); 739 struct list_head *memcg_cgwb_list = &memcg->cgwb_list;
740 struct bdi_writeback *wb, *next; 740 struct bdi_writeback *wb, *next;
741 741
742 spin_lock_irq(&cgwb_lock); 742 spin_lock_irq(&cgwb_lock);
diff --git a/mm/filemap.c b/mm/filemap.c
index 0604cb02e6f3..52517f28e6f4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2489,7 +2489,7 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
2489 * 2489 *
2490 * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. 2490 * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
2491 */ 2491 */
2492int filemap_fault(struct vm_fault *vmf) 2492vm_fault_t filemap_fault(struct vm_fault *vmf)
2493{ 2493{
2494 int error; 2494 int error;
2495 struct file *file = vmf->vma->vm_file; 2495 struct file *file = vmf->vma->vm_file;
@@ -2499,7 +2499,7 @@ int filemap_fault(struct vm_fault *vmf)
2499 pgoff_t offset = vmf->pgoff; 2499 pgoff_t offset = vmf->pgoff;
2500 pgoff_t max_off; 2500 pgoff_t max_off;
2501 struct page *page; 2501 struct page *page;
2502 int ret = 0; 2502 vm_fault_t ret = 0;
2503 2503
2504 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 2504 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
2505 if (unlikely(offset >= max_off)) 2505 if (unlikely(offset >= max_off))
@@ -2693,11 +2693,11 @@ next:
2693} 2693}
2694EXPORT_SYMBOL(filemap_map_pages); 2694EXPORT_SYMBOL(filemap_map_pages);
2695 2695
2696int filemap_page_mkwrite(struct vm_fault *vmf) 2696vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
2697{ 2697{
2698 struct page *page = vmf->page; 2698 struct page *page = vmf->page;
2699 struct inode *inode = file_inode(vmf->vma->vm_file); 2699 struct inode *inode = file_inode(vmf->vma->vm_file);
2700 int ret = VM_FAULT_LOCKED; 2700 vm_fault_t ret = VM_FAULT_LOCKED;
2701 2701
2702 sb_start_pagefault(inode->i_sb); 2702 sb_start_pagefault(inode->i_sb);
2703 file_update_time(vmf->vma->vm_file); 2703 file_update_time(vmf->vma->vm_file);
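
The filemap.c conversion above is part of the tree-wide move from plain int to vm_fault_t for fault handlers, so that fault codes and errnos stop sharing a type. A tiny standalone illustration of the mix-up this is meant to expose (the typedef and VM_FAULT_* values here are simplified stand-ins, not the kernel definitions):

#include <errno.h>
#include <stdio.h>

/* Stand-ins only: in the kernel, vm_fault_t is intended to become a
 * __bitwise type so static checkers can flag errno/fault-code mixing. */
typedef unsigned int vm_fault_t;
#define VM_FAULT_OOM    0x000001
#define VM_FAULT_SIGBUS 0x000002

static vm_fault_t buggy_fault(void)
{
        /* The classic mistake: a negative errno escapes through a path
         * whose callers interpret the value as VM_FAULT_* flag bits. */
        return -ENOMEM;
}

int main(void)
{
        vm_fault_t ret = buggy_fault();

        /* The negative errno wraps to a huge "flag" value; nothing in the
         * type system objects, which is exactly what the dedicated type
         * is meant to make checkable. */
        printf("fault handler returned %#x (meant as -ENOMEM)\n", ret);
        return 0;
}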
diff --git a/mm/gup.c b/mm/gup.c
index 541904a7c60f..1020c7f8f5ee 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -212,53 +212,69 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
212 unsigned long address, pud_t *pudp, 212 unsigned long address, pud_t *pudp,
213 unsigned int flags, unsigned int *page_mask) 213 unsigned int flags, unsigned int *page_mask)
214{ 214{
215 pmd_t *pmd; 215 pmd_t *pmd, pmdval;
216 spinlock_t *ptl; 216 spinlock_t *ptl;
217 struct page *page; 217 struct page *page;
218 struct mm_struct *mm = vma->vm_mm; 218 struct mm_struct *mm = vma->vm_mm;
219 219
220 pmd = pmd_offset(pudp, address); 220 pmd = pmd_offset(pudp, address);
221 if (pmd_none(*pmd)) 221 /*
222 * The READ_ONCE() will stabilize the pmdval in a register or
223 * on the stack so that it will stop changing under the code.
224 */
225 pmdval = READ_ONCE(*pmd);
226 if (pmd_none(pmdval))
222 return no_page_table(vma, flags); 227 return no_page_table(vma, flags);
223 if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { 228 if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
224 page = follow_huge_pmd(mm, address, pmd, flags); 229 page = follow_huge_pmd(mm, address, pmd, flags);
225 if (page) 230 if (page)
226 return page; 231 return page;
227 return no_page_table(vma, flags); 232 return no_page_table(vma, flags);
228 } 233 }
229 if (is_hugepd(__hugepd(pmd_val(*pmd)))) { 234 if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
230 page = follow_huge_pd(vma, address, 235 page = follow_huge_pd(vma, address,
231 __hugepd(pmd_val(*pmd)), flags, 236 __hugepd(pmd_val(pmdval)), flags,
232 PMD_SHIFT); 237 PMD_SHIFT);
233 if (page) 238 if (page)
234 return page; 239 return page;
235 return no_page_table(vma, flags); 240 return no_page_table(vma, flags);
236 } 241 }
237retry: 242retry:
238 if (!pmd_present(*pmd)) { 243 if (!pmd_present(pmdval)) {
239 if (likely(!(flags & FOLL_MIGRATION))) 244 if (likely(!(flags & FOLL_MIGRATION)))
240 return no_page_table(vma, flags); 245 return no_page_table(vma, flags);
241 VM_BUG_ON(thp_migration_supported() && 246 VM_BUG_ON(thp_migration_supported() &&
242 !is_pmd_migration_entry(*pmd)); 247 !is_pmd_migration_entry(pmdval));
243 if (is_pmd_migration_entry(*pmd)) 248 if (is_pmd_migration_entry(pmdval))
244 pmd_migration_entry_wait(mm, pmd); 249 pmd_migration_entry_wait(mm, pmd);
250 pmdval = READ_ONCE(*pmd);
251 /*
252 * MADV_DONTNEED may convert the pmd to null because
253 * mmap_sem is held in read mode
254 */
255 if (pmd_none(pmdval))
256 return no_page_table(vma, flags);
245 goto retry; 257 goto retry;
246 } 258 }
247 if (pmd_devmap(*pmd)) { 259 if (pmd_devmap(pmdval)) {
248 ptl = pmd_lock(mm, pmd); 260 ptl = pmd_lock(mm, pmd);
249 page = follow_devmap_pmd(vma, address, pmd, flags); 261 page = follow_devmap_pmd(vma, address, pmd, flags);
250 spin_unlock(ptl); 262 spin_unlock(ptl);
251 if (page) 263 if (page)
252 return page; 264 return page;
253 } 265 }
254 if (likely(!pmd_trans_huge(*pmd))) 266 if (likely(!pmd_trans_huge(pmdval)))
255 return follow_page_pte(vma, address, pmd, flags); 267 return follow_page_pte(vma, address, pmd, flags);
256 268
257 if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) 269 if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
258 return no_page_table(vma, flags); 270 return no_page_table(vma, flags);
259 271
260retry_locked: 272retry_locked:
261 ptl = pmd_lock(mm, pmd); 273 ptl = pmd_lock(mm, pmd);
274 if (unlikely(pmd_none(*pmd))) {
275 spin_unlock(ptl);
276 return no_page_table(vma, flags);
277 }
262 if (unlikely(!pmd_present(*pmd))) { 278 if (unlikely(!pmd_present(*pmd))) {
263 spin_unlock(ptl); 279 spin_unlock(ptl);
264 if (likely(!(flags & FOLL_MIGRATION))) 280 if (likely(!(flags & FOLL_MIGRATION)))
@@ -1354,7 +1370,7 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
1354 } 1370 }
1355} 1371}
1356 1372
1357#ifdef __HAVE_ARCH_PTE_SPECIAL 1373#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
1358static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, 1374static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
1359 int write, struct page **pages, int *nr) 1375 int write, struct page **pages, int *nr)
1360{ 1376{
@@ -1430,7 +1446,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
1430{ 1446{
1431 return 0; 1447 return 0;
1432} 1448}
1433#endif /* __HAVE_ARCH_PTE_SPECIAL */ 1449#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
1434 1450
1435#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) 1451#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
1436static int __gup_device_huge(unsigned long pfn, unsigned long addr, 1452static int __gup_device_huge(unsigned long pfn, unsigned long addr,
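
The comment added above explains why follow_pmd_mask() now works on a READ_ONCE() snapshot (pmdval) rather than dereferencing *pmd again at every check. A minimal userspace sketch of the difference (a C11 atomic stands in for the pmd entry; names are invented):

#include <stdatomic.h>
#include <stdio.h>

/* A stand-in for a page-table entry that another thread may clear. */
static _Atomic unsigned long entry = 0x1234;

static int present(unsigned long v) { return v != 0; }

int main(void)
{
        /*
         * Racy pattern: each test re-reads 'entry', so it can be present
         * for the first check and already cleared by the second.
         */
        if (present(atomic_load(&entry)) && (atomic_load(&entry) & 0xff))
                puts("racy path: decisions may be based on two different values");

        /*
         * Snapshot pattern (what the patch does with READ_ONCE()):
         * read once, then make every decision against the same value.
         */
        unsigned long val = atomic_load(&entry);
        if (present(val) && (val & 0xff))
                printf("snapshot path: all checks used %#lx\n", val);
        return 0;
}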
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ac5591d8622c..ba8fdc0b6e7f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -483,11 +483,8 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
483 483
484static inline struct list_head *page_deferred_list(struct page *page) 484static inline struct list_head *page_deferred_list(struct page *page)
485{ 485{
486 /* 486 /* ->lru in the tail pages is occupied by compound_head. */
487 * ->lru in the tail pages is occupied by compound_head. 487 return &page[2].deferred_list;
488 * Let's use ->mapping + ->index in the second tail page as list_head.
489 */
490 return (struct list_head *)&page[2].mapping;
491} 488}
492 489
493void prep_transhuge_page(struct page *page) 490void prep_transhuge_page(struct page *page)
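
The hunk above drops the cast-over-&page[2].mapping trick in page_deferred_list() in favour of a named page[2].deferred_list field (presumably added to struct page elsewhere in this series). A rough self-contained model of that idiom, with the layout heavily simplified:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

/* Simplified tail-page layout: unrelated fields share storage via a union,
 * so the deferred list gets a real name instead of a cast over ->mapping. */
struct fake_page {
        union {
                struct {                         /* normal use of these words */
                        void *mapping;
                        unsigned long index;
                };
                struct list_head deferred_list;  /* second tail page only */
        };
};

int main(void)
{
        struct fake_page p[3];

        /* Before: callers had to know the trick and cast it themselves. */
        struct list_head *old_way = (struct list_head *)&p[2].mapping;

        /* After: the union documents the overlay and type-checks the use. */
        struct list_head *new_way = &p[2].deferred_list;

        printf("same storage: %d\n", (void *)old_way == (void *)new_way);
        return 0;
}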
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 129088710510..696befffe6f7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3159,7 +3159,7 @@ static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
3159 * hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get 3159 * hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get
3160 * this far. 3160 * this far.
3161 */ 3161 */
3162static int hugetlb_vm_op_fault(struct vm_fault *vmf) 3162static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
3163{ 3163{
3164 BUG(); 3164 BUG();
3165 return 0; 3165 return 0;
@@ -3686,6 +3686,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
3686 struct page *page; 3686 struct page *page;
3687 pte_t new_pte; 3687 pte_t new_pte;
3688 spinlock_t *ptl; 3688 spinlock_t *ptl;
3689 unsigned long haddr = address & huge_page_mask(h);
3689 3690
3690 /* 3691 /*
3691 * Currently, we are forced to kill the process in the event the 3692 * Currently, we are forced to kill the process in the event the
@@ -3716,7 +3717,7 @@ retry:
3716 u32 hash; 3717 u32 hash;
3717 struct vm_fault vmf = { 3718 struct vm_fault vmf = {
3718 .vma = vma, 3719 .vma = vma,
3719 .address = address, 3720 .address = haddr,
3720 .flags = flags, 3721 .flags = flags,
3721 /* 3722 /*
3722 * Hard to debug if it ends up being 3723 * Hard to debug if it ends up being
@@ -3733,14 +3734,14 @@ retry:
3733 * fault to make calling code simpler. 3734 * fault to make calling code simpler.
3734 */ 3735 */
3735 hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, 3736 hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
3736 idx, address); 3737 idx, haddr);
3737 mutex_unlock(&hugetlb_fault_mutex_table[hash]); 3738 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
3738 ret = handle_userfault(&vmf, VM_UFFD_MISSING); 3739 ret = handle_userfault(&vmf, VM_UFFD_MISSING);
3739 mutex_lock(&hugetlb_fault_mutex_table[hash]); 3740 mutex_lock(&hugetlb_fault_mutex_table[hash]);
3740 goto out; 3741 goto out;
3741 } 3742 }
3742 3743
3743 page = alloc_huge_page(vma, address, 0); 3744 page = alloc_huge_page(vma, haddr, 0);
3744 if (IS_ERR(page)) { 3745 if (IS_ERR(page)) {
3745 ret = PTR_ERR(page); 3746 ret = PTR_ERR(page);
3746 if (ret == -ENOMEM) 3747 if (ret == -ENOMEM)
@@ -3789,12 +3790,12 @@ retry:
3789 * the spinlock. 3790 * the spinlock.
3790 */ 3791 */
3791 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { 3792 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
3792 if (vma_needs_reservation(h, vma, address) < 0) { 3793 if (vma_needs_reservation(h, vma, haddr) < 0) {
3793 ret = VM_FAULT_OOM; 3794 ret = VM_FAULT_OOM;
3794 goto backout_unlocked; 3795 goto backout_unlocked;
3795 } 3796 }
3796 /* Just decrements count, does not deallocate */ 3797 /* Just decrements count, does not deallocate */
3797 vma_end_reservation(h, vma, address); 3798 vma_end_reservation(h, vma, haddr);
3798 } 3799 }
3799 3800
3800 ptl = huge_pte_lock(h, mm, ptep); 3801 ptl = huge_pte_lock(h, mm, ptep);
@@ -3808,17 +3809,17 @@ retry:
3808 3809
3809 if (anon_rmap) { 3810 if (anon_rmap) {
3810 ClearPagePrivate(page); 3811 ClearPagePrivate(page);
3811 hugepage_add_new_anon_rmap(page, vma, address); 3812 hugepage_add_new_anon_rmap(page, vma, haddr);
3812 } else 3813 } else
3813 page_dup_rmap(page, true); 3814 page_dup_rmap(page, true);
3814 new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) 3815 new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
3815 && (vma->vm_flags & VM_SHARED))); 3816 && (vma->vm_flags & VM_SHARED)));
3816 set_huge_pte_at(mm, address, ptep, new_pte); 3817 set_huge_pte_at(mm, haddr, ptep, new_pte);
3817 3818
3818 hugetlb_count_add(pages_per_huge_page(h), mm); 3819 hugetlb_count_add(pages_per_huge_page(h), mm);
3819 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { 3820 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
3820 /* Optimization, do the COW without a second fault */ 3821 /* Optimization, do the COW without a second fault */
3821 ret = hugetlb_cow(mm, vma, address, ptep, page, ptl); 3822 ret = hugetlb_cow(mm, vma, haddr, ptep, page, ptl);
3822 } 3823 }
3823 3824
3824 spin_unlock(ptl); 3825 spin_unlock(ptl);
@@ -3830,7 +3831,7 @@ backout:
3830 spin_unlock(ptl); 3831 spin_unlock(ptl);
3831backout_unlocked: 3832backout_unlocked:
3832 unlock_page(page); 3833 unlock_page(page);
3833 restore_reserve_on_error(h, vma, address, page); 3834 restore_reserve_on_error(h, vma, haddr, page);
3834 put_page(page); 3835 put_page(page);
3835 goto out; 3836 goto out;
3836} 3837}
@@ -3883,10 +3884,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3883 struct hstate *h = hstate_vma(vma); 3884 struct hstate *h = hstate_vma(vma);
3884 struct address_space *mapping; 3885 struct address_space *mapping;
3885 int need_wait_lock = 0; 3886 int need_wait_lock = 0;
3887 unsigned long haddr = address & huge_page_mask(h);
3886 3888
3887 address &= huge_page_mask(h); 3889 ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
3888
3889 ptep = huge_pte_offset(mm, address, huge_page_size(h));
3890 if (ptep) { 3890 if (ptep) {
3891 entry = huge_ptep_get(ptep); 3891 entry = huge_ptep_get(ptep);
3892 if (unlikely(is_hugetlb_entry_migration(entry))) { 3892 if (unlikely(is_hugetlb_entry_migration(entry))) {
@@ -3896,20 +3896,20 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3896 return VM_FAULT_HWPOISON_LARGE | 3896 return VM_FAULT_HWPOISON_LARGE |
3897 VM_FAULT_SET_HINDEX(hstate_index(h)); 3897 VM_FAULT_SET_HINDEX(hstate_index(h));
3898 } else { 3898 } else {
3899 ptep = huge_pte_alloc(mm, address, huge_page_size(h)); 3899 ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
3900 if (!ptep) 3900 if (!ptep)
3901 return VM_FAULT_OOM; 3901 return VM_FAULT_OOM;
3902 } 3902 }
3903 3903
3904 mapping = vma->vm_file->f_mapping; 3904 mapping = vma->vm_file->f_mapping;
3905 idx = vma_hugecache_offset(h, vma, address); 3905 idx = vma_hugecache_offset(h, vma, haddr);
3906 3906
3907 /* 3907 /*
3908 * Serialize hugepage allocation and instantiation, so that we don't 3908 * Serialize hugepage allocation and instantiation, so that we don't
3909 * get spurious allocation failures if two CPUs race to instantiate 3909 * get spurious allocation failures if two CPUs race to instantiate
3910 * the same page in the page cache. 3910 * the same page in the page cache.
3911 */ 3911 */
3912 hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, address); 3912 hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, haddr);
3913 mutex_lock(&hugetlb_fault_mutex_table[hash]); 3913 mutex_lock(&hugetlb_fault_mutex_table[hash]);
3914 3914
3915 entry = huge_ptep_get(ptep); 3915 entry = huge_ptep_get(ptep);
@@ -3939,16 +3939,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3939 * consumed. 3939 * consumed.
3940 */ 3940 */
3941 if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) { 3941 if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) {
3942 if (vma_needs_reservation(h, vma, address) < 0) { 3942 if (vma_needs_reservation(h, vma, haddr) < 0) {
3943 ret = VM_FAULT_OOM; 3943 ret = VM_FAULT_OOM;
3944 goto out_mutex; 3944 goto out_mutex;
3945 } 3945 }
3946 /* Just decrements count, does not deallocate */ 3946 /* Just decrements count, does not deallocate */
3947 vma_end_reservation(h, vma, address); 3947 vma_end_reservation(h, vma, haddr);
3948 3948
3949 if (!(vma->vm_flags & VM_MAYSHARE)) 3949 if (!(vma->vm_flags & VM_MAYSHARE))
3950 pagecache_page = hugetlbfs_pagecache_page(h, 3950 pagecache_page = hugetlbfs_pagecache_page(h,
3951 vma, address); 3951 vma, haddr);
3952 } 3952 }
3953 3953
3954 ptl = huge_pte_lock(h, mm, ptep); 3954 ptl = huge_pte_lock(h, mm, ptep);
@@ -3973,16 +3973,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3973 3973
3974 if (flags & FAULT_FLAG_WRITE) { 3974 if (flags & FAULT_FLAG_WRITE) {
3975 if (!huge_pte_write(entry)) { 3975 if (!huge_pte_write(entry)) {
3976 ret = hugetlb_cow(mm, vma, address, ptep, 3976 ret = hugetlb_cow(mm, vma, haddr, ptep,
3977 pagecache_page, ptl); 3977 pagecache_page, ptl);
3978 goto out_put_page; 3978 goto out_put_page;
3979 } 3979 }
3980 entry = huge_pte_mkdirty(entry); 3980 entry = huge_pte_mkdirty(entry);
3981 } 3981 }
3982 entry = pte_mkyoung(entry); 3982 entry = pte_mkyoung(entry);
3983 if (huge_ptep_set_access_flags(vma, address, ptep, entry, 3983 if (huge_ptep_set_access_flags(vma, haddr, ptep, entry,
3984 flags & FAULT_FLAG_WRITE)) 3984 flags & FAULT_FLAG_WRITE))
3985 update_mmu_cache(vma, address, ptep); 3985 update_mmu_cache(vma, haddr, ptep);
3986out_put_page: 3986out_put_page:
3987 if (page != pagecache_page) 3987 if (page != pagecache_page)
3988 unlock_page(page); 3988 unlock_page(page);
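
The hugetlb.c hunks above compute the huge-page-aligned address once, as haddr = address & huge_page_mask(h), and pass it everywhere an aligned address is wanted. A trivial illustration of what that mask does (a 2 MiB huge page size and an arbitrary faulting address are assumed for the example):

#include <stdio.h>

int main(void)
{
        unsigned long huge_page_size = 2UL << 20;        /* 2 MiB PMD-sized page */
        unsigned long huge_page_mask = ~(huge_page_size - 1);

        unsigned long address = 0x7f3a12345678UL;        /* arbitrary faulting address */
        unsigned long haddr = address & huge_page_mask;  /* what the patch names 'haddr' */

        printf("address = %#lx\n", address);
        printf("haddr   = %#lx (offset into huge page: %#lx)\n",
               haddr, address - haddr);
        return 0;
}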
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index eec1150125b9..68c2f2f3c05b 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -84,7 +84,7 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
84 84
85 limit = round_down(PAGE_COUNTER_MAX, 85 limit = round_down(PAGE_COUNTER_MAX,
86 1 << huge_page_order(&hstates[idx])); 86 1 << huge_page_order(&hstates[idx]));
87 ret = page_counter_limit(counter, limit); 87 ret = page_counter_set_max(counter, limit);
88 VM_BUG_ON(ret); 88 VM_BUG_ON(ret);
89 } 89 }
90} 90}
@@ -273,7 +273,7 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
273 case RES_USAGE: 273 case RES_USAGE:
274 return (u64)page_counter_read(counter) * PAGE_SIZE; 274 return (u64)page_counter_read(counter) * PAGE_SIZE;
275 case RES_LIMIT: 275 case RES_LIMIT:
276 return (u64)counter->limit * PAGE_SIZE; 276 return (u64)counter->max * PAGE_SIZE;
277 case RES_MAX_USAGE: 277 case RES_MAX_USAGE:
278 return (u64)counter->watermark * PAGE_SIZE; 278 return (u64)counter->watermark * PAGE_SIZE;
279 case RES_FAILCNT: 279 case RES_FAILCNT:
@@ -306,7 +306,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
306 switch (MEMFILE_ATTR(of_cft(of)->private)) { 306 switch (MEMFILE_ATTR(of_cft(of)->private)) {
307 case RES_LIMIT: 307 case RES_LIMIT:
308 mutex_lock(&hugetlb_limit_mutex); 308 mutex_lock(&hugetlb_limit_mutex);
309 ret = page_counter_limit(&h_cg->hugepage[idx], nr_pages); 309 ret = page_counter_set_max(&h_cg->hugepage[idx], nr_pages);
310 mutex_unlock(&hugetlb_limit_mutex); 310 mutex_unlock(&hugetlb_limit_mutex);
311 break; 311 break;
312 default: 312 default:
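
The hugetlb_cgroup.c hunks above track the rename of page_counter_limit()/counter->limit to page_counter_set_max()/counter->max. A toy, single-threaded model of the semantics the counter expresses (the real implementation is atomic and hierarchical; the failcnt bump on rejection mirrors the kernel's behaviour):

#include <stdio.h>

struct page_counter { unsigned long usage, max, failcnt; };

static int page_counter_try_charge(struct page_counter *c, unsigned long n)
{
        if (c->usage + n > c->max) {
                c->failcnt++;
                return 0;              /* charge rejected */
        }
        c->usage += n;
        return 1;
}

int main(void)
{
        struct page_counter c = { .usage = 0, .max = 8, .failcnt = 0 };

        printf("charge 6: %d\n", page_counter_try_charge(&c, 6));
        printf("charge 4: %d (failcnt=%lu)\n",
               page_counter_try_charge(&c, 4), c.failcnt);
        return 0;
}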
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f94d5d15ebc0..f0179c9c04c2 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -22,6 +22,7 @@ struct mm_struct init_mm = {
22 .mm_count = ATOMIC_INIT(1), 22 .mm_count = ATOMIC_INIT(1),
23 .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), 23 .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
24 .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), 24 .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
25 .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
25 .mmlist = LIST_HEAD_INIT(init_mm.mmlist), 26 .mmlist = LIST_HEAD_INIT(init_mm.mmlist),
26 .user_ns = &init_user_ns, 27 .user_ns = &init_user_ns,
27 INIT_MM_CONTEXT(init_mm) 28 INIT_MM_CONTEXT(init_mm)
diff --git a/mm/ksm.c b/mm/ksm.c
index 7d6558f3bac9..e2d2886fb1df 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -840,6 +840,17 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
840 return err; 840 return err;
841} 841}
842 842
843static inline struct stable_node *page_stable_node(struct page *page)
844{
845 return PageKsm(page) ? page_rmapping(page) : NULL;
846}
847
848static inline void set_page_stable_node(struct page *page,
849 struct stable_node *stable_node)
850{
851 page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
852}
853
843#ifdef CONFIG_SYSFS 854#ifdef CONFIG_SYSFS
844/* 855/*
845 * Only called through the sysfs control interface: 856 * Only called through the sysfs control interface:
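
page_stable_node()/set_page_stable_node(), moved into ksm.c above, encode the stable_node pointer in page->mapping with PAGE_MAPPING_KSM or'ed into the low bits of the (suitably aligned) pointer. A standalone sketch of that pointer-tagging idiom, with invented constants and types:

#include <stdio.h>
#include <stdlib.h>

#define MAPPING_KSM 0x2UL   /* stand-in for PAGE_MAPPING_KSM: a low tag bit */

struct stable_node { int id; };

int main(void)
{
        /* malloc() returns storage aligned well beyond 4 bytes, so the low
         * bits of the pointer are guaranteed zero and can carry a tag. */
        struct stable_node *node = malloc(sizeof(*node));
        node->id = 42;

        void *mapping = (void *)((unsigned long)node | MAPPING_KSM);

        int is_ksm = ((unsigned long)mapping & MAPPING_KSM) != 0;
        struct stable_node *back =
                (struct stable_node *)((unsigned long)mapping & ~MAPPING_KSM);

        printf("tagged: %d, recovered id: %d\n", is_ksm, back->id);
        free(node);
        return 0;
}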
diff --git a/mm/memblock.c b/mm/memblock.c
index 5108356ad8aa..93ad42bc8a73 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -68,7 +68,7 @@ ulong __init_memblock choose_memblock_flags(void)
68/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ 68/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
69static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) 69static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
70{ 70{
71 return *size = min(*size, (phys_addr_t)ULLONG_MAX - base); 71 return *size = min(*size, PHYS_ADDR_MAX - base);
72} 72}
73 73
74/* 74/*
@@ -697,6 +697,11 @@ static int __init_memblock memblock_remove_range(struct memblock_type *type,
697 697
698int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) 698int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
699{ 699{
700 phys_addr_t end = base + size - 1;
701
702 memblock_dbg("memblock_remove: [%pa-%pa] %pS\n",
703 &base, &end, (void *)_RET_IP_);
704
700 return memblock_remove_range(&memblock.memory, base, size); 705 return memblock_remove_range(&memblock.memory, base, size);
701} 706}
702 707
@@ -925,7 +930,7 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
925 r = &type_b->regions[idx_b]; 930 r = &type_b->regions[idx_b];
926 r_start = idx_b ? r[-1].base + r[-1].size : 0; 931 r_start = idx_b ? r[-1].base + r[-1].size : 0;
927 r_end = idx_b < type_b->cnt ? 932 r_end = idx_b < type_b->cnt ?
928 r->base : (phys_addr_t)ULLONG_MAX; 933 r->base : PHYS_ADDR_MAX;
929 934
930 /* 935 /*
931 * if idx_b advanced past idx_a, 936 * if idx_b advanced past idx_a,
@@ -1041,7 +1046,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
1041 r = &type_b->regions[idx_b]; 1046 r = &type_b->regions[idx_b];
1042 r_start = idx_b ? r[-1].base + r[-1].size : 0; 1047 r_start = idx_b ? r[-1].base + r[-1].size : 0;
1043 r_end = idx_b < type_b->cnt ? 1048 r_end = idx_b < type_b->cnt ?
1044 r->base : (phys_addr_t)ULLONG_MAX; 1049 r->base : PHYS_ADDR_MAX;
1045 /* 1050 /*
1046 * if idx_b advanced past idx_a, 1051 * if idx_b advanced past idx_a,
1047 * break out to advance idx_a 1052 * break out to advance idx_a
@@ -1516,13 +1521,13 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
1516 1521
1517static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) 1522static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
1518{ 1523{
1519 phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; 1524 phys_addr_t max_addr = PHYS_ADDR_MAX;
1520 struct memblock_region *r; 1525 struct memblock_region *r;
1521 1526
1522 /* 1527 /*
1523 * translate the memory @limit size into the max address within one of 1528 * translate the memory @limit size into the max address within one of
1524 * the memory memblock regions, if the @limit exceeds the total size 1529 * the memory memblock regions, if the @limit exceeds the total size
1525 * of those regions, max_addr will keep original value ULLONG_MAX 1530 * of those regions, max_addr will keep original value PHYS_ADDR_MAX
1526 */ 1531 */
1527 for_each_memblock(memory, r) { 1532 for_each_memblock(memory, r) {
1528 if (limit <= r->size) { 1533 if (limit <= r->size) {
@@ -1537,7 +1542,7 @@ static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
1537 1542
1538void __init memblock_enforce_memory_limit(phys_addr_t limit) 1543void __init memblock_enforce_memory_limit(phys_addr_t limit)
1539{ 1544{
1540 phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; 1545 phys_addr_t max_addr = PHYS_ADDR_MAX;
1541 1546
1542 if (!limit) 1547 if (!limit)
1543 return; 1548 return;
@@ -1545,14 +1550,14 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
1545 max_addr = __find_max_addr(limit); 1550 max_addr = __find_max_addr(limit);
1546 1551
1547 /* @limit exceeds the total size of the memory, do nothing */ 1552 /* @limit exceeds the total size of the memory, do nothing */
1548 if (max_addr == (phys_addr_t)ULLONG_MAX) 1553 if (max_addr == PHYS_ADDR_MAX)
1549 return; 1554 return;
1550 1555
1551 /* truncate both memory and reserved regions */ 1556 /* truncate both memory and reserved regions */
1552 memblock_remove_range(&memblock.memory, max_addr, 1557 memblock_remove_range(&memblock.memory, max_addr,
1553 (phys_addr_t)ULLONG_MAX); 1558 PHYS_ADDR_MAX);
1554 memblock_remove_range(&memblock.reserved, max_addr, 1559 memblock_remove_range(&memblock.reserved, max_addr,
1555 (phys_addr_t)ULLONG_MAX); 1560 PHYS_ADDR_MAX);
1556} 1561}
1557 1562
1558void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) 1563void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
@@ -1580,7 +1585,7 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
1580 /* truncate the reserved regions */ 1585 /* truncate the reserved regions */
1581 memblock_remove_range(&memblock.reserved, 0, base); 1586 memblock_remove_range(&memblock.reserved, 0, base);
1582 memblock_remove_range(&memblock.reserved, 1587 memblock_remove_range(&memblock.reserved,
1583 base + size, (phys_addr_t)ULLONG_MAX); 1588 base + size, PHYS_ADDR_MAX);
1584} 1589}
1585 1590
1586void __init memblock_mem_limit_remove_map(phys_addr_t limit) 1591void __init memblock_mem_limit_remove_map(phys_addr_t limit)
@@ -1593,7 +1598,7 @@ void __init memblock_mem_limit_remove_map(phys_addr_t limit)
1593 max_addr = __find_max_addr(limit); 1598 max_addr = __find_max_addr(limit);
1594 1599
1595 /* @limit exceeds the total size of the memory, do nothing */ 1600 /* @limit exceeds the total size of the memory, do nothing */
1596 if (max_addr == (phys_addr_t)ULLONG_MAX) 1601 if (max_addr == PHYS_ADDR_MAX)
1597 return; 1602 return;
1598 1603
1599 memblock_cap_memory_range(0, max_addr); 1604 memblock_cap_memory_range(0, max_addr);
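
The memblock hunks above replace the (phys_addr_t)ULLONG_MAX idiom with PHYS_ADDR_MAX: on a 32-bit phys_addr_t the cast only produced the right value by truncation, while the typed constant states the intent directly. A small illustration, assuming a 32-bit phys_addr_t and a locally defined stand-in for the constant:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <limits.h>

/* Pretend phys_addr_t is 32-bit, as on !PHYS_ADDR_T_64BIT configurations. */
typedef uint32_t phys_addr_t;
#define PHYS_ADDR_MAX ((phys_addr_t)~0)

int main(void)
{
        phys_addr_t a = (phys_addr_t)ULLONG_MAX;  /* old spelling: relies on truncation */
        phys_addr_t b = PHYS_ADDR_MAX;            /* new spelling: typed, self-describing */

        printf("old: %#" PRIx32 "\nnew: %#" PRIx32 "\nequal: %d\n", a, b, a == b);
        return 0;
}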
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1695f38630f1..c1e64d60ed02 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1034,13 +1034,13 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
1034 unsigned long limit; 1034 unsigned long limit;
1035 1035
1036 count = page_counter_read(&memcg->memory); 1036 count = page_counter_read(&memcg->memory);
1037 limit = READ_ONCE(memcg->memory.limit); 1037 limit = READ_ONCE(memcg->memory.max);
1038 if (count < limit) 1038 if (count < limit)
1039 margin = limit - count; 1039 margin = limit - count;
1040 1040
1041 if (do_memsw_account()) { 1041 if (do_memsw_account()) {
1042 count = page_counter_read(&memcg->memsw); 1042 count = page_counter_read(&memcg->memsw);
1043 limit = READ_ONCE(memcg->memsw.limit); 1043 limit = READ_ONCE(memcg->memsw.max);
1044 if (count <= limit) 1044 if (count <= limit)
1045 margin = min(margin, limit - count); 1045 margin = min(margin, limit - count);
1046 else 1046 else
@@ -1148,13 +1148,13 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
1148 1148
1149 pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n", 1149 pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n",
1150 K((u64)page_counter_read(&memcg->memory)), 1150 K((u64)page_counter_read(&memcg->memory)),
1151 K((u64)memcg->memory.limit), memcg->memory.failcnt); 1151 K((u64)memcg->memory.max), memcg->memory.failcnt);
1152 pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n", 1152 pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n",
1153 K((u64)page_counter_read(&memcg->memsw)), 1153 K((u64)page_counter_read(&memcg->memsw)),
1154 K((u64)memcg->memsw.limit), memcg->memsw.failcnt); 1154 K((u64)memcg->memsw.max), memcg->memsw.failcnt);
1155 pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n", 1155 pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n",
1156 K((u64)page_counter_read(&memcg->kmem)), 1156 K((u64)page_counter_read(&memcg->kmem)),
1157 K((u64)memcg->kmem.limit), memcg->kmem.failcnt); 1157 K((u64)memcg->kmem.max), memcg->kmem.failcnt);
1158 1158
1159 for_each_mem_cgroup_tree(iter, memcg) { 1159 for_each_mem_cgroup_tree(iter, memcg) {
1160 pr_info("Memory cgroup stats for "); 1160 pr_info("Memory cgroup stats for ");
@@ -1179,21 +1179,21 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
1179/* 1179/*
1180 * Return the memory (and swap, if configured) limit for a memcg. 1180 * Return the memory (and swap, if configured) limit for a memcg.
1181 */ 1181 */
1182unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) 1182unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
1183{ 1183{
1184 unsigned long limit; 1184 unsigned long max;
1185 1185
1186 limit = memcg->memory.limit; 1186 max = memcg->memory.max;
1187 if (mem_cgroup_swappiness(memcg)) { 1187 if (mem_cgroup_swappiness(memcg)) {
1188 unsigned long memsw_limit; 1188 unsigned long memsw_max;
1189 unsigned long swap_limit; 1189 unsigned long swap_max;
1190 1190
1191 memsw_limit = memcg->memsw.limit; 1191 memsw_max = memcg->memsw.max;
1192 swap_limit = memcg->swap.limit; 1192 swap_max = memcg->swap.max;
1193 swap_limit = min(swap_limit, (unsigned long)total_swap_pages); 1193 swap_max = min(swap_max, (unsigned long)total_swap_pages);
1194 limit = min(limit + swap_limit, memsw_limit); 1194 max = min(max + swap_max, memsw_max);
1195 } 1195 }
1196 return limit; 1196 return max;
1197} 1197}
1198 1198
1199static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, 1199static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
@@ -2444,12 +2444,13 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
2444} 2444}
2445#endif 2445#endif
2446 2446
2447static DEFINE_MUTEX(memcg_limit_mutex); 2447static DEFINE_MUTEX(memcg_max_mutex);
2448 2448
2449static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, 2449static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
2450 unsigned long limit, bool memsw) 2450 unsigned long max, bool memsw)
2451{ 2451{
2452 bool enlarge = false; 2452 bool enlarge = false;
2453 bool drained = false;
2453 int ret; 2454 int ret;
2454 bool limits_invariant; 2455 bool limits_invariant;
2455 struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory; 2456 struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory;
@@ -2460,26 +2461,32 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
2460 break; 2461 break;
2461 } 2462 }
2462 2463
2463 mutex_lock(&memcg_limit_mutex); 2464 mutex_lock(&memcg_max_mutex);
2464 /* 2465 /*
2465 * Make sure that the new limit (memsw or memory limit) doesn't 2466 * Make sure that the new limit (memsw or memory limit) doesn't
2466 * break our basic invariant rule memory.limit <= memsw.limit. 2467 * break our basic invariant rule memory.max <= memsw.max.
2467 */ 2468 */
2468 limits_invariant = memsw ? limit >= memcg->memory.limit : 2469 limits_invariant = memsw ? max >= memcg->memory.max :
2469 limit <= memcg->memsw.limit; 2470 max <= memcg->memsw.max;
2470 if (!limits_invariant) { 2471 if (!limits_invariant) {
2471 mutex_unlock(&memcg_limit_mutex); 2472 mutex_unlock(&memcg_max_mutex);
2472 ret = -EINVAL; 2473 ret = -EINVAL;
2473 break; 2474 break;
2474 } 2475 }
2475 if (limit > counter->limit) 2476 if (max > counter->max)
2476 enlarge = true; 2477 enlarge = true;
2477 ret = page_counter_limit(counter, limit); 2478 ret = page_counter_set_max(counter, max);
2478 mutex_unlock(&memcg_limit_mutex); 2479 mutex_unlock(&memcg_max_mutex);
2479 2480
2480 if (!ret) 2481 if (!ret)
2481 break; 2482 break;
2482 2483
2484 if (!drained) {
2485 drain_all_stock(memcg);
2486 drained = true;
2487 continue;
2488 }
2489
2483 if (!try_to_free_mem_cgroup_pages(memcg, 1, 2490 if (!try_to_free_mem_cgroup_pages(memcg, 1,
2484 GFP_KERNEL, !memsw)) { 2491 GFP_KERNEL, !memsw)) {
2485 ret = -EBUSY; 2492 ret = -EBUSY;
@@ -2603,6 +2610,9 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
2603 2610
2604 /* we call try-to-free pages for make this cgroup empty */ 2611 /* we call try-to-free pages for make this cgroup empty */
2605 lru_add_drain_all(); 2612 lru_add_drain_all();
2613
2614 drain_all_stock(memcg);
2615
2606 /* try to free all pages in this cgroup */ 2616 /* try to free all pages in this cgroup */
2607 while (nr_retries && page_counter_read(&memcg->memory)) { 2617 while (nr_retries && page_counter_read(&memcg->memory)) {
2608 int progress; 2618 int progress;
@@ -2757,7 +2767,7 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
2757 return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE; 2767 return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE;
2758 return (u64)page_counter_read(counter) * PAGE_SIZE; 2768 return (u64)page_counter_read(counter) * PAGE_SIZE;
2759 case RES_LIMIT: 2769 case RES_LIMIT:
2760 return (u64)counter->limit * PAGE_SIZE; 2770 return (u64)counter->max * PAGE_SIZE;
2761 case RES_MAX_USAGE: 2771 case RES_MAX_USAGE:
2762 return (u64)counter->watermark * PAGE_SIZE; 2772 return (u64)counter->watermark * PAGE_SIZE;
2763 case RES_FAILCNT: 2773 case RES_FAILCNT:
@@ -2871,24 +2881,24 @@ static void memcg_free_kmem(struct mem_cgroup *memcg)
2871} 2881}
2872#endif /* !CONFIG_SLOB */ 2882#endif /* !CONFIG_SLOB */
2873 2883
2874static int memcg_update_kmem_limit(struct mem_cgroup *memcg, 2884static int memcg_update_kmem_max(struct mem_cgroup *memcg,
2875 unsigned long limit) 2885 unsigned long max)
2876{ 2886{
2877 int ret; 2887 int ret;
2878 2888
2879 mutex_lock(&memcg_limit_mutex); 2889 mutex_lock(&memcg_max_mutex);
2880 ret = page_counter_limit(&memcg->kmem, limit); 2890 ret = page_counter_set_max(&memcg->kmem, max);
2881 mutex_unlock(&memcg_limit_mutex); 2891 mutex_unlock(&memcg_max_mutex);
2882 return ret; 2892 return ret;
2883} 2893}
2884 2894
2885static int memcg_update_tcp_limit(struct mem_cgroup *memcg, unsigned long limit) 2895static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
2886{ 2896{
2887 int ret; 2897 int ret;
2888 2898
2889 mutex_lock(&memcg_limit_mutex); 2899 mutex_lock(&memcg_max_mutex);
2890 2900
2891 ret = page_counter_limit(&memcg->tcpmem, limit); 2901 ret = page_counter_set_max(&memcg->tcpmem, max);
2892 if (ret) 2902 if (ret)
2893 goto out; 2903 goto out;
2894 2904
@@ -2913,7 +2923,7 @@ static int memcg_update_tcp_limit(struct mem_cgroup *memcg, unsigned long limit)
2913 memcg->tcpmem_active = true; 2923 memcg->tcpmem_active = true;
2914 } 2924 }
2915out: 2925out:
2916 mutex_unlock(&memcg_limit_mutex); 2926 mutex_unlock(&memcg_max_mutex);
2917 return ret; 2927 return ret;
2918} 2928}
2919 2929
@@ -2941,16 +2951,16 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
2941 } 2951 }
2942 switch (MEMFILE_TYPE(of_cft(of)->private)) { 2952 switch (MEMFILE_TYPE(of_cft(of)->private)) {
2943 case _MEM: 2953 case _MEM:
2944 ret = mem_cgroup_resize_limit(memcg, nr_pages, false); 2954 ret = mem_cgroup_resize_max(memcg, nr_pages, false);
2945 break; 2955 break;
2946 case _MEMSWAP: 2956 case _MEMSWAP:
2947 ret = mem_cgroup_resize_limit(memcg, nr_pages, true); 2957 ret = mem_cgroup_resize_max(memcg, nr_pages, true);
2948 break; 2958 break;
2949 case _KMEM: 2959 case _KMEM:
2950 ret = memcg_update_kmem_limit(memcg, nr_pages); 2960 ret = memcg_update_kmem_max(memcg, nr_pages);
2951 break; 2961 break;
2952 case _TCP: 2962 case _TCP:
2953 ret = memcg_update_tcp_limit(memcg, nr_pages); 2963 ret = memcg_update_tcp_max(memcg, nr_pages);
2954 break; 2964 break;
2955 } 2965 }
2956 break; 2966 break;
@@ -3083,7 +3093,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
3083#endif /* CONFIG_NUMA */ 3093#endif /* CONFIG_NUMA */
3084 3094
3085/* Universal VM events cgroup1 shows, original sort order */ 3095/* Universal VM events cgroup1 shows, original sort order */
3086unsigned int memcg1_events[] = { 3096static const unsigned int memcg1_events[] = {
3087 PGPGIN, 3097 PGPGIN,
3088 PGPGOUT, 3098 PGPGOUT,
3089 PGFAULT, 3099 PGFAULT,
@@ -3126,8 +3136,8 @@ static int memcg_stat_show(struct seq_file *m, void *v)
3126 /* Hierarchical information */ 3136 /* Hierarchical information */
3127 memory = memsw = PAGE_COUNTER_MAX; 3137 memory = memsw = PAGE_COUNTER_MAX;
3128 for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) { 3138 for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) {
3129 memory = min(memory, mi->memory.limit); 3139 memory = min(memory, mi->memory.max);
3130 memsw = min(memsw, mi->memsw.limit); 3140 memsw = min(memsw, mi->memsw.max);
3131 } 3141 }
3132 seq_printf(m, "hierarchical_memory_limit %llu\n", 3142 seq_printf(m, "hierarchical_memory_limit %llu\n",
3133 (u64)memory * PAGE_SIZE); 3143 (u64)memory * PAGE_SIZE);
@@ -3562,11 +3572,6 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
3562 3572
3563#ifdef CONFIG_CGROUP_WRITEBACK 3573#ifdef CONFIG_CGROUP_WRITEBACK
3564 3574
3565struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg)
3566{
3567 return &memcg->cgwb_list;
3568}
3569
3570static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp) 3575static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
3571{ 3576{
3572 return wb_domain_init(&memcg->cgwb_domain, gfp); 3577 return wb_domain_init(&memcg->cgwb_domain, gfp);
@@ -3626,7 +3631,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
3626 *pheadroom = PAGE_COUNTER_MAX; 3631 *pheadroom = PAGE_COUNTER_MAX;
3627 3632
3628 while ((parent = parent_mem_cgroup(memcg))) { 3633 while ((parent = parent_mem_cgroup(memcg))) {
3629 unsigned long ceiling = min(memcg->memory.limit, memcg->high); 3634 unsigned long ceiling = min(memcg->memory.max, memcg->high);
3630 unsigned long used = page_counter_read(&memcg->memory); 3635 unsigned long used = page_counter_read(&memcg->memory);
3631 3636
3632 *pheadroom = min(*pheadroom, ceiling - min(ceiling, used)); 3637 *pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
@@ -4270,7 +4275,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
4270 } 4275 }
4271 spin_unlock(&memcg->event_list_lock); 4276 spin_unlock(&memcg->event_list_lock);
4272 4277
4273 memcg->low = 0; 4278 page_counter_set_min(&memcg->memory, 0);
4279 page_counter_set_low(&memcg->memory, 0);
4274 4280
4275 memcg_offline_kmem(memcg); 4281 memcg_offline_kmem(memcg);
4276 wb_memcg_offline(memcg); 4282 wb_memcg_offline(memcg);
@@ -4319,12 +4325,13 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
4319{ 4325{
4320 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 4326 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
4321 4327
4322 page_counter_limit(&memcg->memory, PAGE_COUNTER_MAX); 4328 page_counter_set_max(&memcg->memory, PAGE_COUNTER_MAX);
4323 page_counter_limit(&memcg->swap, PAGE_COUNTER_MAX); 4329 page_counter_set_max(&memcg->swap, PAGE_COUNTER_MAX);
4324 page_counter_limit(&memcg->memsw, PAGE_COUNTER_MAX); 4330 page_counter_set_max(&memcg->memsw, PAGE_COUNTER_MAX);
4325 page_counter_limit(&memcg->kmem, PAGE_COUNTER_MAX); 4331 page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX);
4326 page_counter_limit(&memcg->tcpmem, PAGE_COUNTER_MAX); 4332 page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
4327 memcg->low = 0; 4333 page_counter_set_min(&memcg->memory, 0);
4334 page_counter_set_low(&memcg->memory, 0);
4328 memcg->high = PAGE_COUNTER_MAX; 4335 memcg->high = PAGE_COUNTER_MAX;
4329 memcg->soft_limit = PAGE_COUNTER_MAX; 4336 memcg->soft_limit = PAGE_COUNTER_MAX;
4330 memcg_wb_domain_size_changed(memcg); 4337 memcg_wb_domain_size_changed(memcg);
@@ -5061,10 +5068,40 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
5061 return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE; 5068 return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
5062} 5069}
5063 5070
5071static int memory_min_show(struct seq_file *m, void *v)
5072{
5073 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
5074 unsigned long min = READ_ONCE(memcg->memory.min);
5075
5076 if (min == PAGE_COUNTER_MAX)
5077 seq_puts(m, "max\n");
5078 else
5079 seq_printf(m, "%llu\n", (u64)min * PAGE_SIZE);
5080
5081 return 0;
5082}
5083
5084static ssize_t memory_min_write(struct kernfs_open_file *of,
5085 char *buf, size_t nbytes, loff_t off)
5086{
5087 struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
5088 unsigned long min;
5089 int err;
5090
5091 buf = strstrip(buf);
5092 err = page_counter_memparse(buf, "max", &min);
5093 if (err)
5094 return err;
5095
5096 page_counter_set_min(&memcg->memory, min);
5097
5098 return nbytes;
5099}
5100
5064static int memory_low_show(struct seq_file *m, void *v) 5101static int memory_low_show(struct seq_file *m, void *v)
5065{ 5102{
5066 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); 5103 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
5067 unsigned long low = READ_ONCE(memcg->low); 5104 unsigned long low = READ_ONCE(memcg->memory.low);
5068 5105
5069 if (low == PAGE_COUNTER_MAX) 5106 if (low == PAGE_COUNTER_MAX)
5070 seq_puts(m, "max\n"); 5107 seq_puts(m, "max\n");
@@ -5086,7 +5123,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
5086 if (err) 5123 if (err)
5087 return err; 5124 return err;
5088 5125
5089 memcg->low = low; 5126 page_counter_set_low(&memcg->memory, low);
5090 5127
5091 return nbytes; 5128 return nbytes;
5092} 5129}
@@ -5131,7 +5168,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
5131static int memory_max_show(struct seq_file *m, void *v) 5168static int memory_max_show(struct seq_file *m, void *v)
5132{ 5169{
5133 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); 5170 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
5134 unsigned long max = READ_ONCE(memcg->memory.limit); 5171 unsigned long max = READ_ONCE(memcg->memory.max);
5135 5172
5136 if (max == PAGE_COUNTER_MAX) 5173 if (max == PAGE_COUNTER_MAX)
5137 seq_puts(m, "max\n"); 5174 seq_puts(m, "max\n");
@@ -5155,7 +5192,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
5155 if (err) 5192 if (err)
5156 return err; 5193 return err;
5157 5194
5158 xchg(&memcg->memory.limit, max); 5195 xchg(&memcg->memory.max, max);
5159 5196
5160 for (;;) { 5197 for (;;) {
5161 unsigned long nr_pages = page_counter_read(&memcg->memory); 5198 unsigned long nr_pages = page_counter_read(&memcg->memory);
@@ -5296,6 +5333,12 @@ static struct cftype memory_files[] = {
5296 .read_u64 = memory_current_read, 5333 .read_u64 = memory_current_read,
5297 }, 5334 },
5298 { 5335 {
5336 .name = "min",
5337 .flags = CFTYPE_NOT_ON_ROOT,
5338 .seq_show = memory_min_show,
5339 .write = memory_min_write,
5340 },
5341 {
5299 .name = "low", 5342 .name = "low",
5300 .flags = CFTYPE_NOT_ON_ROOT, 5343 .flags = CFTYPE_NOT_ON_ROOT,
5301 .seq_show = memory_low_show, 5344 .seq_show = memory_low_show,
@@ -5344,54 +5387,140 @@ struct cgroup_subsys memory_cgrp_subsys = {
5344}; 5387};
5345 5388
5346/** 5389/**
5347 * mem_cgroup_low - check if memory consumption is below the normal range 5390 * mem_cgroup_protected - check if memory consumption is in the normal range
5348 * @root: the top ancestor of the sub-tree being checked 5391 * @root: the top ancestor of the sub-tree being checked
5349 * @memcg: the memory cgroup to check 5392 * @memcg: the memory cgroup to check
5350 * 5393 *
5351 * Returns %true if memory consumption of @memcg, and that of all 5394 * WARNING: This function is not stateless! It can only be used as part
5352 * ancestors up to (but not including) @root, is below the normal range. 5395 * of a top-down tree iteration, not for isolated queries.
5396 *
5397 * Returns one of the following:
5398 * MEMCG_PROT_NONE: cgroup memory is not protected
5399 * MEMCG_PROT_LOW: cgroup memory is protected as long as there is
5400 * an unprotected supply of reclaimable memory from other cgroups.
5401 * MEMCG_PROT_MIN: cgroup memory is protected
5353 * 5402 *
5354 * @root is exclusive; it is never low when looked at directly and isn't 5403 * @root is exclusive; it is never protected when looked at directly
5355 * checked when traversing the hierarchy.
5356 * 5404 *
5357 * Excluding @root enables using memory.low to prioritize memory usage 5405 * To provide a proper hierarchical behavior, effective memory.min/low values
5358 * between cgroups within a subtree of the hierarchy that is limited by 5406 * are used. Below is the description of how effective memory.low is calculated.
5359 * memory.high or memory.max. 5407 * Effective memory.min value is calculated in the same way.
5360 * 5408 *
5361 * For example, given cgroup A with children B and C: 5409 * Effective memory.low is always equal or less than the original memory.low.
5410 * If there is no memory.low overcommitment (which is always true for
5411 * top-level memory cgroups), these two values are equal.
5412 * Otherwise, it's a part of parent's effective memory.low,
5413 * calculated as a cgroup's memory.low usage divided by sum of sibling's
5414 * memory.low usages, where memory.low usage is the size of actually
5415 * protected memory.
5362 * 5416 *
5363 * A 5417 * low_usage
5364 * / \ 5418 * elow = min( memory.low, parent->elow * ------------------ ),
5365 * B C 5419 * siblings_low_usage
5366 * 5420 *
5367 * and 5421 * | memory.current, if memory.current < memory.low
5422 * low_usage = |
5423 * | 0, otherwise.
5368 * 5424 *
5369 * 1. A/memory.current > A/memory.high
5370 * 2. A/B/memory.current < A/B/memory.low
5371 * 3. A/C/memory.current >= A/C/memory.low
5372 * 5425 *
5373 * As 'A' is high, i.e. triggers reclaim from 'A', and 'B' is low, we 5426 * Such definition of the effective memory.low provides the expected
5374 * should reclaim from 'C' until 'A' is no longer high or until we can 5427 * hierarchical behavior: parent's memory.low value is limiting
5375 * no longer reclaim from 'C'. If 'A', i.e. @root, isn't excluded by 5428 * children, unprotected memory is reclaimed first and cgroups,
5376 * mem_cgroup_low when reclaming from 'A', then 'B' won't be considered 5429 * which are not using their guarantee do not affect actual memory
5377 * low and we will reclaim indiscriminately from both 'B' and 'C'. 5430 * distribution.
5431 *
5432 * For example, if there are memcgs A, A/B, A/C, A/D and A/E:
5433 *
5434 * A A/memory.low = 2G, A/memory.current = 6G
5435 * //\\
5436 * BC DE B/memory.low = 3G B/memory.current = 2G
5437 * C/memory.low = 1G C/memory.current = 2G
5438 * D/memory.low = 0 D/memory.current = 2G
5439 * E/memory.low = 10G E/memory.current = 0
5440 *
5441 * and the memory pressure is applied, the following memory distribution
5442 * is expected (approximately):
5443 *
5444 * A/memory.current = 2G
5445 *
5446 * B/memory.current = 1.3G
5447 * C/memory.current = 0.6G
5448 * D/memory.current = 0
5449 * E/memory.current = 0
5450 *
5451 * These calculations require constant tracking of the actual low usages
5452 * (see propagate_protected_usage()), as well as recursive calculation of
5453 * effective memory.low values. But as we do call mem_cgroup_protected()
5454 * path for each memory cgroup top-down from the reclaim,
5455 * it's possible to optimize this part, and save calculated elow
5456 * for next usage. This part is intentionally racy, but it's ok,
5457 * as memory.low is a best-effort mechanism.
5378 */ 5458 */
5379bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) 5459enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
5460 struct mem_cgroup *memcg)
5380{ 5461{
5462 struct mem_cgroup *parent;
5463 unsigned long emin, parent_emin;
5464 unsigned long elow, parent_elow;
5465 unsigned long usage;
5466
5381 if (mem_cgroup_disabled()) 5467 if (mem_cgroup_disabled())
5382 return false; 5468 return MEMCG_PROT_NONE;
5383 5469
5384 if (!root) 5470 if (!root)
5385 root = root_mem_cgroup; 5471 root = root_mem_cgroup;
5386 if (memcg == root) 5472 if (memcg == root)
5387 return false; 5473 return MEMCG_PROT_NONE;
5474
5475 usage = page_counter_read(&memcg->memory);
5476 if (!usage)
5477 return MEMCG_PROT_NONE;
5478
5479 emin = memcg->memory.min;
5480 elow = memcg->memory.low;
5481
5482 parent = parent_mem_cgroup(memcg);
5483 if (parent == root)
5484 goto exit;
5388 5485
5389 for (; memcg != root; memcg = parent_mem_cgroup(memcg)) { 5486 parent_emin = READ_ONCE(parent->memory.emin);
5390 if (page_counter_read(&memcg->memory) >= memcg->low) 5487 emin = min(emin, parent_emin);
5391 return false; 5488 if (emin && parent_emin) {
5489 unsigned long min_usage, siblings_min_usage;
5490
5491 min_usage = min(usage, memcg->memory.min);
5492 siblings_min_usage = atomic_long_read(
5493 &parent->memory.children_min_usage);
5494
5495 if (min_usage && siblings_min_usage)
5496 emin = min(emin, parent_emin * min_usage /
5497 siblings_min_usage);
5392 } 5498 }
5393 5499
5394 return true; 5500 parent_elow = READ_ONCE(parent->memory.elow);
5501 elow = min(elow, parent_elow);
5502 if (elow && parent_elow) {
5503 unsigned long low_usage, siblings_low_usage;
5504
5505 low_usage = min(usage, memcg->memory.low);
5506 siblings_low_usage = atomic_long_read(
5507 &parent->memory.children_low_usage);
5508
5509 if (low_usage && siblings_low_usage)
5510 elow = min(elow, parent_elow * low_usage /
5511 siblings_low_usage);
5512 }
5513
5514exit:
5515 memcg->memory.emin = emin;
5516 memcg->memory.elow = elow;
5517
5518 if (usage <= emin)
5519 return MEMCG_PROT_MIN;
5520 else if (usage <= elow)
5521 return MEMCG_PROT_LOW;
5522 else
5523 return MEMCG_PROT_NONE;
5395} 5524}
5396 5525
5397/** 5526/**
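
The new mem_cgroup_protected() above derives an effective low as elow = min(memory.low, parent->elow * low_usage / siblings_low_usage). The following standalone program recomputes that value for the A/B/C/D/E example in the comment (units are GiB; low_usage is taken as min(usage, low), matching the code); it models only the arithmetic, not the reclaim loop, and reproduces the roughly 1.3G/0.6G protection quoted for B and C:

#include <stdio.h>

struct cg { const char *name; double usage, low; };

int main(void)
{
        double parent_elow = 2.0;                 /* A: top level, so elow == memory.low */
        struct cg kids[] = {
                { "B", 2.0,  3.0 },
                { "C", 2.0,  1.0 },
                { "D", 2.0,  0.0 },
                { "E", 0.0, 10.0 },               /* no usage, so its protection is moot */
        };
        double siblings_low_usage = 0.0;

        for (int i = 0; i < 4; i++) {
                double lu = kids[i].usage < kids[i].low ? kids[i].usage : kids[i].low;
                siblings_low_usage += lu;
        }

        for (int i = 0; i < 4; i++) {
                double lu = kids[i].usage < kids[i].low ? kids[i].usage : kids[i].low;
                /* elow starts capped by the parent's effective low ... */
                double elow = kids[i].low < parent_elow ? kids[i].low : parent_elow;
                /* ... then is scaled by this child's share of protected usage. */
                if (lu > 0 && siblings_low_usage > 0) {
                        double share = parent_elow * lu / siblings_low_usage;
                        if (share < elow)
                                elow = share;
                }
                printf("%s: low_usage=%.2fG effective low=%.2fG\n",
                       kids[i].name, lu, elow);
        }
        return 0;
}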
@@ -6012,10 +6141,17 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
6012 if (!memcg) 6141 if (!memcg)
6013 return 0; 6142 return 0;
6014 6143
6144 if (!entry.val) {
6145 memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
6146 return 0;
6147 }
6148
6015 memcg = mem_cgroup_id_get_online(memcg); 6149 memcg = mem_cgroup_id_get_online(memcg);
6016 6150
6017 if (!mem_cgroup_is_root(memcg) && 6151 if (!mem_cgroup_is_root(memcg) &&
6018 !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) { 6152 !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
6153 memcg_memory_event(memcg, MEMCG_SWAP_MAX);
6154 memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
6019 mem_cgroup_id_put(memcg); 6155 mem_cgroup_id_put(memcg);
6020 return -ENOMEM; 6156 return -ENOMEM;
6021 } 6157 }
@@ -6067,7 +6203,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
6067 return nr_swap_pages; 6203 return nr_swap_pages;
6068 for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) 6204 for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
6069 nr_swap_pages = min_t(long, nr_swap_pages, 6205 nr_swap_pages = min_t(long, nr_swap_pages,
6070 READ_ONCE(memcg->swap.limit) - 6206 READ_ONCE(memcg->swap.max) -
6071 page_counter_read(&memcg->swap)); 6207 page_counter_read(&memcg->swap));
6072 return nr_swap_pages; 6208 return nr_swap_pages;
6073} 6209}
@@ -6088,7 +6224,7 @@ bool mem_cgroup_swap_full(struct page *page)
6088 return false; 6224 return false;
6089 6225
6090 for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) 6226 for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
6091 if (page_counter_read(&memcg->swap) * 2 >= memcg->swap.limit) 6227 if (page_counter_read(&memcg->swap) * 2 >= memcg->swap.max)
6092 return true; 6228 return true;
6093 6229
6094 return false; 6230 return false;
@@ -6122,7 +6258,7 @@ static u64 swap_current_read(struct cgroup_subsys_state *css,
6122static int swap_max_show(struct seq_file *m, void *v) 6258static int swap_max_show(struct seq_file *m, void *v)
6123{ 6259{
6124 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); 6260 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
6125 unsigned long max = READ_ONCE(memcg->swap.limit); 6261 unsigned long max = READ_ONCE(memcg->swap.max);
6126 6262
6127 if (max == PAGE_COUNTER_MAX) 6263 if (max == PAGE_COUNTER_MAX)
6128 seq_puts(m, "max\n"); 6264 seq_puts(m, "max\n");
@@ -6144,15 +6280,23 @@ static ssize_t swap_max_write(struct kernfs_open_file *of,
6144 if (err) 6280 if (err)
6145 return err; 6281 return err;
6146 6282
6147 mutex_lock(&memcg_limit_mutex); 6283 xchg(&memcg->swap.max, max);
6148 err = page_counter_limit(&memcg->swap, max);
6149 mutex_unlock(&memcg_limit_mutex);
6150 if (err)
6151 return err;
6152 6284
6153 return nbytes; 6285 return nbytes;
6154} 6286}
6155 6287
6288static int swap_events_show(struct seq_file *m, void *v)
6289{
6290 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
6291
6292 seq_printf(m, "max %lu\n",
6293 atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
6294 seq_printf(m, "fail %lu\n",
6295 atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]));
6296
6297 return 0;
6298}
6299
6156static struct cftype swap_files[] = { 6300static struct cftype swap_files[] = {
6157 { 6301 {
6158 .name = "swap.current", 6302 .name = "swap.current",
@@ -6165,6 +6309,12 @@ static struct cftype swap_files[] = {
6165 .seq_show = swap_max_show, 6309 .seq_show = swap_max_show,
6166 .write = swap_max_write, 6310 .write = swap_max_write,
6167 }, 6311 },
6312 {
6313 .name = "swap.events",
6314 .flags = CFTYPE_NOT_ON_ROOT,
6315 .file_offset = offsetof(struct mem_cgroup, swap_events_file),
6316 .seq_show = swap_events_show,
6317 },
6168 { } /* terminate */ 6318 { } /* terminate */
6169}; 6319};
6170 6320
diff --git a/mm/memfd.c b/mm/memfd.c
new file mode 100644
index 000000000000..27069518e3c5
--- /dev/null
+++ b/mm/memfd.c
@@ -0,0 +1,345 @@
1/*
2 * memfd_create system call and file sealing support
3 *
4 * Code was originally included in shmem.c, and broken out to facilitate
5 * use by hugetlbfs as well as tmpfs.
6 *
7 * This file is released under the GPL.
8 */
9
10#include <linux/fs.h>
11#include <linux/vfs.h>
12#include <linux/pagemap.h>
13#include <linux/file.h>
14#include <linux/mm.h>
15#include <linux/sched/signal.h>
16#include <linux/khugepaged.h>
17#include <linux/syscalls.h>
18#include <linux/hugetlb.h>
19#include <linux/shmem_fs.h>
20#include <linux/memfd.h>
21#include <uapi/linux/memfd.h>
22
23/*
24 * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
25 * so reuse a tag which we firmly believe is never set or cleared on tmpfs
26 * or hugetlbfs because they are memory only filesystems.
27 */
28#define MEMFD_TAG_PINNED PAGECACHE_TAG_TOWRITE
29#define LAST_SCAN 4 /* about 150ms max */
30
31static void memfd_tag_pins(struct address_space *mapping)
32{
33 struct radix_tree_iter iter;
34 void __rcu **slot;
35 pgoff_t start;
36 struct page *page;
37
38 lru_add_drain();
39 start = 0;
40 rcu_read_lock();
41
42 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
43 page = radix_tree_deref_slot(slot);
44 if (!page || radix_tree_exception(page)) {
45 if (radix_tree_deref_retry(page)) {
46 slot = radix_tree_iter_retry(&iter);
47 continue;
48 }
49 } else if (page_count(page) - page_mapcount(page) > 1) {
50 xa_lock_irq(&mapping->i_pages);
51 radix_tree_tag_set(&mapping->i_pages, iter.index,
52 MEMFD_TAG_PINNED);
53 xa_unlock_irq(&mapping->i_pages);
54 }
55
56 if (need_resched()) {
57 slot = radix_tree_iter_resume(slot, &iter);
58 cond_resched_rcu();
59 }
60 }
61 rcu_read_unlock();
62}
63
64/*
65 * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
66 * via get_user_pages(), drivers might have some pending I/O without any active
67 * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
68 * and see whether it has an elevated ref-count. If so, we tag them and wait for
69 * them to be dropped.
70 * The caller must guarantee that no new user will acquire writable references
71 * to those pages to avoid races.
72 */
73static int memfd_wait_for_pins(struct address_space *mapping)
74{
75 struct radix_tree_iter iter;
76 void __rcu **slot;
77 pgoff_t start;
78 struct page *page;
79 int error, scan;
80
81 memfd_tag_pins(mapping);
82
83 error = 0;
84 for (scan = 0; scan <= LAST_SCAN; scan++) {
85 if (!radix_tree_tagged(&mapping->i_pages, MEMFD_TAG_PINNED))
86 break;
87
88 if (!scan)
89 lru_add_drain_all();
90 else if (schedule_timeout_killable((HZ << scan) / 200))
91 scan = LAST_SCAN;
92
93 start = 0;
94 rcu_read_lock();
95 radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
96 start, MEMFD_TAG_PINNED) {
97
98 page = radix_tree_deref_slot(slot);
99 if (radix_tree_exception(page)) {
100 if (radix_tree_deref_retry(page)) {
101 slot = radix_tree_iter_retry(&iter);
102 continue;
103 }
104
105 page = NULL;
106 }
107
108 if (page &&
109 page_count(page) - page_mapcount(page) != 1) {
110 if (scan < LAST_SCAN)
111 goto continue_resched;
112
113 /*
114 * On the last scan, we clean up all those tags
115 * we inserted; but make a note that we still
116 * found pages pinned.
117 */
118 error = -EBUSY;
119 }
120
121 xa_lock_irq(&mapping->i_pages);
122 radix_tree_tag_clear(&mapping->i_pages,
123 iter.index, MEMFD_TAG_PINNED);
124 xa_unlock_irq(&mapping->i_pages);
125continue_resched:
126 if (need_resched()) {
127 slot = radix_tree_iter_resume(slot, &iter);
128 cond_resched_rcu();
129 }
130 }
131 rcu_read_unlock();
132 }
133
134 return error;
135}
136
137static unsigned int *memfd_file_seals_ptr(struct file *file)
138{
139 if (shmem_file(file))
140 return &SHMEM_I(file_inode(file))->seals;
141
142#ifdef CONFIG_HUGETLBFS
143 if (is_file_hugepages(file))
144 return &HUGETLBFS_I(file_inode(file))->seals;
145#endif
146
147 return NULL;
148}
149
150#define F_ALL_SEALS (F_SEAL_SEAL | \
151 F_SEAL_SHRINK | \
152 F_SEAL_GROW | \
153 F_SEAL_WRITE)
154
155static int memfd_add_seals(struct file *file, unsigned int seals)
156{
157 struct inode *inode = file_inode(file);
158 unsigned int *file_seals;
159 int error;
160
161 /*
162 * SEALING
163 * Sealing allows multiple parties to share a tmpfs or hugetlbfs file
164 * but restrict access to a specific subset of file operations. Seals
165 * can only be added, but never removed. This way, mutually untrusted
166 * parties can share common memory regions with a well-defined policy.
167 * A malicious peer can thus never perform unwanted operations on a
168 * shared object.
169 *
170 * Seals are only supported on special tmpfs or hugetlbfs files and
171 * always affect the whole underlying inode. Once a seal is set, it
172 * may prevent some kinds of access to the file. Currently, the
173 * following seals are defined:
174 * SEAL_SEAL: Prevent further seals from being set on this file
175 * SEAL_SHRINK: Prevent the file from shrinking
176 * SEAL_GROW: Prevent the file from growing
177 * SEAL_WRITE: Prevent write access to the file
178 *
179 * As we don't require any trust relationship between two parties, we
180 * must prevent seals from being removed. Therefore, sealing a file
181 * only adds a given set of seals to the file, it never touches
182 * existing seals. Furthermore, the "setting seals"-operation can be
183 * sealed itself, which basically prevents any further seal from being
184 * added.
185 *
186 * Semantics of sealing are only defined on volatile files. Only
187 * anonymous tmpfs and hugetlbfs files support sealing. More
188 * importantly, seals are never written to disk. Therefore, there's
189 * no plan to support it on other file types.
190 */
191
192 if (!(file->f_mode & FMODE_WRITE))
193 return -EPERM;
194 if (seals & ~(unsigned int)F_ALL_SEALS)
195 return -EINVAL;
196
197 inode_lock(inode);
198
199 file_seals = memfd_file_seals_ptr(file);
200 if (!file_seals) {
201 error = -EINVAL;
202 goto unlock;
203 }
204
205 if (*file_seals & F_SEAL_SEAL) {
206 error = -EPERM;
207 goto unlock;
208 }
209
210 if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) {
211 error = mapping_deny_writable(file->f_mapping);
212 if (error)
213 goto unlock;
214
215 error = memfd_wait_for_pins(file->f_mapping);
216 if (error) {
217 mapping_allow_writable(file->f_mapping);
218 goto unlock;
219 }
220 }
221
222 *file_seals |= seals;
223 error = 0;
224
225unlock:
226 inode_unlock(inode);
227 return error;
228}
229
230static int memfd_get_seals(struct file *file)
231{
232 unsigned int *seals = memfd_file_seals_ptr(file);
233
234 return seals ? *seals : -EINVAL;
235}
236
237long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
238{
239 long error;
240
241 switch (cmd) {
242 case F_ADD_SEALS:
243 /* disallow upper 32bit */
244 if (arg > UINT_MAX)
245 return -EINVAL;
246
247 error = memfd_add_seals(file, arg);
248 break;
249 case F_GET_SEALS:
250 error = memfd_get_seals(file);
251 break;
252 default:
253 error = -EINVAL;
254 break;
255 }
256
257 return error;
258}
259
260#define MFD_NAME_PREFIX "memfd:"
261#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
262#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
263
264#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
265
266SYSCALL_DEFINE2(memfd_create,
267 const char __user *, uname,
268 unsigned int, flags)
269{
270 unsigned int *file_seals;
271 struct file *file;
272 int fd, error;
273 char *name;
274 long len;
275
276 if (!(flags & MFD_HUGETLB)) {
277 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
278 return -EINVAL;
279 } else {
280 /* Allow huge page size encoding in flags. */
281 if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
282 (MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
283 return -EINVAL;
284 }
285
286 /* length includes terminating zero */
287 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
288 if (len <= 0)
289 return -EFAULT;
290 if (len > MFD_NAME_MAX_LEN + 1)
291 return -EINVAL;
292
293 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL);
294 if (!name)
295 return -ENOMEM;
296
297 strcpy(name, MFD_NAME_PREFIX);
298 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
299 error = -EFAULT;
300 goto err_name;
301 }
302
303 /* terminating-zero may have changed after strnlen_user() returned */
304 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
305 error = -EFAULT;
306 goto err_name;
307 }
308
309 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
310 if (fd < 0) {
311 error = fd;
312 goto err_name;
313 }
314
315 if (flags & MFD_HUGETLB) {
316 struct user_struct *user = NULL;
317
318 file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
319 HUGETLB_ANONHUGE_INODE,
320 (flags >> MFD_HUGE_SHIFT) &
321 MFD_HUGE_MASK);
322 } else
323 file = shmem_file_setup(name, 0, VM_NORESERVE);
324 if (IS_ERR(file)) {
325 error = PTR_ERR(file);
326 goto err_fd;
327 }
328 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
329 file->f_flags |= O_RDWR | O_LARGEFILE;
330
331 if (flags & MFD_ALLOW_SEALING) {
332 file_seals = memfd_file_seals_ptr(file);
333 *file_seals &= ~F_SEAL_SEAL;
334 }
335
336 fd_install(fd, file);
337 kfree(name);
338 return fd;
339
340err_fd:
341 put_unused_fd(fd);
342err_name:
343 kfree(name);
344 return error;
345}
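To illustrate the sealing semantics implemented in the new mm/memfd.c above, here is a minimal userspace sketch (not part of this patch; it assumes a libc that exposes memfd_create(), otherwise the raw syscall can be used instead):

	#define _GNU_SOURCE
	#include <sys/mman.h>	/* memfd_create(), MFD_* flags */
	#include <fcntl.h>	/* F_ADD_SEALS, F_GET_SEALS, F_SEAL_* */
	#include <unistd.h>
	#include <stdio.h>

	int main(void)
	{
		int fd = memfd_create("example", MFD_CLOEXEC | MFD_ALLOW_SEALING);

		if (fd < 0 || ftruncate(fd, 4096) < 0)
			return 1;

		/* Seals can only be added, never removed; F_SEAL_SEAL also
		 * forbids adding any further seals afterwards. */
		if (fcntl(fd, F_ADD_SEALS,
			  F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL) < 0)
			return 1;

		printf("seals: %#x\n", fcntl(fd, F_GET_SEALS));
		/* Any later ftruncate() or write() on fd now fails with EPERM. */
		return 0;
	}

Note that F_ADD_SEALS with F_SEAL_WRITE fails with EBUSY while writable mappings or pinned pages exist, which is exactly what memfd_wait_for_pins() above is checking for.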
diff --git a/mm/memory.c b/mm/memory.c
index 5d8c2afb0730..7206a634270b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -817,17 +817,12 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
817 * PFNMAP mappings in order to support COWable mappings. 817 * PFNMAP mappings in order to support COWable mappings.
818 * 818 *
819 */ 819 */
820#ifdef __HAVE_ARCH_PTE_SPECIAL
821# define HAVE_PTE_SPECIAL 1
822#else
823# define HAVE_PTE_SPECIAL 0
824#endif
825struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, 820struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
826 pte_t pte, bool with_public_device) 821 pte_t pte, bool with_public_device)
827{ 822{
828 unsigned long pfn = pte_pfn(pte); 823 unsigned long pfn = pte_pfn(pte);
829 824
830 if (HAVE_PTE_SPECIAL) { 825 if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
831 if (likely(!pte_special(pte))) 826 if (likely(!pte_special(pte)))
832 goto check_pfn; 827 goto check_pfn;
833 if (vma->vm_ops && vma->vm_ops->find_special_page) 828 if (vma->vm_ops && vma->vm_ops->find_special_page)
@@ -862,7 +857,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
862 return NULL; 857 return NULL;
863 } 858 }
864 859
865 /* !HAVE_PTE_SPECIAL case follows: */ 860 /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */
866 861
867 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { 862 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
868 if (vma->vm_flags & VM_MIXEDMAP) { 863 if (vma->vm_flags & VM_MIXEDMAP) {
@@ -881,6 +876,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
881 876
882 if (is_zero_pfn(pfn)) 877 if (is_zero_pfn(pfn))
883 return NULL; 878 return NULL;
879
884check_pfn: 880check_pfn:
885 if (unlikely(pfn > highest_memmap_pfn)) { 881 if (unlikely(pfn > highest_memmap_pfn)) {
886 print_bad_pte(vma, addr, pte, NULL); 882 print_bad_pte(vma, addr, pte, NULL);
@@ -904,7 +900,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
904 /* 900 /*
905 * There is no pmd_special() but there may be special pmds, e.g. 901 * There is no pmd_special() but there may be special pmds, e.g.
906 * in a direct-access (dax) mapping, so let's just replicate the 902 * in a direct-access (dax) mapping, so let's just replicate the
907 * !HAVE_PTE_SPECIAL case from vm_normal_page() here. 903 * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here.
908 */ 904 */
909 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { 905 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
910 if (vma->vm_flags & VM_MIXEDMAP) { 906 if (vma->vm_flags & VM_MIXEDMAP) {
@@ -1932,7 +1928,8 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
1932 * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP 1928 * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
1933 * without pte special, it would there be refcounted as a normal page. 1929 * without pte special, it would there be refcounted as a normal page.
1934 */ 1930 */
1935 if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { 1931 if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) &&
1932 !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
1936 struct page *page; 1933 struct page *page;
1937 1934
1938 /* 1935 /*
@@ -1954,12 +1951,25 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
1954} 1951}
1955EXPORT_SYMBOL(vm_insert_mixed); 1952EXPORT_SYMBOL(vm_insert_mixed);
1956 1953
1957int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, 1954/*
1958 pfn_t pfn) 1955 * If the insertion of PTE failed because someone else already added a
1956 * different entry in the mean time, we treat that as success as we assume
1957 * the same entry was actually inserted.
1958 */
1959
1960vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
1961 unsigned long addr, pfn_t pfn)
1959{ 1962{
1960 return __vm_insert_mixed(vma, addr, pfn, true); 1963 int err;
1964
1965 err = __vm_insert_mixed(vma, addr, pfn, true);
1966 if (err == -ENOMEM)
1967 return VM_FAULT_OOM;
1968 if (err < 0 && err != -EBUSY)
1969 return VM_FAULT_SIGBUS;
1970 return VM_FAULT_NOPAGE;
1961} 1971}
1962EXPORT_SYMBOL(vm_insert_mixed_mkwrite); 1972EXPORT_SYMBOL(vmf_insert_mixed_mkwrite);
1963 1973
1964/* 1974/*
1965 * maps a range of physical memory into the requested pages. the old 1975 * maps a range of physical memory into the requested pages. the old
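A hedged sketch of what the vm_fault_t conversion buys callers: a fault handler that previously translated the int return of vm_insert_mixed_mkwrite() into VM_FAULT_* codes by hand can now return the helper's result directly (the handler name below is illustrative, not from this patch):

	static vm_fault_t example_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn)
	{
		/* Previously:
		 *	int err = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
		 *	if (err == -ENOMEM)
		 *		return VM_FAULT_OOM;
		 *	if (err < 0 && err != -EBUSY)
		 *		return VM_FAULT_SIGBUS;
		 *	return VM_FAULT_NOPAGE;
		 * Now the translation lives in the helper itself:
		 */
		return vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
	}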
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 25982467800b..7deb49f69e27 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1237,6 +1237,29 @@ static struct page *next_active_pageblock(struct page *page)
1237 return page + pageblock_nr_pages; 1237 return page + pageblock_nr_pages;
1238} 1238}
1239 1239
1240static bool is_pageblock_removable_nolock(struct page *page)
1241{
1242 struct zone *zone;
1243 unsigned long pfn;
1244
1245 /*
1246 * We have to be careful here because we are iterating over memory
1247 * sections which are not zone aware so we might end up outside of
1248 * the zone but still within the section.
1249 * We have to take care about the node as well. If the node is offline
1250 * its NODE_DATA will be NULL - see page_zone.
1251 */
1252 if (!node_online(page_to_nid(page)))
1253 return false;
1254
1255 zone = page_zone(page);
1256 pfn = page_to_pfn(page);
1257 if (!zone_spans_pfn(zone, pfn))
1258 return false;
1259
1260 return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true);
1261}
1262
1240/* Checks if this range of memory is likely to be hot-removable. */ 1263/* Checks if this range of memory is likely to be hot-removable. */
1241bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) 1264bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
1242{ 1265{
diff --git a/mm/mmap.c b/mm/mmap.c
index d817764a9974..d1eb87ef4b1a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3277,7 +3277,7 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
3277 mm->data_vm += npages; 3277 mm->data_vm += npages;
3278} 3278}
3279 3279
3280static int special_mapping_fault(struct vm_fault *vmf); 3280static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
3281 3281
3282/* 3282/*
3283 * Having a close hook prevents vma merging regardless of flags. 3283 * Having a close hook prevents vma merging regardless of flags.
@@ -3316,7 +3316,7 @@ static const struct vm_operations_struct legacy_special_mapping_vmops = {
3316 .fault = special_mapping_fault, 3316 .fault = special_mapping_fault,
3317}; 3317};
3318 3318
3319static int special_mapping_fault(struct vm_fault *vmf) 3319static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
3320{ 3320{
3321 struct vm_area_struct *vma = vmf->vma; 3321 struct vm_area_struct *vma = vmf->vma;
3322 pgoff_t pgoff; 3322 pgoff_t pgoff;
diff --git a/mm/nommu.c b/mm/nommu.c
index 13723736d38f..4452d8bd9ae4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1763,7 +1763,7 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
1763 return -ENOMEM; 1763 return -ENOMEM;
1764} 1764}
1765 1765
1766int filemap_fault(struct vm_fault *vmf) 1766vm_fault_t filemap_fault(struct vm_fault *vmf)
1767{ 1767{
1768 BUG(); 1768 BUG();
1769 return 0; 1769 return 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8ba6cb88cf58..6694348b27e9 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -256,7 +256,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
256 int nid; 256 int nid;
257 257
258 if (is_memcg_oom(oc)) { 258 if (is_memcg_oom(oc)) {
259 oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1; 259 oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1;
260 return CONSTRAINT_MEMCG; 260 return CONSTRAINT_MEMCG;
261 } 261 }
262 262
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 22320ea27489..07b3c23762ad 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -705,16 +705,14 @@ static inline void rmv_page_order(struct page *page)
705 705
706/* 706/*
707 * This function checks whether a page is free && is the buddy 707 * This function checks whether a page is free && is the buddy
708 * we can do coalesce a page and its buddy if 708 * we can coalesce a page and its buddy if
709 * (a) the buddy is not in a hole (check before calling!) && 709 * (a) the buddy is not in a hole (check before calling!) &&
710 * (b) the buddy is in the buddy system && 710 * (b) the buddy is in the buddy system &&
711 * (c) a page and its buddy have the same order && 711 * (c) a page and its buddy have the same order &&
712 * (d) a page and its buddy are in the same zone. 712 * (d) a page and its buddy are in the same zone.
713 * 713 *
714 * For recording whether a page is in the buddy system, we set ->_mapcount 714 * For recording whether a page is in the buddy system, we set PageBuddy.
715 * PAGE_BUDDY_MAPCOUNT_VALUE. 715 * Setting, clearing, and testing PageBuddy is serialized by zone->lock.
716 * Setting, clearing, and testing _mapcount PAGE_BUDDY_MAPCOUNT_VALUE is
717 * serialized by zone->lock.
718 * 716 *
719 * For recording page's order, we use page_private(page). 717 * For recording page's order, we use page_private(page).
720 */ 718 */
@@ -759,9 +757,8 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
759 * as necessary, plus some accounting needed to play nicely with other 757 * as necessary, plus some accounting needed to play nicely with other
760 * parts of the VM system. 758 * parts of the VM system.
761 * At each level, we keep a list of pages, which are heads of continuous 759 * At each level, we keep a list of pages, which are heads of continuous
762 * free pages of length of (1 << order) and marked with _mapcount 760 * free pages of length of (1 << order) and marked with PageBuddy.
763 * PAGE_BUDDY_MAPCOUNT_VALUE. Page's order is recorded in page_private(page) 761 * Page's order is recorded in page_private(page) field.
764 * field.
765 * So when we are allocating or freeing one, we can derive the state of the 762 * So when we are allocating or freeing one, we can derive the state of the
766 * other. That is, if we allocate a small block, and both were 763 * other. That is, if we allocate a small block, and both were
767 * free, the remainder of the region must be split into blocks. 764 * free, the remainder of the region must be split into blocks.
@@ -946,7 +943,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
946 } 943 }
947 switch (page - head_page) { 944 switch (page - head_page) {
948 case 1: 945 case 1:
949 /* the first tail page: ->mapping is compound_mapcount() */ 946 /* the first tail page: ->mapping may be compound_mapcount() */
950 if (unlikely(compound_mapcount(page))) { 947 if (unlikely(compound_mapcount(page))) {
951 bad_page(page, "nonzero compound_mapcount", 0); 948 bad_page(page, "nonzero compound_mapcount", 0);
952 goto out; 949 goto out;
@@ -955,7 +952,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
955 case 2: 952 case 2:
956 /* 953 /*
957 * the second tail page: ->mapping is 954 * the second tail page: ->mapping is
958 * page_deferred_list().next -- ignore value. 955 * deferred_list.next -- ignore value.
959 */ 956 */
960 break; 957 break;
961 default: 958 default:
@@ -3701,7 +3698,7 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
3701#endif /* CONFIG_COMPACTION */ 3698#endif /* CONFIG_COMPACTION */
3702 3699
3703#ifdef CONFIG_LOCKDEP 3700#ifdef CONFIG_LOCKDEP
3704struct lockdep_map __fs_reclaim_map = 3701static struct lockdep_map __fs_reclaim_map =
3705 STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map); 3702 STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
3706 3703
3707static bool __need_fs_reclaim(gfp_t gfp_mask) 3704static bool __need_fs_reclaim(gfp_t gfp_mask)
@@ -3726,17 +3723,27 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
3726 return true; 3723 return true;
3727} 3724}
3728 3725
3726void __fs_reclaim_acquire(void)
3727{
3728 lock_map_acquire(&__fs_reclaim_map);
3729}
3730
3731void __fs_reclaim_release(void)
3732{
3733 lock_map_release(&__fs_reclaim_map);
3734}
3735
3729void fs_reclaim_acquire(gfp_t gfp_mask) 3736void fs_reclaim_acquire(gfp_t gfp_mask)
3730{ 3737{
3731 if (__need_fs_reclaim(gfp_mask)) 3738 if (__need_fs_reclaim(gfp_mask))
3732 lock_map_acquire(&__fs_reclaim_map); 3739 __fs_reclaim_acquire();
3733} 3740}
3734EXPORT_SYMBOL_GPL(fs_reclaim_acquire); 3741EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
3735 3742
3736void fs_reclaim_release(gfp_t gfp_mask) 3743void fs_reclaim_release(gfp_t gfp_mask)
3737{ 3744{
3738 if (__need_fs_reclaim(gfp_mask)) 3745 if (__need_fs_reclaim(gfp_mask))
3739 lock_map_release(&__fs_reclaim_map); 3746 __fs_reclaim_release();
3740} 3747}
3741EXPORT_SYMBOL_GPL(fs_reclaim_release); 3748EXPORT_SYMBOL_GPL(fs_reclaim_release);
3742#endif 3749#endif
@@ -3754,8 +3761,8 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
3754 3761
3755 /* We now go into synchronous reclaim */ 3762 /* We now go into synchronous reclaim */
3756 cpuset_memory_pressure_bump(); 3763 cpuset_memory_pressure_bump();
3757 noreclaim_flag = memalloc_noreclaim_save();
3758 fs_reclaim_acquire(gfp_mask); 3764 fs_reclaim_acquire(gfp_mask);
3765 noreclaim_flag = memalloc_noreclaim_save();
3759 reclaim_state.reclaimed_slab = 0; 3766 reclaim_state.reclaimed_slab = 0;
3760 current->reclaim_state = &reclaim_state; 3767 current->reclaim_state = &reclaim_state;
3761 3768
@@ -3763,8 +3770,8 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
3763 ac->nodemask); 3770 ac->nodemask);
3764 3771
3765 current->reclaim_state = NULL; 3772 current->reclaim_state = NULL;
3766 fs_reclaim_release(gfp_mask);
3767 memalloc_noreclaim_restore(noreclaim_flag); 3773 memalloc_noreclaim_restore(noreclaim_flag);
3774 fs_reclaim_release(gfp_mask);
3768 3775
3769 cond_resched(); 3776 cond_resched();
3770 3777
@@ -4162,7 +4169,6 @@ retry:
4162 * orientated. 4169 * orientated.
4163 */ 4170 */
4164 if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) { 4171 if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
4165 ac->zonelist = node_zonelist(numa_node_id(), gfp_mask);
4166 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, 4172 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
4167 ac->high_zoneidx, ac->nodemask); 4173 ac->high_zoneidx, ac->nodemask);
4168 } 4174 }
@@ -4326,8 +4332,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
4326} 4332}
4327 4333
4328/* Determine whether to spread dirty pages and what the first usable zone */ 4334/* Determine whether to spread dirty pages and what the first usable zone */
4329static inline void finalise_ac(gfp_t gfp_mask, 4335static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
4330 unsigned int order, struct alloc_context *ac)
4331{ 4336{
4332 /* Dirty zone balancing only done in the fast path */ 4337 /* Dirty zone balancing only done in the fast path */
4333 ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE); 4338 ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
@@ -4358,7 +4363,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
4358 if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) 4363 if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
4359 return NULL; 4364 return NULL;
4360 4365
4361 finalise_ac(gfp_mask, order, &ac); 4366 finalise_ac(gfp_mask, &ac);
4362 4367
4363 /* First allocation attempt */ 4368 /* First allocation attempt */
4364 page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); 4369 page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
@@ -6229,18 +6234,18 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
6229 6234
6230 for (j = 0; j < MAX_NR_ZONES; j++) { 6235 for (j = 0; j < MAX_NR_ZONES; j++) {
6231 struct zone *zone = pgdat->node_zones + j; 6236 struct zone *zone = pgdat->node_zones + j;
6232 unsigned long size, realsize, freesize, memmap_pages; 6237 unsigned long size, freesize, memmap_pages;
6233 unsigned long zone_start_pfn = zone->zone_start_pfn; 6238 unsigned long zone_start_pfn = zone->zone_start_pfn;
6234 6239
6235 size = zone->spanned_pages; 6240 size = zone->spanned_pages;
6236 realsize = freesize = zone->present_pages; 6241 freesize = zone->present_pages;
6237 6242
6238 /* 6243 /*
6239 * Adjust freesize so that it accounts for how much memory 6244 * Adjust freesize so that it accounts for how much memory
6240 * is used by this zone for memmap. This affects the watermark 6245 * is used by this zone for memmap. This affects the watermark
6241 * and per-cpu initialisations 6246 * and per-cpu initialisations
6242 */ 6247 */
6243 memmap_pages = calc_memmap_size(size, realsize); 6248 memmap_pages = calc_memmap_size(size, freesize);
6244 if (!is_highmem_idx(j)) { 6249 if (!is_highmem_idx(j)) {
6245 if (freesize >= memmap_pages) { 6250 if (freesize >= memmap_pages) {
6246 freesize -= memmap_pages; 6251 freesize -= memmap_pages;
@@ -6272,7 +6277,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
6272 * when the bootmem allocator frees pages into the buddy system. 6277 * when the bootmem allocator frees pages into the buddy system.
6273 * And all highmem pages will be managed by the buddy system. 6278 * And all highmem pages will be managed by the buddy system.
6274 */ 6279 */
6275 zone->managed_pages = is_highmem_idx(j) ? realsize : freesize; 6280 zone->managed_pages = freesize;
6276#ifdef CONFIG_NUMA 6281#ifdef CONFIG_NUMA
6277 zone->node = nid; 6282 zone->node = nid;
6278#endif 6283#endif
@@ -7682,29 +7687,6 @@ unmovable:
7682 return true; 7687 return true;
7683} 7688}
7684 7689
7685bool is_pageblock_removable_nolock(struct page *page)
7686{
7687 struct zone *zone;
7688 unsigned long pfn;
7689
7690 /*
7691 * We have to be careful here because we are iterating over memory
7692 * sections which are not zone aware so we might end up outside of
7693 * the zone but still within the section.
7694 * We have to take care about the node as well. If the node is offline
7695 * its NODE_DATA will be NULL - see page_zone.
7696 */
7697 if (!node_online(page_to_nid(page)))
7698 return false;
7699
7700 zone = page_zone(page);
7701 pfn = page_to_pfn(page);
7702 if (!zone_spans_pfn(zone, pfn))
7703 return false;
7704
7705 return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true);
7706}
7707
7708#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) 7690#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
7709 7691
7710static unsigned long pfn_max_align_down(unsigned long pfn) 7692static unsigned long pfn_max_align_down(unsigned long pfn)
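As the updated comments above say, buddy-system membership is now recorded with PageBuddy rather than a magic _mapcount value, while the free block's order still lives in page_private(). A hedged sketch of that convention, mirroring the existing set_page_order()/rmv_page_order() helpers in this file (illustrative names):

	static inline void example_mark_buddy(struct page *page, unsigned int order)
	{
		set_page_private(page, order);	/* free-block order in page_private() */
		__SetPageBuddy(page);		/* membership in the buddy allocator */
	}

	static inline void example_unmark_buddy(struct page *page)
	{
		__ClearPageBuddy(page);
		set_page_private(page, 0);
	}

Setting, clearing and testing PageBuddy remain serialized by zone->lock, as noted in the rewritten comment.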
diff --git a/mm/page_counter.c b/mm/page_counter.c
index 2a8df3ad60a4..de31470655f6 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -13,6 +13,40 @@
13#include <linux/bug.h> 13#include <linux/bug.h>
14#include <asm/page.h> 14#include <asm/page.h>
15 15
16static void propagate_protected_usage(struct page_counter *c,
17 unsigned long usage)
18{
19 unsigned long protected, old_protected;
20 long delta;
21
22 if (!c->parent)
23 return;
24
25 if (c->min || atomic_long_read(&c->min_usage)) {
26 if (usage <= c->min)
27 protected = usage;
28 else
29 protected = 0;
30
31 old_protected = atomic_long_xchg(&c->min_usage, protected);
32 delta = protected - old_protected;
33 if (delta)
34 atomic_long_add(delta, &c->parent->children_min_usage);
35 }
36
37 if (c->low || atomic_long_read(&c->low_usage)) {
38 if (usage <= c->low)
39 protected = usage;
40 else
41 protected = 0;
42
43 old_protected = atomic_long_xchg(&c->low_usage, protected);
44 delta = protected - old_protected;
45 if (delta)
46 atomic_long_add(delta, &c->parent->children_low_usage);
47 }
48}
49
16/** 50/**
17 * page_counter_cancel - take pages out of the local counter 51 * page_counter_cancel - take pages out of the local counter
18 * @counter: counter 52 * @counter: counter
@@ -22,7 +56,8 @@ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
22{ 56{
23 long new; 57 long new;
24 58
25 new = atomic_long_sub_return(nr_pages, &counter->count); 59 new = atomic_long_sub_return(nr_pages, &counter->usage);
60 propagate_protected_usage(counter, new);
26 /* More uncharges than charges? */ 61 /* More uncharges than charges? */
27 WARN_ON_ONCE(new < 0); 62 WARN_ON_ONCE(new < 0);
28} 63}
@@ -41,7 +76,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
41 for (c = counter; c; c = c->parent) { 76 for (c = counter; c; c = c->parent) {
42 long new; 77 long new;
43 78
44 new = atomic_long_add_return(nr_pages, &c->count); 79 new = atomic_long_add_return(nr_pages, &c->usage);
80 propagate_protected_usage(counter, new);
45 /* 81 /*
46 * This is indeed racy, but we can live with some 82 * This is indeed racy, but we can live with some
47 * inaccuracy in the watermark. 83 * inaccuracy in the watermark.
@@ -82,9 +118,10 @@ bool page_counter_try_charge(struct page_counter *counter,
82 * we either see the new limit or the setter sees the 118 * we either see the new limit or the setter sees the
83 * counter has changed and retries. 119 * counter has changed and retries.
84 */ 120 */
85 new = atomic_long_add_return(nr_pages, &c->count); 121 new = atomic_long_add_return(nr_pages, &c->usage);
86 if (new > c->limit) { 122 if (new > c->max) {
87 atomic_long_sub(nr_pages, &c->count); 123 atomic_long_sub(nr_pages, &c->usage);
124 propagate_protected_usage(counter, new);
88 /* 125 /*
89 * This is racy, but we can live with some 126 * This is racy, but we can live with some
90 * inaccuracy in the failcnt. 127 * inaccuracy in the failcnt.
@@ -93,6 +130,7 @@ bool page_counter_try_charge(struct page_counter *counter,
93 *fail = c; 130 *fail = c;
94 goto failed; 131 goto failed;
95 } 132 }
133 propagate_protected_usage(counter, new);
96 /* 134 /*
97 * Just like with failcnt, we can live with some 135 * Just like with failcnt, we can live with some
98 * inaccuracy in the watermark. 136 * inaccuracy in the watermark.
@@ -123,20 +161,20 @@ void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
123} 161}
124 162
125/** 163/**
126 * page_counter_limit - limit the number of pages allowed 164 * page_counter_set_max - set the maximum number of pages allowed
127 * @counter: counter 165 * @counter: counter
128 * @limit: limit to set 166 * @nr_pages: limit to set
129 * 167 *
130 * Returns 0 on success, -EBUSY if the current number of pages on the 168 * Returns 0 on success, -EBUSY if the current number of pages on the
131 * counter already exceeds the specified limit. 169 * counter already exceeds the specified limit.
132 * 170 *
133 * The caller must serialize invocations on the same counter. 171 * The caller must serialize invocations on the same counter.
134 */ 172 */
135int page_counter_limit(struct page_counter *counter, unsigned long limit) 173int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
136{ 174{
137 for (;;) { 175 for (;;) {
138 unsigned long old; 176 unsigned long old;
139 long count; 177 long usage;
140 178
141 /* 179 /*
142 * Update the limit while making sure that it's not 180 * Update the limit while making sure that it's not
@@ -149,22 +187,56 @@ int page_counter_limit(struct page_counter *counter, unsigned long limit)
149 * the limit, so if it sees the old limit, we see the 187 * the limit, so if it sees the old limit, we see the
150 * modified counter and retry. 188 * modified counter and retry.
151 */ 189 */
152 count = atomic_long_read(&counter->count); 190 usage = atomic_long_read(&counter->usage);
153 191
154 if (count > limit) 192 if (usage > nr_pages)
155 return -EBUSY; 193 return -EBUSY;
156 194
157 old = xchg(&counter->limit, limit); 195 old = xchg(&counter->max, nr_pages);
158 196
159 if (atomic_long_read(&counter->count) <= count) 197 if (atomic_long_read(&counter->usage) <= usage)
160 return 0; 198 return 0;
161 199
162 counter->limit = old; 200 counter->max = old;
163 cond_resched(); 201 cond_resched();
164 } 202 }
165} 203}
166 204
167/** 205/**
206 * page_counter_set_min - set the amount of protected memory
207 * @counter: counter
208 * @nr_pages: value to set
209 *
210 * The caller must serialize invocations on the same counter.
211 */
212void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
213{
214 struct page_counter *c;
215
216 counter->min = nr_pages;
217
218 for (c = counter; c; c = c->parent)
219 propagate_protected_usage(c, atomic_long_read(&c->usage));
220}
221
222/**
223 * page_counter_set_low - set the amount of protected memory
224 * @counter: counter
225 * @nr_pages: value to set
226 *
227 * The caller must serialize invocations on the same counter.
228 */
229void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
230{
231 struct page_counter *c;
232
233 counter->low = nr_pages;
234
235 for (c = counter; c; c = c->parent)
236 propagate_protected_usage(c, atomic_long_read(&c->usage));
237}
238
239/**
168 * page_counter_memparse - memparse() for page counter limits 240 * page_counter_memparse - memparse() for page counter limits
169 * @buf: string to parse 241 * @buf: string to parse
170 * @max: string meaning maximum possible value 242 * @max: string meaning maximum possible value
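A hedged illustration (not from the patch) of the new protection propagation: charging a child counter whose usage stays at or below its "min" pushes that protected amount into the parent's children_min_usage aggregate, which the reclaim side can then compare against the parent's own protection:

	static void page_counter_protection_demo(void)
	{
		struct page_counter parent, child;

		page_counter_init(&parent, NULL);
		page_counter_init(&child, &parent);

		page_counter_set_min(&child, 512);	/* protect up to 512 pages */
		page_counter_charge(&child, 200);	/* charge propagates up; 200 <= min */

		/* parent.children_min_usage now accounts the child's 200 protected
		 * pages; the same scheme applies to "low" via children_low_usage. */
	}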
diff --git a/mm/shmem.c b/mm/shmem.c
index 9d6c7e595415..e9a7ac74823d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -327,7 +327,7 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
327 pgoff_t index, void *expected, void *replacement) 327 pgoff_t index, void *expected, void *replacement)
328{ 328{
329 struct radix_tree_node *node; 329 struct radix_tree_node *node;
330 void **pslot; 330 void __rcu **pslot;
331 void *item; 331 void *item;
332 332
333 VM_BUG_ON(!expected); 333 VM_BUG_ON(!expected);
@@ -395,7 +395,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
395#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE 395#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
396/* ifdef here to avoid bloating shmem.o when not necessary */ 396/* ifdef here to avoid bloating shmem.o when not necessary */
397 397
398int shmem_huge __read_mostly; 398static int shmem_huge __read_mostly;
399 399
400#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) 400#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
401static int shmem_parse_huge(const char *str) 401static int shmem_parse_huge(const char *str)
@@ -571,6 +571,15 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
571} 571}
572#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */ 572#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
573 573
574static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo)
575{
576 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
577 (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) &&
578 shmem_huge != SHMEM_HUGE_DENY)
579 return true;
580 return false;
581}
582
574/* 583/*
575 * Like add_to_page_cache_locked, but error if expected item has gone. 584 * Like add_to_page_cache_locked, but error if expected item has gone.
576 */ 585 */
@@ -682,7 +691,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
682 pgoff_t start, pgoff_t end) 691 pgoff_t start, pgoff_t end)
683{ 692{
684 struct radix_tree_iter iter; 693 struct radix_tree_iter iter;
685 void **slot; 694 void __rcu **slot;
686 struct page *page; 695 struct page *page;
687 unsigned long swapped = 0; 696 unsigned long swapped = 0;
688 697
@@ -988,6 +997,7 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
988{ 997{
989 struct inode *inode = path->dentry->d_inode; 998 struct inode *inode = path->dentry->d_inode;
990 struct shmem_inode_info *info = SHMEM_I(inode); 999 struct shmem_inode_info *info = SHMEM_I(inode);
1000 struct shmem_sb_info *sb_info = SHMEM_SB(inode->i_sb);
991 1001
992 if (info->alloced - info->swapped != inode->i_mapping->nrpages) { 1002 if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
993 spin_lock_irq(&info->lock); 1003 spin_lock_irq(&info->lock);
@@ -995,6 +1005,10 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
995 spin_unlock_irq(&info->lock); 1005 spin_unlock_irq(&info->lock);
996 } 1006 }
997 generic_fillattr(inode, stat); 1007 generic_fillattr(inode, stat);
1008
1009 if (is_huge_enabled(sb_info))
1010 stat->blksize = HPAGE_PMD_SIZE;
1011
998 return 0; 1012 return 0;
999} 1013}
1000 1014
@@ -1098,13 +1112,19 @@ static void shmem_evict_inode(struct inode *inode)
1098static unsigned long find_swap_entry(struct radix_tree_root *root, void *item) 1112static unsigned long find_swap_entry(struct radix_tree_root *root, void *item)
1099{ 1113{
1100 struct radix_tree_iter iter; 1114 struct radix_tree_iter iter;
1101 void **slot; 1115 void __rcu **slot;
1102 unsigned long found = -1; 1116 unsigned long found = -1;
1103 unsigned int checked = 0; 1117 unsigned int checked = 0;
1104 1118
1105 rcu_read_lock(); 1119 rcu_read_lock();
1106 radix_tree_for_each_slot(slot, root, &iter, 0) { 1120 radix_tree_for_each_slot(slot, root, &iter, 0) {
1107 if (*slot == item) { 1121 void *entry = radix_tree_deref_slot(slot);
1122
1123 if (radix_tree_deref_retry(entry)) {
1124 slot = radix_tree_iter_retry(&iter);
1125 continue;
1126 }
1127 if (entry == item) {
1108 found = iter.index; 1128 found = iter.index;
1109 break; 1129 break;
1110 } 1130 }
@@ -1322,9 +1342,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1322 if (!swap.val) 1342 if (!swap.val)
1323 goto redirty; 1343 goto redirty;
1324 1344
1325 if (mem_cgroup_try_charge_swap(page, swap))
1326 goto free_swap;
1327
1328 /* 1345 /*
1329 * Add inode to shmem_unuse()'s list of swapped-out inodes, 1346 * Add inode to shmem_unuse()'s list of swapped-out inodes,
1330 * if it's not already there. Do it now before the page is 1347 * if it's not already there. Do it now before the page is
@@ -1353,7 +1370,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1353 } 1370 }
1354 1371
1355 mutex_unlock(&shmem_swaplist_mutex); 1372 mutex_unlock(&shmem_swaplist_mutex);
1356free_swap:
1357 put_swap_page(page, swap); 1373 put_swap_page(page, swap);
1358redirty: 1374redirty:
1359 set_page_dirty(page); 1375 set_page_dirty(page);
@@ -1404,10 +1420,9 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
1404 struct shmem_inode_info *info, pgoff_t index) 1420 struct shmem_inode_info *info, pgoff_t index)
1405{ 1421{
1406 /* Create a pseudo vma that just contains the policy */ 1422 /* Create a pseudo vma that just contains the policy */
1407 vma->vm_start = 0; 1423 memset(vma, 0, sizeof(*vma));
1408 /* Bias interleave by inode number to distribute better across nodes */ 1424 /* Bias interleave by inode number to distribute better across nodes */
1409 vma->vm_pgoff = index + info->vfs_inode.i_ino; 1425 vma->vm_pgoff = index + info->vfs_inode.i_ino;
1410 vma->vm_ops = NULL;
1411 vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); 1426 vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
1412} 1427}
1413 1428
@@ -1931,14 +1946,14 @@ static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, in
1931 return ret; 1946 return ret;
1932} 1947}
1933 1948
1934static int shmem_fault(struct vm_fault *vmf) 1949static vm_fault_t shmem_fault(struct vm_fault *vmf)
1935{ 1950{
1936 struct vm_area_struct *vma = vmf->vma; 1951 struct vm_area_struct *vma = vmf->vma;
1937 struct inode *inode = file_inode(vma->vm_file); 1952 struct inode *inode = file_inode(vma->vm_file);
1938 gfp_t gfp = mapping_gfp_mask(inode->i_mapping); 1953 gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
1939 enum sgp_type sgp; 1954 enum sgp_type sgp;
1940 int error; 1955 int err;
1941 int ret = VM_FAULT_LOCKED; 1956 vm_fault_t ret = VM_FAULT_LOCKED;
1942 1957
1943 /* 1958 /*
1944 * Trinity finds that probing a hole which tmpfs is punching can 1959 * Trinity finds that probing a hole which tmpfs is punching can
@@ -2006,10 +2021,10 @@ static int shmem_fault(struct vm_fault *vmf)
2006 else if (vma->vm_flags & VM_HUGEPAGE) 2021 else if (vma->vm_flags & VM_HUGEPAGE)
2007 sgp = SGP_HUGE; 2022 sgp = SGP_HUGE;
2008 2023
2009 error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp, 2024 err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
2010 gfp, vma, vmf, &ret); 2025 gfp, vma, vmf, &ret);
2011 if (error) 2026 if (err)
2012 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); 2027 return vmf_error(err);
2013 return ret; 2028 return ret;
2014} 2029}
2015 2030
@@ -2616,241 +2631,6 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
2616 return offset; 2631 return offset;
2617} 2632}
2618 2633
2619/*
2620 * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
2621 * so reuse a tag which we firmly believe is never set or cleared on shmem.
2622 */
2623#define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE
2624#define LAST_SCAN 4 /* about 150ms max */
2625
2626static void shmem_tag_pins(struct address_space *mapping)
2627{
2628 struct radix_tree_iter iter;
2629 void **slot;
2630 pgoff_t start;
2631 struct page *page;
2632
2633 lru_add_drain();
2634 start = 0;
2635 rcu_read_lock();
2636
2637 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
2638 page = radix_tree_deref_slot(slot);
2639 if (!page || radix_tree_exception(page)) {
2640 if (radix_tree_deref_retry(page)) {
2641 slot = radix_tree_iter_retry(&iter);
2642 continue;
2643 }
2644 } else if (page_count(page) - page_mapcount(page) > 1) {
2645 xa_lock_irq(&mapping->i_pages);
2646 radix_tree_tag_set(&mapping->i_pages, iter.index,
2647 SHMEM_TAG_PINNED);
2648 xa_unlock_irq(&mapping->i_pages);
2649 }
2650
2651 if (need_resched()) {
2652 slot = radix_tree_iter_resume(slot, &iter);
2653 cond_resched_rcu();
2654 }
2655 }
2656 rcu_read_unlock();
2657}
2658
2659/*
2660 * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
2661 * via get_user_pages(), drivers might have some pending I/O without any active
2662 * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
2663 * and see whether it has an elevated ref-count. If so, we tag them and wait for
2664 * them to be dropped.
2665 * The caller must guarantee that no new user will acquire writable references
2666 * to those pages to avoid races.
2667 */
2668static int shmem_wait_for_pins(struct address_space *mapping)
2669{
2670 struct radix_tree_iter iter;
2671 void **slot;
2672 pgoff_t start;
2673 struct page *page;
2674 int error, scan;
2675
2676 shmem_tag_pins(mapping);
2677
2678 error = 0;
2679 for (scan = 0; scan <= LAST_SCAN; scan++) {
2680 if (!radix_tree_tagged(&mapping->i_pages, SHMEM_TAG_PINNED))
2681 break;
2682
2683 if (!scan)
2684 lru_add_drain_all();
2685 else if (schedule_timeout_killable((HZ << scan) / 200))
2686 scan = LAST_SCAN;
2687
2688 start = 0;
2689 rcu_read_lock();
2690 radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
2691 start, SHMEM_TAG_PINNED) {
2692
2693 page = radix_tree_deref_slot(slot);
2694 if (radix_tree_exception(page)) {
2695 if (radix_tree_deref_retry(page)) {
2696 slot = radix_tree_iter_retry(&iter);
2697 continue;
2698 }
2699
2700 page = NULL;
2701 }
2702
2703 if (page &&
2704 page_count(page) - page_mapcount(page) != 1) {
2705 if (scan < LAST_SCAN)
2706 goto continue_resched;
2707
2708 /*
2709 * On the last scan, we clean up all those tags
2710 * we inserted; but make a note that we still
2711 * found pages pinned.
2712 */
2713 error = -EBUSY;
2714 }
2715
2716 xa_lock_irq(&mapping->i_pages);
2717 radix_tree_tag_clear(&mapping->i_pages,
2718 iter.index, SHMEM_TAG_PINNED);
2719 xa_unlock_irq(&mapping->i_pages);
2720continue_resched:
2721 if (need_resched()) {
2722 slot = radix_tree_iter_resume(slot, &iter);
2723 cond_resched_rcu();
2724 }
2725 }
2726 rcu_read_unlock();
2727 }
2728
2729 return error;
2730}
2731
2732static unsigned int *memfd_file_seals_ptr(struct file *file)
2733{
2734 if (file->f_op == &shmem_file_operations)
2735 return &SHMEM_I(file_inode(file))->seals;
2736
2737#ifdef CONFIG_HUGETLBFS
2738 if (file->f_op == &hugetlbfs_file_operations)
2739 return &HUGETLBFS_I(file_inode(file))->seals;
2740#endif
2741
2742 return NULL;
2743}
2744
2745#define F_ALL_SEALS (F_SEAL_SEAL | \
2746 F_SEAL_SHRINK | \
2747 F_SEAL_GROW | \
2748 F_SEAL_WRITE)
2749
2750static int memfd_add_seals(struct file *file, unsigned int seals)
2751{
2752 struct inode *inode = file_inode(file);
2753 unsigned int *file_seals;
2754 int error;
2755
2756 /*
2757 * SEALING
2758 * Sealing allows multiple parties to share a shmem-file but restrict
2759 * access to a specific subset of file operations. Seals can only be
2760 * added, but never removed. This way, mutually untrusted parties can
2761 * share common memory regions with a well-defined policy. A malicious
2762 * peer can thus never perform unwanted operations on a shared object.
2763 *
2764 * Seals are only supported on special shmem-files and always affect
2765 * the whole underlying inode. Once a seal is set, it may prevent some
2766 * kinds of access to the file. Currently, the following seals are
2767 * defined:
2768 * SEAL_SEAL: Prevent further seals from being set on this file
2769 * SEAL_SHRINK: Prevent the file from shrinking
2770 * SEAL_GROW: Prevent the file from growing
2771 * SEAL_WRITE: Prevent write access to the file
2772 *
2773 * As we don't require any trust relationship between two parties, we
2774 * must prevent seals from being removed. Therefore, sealing a file
2775 * only adds a given set of seals to the file, it never touches
2776 * existing seals. Furthermore, the "setting seals"-operation can be
2777 * sealed itself, which basically prevents any further seal from being
2778 * added.
2779 *
2780 * Semantics of sealing are only defined on volatile files. Only
2781 * anonymous shmem files support sealing. More importantly, seals are
2782 * never written to disk. Therefore, there's no plan to support it on
2783 * other file types.
2784 */
2785
2786 if (!(file->f_mode & FMODE_WRITE))
2787 return -EPERM;
2788 if (seals & ~(unsigned int)F_ALL_SEALS)
2789 return -EINVAL;
2790
2791 inode_lock(inode);
2792
2793 file_seals = memfd_file_seals_ptr(file);
2794 if (!file_seals) {
2795 error = -EINVAL;
2796 goto unlock;
2797 }
2798
2799 if (*file_seals & F_SEAL_SEAL) {
2800 error = -EPERM;
2801 goto unlock;
2802 }
2803
2804 if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) {
2805 error = mapping_deny_writable(file->f_mapping);
2806 if (error)
2807 goto unlock;
2808
2809 error = shmem_wait_for_pins(file->f_mapping);
2810 if (error) {
2811 mapping_allow_writable(file->f_mapping);
2812 goto unlock;
2813 }
2814 }
2815
2816 *file_seals |= seals;
2817 error = 0;
2818
2819unlock:
2820 inode_unlock(inode);
2821 return error;
2822}
2823
2824static int memfd_get_seals(struct file *file)
2825{
2826 unsigned int *seals = memfd_file_seals_ptr(file);
2827
2828 return seals ? *seals : -EINVAL;
2829}
2830
2831long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
2832{
2833 long error;
2834
2835 switch (cmd) {
2836 case F_ADD_SEALS:
2837 /* disallow upper 32bit */
2838 if (arg > UINT_MAX)
2839 return -EINVAL;
2840
2841 error = memfd_add_seals(file, arg);
2842 break;
2843 case F_GET_SEALS:
2844 error = memfd_get_seals(file);
2845 break;
2846 default:
2847 error = -EINVAL;
2848 break;
2849 }
2850
2851 return error;
2852}
2853
2854static long shmem_fallocate(struct file *file, int mode, loff_t offset, 2634static long shmem_fallocate(struct file *file, int mode, loff_t offset,
2855 loff_t len) 2635 loff_t len)
2856{ 2636{
@@ -3428,6 +3208,15 @@ static int shmem_match(struct inode *ino, void *vfh)
3428 return ino->i_ino == inum && fh[0] == ino->i_generation; 3208 return ino->i_ino == inum && fh[0] == ino->i_generation;
3429} 3209}
3430 3210
3211/* Find any alias of inode, but prefer a hashed alias */
3212static struct dentry *shmem_find_alias(struct inode *inode)
3213{
3214 struct dentry *alias = d_find_alias(inode);
3215
3216 return alias ?: d_find_any_alias(inode);
3217}
3218
3219
3431static struct dentry *shmem_fh_to_dentry(struct super_block *sb, 3220static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
3432 struct fid *fid, int fh_len, int fh_type) 3221 struct fid *fid, int fh_len, int fh_type)
3433{ 3222{
@@ -3444,7 +3233,7 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
3444 inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), 3233 inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
3445 shmem_match, fid->raw); 3234 shmem_match, fid->raw);
3446 if (inode) { 3235 if (inode) {
3447 dentry = d_find_alias(inode); 3236 dentry = shmem_find_alias(inode);
3448 iput(inode); 3237 iput(inode);
3449 } 3238 }
3450 3239
@@ -3673,93 +3462,6 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
3673 return 0; 3462 return 0;
3674} 3463}
3675 3464
3676#define MFD_NAME_PREFIX "memfd:"
3677#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
3678#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
3679
3680#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
3681
3682SYSCALL_DEFINE2(memfd_create,
3683 const char __user *, uname,
3684 unsigned int, flags)
3685{
3686 unsigned int *file_seals;
3687 struct file *file;
3688 int fd, error;
3689 char *name;
3690 long len;
3691
3692 if (!(flags & MFD_HUGETLB)) {
3693 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
3694 return -EINVAL;
3695 } else {
3696 /* Allow huge page size encoding in flags. */
3697 if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
3698 (MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
3699 return -EINVAL;
3700 }
3701
3702 /* length includes terminating zero */
3703 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
3704 if (len <= 0)
3705 return -EFAULT;
3706 if (len > MFD_NAME_MAX_LEN + 1)
3707 return -EINVAL;
3708
3709 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL);
3710 if (!name)
3711 return -ENOMEM;
3712
3713 strcpy(name, MFD_NAME_PREFIX);
3714 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
3715 error = -EFAULT;
3716 goto err_name;
3717 }
3718
3719 /* terminating-zero may have changed after strnlen_user() returned */
3720 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
3721 error = -EFAULT;
3722 goto err_name;
3723 }
3724
3725 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
3726 if (fd < 0) {
3727 error = fd;
3728 goto err_name;
3729 }
3730
3731 if (flags & MFD_HUGETLB) {
3732 struct user_struct *user = NULL;
3733
3734 file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
3735 HUGETLB_ANONHUGE_INODE,
3736 (flags >> MFD_HUGE_SHIFT) &
3737 MFD_HUGE_MASK);
3738 } else
3739 file = shmem_file_setup(name, 0, VM_NORESERVE);
3740 if (IS_ERR(file)) {
3741 error = PTR_ERR(file);
3742 goto err_fd;
3743 }
3744 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
3745 file->f_flags |= O_RDWR | O_LARGEFILE;
3746
3747 if (flags & MFD_ALLOW_SEALING) {
3748 file_seals = memfd_file_seals_ptr(file);
3749 *file_seals &= ~F_SEAL_SEAL;
3750 }
3751
3752 fd_install(fd, file);
3753 kfree(name);
3754 return fd;
3755
3756err_fd:
3757 put_unused_fd(fd);
3758err_name:
3759 kfree(name);
3760 return error;
3761}
3762
3763#endif /* CONFIG_TMPFS */ 3465#endif /* CONFIG_TMPFS */
3764 3466
3765static void shmem_put_super(struct super_block *sb) 3467static void shmem_put_super(struct super_block *sb)
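The vmf_error() helper used in the converted shmem_fault() performs the same errno-to-vm_fault_t translation that the removed open-coded ternary did; roughly, as a sketch of its definition in <linux/mm.h>:

	/* Sketch only; the real vmf_error() lives in include/linux/mm.h. */
	static inline vm_fault_t vmf_error_sketch(int err)
	{
		if (err == -ENOMEM)
			return VM_FAULT_OOM;
		return VM_FAULT_SIGBUS;
	}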
diff --git a/mm/slab.c b/mm/slab.c
index 2f308253c3d7..36688f6c87eb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1235,8 +1235,6 @@ void __init kmem_cache_init(void)
1235{ 1235{
1236 int i; 1236 int i;
1237 1237
1238 BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
1239 sizeof(struct rcu_head));
1240 kmem_cache = &kmem_cache_boot; 1238 kmem_cache = &kmem_cache_boot;
1241 1239
1242 if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1) 1240 if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
@@ -2665,6 +2663,7 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
2665 invalid_mask, &invalid_mask, flags, &flags); 2663 invalid_mask, &invalid_mask, flags, &flags);
2666 dump_stack(); 2664 dump_stack();
2667 } 2665 }
2666 WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
2668 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); 2667 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2669 2668
2670 check_irq_off(); 2669 check_irq_off();
@@ -3071,6 +3070,7 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3071static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, 3070static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3072 gfp_t flags, void *objp, unsigned long caller) 3071 gfp_t flags, void *objp, unsigned long caller)
3073{ 3072{
3073 WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
3074 if (!objp) 3074 if (!objp)
3075 return objp; 3075 return objp;
3076 if (cachep->flags & SLAB_POISON) { 3076 if (cachep->flags & SLAB_POISON) {
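The new WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)) checks (the same check is added to slob and slub below) flag a combination that conflicts with constructor semantics: the constructor has already initialized the object, and zeroing it at allocation time would wipe that state out. A hedged example of an allocation that would now trigger the warning (cache and constructor names are illustrative):

	static void example_ctor(void *obj)
	{
		/* hypothetical constructor: sets up locks, list heads, magic, ... */
	}

	static void example_bad_alloc(void)
	{
		struct kmem_cache *cache;
		void *obj;

		cache = kmem_cache_create("example", 128, 0, SLAB_HWCACHE_ALIGN,
					  example_ctor);

		/* __GFP_ZERO on a constructor-backed cache would zero the object
		 * behind the constructor's back; with this patch the allocator
		 * warns instead of silently doing so. */
		obj = kmem_cache_alloc(cache, GFP_KERNEL | __GFP_ZERO);

		kmem_cache_free(cache, obj);
		kmem_cache_destroy(cache);
	}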
diff --git a/mm/slob.c b/mm/slob.c
index 623e8a5c46ce..307c2c9feb44 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -555,8 +555,10 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
555 flags, node); 555 flags, node);
556 } 556 }
557 557
558 if (b && c->ctor) 558 if (b && c->ctor) {
559 WARN_ON_ONCE(flags & __GFP_ZERO);
559 c->ctor(b); 560 c->ctor(b);
561 }
560 562
561 kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); 563 kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
562 return b; 564 return b;
diff --git a/mm/slub.c b/mm/slub.c
index 44aa7847324a..15505479c3ab 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -52,11 +52,11 @@
52 * and to synchronize major metadata changes to slab cache structures. 52 * and to synchronize major metadata changes to slab cache structures.
53 * 53 *
54 * The slab_lock is only used for debugging and on arches that do not 54 * The slab_lock is only used for debugging and on arches that do not
55 * have the ability to do a cmpxchg_double. It only protects the second 55 * have the ability to do a cmpxchg_double. It only protects:
56 * double word in the page struct. Meaning
57 * A. page->freelist -> List of object free in a page 56 * A. page->freelist -> List of object free in a page
58 * B. page->counters -> Counters of objects 57 * B. page->inuse -> Number of objects in use
59 * C. page->frozen -> frozen state 58 * C. page->objects -> Number of objects in page
59 * D. page->frozen -> frozen state
60 * 60 *
61 * If a slab is frozen then it is exempt from list management. It is not 61 * If a slab is frozen then it is exempt from list management. It is not
62 * on any list. The processor that froze the slab is the one who can 62 * on any list. The processor that froze the slab is the one who can
@@ -316,16 +316,16 @@ static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
316 return (p - addr) / s->size; 316 return (p - addr) / s->size;
317} 317}
318 318
319static inline unsigned int order_objects(unsigned int order, unsigned int size, unsigned int reserved) 319static inline unsigned int order_objects(unsigned int order, unsigned int size)
320{ 320{
321 return (((unsigned int)PAGE_SIZE << order) - reserved) / size; 321 return ((unsigned int)PAGE_SIZE << order) / size;
322} 322}
323 323
324static inline struct kmem_cache_order_objects oo_make(unsigned int order, 324static inline struct kmem_cache_order_objects oo_make(unsigned int order,
325 unsigned int size, unsigned int reserved) 325 unsigned int size)
326{ 326{
327 struct kmem_cache_order_objects x = { 327 struct kmem_cache_order_objects x = {
328 (order << OO_SHIFT) + order_objects(order, size, reserved) 328 (order << OO_SHIFT) + order_objects(order, size)
329 }; 329 };
330 330
331 return x; 331 return x;
@@ -356,21 +356,6 @@ static __always_inline void slab_unlock(struct page *page)
356 __bit_spin_unlock(PG_locked, &page->flags); 356 __bit_spin_unlock(PG_locked, &page->flags);
357} 357}
358 358
359static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
360{
361 struct page tmp;
362 tmp.counters = counters_new;
363 /*
364 * page->counters can cover frozen/inuse/objects as well
365 * as page->_refcount. If we assign to ->counters directly
366 * we run the risk of losing updates to page->_refcount, so
367 * be careful and only assign to the fields we need.
368 */
369 page->frozen = tmp.frozen;
370 page->inuse = tmp.inuse;
371 page->objects = tmp.objects;
372}
373
374/* Interrupts must be disabled (for the fallback code to work right) */ 359/* Interrupts must be disabled (for the fallback code to work right) */
375static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, 360static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
376 void *freelist_old, unsigned long counters_old, 361 void *freelist_old, unsigned long counters_old,
@@ -392,7 +377,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
392 if (page->freelist == freelist_old && 377 if (page->freelist == freelist_old &&
393 page->counters == counters_old) { 378 page->counters == counters_old) {
394 page->freelist = freelist_new; 379 page->freelist = freelist_new;
395 set_page_slub_counters(page, counters_new); 380 page->counters = counters_new;
396 slab_unlock(page); 381 slab_unlock(page);
397 return true; 382 return true;
398 } 383 }
@@ -431,7 +416,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
431 if (page->freelist == freelist_old && 416 if (page->freelist == freelist_old &&
432 page->counters == counters_old) { 417 page->counters == counters_old) {
433 page->freelist = freelist_new; 418 page->freelist = freelist_new;
434 set_page_slub_counters(page, counters_new); 419 page->counters = counters_new;
435 slab_unlock(page); 420 slab_unlock(page);
436 local_irq_restore(flags); 421 local_irq_restore(flags);
437 return true; 422 return true;
@@ -711,7 +696,7 @@ void object_err(struct kmem_cache *s, struct page *page,
711 print_trailer(s, page, object); 696 print_trailer(s, page, object);
712} 697}
713 698
714static void slab_err(struct kmem_cache *s, struct page *page, 699static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
715 const char *fmt, ...) 700 const char *fmt, ...)
716{ 701{
717 va_list args; 702 va_list args;
@@ -847,7 +832,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
847 return 1; 832 return 1;
848 833
849 start = page_address(page); 834 start = page_address(page);
850 length = (PAGE_SIZE << compound_order(page)) - s->reserved; 835 length = PAGE_SIZE << compound_order(page);
851 end = start + length; 836 end = start + length;
852 remainder = length % s->size; 837 remainder = length % s->size;
853 if (!remainder) 838 if (!remainder)
@@ -936,7 +921,7 @@ static int check_slab(struct kmem_cache *s, struct page *page)
936 return 0; 921 return 0;
937 } 922 }
938 923
939 maxobj = order_objects(compound_order(page), s->size, s->reserved); 924 maxobj = order_objects(compound_order(page), s->size);
940 if (page->objects > maxobj) { 925 if (page->objects > maxobj) {
941 slab_err(s, page, "objects %u > max %u", 926 slab_err(s, page, "objects %u > max %u",
942 page->objects, maxobj); 927 page->objects, maxobj);
@@ -986,7 +971,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
986 nr++; 971 nr++;
987 } 972 }
988 973
989 max_objects = order_objects(compound_order(page), s->size, s->reserved); 974 max_objects = order_objects(compound_order(page), s->size);
990 if (max_objects > MAX_OBJS_PER_PAGE) 975 if (max_objects > MAX_OBJS_PER_PAGE)
991 max_objects = MAX_OBJS_PER_PAGE; 976 max_objects = MAX_OBJS_PER_PAGE;
992 977
@@ -1694,24 +1679,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1694 __ClearPageSlabPfmemalloc(page); 1679 __ClearPageSlabPfmemalloc(page);
1695 __ClearPageSlab(page); 1680 __ClearPageSlab(page);
1696 1681
1697 page_mapcount_reset(page); 1682 page->mapping = NULL;
1698 if (current->reclaim_state) 1683 if (current->reclaim_state)
1699 current->reclaim_state->reclaimed_slab += pages; 1684 current->reclaim_state->reclaimed_slab += pages;
1700 memcg_uncharge_slab(page, order, s); 1685 memcg_uncharge_slab(page, order, s);
1701 __free_pages(page, order); 1686 __free_pages(page, order);
1702} 1687}
1703 1688
1704#define need_reserve_slab_rcu \
1705 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1706
1707static void rcu_free_slab(struct rcu_head *h) 1689static void rcu_free_slab(struct rcu_head *h)
1708{ 1690{
1709 struct page *page; 1691 struct page *page = container_of(h, struct page, rcu_head);
1710
1711 if (need_reserve_slab_rcu)
1712 page = virt_to_head_page(h);
1713 else
1714 page = container_of((struct list_head *)h, struct page, lru);
1715 1692
1716 __free_slab(page->slab_cache, page); 1693 __free_slab(page->slab_cache, page);
1717} 1694}
@@ -1719,19 +1696,7 @@ static void rcu_free_slab(struct rcu_head *h)
1719static void free_slab(struct kmem_cache *s, struct page *page) 1696static void free_slab(struct kmem_cache *s, struct page *page)
1720{ 1697{
1721 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { 1698 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1722 struct rcu_head *head; 1699 call_rcu(&page->rcu_head, rcu_free_slab);
1723
1724 if (need_reserve_slab_rcu) {
1725 int order = compound_order(page);
1726 int offset = (PAGE_SIZE << order) - s->reserved;
1727
1728 VM_BUG_ON(s->reserved != sizeof(*head));
1729 head = page_address(page) + offset;
1730 } else {
1731 head = &page->rcu_head;
1732 }
1733
1734 call_rcu(head, rcu_free_slab);
1735 } else 1700 } else
1736 __free_slab(s, page); 1701 __free_slab(s, page);
1737} 1702}
@@ -2444,6 +2409,8 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2444 struct kmem_cache_cpu *c = *pc; 2409 struct kmem_cache_cpu *c = *pc;
2445 struct page *page; 2410 struct page *page;
2446 2411
2412 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2413
2447 freelist = get_partial(s, flags, node, c); 2414 freelist = get_partial(s, flags, node, c);
2448 2415
2449 if (freelist) 2416 if (freelist)
@@ -3226,21 +3193,21 @@ static unsigned int slub_min_objects;
3226 */ 3193 */
3227static inline unsigned int slab_order(unsigned int size, 3194static inline unsigned int slab_order(unsigned int size,
3228 unsigned int min_objects, unsigned int max_order, 3195 unsigned int min_objects, unsigned int max_order,
3229 unsigned int fract_leftover, unsigned int reserved) 3196 unsigned int fract_leftover)
3230{ 3197{
3231 unsigned int min_order = slub_min_order; 3198 unsigned int min_order = slub_min_order;
3232 unsigned int order; 3199 unsigned int order;
3233 3200
3234 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) 3201 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3235 return get_order(size * MAX_OBJS_PER_PAGE) - 1; 3202 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3236 3203
3237 for (order = max(min_order, (unsigned int)get_order(min_objects * size + reserved)); 3204 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3238 order <= max_order; order++) { 3205 order <= max_order; order++) {
3239 3206
3240 unsigned int slab_size = (unsigned int)PAGE_SIZE << order; 3207 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3241 unsigned int rem; 3208 unsigned int rem;
3242 3209
3243 rem = (slab_size - reserved) % size; 3210 rem = slab_size % size;
3244 3211
3245 if (rem <= slab_size / fract_leftover) 3212 if (rem <= slab_size / fract_leftover)
3246 break; 3213 break;
@@ -3249,7 +3216,7 @@ static inline unsigned int slab_order(unsigned int size,
3249 return order; 3216 return order;
3250} 3217}
3251 3218
3252static inline int calculate_order(unsigned int size, unsigned int reserved) 3219static inline int calculate_order(unsigned int size)
3253{ 3220{
3254 unsigned int order; 3221 unsigned int order;
3255 unsigned int min_objects; 3222 unsigned int min_objects;
@@ -3266,7 +3233,7 @@ static inline int calculate_order(unsigned int size, unsigned int reserved)
3266 min_objects = slub_min_objects; 3233 min_objects = slub_min_objects;
3267 if (!min_objects) 3234 if (!min_objects)
3268 min_objects = 4 * (fls(nr_cpu_ids) + 1); 3235 min_objects = 4 * (fls(nr_cpu_ids) + 1);
3269 max_objects = order_objects(slub_max_order, size, reserved); 3236 max_objects = order_objects(slub_max_order, size);
3270 min_objects = min(min_objects, max_objects); 3237 min_objects = min(min_objects, max_objects);
3271 3238
3272 while (min_objects > 1) { 3239 while (min_objects > 1) {
@@ -3275,7 +3242,7 @@ static inline int calculate_order(unsigned int size, unsigned int reserved)
3275 fraction = 16; 3242 fraction = 16;
3276 while (fraction >= 4) { 3243 while (fraction >= 4) {
3277 order = slab_order(size, min_objects, 3244 order = slab_order(size, min_objects,
3278 slub_max_order, fraction, reserved); 3245 slub_max_order, fraction);
3279 if (order <= slub_max_order) 3246 if (order <= slub_max_order)
3280 return order; 3247 return order;
3281 fraction /= 2; 3248 fraction /= 2;
@@ -3287,14 +3254,14 @@ static inline int calculate_order(unsigned int size, unsigned int reserved)
3287 * We were unable to place multiple objects in a slab. Now 3254 * We were unable to place multiple objects in a slab. Now
3288 * lets see if we can place a single object there. 3255 * lets see if we can place a single object there.
3289 */ 3256 */
3290 order = slab_order(size, 1, slub_max_order, 1, reserved); 3257 order = slab_order(size, 1, slub_max_order, 1);
3291 if (order <= slub_max_order) 3258 if (order <= slub_max_order)
3292 return order; 3259 return order;
3293 3260
3294 /* 3261 /*
3295 * Doh this slab cannot be placed using slub_max_order. 3262 * Doh this slab cannot be placed using slub_max_order.
3296 */ 3263 */
3297 order = slab_order(size, 1, MAX_ORDER, 1, reserved); 3264 order = slab_order(size, 1, MAX_ORDER, 1);
3298 if (order < MAX_ORDER) 3265 if (order < MAX_ORDER)
3299 return order; 3266 return order;
3300 return -ENOSYS; 3267 return -ENOSYS;
@@ -3562,7 +3529,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
3562 if (forced_order >= 0) 3529 if (forced_order >= 0)
3563 order = forced_order; 3530 order = forced_order;
3564 else 3531 else
3565 order = calculate_order(size, s->reserved); 3532 order = calculate_order(size);
3566 3533
3567 if ((int)order < 0) 3534 if ((int)order < 0)
3568 return 0; 3535 return 0;
@@ -3580,8 +3547,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
3580 /* 3547 /*
3581 * Determine the number of objects per slab 3548 * Determine the number of objects per slab
3582 */ 3549 */
3583 s->oo = oo_make(order, size, s->reserved); 3550 s->oo = oo_make(order, size);
3584 s->min = oo_make(get_order(size), size, s->reserved); 3551 s->min = oo_make(get_order(size), size);
3585 if (oo_objects(s->oo) > oo_objects(s->max)) 3552 if (oo_objects(s->oo) > oo_objects(s->max))
3586 s->max = s->oo; 3553 s->max = s->oo;
3587 3554
@@ -3591,14 +3558,10 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
3591static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) 3558static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3592{ 3559{
3593 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); 3560 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3594 s->reserved = 0;
3595#ifdef CONFIG_SLAB_FREELIST_HARDENED 3561#ifdef CONFIG_SLAB_FREELIST_HARDENED
3596 s->random = get_random_long(); 3562 s->random = get_random_long();
3597#endif 3563#endif
3598 3564
3599 if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU))
3600 s->reserved = sizeof(struct rcu_head);
3601
3602 if (!calculate_sizes(s, -1)) 3565 if (!calculate_sizes(s, -1))
3603 goto error; 3566 goto error;
3604 if (disable_higher_order_debug) { 3567 if (disable_higher_order_debug) {
@@ -4239,12 +4202,6 @@ void __init kmem_cache_init(void)
4239 SLAB_HWCACHE_ALIGN, 0, 0); 4202 SLAB_HWCACHE_ALIGN, 0, 0);
4240 4203
4241 kmem_cache = bootstrap(&boot_kmem_cache); 4204 kmem_cache = bootstrap(&boot_kmem_cache);
4242
4243 /*
4244 * Allocate kmem_cache_node properly from the kmem_cache slab.
4245 * kmem_cache_node is separately allocated so no need to
4246 * update any list pointers.
4247 */
4248 kmem_cache_node = bootstrap(&boot_kmem_cache_node); 4205 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4249 4206
4250 /* Now we can use the kmem_cache to allocate kmalloc slabs */ 4207 /* Now we can use the kmem_cache to allocate kmalloc slabs */
@@ -5117,12 +5074,6 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5117} 5074}
5118SLAB_ATTR_RO(destroy_by_rcu); 5075SLAB_ATTR_RO(destroy_by_rcu);
5119 5076
5120static ssize_t reserved_show(struct kmem_cache *s, char *buf)
5121{
5122 return sprintf(buf, "%u\n", s->reserved);
5123}
5124SLAB_ATTR_RO(reserved);
5125
5126#ifdef CONFIG_SLUB_DEBUG 5077#ifdef CONFIG_SLUB_DEBUG
5127static ssize_t slabs_show(struct kmem_cache *s, char *buf) 5078static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5128{ 5079{
@@ -5435,7 +5386,6 @@ static struct attribute *slab_attrs[] = {
5435 &reclaim_account_attr.attr, 5386 &reclaim_account_attr.attr,
5436 &destroy_by_rcu_attr.attr, 5387 &destroy_by_rcu_attr.attr,
5437 &shrink_attr.attr, 5388 &shrink_attr.attr,
5438 &reserved_attr.attr,
5439 &slabs_cpu_partial_attr.attr, 5389 &slabs_cpu_partial_attr.attr,
5440#ifdef CONFIG_SLUB_DEBUG 5390#ifdef CONFIG_SLUB_DEBUG
5441 &total_objects_attr.attr, 5391 &total_objects_attr.attr,
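
The slub hunks above drop the per-cache "reserved" tail bytes (previously set aside for an rcu_head in SLAB_TYPESAFE_BY_RCU caches) now that struct page provides its own rcu_head, so the objects-per-slab math reduces to a plain division. A minimal userspace sketch of the simplified sizing calculation follows; it assumes a 4 KiB page and an arbitrary 192-byte object size, and the names are illustrative rather than the kernel's.

/* Illustrative sketch only: mirrors the simplified order_objects()/
 * slab_order() arithmetic from the patch, without the old "reserved"
 * subtraction. Assumes a PAGE_SIZE of 4096 bytes.
 */
#include <stdio.h>

#define PAGE_SIZE 4096u

static unsigned int order_objects(unsigned int order, unsigned int size)
{
	return (PAGE_SIZE << order) / size;
}

int main(void)
{
	unsigned int size = 192;	/* hypothetical object size */
	unsigned int order;

	for (order = 0; order <= 3; order++) {
		unsigned int slab_size = PAGE_SIZE << order;

		printf("order %u: %2u objects, %3u bytes wasted\n",
		       order, order_objects(order, size), slab_size % size);
	}
	return 0;
}

At order 0 this prints 21 objects with 64 bytes left over; that remainder is what slab_order() compares against slab_size / fract_leftover when choosing an order.
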
diff --git a/mm/sparse.c b/mm/sparse.c
index 73dc2fcc0eab..f13f2723950a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -190,15 +190,13 @@ static inline int next_present_section_nr(int section_nr)
190 section_nr++; 190 section_nr++;
191 if (present_section_nr(section_nr)) 191 if (present_section_nr(section_nr))
192 return section_nr; 192 return section_nr;
193 } while ((section_nr < NR_MEM_SECTIONS) && 193 } while ((section_nr <= __highest_present_section_nr));
194 (section_nr <= __highest_present_section_nr));
195 194
196 return -1; 195 return -1;
197} 196}
198#define for_each_present_section_nr(start, section_nr) \ 197#define for_each_present_section_nr(start, section_nr) \
199 for (section_nr = next_present_section_nr(start-1); \ 198 for (section_nr = next_present_section_nr(start-1); \
200 ((section_nr >= 0) && \ 199 ((section_nr >= 0) && \
201 (section_nr < NR_MEM_SECTIONS) && \
202 (section_nr <= __highest_present_section_nr)); \ 200 (section_nr <= __highest_present_section_nr)); \
203 section_nr = next_present_section_nr(section_nr)) 201 section_nr = next_present_section_nr(section_nr))
204 202
@@ -524,7 +522,7 @@ static void __init alloc_usemap_and_memmap(void (*alloc_func)
524 map_count = 1; 522 map_count = 1;
525 } 523 }
526 /* ok, last chunk */ 524 /* ok, last chunk */
527 alloc_func(data, pnum_begin, NR_MEM_SECTIONS, 525 alloc_func(data, pnum_begin, __highest_present_section_nr+1,
528 map_count, nodeid_begin); 526 map_count, nodeid_begin);
529} 527}
530 528
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index f2641894f440..f51ac051c0c9 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -317,7 +317,7 @@ swp_entry_t get_swap_page(struct page *page)
317 if (PageTransHuge(page)) { 317 if (PageTransHuge(page)) {
318 if (IS_ENABLED(CONFIG_THP_SWAP)) 318 if (IS_ENABLED(CONFIG_THP_SWAP))
319 get_swap_pages(1, true, &entry); 319 get_swap_pages(1, true, &entry);
320 return entry; 320 goto out;
321 } 321 }
322 322
323 /* 323 /*
@@ -347,10 +347,14 @@ repeat:
347 } 347 }
348 mutex_unlock(&cache->alloc_lock); 348 mutex_unlock(&cache->alloc_lock);
349 if (entry.val) 349 if (entry.val)
350 return entry; 350 goto out;
351 } 351 }
352 352
353 get_swap_pages(1, false, &entry); 353 get_swap_pages(1, false, &entry);
354 354out:
355 if (mem_cgroup_try_charge_swap(page, entry)) {
356 put_swap_page(page, entry);
357 entry.val = 0;
358 }
355 return entry; 359 return entry;
356} 360}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 07f9aa2340c3..ab8e59cd18ea 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -216,9 +216,6 @@ int add_to_swap(struct page *page)
216 if (!entry.val) 216 if (!entry.val)
217 return 0; 217 return 0;
218 218
219 if (mem_cgroup_try_charge_swap(page, entry))
220 goto fail;
221
222 /* 219 /*
223 * Radix-tree node allocations from PF_MEMALLOC contexts could 220 * Radix-tree node allocations from PF_MEMALLOC contexts could
224 * completely exhaust the page allocator. __GFP_NOMEMALLOC 221 * completely exhaust the page allocator. __GFP_NOMEMALLOC
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 39791b81ede7..5029f241908f 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -404,7 +404,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
404 unsigned long dst_start, 404 unsigned long dst_start,
405 unsigned long src_start, 405 unsigned long src_start,
406 unsigned long len, 406 unsigned long len,
407 bool zeropage) 407 bool zeropage,
408 bool *mmap_changing)
408{ 409{
409 struct vm_area_struct *dst_vma; 410 struct vm_area_struct *dst_vma;
410 ssize_t err; 411 ssize_t err;
@@ -431,6 +432,15 @@ retry:
431 down_read(&dst_mm->mmap_sem); 432 down_read(&dst_mm->mmap_sem);
432 433
433 /* 434 /*
435 * If memory mappings are changing because of non-cooperative
436 * operation (e.g. mremap) running in parallel, bail out and
437 * request the user to retry later
438 */
439 err = -EAGAIN;
440 if (mmap_changing && READ_ONCE(*mmap_changing))
441 goto out_unlock;
442
443 /*
434 * Make sure the vma is not shared, that the dst range is 444 * Make sure the vma is not shared, that the dst range is
435 * both valid and fully within a single existing vma. 445 * both valid and fully within a single existing vma.
436 */ 446 */
@@ -563,13 +573,15 @@ out:
563} 573}
564 574
565ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, 575ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
566 unsigned long src_start, unsigned long len) 576 unsigned long src_start, unsigned long len,
577 bool *mmap_changing)
567{ 578{
568 return __mcopy_atomic(dst_mm, dst_start, src_start, len, false); 579 return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
580 mmap_changing);
569} 581}
570 582
571ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start, 583ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
572 unsigned long len) 584 unsigned long len, bool *mmap_changing)
573{ 585{
574 return __mcopy_atomic(dst_mm, start, 0, len, true); 586 return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
575} 587}
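
For context on the userfaultfd hunks: the new mmap_changing flag lets UFFDIO_COPY and UFFDIO_ZEROPAGE callers get -EAGAIN instead of racing with a non-cooperative event such as mremap. The sketch below is a hypothetical userspace model of that handshake; the struct, field and function names are invented for illustration, and only the flag-check-under-the-lock shape mirrors the check added after down_read(&dst_mm->mmap_sem).

/* Hypothetical model of the mmap_changing handshake; not kernel code. */
#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct uffd_like_ctx {
	pthread_rwlock_t mmap_sem;	/* stands in for mm->mmap_sem */
	atomic_bool mmap_changing;	/* true while the layout is being changed */
};

/* Non-cooperative side: raise the flag before touching the address space. */
static void begin_layout_change(struct uffd_like_ctx *ctx)
{
	atomic_store(&ctx->mmap_changing, true);
}

static void end_layout_change(struct uffd_like_ctx *ctx)
{
	atomic_store(&ctx->mmap_changing, false);
}

/* Copy side: bail out with -EAGAIN when a change is in flight. */
static int try_copy(struct uffd_like_ctx *ctx)
{
	int err;

	pthread_rwlock_rdlock(&ctx->mmap_sem);
	if (atomic_load(&ctx->mmap_changing)) {
		err = -EAGAIN;	/* caller should retry later */
	} else {
		/* mapping is stable; a real implementation copies pages here */
		err = 0;
	}
	pthread_rwlock_unlock(&ctx->mmap_sem);
	return err;
}

int main(void)
{
	struct uffd_like_ctx ctx;

	pthread_rwlock_init(&ctx.mmap_sem, NULL);
	atomic_init(&ctx.mmap_changing, false);

	begin_layout_change(&ctx);
	printf("copy during change: %d\n", try_copy(&ctx));
	end_layout_change(&ctx);
	printf("copy when stable:   %d\n", try_copy(&ctx));

	pthread_rwlock_destroy(&ctx.mmap_sem);
	return 0;
}

Compile with -pthread: the first try_copy() reports -EAGAIN while the layout change is in flight, the second succeeds.
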
diff --git a/mm/util.c b/mm/util.c
index c2d0a7cdb189..3351659200e6 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -391,7 +391,8 @@ EXPORT_SYMBOL(vm_mmap);
391 * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is 391 * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
392 * preferable to the vmalloc fallback, due to visible performance drawbacks. 392 * preferable to the vmalloc fallback, due to visible performance drawbacks.
393 * 393 *
394 * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people. 394 * Please note that any use of gfp flags outside of GFP_KERNEL is careful to not
395 * fall back to vmalloc.
395 */ 396 */
396void *kvmalloc_node(size_t size, gfp_t flags, int node) 397void *kvmalloc_node(size_t size, gfp_t flags, int node)
397{ 398{
@@ -402,7 +403,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
402 * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) 403 * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
403 * so the given set of flags has to be compatible. 404 * so the given set of flags has to be compatible.
404 */ 405 */
405 WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); 406 if ((flags & GFP_KERNEL) != GFP_KERNEL)
407 return kmalloc_node(size, flags, node);
406 408
407 /* 409 /*
408 * We want to attempt a large physically contiguous block first because 410 * We want to attempt a large physically contiguous block first because
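
The kvmalloc_node() hunk changes behaviour as well as documentation: a GFP mask that is not GFP_KERNEL-compatible no longer triggers a warning, it simply takes the kmalloc path and never reaches the vmalloc fallback. A toy userspace mock of that decision follows; the GFP bit values and the helper are stand-ins invented for illustration, not the kernel's definitions.

/* Toy mock of the new kvmalloc_node() flag check; bit values are made up. */
#include <stdio.h>

#define GFP_RECLAIM 0x1u
#define GFP_IO      0x2u
#define GFP_FS      0x4u
#define GFP_KERNEL  (GFP_RECLAIM | GFP_IO | GFP_FS)
#define GFP_NOFS    (GFP_RECLAIM | GFP_IO)

static const char *kvmalloc_path(unsigned int flags)
{
	if ((flags & GFP_KERNEL) != GFP_KERNEL)
		return "kmalloc only (no vmalloc fallback)";
	return "kmalloc, then vmalloc fallback";
}

int main(void)
{
	printf("GFP_KERNEL: %s\n", kvmalloc_path(GFP_KERNEL));
	printf("GFP_NOFS:   %s\n", kvmalloc_path(GFP_NOFS));
	return 0;
}

In the kernel the same test is (flags & GFP_KERNEL) != GFP_KERNEL, which is true for masks such as GFP_NOFS and GFP_NOIO, so those callers now behave exactly like kmalloc_node().
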
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 63a5f502da08..89efac3a020e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -603,26 +603,6 @@ static void unmap_vmap_area(struct vmap_area *va)
603 vunmap_page_range(va->va_start, va->va_end); 603 vunmap_page_range(va->va_start, va->va_end);
604} 604}
605 605
606static void vmap_debug_free_range(unsigned long start, unsigned long end)
607{
608 /*
609 * Unmap page tables and force a TLB flush immediately if pagealloc
610 * debugging is enabled. This catches use after free bugs similarly to
611 * those in linear kernel virtual address space after a page has been
612 * freed.
613 *
614 * All the lazy freeing logic is still retained, in order to minimise
615 * intrusiveness of this debugging feature.
616 *
617 * This is going to be *slow* (linear kernel virtual address debugging
618 * doesn't do a broadcast TLB flush so it is a lot faster).
619 */
620 if (debug_pagealloc_enabled()) {
621 vunmap_page_range(start, end);
622 flush_tlb_kernel_range(start, end);
623 }
624}
625
626/* 606/*
627 * lazy_max_pages is the maximum amount of virtual address space we gather up 607 * lazy_max_pages is the maximum amount of virtual address space we gather up
628 * before attempting to purge with a TLB flush. 608 * before attempting to purge with a TLB flush.
@@ -756,6 +736,9 @@ static void free_unmap_vmap_area(struct vmap_area *va)
756{ 736{
757 flush_cache_vunmap(va->va_start, va->va_end); 737 flush_cache_vunmap(va->va_start, va->va_end);
758 unmap_vmap_area(va); 738 unmap_vmap_area(va);
739 if (debug_pagealloc_enabled())
740 flush_tlb_kernel_range(va->va_start, va->va_end);
741
759 free_vmap_area_noflush(va); 742 free_vmap_area_noflush(va);
760} 743}
761 744
@@ -1053,6 +1036,10 @@ static void vb_free(const void *addr, unsigned long size)
1053 1036
1054 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); 1037 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
1055 1038
1039 if (debug_pagealloc_enabled())
1040 flush_tlb_kernel_range((unsigned long)addr,
1041 (unsigned long)addr + size);
1042
1056 spin_lock(&vb->lock); 1043 spin_lock(&vb->lock);
1057 1044
1058 /* Expand dirty range */ 1045 /* Expand dirty range */
@@ -1141,16 +1128,16 @@ void vm_unmap_ram(const void *mem, unsigned int count)
1141 BUG_ON(addr > VMALLOC_END); 1128 BUG_ON(addr > VMALLOC_END);
1142 BUG_ON(!PAGE_ALIGNED(addr)); 1129 BUG_ON(!PAGE_ALIGNED(addr));
1143 1130
1144 debug_check_no_locks_freed(mem, size);
1145 vmap_debug_free_range(addr, addr+size);
1146
1147 if (likely(count <= VMAP_MAX_ALLOC)) { 1131 if (likely(count <= VMAP_MAX_ALLOC)) {
1132 debug_check_no_locks_freed(mem, size);
1148 vb_free(mem, size); 1133 vb_free(mem, size);
1149 return; 1134 return;
1150 } 1135 }
1151 1136
1152 va = find_vmap_area(addr); 1137 va = find_vmap_area(addr);
1153 BUG_ON(!va); 1138 BUG_ON(!va);
1139 debug_check_no_locks_freed((void *)va->va_start,
1140 (va->va_end - va->va_start));
1154 free_unmap_vmap_area(va); 1141 free_unmap_vmap_area(va);
1155} 1142}
1156EXPORT_SYMBOL(vm_unmap_ram); 1143EXPORT_SYMBOL(vm_unmap_ram);
@@ -1499,7 +1486,6 @@ struct vm_struct *remove_vm_area(const void *addr)
1499 va->flags |= VM_LAZY_FREE; 1486 va->flags |= VM_LAZY_FREE;
1500 spin_unlock(&vmap_area_lock); 1487 spin_unlock(&vmap_area_lock);
1501 1488
1502 vmap_debug_free_range(va->va_start, va->va_end);
1503 kasan_free_shadow(vm); 1489 kasan_free_shadow(vm);
1504 free_unmap_vmap_area(va); 1490 free_unmap_vmap_area(va);
1505 1491
@@ -1519,16 +1505,17 @@ static void __vunmap(const void *addr, int deallocate_pages)
1519 addr)) 1505 addr))
1520 return; 1506 return;
1521 1507
1522 area = remove_vm_area(addr); 1508 area = find_vmap_area((unsigned long)addr)->vm;
1523 if (unlikely(!area)) { 1509 if (unlikely(!area)) {
1524 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", 1510 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1525 addr); 1511 addr);
1526 return; 1512 return;
1527 } 1513 }
1528 1514
1529 debug_check_no_locks_freed(addr, get_vm_area_size(area)); 1515 debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
1530 debug_check_no_obj_freed(addr, get_vm_area_size(area)); 1516 debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
1531 1517
1518 remove_vm_area(addr);
1532 if (deallocate_pages) { 1519 if (deallocate_pages) {
1533 int i; 1520 int i;
1534 1521
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 85350ce2d25d..4854584ec436 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -342,26 +342,6 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
342 vmpressure(gfp, memcg, true, vmpressure_win, 0); 342 vmpressure(gfp, memcg, true, vmpressure_win, 0);
343} 343}
344 344
345static enum vmpressure_levels str_to_level(const char *arg)
346{
347 enum vmpressure_levels level;
348
349 for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++)
350 if (!strcmp(vmpressure_str_levels[level], arg))
351 return level;
352 return -1;
353}
354
355static enum vmpressure_modes str_to_mode(const char *arg)
356{
357 enum vmpressure_modes mode;
358
359 for (mode = 0; mode < VMPRESSURE_NUM_MODES; mode++)
360 if (!strcmp(vmpressure_str_modes[mode], arg))
361 return mode;
362 return -1;
363}
364
365#define MAX_VMPRESSURE_ARGS_LEN (strlen("critical") + strlen("hierarchy") + 2) 345#define MAX_VMPRESSURE_ARGS_LEN (strlen("critical") + strlen("hierarchy") + 2)
366 346
367/** 347/**
@@ -390,27 +370,26 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
390 char *token; 370 char *token;
391 int ret = 0; 371 int ret = 0;
392 372
393 spec_orig = spec = kzalloc(MAX_VMPRESSURE_ARGS_LEN + 1, GFP_KERNEL); 373 spec_orig = spec = kstrndup(args, MAX_VMPRESSURE_ARGS_LEN, GFP_KERNEL);
394 if (!spec) { 374 if (!spec) {
395 ret = -ENOMEM; 375 ret = -ENOMEM;
396 goto out; 376 goto out;
397 } 377 }
398 strncpy(spec, args, MAX_VMPRESSURE_ARGS_LEN);
399 378
400 /* Find required level */ 379 /* Find required level */
401 token = strsep(&spec, ","); 380 token = strsep(&spec, ",");
402 level = str_to_level(token); 381 level = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
403 if (level == -1) { 382 if (level < 0) {
404 ret = -EINVAL; 383 ret = level;
405 goto out; 384 goto out;
406 } 385 }
407 386
408 /* Find optional mode */ 387 /* Find optional mode */
409 token = strsep(&spec, ","); 388 token = strsep(&spec, ",");
410 if (token) { 389 if (token) {
411 mode = str_to_mode(token); 390 mode = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
412 if (mode == -1) { 391 if (mode < 0) {
413 ret = -EINVAL; 392 ret = mode;
414 goto out; 393 goto out;
415 } 394 }
416 } 395 }
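
The vmpressure hunks swap the open-coded str_to_level()/str_to_mode() loops for match_string(), which returns the index of the matching table entry or a negative errno, and replace the kzalloc()+strncpy() pair with kstrndup(). A small runnable sketch of the same strsep()-plus-table-lookup pattern follows; match_str() is a stand-in with the same index-or-negative-errno contract, and the table echoes the documented vmpressure level names.

/* Runnable userspace sketch of the table-lookup parsing used above. */
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>

static const char * const levels[] = { "low", "medium", "critical" };

static int match_str(const char * const *array, size_t n, const char *s)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (s && strcmp(array[i], s) == 0)
			return i;
	return -EINVAL;
}

int main(void)
{
	char buf[] = "medium,hierarchy";
	char *spec = buf;
	char *token = strsep(&spec, ",");
	int level = match_str(levels, sizeof(levels) / sizeof(levels[0]), token);

	if (level < 0) {
		fprintf(stderr, "bad level\n");
		return 1;
	}
	printf("level index %d, mode token %s\n", level, spec ? spec : "(default)");
	return 0;
}

Running it prints index 1 for "medium" and leaves "hierarchy" in spec for the optional mode lookup, mirroring the two strsep() calls in vmpressure_register_event().
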
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..03822f86f288 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2544,12 +2544,28 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
2544 unsigned long reclaimed; 2544 unsigned long reclaimed;
2545 unsigned long scanned; 2545 unsigned long scanned;
2546 2546
2547 if (mem_cgroup_low(root, memcg)) { 2547 switch (mem_cgroup_protected(root, memcg)) {
2548 case MEMCG_PROT_MIN:
2549 /*
2550 * Hard protection.
2551 * If there is no reclaimable memory, OOM.
2552 */
2553 continue;
2554 case MEMCG_PROT_LOW:
2555 /*
2556 * Soft protection.
2557 * Respect the protection only as long as
2558 * there is an unprotected supply
2559 * of reclaimable memory from other cgroups.
2560 */
2548 if (!sc->memcg_low_reclaim) { 2561 if (!sc->memcg_low_reclaim) {
2549 sc->memcg_low_skipped = 1; 2562 sc->memcg_low_skipped = 1;
2550 continue; 2563 continue;
2551 } 2564 }
2552 memcg_memory_event(memcg, MEMCG_LOW); 2565 memcg_memory_event(memcg, MEMCG_LOW);
2566 break;
2567 case MEMCG_PROT_NONE:
2568 break;
2553 } 2569 }
2554 2570
2555 reclaimed = sc->nr_reclaimed; 2571 reclaimed = sc->nr_reclaimed;
@@ -3318,11 +3334,15 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
3318 .may_unmap = 1, 3334 .may_unmap = 1,
3319 .may_swap = 1, 3335 .may_swap = 1,
3320 }; 3336 };
3337
3338 __fs_reclaim_acquire();
3339
3321 count_vm_event(PAGEOUTRUN); 3340 count_vm_event(PAGEOUTRUN);
3322 3341
3323 do { 3342 do {
3324 unsigned long nr_reclaimed = sc.nr_reclaimed; 3343 unsigned long nr_reclaimed = sc.nr_reclaimed;
3325 bool raise_priority = true; 3344 bool raise_priority = true;
3345 bool ret;
3326 3346
3327 sc.reclaim_idx = classzone_idx; 3347 sc.reclaim_idx = classzone_idx;
3328 3348
@@ -3395,7 +3415,10 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
3395 wake_up_all(&pgdat->pfmemalloc_wait); 3415 wake_up_all(&pgdat->pfmemalloc_wait);
3396 3416
3397 /* Check if kswapd should be suspending */ 3417 /* Check if kswapd should be suspending */
3398 if (try_to_freeze() || kthread_should_stop()) 3418 __fs_reclaim_release();
3419 ret = try_to_freeze();
3420 __fs_reclaim_acquire();
3421 if (ret || kthread_should_stop())
3399 break; 3422 break;
3400 3423
3401 /* 3424 /*
@@ -3412,6 +3435,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
3412 3435
3413out: 3436out:
3414 snapshot_refaults(NULL, pgdat); 3437 snapshot_refaults(NULL, pgdat);
3438 __fs_reclaim_release();
3415 /* 3439 /*
3416 * Return the order kswapd stopped reclaiming at as 3440 * Return the order kswapd stopped reclaiming at as
3417 * prepare_kswapd_sleep() takes it into account. If another caller 3441 * prepare_kswapd_sleep() takes it into account. If another caller
@@ -3600,9 +3624,7 @@ kswapd_try_sleep:
3600 */ 3624 */
3601 trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, 3625 trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
3602 alloc_order); 3626 alloc_order);
3603 fs_reclaim_acquire(GFP_KERNEL);
3604 reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); 3627 reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
3605 fs_reclaim_release(GFP_KERNEL);
3606 if (reclaim_order < alloc_order) 3628 if (reclaim_order < alloc_order)
3607 goto kswapd_try_sleep; 3629 goto kswapd_try_sleep;
3608 } 3630 }
@@ -3684,16 +3706,16 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
3684 unsigned long nr_reclaimed; 3706 unsigned long nr_reclaimed;
3685 unsigned int noreclaim_flag; 3707 unsigned int noreclaim_flag;
3686 3708
3687 noreclaim_flag = memalloc_noreclaim_save();
3688 fs_reclaim_acquire(sc.gfp_mask); 3709 fs_reclaim_acquire(sc.gfp_mask);
3710 noreclaim_flag = memalloc_noreclaim_save();
3689 reclaim_state.reclaimed_slab = 0; 3711 reclaim_state.reclaimed_slab = 0;
3690 p->reclaim_state = &reclaim_state; 3712 p->reclaim_state = &reclaim_state;
3691 3713
3692 nr_reclaimed = do_try_to_free_pages(zonelist, &sc); 3714 nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
3693 3715
3694 p->reclaim_state = NULL; 3716 p->reclaim_state = NULL;
3695 fs_reclaim_release(sc.gfp_mask);
3696 memalloc_noreclaim_restore(noreclaim_flag); 3717 memalloc_noreclaim_restore(noreclaim_flag);
3718 fs_reclaim_release(sc.gfp_mask);
3697 3719
3698 return nr_reclaimed; 3720 return nr_reclaimed;
3699} 3721}
@@ -3870,6 +3892,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
3870 }; 3892 };
3871 3893
3872 cond_resched(); 3894 cond_resched();
3895 fs_reclaim_acquire(sc.gfp_mask);
3873 /* 3896 /*
3874 * We need to be able to allocate from the reserves for RECLAIM_UNMAP 3897 * We need to be able to allocate from the reserves for RECLAIM_UNMAP
3875 * and we also need to be able to write out pages for RECLAIM_WRITE 3898 * and we also need to be able to write out pages for RECLAIM_WRITE
@@ -3877,7 +3900,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
3877 */ 3900 */
3878 noreclaim_flag = memalloc_noreclaim_save(); 3901 noreclaim_flag = memalloc_noreclaim_save();
3879 p->flags |= PF_SWAPWRITE; 3902 p->flags |= PF_SWAPWRITE;
3880 fs_reclaim_acquire(sc.gfp_mask);
3881 reclaim_state.reclaimed_slab = 0; 3903 reclaim_state.reclaimed_slab = 0;
3882 p->reclaim_state = &reclaim_state; 3904 p->reclaim_state = &reclaim_state;
3883 3905
@@ -3892,9 +3914,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
3892 } 3914 }
3893 3915
3894 p->reclaim_state = NULL; 3916 p->reclaim_state = NULL;
3895 fs_reclaim_release(gfp_mask);
3896 current->flags &= ~PF_SWAPWRITE; 3917 current->flags &= ~PF_SWAPWRITE;
3897 memalloc_noreclaim_restore(noreclaim_flag); 3918 memalloc_noreclaim_restore(noreclaim_flag);
3919 fs_reclaim_release(sc.gfp_mask);
3898 return sc.nr_reclaimed >= nr_pages; 3920 return sc.nr_reclaimed >= nr_pages;
3899} 3921}
3900 3922
diff --git a/net/9p/client.c b/net/9p/client.c
index 21e6df1cc70f..18c5271910dc 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -198,8 +198,6 @@ static int parse_opts(char *opts, struct p9_client *clnt)
198 pr_info("Could not find request transport: %s\n", 198 pr_info("Could not find request transport: %s\n",
199 s); 199 s);
200 ret = -EINVAL; 200 ret = -EINVAL;
201 kfree(s);
202 goto free_and_return;
203 } 201 }
204 kfree(s); 202 kfree(s);
205 break; 203 break;
@@ -214,13 +212,12 @@ static int parse_opts(char *opts, struct p9_client *clnt)
214 "problem allocating copy of version arg\n"); 212 "problem allocating copy of version arg\n");
215 goto free_and_return; 213 goto free_and_return;
216 } 214 }
217 ret = get_protocol_version(s); 215 r = get_protocol_version(s);
218 if (ret == -EINVAL) { 216 if (r < 0)
219 kfree(s); 217 ret = r;
220 goto free_and_return; 218 else
221 } 219 clnt->proto_version = r;
222 kfree(s); 220 kfree(s);
223 clnt->proto_version = ret;
224 break; 221 break;
225 default: 222 default:
226 continue; 223 continue;
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 0f19960390a6..2e2b8bca54f3 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -38,7 +38,6 @@
38 38
39#include <linux/module.h> 39#include <linux/module.h>
40#include <linux/spinlock.h> 40#include <linux/spinlock.h>
41#include <linux/rwlock.h>
42#include <net/9p/9p.h> 41#include <net/9p/9p.h>
43#include <net/9p/client.h> 42#include <net/9p/client.h>
44#include <net/9p/transport.h> 43#include <net/9p/transport.h>
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e6033d3c48d3..e3b7362b0ee4 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1,9 +1,11 @@
1#!/usr/bin/env perl 1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
2# (c) 2001, Dave Jones. (the file handling bit) 4# (c) 2001, Dave Jones. (the file handling bit)
3# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) 5# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
4# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite) 6# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite)
5# (c) 2008-2010 Andy Whitcroft <apw@canonical.com> 7# (c) 2008-2010 Andy Whitcroft <apw@canonical.com>
6# Licensed under the terms of the GNU GPL License version 2 8# (c) 2010-2018 Joe Perches <joe@perches.com>
7 9
8use strict; 10use strict;
9use warnings; 11use warnings;
@@ -2375,6 +2377,14 @@ sub process {
2375 2377
2376 my $rawline = $rawlines[$linenr - 1]; 2378 my $rawline = $rawlines[$linenr - 1];
2377 2379
2380# check if it's a mode change, rename or start of a patch
2381 if (!$in_commit_log &&
2382 ($line =~ /^ mode change [0-7]+ => [0-7]+ \S+\s*$/ ||
2383 ($line =~ /^rename (?:from|to) \S+\s*$/ ||
2384 $line =~ /^diff --git a\/[\w\/\.\_\-]+ b\/\S+\s*$/))) {
2385 $is_patch = 1;
2386 }
2387
2378#extract the line range in the file after the patch is applied 2388#extract the line range in the file after the patch is applied
2379 if (!$in_commit_log && 2389 if (!$in_commit_log &&
2380 $line =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) { 2390 $line =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) {
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 99c96e86eccb..c87fa734e3e1 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -1,4 +1,6 @@
1#!/usr/bin/env perl 1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
2# (c) 2007, Joe Perches <joe@perches.com> 4# (c) 2007, Joe Perches <joe@perches.com>
3# created from checkpatch.pl 5# created from checkpatch.pl
4# 6#
@@ -7,8 +9,6 @@
7# 9#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch> 10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9# perl scripts/get_maintainer.pl [OPTIONS] -f <file> 11# perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12 12
13use warnings; 13use warnings;
14use strict; 14use strict;
@@ -542,7 +542,18 @@ foreach my $file (@ARGV) {
542 542
543 while (<$patch>) { 543 while (<$patch>) {
544 my $patch_line = $_; 544 my $patch_line = $_;
545 if (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) { 545 if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
546 my $filename = $1;
547 push(@files, $filename);
548 } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
549 my $filename = $1;
550 push(@files, $filename);
551 } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
552 my $filename1 = $1;
553 my $filename2 = $2;
554 push(@files, $filename1);
555 push(@files, $filename2);
556 } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
546 my $filename = $1; 557 my $filename = $1;
547 $filename =~ s@^[^/]*/@@; 558 $filename =~ s@^[^/]*/@@;
548 $filename =~ s@\n@@; 559 $filename =~ s@\n@@;
diff --git a/scripts/tags.sh b/scripts/tags.sh
index e587610d1492..66f08bb1cce9 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -179,9 +179,9 @@ regex_c=(
179 '/\<CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/ClearPage\1/' 179 '/\<CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/ClearPage\1/'
180 '/\<__CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/__ClearPage\1/' 180 '/\<__CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/__ClearPage\1/'
181 '/\<TESTCLEARFLAG_FALSE(\([[:alnum:]_]*\).*/TestClearPage\1/' 181 '/\<TESTCLEARFLAG_FALSE(\([[:alnum:]_]*\).*/TestClearPage\1/'
182 '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/Page\1/' 182 '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/Page\1/'
183 '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/__SetPage\1/' 183 '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/__SetPage\1/'
184 '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/__ClearPage\1/' 184 '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/__ClearPage\1/'
185 '/^TASK_PFA_TEST([^,]*, *\([[:alnum:]_]*\))/task_\1/' 185 '/^TASK_PFA_TEST([^,]*, *\([[:alnum:]_]*\))/task_\1/'
186 '/^TASK_PFA_SET([^,]*, *\([[:alnum:]_]*\))/task_set_\1/' 186 '/^TASK_PFA_SET([^,]*, *\([[:alnum:]_]*\))/task_set_\1/'
187 '/^TASK_PFA_CLEAR([^,]*, *\([[:alnum:]_]*\))/task_clear_\1/' 187 '/^TASK_PFA_CLEAR([^,]*, *\([[:alnum:]_]*\))/task_clear_\1/'
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 6c16f77c722c..74e5912e9f2e 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -1,3 +1,6 @@
1/fd-001-lookup
2/fd-002-posix-eq
3/fd-003-kthread
1/proc-loadavg-001 4/proc-loadavg-001
2/proc-self-map-files-001 5/proc-self-map-files-001
3/proc-self-map-files-002 6/proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index dbb87e56264c..db310eedc268 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -1,6 +1,9 @@
1CFLAGS += -Wall -O2 1CFLAGS += -Wall -O2 -Wno-unused-function
2 2
3TEST_GEN_PROGS := 3TEST_GEN_PROGS :=
4TEST_GEN_PROGS += fd-001-lookup
5TEST_GEN_PROGS += fd-002-posix-eq
6TEST_GEN_PROGS += fd-003-kthread
4TEST_GEN_PROGS += proc-loadavg-001 7TEST_GEN_PROGS += proc-loadavg-001
5TEST_GEN_PROGS += proc-self-map-files-001 8TEST_GEN_PROGS += proc-self-map-files-001
6TEST_GEN_PROGS += proc-self-map-files-002 9TEST_GEN_PROGS += proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c
new file mode 100644
index 000000000000..a2010dfb2110
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-001-lookup.c
@@ -0,0 +1,168 @@
1/*
2 * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16// Test /proc/*/fd lookup.
17#define _GNU_SOURCE
18#undef NDEBUG
19#include <assert.h>
20#include <dirent.h>
21#include <errno.h>
22#include <limits.h>
23#include <sched.h>
24#include <stdio.h>
25#include <unistd.h>
26#include <sys/types.h>
27#include <sys/stat.h>
28#include <fcntl.h>
29
30#include "proc.h"
31
32/* lstat(2) has more "coverage" in case non-symlink pops up somehow. */
33static void test_lookup_pass(const char *pathname)
34{
35 struct stat st;
36 ssize_t rv;
37
38 memset(&st, 0, sizeof(struct stat));
39 rv = lstat(pathname, &st);
40 assert(rv == 0);
41 assert(S_ISLNK(st.st_mode));
42}
43
44static void test_lookup_fail(const char *pathname)
45{
46 struct stat st;
47 ssize_t rv;
48
49 rv = lstat(pathname, &st);
50 assert(rv == -1 && errno == ENOENT);
51}
52
53static void test_lookup(unsigned int fd)
54{
55 char buf[64];
56 unsigned int c;
57 unsigned int u;
58 int i;
59
60 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
61 test_lookup_pass(buf);
62
63 /* leading junk */
64 for (c = 1; c <= 255; c++) {
65 if (c == '/')
66 continue;
67 snprintf(buf, sizeof(buf), "/proc/self/fd/%c%u", c, fd);
68 test_lookup_fail(buf);
69 }
70
71 /* trailing junk */
72 for (c = 1; c <= 255; c++) {
73 if (c == '/')
74 continue;
75 snprintf(buf, sizeof(buf), "/proc/self/fd/%u%c", fd, c);
76 test_lookup_fail(buf);
77 }
78
79 for (i = INT_MIN; i < INT_MIN + 1024; i++) {
80 snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
81 test_lookup_fail(buf);
82 }
83 for (i = -1024; i < 0; i++) {
84 snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
85 test_lookup_fail(buf);
86 }
87 for (u = INT_MAX - 1024; u <= (unsigned int)INT_MAX + 1024; u++) {
88 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
89 test_lookup_fail(buf);
90 }
91 for (u = UINT_MAX - 1024; u != 0; u++) {
92 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
93 test_lookup_fail(buf);
94 }
95
96
97}
98
99int main(void)
100{
101 struct dirent *de;
102 unsigned int fd, target_fd;
103
104 if (unshare(CLONE_FILES) == -1)
105 return 1;
106
107 /* Wipe fdtable. */
108 do {
109 DIR *d;
110
111 d = opendir("/proc/self/fd");
112 if (!d)
113 return 1;
114
115 de = xreaddir(d);
116 assert(de->d_type == DT_DIR);
117 assert(streq(de->d_name, "."));
118
119 de = xreaddir(d);
120 assert(de->d_type == DT_DIR);
121 assert(streq(de->d_name, ".."));
122next:
123 de = xreaddir(d);
124 if (de) {
125 unsigned long long fd_ull;
126 unsigned int fd;
127 char *end;
128
129 assert(de->d_type == DT_LNK);
130
131 fd_ull = xstrtoull(de->d_name, &end);
132 assert(*end == '\0');
133 assert(fd_ull == (unsigned int)fd_ull);
134
135 fd = fd_ull;
136 if (fd == dirfd(d))
137 goto next;
138 close(fd);
139 }
140
141 closedir(d);
142 } while (de);
143
144 /* Now fdtable is clean. */
145
146 fd = open("/", O_PATH|O_DIRECTORY);
147 assert(fd == 0);
148 test_lookup(fd);
149 close(fd);
150
151 /* Clean again! */
152
153 fd = open("/", O_PATH|O_DIRECTORY);
154 assert(fd == 0);
155 /* Default RLIMIT_NOFILE-1 */
156 target_fd = 1023;
157 while (target_fd > 0) {
158 if (dup2(fd, target_fd) == target_fd)
159 break;
160 target_fd /= 2;
161 }
162 assert(target_fd > 0);
163 close(fd);
164 test_lookup(target_fd);
165 close(target_fd);
166
167 return 0;
168}
diff --git a/tools/testing/selftests/proc/fd-002-posix-eq.c b/tools/testing/selftests/proc/fd-002-posix-eq.c
new file mode 100644
index 000000000000..417322ca9c53
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-002-posix-eq.c
@@ -0,0 +1,57 @@
1/*
2 * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16// Test that open(/proc/*/fd/*) opens the same file.
17#undef NDEBUG
18#include <assert.h>
19#include <stdio.h>
20#include <sys/types.h>
21#include <sys/stat.h>
22#include <fcntl.h>
23#include <unistd.h>
24
25int main(void)
26{
27 int fd0, fd1, fd2;
28 struct stat st0, st1, st2;
29 char buf[64];
30 int rv;
31
32 fd0 = open("/", O_DIRECTORY|O_RDONLY);
33 assert(fd0 >= 0);
34
35 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd0);
36 fd1 = open(buf, O_RDONLY);
37 assert(fd1 >= 0);
38
39 snprintf(buf, sizeof(buf), "/proc/thread-self/fd/%u", fd0);
40 fd2 = open(buf, O_RDONLY);
41 assert(fd2 >= 0);
42
43 rv = fstat(fd0, &st0);
44 assert(rv == 0);
45 rv = fstat(fd1, &st1);
46 assert(rv == 0);
47 rv = fstat(fd2, &st2);
48 assert(rv == 0);
49
50 assert(st0.st_dev == st1.st_dev);
51 assert(st0.st_ino == st1.st_ino);
52
53 assert(st0.st_dev == st2.st_dev);
54 assert(st0.st_ino == st2.st_ino);
55
56 return 0;
57}
diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c
new file mode 100644
index 000000000000..1d659d55368c
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-003-kthread.c
@@ -0,0 +1,178 @@
1/*
2 * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16// Test that /proc/$KERNEL_THREAD/fd/ is empty.
17#define _GNU_SOURCE
18#undef NDEBUG
19#include <sys/syscall.h>
20#include <assert.h>
21#include <dirent.h>
22#include <limits.h>
23#include <stdio.h>
24#include <string.h>
25#include <sys/types.h>
26#include <sys/stat.h>
27#include <fcntl.h>
28#include <unistd.h>
29
30#include "proc.h"
31
32#define PF_KHTREAD 0x00200000
33
34/*
35 * Test for kernel threadness atomically with openat().
36 *
37 * Return /proc/$PID/fd descriptor if process is kernel thread.
38 * Return -1 if a process is userspace process.
39 */
40static int kernel_thread_fd(unsigned int pid)
41{
42 unsigned int flags = 0;
43 char buf[4096];
44 int dir_fd, fd;
45 ssize_t rv;
46
47 snprintf(buf, sizeof(buf), "/proc/%u", pid);
48 dir_fd = open(buf, O_RDONLY|O_DIRECTORY);
49 if (dir_fd == -1)
50 return -1;
51
52 /*
53 * Believe it or not, struct task_struct::flags is directly exposed
54 * to userspace!
55 */
56 fd = openat(dir_fd, "stat", O_RDONLY);
57 if (fd == -1) {
58 close(dir_fd);
59 return -1;
60 }
61 rv = read(fd, buf, sizeof(buf));
62 close(fd);
63 if (0 < rv && rv <= sizeof(buf)) {
64 unsigned long long flags_ull;
65 char *p, *end;
66 int i;
67
68 assert(buf[rv - 1] == '\n');
69 buf[rv - 1] = '\0';
70
71 /* Search backwards: ->comm can contain whitespace and ')'. */
72 for (i = 0; i < 43; i++) {
73 p = strrchr(buf, ' ');
74 assert(p);
75 *p = '\0';
76 }
77
78 p = strrchr(buf, ' ');
79 assert(p);
80
81 flags_ull = xstrtoull(p + 1, &end);
82 assert(*end == '\0');
83 assert(flags_ull == (unsigned int)flags_ull);
84
85 flags = flags_ull;
86 }
87
88 fd = -1;
89 if (flags & PF_KHTREAD) {
90 fd = openat(dir_fd, "fd", O_RDONLY|O_DIRECTORY);
91 }
92 close(dir_fd);
93 return fd;
94}
95
96static void test_readdir(int fd)
97{
98 DIR *d;
99 struct dirent *de;
100
101 d = fdopendir(fd);
102 assert(d);
103
104 de = xreaddir(d);
105 assert(streq(de->d_name, "."));
106 assert(de->d_type == DT_DIR);
107
108 de = xreaddir(d);
109 assert(streq(de->d_name, ".."));
110 assert(de->d_type == DT_DIR);
111
112 de = xreaddir(d);
113 assert(!de);
114}
115
116static inline int sys_statx(int dirfd, const char *pathname, int flags,
117 unsigned int mask, void *stx)
118{
119 return syscall(SYS_statx, dirfd, pathname, flags, mask, stx);
120}
121
122static void test_lookup_fail(int fd, const char *pathname)
123{
124 char stx[256] __attribute__((aligned(8)));
125 int rv;
126
127 rv = sys_statx(fd, pathname, AT_SYMLINK_NOFOLLOW, 0, (void *)stx);
128 assert(rv == -1 && errno == ENOENT);
129}
130
131static void test_lookup(int fd)
132{
133 char buf[64];
134 unsigned int u;
135 int i;
136
137 for (i = INT_MIN; i < INT_MIN + 1024; i++) {
138 snprintf(buf, sizeof(buf), "%d", i);
139 test_lookup_fail(fd, buf);
140 }
141 for (i = -1024; i < 1024; i++) {
142 snprintf(buf, sizeof(buf), "%d", i);
143 test_lookup_fail(fd, buf);
144 }
145 for (u = INT_MAX - 1024; u < (unsigned int)INT_MAX + 1024; u++) {
146 snprintf(buf, sizeof(buf), "%u", u);
147 test_lookup_fail(fd, buf);
148 }
149 for (u = UINT_MAX - 1024; u != 0; u++) {
150 snprintf(buf, sizeof(buf), "%u", u);
151 test_lookup_fail(fd, buf);
152 }
153}
154
155int main(void)
156{
157 unsigned int pid;
158 int fd;
159
160 /*
161 * In theory this will loop indefinitely if kernel threads are exiled
162 * from /proc.
163 *
164 * Start with kthreadd.
165 */
166 pid = 2;
167 while ((fd = kernel_thread_fd(pid)) == -1 && pid < 1024) {
168 pid++;
169 }
170 /* EACCES if run as non-root. */
171 if (pid >= 1024)
172 return 1;
173
174 test_readdir(fd);
175 test_lookup(fd);
176
177 return 0;
178}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
index 0e464b50e9d9..dc6a42b1d6b0 100644
--- a/tools/testing/selftests/proc/proc-uptime.h
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -20,21 +20,7 @@
20#include <stdlib.h> 20#include <stdlib.h>
21#include <unistd.h> 21#include <unistd.h>
22 22
23static unsigned long long xstrtoull(const char *p, char **end) 23#include "proc.h"
24{
25 if (*p == '0') {
26 *end = (char *)p + 1;
27 return 0;
28 } else if ('1' <= *p && *p <= '9') {
29 unsigned long long val;
30
31 errno = 0;
32 val = strtoull(p, end, 10);
33 assert(errno == 0);
34 return val;
35 } else
36 assert(0);
37}
38 24
39static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) 25static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
40{ 26{
diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h
new file mode 100644
index 000000000000..4e178166fd84
--- /dev/null
+++ b/tools/testing/selftests/proc/proc.h
@@ -0,0 +1,39 @@
1#pragma once
2#undef NDEBUG
3#include <assert.h>
4#include <dirent.h>
5#include <errno.h>
6#include <stdbool.h>
7#include <stdlib.h>
8#include <string.h>
9
10static inline bool streq(const char *s1, const char *s2)
11{
12 return strcmp(s1, s2) == 0;
13}
14
15static unsigned long long xstrtoull(const char *p, char **end)
16{
17 if (*p == '0') {
18 *end = (char *)p + 1;
19 return 0;
20 } else if ('1' <= *p && *p <= '9') {
21 unsigned long long val;
22
23 errno = 0;
24 val = strtoull(p, end, 10);
25 assert(errno == 0);
26 return val;
27 } else
28 assert(0);
29}
30
31static struct dirent *xreaddir(DIR *d)
32{
33 struct dirent *de;
34
35 errno = 0;
36 de = readdir(d);
37 assert(de || errno == 0);
38 return de;
39}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
index 1e73c2232097..563e752e6eba 100644
--- a/tools/testing/selftests/proc/read.c
+++ b/tools/testing/selftests/proc/read.c
@@ -31,22 +31,7 @@
31#include <fcntl.h> 31#include <fcntl.h>
32#include <unistd.h> 32#include <unistd.h>
33 33
34static inline bool streq(const char *s1, const char *s2) 34#include "proc.h"
35{
36 return strcmp(s1, s2) == 0;
37}
38
39static struct dirent *xreaddir(DIR *d)
40{
41 struct dirent *de;
42
43 errno = 0;
44 de = readdir(d);
45 if (!de && errno != 0) {
46 exit(1);
47 }
48 return de;
49}
50 35
51static void f_reg(DIR *d, const char *filename) 36static void f_reg(DIR *d, const char *filename)
52{ 37{
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index a8783f48f77f..cce853dca691 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -131,6 +131,7 @@ static const char * const page_flag_names[] = {
131 [KPF_KSM] = "x:ksm", 131 [KPF_KSM] = "x:ksm",
132 [KPF_THP] = "t:thp", 132 [KPF_THP] = "t:thp",
133 [KPF_BALLOON] = "o:balloon", 133 [KPF_BALLOON] = "o:balloon",
134 [KPF_PGTABLE] = "g:pgtable",
134 [KPF_ZERO_PAGE] = "z:zero_page", 135 [KPF_ZERO_PAGE] = "z:zero_page",
135 [KPF_IDLE] = "i:idle_page", 136 [KPF_IDLE] = "i:idle_page",
136 137