147 files changed, 2945 insertions, 2066 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 74cdeaed9f7a..8a2c52d5c53b 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst | |||
@@ -1001,14 +1001,44 @@ PAGE_SIZE multiple when read back. | |||
1001 | The total amount of memory currently being used by the cgroup | 1001 | The total amount of memory currently being used by the cgroup |
1002 | and its descendants. | 1002 | and its descendants. |
1003 | 1003 | ||
1004 | memory.min | ||
1005 | A read-write single value file which exists on non-root | ||
1006 | cgroups. The default is "0". | ||
1007 | |||
1008 | Hard memory protection. If the memory usage of a cgroup | ||
1009 | is within its effective min boundary, the cgroup's memory | ||
1010 | won't be reclaimed under any conditions. If there is no | ||
1011 | unprotected reclaimable memory available, the OOM | ||
1012 | killer is invoked. | ||
1013 | |||
1014 | Effective min boundary is limited by memory.min values of | ||
1015 | all ancestor cgroups. If there is memory.min overcommitment | ||
1016 | (child cgroups require more protected memory than the | ||
1017 | parent allows), then each child cgroup gets the part of | ||
1018 | the parent's protection proportional to its actual | ||
1019 | memory usage below memory.min. | ||
1020 | |||
1021 | Putting more memory than generally available under this | ||
1022 | protection is discouraged and may lead to constant OOMs. | ||
1023 | |||
1024 | If a memory cgroup is not populated with processes, | ||
1025 | its memory.min is ignored. | ||
1026 | |||
1004 | memory.low | 1027 | memory.low |
1005 | A read-write single value file which exists on non-root | 1028 | A read-write single value file which exists on non-root |
1006 | cgroups. The default is "0". | 1029 | cgroups. The default is "0". |
1007 | 1030 | ||
1008 | Best-effort memory protection. If the memory usages of a | 1031 | Best-effort memory protection. If the memory usage of a |
1009 | cgroup and all its ancestors are below their low boundaries, | 1032 | cgroup is within its effective low boundary, the cgroup's |
1010 | the cgroup's memory won't be reclaimed unless memory can be | 1033 | memory won't be reclaimed unless memory can be reclaimed |
1011 | reclaimed from unprotected cgroups. | 1034 | from unprotected cgroups. |
1035 | |||
1036 | Effective low boundary is limited by memory.low values of | ||
1037 | all ancestor cgroups. If there is memory.low overcommitment | ||
1038 | (child cgroups require more protected memory than the | ||
1039 | parent allows), then each child cgroup gets the part of | ||
1040 | the parent's protection proportional to its actual | ||
1041 | memory usage below memory.low. | ||
1012 | 1042 | ||
1013 | Putting more memory than generally available under this | 1043 | Putting more memory than generally available under this |
1014 | protection is discouraged. | 1044 | protection is discouraged. |
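Both memory.min and memory.low distribute an overcommitted parent's protection among its children in proportion to each child's usage below its own boundary. Below is a minimal user-space sketch of that arithmetic; it models only the rule stated in the text above (the kernel's actual calculation lives in mm/memcontrol.c and also walks the hierarchy), and all names in it are illustrative.

#include <stdio.h>

/*
 * Illustrative model of the documented overcommit rule: when the
 * children together claim more protection than the parent provides,
 * each child's effective boundary is scaled by its share of
 * usage-below-min (or -low) among all siblings.
 */
struct cg {
	unsigned long min;	/* memory.min (or memory.low), bytes */
	unsigned long usage;	/* current usage, bytes */
};

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

/* Effective protection of children[i] given the parent's protection. */
static unsigned long effective_protection(const struct cg *children,
					  int n, int i,
					  unsigned long parent_protection)
{
	unsigned long claimed = 0, mine;
	int j;

	for (j = 0; j < n; j++)
		claimed += min_ul(children[j].usage, children[j].min);

	mine = min_ul(children[i].usage, children[i].min);
	if (claimed <= parent_protection)
		return mine;	/* no overcommit: full claim honored */

	/* overcommit: proportional to usage below the boundary */
	return (unsigned long)((double)parent_protection * mine / claimed);
}

int main(void)
{
	/* Parent protects 1 GiB; children claim 1.5 GiB in total. */
	struct cg kids[] = {
		{ .min = 1UL << 30, .usage = 1UL << 30 },  /* claims 1 GiB */
		{ .min = 1UL << 29, .usage = 1UL << 29 },  /* claims 512 MiB */
	};
	int i;

	for (i = 0; i < 2; i++)
		printf("child %d effective protection: %lu MiB\n", i,
		       effective_protection(kids, 2, i, 1UL << 30) >> 20);
	return 0;
}

With the numbers above, the two children end up with roughly 682 MiB and 341 MiB of the parent's 1 GiB of protection.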
@@ -1199,6 +1229,27 @@ PAGE_SIZE multiple when read back. | |||
1199 | Swap usage hard limit. If a cgroup's swap usage reaches this | 1229 | Swap usage hard limit. If a cgroup's swap usage reaches this |
1200 | limit, anonymous memory of the cgroup will not be swapped out. | 1230 | limit, anonymous memory of the cgroup will not be swapped out. |
1201 | 1231 | ||
1232 | memory.swap.events | ||
1233 | A read-only flat-keyed file which exists on non-root cgroups. | ||
1234 | The following entries are defined. Unless specified | ||
1235 | otherwise, a value change in this file generates a file | ||
1236 | modified event. | ||
1237 | |||
1238 | max | ||
1239 | The number of times the cgroup's swap usage was about | ||
1240 | to go over the max boundary and swap allocation | ||
1241 | failed. | ||
1242 | |||
1243 | fail | ||
1244 | The number of times swap allocation failed, either | ||
1245 | because the system ran out of swap or because the | ||
1246 | cgroup hit its max limit. | ||
1247 | |||
1248 | When reduced under the current usage, the existing swap | ||
1249 | entries are reclaimed gradually and the swap usage may stay | ||
1250 | higher than the limit for an extended period of time. This | ||
1251 | reduces the impact on the workload and memory management. | ||
1252 | |||
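As a usage sketch, the counters above can be read like any other flat-keyed cgroup file. This is not kernel code, and the cgroup path below is an assumption for illustration; any cgroup directory works.

#include <stdio.h>
#include <string.h>

/* Minimal sketch: read the flat-keyed memory.swap.events file.
 * The cgroup path is a hypothetical example. */
int main(void)
{
	const char *path = "/sys/fs/cgroup/test/memory.swap.events";
	char key[32];
	unsigned long long val;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fscanf(f, "%31s %llu", key, &val) == 2) {
		if (!strcmp(key, "max"))
			printf("hit swap max boundary %llu times\n", val);
		else if (!strcmp(key, "fail"))
			printf("swap allocation failed %llu times\n", val);
	}
	fclose(f);
	return 0;
}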
1202 | 1253 | ||
1203 | Usage Guidelines | 1254 | Usage Guidelines |
1204 | ~~~~~~~~~~~~~~~~ | 1255 | ~~~~~~~~~~~~~~~~ |
@@ -1934,17 +1985,8 @@ system performance due to overreclaim, to the point where the feature | |||
1934 | becomes self-defeating. | 1985 | becomes self-defeating. |
1935 | 1986 | ||
1936 | The memory.low boundary on the other hand is a top-down allocated | 1987 | The memory.low boundary on the other hand is a top-down allocated |
1937 | reserve. A cgroup enjoys reclaim protection when it and all its | 1988 | reserve. A cgroup enjoys reclaim protection when it's within its low, |
1938 | ancestors are below their low boundaries, which makes delegation of | 1989 | which makes delegation of subtrees possible. |
1939 | subtrees possible. Secondly, new cgroups have no reserve per default | ||
1940 | and in the common case most cgroups are eligible for the preferred | ||
1941 | reclaim pass. This allows the new low boundary to be efficiently | ||
1942 | implemented with just a minor addition to the generic reclaim code, | ||
1943 | without the need for out-of-band data structures and reclaim passes. | ||
1944 | Because the generic reclaim code considers all cgroups except for the | ||
1945 | ones running low in the preferred first reclaim pass, overreclaim of | ||
1946 | individual groups is eliminated as well, resulting in much better | ||
1947 | overall workload performance. | ||
1948 | 1990 | ||
1949 | The original high boundary, the hard limit, is defined as a strict | 1991 | The original high boundary, the hard limit, is defined as a strict |
1950 | limit that can not budge, even if the OOM killer has to be called. | 1992 | limit that can not budge, even if the OOM killer has to be called. |
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 257e65714c6a..875b2b56b87f 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt | |||
@@ -218,6 +218,7 @@ line of text and contains the following stats separated by whitespace: | |||
218 | same_pages the number of same element filled pages written to this disk. | 218 | same_pages the number of same element filled pages written to this disk. |
219 | No memory is allocated for such pages. | 219 | No memory is allocated for such pages. |
220 | pages_compacted the number of pages freed during compaction | 220 | pages_compacted the number of pages freed during compaction |
221 | huge_pages the number of incompressible pages | ||
221 | 222 | ||
222 | 9) Deactivate: | 223 | 9) Deactivate: |
223 | swapoff /dev/zram0 | 224 | swapoff /dev/zram0 |
@@ -242,5 +243,29 @@ to backing storage rather than keeping it in memory. | |||
242 | User should set up backing device via /sys/block/zramX/backing_dev | 243 | User should set up backing device via /sys/block/zramX/backing_dev |
243 | before disksize setting. | 244 | before disksize setting. |
244 | 245 | ||
246 | = memory tracking | ||
247 | |||
248 | With CONFIG_ZRAM_MEMORY_TRACKING, the user can get information | ||
249 | about each zram block. It could be useful for catching cold or | ||
250 | incompressible pages of a process with pagemap. | ||
251 | If you enable the feature, you can see the block state via | ||
252 | /sys/kernel/debug/zram/zram0/block_state. The output is as follows: | ||
253 | |||
254 | 300 75.033841 .wh | ||
255 | 301 63.806904 s.. | ||
256 | 302 63.806919 ..h | ||
257 | |||
258 | First column is zram's block index. | ||
259 | Second column is access time since the system was booted. | ||
260 | Third column is the state of the block: | ||
261 | (s: same page | ||
262 | w: written page to backing store | ||
263 | h: huge page) | ||
264 | |||
265 | The first line of the above example says the 300th block was accessed | ||
266 | at 75.033841 sec and the block's state is huge, so it was written back | ||
267 | to the backing storage. This is a debugging feature, so no one should | ||
268 | rely on it to work properly. | ||
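A short sketch of a user-space reader for this format, matching the three columns described above; the parsing logic is illustrative only:

#include <stdio.h>

/* Sketch: parse /sys/kernel/debug/zram/zram0/block_state lines of the
 * form "<index> <seconds.micros> <flags>", e.g. "300 75.033841 .wh". */
int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/zram/zram0/block_state", "r");
	unsigned long index;
	double ac_time;
	char state[8];

	if (!f) {
		perror("block_state");
		return 1;
	}
	while (fscanf(f, "%lu %lf %7s", &index, &ac_time, state) == 3) {
		if (state[2] == 'h')	/* third flag column: huge page */
			printf("block %lu is incompressible (accessed %.6f)\n",
			       index, ac_time);
	}
	fclose(f);
	return 0;
}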
269 | |||
245 | Nitin Gupta | 270 | Nitin Gupta |
246 | ngupta@vflare.org | 271 | ngupta@vflare.org |
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt index 6a608a6dcf71..a8378424bc98 100644 --- a/Documentation/features/vm/pte_special/arch-support.txt +++ b/Documentation/features/vm/pte_special/arch-support.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | # | 1 | # |
2 | # Feature name: pte_special | 2 | # Feature name: pte_special |
3 | # Kconfig: __HAVE_ARCH_PTE_SPECIAL | 3 | # Kconfig: ARCH_HAS_PTE_SPECIAL |
4 | # description: arch supports the pte_special()/pte_mkspecial() VM APIs | 4 | # description: arch supports the pte_special()/pte_mkspecial() VM APIs |
5 | # | 5 | # |
6 | ----------------------- | 6 | ----------------------- |
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index b7bd6c9009cc..a8bd4af7fbce 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -10,8 +10,8 @@ afs.txt | |||
10 | - info and examples for the distributed AFS (Andrew File System) fs. | 10 | - info and examples for the distributed AFS (Andrew File System) fs. |
11 | affs.txt | 11 | affs.txt |
12 | - info and mount options for the Amiga Fast File System. | 12 | - info and mount options for the Amiga Fast File System. |
13 | autofs4-mount-control.txt | 13 | autofs-mount-control.txt |
14 | - info on device control operations for autofs4 module. | 14 | - info on device control operations for autofs module. |
15 | automount-support.txt | 15 | automount-support.txt |
16 | - information about filesystem automount support. | 16 | - information about filesystem automount support. |
17 | befs.txt | 17 | befs.txt |
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs-mount-control.txt index e5177cb31a04..45edad6933cc 100644 --- a/Documentation/filesystems/autofs4-mount-control.txt +++ b/Documentation/filesystems/autofs-mount-control.txt | |||
@@ -1,5 +1,5 @@ | |||
1 | 1 | ||
2 | Miscellaneous Device control operations for the autofs4 kernel module | 2 | Miscellaneous Device control operations for the autofs kernel module |
3 | ==================================================================== | 3 | ==================================================================== |
4 | 4 | ||
5 | The problem | 5 | The problem |
@@ -164,7 +164,7 @@ possibility for future development due to the requirements of the | |||
164 | message bus architecture. | 164 | message bus architecture. |
165 | 165 | ||
166 | 166 | ||
167 | autofs4 Miscellaneous Device mount control interface | 167 | autofs Miscellaneous Device mount control interface |
168 | ==================================================== | 168 | ==================================================== |
169 | 169 | ||
170 | The control interface is opening a device node, typically /dev/autofs. | 170 | The control interface is opening a device node, typically /dev/autofs. |
@@ -244,7 +244,7 @@ The device node ioctl operations implemented by this interface are: | |||
244 | AUTOFS_DEV_IOCTL_VERSION | 244 | AUTOFS_DEV_IOCTL_VERSION |
245 | ------------------------ | 245 | ------------------------ |
246 | 246 | ||
247 | Get the major and minor version of the autofs4 device ioctl kernel module | 247 | Get the major and minor version of the autofs device ioctl kernel module |
248 | implementation. It requires an initialized struct autofs_dev_ioctl as an | 248 | implementation. It requires an initialized struct autofs_dev_ioctl as an |
249 | input parameter and sets the version information in the passed in structure. | 249 | input parameter and sets the version information in the passed in structure. |
250 | It returns 0 on success or the error -EINVAL if a version mismatch is | 250 | It returns 0 on success or the error -EINVAL if a version mismatch is |
@@ -254,7 +254,7 @@ detected. | |||
254 | AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD | 254 | AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD |
255 | ------------------------------------------------------------------ | 255 | ------------------------------------------------------------------ |
256 | 256 | ||
257 | Get the major and minor version of the autofs4 protocol version understood | 257 | Get the major and minor version of the autofs protocol version understood |
258 | by loaded module. This call requires an initialized struct autofs_dev_ioctl | 258 | by loaded module. This call requires an initialized struct autofs_dev_ioctl |
259 | with the ioctlfd field set to a valid autofs mount point descriptor | 259 | with the ioctlfd field set to a valid autofs mount point descriptor |
260 | and sets the requested version number in version field of struct args_protover | 260 | and sets the requested version number in version field of struct args_protover |
@@ -404,4 +404,3 @@ type is also given we are looking for a particular autofs mount and if | |||
404 | a match isn't found a fail is returned. If the located path is the | 404 | a match isn't found a fail is returned. If the located path is the |
405 | root of a mount, 1 is returned along with the super magic of the mount | 405 | root of a mount, 1 is returned along with the super magic of the mount |
406 | or 0 otherwise. | 406 | or 0 otherwise. |
407 | |||
diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs.txt index f10dd590f69f..373ad25852d3 100644 --- a/Documentation/filesystems/autofs4.txt +++ b/Documentation/filesystems/autofs.txt | |||
@@ -30,15 +30,15 @@ key advantages: | |||
30 | Context | 30 | Context |
31 | ------- | 31 | ------- |
32 | 32 | ||
33 | The "autofs4" filesystem module is only one part of an autofs system. | 33 | The "autofs" filesystem module is only one part of an autofs system. |
34 | There also needs to be a user-space program which looks up names | 34 | There also needs to be a user-space program which looks up names |
35 | and mounts filesystems. This will often be the "automount" program, | 35 | and mounts filesystems. This will often be the "automount" program, |
36 | though other tools including "systemd" can make use of "autofs4". | 36 | though other tools including "systemd" can make use of "autofs". |
37 | This document describes only the kernel module and the interactions | 37 | This document describes only the kernel module and the interactions |
38 | required with any user-space program. Subsequent text refers to this | 38 | required with any user-space program. Subsequent text refers to this |
39 | as the "automount daemon" or simply "the daemon". | 39 | as the "automount daemon" or simply "the daemon". |
40 | 40 | ||
41 | "autofs4" is a Linux kernel module with provides the "autofs" | 41 | "autofs" is a Linux kernel module with provides the "autofs" |
42 | filesystem type. Several "autofs" filesystems can be mounted and they | 42 | filesystem type. Several "autofs" filesystems can be mounted and they |
43 | can each be managed separately, or all managed by the same daemon. | 43 | can each be managed separately, or all managed by the same daemon. |
44 | 44 | ||
@@ -215,7 +215,7 @@ of expiry. | |||
215 | The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to | 215 | The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to |
216 | the `umount` system call. Unmounting with MNT_EXPIRE will fail unless | 216 | the `umount` system call. Unmounting with MNT_EXPIRE will fail unless |
217 | a previous attempt had been made, and the filesystem has been inactive | 217 | a previous attempt had been made, and the filesystem has been inactive |
218 | and untouched since that previous attempt. autofs4 does not depend on | 218 | and untouched since that previous attempt. autofs does not depend on |
219 | this but has its own internal tracking of whether filesystems were | 219 | this but has its own internal tracking of whether filesystems were |
220 | recently used. This allows individual names in the autofs directory | 220 | recently used. This allows individual names in the autofs directory |
221 | to expire separately. | 221 | to expire separately. |
@@ -415,7 +415,7 @@ which can be used to communicate directly with the autofs filesystem. | |||
415 | It requires CAP_SYS_ADMIN for access. | 415 | It requires CAP_SYS_ADMIN for access. |
416 | 416 | ||
417 | The `ioctl`s that can be used on this device are described in a separate | 417 | The `ioctl`s that can be used on this device are described in a separate |
418 | document `autofs4-mount-control.txt`, and are summarized briefly here. | 418 | document `autofs-mount-control.txt`, and are summarized briefly here. |
419 | Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure: | 419 | Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure: |
420 | 420 | ||
421 | struct autofs_dev_ioctl { | 421 | struct autofs_dev_ioctl { |
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt index 7eb762eb3136..b0afd3d55eaf 100644 --- a/Documentation/filesystems/automount-support.txt +++ b/Documentation/filesystems/automount-support.txt | |||
@@ -9,7 +9,7 @@ also be requested by userspace. | |||
9 | IN-KERNEL AUTOMOUNTING | 9 | IN-KERNEL AUTOMOUNTING |
10 | ====================== | 10 | ====================== |
11 | 11 | ||
12 | See section "Mount Traps" of Documentation/filesystems/autofs4.txt | 12 | See section "Mount Traps" of Documentation/filesystems/autofs.txt |
13 | 13 | ||
14 | Then from userspace, you can just do something like: | 14 | Then from userspace, you can just do something like: |
15 | 15 | ||
diff --git a/Documentation/filesystems/path-lookup.md b/Documentation/filesystems/path-lookup.md index 1933ef734e63..e2edd45c4bc0 100644 --- a/Documentation/filesystems/path-lookup.md +++ b/Documentation/filesystems/path-lookup.md | |||
@@ -460,7 +460,7 @@ this retry process in the next article. | |||
460 | Automount points are locations in the filesystem where an attempt to | 460 | Automount points are locations in the filesystem where an attempt to |
461 | lookup a name can trigger changes to how that lookup should be | 461 | lookup a name can trigger changes to how that lookup should be |
462 | handled, in particular by mounting a filesystem there. These are | 462 | handled, in particular by mounting a filesystem there. These are |
463 | covered in greater detail in autofs4.txt in the Linux documentation | 463 | covered in greater detail in autofs.txt in the Linux documentation |
464 | tree, but a few notes specifically related to path lookup are in order | 464 | tree, but a few notes specifically related to path lookup are in order |
465 | here. | 465 | here. |
466 | 466 | ||
diff --git a/MAINTAINERS b/MAINTAINERS index d325d2dc7600..c9ac159fb023 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -7723,11 +7723,11 @@ W: https://linuxtv.org | |||
7723 | S: Maintained | 7723 | S: Maintained |
7724 | F: drivers/media/radio/radio-keene* | 7724 | F: drivers/media/radio/radio-keene* |
7725 | 7725 | ||
7726 | KERNEL AUTOMOUNTER v4 (AUTOFS4) | 7726 | KERNEL AUTOMOUNTER |
7727 | M: Ian Kent <raven@themaw.net> | 7727 | M: Ian Kent <raven@themaw.net> |
7728 | L: autofs@vger.kernel.org | 7728 | L: autofs@vger.kernel.org |
7729 | S: Maintained | 7729 | S: Maintained |
7730 | F: fs/autofs4/ | 7730 | F: fs/autofs/ |
7731 | 7731 | ||
7732 | KERNEL BUILD + files below scripts/ (unless maintained elsewhere) | 7732 | KERNEL BUILD + files below scripts/ (unless maintained elsewhere) |
7733 | M: Masahiro Yamada <yamada.masahiro@socionext.com> | 7733 | M: Masahiro Yamada <yamada.masahiro@socionext.com> |
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 89d47eac18b2..e81bcd271be7 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig | |||
@@ -48,6 +48,7 @@ config ARC | |||
48 | select HAVE_GENERIC_DMA_COHERENT | 48 | select HAVE_GENERIC_DMA_COHERENT |
49 | select HAVE_KERNEL_GZIP | 49 | select HAVE_KERNEL_GZIP |
50 | select HAVE_KERNEL_LZMA | 50 | select HAVE_KERNEL_LZMA |
51 | select ARCH_HAS_PTE_SPECIAL | ||
51 | 52 | ||
52 | config MIGHT_HAVE_PCI | 53 | config MIGHT_HAVE_PCI |
53 | bool | 54 | bool |
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 08fe33830d4b..8ec5599a0957 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h | |||
@@ -320,8 +320,6 @@ PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); | |||
320 | PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); | 320 | PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); |
321 | PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); | 321 | PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); |
322 | 322 | ||
323 | #define __HAVE_ARCH_PTE_SPECIAL | ||
324 | |||
325 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | 323 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) |
326 | { | 324 | { |
327 | return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); | 325 | return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); |
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8f460bdd4be1..534563ac7f5f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig | |||
@@ -8,6 +8,7 @@ config ARM | |||
8 | select ARCH_HAS_DEVMEM_IS_ALLOWED | 8 | select ARCH_HAS_DEVMEM_IS_ALLOWED |
9 | select ARCH_HAS_ELF_RANDOMIZE | 9 | select ARCH_HAS_ELF_RANDOMIZE |
10 | select ARCH_HAS_FORTIFY_SOURCE | 10 | select ARCH_HAS_FORTIFY_SOURCE |
11 | select ARCH_HAS_PTE_SPECIAL if ARM_LPAE | ||
11 | select ARCH_HAS_SET_MEMORY | 12 | select ARCH_HAS_SET_MEMORY |
12 | select ARCH_HAS_PHYS_TO_DMA | 13 | select ARCH_HAS_PHYS_TO_DMA |
13 | select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL | 14 | select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL |
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 2a4836087358..6d50a11d7793 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h | |||
@@ -219,7 +219,6 @@ static inline pte_t pte_mkspecial(pte_t pte) | |||
219 | pte_val(pte) |= L_PTE_SPECIAL; | 219 | pte_val(pte) |= L_PTE_SPECIAL; |
220 | return pte; | 220 | return pte; |
221 | } | 221 | } |
222 | #define __HAVE_ARCH_PTE_SPECIAL | ||
223 | 222 | ||
224 | #define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) | 223 | #define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) |
225 | #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) | 224 | #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b25ed7834f6c..4759566a78cb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig | |||
@@ -17,6 +17,7 @@ config ARM64 | |||
17 | select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA | 17 | select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA |
18 | select ARCH_HAS_KCOV | 18 | select ARCH_HAS_KCOV |
19 | select ARCH_HAS_MEMBARRIER_SYNC_CORE | 19 | select ARCH_HAS_MEMBARRIER_SYNC_CORE |
20 | select ARCH_HAS_PTE_SPECIAL | ||
20 | select ARCH_HAS_SET_MEMORY | 21 | select ARCH_HAS_SET_MEMORY |
21 | select ARCH_HAS_SG_CHAIN | 22 | select ARCH_HAS_SG_CHAIN |
22 | select ARCH_HAS_STRICT_KERNEL_RWX | 23 | select ARCH_HAS_STRICT_KERNEL_RWX |
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7c4c8f318ba9..9f82d6b53851 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h | |||
@@ -306,8 +306,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) | |||
306 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) | 306 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) |
307 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | 307 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) |
308 | 308 | ||
309 | #define __HAVE_ARCH_PTE_SPECIAL | ||
310 | |||
311 | static inline pte_t pgd_pte(pgd_t pgd) | 309 | static inline pte_t pgd_pte(pgd_t pgd) |
312 | { | 310 | { |
313 | return __pte(pgd_val(pgd)); | 311 | return __pte(pgd_val(pgd)); |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 076fe3094856..8f959df2de7a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -135,6 +135,7 @@ config PPC | |||
135 | select ARCH_HAS_GCOV_PROFILE_ALL | 135 | select ARCH_HAS_GCOV_PROFILE_ALL |
136 | select ARCH_HAS_PHYS_TO_DMA | 136 | select ARCH_HAS_PHYS_TO_DMA |
137 | select ARCH_HAS_PMEM_API if PPC64 | 137 | select ARCH_HAS_PMEM_API if PPC64 |
138 | select ARCH_HAS_PTE_SPECIAL | ||
138 | select ARCH_HAS_MEMBARRIER_CALLBACKS | 139 | select ARCH_HAS_MEMBARRIER_CALLBACKS |
139 | select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE | 140 | select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE |
140 | select ARCH_HAS_SG_CHAIN | 141 | select ARCH_HAS_SG_CHAIN |
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 42fe7c2ff2df..63cee159022b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h | |||
@@ -335,9 +335,6 @@ extern unsigned long pci_io_base; | |||
335 | /* Advertise special mapping type for AGP */ | 335 | /* Advertise special mapping type for AGP */ |
336 | #define HAVE_PAGE_AGP | 336 | #define HAVE_PAGE_AGP |
337 | 337 | ||
338 | /* Advertise support for _PAGE_SPECIAL */ | ||
339 | #define __HAVE_ARCH_PTE_SPECIAL | ||
340 | |||
341 | #ifndef __ASSEMBLY__ | 338 | #ifndef __ASSEMBLY__ |
342 | 339 | ||
343 | /* | 340 | /* |
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 050b0d775324..bef56141a549 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h | |||
@@ -208,9 +208,6 @@ static inline bool pte_user(pte_t pte) | |||
208 | #define PAGE_AGP (PAGE_KERNEL_NC) | 208 | #define PAGE_AGP (PAGE_KERNEL_NC) |
209 | #define HAVE_PAGE_AGP | 209 | #define HAVE_PAGE_AGP |
210 | 210 | ||
211 | /* Advertise support for _PAGE_SPECIAL */ | ||
212 | #define __HAVE_ARCH_PTE_SPECIAL | ||
213 | |||
214 | #ifndef _PAGE_READ | 211 | #ifndef _PAGE_READ |
215 | /* if not defined, we should not find _PAGE_WRITE too */ | 212 | /* if not defined, we should not find _PAGE_WRITE too */ |
216 | #define _PAGE_READ 0 | 213 | #define _PAGE_READ 0 |
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 274bc064c41f..17f19e67993b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig | |||
@@ -42,6 +42,7 @@ config RISCV | |||
42 | select THREAD_INFO_IN_TASK | 42 | select THREAD_INFO_IN_TASK |
43 | select RISCV_TIMER | 43 | select RISCV_TIMER |
44 | select GENERIC_IRQ_MULTI_HANDLER | 44 | select GENERIC_IRQ_MULTI_HANDLER |
45 | select ARCH_HAS_PTE_SPECIAL | ||
45 | 46 | ||
46 | config MMU | 47 | config MMU |
47 | def_bool y | 48 | def_bool y |
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 997ddbb1d370..2fa2942be221 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h | |||
@@ -42,7 +42,4 @@ | |||
42 | _PAGE_WRITE | _PAGE_EXEC | \ | 42 | _PAGE_WRITE | _PAGE_EXEC | \ |
43 | _PAGE_USER | _PAGE_GLOBAL)) | 43 | _PAGE_USER | _PAGE_GLOBAL)) |
44 | 44 | ||
45 | /* Advertise support for _PAGE_SPECIAL */ | ||
46 | #define __HAVE_ARCH_PTE_SPECIAL | ||
47 | |||
48 | #endif /* _ASM_RISCV_PGTABLE_BITS_H */ | 45 | #endif /* _ASM_RISCV_PGTABLE_BITS_H */ |
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b7deee7e738f..baed39772c84 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -65,6 +65,7 @@ config S390 | |||
65 | select ARCH_HAS_GCOV_PROFILE_ALL | 65 | select ARCH_HAS_GCOV_PROFILE_ALL |
66 | select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA | 66 | select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA |
67 | select ARCH_HAS_KCOV | 67 | select ARCH_HAS_KCOV |
68 | select ARCH_HAS_PTE_SPECIAL | ||
68 | select ARCH_HAS_SET_MEMORY | 69 | select ARCH_HAS_SET_MEMORY |
69 | select ARCH_HAS_SG_CHAIN | 70 | select ARCH_HAS_SG_CHAIN |
70 | select ARCH_HAS_STRICT_KERNEL_RWX | 71 | select ARCH_HAS_STRICT_KERNEL_RWX |
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2d24d33bf188..9809694e1389 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -171,7 +171,6 @@ static inline int is_module_addr(void *addr) | |||
171 | #define _PAGE_WRITE 0x020 /* SW pte write bit */ | 171 | #define _PAGE_WRITE 0x020 /* SW pte write bit */ |
172 | #define _PAGE_SPECIAL 0x040 /* SW associated with special page */ | 172 | #define _PAGE_SPECIAL 0x040 /* SW associated with special page */ |
173 | #define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */ | 173 | #define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */ |
174 | #define __HAVE_ARCH_PTE_SPECIAL | ||
175 | 174 | ||
176 | #ifdef CONFIG_MEM_SOFT_DIRTY | 175 | #ifdef CONFIG_MEM_SOFT_DIRTY |
177 | #define _PAGE_SOFT_DIRTY 0x002 /* SW pte soft dirty bit */ | 176 | #define _PAGE_SOFT_DIRTY 0x002 /* SW pte soft dirty bit */ |
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 562f72955956..84bd6329a88d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c | |||
@@ -190,14 +190,15 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
190 | if (!list_empty(&mm->context.pgtable_list)) { | 190 | if (!list_empty(&mm->context.pgtable_list)) { |
191 | page = list_first_entry(&mm->context.pgtable_list, | 191 | page = list_first_entry(&mm->context.pgtable_list, |
192 | struct page, lru); | 192 | struct page, lru); |
193 | mask = atomic_read(&page->_mapcount); | 193 | mask = atomic_read(&page->_refcount) >> 24; |
194 | mask = (mask | (mask >> 4)) & 3; | 194 | mask = (mask | (mask >> 4)) & 3; |
195 | if (mask != 3) { | 195 | if (mask != 3) { |
196 | table = (unsigned long *) page_to_phys(page); | 196 | table = (unsigned long *) page_to_phys(page); |
197 | bit = mask & 1; /* =1 -> second 2K */ | 197 | bit = mask & 1; /* =1 -> second 2K */ |
198 | if (bit) | 198 | if (bit) |
199 | table += PTRS_PER_PTE; | 199 | table += PTRS_PER_PTE; |
200 | atomic_xor_bits(&page->_mapcount, 1U << bit); | 200 | atomic_xor_bits(&page->_refcount, |
201 | 1U << (bit + 24)); | ||
201 | list_del(&page->lru); | 202 | list_del(&page->lru); |
202 | } | 203 | } |
203 | } | 204 | } |
@@ -218,12 +219,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
218 | table = (unsigned long *) page_to_phys(page); | 219 | table = (unsigned long *) page_to_phys(page); |
219 | if (mm_alloc_pgste(mm)) { | 220 | if (mm_alloc_pgste(mm)) { |
220 | /* Return 4K page table with PGSTEs */ | 221 | /* Return 4K page table with PGSTEs */ |
221 | atomic_set(&page->_mapcount, 3); | 222 | atomic_xor_bits(&page->_refcount, 3 << 24); |
222 | memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); | 223 | memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); |
223 | memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); | 224 | memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); |
224 | } else { | 225 | } else { |
225 | /* Return the first 2K fragment of the page */ | 226 | /* Return the first 2K fragment of the page */ |
226 | atomic_set(&page->_mapcount, 1); | 227 | atomic_xor_bits(&page->_refcount, 1 << 24); |
227 | memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); | 228 | memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); |
228 | spin_lock_bh(&mm->context.lock); | 229 | spin_lock_bh(&mm->context.lock); |
229 | list_add(&page->lru, &mm->context.pgtable_list); | 230 | list_add(&page->lru, &mm->context.pgtable_list); |
@@ -242,7 +243,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
242 | /* Free 2K page table fragment of a 4K page */ | 243 | /* Free 2K page table fragment of a 4K page */ |
243 | bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); | 244 | bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); |
244 | spin_lock_bh(&mm->context.lock); | 245 | spin_lock_bh(&mm->context.lock); |
245 | mask = atomic_xor_bits(&page->_mapcount, 1U << bit); | 246 | mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24)); |
247 | mask >>= 24; | ||
246 | if (mask & 3) | 248 | if (mask & 3) |
247 | list_add(&page->lru, &mm->context.pgtable_list); | 249 | list_add(&page->lru, &mm->context.pgtable_list); |
248 | else | 250 | else |
@@ -253,7 +255,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
253 | } | 255 | } |
254 | 256 | ||
255 | pgtable_page_dtor(page); | 257 | pgtable_page_dtor(page); |
256 | atomic_set(&page->_mapcount, -1); | ||
257 | __free_page(page); | 258 | __free_page(page); |
258 | } | 259 | } |
259 | 260 | ||
@@ -274,7 +275,8 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, | |||
274 | } | 275 | } |
275 | bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); | 276 | bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); |
276 | spin_lock_bh(&mm->context.lock); | 277 | spin_lock_bh(&mm->context.lock); |
277 | mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); | 278 | mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); |
279 | mask >>= 24; | ||
278 | if (mask & 3) | 280 | if (mask & 3) |
279 | list_add_tail(&page->lru, &mm->context.pgtable_list); | 281 | list_add_tail(&page->lru, &mm->context.pgtable_list); |
280 | else | 282 | else |
@@ -296,12 +298,13 @@ static void __tlb_remove_table(void *_table) | |||
296 | break; | 298 | break; |
297 | case 1: /* lower 2K of a 4K page table */ | 299 | case 1: /* lower 2K of a 4K page table */ |
298 | case 2: /* higher 2K of a 4K page table */ | 300 | case 2: /* higher 2K of a 4K page table */ |
299 | if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) | 301 | mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24)); |
302 | mask >>= 24; | ||
303 | if (mask != 0) | ||
300 | break; | 304 | break; |
301 | /* fallthrough */ | 305 | /* fallthrough */ |
302 | case 3: /* 4K page table with pgstes */ | 306 | case 3: /* 4K page table with pgstes */ |
303 | pgtable_page_dtor(page); | 307 | pgtable_page_dtor(page); |
304 | atomic_set(&page->_mapcount, -1); | ||
305 | __free_page(page); | 308 | __free_page(page); |
306 | break; | 309 | break; |
307 | } | 310 | } |
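The pgalloc.c hunks above move s390's 2K page-table fragment bookkeeping out of page->_mapcount and into the upper byte of page->_refcount, toggled with atomic XORs: bits 24 and 25 mark the lower and upper 2K fragment as allocated, and bits 28 and 29 mark them as pending RCU removal. A hedged user-space model of that encoding follows; struct fake_page stands in for struct page, and plain XOR stands in for atomic_xor_bits().

#include <stdio.h>

/*
 * Model of the s390 2K page-table fragment state kept in the upper
 * byte of page->_refcount (bits 24..31), as in the patch:
 *   bits 24/25: lower/upper 2K fragment is allocated
 *   bits 28/29: lower/upper 2K fragment is pending RCU removal
 */
struct fake_page { unsigned int refcount; };

static unsigned int xor_bits(unsigned int *v, unsigned int bits)
{
	*v ^= bits;		/* kernel uses atomic_xor_bits() */
	return *v;
}

static int fragment_free(const struct fake_page *p, int bit /* 0 or 1 */)
{
	unsigned int mask = p->refcount >> 24;

	/* busy if allocated (low nibble) or pending removal (high nibble) */
	mask = (mask | (mask >> 4)) & 3;
	return !(mask & (1 << bit));
}

int main(void)
{
	struct fake_page page = { 0 };

	xor_bits(&page.refcount, 1U << (0 + 24));    /* allocate lower 2K */
	printf("lower free? %d, upper free? %d\n",
	       fragment_free(&page, 0), fragment_free(&page, 1));

	xor_bits(&page.refcount, 0x11U << (0 + 24)); /* free lower via RCU */
	printf("after rcu-free mark: lower free? %d\n",
	       fragment_free(&page, 0));
	return 0;
}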
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index ae619d54018c..4d61a085982b 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
2 | config SUPERH | 2 | config SUPERH |
3 | def_bool y | 3 | def_bool y |
4 | select ARCH_HAS_PTE_SPECIAL | ||
4 | select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST | 5 | select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST |
5 | select ARCH_MIGHT_HAVE_PC_PARPORT | 6 | select ARCH_MIGHT_HAVE_PC_PARPORT |
6 | select ARCH_NO_COHERENT_DMA_MMAP if !MMU | 7 | select ARCH_NO_COHERENT_DMA_MMAP if !MMU |
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 89c513a982fc..f6abfe2bca93 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h | |||
@@ -156,8 +156,6 @@ extern void page_table_range_init(unsigned long start, unsigned long end, | |||
156 | #define HAVE_ARCH_UNMAPPED_AREA | 156 | #define HAVE_ARCH_UNMAPPED_AREA |
157 | #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN | 157 | #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN |
158 | 158 | ||
159 | #define __HAVE_ARCH_PTE_SPECIAL | ||
160 | |||
161 | #include <asm-generic/pgtable.h> | 159 | #include <asm-generic/pgtable.h> |
162 | 160 | ||
163 | #endif /* __ASM_SH_PGTABLE_H */ | 161 | #endif /* __ASM_SH_PGTABLE_H */ |
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b42ba888217d..9a2b8877f174 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig | |||
@@ -88,6 +88,7 @@ config SPARC64 | |||
88 | select ARCH_USE_QUEUED_SPINLOCKS | 88 | select ARCH_USE_QUEUED_SPINLOCKS |
89 | select GENERIC_TIME_VSYSCALL | 89 | select GENERIC_TIME_VSYSCALL |
90 | select ARCH_CLOCKSOURCE_DATA | 90 | select ARCH_CLOCKSOURCE_DATA |
91 | select ARCH_HAS_PTE_SPECIAL | ||
91 | 92 | ||
92 | config ARCH_DEFCONFIG | 93 | config ARCH_DEFCONFIG |
93 | string | 94 | string |
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 44d6ac47e035..1393a8ac596b 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h | |||
@@ -117,9 +117,6 @@ bool kern_addr_valid(unsigned long addr); | |||
117 | #define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ | 117 | #define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ |
118 | #define _PAGE_PUD_HUGE _PAGE_PMD_HUGE | 118 | #define _PAGE_PUD_HUGE _PAGE_PMD_HUGE |
119 | 119 | ||
120 | /* Advertise support for _PAGE_SPECIAL */ | ||
121 | #define __HAVE_ARCH_PTE_SPECIAL | ||
122 | |||
123 | /* SUN4U pte bits... */ | 120 | /* SUN4U pte bits... */ |
124 | #define _PAGE_SZ4MB_4U _AC(0x6000000000000000,UL) /* 4MB Page */ | 121 | #define _PAGE_SZ4MB_4U _AC(0x6000000000000000,UL) /* 4MB Page */ |
125 | #define _PAGE_SZ512K_4U _AC(0x4000000000000000,UL) /* 512K Page */ | 122 | #define _PAGE_SZ512K_4U _AC(0x4000000000000000,UL) /* 512K Page */ |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cb6e3a219294..f182a4e8e5bd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -60,6 +60,7 @@ config X86 | |||
60 | select ARCH_HAS_KCOV if X86_64 | 60 | select ARCH_HAS_KCOV if X86_64 |
61 | select ARCH_HAS_MEMBARRIER_SYNC_CORE | 61 | select ARCH_HAS_MEMBARRIER_SYNC_CORE |
62 | select ARCH_HAS_PMEM_API if X86_64 | 62 | select ARCH_HAS_PMEM_API if X86_64 |
63 | select ARCH_HAS_PTE_SPECIAL | ||
63 | select ARCH_HAS_REFCOUNT | 64 | select ARCH_HAS_REFCOUNT |
64 | select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 | 65 | select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 |
65 | select ARCH_HAS_UACCESS_MCSAFE if X86_64 | 66 | select ARCH_HAS_UACCESS_MCSAFE if X86_64 |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 1e5a40673953..99fff853c944 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -65,7 +65,6 @@ | |||
65 | #define _PAGE_PKEY_BIT2 (_AT(pteval_t, 0)) | 65 | #define _PAGE_PKEY_BIT2 (_AT(pteval_t, 0)) |
66 | #define _PAGE_PKEY_BIT3 (_AT(pteval_t, 0)) | 66 | #define _PAGE_PKEY_BIT3 (_AT(pteval_t, 0)) |
67 | #endif | 67 | #endif |
68 | #define __HAVE_ARCH_PTE_SPECIAL | ||
69 | 68 | ||
70 | #define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ | 69 | #define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ |
71 | _PAGE_PKEY_BIT1 | \ | 70 | _PAGE_PKEY_BIT1 | \ |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ffc8c13c50e4..938dbcd46b97 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -114,13 +114,12 @@ static inline void pgd_list_del(pgd_t *pgd) | |||
114 | 114 | ||
115 | static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) | 115 | static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) |
116 | { | 116 | { |
117 | BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); | 117 | virt_to_page(pgd)->pt_mm = mm; |
118 | virt_to_page(pgd)->index = (pgoff_t)mm; | ||
119 | } | 118 | } |
120 | 119 | ||
121 | struct mm_struct *pgd_page_get_mm(struct page *page) | 120 | struct mm_struct *pgd_page_get_mm(struct page *page) |
122 | { | 121 | { |
123 | return (struct mm_struct *)page->index; | 122 | return page->pt_mm; |
124 | } | 123 | } |
125 | 124 | ||
126 | static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) | 125 | static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) |
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index ac3a31d433b2..635235759a0a 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig | |||
@@ -13,7 +13,7 @@ config ZRAM | |||
13 | It has several use cases, for example: /tmp storage, use as swap | 13 | It has several use cases, for example: /tmp storage, use as swap |
14 | disks and maybe many more. | 14 | disks and maybe many more. |
15 | 15 | ||
16 | See zram.txt for more information. | 16 | See Documentation/blockdev/zram.txt for more information. |
17 | 17 | ||
18 | config ZRAM_WRITEBACK | 18 | config ZRAM_WRITEBACK |
19 | bool "Write back incompressible page to backing device" | 19 | bool "Write back incompressible page to backing device" |
@@ -25,4 +25,14 @@ config ZRAM_WRITEBACK | |||
25 | For this feature, admin should set up backing device via | 25 | For this feature, admin should set up backing device via |
26 | /sys/block/zramX/backing_dev. | 26 | /sys/block/zramX/backing_dev. |
27 | 27 | ||
28 | See zram.txt for more infomration. | 28 | See Documentation/blockdev/zram.txt for more information. |
29 | |||
30 | config ZRAM_MEMORY_TRACKING | ||
31 | bool "Track zRam block status" | ||
32 | depends on ZRAM && DEBUG_FS | ||
33 | help | ||
34 | With this feature, the admin can track the state of allocated | ||
35 | blocks of zRAM and see the information via | ||
36 | /sys/kernel/debug/zram/zramX/block_state. | ||
37 | |||
38 | See Documentation/blockdev/zram.txt for more information. | ||
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0f3fadd71230..da51293e7c03 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/err.h> | 31 | #include <linux/err.h> |
32 | #include <linux/idr.h> | 32 | #include <linux/idr.h> |
33 | #include <linux/sysfs.h> | 33 | #include <linux/sysfs.h> |
34 | #include <linux/debugfs.h> | ||
34 | #include <linux/cpuhotplug.h> | 35 | #include <linux/cpuhotplug.h> |
35 | 36 | ||
36 | #include "zram_drv.h" | 37 | #include "zram_drv.h" |
@@ -52,11 +53,28 @@ static size_t huge_class_size; | |||
52 | 53 | ||
53 | static void zram_free_page(struct zram *zram, size_t index); | 54 | static void zram_free_page(struct zram *zram, size_t index); |
54 | 55 | ||
56 | static void zram_slot_lock(struct zram *zram, u32 index) | ||
57 | { | ||
58 | bit_spin_lock(ZRAM_LOCK, &zram->table[index].value); | ||
59 | } | ||
60 | |||
61 | static void zram_slot_unlock(struct zram *zram, u32 index) | ||
62 | { | ||
63 | bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value); | ||
64 | } | ||
65 | |||
55 | static inline bool init_done(struct zram *zram) | 66 | static inline bool init_done(struct zram *zram) |
56 | { | 67 | { |
57 | return zram->disksize; | 68 | return zram->disksize; |
58 | } | 69 | } |
59 | 70 | ||
71 | static inline bool zram_allocated(struct zram *zram, u32 index) | ||
72 | { | ||
73 | |||
74 | return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) || | ||
75 | zram->table[index].handle; | ||
76 | } | ||
77 | |||
60 | static inline struct zram *dev_to_zram(struct device *dev) | 78 | static inline struct zram *dev_to_zram(struct device *dev) |
61 | { | 79 | { |
62 | return (struct zram *)dev_to_disk(dev)->private_data; | 80 | return (struct zram *)dev_to_disk(dev)->private_data; |
@@ -73,7 +91,7 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) | |||
73 | } | 91 | } |
74 | 92 | ||
75 | /* flag operations require table entry bit_spin_lock() being held */ | 93 | /* flag operations require table entry bit_spin_lock() being held */ |
76 | static int zram_test_flag(struct zram *zram, u32 index, | 94 | static bool zram_test_flag(struct zram *zram, u32 index, |
77 | enum zram_pageflags flag) | 95 | enum zram_pageflags flag) |
78 | { | 96 | { |
79 | return zram->table[index].value & BIT(flag); | 97 | return zram->table[index].value & BIT(flag); |
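One subtlety worth spelling out: in the new zram_allocated() helper above, flags live in table[index].value above ZRAM_FLAG_SHIFT and ZRAM_LOCK is now the lowest flag, so shifting by ZRAM_FLAG_SHIFT + 1 deliberately discards the lock bit and asks only whether any content flag (SAME, WB, HUGE) or a handle is set. A small user-space model; the shift value is assumed here for illustration.

#include <stdio.h>

/* Model of zram's packed slot state, mirroring the enum in the patch.
 * ZRAM_FLAG_SHIFT is taken as 24 for illustration. */
#define ZRAM_FLAG_SHIFT	24

enum zram_pageflags {
	ZRAM_LOCK = ZRAM_FLAG_SHIFT,	/* slot lock, not "content" state */
	ZRAM_SAME,
	ZRAM_WB,
	ZRAM_HUGE,
};

struct slot { unsigned long value, handle; };

static int slot_allocated(const struct slot *s)
{
	/* shift by +1 so a held ZRAM_LOCK alone doesn't count */
	return (s->value >> (ZRAM_FLAG_SHIFT + 1)) || s->handle;
}

int main(void)
{
	struct slot s = { .value = 1UL << ZRAM_LOCK, .handle = 0 };

	printf("locked-but-empty allocated? %d\n", slot_allocated(&s));
	s.value |= 1UL << ZRAM_SAME;
	printf("same-filled allocated? %d\n", slot_allocated(&s));
	return 0;
}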
@@ -600,6 +618,114 @@ static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, | |||
600 | static void zram_wb_clear(struct zram *zram, u32 index) {} | 618 | static void zram_wb_clear(struct zram *zram, u32 index) {} |
601 | #endif | 619 | #endif |
602 | 620 | ||
621 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING | ||
622 | |||
623 | static struct dentry *zram_debugfs_root; | ||
624 | |||
625 | static void zram_debugfs_create(void) | ||
626 | { | ||
627 | zram_debugfs_root = debugfs_create_dir("zram", NULL); | ||
628 | } | ||
629 | |||
630 | static void zram_debugfs_destroy(void) | ||
631 | { | ||
632 | debugfs_remove_recursive(zram_debugfs_root); | ||
633 | } | ||
634 | |||
635 | static void zram_accessed(struct zram *zram, u32 index) | ||
636 | { | ||
637 | zram->table[index].ac_time = ktime_get_boottime(); | ||
638 | } | ||
639 | |||
640 | static void zram_reset_access(struct zram *zram, u32 index) | ||
641 | { | ||
642 | zram->table[index].ac_time = 0; | ||
643 | } | ||
644 | |||
645 | static ssize_t read_block_state(struct file *file, char __user *buf, | ||
646 | size_t count, loff_t *ppos) | ||
647 | { | ||
648 | char *kbuf; | ||
649 | ssize_t index, written = 0; | ||
650 | struct zram *zram = file->private_data; | ||
651 | unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; | ||
652 | struct timespec64 ts; | ||
653 | |||
654 | kbuf = kvmalloc(count, GFP_KERNEL); | ||
655 | if (!kbuf) | ||
656 | return -ENOMEM; | ||
657 | |||
658 | down_read(&zram->init_lock); | ||
659 | if (!init_done(zram)) { | ||
660 | up_read(&zram->init_lock); | ||
661 | kvfree(kbuf); | ||
662 | return -EINVAL; | ||
663 | } | ||
664 | |||
665 | for (index = *ppos; index < nr_pages; index++) { | ||
666 | int copied; | ||
667 | |||
668 | zram_slot_lock(zram, index); | ||
669 | if (!zram_allocated(zram, index)) | ||
670 | goto next; | ||
671 | |||
672 | ts = ktime_to_timespec64(zram->table[index].ac_time); | ||
673 | copied = snprintf(kbuf + written, count, | ||
674 | "%12zd %12lld.%06lu %c%c%c\n", | ||
675 | index, (s64)ts.tv_sec, | ||
676 | ts.tv_nsec / NSEC_PER_USEC, | ||
677 | zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', | ||
678 | zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', | ||
679 | zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.'); | ||
680 | |||
681 | if (count < copied) { | ||
682 | zram_slot_unlock(zram, index); | ||
683 | break; | ||
684 | } | ||
685 | written += copied; | ||
686 | count -= copied; | ||
687 | next: | ||
688 | zram_slot_unlock(zram, index); | ||
689 | *ppos += 1; | ||
690 | } | ||
691 | |||
692 | up_read(&zram->init_lock); | ||
693 | if (copy_to_user(buf, kbuf, written)) | ||
694 | written = -EFAULT; | ||
695 | kvfree(kbuf); | ||
696 | |||
697 | return written; | ||
698 | } | ||
699 | |||
700 | static const struct file_operations proc_zram_block_state_op = { | ||
701 | .open = simple_open, | ||
702 | .read = read_block_state, | ||
703 | .llseek = default_llseek, | ||
704 | }; | ||
705 | |||
706 | static void zram_debugfs_register(struct zram *zram) | ||
707 | { | ||
708 | if (!zram_debugfs_root) | ||
709 | return; | ||
710 | |||
711 | zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, | ||
712 | zram_debugfs_root); | ||
713 | debugfs_create_file("block_state", 0400, zram->debugfs_dir, | ||
714 | zram, &proc_zram_block_state_op); | ||
715 | } | ||
716 | |||
717 | static void zram_debugfs_unregister(struct zram *zram) | ||
718 | { | ||
719 | debugfs_remove_recursive(zram->debugfs_dir); | ||
720 | } | ||
721 | #else | ||
722 | static void zram_debugfs_create(void) {}; | ||
723 | static void zram_debugfs_destroy(void) {}; | ||
724 | static void zram_accessed(struct zram *zram, u32 index) {}; | ||
725 | static void zram_reset_access(struct zram *zram, u32 index) {}; | ||
726 | static void zram_debugfs_register(struct zram *zram) {}; | ||
727 | static void zram_debugfs_unregister(struct zram *zram) {}; | ||
728 | #endif | ||
603 | 729 | ||
604 | /* | 730 | /* |
605 | * We switched to per-cpu streams and this attr is not needed anymore. | 731 | * We switched to per-cpu streams and this attr is not needed anymore. |
@@ -719,14 +845,15 @@ static ssize_t mm_stat_show(struct device *dev, | |||
719 | max_used = atomic_long_read(&zram->stats.max_used_pages); | 845 | max_used = atomic_long_read(&zram->stats.max_used_pages); |
720 | 846 | ||
721 | ret = scnprintf(buf, PAGE_SIZE, | 847 | ret = scnprintf(buf, PAGE_SIZE, |
722 | "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n", | 848 | "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n", |
723 | orig_size << PAGE_SHIFT, | 849 | orig_size << PAGE_SHIFT, |
724 | (u64)atomic64_read(&zram->stats.compr_data_size), | 850 | (u64)atomic64_read(&zram->stats.compr_data_size), |
725 | mem_used << PAGE_SHIFT, | 851 | mem_used << PAGE_SHIFT, |
726 | zram->limit_pages << PAGE_SHIFT, | 852 | zram->limit_pages << PAGE_SHIFT, |
727 | max_used << PAGE_SHIFT, | 853 | max_used << PAGE_SHIFT, |
728 | (u64)atomic64_read(&zram->stats.same_pages), | 854 | (u64)atomic64_read(&zram->stats.same_pages), |
729 | pool_stats.pages_compacted); | 855 | pool_stats.pages_compacted, |
856 | (u64)atomic64_read(&zram->stats.huge_pages)); | ||
730 | up_read(&zram->init_lock); | 857 | up_read(&zram->init_lock); |
731 | 858 | ||
732 | return ret; | 859 | return ret; |
@@ -753,16 +880,6 @@ static DEVICE_ATTR_RO(io_stat); | |||
753 | static DEVICE_ATTR_RO(mm_stat); | 880 | static DEVICE_ATTR_RO(mm_stat); |
754 | static DEVICE_ATTR_RO(debug_stat); | 881 | static DEVICE_ATTR_RO(debug_stat); |
755 | 882 | ||
756 | static void zram_slot_lock(struct zram *zram, u32 index) | ||
757 | { | ||
758 | bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value); | ||
759 | } | ||
760 | |||
761 | static void zram_slot_unlock(struct zram *zram, u32 index) | ||
762 | { | ||
763 | bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); | ||
764 | } | ||
765 | |||
766 | static void zram_meta_free(struct zram *zram, u64 disksize) | 883 | static void zram_meta_free(struct zram *zram, u64 disksize) |
767 | { | 884 | { |
768 | size_t num_pages = disksize >> PAGE_SHIFT; | 885 | size_t num_pages = disksize >> PAGE_SHIFT; |
@@ -805,6 +922,13 @@ static void zram_free_page(struct zram *zram, size_t index) | |||
805 | { | 922 | { |
806 | unsigned long handle; | 923 | unsigned long handle; |
807 | 924 | ||
925 | zram_reset_access(zram, index); | ||
926 | |||
927 | if (zram_test_flag(zram, index, ZRAM_HUGE)) { | ||
928 | zram_clear_flag(zram, index, ZRAM_HUGE); | ||
929 | atomic64_dec(&zram->stats.huge_pages); | ||
930 | } | ||
931 | |||
808 | if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) { | 932 | if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) { |
809 | zram_wb_clear(zram, index); | 933 | zram_wb_clear(zram, index); |
810 | atomic64_dec(&zram->stats.pages_stored); | 934 | atomic64_dec(&zram->stats.pages_stored); |
@@ -973,6 +1097,7 @@ compress_again: | |||
973 | } | 1097 | } |
974 | 1098 | ||
975 | if (unlikely(comp_len >= huge_class_size)) { | 1099 | if (unlikely(comp_len >= huge_class_size)) { |
1100 | comp_len = PAGE_SIZE; | ||
976 | if (zram_wb_enabled(zram) && allow_wb) { | 1101 | if (zram_wb_enabled(zram) && allow_wb) { |
977 | zcomp_stream_put(zram->comp); | 1102 | zcomp_stream_put(zram->comp); |
978 | ret = write_to_bdev(zram, bvec, index, bio, &element); | 1103 | ret = write_to_bdev(zram, bvec, index, bio, &element); |
@@ -984,7 +1109,6 @@ compress_again: | |||
984 | allow_wb = false; | 1109 | allow_wb = false; |
985 | goto compress_again; | 1110 | goto compress_again; |
986 | } | 1111 | } |
987 | comp_len = PAGE_SIZE; | ||
988 | } | 1112 | } |
989 | 1113 | ||
990 | /* | 1114 | /* |
@@ -1046,6 +1170,11 @@ out: | |||
1046 | zram_slot_lock(zram, index); | 1170 | zram_slot_lock(zram, index); |
1047 | zram_free_page(zram, index); | 1171 | zram_free_page(zram, index); |
1048 | 1172 | ||
1173 | if (comp_len == PAGE_SIZE) { | ||
1174 | zram_set_flag(zram, index, ZRAM_HUGE); | ||
1175 | atomic64_inc(&zram->stats.huge_pages); | ||
1176 | } | ||
1177 | |||
1049 | if (flags) { | 1178 | if (flags) { |
1050 | zram_set_flag(zram, index, flags); | 1179 | zram_set_flag(zram, index, flags); |
1051 | zram_set_element(zram, index, element); | 1180 | zram_set_element(zram, index, element); |
@@ -1166,6 +1295,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, | |||
1166 | 1295 | ||
1167 | generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time); | 1296 | generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time); |
1168 | 1297 | ||
1298 | zram_slot_lock(zram, index); | ||
1299 | zram_accessed(zram, index); | ||
1300 | zram_slot_unlock(zram, index); | ||
1301 | |||
1169 | if (unlikely(ret < 0)) { | 1302 | if (unlikely(ret < 0)) { |
1170 | if (!is_write) | 1303 | if (!is_write) |
1171 | atomic64_inc(&zram->stats.failed_reads); | 1304 | atomic64_inc(&zram->stats.failed_reads); |
@@ -1577,6 +1710,7 @@ static int zram_add(void) | |||
1577 | } | 1710 | } |
1578 | strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); | 1711 | strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); |
1579 | 1712 | ||
1713 | zram_debugfs_register(zram); | ||
1580 | pr_info("Added device: %s\n", zram->disk->disk_name); | 1714 | pr_info("Added device: %s\n", zram->disk->disk_name); |
1581 | return device_id; | 1715 | return device_id; |
1582 | 1716 | ||
@@ -1610,6 +1744,7 @@ static int zram_remove(struct zram *zram) | |||
1610 | zram->claim = true; | 1744 | zram->claim = true; |
1611 | mutex_unlock(&bdev->bd_mutex); | 1745 | mutex_unlock(&bdev->bd_mutex); |
1612 | 1746 | ||
1747 | zram_debugfs_unregister(zram); | ||
1613 | /* | 1748 | /* |
1614 | * Remove sysfs first, so no one will perform a disksize | 1749 | * Remove sysfs first, so no one will perform a disksize |
1615 | * store while we destroy the devices. This also helps during | 1750 | * store while we destroy the devices. This also helps during |
@@ -1712,6 +1847,7 @@ static void destroy_devices(void) | |||
1712 | { | 1847 | { |
1713 | class_unregister(&zram_control_class); | 1848 | class_unregister(&zram_control_class); |
1714 | idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); | 1849 | idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); |
1850 | zram_debugfs_destroy(); | ||
1715 | idr_destroy(&zram_index_idr); | 1851 | idr_destroy(&zram_index_idr); |
1716 | unregister_blkdev(zram_major, "zram"); | 1852 | unregister_blkdev(zram_major, "zram"); |
1717 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); | 1853 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
@@ -1733,6 +1869,7 @@ static int __init zram_init(void) | |||
1733 | return ret; | 1869 | return ret; |
1734 | } | 1870 | } |
1735 | 1871 | ||
1872 | zram_debugfs_create(); | ||
1736 | zram_major = register_blkdev(0, "zram"); | 1873 | zram_major = register_blkdev(0, "zram"); |
1737 | if (zram_major <= 0) { | 1874 | if (zram_major <= 0) { |
1738 | pr_err("Unable to get major number\n"); | 1875 | pr_err("Unable to get major number\n"); |
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 008861220723..72c8584b6dff 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h | |||
@@ -43,10 +43,11 @@ | |||
43 | 43 | ||
44 | /* Flags for zram pages (table[page_no].value) */ | 44 | /* Flags for zram pages (table[page_no].value) */ |
45 | enum zram_pageflags { | 45 | enum zram_pageflags { |
46 | /* Page consists the same element */ | 46 | /* zram slot is locked */ |
47 | ZRAM_SAME = ZRAM_FLAG_SHIFT, | 47 | ZRAM_LOCK = ZRAM_FLAG_SHIFT, |
47 | ZRAM_ACCESS, /* page is now accessed */ | 48 | ZRAM_SAME, /* Page consists of the same element */ |
49 | ZRAM_WB, /* page is stored on backing_device */ | 49 | ZRAM_WB, /* page is stored on backing_device */ |
50 | ZRAM_HUGE, /* Incompressible page */ | ||
50 | 51 | ||
51 | __NR_ZRAM_PAGEFLAGS, | 52 | __NR_ZRAM_PAGEFLAGS, |
52 | }; | 53 | }; |
@@ -60,6 +61,9 @@ struct zram_table_entry { | |||
60 | unsigned long element; | 61 | unsigned long element; |
61 | }; | 62 | }; |
62 | unsigned long value; | 63 | unsigned long value; |
64 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING | ||
65 | ktime_t ac_time; | ||
66 | #endif | ||
63 | }; | 67 | }; |
64 | 68 | ||
65 | struct zram_stats { | 69 | struct zram_stats { |
@@ -71,6 +75,7 @@ struct zram_stats { | |||
71 | atomic64_t invalid_io; /* non-page-aligned I/O requests */ | 75 | atomic64_t invalid_io; /* non-page-aligned I/O requests */ |
72 | atomic64_t notify_free; /* no. of swap slot free notifications */ | 76 | atomic64_t notify_free; /* no. of swap slot free notifications */ |
73 | atomic64_t same_pages; /* no. of same element filled pages */ | 77 | atomic64_t same_pages; /* no. of same element filled pages */ |
78 | atomic64_t huge_pages; /* no. of huge pages */ | ||
74 | atomic64_t pages_stored; /* no. of pages currently stored */ | 79 | atomic64_t pages_stored; /* no. of pages currently stored */ |
75 | atomic_long_t max_used_pages; /* no. of maximum pages stored */ | 80 | atomic_long_t max_used_pages; /* no. of maximum pages stored */ |
76 | atomic64_t writestall; /* no. of write slow paths */ | 81 | atomic64_t writestall; /* no. of write slow paths */ |
@@ -107,5 +112,8 @@ struct zram { | |||
107 | unsigned long nr_pages; | 112 | unsigned long nr_pages; |
108 | spinlock_t bitmap_lock; | 113 | spinlock_t bitmap_lock; |
109 | #endif | 114 | #endif |
115 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING | ||
116 | struct dentry *debugfs_dir; | ||
117 | #endif | ||
110 | }; | 118 | }; |
111 | #endif | 119 | #endif |
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index e622f0f10502..0429c8ee58f1 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -210,12 +210,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
210 | p9_debug(P9_DEBUG_ERROR, | 210 | p9_debug(P9_DEBUG_ERROR, |
211 | "integer field, but no integer?\n"); | 211 | "integer field, but no integer?\n"); |
212 | ret = r; | 212 | ret = r; |
213 | continue; | 213 | } else { |
214 | } | 214 | v9ses->debug = option; |
215 | v9ses->debug = option; | ||
216 | #ifdef CONFIG_NET_9P_DEBUG | 215 | #ifdef CONFIG_NET_9P_DEBUG |
217 | p9_debug_level = option; | 216 | p9_debug_level = option; |
218 | #endif | 217 | #endif |
218 | } | ||
219 | break; | 219 | break; |
220 | 220 | ||
221 | case Opt_dfltuid: | 221 | case Opt_dfltuid: |
@@ -231,7 +231,6 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
231 | p9_debug(P9_DEBUG_ERROR, | 231 | p9_debug(P9_DEBUG_ERROR, |
232 | "uid field, but not a uid?\n"); | 232 | "uid field, but not a uid?\n"); |
233 | ret = -EINVAL; | 233 | ret = -EINVAL; |
234 | continue; | ||
235 | } | 234 | } |
236 | break; | 235 | break; |
237 | case Opt_dfltgid: | 236 | case Opt_dfltgid: |
@@ -247,7 +246,6 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
247 | p9_debug(P9_DEBUG_ERROR, | 246 | p9_debug(P9_DEBUG_ERROR, |
248 | "gid field, but not a gid?\n"); | 247 | "gid field, but not a gid?\n"); |
249 | ret = -EINVAL; | 248 | ret = -EINVAL; |
250 | continue; | ||
251 | } | 249 | } |
252 | break; | 250 | break; |
253 | case Opt_afid: | 251 | case Opt_afid: |
@@ -256,9 +254,9 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
256 | p9_debug(P9_DEBUG_ERROR, | 254 | p9_debug(P9_DEBUG_ERROR, |
257 | "integer field, but no integer?\n"); | 255 | "integer field, but no integer?\n"); |
258 | ret = r; | 256 | ret = r; |
259 | continue; | 257 | } else { |
258 | v9ses->afid = option; | ||
260 | } | 259 | } |
261 | v9ses->afid = option; | ||
262 | break; | 260 | break; |
263 | case Opt_uname: | 261 | case Opt_uname: |
264 | kfree(v9ses->uname); | 262 | kfree(v9ses->uname); |
@@ -306,13 +304,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
306 | "problem allocating copy of cache arg\n"); | 304 | "problem allocating copy of cache arg\n"); |
307 | goto free_and_return; | 305 | goto free_and_return; |
308 | } | 306 | } |
309 | ret = get_cache_mode(s); | 307 | r = get_cache_mode(s); |
310 | if (ret == -EINVAL) { | 308 | if (r < 0) |
311 | kfree(s); | 309 | ret = r; |
312 | goto free_and_return; | 310 | else |
313 | } | 311 | v9ses->cache = r; |
314 | 312 | ||
315 | v9ses->cache = ret; | ||
316 | kfree(s); | 313 | kfree(s); |
317 | break; | 314 | break; |
318 | 315 | ||
@@ -341,14 +338,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
341 | pr_info("Unknown access argument %s\n", | 338 | pr_info("Unknown access argument %s\n", |
342 | s); | 339 | s); |
343 | kfree(s); | 340 | kfree(s); |
344 | goto free_and_return; | 341 | continue; |
345 | } | 342 | } |
346 | v9ses->uid = make_kuid(current_user_ns(), uid); | 343 | v9ses->uid = make_kuid(current_user_ns(), uid); |
347 | if (!uid_valid(v9ses->uid)) { | 344 | if (!uid_valid(v9ses->uid)) { |
348 | ret = -EINVAL; | 345 | ret = -EINVAL; |
349 | pr_info("Uknown uid %s\n", s); | 346 | pr_info("Uknown uid %s\n", s); |
350 | kfree(s); | ||
351 | goto free_and_return; | ||
352 | } | 347 | } |
353 | } | 348 | } |
354 | 349 | ||
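The v9fs hunks replace the early continue statements with else branches: a malformed option value is now remembered in ret but no longer skips the remainder of the option string, so every later option is still parsed and the last failure is reported at the end. A standalone model of that contract (plain userspace C, hypothetical option name; not the kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_options(char *opts, int *debug)
{
        int ret = 0;
        char *p, *save = NULL;

        for (p = strtok_r(opts, ",", &save); p; p = strtok_r(NULL, ",", &save)) {
                if (strncmp(p, "debug=", 6) == 0) {
                        char *end;
                        long v = strtol(p + 6, &end, 0);

                        if (*end != '\0')
                                ret = -1;       /* remember the error, keep parsing */
                        else
                                *debug = (int)v;
                }
        }
        return ret;
}

int main(void)
{
        char opts[] = "debug=0x04,debug=bogus,debug=0x10";
        int debug = 0;
        int ret = parse_options(opts, &debug);

        printf("ret=%d debug=%#x\n", ret, debug);       /* ret=-1 debug=0x10 */
        return 0;
}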
diff --git a/fs/Kconfig b/fs/Kconfig index ac4ac908f001..40cdae75e3b4 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -108,6 +108,7 @@ source "fs/notify/Kconfig" | |||
108 | 108 | ||
109 | source "fs/quota/Kconfig" | 109 | source "fs/quota/Kconfig" |
110 | 110 | ||
111 | source "fs/autofs/Kconfig" | ||
111 | source "fs/autofs4/Kconfig" | 112 | source "fs/autofs4/Kconfig" |
112 | source "fs/fuse/Kconfig" | 113 | source "fs/fuse/Kconfig" |
113 | source "fs/overlayfs/Kconfig" | 114 | source "fs/overlayfs/Kconfig" |
@@ -203,6 +204,9 @@ config HUGETLBFS | |||
203 | config HUGETLB_PAGE | 204 | config HUGETLB_PAGE |
204 | def_bool HUGETLBFS | 205 | def_bool HUGETLBFS |
205 | 206 | ||
207 | config MEMFD_CREATE | ||
208 | def_bool TMPFS || HUGETLBFS | ||
209 | |||
206 | config ARCH_HAS_GIGANTIC_PAGE | 210 | config ARCH_HAS_GIGANTIC_PAGE |
207 | bool | 211 | bool |
208 | 212 | ||
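The new MEMFD_CREATE symbol simply records that memfd_create(2) needs tmpfs or hugetlbfs behind it. For reference, this is the syscall it gates (userspace sketch; glibc has exposed the wrapper in <sys/mman.h> since 2.27):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        /* anonymous fd-backed memory, served by tmpfs under the hood */
        int fd = memfd_create("demo", MFD_CLOEXEC);

        if (fd < 0) {
                perror("memfd_create");
                return 1;
        }
        if (write(fd, "hello", 5) != 5)
                return 1;
        close(fd);
        return 0;
}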
diff --git a/fs/Makefile b/fs/Makefile index c9375fd2c8c4..2e005525cc19 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -102,6 +102,7 @@ obj-$(CONFIG_AFFS_FS) += affs/ | |||
102 | obj-$(CONFIG_ROMFS_FS) += romfs/ | 102 | obj-$(CONFIG_ROMFS_FS) += romfs/ |
103 | obj-$(CONFIG_QNX4FS_FS) += qnx4/ | 103 | obj-$(CONFIG_QNX4FS_FS) += qnx4/ |
104 | obj-$(CONFIG_QNX6FS_FS) += qnx6/ | 104 | obj-$(CONFIG_QNX6FS_FS) += qnx6/ |
105 | obj-$(CONFIG_AUTOFS_FS) += autofs/ | ||
105 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ | 106 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ |
106 | obj-$(CONFIG_ADFS_FS) += adfs/ | 107 | obj-$(CONFIG_ADFS_FS) += adfs/ |
107 | obj-$(CONFIG_FUSE_FS) += fuse/ | 108 | obj-$(CONFIG_FUSE_FS) += fuse/ |
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig new file mode 100644 index 000000000000..6a2064eb3b27 --- /dev/null +++ b/fs/autofs/Kconfig | |||
@@ -0,0 +1,20 @@ | |||
1 | config AUTOFS_FS | ||
2 | tristate "Kernel automounter support (supports v3, v4 and v5)" | ||
3 | default n | ||
4 | help | ||
5 | The automounter is a tool to automatically mount remote file systems | ||
6 | on demand. This implementation is partially kernel-based to reduce | ||
7 | overhead in the already-mounted case; this is unlike the BSD | ||
8 | automounter (amd), which is a pure user space daemon. | ||
9 | |||
10 | To use the automounter you need the user-space tools from | ||
11 | <https://www.kernel.org/pub/linux/daemons/autofs/>; you also want | ||
12 | to answer Y to "NFS file system support", below. | ||
13 | |||
14 | To compile this support as a module, choose M here: the module will be | ||
15 | called autofs. | ||
16 | |||
17 | If you are not a part of a fairly large, distributed network or | ||
18 | don't have a laptop which needs to dynamically reconfigure to the | ||
19 | local network, you probably do not need an automounter, and can say | ||
20 | N here. | ||
diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile new file mode 100644 index 000000000000..43fedde15c26 --- /dev/null +++ b/fs/autofs/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # Makefile for the linux autofs-filesystem routines. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_AUTOFS_FS) += autofs.o | ||
6 | |||
7 | autofs-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o | ||
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs/autofs_i.h index 4737615f0eaa..9400a9f6318a 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs/autofs_i.h | |||
@@ -9,7 +9,7 @@ | |||
9 | 9 | ||
10 | /* Internal header file for autofs */ | 10 | /* Internal header file for autofs */ |
11 | 11 | ||
12 | #include <linux/auto_fs4.h> | 12 | #include <linux/auto_fs.h> |
13 | #include <linux/auto_dev-ioctl.h> | 13 | #include <linux/auto_dev-ioctl.h> |
14 | 14 | ||
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
@@ -25,7 +25,7 @@ | |||
25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
26 | #include <linux/list.h> | 26 | #include <linux/list.h> |
27 | #include <linux/completion.h> | 27 | #include <linux/completion.h> |
28 | #include <asm/current.h> | 28 | #include <linux/file.h> |
29 | 29 | ||
30 | /* This is the range of ioctl() numbers we claim as ours */ | 30 | /* This is the range of ioctl() numbers we claim as ours */ |
31 | #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY | 31 | #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY |
@@ -122,44 +122,44 @@ struct autofs_sb_info { | |||
122 | struct rcu_head rcu; | 122 | struct rcu_head rcu; |
123 | }; | 123 | }; |
124 | 124 | ||
125 | static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) | 125 | static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb) |
126 | { | 126 | { |
127 | return (struct autofs_sb_info *)(sb->s_fs_info); | 127 | return (struct autofs_sb_info *)(sb->s_fs_info); |
128 | } | 128 | } |
129 | 129 | ||
130 | static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) | 130 | static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry) |
131 | { | 131 | { |
132 | return (struct autofs_info *)(dentry->d_fsdata); | 132 | return (struct autofs_info *)(dentry->d_fsdata); |
133 | } | 133 | } |
134 | 134 | ||
135 | /* autofs4_oz_mode(): do we see the man behind the curtain? (The | 135 | /* autofs_oz_mode(): do we see the man behind the curtain? (The |
136 | * processes which do manipulations for us in user space sees the raw | 136 | * processes which do manipulations for us in user space sees the raw |
137 | * filesystem without "magic".) | 137 | * filesystem without "magic".) |
138 | */ | 138 | */ |
139 | static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) | 139 | static inline int autofs_oz_mode(struct autofs_sb_info *sbi) |
140 | { | 140 | { |
141 | return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; | 141 | return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; |
142 | } | 142 | } |
143 | 143 | ||
144 | struct inode *autofs4_get_inode(struct super_block *, umode_t); | 144 | struct inode *autofs_get_inode(struct super_block *, umode_t); |
145 | void autofs4_free_ino(struct autofs_info *); | 145 | void autofs_free_ino(struct autofs_info *); |
146 | 146 | ||
147 | /* Expiration */ | 147 | /* Expiration */ |
148 | int is_autofs4_dentry(struct dentry *); | 148 | int is_autofs_dentry(struct dentry *); |
149 | int autofs4_expire_wait(const struct path *path, int rcu_walk); | 149 | int autofs_expire_wait(const struct path *path, int rcu_walk); |
150 | int autofs4_expire_run(struct super_block *, struct vfsmount *, | 150 | int autofs_expire_run(struct super_block *, struct vfsmount *, |
151 | struct autofs_sb_info *, | 151 | struct autofs_sb_info *, |
152 | struct autofs_packet_expire __user *); | 152 | struct autofs_packet_expire __user *); |
153 | int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | 153 | int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, |
154 | struct autofs_sb_info *sbi, int when); | 154 | struct autofs_sb_info *sbi, int when); |
155 | int autofs4_expire_multi(struct super_block *, struct vfsmount *, | 155 | int autofs_expire_multi(struct super_block *, struct vfsmount *, |
156 | struct autofs_sb_info *, int __user *); | 156 | struct autofs_sb_info *, int __user *); |
157 | struct dentry *autofs4_expire_direct(struct super_block *sb, | 157 | struct dentry *autofs_expire_direct(struct super_block *sb, |
158 | struct vfsmount *mnt, | 158 | struct vfsmount *mnt, |
159 | struct autofs_sb_info *sbi, int how); | 159 | struct autofs_sb_info *sbi, int how); |
160 | struct dentry *autofs4_expire_indirect(struct super_block *sb, | 160 | struct dentry *autofs_expire_indirect(struct super_block *sb, |
161 | struct vfsmount *mnt, | 161 | struct vfsmount *mnt, |
162 | struct autofs_sb_info *sbi, int how); | 162 | struct autofs_sb_info *sbi, int how); |
163 | 163 | ||
164 | /* Device node initialization */ | 164 | /* Device node initialization */ |
165 | 165 | ||
@@ -168,11 +168,11 @@ void autofs_dev_ioctl_exit(void); | |||
168 | 168 | ||
169 | /* Operations structures */ | 169 | /* Operations structures */ |
170 | 170 | ||
171 | extern const struct inode_operations autofs4_symlink_inode_operations; | 171 | extern const struct inode_operations autofs_symlink_inode_operations; |
172 | extern const struct inode_operations autofs4_dir_inode_operations; | 172 | extern const struct inode_operations autofs_dir_inode_operations; |
173 | extern const struct file_operations autofs4_dir_operations; | 173 | extern const struct file_operations autofs_dir_operations; |
174 | extern const struct file_operations autofs4_root_operations; | 174 | extern const struct file_operations autofs_root_operations; |
175 | extern const struct dentry_operations autofs4_dentry_operations; | 175 | extern const struct dentry_operations autofs_dentry_operations; |
176 | 176 | ||
177 | /* VFS automount flags management functions */ | 177 | /* VFS automount flags management functions */ |
178 | static inline void __managed_dentry_set_managed(struct dentry *dentry) | 178 | static inline void __managed_dentry_set_managed(struct dentry *dentry) |
@@ -201,9 +201,9 @@ static inline void managed_dentry_clear_managed(struct dentry *dentry) | |||
201 | 201 | ||
202 | /* Initializing function */ | 202 | /* Initializing function */ |
203 | 203 | ||
204 | int autofs4_fill_super(struct super_block *, void *, int); | 204 | int autofs_fill_super(struct super_block *, void *, int); |
205 | struct autofs_info *autofs4_new_ino(struct autofs_sb_info *); | 205 | struct autofs_info *autofs_new_ino(struct autofs_sb_info *); |
206 | void autofs4_clean_ino(struct autofs_info *); | 206 | void autofs_clean_ino(struct autofs_info *); |
207 | 207 | ||
208 | static inline int autofs_prepare_pipe(struct file *pipe) | 208 | static inline int autofs_prepare_pipe(struct file *pipe) |
209 | { | 209 | { |
@@ -218,25 +218,25 @@ static inline int autofs_prepare_pipe(struct file *pipe) | |||
218 | 218 | ||
219 | /* Queue management functions */ | 219 | /* Queue management functions */ |
220 | 220 | ||
221 | int autofs4_wait(struct autofs_sb_info *, | 221 | int autofs_wait(struct autofs_sb_info *, |
222 | const struct path *, enum autofs_notify); | 222 | const struct path *, enum autofs_notify); |
223 | int autofs4_wait_release(struct autofs_sb_info *, autofs_wqt_t, int); | 223 | int autofs_wait_release(struct autofs_sb_info *, autofs_wqt_t, int); |
224 | void autofs4_catatonic_mode(struct autofs_sb_info *); | 224 | void autofs_catatonic_mode(struct autofs_sb_info *); |
225 | 225 | ||
226 | static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) | 226 | static inline u32 autofs_get_dev(struct autofs_sb_info *sbi) |
227 | { | 227 | { |
228 | return new_encode_dev(sbi->sb->s_dev); | 228 | return new_encode_dev(sbi->sb->s_dev); |
229 | } | 229 | } |
230 | 230 | ||
231 | static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) | 231 | static inline u64 autofs_get_ino(struct autofs_sb_info *sbi) |
232 | { | 232 | { |
233 | return d_inode(sbi->sb->s_root)->i_ino; | 233 | return d_inode(sbi->sb->s_root)->i_ino; |
234 | } | 234 | } |
235 | 235 | ||
236 | static inline void __autofs4_add_expiring(struct dentry *dentry) | 236 | static inline void __autofs_add_expiring(struct dentry *dentry) |
237 | { | 237 | { |
238 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 238 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
239 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 239 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
240 | 240 | ||
241 | if (ino) { | 241 | if (ino) { |
242 | if (list_empty(&ino->expiring)) | 242 | if (list_empty(&ino->expiring)) |
@@ -244,10 +244,10 @@ static inline void __autofs4_add_expiring(struct dentry *dentry) | |||
244 | } | 244 | } |
245 | } | 245 | } |
246 | 246 | ||
247 | static inline void autofs4_add_expiring(struct dentry *dentry) | 247 | static inline void autofs_add_expiring(struct dentry *dentry) |
248 | { | 248 | { |
249 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 249 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
250 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 250 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
251 | 251 | ||
252 | if (ino) { | 252 | if (ino) { |
253 | spin_lock(&sbi->lookup_lock); | 253 | spin_lock(&sbi->lookup_lock); |
@@ -257,10 +257,10 @@ static inline void autofs4_add_expiring(struct dentry *dentry) | |||
257 | } | 257 | } |
258 | } | 258 | } |
259 | 259 | ||
260 | static inline void autofs4_del_expiring(struct dentry *dentry) | 260 | static inline void autofs_del_expiring(struct dentry *dentry) |
261 | { | 261 | { |
262 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 262 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
263 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 263 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
264 | 264 | ||
265 | if (ino) { | 265 | if (ino) { |
266 | spin_lock(&sbi->lookup_lock); | 266 | spin_lock(&sbi->lookup_lock); |
@@ -270,4 +270,4 @@ static inline void autofs4_del_expiring(struct dentry *dentry) | |||
270 | } | 270 | } |
271 | } | 271 | } |
272 | 272 | ||
273 | void autofs4_kill_sb(struct super_block *); | 273 | void autofs_kill_sb(struct super_block *); |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs/dev-ioctl.c index 26f6b4f41ce6..ea4ca1445ab7 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c | |||
@@ -7,23 +7,10 @@ | |||
7 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/vmalloc.h> | ||
12 | #include <linux/miscdevice.h> | 10 | #include <linux/miscdevice.h> |
13 | #include <linux/init.h> | ||
14 | #include <linux/wait.h> | ||
15 | #include <linux/namei.h> | ||
16 | #include <linux/fcntl.h> | ||
17 | #include <linux/file.h> | ||
18 | #include <linux/fdtable.h> | ||
19 | #include <linux/sched.h> | ||
20 | #include <linux/cred.h> | ||
21 | #include <linux/compat.h> | 11 | #include <linux/compat.h> |
22 | #include <linux/syscalls.h> | 12 | #include <linux/syscalls.h> |
23 | #include <linux/magic.h> | 13 | #include <linux/magic.h> |
24 | #include <linux/dcache.h> | ||
25 | #include <linux/uaccess.h> | ||
26 | #include <linux/slab.h> | ||
27 | 14 | ||
28 | #include "autofs_i.h" | 15 | #include "autofs_i.h" |
29 | 16 | ||
@@ -166,7 +153,7 @@ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) | |||
166 | 153 | ||
167 | if (f) { | 154 | if (f) { |
168 | inode = file_inode(f); | 155 | inode = file_inode(f); |
169 | sbi = autofs4_sbi(inode->i_sb); | 156 | sbi = autofs_sbi(inode->i_sb); |
170 | } | 157 | } |
171 | return sbi; | 158 | return sbi; |
172 | } | 159 | } |
@@ -236,7 +223,7 @@ static int test_by_dev(const struct path *path, void *p) | |||
236 | 223 | ||
237 | static int test_by_type(const struct path *path, void *p) | 224 | static int test_by_type(const struct path *path, void *p) |
238 | { | 225 | { |
239 | struct autofs_info *ino = autofs4_dentry_ino(path->dentry); | 226 | struct autofs_info *ino = autofs_dentry_ino(path->dentry); |
240 | 227 | ||
241 | return ino && ino->sbi->type & *(unsigned *)p; | 228 | return ino && ino->sbi->type & *(unsigned *)p; |
242 | } | 229 | } |
@@ -324,7 +311,7 @@ static int autofs_dev_ioctl_ready(struct file *fp, | |||
324 | autofs_wqt_t token; | 311 | autofs_wqt_t token; |
325 | 312 | ||
326 | token = (autofs_wqt_t) param->ready.token; | 313 | token = (autofs_wqt_t) param->ready.token; |
327 | return autofs4_wait_release(sbi, token, 0); | 314 | return autofs_wait_release(sbi, token, 0); |
328 | } | 315 | } |
329 | 316 | ||
330 | /* | 317 | /* |
@@ -340,7 +327,7 @@ static int autofs_dev_ioctl_fail(struct file *fp, | |||
340 | 327 | ||
341 | token = (autofs_wqt_t) param->fail.token; | 328 | token = (autofs_wqt_t) param->fail.token; |
342 | status = param->fail.status < 0 ? param->fail.status : -ENOENT; | 329 | status = param->fail.status < 0 ? param->fail.status : -ENOENT; |
343 | return autofs4_wait_release(sbi, token, status); | 330 | return autofs_wait_release(sbi, token, status); |
344 | } | 331 | } |
345 | 332 | ||
346 | /* | 333 | /* |
@@ -412,7 +399,7 @@ static int autofs_dev_ioctl_catatonic(struct file *fp, | |||
412 | struct autofs_sb_info *sbi, | 399 | struct autofs_sb_info *sbi, |
413 | struct autofs_dev_ioctl *param) | 400 | struct autofs_dev_ioctl *param) |
414 | { | 401 | { |
415 | autofs4_catatonic_mode(sbi); | 402 | autofs_catatonic_mode(sbi); |
416 | return 0; | 403 | return 0; |
417 | } | 404 | } |
418 | 405 | ||
@@ -459,10 +446,10 @@ static int autofs_dev_ioctl_requester(struct file *fp, | |||
459 | if (err) | 446 | if (err) |
460 | goto out; | 447 | goto out; |
461 | 448 | ||
462 | ino = autofs4_dentry_ino(path.dentry); | 449 | ino = autofs_dentry_ino(path.dentry); |
463 | if (ino) { | 450 | if (ino) { |
464 | err = 0; | 451 | err = 0; |
465 | autofs4_expire_wait(&path, 0); | 452 | autofs_expire_wait(&path, 0); |
466 | spin_lock(&sbi->fs_lock); | 453 | spin_lock(&sbi->fs_lock); |
467 | param->requester.uid = | 454 | param->requester.uid = |
468 | from_kuid_munged(current_user_ns(), ino->uid); | 455 | from_kuid_munged(current_user_ns(), ino->uid); |
@@ -489,7 +476,7 @@ static int autofs_dev_ioctl_expire(struct file *fp, | |||
489 | how = param->expire.how; | 476 | how = param->expire.how; |
490 | mnt = fp->f_path.mnt; | 477 | mnt = fp->f_path.mnt; |
491 | 478 | ||
492 | return autofs4_do_expire_multi(sbi->sb, mnt, sbi, how); | 479 | return autofs_do_expire_multi(sbi->sb, mnt, sbi, how); |
493 | } | 480 | } |
494 | 481 | ||
495 | /* Check if autofs mount point is in use */ | 482 | /* Check if autofs mount point is in use */ |
@@ -686,7 +673,7 @@ static int _autofs_dev_ioctl(unsigned int command, | |||
686 | * Admin needs to be able to set the mount catatonic in | 673 | * Admin needs to be able to set the mount catatonic in |
687 | * order to be able to perform the re-open. | 674 | * order to be able to perform the re-open. |
688 | */ | 675 | */ |
689 | if (!autofs4_oz_mode(sbi) && | 676 | if (!autofs_oz_mode(sbi) && |
690 | cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) { | 677 | cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) { |
691 | err = -EACCES; | 678 | err = -EACCES; |
692 | fput(fp); | 679 | fput(fp); |
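The ready and fail handlers above complete a pending wait-queue entry identified by its token. From the daemon's side the same handshake looks roughly like this, a sketch using the classic ioctls from <linux/auto_fs.h>; the token comes from the packet the daemon reads off the kernel pipe, and error handling is trimmed:

#include <sys/ioctl.h>
#include <linux/auto_fs.h>

/* ack or fail a pending autofs request identified by its wait-queue token */
static int finish_request(int ioctlfd, unsigned long token, int ok)
{
        return ioctl(ioctlfd, ok ? AUTOFS_IOC_READY : AUTOFS_IOC_FAIL, token);
}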
diff --git a/fs/autofs4/expire.c b/fs/autofs/expire.c index 57725d4a8c59..b332d3f6e730 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs/expire.c | |||
@@ -13,10 +13,10 @@ | |||
13 | static unsigned long now; | 13 | static unsigned long now; |
14 | 14 | ||
15 | /* Check if a dentry can be expired */ | 15 | /* Check if a dentry can be expired */ |
16 | static inline int autofs4_can_expire(struct dentry *dentry, | 16 | static inline int autofs_can_expire(struct dentry *dentry, |
17 | unsigned long timeout, int do_now) | 17 | unsigned long timeout, int do_now) |
18 | { | 18 | { |
19 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 19 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
20 | 20 | ||
21 | /* dentry in the process of being deleted */ | 21 | /* dentry in the process of being deleted */ |
22 | if (ino == NULL) | 22 | if (ino == NULL) |
@@ -31,7 +31,7 @@ static inline int autofs4_can_expire(struct dentry *dentry, | |||
31 | } | 31 | } |
32 | 32 | ||
33 | /* Check a mount point for busyness */ | 33 | /* Check a mount point for busyness */ |
34 | static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) | 34 | static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry) |
35 | { | 35 | { |
36 | struct dentry *top = dentry; | 36 | struct dentry *top = dentry; |
37 | struct path path = {.mnt = mnt, .dentry = dentry}; | 37 | struct path path = {.mnt = mnt, .dentry = dentry}; |
@@ -44,8 +44,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) | |||
44 | if (!follow_down_one(&path)) | 44 | if (!follow_down_one(&path)) |
45 | goto done; | 45 | goto done; |
46 | 46 | ||
47 | if (is_autofs4_dentry(path.dentry)) { | 47 | if (is_autofs_dentry(path.dentry)) { |
48 | struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb); | 48 | struct autofs_sb_info *sbi = autofs_sbi(path.dentry->d_sb); |
49 | 49 | ||
50 | /* This is an autofs submount, we can't expire it */ | 50 | /* This is an autofs submount, we can't expire it */ |
51 | if (autofs_type_indirect(sbi->type)) | 51 | if (autofs_type_indirect(sbi->type)) |
@@ -56,7 +56,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) | |||
56 | if (!may_umount_tree(path.mnt)) { | 56 | if (!may_umount_tree(path.mnt)) { |
57 | struct autofs_info *ino; | 57 | struct autofs_info *ino; |
58 | 58 | ||
59 | ino = autofs4_dentry_ino(top); | 59 | ino = autofs_dentry_ino(top); |
60 | ino->last_used = jiffies; | 60 | ino->last_used = jiffies; |
61 | goto done; | 61 | goto done; |
62 | } | 62 | } |
@@ -74,7 +74,7 @@ done: | |||
74 | static struct dentry *get_next_positive_subdir(struct dentry *prev, | 74 | static struct dentry *get_next_positive_subdir(struct dentry *prev, |
75 | struct dentry *root) | 75 | struct dentry *root) |
76 | { | 76 | { |
77 | struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); | 77 | struct autofs_sb_info *sbi = autofs_sbi(root->d_sb); |
78 | struct list_head *next; | 78 | struct list_head *next; |
79 | struct dentry *q; | 79 | struct dentry *q; |
80 | 80 | ||
@@ -121,7 +121,7 @@ cont: | |||
121 | static struct dentry *get_next_positive_dentry(struct dentry *prev, | 121 | static struct dentry *get_next_positive_dentry(struct dentry *prev, |
122 | struct dentry *root) | 122 | struct dentry *root) |
123 | { | 123 | { |
124 | struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); | 124 | struct autofs_sb_info *sbi = autofs_sbi(root->d_sb); |
125 | struct list_head *next; | 125 | struct list_head *next; |
126 | struct dentry *p, *ret; | 126 | struct dentry *p, *ret; |
127 | 127 | ||
@@ -184,10 +184,10 @@ again: | |||
184 | * The tree is not busy iff no mountpoints are busy and there are no | 184 | * The tree is not busy iff no mountpoints are busy and there are no |
185 | * autofs submounts. | 185 | * autofs submounts. |
186 | */ | 186 | */ |
187 | static int autofs4_direct_busy(struct vfsmount *mnt, | 187 | static int autofs_direct_busy(struct vfsmount *mnt, |
188 | struct dentry *top, | 188 | struct dentry *top, |
189 | unsigned long timeout, | 189 | unsigned long timeout, |
190 | int do_now) | 190 | int do_now) |
191 | { | 191 | { |
192 | pr_debug("top %p %pd\n", top, top); | 192 | pr_debug("top %p %pd\n", top, top); |
193 | 193 | ||
@@ -195,14 +195,14 @@ static int autofs4_direct_busy(struct vfsmount *mnt, | |||
195 | if (!may_umount_tree(mnt)) { | 195 | if (!may_umount_tree(mnt)) { |
196 | struct autofs_info *ino; | 196 | struct autofs_info *ino; |
197 | 197 | ||
198 | ino = autofs4_dentry_ino(top); | 198 | ino = autofs_dentry_ino(top); |
199 | if (ino) | 199 | if (ino) |
200 | ino->last_used = jiffies; | 200 | ino->last_used = jiffies; |
201 | return 1; | 201 | return 1; |
202 | } | 202 | } |
203 | 203 | ||
204 | /* Timeout of a direct mount is determined by its top dentry */ | 204 | /* Timeout of a direct mount is determined by its top dentry */ |
205 | if (!autofs4_can_expire(top, timeout, do_now)) | 205 | if (!autofs_can_expire(top, timeout, do_now)) |
206 | return 1; | 206 | return 1; |
207 | 207 | ||
208 | return 0; | 208 | return 0; |
@@ -212,12 +212,12 @@ static int autofs4_direct_busy(struct vfsmount *mnt, | |||
212 | * Check a directory tree of mount points for busyness | 212 | * Check a directory tree of mount points for busyness |
213 | * The tree is not busy iff no mountpoints are busy | 213 | * The tree is not busy iff no mountpoints are busy |
214 | */ | 214 | */ |
215 | static int autofs4_tree_busy(struct vfsmount *mnt, | 215 | static int autofs_tree_busy(struct vfsmount *mnt, |
216 | struct dentry *top, | 216 | struct dentry *top, |
217 | unsigned long timeout, | 217 | unsigned long timeout, |
218 | int do_now) | 218 | int do_now) |
219 | { | 219 | { |
220 | struct autofs_info *top_ino = autofs4_dentry_ino(top); | 220 | struct autofs_info *top_ino = autofs_dentry_ino(top); |
221 | struct dentry *p; | 221 | struct dentry *p; |
222 | 222 | ||
223 | pr_debug("top %p %pd\n", top, top); | 223 | pr_debug("top %p %pd\n", top, top); |
@@ -237,13 +237,13 @@ static int autofs4_tree_busy(struct vfsmount *mnt, | |||
237 | * If the fs is busy update the expiry counter. | 237 | * If the fs is busy update the expiry counter. |
238 | */ | 238 | */ |
239 | if (d_mountpoint(p)) { | 239 | if (d_mountpoint(p)) { |
240 | if (autofs4_mount_busy(mnt, p)) { | 240 | if (autofs_mount_busy(mnt, p)) { |
241 | top_ino->last_used = jiffies; | 241 | top_ino->last_used = jiffies; |
242 | dput(p); | 242 | dput(p); |
243 | return 1; | 243 | return 1; |
244 | } | 244 | } |
245 | } else { | 245 | } else { |
246 | struct autofs_info *ino = autofs4_dentry_ino(p); | 246 | struct autofs_info *ino = autofs_dentry_ino(p); |
247 | unsigned int ino_count = atomic_read(&ino->count); | 247 | unsigned int ino_count = atomic_read(&ino->count); |
248 | 248 | ||
249 | /* allow for dget above and top is already dgot */ | 249 | /* allow for dget above and top is already dgot */ |
@@ -261,16 +261,16 @@ static int autofs4_tree_busy(struct vfsmount *mnt, | |||
261 | } | 261 | } |
262 | 262 | ||
263 | /* Timeout of a tree mount is ultimately determined by its top dentry */ | 263 | /* Timeout of a tree mount is ultimately determined by its top dentry */ |
264 | if (!autofs4_can_expire(top, timeout, do_now)) | 264 | if (!autofs_can_expire(top, timeout, do_now)) |
265 | return 1; | 265 | return 1; |
266 | 266 | ||
267 | return 0; | 267 | return 0; |
268 | } | 268 | } |
269 | 269 | ||
270 | static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, | 270 | static struct dentry *autofs_check_leaves(struct vfsmount *mnt, |
271 | struct dentry *parent, | 271 | struct dentry *parent, |
272 | unsigned long timeout, | 272 | unsigned long timeout, |
273 | int do_now) | 273 | int do_now) |
274 | { | 274 | { |
275 | struct dentry *p; | 275 | struct dentry *p; |
276 | 276 | ||
@@ -282,11 +282,11 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, | |||
282 | 282 | ||
283 | if (d_mountpoint(p)) { | 283 | if (d_mountpoint(p)) { |
284 | /* Can we umount this guy */ | 284 | /* Can we umount this guy */ |
285 | if (autofs4_mount_busy(mnt, p)) | 285 | if (autofs_mount_busy(mnt, p)) |
286 | continue; | 286 | continue; |
287 | 287 | ||
288 | /* Can we expire this guy */ | 288 | /* Can we expire this guy */ |
289 | if (autofs4_can_expire(p, timeout, do_now)) | 289 | if (autofs_can_expire(p, timeout, do_now)) |
290 | return p; | 290 | return p; |
291 | } | 291 | } |
292 | } | 292 | } |
@@ -294,10 +294,10 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, | |||
294 | } | 294 | } |
295 | 295 | ||
296 | /* Check if we can expire a direct mount (possibly a tree) */ | 296 | /* Check if we can expire a direct mount (possibly a tree) */ |
297 | struct dentry *autofs4_expire_direct(struct super_block *sb, | 297 | struct dentry *autofs_expire_direct(struct super_block *sb, |
298 | struct vfsmount *mnt, | 298 | struct vfsmount *mnt, |
299 | struct autofs_sb_info *sbi, | 299 | struct autofs_sb_info *sbi, |
300 | int how) | 300 | int how) |
301 | { | 301 | { |
302 | unsigned long timeout; | 302 | unsigned long timeout; |
303 | struct dentry *root = dget(sb->s_root); | 303 | struct dentry *root = dget(sb->s_root); |
@@ -310,9 +310,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, | |||
310 | now = jiffies; | 310 | now = jiffies; |
311 | timeout = sbi->exp_timeout; | 311 | timeout = sbi->exp_timeout; |
312 | 312 | ||
313 | if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { | 313 | if (!autofs_direct_busy(mnt, root, timeout, do_now)) { |
314 | spin_lock(&sbi->fs_lock); | 314 | spin_lock(&sbi->fs_lock); |
315 | ino = autofs4_dentry_ino(root); | 315 | ino = autofs_dentry_ino(root); |
316 | /* No point expiring a pending mount */ | 316 | /* No point expiring a pending mount */ |
317 | if (ino->flags & AUTOFS_INF_PENDING) { | 317 | if (ino->flags & AUTOFS_INF_PENDING) { |
318 | spin_unlock(&sbi->fs_lock); | 318 | spin_unlock(&sbi->fs_lock); |
@@ -321,7 +321,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, | |||
321 | ino->flags |= AUTOFS_INF_WANT_EXPIRE; | 321 | ino->flags |= AUTOFS_INF_WANT_EXPIRE; |
322 | spin_unlock(&sbi->fs_lock); | 322 | spin_unlock(&sbi->fs_lock); |
323 | synchronize_rcu(); | 323 | synchronize_rcu(); |
324 | if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { | 324 | if (!autofs_direct_busy(mnt, root, timeout, do_now)) { |
325 | spin_lock(&sbi->fs_lock); | 325 | spin_lock(&sbi->fs_lock); |
326 | ino->flags |= AUTOFS_INF_EXPIRING; | 326 | ino->flags |= AUTOFS_INF_EXPIRING; |
327 | init_completion(&ino->expire_complete); | 327 | init_completion(&ino->expire_complete); |
@@ -350,7 +350,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
350 | { | 350 | { |
351 | int do_now = how & AUTOFS_EXP_IMMEDIATE; | 351 | int do_now = how & AUTOFS_EXP_IMMEDIATE; |
352 | int exp_leaves = how & AUTOFS_EXP_LEAVES; | 352 | int exp_leaves = how & AUTOFS_EXP_LEAVES; |
353 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 353 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
354 | unsigned int ino_count; | 354 | unsigned int ino_count; |
355 | 355 | ||
356 | /* No point expiring a pending mount */ | 356 | /* No point expiring a pending mount */ |
@@ -367,11 +367,11 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
367 | pr_debug("checking mountpoint %p %pd\n", dentry, dentry); | 367 | pr_debug("checking mountpoint %p %pd\n", dentry, dentry); |
368 | 368 | ||
369 | /* Can we umount this guy */ | 369 | /* Can we umount this guy */ |
370 | if (autofs4_mount_busy(mnt, dentry)) | 370 | if (autofs_mount_busy(mnt, dentry)) |
371 | return NULL; | 371 | return NULL; |
372 | 372 | ||
373 | /* Can we expire this guy */ | 373 | /* Can we expire this guy */ |
374 | if (autofs4_can_expire(dentry, timeout, do_now)) | 374 | if (autofs_can_expire(dentry, timeout, do_now)) |
375 | return dentry; | 375 | return dentry; |
376 | return NULL; | 376 | return NULL; |
377 | } | 377 | } |
@@ -382,7 +382,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
382 | * A symlink can't be "busy" in the usual sense so | 382 | * A symlink can't be "busy" in the usual sense so |
383 | * just check last used for expire timeout. | 383 | * just check last used for expire timeout. |
384 | */ | 384 | */ |
385 | if (autofs4_can_expire(dentry, timeout, do_now)) | 385 | if (autofs_can_expire(dentry, timeout, do_now)) |
386 | return dentry; | 386 | return dentry; |
387 | return NULL; | 387 | return NULL; |
388 | } | 388 | } |
@@ -397,7 +397,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
397 | if (d_count(dentry) > ino_count) | 397 | if (d_count(dentry) > ino_count) |
398 | return NULL; | 398 | return NULL; |
399 | 399 | ||
400 | if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) | 400 | if (!autofs_tree_busy(mnt, dentry, timeout, do_now)) |
401 | return dentry; | 401 | return dentry; |
402 | /* | 402 | /* |
403 | * Case 3: pseudo direct mount, expire individual leaves | 403 | * Case 3: pseudo direct mount, expire individual leaves |
@@ -411,7 +411,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
411 | if (d_count(dentry) > ino_count) | 411 | if (d_count(dentry) > ino_count) |
412 | return NULL; | 412 | return NULL; |
413 | 413 | ||
414 | expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); | 414 | expired = autofs_check_leaves(mnt, dentry, timeout, do_now); |
415 | if (expired) { | 415 | if (expired) { |
416 | if (expired == dentry) | 416 | if (expired == dentry) |
417 | dput(dentry); | 417 | dput(dentry); |
@@ -427,10 +427,10 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
427 | * - it is unused by any user process | 427 | * - it is unused by any user process |
428 | * - it has been unused for exp_timeout time | 428 | * - it has been unused for exp_timeout time |
429 | */ | 429 | */ |
430 | struct dentry *autofs4_expire_indirect(struct super_block *sb, | 430 | struct dentry *autofs_expire_indirect(struct super_block *sb, |
431 | struct vfsmount *mnt, | 431 | struct vfsmount *mnt, |
432 | struct autofs_sb_info *sbi, | 432 | struct autofs_sb_info *sbi, |
433 | int how) | 433 | int how) |
434 | { | 434 | { |
435 | unsigned long timeout; | 435 | unsigned long timeout; |
436 | struct dentry *root = sb->s_root; | 436 | struct dentry *root = sb->s_root; |
@@ -450,7 +450,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
450 | int flags = how; | 450 | int flags = how; |
451 | 451 | ||
452 | spin_lock(&sbi->fs_lock); | 452 | spin_lock(&sbi->fs_lock); |
453 | ino = autofs4_dentry_ino(dentry); | 453 | ino = autofs_dentry_ino(dentry); |
454 | if (ino->flags & AUTOFS_INF_WANT_EXPIRE) { | 454 | if (ino->flags & AUTOFS_INF_WANT_EXPIRE) { |
455 | spin_unlock(&sbi->fs_lock); | 455 | spin_unlock(&sbi->fs_lock); |
456 | continue; | 456 | continue; |
@@ -462,7 +462,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
462 | continue; | 462 | continue; |
463 | 463 | ||
464 | spin_lock(&sbi->fs_lock); | 464 | spin_lock(&sbi->fs_lock); |
465 | ino = autofs4_dentry_ino(expired); | 465 | ino = autofs_dentry_ino(expired); |
466 | ino->flags |= AUTOFS_INF_WANT_EXPIRE; | 466 | ino->flags |= AUTOFS_INF_WANT_EXPIRE; |
467 | spin_unlock(&sbi->fs_lock); | 467 | spin_unlock(&sbi->fs_lock); |
468 | synchronize_rcu(); | 468 | synchronize_rcu(); |
@@ -498,11 +498,11 @@ found: | |||
498 | return expired; | 498 | return expired; |
499 | } | 499 | } |
500 | 500 | ||
501 | int autofs4_expire_wait(const struct path *path, int rcu_walk) | 501 | int autofs_expire_wait(const struct path *path, int rcu_walk) |
502 | { | 502 | { |
503 | struct dentry *dentry = path->dentry; | 503 | struct dentry *dentry = path->dentry; |
504 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 504 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
505 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 505 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
506 | int status; | 506 | int status; |
507 | int state; | 507 | int state; |
508 | 508 | ||
@@ -529,7 +529,7 @@ retry: | |||
529 | 529 | ||
530 | pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); | 530 | pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); |
531 | 531 | ||
532 | status = autofs4_wait(sbi, path, NFY_NONE); | 532 | status = autofs_wait(sbi, path, NFY_NONE); |
533 | wait_for_completion(&ino->expire_complete); | 533 | wait_for_completion(&ino->expire_complete); |
534 | 534 | ||
535 | pr_debug("expire done status=%d\n", status); | 535 | pr_debug("expire done status=%d\n", status); |
@@ -545,10 +545,10 @@ retry: | |||
545 | } | 545 | } |
546 | 546 | ||
547 | /* Perform an expiry operation */ | 547 | /* Perform an expiry operation */ |
548 | int autofs4_expire_run(struct super_block *sb, | 548 | int autofs_expire_run(struct super_block *sb, |
549 | struct vfsmount *mnt, | 549 | struct vfsmount *mnt, |
550 | struct autofs_sb_info *sbi, | 550 | struct autofs_sb_info *sbi, |
551 | struct autofs_packet_expire __user *pkt_p) | 551 | struct autofs_packet_expire __user *pkt_p) |
552 | { | 552 | { |
553 | struct autofs_packet_expire pkt; | 553 | struct autofs_packet_expire pkt; |
554 | struct autofs_info *ino; | 554 | struct autofs_info *ino; |
@@ -560,7 +560,7 @@ int autofs4_expire_run(struct super_block *sb, | |||
560 | pkt.hdr.proto_version = sbi->version; | 560 | pkt.hdr.proto_version = sbi->version; |
561 | pkt.hdr.type = autofs_ptype_expire; | 561 | pkt.hdr.type = autofs_ptype_expire; |
562 | 562 | ||
563 | dentry = autofs4_expire_indirect(sb, mnt, sbi, 0); | 563 | dentry = autofs_expire_indirect(sb, mnt, sbi, 0); |
564 | if (!dentry) | 564 | if (!dentry) |
565 | return -EAGAIN; | 565 | return -EAGAIN; |
566 | 566 | ||
@@ -573,7 +573,7 @@ int autofs4_expire_run(struct super_block *sb, | |||
573 | ret = -EFAULT; | 573 | ret = -EFAULT; |
574 | 574 | ||
575 | spin_lock(&sbi->fs_lock); | 575 | spin_lock(&sbi->fs_lock); |
576 | ino = autofs4_dentry_ino(dentry); | 576 | ino = autofs_dentry_ino(dentry); |
577 | /* avoid rapid-fire expire attempts if expiry fails */ | 577 | /* avoid rapid-fire expire attempts if expiry fails */ |
578 | ino->last_used = now; | 578 | ino->last_used = now; |
579 | ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); | 579 | ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); |
@@ -583,25 +583,25 @@ int autofs4_expire_run(struct super_block *sb, | |||
583 | return ret; | 583 | return ret; |
584 | } | 584 | } |
585 | 585 | ||
586 | int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | 586 | int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, |
587 | struct autofs_sb_info *sbi, int when) | 587 | struct autofs_sb_info *sbi, int when) |
588 | { | 588 | { |
589 | struct dentry *dentry; | 589 | struct dentry *dentry; |
590 | int ret = -EAGAIN; | 590 | int ret = -EAGAIN; |
591 | 591 | ||
592 | if (autofs_type_trigger(sbi->type)) | 592 | if (autofs_type_trigger(sbi->type)) |
593 | dentry = autofs4_expire_direct(sb, mnt, sbi, when); | 593 | dentry = autofs_expire_direct(sb, mnt, sbi, when); |
594 | else | 594 | else |
595 | dentry = autofs4_expire_indirect(sb, mnt, sbi, when); | 595 | dentry = autofs_expire_indirect(sb, mnt, sbi, when); |
596 | 596 | ||
597 | if (dentry) { | 597 | if (dentry) { |
598 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 598 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
599 | const struct path path = { .mnt = mnt, .dentry = dentry }; | 599 | const struct path path = { .mnt = mnt, .dentry = dentry }; |
600 | 600 | ||
601 | /* This is synchronous because it makes the daemon a | 601 | /* This is synchronous because it makes the daemon a |
602 | * little easier | 602 | * little easier |
603 | */ | 603 | */ |
604 | ret = autofs4_wait(sbi, &path, NFY_EXPIRE); | 604 | ret = autofs_wait(sbi, &path, NFY_EXPIRE); |
605 | 605 | ||
606 | spin_lock(&sbi->fs_lock); | 606 | spin_lock(&sbi->fs_lock); |
607 | /* avoid rapid-fire expire attempts if expiry fails */ | 607 | /* avoid rapid-fire expire attempts if expiry fails */ |
@@ -619,7 +619,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
619 | * Call repeatedly until it returns -EAGAIN, meaning there's nothing | 619 | * Call repeatedly until it returns -EAGAIN, meaning there's nothing |
620 | * more to be done. | 620 | * more to be done. |
621 | */ | 621 | */ |
622 | int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, | 622 | int autofs_expire_multi(struct super_block *sb, struct vfsmount *mnt, |
623 | struct autofs_sb_info *sbi, int __user *arg) | 623 | struct autofs_sb_info *sbi, int __user *arg) |
624 | { | 624 | { |
625 | int do_now = 0; | 625 | int do_now = 0; |
@@ -627,6 +627,5 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
627 | if (arg && get_user(do_now, arg)) | 627 | if (arg && get_user(do_now, arg)) |
628 | return -EFAULT; | 628 | return -EFAULT; |
629 | 629 | ||
630 | return autofs4_do_expire_multi(sb, mnt, sbi, do_now); | 630 | return autofs_do_expire_multi(sb, mnt, sbi, do_now); |
631 | } | 631 | } |
632 | |||
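As the comment above autofs_expire_multi() says, expiry is driven one dentry at a time until the kernel reports -EAGAIN. A daemon-side sketch of that loop, assuming an already-opened ioctl fd on the mount point (AUTOFS_IOC_EXPIRE_MULTI is from <linux/auto_fs4.h>; error handling trimmed):

#include <sys/ioctl.h>
#include <linux/auto_fs4.h>

static void expire_all(int ioctlfd, int do_now)
{
        /* each successful call expired exactly one candidate; ask for more */
        while (ioctl(ioctlfd, AUTOFS_IOC_EXPIRE_MULTI, &do_now) == 0)
                ;
        /* the loop ends with errno == EAGAIN once nothing is expirable */
}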
diff --git a/fs/autofs4/init.c b/fs/autofs/init.c index 8cf0e63389ae..16fb61315843 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs/init.c | |||
@@ -13,18 +13,18 @@ | |||
13 | static struct dentry *autofs_mount(struct file_system_type *fs_type, | 13 | static struct dentry *autofs_mount(struct file_system_type *fs_type, |
14 | int flags, const char *dev_name, void *data) | 14 | int flags, const char *dev_name, void *data) |
15 | { | 15 | { |
16 | return mount_nodev(fs_type, flags, data, autofs4_fill_super); | 16 | return mount_nodev(fs_type, flags, data, autofs_fill_super); |
17 | } | 17 | } |
18 | 18 | ||
19 | static struct file_system_type autofs_fs_type = { | 19 | static struct file_system_type autofs_fs_type = { |
20 | .owner = THIS_MODULE, | 20 | .owner = THIS_MODULE, |
21 | .name = "autofs", | 21 | .name = "autofs", |
22 | .mount = autofs_mount, | 22 | .mount = autofs_mount, |
23 | .kill_sb = autofs4_kill_sb, | 23 | .kill_sb = autofs_kill_sb, |
24 | }; | 24 | }; |
25 | MODULE_ALIAS_FS("autofs"); | 25 | MODULE_ALIAS_FS("autofs"); |
26 | 26 | ||
27 | static int __init init_autofs4_fs(void) | 27 | static int __init init_autofs_fs(void) |
28 | { | 28 | { |
29 | int err; | 29 | int err; |
30 | 30 | ||
@@ -37,12 +37,12 @@ static int __init init_autofs4_fs(void) | |||
37 | return err; | 37 | return err; |
38 | } | 38 | } |
39 | 39 | ||
40 | static void __exit exit_autofs4_fs(void) | 40 | static void __exit exit_autofs_fs(void) |
41 | { | 41 | { |
42 | autofs_dev_ioctl_exit(); | 42 | autofs_dev_ioctl_exit(); |
43 | unregister_filesystem(&autofs_fs_type); | 43 | unregister_filesystem(&autofs_fs_type); |
44 | } | 44 | } |
45 | 45 | ||
46 | module_init(init_autofs4_fs) | 46 | module_init(init_autofs_fs) |
47 | module_exit(exit_autofs4_fs) | 47 | module_exit(exit_autofs_fs) |
48 | MODULE_LICENSE("GPL"); | 48 | MODULE_LICENSE("GPL"); |
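Nothing in the rename changes the mount interface: with MODULE_ALIAS_FS("autofs") in place, a plain mount(2) by filesystem name still autoloads the module. An illustrative call follows; the option values are hypothetical, but fd=, pgrp=, minproto= and maxproto= are the options the parser accepts (see the Opt_ enum in inode.c below):

#include <sys/mount.h>

static int mount_autofs(const char *where)
{
        /* fd= is the daemon's pipe end, pgrp= its process group */
        return mount("automount", where, "autofs", 0,
                     "fd=3,pgrp=1234,minproto=5,maxproto=5");
}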
diff --git a/fs/autofs4/inode.c b/fs/autofs/inode.c index 09e7d68dff02..b51980fc274e 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs/inode.c | |||
@@ -7,18 +7,14 @@ | |||
7 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/file.h> | ||
13 | #include <linux/seq_file.h> | 10 | #include <linux/seq_file.h> |
14 | #include <linux/pagemap.h> | 11 | #include <linux/pagemap.h> |
15 | #include <linux/parser.h> | 12 | #include <linux/parser.h> |
16 | #include <linux/bitops.h> | ||
17 | #include <linux/magic.h> | 13 | #include <linux/magic.h> |
14 | |||
18 | #include "autofs_i.h" | 15 | #include "autofs_i.h" |
19 | #include <linux/module.h> | ||
20 | 16 | ||
21 | struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) | 17 | struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi) |
22 | { | 18 | { |
23 | struct autofs_info *ino; | 19 | struct autofs_info *ino; |
24 | 20 | ||
@@ -32,21 +28,21 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) | |||
32 | return ino; | 28 | return ino; |
33 | } | 29 | } |
34 | 30 | ||
35 | void autofs4_clean_ino(struct autofs_info *ino) | 31 | void autofs_clean_ino(struct autofs_info *ino) |
36 | { | 32 | { |
37 | ino->uid = GLOBAL_ROOT_UID; | 33 | ino->uid = GLOBAL_ROOT_UID; |
38 | ino->gid = GLOBAL_ROOT_GID; | 34 | ino->gid = GLOBAL_ROOT_GID; |
39 | ino->last_used = jiffies; | 35 | ino->last_used = jiffies; |
40 | } | 36 | } |
41 | 37 | ||
42 | void autofs4_free_ino(struct autofs_info *ino) | 38 | void autofs_free_ino(struct autofs_info *ino) |
43 | { | 39 | { |
44 | kfree(ino); | 40 | kfree(ino); |
45 | } | 41 | } |
46 | 42 | ||
47 | void autofs4_kill_sb(struct super_block *sb) | 43 | void autofs_kill_sb(struct super_block *sb) |
48 | { | 44 | { |
49 | struct autofs_sb_info *sbi = autofs4_sbi(sb); | 45 | struct autofs_sb_info *sbi = autofs_sbi(sb); |
50 | 46 | ||
51 | /* | 47 | /* |
52 | * In the event of a failure in get_sb_nodev the superblock | 48 | * In the event of a failure in get_sb_nodev the superblock |
@@ -56,7 +52,7 @@ void autofs4_kill_sb(struct super_block *sb) | |||
56 | */ | 52 | */ |
57 | if (sbi) { | 53 | if (sbi) { |
58 | /* Free wait queues, close pipe */ | 54 | /* Free wait queues, close pipe */ |
59 | autofs4_catatonic_mode(sbi); | 55 | autofs_catatonic_mode(sbi); |
60 | put_pid(sbi->oz_pgrp); | 56 | put_pid(sbi->oz_pgrp); |
61 | } | 57 | } |
62 | 58 | ||
@@ -66,9 +62,9 @@ void autofs4_kill_sb(struct super_block *sb) | |||
66 | kfree_rcu(sbi, rcu); | 62 | kfree_rcu(sbi, rcu); |
67 | } | 63 | } |
68 | 64 | ||
69 | static int autofs4_show_options(struct seq_file *m, struct dentry *root) | 65 | static int autofs_show_options(struct seq_file *m, struct dentry *root) |
70 | { | 66 | { |
71 | struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); | 67 | struct autofs_sb_info *sbi = autofs_sbi(root->d_sb); |
72 | struct inode *root_inode = d_inode(root->d_sb->s_root); | 68 | struct inode *root_inode = d_inode(root->d_sb->s_root); |
73 | 69 | ||
74 | if (!sbi) | 70 | if (!sbi) |
@@ -101,16 +97,16 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) | |||
101 | return 0; | 97 | return 0; |
102 | } | 98 | } |
103 | 99 | ||
104 | static void autofs4_evict_inode(struct inode *inode) | 100 | static void autofs_evict_inode(struct inode *inode) |
105 | { | 101 | { |
106 | clear_inode(inode); | 102 | clear_inode(inode); |
107 | kfree(inode->i_private); | 103 | kfree(inode->i_private); |
108 | } | 104 | } |
109 | 105 | ||
110 | static const struct super_operations autofs4_sops = { | 106 | static const struct super_operations autofs_sops = { |
111 | .statfs = simple_statfs, | 107 | .statfs = simple_statfs, |
112 | .show_options = autofs4_show_options, | 108 | .show_options = autofs_show_options, |
113 | .evict_inode = autofs4_evict_inode, | 109 | .evict_inode = autofs_evict_inode, |
114 | }; | 110 | }; |
115 | 111 | ||
116 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, | 112 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, |
@@ -206,7 +202,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, | |||
206 | return (*pipefd < 0); | 202 | return (*pipefd < 0); |
207 | } | 203 | } |
208 | 204 | ||
209 | int autofs4_fill_super(struct super_block *s, void *data, int silent) | 205 | int autofs_fill_super(struct super_block *s, void *data, int silent) |
210 | { | 206 | { |
211 | struct inode *root_inode; | 207 | struct inode *root_inode; |
212 | struct dentry *root; | 208 | struct dentry *root; |
@@ -246,19 +242,19 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
246 | s->s_blocksize = 1024; | 242 | s->s_blocksize = 1024; |
247 | s->s_blocksize_bits = 10; | 243 | s->s_blocksize_bits = 10; |
248 | s->s_magic = AUTOFS_SUPER_MAGIC; | 244 | s->s_magic = AUTOFS_SUPER_MAGIC; |
249 | s->s_op = &autofs4_sops; | 245 | s->s_op = &autofs_sops; |
250 | s->s_d_op = &autofs4_dentry_operations; | 246 | s->s_d_op = &autofs_dentry_operations; |
251 | s->s_time_gran = 1; | 247 | s->s_time_gran = 1; |
252 | 248 | ||
253 | /* | 249 | /* |
254 | * Get the root inode and dentry, but defer checking for errors. | 250 | * Get the root inode and dentry, but defer checking for errors. |
255 | */ | 251 | */ |
256 | ino = autofs4_new_ino(sbi); | 252 | ino = autofs_new_ino(sbi); |
257 | if (!ino) { | 253 | if (!ino) { |
258 | ret = -ENOMEM; | 254 | ret = -ENOMEM; |
259 | goto fail_free; | 255 | goto fail_free; |
260 | } | 256 | } |
261 | root_inode = autofs4_get_inode(s, S_IFDIR | 0755); | 257 | root_inode = autofs_get_inode(s, S_IFDIR | 0755); |
262 | root = d_make_root(root_inode); | 258 | root = d_make_root(root_inode); |
263 | if (!root) | 259 | if (!root) |
264 | goto fail_ino; | 260 | goto fail_ino; |
@@ -305,8 +301,8 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
305 | if (autofs_type_trigger(sbi->type)) | 301 | if (autofs_type_trigger(sbi->type)) |
306 | __managed_dentry_set_managed(root); | 302 | __managed_dentry_set_managed(root); |
307 | 303 | ||
308 | root_inode->i_fop = &autofs4_root_operations; | 304 | root_inode->i_fop = &autofs_root_operations; |
309 | root_inode->i_op = &autofs4_dir_inode_operations; | 305 | root_inode->i_op = &autofs_dir_inode_operations; |
310 | 306 | ||
311 | pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp)); | 307 | pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp)); |
312 | pipe = fget(pipefd); | 308 | pipe = fget(pipefd); |
@@ -340,14 +336,14 @@ fail_dput: | |||
340 | dput(root); | 336 | dput(root); |
341 | goto fail_free; | 337 | goto fail_free; |
342 | fail_ino: | 338 | fail_ino: |
343 | autofs4_free_ino(ino); | 339 | autofs_free_ino(ino); |
344 | fail_free: | 340 | fail_free: |
345 | kfree(sbi); | 341 | kfree(sbi); |
346 | s->s_fs_info = NULL; | 342 | s->s_fs_info = NULL; |
347 | return ret; | 343 | return ret; |
348 | } | 344 | } |
349 | 345 | ||
350 | struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode) | 346 | struct inode *autofs_get_inode(struct super_block *sb, umode_t mode) |
351 | { | 347 | { |
352 | struct inode *inode = new_inode(sb); | 348 | struct inode *inode = new_inode(sb); |
353 | 349 | ||
@@ -364,10 +360,10 @@ struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode) | |||
364 | 360 | ||
365 | if (S_ISDIR(mode)) { | 361 | if (S_ISDIR(mode)) { |
366 | set_nlink(inode, 2); | 362 | set_nlink(inode, 2); |
367 | inode->i_op = &autofs4_dir_inode_operations; | 363 | inode->i_op = &autofs_dir_inode_operations; |
368 | inode->i_fop = &autofs4_dir_operations; | 364 | inode->i_fop = &autofs_dir_operations; |
369 | } else if (S_ISLNK(mode)) { | 365 | } else if (S_ISLNK(mode)) { |
370 | inode->i_op = &autofs4_symlink_inode_operations; | 366 | inode->i_op = &autofs_symlink_inode_operations; |
371 | } else | 367 | } else |
372 | WARN_ON(1); | 368 | WARN_ON(1); |
373 | 369 | ||
diff --git a/fs/autofs4/root.c b/fs/autofs/root.c index b12e37f27530..a3d414150578 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs/root.c | |||
@@ -9,72 +9,66 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/capability.h> | 11 | #include <linux/capability.h> |
12 | #include <linux/errno.h> | ||
13 | #include <linux/stat.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/param.h> | ||
16 | #include <linux/time.h> | ||
17 | #include <linux/compat.h> | 12 | #include <linux/compat.h> |
18 | #include <linux/mutex.h> | ||
19 | 13 | ||
20 | #include "autofs_i.h" | 14 | #include "autofs_i.h" |
21 | 15 | ||
22 | static int autofs4_dir_symlink(struct inode *, struct dentry *, const char *); | 16 | static int autofs_dir_symlink(struct inode *, struct dentry *, const char *); |
23 | static int autofs4_dir_unlink(struct inode *, struct dentry *); | 17 | static int autofs_dir_unlink(struct inode *, struct dentry *); |
24 | static int autofs4_dir_rmdir(struct inode *, struct dentry *); | 18 | static int autofs_dir_rmdir(struct inode *, struct dentry *); |
25 | static int autofs4_dir_mkdir(struct inode *, struct dentry *, umode_t); | 19 | static int autofs_dir_mkdir(struct inode *, struct dentry *, umode_t); |
26 | static long autofs4_root_ioctl(struct file *, unsigned int, unsigned long); | 20 | static long autofs_root_ioctl(struct file *, unsigned int, unsigned long); |
27 | #ifdef CONFIG_COMPAT | 21 | #ifdef CONFIG_COMPAT |
28 | static long autofs4_root_compat_ioctl(struct file *, | 22 | static long autofs_root_compat_ioctl(struct file *, |
29 | unsigned int, unsigned long); | 23 | unsigned int, unsigned long); |
30 | #endif | 24 | #endif |
31 | static int autofs4_dir_open(struct inode *inode, struct file *file); | 25 | static int autofs_dir_open(struct inode *inode, struct file *file); |
32 | static struct dentry *autofs4_lookup(struct inode *, | 26 | static struct dentry *autofs_lookup(struct inode *, |
33 | struct dentry *, unsigned int); | 27 | struct dentry *, unsigned int); |
34 | static struct vfsmount *autofs4_d_automount(struct path *); | 28 | static struct vfsmount *autofs_d_automount(struct path *); |
35 | static int autofs4_d_manage(const struct path *, bool); | 29 | static int autofs_d_manage(const struct path *, bool); |
36 | static void autofs4_dentry_release(struct dentry *); | 30 | static void autofs_dentry_release(struct dentry *); |
37 | 31 | ||
38 | const struct file_operations autofs4_root_operations = { | 32 | const struct file_operations autofs_root_operations = { |
39 | .open = dcache_dir_open, | 33 | .open = dcache_dir_open, |
40 | .release = dcache_dir_close, | 34 | .release = dcache_dir_close, |
41 | .read = generic_read_dir, | 35 | .read = generic_read_dir, |
42 | .iterate_shared = dcache_readdir, | 36 | .iterate_shared = dcache_readdir, |
43 | .llseek = dcache_dir_lseek, | 37 | .llseek = dcache_dir_lseek, |
44 | .unlocked_ioctl = autofs4_root_ioctl, | 38 | .unlocked_ioctl = autofs_root_ioctl, |
45 | #ifdef CONFIG_COMPAT | 39 | #ifdef CONFIG_COMPAT |
46 | .compat_ioctl = autofs4_root_compat_ioctl, | 40 | .compat_ioctl = autofs_root_compat_ioctl, |
47 | #endif | 41 | #endif |
48 | }; | 42 | }; |
49 | 43 | ||
50 | const struct file_operations autofs4_dir_operations = { | 44 | const struct file_operations autofs_dir_operations = { |
51 | .open = autofs4_dir_open, | 45 | .open = autofs_dir_open, |
52 | .release = dcache_dir_close, | 46 | .release = dcache_dir_close, |
53 | .read = generic_read_dir, | 47 | .read = generic_read_dir, |
54 | .iterate_shared = dcache_readdir, | 48 | .iterate_shared = dcache_readdir, |
55 | .llseek = dcache_dir_lseek, | 49 | .llseek = dcache_dir_lseek, |
56 | }; | 50 | }; |
57 | 51 | ||
58 | const struct inode_operations autofs4_dir_inode_operations = { | 52 | const struct inode_operations autofs_dir_inode_operations = { |
59 | .lookup = autofs4_lookup, | 53 | .lookup = autofs_lookup, |
60 | .unlink = autofs4_dir_unlink, | 54 | .unlink = autofs_dir_unlink, |
61 | .symlink = autofs4_dir_symlink, | 55 | .symlink = autofs_dir_symlink, |
62 | .mkdir = autofs4_dir_mkdir, | 56 | .mkdir = autofs_dir_mkdir, |
63 | .rmdir = autofs4_dir_rmdir, | 57 | .rmdir = autofs_dir_rmdir, |
64 | }; | 58 | }; |
65 | 59 | ||
66 | const struct dentry_operations autofs4_dentry_operations = { | 60 | const struct dentry_operations autofs_dentry_operations = { |
67 | .d_automount = autofs4_d_automount, | 61 | .d_automount = autofs_d_automount, |
68 | .d_manage = autofs4_d_manage, | 62 | .d_manage = autofs_d_manage, |
69 | .d_release = autofs4_dentry_release, | 63 | .d_release = autofs_dentry_release, |
70 | }; | 64 | }; |
71 | 65 | ||
72 | static void autofs4_add_active(struct dentry *dentry) | 66 | static void autofs_add_active(struct dentry *dentry) |
73 | { | 67 | { |
74 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 68 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
75 | struct autofs_info *ino; | 69 | struct autofs_info *ino; |
76 | 70 | ||
77 | ino = autofs4_dentry_ino(dentry); | 71 | ino = autofs_dentry_ino(dentry); |
78 | if (ino) { | 72 | if (ino) { |
79 | spin_lock(&sbi->lookup_lock); | 73 | spin_lock(&sbi->lookup_lock); |
80 | if (!ino->active_count) { | 74 | if (!ino->active_count) { |
@@ -86,12 +80,12 @@ static void autofs4_add_active(struct dentry *dentry) | |||
86 | } | 80 | } |
87 | } | 81 | } |
88 | 82 | ||
89 | static void autofs4_del_active(struct dentry *dentry) | 83 | static void autofs_del_active(struct dentry *dentry) |
90 | { | 84 | { |
91 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 85 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
92 | struct autofs_info *ino; | 86 | struct autofs_info *ino; |
93 | 87 | ||
94 | ino = autofs4_dentry_ino(dentry); | 88 | ino = autofs_dentry_ino(dentry); |
95 | if (ino) { | 89 | if (ino) { |
96 | spin_lock(&sbi->lookup_lock); | 90 | spin_lock(&sbi->lookup_lock); |
97 | ino->active_count--; | 91 | ino->active_count--; |
@@ -103,14 +97,14 @@ static void autofs4_del_active(struct dentry *dentry) | |||
103 | } | 97 | } |
104 | } | 98 | } |
105 | 99 | ||
106 | static int autofs4_dir_open(struct inode *inode, struct file *file) | 100 | static int autofs_dir_open(struct inode *inode, struct file *file) |
107 | { | 101 | { |
108 | struct dentry *dentry = file->f_path.dentry; | 102 | struct dentry *dentry = file->f_path.dentry; |
109 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 103 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
110 | 104 | ||
111 | pr_debug("file=%p dentry=%p %pd\n", file, dentry, dentry); | 105 | pr_debug("file=%p dentry=%p %pd\n", file, dentry, dentry); |
112 | 106 | ||
113 | if (autofs4_oz_mode(sbi)) | 107 | if (autofs_oz_mode(sbi)) |
114 | goto out; | 108 | goto out; |
115 | 109 | ||
116 | /* | 110 | /* |
@@ -133,10 +127,10 @@ out: | |||
133 | return dcache_dir_open(inode, file); | 127 | return dcache_dir_open(inode, file); |
134 | } | 128 | } |
135 | 129 | ||
136 | static void autofs4_dentry_release(struct dentry *de) | 130 | static void autofs_dentry_release(struct dentry *de) |
137 | { | 131 | { |
138 | struct autofs_info *ino = autofs4_dentry_ino(de); | 132 | struct autofs_info *ino = autofs_dentry_ino(de); |
139 | struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); | 133 | struct autofs_sb_info *sbi = autofs_sbi(de->d_sb); |
140 | 134 | ||
141 | pr_debug("releasing %p\n", de); | 135 | pr_debug("releasing %p\n", de); |
142 | 136 | ||
@@ -152,12 +146,12 @@ static void autofs4_dentry_release(struct dentry *de) | |||
152 | spin_unlock(&sbi->lookup_lock); | 146 | spin_unlock(&sbi->lookup_lock); |
153 | } | 147 | } |
154 | 148 | ||
155 | autofs4_free_ino(ino); | 149 | autofs_free_ino(ino); |
156 | } | 150 | } |
157 | 151 | ||
158 | static struct dentry *autofs4_lookup_active(struct dentry *dentry) | 152 | static struct dentry *autofs_lookup_active(struct dentry *dentry) |
159 | { | 153 | { |
160 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 154 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
161 | struct dentry *parent = dentry->d_parent; | 155 | struct dentry *parent = dentry->d_parent; |
162 | const struct qstr *name = &dentry->d_name; | 156 | const struct qstr *name = &dentry->d_name; |
163 | unsigned int len = name->len; | 157 | unsigned int len = name->len; |
@@ -209,10 +203,10 @@ next: | |||
209 | return NULL; | 203 | return NULL; |
210 | } | 204 | } |
211 | 205 | ||
212 | static struct dentry *autofs4_lookup_expiring(struct dentry *dentry, | 206 | static struct dentry *autofs_lookup_expiring(struct dentry *dentry, |
213 | bool rcu_walk) | 207 | bool rcu_walk) |
214 | { | 208 | { |
215 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 209 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
216 | struct dentry *parent = dentry->d_parent; | 210 | struct dentry *parent = dentry->d_parent; |
217 | const struct qstr *name = &dentry->d_name; | 211 | const struct qstr *name = &dentry->d_name; |
218 | unsigned int len = name->len; | 212 | unsigned int len = name->len; |
@@ -269,17 +263,17 @@ next: | |||
269 | return NULL; | 263 | return NULL; |
270 | } | 264 | } |
271 | 265 | ||
272 | static int autofs4_mount_wait(const struct path *path, bool rcu_walk) | 266 | static int autofs_mount_wait(const struct path *path, bool rcu_walk) |
273 | { | 267 | { |
274 | struct autofs_sb_info *sbi = autofs4_sbi(path->dentry->d_sb); | 268 | struct autofs_sb_info *sbi = autofs_sbi(path->dentry->d_sb); |
275 | struct autofs_info *ino = autofs4_dentry_ino(path->dentry); | 269 | struct autofs_info *ino = autofs_dentry_ino(path->dentry); |
276 | int status = 0; | 270 | int status = 0; |
277 | 271 | ||
278 | if (ino->flags & AUTOFS_INF_PENDING) { | 272 | if (ino->flags & AUTOFS_INF_PENDING) { |
279 | if (rcu_walk) | 273 | if (rcu_walk) |
280 | return -ECHILD; | 274 | return -ECHILD; |
281 | pr_debug("waiting for mount name=%pd\n", path->dentry); | 275 | pr_debug("waiting for mount name=%pd\n", path->dentry); |
282 | status = autofs4_wait(sbi, path, NFY_MOUNT); | 276 | status = autofs_wait(sbi, path, NFY_MOUNT); |
283 | pr_debug("mount wait done status=%d\n", status); | 277 | pr_debug("mount wait done status=%d\n", status); |
284 | } | 278 | } |
285 | ino->last_used = jiffies; | 279 | ino->last_used = jiffies; |
@@ -291,11 +285,11 @@ static int do_expire_wait(const struct path *path, bool rcu_walk) | |||
291 | struct dentry *dentry = path->dentry; | 285 | struct dentry *dentry = path->dentry; |
292 | struct dentry *expiring; | 286 | struct dentry *expiring; |
293 | 287 | ||
294 | expiring = autofs4_lookup_expiring(dentry, rcu_walk); | 288 | expiring = autofs_lookup_expiring(dentry, rcu_walk); |
295 | if (IS_ERR(expiring)) | 289 | if (IS_ERR(expiring)) |
296 | return PTR_ERR(expiring); | 290 | return PTR_ERR(expiring); |
297 | if (!expiring) | 291 | if (!expiring) |
298 | return autofs4_expire_wait(path, rcu_walk); | 292 | return autofs_expire_wait(path, rcu_walk); |
299 | else { | 293 | else { |
300 | const struct path this = { .mnt = path->mnt, .dentry = expiring }; | 294 | const struct path this = { .mnt = path->mnt, .dentry = expiring }; |
301 | /* | 295 | /* |
@@ -303,17 +297,17 @@ static int do_expire_wait(const struct path *path, bool rcu_walk) | |||
303 | * be quite complete, but the directory has been removed | 297 | * be quite complete, but the directory has been removed |
304 | * so it must have been successful, just wait for it. | 298 | * so it must have been successful, just wait for it. |
305 | */ | 299 | */ |
306 | autofs4_expire_wait(&this, 0); | 300 | autofs_expire_wait(&this, 0); |
307 | autofs4_del_expiring(expiring); | 301 | autofs_del_expiring(expiring); |
308 | dput(expiring); | 302 | dput(expiring); |
309 | } | 303 | } |
310 | return 0; | 304 | return 0; |
311 | } | 305 | } |
312 | 306 | ||
313 | static struct dentry *autofs4_mountpoint_changed(struct path *path) | 307 | static struct dentry *autofs_mountpoint_changed(struct path *path) |
314 | { | 308 | { |
315 | struct dentry *dentry = path->dentry; | 309 | struct dentry *dentry = path->dentry; |
316 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 310 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
317 | 311 | ||
318 | /* | 312 | /* |
319 | * If this is an indirect mount the dentry could have gone away | 313 | * If this is an indirect mount the dentry could have gone away |
@@ -327,7 +321,7 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) | |||
327 | new = d_lookup(parent, &dentry->d_name); | 321 | new = d_lookup(parent, &dentry->d_name); |
328 | if (!new) | 322 | if (!new) |
329 | return NULL; | 323 | return NULL; |
330 | ino = autofs4_dentry_ino(new); | 324 | ino = autofs_dentry_ino(new); |
331 | ino->last_used = jiffies; | 325 | ino->last_used = jiffies; |
332 | dput(path->dentry); | 326 | dput(path->dentry); |
333 | path->dentry = new; | 327 | path->dentry = new; |
@@ -335,17 +329,17 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) | |||
335 | return path->dentry; | 329 | return path->dentry; |
336 | } | 330 | } |
337 | 331 | ||
338 | static struct vfsmount *autofs4_d_automount(struct path *path) | 332 | static struct vfsmount *autofs_d_automount(struct path *path) |
339 | { | 333 | { |
340 | struct dentry *dentry = path->dentry; | 334 | struct dentry *dentry = path->dentry; |
341 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 335 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
342 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 336 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
343 | int status; | 337 | int status; |
344 | 338 | ||
345 | pr_debug("dentry=%p %pd\n", dentry, dentry); | 339 | pr_debug("dentry=%p %pd\n", dentry, dentry); |
346 | 340 | ||
347 | /* The daemon never triggers a mount. */ | 341 | /* The daemon never triggers a mount. */ |
348 | if (autofs4_oz_mode(sbi)) | 342 | if (autofs_oz_mode(sbi)) |
349 | return NULL; | 343 | return NULL; |
350 | 344 | ||
351 | /* | 345 | /* |
@@ -364,7 +358,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path) | |||
364 | spin_lock(&sbi->fs_lock); | 358 | spin_lock(&sbi->fs_lock); |
365 | if (ino->flags & AUTOFS_INF_PENDING) { | 359 | if (ino->flags & AUTOFS_INF_PENDING) { |
366 | spin_unlock(&sbi->fs_lock); | 360 | spin_unlock(&sbi->fs_lock); |
367 | status = autofs4_mount_wait(path, 0); | 361 | status = autofs_mount_wait(path, 0); |
368 | if (status) | 362 | if (status) |
369 | return ERR_PTR(status); | 363 | return ERR_PTR(status); |
370 | goto done; | 364 | goto done; |
@@ -405,7 +399,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path) | |||
405 | } | 399 | } |
406 | ino->flags |= AUTOFS_INF_PENDING; | 400 | ino->flags |= AUTOFS_INF_PENDING; |
407 | spin_unlock(&sbi->fs_lock); | 401 | spin_unlock(&sbi->fs_lock); |
408 | status = autofs4_mount_wait(path, 0); | 402 | status = autofs_mount_wait(path, 0); |
409 | spin_lock(&sbi->fs_lock); | 403 | spin_lock(&sbi->fs_lock); |
410 | ino->flags &= ~AUTOFS_INF_PENDING; | 404 | ino->flags &= ~AUTOFS_INF_PENDING; |
411 | if (status) { | 405 | if (status) { |
@@ -416,24 +410,24 @@ static struct vfsmount *autofs4_d_automount(struct path *path) | |||
416 | spin_unlock(&sbi->fs_lock); | 410 | spin_unlock(&sbi->fs_lock); |
417 | done: | 411 | done: |
418 | /* Mount succeeded, check if we ended up with a new dentry */ | 412 | /* Mount succeeded, check if we ended up with a new dentry */ |
419 | dentry = autofs4_mountpoint_changed(path); | 413 | dentry = autofs_mountpoint_changed(path); |
420 | if (!dentry) | 414 | if (!dentry) |
421 | return ERR_PTR(-ENOENT); | 415 | return ERR_PTR(-ENOENT); |
422 | 416 | ||
423 | return NULL; | 417 | return NULL; |
424 | } | 418 | } |
425 | 419 | ||
426 | static int autofs4_d_manage(const struct path *path, bool rcu_walk) | 420 | static int autofs_d_manage(const struct path *path, bool rcu_walk) |
427 | { | 421 | { |
428 | struct dentry *dentry = path->dentry; | 422 | struct dentry *dentry = path->dentry; |
429 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 423 | struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); |
430 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 424 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
431 | int status; | 425 | int status; |
432 | 426 | ||
433 | pr_debug("dentry=%p %pd\n", dentry, dentry); | 427 | pr_debug("dentry=%p %pd\n", dentry, dentry); |
434 | 428 | ||
435 | /* The daemon never waits. */ | 429 | /* The daemon never waits. */ |
436 | if (autofs4_oz_mode(sbi)) { | 430 | if (autofs_oz_mode(sbi)) { |
437 | if (!path_is_mountpoint(path)) | 431 | if (!path_is_mountpoint(path)) |
438 | return -EISDIR; | 432 | return -EISDIR; |
439 | return 0; | 433 | return 0; |
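The oz-mode ("behind the curtain") tests gate everything the daemon itself does: the process group that owns the mount performs the real mounting, so it must never trigger or wait on automounts. The helper lives in autofs_i.h; approximately the following, though the exact form is an assumption here, not part of this hunk:

	/* Sketch: true for the automount daemon's process group, or once
	 * the mount has gone catatonic and there is no daemon to wait for.
	 */
	static inline int autofs_oz_mode(struct autofs_sb_info *sbi)
	{
		return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
	}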
@@ -447,7 +441,7 @@ static int autofs4_d_manage(const struct path *path, bool rcu_walk) | |||
447 | * This dentry may be under construction so wait on mount | 441 | * This dentry may be under construction so wait on mount |
448 | * completion. | 442 | * completion. |
449 | */ | 443 | */ |
450 | status = autofs4_mount_wait(path, rcu_walk); | 444 | status = autofs_mount_wait(path, rcu_walk); |
451 | if (status) | 445 | if (status) |
452 | return status; | 446 | return status; |
453 | 447 | ||
@@ -500,8 +494,8 @@ static int autofs4_d_manage(const struct path *path, bool rcu_walk) | |||
500 | } | 494 | } |
501 | 495 | ||
502 | /* Lookups in the root directory */ | 496 | /* Lookups in the root directory */ |
503 | static struct dentry *autofs4_lookup(struct inode *dir, | 497 | static struct dentry *autofs_lookup(struct inode *dir, |
504 | struct dentry *dentry, unsigned int flags) | 498 | struct dentry *dentry, unsigned int flags) |
505 | { | 499 | { |
506 | struct autofs_sb_info *sbi; | 500 | struct autofs_sb_info *sbi; |
507 | struct autofs_info *ino; | 501 | struct autofs_info *ino; |
@@ -513,13 +507,13 @@ static struct dentry *autofs4_lookup(struct inode *dir, | |||
513 | if (dentry->d_name.len > NAME_MAX) | 507 | if (dentry->d_name.len > NAME_MAX) |
514 | return ERR_PTR(-ENAMETOOLONG); | 508 | return ERR_PTR(-ENAMETOOLONG); |
515 | 509 | ||
516 | sbi = autofs4_sbi(dir->i_sb); | 510 | sbi = autofs_sbi(dir->i_sb); |
517 | 511 | ||
518 | pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", | 512 | pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", |
519 | current->pid, task_pgrp_nr(current), sbi->catatonic, | 513 | current->pid, task_pgrp_nr(current), sbi->catatonic, |
520 | autofs4_oz_mode(sbi)); | 514 | autofs_oz_mode(sbi)); |
521 | 515 | ||
522 | active = autofs4_lookup_active(dentry); | 516 | active = autofs_lookup_active(dentry); |
523 | if (active) | 517 | if (active) |
524 | return active; | 518 | return active; |
525 | else { | 519 | else { |
@@ -529,7 +523,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, | |||
529 | * can return fail immediately. The daemon however does need | 523 | * can return fail immediately. The daemon however does need |
530 | * to create directories within the file system. | 524 | * to create directories within the file system. |
531 | */ | 525 | */ |
532 | if (!autofs4_oz_mode(sbi) && !IS_ROOT(dentry->d_parent)) | 526 | if (!autofs_oz_mode(sbi) && !IS_ROOT(dentry->d_parent)) |
533 | return ERR_PTR(-ENOENT); | 527 | return ERR_PTR(-ENOENT); |
534 | 528 | ||
535 | /* Mark entries in the root as mount triggers */ | 529 | /* Mark entries in the root as mount triggers */ |
@@ -537,24 +531,24 @@ static struct dentry *autofs4_lookup(struct inode *dir, | |||
537 | autofs_type_indirect(sbi->type)) | 531 | autofs_type_indirect(sbi->type)) |
538 | __managed_dentry_set_managed(dentry); | 532 | __managed_dentry_set_managed(dentry); |
539 | 533 | ||
540 | ino = autofs4_new_ino(sbi); | 534 | ino = autofs_new_ino(sbi); |
541 | if (!ino) | 535 | if (!ino) |
542 | return ERR_PTR(-ENOMEM); | 536 | return ERR_PTR(-ENOMEM); |
543 | 537 | ||
544 | dentry->d_fsdata = ino; | 538 | dentry->d_fsdata = ino; |
545 | ino->dentry = dentry; | 539 | ino->dentry = dentry; |
546 | 540 | ||
547 | autofs4_add_active(dentry); | 541 | autofs_add_active(dentry); |
548 | } | 542 | } |
549 | return NULL; | 543 | return NULL; |
550 | } | 544 | } |
551 | 545 | ||
552 | static int autofs4_dir_symlink(struct inode *dir, | 546 | static int autofs_dir_symlink(struct inode *dir, |
553 | struct dentry *dentry, | 547 | struct dentry *dentry, |
554 | const char *symname) | 548 | const char *symname) |
555 | { | 549 | { |
556 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 550 | struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); |
557 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 551 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
558 | struct autofs_info *p_ino; | 552 | struct autofs_info *p_ino; |
559 | struct inode *inode; | 553 | struct inode *inode; |
560 | size_t size = strlen(symname); | 554 | size_t size = strlen(symname); |
@@ -562,14 +556,14 @@ static int autofs4_dir_symlink(struct inode *dir, | |||
562 | 556 | ||
563 | pr_debug("%s <- %pd\n", symname, dentry); | 557 | pr_debug("%s <- %pd\n", symname, dentry); |
564 | 558 | ||
565 | if (!autofs4_oz_mode(sbi)) | 559 | if (!autofs_oz_mode(sbi)) |
566 | return -EACCES; | 560 | return -EACCES; |
567 | 561 | ||
568 | BUG_ON(!ino); | 562 | BUG_ON(!ino); |
569 | 563 | ||
570 | autofs4_clean_ino(ino); | 564 | autofs_clean_ino(ino); |
571 | 565 | ||
572 | autofs4_del_active(dentry); | 566 | autofs_del_active(dentry); |
573 | 567 | ||
574 | cp = kmalloc(size + 1, GFP_KERNEL); | 568 | cp = kmalloc(size + 1, GFP_KERNEL); |
575 | if (!cp) | 569 | if (!cp) |
@@ -577,7 +571,7 @@ static int autofs4_dir_symlink(struct inode *dir, | |||
577 | 571 | ||
578 | strcpy(cp, symname); | 572 | strcpy(cp, symname); |
579 | 573 | ||
580 | inode = autofs4_get_inode(dir->i_sb, S_IFLNK | 0555); | 574 | inode = autofs_get_inode(dir->i_sb, S_IFLNK | 0555); |
581 | if (!inode) { | 575 | if (!inode) { |
582 | kfree(cp); | 576 | kfree(cp); |
583 | return -ENOMEM; | 577 | return -ENOMEM; |
@@ -588,7 +582,7 @@ static int autofs4_dir_symlink(struct inode *dir, | |||
588 | 582 | ||
589 | dget(dentry); | 583 | dget(dentry); |
590 | atomic_inc(&ino->count); | 584 | atomic_inc(&ino->count); |
591 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 585 | p_ino = autofs_dentry_ino(dentry->d_parent); |
592 | if (p_ino && !IS_ROOT(dentry)) | 586 | if (p_ino && !IS_ROOT(dentry)) |
593 | atomic_inc(&p_ino->count); | 587 | atomic_inc(&p_ino->count); |
594 | 588 | ||
@@ -610,20 +604,20 @@ static int autofs4_dir_symlink(struct inode *dir, | |||
610 | * If a process is blocked on the dentry waiting for the expire to finish, | 604 | * If a process is blocked on the dentry waiting for the expire to finish, |
611 | * it will invalidate the dentry and try to mount with a new one. | 605 | * it will invalidate the dentry and try to mount with a new one. |
612 | * | 606 | * |
613 | * Also see autofs4_dir_rmdir(). | 607 | * Also see autofs_dir_rmdir(). |
614 | */ | 608 | */ |
615 | static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | 609 | static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) |
616 | { | 610 | { |
617 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 611 | struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); |
618 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 612 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
619 | struct autofs_info *p_ino; | 613 | struct autofs_info *p_ino; |
620 | 614 | ||
621 | /* This allows root to remove symlinks */ | 615 | /* This allows root to remove symlinks */ |
622 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 616 | if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
623 | return -EPERM; | 617 | return -EPERM; |
624 | 618 | ||
625 | if (atomic_dec_and_test(&ino->count)) { | 619 | if (atomic_dec_and_test(&ino->count)) { |
626 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 620 | p_ino = autofs_dentry_ino(dentry->d_parent); |
627 | if (p_ino && !IS_ROOT(dentry)) | 621 | if (p_ino && !IS_ROOT(dentry)) |
628 | atomic_dec(&p_ino->count); | 622 | atomic_dec(&p_ino->count); |
629 | } | 623 | } |
@@ -635,7 +629,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | |||
635 | dir->i_mtime = current_time(dir); | 629 | dir->i_mtime = current_time(dir); |
636 | 630 | ||
637 | spin_lock(&sbi->lookup_lock); | 631 | spin_lock(&sbi->lookup_lock); |
638 | __autofs4_add_expiring(dentry); | 632 | __autofs_add_expiring(dentry); |
639 | d_drop(dentry); | 633 | d_drop(dentry); |
640 | spin_unlock(&sbi->lookup_lock); | 634 | spin_unlock(&sbi->lookup_lock); |
641 | 635 | ||
@@ -692,15 +686,15 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry) | |||
692 | managed_dentry_set_managed(parent); | 686 | managed_dentry_set_managed(parent); |
693 | } | 687 | } |
694 | 688 | ||
695 | static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | 689 | static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) |
696 | { | 690 | { |
697 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 691 | struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); |
698 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 692 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
699 | struct autofs_info *p_ino; | 693 | struct autofs_info *p_ino; |
700 | 694 | ||
701 | pr_debug("dentry %p, removing %pd\n", dentry, dentry); | 695 | pr_debug("dentry %p, removing %pd\n", dentry, dentry); |
702 | 696 | ||
703 | if (!autofs4_oz_mode(sbi)) | 697 | if (!autofs_oz_mode(sbi)) |
704 | return -EACCES; | 698 | return -EACCES; |
705 | 699 | ||
706 | spin_lock(&sbi->lookup_lock); | 700 | spin_lock(&sbi->lookup_lock); |
@@ -708,7 +702,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | |||
708 | spin_unlock(&sbi->lookup_lock); | 702 | spin_unlock(&sbi->lookup_lock); |
709 | return -ENOTEMPTY; | 703 | return -ENOTEMPTY; |
710 | } | 704 | } |
711 | __autofs4_add_expiring(dentry); | 705 | __autofs_add_expiring(dentry); |
712 | d_drop(dentry); | 706 | d_drop(dentry); |
713 | spin_unlock(&sbi->lookup_lock); | 707 | spin_unlock(&sbi->lookup_lock); |
714 | 708 | ||
@@ -716,7 +710,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | |||
716 | autofs_clear_leaf_automount_flags(dentry); | 710 | autofs_clear_leaf_automount_flags(dentry); |
717 | 711 | ||
718 | if (atomic_dec_and_test(&ino->count)) { | 712 | if (atomic_dec_and_test(&ino->count)) { |
719 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 713 | p_ino = autofs_dentry_ino(dentry->d_parent); |
720 | if (p_ino && dentry->d_parent != dentry) | 714 | if (p_ino && dentry->d_parent != dentry) |
721 | atomic_dec(&p_ino->count); | 715 | atomic_dec(&p_ino->count); |
722 | } | 716 | } |
@@ -730,26 +724,26 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | |||
730 | return 0; | 724 | return 0; |
731 | } | 725 | } |
732 | 726 | ||
733 | static int autofs4_dir_mkdir(struct inode *dir, | 727 | static int autofs_dir_mkdir(struct inode *dir, |
734 | struct dentry *dentry, umode_t mode) | 728 | struct dentry *dentry, umode_t mode) |
735 | { | 729 | { |
736 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 730 | struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); |
737 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 731 | struct autofs_info *ino = autofs_dentry_ino(dentry); |
738 | struct autofs_info *p_ino; | 732 | struct autofs_info *p_ino; |
739 | struct inode *inode; | 733 | struct inode *inode; |
740 | 734 | ||
741 | if (!autofs4_oz_mode(sbi)) | 735 | if (!autofs_oz_mode(sbi)) |
742 | return -EACCES; | 736 | return -EACCES; |
743 | 737 | ||
744 | pr_debug("dentry %p, creating %pd\n", dentry, dentry); | 738 | pr_debug("dentry %p, creating %pd\n", dentry, dentry); |
745 | 739 | ||
746 | BUG_ON(!ino); | 740 | BUG_ON(!ino); |
747 | 741 | ||
748 | autofs4_clean_ino(ino); | 742 | autofs_clean_ino(ino); |
749 | 743 | ||
750 | autofs4_del_active(dentry); | 744 | autofs_del_active(dentry); |
751 | 745 | ||
752 | inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode); | 746 | inode = autofs_get_inode(dir->i_sb, S_IFDIR | mode); |
753 | if (!inode) | 747 | if (!inode) |
754 | return -ENOMEM; | 748 | return -ENOMEM; |
755 | d_add(dentry, inode); | 749 | d_add(dentry, inode); |
@@ -759,7 +753,7 @@ static int autofs4_dir_mkdir(struct inode *dir, | |||
759 | 753 | ||
760 | dget(dentry); | 754 | dget(dentry); |
761 | atomic_inc(&ino->count); | 755 | atomic_inc(&ino->count); |
762 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 756 | p_ino = autofs_dentry_ino(dentry->d_parent); |
763 | if (p_ino && !IS_ROOT(dentry)) | 757 | if (p_ino && !IS_ROOT(dentry)) |
764 | atomic_inc(&p_ino->count); | 758 | atomic_inc(&p_ino->count); |
765 | inc_nlink(dir); | 759 | inc_nlink(dir); |
@@ -770,7 +764,7 @@ static int autofs4_dir_mkdir(struct inode *dir, | |||
770 | 764 | ||
771 | /* Get/set timeout ioctl() operation */ | 765 | /* Get/set timeout ioctl() operation */ |
772 | #ifdef CONFIG_COMPAT | 766 | #ifdef CONFIG_COMPAT |
773 | static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, | 767 | static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi, |
774 | compat_ulong_t __user *p) | 768 | compat_ulong_t __user *p) |
775 | { | 769 | { |
776 | unsigned long ntimeout; | 770 | unsigned long ntimeout; |
@@ -795,7 +789,7 @@ error: | |||
795 | } | 789 | } |
796 | #endif | 790 | #endif |
797 | 791 | ||
798 | static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, | 792 | static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi, |
799 | unsigned long __user *p) | 793 | unsigned long __user *p) |
800 | { | 794 | { |
801 | unsigned long ntimeout; | 795 | unsigned long ntimeout; |
@@ -820,14 +814,14 @@ error: | |||
820 | } | 814 | } |
821 | 815 | ||
822 | /* Return protocol version */ | 816 | /* Return protocol version */ |
823 | static inline int autofs4_get_protover(struct autofs_sb_info *sbi, | 817 | static inline int autofs_get_protover(struct autofs_sb_info *sbi, |
824 | int __user *p) | 818 | int __user *p) |
825 | { | 819 | { |
826 | return put_user(sbi->version, p); | 820 | return put_user(sbi->version, p); |
827 | } | 821 | } |
828 | 822 | ||
829 | /* Return protocol sub version */ | 823 | /* Return protocol sub version */ |
830 | static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, | 824 | static inline int autofs_get_protosubver(struct autofs_sb_info *sbi, |
831 | int __user *p) | 825 | int __user *p) |
832 | { | 826 | { |
833 | return put_user(sbi->sub_version, p); | 827 | return put_user(sbi->sub_version, p); |
@@ -836,7 +830,7 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, | |||
836 | /* | 830 | /* |
837 | * Tells the daemon whether it can umount the autofs mount. | 831 | * Tells the daemon whether it can umount the autofs mount. |
838 | */ | 832 | */ |
839 | static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) | 833 | static inline int autofs_ask_umount(struct vfsmount *mnt, int __user *p) |
840 | { | 834 | { |
841 | int status = 0; | 835 | int status = 0; |
842 | 836 | ||
@@ -850,14 +844,14 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) | |||
850 | return status; | 844 | return status; |
851 | } | 845 | } |
852 | 846 | ||
853 | /* Identify autofs4_dentries - this is so we can tell if there's | 847 | /* Identify autofs_dentries - this is so we can tell if there's |
854 | * an extra dentry refcount or not. We only hold a refcount on the | 848 | * an extra dentry refcount or not. We only hold a refcount on the |
855 | * dentry if it's non-negative (i.e., d_inode != NULL) | 849 |
856 | */ | 850 | */ |
857 | int is_autofs4_dentry(struct dentry *dentry) | 851 | int is_autofs_dentry(struct dentry *dentry) |
858 | { | 852 | { |
859 | return dentry && d_really_is_positive(dentry) && | 853 | return dentry && d_really_is_positive(dentry) && |
860 | dentry->d_op == &autofs4_dentry_operations && | 854 | dentry->d_op == &autofs_dentry_operations && |
861 | dentry->d_fsdata != NULL; | 855 | dentry->d_fsdata != NULL; |
862 | } | 856 | } |
863 | 857 | ||
@@ -865,10 +859,10 @@ int is_autofs4_dentry(struct dentry *dentry) | |||
865 | * ioctl()'s on the root directory are the chief method for the daemon to | 859 |
866 | * generate kernel reactions | 860 | * generate kernel reactions |
867 | */ | 861 | */ |
868 | static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | 862 | static int autofs_root_ioctl_unlocked(struct inode *inode, struct file *filp, |
869 | unsigned int cmd, unsigned long arg) | 863 | unsigned int cmd, unsigned long arg) |
870 | { | 864 | { |
871 | struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb); | 865 | struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); |
872 | void __user *p = (void __user *)arg; | 866 | void __user *p = (void __user *)arg; |
873 | 867 | ||
874 | pr_debug("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n", | 868 | pr_debug("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n", |
@@ -878,64 +872,63 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | |||
878 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) | 872 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) |
879 | return -ENOTTY; | 873 | return -ENOTTY; |
880 | 874 | ||
881 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 875 | if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
882 | return -EPERM; | 876 | return -EPERM; |
883 | 877 | ||
884 | switch (cmd) { | 878 | switch (cmd) { |
885 | case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ | 879 | case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ |
886 | return autofs4_wait_release(sbi, (autofs_wqt_t) arg, 0); | 880 | return autofs_wait_release(sbi, (autofs_wqt_t) arg, 0); |
887 | case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ | 881 | case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ |
888 | return autofs4_wait_release(sbi, (autofs_wqt_t) arg, -ENOENT); | 882 | return autofs_wait_release(sbi, (autofs_wqt_t) arg, -ENOENT); |
889 | case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ | 883 | case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ |
890 | autofs4_catatonic_mode(sbi); | 884 | autofs_catatonic_mode(sbi); |
891 | return 0; | 885 | return 0; |
892 | case AUTOFS_IOC_PROTOVER: /* Get protocol version */ | 886 | case AUTOFS_IOC_PROTOVER: /* Get protocol version */ |
893 | return autofs4_get_protover(sbi, p); | 887 | return autofs_get_protover(sbi, p); |
894 | case AUTOFS_IOC_PROTOSUBVER: /* Get protocol sub version */ | 888 | case AUTOFS_IOC_PROTOSUBVER: /* Get protocol sub version */ |
895 | return autofs4_get_protosubver(sbi, p); | 889 | return autofs_get_protosubver(sbi, p); |
896 | case AUTOFS_IOC_SETTIMEOUT: | 890 | case AUTOFS_IOC_SETTIMEOUT: |
897 | return autofs4_get_set_timeout(sbi, p); | 891 | return autofs_get_set_timeout(sbi, p); |
898 | #ifdef CONFIG_COMPAT | 892 | #ifdef CONFIG_COMPAT |
899 | case AUTOFS_IOC_SETTIMEOUT32: | 893 | case AUTOFS_IOC_SETTIMEOUT32: |
900 | return autofs4_compat_get_set_timeout(sbi, p); | 894 | return autofs_compat_get_set_timeout(sbi, p); |
901 | #endif | 895 | #endif |
902 | 896 | ||
903 | case AUTOFS_IOC_ASKUMOUNT: | 897 | case AUTOFS_IOC_ASKUMOUNT: |
904 | return autofs4_ask_umount(filp->f_path.mnt, p); | 898 | return autofs_ask_umount(filp->f_path.mnt, p); |
905 | 899 | ||
906 | /* return a single thing to expire */ | 900 | /* return a single thing to expire */ |
907 | case AUTOFS_IOC_EXPIRE: | 901 | case AUTOFS_IOC_EXPIRE: |
908 | return autofs4_expire_run(inode->i_sb, | 902 | return autofs_expire_run(inode->i_sb, filp->f_path.mnt, sbi, p); |
909 | filp->f_path.mnt, sbi, p); | ||
910 | /* same as above, but can send multiple expires through pipe */ | 903 | /* same as above, but can send multiple expires through pipe */ |
911 | case AUTOFS_IOC_EXPIRE_MULTI: | 904 | case AUTOFS_IOC_EXPIRE_MULTI: |
912 | return autofs4_expire_multi(inode->i_sb, | 905 | return autofs_expire_multi(inode->i_sb, |
913 | filp->f_path.mnt, sbi, p); | 906 | filp->f_path.mnt, sbi, p); |
914 | 907 | ||
915 | default: | 908 | default: |
916 | return -EINVAL; | 909 | return -EINVAL; |
917 | } | 910 | } |
918 | } | 911 | } |
919 | 912 | ||
920 | static long autofs4_root_ioctl(struct file *filp, | 913 | static long autofs_root_ioctl(struct file *filp, |
921 | unsigned int cmd, unsigned long arg) | 914 | unsigned int cmd, unsigned long arg) |
922 | { | 915 | { |
923 | struct inode *inode = file_inode(filp); | 916 | struct inode *inode = file_inode(filp); |
924 | 917 | ||
925 | return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 918 | return autofs_root_ioctl_unlocked(inode, filp, cmd, arg); |
926 | } | 919 | } |
927 | 920 | ||
928 | #ifdef CONFIG_COMPAT | 921 | #ifdef CONFIG_COMPAT |
929 | static long autofs4_root_compat_ioctl(struct file *filp, | 922 | static long autofs_root_compat_ioctl(struct file *filp, |
930 | unsigned int cmd, unsigned long arg) | 923 | unsigned int cmd, unsigned long arg) |
931 | { | 924 | { |
932 | struct inode *inode = file_inode(filp); | 925 | struct inode *inode = file_inode(filp); |
933 | int ret; | 926 | int ret; |
934 | 927 | ||
935 | if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) | 928 | if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) |
936 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 929 | ret = autofs_root_ioctl_unlocked(inode, filp, cmd, arg); |
937 | else | 930 | else |
938 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, | 931 | ret = autofs_root_ioctl_unlocked(inode, filp, cmd, |
939 | (unsigned long) compat_ptr(arg)); | 932 | (unsigned long) compat_ptr(arg)); |
940 | 933 | ||
941 | return ret; | 934 | return ret; |
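The compat path converts the argument with compat_ptr() only for commands whose argument really is a pointer; AUTOFS_IOC_READY and AUTOFS_IOC_FAIL carry the wait-queue token by value, and running it through compat_ptr() would mangle it. From the daemon side the token round-trip looks roughly like this (user-space sketch; the fd handling is hypothetical):

	#include <sys/ioctl.h>
	#include <linux/auto_fs.h>

	/* Acknowledge a wait-queue token received in an autofs pipe packet.
	 * The token is passed by value as the ioctl argument, which is why
	 * the kernel's compat handler above skips compat_ptr() for these
	 * two commands.
	 */
	static int ack_wait(int root_fd, unsigned long token, int success)
	{
		return ioctl(root_fd,
			     success ? AUTOFS_IOC_READY : AUTOFS_IOC_FAIL,
			     token);
	}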
diff --git a/fs/autofs4/symlink.c b/fs/autofs/symlink.c index ab0b4285a202..aad3902c0cc1 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs/symlink.c | |||
@@ -8,22 +8,22 @@ | |||
8 | 8 | ||
9 | #include "autofs_i.h" | 9 | #include "autofs_i.h" |
10 | 10 | ||
11 | static const char *autofs4_get_link(struct dentry *dentry, | 11 | static const char *autofs_get_link(struct dentry *dentry, |
12 | struct inode *inode, | 12 | struct inode *inode, |
13 | struct delayed_call *done) | 13 | struct delayed_call *done) |
14 | { | 14 | { |
15 | struct autofs_sb_info *sbi; | 15 | struct autofs_sb_info *sbi; |
16 | struct autofs_info *ino; | 16 | struct autofs_info *ino; |
17 | 17 | ||
18 | if (!dentry) | 18 | if (!dentry) |
19 | return ERR_PTR(-ECHILD); | 19 | return ERR_PTR(-ECHILD); |
20 | sbi = autofs4_sbi(dentry->d_sb); | 20 | sbi = autofs_sbi(dentry->d_sb); |
21 | ino = autofs4_dentry_ino(dentry); | 21 | ino = autofs_dentry_ino(dentry); |
22 | if (ino && !autofs4_oz_mode(sbi)) | 22 | if (ino && !autofs_oz_mode(sbi)) |
23 | ino->last_used = jiffies; | 23 | ino->last_used = jiffies; |
24 | return d_inode(dentry)->i_private; | 24 | return d_inode(dentry)->i_private; |
25 | } | 25 | } |
26 | 26 | ||
27 | const struct inode_operations autofs4_symlink_inode_operations = { | 27 | const struct inode_operations autofs_symlink_inode_operations = { |
28 | .get_link = autofs4_get_link | 28 | .get_link = autofs_get_link |
29 | }; | 29 | }; |
diff --git a/fs/autofs4/waitq.c b/fs/autofs/waitq.c index be9c3dc048ab..f6385c6ef0a5 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs/waitq.c | |||
@@ -7,19 +7,15 @@ | |||
7 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/slab.h> | ||
11 | #include <linux/time.h> | ||
12 | #include <linux/signal.h> | ||
13 | #include <linux/sched/signal.h> | 10 | #include <linux/sched/signal.h> |
14 | #include <linux/file.h> | ||
15 | #include "autofs_i.h" | 11 | #include "autofs_i.h" |
16 | 12 | ||
17 | /* We make this a static variable rather than a part of the superblock; it | 13 | /* We make this a static variable rather than a part of the superblock; it |
18 | * is better if we don't reassign numbers easily even across filesystems | 14 | * is better if we don't reassign numbers easily even across filesystems |
19 | */ | 15 | */ |
20 | static autofs_wqt_t autofs4_next_wait_queue = 1; | 16 | static autofs_wqt_t autofs_next_wait_queue = 1; |
21 | 17 | ||
22 | void autofs4_catatonic_mode(struct autofs_sb_info *sbi) | 18 | void autofs_catatonic_mode(struct autofs_sb_info *sbi) |
23 | { | 19 | { |
24 | struct autofs_wait_queue *wq, *nwq; | 20 | struct autofs_wait_queue *wq, *nwq; |
25 | 21 | ||
@@ -49,8 +45,8 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) | |||
49 | mutex_unlock(&sbi->wq_mutex); | 45 | mutex_unlock(&sbi->wq_mutex); |
50 | } | 46 | } |
51 | 47 | ||
52 | static int autofs4_write(struct autofs_sb_info *sbi, | 48 | static int autofs_write(struct autofs_sb_info *sbi, |
53 | struct file *file, const void *addr, int bytes) | 49 | struct file *file, const void *addr, int bytes) |
54 | { | 50 | { |
55 | unsigned long sigpipe, flags; | 51 | unsigned long sigpipe, flags; |
56 | const char *data = (const char *)addr; | 52 | const char *data = (const char *)addr; |
@@ -82,7 +78,7 @@ static int autofs4_write(struct autofs_sb_info *sbi, | |||
82 | return bytes == 0 ? 0 : wr < 0 ? wr : -EIO; | 78 | return bytes == 0 ? 0 : wr < 0 ? wr : -EIO; |
83 | } | 79 | } |
84 | 80 | ||
85 | static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | 81 | static void autofs_notify_daemon(struct autofs_sb_info *sbi, |
86 | struct autofs_wait_queue *wq, | 82 | struct autofs_wait_queue *wq, |
87 | int type) | 83 | int type) |
88 | { | 84 | { |
@@ -167,23 +163,23 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
167 | 163 | ||
168 | mutex_unlock(&sbi->wq_mutex); | 164 | mutex_unlock(&sbi->wq_mutex); |
169 | 165 | ||
170 | switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { | 166 | switch (ret = autofs_write(sbi, pipe, &pkt, pktsz)) { |
171 | case 0: | 167 | case 0: |
172 | break; | 168 | break; |
173 | case -ENOMEM: | 169 | case -ENOMEM: |
174 | case -ERESTARTSYS: | 170 | case -ERESTARTSYS: |
175 | /* Just fail this one */ | 171 | /* Just fail this one */ |
176 | autofs4_wait_release(sbi, wq->wait_queue_token, ret); | 172 | autofs_wait_release(sbi, wq->wait_queue_token, ret); |
177 | break; | 173 | break; |
178 | default: | 174 | default: |
179 | autofs4_catatonic_mode(sbi); | 175 | autofs_catatonic_mode(sbi); |
180 | break; | 176 | break; |
181 | } | 177 | } |
182 | fput(pipe); | 178 | fput(pipe); |
183 | } | 179 | } |
184 | 180 | ||
185 | static int autofs4_getpath(struct autofs_sb_info *sbi, | 181 | static int autofs_getpath(struct autofs_sb_info *sbi, |
186 | struct dentry *dentry, char **name) | 182 | struct dentry *dentry, char *name) |
187 | { | 183 | { |
188 | struct dentry *root = sbi->sb->s_root; | 184 | struct dentry *root = sbi->sb->s_root; |
189 | struct dentry *tmp; | 185 | struct dentry *tmp; |
@@ -193,7 +189,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, | |||
193 | unsigned seq; | 189 | unsigned seq; |
194 | 190 | ||
195 | rename_retry: | 191 | rename_retry: |
196 | buf = *name; | 192 | buf = name; |
197 | len = 0; | 193 | len = 0; |
198 | 194 | ||
199 | seq = read_seqbegin(&rename_lock); | 195 | seq = read_seqbegin(&rename_lock); |
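autofs_getpath() assembles the path bottom-up by chasing d_parent, which can race with a concurrent rename; the rename_retry label plus read_seqbegin()/read_seqretry() on rename_lock restart the walk if a rename slipped in. The generic read-side shape, as a standalone sketch (example_lock and the copied data are placeholders, not autofs code):

	#include <linux/seqlock.h>

	extern seqlock_t example_lock;

	static int read_consistent(const int *data)
	{
		unsigned int seq;
		int snapshot;

		do {
			seq = read_seqbegin(&example_lock);
			snapshot = *data;	/* lockless read of the protected data */
		} while (read_seqretry(&example_lock, seq));	/* retry if a writer ran */

		return snapshot;
	}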
@@ -228,7 +224,7 @@ rename_retry: | |||
228 | } | 224 | } |
229 | 225 | ||
230 | static struct autofs_wait_queue * | 226 | static struct autofs_wait_queue * |
231 | autofs4_find_wait(struct autofs_sb_info *sbi, const struct qstr *qstr) | 227 | autofs_find_wait(struct autofs_sb_info *sbi, const struct qstr *qstr) |
232 | { | 228 | { |
233 | struct autofs_wait_queue *wq; | 229 | struct autofs_wait_queue *wq; |
234 | 230 | ||
@@ -263,7 +259,7 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
263 | return -ENOENT; | 259 | return -ENOENT; |
264 | 260 | ||
265 | /* Wait in progress, continue; */ | 261 | /* Wait in progress, continue; */ |
266 | wq = autofs4_find_wait(sbi, qstr); | 262 | wq = autofs_find_wait(sbi, qstr); |
267 | if (wq) { | 263 | if (wq) { |
268 | *wait = wq; | 264 | *wait = wq; |
269 | return 1; | 265 | return 1; |
@@ -272,7 +268,7 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
272 | *wait = NULL; | 268 | *wait = NULL; |
273 | 269 | ||
274 | /* If we don't yet have any info this is a new request */ | 270 | /* If we don't yet have any info this is a new request */ |
275 | ino = autofs4_dentry_ino(dentry); | 271 | ino = autofs_dentry_ino(dentry); |
276 | if (!ino) | 272 | if (!ino) |
277 | return 1; | 273 | return 1; |
278 | 274 | ||
@@ -297,7 +293,7 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
297 | if (sbi->catatonic) | 293 | if (sbi->catatonic) |
298 | return -ENOENT; | 294 | return -ENOENT; |
299 | 295 | ||
300 | wq = autofs4_find_wait(sbi, qstr); | 296 | wq = autofs_find_wait(sbi, qstr); |
301 | if (wq) { | 297 | if (wq) { |
302 | *wait = wq; | 298 | *wait = wq; |
303 | return 1; | 299 | return 1; |
@@ -351,7 +347,7 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
351 | return 1; | 347 | return 1; |
352 | } | 348 | } |
353 | 349 | ||
354 | int autofs4_wait(struct autofs_sb_info *sbi, | 350 | int autofs_wait(struct autofs_sb_info *sbi, |
355 | const struct path *path, enum autofs_notify notify) | 351 | const struct path *path, enum autofs_notify notify) |
356 | { | 352 | { |
357 | struct dentry *dentry = path->dentry; | 353 | struct dentry *dentry = path->dentry; |
@@ -399,7 +395,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, | |||
399 | if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type)) | 395 | if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type)) |
400 | qstr.len = sprintf(name, "%p", dentry); | 396 | qstr.len = sprintf(name, "%p", dentry); |
401 | else { | 397 | else { |
402 | qstr.len = autofs4_getpath(sbi, dentry, &name); | 398 | qstr.len = autofs_getpath(sbi, dentry, name); |
403 | if (!qstr.len) { | 399 | if (!qstr.len) { |
404 | kfree(name); | 400 | kfree(name); |
405 | return -ENOENT; | 401 | return -ENOENT; |
@@ -430,15 +426,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, | |||
430 | return -ENOMEM; | 426 | return -ENOMEM; |
431 | } | 427 | } |
432 | 428 | ||
433 | wq->wait_queue_token = autofs4_next_wait_queue; | 429 | wq->wait_queue_token = autofs_next_wait_queue; |
434 | if (++autofs4_next_wait_queue == 0) | 430 | if (++autofs_next_wait_queue == 0) |
435 | autofs4_next_wait_queue = 1; | 431 | autofs_next_wait_queue = 1; |
436 | wq->next = sbi->queues; | 432 | wq->next = sbi->queues; |
437 | sbi->queues = wq; | 433 | sbi->queues = wq; |
438 | init_waitqueue_head(&wq->queue); | 434 | init_waitqueue_head(&wq->queue); |
439 | memcpy(&wq->name, &qstr, sizeof(struct qstr)); | 435 | memcpy(&wq->name, &qstr, sizeof(struct qstr)); |
440 | wq->dev = autofs4_get_dev(sbi); | 436 | wq->dev = autofs_get_dev(sbi); |
441 | wq->ino = autofs4_get_ino(sbi); | 437 | wq->ino = autofs_get_ino(sbi); |
442 | wq->uid = current_uid(); | 438 | wq->uid = current_uid(); |
443 | wq->gid = current_gid(); | 439 | wq->gid = current_gid(); |
444 | wq->pid = pid; | 440 | wq->pid = pid; |
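Wait-queue tokens come from a monotonically increasing counter that skips 0 on wraparound, so 0 can never name a live wait and stays usable as an invalid value. In isolation, the allocation above amounts to the following sketch (in the real code sbi->wq_mutex serializes callers):

	static unsigned long next_wait_queue = 1;

	/* Hand out the next token, reserving 0 as "no token". */
	static unsigned long alloc_wait_token(void)
	{
		unsigned long token = next_wait_queue;

		if (++next_wait_queue == 0)	/* wrapped around */
			next_wait_queue = 1;
		return token;
	}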
@@ -467,9 +463,9 @@ int autofs4_wait(struct autofs_sb_info *sbi, | |||
467 | wq->name.name, notify); | 463 | wq->name.name, notify); |
468 | 464 | ||
469 | /* | 465 | /* |
470 | * autofs4_notify_daemon() may block; it will unlock ->wq_mutex | 466 | * autofs_notify_daemon() may block; it will unlock ->wq_mutex |
471 | */ | 467 | */ |
472 | autofs4_notify_daemon(sbi, wq, type); | 468 | autofs_notify_daemon(sbi, wq, type); |
473 | } else { | 469 | } else { |
474 | wq->wait_ctr++; | 470 | wq->wait_ctr++; |
475 | pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", | 471 | pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", |
@@ -500,12 +496,12 @@ int autofs4_wait(struct autofs_sb_info *sbi, | |||
500 | struct dentry *de = NULL; | 496 | struct dentry *de = NULL; |
501 | 497 | ||
502 | /* direct mount or browsable map */ | 498 | /* direct mount or browsable map */ |
503 | ino = autofs4_dentry_ino(dentry); | 499 | ino = autofs_dentry_ino(dentry); |
504 | if (!ino) { | 500 | if (!ino) { |
505 | /* If not, look up the actual dentry used */ | 501 |
506 | de = d_lookup(dentry->d_parent, &dentry->d_name); | 502 | de = d_lookup(dentry->d_parent, &dentry->d_name); |
507 | if (de) | 503 | if (de) |
508 | ino = autofs4_dentry_ino(de); | 504 | ino = autofs_dentry_ino(de); |
509 | } | 505 | } |
510 | 506 | ||
511 | /* Set mount requester */ | 507 | /* Set mount requester */ |
@@ -530,7 +526,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, | |||
530 | } | 526 | } |
531 | 527 | ||
532 | 528 | ||
533 | int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status) | 529 | int autofs_wait_release(struct autofs_sb_info *sbi, |
530 | autofs_wqt_t wait_queue_token, int status) | ||
534 | { | 531 | { |
535 | struct autofs_wait_queue *wq, **wql; | 532 | struct autofs_wait_queue *wq, **wql; |
536 | 533 | ||
diff --git a/fs/autofs4/Kconfig b/fs/autofs4/Kconfig index 44727bf18297..99fda4d6da25 100644 --- a/fs/autofs4/Kconfig +++ b/fs/autofs4/Kconfig | |||
@@ -1,5 +1,7 @@ | |||
1 | config AUTOFS4_FS | 1 | config AUTOFS4_FS |
2 | tristate "Kernel automounter version 4 support (also supports v3)" | 2 | tristate "Kernel automounter version 4 support (also supports v3 and v5)" |
3 | default n | ||
4 | depends on AUTOFS_FS = n | ||
3 | help | 5 | help |
4 | The automounter is a tool to automatically mount remote file systems | 6 | The automounter is a tool to automatically mount remote file systems |
5 | on demand. This implementation is partially kernel-based to reduce | 7 | on demand. This implementation is partially kernel-based to reduce |
@@ -7,14 +9,38 @@ config AUTOFS4_FS | |||
7 | automounter (amd), which is a pure user space daemon. | 9 | automounter (amd), which is a pure user space daemon. |
8 | 10 | ||
9 | To use the automounter you need the user-space tools from | 11 | To use the automounter you need the user-space tools from |
10 | <https://www.kernel.org/pub/linux/daemons/autofs/v4/>; you also | 12 | <https://www.kernel.org/pub/linux/daemons/autofs/>; you also want |
11 | want to answer Y to "NFS file system support", below. | 13 | to answer Y to "NFS file system support", below. |
12 | 14 | ||
13 | To compile this support as a module, choose M here: the module will be | 15 | This module is in the process of being renamed from autofs4 to |
14 | called autofs4. You will need to add "alias autofs autofs4" to your | 16 | autofs. Since autofs is now the only module that provides the |
15 | modules configuration file. | 17 | autofs file system, the module is no longer version 4 specific. |
16 | 18 | ||
17 | If you are not a part of a fairly large, distributed network or | 19 | The autofs4 module is now built from the source located in |
18 | don't have a laptop which needs to dynamically reconfigure to the | 20 | fs/autofs. The autofs4 directory and its configuration entry |
19 | local network, you probably do not need an automounter, and can say | 21 | will be removed two kernel versions from the inclusion of this |
20 | N here. | 22 | change. |
23 | |||
24 | Changes that will need to be made should be limited to: | ||
25 | - source include statements should be changed from autofs_fs4.h to | ||
26 | autofs_fs.h since these two header files have been merged. | ||
27 | - user space scripts that manually load autofs4.ko should be | ||
28 | changed to load autofs.ko. But since the module directory name | ||
29 | and the module name are the same as the file system name there | ||
30 | is no need to manually load module. | ||
31 | - any "alias autofs autofs4" will need to be removed. | ||
32 | - due to the autofs4 module directory name not being the same as | ||
33 | its file system name autoloading didn't work properly. Because | ||
34 | of this kernel configurations would often build the module into | ||
35 | the kernel. This may have resulted in selinux policies that will | ||
36 | prevent the autofs module from autoloading and will need to be | ||
37 | updated. | ||
38 | |||
39 | Please configure AUTOFS_FS instead of AUTOFS4_FS from now on. | ||
40 | |||
41 | NOTE: Since the modules autofs and autofs4 use the same file system | ||
42 | type name of "autofs", only one can be built. The "depends" | ||
43 | above means AUTOFS4_FS will not appear in .config for any | ||
44 | setting of AUTOFS_FS other than n; when it does appear, it is | ||
45 | placed under the AUTOFS_FS entry, which is intended to draw | ||
46 | attention to the module rename change. | ||
diff --git a/fs/autofs4/Makefile b/fs/autofs4/Makefile index a811c1f7d9ab..417dd726d9ef 100644 --- a/fs/autofs4/Makefile +++ b/fs/autofs4/Makefile | |||
@@ -4,4 +4,6 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_AUTOFS4_FS) += autofs4.o | 5 | obj-$(CONFIG_AUTOFS4_FS) += autofs4.o |
6 | 6 | ||
7 | autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o | 7 | autofs4-objs := ../autofs/init.o ../autofs/inode.o ../autofs/root.o \ |
8 | ../autofs/symlink.o ../autofs/waitq.o ../autofs/expire.o \ | ||
9 | ../autofs/dev-ioctl.o | ||
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a41b48f82a70..4de191563261 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -387,8 +387,13 @@ static Node *create_entry(const char __user *buffer, size_t count) | |||
387 | s = strchr(p, del); | 387 | s = strchr(p, del); |
388 | if (!s) | 388 | if (!s) |
389 | goto einval; | 389 | goto einval; |
390 | *s++ = '\0'; | 390 | *s = '\0'; |
391 | e->offset = simple_strtoul(p, &p, 10); | 391 | if (p != s) { |
392 | int r = kstrtoint(p, 10, &e->offset); | ||
393 | if (r != 0 || e->offset < 0) | ||
394 | goto einval; | ||
395 | } | ||
396 | p = s; | ||
392 | if (*p++) | 397 | if (*p++) |
393 | goto einval; | 398 | goto einval; |
394 | pr_debug("register: offset: %#x\n", e->offset); | 399 | pr_debug("register: offset: %#x\n", e->offset); |
@@ -428,7 +433,8 @@ static Node *create_entry(const char __user *buffer, size_t count) | |||
428 | if (e->mask && | 433 | if (e->mask && |
429 | string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size) | 434 | string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size) |
430 | goto einval; | 435 | goto einval; |
431 | if (e->size + e->offset > BINPRM_BUF_SIZE) | 436 | if (e->size > BINPRM_BUF_SIZE || |
437 | BINPRM_BUF_SIZE - e->size < e->offset) | ||
432 | goto einval; | 438 | goto einval; |
433 | pr_debug("register: magic/mask length: %i\n", e->size); | 439 | pr_debug("register: magic/mask length: %i\n", e->size); |
434 | if (USE_DEBUG) { | 440 | if (USE_DEBUG) { |
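Two hardening fixes land here: kstrtoint() replaces simple_strtoul(), so a malformed or negative offset is now rejected instead of silently accepted, and the size/offset bounds test is rewritten so the sum is never formed, since e->size + e->offset can overflow int and wrap past the old comparison. A standalone illustration of that overflow (BUF_SIZE stands in for BINPRM_BUF_SIZE; the values are hypothetical):

	#include <limits.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define BUF_SIZE 256	/* stand-in for BINPRM_BUF_SIZE */

	static bool old_reject(int size, int offset)
	{
		return size + offset > BUF_SIZE;	/* sum may overflow */
	}

	static bool new_reject(int size, int offset)
	{
		return size > BUF_SIZE || BUF_SIZE - size < offset;
	}

	int main(void)
	{
		/* Signed overflow is undefined behaviour; in practice
		 * INT_MAX + 2 wraps negative, so the old test fails to
		 * reject the entry while the rewritten one always does.
		 */
		printf("old rejects: %d, new rejects: %d\n",
		       old_reject(INT_MAX, 2), new_reject(INT_MAX, 2));
		return 0;
	}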
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index ef80085ed564..9907475b4226 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -38,8 +38,6 @@ | |||
38 | #include <linux/ppp-ioctl.h> | 38 | #include <linux/ppp-ioctl.h> |
39 | #include <linux/if_pppox.h> | 39 | #include <linux/if_pppox.h> |
40 | #include <linux/mtio.h> | 40 | #include <linux/mtio.h> |
41 | #include <linux/auto_fs.h> | ||
42 | #include <linux/auto_fs4.h> | ||
43 | #include <linux/tty.h> | 41 | #include <linux/tty.h> |
44 | #include <linux/vt_kern.h> | 42 | #include <linux/vt_kern.h> |
45 | #include <linux/fb.h> | 43 | #include <linux/fb.h> |
@@ -905,12 +905,12 @@ out: | |||
905 | * If this page is ever written to we will re-fault and change the mapping to | 905 | * If this page is ever written to we will re-fault and change the mapping to |
906 | * point to real DAX storage instead. | 906 | * point to real DAX storage instead. |
907 | */ | 907 | */ |
908 | static int dax_load_hole(struct address_space *mapping, void *entry, | 908 | static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry, |
909 | struct vm_fault *vmf) | 909 | struct vm_fault *vmf) |
910 | { | 910 | { |
911 | struct inode *inode = mapping->host; | 911 | struct inode *inode = mapping->host; |
912 | unsigned long vaddr = vmf->address; | 912 | unsigned long vaddr = vmf->address; |
913 | int ret = VM_FAULT_NOPAGE; | 913 | vm_fault_t ret = VM_FAULT_NOPAGE; |
914 | struct page *zero_page; | 914 | struct page *zero_page; |
915 | void *entry2; | 915 | void *entry2; |
916 | pfn_t pfn; | 916 | pfn_t pfn; |
@@ -929,7 +929,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry, | |||
929 | goto out; | 929 | goto out; |
930 | } | 930 | } |
931 | 931 | ||
932 | vm_insert_mixed(vmf->vma, vaddr, pfn); | 932 | ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); |
933 | out: | 933 | out: |
934 | trace_dax_load_hole(inode, vmf, ret); | 934 | trace_dax_load_hole(inode, vmf, ret); |
935 | return ret; | 935 | return ret; |
@@ -1112,7 +1112,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, | |||
1112 | } | 1112 | } |
1113 | EXPORT_SYMBOL_GPL(dax_iomap_rw); | 1113 | EXPORT_SYMBOL_GPL(dax_iomap_rw); |
1114 | 1114 | ||
1115 | static int dax_fault_return(int error) | 1115 | static vm_fault_t dax_fault_return(int error) |
1116 | { | 1116 | { |
1117 | if (error == 0) | 1117 | if (error == 0) |
1118 | return VM_FAULT_NOPAGE; | 1118 | return VM_FAULT_NOPAGE; |
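dax_fault_return() is the single point where an errno from the iomap machinery is folded into a vm_fault_t for the MM core. Only its first branch is visible in this hunk; the remaining cases below are an assumption for illustration, following the common pattern:

	/* Assumed continuation of dax_fault_return() (only the first
	 * branch appears in the hunk above): fold an errno into the
	 * vm_fault_t codes a fault handler must return.
	 */
	static vm_fault_t example_fault_return(int error)
	{
		if (error == 0)
			return VM_FAULT_NOPAGE;
		if (error == -ENOMEM)
			return VM_FAULT_OOM;
		return VM_FAULT_SIGBUS;
	}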
@@ -1132,7 +1132,7 @@ static bool dax_fault_is_synchronous(unsigned long flags, | |||
1132 | && (iomap->flags & IOMAP_F_DIRTY); | 1132 | && (iomap->flags & IOMAP_F_DIRTY); |
1133 | } | 1133 | } |
1134 | 1134 | ||
1135 | static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | 1135 | static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, |
1136 | int *iomap_errp, const struct iomap_ops *ops) | 1136 | int *iomap_errp, const struct iomap_ops *ops) |
1137 | { | 1137 | { |
1138 | struct vm_area_struct *vma = vmf->vma; | 1138 | struct vm_area_struct *vma = vmf->vma; |
@@ -1145,18 +1145,18 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1145 | int error, major = 0; | 1145 | int error, major = 0; |
1146 | bool write = vmf->flags & FAULT_FLAG_WRITE; | 1146 | bool write = vmf->flags & FAULT_FLAG_WRITE; |
1147 | bool sync; | 1147 | bool sync; |
1148 | int vmf_ret = 0; | 1148 | vm_fault_t ret = 0; |
1149 | void *entry; | 1149 | void *entry; |
1150 | pfn_t pfn; | 1150 | pfn_t pfn; |
1151 | 1151 | ||
1152 | trace_dax_pte_fault(inode, vmf, vmf_ret); | 1152 | trace_dax_pte_fault(inode, vmf, ret); |
1153 | /* | 1153 | /* |
1154 | * Check whether offset isn't beyond end of file now. Caller is supposed | 1154 | * Check whether offset isn't beyond end of file now. Caller is supposed |
1155 | * to hold locks serializing us with truncate / punch hole so this is | 1155 | * to hold locks serializing us with truncate / punch hole so this is |
1156 | * a reliable test. | 1156 | * a reliable test. |
1157 | */ | 1157 | */ |
1158 | if (pos >= i_size_read(inode)) { | 1158 | if (pos >= i_size_read(inode)) { |
1159 | vmf_ret = VM_FAULT_SIGBUS; | 1159 | ret = VM_FAULT_SIGBUS; |
1160 | goto out; | 1160 | goto out; |
1161 | } | 1161 | } |
1162 | 1162 | ||
@@ -1165,7 +1165,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1165 | 1165 | ||
1166 | entry = grab_mapping_entry(mapping, vmf->pgoff, 0); | 1166 | entry = grab_mapping_entry(mapping, vmf->pgoff, 0); |
1167 | if (IS_ERR(entry)) { | 1167 | if (IS_ERR(entry)) { |
1168 | vmf_ret = dax_fault_return(PTR_ERR(entry)); | 1168 | ret = dax_fault_return(PTR_ERR(entry)); |
1169 | goto out; | 1169 | goto out; |
1170 | } | 1170 | } |
1171 | 1171 | ||
@@ -1176,7 +1176,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1176 | * retried. | 1176 | * retried. |
1177 | */ | 1177 | */ |
1178 | if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) { | 1178 | if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) { |
1179 | vmf_ret = VM_FAULT_NOPAGE; | 1179 | ret = VM_FAULT_NOPAGE; |
1180 | goto unlock_entry; | 1180 | goto unlock_entry; |
1181 | } | 1181 | } |
1182 | 1182 | ||
@@ -1189,7 +1189,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1189 | if (iomap_errp) | 1189 | if (iomap_errp) |
1190 | *iomap_errp = error; | 1190 | *iomap_errp = error; |
1191 | if (error) { | 1191 | if (error) { |
1192 | vmf_ret = dax_fault_return(error); | 1192 | ret = dax_fault_return(error); |
1193 | goto unlock_entry; | 1193 | goto unlock_entry; |
1194 | } | 1194 | } |
1195 | if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { | 1195 | if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { |
@@ -1219,9 +1219,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1219 | goto error_finish_iomap; | 1219 | goto error_finish_iomap; |
1220 | 1220 | ||
1221 | __SetPageUptodate(vmf->cow_page); | 1221 | __SetPageUptodate(vmf->cow_page); |
1222 | vmf_ret = finish_fault(vmf); | 1222 | ret = finish_fault(vmf); |
1223 | if (!vmf_ret) | 1223 | if (!ret) |
1224 | vmf_ret = VM_FAULT_DONE_COW; | 1224 | ret = VM_FAULT_DONE_COW; |
1225 | goto finish_iomap; | 1225 | goto finish_iomap; |
1226 | } | 1226 | } |
1227 | 1227 | ||
@@ -1257,23 +1257,20 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1257 | goto error_finish_iomap; | 1257 | goto error_finish_iomap; |
1258 | } | 1258 | } |
1259 | *pfnp = pfn; | 1259 | *pfnp = pfn; |
1260 | vmf_ret = VM_FAULT_NEEDDSYNC | major; | 1260 | ret = VM_FAULT_NEEDDSYNC | major; |
1261 | goto finish_iomap; | 1261 | goto finish_iomap; |
1262 | } | 1262 | } |
1263 | trace_dax_insert_mapping(inode, vmf, entry); | 1263 | trace_dax_insert_mapping(inode, vmf, entry); |
1264 | if (write) | 1264 | if (write) |
1265 | error = vm_insert_mixed_mkwrite(vma, vaddr, pfn); | 1265 | ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn); |
1266 | else | 1266 | else |
1267 | error = vm_insert_mixed(vma, vaddr, pfn); | 1267 | ret = vmf_insert_mixed(vma, vaddr, pfn); |
1268 | 1268 | ||
1269 | /* -EBUSY is fine, somebody else faulted on the same PTE */ | 1269 | goto finish_iomap; |
1270 | if (error == -EBUSY) | ||
1271 | error = 0; | ||
1272 | break; | ||
1273 | case IOMAP_UNWRITTEN: | 1270 | case IOMAP_UNWRITTEN: |
1274 | case IOMAP_HOLE: | 1271 | case IOMAP_HOLE: |
1275 | if (!write) { | 1272 | if (!write) { |
1276 | vmf_ret = dax_load_hole(mapping, entry, vmf); | 1273 | ret = dax_load_hole(mapping, entry, vmf); |
1277 | goto finish_iomap; | 1274 | goto finish_iomap; |
1278 | } | 1275 | } |
1279 | /*FALLTHRU*/ | 1276 | /*FALLTHRU*/ |
@@ -1284,12 +1281,12 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1284 | } | 1281 | } |
1285 | 1282 | ||
1286 | error_finish_iomap: | 1283 | error_finish_iomap: |
1287 | vmf_ret = dax_fault_return(error) | major; | 1284 | ret = dax_fault_return(error); |
1288 | finish_iomap: | 1285 | finish_iomap: |
1289 | if (ops->iomap_end) { | 1286 | if (ops->iomap_end) { |
1290 | int copied = PAGE_SIZE; | 1287 | int copied = PAGE_SIZE; |
1291 | 1288 | ||
1292 | if (vmf_ret & VM_FAULT_ERROR) | 1289 | if (ret & VM_FAULT_ERROR) |
1293 | copied = 0; | 1290 | copied = 0; |
1294 | /* | 1291 | /* |
1295 | * The fault is done by now and there's no way back (other | 1292 | * The fault is done by now and there's no way back (other |
@@ -1302,12 +1299,12 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1302 | unlock_entry: | 1299 | unlock_entry: |
1303 | put_locked_mapping_entry(mapping, vmf->pgoff); | 1300 | put_locked_mapping_entry(mapping, vmf->pgoff); |
1304 | out: | 1301 | out: |
1305 | trace_dax_pte_fault_done(inode, vmf, vmf_ret); | 1302 | trace_dax_pte_fault_done(inode, vmf, ret); |
1306 | return vmf_ret; | 1303 | return ret | major; |
1307 | } | 1304 | } |
1308 | 1305 | ||
1309 | #ifdef CONFIG_FS_DAX_PMD | 1306 | #ifdef CONFIG_FS_DAX_PMD |
1310 | static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, | 1307 | static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, |
1311 | void *entry) | 1308 | void *entry) |
1312 | { | 1309 | { |
1313 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; | 1310 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; |
@@ -1348,7 +1345,7 @@ fallback: | |||
1348 | return VM_FAULT_FALLBACK; | 1345 | return VM_FAULT_FALLBACK; |
1349 | } | 1346 | } |
1350 | 1347 | ||
1351 | static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, | 1348 | static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, |
1352 | const struct iomap_ops *ops) | 1349 | const struct iomap_ops *ops) |
1353 | { | 1350 | { |
1354 | struct vm_area_struct *vma = vmf->vma; | 1351 | struct vm_area_struct *vma = vmf->vma; |
@@ -1358,7 +1355,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1358 | bool sync; | 1355 | bool sync; |
1359 | unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; | 1356 | unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; |
1360 | struct inode *inode = mapping->host; | 1357 | struct inode *inode = mapping->host; |
1361 | int result = VM_FAULT_FALLBACK; | 1358 | vm_fault_t result = VM_FAULT_FALLBACK; |
1362 | struct iomap iomap = { 0 }; | 1359 | struct iomap iomap = { 0 }; |
1363 | pgoff_t max_pgoff, pgoff; | 1360 | pgoff_t max_pgoff, pgoff; |
1364 | void *entry; | 1361 | void *entry; |
@@ -1509,7 +1506,7 @@ out: | |||
1509 | return result; | 1506 | return result; |
1510 | } | 1507 | } |
1511 | #else | 1508 | #else |
1512 | static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, | 1509 | static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, |
1513 | const struct iomap_ops *ops) | 1510 | const struct iomap_ops *ops) |
1514 | { | 1511 | { |
1515 | return VM_FAULT_FALLBACK; | 1512 | return VM_FAULT_FALLBACK; |
@@ -1529,7 +1526,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, | |||
1529 | * has done all the necessary locking for page fault to proceed | 1526 | * has done all the necessary locking for page fault to proceed |
1530 | * successfully. | 1527 | * successfully. |
1531 | */ | 1528 | */ |
1532 | int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, | 1529 | vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, |
1533 | pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) | 1530 | pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) |
1534 | { | 1531 | { |
1535 | switch (pe_size) { | 1532 | switch (pe_size) { |
@@ -1553,14 +1550,14 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault); | |||
1553 | * DAX file. It takes care of marking corresponding radix tree entry as dirty | 1550 | * DAX file. It takes care of marking corresponding radix tree entry as dirty |
1554 | * as well. | 1551 | * as well. |
1555 | */ | 1552 | */ |
1556 | static int dax_insert_pfn_mkwrite(struct vm_fault *vmf, | 1553 | static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf, |
1557 | enum page_entry_size pe_size, | 1554 | enum page_entry_size pe_size, |
1558 | pfn_t pfn) | 1555 | pfn_t pfn) |
1559 | { | 1556 | { |
1560 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; | 1557 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; |
1561 | void *entry, **slot; | 1558 | void *entry, **slot; |
1562 | pgoff_t index = vmf->pgoff; | 1559 | pgoff_t index = vmf->pgoff; |
1563 | int vmf_ret, error; | 1560 | vm_fault_t ret; |
1564 | 1561 | ||
1565 | xa_lock_irq(&mapping->i_pages); | 1562 | xa_lock_irq(&mapping->i_pages); |
1566 | entry = get_unlocked_mapping_entry(mapping, index, &slot); | 1563 | entry = get_unlocked_mapping_entry(mapping, index, &slot); |
@@ -1579,21 +1576,20 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf, | |||
1579 | xa_unlock_irq(&mapping->i_pages); | 1576 | xa_unlock_irq(&mapping->i_pages); |
1580 | switch (pe_size) { | 1577 | switch (pe_size) { |
1581 | case PE_SIZE_PTE: | 1578 | case PE_SIZE_PTE: |
1582 | error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); | 1579 | ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); |
1583 | vmf_ret = dax_fault_return(error); | ||
1584 | break; | 1580 | break; |
1585 | #ifdef CONFIG_FS_DAX_PMD | 1581 | #ifdef CONFIG_FS_DAX_PMD |
1586 | case PE_SIZE_PMD: | 1582 | case PE_SIZE_PMD: |
1587 | vmf_ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, | 1583 | ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, |
1588 | pfn, true); | 1584 | pfn, true); |
1589 | break; | 1585 | break; |
1590 | #endif | 1586 | #endif |
1591 | default: | 1587 | default: |
1592 | vmf_ret = VM_FAULT_FALLBACK; | 1588 | ret = VM_FAULT_FALLBACK; |
1593 | } | 1589 | } |
1594 | put_locked_mapping_entry(mapping, index); | 1590 | put_locked_mapping_entry(mapping, index); |
1595 | trace_dax_insert_pfn_mkwrite(mapping->host, vmf, vmf_ret); | 1591 | trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret); |
1596 | return vmf_ret; | 1592 | return ret; |
1597 | } | 1593 | } |
1598 | 1594 | ||
1599 | /** | 1595 | /** |
@@ -1606,8 +1602,8 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf, | |||
1607 | * stored persistently on the media and handles insertion of the appropriate page | 1603 |
1607 | * table entry. | 1603 | * table entry. |
1608 | */ | 1604 | */ |
1609 | int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, | 1605 | vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, |
1610 | pfn_t pfn) | 1606 | enum page_entry_size pe_size, pfn_t pfn) |
1611 | { | 1607 | { |
1612 | int err; | 1608 | int err; |
1613 | loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT; | 1609 | loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT; |
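The fs/dax.c hunks above are mostly a mechanical conversion of the fault paths from int to the vm_fault_t typedef, and the pivot of that conversion is a single translation point from a negative errno to a VM_FAULT_* code. A minimal sketch of that translation, consistent with the dax_fault_return() hunk above (the -ENOMEM branch follows the usual convention and is an assumption, not quoted from this diff):

	/*
	 * Fold an errno from the iomap/fault path into a vm_fault_t.
	 * 0 means the PTE was installed, so report VM_FAULT_NOPAGE;
	 * allocation failures become VM_FAULT_OOM; anything else is
	 * surfaced to the faulting task as SIGBUS.
	 */
	static vm_fault_t dax_fault_return(int error)
	{
		if (error == 0)
			return VM_FAULT_NOPAGE;
		if (error == -ENOMEM)
			return VM_FAULT_OOM;
		return VM_FAULT_SIGBUS;
	}

With vm_fault_t as the declared return type, mixing an errno into the fault-code bitmask (the bug class this series targets) becomes something type-checking tools can flag rather than a silent corruption of the flags.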
diff --git a/fs/fcntl.c b/fs/fcntl.c index c42169459298..12273b6ea56d 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/pid_namespace.h> | 24 | #include <linux/pid_namespace.h> |
25 | #include <linux/user_namespace.h> | 25 | #include <linux/user_namespace.h> |
26 | #include <linux/shmem_fs.h> | 26 | #include <linux/memfd.h> |
27 | #include <linux/compat.h> | 27 | #include <linux/compat.h> |
28 | 28 | ||
29 | #include <linux/poll.h> | 29 | #include <linux/poll.h> |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 97a972efab83..68728de12864 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -788,35 +788,34 @@ static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres, | |||
788 | spin_unlock(&lockres->l_lock); | 788 | spin_unlock(&lockres->l_lock); |
789 | } | 789 | } |
790 | 790 | ||
791 | static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, | 791 | static struct ocfs2_lock_holder * |
792 | struct ocfs2_lock_holder *oh) | 792 | ocfs2_pid_holder(struct ocfs2_lock_res *lockres, |
793 | { | 793 | struct pid *pid) |
794 | spin_lock(&lockres->l_lock); | ||
795 | list_del(&oh->oh_list); | ||
796 | spin_unlock(&lockres->l_lock); | ||
797 | |||
798 | put_pid(oh->oh_owner_pid); | ||
799 | } | ||
800 | |||
801 | static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres) | ||
802 | { | 794 | { |
803 | struct ocfs2_lock_holder *oh; | 795 | struct ocfs2_lock_holder *oh; |
804 | struct pid *pid; | ||
805 | 796 | ||
806 | /* look in the list of holders for one with the current task as owner */ | ||
807 | spin_lock(&lockres->l_lock); | 797 | spin_lock(&lockres->l_lock); |
808 | pid = task_pid(current); | ||
809 | list_for_each_entry(oh, &lockres->l_holders, oh_list) { | 798 | list_for_each_entry(oh, &lockres->l_holders, oh_list) { |
810 | if (oh->oh_owner_pid == pid) { | 799 | if (oh->oh_owner_pid == pid) { |
811 | spin_unlock(&lockres->l_lock); | 800 | spin_unlock(&lockres->l_lock); |
812 | return 1; | 801 | return oh; |
813 | } | 802 | } |
814 | } | 803 | } |
815 | spin_unlock(&lockres->l_lock); | 804 | spin_unlock(&lockres->l_lock); |
805 | return NULL; | ||
806 | } | ||
816 | 807 | ||
817 | return 0; | 808 | static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, |
809 | struct ocfs2_lock_holder *oh) | ||
810 | { | ||
811 | spin_lock(&lockres->l_lock); | ||
812 | list_del(&oh->oh_list); | ||
813 | spin_unlock(&lockres->l_lock); | ||
814 | |||
815 | put_pid(oh->oh_owner_pid); | ||
818 | } | 816 | } |
819 | 817 | ||
818 | |||
820 | static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, | 819 | static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, |
821 | int level) | 820 | int level) |
822 | { | 821 | { |
@@ -2610,34 +2609,93 @@ void ocfs2_inode_unlock(struct inode *inode, | |||
2610 | * | 2609 | * |
2611 | * return < 0 on error, return == 0 if there's no lock holder on the stack | 2610 | * return < 0 on error, return == 0 if there's no lock holder on the stack |
2612 | * before this call, return == 1 if this call would be a recursive locking. | 2611 | * before this call, return == 1 if this call would be a recursive locking. |
2612 | * return == -EINVAL if this lock attempt would cause an upgrade, which is forbidden. ||
2613 | * | ||
2614 | * When taking lock levels into account, we face several different situations. ||
2615 | * | ||
2616 | * 1. no lock is held | ||
2617 | * In this case, just lock the inode as requested and return 0 | ||
2618 | * | ||
2619 | * 2. We are holding a lock | ||
2620 | * For this situation, things diverge into several cases: ||
2621 | * | ||
2622 | * wanted holding what to do | ||
2623 | * ex ex see 2.1 below | ||
2624 | * ex pr see 2.2 below | ||
2625 | * pr ex see 2.1 below | ||
2626 | * pr pr see 2.1 below | ||
2627 | * | ||
2628 | * 2.1 The lock level that is being held is compatible ||
2629 | * with the wanted level, so no lock action will be taken. ||
2630 | * | ||
2631 | * 2.2 Otherwise, an upgrade is needed, but it is forbidden. | ||
2632 | * | ||
2633 | * The reason an upgrade within a process is forbidden is that ||
2634 | * a lock upgrade may cause deadlock. The following illustrates ||
2635 | * how it happens. ||
2636 | * | ||
2637 | * thread on node1 thread on node2 | ||
2638 | * ocfs2_inode_lock_tracker(ex=0) | ||
2639 | * | ||
2640 | * <====== ocfs2_inode_lock_tracker(ex=1) | ||
2641 | * | ||
2642 | * ocfs2_inode_lock_tracker(ex=1) | ||
2613 | */ | 2643 | */ |
2614 | int ocfs2_inode_lock_tracker(struct inode *inode, | 2644 | int ocfs2_inode_lock_tracker(struct inode *inode, |
2615 | struct buffer_head **ret_bh, | 2645 | struct buffer_head **ret_bh, |
2616 | int ex, | 2646 | int ex, |
2617 | struct ocfs2_lock_holder *oh) | 2647 | struct ocfs2_lock_holder *oh) |
2618 | { | 2648 | { |
2619 | int status; | 2649 | int status = 0; |
2620 | int arg_flags = 0, has_locked; | ||
2621 | struct ocfs2_lock_res *lockres; | 2650 | struct ocfs2_lock_res *lockres; |
2651 | struct ocfs2_lock_holder *tmp_oh; | ||
2652 | struct pid *pid = task_pid(current); | ||
2653 | |||
2622 | 2654 | ||
2623 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2655 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
2624 | has_locked = ocfs2_is_locked_by_me(lockres); | 2656 | tmp_oh = ocfs2_pid_holder(lockres, pid); |
2625 | /* Just get buffer head if the cluster lock has been taken */ | ||
2626 | if (has_locked) | ||
2627 | arg_flags = OCFS2_META_LOCK_GETBH; | ||
2628 | 2657 | ||
2629 | if (likely(!has_locked || ret_bh)) { | 2658 | if (!tmp_oh) { |
2630 | status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags); | 2659 | /* |
2660 | * This corresponds to the case 1. | ||
2661 | * We haven't got any lock before. | ||
2662 | */ | ||
2663 | status = ocfs2_inode_lock_full(inode, ret_bh, ex, 0); | ||
2631 | if (status < 0) { | 2664 | if (status < 0) { |
2632 | if (status != -ENOENT) | 2665 | if (status != -ENOENT) |
2633 | mlog_errno(status); | 2666 | mlog_errno(status); |
2634 | return status; | 2667 | return status; |
2635 | } | 2668 | } |
2636 | } | 2669 | |
2637 | if (!has_locked) | 2670 | oh->oh_ex = ex; |
2638 | ocfs2_add_holder(lockres, oh); | 2671 | ocfs2_add_holder(lockres, oh); |
2672 | return 0; | ||
2673 | } | ||
2639 | 2674 | ||
2640 | return has_locked; | 2675 | if (unlikely(ex && !tmp_oh->oh_ex)) { |
2676 | /* | ||
2677 | * case 2.2 upgrade may cause dead lock, forbid it. | ||
2678 | */ | ||
2679 | mlog(ML_ERROR, "Recursive locking is not permitted to " | ||
2680 | "upgrade to EX level from PR level.\n"); | ||
2681 | dump_stack(); | ||
2682 | return -EINVAL; | ||
2683 | } | ||
2684 | |||
2685 | /* | ||
2686 | * case 2.1: the OCFS2_META_LOCK_GETBH flag makes ocfs2_inode_lock_full() ||
2687 | * ignore the lock level and just update the buffer head. ||
2688 | */ | ||
2689 | if (ret_bh) { | ||
2690 | status = ocfs2_inode_lock_full(inode, ret_bh, ex, | ||
2691 | OCFS2_META_LOCK_GETBH); | ||
2692 | if (status < 0) { | ||
2693 | if (status != -ENOENT) | ||
2694 | mlog_errno(status); | ||
2695 | return status; | ||
2696 | } | ||
2697 | } | ||
2698 | return tmp_oh ? 1 : 0; | ||
2641 | } | 2699 | } |
2642 | 2700 | ||
2643 | void ocfs2_inode_unlock_tracker(struct inode *inode, | 2701 | void ocfs2_inode_unlock_tracker(struct inode *inode, |
@@ -2649,12 +2707,13 @@ void ocfs2_inode_unlock_tracker(struct inode *inode, | |||
2649 | 2707 | ||
2650 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2708 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
2651 | /* had_lock means that the current process already took the cluster | 2709 | /* had_lock means that the current process already took the cluster |
2652 | * lock previously. If had_lock is 1, we have nothing to do here, and | 2710 | * lock previously. |
2653 | * it will get unlocked where we got the lock. | 2711 | * If had_lock is 1, we have nothing to do here. |
2712 | * If had_lock is 0, we will release the lock. | ||
2654 | */ | 2713 | */ |
2655 | if (!had_lock) { | 2714 | if (!had_lock) { |
2715 | ocfs2_inode_unlock(inode, oh->oh_ex); | ||
2656 | ocfs2_remove_holder(lockres, oh); | 2716 | ocfs2_remove_holder(lockres, oh); |
2657 | ocfs2_inode_unlock(inode, ex); | ||
2658 | } | 2717 | } |
2659 | } | 2718 | } |
2660 | 2719 | ||
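The tracker API above encodes three outcomes in its return value: a negative errno, 0 for "this call took the lock", and 1 for "the lock was already held by this process". A sketch of the intended caller pattern (example_op() is a hypothetical function, not from this patch):

	static int example_op(struct inode *inode)
	{
		struct ocfs2_lock_holder oh;
		struct buffer_head *bh = NULL;
		int had_lock;

		had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
		if (had_lock < 0)
			return had_lock; /* includes -EINVAL for a PR -> EX upgrade */

		/* ... work on the inode under the EX cluster lock ... */

		ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
		brelse(bh);
		return 0;
	}

Because had_lock is passed back to ocfs2_inode_unlock_tracker(), only the outermost caller actually drops the cluster lock; recursive callers unwind as no-ops. Recording oh_ex at lock time is what lets the unlock side release at the level originally taken rather than the level the innermost caller asked for.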
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 256e0a9067b8..4ec1c828f6e0 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -96,6 +96,7 @@ struct ocfs2_trim_fs_info { | |||
96 | struct ocfs2_lock_holder { | 96 | struct ocfs2_lock_holder { |
97 | struct list_head oh_list; | 97 | struct list_head oh_list; |
98 | struct pid *oh_owner_pid; | 98 | struct pid *oh_owner_pid; |
99 | int oh_ex; | ||
99 | }; | 100 | }; |
100 | 101 | ||
101 | /* ocfs2_inode_lock_full() 'arg_flags' flags */ | 102 | /* ocfs2_inode_lock_full() 'arg_flags' flags */ |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6ee94bc23f5b..a2a8603d27e0 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -563,8 +563,8 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, | |||
563 | return ret; | 563 | return ret; |
564 | } | 564 | } |
565 | 565 | ||
566 | static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | 566 | static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, |
567 | u32 clusters_to_add, int mark_unwritten) | 567 | u32 clusters_to_add, int mark_unwritten) |
568 | { | 568 | { |
569 | int status = 0; | 569 | int status = 0; |
570 | int restart_func = 0; | 570 | int restart_func = 0; |
@@ -1035,8 +1035,8 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, | |||
1035 | clusters_to_add -= oi->ip_clusters; | 1035 | clusters_to_add -= oi->ip_clusters; |
1036 | 1036 | ||
1037 | if (clusters_to_add) { | 1037 | if (clusters_to_add) { |
1038 | ret = __ocfs2_extend_allocation(inode, oi->ip_clusters, | 1038 | ret = ocfs2_extend_allocation(inode, oi->ip_clusters, |
1039 | clusters_to_add, 0); | 1039 | clusters_to_add, 0); |
1040 | if (ret) { | 1040 | if (ret) { |
1041 | mlog_errno(ret); | 1041 | mlog_errno(ret); |
1042 | goto out; | 1042 | goto out; |
@@ -1493,7 +1493,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, | |||
1493 | goto next; | 1493 | goto next; |
1494 | } | 1494 | } |
1495 | 1495 | ||
1496 | ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); | 1496 | ret = ocfs2_extend_allocation(inode, cpos, alloc_size, 1); |
1497 | if (ret) { | 1497 | if (ret) { |
1498 | if (ret != -ENOSPC) | 1498 | if (ret != -ENOSPC) |
1499 | mlog_errno(ret); | 1499 | mlog_errno(ret); |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 1fdc9839cd93..7eb7f03531f6 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -65,8 +65,6 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, | |||
65 | u64 new_i_size, u64 zero_to); | 65 | u64 new_i_size, u64 zero_to); |
66 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, | 66 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, |
67 | loff_t zero_to); | 67 | loff_t zero_to); |
68 | int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | ||
69 | u32 clusters_to_add, int mark_unwritten); | ||
70 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); | 68 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |
71 | int ocfs2_getattr(const struct path *path, struct kstat *stat, | 69 | int ocfs2_getattr(const struct path *path, struct kstat *stat, |
72 | u32 request_mask, unsigned int flags); | 70 | u32 request_mask, unsigned int flags); |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index ab30c005cc4b..994726ada857 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -402,7 +402,7 @@ out_err: | |||
402 | static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist, | 402 | static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist, |
403 | unsigned int chunksize) | 403 | unsigned int chunksize) |
404 | { | 404 | { |
405 | int index; | 405 | u32 index; |
406 | 406 | ||
407 | index = __ilog2_u32(chunksize); | 407 | index = __ilog2_u32(chunksize); |
408 | if (index >= OCFS2_INFO_MAX_HIST) | 408 | if (index >= OCFS2_INFO_MAX_HIST) |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index fb9a20e3d608..05220b365fb9 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -44,11 +44,11 @@ | |||
44 | #include "ocfs2_trace.h" | 44 | #include "ocfs2_trace.h" |
45 | 45 | ||
46 | 46 | ||
47 | static int ocfs2_fault(struct vm_fault *vmf) | 47 | static vm_fault_t ocfs2_fault(struct vm_fault *vmf) |
48 | { | 48 | { |
49 | struct vm_area_struct *vma = vmf->vma; | 49 | struct vm_area_struct *vma = vmf->vma; |
50 | sigset_t oldset; | 50 | sigset_t oldset; |
51 | int ret; | 51 | vm_fault_t ret; |
52 | 52 | ||
53 | ocfs2_block_signals(&oldset); | 53 | ocfs2_block_signals(&oldset); |
54 | ret = filemap_fault(vmf); | 54 | ret = filemap_fault(vmf); |
@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_fault *vmf) | |||
59 | return ret; | 59 | return ret; |
60 | } | 60 | } |
61 | 61 | ||
62 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | 62 | static vm_fault_t __ocfs2_page_mkwrite(struct file *file, |
63 | struct page *page) | 63 | struct buffer_head *di_bh, struct page *page) |
64 | { | 64 | { |
65 | int ret = VM_FAULT_NOPAGE; | 65 | int err; |
66 | vm_fault_t ret = VM_FAULT_NOPAGE; | ||
66 | struct inode *inode = file_inode(file); | 67 | struct inode *inode = file_inode(file); |
67 | struct address_space *mapping = inode->i_mapping; | 68 | struct address_space *mapping = inode->i_mapping; |
68 | loff_t pos = page_offset(page); | 69 | loff_t pos = page_offset(page); |
@@ -105,15 +106,12 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | |||
105 | if (page->index == last_index) | 106 | if (page->index == last_index) |
106 | len = ((size - 1) & ~PAGE_MASK) + 1; | 107 | len = ((size - 1) & ~PAGE_MASK) + 1; |
107 | 108 | ||
108 | ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP, | 109 | err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP, |
109 | &locked_page, &fsdata, di_bh, page); | 110 | &locked_page, &fsdata, di_bh, page); |
110 | if (ret) { | 111 | if (err) { |
111 | if (ret != -ENOSPC) | 112 | if (err != -ENOSPC) |
112 | mlog_errno(ret); | 113 | mlog_errno(err); |
113 | if (ret == -ENOMEM) | 114 | ret = vmf_error(err); |
114 | ret = VM_FAULT_OOM; | ||
115 | else | ||
116 | ret = VM_FAULT_SIGBUS; | ||
117 | goto out; | 115 | goto out; |
118 | } | 116 | } |
119 | 117 | ||
@@ -121,20 +119,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | |||
121 | ret = VM_FAULT_NOPAGE; | 119 | ret = VM_FAULT_NOPAGE; |
122 | goto out; | 120 | goto out; |
123 | } | 121 | } |
124 | ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata); | 122 | err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata); |
125 | BUG_ON(ret != len); | 123 | BUG_ON(err != len); |
126 | ret = VM_FAULT_LOCKED; | 124 | ret = VM_FAULT_LOCKED; |
127 | out: | 125 | out: |
128 | return ret; | 126 | return ret; |
129 | } | 127 | } |
130 | 128 | ||
131 | static int ocfs2_page_mkwrite(struct vm_fault *vmf) | 129 | static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf) |
132 | { | 130 | { |
133 | struct page *page = vmf->page; | 131 | struct page *page = vmf->page; |
134 | struct inode *inode = file_inode(vmf->vma->vm_file); | 132 | struct inode *inode = file_inode(vmf->vma->vm_file); |
135 | struct buffer_head *di_bh = NULL; | 133 | struct buffer_head *di_bh = NULL; |
136 | sigset_t oldset; | 134 | sigset_t oldset; |
137 | int ret; | 135 | int err; |
136 | vm_fault_t ret; | ||
138 | 137 | ||
139 | sb_start_pagefault(inode->i_sb); | 138 | sb_start_pagefault(inode->i_sb); |
140 | ocfs2_block_signals(&oldset); | 139 | ocfs2_block_signals(&oldset); |
@@ -144,13 +143,10 @@ static int ocfs2_page_mkwrite(struct vm_fault *vmf) | |||
144 | * node. Taking the data lock will also ensure that we don't | 143 | * node. Taking the data lock will also ensure that we don't |
145 | * attempt page truncation as part of a downconvert. | 144 | * attempt page truncation as part of a downconvert. |
146 | */ | 145 | */ |
147 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 146 | err = ocfs2_inode_lock(inode, &di_bh, 1); |
148 | if (ret < 0) { | 147 | if (err < 0) { |
149 | mlog_errno(ret); | 148 | mlog_errno(err); |
150 | if (ret == -ENOMEM) | 149 | ret = vmf_error(err); |
151 | ret = VM_FAULT_OOM; | ||
152 | else | ||
153 | ret = VM_FAULT_SIGBUS; | ||
154 | goto out; | 150 | goto out; |
155 | } | 151 | } |
156 | 152 | ||
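Both ocfs2 mkwrite paths above replace the same open-coded two-branch translation with vmf_error(). The helper is small; a sketch consistent with how the hunks use it (matching the common in-tree definition):

	static inline vm_fault_t vmf_error(int err)
	{
		if (err == -ENOMEM)
			return VM_FAULT_OOM;
		return VM_FAULT_SIGBUS;
	}

Keeping errno values in an int (err) and fault codes in a vm_fault_t (ret), with vmf_error() as the only bridge between them, is the pattern this whole series drives toward.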
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8dd6f703c819..b7ca84bc3df7 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -2332,8 +2332,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
2332 | struct buffer_head *orphan_dir_bh, | 2332 | struct buffer_head *orphan_dir_bh, |
2333 | bool dio) | 2333 | bool dio) |
2334 | { | 2334 | { |
2335 | const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN; | 2335 | char name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1]; |
2336 | char name[namelen + 1]; | ||
2337 | struct ocfs2_dinode *orphan_fe; | 2336 | struct ocfs2_dinode *orphan_fe; |
2338 | int status = 0; | 2337 | int status = 0; |
2339 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | 2338 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
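The namei.c change above removes a variable-length array: in C, a const-qualified int is not an integer constant expression, so an array sized by it is a VLA even though the value never changes. A contrived sketch of the distinction (hypothetical function, macro values as in ocfs2):

	void example(void)
	{
		const int n = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN;
		char vla[n + 1];	/* a VLA: sized at run time */
		char fixed[OCFS2_DIO_ORPHAN_PREFIX_LEN +
			   OCFS2_ORPHAN_NAMELEN + 1];	/* constant size */

		(void)vla;
		(void)fixed;
	}

Spelling the bound directly out of the macros gives the compiler a constant-size array, which is what allows building with -Wvla.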
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 5bb4a89f9045..7071ad0dec90 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -807,11 +807,11 @@ struct ocfs2_dir_block_trailer { | |||
807 | * in this block. (unused) */ | 807 | * in this block. (unused) */ |
808 | /*10*/ __u8 db_signature[8]; /* Signature for verification */ | 808 | /*10*/ __u8 db_signature[8]; /* Signature for verification */ |
809 | __le64 db_reserved2; | 809 | __le64 db_reserved2; |
810 | __le64 db_free_next; /* Next block in list (unused) */ | 810 | /*20*/ __le64 db_free_next; /* Next block in list (unused) */ |
811 | /*20*/ __le64 db_blkno; /* Offset on disk, in blocks */ | 811 | __le64 db_blkno; /* Offset on disk, in blocks */ |
812 | __le64 db_parent_dinode; /* dinode which owns me, in | 812 | /*30*/ __le64 db_parent_dinode; /* dinode which owns me, in |
813 | blocks */ | 813 | blocks */ |
814 | /*30*/ struct ocfs2_block_check db_check; /* Error checking */ | 814 | struct ocfs2_block_check db_check; /* Error checking */ |
815 | /*40*/ | 815 | /*40*/ |
816 | }; | 816 | }; |
817 | 817 | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c index 004077f1a7bf..0ceb3b6b37e7 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -268,7 +268,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
268 | unsigned long flags; | 268 | unsigned long flags; |
269 | sigset_t pending, shpending, blocked, ignored, caught; | 269 | sigset_t pending, shpending, blocked, ignored, caught; |
270 | int num_threads = 0; | 270 | int num_threads = 0; |
271 | unsigned long qsize = 0; | 271 | unsigned int qsize = 0; |
272 | unsigned long qlim = 0; | 272 | unsigned long qlim = 0; |
273 | 273 | ||
274 | sigemptyset(&pending); | 274 | sigemptyset(&pending); |
diff --git a/fs/proc/base.c b/fs/proc/base.c index af128b374143..44dec22e5e9e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -213,10 +213,14 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, | |||
213 | char *page; | 213 | char *page; |
214 | unsigned long count = _count; | 214 | unsigned long count = _count; |
215 | unsigned long arg_start, arg_end, env_start, env_end; | 215 | unsigned long arg_start, arg_end, env_start, env_end; |
216 | unsigned long len1, len2, len; | 216 | unsigned long len1, len2; |
217 | unsigned long p; | 217 | char __user *buf0 = buf; |
218 | struct { | ||
219 | unsigned long p; | ||
220 | unsigned long len; | ||
221 | } cmdline[2]; | ||
218 | char c; | 222 | char c; |
219 | ssize_t rv; | 223 | int rv; |
220 | 224 | ||
221 | BUG_ON(*pos < 0); | 225 | BUG_ON(*pos < 0); |
222 | 226 | ||
@@ -239,12 +243,12 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, | |||
239 | goto out_mmput; | 243 | goto out_mmput; |
240 | } | 244 | } |
241 | 245 | ||
242 | down_read(&mm->mmap_sem); | 246 | spin_lock(&mm->arg_lock); |
243 | arg_start = mm->arg_start; | 247 | arg_start = mm->arg_start; |
244 | arg_end = mm->arg_end; | 248 | arg_end = mm->arg_end; |
245 | env_start = mm->env_start; | 249 | env_start = mm->env_start; |
246 | env_end = mm->env_end; | 250 | env_end = mm->env_end; |
247 | up_read(&mm->mmap_sem); | 251 | spin_unlock(&mm->arg_lock); |
248 | 252 | ||
249 | BUG_ON(arg_start > arg_end); | 253 | BUG_ON(arg_start > arg_end); |
250 | BUG_ON(env_start > env_end); | 254 | BUG_ON(env_start > env_end); |
@@ -253,61 +257,31 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, | |||
253 | len2 = env_end - env_start; | 257 | len2 = env_end - env_start; |
254 | 258 | ||
255 | /* Empty ARGV. */ | 259 | /* Empty ARGV. */ |
256 | if (len1 == 0) { | 260 | if (len1 == 0) |
257 | rv = 0; | 261 | goto end; |
258 | goto out_free_page; | 262 | |
259 | } | ||
260 | /* | 263 | /* |
261 | * Inherently racy -- command line shares address space | 264 | * Inherently racy -- command line shares address space |
262 | * with code and data. | 265 | * with code and data. |
263 | */ | 266 | */ |
264 | rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); | 267 | if (access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON) != 1) |
265 | if (rv <= 0) | 268 | goto end; |
266 | goto out_free_page; | ||
267 | |||
268 | rv = 0; | ||
269 | 269 | ||
270 | cmdline[0].p = arg_start; | ||
271 | cmdline[0].len = len1; | ||
270 | if (c == '\0') { | 272 | if (c == '\0') { |
271 | /* Command line (set of strings) occupies whole ARGV. */ | 273 | /* Command line (set of strings) occupies whole ARGV. */ |
272 | if (len1 <= *pos) | 274 | cmdline[1].len = 0; |
273 | goto out_free_page; | ||
274 | |||
275 | p = arg_start + *pos; | ||
276 | len = len1 - *pos; | ||
277 | while (count > 0 && len > 0) { | ||
278 | unsigned int _count; | ||
279 | int nr_read; | ||
280 | |||
281 | _count = min3(count, len, PAGE_SIZE); | ||
282 | nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); | ||
283 | if (nr_read < 0) | ||
284 | rv = nr_read; | ||
285 | if (nr_read <= 0) | ||
286 | goto out_free_page; | ||
287 | |||
288 | if (copy_to_user(buf, page, nr_read)) { | ||
289 | rv = -EFAULT; | ||
290 | goto out_free_page; | ||
291 | } | ||
292 | |||
293 | p += nr_read; | ||
294 | len -= nr_read; | ||
295 | buf += nr_read; | ||
296 | count -= nr_read; | ||
297 | rv += nr_read; | ||
298 | } | ||
299 | } else { | 275 | } else { |
300 | /* | 276 | /* |
301 | * Command line (1 string) occupies ARGV and | 277 | * Command line (1 string) occupies ARGV and |
302 | * extends into ENVP. | 278 | * extends into ENVP. |
303 | */ | 279 | */ |
304 | struct { | 280 | cmdline[1].p = env_start; |
305 | unsigned long p; | 281 | cmdline[1].len = len2; |
306 | unsigned long len; | 282 | } |
307 | } cmdline[2] = { | 283 | |
308 | { .p = arg_start, .len = len1 }, | 284 | { |
309 | { .p = env_start, .len = len2 }, | ||
310 | }; | ||
311 | loff_t pos1 = *pos; | 285 | loff_t pos1 = *pos; |
312 | unsigned int i; | 286 | unsigned int i; |
313 | 287 | ||
@@ -317,44 +291,40 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, | |||
317 | i++; | 291 | i++; |
318 | } | 292 | } |
319 | while (i < 2) { | 293 | while (i < 2) { |
294 | unsigned long p; | ||
295 | unsigned long len; | ||
296 | |||
320 | p = cmdline[i].p + pos1; | 297 | p = cmdline[i].p + pos1; |
321 | len = cmdline[i].len - pos1; | 298 | len = cmdline[i].len - pos1; |
322 | while (count > 0 && len > 0) { | 299 | while (count > 0 && len > 0) { |
323 | unsigned int _count, l; | 300 | unsigned int nr_read, nr_write; |
324 | int nr_read; | 301 | |
325 | bool final; | 302 | nr_read = min3(count, len, PAGE_SIZE); |
326 | 303 | nr_read = access_remote_vm(mm, p, page, nr_read, FOLL_ANON); | |
327 | _count = min3(count, len, PAGE_SIZE); | 304 | if (nr_read == 0) |
328 | nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); | 305 | goto end; |
329 | if (nr_read < 0) | ||
330 | rv = nr_read; | ||
331 | if (nr_read <= 0) | ||
332 | goto out_free_page; | ||
333 | 306 | ||
334 | /* | 307 | /* |
335 | * Command line can be shorter than whole ARGV | 308 | * Command line can be shorter than whole ARGV |
336 | * even if last "marker" byte says it is not. | 309 | * even if last "marker" byte says it is not. |
337 | */ | 310 | */ |
338 | final = false; | 311 | if (c == '\0') |
339 | l = strnlen(page, nr_read); | 312 | nr_write = nr_read; |
340 | if (l < nr_read) { | 313 | else |
341 | nr_read = l; | 314 | nr_write = strnlen(page, nr_read); |
342 | final = true; | ||
343 | } | ||
344 | 315 | ||
345 | if (copy_to_user(buf, page, nr_read)) { | 316 | if (copy_to_user(buf, page, nr_write)) { |
346 | rv = -EFAULT; | 317 | rv = -EFAULT; |
347 | goto out_free_page; | 318 | goto out_free_page; |
348 | } | 319 | } |
349 | 320 | ||
350 | p += nr_read; | 321 | p += nr_write; |
351 | len -= nr_read; | 322 | len -= nr_write; |
352 | buf += nr_read; | 323 | buf += nr_write; |
353 | count -= nr_read; | 324 | count -= nr_write; |
354 | rv += nr_read; | ||
355 | 325 | ||
356 | if (final) | 326 | if (nr_write < nr_read) |
357 | goto out_free_page; | 327 | goto end; |
358 | } | 328 | } |
359 | 329 | ||
360 | /* Only first chunk can be read partially. */ | 330 | /* Only first chunk can be read partially. */ |
@@ -363,12 +333,13 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, | |||
363 | } | 333 | } |
364 | } | 334 | } |
365 | 335 | ||
336 | end: | ||
337 | *pos += buf - buf0; | ||
338 | rv = buf - buf0; | ||
366 | out_free_page: | 339 | out_free_page: |
367 | free_page((unsigned long)page); | 340 | free_page((unsigned long)page); |
368 | out_mmput: | 341 | out_mmput: |
369 | mmput(mm); | 342 | mmput(mm); |
370 | if (rv > 0) | ||
371 | *pos += rv; | ||
372 | return rv; | 343 | return rv; |
373 | } | 344 | } |
374 | 345 | ||
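The rewritten proc_pid_cmdline_read() above reduces the two historical cases to one loop over at most two flat byte ranges: ARGV always, and ENVP only when the final ARGV byte is not NUL (meaning a single unterminated string spills over). A sketch of just the segment setup, pulled out of the function for clarity (cmdline_seg and cmdline_segments() are hypothetical names; the diff uses an anonymous struct inline):

	struct cmdline_seg {
		unsigned long p;	/* start address in the target mm */
		unsigned long len;	/* bytes in this segment */
	};

	static void cmdline_segments(unsigned long arg_start, unsigned long arg_end,
				     unsigned long env_start, unsigned long env_end,
				     char last_argv_byte, struct cmdline_seg seg[2])
	{
		seg[0].p = arg_start;
		seg[0].len = arg_end - arg_start;
		if (last_argv_byte == '\0') {
			seg[1].len = 0;		/* strings fit entirely in ARGV */
		} else {
			seg[1].p = env_start;	/* one string runs on into ENVP */
			seg[1].len = env_end - env_start;
		}
	}

Inside the copy loop, the same c == '\0' test decides whether strnlen() must scan each chunk for an early terminator; when ARGV was properly terminated, every byte read is written through unmodified.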
@@ -430,7 +401,6 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, | |||
430 | struct stack_trace trace; | 401 | struct stack_trace trace; |
431 | unsigned long *entries; | 402 | unsigned long *entries; |
432 | int err; | 403 | int err; |
433 | int i; | ||
434 | 404 | ||
435 | entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); | 405 | entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); |
436 | if (!entries) | 406 | if (!entries) |
@@ -443,6 +413,8 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, | |||
443 | 413 | ||
444 | err = lock_trace(task); | 414 | err = lock_trace(task); |
445 | if (!err) { | 415 | if (!err) { |
416 | unsigned int i; | ||
417 | |||
446 | save_stack_trace_tsk(task, &trace); | 418 | save_stack_trace_tsk(task, &trace); |
447 | 419 | ||
448 | for (i = 0; i < trace.nr_entries; i++) { | 420 | for (i = 0; i < trace.nr_entries; i++) { |
@@ -927,10 +899,10 @@ static ssize_t environ_read(struct file *file, char __user *buf, | |||
927 | if (!mmget_not_zero(mm)) | 899 | if (!mmget_not_zero(mm)) |
928 | goto free; | 900 | goto free; |
929 | 901 | ||
930 | down_read(&mm->mmap_sem); | 902 | spin_lock(&mm->arg_lock); |
931 | env_start = mm->env_start; | 903 | env_start = mm->env_start; |
932 | env_end = mm->env_end; | 904 | env_end = mm->env_end; |
933 | up_read(&mm->mmap_sem); | 905 | spin_unlock(&mm->arg_lock); |
934 | 906 | ||
935 | while (count > 0) { | 907 | while (count > 0) { |
936 | size_t this_len, max_len; | 908 | size_t this_len, max_len; |
@@ -1784,9 +1756,9 @@ int pid_getattr(const struct path *path, struct kstat *stat, | |||
1784 | 1756 | ||
1785 | generic_fillattr(inode, stat); | 1757 | generic_fillattr(inode, stat); |
1786 | 1758 | ||
1787 | rcu_read_lock(); | ||
1788 | stat->uid = GLOBAL_ROOT_UID; | 1759 | stat->uid = GLOBAL_ROOT_UID; |
1789 | stat->gid = GLOBAL_ROOT_GID; | 1760 | stat->gid = GLOBAL_ROOT_GID; |
1761 | rcu_read_lock(); | ||
1790 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 1762 | task = pid_task(proc_pid(inode), PIDTYPE_PID); |
1791 | if (task) { | 1763 | if (task) { |
1792 | if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { | 1764 | if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { |
@@ -1875,7 +1847,7 @@ const struct dentry_operations pid_dentry_operations = | |||
1875 | * by stat. | 1847 | * by stat. |
1876 | */ | 1848 | */ |
1877 | bool proc_fill_cache(struct file *file, struct dir_context *ctx, | 1849 | bool proc_fill_cache(struct file *file, struct dir_context *ctx, |
1878 | const char *name, int len, | 1850 | const char *name, unsigned int len, |
1879 | instantiate_t instantiate, struct task_struct *task, const void *ptr) | 1851 | instantiate_t instantiate, struct task_struct *task, const void *ptr) |
1880 | { | 1852 | { |
1881 | struct dentry *child, *dir = file->f_path.dentry; | 1853 | struct dentry *child, *dir = file->f_path.dentry; |
@@ -3251,7 +3223,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) | |||
3251 | iter.task; | 3223 | iter.task; |
3252 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 3224 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
3253 | char name[10 + 1]; | 3225 | char name[10 + 1]; |
3254 | int len; | 3226 | unsigned int len; |
3255 | 3227 | ||
3256 | cond_resched(); | 3228 | cond_resched(); |
3257 | if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) | 3229 | if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) |
@@ -3578,7 +3550,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) | |||
3578 | task; | 3550 | task; |
3579 | task = next_tid(task), ctx->pos++) { | 3551 | task = next_tid(task), ctx->pos++) { |
3580 | char name[10 + 1]; | 3552 | char name[10 + 1]; |
3581 | int len; | 3553 | unsigned int len; |
3582 | tid = task_pid_nr_ns(task, ns); | 3554 | tid = task_pid_nr_ns(task, ns); |
3583 | len = snprintf(name, sizeof(name), "%u", tid); | 3555 | len = snprintf(name, sizeof(name), "%u", tid); |
3584 | if (!proc_fill_cache(file, ctx, name, len, | 3556 | if (!proc_fill_cache(file, ctx, name, len, |
diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 05b9893e9a22..81882a13212d 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c | |||
@@ -248,7 +248,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, | |||
248 | struct file *f; | 248 | struct file *f; |
249 | struct fd_data data; | 249 | struct fd_data data; |
250 | char name[10 + 1]; | 250 | char name[10 + 1]; |
251 | int len; | 251 | unsigned int len; |
252 | 252 | ||
253 | f = fcheck_files(files, fd); | 253 | f = fcheck_files(files, fd); |
254 | if (!f) | 254 | if (!f) |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 93eb1906c28d..50cb22a08c2f 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -163,7 +163,7 @@ extern loff_t mem_lseek(struct file *, loff_t, int); | |||
163 | /* Lookups */ | 163 | /* Lookups */ |
164 | typedef struct dentry *instantiate_t(struct dentry *, | 164 | typedef struct dentry *instantiate_t(struct dentry *, |
165 | struct task_struct *, const void *); | 165 | struct task_struct *, const void *); |
166 | extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, | 166 | bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int, |
167 | instantiate_t, struct task_struct *, const void *); | 167 | instantiate_t, struct task_struct *, const void *); |
168 | 168 | ||
169 | /* | 169 | /* |
diff --git a/fs/proc/page.c b/fs/proc/page.c index 1491918a33c3..792c78a49174 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -154,6 +154,8 @@ u64 stable_page_flags(struct page *page) | |||
154 | 154 | ||
155 | if (PageBalloon(page)) | 155 | if (PageBalloon(page)) |
156 | u |= 1 << KPF_BALLOON; | 156 | u |= 1 << KPF_BALLOON; |
157 | if (PageTable(page)) | ||
158 | u |= 1 << KPF_PGTABLE; | ||
157 | 159 | ||
158 | if (page_is_idle(page)) | 160 | if (page_is_idle(page)) |
159 | u |= 1 << KPF_IDLE; | 161 | u |= 1 << KPF_IDLE; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7e074138d2f2..597969db9e90 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -1259,8 +1259,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, | |||
1259 | if (pte_swp_soft_dirty(pte)) | 1259 | if (pte_swp_soft_dirty(pte)) |
1260 | flags |= PM_SOFT_DIRTY; | 1260 | flags |= PM_SOFT_DIRTY; |
1261 | entry = pte_to_swp_entry(pte); | 1261 | entry = pte_to_swp_entry(pte); |
1262 | frame = swp_type(entry) | | 1262 | if (pm->show_pfn) |
1263 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); | 1263 | frame = swp_type(entry) | |
1264 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); | ||
1264 | flags |= PM_SWAP; | 1265 | flags |= PM_SWAP; |
1265 | if (is_migration_entry(entry)) | 1266 | if (is_migration_entry(entry)) |
1266 | page = migration_entry_to_page(entry); | 1267 | page = migration_entry_to_page(entry); |
@@ -1311,11 +1312,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, | |||
1311 | #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION | 1312 | #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION |
1312 | else if (is_swap_pmd(pmd)) { | 1313 | else if (is_swap_pmd(pmd)) { |
1313 | swp_entry_t entry = pmd_to_swp_entry(pmd); | 1314 | swp_entry_t entry = pmd_to_swp_entry(pmd); |
1314 | unsigned long offset = swp_offset(entry); | 1315 | unsigned long offset; |
1315 | 1316 | ||
1316 | offset += (addr & ~PMD_MASK) >> PAGE_SHIFT; | 1317 | if (pm->show_pfn) { |
1317 | frame = swp_type(entry) | | 1318 | offset = swp_offset(entry) + |
1318 | (offset << MAX_SWAPFILES_SHIFT); | 1319 | ((addr & ~PMD_MASK) >> PAGE_SHIFT); |
1320 | frame = swp_type(entry) | | ||
1321 | (offset << MAX_SWAPFILES_SHIFT); | ||
1322 | } | ||
1319 | flags |= PM_SWAP; | 1323 | flags |= PM_SWAP; |
1320 | if (pmd_swp_soft_dirty(pmd)) | 1324 | if (pmd_swp_soft_dirty(pmd)) |
1321 | flags |= PM_SOFT_DIRTY; | 1325 | flags |= PM_SOFT_DIRTY; |
@@ -1333,10 +1337,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, | |||
1333 | err = add_to_pagemap(addr, &pme, pm); | 1337 | err = add_to_pagemap(addr, &pme, pm); |
1334 | if (err) | 1338 | if (err) |
1335 | break; | 1339 | break; |
1336 | if (pm->show_pfn && (flags & PM_PRESENT)) | 1340 | if (pm->show_pfn) { |
1337 | frame++; | 1341 | if (flags & PM_PRESENT) |
1338 | else if (flags & PM_SWAP) | 1342 | frame++; |
1339 | frame += (1 << MAX_SWAPFILES_SHIFT); | 1343 | else if (flags & PM_SWAP) |
1344 | frame += (1 << MAX_SWAPFILES_SHIFT); | ||
1345 | } | ||
1340 | } | 1346 | } |
1341 | spin_unlock(ptl); | 1347 | spin_unlock(ptl); |
1342 | return err; | 1348 | return err; |
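The pagemap hunks above stop publishing swap type and offset to readers without CAP_SYS_ADMIN; pm->show_pfn already gates the physical frame number, and the swap encoding leaks comparable information. A sketch of the encoding being gated (pagemap_swap_frame() is a hypothetical helper; the diff open-codes this in both the PTE and PMD paths):

	static u64 pagemap_swap_frame(swp_entry_t entry, bool show_pfn)
	{
		if (!show_pfn)
			return 0;	/* unprivileged readers see only PM_SWAP */
		/* low MAX_SWAPFILES_SHIFT bits: swap type; the rest: offset */
		return swp_type(entry) |
		       (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
	}

The matching hunk at the bottom also stops advancing frame for unprivileged readers, so consecutive pagemap entries cannot be diffed to reconstruct the hidden offsets.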
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index cec550c8468f..123bf7d516fc 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c | |||
@@ -62,6 +62,8 @@ struct userfaultfd_ctx { | |||
62 | enum userfaultfd_state state; | 62 | enum userfaultfd_state state; |
63 | /* released */ | 63 | /* released */ |
64 | bool released; | 64 | bool released; |
65 | /* memory mappings are changing because of a non-cooperative event */ ||
66 | bool mmap_changing; | ||
65 | /* mm with one or more vmas attached to this userfaultfd_ctx */ | 67 |
66 | struct mm_struct *mm; | 68 | struct mm_struct *mm; |
67 | }; | 69 | }; |
@@ -641,6 +643,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, | |||
641 | * already released. | 643 | * already released. |
642 | */ | 644 | */ |
643 | out: | 645 | out: |
646 | WRITE_ONCE(ctx->mmap_changing, false); | ||
644 | userfaultfd_ctx_put(ctx); | 647 | userfaultfd_ctx_put(ctx); |
645 | } | 648 | } |
646 | 649 | ||
@@ -686,10 +689,12 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) | |||
686 | ctx->state = UFFD_STATE_RUNNING; | 689 | ctx->state = UFFD_STATE_RUNNING; |
687 | ctx->features = octx->features; | 690 | ctx->features = octx->features; |
688 | ctx->released = false; | 691 | ctx->released = false; |
692 | ctx->mmap_changing = false; | ||
689 | ctx->mm = vma->vm_mm; | 693 | ctx->mm = vma->vm_mm; |
690 | mmgrab(ctx->mm); | 694 | mmgrab(ctx->mm); |
691 | 695 | ||
692 | userfaultfd_ctx_get(octx); | 696 | userfaultfd_ctx_get(octx); |
697 | WRITE_ONCE(octx->mmap_changing, true); | ||
693 | fctx->orig = octx; | 698 | fctx->orig = octx; |
694 | fctx->new = ctx; | 699 | fctx->new = ctx; |
695 | list_add_tail(&fctx->list, fcs); | 700 | list_add_tail(&fctx->list, fcs); |
@@ -732,6 +737,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, | |||
732 | if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) { | 737 | if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) { |
733 | vm_ctx->ctx = ctx; | 738 | vm_ctx->ctx = ctx; |
734 | userfaultfd_ctx_get(ctx); | 739 | userfaultfd_ctx_get(ctx); |
740 | WRITE_ONCE(ctx->mmap_changing, true); | ||
735 | } | 741 | } |
736 | } | 742 | } |
737 | 743 | ||
@@ -772,6 +778,7 @@ bool userfaultfd_remove(struct vm_area_struct *vma, | |||
772 | return true; | 778 | return true; |
773 | 779 | ||
774 | userfaultfd_ctx_get(ctx); | 780 | userfaultfd_ctx_get(ctx); |
781 | WRITE_ONCE(ctx->mmap_changing, true); | ||
775 | up_read(&mm->mmap_sem); | 782 | up_read(&mm->mmap_sem); |
776 | 783 | ||
777 | msg_init(&ewq.msg); | 784 | msg_init(&ewq.msg); |
@@ -815,6 +822,7 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma, | |||
815 | return -ENOMEM; | 822 | return -ENOMEM; |
816 | 823 | ||
817 | userfaultfd_ctx_get(ctx); | 824 | userfaultfd_ctx_get(ctx); |
825 | WRITE_ONCE(ctx->mmap_changing, true); | ||
818 | unmap_ctx->ctx = ctx; | 826 | unmap_ctx->ctx = ctx; |
819 | unmap_ctx->start = start; | 827 | unmap_ctx->start = start; |
820 | unmap_ctx->end = end; | 828 | unmap_ctx->end = end; |
@@ -1653,6 +1661,10 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, | |||
1653 | 1661 | ||
1654 | user_uffdio_copy = (struct uffdio_copy __user *) arg; | 1662 | user_uffdio_copy = (struct uffdio_copy __user *) arg; |
1655 | 1663 | ||
1664 | ret = -EAGAIN; | ||
1665 | if (READ_ONCE(ctx->mmap_changing)) | ||
1666 | goto out; | ||
1667 | |||
1656 | ret = -EFAULT; | 1668 | ret = -EFAULT; |
1657 | if (copy_from_user(&uffdio_copy, user_uffdio_copy, | 1669 | if (copy_from_user(&uffdio_copy, user_uffdio_copy, |
1658 | /* don't copy "copy" last field */ | 1670 | /* don't copy "copy" last field */ |
@@ -1674,7 +1686,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, | |||
1674 | goto out; | 1686 | goto out; |
1675 | if (mmget_not_zero(ctx->mm)) { | 1687 | if (mmget_not_zero(ctx->mm)) { |
1676 | ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src, | 1688 | ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src, |
1677 | uffdio_copy.len); | 1689 | uffdio_copy.len, &ctx->mmap_changing); |
1678 | mmput(ctx->mm); | 1690 | mmput(ctx->mm); |
1679 | } else { | 1691 | } else { |
1680 | return -ESRCH; | 1692 | return -ESRCH; |
@@ -1705,6 +1717,10 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, | |||
1705 | 1717 | ||
1706 | user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; | 1718 | user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; |
1707 | 1719 | ||
1720 | ret = -EAGAIN; | ||
1721 | if (READ_ONCE(ctx->mmap_changing)) | ||
1722 | goto out; | ||
1723 | |||
1708 | ret = -EFAULT; | 1724 | ret = -EFAULT; |
1709 | if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, | 1725 | if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, |
1710 | /* don't copy "zeropage" last field */ | 1726 | /* don't copy "zeropage" last field */ |
@@ -1721,7 +1737,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, | |||
1721 | 1737 | ||
1722 | if (mmget_not_zero(ctx->mm)) { | 1738 | if (mmget_not_zero(ctx->mm)) { |
1723 | ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, | 1739 | ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, |
1724 | uffdio_zeropage.range.len); | 1740 | uffdio_zeropage.range.len, |
1741 | &ctx->mmap_changing); | ||
1725 | mmput(ctx->mm); | 1742 | mmput(ctx->mm); |
1726 | } else { | 1743 | } else { |
1727 | return -ESRCH; | 1744 | return -ESRCH; |
@@ -1900,6 +1917,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) | |||
1900 | ctx->features = 0; | 1917 | ctx->features = 0; |
1901 | ctx->state = UFFD_STATE_WAIT_API; | 1918 | ctx->state = UFFD_STATE_WAIT_API; |
1902 | ctx->released = false; | 1919 | ctx->released = false; |
1920 | ctx->mmap_changing = false; | ||
1903 | ctx->mm = current->mm; | 1921 | ctx->mm = current->mm; |
1904 | /* prevent the mm struct to be freed */ | 1922 | /* prevent the mm struct to be freed */ |
1905 | mmgrab(ctx->mm); | 1923 | mmgrab(ctx->mm); |
diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h index ffb68d67be5f..a248545f1e18 100644 --- a/include/asm-generic/int-ll64.h +++ b/include/asm-generic/int-ll64.h | |||
@@ -13,17 +13,14 @@ | |||
13 | 13 | ||
14 | #ifndef __ASSEMBLY__ | 14 | #ifndef __ASSEMBLY__ |
15 | 15 | ||
16 | typedef signed char s8; | 16 | typedef __s8 s8; |
17 | typedef unsigned char u8; | 17 | typedef __u8 u8; |
18 | 18 | typedef __s16 s16; | |
19 | typedef signed short s16; | 19 | typedef __u16 u16; |
20 | typedef unsigned short u16; | 20 | typedef __s32 s32; |
21 | 21 | typedef __u32 u32; | |
22 | typedef signed int s32; | 22 | typedef __s64 s64; |
23 | typedef unsigned int u32; | 23 | typedef __u64 u64; |
24 | |||
25 | typedef signed long long s64; | ||
26 | typedef unsigned long long u64; | ||
27 | 24 | ||
28 | #define S8_C(x) x | 25 | #define S8_C(x) x |
29 | #define U8_C(x) x ## U | 26 | #define U8_C(x) x ## U |
diff --git a/include/linux/dax.h b/include/linux/dax.h index c99692ddd4b5..88504e87cd6c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h | |||
@@ -125,8 +125,8 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, | |||
125 | const struct iomap_ops *ops); | 125 | const struct iomap_ops *ops); |
126 | int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, | 126 | int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, |
127 | pfn_t *pfnp, int *errp, const struct iomap_ops *ops); | 127 | pfn_t *pfnp, int *errp, const struct iomap_ops *ops); |
128 | int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, | 128 | vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, |
129 | pfn_t pfn); | 129 | enum page_entry_size pe_size, pfn_t pfn); |
130 | int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); | 130 | int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); |
131 | int dax_invalidate_mapping_entry_sync(struct address_space *mapping, | 131 | int dax_invalidate_mapping_entry_sync(struct address_space *mapping, |
132 | pgoff_t index); | 132 | pgoff_t index); |
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index fc5ab85278d5..a6afcec53795 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -24,6 +24,7 @@ struct vm_area_struct; | |||
24 | #define ___GFP_HIGH 0x20u | 24 | #define ___GFP_HIGH 0x20u |
25 | #define ___GFP_IO 0x40u | 25 | #define ___GFP_IO 0x40u |
26 | #define ___GFP_FS 0x80u | 26 | #define ___GFP_FS 0x80u |
27 | #define ___GFP_WRITE 0x100u | ||
27 | #define ___GFP_NOWARN 0x200u | 28 | #define ___GFP_NOWARN 0x200u |
28 | #define ___GFP_RETRY_MAYFAIL 0x400u | 29 | #define ___GFP_RETRY_MAYFAIL 0x400u |
29 | #define ___GFP_NOFAIL 0x800u | 30 | #define ___GFP_NOFAIL 0x800u |
@@ -36,11 +37,10 @@ struct vm_area_struct; | |||
36 | #define ___GFP_THISNODE 0x40000u | 37 | #define ___GFP_THISNODE 0x40000u |
37 | #define ___GFP_ATOMIC 0x80000u | 38 | #define ___GFP_ATOMIC 0x80000u |
38 | #define ___GFP_ACCOUNT 0x100000u | 39 | #define ___GFP_ACCOUNT 0x100000u |
39 | #define ___GFP_DIRECT_RECLAIM 0x400000u | 40 | #define ___GFP_DIRECT_RECLAIM 0x200000u |
40 | #define ___GFP_WRITE 0x800000u | 41 | #define ___GFP_KSWAPD_RECLAIM 0x400000u |
41 | #define ___GFP_KSWAPD_RECLAIM 0x1000000u | ||
42 | #ifdef CONFIG_LOCKDEP | 42 | #ifdef CONFIG_LOCKDEP |
43 | #define ___GFP_NOLOCKDEP 0x2000000u | 43 | #define ___GFP_NOLOCKDEP 0x800000u |
44 | #else | 44 | #else |
45 | #define ___GFP_NOLOCKDEP 0 | 45 | #define ___GFP_NOLOCKDEP 0 |
46 | #endif | 46 | #endif |
@@ -205,7 +205,7 @@ struct vm_area_struct; | |||
205 | #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) | 205 | #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) |
206 | 206 | ||
207 | /* Room for N __GFP_FOO bits */ | 207 | /* Room for N __GFP_FOO bits */ |
208 | #define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) | 208 | #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) |
209 | #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) | 209 | #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) |
210 | 210 | ||
211 | /* | 211 | /* |
@@ -343,7 +343,7 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) | |||
343 | * 0x1 => DMA or NORMAL | 343 | * 0x1 => DMA or NORMAL |
344 | * 0x2 => HIGHMEM or NORMAL | 344 | * 0x2 => HIGHMEM or NORMAL |
345 | * 0x3 => BAD (DMA+HIGHMEM) | 345 | * 0x3 => BAD (DMA+HIGHMEM) |
346 | * 0x4 => DMA32 or DMA or NORMAL | 346 | * 0x4 => DMA32 or NORMAL |
347 | * 0x5 => BAD (DMA+DMA32) | 347 | * 0x5 => BAD (DMA+DMA32) |
348 | * 0x6 => BAD (HIGHMEM+DMA32) | 348 | * 0x6 => BAD (HIGHMEM+DMA32) |
349 | * 0x7 => BAD (HIGHMEM+DMA32+DMA) | 349 | * 0x7 => BAD (HIGHMEM+DMA32+DMA) |
@@ -351,7 +351,7 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) | |||
351 | * 0x9 => DMA or NORMAL (MOVABLE+DMA) | 351 | * 0x9 => DMA or NORMAL (MOVABLE+DMA) |
352 | * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) | 352 | * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) |
353 | * 0xb => BAD (MOVABLE+HIGHMEM+DMA) | 353 | * 0xb => BAD (MOVABLE+HIGHMEM+DMA) |
354 | * 0xc => DMA32 (MOVABLE+DMA32) | 354 | * 0xc => DMA32 or NORMAL (MOVABLE+DMA32) |
355 | * 0xd => BAD (MOVABLE+DMA32+DMA) | 355 | * 0xd => BAD (MOVABLE+DMA32+DMA) |
356 | * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) | 356 | * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) |
357 | * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) | 357 | * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) |
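The gfp.h hunk packs ___GFP_WRITE into the hole at 0x100 and slides the reclaim and lockdep bits down, shrinking __GFP_BITS_SHIFT from 25 to 23; the zone-table comments it also corrects are consumed by gfp_zone(). A sketch of that lookup, consistent with the in-tree helper around this series (quoted from memory, so treat the details as an assumption):

	static inline enum zone_type gfp_zone(gfp_t flags)
	{
		enum zone_type z;
		int bit = (__force int) (flags & GFP_ZONEMASK);

		/* four zone-selector bits index a packed table of zones */
		z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
					((1 << GFP_ZONES_SHIFT) - 1);
		VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
		return z;
	}

The corrected comment lines (0x4 and 0xc) reflect that when CONFIG_ZONE_DMA32 is disabled, OPT_ZONE_DMA32 falls back to ZONE_NORMAL, so those selectors can resolve to NORMAL as well.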
diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 2f1327c37a63..4c92e3ba3e16 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h | |||
@@ -522,9 +522,7 @@ void hmm_devmem_remove(struct hmm_devmem *devmem); | |||
522 | static inline void hmm_devmem_page_set_drvdata(struct page *page, | 522 | static inline void hmm_devmem_page_set_drvdata(struct page *page, |
523 | unsigned long data) | 523 | unsigned long data) |
524 | { | 524 | { |
525 | unsigned long *drvdata = (unsigned long *)&page->pgmap; | 525 | page->hmm_data = data; |
526 | |||
527 | drvdata[1] = data; | ||
528 | } | 526 | } |
529 | 527 | ||
530 | /* | 528 | /* |
@@ -535,9 +533,7 @@ static inline void hmm_devmem_page_set_drvdata(struct page *page, | |||
535 | */ | 533 | */ |
536 | static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page) | 534 | static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page) |
537 | { | 535 | { |
538 | const unsigned long *drvdata = (const unsigned long *)&page->pgmap; | 536 | return page->hmm_data; |
539 | |||
540 | return drvdata[1]; | ||
541 | } | 537 | } |
542 | 538 | ||
543 | 539 | ||
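
With the struct page union rewrite in mm_types.h (further down in this series), drvdata no longer has to alias the word after ->pgmap by pointer arithmetic; it gets a named slot, page->hmm_data. A hedged usage sketch from a hypothetical driver that owns the ZONE_DEVICE page (struct my_dev_state is invented):

    struct my_dev_state *state = kmalloc(sizeof(*state), GFP_KERNEL);

    hmm_devmem_page_set_drvdata(page, (unsigned long)state);
    /* ... later, e.g. in the devmem free callback ... */
    state = (struct my_dev_state *)hmm_devmem_page_get_drvdata(page);
    kfree(state);
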
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7aed92624531..7c4e8f1f72d8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -29,6 +29,7 @@ | |||
29 | #define LLONG_MIN (-LLONG_MAX - 1) | 29 | #define LLONG_MIN (-LLONG_MAX - 1) |
30 | #define ULLONG_MAX (~0ULL) | 30 | #define ULLONG_MAX (~0ULL) |
31 | #define SIZE_MAX (~(size_t)0) | 31 | #define SIZE_MAX (~(size_t)0) |
32 | #define PHYS_ADDR_MAX (~(phys_addr_t)0) | ||
32 | 33 | ||
33 | #define U8_MAX ((u8)~0U) | 34 | #define U8_MAX ((u8)~0U) |
34 | #define S8_MAX ((s8)(U8_MAX>>1)) | 35 | #define S8_MAX ((s8)(U8_MAX>>1)) |
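
PHYS_ADDR_MAX gives callers a named sentinel for the top of the physical address space instead of open-coded casts of ULLONG_MAX; note it is a 32-bit value on configurations where phys_addr_t is 32 bits. An illustrative use, not taken from this patch:

    /* search the whole physical address space for a free range */
    phys_addr_t base = memblock_find_in_range(0, PHYS_ADDR_MAX, size, align);
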
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 44368b19b27e..161e8164abcf 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h | |||
@@ -37,17 +37,6 @@ static inline void ksm_exit(struct mm_struct *mm) | |||
37 | __ksm_exit(mm); | 37 | __ksm_exit(mm); |
38 | } | 38 | } |
39 | 39 | ||
40 | static inline struct stable_node *page_stable_node(struct page *page) | ||
41 | { | ||
42 | return PageKsm(page) ? page_rmapping(page) : NULL; | ||
43 | } | ||
44 | |||
45 | static inline void set_page_stable_node(struct page *page, | ||
46 | struct stable_node *stable_node) | ||
47 | { | ||
48 | page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM); | ||
49 | } | ||
50 | |||
51 | /* | 40 | /* |
52 | * When do_swap_page() first faults in from swap what used to be a KSM page, | 41 | * When do_swap_page() first faults in from swap what used to be a KSM page, |
53 | * no problem, it will be assigned to this vma's anon_vma; but thereafter, | 42 | * no problem, it will be assigned to this vma's anon_vma; but thereafter, |
@@ -89,12 +78,6 @@ static inline struct page *ksm_might_need_to_copy(struct page *page, | |||
89 | return page; | 78 | return page; |
90 | } | 79 | } |
91 | 80 | ||
92 | static inline int page_referenced_ksm(struct page *page, | ||
93 | struct mem_cgroup *memcg, unsigned long *vm_flags) | ||
94 | { | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static inline void rmap_walk_ksm(struct page *page, | 81 | static inline void rmap_walk_ksm(struct page *page, |
99 | struct rmap_walk_control *rwc) | 82 | struct rmap_walk_control *rwc) |
100 | { | 83 | { |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d99b71bc2c66..4f52ec755725 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -53,9 +53,17 @@ enum memcg_memory_event { | |||
53 | MEMCG_HIGH, | 53 | MEMCG_HIGH, |
54 | MEMCG_MAX, | 54 | MEMCG_MAX, |
55 | MEMCG_OOM, | 55 | MEMCG_OOM, |
56 | MEMCG_SWAP_MAX, | ||
57 | MEMCG_SWAP_FAIL, | ||
56 | MEMCG_NR_MEMORY_EVENTS, | 58 | MEMCG_NR_MEMORY_EVENTS, |
57 | }; | 59 | }; |
58 | 60 | ||
61 | enum mem_cgroup_protection { | ||
62 | MEMCG_PROT_NONE, | ||
63 | MEMCG_PROT_LOW, | ||
64 | MEMCG_PROT_MIN, | ||
65 | }; | ||
66 | |||
59 | struct mem_cgroup_reclaim_cookie { | 67 | struct mem_cgroup_reclaim_cookie { |
60 | pg_data_t *pgdat; | 68 | pg_data_t *pgdat; |
61 | int priority; | 69 | int priority; |
@@ -158,6 +166,15 @@ enum memcg_kmem_state { | |||
158 | KMEM_ONLINE, | 166 | KMEM_ONLINE, |
159 | }; | 167 | }; |
160 | 168 | ||
169 | #if defined(CONFIG_SMP) | ||
170 | struct memcg_padding { | ||
171 | char x[0]; | ||
172 | } ____cacheline_internodealigned_in_smp; | ||
173 | #define MEMCG_PADDING(name) struct memcg_padding name; | ||
174 | #else | ||
175 | #define MEMCG_PADDING(name) | ||
176 | #endif | ||
177 | |||
161 | /* | 178 | /* |
162 | * The memory controller data structure. The memory controller controls both | 179 | * The memory controller data structure. The memory controller controls both |
163 | * page cache and RSS per cgroup. We would eventually like to provide | 180 | * page cache and RSS per cgroup. We would eventually like to provide |
@@ -179,8 +196,7 @@ struct mem_cgroup { | |||
179 | struct page_counter kmem; | 196 | struct page_counter kmem; |
180 | struct page_counter tcpmem; | 197 | struct page_counter tcpmem; |
181 | 198 | ||
182 | /* Normal memory consumption range */ | 199 | /* Upper bound of normal memory consumption range */ |
183 | unsigned long low; | ||
184 | unsigned long high; | 200 | unsigned long high; |
185 | 201 | ||
186 | /* Range enforcement for interrupt charges */ | 202 | /* Range enforcement for interrupt charges */ |
@@ -205,9 +221,11 @@ struct mem_cgroup { | |||
205 | int oom_kill_disable; | 221 | int oom_kill_disable; |
206 | 222 | ||
207 | /* memory.events */ | 223 | /* memory.events */ |
208 | atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; | ||
209 | struct cgroup_file events_file; | 224 | struct cgroup_file events_file; |
210 | 225 | ||
226 | /* handle for "memory.swap.events" */ | ||
227 | struct cgroup_file swap_events_file; | ||
228 | |||
211 | /* protect arrays of thresholds */ | 229 | /* protect arrays of thresholds */ |
212 | struct mutex thresholds_lock; | 230 | struct mutex thresholds_lock; |
213 | 231 | ||
@@ -225,19 +243,26 @@ struct mem_cgroup { | |||
225 | * mem_cgroup ? And what type of charges should we move ? | 243 | * mem_cgroup ? And what type of charges should we move ? |
226 | */ | 244 | */ |
227 | unsigned long move_charge_at_immigrate; | 245 | unsigned long move_charge_at_immigrate; |
246 | /* taken only while moving_account > 0 */ | ||
247 | spinlock_t move_lock; | ||
248 | unsigned long move_lock_flags; | ||
249 | |||
250 | MEMCG_PADDING(_pad1_); | ||
251 | |||
228 | /* | 252 | /* |
229 | * set > 0 if pages under this cgroup are moving to other cgroup. | 253 | * set > 0 if pages under this cgroup are moving to other cgroup. |
230 | */ | 254 | */ |
231 | atomic_t moving_account; | 255 | atomic_t moving_account; |
232 | /* taken only while moving_account > 0 */ | ||
233 | spinlock_t move_lock; | ||
234 | struct task_struct *move_lock_task; | 256 | struct task_struct *move_lock_task; |
235 | unsigned long move_lock_flags; | ||
236 | 257 | ||
237 | /* memory.stat */ | 258 | /* memory.stat */ |
238 | struct mem_cgroup_stat_cpu __percpu *stat_cpu; | 259 | struct mem_cgroup_stat_cpu __percpu *stat_cpu; |
260 | |||
261 | MEMCG_PADDING(_pad2_); | ||
262 | |||
239 | atomic_long_t stat[MEMCG_NR_STAT]; | 263 | atomic_long_t stat[MEMCG_NR_STAT]; |
240 | atomic_long_t events[NR_VM_EVENT_ITEMS]; | 264 | atomic_long_t events[NR_VM_EVENT_ITEMS]; |
265 | atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; | ||
241 | 266 | ||
242 | unsigned long socket_pressure; | 267 | unsigned long socket_pressure; |
243 | 268 | ||
@@ -285,7 +310,8 @@ static inline bool mem_cgroup_disabled(void) | |||
285 | return !cgroup_subsys_enabled(memory_cgrp_subsys); | 310 | return !cgroup_subsys_enabled(memory_cgrp_subsys); |
286 | } | 311 | } |
287 | 312 | ||
288 | bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); | 313 | enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root, |
314 | struct mem_cgroup *memcg); | ||
289 | 315 | ||
290 | int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, | 316 | int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, |
291 | gfp_t gfp_mask, struct mem_cgroup **memcgp, | 317 | gfp_t gfp_mask, struct mem_cgroup **memcgp, |
@@ -462,7 +488,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, | |||
462 | 488 | ||
463 | void mem_cgroup_handle_over_high(void); | 489 | void mem_cgroup_handle_over_high(void); |
464 | 490 | ||
465 | unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg); | 491 | unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); |
466 | 492 | ||
467 | void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, | 493 | void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, |
468 | struct task_struct *p); | 494 | struct task_struct *p); |
@@ -730,10 +756,10 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, | |||
730 | { | 756 | { |
731 | } | 757 | } |
732 | 758 | ||
733 | static inline bool mem_cgroup_low(struct mem_cgroup *root, | 759 | static inline enum mem_cgroup_protection mem_cgroup_protected( |
734 | struct mem_cgroup *memcg) | 760 | struct mem_cgroup *root, struct mem_cgroup *memcg) |
735 | { | 761 | { |
736 | return false; | 762 | return MEMCG_PROT_NONE; |
737 | } | 763 | } |
738 | 764 | ||
739 | static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, | 765 | static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, |
@@ -853,7 +879,7 @@ mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, | |||
853 | return 0; | 879 | return 0; |
854 | } | 880 | } |
855 | 881 | ||
856 | static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) | 882 | static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg) |
857 | { | 883 | { |
858 | return 0; | 884 | return 0; |
859 | } | 885 | } |
@@ -1093,7 +1119,6 @@ static inline void dec_lruvec_page_state(struct page *page, | |||
1093 | 1119 | ||
1094 | #ifdef CONFIG_CGROUP_WRITEBACK | 1120 | #ifdef CONFIG_CGROUP_WRITEBACK |
1095 | 1121 | ||
1096 | struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); | ||
1097 | struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); | 1122 | struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); |
1098 | void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, | 1123 | void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, |
1099 | unsigned long *pheadroom, unsigned long *pdirty, | 1124 | unsigned long *pheadroom, unsigned long *pdirty, |
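
mem_cgroup_low() returned a bare bool; the tri-state mem_cgroup_protected() lets reclaim distinguish "never touch" (memory.min) from "avoid if possible" (memory.low). A sketch of how the per-memcg iteration loop in a reclaimer can consume it, in the style of mm/vmscan.c (the sc-> field names follow that file's conventions and are shown here as an assumption, not quoted from this series):

    switch (mem_cgroup_protected(root, memcg)) {
    case MEMCG_PROT_MIN:
            /* hard protection: never reclaim from this group */
            continue;
    case MEMCG_PROT_LOW:
            /* soft protection: skip unless nothing unprotected is left */
            if (!sc->memcg_low_reclaim) {
                    sc->memcg_low_skipped = 1;
                    continue;
            }
            memcg_memory_event(memcg, MEMCG_LOW);
            break;
    case MEMCG_PROT_NONE:
            break;
    }
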
diff --git a/include/linux/memfd.h b/include/linux/memfd.h new file mode 100644 index 000000000000..4f1600413f91 --- /dev/null +++ b/include/linux/memfd.h | |||
@@ -0,0 +1,16 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef __LINUX_MEMFD_H | ||
3 | #define __LINUX_MEMFD_H | ||
4 | |||
5 | #include <linux/file.h> | ||
6 | |||
7 | #ifdef CONFIG_MEMFD_CREATE | ||
8 | extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg); | ||
9 | #else | ||
10 | static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a) | ||
11 | { | ||
12 | return -EINVAL; | ||
13 | } | ||
14 | #endif | ||
15 | |||
16 | #endif /* __LINUX_MEMFD_H */ | ||
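
memfd_fcntl() moves out of the shmem header (see the shmem_fs.h hunk below) into this dedicated one; it backs the F_ADD_SEALS/F_GET_SEALS fcntl commands. A hedged userspace sketch of the path that reaches it (requires a libc exposing memfd_create(), e.g. with _GNU_SOURCE defined):

    #define _GNU_SOURCE
    #include <sys/mman.h>
    #include <fcntl.h>
    #include <unistd.h>

    static int make_sealed_memfd(void)
    {
            int fd = memfd_create("demo", MFD_CLOEXEC | MFD_ALLOW_SEALING);

            if (fd < 0)
                    return -1;
            /* F_ADD_SEALS is dispatched by the VFS to memfd_fcntl() */
            if (ftruncate(fd, 4096) < 0 ||
                fcntl(fd, F_ADD_SEALS,
                      F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;
    }
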
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 2b0265265c28..4e9828cda7a2 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
@@ -107,7 +107,6 @@ static inline bool movable_node_is_enabled(void) | |||
107 | } | 107 | } |
108 | 108 | ||
109 | #ifdef CONFIG_MEMORY_HOTREMOVE | 109 | #ifdef CONFIG_MEMORY_HOTREMOVE |
110 | extern bool is_pageblock_removable_nolock(struct page *page); | ||
111 | extern int arch_remove_memory(u64 start, u64 size, | 110 | extern int arch_remove_memory(u64 start, u64 size, |
112 | struct vmem_altmap *altmap); | 111 | struct vmem_altmap *altmap); |
113 | extern int __remove_pages(struct zone *zone, unsigned long start_pfn, | 112 | extern int __remove_pages(struct zone *zone, unsigned long start_pfn, |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 29c5458b4b5e..4c3881b44ef1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1851,6 +1851,7 @@ static inline bool pgtable_page_ctor(struct page *page) | |||
1851 | { | 1851 | { |
1852 | if (!ptlock_init(page)) | 1852 | if (!ptlock_init(page)) |
1853 | return false; | 1853 | return false; |
1854 | __SetPageTable(page); | ||
1854 | inc_zone_page_state(page, NR_PAGETABLE); | 1855 | inc_zone_page_state(page, NR_PAGETABLE); |
1855 | return true; | 1856 | return true; |
1856 | } | 1857 | } |
@@ -1858,6 +1859,7 @@ static inline bool pgtable_page_ctor(struct page *page) | |||
1858 | static inline void pgtable_page_dtor(struct page *page) | 1859 | static inline void pgtable_page_dtor(struct page *page) |
1859 | { | 1860 | { |
1860 | pte_lock_deinit(page); | 1861 | pte_lock_deinit(page); |
1862 | __ClearPageTable(page); | ||
1861 | dec_zone_page_state(page, NR_PAGETABLE); | 1863 | dec_zone_page_state(page, NR_PAGETABLE); |
1862 | } | 1864 | } |
1863 | 1865 | ||
@@ -2303,10 +2305,10 @@ extern void truncate_inode_pages_range(struct address_space *, | |||
2303 | extern void truncate_inode_pages_final(struct address_space *); | 2305 | extern void truncate_inode_pages_final(struct address_space *); |
2304 | 2306 | ||
2305 | /* generic vm_area_ops exported for stackable file systems */ | 2307 | /* generic vm_area_ops exported for stackable file systems */ |
2306 | extern int filemap_fault(struct vm_fault *vmf); | 2308 | extern vm_fault_t filemap_fault(struct vm_fault *vmf); |
2307 | extern void filemap_map_pages(struct vm_fault *vmf, | 2309 | extern void filemap_map_pages(struct vm_fault *vmf, |
2308 | pgoff_t start_pgoff, pgoff_t end_pgoff); | 2310 | pgoff_t start_pgoff, pgoff_t end_pgoff); |
2309 | extern int filemap_page_mkwrite(struct vm_fault *vmf); | 2311 | extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); |
2310 | 2312 | ||
2311 | /* mm/page-writeback.c */ | 2313 | /* mm/page-writeback.c */ |
2312 | int __must_check write_one_page(struct page *page); | 2314 | int __must_check write_one_page(struct page *page); |
@@ -2431,8 +2433,8 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, | |||
2431 | unsigned long pfn, pgprot_t pgprot); | 2433 | unsigned long pfn, pgprot_t pgprot); |
2432 | int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, | 2434 | int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
2433 | pfn_t pfn); | 2435 | pfn_t pfn); |
2434 | int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, | 2436 | vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, |
2435 | pfn_t pfn); | 2437 | unsigned long addr, pfn_t pfn); |
2436 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); | 2438 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); |
2437 | 2439 | ||
2438 | static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, | 2440 | static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, |
@@ -2530,12 +2532,10 @@ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, | |||
2530 | #ifdef CONFIG_PAGE_POISONING | 2532 | #ifdef CONFIG_PAGE_POISONING |
2531 | extern bool page_poisoning_enabled(void); | 2533 | extern bool page_poisoning_enabled(void); |
2532 | extern void kernel_poison_pages(struct page *page, int numpages, int enable); | 2534 | extern void kernel_poison_pages(struct page *page, int numpages, int enable); |
2533 | extern bool page_is_poisoned(struct page *page); | ||
2534 | #else | 2535 | #else |
2535 | static inline bool page_poisoning_enabled(void) { return false; } | 2536 | static inline bool page_poisoning_enabled(void) { return false; } |
2536 | static inline void kernel_poison_pages(struct page *page, int numpages, | 2537 | static inline void kernel_poison_pages(struct page *page, int numpages, |
2537 | int enable) { } | 2538 | int enable) { } |
2538 | static inline bool page_is_poisoned(struct page *page) { return false; } | ||
2539 | #endif | 2539 | #endif |
2540 | 2540 | ||
2541 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2541 | #ifdef CONFIG_DEBUG_PAGEALLOC |
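
The filemap_fault()/filemap_page_mkwrite() signatures here are part of the tree-wide int -> vm_fault_t conversion, which lets tooling catch fault codes mixed up with errnos. A hedged sketch of a driver handler under the new typing (find_my_page() is an invented helper):

    static vm_fault_t my_fault(struct vm_fault *vmf)
    {
            struct page *page = find_my_page(vmf->vma, vmf->pgoff);

            if (!page)
                    return VM_FAULT_SIGBUS;
            get_page(page);
            vmf->page = page;
            return 0;
    }
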
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 21612347d311..99ce070e7dcb 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -33,29 +33,27 @@ struct hmm; | |||
33 | * it to keep track of whatever it is we are using the page for at the | 33 | * it to keep track of whatever it is we are using the page for at the |
34 | * moment. Note that we have no way to track which tasks are using | 34 | * moment. Note that we have no way to track which tasks are using |
35 | * a page, though if it is a pagecache page, rmap structures can tell us | 35 | * a page, though if it is a pagecache page, rmap structures can tell us |
36 | * who is mapping it. If you allocate the page using alloc_pages(), you | 36 | * who is mapping it. |
37 | * can use some of the space in struct page for your own purposes. | ||
38 | * | 37 | * |
39 | * Pages that were once in the page cache may be found under the RCU lock | 38 | * If you allocate the page using alloc_pages(), you can use some of the |
40 | * even after they have been recycled to a different purpose. The page | 39 | * space in struct page for your own purposes. The five words in the main |
41 | * cache reads and writes some of the fields in struct page to pin the | 40 | * union are available, except for bit 0 of the first word which must be |
42 | * page before checking that it's still in the page cache. It is vital | 41 | * kept clear. Many users use this word to store a pointer to an object |
43 | * that all users of struct page: | 42 | * which is guaranteed to be aligned. If you use the same storage as |
44 | * 1. Use the first word as PageFlags. | 43 | * page->mapping, you must restore it to NULL before freeing the page. |
45 | * 2. Clear or preserve bit 0 of page->compound_head. It is used as | ||
46 | * PageTail for compound pages, and the page cache must not see false | ||
47 | * positives. Some users put a pointer here (guaranteed to be at least | ||
48 | * 4-byte aligned), other users avoid using the field altogether. | ||
49 | * 3. page->_refcount must either not be used, or must be used in such a | ||
50 | * way that other CPUs temporarily incrementing and then decrementing the | ||
51 | * refcount does not cause problems. On receiving the page from | ||
52 | * alloc_pages(), the refcount will be positive. | ||
53 | * 4. Either preserve page->_mapcount or restore it to -1 before freeing it. | ||
54 | * | 44 | * |
55 | * If you allocate pages of order > 0, you can use the fields in the struct | 45 | * If your page will not be mapped to userspace, you can also use the four |
56 | * page associated with each page, but bear in mind that the pages may have | 46 | * bytes in the mapcount union, but you must call page_mapcount_reset() |
57 | * been inserted individually into the page cache, so you must use the above | 47 | * before freeing it. |
58 | * four fields in a compatible way for each struct page. | 48 | * |
49 | * If you want to use the refcount field, it must be used in such a way | ||
50 | * that other CPUs temporarily incrementing and then decrementing the | ||
51 | * refcount does not cause problems. On receiving the page from | ||
52 | * alloc_pages(), the refcount will be positive. | ||
53 | * | ||
54 | * If you allocate pages of order > 0, you can use some of the fields | ||
55 | * in each subpage, but you may need to restore some of their values | ||
56 | * afterwards. | ||
59 | * | 57 | * |
60 | * SLUB uses cmpxchg_double() to atomically update its freelist and | 58 | * SLUB uses cmpxchg_double() to atomically update its freelist and |
61 | * counters. That requires that freelist & counters be adjacent and | 59 | * counters. That requires that freelist & counters be adjacent and |
@@ -65,135 +63,122 @@ struct hmm; | |||
65 | */ | 63 | */ |
66 | #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE | 64 | #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE |
67 | #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) | 65 | #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) |
68 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) | ||
69 | #define _slub_counter_t unsigned long | ||
70 | #else | 66 | #else |
71 | #define _slub_counter_t unsigned int | ||
72 | #endif | ||
73 | #else /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */ | ||
74 | #define _struct_page_alignment | 67 | #define _struct_page_alignment |
75 | #define _slub_counter_t unsigned int | 68 | #endif |
76 | #endif /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */ | ||
77 | 69 | ||
78 | struct page { | 70 | struct page { |
79 | /* First double word block */ | ||
80 | unsigned long flags; /* Atomic flags, some possibly | 71 | unsigned long flags; /* Atomic flags, some possibly |
81 | * updated asynchronously */ | 72 | * updated asynchronously */ |
82 | union { | ||
83 | /* See page-flags.h for the definition of PAGE_MAPPING_FLAGS */ | ||
84 | struct address_space *mapping; | ||
85 | |||
86 | void *s_mem; /* slab first object */ | ||
87 | atomic_t compound_mapcount; /* first tail page */ | ||
88 | /* page_deferred_list().next -- second tail page */ | ||
89 | }; | ||
90 | |||
91 | /* Second double word */ | ||
92 | union { | ||
93 | pgoff_t index; /* Our offset within mapping. */ | ||
94 | void *freelist; /* sl[aou]b first free object */ | ||
95 | /* page_deferred_list().prev -- second tail page */ | ||
96 | }; | ||
97 | |||
98 | union { | ||
99 | _slub_counter_t counters; | ||
100 | unsigned int active; /* SLAB */ | ||
101 | struct { /* SLUB */ | ||
102 | unsigned inuse:16; | ||
103 | unsigned objects:15; | ||
104 | unsigned frozen:1; | ||
105 | }; | ||
106 | int units; /* SLOB */ | ||
107 | |||
108 | struct { /* Page cache */ | ||
109 | /* | ||
110 | * Count of ptes mapped in mms, to show when | ||
111 | * page is mapped & limit reverse map searches. | ||
112 | * | ||
113 | * Extra information about page type may be | ||
114 | * stored here for pages that are never mapped, | ||
115 | * in which case the value MUST BE <= -2. | ||
116 | * See page-flags.h for more details. | ||
117 | */ | ||
118 | atomic_t _mapcount; | ||
119 | |||
120 | /* | ||
121 | * Usage count, *USE WRAPPER FUNCTION* when manual | ||
122 | * accounting. See page_ref.h | ||
123 | */ | ||
124 | atomic_t _refcount; | ||
125 | }; | ||
126 | }; | ||
127 | |||
128 | /* | 73 | /* |
129 | * WARNING: bit 0 of the first word encode PageTail(). That means | 74 | * Five words (20/40 bytes) are available in this union. |
130 | * the rest users of the storage space MUST NOT use the bit to | 75 | * WARNING: bit 0 of the first word is used for PageTail(). That |
76 | * means the other users of this union MUST NOT use the bit to | ||
131 | * avoid collision and false-positive PageTail(). | 77 | * avoid collision and false-positive PageTail(). |
132 | */ | 78 | */ |
133 | union { | 79 | union { |
134 | struct list_head lru; /* Pageout list, eg. active_list | 80 | struct { /* Page cache and anonymous pages */ |
135 | * protected by zone_lru_lock ! | 81 | /** |
136 | * Can be used as a generic list | 82 | * @lru: Pageout list, eg. active_list protected by |
137 | * by the page owner. | 83 | * zone_lru_lock. Sometimes used as a generic list |
138 | */ | 84 | * by the page owner. |
139 | struct dev_pagemap *pgmap; /* ZONE_DEVICE pages are never on an | 85 | */ |
140 | * lru or handled by a slab | 86 | struct list_head lru; |
141 | * allocator, this points to the | 87 | /* See page-flags.h for PAGE_MAPPING_FLAGS */ |
142 | * hosting device page map. | 88 | struct address_space *mapping; |
143 | */ | 89 | pgoff_t index; /* Our offset within mapping. */ |
144 | struct { /* slub per cpu partial pages */ | 90 | /** |
145 | struct page *next; /* Next partial slab */ | 91 | * @private: Mapping-private opaque data. |
92 | * Usually used for buffer_heads if PagePrivate. | ||
93 | * Used for swp_entry_t if PageSwapCache. | ||
94 | * Indicates order in the buddy system if PageBuddy. | ||
95 | */ | ||
96 | unsigned long private; | ||
97 | }; | ||
98 | struct { /* slab, slob and slub */ | ||
99 | union { | ||
100 | struct list_head slab_list; /* uses lru */ | ||
101 | struct { /* Partial pages */ | ||
102 | struct page *next; | ||
146 | #ifdef CONFIG_64BIT | 103 | #ifdef CONFIG_64BIT |
147 | int pages; /* Nr of partial slabs left */ | 104 | int pages; /* Nr of pages left */ |
148 | int pobjects; /* Approximate # of objects */ | 105 | int pobjects; /* Approximate count */ |
149 | #else | 106 | #else |
150 | short int pages; | 107 | short int pages; |
151 | short int pobjects; | 108 | short int pobjects; |
152 | #endif | 109 | #endif |
110 | }; | ||
111 | }; | ||
112 | struct kmem_cache *slab_cache; /* not slob */ | ||
113 | /* Double-word boundary */ | ||
114 | void *freelist; /* first free object */ | ||
115 | union { | ||
116 | void *s_mem; /* slab: first object */ | ||
117 | unsigned long counters; /* SLUB */ | ||
118 | struct { /* SLUB */ | ||
119 | unsigned inuse:16; | ||
120 | unsigned objects:15; | ||
121 | unsigned frozen:1; | ||
122 | }; | ||
123 | }; | ||
153 | }; | 124 | }; |
154 | 125 | struct { /* Tail pages of compound page */ | |
155 | struct rcu_head rcu_head; /* Used by SLAB | 126 | unsigned long compound_head; /* Bit zero is set */ |
156 | * when destroying via RCU | ||
157 | */ | ||
158 | /* Tail pages of compound page */ | ||
159 | struct { | ||
160 | unsigned long compound_head; /* If bit zero is set */ | ||
161 | 127 | ||
162 | /* First tail page only */ | 128 | /* First tail page only */ |
163 | unsigned char compound_dtor; | 129 | unsigned char compound_dtor; |
164 | unsigned char compound_order; | 130 | unsigned char compound_order; |
165 | /* two/six bytes available here */ | 131 | atomic_t compound_mapcount; |
166 | }; | 132 | }; |
167 | 133 | struct { /* Second tail page of compound page */ | |
168 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS | 134 | unsigned long _compound_pad_1; /* compound_head */ |
169 | struct { | 135 | unsigned long _compound_pad_2; |
170 | unsigned long __pad; /* do not overlay pmd_huge_pte | 136 | struct list_head deferred_list; |
171 | * with compound_head to avoid | ||
172 | * possible bit 0 collision. | ||
173 | */ | ||
174 | pgtable_t pmd_huge_pte; /* protected by page->ptl */ | ||
175 | }; | 137 | }; |
138 | struct { /* Page table pages */ | ||
139 | unsigned long _pt_pad_1; /* compound_head */ | ||
140 | pgtable_t pmd_huge_pte; /* protected by page->ptl */ | ||
141 | unsigned long _pt_pad_2; /* mapping */ | ||
142 | struct mm_struct *pt_mm; /* x86 pgds only */ | ||
143 | #if ALLOC_SPLIT_PTLOCKS | ||
144 | spinlock_t *ptl; | ||
145 | #else | ||
146 | spinlock_t ptl; | ||
176 | #endif | 147 | #endif |
148 | }; | ||
149 | struct { /* ZONE_DEVICE pages */ | ||
150 | /** @pgmap: Points to the hosting device page map. */ | ||
151 | struct dev_pagemap *pgmap; | ||
152 | unsigned long hmm_data; | ||
153 | unsigned long _zd_pad_1; /* uses mapping */ | ||
154 | }; | ||
155 | |||
156 | /** @rcu_head: You can use this to free a page by RCU. */ | ||
157 | struct rcu_head rcu_head; | ||
177 | }; | 158 | }; |
178 | 159 | ||
179 | union { | 160 | union { /* This union is 4 bytes in size. */ |
180 | /* | 161 | /* |
181 | * Mapping-private opaque data: | 162 | * If the page can be mapped to userspace, encodes the number |
182 | * Usually used for buffer_heads if PagePrivate | 163 | * of times this page is referenced by a page table. |
183 | * Used for swp_entry_t if PageSwapCache | ||
184 | * Indicates order in the buddy system if PageBuddy | ||
185 | */ | 164 | */ |
186 | unsigned long private; | 165 | atomic_t _mapcount; |
187 | #if USE_SPLIT_PTE_PTLOCKS | 166 | |
188 | #if ALLOC_SPLIT_PTLOCKS | 167 | /* |
189 | spinlock_t *ptl; | 168 | * If the page is neither PageSlab nor mappable to userspace, |
190 | #else | 169 | * the value stored here may help determine what this page |
191 | spinlock_t ptl; | 170 | * is used for. See page-flags.h for a list of page types |
192 | #endif | 171 | * which are currently stored here. |
193 | #endif | 172 | */ |
194 | struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ | 173 | unsigned int page_type; |
174 | |||
175 | unsigned int active; /* SLAB */ | ||
176 | int units; /* SLOB */ | ||
195 | }; | 177 | }; |
196 | 178 | ||
179 | /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ | ||
180 | atomic_t _refcount; | ||
181 | |||
197 | #ifdef CONFIG_MEMCG | 182 | #ifdef CONFIG_MEMCG |
198 | struct mem_cgroup *mem_cgroup; | 183 | struct mem_cgroup *mem_cgroup; |
199 | #endif | 184 | #endif |
@@ -413,6 +398,8 @@ struct mm_struct { | |||
413 | unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ | 398 | unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ |
414 | unsigned long stack_vm; /* VM_STACK */ | 399 | unsigned long stack_vm; /* VM_STACK */ |
415 | unsigned long def_flags; | 400 | unsigned long def_flags; |
401 | |||
402 | spinlock_t arg_lock; /* protect the below fields */ | ||
416 | unsigned long start_code, end_code, start_data, end_data; | 403 | unsigned long start_code, end_code, start_data, end_data; |
417 | unsigned long start_brk, brk, start_stack; | 404 | unsigned long start_brk, brk, start_stack; |
418 | unsigned long arg_start, arg_end, env_start, env_end; | 405 | unsigned long arg_start, arg_end, env_start, env_end; |
@@ -627,9 +614,9 @@ struct vm_special_mapping { | |||
627 | * If non-NULL, then this is called to resolve page faults | 614 | * If non-NULL, then this is called to resolve page faults |
628 | * on the special mapping. If used, .pages is not checked. | 615 | * on the special mapping. If used, .pages is not checked. |
629 | */ | 616 | */ |
630 | int (*fault)(const struct vm_special_mapping *sm, | 617 | vm_fault_t (*fault)(const struct vm_special_mapping *sm, |
631 | struct vm_area_struct *vma, | 618 | struct vm_area_struct *vma, |
632 | struct vm_fault *vmf); | 619 | struct vm_fault *vmf); |
633 | 620 | ||
634 | int (*mremap)(const struct vm_special_mapping *sm, | 621 | int (*mremap)(const struct vm_special_mapping *sm, |
635 | struct vm_area_struct *new_vma); | 622 | struct vm_area_struct *new_vma); |
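
Every sub-struct in the reorganized five-word union must keep its first word overlaying compound_head so that bit 0 stays a reliable PageTail() discriminant. A hypothetical compile-time check of that invariant (not part of this patch):

    #include <linux/build_bug.h>
    #include <linux/stddef.h>

    static inline void check_struct_page_layout(void)
    {
            BUILD_BUG_ON(offsetof(struct page, lru) !=
                         offsetof(struct page, compound_head));
            BUILD_BUG_ON(offsetof(struct page, lru) !=
                         offsetof(struct page, pgmap));
    }
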
diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 1cc5ffb769af..7cd1473c64a4 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h | |||
@@ -53,93 +53,32 @@ struct gcry_mpi { | |||
53 | typedef struct gcry_mpi *MPI; | 53 | typedef struct gcry_mpi *MPI; |
54 | 54 | ||
55 | #define mpi_get_nlimbs(a) ((a)->nlimbs) | 55 | #define mpi_get_nlimbs(a) ((a)->nlimbs) |
56 | #define mpi_is_neg(a) ((a)->sign) | ||
57 | 56 | ||
58 | /*-- mpiutil.c --*/ | 57 | /*-- mpiutil.c --*/ |
59 | MPI mpi_alloc(unsigned nlimbs); | 58 | MPI mpi_alloc(unsigned nlimbs); |
60 | MPI mpi_alloc_secure(unsigned nlimbs); | ||
61 | MPI mpi_alloc_like(MPI a); | ||
62 | void mpi_free(MPI a); | 59 | void mpi_free(MPI a); |
63 | int mpi_resize(MPI a, unsigned nlimbs); | 60 | int mpi_resize(MPI a, unsigned nlimbs); |
64 | int mpi_copy(MPI *copy, const MPI a); | ||
65 | void mpi_clear(MPI a); | ||
66 | int mpi_set(MPI w, MPI u); | ||
67 | int mpi_set_ui(MPI w, ulong u); | ||
68 | MPI mpi_alloc_set_ui(unsigned long u); | ||
69 | void mpi_m_check(MPI a); | ||
70 | void mpi_swap(MPI a, MPI b); | ||
71 | 61 | ||
72 | /*-- mpicoder.c --*/ | 62 | /*-- mpicoder.c --*/ |
73 | MPI do_encode_md(const void *sha_buffer, unsigned nbits); | ||
74 | MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); | 63 | MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); |
75 | MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); | 64 | MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); |
76 | MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); | 65 | MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); |
77 | int mpi_fromstr(MPI val, const char *str); | ||
78 | u32 mpi_get_keyid(MPI a, u32 *keyid); | ||
79 | void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); | 66 | void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); |
80 | int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, | 67 | int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, |
81 | int *sign); | 68 | int *sign); |
82 | void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign); | ||
83 | int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, | 69 | int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, |
84 | int *sign); | 70 | int *sign); |
85 | 71 | ||
86 | #define log_mpidump g10_log_mpidump | ||
87 | |||
88 | /*-- mpi-add.c --*/ | ||
89 | int mpi_add_ui(MPI w, MPI u, ulong v); | ||
90 | int mpi_add(MPI w, MPI u, MPI v); | ||
91 | int mpi_addm(MPI w, MPI u, MPI v, MPI m); | ||
92 | int mpi_sub_ui(MPI w, MPI u, ulong v); | ||
93 | int mpi_sub(MPI w, MPI u, MPI v); | ||
94 | int mpi_subm(MPI w, MPI u, MPI v, MPI m); | ||
95 | |||
96 | /*-- mpi-mul.c --*/ | ||
97 | int mpi_mul_ui(MPI w, MPI u, ulong v); | ||
98 | int mpi_mul_2exp(MPI w, MPI u, ulong cnt); | ||
99 | int mpi_mul(MPI w, MPI u, MPI v); | ||
100 | int mpi_mulm(MPI w, MPI u, MPI v, MPI m); | ||
101 | |||
102 | /*-- mpi-div.c --*/ | ||
103 | ulong mpi_fdiv_r_ui(MPI rem, MPI dividend, ulong divisor); | ||
104 | int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); | ||
105 | int mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor); | ||
106 | int mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor); | ||
107 | int mpi_tdiv_r(MPI rem, MPI num, MPI den); | ||
108 | int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den); | ||
109 | int mpi_tdiv_q_2exp(MPI w, MPI u, unsigned count); | ||
110 | int mpi_divisible_ui(const MPI dividend, ulong divisor); | ||
111 | |||
112 | /*-- mpi-gcd.c --*/ | ||
113 | int mpi_gcd(MPI g, const MPI a, const MPI b); | ||
114 | |||
115 | /*-- mpi-pow.c --*/ | 72 | /*-- mpi-pow.c --*/ |
116 | int mpi_pow(MPI w, MPI u, MPI v); | ||
117 | int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); | 73 | int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); |
118 | 74 | ||
119 | /*-- mpi-mpow.c --*/ | ||
120 | int mpi_mulpowm(MPI res, MPI *basearray, MPI *exparray, MPI mod); | ||
121 | |||
122 | /*-- mpi-cmp.c --*/ | 75 | /*-- mpi-cmp.c --*/ |
123 | int mpi_cmp_ui(MPI u, ulong v); | 76 | int mpi_cmp_ui(MPI u, ulong v); |
124 | int mpi_cmp(MPI u, MPI v); | 77 | int mpi_cmp(MPI u, MPI v); |
125 | 78 | ||
126 | /*-- mpi-scan.c --*/ | ||
127 | int mpi_getbyte(MPI a, unsigned idx); | ||
128 | void mpi_putbyte(MPI a, unsigned idx, int value); | ||
129 | unsigned mpi_trailing_zeros(MPI a); | ||
130 | |||
131 | /*-- mpi-bit.c --*/ | 79 | /*-- mpi-bit.c --*/ |
132 | void mpi_normalize(MPI a); | 80 | void mpi_normalize(MPI a); |
133 | unsigned mpi_get_nbits(MPI a); | 81 | unsigned mpi_get_nbits(MPI a); |
134 | int mpi_test_bit(MPI a, unsigned n); | ||
135 | int mpi_set_bit(MPI a, unsigned n); | ||
136 | int mpi_set_highbit(MPI a, unsigned n); | ||
137 | void mpi_clear_highbit(MPI a, unsigned n); | ||
138 | void mpi_clear_bit(MPI a, unsigned n); | ||
139 | int mpi_rshift(MPI x, MPI a, unsigned n); | ||
140 | |||
141 | /*-- mpi-inv.c --*/ | ||
142 | int mpi_invm(MPI x, MPI u, MPI v); | ||
143 | 82 | ||
144 | /* inline functions */ | 83 | /* inline functions */ |
145 | 84 | ||
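
The pruning keeps only the surface the kernel's RSA path actually calls. That surviving API, sketched (sig/sig_len and the e/n MPIs are assumed inputs; mpi_free() tolerates NULL):

    MPI s = mpi_read_raw_data(sig, sig_len);      /* signature as an MPI */
    MPI res = mpi_alloc(mpi_get_nlimbs(n));
    int rc = -ENOMEM;

    if (s && res)
            rc = mpi_powm(res, s, e, n);          /* res = s^e mod n */
    /* ... on success, compare res with the encoded digest ... */
    mpi_free(res);
    mpi_free(s);
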
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e34a27727b9a..901943e4754b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -642,49 +642,62 @@ PAGEFLAG_FALSE(DoubleMap) | |||
642 | #endif | 642 | #endif |
643 | 643 | ||
644 | /* | 644 | /* |
645 | * For pages that are never mapped to userspace, page->mapcount may be | 645 | * For pages that are never mapped to userspace (and aren't PageSlab), |
646 | * used for storing extra information about page type. Any value used | 646 | * page_type may be used. Because it is initialised to -1, we invert the |
647 | * for this purpose must be <= -2, but it's better start not too close | 647 | * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and |
648 | * to -2 so that an underflow of the page_mapcount() won't be mistaken | 648 | * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and |
649 | * for a special page. | 649 | * low bits so that an underflow or overflow of page_mapcount() won't be |
650 | * mistaken for a page type value. | ||
650 | */ | 651 | */ |
651 | #define PAGE_MAPCOUNT_OPS(uname, lname) \ | 652 | |
653 | #define PAGE_TYPE_BASE 0xf0000000 | ||
654 | /* Reserve 0x0000007f to catch underflows of page_mapcount */ | ||
655 | #define PG_buddy 0x00000080 | ||
656 | #define PG_balloon 0x00000100 | ||
657 | #define PG_kmemcg 0x00000200 | ||
658 | #define PG_table 0x00000400 | ||
659 | |||
660 | #define PageType(page, flag) \ | ||
661 | ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) | ||
662 | |||
663 | #define PAGE_TYPE_OPS(uname, lname) \ | ||
652 | static __always_inline int Page##uname(struct page *page) \ | 664 | static __always_inline int Page##uname(struct page *page) \ |
653 | { \ | 665 | { \ |
654 | return atomic_read(&page->_mapcount) == \ | 666 | return PageType(page, PG_##lname); \ |
655 | PAGE_##lname##_MAPCOUNT_VALUE; \ | ||
656 | } \ | 667 | } \ |
657 | static __always_inline void __SetPage##uname(struct page *page) \ | 668 | static __always_inline void __SetPage##uname(struct page *page) \ |
658 | { \ | 669 | { \ |
659 | VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); \ | 670 | VM_BUG_ON_PAGE(!PageType(page, 0), page); \ |
660 | atomic_set(&page->_mapcount, PAGE_##lname##_MAPCOUNT_VALUE); \ | 671 | page->page_type &= ~PG_##lname; \ |
661 | } \ | 672 | } \ |
662 | static __always_inline void __ClearPage##uname(struct page *page) \ | 673 | static __always_inline void __ClearPage##uname(struct page *page) \ |
663 | { \ | 674 | { \ |
664 | VM_BUG_ON_PAGE(!Page##uname(page), page); \ | 675 | VM_BUG_ON_PAGE(!Page##uname(page), page); \ |
665 | atomic_set(&page->_mapcount, -1); \ | 676 | page->page_type |= PG_##lname; \ |
666 | } | 677 | } |
667 | 678 | ||
668 | /* | 679 | /* |
669 | * PageBuddy() indicate that the page is free and in the buddy system | 680 | * PageBuddy() indicates that the page is free and in the buddy system |
670 | * (see mm/page_alloc.c). | 681 | * (see mm/page_alloc.c). |
671 | */ | 682 | */ |
672 | #define PAGE_BUDDY_MAPCOUNT_VALUE (-128) | 683 | PAGE_TYPE_OPS(Buddy, buddy) |
673 | PAGE_MAPCOUNT_OPS(Buddy, BUDDY) | ||
674 | 684 | ||
675 | /* | 685 | /* |
676 | * PageBalloon() is set on pages that are on the balloon page list | 686 | * PageBalloon() is true for pages that are on the balloon page list |
677 | * (see mm/balloon_compaction.c). | 687 | * (see mm/balloon_compaction.c). |
678 | */ | 688 | */ |
679 | #define PAGE_BALLOON_MAPCOUNT_VALUE (-256) | 689 | PAGE_TYPE_OPS(Balloon, balloon) |
680 | PAGE_MAPCOUNT_OPS(Balloon, BALLOON) | ||
681 | 690 | ||
682 | /* | 691 | /* |
683 | * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on | 692 | * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on |
684 | * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. | 693 | * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. |
685 | */ | 694 | */ |
686 | #define PAGE_KMEMCG_MAPCOUNT_VALUE (-512) | 695 | PAGE_TYPE_OPS(Kmemcg, kmemcg) |
687 | PAGE_MAPCOUNT_OPS(Kmemcg, KMEMCG) | 696 | |
697 | /* | ||
698 | * Marks pages in use as page tables. | ||
699 | */ | ||
700 | PAGE_TYPE_OPS(Table, table) | ||
688 | 701 | ||
689 | extern bool is_free_buddy_page(struct page *page); | 702 | extern bool is_free_buddy_page(struct page *page); |
690 | 703 | ||
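
Because page_type shares storage with _mapcount and is initialised to -1, the type bits are encoded inverted; a worked example for PG_table, using the values defined above:

    /* fresh page:       page_type == 0xffffffff (reads as mapcount -1)   */
    /* __SetPageTable(): 0xffffffff & ~0x00000400 -> 0xfffffbff           */
    /* PageTable():      0xfffffbff & (0xf0000000 | 0x00000400)           */
    /*                       == 0xf0000000 -> true                        */
    /* untyped page:     0xffffffff & 0xf0000400 == 0xf0000400 -> false   */
    /* a real mapcount (small, non-negative) never has the 0xf0000000     */
    /* bits set, so it cannot be mistaken for a page type                 */
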
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index c15ab80ad32d..bab7e57f659b 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h | |||
@@ -7,10 +7,22 @@ | |||
7 | #include <asm/page.h> | 7 | #include <asm/page.h> |
8 | 8 | ||
9 | struct page_counter { | 9 | struct page_counter { |
10 | atomic_long_t count; | 10 | atomic_long_t usage; |
11 | unsigned long limit; | 11 | unsigned long min; |
12 | unsigned long low; | ||
13 | unsigned long max; | ||
12 | struct page_counter *parent; | 14 | struct page_counter *parent; |
13 | 15 | ||
16 | /* effective memory.min and memory.min usage tracking */ | ||
17 | unsigned long emin; | ||
18 | atomic_long_t min_usage; | ||
19 | atomic_long_t children_min_usage; | ||
20 | |||
21 | /* effective memory.low and memory.low usage tracking */ | ||
22 | unsigned long elow; | ||
23 | atomic_long_t low_usage; | ||
24 | atomic_long_t children_low_usage; | ||
25 | |||
14 | /* legacy */ | 26 | /* legacy */ |
15 | unsigned long watermark; | 27 | unsigned long watermark; |
16 | unsigned long failcnt; | 28 | unsigned long failcnt; |
@@ -25,14 +37,14 @@ struct page_counter { | |||
25 | static inline void page_counter_init(struct page_counter *counter, | 37 | static inline void page_counter_init(struct page_counter *counter, |
26 | struct page_counter *parent) | 38 | struct page_counter *parent) |
27 | { | 39 | { |
28 | atomic_long_set(&counter->count, 0); | 40 | atomic_long_set(&counter->usage, 0); |
29 | counter->limit = PAGE_COUNTER_MAX; | 41 | counter->max = PAGE_COUNTER_MAX; |
30 | counter->parent = parent; | 42 | counter->parent = parent; |
31 | } | 43 | } |
32 | 44 | ||
33 | static inline unsigned long page_counter_read(struct page_counter *counter) | 45 | static inline unsigned long page_counter_read(struct page_counter *counter) |
34 | { | 46 | { |
35 | return atomic_long_read(&counter->count); | 47 | return atomic_long_read(&counter->usage); |
36 | } | 48 | } |
37 | 49 | ||
38 | void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages); | 50 | void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages); |
@@ -41,7 +53,9 @@ bool page_counter_try_charge(struct page_counter *counter, | |||
41 | unsigned long nr_pages, | 53 | unsigned long nr_pages, |
42 | struct page_counter **fail); | 54 | struct page_counter **fail); |
43 | void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); | 55 | void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); |
44 | int page_counter_limit(struct page_counter *counter, unsigned long limit); | 56 | void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages); |
57 | void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages); | ||
58 | int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages); | ||
45 | int page_counter_memparse(const char *buf, const char *max, | 59 | int page_counter_memparse(const char *buf, const char *max, |
46 | unsigned long *nr_pages); | 60 | unsigned long *nr_pages); |
47 | 61 | ||
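
elow/emin and the children_*_usage sums exist to implement the proportional overcommit split described in the cgroup-v2 documentation hunk. A hedged sketch of the memory.low half of that computation (memory.min is symmetrical; the real logic lives in mm/memcontrol.c):

    unsigned long usage = page_counter_read(counter);
    unsigned long low_usage = min(usage, counter->low);
    unsigned long siblings =
            atomic_long_read(&counter->parent->children_low_usage);
    unsigned long elow = min(counter->low, counter->parent->elow);

    /* scale the parent's protection by this child's share of it */
    if (low_usage && siblings)
            elow = min(elow,
                       counter->parent->elow * low_usage / siblings);
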
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index a03c2642a87c..21713dc14ce2 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h | |||
@@ -122,7 +122,7 @@ pud_t pud_mkdevmap(pud_t pud); | |||
122 | #endif | 122 | #endif |
123 | #endif /* __HAVE_ARCH_PTE_DEVMAP */ | 123 | #endif /* __HAVE_ARCH_PTE_DEVMAP */ |
124 | 124 | ||
125 | #ifdef __HAVE_ARCH_PTE_SPECIAL | 125 | #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL |
126 | static inline bool pfn_t_special(pfn_t pfn) | 126 | static inline bool pfn_t_special(pfn_t pfn) |
127 | { | 127 | { |
128 | return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; | 128 | return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; |
@@ -132,5 +132,5 @@ static inline bool pfn_t_special(pfn_t pfn) | |||
132 | { | 132 | { |
133 | return false; | 133 | return false; |
134 | } | 134 | } |
135 | #endif /* __HAVE_ARCH_PTE_SPECIAL */ | 135 | #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ |
136 | #endif /* _LINUX_PFN_T_H_ */ | 136 | #endif /* _LINUX_PFN_T_H_ */ |
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 76a8cb4ef178..44d356f5e47c 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h | |||
@@ -163,9 +163,13 @@ static inline gfp_t current_gfp_context(gfp_t flags) | |||
163 | } | 163 | } |
164 | 164 | ||
165 | #ifdef CONFIG_LOCKDEP | 165 | #ifdef CONFIG_LOCKDEP |
166 | extern void __fs_reclaim_acquire(void); | ||
167 | extern void __fs_reclaim_release(void); | ||
166 | extern void fs_reclaim_acquire(gfp_t gfp_mask); | 168 | extern void fs_reclaim_acquire(gfp_t gfp_mask); |
167 | extern void fs_reclaim_release(gfp_t gfp_mask); | 169 | extern void fs_reclaim_release(gfp_t gfp_mask); |
168 | #else | 170 | #else |
171 | static inline void __fs_reclaim_acquire(void) { } | ||
172 | static inline void __fs_reclaim_release(void) { } | ||
169 | static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } | 173 | static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } |
170 | static inline void fs_reclaim_release(gfp_t gfp_mask) { } | 174 | static inline void fs_reclaim_release(gfp_t gfp_mask) { } |
171 | #endif | 175 | #endif |
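
The new bare __fs_reclaim_acquire()/__fs_reclaim_release() skip the gfp-mask filtering done by the checked variants, so a path that has already validated its mask can cheaply toggle the lockdep map around an inner section. A hedged sketch (try_to_free_my_pages() is an invented helper):

    __fs_reclaim_acquire();
    progress = try_to_free_my_pages(order);  /* may take fs locks */
    __fs_reclaim_release();
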
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 73b5e655a76e..f155dc607112 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h | |||
@@ -110,19 +110,6 @@ static inline bool shmem_file(struct file *file) | |||
110 | extern bool shmem_charge(struct inode *inode, long pages); | 110 | extern bool shmem_charge(struct inode *inode, long pages); |
111 | extern void shmem_uncharge(struct inode *inode, long pages); | 111 | extern void shmem_uncharge(struct inode *inode, long pages); |
112 | 112 | ||
113 | #ifdef CONFIG_TMPFS | ||
114 | |||
115 | extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg); | ||
116 | |||
117 | #else | ||
118 | |||
119 | static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a) | ||
120 | { | ||
121 | return -EINVAL; | ||
122 | } | ||
123 | |||
124 | #endif | ||
125 | |||
126 | #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE | 113 | #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
127 | extern bool shmem_huge_enabled(struct vm_area_struct *vma); | 114 | extern bool shmem_huge_enabled(struct vm_area_struct *vma); |
128 | #else | 115 | #else |
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index d9228e4d0320..3485c58cfd1c 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h | |||
@@ -67,9 +67,10 @@ struct kmem_cache { | |||
67 | 67 | ||
68 | /* | 68 | /* |
69 | * If debugging is enabled, then the allocator can add additional | 69 | * If debugging is enabled, then the allocator can add additional |
70 | * fields and/or padding to every object. size contains the total | 70 | * fields and/or padding to every object. 'size' contains the total |
71 | * object size including these internal fields, the following two | 71 | * object size including these internal fields, while 'obj_offset' |
72 | * variables contain the offset to the user object and its size. | 72 | * and 'object_size' contain the offset to the user object and its |
73 | * size. | ||
73 | */ | 74 | */ |
74 | int obj_offset; | 75 | int obj_offset; |
75 | #endif /* CONFIG_DEBUG_SLAB */ | 76 | #endif /* CONFIG_DEBUG_SLAB */ |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 3773e26c08c1..09fa2c6f0e68 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
@@ -101,7 +101,6 @@ struct kmem_cache { | |||
101 | void (*ctor)(void *); | 101 | void (*ctor)(void *); |
102 | unsigned int inuse; /* Offset to metadata */ | 102 | unsigned int inuse; /* Offset to metadata */ |
103 | unsigned int align; /* Alignment */ | 103 | unsigned int align; /* Alignment */ |
104 | unsigned int reserved; /* Reserved bytes at the end of slabs */ | ||
105 | unsigned int red_left_pad; /* Left redzone padding size */ | 104 | unsigned int red_left_pad; /* Left redzone padding size */ |
106 | const char *name; /* Name (only for display!) */ | 105 | const char *name; /* Name (only for display!) */ |
107 | struct list_head list; /* List of slab caches */ | 106 | struct list_head list; /* List of slab caches */ |
diff --git a/include/linux/types.h b/include/linux/types.h index ec13d02b3481..9834e90aa010 100644 --- a/include/linux/types.h +++ b/include/linux/types.h | |||
@@ -10,14 +10,14 @@ | |||
10 | #define DECLARE_BITMAP(name,bits) \ | 10 | #define DECLARE_BITMAP(name,bits) \ |
11 | unsigned long name[BITS_TO_LONGS(bits)] | 11 | unsigned long name[BITS_TO_LONGS(bits)] |
12 | 12 | ||
13 | typedef __u32 __kernel_dev_t; | 13 | typedef u32 __kernel_dev_t; |
14 | 14 | ||
15 | typedef __kernel_fd_set fd_set; | 15 | typedef __kernel_fd_set fd_set; |
16 | typedef __kernel_dev_t dev_t; | 16 | typedef __kernel_dev_t dev_t; |
17 | typedef __kernel_ino_t ino_t; | 17 | typedef __kernel_ino_t ino_t; |
18 | typedef __kernel_mode_t mode_t; | 18 | typedef __kernel_mode_t mode_t; |
19 | typedef unsigned short umode_t; | 19 | typedef unsigned short umode_t; |
20 | typedef __u32 nlink_t; | 20 | typedef u32 nlink_t; |
21 | typedef __kernel_off_t off_t; | 21 | typedef __kernel_off_t off_t; |
22 | typedef __kernel_pid_t pid_t; | 22 | typedef __kernel_pid_t pid_t; |
23 | typedef __kernel_daddr_t daddr_t; | 23 | typedef __kernel_daddr_t daddr_t; |
@@ -95,29 +95,29 @@ typedef unsigned long ulong; | |||
95 | #ifndef __BIT_TYPES_DEFINED__ | 95 | #ifndef __BIT_TYPES_DEFINED__ |
96 | #define __BIT_TYPES_DEFINED__ | 96 | #define __BIT_TYPES_DEFINED__ |
97 | 97 | ||
98 | typedef __u8 u_int8_t; | 98 | typedef u8 u_int8_t; |
99 | typedef __s8 int8_t; | 99 | typedef s8 int8_t; |
100 | typedef __u16 u_int16_t; | 100 | typedef u16 u_int16_t; |
101 | typedef __s16 int16_t; | 101 | typedef s16 int16_t; |
102 | typedef __u32 u_int32_t; | 102 | typedef u32 u_int32_t; |
103 | typedef __s32 int32_t; | 103 | typedef s32 int32_t; |
104 | 104 | ||
105 | #endif /* !(__BIT_TYPES_DEFINED__) */ | 105 | #endif /* !(__BIT_TYPES_DEFINED__) */ |
106 | 106 | ||
107 | typedef __u8 uint8_t; | 107 | typedef u8 uint8_t; |
108 | typedef __u16 uint16_t; | 108 | typedef u16 uint16_t; |
109 | typedef __u32 uint32_t; | 109 | typedef u32 uint32_t; |
110 | 110 | ||
111 | #if defined(__GNUC__) | 111 | #if defined(__GNUC__) |
112 | typedef __u64 uint64_t; | 112 | typedef u64 uint64_t; |
113 | typedef __u64 u_int64_t; | 113 | typedef u64 u_int64_t; |
114 | typedef __s64 int64_t; | 114 | typedef s64 int64_t; |
115 | #endif | 115 | #endif |
116 | 116 | ||
117 | /* this is a special 64bit data type that is 8-byte aligned */ | 117 | /* this is a special 64bit data type that is 8-byte aligned */ |
118 | #define aligned_u64 __u64 __attribute__((aligned(8))) | 118 | #define aligned_u64 __aligned_u64 |
119 | #define aligned_be64 __be64 __attribute__((aligned(8))) | 119 | #define aligned_be64 __aligned_be64 |
120 | #define aligned_le64 __le64 __attribute__((aligned(8))) | 120 | #define aligned_le64 __aligned_le64 |
121 | 121 | ||
122 | /** | 122 | /** |
123 | * The type used for indexing onto a disc or disc partition. | 123 | * The type used for indexing onto a disc or disc partition. |
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index f2f3b68ba910..e091f0a11b11 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h | |||
@@ -31,10 +31,12 @@ | |||
31 | extern int handle_userfault(struct vm_fault *vmf, unsigned long reason); | 31 | extern int handle_userfault(struct vm_fault *vmf, unsigned long reason); |
32 | 32 | ||
33 | extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, | 33 | extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, |
34 | unsigned long src_start, unsigned long len); | 34 | unsigned long src_start, unsigned long len, |
35 | bool *mmap_changing); | ||
35 | extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, | 36 | extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, |
36 | unsigned long dst_start, | 37 | unsigned long dst_start, |
37 | unsigned long len); | 38 | unsigned long len, |
39 | bool *mmap_changing); | ||
38 | 40 | ||
39 | /* mm helpers */ | 41 | /* mm helpers */ |
40 | static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, | 42 | static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, |
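
The extra bool *mmap_changing lets the copy/zeropage workers bail out when the destination mm's layout is being changed underneath them. A sketch of the new call shape from the UFFDIO_COPY ioctl path (the ctx field name follows fs/userfaultfd.c conventions and is shown as an assumption):

    ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
                       uffdio_copy.len, &ctx->mmap_changing);
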
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 2a4432c7a4b4..e13eec3dfb2f 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h | |||
@@ -1,6 +1,8 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ | 1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ |
2 | /* | 2 | /* |
3 | * Copyright 1997 Transmeta Corporation - All Rights Reserved | 3 | * Copyright 1997 Transmeta Corporation - All Rights Reserved |
4 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> | ||
5 | * Copyright 2005-2006,2013,2017-2018 Ian Kent <raven@themaw.net> | ||
4 | * | 6 | * |
5 | * This file is part of the Linux kernel and is made available under | 7 | * This file is part of the Linux kernel and is made available under |
6 | * the terms of the GNU General Public License, version 2, or at your | 8 | * the terms of the GNU General Public License, version 2, or at your |
@@ -8,7 +10,6 @@ | |||
8 | * | 10 | * |
9 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
10 | 12 | ||
11 | |||
12 | #ifndef _UAPI_LINUX_AUTO_FS_H | 13 | #ifndef _UAPI_LINUX_AUTO_FS_H |
13 | #define _UAPI_LINUX_AUTO_FS_H | 14 | #define _UAPI_LINUX_AUTO_FS_H |
14 | 15 | ||
@@ -18,13 +19,11 @@ | |||
18 | #include <sys/ioctl.h> | 19 | #include <sys/ioctl.h> |
19 | #endif /* __KERNEL__ */ | 20 | #endif /* __KERNEL__ */ |
20 | 21 | ||
22 | #define AUTOFS_PROTO_VERSION 5 | ||
23 | #define AUTOFS_MIN_PROTO_VERSION 3 | ||
24 | #define AUTOFS_MAX_PROTO_VERSION 5 | ||
21 | 25 | ||
22 | /* This file describes autofs v3 */ | 26 | #define AUTOFS_PROTO_SUBVERSION 2 |
23 | #define AUTOFS_PROTO_VERSION 3 | ||
24 | |||
25 | /* Range of protocol versions defined */ | ||
26 | #define AUTOFS_MAX_PROTO_VERSION AUTOFS_PROTO_VERSION | ||
27 | #define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION | ||
28 | 27 | ||
29 | /* | 28 | /* |
30 | * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed | 29 | * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed |
@@ -76,9 +75,155 @@ enum { | |||
76 | #define AUTOFS_IOC_READY _IO(AUTOFS_IOCTL, AUTOFS_IOC_READY_CMD) | 75 | #define AUTOFS_IOC_READY _IO(AUTOFS_IOCTL, AUTOFS_IOC_READY_CMD) |
77 | #define AUTOFS_IOC_FAIL _IO(AUTOFS_IOCTL, AUTOFS_IOC_FAIL_CMD) | 76 | #define AUTOFS_IOC_FAIL _IO(AUTOFS_IOCTL, AUTOFS_IOC_FAIL_CMD) |
78 | #define AUTOFS_IOC_CATATONIC _IO(AUTOFS_IOCTL, AUTOFS_IOC_CATATONIC_CMD) | 77 | #define AUTOFS_IOC_CATATONIC _IO(AUTOFS_IOCTL, AUTOFS_IOC_CATATONIC_CMD) |
79 | #define AUTOFS_IOC_PROTOVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOVER_CMD, int) | 78 | #define AUTOFS_IOC_PROTOVER _IOR(AUTOFS_IOCTL, \ |
80 | #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, compat_ulong_t) | 79 | AUTOFS_IOC_PROTOVER_CMD, int) |
81 | #define AUTOFS_IOC_SETTIMEOUT _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, unsigned long) | 80 | #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(AUTOFS_IOCTL, \ |
82 | #define AUTOFS_IOC_EXPIRE _IOR(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_CMD, struct autofs_packet_expire) | 81 | AUTOFS_IOC_SETTIMEOUT_CMD, \ |
82 | compat_ulong_t) | ||
83 | #define AUTOFS_IOC_SETTIMEOUT _IOWR(AUTOFS_IOCTL, \ | ||
84 | AUTOFS_IOC_SETTIMEOUT_CMD, \ | ||
85 | unsigned long) | ||
86 | #define AUTOFS_IOC_EXPIRE _IOR(AUTOFS_IOCTL, \ | ||
87 | AUTOFS_IOC_EXPIRE_CMD, \ | ||
88 | struct autofs_packet_expire) | ||
89 | |||
90 | /* autofs version 4 and later definitions */ | ||
91 | |||
92 | /* Mask for expire behaviour */ | ||
93 | #define AUTOFS_EXP_IMMEDIATE 1 | ||
94 | #define AUTOFS_EXP_LEAVES 2 | ||
95 | |||
96 | #define AUTOFS_TYPE_ANY 0U | ||
97 | #define AUTOFS_TYPE_INDIRECT 1U | ||
98 | #define AUTOFS_TYPE_DIRECT 2U | ||
99 | #define AUTOFS_TYPE_OFFSET 4U | ||
100 | |||
101 | static inline void set_autofs_type_indirect(unsigned int *type) | ||
102 | { | ||
103 | *type = AUTOFS_TYPE_INDIRECT; | ||
104 | } | ||
105 | |||
106 | static inline unsigned int autofs_type_indirect(unsigned int type) | ||
107 | { | ||
108 | return (type == AUTOFS_TYPE_INDIRECT); | ||
109 | } | ||
110 | |||
111 | static inline void set_autofs_type_direct(unsigned int *type) | ||
112 | { | ||
113 | *type = AUTOFS_TYPE_DIRECT; | ||
114 | } | ||
115 | |||
116 | static inline unsigned int autofs_type_direct(unsigned int type) | ||
117 | { | ||
118 | return (type == AUTOFS_TYPE_DIRECT); | ||
119 | } | ||
120 | |||
121 | static inline void set_autofs_type_offset(unsigned int *type) | ||
122 | { | ||
123 | *type = AUTOFS_TYPE_OFFSET; | ||
124 | } | ||
125 | |||
126 | static inline unsigned int autofs_type_offset(unsigned int type) | ||
127 | { | ||
128 | return (type == AUTOFS_TYPE_OFFSET); | ||
129 | } | ||
130 | |||
131 | static inline unsigned int autofs_type_trigger(unsigned int type) | ||
132 | { | ||
133 | return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET); | ||
134 | } | ||
135 | |||
136 | /* | ||
137 | * This isn't really a type as we use it to say "no type set" to | ||
138 | * indicate we want to search for "any" mount in the | ||
139 | * autofs_dev_ioctl_ismountpoint() device ioctl function. | ||
140 | */ | ||
141 | static inline void set_autofs_type_any(unsigned int *type) | ||
142 | { | ||
143 | *type = AUTOFS_TYPE_ANY; | ||
144 | } | ||
145 | |||
146 | static inline unsigned int autofs_type_any(unsigned int type) | ||
147 | { | ||
148 | return (type == AUTOFS_TYPE_ANY); | ||
149 | } | ||
150 | |||
151 | /* Daemon notification packet types */ | ||
152 | enum autofs_notify { | ||
153 | NFY_NONE, | ||
154 | NFY_MOUNT, | ||
155 | NFY_EXPIRE | ||
156 | }; | ||
157 | |||
158 | /* Kernel protocol version 4 packet types */ | ||
159 | |||
160 | /* Expire entry (umount request) */ | ||
161 | #define autofs_ptype_expire_multi 2 | ||
162 | |||
163 | /* Kernel protocol version 5 packet types */ | ||
164 | |||
165 | /* Indirect mount missing and expire requests. */ | ||
166 | #define autofs_ptype_missing_indirect 3 | ||
167 | #define autofs_ptype_expire_indirect 4 | ||
168 | |||
169 | /* Direct mount missing and expire requests */ | ||
170 | #define autofs_ptype_missing_direct 5 | ||
171 | #define autofs_ptype_expire_direct 6 | ||
172 | |||
173 | /* v4 multi expire (via pipe) */ | ||
174 | struct autofs_packet_expire_multi { | ||
175 | struct autofs_packet_hdr hdr; | ||
176 | autofs_wqt_t wait_queue_token; | ||
177 | int len; | ||
178 | char name[NAME_MAX+1]; | ||
179 | }; | ||
180 | |||
181 | union autofs_packet_union { | ||
182 | struct autofs_packet_hdr hdr; | ||
183 | struct autofs_packet_missing missing; | ||
184 | struct autofs_packet_expire expire; | ||
185 | struct autofs_packet_expire_multi expire_multi; | ||
186 | }; | ||
187 | |||
188 | /* autofs v5 common packet struct */ | ||
189 | struct autofs_v5_packet { | ||
190 | struct autofs_packet_hdr hdr; | ||
191 | autofs_wqt_t wait_queue_token; | ||
192 | __u32 dev; | ||
193 | __u64 ino; | ||
194 | __u32 uid; | ||
195 | __u32 gid; | ||
196 | __u32 pid; | ||
197 | __u32 tgid; | ||
198 | __u32 len; | ||
199 | char name[NAME_MAX+1]; | ||
200 | }; | ||
201 | |||
202 | typedef struct autofs_v5_packet autofs_packet_missing_indirect_t; | ||
203 | typedef struct autofs_v5_packet autofs_packet_expire_indirect_t; | ||
204 | typedef struct autofs_v5_packet autofs_packet_missing_direct_t; | ||
205 | typedef struct autofs_v5_packet autofs_packet_expire_direct_t; | ||
206 | |||
207 | union autofs_v5_packet_union { | ||
208 | struct autofs_packet_hdr hdr; | ||
209 | struct autofs_v5_packet v5_packet; | ||
210 | autofs_packet_missing_indirect_t missing_indirect; | ||
211 | autofs_packet_expire_indirect_t expire_indirect; | ||
212 | autofs_packet_missing_direct_t missing_direct; | ||
213 | autofs_packet_expire_direct_t expire_direct; | ||
214 | }; | ||
215 | |||
216 | enum { | ||
217 | AUTOFS_IOC_EXPIRE_MULTI_CMD = 0x66, /* AUTOFS_IOC_EXPIRE_CMD + 1 */ | ||
218 | AUTOFS_IOC_PROTOSUBVER_CMD, | ||
219 | AUTOFS_IOC_ASKUMOUNT_CMD = 0x70, /* AUTOFS_DEV_IOCTL_VERSION_CMD - 1 */ | ||
220 | }; | ||
221 | |||
222 | #define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, \ | ||
223 | AUTOFS_IOC_EXPIRE_MULTI_CMD, int) | ||
224 | #define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, \ | ||
225 | AUTOFS_IOC_PROTOSUBVER_CMD, int) | ||
226 | #define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, \ | ||
227 | AUTOFS_IOC_ASKUMOUNT_CMD, int) | ||
83 | 228 | ||
84 | #endif /* _UAPI_LINUX_AUTO_FS_H */ | 229 | #endif /* _UAPI_LINUX_AUTO_FS_H */ |
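The reflowed ioctl definitions above only wrap long lines; the command numbers and argument types are unchanged, so existing automount binaries keep working. A minimal userspace sketch (the mount point path is hypothetical) that queries the kernel's protocol version through the consolidated header:

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/auto_fs.h>

    int main(void)
    {
            int ver;
            int fd = open("/mnt/auto", O_RDONLY);   /* hypothetical autofs mount point */

            if (fd < 0)
                    return 1;
            if (ioctl(fd, AUTOFS_IOC_PROTOVER, &ver) == 0)  /* _IOR(..., int): kernel fills ver */
                    printf("autofs protocol version %d\n", ver);
            return 0;
    }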
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index 1f608e27a06f..d01ef0a0189c 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h | |||
@@ -7,156 +7,9 @@ | |||
7 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #ifndef _LINUX_AUTO_FS4_H | 10 | #ifndef _UAPI_LINUX_AUTO_FS4_H |
11 | #define _LINUX_AUTO_FS4_H | 11 | #define _UAPI_LINUX_AUTO_FS4_H |
12 | 12 | ||
13 | /* Include common v3 definitions */ | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/auto_fs.h> | 13 | #include <linux/auto_fs.h> |
16 | 14 | ||
17 | /* autofs v4 definitions */ | 15 | #endif /* _UAPI_LINUX_AUTO_FS4_H */ |
18 | #undef AUTOFS_PROTO_VERSION | ||
19 | #undef AUTOFS_MIN_PROTO_VERSION | ||
20 | #undef AUTOFS_MAX_PROTO_VERSION | ||
21 | |||
22 | #define AUTOFS_PROTO_VERSION 5 | ||
23 | #define AUTOFS_MIN_PROTO_VERSION 3 | ||
24 | #define AUTOFS_MAX_PROTO_VERSION 5 | ||
25 | |||
26 | #define AUTOFS_PROTO_SUBVERSION 2 | ||
27 | |||
28 | /* Mask for expire behaviour */ | ||
29 | #define AUTOFS_EXP_IMMEDIATE 1 | ||
30 | #define AUTOFS_EXP_LEAVES 2 | ||
31 | |||
32 | #define AUTOFS_TYPE_ANY 0U | ||
33 | #define AUTOFS_TYPE_INDIRECT 1U | ||
34 | #define AUTOFS_TYPE_DIRECT 2U | ||
35 | #define AUTOFS_TYPE_OFFSET 4U | ||
36 | |||
37 | static inline void set_autofs_type_indirect(unsigned int *type) | ||
38 | { | ||
39 | *type = AUTOFS_TYPE_INDIRECT; | ||
40 | } | ||
41 | |||
42 | static inline unsigned int autofs_type_indirect(unsigned int type) | ||
43 | { | ||
44 | return (type == AUTOFS_TYPE_INDIRECT); | ||
45 | } | ||
46 | |||
47 | static inline void set_autofs_type_direct(unsigned int *type) | ||
48 | { | ||
49 | *type = AUTOFS_TYPE_DIRECT; | ||
50 | } | ||
51 | |||
52 | static inline unsigned int autofs_type_direct(unsigned int type) | ||
53 | { | ||
54 | return (type == AUTOFS_TYPE_DIRECT); | ||
55 | } | ||
56 | |||
57 | static inline void set_autofs_type_offset(unsigned int *type) | ||
58 | { | ||
59 | *type = AUTOFS_TYPE_OFFSET; | ||
60 | } | ||
61 | |||
62 | static inline unsigned int autofs_type_offset(unsigned int type) | ||
63 | { | ||
64 | return (type == AUTOFS_TYPE_OFFSET); | ||
65 | } | ||
66 | |||
67 | static inline unsigned int autofs_type_trigger(unsigned int type) | ||
68 | { | ||
69 | return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET); | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * This isn't really a type as we use it to say "no type set" to | ||
74 | * indicate we want to search for "any" mount in the | ||
75 | * autofs_dev_ioctl_ismountpoint() device ioctl function. | ||
76 | */ | ||
77 | static inline void set_autofs_type_any(unsigned int *type) | ||
78 | { | ||
79 | *type = AUTOFS_TYPE_ANY; | ||
80 | } | ||
81 | |||
82 | static inline unsigned int autofs_type_any(unsigned int type) | ||
83 | { | ||
84 | return (type == AUTOFS_TYPE_ANY); | ||
85 | } | ||
86 | |||
87 | /* Daemon notification packet types */ | ||
88 | enum autofs_notify { | ||
89 | NFY_NONE, | ||
90 | NFY_MOUNT, | ||
91 | NFY_EXPIRE | ||
92 | }; | ||
93 | |||
94 | /* Kernel protocol version 4 packet types */ | ||
95 | |||
96 | /* Expire entry (umount request) */ | ||
97 | #define autofs_ptype_expire_multi 2 | ||
98 | |||
99 | /* Kernel protocol version 5 packet types */ | ||
100 | |||
101 | /* Indirect mount missing and expire requests. */ | ||
102 | #define autofs_ptype_missing_indirect 3 | ||
103 | #define autofs_ptype_expire_indirect 4 | ||
104 | |||
105 | /* Direct mount missing and expire requests */ | ||
106 | #define autofs_ptype_missing_direct 5 | ||
107 | #define autofs_ptype_expire_direct 6 | ||
108 | |||
109 | /* v4 multi expire (via pipe) */ | ||
110 | struct autofs_packet_expire_multi { | ||
111 | struct autofs_packet_hdr hdr; | ||
112 | autofs_wqt_t wait_queue_token; | ||
113 | int len; | ||
114 | char name[NAME_MAX+1]; | ||
115 | }; | ||
116 | |||
117 | union autofs_packet_union { | ||
118 | struct autofs_packet_hdr hdr; | ||
119 | struct autofs_packet_missing missing; | ||
120 | struct autofs_packet_expire expire; | ||
121 | struct autofs_packet_expire_multi expire_multi; | ||
122 | }; | ||
123 | |||
124 | /* autofs v5 common packet struct */ | ||
125 | struct autofs_v5_packet { | ||
126 | struct autofs_packet_hdr hdr; | ||
127 | autofs_wqt_t wait_queue_token; | ||
128 | __u32 dev; | ||
129 | __u64 ino; | ||
130 | __u32 uid; | ||
131 | __u32 gid; | ||
132 | __u32 pid; | ||
133 | __u32 tgid; | ||
134 | __u32 len; | ||
135 | char name[NAME_MAX+1]; | ||
136 | }; | ||
137 | |||
138 | typedef struct autofs_v5_packet autofs_packet_missing_indirect_t; | ||
139 | typedef struct autofs_v5_packet autofs_packet_expire_indirect_t; | ||
140 | typedef struct autofs_v5_packet autofs_packet_missing_direct_t; | ||
141 | typedef struct autofs_v5_packet autofs_packet_expire_direct_t; | ||
142 | |||
143 | union autofs_v5_packet_union { | ||
144 | struct autofs_packet_hdr hdr; | ||
145 | struct autofs_v5_packet v5_packet; | ||
146 | autofs_packet_missing_indirect_t missing_indirect; | ||
147 | autofs_packet_expire_indirect_t expire_indirect; | ||
148 | autofs_packet_missing_direct_t missing_direct; | ||
149 | autofs_packet_expire_direct_t expire_direct; | ||
150 | }; | ||
151 | |||
152 | enum { | ||
153 | AUTOFS_IOC_EXPIRE_MULTI_CMD = 0x66, /* AUTOFS_IOC_EXPIRE_CMD + 1 */ | ||
154 | AUTOFS_IOC_PROTOSUBVER_CMD, | ||
155 | AUTOFS_IOC_ASKUMOUNT_CMD = 0x70, /* AUTOFS_DEV_IOCTL_VERSION_CMD - 1 */ | ||
156 | }; | ||
157 | |||
158 | #define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int) | ||
159 | #define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int) | ||
160 | #define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int) | ||
161 | |||
162 | #endif /* _LINUX_AUTO_FS4_H */ | ||
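With the v4/v5 definitions folded into <linux/auto_fs.h>, auto_fs4.h survives only as a compatibility stub. Daemon sources that include the old header keep compiling unchanged; a sketch of the compatibility property:

    #include <linux/auto_fs4.h>     /* legacy include, now just pulls in auto_fs.h */

    /* the v5 definitions remain visible through the stub */
    static const int max_proto = AUTOFS_MAX_PROTO_VERSION;  /* 5 */
    static const union autofs_v5_packet_union *pkt;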
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index fa139841ec18..21b9113c69da 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h | |||
@@ -35,6 +35,6 @@ | |||
35 | #define KPF_BALLOON 23 | 35 | #define KPF_BALLOON 23 |
36 | #define KPF_ZERO_PAGE 24 | 36 | #define KPF_ZERO_PAGE 24 |
37 | #define KPF_IDLE 25 | 37 | #define KPF_IDLE 25 |
38 | 38 | #define KPF_PGTABLE 26 | |
39 | 39 | ||
40 | #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ | 40 | #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ |
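KPF_PGTABLE (bit 26) marks physical pages that currently back page tables. A sketch of a reader, assuming root privileges and the documented /proc/kpageflags format of one little-endian u64 of flag bits per PFN:

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>

    #define KPF_PGTABLE 26

    /* Returns 1 if the page at @pfn backs a page table, 0 if not, -1 on error. */
    static int pfn_is_pgtable(int fd, uint64_t pfn)
    {
            uint64_t flags;

            if (pread(fd, &flags, sizeof(flags), pfn * sizeof(flags)) != sizeof(flags))
                    return -1;
            return (flags >> KPF_PGTABLE) & 1;
    }

    int main(void)
    {
            int fd = open("/proc/kpageflags", O_RDONLY);

            if (fd < 0)
                    return 1;
            printf("pfn 0: pgtable=%d\n", pfn_is_pgtable(fd, 0));
            close(fd);
            return 0;
    }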
diff --git a/kernel/crash_core.c b/kernel/crash_core.c index f7674d676889..b66aced5e8c2 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c | |||
@@ -460,6 +460,7 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
460 | VMCOREINFO_NUMBER(PG_hwpoison); | 460 | VMCOREINFO_NUMBER(PG_hwpoison); |
461 | #endif | 461 | #endif |
462 | VMCOREINFO_NUMBER(PG_head_mask); | 462 | VMCOREINFO_NUMBER(PG_head_mask); |
463 | #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) | ||
463 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); | 464 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); |
464 | #ifdef CONFIG_HUGETLB_PAGE | 465 | #ifdef CONFIG_HUGETLB_PAGE |
465 | VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR); | 466 | VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR); |
diff --git a/kernel/fork.c b/kernel/fork.c index 80b48a8fb47b..c6d1c1ce9ed7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -899,6 +899,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, | |||
899 | mm->pinned_vm = 0; | 899 | mm->pinned_vm = 0; |
900 | memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); | 900 | memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); |
901 | spin_lock_init(&mm->page_table_lock); | 901 | spin_lock_init(&mm->page_table_lock); |
902 | spin_lock_init(&mm->arg_lock); | ||
902 | mm_init_cpumask(mm); | 903 | mm_init_cpumask(mm); |
903 | mm_init_aio(mm); | 904 | mm_init_aio(mm); |
904 | mm_init_owner(mm, p); | 905 | mm_init_owner(mm, p); |
diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 751593ed7c0b..32b479468e4d 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c | |||
@@ -44,6 +44,7 @@ int __read_mostly sysctl_hung_task_warnings = 10; | |||
44 | 44 | ||
45 | static int __read_mostly did_panic; | 45 | static int __read_mostly did_panic; |
46 | static bool hung_task_show_lock; | 46 | static bool hung_task_show_lock; |
47 | static bool hung_task_call_panic; | ||
47 | 48 | ||
48 | static struct task_struct *watchdog_task; | 49 | static struct task_struct *watchdog_task; |
49 | 50 | ||
@@ -127,10 +128,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) | |||
127 | touch_nmi_watchdog(); | 128 | touch_nmi_watchdog(); |
128 | 129 | ||
129 | if (sysctl_hung_task_panic) { | 130 | if (sysctl_hung_task_panic) { |
130 | if (hung_task_show_lock) | 131 | hung_task_show_lock = true; |
131 | debug_show_all_locks(); | 132 | hung_task_call_panic = true; |
132 | trigger_all_cpu_backtrace(); | ||
133 | panic("hung_task: blocked tasks"); | ||
134 | } | 133 | } |
135 | } | 134 | } |
136 | 135 | ||
@@ -193,6 +192,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) | |||
193 | rcu_read_unlock(); | 192 | rcu_read_unlock(); |
194 | if (hung_task_show_lock) | 193 | if (hung_task_show_lock) |
195 | debug_show_all_locks(); | 194 | debug_show_all_locks(); |
195 | if (hung_task_call_panic) { | ||
196 | trigger_all_cpu_backtrace(); | ||
197 | panic("hung_task: blocked tasks"); | ||
198 | } | ||
196 | } | 199 | } |
197 | 200 | ||
198 | static long hung_timeout_jiffies(unsigned long last_checked, | 201 | static long hung_timeout_jiffies(unsigned long last_checked, |
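Assembled from the two hunks above, the new flow defers all side effects out of the RCU-protected task walk: check_hung_task() only records what should happen, and check_hung_uninterruptible_tasks() acts on the flags once rcu_read_unlock() has run, where the all-CPU backtrace and panic are safe. In outline:

    /* inside the rcu_read_lock()ed walk: record intent only */
    if (sysctl_hung_task_panic) {
            hung_task_show_lock = true;
            hung_task_call_panic = true;
    }

    /* after rcu_read_unlock(): perform the expensive diagnostics */
    if (hung_task_show_lock)
            debug_show_all_locks();
    if (hung_task_call_panic) {
            trigger_all_cpu_backtrace();
            panic("hung_task: blocked tasks");
    }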
diff --git a/kernel/sys.c b/kernel/sys.c index d1b2b8d934bb..38509dc1f77b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -2018,7 +2018,11 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data | |||
2018 | return error; | 2018 | return error; |
2019 | } | 2019 | } |
2020 | 2020 | ||
2021 | down_write(&mm->mmap_sem); | 2021 | /* |
2022 | * arg_lock protects concurrent updates but we still need mmap_sem for | ||
2023 | * read to exclude races with sys_brk. | ||
2024 | */ | ||
2025 | down_read(&mm->mmap_sem); | ||
2022 | 2026 | ||
2023 | /* | 2027 | /* |
2024 | * We don't validate if these members are pointing to | 2028 | * We don't validate if these members are pointing to |
@@ -2032,6 +2036,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data | |||
2032 | * to any problem in kernel itself | 2036 | * to any problem in kernel itself |
2033 | */ | 2037 | */ |
2034 | 2038 | ||
2039 | spin_lock(&mm->arg_lock); | ||
2035 | mm->start_code = prctl_map.start_code; | 2040 | mm->start_code = prctl_map.start_code; |
2036 | mm->end_code = prctl_map.end_code; | 2041 | mm->end_code = prctl_map.end_code; |
2037 | mm->start_data = prctl_map.start_data; | 2042 | mm->start_data = prctl_map.start_data; |
@@ -2043,6 +2048,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data | |||
2043 | mm->arg_end = prctl_map.arg_end; | 2048 | mm->arg_end = prctl_map.arg_end; |
2044 | mm->env_start = prctl_map.env_start; | 2049 | mm->env_start = prctl_map.env_start; |
2045 | mm->env_end = prctl_map.env_end; | 2050 | mm->env_end = prctl_map.env_end; |
2051 | spin_unlock(&mm->arg_lock); | ||
2046 | 2052 | ||
2047 | /* | 2053 | /* |
2048 | * Note this update of @saved_auxv is lockless thus | 2054 | * Note this update of @saved_auxv is lockless thus |
@@ -2055,7 +2061,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data | |||
2055 | if (prctl_map.auxv_size) | 2061 | if (prctl_map.auxv_size) |
2056 | memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); | 2062 | memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); |
2057 | 2063 | ||
2058 | up_write(&mm->mmap_sem); | 2064 | up_read(&mm->mmap_sem); |
2059 | return 0; | 2065 | return 0; |
2060 | } | 2066 | } |
2061 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | 2067 | #endif /* CONFIG_CHECKPOINT_RESTORE */ |
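With arg_lock in place, prctl_set_mm_map() writes the arg/env boundaries under the spinlock while holding mmap_sem only for read, so a reader needs just the spinlock to see a consistent pair. A sketch of the reader side, modeled on what get_cmdline() does after this series:

    unsigned long arg_start, arg_end;

    spin_lock(&mm->arg_lock);
    arg_start = mm->arg_start;      /* snapshot both ends under one lock... */
    arg_end = mm->arg_end;
    spin_unlock(&mm->arg_lock);
    /* ...so arg_start/arg_end can never be observed mid-update */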
diff --git a/lib/bitmap.c b/lib/bitmap.c index a42eff7e8c48..58f9750e49c6 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c | |||
@@ -64,12 +64,9 @@ EXPORT_SYMBOL(__bitmap_equal); | |||
64 | 64 | ||
65 | void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) | 65 | void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) |
66 | { | 66 | { |
67 | unsigned int k, lim = bits/BITS_PER_LONG; | 67 | unsigned int k, lim = BITS_TO_LONGS(bits); |
68 | for (k = 0; k < lim; ++k) | 68 | for (k = 0; k < lim; ++k) |
69 | dst[k] = ~src[k]; | 69 | dst[k] = ~src[k]; |
70 | |||
71 | if (bits % BITS_PER_LONG) | ||
72 | dst[k] = ~src[k]; | ||
73 | } | 70 | } |
74 | EXPORT_SYMBOL(__bitmap_complement); | 71 | EXPORT_SYMBOL(__bitmap_complement); |
75 | 72 | ||
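BITS_TO_LONGS() rounds up, so the single loop now also covers the trailing partial word that the removed `bits % BITS_PER_LONG` branch handled; bits beyond the requested length in the last word are inverted too, which callers of bitmap ops are already expected to ignore. A userspace rendering of the arithmetic:

    #include <stdio.h>

    #define BITS_PER_LONG (8 * sizeof(long))
    #define BITS_TO_LONGS(nr) (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
            printf("%zu\n", BITS_TO_LONGS(70));     /* 2 on 64-bit: full word + 6-bit tail */
            printf("%zu\n", BITS_TO_LONGS(64));     /* 1: no tail word */
            return 0;
    }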
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c index 266a97c5708b..ade3ce6c4af6 100644 --- a/lib/bucket_locks.c +++ b/lib/bucket_locks.c | |||
@@ -30,10 +30,7 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, | |||
30 | } | 30 | } |
31 | 31 | ||
32 | if (sizeof(spinlock_t) != 0) { | 32 | if (sizeof(spinlock_t) != 0) { |
33 | if (gfpflags_allow_blocking(gfp)) | 33 | tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp); |
34 | tlocks = kvmalloc(size * sizeof(spinlock_t), gfp); | ||
35 | else | ||
36 | tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp); | ||
37 | if (!tlocks) | 34 | if (!tlocks) |
38 | return -ENOMEM; | 35 | return -ENOMEM; |
39 | for (i = 0; i < size; i++) | 36 | for (i = 0; i < size; i++) |
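kvmalloc_array() subsumes both branches: it overflow-checks the multiplication and internally picks kmalloc or, when the gfp flags allow blocking, a vmalloc fallback. Roughly (a sketch of the idea, not the exact mm/util.c source):

    static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
    {
            if (size != 0 && n > SIZE_MAX / size)   /* n * size would overflow */
                    return NULL;
            return kvmalloc(n * size, flags);       /* kmalloc first, vmalloc if allowed */
    }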
diff --git a/lib/idr.c b/lib/idr.c --- a/lib/idr.c +++ b/lib/idr.c | |||
@@ -4,9 +4,9 @@ | |||
4 | #include <linux/idr.h> | 4 | #include <linux/idr.h> |
5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
6 | #include <linux/spinlock.h> | 6 | #include <linux/spinlock.h> |
7 | #include <linux/xarray.h> | ||
7 | 8 | ||
8 | DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); | 9 | DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); |
9 | static DEFINE_SPINLOCK(simple_ida_lock); | ||
10 | 10 | ||
11 | /** | 11 | /** |
12 | * idr_alloc_u32() - Allocate an ID. | 12 | * idr_alloc_u32() - Allocate an ID. |
@@ -581,7 +581,7 @@ again: | |||
581 | if (!ida_pre_get(ida, gfp_mask)) | 581 | if (!ida_pre_get(ida, gfp_mask)) |
582 | return -ENOMEM; | 582 | return -ENOMEM; |
583 | 583 | ||
584 | spin_lock_irqsave(&simple_ida_lock, flags); | 584 | xa_lock_irqsave(&ida->ida_rt, flags); |
585 | ret = ida_get_new_above(ida, start, &id); | 585 | ret = ida_get_new_above(ida, start, &id); |
586 | if (!ret) { | 586 | if (!ret) { |
587 | if (id > max) { | 587 | if (id > max) { |
@@ -591,7 +591,7 @@ again: | |||
591 | ret = id; | 591 | ret = id; |
592 | } | 592 | } |
593 | } | 593 | } |
594 | spin_unlock_irqrestore(&simple_ida_lock, flags); | 594 | xa_unlock_irqrestore(&ida->ida_rt, flags); |
595 | 595 | ||
596 | if (unlikely(ret == -EAGAIN)) | 596 | if (unlikely(ret == -EAGAIN)) |
597 | goto again; | 597 | goto again; |
@@ -615,8 +615,8 @@ void ida_simple_remove(struct ida *ida, unsigned int id) | |||
615 | unsigned long flags; | 615 | unsigned long flags; |
616 | 616 | ||
617 | BUG_ON((int)id < 0); | 617 | BUG_ON((int)id < 0); |
618 | spin_lock_irqsave(&simple_ida_lock, flags); | 618 | xa_lock_irqsave(&ida->ida_rt, flags); |
619 | ida_remove(ida, id); | 619 | ida_remove(ida, id); |
620 | spin_unlock_irqrestore(&simple_ida_lock, flags); | 620 | xa_unlock_irqrestore(&ida->ida_rt, flags); |
621 | } | 621 | } |
622 | EXPORT_SYMBOL(ida_simple_remove); | 622 | EXPORT_SYMBOL(ida_simple_remove); |
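Replacing the global simple_ida_lock with each IDA's own xa_lock means unrelated allocators stop contending on one spinlock; the calling convention is untouched. A sketch of a typical user (names hypothetical):

    static DEFINE_IDA(my_minor_ida);

    int my_device_add(void)
    {
            /* allocate an id in [0, 256); may sleep with GFP_KERNEL */
            int minor = ida_simple_get(&my_minor_ida, 0, 256, GFP_KERNEL);

            if (minor < 0)
                    return minor;   /* -ENOMEM or -ENOSPC */
            return minor;
    }

    void my_device_remove(int minor)
    {
            ida_simple_remove(&my_minor_ida, minor);
    }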
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index 7eceeddb3fb8..c2d6f4efcfbc 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h | |||
@@ -65,13 +65,6 @@ | |||
65 | typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ | 65 | typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ |
66 | typedef int mpi_size_t; /* (must be a signed type) */ | 66 | typedef int mpi_size_t; /* (must be a signed type) */ |
67 | 67 | ||
68 | static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) | ||
69 | { | ||
70 | if (a->alloced < b) | ||
71 | return mpi_resize(a, b); | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | /* Copy N limbs from S to D. */ | 68 | /* Copy N limbs from S to D. */ |
76 | #define MPN_COPY(d, s, n) \ | 69 | #define MPN_COPY(d, s, n) \ |
77 | do { \ | 70 | do { \ |
@@ -80,13 +73,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) | |||
80 | (d)[_i] = (s)[_i]; \ | 73 | (d)[_i] = (s)[_i]; \ |
81 | } while (0) | 74 | } while (0) |
82 | 75 | ||
83 | #define MPN_COPY_INCR(d, s, n) \ | ||
84 | do { \ | ||
85 | mpi_size_t _i; \ | ||
86 | for (_i = 0; _i < (n); _i++) \ | ||
87 | (d)[_i] = (s)[_i]; \ | ||
88 | } while (0) | ||
89 | |||
90 | #define MPN_COPY_DECR(d, s, n) \ | 76 | #define MPN_COPY_DECR(d, s, n) \ |
91 | do { \ | 77 | do { \ |
92 | mpi_size_t _i; \ | 78 | mpi_size_t _i; \ |
@@ -111,15 +97,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) | |||
111 | } \ | 97 | } \ |
112 | } while (0) | 98 | } while (0) |
113 | 99 | ||
114 | #define MPN_NORMALIZE_NOT_ZERO(d, n) \ | ||
115 | do { \ | ||
116 | for (;;) { \ | ||
117 | if ((d)[(n)-1]) \ | ||
118 | break; \ | ||
119 | (n)--; \ | ||
120 | } \ | ||
121 | } while (0) | ||
122 | |||
123 | #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ | 100 | #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ |
124 | do { \ | 101 | do { \ |
125 | if ((size) < KARATSUBA_THRESHOLD) \ | 102 | if ((size) < KARATSUBA_THRESHOLD) \ |
@@ -128,46 +105,11 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) | |||
128 | mul_n(prodp, up, vp, size, tspace); \ | 105 | mul_n(prodp, up, vp, size, tspace); \ |
129 | } while (0); | 106 | } while (0); |
130 | 107 | ||
131 | /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest | ||
132 | * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). | ||
133 | * If this would yield overflow, DI should be the largest possible number | ||
134 | * (i.e., only ones). For correct operation, the most significant bit of D | ||
135 | * has to be set. Put the quotient in Q and the remainder in R. | ||
136 | */ | ||
137 | #define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ | ||
138 | do { \ | ||
139 | mpi_limb_t _q, _ql, _r; \ | ||
140 | mpi_limb_t _xh, _xl; \ | ||
141 | umul_ppmm(_q, _ql, (nh), (di)); \ | ||
142 | _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ | ||
143 | umul_ppmm(_xh, _xl, _q, (d)); \ | ||
144 | sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl); \ | ||
145 | if (_xh) { \ | ||
146 | sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ | ||
147 | _q++; \ | ||
148 | if (_xh) { \ | ||
149 | sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ | ||
150 | _q++; \ | ||
151 | } \ | ||
152 | } \ | ||
153 | if (_r >= (d)) { \ | ||
154 | _r -= (d); \ | ||
155 | _q++; \ | ||
156 | } \ | ||
157 | (r) = _r; \ | ||
158 | (q) = _q; \ | ||
159 | } while (0) | ||
160 | |||
161 | /*-- mpiutil.c --*/ | 108 | /*-- mpiutil.c --*/ |
162 | mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs); | 109 | mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs); |
163 | void mpi_free_limb_space(mpi_ptr_t a); | 110 | void mpi_free_limb_space(mpi_ptr_t a); |
164 | void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs); | 111 | void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs); |
165 | 112 | ||
166 | /*-- mpi-bit.c --*/ | ||
167 | void mpi_rshift_limbs(MPI a, unsigned int count); | ||
168 | int mpi_lshift_limbs(MPI a, unsigned int count); | ||
169 | |||
170 | /*-- mpihelp-add.c --*/ | ||
171 | static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | 113 | static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, |
172 | mpi_size_t s1_size, mpi_limb_t s2_limb); | 114 | mpi_size_t s1_size, mpi_limb_t s2_limb); |
173 | mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | 115 | mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, |
@@ -175,7 +117,6 @@ mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | |||
175 | static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, | 117 | static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, |
176 | mpi_ptr_t s2_ptr, mpi_size_t s2_size); | 118 | mpi_ptr_t s2_ptr, mpi_size_t s2_size); |
177 | 119 | ||
178 | /*-- mpihelp-sub.c --*/ | ||
179 | static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | 120 | static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, |
180 | mpi_size_t s1_size, mpi_limb_t s2_limb); | 121 | mpi_size_t s1_size, mpi_limb_t s2_limb); |
181 | mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | 122 | mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, |
@@ -183,10 +124,10 @@ mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | |||
183 | static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, | 124 | static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, |
184 | mpi_ptr_t s2_ptr, mpi_size_t s2_size); | 125 | mpi_ptr_t s2_ptr, mpi_size_t s2_size); |
185 | 126 | ||
186 | /*-- mpihelp-cmp.c --*/ | 127 | /*-- mpih-cmp.c --*/ |
187 | int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size); | 128 | int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size); |
188 | 129 | ||
189 | /*-- mpihelp-mul.c --*/ | 130 | /*-- mpih-mul.c --*/ |
190 | 131 | ||
191 | struct karatsuba_ctx { | 132 | struct karatsuba_ctx { |
192 | struct karatsuba_ctx *next; | 133 | struct karatsuba_ctx *next; |
@@ -202,7 +143,6 @@ mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | |||
202 | mpi_size_t s1_size, mpi_limb_t s2_limb); | 143 | mpi_size_t s1_size, mpi_limb_t s2_limb); |
203 | mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | 144 | mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, |
204 | mpi_size_t s1_size, mpi_limb_t s2_limb); | 145 | mpi_size_t s1_size, mpi_limb_t s2_limb); |
205 | int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); | ||
206 | int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, | 146 | int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, |
207 | mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result); | 147 | mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result); |
208 | void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); | 148 | void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); |
@@ -214,21 +154,16 @@ int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, | |||
214 | mpi_ptr_t vp, mpi_size_t vsize, | 154 | mpi_ptr_t vp, mpi_size_t vsize, |
215 | struct karatsuba_ctx *ctx); | 155 | struct karatsuba_ctx *ctx); |
216 | 156 | ||
217 | /*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/ | 157 | /*-- generic_mpih-mul1.c --*/ |
218 | mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, | 158 | mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, |
219 | mpi_size_t s1_size, mpi_limb_t s2_limb); | 159 | mpi_size_t s1_size, mpi_limb_t s2_limb); |
220 | 160 | ||
221 | /*-- mpihelp-div.c --*/ | 161 | /*-- mpih-div.c --*/ |
222 | mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, | ||
223 | mpi_limb_t divisor_limb); | ||
224 | mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, | 162 | mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, |
225 | mpi_ptr_t np, mpi_size_t nsize, | 163 | mpi_ptr_t np, mpi_size_t nsize, |
226 | mpi_ptr_t dp, mpi_size_t dsize); | 164 | mpi_ptr_t dp, mpi_size_t dsize); |
227 | mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr, | ||
228 | mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, | ||
229 | mpi_limb_t divisor_limb); | ||
230 | 165 | ||
231 | /*-- mpihelp-shift.c --*/ | 166 | /*-- generic_mpih-[lr]shift.c --*/ |
232 | mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, | 167 | mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, |
233 | unsigned cnt); | 168 | unsigned cnt); |
234 | mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, | 169 | mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, |
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 6016f1deb1f5..9bbd9c5d375a 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c | |||
@@ -112,18 +112,6 @@ static inline void alloc_global_tags(struct percpu_ida *pool, | |||
112 | min(pool->nr_free, pool->percpu_batch_size)); | 112 | min(pool->nr_free, pool->percpu_batch_size)); |
113 | } | 113 | } |
114 | 114 | ||
115 | static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) | ||
116 | { | ||
117 | int tag = -ENOSPC; | ||
118 | |||
119 | spin_lock(&tags->lock); | ||
120 | if (tags->nr_free) | ||
121 | tag = tags->freelist[--tags->nr_free]; | ||
122 | spin_unlock(&tags->lock); | ||
123 | |||
124 | return tag; | ||
125 | } | ||
126 | |||
127 | /** | 115 | /** |
128 | * percpu_ida_alloc - allocate a tag | 116 | * percpu_ida_alloc - allocate a tag |
129 | * @pool: pool to allocate from | 117 | * @pool: pool to allocate from |
@@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |||
147 | DEFINE_WAIT(wait); | 135 | DEFINE_WAIT(wait); |
148 | struct percpu_ida_cpu *tags; | 136 | struct percpu_ida_cpu *tags; |
149 | unsigned long flags; | 137 | unsigned long flags; |
150 | int tag; | 138 | int tag = -ENOSPC; |
151 | 139 | ||
152 | local_irq_save(flags); | 140 | tags = raw_cpu_ptr(pool->tag_cpu); |
153 | tags = this_cpu_ptr(pool->tag_cpu); | 141 | spin_lock_irqsave(&tags->lock, flags); |
154 | 142 | ||
155 | /* Fastpath */ | 143 | /* Fastpath */ |
156 | tag = alloc_local_tag(tags); | 144 | if (likely(tags->nr_free)) { |
157 | if (likely(tag >= 0)) { | 145 | tag = tags->freelist[--tags->nr_free]; |
158 | local_irq_restore(flags); | 146 | spin_unlock_irqrestore(&tags->lock, flags); |
159 | return tag; | 147 | return tag; |
160 | } | 148 | } |
149 | spin_unlock_irqrestore(&tags->lock, flags); | ||
161 | 150 | ||
162 | while (1) { | 151 | while (1) { |
163 | spin_lock(&pool->lock); | 152 | spin_lock_irqsave(&pool->lock, flags); |
153 | tags = this_cpu_ptr(pool->tag_cpu); | ||
164 | 154 | ||
165 | /* | 155 | /* |
166 | * prepare_to_wait() must come before steal_tags(), in case | 156 | * prepare_to_wait() must come before steal_tags(), in case |
@@ -184,8 +174,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |||
184 | &pool->cpus_have_tags); | 174 | &pool->cpus_have_tags); |
185 | } | 175 | } |
186 | 176 | ||
187 | spin_unlock(&pool->lock); | 177 | spin_unlock_irqrestore(&pool->lock, flags); |
188 | local_irq_restore(flags); | ||
189 | 178 | ||
190 | if (tag >= 0 || state == TASK_RUNNING) | 179 | if (tag >= 0 || state == TASK_RUNNING) |
191 | break; | 180 | break; |
@@ -196,9 +185,6 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |||
196 | } | 185 | } |
197 | 186 | ||
198 | schedule(); | 187 | schedule(); |
199 | |||
200 | local_irq_save(flags); | ||
201 | tags = this_cpu_ptr(pool->tag_cpu); | ||
202 | } | 188 | } |
203 | if (state != TASK_RUNNING) | 189 | if (state != TASK_RUNNING) |
204 | finish_wait(&pool->wait, &wait); | 190 | finish_wait(&pool->wait, &wait); |
@@ -222,28 +208,24 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |||
222 | 208 | ||
223 | BUG_ON(tag >= pool->nr_tags); | 209 | BUG_ON(tag >= pool->nr_tags); |
224 | 210 | ||
225 | local_irq_save(flags); | 211 | tags = raw_cpu_ptr(pool->tag_cpu); |
226 | tags = this_cpu_ptr(pool->tag_cpu); | ||
227 | 212 | ||
228 | spin_lock(&tags->lock); | 213 | spin_lock_irqsave(&tags->lock, flags); |
229 | tags->freelist[tags->nr_free++] = tag; | 214 | tags->freelist[tags->nr_free++] = tag; |
230 | 215 | ||
231 | nr_free = tags->nr_free; | 216 | nr_free = tags->nr_free; |
232 | spin_unlock(&tags->lock); | ||
233 | 217 | ||
234 | if (nr_free == 1) { | 218 | if (nr_free == 1) { |
235 | cpumask_set_cpu(smp_processor_id(), | 219 | cpumask_set_cpu(smp_processor_id(), |
236 | &pool->cpus_have_tags); | 220 | &pool->cpus_have_tags); |
237 | wake_up(&pool->wait); | 221 | wake_up(&pool->wait); |
238 | } | 222 | } |
223 | spin_unlock_irqrestore(&tags->lock, flags); | ||
239 | 224 | ||
240 | if (nr_free == pool->percpu_max_size) { | 225 | if (nr_free == pool->percpu_max_size) { |
241 | spin_lock(&pool->lock); | 226 | spin_lock_irqsave(&pool->lock, flags); |
227 | spin_lock(&tags->lock); | ||
242 | 228 | ||
243 | /* | ||
244 | * Global lock held and irqs disabled, don't need percpu | ||
245 | * lock | ||
246 | */ | ||
247 | if (tags->nr_free == pool->percpu_max_size) { | 229 | if (tags->nr_free == pool->percpu_max_size) { |
248 | move_tags(pool->freelist, &pool->nr_free, | 230 | move_tags(pool->freelist, &pool->nr_free, |
249 | tags->freelist, &tags->nr_free, | 231 | tags->freelist, &tags->nr_free, |
@@ -251,10 +233,9 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |||
251 | 233 | ||
252 | wake_up(&pool->wait); | 234 | wake_up(&pool->wait); |
253 | } | 235 | } |
254 | spin_unlock(&pool->lock); | 236 | spin_unlock(&tags->lock); |
237 | spin_unlock_irqrestore(&pool->lock, flags); | ||
255 | } | 238 | } |
256 | |||
257 | local_irq_restore(flags); | ||
258 | } | 239 | } |
259 | EXPORT_SYMBOL_GPL(percpu_ida_free); | 240 | EXPORT_SYMBOL_GPL(percpu_ida_free); |
260 | 241 | ||
@@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |||
346 | struct percpu_ida_cpu *remote; | 327 | struct percpu_ida_cpu *remote; |
347 | unsigned cpu, i, err = 0; | 328 | unsigned cpu, i, err = 0; |
348 | 329 | ||
349 | local_irq_save(flags); | ||
350 | for_each_possible_cpu(cpu) { | 330 | for_each_possible_cpu(cpu) { |
351 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | 331 | remote = per_cpu_ptr(pool->tag_cpu, cpu); |
352 | spin_lock(&remote->lock); | 332 | spin_lock_irqsave(&remote->lock, flags); |
353 | for (i = 0; i < remote->nr_free; i++) { | 333 | for (i = 0; i < remote->nr_free; i++) { |
354 | err = fn(remote->freelist[i], data); | 334 | err = fn(remote->freelist[i], data); |
355 | if (err) | 335 | if (err) |
356 | break; | 336 | break; |
357 | } | 337 | } |
358 | spin_unlock(&remote->lock); | 338 | spin_unlock_irqrestore(&remote->lock, flags); |
359 | if (err) | 339 | if (err) |
360 | goto out; | 340 | goto out; |
361 | } | 341 | } |
362 | 342 | ||
363 | spin_lock(&pool->lock); | 343 | spin_lock_irqsave(&pool->lock, flags); |
364 | for (i = 0; i < pool->nr_free; i++) { | 344 | for (i = 0; i < pool->nr_free; i++) { |
365 | err = fn(pool->freelist[i], data); | 345 | err = fn(pool->freelist[i], data); |
366 | if (err) | 346 | if (err) |
367 | break; | 347 | break; |
368 | } | 348 | } |
369 | spin_unlock(&pool->lock); | 349 | spin_unlock_irqrestore(&pool->lock, flags); |
370 | out: | 350 | out: |
371 | local_irq_restore(flags); | ||
372 | return err; | 351 | return err; |
373 | } | 352 | } |
374 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); | 353 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); |
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index d7e06b28de38..0a559a42359b 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c | |||
@@ -112,3 +112,5 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) | |||
112 | return j; | 112 | return j; |
113 | } | 113 | } |
114 | EXPORT_SYMBOL(ucs2_as_utf8); | 114 | EXPORT_SYMBOL(ucs2_as_utf8); |
115 | |||
116 | MODULE_LICENSE("GPL v2"); | ||
diff --git a/mm/Kconfig b/mm/Kconfig index 3e0b6e87f65d..00bffa7a5112 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -754,3 +754,6 @@ config GUP_BENCHMARK | |||
754 | performance of get_user_pages_fast(). | 754 | performance of get_user_pages_fast(). |
755 | 755 | ||
756 | See tools/testing/selftests/vm/gup_benchmark.c | 756 | See tools/testing/selftests/vm/gup_benchmark.c |
757 | |||
758 | config ARCH_HAS_PTE_SPECIAL | ||
759 | bool | ||
diff --git a/mm/Makefile b/mm/Makefile index b4e54a9ae9c5..8716bdabe1e6 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -105,3 +105,4 @@ obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o | |||
105 | obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o | 105 | obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o |
106 | obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o | 106 | obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o |
107 | obj-$(CONFIG_HMM) += hmm.o | 107 | obj-$(CONFIG_HMM) += hmm.o |
108 | obj-$(CONFIG_MEMFD_CREATE) += memfd.o | ||
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8fe3ebd6ac00..347cc834c04a 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -557,7 +557,7 @@ static int cgwb_create(struct backing_dev_info *bdi, | |||
557 | memcg = mem_cgroup_from_css(memcg_css); | 557 | memcg = mem_cgroup_from_css(memcg_css); |
558 | blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); | 558 | blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); |
559 | blkcg = css_to_blkcg(blkcg_css); | 559 | blkcg = css_to_blkcg(blkcg_css); |
560 | memcg_cgwb_list = mem_cgroup_cgwb_list(memcg); | 560 | memcg_cgwb_list = &memcg->cgwb_list; |
561 | blkcg_cgwb_list = &blkcg->cgwb_list; | 561 | blkcg_cgwb_list = &blkcg->cgwb_list; |
562 | 562 | ||
563 | /* look up again under lock and discard on blkcg mismatch */ | 563 | /* look up again under lock and discard on blkcg mismatch */ |
@@ -736,7 +736,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) | |||
736 | */ | 736 | */ |
737 | void wb_memcg_offline(struct mem_cgroup *memcg) | 737 | void wb_memcg_offline(struct mem_cgroup *memcg) |
738 | { | 738 | { |
739 | struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg); | 739 | struct list_head *memcg_cgwb_list = &memcg->cgwb_list; |
740 | struct bdi_writeback *wb, *next; | 740 | struct bdi_writeback *wb, *next; |
741 | 741 | ||
742 | spin_lock_irq(&cgwb_lock); | 742 | spin_lock_irq(&cgwb_lock); |
diff --git a/mm/filemap.c b/mm/filemap.c index 0604cb02e6f3..52517f28e6f4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -2489,7 +2489,7 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, | |||
2489 | * | 2489 | * |
2490 | * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. | 2490 | * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. |
2491 | */ | 2491 | */ |
2492 | int filemap_fault(struct vm_fault *vmf) | 2492 | vm_fault_t filemap_fault(struct vm_fault *vmf) |
2493 | { | 2493 | { |
2494 | int error; | 2494 | int error; |
2495 | struct file *file = vmf->vma->vm_file; | 2495 | struct file *file = vmf->vma->vm_file; |
@@ -2499,7 +2499,7 @@ int filemap_fault(struct vm_fault *vmf) | |||
2499 | pgoff_t offset = vmf->pgoff; | 2499 | pgoff_t offset = vmf->pgoff; |
2500 | pgoff_t max_off; | 2500 | pgoff_t max_off; |
2501 | struct page *page; | 2501 | struct page *page; |
2502 | int ret = 0; | 2502 | vm_fault_t ret = 0; |
2503 | 2503 | ||
2504 | max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); | 2504 | max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); |
2505 | if (unlikely(offset >= max_off)) | 2505 | if (unlikely(offset >= max_off)) |
@@ -2693,11 +2693,11 @@ next: | |||
2693 | } | 2693 | } |
2694 | EXPORT_SYMBOL(filemap_map_pages); | 2694 | EXPORT_SYMBOL(filemap_map_pages); |
2695 | 2695 | ||
2696 | int filemap_page_mkwrite(struct vm_fault *vmf) | 2696 | vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) |
2697 | { | 2697 | { |
2698 | struct page *page = vmf->page; | 2698 | struct page *page = vmf->page; |
2699 | struct inode *inode = file_inode(vmf->vma->vm_file); | 2699 | struct inode *inode = file_inode(vmf->vma->vm_file); |
2700 | int ret = VM_FAULT_LOCKED; | 2700 | vm_fault_t ret = VM_FAULT_LOCKED; |
2701 | 2701 | ||
2702 | sb_start_pagefault(inode->i_sb); | 2702 | sb_start_pagefault(inode->i_sb); |
2703 | file_update_time(vmf->vma->vm_file); | 2703 | file_update_time(vmf->vma->vm_file); |
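The filemap.c hunks are part of the tree-wide move to a dedicated vm_fault_t return type, which keeps VM_FAULT_* codes from being mixed up with negative errnos. A driver-style sketch of a fault handler in the new style (helper name hypothetical):

    static vm_fault_t my_vm_fault(struct vm_fault *vmf)
    {
            struct page *page = my_lookup_page(vmf->pgoff); /* hypothetical lookup */

            if (!page)
                    return VM_FAULT_SIGBUS;
            get_page(page);
            vmf->page = page;       /* core mm installs the page */
            return 0;
    }

    static const struct vm_operations_struct my_vm_ops = {
            .fault = my_vm_fault,
    };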
diff --git a/mm/gup.c b/mm/gup.c --- a/mm/gup.c +++ b/mm/gup.c | |||
@@ -212,53 +212,69 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, | |||
212 | unsigned long address, pud_t *pudp, | 212 | unsigned long address, pud_t *pudp, |
213 | unsigned int flags, unsigned int *page_mask) | 213 | unsigned int flags, unsigned int *page_mask) |
214 | { | 214 | { |
215 | pmd_t *pmd; | 215 | pmd_t *pmd, pmdval; |
216 | spinlock_t *ptl; | 216 | spinlock_t *ptl; |
217 | struct page *page; | 217 | struct page *page; |
218 | struct mm_struct *mm = vma->vm_mm; | 218 | struct mm_struct *mm = vma->vm_mm; |
219 | 219 | ||
220 | pmd = pmd_offset(pudp, address); | 220 | pmd = pmd_offset(pudp, address); |
221 | if (pmd_none(*pmd)) | 221 | /* |
222 | * The READ_ONCE() will stabilize the pmdval in a register or | ||
223 | * on the stack so that it will stop changing under the code. | ||
224 | */ | ||
225 | pmdval = READ_ONCE(*pmd); | ||
226 | if (pmd_none(pmdval)) | ||
222 | return no_page_table(vma, flags); | 227 | return no_page_table(vma, flags); |
223 | if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { | 228 | if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) { |
224 | page = follow_huge_pmd(mm, address, pmd, flags); | 229 | page = follow_huge_pmd(mm, address, pmd, flags); |
225 | if (page) | 230 | if (page) |
226 | return page; | 231 | return page; |
227 | return no_page_table(vma, flags); | 232 | return no_page_table(vma, flags); |
228 | } | 233 | } |
229 | if (is_hugepd(__hugepd(pmd_val(*pmd)))) { | 234 | if (is_hugepd(__hugepd(pmd_val(pmdval)))) { |
230 | page = follow_huge_pd(vma, address, | 235 | page = follow_huge_pd(vma, address, |
231 | __hugepd(pmd_val(*pmd)), flags, | 236 | __hugepd(pmd_val(pmdval)), flags, |
232 | PMD_SHIFT); | 237 | PMD_SHIFT); |
233 | if (page) | 238 | if (page) |
234 | return page; | 239 | return page; |
235 | return no_page_table(vma, flags); | 240 | return no_page_table(vma, flags); |
236 | } | 241 | } |
237 | retry: | 242 | retry: |
238 | if (!pmd_present(*pmd)) { | 243 | if (!pmd_present(pmdval)) { |
239 | if (likely(!(flags & FOLL_MIGRATION))) | 244 | if (likely(!(flags & FOLL_MIGRATION))) |
240 | return no_page_table(vma, flags); | 245 | return no_page_table(vma, flags); |
241 | VM_BUG_ON(thp_migration_supported() && | 246 | VM_BUG_ON(thp_migration_supported() && |
242 | !is_pmd_migration_entry(*pmd)); | 247 | !is_pmd_migration_entry(pmdval)); |
243 | if (is_pmd_migration_entry(*pmd)) | 248 | if (is_pmd_migration_entry(pmdval)) |
244 | pmd_migration_entry_wait(mm, pmd); | 249 | pmd_migration_entry_wait(mm, pmd); |
250 | pmdval = READ_ONCE(*pmd); | ||
251 | /* | ||
252 | * MADV_DONTNEED may convert the pmd to null because | ||
253 | * mmap_sem is held in read mode | ||
254 | */ | ||
255 | if (pmd_none(pmdval)) | ||
256 | return no_page_table(vma, flags); | ||
245 | goto retry; | 257 | goto retry; |
246 | } | 258 | } |
247 | if (pmd_devmap(*pmd)) { | 259 | if (pmd_devmap(pmdval)) { |
248 | ptl = pmd_lock(mm, pmd); | 260 | ptl = pmd_lock(mm, pmd); |
249 | page = follow_devmap_pmd(vma, address, pmd, flags); | 261 | page = follow_devmap_pmd(vma, address, pmd, flags); |
250 | spin_unlock(ptl); | 262 | spin_unlock(ptl); |
251 | if (page) | 263 | if (page) |
252 | return page; | 264 | return page; |
253 | } | 265 | } |
254 | if (likely(!pmd_trans_huge(*pmd))) | 266 | if (likely(!pmd_trans_huge(pmdval))) |
255 | return follow_page_pte(vma, address, pmd, flags); | 267 | return follow_page_pte(vma, address, pmd, flags); |
256 | 268 | ||
257 | if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) | 269 | if ((flags & FOLL_NUMA) && pmd_protnone(pmdval)) |
258 | return no_page_table(vma, flags); | 270 | return no_page_table(vma, flags); |
259 | 271 | ||
260 | retry_locked: | 272 | retry_locked: |
261 | ptl = pmd_lock(mm, pmd); | 273 | ptl = pmd_lock(mm, pmd); |
274 | if (unlikely(pmd_none(*pmd))) { | ||
275 | spin_unlock(ptl); | ||
276 | return no_page_table(vma, flags); | ||
277 | } | ||
262 | if (unlikely(!pmd_present(*pmd))) { | 278 | if (unlikely(!pmd_present(*pmd))) { |
263 | spin_unlock(ptl); | 279 | spin_unlock(ptl); |
264 | if (likely(!(flags & FOLL_MIGRATION))) | 280 | if (likely(!(flags & FOLL_MIGRATION))) |
@@ -1354,7 +1370,7 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) | |||
1354 | } | 1370 | } |
1355 | } | 1371 | } |
1356 | 1372 | ||
1357 | #ifdef __HAVE_ARCH_PTE_SPECIAL | 1373 | #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL |
1358 | static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, | 1374 | static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, |
1359 | int write, struct page **pages, int *nr) | 1375 | int write, struct page **pages, int *nr) |
1360 | { | 1376 | { |
@@ -1430,7 +1446,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, | |||
1430 | { | 1446 | { |
1431 | return 0; | 1447 | return 0; |
1432 | } | 1448 | } |
1433 | #endif /* __HAVE_ARCH_PTE_SPECIAL */ | 1449 | #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ |
1434 | 1450 | ||
1435 | #if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) | 1451 | #if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) |
1436 | static int __gup_device_huge(unsigned long pfn, unsigned long addr, | 1452 | static int __gup_device_huge(unsigned long pfn, unsigned long addr, |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index ac5591d8622c..ba8fdc0b6e7f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -483,11 +483,8 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) | |||
483 | 483 | ||
484 | static inline struct list_head *page_deferred_list(struct page *page) | 484 | static inline struct list_head *page_deferred_list(struct page *page) |
485 | { | 485 | { |
486 | /* | 486 | /* ->lru in the tail pages is occupied by compound_head. */ |
487 | * ->lru in the tail pages is occupied by compound_head. | 487 | return &page[2].deferred_list; |
488 | * Let's use ->mapping + ->index in the second tail page as list_head. | ||
489 | */ | ||
490 | return (struct list_head *)&page[2].mapping; | ||
491 | } | 488 | } |
492 | 489 | ||
493 | void prep_transhuge_page(struct page *page) | 490 | void prep_transhuge_page(struct page *page) |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 129088710510..696befffe6f7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -3159,7 +3159,7 @@ static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) | |||
3159 | * hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get | 3159 | * hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get |
3160 | * this far. | 3160 | * this far. |
3161 | */ | 3161 | */ |
3162 | static int hugetlb_vm_op_fault(struct vm_fault *vmf) | 3162 | static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf) |
3163 | { | 3163 | { |
3164 | BUG(); | 3164 | BUG(); |
3165 | return 0; | 3165 | return 0; |
@@ -3686,6 +3686,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3686 | struct page *page; | 3686 | struct page *page; |
3687 | pte_t new_pte; | 3687 | pte_t new_pte; |
3688 | spinlock_t *ptl; | 3688 | spinlock_t *ptl; |
3689 | unsigned long haddr = address & huge_page_mask(h); | ||
3689 | 3690 | ||
3690 | /* | 3691 | /* |
3691 | * Currently, we are forced to kill the process in the event the | 3692 | * Currently, we are forced to kill the process in the event the |
@@ -3716,7 +3717,7 @@ retry: | |||
3716 | u32 hash; | 3717 | u32 hash; |
3717 | struct vm_fault vmf = { | 3718 | struct vm_fault vmf = { |
3718 | .vma = vma, | 3719 | .vma = vma, |
3719 | .address = address, | 3720 | .address = haddr, |
3720 | .flags = flags, | 3721 | .flags = flags, |
3721 | /* | 3722 | /* |
3722 | * Hard to debug if it ends up being | 3723 | * Hard to debug if it ends up being |
@@ -3733,14 +3734,14 @@ retry: | |||
3733 | * fault to make calling code simpler. | 3734 | * fault to make calling code simpler. |
3734 | */ | 3735 | */ |
3735 | hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, | 3736 | hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, |
3736 | idx, address); | 3737 | idx, haddr); |
3737 | mutex_unlock(&hugetlb_fault_mutex_table[hash]); | 3738 | mutex_unlock(&hugetlb_fault_mutex_table[hash]); |
3738 | ret = handle_userfault(&vmf, VM_UFFD_MISSING); | 3739 | ret = handle_userfault(&vmf, VM_UFFD_MISSING); |
3739 | mutex_lock(&hugetlb_fault_mutex_table[hash]); | 3740 | mutex_lock(&hugetlb_fault_mutex_table[hash]); |
3740 | goto out; | 3741 | goto out; |
3741 | } | 3742 | } |
3742 | 3743 | ||
3743 | page = alloc_huge_page(vma, address, 0); | 3744 | page = alloc_huge_page(vma, haddr, 0); |
3744 | if (IS_ERR(page)) { | 3745 | if (IS_ERR(page)) { |
3745 | ret = PTR_ERR(page); | 3746 | ret = PTR_ERR(page); |
3746 | if (ret == -ENOMEM) | 3747 | if (ret == -ENOMEM) |
@@ -3789,12 +3790,12 @@ retry: | |||
3789 | * the spinlock. | 3790 | * the spinlock. |
3790 | */ | 3791 | */ |
3791 | if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { | 3792 | if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { |
3792 | if (vma_needs_reservation(h, vma, address) < 0) { | 3793 | if (vma_needs_reservation(h, vma, haddr) < 0) { |
3793 | ret = VM_FAULT_OOM; | 3794 | ret = VM_FAULT_OOM; |
3794 | goto backout_unlocked; | 3795 | goto backout_unlocked; |
3795 | } | 3796 | } |
3796 | /* Just decrements count, does not deallocate */ | 3797 | /* Just decrements count, does not deallocate */ |
3797 | vma_end_reservation(h, vma, address); | 3798 | vma_end_reservation(h, vma, haddr); |
3798 | } | 3799 | } |
3799 | 3800 | ||
3800 | ptl = huge_pte_lock(h, mm, ptep); | 3801 | ptl = huge_pte_lock(h, mm, ptep); |
@@ -3808,17 +3809,17 @@ retry: | |||
3808 | 3809 | ||
3809 | if (anon_rmap) { | 3810 | if (anon_rmap) { |
3810 | ClearPagePrivate(page); | 3811 | ClearPagePrivate(page); |
3811 | hugepage_add_new_anon_rmap(page, vma, address); | 3812 | hugepage_add_new_anon_rmap(page, vma, haddr); |
3812 | } else | 3813 | } else |
3813 | page_dup_rmap(page, true); | 3814 | page_dup_rmap(page, true); |
3814 | new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) | 3815 | new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) |
3815 | && (vma->vm_flags & VM_SHARED))); | 3816 | && (vma->vm_flags & VM_SHARED))); |
3816 | set_huge_pte_at(mm, address, ptep, new_pte); | 3817 | set_huge_pte_at(mm, haddr, ptep, new_pte); |
3817 | 3818 | ||
3818 | hugetlb_count_add(pages_per_huge_page(h), mm); | 3819 | hugetlb_count_add(pages_per_huge_page(h), mm); |
3819 | if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { | 3820 | if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { |
3820 | /* Optimization, do the COW without a second fault */ | 3821 | /* Optimization, do the COW without a second fault */ |
3821 | ret = hugetlb_cow(mm, vma, address, ptep, page, ptl); | 3822 | ret = hugetlb_cow(mm, vma, haddr, ptep, page, ptl); |
3822 | } | 3823 | } |
3823 | 3824 | ||
3824 | spin_unlock(ptl); | 3825 | spin_unlock(ptl); |
@@ -3830,7 +3831,7 @@ backout: | |||
3830 | spin_unlock(ptl); | 3831 | spin_unlock(ptl); |
3831 | backout_unlocked: | 3832 | backout_unlocked: |
3832 | unlock_page(page); | 3833 | unlock_page(page); |
3833 | restore_reserve_on_error(h, vma, address, page); | 3834 | restore_reserve_on_error(h, vma, haddr, page); |
3834 | put_page(page); | 3835 | put_page(page); |
3835 | goto out; | 3836 | goto out; |
3836 | } | 3837 | } |
@@ -3883,10 +3884,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3883 | struct hstate *h = hstate_vma(vma); | 3884 | struct hstate *h = hstate_vma(vma); |
3884 | struct address_space *mapping; | 3885 | struct address_space *mapping; |
3885 | int need_wait_lock = 0; | 3886 | int need_wait_lock = 0; |
3887 | unsigned long haddr = address & huge_page_mask(h); | ||
3886 | 3888 | ||
3887 | address &= huge_page_mask(h); | 3889 | ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); |
3888 | |||
3889 | ptep = huge_pte_offset(mm, address, huge_page_size(h)); | ||
3890 | if (ptep) { | 3890 | if (ptep) { |
3891 | entry = huge_ptep_get(ptep); | 3891 | entry = huge_ptep_get(ptep); |
3892 | if (unlikely(is_hugetlb_entry_migration(entry))) { | 3892 | if (unlikely(is_hugetlb_entry_migration(entry))) { |
@@ -3896,20 +3896,20 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3896 | return VM_FAULT_HWPOISON_LARGE | | 3896 | return VM_FAULT_HWPOISON_LARGE | |
3897 | VM_FAULT_SET_HINDEX(hstate_index(h)); | 3897 | VM_FAULT_SET_HINDEX(hstate_index(h)); |
3898 | } else { | 3898 | } else { |
3899 | ptep = huge_pte_alloc(mm, address, huge_page_size(h)); | 3899 | ptep = huge_pte_alloc(mm, haddr, huge_page_size(h)); |
3900 | if (!ptep) | 3900 | if (!ptep) |
3901 | return VM_FAULT_OOM; | 3901 | return VM_FAULT_OOM; |
3902 | } | 3902 | } |
3903 | 3903 | ||
3904 | mapping = vma->vm_file->f_mapping; | 3904 | mapping = vma->vm_file->f_mapping; |
3905 | idx = vma_hugecache_offset(h, vma, address); | 3905 | idx = vma_hugecache_offset(h, vma, haddr); |
3906 | 3906 | ||
3907 | /* | 3907 | /* |
3908 | * Serialize hugepage allocation and instantiation, so that we don't | 3908 | * Serialize hugepage allocation and instantiation, so that we don't |
3909 | * get spurious allocation failures if two CPUs race to instantiate | 3909 | * get spurious allocation failures if two CPUs race to instantiate |
3910 | * the same page in the page cache. | 3910 | * the same page in the page cache. |
3911 | */ | 3911 | */ |
3912 | hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, address); | 3912 | hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, haddr); |
3913 | mutex_lock(&hugetlb_fault_mutex_table[hash]); | 3913 | mutex_lock(&hugetlb_fault_mutex_table[hash]); |
3914 | 3914 | ||
3915 | entry = huge_ptep_get(ptep); | 3915 | entry = huge_ptep_get(ptep); |
@@ -3939,16 +3939,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3939 | * consumed. | 3939 | * consumed. |
3940 | */ | 3940 | */ |
3941 | if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) { | 3941 | if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) { |
3942 | if (vma_needs_reservation(h, vma, address) < 0) { | 3942 | if (vma_needs_reservation(h, vma, haddr) < 0) { |
3943 | ret = VM_FAULT_OOM; | 3943 | ret = VM_FAULT_OOM; |
3944 | goto out_mutex; | 3944 | goto out_mutex; |
3945 | } | 3945 | } |
3946 | /* Just decrements count, does not deallocate */ | 3946 | /* Just decrements count, does not deallocate */ |
3947 | vma_end_reservation(h, vma, address); | 3947 | vma_end_reservation(h, vma, haddr); |
3948 | 3948 | ||
3949 | if (!(vma->vm_flags & VM_MAYSHARE)) | 3949 | if (!(vma->vm_flags & VM_MAYSHARE)) |
3950 | pagecache_page = hugetlbfs_pagecache_page(h, | 3950 | pagecache_page = hugetlbfs_pagecache_page(h, |
3951 | vma, address); | 3951 | vma, haddr); |
3952 | } | 3952 | } |
3953 | 3953 | ||
3954 | ptl = huge_pte_lock(h, mm, ptep); | 3954 | ptl = huge_pte_lock(h, mm, ptep); |
@@ -3973,16 +3973,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3973 | 3973 | ||
3974 | if (flags & FAULT_FLAG_WRITE) { | 3974 | if (flags & FAULT_FLAG_WRITE) { |
3975 | if (!huge_pte_write(entry)) { | 3975 | if (!huge_pte_write(entry)) { |
3976 | ret = hugetlb_cow(mm, vma, address, ptep, | 3976 | ret = hugetlb_cow(mm, vma, haddr, ptep, |
3977 | pagecache_page, ptl); | 3977 | pagecache_page, ptl); |
3978 | goto out_put_page; | 3978 | goto out_put_page; |
3979 | } | 3979 | } |
3980 | entry = huge_pte_mkdirty(entry); | 3980 | entry = huge_pte_mkdirty(entry); |
3981 | } | 3981 | } |
3982 | entry = pte_mkyoung(entry); | 3982 | entry = pte_mkyoung(entry); |
3983 | if (huge_ptep_set_access_flags(vma, address, ptep, entry, | 3983 | if (huge_ptep_set_access_flags(vma, haddr, ptep, entry, |
3984 | flags & FAULT_FLAG_WRITE)) | 3984 | flags & FAULT_FLAG_WRITE)) |
3985 | update_mmu_cache(vma, address, ptep); | 3985 | update_mmu_cache(vma, haddr, ptep); |
3986 | out_put_page: | 3986 | out_put_page: |
3987 | if (page != pagecache_page) | 3987 | if (page != pagecache_page) |
3988 | unlock_page(page); | 3988 | unlock_page(page); |
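All of the hugetlb.c churn substitutes a precomputed haddr for the earlier in-place `address &= huge_page_mask(h)`, so the exact faulting address survives for callers that still need it. Worked example for a 2MB huge page, where huge_page_mask(h) clears the low 21 bits:

    unsigned long address = 0x7f0000212345UL;               /* faulting address */
    unsigned long haddr = address & ~((1UL << 21) - 1);     /* huge_page_mask() */

    /* haddr == 0x7f0000200000UL: the huge page base; address is preserved */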
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index eec1150125b9..68c2f2f3c05b 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c | |||
@@ -84,7 +84,7 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, | |||
84 | 84 | ||
85 | limit = round_down(PAGE_COUNTER_MAX, | 85 | limit = round_down(PAGE_COUNTER_MAX, |
86 | 1 << huge_page_order(&hstates[idx])); | 86 | 1 << huge_page_order(&hstates[idx])); |
87 | ret = page_counter_limit(counter, limit); | 87 | ret = page_counter_set_max(counter, limit); |
88 | VM_BUG_ON(ret); | 88 | VM_BUG_ON(ret); |
89 | } | 89 | } |
90 | } | 90 | } |
@@ -273,7 +273,7 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, | |||
273 | case RES_USAGE: | 273 | case RES_USAGE: |
274 | return (u64)page_counter_read(counter) * PAGE_SIZE; | 274 | return (u64)page_counter_read(counter) * PAGE_SIZE; |
275 | case RES_LIMIT: | 275 | case RES_LIMIT: |
276 | return (u64)counter->limit * PAGE_SIZE; | 276 | return (u64)counter->max * PAGE_SIZE; |
277 | case RES_MAX_USAGE: | 277 | case RES_MAX_USAGE: |
278 | return (u64)counter->watermark * PAGE_SIZE; | 278 | return (u64)counter->watermark * PAGE_SIZE; |
279 | case RES_FAILCNT: | 279 | case RES_FAILCNT: |
@@ -306,7 +306,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, | |||
306 | switch (MEMFILE_ATTR(of_cft(of)->private)) { | 306 | switch (MEMFILE_ATTR(of_cft(of)->private)) { |
307 | case RES_LIMIT: | 307 | case RES_LIMIT: |
308 | mutex_lock(&hugetlb_limit_mutex); | 308 | mutex_lock(&hugetlb_limit_mutex); |
309 | ret = page_counter_limit(&h_cg->hugepage[idx], nr_pages); | 309 | ret = page_counter_set_max(&h_cg->hugepage[idx], nr_pages); |
310 | mutex_unlock(&hugetlb_limit_mutex); | 310 | mutex_unlock(&hugetlb_limit_mutex); |
311 | break; | 311 | break; |
312 | default: | 312 | default: |
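The hugetlb_cgroup hunks above are part of the page_counter_limit() to page_counter_set_max() rename. A simplified, single-threaded model of the semantics the callers rely on; the real counter is lockless, but it likewise refuses to lower the cap below current usage, which is why other hunks in this series reclaim and retry:

    #include <errno.h>
    #include <stdio.h>

    struct page_counter { unsigned long usage, max; };

    static int page_counter_set_max(struct page_counter *c, unsigned long max)
    {
        if (c->usage > max)
            return -EBUSY;          /* caller must reclaim and retry */
        c->max = max;
        return 0;
    }

    int main(void)
    {
        struct page_counter c = { .usage = 100, .max = 1UL << 20 };

        printf("%d\n", page_counter_set_max(&c, 50));   /* -EBUSY: usage above cap */
        printf("%d\n", page_counter_set_max(&c, 200));  /* 0: cap lowered */
        return 0;
    }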
diff --git a/mm/init-mm.c b/mm/init-mm.c index f94d5d15ebc0..f0179c9c04c2 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c | |||
@@ -22,6 +22,7 @@ struct mm_struct init_mm = { | |||
22 | .mm_count = ATOMIC_INIT(1), | 22 | .mm_count = ATOMIC_INIT(1), |
23 | .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), | 23 | .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), |
24 | .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), | 24 | .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), |
25 | .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), | ||
25 | .mmlist = LIST_HEAD_INIT(init_mm.mmlist), | 26 | .mmlist = LIST_HEAD_INIT(init_mm.mmlist), |
26 | .user_ns = &init_user_ns, | 27 | .user_ns = &init_user_ns, |
27 | INIT_MM_CONTEXT(init_mm) | 28 | INIT_MM_CONTEXT(init_mm) |
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c | |||
@@ -840,6 +840,17 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma, | |||
840 | return err; | 840 | return err; |
841 | } | 841 | } |
842 | 842 | ||
843 | static inline struct stable_node *page_stable_node(struct page *page) | ||
844 | { | ||
845 | return PageKsm(page) ? page_rmapping(page) : NULL; | ||
846 | } | ||
847 | |||
848 | static inline void set_page_stable_node(struct page *page, | ||
849 | struct stable_node *stable_node) | ||
850 | { | ||
851 | page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM); | ||
852 | } | ||
853 | |||
843 | #ifdef CONFIG_SYSFS | 854 | #ifdef CONFIG_SYSFS |
844 | /* | 855 | /* |
845 | * Only called through the sysfs control interface: | 856 | * Only called through the sysfs control interface: |
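The two helpers moved into ksm.c store a stable_node pointer directly in page->mapping, with PAGE_MAPPING_KSM set in the pointer's low bits as a type tag. A userspace sketch of the encode/decode round trip; the tag value here is an assumption for illustration, not taken from the kernel headers:

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_MAPPING_KSM 0x3UL  /* assumed tag in the pointer's low bits */

    struct stable_node { int dummy; };

    static void *encode(struct stable_node *node)
    {
        return (void *)((uintptr_t)node | PAGE_MAPPING_KSM);
    }

    static struct stable_node *decode(void *mapping)
    {
        return (struct stable_node *)((uintptr_t)mapping & ~PAGE_MAPPING_KSM);
    }

    int main(void)
    {
        static struct stable_node node;  /* aligned, so the low bits are free */
        void *mapping = encode(&node);

        assert(((uintptr_t)mapping & PAGE_MAPPING_KSM) == PAGE_MAPPING_KSM);
        assert(decode(mapping) == &node);
        return 0;
    }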
diff --git a/mm/memblock.c b/mm/memblock.c index 5108356ad8aa..93ad42bc8a73 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -68,7 +68,7 @@ ulong __init_memblock choose_memblock_flags(void) | |||
68 | /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ | 68 | /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ |
69 | static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) | 69 | static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) |
70 | { | 70 | { |
71 | return *size = min(*size, (phys_addr_t)ULLONG_MAX - base); | 71 | return *size = min(*size, PHYS_ADDR_MAX - base); |
72 | } | 72 | } |
73 | 73 | ||
74 | /* | 74 | /* |
@@ -697,6 +697,11 @@ static int __init_memblock memblock_remove_range(struct memblock_type *type, | |||
697 | 697 | ||
698 | int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) | 698 | int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) |
699 | { | 699 | { |
700 | phys_addr_t end = base + size - 1; | ||
701 | |||
702 | memblock_dbg("memblock_remove: [%pa-%pa] %pS\n", | ||
703 | &base, &end, (void *)_RET_IP_); | ||
704 | |||
700 | return memblock_remove_range(&memblock.memory, base, size); | 705 | return memblock_remove_range(&memblock.memory, base, size); |
701 | } | 706 | } |
702 | 707 | ||
@@ -925,7 +930,7 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags, | |||
925 | r = &type_b->regions[idx_b]; | 930 | r = &type_b->regions[idx_b]; |
926 | r_start = idx_b ? r[-1].base + r[-1].size : 0; | 931 | r_start = idx_b ? r[-1].base + r[-1].size : 0; |
927 | r_end = idx_b < type_b->cnt ? | 932 | r_end = idx_b < type_b->cnt ? |
928 | r->base : (phys_addr_t)ULLONG_MAX; | 933 | r->base : PHYS_ADDR_MAX; |
929 | 934 | ||
930 | /* | 935 | /* |
931 | * if idx_b advanced past idx_a, | 936 | * if idx_b advanced past idx_a, |
@@ -1041,7 +1046,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags, | |||
1041 | r = &type_b->regions[idx_b]; | 1046 | r = &type_b->regions[idx_b]; |
1042 | r_start = idx_b ? r[-1].base + r[-1].size : 0; | 1047 | r_start = idx_b ? r[-1].base + r[-1].size : 0; |
1043 | r_end = idx_b < type_b->cnt ? | 1048 | r_end = idx_b < type_b->cnt ? |
1044 | r->base : (phys_addr_t)ULLONG_MAX; | 1049 | r->base : PHYS_ADDR_MAX; |
1045 | /* | 1050 | /* |
1046 | * if idx_b advanced past idx_a, | 1051 | * if idx_b advanced past idx_a, |
1047 | * break out to advance idx_a | 1052 | * break out to advance idx_a |
@@ -1516,13 +1521,13 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void) | |||
1516 | 1521 | ||
1517 | static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) | 1522 | static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) |
1518 | { | 1523 | { |
1519 | phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; | 1524 | phys_addr_t max_addr = PHYS_ADDR_MAX; |
1520 | struct memblock_region *r; | 1525 | struct memblock_region *r; |
1521 | 1526 | ||
1522 | /* | 1527 | /* |
1523 | * translate the memory @limit size into the max address within one of | 1528 | * translate the memory @limit size into the max address within one of |
1524 | * the memory memblock regions, if the @limit exceeds the total size | 1529 | * the memory memblock regions, if the @limit exceeds the total size |
1525 | * of those regions, max_addr will keep original value ULLONG_MAX | 1530 | * of those regions, max_addr will keep original value PHYS_ADDR_MAX |
1526 | */ | 1531 | */ |
1527 | for_each_memblock(memory, r) { | 1532 | for_each_memblock(memory, r) { |
1528 | if (limit <= r->size) { | 1533 | if (limit <= r->size) { |
@@ -1537,7 +1542,7 @@ static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) | |||
1537 | 1542 | ||
1538 | void __init memblock_enforce_memory_limit(phys_addr_t limit) | 1543 | void __init memblock_enforce_memory_limit(phys_addr_t limit) |
1539 | { | 1544 | { |
1540 | phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; | 1545 | phys_addr_t max_addr = PHYS_ADDR_MAX; |
1541 | 1546 | ||
1542 | if (!limit) | 1547 | if (!limit) |
1543 | return; | 1548 | return; |
@@ -1545,14 +1550,14 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit) | |||
1545 | max_addr = __find_max_addr(limit); | 1550 | max_addr = __find_max_addr(limit); |
1546 | 1551 | ||
1547 | /* @limit exceeds the total size of the memory, do nothing */ | 1552 | /* @limit exceeds the total size of the memory, do nothing */ |
1548 | if (max_addr == (phys_addr_t)ULLONG_MAX) | 1553 | if (max_addr == PHYS_ADDR_MAX) |
1549 | return; | 1554 | return; |
1550 | 1555 | ||
1551 | /* truncate both memory and reserved regions */ | 1556 | /* truncate both memory and reserved regions */ |
1552 | memblock_remove_range(&memblock.memory, max_addr, | 1557 | memblock_remove_range(&memblock.memory, max_addr, |
1553 | (phys_addr_t)ULLONG_MAX); | 1558 | PHYS_ADDR_MAX); |
1554 | memblock_remove_range(&memblock.reserved, max_addr, | 1559 | memblock_remove_range(&memblock.reserved, max_addr, |
1555 | (phys_addr_t)ULLONG_MAX); | 1560 | PHYS_ADDR_MAX); |
1556 | } | 1561 | } |
1557 | 1562 | ||
1558 | void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) | 1563 | void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) |
@@ -1580,7 +1585,7 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) | |||
1580 | /* truncate the reserved regions */ | 1585 | /* truncate the reserved regions */ |
1581 | memblock_remove_range(&memblock.reserved, 0, base); | 1586 | memblock_remove_range(&memblock.reserved, 0, base); |
1582 | memblock_remove_range(&memblock.reserved, | 1587 | memblock_remove_range(&memblock.reserved, |
1583 | base + size, (phys_addr_t)ULLONG_MAX); | 1588 | base + size, PHYS_ADDR_MAX); |
1584 | } | 1589 | } |
1585 | 1590 | ||
1586 | void __init memblock_mem_limit_remove_map(phys_addr_t limit) | 1591 | void __init memblock_mem_limit_remove_map(phys_addr_t limit) |
@@ -1593,7 +1598,7 @@ void __init memblock_mem_limit_remove_map(phys_addr_t limit) | |||
1593 | max_addr = __find_max_addr(limit); | 1598 | max_addr = __find_max_addr(limit); |
1594 | 1599 | ||
1595 | /* @limit exceeds the total size of the memory, do nothing */ | 1600 | /* @limit exceeds the total size of the memory, do nothing */ |
1596 | if (max_addr == (phys_addr_t)ULLONG_MAX) | 1601 | if (max_addr == PHYS_ADDR_MAX) |
1597 | return; | 1602 | return; |
1598 | 1603 | ||
1599 | memblock_cap_memory_range(0, max_addr); | 1604 | memblock_cap_memory_range(0, max_addr); |
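The memblock hunks replace the (phys_addr_t)ULLONG_MAX casts with the self-describing PHYS_ADDR_MAX; on 32-bit the old cast truncated to the same value, so this is a clarity fix rather than a behavior change. A small model of the overflow-safe clamp that memblock_cap_size() performs, with a pretend 32-bit physical address type:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t phys_addr_t;            /* stand-in for the kernel type */
    #define PHYS_ADDR_MAX ((phys_addr_t)~0)

    static phys_addr_t cap_size(phys_addr_t base, phys_addr_t *size)
    {
        phys_addr_t room = PHYS_ADDR_MAX - base;

        if (*size > room)                    /* base + *size would wrap */
            *size = room;
        return *size;
    }

    int main(void)
    {
        phys_addr_t size = 0x90000000u;

        cap_size(0x80000000u, &size);
        printf("capped size = 0x%x\n", (unsigned)size);  /* 0x7fffffff */
        return 0;
    }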
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1695f38630f1..c1e64d60ed02 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1034,13 +1034,13 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) | |||
1034 | unsigned long limit; | 1034 | unsigned long limit; |
1035 | 1035 | ||
1036 | count = page_counter_read(&memcg->memory); | 1036 | count = page_counter_read(&memcg->memory); |
1037 | limit = READ_ONCE(memcg->memory.limit); | 1037 | limit = READ_ONCE(memcg->memory.max); |
1038 | if (count < limit) | 1038 | if (count < limit) |
1039 | margin = limit - count; | 1039 | margin = limit - count; |
1040 | 1040 | ||
1041 | if (do_memsw_account()) { | 1041 | if (do_memsw_account()) { |
1042 | count = page_counter_read(&memcg->memsw); | 1042 | count = page_counter_read(&memcg->memsw); |
1043 | limit = READ_ONCE(memcg->memsw.limit); | 1043 | limit = READ_ONCE(memcg->memsw.max); |
1044 | if (count <= limit) | 1044 | if (count <= limit) |
1045 | margin = min(margin, limit - count); | 1045 | margin = min(margin, limit - count); |
1046 | else | 1046 | else |
@@ -1148,13 +1148,13 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
1148 | 1148 | ||
1149 | pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n", | 1149 | pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n", |
1150 | K((u64)page_counter_read(&memcg->memory)), | 1150 | K((u64)page_counter_read(&memcg->memory)), |
1151 | K((u64)memcg->memory.limit), memcg->memory.failcnt); | 1151 | K((u64)memcg->memory.max), memcg->memory.failcnt); |
1152 | pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n", | 1152 | pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n", |
1153 | K((u64)page_counter_read(&memcg->memsw)), | 1153 | K((u64)page_counter_read(&memcg->memsw)), |
1154 | K((u64)memcg->memsw.limit), memcg->memsw.failcnt); | 1154 | K((u64)memcg->memsw.max), memcg->memsw.failcnt); |
1155 | pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n", | 1155 | pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n", |
1156 | K((u64)page_counter_read(&memcg->kmem)), | 1156 | K((u64)page_counter_read(&memcg->kmem)), |
1157 | K((u64)memcg->kmem.limit), memcg->kmem.failcnt); | 1157 | K((u64)memcg->kmem.max), memcg->kmem.failcnt); |
1158 | 1158 | ||
1159 | for_each_mem_cgroup_tree(iter, memcg) { | 1159 | for_each_mem_cgroup_tree(iter, memcg) { |
1160 | pr_info("Memory cgroup stats for "); | 1160 | pr_info("Memory cgroup stats for "); |
@@ -1179,21 +1179,21 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
1179 | /* | 1179 | /* |
1180 | * Return the memory (and swap, if configured) limit for a memcg. | 1180 | * Return the memory (and swap, if configured) limit for a memcg. |
1181 | */ | 1181 | */ |
1182 | unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) | 1182 | unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg) |
1183 | { | 1183 | { |
1184 | unsigned long limit; | 1184 | unsigned long max; |
1185 | 1185 | ||
1186 | limit = memcg->memory.limit; | 1186 | max = memcg->memory.max; |
1187 | if (mem_cgroup_swappiness(memcg)) { | 1187 | if (mem_cgroup_swappiness(memcg)) { |
1188 | unsigned long memsw_limit; | 1188 | unsigned long memsw_max; |
1189 | unsigned long swap_limit; | 1189 | unsigned long swap_max; |
1190 | 1190 | ||
1191 | memsw_limit = memcg->memsw.limit; | 1191 | memsw_max = memcg->memsw.max; |
1192 | swap_limit = memcg->swap.limit; | 1192 | swap_max = memcg->swap.max; |
1193 | swap_limit = min(swap_limit, (unsigned long)total_swap_pages); | 1193 | swap_max = min(swap_max, (unsigned long)total_swap_pages); |
1194 | limit = min(limit + swap_limit, memsw_limit); | 1194 | max = min(max + swap_max, memsw_max); |
1195 | } | 1195 | } |
1196 | return limit; | 1196 | return max; |
1197 | } | 1197 | } |
1198 | 1198 | ||
1199 | static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | 1199 | static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, |
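mem_cgroup_get_max() above sizes the OOM killer's view of a cgroup: memory.max plus the usable swap, still bounded by the v1 memsw cap. A worked model in page units, with made-up numbers:

    #include <stdio.h>

    static unsigned long get_max(unsigned long mem_max, unsigned long swap_max,
                                 unsigned long memsw_max, unsigned long total_swap)
    {
        unsigned long s = swap_max < total_swap ? swap_max : total_swap;
        unsigned long sum = mem_max + s;

        return sum < memsw_max ? sum : memsw_max;
    }

    int main(void)
    {
        /* 1G memory cap, 512M swap cap, 2G memsw cap, 256M of swap present */
        printf("%lu pages\n", get_max(262144, 131072, 524288, 65536)); /* 327680 */
        return 0;
    }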
@@ -2444,12 +2444,13 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, | |||
2444 | } | 2444 | } |
2445 | #endif | 2445 | #endif |
2446 | 2446 | ||
2447 | static DEFINE_MUTEX(memcg_limit_mutex); | 2447 | static DEFINE_MUTEX(memcg_max_mutex); |
2448 | 2448 | ||
2449 | static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | 2449 | static int mem_cgroup_resize_max(struct mem_cgroup *memcg, |
2450 | unsigned long limit, bool memsw) | 2450 | unsigned long max, bool memsw) |
2451 | { | 2451 | { |
2452 | bool enlarge = false; | 2452 | bool enlarge = false; |
2453 | bool drained = false; | ||
2453 | int ret; | 2454 | int ret; |
2454 | bool limits_invariant; | 2455 | bool limits_invariant; |
2455 | struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory; | 2456 | struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory; |
@@ -2460,26 +2461,32 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
2460 | break; | 2461 | break; |
2461 | } | 2462 | } |
2462 | 2463 | ||
2463 | mutex_lock(&memcg_limit_mutex); | 2464 | mutex_lock(&memcg_max_mutex); |
2464 | /* | 2465 | /* |
2465 | * Make sure that the new limit (memsw or memory limit) doesn't | 2466 | * Make sure that the new limit (memsw or memory limit) doesn't |
2466 | * break our basic invariant rule memory.limit <= memsw.limit. | 2467 | * break our basic invariant rule memory.max <= memsw.max. |
2467 | */ | 2468 | */ |
2468 | limits_invariant = memsw ? limit >= memcg->memory.limit : | 2469 | limits_invariant = memsw ? max >= memcg->memory.max : |
2469 | limit <= memcg->memsw.limit; | 2470 | max <= memcg->memsw.max; |
2470 | if (!limits_invariant) { | 2471 | if (!limits_invariant) { |
2471 | mutex_unlock(&memcg_limit_mutex); | 2472 | mutex_unlock(&memcg_max_mutex); |
2472 | ret = -EINVAL; | 2473 | ret = -EINVAL; |
2473 | break; | 2474 | break; |
2474 | } | 2475 | } |
2475 | if (limit > counter->limit) | 2476 | if (max > counter->max) |
2476 | enlarge = true; | 2477 | enlarge = true; |
2477 | ret = page_counter_limit(counter, limit); | 2478 | ret = page_counter_set_max(counter, max); |
2478 | mutex_unlock(&memcg_limit_mutex); | 2479 | mutex_unlock(&memcg_max_mutex); |
2479 | 2480 | ||
2480 | if (!ret) | 2481 | if (!ret) |
2481 | break; | 2482 | break; |
2482 | 2483 | ||
2484 | if (!drained) { | ||
2485 | drain_all_stock(memcg); | ||
2486 | drained = true; | ||
2487 | continue; | ||
2488 | } | ||
2489 | |||
2483 | if (!try_to_free_mem_cgroup_pages(memcg, 1, | 2490 | if (!try_to_free_mem_cgroup_pages(memcg, 1, |
2484 | GFP_KERNEL, !memsw)) { | 2491 | GFP_KERNEL, !memsw)) { |
2485 | ret = -EBUSY; | 2492 | ret = -EBUSY; |
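The new drained flag changes the shrink path's retry order: pages sitting in the per-CPU charge stocks are still charged to the counter, so flushing them once is tried before the much more expensive reclaim. A toy model of that ordering; the state, names and numbers below are made up, not the kernel's API:

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned long usage  = 1000;  /* pages charged to the counter */
    static unsigned long cached = 200;   /* model of the per-CPU stock */

    static void drain_all_stock(void) { usage -= cached; cached = 0; }

    static bool reclaim_some(void)
    {
        if (usage < 50)
            return false;
        usage -= 50;
        return true;
    }

    static int set_max(unsigned long max)
    {
        bool drained = false;

        while (usage > max) {
            if (!drained) {              /* cheap first: return cached charges */
                drain_all_stock();
                drained = true;
                continue;
            }
            if (!reclaim_some())         /* then actually reclaim pages */
                return -1;               /* -EBUSY in the kernel */
        }
        return 0;
    }

    int main(void)
    {
        printf("set_max(900) -> %d, usage=%lu\n", set_max(900), usage);
        return 0;
    }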
@@ -2603,6 +2610,9 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) | |||
2603 | 2610 | ||
2604 | /* we call try-to-free pages to make this cgroup empty */ | 2611 | /* we call try-to-free pages to make this cgroup empty */ |
2605 | lru_add_drain_all(); | 2612 | lru_add_drain_all(); |
2613 | |||
2614 | drain_all_stock(memcg); | ||
2615 | |||
2606 | /* try to free all pages in this cgroup */ | 2616 | /* try to free all pages in this cgroup */ |
2607 | while (nr_retries && page_counter_read(&memcg->memory)) { | 2617 | while (nr_retries && page_counter_read(&memcg->memory)) { |
2608 | int progress; | 2618 | int progress; |
@@ -2757,7 +2767,7 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, | |||
2757 | return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE; | 2767 | return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE; |
2758 | return (u64)page_counter_read(counter) * PAGE_SIZE; | 2768 | return (u64)page_counter_read(counter) * PAGE_SIZE; |
2759 | case RES_LIMIT: | 2769 | case RES_LIMIT: |
2760 | return (u64)counter->limit * PAGE_SIZE; | 2770 | return (u64)counter->max * PAGE_SIZE; |
2761 | case RES_MAX_USAGE: | 2771 | case RES_MAX_USAGE: |
2762 | return (u64)counter->watermark * PAGE_SIZE; | 2772 | return (u64)counter->watermark * PAGE_SIZE; |
2763 | case RES_FAILCNT: | 2773 | case RES_FAILCNT: |
@@ -2871,24 +2881,24 @@ static void memcg_free_kmem(struct mem_cgroup *memcg) | |||
2871 | } | 2881 | } |
2872 | #endif /* !CONFIG_SLOB */ | 2882 | #endif /* !CONFIG_SLOB */ |
2873 | 2883 | ||
2874 | static int memcg_update_kmem_limit(struct mem_cgroup *memcg, | 2884 | static int memcg_update_kmem_max(struct mem_cgroup *memcg, |
2875 | unsigned long limit) | 2885 | unsigned long max) |
2876 | { | 2886 | { |
2877 | int ret; | 2887 | int ret; |
2878 | 2888 | ||
2879 | mutex_lock(&memcg_limit_mutex); | 2889 | mutex_lock(&memcg_max_mutex); |
2880 | ret = page_counter_limit(&memcg->kmem, limit); | 2890 | ret = page_counter_set_max(&memcg->kmem, max); |
2881 | mutex_unlock(&memcg_limit_mutex); | 2891 | mutex_unlock(&memcg_max_mutex); |
2882 | return ret; | 2892 | return ret; |
2883 | } | 2893 | } |
2884 | 2894 | ||
2885 | static int memcg_update_tcp_limit(struct mem_cgroup *memcg, unsigned long limit) | 2895 | static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max) |
2886 | { | 2896 | { |
2887 | int ret; | 2897 | int ret; |
2888 | 2898 | ||
2889 | mutex_lock(&memcg_limit_mutex); | 2899 | mutex_lock(&memcg_max_mutex); |
2890 | 2900 | ||
2891 | ret = page_counter_limit(&memcg->tcpmem, limit); | 2901 | ret = page_counter_set_max(&memcg->tcpmem, max); |
2892 | if (ret) | 2902 | if (ret) |
2893 | goto out; | 2903 | goto out; |
2894 | 2904 | ||
@@ -2913,7 +2923,7 @@ static int memcg_update_tcp_limit(struct mem_cgroup *memcg, unsigned long limit) | |||
2913 | memcg->tcpmem_active = true; | 2923 | memcg->tcpmem_active = true; |
2914 | } | 2924 | } |
2915 | out: | 2925 | out: |
2916 | mutex_unlock(&memcg_limit_mutex); | 2926 | mutex_unlock(&memcg_max_mutex); |
2917 | return ret; | 2927 | return ret; |
2918 | } | 2928 | } |
2919 | 2929 | ||
@@ -2941,16 +2951,16 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, | |||
2941 | } | 2951 | } |
2942 | switch (MEMFILE_TYPE(of_cft(of)->private)) { | 2952 | switch (MEMFILE_TYPE(of_cft(of)->private)) { |
2943 | case _MEM: | 2953 | case _MEM: |
2944 | ret = mem_cgroup_resize_limit(memcg, nr_pages, false); | 2954 | ret = mem_cgroup_resize_max(memcg, nr_pages, false); |
2945 | break; | 2955 | break; |
2946 | case _MEMSWAP: | 2956 | case _MEMSWAP: |
2947 | ret = mem_cgroup_resize_limit(memcg, nr_pages, true); | 2957 | ret = mem_cgroup_resize_max(memcg, nr_pages, true); |
2948 | break; | 2958 | break; |
2949 | case _KMEM: | 2959 | case _KMEM: |
2950 | ret = memcg_update_kmem_limit(memcg, nr_pages); | 2960 | ret = memcg_update_kmem_max(memcg, nr_pages); |
2951 | break; | 2961 | break; |
2952 | case _TCP: | 2962 | case _TCP: |
2953 | ret = memcg_update_tcp_limit(memcg, nr_pages); | 2963 | ret = memcg_update_tcp_max(memcg, nr_pages); |
2954 | break; | 2964 | break; |
2955 | } | 2965 | } |
2956 | break; | 2966 | break; |
@@ -3083,7 +3093,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) | |||
3083 | #endif /* CONFIG_NUMA */ | 3093 | #endif /* CONFIG_NUMA */ |
3084 | 3094 | ||
3085 | /* Universal VM events cgroup1 shows, original sort order */ | 3095 | /* Universal VM events cgroup1 shows, original sort order */ |
3086 | unsigned int memcg1_events[] = { | 3096 | static const unsigned int memcg1_events[] = { |
3087 | PGPGIN, | 3097 | PGPGIN, |
3088 | PGPGOUT, | 3098 | PGPGOUT, |
3089 | PGFAULT, | 3099 | PGFAULT, |
@@ -3126,8 +3136,8 @@ static int memcg_stat_show(struct seq_file *m, void *v) | |||
3126 | /* Hierarchical information */ | 3136 | /* Hierarchical information */ |
3127 | memory = memsw = PAGE_COUNTER_MAX; | 3137 | memory = memsw = PAGE_COUNTER_MAX; |
3128 | for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) { | 3138 | for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) { |
3129 | memory = min(memory, mi->memory.limit); | 3139 | memory = min(memory, mi->memory.max); |
3130 | memsw = min(memsw, mi->memsw.limit); | 3140 | memsw = min(memsw, mi->memsw.max); |
3131 | } | 3141 | } |
3132 | seq_printf(m, "hierarchical_memory_limit %llu\n", | 3142 | seq_printf(m, "hierarchical_memory_limit %llu\n", |
3133 | (u64)memory * PAGE_SIZE); | 3143 | (u64)memory * PAGE_SIZE); |
@@ -3562,11 +3572,6 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css, | |||
3562 | 3572 | ||
3563 | #ifdef CONFIG_CGROUP_WRITEBACK | 3573 | #ifdef CONFIG_CGROUP_WRITEBACK |
3564 | 3574 | ||
3565 | struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg) | ||
3566 | { | ||
3567 | return &memcg->cgwb_list; | ||
3568 | } | ||
3569 | |||
3570 | static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp) | 3575 | static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp) |
3571 | { | 3576 | { |
3572 | return wb_domain_init(&memcg->cgwb_domain, gfp); | 3577 | return wb_domain_init(&memcg->cgwb_domain, gfp); |
@@ -3626,7 +3631,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, | |||
3626 | *pheadroom = PAGE_COUNTER_MAX; | 3631 | *pheadroom = PAGE_COUNTER_MAX; |
3627 | 3632 | ||
3628 | while ((parent = parent_mem_cgroup(memcg))) { | 3633 | while ((parent = parent_mem_cgroup(memcg))) { |
3629 | unsigned long ceiling = min(memcg->memory.limit, memcg->high); | 3634 | unsigned long ceiling = min(memcg->memory.max, memcg->high); |
3630 | unsigned long used = page_counter_read(&memcg->memory); | 3635 | unsigned long used = page_counter_read(&memcg->memory); |
3631 | 3636 | ||
3632 | *pheadroom = min(*pheadroom, ceiling - min(ceiling, used)); | 3637 | *pheadroom = min(*pheadroom, ceiling - min(ceiling, used)); |
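The headroom line above uses the idiom ceiling - min(ceiling, used): a subtraction floored at zero, so an over-limit cgroup reports no writeback headroom instead of a huge wrapped-around value. A tiny sketch:

    #include <stdio.h>

    static unsigned long headroom(unsigned long ceiling, unsigned long used)
    {
        unsigned long capped = used < ceiling ? used : ceiling;

        return ceiling - capped;
    }

    int main(void)
    {
        printf("%lu\n", headroom(100, 30));   /* 70 */
        printf("%lu\n", headroom(100, 130));  /* 0, no unsigned underflow */
        return 0;
    }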
@@ -4270,7 +4275,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
4270 | } | 4275 | } |
4271 | spin_unlock(&memcg->event_list_lock); | 4276 | spin_unlock(&memcg->event_list_lock); |
4272 | 4277 | ||
4273 | memcg->low = 0; | 4278 | page_counter_set_min(&memcg->memory, 0); |
4279 | page_counter_set_low(&memcg->memory, 0); | ||
4274 | 4280 | ||
4275 | memcg_offline_kmem(memcg); | 4281 | memcg_offline_kmem(memcg); |
4276 | wb_memcg_offline(memcg); | 4282 | wb_memcg_offline(memcg); |
@@ -4319,12 +4325,13 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) | |||
4319 | { | 4325 | { |
4320 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 4326 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
4321 | 4327 | ||
4322 | page_counter_limit(&memcg->memory, PAGE_COUNTER_MAX); | 4328 | page_counter_set_max(&memcg->memory, PAGE_COUNTER_MAX); |
4323 | page_counter_limit(&memcg->swap, PAGE_COUNTER_MAX); | 4329 | page_counter_set_max(&memcg->swap, PAGE_COUNTER_MAX); |
4324 | page_counter_limit(&memcg->memsw, PAGE_COUNTER_MAX); | 4330 | page_counter_set_max(&memcg->memsw, PAGE_COUNTER_MAX); |
4325 | page_counter_limit(&memcg->kmem, PAGE_COUNTER_MAX); | 4331 | page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX); |
4326 | page_counter_limit(&memcg->tcpmem, PAGE_COUNTER_MAX); | 4332 | page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX); |
4327 | memcg->low = 0; | 4333 | page_counter_set_min(&memcg->memory, 0); |
4334 | page_counter_set_low(&memcg->memory, 0); | ||
4328 | memcg->high = PAGE_COUNTER_MAX; | 4335 | memcg->high = PAGE_COUNTER_MAX; |
4329 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4336 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4330 | memcg_wb_domain_size_changed(memcg); | 4337 | memcg_wb_domain_size_changed(memcg); |
@@ -5061,10 +5068,40 @@ static u64 memory_current_read(struct cgroup_subsys_state *css, | |||
5061 | return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE; | 5068 | return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE; |
5062 | } | 5069 | } |
5063 | 5070 | ||
5071 | static int memory_min_show(struct seq_file *m, void *v) | ||
5072 | { | ||
5073 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | ||
5074 | unsigned long min = READ_ONCE(memcg->memory.min); | ||
5075 | |||
5076 | if (min == PAGE_COUNTER_MAX) | ||
5077 | seq_puts(m, "max\n"); | ||
5078 | else | ||
5079 | seq_printf(m, "%llu\n", (u64)min * PAGE_SIZE); | ||
5080 | |||
5081 | return 0; | ||
5082 | } | ||
5083 | |||
5084 | static ssize_t memory_min_write(struct kernfs_open_file *of, | ||
5085 | char *buf, size_t nbytes, loff_t off) | ||
5086 | { | ||
5087 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); | ||
5088 | unsigned long min; | ||
5089 | int err; | ||
5090 | |||
5091 | buf = strstrip(buf); | ||
5092 | err = page_counter_memparse(buf, "max", &min); | ||
5093 | if (err) | ||
5094 | return err; | ||
5095 | |||
5096 | page_counter_set_min(&memcg->memory, min); | ||
5097 | |||
5098 | return nbytes; | ||
5099 | } | ||
5100 | |||
5064 | static int memory_low_show(struct seq_file *m, void *v) | 5101 | static int memory_low_show(struct seq_file *m, void *v) |
5065 | { | 5102 | { |
5066 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | 5103 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5067 | unsigned long low = READ_ONCE(memcg->low); | 5104 | unsigned long low = READ_ONCE(memcg->memory.low); |
5068 | 5105 | ||
5069 | if (low == PAGE_COUNTER_MAX) | 5106 | if (low == PAGE_COUNTER_MAX) |
5070 | seq_puts(m, "max\n"); | 5107 | seq_puts(m, "max\n"); |
@@ -5086,7 +5123,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of, | |||
5086 | if (err) | 5123 | if (err) |
5087 | return err; | 5124 | return err; |
5088 | 5125 | ||
5089 | memcg->low = low; | 5126 | page_counter_set_low(&memcg->memory, low); |
5090 | 5127 | ||
5091 | return nbytes; | 5128 | return nbytes; |
5092 | } | 5129 | } |
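The memory.min and memory.low writers above share one convention via page_counter_memparse(): the literal string "max" selects the unlimited value, anything else is a byte count converted to pages. A simplified userspace model; the kernel parser also accepts k/m/g suffixes through memparse():

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE        4096UL
    #define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE)

    static int parse_pages(const char *buf, unsigned long *out)
    {
        char *end;
        unsigned long bytes;

        if (!strcmp(buf, "max")) {
            *out = PAGE_COUNTER_MAX;
            return 0;
        }
        bytes = strtoul(buf, &end, 10);
        if (*end != '\0')
            return -1;                       /* -EINVAL in the kernel */
        *out = bytes / PAGE_SIZE;
        return 0;
    }

    int main(void)
    {
        unsigned long pages;

        parse_pages("2147483648", &pages);   /* 2G */
        printf("2G  -> %lu pages\n", pages); /* 524288 */
        parse_pages("max", &pages);
        printf("max -> %lu pages\n", pages);
        return 0;
    }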
@@ -5131,7 +5168,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, | |||
5131 | static int memory_max_show(struct seq_file *m, void *v) | 5168 | static int memory_max_show(struct seq_file *m, void *v) |
5132 | { | 5169 | { |
5133 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | 5170 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5134 | unsigned long max = READ_ONCE(memcg->memory.limit); | 5171 | unsigned long max = READ_ONCE(memcg->memory.max); |
5135 | 5172 | ||
5136 | if (max == PAGE_COUNTER_MAX) | 5173 | if (max == PAGE_COUNTER_MAX) |
5137 | seq_puts(m, "max\n"); | 5174 | seq_puts(m, "max\n"); |
@@ -5155,7 +5192,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, | |||
5155 | if (err) | 5192 | if (err) |
5156 | return err; | 5193 | return err; |
5157 | 5194 | ||
5158 | xchg(&memcg->memory.limit, max); | 5195 | xchg(&memcg->memory.max, max); |
5159 | 5196 | ||
5160 | for (;;) { | 5197 | for (;;) { |
5161 | unsigned long nr_pages = page_counter_read(&memcg->memory); | 5198 | unsigned long nr_pages = page_counter_read(&memcg->memory); |
@@ -5296,6 +5333,12 @@ static struct cftype memory_files[] = { | |||
5296 | .read_u64 = memory_current_read, | 5333 | .read_u64 = memory_current_read, |
5297 | }, | 5334 | }, |
5298 | { | 5335 | { |
5336 | .name = "min", | ||
5337 | .flags = CFTYPE_NOT_ON_ROOT, | ||
5338 | .seq_show = memory_min_show, | ||
5339 | .write = memory_min_write, | ||
5340 | }, | ||
5341 | { | ||
5299 | .name = "low", | 5342 | .name = "low", |
5300 | .flags = CFTYPE_NOT_ON_ROOT, | 5343 | .flags = CFTYPE_NOT_ON_ROOT, |
5301 | .seq_show = memory_low_show, | 5344 | .seq_show = memory_low_show, |
@@ -5344,54 +5387,140 @@ struct cgroup_subsys memory_cgrp_subsys = { | |||
5344 | }; | 5387 | }; |
5345 | 5388 | ||
5346 | /** | 5389 | /** |
5347 | * mem_cgroup_low - check if memory consumption is below the normal range | 5390 | * mem_cgroup_protected - check if memory consumption is in the normal range |
5348 | * @root: the top ancestor of the sub-tree being checked | 5391 | * @root: the top ancestor of the sub-tree being checked |
5349 | * @memcg: the memory cgroup to check | 5392 | * @memcg: the memory cgroup to check |
5350 | * | 5393 | * |
5351 | * Returns %true if memory consumption of @memcg, and that of all | 5394 | * WARNING: This function is not stateless! It can only be used as part |
5352 | * ancestors up to (but not including) @root, is below the normal range. | 5395 | * of a top-down tree iteration, not for isolated queries. |
5396 | * | ||
5397 | * Returns one of the following: | ||
5398 | * MEMCG_PROT_NONE: cgroup memory is not protected | ||
5399 | * MEMCG_PROT_LOW: cgroup memory is protected as long there is | ||
5400 | * an unprotected supply of reclaimable memory from other cgroups. | ||
5401 | * MEMCG_PROT_MIN: cgroup memory is protected | ||
5353 | * | 5402 | * |
5354 | * @root is exclusive; it is never low when looked at directly and isn't | 5403 | * @root is exclusive; it is never protected when looked at directly |
5355 | * checked when traversing the hierarchy. | ||
5356 | * | 5404 | * |
5357 | * Excluding @root enables using memory.low to prioritize memory usage | 5405 | * To provide a proper hierarchical behavior, effective memory.min/low values |
5358 | * between cgroups within a subtree of the hierarchy that is limited by | 5406 | * are used. Below is the description of how effective memory.low is calculated. |
5359 | * memory.high or memory.max. | 5407 | * Effective memory.min value is calculated in the same way. |
5360 | * | 5408 | * |
5361 | * For example, given cgroup A with children B and C: | 5409 | * Effective memory.low is always equal to or less than the original memory.low. |
5410 | * If there is no memory.low overcommitment (which is always true for | ||
5411 | * top-level memory cgroups), these two values are equal. | ||
5412 | * Otherwise, it's a part of parent's effective memory.low, | ||
5413 | * calculated as a cgroup's memory.low usage divided by the sum of siblings' | ||
5414 | * memory.low usages, where memory.low usage is the size of actually | ||
5415 | * protected memory. | ||
5362 | * | 5416 | * |
5363 | * A | 5417 | * low_usage |
5364 | * / \ | 5418 | * elow = min( memory.low, parent->elow * ------------------ ), |
5365 | * B C | 5419 | * siblings_low_usage |
5366 | * | 5420 | * |
5367 | * and | 5421 | * | memory.current, if memory.current < memory.low |
5422 | * low_usage = | | ||
5423 | * | 0, otherwise. | ||
5368 | * | 5424 | * |
5369 | * 1. A/memory.current > A/memory.high | ||
5370 | * 2. A/B/memory.current < A/B/memory.low | ||
5371 | * 3. A/C/memory.current >= A/C/memory.low | ||
5372 | * | 5425 | * |
5373 | * As 'A' is high, i.e. triggers reclaim from 'A', and 'B' is low, we | 5426 | * Such definition of the effective memory.low provides the expected |
5374 | * should reclaim from 'C' until 'A' is no longer high or until we can | 5427 | * hierarchical behavior: parent's memory.low value is limiting |
5375 | * no longer reclaim from 'C'. If 'A', i.e. @root, isn't excluded by | 5428 | * children, unprotected memory is reclaimed first and cgroups, |
5376 | * mem_cgroup_low when reclaming from 'A', then 'B' won't be considered | 5429 | * which are not using their guarantee do not affect actual memory |
5377 | * low and we will reclaim indiscriminately from both 'B' and 'C'. | 5430 | * distribution. |
5431 | * | ||
5432 | * For example, if there are memcgs A, A/B, A/C, A/D and A/E: | ||
5433 | * | ||
5434 | * A A/memory.low = 2G, A/memory.current = 6G | ||
5435 | * //\\ | ||
5436 | * BC DE B/memory.low = 3G B/memory.current = 2G | ||
5437 | * C/memory.low = 1G C/memory.current = 2G | ||
5438 | * D/memory.low = 0 D/memory.current = 2G | ||
5439 | * E/memory.low = 10G E/memory.current = 0 | ||
5440 | * | ||
5441 | * and the memory pressure is applied, the following memory distribution | ||
5442 | * is expected (approximately): | ||
5443 | * | ||
5444 | * A/memory.current = 2G | ||
5445 | * | ||
5446 | * B/memory.current = 1.3G | ||
5447 | * C/memory.current = 0.6G | ||
5448 | * D/memory.current = 0 | ||
5449 | * E/memory.current = 0 | ||
5450 | * | ||
5451 | * These calculations require constant tracking of the actual low usages | ||
5452 | * (see propagate_protected_usage()), as well as recursive calculation of | ||
5453 | * effective memory.low values. But as we do call mem_cgroup_protected() | ||
5454 | * path for each memory cgroup top-down from the reclaim, | ||
5455 | * it's possible to optimize this part, and save calculated elow | ||
5456 | * for next usage. This part is intentionally racy, but it's ok, | ||
5457 | * as memory.low is a best-effort mechanism. | ||
5378 | */ | 5458 | */ |
5379 | bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) | 5459 | enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root, |
5460 | struct mem_cgroup *memcg) | ||
5380 | { | 5461 | { |
5462 | struct mem_cgroup *parent; | ||
5463 | unsigned long emin, parent_emin; | ||
5464 | unsigned long elow, parent_elow; | ||
5465 | unsigned long usage; | ||
5466 | |||
5381 | if (mem_cgroup_disabled()) | 5467 | if (mem_cgroup_disabled()) |
5382 | return false; | 5468 | return MEMCG_PROT_NONE; |
5383 | 5469 | ||
5384 | if (!root) | 5470 | if (!root) |
5385 | root = root_mem_cgroup; | 5471 | root = root_mem_cgroup; |
5386 | if (memcg == root) | 5472 | if (memcg == root) |
5387 | return false; | 5473 | return MEMCG_PROT_NONE; |
5474 | |||
5475 | usage = page_counter_read(&memcg->memory); | ||
5476 | if (!usage) | ||
5477 | return MEMCG_PROT_NONE; | ||
5478 | |||
5479 | emin = memcg->memory.min; | ||
5480 | elow = memcg->memory.low; | ||
5481 | |||
5482 | parent = parent_mem_cgroup(memcg); | ||
5483 | if (parent == root) | ||
5484 | goto exit; | ||
5388 | 5485 | ||
5389 | for (; memcg != root; memcg = parent_mem_cgroup(memcg)) { | 5486 | parent_emin = READ_ONCE(parent->memory.emin); |
5390 | if (page_counter_read(&memcg->memory) >= memcg->low) | 5487 | emin = min(emin, parent_emin); |
5391 | return false; | 5488 | if (emin && parent_emin) { |
5489 | unsigned long min_usage, siblings_min_usage; | ||
5490 | |||
5491 | min_usage = min(usage, memcg->memory.min); | ||
5492 | siblings_min_usage = atomic_long_read( | ||
5493 | &parent->memory.children_min_usage); | ||
5494 | |||
5495 | if (min_usage && siblings_min_usage) | ||
5496 | emin = min(emin, parent_emin * min_usage / | ||
5497 | siblings_min_usage); | ||
5392 | } | 5498 | } |
5393 | 5499 | ||
5394 | return true; | 5500 | parent_elow = READ_ONCE(parent->memory.elow); |
5501 | elow = min(elow, parent_elow); | ||
5502 | if (elow && parent_elow) { | ||
5503 | unsigned long low_usage, siblings_low_usage; | ||
5504 | |||
5505 | low_usage = min(usage, memcg->memory.low); | ||
5506 | siblings_low_usage = atomic_long_read( | ||
5507 | &parent->memory.children_low_usage); | ||
5508 | |||
5509 | if (low_usage && siblings_low_usage) | ||
5510 | elow = min(elow, parent_elow * low_usage / | ||
5511 | siblings_low_usage); | ||
5512 | } | ||
5513 | |||
5514 | exit: | ||
5515 | memcg->memory.emin = emin; | ||
5516 | memcg->memory.elow = elow; | ||
5517 | |||
5518 | if (usage <= emin) | ||
5519 | return MEMCG_PROT_MIN; | ||
5520 | else if (usage <= elow) | ||
5521 | return MEMCG_PROT_LOW; | ||
5522 | else | ||
5523 | return MEMCG_PROT_NONE; | ||
5395 | } | 5524 | } |
5396 | 5525 | ||
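The expected distribution in the comment above can be reproduced by applying the elow formula with low_usage = min(memory.current, memory.low), which is the form the code actually uses. A userspace check of the A/B/C/D/E example, sizes in GB:

    #include <stdio.h>

    struct cg { const char *name; double low, current; };

    int main(void)
    {
        struct cg kids[] = {
            { "B", 3, 2 }, { "C", 1, 2 }, { "D", 0, 2 }, { "E", 10, 0 },
        };
        double parent_elow = 2;            /* A/memory.low = 2G, fully effective */
        double siblings_low_usage = 0;     /* the kernel tracks this incrementally */
        int i;

        for (i = 0; i < 4; i++) {
            double lu = kids[i].current < kids[i].low ? kids[i].current : kids[i].low;

            siblings_low_usage += lu;      /* 2 + 1 + 0 + 0 = 3 */
        }

        for (i = 0; i < 4; i++) {
            double lu = kids[i].current < kids[i].low ? kids[i].current : kids[i].low;
            double elow = kids[i].low < parent_elow ? kids[i].low : parent_elow;

            if (lu > 0 && siblings_low_usage > 0) {
                double scaled = parent_elow * lu / siblings_low_usage;

                if (scaled < elow)
                    elow = scaled;
            }
            if (kids[i].current == 0)
                elow = 0;  /* zero usage: mem_cgroup_protected() bails out early */
            printf("%s: elow = %.2fG\n", kids[i].name, elow);
        }
        /* B: 1.33G, C: 0.67G, D: 0, E: 0 -- matching the 1.3G/0.6G above */
        return 0;
    }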
5397 | /** | 5526 | /** |
@@ -6012,10 +6141,17 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) | |||
6012 | if (!memcg) | 6141 | if (!memcg) |
6013 | return 0; | 6142 | return 0; |
6014 | 6143 | ||
6144 | if (!entry.val) { | ||
6145 | memcg_memory_event(memcg, MEMCG_SWAP_FAIL); | ||
6146 | return 0; | ||
6147 | } | ||
6148 | |||
6015 | memcg = mem_cgroup_id_get_online(memcg); | 6149 | memcg = mem_cgroup_id_get_online(memcg); |
6016 | 6150 | ||
6017 | if (!mem_cgroup_is_root(memcg) && | 6151 | if (!mem_cgroup_is_root(memcg) && |
6018 | !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) { | 6152 | !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) { |
6153 | memcg_memory_event(memcg, MEMCG_SWAP_MAX); | ||
6154 | memcg_memory_event(memcg, MEMCG_SWAP_FAIL); | ||
6019 | mem_cgroup_id_put(memcg); | 6155 | mem_cgroup_id_put(memcg); |
6020 | return -ENOMEM; | 6156 | return -ENOMEM; |
6021 | } | 6157 | } |
@@ -6067,7 +6203,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) | |||
6067 | return nr_swap_pages; | 6203 | return nr_swap_pages; |
6068 | for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) | 6204 | for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) |
6069 | nr_swap_pages = min_t(long, nr_swap_pages, | 6205 | nr_swap_pages = min_t(long, nr_swap_pages, |
6070 | READ_ONCE(memcg->swap.limit) - | 6206 | READ_ONCE(memcg->swap.max) - |
6071 | page_counter_read(&memcg->swap)); | 6207 | page_counter_read(&memcg->swap)); |
6072 | return nr_swap_pages; | 6208 | return nr_swap_pages; |
6073 | } | 6209 | } |
@@ -6088,7 +6224,7 @@ bool mem_cgroup_swap_full(struct page *page) | |||
6088 | return false; | 6224 | return false; |
6089 | 6225 | ||
6090 | for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) | 6226 | for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) |
6091 | if (page_counter_read(&memcg->swap) * 2 >= memcg->swap.limit) | 6227 | if (page_counter_read(&memcg->swap) * 2 >= memcg->swap.max) |
6092 | return true; | 6228 | return true; |
6093 | 6229 | ||
6094 | return false; | 6230 | return false; |
@@ -6122,7 +6258,7 @@ static u64 swap_current_read(struct cgroup_subsys_state *css, | |||
6122 | static int swap_max_show(struct seq_file *m, void *v) | 6258 | static int swap_max_show(struct seq_file *m, void *v) |
6123 | { | 6259 | { |
6124 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | 6260 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
6125 | unsigned long max = READ_ONCE(memcg->swap.limit); | 6261 | unsigned long max = READ_ONCE(memcg->swap.max); |
6126 | 6262 | ||
6127 | if (max == PAGE_COUNTER_MAX) | 6263 | if (max == PAGE_COUNTER_MAX) |
6128 | seq_puts(m, "max\n"); | 6264 | seq_puts(m, "max\n"); |
@@ -6144,15 +6280,23 @@ static ssize_t swap_max_write(struct kernfs_open_file *of, | |||
6144 | if (err) | 6280 | if (err) |
6145 | return err; | 6281 | return err; |
6146 | 6282 | ||
6147 | mutex_lock(&memcg_limit_mutex); | 6283 | xchg(&memcg->swap.max, max); |
6148 | err = page_counter_limit(&memcg->swap, max); | ||
6149 | mutex_unlock(&memcg_limit_mutex); | ||
6150 | if (err) | ||
6151 | return err; | ||
6152 | 6284 | ||
6153 | return nbytes; | 6285 | return nbytes; |
6154 | } | 6286 | } |
6155 | 6287 | ||
6288 | static int swap_events_show(struct seq_file *m, void *v) | ||
6289 | { | ||
6290 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | ||
6291 | |||
6292 | seq_printf(m, "max %lu\n", | ||
6293 | atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX])); | ||
6294 | seq_printf(m, "fail %lu\n", | ||
6295 | atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL])); | ||
6296 | |||
6297 | return 0; | ||
6298 | } | ||
6299 | |||
6156 | static struct cftype swap_files[] = { | 6300 | static struct cftype swap_files[] = { |
6157 | { | 6301 | { |
6158 | .name = "swap.current", | 6302 | .name = "swap.current", |
@@ -6165,6 +6309,12 @@ static struct cftype swap_files[] = { | |||
6165 | .seq_show = swap_max_show, | 6309 | .seq_show = swap_max_show, |
6166 | .write = swap_max_write, | 6310 | .write = swap_max_write, |
6167 | }, | 6311 | }, |
6312 | { | ||
6313 | .name = "swap.events", | ||
6314 | .flags = CFTYPE_NOT_ON_ROOT, | ||
6315 | .file_offset = offsetof(struct mem_cgroup, swap_events_file), | ||
6316 | .seq_show = swap_events_show, | ||
6317 | }, | ||
6168 | { } /* terminate */ | 6318 | { } /* terminate */ |
6169 | }; | 6319 | }; |
6170 | 6320 | ||
diff --git a/mm/memfd.c b/mm/memfd.c new file mode 100644 index 000000000000..27069518e3c5 --- /dev/null +++ b/mm/memfd.c | |||
@@ -0,0 +1,345 @@ | |||
1 | /* | ||
2 | * memfd_create system call and file sealing support | ||
3 | * | ||
4 | * Code was originally included in shmem.c, and broken out to facilitate | ||
5 | * use by hugetlbfs as well as tmpfs. | ||
6 | * | ||
7 | * This file is released under the GPL. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/vfs.h> | ||
12 | #include <linux/pagemap.h> | ||
13 | #include <linux/file.h> | ||
14 | #include <linux/mm.h> | ||
15 | #include <linux/sched/signal.h> | ||
16 | #include <linux/khugepaged.h> | ||
17 | #include <linux/syscalls.h> | ||
18 | #include <linux/hugetlb.h> | ||
19 | #include <linux/shmem_fs.h> | ||
20 | #include <linux/memfd.h> | ||
21 | #include <uapi/linux/memfd.h> | ||
22 | |||
23 | /* | ||
24 | * We need a tag: a new tag would expand every radix_tree_node by 8 bytes, | ||
25 | * so reuse a tag which we firmly believe is never set or cleared on tmpfs | ||
26 | * or hugetlbfs because they are memory only filesystems. | ||
27 | */ | ||
28 | #define MEMFD_TAG_PINNED PAGECACHE_TAG_TOWRITE | ||
29 | #define LAST_SCAN 4 /* about 150ms max */ | ||
30 | |||
31 | static void memfd_tag_pins(struct address_space *mapping) | ||
32 | { | ||
33 | struct radix_tree_iter iter; | ||
34 | void __rcu **slot; | ||
35 | pgoff_t start; | ||
36 | struct page *page; | ||
37 | |||
38 | lru_add_drain(); | ||
39 | start = 0; | ||
40 | rcu_read_lock(); | ||
41 | |||
42 | radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { | ||
43 | page = radix_tree_deref_slot(slot); | ||
44 | if (!page || radix_tree_exception(page)) { | ||
45 | if (radix_tree_deref_retry(page)) { | ||
46 | slot = radix_tree_iter_retry(&iter); | ||
47 | continue; | ||
48 | } | ||
49 | } else if (page_count(page) - page_mapcount(page) > 1) { | ||
50 | xa_lock_irq(&mapping->i_pages); | ||
51 | radix_tree_tag_set(&mapping->i_pages, iter.index, | ||
52 | MEMFD_TAG_PINNED); | ||
53 | xa_unlock_irq(&mapping->i_pages); | ||
54 | } | ||
55 | |||
56 | if (need_resched()) { | ||
57 | slot = radix_tree_iter_resume(slot, &iter); | ||
58 | cond_resched_rcu(); | ||
59 | } | ||
60 | } | ||
61 | rcu_read_unlock(); | ||
62 | } | ||
63 | |||
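memfd_tag_pins() treats a page as possibly pinned when its total refcount exceeds its mapcount by more than the single reference the page cache itself holds; anything beyond that may be a pending writer such as GUP for direct I/O. A sketch of just that predicate:

    #include <stdbool.h>
    #include <stdio.h>

    static bool page_maybe_pinned(int page_count, int page_mapcount)
    {
        /* one ref for the page cache, one per mapping; more means a pin */
        return page_count - page_mapcount > 1;
    }

    int main(void)
    {
        printf("%d\n", page_maybe_pinned(2, 1));  /* 0: cache ref + 1 mapping */
        printf("%d\n", page_maybe_pinned(3, 1));  /* 1: an extra ref is held */
        return 0;
    }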
64 | /* | ||
65 | * Setting SEAL_WRITE requires us to verify there's no pending writer. However, | ||
66 | * via get_user_pages(), drivers might have some pending I/O without any active | ||
67 | * user-space mappings (e.g., direct-IO, AIO). Therefore, we look at all pages | ||
68 | * and see whether they have an elevated ref-count. If so, we tag them and wait for | ||
69 | * them to be dropped. | ||
70 | * The caller must guarantee that no new user will acquire writable references | ||
71 | * to those pages to avoid races. | ||
72 | */ | ||
73 | static int memfd_wait_for_pins(struct address_space *mapping) | ||
74 | { | ||
75 | struct radix_tree_iter iter; | ||
76 | void __rcu **slot; | ||
77 | pgoff_t start; | ||
78 | struct page *page; | ||
79 | int error, scan; | ||
80 | |||
81 | memfd_tag_pins(mapping); | ||
82 | |||
83 | error = 0; | ||
84 | for (scan = 0; scan <= LAST_SCAN; scan++) { | ||
85 | if (!radix_tree_tagged(&mapping->i_pages, MEMFD_TAG_PINNED)) | ||
86 | break; | ||
87 | |||
88 | if (!scan) | ||
89 | lru_add_drain_all(); | ||
90 | else if (schedule_timeout_killable((HZ << scan) / 200)) | ||
91 | scan = LAST_SCAN; | ||
92 | |||
93 | start = 0; | ||
94 | rcu_read_lock(); | ||
95 | radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, | ||
96 | start, MEMFD_TAG_PINNED) { | ||
97 | |||
98 | page = radix_tree_deref_slot(slot); | ||
99 | if (radix_tree_exception(page)) { | ||
100 | if (radix_tree_deref_retry(page)) { | ||
101 | slot = radix_tree_iter_retry(&iter); | ||
102 | continue; | ||
103 | } | ||
104 | |||
105 | page = NULL; | ||
106 | } | ||
107 | |||
108 | if (page && | ||
109 | page_count(page) - page_mapcount(page) != 1) { | ||
110 | if (scan < LAST_SCAN) | ||
111 | goto continue_resched; | ||
112 | |||
113 | /* | ||
114 | * On the last scan, we clean up all those tags | ||
115 | * we inserted; but make a note that we still | ||
116 | * found pages pinned. | ||
117 | */ | ||
118 | error = -EBUSY; | ||
119 | } | ||
120 | |||
121 | xa_lock_irq(&mapping->i_pages); | ||
122 | radix_tree_tag_clear(&mapping->i_pages, | ||
123 | iter.index, MEMFD_TAG_PINNED); | ||
124 | xa_unlock_irq(&mapping->i_pages); | ||
125 | continue_resched: | ||
126 | if (need_resched()) { | ||
127 | slot = radix_tree_iter_resume(slot, &iter); | ||
128 | cond_resched_rcu(); | ||
129 | } | ||
130 | } | ||
131 | rcu_read_unlock(); | ||
132 | } | ||
133 | |||
134 | return error; | ||
135 | } | ||
136 | |||
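The "about 150ms max" note next to LAST_SCAN can be checked from the schedule_timeout_killable() argument: scan 0 only drains the LRUs, and scans 1..4 sleep (HZ << scan) / 200 jiffies each. A worked check (HZ is an assumed config value; the millisecond results are HZ-independent up to rounding):

    #include <stdio.h>

    #define HZ        1000
    #define LAST_SCAN 4

    int main(void)
    {
        double total_ms = 0;
        int scan;

        for (scan = 1; scan <= LAST_SCAN; scan++) {
            int jiffies = (HZ << scan) / 200;
            double ms = jiffies * 1000.0 / HZ;

            total_ms += ms;
            printf("scan %d: %d jiffies (%.0f ms)\n", scan, jiffies, ms);
        }
        printf("total ~%.0f ms\n", total_ms);  /* 10 + 20 + 40 + 80 = 150 ms */
        return 0;
    }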
137 | static unsigned int *memfd_file_seals_ptr(struct file *file) | ||
138 | { | ||
139 | if (shmem_file(file)) | ||
140 | return &SHMEM_I(file_inode(file))->seals; | ||
141 | |||
142 | #ifdef CONFIG_HUGETLBFS | ||
143 | if (is_file_hugepages(file)) | ||
144 | return &HUGETLBFS_I(file_inode(file))->seals; | ||
145 | #endif | ||
146 | |||
147 | return NULL; | ||
148 | } | ||
149 | |||
150 | #define F_ALL_SEALS (F_SEAL_SEAL | \ | ||
151 | F_SEAL_SHRINK | \ | ||
152 | F_SEAL_GROW | \ | ||
153 | F_SEAL_WRITE) | ||
154 | |||
155 | static int memfd_add_seals(struct file *file, unsigned int seals) | ||
156 | { | ||
157 | struct inode *inode = file_inode(file); | ||
158 | unsigned int *file_seals; | ||
159 | int error; | ||
160 | |||
161 | /* | ||
162 | * SEALING | ||
163 | * Sealing allows multiple parties to share a tmpfs or hugetlbfs file | ||
164 | * but restrict access to a specific subset of file operations. Seals | ||
165 | * can only be added, but never removed. This way, mutually untrusted | ||
166 | * parties can share common memory regions with a well-defined policy. | ||
167 | * A malicious peer can thus never perform unwanted operations on a | ||
168 | * shared object. | ||
169 | * | ||
170 | * Seals are only supported on special tmpfs or hugetlbfs files and | ||
171 | * always affect the whole underlying inode. Once a seal is set, it | ||
172 | * may prevent some kinds of access to the file. Currently, the | ||
173 | * following seals are defined: | ||
174 | * SEAL_SEAL: Prevent further seals from being set on this file | ||
175 | * SEAL_SHRINK: Prevent the file from shrinking | ||
176 | * SEAL_GROW: Prevent the file from growing | ||
177 | * SEAL_WRITE: Prevent write access to the file | ||
178 | * | ||
179 | * As we don't require any trust relationship between two parties, we | ||
180 | * must prevent seals from being removed. Therefore, sealing a file | ||
181 | * only adds a given set of seals to the file, it never touches | ||
182 | * existing seals. Furthermore, the "setting seals"-operation can be | ||
183 | * sealed itself, which basically prevents any further seal from being | ||
184 | * added. | ||
185 | * | ||
186 | * Semantics of sealing are only defined on volatile files. Only | ||
187 | * anonymous tmpfs and hugetlbfs files support sealing. More | ||
188 | * importantly, seals are never written to disk. Therefore, there's | ||
189 | * no plan to support it on other file types. | ||
190 | */ | ||
191 | |||
192 | if (!(file->f_mode & FMODE_WRITE)) | ||
193 | return -EPERM; | ||
194 | if (seals & ~(unsigned int)F_ALL_SEALS) | ||
195 | return -EINVAL; | ||
196 | |||
197 | inode_lock(inode); | ||
198 | |||
199 | file_seals = memfd_file_seals_ptr(file); | ||
200 | if (!file_seals) { | ||
201 | error = -EINVAL; | ||
202 | goto unlock; | ||
203 | } | ||
204 | |||
205 | if (*file_seals & F_SEAL_SEAL) { | ||
206 | error = -EPERM; | ||
207 | goto unlock; | ||
208 | } | ||
209 | |||
210 | if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) { | ||
211 | error = mapping_deny_writable(file->f_mapping); | ||
212 | if (error) | ||
213 | goto unlock; | ||
214 | |||
215 | error = memfd_wait_for_pins(file->f_mapping); | ||
216 | if (error) { | ||
217 | mapping_allow_writable(file->f_mapping); | ||
218 | goto unlock; | ||
219 | } | ||
220 | } | ||
221 | |||
222 | *file_seals |= seals; | ||
223 | error = 0; | ||
224 | |||
225 | unlock: | ||
226 | inode_unlock(inode); | ||
227 | return error; | ||
228 | } | ||
229 | |||
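How the sealing implemented above looks from userspace: create a memfd with sealing allowed, freeze its contents, then lock the seal set itself. This sketch assumes a glibc that exposes memfd_create() (2.27 or later); error handling is omitted for brevity:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = memfd_create("demo", MFD_CLOEXEC | MFD_ALLOW_SEALING);

        write(fd, "hello", 5);

        /* freeze contents: no shrink, grow or write from now on */
        fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE);
        printf("write after seal: %zd\n", write(fd, "x", 1));  /* -1, EPERM */

        fcntl(fd, F_ADD_SEALS, F_SEAL_SEAL);   /* no further seals accepted */
        printf("seals: %#x\n", fcntl(fd, F_GET_SEALS));
        close(fd);
        return 0;
    }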
230 | static int memfd_get_seals(struct file *file) | ||
231 | { | ||
232 | unsigned int *seals = memfd_file_seals_ptr(file); | ||
233 | |||
234 | return seals ? *seals : -EINVAL; | ||
235 | } | ||
236 | |||
237 | long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | ||
238 | { | ||
239 | long error; | ||
240 | |||
241 | switch (cmd) { | ||
242 | case F_ADD_SEALS: | ||
243 | /* disallow upper 32bit */ | ||
244 | if (arg > UINT_MAX) | ||
245 | return -EINVAL; | ||
246 | |||
247 | error = memfd_add_seals(file, arg); | ||
248 | break; | ||
249 | case F_GET_SEALS: | ||
250 | error = memfd_get_seals(file); | ||
251 | break; | ||
252 | default: | ||
253 | error = -EINVAL; | ||
254 | break; | ||
255 | } | ||
256 | |||
257 | return error; | ||
258 | } | ||
259 | |||
260 | #define MFD_NAME_PREFIX "memfd:" | ||
261 | #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) | ||
262 | #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) | ||
263 | |||
264 | #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) | ||
265 | |||
266 | SYSCALL_DEFINE2(memfd_create, | ||
267 | const char __user *, uname, | ||
268 | unsigned int, flags) | ||
269 | { | ||
270 | unsigned int *file_seals; | ||
271 | struct file *file; | ||
272 | int fd, error; | ||
273 | char *name; | ||
274 | long len; | ||
275 | |||
276 | if (!(flags & MFD_HUGETLB)) { | ||
277 | if (flags & ~(unsigned int)MFD_ALL_FLAGS) | ||
278 | return -EINVAL; | ||
279 | } else { | ||
280 | /* Allow huge page size encoding in flags. */ | ||
281 | if (flags & ~(unsigned int)(MFD_ALL_FLAGS | | ||
282 | (MFD_HUGE_MASK << MFD_HUGE_SHIFT))) | ||
283 | return -EINVAL; | ||
284 | } | ||
285 | |||
286 | /* length includes terminating zero */ | ||
287 | len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); | ||
288 | if (len <= 0) | ||
289 | return -EFAULT; | ||
290 | if (len > MFD_NAME_MAX_LEN + 1) | ||
291 | return -EINVAL; | ||
292 | |||
293 | name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL); | ||
294 | if (!name) | ||
295 | return -ENOMEM; | ||
296 | |||
297 | strcpy(name, MFD_NAME_PREFIX); | ||
298 | if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) { | ||
299 | error = -EFAULT; | ||
300 | goto err_name; | ||
301 | } | ||
302 | |||
303 | /* terminating-zero may have changed after strnlen_user() returned */ | ||
304 | if (name[len + MFD_NAME_PREFIX_LEN - 1]) { | ||
305 | error = -EFAULT; | ||
306 | goto err_name; | ||
307 | } | ||
308 | |||
309 | fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); | ||
310 | if (fd < 0) { | ||
311 | error = fd; | ||
312 | goto err_name; | ||
313 | } | ||
314 | |||
315 | if (flags & MFD_HUGETLB) { | ||
316 | struct user_struct *user = NULL; | ||
317 | |||
318 | file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user, | ||
319 | HUGETLB_ANONHUGE_INODE, | ||
320 | (flags >> MFD_HUGE_SHIFT) & | ||
321 | MFD_HUGE_MASK); | ||
322 | } else | ||
323 | file = shmem_file_setup(name, 0, VM_NORESERVE); | ||
324 | if (IS_ERR(file)) { | ||
325 | error = PTR_ERR(file); | ||
326 | goto err_fd; | ||
327 | } | ||
328 | file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; | ||
329 | file->f_flags |= O_RDWR | O_LARGEFILE; | ||
330 | |||
331 | if (flags & MFD_ALLOW_SEALING) { | ||
332 | file_seals = memfd_file_seals_ptr(file); | ||
333 | *file_seals &= ~F_SEAL_SEAL; | ||
334 | } | ||
335 | |||
336 | fd_install(fd, file); | ||
337 | kfree(name); | ||
338 | return fd; | ||
339 | |||
340 | err_fd: | ||
341 | put_unused_fd(fd); | ||
342 | err_name: | ||
343 | kfree(name); | ||
344 | return error; | ||
345 | } | ||
diff --git a/mm/memory.c b/mm/memory.c index 5d8c2afb0730..7206a634270b 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -817,17 +817,12 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, | |||
817 | * PFNMAP mappings in order to support COWable mappings. | 817 | * PFNMAP mappings in order to support COWable mappings. |
818 | * | 818 | * |
819 | */ | 819 | */ |
820 | #ifdef __HAVE_ARCH_PTE_SPECIAL | ||
821 | # define HAVE_PTE_SPECIAL 1 | ||
822 | #else | ||
823 | # define HAVE_PTE_SPECIAL 0 | ||
824 | #endif | ||
825 | struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | 820 | struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, |
826 | pte_t pte, bool with_public_device) | 821 | pte_t pte, bool with_public_device) |
827 | { | 822 | { |
828 | unsigned long pfn = pte_pfn(pte); | 823 | unsigned long pfn = pte_pfn(pte); |
829 | 824 | ||
830 | if (HAVE_PTE_SPECIAL) { | 825 | if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { |
831 | if (likely(!pte_special(pte))) | 826 | if (likely(!pte_special(pte))) |
832 | goto check_pfn; | 827 | goto check_pfn; |
833 | if (vma->vm_ops && vma->vm_ops->find_special_page) | 828 | if (vma->vm_ops && vma->vm_ops->find_special_page) |
@@ -862,7 +857,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | |||
862 | return NULL; | 857 | return NULL; |
863 | } | 858 | } |
864 | 859 | ||
865 | /* !HAVE_PTE_SPECIAL case follows: */ | 860 | /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */ |
866 | 861 | ||
867 | if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { | 862 | if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { |
868 | if (vma->vm_flags & VM_MIXEDMAP) { | 863 | if (vma->vm_flags & VM_MIXEDMAP) { |
@@ -881,6 +876,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | |||
881 | 876 | ||
882 | if (is_zero_pfn(pfn)) | 877 | if (is_zero_pfn(pfn)) |
883 | return NULL; | 878 | return NULL; |
879 | |||
884 | check_pfn: | 880 | check_pfn: |
885 | if (unlikely(pfn > highest_memmap_pfn)) { | 881 | if (unlikely(pfn > highest_memmap_pfn)) { |
886 | print_bad_pte(vma, addr, pte, NULL); | 882 | print_bad_pte(vma, addr, pte, NULL); |
@@ -904,7 +900,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, | |||
904 | /* | 900 | /* |
905 | * There is no pmd_special() but there may be special pmds, e.g. | 901 | * There is no pmd_special() but there may be special pmds, e.g. |
906 | * in a direct-access (dax) mapping, so let's just replicate the | 902 | * in a direct-access (dax) mapping, so let's just replicate the |
907 | * !HAVE_PTE_SPECIAL case from vm_normal_page() here. | 903 | * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here. |
908 | */ | 904 | */ |
909 | if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { | 905 | if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { |
910 | if (vma->vm_flags & VM_MIXEDMAP) { | 906 | if (vma->vm_flags & VM_MIXEDMAP) { |
@@ -1932,7 +1928,8 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, | |||
1932 | * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP | 1928 | * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP |
1933 | * without pte special, it would there be refcounted as a normal page. | 1929 | * without pte special, it would there be refcounted as a normal page. |
1934 | */ | 1930 | */ |
1935 | if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { | 1931 | if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && |
1932 | !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { | ||
1936 | struct page *page; | 1933 | struct page *page; |
1937 | 1934 | ||
1938 | /* | 1935 | /* |
@@ -1954,12 +1951,25 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, | |||
1954 | } | 1951 | } |
1955 | EXPORT_SYMBOL(vm_insert_mixed); | 1952 | EXPORT_SYMBOL(vm_insert_mixed); |
1956 | 1953 | ||
1957 | int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, | 1954 | /* |
1958 | pfn_t pfn) | 1955 | * If the insertion of PTE failed because someone else already added a |
1956 | * different entry in the meantime, we treat that as success, as we assume | ||
1957 | * the same entry was actually inserted. | ||
1958 | */ | ||
1959 | |||
1960 | vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, | ||
1961 | unsigned long addr, pfn_t pfn) | ||
1959 | { | 1962 | { |
1960 | return __vm_insert_mixed(vma, addr, pfn, true); | 1963 | int err; |
1964 | |||
1965 | err = __vm_insert_mixed(vma, addr, pfn, true); | ||
1966 | if (err == -ENOMEM) | ||
1967 | return VM_FAULT_OOM; | ||
1968 | if (err < 0 && err != -EBUSY) | ||
1969 | return VM_FAULT_SIGBUS; | ||
1970 | return VM_FAULT_NOPAGE; | ||
1961 | } | 1971 | } |
1962 | EXPORT_SYMBOL(vm_insert_mixed_mkwrite); | 1972 | EXPORT_SYMBOL(vmf_insert_mixed_mkwrite); |
1963 | 1973 | ||
1964 | /* | 1974 | /* |
1965 | * maps a range of physical memory into the requested pages. the old | 1975 | * maps a range of physical memory into the requested pages. the old |
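The new vmf_insert_mixed_mkwrite() folds the errno-to-VM_FAULT translation into the helper so page-fault handlers can return its result directly. A standalone sketch of that mapping; the VM_FAULT_* values are illustrative stand-ins for the kernel's:

    #include <errno.h>
    #include <stdio.h>

    #define VM_FAULT_OOM    0x0001
    #define VM_FAULT_SIGBUS 0x0002
    #define VM_FAULT_NOPAGE 0x0100  /* PTE installed, no page to return */

    static unsigned int errno_to_fault(int err)
    {
        if (err == -ENOMEM)
            return VM_FAULT_OOM;
        if (err < 0 && err != -EBUSY)  /* -EBUSY: raced, same entry in place */
            return VM_FAULT_SIGBUS;
        return VM_FAULT_NOPAGE;
    }

    int main(void)
    {
        printf("%#x %#x %#x\n", errno_to_fault(-ENOMEM),
               errno_to_fault(-EBUSY), errno_to_fault(0));
        return 0;
    }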
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 25982467800b..7deb49f69e27 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -1237,6 +1237,29 @@ static struct page *next_active_pageblock(struct page *page) | |||
1237 | return page + pageblock_nr_pages; | 1237 | return page + pageblock_nr_pages; |
1238 | } | 1238 | } |
1239 | 1239 | ||
1240 | static bool is_pageblock_removable_nolock(struct page *page) | ||
1241 | { | ||
1242 | struct zone *zone; | ||
1243 | unsigned long pfn; | ||
1244 | |||
1245 | /* | ||
1246 | * We have to be careful here because we are iterating over memory | ||
1247 | * sections which are not zone aware so we might end up outside of | ||
1248 | * the zone but still within the section. | ||
1249 | * We have to take care about the node as well. If the node is offline | ||
1250 | * its NODE_DATA will be NULL - see page_zone. | ||
1251 | */ | ||
1252 | if (!node_online(page_to_nid(page))) | ||
1253 | return false; | ||
1254 | |||
1255 | zone = page_zone(page); | ||
1256 | pfn = page_to_pfn(page); | ||
1257 | if (!zone_spans_pfn(zone, pfn)) | ||
1258 | return false; | ||
1259 | |||
1260 | return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true); | ||
1261 | } | ||
1262 | |||
1240 | /* Checks if this range of memory is likely to be hot-removable. */ | 1263 | /* Checks if this range of memory is likely to be hot-removable. */ |
1241 | bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) | 1264 | bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) |
1242 | { | 1265 | { |
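The helper moves from page_alloc.c (its removal appears further down) into its only caller's file and becomes static. For context, a sketch of how that caller walks the range, simplified from is_mem_section_removable(); details of the real loop may differ:

/* Walk the range one pageblock at a time; any unmovable block makes
 * the whole section non-removable. */
bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
        struct page *page = pfn_to_page(start_pfn);
        struct page *end_page = page + nr_pages;

        for (; page < end_page; page = next_active_pageblock(page)) {
                if (!is_pageblock_removable_nolock(page))
                        return false;
        }
        return true;
}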
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -3277,7 +3277,7 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) | |||
3277 | mm->data_vm += npages; | 3277 | mm->data_vm += npages; |
3278 | } | 3278 | } |
3279 | 3279 | ||
3280 | static int special_mapping_fault(struct vm_fault *vmf); | 3280 | static vm_fault_t special_mapping_fault(struct vm_fault *vmf); |
3281 | 3281 | ||
3282 | /* | 3282 | /* |
3283 | * Having a close hook prevents vma merging regardless of flags. | 3283 | * Having a close hook prevents vma merging regardless of flags. |
@@ -3316,7 +3316,7 @@ static const struct vm_operations_struct legacy_special_mapping_vmops = { | |||
3316 | .fault = special_mapping_fault, | 3316 | .fault = special_mapping_fault, |
3317 | }; | 3317 | }; |
3318 | 3318 | ||
3319 | static int special_mapping_fault(struct vm_fault *vmf) | 3319 | static vm_fault_t special_mapping_fault(struct vm_fault *vmf) |
3320 | { | 3320 | { |
3321 | struct vm_area_struct *vma = vmf->vma; | 3321 | struct vm_area_struct *vma = vmf->vma; |
3322 | pgoff_t pgoff; | 3322 | pgoff_t pgoff; |
diff --git a/mm/nommu.c b/mm/nommu.c index 13723736d38f..4452d8bd9ae4 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -1763,7 +1763,7 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, | |||
1763 | return -ENOMEM; | 1763 | return -ENOMEM; |
1764 | } | 1764 | } |
1765 | 1765 | ||
1766 | int filemap_fault(struct vm_fault *vmf) | 1766 | vm_fault_t filemap_fault(struct vm_fault *vmf) |
1767 | { | 1767 | { |
1768 | BUG(); | 1768 | BUG(); |
1769 | return 0; | 1769 | return 0; |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 8ba6cb88cf58..6694348b27e9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -256,7 +256,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc) | |||
256 | int nid; | 256 | int nid; |
257 | 257 | ||
258 | if (is_memcg_oom(oc)) { | 258 | if (is_memcg_oom(oc)) { |
259 | oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1; | 259 | oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1; |
260 | return CONSTRAINT_MEMCG; | 260 | return CONSTRAINT_MEMCG; |
261 | } | 261 | } |
262 | 262 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 22320ea27489..07b3c23762ad 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -705,16 +705,14 @@ static inline void rmv_page_order(struct page *page) | |||
705 | 705 | ||
706 | /* | 706 | /* |
707 | * This function checks whether a page is free && is the buddy | 707 | * This function checks whether a page is free && is the buddy |
708 | * we can do coalesce a page and its buddy if | 708 | * we can coalesce a page and its buddy if |
709 | * (a) the buddy is not in a hole (check before calling!) && | 709 | * (a) the buddy is not in a hole (check before calling!) && |
710 | * (b) the buddy is in the buddy system && | 710 | * (b) the buddy is in the buddy system && |
711 | * (c) a page and its buddy have the same order && | 711 | * (c) a page and its buddy have the same order && |
712 | * (d) a page and its buddy are in the same zone. | 712 | * (d) a page and its buddy are in the same zone. |
713 | * | 713 | * |
714 | * For recording whether a page is in the buddy system, we set ->_mapcount | 714 | * For recording whether a page is in the buddy system, we set PageBuddy. |
715 | * PAGE_BUDDY_MAPCOUNT_VALUE. | 715 | * Setting, clearing, and testing PageBuddy is serialized by zone->lock. |
716 | * Setting, clearing, and testing _mapcount PAGE_BUDDY_MAPCOUNT_VALUE is | ||
717 | * serialized by zone->lock. | ||
718 | * | 716 | * |
719 | * For recording page's order, we use page_private(page). | 717 | * For recording page's order, we use page_private(page). |
720 | */ | 718 | */ |
@@ -759,9 +757,8 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, | |||
759 | * as necessary, plus some accounting needed to play nicely with other | 757 | * as necessary, plus some accounting needed to play nicely with other |
760 | * parts of the VM system. | 758 | * parts of the VM system. |
761 | * At each level, we keep a list of pages, which are heads of continuous | 759 | * At each level, we keep a list of pages, which are heads of continuous |
762 | * free pages of length of (1 << order) and marked with _mapcount | 760 | * free pages of length (1 << order) and marked with PageBuddy. |
763 | * PAGE_BUDDY_MAPCOUNT_VALUE. Page's order is recorded in page_private(page) | 761 | * Page's order is recorded in page_private(page) field. |
764 | * field. | ||
765 | * So when we are allocating or freeing one, we can derive the state of the | 762 | * So when we are allocating or freeing one, we can derive the state of the |
766 | * other. That is, if we allocate a small block, and both were | 763 | * other. That is, if we allocate a small block, and both were |
767 | * free, the remainder of the region must be split into blocks. | 764 | * free, the remainder of the region must be split into blocks. |
@@ -946,7 +943,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page) | |||
946 | } | 943 | } |
947 | switch (page - head_page) { | 944 | switch (page - head_page) { |
948 | case 1: | 945 | case 1: |
949 | /* the first tail page: ->mapping is compound_mapcount() */ | 946 | /* the first tail page: ->mapping may be compound_mapcount() */ |
950 | if (unlikely(compound_mapcount(page))) { | 947 | if (unlikely(compound_mapcount(page))) { |
951 | bad_page(page, "nonzero compound_mapcount", 0); | 948 | bad_page(page, "nonzero compound_mapcount", 0); |
952 | goto out; | 949 | goto out; |
@@ -955,7 +952,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page) | |||
955 | case 2: | 952 | case 2: |
956 | /* | 953 | /* |
957 | * the second tail page: ->mapping is | 954 | * the second tail page: ->mapping is |
958 | * page_deferred_list().next -- ignore value. | 955 | * deferred_list.next -- ignore value. |
959 | */ | 956 | */ |
960 | break; | 957 | break; |
961 | default: | 958 | default: |
@@ -3701,7 +3698,7 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla | |||
3701 | #endif /* CONFIG_COMPACTION */ | 3698 | #endif /* CONFIG_COMPACTION */ |
3702 | 3699 | ||
3703 | #ifdef CONFIG_LOCKDEP | 3700 | #ifdef CONFIG_LOCKDEP |
3704 | struct lockdep_map __fs_reclaim_map = | 3701 | static struct lockdep_map __fs_reclaim_map = |
3705 | STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map); | 3702 | STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map); |
3706 | 3703 | ||
3707 | static bool __need_fs_reclaim(gfp_t gfp_mask) | 3704 | static bool __need_fs_reclaim(gfp_t gfp_mask) |
@@ -3726,17 +3723,27 @@ static bool __need_fs_reclaim(gfp_t gfp_mask) | |||
3726 | return true; | 3723 | return true; |
3727 | } | 3724 | } |
3728 | 3725 | ||
3726 | void __fs_reclaim_acquire(void) | ||
3727 | { | ||
3728 | lock_map_acquire(&__fs_reclaim_map); | ||
3729 | } | ||
3730 | |||
3731 | void __fs_reclaim_release(void) | ||
3732 | { | ||
3733 | lock_map_release(&__fs_reclaim_map); | ||
3734 | } | ||
3735 | |||
3729 | void fs_reclaim_acquire(gfp_t gfp_mask) | 3736 | void fs_reclaim_acquire(gfp_t gfp_mask) |
3730 | { | 3737 | { |
3731 | if (__need_fs_reclaim(gfp_mask)) | 3738 | if (__need_fs_reclaim(gfp_mask)) |
3732 | lock_map_acquire(&__fs_reclaim_map); | 3739 | __fs_reclaim_acquire(); |
3733 | } | 3740 | } |
3734 | EXPORT_SYMBOL_GPL(fs_reclaim_acquire); | 3741 | EXPORT_SYMBOL_GPL(fs_reclaim_acquire); |
3735 | 3742 | ||
3736 | void fs_reclaim_release(gfp_t gfp_mask) | 3743 | void fs_reclaim_release(gfp_t gfp_mask) |
3737 | { | 3744 | { |
3738 | if (__need_fs_reclaim(gfp_mask)) | 3745 | if (__need_fs_reclaim(gfp_mask)) |
3739 | lock_map_release(&__fs_reclaim_map); | 3746 | __fs_reclaim_release(); |
3740 | } | 3747 | } |
3741 | EXPORT_SYMBOL_GPL(fs_reclaim_release); | 3748 | EXPORT_SYMBOL_GPL(fs_reclaim_release); |
3742 | #endif | 3749 | #endif |
@@ -3754,8 +3761,8 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, | |||
3754 | 3761 | ||
3755 | /* We now go into synchronous reclaim */ | 3762 | /* We now go into synchronous reclaim */ |
3756 | cpuset_memory_pressure_bump(); | 3763 | cpuset_memory_pressure_bump(); |
3757 | noreclaim_flag = memalloc_noreclaim_save(); | ||
3758 | fs_reclaim_acquire(gfp_mask); | 3764 | fs_reclaim_acquire(gfp_mask); |
3765 | noreclaim_flag = memalloc_noreclaim_save(); | ||
3759 | reclaim_state.reclaimed_slab = 0; | 3766 | reclaim_state.reclaimed_slab = 0; |
3760 | current->reclaim_state = &reclaim_state; | 3767 | current->reclaim_state = &reclaim_state; |
3761 | 3768 | ||
@@ -3763,8 +3770,8 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, | |||
3763 | ac->nodemask); | 3770 | ac->nodemask); |
3764 | 3771 | ||
3765 | current->reclaim_state = NULL; | 3772 | current->reclaim_state = NULL; |
3766 | fs_reclaim_release(gfp_mask); | ||
3767 | memalloc_noreclaim_restore(noreclaim_flag); | 3773 | memalloc_noreclaim_restore(noreclaim_flag); |
3774 | fs_reclaim_release(gfp_mask); | ||
3768 | 3775 | ||
3769 | cond_resched(); | 3776 | cond_resched(); |
3770 | 3777 | ||
@@ -4162,7 +4169,6 @@ retry: | |||
4162 | * orientated. | 4169 | * orientated. |
4163 | */ | 4170 | */ |
4164 | if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) { | 4171 | if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) { |
4165 | ac->zonelist = node_zonelist(numa_node_id(), gfp_mask); | ||
4166 | ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, | 4172 | ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, |
4167 | ac->high_zoneidx, ac->nodemask); | 4173 | ac->high_zoneidx, ac->nodemask); |
4168 | } | 4174 | } |
@@ -4326,8 +4332,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, | |||
4326 | } | 4332 | } |
4327 | 4333 | ||
4328 | /* Determine whether to spread dirty pages and what the first usable zone */ | 4334 | /* Determine whether to spread dirty pages and what the first usable zone */ |
4329 | static inline void finalise_ac(gfp_t gfp_mask, | 4335 | static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac) |
4330 | unsigned int order, struct alloc_context *ac) | ||
4331 | { | 4336 | { |
4332 | /* Dirty zone balancing only done in the fast path */ | 4337 | /* Dirty zone balancing only done in the fast path */ |
4333 | ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE); | 4338 | ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE); |
@@ -4358,7 +4363,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, | |||
4358 | if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) | 4363 | if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) |
4359 | return NULL; | 4364 | return NULL; |
4360 | 4365 | ||
4361 | finalise_ac(gfp_mask, order, &ac); | 4366 | finalise_ac(gfp_mask, &ac); |
4362 | 4367 | ||
4363 | /* First allocation attempt */ | 4368 | /* First allocation attempt */ |
4364 | page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); | 4369 | page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); |
@@ -6229,18 +6234,18 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
6229 | 6234 | ||
6230 | for (j = 0; j < MAX_NR_ZONES; j++) { | 6235 | for (j = 0; j < MAX_NR_ZONES; j++) { |
6231 | struct zone *zone = pgdat->node_zones + j; | 6236 | struct zone *zone = pgdat->node_zones + j; |
6232 | unsigned long size, realsize, freesize, memmap_pages; | 6237 | unsigned long size, freesize, memmap_pages; |
6233 | unsigned long zone_start_pfn = zone->zone_start_pfn; | 6238 | unsigned long zone_start_pfn = zone->zone_start_pfn; |
6234 | 6239 | ||
6235 | size = zone->spanned_pages; | 6240 | size = zone->spanned_pages; |
6236 | realsize = freesize = zone->present_pages; | 6241 | freesize = zone->present_pages; |
6237 | 6242 | ||
6238 | /* | 6243 | /* |
6239 | * Adjust freesize so that it accounts for how much memory | 6244 | * Adjust freesize so that it accounts for how much memory |
6240 | * is used by this zone for memmap. This affects the watermark | 6245 | * is used by this zone for memmap. This affects the watermark |
6241 | * and per-cpu initialisations | 6246 | * and per-cpu initialisations |
6242 | */ | 6247 | */ |
6243 | memmap_pages = calc_memmap_size(size, realsize); | 6248 | memmap_pages = calc_memmap_size(size, freesize); |
6244 | if (!is_highmem_idx(j)) { | 6249 | if (!is_highmem_idx(j)) { |
6245 | if (freesize >= memmap_pages) { | 6250 | if (freesize >= memmap_pages) { |
6246 | freesize -= memmap_pages; | 6251 | freesize -= memmap_pages; |
@@ -6272,7 +6277,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
6272 | * when the bootmem allocator frees pages into the buddy system. | 6277 | * when the bootmem allocator frees pages into the buddy system. |
6273 | * And all highmem pages will be managed by the buddy system. | 6278 | * And all highmem pages will be managed by the buddy system. |
6274 | */ | 6279 | */ |
6275 | zone->managed_pages = is_highmem_idx(j) ? realsize : freesize; | 6280 | zone->managed_pages = freesize; |
6276 | #ifdef CONFIG_NUMA | 6281 | #ifdef CONFIG_NUMA |
6277 | zone->node = nid; | 6282 | zone->node = nid; |
6278 | #endif | 6283 | #endif |
@@ -7682,29 +7687,6 @@ unmovable: | |||
7682 | return true; | 7687 | return true; |
7683 | } | 7688 | } |
7684 | 7689 | ||
7685 | bool is_pageblock_removable_nolock(struct page *page) | ||
7686 | { | ||
7687 | struct zone *zone; | ||
7688 | unsigned long pfn; | ||
7689 | |||
7690 | /* | ||
7691 | * We have to be careful here because we are iterating over memory | ||
7692 | * sections which are not zone aware so we might end up outside of | ||
7693 | * the zone but still within the section. | ||
7694 | * We have to take care about the node as well. If the node is offline | ||
7695 | * its NODE_DATA will be NULL - see page_zone. | ||
7696 | */ | ||
7697 | if (!node_online(page_to_nid(page))) | ||
7698 | return false; | ||
7699 | |||
7700 | zone = page_zone(page); | ||
7701 | pfn = page_to_pfn(page); | ||
7702 | if (!zone_spans_pfn(zone, pfn)) | ||
7703 | return false; | ||
7704 | |||
7705 | return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true); | ||
7706 | } | ||
7707 | |||
7708 | #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) | 7690 | #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) |
7709 | 7691 | ||
7710 | static unsigned long pfn_max_align_down(unsigned long pfn) | 7692 | static unsigned long pfn_max_align_down(unsigned long pfn) |
diff --git a/mm/page_counter.c b/mm/page_counter.c index 2a8df3ad60a4..de31470655f6 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c | |||
@@ -13,6 +13,40 @@ | |||
13 | #include <linux/bug.h> | 13 | #include <linux/bug.h> |
14 | #include <asm/page.h> | 14 | #include <asm/page.h> |
15 | 15 | ||
16 | static void propagate_protected_usage(struct page_counter *c, | ||
17 | unsigned long usage) | ||
18 | { | ||
19 | unsigned long protected, old_protected; | ||
20 | long delta; | ||
21 | |||
22 | if (!c->parent) | ||
23 | return; | ||
24 | |||
25 | if (c->min || atomic_long_read(&c->min_usage)) { | ||
26 | if (usage <= c->min) | ||
27 | protected = usage; | ||
28 | else | ||
29 | protected = 0; | ||
30 | |||
31 | old_protected = atomic_long_xchg(&c->min_usage, protected); | ||
32 | delta = protected - old_protected; | ||
33 | if (delta) | ||
34 | atomic_long_add(delta, &c->parent->children_min_usage); | ||
35 | } | ||
36 | |||
37 | if (c->low || atomic_long_read(&c->low_usage)) { | ||
38 | if (usage <= c->low) | ||
39 | protected = usage; | ||
40 | else | ||
41 | protected = 0; | ||
42 | |||
43 | old_protected = atomic_long_xchg(&c->low_usage, protected); | ||
44 | delta = protected - old_protected; | ||
45 | if (delta) | ||
46 | atomic_long_add(delta, &c->parent->children_low_usage); | ||
47 | } | ||
48 | } | ||
49 | |||
16 | /** | 50 | /** |
17 | * page_counter_cancel - take pages out of the local counter | 51 | * page_counter_cancel - take pages out of the local counter |
18 | * @counter: counter | 52 | * @counter: counter |
@@ -22,7 +56,8 @@ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages) | |||
22 | { | 56 | { |
23 | long new; | 57 | long new; |
24 | 58 | ||
25 | new = atomic_long_sub_return(nr_pages, &counter->count); | 59 | new = atomic_long_sub_return(nr_pages, &counter->usage); |
60 | propagate_protected_usage(counter, new); | ||
26 | /* More uncharges than charges? */ | 61 | /* More uncharges than charges? */ |
27 | WARN_ON_ONCE(new < 0); | 62 | WARN_ON_ONCE(new < 0); |
28 | } | 63 | } |
@@ -41,7 +76,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) | |||
41 | for (c = counter; c; c = c->parent) { | 76 | for (c = counter; c; c = c->parent) { |
42 | long new; | 77 | long new; |
43 | 78 | ||
44 | new = atomic_long_add_return(nr_pages, &c->count); | 79 | new = atomic_long_add_return(nr_pages, &c->usage); |
80 | propagate_protected_usage(counter, new); | ||
45 | /* | 81 | /* |
46 | * This is indeed racy, but we can live with some | 82 | * This is indeed racy, but we can live with some |
47 | * inaccuracy in the watermark. | 83 | * inaccuracy in the watermark. |
@@ -82,9 +118,10 @@ bool page_counter_try_charge(struct page_counter *counter, | |||
82 | * we either see the new limit or the setter sees the | 118 | * we either see the new limit or the setter sees the |
83 | * counter has changed and retries. | 119 | * counter has changed and retries. |
84 | */ | 120 | */ |
85 | new = atomic_long_add_return(nr_pages, &c->count); | 121 | new = atomic_long_add_return(nr_pages, &c->usage); |
86 | if (new > c->limit) { | 122 | if (new > c->max) { |
87 | atomic_long_sub(nr_pages, &c->count); | 123 | atomic_long_sub(nr_pages, &c->usage); |
124 | propagate_protected_usage(counter, new); | ||
88 | /* | 125 | /* |
89 | * This is racy, but we can live with some | 126 | * This is racy, but we can live with some |
90 | * inaccuracy in the failcnt. | 127 | * inaccuracy in the failcnt. |
@@ -93,6 +130,7 @@ bool page_counter_try_charge(struct page_counter *counter, | |||
93 | *fail = c; | 130 | *fail = c; |
94 | goto failed; | 131 | goto failed; |
95 | } | 132 | } |
133 | propagate_protected_usage(counter, new); | ||
96 | /* | 134 | /* |
97 | * Just like with failcnt, we can live with some | 135 | * Just like with failcnt, we can live with some |
98 | * inaccuracy in the watermark. | 136 | * inaccuracy in the watermark. |
@@ -123,20 +161,20 @@ void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages) | |||
123 | } | 161 | } |
124 | 162 | ||
125 | /** | 163 | /** |
126 | * page_counter_limit - limit the number of pages allowed | 164 | * page_counter_set_max - set the maximum number of pages allowed |
127 | * @counter: counter | 165 | * @counter: counter |
128 | * @limit: limit to set | 166 | * @nr_pages: limit to set |
129 | * | 167 | * |
130 | * Returns 0 on success, -EBUSY if the current number of pages on the | 168 | * Returns 0 on success, -EBUSY if the current number of pages on the |
131 | * counter already exceeds the specified limit. | 169 | * counter already exceeds the specified limit. |
132 | * | 170 | * |
133 | * The caller must serialize invocations on the same counter. | 171 | * The caller must serialize invocations on the same counter. |
134 | */ | 172 | */ |
135 | int page_counter_limit(struct page_counter *counter, unsigned long limit) | 173 | int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages) |
136 | { | 174 | { |
137 | for (;;) { | 175 | for (;;) { |
138 | unsigned long old; | 176 | unsigned long old; |
139 | long count; | 177 | long usage; |
140 | 178 | ||
141 | /* | 179 | /* |
142 | * Update the limit while making sure that it's not | 180 | * Update the limit while making sure that it's not |
@@ -149,22 +187,56 @@ int page_counter_limit(struct page_counter *counter, unsigned long limit) | |||
149 | * the limit, so if it sees the old limit, we see the | 187 | * the limit, so if it sees the old limit, we see the |
150 | * modified counter and retry. | 188 | * modified counter and retry. |
151 | */ | 189 | */ |
152 | count = atomic_long_read(&counter->count); | 190 | usage = atomic_long_read(&counter->usage); |
153 | 191 | ||
154 | if (count > limit) | 192 | if (usage > nr_pages) |
155 | return -EBUSY; | 193 | return -EBUSY; |
156 | 194 | ||
157 | old = xchg(&counter->limit, limit); | 195 | old = xchg(&counter->max, nr_pages); |
158 | 196 | ||
159 | if (atomic_long_read(&counter->count) <= count) | 197 | if (atomic_long_read(&counter->usage) <= usage) |
160 | return 0; | 198 | return 0; |
161 | 199 | ||
162 | counter->limit = old; | 200 | counter->max = old; |
163 | cond_resched(); | 201 | cond_resched(); |
164 | } | 202 | } |
165 | } | 203 | } |
166 | 204 | ||
167 | /** | 205 | /** |
206 | * page_counter_set_min - set the amount of protected memory | ||
207 | * @counter: counter | ||
208 | * @nr_pages: value to set | ||
209 | * | ||
210 | * The caller must serialize invocations on the same counter. | ||
211 | */ | ||
212 | void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages) | ||
213 | { | ||
214 | struct page_counter *c; | ||
215 | |||
216 | counter->min = nr_pages; | ||
217 | |||
218 | for (c = counter; c; c = c->parent) | ||
219 | propagate_protected_usage(c, atomic_long_read(&c->usage)); | ||
220 | } | ||
221 | |||
222 | /** | ||
223 | * page_counter_set_low - set the amount of protected memory | ||
224 | * @counter: counter | ||
225 | * @nr_pages: value to set | ||
226 | * | ||
227 | * The caller must serialize invocations on the same counter. | ||
228 | */ | ||
229 | void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages) | ||
230 | { | ||
231 | struct page_counter *c; | ||
232 | |||
233 | counter->low = nr_pages; | ||
234 | |||
235 | for (c = counter; c; c = c->parent) | ||
236 | propagate_protected_usage(c, atomic_long_read(&c->usage)); | ||
237 | } | ||
238 | |||
239 | /** | ||
168 | * page_counter_memparse - memparse() for page counter limits | 240 | * page_counter_memparse - memparse() for page counter limits |
169 | * @buf: string to parse | 241 | * @buf: string to parse |
170 | * @max: string meaning maximum possible value | 242 | * @max: string meaning maximum possible value |
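propagate_protected_usage() keeps each parent's children_min_usage/children_low_usage equal to the sum of its children's currently protected usage, updated in O(1) on every charge and uncharge. A runnable userspace model of the min-side bookkeeping (single parent/child, no atomics; illustrative only):

#include <stdio.h>

struct pc { long min, min_usage, children_min_usage; };

static void propagate(struct pc *parent, struct pc *c, long usage)
{
        /* mirrors the hunk above: usage counts as protected only
         * while it stays within min */
        long protected = (usage <= c->min) ? usage : 0;
        long delta = protected - c->min_usage;

        c->min_usage = protected;
        parent->children_min_usage += delta;
}

int main(void)
{
        struct pc parent = { 0 }, child = { .min = 100 };

        propagate(&parent, &child, 80);         /* within min */
        printf("%ld\n", parent.children_min_usage);     /* 80 */
        propagate(&parent, &child, 120);        /* exceeds min */
        printf("%ld\n", parent.children_min_usage);     /* 0 */
        return 0;
}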
diff --git a/mm/shmem.c b/mm/shmem.c index 9d6c7e595415..e9a7ac74823d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -327,7 +327,7 @@ static int shmem_radix_tree_replace(struct address_space *mapping, | |||
327 | pgoff_t index, void *expected, void *replacement) | 327 | pgoff_t index, void *expected, void *replacement) |
328 | { | 328 | { |
329 | struct radix_tree_node *node; | 329 | struct radix_tree_node *node; |
330 | void **pslot; | 330 | void __rcu **pslot; |
331 | void *item; | 331 | void *item; |
332 | 332 | ||
333 | VM_BUG_ON(!expected); | 333 | VM_BUG_ON(!expected); |
@@ -395,7 +395,7 @@ static bool shmem_confirm_swap(struct address_space *mapping, | |||
395 | #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE | 395 | #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
396 | /* ifdef here to avoid bloating shmem.o when not necessary */ | 396 | /* ifdef here to avoid bloating shmem.o when not necessary */ |
397 | 397 | ||
398 | int shmem_huge __read_mostly; | 398 | static int shmem_huge __read_mostly; |
399 | 399 | ||
400 | #if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) | 400 | #if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) |
401 | static int shmem_parse_huge(const char *str) | 401 | static int shmem_parse_huge(const char *str) |
@@ -571,6 +571,15 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, | |||
571 | } | 571 | } |
572 | #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */ | 572 | #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */ |
573 | 573 | ||
574 | static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo) | ||
575 | { | ||
576 | if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && | ||
577 | (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) && | ||
578 | shmem_huge != SHMEM_HUGE_DENY) | ||
579 | return true; | ||
580 | return false; | ||
581 | } | ||
582 | |||
574 | /* | 583 | /* |
575 | * Like add_to_page_cache_locked, but error if expected item has gone. | 584 | * Like add_to_page_cache_locked, but error if expected item has gone. |
576 | */ | 585 | */ |
@@ -682,7 +691,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping, | |||
682 | pgoff_t start, pgoff_t end) | 691 | pgoff_t start, pgoff_t end) |
683 | { | 692 | { |
684 | struct radix_tree_iter iter; | 693 | struct radix_tree_iter iter; |
685 | void **slot; | 694 | void __rcu **slot; |
686 | struct page *page; | 695 | struct page *page; |
687 | unsigned long swapped = 0; | 696 | unsigned long swapped = 0; |
688 | 697 | ||
@@ -988,6 +997,7 @@ static int shmem_getattr(const struct path *path, struct kstat *stat, | |||
988 | { | 997 | { |
989 | struct inode *inode = path->dentry->d_inode; | 998 | struct inode *inode = path->dentry->d_inode; |
990 | struct shmem_inode_info *info = SHMEM_I(inode); | 999 | struct shmem_inode_info *info = SHMEM_I(inode); |
1000 | struct shmem_sb_info *sb_info = SHMEM_SB(inode->i_sb); | ||
991 | 1001 | ||
992 | if (info->alloced - info->swapped != inode->i_mapping->nrpages) { | 1002 | if (info->alloced - info->swapped != inode->i_mapping->nrpages) { |
993 | spin_lock_irq(&info->lock); | 1003 | spin_lock_irq(&info->lock); |
@@ -995,6 +1005,10 @@ static int shmem_getattr(const struct path *path, struct kstat *stat, | |||
995 | spin_unlock_irq(&info->lock); | 1005 | spin_unlock_irq(&info->lock); |
996 | } | 1006 | } |
997 | generic_fillattr(inode, stat); | 1007 | generic_fillattr(inode, stat); |
1008 | |||
1009 | if (is_huge_enabled(sb_info)) | ||
1010 | stat->blksize = HPAGE_PMD_SIZE; | ||
1011 | |||
998 | return 0; | 1012 | return 0; |
999 | } | 1013 | } |
1000 | 1014 | ||
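With is_huge_enabled() feeding stat->blksize, userspace can detect huge-page-backed tmpfs files directly. A quick check (the path is an example):

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
        struct stat st;

        if (stat("/mnt/tmpfs/file", &st) == 0)
                printf("st_blksize = %ld\n", (long)st.st_blksize);
        /* reports HPAGE_PMD_SIZE (2097152 on x86-64) instead of 4096
         * when the mount serves huge pages */
        return 0;
}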
@@ -1098,13 +1112,19 @@ static void shmem_evict_inode(struct inode *inode) | |||
1098 | static unsigned long find_swap_entry(struct radix_tree_root *root, void *item) | 1112 | static unsigned long find_swap_entry(struct radix_tree_root *root, void *item) |
1099 | { | 1113 | { |
1100 | struct radix_tree_iter iter; | 1114 | struct radix_tree_iter iter; |
1101 | void **slot; | 1115 | void __rcu **slot; |
1102 | unsigned long found = -1; | 1116 | unsigned long found = -1; |
1103 | unsigned int checked = 0; | 1117 | unsigned int checked = 0; |
1104 | 1118 | ||
1105 | rcu_read_lock(); | 1119 | rcu_read_lock(); |
1106 | radix_tree_for_each_slot(slot, root, &iter, 0) { | 1120 | radix_tree_for_each_slot(slot, root, &iter, 0) { |
1107 | if (*slot == item) { | 1121 | void *entry = radix_tree_deref_slot(slot); |
1122 | |||
1123 | if (radix_tree_deref_retry(entry)) { | ||
1124 | slot = radix_tree_iter_retry(&iter); | ||
1125 | continue; | ||
1126 | } | ||
1127 | if (entry == item) { | ||
1108 | found = iter.index; | 1128 | found = iter.index; |
1109 | break; | 1129 | break; |
1110 | } | 1130 | } |
@@ -1322,9 +1342,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1322 | if (!swap.val) | 1342 | if (!swap.val) |
1323 | goto redirty; | 1343 | goto redirty; |
1324 | 1344 | ||
1325 | if (mem_cgroup_try_charge_swap(page, swap)) | ||
1326 | goto free_swap; | ||
1327 | |||
1328 | /* | 1345 | /* |
1329 | * Add inode to shmem_unuse()'s list of swapped-out inodes, | 1346 | * Add inode to shmem_unuse()'s list of swapped-out inodes, |
1330 | * if it's not already there. Do it now before the page is | 1347 | * if it's not already there. Do it now before the page is |
@@ -1353,7 +1370,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1353 | } | 1370 | } |
1354 | 1371 | ||
1355 | mutex_unlock(&shmem_swaplist_mutex); | 1372 | mutex_unlock(&shmem_swaplist_mutex); |
1356 | free_swap: | ||
1357 | put_swap_page(page, swap); | 1373 | put_swap_page(page, swap); |
1358 | redirty: | 1374 | redirty: |
1359 | set_page_dirty(page); | 1375 | set_page_dirty(page); |
@@ -1404,10 +1420,9 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma, | |||
1404 | struct shmem_inode_info *info, pgoff_t index) | 1420 | struct shmem_inode_info *info, pgoff_t index) |
1405 | { | 1421 | { |
1406 | /* Create a pseudo vma that just contains the policy */ | 1422 | /* Create a pseudo vma that just contains the policy */ |
1407 | vma->vm_start = 0; | 1423 | memset(vma, 0, sizeof(*vma)); |
1408 | /* Bias interleave by inode number to distribute better across nodes */ | 1424 | /* Bias interleave by inode number to distribute better across nodes */ |
1409 | vma->vm_pgoff = index + info->vfs_inode.i_ino; | 1425 | vma->vm_pgoff = index + info->vfs_inode.i_ino; |
1410 | vma->vm_ops = NULL; | ||
1411 | vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); | 1426 | vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); |
1412 | } | 1427 | } |
1413 | 1428 | ||
@@ -1931,14 +1946,14 @@ static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, in | |||
1931 | return ret; | 1946 | return ret; |
1932 | } | 1947 | } |
1933 | 1948 | ||
1934 | static int shmem_fault(struct vm_fault *vmf) | 1949 | static vm_fault_t shmem_fault(struct vm_fault *vmf) |
1935 | { | 1950 | { |
1936 | struct vm_area_struct *vma = vmf->vma; | 1951 | struct vm_area_struct *vma = vmf->vma; |
1937 | struct inode *inode = file_inode(vma->vm_file); | 1952 | struct inode *inode = file_inode(vma->vm_file); |
1938 | gfp_t gfp = mapping_gfp_mask(inode->i_mapping); | 1953 | gfp_t gfp = mapping_gfp_mask(inode->i_mapping); |
1939 | enum sgp_type sgp; | 1954 | enum sgp_type sgp; |
1940 | int error; | 1955 | int err; |
1941 | int ret = VM_FAULT_LOCKED; | 1956 | vm_fault_t ret = VM_FAULT_LOCKED; |
1942 | 1957 | ||
1943 | /* | 1958 | /* |
1944 | * Trinity finds that probing a hole which tmpfs is punching can | 1959 | * Trinity finds that probing a hole which tmpfs is punching can |
@@ -2006,10 +2021,10 @@ static int shmem_fault(struct vm_fault *vmf) | |||
2006 | else if (vma->vm_flags & VM_HUGEPAGE) | 2021 | else if (vma->vm_flags & VM_HUGEPAGE) |
2007 | sgp = SGP_HUGE; | 2022 | sgp = SGP_HUGE; |
2008 | 2023 | ||
2009 | error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp, | 2024 | err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp, |
2010 | gfp, vma, vmf, &ret); | 2025 | gfp, vma, vmf, &ret); |
2011 | if (error) | 2026 | if (err) |
2012 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); | 2027 | return vmf_error(err); |
2013 | return ret; | 2028 | return ret; |
2014 | } | 2029 | } |
2015 | 2030 | ||
@@ -2616,241 +2631,6 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) | |||
2616 | return offset; | 2631 | return offset; |
2617 | } | 2632 | } |
2618 | 2633 | ||
2619 | /* | ||
2620 | * We need a tag: a new tag would expand every radix_tree_node by 8 bytes, | ||
2621 | * so reuse a tag which we firmly believe is never set or cleared on shmem. | ||
2622 | */ | ||
2623 | #define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE | ||
2624 | #define LAST_SCAN 4 /* about 150ms max */ | ||
2625 | |||
2626 | static void shmem_tag_pins(struct address_space *mapping) | ||
2627 | { | ||
2628 | struct radix_tree_iter iter; | ||
2629 | void **slot; | ||
2630 | pgoff_t start; | ||
2631 | struct page *page; | ||
2632 | |||
2633 | lru_add_drain(); | ||
2634 | start = 0; | ||
2635 | rcu_read_lock(); | ||
2636 | |||
2637 | radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { | ||
2638 | page = radix_tree_deref_slot(slot); | ||
2639 | if (!page || radix_tree_exception(page)) { | ||
2640 | if (radix_tree_deref_retry(page)) { | ||
2641 | slot = radix_tree_iter_retry(&iter); | ||
2642 | continue; | ||
2643 | } | ||
2644 | } else if (page_count(page) - page_mapcount(page) > 1) { | ||
2645 | xa_lock_irq(&mapping->i_pages); | ||
2646 | radix_tree_tag_set(&mapping->i_pages, iter.index, | ||
2647 | SHMEM_TAG_PINNED); | ||
2648 | xa_unlock_irq(&mapping->i_pages); | ||
2649 | } | ||
2650 | |||
2651 | if (need_resched()) { | ||
2652 | slot = radix_tree_iter_resume(slot, &iter); | ||
2653 | cond_resched_rcu(); | ||
2654 | } | ||
2655 | } | ||
2656 | rcu_read_unlock(); | ||
2657 | } | ||
2658 | |||
2659 | /* | ||
2660 | * Setting SEAL_WRITE requires us to verify there's no pending writer. However, | ||
2661 | * via get_user_pages(), drivers might have some pending I/O without any active | ||
2662 | * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages | ||
2663 | * and see whether it has an elevated ref-count. If so, we tag them and wait for | ||
2664 | * them to be dropped. | ||
2665 | * The caller must guarantee that no new user will acquire writable references | ||
2666 | * to those pages to avoid races. | ||
2667 | */ | ||
2668 | static int shmem_wait_for_pins(struct address_space *mapping) | ||
2669 | { | ||
2670 | struct radix_tree_iter iter; | ||
2671 | void **slot; | ||
2672 | pgoff_t start; | ||
2673 | struct page *page; | ||
2674 | int error, scan; | ||
2675 | |||
2676 | shmem_tag_pins(mapping); | ||
2677 | |||
2678 | error = 0; | ||
2679 | for (scan = 0; scan <= LAST_SCAN; scan++) { | ||
2680 | if (!radix_tree_tagged(&mapping->i_pages, SHMEM_TAG_PINNED)) | ||
2681 | break; | ||
2682 | |||
2683 | if (!scan) | ||
2684 | lru_add_drain_all(); | ||
2685 | else if (schedule_timeout_killable((HZ << scan) / 200)) | ||
2686 | scan = LAST_SCAN; | ||
2687 | |||
2688 | start = 0; | ||
2689 | rcu_read_lock(); | ||
2690 | radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, | ||
2691 | start, SHMEM_TAG_PINNED) { | ||
2692 | |||
2693 | page = radix_tree_deref_slot(slot); | ||
2694 | if (radix_tree_exception(page)) { | ||
2695 | if (radix_tree_deref_retry(page)) { | ||
2696 | slot = radix_tree_iter_retry(&iter); | ||
2697 | continue; | ||
2698 | } | ||
2699 | |||
2700 | page = NULL; | ||
2701 | } | ||
2702 | |||
2703 | if (page && | ||
2704 | page_count(page) - page_mapcount(page) != 1) { | ||
2705 | if (scan < LAST_SCAN) | ||
2706 | goto continue_resched; | ||
2707 | |||
2708 | /* | ||
2709 | * On the last scan, we clean up all those tags | ||
2710 | * we inserted; but make a note that we still | ||
2711 | * found pages pinned. | ||
2712 | */ | ||
2713 | error = -EBUSY; | ||
2714 | } | ||
2715 | |||
2716 | xa_lock_irq(&mapping->i_pages); | ||
2717 | radix_tree_tag_clear(&mapping->i_pages, | ||
2718 | iter.index, SHMEM_TAG_PINNED); | ||
2719 | xa_unlock_irq(&mapping->i_pages); | ||
2720 | continue_resched: | ||
2721 | if (need_resched()) { | ||
2722 | slot = radix_tree_iter_resume(slot, &iter); | ||
2723 | cond_resched_rcu(); | ||
2724 | } | ||
2725 | } | ||
2726 | rcu_read_unlock(); | ||
2727 | } | ||
2728 | |||
2729 | return error; | ||
2730 | } | ||
2731 | |||
2732 | static unsigned int *memfd_file_seals_ptr(struct file *file) | ||
2733 | { | ||
2734 | if (file->f_op == &shmem_file_operations) | ||
2735 | return &SHMEM_I(file_inode(file))->seals; | ||
2736 | |||
2737 | #ifdef CONFIG_HUGETLBFS | ||
2738 | if (file->f_op == &hugetlbfs_file_operations) | ||
2739 | return &HUGETLBFS_I(file_inode(file))->seals; | ||
2740 | #endif | ||
2741 | |||
2742 | return NULL; | ||
2743 | } | ||
2744 | |||
2745 | #define F_ALL_SEALS (F_SEAL_SEAL | \ | ||
2746 | F_SEAL_SHRINK | \ | ||
2747 | F_SEAL_GROW | \ | ||
2748 | F_SEAL_WRITE) | ||
2749 | |||
2750 | static int memfd_add_seals(struct file *file, unsigned int seals) | ||
2751 | { | ||
2752 | struct inode *inode = file_inode(file); | ||
2753 | unsigned int *file_seals; | ||
2754 | int error; | ||
2755 | |||
2756 | /* | ||
2757 | * SEALING | ||
2758 | * Sealing allows multiple parties to share a shmem-file but restrict | ||
2759 | * access to a specific subset of file operations. Seals can only be | ||
2760 | * added, but never removed. This way, mutually untrusted parties can | ||
2761 | * share common memory regions with a well-defined policy. A malicious | ||
2762 | * peer can thus never perform unwanted operations on a shared object. | ||
2763 | * | ||
2764 | * Seals are only supported on special shmem-files and always affect | ||
2765 | * the whole underlying inode. Once a seal is set, it may prevent some | ||
2766 | * kinds of access to the file. Currently, the following seals are | ||
2767 | * defined: | ||
2768 | * SEAL_SEAL: Prevent further seals from being set on this file | ||
2769 | * SEAL_SHRINK: Prevent the file from shrinking | ||
2770 | * SEAL_GROW: Prevent the file from growing | ||
2771 | * SEAL_WRITE: Prevent write access to the file | ||
2772 | * | ||
2773 | * As we don't require any trust relationship between two parties, we | ||
2774 | * must prevent seals from being removed. Therefore, sealing a file | ||
2775 | * only adds a given set of seals to the file, it never touches | ||
2776 | * existing seals. Furthermore, the "setting seals"-operation can be | ||
2777 | * sealed itself, which basically prevents any further seal from being | ||
2778 | * added. | ||
2779 | * | ||
2780 | * Semantics of sealing are only defined on volatile files. Only | ||
2781 | * anonymous shmem files support sealing. More importantly, seals are | ||
2782 | * never written to disk. Therefore, there's no plan to support it on | ||
2783 | * other file types. | ||
2784 | */ | ||
2785 | |||
2786 | if (!(file->f_mode & FMODE_WRITE)) | ||
2787 | return -EPERM; | ||
2788 | if (seals & ~(unsigned int)F_ALL_SEALS) | ||
2789 | return -EINVAL; | ||
2790 | |||
2791 | inode_lock(inode); | ||
2792 | |||
2793 | file_seals = memfd_file_seals_ptr(file); | ||
2794 | if (!file_seals) { | ||
2795 | error = -EINVAL; | ||
2796 | goto unlock; | ||
2797 | } | ||
2798 | |||
2799 | if (*file_seals & F_SEAL_SEAL) { | ||
2800 | error = -EPERM; | ||
2801 | goto unlock; | ||
2802 | } | ||
2803 | |||
2804 | if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) { | ||
2805 | error = mapping_deny_writable(file->f_mapping); | ||
2806 | if (error) | ||
2807 | goto unlock; | ||
2808 | |||
2809 | error = shmem_wait_for_pins(file->f_mapping); | ||
2810 | if (error) { | ||
2811 | mapping_allow_writable(file->f_mapping); | ||
2812 | goto unlock; | ||
2813 | } | ||
2814 | } | ||
2815 | |||
2816 | *file_seals |= seals; | ||
2817 | error = 0; | ||
2818 | |||
2819 | unlock: | ||
2820 | inode_unlock(inode); | ||
2821 | return error; | ||
2822 | } | ||
2823 | |||
2824 | static int memfd_get_seals(struct file *file) | ||
2825 | { | ||
2826 | unsigned int *seals = memfd_file_seals_ptr(file); | ||
2827 | |||
2828 | return seals ? *seals : -EINVAL; | ||
2829 | } | ||
2830 | |||
2831 | long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | ||
2832 | { | ||
2833 | long error; | ||
2834 | |||
2835 | switch (cmd) { | ||
2836 | case F_ADD_SEALS: | ||
2837 | /* disallow upper 32bit */ | ||
2838 | if (arg > UINT_MAX) | ||
2839 | return -EINVAL; | ||
2840 | |||
2841 | error = memfd_add_seals(file, arg); | ||
2842 | break; | ||
2843 | case F_GET_SEALS: | ||
2844 | error = memfd_get_seals(file); | ||
2845 | break; | ||
2846 | default: | ||
2847 | error = -EINVAL; | ||
2848 | break; | ||
2849 | } | ||
2850 | |||
2851 | return error; | ||
2852 | } | ||
2853 | |||
2854 | static long shmem_fallocate(struct file *file, int mode, loff_t offset, | 2634 | static long shmem_fallocate(struct file *file, int mode, loff_t offset, |
2855 | loff_t len) | 2635 | loff_t len) |
2856 | { | 2636 | { |
@@ -3428,6 +3208,15 @@ static int shmem_match(struct inode *ino, void *vfh) | |||
3428 | return ino->i_ino == inum && fh[0] == ino->i_generation; | 3208 | return ino->i_ino == inum && fh[0] == ino->i_generation; |
3429 | } | 3209 | } |
3430 | 3210 | ||
3211 | /* Find any alias of inode, but prefer a hashed alias */ | ||
3212 | static struct dentry *shmem_find_alias(struct inode *inode) | ||
3213 | { | ||
3214 | struct dentry *alias = d_find_alias(inode); | ||
3215 | |||
3216 | return alias ?: d_find_any_alias(inode); | ||
3217 | } | ||
3218 | |||
3219 | |||
3431 | static struct dentry *shmem_fh_to_dentry(struct super_block *sb, | 3220 | static struct dentry *shmem_fh_to_dentry(struct super_block *sb, |
3432 | struct fid *fid, int fh_len, int fh_type) | 3221 | struct fid *fid, int fh_len, int fh_type) |
3433 | { | 3222 | { |
@@ -3444,7 +3233,7 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, | |||
3444 | inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), | 3233 | inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), |
3445 | shmem_match, fid->raw); | 3234 | shmem_match, fid->raw); |
3446 | if (inode) { | 3235 | if (inode) { |
3447 | dentry = d_find_alias(inode); | 3236 | dentry = shmem_find_alias(inode); |
3448 | iput(inode); | 3237 | iput(inode); |
3449 | } | 3238 | } |
3450 | 3239 | ||
@@ -3673,93 +3462,6 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) | |||
3673 | return 0; | 3462 | return 0; |
3674 | } | 3463 | } |
3675 | 3464 | ||
3676 | #define MFD_NAME_PREFIX "memfd:" | ||
3677 | #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) | ||
3678 | #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) | ||
3679 | |||
3680 | #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) | ||
3681 | |||
3682 | SYSCALL_DEFINE2(memfd_create, | ||
3683 | const char __user *, uname, | ||
3684 | unsigned int, flags) | ||
3685 | { | ||
3686 | unsigned int *file_seals; | ||
3687 | struct file *file; | ||
3688 | int fd, error; | ||
3689 | char *name; | ||
3690 | long len; | ||
3691 | |||
3692 | if (!(flags & MFD_HUGETLB)) { | ||
3693 | if (flags & ~(unsigned int)MFD_ALL_FLAGS) | ||
3694 | return -EINVAL; | ||
3695 | } else { | ||
3696 | /* Allow huge page size encoding in flags. */ | ||
3697 | if (flags & ~(unsigned int)(MFD_ALL_FLAGS | | ||
3698 | (MFD_HUGE_MASK << MFD_HUGE_SHIFT))) | ||
3699 | return -EINVAL; | ||
3700 | } | ||
3701 | |||
3702 | /* length includes terminating zero */ | ||
3703 | len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); | ||
3704 | if (len <= 0) | ||
3705 | return -EFAULT; | ||
3706 | if (len > MFD_NAME_MAX_LEN + 1) | ||
3707 | return -EINVAL; | ||
3708 | |||
3709 | name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL); | ||
3710 | if (!name) | ||
3711 | return -ENOMEM; | ||
3712 | |||
3713 | strcpy(name, MFD_NAME_PREFIX); | ||
3714 | if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) { | ||
3715 | error = -EFAULT; | ||
3716 | goto err_name; | ||
3717 | } | ||
3718 | |||
3719 | /* terminating-zero may have changed after strnlen_user() returned */ | ||
3720 | if (name[len + MFD_NAME_PREFIX_LEN - 1]) { | ||
3721 | error = -EFAULT; | ||
3722 | goto err_name; | ||
3723 | } | ||
3724 | |||
3725 | fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); | ||
3726 | if (fd < 0) { | ||
3727 | error = fd; | ||
3728 | goto err_name; | ||
3729 | } | ||
3730 | |||
3731 | if (flags & MFD_HUGETLB) { | ||
3732 | struct user_struct *user = NULL; | ||
3733 | |||
3734 | file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user, | ||
3735 | HUGETLB_ANONHUGE_INODE, | ||
3736 | (flags >> MFD_HUGE_SHIFT) & | ||
3737 | MFD_HUGE_MASK); | ||
3738 | } else | ||
3739 | file = shmem_file_setup(name, 0, VM_NORESERVE); | ||
3740 | if (IS_ERR(file)) { | ||
3741 | error = PTR_ERR(file); | ||
3742 | goto err_fd; | ||
3743 | } | ||
3744 | file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; | ||
3745 | file->f_flags |= O_RDWR | O_LARGEFILE; | ||
3746 | |||
3747 | if (flags & MFD_ALLOW_SEALING) { | ||
3748 | file_seals = memfd_file_seals_ptr(file); | ||
3749 | *file_seals &= ~F_SEAL_SEAL; | ||
3750 | } | ||
3751 | |||
3752 | fd_install(fd, file); | ||
3753 | kfree(name); | ||
3754 | return fd; | ||
3755 | |||
3756 | err_fd: | ||
3757 | put_unused_fd(fd); | ||
3758 | err_name: | ||
3759 | kfree(name); | ||
3760 | return error; | ||
3761 | } | ||
3762 | |||
3763 | #endif /* CONFIG_TMPFS */ | 3465 | #endif /* CONFIG_TMPFS */ |
3764 | 3466 | ||
3765 | static void shmem_put_super(struct super_block *sb) | 3467 | static void shmem_put_super(struct super_block *sb) |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -1235,8 +1235,6 @@ void __init kmem_cache_init(void) | |||
1235 | { | 1235 | { |
1236 | int i; | 1236 | int i; |
1237 | 1237 | ||
1238 | BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) < | ||
1239 | sizeof(struct rcu_head)); | ||
1240 | kmem_cache = &kmem_cache_boot; | 1238 | kmem_cache = &kmem_cache_boot; |
1241 | 1239 | ||
1242 | if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1) | 1240 | if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1) |
@@ -2665,6 +2663,7 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep, | |||
2665 | invalid_mask, &invalid_mask, flags, &flags); | 2663 | invalid_mask, &invalid_mask, flags, &flags); |
2666 | dump_stack(); | 2664 | dump_stack(); |
2667 | } | 2665 | } |
2666 | WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); | ||
2668 | local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); | 2667 | local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); |
2669 | 2668 | ||
2670 | check_irq_off(); | 2669 | check_irq_off(); |
@@ -3071,6 +3070,7 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, | |||
3071 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | 3070 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
3072 | gfp_t flags, void *objp, unsigned long caller) | 3071 | gfp_t flags, void *objp, unsigned long caller) |
3073 | { | 3072 | { |
3073 | WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); | ||
3074 | if (!objp) | 3074 | if (!objp) |
3075 | return objp; | 3075 | return objp; |
3076 | if (cachep->flags & SLAB_POISON) { | 3076 | if (cachep->flags & SLAB_POISON) { |
diff --git a/mm/slob.c b/mm/slob.c --- a/mm/slob.c +++ b/mm/slob.c | |||
@@ -555,8 +555,10 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | |||
555 | flags, node); | 555 | flags, node); |
556 | } | 556 | } |
557 | 557 | ||
558 | if (b && c->ctor) | 558 | if (b && c->ctor) { |
559 | WARN_ON_ONCE(flags & __GFP_ZERO); | ||
559 | c->ctor(b); | 560 | c->ctor(b); |
561 | } | ||
560 | 562 | ||
561 | kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); | 563 | kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); |
562 | return b; | 564 | return b; |
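This hunk and the slab.c hunks above (plus a matching check in the slub hunks below) all assert the same invariant: a cache with a constructor must not be allocated from with __GFP_ZERO, because zeroing a recycled object would wipe what the constructor established. The misuse being flagged, sketched with a hypothetical cache:

#include <linux/slab.h>

static void widget_ctor(void *obj)
{
        /* establish object invariants on first use of the slot */
}

static int widget_example(void)
{
        struct kmem_cache *cache;
        void *w;

        cache = kmem_cache_create("widget", 64, 0, 0, widget_ctor);
        if (!cache)
                return -ENOMEM;

        /* Buggy: now triggers WARN_ON_ONCE(); the zeroing would
         * clobber whatever widget_ctor() set up. */
        w = kmem_cache_alloc(cache, GFP_KERNEL | __GFP_ZERO);
        return w ? 0 : -ENOMEM;
}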
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
@@ -52,11 +52,11 @@ | |||
52 | * and to synchronize major metadata changes to slab cache structures. | 52 | * and to synchronize major metadata changes to slab cache structures. |
53 | * | 53 | * |
54 | * The slab_lock is only used for debugging and on arches that do not | 54 | * The slab_lock is only used for debugging and on arches that do not |
55 | * have the ability to do a cmpxchg_double. It only protects the second | 55 | * have the ability to do a cmpxchg_double. It only protects: |
56 | * double word in the page struct. Meaning | ||
57 | * A. page->freelist -> List of object free in a page | 56 | * A. page->freelist -> List of objects free in a page |
58 | * B. page->counters -> Counters of objects | 57 | * B. page->inuse -> Number of objects in use |
59 | * C. page->frozen -> frozen state | 58 | * C. page->objects -> Number of objects in page |
59 | * D. page->frozen -> frozen state | ||
60 | * | 60 | * |
61 | * If a slab is frozen then it is exempt from list management. It is not | 61 | * If a slab is frozen then it is exempt from list management. It is not |
62 | * on any list. The processor that froze the slab is the one who can | 62 | * on any list. The processor that froze the slab is the one who can |
@@ -316,16 +316,16 @@ static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) | |||
316 | return (p - addr) / s->size; | 316 | return (p - addr) / s->size; |
317 | } | 317 | } |
318 | 318 | ||
319 | static inline unsigned int order_objects(unsigned int order, unsigned int size, unsigned int reserved) | 319 | static inline unsigned int order_objects(unsigned int order, unsigned int size) |
320 | { | 320 | { |
321 | return (((unsigned int)PAGE_SIZE << order) - reserved) / size; | 321 | return ((unsigned int)PAGE_SIZE << order) / size; |
322 | } | 322 | } |
323 | 323 | ||
324 | static inline struct kmem_cache_order_objects oo_make(unsigned int order, | 324 | static inline struct kmem_cache_order_objects oo_make(unsigned int order, |
325 | unsigned int size, unsigned int reserved) | 325 | unsigned int size) |
326 | { | 326 | { |
327 | struct kmem_cache_order_objects x = { | 327 | struct kmem_cache_order_objects x = { |
328 | (order << OO_SHIFT) + order_objects(order, size, reserved) | 328 | (order << OO_SHIFT) + order_objects(order, size) |
329 | }; | 329 | }; |
330 | 330 | ||
331 | return x; | 331 | return x; |
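With the per-slab reserved bytes gone, the sizing math is plain division. A worked example, assuming PAGE_SIZE is 4096:

#include <stdio.h>

int main(void)
{
        unsigned int order = 1, size = 192;
        unsigned int slab_size = 4096u << order;        /* 8192 */

        printf("objects = %u\n", slab_size / size);     /* 42 */
        printf("rem     = %u\n", slab_size % size);     /* 128 */
        /* slab_order() accepts this order at fraction 16,
         * since 128 <= 8192 / 16 = 512 */
        return 0;
}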
@@ -356,21 +356,6 @@ static __always_inline void slab_unlock(struct page *page) | |||
356 | __bit_spin_unlock(PG_locked, &page->flags); | 356 | __bit_spin_unlock(PG_locked, &page->flags); |
357 | } | 357 | } |
358 | 358 | ||
359 | static inline void set_page_slub_counters(struct page *page, unsigned long counters_new) | ||
360 | { | ||
361 | struct page tmp; | ||
362 | tmp.counters = counters_new; | ||
363 | /* | ||
364 | * page->counters can cover frozen/inuse/objects as well | ||
365 | * as page->_refcount. If we assign to ->counters directly | ||
366 | * we run the risk of losing updates to page->_refcount, so | ||
367 | * be careful and only assign to the fields we need. | ||
368 | */ | ||
369 | page->frozen = tmp.frozen; | ||
370 | page->inuse = tmp.inuse; | ||
371 | page->objects = tmp.objects; | ||
372 | } | ||
373 | |||
374 | /* Interrupts must be disabled (for the fallback code to work right) */ | 359 | /* Interrupts must be disabled (for the fallback code to work right) */ |
375 | static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | 360 | static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, |
376 | void *freelist_old, unsigned long counters_old, | 361 | void *freelist_old, unsigned long counters_old, |
@@ -392,7 +377,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
392 | if (page->freelist == freelist_old && | 377 | if (page->freelist == freelist_old && |
393 | page->counters == counters_old) { | 378 | page->counters == counters_old) { |
394 | page->freelist = freelist_new; | 379 | page->freelist = freelist_new; |
395 | set_page_slub_counters(page, counters_new); | 380 | page->counters = counters_new; |
396 | slab_unlock(page); | 381 | slab_unlock(page); |
397 | return true; | 382 | return true; |
398 | } | 383 | } |
@@ -431,7 +416,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
431 | if (page->freelist == freelist_old && | 416 | if (page->freelist == freelist_old && |
432 | page->counters == counters_old) { | 417 | page->counters == counters_old) { |
433 | page->freelist = freelist_new; | 418 | page->freelist = freelist_new; |
434 | set_page_slub_counters(page, counters_new); | 419 | page->counters = counters_new; |
435 | slab_unlock(page); | 420 | slab_unlock(page); |
436 | local_irq_restore(flags); | 421 | local_irq_restore(flags); |
437 | return true; | 422 | return true; |
@@ -711,7 +696,7 @@ void object_err(struct kmem_cache *s, struct page *page, | |||
711 | print_trailer(s, page, object); | 696 | print_trailer(s, page, object); |
712 | } | 697 | } |
713 | 698 | ||
714 | static void slab_err(struct kmem_cache *s, struct page *page, | 699 | static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, |
715 | const char *fmt, ...) | 700 | const char *fmt, ...) |
716 | { | 701 | { |
717 | va_list args; | 702 | va_list args; |
@@ -847,7 +832,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
847 | return 1; | 832 | return 1; |
848 | 833 | ||
849 | start = page_address(page); | 834 | start = page_address(page); |
850 | length = (PAGE_SIZE << compound_order(page)) - s->reserved; | 835 | length = PAGE_SIZE << compound_order(page); |
851 | end = start + length; | 836 | end = start + length; |
852 | remainder = length % s->size; | 837 | remainder = length % s->size; |
853 | if (!remainder) | 838 | if (!remainder) |
@@ -936,7 +921,7 @@ static int check_slab(struct kmem_cache *s, struct page *page) | |||
936 | return 0; | 921 | return 0; |
937 | } | 922 | } |
938 | 923 | ||
939 | maxobj = order_objects(compound_order(page), s->size, s->reserved); | 924 | maxobj = order_objects(compound_order(page), s->size); |
940 | if (page->objects > maxobj) { | 925 | if (page->objects > maxobj) { |
941 | slab_err(s, page, "objects %u > max %u", | 926 | slab_err(s, page, "objects %u > max %u", |
942 | page->objects, maxobj); | 927 | page->objects, maxobj); |
@@ -986,7 +971,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
986 | nr++; | 971 | nr++; |
987 | } | 972 | } |
988 | 973 | ||
989 | max_objects = order_objects(compound_order(page), s->size, s->reserved); | 974 | max_objects = order_objects(compound_order(page), s->size); |
990 | if (max_objects > MAX_OBJS_PER_PAGE) | 975 | if (max_objects > MAX_OBJS_PER_PAGE) |
991 | max_objects = MAX_OBJS_PER_PAGE; | 976 | max_objects = MAX_OBJS_PER_PAGE; |
992 | 977 | ||
@@ -1694,24 +1679,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1694 | __ClearPageSlabPfmemalloc(page); | 1679 | __ClearPageSlabPfmemalloc(page); |
1695 | __ClearPageSlab(page); | 1680 | __ClearPageSlab(page); |
1696 | 1681 | ||
1697 | page_mapcount_reset(page); | 1682 | page->mapping = NULL; |
1698 | if (current->reclaim_state) | 1683 | if (current->reclaim_state) |
1699 | current->reclaim_state->reclaimed_slab += pages; | 1684 | current->reclaim_state->reclaimed_slab += pages; |
1700 | memcg_uncharge_slab(page, order, s); | 1685 | memcg_uncharge_slab(page, order, s); |
1701 | __free_pages(page, order); | 1686 | __free_pages(page, order); |
1702 | } | 1687 | } |
1703 | 1688 | ||
1704 | #define need_reserve_slab_rcu \ | ||
1705 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | ||
1706 | |||
1707 | static void rcu_free_slab(struct rcu_head *h) | 1689 | static void rcu_free_slab(struct rcu_head *h) |
1708 | { | 1690 | { |
1709 | struct page *page; | 1691 | struct page *page = container_of(h, struct page, rcu_head); |
1710 | |||
1711 | if (need_reserve_slab_rcu) | ||
1712 | page = virt_to_head_page(h); | ||
1713 | else | ||
1714 | page = container_of((struct list_head *)h, struct page, lru); | ||
1715 | 1692 | ||
1716 | __free_slab(page->slab_cache, page); | 1693 | __free_slab(page->slab_cache, page); |
1717 | } | 1694 | } |
@@ -1719,19 +1696,7 @@ static void rcu_free_slab(struct rcu_head *h) | |||
1719 | static void free_slab(struct kmem_cache *s, struct page *page) | 1696 | static void free_slab(struct kmem_cache *s, struct page *page) |
1720 | { | 1697 | { |
1721 | if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { | 1698 | if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { |
1722 | struct rcu_head *head; | 1699 | call_rcu(&page->rcu_head, rcu_free_slab); |
1723 | |||
1724 | if (need_reserve_slab_rcu) { | ||
1725 | int order = compound_order(page); | ||
1726 | int offset = (PAGE_SIZE << order) - s->reserved; | ||
1727 | |||
1728 | VM_BUG_ON(s->reserved != sizeof(*head)); | ||
1729 | head = page_address(page) + offset; | ||
1730 | } else { | ||
1731 | head = &page->rcu_head; | ||
1732 | } | ||
1733 | |||
1734 | call_rcu(head, rcu_free_slab); | ||
1735 | } else | 1700 | } else |
1736 | __free_slab(s, page); | 1701 | __free_slab(s, page); |
1737 | } | 1702 | } |
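With rcu_head now a first-class member of struct page, the RCU-deferred free collapses to the canonical pattern: embed an rcu_head, recover the container in the callback. A generic sketch of that shape (not the slub code itself):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        struct rcu_head rcu;
        /* payload ... */
};

static void foo_free_rcu(struct rcu_head *h)
{
        struct foo *f = container_of(h, struct foo, rcu);

        kfree(f);       /* all pre-existing RCU readers have finished */
}

static void foo_release(struct foo *f)
{
        call_rcu(&f->rcu, foo_free_rcu);
}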
@@ -2444,6 +2409,8 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, | |||
2444 | struct kmem_cache_cpu *c = *pc; | 2409 | struct kmem_cache_cpu *c = *pc; |
2445 | struct page *page; | 2410 | struct page *page; |
2446 | 2411 | ||
2412 | WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); | ||
2413 | |||
2447 | freelist = get_partial(s, flags, node, c); | 2414 | freelist = get_partial(s, flags, node, c); |
2448 | 2415 | ||
2449 | if (freelist) | 2416 | if (freelist) |
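The new WARN_ON_ONCE() flags a real conflict: zeroing an object after its constructor ran would wipe the constructor's work. A user-space sketch of the hazard (struct obj and ctor() are illustrative, not kernel API):

    #include <stdio.h>
    #include <string.h>

    struct obj { int magic; };

    static void ctor(void *p)
    {
            ((struct obj *)p)->magic = 0xabcd;  /* constructor's invariant */
    }

    int main(void)
    {
            struct obj o;

            ctor(&o);                        /* runs once, at slab allocation */
            memset(&o, 0, sizeof(o));        /* what honoring __GFP_ZERO would do */
            printf("magic %s\n", o.magic == 0xabcd ? "intact" : "destroyed");
            return 0;
    }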
@@ -3226,21 +3193,21 @@ static unsigned int slub_min_objects; | |||
3226 | */ | 3193 | */ |
3227 | static inline unsigned int slab_order(unsigned int size, | 3194 | static inline unsigned int slab_order(unsigned int size, |
3228 | unsigned int min_objects, unsigned int max_order, | 3195 | unsigned int min_objects, unsigned int max_order, |
3229 | unsigned int fract_leftover, unsigned int reserved) | 3196 | unsigned int fract_leftover) |
3230 | { | 3197 | { |
3231 | unsigned int min_order = slub_min_order; | 3198 | unsigned int min_order = slub_min_order; |
3232 | unsigned int order; | 3199 | unsigned int order; |
3233 | 3200 | ||
3234 | if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) | 3201 | if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE) |
3235 | return get_order(size * MAX_OBJS_PER_PAGE) - 1; | 3202 | return get_order(size * MAX_OBJS_PER_PAGE) - 1; |
3236 | 3203 | ||
3237 | for (order = max(min_order, (unsigned int)get_order(min_objects * size + reserved)); | 3204 | for (order = max(min_order, (unsigned int)get_order(min_objects * size)); |
3238 | order <= max_order; order++) { | 3205 | order <= max_order; order++) { |
3239 | 3206 | ||
3240 | unsigned int slab_size = (unsigned int)PAGE_SIZE << order; | 3207 | unsigned int slab_size = (unsigned int)PAGE_SIZE << order; |
3241 | unsigned int rem; | 3208 | unsigned int rem; |
3242 | 3209 | ||
3243 | rem = (slab_size - reserved) % size; | 3210 | rem = slab_size % size; |
3244 | 3211 | ||
3245 | if (rem <= slab_size / fract_leftover) | 3212 | if (rem <= slab_size / fract_leftover) |
3246 | break; | 3213 | break; |
@@ -3249,7 +3216,7 @@ static inline unsigned int slab_order(unsigned int size, | |||
3249 | return order; | 3216 | return order; |
3250 | } | 3217 | } |
3251 | 3218 | ||
3252 | static inline int calculate_order(unsigned int size, unsigned int reserved) | 3219 | static inline int calculate_order(unsigned int size) |
3253 | { | 3220 | { |
3254 | unsigned int order; | 3221 | unsigned int order; |
3255 | unsigned int min_objects; | 3222 | unsigned int min_objects; |
@@ -3266,7 +3233,7 @@ static inline int calculate_order(unsigned int size, unsigned int reserved) | |||
3266 | min_objects = slub_min_objects; | 3233 | min_objects = slub_min_objects; |
3267 | if (!min_objects) | 3234 | if (!min_objects) |
3268 | min_objects = 4 * (fls(nr_cpu_ids) + 1); | 3235 | min_objects = 4 * (fls(nr_cpu_ids) + 1); |
3269 | max_objects = order_objects(slub_max_order, size, reserved); | 3236 | max_objects = order_objects(slub_max_order, size); |
3270 | min_objects = min(min_objects, max_objects); | 3237 | min_objects = min(min_objects, max_objects); |
3271 | 3238 | ||
3272 | while (min_objects > 1) { | 3239 | while (min_objects > 1) { |
@@ -3275,7 +3242,7 @@ static inline int calculate_order(unsigned int size, unsigned int reserved) | |||
3275 | fraction = 16; | 3242 | fraction = 16; |
3276 | while (fraction >= 4) { | 3243 | while (fraction >= 4) { |
3277 | order = slab_order(size, min_objects, | 3244 | order = slab_order(size, min_objects, |
3278 | slub_max_order, fraction, reserved); | 3245 | slub_max_order, fraction); |
3279 | if (order <= slub_max_order) | 3246 | if (order <= slub_max_order) |
3280 | return order; | 3247 | return order; |
3281 | fraction /= 2; | 3248 | fraction /= 2; |
@@ -3287,14 +3254,14 @@ static inline int calculate_order(unsigned int size, unsigned int reserved) | |||
3287 | * We were unable to place multiple objects in a slab. Now | 3254 | * We were unable to place multiple objects in a slab. Now |
3288 | * let's see if we can place a single object there. | 3255 | * let's see if we can place a single object there. |
3289 | */ | 3256 | */ |
3290 | order = slab_order(size, 1, slub_max_order, 1, reserved); | 3257 | order = slab_order(size, 1, slub_max_order, 1); |
3291 | if (order <= slub_max_order) | 3258 | if (order <= slub_max_order) |
3292 | return order; | 3259 | return order; |
3293 | 3260 | ||
3294 | /* | 3261 | /* |
3295 | * Doh, this slab cannot be placed using slub_max_order. | 3262 | * Doh, this slab cannot be placed using slub_max_order. |
3296 | */ | 3263 | */ |
3297 | order = slab_order(size, 1, MAX_ORDER, 1, reserved); | 3264 | order = slab_order(size, 1, MAX_ORDER, 1); |
3298 | if (order < MAX_ORDER) | 3265 | if (order < MAX_ORDER) |
3299 | return order; | 3266 | return order; |
3300 | return -ENOSYS; | 3267 | return -ENOSYS; |
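With the reserved bytes removed, the fit check in slab_order() reduces to a plain remainder. A minimal user-space rendition of the post-patch arithmetic; order_objects() mirrors the kernel helper and the object size is an arbitrary example:

    #include <stdio.h>

    #define PAGE_SIZE 4096u

    /* Objects that fit in a 2^order page slab, as in the patched helper. */
    static unsigned int order_objects(unsigned int order, unsigned int size)
    {
            return (PAGE_SIZE << order) / size;
    }

    int main(void)
    {
            unsigned int size = 192;  /* example object size */

            for (unsigned int order = 0; order <= 3; order++) {
                    unsigned int slab_size = PAGE_SIZE << order;
                    unsigned int rem = slab_size % size;  /* leftover bytes */

                    printf("order %u: %3u objects, %3u bytes wasted\n",
                           order, order_objects(order, size), rem);
            }
            return 0;
    }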
@@ -3562,7 +3529,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
3562 | if (forced_order >= 0) | 3529 | if (forced_order >= 0) |
3563 | order = forced_order; | 3530 | order = forced_order; |
3564 | else | 3531 | else |
3565 | order = calculate_order(size, s->reserved); | 3532 | order = calculate_order(size); |
3566 | 3533 | ||
3567 | if ((int)order < 0) | 3534 | if ((int)order < 0) |
3568 | return 0; | 3535 | return 0; |
@@ -3580,8 +3547,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
3580 | /* | 3547 | /* |
3581 | * Determine the number of objects per slab | 3548 | * Determine the number of objects per slab |
3582 | */ | 3549 | */ |
3583 | s->oo = oo_make(order, size, s->reserved); | 3550 | s->oo = oo_make(order, size); |
3584 | s->min = oo_make(get_order(size), size, s->reserved); | 3551 | s->min = oo_make(get_order(size), size); |
3585 | if (oo_objects(s->oo) > oo_objects(s->max)) | 3552 | if (oo_objects(s->oo) > oo_objects(s->max)) |
3586 | s->max = s->oo; | 3553 | s->max = s->oo; |
3587 | 3554 | ||
@@ -3591,14 +3558,10 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
3591 | static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) | 3558 | static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) |
3592 | { | 3559 | { |
3593 | s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); | 3560 | s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); |
3594 | s->reserved = 0; | ||
3595 | #ifdef CONFIG_SLAB_FREELIST_HARDENED | 3561 | #ifdef CONFIG_SLAB_FREELIST_HARDENED |
3596 | s->random = get_random_long(); | 3562 | s->random = get_random_long(); |
3597 | #endif | 3563 | #endif |
3598 | 3564 | ||
3599 | if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU)) | ||
3600 | s->reserved = sizeof(struct rcu_head); | ||
3601 | |||
3602 | if (!calculate_sizes(s, -1)) | 3565 | if (!calculate_sizes(s, -1)) |
3603 | goto error; | 3566 | goto error; |
3604 | if (disable_higher_order_debug) { | 3567 | if (disable_higher_order_debug) { |
@@ -4239,12 +4202,6 @@ void __init kmem_cache_init(void) | |||
4239 | SLAB_HWCACHE_ALIGN, 0, 0); | 4202 | SLAB_HWCACHE_ALIGN, 0, 0); |
4240 | 4203 | ||
4241 | kmem_cache = bootstrap(&boot_kmem_cache); | 4204 | kmem_cache = bootstrap(&boot_kmem_cache); |
4242 | |||
4243 | /* | ||
4244 | * Allocate kmem_cache_node properly from the kmem_cache slab. | ||
4245 | * kmem_cache_node is separately allocated so no need to | ||
4246 | * update any list pointers. | ||
4247 | */ | ||
4248 | kmem_cache_node = bootstrap(&boot_kmem_cache_node); | 4205 | kmem_cache_node = bootstrap(&boot_kmem_cache_node); |
4249 | 4206 | ||
4250 | /* Now we can use the kmem_cache to allocate kmalloc slabs */ | 4207 | /* Now we can use the kmem_cache to allocate kmalloc slabs */ |
@@ -5117,12 +5074,6 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) | |||
5117 | } | 5074 | } |
5118 | SLAB_ATTR_RO(destroy_by_rcu); | 5075 | SLAB_ATTR_RO(destroy_by_rcu); |
5119 | 5076 | ||
5120 | static ssize_t reserved_show(struct kmem_cache *s, char *buf) | ||
5121 | { | ||
5122 | return sprintf(buf, "%u\n", s->reserved); | ||
5123 | } | ||
5124 | SLAB_ATTR_RO(reserved); | ||
5125 | |||
5126 | #ifdef CONFIG_SLUB_DEBUG | 5077 | #ifdef CONFIG_SLUB_DEBUG |
5127 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) | 5078 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) |
5128 | { | 5079 | { |
@@ -5435,7 +5386,6 @@ static struct attribute *slab_attrs[] = { | |||
5435 | &reclaim_account_attr.attr, | 5386 | &reclaim_account_attr.attr, |
5436 | &destroy_by_rcu_attr.attr, | 5387 | &destroy_by_rcu_attr.attr, |
5437 | &shrink_attr.attr, | 5388 | &shrink_attr.attr, |
5438 | &reserved_attr.attr, | ||
5439 | &slabs_cpu_partial_attr.attr, | 5389 | &slabs_cpu_partial_attr.attr, |
5440 | #ifdef CONFIG_SLUB_DEBUG | 5390 | #ifdef CONFIG_SLUB_DEBUG |
5441 | &total_objects_attr.attr, | 5391 | &total_objects_attr.attr, |
diff --git a/mm/sparse.c b/mm/sparse.c index 73dc2fcc0eab..f13f2723950a 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -190,15 +190,13 @@ static inline int next_present_section_nr(int section_nr) | |||
190 | section_nr++; | 190 | section_nr++; |
191 | if (present_section_nr(section_nr)) | 191 | if (present_section_nr(section_nr)) |
192 | return section_nr; | 192 | return section_nr; |
193 | } while ((section_nr < NR_MEM_SECTIONS) && | 193 | } while (section_nr <= __highest_present_section_nr); |
194 | (section_nr <= __highest_present_section_nr)); | ||
195 | 194 | ||
196 | return -1; | 195 | return -1; |
197 | } | 196 | } |
198 | #define for_each_present_section_nr(start, section_nr) \ | 197 | #define for_each_present_section_nr(start, section_nr) \ |
199 | for (section_nr = next_present_section_nr(start-1); \ | 198 | for (section_nr = next_present_section_nr(start-1); \ |
200 | ((section_nr >= 0) && \ | 199 | ((section_nr >= 0) && \ |
201 | (section_nr < NR_MEM_SECTIONS) && \ | ||
202 | (section_nr <= __highest_present_section_nr)); \ | 200 | (section_nr <= __highest_present_section_nr)); \ |
203 | section_nr = next_present_section_nr(section_nr)) | 201 | section_nr = next_present_section_nr(section_nr)) |
204 | 202 | ||
@@ -524,7 +522,7 @@ static void __init alloc_usemap_and_memmap(void (*alloc_func) | |||
524 | map_count = 1; | 522 | map_count = 1; |
525 | } | 523 | } |
526 | /* ok, last chunk */ | 524 | /* ok, last chunk */ |
527 | alloc_func(data, pnum_begin, NR_MEM_SECTIONS, | 525 | alloc_func(data, pnum_begin, __highest_present_section_nr+1, |
528 | map_count, nodeid_begin); | 526 | map_count, nodeid_begin); |
529 | } | 527 | } |
530 | 528 | ||
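Because __highest_present_section_nr can never exceed the last valid section, the NR_MEM_SECTIONS bound was redundant. A toy model of the tightened iterator, with a made-up bitmap and bounds:

    #include <stdio.h>

    #define NSEC 16
    static const int present[NSEC] = { [1] = 1, [4] = 1, [9] = 1 };
    static const int highest_present = 9;  /* highest index marked present */

    static int next_present(int nr)
    {
            do {
                    nr++;
                    if (nr <= highest_present && present[nr])
                            return nr;
            } while (nr <= highest_present);
            return -1;
    }

    int main(void)
    {
            for (int nr = next_present(-1); nr >= 0; nr = next_present(nr))
                    printf("section %d present\n", nr);
            return 0;
    }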
diff --git a/mm/swap_slots.c b/mm/swap_slots.c index f2641894f440..f51ac051c0c9 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c | |||
@@ -317,7 +317,7 @@ swp_entry_t get_swap_page(struct page *page) | |||
317 | if (PageTransHuge(page)) { | 317 | if (PageTransHuge(page)) { |
318 | if (IS_ENABLED(CONFIG_THP_SWAP)) | 318 | if (IS_ENABLED(CONFIG_THP_SWAP)) |
319 | get_swap_pages(1, true, &entry); | 319 | get_swap_pages(1, true, &entry); |
320 | return entry; | 320 | goto out; |
321 | } | 321 | } |
322 | 322 | ||
323 | /* | 323 | /* |
@@ -347,10 +347,14 @@ repeat: | |||
347 | } | 347 | } |
348 | mutex_unlock(&cache->alloc_lock); | 348 | mutex_unlock(&cache->alloc_lock); |
349 | if (entry.val) | 349 | if (entry.val) |
350 | return entry; | 350 | goto out; |
351 | } | 351 | } |
352 | 352 | ||
353 | get_swap_pages(1, false, &entry); | 353 | get_swap_pages(1, false, &entry); |
354 | 354 | out: | |
355 | if (mem_cgroup_try_charge_swap(page, entry)) { | ||
356 | put_swap_page(page, entry); | ||
357 | entry.val = 0; | ||
358 | } | ||
355 | return entry; | 359 | return entry; |
356 | } | 360 | } |
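Funneling every exit of get_swap_page() through the out: label gives one place to charge the swap entry to the memcg and to undo the allocation when the charge fails. A sketch of that single-exit shape; entry_t, try_charge() and put_entry() are stand-ins:

    #include <stdio.h>

    typedef struct { unsigned long val; } entry_t;

    static int try_charge(entry_t e) { (void)e; return 0; }  /* 0 = charged */
    static void put_entry(entry_t e) { (void)e; }

    static entry_t get_entry(int fast_path)
    {
            entry_t entry = { 0 };

            if (fast_path) {
                    entry.val = 42;          /* per-cpu cache hit */
                    goto out;
            }
            entry.val = 7;                   /* slow-path allocation */
    out:
            if (try_charge(entry)) {         /* charge failed: release, fail */
                    put_entry(entry);
                    entry.val = 0;
            }
            return entry;
    }

    int main(void)
    {
            printf("fast %lu, slow %lu\n", get_entry(1).val, get_entry(0).val);
            return 0;
    }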
diff --git a/mm/swap_state.c b/mm/swap_state.c index 07f9aa2340c3..ab8e59cd18ea 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -216,9 +216,6 @@ int add_to_swap(struct page *page) | |||
216 | if (!entry.val) | 216 | if (!entry.val) |
217 | return 0; | 217 | return 0; |
218 | 218 | ||
219 | if (mem_cgroup_try_charge_swap(page, entry)) | ||
220 | goto fail; | ||
221 | |||
222 | /* | 219 | /* |
223 | * Radix-tree node allocations from PF_MEMALLOC contexts could | 220 | * Radix-tree node allocations from PF_MEMALLOC contexts could |
224 | * completely exhaust the page allocator. __GFP_NOMEMALLOC | 221 | * completely exhaust the page allocator. __GFP_NOMEMALLOC |
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 39791b81ede7..5029f241908f 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c | |||
@@ -404,7 +404,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, | |||
404 | unsigned long dst_start, | 404 | unsigned long dst_start, |
405 | unsigned long src_start, | 405 | unsigned long src_start, |
406 | unsigned long len, | 406 | unsigned long len, |
407 | bool zeropage) | 407 | bool zeropage, |
408 | bool *mmap_changing) | ||
408 | { | 409 | { |
409 | struct vm_area_struct *dst_vma; | 410 | struct vm_area_struct *dst_vma; |
410 | ssize_t err; | 411 | ssize_t err; |
@@ -431,6 +432,15 @@ retry: | |||
431 | down_read(&dst_mm->mmap_sem); | 432 | down_read(&dst_mm->mmap_sem); |
432 | 433 | ||
433 | /* | 434 | /* |
435 | * If memory mappings are changing because of a non-cooperative | ||
436 | * operation (e.g. mremap) running in parallel, bail out and | ||
437 | * request the user to retry later | ||
438 | */ | ||
439 | err = -EAGAIN; | ||
440 | if (mmap_changing && READ_ONCE(*mmap_changing)) | ||
441 | goto out_unlock; | ||
442 | |||
443 | /* | ||
434 | * Make sure the vma is not shared, that the dst range is | 444 | * Make sure the vma is not shared, that the dst range is |
435 | * both valid and fully within a single existing vma. | 445 | * both valid and fully within a single existing vma. |
436 | */ | 446 | */ |
@@ -563,13 +573,15 @@ out: | |||
563 | } | 573 | } |
564 | 574 | ||
565 | ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, | 575 | ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, |
566 | unsigned long src_start, unsigned long len) | 576 | unsigned long src_start, unsigned long len, |
577 | bool *mmap_changing) | ||
567 | { | 578 | { |
568 | return __mcopy_atomic(dst_mm, dst_start, src_start, len, false); | 579 | return __mcopy_atomic(dst_mm, dst_start, src_start, len, false, |
580 | mmap_changing); | ||
569 | } | 581 | } |
570 | 582 | ||
571 | ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start, | 583 | ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start, |
572 | unsigned long len) | 584 | unsigned long len, bool *mmap_changing) |
573 | { | 585 | { |
574 | return __mcopy_atomic(dst_mm, start, 0, len, true); | 586 | return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing); |
575 | } | 587 | } |
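The new mmap_changing flag turns racing non-cooperative events into a clean -EAGAIN for the caller, who retries after draining the event queue. A compressed sketch of the contract, with an atomic load standing in for READ_ONCE() and illustrative names:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/types.h>

    static ssize_t mcopy_sketch(const bool *mmap_changing)
    {
            /* Checked after taking mmap_sem in the real code. */
            if (mmap_changing &&
                __atomic_load_n(mmap_changing, __ATOMIC_RELAXED))
                    return -EAGAIN;  /* mappings moved underneath us: retry */

            /* ... copy pages into the destination range ... */
            return 0;
    }

    int main(void)
    {
            bool changing = true;

            printf("busy: %zd, quiet: %zd\n",
                   mcopy_sketch(&changing), mcopy_sketch(NULL));
            return 0;
    }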
@@ -391,7 +391,8 @@ EXPORT_SYMBOL(vm_mmap); | |||
391 | * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is | 391 | * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is |
392 | * preferable to the vmalloc fallback, due to visible performance drawbacks. | 392 | * preferable to the vmalloc fallback, due to visible performance drawbacks. |
393 | * | 393 | * |
394 | * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people. | 394 | * Please note that with any gfp flags outside of GFP_KERNEL, kvmalloc will not |
395 | * fall back to vmalloc. | ||
395 | */ | 396 | */ |
396 | void *kvmalloc_node(size_t size, gfp_t flags, int node) | 397 | void *kvmalloc_node(size_t size, gfp_t flags, int node) |
397 | { | 398 | { |
@@ -402,7 +403,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) | |||
402 | * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) | 403 | * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) |
403 | * so the given set of flags has to be compatible. | 404 | * so the given set of flags has to be compatible. |
404 | */ | 405 | */ |
405 | WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); | 406 | if ((flags & GFP_KERNEL) != GFP_KERNEL) |
407 | return kmalloc_node(size, flags, node); | ||
406 | 408 | ||
407 | /* | 409 | /* |
408 | * We want to attempt a large physically contiguous block first because | 410 | * We want to attempt a large physically contiguous block first because |
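Rather than only warning, incompatible gfp flags now short-circuit to kmalloc_node(), so the vmalloc fallback never sees them. A user-space analog of the dispatch; the GFP bits and both allocators are stand-ins, not kernel API:

    #include <stdio.h>
    #include <stdlib.h>

    #define GFP_KERNEL 0x3u  /* stand-in bit pattern */
    #define GFP_NOWAIT 0x0u

    static void *plain_alloc(size_t n)    { return malloc(n); }
    static void *fallback_alloc(size_t n) { return calloc(1, n); }  /* "vmalloc" */

    static void *kvmalloc_sketch(size_t size, unsigned int flags)
    {
            /* Flags the fallback cannot honor take the plain path at once. */
            if ((flags & GFP_KERNEL) != GFP_KERNEL)
                    return plain_alloc(size);

            void *p = plain_alloc(size);
            return p ? p : fallback_alloc(size);
    }

    int main(void)
    {
            free(kvmalloc_sketch(4096, GFP_KERNEL));
            free(kvmalloc_sketch(4096, GFP_NOWAIT));  /* never falls back */
            puts("ok");
            return 0;
    }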
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 63a5f502da08..89efac3a020e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -603,26 +603,6 @@ static void unmap_vmap_area(struct vmap_area *va) | |||
603 | vunmap_page_range(va->va_start, va->va_end); | 603 | vunmap_page_range(va->va_start, va->va_end); |
604 | } | 604 | } |
605 | 605 | ||
606 | static void vmap_debug_free_range(unsigned long start, unsigned long end) | ||
607 | { | ||
608 | /* | ||
609 | * Unmap page tables and force a TLB flush immediately if pagealloc | ||
610 | * debugging is enabled. This catches use after free bugs similarly to | ||
611 | * those in linear kernel virtual address space after a page has been | ||
612 | * freed. | ||
613 | * | ||
614 | * All the lazy freeing logic is still retained, in order to minimise | ||
615 | * intrusiveness of this debugging feature. | ||
616 | * | ||
617 | * This is going to be *slow* (linear kernel virtual address debugging | ||
618 | * doesn't do a broadcast TLB flush so it is a lot faster). | ||
619 | */ | ||
620 | if (debug_pagealloc_enabled()) { | ||
621 | vunmap_page_range(start, end); | ||
622 | flush_tlb_kernel_range(start, end); | ||
623 | } | ||
624 | } | ||
625 | |||
626 | /* | 606 | /* |
627 | * lazy_max_pages is the maximum amount of virtual address space we gather up | 607 | * lazy_max_pages is the maximum amount of virtual address space we gather up |
628 | * before attempting to purge with a TLB flush. | 608 | * before attempting to purge with a TLB flush. |
@@ -756,6 +736,9 @@ static void free_unmap_vmap_area(struct vmap_area *va) | |||
756 | { | 736 | { |
757 | flush_cache_vunmap(va->va_start, va->va_end); | 737 | flush_cache_vunmap(va->va_start, va->va_end); |
758 | unmap_vmap_area(va); | 738 | unmap_vmap_area(va); |
739 | if (debug_pagealloc_enabled()) | ||
740 | flush_tlb_kernel_range(va->va_start, va->va_end); | ||
741 | |||
759 | free_vmap_area_noflush(va); | 742 | free_vmap_area_noflush(va); |
760 | } | 743 | } |
761 | 744 | ||
@@ -1053,6 +1036,10 @@ static void vb_free(const void *addr, unsigned long size) | |||
1053 | 1036 | ||
1054 | vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); | 1037 | vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); |
1055 | 1038 | ||
1039 | if (debug_pagealloc_enabled()) | ||
1040 | flush_tlb_kernel_range((unsigned long)addr, | ||
1041 | (unsigned long)addr + size); | ||
1042 | |||
1056 | spin_lock(&vb->lock); | 1043 | spin_lock(&vb->lock); |
1057 | 1044 | ||
1058 | /* Expand dirty range */ | 1045 | /* Expand dirty range */ |
@@ -1141,16 +1128,16 @@ void vm_unmap_ram(const void *mem, unsigned int count) | |||
1141 | BUG_ON(addr > VMALLOC_END); | 1128 | BUG_ON(addr > VMALLOC_END); |
1142 | BUG_ON(!PAGE_ALIGNED(addr)); | 1129 | BUG_ON(!PAGE_ALIGNED(addr)); |
1143 | 1130 | ||
1144 | debug_check_no_locks_freed(mem, size); | ||
1145 | vmap_debug_free_range(addr, addr+size); | ||
1146 | |||
1147 | if (likely(count <= VMAP_MAX_ALLOC)) { | 1131 | if (likely(count <= VMAP_MAX_ALLOC)) { |
1132 | debug_check_no_locks_freed(mem, size); | ||
1148 | vb_free(mem, size); | 1133 | vb_free(mem, size); |
1149 | return; | 1134 | return; |
1150 | } | 1135 | } |
1151 | 1136 | ||
1152 | va = find_vmap_area(addr); | 1137 | va = find_vmap_area(addr); |
1153 | BUG_ON(!va); | 1138 | BUG_ON(!va); |
1139 | debug_check_no_locks_freed((void *)va->va_start, | ||
1140 | (va->va_end - va->va_start)); | ||
1154 | free_unmap_vmap_area(va); | 1141 | free_unmap_vmap_area(va); |
1155 | } | 1142 | } |
1156 | EXPORT_SYMBOL(vm_unmap_ram); | 1143 | EXPORT_SYMBOL(vm_unmap_ram); |
@@ -1499,7 +1486,6 @@ struct vm_struct *remove_vm_area(const void *addr) | |||
1499 | va->flags |= VM_LAZY_FREE; | 1486 | va->flags |= VM_LAZY_FREE; |
1500 | spin_unlock(&vmap_area_lock); | 1487 | spin_unlock(&vmap_area_lock); |
1501 | 1488 | ||
1502 | vmap_debug_free_range(va->va_start, va->va_end); | ||
1503 | kasan_free_shadow(vm); | 1489 | kasan_free_shadow(vm); |
1504 | free_unmap_vmap_area(va); | 1490 | free_unmap_vmap_area(va); |
1505 | 1491 | ||
@@ -1519,16 +1505,17 @@ static void __vunmap(const void *addr, int deallocate_pages) | |||
1519 | addr)) | 1505 | addr)) |
1520 | return; | 1506 | return; |
1521 | 1507 | ||
1522 | area = remove_vm_area(addr); | 1508 | area = find_vmap_area((unsigned long)addr)->vm; |
1523 | if (unlikely(!area)) { | 1509 | if (unlikely(!area)) { |
1524 | WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", | 1510 | WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", |
1525 | addr); | 1511 | addr); |
1526 | return; | 1512 | return; |
1527 | } | 1513 | } |
1528 | 1514 | ||
1529 | debug_check_no_locks_freed(addr, get_vm_area_size(area)); | 1515 | debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); |
1530 | debug_check_no_obj_freed(addr, get_vm_area_size(area)); | 1516 | debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); |
1531 | 1517 | ||
1518 | remove_vm_area(addr); | ||
1532 | if (deallocate_pages) { | 1519 | if (deallocate_pages) { |
1533 | int i; | 1520 | int i; |
1534 | 1521 | ||
diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 85350ce2d25d..4854584ec436 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c | |||
@@ -342,26 +342,6 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) | |||
342 | vmpressure(gfp, memcg, true, vmpressure_win, 0); | 342 | vmpressure(gfp, memcg, true, vmpressure_win, 0); |
343 | } | 343 | } |
344 | 344 | ||
345 | static enum vmpressure_levels str_to_level(const char *arg) | ||
346 | { | ||
347 | enum vmpressure_levels level; | ||
348 | |||
349 | for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) | ||
350 | if (!strcmp(vmpressure_str_levels[level], arg)) | ||
351 | return level; | ||
352 | return -1; | ||
353 | } | ||
354 | |||
355 | static enum vmpressure_modes str_to_mode(const char *arg) | ||
356 | { | ||
357 | enum vmpressure_modes mode; | ||
358 | |||
359 | for (mode = 0; mode < VMPRESSURE_NUM_MODES; mode++) | ||
360 | if (!strcmp(vmpressure_str_modes[mode], arg)) | ||
361 | return mode; | ||
362 | return -1; | ||
363 | } | ||
364 | |||
365 | #define MAX_VMPRESSURE_ARGS_LEN (strlen("critical") + strlen("hierarchy") + 2) | 345 | #define MAX_VMPRESSURE_ARGS_LEN (strlen("critical") + strlen("hierarchy") + 2) |
366 | 346 | ||
367 | /** | 347 | /** |
@@ -390,27 +370,26 @@ int vmpressure_register_event(struct mem_cgroup *memcg, | |||
390 | char *token; | 370 | char *token; |
391 | int ret = 0; | 371 | int ret = 0; |
392 | 372 | ||
393 | spec_orig = spec = kzalloc(MAX_VMPRESSURE_ARGS_LEN + 1, GFP_KERNEL); | 373 | spec_orig = spec = kstrndup(args, MAX_VMPRESSURE_ARGS_LEN, GFP_KERNEL); |
394 | if (!spec) { | 374 | if (!spec) { |
395 | ret = -ENOMEM; | 375 | ret = -ENOMEM; |
396 | goto out; | 376 | goto out; |
397 | } | 377 | } |
398 | strncpy(spec, args, MAX_VMPRESSURE_ARGS_LEN); | ||
399 | 378 | ||
400 | /* Find required level */ | 379 | /* Find required level */ |
401 | token = strsep(&spec, ","); | 380 | token = strsep(&spec, ","); |
402 | level = str_to_level(token); | 381 | level = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token); |
403 | if (level == -1) { | 382 | if (level < 0) { |
404 | ret = -EINVAL; | 383 | ret = level; |
405 | goto out; | 384 | goto out; |
406 | } | 385 | } |
407 | 386 | ||
408 | /* Find optional mode */ | 387 | /* Find optional mode */ |
409 | token = strsep(&spec, ","); | 388 | token = strsep(&spec, ","); |
410 | if (token) { | 389 | if (token) { |
411 | mode = str_to_mode(token); | 390 | mode = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token); |
412 | if (mode == -1) { | 391 | if (mode < 0) { |
413 | ret = -EINVAL; | 392 | ret = mode; |
414 | goto out; | 393 | goto out; |
415 | } | 394 | } |
416 | } | 395 | } |
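match_string() replaces the two hand-rolled lookup loops and already returns -EINVAL for a miss, which is why the error paths can simply propagate the negative value. A user-space sketch of what that helper does (match_string_sketch() is illustrative, not the kernel symbol):

    #include <stdio.h>
    #include <string.h>

    #define EINVAL_NEG (-22)  /* stand-in for -EINVAL */

    static int match_string_sketch(const char *const *arr, size_t n,
                                   const char *s)
    {
            for (size_t i = 0; i < n; i++)
                    if (arr[i] && strcmp(arr[i], s) == 0)
                            return (int)i;
            return EINVAL_NEG;
    }

    int main(void)
    {
            static const char *const levels[] = { "low", "medium", "critical" };

            printf("medium -> %d\n", match_string_sketch(levels, 3, "medium"));
            printf("bogus  -> %d\n", match_string_sketch(levels, 3, "bogus"));
            return 0;
    }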
diff --git a/mm/vmscan.c b/mm/vmscan.c index 9270a4370d54..03822f86f288 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -2544,12 +2544,28 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) | |||
2544 | unsigned long reclaimed; | 2544 | unsigned long reclaimed; |
2545 | unsigned long scanned; | 2545 | unsigned long scanned; |
2546 | 2546 | ||
2547 | if (mem_cgroup_low(root, memcg)) { | 2547 | switch (mem_cgroup_protected(root, memcg)) { |
2548 | case MEMCG_PROT_MIN: | ||
2549 | /* | ||
2550 | * Hard protection. | ||
2551 | * If there is no reclaimable memory, OOM. | ||
2552 | */ | ||
2553 | continue; | ||
2554 | case MEMCG_PROT_LOW: | ||
2555 | /* | ||
2556 | * Soft protection. | ||
2557 | * Respect the protection only as long as | ||
2558 | * there is an unprotected supply | ||
2559 | * of reclaimable memory from other cgroups. | ||
2560 | */ | ||
2548 | if (!sc->memcg_low_reclaim) { | 2561 | if (!sc->memcg_low_reclaim) { |
2549 | sc->memcg_low_skipped = 1; | 2562 | sc->memcg_low_skipped = 1; |
2550 | continue; | 2563 | continue; |
2551 | } | 2564 | } |
2552 | memcg_memory_event(memcg, MEMCG_LOW); | 2565 | memcg_memory_event(memcg, MEMCG_LOW); |
2566 | break; | ||
2567 | case MEMCG_PROT_NONE: | ||
2568 | break; | ||
2553 | } | 2569 | } |
2554 | 2570 | ||
2555 | reclaimed = sc->nr_reclaimed; | 2571 | reclaimed = sc->nr_reclaimed; |
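The switch separates hard protection (skip the memcg unconditionally) from soft protection (skip it only while unprotected reclaimable memory remains). A toy decision function over usage and the two effective boundaries; the enum and thresholds are illustrative:

    #include <stdio.h>

    enum prot { PROT_NONE, PROT_LOW, PROT_MIN };

    static enum prot protected(unsigned long usage,
                               unsigned long emin, unsigned long elow)
    {
            if (usage <= emin)
                    return PROT_MIN;   /* never reclaim; OOM if starved */
            if (usage <= elow)
                    return PROT_LOW;   /* reclaim only as a last resort */
            return PROT_NONE;
    }

    int main(void)
    {
            static const char *const name[] = { "none", "low", "min" };
            unsigned long emin = 100, elow = 200;

            for (unsigned long usage = 50; usage <= 250; usage += 100)
                    printf("usage %3lu -> %s\n", usage,
                           name[protected(usage, emin, elow)]);
            return 0;
    }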
@@ -3318,11 +3334,15 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) | |||
3318 | .may_unmap = 1, | 3334 | .may_unmap = 1, |
3319 | .may_swap = 1, | 3335 | .may_swap = 1, |
3320 | }; | 3336 | }; |
3337 | |||
3338 | __fs_reclaim_acquire(); | ||
3339 | |||
3321 | count_vm_event(PAGEOUTRUN); | 3340 | count_vm_event(PAGEOUTRUN); |
3322 | 3341 | ||
3323 | do { | 3342 | do { |
3324 | unsigned long nr_reclaimed = sc.nr_reclaimed; | 3343 | unsigned long nr_reclaimed = sc.nr_reclaimed; |
3325 | bool raise_priority = true; | 3344 | bool raise_priority = true; |
3345 | bool ret; | ||
3326 | 3346 | ||
3327 | sc.reclaim_idx = classzone_idx; | 3347 | sc.reclaim_idx = classzone_idx; |
3328 | 3348 | ||
@@ -3395,7 +3415,10 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) | |||
3395 | wake_up_all(&pgdat->pfmemalloc_wait); | 3415 | wake_up_all(&pgdat->pfmemalloc_wait); |
3396 | 3416 | ||
3397 | /* Check if kswapd should be suspending */ | 3417 | /* Check if kswapd should be suspending */ |
3398 | if (try_to_freeze() || kthread_should_stop()) | 3418 | __fs_reclaim_release(); |
3419 | ret = try_to_freeze(); | ||
3420 | __fs_reclaim_acquire(); | ||
3421 | if (ret || kthread_should_stop()) | ||
3399 | break; | 3422 | break; |
3400 | 3423 | ||
3401 | /* | 3424 | /* |
@@ -3412,6 +3435,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) | |||
3412 | 3435 | ||
3413 | out: | 3436 | out: |
3414 | snapshot_refaults(NULL, pgdat); | 3437 | snapshot_refaults(NULL, pgdat); |
3438 | __fs_reclaim_release(); | ||
3415 | /* | 3439 | /* |
3416 | * Return the order kswapd stopped reclaiming at as | 3440 | * Return the order kswapd stopped reclaiming at as |
3417 | * prepare_kswapd_sleep() takes it into account. If another caller | 3441 | * prepare_kswapd_sleep() takes it into account. If another caller |
@@ -3600,9 +3624,7 @@ kswapd_try_sleep: | |||
3600 | */ | 3624 | */ |
3601 | trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, | 3625 | trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, |
3602 | alloc_order); | 3626 | alloc_order); |
3603 | fs_reclaim_acquire(GFP_KERNEL); | ||
3604 | reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); | 3627 | reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); |
3605 | fs_reclaim_release(GFP_KERNEL); | ||
3606 | if (reclaim_order < alloc_order) | 3628 | if (reclaim_order < alloc_order) |
3607 | goto kswapd_try_sleep; | 3629 | goto kswapd_try_sleep; |
3608 | } | 3630 | } |
@@ -3684,16 +3706,16 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |||
3684 | unsigned long nr_reclaimed; | 3706 | unsigned long nr_reclaimed; |
3685 | unsigned int noreclaim_flag; | 3707 | unsigned int noreclaim_flag; |
3686 | 3708 | ||
3687 | noreclaim_flag = memalloc_noreclaim_save(); | ||
3688 | fs_reclaim_acquire(sc.gfp_mask); | 3709 | fs_reclaim_acquire(sc.gfp_mask); |
3710 | noreclaim_flag = memalloc_noreclaim_save(); | ||
3689 | reclaim_state.reclaimed_slab = 0; | 3711 | reclaim_state.reclaimed_slab = 0; |
3690 | p->reclaim_state = &reclaim_state; | 3712 | p->reclaim_state = &reclaim_state; |
3691 | 3713 | ||
3692 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc); | 3714 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc); |
3693 | 3715 | ||
3694 | p->reclaim_state = NULL; | 3716 | p->reclaim_state = NULL; |
3695 | fs_reclaim_release(sc.gfp_mask); | ||
3696 | memalloc_noreclaim_restore(noreclaim_flag); | 3717 | memalloc_noreclaim_restore(noreclaim_flag); |
3718 | fs_reclaim_release(sc.gfp_mask); | ||
3697 | 3719 | ||
3698 | return nr_reclaimed; | 3720 | return nr_reclaimed; |
3699 | } | 3721 | } |
@@ -3870,6 +3892,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in | |||
3870 | }; | 3892 | }; |
3871 | 3893 | ||
3872 | cond_resched(); | 3894 | cond_resched(); |
3895 | fs_reclaim_acquire(sc.gfp_mask); | ||
3873 | /* | 3896 | /* |
3874 | * We need to be able to allocate from the reserves for RECLAIM_UNMAP | 3897 | * We need to be able to allocate from the reserves for RECLAIM_UNMAP |
3875 | * and we also need to be able to write out pages for RECLAIM_WRITE | 3898 | * and we also need to be able to write out pages for RECLAIM_WRITE |
@@ -3877,7 +3900,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in | |||
3877 | */ | 3900 | */ |
3878 | noreclaim_flag = memalloc_noreclaim_save(); | 3901 | noreclaim_flag = memalloc_noreclaim_save(); |
3879 | p->flags |= PF_SWAPWRITE; | 3902 | p->flags |= PF_SWAPWRITE; |
3880 | fs_reclaim_acquire(sc.gfp_mask); | ||
3881 | reclaim_state.reclaimed_slab = 0; | 3903 | reclaim_state.reclaimed_slab = 0; |
3882 | p->reclaim_state = &reclaim_state; | 3904 | p->reclaim_state = &reclaim_state; |
3883 | 3905 | ||
@@ -3892,9 +3914,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in | |||
3892 | } | 3914 | } |
3893 | 3915 | ||
3894 | p->reclaim_state = NULL; | 3916 | p->reclaim_state = NULL; |
3895 | fs_reclaim_release(gfp_mask); | ||
3896 | current->flags &= ~PF_SWAPWRITE; | 3917 | current->flags &= ~PF_SWAPWRITE; |
3897 | memalloc_noreclaim_restore(noreclaim_flag); | 3918 | memalloc_noreclaim_restore(noreclaim_flag); |
3919 | fs_reclaim_release(sc.gfp_mask); | ||
3898 | return sc.nr_reclaimed >= nr_pages; | 3920 | return sc.nr_reclaimed >= nr_pages; |
3899 | } | 3921 | } |
3900 | 3922 | ||
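Moving the annotation into balance_pgdat() keeps the reclaim context held across the whole balancing loop and drops it only around the freeze check. A mock of that acquire/release shape, with a counter standing in for the lockdep map:

    #include <stdbool.h>
    #include <stdio.h>

    static int depth;
    static void acquire(void) { depth++; }
    static void release(void) { depth--; }
    static bool try_to_freeze_stub(void) { return false; }

    static void balance_sketch(void)
    {
            acquire();                       /* held for the whole loop */
            for (int pass = 0; pass < 3; pass++) {
                    /* ... one reclaim iteration ... */
                    release();               /* freezer must not see it held */
                    bool frozen = try_to_freeze_stub();
                    acquire();
                    if (frozen)
                            break;
            }
            release();
            printf("depth after balance: %d\n", depth);  /* 0: balanced */
    }

    int main(void) { balance_sketch(); return 0; }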
diff --git a/net/9p/client.c b/net/9p/client.c index 21e6df1cc70f..18c5271910dc 100644 --- a/net/9p/client.c +++ b/net/9p/client.c | |||
@@ -198,8 +198,6 @@ static int parse_opts(char *opts, struct p9_client *clnt) | |||
198 | pr_info("Could not find request transport: %s\n", | 198 | pr_info("Could not find request transport: %s\n", |
199 | s); | 199 | s); |
200 | ret = -EINVAL; | 200 | ret = -EINVAL; |
201 | kfree(s); | ||
202 | goto free_and_return; | ||
203 | } | 201 | } |
204 | kfree(s); | 202 | kfree(s); |
205 | break; | 203 | break; |
@@ -214,13 +212,12 @@ static int parse_opts(char *opts, struct p9_client *clnt) | |||
214 | "problem allocating copy of version arg\n"); | 212 | "problem allocating copy of version arg\n"); |
215 | goto free_and_return; | 213 | goto free_and_return; |
216 | } | 214 | } |
217 | ret = get_protocol_version(s); | 215 | r = get_protocol_version(s); |
218 | if (ret == -EINVAL) { | 216 | if (r < 0) |
219 | kfree(s); | 217 | ret = r; |
220 | goto free_and_return; | 218 | else |
221 | } | 219 | clnt->proto_version = r; |
222 | kfree(s); | 220 | kfree(s); |
223 | clnt->proto_version = ret; | ||
224 | break; | 221 | break; |
225 | default: | 222 | default: |
226 | continue; | 223 | continue; |
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 0f19960390a6..2e2b8bca54f3 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c | |||
@@ -38,7 +38,6 @@ | |||
38 | 38 | ||
39 | #include <linux/module.h> | 39 | #include <linux/module.h> |
40 | #include <linux/spinlock.h> | 40 | #include <linux/spinlock.h> |
41 | #include <linux/rwlock.h> | ||
42 | #include <net/9p/9p.h> | 41 | #include <net/9p/9p.h> |
43 | #include <net/9p/client.h> | 42 | #include <net/9p/client.h> |
44 | #include <net/9p/transport.h> | 43 | #include <net/9p/transport.h> |
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e6033d3c48d3..e3b7362b0ee4 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
@@ -1,9 +1,11 @@ | |||
1 | #!/usr/bin/env perl | 1 | #!/usr/bin/env perl |
2 | # SPDX-License-Identifier: GPL-2.0 | ||
3 | # | ||
2 | # (c) 2001, Dave Jones. (the file handling bit) | 4 | # (c) 2001, Dave Jones. (the file handling bit) |
3 | # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) | 5 | # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) |
4 | # (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite) | 6 | # (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite) |
5 | # (c) 2008-2010 Andy Whitcroft <apw@canonical.com> | 7 | # (c) 2008-2010 Andy Whitcroft <apw@canonical.com> |
6 | # Licensed under the terms of the GNU GPL License version 2 | 8 | # (c) 2010-2018 Joe Perches <joe@perches.com> |
7 | 9 | ||
8 | use strict; | 10 | use strict; |
9 | use warnings; | 11 | use warnings; |
@@ -2375,6 +2377,14 @@ sub process { | |||
2375 | 2377 | ||
2376 | my $rawline = $rawlines[$linenr - 1]; | 2378 | my $rawline = $rawlines[$linenr - 1]; |
2377 | 2379 | ||
2380 | # check if it's a mode change, rename or start of a patch | ||
2381 | if (!$in_commit_log && | ||
2382 | ($line =~ /^ mode change [0-7]+ => [0-7]+ \S+\s*$/ || | ||
2383 | ($line =~ /^rename (?:from|to) \S+\s*$/ || | ||
2384 | $line =~ /^diff --git a\/[\w\/\.\_\-]+ b\/\S+\s*$/))) { | ||
2385 | $is_patch = 1; | ||
2386 | } | ||
2387 | |||
2378 | #extract the line range in the file after the patch is applied | 2388 | #extract the line range in the file after the patch is applied |
2379 | if (!$in_commit_log && | 2389 | if (!$in_commit_log && |
2380 | $line =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) { | 2390 | $line =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) { |
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 99c96e86eccb..c87fa734e3e1 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl | |||
@@ -1,4 +1,6 @@ | |||
1 | #!/usr/bin/env perl | 1 | #!/usr/bin/env perl |
2 | # SPDX-License-Identifier: GPL-2.0 | ||
3 | # | ||
2 | # (c) 2007, Joe Perches <joe@perches.com> | 4 | # (c) 2007, Joe Perches <joe@perches.com> |
3 | # created from checkpatch.pl | 5 | # created from checkpatch.pl |
4 | # | 6 | # |
@@ -7,8 +9,6 @@ | |||
7 | # | 9 | # |
8 | # usage: perl scripts/get_maintainer.pl [OPTIONS] <patch> | 10 | # usage: perl scripts/get_maintainer.pl [OPTIONS] <patch> |
9 | # perl scripts/get_maintainer.pl [OPTIONS] -f <file> | 11 | # perl scripts/get_maintainer.pl [OPTIONS] -f <file> |
10 | # | ||
11 | # Licensed under the terms of the GNU GPL License version 2 | ||
12 | 12 | ||
13 | use warnings; | 13 | use warnings; |
14 | use strict; | 14 | use strict; |
@@ -542,7 +542,18 @@ foreach my $file (@ARGV) { | |||
542 | 542 | ||
543 | while (<$patch>) { | 543 | while (<$patch>) { |
544 | my $patch_line = $_; | 544 | my $patch_line = $_; |
545 | if (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) { | 545 | if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) { |
546 | my $filename = $1; | ||
547 | push(@files, $filename); | ||
548 | } elsif (m/^rename (?:from|to) (\S+)\s*$/) { | ||
549 | my $filename = $1; | ||
550 | push(@files, $filename); | ||
551 | } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) { | ||
552 | my $filename1 = $1; | ||
553 | my $filename2 = $2; | ||
554 | push(@files, $filename1); | ||
555 | push(@files, $filename2); | ||
556 | } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) { | ||
546 | my $filename = $1; | 557 | my $filename = $1; |
547 | $filename =~ s@^[^/]*/@@; | 558 | $filename =~ s@^[^/]*/@@; |
548 | $filename =~ s@\n@@; | 559 | $filename =~ s@\n@@; |
diff --git a/scripts/tags.sh b/scripts/tags.sh index e587610d1492..66f08bb1cce9 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh | |||
@@ -179,9 +179,9 @@ regex_c=( | |||
179 | '/\<CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/ClearPage\1/' | 179 | '/\<CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/ClearPage\1/' |
180 | '/\<__CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/__ClearPage\1/' | 180 | '/\<__CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/__ClearPage\1/' |
181 | '/\<TESTCLEARFLAG_FALSE(\([[:alnum:]_]*\).*/TestClearPage\1/' | 181 | '/\<TESTCLEARFLAG_FALSE(\([[:alnum:]_]*\).*/TestClearPage\1/' |
182 | '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/Page\1/' | 182 | '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/Page\1/' |
183 | '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/__SetPage\1/' | 183 | '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/__SetPage\1/' |
184 | '/^PAGE_MAPCOUNT_OPS(\([[:alnum:]_]*\).*/__ClearPage\1/' | 184 | '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/__ClearPage\1/' |
185 | '/^TASK_PFA_TEST([^,]*, *\([[:alnum:]_]*\))/task_\1/' | 185 | '/^TASK_PFA_TEST([^,]*, *\([[:alnum:]_]*\))/task_\1/' |
186 | '/^TASK_PFA_SET([^,]*, *\([[:alnum:]_]*\))/task_set_\1/' | 186 | '/^TASK_PFA_SET([^,]*, *\([[:alnum:]_]*\))/task_set_\1/' |
187 | '/^TASK_PFA_CLEAR([^,]*, *\([[:alnum:]_]*\))/task_clear_\1/' | 187 | '/^TASK_PFA_CLEAR([^,]*, *\([[:alnum:]_]*\))/task_clear_\1/' |
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 6c16f77c722c..74e5912e9f2e 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore | |||
@@ -1,3 +1,6 @@ | |||
1 | /fd-001-lookup | ||
2 | /fd-002-posix-eq | ||
3 | /fd-003-kthread | ||
1 | /proc-loadavg-001 | 4 | /proc-loadavg-001 |
2 | /proc-self-map-files-001 | 5 | /proc-self-map-files-001 |
3 | /proc-self-map-files-002 | 6 | /proc-self-map-files-002 |
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index dbb87e56264c..db310eedc268 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile | |||
@@ -1,6 +1,9 @@ | |||
1 | CFLAGS += -Wall -O2 | 1 | CFLAGS += -Wall -O2 -Wno-unused-function |
2 | 2 | ||
3 | TEST_GEN_PROGS := | 3 | TEST_GEN_PROGS := |
4 | TEST_GEN_PROGS += fd-001-lookup | ||
5 | TEST_GEN_PROGS += fd-002-posix-eq | ||
6 | TEST_GEN_PROGS += fd-003-kthread | ||
4 | TEST_GEN_PROGS += proc-loadavg-001 | 7 | TEST_GEN_PROGS += proc-loadavg-001 |
5 | TEST_GEN_PROGS += proc-self-map-files-001 | 8 | TEST_GEN_PROGS += proc-self-map-files-001 |
6 | TEST_GEN_PROGS += proc-self-map-files-002 | 9 | TEST_GEN_PROGS += proc-self-map-files-002 |
diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c new file mode 100644 index 000000000000..a2010dfb2110 --- /dev/null +++ b/tools/testing/selftests/proc/fd-001-lookup.c | |||
@@ -0,0 +1,168 @@ | |||
1 | /* | ||
2 | * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> | ||
3 | * | ||
4 | * Permission to use, copy, modify, and distribute this software for any | ||
5 | * purpose with or without fee is hereby granted, provided that the above | ||
6 | * copyright notice and this permission notice appear in all copies. | ||
7 | * | ||
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
15 | */ | ||
16 | // Test /proc/*/fd lookup. | ||
17 | #define _GNU_SOURCE | ||
18 | #undef NDEBUG | ||
19 | #include <assert.h> | ||
20 | #include <dirent.h> | ||
21 | #include <errno.h> | ||
22 | #include <limits.h> | ||
23 | #include <sched.h> | ||
24 | #include <stdio.h> | ||
25 | #include <unistd.h> | ||
26 | #include <sys/types.h> | ||
27 | #include <sys/stat.h> | ||
28 | #include <fcntl.h> | ||
29 | |||
30 | #include "proc.h" | ||
31 | |||
32 | /* lstat(2) has more "coverage" in case a non-symlink pops up somehow. */ | ||
33 | static void test_lookup_pass(const char *pathname) | ||
34 | { | ||
35 | struct stat st; | ||
36 | ssize_t rv; | ||
37 | |||
38 | memset(&st, 0, sizeof(struct stat)); | ||
39 | rv = lstat(pathname, &st); | ||
40 | assert(rv == 0); | ||
41 | assert(S_ISLNK(st.st_mode)); | ||
42 | } | ||
43 | |||
44 | static void test_lookup_fail(const char *pathname) | ||
45 | { | ||
46 | struct stat st; | ||
47 | ssize_t rv; | ||
48 | |||
49 | rv = lstat(pathname, &st); | ||
50 | assert(rv == -1 && errno == ENOENT); | ||
51 | } | ||
52 | |||
53 | static void test_lookup(unsigned int fd) | ||
54 | { | ||
55 | char buf[64]; | ||
56 | unsigned int c; | ||
57 | unsigned int u; | ||
58 | int i; | ||
59 | |||
60 | snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd); | ||
61 | test_lookup_pass(buf); | ||
62 | |||
63 | /* leading junk */ | ||
64 | for (c = 1; c <= 255; c++) { | ||
65 | if (c == '/') | ||
66 | continue; | ||
67 | snprintf(buf, sizeof(buf), "/proc/self/fd/%c%u", c, fd); | ||
68 | test_lookup_fail(buf); | ||
69 | } | ||
70 | |||
71 | /* trailing junk */ | ||
72 | for (c = 1; c <= 255; c++) { | ||
73 | if (c == '/') | ||
74 | continue; | ||
75 | snprintf(buf, sizeof(buf), "/proc/self/fd/%u%c", fd, c); | ||
76 | test_lookup_fail(buf); | ||
77 | } | ||
78 | |||
79 | for (i = INT_MIN; i < INT_MIN + 1024; i++) { | ||
80 | snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i); | ||
81 | test_lookup_fail(buf); | ||
82 | } | ||
83 | for (i = -1024; i < 0; i++) { | ||
84 | snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i); | ||
85 | test_lookup_fail(buf); | ||
86 | } | ||
87 | for (u = INT_MAX - 1024; u <= (unsigned int)INT_MAX + 1024; u++) { | ||
88 | snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u); | ||
89 | test_lookup_fail(buf); | ||
90 | } | ||
91 | for (u = UINT_MAX - 1024; u != 0; u++) { | ||
92 | snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u); | ||
93 | test_lookup_fail(buf); | ||
94 | } | ||
95 | |||
96 | |||
97 | } | ||
98 | |||
99 | int main(void) | ||
100 | { | ||
101 | struct dirent *de; | ||
102 | unsigned int fd, target_fd; | ||
103 | |||
104 | if (unshare(CLONE_FILES) == -1) | ||
105 | return 1; | ||
106 | |||
107 | /* Wipe fdtable. */ | ||
108 | do { | ||
109 | DIR *d; | ||
110 | |||
111 | d = opendir("/proc/self/fd"); | ||
112 | if (!d) | ||
113 | return 1; | ||
114 | |||
115 | de = xreaddir(d); | ||
116 | assert(de->d_type == DT_DIR); | ||
117 | assert(streq(de->d_name, ".")); | ||
118 | |||
119 | de = xreaddir(d); | ||
120 | assert(de->d_type == DT_DIR); | ||
121 | assert(streq(de->d_name, "..")); | ||
122 | next: | ||
123 | de = xreaddir(d); | ||
124 | if (de) { | ||
125 | unsigned long long fd_ull; | ||
126 | unsigned int fd; | ||
127 | char *end; | ||
128 | |||
129 | assert(de->d_type == DT_LNK); | ||
130 | |||
131 | fd_ull = xstrtoull(de->d_name, &end); | ||
132 | assert(*end == '\0'); | ||
133 | assert(fd_ull == (unsigned int)fd_ull); | ||
134 | |||
135 | fd = fd_ull; | ||
136 | if (fd == dirfd(d)) | ||
137 | goto next; | ||
138 | close(fd); | ||
139 | } | ||
140 | |||
141 | closedir(d); | ||
142 | } while (de); | ||
143 | |||
144 | /* Now fdtable is clean. */ | ||
145 | |||
146 | fd = open("/", O_PATH|O_DIRECTORY); | ||
147 | assert(fd == 0); | ||
148 | test_lookup(fd); | ||
149 | close(fd); | ||
150 | |||
151 | /* Clean again! */ | ||
152 | |||
153 | fd = open("/", O_PATH|O_DIRECTORY); | ||
154 | assert(fd == 0); | ||
155 | /* Default RLIMIT_NOFILE-1 */ | ||
156 | target_fd = 1023; | ||
157 | while (target_fd > 0) { | ||
158 | if (dup2(fd, target_fd) == target_fd) | ||
159 | break; | ||
160 | target_fd /= 2; | ||
161 | } | ||
162 | assert(target_fd > 0); | ||
163 | close(fd); | ||
164 | test_lookup(target_fd); | ||
165 | close(target_fd); | ||
166 | |||
167 | return 0; | ||
168 | } | ||
diff --git a/tools/testing/selftests/proc/fd-002-posix-eq.c b/tools/testing/selftests/proc/fd-002-posix-eq.c new file mode 100644 index 000000000000..417322ca9c53 --- /dev/null +++ b/tools/testing/selftests/proc/fd-002-posix-eq.c | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> | ||
3 | * | ||
4 | * Permission to use, copy, modify, and distribute this software for any | ||
5 | * purpose with or without fee is hereby granted, provided that the above | ||
6 | * copyright notice and this permission notice appear in all copies. | ||
7 | * | ||
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
15 | */ | ||
16 | // Test that open(/proc/*/fd/*) opens the same file. | ||
17 | #undef NDEBUG | ||
18 | #include <assert.h> | ||
19 | #include <stdio.h> | ||
20 | #include <sys/types.h> | ||
21 | #include <sys/stat.h> | ||
22 | #include <fcntl.h> | ||
23 | #include <unistd.h> | ||
24 | |||
25 | int main(void) | ||
26 | { | ||
27 | int fd0, fd1, fd2; | ||
28 | struct stat st0, st1, st2; | ||
29 | char buf[64]; | ||
30 | int rv; | ||
31 | |||
32 | fd0 = open("/", O_DIRECTORY|O_RDONLY); | ||
33 | assert(fd0 >= 0); | ||
34 | |||
35 | snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd0); | ||
36 | fd1 = open(buf, O_RDONLY); | ||
37 | assert(fd1 >= 0); | ||
38 | |||
39 | snprintf(buf, sizeof(buf), "/proc/thread-self/fd/%u", fd0); | ||
40 | fd2 = open(buf, O_RDONLY); | ||
41 | assert(fd2 >= 0); | ||
42 | |||
43 | rv = fstat(fd0, &st0); | ||
44 | assert(rv == 0); | ||
45 | rv = fstat(fd1, &st1); | ||
46 | assert(rv == 0); | ||
47 | rv = fstat(fd2, &st2); | ||
48 | assert(rv == 0); | ||
49 | |||
50 | assert(st0.st_dev == st1.st_dev); | ||
51 | assert(st0.st_ino == st1.st_ino); | ||
52 | |||
53 | assert(st0.st_dev == st2.st_dev); | ||
54 | assert(st0.st_ino == st2.st_ino); | ||
55 | |||
56 | return 0; | ||
57 | } | ||
diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c new file mode 100644 index 000000000000..1d659d55368c --- /dev/null +++ b/tools/testing/selftests/proc/fd-003-kthread.c | |||
@@ -0,0 +1,178 @@ | |||
1 | /* | ||
2 | * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> | ||
3 | * | ||
4 | * Permission to use, copy, modify, and distribute this software for any | ||
5 | * purpose with or without fee is hereby granted, provided that the above | ||
6 | * copyright notice and this permission notice appear in all copies. | ||
7 | * | ||
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
15 | */ | ||
16 | // Test that /proc/$KERNEL_THREAD/fd/ is empty. | ||
17 | #define _GNU_SOURCE | ||
18 | #undef NDEBUG | ||
19 | #include <sys/syscall.h> | ||
20 | #include <assert.h> | ||
21 | #include <dirent.h> | ||
22 | #include <limits.h> | ||
23 | #include <stdio.h> | ||
24 | #include <string.h> | ||
25 | #include <sys/types.h> | ||
26 | #include <sys/stat.h> | ||
27 | #include <fcntl.h> | ||
28 | #include <unistd.h> | ||
29 | |||
30 | #include "proc.h" | ||
31 | |||
32 | #define PF_KTHREAD 0x00200000 | ||
33 | |||
34 | /* | ||
35 | * Test for kernel threadness atomically with openat(). | ||
36 | * | ||
37 | * Return a /proc/$PID/fd descriptor if the process is a kernel thread. | ||
38 | * Return -1 if the process is a userspace process. | ||
39 | */ | ||
40 | static int kernel_thread_fd(unsigned int pid) | ||
41 | { | ||
42 | unsigned int flags = 0; | ||
43 | char buf[4096]; | ||
44 | int dir_fd, fd; | ||
45 | ssize_t rv; | ||
46 | |||
47 | snprintf(buf, sizeof(buf), "/proc/%u", pid); | ||
48 | dir_fd = open(buf, O_RDONLY|O_DIRECTORY); | ||
49 | if (dir_fd == -1) | ||
50 | return -1; | ||
51 | |||
52 | /* | ||
53 | * Believe it or not, struct task_struct::flags is directly exposed | ||
54 | * to userspace! | ||
55 | */ | ||
56 | fd = openat(dir_fd, "stat", O_RDONLY); | ||
57 | if (fd == -1) { | ||
58 | close(dir_fd); | ||
59 | return -1; | ||
60 | } | ||
61 | rv = read(fd, buf, sizeof(buf)); | ||
62 | close(fd); | ||
63 | if (0 < rv && rv <= sizeof(buf)) { | ||
64 | unsigned long long flags_ull; | ||
65 | char *p, *end; | ||
66 | int i; | ||
67 | |||
68 | assert(buf[rv - 1] == '\n'); | ||
69 | buf[rv - 1] = '\0'; | ||
70 | |||
71 | /* Search backwards: ->comm can contain whitespace and ')'. */ | ||
72 | for (i = 0; i < 43; i++) { | ||
73 | p = strrchr(buf, ' '); | ||
74 | assert(p); | ||
75 | *p = '\0'; | ||
76 | } | ||
77 | |||
78 | p = strrchr(buf, ' '); | ||
79 | assert(p); | ||
80 | |||
81 | flags_ull = xstrtoull(p + 1, &end); | ||
82 | assert(*end == '\0'); | ||
83 | assert(flags_ull == (unsigned int)flags_ull); | ||
84 | |||
85 | flags = flags_ull; | ||
86 | } | ||
87 | |||
88 | fd = -1; | ||
89 | if (flags & PF_KTHREAD) { | ||
90 | fd = openat(dir_fd, "fd", O_RDONLY|O_DIRECTORY); | ||
91 | } | ||
92 | close(dir_fd); | ||
93 | return fd; | ||
94 | } | ||
95 | |||
96 | static void test_readdir(int fd) | ||
97 | { | ||
98 | DIR *d; | ||
99 | struct dirent *de; | ||
100 | |||
101 | d = fdopendir(fd); | ||
102 | assert(d); | ||
103 | |||
104 | de = xreaddir(d); | ||
105 | assert(streq(de->d_name, ".")); | ||
106 | assert(de->d_type == DT_DIR); | ||
107 | |||
108 | de = xreaddir(d); | ||
109 | assert(streq(de->d_name, "..")); | ||
110 | assert(de->d_type == DT_DIR); | ||
111 | |||
112 | de = xreaddir(d); | ||
113 | assert(!de); | ||
114 | } | ||
115 | |||
116 | static inline int sys_statx(int dirfd, const char *pathname, int flags, | ||
117 | unsigned int mask, void *stx) | ||
118 | { | ||
119 | return syscall(SYS_statx, dirfd, pathname, flags, mask, stx); | ||
120 | } | ||
121 | |||
122 | static void test_lookup_fail(int fd, const char *pathname) | ||
123 | { | ||
124 | char stx[256] __attribute__((aligned(8))); | ||
125 | int rv; | ||
126 | |||
127 | rv = sys_statx(fd, pathname, AT_SYMLINK_NOFOLLOW, 0, (void *)stx); | ||
128 | assert(rv == -1 && errno == ENOENT); | ||
129 | } | ||
130 | |||
131 | static void test_lookup(int fd) | ||
132 | { | ||
133 | char buf[64]; | ||
134 | unsigned int u; | ||
135 | int i; | ||
136 | |||
137 | for (i = INT_MIN; i < INT_MIN + 1024; i++) { | ||
138 | snprintf(buf, sizeof(buf), "%d", i); | ||
139 | test_lookup_fail(fd, buf); | ||
140 | } | ||
141 | for (i = -1024; i < 1024; i++) { | ||
142 | snprintf(buf, sizeof(buf), "%d", i); | ||
143 | test_lookup_fail(fd, buf); | ||
144 | } | ||
145 | for (u = INT_MAX - 1024; u < (unsigned int)INT_MAX + 1024; u++) { | ||
146 | snprintf(buf, sizeof(buf), "%u", u); | ||
147 | test_lookup_fail(fd, buf); | ||
148 | } | ||
149 | for (u = UINT_MAX - 1024; u != 0; u++) { | ||
150 | snprintf(buf, sizeof(buf), "%u", u); | ||
151 | test_lookup_fail(fd, buf); | ||
152 | } | ||
153 | } | ||
154 | |||
155 | int main(void) | ||
156 | { | ||
157 | unsigned int pid; | ||
158 | int fd; | ||
159 | |||
160 | /* | ||
161 | * In theory this will loop indefinitely if kernel threads are exiled | ||
162 | * from /proc. | ||
163 | * | ||
164 | * Start with kthreadd. | ||
165 | */ | ||
166 | pid = 2; | ||
167 | while ((fd = kernel_thread_fd(pid)) == -1 && pid < 1024) { | ||
168 | pid++; | ||
169 | } | ||
170 | /* EACCES if run as non-root. */ | ||
171 | if (pid >= 1024) | ||
172 | return 1; | ||
173 | |||
174 | test_readdir(fd); | ||
175 | test_lookup(fd); | ||
176 | |||
177 | return 0; | ||
178 | } | ||
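The 43 backwards strrchr() steps exist because comm in /proc/<pid>/stat may itself contain spaces and ')'. An equivalent trick is to anchor once on the last ')' and take the fields after it; a sketch with a hostile comm (the sample line is made up):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* Field 2 (comm) contains a space and a ')' on purpose. */
            const char line[] = "123 (ev il)) S 1 123 123 0 -1 4194560";
            const char *p = strrchr(line, ')');

            if (p && p[1] == ' ')
                    printf("fields after comm: \"%s\"\n", p + 2);
            return 0;
    }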
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h index 0e464b50e9d9..dc6a42b1d6b0 100644 --- a/tools/testing/selftests/proc/proc-uptime.h +++ b/tools/testing/selftests/proc/proc-uptime.h | |||
@@ -20,21 +20,7 @@ | |||
20 | #include <stdlib.h> | 20 | #include <stdlib.h> |
21 | #include <unistd.h> | 21 | #include <unistd.h> |
22 | 22 | ||
23 | static unsigned long long xstrtoull(const char *p, char **end) | 23 | #include "proc.h" |
24 | { | ||
25 | if (*p == '0') { | ||
26 | *end = (char *)p + 1; | ||
27 | return 0; | ||
28 | } else if ('1' <= *p && *p <= '9') { | ||
29 | unsigned long long val; | ||
30 | |||
31 | errno = 0; | ||
32 | val = strtoull(p, end, 10); | ||
33 | assert(errno == 0); | ||
34 | return val; | ||
35 | } else | ||
36 | assert(0); | ||
37 | } | ||
38 | 24 | ||
39 | static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) | 25 | static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) |
40 | { | 26 | { |
diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h new file mode 100644 index 000000000000..4e178166fd84 --- /dev/null +++ b/tools/testing/selftests/proc/proc.h | |||
@@ -0,0 +1,39 @@ | |||
1 | #pragma once | ||
2 | #undef NDEBUG | ||
3 | #include <assert.h> | ||
4 | #include <dirent.h> | ||
5 | #include <errno.h> | ||
6 | #include <stdbool.h> | ||
7 | #include <stdlib.h> | ||
8 | #include <string.h> | ||
9 | |||
10 | static inline bool streq(const char *s1, const char *s2) | ||
11 | { | ||
12 | return strcmp(s1, s2) == 0; | ||
13 | } | ||
14 | |||
15 | static unsigned long long xstrtoull(const char *p, char **end) | ||
16 | { | ||
17 | if (*p == '0') { | ||
18 | *end = (char *)p + 1; | ||
19 | return 0; | ||
20 | } else if ('1' <= *p && *p <= '9') { | ||
21 | unsigned long long val; | ||
22 | |||
23 | errno = 0; | ||
24 | val = strtoull(p, end, 10); | ||
25 | assert(errno == 0); | ||
26 | return val; | ||
27 | } else | ||
28 | assert(0); | ||
29 | } | ||
30 | |||
31 | static struct dirent *xreaddir(DIR *d) | ||
32 | { | ||
33 | struct dirent *de; | ||
34 | |||
35 | errno = 0; | ||
36 | de = readdir(d); | ||
37 | assert(de || errno == 0); | ||
38 | return de; | ||
39 | } | ||
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c index 1e73c2232097..563e752e6eba 100644 --- a/tools/testing/selftests/proc/read.c +++ b/tools/testing/selftests/proc/read.c | |||
@@ -31,22 +31,7 @@ | |||
31 | #include <fcntl.h> | 31 | #include <fcntl.h> |
32 | #include <unistd.h> | 32 | #include <unistd.h> |
33 | 33 | ||
34 | static inline bool streq(const char *s1, const char *s2) | 34 | #include "proc.h" |
35 | { | ||
36 | return strcmp(s1, s2) == 0; | ||
37 | } | ||
38 | |||
39 | static struct dirent *xreaddir(DIR *d) | ||
40 | { | ||
41 | struct dirent *de; | ||
42 | |||
43 | errno = 0; | ||
44 | de = readdir(d); | ||
45 | if (!de && errno != 0) { | ||
46 | exit(1); | ||
47 | } | ||
48 | return de; | ||
49 | } | ||
50 | 35 | ||
51 | static void f_reg(DIR *d, const char *filename) | 36 | static void f_reg(DIR *d, const char *filename) |
52 | { | 37 | { |
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index a8783f48f77f..cce853dca691 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c | |||
@@ -131,6 +131,7 @@ static const char * const page_flag_names[] = { | |||
131 | [KPF_KSM] = "x:ksm", | 131 | [KPF_KSM] = "x:ksm", |
132 | [KPF_THP] = "t:thp", | 132 | [KPF_THP] = "t:thp", |
133 | [KPF_BALLOON] = "o:balloon", | 133 | [KPF_BALLOON] = "o:balloon", |
134 | [KPF_PGTABLE] = "g:pgtable", | ||
134 | [KPF_ZERO_PAGE] = "z:zero_page", | 135 | [KPF_ZERO_PAGE] = "z:zero_page", |
135 | [KPF_IDLE] = "i:idle_page", | 136 | [KPF_IDLE] = "i:idle_page", |
136 | 137 | ||