 Documentation/accounting/psi.txt | 107
 Documentation/admin-guide/kernel-parameters.txt | 17
 Documentation/core-api/kernel-api.rst | 4
 Documentation/dev-tools/gcov.rst | 18
 Documentation/devicetree/bindings/pps/pps-gpio.txt | 7
 Documentation/filesystems/autofs-mount-control.txt | 6
 Documentation/filesystems/autofs.txt | 66
 arch/arm/common/sa1111.c | 2
 arch/arm/include/asm/Kbuild | 1
 arch/arm/include/asm/hardirq.h | 1
 arch/arm/kernel/atags.h | 2
 arch/arm/kernel/smp.c | 6
 arch/arm/mach-imx/devices/platform-fec.c | 2
 arch/arm/mach-imx/devices/platform-gpio_keys.c | 2
 arch/arm/mach-imx/devices/platform-imx2-wdt.c | 2
 arch/arm/mach-imx/devices/platform-mxc_nand.c | 2
 arch/arm/mach-imx/hardware.h | 2
 arch/arm/mach-integrator/impd1.c | 2
 arch/arm/mach-iop13xx/pci.c | 2
 arch/arm/mach-iop13xx/tpmi.c | 2
 arch/arm/mach-ixp4xx/common-pci.c | 2
 arch/arm/mach-ks8695/include/mach/hardware.h | 2
 arch/arm/mach-omap1/include/mach/hardware.h | 2
 arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c | 2
 arch/arm/mach-prima2/common.c | 2
 arch/arm/mach-pxa/balloon3.c | 2
 arch/arm/mach-pxa/colibri-pxa270.c | 2
 arch/arm/mach-pxa/colibri-pxa300.c | 2
 arch/arm/mach-pxa/colibri-pxa320.c | 2
 arch/arm/mach-pxa/colibri-pxa3xx.c | 2
 arch/arm/mach-pxa/gumstix.c | 2
 arch/arm/mach-pxa/lpd270.c | 2
 arch/arm/mach-pxa/lubbock.c | 2
 arch/arm/mach-pxa/mainstone.c | 2
 arch/arm/mach-pxa/trizeps4.c | 2
 arch/arm/mach-pxa/viper.c | 2
 arch/arm/mach-s3c24xx/include/mach/hardware.h | 2
 arch/arm/mach-sa1100/include/mach/memory.h | 2
 arch/arm/mach-sa1100/neponset.c | 2
 arch/arm/mach-tegra/iomap.h | 2
 arch/arm/mach-tegra/irammap.h | 2
 arch/arm/mach-w90x900/include/mach/hardware.h | 2
 arch/arm64/include/asm/Kbuild | 1
 arch/arm64/include/asm/boot.h | 2
 arch/arm64/include/asm/cpufeature.h | 4
 arch/arm64/include/asm/memory.h | 2
 arch/arm64/mm/init.c | 2
 arch/arm64/mm/mmu.c | 2
 arch/h8300/include/asm/Kbuild | 1
 arch/hexagon/include/asm/Kbuild | 1
 arch/mips/include/asm/bitops.h | 4
 arch/mips/kernel/cpu-bugs64.c | 4
 arch/nds32/include/asm/Kbuild | 1
 arch/nds32/include/asm/pgtable.h | 2
 arch/nds32/kernel/head.S | 2
 arch/powerpc/kernel/prom_init.c | 6
 arch/powerpc/kernel/sysfs.c | 8
 arch/powerpc/mm/book3s64/radix_tlb.c | 10
 arch/s390/include/asm/cpacf.h | 2
 arch/sh/boards/board-apsh4a3a.c | 2
 arch/sh/boards/board-apsh4ad0a.c | 2
 arch/sh/boards/board-edosk7705.c | 2
 arch/sh/boards/board-edosk7760.c | 2
 arch/sh/boards/board-espt.c | 2
 arch/sh/boards/board-urquell.c | 2
 arch/sh/boards/mach-microdev/setup.c | 2
 arch/sh/boards/mach-sdk7786/fpga.c | 2
 arch/sh/boards/mach-sdk7786/setup.c | 2
 arch/sh/boards/mach-sdk7786/sram.c | 2
 arch/sh/boards/mach-se/7343/irq.c | 2
 arch/sh/boards/mach-se/7722/irq.c | 2
 arch/sh/drivers/pci/pci-sh7751.c | 2
 arch/sh/drivers/pci/pci-sh7780.c | 2
 arch/sh/drivers/pci/pcie-sh7786.c | 2
 arch/sh/include/asm/Kbuild | 1
 arch/sh/mm/init.c | 2
 arch/sh/mm/pmb.c | 2
 arch/sh/mm/uncached.c | 2
 arch/unicore32/include/asm/Kbuild | 1
 arch/unicore32/include/asm/memory.h | 2
 arch/unicore32/mm/init.c | 2
 arch/unicore32/mm/ioremap.c | 2
 arch/unicore32/mm/mmu.c | 2
 arch/x86/Kconfig | 3
 arch/x86/Kconfig.debug | 14
 arch/x86/events/intel/bts.c | 2
 arch/x86/include/asm/hyperv-tlfs.h | 2
 arch/xtensa/include/asm/irqflags.h | 2
 arch/xtensa/kernel/smp.c | 2
 drivers/gpu/drm/msm/msm_drv.h | 2
 drivers/iommu/msm_iommu.c | 2
 drivers/mmc/host/mvsdio.c | 2
 drivers/mmc/host/pxamci.c | 2
 drivers/mtd/maps/sa1100-flash.c | 2
 drivers/mtd/nand/raw/vf610_nfc.c | 2
 drivers/pcmcia/omap_cf.c | 2
 drivers/pps/clients/pps-gpio.c | 153
 drivers/rapidio/rio_cm.c | 8
 drivers/sh/intc/userimask.c | 2
 drivers/spi/spi-rockchip.c | 1
 drivers/tty/sysrq.c | 6
 drivers/video/backlight/pwm_bl.c | 15
 drivers/video/fbdev/fb-puv3.c | 2
 drivers/virt/fsl_hypervisor.c | 29
 fs/binfmt_elf.c | 180
 fs/block_dev.c | 1
 fs/cachefiles/namei.c | 1
 fs/coda/psdev.c | 1
 fs/eventfd.c | 8
 fs/exec.c | 4
 fs/fat/file.c | 11
 fs/proc/base.c | 2
 fs/reiserfs/xattr.c | 9
 include/asm-generic/shmparam.h | 2
 include/asm-generic/sizes.h | 2
 include/linux/binfmts.h | 3
 include/linux/bitops.h | 16
 include/linux/compiler_types.h | 3
 include/linux/cpumask.h | 3
 include/linux/ipc_namespace.h | 1
 include/linux/kernel.h | 1
 include/linux/kthread.h | 3
 include/linux/latencytop.h | 4
 include/linux/list.h | 17
 include/linux/list_sort.h | 1
 include/linux/memcontrol.h | 108
 include/linux/mm.h | 3
 include/linux/mm_types.h | 3
 include/linux/mmzone.h | 59
 include/linux/plist.h | 4
 include/linux/poll.h | 4
 include/linux/pps-gpio.h | 5
 include/linux/printk.h | 2
 include/linux/psi.h | 9
 include/linux/psi_types.h | 105
 include/linux/qcom-geni-se.h | 2
 include/linux/reboot.h | 2
 include/linux/sched.h | 1
 include/linux/sched/signal.h | 51
 init/Kconfig | 24
 ipc/ipc_sysctl.c | 14
 ipc/mqueue.c | 72
 ipc/msgutil.c | 6
 ipc/util.c | 48
 ipc/util.h | 47
 kernel/Makefile | 2
 kernel/cgroup/cgroup.c | 89
 kernel/exit.c | 6
 kernel/fork.c | 33
 kernel/gcov/Kconfig | 3
 kernel/gcov/Makefile | 5
 kernel/gcov/base.c | 86
 kernel/gcov/clang.c | 581
 kernel/gcov/gcc_3_4.c | 12
 kernel/gcov/gcc_4_7.c | 12
 kernel/gcov/gcc_base.c | 86
 kernel/gcov/gcov.h | 5
 kernel/kthread.c | 1
 kernel/latencytop.c | 8
 kernel/notifier.c | 1
 kernel/panic.c | 5
 kernel/pid.c | 1
 kernel/printk/printk.c | 10
 kernel/reboot.c | 20
 kernel/sched/psi.c | 617
 kernel/signal.c | 1
 kernel/sysctl.c | 41
 kernel/user.c | 7
 lib/Kconfig | 14
 lib/Kconfig.debug | 25
 lib/Makefile | 15
 lib/bitmap.c | 280
 lib/list_sort.c | 242
 lib/math/Kconfig | 11
 lib/math/Makefile | 5
 lib/math/cordic.c (renamed from lib/cordic.c) | 0
 lib/math/div64.c (renamed from lib/div64.c) | 2
 lib/math/gcd.c (renamed from lib/gcd.c) | 0
 lib/math/int_pow.c | 32
 lib/math/int_sqrt.c (renamed from lib/int_sqrt.c) | 0
 lib/math/lcm.c (renamed from lib/lcm.c) | 0
 lib/math/prime_numbers.c (renamed from lib/prime_numbers.c) | 0
 lib/math/rational.c (renamed from lib/rational.c) | 0
 lib/math/reciprocal_div.c (renamed from lib/reciprocal_div.c) | 0
 lib/plist.c | 4
 lib/sort.c | 254
 lib/test_bitmap.c | 67
 lib/test_sysctl.c | 18
 lib/test_vmalloc.c | 8
 mm/Makefile | 7
 mm/compaction.c | 4
 mm/debug.c | 2
 mm/memcontrol.c | 312
 mm/memory_hotplug.c | 3
 mm/mincore.c | 23
 mm/page_alloc.c | 82
 mm/shuffle.c | 207
 mm/shuffle.h | 64
 mm/vmalloc.c | 32
 mm/vmscan.c | 6
 mm/workingset.c | 7
 net/netfilter/core.c | 2
 scripts/gdb/linux/clk.py | 76
 scripts/gdb/linux/config.py | 44
 scripts/gdb/linux/constants.py.in | 17
 scripts/gdb/linux/cpus.py | 1
 scripts/gdb/linux/lists.py | 26
 scripts/gdb/linux/proc.py | 10
 scripts/gdb/linux/rbtree.py | 177
 scripts/gdb/linux/symbols.py | 6
 scripts/gdb/linux/tasks.py | 2
 scripts/gdb/linux/timerlist.py | 219
 scripts/gdb/linux/utils.py | 7
 scripts/gdb/vmlinux-gdb.py | 4
 tools/testing/selftests/exec/.gitignore | 3
 tools/testing/selftests/exec/Makefile | 4
 tools/testing/selftests/exec/recursion-depth.c | 67
 tools/testing/selftests/sysctl/sysctl.sh | 161
 218 files changed, 4500 insertions(+), 1178 deletions(-)
diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
index 7e71c9c1d8e9..5cbe5659e3b7 100644
--- a/Documentation/accounting/psi.txt
+++ b/Documentation/accounting/psi.txt
@@ -63,6 +63,110 @@ as well as medium and long term trends. The total absolute stall time
 spikes which wouldn't necessarily make a dent in the time averages,
 or to average trends over custom time frames.
 
+Monitoring for pressure thresholds
+==================================
+
+Users can register triggers and use poll() to be woken up when resource
+pressure exceeds certain thresholds.
+
+A trigger describes the maximum cumulative stall time over a specific
+time window, e.g. 100ms of total stall time within any 500ms window to
+generate a wakeup event.
+
+To register a trigger, the user has to open the psi interface file under
+/proc/pressure/ representing the resource to be monitored and write the
+desired threshold and time window. The open file descriptor should be
+used to wait for trigger events using select(), poll() or epoll().
+The following format is used:
+
+<some|full> <stall amount in us> <time window in us>
+
+For example, writing "some 150000 1000000" into /proc/pressure/memory
+would add a 150ms threshold for partial memory stall measured within
+a 1sec time window. Writing "full 50000 1000000" into /proc/pressure/io
+would add a 50ms threshold for full io stall measured within a 1sec time window.
+
+Triggers can be set on more than one psi metric, and more than one trigger
+for the same psi metric can be specified. However, for each trigger a separate
+file descriptor is required to be able to poll it separately from others,
+so for each trigger a separate open() syscall should be made even
+when opening the same psi interface file.
+
+Monitors activate only when the system enters a stall state for the monitored
+psi metric and deactivate upon exit from the stall state. While the system is
+in the stall state, psi signal growth is monitored at a rate of 10 times per
+tracking window.
+
+The kernel accepts window sizes ranging from 500ms to 10s, so the minimum
+monitoring update interval is 50ms and the maximum is 1s. The lower limit is
+set to prevent overly frequent polling. The upper limit is chosen as a high
+enough number after which monitors are most likely not needed and psi
+averages can be used instead.
+
+When activated, a psi monitor stays active for at least the duration of one
+tracking window to avoid repeated activations/deactivations when the system is
+bouncing in and out of the stall state.
+
+Notifications to userspace are rate-limited to one per tracking window.
+
+The trigger will de-register when the file descriptor used to define the
+trigger is closed.
+
+Userspace monitor usage example
+===============================
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <poll.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Monitor memory partial stall with 1s tracking window size
+ * and 150ms threshold.
+ */
+int main() {
+	const char trig[] = "some 150000 1000000";
+	struct pollfd fds;
+	int n;
+
+	fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+	if (fds.fd < 0) {
+		printf("/proc/pressure/memory open error: %s\n",
+			strerror(errno));
+		return 1;
+	}
+	fds.events = POLLPRI;
+
+	if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
+		printf("/proc/pressure/memory write error: %s\n",
+			strerror(errno));
+		return 1;
+	}
+
+	printf("waiting for events...\n");
+	while (1) {
+		n = poll(&fds, 1, -1);
+		if (n < 0) {
+			printf("poll error: %s\n", strerror(errno));
+			return 1;
+		}
+		if (fds.revents & POLLERR) {
+			printf("got POLLERR, event source is gone\n");
+			return 0;
+		}
+		if (fds.revents & POLLPRI) {
+			printf("event triggered!\n");
+		} else {
+			printf("unknown event received: 0x%x\n", fds.revents);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 Cgroup2 interface
 =================
 
@@ -71,3 +175,6 @@ mounted, pressure stall information is also tracked for tasks grouped
 into cgroups. Each subdirectory in the cgroupfs mountpoint contains
 cpu.pressure, memory.pressure, and io.pressure files; the format is
 the same as the /proc/pressure/ files.
+
+Per-cgroup psi monitors can be specified and used the same way as
+system-wide ones.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 43176340c73d..d1d1da911085 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1830,6 +1830,9 @@
 	ip=		[IP_PNP]
 			See Documentation/filesystems/nfs/nfsroot.txt.
 
+	ipcmni_extend	[KNL] Extend the maximum number of unique System V
+			IPC identifiers from 32,768 to 16,777,216.
+
 	irqaffinity=	[SMP] Set the default irq affinity mask
 			The argument is a cpu list, as described above.
 
@@ -3174,6 +3177,16 @@
 			This will also cause panics on machine check exceptions.
 			Useful together with panic=30 to trigger a reboot.
 
+	page_alloc.shuffle=
+			[KNL] Boolean flag to control whether the page allocator
+			should randomize its free lists. The randomization may
+			be automatically enabled if the kernel detects it is
+			running on a platform with a direct-mapped memory-side
+			cache, and this parameter can be used to
+			override/disable that behavior. The state of the flag
+			can be read from sysfs at:
+			/sys/module/page_alloc/parameters/shuffle.
+
 	page_owner=	[KNL] Boot-time page_owner enabling option.
 			Storage of the information about who allocated
 			each page is disabled in default. With this switch,
@@ -4054,7 +4067,9 @@
 			[[,]s[mp]#### \
 			[[,]b[ios] | a[cpi] | k[bd] | t[riple] | e[fi] | p[ci]] \
 			[[,]f[orce]
-			Where reboot_mode is one of warm (soft) or cold (hard) or gpio,
+			Where reboot_mode is one of warm (soft) or cold (hard) or gpio
+			(prefix with 'panic_' to set mode for panic
+			reboot only),
 			reboot_type is one of bios, acpi, kbd, triple, efi, or pci,
 			reboot_force is either force or not specified,
 			reboot_cpu is s[mp]#### with #### being the processor
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 71f5d2fe39b7..a29c99d13331 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -147,10 +147,10 @@ Division Functions
 .. kernel-doc:: include/linux/math64.h
    :internal:
 
-.. kernel-doc:: lib/div64.c
+.. kernel-doc:: lib/math/div64.c
    :functions: div_s64_rem div64_u64_rem div64_u64 div64_s64
 
-.. kernel-doc:: lib/gcd.c
+.. kernel-doc:: lib/math/gcd.c
    :export:
 
 UUID/GUID
diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst
index 69a7d90c320a..46aae52a41d0 100644
--- a/Documentation/dev-tools/gcov.rst
+++ b/Documentation/dev-tools/gcov.rst
@@ -34,10 +34,6 @@ Configure the kernel with::
    CONFIG_DEBUG_FS=y
    CONFIG_GCOV_KERNEL=y
 
-select the gcc's gcov format, default is autodetect based on gcc version::
-
-   CONFIG_GCOV_FORMAT_AUTODETECT=y
-
 and to get coverage data for the entire kernel::
 
    CONFIG_GCOV_PROFILE_ALL=y
@@ -169,6 +165,20 @@ b) gcov is run on the BUILD machine
   [user@build] gcov -o /tmp/coverage/tmp/out/init main.c
 
 
+Note on compilers
+-----------------
+
+GCC and LLVM gcov tools are not necessarily compatible. Use gcov_ to work with
+GCC-generated .gcno and .gcda files, and use llvm-cov_ for Clang.
+
+.. _gcov: http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _llvm-cov: https://llvm.org/docs/CommandGuide/llvm-cov.html
+
+Build differences between GCC and Clang gcov are handled by Kconfig. It
+automatically selects the appropriate gcov format depending on the detected
+toolchain.
+
+
 Troubleshooting
 ---------------
 
diff --git a/Documentation/devicetree/bindings/pps/pps-gpio.txt b/Documentation/devicetree/bindings/pps/pps-gpio.txt
index 3683874832ae..9012a2a02e14 100644
--- a/Documentation/devicetree/bindings/pps/pps-gpio.txt
+++ b/Documentation/devicetree/bindings/pps/pps-gpio.txt
@@ -7,6 +7,10 @@ Required properties:
 - compatible: should be "pps-gpio"
 - gpios: one PPS GPIO in the format described by ../gpio/gpio.txt
 
+Additional required properties for the PPS ECHO functionality:
+- echo-gpios: one PPS ECHO GPIO in the format described by ../gpio/gpio.txt
+- echo-active-ms: duration in ms of the active portion of the echo pulse
+
 Optional properties:
 - assert-falling-edge: when present, assert is indicated by a falling edge
   (instead of by a rising edge)
@@ -19,5 +23,8 @@ Example:
 		gpios = <&gpio1 26 GPIO_ACTIVE_HIGH>;
 		assert-falling-edge;
 
+		echo-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
+		echo-active-ms = <100>;
+
 		compatible = "pps-gpio";
 	};
diff --git a/Documentation/filesystems/autofs-mount-control.txt b/Documentation/filesystems/autofs-mount-control.txt
index 45edad6933cc..acc02fc57993 100644
--- a/Documentation/filesystems/autofs-mount-control.txt
+++ b/Documentation/filesystems/autofs-mount-control.txt
@@ -354,8 +354,10 @@ this ioctl is called until no further expire candidates are found.
 
 The call requires an initialized struct autofs_dev_ioctl with the
 ioctlfd field set to the descriptor obtained from the open call. In
-addition an immediate expire, independent of the mount timeout, can be
-requested by setting the how field of struct args_expire to 1. If no
+addition an immediate expire that's independent of the mount timeout,
+and a forced expire that's independent of whether the mount is busy,
+can be requested by setting the how field of struct args_expire to
+AUTOFS_EXP_IMMEDIATE or AUTOFS_EXP_FORCED, respectively. If no
 expire candidates can be found the ioctl returns -1 with errno set to
 EAGAIN.
 
diff --git a/Documentation/filesystems/autofs.txt b/Documentation/filesystems/autofs.txt
index 373ad25852d3..3af38c7fd26d 100644
--- a/Documentation/filesystems/autofs.txt
+++ b/Documentation/filesystems/autofs.txt
@@ -116,7 +116,7 @@ that purpose there is another flag.
 **DCACHE_MANAGE_TRANSIT**
 
 If a dentry has DCACHE_MANAGE_TRANSIT set then two very different but
-related behaviors are invoked, both using the `d_op->d_manage()`
+related behaviours are invoked, both using the `d_op->d_manage()`
 dentry operation.
 
 Firstly, before checking to see if any filesystem is mounted on the
@@ -193,8 +193,8 @@ VFS remain in RCU-walk mode, but can only tell it to get out of
 RCU-walk mode by returning `-ECHILD`.
 
 So `d_manage()`, when called with `rcu_walk` set, should either return
--ECHILD if there is any reason to believe it is unsafe to end the
-mounted filesystem, and otherwise should return 0.
+-ECHILD if there is any reason to believe it is unsafe to enter the
+mounted filesystem, otherwise it should return 0.
 
 autofs will return `-ECHILD` if an expiry of the filesystem has been
 initiated or is being considered, otherwise it returns 0.
@@ -210,7 +210,7 @@ mounts that were created by `d_automount()` returning a filesystem to be
 mounted. As autofs doesn't return such a filesystem but leaves the
 mounting to the automount daemon, it must involve the automount daemon
 in unmounting as well. This also means that autofs has more control
-of expiry.
+over expiry.
 
 The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to
 the `umount` system call. Unmounting with MNT_EXPIRE will fail unless
@@ -225,7 +225,7 @@ unmount any filesystems mounted on the autofs filesystem or remove any
 symbolic links or empty directories any time it likes. If the unmount
 or removal is successful the filesystem will be returned to the state
 it was before the mount or creation, so that any access of the name
-will trigger normal auto-mount processing. In particlar, `rmdir` and
+will trigger normal auto-mount processing. In particular, `rmdir` and
 `unlink` do not leave negative entries in the dcache as a normal
 filesystem would, so an attempt to access a recently-removed object is
 passed to autofs for handling.
@@ -240,11 +240,18 @@ Normally the daemon only wants to remove entries which haven't been
 used for a while. For this purpose autofs maintains a "`last_used`"
 time stamp on each directory or symlink. For symlinks it genuinely
 does record the last time the symlink was "used" or followed to find
-out where it points to. For directories the field is a slight
-misnomer. It actually records the last time that autofs checked if
-the directory or one of its descendents was busy and found that it
-was. This is just as useful and doesn't require updating the field so
-often.
+out where it points to. For directories the field is used slightly
+differently. The field is updated at mount time and during expire
+checks if it is found to be in use (ie. open file descriptor or
+process working directory) and during path walks. The update done
+during path walks prevents frequent expire and immediate mount of
+frequently accessed automounts. But in the case where a GUI continually
+accesses or an application frequently scans an autofs directory tree
+there can be an accumulation of mounts that aren't actually being
+used. To cater for this case the "`strictexpire`" autofs mount option
+can be used to avoid the "`last_used`" update on path walk, thereby
+preventing this apparent inability to expire mounts that aren't
+really in use.
 
 The daemon is able to ask autofs if anything is due to be expired,
 using an `ioctl` as discussed later. For a *direct* mount, autofs
@@ -255,8 +262,12 @@ up.
 
 There is an option with indirect mounts to consider each of the leaves
 that has been mounted on instead of considering the top-level names.
-This is intended for compatability with version 4 of autofs and should
-be considered as deprecated.
+This was originally intended for compatibility with version 4 of autofs
+and should be considered as deprecated for Sun Format automount maps.
+However, it may be used again for amd format mount maps (which are
+generally indirect maps) because the amd automounter allows for the
+setting of an expire timeout for individual mounts. But there are
+some difficulties in making the needed changes for this.
 
 When autofs considers a directory it checks the `last_used` time and
 compares it with the "timeout" value set when the filesystem was
@@ -273,7 +284,7 @@ mounts. If it finds something in the root directory to expire it will
 return the name of that thing. Once a name has been returned the
 automount daemon needs to unmount any filesystems mounted below the
 name normally. As described above, this is unsafe for non-toplevel
-mounts in a version-5 autofs. For this reason the current `automountd`
+mounts in a version-5 autofs. For this reason the current `automount(8)`
 does not use this ioctl.
 
 The second mechanism uses either the **AUTOFS_DEV_IOCTL_EXPIRE_CMD** or
@@ -345,7 +356,7 @@ The `wait_queue_token` is a unique number which can identify a
 particular request to be acknowledged. When a message is sent over
 the pipe the affected dentry is marked as either "active" or
 "expiring" and other accesses to it block until the message is
-acknowledged using one of the ioctls below and the relevant
+acknowledged using one of the ioctls below with the relevant
 `wait_queue_token`.
 
 Communicating with autofs: root directory ioctls
@@ -367,15 +378,14 @@ The available ioctl commands are:
   This mode is also entered if a write to the pipe fails.
 - **AUTOFS_IOC_PROTOVER**: This returns the protocol version in use.
 - **AUTOFS_IOC_PROTOSUBVER**: Returns the protocol sub-version which
-  is really a version number for the implementation. It is
-  currently 2.
+  is really a version number for the implementation.
 - **AUTOFS_IOC_SETTIMEOUT**: This passes a pointer to an unsigned
   long. The value is used to set the timeout for expiry, and
   the current timeout value is stored back through the pointer.
 - **AUTOFS_IOC_ASKUMOUNT**: Returns, in the pointed-to `int`, 1 if
   the filesystem could be unmounted. This is only a hint as
   the situation could change at any instant. This call can be
-  use to avoid a more expensive full unmount attempt.
+  used to avoid a more expensive full unmount attempt.
 - **AUTOFS_IOC_EXPIRE**: as described above, this asks if there is
   anything suitable to expire. A pointer to a packet:
 
@@ -400,6 +410,11 @@ The available ioctl commands are:
   **AUTOFS_EXP_IMMEDIATE** causes `last_used` time to be ignored
   and objects are expired if the are not in use.
 
+  **AUTOFS_EXP_FORCED** causes the in use status to be ignored
+  and objects are expired even if they are in use. This assumes
+  that the daemon has requested this because it is capable of
+  performing the umount.
+
   **AUTOFS_EXP_LEAVES** will select a leaf rather than a top-level
   name to expire. This is only safe when *maxproto* is 4.
 
@@ -415,7 +430,7 @@ which can be used to communicate directly with the autofs filesystem.
 It requires CAP_SYS_ADMIN for access.
 
 The `ioctl`s that can be used on this device are described in a separate
-document `autofs-mount-control.txt`, and are summarized briefly here.
+document `autofs-mount-control.txt`, and are summarised briefly here.
 Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:
 
 	struct autofs_dev_ioctl {
@@ -511,6 +526,21 @@ directories.
 Catatonic mode can only be left via the
 **AUTOFS_DEV_IOCTL_OPENMOUNT_CMD** ioctl on the `/dev/autofs`.
 
+The "ignore" mount option
+-------------------------
+
+The "ignore" mount option can be used to provide a generic indicator
+to applications that the mount entry should be ignored when displaying
+mount information.
+
+In other OSes that provide autofs and that provide a mount list to user
+space based on the kernel mount list, a no-op mount option ("ignore" is
+the one used on the most common OSes) is allowed so that autofs file
+system users can optionally use it.
+
+This is intended to be used by user space programs to exclude autofs
+mounts from consideration when reading the mounts list.
+
 autofs, name spaces, and shared mounts
 --------------------------------------
 
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 45412d21aa6b..179ca8757a74 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -32,7 +32,7 @@
 #include <mach/hardware.h>
 #include <asm/mach/irq.h>
 #include <asm/mach-types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/hardware/sa1111.h>
 
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 41deac2451af..0b2ecc98e086 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += seccomp.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += simd.h
-generic-y += sizes.h
 generic-y += trace_clock.h
 
 generated-y += mach-types.h
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index cba23eaa6072..7a88f160b1fb 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -6,6 +6,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 
+/* number of IPIs _not_ including IPI_CPU_BACKTRACE */
 #define NR_IPI	7
 
 typedef struct {
diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h
index 201100226301..067e12edc341 100644
--- a/arch/arm/kernel/atags.h
+++ b/arch/arm/kernel/atags.h
@@ -5,7 +5,7 @@ void convert_to_tag_list(struct tag *tags);
 const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer,
 	unsigned int machine_nr);
 #else
-static inline const struct machine_desc *
+static inline const struct machine_desc * __init __noreturn
 setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
 {
 	early_print("no ATAGS support: can't continue\n");
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index facd4240ca02..c93fe0f256de 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -70,6 +70,10 @@ enum ipi_msg_type {
 	IPI_CPU_STOP,
 	IPI_IRQ_WORK,
 	IPI_COMPLETION,
+	/*
+	 * CPU_BACKTRACE is special and not included in NR_IPI
+	 * or traceable with trace_ipi_*
+	 */
 	IPI_CPU_BACKTRACE,
 	/*
 	 * SGI8-15 can be reserved by secure firmware, and thus may
@@ -797,7 +801,7 @@ core_initcall(register_cpufreq_notifier);
 
 static void raise_nmi(cpumask_t *mask)
 {
-	smp_cross_call(mask, IPI_CPU_BACKTRACE);
+	__smp_cross_call(mask, IPI_CPU_BACKTRACE);
 }
 
 void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
diff --git a/arch/arm/mach-imx/devices/platform-fec.c b/arch/arm/mach-imx/devices/platform-fec.c
index b403a4fe2892..605c0af5851d 100644
--- a/arch/arm/mach-imx/devices/platform-fec.c
+++ b/arch/arm/mach-imx/devices/platform-fec.c
@@ -7,7 +7,7 @@
  * Free Software Foundation.
  */
 #include <linux/dma-mapping.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include "../hardware.h"
 #include "devices-common.h"
diff --git a/arch/arm/mach-imx/devices/platform-gpio_keys.c b/arch/arm/mach-imx/devices/platform-gpio_keys.c
index 486282539c76..9f0a132ea1bc 100644
--- a/arch/arm/mach-imx/devices/platform-gpio_keys.c
+++ b/arch/arm/mach-imx/devices/platform-gpio_keys.c
@@ -15,7 +15,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  */
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include "../hardware.h"
 #include "devices-common.h"
diff --git a/arch/arm/mach-imx/devices/platform-imx2-wdt.c b/arch/arm/mach-imx/devices/platform-imx2-wdt.c
index 8c134c8d7500..0c6d3c05fd6d 100644
--- a/arch/arm/mach-imx/devices/platform-imx2-wdt.c
+++ b/arch/arm/mach-imx/devices/platform-imx2-wdt.c
@@ -6,7 +6,7 @@
  * the terms of the GNU General Public License version 2 as published by the
  * Free Software Foundation.
  */
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include "../hardware.h"
 #include "devices-common.h"
diff --git a/arch/arm/mach-imx/devices/platform-mxc_nand.c b/arch/arm/mach-imx/devices/platform-mxc_nand.c
index 676df4920c7b..046e0cc826c1 100644
--- a/arch/arm/mach-imx/devices/platform-mxc_nand.c
+++ b/arch/arm/mach-imx/devices/platform-mxc_nand.c
@@ -6,7 +6,7 @@
  * the terms of the GNU General Public License version 2 as published by the
  * Free Software Foundation.
  */
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include "../hardware.h"
 #include "devices-common.h"
diff --git a/arch/arm/mach-imx/hardware.h b/arch/arm/mach-imx/hardware.h
index 90e10cbd8fd1..b5ca8cebe1d6 100644
--- a/arch/arm/mach-imx/hardware.h
+++ b/arch/arm/mach-imx/hardware.h
@@ -24,7 +24,7 @@
 #include <asm/io.h>
 #include <soc/imx/revision.h>
 #endif
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #define addr_in_module(addr, mod) \
 	((unsigned long)(addr) - mod ## _BASE_ADDR < mod ## _SIZE)
diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c
index 8dfad012dfae..6ddbe153910a 100644
--- a/arch/arm/mach-integrator/impd1.c
+++ b/arch/arm/mach-integrator/impd1.c
@@ -27,7 +27,7 @@
 #include <linux/irqchip/arm-vic.h>
 #include <linux/gpio/machine.h>
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include "lm.h"
 #include "impd1.h"
 
diff --git a/arch/arm/mach-iop13xx/pci.c b/arch/arm/mach-iop13xx/pci.c
index 070d92ae1b6f..8426ab9e2f5a 100644
--- a/arch/arm/mach-iop13xx/pci.c
+++ b/arch/arm/mach-iop13xx/pci.c
@@ -24,7 +24,7 @@
 #include <linux/export.h>
 #include <asm/irq.h>
 #include <mach/hardware.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/signal.h>
 #include <asm/mach/pci.h>
 #include "pci.h"
diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c
index 116feb6b261e..d3d8c78e7d10 100644
--- a/arch/arm/mach-iop13xx/tpmi.c
+++ b/arch/arm/mach-iop13xx/tpmi.c
@@ -23,7 +23,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <mach/irqs.h>
 
 /* assumes CONTROLLER_ONLY# is never asserted in the ESSR register */
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index 6835b17113e5..a53104bb28f5 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -31,7 +31,7 @@
 
 #include <asm/cputype.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/mach/pci.h>
 #include <mach/hardware.h>
 
diff --git a/arch/arm/mach-ks8695/include/mach/hardware.h b/arch/arm/mach-ks8695/include/mach/hardware.h
index 959c748ee8bb..877629b3d944 100644
--- a/arch/arm/mach-ks8695/include/mach/hardware.h
+++ b/arch/arm/mach-ks8695/include/mach/hardware.h
@@ -14,7 +14,7 @@
 #ifndef __ASM_ARCH_HARDWARE_H
 #define __ASM_ARCH_HARDWARE_H
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 /*
  * Clocks are derived from MCLK, which is 25MHz
diff --git a/arch/arm/mach-omap1/include/mach/hardware.h b/arch/arm/mach-omap1/include/mach/hardware.h
index 5875a5098d35..e7c8ac7d83e3 100644
--- a/arch/arm/mach-omap1/include/mach/hardware.h
+++ b/arch/arm/mach-omap1/include/mach/hardware.h
@@ -36,7 +36,7 @@
 #ifndef __ASM_ARCH_OMAP_HARDWARE_H
 #define __ASM_ARCH_OMAP_HARDWARE_H
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #ifndef __ASSEMBLER__
 #include <asm/types.h>
 #include <mach/soc.h>
diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c
index 9b30b6b471ae..e19f620c4074 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c
@@ -11,7 +11,7 @@
  * XXX handle crossbar/shared link difference for L3?
  * XXX these should be marked initdata for multi-OMAP kernels
  */
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include "omap_hwmod.h"
 #include "l3_2xxx.h"
diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c
index ffe05c27087e..1607deab5290 100644
--- a/arch/arm/mach-prima2/common.c
+++ b/arch/arm/mach-prima2/common.c
@@ -8,7 +8,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <linux/of.h>
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index 4bcbd3d55b36..1f24e0259f99 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -35,7 +35,7 @@
 #include <asm/setup.h>
 #include <asm/mach-types.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c
index e68acdd0cdbb..510625dde3cb 100644
--- a/arch/arm/mach-pxa/colibri-pxa270.c
+++ b/arch/arm/mach-pxa/colibri-pxa270.c
@@ -24,7 +24,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach-types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <mach/audio.h>
 #include "colibri.h"
diff --git a/arch/arm/mach-pxa/colibri-pxa300.c b/arch/arm/mach-pxa/colibri-pxa300.c
index 6a5558d95d4e..2f635bdc797f 100644
--- a/arch/arm/mach-pxa/colibri-pxa300.c
+++ b/arch/arm/mach-pxa/colibri-pxa300.c
@@ -18,7 +18,7 @@
 #include <linux/interrupt.h>
 
 #include <asm/mach-types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
 
diff --git a/arch/arm/mach-pxa/colibri-pxa320.c b/arch/arm/mach-pxa/colibri-pxa320.c
index 17067a3039a8..ffcefe6dbc82 100644
--- a/arch/arm/mach-pxa/colibri-pxa320.c
+++ b/arch/arm/mach-pxa/colibri-pxa320.c
@@ -19,7 +19,7 @@
 #include <linux/usb/gpio_vbus.h>
 
 #include <asm/mach-types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
 
diff --git a/arch/arm/mach-pxa/colibri-pxa3xx.c b/arch/arm/mach-pxa/colibri-pxa3xx.c
index e31a591e949f..0c88e4e417b4 100644
--- a/arch/arm/mach-pxa/colibri-pxa3xx.c
+++ b/arch/arm/mach-pxa/colibri-pxa3xx.c
@@ -17,7 +17,7 @@
 #include <linux/etherdevice.h>
 #include <asm/mach-types.h>
 #include <mach/hardware.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/system_info.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index 4764acca5480..eb03283ccdee 100644
--- a/arch/arm/mach-pxa/gumstix.c
+++ b/arch/arm/mach-pxa/gumstix.c
@@ -33,7 +33,7 @@
 #include <asm/mach-types.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
index e9f401b0a432..5c03c4f7b82e 100644
--- a/arch/arm/mach-pxa/lpd270.c
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -33,7 +33,7 @@
 #include <asm/mach-types.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index c1bd0d544981..825939877839 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -39,7 +39,7 @@
 #include <asm/mach-types.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index d6e17d407ac0..b3f8592eebe6 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -40,7 +40,7 @@
 #include <asm/mach-types.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index c76f1daecfc9..99a2ee433f1f 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -35,7 +35,7 @@
 #include <asm/memory.h>
 #include <asm/mach-types.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index ab2f89266bbd..c4c25a2f24f6 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -58,7 +58,7 @@
 #include <asm/setup.h>
 #include <asm/mach-types.h>
 #include <asm/irq.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/system_info.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-s3c24xx/include/mach/hardware.h b/arch/arm/mach-s3c24xx/include/mach/hardware.h
index 1b2975708e3f..f28ac6c78d82 100644
--- a/arch/arm/mach-s3c24xx/include/mach/hardware.h
+++ b/arch/arm/mach-s3c24xx/include/mach/hardware.h
@@ -15,7 +15,7 @@ extern unsigned int s3c2410_modify_misccr(unsigned int clr, unsigned int chg);
 
 #endif /* __ASSEMBLY__ */
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <mach/map.h>
 
 #endif /* __ASM_ARCH_HARDWARE_H */
diff --git a/arch/arm/mach-sa1100/include/mach/memory.h b/arch/arm/mach-sa1100/include/mach/memory.h
index fa5cf4744992..3b19296f5062 100644
--- a/arch/arm/mach-sa1100/include/mach/memory.h
+++ b/arch/arm/mach-sa1100/include/mach/memory.h
@@ -8,7 +8,7 @@
 #ifndef __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 /*
  * Because of the wide memory address space between physical RAM banks on the
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index eb60a71cf125..a671e4c994cf 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -21,7 +21,7 @@
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
 #include <asm/hardware/sa1111.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <mach/hardware.h>
 #include <mach/assabet.h>
diff --git a/arch/arm/mach-tegra/iomap.h b/arch/arm/mach-tegra/iomap.h
index 9bc291e76887..4af9e92a216f 100644
--- a/arch/arm/mach-tegra/iomap.h
+++ b/arch/arm/mach-tegra/iomap.h
@@ -20,7 +20,7 @@
 #define __MACH_TEGRA_IOMAP_H
 
 #include <asm/pgtable.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #define TEGRA_IRAM_BASE		0x40000000
 #define TEGRA_IRAM_SIZE		SZ_256K
diff --git a/arch/arm/mach-tegra/irammap.h b/arch/arm/mach-tegra/irammap.h
index e32e1742c9a1..6a7bb887585e 100644
--- a/arch/arm/mach-tegra/irammap.h
+++ b/arch/arm/mach-tegra/irammap.h
@@ -17,7 +17,7 @@
 #ifndef __MACH_TEGRA_IRAMMAP_H
 #define __MACH_TEGRA_IRAMMAP_H
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 /* The first 1K of IRAM is permanently reserved for the CPU reset handler */
 #define TEGRA_IRAM_RESET_HANDLER_OFFSET	0
diff --git a/arch/arm/mach-w90x900/include/mach/hardware.h b/arch/arm/mach-w90x900/include/mach/hardware.h
index fe3c6265a466..2e6555df538e 100644
--- a/arch/arm/mach-w90x900/include/mach/hardware.h
+++ b/arch/arm/mach-w90x900/include/mach/hardware.h
@@ -18,7 +18,7 @@
 #ifndef __ASM_ARCH_HARDWARE_H
 #define __ASM_ARCH_HARDWARE_H
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <mach/map.h>
 
 #endif /* __ASM_ARCH_HARDWARE_H */
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index eb0df239a759..9e977dedf193 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += qspinlock.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += set_memory.h
-generic-y += sizes.h
 generic-y += switch_to.h
 generic-y += trace_clock.h
 generic-y += unaligned.h
diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
index 355e552a9175..c7f67da13cd9 100644
--- a/arch/arm64/include/asm/boot.h
+++ b/arch/arm64/include/asm/boot.h
@@ -3,7 +3,7 @@
 #ifndef __ASM_BOOT_H
 #define __ASM_BOOT_H
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 /*
  * arm64 requires the DTB to be 8 byte aligned and
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index f210bcf096f7..bc895c869892 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -401,7 +401,7 @@ unsigned long cpu_get_elf_hwcap2(void);
 #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
 
 /* System capability check for constant caps */
-static inline bool __cpus_have_const_cap(int num)
+static __always_inline bool __cpus_have_const_cap(int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
@@ -415,7 +415,7 @@ static inline bool cpus_have_cap(unsigned int num)
 	return test_bit(num, cpu_hwcaps);
 }
 
-static inline bool cpus_have_const_cap(int num)
+static __always_inline bool cpus_have_const_cap(int num)
 {
 	if (static_branch_likely(&arm64_const_caps_ready))
 		return __cpus_have_const_cap(num);
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 2cb8248fa2c8..8ffcf5a512bb 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -26,7 +26,7 @@
 #include <linux/types.h>
 #include <asm/bug.h>
 #include <asm/page-def.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 /*
  * Size of the PCI I/O space. This must remain a power of two so that
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 007c05a4cce0..d2adffb81b5d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -48,7 +48,7 @@
 #include <asm/numa.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/tlb.h>
 #include <asm/alternative.h>
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ef32d4839c3f..a170c6369a68 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,7 +40,7 @@
 #include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 #include <asm/ptdump.h>
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 123d8f54be4a..f2e22058e488 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -42,7 +42,6 @@ generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += serial.h
 generic-y += shmparam.h
-generic-y += sizes.h
 generic-y += spinlock.h
 generic-y += timex.h
 generic-y += tlbflush.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 6234a303d2a3..4a3d72f76ea2 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -32,7 +32,6 @@ generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += shmparam.h
-generic-y += sizes.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += unaligned.h
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 830c93a010c3..9a466dde9b96 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -482,7 +482,7 @@ static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *
  * Return the bit position (0..63) of the most significant 1 bit in a word
  * Returns -1 if no 1 bit exists
  */
-static inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long __fls(unsigned long word)
 {
 	int num;
 
@@ -548,7 +548,7 @@ static inline unsigned long __fls(unsigned long word)
  * Returns 0..SZLONG-1
  * Undefined if no bit exists, so code should check against 0 first.
  */
-static inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long __ffs(unsigned long word)
 {
 	return __fls(word & -word);
 }
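[note] The __ffs() body above leans on a classic two's-complement identity: word & -word clears everything except the lowest set bit, so the most-significant-bit position of that result equals the least-significant-bit position of the original word. A standalone illustration:

	#include <stdio.h>

	int main(void)
	{
		unsigned long word = 0x28;		/* bits 3 and 5 set */
		unsigned long lowest = word & -word;	/* -word = ~word + 1 */

		/* 0b101000 & 0b...011000 = 0b001000: only bit 3 survives */
		printf("0x%lx\n", lowest);		/* prints 0x8 */
		return 0;
	}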
diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c
index bada74af7641..c04b97aace4a 100644
--- a/arch/mips/kernel/cpu-bugs64.c
+++ b/arch/mips/kernel/cpu-bugs64.c
@@ -42,8 +42,8 @@ static inline void align_mod(const int align, const int mod)
42 : "n"(align), "n"(mod)); 42 : "n"(align), "n"(mod));
43} 43}
44 44
45static inline void mult_sh_align_mod(long *v1, long *v2, long *w, 45static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w,
46 const int align, const int mod) 46 const int align, const int mod)
47{ 47{
48 unsigned long flags; 48 unsigned long flags;
49 int m1, m2; 49 int m1, m2;
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index 688b6ed26227..f67a327777b5 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -39,7 +39,6 @@ generic-y += preempt.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
-generic-y += sizes.h
 generic-y += switch_to.h
 generic-y += timex.h
 generic-y += topology.h
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index 9f52db930c00..ee59c1f9e4fc 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -6,7 +6,7 @@
 
 #define __PAGETABLE_PMD_FOLDED 1
 #include <asm-generic/4level-fixup.h>
-#include <asm-generic/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/memory.h>
 #include <asm/nds32.h>
diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S
index db64b78b1232..fcefb62606ca 100644
--- a/arch/nds32/kernel/head.S
+++ b/arch/nds32/kernel/head.S
@@ -7,7 +7,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/thread_info.h>
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 523bb99d7676..00682b8df330 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -628,14 +628,14 @@ static int __init prom_next_node(phandle *nodep)
 	}
 }
 
-static inline int prom_getprop(phandle node, const char *pname,
-			       void *value, size_t valuelen)
+static inline int __init prom_getprop(phandle node, const char *pname,
+				      void *value, size_t valuelen)
 {
 	return call_prom("getprop", 4, 1, node, ADDR(pname),
 			 (u32)(unsigned long) value, (u32) valuelen);
 }
 
-static inline int prom_getproplen(phandle node, const char *pname)
+static inline int __init prom_getproplen(phandle node, const char *pname)
 {
 	return call_prom("getproplen", 2, 1, node, ADDR(pname));
 }
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e8e93c2c7d03..7a1708875d27 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -610,7 +610,7 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
 SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
 SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
 SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
-#ifdef CONFIG_DEBUG_KERNEL
+#ifdef CONFIG_DEBUG_MISC
 SYSFS_SPRSETUP(hid0, SPRN_HID0);
 SYSFS_SPRSETUP(hid1, SPRN_HID1);
 SYSFS_SPRSETUP(hid4, SPRN_HID4);
@@ -639,7 +639,7 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
 SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
 SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
 SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
-#endif /* CONFIG_DEBUG_KERNEL */
+#endif /* CONFIG_DEBUG_MISC */
 #endif /* HAS_PPC_PMC_PA6T */
 
 #ifdef HAS_PPC_PMC_IBM
@@ -680,7 +680,7 @@ static struct device_attribute pa6t_attrs[] = {
 	__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
 	__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
 	__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
-#ifdef CONFIG_DEBUG_KERNEL
+#ifdef CONFIG_DEBUG_MISC
 	__ATTR(hid0, 0600, show_hid0, store_hid0),
 	__ATTR(hid1, 0600, show_hid1, store_hid1),
 	__ATTR(hid4, 0600, show_hid4, store_hid4),
@@ -709,7 +709,7 @@ static struct device_attribute pa6t_attrs[] = {
 	__ATTR(tsr1, 0600, show_tsr1, store_tsr1),
 	__ATTR(tsr2, 0600, show_tsr2, store_tsr2),
 	__ATTR(tsr3, 0600, show_tsr3, store_tsr3),
-#endif /* CONFIG_DEBUG_KERNEL */
+#endif /* CONFIG_DEBUG_MISC */
 };
 #endif /* HAS_PPC_PMC_PA6T */
 #endif /* HAS_PPC_PMC_CLASSIC */
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 6a23b9ebd2a1..4d841369399f 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -90,7 +90,7 @@ void radix__tlbiel_all(unsigned int action)
 	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
 }
 
-static inline void __tlbiel_pid(unsigned long pid, int set,
-				unsigned long ric)
+static __always_inline void __tlbiel_pid(unsigned long pid, int set,
+					 unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
@@ -106,7 +106,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
 
@@ -120,7 +120,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbiel_lpid(unsigned long lpid, int set,
-				 unsigned long ric)
+static __always_inline void __tlbiel_lpid(unsigned long lpid, int set,
+					  unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
@@ -136,7 +136,7 @@ static inline void __tlbiel_lpid(unsigned long lpid, int set,
 	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
 }
 
-static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
+static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
 
@@ -928,7 +928,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	tlb->need_flush_all = 0;
 }
 
-static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 				unsigned long start, unsigned long end,
 				int psize, bool also_pwc)
 {
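[note] These tlbie/tlbiel wrappers feed arguments such as ric into inline assembly, where such operands are typically immediate ("i") constraints; presumably that is why the series forces inlining here, since an out-of-line copy would turn the operand into a runtime value and break the build once OPTIMIZE_INLINING is permitted. A tiny portable sketch of the failure mode, using an empty asm template (GCC/Clang):

	/* Compile with: gcc -O2 demo.c. Without guaranteed inlining
	 * (e.g. -O0, or if the compiler outlines the function), the
	 * "i" constraint cannot be satisfied and compilation fails. */
	static inline __attribute__((__always_inline__)) void needs_const(int ric)
	{
		asm volatile("" : : "i"(ric));	/* "i" accepts only compile-time constants */
	}

	int main(void)
	{
		needs_const(2);	/* fine: 2 folds to an immediate */
		return 0;
	}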
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 3cc52e37b4b2..f316de40e51b 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -202,7 +202,7 @@ static inline int __cpacf_check_opcode(unsigned int opcode)
 	}
 }
 
-static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
 {
 	if (__cpacf_check_opcode(opcode)) {
 		__cpacf_query(opcode, mask);
diff --git a/arch/sh/boards/board-apsh4a3a.c b/arch/sh/boards/board-apsh4a3a.c
index 346eda7a2ef6..abf19a947df3 100644
--- a/arch/sh/boards/board-apsh4a3a.c
+++ b/arch/sh/boards/board-apsh4a3a.c
@@ -16,7 +16,7 @@
 #include <linux/irq.h>
 #include <linux/clk.h>
 #include <asm/machvec.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/clock.h>
 
 static struct mtd_partition nor_flash_partitions[] = {
diff --git a/arch/sh/boards/board-apsh4ad0a.c b/arch/sh/boards/board-apsh4ad0a.c
index 4efa9c571f64..fa031a16c9b5 100644
--- a/arch/sh/boards/board-apsh4ad0a.c
+++ b/arch/sh/boards/board-apsh4ad0a.c
@@ -15,7 +15,7 @@
 #include <linux/irq.h>
 #include <linux/clk.h>
 #include <asm/machvec.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 /* Dummy supplies, where voltage doesn't matter */
 static struct regulator_consumer_supply dummy_supplies[] = {
diff --git a/arch/sh/boards/board-edosk7705.c b/arch/sh/boards/board-edosk7705.c
index 67a8803eb3f9..0de7d603da2d 100644
--- a/arch/sh/boards/board-edosk7705.c
+++ b/arch/sh/boards/board-edosk7705.c
@@ -16,7 +16,7 @@
 #include <linux/smc91x.h>
 #include <linux/sh_intc.h>
 #include <asm/machvec.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #define SMC_IOBASE	0xA2000000
 #define SMC_IO_OFFSET	0x300
diff --git a/arch/sh/boards/board-edosk7760.c b/arch/sh/boards/board-edosk7760.c
index 0fbe91cba67a..7569d85c5ff5 100644
--- a/arch/sh/boards/board-edosk7760.c
+++ b/arch/sh/boards/board-edosk7760.c
@@ -18,7 +18,7 @@
 #include <asm/addrspace.h>
 #include <asm/delay.h>
 #include <asm/i2c-sh7760.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 /* Bus state controller registers for CS4 area */
 #define BSC_CS4BCR	0xA4FD0010
diff --git a/arch/sh/boards/board-espt.c b/arch/sh/boards/board-espt.c
index f478fee3b48a..6e784b5cf5a0 100644
--- a/arch/sh/boards/board-espt.c
+++ b/arch/sh/boards/board-espt.c
@@ -13,7 +13,7 @@
 #include <linux/sh_eth.h>
 #include <linux/sh_intc.h>
 #include <asm/machvec.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 /* NOR Flash */
 static struct mtd_partition espt_nor_flash_partitions[] = {
diff --git a/arch/sh/boards/board-urquell.c b/arch/sh/boards/board-urquell.c
index 799af57c0b81..dad2b3b40735 100644
--- a/arch/sh/boards/board-urquell.c
+++ b/arch/sh/boards/board-urquell.c
@@ -21,7 +21,7 @@
 #include <mach/urquell.h>
 #include <cpu/sh7786.h>
 #include <asm/heartbeat.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/smp-ops.h>
 
 /*
diff --git a/arch/sh/boards/mach-microdev/setup.c b/arch/sh/boards/mach-microdev/setup.c
index 706b48f797be..f4a777fe2d01 100644
--- a/arch/sh/boards/mach-microdev/setup.c
+++ b/arch/sh/boards/mach-microdev/setup.c
@@ -15,7 +15,7 @@
 #include <mach/microdev.h>
 #include <asm/io.h>
 #include <asm/machvec.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 static struct resource smc91x_resources[] = {
 	[0] = {
diff --git a/arch/sh/boards/mach-sdk7786/fpga.c b/arch/sh/boards/mach-sdk7786/fpga.c
index 6d2a3d381c2a..895576ff8376 100644
--- a/arch/sh/boards/mach-sdk7786/fpga.c
+++ b/arch/sh/boards/mach-sdk7786/fpga.c
@@ -8,7 +8,7 @@
 #include <linux/io.h>
 #include <linux/bcd.h>
 #include <mach/fpga.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #define FPGA_REGS_OFFSET	0x03fff800
 #define FPGA_REGS_SIZE		0x490
diff --git a/arch/sh/boards/mach-sdk7786/setup.c b/arch/sh/boards/mach-sdk7786/setup.c
index 65721c3a482c..d183026dbeb1 100644
--- a/arch/sh/boards/mach-sdk7786/setup.c
+++ b/arch/sh/boards/mach-sdk7786/setup.c
@@ -19,7 +19,7 @@
 #include <mach/irq.h>
 #include <asm/machvec.h>
 #include <asm/heartbeat.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/clock.h>
 #include <asm/reboot.h>
 #include <asm/smp-ops.h>
diff --git a/arch/sh/boards/mach-sdk7786/sram.c b/arch/sh/boards/mach-sdk7786/sram.c
index d76cdb7ede39..7c6ca976f332 100644
--- a/arch/sh/boards/mach-sdk7786/sram.c
+++ b/arch/sh/boards/mach-sdk7786/sram.c
@@ -13,7 +13,7 @@
 #include <linux/string.h>
 #include <mach/fpga.h>
 #include <asm/sram.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 static int __init fpga_sram_init(void)
 {
diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c
index 39a3175e72b2..1aedbfe32654 100644
--- a/arch/sh/boards/mach-se/7343/irq.c
+++ b/arch/sh/boards/mach-se/7343/irq.c
@@ -16,7 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include <linux/io.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <mach-se/mach/se7343.h>
 
 #define PA_CPLD_BASE_ADDR	0x11400000
diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c
index f6e3009edd4e..6d34592767f8 100644
--- a/arch/sh/boards/mach-se/7722/irq.c
+++ b/arch/sh/boards/mach-se/7722/irq.c
@@ -14,7 +14,7 @@
 #include <linux/irqdomain.h>
 #include <linux/io.h>
 #include <linux/err.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <mach-se/mach/se7722.h>
 
 #define IRQ01_BASE_ADDR	0x11800000
diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 1b9e5caac389..11ed21c2e9bb 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -14,7 +14,7 @@
 #include <linux/io.h>
 #include "pci-sh4.h"
 #include <asm/addrspace.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 static int __init __area_sdram_check(struct pci_channel *chan,
 				     unsigned int area)
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 3fd0f392a0ee..287b3a68570c 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -16,7 +16,7 @@
 #include <linux/log2.h>
 #include "pci-sh4.h"
 #include <asm/mmu.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #if defined(CONFIG_CPU_BIG_ENDIAN)
 # define PCICR_ENDIANNESS SH4_PCICR_BSWP
diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c
index a58b77cea295..e0b568aaa701 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.c
+++ b/arch/sh/drivers/pci/pcie-sh7786.c
@@ -18,7 +18,7 @@
 #include <linux/sh_intc.h>
 #include <cpu/sh7786.h>
 #include "pcie-sh7786.h"
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 struct sh7786_pcie_port {
 	struct pci_channel *hose;
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 73fff39a0122..51a54df22c11 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -18,6 +18,5 @@ generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += serial.h
-generic-y += sizes.h
 generic-y += trace_clock.h
 generic-y += xor.h
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index b95e343e3c9d..5aeb4d7099a1 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -26,7 +26,7 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/cache.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index 7b2cc490ebb7..a53a040d0054 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -24,7 +24,7 @@
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <asm/cacheflush.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
diff --git a/arch/sh/mm/uncached.c b/arch/sh/mm/uncached.c
index 010010bf205a..bd1585e8efed 100644
--- a/arch/sh/mm/uncached.c
+++ b/arch/sh/mm/uncached.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/init.h>
 #include <linux/module.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/page.h>
 #include <asm/addrspace.h>
 
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index b301a0b3c0b2..c93dc6478cb2 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -31,7 +31,6 @@ generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += shmparam.h
-generic-y += sizes.h
 generic-y += syscalls.h
 generic-y += topology.h
 generic-y += trace_clock.h
diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h
index 66bb9f6525c0..46cf27efbb7e 100644
--- a/arch/unicore32/include/asm/memory.h
+++ b/arch/unicore32/include/asm/memory.h
@@ -16,7 +16,7 @@
 
 #include <linux/compiler.h>
 #include <linux/const.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <mach/memory.h>
 
 /*
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index b4442f3060ce..c994cdf14119 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -23,7 +23,7 @@
 
 #include <asm/sections.h>
 #include <asm/setup.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/tlb.h>
 #include <asm/memblock.h>
 #include <mach/map.h>
diff --git a/arch/unicore32/mm/ioremap.c b/arch/unicore32/mm/ioremap.c
index bf012b2b71a9..b69cb18ce8b1 100644
--- a/arch/unicore32/mm/ioremap.c
+++ b/arch/unicore32/mm/ioremap.c
@@ -34,7 +34,7 @@
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <mach/map.h>
 #include "mm.h"
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index aa2060beb408..f0ae623b305f 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -22,7 +22,7 @@
 #include <asm/cputype.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/tlb.h>
 #include <asm/memblock.h>
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 818b361094ed..326b2d5bab9d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -305,9 +305,6 @@ config ZONE_DMA32
 config AUDIT_ARCH
 	def_bool y if X86_64
 
-config ARCH_SUPPORTS_OPTIMIZED_INLINING
-	def_bool y
-
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 15d0fbe27872..f730680dc818 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -266,20 +266,6 @@ config CPA_DEBUG
 	---help---
 	  Do change_page_attr() self-tests every 30 seconds.
 
-config OPTIMIZE_INLINING
-	bool "Allow gcc to uninline functions marked 'inline'"
-	---help---
-	  This option determines if the kernel forces gcc to inline the functions
-	  developers have marked 'inline'. Doing so takes away freedom from gcc to
-	  do what it thinks is best, which is desirable for the gcc 3.x series of
-	  compilers. The gcc 4.x series have a rewritten inlining algorithm and
-	  enabling this option will generate a smaller kernel there. Hopefully
-	  this algorithm is so good that allowing gcc 4.x and above to make the
-	  decision will become the default in the future. Until then this option
-	  is there to test gcc for this.
-
-	  If unsure, say N.
-
 config DEBUG_ENTRY
 	bool "Debug low-level entry code"
 	depends on DEBUG_KERNEL
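[note] Removing ARCH_SUPPORTS_OPTIMIZED_INLINING and the x86-only OPTIMIZE_INLINING prompt fits the rest of this series: presumably the option is being promoted to a generic one available to all architectures, which is why the earlier hunks pin down functions that must never be outlined with __always_inline. The distinction the old help text describes, as a compilable sketch:

	#include <stdio.h>

	/* "inline" is only a hint: with OPTIMIZE_INLINING the compiler
	 * may emit hinted() as a real out-of-line function.
	 * __always_inline removes that freedom. */
	static inline int hinted(int x)
	{
		return x * 2;
	}

	static inline __attribute__((__always_inline__)) int forced(int x)
	{
		return x * 2;
	}

	int main(void)
	{
		printf("%d %d\n", hinted(21), forced(21));	/* 42 42 */
		return 0;
	}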
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 7cdd7b13bbda..890a3fb5706f 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -23,7 +23,7 @@
 #include <linux/device.h>
 #include <linux/coredump.h>
 
-#include <asm-generic/sizes.h>
+#include <linux/sizes.h>
 #include <asm/perf_event.h>
 
 #include "../perf_event.h"
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 2bdbbbcfa393..cdf44aa9a501 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0 */
 
 /*
  * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
diff --git a/arch/xtensa/include/asm/irqflags.h b/arch/xtensa/include/asm/irqflags.h
index 9b5e8526afe5..12890681587b 100644
--- a/arch/xtensa/include/asm/irqflags.h
+++ b/arch/xtensa/include/asm/irqflags.h
@@ -27,7 +27,7 @@ static inline unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;
 #if XTENSA_FAKE_NMI
-#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+#if defined(CONFIG_DEBUG_MISC) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
 	unsigned long tmp;
 
 	asm volatile("rsr %0, ps\t\n"
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index 3699d6d3e479..83b244ce61ee 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -126,7 +126,7 @@ void secondary_start_kernel(void)
126 126
127 init_mmu(); 127 init_mmu();
128 128
129#ifdef CONFIG_DEBUG_KERNEL 129#ifdef CONFIG_DEBUG_MISC
130 if (boot_secondary_processors == 0) { 130 if (boot_secondary_processors == 0) {
131 pr_debug("%s: boot_secondary_processors:%d; Hanging cpu:%d\n", 131 pr_debug("%s: boot_secondary_processors:%d; Hanging cpu:%d\n",
132 __func__, boot_secondary_processors, cpu); 132 __func__, boot_secondary_processors, cpu);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index eb33d2d00d77..e20e6b429804 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -33,7 +33,7 @@
 #include <linux/types.h>
 #include <linux/of_graph.h>
 #include <linux/of_device.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <linux/kthread.h>
 
 #include <drm/drmP.h>
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 9fb0eb7a4d02..b4b87d6ae67f 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -34,7 +34,7 @@
 #include <linux/of_iommu.h>
 
 #include <asm/cacheflush.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include "msm_iommu_hw-8xxx.h"
 #include "msm_iommu.h"
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index e22bbff89c8d..9cb93e15b197 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -24,7 +24,7 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/slot-gpio.h>
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/unaligned.h>
 
 #include "mvsdio.h"
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index c1d3f0e38921..e7d80c83da2c 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -35,7 +35,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <mach/hardware.h>
 #include <linux/platform_data/mmc-pxamci.h>
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index fd5fe12d7461..893239629d6b 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -20,7 +20,7 @@
 #include <linux/mtd/concat.h>
 
 #include <mach/hardware.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/mach/flash.h>
 
 struct sa_subdev_info {
diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c
index 6d43ddb3332f..e4fe8c4bc711 100644
--- a/drivers/mtd/nand/raw/vf610_nfc.c
+++ b/drivers/mtd/nand/raw/vf610_nfc.c
@@ -364,7 +364,7 @@ static int vf610_nfc_cmd(struct nand_chip *chip,
 {
 	const struct nand_op_instr *instr;
 	struct vf610_nfc *nfc = chip_to_nfc(chip);
-	int op_id = -1, trfr_sz = 0, offset;
+	int op_id = -1, trfr_sz = 0, offset = 0;
 	u32 col = 0, row = 0, cmd1 = 0, cmd2 = 0, code = 0;
 	bool force8bit = false;
 
diff --git a/drivers/pcmcia/omap_cf.c b/drivers/pcmcia/omap_cf.c
index c2a17a79f0b2..267fb875e40f 100644
--- a/drivers/pcmcia/omap_cf.c
+++ b/drivers/pcmcia/omap_cf.c
@@ -22,7 +22,7 @@
 
 #include <mach/hardware.h>
 #include <asm/io.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <mach/mux.h>
 #include <mach/tc.h>
diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c
index dd5d1103e02b..4b6418039387 100644
--- a/drivers/pps/clients/pps-gpio.c
+++ b/drivers/pps/clients/pps-gpio.c
@@ -31,19 +31,25 @@
 #include <linux/slab.h>
 #include <linux/pps_kernel.h>
 #include <linux/pps-gpio.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/list.h>
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
+#include <linux/timer.h>
+#include <linux/jiffies.h>
 
 /* Info for each registered platform device */
 struct pps_gpio_device_data {
 	int irq;			/* IRQ used as PPS source */
 	struct pps_device *pps;		/* PPS source device */
 	struct pps_source_info info;	/* PPS source information */
+	struct gpio_desc *gpio_pin;	/* GPIO port descriptors */
+	struct gpio_desc *echo_pin;
+	struct timer_list echo_timer;	/* timer to reset echo active state */
 	bool assert_falling_edge;
 	bool capture_clear;
-	unsigned int gpio_pin;
+	unsigned int echo_active_ms;	/* PPS echo active duration */
+	unsigned long echo_timeout;	/* timer timeout value in jiffies */
 };
 
 /*
@@ -61,18 +67,101 @@ static irqreturn_t pps_gpio_irq_handler(int irq, void *data)
 
 	info = data;
 
-	rising_edge = gpio_get_value(info->gpio_pin);
+	rising_edge = gpiod_get_value(info->gpio_pin);
 	if ((rising_edge && !info->assert_falling_edge) ||
 	    (!rising_edge && info->assert_falling_edge))
-		pps_event(info->pps, &ts, PPS_CAPTUREASSERT, NULL);
+		pps_event(info->pps, &ts, PPS_CAPTUREASSERT, data);
 	else if (info->capture_clear &&
 		 ((rising_edge && info->assert_falling_edge) ||
 		  (!rising_edge && !info->assert_falling_edge)))
-		pps_event(info->pps, &ts, PPS_CAPTURECLEAR, NULL);
+		pps_event(info->pps, &ts, PPS_CAPTURECLEAR, data);
 
 	return IRQ_HANDLED;
 }
 
+/* This function will only be called when an ECHO GPIO is defined */
+static void pps_gpio_echo(struct pps_device *pps, int event, void *data)
+{
+	/* add_timer() needs to write into info->echo_timer */
+	struct pps_gpio_device_data *info = data;
+
+	switch (event) {
+	case PPS_CAPTUREASSERT:
+		if (pps->params.mode & PPS_ECHOASSERT)
+			gpiod_set_value(info->echo_pin, 1);
+		break;
+
+	case PPS_CAPTURECLEAR:
+		if (pps->params.mode & PPS_ECHOCLEAR)
+			gpiod_set_value(info->echo_pin, 1);
+		break;
+	}
+
+	/* fire the timer */
+	if (info->pps->params.mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) {
+		info->echo_timer.expires = jiffies + info->echo_timeout;
+		add_timer(&info->echo_timer);
+	}
+}
+
+/* Timer callback to reset the echo pin to the inactive state */
+static void pps_gpio_echo_timer_callback(struct timer_list *t)
+{
+	const struct pps_gpio_device_data *info;
+
+	info = from_timer(info, t, echo_timer);
+
+	gpiod_set_value(info->echo_pin, 0);
+}
+
+static int pps_gpio_setup(struct platform_device *pdev)
+{
+	struct pps_gpio_device_data *data = platform_get_drvdata(pdev);
+	struct device_node *np = pdev->dev.of_node;
+	int ret;
+	u32 value;
+
+	data->gpio_pin = devm_gpiod_get(&pdev->dev,
+					NULL,	/* request "gpios" */
+					GPIOD_IN);
+	if (IS_ERR(data->gpio_pin)) {
+		dev_err(&pdev->dev,
+			"failed to request PPS GPIO\n");
+		return PTR_ERR(data->gpio_pin);
+	}
+
+	data->echo_pin = devm_gpiod_get_optional(&pdev->dev,
+						 "echo",
+						 GPIOD_OUT_LOW);
+	if (data->echo_pin) {
+		if (IS_ERR(data->echo_pin)) {
+			dev_err(&pdev->dev, "failed to request ECHO GPIO\n");
+			return PTR_ERR(data->echo_pin);
+		}
+
+		ret = of_property_read_u32(np,
+					   "echo-active-ms",
+					   &value);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"failed to get echo-active-ms from OF\n");
+			return ret;
+		}
+		data->echo_active_ms = value;
+		/* sanity check on echo_active_ms */
+		if (!data->echo_active_ms || data->echo_active_ms > 999) {
+			dev_err(&pdev->dev,
+				"echo-active-ms: %u - bad value from OF\n",
+				data->echo_active_ms);
+			return -EINVAL;
+		}
+	}
+
+	if (of_property_read_bool(np, "assert-falling-edge"))
+		data->assert_falling_edge = true;
+	return 0;
+}
+
 static unsigned long
 get_irqf_trigger_flags(const struct pps_gpio_device_data *data)
 {
@@ -90,53 +179,32 @@ get_irqf_trigger_flags(const struct pps_gpio_device_data *data)
 static int pps_gpio_probe(struct platform_device *pdev)
 {
 	struct pps_gpio_device_data *data;
-	const char *gpio_label;
 	int ret;
 	int pps_default_params;
 	const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data;
-	struct device_node *np = pdev->dev.of_node;
 
 	/* allocate space for device info */
-	data = devm_kzalloc(&pdev->dev, sizeof(struct pps_gpio_device_data),
-			    GFP_KERNEL);
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
+	platform_set_drvdata(pdev, data);
 
+	/* GPIO setup */
 	if (pdata) {
 		data->gpio_pin = pdata->gpio_pin;
-		gpio_label = pdata->gpio_label;
+		data->echo_pin = pdata->echo_pin;
 
 		data->assert_falling_edge = pdata->assert_falling_edge;
 		data->capture_clear = pdata->capture_clear;
+		data->echo_active_ms = pdata->echo_active_ms;
 	} else {
-		ret = of_get_gpio(np, 0);
-		if (ret < 0) {
-			dev_err(&pdev->dev, "failed to get GPIO from device tree\n");
-			return ret;
-		}
-		data->gpio_pin = ret;
-		gpio_label = PPS_GPIO_NAME;
-
-		if (of_get_property(np, "assert-falling-edge", NULL))
-			data->assert_falling_edge = true;
-	}
-
-	/* GPIO setup */
-	ret = devm_gpio_request(&pdev->dev, data->gpio_pin, gpio_label);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to request GPIO %u\n",
-			data->gpio_pin);
-		return ret;
-	}
-
-	ret = gpio_direction_input(data->gpio_pin);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to set pin direction\n");
-		return -EINVAL;
+		ret = pps_gpio_setup(pdev);
+		if (ret)
+			return -EINVAL;
 	}
 
 	/* IRQ setup */
-	ret = gpio_to_irq(data->gpio_pin);
+	ret = gpiod_to_irq(data->gpio_pin);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to map GPIO to IRQ: %d\n", ret);
 		return -EINVAL;
@@ -152,6 +220,11 @@ static int pps_gpio_probe(struct platform_device *pdev)
 	data->info.owner = THIS_MODULE;
 	snprintf(data->info.name, PPS_MAX_NAME_LEN - 1, "%s.%d",
 		 pdev->name, pdev->id);
+	if (data->echo_pin) {
+		data->info.echo = pps_gpio_echo;
+		data->echo_timeout = msecs_to_jiffies(data->echo_active_ms);
+		timer_setup(&data->echo_timer, pps_gpio_echo_timer_callback, 0);
+	}
 
 	/* register PPS source */
 	pps_default_params = PPS_CAPTUREASSERT | PPS_OFFSETASSERT;
@@ -173,7 +246,6 @@ static int pps_gpio_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	platform_set_drvdata(pdev, data);
 	dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n",
 		 data->irq);
 
@@ -185,6 +257,11 @@ static int pps_gpio_remove(struct platform_device *pdev)
 	struct pps_gpio_device_data *data = platform_get_drvdata(pdev);
 
 	pps_unregister_source(data->pps);
+	if (data->echo_pin) {
+		del_timer_sync(&data->echo_timer);
+		/* reset echo pin in any case */
+		gpiod_set_value(data->echo_pin, 0);
+	}
 	dev_info(&pdev->dev, "removed IRQ %d as PPS source\n", data->irq);
 	return 0;
 }
@@ -209,4 +286,4 @@ MODULE_AUTHOR("Ricardo Martins <rasm@fe.up.pt>");
209MODULE_AUTHOR("James Nuss <jamesnuss@nanometrics.ca>"); 286MODULE_AUTHOR("James Nuss <jamesnuss@nanometrics.ca>");
210MODULE_DESCRIPTION("Use GPIO pin as PPS source"); 287MODULE_DESCRIPTION("Use GPIO pin as PPS source");
211MODULE_LICENSE("GPL"); 288MODULE_LICENSE("GPL");
212MODULE_VERSION("1.0.0"); 289MODULE_VERSION("1.2.0");
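[note] The new echo machinery above follows a common driver pattern: set the echo GPIO on a PPS event, then arm a one-shot timer to pull it back low after echo-active-ms. A condensed sketch of just that pattern (the kernel APIs are real; the struct and names are illustrative, not the driver's):

	#include <linux/timer.h>
	#include <linux/jiffies.h>
	#include <linux/gpio/consumer.h>

	struct echo_state {
		struct gpio_desc *echo_pin;
		struct timer_list echo_timer;
		unsigned long echo_timeout;	/* jiffies */
	};

	static void echo_off(struct timer_list *t)
	{
		/* from_timer() recovers the containing struct from the timer */
		struct echo_state *s = from_timer(s, t, echo_timer);

		gpiod_set_value(s->echo_pin, 0);
	}

	static void echo_init(struct echo_state *s, unsigned int active_ms)
	{
		s->echo_timeout = msecs_to_jiffies(active_ms);
		timer_setup(&s->echo_timer, echo_off, 0);
	}

	static void echo_fire(struct echo_state *s)
	{
		gpiod_set_value(s->echo_pin, 1);
		/* mod_timer() (re)arms in one call; the driver above does
		 * the same by writing expires and calling add_timer() */
		mod_timer(&s->echo_timer, jiffies + s->echo_timeout);
	}

On teardown the timer must be stopped with del_timer_sync() before the pin goes away, exactly as the remove path above does.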
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index cf45829585cb..b29fc258eeba 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -2147,6 +2147,14 @@ static int riocm_add_mport(struct device *dev,
 	mutex_init(&cm->rx_lock);
 	riocm_rx_fill(cm, RIOCM_RX_RING_SIZE);
 	cm->rx_wq = create_workqueue(DRV_NAME "/rxq");
+	if (!cm->rx_wq) {
+		riocm_error("failed to allocate IBMBOX_%d on %s",
+			    cmbox, mport->name);
+		rio_release_outb_mbox(mport, cmbox);
+		kfree(cm);
+		return -ENOMEM;
+	}
+
 	INIT_WORK(&cm->rx_work, rio_ibmsg_handler);
 
 	cm->tx_slot = 0;
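[note] This hunk closes a straightforward hole: create_workqueue() returns NULL on failure, and the original code went on to use cm->rx_wq unchecked. Note the unwind order in the new path: the mailbox acquired just before is released and the half-initialized cm is freed before returning -ENOMEM. The general shape, as a runnable sketch with stubbed resources (names illustrative):

	#include <stdio.h>

	static int acquire_mbox(void)  { return 0; }	/* stub: succeeds */
	static void release_mbox(void) { printf("mbox released\n"); }
	static int create_wq(void)     { return -1; }	/* stub: fails */

	/* Each failure path must undo everything acquired so far,
	 * in reverse order of acquisition. */
	static int setup(void)
	{
		if (acquire_mbox())
			return -1;

		if (create_wq() < 0) {
			release_mbox();
			return -1;
		}
		return 0;
	}

	int main(void)
	{
		return setup() ? 1 : 0;
	}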
diff --git a/drivers/sh/intc/userimask.c b/drivers/sh/intc/userimask.c
index e649ceaaa410..87d69e7471f9 100644
--- a/drivers/sh/intc/userimask.c
+++ b/drivers/sh/intc/userimask.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/stat.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include "internals.h"
 
 static void __iomem *uimask;
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 3912526ead66..cdb613d38062 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -15,6 +15,7 @@
 
 #include <linux/clk.h>
 #include <linux/dmaengine.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 59e82e6d776d..573b2055173c 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -527,8 +527,12 @@ void __handle_sysrq(int key, bool check_mask)
 {
 	struct sysrq_key_op *op_p;
 	int orig_log_level;
+	int orig_suppress_printk;
 	int i;
 
+	orig_suppress_printk = suppress_printk;
+	suppress_printk = 0;
+
 	rcu_sysrq_start();
 	rcu_read_lock();
 	/*
@@ -574,6 +578,8 @@ void __handle_sysrq(int key, bool check_mask)
 	}
 	rcu_read_unlock();
 	rcu_sysrq_end();
+
+	suppress_printk = orig_suppress_printk;
 }
 
 void handle_sysrq(int key)
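[note] The sysrq change is a save/override/restore of a global policy flag: whatever suppress_printk was when the key arrived, output is forced on for the duration of the handler so its messages actually reach the console, then the prior policy is restored. The same shape in a standalone program:

	#include <stdio.h>

	static int suppress_output;	/* stands in for suppress_printk */

	static void log_msg(const char *s)
	{
		if (!suppress_output)
			printf("%s\n", s);
	}

	/* Same shape as __handle_sysrq(): force output on while the
	 * handler runs, then restore the policy that was in effect. */
	static void handle_key(void)
	{
		int orig = suppress_output;

		suppress_output = 0;
		log_msg("sysrq: HELP");
		suppress_output = orig;
	}

	int main(void)
	{
		suppress_output = 1;	/* output globally suppressed... */
		handle_key();		/* ...but the handler still prints */
		return 0;
	}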
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index 53b8ceea9bde..fb45f866b923 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -155,21 +155,6 @@ static const struct backlight_ops pwm_backlight_ops = {
 #ifdef CONFIG_OF
 #define PWM_LUMINANCE_SCALE	10000 /* luminance scale */
 
-/* An integer based power function */
-static u64 int_pow(u64 base, int exp)
-{
-	u64 result = 1;
-
-	while (exp) {
-		if (exp & 1)
-			result *= base;
-		exp >>= 1;
-		base *= base;
-	}
-
-	return result;
-}
-
 /*
  * CIE lightness to PWM conversion.
  *
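[note] The helper being deleted is exponentiation by squaring: each iteration halves exp and squares base, folding base into the result whenever the current low bit of exp is set, so the cost is O(log exp) multiplies rather than O(exp). It is presumably dropped here because an identical int_pow() is now provided by core kernel headers. The same algorithm as a standalone check:

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t int_pow(uint64_t base, int exp)
	{
		uint64_t result = 1;

		while (exp) {
			if (exp & 1)		/* low bit set: fold base in */
				result *= base;
			exp >>= 1;
			base *= base;		/* base^1, base^2, base^4, ... */
		}
		return result;
	}

	int main(void)
	{
		/* 3^5 = 3^4 * 3^1 = 81 * 3 = 243: two multiplies, not four */
		printf("%llu\n", (unsigned long long)int_pow(3, 5));
		return 0;
	}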
diff --git a/drivers/video/fbdev/fb-puv3.c b/drivers/video/fbdev/fb-puv3.c
index d9e816d53531..1bddcc20b2c0 100644
--- a/drivers/video/fbdev/fb-puv3.c
+++ b/drivers/video/fbdev/fb-puv3.c
@@ -20,7 +20,7 @@
 #include <linux/console.h>
 #include <linux/mm.h>
 
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/pgtable.h>
 #include <mach/hardware.h>
 
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 6446bcab4185..93d5bebf9572 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -215,6 +215,9 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
 	 * hypervisor.
 	 */
 	lb_offset = param.local_vaddr & (PAGE_SIZE - 1);
+	if (param.count == 0 ||
+	    param.count > U64_MAX - lb_offset - PAGE_SIZE + 1)
+		return -EINVAL;
 	num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
 	/* Allocate the buffers we need */
@@ -331,8 +334,8 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
 	struct fsl_hv_ioctl_prop param;
 	char __user *upath, *upropname;
 	void __user *upropval;
-	char *path = NULL, *propname = NULL;
-	void *propval = NULL;
+	char *path, *propname;
+	void *propval;
 	int ret = 0;
 
 	/* Get the parameters from the user. */
@@ -344,32 +347,30 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
 	upropval = (void __user *)(uintptr_t)param.propval;
 
 	path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN);
-	if (IS_ERR(path)) {
-		ret = PTR_ERR(path);
-		goto out;
-	}
+	if (IS_ERR(path))
+		return PTR_ERR(path);
 
 	propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN);
 	if (IS_ERR(propname)) {
 		ret = PTR_ERR(propname);
-		goto out;
+		goto err_free_path;
 	}
 
 	if (param.proplen > FH_DTPROP_MAX_PROPLEN) {
 		ret = -EINVAL;
-		goto out;
+		goto err_free_propname;
 	}
 
 	propval = kmalloc(param.proplen, GFP_KERNEL);
 	if (!propval) {
 		ret = -ENOMEM;
-		goto out;
+		goto err_free_propname;
 	}
 
 	if (set) {
 		if (copy_from_user(propval, upropval, param.proplen)) {
 			ret = -EFAULT;
-			goto out;
+			goto err_free_propval;
 		}
 
 		param.ret = fh_partition_set_dtprop(param.handle,
@@ -388,7 +389,7 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
 		if (copy_to_user(upropval, propval, param.proplen) ||
 		    put_user(param.proplen, &p->proplen)) {
 			ret = -EFAULT;
-			goto out;
+			goto err_free_propval;
 		}
 		}
 	}
@@ -396,10 +397,12 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
 	if (put_user(param.ret, &p->ret))
 		ret = -EFAULT;
 
-out:
-	kfree(path);
+err_free_propval:
 	kfree(propval);
+err_free_propname:
 	kfree(propname);
+err_free_path:
+	kfree(path);
 
 	return ret;
 }
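[note] The rewrite swaps the single out: label (which relied on pre-initializing every pointer to NULL so that freeing an unallocated one was harmless) for the conventional goto ladder: one label per allocation, jumped to in reverse acquisition order, so each failure path frees exactly what has been allocated so far. The success path intentionally falls through all three labels, just as in the patch. The skeleton of that idiom, runnable in userspace:

	#include <stdlib.h>

	static int do_work(void)
	{
		int ret = -1;
		char *path, *propname, *propval;

		path = malloc(64);
		if (!path)
			return -1;

		propname = malloc(64);
		if (!propname)
			goto err_free_path;

		propval = malloc(64);
		if (!propval)
			goto err_free_propname;

		ret = 0;		/* ... real work here ... */

		free(propval);		/* success falls through the labels */
	err_free_propname:
		free(propname);
	err_free_path:
		free(path);
		return ret;
	}

	int main(void)
	{
		return do_work();
	}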
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7d09d125f148..fa9e99a962e0 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -524,6 +524,19 @@ static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
 
 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
 
+static inline int make_prot(u32 p_flags)
+{
+	int prot = 0;
+
+	if (p_flags & PF_R)
+		prot |= PROT_READ;
+	if (p_flags & PF_W)
+		prot |= PROT_WRITE;
+	if (p_flags & PF_X)
+		prot |= PROT_EXEC;
+	return prot;
+}
+
 /* This is much more generalized than the library routine read function,
    so we keep this separate.  Technically the library read function
    is only provided so that we can read a.out libraries that have
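[note] make_prot() centralizes the PF_* to PROT_* translation that was previously open-coded in two places (both converted in later hunks). Because the ELF PF_R/PF_W/PF_X segment flags and the mmap PROT_* constants are both userspace-visible, essentially the same helper can be exercised outside the kernel (u32 becomes unsigned int here):

	#include <elf.h>
	#include <sys/mman.h>
	#include <stdio.h>

	static int make_prot(unsigned int p_flags)
	{
		int prot = 0;

		if (p_flags & PF_R)
			prot |= PROT_READ;
		if (p_flags & PF_W)
			prot |= PROT_WRITE;
		if (p_flags & PF_X)
			prot |= PROT_EXEC;
		return prot;
	}

	int main(void)
	{
		/* a typical text segment: readable + executable */
		printf("%#x\n", make_prot(PF_R | PF_X));	/* 0x5 on Linux */
		return 0;
	}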
@@ -563,16 +576,10 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
-			int elf_prot = 0;
+			int elf_prot = make_prot(eppnt->p_flags);
 			unsigned long vaddr = 0;
 			unsigned long k, map_addr;
 
-			if (eppnt->p_flags & PF_R)
-				elf_prot = PROT_READ;
-			if (eppnt->p_flags & PF_W)
-				elf_prot |= PROT_WRITE;
-			if (eppnt->p_flags & PF_X)
-				elf_prot |= PROT_EXEC;
 			vaddr = eppnt->p_vaddr;
 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 				elf_type |= MAP_FIXED_NOREPLACE;
@@ -687,7 +694,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	struct file *interpreter = NULL; /* to shut gcc up */
 	unsigned long load_addr = 0, load_bias = 0;
 	int load_addr_set = 0;
-	char * elf_interpreter = NULL;
 	unsigned long error;
 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
 	unsigned long elf_bss, elf_brk;
@@ -698,13 +704,12 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long reloc_func_desc __maybe_unused = 0;
 	int executable_stack = EXSTACK_DEFAULT;
-	struct pt_regs *regs = current_pt_regs();
 	struct {
 		struct elfhdr elf_ex;
 		struct elfhdr interp_elf_ex;
 	} *loc;
 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
-	loff_t pos;
+	struct pt_regs *regs;
 
 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 	if (!loc) {
@@ -734,69 +739,66 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		goto out;
 
 	elf_ppnt = elf_phdata;
-	elf_bss = 0;
-	elf_brk = 0;
-
-	start_code = ~0UL;
-	end_code = 0;
-	start_data = 0;
-	end_data = 0;
-
-	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
-		if (elf_ppnt->p_type == PT_INTERP) {
-			/* This is the program interpreter used for
-			 * shared libraries - for now assume that this
-			 * is an a.out format binary
-			 */
-			retval = -ENOEXEC;
-			if (elf_ppnt->p_filesz > PATH_MAX ||
-			    elf_ppnt->p_filesz < 2)
-				goto out_free_ph;
-
-			retval = -ENOMEM;
-			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
-						  GFP_KERNEL);
-			if (!elf_interpreter)
-				goto out_free_ph;
-
-			pos = elf_ppnt->p_offset;
-			retval = kernel_read(bprm->file, elf_interpreter,
-					     elf_ppnt->p_filesz, &pos);
-			if (retval != elf_ppnt->p_filesz) {
-				if (retval >= 0)
-					retval = -EIO;
-				goto out_free_interp;
-			}
-			/* make sure path is NULL terminated */
-			retval = -ENOEXEC;
-			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
-				goto out_free_interp;
-
-			interpreter = open_exec(elf_interpreter);
-			retval = PTR_ERR(interpreter);
-			if (IS_ERR(interpreter))
-				goto out_free_interp;
-
-			/*
-			 * If the binary is not readable then enforce
-			 * mm->dumpable = 0 regardless of the interpreter's
-			 * permissions.
-			 */
-			would_dump(bprm, interpreter);
-
-			/* Get the exec headers */
-			pos = 0;
-			retval = kernel_read(interpreter, &loc->interp_elf_ex,
-					     sizeof(loc->interp_elf_ex), &pos);
-			if (retval != sizeof(loc->interp_elf_ex)) {
-				if (retval >= 0)
-					retval = -EIO;
-				goto out_free_dentry;
-			}
-
-			break;
-		}
-		elf_ppnt++;
-	}
+	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
+		char *elf_interpreter;
+		loff_t pos;
+
+		if (elf_ppnt->p_type != PT_INTERP)
+			continue;
+
+		/*
+		 * This is the program interpreter used for shared libraries -
+		 * for now assume that this is an a.out format binary.
+		 */
+		retval = -ENOEXEC;
+		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
+			goto out_free_ph;
+
+		retval = -ENOMEM;
+		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
+		if (!elf_interpreter)
+			goto out_free_ph;
+
+		pos = elf_ppnt->p_offset;
+		retval = kernel_read(bprm->file, elf_interpreter,
+				     elf_ppnt->p_filesz, &pos);
+		if (retval != elf_ppnt->p_filesz) {
+			if (retval >= 0)
+				retval = -EIO;
+			goto out_free_interp;
+		}
+		/* make sure path is NULL terminated */
+		retval = -ENOEXEC;
+		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
+			goto out_free_interp;
+
+		interpreter = open_exec(elf_interpreter);
+		kfree(elf_interpreter);
+		retval = PTR_ERR(interpreter);
+		if (IS_ERR(interpreter))
+			goto out_free_ph;
+
+		/*
+		 * If the binary is not readable then enforce mm->dumpable = 0
+		 * regardless of the interpreter's permissions.
+		 */
+		would_dump(bprm, interpreter);
+
+		/* Get the exec headers */
+		pos = 0;
+		retval = kernel_read(interpreter, &loc->interp_elf_ex,
+				     sizeof(loc->interp_elf_ex), &pos);
+		if (retval != sizeof(loc->interp_elf_ex)) {
+			if (retval >= 0)
+				retval = -EIO;
+			goto out_free_dentry;
+		}
+
+		break;
+
+out_free_interp:
+		kfree(elf_interpreter);
+		goto out_free_ph;
+	}
 
 	elf_ppnt = elf_phdata;
@@ -819,7 +821,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
819 } 821 }
820 822
821 /* Some simple consistency checks for the interpreter */ 823 /* Some simple consistency checks for the interpreter */
822 if (elf_interpreter) { 824 if (interpreter) {
823 retval = -ELIBBAD; 825 retval = -ELIBBAD;
824 /* Not an ELF interpreter */ 826 /* Not an ELF interpreter */
825 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) 827 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
@@ -884,13 +886,19 @@ static int load_elf_binary(struct linux_binprm *bprm)
884 if (retval < 0) 886 if (retval < 0)
885 goto out_free_dentry; 887 goto out_free_dentry;
886 888
887 current->mm->start_stack = bprm->p; 889 elf_bss = 0;
890 elf_brk = 0;
891
892 start_code = ~0UL;
893 end_code = 0;
894 start_data = 0;
895 end_data = 0;
888 896
889 /* Now we do a little grungy work by mmapping the ELF image into 897 /* Now we do a little grungy work by mmapping the ELF image into
890 the correct location in memory. */ 898 the correct location in memory. */
891 for(i = 0, elf_ppnt = elf_phdata; 899 for(i = 0, elf_ppnt = elf_phdata;
892 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { 900 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
893 int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE; 901 int elf_prot, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
894 unsigned long k, vaddr; 902 unsigned long k, vaddr;
895 unsigned long total_size = 0; 903 unsigned long total_size = 0;
896 904
@@ -931,12 +939,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
931 elf_fixed = MAP_FIXED; 939 elf_fixed = MAP_FIXED;
932 } 940 }
933 941
934 if (elf_ppnt->p_flags & PF_R) 942 elf_prot = make_prot(elf_ppnt->p_flags);
935 elf_prot |= PROT_READ;
936 if (elf_ppnt->p_flags & PF_W)
937 elf_prot |= PROT_WRITE;
938 if (elf_ppnt->p_flags & PF_X)
939 elf_prot |= PROT_EXEC;
940 943
941 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; 944 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
942 945
@@ -978,7 +981,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
978 * independently randomized mmap region (0 load_bias 981 * independently randomized mmap region (0 load_bias
979 * without MAP_FIXED). 982 * without MAP_FIXED).
980 */ 983 */
981 if (elf_interpreter) { 984 if (interpreter) {
982 load_bias = ELF_ET_DYN_BASE; 985 load_bias = ELF_ET_DYN_BASE;
983 if (current->flags & PF_RANDOMIZE) 986 if (current->flags & PF_RANDOMIZE)
984 load_bias += arch_mmap_rnd(); 987 load_bias += arch_mmap_rnd();
@@ -1076,7 +1079,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
1076 goto out_free_dentry; 1079 goto out_free_dentry;
1077 } 1080 }
1078 1081
1079 if (elf_interpreter) { 1082 if (interpreter) {
1080 unsigned long interp_map_addr = 0; 1083 unsigned long interp_map_addr = 0;
1081 1084
1082 elf_entry = load_elf_interp(&loc->interp_elf_ex, 1085 elf_entry = load_elf_interp(&loc->interp_elf_ex,
@@ -1100,7 +1103,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
1100 1103
1101 allow_write_access(interpreter); 1104 allow_write_access(interpreter);
1102 fput(interpreter); 1105 fput(interpreter);
1103 kfree(elf_interpreter);
1104 } else { 1106 } else {
1105 elf_entry = loc->elf_ex.e_entry; 1107 elf_entry = loc->elf_ex.e_entry;
1106 if (BAD_ADDR(elf_entry)) { 1108 if (BAD_ADDR(elf_entry)) {
@@ -1115,7 +1117,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
1115 set_binfmt(&elf_format); 1117 set_binfmt(&elf_format);
1116 1118
1117#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES 1119#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1118 retval = arch_setup_additional_pages(bprm, !!elf_interpreter); 1120 retval = arch_setup_additional_pages(bprm, !!interpreter);
1119 if (retval < 0) 1121 if (retval < 0)
1120 goto out; 1122 goto out;
1121#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ 1123#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
@@ -1132,6 +1134,17 @@ static int load_elf_binary(struct linux_binprm *bprm)
1132 current->mm->start_stack = bprm->p; 1134 current->mm->start_stack = bprm->p;
1133 1135
1134 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { 1136 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1137 /*
1138 * For architectures with ELF randomization, when executing
1139 * a loader directly (i.e. no interpreter listed in ELF
1140 * headers), move the brk area out of the mmap region
1141 * (since it grows up, and may collide early with the stack
1142 * growing down), and into the unused ELF_ET_DYN_BASE region.
1143 */
1144 if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && !interpreter)
1145 current->mm->brk = current->mm->start_brk =
1146 ELF_ET_DYN_BASE;
1147
1135 current->mm->brk = current->mm->start_brk = 1148 current->mm->brk = current->mm->start_brk =
1136 arch_randomize_brk(current->mm); 1149 arch_randomize_brk(current->mm);
1137#ifdef compat_brk_randomized 1150#ifdef compat_brk_randomized
@@ -1148,6 +1161,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
1148 MAP_FIXED | MAP_PRIVATE, 0); 1161 MAP_FIXED | MAP_PRIVATE, 0);
1149 } 1162 }
1150 1163
1164 regs = current_pt_regs();
1151#ifdef ELF_PLAT_INIT 1165#ifdef ELF_PLAT_INIT
1152 /* 1166 /*
1153 * The ABI may specify that certain registers be set up in special 1167 * The ABI may specify that certain registers be set up in special
@@ -1176,8 +1190,6 @@ out_free_dentry:
1176 allow_write_access(interpreter); 1190 allow_write_access(interpreter);
1177 if (interpreter) 1191 if (interpreter)
1178 fput(interpreter); 1192 fput(interpreter);
1179out_free_interp:
1180 kfree(elf_interpreter);
1181out_free_ph: 1193out_free_ph:
1182 kfree(elf_phdata); 1194 kfree(elf_phdata);
1183 goto out; 1195 goto out;
@@ -1456,8 +1468,6 @@ static void fill_elf_header(struct elfhdr *elf, int segs,
1456 elf->e_ehsize = sizeof(struct elfhdr); 1468 elf->e_ehsize = sizeof(struct elfhdr);
1457 elf->e_phentsize = sizeof(struct elf_phdr); 1469 elf->e_phentsize = sizeof(struct elf_phdr);
1458 elf->e_phnum = segs; 1470 elf->e_phnum = segs;
1459
1460 return;
1461} 1471}
1462 1472
1463static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) 1473static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
@@ -1470,7 +1480,6 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1470 phdr->p_memsz = 0; 1480 phdr->p_memsz = 0;
1471 phdr->p_flags = 0; 1481 phdr->p_flags = 0;
1472 phdr->p_align = 0; 1482 phdr->p_align = 0;
1473 return;
1474} 1483}
1475 1484
1476static void fill_note(struct memelfnote *note, const char *name, int type, 1485static void fill_note(struct memelfnote *note, const char *name, int type,
@@ -1480,7 +1489,6 @@ static void fill_note(struct memelfnote *note, const char *name, int type,
1480 note->type = type; 1489 note->type = type;
1481 note->datasz = sz; 1490 note->datasz = sz;
1482 note->data = data; 1491 note->data = data;
1483 return;
1484} 1492}
1485 1493
1486/* 1494/*
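The make_prot() helper referenced in the right-hand column folds the open-coded PF_* to PROT_* translation removed above into one place. A minimal sketch consistent with those removed lines; the kernel's actual definition may differ in detail:

static int make_prot(u32 p_flags)
{
	int prot = 0;

	/* Each ELF segment permission bit selects the matching mmap
	 * protection, exactly as the removed open-coded version did. */
	if (p_flags & PF_R)
		prot |= PROT_READ;
	if (p_flags & PF_W)
		prot |= PROT_WRITE;
	if (p_flags & PF_X)
		prot |= PROT_EXEC;
	return prot;
}
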
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f80045048bb7..0f7552a87d54 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -29,7 +29,6 @@
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/log2.h> 30#include <linux/log2.h>
31#include <linux/cleancache.h> 31#include <linux/cleancache.h>
32#include <linux/dax.h>
33#include <linux/task_io_accounting_ops.h> 32#include <linux/task_io_accounting_ops.h>
34#include <linux/falloc.h> 33#include <linux/falloc.h>
35#include <linux/uaccess.h> 34#include <linux/uaccess.h>
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 1645fcfd9691..d27720cd3664 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -20,7 +20,6 @@
20#include <linux/namei.h> 20#include <linux/namei.h>
21#include <linux/security.h> 21#include <linux/security.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/xattr.h>
24#include "internal.h" 23#include "internal.h"
25 24
26#define CACHEFILES_KEYBUF_SIZE 512 25#define CACHEFILES_KEYBUF_SIZE 512
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index c5234c21b539..f2bb7985d21c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -39,7 +39,6 @@
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/pid_namespace.h> 40#include <linux/pid_namespace.h>
41#include <asm/io.h> 41#include <asm/io.h>
42#include <linux/poll.h>
43#include <linux/uaccess.h> 42#include <linux/uaccess.h>
44 43
45#include <linux/coda.h> 44#include <linux/coda.h>
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 08d3bd602f73..93b1fa7bb298 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -21,6 +21,9 @@
21#include <linux/eventfd.h> 21#include <linux/eventfd.h>
22#include <linux/proc_fs.h> 22#include <linux/proc_fs.h>
23#include <linux/seq_file.h> 23#include <linux/seq_file.h>
24#include <linux/idr.h>
25
26static DEFINE_IDA(eventfd_ida);
24 27
25struct eventfd_ctx { 28struct eventfd_ctx {
26 struct kref kref; 29 struct kref kref;
@@ -35,6 +38,7 @@ struct eventfd_ctx {
35 */ 38 */
36 __u64 count; 39 __u64 count;
37 unsigned int flags; 40 unsigned int flags;
41 int id;
38}; 42};
39 43
40/** 44/**
@@ -69,6 +73,8 @@ EXPORT_SYMBOL_GPL(eventfd_signal);
69 73
70static void eventfd_free_ctx(struct eventfd_ctx *ctx) 74static void eventfd_free_ctx(struct eventfd_ctx *ctx)
71{ 75{
76 if (ctx->id >= 0)
77 ida_simple_remove(&eventfd_ida, ctx->id);
72 kfree(ctx); 78 kfree(ctx);
73} 79}
74 80
@@ -297,6 +303,7 @@ static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
297 seq_printf(m, "eventfd-count: %16llx\n", 303 seq_printf(m, "eventfd-count: %16llx\n",
298 (unsigned long long)ctx->count); 304 (unsigned long long)ctx->count);
299 spin_unlock_irq(&ctx->wqh.lock); 305 spin_unlock_irq(&ctx->wqh.lock);
306 seq_printf(m, "eventfd-id: %d\n", ctx->id);
300} 307}
301#endif 308#endif
302 309
@@ -400,6 +407,7 @@ static int do_eventfd(unsigned int count, int flags)
400 init_waitqueue_head(&ctx->wqh); 407 init_waitqueue_head(&ctx->wqh);
401 ctx->count = count; 408 ctx->count = count;
402 ctx->flags = flags; 409 ctx->flags = flags;
410 ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
403 411
404 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 412 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
405 O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS)); 413 O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
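The new id field follows the standard IDA lifecycle: allocate at creation, free at destruction, and treat a negative value (a stored -errno from a failed allocation) as "no id". A self-contained sketch of that pattern with illustrative names, kernel context assumed:

#include <linux/idr.h>
#include <linux/slab.h>

static DEFINE_IDA(example_ida);

struct tagged {
	int id;
};

static struct tagged *tagged_alloc(void)
{
	struct tagged *t = kzalloc(sizeof(*t), GFP_KERNEL);

	if (!t)
		return NULL;
	/* On failure this stores a negative errno; tagged_free() skips
	 * the removal in that case, as eventfd_free_ctx() does above. */
	t->id = ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
	return t;
}

static void tagged_free(struct tagged *t)
{
	if (t->id >= 0)
		ida_simple_remove(&example_ida, t->id);
	kfree(t);
}
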
diff --git a/fs/exec.c b/fs/exec.c
index 2e0033348d8e..d88584ebf07f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1652,11 +1652,13 @@ int search_binary_handler(struct linux_binprm *bprm)
1652 if (!try_module_get(fmt->module)) 1652 if (!try_module_get(fmt->module))
1653 continue; 1653 continue;
1654 read_unlock(&binfmt_lock); 1654 read_unlock(&binfmt_lock);
1655
1655 bprm->recursion_depth++; 1656 bprm->recursion_depth++;
1656 retval = fmt->load_binary(bprm); 1657 retval = fmt->load_binary(bprm);
1658 bprm->recursion_depth--;
1659
1657 read_lock(&binfmt_lock); 1660 read_lock(&binfmt_lock);
1658 put_binfmt(fmt); 1661 put_binfmt(fmt);
1659 bprm->recursion_depth--;
1660 if (retval < 0 && !bprm->mm) { 1662 if (retval < 0 && !bprm->mm) {
1661 /* we got to flush_old_exec() and failed after it */ 1663 /* we got to flush_old_exec() and failed after it */
1662 read_unlock(&binfmt_lock); 1664 read_unlock(&binfmt_lock);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index b3bed32946b1..0e3ed79fcc3f 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -193,12 +193,17 @@ static int fat_file_release(struct inode *inode, struct file *filp)
193int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) 193int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
194{ 194{
195 struct inode *inode = filp->f_mapping->host; 195 struct inode *inode = filp->f_mapping->host;
196 int res, err; 196 int err;
197
198 err = __generic_file_fsync(filp, start, end, datasync);
199 if (err)
200 return err;
197 201
198 res = generic_file_fsync(filp, start, end, datasync);
199 err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); 202 err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
203 if (err)
204 return err;
200 205
201 return res ? res : err; 206 return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
202} 207}
203 208
204 209
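Assembled from the right-hand column, the rewritten fat_file_fsync() fails fast at each stage instead of merging two error codes, and finishes with an explicit device cache flush:

int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	int err;

	/* Sync the file's own pages and inode metadata first. */
	err = __generic_file_fsync(filp, start, end, datasync);
	if (err)
		return err;

	/* Then the FAT region, which lives on a separate mapping. */
	err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
	if (err)
		return err;

	/* Finally flush the block device's volatile write cache. */
	return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
}
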
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b6ccb6c57706..9c8ca6cd3ce4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -510,7 +510,7 @@ static ssize_t lstats_write(struct file *file, const char __user *buf,
510 510
511 if (!task) 511 if (!task)
512 return -ESRCH; 512 return -ESRCH;
513 clear_all_latency_tracing(task); 513 clear_tsk_latency_tracing(task);
514 put_task_struct(task); 514 put_task_struct(task);
515 515
516 return count; 516 return count;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 32d8986c26fb..b5b26d8a192c 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -450,6 +450,15 @@ fail:
450 450
451static inline __u32 xattr_hash(const char *msg, int len) 451static inline __u32 xattr_hash(const char *msg, int len)
452{ 452{
453 /*
454 * csum_partial() gives different results for little-endian and
455 * big endian hosts. Images created on little-endian hosts and
456 * mounted on big-endian hosts(and vice versa) will see csum mismatches
457 * when trying to fetch xattrs. Treating the hash as __wsum_t would
458 * lower the frequency of mismatch. This is an endianness bug in
459 * reiserfs. The return statement would result in a sparse warning. Do
460 * not fix the sparse warning so as to not hide a reminder of the bug.
461 */
453 return csum_partial(msg, len, 0); 462 return csum_partial(msg, len, 0);
454} 463}
455 464
diff --git a/include/asm-generic/shmparam.h b/include/asm-generic/shmparam.h
index 8b78c0ba08b1..b8f9035ffc2c 100644
--- a/include/asm-generic/shmparam.h
+++ b/include/asm-generic/shmparam.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __ASM_GENERIC_SHMPARAM_H 2#ifndef __ASM_GENERIC_SHMPARAM_H
3#define __ASM_GENERIC_SHMPARAM_H 3#define __ASM_GENERIC_SHMPARAM_H
4 4
diff --git a/include/asm-generic/sizes.h b/include/asm-generic/sizes.h
deleted file mode 100644
index 1dcfad9629ef..000000000000
--- a/include/asm-generic/sizes.h
+++ /dev/null
@@ -1,2 +0,0 @@
1/* This is a placeholder, to be removed over time */
2#include <linux/sizes.h>
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 688ab0de7810..b40fc633f3be 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -15,7 +15,6 @@ struct filename;
15 * This structure is used to hold the arguments that are used when loading binaries. 15 * This structure is used to hold the arguments that are used when loading binaries.
16 */ 16 */
17struct linux_binprm { 17struct linux_binprm {
18 char buf[BINPRM_BUF_SIZE];
19#ifdef CONFIG_MMU 18#ifdef CONFIG_MMU
20 struct vm_area_struct *vma; 19 struct vm_area_struct *vma;
21 unsigned long vma_pages; 20 unsigned long vma_pages;
@@ -64,6 +63,8 @@ struct linux_binprm {
64 unsigned long loader, exec; 63 unsigned long loader, exec;
65 64
66 struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */ 65 struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
66
67 char buf[BINPRM_BUF_SIZE];
67} __randomize_layout; 68} __randomize_layout;
68 69
69#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 70#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 602af23b98c7..cf074bce3eb3 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -60,7 +60,7 @@ static __always_inline unsigned long hweight_long(unsigned long w)
60 */ 60 */
61static inline __u64 rol64(__u64 word, unsigned int shift) 61static inline __u64 rol64(__u64 word, unsigned int shift)
62{ 62{
63 return (word << shift) | (word >> (64 - shift)); 63 return (word << (shift & 63)) | (word >> ((-shift) & 63));
64} 64}
65 65
66/** 66/**
@@ -70,7 +70,7 @@ static inline __u64 rol64(__u64 word, unsigned int shift)
70 */ 70 */
71static inline __u64 ror64(__u64 word, unsigned int shift) 71static inline __u64 ror64(__u64 word, unsigned int shift)
72{ 72{
73 return (word >> shift) | (word << (64 - shift)); 73 return (word >> (shift & 63)) | (word << ((-shift) & 63));
74} 74}
75 75
76/** 76/**
@@ -80,7 +80,7 @@ static inline __u64 ror64(__u64 word, unsigned int shift)
80 */ 80 */
81static inline __u32 rol32(__u32 word, unsigned int shift) 81static inline __u32 rol32(__u32 word, unsigned int shift)
82{ 82{
83 return (word << shift) | (word >> ((-shift) & 31)); 83 return (word << (shift & 31)) | (word >> ((-shift) & 31));
84} 84}
85 85
86/** 86/**
@@ -90,7 +90,7 @@ static inline __u32 rol32(__u32 word, unsigned int shift)
90 */ 90 */
91static inline __u32 ror32(__u32 word, unsigned int shift) 91static inline __u32 ror32(__u32 word, unsigned int shift)
92{ 92{
93 return (word >> shift) | (word << (32 - shift)); 93 return (word >> (shift & 31)) | (word << ((-shift) & 31));
94} 94}
95 95
96/** 96/**
@@ -100,7 +100,7 @@ static inline __u32 ror32(__u32 word, unsigned int shift)
100 */ 100 */
101static inline __u16 rol16(__u16 word, unsigned int shift) 101static inline __u16 rol16(__u16 word, unsigned int shift)
102{ 102{
103 return (word << shift) | (word >> (16 - shift)); 103 return (word << (shift & 15)) | (word >> ((-shift) & 15));
104} 104}
105 105
106/** 106/**
@@ -110,7 +110,7 @@ static inline __u16 rol16(__u16 word, unsigned int shift)
110 */ 110 */
111static inline __u16 ror16(__u16 word, unsigned int shift) 111static inline __u16 ror16(__u16 word, unsigned int shift)
112{ 112{
113 return (word >> shift) | (word << (16 - shift)); 113 return (word >> (shift & 15)) | (word << ((-shift) & 15));
114} 114}
115 115
116/** 116/**
@@ -120,7 +120,7 @@ static inline __u16 ror16(__u16 word, unsigned int shift)
120 */ 120 */
121static inline __u8 rol8(__u8 word, unsigned int shift) 121static inline __u8 rol8(__u8 word, unsigned int shift)
122{ 122{
123 return (word << shift) | (word >> (8 - shift)); 123 return (word << (shift & 7)) | (word >> ((-shift) & 7));
124} 124}
125 125
126/** 126/**
@@ -130,7 +130,7 @@ static inline __u8 rol8(__u8 word, unsigned int shift)
130 */ 130 */
131static inline __u8 ror8(__u8 word, unsigned int shift) 131static inline __u8 ror8(__u8 word, unsigned int shift)
132{ 132{
133 return (word >> shift) | (word << (8 - shift)); 133 return (word >> (shift & 7)) | (word << ((-shift) & 7));
134} 134}
135 135
136/** 136/**
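The masking matters because a rotate by zero turns the complementary shift into a shift by the full type width, which is undefined behaviour in C. A small userspace check of the 32-bit case; the same reasoning covers the 8-, 16- and 64-bit variants:

#include <stdint.h>
#include <stdio.h>

/* Old form: shift == 0 makes the left shift a shift by 32 - undefined. */
static uint32_t ror32_old(uint32_t word, unsigned int shift)
{
	return (word >> shift) | (word << (32 - shift));
}

/* New form: both shift counts stay in [0, 31], so shift == 0 is well
 * defined and returns the word unchanged. */
static uint32_t ror32_new(uint32_t word, unsigned int shift)
{
	return (word >> (shift & 31)) | (word << ((-shift) & 31));
}

int main(void)
{
	printf("%08x\n", ror32_new(0xdeadbeef, 0));	/* deadbeef */
	printf("%08x\n", ror32_new(0xdeadbeef, 8));	/* efdeadbe */
	return 0;
}
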
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index ba814f18cb4c..19e58b9138a0 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -140,8 +140,7 @@ struct ftrace_likely_data {
140 * Do not use __always_inline here, since currently it expands to inline again 140 * Do not use __always_inline here, since currently it expands to inline again
141 * (which would break users of __always_inline). 141 * (which would break users of __always_inline).
142 */ 142 */
143#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ 143#if !defined(CONFIG_OPTIMIZE_INLINING)
144 !defined(CONFIG_OPTIMIZE_INLINING)
145#define inline inline __attribute__((__always_inline__)) __gnu_inline \ 144#define inline inline __attribute__((__always_inline__)) __gnu_inline \
146 __maybe_unused notrace 145 __maybe_unused notrace
147#else 146#else
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 147bdec42215..21755471b1c3 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -633,8 +633,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len,
633 */ 633 */
634static inline int cpumask_parse(const char *buf, struct cpumask *dstp) 634static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
635{ 635{
636 char *nl = strchr(buf, '\n'); 636 unsigned int len = strchrnul(buf, '\n') - buf;
637 unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf);
638 637
639 return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); 638 return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
640} 639}
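strchrnul() points at the terminating NUL when no newline is present, so one expression handles both a newline-terminated buffer and a bare string, replacing the strchr()/strlen() two-branch version. A userspace demonstration (strchrnul() is a GNU extension there; the kernel has its own):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *a = "0-3,7\n";	/* trailing newline, as written via sysfs */
	const char *b = "0-3,7";	/* no newline */

	/* Both evaluate to the payload length, 5. */
	printf("%zu %zu\n",
	       (size_t)(strchrnul(a, '\n') - a),
	       (size_t)(strchrnul(b, '\n') - b));
	return 0;
}
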
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6ab8c1bada3f..c309f43bde45 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -19,6 +19,7 @@ struct ipc_ids {
19 struct rw_semaphore rwsem; 19 struct rw_semaphore rwsem;
20 struct idr ipcs_idr; 20 struct idr ipcs_idr;
21 int max_idx; 21 int max_idx;
22 int last_idx; /* For wrap around detection */
22#ifdef CONFIG_CHECKPOINT_RESTORE 23#ifdef CONFIG_CHECKPOINT_RESTORE
23 int next_id; 24 int next_id;
24#endif 25#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a3b59d143afb..74b1ee9027f5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -484,6 +484,7 @@ extern int __kernel_text_address(unsigned long addr);
484extern int kernel_text_address(unsigned long addr); 484extern int kernel_text_address(unsigned long addr);
485extern int func_ptr_is_kernel_text(void *ptr); 485extern int func_ptr_is_kernel_text(void *ptr);
486 486
487u64 int_pow(u64 base, unsigned int exp);
487unsigned long int_sqrt(unsigned long); 488unsigned long int_sqrt(unsigned long);
488 489
489#if BITS_PER_LONG < 64 490#if BITS_PER_LONG < 64
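A helper with this signature is conventionally exponentiation by squaring, needing O(log exp) multiplies; a sketch of the textbook algorithm (the kernel's exact body is not shown in this diff):

u64 int_pow(u64 base, unsigned int exp)
{
	u64 result = 1;

	/* Consume one exponent bit per iteration, squaring the base as
	 * we go; set bits multiply the current square into the result. */
	while (exp) {
		if (exp & 1)
			result *= base;
		exp >>= 1;
		base *= base;
	}
	return result;
}
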
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2c89e60bc752..0f9da966934e 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -4,7 +4,6 @@
4/* Simple interface for creating and stopping kernel threads without mess. */ 4/* Simple interface for creating and stopping kernel threads without mess. */
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/cgroup.h>
8 7
9__printf(4, 5) 8__printf(4, 5)
10struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), 9struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
@@ -198,6 +197,8 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work);
198 197
199void kthread_destroy_worker(struct kthread_worker *worker); 198void kthread_destroy_worker(struct kthread_worker *worker);
200 199
200struct cgroup_subsys_state;
201
201#ifdef CONFIG_BLK_CGROUP 202#ifdef CONFIG_BLK_CGROUP
202void kthread_associate_blkcg(struct cgroup_subsys_state *css); 203void kthread_associate_blkcg(struct cgroup_subsys_state *css);
203struct cgroup_subsys_state *kthread_blkcg(void); 204struct cgroup_subsys_state *kthread_blkcg(void);
diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index 7c560e0dc8f4..9022f0c2e2e4 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -36,7 +36,7 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter)
36 __account_scheduler_latency(task, usecs, inter); 36 __account_scheduler_latency(task, usecs, inter);
37} 37}
38 38
39void clear_all_latency_tracing(struct task_struct *p); 39void clear_tsk_latency_tracing(struct task_struct *p);
40 40
41extern int sysctl_latencytop(struct ctl_table *table, int write, 41extern int sysctl_latencytop(struct ctl_table *table, int write,
42 void __user *buffer, size_t *lenp, loff_t *ppos); 42 void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -48,7 +48,7 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter)
48{ 48{
49} 49}
50 50
51static inline void clear_all_latency_tracing(struct task_struct *p) 51static inline void clear_tsk_latency_tracing(struct task_struct *p)
52{ 52{
53} 53}
54 54
diff --git a/include/linux/list.h b/include/linux/list.h
index 9e9a6403dbe4..d3b4db895340 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -151,6 +151,23 @@ static inline void list_replace_init(struct list_head *old,
151} 151}
152 152
153/** 153/**
154 * list_swap - replace entry1 with entry2 and re-add entry1 at entry2's position
155 * @entry1: the location to place entry2
156 * @entry2: the location to place entry1
157 */
158static inline void list_swap(struct list_head *entry1,
159 struct list_head *entry2)
160{
161 struct list_head *pos = entry2->prev;
162
163 list_del(entry2);
164 list_replace(entry1, entry2);
165 if (pos == entry1)
166 pos = entry2;
167 list_add(entry1, pos);
168}
169
170/**
154 * list_del_init - deletes entry from list and reinitialize it. 171 * list_del_init - deletes entry from list and reinitialize it.
155 * @entry: the element to delete from the list. 172 * @entry: the element to delete from the list.
156 */ 173 */
diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h
index ba79956e848d..20f178c24e9d 100644
--- a/include/linux/list_sort.h
+++ b/include/linux/list_sort.h
@@ -6,6 +6,7 @@
6 6
7struct list_head; 7struct list_head;
8 8
9__attribute__((nonnull(2,3)))
9void list_sort(void *priv, struct list_head *head, 10void list_sort(void *priv, struct list_head *head,
10 int (*cmp)(void *priv, struct list_head *a, 11 int (*cmp)(void *priv, struct list_head *a,
11 struct list_head *b)); 12 struct list_head *b));
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 30561a954ee0..bc74d6a4407c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -94,8 +94,8 @@ enum mem_cgroup_events_target {
94 MEM_CGROUP_NTARGETS, 94 MEM_CGROUP_NTARGETS,
95}; 95};
96 96
97struct mem_cgroup_stat_cpu { 97struct memcg_vmstats_percpu {
98 long count[MEMCG_NR_STAT]; 98 long stat[MEMCG_NR_STAT];
99 unsigned long events[NR_VM_EVENT_ITEMS]; 99 unsigned long events[NR_VM_EVENT_ITEMS];
100 unsigned long nr_page_events; 100 unsigned long nr_page_events;
101 unsigned long targets[MEM_CGROUP_NTARGETS]; 101 unsigned long targets[MEM_CGROUP_NTARGETS];
@@ -128,6 +128,7 @@ struct mem_cgroup_per_node {
128 128
129 struct lruvec_stat __percpu *lruvec_stat_cpu; 129 struct lruvec_stat __percpu *lruvec_stat_cpu;
130 atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; 130 atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS];
131 atomic_long_t lruvec_stat_local[NR_VM_NODE_STAT_ITEMS];
131 132
132 unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; 133 unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
133 134
@@ -274,13 +275,17 @@ struct mem_cgroup {
274 struct task_struct *move_lock_task; 275 struct task_struct *move_lock_task;
275 276
276 /* memory.stat */ 277 /* memory.stat */
277 struct mem_cgroup_stat_cpu __percpu *stat_cpu; 278 struct memcg_vmstats_percpu __percpu *vmstats_percpu;
278 279
279 MEMCG_PADDING(_pad2_); 280 MEMCG_PADDING(_pad2_);
280 281
281 atomic_long_t stat[MEMCG_NR_STAT]; 282 atomic_long_t vmstats[MEMCG_NR_STAT];
282 atomic_long_t events[NR_VM_EVENT_ITEMS]; 283 atomic_long_t vmstats_local[MEMCG_NR_STAT];
283 atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; 284
285 atomic_long_t vmevents[NR_VM_EVENT_ITEMS];
286 atomic_long_t vmevents_local[NR_VM_EVENT_ITEMS];
287
288 atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
284 289
285 unsigned long socket_pressure; 290 unsigned long socket_pressure;
286 291
@@ -554,10 +559,9 @@ void unlock_page_memcg(struct page *page);
554 * idx can be of type enum memcg_stat_item or node_stat_item. 559 * idx can be of type enum memcg_stat_item or node_stat_item.
555 * Keep in sync with memcg_exact_page_state(). 560 * Keep in sync with memcg_exact_page_state().
556 */ 561 */
557static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, 562static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
558 int idx)
559{ 563{
560 long x = atomic_long_read(&memcg->stat[idx]); 564 long x = atomic_long_read(&memcg->vmstats[idx]);
561#ifdef CONFIG_SMP 565#ifdef CONFIG_SMP
562 if (x < 0) 566 if (x < 0)
563 x = 0; 567 x = 0;
@@ -565,23 +569,23 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
565 return x; 569 return x;
566} 570}
567 571
568/* idx can be of type enum memcg_stat_item or node_stat_item */ 572/*
569static inline void __mod_memcg_state(struct mem_cgroup *memcg, 573 * idx can be of type enum memcg_stat_item or node_stat_item.
570 int idx, int val) 574 * Keep in sync with memcg_exact_page_state().
575 */
576static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
577 int idx)
571{ 578{
572 long x; 579 long x = atomic_long_read(&memcg->vmstats_local[idx]);
573 580#ifdef CONFIG_SMP
574 if (mem_cgroup_disabled()) 581 if (x < 0)
575 return;
576
577 x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
578 if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
579 atomic_long_add(x, &memcg->stat[idx]);
580 x = 0; 582 x = 0;
581 } 583#endif
582 __this_cpu_write(memcg->stat_cpu->count[idx], x); 584 return x;
583} 585}
584 586
587void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
588
585/* idx can be of type enum memcg_stat_item or node_stat_item */ 589/* idx can be of type enum memcg_stat_item or node_stat_item */
586static inline void mod_memcg_state(struct mem_cgroup *memcg, 590static inline void mod_memcg_state(struct mem_cgroup *memcg,
587 int idx, int val) 591 int idx, int val)
@@ -642,32 +646,27 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
642 return x; 646 return x;
643} 647}
644 648
645static inline void __mod_lruvec_state(struct lruvec *lruvec, 649static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
646 enum node_stat_item idx, int val) 650 enum node_stat_item idx)
647{ 651{
648 struct mem_cgroup_per_node *pn; 652 struct mem_cgroup_per_node *pn;
649 long x; 653 long x;
650 654
651 /* Update node */
652 __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
653
654 if (mem_cgroup_disabled()) 655 if (mem_cgroup_disabled())
655 return; 656 return node_page_state(lruvec_pgdat(lruvec), idx);
656 657
657 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); 658 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
658 659 x = atomic_long_read(&pn->lruvec_stat_local[idx]);
659 /* Update memcg */ 660#ifdef CONFIG_SMP
660 __mod_memcg_state(pn->memcg, idx, val); 661 if (x < 0)
661
662 /* Update lruvec */
663 x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
664 if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
665 atomic_long_add(x, &pn->lruvec_stat[idx]);
666 x = 0; 662 x = 0;
667 } 663#endif
668 __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); 664 return x;
669} 665}
670 666
667void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
668 int val);
669
671static inline void mod_lruvec_state(struct lruvec *lruvec, 670static inline void mod_lruvec_state(struct lruvec *lruvec,
672 enum node_stat_item idx, int val) 671 enum node_stat_item idx, int val)
673{ 672{
@@ -708,22 +707,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
708 gfp_t gfp_mask, 707 gfp_t gfp_mask,
709 unsigned long *total_scanned); 708 unsigned long *total_scanned);
710 709
711static inline void __count_memcg_events(struct mem_cgroup *memcg, 710void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
712 enum vm_event_item idx, 711 unsigned long count);
713 unsigned long count)
714{
715 unsigned long x;
716
717 if (mem_cgroup_disabled())
718 return;
719
720 x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
721 if (unlikely(x > MEMCG_CHARGE_BATCH)) {
722 atomic_long_add(x, &memcg->events[idx]);
723 x = 0;
724 }
725 __this_cpu_write(memcg->stat_cpu->events[idx], x);
726}
727 712
728static inline void count_memcg_events(struct mem_cgroup *memcg, 713static inline void count_memcg_events(struct mem_cgroup *memcg,
729 enum vm_event_item idx, 714 enum vm_event_item idx,
@@ -1011,8 +996,13 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
1011{ 996{
1012} 997}
1013 998
1014static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, 999static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
1015 int idx) 1000{
1001 return 0;
1002}
1003
1004static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
1005 int idx)
1016{ 1006{
1017 return 0; 1007 return 0;
1018} 1008}
@@ -1047,6 +1037,12 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
1047 return node_page_state(lruvec_pgdat(lruvec), idx); 1037 return node_page_state(lruvec_pgdat(lruvec), idx);
1048} 1038}
1049 1039
1040static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
1041 enum node_stat_item idx)
1042{
1043 return node_page_state(lruvec_pgdat(lruvec), idx);
1044}
1045
1050static inline void __mod_lruvec_state(struct lruvec *lruvec, 1046static inline void __mod_lruvec_state(struct lruvec *lruvec,
1051 enum node_stat_item idx, int val) 1047 enum node_stat_item idx, int val)
1052{ 1048{
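The inline bodies removed here all shared one percpu batching scheme, now centralized out of line: accumulate deltas in a percpu slot and fold them into the shared atomic only once the batch threshold is exceeded. A generic sketch of that scheme with illustrative names (the new out-of-line functions presumably also maintain the *_local counters added above):

#include <linux/atomic.h>
#include <linux/percpu.h>

#define BATCH	64	/* stand-in for MEMCG_CHARGE_BATCH */

struct batched_counter {
	long __percpu	*pcpu;
	atomic_long_t	total;
};

static void batched_mod(struct batched_counter *c, long val)
{
	long x = val + __this_cpu_read(*c->pcpu);

	if (unlikely(abs(x) > BATCH)) {
		atomic_long_add(x, &c->total);
		x = 0;
	}
	__this_cpu_write(*c->pcpu, x);
}
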
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 912614fbbef3..0e8834ac32b7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -536,9 +536,6 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
536struct mmu_gather; 536struct mmu_gather;
537struct inode; 537struct inode;
538 538
539#define page_private(page) ((page)->private)
540#define set_page_private(page, v) ((page)->private = (v))
541
542#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) 539#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
543static inline int pmd_devmap(pmd_t pmd) 540static inline int pmd_devmap(pmd_t pmd)
544{ 541{
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e1f42a07d8f0..8ec38b11b361 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -220,6 +220,9 @@ struct page {
220#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) 220#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
221#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) 221#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
222 222
223#define page_private(page) ((page)->private)
224#define set_page_private(page, v) ((page)->private = (v))
225
223struct page_frag_cache { 226struct page_frag_cache {
224 void * va; 227 void * va;
225#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) 228#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5a4aedc160bd..70394cabaf4e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -18,6 +18,8 @@
18#include <linux/pageblock-flags.h> 18#include <linux/pageblock-flags.h>
19#include <linux/page-flags-layout.h> 19#include <linux/page-flags-layout.h>
20#include <linux/atomic.h> 20#include <linux/atomic.h>
21#include <linux/mm_types.h>
22#include <linux/page-flags.h>
21#include <asm/page.h> 23#include <asm/page.h>
22 24
23/* Free memory management - zoned buddy allocator. */ 25/* Free memory management - zoned buddy allocator. */
@@ -98,6 +100,62 @@ struct free_area {
98 unsigned long nr_free; 100 unsigned long nr_free;
99}; 101};
100 102
103/* Used for pages not on another list */
104static inline void add_to_free_area(struct page *page, struct free_area *area,
105 int migratetype)
106{
107 list_add(&page->lru, &area->free_list[migratetype]);
108 area->nr_free++;
109}
110
111/* Used for pages not on another list */
112static inline void add_to_free_area_tail(struct page *page, struct free_area *area,
113 int migratetype)
114{
115 list_add_tail(&page->lru, &area->free_list[migratetype]);
116 area->nr_free++;
117}
118
119#ifdef CONFIG_SHUFFLE_PAGE_ALLOCATOR
120/* Used to preserve page allocation order entropy */
121void add_to_free_area_random(struct page *page, struct free_area *area,
122 int migratetype);
123#else
124static inline void add_to_free_area_random(struct page *page,
125 struct free_area *area, int migratetype)
126{
127 add_to_free_area(page, area, migratetype);
128}
129#endif
130
131/* Used for pages which are on another list */
132static inline void move_to_free_area(struct page *page, struct free_area *area,
133 int migratetype)
134{
135 list_move(&page->lru, &area->free_list[migratetype]);
136}
137
138static inline struct page *get_page_from_free_area(struct free_area *area,
139 int migratetype)
140{
141 return list_first_entry_or_null(&area->free_list[migratetype],
142 struct page, lru);
143}
144
145static inline void del_page_from_free_area(struct page *page,
146 struct free_area *area)
147{
148 list_del(&page->lru);
149 __ClearPageBuddy(page);
150 set_page_private(page, 0);
151 area->nr_free--;
152}
153
154static inline bool free_area_empty(struct free_area *area, int migratetype)
155{
156 return list_empty(&area->free_list[migratetype]);
157}
158
101struct pglist_data; 159struct pglist_data;
102 160
103/* 161/*
@@ -1271,6 +1329,7 @@ void sparse_init(void);
1271#else 1329#else
1272#define sparse_init() do {} while (0) 1330#define sparse_init() do {} while (0)
1273#define sparse_index_init(_sec, _nid) do {} while (0) 1331#define sparse_index_init(_sec, _nid) do {} while (0)
1332#define pfn_present pfn_valid
1274#endif /* CONFIG_SPARSEMEM */ 1333#endif /* CONFIG_SPARSEMEM */
1275 1334
1276/* 1335/*
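Together these helpers keep the nr_free accounting and the PageBuddy/page_private state behind one interface. A typical allocation-side sequence, simplified from the buddy allocator (kernel context assumed):

static struct page *take_first_free(struct free_area *area, int migratetype)
{
	struct page *page = get_page_from_free_area(area, migratetype);

	if (!page)
		return NULL;
	/* Unlinks the page, clears PageBuddy, zeroes page_private and
	 * decrements nr_free in one step. */
	del_page_from_free_area(page, area);
	return page;
}
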
diff --git a/include/linux/plist.h b/include/linux/plist.h
index 97883604a3c5..9365df5a823f 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -231,7 +231,7 @@ static inline int plist_node_empty(const struct plist_node *node)
231 * @type: the type of the struct this is embedded in 231 * @type: the type of the struct this is embedded in
232 * @member: the name of the list_head within the struct 232 * @member: the name of the list_head within the struct
233 */ 233 */
234#ifdef CONFIG_DEBUG_PI_LIST 234#ifdef CONFIG_DEBUG_PLIST
235# define plist_first_entry(head, type, member) \ 235# define plist_first_entry(head, type, member) \
236({ \ 236({ \
237 WARN_ON(plist_head_empty(head)); \ 237 WARN_ON(plist_head_empty(head)); \
@@ -248,7 +248,7 @@ static inline int plist_node_empty(const struct plist_node *node)
248 * @type: the type of the struct this is embedded in 248 * @type: the type of the struct this is embedded in
249 * @member: the name of the list_head within the struct 249 * @member: the name of the list_head within the struct
250 */ 250 */
251#ifdef CONFIG_DEBUG_PI_LIST 251#ifdef CONFIG_DEBUG_PLIST
252# define plist_last_entry(head, type, member) \ 252# define plist_last_entry(head, type, member) \
253({ \ 253({ \
254 WARN_ON(plist_head_empty(head)); \ 254 WARN_ON(plist_head_empty(head)); \
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 7e0fdcf905d2..1cdc32b1f1b0 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -16,7 +16,11 @@
16extern struct ctl_table epoll_table[]; /* for sysctl */ 16extern struct ctl_table epoll_table[]; /* for sysctl */
17/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating 17/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
18 additional memory. */ 18 additional memory. */
19#ifdef __clang__
20#define MAX_STACK_ALLOC 768
21#else
19#define MAX_STACK_ALLOC 832 22#define MAX_STACK_ALLOC 832
23#endif
20#define FRONTEND_STACK_ALLOC 256 24#define FRONTEND_STACK_ALLOC 256
21#define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC 25#define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC
22#define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC 26#define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC
diff --git a/include/linux/pps-gpio.h b/include/linux/pps-gpio.h
index 56f35dd3d01d..44171e6b7197 100644
--- a/include/linux/pps-gpio.h
+++ b/include/linux/pps-gpio.h
@@ -23,10 +23,11 @@
23#define _PPS_GPIO_H 23#define _PPS_GPIO_H
24 24
25struct pps_gpio_platform_data { 25struct pps_gpio_platform_data {
26 struct gpio_desc *gpio_pin;
27 struct gpio_desc *echo_pin;
26 bool assert_falling_edge; 28 bool assert_falling_edge;
27 bool capture_clear; 29 bool capture_clear;
28 unsigned int gpio_pin; 30 unsigned int echo_active_ms;
29 const char *gpio_label;
30}; 31};
31 32
32#endif /* _PPS_GPIO_H */ 33#endif /* _PPS_GPIO_H */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 84ea4d094af3..cefd374c47b1 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -82,6 +82,8 @@ static inline void console_verbose(void)
82extern char devkmsg_log_str[]; 82extern char devkmsg_log_str[];
83struct ctl_table; 83struct ctl_table;
84 84
85extern int suppress_printk;
86
85struct va_format { 87struct va_format {
86 const char *fmt; 88 const char *fmt;
87 va_list *va; 89 va_list *va;
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 7006008d5b72..7b3de7321219 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -4,6 +4,7 @@
4#include <linux/jump_label.h> 4#include <linux/jump_label.h>
5#include <linux/psi_types.h> 5#include <linux/psi_types.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/poll.h>
7 8
8struct seq_file; 9struct seq_file;
9struct css_set; 10struct css_set;
@@ -11,6 +12,7 @@ struct css_set;
11#ifdef CONFIG_PSI 12#ifdef CONFIG_PSI
12 13
13extern struct static_key_false psi_disabled; 14extern struct static_key_false psi_disabled;
15extern struct psi_group psi_system;
14 16
15void psi_init(void); 17void psi_init(void);
16 18
@@ -26,6 +28,13 @@ int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
26int psi_cgroup_alloc(struct cgroup *cgrp); 28int psi_cgroup_alloc(struct cgroup *cgrp);
27void psi_cgroup_free(struct cgroup *cgrp); 29void psi_cgroup_free(struct cgroup *cgrp);
28void cgroup_move_task(struct task_struct *p, struct css_set *to); 30void cgroup_move_task(struct task_struct *p, struct css_set *to);
31
32struct psi_trigger *psi_trigger_create(struct psi_group *group,
33 char *buf, size_t nbytes, enum psi_res res);
34void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
35
36__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
37 poll_table *wait);
29#endif 38#endif
30 39
31#else /* CONFIG_PSI */ 40#else /* CONFIG_PSI */
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 2cf422db5d18..07aaf9b82241 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -1,8 +1,11 @@
1#ifndef _LINUX_PSI_TYPES_H 1#ifndef _LINUX_PSI_TYPES_H
2#define _LINUX_PSI_TYPES_H 2#define _LINUX_PSI_TYPES_H
3 3
4#include <linux/kthread.h>
4#include <linux/seqlock.h> 5#include <linux/seqlock.h>
5#include <linux/types.h> 6#include <linux/types.h>
7#include <linux/kref.h>
8#include <linux/wait.h>
6 9
7#ifdef CONFIG_PSI 10#ifdef CONFIG_PSI
8 11
@@ -11,7 +14,7 @@ enum psi_task_count {
11 NR_IOWAIT, 14 NR_IOWAIT,
12 NR_MEMSTALL, 15 NR_MEMSTALL,
13 NR_RUNNING, 16 NR_RUNNING,
14 NR_PSI_TASK_COUNTS, 17 NR_PSI_TASK_COUNTS = 3,
15}; 18};
16 19
17/* Task state bitmasks */ 20/* Task state bitmasks */
@@ -24,7 +27,7 @@ enum psi_res {
24 PSI_IO, 27 PSI_IO,
25 PSI_MEM, 28 PSI_MEM,
26 PSI_CPU, 29 PSI_CPU,
27 NR_PSI_RESOURCES, 30 NR_PSI_RESOURCES = 3,
28}; 31};
29 32
30/* 33/*
@@ -41,7 +44,13 @@ enum psi_states {
41 PSI_CPU_SOME, 44 PSI_CPU_SOME,
42 /* Only per-CPU, to weigh the CPU in the global average: */ 45 /* Only per-CPU, to weigh the CPU in the global average: */
43 PSI_NONIDLE, 46 PSI_NONIDLE,
44 NR_PSI_STATES, 47 NR_PSI_STATES = 6,
48};
49
50enum psi_aggregators {
51 PSI_AVGS = 0,
52 PSI_POLL,
53 NR_PSI_AGGREGATORS,
45}; 54};
46 55
47struct psi_group_cpu { 56struct psi_group_cpu {
@@ -53,6 +62,9 @@ struct psi_group_cpu {
53 /* States of the tasks belonging to this group */ 62 /* States of the tasks belonging to this group */
54 unsigned int tasks[NR_PSI_TASK_COUNTS]; 63 unsigned int tasks[NR_PSI_TASK_COUNTS];
55 64
65 /* Aggregate pressure state derived from the tasks */
66 u32 state_mask;
67
56 /* Period time sampling buckets for each state of interest (ns) */ 68 /* Period time sampling buckets for each state of interest (ns) */
57 u32 times[NR_PSI_STATES]; 69 u32 times[NR_PSI_STATES];
58 70
@@ -62,25 +74,94 @@ struct psi_group_cpu {
62 /* 2nd cacheline updated by the aggregator */ 74 /* 2nd cacheline updated by the aggregator */
63 75
64 /* Delta detection against the sampling buckets */ 76 /* Delta detection against the sampling buckets */
65 u32 times_prev[NR_PSI_STATES] ____cacheline_aligned_in_smp; 77 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
78 ____cacheline_aligned_in_smp;
79};
80
81/* PSI growth tracking window */
82struct psi_window {
83 /* Window size in ns */
84 u64 size;
85
86 /* Start time of the current window in ns */
87 u64 start_time;
88
89 /* Value at the start of the window */
90 u64 start_value;
91
92 /* Value growth in the previous window */
93 u64 prev_growth;
94};
95
96struct psi_trigger {
97 /* PSI state being monitored by the trigger */
98 enum psi_states state;
99
100 /* User-specified threshold in ns */
101 u64 threshold;
102
103 /* List node inside triggers list */
104 struct list_head node;
105
106 /* Backpointer needed during trigger destruction */
107 struct psi_group *group;
108
109 /* Wait queue for polling */
110 wait_queue_head_t event_wait;
111
112 /* Pending event flag */
113 int event;
114
115 /* Tracking window */
116 struct psi_window win;
117
118 /*
119 * Time last event was generated. Used for rate-limiting
120 * events to one per window
121 */
122 u64 last_event_time;
123
124 /* Refcounting to prevent premature destruction */
125 struct kref refcount;
66}; 126};
67 127
68struct psi_group { 128struct psi_group {
69 /* Protects data updated during an aggregation */ 129 /* Protects data used by the aggregator */
70 struct mutex stat_lock; 130 struct mutex avgs_lock;
71 131
72 /* Per-cpu task state & time tracking */ 132 /* Per-cpu task state & time tracking */
73 struct psi_group_cpu __percpu *pcpu; 133 struct psi_group_cpu __percpu *pcpu;
74 134
75 /* Periodic aggregation state */ 135 /* Running pressure averages */
76 u64 total_prev[NR_PSI_STATES - 1]; 136 u64 avg_total[NR_PSI_STATES - 1];
77 u64 last_update; 137 u64 avg_last_update;
78 u64 next_update; 138 u64 avg_next_update;
79 struct delayed_work clock_work; 139
140 /* Aggregator work control */
141 struct delayed_work avgs_work;
80 142
81 /* Total stall times and sampled pressure averages */ 143 /* Total stall times and sampled pressure averages */
82 u64 total[NR_PSI_STATES - 1]; 144 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
83 unsigned long avg[NR_PSI_STATES - 1][3]; 145 unsigned long avg[NR_PSI_STATES - 1][3];
146
147 /* Monitor work control */
148 atomic_t poll_scheduled;
149 struct kthread_worker __rcu *poll_kworker;
150 struct kthread_delayed_work poll_work;
151
152 /* Protects data used by the monitor */
153 struct mutex trigger_lock;
154
155 /* Configured polling triggers */
156 struct list_head triggers;
157 u32 nr_triggers[NR_PSI_STATES - 1];
158 u32 poll_states;
159 u64 poll_min_period;
160
161 /* Total stall times at the start of monitor activation */
162 u64 polling_total[NR_PSI_STATES - 1];
163 u64 polling_next_update;
164 u64 polling_until;
84}; 165};
85 166
86#else /* CONFIG_PSI */ 167#else /* CONFIG_PSI */
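From userspace, each psi_trigger corresponds to one line written to a pressure file, after which the same fd is polled for POLLPRI. A hedged example against the documented interface (threshold and window in microseconds; the /proc/pressure/memory path is assumed):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char trig[] = "some 150000 1000000";	/* 150ms stall per 1s window */
	struct pollfd pfd;
	int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);

	if (fd < 0)
		return 1;
	if (write(fd, trig, strlen(trig) + 1) < 0)
		return 1;

	pfd.fd = fd;
	pfd.events = POLLPRI;
	while (poll(&pfd, 1, -1) > 0)
		if (pfd.revents & POLLPRI)
			printf("memory pressure event\n");

	close(fd);
	return 0;
}
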
diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h
index 3bcd67fd5548..dd464943f717 100644
--- a/include/linux/qcom-geni-se.h
+++ b/include/linux/qcom-geni-se.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 1/* SPDX-License-Identifier: GPL-2.0 */
2/* 2/*
3 * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. 3 * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
4 */ 4 */
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index e63799a6e895..3734cd8f38a8 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -14,6 +14,7 @@ struct device;
14#define SYS_POWER_OFF 0x0003 /* Notify of system power off */ 14#define SYS_POWER_OFF 0x0003 /* Notify of system power off */
15 15
16enum reboot_mode { 16enum reboot_mode {
17 REBOOT_UNDEFINED = -1,
17 REBOOT_COLD = 0, 18 REBOOT_COLD = 0,
18 REBOOT_WARM, 19 REBOOT_WARM,
19 REBOOT_HARD, 20 REBOOT_HARD,
@@ -21,6 +22,7 @@ enum reboot_mode {
21 REBOOT_GPIO, 22 REBOOT_GPIO,
22}; 23};
23extern enum reboot_mode reboot_mode; 24extern enum reboot_mode reboot_mode;
25extern enum reboot_mode panic_reboot_mode;
24 26
25enum reboot_type { 27enum reboot_type {
26 BOOT_TRIPLE = 't', 28 BOOT_TRIPLE = 't',
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a2cd15855bad..11837410690f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -26,7 +26,6 @@
26#include <linux/latencytop.h> 26#include <linux/latencytop.h>
27#include <linux/sched/prio.h> 27#include <linux/sched/prio.h>
28#include <linux/signal_types.h> 28#include <linux/signal_types.h>
29#include <linux/psi_types.h>
30#include <linux/mm_types_task.h> 29#include <linux/mm_types_task.h>
31#include <linux/task_io_accounting.h> 30#include <linux/task_io_accounting.h>
32#include <linux/rseq.h> 31#include <linux/rseq.h>
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index e412c092c1e8..38a0f0785323 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -271,17 +271,18 @@ static inline int signal_group_exit(const struct signal_struct *sig)
271extern void flush_signals(struct task_struct *); 271extern void flush_signals(struct task_struct *);
272extern void ignore_signals(struct task_struct *); 272extern void ignore_signals(struct task_struct *);
273extern void flush_signal_handlers(struct task_struct *, int force_default); 273extern void flush_signal_handlers(struct task_struct *, int force_default);
274extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *info); 274extern int dequeue_signal(struct task_struct *task,
275 sigset_t *mask, kernel_siginfo_t *info);
275 276
276static inline int kernel_dequeue_signal(void) 277static inline int kernel_dequeue_signal(void)
277{ 278{
278 struct task_struct *tsk = current; 279 struct task_struct *task = current;
279 kernel_siginfo_t __info; 280 kernel_siginfo_t __info;
280 int ret; 281 int ret;
281 282
282 spin_lock_irq(&tsk->sighand->siglock); 283 spin_lock_irq(&task->sighand->siglock);
283 ret = dequeue_signal(tsk, &tsk->blocked, &__info); 284 ret = dequeue_signal(task, &task->blocked, &__info);
284 spin_unlock_irq(&tsk->sighand->siglock); 285 spin_unlock_irq(&task->sighand->siglock);
285 286
286 return ret; 287 return ret;
287} 288}
@@ -419,18 +420,18 @@ static inline void set_restore_sigmask(void)
419 WARN_ON(!test_thread_flag(TIF_SIGPENDING)); 420 WARN_ON(!test_thread_flag(TIF_SIGPENDING));
420} 421}
421 422
422static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) 423static inline void clear_tsk_restore_sigmask(struct task_struct *task)
423{ 424{
424 clear_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); 425 clear_tsk_thread_flag(task, TIF_RESTORE_SIGMASK);
425} 426}
426 427
427static inline void clear_restore_sigmask(void) 428static inline void clear_restore_sigmask(void)
428{ 429{
429 clear_thread_flag(TIF_RESTORE_SIGMASK); 430 clear_thread_flag(TIF_RESTORE_SIGMASK);
430} 431}
431static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) 432static inline bool test_tsk_restore_sigmask(struct task_struct *task)
432{ 433{
433 return test_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); 434 return test_tsk_thread_flag(task, TIF_RESTORE_SIGMASK);
434} 435}
435static inline bool test_restore_sigmask(void) 436static inline bool test_restore_sigmask(void)
436{ 437{
@@ -449,9 +450,9 @@ static inline void set_restore_sigmask(void)
449 current->restore_sigmask = true; 450 current->restore_sigmask = true;
450 WARN_ON(!test_thread_flag(TIF_SIGPENDING)); 451 WARN_ON(!test_thread_flag(TIF_SIGPENDING));
451} 452}
452static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) 453static inline void clear_tsk_restore_sigmask(struct task_struct *task)
453{ 454{
454 tsk->restore_sigmask = false; 455 task->restore_sigmask = false;
455} 456}
456static inline void clear_restore_sigmask(void) 457static inline void clear_restore_sigmask(void)
457{ 458{
@@ -461,9 +462,9 @@ static inline bool test_restore_sigmask(void)
461{ 462{
462 return current->restore_sigmask; 463 return current->restore_sigmask;
463} 464}
464static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) 465static inline bool test_tsk_restore_sigmask(struct task_struct *task)
465{ 466{
466 return tsk->restore_sigmask; 467 return task->restore_sigmask;
467} 468}
468static inline bool test_and_clear_restore_sigmask(void) 469static inline bool test_and_clear_restore_sigmask(void)
469{ 470{
@@ -617,9 +618,9 @@ static inline struct pid *task_session(struct task_struct *task)
617 return task->signal->pids[PIDTYPE_SID]; 618 return task->signal->pids[PIDTYPE_SID];
618} 619}
619 620
620static inline int get_nr_threads(struct task_struct *tsk) 621static inline int get_nr_threads(struct task_struct *task)
621{ 622{
622 return tsk->signal->nr_threads; 623 return task->signal->nr_threads;
623} 624}
624 625
625static inline bool thread_group_leader(struct task_struct *p) 626static inline bool thread_group_leader(struct task_struct *p)
@@ -658,35 +659,35 @@ static inline int thread_group_empty(struct task_struct *p)
658#define delay_group_leader(p) \ 659#define delay_group_leader(p) \
659 (thread_group_leader(p) && !thread_group_empty(p)) 660 (thread_group_leader(p) && !thread_group_empty(p))
660 661
661extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, 662extern struct sighand_struct *__lock_task_sighand(struct task_struct *task,
662 unsigned long *flags); 663 unsigned long *flags);
663 664
664static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, 665static inline struct sighand_struct *lock_task_sighand(struct task_struct *task,
665 unsigned long *flags) 666 unsigned long *flags)
666{ 667{
667 struct sighand_struct *ret; 668 struct sighand_struct *ret;
668 669
669 ret = __lock_task_sighand(tsk, flags); 670 ret = __lock_task_sighand(task, flags);
670 (void)__cond_lock(&tsk->sighand->siglock, ret); 671 (void)__cond_lock(&task->sighand->siglock, ret);
671 return ret; 672 return ret;
672} 673}
673 674
674static inline void unlock_task_sighand(struct task_struct *tsk, 675static inline void unlock_task_sighand(struct task_struct *task,
675 unsigned long *flags) 676 unsigned long *flags)
676{ 677{
677 spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); 678 spin_unlock_irqrestore(&task->sighand->siglock, *flags);
678} 679}
679 680
680static inline unsigned long task_rlimit(const struct task_struct *tsk, 681static inline unsigned long task_rlimit(const struct task_struct *task,
681 unsigned int limit) 682 unsigned int limit)
682{ 683{
683 return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); 684 return READ_ONCE(task->signal->rlim[limit].rlim_cur);
684} 685}
685 686
686static inline unsigned long task_rlimit_max(const struct task_struct *tsk, 687static inline unsigned long task_rlimit_max(const struct task_struct *task,
687 unsigned int limit) 688 unsigned int limit)
688{ 689{
689 return READ_ONCE(tsk->signal->rlim[limit].rlim_max); 690 return READ_ONCE(task->signal->rlim[limit].rlim_max);
690} 691}
691 692
692static inline unsigned long rlimit(unsigned int limit) 693static inline unsigned long rlimit(unsigned int limit)
diff --git a/init/Kconfig b/init/Kconfig
index 82b84e5ee30d..8b9ffe236e4f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1752,6 +1752,30 @@ config SLAB_FREELIST_HARDENED
1752 sacrifices to harden the kernel slab allocator against common 1752
1753 freelist exploit methods. 1753 freelist exploit methods.
1754 1754
1755config SHUFFLE_PAGE_ALLOCATOR
1756 bool "Page allocator randomization"
1757 default SLAB_FREELIST_RANDOM && ACPI_NUMA
1758 help
1759 Randomization of the page allocator improves the average
1760 utilization of a direct-mapped memory-side-cache. See section
1761 5.2.27 Heterogeneous Memory Attribute Table (HMAT) in the ACPI
1762 6.2a specification for an example of how a platform advertises
1763 the presence of a memory-side-cache. There are also incidental
1764 security benefits as it reduces the predictability of page
1765 allocations to compliment SLAB_FREELIST_RANDOM, but the
1766 default granularity of shuffling on the "MAX_ORDER - 1" i.e,
1767 10th order of pages is selected based on cache utilization
1768 benefits on x86.
1769
1770 While the randomization improves cache utilization it may
1771 negatively impact workloads on platforms without a cache. For
1772 this reason, by default, the randomization is enabled only
1773 after runtime detection of a direct-mapped memory-side-cache.
1774 Otherwise, the randomization may be force enabled with the
1775 'page_alloc.shuffle' kernel command line parameter.
1776
1777 Say Y if unsure.
1778
1755config SLUB_CPU_PARTIAL 1779config SLUB_CPU_PARTIAL
1756 default y 1780 default y
1757 depends on SLUB && SMP 1781 depends on SLUB && SMP
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 49f9bf4ffc7f..bfaae457810c 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -120,7 +120,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
120static int zero; 120static int zero;
121static int one = 1; 121static int one = 1;
122static int int_max = INT_MAX; 122static int int_max = INT_MAX;
123static int ipc_mni = IPCMNI; 123int ipc_mni = IPCMNI;
124int ipc_mni_shift = IPCMNI_SHIFT;
125int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
124 126
125static struct ctl_table ipc_kern_table[] = { 127static struct ctl_table ipc_kern_table[] = {
126 { 128 {
@@ -246,3 +248,13 @@ static int __init ipc_sysctl_init(void)
246} 248}
247 249
248device_initcall(ipc_sysctl_init); 250device_initcall(ipc_sysctl_init);
251
252static int __init ipc_mni_extend(char *str)
253{
254 ipc_mni = IPCMNI_EXTEND;
255 ipc_mni_shift = IPCMNI_EXTEND_SHIFT;
256 ipc_min_cycle = IPCMNI_EXTEND_MIN_CYCLE;
257 pr_info("IPCMNI extended to %d.\n", ipc_mni);
258 return 0;
259}
260early_param("ipcmni_extend", ipc_mni_extend);
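
Usage sketch for the new early parameter: appending 'ipcmni_extend' to the kernel command line raises the SysV IPC id limit from 32768 (IPCMNI) to 16777216 (IPCMNI_EXTEND) and prints the pr_info() line above, at the cost of shrinking the per-id sequence number from 16 to 7 bits:

    ipcmni_extend
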
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ba44164ea1f9..216cad1ff0d0 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -76,6 +76,7 @@ struct mqueue_inode_info {
76 wait_queue_head_t wait_q; 76 wait_queue_head_t wait_q;
77 77
78 struct rb_root msg_tree; 78 struct rb_root msg_tree;
79 struct rb_node *msg_tree_rightmost;
79 struct posix_msg_tree_node *node_cache; 80 struct posix_msg_tree_node *node_cache;
80 struct mq_attr attr; 81 struct mq_attr attr;
81 82
@@ -131,6 +132,7 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
131{ 132{
132 struct rb_node **p, *parent = NULL; 133 struct rb_node **p, *parent = NULL;
133 struct posix_msg_tree_node *leaf; 134 struct posix_msg_tree_node *leaf;
135 bool rightmost = true;
134 136
135 p = &info->msg_tree.rb_node; 137 p = &info->msg_tree.rb_node;
136 while (*p) { 138 while (*p) {
@@ -139,9 +141,10 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
139 141
140 if (likely(leaf->priority == msg->m_type)) 142 if (likely(leaf->priority == msg->m_type))
141 goto insert_msg; 143 goto insert_msg;
142 else if (msg->m_type < leaf->priority) 144 else if (msg->m_type < leaf->priority) {
143 p = &(*p)->rb_left; 145 p = &(*p)->rb_left;
144 else 146 rightmost = false;
147 } else
145 p = &(*p)->rb_right; 148 p = &(*p)->rb_right;
146 } 149 }
147 if (info->node_cache) { 150 if (info->node_cache) {
@@ -154,6 +157,10 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
154 INIT_LIST_HEAD(&leaf->msg_list); 157 INIT_LIST_HEAD(&leaf->msg_list);
155 } 158 }
156 leaf->priority = msg->m_type; 159 leaf->priority = msg->m_type;
160
161 if (rightmost)
162 info->msg_tree_rightmost = &leaf->rb_node;
163
157 rb_link_node(&leaf->rb_node, parent, p); 164 rb_link_node(&leaf->rb_node, parent, p);
158 rb_insert_color(&leaf->rb_node, &info->msg_tree); 165 rb_insert_color(&leaf->rb_node, &info->msg_tree);
159insert_msg: 166insert_msg:
@@ -163,23 +170,35 @@ insert_msg:
163 return 0; 170 return 0;
164} 171}
165 172
173static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
174 struct mqueue_inode_info *info)
175{
176 struct rb_node *node = &leaf->rb_node;
177
178 if (info->msg_tree_rightmost == node)
179 info->msg_tree_rightmost = rb_prev(node);
180
181 rb_erase(node, &info->msg_tree);
182 if (info->node_cache) {
183 kfree(leaf);
184 } else {
185 info->node_cache = leaf;
186 }
187}
188
166static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) 189static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
167{ 190{
168 struct rb_node **p, *parent = NULL; 191 struct rb_node *parent = NULL;
169 struct posix_msg_tree_node *leaf; 192 struct posix_msg_tree_node *leaf;
170 struct msg_msg *msg; 193 struct msg_msg *msg;
171 194
172try_again: 195try_again:
173 p = &info->msg_tree.rb_node; 196 /*
174 while (*p) { 197 * During insert, low priorities go to the left and high to the
175 parent = *p; 198 * right. On receive, we want the highest priorities first, so
176 /* 199 * walk all the way to the right.
177 * During insert, low priorities go to the left and high to the 200 */
178 * right. On receive, we want the highest priorities first, so 201 parent = info->msg_tree_rightmost;
179 * walk all the way to the right.
180 */
181 p = &(*p)->rb_right;
182 }
183 if (!parent) { 202 if (!parent) {
184 if (info->attr.mq_curmsgs) { 203 if (info->attr.mq_curmsgs) {
185 pr_warn_once("Inconsistency in POSIX message queue, " 204 pr_warn_once("Inconsistency in POSIX message queue, "
@@ -194,24 +213,14 @@ try_again:
194 pr_warn_once("Inconsistency in POSIX message queue, " 213 pr_warn_once("Inconsistency in POSIX message queue, "
195 "empty leaf node but we haven't implemented " 214 "empty leaf node but we haven't implemented "
196 "lazy leaf delete!\n"); 215 "lazy leaf delete!\n");
197 rb_erase(&leaf->rb_node, &info->msg_tree); 216 msg_tree_erase(leaf, info);
198 if (info->node_cache) {
199 kfree(leaf);
200 } else {
201 info->node_cache = leaf;
202 }
203 goto try_again; 217 goto try_again;
204 } else { 218 } else {
205 msg = list_first_entry(&leaf->msg_list, 219 msg = list_first_entry(&leaf->msg_list,
206 struct msg_msg, m_list); 220 struct msg_msg, m_list);
207 list_del(&msg->m_list); 221 list_del(&msg->m_list);
208 if (list_empty(&leaf->msg_list)) { 222 if (list_empty(&leaf->msg_list)) {
209 rb_erase(&leaf->rb_node, &info->msg_tree); 223 msg_tree_erase(leaf, info);
210 if (info->node_cache) {
211 kfree(leaf);
212 } else {
213 info->node_cache = leaf;
214 }
215 } 224 }
216 } 225 }
217 info->attr.mq_curmsgs--; 226 info->attr.mq_curmsgs--;
@@ -254,6 +263,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
254 info->qsize = 0; 263 info->qsize = 0;
255 info->user = NULL; /* set when all is ok */ 264 info->user = NULL; /* set when all is ok */
256 info->msg_tree = RB_ROOT; 265 info->msg_tree = RB_ROOT;
266 info->msg_tree_rightmost = NULL;
257 info->node_cache = NULL; 267 info->node_cache = NULL;
258 memset(&info->attr, 0, sizeof(info->attr)); 268 memset(&info->attr, 0, sizeof(info->attr));
259 info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, 269 info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
@@ -430,7 +440,8 @@ static void mqueue_evict_inode(struct inode *inode)
430 struct user_struct *user; 440 struct user_struct *user;
431 unsigned long mq_bytes, mq_treesize; 441 unsigned long mq_bytes, mq_treesize;
432 struct ipc_namespace *ipc_ns; 442 struct ipc_namespace *ipc_ns;
433 struct msg_msg *msg; 443 struct msg_msg *msg, *nmsg;
444 LIST_HEAD(tmp_msg);
434 445
435 clear_inode(inode); 446 clear_inode(inode);
436 447
@@ -441,10 +452,15 @@ static void mqueue_evict_inode(struct inode *inode)
441 info = MQUEUE_I(inode); 452 info = MQUEUE_I(inode);
442 spin_lock(&info->lock); 453 spin_lock(&info->lock);
443 while ((msg = msg_get(info)) != NULL) 454 while ((msg = msg_get(info)) != NULL)
444 free_msg(msg); 455 list_add_tail(&msg->m_list, &tmp_msg);
445 kfree(info->node_cache); 456 kfree(info->node_cache);
446 spin_unlock(&info->lock); 457 spin_unlock(&info->lock);
447 458
459 list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) {
460 list_del(&msg->m_list);
461 free_msg(msg);
462 }
463
448 /* Total amount of bytes accounted for the mqueue */ 464 /* Total amount of bytes accounted for the mqueue */
449 mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + 465 mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
450 min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * 466 min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
@@ -605,8 +621,6 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
605{ 621{
606 struct ext_wait_queue *walk; 622 struct ext_wait_queue *walk;
607 623
608 ewp->task = current;
609
610 list_for_each_entry(walk, &info->e_wait_q[sr].list, list) { 624 list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
611 if (walk->task->prio <= current->prio) { 625 if (walk->task->prio <= current->prio) {
612 list_add_tail(&ewp->list, &walk->list); 626 list_add_tail(&ewp->list, &walk->list);
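
The msg_tree_rightmost bookkeeping above turns the highest-priority lookup in msg_get() from an O(log n) right-spine walk into a cached-pointer read. A minimal standalone sketch of the same pattern (illustrative names, not code from this patch):

    #include <linux/rbtree.h>

    struct prio_tree {
    	struct rb_root root;
    	struct rb_node *rightmost;	/* cached maximum, NULL when empty */
    };

    /* O(1) replacement for walking rb_right to the end of the tree. */
    static struct rb_node *prio_tree_max(struct prio_tree *t)
    {
    	return t->rightmost;
    }

    /* The cache must be kept coherent on erase, as msg_tree_erase() does. */
    static void prio_tree_erase(struct prio_tree *t, struct rb_node *node)
    {
    	if (t->rightmost == node)
    		t->rightmost = rb_prev(node);
    	rb_erase(node, &t->root);
    }
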
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 84598025a6ad..e65593742e2b 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -18,6 +18,7 @@
18#include <linux/utsname.h> 18#include <linux/utsname.h>
19#include <linux/proc_ns.h> 19#include <linux/proc_ns.h>
20#include <linux/uaccess.h> 20#include <linux/uaccess.h>
21#include <linux/sched.h>
21 22
22#include "util.h" 23#include "util.h"
23 24
@@ -64,6 +65,9 @@ static struct msg_msg *alloc_msg(size_t len)
64 pseg = &msg->next; 65 pseg = &msg->next;
65 while (len > 0) { 66 while (len > 0) {
66 struct msg_msgseg *seg; 67 struct msg_msgseg *seg;
68
69 cond_resched();
70
67 alen = min(len, DATALEN_SEG); 71 alen = min(len, DATALEN_SEG);
68 seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT); 72 seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT);
69 if (seg == NULL) 73 if (seg == NULL)
@@ -176,6 +180,8 @@ void free_msg(struct msg_msg *msg)
176 kfree(msg); 180 kfree(msg);
177 while (seg != NULL) { 181 while (seg != NULL) {
178 struct msg_msgseg *tmp = seg->next; 182 struct msg_msgseg *tmp = seg->next;
183
184 cond_resched();
179 kfree(seg); 185 kfree(seg);
180 seg = tmp; 186 seg = tmp;
181 } 187 }
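
Both msgutil.c hunks address the same soft-lockup pattern: a message of size S is stored as a header plus roughly S/DATALEN_SEG individually allocated segments, so building or freeing a very large message is an unbounded kmalloc()/kfree() loop. The added cond_resched() calls give the scheduler a preemption point on every iteration, which matters on non-preemptible kernels.
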
diff --git a/ipc/util.c b/ipc/util.c
index 095274a871f8..d126d156efc6 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -109,7 +109,7 @@ static const struct rhashtable_params ipc_kht_params = {
109 * @ids: ipc identifier set 109 * @ids: ipc identifier set
110 * 110 *
111 * Set up the sequence range to use for the ipc identifier range (limited 111 * Set up the sequence range to use for the ipc identifier range (limited
112 * below IPCMNI) then initialise the keys hashtable and ids idr. 112 * below ipc_mni) then initialise the keys hashtable and ids idr.
113 */ 113 */
114void ipc_init_ids(struct ipc_ids *ids) 114void ipc_init_ids(struct ipc_ids *ids)
115{ 115{
@@ -119,6 +119,7 @@ void ipc_init_ids(struct ipc_ids *ids)
119 rhashtable_init(&ids->key_ht, &ipc_kht_params); 119 rhashtable_init(&ids->key_ht, &ipc_kht_params);
120 idr_init(&ids->ipcs_idr); 120 idr_init(&ids->ipcs_idr);
121 ids->max_idx = -1; 121 ids->max_idx = -1;
122 ids->last_idx = -1;
122#ifdef CONFIG_CHECKPOINT_RESTORE 123#ifdef CONFIG_CHECKPOINT_RESTORE
123 ids->next_id = -1; 124 ids->next_id = -1;
124#endif 125#endif
@@ -192,6 +193,10 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
192 * 193 *
 193 * The caller must own kern_ipc_perm.lock of the new object. 194 * The caller must own kern_ipc_perm.lock of the new object.
194 * On error, the function returns a (negative) error code. 195 * On error, the function returns a (negative) error code.
196 *
197 * To conserve sequence number space, especially with extended ipc_mni,
 198 * the sequence number is incremented only when the newly allocated index
 199 * is not larger than the previously allocated one, i.e. when it wraps.
195 */ 200 */
196static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new) 201static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
197{ 202{
@@ -215,17 +220,42 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
215 */ 220 */
216 221
217 if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */ 222 if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
218 new->seq = ids->seq++; 223 int max_idx;
219 if (ids->seq > IPCID_SEQ_MAX) 224
220 ids->seq = 0; 225 max_idx = max(ids->in_use*3/2, ipc_min_cycle);
221 idx = idr_alloc(&ids->ipcs_idr, new, 0, 0, GFP_NOWAIT); 226 max_idx = min(max_idx, ipc_mni);
227
228 /* allocate the idx, with a NULL struct kern_ipc_perm */
229 idx = idr_alloc_cyclic(&ids->ipcs_idr, NULL, 0, max_idx,
230 GFP_NOWAIT);
231
232 if (idx >= 0) {
233 /*
234 * idx got allocated successfully.
235 * Now calculate the sequence number and set the
236 * pointer for real.
237 */
238 if (idx <= ids->last_idx) {
239 ids->seq++;
240 if (ids->seq >= ipcid_seq_max())
241 ids->seq = 0;
242 }
243 ids->last_idx = idx;
244
245 new->seq = ids->seq;
246 /* no need for smp_wmb(), this is done
247 * inside idr_replace, as part of
248 * rcu_assign_pointer
249 */
250 idr_replace(&ids->ipcs_idr, new, idx);
251 }
222 } else { 252 } else {
223 new->seq = ipcid_to_seqx(next_id); 253 new->seq = ipcid_to_seqx(next_id);
224 idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id), 254 idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
225 0, GFP_NOWAIT); 255 0, GFP_NOWAIT);
226 } 256 }
227 if (idx >= 0) 257 if (idx >= 0)
228 new->id = SEQ_MULTIPLIER * new->seq + idx; 258 new->id = (new->seq << ipcmni_seq_shift()) + idx;
229 return idx; 259 return idx;
230} 260}
231 261
@@ -253,8 +283,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
253 /* 1) Initialize the refcount so that ipc_rcu_putref works */ 283 /* 1) Initialize the refcount so that ipc_rcu_putref works */
254 refcount_set(&new->refcount, 1); 284 refcount_set(&new->refcount, 1);
255 285
256 if (limit > IPCMNI) 286 if (limit > ipc_mni)
257 limit = IPCMNI; 287 limit = ipc_mni;
258 288
259 if (ids->in_use >= limit) 289 if (ids->in_use >= limit)
260 return -ENOSPC; 290 return -ENOSPC;
@@ -737,7 +767,7 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
737 if (total >= ids->in_use) 767 if (total >= ids->in_use)
738 return NULL; 768 return NULL;
739 769
740 for (; pos < IPCMNI; pos++) { 770 for (; pos < ipc_mni; pos++) {
741 ipc = idr_find(&ids->ipcs_idr, pos); 771 ipc = idr_find(&ids->ipcs_idr, pos);
742 if (ipc != NULL) { 772 if (ipc != NULL) {
743 *new_pos = pos + 1; 773 *new_pos = pos + 1;
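
A worked example of the new id layout and wrap-around handling in ipc_idr_alloc(), assuming the default IPCMNI_SHIFT of 15:

    /*
     * seq = 3, idx = 5   ->   id = (3 << 15) + 5 = 98309
     *
     * When idr_alloc_cyclic() later wraps and hands out idx = 2 while
     * ids->last_idx is still 5, the idx <= last_idx test bumps seq to 4,
     * so the reused slot yields id = (4 << 15) + 2 = 131074 and a stale
     * id from the previous cycle can never match.
     */
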
diff --git a/ipc/util.h b/ipc/util.h
index e272be622ae7..0fcf8e719b76 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -15,8 +15,37 @@
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/ipc_namespace.h> 16#include <linux/ipc_namespace.h>
17 17
18#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ 18/*
19#define SEQ_MULTIPLIER (IPCMNI) 19 * The IPC ID contains 2 separate numbers - index and sequence number.
20 * By default,
21 * bits 0-14: index (32k, 15 bits)
22 * bits 15-30: sequence number (64k, 16 bits)
23 *
24 * When IPCMNI extension mode is turned on, the composition changes:
25 * bits 0-23: index (16M, 24 bits)
26 * bits 24-30: sequence number (128, 7 bits)
27 */
28#define IPCMNI_SHIFT 15
29#define IPCMNI_EXTEND_SHIFT 24
30#define IPCMNI_EXTEND_MIN_CYCLE (RADIX_TREE_MAP_SIZE * RADIX_TREE_MAP_SIZE)
31#define IPCMNI (1 << IPCMNI_SHIFT)
32#define IPCMNI_EXTEND (1 << IPCMNI_EXTEND_SHIFT)
33
34#ifdef CONFIG_SYSVIPC_SYSCTL
35extern int ipc_mni;
36extern int ipc_mni_shift;
37extern int ipc_min_cycle;
38
39#define ipcmni_seq_shift() ipc_mni_shift
40#define IPCMNI_IDX_MASK ((1 << ipc_mni_shift) - 1)
41
42#else /* CONFIG_SYSVIPC_SYSCTL */
43
44#define ipc_mni IPCMNI
45#define ipc_min_cycle ((int)RADIX_TREE_MAP_SIZE)
46#define ipcmni_seq_shift() IPCMNI_SHIFT
47#define IPCMNI_IDX_MASK ((1 << IPCMNI_SHIFT) - 1)
48#endif /* CONFIG_SYSVIPC_SYSCTL */
20 49
21void sem_init(void); 50void sem_init(void);
22void msg_init(void); 51void msg_init(void);
@@ -96,9 +125,9 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *);
96#define IPC_MSG_IDS 1 125#define IPC_MSG_IDS 1
97#define IPC_SHM_IDS 2 126#define IPC_SHM_IDS 2
98 127
99#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) 128#define ipcid_to_idx(id) ((id) & IPCMNI_IDX_MASK)
100#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) 129#define ipcid_to_seqx(id) ((id) >> ipcmni_seq_shift())
101#define IPCID_SEQ_MAX min_t(int, INT_MAX/SEQ_MULTIPLIER, USHRT_MAX) 130#define ipcid_seq_max() (INT_MAX >> ipcmni_seq_shift())
102 131
103/* must be called with ids->rwsem acquired for writing */ 132/* must be called with ids->rwsem acquired for writing */
104int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); 133int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
@@ -123,8 +152,8 @@ static inline int ipc_get_maxidx(struct ipc_ids *ids)
123 if (ids->in_use == 0) 152 if (ids->in_use == 0)
124 return -1; 153 return -1;
125 154
126 if (ids->in_use == IPCMNI) 155 if (ids->in_use == ipc_mni)
127 return IPCMNI - 1; 156 return ipc_mni - 1;
128 157
129 return ids->max_idx; 158 return ids->max_idx;
130} 159}
@@ -216,10 +245,10 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
216 245
217static inline int sem_check_semmni(struct ipc_namespace *ns) { 246static inline int sem_check_semmni(struct ipc_namespace *ns) {
218 /* 247 /*
219 * Check semmni range [0, IPCMNI] 248 * Check semmni range [0, ipc_mni]
220 * semmni is the last element of sem_ctls[4] array 249 * semmni is the last element of sem_ctls[4] array
221 */ 250 */
222 return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > IPCMNI)) 251 return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > ipc_mni))
223 ? -ERANGE : 0; 252 ? -ERANGE : 0;
224} 253}
225 254
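
With the macros above, encoding and decoding remain simple shifts and masks. A hypothetical extended-mode value (ipc_mni_shift == 24):

    int id  = (6 << 24) + 1000000;		/* seq 6, index 1000000 */
    int idx = id & ((1 << 24) - 1);		/* 1000000, cf. ipcid_to_idx()  */
    int seq = id >> 24;				/* 6,       cf. ipcid_to_seqx() */
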
diff --git a/kernel/Makefile b/kernel/Makefile
index 298437bb2c6a..33824f0385b3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -127,7 +127,7 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
127$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz 127$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz
128 128
129quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz 129quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz
130cmd_genikh = $(srctree)/kernel/gen_ikh_data.sh $@ 130cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_ikh_data.sh $@
131$(obj)/kheaders_data.tar.xz: FORCE 131$(obj)/kheaders_data.tar.xz: FORCE
132 $(call cmd,genikh) 132 $(call cmd,genikh)
133 133
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 327f37c9fdfa..217cec4e22c6 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3540,17 +3540,84 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
3540#ifdef CONFIG_PSI 3540#ifdef CONFIG_PSI
3541static int cgroup_io_pressure_show(struct seq_file *seq, void *v) 3541static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
3542{ 3542{
3543 return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_IO); 3543 struct cgroup *cgroup = seq_css(seq)->cgroup;
3544 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3545
3546 return psi_show(seq, psi, PSI_IO);
3544} 3547}
3545static int cgroup_memory_pressure_show(struct seq_file *seq, void *v) 3548static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
3546{ 3549{
3547 return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_MEM); 3550 struct cgroup *cgroup = seq_css(seq)->cgroup;
3551 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3552
3553 return psi_show(seq, psi, PSI_MEM);
3548} 3554}
3549static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) 3555static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
3550{ 3556{
3551 return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_CPU); 3557 struct cgroup *cgroup = seq_css(seq)->cgroup;
3558 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3559
3560 return psi_show(seq, psi, PSI_CPU);
3552} 3561}
3553#endif 3562
3563static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
3564 size_t nbytes, enum psi_res res)
3565{
3566 struct psi_trigger *new;
3567 struct cgroup *cgrp;
3568
3569 cgrp = cgroup_kn_lock_live(of->kn, false);
3570 if (!cgrp)
3571 return -ENODEV;
3572
3573 cgroup_get(cgrp);
3574 cgroup_kn_unlock(of->kn);
3575
3576 new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
3577 if (IS_ERR(new)) {
3578 cgroup_put(cgrp);
3579 return PTR_ERR(new);
3580 }
3581
3582 psi_trigger_replace(&of->priv, new);
3583
3584 cgroup_put(cgrp);
3585
3586 return nbytes;
3587}
3588
3589static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
3590 char *buf, size_t nbytes,
3591 loff_t off)
3592{
3593 return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
3594}
3595
3596static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
3597 char *buf, size_t nbytes,
3598 loff_t off)
3599{
3600 return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
3601}
3602
3603static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
3604 char *buf, size_t nbytes,
3605 loff_t off)
3606{
3607 return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
3608}
3609
3610static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
3611 poll_table *pt)
3612{
3613 return psi_trigger_poll(&of->priv, of->file, pt);
3614}
3615
3616static void cgroup_pressure_release(struct kernfs_open_file *of)
3617{
3618 psi_trigger_replace(&of->priv, NULL);
3619}
3620#endif /* CONFIG_PSI */
3554 3621
3555static int cgroup_freeze_show(struct seq_file *seq, void *v) 3622static int cgroup_freeze_show(struct seq_file *seq, void *v)
3556{ 3623{
@@ -4743,20 +4810,26 @@ static struct cftype cgroup_base_files[] = {
4743#ifdef CONFIG_PSI 4810#ifdef CONFIG_PSI
4744 { 4811 {
4745 .name = "io.pressure", 4812 .name = "io.pressure",
4746 .flags = CFTYPE_NOT_ON_ROOT,
4747 .seq_show = cgroup_io_pressure_show, 4813 .seq_show = cgroup_io_pressure_show,
4814 .write = cgroup_io_pressure_write,
4815 .poll = cgroup_pressure_poll,
4816 .release = cgroup_pressure_release,
4748 }, 4817 },
4749 { 4818 {
4750 .name = "memory.pressure", 4819 .name = "memory.pressure",
4751 .flags = CFTYPE_NOT_ON_ROOT,
4752 .seq_show = cgroup_memory_pressure_show, 4820 .seq_show = cgroup_memory_pressure_show,
4821 .write = cgroup_memory_pressure_write,
4822 .poll = cgroup_pressure_poll,
4823 .release = cgroup_pressure_release,
4753 }, 4824 },
4754 { 4825 {
4755 .name = "cpu.pressure", 4826 .name = "cpu.pressure",
4756 .flags = CFTYPE_NOT_ON_ROOT,
4757 .seq_show = cgroup_cpu_pressure_show, 4827 .seq_show = cgroup_cpu_pressure_show,
4828 .write = cgroup_cpu_pressure_write,
4829 .poll = cgroup_pressure_poll,
4830 .release = cgroup_pressure_release,
4758 }, 4831 },
4759#endif 4832#endif /* CONFIG_PSI */
4760 { } /* terminate */ 4833 { } /* terminate */
4761}; 4834};
4762 4835
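
The new write/poll/release handlers give the cgroup2 pressure files the same trigger interface as the /proc/pressure files described in Documentation/accounting/psi.txt. A minimal userspace sketch (error handling trimmed; the cgroup path is hypothetical):

    #include <fcntl.h>
    #include <poll.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
    	/* Wake up when memory stalls total >= 150ms inside any 1s window. */
    	const char trig[] = "some 150000 1000000";
    	struct pollfd pfd;
    	int fd;

    	fd = open("/sys/fs/cgroup/demo/memory.pressure", O_RDWR);
    	if (fd < 0 || write(fd, trig, strlen(trig) + 1) < 0)
    		return 1;

    	pfd.fd = fd;
    	pfd.events = POLLPRI;	/* trigger events arrive as POLLPRI */
    	while (poll(&pfd, 1, -1) > 0 && !(pfd.revents & POLLERR))
    		puts("memory pressure threshold breached");

    	close(fd);
    	return 0;
    }
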
diff --git a/kernel/exit.c b/kernel/exit.c
index 2166c2d92ddc..8361a560cd1d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -422,7 +422,7 @@ retry:
422 * freed task structure. 422 * freed task structure.
423 */ 423 */
424 if (atomic_read(&mm->mm_users) <= 1) { 424 if (atomic_read(&mm->mm_users) <= 1) {
425 mm->owner = NULL; 425 WRITE_ONCE(mm->owner, NULL);
426 return; 426 return;
427 } 427 }
428 428
@@ -462,7 +462,7 @@ retry:
462 * most likely racing with swapoff (try_to_unuse()) or /proc or 462 * most likely racing with swapoff (try_to_unuse()) or /proc or
463 * ptrace or page migration (get_task_mm()). Mark owner as NULL. 463 * ptrace or page migration (get_task_mm()). Mark owner as NULL.
464 */ 464 */
465 mm->owner = NULL; 465 WRITE_ONCE(mm->owner, NULL);
466 return; 466 return;
467 467
468assign_new_owner: 468assign_new_owner:
@@ -483,7 +483,7 @@ assign_new_owner:
483 put_task_struct(c); 483 put_task_struct(c);
484 goto retry; 484 goto retry;
485 } 485 }
486 mm->owner = c; 486 WRITE_ONCE(mm->owner, c);
487 task_unlock(c); 487 task_unlock(c);
488 put_task_struct(c); 488 put_task_struct(c);
489} 489}
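
The WRITE_ONCE() annotations only pay off when the lockless readers use the matching load. A reader of mm->owner (which only exists with CONFIG_MEMCG) would pair with them roughly like this sketch, which is not a hunk from this series:

    static struct task_struct *demo_get_mm_owner(struct mm_struct *mm)
    {
    	struct task_struct *owner;

    	rcu_read_lock();
    	owner = READ_ONCE(mm->owner);	/* may legitimately observe NULL */
    	if (owner)
    		get_task_struct(owner);
    	rcu_read_unlock();
    	return owner;
    }
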
diff --git a/kernel/fork.c b/kernel/fork.c
index 737db1828437..b4cba953040a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -955,6 +955,15 @@ static void mm_init_aio(struct mm_struct *mm)
955#endif 955#endif
956} 956}
957 957
958static __always_inline void mm_clear_owner(struct mm_struct *mm,
959 struct task_struct *p)
960{
961#ifdef CONFIG_MEMCG
962 if (mm->owner == p)
963 WRITE_ONCE(mm->owner, NULL);
964#endif
965}
966
958static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) 967static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
959{ 968{
960#ifdef CONFIG_MEMCG 969#ifdef CONFIG_MEMCG
@@ -1343,6 +1352,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
1343free_pt: 1352free_pt:
1344 /* don't put binfmt in mmput, we haven't got module yet */ 1353 /* don't put binfmt in mmput, we haven't got module yet */
1345 mm->binfmt = NULL; 1354 mm->binfmt = NULL;
1355 mm_init_owner(mm, NULL);
1346 mmput(mm); 1356 mmput(mm);
1347 1357
1348fail_nomem: 1358fail_nomem:
@@ -1726,6 +1736,21 @@ static int pidfd_create(struct pid *pid)
1726 return fd; 1736 return fd;
1727} 1737}
1728 1738
1739static void __delayed_free_task(struct rcu_head *rhp)
1740{
1741 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
1742
1743 free_task(tsk);
1744}
1745
1746static __always_inline void delayed_free_task(struct task_struct *tsk)
1747{
1748 if (IS_ENABLED(CONFIG_MEMCG))
1749 call_rcu(&tsk->rcu, __delayed_free_task);
1750 else
1751 free_task(tsk);
1752}
1753
1729/* 1754/*
1730 * This creates a new process as a copy of the old one, 1755 * This creates a new process as a copy of the old one,
1731 * but does not actually start it yet. 1756 * but does not actually start it yet.
@@ -2068,7 +2093,7 @@ static __latent_entropy struct task_struct *copy_process(
2068#ifdef TIF_SYSCALL_EMU 2093#ifdef TIF_SYSCALL_EMU
2069 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 2094 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
2070#endif 2095#endif
2071 clear_all_latency_tracing(p); 2096 clear_tsk_latency_tracing(p);
2072 2097
2073 /* ok, now we should be set up.. */ 2098 /* ok, now we should be set up.. */
2074 p->pid = pid_nr(pid); 2099 p->pid = pid_nr(pid);
@@ -2233,8 +2258,10 @@ bad_fork_cleanup_io:
2233bad_fork_cleanup_namespaces: 2258bad_fork_cleanup_namespaces:
2234 exit_task_namespaces(p); 2259 exit_task_namespaces(p);
2235bad_fork_cleanup_mm: 2260bad_fork_cleanup_mm:
2236 if (p->mm) 2261 if (p->mm) {
2262 mm_clear_owner(p->mm, p);
2237 mmput(p->mm); 2263 mmput(p->mm);
2264 }
2238bad_fork_cleanup_signal: 2265bad_fork_cleanup_signal:
2239 if (!(clone_flags & CLONE_THREAD)) 2266 if (!(clone_flags & CLONE_THREAD))
2240 free_signal_struct(p->signal); 2267 free_signal_struct(p->signal);
@@ -2265,7 +2292,7 @@ bad_fork_cleanup_count:
2265bad_fork_free: 2292bad_fork_free:
2266 p->state = TASK_DEAD; 2293 p->state = TASK_DEAD;
2267 put_task_stack(p); 2294 put_task_stack(p);
2268 free_task(p); 2295 delayed_free_task(p);
2269fork_out: 2296fork_out:
2270 spin_lock_irq(&current->sighand->siglock); 2297 spin_lock_irq(&current->sighand->siglock);
2271 hlist_del_init(&delayed.node); 2298 hlist_del_init(&delayed.node);
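
delayed_free_task() exists because, with CONFIG_MEMCG, a racing mm_update_next_owner() may still inspect the aborted child under RCU, so the task_struct must survive one grace period. The generic shape of the pattern, using a hypothetical structure:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct demo {
    	struct rcu_head rcu;
    	/* payload ... */
    };

    static void demo_free_cb(struct rcu_head *rhp)
    {
    	kfree(container_of(rhp, struct demo, rcu));
    }

    static void demo_free(struct demo *d)
    {
    	/* Readers that found 'd' under rcu_read_lock() stay safe for
    	 * one more grace period before the memory is returned. */
    	call_rcu(&d->rcu, demo_free_cb);
    }
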
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 1e3823fa799b..f71c1adcff31 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -53,6 +53,7 @@ config GCOV_PROFILE_ALL
53choice 53choice
54 prompt "Specify GCOV format" 54 prompt "Specify GCOV format"
55 depends on GCOV_KERNEL 55 depends on GCOV_KERNEL
56 depends on CC_IS_GCC
56 ---help--- 57 ---help---
57 The gcov format is usually determined by the GCC version, and the 58 The gcov format is usually determined by the GCC version, and the
58 default is chosen according to your GCC version. However, there are 59 default is chosen according to your GCC version. However, there are
@@ -62,7 +63,7 @@ choice
62 63
63config GCOV_FORMAT_3_4 64config GCOV_FORMAT_3_4
64 bool "GCC 3.4 format" 65 bool "GCC 3.4 format"
65 depends on CC_IS_GCC && GCC_VERSION < 40700 66 depends on GCC_VERSION < 40700
66 ---help--- 67 ---help---
67 Select this option to use the format defined by GCC 3.4. 68 Select this option to use the format defined by GCC 3.4.
68 69
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index ff06d64df397..d66a74b0f100 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -2,5 +2,6 @@
2ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' 2ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
3 3
4obj-y := base.o fs.o 4obj-y := base.o fs.o
5obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_3_4.o 5obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_base.o gcc_3_4.o
6obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_4_7.o 6obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_base.o gcc_4_7.o
7obj-$(CONFIG_CC_IS_CLANG) += clang.o
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index 9c7c8d5c18f2..0ffe9f194080 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -22,88 +22,8 @@
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include "gcov.h" 23#include "gcov.h"
24 24
25static int gcov_events_enabled; 25int gcov_events_enabled;
26static DEFINE_MUTEX(gcov_lock); 26DEFINE_MUTEX(gcov_lock);
27
28/*
29 * __gcov_init is called by gcc-generated constructor code for each object
30 * file compiled with -fprofile-arcs.
31 */
32void __gcov_init(struct gcov_info *info)
33{
34 static unsigned int gcov_version;
35
36 mutex_lock(&gcov_lock);
37 if (gcov_version == 0) {
38 gcov_version = gcov_info_version(info);
39 /*
40 * Printing gcc's version magic may prove useful for debugging
41 * incompatibility reports.
42 */
43 pr_info("version magic: 0x%x\n", gcov_version);
44 }
45 /*
46 * Add new profiling data structure to list and inform event
47 * listener.
48 */
49 gcov_info_link(info);
50 if (gcov_events_enabled)
51 gcov_event(GCOV_ADD, info);
52 mutex_unlock(&gcov_lock);
53}
54EXPORT_SYMBOL(__gcov_init);
55
56/*
57 * These functions may be referenced by gcc-generated profiling code but serve
58 * no function for kernel profiling.
59 */
60void __gcov_flush(void)
61{
62 /* Unused. */
63}
64EXPORT_SYMBOL(__gcov_flush);
65
66void __gcov_merge_add(gcov_type *counters, unsigned int n_counters)
67{
68 /* Unused. */
69}
70EXPORT_SYMBOL(__gcov_merge_add);
71
72void __gcov_merge_single(gcov_type *counters, unsigned int n_counters)
73{
74 /* Unused. */
75}
76EXPORT_SYMBOL(__gcov_merge_single);
77
78void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters)
79{
80 /* Unused. */
81}
82EXPORT_SYMBOL(__gcov_merge_delta);
83
84void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters)
85{
86 /* Unused. */
87}
88EXPORT_SYMBOL(__gcov_merge_ior);
89
90void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters)
91{
92 /* Unused. */
93}
94EXPORT_SYMBOL(__gcov_merge_time_profile);
95
96void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters)
97{
98 /* Unused. */
99}
100EXPORT_SYMBOL(__gcov_merge_icall_topn);
101
102void __gcov_exit(void)
103{
104 /* Unused. */
105}
106EXPORT_SYMBOL(__gcov_exit);
107 27
108/** 28/**
109 * gcov_enable_events - enable event reporting through gcov_event() 29 * gcov_enable_events - enable event reporting through gcov_event()
@@ -144,7 +64,7 @@ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event,
144 64
145 /* Remove entries located in module from linked list. */ 65 /* Remove entries located in module from linked list. */
146 while ((info = gcov_info_next(info))) { 66 while ((info = gcov_info_next(info))) {
147 if (within_module((unsigned long)info, mod)) { 67 if (gcov_info_within_module(info, mod)) {
148 gcov_info_unlink(prev, info); 68 gcov_info_unlink(prev, info);
149 if (gcov_events_enabled) 69 if (gcov_events_enabled)
150 gcov_event(GCOV_REMOVE, info); 70 gcov_event(GCOV_REMOVE, info);
diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c
new file mode 100644
index 000000000000..c94b820a1b62
--- /dev/null
+++ b/kernel/gcov/clang.c
@@ -0,0 +1,581 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2019 Google, Inc.
4 * modified from kernel/gcov/gcc_4_7.c
5 *
6 * This software is licensed under the terms of the GNU General Public
7 * License version 2, as published by the Free Software Foundation, and
8 * may be copied, distributed, and modified under those terms.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 *
16 * LLVM uses profiling data that's deliberately similar to GCC, but has a
17 * very different way of exporting that data. LLVM calls llvm_gcov_init() once
18 * per module, and provides a couple of callbacks that we can use to ask for
19 * more data.
20 *
21 * We care about the "writeout" callback, which in turn calls back into
22 * compiler-rt/this module to dump all the gathered coverage data to disk:
23 *
24 * llvm_gcda_start_file()
25 * llvm_gcda_emit_function()
26 * llvm_gcda_emit_arcs()
27 * llvm_gcda_emit_function()
28 * llvm_gcda_emit_arcs()
29 * [... repeats for each function ...]
30 * llvm_gcda_summary_info()
31 * llvm_gcda_end_file()
32 *
33 * This design is much more stateless and unstructured than gcc's, and is
34 * intended to run at process exit. This forces us to keep some local state
35 * about which module we're dealing with at the moment. On the other hand, it
36 * also means we don't depend as much on how LLVM represents profiling data
37 * internally.
38 *
39 * See LLVM's lib/Transforms/Instrumentation/GCOVProfiling.cpp for more
40 * details on how this works, particularly GCOVProfiler::emitProfileArcs(),
41 * GCOVProfiler::insertCounterWriteout(), and
42 * GCOVProfiler::insertFlush().
43 */
44
45#define pr_fmt(fmt) "gcov: " fmt
46
47#include <linux/kernel.h>
48#include <linux/list.h>
49#include <linux/printk.h>
50#include <linux/ratelimit.h>
51#include <linux/seq_file.h>
52#include <linux/slab.h>
53#include <linux/vmalloc.h>
54#include "gcov.h"
55
56typedef void (*llvm_gcov_callback)(void);
57
58struct gcov_info {
59 struct list_head head;
60
61 const char *filename;
62 unsigned int version;
63 u32 checksum;
64
65 struct list_head functions;
66};
67
68struct gcov_fn_info {
69 struct list_head head;
70
71 u32 ident;
72 u32 checksum;
73 u8 use_extra_checksum;
74 u32 cfg_checksum;
75
76 u32 num_counters;
77 u64 *counters;
78 const char *function_name;
79};
80
81static struct gcov_info *current_info;
82
83static LIST_HEAD(clang_gcov_list);
84
85void llvm_gcov_init(llvm_gcov_callback writeout, llvm_gcov_callback flush)
86{
87 struct gcov_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
88
89 if (!info)
90 return;
91
92 INIT_LIST_HEAD(&info->head);
93 INIT_LIST_HEAD(&info->functions);
94
95 mutex_lock(&gcov_lock);
96
97 list_add_tail(&info->head, &clang_gcov_list);
98 current_info = info;
99 writeout();
100 current_info = NULL;
101 if (gcov_events_enabled)
102 gcov_event(GCOV_ADD, info);
103
104 mutex_unlock(&gcov_lock);
105}
106EXPORT_SYMBOL(llvm_gcov_init);
107
108void llvm_gcda_start_file(const char *orig_filename, const char version[4],
109 u32 checksum)
110{
111 current_info->filename = orig_filename;
112 memcpy(&current_info->version, version, sizeof(current_info->version));
113 current_info->checksum = checksum;
114}
115EXPORT_SYMBOL(llvm_gcda_start_file);
116
117void llvm_gcda_emit_function(u32 ident, const char *function_name,
118 u32 func_checksum, u8 use_extra_checksum, u32 cfg_checksum)
119{
120 struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
121
122 if (!info)
123 return;
124
125 INIT_LIST_HEAD(&info->head);
126 info->ident = ident;
127 info->checksum = func_checksum;
128 info->use_extra_checksum = use_extra_checksum;
129 info->cfg_checksum = cfg_checksum;
130 if (function_name)
131 info->function_name = kstrdup(function_name, GFP_KERNEL);
132
133 list_add_tail(&info->head, &current_info->functions);
134}
135EXPORT_SYMBOL(llvm_gcda_emit_function);
136
137void llvm_gcda_emit_arcs(u32 num_counters, u64 *counters)
138{
139 struct gcov_fn_info *info = list_last_entry(&current_info->functions,
140 struct gcov_fn_info, head);
141
142 info->num_counters = num_counters;
143 info->counters = counters;
144}
145EXPORT_SYMBOL(llvm_gcda_emit_arcs);
146
147void llvm_gcda_summary_info(void)
148{
149}
150EXPORT_SYMBOL(llvm_gcda_summary_info);
151
152void llvm_gcda_end_file(void)
153{
154}
155EXPORT_SYMBOL(llvm_gcda_end_file);
156
157/**
158 * gcov_info_filename - return info filename
159 * @info: profiling data set
160 */
161const char *gcov_info_filename(struct gcov_info *info)
162{
163 return info->filename;
164}
165
166/**
167 * gcov_info_version - return info version
168 * @info: profiling data set
169 */
170unsigned int gcov_info_version(struct gcov_info *info)
171{
172 return info->version;
173}
174
175/**
176 * gcov_info_next - return next profiling data set
177 * @info: profiling data set
178 *
179 * Returns next gcov_info following @info or first gcov_info in the chain if
180 * @info is %NULL.
181 */
182struct gcov_info *gcov_info_next(struct gcov_info *info)
183{
184 if (!info)
185 return list_first_entry_or_null(&clang_gcov_list,
186 struct gcov_info, head);
187 if (list_is_last(&info->head, &clang_gcov_list))
188 return NULL;
189 return list_next_entry(info, head);
190}
191
192/**
193 * gcov_info_link - link/add profiling data set to the list
194 * @info: profiling data set
195 */
196void gcov_info_link(struct gcov_info *info)
197{
198 list_add_tail(&info->head, &clang_gcov_list);
199}
200
201/**
202 * gcov_info_unlink - unlink/remove profiling data set from the list
203 * @prev: previous profiling data set
204 * @info: profiling data set
205 */
206void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info)
207{
208 /* Generic code unlinks while iterating. */
209 __list_del_entry(&info->head);
210}
211
212/**
213 * gcov_info_within_module - check if a profiling data set belongs to a module
214 * @info: profiling data set
215 * @mod: module
216 *
 217 * Returns true if profiling data belongs to the module, false otherwise.
218 */
219bool gcov_info_within_module(struct gcov_info *info, struct module *mod)
220{
221 return within_module((unsigned long)info->filename, mod);
222}
223
224/* Symbolic links to be created for each profiling data file. */
225const struct gcov_link gcov_link[] = {
226 { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */
227 { 0, NULL},
228};
229
230/**
231 * gcov_info_reset - reset profiling data to zero
232 * @info: profiling data set
233 */
234void gcov_info_reset(struct gcov_info *info)
235{
236 struct gcov_fn_info *fn;
237
238 list_for_each_entry(fn, &info->functions, head)
239 memset(fn->counters, 0,
240 sizeof(fn->counters[0]) * fn->num_counters);
241}
242
243/**
244 * gcov_info_is_compatible - check if profiling data can be added
245 * @info1: first profiling data set
246 * @info2: second profiling data set
247 *
248 * Returns non-zero if profiling data can be added, zero otherwise.
249 */
250int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2)
251{
252 struct gcov_fn_info *fn_ptr1 = list_first_entry_or_null(
253 &info1->functions, struct gcov_fn_info, head);
254 struct gcov_fn_info *fn_ptr2 = list_first_entry_or_null(
255 &info2->functions, struct gcov_fn_info, head);
256
257 if (info1->checksum != info2->checksum)
258 return false;
259 if (!fn_ptr1)
260 return fn_ptr1 == fn_ptr2;
261 while (!list_is_last(&fn_ptr1->head, &info1->functions) &&
262 !list_is_last(&fn_ptr2->head, &info2->functions)) {
263 if (fn_ptr1->checksum != fn_ptr2->checksum)
264 return false;
265 if (fn_ptr1->use_extra_checksum != fn_ptr2->use_extra_checksum)
266 return false;
267 if (fn_ptr1->use_extra_checksum &&
268 fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum)
269 return false;
270 fn_ptr1 = list_next_entry(fn_ptr1, head);
271 fn_ptr2 = list_next_entry(fn_ptr2, head);
272 }
273 return list_is_last(&fn_ptr1->head, &info1->functions) &&
274 list_is_last(&fn_ptr2->head, &info2->functions);
275}
276
277/**
278 * gcov_info_add - add up profiling data
279 * @dest: profiling data set to which data is added
280 * @source: profiling data set which is added
281 *
282 * Adds profiling counts of @source to @dest.
283 */
284void gcov_info_add(struct gcov_info *dst, struct gcov_info *src)
285{
286 struct gcov_fn_info *dfn_ptr;
287 struct gcov_fn_info *sfn_ptr = list_first_entry_or_null(&src->functions,
288 struct gcov_fn_info, head);
289
290 list_for_each_entry(dfn_ptr, &dst->functions, head) {
291 u32 i;
292
293 for (i = 0; i < sfn_ptr->num_counters; i++)
294 dfn_ptr->counters[i] += sfn_ptr->counters[i];
295 }
296}
297
298static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn)
299{
300 size_t cv_size; /* counter values size */
301 struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn),
302 GFP_KERNEL);
303 if (!fn_dup)
304 return NULL;
305 INIT_LIST_HEAD(&fn_dup->head);
306
307 fn_dup->function_name = kstrdup(fn->function_name, GFP_KERNEL);
308 if (!fn_dup->function_name)
309 goto err_name;
310
311 cv_size = fn->num_counters * sizeof(fn->counters[0]);
312 fn_dup->counters = vmalloc(cv_size);
313 if (!fn_dup->counters)
314 goto err_counters;
315 memcpy(fn_dup->counters, fn->counters, cv_size);
316
317 return fn_dup;
318
319err_counters:
320 kfree(fn_dup->function_name);
321err_name:
322 kfree(fn_dup);
323 return NULL;
324}
325
326/**
327 * gcov_info_dup - duplicate profiling data set
328 * @info: profiling data set to duplicate
329 *
330 * Return newly allocated duplicate on success, %NULL on error.
331 */
332struct gcov_info *gcov_info_dup(struct gcov_info *info)
333{
334 struct gcov_info *dup;
335 struct gcov_fn_info *fn;
336
337 dup = kmemdup(info, sizeof(*dup), GFP_KERNEL);
338 if (!dup)
339 return NULL;
340 INIT_LIST_HEAD(&dup->head);
341 INIT_LIST_HEAD(&dup->functions);
342 dup->filename = kstrdup(info->filename, GFP_KERNEL);
343 if (!dup->filename)
344 goto err;
345
346 list_for_each_entry(fn, &info->functions, head) {
347 struct gcov_fn_info *fn_dup = gcov_fn_info_dup(fn);
348
349 if (!fn_dup)
350 goto err;
351 list_add_tail(&fn_dup->head, &dup->functions);
352 }
353
354 return dup;
355
356err:
357 gcov_info_free(dup);
358 return NULL;
359}
360
361/**
362 * gcov_info_free - release memory for profiling data set duplicate
363 * @info: profiling data set duplicate to free
364 */
365void gcov_info_free(struct gcov_info *info)
366{
367 struct gcov_fn_info *fn, *tmp;
368
369 list_for_each_entry_safe(fn, tmp, &info->functions, head) {
370 kfree(fn->function_name);
371 vfree(fn->counters);
372 list_del(&fn->head);
373 kfree(fn);
374 }
375 kfree(info->filename);
376 kfree(info);
377}
378
379#define ITER_STRIDE PAGE_SIZE
380
381/**
382 * struct gcov_iterator - specifies current file position in logical records
383 * @info: associated profiling data
384 * @buffer: buffer containing file data
385 * @size: size of buffer
386 * @pos: current position in file
387 */
388struct gcov_iterator {
389 struct gcov_info *info;
390 void *buffer;
391 size_t size;
392 loff_t pos;
393};
394
395/**
396 * store_gcov_u32 - store 32 bit number in gcov format to buffer
397 * @buffer: target buffer or NULL
398 * @off: offset into the buffer
399 * @v: value to be stored
400 *
401 * Number format defined by gcc: numbers are recorded in the 32 bit
402 * unsigned binary form of the endianness of the machine generating the
403 * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't
404 * store anything.
405 */
406static size_t store_gcov_u32(void *buffer, size_t off, u32 v)
407{
408 u32 *data;
409
410 if (buffer) {
411 data = buffer + off;
412 *data = v;
413 }
414
415 return sizeof(*data);
416}
417
418/**
419 * store_gcov_u64 - store 64 bit number in gcov format to buffer
420 * @buffer: target buffer or NULL
421 * @off: offset into the buffer
422 * @v: value to be stored
423 *
424 * Number format defined by gcc: numbers are recorded in the 32 bit
425 * unsigned binary form of the endianness of the machine generating the
426 * file. 64 bit numbers are stored as two 32 bit numbers, the low part
427 * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store
428 * anything.
429 */
430static size_t store_gcov_u64(void *buffer, size_t off, u64 v)
431{
432 u32 *data;
433
434 if (buffer) {
435 data = buffer + off;
436
437 data[0] = (v & 0xffffffffUL);
438 data[1] = (v >> 32);
439 }
440
441 return sizeof(*data) * 2;
442}
443
444/**
445 * convert_to_gcda - convert profiling data set to gcda file format
446 * @buffer: the buffer to store file data or %NULL if no data should be stored
447 * @info: profiling data set to be converted
448 *
449 * Returns the number of bytes that were/would have been stored into the buffer.
450 */
451static size_t convert_to_gcda(char *buffer, struct gcov_info *info)
452{
453 struct gcov_fn_info *fi_ptr;
454 size_t pos = 0;
455
456 /* File header. */
457 pos += store_gcov_u32(buffer, pos, GCOV_DATA_MAGIC);
458 pos += store_gcov_u32(buffer, pos, info->version);
459 pos += store_gcov_u32(buffer, pos, info->checksum);
460
461 list_for_each_entry(fi_ptr, &info->functions, head) {
462 u32 i;
463 u32 len = 2;
464
465 if (fi_ptr->use_extra_checksum)
466 len++;
467
468 pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION);
469 pos += store_gcov_u32(buffer, pos, len);
470 pos += store_gcov_u32(buffer, pos, fi_ptr->ident);
471 pos += store_gcov_u32(buffer, pos, fi_ptr->checksum);
472 if (fi_ptr->use_extra_checksum)
473 pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum);
474
475 pos += store_gcov_u32(buffer, pos, GCOV_TAG_COUNTER_BASE);
476 pos += store_gcov_u32(buffer, pos, fi_ptr->num_counters * 2);
477 for (i = 0; i < fi_ptr->num_counters; i++)
478 pos += store_gcov_u64(buffer, pos, fi_ptr->counters[i]);
479 }
480
481 return pos;
482}
483
484/**
485 * gcov_iter_new - allocate and initialize profiling data iterator
486 * @info: profiling data set to be iterated
487 *
488 * Return file iterator on success, %NULL otherwise.
489 */
490struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
491{
492 struct gcov_iterator *iter;
493
494 iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL);
495 if (!iter)
496 goto err_free;
497
498 iter->info = info;
499 /* Dry-run to get the actual buffer size. */
500 iter->size = convert_to_gcda(NULL, info);
501 iter->buffer = vmalloc(iter->size);
502 if (!iter->buffer)
503 goto err_free;
504
505 convert_to_gcda(iter->buffer, info);
506
507 return iter;
508
509err_free:
510 kfree(iter);
511 return NULL;
512}
513
514
515/**
 516 * gcov_iter_free - release memory for profiling data iterator
 517 * @iter: file iterator to free
518 */
519void gcov_iter_free(struct gcov_iterator *iter)
520{
521 vfree(iter->buffer);
522 kfree(iter);
523}
524
525/**
526 * gcov_iter_get_info - return profiling data set for given file iterator
527 * @iter: file iterator
528 */
529struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter)
530{
531 return iter->info;
532}
533
534/**
535 * gcov_iter_start - reset file iterator to starting position
536 * @iter: file iterator
537 */
538void gcov_iter_start(struct gcov_iterator *iter)
539{
540 iter->pos = 0;
541}
542
543/**
544 * gcov_iter_next - advance file iterator to next logical record
545 * @iter: file iterator
546 *
547 * Return zero if new position is valid, non-zero if iterator has reached end.
548 */
549int gcov_iter_next(struct gcov_iterator *iter)
550{
551 if (iter->pos < iter->size)
552 iter->pos += ITER_STRIDE;
553
554 if (iter->pos >= iter->size)
555 return -EINVAL;
556
557 return 0;
558}
559
560/**
561 * gcov_iter_write - write data for current pos to seq_file
562 * @iter: file iterator
563 * @seq: seq_file handle
564 *
565 * Return zero on success, non-zero otherwise.
566 */
567int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
568{
569 size_t len;
570
571 if (iter->pos >= iter->size)
572 return -EINVAL;
573
574 len = ITER_STRIDE;
575 if (iter->pos + len > iter->size)
576 len = iter->size - iter->pos;
577
578 seq_write(seq, iter->buffer + iter->pos, len);
579
580 return 0;
581}
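
The low-word-first convention in store_gcov_u64() above, worked through on a concrete counter value:

    /*
     * v = 0x0000000100000002 is emitted as two 32-bit words:
     *	data[0] = 0x00000002	(v & 0xffffffffUL)
     *	data[1] = 0x00000001	(v >> 32)
     * matching the .gcda convention of storing the low part first.
     */
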
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c
index 2dddecbdbe6e..801ee4b0b969 100644
--- a/kernel/gcov/gcc_3_4.c
+++ b/kernel/gcov/gcc_3_4.c
@@ -137,6 +137,18 @@ void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info)
137 gcov_info_head = info->next; 137 gcov_info_head = info->next;
138} 138}
139 139
140/**
141 * gcov_info_within_module - check if a profiling data set belongs to a module
142 * @info: profiling data set
143 * @mod: module
144 *
 145 * Returns true if profiling data belongs to the module, false otherwise.
146 */
147bool gcov_info_within_module(struct gcov_info *info, struct module *mod)
148{
149 return within_module((unsigned long)info, mod);
150}
151
140/* Symbolic links to be created for each profiling data file. */ 152/* Symbolic links to be created for each profiling data file. */
141const struct gcov_link gcov_link[] = { 153const struct gcov_link gcov_link[] = {
142 { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ 154 { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index ca5e5c0ef853..ec37563674d6 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -150,6 +150,18 @@ void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info)
150 gcov_info_head = info->next; 150 gcov_info_head = info->next;
151} 151}
152 152
153/**
154 * gcov_info_within_module - check if a profiling data set belongs to a module
155 * @info: profiling data set
156 * @mod: module
157 *
 158 * Returns true if profiling data belongs to the module, false otherwise.
159 */
160bool gcov_info_within_module(struct gcov_info *info, struct module *mod)
161{
162 return within_module((unsigned long)info, mod);
163}
164
153/* Symbolic links to be created for each profiling data file. */ 165/* Symbolic links to be created for each profiling data file. */
154const struct gcov_link gcov_link[] = { 166const struct gcov_link gcov_link[] = {
155 { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ 167 { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */
diff --git a/kernel/gcov/gcc_base.c b/kernel/gcov/gcc_base.c
new file mode 100644
index 000000000000..3cf736b9f880
--- /dev/null
+++ b/kernel/gcov/gcc_base.c
@@ -0,0 +1,86 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/export.h>
4#include <linux/kernel.h>
5#include <linux/mutex.h>
6#include "gcov.h"
7
8/*
9 * __gcov_init is called by gcc-generated constructor code for each object
10 * file compiled with -fprofile-arcs.
11 */
12void __gcov_init(struct gcov_info *info)
13{
14 static unsigned int gcov_version;
15
16 mutex_lock(&gcov_lock);
17 if (gcov_version == 0) {
18 gcov_version = gcov_info_version(info);
19 /*
20 * Printing gcc's version magic may prove useful for debugging
21 * incompatibility reports.
22 */
23 pr_info("version magic: 0x%x\n", gcov_version);
24 }
25 /*
26 * Add new profiling data structure to list and inform event
27 * listener.
28 */
29 gcov_info_link(info);
30 if (gcov_events_enabled)
31 gcov_event(GCOV_ADD, info);
32 mutex_unlock(&gcov_lock);
33}
34EXPORT_SYMBOL(__gcov_init);
35
36/*
37 * These functions may be referenced by gcc-generated profiling code but serve
38 * no function for kernel profiling.
39 */
40void __gcov_flush(void)
41{
42 /* Unused. */
43}
44EXPORT_SYMBOL(__gcov_flush);
45
46void __gcov_merge_add(gcov_type *counters, unsigned int n_counters)
47{
48 /* Unused. */
49}
50EXPORT_SYMBOL(__gcov_merge_add);
51
52void __gcov_merge_single(gcov_type *counters, unsigned int n_counters)
53{
54 /* Unused. */
55}
56EXPORT_SYMBOL(__gcov_merge_single);
57
58void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters)
59{
60 /* Unused. */
61}
62EXPORT_SYMBOL(__gcov_merge_delta);
63
64void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters)
65{
66 /* Unused. */
67}
68EXPORT_SYMBOL(__gcov_merge_ior);
69
70void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters)
71{
72 /* Unused. */
73}
74EXPORT_SYMBOL(__gcov_merge_time_profile);
75
76void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters)
77{
78 /* Unused. */
79}
80EXPORT_SYMBOL(__gcov_merge_icall_topn);
81
82void __gcov_exit(void)
83{
84 /* Unused. */
85}
86EXPORT_SYMBOL(__gcov_exit);
diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h
index de118ad4a024..6ab2c1808c9d 100644
--- a/kernel/gcov/gcov.h
+++ b/kernel/gcov/gcov.h
@@ -15,6 +15,7 @@
15#ifndef GCOV_H 15#ifndef GCOV_H
16#define GCOV_H GCOV_H 16#define GCOV_H GCOV_H
17 17
18#include <linux/module.h>
18#include <linux/types.h> 19#include <linux/types.h>
19 20
20/* 21/*
@@ -46,6 +47,7 @@ unsigned int gcov_info_version(struct gcov_info *info);
46struct gcov_info *gcov_info_next(struct gcov_info *info); 47struct gcov_info *gcov_info_next(struct gcov_info *info);
47void gcov_info_link(struct gcov_info *info); 48void gcov_info_link(struct gcov_info *info);
48void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info); 49void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info);
50bool gcov_info_within_module(struct gcov_info *info, struct module *mod);
49 51
50/* Base interface. */ 52/* Base interface. */
51enum gcov_action { 53enum gcov_action {
@@ -83,4 +85,7 @@ struct gcov_link {
83}; 85};
84extern const struct gcov_link gcov_link[]; 86extern const struct gcov_link gcov_link[];
85 87
88extern int gcov_events_enabled;
89extern struct mutex gcov_lock;
90
86#endif /* GCOV_H */ 91#endif /* GCOV_H */
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5942eeafb9ac..be4e8795561a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -11,6 +11,7 @@
11#include <linux/kthread.h> 11#include <linux/kthread.h>
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/err.h> 13#include <linux/err.h>
14#include <linux/cgroup.h>
14#include <linux/cpuset.h> 15#include <linux/cpuset.h>
15#include <linux/unistd.h> 16#include <linux/unistd.h>
16#include <linux/file.h> 17#include <linux/file.h>
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 99a5b5f46dc5..871734ea2f04 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -67,13 +67,10 @@ static struct latency_record latency_record[MAXLR];
67 67
68int latencytop_enabled; 68int latencytop_enabled;
69 69
70void clear_all_latency_tracing(struct task_struct *p) 70void clear_tsk_latency_tracing(struct task_struct *p)
71{ 71{
72 unsigned long flags; 72 unsigned long flags;
73 73
74 if (!latencytop_enabled)
75 return;
76
77 raw_spin_lock_irqsave(&latency_lock, flags); 74 raw_spin_lock_irqsave(&latency_lock, flags);
78 memset(&p->latency_record, 0, sizeof(p->latency_record)); 75 memset(&p->latency_record, 0, sizeof(p->latency_record));
79 p->latency_record_count = 0; 76 p->latency_record_count = 0;
@@ -96,9 +93,6 @@ account_global_scheduler_latency(struct task_struct *tsk,
96 int firstnonnull = MAXLR + 1; 93 int firstnonnull = MAXLR + 1;
97 int i; 94 int i;
98 95
99 if (!latencytop_enabled)
100 return;
101
102 /* skip kernel threads for now */ 96 /* skip kernel threads for now */
103 if (!tsk->mm) 97 if (!tsk->mm)
104 return; 98 return;
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 6196af8a8223..bfc95b3e4235 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -22,6 +22,7 @@ static int notifier_chain_register(struct notifier_block **nl,
22 struct notifier_block *n) 22 struct notifier_block *n)
23{ 23{
24 while ((*nl) != NULL) { 24 while ((*nl) != NULL) {
25 WARN_ONCE(((*nl) == n), "double register detected");
25 if (n->priority > (*nl)->priority) 26 if (n->priority > (*nl)->priority)
26 break; 27 break;
27 nl = &((*nl)->next); 28 nl = &((*nl)->next);
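
Registering the same notifier_block twice would splice the node into the singly-linked chain a second time and leave it with a cycle, after which notifier_call_chain() spins forever; the new WARN_ONCE() names the offender before that happens. A hypothetical reproducer (demo_chain, demo_nb, and demo_call are illustrative):

    static ATOMIC_NOTIFIER_HEAD(demo_chain);

    static int demo_call(struct notifier_block *nb, unsigned long ev, void *p)
    {
    	return NOTIFY_OK;
    }

    static struct notifier_block demo_nb = { .notifier_call = demo_call };

    static void demo_double_register(void)
    {
    	atomic_notifier_chain_register(&demo_chain, &demo_nb);
    	atomic_notifier_chain_register(&demo_chain, &demo_nb); /* WARN_ONCE fires */
    }
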
diff --git a/kernel/panic.c b/kernel/panic.c
index c1fcaad337b7..8779d64bace0 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -306,6 +306,8 @@ void panic(const char *fmt, ...)
306 * shutting down. But if there is a chance of 306 * shutting down. But if there is a chance of
307 * rebooting the system it will be rebooted. 307 * rebooting the system it will be rebooted.
308 */ 308 */
309 if (panic_reboot_mode != REBOOT_UNDEFINED)
310 reboot_mode = panic_reboot_mode;
309 emergency_restart(); 311 emergency_restart();
310 } 312 }
311#ifdef __sparc__ 313#ifdef __sparc__
@@ -321,6 +323,9 @@ void panic(const char *fmt, ...)
321 disabled_wait(); 323 disabled_wait();
322#endif 324#endif
323 pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); 325 pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf);
326
327 /* Do not scroll important messages printed above */
328 suppress_printk = 1;
324 local_irq_enable(); 329 local_irq_enable();
325 for (i = 0; ; i += PANIC_TIMER_STEP) { 330 for (i = 0; ; i += PANIC_TIMER_STEP) {
326 touch_softlockup_watchdog(); 331 touch_softlockup_watchdog();
diff --git a/kernel/pid.c b/kernel/pid.c
index 20881598bdfa..89548d35eefb 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -32,7 +32,6 @@
32#include <linux/init.h> 32#include <linux/init.h>
33#include <linux/rculist.h> 33#include <linux/rculist.h>
34#include <linux/memblock.h> 34#include <linux/memblock.h>
35#include <linux/hash.h>
36#include <linux/pid_namespace.h> 35#include <linux/pid_namespace.h>
37#include <linux/init_task.h> 36#include <linux/init_task.h>
38#include <linux/syscalls.h> 37#include <linux/syscalls.h>
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 02ca827b8fac..17102fd4c136 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -86,6 +86,12 @@ static DEFINE_SEMAPHORE(console_sem);
86struct console *console_drivers; 86struct console *console_drivers;
87EXPORT_SYMBOL_GPL(console_drivers); 87EXPORT_SYMBOL_GPL(console_drivers);
88 88
89/*
 90 * The system may need to suppress printk messages under certain
 91 * circumstances, e.g. after a kernel panic.
92 */
93int __read_mostly suppress_printk;
94
89#ifdef CONFIG_LOCKDEP 95#ifdef CONFIG_LOCKDEP
90static struct lockdep_map console_lock_dep_map = { 96static struct lockdep_map console_lock_dep_map = {
91 .name = "console_lock" 97 .name = "console_lock"
@@ -1943,6 +1949,10 @@ asmlinkage int vprintk_emit(int facility, int level,
1943 unsigned long flags; 1949 unsigned long flags;
1944 u64 curr_log_seq; 1950 u64 curr_log_seq;
1945 1951
1952 /* Suppress unimportant messages after panic happens */
1953 if (unlikely(suppress_printk))
1954 return 0;
1955
1946 if (level == LOGLEVEL_SCHED) { 1956 if (level == LOGLEVEL_SCHED) {
1947 level = LOGLEVEL_DEFAULT; 1957 level = LOGLEVEL_DEFAULT;
1948 in_sched = true; 1958 in_sched = true;
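
The gate above is a plain global flag checked at the top of the emit path: panic() sets suppress_printk only after printing its final "end Kernel panic" marker, so everything important is already out, and every later vprintk_emit() call returns early. A minimal sketch of the same pattern, with illustrative names (suppress_log and log_emit are stand-ins):

#include <stdarg.h>
#include <stdio.h>

static int suppress_log;	/* stands in for suppress_printk */

static int log_emit(const char *fmt, ...)
{
	va_list ap;
	int n;

	/* Drop unimportant messages once suppression is on */
	if (suppress_log)
		return 0;

	va_start(ap, fmt);
	n = vprintf(fmt, ap);
	va_end(ap);
	return n;
}

int main(void)
{
	log_emit("---[ end of important output ]---\n");
	suppress_log = 1;		/* what panic() does after its marker */
	log_emit("this is never printed\n");
	return 0;
}
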
diff --git a/kernel/reboot.c b/kernel/reboot.c
index e1b79b6a2735..b9e79e8c7226 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL(cad_pid);
31#define DEFAULT_REBOOT_MODE 31#define DEFAULT_REBOOT_MODE
32#endif 32#endif
33enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; 33enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
34enum reboot_mode panic_reboot_mode = REBOOT_UNDEFINED;
34 35
35/* 36/*
36 * This variable is used privately to keep track of whether or not 37 * This variable is used privately to keep track of whether or not
@@ -519,6 +520,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot);
519static int __init reboot_setup(char *str) 520static int __init reboot_setup(char *str)
520{ 521{
521 for (;;) { 522 for (;;) {
523 enum reboot_mode *mode;
524
522 /* 525 /*
523 * Having anything passed on the command line via 526 * Having anything passed on the command line via
524 * reboot= will cause us to disable DMI checking 527 * reboot= will cause us to disable DMI checking
@@ -526,17 +529,24 @@ static int __init reboot_setup(char *str)
526 */ 529 */
527 reboot_default = 0; 530 reboot_default = 0;
528 531
532 if (!strncmp(str, "panic_", 6)) {
533 mode = &panic_reboot_mode;
534 str += 6;
535 } else {
536 mode = &reboot_mode;
537 }
538
529 switch (*str) { 539 switch (*str) {
530 case 'w': 540 case 'w':
531 reboot_mode = REBOOT_WARM; 541 *mode = REBOOT_WARM;
532 break; 542 break;
533 543
534 case 'c': 544 case 'c':
535 reboot_mode = REBOOT_COLD; 545 *mode = REBOOT_COLD;
536 break; 546 break;
537 547
538 case 'h': 548 case 'h':
539 reboot_mode = REBOOT_HARD; 549 *mode = REBOOT_HARD;
540 break; 550 break;
541 551
542 case 's': 552 case 's':
@@ -553,11 +563,11 @@ static int __init reboot_setup(char *str)
553 if (rc) 563 if (rc)
554 return rc; 564 return rc;
555 } else 565 } else
556 reboot_mode = REBOOT_SOFT; 566 *mode = REBOOT_SOFT;
557 break; 567 break;
558 } 568 }
559 case 'g': 569 case 'g':
560 reboot_mode = REBOOT_GPIO; 570 *mode = REBOOT_GPIO;
561 break; 571 break;
562 572
563 case 'b': 573 case 'b':
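
The hunk above routes each reboot= token through a mode pointer: tokens starting with "panic_" update panic_reboot_mode, everything else updates reboot_mode, so (assuming the usual comma-separated token handling in the surrounding loop, not shown here) a command line such as reboot=warm,panic_cold can pick different behaviour for orderly reboots and for panics. A standalone sketch of the dispatch, with the enum reduced for illustration:

#include <stdio.h>
#include <string.h>

enum mode { UNDEFINED, WARM, COLD };

static enum mode reboot_m = UNDEFINED;
static enum mode panic_m = UNDEFINED;	/* stands in for panic_reboot_mode */

static void parse_token(const char *str)
{
	enum mode *mode;

	if (!strncmp(str, "panic_", 6)) {	/* same prefix test as reboot_setup() */
		mode = &panic_m;
		str += 6;
	} else {
		mode = &reboot_m;
	}

	switch (*str) {
	case 'w': *mode = WARM; break;
	case 'c': *mode = COLD; break;
	}
}

int main(void)
{
	parse_token("warm");
	parse_token("panic_cold");
	printf("reboot=%d panic=%d\n", reboot_m, panic_m);	/* reboot=1 panic=2 */
	return 0;
}
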
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 0e97ca9306ef..7acc632c3b82 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -4,6 +4,9 @@
4 * Copyright (c) 2018 Facebook, Inc. 4 * Copyright (c) 2018 Facebook, Inc.
5 * Author: Johannes Weiner <hannes@cmpxchg.org> 5 * Author: Johannes Weiner <hannes@cmpxchg.org>
6 * 6 *
7 * Polling support by Suren Baghdasaryan <surenb@google.com>
8 * Copyright (c) 2018 Google, Inc.
9 *
7 * When CPU, memory and IO are contended, tasks experience delays that 10 * When CPU, memory and IO are contended, tasks experience delays that
8 * reduce throughput and introduce latencies into the workload. Memory 11 * reduce throughput and introduce latencies into the workload. Memory
9 * and IO contention, in addition, can cause a full loss of forward 12 * and IO contention, in addition, can cause a full loss of forward
@@ -129,9 +132,13 @@
129#include <linux/seq_file.h> 132#include <linux/seq_file.h>
130#include <linux/proc_fs.h> 133#include <linux/proc_fs.h>
131#include <linux/seqlock.h> 134#include <linux/seqlock.h>
135#include <linux/uaccess.h>
132#include <linux/cgroup.h> 136#include <linux/cgroup.h>
133#include <linux/module.h> 137#include <linux/module.h>
134#include <linux/sched.h> 138#include <linux/sched.h>
139#include <linux/ctype.h>
140#include <linux/file.h>
141#include <linux/poll.h>
135#include <linux/psi.h> 142#include <linux/psi.h>
136#include "sched.h" 143#include "sched.h"
137 144
@@ -140,9 +147,9 @@ static int psi_bug __read_mostly;
140DEFINE_STATIC_KEY_FALSE(psi_disabled); 147DEFINE_STATIC_KEY_FALSE(psi_disabled);
141 148
142#ifdef CONFIG_PSI_DEFAULT_DISABLED 149#ifdef CONFIG_PSI_DEFAULT_DISABLED
143bool psi_enable; 150static bool psi_enable;
144#else 151#else
145bool psi_enable = true; 152static bool psi_enable = true;
146#endif 153#endif
147static int __init setup_psi(char *str) 154static int __init setup_psi(char *str)
148{ 155{
@@ -156,16 +163,21 @@ __setup("psi=", setup_psi);
156#define EXP_60s 1981 /* 1/exp(2s/60s) */ 163#define EXP_60s 1981 /* 1/exp(2s/60s) */
157#define EXP_300s 2034 /* 1/exp(2s/300s) */ 164#define EXP_300s 2034 /* 1/exp(2s/300s) */
158 165
166/* PSI trigger definitions */
167#define WINDOW_MIN_US 500000 /* Min window size is 500ms */
168#define WINDOW_MAX_US 10000000 /* Max window size is 10s */
169#define UPDATES_PER_WINDOW 10 /* 10 updates per window */
170
159/* Sampling frequency in nanoseconds */ 171/* Sampling frequency in nanoseconds */
160static u64 psi_period __read_mostly; 172static u64 psi_period __read_mostly;
161 173
162/* System-level pressure and stall tracking */ 174/* System-level pressure and stall tracking */
163static DEFINE_PER_CPU(struct psi_group_cpu, system_group_pcpu); 175static DEFINE_PER_CPU(struct psi_group_cpu, system_group_pcpu);
164static struct psi_group psi_system = { 176struct psi_group psi_system = {
165 .pcpu = &system_group_pcpu, 177 .pcpu = &system_group_pcpu,
166}; 178};
167 179
168static void psi_update_work(struct work_struct *work); 180static void psi_avgs_work(struct work_struct *work);
169 181
170static void group_init(struct psi_group *group) 182static void group_init(struct psi_group *group)
171{ 183{
@@ -173,9 +185,20 @@ static void group_init(struct psi_group *group)
173 185
174 for_each_possible_cpu(cpu) 186 for_each_possible_cpu(cpu)
175 seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); 187 seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
176 group->next_update = sched_clock() + psi_period; 188 group->avg_next_update = sched_clock() + psi_period;
177 INIT_DELAYED_WORK(&group->clock_work, psi_update_work); 189 INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
178 mutex_init(&group->stat_lock); 190 mutex_init(&group->avgs_lock);
191 /* Init trigger-related members */
192 atomic_set(&group->poll_scheduled, 0);
193 mutex_init(&group->trigger_lock);
194 INIT_LIST_HEAD(&group->triggers);
195 memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
196 group->poll_states = 0;
197 group->poll_min_period = U32_MAX;
198 memset(group->polling_total, 0, sizeof(group->polling_total));
199 group->polling_next_update = ULLONG_MAX;
200 group->polling_until = 0;
201 rcu_assign_pointer(group->poll_kworker, NULL);
179} 202}
180 203
181void __init psi_init(void) 204void __init psi_init(void)
@@ -210,20 +233,24 @@ static bool test_state(unsigned int *tasks, enum psi_states state)
210 } 233 }
211} 234}
212 235
213static void get_recent_times(struct psi_group *group, int cpu, u32 *times) 236static void get_recent_times(struct psi_group *group, int cpu,
237 enum psi_aggregators aggregator, u32 *times,
238 u32 *pchanged_states)
214{ 239{
215 struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu); 240 struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
216 unsigned int tasks[NR_PSI_TASK_COUNTS];
217 u64 now, state_start; 241 u64 now, state_start;
242 enum psi_states s;
218 unsigned int seq; 243 unsigned int seq;
219 int s; 244 u32 state_mask;
245
246 *pchanged_states = 0;
220 247
221 /* Snapshot a coherent view of the CPU state */ 248 /* Snapshot a coherent view of the CPU state */
222 do { 249 do {
223 seq = read_seqcount_begin(&groupc->seq); 250 seq = read_seqcount_begin(&groupc->seq);
224 now = cpu_clock(cpu); 251 now = cpu_clock(cpu);
225 memcpy(times, groupc->times, sizeof(groupc->times)); 252 memcpy(times, groupc->times, sizeof(groupc->times));
226 memcpy(tasks, groupc->tasks, sizeof(groupc->tasks)); 253 state_mask = groupc->state_mask;
227 state_start = groupc->state_start; 254 state_start = groupc->state_start;
228 } while (read_seqcount_retry(&groupc->seq, seq)); 255 } while (read_seqcount_retry(&groupc->seq, seq));
229 256
@@ -239,13 +266,15 @@ static void get_recent_times(struct psi_group *group, int cpu, u32 *times)
239 * (u32) and our reported pressure close to what's 266 * (u32) and our reported pressure close to what's
240 * actually happening. 267 * actually happening.
241 */ 268 */
242 if (test_state(tasks, s)) 269 if (state_mask & (1 << s))
243 times[s] += now - state_start; 270 times[s] += now - state_start;
244 271
245 delta = times[s] - groupc->times_prev[s]; 272 delta = times[s] - groupc->times_prev[aggregator][s];
246 groupc->times_prev[s] = times[s]; 273 groupc->times_prev[aggregator][s] = times[s];
247 274
248 times[s] = delta; 275 times[s] = delta;
276 if (delta)
277 *pchanged_states |= (1 << s);
249 } 278 }
250} 279}
251 280
@@ -269,17 +298,16 @@ static void calc_avgs(unsigned long avg[3], int missed_periods,
269 avg[2] = calc_load(avg[2], EXP_300s, pct); 298 avg[2] = calc_load(avg[2], EXP_300s, pct);
270} 299}
271 300
272static bool update_stats(struct psi_group *group) 301static void collect_percpu_times(struct psi_group *group,
302 enum psi_aggregators aggregator,
303 u32 *pchanged_states)
273{ 304{
274 u64 deltas[NR_PSI_STATES - 1] = { 0, }; 305 u64 deltas[NR_PSI_STATES - 1] = { 0, };
275 unsigned long missed_periods = 0;
276 unsigned long nonidle_total = 0; 306 unsigned long nonidle_total = 0;
277 u64 now, expires, period; 307 u32 changed_states = 0;
278 int cpu; 308 int cpu;
279 int s; 309 int s;
280 310
281 mutex_lock(&group->stat_lock);
282
283 /* 311 /*
284 * Collect the per-cpu time buckets and average them into a 312 * Collect the per-cpu time buckets and average them into a
285 * single time sample that is normalized to wallclock time. 313 * single time sample that is normalized to wallclock time.
@@ -291,8 +319,11 @@ static bool update_stats(struct psi_group *group)
291 for_each_possible_cpu(cpu) { 319 for_each_possible_cpu(cpu) {
292 u32 times[NR_PSI_STATES]; 320 u32 times[NR_PSI_STATES];
293 u32 nonidle; 321 u32 nonidle;
322 u32 cpu_changed_states;
294 323
295 get_recent_times(group, cpu, times); 324 get_recent_times(group, cpu, aggregator, times,
325 &cpu_changed_states);
326 changed_states |= cpu_changed_states;
296 327
297 nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]); 328 nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]);
298 nonidle_total += nonidle; 329 nonidle_total += nonidle;
@@ -315,13 +346,22 @@ static bool update_stats(struct psi_group *group)
315 346
316 /* total= */ 347 /* total= */
317 for (s = 0; s < NR_PSI_STATES - 1; s++) 348 for (s = 0; s < NR_PSI_STATES - 1; s++)
318 group->total[s] += div_u64(deltas[s], max(nonidle_total, 1UL)); 349 group->total[aggregator][s] +=
350 div_u64(deltas[s], max(nonidle_total, 1UL));
351
352 if (pchanged_states)
353 *pchanged_states = changed_states;
354}
355
356static u64 update_averages(struct psi_group *group, u64 now)
357{
358 unsigned long missed_periods = 0;
359 u64 expires, period;
360 u64 avg_next_update;
361 int s;
319 362
320 /* avgX= */ 363 /* avgX= */
321 now = sched_clock(); 364 expires = group->avg_next_update;
322 expires = group->next_update;
323 if (now < expires)
324 goto out;
325 if (now - expires >= psi_period) 365 if (now - expires >= psi_period)
326 missed_periods = div_u64(now - expires, psi_period); 366 missed_periods = div_u64(now - expires, psi_period);
327 367
@@ -332,14 +372,14 @@ static bool update_stats(struct psi_group *group)
332 * But the deltas we sample out of the per-cpu buckets above 372 * But the deltas we sample out of the per-cpu buckets above
333 * are based on the actual time elapsing between clock ticks. 373 * are based on the actual time elapsing between clock ticks.
334 */ 374 */
335 group->next_update = expires + ((1 + missed_periods) * psi_period); 375 avg_next_update = expires + ((1 + missed_periods) * psi_period);
336 period = now - (group->last_update + (missed_periods * psi_period)); 376 period = now - (group->avg_last_update + (missed_periods * psi_period));
337 group->last_update = now; 377 group->avg_last_update = now;
338 378
339 for (s = 0; s < NR_PSI_STATES - 1; s++) { 379 for (s = 0; s < NR_PSI_STATES - 1; s++) {
340 u32 sample; 380 u32 sample;
341 381
342 sample = group->total[s] - group->total_prev[s]; 382 sample = group->total[PSI_AVGS][s] - group->avg_total[s];
343 /* 383 /*
344 * Due to the lockless sampling of the time buckets, 384 * Due to the lockless sampling of the time buckets,
345 * recorded time deltas can slip into the next period, 385 * recorded time deltas can slip into the next period,
@@ -359,23 +399,30 @@ static bool update_stats(struct psi_group *group)
359 */ 399 */
360 if (sample > period) 400 if (sample > period)
361 sample = period; 401 sample = period;
362 group->total_prev[s] += sample; 402 group->avg_total[s] += sample;
363 calc_avgs(group->avg[s], missed_periods, sample, period); 403 calc_avgs(group->avg[s], missed_periods, sample, period);
364 } 404 }
365out: 405
366 mutex_unlock(&group->stat_lock); 406 return avg_next_update;
367 return nonidle_total;
368} 407}
369 408
370static void psi_update_work(struct work_struct *work) 409static void psi_avgs_work(struct work_struct *work)
371{ 410{
372 struct delayed_work *dwork; 411 struct delayed_work *dwork;
373 struct psi_group *group; 412 struct psi_group *group;
413 u32 changed_states;
374 bool nonidle; 414 bool nonidle;
415 u64 now;
375 416
376 dwork = to_delayed_work(work); 417 dwork = to_delayed_work(work);
377 group = container_of(dwork, struct psi_group, clock_work); 418 group = container_of(dwork, struct psi_group, avgs_work);
419
420 mutex_lock(&group->avgs_lock);
378 421
422 now = sched_clock();
423
424 collect_percpu_times(group, PSI_AVGS, &changed_states);
425 nonidle = changed_states & (1 << PSI_NONIDLE);
379 /* 426 /*
380 * If there is task activity, periodically fold the per-cpu 427 * If there is task activity, periodically fold the per-cpu
381 * times and feed samples into the running averages. If things 428 * times and feed samples into the running averages. If things
@@ -383,18 +430,196 @@ static void psi_update_work(struct work_struct *work)
383 * Once restarted, we'll catch up the running averages in one 430 * Once restarted, we'll catch up the running averages in one
384 * go - see calc_avgs() and missed_periods. 431 * go - see calc_avgs() and missed_periods.
385 */ 432 */
386 433 if (now >= group->avg_next_update)
387 nonidle = update_stats(group); 434 group->avg_next_update = update_averages(group, now);
388 435
389 if (nonidle) { 436 if (nonidle) {
390 unsigned long delay = 0; 437 schedule_delayed_work(dwork, nsecs_to_jiffies(
391 u64 now; 438 group->avg_next_update - now) + 1);
439 }
440
441 mutex_unlock(&group->avgs_lock);
442}
443
 444/* Trigger tracking window manipulations */
445static void window_reset(struct psi_window *win, u64 now, u64 value,
446 u64 prev_growth)
447{
448 win->start_time = now;
449 win->start_value = value;
450 win->prev_growth = prev_growth;
451}
452
453/*
454 * PSI growth tracking window update and growth calculation routine.
455 *
456 * This approximates a sliding tracking window by interpolating
457 * partially elapsed windows using historical growth data from the
458 * previous intervals. This minimizes memory requirements (by not storing
459 * all the intermediate values in the previous window) and simplifies
 460 * the calculations. It works well because the PSI signal changes only
 461 * in the positive direction, and over relatively small window sizes
 462 * the growth is close to linear.
463 */
464static u64 window_update(struct psi_window *win, u64 now, u64 value)
465{
466 u64 elapsed;
467 u64 growth;
468
469 elapsed = now - win->start_time;
470 growth = value - win->start_value;
471 /*
472 * After each tracking window passes win->start_value and
473 * win->start_time get reset and win->prev_growth stores
474 * the average per-window growth of the previous window.
475 * win->prev_growth is then used to interpolate additional
476 * growth from the previous window assuming it was linear.
477 */
478 if (elapsed > win->size)
479 window_reset(win, now, value, growth);
480 else {
481 u32 remaining;
482
483 remaining = win->size - elapsed;
484 growth += div_u64(win->prev_growth * remaining, win->size);
485 }
486
487 return growth;
488}
489
490static void init_triggers(struct psi_group *group, u64 now)
491{
492 struct psi_trigger *t;
493
494 list_for_each_entry(t, &group->triggers, node)
495 window_reset(&t->win, now,
496 group->total[PSI_POLL][t->state], 0);
497 memcpy(group->polling_total, group->total[PSI_POLL],
498 sizeof(group->polling_total));
499 group->polling_next_update = now + group->poll_min_period;
500}
501
502static u64 update_triggers(struct psi_group *group, u64 now)
503{
504 struct psi_trigger *t;
505 bool new_stall = false;
506 u64 *total = group->total[PSI_POLL];
507
508 /*
509 * On subsequent updates, calculate growth deltas and let
510 * watchers know when their specified thresholds are exceeded.
511 */
512 list_for_each_entry(t, &group->triggers, node) {
513 u64 growth;
514
515 /* Check for stall activity */
516 if (group->polling_total[t->state] == total[t->state])
517 continue;
518
519 /*
 520 * Multiple triggers might be looking at the same state;
521 * remember to update group->polling_total[] once we've
522 * been through all of them. Also remember to extend the
523 * polling time if we see new stall activity.
524 */
525 new_stall = true;
526
527 /* Calculate growth since last update */
528 growth = window_update(&t->win, now, total[t->state]);
529 if (growth < t->threshold)
530 continue;
531
532 /* Limit event signaling to once per window */
533 if (now < t->last_event_time + t->win.size)
534 continue;
535
536 /* Generate an event */
537 if (cmpxchg(&t->event, 0, 1) == 0)
538 wake_up_interruptible(&t->event_wait);
539 t->last_event_time = now;
540 }
541
542 if (new_stall)
543 memcpy(group->polling_total, total,
544 sizeof(group->polling_total));
545
546 return now + group->poll_min_period;
547}
548
549/*
 550 * Schedule polling if it's not already scheduled. It's safe to call this
 551 * even from hot paths: although kthread_queue_delayed_work() takes the
 552 * worker->lock spinlock, that lock is never contended, because the
 553 * poll_scheduled atomic prevents such competition.
554 */
555static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
556{
557 struct kthread_worker *kworker;
558
559 /* Do not reschedule if already scheduled */
560 if (atomic_cmpxchg(&group->poll_scheduled, 0, 1) != 0)
561 return;
562
563 rcu_read_lock();
392 564
393 now = sched_clock(); 565 kworker = rcu_dereference(group->poll_kworker);
394 if (group->next_update > now) 566 /*
395 delay = nsecs_to_jiffies(group->next_update - now) + 1; 567 * kworker might be NULL in case psi_trigger_destroy races with
396 schedule_delayed_work(dwork, delay); 568 * psi_task_change (hotpath) which can't use locks
569 */
570 if (likely(kworker))
571 kthread_queue_delayed_work(kworker, &group->poll_work, delay);
572 else
573 atomic_set(&group->poll_scheduled, 0);
574
575 rcu_read_unlock();
576}
577
578static void psi_poll_work(struct kthread_work *work)
579{
580 struct kthread_delayed_work *dwork;
581 struct psi_group *group;
582 u32 changed_states;
583 u64 now;
584
585 dwork = container_of(work, struct kthread_delayed_work, work);
586 group = container_of(dwork, struct psi_group, poll_work);
587
588 atomic_set(&group->poll_scheduled, 0);
589
590 mutex_lock(&group->trigger_lock);
591
592 now = sched_clock();
593
594 collect_percpu_times(group, PSI_POLL, &changed_states);
595
596 if (changed_states & group->poll_states) {
597 /* Initialize trigger windows when entering polling mode */
598 if (now > group->polling_until)
599 init_triggers(group, now);
600
601 /*
602 * Keep the monitor active for at least the duration of the
603 * minimum tracking window as long as monitor states are
604 * changing.
605 */
606 group->polling_until = now +
607 group->poll_min_period * UPDATES_PER_WINDOW;
608 }
609
610 if (now > group->polling_until) {
611 group->polling_next_update = ULLONG_MAX;
612 goto out;
397 } 613 }
614
615 if (now >= group->polling_next_update)
616 group->polling_next_update = update_triggers(group, now);
617
618 psi_schedule_poll_work(group,
619 nsecs_to_jiffies(group->polling_next_update - now) + 1);
620
621out:
622 mutex_unlock(&group->trigger_lock);
398} 623}
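
window_update() above approximates a sliding window cheaply: growth measured in the current, partially elapsed window is topped up with a pro-rated share of the previous window's growth. With a 1 s window that is 400 ms old, for example, 60% of prev_growth is added. A standalone sketch of just that arithmetic (user-space, u64 replaced by unsigned long long, reset path omitted):

#include <stdio.h>

struct win { unsigned long long size, start_time, start_value, prev_growth; };

/* Same interpolation as window_update(), minus the window reset */
static unsigned long long win_growth(struct win *w,
				     unsigned long long now,
				     unsigned long long value)
{
	unsigned long long elapsed = now - w->start_time;
	unsigned long long growth = value - w->start_value;

	if (elapsed <= w->size) {
		unsigned long long remaining = w->size - elapsed;

		/* assume the previous window grew linearly */
		growth += w->prev_growth * remaining / w->size;
	}
	return growth;
}

int main(void)
{
	/* 1 s window, 100 ms of stall seen in the previous window */
	struct win w = { .size = 1000, .start_time = 0,
			 .start_value = 0, .prev_growth = 100 };

	/* 400 ms in, 50 ms of new stall: 50 + 100 * 600/1000 = 110 */
	printf("%llu\n", win_growth(&w, 400, 50));
	return 0;
}
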
399 624
400static void record_times(struct psi_group_cpu *groupc, int cpu, 625static void record_times(struct psi_group_cpu *groupc, int cpu,
@@ -407,15 +632,15 @@ static void record_times(struct psi_group_cpu *groupc, int cpu,
407 delta = now - groupc->state_start; 632 delta = now - groupc->state_start;
408 groupc->state_start = now; 633 groupc->state_start = now;
409 634
410 if (test_state(groupc->tasks, PSI_IO_SOME)) { 635 if (groupc->state_mask & (1 << PSI_IO_SOME)) {
411 groupc->times[PSI_IO_SOME] += delta; 636 groupc->times[PSI_IO_SOME] += delta;
412 if (test_state(groupc->tasks, PSI_IO_FULL)) 637 if (groupc->state_mask & (1 << PSI_IO_FULL))
413 groupc->times[PSI_IO_FULL] += delta; 638 groupc->times[PSI_IO_FULL] += delta;
414 } 639 }
415 640
416 if (test_state(groupc->tasks, PSI_MEM_SOME)) { 641 if (groupc->state_mask & (1 << PSI_MEM_SOME)) {
417 groupc->times[PSI_MEM_SOME] += delta; 642 groupc->times[PSI_MEM_SOME] += delta;
418 if (test_state(groupc->tasks, PSI_MEM_FULL)) 643 if (groupc->state_mask & (1 << PSI_MEM_FULL))
419 groupc->times[PSI_MEM_FULL] += delta; 644 groupc->times[PSI_MEM_FULL] += delta;
420 else if (memstall_tick) { 645 else if (memstall_tick) {
421 u32 sample; 646 u32 sample;
@@ -436,18 +661,20 @@ static void record_times(struct psi_group_cpu *groupc, int cpu,
436 } 661 }
437 } 662 }
438 663
439 if (test_state(groupc->tasks, PSI_CPU_SOME)) 664 if (groupc->state_mask & (1 << PSI_CPU_SOME))
440 groupc->times[PSI_CPU_SOME] += delta; 665 groupc->times[PSI_CPU_SOME] += delta;
441 666
442 if (test_state(groupc->tasks, PSI_NONIDLE)) 667 if (groupc->state_mask & (1 << PSI_NONIDLE))
443 groupc->times[PSI_NONIDLE] += delta; 668 groupc->times[PSI_NONIDLE] += delta;
444} 669}
445 670
446static void psi_group_change(struct psi_group *group, int cpu, 671static u32 psi_group_change(struct psi_group *group, int cpu,
447 unsigned int clear, unsigned int set) 672 unsigned int clear, unsigned int set)
448{ 673{
449 struct psi_group_cpu *groupc; 674 struct psi_group_cpu *groupc;
450 unsigned int t, m; 675 unsigned int t, m;
676 enum psi_states s;
677 u32 state_mask = 0;
451 678
452 groupc = per_cpu_ptr(group->pcpu, cpu); 679 groupc = per_cpu_ptr(group->pcpu, cpu);
453 680
@@ -480,7 +707,16 @@ static void psi_group_change(struct psi_group *group, int cpu,
480 if (set & (1 << t)) 707 if (set & (1 << t))
481 groupc->tasks[t]++; 708 groupc->tasks[t]++;
482 709
710 /* Calculate state mask representing active states */
711 for (s = 0; s < NR_PSI_STATES; s++) {
712 if (test_state(groupc->tasks, s))
713 state_mask |= (1 << s);
714 }
715 groupc->state_mask = state_mask;
716
483 write_seqcount_end(&groupc->seq); 717 write_seqcount_end(&groupc->seq);
718
719 return state_mask;
484} 720}
485 721
486static struct psi_group *iterate_groups(struct task_struct *task, void **iter) 722static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
@@ -537,13 +773,17 @@ void psi_task_change(struct task_struct *task, int clear, int set)
537 */ 773 */
538 if (unlikely((clear & TSK_RUNNING) && 774 if (unlikely((clear & TSK_RUNNING) &&
539 (task->flags & PF_WQ_WORKER) && 775 (task->flags & PF_WQ_WORKER) &&
540 wq_worker_last_func(task) == psi_update_work)) 776 wq_worker_last_func(task) == psi_avgs_work))
541 wake_clock = false; 777 wake_clock = false;
542 778
543 while ((group = iterate_groups(task, &iter))) { 779 while ((group = iterate_groups(task, &iter))) {
544 psi_group_change(group, cpu, clear, set); 780 u32 state_mask = psi_group_change(group, cpu, clear, set);
545 if (wake_clock && !delayed_work_pending(&group->clock_work)) 781
546 schedule_delayed_work(&group->clock_work, PSI_FREQ); 782 if (state_mask & group->poll_states)
783 psi_schedule_poll_work(group, 1);
784
785 if (wake_clock && !delayed_work_pending(&group->avgs_work))
786 schedule_delayed_work(&group->avgs_work, PSI_FREQ);
547 } 787 }
548} 788}
549 789
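psi_schedule_poll_work(), called from the hot path above, relies on an atomic flag rather than a lock to make scheduling idempotent: whichever caller flips poll_scheduled from 0 to 1 queues the work, everyone else bails out, and the worker clears the flag before running so new activity can re-arm it. A compact sketch of that schedule-once idiom using C11 atomics (illustrative, not the kernel's kthread API):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int scheduled;	/* stands in for group->poll_scheduled */

static void queue_work(void)
{
	printf("work queued\n");
}

static void schedule_once(void)
{
	int expected = 0;

	/* Only the caller that wins the 0 -> 1 transition queues the work */
	if (!atomic_compare_exchange_strong(&scheduled, &expected, 1))
		return;
	queue_work();
}

static void work_fn(void)
{
	/* Clear the flag first so new activity can re-arm the work */
	atomic_store(&scheduled, 0);
	/* ... poll the per-cpu times and update triggers ... */
}

int main(void)
{
	schedule_once();	/* queues */
	schedule_once();	/* no-op: already pending */
	work_fn();
	schedule_once();	/* queues again */
	return 0;
}
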
@@ -640,8 +880,10 @@ void psi_cgroup_free(struct cgroup *cgroup)
640 if (static_branch_likely(&psi_disabled)) 880 if (static_branch_likely(&psi_disabled))
641 return; 881 return;
642 882
643 cancel_delayed_work_sync(&cgroup->psi.clock_work); 883 cancel_delayed_work_sync(&cgroup->psi.avgs_work);
644 free_percpu(cgroup->psi.pcpu); 884 free_percpu(cgroup->psi.pcpu);
885 /* All triggers must be removed by now */
886 WARN_ONCE(cgroup->psi.poll_states, "psi: trigger leak\n");
645} 887}
646 888
647/** 889/**
@@ -697,11 +939,18 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
697int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) 939int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
698{ 940{
699 int full; 941 int full;
942 u64 now;
700 943
701 if (static_branch_likely(&psi_disabled)) 944 if (static_branch_likely(&psi_disabled))
702 return -EOPNOTSUPP; 945 return -EOPNOTSUPP;
703 946
704 update_stats(group); 947 /* Update averages before reporting them */
948 mutex_lock(&group->avgs_lock);
949 now = sched_clock();
950 collect_percpu_times(group, PSI_AVGS, NULL);
951 if (now >= group->avg_next_update)
952 group->avg_next_update = update_averages(group, now);
953 mutex_unlock(&group->avgs_lock);
705 954
706 for (full = 0; full < 2 - (res == PSI_CPU); full++) { 955 for (full = 0; full < 2 - (res == PSI_CPU); full++) {
707 unsigned long avg[3]; 956 unsigned long avg[3];
@@ -710,7 +959,8 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
710 959
711 for (w = 0; w < 3; w++) 960 for (w = 0; w < 3; w++)
712 avg[w] = group->avg[res * 2 + full][w]; 961 avg[w] = group->avg[res * 2 + full][w];
713 total = div_u64(group->total[res * 2 + full], NSEC_PER_USEC); 962 total = div_u64(group->total[PSI_AVGS][res * 2 + full],
963 NSEC_PER_USEC);
714 964
715 seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n", 965 seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
716 full ? "full" : "some", 966 full ? "full" : "some",
@@ -753,25 +1003,270 @@ static int psi_cpu_open(struct inode *inode, struct file *file)
753 return single_open(file, psi_cpu_show, NULL); 1003 return single_open(file, psi_cpu_show, NULL);
754} 1004}
755 1005
1006struct psi_trigger *psi_trigger_create(struct psi_group *group,
1007 char *buf, size_t nbytes, enum psi_res res)
1008{
1009 struct psi_trigger *t;
1010 enum psi_states state;
1011 u32 threshold_us;
1012 u32 window_us;
1013
1014 if (static_branch_likely(&psi_disabled))
1015 return ERR_PTR(-EOPNOTSUPP);
1016
1017 if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
1018 state = PSI_IO_SOME + res * 2;
1019 else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
1020 state = PSI_IO_FULL + res * 2;
1021 else
1022 return ERR_PTR(-EINVAL);
1023
1024 if (state >= PSI_NONIDLE)
1025 return ERR_PTR(-EINVAL);
1026
1027 if (window_us < WINDOW_MIN_US ||
1028 window_us > WINDOW_MAX_US)
1029 return ERR_PTR(-EINVAL);
1030
1031 /* Check threshold */
1032 if (threshold_us == 0 || threshold_us > window_us)
1033 return ERR_PTR(-EINVAL);
1034
1035 t = kmalloc(sizeof(*t), GFP_KERNEL);
1036 if (!t)
1037 return ERR_PTR(-ENOMEM);
1038
1039 t->group = group;
1040 t->state = state;
1041 t->threshold = threshold_us * NSEC_PER_USEC;
1042 t->win.size = window_us * NSEC_PER_USEC;
1043 window_reset(&t->win, 0, 0, 0);
1044
1045 t->event = 0;
1046 t->last_event_time = 0;
1047 init_waitqueue_head(&t->event_wait);
1048 kref_init(&t->refcount);
1049
1050 mutex_lock(&group->trigger_lock);
1051
1052 if (!rcu_access_pointer(group->poll_kworker)) {
1053 struct sched_param param = {
1054 .sched_priority = MAX_RT_PRIO - 1,
1055 };
1056 struct kthread_worker *kworker;
1057
1058 kworker = kthread_create_worker(0, "psimon");
1059 if (IS_ERR(kworker)) {
1060 kfree(t);
1061 mutex_unlock(&group->trigger_lock);
1062 return ERR_CAST(kworker);
1063 }
1064 sched_setscheduler(kworker->task, SCHED_FIFO, &param);
1065 kthread_init_delayed_work(&group->poll_work,
1066 psi_poll_work);
1067 rcu_assign_pointer(group->poll_kworker, kworker);
1068 }
1069
1070 list_add(&t->node, &group->triggers);
1071 group->poll_min_period = min(group->poll_min_period,
1072 div_u64(t->win.size, UPDATES_PER_WINDOW));
1073 group->nr_triggers[t->state]++;
1074 group->poll_states |= (1 << t->state);
1075
1076 mutex_unlock(&group->trigger_lock);
1077
1078 return t;
1079}
1080
1081static void psi_trigger_destroy(struct kref *ref)
1082{
1083 struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
1084 struct psi_group *group = t->group;
1085 struct kthread_worker *kworker_to_destroy = NULL;
1086
1087 if (static_branch_likely(&psi_disabled))
1088 return;
1089
1090 /*
1091 * Wake up waiters to stop polling. Can happen if a cgroup is deleted
1092 * from under a polling process.
1093 */
1094 wake_up_interruptible(&t->event_wait);
1095
1096 mutex_lock(&group->trigger_lock);
1097
1098 if (!list_empty(&t->node)) {
1099 struct psi_trigger *tmp;
1100 u64 period = ULLONG_MAX;
1101
1102 list_del(&t->node);
1103 group->nr_triggers[t->state]--;
1104 if (!group->nr_triggers[t->state])
1105 group->poll_states &= ~(1 << t->state);
1106 /* reset min update period for the remaining triggers */
1107 list_for_each_entry(tmp, &group->triggers, node)
1108 period = min(period, div_u64(tmp->win.size,
1109 UPDATES_PER_WINDOW));
1110 group->poll_min_period = period;
1111 /* Destroy poll_kworker when the last trigger is destroyed */
1112 if (group->poll_states == 0) {
1113 group->polling_until = 0;
1114 kworker_to_destroy = rcu_dereference_protected(
1115 group->poll_kworker,
1116 lockdep_is_held(&group->trigger_lock));
1117 rcu_assign_pointer(group->poll_kworker, NULL);
1118 }
1119 }
1120
1121 mutex_unlock(&group->trigger_lock);
1122
1123 /*
1124 * Wait for the RCU readers of both *trigger_ptr (from psi_trigger_replace)
1125 * and poll_kworker to complete their read-side critical sections
1126 * before destroying the trigger and, optionally, the poll_kworker
1127 */
1128 synchronize_rcu();
1129 /*
1130 * Destroy the kworker after releasing trigger_lock to prevent a
1131 * deadlock while waiting for psi_poll_work to acquire trigger_lock
1132 */
1133 if (kworker_to_destroy) {
1134 kthread_cancel_delayed_work_sync(&group->poll_work);
1135 kthread_destroy_worker(kworker_to_destroy);
1136 }
1137 kfree(t);
1138}
1139
1140void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
1141{
1142 struct psi_trigger *old = *trigger_ptr;
1143
1144 if (static_branch_likely(&psi_disabled))
1145 return;
1146
1147 rcu_assign_pointer(*trigger_ptr, new);
1148 if (old)
1149 kref_put(&old->refcount, psi_trigger_destroy);
1150}
1151
1152__poll_t psi_trigger_poll(void **trigger_ptr,
1153 struct file *file, poll_table *wait)
1154{
1155 __poll_t ret = DEFAULT_POLLMASK;
1156 struct psi_trigger *t;
1157
1158 if (static_branch_likely(&psi_disabled))
1159 return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
1160
1161 rcu_read_lock();
1162
1163 t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
1164 if (!t) {
1165 rcu_read_unlock();
1166 return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
1167 }
1168 kref_get(&t->refcount);
1169
1170 rcu_read_unlock();
1171
1172 poll_wait(file, &t->event_wait, wait);
1173
1174 if (cmpxchg(&t->event, 1, 0) == 1)
1175 ret |= EPOLLPRI;
1176
1177 kref_put(&t->refcount, psi_trigger_destroy);
1178
1179 return ret;
1180}
1181
1182static ssize_t psi_write(struct file *file, const char __user *user_buf,
1183 size_t nbytes, enum psi_res res)
1184{
1185 char buf[32];
1186 size_t buf_size;
1187 struct seq_file *seq;
1188 struct psi_trigger *new;
1189
1190 if (static_branch_likely(&psi_disabled))
1191 return -EOPNOTSUPP;
1192
1193 buf_size = min(nbytes, (sizeof(buf) - 1));
1194 if (copy_from_user(buf, user_buf, buf_size))
1195 return -EFAULT;
1196
1197 buf[buf_size - 1] = '\0';
1198
1199 new = psi_trigger_create(&psi_system, buf, nbytes, res);
1200 if (IS_ERR(new))
1201 return PTR_ERR(new);
1202
1203 seq = file->private_data;
1204 /* Take seq->lock to protect seq->private from concurrent writes */
1205 mutex_lock(&seq->lock);
1206 psi_trigger_replace(&seq->private, new);
1207 mutex_unlock(&seq->lock);
1208
1209 return nbytes;
1210}
1211
1212static ssize_t psi_io_write(struct file *file, const char __user *user_buf,
1213 size_t nbytes, loff_t *ppos)
1214{
1215 return psi_write(file, user_buf, nbytes, PSI_IO);
1216}
1217
1218static ssize_t psi_memory_write(struct file *file, const char __user *user_buf,
1219 size_t nbytes, loff_t *ppos)
1220{
1221 return psi_write(file, user_buf, nbytes, PSI_MEM);
1222}
1223
1224static ssize_t psi_cpu_write(struct file *file, const char __user *user_buf,
1225 size_t nbytes, loff_t *ppos)
1226{
1227 return psi_write(file, user_buf, nbytes, PSI_CPU);
1228}
1229
1230static __poll_t psi_fop_poll(struct file *file, poll_table *wait)
1231{
1232 struct seq_file *seq = file->private_data;
1233
1234 return psi_trigger_poll(&seq->private, file, wait);
1235}
1236
1237static int psi_fop_release(struct inode *inode, struct file *file)
1238{
1239 struct seq_file *seq = file->private_data;
1240
1241 psi_trigger_replace(&seq->private, NULL);
1242 return single_release(inode, file);
1243}
1244
756static const struct file_operations psi_io_fops = { 1245static const struct file_operations psi_io_fops = {
757 .open = psi_io_open, 1246 .open = psi_io_open,
758 .read = seq_read, 1247 .read = seq_read,
759 .llseek = seq_lseek, 1248 .llseek = seq_lseek,
760 .release = single_release, 1249 .write = psi_io_write,
1250 .poll = psi_fop_poll,
1251 .release = psi_fop_release,
761}; 1252};
762 1253
763static const struct file_operations psi_memory_fops = { 1254static const struct file_operations psi_memory_fops = {
764 .open = psi_memory_open, 1255 .open = psi_memory_open,
765 .read = seq_read, 1256 .read = seq_read,
766 .llseek = seq_lseek, 1257 .llseek = seq_lseek,
767 .release = single_release, 1258 .write = psi_memory_write,
1259 .poll = psi_fop_poll,
1260 .release = psi_fop_release,
768}; 1261};
769 1262
770static const struct file_operations psi_cpu_fops = { 1263static const struct file_operations psi_cpu_fops = {
771 .open = psi_cpu_open, 1264 .open = psi_cpu_open,
772 .read = seq_read, 1265 .read = seq_read,
773 .llseek = seq_lseek, 1266 .llseek = seq_lseek,
774 .release = single_release, 1267 .write = psi_cpu_write,
1268 .poll = psi_fop_poll,
1269 .release = psi_fop_release,
775}; 1270};
776 1271
777static int __init psi_proc_init(void) 1272static int __init psi_proc_init(void)
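
Taken together, the new write/poll file operations give userspace the trigger interface: write "some <threshold_us> <window_us>" (or "full ...") to one of the /proc/pressure files, then poll the same file descriptor for EPOLLPRI. A sketch of a minimal monitor along the lines of the example documented with this series (150 ms of stall within any 1 s window; error handling reduced to early returns):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char trig[] = "some 150000 1000000";	/* 150ms stall per 1s window */
	struct pollfd fds;

	fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
	if (fds.fd < 0)
		return 1;
	/* Write the trigger, including the terminating NUL */
	if (write(fds.fd, trig, strlen(trig) + 1) < 0)
		return 1;
	fds.events = POLLPRI;

	for (;;) {
		if (poll(&fds, 1, -1) < 0)
			return 1;
		if (fds.revents & POLLERR)
			return 1;		/* monitor was torn down */
		if (fds.revents & POLLPRI)
			printf("memory pressure event\n");
	}
}
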
diff --git a/kernel/signal.c b/kernel/signal.c
index 62f9aea4a15a..c4dd66436fc5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -840,6 +840,7 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info,
840 */ 840 */
841 if (!sid || sid == task_session(current)) 841 if (!sid || sid == task_session(current))
842 break; 842 break;
843 /* fall through */
843 default: 844 default:
844 return -EPERM; 845 return -EPERM;
845 } 846 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba158f61aab4..943c89178e3d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2886,8 +2886,10 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
2886 if (neg) 2886 if (neg)
2887 continue; 2887 continue;
2888 val = convmul * val / convdiv; 2888 val = convmul * val / convdiv;
2889 if ((min && val < *min) || (max && val > *max)) 2889 if ((min && val < *min) || (max && val > *max)) {
2890 continue; 2890 err = -EINVAL;
2891 break;
2892 }
2891 *i = val; 2893 *i = val;
2892 } else { 2894 } else {
2893 val = convdiv * (*i) / convmul; 2895 val = convdiv * (*i) / convmul;
@@ -3170,17 +3172,19 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
3170 3172
3171 if (write) { 3173 if (write) {
3172 char *kbuf, *p; 3174 char *kbuf, *p;
3175 size_t skipped = 0;
3173 3176
3174 if (left > PAGE_SIZE - 1) 3177 if (left > PAGE_SIZE - 1) {
3175 left = PAGE_SIZE - 1; 3178 left = PAGE_SIZE - 1;
3179 /* How much of the buffer we'll skip this pass */
3180 skipped = *lenp - left;
3181 }
3176 3182
3177 p = kbuf = memdup_user_nul(buffer, left); 3183 p = kbuf = memdup_user_nul(buffer, left);
3178 if (IS_ERR(kbuf)) 3184 if (IS_ERR(kbuf))
3179 return PTR_ERR(kbuf); 3185 return PTR_ERR(kbuf);
3180 3186
3181 tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len), 3187 tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
3182 sizeof(unsigned long),
3183 GFP_KERNEL);
3184 if (!tmp_bitmap) { 3188 if (!tmp_bitmap) {
3185 kfree(kbuf); 3189 kfree(kbuf);
3186 return -ENOMEM; 3190 return -ENOMEM;
@@ -3189,9 +3193,22 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
3189 while (!err && left) { 3193 while (!err && left) {
3190 unsigned long val_a, val_b; 3194 unsigned long val_a, val_b;
3191 bool neg; 3195 bool neg;
3196 size_t saved_left;
3192 3197
3198 /* In case we stop parsing mid-number, we can reset */
3199 saved_left = left;
3193 err = proc_get_long(&p, &left, &val_a, &neg, tr_a, 3200 err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3194 sizeof(tr_a), &c); 3201 sizeof(tr_a), &c);
3202 /*
3203 * If we consumed the entirety of a truncated buffer or
3204 * only one char is left (may be a "-"), then stop here,
3205 * reset, & come back for more.
3206 */
3207 if ((left <= 1) && skipped) {
3208 left = saved_left;
3209 break;
3210 }
3211
3195 if (err) 3212 if (err)
3196 break; 3213 break;
3197 if (val_a >= bitmap_len || neg) { 3214 if (val_a >= bitmap_len || neg) {
@@ -3209,6 +3226,15 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
3209 err = proc_get_long(&p, &left, &val_b, 3226 err = proc_get_long(&p, &left, &val_b,
3210 &neg, tr_b, sizeof(tr_b), 3227 &neg, tr_b, sizeof(tr_b),
3211 &c); 3228 &c);
3229 /*
3230 * If we consumed all of a truncated buffer,
3231 * then stop here, reset, & come back for more.
3232 */
3233 if (!left && skipped) {
3234 left = saved_left;
3235 break;
3236 }
3237
3212 if (err) 3238 if (err)
3213 break; 3239 break;
3214 if (val_b >= bitmap_len || neg || 3240 if (val_b >= bitmap_len || neg ||
@@ -3227,6 +3253,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
3227 proc_skip_char(&p, &left, '\n'); 3253 proc_skip_char(&p, &left, '\n');
3228 } 3254 }
3229 kfree(kbuf); 3255 kfree(kbuf);
3256 left += skipped;
3230 } else { 3257 } else {
3231 unsigned long bit_a, bit_b = 0; 3258 unsigned long bit_a, bit_b = 0;
3232 3259
@@ -3271,7 +3298,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
3271 *ppos += *lenp; 3298 *ppos += *lenp;
3272 } 3299 }
3273 3300
3274 kfree(tmp_bitmap); 3301 bitmap_free(tmp_bitmap);
3275 return err; 3302 return err;
3276} 3303}
3277 3304
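
The skipped/saved_left bookkeeping above lets a bitmap write larger than PAGE_SIZE be consumed across several passes: each pass parses only what fits, rewinds to the start of any number that may have been cut off at the truncation point, and returns a short count so the caller's next write() resumes exactly there. A rough user-space analogue of the rewind rule (parse_window is illustrative; the kernel side uses proc_get_long() and *ppos bookkeeping instead):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
 * Parse comma-separated numbers from a window of at most 'cap' bytes.
 * Returns how many bytes were consumed; a number that might continue
 * past a truncated window is left unconsumed for the next pass.
 */
static size_t parse_window(const char *buf, size_t len, size_t cap)
{
	size_t left = len < cap ? len : cap;
	size_t skipped = len > cap ? len - cap : 0;
	const char *p = buf;

	while (left) {
		const char *start = p;	/* like saved_left: rewind point */
		unsigned long val = 0;

		while (left && isdigit((unsigned char)*p)) {
			val = val * 10 + (*p - '0');
			p++, left--;
		}
		/* Truncated buffer and we may be mid-number: rewind, stop */
		if (left <= 1 && skipped) {
			p = start;
			break;
		}
		printf("got %lu\n", val);
		if (left) {
			if (*p != ',')
				break;
			p++, left--;
		}
	}
	return p - buf;	/* short count => caller resumes here */
}

int main(void)
{
	const char *s = "10,2000,3";
	size_t used = parse_window(s, strlen(s), 8);	/* window cuts near "2000" */

	printf("consumed %zu of %zu\n", used, strlen(s));
	return 0;
}
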
diff --git a/kernel/user.c b/kernel/user.c
index 0df9b1640b2a..88b834f0eebc 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -185,7 +185,7 @@ struct user_struct *alloc_uid(kuid_t uid)
185 if (!up) { 185 if (!up) {
186 new = kmem_cache_zalloc(uid_cachep, GFP_KERNEL); 186 new = kmem_cache_zalloc(uid_cachep, GFP_KERNEL);
187 if (!new) 187 if (!new)
188 goto out_unlock; 188 return NULL;
189 189
190 new->uid = uid; 190 new->uid = uid;
191 refcount_set(&new->__count, 1); 191 refcount_set(&new->__count, 1);
@@ -199,8 +199,6 @@ struct user_struct *alloc_uid(kuid_t uid)
199 spin_lock_irq(&uidhash_lock); 199 spin_lock_irq(&uidhash_lock);
200 up = uid_hash_find(uid, hashent); 200 up = uid_hash_find(uid, hashent);
201 if (up) { 201 if (up) {
202 key_put(new->uid_keyring);
203 key_put(new->session_keyring);
204 kmem_cache_free(uid_cachep, new); 202 kmem_cache_free(uid_cachep, new);
205 } else { 203 } else {
206 uid_hash_insert(new, hashent); 204 uid_hash_insert(new, hashent);
@@ -210,9 +208,6 @@ struct user_struct *alloc_uid(kuid_t uid)
210 } 208 }
211 209
212 return up; 210 return up;
213
214out_unlock:
215 return NULL;
216} 211}
217 212
218static int __init uid_cache_init(void) 213static int __init uid_cache_init(void)
diff --git a/lib/Kconfig b/lib/Kconfig
index f323b85ad11c..3577609b61be 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -46,9 +46,6 @@ config HAVE_ARCH_BITREVERSE
46 This option enables the use of hardware bit-reversal instructions on 46 This option enables the use of hardware bit-reversal instructions on
47 architectures which support such operations. 47 architectures which support such operations.
48 48
49config RATIONAL
50 bool
51
52config GENERIC_STRNCPY_FROM_USER 49config GENERIC_STRNCPY_FROM_USER
53 bool 50 bool
54 51
@@ -61,6 +58,8 @@ config GENERIC_NET_UTILS
61config GENERIC_FIND_FIRST_BIT 58config GENERIC_FIND_FIRST_BIT
62 bool 59 bool
63 60
61source "lib/math/Kconfig"
62
64config NO_GENERIC_PCI_IOPORT_MAP 63config NO_GENERIC_PCI_IOPORT_MAP
65 bool 64 bool
66 65
@@ -531,12 +530,6 @@ config LRU_CACHE
531config CLZ_TAB 530config CLZ_TAB
532 bool 531 bool
533 532
534config CORDIC
535 tristate "CORDIC algorithm"
536 help
537 This option provides an implementation of the CORDIC algorithm;
538 calculations are in fixed point. Module will be called cordic.
539
540config DDR 533config DDR
541 bool "JEDEC DDR data" 534 bool "JEDEC DDR data"
542 help 535 help
@@ -628,9 +621,6 @@ config SBITMAP
628config PARMAN 621config PARMAN
629 tristate "parman" if COMPILE_TEST 622 tristate "parman" if COMPILE_TEST
630 623
631config PRIME_NUMBERS
632 tristate
633
634config STRING_SELFTEST 624config STRING_SELFTEST
635 tristate "Test string functions" 625 tristate "Test string functions"
636 626
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d695ec1477f3..fdfa173651eb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -318,6 +318,20 @@ config HEADERS_CHECK
318 exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in 318 exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in
319 your build tree), to make sure they're suitable. 319 your build tree), to make sure they're suitable.
320 320
321config OPTIMIZE_INLINING
322 bool "Allow compiler to uninline functions marked 'inline'"
323 help
324 This option determines if the kernel forces gcc to inline the functions
325 developers have marked 'inline'. Doing so takes away freedom from gcc to
326 do what it thinks is best, which is desirable for the gcc 3.x series of
327 compilers. The gcc 4.x series have a rewritten inlining algorithm and
328 enabling this option will generate a smaller kernel there. Hopefully
329 this algorithm is so good that allowing gcc 4.x and above to make the
330 decision will become the default in the future. Until then this option
331 is there to test gcc for this.
332
333 If unsure, say N.
334
321config DEBUG_SECTION_MISMATCH 335config DEBUG_SECTION_MISMATCH
322 bool "Enable full Section mismatch analysis" 336 bool "Enable full Section mismatch analysis"
323 help 337 help
@@ -446,6 +460,15 @@ config DEBUG_KERNEL
446 Say Y here if you are developing drivers or trying to debug and 460 Say Y here if you are developing drivers or trying to debug and
447 identify kernel problems. 461 identify kernel problems.
448 462
463config DEBUG_MISC
464 bool "Miscellaneous debug code"
465 default DEBUG_KERNEL
466 depends on DEBUG_KERNEL
467 help
468 Say Y here if you need to enable miscellaneous debug code that should
469 be under a more specific debug option but isn't.
470
471
449menu "Memory Debugging" 472menu "Memory Debugging"
450 473
451source "mm/Kconfig.debug" 474source "mm/Kconfig.debug"
@@ -1358,7 +1381,7 @@ config DEBUG_LIST
1358 1381
1359 If unsure, say N. 1382 If unsure, say N.
1360 1383
1361config DEBUG_PI_LIST 1384config DEBUG_PLIST
1362 bool "Debug priority linked list manipulation" 1385 bool "Debug priority linked list manipulation"
1363 depends on DEBUG_KERNEL 1386 depends on DEBUG_KERNEL
1364 help 1387 help
diff --git a/lib/Makefile b/lib/Makefile
index 83d7df2661ff..fb7697031a79 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -30,7 +30,7 @@ endif
30 30
31lib-y := ctype.o string.o vsprintf.o cmdline.o \ 31lib-y := ctype.o string.o vsprintf.o cmdline.o \
32 rbtree.o radix-tree.o timerqueue.o xarray.o \ 32 rbtree.o radix-tree.o timerqueue.o xarray.o \
33 idr.o int_sqrt.o extable.o \ 33 idr.o extable.o \
34 sha1.o chacha.o irq_regs.o argv_split.o \ 34 sha1.o chacha.o irq_regs.o argv_split.o \
35 flex_proportions.o ratelimit.o show_mem.o \ 35 flex_proportions.o ratelimit.o show_mem.o \
36 is_single_threaded.o plist.o decompress.o kobject_uevent.o \ 36 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
@@ -44,11 +44,11 @@ lib-$(CONFIG_SMP) += cpumask.o
44lib-y += kobject.o klist.o 44lib-y += kobject.o klist.o
45obj-y += lockref.o 45obj-y += lockref.o
46 46
47obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ 47obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
48 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ 48 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
49 gcd.o lcm.o list_sort.o uuid.o iov_iter.o clz_ctz.o \ 49 list_sort.o uuid.o iov_iter.o clz_ctz.o \
50 bsearch.o find_bit.o llist.o memweight.o kfifo.o \ 50 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
51 percpu-refcount.o rhashtable.o reciprocal_div.o \ 51 percpu-refcount.o rhashtable.o \
52 once.o refcount.o usercopy.o errseq.o bucket_locks.o \ 52 once.o refcount.o usercopy.o errseq.o bucket_locks.o \
53 generic-radix-tree.o 53 generic-radix-tree.o
54obj-$(CONFIG_STRING_SELFTEST) += test_string.o 54obj-$(CONFIG_STRING_SELFTEST) += test_string.o
@@ -102,6 +102,8 @@ endif
102obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o 102obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o
103CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any) 103CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any)
104 104
105obj-y += math/
106
105obj-$(CONFIG_GENERIC_IOMAP) += iomap.o 107obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
106obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o 108obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
107obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o 109obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
@@ -121,7 +123,6 @@ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
121 123
122obj-$(CONFIG_BITREVERSE) += bitrev.o 124obj-$(CONFIG_BITREVERSE) += bitrev.o
123obj-$(CONFIG_PACKING) += packing.o 125obj-$(CONFIG_PACKING) += packing.o
124obj-$(CONFIG_RATIONAL) += rational.o
125obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o 126obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
126obj-$(CONFIG_CRC16) += crc16.o 127obj-$(CONFIG_CRC16) += crc16.o
127obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o 128obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
@@ -195,8 +196,6 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
195 196
196obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o 197obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
197 198
198obj-$(CONFIG_CORDIC) += cordic.o
199
200obj-$(CONFIG_DQL) += dynamic_queue_limits.o 199obj-$(CONFIG_DQL) += dynamic_queue_limits.o
201 200
202obj-$(CONFIG_GLOB) += glob.o 201obj-$(CONFIG_GLOB) += glob.o
@@ -238,8 +237,6 @@ obj-$(CONFIG_ASN1) += asn1_decoder.o
238 237
239obj-$(CONFIG_FONT_SUPPORT) += fonts/ 238obj-$(CONFIG_FONT_SUPPORT) += fonts/
240 239
241obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
242
243hostprogs-y := gen_crc32table 240hostprogs-y := gen_crc32table
244hostprogs-y += gen_crc64table 241hostprogs-y += gen_crc64table
245clean-files := crc32table.h 242clean-files := crc32table.h
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 98872e9025da..f235434df87b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -20,6 +20,8 @@
20 20
21#include <asm/page.h> 21#include <asm/page.h>
22 22
23#include "kstrtox.h"
24
23/** 25/**
24 * DOC: bitmap introduction 26 * DOC: bitmap introduction
25 * 27 *
@@ -477,12 +479,128 @@ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp,
477} 479}
478EXPORT_SYMBOL(bitmap_print_to_pagebuf); 480EXPORT_SYMBOL(bitmap_print_to_pagebuf);
479 481
482/*
483 * Region 9-38:4/10 describes the following bitmap structure:
484 * 0 9 12 18 38
485 * .........****......****......****......
486 * ^ ^ ^ ^
487 * start off group_len end
488 */
489struct region {
490 unsigned int start;
491 unsigned int off;
492 unsigned int group_len;
493 unsigned int end;
494};
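
As the picture above illustrates, a region "start-end:off/group_len" sets the first off bits of every group_len-sized group between start and end. A standalone check of the "9-38:4/10" example, mirroring bitmap_set_region() with user-space types (set_region is an illustrative rewrite, not the kernel function):

#include <stdio.h>

struct region { unsigned int start, off, group_len, end; };

static void set_region(const struct region *r, unsigned char *bits)
{
	unsigned int start, i;

	for (start = r->start; start <= r->end; start += r->group_len) {
		unsigned int n = r->end - start + 1;

		if (n > r->off)
			n = r->off;	/* min(end - start + 1, off) */
		for (i = 0; i < n; i++)
			bits[start + i] = 1;
	}
}

int main(void)
{
	unsigned char bits[40] = { 0 };
	struct region r = { .start = 9, .off = 4, .group_len = 10, .end = 38 };
	unsigned int i;

	set_region(&r, bits);
	for (i = 0; i < 40; i++)
		putchar(bits[i] ? '*' : '.');
	putchar('\n');	/* matches the ASCII art in the comment above */
	return 0;
}
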
495
496static int bitmap_set_region(const struct region *r,
497 unsigned long *bitmap, int nbits)
498{
499 unsigned int start;
500
501 if (r->end >= nbits)
502 return -ERANGE;
503
504 for (start = r->start; start <= r->end; start += r->group_len)
505 bitmap_set(bitmap, start, min(r->end - start + 1, r->off));
506
507 return 0;
508}
509
510static int bitmap_check_region(const struct region *r)
511{
512 if (r->start > r->end || r->group_len == 0 || r->off > r->group_len)
513 return -EINVAL;
514
515 return 0;
516}
517
518static const char *bitmap_getnum(const char *str, unsigned int *num)
519{
520 unsigned long long n;
521 unsigned int len;
522
523 len = _parse_integer(str, 10, &n);
524 if (!len)
525 return ERR_PTR(-EINVAL);
526 if (len & KSTRTOX_OVERFLOW || n != (unsigned int)n)
527 return ERR_PTR(-EOVERFLOW);
528
529 *num = n;
530 return str + len;
531}
532
533static inline bool end_of_str(char c)
534{
535 return c == '\0' || c == '\n';
536}
537
538static inline bool __end_of_region(char c)
539{
540 return isspace(c) || c == ',';
541}
542
543static inline bool end_of_region(char c)
544{
545 return __end_of_region(c) || end_of_str(c);
546}
547
548/*
 549 * The format allows commas and whitespaces at the beginning
550 * of the region.
551 */
552static const char *bitmap_find_region(const char *str)
553{
554 while (__end_of_region(*str))
555 str++;
556
557 return end_of_str(*str) ? NULL : str;
558}
559
560static const char *bitmap_parse_region(const char *str, struct region *r)
561{
562 str = bitmap_getnum(str, &r->start);
563 if (IS_ERR(str))
564 return str;
565
566 if (end_of_region(*str))
567 goto no_end;
568
569 if (*str != '-')
570 return ERR_PTR(-EINVAL);
571
572 str = bitmap_getnum(str + 1, &r->end);
573 if (IS_ERR(str))
574 return str;
575
576 if (end_of_region(*str))
577 goto no_pattern;
578
579 if (*str != ':')
580 return ERR_PTR(-EINVAL);
581
582 str = bitmap_getnum(str + 1, &r->off);
583 if (IS_ERR(str))
584 return str;
585
586 if (*str != '/')
587 return ERR_PTR(-EINVAL);
588
589 return bitmap_getnum(str + 1, &r->group_len);
590
591no_end:
592 r->end = r->start;
593no_pattern:
594 r->off = r->end + 1;
595 r->group_len = r->end + 1;
596
597 return end_of_str(*str) ? NULL : str;
598}
599
480/** 600/**
481 * __bitmap_parselist - convert list format ASCII string to bitmap 601 * bitmap_parselist - convert list format ASCII string to bitmap
482 * @buf: read nul-terminated user string from this buffer 602 * @buf: read user string from this buffer; must be terminated
483 * @buflen: buffer size in bytes. If string is smaller than this 603 * with a \0 or \n.
484 * then it must be terminated with a \0.
485 * @is_user: location of buffer, 0 indicates kernel space
486 * @maskp: write resulting mask here 604 * @maskp: write resulting mask here
487 * @nmaskbits: number of bits in mask to be written 605 * @nmaskbits: number of bits in mask to be written
488 * 606 *
@@ -498,127 +616,38 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf);
498 * 616 *
499 * Returns: 0 on success, -errno on invalid input strings. Error values: 617 * Returns: 0 on success, -errno on invalid input strings. Error values:
500 * 618 *
501 * - ``-EINVAL``: second number in range smaller than first 619 * - ``-EINVAL``: wrong region format
502 * - ``-EINVAL``: invalid character in string 620 * - ``-EINVAL``: invalid character in string
503 * - ``-ERANGE``: bit number specified too large for mask 621 * - ``-ERANGE``: bit number specified too large for mask
622 * - ``-EOVERFLOW``: integer overflow in the input parameters
504 */ 623 */
505static int __bitmap_parselist(const char *buf, unsigned int buflen, 624int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits)
506 int is_user, unsigned long *maskp,
507 int nmaskbits)
508{ 625{
509 unsigned int a, b, old_a, old_b; 626 struct region r;
510 unsigned int group_size, used_size, off; 627 long ret;
511 int c, old_c, totaldigits, ndigits;
512 const char __user __force *ubuf = (const char __user __force *)buf;
513 int at_start, in_range, in_partial_range;
514 628
515 totaldigits = c = 0;
516 old_a = old_b = 0;
517 group_size = used_size = 0;
518 bitmap_zero(maskp, nmaskbits); 629 bitmap_zero(maskp, nmaskbits);
519 do {
520 at_start = 1;
521 in_range = 0;
522 in_partial_range = 0;
523 a = b = 0;
524 ndigits = totaldigits;
525
526 /* Get the next cpu# or a range of cpu#'s */
527 while (buflen) {
528 old_c = c;
529 if (is_user) {
530 if (__get_user(c, ubuf++))
531 return -EFAULT;
532 } else
533 c = *buf++;
534 buflen--;
535 if (isspace(c))
536 continue;
537
538 /* A '\0' or a ',' signal the end of a cpu# or range */
539 if (c == '\0' || c == ',')
540 break;
541 /*
542 * whitespaces between digits are not allowed,
543 * but it's ok if whitespaces are on head or tail.
544 * when old_c is whilespace,
545 * if totaldigits == ndigits, whitespace is on head.
546 * if whitespace is on tail, it should not run here.
547 * as c was ',' or '\0',
548 * the last code line has broken the current loop.
549 */
550 if ((totaldigits != ndigits) && isspace(old_c))
551 return -EINVAL;
552 630
553 if (c == '/') { 631 while (buf) {
554 used_size = a; 632 buf = bitmap_find_region(buf);
555 at_start = 1; 633 if (buf == NULL)
556 in_range = 0; 634 return 0;
557 a = b = 0;
558 continue;
559 }
560 635
561 if (c == ':') { 636 buf = bitmap_parse_region(buf, &r);
562 old_a = a; 637 if (IS_ERR(buf))
563 old_b = b; 638 return PTR_ERR(buf);
564 at_start = 1;
565 in_range = 0;
566 in_partial_range = 1;
567 a = b = 0;
568 continue;
569 }
570 639
571 if (c == '-') { 640 ret = bitmap_check_region(&r);
572 if (at_start || in_range) 641 if (ret)
573 return -EINVAL; 642 return ret;
574 b = 0;
575 in_range = 1;
576 at_start = 1;
577 continue;
578 }
579 643
580 if (!isdigit(c)) 644 ret = bitmap_set_region(&r, maskp, nmaskbits);
581 return -EINVAL; 645 if (ret)
646 return ret;
647 }
582 648
583 b = b * 10 + (c - '0');
584 if (!in_range)
585 a = b;
586 at_start = 0;
587 totaldigits++;
588 }
589 if (ndigits == totaldigits)
590 continue;
591 if (in_partial_range) {
592 group_size = a;
593 a = old_a;
594 b = old_b;
595 old_a = old_b = 0;
596 } else {
597 used_size = group_size = b - a + 1;
598 }
599 /* if no digit is after '-', it's wrong*/
600 if (at_start && in_range)
601 return -EINVAL;
602 if (!(a <= b) || group_size == 0 || !(used_size <= group_size))
603 return -EINVAL;
604 if (b >= nmaskbits)
605 return -ERANGE;
606 while (a <= b) {
607 off = min(b - a + 1, used_size);
608 bitmap_set(maskp, a, off);
609 a += group_size;
610 }
611 } while (buflen && c == ',');
612 return 0; 649 return 0;
613} 650}
614
615int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
616{
617 char *nl = strchrnul(bp, '\n');
618 int len = nl - bp;
619
620 return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
621}
622EXPORT_SYMBOL(bitmap_parselist); 651EXPORT_SYMBOL(bitmap_parselist);
623 652
624 653
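The rewrite makes the grammar explicit: a list is regions separated by commas or whitespace, each region "start[-end[:off/group_len]]". A hedged kernel-side usage sketch (the parselist_demo module is hypothetical; bitmap_parselist(), bitmap_weight() and DECLARE_BITMAP are the real helpers):

#include <linux/bitmap.h>
#include <linux/module.h>

static int __init parselist_demo_init(void)
{
	DECLARE_BITMAP(mask, 128);
	int err;

	/* bits 0,3,5,7 plus the first 4 bits of every 8-bit group in 64..127 */
	err = bitmap_parselist("0,3,5,7,64-127:4/8", mask, 128);
	if (err)
		return err;	/* -EINVAL, -ERANGE or -EOVERFLOW per the kerneldoc */

	pr_info("weight=%d\n", bitmap_weight(mask, 128));	/* 4 + 8*4 = 36 */
	return 0;
}
module_init(parselist_demo_init);

static void __exit parselist_demo_exit(void) { }
module_exit(parselist_demo_exit);

MODULE_LICENSE("GPL");
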
@@ -632,23 +661,27 @@ EXPORT_SYMBOL(bitmap_parselist);
632 * @nmaskbits: size of bitmap, in bits. 661 * @nmaskbits: size of bitmap, in bits.
633 * 662 *
634 * Wrapper for bitmap_parselist(), providing it with user buffer. 663 * Wrapper for bitmap_parselist(), providing it with user buffer.
635 *
636 * We cannot have this as an inline function in bitmap.h because it needs
637 * linux/uaccess.h to get the access_ok() declaration and this causes
638 * cyclic dependencies.
639 */ 664 */
640int bitmap_parselist_user(const char __user *ubuf, 665int bitmap_parselist_user(const char __user *ubuf,
641 unsigned int ulen, unsigned long *maskp, 666 unsigned int ulen, unsigned long *maskp,
642 int nmaskbits) 667 int nmaskbits)
643{ 668{
644 if (!access_ok(ubuf, ulen)) 669 char *buf;
645 return -EFAULT; 670 int ret;
646 return __bitmap_parselist((const char __force *)ubuf, 671
647 ulen, 1, maskp, nmaskbits); 672 buf = memdup_user_nul(ubuf, ulen);
673 if (IS_ERR(buf))
674 return PTR_ERR(buf);
675
676 ret = bitmap_parselist(buf, maskp, nmaskbits);
677
678 kfree(buf);
679 return ret;
648} 680}
649EXPORT_SYMBOL(bitmap_parselist_user); 681EXPORT_SYMBOL(bitmap_parselist_user);
650 682
651 683
684#ifdef CONFIG_NUMA
652/** 685/**
653 * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap 686 * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
654 * @buf: pointer to a bitmap 687 * @buf: pointer to a bitmap
@@ -757,7 +790,6 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
757 set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst); 790 set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst);
758 } 791 }
759} 792}
760EXPORT_SYMBOL(bitmap_remap);
761 793
762/** 794/**
763 * bitmap_bitremap - Apply map defined by a pair of bitmaps to a single bit 795 * bitmap_bitremap - Apply map defined by a pair of bitmaps to a single bit
@@ -795,7 +827,6 @@ int bitmap_bitremap(int oldbit, const unsigned long *old,
795 else 827 else
796 return bitmap_ord_to_pos(new, n % w, bits); 828 return bitmap_ord_to_pos(new, n % w, bits);
797} 829}
798EXPORT_SYMBOL(bitmap_bitremap);
799 830
800/** 831/**
801 * bitmap_onto - translate one bitmap relative to another 832 * bitmap_onto - translate one bitmap relative to another
@@ -930,7 +961,6 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
930 m++; 961 m++;
931 } 962 }
932} 963}
933EXPORT_SYMBOL(bitmap_onto);
934 964
935/** 965/**
936 * bitmap_fold - fold larger bitmap into smaller, modulo specified size 966 * bitmap_fold - fold larger bitmap into smaller, modulo specified size
@@ -955,7 +985,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
955 for_each_set_bit(oldbit, orig, nbits) 985 for_each_set_bit(oldbit, orig, nbits)
956 set_bit(oldbit % sz, dst); 986 set_bit(oldbit % sz, dst);
957} 987}
958EXPORT_SYMBOL(bitmap_fold); 988#endif /* CONFIG_NUMA */
959 989
960/* 990/*
961 * Common code for bitmap_*_region() routines. 991 * Common code for bitmap_*_region() routines.
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 85759928215b..06e900c5587b 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -7,33 +7,41 @@
7#include <linux/list_sort.h> 7#include <linux/list_sort.h>
8#include <linux/list.h> 8#include <linux/list.h>
9 9
10#define MAX_LIST_LENGTH_BITS 20 10typedef int __attribute__((nonnull(2,3))) (*cmp_func)(void *,
11 struct list_head const *, struct list_head const *);
11 12
12/* 13/*
13 * Returns a list organized in an intermediate format suited 14 * Returns a list organized in an intermediate format suited
14 * to chaining of merge() calls: null-terminated, no reserved or 15 * to chaining of merge() calls: null-terminated, no reserved or
15 * sentinel head node, "prev" links not maintained. 16 * sentinel head node, "prev" links not maintained.
16 */ 17 */
17static struct list_head *merge(void *priv, 18__attribute__((nonnull(2,3,4)))
18 int (*cmp)(void *priv, struct list_head *a, 19static struct list_head *merge(void *priv, cmp_func cmp,
19 struct list_head *b),
20 struct list_head *a, struct list_head *b) 20 struct list_head *a, struct list_head *b)
21{ 21{
22 struct list_head head, *tail = &head; 22 struct list_head *head, **tail = &head;
23 23
24 while (a && b) { 24 for (;;) {
25 /* if equal, take 'a' -- important for sort stability */ 25 /* if equal, take 'a' -- important for sort stability */
26 if ((*cmp)(priv, a, b) <= 0) { 26 if (cmp(priv, a, b) <= 0) {
27 tail->next = a; 27 *tail = a;
28 tail = &a->next;
28 a = a->next; 29 a = a->next;
30 if (!a) {
31 *tail = b;
32 break;
33 }
29 } else { 34 } else {
30 tail->next = b; 35 *tail = b;
36 tail = &b->next;
31 b = b->next; 37 b = b->next;
38 if (!b) {
39 *tail = a;
40 break;
41 }
32 } 42 }
33 tail = tail->next;
34 } 43 }
35 tail->next = a?:b; 44 return head;
36 return head.next;
37} 45}
38 46
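The pointer-to-pointer tail in the new merge() removes the old dummy head node: *tail is always the link to fill next. A standalone userspace sketch of the same technique, using integer-keyed nodes instead of list_head:

#include <stdio.h>
#include <stddef.h>

struct node { int val; struct node *next; };

static struct node *merge(struct node *a, struct node *b)
{
	struct node *head, **tail = &head;

	for (;;) {
		if (a->val <= b->val) {	/* take 'a' on ties: stable */
			*tail = a;
			tail = &a->next;
			a = a->next;
			if (!a) { *tail = b; break; }
		} else {
			*tail = b;
			tail = &b->next;
			b = b->next;
			if (!b) { *tail = a; break; }
		}
	}
	return head;
}

int main(void)
{
	struct node a2 = { 5, NULL }, a1 = { 1, &a2 };
	struct node b2 = { 4, NULL }, b1 = { 2, &b2 };
	struct node *n;

	for (n = merge(&a1, &b1); n; n = n->next)
		printf("%d ", n->val);	/* prints 1 2 4 5 */
	printf("\n");
	return 0;
}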
39/* 47/*
@@ -43,44 +51,52 @@ static struct list_head *merge(void *priv,
43 * prev-link restoration pass, or maintaining the prev links 51 * prev-link restoration pass, or maintaining the prev links
44 * throughout. 52 * throughout.
45 */ 53 */
46static void merge_and_restore_back_links(void *priv, 54__attribute__((nonnull(2,3,4,5)))
47 int (*cmp)(void *priv, struct list_head *a, 55static void merge_final(void *priv, cmp_func cmp, struct list_head *head,
48 struct list_head *b), 56 struct list_head *a, struct list_head *b)
49 struct list_head *head,
50 struct list_head *a, struct list_head *b)
51{ 57{
52 struct list_head *tail = head; 58 struct list_head *tail = head;
53 u8 count = 0; 59 u8 count = 0;
54 60
55 while (a && b) { 61 for (;;) {
56 /* if equal, take 'a' -- important for sort stability */ 62 /* if equal, take 'a' -- important for sort stability */
57 if ((*cmp)(priv, a, b) <= 0) { 63 if (cmp(priv, a, b) <= 0) {
58 tail->next = a; 64 tail->next = a;
59 a->prev = tail; 65 a->prev = tail;
66 tail = a;
60 a = a->next; 67 a = a->next;
68 if (!a)
69 break;
61 } else { 70 } else {
62 tail->next = b; 71 tail->next = b;
63 b->prev = tail; 72 b->prev = tail;
73 tail = b;
64 b = b->next; 74 b = b->next;
75 if (!b) {
76 b = a;
77 break;
78 }
65 } 79 }
66 tail = tail->next;
67 } 80 }
68 tail->next = a ? : b;
69 81
82 /* Finish linking remainder of list b on to tail */
83 tail->next = b;
70 do { 84 do {
71 /* 85 /*
72 * In worst cases this loop may run many iterations. 86 * If the merge is highly unbalanced (e.g. the input is
87 * already sorted), this loop may run many iterations.
73 * Continue callbacks to the client even though no 88 * Continue callbacks to the client even though no
74 * element comparison is needed, so the client's cmp() 89 * element comparison is needed, so the client's cmp()
75 * routine can invoke cond_resched() periodically. 90 * routine can invoke cond_resched() periodically.
76 */ 91 */
77 if (unlikely(!(++count))) 92 if (unlikely(!++count))
78 (*cmp)(priv, tail->next, tail->next); 93 cmp(priv, b, b);
79 94 b->prev = tail;
80 tail->next->prev = tail; 95 tail = b;
81 tail = tail->next; 96 b = b->next;
82 } while (tail->next); 97 } while (b);
83 98
99 /* And the final links to make a circular doubly-linked list */
84 tail->next = head; 100 tail->next = head;
85 head->prev = tail; 101 head->prev = tail;
86} 102}
@@ -91,55 +107,149 @@ static void merge_and_restore_back_links(void *priv,
91 * @head: the list to sort 107 * @head: the list to sort
92 * @cmp: the elements comparison function 108 * @cmp: the elements comparison function
93 * 109 *
 94 * This function implements "merge sort", which has O(nlog(n)) 110 * The comparison function @cmp must return > 0 if @a should sort after
95 * complexity. 111 * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should
112 * sort before @b *or* their original order should be preserved. It is
113 * always called with the element that came first in the input in @a,
114 * and list_sort is a stable sort, so it is not necessary to distinguish
115 * the @a < @b and @a == @b cases.
116 *
117 * This is compatible with two styles of @cmp function:
118 * - The traditional style which returns <0 / =0 / >0, or
119 * - Returning a boolean 0/1.
120 * The latter offers a chance to save a few cycles in the comparison
121 * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c).
122 *
123 * A good way to write a multi-word comparison is
124 * if (a->high != b->high)
125 * return a->high > b->high;
126 * if (a->middle != b->middle)
127 * return a->middle > b->middle;
128 * return a->low > b->low;
129 *
130 *
131 * This mergesort is as eager as possible while always performing at least
132 * 2:1 balanced merges. Given two pending sublists of size 2^k, they are
133 * merged to a size-2^(k+1) list as soon as we have 2^k following elements.
134 *
135 * Thus, it will avoid cache thrashing as long as 3*2^k elements can
136 * fit into the cache. Not quite as good as a fully-eager bottom-up
137 * mergesort, but it does use 0.2*n fewer comparisons, so is faster in
138 * the common case that everything fits into L1.
139 *
140 *
141 * The merging is controlled by "count", the number of elements in the
 142 * pending lists. This is beautifully simple code, but rather subtle.
96 * 143 *
97 * The comparison function @cmp must return a negative value if @a 144 * Each time we increment "count", we set one bit (bit k) and clear
98 * should sort before @b, and a positive value if @a should sort after 145 * bits k-1 .. 0. Each time this happens (except the very first time
99 * @b. If @a and @b are equivalent, and their original relative 146 * for each bit, when count increments to 2^k), we merge two lists of
100 * ordering is to be preserved, @cmp must return 0. 147 * size 2^k into one list of size 2^(k+1).
148 *
149 * This merge happens exactly when the count reaches an odd multiple of
150 * 2^k, which is when we have 2^k elements pending in smaller lists,
151 * so it's safe to merge away two lists of size 2^k.
152 *
153 * After this happens twice, we have created two lists of size 2^(k+1),
154 * which will be merged into a list of size 2^(k+2) before we create
155 * a third list of size 2^(k+1), so there are never more than two pending.
156 *
157 * The number of pending lists of size 2^k is determined by the
158 * state of bit k of "count" plus two extra pieces of information:
159 * - The state of bit k-1 (when k == 0, consider bit -1 always set), and
160 * - Whether the higher-order bits are zero or non-zero (i.e.
161 * is count >= 2^(k+1)).
162 * There are six states we distinguish. "x" represents some arbitrary
163 * bits, and "y" represents some arbitrary non-zero bits:
164 * 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k
165 * 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
166 * 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k
167 * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
168 * 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k
169 * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
170 * (merge and loop back to state 2)
171 *
172 * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because
173 * bit k-1 is set while the more significant bits are non-zero) and
174 * merge them away in the 5->2 transition. Note in particular that just
175 * before the 5->2 transition, all lower-order bits are 11 (state 3),
176 * so there is one list of each smaller size.
177 *
178 * When we reach the end of the input, we merge all the pending
179 * lists, from smallest to largest. If you work through cases 2 to
180 * 5 above, you can see that the number of elements we merge with a list
181 * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to
182 * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1).
101 */ 183 */
184__attribute__((nonnull(2,3)))
102void list_sort(void *priv, struct list_head *head, 185void list_sort(void *priv, struct list_head *head,
103 int (*cmp)(void *priv, struct list_head *a, 186 int (*cmp)(void *priv, struct list_head *a,
104 struct list_head *b)) 187 struct list_head *b))
105{ 188{
106 struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists 189 struct list_head *list = head->next, *pending = NULL;
107 -- last slot is a sentinel */ 190 size_t count = 0; /* Count of pending */
108 int lev; /* index into part[] */
109 int max_lev = 0;
110 struct list_head *list;
111 191
112 if (list_empty(head)) 192 if (list == head->prev) /* Zero or one elements */
113 return; 193 return;
114 194
115 memset(part, 0, sizeof(part)); 195 /* Convert to a null-terminated singly-linked list. */
116
117 head->prev->next = NULL; 196 head->prev->next = NULL;
118 list = head->next;
119
120 while (list) {
121 struct list_head *cur = list;
122 list = list->next;
123 cur->next = NULL;
124 197
125 for (lev = 0; part[lev]; lev++) { 198 /*
126 cur = merge(priv, cmp, part[lev], cur); 199 * Data structure invariants:
127 part[lev] = NULL; 200 * - All lists are singly linked and null-terminated; prev
128 } 201 * pointers are not maintained.
129 if (lev > max_lev) { 202 * - pending is a prev-linked "list of lists" of sorted
130 if (unlikely(lev >= ARRAY_SIZE(part)-1)) { 203 * sublists awaiting further merging.
131 printk_once(KERN_DEBUG "list too long for efficiency\n"); 204 * - Each of the sorted sublists is power-of-two in size.
132 lev--; 205 * - Sublists are sorted by size and age, smallest & newest at front.
133 } 206 * - There are zero to two sublists of each size.
134 max_lev = lev; 207 * - A pair of pending sublists are merged as soon as the number
208 * of following pending elements equals their size (i.e.
209 * each time count reaches an odd multiple of that size).
210 * That ensures each later final merge will be at worst 2:1.
211 * - Each round consists of:
212 * - Merging the two sublists selected by the highest bit
213 * which flips when count is incremented, and
214 * - Adding an element from the input as a size-1 sublist.
215 */
216 do {
217 size_t bits;
218 struct list_head **tail = &pending;
219
220 /* Find the least-significant clear bit in count */
221 for (bits = count; bits & 1; bits >>= 1)
222 tail = &(*tail)->prev;
223 /* Do the indicated merge */
224 if (likely(bits)) {
225 struct list_head *a = *tail, *b = a->prev;
226
227 a = merge(priv, (cmp_func)cmp, b, a);
228 /* Install the merged result in place of the inputs */
229 a->prev = b->prev;
230 *tail = a;
135 } 231 }
136 part[lev] = cur;
137 }
138 232
139 for (lev = 0; lev < max_lev; lev++) 233 /* Move one element from input list to pending */
140 if (part[lev]) 234 list->prev = pending;
141 list = merge(priv, cmp, part[lev], list); 235 pending = list;
142 236 list = list->next;
143 merge_and_restore_back_links(priv, cmp, head, part[max_lev], list); 237 pending->next = NULL;
238 count++;
239 } while (list);
240
241 /* End of input; merge together all the pending lists. */
242 list = pending;
243 pending = pending->prev;
244 for (;;) {
245 struct list_head *next = pending->prev;
246
247 if (!next)
248 break;
249 list = merge(priv, (cmp_func)cmp, pending, list);
250 pending = next;
251 }
252 /* The final merge, rebuilding prev links */
253 merge_final(priv, (cmp_func)cmp, head, pending, list);
144} 254}
145EXPORT_SYMBOL(list_sort); 255EXPORT_SYMBOL(list_sort);
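The merge schedule in the comment above can be checked mechanically: for a given count, the level merged is its lowest clear bit, skipped the first time that bit is ever set. A small userspace sketch mirroring the bits/tail scan in list_sort():

#include <stdio.h>

int main(void)
{
	unsigned int count;

	for (count = 0; count < 16; count++) {
		unsigned int bits = count, k = 0;

		/* Same scan as list_sort(): lowest clear bit of count */
		for (; bits & 1; bits >>= 1)
			k++;
		if (bits)	/* skip the first time bit k is set */
			printf("count=%2u: merge two size-%u lists\n",
			       count, 1u << k);
		else
			printf("count=%2u: no merge\n", count);
	}
	return 0;
}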
diff --git a/lib/math/Kconfig b/lib/math/Kconfig
new file mode 100644
index 000000000000..73bdf37178d1
--- /dev/null
+++ b/lib/math/Kconfig
@@ -0,0 +1,11 @@
1config CORDIC
2 tristate "CORDIC algorithm"
3 help
4 This option provides an implementation of the CORDIC algorithm;
5 calculations are in fixed point. Module will be called cordic.
6
7config PRIME_NUMBERS
8 tristate
9
10config RATIONAL
11 bool
diff --git a/lib/math/Makefile b/lib/math/Makefile
new file mode 100644
index 000000000000..583bbfebfc09
--- /dev/null
+++ b/lib/math/Makefile
@@ -0,0 +1,5 @@
1obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o reciprocal_div.o
2
3obj-$(CONFIG_CORDIC) += cordic.o
4obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
5obj-$(CONFIG_RATIONAL) += rational.o
diff --git a/lib/cordic.c b/lib/math/cordic.c
index 8ef27c12956f..8ef27c12956f 100644
--- a/lib/cordic.c
+++ b/lib/math/cordic.c
diff --git a/lib/div64.c b/lib/math/div64.c
index ee146bb4c558..368ca7fd0d82 100644
--- a/lib/div64.c
+++ b/lib/math/div64.c
@@ -10,7 +10,7 @@
10 * Generic C version of 64bit/32bit division and modulo, with 10 * Generic C version of 64bit/32bit division and modulo, with
11 * 64bit result and 32bit remainder. 11 * 64bit result and 32bit remainder.
12 * 12 *
13 * The fast case for (n>>32 == 0) is handled inline by do_div(). 13 * The fast case for (n>>32 == 0) is handled inline by do_div().
14 * 14 *
15 * Code generated for this function might be very inefficient 15 * Code generated for this function might be very inefficient
16 * for some CPUs. __div64_32() can be overridden by linking arch-specific 16 * for some CPUs. __div64_32() can be overridden by linking arch-specific
diff --git a/lib/gcd.c b/lib/math/gcd.c
index 7948ab27f0a4..7948ab27f0a4 100644
--- a/lib/gcd.c
+++ b/lib/math/gcd.c
diff --git a/lib/math/int_pow.c b/lib/math/int_pow.c
new file mode 100644
index 000000000000..622fc1ab3c74
--- /dev/null
+++ b/lib/math/int_pow.c
@@ -0,0 +1,32 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * An integer based power function
4 *
5 * Derived from drivers/video/backlight/pwm_bl.c
6 */
7
8#include <linux/export.h>
9#include <linux/kernel.h>
10#include <linux/types.h>
11
12/**
13 * int_pow - computes the exponentiation of the given base and exponent
14 * @base: base which will be raised to the given power
15 * @exp: power to be raised to
16 *
17 * Computes: pow(base, exp), i.e. @base raised to the @exp power
18 */
19u64 int_pow(u64 base, unsigned int exp)
20{
21 u64 result = 1;
22
23 while (exp) {
24 if (exp & 1)
25 result *= base;
26 exp >>= 1;
27 base *= base;
28 }
29
30 return result;
31}
32EXPORT_SYMBOL_GPL(int_pow);
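A userspace check of the square-and-multiply loop above; u64 is a stand-in typedef here, and the loop invariant is result * base^exp == base0^exp0:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;	/* stand-in for the kernel type */

static u64 int_pow(u64 base, unsigned int exp)
{
	u64 result = 1;

	while (exp) {
		if (exp & 1)
			result *= base;	/* fold in the current bit of exp */
		exp >>= 1;
		base *= base;		/* base^(2^i) for the next bit */
	}
	return result;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)int_pow(3, 5));	/* 243 */
	printf("%llu\n", (unsigned long long)int_pow(2, 32));	/* 4294967296 */
	return 0;
}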
diff --git a/lib/int_sqrt.c b/lib/math/int_sqrt.c
index 30e0f9770f88..30e0f9770f88 100644
--- a/lib/int_sqrt.c
+++ b/lib/math/int_sqrt.c
diff --git a/lib/lcm.c b/lib/math/lcm.c
index 03d7fcb420b5..03d7fcb420b5 100644
--- a/lib/lcm.c
+++ b/lib/math/lcm.c
diff --git a/lib/prime_numbers.c b/lib/math/prime_numbers.c
index 550eec457c2e..550eec457c2e 100644
--- a/lib/prime_numbers.c
+++ b/lib/math/prime_numbers.c
diff --git a/lib/rational.c b/lib/math/rational.c
index ba7443677c90..ba7443677c90 100644
--- a/lib/rational.c
+++ b/lib/math/rational.c
diff --git a/lib/reciprocal_div.c b/lib/math/reciprocal_div.c
index bf043258fa00..bf043258fa00 100644
--- a/lib/reciprocal_div.c
+++ b/lib/math/reciprocal_div.c
diff --git a/lib/plist.c b/lib/plist.c
index 199408f91057..d3bd8827186f 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -26,7 +26,7 @@
26#include <linux/bug.h> 26#include <linux/bug.h>
27#include <linux/plist.h> 27#include <linux/plist.h>
28 28
29#ifdef CONFIG_DEBUG_PI_LIST 29#ifdef CONFIG_DEBUG_PLIST
30 30
31static struct plist_head test_head; 31static struct plist_head test_head;
32 32
@@ -173,7 +173,7 @@ void plist_requeue(struct plist_node *node, struct plist_head *head)
173 plist_check_head(head); 173 plist_check_head(head);
174} 174}
175 175
176#ifdef CONFIG_DEBUG_PI_LIST 176#ifdef CONFIG_DEBUG_PLIST
177#include <linux/sched.h> 177#include <linux/sched.h>
178#include <linux/sched/clock.h> 178#include <linux/sched/clock.h>
179#include <linux/module.h> 179#include <linux/module.h>
diff --git a/lib/sort.c b/lib/sort.c
index d6b7a202b0b6..50855ea8c262 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -1,8 +1,13 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * A fast, small, non-recursive O(nlog n) sort for the Linux kernel 3 * A fast, small, non-recursive O(n log n) sort for the Linux kernel
4 * 4 *
5 * Jan 23 2005 Matt Mackall <mpm@selenic.com> 5 * This performs n*log2(n) + 0.37*n + o(n) comparisons on average,
6 * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case.
7 *
8 * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n
9 * better) at the expense of stack usage and much larger code to avoid
10 * quicksort's O(n^2) worst case.
6 */ 11 */
7 12
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -11,35 +16,155 @@
11#include <linux/export.h> 16#include <linux/export.h>
12#include <linux/sort.h> 17#include <linux/sort.h>
13 18
14static int alignment_ok(const void *base, int align) 19/**
20 * is_aligned - is this pointer & size okay for word-wide copying?
21 * @base: pointer to data
22 * @size: size of each element
23 * @align: required alignment (typically 4 or 8)
24 *
25 * Returns true if elements can be copied using word loads and stores.
26 * The size must be a multiple of the alignment, and the base address must
 27 * be aligned too if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
28 *
29 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
30 * to "if ((a | b) & mask)", so we do that by hand.
31 */
32__attribute_const__ __always_inline
33static bool is_aligned(const void *base, size_t size, unsigned char align)
15{ 34{
16 return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || 35 unsigned char lsbits = (unsigned char)size;
17 ((unsigned long)base & (align - 1)) == 0; 36
37 (void)base;
38#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
39 lsbits |= (unsigned char)(uintptr_t)base;
40#endif
41 return (lsbits & (align - 1)) == 0;
18} 42}
19 43
20static void u32_swap(void *a, void *b, int size) 44/**
45 * swap_words_32 - swap two elements in 32-bit chunks
46 * @a, @b: pointers to the elements
47 * @size: element size (must be a multiple of 4)
48 *
49 * Exchange the two objects in memory. This exploits base+index addressing,
50 * which basically all CPUs have, to minimize loop overhead computations.
51 *
52 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 53 * bottom of the loop, even though the zero flag is still valid from the
54 * subtract (since the intervening mov instructions don't alter the flags).
55 * Gcc 8.1.0 doesn't have that problem.
56 */
57static void swap_words_32(void *a, void *b, size_t n)
21{ 58{
22 u32 t = *(u32 *)a; 59 do {
23 *(u32 *)a = *(u32 *)b; 60 u32 t = *(u32 *)(a + (n -= 4));
24 *(u32 *)b = t; 61 *(u32 *)(a + n) = *(u32 *)(b + n);
62 *(u32 *)(b + n) = t;
63 } while (n);
25} 64}
26 65
27static void u64_swap(void *a, void *b, int size) 66/**
67 * swap_words_64 - swap two elements in 64-bit chunks
68 * @a, @b: pointers to the elements
69 * @size: element size (must be a multiple of 8)
70 *
71 * Exchange the two objects in memory. This exploits base+index
72 * addressing, which basically all CPUs have, to minimize loop overhead
73 * computations.
74 *
75 * We'd like to use 64-bit loads if possible. If they're not, emulating
76 * one requires base+index+4 addressing which x86 has but most other
77 * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
78 * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
79 * x32 ABI). Are there any cases the kernel needs to worry about?
80 */
81static void swap_words_64(void *a, void *b, size_t n)
28{ 82{
29 u64 t = *(u64 *)a; 83 do {
30 *(u64 *)a = *(u64 *)b; 84#ifdef CONFIG_64BIT
31 *(u64 *)b = t; 85 u64 t = *(u64 *)(a + (n -= 8));
86 *(u64 *)(a + n) = *(u64 *)(b + n);
87 *(u64 *)(b + n) = t;
88#else
89 /* Use two 32-bit transfers to avoid base+index+4 addressing */
90 u32 t = *(u32 *)(a + (n -= 4));
91 *(u32 *)(a + n) = *(u32 *)(b + n);
92 *(u32 *)(b + n) = t;
93
94 t = *(u32 *)(a + (n -= 4));
95 *(u32 *)(a + n) = *(u32 *)(b + n);
96 *(u32 *)(b + n) = t;
97#endif
98 } while (n);
32} 99}
33 100
34static void generic_swap(void *a, void *b, int size) 101/**
102 * swap_bytes - swap two elements a byte at a time
103 * @a, @b: pointers to the elements
104 * @size: element size
105 *
106 * This is the fallback if alignment doesn't allow using larger chunks.
107 */
108static void swap_bytes(void *a, void *b, size_t n)
35{ 109{
36 char t;
37
38 do { 110 do {
39 t = *(char *)a; 111 char t = ((char *)a)[--n];
40 *(char *)a++ = *(char *)b; 112 ((char *)a)[n] = ((char *)b)[n];
41 *(char *)b++ = t; 113 ((char *)b)[n] = t;
42 } while (--size > 0); 114 } while (n);
115}
116
117typedef void (*swap_func_t)(void *a, void *b, int size);
118
119/*
120 * The values are arbitrary as long as they can't be confused with
121 * a pointer, but small integers make for the smallest compare
122 * instructions.
123 */
124#define SWAP_WORDS_64 (swap_func_t)0
125#define SWAP_WORDS_32 (swap_func_t)1
126#define SWAP_BYTES (swap_func_t)2
127
128/*
129 * The function pointer is last to make tail calls most efficient if the
130 * compiler decides not to inline this function.
131 */
132static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
133{
134 if (swap_func == SWAP_WORDS_64)
135 swap_words_64(a, b, size);
136 else if (swap_func == SWAP_WORDS_32)
137 swap_words_32(a, b, size);
138 else if (swap_func == SWAP_BYTES)
139 swap_bytes(a, b, size);
140 else
141 swap_func(a, b, (int)size);
142}
143
144/**
145 * parent - given the offset of the child, find the offset of the parent.
146 * @i: the offset of the heap element whose parent is sought. Non-zero.
147 * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
148 * @size: size of each element
149 *
150 * In terms of array indexes, the parent of element j = @i/@size is simply
151 * (j-1)/2. But when working in byte offsets, we can't use implicit
152 * truncation of integer divides.
153 *
154 * Fortunately, we only need one bit of the quotient, not the full divide.
155 * @size has a least significant bit. That bit will be clear if @i is
156 * an even multiple of @size, and set if it's an odd multiple.
157 *
158 * Logically, we're doing "if (i & lsbit) i -= size;", but since the
159 * branch is unpredictable, it's done with a bit of clever branch-free
160 * code instead.
161 */
162__attribute_const__ __always_inline
163static size_t parent(size_t i, unsigned int lsbit, size_t size)
164{
165 i -= size;
166 i -= size & -(i & lsbit);
167 return i / 2;
43} 168}
44 169
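The branch-free parent() arithmetic can be checked against plain index math; a userspace sketch with an arbitrary non-power-of-two element size:

#include <stdio.h>
#include <stddef.h>

static size_t parent(size_t i, unsigned int lsbit, size_t size)
{
	i -= size;
	i -= size & -(i & lsbit);	/* -= size again iff index i/size is odd */
	return i / 2;
}

int main(void)
{
	size_t size = 12;			/* arbitrary non-power-of-two */
	unsigned int lsbit = size & -size;	/* == 4 for size 12 */
	size_t j;

	for (j = 1; j < 8; j++) {
		size_t want = (j - 1) / 2 * size;	/* plain index math */
		size_t got = parent(j * size, lsbit, size);

		printf("child %zu: parent offset %zu (%s)\n",
		       j, got, got == want ? "ok" : "BAD");
	}
	return 0;
}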
45/** 170/**
@@ -50,57 +175,78 @@ static void generic_swap(void *a, void *b, int size)
50 * @cmp_func: pointer to comparison function 175 * @cmp_func: pointer to comparison function
51 * @swap_func: pointer to swap function or NULL 176 * @swap_func: pointer to swap function or NULL
52 * 177 *
53 * This function does a heapsort on the given array. You may provide a 178 * This function does a heapsort on the given array. You may provide
54 * swap_func function optimized to your element type. 179 * a swap_func function if you need to do something more than a memory
180 * copy (e.g. fix up pointers or auxiliary data), but the built-in swap
181 * avoids a slow retpoline and so is significantly faster.
55 * 182 *
56 * Sorting time is O(n log n) both on average and worst-case. While 183 * Sorting time is O(n log n) both on average and worst-case. While
57 * qsort is about 20% faster on average, it suffers from exploitable 184 * quicksort is slightly faster on average, it suffers from exploitable
58 * O(n*n) worst-case behavior and extra memory requirements that make 185 * O(n*n) worst-case behavior and extra memory requirements that make
59 * it less suitable for kernel use. 186 * it less suitable for kernel use.
60 */ 187 */
61
62void sort(void *base, size_t num, size_t size, 188void sort(void *base, size_t num, size_t size,
63 int (*cmp_func)(const void *, const void *), 189 int (*cmp_func)(const void *, const void *),
64 void (*swap_func)(void *, void *, int size)) 190 void (*swap_func)(void *, void *, int size))
65{ 191{
66 /* pre-scale counters for performance */ 192 /* pre-scale counters for performance */
67 int i = (num/2 - 1) * size, n = num * size, c, r; 193 size_t n = num * size, a = (num/2) * size;
194 const unsigned int lsbit = size & -size; /* Used to find parent */
195
196 if (!a) /* num < 2 || size == 0 */
197 return;
68 198
69 if (!swap_func) { 199 if (!swap_func) {
70 if (size == 4 && alignment_ok(base, 4)) 200 if (is_aligned(base, size, 8))
71 swap_func = u32_swap; 201 swap_func = SWAP_WORDS_64;
72 else if (size == 8 && alignment_ok(base, 8)) 202 else if (is_aligned(base, size, 4))
73 swap_func = u64_swap; 203 swap_func = SWAP_WORDS_32;
74 else 204 else
75 swap_func = generic_swap; 205 swap_func = SWAP_BYTES;
76 } 206 }
77 207
78 /* heapify */ 208 /*
79 for ( ; i >= 0; i -= size) { 209 * Loop invariants:
80 for (r = i; r * 2 + size < n; r = c) { 210 * 1. elements [a,n) satisfy the heap property (compare greater than
81 c = r * 2 + size; 211 * all of their children),
82 if (c < n - size && 212 * 2. elements [n,num*size) are sorted, and
83 cmp_func(base + c, base + c + size) < 0) 213 * 3. a <= b <= c <= d <= n (whenever they are valid).
84 c += size; 214 */
85 if (cmp_func(base + r, base + c) >= 0) 215 for (;;) {
86 break; 216 size_t b, c, d;
87 swap_func(base + r, base + c, size); 217
88 } 218 if (a) /* Building heap: sift down --a */
89 } 219 a -= size;
220 else if (n -= size) /* Sorting: Extract root to --n */
221 do_swap(base, base + n, size, swap_func);
222 else /* Sort complete */
223 break;
90 224
91 /* sort */ 225 /*
92 for (i = n - size; i > 0; i -= size) { 226 * Sift element at "a" down into heap. This is the
93 swap_func(base, base + i, size); 227 * "bottom-up" variant, which significantly reduces
94 for (r = 0; r * 2 + size < i; r = c) { 228 * calls to cmp_func(): we find the sift-down path all
95 c = r * 2 + size; 229 * the way to the leaves (one compare per level), then
96 if (c < i - size && 230 * backtrack to find where to insert the target element.
97 cmp_func(base + c, base + c + size) < 0) 231 *
98 c += size; 232 * Because elements tend to sift down close to the leaves,
99 if (cmp_func(base + r, base + c) >= 0) 233 * this uses fewer compares than doing two per level
100 break; 234 * on the way down. (A bit more than half as many on
101 swap_func(base + r, base + c, size); 235 * average, 3/4 worst-case.)
236 */
237 for (b = a; c = 2*b + size, (d = c + size) < n;)
238 b = cmp_func(base + c, base + d) >= 0 ? c : d;
239 if (d == n) /* Special case last leaf with no sibling */
240 b = c;
241
242 /* Now backtrack from "b" to the correct location for "a" */
243 while (b != a && cmp_func(base + a, base + b) >= 0)
244 b = parent(b, lsbit, size);
245 c = b; /* Where "a" belongs */
246 while (b != a) { /* Shift it into place */
247 b = parent(b, lsbit, size);
248 do_swap(base + b, base + c, size, swap_func);
102 } 249 }
103 } 250 }
104} 251}
105
106EXPORT_SYMBOL(sort); 252EXPORT_SYMBOL(sort);
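For reference, a caller-side sketch for the reworked sort(); struct item and item_cmp() are illustrative, and passing NULL as swap_func selects the built-in swaps described above:

#include <linux/sort.h>
#include <linux/types.h>

struct item {			/* illustrative element type */
	u32 key;
	u32 payload;
};

static int item_cmp(const void *a, const void *b)
{
	const struct item *ia = a, *ib = b;

	/* Sign-only compare; avoids wraparound of ia->key - ib->key */
	return (ia->key > ib->key) - (ia->key < ib->key);
}

/*
 * sort(items, nitems, sizeof(*items), item_cmp, NULL);
 * NULL swap_func picks the built-in word-copy swap above.
 */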
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 792d90608052..d3a501f2a81a 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -11,6 +11,7 @@
11#include <linux/printk.h> 11#include <linux/printk.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/uaccess.h>
14 15
15#include "../tools/testing/selftests/kselftest_module.h" 16#include "../tools/testing/selftests/kselftest_module.h"
16 17
@@ -226,7 +227,8 @@ static const unsigned long exp[] __initconst = {
226 BITMAP_FROM_U64(0xffffffff), 227 BITMAP_FROM_U64(0xffffffff),
227 BITMAP_FROM_U64(0xfffffffe), 228 BITMAP_FROM_U64(0xfffffffe),
228 BITMAP_FROM_U64(0x3333333311111111ULL), 229 BITMAP_FROM_U64(0x3333333311111111ULL),
229 BITMAP_FROM_U64(0xffffffff77777777ULL) 230 BITMAP_FROM_U64(0xffffffff77777777ULL),
231 BITMAP_FROM_U64(0),
230}; 232};
231 233
232static const unsigned long exp2[] __initconst = { 234static const unsigned long exp2[] __initconst = {
@@ -249,55 +251,93 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
249 {0, "1-31:4/4", &exp[9 * step], 32, 0}, 251 {0, "1-31:4/4", &exp[9 * step], 32, 0},
250 {0, "0-31:1/4,32-63:2/4", &exp[10 * step], 64, 0}, 252 {0, "0-31:1/4,32-63:2/4", &exp[10 * step], 64, 0},
251 {0, "0-31:3/4,32-63:4/4", &exp[11 * step], 64, 0}, 253 {0, "0-31:3/4,32-63:4/4", &exp[11 * step], 64, 0},
254 {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp[11 * step], 64, 0},
252 255
253 {0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4", exp2, 128, 0}, 256 {0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4", exp2, 128, 0},
254 257
255 {0, "0-2047:128/256", NULL, 2048, PARSE_TIME}, 258 {0, "0-2047:128/256", NULL, 2048, PARSE_TIME},
256 259
260 {0, "", &exp[12 * step], 8, 0},
261 {0, "\n", &exp[12 * step], 8, 0},
262 {0, ",, ,, , , ,", &exp[12 * step], 8, 0},
263 {0, " , ,, , , ", &exp[12 * step], 8, 0},
264 {0, " , ,, , , \n", &exp[12 * step], 8, 0},
265
257 {-EINVAL, "-1", NULL, 8, 0}, 266 {-EINVAL, "-1", NULL, 8, 0},
258 {-EINVAL, "-0", NULL, 8, 0}, 267 {-EINVAL, "-0", NULL, 8, 0},
259 {-EINVAL, "10-1", NULL, 8, 0}, 268 {-EINVAL, "10-1", NULL, 8, 0},
260 {-EINVAL, "0-31:", NULL, 8, 0}, 269 {-EINVAL, "0-31:", NULL, 8, 0},
261 {-EINVAL, "0-31:0", NULL, 8, 0}, 270 {-EINVAL, "0-31:0", NULL, 8, 0},
271 {-EINVAL, "0-31:0/", NULL, 8, 0},
262 {-EINVAL, "0-31:0/0", NULL, 8, 0}, 272 {-EINVAL, "0-31:0/0", NULL, 8, 0},
263 {-EINVAL, "0-31:1/0", NULL, 8, 0}, 273 {-EINVAL, "0-31:1/0", NULL, 8, 0},
264 {-EINVAL, "0-31:10/1", NULL, 8, 0}, 274 {-EINVAL, "0-31:10/1", NULL, 8, 0},
275 {-EOVERFLOW, "0-98765432123456789:10/1", NULL, 8, 0},
276
277 {-EINVAL, "a-31", NULL, 8, 0},
278 {-EINVAL, "0-a1", NULL, 8, 0},
279 {-EINVAL, "a-31:10/1", NULL, 8, 0},
280 {-EINVAL, "0-31:a/1", NULL, 8, 0},
281 {-EINVAL, "0-\n", NULL, 8, 0},
265}; 282};
266 283
267static void __init test_bitmap_parselist(void) 284static void __init __test_bitmap_parselist(int is_user)
268{ 285{
269 int i; 286 int i;
270 int err; 287 int err;
271 cycles_t cycles; 288 ktime_t time;
272 DECLARE_BITMAP(bmap, 2048); 289 DECLARE_BITMAP(bmap, 2048);
290 char *mode = is_user ? "_user" : "";
273 291
274 for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { 292 for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) {
275#define ptest parselist_tests[i] 293#define ptest parselist_tests[i]
276 294
277 cycles = get_cycles(); 295 if (is_user) {
278 err = bitmap_parselist(ptest.in, bmap, ptest.nbits); 296 mm_segment_t orig_fs = get_fs();
279 cycles = get_cycles() - cycles; 297 size_t len = strlen(ptest.in);
298
299 set_fs(KERNEL_DS);
300 time = ktime_get();
301 err = bitmap_parselist_user(ptest.in, len,
302 bmap, ptest.nbits);
303 time = ktime_get() - time;
304 set_fs(orig_fs);
305 } else {
306 time = ktime_get();
307 err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
308 time = ktime_get() - time;
309 }
280 310
281 if (err != ptest.errno) { 311 if (err != ptest.errno) {
282 pr_err("test %d: input is %s, errno is %d, expected %d\n", 312 pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n",
283 i, ptest.in, err, ptest.errno); 313 mode, i, ptest.in, err, ptest.errno);
284 continue; 314 continue;
285 } 315 }
286 316
287 if (!err && ptest.expected 317 if (!err && ptest.expected
288 && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) { 318 && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) {
289 pr_err("test %d: input is %s, result is 0x%lx, expected 0x%lx\n", 319 pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
290 i, ptest.in, bmap[0], *ptest.expected); 320 mode, i, ptest.in, bmap[0],
321 *ptest.expected);
291 continue; 322 continue;
292 } 323 }
293 324
294 if (ptest.flags & PARSE_TIME) 325 if (ptest.flags & PARSE_TIME)
295 pr_err("test %d: input is '%s' OK, Time: %llu\n", 326 pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n",
296 i, ptest.in, 327 mode, i, ptest.in, time);
297 (unsigned long long)cycles);
298 } 328 }
299} 329}
300 330
331static void __init test_bitmap_parselist(void)
332{
333 __test_bitmap_parselist(0);
334}
335
336static void __init test_bitmap_parselist_user(void)
337{
338 __test_bitmap_parselist(1);
339}
340
301#define EXP_BYTES (sizeof(exp) * 8) 341#define EXP_BYTES (sizeof(exp) * 8)
302 342
303static void __init test_bitmap_arr32(void) 343static void __init test_bitmap_arr32(void)
@@ -370,6 +410,7 @@ static void __init selftest(void)
370 test_copy(); 410 test_copy();
371 test_bitmap_arr32(); 411 test_bitmap_arr32();
372 test_bitmap_parselist(); 412 test_bitmap_parselist();
413 test_bitmap_parselist_user();
373 test_mem_optimisations(); 414 test_mem_optimisations();
374} 415}
375 416
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 3dd801c1c85b..566dad3f4196 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -47,6 +47,9 @@ struct test_sysctl_data {
47 unsigned int uint_0001; 47 unsigned int uint_0001;
48 48
49 char string_0001[65]; 49 char string_0001[65];
50
51#define SYSCTL_TEST_BITMAP_SIZE 65536
52 unsigned long *bitmap_0001;
50}; 53};
51 54
52static struct test_sysctl_data test_data = { 55static struct test_sysctl_data test_data = {
@@ -102,6 +105,13 @@ static struct ctl_table test_table[] = {
102 .mode = 0644, 105 .mode = 0644,
103 .proc_handler = proc_dostring, 106 .proc_handler = proc_dostring,
104 }, 107 },
108 {
109 .procname = "bitmap_0001",
110 .data = &test_data.bitmap_0001,
111 .maxlen = SYSCTL_TEST_BITMAP_SIZE,
112 .mode = 0644,
113 .proc_handler = proc_do_large_bitmap,
114 },
105 { } 115 { }
106}; 116};
107 117
@@ -129,15 +139,21 @@ static struct ctl_table_header *test_sysctl_header;
129 139
130static int __init test_sysctl_init(void) 140static int __init test_sysctl_init(void)
131{ 141{
142 test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL);
143 if (!test_data.bitmap_0001)
144 return -ENOMEM;
132 test_sysctl_header = register_sysctl_table(test_sysctl_root_table); 145 test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
133 if (!test_sysctl_header) 146 if (!test_sysctl_header) {
147 kfree(test_data.bitmap_0001);
134 return -ENOMEM; 148 return -ENOMEM;
149 }
135 return 0; 150 return 0;
136} 151}
137late_initcall(test_sysctl_init); 152late_initcall(test_sysctl_init);
138 153
139static void __exit test_sysctl_exit(void) 154static void __exit test_sysctl_exit(void)
140{ 155{
156 kfree(test_data.bitmap_0001);
141 if (test_sysctl_header) 157 if (test_sysctl_header)
142 unregister_sysctl_table(test_sysctl_header); 158 unregister_sysctl_table(test_sysctl_header);
143} 159}
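A userspace sketch exercising the new knob; the proc path assumes the module's usual /proc/sys/debug/test_sysctl/ directory, and the exact readback formatting is up to proc_do_large_bitmap():

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/debug/test_sysctl/bitmap_0001";
	char line[256];
	FILE *f;

	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fputs("0-5,64\n", f);		/* bitmap_parselist() syntax */
	fclose(f);

	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("readback: %s", line);	/* expect "0-5,64" */
	fclose(f);
	return 0;
}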
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index f832b095afba..8bbefcaddfe8 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -384,12 +384,11 @@ static int test_func(void *private)
384{ 384{
385 struct test_driver *t = private; 385 struct test_driver *t = private;
386 int random_array[ARRAY_SIZE(test_case_array)]; 386 int random_array[ARRAY_SIZE(test_case_array)];
387 int index, i, j, ret; 387 int index, i, j;
388 ktime_t kt; 388 ktime_t kt;
389 u64 delta; 389 u64 delta;
390 390
391 ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu)); 391 if (set_cpus_allowed_ptr(current, cpumask_of(t->cpu)) < 0)
392 if (ret < 0)
393 pr_err("Failed to set affinity to %d CPU\n", t->cpu); 392 pr_err("Failed to set affinity to %d CPU\n", t->cpu);
394 393
395 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) 394 for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
@@ -415,8 +414,7 @@ static int test_func(void *private)
415 414
416 kt = ktime_get(); 415 kt = ktime_get();
417 for (j = 0; j < test_repeat_count; j++) { 416 for (j = 0; j < test_repeat_count; j++) {
418 ret = test_case_array[index].test_func(); 417 if (!test_case_array[index].test_func())
419 if (!ret)
420 per_cpu_test_data[t->cpu][index].test_passed++; 418 per_cpu_test_data[t->cpu][index].test_passed++;
421 else 419 else
422 per_cpu_test_data[t->cpu][index].test_failed++; 420 per_cpu_test_data[t->cpu][index].test_failed++;
diff --git a/mm/Makefile b/mm/Makefile
index d210cc9d6f80..ac5e5ba78874 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -33,7 +33,7 @@ mmu-$(CONFIG_MMU) += process_vm_access.o
33endif 33endif
34 34
35obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ 35obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
36 maccess.o page_alloc.o page-writeback.o \ 36 maccess.o page-writeback.o \
37 readahead.o swap.o truncate.o vmscan.o shmem.o \ 37 readahead.o swap.o truncate.o vmscan.o shmem.o \
38 util.o mmzone.o vmstat.o backing-dev.o \ 38 util.o mmzone.o vmstat.o backing-dev.o \
39 mm_init.o mmu_context.o percpu.o slab_common.o \ 39 mm_init.o mmu_context.o percpu.o slab_common.o \
@@ -41,6 +41,11 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
41 interval_tree.o list_lru.o workingset.o \ 41 interval_tree.o list_lru.o workingset.o \
42 debug.o $(mmu-y) 42 debug.o $(mmu-y)
43 43
44# Give 'page_alloc' its own module-parameter namespace
45page-alloc-y := page_alloc.o
46page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
47
48obj-y += page-alloc.o
44obj-y += init-mm.o 49obj-y += init-mm.o
45obj-y += memblock.o 50obj-y += memblock.o
46 51
diff --git a/mm/compaction.c b/mm/compaction.c
index 6cc4bea33dcb..cbac7277978a 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1888,13 +1888,13 @@ static enum compact_result __compact_finished(struct compact_control *cc)
1888 bool can_steal; 1888 bool can_steal;
1889 1889
1890 /* Job done if page is free of the right migratetype */ 1890 /* Job done if page is free of the right migratetype */
1891 if (!list_empty(&area->free_list[migratetype])) 1891 if (!free_area_empty(area, migratetype))
1892 return COMPACT_SUCCESS; 1892 return COMPACT_SUCCESS;
1893 1893
1894#ifdef CONFIG_CMA 1894#ifdef CONFIG_CMA
1895 /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */ 1895 /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
1896 if (migratetype == MIGRATE_MOVABLE && 1896 if (migratetype == MIGRATE_MOVABLE &&
1897 !list_empty(&area->free_list[MIGRATE_CMA])) 1897 !free_area_empty(area, MIGRATE_CMA))
1898 return COMPACT_SUCCESS; 1898 return COMPACT_SUCCESS;
1899#endif 1899#endif
1900 /* 1900 /*
diff --git a/mm/debug.c b/mm/debug.c
index eee9c221280c..8345bb6e4769 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -67,7 +67,7 @@ void __dump_page(struct page *page, const char *reason)
67 */ 67 */
68 mapcount = PageSlab(page) ? 0 : page_mapcount(page); 68 mapcount = PageSlab(page) ? 0 : page_mapcount(page);
69 69
70 pr_warn("page:%px count:%d mapcount:%d mapping:%px index:%#lx", 70 pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx",
71 page, page_ref_count(page), mapcount, 71 page, page_ref_count(page), mapcount,
72 page->mapping, page_to_pgoff(page)); 72 page->mapping, page_to_pgoff(page));
73 if (PageCompound(page)) 73 if (PageCompound(page))
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 287933005e11..e50a2db5b4ff 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -687,10 +687,119 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
687 return mz; 687 return mz;
688} 688}
689 689
690static unsigned long memcg_sum_events(struct mem_cgroup *memcg, 690/**
691 int event) 691 * __mod_memcg_state - update cgroup memory statistics
692 * @memcg: the memory cgroup
693 * @idx: the stat item - can be enum memcg_stat_item or enum node_stat_item
694 * @val: delta to add to the counter, can be negative
695 */
696void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
697{
698 long x;
699
700 if (mem_cgroup_disabled())
701 return;
702
703 x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
704 if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
705 struct mem_cgroup *mi;
706
707 atomic_long_add(x, &memcg->vmstats_local[idx]);
708 for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
709 atomic_long_add(x, &mi->vmstats[idx]);
710 x = 0;
711 }
712 __this_cpu_write(memcg->vmstats_percpu->stat[idx], x);
713}
714
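The batching scheme in __mod_memcg_state(), reduced to a single-threaded userspace sketch; BATCH stands in for MEMCG_CHARGE_BATCH, and the real code uses per-cpu storage plus atomics for the flush:

#include <stdio.h>
#include <stdlib.h>

#define BATCH 32	/* stand-in for MEMCG_CHARGE_BATCH */

static long shared;	/* models the shared atomic counters */
static long percpu;	/* models this CPU's vmstats_percpu slot */

static void mod_state(int val)
{
	long x = val + percpu;

	if (labs(x) > BATCH) {	/* rare: flush to the shared counter */
		shared += x;
		x = 0;
	}
	percpu = x;		/* common case: private data only */
}

int main(void)
{
	int i;

	for (i = 0; i < 100; i++)
		mod_state(1);
	printf("shared=%ld percpu=%ld total=%ld\n",
	       shared, percpu, shared + percpu);	/* total == 100 */
	return 0;
}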
715static struct mem_cgroup_per_node *
716parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid)
717{
718 struct mem_cgroup *parent;
719
720 parent = parent_mem_cgroup(pn->memcg);
721 if (!parent)
722 return NULL;
723 return mem_cgroup_nodeinfo(parent, nid);
724}
725
726/**
727 * __mod_lruvec_state - update lruvec memory statistics
728 * @lruvec: the lruvec
729 * @idx: the stat item
730 * @val: delta to add to the counter, can be negative
731 *
732 * The lruvec is the intersection of the NUMA node and a cgroup. This
 733 * function updates all three counters that are affected by a
734 * change of state at this level: per-node, per-cgroup, per-lruvec.
735 */
736void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
737 int val)
692{ 738{
693 return atomic_long_read(&memcg->events[event]); 739 pg_data_t *pgdat = lruvec_pgdat(lruvec);
740 struct mem_cgroup_per_node *pn;
741 struct mem_cgroup *memcg;
742 long x;
743
744 /* Update node */
745 __mod_node_page_state(pgdat, idx, val);
746
747 if (mem_cgroup_disabled())
748 return;
749
750 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
751 memcg = pn->memcg;
752
753 /* Update memcg */
754 __mod_memcg_state(memcg, idx, val);
755
756 /* Update lruvec */
757 x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
758 if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
759 struct mem_cgroup_per_node *pi;
760
761 atomic_long_add(x, &pn->lruvec_stat_local[idx]);
762 for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
763 atomic_long_add(x, &pi->lruvec_stat[idx]);
764 x = 0;
765 }
766 __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
767}
768
769/**
770 * __count_memcg_events - account VM events in a cgroup
771 * @memcg: the memory cgroup
772 * @idx: the event item
 773 * @count: the number of events that occurred
774 */
775void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
776 unsigned long count)
777{
778 unsigned long x;
779
780 if (mem_cgroup_disabled())
781 return;
782
783 x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
784 if (unlikely(x > MEMCG_CHARGE_BATCH)) {
785 struct mem_cgroup *mi;
786
787 atomic_long_add(x, &memcg->vmevents_local[idx]);
788 for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
789 atomic_long_add(x, &mi->vmevents[idx]);
790 x = 0;
791 }
792 __this_cpu_write(memcg->vmstats_percpu->events[idx], x);
793}
794
795static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
796{
797 return atomic_long_read(&memcg->vmevents[event]);
798}
799
800static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
801{
802 return atomic_long_read(&memcg->vmevents_local[event]);
694} 803}
695 804
696static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, 805static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -722,7 +831,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
722 nr_pages = -nr_pages; /* for event */ 831 nr_pages = -nr_pages; /* for event */
723 } 832 }
724 833
725 __this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages); 834 __this_cpu_add(memcg->vmstats_percpu->nr_page_events, nr_pages);
726} 835}
727 836
728static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, 837static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
@@ -730,8 +839,8 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
730{ 839{
731 unsigned long val, next; 840 unsigned long val, next;
732 841
733 val = __this_cpu_read(memcg->stat_cpu->nr_page_events); 842 val = __this_cpu_read(memcg->vmstats_percpu->nr_page_events);
734 next = __this_cpu_read(memcg->stat_cpu->targets[target]); 843 next = __this_cpu_read(memcg->vmstats_percpu->targets[target]);
735 /* from time_after() in jiffies.h */ 844 /* from time_after() in jiffies.h */
736 if ((long)(next - val) < 0) { 845 if ((long)(next - val) < 0) {
737 switch (target) { 846 switch (target) {
@@ -747,7 +856,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
747 default: 856 default:
748 break; 857 break;
749 } 858 }
750 __this_cpu_write(memcg->stat_cpu->targets[target], next); 859 __this_cpu_write(memcg->vmstats_percpu->targets[target], next);
751 return true; 860 return true;
752 } 861 }
753 return false; 862 return false;
@@ -1325,12 +1434,14 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
1325 if (memcg1_stats[i] == MEMCG_SWAP && !do_swap_account) 1434 if (memcg1_stats[i] == MEMCG_SWAP && !do_swap_account)
1326 continue; 1435 continue;
1327 pr_cont(" %s:%luKB", memcg1_stat_names[i], 1436 pr_cont(" %s:%luKB", memcg1_stat_names[i],
1328 K(memcg_page_state(iter, memcg1_stats[i]))); 1437 K(memcg_page_state_local(iter,
1438 memcg1_stats[i])));
1329 } 1439 }
1330 1440
1331 for (i = 0; i < NR_LRU_LISTS; i++) 1441 for (i = 0; i < NR_LRU_LISTS; i++)
1332 pr_cont(" %s:%luKB", mem_cgroup_lru_names[i], 1442 pr_cont(" %s:%luKB", mem_cgroup_lru_names[i],
1333 K(memcg_page_state(iter, NR_LRU_BASE + i))); 1443 K(memcg_page_state_local(iter,
1444 NR_LRU_BASE + i)));
1334 1445
1335 pr_cont("\n"); 1446 pr_cont("\n");
1336 } 1447 }
@@ -2076,7 +2187,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
2076static int memcg_hotplug_cpu_dead(unsigned int cpu) 2187static int memcg_hotplug_cpu_dead(unsigned int cpu)
2077{ 2188{
2078 struct memcg_stock_pcp *stock; 2189 struct memcg_stock_pcp *stock;
2079 struct mem_cgroup *memcg; 2190 struct mem_cgroup *memcg, *mi;
2080 2191
2081 stock = &per_cpu(memcg_stock, cpu); 2192 stock = &per_cpu(memcg_stock, cpu);
2082 drain_stock(stock); 2193 drain_stock(stock);
@@ -2088,9 +2199,12 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
2088 int nid; 2199 int nid;
2089 long x; 2200 long x;
2090 2201
2091 x = this_cpu_xchg(memcg->stat_cpu->count[i], 0); 2202 x = this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0);
2092 if (x) 2203 if (x) {
2093 atomic_long_add(x, &memcg->stat[i]); 2204 atomic_long_add(x, &memcg->vmstats_local[i]);
2205 for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
2206 atomic_long_add(x, &memcg->vmstats[i]);
2207 }
2094 2208
2095 if (i >= NR_VM_NODE_STAT_ITEMS) 2209 if (i >= NR_VM_NODE_STAT_ITEMS)
2096 continue; 2210 continue;
@@ -2100,17 +2214,24 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
2100 2214
2101 pn = mem_cgroup_nodeinfo(memcg, nid); 2215 pn = mem_cgroup_nodeinfo(memcg, nid);
2102 x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0); 2216 x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
2103 if (x) 2217 if (x) {
2104 atomic_long_add(x, &pn->lruvec_stat[i]); 2218 atomic_long_add(x, &pn->lruvec_stat_local[i]);
2219 do {
2220 atomic_long_add(x, &pn->lruvec_stat[i]);
2221 } while ((pn = parent_nodeinfo(pn, nid)));
2222 }
2105 } 2223 }
2106 } 2224 }
2107 2225
2108 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) { 2226 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
2109 long x; 2227 long x;
2110 2228
2111 x = this_cpu_xchg(memcg->stat_cpu->events[i], 0); 2229 x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0);
2112 if (x) 2230 if (x) {
2113 atomic_long_add(x, &memcg->events[i]); 2231 atomic_long_add(x, &memcg->vmevents_local[i]);
2232 for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
2233 atomic_long_add(x, &memcg->vmevents[i]);
2234 }
2114 } 2235 }
2115 } 2236 }
2116 2237
@@ -2940,50 +3061,15 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
2940 return retval; 3061 return retval;
2941} 3062}
2942 3063
2943struct accumulated_stats {
2944 unsigned long stat[MEMCG_NR_STAT];
2945 unsigned long events[NR_VM_EVENT_ITEMS];
2946 unsigned long lru_pages[NR_LRU_LISTS];
2947 const unsigned int *stats_array;
2948 const unsigned int *events_array;
2949 int stats_size;
2950 int events_size;
2951};
2952
2953static void accumulate_memcg_tree(struct mem_cgroup *memcg,
2954 struct accumulated_stats *acc)
2955{
2956 struct mem_cgroup *mi;
2957 int i;
2958
2959 for_each_mem_cgroup_tree(mi, memcg) {
2960 for (i = 0; i < acc->stats_size; i++)
2961 acc->stat[i] += memcg_page_state(mi,
2962 acc->stats_array ? acc->stats_array[i] : i);
2963
2964 for (i = 0; i < acc->events_size; i++)
2965 acc->events[i] += memcg_sum_events(mi,
2966 acc->events_array ? acc->events_array[i] : i);
2967
2968 for (i = 0; i < NR_LRU_LISTS; i++)
2969 acc->lru_pages[i] += memcg_page_state(mi,
2970 NR_LRU_BASE + i);
2971 }
2972}
2973
2974static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) 3064static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
2975{ 3065{
2976 unsigned long val = 0; 3066 unsigned long val;
2977 3067
2978 if (mem_cgroup_is_root(memcg)) { 3068 if (mem_cgroup_is_root(memcg)) {
2979 struct mem_cgroup *iter; 3069 val = memcg_page_state(memcg, MEMCG_CACHE) +
2980 3070 memcg_page_state(memcg, MEMCG_RSS);
2981 for_each_mem_cgroup_tree(iter, memcg) { 3071 if (swap)
2982 val += memcg_page_state(iter, MEMCG_CACHE); 3072 val += memcg_page_state(memcg, MEMCG_SWAP);
2983 val += memcg_page_state(iter, MEMCG_RSS);
2984 if (swap)
2985 val += memcg_page_state(iter, MEMCG_SWAP);
2986 }
2987 } else { 3073 } else {
2988 if (!swap) 3074 if (!swap)
2989 val = page_counter_read(&memcg->memory); 3075 val = page_counter_read(&memcg->memory);
@@ -3324,7 +3410,7 @@ static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
3324 for_each_lru(lru) { 3410 for_each_lru(lru) {
3325 if (!(BIT(lru) & lru_mask)) 3411 if (!(BIT(lru) & lru_mask))
3326 continue; 3412 continue;
3327 nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru); 3413 nr += lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
3328 } 3414 }
3329 return nr; 3415 return nr;
3330} 3416}
@@ -3338,7 +3424,7 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
3338 for_each_lru(lru) { 3424 for_each_lru(lru) {
3339 if (!(BIT(lru) & lru_mask)) 3425 if (!(BIT(lru) & lru_mask))
3340 continue; 3426 continue;
3341 nr += memcg_page_state(memcg, NR_LRU_BASE + lru); 3427 nr += memcg_page_state_local(memcg, NR_LRU_BASE + lru);
3342 } 3428 }
3343 return nr; 3429 return nr;
3344} 3430}
@@ -3414,7 +3500,6 @@ static int memcg_stat_show(struct seq_file *m, void *v)
3414 unsigned long memory, memsw; 3500 unsigned long memory, memsw;
3415 struct mem_cgroup *mi; 3501 struct mem_cgroup *mi;
3416 unsigned int i; 3502 unsigned int i;
3417 struct accumulated_stats acc;
3418 3503
3419 BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); 3504 BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
3420 BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); 3505 BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
@@ -3423,17 +3508,17 @@ static int memcg_stat_show(struct seq_file *m, void *v)
3423 if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) 3508 if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
3424 continue; 3509 continue;
3425 seq_printf(m, "%s %lu\n", memcg1_stat_names[i], 3510 seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
3426 memcg_page_state(memcg, memcg1_stats[i]) * 3511 memcg_page_state_local(memcg, memcg1_stats[i]) *
3427 PAGE_SIZE); 3512 PAGE_SIZE);
3428 } 3513 }
3429 3514
3430 for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) 3515 for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
3431 seq_printf(m, "%s %lu\n", memcg1_event_names[i], 3516 seq_printf(m, "%s %lu\n", memcg1_event_names[i],
3432 memcg_sum_events(memcg, memcg1_events[i])); 3517 memcg_events_local(memcg, memcg1_events[i]));
3433 3518
3434 for (i = 0; i < NR_LRU_LISTS; i++) 3519 for (i = 0; i < NR_LRU_LISTS; i++)
3435 seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i], 3520 seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
3436 memcg_page_state(memcg, NR_LRU_BASE + i) * 3521 memcg_page_state_local(memcg, NR_LRU_BASE + i) *
3437 PAGE_SIZE); 3522 PAGE_SIZE);
3438 3523
3439 /* Hierarchical information */ 3524 /* Hierarchical information */
@@ -3448,27 +3533,21 @@ static int memcg_stat_show(struct seq_file *m, void *v)
3448 seq_printf(m, "hierarchical_memsw_limit %llu\n", 3533 seq_printf(m, "hierarchical_memsw_limit %llu\n",
3449 (u64)memsw * PAGE_SIZE); 3534 (u64)memsw * PAGE_SIZE);
3450 3535
3451 memset(&acc, 0, sizeof(acc));
3452 acc.stats_size = ARRAY_SIZE(memcg1_stats);
3453 acc.stats_array = memcg1_stats;
3454 acc.events_size = ARRAY_SIZE(memcg1_events);
3455 acc.events_array = memcg1_events;
3456 accumulate_memcg_tree(memcg, &acc);
3457
3458 for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { 3536 for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
3459 if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) 3537 if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
3460 continue; 3538 continue;
3461 seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], 3539 seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
3462 (u64)acc.stat[i] * PAGE_SIZE); 3540 (u64)memcg_page_state(memcg, i) * PAGE_SIZE);
3463 } 3541 }
3464 3542
3465 for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) 3543 for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
3466 seq_printf(m, "total_%s %llu\n", memcg1_event_names[i], 3544 seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
3467 (u64)acc.events[i]); 3545 (u64)memcg_events(memcg, i));
3468 3546
3469 for (i = 0; i < NR_LRU_LISTS; i++) 3547 for (i = 0; i < NR_LRU_LISTS; i++)
3470 seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], 3548 seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
3471 (u64)acc.lru_pages[i] * PAGE_SIZE); 3549 (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
3550 PAGE_SIZE);
3472 3551
3473#ifdef CONFIG_DEBUG_VM 3552#ifdef CONFIG_DEBUG_VM
3474 { 3553 {
@@ -3901,11 +3980,11 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
3901 */ 3980 */
3902static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx) 3981static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
3903{ 3982{
3904 long x = atomic_long_read(&memcg->stat[idx]); 3983 long x = atomic_long_read(&memcg->vmstats[idx]);
3905 int cpu; 3984 int cpu;
3906 3985
3907 for_each_online_cpu(cpu) 3986 for_each_online_cpu(cpu)
3908 x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx]; 3987 x += per_cpu_ptr(memcg->vmstats_percpu, cpu)->stat[idx];
3909 if (x < 0) 3988 if (x < 0)
3910 x = 0; 3989 x = 0;
3911 return x; 3990 return x;
@@ -4445,7 +4524,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
4445 4524
4446 for_each_node(node) 4525 for_each_node(node)
4447 free_mem_cgroup_per_node_info(memcg, node); 4526 free_mem_cgroup_per_node_info(memcg, node);
4448 free_percpu(memcg->stat_cpu); 4527 free_percpu(memcg->vmstats_percpu);
4449 kfree(memcg); 4528 kfree(memcg);
4450} 4529}
4451 4530
@@ -4474,8 +4553,8 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
4474 if (memcg->id.id < 0) 4553 if (memcg->id.id < 0)
4475 goto fail; 4554 goto fail;
4476 4555
4477 memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu); 4556 memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu);
4478 if (!memcg->stat_cpu) 4557 if (!memcg->vmstats_percpu)
4479 goto fail; 4558 goto fail;
4480 4559
4481 for_each_node(node) 4560 for_each_node(node)
@@ -5561,7 +5640,6 @@ static int memory_events_show(struct seq_file *m, void *v)
5561static int memory_stat_show(struct seq_file *m, void *v) 5640static int memory_stat_show(struct seq_file *m, void *v)
5562{ 5641{
5563 struct mem_cgroup *memcg = mem_cgroup_from_seq(m); 5642 struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
5564 struct accumulated_stats acc;
5565 int i; 5643 int i;
5566 5644
5567 /* 5645 /*
@@ -5575,31 +5653,27 @@ static int memory_stat_show(struct seq_file *m, void *v)
5575 * Current memory state: 5653 * Current memory state:
5576 */ 5654 */
5577 5655
5578 memset(&acc, 0, sizeof(acc));
5579 acc.stats_size = MEMCG_NR_STAT;
5580 acc.events_size = NR_VM_EVENT_ITEMS;
5581 accumulate_memcg_tree(memcg, &acc);
5582
5583 seq_printf(m, "anon %llu\n", 5656 seq_printf(m, "anon %llu\n",
5584 (u64)acc.stat[MEMCG_RSS] * PAGE_SIZE); 5657 (u64)memcg_page_state(memcg, MEMCG_RSS) * PAGE_SIZE);
5585 seq_printf(m, "file %llu\n", 5658 seq_printf(m, "file %llu\n",
5586 (u64)acc.stat[MEMCG_CACHE] * PAGE_SIZE); 5659 (u64)memcg_page_state(memcg, MEMCG_CACHE) * PAGE_SIZE);
5587 seq_printf(m, "kernel_stack %llu\n", 5660 seq_printf(m, "kernel_stack %llu\n",
5588 (u64)acc.stat[MEMCG_KERNEL_STACK_KB] * 1024); 5661 (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) * 1024);
5589 seq_printf(m, "slab %llu\n", 5662 seq_printf(m, "slab %llu\n",
5590 (u64)(acc.stat[NR_SLAB_RECLAIMABLE] + 5663 (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) +
5591 acc.stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); 5664 memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE)) *
5665 PAGE_SIZE);
5592 seq_printf(m, "sock %llu\n", 5666 seq_printf(m, "sock %llu\n",
5593 (u64)acc.stat[MEMCG_SOCK] * PAGE_SIZE); 5667 (u64)memcg_page_state(memcg, MEMCG_SOCK) * PAGE_SIZE);
5594 5668
5595 seq_printf(m, "shmem %llu\n", 5669 seq_printf(m, "shmem %llu\n",
5596 (u64)acc.stat[NR_SHMEM] * PAGE_SIZE); 5670 (u64)memcg_page_state(memcg, NR_SHMEM) * PAGE_SIZE);
5597 seq_printf(m, "file_mapped %llu\n", 5671 seq_printf(m, "file_mapped %llu\n",
5598 (u64)acc.stat[NR_FILE_MAPPED] * PAGE_SIZE); 5672 (u64)memcg_page_state(memcg, NR_FILE_MAPPED) * PAGE_SIZE);
5599 seq_printf(m, "file_dirty %llu\n", 5673 seq_printf(m, "file_dirty %llu\n",
5600 (u64)acc.stat[NR_FILE_DIRTY] * PAGE_SIZE); 5674 (u64)memcg_page_state(memcg, NR_FILE_DIRTY) * PAGE_SIZE);
5601 seq_printf(m, "file_writeback %llu\n", 5675 seq_printf(m, "file_writeback %llu\n",
5602 (u64)acc.stat[NR_WRITEBACK] * PAGE_SIZE); 5676 (u64)memcg_page_state(memcg, NR_WRITEBACK) * PAGE_SIZE);
5603 5677
5604 /* 5678 /*
5605 * TODO: We should eventually replace our own MEMCG_RSS_HUGE counter 5679 * TODO: We should eventually replace our own MEMCG_RSS_HUGE counter
@@ -5608,43 +5682,47 @@ static int memory_stat_show(struct seq_file *m, void *v)
5608 * where the page->mem_cgroup is set up and stable. 5682 * where the page->mem_cgroup is set up and stable.
5609 */ 5683 */
5610 seq_printf(m, "anon_thp %llu\n", 5684 seq_printf(m, "anon_thp %llu\n",
5611 (u64)acc.stat[MEMCG_RSS_HUGE] * PAGE_SIZE); 5685 (u64)memcg_page_state(memcg, MEMCG_RSS_HUGE) * PAGE_SIZE);
5612 5686
5613 for (i = 0; i < NR_LRU_LISTS; i++) 5687 for (i = 0; i < NR_LRU_LISTS; i++)
5614 seq_printf(m, "%s %llu\n", mem_cgroup_lru_names[i], 5688 seq_printf(m, "%s %llu\n", mem_cgroup_lru_names[i],
5615 (u64)acc.lru_pages[i] * PAGE_SIZE); 5689 (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
5690 PAGE_SIZE);
5616 5691
5617 seq_printf(m, "slab_reclaimable %llu\n", 5692 seq_printf(m, "slab_reclaimable %llu\n",
5618 (u64)acc.stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE); 5693 (u64)memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) *
5694 PAGE_SIZE);
5619 seq_printf(m, "slab_unreclaimable %llu\n", 5695 seq_printf(m, "slab_unreclaimable %llu\n",
5620 (u64)acc.stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE); 5696 (u64)memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE) *
5697 PAGE_SIZE);
5621 5698
5622 /* Accumulated memory events */ 5699 /* Accumulated memory events */
5623 5700
5624 seq_printf(m, "pgfault %lu\n", acc.events[PGFAULT]); 5701 seq_printf(m, "pgfault %lu\n", memcg_events(memcg, PGFAULT));
5625 seq_printf(m, "pgmajfault %lu\n", acc.events[PGMAJFAULT]); 5702 seq_printf(m, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT));
5626 5703
5627 seq_printf(m, "workingset_refault %lu\n", 5704 seq_printf(m, "workingset_refault %lu\n",
5628 acc.stat[WORKINGSET_REFAULT]); 5705 memcg_page_state(memcg, WORKINGSET_REFAULT));
5629 seq_printf(m, "workingset_activate %lu\n", 5706 seq_printf(m, "workingset_activate %lu\n",
5630 acc.stat[WORKINGSET_ACTIVATE]); 5707 memcg_page_state(memcg, WORKINGSET_ACTIVATE));
5631 seq_printf(m, "workingset_nodereclaim %lu\n", 5708 seq_printf(m, "workingset_nodereclaim %lu\n",
5632 acc.stat[WORKINGSET_NODERECLAIM]); 5709 memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
5633 5710
5634 seq_printf(m, "pgrefill %lu\n", acc.events[PGREFILL]); 5711 seq_printf(m, "pgrefill %lu\n", memcg_events(memcg, PGREFILL));
5635 seq_printf(m, "pgscan %lu\n", acc.events[PGSCAN_KSWAPD] + 5712 seq_printf(m, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) +
5636 acc.events[PGSCAN_DIRECT]); 5713 memcg_events(memcg, PGSCAN_DIRECT));
5637 seq_printf(m, "pgsteal %lu\n", acc.events[PGSTEAL_KSWAPD] + 5714 seq_printf(m, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) +
5638 acc.events[PGSTEAL_DIRECT]); 5715 memcg_events(memcg, PGSTEAL_DIRECT));
5639 seq_printf(m, "pgactivate %lu\n", acc.events[PGACTIVATE]); 5716 seq_printf(m, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE));
5640 seq_printf(m, "pgdeactivate %lu\n", acc.events[PGDEACTIVATE]); 5717 seq_printf(m, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE));
5641 seq_printf(m, "pglazyfree %lu\n", acc.events[PGLAZYFREE]); 5718 seq_printf(m, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE));
5642 seq_printf(m, "pglazyfreed %lu\n", acc.events[PGLAZYFREED]); 5719 seq_printf(m, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED));
5643 5720
5644#ifdef CONFIG_TRANSPARENT_HUGEPAGE 5721#ifdef CONFIG_TRANSPARENT_HUGEPAGE
5645 seq_printf(m, "thp_fault_alloc %lu\n", acc.events[THP_FAULT_ALLOC]); 5722 seq_printf(m, "thp_fault_alloc %lu\n",
5723 memcg_events(memcg, THP_FAULT_ALLOC));
5646 seq_printf(m, "thp_collapse_alloc %lu\n", 5724 seq_printf(m, "thp_collapse_alloc %lu\n",
5647 acc.events[THP_COLLAPSE_ALLOC]); 5725 memcg_events(memcg, THP_COLLAPSE_ALLOC));
5648#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 5726#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
5649 5727
5650 return 0; 5728 return 0;
@@ -6080,7 +6158,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
6080 __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge); 6158 __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
6081 __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem); 6159 __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem);
6082 __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); 6160 __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
6083 __this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages); 6161 __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, nr_pages);
6084 memcg_check_events(ug->memcg, ug->dummy_page); 6162 memcg_check_events(ug->memcg, ug->dummy_page);
6085 local_irq_restore(flags); 6163 local_irq_restore(flags);
6086 6164
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6c0c4f48638e..328878b6799d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -39,6 +39,7 @@
39#include <asm/tlbflush.h> 39#include <asm/tlbflush.h>
40 40
41#include "internal.h" 41#include "internal.h"
42#include "shuffle.h"
42 43
43/* 44/*
44 * online_page_callback contains pointer to current page onlining function. 45 * online_page_callback contains pointer to current page onlining function.
@@ -891,6 +892,8 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
891 zone->zone_pgdat->node_present_pages += onlined_pages; 892 zone->zone_pgdat->node_present_pages += onlined_pages;
892 pgdat_resize_unlock(zone->zone_pgdat, &flags); 893 pgdat_resize_unlock(zone->zone_pgdat, &flags);
893 894
895 shuffle_zone(zone);
896
894 if (onlined_pages) { 897 if (onlined_pages) {
895 node_states_set_node(nid, &arg); 898 node_states_set_node(nid, &arg);
896 if (need_zonelists_rebuild) 899 if (need_zonelists_rebuild)
diff --git a/mm/mincore.c b/mm/mincore.c
index 218099b5ed31..c3f058bd0faf 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -169,6 +169,22 @@ out:
169 return 0; 169 return 0;
170} 170}
171 171
172static inline bool can_do_mincore(struct vm_area_struct *vma)
173{
174 if (vma_is_anonymous(vma))
175 return true;
176 if (!vma->vm_file)
177 return false;
178 /*
179 * Reveal pagecache information only for non-anonymous mappings that
180 * correspond to the files the calling process could (if tried) open
181 * for writing; otherwise we'd be including shared non-exclusive
182 * mappings, which opens a side channel.
183 */
184 return inode_owner_or_capable(file_inode(vma->vm_file)) ||
185 inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
186}
187
172/* 188/*
173 * Do a chunk of "sys_mincore()". We've already checked 189 * Do a chunk of "sys_mincore()". We've already checked
174 * all the arguments, we hold the mmap semaphore: we should 190 * all the arguments, we hold the mmap semaphore: we should
@@ -189,8 +205,13 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
189 vma = find_vma(current->mm, addr); 205 vma = find_vma(current->mm, addr);
190 if (!vma || addr < vma->vm_start) 206 if (!vma || addr < vma->vm_start)
191 return -ENOMEM; 207 return -ENOMEM;
192 mincore_walk.mm = vma->vm_mm;
193 end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); 208 end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
209 if (!can_do_mincore(vma)) {
210 unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
211 memset(vec, 1, pages);
212 return pages;
213 }
214 mincore_walk.mm = vma->vm_mm;
194 err = walk_page_range(addr, end, &mincore_walk); 215 err = walk_page_range(addr, end, &mincore_walk);
195 if (err < 0) 216 if (err < 0)
196 return err; 217 return err;
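can_do_mincore() closes a pagecache side channel: residency is revealed only for mappings of files the caller owns or could open for writing; for everything else every page is reported as resident. A minimal userspace sketch of the probe this defeats, using the standard mincore(2) interface (the file path is a placeholder):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		unsigned char vec[4];
		size_t len = 4 * 4096;	/* assumes 4 KiB pages */
		int fd = open("/some/shared/file", O_RDONLY);
		void *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);

		if (fd < 0 || p == MAP_FAILED || mincore(p, len, vec))
			return 1;
		/* After this change, an unprivileged caller sees 1 for
		 * every page regardless of actual residency. */
		for (int i = 0; i < 4; i++)
			printf("page %d: %d\n", i, vec[i] & 1);
		return 0;
	}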
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f2f3fb4921d1..3b13d3914176 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -43,6 +43,7 @@
43#include <linux/mempolicy.h> 43#include <linux/mempolicy.h>
44#include <linux/memremap.h> 44#include <linux/memremap.h>
45#include <linux/stop_machine.h> 45#include <linux/stop_machine.h>
46#include <linux/random.h>
46#include <linux/sort.h> 47#include <linux/sort.h>
47#include <linux/pfn.h> 48#include <linux/pfn.h>
48#include <linux/backing-dev.h> 49#include <linux/backing-dev.h>
@@ -72,6 +73,7 @@
72#include <asm/tlbflush.h> 73#include <asm/tlbflush.h>
73#include <asm/div64.h> 74#include <asm/div64.h>
74#include "internal.h" 75#include "internal.h"
76#include "shuffle.h"
75 77
76/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ 78/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
77static DEFINE_MUTEX(pcp_batch_high_lock); 79static DEFINE_MUTEX(pcp_batch_high_lock);
@@ -755,12 +757,6 @@ static inline void set_page_order(struct page *page, unsigned int order)
755 __SetPageBuddy(page); 757 __SetPageBuddy(page);
756} 758}
757 759
758static inline void rmv_page_order(struct page *page)
759{
760 __ClearPageBuddy(page);
761 set_page_private(page, 0);
762}
763
764/* 760/*
765 * This function checks whether a page is free && is the buddy 761 * This function checks whether a page is free && is the buddy
766 * we can coalesce a page and its buddy if 762 * we can coalesce a page and its buddy if
@@ -918,13 +914,10 @@ continue_merging:
918 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page, 914 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
919 * merge with it and move up one order. 915 * merge with it and move up one order.
920 */ 916 */
921 if (page_is_guard(buddy)) { 917 if (page_is_guard(buddy))
922 clear_page_guard(zone, buddy, order, migratetype); 918 clear_page_guard(zone, buddy, order, migratetype);
923 } else { 919 else
924 list_del(&buddy->lru); 920 del_page_from_free_area(buddy, &zone->free_area[order]);
925 zone->free_area[order].nr_free--;
926 rmv_page_order(buddy);
927 }
928 combined_pfn = buddy_pfn & pfn; 921 combined_pfn = buddy_pfn & pfn;
929 page = page + (combined_pfn - pfn); 922 page = page + (combined_pfn - pfn);
930 pfn = combined_pfn; 923 pfn = combined_pfn;
@@ -966,7 +959,8 @@ done_merging:
966 * so it's less likely to be used soon and more likely to be merged 959 * so it's less likely to be used soon and more likely to be merged
967 * as a higher order page 960 * as a higher order page
968 */ 961 */
969 if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)) { 962 if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)
963 && !is_shuffle_order(order)) {
970 struct page *higher_page, *higher_buddy; 964 struct page *higher_page, *higher_buddy;
971 combined_pfn = buddy_pfn & pfn; 965 combined_pfn = buddy_pfn & pfn;
972 higher_page = page + (combined_pfn - pfn); 966 higher_page = page + (combined_pfn - pfn);
@@ -974,15 +968,18 @@ done_merging:
974 higher_buddy = higher_page + (buddy_pfn - combined_pfn); 968 higher_buddy = higher_page + (buddy_pfn - combined_pfn);
975 if (pfn_valid_within(buddy_pfn) && 969 if (pfn_valid_within(buddy_pfn) &&
976 page_is_buddy(higher_page, higher_buddy, order + 1)) { 970 page_is_buddy(higher_page, higher_buddy, order + 1)) {
977 list_add_tail(&page->lru, 971 add_to_free_area_tail(page, &zone->free_area[order],
978 &zone->free_area[order].free_list[migratetype]); 972 migratetype);
979 goto out; 973 return;
980 } 974 }
981 } 975 }
982 976
983 list_add(&page->lru, &zone->free_area[order].free_list[migratetype]); 977 if (is_shuffle_order(order))
984out: 978 add_to_free_area_random(page, &zone->free_area[order],
985 zone->free_area[order].nr_free++; 979 migratetype);
980 else
981 add_to_free_area(page, &zone->free_area[order], migratetype);
982
986} 983}
987 984
988/* 985/*
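Throughout this file the open-coded freelist manipulation (list_add()/list_del(), nr_free accounting, rmv_page_order()) is replaced by free_area helpers whose bodies are not shown in these hunks. Reconstructed from the removed sequences, they amount to something like:

	static inline void add_to_free_area(struct page *page,
			struct free_area *area, int migratetype)
	{
		list_add(&page->lru, &area->free_list[migratetype]);
		area->nr_free++;
	}

	static inline void del_page_from_free_area(struct page *page,
			struct free_area *area)
	{
		list_del(&page->lru);
		__ClearPageBuddy(page);		/* was rmv_page_order() */
		set_page_private(page, 0);
		area->nr_free--;
	}

	static inline bool free_area_empty(struct free_area *area,
			int migratetype)
	{
		return list_empty(&area->free_list[migratetype]);
	}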
@@ -1874,9 +1871,9 @@ _deferred_grow_zone(struct zone *zone, unsigned int order)
1874void __init page_alloc_init_late(void) 1871void __init page_alloc_init_late(void)
1875{ 1872{
1876 struct zone *zone; 1873 struct zone *zone;
1874 int nid;
1877 1875
1878#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 1876#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
1879 int nid;
1880 1877
1881 /* There will be num_node_state(N_MEMORY) threads */ 1878 /* There will be num_node_state(N_MEMORY) threads */
1882 atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY)); 1879 atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
@@ -1900,6 +1897,9 @@ void __init page_alloc_init_late(void)
1900 /* Discard memblock private memory */ 1897 /* Discard memblock private memory */
1901 memblock_discard(); 1898 memblock_discard();
1902 1899
1900 for_each_node_state(nid, N_MEMORY)
1901 shuffle_free_memory(NODE_DATA(nid));
1902
1903 for_each_populated_zone(zone) 1903 for_each_populated_zone(zone)
1904 set_zone_contiguous(zone); 1904 set_zone_contiguous(zone);
1905} 1905}
@@ -1970,8 +1970,7 @@ static inline void expand(struct zone *zone, struct page *page,
1970 if (set_page_guard(zone, &page[size], high, migratetype)) 1970 if (set_page_guard(zone, &page[size], high, migratetype))
1971 continue; 1971 continue;
1972 1972
1973 list_add(&page[size].lru, &area->free_list[migratetype]); 1973 add_to_free_area(&page[size], area, migratetype);
1974 area->nr_free++;
1975 set_page_order(&page[size], high); 1974 set_page_order(&page[size], high);
1976 } 1975 }
1977} 1976}
@@ -1986,7 +1985,7 @@ static void check_new_page_bad(struct page *page)
1986 if (unlikely(page->mapping != NULL)) 1985 if (unlikely(page->mapping != NULL))
1987 bad_reason = "non-NULL mapping"; 1986 bad_reason = "non-NULL mapping";
1988 if (unlikely(page_ref_count(page) != 0)) 1987 if (unlikely(page_ref_count(page) != 0))
1989 bad_reason = "nonzero _count"; 1988 bad_reason = "nonzero _refcount";
1990 if (unlikely(page->flags & __PG_HWPOISON)) { 1989 if (unlikely(page->flags & __PG_HWPOISON)) {
1991 bad_reason = "HWPoisoned (hardware-corrupted)"; 1990 bad_reason = "HWPoisoned (hardware-corrupted)";
1992 bad_flags = __PG_HWPOISON; 1991 bad_flags = __PG_HWPOISON;
@@ -2113,13 +2112,10 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
2113 /* Find a page of the appropriate size in the preferred list */ 2112 /* Find a page of the appropriate size in the preferred list */
2114 for (current_order = order; current_order < MAX_ORDER; ++current_order) { 2113 for (current_order = order; current_order < MAX_ORDER; ++current_order) {
2115 area = &(zone->free_area[current_order]); 2114 area = &(zone->free_area[current_order]);
2116 page = list_first_entry_or_null(&area->free_list[migratetype], 2115 page = get_page_from_free_area(area, migratetype);
2117 struct page, lru);
2118 if (!page) 2116 if (!page)
2119 continue; 2117 continue;
2120 list_del(&page->lru); 2118 del_page_from_free_area(page, area);
2121 rmv_page_order(page);
2122 area->nr_free--;
2123 expand(zone, page, order, current_order, area, migratetype); 2119 expand(zone, page, order, current_order, area, migratetype);
2124 set_pcppage_migratetype(page, migratetype); 2120 set_pcppage_migratetype(page, migratetype);
2125 return page; 2121 return page;
@@ -2205,8 +2201,7 @@ static int move_freepages(struct zone *zone,
2205 } 2201 }
2206 2202
2207 order = page_order(page); 2203 order = page_order(page);
2208 list_move(&page->lru, 2204 move_to_free_area(page, &zone->free_area[order], migratetype);
2209 &zone->free_area[order].free_list[migratetype]);
2210 page += 1 << order; 2205 page += 1 << order;
2211 pages_moved += 1 << order; 2206 pages_moved += 1 << order;
2212 } 2207 }
@@ -2394,7 +2389,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
2394 2389
2395single_page: 2390single_page:
2396 area = &zone->free_area[current_order]; 2391 area = &zone->free_area[current_order];
2397 list_move(&page->lru, &area->free_list[start_type]); 2392 move_to_free_area(page, area, start_type);
2398} 2393}
2399 2394
2400/* 2395/*
@@ -2418,7 +2413,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
2418 if (fallback_mt == MIGRATE_TYPES) 2413 if (fallback_mt == MIGRATE_TYPES)
2419 break; 2414 break;
2420 2415
2421 if (list_empty(&area->free_list[fallback_mt])) 2416 if (free_area_empty(area, fallback_mt))
2422 continue; 2417 continue;
2423 2418
2424 if (can_steal_fallback(order, migratetype)) 2419 if (can_steal_fallback(order, migratetype))
@@ -2505,9 +2500,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
2505 for (order = 0; order < MAX_ORDER; order++) { 2500 for (order = 0; order < MAX_ORDER; order++) {
2506 struct free_area *area = &(zone->free_area[order]); 2501 struct free_area *area = &(zone->free_area[order]);
2507 2502
2508 page = list_first_entry_or_null( 2503 page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
2509 &area->free_list[MIGRATE_HIGHATOMIC],
2510 struct page, lru);
2511 if (!page) 2504 if (!page)
2512 continue; 2505 continue;
2513 2506
@@ -2630,8 +2623,7 @@ find_smallest:
2630 VM_BUG_ON(current_order == MAX_ORDER); 2623 VM_BUG_ON(current_order == MAX_ORDER);
2631 2624
2632do_steal: 2625do_steal:
2633 page = list_first_entry(&area->free_list[fallback_mt], 2626 page = get_page_from_free_area(area, fallback_mt);
2634 struct page, lru);
2635 2627
2636 steal_suitable_fallback(zone, page, alloc_flags, start_migratetype, 2628 steal_suitable_fallback(zone, page, alloc_flags, start_migratetype,
2637 can_steal); 2629 can_steal);
@@ -3068,6 +3060,7 @@ EXPORT_SYMBOL_GPL(split_page);
3068 3060
3069int __isolate_free_page(struct page *page, unsigned int order) 3061int __isolate_free_page(struct page *page, unsigned int order)
3070{ 3062{
3063 struct free_area *area = &page_zone(page)->free_area[order];
3071 unsigned long watermark; 3064 unsigned long watermark;
3072 struct zone *zone; 3065 struct zone *zone;
3073 int mt; 3066 int mt;
@@ -3092,9 +3085,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
3092 } 3085 }
3093 3086
3094 /* Remove page from free list */ 3087 /* Remove page from free list */
3095 list_del(&page->lru); 3088
3096 zone->free_area[order].nr_free--; 3089 del_page_from_free_area(page, area);
3097 rmv_page_order(page);
3098 3090
3099 /* 3091 /*
3100 * Set the pageblock if the isolated page is at least half of a 3092 * Set the pageblock if the isolated page is at least half of a
@@ -3391,13 +3383,13 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
3391 continue; 3383 continue;
3392 3384
3393 for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { 3385 for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
3394 if (!list_empty(&area->free_list[mt])) 3386 if (!free_area_empty(area, mt))
3395 return true; 3387 return true;
3396 } 3388 }
3397 3389
3398#ifdef CONFIG_CMA 3390#ifdef CONFIG_CMA
3399 if ((alloc_flags & ALLOC_CMA) && 3391 if ((alloc_flags & ALLOC_CMA) &&
3400 !list_empty(&area->free_list[MIGRATE_CMA])) { 3392 !free_area_empty(area, MIGRATE_CMA)) {
3401 return true; 3393 return true;
3402 } 3394 }
3403#endif 3395#endif
@@ -5324,7 +5316,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
5324 5316
5325 types[order] = 0; 5317 types[order] = 0;
5326 for (type = 0; type < MIGRATE_TYPES; type++) { 5318 for (type = 0; type < MIGRATE_TYPES; type++) {
5327 if (!list_empty(&area->free_list[type])) 5319 if (!free_area_empty(area, type))
5328 types[order] |= 1 << type; 5320 types[order] |= 1 << type;
5329 } 5321 }
5330 } 5322 }
@@ -8497,9 +8489,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
8497 pr_info("remove from free list %lx %d %lx\n", 8489 pr_info("remove from free list %lx %d %lx\n",
8498 pfn, 1 << order, end_pfn); 8490 pfn, 1 << order, end_pfn);
8499#endif 8491#endif
8500 list_del(&page->lru); 8492 del_page_from_free_area(page, &zone->free_area[order]);
8501 rmv_page_order(page);
8502 zone->free_area[order].nr_free--;
8503 for (i = 0; i < (1 << order); i++) 8493 for (i = 0; i < (1 << order); i++)
8504 SetPageReserved((page+i)); 8494 SetPageReserved((page+i));
8505 pfn += (1 << order); 8495 pfn += (1 << order);
diff --git a/mm/shuffle.c b/mm/shuffle.c
new file mode 100644
index 000000000000..3ce12481b1dc
--- /dev/null
+++ b/mm/shuffle.c
@@ -0,0 +1,207 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright(c) 2018 Intel Corporation. All rights reserved.
3
4#include <linux/mm.h>
5#include <linux/init.h>
6#include <linux/mmzone.h>
7#include <linux/random.h>
8#include <linux/moduleparam.h>
9#include "internal.h"
10#include "shuffle.h"
11
12DEFINE_STATIC_KEY_FALSE(page_alloc_shuffle_key);
13static unsigned long shuffle_state __ro_after_init;
14
15/*
16 * Depending on the architecture, module parameter parsing may run
17 * before or after the cache detection. SHUFFLE_FORCE_DISABLE prevents
18 * or reverts the enabling of the shuffle implementation. SHUFFLE_ENABLE
19 * attempts to turn on the implementation, but aborts if it finds
20 * SHUFFLE_FORCE_DISABLE already set.
21 */
22__meminit void page_alloc_shuffle(enum mm_shuffle_ctl ctl)
23{
24 if (ctl == SHUFFLE_FORCE_DISABLE)
25 set_bit(SHUFFLE_FORCE_DISABLE, &shuffle_state);
26
27 if (test_bit(SHUFFLE_FORCE_DISABLE, &shuffle_state)) {
28 if (test_and_clear_bit(SHUFFLE_ENABLE, &shuffle_state))
29 static_branch_disable(&page_alloc_shuffle_key);
30 } else if (ctl == SHUFFLE_ENABLE
31 && !test_and_set_bit(SHUFFLE_ENABLE, &shuffle_state))
32 static_branch_enable(&page_alloc_shuffle_key);
33}
34
35static bool shuffle_param;
36static int shuffle_show(char *buffer, const struct kernel_param *kp)
37{
38 return sprintf(buffer, "%c\n", test_bit(SHUFFLE_ENABLE, &shuffle_state)
39 ? 'Y' : 'N');
40}
41
42static __meminit int shuffle_store(const char *val,
43 const struct kernel_param *kp)
44{
45 int rc = param_set_bool(val, kp);
46
47 if (rc < 0)
48 return rc;
49 if (shuffle_param)
50 page_alloc_shuffle(SHUFFLE_ENABLE);
51 else
52 page_alloc_shuffle(SHUFFLE_FORCE_DISABLE);
53 return 0;
54}
55module_param_call(shuffle, shuffle_store, shuffle_show, &shuffle_param, 0400);
56
57/*
58 * For two pages to be swapped in the shuffle, they must be free (on a
59 * 'free_area' lru), have the same order, and have the same migratetype.
60 */
61static struct page * __meminit shuffle_valid_page(unsigned long pfn, int order)
62{
63 struct page *page;
64
65 /*
66 * Given we're dealing with randomly selected pfns in a zone we
67 * need to ask questions like...
68 */
69
70 /* ...is the pfn even in the memmap? */
71 if (!pfn_valid_within(pfn))
72 return NULL;
73
74 /* ...is the pfn in a present section or a hole? */
75 if (!pfn_present(pfn))
76 return NULL;
77
78 /* ...is the page free and currently on a free_area list? */
79 page = pfn_to_page(pfn);
80 if (!PageBuddy(page))
81 return NULL;
82
83 /*
84 * ...is the page on the same list as the page we will
85 * shuffle it with?
86 */
87 if (page_order(page) != order)
88 return NULL;
89
90 return page;
91}
92
93/*
94 * Fisher-Yates shuffle of the freelist: iterate through an array, pfns in
95 * this case, randomly swapping each entry with another in the span
96 * end_pfn - start_pfn.
97 *
98 * To keep the implementation simple it does not attempt to correct for sources
99 * of bias in the distribution, like modulo bias or pseudo-random number
100 * generator bias. I.e. the expectation is that this shuffling raises the bar
101 * for attacks that exploit the predictability of page allocations, but need not
102 * be a perfect shuffle.
103 */
104#define SHUFFLE_RETRY 10
105void __meminit __shuffle_zone(struct zone *z)
106{
107 unsigned long i, flags;
108 unsigned long start_pfn = z->zone_start_pfn;
109 unsigned long end_pfn = zone_end_pfn(z);
110 const int order = SHUFFLE_ORDER;
111 const int order_pages = 1 << order;
112
113 spin_lock_irqsave(&z->lock, flags);
114 start_pfn = ALIGN(start_pfn, order_pages);
115 for (i = start_pfn; i < end_pfn; i += order_pages) {
116 unsigned long j;
117 int migratetype, retry;
118 struct page *page_i, *page_j;
119
120 /*
121 * We expect page_i, in the sub-range of a zone being added
122 * (@start_pfn to @end_pfn), to be more likely valid than page_j,
123 * which is randomly selected from the span @zone_start_pfn to
124 * @spanned_pages.
125 */
126 page_i = shuffle_valid_page(i, order);
127 if (!page_i)
128 continue;
129
130 for (retry = 0; retry < SHUFFLE_RETRY; retry++) {
131 /*
132 * Pick a random order aligned page in the zone span as
133 * a swap target. If the selected pfn is a hole, retry
134 * up to SHUFFLE_RETRY attempts to find a random valid pfn
135 * in the zone.
136 */
137 j = z->zone_start_pfn +
138 ALIGN_DOWN(get_random_long() % z->spanned_pages,
139 order_pages);
140 page_j = shuffle_valid_page(j, order);
141 if (page_j && page_j != page_i)
142 break;
143 }
144 if (retry >= SHUFFLE_RETRY) {
145 pr_debug("%s: failed to swap %#lx\n", __func__, i);
146 continue;
147 }
148
149 /*
150 * Each migratetype corresponds to its own list, make sure the
151 * types match otherwise we're moving pages to lists where they
152 * do not belong.
153 */
154 migratetype = get_pageblock_migratetype(page_i);
155 if (get_pageblock_migratetype(page_j) != migratetype) {
156 pr_debug("%s: migratetype mismatch %#lx\n", __func__, i);
157 continue;
158 }
159
160 list_swap(&page_i->lru, &page_j->lru);
161
162 pr_debug("%s: swap: %#lx -> %#lx\n", __func__, i, j);
163
164 /* take it easy on the zone lock */
165 if ((i % (100 * order_pages)) == 0) {
166 spin_unlock_irqrestore(&z->lock, flags);
167 cond_resched();
168 spin_lock_irqsave(&z->lock, flags);
169 }
170 }
171 spin_unlock_irqrestore(&z->lock, flags);
172}
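For contrast with the zone walk above, the textbook Fisher-Yates shuffle over a plain array looks like this (a sketch; like the zone version it makes no attempt to correct for modulo bias):

	#include <stdlib.h>

	static void fisher_yates(unsigned long *pfn, size_t n)
	{
		for (size_t i = n - 1; i > 0; i--) {
			size_t j = (size_t)rand() % (i + 1); /* modulo-biased */
			unsigned long tmp = pfn[i];

			pfn[i] = pfn[j];
			pfn[j] = tmp;
		}
	}

The zone version differs because its "entries" (pfns) may be holes or busy pages, so a failed pick is retried up to SHUFFLE_RETRY times instead of indexing a dense array.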
173
174/**
175 * shuffle_free_memory - reduce the predictability of the page allocator
176 * @pgdat: node page data
177 */
178void __meminit __shuffle_free_memory(pg_data_t *pgdat)
179{
180 struct zone *z;
181
182 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
183 shuffle_zone(z);
184}
185
186void add_to_free_area_random(struct page *page, struct free_area *area,
187 int migratetype)
188{
189 static u64 rand;
190 static u8 rand_bits;
191
192 /*
193 * The lack of locking is deliberate. If 2 threads race to
194 * update the rand state it just adds to the entropy.
195 */
196 if (rand_bits == 0) {
197 rand_bits = 64;
198 rand = get_random_u64();
199 }
200
201 if (rand & 1)
202 add_to_free_area(page, area, migratetype);
203 else
204 add_to_free_area_tail(page, area, migratetype);
205 rand_bits--;
206 rand >>= 1;
207}
diff --git a/mm/shuffle.h b/mm/shuffle.h
new file mode 100644
index 000000000000..777a257a0d2f
--- /dev/null
+++ b/mm/shuffle.h
@@ -0,0 +1,64 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright(c) 2018 Intel Corporation. All rights reserved.
3#ifndef _MM_SHUFFLE_H
4#define _MM_SHUFFLE_H
5#include <linux/jump_label.h>
6
7/*
8 * SHUFFLE_ENABLE is called from the command line enabling path, or by
9 * platform-firmware enabling that indicates the presence of a
10 * direct-mapped memory-side-cache. SHUFFLE_FORCE_DISABLE is called from
11 * the command line path and overrides any previous or future
12 * SHUFFLE_ENABLE.
13 */
14enum mm_shuffle_ctl {
15 SHUFFLE_ENABLE,
16 SHUFFLE_FORCE_DISABLE,
17};
18
19#define SHUFFLE_ORDER (MAX_ORDER-1)
20
21#ifdef CONFIG_SHUFFLE_PAGE_ALLOCATOR
22DECLARE_STATIC_KEY_FALSE(page_alloc_shuffle_key);
23extern void page_alloc_shuffle(enum mm_shuffle_ctl ctl);
24extern void __shuffle_free_memory(pg_data_t *pgdat);
25static inline void shuffle_free_memory(pg_data_t *pgdat)
26{
27 if (!static_branch_unlikely(&page_alloc_shuffle_key))
28 return;
29 __shuffle_free_memory(pgdat);
30}
31
32extern void __shuffle_zone(struct zone *z);
33static inline void shuffle_zone(struct zone *z)
34{
35 if (!static_branch_unlikely(&page_alloc_shuffle_key))
36 return;
37 __shuffle_zone(z);
38}
39
40static inline bool is_shuffle_order(int order)
41{
42 if (!static_branch_unlikely(&page_alloc_shuffle_key))
43 return false;
44 return order >= SHUFFLE_ORDER;
45}
46#else
47static inline void shuffle_free_memory(pg_data_t *pgdat)
48{
49}
50
51static inline void shuffle_zone(struct zone *z)
52{
53}
54
55static inline void page_alloc_shuffle(enum mm_shuffle_ctl ctl)
56{
57}
58
59static inline bool is_shuffle_order(int order)
60{
61 return false;
62}
63#endif
64#endif /* _MM_SHUFFLE_H */
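The wrappers above follow the usual static-key pattern: with the key default-false, every call site costs a patched no-op branch until shuffling is enabled by the command line or by memory-side-cache detection. A generic sketch of that pattern, with hypothetical names:

	#include <linux/jump_label.h>

	DEFINE_STATIC_KEY_FALSE(example_key);

	void maybe_do_work(void)
	{
		if (!static_branch_unlikely(&example_key))
			return;		/* near-zero cost while disabled */
		/* ...expensive slow path... */
	}

	void enable_feature(void)
	{
		static_branch_enable(&example_key); /* patches call sites */
	}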
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e5e9e1fcac01..67bbb8d2a0a8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -633,7 +633,7 @@ static unsigned long lazy_max_pages(void)
633 return log * (32UL * 1024 * 1024 / PAGE_SIZE); 633 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
634} 634}
635 635
636static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); 636static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
637 637
638/* 638/*
639 * Serialize vmap purging. There is no actual critical section protected 639 * Serialize vmap purging. There is no actual critical section protected
@@ -651,7 +651,7 @@ static void purge_fragmented_blocks_allcpus(void);
651 */ 651 */
652void set_iounmap_nonlazy(void) 652void set_iounmap_nonlazy(void)
653{ 653{
654 atomic_set(&vmap_lazy_nr, lazy_max_pages()+1); 654 atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
655} 655}
656 656
657/* 657/*
@@ -659,34 +659,40 @@ void set_iounmap_nonlazy(void)
659 */ 659 */
660static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) 660static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
661{ 661{
662 unsigned long resched_threshold;
662 struct llist_node *valist; 663 struct llist_node *valist;
663 struct vmap_area *va; 664 struct vmap_area *va;
664 struct vmap_area *n_va; 665 struct vmap_area *n_va;
665 bool do_free = false;
666 666
667 lockdep_assert_held(&vmap_purge_lock); 667 lockdep_assert_held(&vmap_purge_lock);
668 668
669 valist = llist_del_all(&vmap_purge_list); 669 valist = llist_del_all(&vmap_purge_list);
670 if (unlikely(valist == NULL))
671 return false;
672
673 /*
674 * TODO: to calculate a flush range without looping.
675 * The list can be up to lazy_max_pages() elements.
676 */
670 llist_for_each_entry(va, valist, purge_list) { 677 llist_for_each_entry(va, valist, purge_list) {
671 if (va->va_start < start) 678 if (va->va_start < start)
672 start = va->va_start; 679 start = va->va_start;
673 if (va->va_end > end) 680 if (va->va_end > end)
674 end = va->va_end; 681 end = va->va_end;
675 do_free = true;
676 } 682 }
677 683
678 if (!do_free)
679 return false;
680
681 flush_tlb_kernel_range(start, end); 684 flush_tlb_kernel_range(start, end);
685 resched_threshold = lazy_max_pages() << 1;
682 686
683 spin_lock(&vmap_area_lock); 687 spin_lock(&vmap_area_lock);
684 llist_for_each_entry_safe(va, n_va, valist, purge_list) { 688 llist_for_each_entry_safe(va, n_va, valist, purge_list) {
685 int nr = (va->va_end - va->va_start) >> PAGE_SHIFT; 689 unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
686 690
687 __free_vmap_area(va); 691 __free_vmap_area(va);
688 atomic_sub(nr, &vmap_lazy_nr); 692 atomic_long_sub(nr, &vmap_lazy_nr);
689 cond_resched_lock(&vmap_area_lock); 693
694 if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
695 cond_resched_lock(&vmap_area_lock);
690 } 696 }
691 spin_unlock(&vmap_area_lock); 697 spin_unlock(&vmap_area_lock);
692 return true; 698 return true;
@@ -722,10 +728,10 @@ static void purge_vmap_area_lazy(void)
722 */ 728 */
723static void free_vmap_area_noflush(struct vmap_area *va) 729static void free_vmap_area_noflush(struct vmap_area *va)
724{ 730{
725 int nr_lazy; 731 unsigned long nr_lazy;
726 732
727 nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT, 733 nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
728 &vmap_lazy_nr); 734 PAGE_SHIFT, &vmap_lazy_nr);
729 735
730 /* After this point, we may free va at any time */ 736 /* After this point, we may free va at any time */
731 llist_add(&va->purge_list, &vmap_purge_list); 737 llist_add(&va->purge_list, &vmap_purge_list);
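Worked numbers for the new reschedule threshold, assuming 4 KiB pages and the unshown body of lazy_max_pages() computing log = fls(num_online_cpus()):

	/*
	 * 32 MiB / 4 KiB = 8192 pages per unit
	 * 4 CPUs: fls(4) = 3  ->  lazy_max_pages() = 3 * 8192 = 24576 pages
	 * resched_threshold = lazy_max_pages() << 1 = 49152 pages (~192 MiB)
	 */

While the outstanding lazy total is above that threshold, cond_resched_lock() is skipped so the purge can catch up; once below it, the loop yields the lock as before.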
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d96c54703948..7acd0afdfc2a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -346,7 +346,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
346 int zid; 346 int zid;
347 347
348 if (!mem_cgroup_disabled()) 348 if (!mem_cgroup_disabled())
349 lru_size = lruvec_page_state(lruvec, NR_LRU_BASE + lru); 349 lru_size = lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
350 else 350 else
351 lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); 351 lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
352 352
@@ -2150,7 +2150,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
2150 * is being established. Disable active list protection to get 2150 * is being established. Disable active list protection to get
2151 * rid of the stale workingset quickly. 2151 * rid of the stale workingset quickly.
2152 */ 2152 */
2153 refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); 2153 refaults = lruvec_page_state_local(lruvec, WORKINGSET_ACTIVATE);
2154 if (file && actual_reclaim && lruvec->refaults != refaults) { 2154 if (file && actual_reclaim && lruvec->refaults != refaults) {
2155 inactive_ratio = 0; 2155 inactive_ratio = 0;
2156 } else { 2156 } else {
@@ -2912,7 +2912,7 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
2912 struct lruvec *lruvec; 2912 struct lruvec *lruvec;
2913 2913
2914 lruvec = mem_cgroup_lruvec(pgdat, memcg); 2914 lruvec = mem_cgroup_lruvec(pgdat, memcg);
2915 refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); 2915 refaults = lruvec_page_state_local(lruvec, WORKINGSET_ACTIVATE);
2916 lruvec->refaults = refaults; 2916 lruvec->refaults = refaults;
2917 } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); 2917 } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
2918} 2918}
diff --git a/mm/workingset.c b/mm/workingset.c
index 6419baebd306..e0b4edcb88c8 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -430,9 +430,10 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
430 430
431 lruvec = mem_cgroup_lruvec(NODE_DATA(sc->nid), sc->memcg); 431 lruvec = mem_cgroup_lruvec(NODE_DATA(sc->nid), sc->memcg);
432 for (pages = 0, i = 0; i < NR_LRU_LISTS; i++) 432 for (pages = 0, i = 0; i < NR_LRU_LISTS; i++)
433 pages += lruvec_page_state(lruvec, NR_LRU_BASE + i); 433 pages += lruvec_page_state_local(lruvec,
434 pages += lruvec_page_state(lruvec, NR_SLAB_RECLAIMABLE); 434 NR_LRU_BASE + i);
435 pages += lruvec_page_state(lruvec, NR_SLAB_UNRECLAIMABLE); 435 pages += lruvec_page_state_local(lruvec, NR_SLAB_RECLAIMABLE);
436 pages += lruvec_page_state_local(lruvec, NR_SLAB_UNRECLAIMABLE);
436 } else 437 } else
437#endif 438#endif
438 pages = node_present_pages(sc->nid); 439 pages = node_present_pages(sc->nid);
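These call sites rely on the accessor split this series introduces: plain lruvec_page_state() now returns hierarchical totals (the cgroup plus its descendants), while the _local variant counts only pages charged to the lruvec's own cgroup, which is what LRU sizing, refault snapshots, and shadow-node pressure want. A hypothetical helper showing the _local flavor:

	/* Sum this cgroup's own LRU pages, excluding descendants. */
	static unsigned long lruvec_lru_pages_local(struct lruvec *lruvec)
	{
		unsigned long pages = 0;
		enum lru_list lru;

		for_each_lru(lru)
			pages += lruvec_page_state_local(lruvec,
							 NR_LRU_BASE + lru);
		return pages;
	}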
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 71f06900473e..b96fd3f54705 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -163,7 +163,7 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
163 163
164static void hooks_validate(const struct nf_hook_entries *hooks) 164static void hooks_validate(const struct nf_hook_entries *hooks)
165{ 165{
166#ifdef CONFIG_DEBUG_KERNEL 166#ifdef CONFIG_DEBUG_MISC
167 struct nf_hook_ops **orig_ops; 167 struct nf_hook_ops **orig_ops;
168 int prio = INT_MIN; 168 int prio = INT_MIN;
169 size_t i = 0; 169 size_t i = 0;
diff --git a/scripts/gdb/linux/clk.py b/scripts/gdb/linux/clk.py
new file mode 100644
index 000000000000..061aecfa294e
--- /dev/null
+++ b/scripts/gdb/linux/clk.py
@@ -0,0 +1,76 @@
1# SPDX-License-Identifier: GPL-2.0
2#
3# Copyright (c) NXP 2019
4
5import gdb
6import sys
7
8from linux import utils, lists, constants
9
10clk_core_type = utils.CachedType("struct clk_core")
11
12
13def clk_core_for_each_child(hlist_head):
14 return lists.hlist_for_each_entry(hlist_head,
15 clk_core_type.get_type().pointer(), "child_node")
16
17
18class LxClkSummary(gdb.Command):
19 """Print clk tree summary
20
21Output is a subset of /sys/kernel/debug/clk/clk_summary
22
23No calls are made during printing; instead, a '(c)' is printed after values
24which are cached and potentially out of date"""
25
26 def __init__(self):
27 super(LxClkSummary, self).__init__("lx-clk-summary", gdb.COMMAND_DATA)
28
29 def show_subtree(self, clk, level):
30 gdb.write("%*s%-*s %7d %8d %8d %11lu%s\n" % (
31 level * 3 + 1, "",
32 30 - level * 3,
33 clk['name'].string(),
34 clk['enable_count'],
35 clk['prepare_count'],
36 clk['protect_count'],
37 clk['rate'],
38 '(c)' if clk['flags'] & constants.LX_CLK_GET_RATE_NOCACHE else ' '))
39
40 for child in clk_core_for_each_child(clk['children']):
41 self.show_subtree(child, level + 1)
42
43 def invoke(self, arg, from_tty):
44 gdb.write(" enable prepare protect \n")
45 gdb.write(" clock count count count rate \n")
46 gdb.write("------------------------------------------------------------------------\n")
47 for clk in clk_core_for_each_child(gdb.parse_and_eval("clk_root_list")):
48 self.show_subtree(clk, 0)
49 for clk in clk_core_for_each_child(gdb.parse_and_eval("clk_orphan_list")):
50 self.show_subtree(clk, 0)
51
52
53LxClkSummary()
54
55
56class LxClkCoreLookup(gdb.Function):
57 """Find struct clk_core by name"""
58
59 def __init__(self):
60 super(LxClkCoreLookup, self).__init__("lx_clk_core_lookup")
61
62 def lookup_hlist(self, hlist_head, name):
63 for child in clk_core_for_each_child(hlist_head):
64 if child['name'].string() == name:
65 return child
66 result = self.lookup_hlist(child['children'], name)
67 if result:
68 return result
69
70 def invoke(self, name):
71 name = name.string()
72 return (self.lookup_hlist(gdb.parse_and_eval("clk_root_list"), name) or
73 self.lookup_hlist(gdb.parse_and_eval("clk_orphan_list"), name))
74
75
76LxClkCoreLookup()
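A hypothetical session with the new command and function (the clock name is a placeholder; real names depend on the target's clock tree):

	(gdb) lx-clk-summary
	                             enable  prepare  protect
	   clock                      count    count    count        rate
	------------------------------------------------------------------------
	 osc                              1        1        0    24000000
	(gdb) print *$lx_clk_core_lookup("osc")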
diff --git a/scripts/gdb/linux/config.py b/scripts/gdb/linux/config.py
new file mode 100644
index 000000000000..90e1565b1967
--- /dev/null
+++ b/scripts/gdb/linux/config.py
@@ -0,0 +1,44 @@
1# SPDX-License-Identifier: GPL-2.0
2#
3# Copyright 2019 Google LLC.
4
5import gdb
6import zlib
7
8from linux import utils
9
10
11class LxConfigDump(gdb.Command):
12 """Output kernel config to the filename specified as the command
13 argument. Equivalent to 'zcat /proc/config.gz > config.txt' on
14 a running target"""
15
16 def __init__(self):
17 super(LxConfigDump, self).__init__("lx-configdump", gdb.COMMAND_DATA,
18 gdb.COMPLETE_FILENAME)
19
20 def invoke(self, arg, from_tty):
21 if len(arg) == 0:
22 filename = "config.txt"
23 else:
24 filename = arg
25
26 try:
27 py_config_ptr = gdb.parse_and_eval("kernel_config_data + 8")
28 py_config_size = gdb.parse_and_eval(
29 "sizeof(kernel_config_data) - 1 - 8 * 2")
30 except gdb.error:
31 raise gdb.GdbError("Can't find config, enable CONFIG_IKCONFIG?")
32
33 inf = gdb.inferiors()[0]
34 zconfig_buf = utils.read_memoryview(inf, py_config_ptr,
35 py_config_size).tobytes()
36
37 config_buf = zlib.decompress(zconfig_buf, 16)
38 with open(filename, 'wb') as f:
39 f.write(config_buf)
40
41 gdb.write("Dumped config to " + filename + "\n")
42
43
44LxConfigDump()
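The pointer arithmetic skips the 8-byte "IKCFG_ST" marker at the start of kernel_config_data and drops the trailing 8-byte "IKCFG_ED" marker (hence the "- 8 * 2") plus the terminating byte, leaving the raw gzip stream for zlib. Usage against a target built with CONFIG_IKCONFIG:

	(gdb) lx-configdump /tmp/config.txt
	Dumped config to /tmp/config.txt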
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index d3319a80788a..1d73083da6cb 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -12,9 +12,12 @@
12 * 12 *
13 */ 13 */
14 14
15#include <linux/clk-provider.h>
15#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/hrtimer.h>
16#include <linux/mount.h> 18#include <linux/mount.h>
17#include <linux/of_fdt.h> 19#include <linux/of_fdt.h>
20#include <linux/threads.h>
18 21
19/* We need to stringify expanded macros so that they can be parsed */ 22/* We need to stringify expanded macros so that they can be parsed */
20 23
@@ -36,6 +39,9 @@
36 39
37import gdb 40import gdb
38 41
42/* linux/clk-provider.h */
43LX_GDBPARSED(CLK_GET_RATE_NOCACHE)
44
39/* linux/fs.h */ 45/* linux/fs.h */
40LX_VALUE(SB_RDONLY) 46LX_VALUE(SB_RDONLY)
41LX_VALUE(SB_SYNCHRONOUS) 47LX_VALUE(SB_SYNCHRONOUS)
@@ -44,6 +50,9 @@ LX_VALUE(SB_DIRSYNC)
44LX_VALUE(SB_NOATIME) 50LX_VALUE(SB_NOATIME)
45LX_VALUE(SB_NODIRATIME) 51LX_VALUE(SB_NODIRATIME)
46 52
53/* linux/hrtimer.h */
54LX_GDBPARSED(hrtimer_resolution)
55
47/* linux/mount.h */ 56/* linux/mount.h */
48LX_VALUE(MNT_NOSUID) 57LX_VALUE(MNT_NOSUID)
49LX_VALUE(MNT_NODEV) 58LX_VALUE(MNT_NODEV)
@@ -52,8 +61,16 @@ LX_VALUE(MNT_NOATIME)
52LX_VALUE(MNT_NODIRATIME) 61LX_VALUE(MNT_NODIRATIME)
53LX_VALUE(MNT_RELATIME) 62LX_VALUE(MNT_RELATIME)
54 63
64/* linux/threads.h */
65LX_VALUE(NR_CPUS)
66
55/* linux/of_fdt.h */ 67/* linux/of_fdt.h */
56LX_VALUE(OF_DT_HEADER) 68LX_VALUE(OF_DT_HEADER)
57 69
58/* Kernel Configs */ 70/* Kernel Configs */
71LX_CONFIG(CONFIG_GENERIC_CLOCKEVENTS)
72LX_CONFIG(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)
73LX_CONFIG(CONFIG_HIGH_RES_TIMERS)
74LX_CONFIG(CONFIG_NR_CPUS)
59LX_CONFIG(CONFIG_OF) 75LX_CONFIG(CONFIG_OF)
76LX_CONFIG(CONFIG_TICK_ONESHOT)
diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py
index ca11e8df31b6..008e62f3190d 100644
--- a/scripts/gdb/linux/cpus.py
+++ b/scripts/gdb/linux/cpus.py
@@ -135,6 +135,7 @@ and can help identify the state of hotplugged CPUs"""
135 gdb.write("Online CPUs : {}\n".format(list(each_online_cpu()))) 135 gdb.write("Online CPUs : {}\n".format(list(each_online_cpu())))
136 gdb.write("Active CPUs : {}\n".format(list(each_active_cpu()))) 136 gdb.write("Active CPUs : {}\n".format(list(each_active_cpu())))
137 137
138
138LxCpus() 139LxCpus()
139 140
140 141
diff --git a/scripts/gdb/linux/lists.py b/scripts/gdb/linux/lists.py
index 2f335fbd86fd..c487ddf09d38 100644
--- a/scripts/gdb/linux/lists.py
+++ b/scripts/gdb/linux/lists.py
@@ -16,13 +16,15 @@ import gdb
16from linux import utils 16from linux import utils
17 17
18list_head = utils.CachedType("struct list_head") 18list_head = utils.CachedType("struct list_head")
19hlist_head = utils.CachedType("struct hlist_head")
20hlist_node = utils.CachedType("struct hlist_node")
19 21
20 22
21def list_for_each(head): 23def list_for_each(head):
22 if head.type == list_head.get_type().pointer(): 24 if head.type == list_head.get_type().pointer():
23 head = head.dereference() 25 head = head.dereference()
24 elif head.type != list_head.get_type(): 26 elif head.type != list_head.get_type():
25 raise gdb.GdbError("Must be struct list_head not {}" 27 raise TypeError("Must be struct list_head not {}"
26 .format(head.type)) 28 .format(head.type))
27 29
28 node = head['next'].dereference() 30 node = head['next'].dereference()
@@ -33,9 +35,24 @@ def list_for_each(head):
33 35
34def list_for_each_entry(head, gdbtype, member): 36def list_for_each_entry(head, gdbtype, member):
35 for node in list_for_each(head): 37 for node in list_for_each(head):
36 if node.type != list_head.get_type().pointer(): 38 yield utils.container_of(node, gdbtype, member)
37 raise TypeError("Type {} found. Expected struct list_head *." 39
38 .format(node.type)) 40
41def hlist_for_each(head):
42 if head.type == hlist_head.get_type().pointer():
43 head = head.dereference()
44 elif head.type != hlist_head.get_type():
45 raise TypeError("Must be struct hlist_head not {}"
46 .format(head.type))
47
48 node = head['first'].dereference()
49 while node.address:
50 yield node.address
51 node = node['next'].dereference()
52
53
54def hlist_for_each_entry(head, gdbtype, member):
55 for node in hlist_for_each(head):
39 yield utils.container_of(node, gdbtype, member) 56 yield utils.container_of(node, gdbtype, member)
40 57
41 58
@@ -110,4 +127,5 @@ class LxListChk(gdb.Command):
110 raise gdb.GdbError("lx-list-check takes one argument") 127 raise gdb.GdbError("lx-list-check takes one argument")
111 list_check(gdb.parse_and_eval(argv[0])) 128 list_check(gdb.parse_and_eval(argv[0]))
112 129
130
113LxListChk() 131LxListChk()
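The new walkers mirror the kernel-side hlist layout (from include/linux/types.h): a head holding a single 'first' pointer and nodes chained through 'next', NULL-terminated, which is why hlist_for_each() starts at head['first'] and stops when node.address is 0:

	struct hlist_head {
		struct hlist_node *first;
	};

	struct hlist_node {
		struct hlist_node *next, **pprev;
	};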
diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py
index 2f01a958eb22..6a56bba233a9 100644
--- a/scripts/gdb/linux/proc.py
+++ b/scripts/gdb/linux/proc.py
@@ -29,6 +29,7 @@ class LxCmdLine(gdb.Command):
29 def invoke(self, arg, from_tty): 29 def invoke(self, arg, from_tty):
30 gdb.write(gdb.parse_and_eval("saved_command_line").string() + "\n") 30 gdb.write(gdb.parse_and_eval("saved_command_line").string() + "\n")
31 31
32
32LxCmdLine() 33LxCmdLine()
33 34
34 35
@@ -43,6 +44,7 @@ class LxVersion(gdb.Command):
43 # linux_banner should contain a newline 44 # linux_banner should contain a newline
44 gdb.write(gdb.parse_and_eval("(char *)linux_banner").string()) 45 gdb.write(gdb.parse_and_eval("(char *)linux_banner").string())
45 46
47
46LxVersion() 48LxVersion()
47 49
48 50
@@ -86,6 +88,7 @@ Equivalent to cat /proc/iomem on a running target"""
86 def invoke(self, arg, from_tty): 88 def invoke(self, arg, from_tty):
87 return show_lx_resources("iomem_resource") 89 return show_lx_resources("iomem_resource")
88 90
91
89LxIOMem() 92LxIOMem()
90 93
91 94
@@ -100,6 +103,7 @@ Equivalent to cat /proc/ioports on a running target"""
100 def invoke(self, arg, from_tty): 103 def invoke(self, arg, from_tty):
101 return show_lx_resources("ioport_resource") 104 return show_lx_resources("ioport_resource")
102 105
106
103LxIOPorts() 107LxIOPorts()
104 108
105 109
@@ -149,7 +153,7 @@ values of that process namespace"""
149 if len(argv) >= 1: 153 if len(argv) >= 1:
150 try: 154 try:
151 pid = int(argv[0]) 155 pid = int(argv[0])
152 except: 156 except ValueError:
153 raise gdb.GdbError("Provide a PID as integer value") 157 raise gdb.GdbError("Provide a PID as integer value")
154 else: 158 else:
155 pid = 1 159 pid = 1
@@ -195,6 +199,7 @@ values of that process namespace"""
195 info_opts(FS_INFO, s_flags), 199 info_opts(FS_INFO, s_flags),
196 info_opts(MNT_INFO, m_flags))) 200 info_opts(MNT_INFO, m_flags)))
197 201
202
198LxMounts() 203LxMounts()
199 204
200 205
@@ -259,7 +264,7 @@ class LxFdtDump(gdb.Command):
259 264
260 try: 265 try:
261 f = open(filename, 'wb') 266 f = open(filename, 'wb')
262 except: 267 except IOError:
263 raise gdb.GdbError("Could not open file to dump fdt") 268 raise gdb.GdbError("Could not open file to dump fdt")
264 269
265 f.write(fdt_buf) 270 f.write(fdt_buf)
@@ -267,4 +272,5 @@ class LxFdtDump(gdb.Command):
267 272
268 gdb.write("Dumped fdt blob to " + filename + "\n") 273 gdb.write("Dumped fdt blob to " + filename + "\n")
269 274
275
270LxFdtDump() 276LxFdtDump()
diff --git a/scripts/gdb/linux/rbtree.py b/scripts/gdb/linux/rbtree.py
new file mode 100644
index 000000000000..39db889b874c
--- /dev/null
+++ b/scripts/gdb/linux/rbtree.py
@@ -0,0 +1,177 @@
1# SPDX-License-Identifier: GPL-2.0
2#
3# Copyright 2019 Google LLC.
4
5import gdb
6
7from linux import utils
8
9rb_root_type = utils.CachedType("struct rb_root")
10rb_node_type = utils.CachedType("struct rb_node")
11
12
13def rb_first(root):
14 if root.type == rb_root_type.get_type():
15 root = root.address.cast(rb_root_type.get_type().pointer())
16 elif root.type != rb_root_type.get_type().pointer():
17 raise gdb.GdbError("Must be struct rb_root not {}".format(root.type))
18
19 node = root['rb_node']
20 if node == 0:
21 return None
22
23 while node['rb_left']:
24 node = node['rb_left']
25
26 return node
27
28
29def rb_last(root):
30 if root.type == rb_root_type.get_type():
31 root = root.address.cast(rb_root_type.get_type().pointer())
32 elif root.type != rb_root_type.get_type().pointer():
33 raise gdb.GdbError("Must be struct rb_root not {}".format(root.type))
34
35 node = root['rb_node']
36 if node == 0:
37 return None
38
39 while node['rb_right']:
40 node = node['rb_right']
41
42 return node
43
44
45def rb_parent(node):
46 parent = gdb.Value(node['__rb_parent_color'] & ~3)
47 return parent.cast(rb_node_type.get_type().pointer())
48
49
50def rb_empty_node(node):
51 return node['__rb_parent_color'] == node.address
52
53
54def rb_next(node):
55 if node.type == rb_node_type.get_type():
56 node = node.address.cast(rb_node_type.get_type().pointer())
57 elif node.type != rb_node_type.get_type().pointer():
58 raise gdb.GdbError("Must be struct rb_node not {}".format(node.type))
59
60 if rb_empty_node(node):
61 return None
62
63 if node['rb_right']:
64 node = node['rb_right']
65 while node['rb_left']:
66 node = node['rb_left']
67 return node
68
69 parent = rb_parent(node)
70 while parent and node == parent['rb_right']:
71 node = parent
72 parent = rb_parent(node)
73
74 return parent
75
76
77def rb_prev(node):
78 if node.type == rb_node_type.get_type():
79 node = node.address.cast(rb_node_type.get_type().pointer())
80 elif node.type != rb_node_type.get_type().pointer():
81 raise gdb.GdbError("Must be struct rb_node not {}".format(node.type))
82
83 if rb_empty_node(node):
84 return None
85
86 if node['rb_left']:
87 node = node['rb_left']
88 while node['rb_right']:
89 node = node['rb_right']
90 return node
91
92 parent = rb_parent(node)
93 while parent and node == parent['rb_left']:
94 node = parent
95 parent = rb_parent(node)
96
97 return parent
98
99
100class LxRbFirst(gdb.Function):
101 """Lookup and return a node from an RBTree
102
103$lx_rb_first(root): Return the leftmost (smallest) node of the tree
104rooted at root, or raise an error if the tree is empty."""
105
106 def __init__(self):
107 super(LxRbFirst, self).__init__("lx_rb_first")
108
109 def invoke(self, root):
110 result = rb_first(root)
111 if result is None:
112 raise gdb.GdbError("No entry in tree")
113
114 return result
115
116
117LxRbFirst()
118
119
120class LxRbLast(gdb.Function):
121 """Lookup and return a node from an RBTree.
122
123$lx_rb_last(root): Return the rightmost (largest) node of the tree
124rooted at root, or raise an error if the tree is empty."""
125
126 def __init__(self):
127 super(LxRbLast, self).__init__("lx_rb_last")
128
129 def invoke(self, root):
130 result = rb_last(root)
131 if result is None:
132 raise gdb.GdbError("No entry in tree")
133
134 return result
135
136
137LxRbLast()
138
139
140class LxRbNext(gdb.Function):
141 """Lookup and return a node from an RBTree.
142
143$lx_rb_next(node): Return the in-order successor of node, or raise an
144error if node is the last node in the tree."""
145
146 def __init__(self):
147 super(LxRbNext, self).__init__("lx_rb_next")
148
149 def invoke(self, node):
150 result = rb_next(node)
151 if result is None:
152 raise gdb.GdbError("No entry in tree")
153
154 return result
155
156
157LxRbNext()
158
159
160class LxRbPrev(gdb.Function):
161 """Lookup and return a node from an RBTree.
162
163$lx_rb_prev(node): Return the in-order predecessor of node, or raise an
164error if node is the first node in the tree."""
165
166 def __init__(self):
167 super(LxRbPrev, self).__init__("lx_rb_prev")
168
169 def invoke(self, node):
170 result = rb_prev(node)
171 if result is None:
172 raise gdb.GdbError("No entry in tree")
173
174 return result
175
176
177LxRbPrev()
diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py
index 004b0ac7fa72..2f5b95f09fa0 100644
--- a/scripts/gdb/linux/symbols.py
+++ b/scripts/gdb/linux/symbols.py
@@ -139,8 +139,12 @@ lx-symbols command."""
139 saved_states.append({'breakpoint': bp, 'enabled': bp.enabled}) 139 saved_states.append({'breakpoint': bp, 'enabled': bp.enabled})
140 140
141 # drop all current symbols and reload vmlinux 141 # drop all current symbols and reload vmlinux
142 orig_vmlinux = 'vmlinux'
143 for obj in gdb.objfiles():
144 if obj.filename.endswith('vmlinux'):
145 orig_vmlinux = obj.filename
142 gdb.execute("symbol-file", to_string=True) 146 gdb.execute("symbol-file", to_string=True)
143 gdb.execute("symbol-file vmlinux") 147 gdb.execute("symbol-file {0}".format(orig_vmlinux))
144 148
145 self.loaded_modules = [] 149 self.loaded_modules = []
146 module_list = modules.module_list() 150 module_list = modules.module_list()
diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py
index f6ab3ccf698f..0301dc1e0138 100644
--- a/scripts/gdb/linux/tasks.py
+++ b/scripts/gdb/linux/tasks.py
@@ -79,6 +79,7 @@ class LxPs(gdb.Command):
79 pid=task["pid"], 79 pid=task["pid"],
80 comm=task["comm"].string())) 80 comm=task["comm"].string()))
81 81
82
82LxPs() 83LxPs()
83 84
84 85
@@ -134,4 +135,5 @@ variable."""
134 else: 135 else:
135 raise gdb.GdbError("No task of PID " + str(pid)) 136 raise gdb.GdbError("No task of PID " + str(pid))
136 137
138
137LxThreadInfoByPidFunc() 139LxThreadInfoByPidFunc()
diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py
new file mode 100644
index 000000000000..071d0dd5a634
--- /dev/null
+++ b/scripts/gdb/linux/timerlist.py
@@ -0,0 +1,219 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2019 Google LLC.
+
+import binascii
+import gdb
+
+from linux import constants
+from linux import cpus
+from linux import rbtree
+from linux import utils
+
+timerqueue_node_type = utils.CachedType("struct timerqueue_node").get_type()
+hrtimer_type = utils.CachedType("struct hrtimer").get_type()
+
+
+def ktime_get():
+    """Returns the current time, but not very accurately
+
+    We can't read the hardware timer itself to add any nanoseconds
+    that need to be added since we last stored the time in the
+    timekeeper. But this is probably good enough for debug purposes."""
+    tk_core = gdb.parse_and_eval("&tk_core")
+
+    return tk_core['timekeeper']['tkr_mono']['base']
+
+
+def print_timer(rb_node, idx):
+    timerqueue = utils.container_of(rb_node, timerqueue_node_type.pointer(),
+                                    "node")
+    timer = utils.container_of(timerqueue, hrtimer_type.pointer(), "node")
+
+    function = str(timer['function']).split(" ")[1].strip("<>")
+    softexpires = timer['_softexpires']
+    expires = timer['node']['expires']
+    now = ktime_get()
+
+    text = " #{}: <{}>, {}, ".format(idx, timer, function)
+    text += "S:{:02x}\n".format(int(timer['state']))
+    text += " # expires at {}-{} nsecs [in {} to {} nsecs]\n".format(
+        softexpires, expires, softexpires - now, expires - now)
+    return text
+
+
+def print_active_timers(base):
+    curr = base['active']['next']['node']
+    curr = curr.address.cast(rbtree.rb_node_type.get_type().pointer())
+    idx = 0
+    while curr:
+        yield print_timer(curr, idx)
+        curr = rbtree.rb_next(curr)
+        idx += 1
+
+
+def print_base(base):
+    text = " .base: {}\n".format(base.address)
+    text += " .index: {}\n".format(base['index'])
+
+    text += " .resolution: {} nsecs\n".format(constants.LX_hrtimer_resolution)
+
+    text += " .get_time: {}\n".format(base['get_time'])
+    if constants.LX_CONFIG_HIGH_RES_TIMERS:
+        text += " .offset: {} nsecs\n".format(base['offset'])
+    text += "active timers:\n"
+    text += "".join([x for x in print_active_timers(base)])
+    return text
+
+
+def print_cpu(hrtimer_bases, cpu, max_clock_bases):
+    cpu_base = cpus.per_cpu(hrtimer_bases, cpu)
+    jiffies = gdb.parse_and_eval("jiffies_64")
+    tick_sched_ptr = gdb.parse_and_eval("&tick_cpu_sched")
+    ts = cpus.per_cpu(tick_sched_ptr, cpu)
+
+    text = "cpu: {}\n".format(cpu)
+    for i in xrange(max_clock_bases):
+        text += " clock {}:\n".format(i)
+        text += print_base(cpu_base['clock_base'][i])
+
+    if constants.LX_CONFIG_HIGH_RES_TIMERS:
+        fmts = [(" .{} : {} nsecs", 'expires_next'),
+                (" .{} : {}", 'hres_active'),
+                (" .{} : {}", 'nr_events'),
+                (" .{} : {}", 'nr_retries'),
+                (" .{} : {}", 'nr_hangs'),
+                (" .{} : {}", 'max_hang_time')]
+        text += "\n".join([s.format(f, cpu_base[f]) for s, f in fmts])
+        text += "\n"
+
+    if constants.LX_CONFIG_TICK_ONESHOT:
+        fmts = [(" .{} : {}", 'nohz_mode'),
+                (" .{} : {} nsecs", 'last_tick'),
+                (" .{} : {}", 'tick_stopped'),
+                (" .{} : {}", 'idle_jiffies'),
+                (" .{} : {}", 'idle_calls'),
+                (" .{} : {}", 'idle_sleeps'),
+                (" .{} : {} nsecs", 'idle_entrytime'),
+                (" .{} : {} nsecs", 'idle_waketime'),
+                (" .{} : {} nsecs", 'idle_exittime'),
+                (" .{} : {} nsecs", 'idle_sleeptime'),
+                (" .{}: {} nsecs", 'iowait_sleeptime'),
+                (" .{} : {}", 'last_jiffies'),
+                (" .{} : {}", 'next_timer'),
+                (" .{} : {} nsecs", 'idle_expires')]
+        text += "\n".join([s.format(f, ts[f]) for s, f in fmts])
+        text += "\njiffies: {}\n".format(jiffies)
+
+    text += "\n"
+
+    return text
+
+
+def print_tickdevice(td, cpu):
+    dev = td['evtdev']
+    text = "Tick Device: mode: {}\n".format(td['mode'])
+    if cpu < 0:
+        text += "Broadcast device\n"
+    else:
+        text += "Per CPU device: {}\n".format(cpu)
+
+    text += "Clock Event Device: "
+    if dev == 0:
+        text += "<NULL>\n"
+        return text
+
+    text += "{}\n".format(dev['name'])
+    text += " max_delta_ns: {}\n".format(dev['max_delta_ns'])
+    text += " min_delta_ns: {}\n".format(dev['min_delta_ns'])
+    text += " mult: {}\n".format(dev['mult'])
+    text += " shift: {}\n".format(dev['shift'])
+    text += " mode: {}\n".format(dev['state_use_accessors'])
+    text += " next_event: {} nsecs\n".format(dev['next_event'])
+
+    text += " set_next_event: {}\n".format(dev['set_next_event'])
+
+    members = [('set_state_shutdown', " shutdown: {}\n"),
+               ('set_state_periodic', " periodic: {}\n"),
+               ('set_state_oneshot', " oneshot: {}\n"),
+               ('set_state_oneshot_stopped', " oneshot stopped: {}\n"),
+               ('tick_resume', " resume: {}\n")]
+    for member, fmt in members:
+        if dev[member]:
+            text += fmt.format(dev[member])
+
+    text += " event_handler: {}\n".format(dev['event_handler'])
+    text += " retries: {}\n".format(dev['retries'])
+
+    return text
+
+
+def pr_cpumask(mask):
+    nr_cpu_ids = 1
+    if constants.LX_NR_CPUS > 1:
+        nr_cpu_ids = gdb.parse_and_eval("nr_cpu_ids")
+
+    inf = gdb.inferiors()[0]
+    bits = mask['bits']
+    num_bytes = (nr_cpu_ids + 7) / 8
+    buf = utils.read_memoryview(inf, bits, num_bytes).tobytes()
+    buf = binascii.b2a_hex(buf)
+
+    chunks = []
+    i = num_bytes
+    while i > 0:
+        i -= 1
+        start = i * 2
+        end = start + 2
+        chunks.append(buf[start:end])
+        if i != 0 and i % 4 == 0:
+            chunks.append(',')
+
+    extra = nr_cpu_ids % 8
+    if 0 < extra <= 4:
+        chunks[0] = chunks[0][0]  # Cut off the first 0
+
+    return "".join(chunks)
+
+
+class LxTimerList(gdb.Command):
+    """Print /proc/timer_list"""
+
+    def __init__(self):
+        super(LxTimerList, self).__init__("lx-timerlist", gdb.COMMAND_DATA)
+
+    def invoke(self, arg, from_tty):
+        hrtimer_bases = gdb.parse_and_eval("&hrtimer_bases")
+        max_clock_bases = gdb.parse_and_eval("HRTIMER_MAX_CLOCK_BASES")
+
+        text = "Timer List Version: gdb scripts\n"
+        text += "HRTIMER_MAX_CLOCK_BASES: {}\n".format(max_clock_bases)
+        text += "now at {} nsecs\n".format(ktime_get())
+
+        for cpu in cpus.each_online_cpu():
+            text += print_cpu(hrtimer_bases, cpu, max_clock_bases)
+
+        if constants.LX_CONFIG_GENERIC_CLOCKEVENTS:
+            if constants.LX_CONFIG_GENERIC_CLOCKEVENTS_BROADCAST:
+                bc_dev = gdb.parse_and_eval("&tick_broadcast_device")
+                text += print_tickdevice(bc_dev, -1)
+                text += "\n"
+                mask = gdb.parse_and_eval("tick_broadcast_mask")
+                mask = pr_cpumask(mask)
+                text += "tick_broadcast_mask: {}\n".format(mask)
+                if constants.LX_CONFIG_TICK_ONESHOT:
+                    mask = gdb.parse_and_eval("tick_broadcast_oneshot_mask")
+                    mask = pr_cpumask(mask)
+                    text += "tick_broadcast_oneshot_mask: {}\n".format(mask)
+                text += "\n"
+
+            tick_cpu_devices = gdb.parse_and_eval("&tick_cpu_device")
+            for cpu in cpus.each_online_cpu():
+                tick_dev = cpus.per_cpu(tick_cpu_devices, cpu)
+                text += print_tickdevice(tick_dev, cpu)
+                text += "\n"
+
+        gdb.write(text)
+
+
+LxTimerList()
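
The new lx-timerlist command reconstructs /proc/timer_list from the hrtimer_bases per-cpu data. Its traversal leans on utils.container_of() to step from an embedded timerqueue_node back to the enclosing struct hrtimer, mirroring the kernel's container_of() macro. A minimal sketch of that idiom in gdb's Python API (the in-tree helper in scripts/gdb/linux/utils.py is the canonical version):

import gdb

def container_of(ptr, typ, member):
    # typ is a pointer type to the container struct; subtracting the
    # member's byte offset from the member pointer yields the struct.
    null = gdb.Value(0).cast(typ)
    offset = int(null[member].address)   # &((T *)0)->member == offsetof
    ulong = gdb.lookup_type("unsigned long")
    return (ptr.cast(ulong) - offset).cast(typ)

Note that print_cpu() iterates with xrange() and pr_cpumask() computes num_bytes with plain "/", both Python 2 semantics; a Python 3 gdb would need range() and "//" there. Once scripts/gdb/vmlinux-gdb.py has been sourced against a vmlinux with debug info, the command is invoked as lx-timerlist.
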
diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py
index 50805874cfc3..bc67126118c4 100644
--- a/scripts/gdb/linux/utils.py
+++ b/scripts/gdb/linux/utils.py
@@ -66,6 +66,7 @@ Note that TYPE and ELEMENT have to be quoted as strings."""
         return container_of(ptr, gdb.lookup_type(typename.string()).pointer(),
                             elementname.string())
 
+
 ContainerOf()
 
 
@@ -148,14 +149,14 @@ def get_gdbserver_type():
     def probe_qemu():
         try:
             return gdb.execute("monitor info version", to_string=True) != ""
-        except:
+        except gdb.error:
             return False
 
     def probe_kgdb():
         try:
             thread_info = gdb.execute("info thread 2", to_string=True)
             return "shadowCPU0" in thread_info
-        except:
+        except gdb.error:
             return False
 
     global gdbserver_type
@@ -172,7 +173,7 @@ def get_gdbserver_type():
 def gdb_eval_or_none(expresssion):
     try:
         return gdb.parse_and_eval(expresssion)
-    except:
+    except gdb.error:
         return None
 
 
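
The except-clause changes above are behavioral, not just stylistic: a bare except: also swallows KeyboardInterrupt and SystemExit, so a Ctrl-C during one of these probes would vanish silently. Narrowing to gdb.error keeps genuine command failures non-fatal while letting everything else propagate; the pattern, as an illustrative helper (not from the tree):

import gdb

def try_execute(cmd):
    try:
        return gdb.execute(cmd, to_string=True)
    except gdb.error:   # command/evaluation failures only
        return None     # KeyboardInterrupt still propagates
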
diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py
index 6e0b0afd888a..eff5a48ac026 100644
--- a/scripts/gdb/vmlinux-gdb.py
+++ b/scripts/gdb/vmlinux-gdb.py
@@ -27,7 +27,11 @@ else:
     import linux.modules
     import linux.dmesg
     import linux.tasks
+    import linux.config
     import linux.cpus
     import linux.lists
+    import linux.rbtree
     import linux.proc
     import linux.constants
+    import linux.timerlist
+    import linux.clk
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 64073e050c6a..b02279da6fa1 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -6,4 +6,5 @@ execveat.moved
 execveat.path.ephemeral
 execveat.ephemeral
 execveat.denatured
-xxxxxxxx*
\ No newline at end of file
+/recursion-depth
+xxxxxxxx*
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 427c41ba5151..33339e31e365 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,11 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS = -Wall
+CFLAGS += -Wno-nonnull
+CFLAGS += -D_GNU_SOURCE
 
 TEST_GEN_PROGS := execveat
 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
 # Makefile is a run-time dependency, since it's accessed by the execveat test
 TEST_FILES := Makefile
 
+TEST_GEN_PROGS += recursion-depth
+
 EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
 
 include ../lib.mk
diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c
new file mode 100644
index 000000000000..2dbd5bc45b3e
--- /dev/null
+++ b/tools/testing/selftests/exec/recursion-depth.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that pointing #! script interpreter to self doesn't recurse. */
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+int main(void)
+{
+	if (unshare(CLONE_NEWNS) == -1) {
+		if (errno == ENOSYS || errno == EPERM) {
+			fprintf(stderr, "error: unshare, errno %d\n", errno);
+			return 4;
+		}
+		fprintf(stderr, "error: unshare, errno %d\n", errno);
+		return 1;
+	}
+	if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
+		fprintf(stderr, "error: mount '/', errno %d\n", errno);
+		return 1;
+	}
+	/* Require "exec" filesystem. */
+	if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) {
+		fprintf(stderr, "error: mount ramfs, errno %d\n", errno);
+		return 1;
+	}
+
+#define FILENAME "/tmp/1"
+
+	int fd = creat(FILENAME, 0700);
+	if (fd == -1) {
+		fprintf(stderr, "error: creat, errno %d\n", errno);
+		return 1;
+	}
+#define S "#!" FILENAME "\n"
+	if (write(fd, S, strlen(S)) != strlen(S)) {
+		fprintf(stderr, "error: write, errno %d\n", errno);
+		return 1;
+	}
+	close(fd);
+
+	int rv = execve(FILENAME, NULL, NULL);
+	if (rv == -1 && errno == ELOOP) {
+		return 0;
+	}
+	fprintf(stderr, "error: execve, rv %d, errno %d\n", rv, errno);
+	return 1;
+}
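
The test builds a script whose #! line points back at the script itself and expects execve() to fail with ELOOP rather than recurse through interpreters indefinitely. The deliberate NULL argv/envp in the execve() call is also why the Makefile above adds -Wno-nonnull: glibc annotates those parameters as non-null, which would otherwise warn under -Wall. A rough Python equivalent of the core check (it skips the mount-namespace setup, so it assumes a writable, exec-capable temp directory):

import errno
import os
import tempfile

# Create a script whose interpreter line points at itself.
fd, path = tempfile.mkstemp()
os.write(fd, "#!{}\n".format(path).encode())
os.close(fd)
os.chmod(path, 0o700)

try:
    os.execv(path, [path])            # never returns on success
except OSError as e:
    assert e.errno == errno.ELOOP     # the kernel refused to recurse
finally:
    os.unlink(path)
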
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 780ce7123374..6a970b127c9b 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -24,19 +24,21 @@ TEST_FILE=$(mktemp)
 
 # This represents
 #
-# TEST_ID:TEST_COUNT:ENABLED
+# TEST_ID:TEST_COUNT:ENABLED:TARGET
 #
 # TEST_ID: is the test id number
 # TEST_COUNT: number of times we should run the test
 # ENABLED: 1 if enabled, 0 otherwise
+# TARGET: test target file required on the test_sysctl module
 #
 # Once these are enabled please leave them as-is. Write your own test,
 # we have tons of space.
-ALL_TESTS="0001:1:1"
-ALL_TESTS="$ALL_TESTS 0002:1:1"
-ALL_TESTS="$ALL_TESTS 0003:1:1"
-ALL_TESTS="$ALL_TESTS 0004:1:1"
-ALL_TESTS="$ALL_TESTS 0005:3:1"
+ALL_TESTS="0001:1:1:int_0001"
+ALL_TESTS="$ALL_TESTS 0002:1:1:string_0001"
+ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002"
+ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
+ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
+ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
 
 test_modprobe()
 {
@@ -149,6 +151,9 @@ reset_vals()
 	string_0001)
 		VAL="(none)"
 		;;
+	bitmap_0001)
+		VAL=""
+		;;
 	*)
 		;;
 	esac
@@ -157,8 +162,10 @@ reset_vals()
 
 set_orig()
 {
-	if [ ! -z $TARGET ]; then
-		echo "${ORIG}" > "${TARGET}"
+	if [ ! -z $TARGET ] && [ ! -z $ORIG ]; then
+		if [ -f ${TARGET} ]; then
+			echo "${ORIG}" > "${TARGET}"
+		fi
 	fi
 }
 
@@ -177,9 +184,25 @@ verify()
 	return 0
 }
 
+# proc files get read a page at a time, which can confuse diff,
+# and get you incorrect results on proc files with long data. To use
+# diff against them you must first extract the output to a file, and
+# then compare against that file.
+verify_diff_proc_file()
+{
+	TMP_DUMP_FILE=$(mktemp)
+	cat $1 > $TMP_DUMP_FILE
+
+	if ! diff -w -q $TMP_DUMP_FILE $2; then
+		return 1
+	else
+		return 0
+	fi
+}
+
 verify_diff_w()
 {
-	echo "$TEST_STR" | diff -q -w -u - $1
+	echo "$TEST_STR" | diff -q -w -u - $1 > /dev/null
 	return $?
 }
 
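
The comment introducing verify_diff_proc_file() names a real pitfall: proc files are served a page at a time, so pointing diff directly at a long proc entry can compare an inconsistently assembled view. Snapshotting the file first makes the comparison stable. The same idea in Python, as a sketch (whitespace-insensitive, like diff -w):

def proc_matches(path, expected):
    # Read the whole proc file in one go before comparing; tools that
    # read it page-by-page can see partial views of long entries.
    with open(path) as f:
        data = f.read()
    return data.split() == expected.split()
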
@@ -600,9 +623,70 @@ run_stringtests()
 	test_rc
 }
 
+target_exists()
+{
+	TARGET="${SYSCTL}/$1"
+	TEST_ID="$2"
+
+	if [ ! -f ${TARGET} ] ; then
+		echo "Target for test $TEST_ID: $TARGET not exist, skipping test ..."
+		return 0
+	fi
+	return 1
+}
+
+run_bitmaptest() {
+	# Total length of bitmaps string to use, a bit under
+	# the maximum input size of the test node
+	LENGTH=$((RANDOM % 65000))
+
+	# First bit to set
+	BIT=$((RANDOM % 1024))
+
+	# String containing our list of bits to set
+	TEST_STR=$BIT
+
+	# build up the string
+	while [ "${#TEST_STR}" -le "$LENGTH" ]; do
+		# Make sure next entry is discontiguous,
+		# skip ahead at least 2
+		BIT=$((BIT + $((2 + RANDOM % 10))))
+
+		# Add new bit to the list
+		TEST_STR="${TEST_STR},${BIT}"
+
+		# Randomly make it a range
+		if [ "$((RANDOM % 2))" -eq "1" ]; then
+			RANGE_END=$((BIT + $((1 + RANDOM % 10))))
+			TEST_STR="${TEST_STR}-${RANGE_END}"
+			BIT=$RANGE_END
+		fi
+	done
+
+	echo -n "Checking bitmap handler... "
+	TEST_FILE=$(mktemp)
+	echo -n "$TEST_STR" > $TEST_FILE
+
+	cat $TEST_FILE > $TARGET 2> /dev/null
+	if [ $? -ne 0 ]; then
+		echo "FAIL" >&2
+		rc=1
+		test_rc
+	fi
+
+	if ! verify_diff_proc_file "$TARGET" "$TEST_FILE"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+		rc=0
+	fi
+	test_rc
+}
+
 sysctl_test_0001()
 {
-	TARGET="${SYSCTL}/int_0001"
+	TARGET="${SYSCTL}/$(get_test_target 0001)"
 	reset_vals
 	ORIG=$(cat "${TARGET}")
 	TEST_STR=$(( $ORIG + 1 ))
@@ -614,7 +698,7 @@ sysctl_test_0001()
 
 sysctl_test_0002()
 {
-	TARGET="${SYSCTL}/string_0001"
+	TARGET="${SYSCTL}/$(get_test_target 0002)"
 	reset_vals
 	ORIG=$(cat "${TARGET}")
 	TEST_STR="Testing sysctl"
@@ -627,7 +711,7 @@ sysctl_test_0002()
 
 sysctl_test_0003()
 {
-	TARGET="${SYSCTL}/int_0002"
+	TARGET="${SYSCTL}/$(get_test_target 0003)"
 	reset_vals
 	ORIG=$(cat "${TARGET}")
 	TEST_STR=$(( $ORIG + 1 ))
@@ -640,7 +724,7 @@ sysctl_test_0003()
 
 sysctl_test_0004()
 {
-	TARGET="${SYSCTL}/uint_0001"
+	TARGET="${SYSCTL}/$(get_test_target 0004)"
 	reset_vals
 	ORIG=$(cat "${TARGET}")
 	TEST_STR=$(( $ORIG + 1 ))
@@ -653,13 +737,21 @@ sysctl_test_0004()
 
 sysctl_test_0005()
 {
-	TARGET="${SYSCTL}/int_0003"
+	TARGET="${SYSCTL}/$(get_test_target 0005)"
 	reset_vals
 	ORIG=$(cat "${TARGET}")
 
 	run_limit_digit_int_array
 }
 
+sysctl_test_0006()
+{
+	TARGET="${SYSCTL}/bitmap_0001"
+	reset_vals
+	ORIG=""
+	run_bitmaptest
+}
+
 list_tests()
 {
 	echo "Test ID list:"
@@ -673,10 +765,9 @@ list_tests()
 	echo "0003 x $(get_test_count 0003) - tests proc_dointvec()"
 	echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
 	echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
+	echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
 }
 
-test_reqs
-
 usage()
 {
 	NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
@@ -724,25 +815,35 @@ function get_test_count()
 {
 	test_num $1
 	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
-	LAST_TWO=${TEST_DATA#*:*}
-	echo ${LAST_TWO%:*}
+	echo ${TEST_DATA} | awk -F":" '{print $2}'
 }
 
 function get_test_enabled()
 {
 	test_num $1
 	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
-	echo ${TEST_DATA#*:*:}
+	echo ${TEST_DATA} | awk -F":" '{print $3}'
+}
+
+function get_test_target()
+{
+	test_num $1
+	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	echo ${TEST_DATA} | awk -F":" '{print $4}'
 }
 
 function run_all_tests()
 {
 	for i in $ALL_TESTS ; do
-		TEST_ID=${i%:*:*}
+		TEST_ID=${i%:*:*:*}
 		ENABLED=$(get_test_enabled $TEST_ID)
 		TEST_COUNT=$(get_test_count $TEST_ID)
+		TEST_TARGET=$(get_test_target $TEST_ID)
+		if target_exists $TEST_TARGET $TEST_ID; then
+			continue
+		fi
 		if [[ $ENABLED -eq "1" ]]; then
-			test_case $TEST_ID $TEST_COUNT
+			test_case $TEST_ID $TEST_COUNT $TEST_TARGET
 		fi
 	done
 }
@@ -775,12 +876,14 @@ function watch_case()
 
 function test_case()
 {
-	NUM_TESTS=$DEFAULT_NUM_TESTS
-	if [ $# -eq 2 ]; then
-		NUM_TESTS=$2
-	fi
+	NUM_TESTS=$2
 
 	i=0
+
+	if target_exists $3 $1; then
+		continue
+	fi
+
 	while [ $i -lt $NUM_TESTS ]; do
 		test_num $1
 		watch_log $i ${TEST_NAME}_test_$1 noclear
@@ -803,15 +906,15 @@ function parse_args()
 	elif [[ "$1" = "-t" ]]; then
 		shift
 		test_num $1
-		test_case $1 $(get_test_count $1)
+		test_case $1 $(get_test_count $1) $(get_test_target $1)
 	elif [[ "$1" = "-c" ]]; then
 		shift
 		test_num $1
 		test_num $2
-		test_case $1 $2
+		test_case $1 $2 $(get_test_target $1)
 	elif [[ "$1" = "-s" ]]; then
 		shift
-		test_case $1 1
+		test_case $1 1 $(get_test_target $1)
 	elif [[ "$1" = "-l" ]]; then
 		list_tests
 	elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
@@ -825,8 +928,8 @@ function parse_args()
 test_reqs
 allow_user_defaults
 check_production_sysctl_writes_strict
-test_modprobe
 load_req_mod
+test_modprobe
 
 trap "test_finish" EXIT
 
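
For reference, the bitmap_0001 node speaks the kernel's bitmap list format handled by proc_do_large_bitmap(): comma-separated bit numbers and inclusive ranges, e.g. 0,3-5,8, which is exactly what run_bitmaptest() generates above. A small parser for that format, handy for checking a generated string by hand (illustrative, not part of the selftest):

def parse_bitmap_list(s):
    """Expand a kernel bitmap list like '0,3-5,8' into a set of bits."""
    bits = set()
    for chunk in s.split(","):
        if "-" in chunk:
            lo, hi = chunk.split("-")
            bits.update(range(int(lo), int(hi) + 1))
        else:
            bits.add(int(chunk))
    return bits

assert parse_bitmap_list("0,3-5,8") == {0, 3, 4, 5, 8}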