-rw-r--r--  .mailmap | 1
-rw-r--r--  CREDITS | 17
-rw-r--r--  Documentation/ABI/obsolete/sysfs-block-zram | 119
-rw-r--r--  Documentation/ABI/testing/sysfs-block-zram | 25
-rw-r--r--  Documentation/blockdev/zram.txt | 87
-rw-r--r--  Documentation/filesystems/Locking | 8
-rw-r--r--  Documentation/printk-formats.txt | 49
-rw-r--r--  Documentation/sysctl/vm.txt | 11
-rw-r--r--  Documentation/vm/hugetlbpage.txt | 55
-rw-r--r--  Documentation/vm/unevictable-lru.txt | 12
-rw-r--r--  Documentation/vm/zsmalloc.txt | 70
-rw-r--r--  MAINTAINERS | 75
-rw-r--r--  arch/arm/plat-pxa/dma.c | 111
-rw-r--r--  arch/cris/arch-v10/kernel/fasttimer.c | 85
-rw-r--r--  arch/cris/arch-v10/kernel/setup.c | 58
-rw-r--r--  arch/cris/arch-v32/kernel/fasttimer.c | 85
-rw-r--r--  arch/cris/arch-v32/kernel/setup.c | 62
-rw-r--r--  arch/microblaze/kernel/cpu/mb.c | 149
-rw-r--r--  arch/nios2/kernel/cpuinfo.c | 77
-rw-r--r--  arch/openrisc/kernel/setup.c | 50
-rw-r--r--  arch/powerpc/platforms/powernv/opal-power.c | 5
-rw-r--r--  arch/s390/Kconfig | 1
-rw-r--r--  arch/s390/pci/pci_debug.c | 6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/if.c | 12
-rw-r--r--  drivers/base/power/wakeup.c | 16
-rw-r--r--  drivers/block/paride/pg.c | 4
-rw-r--r--  drivers/block/zram/zram_drv.c | 73
-rw-r--r--  drivers/block/zram/zram_drv.h | 1
-rw-r--r--  drivers/parisc/ccio-dma.c | 54
-rw-r--r--  drivers/parisc/sba_iommu.c | 86
-rw-r--r--  drivers/rtc/rtc-cmos.c | 36
-rw-r--r--  drivers/rtc/rtc-ds1305.c | 6
-rw-r--r--  drivers/rtc/rtc-mrst.c | 16
-rw-r--r--  drivers/rtc/rtc-tegra.c | 4
-rw-r--r--  drivers/s390/cio/blacklist.c | 12
-rw-r--r--  drivers/sbus/char/bbc_envctrl.c | 3
-rw-r--r--  drivers/sbus/char/envctrl.c | 7
-rw-r--r--  drivers/staging/lustre/lustre/Kconfig | 1
-rw-r--r--  fs/dax.c | 17
-rw-r--r--  fs/ext2/ext2.h | 1
-rw-r--r--  fs/ext2/file.c | 17
-rw-r--r--  fs/ext2/inode.c | 5
-rw-r--r--  fs/ext2/namei.c | 10
-rw-r--r--  fs/ext4/ext4.h | 1
-rw-r--r--  fs/ext4/file.c | 19
-rw-r--r--  fs/ext4/inode.c | 5
-rw-r--r--  fs/ext4/namei.c | 10
-rw-r--r--  fs/hugetlbfs/inode.c | 90
-rw-r--r--  fs/jfs/jfs_metapage.c | 31
-rw-r--r--  fs/jfs/jfs_metapage.h | 1
-rw-r--r--  fs/nfs/Kconfig | 2
-rw-r--r--  fs/nfsd/Kconfig | 1
-rw-r--r--  fs/proc/array.c | 26
-rw-r--r--  fs/proc/base.c | 82
-rw-r--r--  fs/splice.c | 3
-rw-r--r--  include/linux/a.out.h | 67
-rw-r--r--  include/linux/bitmap.h | 8
-rw-r--r--  include/linux/capability.h | 29
-rw-r--r--  include/linux/compaction.h | 1
-rw-r--r--  include/linux/cred.h | 23
-rw-r--r--  include/linux/fs.h | 2
-rw-r--r--  include/linux/hugetlb.h | 20
-rw-r--r--  include/linux/ioport.h | 8
-rw-r--r--  include/linux/kasan.h | 2
-rw-r--r--  include/linux/ksm.h | 17
-rw-r--r--  include/linux/mempool.h | 3
-rw-r--r--  include/linux/mm.h | 100
-rw-r--r--  include/linux/mmzone.h | 8
-rw-r--r--  include/linux/page-flags.h | 103
-rw-r--r--  include/linux/printk.h | 5
-rw-r--r--  include/linux/reboot.h | 3
-rw-r--r--  include/linux/rmap.h | 8
-rw-r--r--  include/linux/string_helpers.h | 8
-rw-r--r--  include/linux/swap.h | 2
-rw-r--r--  include/linux/types.h | 6
-rw-r--r--  include/linux/uidgid.h | 12
-rw-r--r--  include/linux/zsmalloc.h | 1
-rw-r--r--  include/trace/events/cma.h | 66
-rw-r--r--  init/Kconfig | 19
-rw-r--r--  ipc/msg.c | 34
-rw-r--r--  ipc/sem.c | 26
-rw-r--r--  ipc/shm.c | 42
-rw-r--r--  ipc/util.c | 6
-rw-r--r--  kernel/Makefile | 4
-rw-r--r--  kernel/capability.c | 35
-rw-r--r--  kernel/cgroup.c | 6
-rw-r--r--  kernel/cred.c | 3
-rw-r--r--  kernel/groups.c | 3
-rw-r--r--  kernel/hung_task.c | 4
-rw-r--r--  kernel/reboot.c | 53
-rw-r--r--  kernel/resource.c | 32
-rw-r--r--  kernel/sys.c | 2
-rw-r--r--  kernel/sys_ni.c | 14
-rw-r--r--  kernel/sysctl.c | 9
-rw-r--r--  kernel/trace/trace_stack.c | 4
-rw-r--r--  lib/lru_cache.c | 9
-rw-r--r--  lib/string_helpers.c | 193
-rw-r--r--  lib/test-hexdump.c | 8
-rw-r--r--  lib/test-string_helpers.c | 40
-rw-r--r--  lib/vsprintf.c | 110
-rw-r--r--  mm/cma.c | 5
-rw-r--r--  mm/cma_debug.c | 41
-rw-r--r--  mm/compaction.c | 60
-rw-r--r--  mm/gup.c | 4
-rw-r--r--  mm/huge_memory.c | 86
-rw-r--r--  mm/hugetlb.c | 234
-rw-r--r--  mm/internal.h | 4
-rw-r--r--  mm/kasan/kasan.c | 13
-rw-r--r--  mm/ksm.c | 10
-rw-r--r--  mm/memblock.c | 18
-rw-r--r--  mm/memcontrol.c | 47
-rw-r--r--  mm/memory-failure.c | 122
-rw-r--r--  mm/memory.c | 56
-rw-r--r--  mm/memory_hotplug.c | 2
-rw-r--r--  mm/mempool.c | 117
-rw-r--r--  mm/migrate.c | 3
-rw-r--r--  mm/mmap.c | 21
-rw-r--r--  mm/mremap.c | 25
-rw-r--r--  mm/oom_kill.c | 2
-rw-r--r--  mm/page-writeback.c | 3
-rw-r--r--  mm/page_alloc.c | 6
-rw-r--r--  mm/rmap.c | 6
-rw-r--r--  mm/slub.c | 4
-rw-r--r--  mm/swap.c | 34
-rw-r--r--  mm/swap_state.c | 2
-rw-r--r--  mm/swapfile.c | 2
-rw-r--r--  mm/truncate.c | 2
-rw-r--r--  mm/util.c | 41
-rw-r--r--  mm/vmalloc.c | 95
-rw-r--r--  mm/zsmalloc.c | 971
-rw-r--r--  net/sunrpc/Kconfig | 2
-rw-r--r--  net/sunrpc/cache.c | 8
-rw-r--r--  security/Kconfig | 1
-rw-r--r--  tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c | 8
-rw-r--r--  tools/testing/selftests/vm/hugetlbfstest.c | 4
-rw-r--r--  tools/testing/selftests/vm/map_hugetlb.c | 6
136 files changed, 3279 insertions, 1814 deletions
diff --git a/.mailmap b/.mailmap
index 0d971cfb0772..6287004040e7 100644
--- a/.mailmap
+++ b/.mailmap
@@ -100,6 +100,7 @@ Rajesh Shah <rajesh.shah@intel.com>
 Ralf Baechle <ralf@linux-mips.org>
 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
 Rémi Denis-Courmont <rdenis@simphalempin.com>
+Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
 Rudolf Marek <R.Marek@sh.cvut.cz>
 Rui Saraiva <rmps@joel.ist.utl.pt>
 Sachin P Sant <ssant@in.ibm.com>
diff --git a/CREDITS b/CREDITS
index 843e17647f3b..2ef5dceef324 100644
--- a/CREDITS
+++ b/CREDITS
@@ -508,6 +508,10 @@ E: paul@paulbristow.net
 W: http://paulbristow.net/linux/idefloppy.html
 D: Maintainer of IDE/ATAPI floppy driver
 
+N: Stefano Brivio
+E: stefano.brivio@polimi.it
+D: Broadcom B43 driver
+
 N: Dominik Brodowski
 E: linux@brodo.de
 W: http://www.brodo.de/
@@ -3008,6 +3012,19 @@ W: http://www.qsl.net/dl1bke/
 D: Generic Z8530 driver, AX.25 DAMA slave implementation
 D: Several AX.25 hacks
 
+N: Ricardo Ribalda Delgado
+E: ricardo.ribalda@gmail.com
+W: http://ribalda.com
+D: PLX USB338x driver
+D: PCA9634 driver
+D: Option GTM671WFS
+D: Fintek F81216A
+D: Various kernel hacks
+S: Qtechnology A/S
+S: Valby Langgade 142
+S: 2500 Valby
+S: Denmark
+
 N: Francois-Rene Rideau
 E: fare@tunes.org
 W: http://www.tunes.org/~fare
diff --git a/Documentation/ABI/obsolete/sysfs-block-zram b/Documentation/ABI/obsolete/sysfs-block-zram
new file mode 100644
index 000000000000..720ea92cfb2e
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-block-zram
@@ -0,0 +1,119 @@
+What:		/sys/block/zram<id>/num_reads
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The num_reads file is read-only and specifies the number of
+		reads (failed or successful) done on this device.
+		Now accessible via zram<id>/stat node.
+
+What:		/sys/block/zram<id>/num_writes
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The num_writes file is read-only and specifies the number of
+		writes (failed or successful) done on this device.
+		Now accessible via zram<id>/stat node.
+
+What:		/sys/block/zram<id>/invalid_io
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The invalid_io file is read-only and specifies the number of
+		non-page-size-aligned I/O requests issued to this device.
+		Now accessible via zram<id>/io_stat node.
+
+What:		/sys/block/zram<id>/failed_reads
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The failed_reads file is read-only and specifies the number of
+		failed reads that happened on this device.
+		Now accessible via zram<id>/io_stat node.
+
+What:		/sys/block/zram<id>/failed_writes
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The failed_writes file is read-only and specifies the number of
+		failed writes that happened on this device.
+		Now accessible via zram<id>/io_stat node.
+
+What:		/sys/block/zram<id>/notify_free
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The notify_free file is read-only. Depending on device usage
+		scenario it may account a) the number of pages freed because
+		of swap slot free notifications or b) the number of pages freed
+		because of REQ_DISCARD requests sent by bio. The former ones
+		are sent to a swap block device when a swap slot is freed, which
+		implies that this disk is being used as a swap disk. The latter
+		ones are sent by a filesystem mounted with the discard option,
+		whenever some data blocks are getting discarded.
+		Now accessible via zram<id>/io_stat node.
+
+What:		/sys/block/zram<id>/zero_pages
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The zero_pages file is read-only and specifies the number of
+		zero-filled pages written to this disk. No memory is allocated
+		for such pages.
+		Now accessible via zram<id>/mm_stat node.
+
+What:		/sys/block/zram<id>/orig_data_size
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The orig_data_size file is read-only and specifies uncompressed
+		size of data stored in this disk. This excludes zero-filled
+		pages (zero_pages) since no memory is allocated for them.
+		Unit: bytes
+		Now accessible via zram<id>/mm_stat node.
+
+What:		/sys/block/zram<id>/compr_data_size
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The compr_data_size file is read-only and specifies compressed
+		size of data stored in this disk. So, compression ratio can be
+		calculated using orig_data_size and this statistic.
+		Unit: bytes
+		Now accessible via zram<id>/mm_stat node.
+
+What:		/sys/block/zram<id>/mem_used_total
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The mem_used_total file is read-only and specifies the amount
+		of memory, including allocator fragmentation and metadata
+		overhead, allocated for this disk. So, allocator space
+		efficiency can be calculated using compr_data_size and this
+		statistic.
+		Unit: bytes
+		Now accessible via zram<id>/mm_stat node.
+
+What:		/sys/block/zram<id>/mem_used_max
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The mem_used_max file is read/write and specifies the maximum
+		amount of memory zram has consumed to store compressed data.
+		For resetting the value, you should write "0". Otherwise,
+		you could see -EINVAL.
+		Unit: bytes
+		Downgraded to write-only node: so it's possible to set new
+		value only; its current value is stored in zram<id>/mm_stat
+		node.
+
+What:		/sys/block/zram<id>/mem_limit
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The mem_limit file is read/write and specifies the maximum
+		amount of memory ZRAM can use to store the compressed data.
+		The limit could be changed in run time and "0" means disable
+		the limit. No limit is the initial state. Unit: bytes
+		Downgraded to write-only node: so it's possible to set new
+		value only; its current value is stored in zram<id>/mm_stat
+		node.
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index a6148eaf91e5..2e69e83bf510 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -141,3 +141,28 @@ Description:
 		amount of memory ZRAM can use to store the compressed data. The
 		limit could be changed in run time and "0" means disable the
 		limit. No limit is the initial state. Unit: bytes
+
+What:		/sys/block/zram<id>/compact
+Date:		August 2015
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		The compact file is write-only and triggers compaction for
+		the allocator zram uses. The allocator moves some objects so
+		that it can free fragmented space.
+
+What:		/sys/block/zram<id>/io_stat
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The io_stat file is read-only and accumulates device's I/O
+		statistics not accounted by block layer. For example,
+		failed_reads, failed_writes, etc. File format is similar to
+		block layer statistics file format.
+
+What:		/sys/block/zram<id>/mm_stat
+Date:		August 2015
+Contact:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+		The mm_stat file is read-only and represents device's mm
+		statistics (orig_data_size, compr_data_size, etc.) in a format
+		similar to block layer statistics file format.
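
Note: a minimal user-space sketch exercising the two nodes documented
above: poke the write-only compact node (any non-empty write appears to
be enough, which is an assumption here), then read back io_stat.
Assumes an initialized /dev/zram0.

#include <stdio.h>

int main(void)
{
	FILE *c = fopen("/sys/block/zram0/compact", "w");
	FILE *s;
	char line[256];

	if (c) {
		fputs("1", c);		/* trigger compaction */
		fclose(c);
	}

	s = fopen("/sys/block/zram0/io_stat", "r");
	if (s && fgets(line, sizeof(line), s))
		/* failed_reads failed_writes invalid_io notify_free */
		fputs(line, stdout);
	if (s)
		fclose(s);
	return 0;
}
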
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 7fcf9c6592ec..48a183e29988 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -98,20 +98,79 @@ size of the disk when not in use so a huge zram is wasteful.
 	mount /dev/zram1 /tmp
 
 7) Stats:
-	Per-device statistics are exported as various nodes under
-	/sys/block/zram<id>/
-		disksize
-		num_reads
-		num_writes
-		failed_reads
-		failed_writes
-		invalid_io
-		notify_free
-		zero_pages
-		orig_data_size
-		compr_data_size
-		mem_used_total
-		mem_used_max
+Per-device statistics are exported as various nodes under /sys/block/zram<id>/
+
+A brief description of exported device attributes. For more details please
+read Documentation/ABI/testing/sysfs-block-zram.
+
+Name              access  description
+----              ------  -----------
+disksize          RW      show and set the device's disk size
+initstate         RO      shows the initialization state of the device
+reset             WO      trigger device reset
+num_reads         RO      the number of reads
+failed_reads      RO      the number of failed reads
+num_writes        RO      the number of writes
+failed_writes     RO      the number of failed writes
+invalid_io        RO      the number of non-page-size-aligned I/O requests
+max_comp_streams  RW      the number of possible concurrent compress operations
+comp_algorithm    RW      show and change the compression algorithm
+notify_free       RO      the number of notifications to free pages (either
+                          slot free notifications or REQ_DISCARD requests)
+zero_pages        RO      the number of zero filled pages written to this disk
+orig_data_size    RO      uncompressed size of data stored in this disk
+compr_data_size   RO      compressed size of data stored in this disk
+mem_used_total    RO      the amount of memory allocated for this disk
+mem_used_max      RW      the maximum amount of memory zram has consumed to
+                          store compressed data
+mem_limit         RW      the maximum amount of memory ZRAM can use to store
+                          the compressed data
+num_migrated      RO      the number of objects migrated by compaction
+
+
+WARNING
+=======
+per-stat sysfs attributes are considered to be deprecated.
+The basic strategy is:
+-- the existing RW nodes will be downgraded to WO nodes (in linux 4.11)
+-- deprecated RO sysfs nodes will eventually be removed (in linux 4.11)
+
+The list of deprecated attributes can be found here:
+Documentation/ABI/obsolete/sysfs-block-zram
+
+Basically, every attribute that has its own read accessible sysfs node
+(e.g. num_reads) *AND* is accessible via one of the stat files (zram<id>/stat
+or zram<id>/io_stat or zram<id>/mm_stat) is considered to be deprecated.
+
+User space is advised to use the following files to read the device statistics.
+
+File /sys/block/zram<id>/stat
+
+Represents block layer statistics. Read Documentation/block/stat.txt for
+details.
+
+File /sys/block/zram<id>/io_stat
+
+The stat file represents device's I/O statistics not accounted by block
+layer and, thus, not available in zram<id>/stat file. It consists of a
+single line of text and contains the following stats separated by
+whitespace:
+	failed_reads
+	failed_writes
+	invalid_io
+	notify_free
+
+File /sys/block/zram<id>/mm_stat
+
+The stat file represents device's mm statistics. It consists of a single
+line of text and contains the following stats separated by whitespace:
+	orig_data_size
+	compr_data_size
+	mem_used_total
+	mem_limit
+	mem_used_max
+	zero_pages
+	num_migrated
 
 8) Deactivate:
 	swapoff /dev/zram0
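
Note: to make the mm_stat layout above concrete, a minimal user-space
sketch that parses its seven whitespace-separated fields and derives the
compression ratio. Assumes a configured /dev/zram0.

#include <stdio.h>

int main(void)
{
	unsigned long long orig, compr, used, limit, used_max, zero, migrated;
	FILE *f = fopen("/sys/block/zram0/mm_stat", "r");

	if (!f) {
		perror("mm_stat");
		return 1;
	}
	if (fscanf(f, "%llu %llu %llu %llu %llu %llu %llu",
		   &orig, &compr, &used, &limit, &used_max,
		   &zero, &migrated) == 7)
		printf("compression ratio: %.2f\n",
		       compr ? (double)orig / compr : 0.0);
	fclose(f);
	return 0;
}
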
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index c3cd6279e92e..7c3f187d48bf 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -523,6 +523,7 @@ prototypes:
 	void (*close)(struct vm_area_struct*);
 	int (*fault)(struct vm_area_struct*, struct vm_fault *);
 	int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
+	int (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
 	int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
 
 locking rules:
@@ -532,6 +533,7 @@ close: yes
 fault:		yes		can return with page locked
 map_pages:	yes
 page_mkwrite:	yes		can return with page locked
+pfn_mkwrite:	yes
 access:		yes
 
 	->fault() is called when a previously not present pte is about
@@ -558,6 +560,12 @@ the page has been truncated, the filesystem should not look up a new page
 like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
 will cause the VM to retry the fault.
 
+	->pfn_mkwrite() is the same as page_mkwrite but when the pte is
+VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
+VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior
+after this call is to make the pte read-write, unless pfn_mkwrite returns
+an error.
+
 	->access() is called when get_user_pages() fails in
 access_process_vm(), typically used to debug a process through
 /proc/pid/mem or ptrace. This function is needed only for
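
Note: a sketch of a ->pfn_mkwrite() handler following the rules above.
The driver context and the example_mark_pfn_dirty() helper are
hypothetical; only the return-value contract comes from the text.

static int example_pfn_mkwrite(struct vm_area_struct *vma,
			       struct vm_fault *vmf)
{
	/* hypothetical helper: note that this pfn is about to be written */
	if (!example_mark_pfn_dirty(vma, vmf->pgoff))
		return VM_FAULT_SIGBUS;	/* one of the VM_FAULT_ERROR types */

	return VM_FAULT_NOPAGE;		/* VM then makes the pte read-write */
}
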
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 5a615c14f75d..cb6a596072bb 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -8,6 +8,21 @@ If variable is of Type, use printk format specifier:
 	unsigned long long	%llu or %llx
 	size_t			%zu or %zx
 	ssize_t			%zd or %zx
+	s32			%d or %x
+	u32			%u or %x
+	s64			%lld or %llx
+	u64			%llu or %llx
+
+If <type> is dependent on a config option for its size (e.g., sector_t,
+blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a
+format specifier of its largest possible type and explicitly cast to it.
+Example:
+
+	printk("test: sector number/total blocks: %llu/%llu\n",
+		(unsigned long long)sector, (unsigned long long)blockcount);
+
+Reminder: sizeof() result is of type size_t.
+
 
 Raw pointer value SHOULD be printed with %p. The kernel supports
 the following extended format specifiers for pointer types:
@@ -54,6 +69,7 @@ Struct Resources:
 
 	For printing struct resources. The 'R' and 'r' specifiers result in a
 	printed resource with ('R') or without ('r') a decoded flags member.
+	Passed by reference.
 
 Physical addresses types phys_addr_t:
 
@@ -132,6 +148,8 @@ MAC/FDDI addresses:
 	specifier to use reversed byte order suitable for visual interpretation
 	of Bluetooth addresses which are in the little endian order.
 
+	Passed by reference.
+
 IPv4 addresses:
 
 	%pI4	1.2.3.4
@@ -146,6 +164,8 @@ IPv4 addresses:
 	host, network, big or little endian order addresses respectively. Where
 	no specifier is provided the default network/big endian order is used.
 
+	Passed by reference.
+
 IPv6 addresses:
 
 	%pI6	0001:0002:0003:0004:0005:0006:0007:0008
@@ -160,6 +180,8 @@ IPv6 addresses:
 	print a compressed IPv6 address as described by
 	http://tools.ietf.org/html/rfc5952
 
+	Passed by reference.
+
 IPv4/IPv6 addresses (generic, with port, flowinfo, scope):
 
 	%pIS	1.2.3.4		or 0001:0002:0003:0004:0005:0006:0007:0008
@@ -186,6 +208,8 @@ IPv4/IPv6 addresses (generic, with port, flowinfo, scope):
 	specifiers can be used as well and are ignored in case of an IPv6
 	address.
 
+	Passed by reference.
+
 	Further examples:
 
 	%pISfc	1.2.3.4		or [1:2:3:4:5:6:7:8]/123456789
@@ -207,6 +231,8 @@ UUID/GUID addresses:
 	Where no additional specifiers are used the default little endian
 	order with lower case hex characters will be printed.
 
+	Passed by reference.
+
 dentry names:
 	%pd{,2,3,4}
 	%pD{,2,3,4}
@@ -216,6 +242,8 @@ dentry names:
 	equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
 	n last components. %pD does the same thing for struct file.
 
+	Passed by reference.
+
 struct va_format:
 
 	%pV
@@ -231,23 +259,20 @@ struct va_format:
 	Do not use this feature without some mechanism to verify the
 	correctness of the format string and va_list arguments.
 
-u64 SHOULD be printed with %llu/%llx:
-
-	printk("%llu", u64_var);
+	Passed by reference.
 
-s64 SHOULD be printed with %lld/%llx:
+struct clk:
 
-	printk("%lld", s64_var);
+	%pC	pll1
+	%pCn	pll1
+	%pCr	1560000000
 
-If <type> is dependent on a config option for its size (e.g., sector_t,
-blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a
-format specifier of its largest possible type and explicitly cast to it.
-Example:
+	For printing struct clk structures. '%pC' and '%pCn' print the name
+	(Common Clock Framework) or address (legacy clock framework) of the
+	structure; '%pCr' prints the current clock rate.
 
-	printk("test: sector number/total blocks: %llu/%llu\n",
-	(unsigned long long)sector, (unsigned long long)blockcount);
+	Passed by reference.
 
-Reminder: sizeof() result is of type size_t.
-
 Thank you for your cooperation and attention.
 
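
Note: the "Passed by reference" annotations above mean the argument for
an extended %p specifier is always a pointer. A short sketch (the
function is hypothetical; the specifiers are the ones documented above):

#include <linux/ioport.h>
#include <linux/printk.h>

static void example_print(struct resource *res, const u8 *mac, u64 bytes)
{
	pr_info("region: %pR\n", res);		/* struct resource, by reference */
	pr_info("hwaddr: %pM\n", mac);		/* MAC address, by reference */
	pr_info("size:   %llu\n", bytes);	/* u64 prints with %llu */
}
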
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 902b4574acfb..9832ec52f859 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -21,6 +21,7 @@ Currently, these files are in /proc/sys/vm:
 - admin_reserve_kbytes
 - block_dump
 - compact_memory
+- compact_unevictable_allowed
 - dirty_background_bytes
 - dirty_background_ratio
 - dirty_bytes
@@ -106,6 +107,16 @@ huge pages although processes will also directly compact memory as required.
 
 ==============================================================
 
+compact_unevictable_allowed
+
+Available only when CONFIG_COMPACTION is set. When set to 1, compaction is
+allowed to examine the unevictable lru (mlocked pages) for pages to compact.
+This should be used on systems where stalls for minor page faults are an
+acceptable trade for large contiguous free memory. Set to 0 to prevent
+compaction from moving pages that are unevictable. Default value is 1.
+
+==============================================================
+
 dirty_background_bytes
 
 Contains the amount of dirty memory at which the background kernel
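
Note: compact_unevictable_allowed is an ordinary procfs file; a minimal
user-space sketch that disables unevictable-LRU compaction as described
above (needs CONFIG_COMPACTION and sufficient privilege):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/compact_unevictable_allowed", "w");

	if (!f) {
		perror("compact_unevictable_allowed");
		return 1;
	}
	fputs("0", f);	/* 0: leave mlocked pages alone; 1 is the default */
	return fclose(f) ? 1 : 0;
}
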
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index f2d3a100fe38..030977fb8d2d 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -267,21 +267,34 @@ call, then it is required that system administrator mount a file system of
 type hugetlbfs:
 
   mount -t hugetlbfs \
-	-o uid=<value>,gid=<value>,mode=<value>,size=<value>,nr_inodes=<value> \
-	none /mnt/huge
+	-o uid=<value>,gid=<value>,mode=<value>,pagesize=<value>,size=<value>,\
+	min_size=<value>,nr_inodes=<value> none /mnt/huge
 
 This command mounts a (pseudo) filesystem of type hugetlbfs on the directory
 /mnt/huge.  Any files created on /mnt/huge uses huge pages.  The uid and gid
 options sets the owner and group of the root of the file system.  By default
 the uid and gid of the current process are taken.  The mode option sets the
 mode of root of file system to value & 01777.  This value is given in octal.
-By default the value 0755 is picked. The size option sets the maximum value of
-memory (huge pages) allowed for that filesystem (/mnt/huge). The size is
-rounded down to HPAGE_SIZE.  The option nr_inodes sets the maximum number of
-inodes that /mnt/huge can use.  If the size or nr_inodes option is not
-provided on command line then no limits are set.  For size and nr_inodes
-options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
-example, size=2K has the same meaning as size=2048.
+By default the value 0755 is picked. If the platform supports multiple huge
+page sizes, the pagesize option can be used to specify the huge page size and
+associated pool. pagesize is specified in bytes. If pagesize is not specified
+the platform's default huge page size and associated pool will be used. The
+size option sets the maximum value of memory (huge pages) allowed for that
+filesystem (/mnt/huge). The size option can be specified in bytes, or as a
+percentage of the specified huge page pool (nr_hugepages). The size is
+rounded down to HPAGE_SIZE boundary. The min_size option sets the minimum
+value of memory (huge pages) allowed for the filesystem. min_size can be
+specified in the same way as size, either bytes or a percentage of the
+huge page pool. At mount time, the number of huge pages specified by
+min_size are reserved for use by the filesystem. If there are not enough
+free huge pages available, the mount will fail. As huge pages are allocated
+to the filesystem and freed, the reserve count is adjusted so that the sum
+of allocated and reserved huge pages is always at least min_size. The option
+nr_inodes sets the maximum number of inodes that /mnt/huge can use. If the
+size, min_size or nr_inodes option is not provided on command line then
+no limits are set. For pagesize, size, min_size and nr_inodes options, you
+can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For example, size=2K
+has the same meaning as size=2048.
 
 While read system calls are supported on files that reside on hugetlb
 file systems, write system calls are not.
@@ -289,15 +302,23 @@ file systems, write system calls are not.
 Regular chown, chgrp, and chmod commands (with right permissions) could be
 used to change the file attributes on hugetlbfs.
 
-Also, it is important to note that no such mount command is required if the
-applications are going to use only shmat/shmget system calls or mmap with
-MAP_HUGETLB.  Users who wish to use hugetlb page via shared memory segment
-should be a member of a supplementary group and system admin needs to
-configure that gid into /proc/sys/vm/hugetlb_shm_group.  It is possible for
-same or different applications to use any combination of mmaps and shm*
-calls, though the mount of filesystem will be required for using mmap calls
-without MAP_HUGETLB.  For an example of how to use mmap with MAP_HUGETLB see
-map_hugetlb.c.
+Also, it is important to note that no such mount command is required if
+applications are going to use only shmat/shmget system calls or mmap with
+MAP_HUGETLB. For an example of how to use mmap with MAP_HUGETLB see map_hugetlb
+below.
+
+Users who wish to use hugetlb memory via shared memory segment should be a
+member of a supplementary group and system admin needs to configure that gid
+into /proc/sys/vm/hugetlb_shm_group. It is possible for same or different
+applications to use any combination of mmaps and shm* calls, though the mount of
+filesystem will be required for using mmap calls without MAP_HUGETLB.
+
+Syscalls that operate on memory backed by hugetlb pages only have their lengths
+aligned to the native page size of the processor; they will normally fail with
+errno set to EINVAL or exclude hugetlb pages that extend beyond the length if
+not hugepage aligned. For example, munmap(2) will fail if memory is backed by
+a hugetlb page and the length is smaller than the hugepage size.
+
 
 Examples
 ========
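
Note: in the spirit of the map_hugetlb example referenced above, a
minimal mmap(MAP_HUGETLB) sketch; no hugetlbfs mount is needed. The
256 MB length and the fallback MAP_HUGETLB value are illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define LENGTH (256UL * 1024 * 1024)

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* arch-specific; correct for x86 */
#endif

int main(void)
{
	void *addr = mmap(NULL, LENGTH, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (addr == MAP_FAILED) {
		perror("mmap");	/* typically no huge pages reserved */
		exit(1);
	}
	/* per the alignment note above, unmap with the full length */
	munmap(addr, LENGTH);
	return 0;
}
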
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index 86cb4624fc5a..3be0bfc4738d 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -22,6 +22,7 @@ CONTENTS
  - Filtering special vmas.
  - munlock()/munlockall() system call handling.
  - Migrating mlocked pages.
+ - Compacting mlocked pages.
  - mmap(MAP_LOCKED) system call handling.
  - munmap()/exit()/exec() system call handling.
  - try_to_unmap().
@@ -450,6 +451,17 @@ list because of a race between munlock and migration, page migration uses the
 putback_lru_page() function to add migrated pages back to the LRU.
 
 
+COMPACTING MLOCKED PAGES
+------------------------
+
+The unevictable LRU can be scanned for compactable regions and the default
+behavior is to do so.  /proc/sys/vm/compact_unevictable_allowed controls
+this behavior (see Documentation/sysctl/vm.txt).  Once scanning of the
+unevictable LRU is enabled, the work of compaction is mostly handled by
+the page migration code and the same work flow as described in MIGRATING
+MLOCKED PAGES will apply.
+
+
 mmap(MAP_LOCKED) SYSTEM CALL HANDLING
 -------------------------------------
 
diff --git a/Documentation/vm/zsmalloc.txt b/Documentation/vm/zsmalloc.txt
new file mode 100644
index 000000000000..64ed63c4f69d
--- /dev/null
+++ b/Documentation/vm/zsmalloc.txt
@@ -0,0 +1,70 @@
+zsmalloc
+--------
+
+This allocator is designed for use with zram. Thus, the allocator is
+supposed to work well under low memory conditions. In particular, it
+never attempts higher order page allocation which is very likely to
+fail under memory pressure. On the other hand, if we just use single
+(0-order) pages, it would suffer from very high fragmentation --
+any object of size PAGE_SIZE/2 or larger would occupy an entire page.
+This was one of the major issues with its predecessor (xvmalloc).
+
+To overcome these issues, zsmalloc allocates a bunch of 0-order pages
+and links them together using various 'struct page' fields. These linked
+pages act as a single higher-order page i.e. an object can span 0-order
+page boundaries. The code refers to these linked pages as a single entity
+called zspage.
+
+For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
+since this satisfies the requirements of all its current users (in the
+worst case, page is incompressible and is thus stored "as-is" i.e. in
+uncompressed form). For allocation requests larger than this size, failure
+is returned (see zs_malloc).
+
+Additionally, zs_malloc() does not return a dereferenceable pointer.
+Instead, it returns an opaque handle (unsigned long) which encodes actual
+location of the allocated object. The reason for this indirection is that
+zsmalloc does not keep zspages permanently mapped since that would cause
+issues on 32-bit systems where the VA region for kernel space mappings
+is very small. So, before using the allocated memory, the object has to
+be mapped using zs_map_object() to get a usable pointer and subsequently
+unmapped using zs_unmap_object().
+
+stat
+----
+
+With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via
+/sys/kernel/debug/zsmalloc/<user name>. Here is a sample of stat output:
+
+# cat /sys/kernel/debug/zsmalloc/zram0/classes
+
+ class  size  almost_full  almost_empty  obj_allocated  obj_used  pages_used  pages_per_zspage
+    ..
+    ..
+     9   176            0             1            186       129           8                 4
+    10   192            1             0           2880      2872         135                 3
+    11   208            0             1            819       795          42                 2
+    12   224            0             1            219       159          12                 4
+    ..
+    ..
+
+
+class: index
+size: object size zspage stores
+almost_empty: the number of ZS_ALMOST_EMPTY zspages (see below)
+almost_full: the number of ZS_ALMOST_FULL zspages (see below)
+obj_allocated: the number of objects allocated
+obj_used: the number of objects allocated to the user
+pages_used: the number of pages allocated for the class
+pages_per_zspage: the number of 0-order pages to make a zspage
+
+We assign a zspage to ZS_ALMOST_EMPTY fullness group when:
+	n <= N / f, where
+n = number of allocated objects
+N = total number of objects zspage can store
+f = fullness_threshold_frac (i.e., 4 at the moment)
+
+Similarly, we assign zspage to:
+	ZS_ALMOST_FULL  when n > N / f
+	ZS_EMPTY        when n == 0
+	ZS_FULL         when n == N
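
Note: a kernel-side sketch of the handle/map/unmap flow described
above. Pool creation is omitted (its signature has varied between
kernel versions); treat this as illustrative rather than exact.

#include <linux/zsmalloc.h>
#include <linux/string.h>
#include <linux/errno.h>

static int example_store(struct zs_pool *pool, const void *src, size_t len)
{
	unsigned long handle;
	void *dst;

	handle = zs_malloc(pool, len);	/* opaque handle, not a pointer */
	if (!handle)
		return -ENOMEM;

	dst = zs_map_object(pool, handle, ZS_MM_WO);	/* usable pointer */
	memcpy(dst, src, len);
	zs_unmap_object(pool, handle);	/* don't hold mappings for long */

	return 0;
}
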
diff --git a/MAINTAINERS b/MAINTAINERS
index d158405c75ff..56a432d51119 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -625,16 +625,16 @@ F: drivers/iommu/amd_iommu*.[ch]
 F:	include/linux/amd-iommu.h
 
 AMD KFD
 M:	Oded Gabbay <oded.gabbay@amd.com>
 L:	dri-devel@lists.freedesktop.org
 T:	git git://people.freedesktop.org/~gabbayo/linux.git
 S:	Supported
 F:	drivers/gpu/drm/amd/amdkfd/
 F:	drivers/gpu/drm/amd/include/cik_structs.h
 F:	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
 F:	drivers/gpu/drm/radeon/radeon_kfd.c
 F:	drivers/gpu/drm/radeon/radeon_kfd.h
 F:	include/uapi/linux/kfd_ioctl.h
 
 AMD MICROCODE UPDATE SUPPORT
 M:	Borislav Petkov <bp@alien8.de>
@@ -1915,16 +1915,14 @@ S: Maintained
 F:	drivers/media/radio/radio-aztech*
 
 B43 WIRELESS DRIVER
-M:	Stefano Brivio <stefano.brivio@polimi.it>
 L:	linux-wireless@vger.kernel.org
 L:	b43-dev@lists.infradead.org
 W:	http://wireless.kernel.org/en/users/Drivers/b43
-S:	Maintained
+S:	Odd Fixes
 F:	drivers/net/wireless/b43/
 
 B43LEGACY WIRELESS DRIVER
 M:	Larry Finger <Larry.Finger@lwfinger.net>
-M:	Stefano Brivio <stefano.brivio@polimi.it>
 L:	linux-wireless@vger.kernel.org
 L:	b43-dev@lists.infradead.org
 W:	http://wireless.kernel.org/en/users/Drivers/b43
@@ -1967,10 +1965,10 @@ F: Documentation/filesystems/befs.txt
 F:	fs/befs/
 
 BECKHOFF CX5020 ETHERCAT MASTER DRIVER
 M:	Dariusz Marcinkiewicz <reksio@newterm.pl>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/ec_bhf.c
 
 BFS FILE SYSTEM
 M:	"Tigran A. Aivazian" <tigran@aivazian.fsnet.co.uk>
@@ -2896,11 +2894,11 @@ S: Supported
 F:	drivers/net/ethernet/chelsio/cxgb3/
 
 CXGB3 ISCSI DRIVER (CXGB3I)
 M:	Karen Xie <kxie@chelsio.com>
 L:	linux-scsi@vger.kernel.org
 W:	http://www.chelsio.com
 S:	Supported
 F:	drivers/scsi/cxgbi/cxgb3i
 
 CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
 M:	Steve Wise <swise@chelsio.com>
@@ -2917,11 +2915,11 @@ S: Supported
 F:	drivers/net/ethernet/chelsio/cxgb4/
 
 CXGB4 ISCSI DRIVER (CXGB4I)
 M:	Karen Xie <kxie@chelsio.com>
 L:	linux-scsi@vger.kernel.org
 W:	http://www.chelsio.com
 S:	Supported
 F:	drivers/scsi/cxgbi/cxgb4i
 
 CXGB4 IWARP RNIC DRIVER (IW_CXGB4)
 M:	Steve Wise <swise@chelsio.com>
@@ -5223,7 +5221,7 @@ F: arch/x86/kernel/tboot.c
 INTEL WIRELESS WIMAX CONNECTION 2400
 M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
 M:	linux-wimax@intel.com
 L:	wimax@linuxwimax.org (subscribers-only)
 S:	Supported
 W:	http://linuxwimax.org
 F:	Documentation/wimax/README.i2400m
@@ -5926,7 +5924,7 @@ F: arch/powerpc/platforms/512x/
 F:	arch/powerpc/platforms/52xx/
 
 LINUX FOR POWERPC EMBEDDED PPC4XX
 M:	Alistair Popple <alistair@popple.id.au>
 M:	Matt Porter <mporter@kernel.crashing.org>
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@lists.ozlabs.org
@@ -6399,7 +6397,7 @@ S: Supported
 F:	drivers/watchdog/mena21_wdt.c
 
 MEN CHAMELEON BUS (mcb)
 M:	Johannes Thumshirn <johannes.thumshirn@men.de>
 S:	Supported
 F:	drivers/mcb/
 F:	include/linux/mcb.h
@@ -7955,10 +7953,10 @@ L: rtc-linux@googlegroups.com
 S:	Maintained
 
 QAT DRIVER
 M:	Tadeusz Struk <tadeusz.struk@intel.com>
 L:	qat-linux@intel.com
 S:	Supported
 F:	drivers/crypto/qat/
 
 QIB DRIVER
 M:	Mike Marciniszyn <infinipath@intel.com>
@@ -10129,11 +10127,11 @@ F: include/linux/cdrom.h
 F:	include/uapi/linux/cdrom.h
 
 UNISYS S-PAR DRIVERS
 M:	Benjamin Romer <benjamin.romer@unisys.com>
 M:	David Kershner <david.kershner@unisys.com>
 L:	sparmaintainer@unisys.com (Unisys internal)
 S:	Supported
 F:	drivers/staging/unisys/
 
 UNIVERSAL FLASH STORAGE HOST CONTROLLER DRIVER
 M:	Vinayak Holikatti <vinholikatti@gmail.com>
@@ -10690,7 +10688,7 @@ F: drivers/media/rc/winbond-cir.c
 WIMAX STACK
 M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
 M:	linux-wimax@intel.com
 L:	wimax@linuxwimax.org (subscribers-only)
 S:	Supported
 W:	http://linuxwimax.org
 F:	Documentation/wimax/README.wimax
@@ -10981,6 +10979,7 @@ L: linux-mm@kvack.org
 S:	Maintained
 F:	mm/zsmalloc.c
 F:	include/linux/zsmalloc.h
+F:	Documentation/vm/zsmalloc.txt
 
 ZSWAP COMPRESSED SWAP CACHING
 M:	Seth Jennings <sjennings@variantweb.net>
diff --git a/arch/arm/plat-pxa/dma.c b/arch/arm/plat-pxa/dma.c
index 054fc5a1a11c..d92f07f6ecfb 100644
--- a/arch/arm/plat-pxa/dma.c
+++ b/arch/arm/plat-pxa/dma.c
@@ -51,19 +51,19 @@ static struct dentry *dbgfs_root, *dbgfs_state, **dbgfs_chan;
 
 static int dbg_show_requester_chan(struct seq_file *s, void *p)
 {
-	int pos = 0;
 	int chan = (int)s->private;
 	int i;
 	u32 drcmr;
 
-	pos += seq_printf(s, "DMA channel %d requesters list :\n", chan);
+	seq_printf(s, "DMA channel %d requesters list :\n", chan);
 	for (i = 0; i < DMA_MAX_REQUESTERS; i++) {
 		drcmr = DRCMR(i);
 		if ((drcmr & DRCMR_CHLNUM) == chan)
-			pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i,
-					  !!(drcmr & DRCMR_MAPVLD));
+			seq_printf(s, "\tRequester %d (MAPVLD=%d)\n",
+				   i, !!(drcmr & DRCMR_MAPVLD));
 	}
-	return pos;
+
+	return 0;
 }
 
 static inline int dbg_burst_from_dcmd(u32 dcmd)
@@ -83,7 +83,6 @@ static int is_phys_valid(unsigned long addr)
 
 static int dbg_show_descriptors(struct seq_file *s, void *p)
 {
-	int pos = 0;
 	int chan = (int)s->private;
 	int i, max_show = 20, burst, width;
 	u32 dcmd;
@@ -94,44 +93,43 @@ static int dbg_show_descriptors(struct seq_file *s, void *p)
 	spin_lock_irqsave(&dma_channels[chan].lock, flags);
 	phys_desc = DDADR(chan);
 
-	pos += seq_printf(s, "DMA channel %d descriptors :\n", chan);
-	pos += seq_printf(s, "[%03d] First descriptor unknown\n", 0);
+	seq_printf(s, "DMA channel %d descriptors :\n", chan);
+	seq_printf(s, "[%03d] First descriptor unknown\n", 0);
 	for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) {
 		desc = phys_to_virt(phys_desc);
 		dcmd = desc->dcmd;
 		burst = dbg_burst_from_dcmd(dcmd);
 		width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
 
-		pos += seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n",
-				  i, phys_desc, desc);
-		pos += seq_printf(s, "\tDDADR = %08x\n", desc->ddadr);
-		pos += seq_printf(s, "\tDSADR = %08x\n", desc->dsadr);
-		pos += seq_printf(s, "\tDTADR = %08x\n", desc->dtadr);
-		pos += seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d"
-				  " width=%d len=%d)\n",
-				  dcmd,
-				  DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
-				  DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
-				  DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
-				  DCMD_STR(ENDIAN), burst, width,
-				  dcmd & DCMD_LENGTH);
+		seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n",
+			   i, phys_desc, desc);
+		seq_printf(s, "\tDDADR = %08x\n", desc->ddadr);
+		seq_printf(s, "\tDSADR = %08x\n", desc->dsadr);
+		seq_printf(s, "\tDTADR = %08x\n", desc->dtadr);
+		seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n",
+			   dcmd,
+			   DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
+			   DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
+			   DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
+			   DCMD_STR(ENDIAN), burst, width,
+			   dcmd & DCMD_LENGTH);
 		phys_desc = desc->ddadr;
 	}
 	if (i == max_show)
-		pos += seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n",
-				  i, phys_desc);
+		seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n",
+			   i, phys_desc);
 	else
-		pos += seq_printf(s, "[%03d] Desc at %08lx is %s\n",
-				  i, phys_desc, phys_desc == DDADR_STOP ?
-				  "DDADR_STOP" : "invalid");
+		seq_printf(s, "[%03d] Desc at %08lx is %s\n",
+			   i, phys_desc, phys_desc == DDADR_STOP ?
+			   "DDADR_STOP" : "invalid");
 
 	spin_unlock_irqrestore(&dma_channels[chan].lock, flags);
-	return pos;
+
+	return 0;
 }
 
 static int dbg_show_chan_state(struct seq_file *s, void *p)
 {
-	int pos = 0;
 	int chan = (int)s->private;
 	u32 dcsr, dcmd;
 	int burst, width;
@@ -142,42 +140,39 @@ static int dbg_show_chan_state(struct seq_file *s, void *p)
 	burst = dbg_burst_from_dcmd(dcmd);
 	width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
 
-	pos += seq_printf(s, "DMA channel %d\n", chan);
-	pos += seq_printf(s, "\tPriority : %s\n",
-			  str_prio[dma_channels[chan].prio]);
-	pos += seq_printf(s, "\tUnaligned transfer bit: %s\n",
-			  DALGN & (1 << chan) ? "yes" : "no");
-	pos += seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
-			  dcsr, DCSR_STR(RUN), DCSR_STR(NODESC),
-			  DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN),
-			  DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN),
-			  DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST),
-			  DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND),
-			  DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR),
-			  DCSR_STR(STARTINTR), DCSR_STR(BUSERR));
-
-	pos += seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d"
-			  " len=%d)\n",
-			  dcmd,
-			  DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
-			  DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
-			  DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
-			  DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH);
-	pos += seq_printf(s, "\tDSADR = %08x\n", DSADR(chan));
-	pos += seq_printf(s, "\tDTADR = %08x\n", DTADR(chan));
-	pos += seq_printf(s, "\tDDADR = %08x\n", DDADR(chan));
-	return pos;
+	seq_printf(s, "DMA channel %d\n", chan);
+	seq_printf(s, "\tPriority : %s\n", str_prio[dma_channels[chan].prio]);
+	seq_printf(s, "\tUnaligned transfer bit: %s\n",
+		   DALGN & (1 << chan) ? "yes" : "no");
+	seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+		   dcsr, DCSR_STR(RUN), DCSR_STR(NODESC),
+		   DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN),
+		   DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN),
+		   DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST),
+		   DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND),
+		   DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR),
+		   DCSR_STR(STARTINTR), DCSR_STR(BUSERR));
+
+	seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n",
+		   dcmd,
+		   DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
+		   DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
+		   DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
+		   DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH);
+	seq_printf(s, "\tDSADR = %08x\n", DSADR(chan));
+	seq_printf(s, "\tDTADR = %08x\n", DTADR(chan));
+	seq_printf(s, "\tDDADR = %08x\n", DDADR(chan));
+
+	return 0;
 }
 
 static int dbg_show_state(struct seq_file *s, void *p)
 {
-	int pos = 0;
-
 	/* basic device status */
-	pos += seq_printf(s, "DMA engine status\n");
-	pos += seq_printf(s, "\tChannel number: %d\n", num_dma_channels);
+	seq_puts(s, "DMA engine status\n");
+	seq_printf(s, "\tChannel number: %d\n", num_dma_channels);
 
-	return pos;
+	return 0;
 }
 
 #define DBGFS_FUNC_DECL(name) \
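
Note: the conversion pattern used throughout this series: seq_printf()
no longer returns a byte count, so show() methods return 0 and, where
the old code bailed out early, seq_has_overflowed() is tested instead.
A hypothetical minimal show() method:

static int example_show(struct seq_file *m, void *p)
{
	int i;

	for (i = 0; i < 16; i++) {
		seq_printf(m, "entry %d\n", i);	/* no return value to sum */
		if (seq_has_overflowed(m))	/* buffer full: stop early */
			return 0;
	}

	return 0;	/* seq_file retries with a larger buffer if needed */
}
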
diff --git a/arch/cris/arch-v10/kernel/fasttimer.c b/arch/cris/arch-v10/kernel/fasttimer.c
index 48a59afbeeb1..e9298739d72e 100644
--- a/arch/cris/arch-v10/kernel/fasttimer.c
+++ b/arch/cris/arch-v10/kernel/fasttimer.c
@@ -527,7 +527,8 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
527 i = debug_log_cnt; 527 i = debug_log_cnt;
528 528
529 while (i != end_i || debug_log_cnt_wrapped) { 529 while (i != end_i || debug_log_cnt_wrapped) {
530 if (seq_printf(m, debug_log_string[i], debug_log_value[i]) < 0) 530 seq_printf(m, debug_log_string[i], debug_log_value[i]);
531 if (seq_has_overflowed(m))
531 return 0; 532 return 0;
532 i = (i+1) % DEBUG_LOG_MAX; 533 i = (i+1) % DEBUG_LOG_MAX;
533 } 534 }
@@ -542,24 +543,22 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
542 int cur = (fast_timers_started - i - 1) % NUM_TIMER_STATS; 543 int cur = (fast_timers_started - i - 1) % NUM_TIMER_STATS;
543 544
544#if 1 //ndef FAST_TIMER_LOG 545#if 1 //ndef FAST_TIMER_LOG
545 seq_printf(m, "div: %i freq: %i delay: %i" 546 seq_printf(m, "div: %i freq: %i delay: %i\n",
546 "\n",
547 timer_div_settings[cur], 547 timer_div_settings[cur],
548 timer_freq_settings[cur], 548 timer_freq_settings[cur],
549 timer_delay_settings[cur]); 549 timer_delay_settings[cur]);
550#endif 550#endif
551#ifdef FAST_TIMER_LOG 551#ifdef FAST_TIMER_LOG
552 t = &timer_started_log[cur]; 552 t = &timer_started_log[cur];
553 if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu " 553 seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
554 "d: %6li us data: 0x%08lX" 554 t->name,
555 "\n", 555 (unsigned long)t->tv_set.tv_jiff,
556 t->name, 556 (unsigned long)t->tv_set.tv_usec,
557 (unsigned long)t->tv_set.tv_jiff, 557 (unsigned long)t->tv_expires.tv_jiff,
558 (unsigned long)t->tv_set.tv_usec, 558 (unsigned long)t->tv_expires.tv_usec,
559 (unsigned long)t->tv_expires.tv_jiff, 559 t->delay_us,
560 (unsigned long)t->tv_expires.tv_usec, 560 t->data);
561 t->delay_us, 561 if (seq_has_overflowed(m))
562 t->data) < 0)
563 return 0; 562 return 0;
564#endif 563#endif
565 } 564 }
@@ -571,16 +570,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
571 seq_printf(m, "Timers added: %i\n", fast_timers_added); 570 seq_printf(m, "Timers added: %i\n", fast_timers_added);
572 for (i = 0; i < num_to_show; i++) { 571 for (i = 0; i < num_to_show; i++) {
573 t = &timer_added_log[(fast_timers_added - i - 1) % NUM_TIMER_STATS]; 572 t = &timer_added_log[(fast_timers_added - i - 1) % NUM_TIMER_STATS];
574 if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu " 573 seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
575 "d: %6li us data: 0x%08lX" 574 t->name,
576 "\n", 575 (unsigned long)t->tv_set.tv_jiff,
577 t->name, 576 (unsigned long)t->tv_set.tv_usec,
578 (unsigned long)t->tv_set.tv_jiff, 577 (unsigned long)t->tv_expires.tv_jiff,
579 (unsigned long)t->tv_set.tv_usec, 578 (unsigned long)t->tv_expires.tv_usec,
580 (unsigned long)t->tv_expires.tv_jiff, 579 t->delay_us,
581 (unsigned long)t->tv_expires.tv_usec, 580 t->data);
582 t->delay_us, 581 if (seq_has_overflowed(m))
583 t->data) < 0)
584 return 0; 582 return 0;
585 } 583 }
586 seq_putc(m, '\n'); 584 seq_putc(m, '\n');
@@ -590,16 +588,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
590 seq_printf(m, "Timers expired: %i\n", fast_timers_expired); 588 seq_printf(m, "Timers expired: %i\n", fast_timers_expired);
591 for (i = 0; i < num_to_show; i++) { 589 for (i = 0; i < num_to_show; i++) {
592 t = &timer_expired_log[(fast_timers_expired - i - 1) % NUM_TIMER_STATS]; 590 t = &timer_expired_log[(fast_timers_expired - i - 1) % NUM_TIMER_STATS];
593 if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu " 591 seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
594 "d: %6li us data: 0x%08lX" 592 t->name,
595 "\n", 593 (unsigned long)t->tv_set.tv_jiff,
596 t->name, 594 (unsigned long)t->tv_set.tv_usec,
597 (unsigned long)t->tv_set.tv_jiff, 595 (unsigned long)t->tv_expires.tv_jiff,
598 (unsigned long)t->tv_set.tv_usec, 596 (unsigned long)t->tv_expires.tv_usec,
599 (unsigned long)t->tv_expires.tv_jiff, 597 t->delay_us,
600 (unsigned long)t->tv_expires.tv_usec, 598 t->data);
601 t->delay_us, 599 if (seq_has_overflowed(m))
602 t->data) < 0)
603 return 0; 600 return 0;
604 } 601 }
605 seq_putc(m, '\n'); 602 seq_putc(m, '\n');
@@ -611,19 +608,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
611 while (t) { 608 while (t) {
612 nextt = t->next; 609 nextt = t->next;
613 local_irq_restore(flags); 610 local_irq_restore(flags);
614 if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu " 611 seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
615 "d: %6li us data: 0x%08lX" 612 t->name,
616/* " func: 0x%08lX" */ 613 (unsigned long)t->tv_set.tv_jiff,
617 "\n", 614 (unsigned long)t->tv_set.tv_usec,
618 t->name, 615 (unsigned long)t->tv_expires.tv_jiff,
619 (unsigned long)t->tv_set.tv_jiff, 616 (unsigned long)t->tv_expires.tv_usec,
620 (unsigned long)t->tv_set.tv_usec, 617 t->delay_us,
621 (unsigned long)t->tv_expires.tv_jiff, 618 t->data);
622 (unsigned long)t->tv_expires.tv_usec, 619 if (seq_has_overflowed(m))
623 t->delay_us,
624 t->data
625/* , t->function */
626 ) < 0)
627 return 0; 620 return 0;
628 local_irq_save(flags); 621 local_irq_save(flags);
629 if (t->next != nextt) 622 if (t->next != nextt)
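The fasttimer conversions above all follow one pattern: the return value of seq_printf() is being retired, so a caller that used to bail out on "seq_printf(...) < 0" now emits unconditionally and then asks seq_has_overflowed() whether the buffer filled. A minimal sketch of the idiom, with an invented record type (not taken from the patch):

	#include <linux/seq_file.h>

	/* Hypothetical record; the calling idiom is the point. */
	struct example_rec {
		const char *name;
		long delay_us;
	};

	static int example_show(struct seq_file *m, void *v)
	{
		struct example_rec *r = m->private;

		/* Old: if (seq_printf(m, ...) < 0) return 0; */
		seq_printf(m, "%-14s d: %6li us\n", r->name, r->delay_us);

		/* New: emit first, then probe the buffer state. */
		if (seq_has_overflowed(m))
			return 0;

		return 0;
	}

Returning 0 in both branches looks odd, but the seq_file core detects the overflow itself and re-invokes the show routine with a larger buffer, so the early return is only a bail-out optimization, not error reporting.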
diff --git a/arch/cris/arch-v10/kernel/setup.c b/arch/cris/arch-v10/kernel/setup.c
index 4f96d71b5154..7ab31f1c7540 100644
--- a/arch/cris/arch-v10/kernel/setup.c
+++ b/arch/cris/arch-v10/kernel/setup.c
@@ -63,35 +63,37 @@ int show_cpuinfo(struct seq_file *m, void *v)
63 else 63 else
64 info = &cpu_info[revision]; 64 info = &cpu_info[revision];
65 65
66 return seq_printf(m, 66 seq_printf(m,
67 "processor\t: 0\n" 67 "processor\t: 0\n"
68 "cpu\t\t: CRIS\n" 68 "cpu\t\t: CRIS\n"
69 "cpu revision\t: %lu\n" 69 "cpu revision\t: %lu\n"
70 "cpu model\t: %s\n" 70 "cpu model\t: %s\n"
71 "cache size\t: %d kB\n" 71 "cache size\t: %d kB\n"
72 "fpu\t\t: %s\n" 72 "fpu\t\t: %s\n"
73 "mmu\t\t: %s\n" 73 "mmu\t\t: %s\n"
74 "mmu DMA bug\t: %s\n" 74 "mmu DMA bug\t: %s\n"
75 "ethernet\t: %s Mbps\n" 75 "ethernet\t: %s Mbps\n"
76 "token ring\t: %s\n" 76 "token ring\t: %s\n"
77 "scsi\t\t: %s\n" 77 "scsi\t\t: %s\n"
78 "ata\t\t: %s\n" 78 "ata\t\t: %s\n"
79 "usb\t\t: %s\n" 79 "usb\t\t: %s\n"
80 "bogomips\t: %lu.%02lu\n", 80 "bogomips\t: %lu.%02lu\n",
81 81
82 revision, 82 revision,
83 info->model, 83 info->model,
84 info->cache, 84 info->cache,
85 info->flags & HAS_FPU ? "yes" : "no", 85 info->flags & HAS_FPU ? "yes" : "no",
86 info->flags & HAS_MMU ? "yes" : "no", 86 info->flags & HAS_MMU ? "yes" : "no",
87 info->flags & HAS_MMU_BUG ? "yes" : "no", 87 info->flags & HAS_MMU_BUG ? "yes" : "no",
88 info->flags & HAS_ETHERNET100 ? "10/100" : "10", 88 info->flags & HAS_ETHERNET100 ? "10/100" : "10",
89 info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no", 89 info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no",
90 info->flags & HAS_SCSI ? "yes" : "no", 90 info->flags & HAS_SCSI ? "yes" : "no",
91 info->flags & HAS_ATA ? "yes" : "no", 91 info->flags & HAS_ATA ? "yes" : "no",
92 info->flags & HAS_USB ? "yes" : "no", 92 info->flags & HAS_USB ? "yes" : "no",
93 (loops_per_jiffy * HZ + 500) / 500000, 93 (loops_per_jiffy * HZ + 500) / 500000,
94 ((loops_per_jiffy * HZ + 500) / 5000) % 100); 94 ((loops_per_jiffy * HZ + 500) / 5000) % 100);
95
96 return 0;
95} 97}
96 98
97#endif /* CONFIG_PROC_FS */ 99#endif /* CONFIG_PROC_FS */
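show_cpuinfo() loses its "return seq_printf(...)" tail for the same reason: the value never carried anything the caller could usefully report. For orientation, a show routine of this shape is conventionally exposed through the single_open() helper; the wiring below is an assumption for illustration, not part of the patch:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int cpuinfo_proc_open(struct inode *inode, struct file *file)
	{
		/* show_cpuinfo has the int (*)(struct seq_file *, void *)
		 * signature that single_open() expects. */
		return single_open(file, show_cpuinfo, NULL);
	}

	static const struct file_operations cpuinfo_proc_fops = {
		.open		= cpuinfo_proc_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= single_release,
	};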
diff --git a/arch/cris/arch-v32/kernel/fasttimer.c b/arch/cris/arch-v32/kernel/fasttimer.c
index b130c2c5fdd8..5c84dbb99f30 100644
--- a/arch/cris/arch-v32/kernel/fasttimer.c
+++ b/arch/cris/arch-v32/kernel/fasttimer.c
@@ -501,7 +501,8 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
501 i = debug_log_cnt; 501 i = debug_log_cnt;
502 502
503 while ((i != end_i || debug_log_cnt_wrapped)) { 503 while ((i != end_i || debug_log_cnt_wrapped)) {
504 if (seq_printf(m, debug_log_string[i], debug_log_value[i]) < 0) 504 seq_printf(m, debug_log_string[i], debug_log_value[i]);
505 if (seq_has_overflowed(m))
505 return 0; 506 return 0;
506 i = (i+1) % DEBUG_LOG_MAX; 507 i = (i+1) % DEBUG_LOG_MAX;
507 } 508 }
@@ -516,23 +517,21 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
516 int cur = (fast_timers_started - i - 1) % NUM_TIMER_STATS; 517 int cur = (fast_timers_started - i - 1) % NUM_TIMER_STATS;
517 518
518#if 1 //ndef FAST_TIMER_LOG 519#if 1 //ndef FAST_TIMER_LOG
519 seq_printf(m, "div: %i delay: %i" 520 seq_printf(m, "div: %i delay: %i\n",
520 "\n",
521 timer_div_settings[cur], 521 timer_div_settings[cur],
522 timer_delay_settings[cur]); 522 timer_delay_settings[cur]);
523#endif 523#endif
524#ifdef FAST_TIMER_LOG 524#ifdef FAST_TIMER_LOG
525 t = &timer_started_log[cur]; 525 t = &timer_started_log[cur];
526 if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu " 526 seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
527 "d: %6li us data: 0x%08lX" 527 t->name,
528 "\n", 528 (unsigned long)t->tv_set.tv_jiff,
529 t->name, 529 (unsigned long)t->tv_set.tv_usec,
530 (unsigned long)t->tv_set.tv_jiff, 530 (unsigned long)t->tv_expires.tv_jiff,
531 (unsigned long)t->tv_set.tv_usec, 531 (unsigned long)t->tv_expires.tv_usec,
532 (unsigned long)t->tv_expires.tv_jiff, 532 t->delay_us,
533 (unsigned long)t->tv_expires.tv_usec, 533 t->data);
534 t->delay_us, 534 if (seq_has_overflowed(m))
535 t->data) < 0)
536 return 0; 535 return 0;
537#endif 536#endif
538 } 537 }
@@ -544,16 +543,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
544 seq_printf(m, "Timers added: %i\n", fast_timers_added); 543 seq_printf(m, "Timers added: %i\n", fast_timers_added);
545 for (i = 0; i < num_to_show; i++) { 544 for (i = 0; i < num_to_show; i++) {
546 t = &timer_added_log[(fast_timers_added - i - 1) % NUM_TIMER_STATS]; 545 t = &timer_added_log[(fast_timers_added - i - 1) % NUM_TIMER_STATS];
547 if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu " 546 seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
548 "d: %6li us data: 0x%08lX" 547 t->name,
549 "\n", 548 (unsigned long)t->tv_set.tv_jiff,
550 t->name, 549 (unsigned long)t->tv_set.tv_usec,
551 (unsigned long)t->tv_set.tv_jiff, 550 (unsigned long)t->tv_expires.tv_jiff,
552 (unsigned long)t->tv_set.tv_usec, 551 (unsigned long)t->tv_expires.tv_usec,
553 (unsigned long)t->tv_expires.tv_jiff, 552 t->delay_us,
554 (unsigned long)t->tv_expires.tv_usec, 553 t->data);
555 t->delay_us, 554 if (seq_has_overflowed(m))
556 t->data) < 0)
557 return 0; 555 return 0;
558 } 556 }
559 seq_putc(m, '\n'); 557 seq_putc(m, '\n');
@@ -563,16 +561,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
563 seq_printf(m, "Timers expired: %i\n", fast_timers_expired); 561 seq_printf(m, "Timers expired: %i\n", fast_timers_expired);
564 for (i = 0; i < num_to_show; i++){ 562 for (i = 0; i < num_to_show; i++){
565 t = &timer_expired_log[(fast_timers_expired - i - 1) % NUM_TIMER_STATS]; 563 t = &timer_expired_log[(fast_timers_expired - i - 1) % NUM_TIMER_STATS];
566 if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu " 564 seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
567 "d: %6li us data: 0x%08lX" 565 t->name,
568 "\n", 566 (unsigned long)t->tv_set.tv_jiff,
569 t->name, 567 (unsigned long)t->tv_set.tv_usec,
570 (unsigned long)t->tv_set.tv_jiff, 568 (unsigned long)t->tv_expires.tv_jiff,
571 (unsigned long)t->tv_set.tv_usec, 569 (unsigned long)t->tv_expires.tv_usec,
572 (unsigned long)t->tv_expires.tv_jiff, 570 t->delay_us,
573 (unsigned long)t->tv_expires.tv_usec, 571 t->data);
574 t->delay_us, 572 if (seq_has_overflowed(m))
575 t->data) < 0)
576 return 0; 573 return 0;
577 } 574 }
578 seq_putc(m, '\n'); 575 seq_putc(m, '\n');
@@ -584,19 +581,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
584 while (t != NULL){ 581 while (t != NULL){
585 nextt = t->next; 582 nextt = t->next;
586 local_irq_restore(flags); 583 local_irq_restore(flags);
587 if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu " 584 seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
588 "d: %6li us data: 0x%08lX" 585 t->name,
589/* " func: 0x%08lX" */ 586 (unsigned long)t->tv_set.tv_jiff,
590 "\n", 587 (unsigned long)t->tv_set.tv_usec,
591 t->name, 588 (unsigned long)t->tv_expires.tv_jiff,
592 (unsigned long)t->tv_set.tv_jiff, 589 (unsigned long)t->tv_expires.tv_usec,
593 (unsigned long)t->tv_set.tv_usec, 590 t->delay_us,
594 (unsigned long)t->tv_expires.tv_jiff, 591 t->data);
595 (unsigned long)t->tv_expires.tv_usec, 592 if (seq_has_overflowed(m))
596 t->delay_us,
597 t->data
598/* , t->function */
599 ) < 0)
600 return 0; 593 return 0;
601 local_irq_save(flags); 594 local_irq_save(flags);
602 if (t->next != nextt) 595 if (t->next != nextt)
diff --git a/arch/cris/arch-v32/kernel/setup.c b/arch/cris/arch-v32/kernel/setup.c
index 61e10ae65296..81715c683baf 100644
--- a/arch/cris/arch-v32/kernel/setup.c
+++ b/arch/cris/arch-v32/kernel/setup.c
@@ -77,36 +77,38 @@ int show_cpuinfo(struct seq_file *m, void *v)
77 } 77 }
78 } 78 }
79 79
80 return seq_printf(m, 80 seq_printf(m,
81 "processor\t: %d\n" 81 "processor\t: %d\n"
82 "cpu\t\t: CRIS\n" 82 "cpu\t\t: CRIS\n"
83 "cpu revision\t: %lu\n" 83 "cpu revision\t: %lu\n"
84 "cpu model\t: %s\n" 84 "cpu model\t: %s\n"
85 "cache size\t: %d KB\n" 85 "cache size\t: %d KB\n"
86 "fpu\t\t: %s\n" 86 "fpu\t\t: %s\n"
87 "mmu\t\t: %s\n" 87 "mmu\t\t: %s\n"
88 "mmu DMA bug\t: %s\n" 88 "mmu DMA bug\t: %s\n"
89 "ethernet\t: %s Mbps\n" 89 "ethernet\t: %s Mbps\n"
90 "token ring\t: %s\n" 90 "token ring\t: %s\n"
91 "scsi\t\t: %s\n" 91 "scsi\t\t: %s\n"
92 "ata\t\t: %s\n" 92 "ata\t\t: %s\n"
93 "usb\t\t: %s\n" 93 "usb\t\t: %s\n"
94 "bogomips\t: %lu.%02lu\n\n", 94 "bogomips\t: %lu.%02lu\n\n",
95 95
96 cpu, 96 cpu,
97 revision, 97 revision,
98 info->cpu_model, 98 info->cpu_model,
99 info->cache_size, 99 info->cache_size,
100 info->flags & HAS_FPU ? "yes" : "no", 100 info->flags & HAS_FPU ? "yes" : "no",
101 info->flags & HAS_MMU ? "yes" : "no", 101 info->flags & HAS_MMU ? "yes" : "no",
102 info->flags & HAS_MMU_BUG ? "yes" : "no", 102 info->flags & HAS_MMU_BUG ? "yes" : "no",
103 info->flags & HAS_ETHERNET100 ? "10/100" : "10", 103 info->flags & HAS_ETHERNET100 ? "10/100" : "10",
104 info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no", 104 info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no",
105 info->flags & HAS_SCSI ? "yes" : "no", 105 info->flags & HAS_SCSI ? "yes" : "no",
106 info->flags & HAS_ATA ? "yes" : "no", 106 info->flags & HAS_ATA ? "yes" : "no",
107 info->flags & HAS_USB ? "yes" : "no", 107 info->flags & HAS_USB ? "yes" : "no",
108 (loops_per_jiffy * HZ + 500) / 500000, 108 (loops_per_jiffy * HZ + 500) / 500000,
109 ((loops_per_jiffy * HZ + 500) / 5000) % 100); 109 ((loops_per_jiffy * HZ + 500) / 5000) % 100);
110
111 return 0;
110} 112}
111 113
112#endif /* CONFIG_PROC_FS */ 114#endif /* CONFIG_PROC_FS */
diff --git a/arch/microblaze/kernel/cpu/mb.c b/arch/microblaze/kernel/cpu/mb.c
index 7b5dca7ed39d..9581d194d9e4 100644
--- a/arch/microblaze/kernel/cpu/mb.c
+++ b/arch/microblaze/kernel/cpu/mb.c
@@ -27,7 +27,6 @@
27 27
28static int show_cpuinfo(struct seq_file *m, void *v) 28static int show_cpuinfo(struct seq_file *m, void *v)
29{ 29{
30 int count = 0;
31 char *fpga_family = "Unknown"; 30 char *fpga_family = "Unknown";
32 char *cpu_ver = "Unknown"; 31 char *cpu_ver = "Unknown";
33 int i; 32 int i;
@@ -48,91 +47,89 @@ static int show_cpuinfo(struct seq_file *m, void *v)
48 } 47 }
49 } 48 }
50 49
51 count = seq_printf(m, 50 seq_printf(m,
52 "CPU-Family: MicroBlaze\n" 51 "CPU-Family: MicroBlaze\n"
53 "FPGA-Arch: %s\n" 52 "FPGA-Arch: %s\n"
54 "CPU-Ver: %s, %s endian\n" 53 "CPU-Ver: %s, %s endian\n"
55 "CPU-MHz: %d.%02d\n" 54 "CPU-MHz: %d.%02d\n"
56 "BogoMips: %lu.%02lu\n", 55 "BogoMips: %lu.%02lu\n",
57 fpga_family, 56 fpga_family,
58 cpu_ver, 57 cpu_ver,
59 cpuinfo.endian ? "little" : "big", 58 cpuinfo.endian ? "little" : "big",
60 cpuinfo.cpu_clock_freq / 59 cpuinfo.cpu_clock_freq / 1000000,
61 1000000, 60 cpuinfo.cpu_clock_freq % 1000000,
62 cpuinfo.cpu_clock_freq % 61 loops_per_jiffy / (500000 / HZ),
63 1000000, 62 (loops_per_jiffy / (5000 / HZ)) % 100);
64 loops_per_jiffy / (500000 / HZ), 63
65 (loops_per_jiffy / (5000 / HZ)) % 100); 64 seq_printf(m,
66 65 "HW:\n Shift:\t\t%s\n"
67 count += seq_printf(m, 66 " MSR:\t\t%s\n"
68 "HW:\n Shift:\t\t%s\n" 67 " PCMP:\t\t%s\n"
69 " MSR:\t\t%s\n" 68 " DIV:\t\t%s\n",
70 " PCMP:\t\t%s\n" 69 (cpuinfo.use_instr & PVR0_USE_BARREL_MASK) ? "yes" : "no",
71 " DIV:\t\t%s\n", 70 (cpuinfo.use_instr & PVR2_USE_MSR_INSTR) ? "yes" : "no",
72 (cpuinfo.use_instr & PVR0_USE_BARREL_MASK) ? "yes" : "no", 71 (cpuinfo.use_instr & PVR2_USE_PCMP_INSTR) ? "yes" : "no",
73 (cpuinfo.use_instr & PVR2_USE_MSR_INSTR) ? "yes" : "no", 72 (cpuinfo.use_instr & PVR0_USE_DIV_MASK) ? "yes" : "no");
74 (cpuinfo.use_instr & PVR2_USE_PCMP_INSTR) ? "yes" : "no", 73
75 (cpuinfo.use_instr & PVR0_USE_DIV_MASK) ? "yes" : "no"); 74 seq_printf(m, " MMU:\t\t%x\n", cpuinfo.mmu);
76 75
77 count += seq_printf(m, 76 seq_printf(m,
78 " MMU:\t\t%x\n", 77 " MUL:\t\t%s\n"
79 cpuinfo.mmu); 78 " FPU:\t\t%s\n",
80 79 (cpuinfo.use_mult & PVR2_USE_MUL64_MASK) ? "v2" :
81 count += seq_printf(m, 80 (cpuinfo.use_mult & PVR0_USE_HW_MUL_MASK) ? "v1" : "no",
82 " MUL:\t\t%s\n" 81 (cpuinfo.use_fpu & PVR2_USE_FPU2_MASK) ? "v2" :
83 " FPU:\t\t%s\n", 82 (cpuinfo.use_fpu & PVR0_USE_FPU_MASK) ? "v1" : "no");
84 (cpuinfo.use_mult & PVR2_USE_MUL64_MASK) ? "v2" : 83
85 (cpuinfo.use_mult & PVR0_USE_HW_MUL_MASK) ? "v1" : "no", 84 seq_printf(m,
86 (cpuinfo.use_fpu & PVR2_USE_FPU2_MASK) ? "v2" : 85 " Exc:\t\t%s%s%s%s%s%s%s%s\n",
87 (cpuinfo.use_fpu & PVR0_USE_FPU_MASK) ? "v1" : "no"); 86 (cpuinfo.use_exc & PVR2_OPCODE_0x0_ILL_MASK) ? "op0x0 " : "",
88 87 (cpuinfo.use_exc & PVR2_UNALIGNED_EXC_MASK) ? "unal " : "",
89 count += seq_printf(m, 88 (cpuinfo.use_exc & PVR2_ILL_OPCODE_EXC_MASK) ? "ill " : "",
90 " Exc:\t\t%s%s%s%s%s%s%s%s\n", 89 (cpuinfo.use_exc & PVR2_IOPB_BUS_EXC_MASK) ? "iopb " : "",
91 (cpuinfo.use_exc & PVR2_OPCODE_0x0_ILL_MASK) ? "op0x0 " : "", 90 (cpuinfo.use_exc & PVR2_DOPB_BUS_EXC_MASK) ? "dopb " : "",
92 (cpuinfo.use_exc & PVR2_UNALIGNED_EXC_MASK) ? "unal " : "", 91 (cpuinfo.use_exc & PVR2_DIV_ZERO_EXC_MASK) ? "zero " : "",
93 (cpuinfo.use_exc & PVR2_ILL_OPCODE_EXC_MASK) ? "ill " : "", 92 (cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "",
94 (cpuinfo.use_exc & PVR2_IOPB_BUS_EXC_MASK) ? "iopb " : "", 93 (cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : "");
95 (cpuinfo.use_exc & PVR2_DOPB_BUS_EXC_MASK) ? "dopb " : "", 94
96 (cpuinfo.use_exc & PVR2_DIV_ZERO_EXC_MASK) ? "zero " : "", 95 seq_printf(m,
97 (cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "", 96 "Stream-insns:\t%sprivileged\n",
98 (cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : ""); 97 cpuinfo.mmu_privins ? "un" : "");
99
100 count += seq_printf(m,
101 "Stream-insns:\t%sprivileged\n",
102 cpuinfo.mmu_privins ? "un" : "");
103 98
104 if (cpuinfo.use_icache) 99 if (cpuinfo.use_icache)
105 count += seq_printf(m, 100 seq_printf(m,
106 "Icache:\t\t%ukB\tline length:\t%dB\n", 101 "Icache:\t\t%ukB\tline length:\t%dB\n",
107 cpuinfo.icache_size >> 10, 102 cpuinfo.icache_size >> 10,
108 cpuinfo.icache_line_length); 103 cpuinfo.icache_line_length);
109 else 104 else
110 count += seq_printf(m, "Icache:\t\tno\n"); 105 seq_puts(m, "Icache:\t\tno\n");
111 106
112 if (cpuinfo.use_dcache) { 107 if (cpuinfo.use_dcache) {
113 count += seq_printf(m, 108 seq_printf(m,
114 "Dcache:\t\t%ukB\tline length:\t%dB\n", 109 "Dcache:\t\t%ukB\tline length:\t%dB\n",
115 cpuinfo.dcache_size >> 10, 110 cpuinfo.dcache_size >> 10,
116 cpuinfo.dcache_line_length); 111 cpuinfo.dcache_line_length);
117 seq_printf(m, "Dcache-Policy:\t"); 112 seq_puts(m, "Dcache-Policy:\t");
118 if (cpuinfo.dcache_wb) 113 if (cpuinfo.dcache_wb)
119 count += seq_printf(m, "write-back\n"); 114 seq_puts(m, "write-back\n");
120 else 115 else
121 count += seq_printf(m, "write-through\n"); 116 seq_puts(m, "write-through\n");
122 } else 117 } else {
123 count += seq_printf(m, "Dcache:\t\tno\n"); 118 seq_puts(m, "Dcache:\t\tno\n");
119 }
120
121 seq_printf(m,
122 "HW-Debug:\t%s\n",
123 cpuinfo.hw_debug ? "yes" : "no");
124 124
125 count += seq_printf(m, 125 seq_printf(m,
126 "HW-Debug:\t%s\n", 126 "PVR-USR1:\t%02x\n"
127 cpuinfo.hw_debug ? "yes" : "no"); 127 "PVR-USR2:\t%08x\n",
128 cpuinfo.pvr_user1,
129 cpuinfo.pvr_user2);
128 130
129 count += seq_printf(m, 131 seq_printf(m, "Page size:\t%lu\n", PAGE_SIZE);
130 "PVR-USR1:\t%02x\n"
131 "PVR-USR2:\t%08x\n",
132 cpuinfo.pvr_user1,
133 cpuinfo.pvr_user2);
134 132
135 count += seq_printf(m, "Page size:\t%lu\n", PAGE_SIZE);
136 return 0; 133 return 0;
137} 134}
138 135
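Besides dropping the dead "count" accumulator, the microblaze hunk swaps seq_printf() for seq_puts() wherever the string contains no conversion specifiers; that skips format parsing and avoids treating fixed text as a format string. A condensed illustration (the helper name is invented):

	#include <linux/seq_file.h>

	static void example_cache_policy(struct seq_file *m, bool write_back)
	{
		seq_puts(m, "Dcache-Policy:\t");	/* constant text */
		if (write_back)
			seq_puts(m, "write-back\n");
		else
			seq_puts(m, "write-through\n");

		/* A format string is only warranted when values are
		 * interpolated: */
		seq_printf(m, "Page size:\t%lu\n", PAGE_SIZE);
	}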
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index a223691dff4f..1d96de0bd4aa 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -126,47 +126,46 @@ void __init setup_cpuinfo(void)
126 */ 126 */
127static int show_cpuinfo(struct seq_file *m, void *v) 127static int show_cpuinfo(struct seq_file *m, void *v)
128{ 128{
129 int count = 0;
130 const u32 clockfreq = cpuinfo.cpu_clock_freq; 129 const u32 clockfreq = cpuinfo.cpu_clock_freq;
131 130
132 count = seq_printf(m, 131 seq_printf(m,
133 "CPU:\t\tNios II/%s\n" 132 "CPU:\t\tNios II/%s\n"
134 "MMU:\t\t%s\n" 133 "MMU:\t\t%s\n"
135 "FPU:\t\tnone\n" 134 "FPU:\t\tnone\n"
136 "Clocking:\t%u.%02u MHz\n" 135 "Clocking:\t%u.%02u MHz\n"
137 "BogoMips:\t%lu.%02lu\n" 136 "BogoMips:\t%lu.%02lu\n"
138 "Calibration:\t%lu loops\n", 137 "Calibration:\t%lu loops\n",
139 cpuinfo.cpu_impl, 138 cpuinfo.cpu_impl,
140 cpuinfo.mmu ? "present" : "none", 139 cpuinfo.mmu ? "present" : "none",
141 clockfreq / 1000000, (clockfreq / 100000) % 10, 140 clockfreq / 1000000, (clockfreq / 100000) % 10,
142 (loops_per_jiffy * HZ) / 500000, 141 (loops_per_jiffy * HZ) / 500000,
143 ((loops_per_jiffy * HZ) / 5000) % 100, 142 ((loops_per_jiffy * HZ) / 5000) % 100,
144 (loops_per_jiffy * HZ)); 143 (loops_per_jiffy * HZ));
145 144
146 count += seq_printf(m, 145 seq_printf(m,
147 "HW:\n" 146 "HW:\n"
148 " MUL:\t\t%s\n" 147 " MUL:\t\t%s\n"
149 " MULX:\t\t%s\n" 148 " MULX:\t\t%s\n"
150 " DIV:\t\t%s\n", 149 " DIV:\t\t%s\n",
151 cpuinfo.has_mul ? "yes" : "no", 150 cpuinfo.has_mul ? "yes" : "no",
152 cpuinfo.has_mulx ? "yes" : "no", 151 cpuinfo.has_mulx ? "yes" : "no",
153 cpuinfo.has_div ? "yes" : "no"); 152 cpuinfo.has_div ? "yes" : "no");
154 153
155 count += seq_printf(m, 154 seq_printf(m,
156 "Icache:\t\t%ukB, line length: %u\n", 155 "Icache:\t\t%ukB, line length: %u\n",
157 cpuinfo.icache_size >> 10, 156 cpuinfo.icache_size >> 10,
158 cpuinfo.icache_line_size); 157 cpuinfo.icache_line_size);
159 158
160 count += seq_printf(m, 159 seq_printf(m,
161 "Dcache:\t\t%ukB, line length: %u\n", 160 "Dcache:\t\t%ukB, line length: %u\n",
162 cpuinfo.dcache_size >> 10, 161 cpuinfo.dcache_size >> 10,
163 cpuinfo.dcache_line_size); 162 cpuinfo.dcache_line_size);
164 163
165 count += seq_printf(m, 164 seq_printf(m,
166 "TLB:\t\t%u ways, %u entries, %u PID bits\n", 165 "TLB:\t\t%u ways, %u entries, %u PID bits\n",
167 cpuinfo.tlb_num_ways, 166 cpuinfo.tlb_num_ways,
168 cpuinfo.tlb_num_entries, 167 cpuinfo.tlb_num_entries,
169 cpuinfo.tlb_pid_num_bits); 168 cpuinfo.tlb_pid_num_bits);
170 169
171 return 0; 170 return 0;
172} 171}
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 4fc7ccc0a2cf..b4ed8b36e078 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -329,30 +329,32 @@ static int show_cpuinfo(struct seq_file *m, void *v)
329 version = (vr & SPR_VR_VER) >> 24; 329 version = (vr & SPR_VR_VER) >> 24;
330 revision = vr & SPR_VR_REV; 330 revision = vr & SPR_VR_REV;
331 331
332 return seq_printf(m, 332 seq_printf(m,
333 "cpu\t\t: OpenRISC-%x\n" 333 "cpu\t\t: OpenRISC-%x\n"
334 "revision\t: %d\n" 334 "revision\t: %d\n"
335 "frequency\t: %ld\n" 335 "frequency\t: %ld\n"
336 "dcache size\t: %d bytes\n" 336 "dcache size\t: %d bytes\n"
337 "dcache block size\t: %d bytes\n" 337 "dcache block size\t: %d bytes\n"
338 "icache size\t: %d bytes\n" 338 "icache size\t: %d bytes\n"
339 "icache block size\t: %d bytes\n" 339 "icache block size\t: %d bytes\n"
340 "immu\t\t: %d entries, %lu ways\n" 340 "immu\t\t: %d entries, %lu ways\n"
341 "dmmu\t\t: %d entries, %lu ways\n" 341 "dmmu\t\t: %d entries, %lu ways\n"
342 "bogomips\t: %lu.%02lu\n", 342 "bogomips\t: %lu.%02lu\n",
343 version, 343 version,
344 revision, 344 revision,
345 loops_per_jiffy * HZ, 345 loops_per_jiffy * HZ,
346 cpuinfo.dcache_size, 346 cpuinfo.dcache_size,
347 cpuinfo.dcache_block_size, 347 cpuinfo.dcache_block_size,
348 cpuinfo.icache_size, 348 cpuinfo.icache_size,
349 cpuinfo.icache_block_size, 349 cpuinfo.icache_block_size,
350 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), 350 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
351 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW), 351 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
352 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), 352 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
353 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW), 353 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
354 (loops_per_jiffy * HZ) / 500000, 354 (loops_per_jiffy * HZ) / 500000,
355 ((loops_per_jiffy * HZ) / 5000) % 100); 355 ((loops_per_jiffy * HZ) / 5000) % 100);
356
357 return 0;
356} 358}
357 359
358static void *c_start(struct seq_file *m, loff_t * pos) 360static void *c_start(struct seq_file *m, loff_t * pos)
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
index 48bf5b080bcf..ac46c2c24f99 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -29,8 +29,9 @@ static int opal_power_control_event(struct notifier_block *nb,
29 29
30 switch (type) { 30 switch (type) {
31 case SOFT_REBOOT: 31 case SOFT_REBOOT:
32 /* Fall through. The service processor is responsible for 32 pr_info("OPAL: reboot requested\n");
33 * bringing the machine back up */ 33 orderly_reboot();
34 break;
34 case SOFT_OFF: 35 case SOFT_OFF:
35 pr_info("OPAL: poweroff requested\n"); 36 pr_info("OPAL: poweroff requested\n");
36 orderly_poweroff(true); 37 orderly_poweroff(true);
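The opal-power hunk replaces a deliberate fall-through (SOFT_REBOOT previously dropped into the next case on the assumption that the service processor would bring the machine back up) with the orderly_reboot() helper this series introduces in kernel/reboot.c. Like orderly_poweroff(), it schedules the userspace shutdown path rather than acting inline, so there is no status to propagate. A hedged sketch of a handler using the pair; the notifier scaffolding is illustrative, and SOFT_REBOOT/SOFT_OFF are the OPAL event codes from the hunk above:

	#include <linux/notifier.h>
	#include <linux/printk.h>
	#include <linux/reboot.h>

	static int example_power_event(struct notifier_block *nb,
				       unsigned long type, void *data)
	{
		switch (type) {
		case SOFT_REBOOT:
			pr_info("reboot requested\n");
			orderly_reboot();	/* graceful reboot via init */
			break;
		case SOFT_OFF:
			pr_info("poweroff requested\n");
			orderly_poweroff(true);	/* force if helper fails */
			break;
		}
		return NOTIFY_OK;
	}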
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a5ced5c3c1e0..de2726a487b0 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -328,6 +328,7 @@ config COMPAT
328 select COMPAT_BINFMT_ELF if BINFMT_ELF 328 select COMPAT_BINFMT_ELF if BINFMT_ELF
329 select ARCH_WANT_OLD_COMPAT_IPC 329 select ARCH_WANT_OLD_COMPAT_IPC
330 select COMPAT_OLD_SIGACTION 330 select COMPAT_OLD_SIGACTION
331 depends on MULTIUSER
331 help 332 help
332 Select this option if you want to enable your system kernel to 333 Select this option if you want to enable your system kernel to
333 handle system-calls from ELF binaries for 31 bit ESA. This option 334 handle system-calls from ELF binaries for 31 bit ESA. This option
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 3229a2e570df..c22d4402ae45 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -45,8 +45,10 @@ static int pci_perf_show(struct seq_file *m, void *v)
45 45
46 if (!zdev) 46 if (!zdev)
47 return 0; 47 return 0;
48 if (!zdev->fmb) 48 if (!zdev->fmb) {
49 return seq_printf(m, "FMB statistics disabled\n"); 49 seq_puts(m, "FMB statistics disabled\n");
50 return 0;
51 }
50 52
51 /* header */ 53 /* header */
52 seq_printf(m, "FMB @ %p\n", zdev->fmb); 54 seq_printf(m, "FMB @ %p\n", zdev->fmb);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index a041e094b8b9..d76f13d6d8d6 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -404,11 +404,10 @@ static const struct file_operations mtrr_fops = {
404static int mtrr_seq_show(struct seq_file *seq, void *offset) 404static int mtrr_seq_show(struct seq_file *seq, void *offset)
405{ 405{
406 char factor; 406 char factor;
407 int i, max, len; 407 int i, max;
408 mtrr_type type; 408 mtrr_type type;
409 unsigned long base, size; 409 unsigned long base, size;
410 410
411 len = 0;
412 max = num_var_ranges; 411 max = num_var_ranges;
413 for (i = 0; i < max; i++) { 412 for (i = 0; i < max; i++) {
414 mtrr_if->get(i, &base, &size, &type); 413 mtrr_if->get(i, &base, &size, &type);
@@ -425,11 +424,10 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
425 size >>= 20 - PAGE_SHIFT; 424 size >>= 20 - PAGE_SHIFT;
426 } 425 }
427 /* Base can be > 32bit */ 426 /* Base can be > 32bit */
428 len += seq_printf(seq, "reg%02i: base=0x%06lx000 " 427 seq_printf(seq, "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
429 "(%5luMB), size=%5lu%cB, count=%d: %s\n", 428 i, base, base >> (20 - PAGE_SHIFT),
430 i, base, base >> (20 - PAGE_SHIFT), size, 429 size, factor,
431 factor, mtrr_usage_table[i], 430 mtrr_usage_table[i], mtrr_attrib_to_str(type));
432 mtrr_attrib_to_str(type));
433 } 431 }
434 return 0; 432 return 0;
435} 433}
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index aab7158d2afe..77262009f89d 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -843,7 +843,6 @@ static int print_wakeup_source_stats(struct seq_file *m,
843 unsigned long active_count; 843 unsigned long active_count;
844 ktime_t active_time; 844 ktime_t active_time;
845 ktime_t prevent_sleep_time; 845 ktime_t prevent_sleep_time;
846 int ret;
847 846
848 spin_lock_irqsave(&ws->lock, flags); 847 spin_lock_irqsave(&ws->lock, flags);
849 848
@@ -866,17 +865,16 @@ static int print_wakeup_source_stats(struct seq_file *m,
866 active_time = ktime_set(0, 0); 865 active_time = ktime_set(0, 0);
867 } 866 }
868 867
869 ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t" 868 seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n",
870 "%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", 869 ws->name, active_count, ws->event_count,
871 ws->name, active_count, ws->event_count, 870 ws->wakeup_count, ws->expire_count,
872 ws->wakeup_count, ws->expire_count, 871 ktime_to_ms(active_time), ktime_to_ms(total_time),
873 ktime_to_ms(active_time), ktime_to_ms(total_time), 872 ktime_to_ms(max_time), ktime_to_ms(ws->last_time),
874 ktime_to_ms(max_time), ktime_to_ms(ws->last_time), 873 ktime_to_ms(prevent_sleep_time));
875 ktime_to_ms(prevent_sleep_time));
876 874
877 spin_unlock_irqrestore(&ws->lock, flags); 875 spin_unlock_irqrestore(&ws->lock, flags);
878 876
879 return ret; 877 return 0;
880} 878}
881 879
882/** 880/**
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 2ce3dfd7e6b9..876d0c3eaf58 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -137,7 +137,7 @@
137 137
138*/ 138*/
139 139
140static bool verbose = 0; 140static int verbose;
141static int major = PG_MAJOR; 141static int major = PG_MAJOR;
142static char *name = PG_NAME; 142static char *name = PG_NAME;
143static int disable = 0; 143static int disable = 0;
@@ -168,7 +168,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
168 168
169#include <asm/uaccess.h> 169#include <asm/uaccess.h>
170 170
171module_param(verbose, bool, 0644); 171module_param(verbose, int, 0644);
172module_param(major, int, 0); 172module_param(major, int, 0);
173module_param(name, charp, 0); 173module_param(name, charp, 0);
174module_param_array(drive0, int, NULL, 0); 174module_param_array(drive0, int, NULL, 0);
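The pg.c change retypes the verbose knob from bool to int, which lets it carry a graded verbosity level rather than a bare on/off switch while keeping "verbose=1" from existing setups working. A sketch of the pattern (names and level meanings assumed for illustration):

	#include <linux/module.h>
	#include <linux/moduleparam.h>

	static int verbose;	/* 0 = quiet, 1 = status, 2 = debug */
	module_param(verbose, int, 0644);
	MODULE_PARM_DESC(verbose, "0=quiet, 1=status messages, 2=debug");

	static void example_log(void)
	{
		if (verbose)		/* level 1 and up */
			pr_info("status message\n");
		if (verbose > 1)	/* level 2 only */
			pr_info("extra debug detail\n");
	}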
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 871bd3550cb0..c94386aa563d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -43,11 +43,22 @@ static const char *default_compressor = "lzo";
43/* Module params (documentation at end) */ 43/* Module params (documentation at end) */
44static unsigned int num_devices = 1; 44static unsigned int num_devices = 1;
45 45
46static inline void deprecated_attr_warn(const char *name)
47{
48 pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
49 task_pid_nr(current),
50 current->comm,
51 name,
52 "See zram documentation.");
53}
54
46#define ZRAM_ATTR_RO(name) \ 55#define ZRAM_ATTR_RO(name) \
47static ssize_t name##_show(struct device *d, \ 56static ssize_t name##_show(struct device *d, \
48 struct device_attribute *attr, char *b) \ 57 struct device_attribute *attr, char *b) \
49{ \ 58{ \
50 struct zram *zram = dev_to_zram(d); \ 59 struct zram *zram = dev_to_zram(d); \
60 \
61 deprecated_attr_warn(__stringify(name)); \
51 return scnprintf(b, PAGE_SIZE, "%llu\n", \ 62 return scnprintf(b, PAGE_SIZE, "%llu\n", \
52 (u64)atomic64_read(&zram->stats.name)); \ 63 (u64)atomic64_read(&zram->stats.name)); \
53} \ 64} \
@@ -89,6 +100,7 @@ static ssize_t orig_data_size_show(struct device *dev,
89{ 100{
90 struct zram *zram = dev_to_zram(dev); 101 struct zram *zram = dev_to_zram(dev);
91 102
103 deprecated_attr_warn("orig_data_size");
92 return scnprintf(buf, PAGE_SIZE, "%llu\n", 104 return scnprintf(buf, PAGE_SIZE, "%llu\n",
93 (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); 105 (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
94} 106}
@@ -99,6 +111,7 @@ static ssize_t mem_used_total_show(struct device *dev,
99 u64 val = 0; 111 u64 val = 0;
100 struct zram *zram = dev_to_zram(dev); 112 struct zram *zram = dev_to_zram(dev);
101 113
114 deprecated_attr_warn("mem_used_total");
102 down_read(&zram->init_lock); 115 down_read(&zram->init_lock);
103 if (init_done(zram)) { 116 if (init_done(zram)) {
104 struct zram_meta *meta = zram->meta; 117 struct zram_meta *meta = zram->meta;
@@ -128,6 +141,7 @@ static ssize_t mem_limit_show(struct device *dev,
128 u64 val; 141 u64 val;
129 struct zram *zram = dev_to_zram(dev); 142 struct zram *zram = dev_to_zram(dev);
130 143
144 deprecated_attr_warn("mem_limit");
131 down_read(&zram->init_lock); 145 down_read(&zram->init_lock);
132 val = zram->limit_pages; 146 val = zram->limit_pages;
133 up_read(&zram->init_lock); 147 up_read(&zram->init_lock);
@@ -159,6 +173,7 @@ static ssize_t mem_used_max_show(struct device *dev,
159 u64 val = 0; 173 u64 val = 0;
160 struct zram *zram = dev_to_zram(dev); 174 struct zram *zram = dev_to_zram(dev);
161 175
176 deprecated_attr_warn("mem_used_max");
162 down_read(&zram->init_lock); 177 down_read(&zram->init_lock);
163 if (init_done(zram)) 178 if (init_done(zram))
164 val = atomic_long_read(&zram->stats.max_used_pages); 179 val = atomic_long_read(&zram->stats.max_used_pages);
@@ -670,8 +685,12 @@ out:
670static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 685static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
671 int offset, int rw) 686 int offset, int rw)
672{ 687{
688 unsigned long start_time = jiffies;
673 int ret; 689 int ret;
674 690
691 generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
692 &zram->disk->part0);
693
675 if (rw == READ) { 694 if (rw == READ) {
676 atomic64_inc(&zram->stats.num_reads); 695 atomic64_inc(&zram->stats.num_reads);
677 ret = zram_bvec_read(zram, bvec, index, offset); 696 ret = zram_bvec_read(zram, bvec, index, offset);
@@ -680,6 +699,8 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
680 ret = zram_bvec_write(zram, bvec, index, offset); 699 ret = zram_bvec_write(zram, bvec, index, offset);
681 } 700 }
682 701
702 generic_end_io_acct(rw, &zram->disk->part0, start_time);
703
683 if (unlikely(ret)) { 704 if (unlikely(ret)) {
684 if (rw == READ) 705 if (rw == READ)
685 atomic64_inc(&zram->stats.failed_reads); 706 atomic64_inc(&zram->stats.failed_reads);
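zram_bvec_rw() now brackets each request with the block layer's generic accounting helpers, so the bio-based zram device shows up correctly in /sys/block/zram<id>/stat and /proc/diskstats. A sketch using the helper signatures of this era; the transfer function is hypothetical and SECTOR_SHIFT is the usual 9:

	#include <linux/blkdev.h>
	#include <linux/genhd.h>

	static int example_rw(struct gendisk *disk, struct bio_vec *bvec, int rw)
	{
		unsigned long start_time = jiffies;
		int ret;

		generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
				      &disk->part0);
		ret = do_transfer(bvec, rw);	/* hypothetical data movement */
		generic_end_io_acct(rw, &disk->part0, start_time);

		return ret;
	}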
@@ -1027,6 +1048,55 @@ static DEVICE_ATTR_RW(mem_used_max);
1027static DEVICE_ATTR_RW(max_comp_streams); 1048static DEVICE_ATTR_RW(max_comp_streams);
1028static DEVICE_ATTR_RW(comp_algorithm); 1049static DEVICE_ATTR_RW(comp_algorithm);
1029 1050
1051static ssize_t io_stat_show(struct device *dev,
1052 struct device_attribute *attr, char *buf)
1053{
1054 struct zram *zram = dev_to_zram(dev);
1055 ssize_t ret;
1056
1057 down_read(&zram->init_lock);
1058 ret = scnprintf(buf, PAGE_SIZE,
1059 "%8llu %8llu %8llu %8llu\n",
1060 (u64)atomic64_read(&zram->stats.failed_reads),
1061 (u64)atomic64_read(&zram->stats.failed_writes),
1062 (u64)atomic64_read(&zram->stats.invalid_io),
1063 (u64)atomic64_read(&zram->stats.notify_free));
1064 up_read(&zram->init_lock);
1065
1066 return ret;
1067}
1068
1069static ssize_t mm_stat_show(struct device *dev,
1070 struct device_attribute *attr, char *buf)
1071{
1072 struct zram *zram = dev_to_zram(dev);
1073 u64 orig_size, mem_used = 0;
1074 long max_used;
1075 ssize_t ret;
1076
1077 down_read(&zram->init_lock);
1078 if (init_done(zram))
1079 mem_used = zs_get_total_pages(zram->meta->mem_pool);
1080
1081 orig_size = atomic64_read(&zram->stats.pages_stored);
1082 max_used = atomic_long_read(&zram->stats.max_used_pages);
1083
1084 ret = scnprintf(buf, PAGE_SIZE,
1085 "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
1086 orig_size << PAGE_SHIFT,
1087 (u64)atomic64_read(&zram->stats.compr_data_size),
1088 mem_used << PAGE_SHIFT,
1089 zram->limit_pages << PAGE_SHIFT,
1090 max_used << PAGE_SHIFT,
1091 (u64)atomic64_read(&zram->stats.zero_pages),
1092 (u64)atomic64_read(&zram->stats.num_migrated));
1093 up_read(&zram->init_lock);
1094
1095 return ret;
1096}
1097
1098static DEVICE_ATTR_RO(io_stat);
1099static DEVICE_ATTR_RO(mm_stat);
1030ZRAM_ATTR_RO(num_reads); 1100ZRAM_ATTR_RO(num_reads);
1031ZRAM_ATTR_RO(num_writes); 1101ZRAM_ATTR_RO(num_writes);
1032ZRAM_ATTR_RO(failed_reads); 1102ZRAM_ATTR_RO(failed_reads);
@@ -1054,6 +1124,8 @@ static struct attribute *zram_disk_attrs[] = {
1054 &dev_attr_mem_used_max.attr, 1124 &dev_attr_mem_used_max.attr,
1055 &dev_attr_max_comp_streams.attr, 1125 &dev_attr_max_comp_streams.attr,
1056 &dev_attr_comp_algorithm.attr, 1126 &dev_attr_comp_algorithm.attr,
1127 &dev_attr_io_stat.attr,
1128 &dev_attr_mm_stat.attr,
1057 NULL, 1129 NULL,
1058}; 1130};
1059 1131
@@ -1082,6 +1154,7 @@ static int create_device(struct zram *zram, int device_id)
1082 if (!zram->disk) { 1154 if (!zram->disk) {
1083 pr_warn("Error allocating disk structure for device %d\n", 1155 pr_warn("Error allocating disk structure for device %d\n",
1084 device_id); 1156 device_id);
1157 ret = -ENOMEM;
1085 goto out_free_queue; 1158 goto out_free_queue;
1086 } 1159 }
1087 1160
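Two smaller fixes ride along in zram_drv.c: the new io_stat and mm_stat attributes consolidate the per-field files being deprecated above into single column-formatted reads, and create_device() gains an explicit -ENOMEM on the disk-allocation failure path so the caller no longer sees a stale success code. The error-path shape, sketched with invented helpers:

	static int example_create(struct example_dev *dev)
	{
		int ret = 0;

		dev->queue = example_alloc_queue();	/* hypothetical */
		if (!dev->queue)
			return -ENOMEM;

		dev->disk = example_alloc_disk();	/* hypothetical */
		if (!dev->disk) {
			ret = -ENOMEM;	/* previously fell through as 0 */
			goto out_free_queue;
		}
		return 0;

	out_free_queue:
		example_free_queue(dev->queue);
		return ret;
	}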
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 17056e589146..570c598f4ce9 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -84,6 +84,7 @@ struct zram_stats {
84 atomic64_t compr_data_size; /* compressed size of pages stored */ 84 atomic64_t compr_data_size; /* compressed size of pages stored */
85 atomic64_t num_reads; /* failed + successful */ 85 atomic64_t num_reads; /* failed + successful */
86 atomic64_t num_writes; /* --do-- */ 86 atomic64_t num_writes; /* --do-- */
87 atomic64_t num_migrated; /* no. of migrated object */
87 atomic64_t failed_reads; /* can happen when memory is too low */ 88 atomic64_t failed_reads; /* can happen when memory is too low */
88 atomic64_t failed_writes; /* can happen when memory is too low */ 89 atomic64_t failed_writes; /* can happen when memory is too low */
89 atomic64_t invalid_io; /* non-page-aligned I/O requests */ 90 atomic64_t invalid_io; /* non-page-aligned I/O requests */
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 8b490d77054f..6bc16809c504 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1021,7 +1021,6 @@ static struct hppa_dma_ops ccio_ops = {
1021#ifdef CONFIG_PROC_FS 1021#ifdef CONFIG_PROC_FS
1022static int ccio_proc_info(struct seq_file *m, void *p) 1022static int ccio_proc_info(struct seq_file *m, void *p)
1023{ 1023{
1024 int len = 0;
1025 struct ioc *ioc = ioc_list; 1024 struct ioc *ioc = ioc_list;
1026 1025
1027 while (ioc != NULL) { 1026 while (ioc != NULL) {
@@ -1031,22 +1030,22 @@ static int ccio_proc_info(struct seq_file *m, void *p)
1031 int j; 1030 int j;
1032#endif 1031#endif
1033 1032
1034 len += seq_printf(m, "%s\n", ioc->name); 1033 seq_printf(m, "%s\n", ioc->name);
1035 1034
1036 len += seq_printf(m, "Cujo 2.0 bug : %s\n", 1035 seq_printf(m, "Cujo 2.0 bug : %s\n",
1037 (ioc->cujo20_bug ? "yes" : "no")); 1036 (ioc->cujo20_bug ? "yes" : "no"));
1038 1037
1039 len += seq_printf(m, "IO PDIR size : %d bytes (%d entries)\n", 1038 seq_printf(m, "IO PDIR size : %d bytes (%d entries)\n",
1040 total_pages * 8, total_pages); 1039 total_pages * 8, total_pages);
1041 1040
1042#ifdef CCIO_COLLECT_STATS 1041#ifdef CCIO_COLLECT_STATS
1043 len += seq_printf(m, "IO PDIR entries : %ld free %ld used (%d%%)\n", 1042 seq_printf(m, "IO PDIR entries : %ld free %ld used (%d%%)\n",
1044 total_pages - ioc->used_pages, ioc->used_pages, 1043 total_pages - ioc->used_pages, ioc->used_pages,
1045 (int)(ioc->used_pages * 100 / total_pages)); 1044 (int)(ioc->used_pages * 100 / total_pages));
1046#endif 1045#endif
1047 1046
1048 len += seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n", 1047 seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n",
1049 ioc->res_size, total_pages); 1048 ioc->res_size, total_pages);
1050 1049
1051#ifdef CCIO_COLLECT_STATS 1050#ifdef CCIO_COLLECT_STATS
1052 min = max = ioc->avg_search[0]; 1051 min = max = ioc->avg_search[0];
@@ -1058,26 +1057,26 @@ static int ccio_proc_info(struct seq_file *m, void *p)
1058 min = ioc->avg_search[j]; 1057 min = ioc->avg_search[j];
1059 } 1058 }
1060 avg /= CCIO_SEARCH_SAMPLE; 1059 avg /= CCIO_SEARCH_SAMPLE;
1061 len += seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n", 1060 seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
1062 min, avg, max); 1061 min, avg, max);
1063 1062
1064 len += seq_printf(m, "pci_map_single(): %8ld calls %8ld pages (avg %d/1000)\n", 1063 seq_printf(m, "pci_map_single(): %8ld calls %8ld pages (avg %d/1000)\n",
1065 ioc->msingle_calls, ioc->msingle_pages, 1064 ioc->msingle_calls, ioc->msingle_pages,
1066 (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls)); 1065 (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
1067 1066
1068 /* KLUGE - unmap_sg calls unmap_single for each mapped page */ 1067 /* KLUGE - unmap_sg calls unmap_single for each mapped page */
1069 min = ioc->usingle_calls - ioc->usg_calls; 1068 min = ioc->usingle_calls - ioc->usg_calls;
1070 max = ioc->usingle_pages - ioc->usg_pages; 1069 max = ioc->usingle_pages - ioc->usg_pages;
1071 len += seq_printf(m, "pci_unmap_single: %8ld calls %8ld pages (avg %d/1000)\n", 1070 seq_printf(m, "pci_unmap_single: %8ld calls %8ld pages (avg %d/1000)\n",
1072 min, max, (int)((max * 1000)/min)); 1071 min, max, (int)((max * 1000)/min));
1073 1072
1074 len += seq_printf(m, "pci_map_sg() : %8ld calls %8ld pages (avg %d/1000)\n", 1073 seq_printf(m, "pci_map_sg() : %8ld calls %8ld pages (avg %d/1000)\n",
1075 ioc->msg_calls, ioc->msg_pages, 1074 ioc->msg_calls, ioc->msg_pages,
1076 (int)((ioc->msg_pages * 1000)/ioc->msg_calls)); 1075 (int)((ioc->msg_pages * 1000)/ioc->msg_calls));
1077 1076
1078 len += seq_printf(m, "pci_unmap_sg() : %8ld calls %8ld pages (avg %d/1000)\n\n\n", 1077 seq_printf(m, "pci_unmap_sg() : %8ld calls %8ld pages (avg %d/1000)\n\n\n",
1079 ioc->usg_calls, ioc->usg_pages, 1078 ioc->usg_calls, ioc->usg_pages,
1080 (int)((ioc->usg_pages * 1000)/ioc->usg_calls)); 1079 (int)((ioc->usg_pages * 1000)/ioc->usg_calls));
1081#endif /* CCIO_COLLECT_STATS */ 1080#endif /* CCIO_COLLECT_STATS */
1082 1081
1083 ioc = ioc->next; 1082 ioc = ioc->next;
@@ -1101,7 +1100,6 @@ static const struct file_operations ccio_proc_info_fops = {
1101 1100
1102static int ccio_proc_bitmap_info(struct seq_file *m, void *p) 1101static int ccio_proc_bitmap_info(struct seq_file *m, void *p)
1103{ 1102{
1104 int len = 0;
1105 struct ioc *ioc = ioc_list; 1103 struct ioc *ioc = ioc_list;
1106 1104
1107 while (ioc != NULL) { 1105 while (ioc != NULL) {
@@ -1110,11 +1108,11 @@ static int ccio_proc_bitmap_info(struct seq_file *m, void *p)
1110 1108
1111 for (j = 0; j < (ioc->res_size / sizeof(u32)); j++) { 1109 for (j = 0; j < (ioc->res_size / sizeof(u32)); j++) {
1112 if ((j & 7) == 0) 1110 if ((j & 7) == 0)
1113 len += seq_puts(m, "\n "); 1111 seq_puts(m, "\n ");
1114 len += seq_printf(m, "%08x", *res_ptr); 1112 seq_printf(m, "%08x", *res_ptr);
1115 res_ptr++; 1113 res_ptr++;
1116 } 1114 }
1117 len += seq_puts(m, "\n\n"); 1115 seq_puts(m, "\n\n");
1118 ioc = ioc->next; 1116 ioc = ioc->next;
1119 break; /* XXX - remove me */ 1117 break; /* XXX - remove me */
1120 } 1118 }
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 1ff1b67e8b27..f07471264689 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1774,37 +1774,35 @@ static int sba_proc_info(struct seq_file *m, void *p)
1774#ifdef SBA_COLLECT_STATS 1774#ifdef SBA_COLLECT_STATS
1775 unsigned long avg = 0, min, max; 1775 unsigned long avg = 0, min, max;
1776#endif 1776#endif
1777 int i, len = 0; 1777 int i;
1778 1778
1779 len += seq_printf(m, "%s rev %d.%d\n", 1779 seq_printf(m, "%s rev %d.%d\n",
1780 sba_dev->name, 1780 sba_dev->name,
1781 (sba_dev->hw_rev & 0x7) + 1, 1781 (sba_dev->hw_rev & 0x7) + 1,
1782 (sba_dev->hw_rev & 0x18) >> 3 1782 (sba_dev->hw_rev & 0x18) >> 3);
1783 ); 1783 seq_printf(m, "IO PDIR size : %d bytes (%d entries)\n",
1784 len += seq_printf(m, "IO PDIR size : %d bytes (%d entries)\n", 1784 (int)((ioc->res_size << 3) * sizeof(u64)), /* 8 bits/byte */
1785 (int) ((ioc->res_size << 3) * sizeof(u64)), /* 8 bits/byte */ 1785 total_pages);
1786 total_pages); 1786
1787 1787 seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n",
1788 len += seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n", 1788 ioc->res_size, ioc->res_size << 3); /* 8 bits per byte */
1789 ioc->res_size, ioc->res_size << 3); /* 8 bits per byte */ 1789
1790 1790 seq_printf(m, "LMMIO_BASE/MASK/ROUTE %08x %08x %08x\n",
1791 len += seq_printf(m, "LMMIO_BASE/MASK/ROUTE %08x %08x %08x\n", 1791 READ_REG32(sba_dev->sba_hpa + LMMIO_DIST_BASE),
1792 READ_REG32(sba_dev->sba_hpa + LMMIO_DIST_BASE), 1792 READ_REG32(sba_dev->sba_hpa + LMMIO_DIST_MASK),
1793 READ_REG32(sba_dev->sba_hpa + LMMIO_DIST_MASK), 1793 READ_REG32(sba_dev->sba_hpa + LMMIO_DIST_ROUTE));
1794 READ_REG32(sba_dev->sba_hpa + LMMIO_DIST_ROUTE)
1795 );
1796 1794
1797 for (i=0; i<4; i++) 1795 for (i=0; i<4; i++)
1798 len += seq_printf(m, "DIR%d_BASE/MASK/ROUTE %08x %08x %08x\n", i, 1796 seq_printf(m, "DIR%d_BASE/MASK/ROUTE %08x %08x %08x\n",
1799 READ_REG32(sba_dev->sba_hpa + LMMIO_DIRECT0_BASE + i*0x18), 1797 i,
1800 READ_REG32(sba_dev->sba_hpa + LMMIO_DIRECT0_MASK + i*0x18), 1798 READ_REG32(sba_dev->sba_hpa + LMMIO_DIRECT0_BASE + i*0x18),
1801 READ_REG32(sba_dev->sba_hpa + LMMIO_DIRECT0_ROUTE + i*0x18) 1799 READ_REG32(sba_dev->sba_hpa + LMMIO_DIRECT0_MASK + i*0x18),
1802 ); 1800 READ_REG32(sba_dev->sba_hpa + LMMIO_DIRECT0_ROUTE + i*0x18));
1803 1801
1804#ifdef SBA_COLLECT_STATS 1802#ifdef SBA_COLLECT_STATS
1805 len += seq_printf(m, "IO PDIR entries : %ld free %ld used (%d%%)\n", 1803 seq_printf(m, "IO PDIR entries : %ld free %ld used (%d%%)\n",
1806 total_pages - ioc->used_pages, ioc->used_pages, 1804 total_pages - ioc->used_pages, ioc->used_pages,
1807 (int) (ioc->used_pages * 100 / total_pages)); 1805 (int)(ioc->used_pages * 100 / total_pages));
1808 1806
1809 min = max = ioc->avg_search[0]; 1807 min = max = ioc->avg_search[0];
1810 for (i = 0; i < SBA_SEARCH_SAMPLE; i++) { 1808 for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
@@ -1813,26 +1811,26 @@ static int sba_proc_info(struct seq_file *m, void *p)
1813 if (ioc->avg_search[i] < min) min = ioc->avg_search[i]; 1811 if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
1814 } 1812 }
1815 avg /= SBA_SEARCH_SAMPLE; 1813 avg /= SBA_SEARCH_SAMPLE;
1816 len += seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n", 1814 seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
1817 min, avg, max); 1815 min, avg, max);
1818 1816
1819 len += seq_printf(m, "pci_map_single(): %12ld calls %12ld pages (avg %d/1000)\n", 1817 seq_printf(m, "pci_map_single(): %12ld calls %12ld pages (avg %d/1000)\n",
1820 ioc->msingle_calls, ioc->msingle_pages, 1818 ioc->msingle_calls, ioc->msingle_pages,
1821 (int) ((ioc->msingle_pages * 1000)/ioc->msingle_calls)); 1819 (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
1822 1820
1823 /* KLUGE - unmap_sg calls unmap_single for each mapped page */ 1821 /* KLUGE - unmap_sg calls unmap_single for each mapped page */
1824 min = ioc->usingle_calls; 1822 min = ioc->usingle_calls;
1825 max = ioc->usingle_pages - ioc->usg_pages; 1823 max = ioc->usingle_pages - ioc->usg_pages;
1826 len += seq_printf(m, "pci_unmap_single: %12ld calls %12ld pages (avg %d/1000)\n", 1824 seq_printf(m, "pci_unmap_single: %12ld calls %12ld pages (avg %d/1000)\n",
1827 min, max, (int) ((max * 1000)/min)); 1825 min, max, (int)((max * 1000)/min));
1828 1826
1829 len += seq_printf(m, "pci_map_sg() : %12ld calls %12ld pages (avg %d/1000)\n", 1827 seq_printf(m, "pci_map_sg() : %12ld calls %12ld pages (avg %d/1000)\n",
1830 ioc->msg_calls, ioc->msg_pages, 1828 ioc->msg_calls, ioc->msg_pages,
1831 (int) ((ioc->msg_pages * 1000)/ioc->msg_calls)); 1829 (int)((ioc->msg_pages * 1000)/ioc->msg_calls));
1832 1830
1833 len += seq_printf(m, "pci_unmap_sg() : %12ld calls %12ld pages (avg %d/1000)\n", 1831 seq_printf(m, "pci_unmap_sg() : %12ld calls %12ld pages (avg %d/1000)\n",
1834 ioc->usg_calls, ioc->usg_pages, 1832 ioc->usg_calls, ioc->usg_pages,
1835 (int) ((ioc->usg_pages * 1000)/ioc->usg_calls)); 1833 (int)((ioc->usg_pages * 1000)/ioc->usg_calls));
1836#endif 1834#endif
1837 1835
1838 return 0; 1836 return 0;
@@ -1858,14 +1856,14 @@ sba_proc_bitmap_info(struct seq_file *m, void *p)
1858 struct sba_device *sba_dev = sba_list; 1856 struct sba_device *sba_dev = sba_list;
1859 struct ioc *ioc = &sba_dev->ioc[0]; /* FIXME: Multi-IOC support! */ 1857 struct ioc *ioc = &sba_dev->ioc[0]; /* FIXME: Multi-IOC support! */
1860 unsigned int *res_ptr = (unsigned int *)ioc->res_map; 1858 unsigned int *res_ptr = (unsigned int *)ioc->res_map;
1861 int i, len = 0; 1859 int i;
1862 1860
1863 for (i = 0; i < (ioc->res_size/sizeof(unsigned int)); ++i, ++res_ptr) { 1861 for (i = 0; i < (ioc->res_size/sizeof(unsigned int)); ++i, ++res_ptr) {
1864 if ((i & 7) == 0) 1862 if ((i & 7) == 0)
1865 len += seq_printf(m, "\n "); 1863 seq_puts(m, "\n ");
1866 len += seq_printf(m, " %08x", *res_ptr); 1864 seq_printf(m, " %08x", *res_ptr);
1867 } 1865 }
1868 len += seq_printf(m, "\n"); 1866 seq_putc(m, '\n');
1869 1867
1870 return 0; 1868 return 0;
1871} 1869}
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 5b2e76159b41..87647f459198 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -459,23 +459,25 @@ static int cmos_procfs(struct device *dev, struct seq_file *seq)
459 /* NOTE: at least ICH6 reports battery status using a different 459 /* NOTE: at least ICH6 reports battery status using a different
460 * (non-RTC) bit; and SQWE is ignored on many current systems. 460 * (non-RTC) bit; and SQWE is ignored on many current systems.
461 */ 461 */
462 return seq_printf(seq, 462 seq_printf(seq,
463 "periodic_IRQ\t: %s\n" 463 "periodic_IRQ\t: %s\n"
464 "update_IRQ\t: %s\n" 464 "update_IRQ\t: %s\n"
465 "HPET_emulated\t: %s\n" 465 "HPET_emulated\t: %s\n"
466 // "square_wave\t: %s\n" 466 // "square_wave\t: %s\n"
467 "BCD\t\t: %s\n" 467 "BCD\t\t: %s\n"
468 "DST_enable\t: %s\n" 468 "DST_enable\t: %s\n"
469 "periodic_freq\t: %d\n" 469 "periodic_freq\t: %d\n"
470 "batt_status\t: %s\n", 470 "batt_status\t: %s\n",
471 (rtc_control & RTC_PIE) ? "yes" : "no", 471 (rtc_control & RTC_PIE) ? "yes" : "no",
472 (rtc_control & RTC_UIE) ? "yes" : "no", 472 (rtc_control & RTC_UIE) ? "yes" : "no",
473 is_hpet_enabled() ? "yes" : "no", 473 is_hpet_enabled() ? "yes" : "no",
474 // (rtc_control & RTC_SQWE) ? "yes" : "no", 474 // (rtc_control & RTC_SQWE) ? "yes" : "no",
475 (rtc_control & RTC_DM_BINARY) ? "no" : "yes", 475 (rtc_control & RTC_DM_BINARY) ? "no" : "yes",
476 (rtc_control & RTC_DST_EN) ? "yes" : "no", 476 (rtc_control & RTC_DST_EN) ? "yes" : "no",
477 cmos->rtc->irq_freq, 477 cmos->rtc->irq_freq,
478 (valid & RTC_VRT) ? "okay" : "dead"); 478 (valid & RTC_VRT) ? "okay" : "dead");
479
480 return 0;
479} 481}
480 482
481#else 483#else
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index 129add77065d..12b07158a366 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -434,9 +434,9 @@ static int ds1305_proc(struct device *dev, struct seq_file *seq)
434 } 434 }
435 435
436done: 436done:
437 return seq_printf(seq, 437 seq_printf(seq, "trickle_charge\t: %s%s\n", diodes, resistors);
438 "trickle_charge\t: %s%s\n", 438
439 diodes, resistors); 439 return 0;
440} 440}
441 441
442#else 442#else
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 3a6fd3a8a2ec..548ea6f6f384 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -277,13 +277,15 @@ static int mrst_procfs(struct device *dev, struct seq_file *seq)
277 valid = vrtc_cmos_read(RTC_VALID); 277 valid = vrtc_cmos_read(RTC_VALID);
278 spin_unlock_irq(&rtc_lock); 278 spin_unlock_irq(&rtc_lock);
279 279
280 return seq_printf(seq, 280 seq_printf(seq,
281 "periodic_IRQ\t: %s\n" 281 "periodic_IRQ\t: %s\n"
282 "alarm\t\t: %s\n" 282 "alarm\t\t: %s\n"
283 "BCD\t\t: no\n" 283 "BCD\t\t: no\n"
284 "periodic_freq\t: daily (not adjustable)\n", 284 "periodic_freq\t: daily (not adjustable)\n",
285 (rtc_control & RTC_PIE) ? "on" : "off", 285 (rtc_control & RTC_PIE) ? "on" : "off",
286 (rtc_control & RTC_AIE) ? "on" : "off"); 286 (rtc_control & RTC_AIE) ? "on" : "off");
287
288 return 0;
287} 289}
288 290
289#else 291#else
diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c
index d948277057d8..60232bd366ef 100644
--- a/drivers/rtc/rtc-tegra.c
+++ b/drivers/rtc/rtc-tegra.c
@@ -261,7 +261,9 @@ static int tegra_rtc_proc(struct device *dev, struct seq_file *seq)
261 if (!dev || !dev->driver) 261 if (!dev || !dev->driver)
262 return 0; 262 return 0;
263 263
264 return seq_printf(seq, "name\t\t: %s\n", dev_name(dev)); 264 seq_printf(seq, "name\t\t: %s\n", dev_name(dev));
265
266 return 0;
265} 267}
266 268
267static irqreturn_t tegra_rtc_irq_handler(int irq, void *data) 269static irqreturn_t tegra_rtc_irq_handler(int irq, void *data)
diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c
index b3f791b2c1f8..20314aad7ab7 100644
--- a/drivers/s390/cio/blacklist.c
+++ b/drivers/s390/cio/blacklist.c
@@ -330,18 +330,20 @@ cio_ignore_proc_seq_show(struct seq_file *s, void *it)
330 if (!iter->in_range) { 330 if (!iter->in_range) {
331 /* First device in range. */ 331 /* First device in range. */
332 if ((iter->devno == __MAX_SUBCHANNEL) || 332 if ((iter->devno == __MAX_SUBCHANNEL) ||
333 !is_blacklisted(iter->ssid, iter->devno + 1)) 333 !is_blacklisted(iter->ssid, iter->devno + 1)) {
334 /* Singular device. */ 334 /* Singular device. */
335 return seq_printf(s, "0.%x.%04x\n", 335 seq_printf(s, "0.%x.%04x\n", iter->ssid, iter->devno);
336 iter->ssid, iter->devno); 336 return 0;
337 }
337 iter->in_range = 1; 338 iter->in_range = 1;
338 return seq_printf(s, "0.%x.%04x-", iter->ssid, iter->devno); 339 seq_printf(s, "0.%x.%04x-", iter->ssid, iter->devno);
340 return 0;
339 } 341 }
340 if ((iter->devno == __MAX_SUBCHANNEL) || 342 if ((iter->devno == __MAX_SUBCHANNEL) ||
341 !is_blacklisted(iter->ssid, iter->devno + 1)) { 343 !is_blacklisted(iter->ssid, iter->devno + 1)) {
342 /* Last device in range. */ 344 /* Last device in range. */
343 iter->in_range = 0; 345 iter->in_range = 0;
344 return seq_printf(s, "0.%x.%04x\n", iter->ssid, iter->devno); 346 seq_printf(s, "0.%x.%04x\n", iter->ssid, iter->devno);
345 } 347 }
346 return 0; 348 return 0;
347} 349}
diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c
index 0787b9756165..228c782d6433 100644
--- a/drivers/sbus/char/bbc_envctrl.c
+++ b/drivers/sbus/char/bbc_envctrl.c
@@ -160,8 +160,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
160 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 160 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
161 161
162 shutting_down = 1; 162 shutting_down = 1;
163 if (orderly_poweroff(true) < 0) 163 orderly_poweroff(true);
164 printk(KERN_CRIT "envctrl: shutdown execution failed\n");
165} 164}
166 165
167#define WARN_INTERVAL (30 * HZ) 166#define WARN_INTERVAL (30 * HZ)
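Both sbus environment drivers stop inspecting orderly_poweroff()'s result. The helper has queued the shutdown through a workqueue for some time, so by the point it returns there is no failure for the caller to observe, and the "shutdown failed" recovery printks guarded a path that could not trigger; this series drops the checked-return form accordingly. The surviving call site reduces to a fire-and-forget sketch:

	#include <linux/printk.h>
	#include <linux/reboot.h>

	static void example_thermal_shutdown(void)
	{
		pr_crit("critical temperature, shutting down\n");
		orderly_poweroff(true);	/* queues poweroff work; no status */
	}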
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index e244cf3d9ec8..5609b602c54d 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -970,18 +970,13 @@ static struct i2c_child_t *envctrl_get_i2c_child(unsigned char mon_type)
970static void envctrl_do_shutdown(void) 970static void envctrl_do_shutdown(void)
971{ 971{
972 static int inprog = 0; 972 static int inprog = 0;
973 int ret;
974 973
975 if (inprog != 0) 974 if (inprog != 0)
976 return; 975 return;
977 976
978 inprog = 1; 977 inprog = 1;
979 printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n"); 978 printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n");
980 ret = orderly_poweroff(true); 979 orderly_poweroff(true);
981 if (ret < 0) {
982 printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n");
983 inprog = 0; /* unlikely to succeed, but we could try again */
984 }
985} 980}
986 981
987static struct task_struct *kenvctrld_task; 982static struct task_struct *kenvctrld_task;
diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig
index 6725467ef4d0..62c7bba75274 100644
--- a/drivers/staging/lustre/lustre/Kconfig
+++ b/drivers/staging/lustre/lustre/Kconfig
@@ -10,6 +10,7 @@ config LUSTRE_FS
10 select CRYPTO_SHA1 10 select CRYPTO_SHA1
11 select CRYPTO_SHA256 11 select CRYPTO_SHA256
12 select CRYPTO_SHA512 12 select CRYPTO_SHA512
13 depends on MULTIUSER
13 help 14 help
14 This option enables Lustre file system client support. Choose Y 15 This option enables Lustre file system client support. Choose Y
15 here if you want to access a Lustre file system cluster. To compile 16 here if you want to access a Lustre file system cluster. To compile
diff --git a/fs/dax.c b/fs/dax.c
index ed1619ec6537..d0bd1f4f81b3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -464,6 +464,23 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
464EXPORT_SYMBOL_GPL(dax_fault); 464EXPORT_SYMBOL_GPL(dax_fault);
465 465
466/** 466/**
467 * dax_pfn_mkwrite - handle first write to DAX page
468 * @vma: The virtual memory area where the fault occurred
469 * @vmf: The description of the fault
470 *
471 */
472int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
473{
474 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
475
476 sb_start_pagefault(sb);
477 file_update_time(vma->vm_file);
478 sb_end_pagefault(sb);
479 return VM_FAULT_NOPAGE;
480}
481EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
482
483/**
467 * dax_zero_page_range - zero a range within a page of a DAX file 484 * dax_zero_page_range - zero a range within a page of a DAX file
468 * @inode: The file being truncated 485 * @inode: The file being truncated
469 * @from: The file offset that is being truncated to 486 * @from: The file offset that is being truncated to
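dax_pfn_mkwrite() gives DAX filesystems a common ->pfn_mkwrite handler for the first write to a read-only pfn mapping: there is no struct page to dirty, but the file timestamps must still be updated under pagefault protection, which is all the helper does before returning VM_FAULT_NOPAGE. Wiring it up is one line in the filesystem's vm_operations_struct, mirroring the ext2 hunk below (the fault handlers named here stand in for the filesystem's own):

	static const struct vm_operations_struct example_dax_vm_ops = {
		.fault		= example_dax_fault,	/* fs-specific */
		.page_mkwrite	= example_dax_mkwrite,	/* fs-specific */
		.pfn_mkwrite	= dax_pfn_mkwrite,	/* generic mtime update */
	};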
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 678f9ab08c48..8d15febd0aa3 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -793,7 +793,6 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
793 int datasync); 793 int datasync);
794extern const struct inode_operations ext2_file_inode_operations; 794extern const struct inode_operations ext2_file_inode_operations;
795extern const struct file_operations ext2_file_operations; 795extern const struct file_operations ext2_file_operations;
796extern const struct file_operations ext2_dax_file_operations;
797 796
798/* inode.c */ 797/* inode.c */
799extern const struct address_space_operations ext2_aops; 798extern const struct address_space_operations ext2_aops;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index ef04fdb57dbf..3a0a6c6406d0 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -39,6 +39,7 @@ static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
39static const struct vm_operations_struct ext2_dax_vm_ops = { 39static const struct vm_operations_struct ext2_dax_vm_ops = {
40 .fault = ext2_dax_fault, 40 .fault = ext2_dax_fault,
41 .page_mkwrite = ext2_dax_mkwrite, 41 .page_mkwrite = ext2_dax_mkwrite,
42 .pfn_mkwrite = dax_pfn_mkwrite,
42}; 43};
43 44
44static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) 45static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -106,22 +107,6 @@ const struct file_operations ext2_file_operations = {
 	.splice_write	= iter_file_splice_write,
 };
 
-#ifdef CONFIG_FS_DAX
-const struct file_operations ext2_dax_file_operations = {
-	.llseek		= generic_file_llseek,
-	.read_iter	= generic_file_read_iter,
-	.write_iter	= generic_file_write_iter,
-	.unlocked_ioctl = ext2_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= ext2_compat_ioctl,
-#endif
-	.mmap		= ext2_file_mmap,
-	.open		= dquot_file_open,
-	.release	= ext2_release_file,
-	.fsync		= ext2_fsync,
-};
-#endif
-
 const struct inode_operations ext2_file_inode_operations = {
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index df9d6afbc5d5..b29eb6747116 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1388,10 +1388,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext2_file_inode_operations;
-		if (test_opt(inode->i_sb, DAX)) {
-			inode->i_mapping->a_ops = &ext2_aops;
-			inode->i_fop = &ext2_dax_file_operations;
-		} else if (test_opt(inode->i_sb, NOBH)) {
+		if (test_opt(inode->i_sb, NOBH)) {
 			inode->i_mapping->a_ops = &ext2_nobh_aops;
 			inode->i_fop = &ext2_file_operations;
 		} else {
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 148f6e3789ea..ce422931f411 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,10 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
 		return PTR_ERR(inode);
 
 	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, DAX)) {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_dax_file_operations;
-	} else if (test_opt(inode->i_sb, NOBH)) {
+	if (test_opt(inode->i_sb, NOBH)) {
 		inode->i_mapping->a_ops = &ext2_nobh_aops;
 		inode->i_fop = &ext2_file_operations;
 	} else {
@@ -125,10 +122,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 		return PTR_ERR(inode);
 
 	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, DAX)) {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_dax_file_operations;
-	} else if (test_opt(inode->i_sb, NOBH)) {
+	if (test_opt(inode->i_sb, NOBH)) {
 		inode->i_mapping->a_ops = &ext2_nobh_aops;
 		inode->i_fop = &ext2_file_operations;
 	} else {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f63c3d5805c4..8a3981ea35d8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2593,7 +2593,6 @@ extern const struct file_operations ext4_dir_operations;
 /* file.c */
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
-extern const struct file_operations ext4_dax_file_operations;
 extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
 
 /* inline.c */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 9ad03036d9f5..7a6defcf3352 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -206,6 +206,7 @@ static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 static const struct vm_operations_struct ext4_dax_vm_ops = {
 	.fault		= ext4_dax_fault,
 	.page_mkwrite	= ext4_dax_mkwrite,
+	.pfn_mkwrite	= dax_pfn_mkwrite,
 };
 #else
 #define ext4_dax_vm_ops	ext4_file_vm_ops
@@ -622,24 +623,6 @@ const struct file_operations ext4_file_operations = {
 	.fallocate	= ext4_fallocate,
 };
 
-#ifdef CONFIG_FS_DAX
-const struct file_operations ext4_dax_file_operations = {
-	.llseek		= ext4_llseek,
-	.read_iter	= generic_file_read_iter,
-	.write_iter	= ext4_file_write_iter,
-	.unlocked_ioctl = ext4_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= ext4_compat_ioctl,
-#endif
-	.mmap		= ext4_file_mmap,
-	.open		= ext4_file_open,
-	.release	= ext4_release_file,
-	.fsync		= ext4_sync_file,
-	/* Splice not yet supported with DAX */
-	.fallocate	= ext4_fallocate,
-};
-#endif
-
 const struct inode_operations ext4_file_inode_operations = {
 	.setattr	= ext4_setattr,
 	.getattr	= ext4_getattr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a3f451370bef..035b7a06f1c3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4090,10 +4090,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext4_file_inode_operations;
-		if (test_opt(inode->i_sb, DAX))
-			inode->i_fop = &ext4_dax_file_operations;
-		else
-			inode->i_fop = &ext4_file_operations;
+		inode->i_fop = &ext4_file_operations;
 		ext4_set_aops(inode);
 	} else if (S_ISDIR(inode->i_mode)) {
 		inode->i_op = &ext4_dir_inode_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28fe71a2904c..2291923dae4e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2235,10 +2235,7 @@ retry:
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext4_file_inode_operations;
-		if (test_opt(inode->i_sb, DAX))
-			inode->i_fop = &ext4_dax_file_operations;
-		else
-			inode->i_fop = &ext4_file_operations;
+		inode->i_fop = &ext4_file_operations;
 		ext4_set_aops(inode);
 		err = ext4_add_nondir(handle, dentry, inode);
 		if (!err && IS_DIRSYNC(dir))
@@ -2302,10 +2299,7 @@ retry:
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext4_file_inode_operations;
-		if (test_opt(inode->i_sb, DAX))
-			inode->i_fop = &ext4_dax_file_operations;
-		else
-			inode->i_fop = &ext4_file_operations;
+		inode->i_fop = &ext4_file_operations;
 		ext4_set_aops(inode);
 		d_tmpfile(dentry, inode);
 		err = ext4_orphan_add(handle, inode);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 45e34908bdb5..2640d88b0e63 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -48,9 +48,10 @@ struct hugetlbfs_config {
 	kuid_t   uid;
 	kgid_t   gid;
 	umode_t mode;
-	long	nr_blocks;
+	long	max_hpages;
 	long	nr_inodes;
 	struct hstate *hstate;
+	long	min_hpages;
 };
 
 struct hugetlbfs_inode_info {
@@ -68,7 +69,7 @@ int sysctl_hugetlb_shm_group;
 enum {
 	Opt_size, Opt_nr_inodes,
 	Opt_mode, Opt_uid, Opt_gid,
-	Opt_pagesize,
+	Opt_pagesize, Opt_min_size,
 	Opt_err,
 };
 
@@ -79,6 +80,7 @@ static const match_table_t tokens = {
 	{Opt_uid,	"uid=%u"},
 	{Opt_gid,	"gid=%u"},
 	{Opt_pagesize,	"pagesize=%s"},
+	{Opt_min_size,	"min_size=%s"},
 	{Opt_err,	NULL},
 };
 
@@ -729,14 +731,38 @@ static const struct super_operations hugetlbfs_ops = {
 	.show_options	= generic_show_options,
 };
 
+enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };
+
+/*
+ * Convert size option passed from command line to number of huge pages
+ * in the pool specified by hstate.  Size option could be in bytes
+ * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT).
+ */
+static long long
+hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
+								int val_type)
+{
+	if (val_type == NO_SIZE)
+		return -1;
+
+	if (val_type == SIZE_PERCENT) {
+		size_opt <<= huge_page_shift(h);
+		size_opt *= h->max_huge_pages;
+		do_div(size_opt, 100);
+	}
+
+	size_opt >>= huge_page_shift(h);
+	return size_opt;
+}
+
 static int
 hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 {
 	char *p, *rest;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
-	unsigned long long size = 0;
-	enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
+	unsigned long long max_size_opt = 0, min_size_opt = 0;
+	int max_val_type = NO_SIZE, min_val_type = NO_SIZE;
 
 	if (!options)
 		return 0;
@@ -774,10 +800,10 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 			/* memparse() will accept a K/M/G without a digit */
 			if (!isdigit(*args[0].from))
 				goto bad_val;
-			size = memparse(args[0].from, &rest);
-			setsize = SIZE_STD;
+			max_size_opt = memparse(args[0].from, &rest);
+			max_val_type = SIZE_STD;
 			if (*rest == '%')
-				setsize = SIZE_PERCENT;
+				max_val_type = SIZE_PERCENT;
 			break;
 		}
 
@@ -800,6 +826,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 			break;
 		}
 
+		case Opt_min_size: {
+			/* memparse() will accept a K/M/G without a digit */
+			if (!isdigit(*args[0].from))
+				goto bad_val;
+			min_size_opt = memparse(args[0].from, &rest);
+			min_val_type = SIZE_STD;
+			if (*rest == '%')
+				min_val_type = SIZE_PERCENT;
+			break;
+		}
+
 		default:
 			pr_err("Bad mount option: \"%s\"\n", p);
 			return -EINVAL;
@@ -807,15 +844,22 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 		}
 	}
 
-	/* Do size after hstate is set up */
-	if (setsize > NO_SIZE) {
-		struct hstate *h = pconfig->hstate;
-		if (setsize == SIZE_PERCENT) {
-			size <<= huge_page_shift(h);
-			size *= h->max_huge_pages;
-			do_div(size, 100);
-		}
-		pconfig->nr_blocks = (size >> huge_page_shift(h));
+	/*
+	 * Use huge page pool size (in hstate) to convert the size
+	 * options to number of huge pages.  If NO_SIZE, -1 is returned.
+	 */
+	pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
+						max_size_opt, max_val_type);
+	pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
+						min_size_opt, min_val_type);
+
+	/*
+	 * If max_size was specified, then min_size must be smaller
+	 */
+	if (max_val_type > NO_SIZE &&
+	    pconfig->min_hpages > pconfig->max_hpages) {
+		pr_err("minimum size can not be greater than maximum size\n");
+		return -EINVAL;
 	}
 
 	return 0;
@@ -834,12 +878,13 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	save_mount_options(sb, data);
 
-	config.nr_blocks = -1;		/* No limit on size by default */
+	config.max_hpages = -1;		/* No limit on size by default */
 	config.nr_inodes = -1;		/* No limit on number of inodes by default */
 	config.uid = current_fsuid();
 	config.gid = current_fsgid();
 	config.mode = 0755;
 	config.hstate = &default_hstate;
+	config.min_hpages = -1;		/* No default minimum size */
 	ret = hugetlbfs_parse_options(data, &config);
 	if (ret)
 		return ret;
@@ -853,8 +898,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
 	sbinfo->spool = NULL;
-	if (config.nr_blocks != -1) {
-		sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+	/*
+	 * Allocate and initialize subpool if maximum or minimum size is
+	 * specified.  Any needed reservations (for minimum size) are
+	 * taken when the subpool is created.
+	 */
+	if (config.max_hpages != -1 || config.min_hpages != -1) {
+		sbinfo->spool = hugepage_new_subpool(config.hstate,
+						     config.max_hpages,
+						     config.min_hpages);
 		if (!sbinfo->spool)
 			goto out_free;
 	}
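
As a minimal illustration of the SIZE_PERCENT path in hugetlbfs_size_to_hpages() above (hypothetical numbers, not part of the patch): with 2 MB huge pages and a pool of 1024 pages, "min_size=50%" resolves to 512 huge pages. In plain C, with do_div() replaced by ordinary division:

	unsigned long long size_opt = 50;	/* "min_size=50%" */
	unsigned int shift = 21;		/* huge_page_shift(h) for 2 MB pages */
	unsigned long max_huge_pages = 1024;	/* h->max_huge_pages */

	size_opt <<= shift;			/* scale percentage to bytes */
	size_opt *= max_huge_pages;
	size_opt /= 100;			/* do_div(size_opt, 100) in the kernel */
	size_opt >>= shift;			/* back to pages: 512 */
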
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 49ba7ff1bbb9..16a0922beb59 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -183,30 +183,23 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 
 #endif
 
-static void init_once(void *foo)
-{
-	struct metapage *mp = (struct metapage *)foo;
-
-	mp->lid = 0;
-	mp->lsn = 0;
-	mp->flag = 0;
-	mp->data = NULL;
-	mp->clsn = 0;
-	mp->log = NULL;
-	set_bit(META_free, &mp->flag);
-	init_waitqueue_head(&mp->wait);
-}
-
 static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
 {
-	return mempool_alloc(metapage_mempool, gfp_mask);
+	struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask);
+
+	if (mp) {
+		mp->lid = 0;
+		mp->lsn = 0;
+		mp->data = NULL;
+		mp->clsn = 0;
+		mp->log = NULL;
+		init_waitqueue_head(&mp->wait);
+	}
+	return mp;
 }
 
 static inline void free_metapage(struct metapage *mp)
 {
-	mp->flag = 0;
-	set_bit(META_free, &mp->flag);
-
 	mempool_free(mp, metapage_mempool);
 }
 
@@ -216,7 +209,7 @@ int __init metapage_init(void)
 	 * Allocate the metapage structures
 	 */
 	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
-					   0, 0, init_once);
+					   0, 0, NULL);
 	if (metapage_cache == NULL)
 		return -ENOMEM;
 
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index a78beda85f68..337e9e51ac06 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -48,7 +48,6 @@ struct metapage {
 
 /* metapage flag */
 #define META_locked	0
-#define META_free	1
 #define META_dirty	2
 #define META_sync	3
 #define META_discard	4
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index c7abc10279af..f31fd0dd92c6 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,6 +1,6 @@
 config NFS_FS
 	tristate "NFS client support"
-	depends on INET && FILE_LOCKING
+	depends on INET && FILE_LOCKING && MULTIUSER
 	select LOCKD
 	select SUNRPC
 	select NFS_ACL_SUPPORT if NFS_V3_ACL
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 683bf718aead..fc2d108f5272 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -6,6 +6,7 @@ config NFSD
 	select SUNRPC
 	select EXPORTFS
 	select NFS_ACL_SUPPORT if NFSD_V2_ACL
+	depends on MULTIUSER
 	help
 	  Choose Y here if you want to allow other computers to access
 	  files residing on this system using Sun's Network File System
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 1295a00ca316..fd02a9ebfc30 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -99,8 +99,8 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
 	buf = m->buf + m->count;
 
 	/* Ignore error for now */
-	string_escape_str(tcomm, &buf, m->size - m->count,
+	buf += string_escape_str(tcomm, buf, m->size - m->count,
 			  ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
 
 	m->count = buf - m->buf;
 	seq_putc(m, '\n');
@@ -188,6 +188,24 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 			   from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
 	put_cred(cred);
 
+#ifdef CONFIG_PID_NS
+	seq_puts(m, "\nNStgid:");
+	for (g = ns->level; g <= pid->level; g++)
+		seq_printf(m, "\t%d",
+			task_tgid_nr_ns(p, pid->numbers[g].ns));
+	seq_puts(m, "\nNSpid:");
+	for (g = ns->level; g <= pid->level; g++)
+		seq_printf(m, "\t%d",
+			task_pid_nr_ns(p, pid->numbers[g].ns));
+	seq_puts(m, "\nNSpgid:");
+	for (g = ns->level; g <= pid->level; g++)
+		seq_printf(m, "\t%d",
+			task_pgrp_nr_ns(p, pid->numbers[g].ns));
+	seq_puts(m, "\nNSsid:");
+	for (g = ns->level; g <= pid->level; g++)
+		seq_printf(m, "\t%d",
+			task_session_nr_ns(p, pid->numbers[g].ns));
+#endif
 	seq_putc(m, '\n');
 }
 
@@ -614,7 +632,9 @@ static int children_seq_show(struct seq_file *seq, void *v)
 	pid_t pid;
 
 	pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
-	return seq_printf(seq, "%d ", pid);
+	seq_printf(seq, "%d ", pid);
+
+	return 0;
 }
 
 static void *children_seq_start(struct seq_file *seq, loff_t *pos)
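
Following directly from the seq_puts()/seq_printf() format strings added above, a task one PID-namespace level deep reports one value per level in /proc/<pid>/status, outermost first; illustrative output with hypothetical PIDs:

	NStgid:	731	1
	NSpid:	731	1
	NSpgid:	731	1
	NSsid:	731	1
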
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3f3d7aeb0712..7a3b82f986dd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -238,13 +238,15 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
 
 	wchan = get_wchan(task);
 
-	if (lookup_symbol_name(wchan, symname) < 0)
+	if (lookup_symbol_name(wchan, symname) < 0) {
 		if (!ptrace_may_access(task, PTRACE_MODE_READ))
 			return 0;
-		else
-			return seq_printf(m, "%lu", wchan);
-	else
-		return seq_printf(m, "%s", symname);
+		seq_printf(m, "%lu", wchan);
+	} else {
+		seq_printf(m, "%s", symname);
+	}
+
+	return 0;
 }
 #endif /* CONFIG_KALLSYMS */
 
@@ -309,10 +311,12 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
 			      struct pid *pid, struct task_struct *task)
 {
-	return seq_printf(m, "%llu %llu %lu\n",
+	seq_printf(m, "%llu %llu %lu\n",
 		   (unsigned long long)task->se.sum_exec_runtime,
 		   (unsigned long long)task->sched_info.run_delay,
 		   task->sched_info.pcount);
+
+	return 0;
 }
 #endif
 
@@ -387,7 +391,9 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
 	points = oom_badness(task, NULL, NULL, totalpages) *
 					1000 / totalpages;
 	read_unlock(&tasklist_lock);
-	return seq_printf(m, "%lu\n", points);
+	seq_printf(m, "%lu\n", points);
+
+	return 0;
 }
 
 struct limit_names {
@@ -432,15 +438,15 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
 	 * print the file header
 	 */
 	seq_printf(m, "%-25s %-20s %-20s %-10s\n",
 		   "Limit", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
 			seq_printf(m, "%-25s %-20s ",
 				   lnames[i].name, "unlimited");
 		else
 			seq_printf(m, "%-25s %-20lu ",
 				   lnames[i].name, rlim[i].rlim_cur);
 
 		if (rlim[i].rlim_max == RLIM_INFINITY)
 			seq_printf(m, "%-20s ", "unlimited");
@@ -462,7 +468,9 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
 {
 	long nr;
 	unsigned long args[6], sp, pc;
-	int res = lock_trace(task);
+	int res;
+
+	res = lock_trace(task);
 	if (res)
 		return res;
 
@@ -477,7 +485,8 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
 		   args[0], args[1], args[2], args[3], args[4], args[5],
 		   sp, pc);
 	unlock_trace(task);
-	return res;
+
+	return 0;
 }
 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
 
@@ -2002,12 +2011,13 @@ static int show_timer(struct seq_file *m, void *v)
 	notify = timer->it_sigev_notify;
 
 	seq_printf(m, "ID: %d\n", timer->it_id);
-	seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo,
-			timer->sigq->info.si_value.sival_ptr);
+	seq_printf(m, "signal: %d/%p\n",
+		   timer->sigq->info.si_signo,
+		   timer->sigq->info.si_value.sival_ptr);
 	seq_printf(m, "notify: %s/%s.%d\n",
 		   nstr[notify & ~SIGEV_THREAD_ID],
 		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
 		   pid_nr_ns(timer->it_pid, tp->ns));
 	seq_printf(m, "ClockID: %d\n", timer->it_clock);
 
 	return 0;
@@ -2352,21 +2362,23 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
 
 		unlock_task_sighand(task, &flags);
 	}
-	result = seq_printf(m,
+	seq_printf(m,
 		   "rchar: %llu\n"
 		   "wchar: %llu\n"
 		   "syscr: %llu\n"
 		   "syscw: %llu\n"
 		   "read_bytes: %llu\n"
 		   "write_bytes: %llu\n"
 		   "cancelled_write_bytes: %llu\n",
 		   (unsigned long long)acct.rchar,
 		   (unsigned long long)acct.wchar,
 		   (unsigned long long)acct.syscr,
 		   (unsigned long long)acct.syscw,
 		   (unsigned long long)acct.read_bytes,
 		   (unsigned long long)acct.write_bytes,
 		   (unsigned long long)acct.cancelled_write_bytes);
+	result = 0;
+
 out_unlock:
 	mutex_unlock(&task->signal->cred_guard_mutex);
 	return result;
diff --git a/fs/splice.c b/fs/splice.c
index 41cbb16299e0..476024bb6546 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -523,6 +523,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 	loff_t isize, left;
 	int ret;
 
+	if (IS_DAX(in->f_mapping->host))
+		return default_file_splice_read(in, ppos, pipe, len, flags);
+
 	isize = i_size_read(in->f_mapping->host);
 	if (unlikely(*ppos >= isize))
 		return 0;
diff --git a/include/linux/a.out.h b/include/linux/a.out.h
index 220f14338895..ee884168989f 100644
--- a/include/linux/a.out.h
+++ b/include/linux/a.out.h
@@ -4,44 +4,6 @@
 #include <uapi/linux/a.out.h>
 
 #ifndef __ASSEMBLY__
-#if defined (M_OLDSUN2)
-#else
-#endif
-#if defined (M_68010)
-#else
-#endif
-#if defined (M_68020)
-#else
-#endif
-#if defined (M_SPARC)
-#else
-#endif
-#if !defined (N_MAGIC)
-#endif
-#if !defined (N_BADMAG)
-#endif
-#if !defined (N_TXTOFF)
-#endif
-#if !defined (N_DATOFF)
-#endif
-#if !defined (N_TRELOFF)
-#endif
-#if !defined (N_DRELOFF)
-#endif
-#if !defined (N_SYMOFF)
-#endif
-#if !defined (N_STROFF)
-#endif
-#if !defined (N_TXTADDR)
-#endif
-#if defined(vax) || defined(hp300) || defined(pyr)
-#endif
-#ifdef sony
-#endif /* Sony. */
-#ifdef is68k
-#endif
-#if defined(m68k) && defined(PORTAR)
-#endif
 #ifdef linux
 #include <asm/page.h>
 #if defined(__i386__) || defined(__mc68000__)
@@ -51,34 +13,5 @@
 #endif
 #endif
 #endif
-#ifndef N_DATADDR
-#endif
-#if !defined (N_BSSADDR)
-#endif
-#if !defined (N_NLIST_DECLARED)
-#endif /* no N_NLIST_DECLARED. */
-#if !defined (N_UNDF)
-#endif
-#if !defined (N_ABS)
-#endif
-#if !defined (N_TEXT)
-#endif
-#if !defined (N_DATA)
-#endif
-#if !defined (N_BSS)
-#endif
-#if !defined (N_FN)
-#endif
-#if !defined (N_EXT)
-#endif
-#if !defined (N_TYPE)
-#endif
-#if !defined (N_STAB)
-#endif
-#if !defined (N_RELOCATION_INFO_DECLARED)
-#ifdef NS32K
-#else
-#endif
-#endif /* no N_RELOCATION_INFO_DECLARED. */
 #endif /*__ASSEMBLY__ */
 #endif /* __A_OUT_GNU_H__ */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index dbfbf4990005..be4fa5ddf36c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -172,12 +172,8 @@ extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int
 extern int bitmap_print_to_pagebuf(bool list, char *buf,
 				   const unsigned long *maskp, int nmaskbits);
 
-#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
-#define BITMAP_LAST_WORD_MASK(nbits)					\
-(									\
-	((nbits) % BITS_PER_LONG) ?					\
-		(1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL		\
-)
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
+#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
 
 #define small_const_nbits(nbits) \
 	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
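
A quick check that the new branch-free BITMAP_LAST_WORD_MASK() matches the old conditional form; a standalone sketch assuming 64-bit longs:

	#include <assert.h>

	#define BITS_PER_LONG 64
	#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))

	int main(void)
	{
		/* -(5) & 63 == 59, so ~0UL >> 59 leaves exactly the 5 low bits set */
		assert(BITMAP_LAST_WORD_MASK(5) == 0x1fUL);
		/* a multiple of the word size shifts by 0, giving a full mask */
		assert(BITMAP_LAST_WORD_MASK(64) == ~0UL);
		return 0;
	}
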
diff --git a/include/linux/capability.h b/include/linux/capability.h
index aa93e5ef594c..af9f0b9e80e6 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -205,6 +205,7 @@ static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a,
 				  cap_intersect(permitted, __cap_nfsd_set));
 }
 
+#ifdef CONFIG_MULTIUSER
 extern bool has_capability(struct task_struct *t, int cap);
 extern bool has_ns_capability(struct task_struct *t,
 			      struct user_namespace *ns, int cap);
@@ -213,6 +214,34 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
+#else
+static inline bool has_capability(struct task_struct *t, int cap)
+{
+	return true;
+}
+static inline bool has_ns_capability(struct task_struct *t,
+				     struct user_namespace *ns, int cap)
+{
+	return true;
+}
+static inline bool has_capability_noaudit(struct task_struct *t, int cap)
+{
+	return true;
+}
+static inline bool has_ns_capability_noaudit(struct task_struct *t,
+					     struct user_namespace *ns, int cap)
+{
+	return true;
+}
+static inline bool capable(int cap)
+{
+	return true;
+}
+static inline bool ns_capable(struct user_namespace *ns, int cap)
+{
+	return true;
+}
+#endif /* CONFIG_MULTIUSER */
 extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index a014559e4a49..aa8f61cf3a19 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -34,6 +34,7 @@ extern int sysctl_compaction_handler(struct ctl_table *table, int write,
 extern int sysctl_extfrag_threshold;
 extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 			void __user *buffer, size_t *length, loff_t *ppos);
+extern int sysctl_compact_unevictable_allowed;
 
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 2fb2ca2127ed..8b6c083e68a7 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -62,9 +62,27 @@ do { \
 	groups_free(group_info);		\
 } while (0)
 
-extern struct group_info *groups_alloc(int);
 extern struct group_info init_groups;
+#ifdef CONFIG_MULTIUSER
+extern struct group_info *groups_alloc(int);
 extern void groups_free(struct group_info *);
+
+extern int in_group_p(kgid_t);
+extern int in_egroup_p(kgid_t);
+#else
+static inline void groups_free(struct group_info *group_info)
+{
+}
+
+static inline int in_group_p(kgid_t grp)
+{
+	return 1;
+}
+static inline int in_egroup_p(kgid_t grp)
+{
+	return 1;
+}
+#endif
 extern int set_current_groups(struct group_info *);
 extern void set_groups(struct cred *, struct group_info *);
 extern int groups_search(const struct group_info *, kgid_t);
@@ -74,9 +92,6 @@ extern bool may_setgroups(void);
 #define GROUP_AT(gi, i) \
 	((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK])
 
-extern int in_group_p(kgid_t);
-extern int in_egroup_p(kgid_t);
-
 /*
  * The security context of a task
  *
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 90a1207231ea..f4fc60727b8d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2615,6 +2615,7 @@ int dax_clear_blocks(struct inode *, sector_t block, long size);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
+int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 #define dax_mkwrite(vma, vmf, gb)	dax_fault(vma, vmf, gb)
 
 #ifdef CONFIG_BLOCK
@@ -2679,7 +2680,6 @@ void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
 void inode_set_bytes(struct inode *inode, loff_t bytes);
 
-extern int vfs_readdir(struct file *, filldir_t, void *);
 extern int iterate_dir(struct file *, struct dir_context *);
 
 extern int vfs_stat(const char __user *, struct kstat *);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7b5785032049..205026175c42 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -22,7 +22,13 @@ struct mmu_gather;
 struct hugepage_subpool {
 	spinlock_t lock;
 	long count;
-	long max_hpages, used_hpages;
+	long max_hpages;	/* Maximum huge pages or -1 if no maximum. */
+	long used_hpages;	/* Used count against maximum, includes */
+				/* both alloced and reserved pages. */
+	struct hstate *hstate;
+	long min_hpages;	/* Minimum huge pages or -1 if no minimum. */
+	long rsv_hpages;	/* Pages reserved against global pool to */
+				/* satisfy minimum size. */
 };
 
 struct resv_map {
@@ -38,11 +44,10 @@ extern int hugetlb_max_hstate __read_mostly;
 #define for_each_hstate(h) \
 	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)
 
-struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
+struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
+						long min_hpages);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
-int PageHuge(struct page *page);
-
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -79,7 +84,6 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 int dequeue_hwpoisoned_huge_page(struct page *page);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
-bool is_hugepage_active(struct page *page);
 void free_huge_page(struct page *page);
 
 #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
@@ -109,11 +113,6 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 
 #else	/* !CONFIG_HUGETLB_PAGE */
 
-static inline int PageHuge(struct page *page)
-{
-	return 0;
-}
-
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 }
@@ -152,7 +151,6 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list)
 	return false;
 }
 #define putback_active_hugepage(p)	do {} while (0)
-#define is_hugepage_active(x)	false
 
 static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 			unsigned long address, unsigned long end, pgprot_t newprot)
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 2c5250222278..388e3ae94f7a 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -196,10 +196,8 @@ extern struct resource * __request_region(struct resource *,
 
 /* Compatibility cruft */
 #define release_region(start,n)	__release_region(&ioport_resource, (start), (n))
-#define check_mem_region(start,n)	__check_region(&iomem_resource, (start), (n))
 #define release_mem_region(start,n)	__release_region(&iomem_resource, (start), (n))
 
-extern int __check_region(struct resource *, resource_size_t, resource_size_t);
 extern void __release_region(struct resource *, resource_size_t,
 				resource_size_t);
 #ifdef CONFIG_MEMORY_HOTREMOVE
@@ -207,12 +205,6 @@ extern int release_mem_region_adjustable(struct resource *, resource_size_t,
 				resource_size_t);
 #endif
 
-static inline int __deprecated check_region(resource_size_t s,
-						resource_size_t n)
-{
-	return __check_region(&ioport_resource, s, n);
-}
-
 /* Wrappers for managed devices */
 struct device;
 
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 5bb074431eb0..5486d777b706 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -44,6 +44,7 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object);
 
 void kasan_kmalloc_large(const void *ptr, size_t size);
 void kasan_kfree_large(const void *ptr);
+void kasan_kfree(void *ptr);
 void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size);
 void kasan_krealloc(const void *object, size_t new_size);
 
@@ -71,6 +72,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache,
 
 static inline void kasan_kmalloc_large(void *ptr, size_t size) {}
 static inline void kasan_kfree_large(const void *ptr) {}
+static inline void kasan_kfree(void *ptr) {}
 static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
 				size_t size) {}
 static inline void kasan_krealloc(const void *object, size_t new_size) {}
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 3be6bb18562d..7ae216a39c9e 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -35,18 +35,6 @@ static inline void ksm_exit(struct mm_struct *mm)
 		__ksm_exit(mm);
 }
 
-/*
- * A KSM page is one of those write-protected "shared pages" or "merged pages"
- * which KSM maps into multiple mms, wherever identical anonymous page content
- * is found in VM_MERGEABLE vmas.  It's a PageAnon page, pointing not to any
- * anon_vma, but to that page's node of the stable tree.
- */
-static inline int PageKsm(struct page *page)
-{
-	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
-				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
-}
-
 static inline struct stable_node *page_stable_node(struct page *page)
 {
 	return PageKsm(page) ? page_rmapping(page) : NULL;
@@ -87,11 +75,6 @@ static inline void ksm_exit(struct mm_struct *mm)
 {
 }
 
-static inline int PageKsm(struct page *page)
-{
-	return 0;
-}
-
 #ifdef CONFIG_MMU
 static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags)
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index b19b3023c880..69b6951e8fd2 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -36,7 +36,8 @@ extern void mempool_free(void *element, mempool_t *pool);
 
 /*
  * A mempool_alloc_t and mempool_free_t that get the memory from
- * a slab that is passed in through pool_data.
+ * a slab cache that is passed in through pool_data.
+ * Note: the slab cache may not have a ctor function.
  */
 void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data);
 void mempool_free_slab(void *element, void *pool_data);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6571dd78e984..8b086070c3a5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -251,6 +251,9 @@ struct vm_operations_struct {
 	 * writable, if an error is returned it will cause a SIGBUS */
 	int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
 
+	/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
+	int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+
 	/* called by access_process_vm when get_user_pages() fails, typically
 	 * for use by special VMAs that can switch between memory and hardware
 	 */
@@ -494,18 +497,9 @@ static inline int page_count(struct page *page)
 	return atomic_read(&compound_head(page)->_count);
 }
 
-#ifdef CONFIG_HUGETLB_PAGE
-extern int PageHeadHuge(struct page *page_head);
-#else /* CONFIG_HUGETLB_PAGE */
-static inline int PageHeadHuge(struct page *page_head)
-{
-	return 0;
-}
-#endif /* CONFIG_HUGETLB_PAGE */
-
 static inline bool __compound_tail_refcounted(struct page *page)
 {
-	return !PageSlab(page) && !PageHeadHuge(page);
+	return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page);
 }
 
 /*
@@ -571,53 +565,6 @@ static inline void init_page_count(struct page *page)
 	atomic_set(&page->_count, 1);
 }
 
-/*
- * PageBuddy() indicate that the page is free and in the buddy system
- * (see mm/page_alloc.c).
- *
- * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to
- * -2 so that an underflow of the page_mapcount() won't be mistaken
- * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very
- * efficiently by most CPU architectures.
- */
-#define PAGE_BUDDY_MAPCOUNT_VALUE (-128)
-
-static inline int PageBuddy(struct page *page)
-{
-	return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE;
-}
-
-static inline void __SetPageBuddy(struct page *page)
-{
-	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
-	atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);
-}
-
-static inline void __ClearPageBuddy(struct page *page)
-{
-	VM_BUG_ON_PAGE(!PageBuddy(page), page);
-	atomic_set(&page->_mapcount, -1);
-}
-
-#define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
-
-static inline int PageBalloon(struct page *page)
-{
-	return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE;
-}
-
-static inline void __SetPageBalloon(struct page *page)
-{
-	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
-	atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE);
-}
-
-static inline void __ClearPageBalloon(struct page *page)
-{
-	VM_BUG_ON_PAGE(!PageBalloon(page), page);
-	atomic_set(&page->_mapcount, -1);
-}
-
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
@@ -1006,34 +953,10 @@ void page_address_init(void);
 #define page_address_init()  do { } while(0)
 #endif
 
-/*
- * On an anonymous page mapped into a user virtual memory area,
- * page->mapping points to its anon_vma, not to a struct address_space;
- * with the PAGE_MAPPING_ANON bit set to distinguish it.  See rmap.h.
- *
- * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
- * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit;
- * and then page->mapping points, not to an anon_vma, but to a private
- * structure which KSM associates with that merged page.  See ksm.h.
- *
- * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used.
- *
- * Please note that, confusingly, "page_mapping" refers to the inode
- * address_space which maps the page from disk; whereas "page_mapped"
- * refers to user virtual address space into which the page is mapped.
- */
-#define PAGE_MAPPING_ANON	1
-#define PAGE_MAPPING_KSM	2
-#define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)
-
+extern void *page_rmapping(struct page *page);
+extern struct anon_vma *page_anon_vma(struct page *page);
 extern struct address_space *page_mapping(struct page *page);
 
-/* Neutral page->mapping pointer to address_space or anon_vma or other */
-static inline void *page_rmapping(struct page *page)
-{
-	return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
-}
-
 extern struct address_space *__page_file_mapping(struct page *);
 
 static inline
@@ -1045,11 +968,6 @@ struct address_space *page_file_mapping(struct page *page)
 	return page->mapping;
 }
 
-static inline int PageAnon(struct page *page)
-{
-	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
-}
-
 /*
  * Return the pagecache index of the passed page.  Regular pagecache pages
  * use ->index whereas swapcache pages use ->private
@@ -1975,10 +1893,10 @@ extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);
 static inline unsigned long
 vm_unmapped_area(struct vm_unmapped_area_info *info)
 {
-	if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN))
-		return unmapped_area(info);
-	else
+	if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
 		return unmapped_area_topdown(info);
+	else
+		return unmapped_area(info);
 }
 
 /* truncate.c */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2782df47101e..54d74f6eb233 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -842,16 +842,16 @@ static inline int populated_zone(struct zone *zone)
 
 extern int movable_zone;
 
+#ifdef CONFIG_HIGHMEM
 static inline int zone_movable_is_highmem(void)
 {
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	return movable_zone == ZONE_HIGHMEM;
-#elif defined(CONFIG_HIGHMEM)
-	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
 #else
-	return 0;
+	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
 #endif
 }
+#endif
 
 static inline int is_highmem_idx(enum zone_type idx)
 {
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c851ff92d5b3..f34e040b34e9 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -289,6 +289,47 @@ PAGEFLAG_FALSE(HWPoison)
 #define __PG_HWPOISON 0
 #endif
 
+/*
+ * On an anonymous page mapped into a user virtual memory area,
+ * page->mapping points to its anon_vma, not to a struct address_space;
+ * with the PAGE_MAPPING_ANON bit set to distinguish it.  See rmap.h.
+ *
+ * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
+ * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit;
+ * and then page->mapping points, not to an anon_vma, but to a private
+ * structure which KSM associates with that merged page.  See ksm.h.
+ *
+ * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used.
+ *
+ * Please note that, confusingly, "page_mapping" refers to the inode
+ * address_space which maps the page from disk; whereas "page_mapped"
+ * refers to user virtual address space into which the page is mapped.
+ */
+#define PAGE_MAPPING_ANON	1
+#define PAGE_MAPPING_KSM	2
+#define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)
+
+static inline int PageAnon(struct page *page)
+{
+	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
+}
+
+#ifdef CONFIG_KSM
+/*
+ * A KSM page is one of those write-protected "shared pages" or "merged pages"
+ * which KSM maps into multiple mms, wherever identical anonymous page content
+ * is found in VM_MERGEABLE vmas.  It's a PageAnon page, pointing not to any
+ * anon_vma, but to that page's node of the stable tree.
+ */
+static inline int PageKsm(struct page *page)
+{
+	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
+				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
+}
+#else
+TESTPAGEFLAG_FALSE(Ksm)
+#endif
+
 u64 stable_page_flags(struct page *page);
 
 static inline int PageUptodate(struct page *page)
@@ -426,6 +467,21 @@ static inline void ClearPageCompound(struct page *page)
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
+#ifdef CONFIG_HUGETLB_PAGE
+int PageHuge(struct page *page);
+int PageHeadHuge(struct page *page);
+bool page_huge_active(struct page *page);
+#else
+TESTPAGEFLAG_FALSE(Huge)
+TESTPAGEFLAG_FALSE(HeadHuge)
+
+static inline bool page_huge_active(struct page *page)
+{
+	return 0;
+}
+#endif
+
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * PageHuge() only returns true for hugetlbfs pages, but not for
@@ -480,6 +536,53 @@ static inline int PageTransTail(struct page *page)
 #endif
 
 /*
+ * PageBuddy() indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
+ * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to
+ * -2 so that an underflow of the page_mapcount() won't be mistaken
+ * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very
+ * efficiently by most CPU architectures.
+ */
+#define PAGE_BUDDY_MAPCOUNT_VALUE (-128)
+
+static inline int PageBuddy(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageBuddy(struct page *page)
+{
+	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
+	atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageBuddy(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageBuddy(page), page);
+	atomic_set(&page->_mapcount, -1);
+}
+
+#define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
+
+static inline int PageBalloon(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageBalloon(struct page *page)
+{
+	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
+	atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageBalloon(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageBalloon(page), page);
+	atomic_set(&page->_mapcount, -1);
+}
+
+/*
  * If network-based swap is enabled, sl*b must keep track of whether pages
  * were allocated from pfmemalloc reserves.
  */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index baa3f97d8ce8..9b30871c9149 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -255,6 +255,11 @@ extern asmlinkage void dump_stack(void) __cold;
255 printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) 255 printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
256#define pr_info(fmt, ...) \ 256#define pr_info(fmt, ...) \
257 printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) 257 printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
258/*
259 * Like KERN_CONT, pr_cont() should only be used when continuing
260 * a line with no newline ('\n') enclosed. Otherwise it defaults
261 * back to KERN_DEFAULT.
262 */
258#define pr_cont(fmt, ...) \ 263#define pr_cont(fmt, ...) \
259 printk(KERN_CONT fmt, ##__VA_ARGS__) 264 printk(KERN_CONT fmt, ##__VA_ARGS__)
260 265
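
For reference, the usage pattern the new comment describes, sketched (names illustrative, not from the patch):

	/* Build one console line across several calls; only the last adds '\n'. */
	static void report_units(const int *unit, int n)
	{
		int i;

		pr_info("checking units:");
		for (i = 0; i < n; i++)
			pr_cont(" %d", unit[i]);
		pr_cont(" done\n");
	}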
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 67fc8fcdc4b0..a7ff409f386d 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -70,7 +70,8 @@ void ctrl_alt_del(void);
70#define POWEROFF_CMD_PATH_LEN 256 70#define POWEROFF_CMD_PATH_LEN 256
71extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; 71extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN];
72 72
-extern int orderly_poweroff(bool force);
+extern void orderly_poweroff(bool force);
+extern void orderly_reboot(void);
74 75
75/* 76/*
76 * Emergency restart, callable from an interrupt handler. 77 * Emergency restart, callable from an interrupt handler.
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c4c559a45dc8..c89c53a113a8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -105,14 +105,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma)
105 __put_anon_vma(anon_vma); 105 __put_anon_vma(anon_vma);
106} 106}
107 107
108static inline struct anon_vma *page_anon_vma(struct page *page)
109{
110 if (((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) !=
111 PAGE_MAPPING_ANON)
112 return NULL;
113 return page_rmapping(page);
114}
115
116static inline void vma_lock_anon_vma(struct vm_area_struct *vma) 108static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
117{ 109{
118 struct anon_vma *anon_vma = vma->anon_vma; 110 struct anon_vma *anon_vma = vma->anon_vma;
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 657571817260..0991913f4953 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -47,22 +47,22 @@ static inline int string_unescape_any_inplace(char *buf)
 #define ESCAPE_ANY_NP	(ESCAPE_ANY | ESCAPE_NP)
 #define ESCAPE_HEX	0x20
 
-int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
+int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
 		unsigned int flags, const char *esc);
 
 static inline int string_escape_mem_any_np(const char *src, size_t isz,
-		char **dst, size_t osz, const char *esc)
+		char *dst, size_t osz, const char *esc)
 {
 	return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc);
 }
 
-static inline int string_escape_str(const char *src, char **dst, size_t sz,
+static inline int string_escape_str(const char *src, char *dst, size_t sz,
 		unsigned int flags, const char *esc)
 {
 	return string_escape_mem(src, strlen(src), dst, sz, flags, esc);
 }
 
-static inline int string_escape_str_any_np(const char *src, char **dst,
+static inline int string_escape_str_any_np(const char *src, char *dst,
 		size_t sz, const char *esc)
 {
 	return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc);
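
With dst now passed by value, callers follow the snprintf-like contract spelled out in lib/string_helpers.c further down: the return value is the full would-be output size, so truncation is detected by comparing it with the buffer size. A sketch (buffer and caller illustrative):

	/* Escape 'src' into a fixed buffer and detect truncation. */
	static int show_escaped(const char *src)
	{
		char buf[64];
		int want = string_escape_str(src, buf, sizeof(buf), ESCAPE_ANY_NP, NULL);

		if (want >= sizeof(buf))
			return -E2BIG;	/* 'want' bytes would have been needed */

		buf[want] = '\0';	/* want < osz guarantees room for the NUL */
		pr_info("%s\n", buf);
		return 0;
	}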
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7067eca501e2..cee108cbe2d5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -307,7 +307,7 @@ extern void lru_add_drain(void);
307extern void lru_add_drain_cpu(int cpu); 307extern void lru_add_drain_cpu(int cpu);
308extern void lru_add_drain_all(void); 308extern void lru_add_drain_all(void);
309extern void rotate_reclaimable_page(struct page *page); 309extern void rotate_reclaimable_page(struct page *page);
-extern void deactivate_page(struct page *page);
+extern void deactivate_file_page(struct page *page);
311extern void swap_setup(void); 311extern void swap_setup(void);
312 312
313extern void add_page_to_unevictable_list(struct page *page); 313extern void add_page_to_unevictable_list(struct page *page);
diff --git a/include/linux/types.h b/include/linux/types.h
index 6747247e3f9f..59698be03490 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -146,12 +146,6 @@ typedef u64 dma_addr_t;
146typedef u32 dma_addr_t; 146typedef u32 dma_addr_t;
147#endif /* dma_addr_t */ 147#endif /* dma_addr_t */
148 148
149#ifdef __CHECKER__
150#else
151#endif
152#ifdef __CHECK_ENDIAN__
153#else
154#endif
155typedef unsigned __bitwise__ gfp_t; 149typedef unsigned __bitwise__ gfp_t;
156typedef unsigned __bitwise__ fmode_t; 150typedef unsigned __bitwise__ fmode_t;
157typedef unsigned __bitwise__ oom_flags_t; 151typedef unsigned __bitwise__ oom_flags_t;
diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index 2d1f9b627f91..0ee05da38899 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -29,6 +29,7 @@ typedef struct {
29#define KUIDT_INIT(value) (kuid_t){ value } 29#define KUIDT_INIT(value) (kuid_t){ value }
30#define KGIDT_INIT(value) (kgid_t){ value } 30#define KGIDT_INIT(value) (kgid_t){ value }
31 31
32#ifdef CONFIG_MULTIUSER
32static inline uid_t __kuid_val(kuid_t uid) 33static inline uid_t __kuid_val(kuid_t uid)
33{ 34{
34 return uid.val; 35 return uid.val;
@@ -38,6 +39,17 @@ static inline gid_t __kgid_val(kgid_t gid)
38{ 39{
39 return gid.val; 40 return gid.val;
40} 41}
42#else
43static inline uid_t __kuid_val(kuid_t uid)
44{
45 return 0;
46}
47
48static inline gid_t __kgid_val(kgid_t gid)
49{
50 return 0;
51}
52#endif
41 53
42#define GLOBAL_ROOT_UID KUIDT_INIT(0) 54#define GLOBAL_ROOT_UID KUIDT_INIT(0)
43#define GLOBAL_ROOT_GID KGIDT_INIT(0) 55#define GLOBAL_ROOT_GID KGIDT_INIT(0)
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 3283c6a55425..1338190b5478 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -47,5 +47,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
47void zs_unmap_object(struct zs_pool *pool, unsigned long handle); 47void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
48 48
49unsigned long zs_get_total_pages(struct zs_pool *pool); 49unsigned long zs_get_total_pages(struct zs_pool *pool);
50unsigned long zs_compact(struct zs_pool *pool);
50 51
51#endif 52#endif
diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
new file mode 100644
index 000000000000..d7cd961720a7
--- /dev/null
+++ b/include/trace/events/cma.h
@@ -0,0 +1,66 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM cma
3
4#if !defined(_TRACE_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_CMA_H
6
7#include <linux/types.h>
8#include <linux/tracepoint.h>
9
10TRACE_EVENT(cma_alloc,
11
12 TP_PROTO(unsigned long pfn, const struct page *page,
13 unsigned int count, unsigned int align),
14
15 TP_ARGS(pfn, page, count, align),
16
17 TP_STRUCT__entry(
18 __field(unsigned long, pfn)
19 __field(const struct page *, page)
20 __field(unsigned int, count)
21 __field(unsigned int, align)
22 ),
23
24 TP_fast_assign(
25 __entry->pfn = pfn;
26 __entry->page = page;
27 __entry->count = count;
28 __entry->align = align;
29 ),
30
31 TP_printk("pfn=%lx page=%p count=%u align=%u",
32 __entry->pfn,
33 __entry->page,
34 __entry->count,
35 __entry->align)
36);
37
38TRACE_EVENT(cma_release,
39
40 TP_PROTO(unsigned long pfn, const struct page *page,
41 unsigned int count),
42
43 TP_ARGS(pfn, page, count),
44
45 TP_STRUCT__entry(
46 __field(unsigned long, pfn)
47 __field(const struct page *, page)
48 __field(unsigned int, count)
49 ),
50
51 TP_fast_assign(
52 __entry->pfn = pfn;
53 __entry->page = page;
54 __entry->count = count;
55 ),
56
57 TP_printk("pfn=%lx page=%p count=%u",
58 __entry->pfn,
59 __entry->page,
60 __entry->count)
61);
62
63#endif /* _TRACE_CMA_H */
64
65/* This part must be outside protection */
66#include <trace/define_trace.h>
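
The two tracepoints are consumed the usual way: exactly one compilation unit defines CREATE_TRACE_POINTS before including this header (mm/cma.c does so later in this series), and call sites then invoke the generated stubs. Condensed from the mm/cma.c hunks below:

	/* In exactly one .c file (here: mm/cma.c): */
	#define CREATE_TRACE_POINTS
	#include <trace/events/cma.h>

	/* At the call sites: */
	trace_cma_alloc(page ? pfn : -1UL, page, count, align);
	trace_cma_release(pfn, pages, count);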
diff --git a/init/Kconfig b/init/Kconfig
index a905b7301e10..3b9df1aa35db 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -394,6 +394,7 @@ endchoice
394 394
395config BSD_PROCESS_ACCT 395config BSD_PROCESS_ACCT
396 bool "BSD Process Accounting" 396 bool "BSD Process Accounting"
397 depends on MULTIUSER
397 help 398 help
398 If you say Y here, a user level program will be able to instruct the 399 If you say Y here, a user level program will be able to instruct the
399 kernel (via a special system call) to write process accounting 400 kernel (via a special system call) to write process accounting
@@ -420,6 +421,7 @@ config BSD_PROCESS_ACCT_V3
420config TASKSTATS 421config TASKSTATS
421 bool "Export task/process statistics through netlink" 422 bool "Export task/process statistics through netlink"
422 depends on NET 423 depends on NET
424 depends on MULTIUSER
423 default n 425 default n
424 help 426 help
425 Export selected statistics for tasks/processes through the 427 Export selected statistics for tasks/processes through the
@@ -1160,6 +1162,7 @@ config CHECKPOINT_RESTORE
1160 1162
1161menuconfig NAMESPACES 1163menuconfig NAMESPACES
1162 bool "Namespaces support" if EXPERT 1164 bool "Namespaces support" if EXPERT
1165 depends on MULTIUSER
1163 default !EXPERT 1166 default !EXPERT
1164 help 1167 help
1165 Provides the way to make tasks work with different objects using 1168 Provides the way to make tasks work with different objects using
@@ -1356,11 +1359,25 @@ menuconfig EXPERT
1356 1359
1357config UID16 1360config UID16
1358 bool "Enable 16-bit UID system calls" if EXPERT 1361 bool "Enable 16-bit UID system calls" if EXPERT
-	depends on HAVE_UID16
+	depends on HAVE_UID16 && MULTIUSER
1360 default y 1363 default y
1361 help 1364 help
1362 This enables the legacy 16-bit UID syscall wrappers. 1365 This enables the legacy 16-bit UID syscall wrappers.
1363 1366
1367config MULTIUSER
1368 bool "Multiple users, groups and capabilities support" if EXPERT
1369 default y
1370 help
1371 This option enables support for non-root users, groups and
1372 capabilities.
1373
1374 If you say N here, all processes will run with UID 0, GID 0, and all
1375 possible capabilities. Saying N here also compiles out support for
1376 system calls related to UIDs, GIDs, and capabilities, such as setuid,
1377 setgid, and capset.
1378
1379 If unsure, say Y here.
1380
1364config SGETMASK_SYSCALL 1381config SGETMASK_SYSCALL
1365 bool "sgetmask/ssetmask syscalls support" if EXPERT 1382 bool "sgetmask/ssetmask syscalls support" if EXPERT
1366 def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH 1383 def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH
diff --git a/ipc/msg.c b/ipc/msg.c
index a7261d5cbc89..2b6fdbb9e0e9 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -1015,22 +1015,24 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
1015 struct user_namespace *user_ns = seq_user_ns(s); 1015 struct user_namespace *user_ns = seq_user_ns(s);
1016 struct msg_queue *msq = it; 1016 struct msg_queue *msq = it;
1017 1017
-	return seq_printf(s,
+	seq_printf(s,
 		"%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
 		msq->q_perm.key,
 		msq->q_perm.id,
 		msq->q_perm.mode,
 		msq->q_cbytes,
 		msq->q_qnum,
 		msq->q_lspid,
 		msq->q_lrpid,
 		from_kuid_munged(user_ns, msq->q_perm.uid),
 		from_kgid_munged(user_ns, msq->q_perm.gid),
 		from_kuid_munged(user_ns, msq->q_perm.cuid),
 		from_kgid_munged(user_ns, msq->q_perm.cgid),
 		msq->q_stime,
 		msq->q_rtime,
 		msq->q_ctime);
+
+	return 0;
 }
1035#endif 1037#endif
1036 1038
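
This and the following hunks are one tree-wide conversion: seq_printf() and friends are no longer used for their return value, so every ->show() handler now returns 0 unconditionally and the seq_file core handles overflow itself. The resulting idiom, sketched (handler name illustrative):

	static int foo_show(struct seq_file *s, void *v)
	{
		seq_printf(s, "%d\n", *(int *)v);	/* return value no longer consulted */
		return 0;	/* the core retries via seq_has_overflowed(), not via ret */
	}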
diff --git a/ipc/sem.c b/ipc/sem.c
index 92842113c6a9..d1a6edd17eba 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2170,17 +2170,19 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2170 2170
2171 sem_otime = get_semotime(sma); 2171 sem_otime = get_semotime(sma);
2172 2172
-	return seq_printf(s,
+	seq_printf(s,
 		"%10d %10d  %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
 		sma->sem_perm.key,
 		sma->sem_perm.id,
 		sma->sem_perm.mode,
 		sma->sem_nsems,
 		from_kuid_munged(user_ns, sma->sem_perm.uid),
 		from_kgid_munged(user_ns, sma->sem_perm.gid),
 		from_kuid_munged(user_ns, sma->sem_perm.cuid),
 		from_kgid_munged(user_ns, sma->sem_perm.cgid),
 		sem_otime,
 		sma->sem_ctime);
+
+	return 0;
 }
2186#endif 2188#endif
diff --git a/ipc/shm.c b/ipc/shm.c
index 19633b4a2350..d280a74af2ef 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1342,25 +1342,27 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1342#define SIZE_SPEC "%21lu" 1342#define SIZE_SPEC "%21lu"
1343#endif 1343#endif
1344 1344
-	return seq_printf(s,
+	seq_printf(s,
 		"%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
 		"%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
 		SIZE_SPEC " " SIZE_SPEC "\n",
 		shp->shm_perm.key,
 		shp->shm_perm.id,
 		shp->shm_perm.mode,
 		shp->shm_segsz,
 		shp->shm_cprid,
 		shp->shm_lprid,
 		shp->shm_nattch,
 		from_kuid_munged(user_ns, shp->shm_perm.uid),
 		from_kgid_munged(user_ns, shp->shm_perm.gid),
 		from_kuid_munged(user_ns, shp->shm_perm.cuid),
 		from_kgid_munged(user_ns, shp->shm_perm.cgid),
 		shp->shm_atim,
 		shp->shm_dtim,
 		shp->shm_ctim,
 		rss * PAGE_SIZE,
 		swp * PAGE_SIZE);
+
+	return 0;
 }
1366#endif 1368#endif
diff --git a/ipc/util.c b/ipc/util.c
index 106bed0378ab..ff3323ef8d8b 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -837,8 +837,10 @@ static int sysvipc_proc_show(struct seq_file *s, void *it)
837 struct ipc_proc_iter *iter = s->private; 837 struct ipc_proc_iter *iter = s->private;
838 struct ipc_proc_iface *iface = iter->iface; 838 struct ipc_proc_iface *iface = iter->iface;
839 839
-	if (it == SEQ_START_TOKEN)
-		return seq_puts(s, iface->header);
+	if (it == SEQ_START_TOKEN) {
+		seq_puts(s, iface->header);
+		return 0;
+	}
842 844
843 return iface->show(s, it); 845 return iface->show(s, it);
844} 846}
diff --git a/kernel/Makefile b/kernel/Makefile
index 1408b3353a3c..0f8f8b0bc1bf 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,9 @@ obj-y = fork.o exec_domain.o panic.o \
9 extable.o params.o \ 9 extable.o params.o \
10 kthread.o sys_ni.o nsproxy.o \ 10 kthread.o sys_ni.o nsproxy.o \
11 notifier.o ksysfs.o cred.o reboot.o \ 11 notifier.o ksysfs.o cred.o reboot.o \
-	    async.o range.o groups.o smpboot.o
+	    async.o range.o smpboot.o
+
+obj-$(CONFIG_MULTIUSER) += groups.o
13 15
14ifdef CONFIG_FUNCTION_TRACER 16ifdef CONFIG_FUNCTION_TRACER
15# Do not trace debug files and internal ftrace files 17# Do not trace debug files and internal ftrace files
diff --git a/kernel/capability.c b/kernel/capability.c
index 989f5bfc57dc..45432b54d5c6 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -35,6 +35,7 @@ static int __init file_caps_disable(char *str)
35} 35}
36__setup("no_file_caps", file_caps_disable); 36__setup("no_file_caps", file_caps_disable);
37 37
38#ifdef CONFIG_MULTIUSER
38/* 39/*
39 * More recent versions of libcap are available from: 40 * More recent versions of libcap are available from:
40 * 41 *
@@ -386,6 +387,24 @@ bool ns_capable(struct user_namespace *ns, int cap)
386} 387}
387EXPORT_SYMBOL(ns_capable); 388EXPORT_SYMBOL(ns_capable);
388 389
390
391/**
392 * capable - Determine if the current task has a superior capability in effect
393 * @cap: The capability to be tested for
394 *
395 * Return true if the current task has the given superior capability currently
396 * available for use, false if not.
397 *
398 * This sets PF_SUPERPRIV on the task if the capability is available on the
399 * assumption that it's about to be used.
400 */
401bool capable(int cap)
402{
403 return ns_capable(&init_user_ns, cap);
404}
405EXPORT_SYMBOL(capable);
406#endif /* CONFIG_MULTIUSER */
407
389/** 408/**
390 * file_ns_capable - Determine if the file's opener had a capability in effect 409 * file_ns_capable - Determine if the file's opener had a capability in effect
391 * @file: The file we want to check 410 * @file: The file we want to check
@@ -412,22 +431,6 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
412EXPORT_SYMBOL(file_ns_capable); 431EXPORT_SYMBOL(file_ns_capable);
413 432
414/** 433/**
415 * capable - Determine if the current task has a superior capability in effect
416 * @cap: The capability to be tested for
417 *
418 * Return true if the current task has the given superior capability currently
419 * available for use, false if not.
420 *
421 * This sets PF_SUPERPRIV on the task if the capability is available on the
422 * assumption that it's about to be used.
423 */
424bool capable(int cap)
425{
426 return ns_capable(&init_user_ns, cap);
427}
428EXPORT_SYMBOL(capable);
429
430/**
431 * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped 434 * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
432 * @inode: The inode in question 435 * @inode: The inode in question
433 * @cap: The capability in question 436 * @cap: The capability in question
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a220fdb66568..469dd547770c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4196,7 +4196,9 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
4196 4196
4197static int cgroup_pidlist_show(struct seq_file *s, void *v) 4197static int cgroup_pidlist_show(struct seq_file *s, void *v)
4198{ 4198{
-	return seq_printf(s, "%d\n", *(int *)v);
+	seq_printf(s, "%d\n", *(int *)v);
+
+	return 0;
 }
4201 4203
4202static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, 4204static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
@@ -5451,7 +5453,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
5451struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) 5453struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
5452{ 5454{
5453 WARN_ON_ONCE(!rcu_read_lock_held()); 5455 WARN_ON_ONCE(!rcu_read_lock_held());
-	return idr_find(&ss->css_idr, id);
+	return id > 0 ? idr_find(&ss->css_idr, id) : NULL;
5455} 5457}
5456 5458
5457#ifdef CONFIG_CGROUP_DEBUG 5459#ifdef CONFIG_CGROUP_DEBUG
diff --git a/kernel/cred.c b/kernel/cred.c
index e0573a43c7df..ec1c07667ec1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -29,6 +29,9 @@
29 29
30static struct kmem_cache *cred_jar; 30static struct kmem_cache *cred_jar;
31 31
32/* init to 2 - one for init_task, one to ensure it is never freed */
33struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
34
32/* 35/*
33 * The initial credentials for the initial task 36 * The initial credentials for the initial task
34 */ 37 */
diff --git a/kernel/groups.c b/kernel/groups.c
index 664411f171b5..74d431d25251 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -9,9 +9,6 @@
9#include <linux/user_namespace.h> 9#include <linux/user_namespace.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11 11
12/* init to 2 - one for init_task, one to ensure it is never freed */
13struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
14
15struct group_info *groups_alloc(int gidsetsize) 12struct group_info *groups_alloc(int gidsetsize)
16{ 13{
17 struct group_info *group_info; 14 struct group_info *group_info;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 06db12434d72..e0f90c2b57aa 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -169,7 +169,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
169 return; 169 return;
170 170
171 rcu_read_lock(); 171 rcu_read_lock();
-	do_each_thread(g, t) {
+	for_each_process_thread(g, t) {
173 if (!max_count--) 173 if (!max_count--)
174 goto unlock; 174 goto unlock;
175 if (!--batch_count) { 175 if (!--batch_count) {
@@ -180,7 +180,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
180 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ 180 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
181 if (t->state == TASK_UNINTERRUPTIBLE) 181 if (t->state == TASK_UNINTERRUPTIBLE)
182 check_hung_task(t, timeout); 182 check_hung_task(t, timeout);
-	} while_each_thread(g, t);
+	}
184 unlock: 184 unlock:
185 rcu_read_unlock(); 185 rcu_read_unlock();
186} 186}
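
do_each_thread/while_each_thread is the old double-macro pair; for_each_process_thread() is the modern single-macro equivalent and must run under RCU or tasklist_lock, which this caller already holds. The idiom, in isolation:

	struct task_struct *g, *t;

	rcu_read_lock();
	for_each_process_thread(g, t) {
		/* g iterates group leaders, t every thread in g's group */
	}
	rcu_read_unlock();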
diff --git a/kernel/reboot.c b/kernel/reboot.c
index 5925f5ae8dff..d20c85d9f8c0 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -387,8 +387,9 @@ void ctrl_alt_del(void)
387} 387}
388 388
389char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; 389char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
390static const char reboot_cmd[] = "/sbin/reboot";
390 391
-static int __orderly_poweroff(bool force)
+static int run_cmd(const char *cmd)
392{ 393{
393 char **argv; 394 char **argv;
394 static char *envp[] = { 395 static char *envp[] = {
@@ -397,8 +398,7 @@ static int __orderly_poweroff(bool force)
397 NULL 398 NULL
398 }; 399 };
399 int ret; 400 int ret;
-
-	argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
+	argv = argv_split(GFP_KERNEL, cmd, NULL);
402 if (argv) { 402 if (argv) {
403 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); 403 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
404 argv_free(argv); 404 argv_free(argv);
@@ -406,8 +406,33 @@ static int __orderly_poweroff(bool force)
406 ret = -ENOMEM; 406 ret = -ENOMEM;
407 } 407 }
408 408
409 return ret;
410}
411
412static int __orderly_reboot(void)
413{
414 int ret;
415
416 ret = run_cmd(reboot_cmd);
417
418 if (ret) {
419 pr_warn("Failed to start orderly reboot: forcing the issue\n");
420 emergency_sync();
421 kernel_restart(NULL);
422 }
423
424 return ret;
425}
426
427static int __orderly_poweroff(bool force)
428{
429 int ret;
430
431 ret = run_cmd(poweroff_cmd);
432
409 if (ret && force) { 433 if (ret && force) {
410 pr_warn("Failed to start orderly shutdown: forcing the issue\n"); 434 pr_warn("Failed to start orderly shutdown: forcing the issue\n");
435
411 /* 436 /*
412 * I guess this should try to kick off some daemon to sync and 437 * I guess this should try to kick off some daemon to sync and
413 * poweroff asap. Or not even bother syncing if we're doing an 438 * poweroff asap. Or not even bother syncing if we're doing an
@@ -436,15 +461,33 @@ static DECLARE_WORK(poweroff_work, poweroff_work_func);
436 * This may be called from any context to trigger a system shutdown. 461 * This may be called from any context to trigger a system shutdown.
437 * If the orderly shutdown fails, it will force an immediate shutdown. 462 * If the orderly shutdown fails, it will force an immediate shutdown.
438 */ 463 */
-int orderly_poweroff(bool force)
+void orderly_poweroff(bool force)
440{ 465{
441 if (force) /* do not override the pending "true" */ 466 if (force) /* do not override the pending "true" */
442 poweroff_force = true; 467 poweroff_force = true;
443 schedule_work(&poweroff_work); 468 schedule_work(&poweroff_work);
444 return 0;
445} 469}
446EXPORT_SYMBOL_GPL(orderly_poweroff); 470EXPORT_SYMBOL_GPL(orderly_poweroff);
447 471
472static void reboot_work_func(struct work_struct *work)
473{
474 __orderly_reboot();
475}
476
477static DECLARE_WORK(reboot_work, reboot_work_func);
478
479/**
480 * orderly_reboot - Trigger an orderly system reboot
481 *
482 * This may be called from any context to trigger a system reboot.
483 * If the orderly reboot fails, it will force an immediate reboot.
484 */
485void orderly_reboot(void)
486{
487 schedule_work(&reboot_work);
488}
489EXPORT_SYMBOL_GPL(orderly_reboot);
490
448static int __init reboot_setup(char *str) 491static int __init reboot_setup(char *str)
449{ 492{
450 for (;;) { 493 for (;;) {
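
After this change both orderly helpers are fire-and-forget: each schedules work and falls back to a forced restart or shutdown internally, so callers need no error handling. A sketch of use from, say, a thermal or firmware driver (the trigger and caller are illustrative):

	static void critical_event(bool reset_requested)
	{
		if (reset_requested)
			orderly_reboot();	/* on failure: emergency_sync() + kernel_restart() */
		else
			orderly_poweroff(true);	/* true: force poweroff if /sbin/poweroff fails */
	}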
diff --git a/kernel/resource.c b/kernel/resource.c
index 19f2357dfda3..90552aab5f2d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1034,8 +1034,6 @@ resource_size_t resource_alignment(struct resource *res)
1034 * 1034 *
1035 * request_region creates a new busy region. 1035 * request_region creates a new busy region.
1036 * 1036 *
1037 * check_region returns non-zero if the area is already busy.
1038 *
1039 * release_region releases a matching busy region. 1037 * release_region releases a matching busy region.
1040 */ 1038 */
1041 1039
@@ -1098,36 +1096,6 @@ struct resource * __request_region(struct resource *parent,
1098EXPORT_SYMBOL(__request_region); 1096EXPORT_SYMBOL(__request_region);
1099 1097
1100/** 1098/**
1101 * __check_region - check if a resource region is busy or free
1102 * @parent: parent resource descriptor
1103 * @start: resource start address
1104 * @n: resource region size
1105 *
1106 * Returns 0 if the region is free at the moment it is checked,
1107 * returns %-EBUSY if the region is busy.
1108 *
1109 * NOTE:
1110 * This function is deprecated because its use is racy.
1111 * Even if it returns 0, a subsequent call to request_region()
1112 * may fail because another driver etc. just allocated the region.
1113 * Do NOT use it. It will be removed from the kernel.
1114 */
1115int __check_region(struct resource *parent, resource_size_t start,
1116 resource_size_t n)
1117{
1118 struct resource * res;
1119
1120 res = __request_region(parent, start, n, "check-region", 0);
1121 if (!res)
1122 return -EBUSY;
1123
1124 release_resource(res);
1125 free_resource(res);
1126 return 0;
1127}
1128EXPORT_SYMBOL(__check_region);
1129
1130/**
1131 * __release_region - release a previously reserved resource region 1099 * __release_region - release a previously reserved resource region
1132 * @parent: parent resource descriptor 1100 * @parent: parent resource descriptor
1133 * @start: resource start address 1101 * @start: resource start address
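
check_region() was racy by design, as its removed kerneldoc notes: a region that is free when checked can be claimed before the subsequent request. The replacement is to attempt the reservation directly and handle failure; a sketch (names illustrative):

	static int probe_io(unsigned long base, unsigned long len)
	{
		if (!request_region(base, len, "mydrv"))
			return -EBUSY;		/* range already owned by someone else */
		/* ... talk to the hardware ... */
		release_region(base, len);
		return 0;
	}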
diff --git a/kernel/sys.c b/kernel/sys.c
index a03d9cd23ed7..3be344902316 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -325,6 +325,7 @@ out_unlock:
325 * SMP: There are not races, the GIDs are checked only by filesystem 325 * SMP: There are not races, the GIDs are checked only by filesystem
326 * operations (as far as semantic preservation is concerned). 326 * operations (as far as semantic preservation is concerned).
327 */ 327 */
328#ifdef CONFIG_MULTIUSER
328SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) 329SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
329{ 330{
330 struct user_namespace *ns = current_user_ns(); 331 struct user_namespace *ns = current_user_ns();
@@ -815,6 +816,7 @@ change_okay:
815 commit_creds(new); 816 commit_creds(new);
816 return old_fsgid; 817 return old_fsgid;
817} 818}
819#endif /* CONFIG_MULTIUSER */
818 820
819/** 821/**
820 * sys_getpid - return the thread group id of the current process 822 * sys_getpid - return the thread group id of the current process
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5adcb0ae3a58..7995ef5868d8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -159,6 +159,20 @@ cond_syscall(sys_uselib);
159cond_syscall(sys_fadvise64); 159cond_syscall(sys_fadvise64);
160cond_syscall(sys_fadvise64_64); 160cond_syscall(sys_fadvise64_64);
161cond_syscall(sys_madvise); 161cond_syscall(sys_madvise);
162cond_syscall(sys_setuid);
163cond_syscall(sys_setregid);
164cond_syscall(sys_setgid);
165cond_syscall(sys_setreuid);
166cond_syscall(sys_setresuid);
167cond_syscall(sys_getresuid);
168cond_syscall(sys_setresgid);
169cond_syscall(sys_getresgid);
170cond_syscall(sys_setgroups);
171cond_syscall(sys_getgroups);
172cond_syscall(sys_setfsuid);
173cond_syscall(sys_setfsgid);
174cond_syscall(sys_capget);
175cond_syscall(sys_capset);
162 176
163/* arch-specific weak syscall entries */ 177/* arch-specific weak syscall entries */
164cond_syscall(sys_pciconfig_read); 178cond_syscall(sys_pciconfig_read);
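
These entries keep a CONFIG_MULTIUSER=n kernel linking: cond_syscall() provides a weak fallback, so each compiled-out syscall resolves to the -ENOSYS stub at link time. A simplified model of the mechanism (the real definition lives in include/linux/syscalls.h and differs in detail):

	/* Simplified: if the real sys_setuid is not built, the weak alias
	 * binds its syscall slot to sys_ni_syscall(), which returns -ENOSYS. */
	#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")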
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8c0eabd41886..42b7fc2860c1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1335,6 +1335,15 @@ static struct ctl_table vm_table[] = {
1335 .extra1 = &min_extfrag_threshold, 1335 .extra1 = &min_extfrag_threshold,
1336 .extra2 = &max_extfrag_threshold, 1336 .extra2 = &max_extfrag_threshold,
1337 }, 1337 },
1338 {
1339 .procname = "compact_unevictable_allowed",
1340 .data = &sysctl_compact_unevictable_allowed,
1341 .maxlen = sizeof(int),
1342 .mode = 0644,
1343 .proc_handler = proc_dointvec,
1344 .extra1 = &zero,
1345 .extra2 = &one,
1346 },
1338 1347
1339#endif /* CONFIG_COMPACTION */ 1348#endif /* CONFIG_COMPACTION */
1340 { 1349 {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index c3e4fcfddd45..3f34496244e9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -327,11 +327,11 @@ static void t_stop(struct seq_file *m, void *p)
327 local_irq_enable(); 327 local_irq_enable();
328} 328}
329 329
-static int trace_lookup_stack(struct seq_file *m, long i)
+static void trace_lookup_stack(struct seq_file *m, long i)
331{ 331{
332 unsigned long addr = stack_dump_trace[i]; 332 unsigned long addr = stack_dump_trace[i];
333 333
-	return seq_printf(m, "%pS\n", (void *)addr);
+	seq_printf(m, "%pS\n", (void *)addr);
335} 335}
336 336
337static void print_disabled(struct seq_file *m) 337static void print_disabled(struct seq_file *m)
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 852c81e3ba9a..028f5d996eef 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -247,10 +247,11 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
247 * progress) and "changed", when this in fact lead to an successful 247 * progress) and "changed", when this in fact lead to an successful
248 * update of the cache. 248 * update of the cache.
249 */ 249 */
-	return seq_printf(seq, "\t%s: used:%u/%u "
-		"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
+	seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
 		lc->name, lc->used, lc->nr_elements,
 		lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
+
+	return 0;
254} 255}
255 256
256static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) 257static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 8f8c4417f228..1826c7407258 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -239,29 +239,21 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
239} 239}
240EXPORT_SYMBOL(string_unescape); 240EXPORT_SYMBOL(string_unescape);
241 241
-static int escape_passthrough(unsigned char c, char **dst, size_t *osz)
+static bool escape_passthrough(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 
-	if (*osz < 1)
-		return -ENOMEM;
-
-	*out++ = c;
-
-	*dst = out;
-	*osz -= 1;
-
-	return 1;
+	if (out < end)
+		*out = c;
+	*dst = out + 1;
+	return true;
 }
256 251
-static int escape_space(unsigned char c, char **dst, size_t *osz)
+static bool escape_space(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 	unsigned char to;
 
-	if (*osz < 2)
-		return -ENOMEM;
-
 	switch (c) {
 	case '\n':
 		to = 'n';
@@ -279,26 +271,25 @@ static int escape_space(unsigned char c, char **dst, size_t *osz)
 		to = 'f';
 		break;
 	default:
-		return 0;
+		return false;
 	}
 
-	*out++ = '\\';
-	*out++ = to;
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = to;
+	++out;
 
 	*dst = out;
-	*osz -= 2;
-
-	return 1;
+	return true;
 }
293 287
-static int escape_special(unsigned char c, char **dst, size_t *osz)
+static bool escape_special(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 	unsigned char to;
 
-	if (*osz < 2)
-		return -ENOMEM;
-
 	switch (c) {
 	case '\\':
 		to = '\\';
@@ -310,71 +301,78 @@ static int escape_special(unsigned char c, char **dst, size_t *osz)
 		to = 'e';
 		break;
 	default:
-		return 0;
+		return false;
 	}
 
-	*out++ = '\\';
-	*out++ = to;
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = to;
+	++out;
 
 	*dst = out;
-	*osz -= 2;
-
-	return 1;
+	return true;
 }
 
-static int escape_null(unsigned char c, char **dst, size_t *osz)
+static bool escape_null(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 
-	if (*osz < 2)
-		return -ENOMEM;
-
 	if (c)
-		return 0;
+		return false;
 
-	*out++ = '\\';
-	*out++ = '0';
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = '0';
+	++out;
 
 	*dst = out;
-	*osz -= 2;
-
-	return 1;
+	return true;
 }
 
-static int escape_octal(unsigned char c, char **dst, size_t *osz)
+static bool escape_octal(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 
-	if (*osz < 4)
-		return -ENOMEM;
-
-	*out++ = '\\';
-	*out++ = ((c >> 6) & 0x07) + '0';
-	*out++ = ((c >> 3) & 0x07) + '0';
-	*out++ = ((c >> 0) & 0x07) + '0';
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = ((c >> 6) & 0x07) + '0';
+	++out;
+	if (out < end)
+		*out = ((c >> 3) & 0x07) + '0';
+	++out;
+	if (out < end)
+		*out = ((c >> 0) & 0x07) + '0';
+	++out;
 
 	*dst = out;
-	*osz -= 4;
-
-	return 1;
+	return true;
 }
 
-static int escape_hex(unsigned char c, char **dst, size_t *osz)
+static bool escape_hex(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 
-	if (*osz < 4)
-		return -ENOMEM;
-
-	*out++ = '\\';
-	*out++ = 'x';
-	*out++ = hex_asc_hi(c);
-	*out++ = hex_asc_lo(c);
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = 'x';
+	++out;
+	if (out < end)
+		*out = hex_asc_hi(c);
+	++out;
+	if (out < end)
+		*out = hex_asc_lo(c);
+	++out;
 
 	*dst = out;
-	*osz -= 4;
-
-	return 1;
+	return true;
 }
379 377
380/** 378/**
@@ -426,19 +424,17 @@ static int escape_hex(unsigned char c, char **dst, size_t *osz)
  * it if needs.
  *
  * Return:
- * The amount of the characters processed to the destination buffer, or
- * %-ENOMEM if the size of buffer is not enough to put an escaped character is
- * returned.
- *
- * Even in the case of error @dst pointer will be updated to point to the byte
- * after the last processed character.
+ * The total size of the escaped output that would be generated for
+ * the given input and flags. To check whether the output was
+ * truncated, compare the return value to osz. There is room left in
+ * dst for a '\0' terminator if and only if ret < osz.
  */
-int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
+int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
 		unsigned int flags, const char *esc)
 {
-	char *out = *dst, *p = out;
+	char *p = dst;
+	char *end = p + osz;
 	bool is_dict = esc && *esc;
-	int ret = 0;
 
 	while (isz--) {
 		unsigned char c = *src++;
@@ -458,55 +454,26 @@ int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
 		    (is_dict && !strchr(esc, c))) {
 			/* do nothing */
 		} else {
-			if (flags & ESCAPE_SPACE) {
-				ret = escape_space(c, &p, &osz);
-				if (ret < 0)
-					break;
-				if (ret > 0)
-					continue;
-			}
-
-			if (flags & ESCAPE_SPECIAL) {
-				ret = escape_special(c, &p, &osz);
-				if (ret < 0)
-					break;
-				if (ret > 0)
-					continue;
-			}
-
-			if (flags & ESCAPE_NULL) {
-				ret = escape_null(c, &p, &osz);
-				if (ret < 0)
-					break;
-				if (ret > 0)
-					continue;
-			}
+			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
+				continue;
+
+			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
+				continue;
+
+			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
+				continue;
 
 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
-			if (flags & ESCAPE_OCTAL) {
-				ret = escape_octal(c, &p, &osz);
-				if (ret < 0)
-					break;
+			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
 				continue;
-			}
-			if (flags & ESCAPE_HEX) {
-				ret = escape_hex(c, &p, &osz);
-				if (ret < 0)
-					break;
+
+			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
 				continue;
-			}
 		}
 
-		ret = escape_passthrough(c, &p, &osz);
-		if (ret < 0)
-			break;
+		escape_passthrough(c, &p, end);
 	}
 
-	*dst = p;
-
-	if (ret < 0)
-		return ret;
-
-	return p - out;
+	return p - dst;
 }
512EXPORT_SYMBOL(string_escape_mem); 479EXPORT_SYMBOL(string_escape_mem);
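
The whole conversion above distills to one primitive, borrowed from vsnprintf(): write only while there is room, but always advance the cursor, so the final pointer difference is the size the output would have needed. In isolation:

	/* Emit one byte if it fits; unconditionally account for it. */
	static void emit(char **p, char *end, char c)
	{
		if (*p < end)
			**p = c;
		(*p)++;	/* advancing past 'end' is what makes p - start a full size */
	}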
diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c
index daf29a390a89..9846ff7428b3 100644
--- a/lib/test-hexdump.c
+++ b/lib/test-hexdump.c
@@ -18,26 +18,26 @@ static const unsigned char data_b[] = {
18 18
19static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C..."; 19static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C...";
20 20
-static const char *test_data_1_le[] __initconst = {
+static const char * const test_data_1_le[] __initconst = {
22 "be", "32", "db", "7b", "0a", "18", "93", "b2", 22 "be", "32", "db", "7b", "0a", "18", "93", "b2",
23 "70", "ba", "c4", "24", "7d", "83", "34", "9b", 23 "70", "ba", "c4", "24", "7d", "83", "34", "9b",
24 "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9", 24 "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9",
25 "4c", "d1", "19", "99", "43", "b1", "af", "0c", 25 "4c", "d1", "19", "99", "43", "b1", "af", "0c",
26}; 26};
27 27
-static const char *test_data_2_le[] __initconst = {
+static const char *test_data_2_le[] __initdata = {
29 "32be", "7bdb", "180a", "b293", 29 "32be", "7bdb", "180a", "b293",
30 "ba70", "24c4", "837d", "9b34", 30 "ba70", "24c4", "837d", "9b34",
31 "9ca6", "ad31", "0f9c", "e9ac", 31 "9ca6", "ad31", "0f9c", "e9ac",
32 "d14c", "9919", "b143", "0caf", 32 "d14c", "9919", "b143", "0caf",
33}; 33};
34 34
-static const char *test_data_4_le[] __initconst = {
+static const char *test_data_4_le[] __initdata = {
36 "7bdb32be", "b293180a", "24c4ba70", "9b34837d", 36 "7bdb32be", "b293180a", "24c4ba70", "9b34837d",
37 "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143", 37 "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143",
38}; 38};
39 39
-static const char *test_data_8_le[] __initconst = {
+static const char *test_data_8_le[] __initdata = {
41 "b293180a7bdb32be", "9b34837d24c4ba70", 41 "b293180a7bdb32be", "9b34837d24c4ba70",
42 "e9ac0f9cad319ca6", "0cafb1439919d14c", 42 "e9ac0f9cad319ca6", "0cafb1439919d14c",
43}; 43};
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index ab0d30e1e18f..8e376efd88a4 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -260,16 +260,28 @@ static __init const char *test_string_find_match(const struct test_string_2 *s2,
260 return NULL; 260 return NULL;
261} 261}
262 262
263static __init void
264test_string_escape_overflow(const char *in, int p, unsigned int flags, const char *esc,
265 int q_test, const char *name)
266{
267 int q_real;
268
269 q_real = string_escape_mem(in, p, NULL, 0, flags, esc);
270 if (q_real != q_test)
271 pr_warn("Test '%s' failed: flags = %u, osz = 0, expected %d, got %d\n",
272 name, flags, q_test, q_real);
273}
274
263static __init void test_string_escape(const char *name, 275static __init void test_string_escape(const char *name,
264 const struct test_string_2 *s2, 276 const struct test_string_2 *s2,
265 unsigned int flags, const char *esc) 277 unsigned int flags, const char *esc)
266{ 278{
-	int q_real = 512;
-	char *out_test = kmalloc(q_real, GFP_KERNEL);
-	char *out_real = kmalloc(q_real, GFP_KERNEL);
+	size_t out_size = 512;
+	char *out_test = kmalloc(out_size, GFP_KERNEL);
+	char *out_real = kmalloc(out_size, GFP_KERNEL);
 	char *in = kmalloc(256, GFP_KERNEL);
-	char *buf = out_real;
 	int p = 0, q_test = 0;
+	int q_real;
273 285
274 if (!out_test || !out_real || !in) 286 if (!out_test || !out_real || !in)
275 goto out; 287 goto out;
@@ -301,29 +313,19 @@ static __init void test_string_escape(const char *name,
301 q_test += len; 313 q_test += len;
302 } 314 }
303 315
-	q_real = string_escape_mem(in, p, &buf, q_real, flags, esc);
+	q_real = string_escape_mem(in, p, out_real, out_size, flags, esc);
305 317
306 test_string_check_buf(name, flags, in, p, out_real, q_real, out_test, 318 test_string_check_buf(name, flags, in, p, out_real, q_real, out_test,
307 q_test); 319 q_test);
320
321 test_string_escape_overflow(in, p, flags, esc, q_test, name);
322
308out: 323out:
309 kfree(in); 324 kfree(in);
310 kfree(out_real); 325 kfree(out_real);
311 kfree(out_test); 326 kfree(out_test);
312} 327}
313 328
314static __init void test_string_escape_nomem(void)
315{
316 char *in = "\eb \\C\007\"\x90\r]";
317 char out[64], *buf = out;
318 int rc = -ENOMEM, ret;
319
320 ret = string_escape_str_any_np(in, &buf, strlen(in), NULL);
321 if (ret == rc)
322 return;
323
324 pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc);
325}
326
327static int __init test_string_helpers_init(void) 329static int __init test_string_helpers_init(void)
328{ 330{
329 unsigned int i; 331 unsigned int i;
@@ -342,8 +344,6 @@ static int __init test_string_helpers_init(void)
342 for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) 344 for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++)
343 test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); 345 test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1);
344 346
345 test_string_escape_nomem();
346
347 return -EINVAL; 347 return -EINVAL;
348} 348}
349module_init(test_string_helpers_init); 349module_init(test_string_helpers_init);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index b235c96167d3..3a1e0843f9a2 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <stdarg.h> 19#include <stdarg.h>
20#include <linux/clk-provider.h>
20#include <linux/module.h> /* for KSYM_SYMBOL_LEN */ 21#include <linux/module.h> /* for KSYM_SYMBOL_LEN */
21#include <linux/types.h> 22#include <linux/types.h>
22#include <linux/string.h> 23#include <linux/string.h>
@@ -340,11 +341,11 @@ int num_to_str(char *buf, int size, unsigned long long num)
340 return len; 341 return len;
341} 342}
342 343
-#define ZEROPAD	1		/* pad with zero */
-#define SIGN	2		/* unsigned/signed long */
+#define SIGN	1		/* unsigned/signed, must be 1 */
+#define LEFT	2		/* left justified */
 #define PLUS	4		/* show plus */
 #define SPACE	8		/* space if plus */
-#define LEFT	16		/* left justified */
+#define ZEROPAD	16		/* pad with zero, must be 16 == '0' - ' ' */
348#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ 349#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */
349#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ 350#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */
350 351
@@ -383,10 +384,7 @@ static noinline_for_stack
383char *number(char *buf, char *end, unsigned long long num, 384char *number(char *buf, char *end, unsigned long long num,
384 struct printf_spec spec) 385 struct printf_spec spec)
385{ 386{
-	/* we are called with base 8, 10 or 16, only, thus don't need "G..." */
-	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
-
-	char tmp[66];
+	char tmp[3 * sizeof(num)];
390 char sign; 388 char sign;
391 char locase; 389 char locase;
392 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); 390 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
@@ -422,12 +420,7 @@ char *number(char *buf, char *end, unsigned long long num,
422 /* generate full string in tmp[], in reverse order */ 420 /* generate full string in tmp[], in reverse order */
423 i = 0; 421 i = 0;
424 if (num < spec.base) 422 if (num < spec.base)
-		tmp[i++] = digits[num] | locase;
-	/* Generic code, for any base:
-	else do {
-		tmp[i++] = (digits[do_div(num,base)] | locase);
-	} while (num != 0);
-	*/
+		tmp[i++] = hex_asc_upper[num] | locase;
431 else if (spec.base != 10) { /* 8 or 16 */ 424 else if (spec.base != 10) { /* 8 or 16 */
432 int mask = spec.base - 1; 425 int mask = spec.base - 1;
433 int shift = 3; 426 int shift = 3;
@@ -435,7 +428,7 @@ char *number(char *buf, char *end, unsigned long long num,
435 if (spec.base == 16) 428 if (spec.base == 16)
436 shift = 4; 429 shift = 4;
437 do { 430 do {
-			tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
+			tmp[i++] = (hex_asc_upper[((unsigned char)num) & mask] | locase);
439 num >>= shift; 432 num >>= shift;
440 } while (num); 433 } while (num);
441 } else { /* base 10 */ 434 } else { /* base 10 */
@@ -447,7 +440,7 @@ char *number(char *buf, char *end, unsigned long long num,
447 spec.precision = i; 440 spec.precision = i;
448 /* leading space padding */ 441 /* leading space padding */
449 spec.field_width -= spec.precision; 442 spec.field_width -= spec.precision;
-	if (!(spec.flags & (ZEROPAD+LEFT))) {
+	if (!(spec.flags & (ZEROPAD | LEFT))) {
451 while (--spec.field_width >= 0) { 444 while (--spec.field_width >= 0) {
452 if (buf < end) 445 if (buf < end)
453 *buf = ' '; 446 *buf = ' ';
@@ -475,7 +468,8 @@ char *number(char *buf, char *end, unsigned long long num,
475 } 468 }
476 /* zero or space padding */ 469 /* zero or space padding */
477 if (!(spec.flags & LEFT)) { 470 if (!(spec.flags & LEFT)) {
-		char c = (spec.flags & ZEROPAD) ? '0' : ' ';
+		char c = ' ' + (spec.flags & ZEROPAD);
+		BUILD_BUG_ON(' ' + ZEROPAD != '0');
479 while (--spec.field_width >= 0) { 473 while (--spec.field_width >= 0) {
480 if (buf < end) 474 if (buf < end)
481 *buf = c; 475 *buf = c;
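
The renumbered flags above are chosen so this padding character needs no branch, and SIGN == 1 serves the same purpose in the format-type arithmetic further down. Worked through:

	/* ASCII makes the conditional unnecessary: ' ' == 0x20, '0' == 0x30,
	 * so ZEROPAD == 16 == '0' - ' ' and:
	 *	flags without ZEROPAD:  ' ' + 0  == ' '
	 *	flags with ZEROPAD:     ' ' + 16 == '0'
	 */
	char c = ' ' + (spec.flags & ZEROPAD);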
@@ -783,11 +777,19 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec,
783 if (spec.field_width > 0) 777 if (spec.field_width > 0)
784 len = min_t(int, spec.field_width, 64); 778 len = min_t(int, spec.field_width, 64);
785 779
-	for (i = 0; i < len && buf < end - 1; i++) {
-		buf = hex_byte_pack(buf, addr[i]);
+	for (i = 0; i < len; ++i) {
+		if (buf < end)
+			*buf = hex_asc_hi(addr[i]);
+		++buf;
+		if (buf < end)
+			*buf = hex_asc_lo(addr[i]);
+		++buf;
 
-		if (buf < end && separator && i != len - 1)
-			*buf++ = separator;
+		if (separator && i != len - 1) {
+			if (buf < end)
+				*buf = separator;
+			++buf;
+		}
 	}
792 794
793 return buf; 795 return buf;
@@ -1233,8 +1235,12 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec,
1233 1235
1234 len = spec.field_width < 0 ? 1 : spec.field_width; 1236 len = spec.field_width < 0 ? 1 : spec.field_width;
1235 1237
-	/* Ignore the error. We print as many characters as we can */
-	string_escape_mem(addr, len, &buf, end - buf, flags, NULL);
+	/*
+	 * string_escape_mem() writes as many characters as it can to
+	 * the given buffer, and returns the total size of the output
+	 * had the buffer been big enough.
+	 */
+	buf += string_escape_mem(addr, len, buf, buf < end ? end - buf : 0, flags, NULL);
1238 1244
1239 return buf; 1245 return buf;
1240} 1246}
@@ -1322,6 +1328,30 @@ char *address_val(char *buf, char *end, const void *addr,
1322 return number(buf, end, num, spec); 1328 return number(buf, end, num, spec);
1323} 1329}
1324 1330
1331static noinline_for_stack
1332char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec,
1333 const char *fmt)
1334{
1335 if (!IS_ENABLED(CONFIG_HAVE_CLK) || !clk)
1336 return string(buf, end, NULL, spec);
1337
1338 switch (fmt[1]) {
1339 case 'r':
1340 return number(buf, end, clk_get_rate(clk), spec);
1341
1342 case 'n':
1343 default:
1344#ifdef CONFIG_COMMON_CLK
1345 return string(buf, end, __clk_get_name(clk), spec);
1346#else
1347 spec.base = 16;
1348 spec.field_width = sizeof(unsigned long) * 2 + 2;
1349 spec.flags |= SPECIAL | SMALL | ZEROPAD;
1350 return number(buf, end, (unsigned long)clk, spec);
1351#endif
1352 }
1353}
1354
1325int kptr_restrict __read_mostly; 1355int kptr_restrict __read_mostly;
1326 1356
1327/* 1357/*
@@ -1404,6 +1434,11 @@ int kptr_restrict __read_mostly;
1404 * (default assumed to be phys_addr_t, passed by reference) 1434 * (default assumed to be phys_addr_t, passed by reference)
1405 * - 'd[234]' For a dentry name (optionally 2-4 last components) 1435 * - 'd[234]' For a dentry name (optionally 2-4 last components)
1406 * - 'D[234]' Same as 'd' but for a struct file 1436 * - 'D[234]' Same as 'd' but for a struct file
1437 * - 'C' For a clock, it prints the name (Common Clock Framework) or address
1438 * (legacy clock framework) of the clock
1439 * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address
1440 * (legacy clock framework) of the clock
1441 * - 'Cr' For a clock, it prints the current rate of the clock
1407 * 1442 *
1408 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 1443 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
1409 * function pointers are really function descriptors, which contain a 1444 * function pointers are really function descriptors, which contain a
@@ -1548,6 +1583,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
1548 return address_val(buf, end, ptr, spec, fmt); 1583 return address_val(buf, end, ptr, spec, fmt);
1549 case 'd': 1584 case 'd':
1550 return dentry_name(buf, end, ptr, spec, fmt); 1585 return dentry_name(buf, end, ptr, spec, fmt);
1586 case 'C':
1587 return clock(buf, end, ptr, spec, fmt);
1551 case 'D': 1588 case 'D':
1552 return dentry_name(buf, end, 1589 return dentry_name(buf, end,
1553 ((const struct file *)ptr)->f_path.dentry, 1590 ((const struct file *)ptr)->f_path.dentry,
@@ -1738,29 +1775,21 @@ qualifier:
 	if (spec->qualifier == 'L')
 		spec->type = FORMAT_TYPE_LONG_LONG;
 	else if (spec->qualifier == 'l') {
-		if (spec->flags & SIGN)
-			spec->type = FORMAT_TYPE_LONG;
-		else
-			spec->type = FORMAT_TYPE_ULONG;
+		BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG);
+		spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN);
 	} else if (_tolower(spec->qualifier) == 'z') {
 		spec->type = FORMAT_TYPE_SIZE_T;
 	} else if (spec->qualifier == 't') {
 		spec->type = FORMAT_TYPE_PTRDIFF;
 	} else if (spec->qualifier == 'H') {
-		if (spec->flags & SIGN)
-			spec->type = FORMAT_TYPE_BYTE;
-		else
-			spec->type = FORMAT_TYPE_UBYTE;
+		BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE);
+		spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN);
 	} else if (spec->qualifier == 'h') {
-		if (spec->flags & SIGN)
-			spec->type = FORMAT_TYPE_SHORT;
-		else
-			spec->type = FORMAT_TYPE_USHORT;
+		BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT);
+		spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN);
 	} else {
-		if (spec->flags & SIGN)
-			spec->type = FORMAT_TYPE_INT;
-		else
-			spec->type = FORMAT_TYPE_UINT;
+		BUILD_BUG_ON(FORMAT_TYPE_UINT + SIGN != FORMAT_TYPE_INT);
+		spec->type = FORMAT_TYPE_UINT + (spec->flags & SIGN);
 	}
1766 return ++fmt - start; 1795 return ++fmt - start;
@@ -1800,6 +1829,11 @@ qualifier:
1800 * %*pE[achnops] print an escaped buffer 1829 * %*pE[achnops] print an escaped buffer
1801 * %*ph[CDN] a variable-length hex string with a separator (supports up to 64 1830 * %*ph[CDN] a variable-length hex string with a separator (supports up to 64
1802 * bytes of the input) 1831 * bytes of the input)
1832 * %pC output the name (Common Clock Framework) or address (legacy clock
1833 * framework) of a clock
1834 * %pCn output the name (Common Clock Framework) or address (legacy clock
1835 * framework) of a clock
1836 * %pCr output the current rate of a clock
1803 * %n is ignored 1837 * %n is ignored
1804 * 1838 *
1805 * ** Please update Documentation/printk-formats.txt when making changes ** 1839 * ** Please update Documentation/printk-formats.txt when making changes **
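
With the %pC extension documented above, a driver can print clock details without caring which clock framework the kernel was built with. A sketch (the clock handle and device are illustrative):

	struct clk *clk = devm_clk_get(dev, NULL);	/* dev from the surrounding driver */

	pr_debug("core clock %pC running at %pCr Hz\n", clk, clk);
	/* CCF kernels print the clock's name; legacy ones print its address. */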
diff --git a/mm/cma.c b/mm/cma.c
index 47203faaf65e..3a7a67b93394 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -23,6 +23,7 @@
23# define DEBUG 23# define DEBUG
24#endif 24#endif
25#endif 25#endif
26#define CREATE_TRACE_POINTS
26 27
27#include <linux/memblock.h> 28#include <linux/memblock.h>
28#include <linux/err.h> 29#include <linux/err.h>
@@ -34,6 +35,7 @@
34#include <linux/cma.h> 35#include <linux/cma.h>
35#include <linux/highmem.h> 36#include <linux/highmem.h>
36#include <linux/io.h> 37#include <linux/io.h>
38#include <trace/events/cma.h>
37 39
38#include "cma.h" 40#include "cma.h"
39 41
@@ -414,6 +416,8 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
414 start = bitmap_no + mask + 1; 416 start = bitmap_no + mask + 1;
415 } 417 }
416 418
419 trace_cma_alloc(page ? pfn : -1UL, page, count, align);
420
417 pr_debug("%s(): returned %p\n", __func__, page); 421 pr_debug("%s(): returned %p\n", __func__, page);
418 return page; 422 return page;
419} 423}
@@ -446,6 +450,7 @@ bool cma_release(struct cma *cma, const struct page *pages, unsigned int count)
446 450
447 free_contig_range(pfn, count); 451 free_contig_range(pfn, count);
448 cma_clear_bitmap(cma, pfn, count); 452 cma_clear_bitmap(cma, pfn, count);
453 trace_cma_release(pfn, pages, count);
449 454
450 return true; 455 return true;
451} 456}
diff --git a/mm/cma_debug.c b/mm/cma_debug.c
index 0b377536ccde..7621ee34daa0 100644
--- a/mm/cma_debug.c
+++ b/mm/cma_debug.c
@@ -30,9 +30,44 @@ static int cma_debugfs_get(void *data, u64 *val)
30 30
31 return 0; 31 return 0;
32} 32}
33
34DEFINE_SIMPLE_ATTRIBUTE(cma_debugfs_fops, cma_debugfs_get, NULL, "%llu\n"); 33DEFINE_SIMPLE_ATTRIBUTE(cma_debugfs_fops, cma_debugfs_get, NULL, "%llu\n");
35 34
35static int cma_used_get(void *data, u64 *val)
36{
37 struct cma *cma = data;
38 unsigned long used;
39
40 mutex_lock(&cma->lock);
+	/* the pages counter fits in an int, so the (int) cast below is safe */
42 used = bitmap_weight(cma->bitmap, (int)cma->count);
43 mutex_unlock(&cma->lock);
44 *val = (u64)used << cma->order_per_bit;
45
46 return 0;
47}
48DEFINE_SIMPLE_ATTRIBUTE(cma_used_fops, cma_used_get, NULL, "%llu\n");
49
50static int cma_maxchunk_get(void *data, u64 *val)
51{
52 struct cma *cma = data;
53 unsigned long maxchunk = 0;
54 unsigned long start, end = 0;
55
56 mutex_lock(&cma->lock);
57 for (;;) {
58 start = find_next_zero_bit(cma->bitmap, cma->count, end);
59 if (start >= cma->count)
60 break;
61 end = find_next_bit(cma->bitmap, cma->count, start);
62 maxchunk = max(end - start, maxchunk);
63 }
64 mutex_unlock(&cma->lock);
65 *val = (u64)maxchunk << cma->order_per_bit;
66
67 return 0;
68}
69DEFINE_SIMPLE_ATTRIBUTE(cma_maxchunk_fops, cma_maxchunk_get, NULL, "%llu\n");
70
36static void cma_add_to_cma_mem_list(struct cma *cma, struct cma_mem *mem) 71static void cma_add_to_cma_mem_list(struct cma *cma, struct cma_mem *mem)
37{ 72{
38 spin_lock(&cma->mem_head_lock); 73 spin_lock(&cma->mem_head_lock);
@@ -91,7 +126,6 @@ static int cma_free_write(void *data, u64 val)
91 126
92 return cma_free_mem(cma, pages); 127 return cma_free_mem(cma, pages);
93} 128}
94
95DEFINE_SIMPLE_ATTRIBUTE(cma_free_fops, NULL, cma_free_write, "%llu\n"); 129DEFINE_SIMPLE_ATTRIBUTE(cma_free_fops, NULL, cma_free_write, "%llu\n");
96 130
97static int cma_alloc_mem(struct cma *cma, int count) 131static int cma_alloc_mem(struct cma *cma, int count)
@@ -124,7 +158,6 @@ static int cma_alloc_write(void *data, u64 val)
124 158
125 return cma_alloc_mem(cma, pages); 159 return cma_alloc_mem(cma, pages);
126} 160}
127
128DEFINE_SIMPLE_ATTRIBUTE(cma_alloc_fops, NULL, cma_alloc_write, "%llu\n"); 161DEFINE_SIMPLE_ATTRIBUTE(cma_alloc_fops, NULL, cma_alloc_write, "%llu\n");
129 162
130static void cma_debugfs_add_one(struct cma *cma, int idx) 163static void cma_debugfs_add_one(struct cma *cma, int idx)
@@ -149,6 +182,8 @@ static void cma_debugfs_add_one(struct cma *cma, int idx)
149 &cma->count, &cma_debugfs_fops); 182 &cma->count, &cma_debugfs_fops);
150 debugfs_create_file("order_per_bit", S_IRUGO, tmp, 183 debugfs_create_file("order_per_bit", S_IRUGO, tmp,
151 &cma->order_per_bit, &cma_debugfs_fops); 184 &cma->order_per_bit, &cma_debugfs_fops);
185 debugfs_create_file("used", S_IRUGO, tmp, cma, &cma_used_fops);
186 debugfs_create_file("maxchunk", S_IRUGO, tmp, cma, &cma_maxchunk_fops);
152 187
153 u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32)); 188 u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32));
154 debugfs_create_u32_array("bitmap", S_IRUGO, tmp, (u32*)cma->bitmap, u32s); 189 debugfs_create_u32_array("bitmap", S_IRUGO, tmp, (u32*)cma->bitmap, u32s);
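The new "maxchunk" file reports the largest contiguous free range in the CMA bitmap: cma_maxchunk_get() walks alternating allocated/free runs with find_next_zero_bit()/find_next_bit() and keeps the longest zero run. The same scan in a self-contained user-space sketch (a char array stands in for the kernel bitmap, and the helper name is invented for the sketch):

#include <stdio.h>

/* Toy stand-in for find_next_bit()/find_next_zero_bit(): return the
 * first offset >= off whose entry equals val, or size if none. */
static unsigned long find_next_val(const char *map, unsigned long size,
				   unsigned long off, char val)
{
	while (off < size && map[off] != val)
		off++;
	return off;
}

/* Mirrors cma_maxchunk_get(): remember the longest run of free bits. */
static unsigned long max_free_chunk(const char *bitmap, unsigned long count)
{
	unsigned long maxchunk = 0, start, end = 0;

	for (;;) {
		start = find_next_val(bitmap, count, end, 0); /* next free entry */
		if (start >= count)
			break;
		end = find_next_val(bitmap, count, start, 1); /* free run ends */
		if (end - start > maxchunk)
			maxchunk = end - start;
	}
	return maxchunk;
}

int main(void)
{
	char bitmap[] = {1, 0, 0, 0, 1, 0, 0, 1}; /* 1 = allocated */

	printf("%lu\n", max_free_chunk(bitmap, 8)); /* prints 3 */
	return 0;
}

As in the kernel code, the result is in bitmap granules; the debugfs file shifts it by order_per_bit to convert to pages.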
diff --git a/mm/compaction.c b/mm/compaction.c
index a18201a8124e..018f08da99a2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -391,28 +391,6 @@ static inline bool compact_should_abort(struct compact_control *cc)
391 return false; 391 return false;
392} 392}
393 393
394/* Returns true if the page is within a block suitable for migration to */
395static bool suitable_migration_target(struct page *page)
396{
397 /* If the page is a large free page, then disallow migration */
398 if (PageBuddy(page)) {
399 /*
400 * We are checking page_order without zone->lock taken. But
401 * the only small danger is that we skip a potentially suitable
402 * pageblock, so it's not worth to check order for valid range.
403 */
404 if (page_order_unsafe(page) >= pageblock_order)
405 return false;
406 }
407
408 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
409 if (migrate_async_suitable(get_pageblock_migratetype(page)))
410 return true;
411
412 /* Otherwise skip the block */
413 return false;
414}
415
416/* 394/*
417 * Isolate free pages onto a private freelist. If @strict is true, will abort 395 * Isolate free pages onto a private freelist. If @strict is true, will abort
418 * returning 0 on any invalid PFNs or non-free pages inside of the pageblock 396 * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
@@ -896,6 +874,29 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
896 874
897#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 875#endif /* CONFIG_COMPACTION || CONFIG_CMA */
898#ifdef CONFIG_COMPACTION 876#ifdef CONFIG_COMPACTION
877
878/* Returns true if the page is within a block suitable for migration to */
879static bool suitable_migration_target(struct page *page)
880{
881 /* If the page is a large free page, then disallow migration */
882 if (PageBuddy(page)) {
883 /*
884 * We are checking page_order without zone->lock taken. But
885 * the only small danger is that we skip a potentially suitable
886 * pageblock, so it's not worth to check order for valid range.
887 */
888 if (page_order_unsafe(page) >= pageblock_order)
889 return false;
890 }
891
892 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
893 if (migrate_async_suitable(get_pageblock_migratetype(page)))
894 return true;
895
896 /* Otherwise skip the block */
897 return false;
898}
899
899/* 900/*
900 * Based on information in the current compact_control, find blocks 901 * Based on information in the current compact_control, find blocks
901 * suitable for isolating free pages from and then isolate them. 902 * suitable for isolating free pages from and then isolate them.
@@ -1047,6 +1048,12 @@ typedef enum {
1047} isolate_migrate_t; 1048} isolate_migrate_t;
1048 1049
1049/* 1050/*
1051 * Allow userspace to control policy on scanning the unevictable LRU for
1052 * compactable pages.
1053 */
1054int sysctl_compact_unevictable_allowed __read_mostly = 1;
1055
1056/*
1050 * Isolate all pages that can be migrated from the first suitable block, 1057 * Isolate all pages that can be migrated from the first suitable block,
1051 * starting at the block pointed to by the migrate scanner pfn within 1058 * starting at the block pointed to by the migrate scanner pfn within
1052 * compact_control. 1059 * compact_control.
@@ -1057,6 +1064,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
1057 unsigned long low_pfn, end_pfn; 1064 unsigned long low_pfn, end_pfn;
1058 struct page *page; 1065 struct page *page;
1059 const isolate_mode_t isolate_mode = 1066 const isolate_mode_t isolate_mode =
1067 (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
1060 (cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0); 1068 (cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);
1061 1069
1062 /* 1070 /*
@@ -1598,6 +1606,14 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
1598 INIT_LIST_HEAD(&cc->freepages); 1606 INIT_LIST_HEAD(&cc->freepages);
1599 INIT_LIST_HEAD(&cc->migratepages); 1607 INIT_LIST_HEAD(&cc->migratepages);
1600 1608
1609 /*
1610 * When called via /proc/sys/vm/compact_memory
1611 * this makes sure we compact the whole zone regardless of
1612 * cached scanner positions.
1613 */
1614 if (cc->order == -1)
1615 __reset_isolation_suitable(zone);
1616
1601 if (cc->order == -1 || !compaction_deferred(zone, cc->order)) 1617 if (cc->order == -1 || !compaction_deferred(zone, cc->order))
1602 compact_zone(zone, cc); 1618 compact_zone(zone, cc);
1603 1619
diff --git a/mm/gup.c b/mm/gup.c
index ca7b607ab671..6297f6bccfb1 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1019,7 +1019,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
1019 * 1019 *
1020 * for an example see gup_get_pte in arch/x86/mm/gup.c 1020 * for an example see gup_get_pte in arch/x86/mm/gup.c
1021 */ 1021 */
1022 pte_t pte = ACCESS_ONCE(*ptep); 1022 pte_t pte = READ_ONCE(*ptep);
1023 struct page *page; 1023 struct page *page;
1024 1024
1025 /* 1025 /*
@@ -1309,7 +1309,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
1309 local_irq_save(flags); 1309 local_irq_save(flags);
1310 pgdp = pgd_offset(mm, addr); 1310 pgdp = pgd_offset(mm, addr);
1311 do { 1311 do {
1312 pgd_t pgd = ACCESS_ONCE(*pgdp); 1312 pgd_t pgd = READ_ONCE(*pgdp);
1313 1313
1314 next = pgd_addr_end(addr, end); 1314 next = pgd_addr_end(addr, end);
1315 if (pgd_none(pgd)) 1315 if (pgd_none(pgd))
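The ACCESS_ONCE() to READ_ONCE() conversions here (and in the files below) matter because ACCESS_ONCE() only works reliably on scalar types, while page-table entries such as pte_t can be aggregates on some configurations; READ_ONCE() handles both. The point of either macro is to force exactly one load, so the compiler cannot re-fetch a value that another CPU may be rewriting. A scalar-only sketch of the idiom (the kernel's READ_ONCE() additionally copies aggregate types through a volatile pointer):

#include <stdio.h>

/* Scalar-only sketch: a volatile access forces a single real load. */
#define READ_ONCE_SKETCH(x) (*(const volatile __typeof__(x) *)&(x))

static unsigned long pte; /* pretend this is a live page-table entry */

/* Snapshot the entry once; every later test sees the same value even
 * if a concurrent updater rewrites the entry in between. */
static int pte_present_snapshot(void)
{
	unsigned long val = READ_ONCE_SKETCH(pte);

	return (val & 1) != 0; /* pretend bit 0 is the "present" bit */
}

int main(void)
{
	pte = 0x1003;
	printf("%d\n", pte_present_snapshot()); /* prints 1 */
	return 0;
}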
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3afb5cbe1312..078832cf3636 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -67,6 +67,7 @@ static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
67 67
68static int khugepaged(void *none); 68static int khugepaged(void *none);
69static int khugepaged_slab_init(void); 69static int khugepaged_slab_init(void);
70static void khugepaged_slab_exit(void);
70 71
71#define MM_SLOTS_HASH_BITS 10 72#define MM_SLOTS_HASH_BITS 10
72static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); 73static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
@@ -109,9 +110,6 @@ static int set_recommended_min_free_kbytes(void)
109 int nr_zones = 0; 110 int nr_zones = 0;
110 unsigned long recommended_min; 111 unsigned long recommended_min;
111 112
112 if (!khugepaged_enabled())
113 return 0;
114
115 for_each_populated_zone(zone) 113 for_each_populated_zone(zone)
116 nr_zones++; 114 nr_zones++;
117 115
@@ -143,9 +141,8 @@ static int set_recommended_min_free_kbytes(void)
143 setup_per_zone_wmarks(); 141 setup_per_zone_wmarks();
144 return 0; 142 return 0;
145} 143}
146late_initcall(set_recommended_min_free_kbytes);
147 144
148static int start_khugepaged(void) 145static int start_stop_khugepaged(void)
149{ 146{
150 int err = 0; 147 int err = 0;
151 if (khugepaged_enabled()) { 148 if (khugepaged_enabled()) {
@@ -156,6 +153,7 @@ static int start_khugepaged(void)
156 pr_err("khugepaged: kthread_run(khugepaged) failed\n"); 153 pr_err("khugepaged: kthread_run(khugepaged) failed\n");
157 err = PTR_ERR(khugepaged_thread); 154 err = PTR_ERR(khugepaged_thread);
158 khugepaged_thread = NULL; 155 khugepaged_thread = NULL;
156 goto fail;
159 } 157 }
160 158
161 if (!list_empty(&khugepaged_scan.mm_head)) 159 if (!list_empty(&khugepaged_scan.mm_head))
@@ -166,7 +164,7 @@ static int start_khugepaged(void)
166 kthread_stop(khugepaged_thread); 164 kthread_stop(khugepaged_thread);
167 khugepaged_thread = NULL; 165 khugepaged_thread = NULL;
168 } 166 }
169 167fail:
170 return err; 168 return err;
171} 169}
172 170
@@ -183,7 +181,7 @@ static struct page *get_huge_zero_page(void)
183 struct page *zero_page; 181 struct page *zero_page;
184retry: 182retry:
185 if (likely(atomic_inc_not_zero(&huge_zero_refcount))) 183 if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
186 return ACCESS_ONCE(huge_zero_page); 184 return READ_ONCE(huge_zero_page);
187 185
188 zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE, 186 zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
189 HPAGE_PMD_ORDER); 187 HPAGE_PMD_ORDER);
@@ -202,7 +200,7 @@ retry:
202 /* We take additional reference here. It will be put back by shrinker */ 200 /* We take additional reference here. It will be put back by shrinker */
203 atomic_set(&huge_zero_refcount, 2); 201 atomic_set(&huge_zero_refcount, 2);
204 preempt_enable(); 202 preempt_enable();
205 return ACCESS_ONCE(huge_zero_page); 203 return READ_ONCE(huge_zero_page);
206} 204}
207 205
208static void put_huge_zero_page(void) 206static void put_huge_zero_page(void)
@@ -300,7 +298,7 @@ static ssize_t enabled_store(struct kobject *kobj,
300 int err; 298 int err;
301 299
302 mutex_lock(&khugepaged_mutex); 300 mutex_lock(&khugepaged_mutex);
303 err = start_khugepaged(); 301 err = start_stop_khugepaged();
304 mutex_unlock(&khugepaged_mutex); 302 mutex_unlock(&khugepaged_mutex);
305 303
306 if (err) 304 if (err)
@@ -634,27 +632,38 @@ static int __init hugepage_init(void)
634 632
635 err = hugepage_init_sysfs(&hugepage_kobj); 633 err = hugepage_init_sysfs(&hugepage_kobj);
636 if (err) 634 if (err)
637 return err; 635 goto err_sysfs;
638 636
639 err = khugepaged_slab_init(); 637 err = khugepaged_slab_init();
640 if (err) 638 if (err)
641 goto out; 639 goto err_slab;
642 640
643 register_shrinker(&huge_zero_page_shrinker); 641 err = register_shrinker(&huge_zero_page_shrinker);
642 if (err)
643 goto err_hzp_shrinker;
644 644
645 /* 645 /*
646 * By default disable transparent hugepages on smaller systems, 646 * By default disable transparent hugepages on smaller systems,
647 * where the extra memory used could hurt more than TLB overhead 647 * where the extra memory used could hurt more than TLB overhead
648 * is likely to save. The admin can still enable it through /sys. 648 * is likely to save. The admin can still enable it through /sys.
649 */ 649 */
650 if (totalram_pages < (512 << (20 - PAGE_SHIFT))) 650 if (totalram_pages < (512 << (20 - PAGE_SHIFT))) {
651 transparent_hugepage_flags = 0; 651 transparent_hugepage_flags = 0;
652 return 0;
653 }
652 654
653 start_khugepaged(); 655 err = start_stop_khugepaged();
656 if (err)
657 goto err_khugepaged;
654 658
655 return 0; 659 return 0;
656out: 660err_khugepaged:
661 unregister_shrinker(&huge_zero_page_shrinker);
662err_hzp_shrinker:
663 khugepaged_slab_exit();
664err_slab:
657 hugepage_exit_sysfs(hugepage_kobj); 665 hugepage_exit_sysfs(hugepage_kobj);
666err_sysfs:
658 return err; 667 return err;
659} 668}
660subsys_initcall(hugepage_init); 669subsys_initcall(hugepage_init);
@@ -708,7 +717,7 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
708static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, 717static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
709 struct vm_area_struct *vma, 718 struct vm_area_struct *vma,
710 unsigned long haddr, pmd_t *pmd, 719 unsigned long haddr, pmd_t *pmd,
711 struct page *page) 720 struct page *page, gfp_t gfp)
712{ 721{
713 struct mem_cgroup *memcg; 722 struct mem_cgroup *memcg;
714 pgtable_t pgtable; 723 pgtable_t pgtable;
@@ -716,7 +725,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
716 725
717 VM_BUG_ON_PAGE(!PageCompound(page), page); 726 VM_BUG_ON_PAGE(!PageCompound(page), page);
718 727
719 if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg)) 728 if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
720 return VM_FAULT_OOM; 729 return VM_FAULT_OOM;
721 730
722 pgtable = pte_alloc_one(mm, haddr); 731 pgtable = pte_alloc_one(mm, haddr);
@@ -822,7 +831,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
822 count_vm_event(THP_FAULT_FALLBACK); 831 count_vm_event(THP_FAULT_FALLBACK);
823 return VM_FAULT_FALLBACK; 832 return VM_FAULT_FALLBACK;
824 } 833 }
825 if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { 834 if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
826 put_page(page); 835 put_page(page);
827 count_vm_event(THP_FAULT_FALLBACK); 836 count_vm_event(THP_FAULT_FALLBACK);
828 return VM_FAULT_FALLBACK; 837 return VM_FAULT_FALLBACK;
@@ -1080,6 +1089,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1080 unsigned long haddr; 1089 unsigned long haddr;
1081 unsigned long mmun_start; /* For mmu_notifiers */ 1090 unsigned long mmun_start; /* For mmu_notifiers */
1082 unsigned long mmun_end; /* For mmu_notifiers */ 1091 unsigned long mmun_end; /* For mmu_notifiers */
1092 gfp_t huge_gfp; /* for allocation and charge */
1083 1093
1084 ptl = pmd_lockptr(mm, pmd); 1094 ptl = pmd_lockptr(mm, pmd);
1085 VM_BUG_ON_VMA(!vma->anon_vma, vma); 1095 VM_BUG_ON_VMA(!vma->anon_vma, vma);
@@ -1106,10 +1116,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1106alloc: 1116alloc:
1107 if (transparent_hugepage_enabled(vma) && 1117 if (transparent_hugepage_enabled(vma) &&
1108 !transparent_hugepage_debug_cow()) { 1118 !transparent_hugepage_debug_cow()) {
1109 gfp_t gfp; 1119 huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
1110 1120 new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
1111 gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
1112 new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
1113 } else 1121 } else
1114 new_page = NULL; 1122 new_page = NULL;
1115 1123
@@ -1130,8 +1138,7 @@ alloc:
1130 goto out; 1138 goto out;
1131 } 1139 }
1132 1140
1133 if (unlikely(mem_cgroup_try_charge(new_page, mm, 1141 if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg))) {
1134 GFP_TRANSHUGE, &memcg))) {
1135 put_page(new_page); 1142 put_page(new_page);
1136 if (page) { 1143 if (page) {
1137 split_huge_page(page); 1144 split_huge_page(page);
@@ -1976,6 +1983,11 @@ static int __init khugepaged_slab_init(void)
1976 return 0; 1983 return 0;
1977} 1984}
1978 1985
1986static void __init khugepaged_slab_exit(void)
1987{
1988 kmem_cache_destroy(mm_slot_cache);
1989}
1990
1979static inline struct mm_slot *alloc_mm_slot(void) 1991static inline struct mm_slot *alloc_mm_slot(void)
1980{ 1992{
1981 if (!mm_slot_cache) /* initialization failed */ 1993 if (!mm_slot_cache) /* initialization failed */
@@ -2323,19 +2335,13 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
2323 return true; 2335 return true;
2324} 2336}
2325 2337
2326static struct page 2338static struct page *
2327*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm, 2339khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
2328 struct vm_area_struct *vma, unsigned long address, 2340 struct vm_area_struct *vma, unsigned long address,
2329 int node) 2341 int node)
2330{ 2342{
2331 gfp_t flags;
2332
2333 VM_BUG_ON_PAGE(*hpage, *hpage); 2343 VM_BUG_ON_PAGE(*hpage, *hpage);
2334 2344
2335 /* Only allocate from the target node */
2336 flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
2337 __GFP_THISNODE;
2338
2339 /* 2345 /*
2340 * Before allocating the hugepage, release the mmap_sem read lock. 2346 * Before allocating the hugepage, release the mmap_sem read lock.
2341 * The allocation can take potentially a long time if it involves 2347 * The allocation can take potentially a long time if it involves
@@ -2344,7 +2350,7 @@ static struct page
2344 */ 2350 */
2345 up_read(&mm->mmap_sem); 2351 up_read(&mm->mmap_sem);
2346 2352
2347 *hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER); 2353 *hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
2348 if (unlikely(!*hpage)) { 2354 if (unlikely(!*hpage)) {
2349 count_vm_event(THP_COLLAPSE_ALLOC_FAILED); 2355 count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
2350 *hpage = ERR_PTR(-ENOMEM); 2356 *hpage = ERR_PTR(-ENOMEM);
@@ -2397,13 +2403,14 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
2397 return true; 2403 return true;
2398} 2404}
2399 2405
2400static struct page 2406static struct page *
2401*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm, 2407khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
2402 struct vm_area_struct *vma, unsigned long address, 2408 struct vm_area_struct *vma, unsigned long address,
2403 int node) 2409 int node)
2404{ 2410{
2405 up_read(&mm->mmap_sem); 2411 up_read(&mm->mmap_sem);
2406 VM_BUG_ON(!*hpage); 2412 VM_BUG_ON(!*hpage);
2413
2407 return *hpage; 2414 return *hpage;
2408} 2415}
2409#endif 2416#endif
@@ -2438,16 +2445,21 @@ static void collapse_huge_page(struct mm_struct *mm,
2438 struct mem_cgroup *memcg; 2445 struct mem_cgroup *memcg;
2439 unsigned long mmun_start; /* For mmu_notifiers */ 2446 unsigned long mmun_start; /* For mmu_notifiers */
2440 unsigned long mmun_end; /* For mmu_notifiers */ 2447 unsigned long mmun_end; /* For mmu_notifiers */
2448 gfp_t gfp;
2441 2449
2442 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 2450 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
2443 2451
2452 /* Only allocate from the target node */
2453 gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
2454 __GFP_THISNODE;
2455
2444 /* release the mmap_sem read lock. */ 2456 /* release the mmap_sem read lock. */
2445 new_page = khugepaged_alloc_page(hpage, mm, vma, address, node); 2457 new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node);
2446 if (!new_page) 2458 if (!new_page)
2447 return; 2459 return;
2448 2460
2449 if (unlikely(mem_cgroup_try_charge(new_page, mm, 2461 if (unlikely(mem_cgroup_try_charge(new_page, mm,
2450 GFP_TRANSHUGE, &memcg))) 2462 gfp, &memcg)))
2451 return; 2463 return;
2452 2464
2453 /* 2465 /*
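The reworked hugepage_init() above is a textbook instance of the kernel's goto-unwind error handling: each setup step that can fail jumps to a label that tears down exactly the steps already completed, in reverse order, with the labels falling through into one another. A self-contained sketch of the pattern (all function names here are invented placeholders, not kernel APIs):

#include <stdio.h>

/* Placeholder init/teardown steps; each init returns 0 on success. */
static int  init_sysfs(void)    { return 0; }
static void exit_sysfs(void)    { }
static int  init_slab(void)     { return 0; }
static void exit_slab(void)     { }
static int  init_shrinker(void) { return 0; }
static void exit_shrinker(void) { }
static int  start_thread(void)  { return 0; }

static int subsystem_init(void)
{
	int err;

	err = init_sysfs();
	if (err)
		goto err_sysfs;
	err = init_slab();
	if (err)
		goto err_slab;
	err = init_shrinker();
	if (err)
		goto err_shrinker;
	err = start_thread();
	if (err)
		goto err_thread;
	return 0;

	/* Unwind in reverse order; each label falls through to the next. */
err_thread:
	exit_shrinker();
err_shrinker:
	exit_slab();
err_slab:
	exit_sysfs();
err_sysfs:
	return err;
}

int main(void)
{
	printf("init: %d\n", subsystem_init()); /* prints "init: 0" */
	return 0;
}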
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8874c8ad55aa..271e4432734c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -61,6 +61,9 @@ DEFINE_SPINLOCK(hugetlb_lock);
61static int num_fault_mutexes; 61static int num_fault_mutexes;
62static struct mutex *htlb_fault_mutex_table ____cacheline_aligned_in_smp; 62static struct mutex *htlb_fault_mutex_table ____cacheline_aligned_in_smp;
63 63
64/* Forward declaration */
65static int hugetlb_acct_memory(struct hstate *h, long delta);
66
64static inline void unlock_or_release_subpool(struct hugepage_subpool *spool) 67static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
65{ 68{
66 bool free = (spool->count == 0) && (spool->used_hpages == 0); 69 bool free = (spool->count == 0) && (spool->used_hpages == 0);
@@ -68,23 +71,36 @@ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
68 spin_unlock(&spool->lock); 71 spin_unlock(&spool->lock);
69 72
70 /* If no pages are used, and no other handles to the subpool 73 /* If no pages are used, and no other handles to the subpool
71 * remain, free the subpool the subpool remain */ 74 * remain, give up any reservations based on minimum size and
72 if (free) 75 * free the subpool */
76 if (free) {
77 if (spool->min_hpages != -1)
78 hugetlb_acct_memory(spool->hstate,
79 -spool->min_hpages);
73 kfree(spool); 80 kfree(spool);
81 }
74} 82}
75 83
76struct hugepage_subpool *hugepage_new_subpool(long nr_blocks) 84struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
85 long min_hpages)
77{ 86{
78 struct hugepage_subpool *spool; 87 struct hugepage_subpool *spool;
79 88
80 spool = kmalloc(sizeof(*spool), GFP_KERNEL); 89 spool = kzalloc(sizeof(*spool), GFP_KERNEL);
81 if (!spool) 90 if (!spool)
82 return NULL; 91 return NULL;
83 92
84 spin_lock_init(&spool->lock); 93 spin_lock_init(&spool->lock);
85 spool->count = 1; 94 spool->count = 1;
86 spool->max_hpages = nr_blocks; 95 spool->max_hpages = max_hpages;
87 spool->used_hpages = 0; 96 spool->hstate = h;
97 spool->min_hpages = min_hpages;
98
99 if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) {
100 kfree(spool);
101 return NULL;
102 }
103 spool->rsv_hpages = min_hpages;
88 104
89 return spool; 105 return spool;
90} 106}
@@ -97,36 +113,89 @@ void hugepage_put_subpool(struct hugepage_subpool *spool)
97 unlock_or_release_subpool(spool); 113 unlock_or_release_subpool(spool);
98} 114}
99 115
100static int hugepage_subpool_get_pages(struct hugepage_subpool *spool, 116/*
117 * Subpool accounting for allocating and reserving pages.
118 * Return -ENOMEM if there are not enough resources to satisfy
119 * the request. Otherwise, return the number of pages by which the
120 * global pools must be adjusted (upward). The returned value may
121 * only be different from the passed value (delta) in the case where
122 * a subpool minimum size must be maintained.
123 */
124static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
101 long delta) 125 long delta)
102{ 126{
103 int ret = 0; 127 long ret = delta;
104 128
105 if (!spool) 129 if (!spool)
106 return 0; 130 return ret;
107 131
108 spin_lock(&spool->lock); 132 spin_lock(&spool->lock);
109 if ((spool->used_hpages + delta) <= spool->max_hpages) { 133
110 spool->used_hpages += delta; 134 if (spool->max_hpages != -1) { /* maximum size accounting */
111 } else { 135 if ((spool->used_hpages + delta) <= spool->max_hpages)
112 ret = -ENOMEM; 136 spool->used_hpages += delta;
137 else {
138 ret = -ENOMEM;
139 goto unlock_ret;
140 }
141 }
142
143 if (spool->min_hpages != -1) { /* minimum size accounting */
144 if (delta > spool->rsv_hpages) {
145 /*
146 * Asking for more reserves than those already taken on
147 * behalf of subpool. Return difference.
148 */
149 ret = delta - spool->rsv_hpages;
150 spool->rsv_hpages = 0;
151 } else {
152 ret = 0; /* reserves already accounted for */
153 spool->rsv_hpages -= delta;
154 }
113 } 155 }
114 spin_unlock(&spool->lock);
115 156
157unlock_ret:
158 spin_unlock(&spool->lock);
116 return ret; 159 return ret;
117} 160}
118 161
119static void hugepage_subpool_put_pages(struct hugepage_subpool *spool, 162/*
163 * Subpool accounting for freeing and unreserving pages.
164 * Return the number of global page reservations that must be dropped.
165 * The return value may only be different from the passed value (delta)
166 * in the case where a subpool minimum size must be maintained.
167 */
168static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
120 long delta) 169 long delta)
121{ 170{
171 long ret = delta;
172
122 if (!spool) 173 if (!spool)
123 return; 174 return delta;
124 175
125 spin_lock(&spool->lock); 176 spin_lock(&spool->lock);
126 spool->used_hpages -= delta; 177
127 /* If hugetlbfs_put_super couldn't free spool due to 178 if (spool->max_hpages != -1) /* maximum size accounting */
128 * an outstanding quota reference, free it now. */ 179 spool->used_hpages -= delta;
180
181 if (spool->min_hpages != -1) { /* minimum size accounting */
182 if (spool->rsv_hpages + delta <= spool->min_hpages)
183 ret = 0;
184 else
185 ret = spool->rsv_hpages + delta - spool->min_hpages;
186
187 spool->rsv_hpages += delta;
188 if (spool->rsv_hpages > spool->min_hpages)
189 spool->rsv_hpages = spool->min_hpages;
190 }
191
192 /*
193 * If hugetlbfs_put_super couldn't free spool due to an outstanding
194 * quota reference, free it now.
195 */
129 unlock_or_release_subpool(spool); 196 unlock_or_release_subpool(spool);
197
198 return ret;
130} 199}
131 200
132static inline struct hugepage_subpool *subpool_inode(struct inode *inode) 201static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
@@ -855,6 +924,31 @@ struct hstate *size_to_hstate(unsigned long size)
855 return NULL; 924 return NULL;
856} 925}
857 926
927/*
928 * Test to determine whether the hugepage is "active/in-use" (i.e. being linked
929 * to hstate->hugepage_activelist).
930 *
931 * This function can be called for tail pages, but never returns true for them.
932 */
933bool page_huge_active(struct page *page)
934{
935 VM_BUG_ON_PAGE(!PageHuge(page), page);
936 return PageHead(page) && PagePrivate(&page[1]);
937}
938
939/* never called for tail page */
940static void set_page_huge_active(struct page *page)
941{
942 VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
943 SetPagePrivate(&page[1]);
944}
945
946static void clear_page_huge_active(struct page *page)
947{
948 VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
949 ClearPagePrivate(&page[1]);
950}
951
858void free_huge_page(struct page *page) 952void free_huge_page(struct page *page)
859{ 953{
860 /* 954 /*
@@ -874,7 +968,16 @@ void free_huge_page(struct page *page)
874 restore_reserve = PagePrivate(page); 968 restore_reserve = PagePrivate(page);
875 ClearPagePrivate(page); 969 ClearPagePrivate(page);
876 970
971 /*
972 * A return code of zero implies that the subpool will be under its
973 * minimum size if the reservation is not restored after page is free.
974 * Therefore, force restore_reserve operation.
975 */
976 if (hugepage_subpool_put_pages(spool, 1) == 0)
977 restore_reserve = true;
978
877 spin_lock(&hugetlb_lock); 979 spin_lock(&hugetlb_lock);
980 clear_page_huge_active(page);
878 hugetlb_cgroup_uncharge_page(hstate_index(h), 981 hugetlb_cgroup_uncharge_page(hstate_index(h),
879 pages_per_huge_page(h), page); 982 pages_per_huge_page(h), page);
880 if (restore_reserve) 983 if (restore_reserve)
@@ -891,7 +994,6 @@ void free_huge_page(struct page *page)
891 enqueue_huge_page(h, page); 994 enqueue_huge_page(h, page);
892 } 995 }
893 spin_unlock(&hugetlb_lock); 996 spin_unlock(&hugetlb_lock);
894 hugepage_subpool_put_pages(spool, 1);
895} 997}
896 998
897static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) 999static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -1386,7 +1488,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
1386 if (chg < 0) 1488 if (chg < 0)
1387 return ERR_PTR(-ENOMEM); 1489 return ERR_PTR(-ENOMEM);
1388 if (chg || avoid_reserve) 1490 if (chg || avoid_reserve)
1389 if (hugepage_subpool_get_pages(spool, 1)) 1491 if (hugepage_subpool_get_pages(spool, 1) < 0)
1390 return ERR_PTR(-ENOSPC); 1492 return ERR_PTR(-ENOSPC);
1391 1493
1392 ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg); 1494 ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
@@ -2454,6 +2556,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
2454 struct resv_map *resv = vma_resv_map(vma); 2556 struct resv_map *resv = vma_resv_map(vma);
2455 struct hugepage_subpool *spool = subpool_vma(vma); 2557 struct hugepage_subpool *spool = subpool_vma(vma);
2456 unsigned long reserve, start, end; 2558 unsigned long reserve, start, end;
2559 long gbl_reserve;
2457 2560
2458 if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER)) 2561 if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER))
2459 return; 2562 return;
@@ -2466,8 +2569,12 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
2466 kref_put(&resv->refs, resv_map_release); 2569 kref_put(&resv->refs, resv_map_release);
2467 2570
2468 if (reserve) { 2571 if (reserve) {
2469 hugetlb_acct_memory(h, -reserve); 2572 /*
2470 hugepage_subpool_put_pages(spool, reserve); 2573 * Decrement reserve counts. The global reserve count may be
2574 * adjusted if the subpool has a minimum size.
2575 */
2576 gbl_reserve = hugepage_subpool_put_pages(spool, reserve);
2577 hugetlb_acct_memory(h, -gbl_reserve);
2471 } 2578 }
2472} 2579}
2473 2580
@@ -2891,6 +2998,7 @@ retry_avoidcopy:
2891 copy_user_huge_page(new_page, old_page, address, vma, 2998 copy_user_huge_page(new_page, old_page, address, vma,
2892 pages_per_huge_page(h)); 2999 pages_per_huge_page(h));
2893 __SetPageUptodate(new_page); 3000 __SetPageUptodate(new_page);
3001 set_page_huge_active(new_page);
2894 3002
2895 mmun_start = address & huge_page_mask(h); 3003 mmun_start = address & huge_page_mask(h);
2896 mmun_end = mmun_start + huge_page_size(h); 3004 mmun_end = mmun_start + huge_page_size(h);
@@ -3003,6 +3111,7 @@ retry:
3003 } 3111 }
3004 clear_huge_page(page, address, pages_per_huge_page(h)); 3112 clear_huge_page(page, address, pages_per_huge_page(h));
3005 __SetPageUptodate(page); 3113 __SetPageUptodate(page);
3114 set_page_huge_active(page);
3006 3115
3007 if (vma->vm_flags & VM_MAYSHARE) { 3116 if (vma->vm_flags & VM_MAYSHARE) {
3008 int err; 3117 int err;
@@ -3447,6 +3556,7 @@ int hugetlb_reserve_pages(struct inode *inode,
3447 struct hstate *h = hstate_inode(inode); 3556 struct hstate *h = hstate_inode(inode);
3448 struct hugepage_subpool *spool = subpool_inode(inode); 3557 struct hugepage_subpool *spool = subpool_inode(inode);
3449 struct resv_map *resv_map; 3558 struct resv_map *resv_map;
3559 long gbl_reserve;
3450 3560
3451 /* 3561 /*
3452 * Only apply hugepage reservation if asked. At fault time, an 3562 * Only apply hugepage reservation if asked. At fault time, an
@@ -3483,8 +3593,13 @@ int hugetlb_reserve_pages(struct inode *inode,
3483 goto out_err; 3593 goto out_err;
3484 } 3594 }
3485 3595
3486 /* There must be enough pages in the subpool for the mapping */ 3596 /*
3487 if (hugepage_subpool_get_pages(spool, chg)) { 3597 * There must be enough pages in the subpool for the mapping. If
3598 * the subpool has a minimum size, there may be some global
3599 * reservations already in place (gbl_reserve).
3600 */
3601 gbl_reserve = hugepage_subpool_get_pages(spool, chg);
3602 if (gbl_reserve < 0) {
3488 ret = -ENOSPC; 3603 ret = -ENOSPC;
3489 goto out_err; 3604 goto out_err;
3490 } 3605 }
@@ -3493,9 +3608,10 @@ int hugetlb_reserve_pages(struct inode *inode,
3493 * Check enough hugepages are available for the reservation. 3608 * Check enough hugepages are available for the reservation.
3494 * Hand the pages back to the subpool if there are not 3609 * Hand the pages back to the subpool if there are not
3495 */ 3610 */
3496 ret = hugetlb_acct_memory(h, chg); 3611 ret = hugetlb_acct_memory(h, gbl_reserve);
3497 if (ret < 0) { 3612 if (ret < 0) {
3498 hugepage_subpool_put_pages(spool, chg); 3613 /* put back original number of pages, chg */
3614 (void)hugepage_subpool_put_pages(spool, chg);
3499 goto out_err; 3615 goto out_err;
3500 } 3616 }
3501 3617
@@ -3525,6 +3641,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
3525 struct resv_map *resv_map = inode_resv_map(inode); 3641 struct resv_map *resv_map = inode_resv_map(inode);
3526 long chg = 0; 3642 long chg = 0;
3527 struct hugepage_subpool *spool = subpool_inode(inode); 3643 struct hugepage_subpool *spool = subpool_inode(inode);
3644 long gbl_reserve;
3528 3645
3529 if (resv_map) 3646 if (resv_map)
3530 chg = region_truncate(resv_map, offset); 3647 chg = region_truncate(resv_map, offset);
@@ -3532,8 +3649,12 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
3532 inode->i_blocks -= (blocks_per_huge_page(h) * freed); 3649 inode->i_blocks -= (blocks_per_huge_page(h) * freed);
3533 spin_unlock(&inode->i_lock); 3650 spin_unlock(&inode->i_lock);
3534 3651
3535 hugepage_subpool_put_pages(spool, (chg - freed)); 3652 /*
3536 hugetlb_acct_memory(h, -(chg - freed)); 3653 * If the subpool has a minimum size, the number of global
3654 * reservations to be released may be adjusted.
3655 */
3656 gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed));
3657 hugetlb_acct_memory(h, -gbl_reserve);
3537} 3658}
3538 3659
3539#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE 3660#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
@@ -3775,20 +3896,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
3775 3896
3776#ifdef CONFIG_MEMORY_FAILURE 3897#ifdef CONFIG_MEMORY_FAILURE
3777 3898
3778/* Should be called in hugetlb_lock */
3779static int is_hugepage_on_freelist(struct page *hpage)
3780{
3781 struct page *page;
3782 struct page *tmp;
3783 struct hstate *h = page_hstate(hpage);
3784 int nid = page_to_nid(hpage);
3785
3786 list_for_each_entry_safe(page, tmp, &h->hugepage_freelists[nid], lru)
3787 if (page == hpage)
3788 return 1;
3789 return 0;
3790}
3791
3792/* 3899/*
3793 * This function is called from memory failure code. 3900 * This function is called from memory failure code.
3794 * Assume the caller holds page lock of the head page. 3901 * Assume the caller holds page lock of the head page.
@@ -3800,7 +3907,11 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
3800 int ret = -EBUSY; 3907 int ret = -EBUSY;
3801 3908
3802 spin_lock(&hugetlb_lock); 3909 spin_lock(&hugetlb_lock);
3803 if (is_hugepage_on_freelist(hpage)) { 3910 /*
3911 * Just checking !page_huge_active is not enough, because that could be
3912 * an isolated/hwpoisoned hugepage (which have >0 refcount).
3913 */
3914 if (!page_huge_active(hpage) && !page_count(hpage)) {
3804 /* 3915 /*
3805 * Hwpoisoned hugepage isn't linked to activelist or freelist, 3916 * Hwpoisoned hugepage isn't linked to activelist or freelist,
3806 * but dangling hpage->lru can trigger list-debug warnings 3917 * but dangling hpage->lru can trigger list-debug warnings
@@ -3820,42 +3931,27 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
3820 3931
3821bool isolate_huge_page(struct page *page, struct list_head *list) 3932bool isolate_huge_page(struct page *page, struct list_head *list)
3822{ 3933{
3934 bool ret = true;
3935
3823 VM_BUG_ON_PAGE(!PageHead(page), page); 3936 VM_BUG_ON_PAGE(!PageHead(page), page);
3824 if (!get_page_unless_zero(page))
3825 return false;
3826 spin_lock(&hugetlb_lock); 3937 spin_lock(&hugetlb_lock);
3938 if (!page_huge_active(page) || !get_page_unless_zero(page)) {
3939 ret = false;
3940 goto unlock;
3941 }
3942 clear_page_huge_active(page);
3827 list_move_tail(&page->lru, list); 3943 list_move_tail(&page->lru, list);
3944unlock:
3828 spin_unlock(&hugetlb_lock); 3945 spin_unlock(&hugetlb_lock);
3829 return true; 3946 return ret;
3830} 3947}
3831 3948
3832void putback_active_hugepage(struct page *page) 3949void putback_active_hugepage(struct page *page)
3833{ 3950{
3834 VM_BUG_ON_PAGE(!PageHead(page), page); 3951 VM_BUG_ON_PAGE(!PageHead(page), page);
3835 spin_lock(&hugetlb_lock); 3952 spin_lock(&hugetlb_lock);
3953 set_page_huge_active(page);
3836 list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist); 3954 list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
3837 spin_unlock(&hugetlb_lock); 3955 spin_unlock(&hugetlb_lock);
3838 put_page(page); 3956 put_page(page);
3839} 3957}
3840
3841bool is_hugepage_active(struct page *page)
3842{
3843 VM_BUG_ON_PAGE(!PageHuge(page), page);
3844 /*
3845 * This function can be called for a tail page because the caller,
3846 * scan_movable_pages, scans through a given pfn-range which typically
3847 * covers one memory block. In systems using gigantic hugepage (1GB
3848 * for x86_64,) a hugepage is larger than a memory block, and we don't
3849 * support migrating such large hugepages for now, so return false
3850 * when called for tail pages.
3851 */
3852 if (PageTail(page))
3853 return false;
3854 /*
3855 * Refcount of hwpoisoned hugepages is 1, but they are not active,
3856 * so we should return false for them.
3857 */
3858 if (unlikely(PageHWPoison(page)))
3859 return false;
3860 return page_count(page) > 0;
3861}
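The rewritten hugepage_subpool_get_pages()/put_pages() now return a delta for the global pool rather than a plain status: when a subpool carries a minimum-size reservation, part of a request is already paid for up front, so the global pool only needs to move the remainder. A toy model of just the minimum-size half of that accounting (no locking and no maximum-size check; field and function names are illustrative):

#include <stdio.h>

struct subpool {
	long min; /* like min_hpages: guaranteed floor, -1 if none */
	long rsv; /* like rsv_hpages: reserves currently held back */
};

/* Returns how many pages the global pool must still provide. */
static long subpool_get(struct subpool *sp, long delta)
{
	long need = delta;

	if (sp->min != -1) {
		if (delta > sp->rsv) {
			need = delta - sp->rsv; /* reserve covers only part */
			sp->rsv = 0;
		} else {
			need = 0;               /* fully covered up front */
			sp->rsv -= delta;
		}
	}
	return need;
}

/* Returns how many global reservations may actually be dropped. */
static long subpool_put(struct subpool *sp, long delta)
{
	long drop = delta;

	if (sp->min != -1) {
		if (sp->rsv + delta <= sp->min)
			drop = 0; /* everything refills the floor */
		else
			drop = sp->rsv + delta - sp->min;
		sp->rsv += delta;
		if (sp->rsv > sp->min)
			sp->rsv = sp->min;
	}
	return drop;
}

int main(void)
{
	struct subpool sp = { .min = 4, .rsv = 4 };

	printf("%ld\n", subpool_get(&sp, 6)); /* 2: the floor covers 4 of 6 */
	printf("%ld\n", subpool_put(&sp, 6)); /* 2: 4 pages refill the floor */
	return 0;
}

This is also why free_huge_page() above treats a return of 0 from put_pages() as "force restore_reserve": the freed page went back to the subpool's floor, not to the global pool.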
diff --git a/mm/internal.h b/mm/internal.h
index edaab69a9c35..a25e359a4039 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -224,13 +224,13 @@ static inline unsigned long page_order(struct page *page)
224 * PageBuddy() should be checked first by the caller to minimize race window, 224 * PageBuddy() should be checked first by the caller to minimize race window,
225 * and invalid values must be handled gracefully. 225 * and invalid values must be handled gracefully.
226 * 226 *
227 * ACCESS_ONCE is used so that if the caller assigns the result into a local 227 * READ_ONCE is used so that if the caller assigns the result into a local
228 * variable and e.g. tests it for valid range before using, the compiler cannot 228 * variable and e.g. tests it for valid range before using, the compiler cannot
229 * decide to remove the variable and inline the page_private(page) multiple 229 * decide to remove the variable and inline the page_private(page) multiple
230 * times, potentially observing different values in the tests and the actual 230 * times, potentially observing different values in the tests and the actual
231 * use of the result. 231 * use of the result.
232 */ 232 */
233#define page_order_unsafe(page) ACCESS_ONCE(page_private(page)) 233#define page_order_unsafe(page) READ_ONCE(page_private(page))
234 234
235static inline bool is_cow_mapping(vm_flags_t flags) 235static inline bool is_cow_mapping(vm_flags_t flags)
236{ 236{
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 936d81661c47..6c513a63ea84 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -389,6 +389,19 @@ void kasan_krealloc(const void *object, size_t size)
389 kasan_kmalloc(page->slab_cache, object, size); 389 kasan_kmalloc(page->slab_cache, object, size);
390} 390}
391 391
392void kasan_kfree(void *ptr)
393{
394 struct page *page;
395
396 page = virt_to_head_page(ptr);
397
398 if (unlikely(!PageSlab(page)))
399 kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
400 KASAN_FREE_PAGE);
401 else
402 kasan_slab_free(page->slab_cache, ptr);
403}
404
392void kasan_kfree_large(const void *ptr) 405void kasan_kfree_large(const void *ptr)
393{ 406{
394 struct page *page = virt_to_page(ptr); 407 struct page *page = virt_to_page(ptr);
diff --git a/mm/ksm.c b/mm/ksm.c
index 4162dce2eb44..7ee101eaacdf 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -542,7 +542,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
542 expected_mapping = (void *)stable_node + 542 expected_mapping = (void *)stable_node +
543 (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); 543 (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
544again: 544again:
545 kpfn = ACCESS_ONCE(stable_node->kpfn); 545 kpfn = READ_ONCE(stable_node->kpfn);
546 page = pfn_to_page(kpfn); 546 page = pfn_to_page(kpfn);
547 547
548 /* 548 /*
@@ -551,7 +551,7 @@ again:
551 * but on Alpha we need to be more careful. 551 * but on Alpha we need to be more careful.
552 */ 552 */
553 smp_read_barrier_depends(); 553 smp_read_barrier_depends();
554 if (ACCESS_ONCE(page->mapping) != expected_mapping) 554 if (READ_ONCE(page->mapping) != expected_mapping)
555 goto stale; 555 goto stale;
556 556
557 /* 557 /*
@@ -577,14 +577,14 @@ again:
577 cpu_relax(); 577 cpu_relax();
578 } 578 }
579 579
580 if (ACCESS_ONCE(page->mapping) != expected_mapping) { 580 if (READ_ONCE(page->mapping) != expected_mapping) {
581 put_page(page); 581 put_page(page);
582 goto stale; 582 goto stale;
583 } 583 }
584 584
585 if (lock_it) { 585 if (lock_it) {
586 lock_page(page); 586 lock_page(page);
587 if (ACCESS_ONCE(page->mapping) != expected_mapping) { 587 if (READ_ONCE(page->mapping) != expected_mapping) {
588 unlock_page(page); 588 unlock_page(page);
589 put_page(page); 589 put_page(page);
590 goto stale; 590 goto stale;
@@ -600,7 +600,7 @@ stale:
600 * before checking whether node->kpfn has been changed. 600 * before checking whether node->kpfn has been changed.
601 */ 601 */
602 smp_rmb(); 602 smp_rmb();
603 if (ACCESS_ONCE(stable_node->kpfn) != kpfn) 603 if (READ_ONCE(stable_node->kpfn) != kpfn)
604 goto again; 604 goto again;
605 remove_node_from_stable_tree(stable_node); 605 remove_node_from_stable_tree(stable_node);
606 return NULL; 606 return NULL;
diff --git a/mm/memblock.c b/mm/memblock.c
index 3f37a0bca5d5..9318b567ed79 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -580,10 +580,24 @@ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
580 return memblock_add_range(&memblock.memory, base, size, nid, 0); 580 return memblock_add_range(&memblock.memory, base, size, nid, 0);
581} 581}
582 582
583static int __init_memblock memblock_add_region(phys_addr_t base,
584 phys_addr_t size,
585 int nid,
586 unsigned long flags)
587{
588 struct memblock_type *_rgn = &memblock.memory;
589
590 memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n",
591 (unsigned long long)base,
592 (unsigned long long)base + size - 1,
593 flags, (void *)_RET_IP_);
594
595 return memblock_add_range(_rgn, base, size, nid, flags);
596}
597
583int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) 598int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
584{ 599{
585 return memblock_add_range(&memblock.memory, base, size, 600 return memblock_add_region(base, size, MAX_NUMNODES, 0);
586 MAX_NUMNODES, 0);
587} 601}
588 602
589/** 603/**
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c3f09b2dda5f..14c2f2017e37 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -259,11 +259,6 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
259 * page cache and RSS per cgroup. We would eventually like to provide 259 * page cache and RSS per cgroup. We would eventually like to provide
260 * statistics based on the statistics developed by Rik Van Riel for clock-pro, 260 * statistics based on the statistics developed by Rik Van Riel for clock-pro,
261 * to help the administrator determine what knobs to tune. 261 * to help the administrator determine what knobs to tune.
262 *
263 * TODO: Add a water mark for the memory controller. Reclaim will begin when
264 * we hit the water mark. May be even add a low water mark, such that
265 * no reclaim occurs from a cgroup at it's low water mark, this is
266 * a feature that will be implemented much later in the future.
267 */ 262 */
268struct mem_cgroup { 263struct mem_cgroup {
269 struct cgroup_subsys_state css; 264 struct cgroup_subsys_state css;
@@ -460,6 +455,12 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
460 return memcg->css.id; 455 return memcg->css.id;
461} 456}
462 457
458/*
459 * A helper function to get mem_cgroup from ID. must be called under
460 * rcu_read_lock(). The caller is responsible for calling
461 * css_tryget_online() if the mem_cgroup is used for charging. (dropping
462 * refcnt from swap can be called against removed memcg.)
463 */
463static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) 464static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
464{ 465{
465 struct cgroup_subsys_state *css; 466 struct cgroup_subsys_state *css;
@@ -673,7 +674,7 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
673static unsigned long soft_limit_excess(struct mem_cgroup *memcg) 674static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
674{ 675{
675 unsigned long nr_pages = page_counter_read(&memcg->memory); 676 unsigned long nr_pages = page_counter_read(&memcg->memory);
676 unsigned long soft_limit = ACCESS_ONCE(memcg->soft_limit); 677 unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
677 unsigned long excess = 0; 678 unsigned long excess = 0;
678 679
679 if (nr_pages > soft_limit) 680 if (nr_pages > soft_limit)
@@ -1041,7 +1042,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
1041 goto out_unlock; 1042 goto out_unlock;
1042 1043
1043 do { 1044 do {
1044 pos = ACCESS_ONCE(iter->position); 1045 pos = READ_ONCE(iter->position);
1045 /* 1046 /*
1046 * A racing update may change the position and 1047 * A racing update may change the position and
1047 * put the last reference, hence css_tryget(), 1048 * put the last reference, hence css_tryget(),
@@ -1358,13 +1359,13 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
1358 unsigned long limit; 1359 unsigned long limit;
1359 1360
1360 count = page_counter_read(&memcg->memory); 1361 count = page_counter_read(&memcg->memory);
1361 limit = ACCESS_ONCE(memcg->memory.limit); 1362 limit = READ_ONCE(memcg->memory.limit);
1362 if (count < limit) 1363 if (count < limit)
1363 margin = limit - count; 1364 margin = limit - count;
1364 1365
1365 if (do_swap_account) { 1366 if (do_swap_account) {
1366 count = page_counter_read(&memcg->memsw); 1367 count = page_counter_read(&memcg->memsw);
1367 limit = ACCESS_ONCE(memcg->memsw.limit); 1368 limit = READ_ONCE(memcg->memsw.limit);
1368 if (count <= limit) 1369 if (count <= limit)
1369 margin = min(margin, limit - count); 1370 margin = min(margin, limit - count);
1370 } 1371 }
@@ -2349,20 +2350,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
2349} 2350}
2350 2351
2351/* 2352/*
2352 * A helper function to get mem_cgroup from ID. must be called under
2353 * rcu_read_lock(). The caller is responsible for calling
2354 * css_tryget_online() if the mem_cgroup is used for charging. (dropping
2355 * refcnt from swap can be called against removed memcg.)
2356 */
2357static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2358{
2359 /* ID 0 is unused ID */
2360 if (!id)
2361 return NULL;
2362 return mem_cgroup_from_id(id);
2363}
2364
2365/*
2366 * try_get_mem_cgroup_from_page - look up page's memcg association 2353 * try_get_mem_cgroup_from_page - look up page's memcg association
2367 * @page: the page 2354 * @page: the page
2368 * 2355 *
@@ -2388,7 +2375,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2388 ent.val = page_private(page); 2375 ent.val = page_private(page);
2389 id = lookup_swap_cgroup_id(ent); 2376 id = lookup_swap_cgroup_id(ent);
2390 rcu_read_lock(); 2377 rcu_read_lock();
2391 memcg = mem_cgroup_lookup(id); 2378 memcg = mem_cgroup_from_id(id);
2392 if (memcg && !css_tryget_online(&memcg->css)) 2379 if (memcg && !css_tryget_online(&memcg->css))
2393 memcg = NULL; 2380 memcg = NULL;
2394 rcu_read_unlock(); 2381 rcu_read_unlock();
@@ -2650,7 +2637,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
2650 return cachep; 2637 return cachep;
2651 2638
2652 memcg = get_mem_cgroup_from_mm(current->mm); 2639 memcg = get_mem_cgroup_from_mm(current->mm);
2653 kmemcg_id = ACCESS_ONCE(memcg->kmemcg_id); 2640 kmemcg_id = READ_ONCE(memcg->kmemcg_id);
2654 if (kmemcg_id < 0) 2641 if (kmemcg_id < 0)
2655 goto out; 2642 goto out;
2656 2643
@@ -5020,7 +5007,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
5020 * tunable will only affect upcoming migrations, not the current one. 5007 * tunable will only affect upcoming migrations, not the current one.
5021 * So we need to save it, and keep it going. 5008 * So we need to save it, and keep it going.
5022 */ 5009 */
5023 move_flags = ACCESS_ONCE(memcg->move_charge_at_immigrate); 5010 move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
5024 if (move_flags) { 5011 if (move_flags) {
5025 struct mm_struct *mm; 5012 struct mm_struct *mm;
5026 struct mem_cgroup *from = mem_cgroup_from_task(p); 5013 struct mem_cgroup *from = mem_cgroup_from_task(p);
@@ -5254,7 +5241,7 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
5254static int memory_low_show(struct seq_file *m, void *v) 5241static int memory_low_show(struct seq_file *m, void *v)
5255{ 5242{
5256 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); 5243 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
5257 unsigned long low = ACCESS_ONCE(memcg->low); 5244 unsigned long low = READ_ONCE(memcg->low);
5258 5245
5259 if (low == PAGE_COUNTER_MAX) 5246 if (low == PAGE_COUNTER_MAX)
5260 seq_puts(m, "max\n"); 5247 seq_puts(m, "max\n");
@@ -5284,7 +5271,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
5284static int memory_high_show(struct seq_file *m, void *v) 5271static int memory_high_show(struct seq_file *m, void *v)
5285{ 5272{
5286 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); 5273 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
5287 unsigned long high = ACCESS_ONCE(memcg->high); 5274 unsigned long high = READ_ONCE(memcg->high);
5288 5275
5289 if (high == PAGE_COUNTER_MAX) 5276 if (high == PAGE_COUNTER_MAX)
5290 seq_puts(m, "max\n"); 5277 seq_puts(m, "max\n");
@@ -5314,7 +5301,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
5314static int memory_max_show(struct seq_file *m, void *v) 5301static int memory_max_show(struct seq_file *m, void *v)
5315{ 5302{
5316 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); 5303 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
5317 unsigned long max = ACCESS_ONCE(memcg->memory.limit); 5304 unsigned long max = READ_ONCE(memcg->memory.limit);
5318 5305
5319 if (max == PAGE_COUNTER_MAX) 5306 if (max == PAGE_COUNTER_MAX)
5320 seq_puts(m, "max\n"); 5307 seq_puts(m, "max\n");
@@ -5869,7 +5856,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
5869 5856
5870 id = swap_cgroup_record(entry, 0); 5857 id = swap_cgroup_record(entry, 0);
5871 rcu_read_lock(); 5858 rcu_read_lock();
5872 memcg = mem_cgroup_lookup(id); 5859 memcg = mem_cgroup_from_id(id);
5873 if (memcg) { 5860 if (memcg) {
5874 if (!mem_cgroup_is_root(memcg)) 5861 if (!mem_cgroup_is_root(memcg))
5875 page_counter_uncharge(&memcg->memsw, 1); 5862 page_counter_uncharge(&memcg->memsw, 1);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d487f8dc6d39..d9359b770cd9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -521,6 +521,52 @@ static const char *action_name[] = {
521 [RECOVERED] = "Recovered", 521 [RECOVERED] = "Recovered",
522}; 522};
523 523
524enum action_page_type {
525 MSG_KERNEL,
526 MSG_KERNEL_HIGH_ORDER,
527 MSG_SLAB,
528 MSG_DIFFERENT_COMPOUND,
529 MSG_POISONED_HUGE,
530 MSG_HUGE,
531 MSG_FREE_HUGE,
532 MSG_UNMAP_FAILED,
533 MSG_DIRTY_SWAPCACHE,
534 MSG_CLEAN_SWAPCACHE,
535 MSG_DIRTY_MLOCKED_LRU,
536 MSG_CLEAN_MLOCKED_LRU,
537 MSG_DIRTY_UNEVICTABLE_LRU,
538 MSG_CLEAN_UNEVICTABLE_LRU,
539 MSG_DIRTY_LRU,
540 MSG_CLEAN_LRU,
541 MSG_TRUNCATED_LRU,
542 MSG_BUDDY,
543 MSG_BUDDY_2ND,
544 MSG_UNKNOWN,
545};
546
547static const char * const action_page_types[] = {
548 [MSG_KERNEL] = "reserved kernel page",
549 [MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
550 [MSG_SLAB] = "kernel slab page",
551 [MSG_DIFFERENT_COMPOUND] = "different compound page after locking",
552 [MSG_POISONED_HUGE] = "huge page already hardware poisoned",
553 [MSG_HUGE] = "huge page",
554 [MSG_FREE_HUGE] = "free huge page",
555 [MSG_UNMAP_FAILED] = "unmapping failed page",
556 [MSG_DIRTY_SWAPCACHE] = "dirty swapcache page",
557 [MSG_CLEAN_SWAPCACHE] = "clean swapcache page",
558 [MSG_DIRTY_MLOCKED_LRU] = "dirty mlocked LRU page",
559 [MSG_CLEAN_MLOCKED_LRU] = "clean mlocked LRU page",
560 [MSG_DIRTY_UNEVICTABLE_LRU] = "dirty unevictable LRU page",
561 [MSG_CLEAN_UNEVICTABLE_LRU] = "clean unevictable LRU page",
562 [MSG_DIRTY_LRU] = "dirty LRU page",
563 [MSG_CLEAN_LRU] = "clean LRU page",
564 [MSG_TRUNCATED_LRU] = "already truncated LRU page",
565 [MSG_BUDDY] = "free buddy page",
566 [MSG_BUDDY_2ND] = "free buddy page (2nd try)",
567 [MSG_UNKNOWN] = "unknown page",
568};
569
524/* 570/*
525 * XXX: It is possible that a page is isolated from LRU cache, 571 * XXX: It is possible that a page is isolated from LRU cache,
526 * and then kept in swap cache or failed to remove from page cache. 572 * and then kept in swap cache or failed to remove from page cache.
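Replacing the ad-hoc message strings with enum action_page_type plus a string table is the classic enum-indexed table pattern: callers pass a type-checked constant, the text lives in one place, and designated initializers keep the two from drifting apart. A trimmed sketch with only a few of the IDs:

#include <stdio.h>

enum action_page_type {
	MSG_KERNEL,
	MSG_SLAB,
	MSG_BUDDY,
	MSG_UNKNOWN,
};

/* Designated initializers tie each string to its enum constant, so
 * reordering the enum cannot silently shift the messages. */
static const char * const action_page_types[] = {
	[MSG_KERNEL]  = "reserved kernel page",
	[MSG_SLAB]    = "kernel slab page",
	[MSG_BUDDY]   = "free buddy page",
	[MSG_UNKNOWN] = "unknown page",
};

static void action_result(unsigned long pfn, enum action_page_type type)
{
	printf("MCE %#lx: recovery action for %s\n",
	       pfn, action_page_types[type]);
}

int main(void)
{
	action_result(0x1234, MSG_BUDDY); /* "...for free buddy page" */
	return 0;
}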
@@ -777,10 +823,10 @@ static int me_huge_page(struct page *p, unsigned long pfn)
777static struct page_state { 823static struct page_state {
778 unsigned long mask; 824 unsigned long mask;
779 unsigned long res; 825 unsigned long res;
780 char *msg; 826 enum action_page_type type;
781 int (*action)(struct page *p, unsigned long pfn); 827 int (*action)(struct page *p, unsigned long pfn);
782} error_states[] = { 828} error_states[] = {
783 { reserved, reserved, "reserved kernel", me_kernel }, 829 { reserved, reserved, MSG_KERNEL, me_kernel },
784 /* 830 /*
785 * free pages are specially detected outside this table: 831 * free pages are specially detected outside this table:
786 * PG_buddy pages only make a small fraction of all free pages. 832 * PG_buddy pages only make a small fraction of all free pages.
@@ -791,31 +837,31 @@ static struct page_state {
791 * currently unused objects without touching them. But just 837 * currently unused objects without touching them. But just
792 * treat it as standard kernel for now. 838 * treat it as standard kernel for now.
793 */ 839 */
794 { slab, slab, "kernel slab", me_kernel }, 840 { slab, slab, MSG_SLAB, me_kernel },
795 841
796#ifdef CONFIG_PAGEFLAGS_EXTENDED 842#ifdef CONFIG_PAGEFLAGS_EXTENDED
797 { head, head, "huge", me_huge_page }, 843 { head, head, MSG_HUGE, me_huge_page },
798 { tail, tail, "huge", me_huge_page }, 844 { tail, tail, MSG_HUGE, me_huge_page },
799#else 845#else
800 { compound, compound, "huge", me_huge_page }, 846 { compound, compound, MSG_HUGE, me_huge_page },
801#endif 847#endif
802 848
803 { sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty }, 849 { sc|dirty, sc|dirty, MSG_DIRTY_SWAPCACHE, me_swapcache_dirty },
804 { sc|dirty, sc, "clean swapcache", me_swapcache_clean }, 850 { sc|dirty, sc, MSG_CLEAN_SWAPCACHE, me_swapcache_clean },
805 851
806 { mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty }, 852 { mlock|dirty, mlock|dirty, MSG_DIRTY_MLOCKED_LRU, me_pagecache_dirty },
807 { mlock|dirty, mlock, "clean mlocked LRU", me_pagecache_clean }, 853 { mlock|dirty, mlock, MSG_CLEAN_MLOCKED_LRU, me_pagecache_clean },
808 854
809 { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty }, 855 { unevict|dirty, unevict|dirty, MSG_DIRTY_UNEVICTABLE_LRU, me_pagecache_dirty },
810 { unevict|dirty, unevict, "clean unevictable LRU", me_pagecache_clean }, 856 { unevict|dirty, unevict, MSG_CLEAN_UNEVICTABLE_LRU, me_pagecache_clean },
811 857
812 { lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty }, 858 { lru|dirty, lru|dirty, MSG_DIRTY_LRU, me_pagecache_dirty },
813 { lru|dirty, lru, "clean LRU", me_pagecache_clean }, 859 { lru|dirty, lru, MSG_CLEAN_LRU, me_pagecache_clean },
814 860
815 /* 861 /*
816 * Catchall entry: must be at end. 862 * Catchall entry: must be at end.
817 */ 863 */
818 { 0, 0, "unknown page state", me_unknown }, 864 { 0, 0, MSG_UNKNOWN, me_unknown },
819}; 865};
820 866
821#undef dirty 867#undef dirty
@@ -835,10 +881,10 @@ static struct page_state {
835 * "Dirty/Clean" indication is not 100% accurate due to the possibility of 881 * "Dirty/Clean" indication is not 100% accurate due to the possibility of
836 * setting PG_dirty outside page lock. See also comment above set_page_dirty(). 882 * setting PG_dirty outside page lock. See also comment above set_page_dirty().
837 */ 883 */
838static void action_result(unsigned long pfn, char *msg, int result) 884static void action_result(unsigned long pfn, enum action_page_type type, int result)
839{ 885{
840 pr_err("MCE %#lx: %s page recovery: %s\n", 886 pr_err("MCE %#lx: recovery action for %s: %s\n",
841 pfn, msg, action_name[result]); 887 pfn, action_page_types[type], action_name[result]);
842} 888}
843 889
844static int page_action(struct page_state *ps, struct page *p, 890static int page_action(struct page_state *ps, struct page *p,
@@ -854,11 +900,11 @@ static int page_action(struct page_state *ps, struct page *p,
854 count--; 900 count--;
855 if (count != 0) { 901 if (count != 0) {
856 printk(KERN_ERR 902 printk(KERN_ERR
857 "MCE %#lx: %s page still referenced by %d users\n", 903 "MCE %#lx: %s still referenced by %d users\n",
858 pfn, ps->msg, count); 904 pfn, action_page_types[ps->type], count);
859 result = FAILED; 905 result = FAILED;
860 } 906 }
861 action_result(pfn, ps->msg, result); 907 action_result(pfn, ps->type, result);
862 908
863 /* Could do more checks here if page looks ok */ 909 /* Could do more checks here if page looks ok */
864 /* 910 /*
@@ -1106,7 +1152,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1106 if (!(flags & MF_COUNT_INCREASED) && 1152 if (!(flags & MF_COUNT_INCREASED) &&
1107 !get_page_unless_zero(hpage)) { 1153 !get_page_unless_zero(hpage)) {
1108 if (is_free_buddy_page(p)) { 1154 if (is_free_buddy_page(p)) {
1109 action_result(pfn, "free buddy", DELAYED); 1155 action_result(pfn, MSG_BUDDY, DELAYED);
1110 return 0; 1156 return 0;
1111 } else if (PageHuge(hpage)) { 1157 } else if (PageHuge(hpage)) {
1112 /* 1158 /*
@@ -1123,12 +1169,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1123 } 1169 }
1124 set_page_hwpoison_huge_page(hpage); 1170 set_page_hwpoison_huge_page(hpage);
1125 res = dequeue_hwpoisoned_huge_page(hpage); 1171 res = dequeue_hwpoisoned_huge_page(hpage);
1126 action_result(pfn, "free huge", 1172 action_result(pfn, MSG_FREE_HUGE,
1127 res ? IGNORED : DELAYED); 1173 res ? IGNORED : DELAYED);
1128 unlock_page(hpage); 1174 unlock_page(hpage);
1129 return res; 1175 return res;
1130 } else { 1176 } else {
1131 action_result(pfn, "high order kernel", IGNORED); 1177 action_result(pfn, MSG_KERNEL_HIGH_ORDER, IGNORED);
1132 return -EBUSY; 1178 return -EBUSY;
1133 } 1179 }
1134 } 1180 }
@@ -1150,9 +1196,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1150 */ 1196 */
1151 if (is_free_buddy_page(p)) { 1197 if (is_free_buddy_page(p)) {
1152 if (flags & MF_COUNT_INCREASED) 1198 if (flags & MF_COUNT_INCREASED)
1153 action_result(pfn, "free buddy", DELAYED); 1199 action_result(pfn, MSG_BUDDY, DELAYED);
1154 else 1200 else
1155 action_result(pfn, "free buddy, 2nd try", DELAYED); 1201 action_result(pfn, MSG_BUDDY_2ND,
1202 DELAYED);
1156 return 0; 1203 return 0;
1157 } 1204 }
1158 } 1205 }
@@ -1165,7 +1212,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1165 * If this happens just bail out. 1212 * If this happens just bail out.
1166 */ 1213 */
1167 if (compound_head(p) != hpage) { 1214 if (compound_head(p) != hpage) {
1168 action_result(pfn, "different compound page after locking", IGNORED); 1215 action_result(pfn, MSG_DIFFERENT_COMPOUND, IGNORED);
1169 res = -EBUSY; 1216 res = -EBUSY;
1170 goto out; 1217 goto out;
1171 } 1218 }
@@ -1205,8 +1252,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1205 * on the head page to show that the hugepage is hwpoisoned 1252 * on the head page to show that the hugepage is hwpoisoned
1206 */ 1253 */
1207 if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) { 1254 if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
1208 action_result(pfn, "hugepage already hardware poisoned", 1255 action_result(pfn, MSG_POISONED_HUGE, IGNORED);
1209 IGNORED);
1210 unlock_page(hpage); 1256 unlock_page(hpage);
1211 put_page(hpage); 1257 put_page(hpage);
1212 return 0; 1258 return 0;
@@ -1235,7 +1281,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1235 */ 1281 */
1236 if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage) 1282 if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage)
1237 != SWAP_SUCCESS) { 1283 != SWAP_SUCCESS) {
1238 action_result(pfn, "unmapping failed", IGNORED); 1284 action_result(pfn, MSG_UNMAP_FAILED, IGNORED);
1239 res = -EBUSY; 1285 res = -EBUSY;
1240 goto out; 1286 goto out;
1241 } 1287 }
@@ -1244,7 +1290,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1244 * Torn down by someone else? 1290 * Torn down by someone else?
1245 */ 1291 */
1246 if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { 1292 if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
1247 action_result(pfn, "already truncated LRU", IGNORED); 1293 action_result(pfn, MSG_TRUNCATED_LRU, IGNORED);
1248 res = -EBUSY; 1294 res = -EBUSY;
1249 goto out; 1295 goto out;
1250 } 1296 }
@@ -1540,8 +1586,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
1540 } 1586 }
1541 unlock_page(hpage); 1587 unlock_page(hpage);
1542 1588
1543 /* Keep page count to indicate a given hugepage is isolated. */ 1589 ret = isolate_huge_page(hpage, &pagelist);
1544 list_move(&hpage->lru, &pagelist); 1590 if (ret) {
1591 /*
1592 * get_any_page() and isolate_huge_page() take a refcount each,
1593 * so we need to drop one here.
1594 */
1595 put_page(hpage);
1596 } else {
1597 pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
1598 return -EBUSY;
1599 }
1600
1545 ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, 1601 ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
1546 MIGRATE_SYNC, MR_MEMORY_FAILURE); 1602 MIGRATE_SYNC, MR_MEMORY_FAILURE);
1547 if (ret) { 1603 if (ret) {
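The action_page_type conversion above replaces free-form message strings with an enum that indexes one shared string table, so every "recovery action" line is formatted in a single place. A minimal userspace sketch of that table-driven reporting pattern follows; the entries, result names, and values are illustrative stand-ins, not the kernel's full tables:

```c
#include <stdio.h>

enum action_page_type { MSG_KERNEL, MSG_SLAB, MSG_HUGE, MSG_BUDDY, MSG_UNKNOWN };

/* one table owns every user-visible page-type name */
static const char * const action_page_types[] = {
	[MSG_KERNEL]  = "reserved kernel page",
	[MSG_SLAB]    = "kernel slab page",
	[MSG_HUGE]    = "huge page",
	[MSG_BUDDY]   = "free buddy page",
	[MSG_UNKNOWN] = "unknown page state",
};

static const char * const action_name[] = { "Ignored", "Failed", "Delayed", "Recovered" };

static void action_result(unsigned long pfn, enum action_page_type type, int result)
{
	printf("MCE %#lx: recovery action for %s: %s\n",
	       pfn, action_page_types[type], action_name[result]);
}

int main(void)
{
	action_result(0x1234, MSG_BUDDY, 2);	/* "... for free buddy page: Delayed" */
	return 0;
}
```

Besides deduplicating strings, the enum gives callers a stable identifier that tracing and accounting code can switch on, which plain strings cannot offer.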
diff --git a/mm/memory.c b/mm/memory.c
index ac20b2a6a0c3..22e037e3364e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -690,12 +690,11 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
690 /* 690 /*
691 * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y 691 * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
692 */ 692 */
693 if (vma->vm_ops) 693 pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n",
694 printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n", 694 vma->vm_file,
695 vma->vm_ops->fault); 695 vma->vm_ops ? vma->vm_ops->fault : NULL,
696 if (vma->vm_file) 696 vma->vm_file ? vma->vm_file->f_op->mmap : NULL,
697 printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n", 697 mapping ? mapping->a_ops->readpage : NULL);
698 vma->vm_file->f_op->mmap);
699 dump_stack(); 698 dump_stack();
700 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); 699 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
701} 700}
@@ -2181,6 +2180,42 @@ oom:
2181 return VM_FAULT_OOM; 2180 return VM_FAULT_OOM;
2182} 2181}
2183 2182
2183/*
2184 * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
2185 * mapping
2186 */
2187static int wp_pfn_shared(struct mm_struct *mm,
2188 struct vm_area_struct *vma, unsigned long address,
2189 pte_t *page_table, spinlock_t *ptl, pte_t orig_pte,
2190 pmd_t *pmd)
2191{
2192 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
2193 struct vm_fault vmf = {
2194 .page = NULL,
2195 .pgoff = linear_page_index(vma, address),
2196 .virtual_address = (void __user *)(address & PAGE_MASK),
2197 .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
2198 };
2199 int ret;
2200
2201 pte_unmap_unlock(page_table, ptl);
2202 ret = vma->vm_ops->pfn_mkwrite(vma, &vmf);
2203 if (ret & VM_FAULT_ERROR)
2204 return ret;
2205 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2206 /*
2207 * We might have raced with another page fault while we
2208 * released the pte_offset_map_lock.
2209 */
2210 if (!pte_same(*page_table, orig_pte)) {
2211 pte_unmap_unlock(page_table, ptl);
2212 return 0;
2213 }
2214 }
2215 return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte,
2216 NULL, 0, 0);
2217}
2218
2184static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, 2219static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
2185 unsigned long address, pte_t *page_table, 2220 unsigned long address, pte_t *page_table,
2186 pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte, 2221 pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte,
@@ -2259,13 +2294,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2259 * VM_PFNMAP VMA. 2294 * VM_PFNMAP VMA.
2260 * 2295 *
2261 * We should not cow pages in a shared writeable mapping. 2296 * We should not cow pages in a shared writeable mapping.
2262 * Just mark the pages writable as we can't do any dirty 2297 * Just mark the pages writable and/or call ops->pfn_mkwrite.
2263 * accounting on raw pfn maps.
2264 */ 2298 */
2265 if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == 2299 if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
2266 (VM_WRITE|VM_SHARED)) 2300 (VM_WRITE|VM_SHARED))
2267 return wp_page_reuse(mm, vma, address, page_table, ptl, 2301 return wp_pfn_shared(mm, vma, address, page_table, ptl,
2268 orig_pte, old_page, 0, 0); 2302 orig_pte, pmd);
2269 2303
2270 pte_unmap_unlock(page_table, ptl); 2304 pte_unmap_unlock(page_table, ptl);
2271 return wp_page_copy(mm, vma, address, page_table, pmd, 2305 return wp_page_copy(mm, vma, address, page_table, pmd,
@@ -2845,7 +2879,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
2845 struct vm_fault vmf; 2879 struct vm_fault vmf;
2846 int off; 2880 int off;
2847 2881
2848 nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT; 2882 nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
2849 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; 2883 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
2850 2884
2851 start_addr = max(address & mask, vma->vm_start); 2885 start_addr = max(address & mask, vma->vm_start);
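The new wp_pfn_shared() above follows a classic revalidation shape: drop the page-table lock before calling a callback that may sleep, re-take the lock afterwards, and verify that the protected entry did not change in between. A userspace sketch of that shape, with a pthread mutex standing in for the pte lock; the names and the callback are illustrative only:

```c
#include <pthread.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;
static unsigned long pte;	/* stands in for *page_table */

static int mkwrite_callback(void)
{
	/* may sleep, so it must run without the lock held */
	return 0;
}

/* called with ptl held, mirroring wp_pfn_shared()'s calling convention */
static int handle_write_fault(unsigned long orig_pte)
{
	pthread_mutex_unlock(&ptl);
	int ret = mkwrite_callback();
	if (ret)
		return ret;
	pthread_mutex_lock(&ptl);
	if (pte != orig_pte) {
		/* raced with another fault while unlocked: bail out, caller retries */
		pthread_mutex_unlock(&ptl);
		return 0;
	}
	/* state still consistent: safe to mark the entry writable here */
	pthread_mutex_unlock(&ptl);
	return 0;
}

int main(void)
{
	pthread_mutex_lock(&ptl);
	return handle_write_fault(pte);
}
```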
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e2e8014fb755..457bde530cbe 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1373,7 +1373,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
1373 if (PageLRU(page)) 1373 if (PageLRU(page))
1374 return pfn; 1374 return pfn;
1375 if (PageHuge(page)) { 1375 if (PageHuge(page)) {
1376 if (is_hugepage_active(page)) 1376 if (page_huge_active(page))
1377 return pfn; 1377 return pfn;
1378 else 1378 else
1379 pfn = round_up(pfn + 1, 1379 pfn = round_up(pfn + 1,
diff --git a/mm/mempool.c b/mm/mempool.c
index 949970db2874..2cc08de8b1db 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -6,26 +6,138 @@
6 * extreme VM load. 6 * extreme VM load.
7 * 7 *
8 * started by Ingo Molnar, Copyright (C) 2001 8 * started by Ingo Molnar, Copyright (C) 2001
9 * debugging by David Rientjes, Copyright (C) 2015
9 */ 10 */
10 11
11#include <linux/mm.h> 12#include <linux/mm.h>
12#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/highmem.h>
15#include <linux/kasan.h>
13#include <linux/kmemleak.h> 16#include <linux/kmemleak.h>
14#include <linux/export.h> 17#include <linux/export.h>
15#include <linux/mempool.h> 18#include <linux/mempool.h>
16#include <linux/blkdev.h> 19#include <linux/blkdev.h>
17#include <linux/writeback.h> 20#include <linux/writeback.h>
21#include "slab.h"
22
23#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
24static void poison_error(mempool_t *pool, void *element, size_t size,
25 size_t byte)
26{
27 const int nr = pool->curr_nr;
28 const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
29 const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
30 int i;
31
32 pr_err("BUG: mempool element poison mismatch\n");
33 pr_err("Mempool %p size %zu\n", pool, size);
34 pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
35 for (i = start; i < end; i++)
36 pr_cont("%x ", *(u8 *)(element + i));
37 pr_cont("%s\n", end < size ? "..." : "");
38 dump_stack();
39}
40
41static void __check_element(mempool_t *pool, void *element, size_t size)
42{
43 u8 *obj = element;
44 size_t i;
45
46 for (i = 0; i < size; i++) {
47 u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
48
49 if (obj[i] != exp) {
50 poison_error(pool, element, size, i);
51 return;
52 }
53 }
54 memset(obj, POISON_INUSE, size);
55}
56
57static void check_element(mempool_t *pool, void *element)
58{
59 /* Mempools backed by slab allocator */
60 if (pool->free == mempool_free_slab || pool->free == mempool_kfree)
61 __check_element(pool, element, ksize(element));
62
63 /* Mempools backed by page allocator */
64 if (pool->free == mempool_free_pages) {
65 int order = (int)(long)pool->pool_data;
66 void *addr = kmap_atomic((struct page *)element);
67
68 __check_element(pool, addr, 1UL << (PAGE_SHIFT + order));
69 kunmap_atomic(addr);
70 }
71}
72
73static void __poison_element(void *element, size_t size)
74{
75 u8 *obj = element;
76
77 memset(obj, POISON_FREE, size - 1);
78 obj[size - 1] = POISON_END;
79}
80
81static void poison_element(mempool_t *pool, void *element)
82{
83 /* Mempools backed by slab allocator */
84 if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
85 __poison_element(element, ksize(element));
86
87 /* Mempools backed by page allocator */
88 if (pool->alloc == mempool_alloc_pages) {
89 int order = (int)(long)pool->pool_data;
90 void *addr = kmap_atomic((struct page *)element);
91
92 __poison_element(addr, 1UL << (PAGE_SHIFT + order));
93 kunmap_atomic(addr);
94 }
95}
96#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
97static inline void check_element(mempool_t *pool, void *element)
98{
99}
100static inline void poison_element(mempool_t *pool, void *element)
101{
102}
103#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
104
105static void kasan_poison_element(mempool_t *pool, void *element)
106{
107 if (pool->alloc == mempool_alloc_slab)
108 kasan_slab_free(pool->pool_data, element);
109 if (pool->alloc == mempool_kmalloc)
110 kasan_kfree(element);
111 if (pool->alloc == mempool_alloc_pages)
112 kasan_free_pages(element, (unsigned long)pool->pool_data);
113}
114
115static void kasan_unpoison_element(mempool_t *pool, void *element)
116{
117 if (pool->alloc == mempool_alloc_slab)
118 kasan_slab_alloc(pool->pool_data, element);
119 if (pool->alloc == mempool_kmalloc)
120 kasan_krealloc(element, (size_t)pool->pool_data);
121 if (pool->alloc == mempool_alloc_pages)
122 kasan_alloc_pages(element, (unsigned long)pool->pool_data);
123}
18 124
19static void add_element(mempool_t *pool, void *element) 125static void add_element(mempool_t *pool, void *element)
20{ 126{
21 BUG_ON(pool->curr_nr >= pool->min_nr); 127 BUG_ON(pool->curr_nr >= pool->min_nr);
128 poison_element(pool, element);
129 kasan_poison_element(pool, element);
22 pool->elements[pool->curr_nr++] = element; 130 pool->elements[pool->curr_nr++] = element;
23} 131}
24 132
25static void *remove_element(mempool_t *pool) 133static void *remove_element(mempool_t *pool)
26{ 134{
27 BUG_ON(pool->curr_nr <= 0); 135 void *element = pool->elements[--pool->curr_nr];
28 return pool->elements[--pool->curr_nr]; 136
137 BUG_ON(pool->curr_nr < 0);
138 check_element(pool, element);
139 kasan_unpoison_element(pool, element);
140 return element;
29} 141}
30 142
31/** 143/**
@@ -334,6 +446,7 @@ EXPORT_SYMBOL(mempool_free);
334void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) 446void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
335{ 447{
336 struct kmem_cache *mem = pool_data; 448 struct kmem_cache *mem = pool_data;
449 VM_BUG_ON(mem->ctor);
337 return kmem_cache_alloc(mem, gfp_mask); 450 return kmem_cache_alloc(mem, gfp_mask);
338} 451}
339EXPORT_SYMBOL(mempool_alloc_slab); 452EXPORT_SYMBOL(mempool_alloc_slab);
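The mempool debugging added above fills freed elements with POISON_FREE, terminates them with POISON_END, and verifies every byte when the element is handed out again, so a use-after-free write to a pooled element is caught at the next allocation. A self-contained userspace sketch of that check; the poison values mirror include/linux/poison.h, but this is an illustration, not the kernel code:

```c
#include <stdio.h>
#include <string.h>

#define POISON_FREE  0x6b
#define POISON_END   0xa5
#define POISON_INUSE 0x5a

static void poison_element(unsigned char *obj, size_t size)
{
	memset(obj, POISON_FREE, size - 1);
	obj[size - 1] = POISON_END;	/* sentinel marks the last byte */
}

static int check_element(unsigned char *obj, size_t size)
{
	for (size_t i = 0; i < size; i++) {
		unsigned char exp = (i < size - 1) ? POISON_FREE : POISON_END;

		if (obj[i] != exp) {
			fprintf(stderr, "poison mismatch at byte %zu: 0x%02x\n", i, obj[i]);
			return -1;
		}
	}
	memset(obj, POISON_INUSE, size);	/* mark as live, as the kernel does */
	return 0;
}

int main(void)
{
	unsigned char elem[32];

	poison_element(elem, sizeof(elem));
	elem[7] = 0;			/* simulated use-after-free write */
	return check_element(elem, sizeof(elem)) ? 1 : 0;
}
```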
diff --git a/mm/migrate.c b/mm/migrate.c
index a65ff72ab739..f53838fe3dfe 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -537,7 +537,8 @@ void migrate_page_copy(struct page *newpage, struct page *page)
537 * Please do not reorder this without considering how mm/ksm.c's 537 * Please do not reorder this without considering how mm/ksm.c's
538 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache(). 538 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
539 */ 539 */
540 ClearPageSwapCache(page); 540 if (PageSwapCache(page))
541 ClearPageSwapCache(page);
541 ClearPagePrivate(page); 542 ClearPagePrivate(page);
542 set_page_private(page, 0); 543 set_page_private(page, 0);
543 544
diff --git a/mm/mmap.c b/mm/mmap.c
index 06a6076c92e5..bb50cacc3ea5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1133,7 +1133,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
1133 * by another page fault trying to merge _that_. But that's ok: if it 1133 * by another page fault trying to merge _that_. But that's ok: if it
1134 * is being set up, that automatically means that it will be a singleton 1134 * is being set up, that automatically means that it will be a singleton
1135 * acceptable for merging, so we can do all of this optimistically. But 1135 * acceptable for merging, so we can do all of this optimistically. But
1136 * we do that ACCESS_ONCE() to make sure that we never re-load the pointer. 1136 * we do that READ_ONCE() to make sure that we never re-load the pointer.
1137 * 1137 *
1138 * IOW: that the "list_is_singular()" test on the anon_vma_chain only 1138 * IOW: that the "list_is_singular()" test on the anon_vma_chain only
1139 * matters for the 'stable anon_vma' case (ie the thing we want to avoid 1139 * matters for the 'stable anon_vma' case (ie the thing we want to avoid
@@ -1147,7 +1147,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
1147static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b) 1147static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1148{ 1148{
1149 if (anon_vma_compatible(a, b)) { 1149 if (anon_vma_compatible(a, b)) {
1150 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma); 1150 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1151 1151
1152 if (anon_vma && list_is_singular(&old->anon_vma_chain)) 1152 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1153 return anon_vma; 1153 return anon_vma;
@@ -1551,11 +1551,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
1551 1551
1552 /* Clear old maps */ 1552 /* Clear old maps */
1553 error = -ENOMEM; 1553 error = -ENOMEM;
1554munmap_back: 1554 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
1555 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) { 1555 &rb_parent)) {
1556 if (do_munmap(mm, addr, len)) 1556 if (do_munmap(mm, addr, len))
1557 return -ENOMEM; 1557 return -ENOMEM;
1558 goto munmap_back;
1559 } 1558 }
1560 1559
1561 /* 1560 /*
@@ -1571,7 +1570,8 @@ munmap_back:
1571 /* 1570 /*
1572 * Can we just expand an old mapping? 1571 * Can we just expand an old mapping?
1573 */ 1572 */
1574 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL); 1573 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
1574 NULL);
1575 if (vma) 1575 if (vma)
1576 goto out; 1576 goto out;
1577 1577
@@ -2100,7 +2100,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
2100 actual_size = size; 2100 actual_size = size;
2101 if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN))) 2101 if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
2102 actual_size -= PAGE_SIZE; 2102 actual_size -= PAGE_SIZE;
2103 if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) 2103 if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2104 return -ENOMEM; 2104 return -ENOMEM;
2105 2105
2106 /* mlock limit tests */ 2106 /* mlock limit tests */
@@ -2108,7 +2108,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
2108 unsigned long locked; 2108 unsigned long locked;
2109 unsigned long limit; 2109 unsigned long limit;
2110 locked = mm->locked_vm + grow; 2110 locked = mm->locked_vm + grow;
2111 limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur); 2111 limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2112 limit >>= PAGE_SHIFT; 2112 limit >>= PAGE_SHIFT;
2113 if (locked > limit && !capable(CAP_IPC_LOCK)) 2113 if (locked > limit && !capable(CAP_IPC_LOCK))
2114 return -ENOMEM; 2114 return -ENOMEM;
@@ -2739,11 +2739,10 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
2739 /* 2739 /*
2740 * Clear old maps. this also does some error checking for us 2740 * Clear old maps. this also does some error checking for us
2741 */ 2741 */
2742 munmap_back: 2742 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
2743 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) { 2743 &rb_parent)) {
2744 if (do_munmap(mm, addr, len)) 2744 if (do_munmap(mm, addr, len))
2745 return -ENOMEM; 2745 return -ENOMEM;
2746 goto munmap_back;
2747 } 2746 }
2748 2747
2749 /* Check against address space limits *after* clearing old maps... */ 2748 /* Check against address space limits *after* clearing old maps... */
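Both munmap_back conversions above turn a backward goto into a plain while loop with identical behavior, which reads more directly and removes a label. A standalone sketch of the shape of that transformation; overlaps() and remove_range() are stand-ins for find_vma_links() and do_munmap():

```c
#include <stdbool.h>

static bool overlaps(unsigned long addr, unsigned long len)
{
	(void)addr; (void)len;
	return false;	/* stub: no existing mapping in this sketch */
}

static int remove_range(unsigned long addr, unsigned long len)
{
	(void)addr; (void)len;
	return 0;
}

static int clear_old_maps(unsigned long addr, unsigned long len)
{
	/*
	 * before:  retry: if (overlaps(...)) { if (remove_range(...))
	 *                      return -1;  goto retry; }
	 */
	while (overlaps(addr, len)) {
		if (remove_range(addr, len))
			return -1;	/* -ENOMEM in the kernel */
	}
	return 0;
}

int main(void)
{
	return clear_old_maps(0x1000, 0x2000);
}
```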
diff --git a/mm/mremap.c b/mm/mremap.c
index 2dc44b1cb1df..034e2d360652 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -345,25 +345,25 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
345 struct vm_area_struct *vma = find_vma(mm, addr); 345 struct vm_area_struct *vma = find_vma(mm, addr);
346 346
347 if (!vma || vma->vm_start > addr) 347 if (!vma || vma->vm_start > addr)
348 goto Efault; 348 return ERR_PTR(-EFAULT);
349 349
350 if (is_vm_hugetlb_page(vma)) 350 if (is_vm_hugetlb_page(vma))
351 goto Einval; 351 return ERR_PTR(-EINVAL);
352 352
353 /* We can't remap across vm area boundaries */ 353 /* We can't remap across vm area boundaries */
354 if (old_len > vma->vm_end - addr) 354 if (old_len > vma->vm_end - addr)
355 goto Efault; 355 return ERR_PTR(-EFAULT);
356 356
357 /* Need to be careful about a growing mapping */ 357 /* Need to be careful about a growing mapping */
358 if (new_len > old_len) { 358 if (new_len > old_len) {
359 unsigned long pgoff; 359 unsigned long pgoff;
360 360
361 if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) 361 if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
362 goto Efault; 362 return ERR_PTR(-EFAULT);
363 pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; 363 pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
364 pgoff += vma->vm_pgoff; 364 pgoff += vma->vm_pgoff;
365 if (pgoff + (new_len >> PAGE_SHIFT) < pgoff) 365 if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
366 goto Einval; 366 return ERR_PTR(-EINVAL);
367 } 367 }
368 368
369 if (vma->vm_flags & VM_LOCKED) { 369 if (vma->vm_flags & VM_LOCKED) {
@@ -372,29 +372,20 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
372 lock_limit = rlimit(RLIMIT_MEMLOCK); 372 lock_limit = rlimit(RLIMIT_MEMLOCK);
373 locked += new_len - old_len; 373 locked += new_len - old_len;
374 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) 374 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
375 goto Eagain; 375 return ERR_PTR(-EAGAIN);
376 } 376 }
377 377
378 if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) 378 if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT))
379 goto Enomem; 379 return ERR_PTR(-ENOMEM);
380 380
381 if (vma->vm_flags & VM_ACCOUNT) { 381 if (vma->vm_flags & VM_ACCOUNT) {
382 unsigned long charged = (new_len - old_len) >> PAGE_SHIFT; 382 unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
383 if (security_vm_enough_memory_mm(mm, charged)) 383 if (security_vm_enough_memory_mm(mm, charged))
384 goto Efault; 384 return ERR_PTR(-ENOMEM);
385 *p = charged; 385 *p = charged;
386 } 386 }
387 387
388 return vma; 388 return vma;
389
390Efault: /* very odd choice for most of the cases, but... */
391 return ERR_PTR(-EFAULT);
392Einval:
393 return ERR_PTR(-EINVAL);
394Enomem:
395 return ERR_PTR(-ENOMEM);
396Eagain:
397 return ERR_PTR(-EAGAIN);
398} 389}
399 390
400static unsigned long mremap_to(unsigned long addr, unsigned long old_len, 391static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
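vma_to_resize() above now returns ERR_PTR(-errno) at each failure site instead of jumping to a block of per-error labels. The idiom encodes a small negative errno in the pointer value itself, so a single return channel carries both success and failure. A minimal userspace re-creation; the kernel's real definitions live in include/linux/err.h, and this version is for illustration only:

```c
#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* errnos occupy the top MAX_ERRNO addresses, never valid pointers */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *lookup(int key)
{
	static int value = 42;

	if (key < 0)
		return ERR_PTR(-EINVAL);	/* no goto label, no NULL ambiguity */
	return &value;
}

int main(void)
{
	void *p = lookup(-1);

	if (IS_ERR(p))
		printf("lookup failed: %ld\n", PTR_ERR(p));
	return 0;
}
```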
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 52628c819bf7..2b665da1b3c9 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -408,7 +408,7 @@ bool oom_killer_disabled __read_mostly;
408static DECLARE_RWSEM(oom_sem); 408static DECLARE_RWSEM(oom_sem);
409 409
410/** 410/**
411 * mark_tsk_oom_victim - marks the given taks as OOM victim. 411 * mark_tsk_oom_victim - marks the given task as OOM victim.
412 * @tsk: task to mark 412 * @tsk: task to mark
413 * 413 *
414 * Has to be called with oom_sem taken for read and never after 414 * Has to be called with oom_sem taken for read and never after
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0372411f38fc..5daf5568b9e1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2228,7 +2228,8 @@ int set_page_dirty(struct page *page)
2228 * it will confuse readahead and make it restart the size rampup 2228 * it will confuse readahead and make it restart the size rampup
2229 * process. But it's a trivial problem. 2229 * process. But it's a trivial problem.
2230 */ 2230 */
2231 ClearPageReclaim(page); 2231 if (PageReclaim(page))
2232 ClearPageReclaim(page);
2232#ifdef CONFIG_BLOCK 2233#ifdef CONFIG_BLOCK
2233 if (!spd) 2234 if (!spd)
2234 spd = __set_page_dirty_buffers; 2235 spd = __set_page_dirty_buffers;
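This ClearPageReclaim change and the ClearPageSwapCache change in mm/migrate.c above apply the same micro-optimization: test a flag before clearing it, so the common "already clear" case issues only a read instead of an atomic read-modify-write that would pull the cacheline exclusive. A C11 stdatomic sketch of the pattern, with illustrative names:

```c
#include <stdatomic.h>
#include <stdbool.h>

#define PG_reclaim 0

static inline bool test_bit(int nr, atomic_ulong *flags)
{
	/* read-only: the cacheline can stay shared across CPUs */
	return atomic_load_explicit(flags, memory_order_relaxed) & (1UL << nr);
}

static inline void clear_bit(int nr, atomic_ulong *flags)
{
	/* full RMW: forces exclusive ownership of the cacheline */
	atomic_fetch_and(flags, ~(1UL << nr));
}

static void set_page_dirty_like(atomic_ulong *flags)
{
	if (test_bit(PG_reclaim, flags))	/* cheap check first */
		clear_bit(PG_reclaim, flags);
}

int main(void)
{
	atomic_ulong flags = 0;

	set_page_dirty_page: ;
	set_page_dirty_like(&flags);	/* bit already clear: no RMW issued */
	return 0;
}
```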
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1b849500640c..ebffa0e4a9c0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1371,7 +1371,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
1371 int to_drain, batch; 1371 int to_drain, batch;
1372 1372
1373 local_irq_save(flags); 1373 local_irq_save(flags);
1374 batch = ACCESS_ONCE(pcp->batch); 1374 batch = READ_ONCE(pcp->batch);
1375 to_drain = min(pcp->count, batch); 1375 to_drain = min(pcp->count, batch);
1376 if (to_drain > 0) { 1376 if (to_drain > 0) {
1377 free_pcppages_bulk(zone, to_drain, pcp); 1377 free_pcppages_bulk(zone, to_drain, pcp);
@@ -1570,7 +1570,7 @@ void free_hot_cold_page(struct page *page, bool cold)
1570 list_add_tail(&page->lru, &pcp->lists[migratetype]); 1570 list_add_tail(&page->lru, &pcp->lists[migratetype]);
1571 pcp->count++; 1571 pcp->count++;
1572 if (pcp->count >= pcp->high) { 1572 if (pcp->count >= pcp->high) {
1573 unsigned long batch = ACCESS_ONCE(pcp->batch); 1573 unsigned long batch = READ_ONCE(pcp->batch);
1574 free_pcppages_bulk(zone, batch, pcp); 1574 free_pcppages_bulk(zone, batch, pcp);
1575 pcp->count -= batch; 1575 pcp->count -= batch;
1576 } 1576 }
@@ -6207,7 +6207,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
6207 mask <<= (BITS_PER_LONG - bitidx - 1); 6207 mask <<= (BITS_PER_LONG - bitidx - 1);
6208 flags <<= (BITS_PER_LONG - bitidx - 1); 6208 flags <<= (BITS_PER_LONG - bitidx - 1);
6209 6209
6210 word = ACCESS_ONCE(bitmap[word_bitidx]); 6210 word = READ_ONCE(bitmap[word_bitidx]);
6211 for (;;) { 6211 for (;;) {
6212 old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags); 6212 old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
6213 if (word == old_word) 6213 if (word == old_word)
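The ACCESS_ONCE() to READ_ONCE() conversions running through this series matter because ACCESS_ONCE() only works reliably on scalar types, while READ_ONCE() also handles aggregates. The underlying trick is a volatile access that forbids the compiler from caching or re-loading the value mid-computation. A userspace approximation for scalars, as a sketch only:

```c
#include <stdio.h>

/* volatile cast: exactly one load, never torn apart or repeated */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

static unsigned long batch = 31;	/* concurrently updatable in the kernel */

int main(void)
{
	/* snapshot once; later uses of 'b' cannot observe a concurrent update */
	unsigned long b = READ_ONCE(batch);

	printf("batch snapshot: %lu\n", b);
	return 0;
}
```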
diff --git a/mm/rmap.c b/mm/rmap.c
index c161a14b6a8f..24dd3f9fee27 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -456,7 +456,7 @@ struct anon_vma *page_get_anon_vma(struct page *page)
456 unsigned long anon_mapping; 456 unsigned long anon_mapping;
457 457
458 rcu_read_lock(); 458 rcu_read_lock();
459 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping); 459 anon_mapping = (unsigned long)READ_ONCE(page->mapping);
460 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) 460 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
461 goto out; 461 goto out;
462 if (!page_mapped(page)) 462 if (!page_mapped(page))
@@ -500,14 +500,14 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page)
500 unsigned long anon_mapping; 500 unsigned long anon_mapping;
501 501
502 rcu_read_lock(); 502 rcu_read_lock();
503 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping); 503 anon_mapping = (unsigned long)READ_ONCE(page->mapping);
504 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) 504 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
505 goto out; 505 goto out;
506 if (!page_mapped(page)) 506 if (!page_mapped(page))
507 goto out; 507 goto out;
508 508
509 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); 509 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
510 root_anon_vma = ACCESS_ONCE(anon_vma->root); 510 root_anon_vma = READ_ONCE(anon_vma->root);
511 if (down_read_trylock(&root_anon_vma->rwsem)) { 511 if (down_read_trylock(&root_anon_vma->rwsem)) {
512 /* 512 /*
513 * If the page is still mapped, then this anon_vma is still 513 * If the page is still mapped, then this anon_vma is still
diff --git a/mm/slub.c b/mm/slub.c
index 0fdd6c1e1f82..54c0876b43d5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4277,7 +4277,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4277 int node; 4277 int node;
4278 struct page *page; 4278 struct page *page;
4279 4279
4280 page = ACCESS_ONCE(c->page); 4280 page = READ_ONCE(c->page);
4281 if (!page) 4281 if (!page)
4282 continue; 4282 continue;
4283 4283
@@ -4292,7 +4292,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4292 total += x; 4292 total += x;
4293 nodes[node] += x; 4293 nodes[node] += x;
4294 4294
4295 page = ACCESS_ONCE(c->partial); 4295 page = READ_ONCE(c->partial);
4296 if (page) { 4296 if (page) {
4297 node = page_to_nid(page); 4297 node = page_to_nid(page);
4298 if (flags & SO_TOTAL) 4298 if (flags & SO_TOTAL)
diff --git a/mm/swap.c b/mm/swap.c
index cd3a5e64cea9..a7251a8ed532 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,6 +31,7 @@
31#include <linux/memcontrol.h> 31#include <linux/memcontrol.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/uio.h> 33#include <linux/uio.h>
34#include <linux/hugetlb.h>
34 35
35#include "internal.h" 36#include "internal.h"
36 37
@@ -42,7 +43,7 @@ int page_cluster;
42 43
43static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); 44static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
44static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); 45static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
45static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); 46static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
46 47
47/* 48/*
48 * This path almost never happens for VM activity - pages are normally 49 * This path almost never happens for VM activity - pages are normally
@@ -75,7 +76,14 @@ static void __put_compound_page(struct page *page)
75{ 76{
76 compound_page_dtor *dtor; 77 compound_page_dtor *dtor;
77 78
78 __page_cache_release(page); 79 /*
80 * __page_cache_release() is supposed to be called for thp, not for
81 * hugetlb. This is because a hugetlb page never has PageLRU set
82 * (it is never put on any LRU list) and no memcg routines should
83 * be called for hugetlb (it has a separate hugetlb_cgroup).
84 */
85 if (!PageHuge(page))
86 __page_cache_release(page);
79 dtor = get_compound_page_dtor(page); 87 dtor = get_compound_page_dtor(page);
80 (*dtor)(page); 88 (*dtor)(page);
81} 89}
@@ -743,7 +751,7 @@ void lru_cache_add_active_or_unevictable(struct page *page,
743 * be written out by flusher threads as this is much more effective 751 * be written out by flusher threads as this is much more effective
744 * than the single-page writeout from reclaim. 752 * than the single-page writeout from reclaim.
745 */ 753 */
746static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, 754static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
747 void *arg) 755 void *arg)
748{ 756{
749 int lru, file; 757 int lru, file;
@@ -811,36 +819,36 @@ void lru_add_drain_cpu(int cpu)
811 local_irq_restore(flags); 819 local_irq_restore(flags);
812 } 820 }
813 821
814 pvec = &per_cpu(lru_deactivate_pvecs, cpu); 822 pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
815 if (pagevec_count(pvec)) 823 if (pagevec_count(pvec))
816 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); 824 pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
817 825
818 activate_page_drain(cpu); 826 activate_page_drain(cpu);
819} 827}
820 828
821/** 829/**
822 * deactivate_page - forcefully deactivate a page 830 * deactivate_file_page - forcefully deactivate a file page
823 * @page: page to deactivate 831 * @page: page to deactivate
824 * 832 *
825 * This function hints the VM that @page is a good reclaim candidate, 833 * This function hints the VM that @page is a good reclaim candidate,
826 * for example if its invalidation fails due to the page being dirty 834 * for example if its invalidation fails due to the page being dirty
827 * or under writeback. 835 * or under writeback.
828 */ 836 */
829void deactivate_page(struct page *page) 837void deactivate_file_page(struct page *page)
830{ 838{
831 /* 839 /*
832 * In a workload with many unevictable page such as mprotect, unevictable 840 * In a workload with many unevictable pages such as mprotect,
833 * page deactivation for accelerating reclaim is pointless. 841 * unevictable page deactivation for accelerating reclaim is pointless.
834 */ 842 */
835 if (PageUnevictable(page)) 843 if (PageUnevictable(page))
836 return; 844 return;
837 845
838 if (likely(get_page_unless_zero(page))) { 846 if (likely(get_page_unless_zero(page))) {
839 struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); 847 struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
840 848
841 if (!pagevec_add(pvec, page)) 849 if (!pagevec_add(pvec, page))
842 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); 850 pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
843 put_cpu_var(lru_deactivate_pvecs); 851 put_cpu_var(lru_deactivate_file_pvecs);
844 } 852 }
845} 853}
846 854
@@ -872,7 +880,7 @@ void lru_add_drain_all(void)
872 880
873 if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || 881 if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
874 pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || 882 pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
875 pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || 883 pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
876 need_activate_page_drain(cpu)) { 884 need_activate_page_drain(cpu)) {
877 INIT_WORK(work, lru_add_drain_per_cpu); 885 INIT_WORK(work, lru_add_drain_per_cpu);
878 schedule_work_on(cpu, work); 886 schedule_work_on(cpu, work);
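The renamed deactivate_file_page() above keeps the existing batching strategy: pages accumulate in a per-CPU pagevec and the expensive locked LRU walk runs only when the vector fills. A single-threaded userspace sketch of that batching shape; PAGEVEC_SIZE and the drain callback are stand-ins for the kernel's:

```c
#include <stdio.h>

#define PAGEVEC_SIZE 14

struct pagevec {
	unsigned int nr;
	void *pages[PAGEVEC_SIZE];
};

/* returns the number of slots left, 0 when the vector just became full */
static unsigned int pagevec_add(struct pagevec *pvec, void *page)
{
	pvec->pages[pvec->nr++] = page;
	return PAGEVEC_SIZE - pvec->nr;
}

static void drain(struct pagevec *pvec)
{
	printf("moving %u pages under one lock acquisition\n", pvec->nr);
	pvec->nr = 0;
}

int main(void)
{
	struct pagevec pvec = { 0 };
	int dummy[20];

	for (int i = 0; i < 20; i++)
		if (!pagevec_add(&pvec, &dummy[i]))
			drain(&pvec);	/* amortizes locking over 14 pages */
	if (pvec.nr)
		drain(&pvec);		/* flush the partial tail */
	return 0;
}
```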
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 405923f77334..8bc8e66138da 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -390,7 +390,7 @@ static unsigned long swapin_nr_pages(unsigned long offset)
390 unsigned int pages, max_pages, last_ra; 390 unsigned int pages, max_pages, last_ra;
391 static atomic_t last_readahead_pages; 391 static atomic_t last_readahead_pages;
392 392
393 max_pages = 1 << ACCESS_ONCE(page_cluster); 393 max_pages = 1 << READ_ONCE(page_cluster);
394 if (max_pages <= 1) 394 if (max_pages <= 1)
395 return 1; 395 return 1;
396 396
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 63f55ccb9b26..a7e72103f23b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1312,7 +1312,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1312 else 1312 else
1313 continue; 1313 continue;
1314 } 1314 }
1315 count = ACCESS_ONCE(si->swap_map[i]); 1315 count = READ_ONCE(si->swap_map[i]);
1316 if (count && swap_count(count) != SWAP_MAP_BAD) 1316 if (count && swap_count(count) != SWAP_MAP_BAD)
1317 break; 1317 break;
1318 } 1318 }
diff --git a/mm/truncate.c b/mm/truncate.c
index 7a9d8a3cb143..66af9031fae8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -490,7 +490,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
490 * of interest and try to speed up its reclaim. 490 * of interest and try to speed up its reclaim.
491 */ 491 */
492 if (!ret) 492 if (!ret)
493 deactivate_page(page); 493 deactivate_file_page(page);
494 count += ret; 494 count += ret;
495 } 495 }
496 pagevec_remove_exceptionals(&pvec); 496 pagevec_remove_exceptionals(&pvec);
diff --git a/mm/util.c b/mm/util.c
index 3981ae9d1b15..68ff8a5361e7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -325,9 +325,37 @@ void kvfree(const void *addr)
325} 325}
326EXPORT_SYMBOL(kvfree); 326EXPORT_SYMBOL(kvfree);
327 327
328static inline void *__page_rmapping(struct page *page)
329{
330 unsigned long mapping;
331
332 mapping = (unsigned long)page->mapping;
333 mapping &= ~PAGE_MAPPING_FLAGS;
334
335 return (void *)mapping;
336}
337
338/* Neutral page->mapping pointer to address_space or anon_vma or other */
339void *page_rmapping(struct page *page)
340{
341 page = compound_head(page);
342 return __page_rmapping(page);
343}
344
345struct anon_vma *page_anon_vma(struct page *page)
346{
347 unsigned long mapping;
348
349 page = compound_head(page);
350 mapping = (unsigned long)page->mapping;
351 if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
352 return NULL;
353 return __page_rmapping(page);
354}
355
328struct address_space *page_mapping(struct page *page) 356struct address_space *page_mapping(struct page *page)
329{ 357{
330 struct address_space *mapping = page->mapping; 358 unsigned long mapping;
331 359
332 /* This happens if someone calls flush_dcache_page on slab page */ 360 /* This happens if someone calls flush_dcache_page on slab page */
333 if (unlikely(PageSlab(page))) 361 if (unlikely(PageSlab(page)))
@@ -337,10 +365,13 @@ struct address_space *page_mapping(struct page *page)
337 swp_entry_t entry; 365 swp_entry_t entry;
338 366
339 entry.val = page_private(page); 367 entry.val = page_private(page);
340 mapping = swap_address_space(entry); 368 return swap_address_space(entry);
341 } else if ((unsigned long)mapping & PAGE_MAPPING_ANON) 369 }
342 mapping = NULL; 370
343 return mapping; 371 mapping = (unsigned long)page->mapping;
372 if (mapping & PAGE_MAPPING_FLAGS)
373 return NULL;
374 return page->mapping;
344} 375}
345 376
346int overcommit_ratio_handler(struct ctl_table *table, int write, 377int overcommit_ratio_handler(struct ctl_table *table, int write,
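The new page_rmapping()/page_anon_vma() helpers above decode a tagged pointer: alignment guarantees that the low bits of page->mapping are zero, so those bits are borrowed to flag what the pointer really refers to, and masking them off recovers the address. A generic userspace sketch of the low-bit-tagging trick; the tag values here are illustrative:

```c
#include <stdio.h>
#include <stdint.h>

#define MAPPING_ANON  0x1UL
#define MAPPING_FLAGS 0x3UL	/* two low bits, free because of alignment */

struct anon_vma { int dummy; };

static void *encode(void *ptr, uintptr_t tag)
{
	return (void *)((uintptr_t)ptr | tag);	/* ptr must be 4-byte aligned */
}

static struct anon_vma *decode_anon(void *mapping)
{
	uintptr_t v = (uintptr_t)mapping;

	if ((v & MAPPING_FLAGS) != MAPPING_ANON)
		return NULL;			/* a file mapping, or NULL */
	return (struct anon_vma *)(v & ~MAPPING_FLAGS);
}

int main(void)
{
	static struct anon_vma av;	/* suitably aligned: low bits are zero */
	void *mapping = encode(&av, MAPPING_ANON);

	printf("decoded %p, stored %p\n", (void *)decode_anon(mapping), (void *)&av);
	return 0;
}
```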
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a5bbdd3b5d67..2faaa2976447 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -765,7 +765,7 @@ struct vmap_block {
765 spinlock_t lock; 765 spinlock_t lock;
766 struct vmap_area *va; 766 struct vmap_area *va;
767 unsigned long free, dirty; 767 unsigned long free, dirty;
768 DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); 768 unsigned long dirty_min, dirty_max; /*< dirty range */
769 struct list_head free_list; 769 struct list_head free_list;
770 struct rcu_head rcu_head; 770 struct rcu_head rcu_head;
771 struct list_head purge; 771 struct list_head purge;
@@ -796,13 +796,31 @@ static unsigned long addr_to_vb_idx(unsigned long addr)
796 return addr; 796 return addr;
797} 797}
798 798
799static struct vmap_block *new_vmap_block(gfp_t gfp_mask) 799static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
800{
801 unsigned long addr;
802
803 addr = va_start + (pages_off << PAGE_SHIFT);
804 BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
805 return (void *)addr;
806}
807
808/**
809 * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
810 * block. Of course the number of pages can't exceed VMAP_BBMAP_BITS
811 * @order: how many 2^order pages should be occupied in the newly allocated block
812 * @gfp_mask: flags for the page level allocator
813 *
814 * Returns: virtual address in a newly allocated block or ERR_PTR(-errno)
815 */
816static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
800{ 817{
801 struct vmap_block_queue *vbq; 818 struct vmap_block_queue *vbq;
802 struct vmap_block *vb; 819 struct vmap_block *vb;
803 struct vmap_area *va; 820 struct vmap_area *va;
804 unsigned long vb_idx; 821 unsigned long vb_idx;
805 int node, err; 822 int node, err;
823 void *vaddr;
806 824
807 node = numa_node_id(); 825 node = numa_node_id();
808 826
@@ -826,11 +844,15 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
826 return ERR_PTR(err); 844 return ERR_PTR(err);
827 } 845 }
828 846
847 vaddr = vmap_block_vaddr(va->va_start, 0);
829 spin_lock_init(&vb->lock); 848 spin_lock_init(&vb->lock);
830 vb->va = va; 849 vb->va = va;
831 vb->free = VMAP_BBMAP_BITS; 850 /* At least something should be left free */
851 BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
852 vb->free = VMAP_BBMAP_BITS - (1UL << order);
832 vb->dirty = 0; 853 vb->dirty = 0;
833 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); 854 vb->dirty_min = VMAP_BBMAP_BITS;
855 vb->dirty_max = 0;
834 INIT_LIST_HEAD(&vb->free_list); 856 INIT_LIST_HEAD(&vb->free_list);
835 857
836 vb_idx = addr_to_vb_idx(va->va_start); 858 vb_idx = addr_to_vb_idx(va->va_start);
@@ -842,11 +864,11 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
842 864
843 vbq = &get_cpu_var(vmap_block_queue); 865 vbq = &get_cpu_var(vmap_block_queue);
844 spin_lock(&vbq->lock); 866 spin_lock(&vbq->lock);
845 list_add_rcu(&vb->free_list, &vbq->free); 867 list_add_tail_rcu(&vb->free_list, &vbq->free);
846 spin_unlock(&vbq->lock); 868 spin_unlock(&vbq->lock);
847 put_cpu_var(vmap_block_queue); 869 put_cpu_var(vmap_block_queue);
848 870
849 return vb; 871 return vaddr;
850} 872}
851 873
852static void free_vmap_block(struct vmap_block *vb) 874static void free_vmap_block(struct vmap_block *vb)
@@ -881,7 +903,8 @@ static void purge_fragmented_blocks(int cpu)
881 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { 903 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
882 vb->free = 0; /* prevent further allocs after releasing lock */ 904 vb->free = 0; /* prevent further allocs after releasing lock */
883 vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ 905 vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
884 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); 906 vb->dirty_min = 0;
907 vb->dirty_max = VMAP_BBMAP_BITS;
885 spin_lock(&vbq->lock); 908 spin_lock(&vbq->lock);
886 list_del_rcu(&vb->free_list); 909 list_del_rcu(&vb->free_list);
887 spin_unlock(&vbq->lock); 910 spin_unlock(&vbq->lock);
@@ -910,7 +933,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
910{ 933{
911 struct vmap_block_queue *vbq; 934 struct vmap_block_queue *vbq;
912 struct vmap_block *vb; 935 struct vmap_block *vb;
913 unsigned long addr = 0; 936 void *vaddr = NULL;
914 unsigned int order; 937 unsigned int order;
915 938
916 BUG_ON(size & ~PAGE_MASK); 939 BUG_ON(size & ~PAGE_MASK);
@@ -925,43 +948,38 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
925 } 948 }
926 order = get_order(size); 949 order = get_order(size);
927 950
928again:
929 rcu_read_lock(); 951 rcu_read_lock();
930 vbq = &get_cpu_var(vmap_block_queue); 952 vbq = &get_cpu_var(vmap_block_queue);
931 list_for_each_entry_rcu(vb, &vbq->free, free_list) { 953 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
932 int i; 954 unsigned long pages_off;
933 955
934 spin_lock(&vb->lock); 956 spin_lock(&vb->lock);
935 if (vb->free < 1UL << order) 957 if (vb->free < (1UL << order)) {
936 goto next; 958 spin_unlock(&vb->lock);
959 continue;
960 }
937 961
938 i = VMAP_BBMAP_BITS - vb->free; 962 pages_off = VMAP_BBMAP_BITS - vb->free;
939 addr = vb->va->va_start + (i << PAGE_SHIFT); 963 vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
940 BUG_ON(addr_to_vb_idx(addr) !=
941 addr_to_vb_idx(vb->va->va_start));
942 vb->free -= 1UL << order; 964 vb->free -= 1UL << order;
943 if (vb->free == 0) { 965 if (vb->free == 0) {
944 spin_lock(&vbq->lock); 966 spin_lock(&vbq->lock);
945 list_del_rcu(&vb->free_list); 967 list_del_rcu(&vb->free_list);
946 spin_unlock(&vbq->lock); 968 spin_unlock(&vbq->lock);
947 } 969 }
970
948 spin_unlock(&vb->lock); 971 spin_unlock(&vb->lock);
949 break; 972 break;
950next:
951 spin_unlock(&vb->lock);
952 } 973 }
953 974
954 put_cpu_var(vmap_block_queue); 975 put_cpu_var(vmap_block_queue);
955 rcu_read_unlock(); 976 rcu_read_unlock();
956 977
957 if (!addr) { 978 /* Allocate new block if nothing was found */
958 vb = new_vmap_block(gfp_mask); 979 if (!vaddr)
959 if (IS_ERR(vb)) 980 vaddr = new_vmap_block(order, gfp_mask);
960 return vb;
961 goto again;
962 }
963 981
964 return (void *)addr; 982 return vaddr;
965} 983}
966 984
967static void vb_free(const void *addr, unsigned long size) 985static void vb_free(const void *addr, unsigned long size)
@@ -979,6 +997,7 @@ static void vb_free(const void *addr, unsigned long size)
979 order = get_order(size); 997 order = get_order(size);
980 998
981 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); 999 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
1000 offset >>= PAGE_SHIFT;
982 1001
983 vb_idx = addr_to_vb_idx((unsigned long)addr); 1002 vb_idx = addr_to_vb_idx((unsigned long)addr);
984 rcu_read_lock(); 1003 rcu_read_lock();
@@ -989,7 +1008,10 @@ static void vb_free(const void *addr, unsigned long size)
989 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); 1008 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
990 1009
991 spin_lock(&vb->lock); 1010 spin_lock(&vb->lock);
992 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); 1011
1012 /* Expand dirty range */
1013 vb->dirty_min = min(vb->dirty_min, offset);
1014 vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
993 1015
994 vb->dirty += 1UL << order; 1016 vb->dirty += 1UL << order;
995 if (vb->dirty == VMAP_BBMAP_BITS) { 1017 if (vb->dirty == VMAP_BBMAP_BITS) {
@@ -1028,25 +1050,18 @@ void vm_unmap_aliases(void)
1028 1050
1029 rcu_read_lock(); 1051 rcu_read_lock();
1030 list_for_each_entry_rcu(vb, &vbq->free, free_list) { 1052 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1031 int i, j;
1032
1033 spin_lock(&vb->lock); 1053 spin_lock(&vb->lock);
1034 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS); 1054 if (vb->dirty) {
1035 if (i < VMAP_BBMAP_BITS) { 1055 unsigned long va_start = vb->va->va_start;
1036 unsigned long s, e; 1056 unsigned long s, e;
1037 1057
1038 j = find_last_bit(vb->dirty_map, 1058 s = va_start + (vb->dirty_min << PAGE_SHIFT);
1039 VMAP_BBMAP_BITS); 1059 e = va_start + (vb->dirty_max << PAGE_SHIFT);
1040 j = j + 1; /* need exclusive index */
1041 1060
1042 s = vb->va->va_start + (i << PAGE_SHIFT); 1061 start = min(s, start);
1043 e = vb->va->va_start + (j << PAGE_SHIFT); 1062 end = max(e, end);
1044 flush = 1;
1045 1063
1046 if (s < start) 1064 flush = 1;
1047 start = s;
1048 if (e > end)
1049 end = e;
1050 } 1065 }
1051 spin_unlock(&vb->lock); 1066 spin_unlock(&vb->lock);
1052 } 1067 }
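The vmap_block changes above replace the per-page dirty bitmap with a [dirty_min, dirty_max) range: frees only ever widen the range, and the flush path walks one contiguous span instead of scanning bits. The range may overshoot when dirty pages are sparse, but tracking and flushing become O(1). A compact sketch of that bookkeeping, with NBITS standing in for VMAP_BBMAP_BITS:

```c
#include <stdio.h>

#define NBITS 1024UL

static unsigned long dirty_min = NBITS;	/* empty range encoded as min > max */
static unsigned long dirty_max;

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

static void mark_dirty(unsigned long off, unsigned long npages)
{
	/* expand the range; never needs to touch per-page state */
	dirty_min = min_ul(dirty_min, off);
	dirty_max = max_ul(dirty_max, off + npages);
}

int main(void)
{
	mark_dirty(100, 4);
	mark_dirty(10, 2);
	/* one flush covers [10, 104): may overshoot, but never rescans bits */
	printf("flush pages [%lu, %lu)\n", dirty_min, dirty_max);
	return 0;
}
```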
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 0dec1fa5f656..08bd7a3d464a 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -12,35 +12,6 @@
12 */ 12 */
13 13
14/* 14/*
15 * This allocator is designed for use with zram. Thus, the allocator is
16 * supposed to work well under low memory conditions. In particular, it
17 * never attempts higher order page allocation which is very likely to
18 * fail under memory pressure. On the other hand, if we just use single
19 * (0-order) pages, it would suffer from very high fragmentation --
20 * any object of size PAGE_SIZE/2 or larger would occupy an entire page.
21 * This was one of the major issues with its predecessor (xvmalloc).
22 *
23 * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
24 * and links them together using various 'struct page' fields. These linked
25 * pages act as a single higher-order page i.e. an object can span 0-order
26 * page boundaries. The code refers to these linked pages as a single entity
27 * called zspage.
28 *
29 * For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
30 * since this satisfies the requirements of all its current users (in the
31 * worst case, page is incompressible and is thus stored "as-is" i.e. in
32 * uncompressed form). For allocation requests larger than this size, failure
33 * is returned (see zs_malloc).
34 *
35 * Additionally, zs_malloc() does not return a dereferenceable pointer.
36 * Instead, it returns an opaque handle (unsigned long) which encodes actual
37 * location of the allocated object. The reason for this indirection is that
38 * zsmalloc does not keep zspages permanently mapped since that would cause
39 * issues on 32-bit systems where the VA region for kernel space mappings
40 * is very small. So, before using the allocating memory, the object has to
41 * be mapped using zs_map_object() to get a usable pointer and subsequently
42 * unmapped using zs_unmap_object().
43 *
44 * Following is how we use various fields and flags of underlying 15 * Following is how we use various fields and flags of underlying
45 * struct page(s) to form a zspage. 16 * struct page(s) to form a zspage.
46 * 17 *
@@ -57,6 +28,8 @@
57 * 28 *
58 * page->private (union with page->first_page): refers to the 29 * page->private (union with page->first_page): refers to the
59 * component page after the first page 30 * component page after the first page
31 * If the page is the first_page of a huge object, it stores the handle.
32 * Look at size_class->huge.
60 * page->freelist: points to the first free object in zspage. 33 * page->freelist: points to the first free object in zspage.
61 * Free objects are linked together using in-place 34 * Free objects are linked together using in-place
62 * metadata. 35 * metadata.
@@ -78,6 +51,7 @@
78 51
79#include <linux/module.h> 52#include <linux/module.h>
80#include <linux/kernel.h> 53#include <linux/kernel.h>
54#include <linux/sched.h>
81#include <linux/bitops.h> 55#include <linux/bitops.h>
82#include <linux/errno.h> 56#include <linux/errno.h>
83#include <linux/highmem.h> 57#include <linux/highmem.h>
@@ -110,6 +84,8 @@
110#define ZS_MAX_ZSPAGE_ORDER 2 84#define ZS_MAX_ZSPAGE_ORDER 2
111#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) 85#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
112 86
87#define ZS_HANDLE_SIZE (sizeof(unsigned long))
88
113/* 89/*
114 * Object location (<PFN>, <obj_idx>) is encoded as 90 * Object location (<PFN>, <obj_idx>) is encoded as
115 * as single (unsigned long) handle value. 91 * as single (unsigned long) handle value.
@@ -133,13 +109,33 @@
133#endif 109#endif
134#endif 110#endif
135#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) 111#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
136#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) 112
113/*
114 * Memory allocated for a handle keeps the object position by
115 * encoding <page, obj_idx>, and the encoded value has room in its
116 * least significant bit (ie, look at obj_to_location).
117 * We use that bit to synchronize object access between the
118 * user and migration.
119 */
120#define HANDLE_PIN_BIT 0
121
122/*
123 * The head of an allocated object should have OBJ_ALLOCATED_TAG
124 * to identify whether the object was allocated or not.
125 * It's okay to put the status bit in the least significant bit because
126 * the header keeps a handle, which is a 4byte-aligned address, so we
127 * have room for at least two bits.
128 */
129#define OBJ_ALLOCATED_TAG 1
130#define OBJ_TAG_BITS 1
131#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
137#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) 132#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
138 133
139#define MAX(a, b) ((a) >= (b) ? (a) : (b)) 134#define MAX(a, b) ((a) >= (b) ? (a) : (b))
140/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ 135/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
141#define ZS_MIN_ALLOC_SIZE \ 136#define ZS_MIN_ALLOC_SIZE \
142 MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) 137 MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
138/* each chunk includes extra space to keep handle */
143#define ZS_MAX_ALLOC_SIZE PAGE_SIZE 139#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
144 140
145/* 141/*
@@ -172,6 +168,8 @@ enum fullness_group {
172enum zs_stat_type { 168enum zs_stat_type {
173 OBJ_ALLOCATED, 169 OBJ_ALLOCATED,
174 OBJ_USED, 170 OBJ_USED,
171 CLASS_ALMOST_FULL,
172 CLASS_ALMOST_EMPTY,
175 NR_ZS_STAT_TYPE, 173 NR_ZS_STAT_TYPE,
176}; 174};
177 175
@@ -216,6 +214,8 @@ struct size_class {
216 214
217 /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ 215 /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
218 int pages_per_zspage; 216 int pages_per_zspage;
217 /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
218 bool huge;
219 219
220#ifdef CONFIG_ZSMALLOC_STAT 220#ifdef CONFIG_ZSMALLOC_STAT
221 struct zs_size_stat stats; 221 struct zs_size_stat stats;
@@ -233,14 +233,24 @@ struct size_class {
233 * This must be power of 2 and less than or equal to ZS_ALIGN 233 * This must be power of 2 and less than or equal to ZS_ALIGN
234 */ 234 */
235struct link_free { 235struct link_free {
236 /* Handle of next free chunk (encodes <PFN, obj_idx>) */ 236 union {
237 void *next; 237 /*
238 * Position of next free chunk (encodes <PFN, obj_idx>)
239 * It's valid only for a non-allocated object
240 */
241 void *next;
242 /*
243 * Handle of allocated object.
244 */
245 unsigned long handle;
246 };
238}; 247};
239 248
240struct zs_pool { 249struct zs_pool {
241 char *name; 250 char *name;
242 251
243 struct size_class **size_class; 252 struct size_class **size_class;
253 struct kmem_cache *handle_cachep;
244 254
245 gfp_t flags; /* allocation flags used when growing pool */ 255 gfp_t flags; /* allocation flags used when growing pool */
246 atomic_long_t pages_allocated; 256 atomic_long_t pages_allocated;
@@ -267,8 +277,37 @@ struct mapping_area {
267#endif 277#endif
268 char *vm_addr; /* address of kmap_atomic()'ed pages */ 278 char *vm_addr; /* address of kmap_atomic()'ed pages */
269 enum zs_mapmode vm_mm; /* mapping mode */ 279 enum zs_mapmode vm_mm; /* mapping mode */
280 bool huge;
270}; 281};
271 282
283static int create_handle_cache(struct zs_pool *pool)
284{
285 pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
286 0, 0, NULL);
287 return pool->handle_cachep ? 0 : 1;
288}
289
290static void destroy_handle_cache(struct zs_pool *pool)
291{
292 kmem_cache_destroy(pool->handle_cachep);
293}
294
295static unsigned long alloc_handle(struct zs_pool *pool)
296{
297 return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
298 pool->flags & ~__GFP_HIGHMEM);
299}
300
301static void free_handle(struct zs_pool *pool, unsigned long handle)
302{
303 kmem_cache_free(pool->handle_cachep, (void *)handle);
304}
305
306static void record_obj(unsigned long handle, unsigned long obj)
307{
308 *(unsigned long *)handle = obj;
309}
310
272/* zpool driver */ 311/* zpool driver */
273 312
274#ifdef CONFIG_ZPOOL 313#ifdef CONFIG_ZPOOL
@@ -346,6 +385,11 @@ static struct zpool_driver zs_zpool_driver = {
346MODULE_ALIAS("zpool-zsmalloc"); 385MODULE_ALIAS("zpool-zsmalloc");
347#endif /* CONFIG_ZPOOL */ 386#endif /* CONFIG_ZPOOL */
348 387
388static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
389{
390 return pages_per_zspage * PAGE_SIZE / size;
391}
392
349/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ 393/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
350static DEFINE_PER_CPU(struct mapping_area, zs_map_area); 394static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
351 395
@@ -396,9 +440,182 @@ static int get_size_class_index(int size)
396 idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE, 440 idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
397 ZS_SIZE_CLASS_DELTA); 441 ZS_SIZE_CLASS_DELTA);
398 442
399 return idx; 443 return min(zs_size_classes - 1, idx);
444}
445
446#ifdef CONFIG_ZSMALLOC_STAT
447
448static inline void zs_stat_inc(struct size_class *class,
449 enum zs_stat_type type, unsigned long cnt)
450{
451 class->stats.objs[type] += cnt;
452}
453
454static inline void zs_stat_dec(struct size_class *class,
455 enum zs_stat_type type, unsigned long cnt)
456{
457 class->stats.objs[type] -= cnt;
458}
459
460static inline unsigned long zs_stat_get(struct size_class *class,
461 enum zs_stat_type type)
462{
463 return class->stats.objs[type];
464}
465
466static int __init zs_stat_init(void)
467{
468 if (!debugfs_initialized())
469 return -ENODEV;
470
471 zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
472 if (!zs_stat_root)
473 return -ENOMEM;
474
475 return 0;
476}
477
478static void __exit zs_stat_exit(void)
479{
480 debugfs_remove_recursive(zs_stat_root);
481}
482
483static int zs_stats_size_show(struct seq_file *s, void *v)
484{
485 int i;
486 struct zs_pool *pool = s->private;
487 struct size_class *class;
488 int objs_per_zspage;
489 unsigned long class_almost_full, class_almost_empty;
490 unsigned long obj_allocated, obj_used, pages_used;
491 unsigned long total_class_almost_full = 0, total_class_almost_empty = 0;
492 unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
493
494 seq_printf(s, " %5s %5s %11s %12s %13s %10s %10s %16s\n",
495 "class", "size", "almost_full", "almost_empty",
496 "obj_allocated", "obj_used", "pages_used",
497 "pages_per_zspage");
498
499 for (i = 0; i < zs_size_classes; i++) {
500 class = pool->size_class[i];
501
502 if (class->index != i)
503 continue;
504
505 spin_lock(&class->lock);
506 class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL);
507 class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY);
508 obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
509 obj_used = zs_stat_get(class, OBJ_USED);
510 spin_unlock(&class->lock);
511
512 objs_per_zspage = get_maxobj_per_zspage(class->size,
513 class->pages_per_zspage);
514 pages_used = obj_allocated / objs_per_zspage *
515 class->pages_per_zspage;
516
517 seq_printf(s, " %5u %5u %11lu %12lu %13lu %10lu %10lu %16d\n",
518 i, class->size, class_almost_full, class_almost_empty,
519 obj_allocated, obj_used, pages_used,
520 class->pages_per_zspage);
521
522 total_class_almost_full += class_almost_full;
523 total_class_almost_empty += class_almost_empty;
524 total_objs += obj_allocated;
525 total_used_objs += obj_used;
526 total_pages += pages_used;
527 }
528
529 seq_puts(s, "\n");
530 seq_printf(s, " %5s %5s %11lu %12lu %13lu %10lu %10lu\n",
531 "Total", "", total_class_almost_full,
532 total_class_almost_empty, total_objs,
533 total_used_objs, total_pages);
534
535 return 0;
536}
537
538static int zs_stats_size_open(struct inode *inode, struct file *file)
539{
540 return single_open(file, zs_stats_size_show, inode->i_private);
541}
542
543static const struct file_operations zs_stat_size_ops = {
544 .open = zs_stats_size_open,
545 .read = seq_read,
546 .llseek = seq_lseek,
547 .release = single_release,
548};
549
550static int zs_pool_stat_create(char *name, struct zs_pool *pool)
551{
552 struct dentry *entry;
553
554 if (!zs_stat_root)
555 return -ENODEV;
556
557 entry = debugfs_create_dir(name, zs_stat_root);
558 if (!entry) {
559 pr_warn("debugfs dir <%s> creation failed\n", name);
560 return -ENOMEM;
561 }
562 pool->stat_dentry = entry;
563
564 entry = debugfs_create_file("classes", S_IFREG | S_IRUGO,
565 pool->stat_dentry, pool, &zs_stat_size_ops);
566 if (!entry) {
567 pr_warn("%s: debugfs file entry <%s> creation failed\n",
568 name, "classes");
569 return -ENOMEM;
570 }
571
572 return 0;
573}
574
575static void zs_pool_stat_destroy(struct zs_pool *pool)
576{
577 debugfs_remove_recursive(pool->stat_dentry);
578}
579
580#else /* CONFIG_ZSMALLOC_STAT */
581
582static inline void zs_stat_inc(struct size_class *class,
583 enum zs_stat_type type, unsigned long cnt)
584{
585}
586
587static inline void zs_stat_dec(struct size_class *class,
588 enum zs_stat_type type, unsigned long cnt)
589{
590}
591
592static inline unsigned long zs_stat_get(struct size_class *class,
593 enum zs_stat_type type)
594{
595 return 0;
596}
597
598static int __init zs_stat_init(void)
599{
600 return 0;
601}
602
603static void __exit zs_stat_exit(void)
604{
605}
606
607static inline int zs_pool_stat_create(char *name, struct zs_pool *pool)
608{
609 return 0;
610}
611
612static inline void zs_pool_stat_destroy(struct zs_pool *pool)
613{
400} 614}
401 615
616#endif
617
618
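With CONFIG_ZSMALLOC_STAT enabled, the block above exposes per-class counters under debugfs at zsmalloc/<pool name>/classes (both names come straight from the hunk). The pages_used column is derived from OBJ_ALLOCATED rather than counted directly; a sketch of that derivation with assumed numbers:

	/* From zs_stats_size_show(), with illustrative values: */
	int objs_per_zspage = get_maxobj_per_zspage(class->size,
				class->pages_per_zspage);
	/* size 1536, 3 pages per zspage -> 3 * 4096 / 1536 = 8 objects */

	unsigned long pages_used = obj_allocated / objs_per_zspage *
				class->pages_per_zspage;
	/* 16 allocated objects -> 16 / 8 * 3 = 6 pages */

Since OBJ_ALLOCATED is bumped a whole zspage's worth at a time (see zs_malloc() below), the integer division here is exact.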
402/* 619/*
403 * For each size class, zspages are divided into different groups 620 * For each size class, zspages are divided into different groups
404 * depending on how "full" they are. This was done so that we could 621 * depending on how "full" they are. This was done so that we could
@@ -419,7 +636,7 @@ static enum fullness_group get_fullness_group(struct page *page)
419 fg = ZS_EMPTY; 636 fg = ZS_EMPTY;
420 else if (inuse == max_objects) 637 else if (inuse == max_objects)
421 fg = ZS_FULL; 638 fg = ZS_FULL;
422 else if (inuse <= max_objects / fullness_threshold_frac) 639 else if (inuse <= 3 * max_objects / fullness_threshold_frac)
423 fg = ZS_ALMOST_EMPTY; 640 fg = ZS_ALMOST_EMPTY;
424 else 641 else
425 fg = ZS_ALMOST_FULL; 642 fg = ZS_ALMOST_FULL;
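This widened ZS_ALMOST_EMPTY bucket is what later feeds compaction with source pages. Assuming fullness_threshold_frac is 4 (its value is not shown in this hunk), the cutoff moves from a quarter full to three quarters full:

	/* Illustrative, assuming fullness_threshold_frac == 4, max_objects == 8: */
	/* old: inuse <= 8 / 4      ->  ZS_ALMOST_EMPTY at inuse <= 2 */
	/* new: inuse <= 3 * 8 / 4  ->  ZS_ALMOST_EMPTY at inuse <= 6 */

More zspages therefore qualify as migration sources in isolate_source_page() further down.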
@@ -448,6 +665,8 @@ static void insert_zspage(struct page *page, struct size_class *class,
448 list_add_tail(&page->lru, &(*head)->lru); 665 list_add_tail(&page->lru, &(*head)->lru);
449 666
450 *head = page; 667 *head = page;
668 zs_stat_inc(class, fullness == ZS_ALMOST_EMPTY ?
669 CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
451} 670}
452 671
453/* 672/*
@@ -473,6 +692,8 @@ static void remove_zspage(struct page *page, struct size_class *class,
473 struct page, lru); 692 struct page, lru);
474 693
475 list_del_init(&page->lru); 694 list_del_init(&page->lru);
695 zs_stat_dec(class, fullness == ZS_ALMOST_EMPTY ?
696 CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
476} 697}
477 698
478/* 699/*
@@ -484,11 +705,10 @@ static void remove_zspage(struct page *page, struct size_class *class,
484 * page from the freelist of the old fullness group to that of the new 705 * page from the freelist of the old fullness group to that of the new
485 * fullness group. 706 * fullness group.
486 */ 707 */
487static enum fullness_group fix_fullness_group(struct zs_pool *pool, 708static enum fullness_group fix_fullness_group(struct size_class *class,
488 struct page *page) 709 struct page *page)
489{ 710{
490 int class_idx; 711 int class_idx;
491 struct size_class *class;
492 enum fullness_group currfg, newfg; 712 enum fullness_group currfg, newfg;
493 713
494 BUG_ON(!is_first_page(page)); 714 BUG_ON(!is_first_page(page));
@@ -498,7 +718,6 @@ static enum fullness_group fix_fullness_group(struct zs_pool *pool,
498 if (newfg == currfg) 718 if (newfg == currfg)
499 goto out; 719 goto out;
500 720
501 class = pool->size_class[class_idx];
502 remove_zspage(page, class, currfg); 721 remove_zspage(page, class, currfg);
503 insert_zspage(page, class, newfg); 722 insert_zspage(page, class, newfg);
504 set_zspage_mapping(page, class_idx, newfg); 723 set_zspage_mapping(page, class_idx, newfg);
@@ -512,7 +731,8 @@ out:
512 * to form a zspage for each size class. This is important 731 * to form a zspage for each size class. This is important
513 * to reduce wastage due to unusable space left at end of 732 * to reduce wastage due to unusable space left at end of
514 * each zspage which is given as: 733 * each zspage which is given as:
515 * wastage = Zp - Zp % size_class 734 * wastage = Zp % class_size
735 * usage = Zp - wastage
516 * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ... 736 * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
517 * 737 *
518 * For example, for size class of 3/8 * PAGE_SIZE, we should 738 * For example, for size class of 3/8 * PAGE_SIZE, we should
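The corrected formula is easy to sanity-check against the 3/8 * PAGE_SIZE class used as the example in this comment (numbers assume 4 KiB pages):

	/* class_size = 3 * 4096 / 8 = 1536 bytes, PAGE_SIZE assumed 4096: */
	/* Zp = 1 page  -> wastage = 4096  % 1536 = 1024 (a quarter wasted) */
	/* Zp = 3 pages -> wastage = 12288 % 1536 = 0    (no waste)         */

which is exactly why such a class links three pages into one zspage.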
@@ -571,35 +791,50 @@ static struct page *get_next_page(struct page *page)
571 791
572/* 792/*
573 * Encode <page, obj_idx> as a single handle value. 793 * Encode <page, obj_idx> as a single handle value.
574 * On hardware platforms with physical memory starting at 0x0 the pfn 794 * We use the least significant bit of the handle for tagging.
575 * could be 0 so we ensure that the handle will never be 0 by adjusting the
576 * encoded obj_idx value before encoding.
577 */ 795 */
578static void *obj_location_to_handle(struct page *page, unsigned long obj_idx) 796static void *location_to_obj(struct page *page, unsigned long obj_idx)
579{ 797{
580 unsigned long handle; 798 unsigned long obj;
581 799
582 if (!page) { 800 if (!page) {
583 BUG_ON(obj_idx); 801 BUG_ON(obj_idx);
584 return NULL; 802 return NULL;
585 } 803 }
586 804
587 handle = page_to_pfn(page) << OBJ_INDEX_BITS; 805 obj = page_to_pfn(page) << OBJ_INDEX_BITS;
588 handle |= ((obj_idx + 1) & OBJ_INDEX_MASK); 806 obj |= ((obj_idx) & OBJ_INDEX_MASK);
807 obj <<= OBJ_TAG_BITS;
589 808
590 return (void *)handle; 809 return (void *)obj;
591} 810}
592 811
593/* 812/*
594 * Decode <page, obj_idx> pair from the given object handle. We adjust the 813 * Decode <page, obj_idx> pair from the given object handle. We adjust the
595 * decoded obj_idx back to its original value since it was adjusted in 814 * decoded obj_idx back to its original value since it was adjusted in
596 * obj_location_to_handle(). 815 * location_to_obj().
597 */ 816 */
598static void obj_handle_to_location(unsigned long handle, struct page **page, 817static void obj_to_location(unsigned long obj, struct page **page,
599 unsigned long *obj_idx) 818 unsigned long *obj_idx)
600{ 819{
601 *page = pfn_to_page(handle >> OBJ_INDEX_BITS); 820 obj >>= OBJ_TAG_BITS;
602 *obj_idx = (handle & OBJ_INDEX_MASK) - 1; 821 *page = pfn_to_page(obj >> OBJ_INDEX_BITS);
822 *obj_idx = (obj & OBJ_INDEX_MASK);
823}
824
825static unsigned long handle_to_obj(unsigned long handle)
826{
827 return *(unsigned long *)handle;
828}
829
830static unsigned long obj_to_head(struct size_class *class, struct page *page,
831 void *obj)
832{
833 if (class->huge) {
834 VM_BUG_ON(!is_first_page(page));
835 return *(unsigned long *)page_private(page);
836 } else
837 return *(unsigned long *)obj;
603} 838}
604 839
605static unsigned long obj_idx_to_offset(struct page *page, 840static unsigned long obj_idx_to_offset(struct page *page,
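Compared with the old scheme, the obj_idx+1 trick for avoiding a zero handle is gone: the encoded value is instead shifted left by OBJ_TAG_BITS, which keeps the low bit free for OBJ_ALLOCATED_TAG. A worked round trip through the two helpers (bit widths illustrative; OBJ_INDEX_BITS is architecture dependent):

	/* Encode, as in location_to_obj(): */
	unsigned long obj = page_to_pfn(page) << OBJ_INDEX_BITS;
	obj |= (obj_idx & OBJ_INDEX_MASK);
	obj <<= OBJ_TAG_BITS;		/* bit 0 now free for a tag */

	/* Decode, as in obj_to_location(): */
	obj >>= OBJ_TAG_BITS;
	page = pfn_to_page(obj >> OBJ_INDEX_BITS);
	obj_idx = obj & OBJ_INDEX_MASK;

obj_to_head() then reads the word stored in the object's first bytes (or in page_private() for huge classes), which is where the tagged handle lives after obj_malloc() below.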
@@ -613,6 +848,25 @@ static unsigned long obj_idx_to_offset(struct page *page,
613 return off + obj_idx * class_size; 848 return off + obj_idx * class_size;
614} 849}
615 850
851static inline int trypin_tag(unsigned long handle)
852{
853 unsigned long *ptr = (unsigned long *)handle;
854
855 return !test_and_set_bit_lock(HANDLE_PIN_BIT, ptr);
856}
857
858static void pin_tag(unsigned long handle)
859{
860 while (!trypin_tag(handle));
861}
862
863static void unpin_tag(unsigned long handle)
864{
865 unsigned long *ptr = (unsigned long *)handle;
866
867 clear_bit_unlock(HANDLE_PIN_BIT, ptr);
868}
869
616static void reset_page(struct page *page) 870static void reset_page(struct page *page)
617{ 871{
618 clear_bit(PG_private, &page->flags); 872 clear_bit(PG_private, &page->flags);
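The pin bit doubles as a tiny per-handle lock: trypin_tag() uses test_and_set_bit_lock() on HANDLE_PIN_BIT inside the handle word, so whoever holds the bit knows the object cannot be migrated out from under it (and, conversely, migration skips objects it cannot pin, as find_alloced_obj() shows later). The intended pairing, as used by zs_map_object()/zs_unmap_object() and zs_free() in this diff:

	unsigned long obj;

	pin_tag(handle);		/* spins until HANDLE_PIN_BIT is ours */
	obj = handle_to_obj(handle);	/* safe: the location cannot change now */
	/* ... map / copy / free the object ... */
	unpin_tag(handle);		/* clear_bit_unlock() releases the bit */

pin_tag() is a busy-wait; that is acceptable here because the bit is only held across short, non-sleeping critical sections.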
@@ -674,7 +928,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
674 link = (struct link_free *)vaddr + off / sizeof(*link); 928 link = (struct link_free *)vaddr + off / sizeof(*link);
675 929
676 while ((off += class->size) < PAGE_SIZE) { 930 while ((off += class->size) < PAGE_SIZE) {
677 link->next = obj_location_to_handle(page, i++); 931 link->next = location_to_obj(page, i++);
678 link += class->size / sizeof(*link); 932 link += class->size / sizeof(*link);
679 } 933 }
680 934
@@ -684,7 +938,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
684 * page (if present) 938 * page (if present)
685 */ 939 */
686 next_page = get_next_page(page); 940 next_page = get_next_page(page);
687 link->next = obj_location_to_handle(next_page, 0); 941 link->next = location_to_obj(next_page, 0);
688 kunmap_atomic(vaddr); 942 kunmap_atomic(vaddr);
689 page = next_page; 943 page = next_page;
690 off %= PAGE_SIZE; 944 off %= PAGE_SIZE;
@@ -738,7 +992,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
738 992
739 init_zspage(first_page, class); 993 init_zspage(first_page, class);
740 994
741 first_page->freelist = obj_location_to_handle(first_page, 0); 995 first_page->freelist = location_to_obj(first_page, 0);
742 /* Maximum number of objects we can store in this zspage */ 996 /* Maximum number of objects we can store in this zspage */
743 first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size; 997 first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
744 998
@@ -860,12 +1114,19 @@ static void __zs_unmap_object(struct mapping_area *area,
860{ 1114{
861 int sizes[2]; 1115 int sizes[2];
862 void *addr; 1116 void *addr;
863 char *buf = area->vm_buf; 1117 char *buf;
864 1118
865 /* no write fastpath */ 1119 /* no write fastpath */
866 if (area->vm_mm == ZS_MM_RO) 1120 if (area->vm_mm == ZS_MM_RO)
867 goto out; 1121 goto out;
868 1122
1123 buf = area->vm_buf;
1124 if (!area->huge) {
1125 buf = buf + ZS_HANDLE_SIZE;
1126 size -= ZS_HANDLE_SIZE;
1127 off += ZS_HANDLE_SIZE;
1128 }
1129
869 sizes[0] = PAGE_SIZE - off; 1130 sizes[0] = PAGE_SIZE - off;
870 sizes[1] = size - sizes[0]; 1131 sizes[1] = size - sizes[0];
871 1132
@@ -952,11 +1213,6 @@ static void init_zs_size_classes(void)
952 zs_size_classes = nr; 1213 zs_size_classes = nr;
953} 1214}
954 1215
955static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
956{
957 return pages_per_zspage * PAGE_SIZE / size;
958}
959
960static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) 1216static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
961{ 1217{
962 if (prev->pages_per_zspage != pages_per_zspage) 1218 if (prev->pages_per_zspage != pages_per_zspage)
@@ -969,166 +1225,13 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
969 return true; 1225 return true;
970} 1226}
971 1227
972#ifdef CONFIG_ZSMALLOC_STAT 1228static bool zspage_full(struct page *page)
973
974static inline void zs_stat_inc(struct size_class *class,
975 enum zs_stat_type type, unsigned long cnt)
976{
977 class->stats.objs[type] += cnt;
978}
979
980static inline void zs_stat_dec(struct size_class *class,
981 enum zs_stat_type type, unsigned long cnt)
982{
983 class->stats.objs[type] -= cnt;
984}
985
986static inline unsigned long zs_stat_get(struct size_class *class,
987 enum zs_stat_type type)
988{
989 return class->stats.objs[type];
990}
991
992static int __init zs_stat_init(void)
993{
994 if (!debugfs_initialized())
995 return -ENODEV;
996
997 zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
998 if (!zs_stat_root)
999 return -ENOMEM;
1000
1001 return 0;
1002}
1003
1004static void __exit zs_stat_exit(void)
1005{
1006 debugfs_remove_recursive(zs_stat_root);
1007}
1008
1009static int zs_stats_size_show(struct seq_file *s, void *v)
1010{ 1229{
1011 int i; 1230 BUG_ON(!is_first_page(page));
1012 struct zs_pool *pool = s->private;
1013 struct size_class *class;
1014 int objs_per_zspage;
1015 unsigned long obj_allocated, obj_used, pages_used;
1016 unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
1017
1018 seq_printf(s, " %5s %5s %13s %10s %10s\n", "class", "size",
1019 "obj_allocated", "obj_used", "pages_used");
1020
1021 for (i = 0; i < zs_size_classes; i++) {
1022 class = pool->size_class[i];
1023
1024 if (class->index != i)
1025 continue;
1026
1027 spin_lock(&class->lock);
1028 obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
1029 obj_used = zs_stat_get(class, OBJ_USED);
1030 spin_unlock(&class->lock);
1031
1032 objs_per_zspage = get_maxobj_per_zspage(class->size,
1033 class->pages_per_zspage);
1034 pages_used = obj_allocated / objs_per_zspage *
1035 class->pages_per_zspage;
1036
1037 seq_printf(s, " %5u %5u %10lu %10lu %10lu\n", i,
1038 class->size, obj_allocated, obj_used, pages_used);
1039
1040 total_objs += obj_allocated;
1041 total_used_objs += obj_used;
1042 total_pages += pages_used;
1043 }
1044
1045 seq_puts(s, "\n");
1046 seq_printf(s, " %5s %5s %10lu %10lu %10lu\n", "Total", "",
1047 total_objs, total_used_objs, total_pages);
1048
1049 return 0;
1050}
1051
1052static int zs_stats_size_open(struct inode *inode, struct file *file)
1053{
1054 return single_open(file, zs_stats_size_show, inode->i_private);
1055}
1056
1057static const struct file_operations zs_stat_size_ops = {
1058 .open = zs_stats_size_open,
1059 .read = seq_read,
1060 .llseek = seq_lseek,
1061 .release = single_release,
1062};
1063
1064static int zs_pool_stat_create(char *name, struct zs_pool *pool)
1065{
1066 struct dentry *entry;
1067
1068 if (!zs_stat_root)
1069 return -ENODEV;
1070
1071 entry = debugfs_create_dir(name, zs_stat_root);
1072 if (!entry) {
1073 pr_warn("debugfs dir <%s> creation failed\n", name);
1074 return -ENOMEM;
1075 }
1076 pool->stat_dentry = entry;
1077
1078 entry = debugfs_create_file("obj_in_classes", S_IFREG | S_IRUGO,
1079 pool->stat_dentry, pool, &zs_stat_size_ops);
1080 if (!entry) {
1081 pr_warn("%s: debugfs file entry <%s> creation failed\n",
1082 name, "obj_in_classes");
1083 return -ENOMEM;
1084 }
1085
1086 return 0;
1087}
1088
1089static void zs_pool_stat_destroy(struct zs_pool *pool)
1090{
1091 debugfs_remove_recursive(pool->stat_dentry);
1092}
1093
1094#else /* CONFIG_ZSMALLOC_STAT */
1095
1096static inline void zs_stat_inc(struct size_class *class,
1097 enum zs_stat_type type, unsigned long cnt)
1098{
1099}
1100
1101static inline void zs_stat_dec(struct size_class *class,
1102 enum zs_stat_type type, unsigned long cnt)
1103{
1104}
1105
1106static inline unsigned long zs_stat_get(struct size_class *class,
1107 enum zs_stat_type type)
1108{
1109 return 0;
1110}
1111
1112static int __init zs_stat_init(void)
1113{
1114 return 0;
1115}
1116
1117static void __exit zs_stat_exit(void)
1118{
1119}
1120
1121static inline int zs_pool_stat_create(char *name, struct zs_pool *pool)
1122{
1123 return 0;
1124}
1125 1231
1126static inline void zs_pool_stat_destroy(struct zs_pool *pool) 1232 return page->inuse == page->objects;
1127{
1128} 1233}
1129 1234
1130#endif
1131
1132unsigned long zs_get_total_pages(struct zs_pool *pool) 1235unsigned long zs_get_total_pages(struct zs_pool *pool)
1133{ 1236{
1134 return atomic_long_read(&pool->pages_allocated); 1237 return atomic_long_read(&pool->pages_allocated);
@@ -1153,13 +1256,14 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1153 enum zs_mapmode mm) 1256 enum zs_mapmode mm)
1154{ 1257{
1155 struct page *page; 1258 struct page *page;
1156 unsigned long obj_idx, off; 1259 unsigned long obj, obj_idx, off;
1157 1260
1158 unsigned int class_idx; 1261 unsigned int class_idx;
1159 enum fullness_group fg; 1262 enum fullness_group fg;
1160 struct size_class *class; 1263 struct size_class *class;
1161 struct mapping_area *area; 1264 struct mapping_area *area;
1162 struct page *pages[2]; 1265 struct page *pages[2];
1266 void *ret;
1163 1267
1164 BUG_ON(!handle); 1268 BUG_ON(!handle);
1165 1269
@@ -1170,7 +1274,11 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1170 */ 1274 */
1171 BUG_ON(in_interrupt()); 1275 BUG_ON(in_interrupt());
1172 1276
1173 obj_handle_to_location(handle, &page, &obj_idx); 1277 /* From now on, migration cannot move the object */
1278 pin_tag(handle);
1279
1280 obj = handle_to_obj(handle);
1281 obj_to_location(obj, &page, &obj_idx);
1174 get_zspage_mapping(get_first_page(page), &class_idx, &fg); 1282 get_zspage_mapping(get_first_page(page), &class_idx, &fg);
1175 class = pool->size_class[class_idx]; 1283 class = pool->size_class[class_idx];
1176 off = obj_idx_to_offset(page, obj_idx, class->size); 1284 off = obj_idx_to_offset(page, obj_idx, class->size);
@@ -1180,7 +1288,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1180 if (off + class->size <= PAGE_SIZE) { 1288 if (off + class->size <= PAGE_SIZE) {
1181 /* this object is contained entirely within a page */ 1289 /* this object is contained entirely within a page */
1182 area->vm_addr = kmap_atomic(page); 1290 area->vm_addr = kmap_atomic(page);
1183 return area->vm_addr + off; 1291 ret = area->vm_addr + off;
1292 goto out;
1184 } 1293 }
1185 1294
1186 /* this object spans two pages */ 1295 /* this object spans two pages */
@@ -1188,14 +1297,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1188 pages[1] = get_next_page(page); 1297 pages[1] = get_next_page(page);
1189 BUG_ON(!pages[1]); 1298 BUG_ON(!pages[1]);
1190 1299
1191 return __zs_map_object(area, pages, off, class->size); 1300 ret = __zs_map_object(area, pages, off, class->size);
1301out:
1302 if (!class->huge)
1303 ret += ZS_HANDLE_SIZE;
1304
1305 return ret;
1192} 1306}
1193EXPORT_SYMBOL_GPL(zs_map_object); 1307EXPORT_SYMBOL_GPL(zs_map_object);
1194 1308
1195void zs_unmap_object(struct zs_pool *pool, unsigned long handle) 1309void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1196{ 1310{
1197 struct page *page; 1311 struct page *page;
1198 unsigned long obj_idx, off; 1312 unsigned long obj, obj_idx, off;
1199 1313
1200 unsigned int class_idx; 1314 unsigned int class_idx;
1201 enum fullness_group fg; 1315 enum fullness_group fg;
@@ -1204,7 +1318,8 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1204 1318
1205 BUG_ON(!handle); 1319 BUG_ON(!handle);
1206 1320
1207 obj_handle_to_location(handle, &page, &obj_idx); 1321 obj = handle_to_obj(handle);
1322 obj_to_location(obj, &page, &obj_idx);
1208 get_zspage_mapping(get_first_page(page), &class_idx, &fg); 1323 get_zspage_mapping(get_first_page(page), &class_idx, &fg);
1209 class = pool->size_class[class_idx]; 1324 class = pool->size_class[class_idx];
1210 off = obj_idx_to_offset(page, obj_idx, class->size); 1325 off = obj_idx_to_offset(page, obj_idx, class->size);
@@ -1222,9 +1337,42 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1222 __zs_unmap_object(area, pages, off, class->size); 1337 __zs_unmap_object(area, pages, off, class->size);
1223 } 1338 }
1224 put_cpu_var(zs_map_area); 1339 put_cpu_var(zs_map_area);
1340 unpin_tag(handle);
1225} 1341}
1226EXPORT_SYMBOL_GPL(zs_unmap_object); 1342EXPORT_SYMBOL_GPL(zs_unmap_object);
1227 1343
1344static unsigned long obj_malloc(struct page *first_page,
1345 struct size_class *class, unsigned long handle)
1346{
1347 unsigned long obj;
1348 struct link_free *link;
1349
1350 struct page *m_page;
1351 unsigned long m_objidx, m_offset;
1352 void *vaddr;
1353
1354 handle |= OBJ_ALLOCATED_TAG;
1355 obj = (unsigned long)first_page->freelist;
1356 obj_to_location(obj, &m_page, &m_objidx);
1357 m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
1358
1359 vaddr = kmap_atomic(m_page);
1360 link = (struct link_free *)vaddr + m_offset / sizeof(*link);
1361 first_page->freelist = link->next;
1362 if (!class->huge)
1363 /* record handle in the header of allocated chunk */
1364 link->handle = handle;
1365 else
1366 /* record handle in first_page->private */
1367 set_page_private(first_page, handle);
1368 kunmap_atomic(vaddr);
1369 first_page->inuse++;
1370 zs_stat_inc(class, OBJ_USED, 1);
1371
1372 return obj;
1373}
1374
1375
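obj_malloc() is also where the handle gets written into the memory it hands out: for ordinary classes the first word of the chunk (link->handle) stores the handle with OBJ_ALLOCATED_TAG set, while huge classes, whose single object fills the zspage, keep it in first_page->private instead. That is why zs_map_object() above advances its return pointer by ZS_HANDLE_SIZE for non-huge classes, and why __zs_unmap_object() skips the same prefix on write-back. The resulting layout of a non-huge chunk, as a sketch:

	/*
	 *  |<-- ZS_HANDLE_SIZE -->|<-------- payload -------->|
	 *  +----------------------+---------------------------+
	 *  | handle | tag bit     | caller data               |
	 *  +----------------------+---------------------------+
	 *  ^ chunk start          ^ pointer seen by the caller
	 */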
1228/** 1376/**
1229 * zs_malloc - Allocate block of given size from pool. 1377 * zs_malloc - Allocate block of given size from pool.
1230 * @pool: pool to allocate from 1378 * @pool: pool to allocate from
@@ -1236,17 +1384,19 @@ EXPORT_SYMBOL_GPL(zs_unmap_object);
1236 */ 1384 */
1237unsigned long zs_malloc(struct zs_pool *pool, size_t size) 1385unsigned long zs_malloc(struct zs_pool *pool, size_t size)
1238{ 1386{
1239 unsigned long obj; 1387 unsigned long handle, obj;
1240 struct link_free *link;
1241 struct size_class *class; 1388 struct size_class *class;
1242 void *vaddr; 1389 struct page *first_page;
1243
1244 struct page *first_page, *m_page;
1245 unsigned long m_objidx, m_offset;
1246 1390
1247 if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE)) 1391 if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
1248 return 0; 1392 return 0;
1249 1393
1394 handle = alloc_handle(pool);
1395 if (!handle)
1396 return 0;
1397
1398 /* extra space in chunk to keep the handle */
1399 size += ZS_HANDLE_SIZE;
1250 class = pool->size_class[get_size_class_index(size)]; 1400 class = pool->size_class[get_size_class_index(size)];
1251 1401
1252 spin_lock(&class->lock); 1402 spin_lock(&class->lock);
@@ -1255,8 +1405,10 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
1255 if (!first_page) { 1405 if (!first_page) {
1256 spin_unlock(&class->lock); 1406 spin_unlock(&class->lock);
1257 first_page = alloc_zspage(class, pool->flags); 1407 first_page = alloc_zspage(class, pool->flags);
1258 if (unlikely(!first_page)) 1408 if (unlikely(!first_page)) {
1409 free_handle(pool, handle);
1259 return 0; 1410 return 0;
1411 }
1260 1412
1261 set_zspage_mapping(first_page, class->index, ZS_EMPTY); 1413 set_zspage_mapping(first_page, class->index, ZS_EMPTY);
1262 atomic_long_add(class->pages_per_zspage, 1414 atomic_long_add(class->pages_per_zspage,
@@ -1267,73 +1419,360 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
1267 class->size, class->pages_per_zspage)); 1419 class->size, class->pages_per_zspage));
1268 } 1420 }
1269 1421
1270 obj = (unsigned long)first_page->freelist; 1422 obj = obj_malloc(first_page, class, handle);
1271 obj_handle_to_location(obj, &m_page, &m_objidx);
1272 m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
1273
1274 vaddr = kmap_atomic(m_page);
1275 link = (struct link_free *)vaddr + m_offset / sizeof(*link);
1276 first_page->freelist = link->next;
1277 memset(link, POISON_INUSE, sizeof(*link));
1278 kunmap_atomic(vaddr);
1279
1280 first_page->inuse++;
1281 zs_stat_inc(class, OBJ_USED, 1);
1282 /* Now move the zspage to another fullness group, if required */ 1423 /* Now move the zspage to another fullness group, if required */
1283 fix_fullness_group(pool, first_page); 1424 fix_fullness_group(class, first_page);
1425 record_obj(handle, obj);
1284 spin_unlock(&class->lock); 1426 spin_unlock(&class->lock);
1285 1427
1286 return obj; 1428 return handle;
1287} 1429}
1288EXPORT_SYMBOL_GPL(zs_malloc); 1430EXPORT_SYMBOL_GPL(zs_malloc);
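Externally the contract is unchanged: zs_malloc() still returns an opaque unsigned long that must be mapped before use. A hedged end-to-end sketch of that contract (error handling trimmed; the zs_create_pool() name/flags signature matches the one visible further down in this diff):

	char src[128] = "example payload";

	struct zs_pool *pool = zs_create_pool("example", GFP_KERNEL);
	unsigned long handle = zs_malloc(pool, sizeof(src)); /* opaque handle */

	void *dst = zs_map_object(pool, handle, ZS_MM_WO);   /* pins the object */
	memcpy(dst, src, sizeof(src));
	zs_unmap_object(pool, handle);                       /* dst is now stale */

	zs_free(pool, handle);
	zs_destroy_pool(pool);

The ZS_HANDLE_SIZE bookkeeping is invisible here: zs_malloc() grows the request internally and zs_map_object() returns a pointer past the stored handle.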
1289 1431
1290void zs_free(struct zs_pool *pool, unsigned long obj) 1432static void obj_free(struct zs_pool *pool, struct size_class *class,
1433 unsigned long obj)
1291{ 1434{
1292 struct link_free *link; 1435 struct link_free *link;
1293 struct page *first_page, *f_page; 1436 struct page *first_page, *f_page;
1294 unsigned long f_objidx, f_offset; 1437 unsigned long f_objidx, f_offset;
1295 void *vaddr; 1438 void *vaddr;
1296
1297 int class_idx; 1439 int class_idx;
1298 struct size_class *class;
1299 enum fullness_group fullness; 1440 enum fullness_group fullness;
1300 1441
1301 if (unlikely(!obj)) 1442 BUG_ON(!obj);
1302 return;
1303 1443
1304 obj_handle_to_location(obj, &f_page, &f_objidx); 1444 obj &= ~OBJ_ALLOCATED_TAG;
1445 obj_to_location(obj, &f_page, &f_objidx);
1305 first_page = get_first_page(f_page); 1446 first_page = get_first_page(f_page);
1306 1447
1307 get_zspage_mapping(first_page, &class_idx, &fullness); 1448 get_zspage_mapping(first_page, &class_idx, &fullness);
1308 class = pool->size_class[class_idx];
1309 f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); 1449 f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
1310 1450
1311 spin_lock(&class->lock); 1451 vaddr = kmap_atomic(f_page);
1312 1452
1313 /* Insert this object in containing zspage's freelist */ 1453 /* Insert this object in containing zspage's freelist */
1314 vaddr = kmap_atomic(f_page);
1315 link = (struct link_free *)(vaddr + f_offset); 1454 link = (struct link_free *)(vaddr + f_offset);
1316 link->next = first_page->freelist; 1455 link->next = first_page->freelist;
1456 if (class->huge)
1457 set_page_private(first_page, 0);
1317 kunmap_atomic(vaddr); 1458 kunmap_atomic(vaddr);
1318 first_page->freelist = (void *)obj; 1459 first_page->freelist = (void *)obj;
1319
1320 first_page->inuse--; 1460 first_page->inuse--;
1321 fullness = fix_fullness_group(pool, first_page);
1322
1323 zs_stat_dec(class, OBJ_USED, 1); 1461 zs_stat_dec(class, OBJ_USED, 1);
1324 if (fullness == ZS_EMPTY) 1462}
1463
1464void zs_free(struct zs_pool *pool, unsigned long handle)
1465{
1466 struct page *first_page, *f_page;
1467 unsigned long obj, f_objidx;
1468 int class_idx;
1469 struct size_class *class;
1470 enum fullness_group fullness;
1471
1472 if (unlikely(!handle))
1473 return;
1474
1475 pin_tag(handle);
1476 obj = handle_to_obj(handle);
1477 obj_to_location(obj, &f_page, &f_objidx);
1478 first_page = get_first_page(f_page);
1479
1480 get_zspage_mapping(first_page, &class_idx, &fullness);
1481 class = pool->size_class[class_idx];
1482
1483 spin_lock(&class->lock);
1484 obj_free(pool, class, obj);
1485 fullness = fix_fullness_group(class, first_page);
1486 if (fullness == ZS_EMPTY) {
1325 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( 1487 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
1326 class->size, class->pages_per_zspage)); 1488 class->size, class->pages_per_zspage));
1327 1489 atomic_long_sub(class->pages_per_zspage,
1490 &pool->pages_allocated);
1491 free_zspage(first_page);
1492 }
1328 spin_unlock(&class->lock); 1493 spin_unlock(&class->lock);
1494 unpin_tag(handle);
1495
1496 free_handle(pool, handle);
1497}
1498EXPORT_SYMBOL_GPL(zs_free);
1499
1500static void zs_object_copy(unsigned long src, unsigned long dst,
1501 struct size_class *class)
1502{
1503 struct page *s_page, *d_page;
1504 unsigned long s_objidx, d_objidx;
1505 unsigned long s_off, d_off;
1506 void *s_addr, *d_addr;
1507 int s_size, d_size, size;
1508 int written = 0;
1509
1510 s_size = d_size = class->size;
1511
1512 obj_to_location(src, &s_page, &s_objidx);
1513 obj_to_location(dst, &d_page, &d_objidx);
1514
1515 s_off = obj_idx_to_offset(s_page, s_objidx, class->size);
1516 d_off = obj_idx_to_offset(d_page, d_objidx, class->size);
1517
1518 if (s_off + class->size > PAGE_SIZE)
1519 s_size = PAGE_SIZE - s_off;
1520
1521 if (d_off + class->size > PAGE_SIZE)
1522 d_size = PAGE_SIZE - d_off;
1523
1524 s_addr = kmap_atomic(s_page);
1525 d_addr = kmap_atomic(d_page);
1526
1527 while (1) {
1528 size = min(s_size, d_size);
1529 memcpy(d_addr + d_off, s_addr + s_off, size);
1530 written += size;
1531
1532 if (written == class->size)
1533 break;
1534
1535 s_off += size;
1536 s_size -= size;
1537 d_off += size;
1538 d_size -= size;
1539
1540 if (s_off >= PAGE_SIZE) {
1541 kunmap_atomic(d_addr);
1542 kunmap_atomic(s_addr);
1543 s_page = get_next_page(s_page);
1544 BUG_ON(!s_page);
1545 s_addr = kmap_atomic(s_page);
1546 d_addr = kmap_atomic(d_page);
1547 s_size = class->size - written;
1548 s_off = 0;
1549 }
1550
1551 if (d_off >= PAGE_SIZE) {
1552 kunmap_atomic(d_addr);
1553 d_page = get_next_page(d_page);
1554 BUG_ON(!d_page);
1555 d_addr = kmap_atomic(d_page);
1556 d_size = class->size - written;
1557 d_off = 0;
1558 }
1559 }
1560
1561 kunmap_atomic(d_addr);
1562 kunmap_atomic(s_addr);
1563}
1564
1565/*
1566 * Find the first allocated object in the zspage, starting at the given
1567 * index, and return its handle.
1568 */
1569static unsigned long find_alloced_obj(struct page *page, int index,
1570 struct size_class *class)
1571{
1572 unsigned long head;
1573 int offset = 0;
1574 unsigned long handle = 0;
1575 void *addr = kmap_atomic(page);
1576
1577 if (!is_first_page(page))
1578 offset = page->index;
1579 offset += class->size * index;
1580
1581 while (offset < PAGE_SIZE) {
1582 head = obj_to_head(class, page, addr + offset);
1583 if (head & OBJ_ALLOCATED_TAG) {
1584 handle = head & ~OBJ_ALLOCATED_TAG;
1585 if (trypin_tag(handle))
1586 break;
1587 handle = 0;
1588 }
1589
1590 offset += class->size;
1591 index++;
1592 }
1593
1594 kunmap_atomic(addr);
1595 return handle;
1596}
1597
1598struct zs_compact_control {
1599 /* Source page for migration which could be a subpage of zspage. */
1600 struct page *s_page;
1601 /* Destination page for migration, which should be the first page
1602 * of a zspage. */
1603 struct page *d_page;
1604 /* Starting object index within @s_page from which to scan for live
1605 * objects in the subpage. */
1606 int index;
1607 /* how many objects were migrated */
1608 int nr_migrated;
1609};
1610
1611static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
1612 struct zs_compact_control *cc)
1613{
1614 unsigned long used_obj, free_obj;
1615 unsigned long handle;
1616 struct page *s_page = cc->s_page;
1617 struct page *d_page = cc->d_page;
1618 unsigned long index = cc->index;
1619 int nr_migrated = 0;
1620 int ret = 0;
1621
1622 while (1) {
1623 handle = find_alloced_obj(s_page, index, class);
1624 if (!handle) {
1625 s_page = get_next_page(s_page);
1626 if (!s_page)
1627 break;
1628 index = 0;
1629 continue;
1630 }
1631
1632 /* Stop if there is no more space */
1633 if (zspage_full(d_page)) {
1634 unpin_tag(handle);
1635 ret = -ENOMEM;
1636 break;
1637 }
1638
1639 used_obj = handle_to_obj(handle);
1640 free_obj = obj_malloc(d_page, class, handle);
1641 zs_object_copy(used_obj, free_obj, class);
1642 index++;
1643 record_obj(handle, free_obj);
1644 unpin_tag(handle);
1645 obj_free(pool, class, used_obj);
1646 nr_migrated++;
1647 }
1648
1649 /* Remember last position in this iteration */
1650 cc->s_page = s_page;
1651 cc->index = index;
1652 cc->nr_migrated = nr_migrated;
1653
1654 return ret;
1655}
1656
1657static struct page *alloc_target_page(struct size_class *class)
1658{
1659 int i;
1660 struct page *page;
1661
1662 for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
1663 page = class->fullness_list[i];
1664 if (page) {
1665 remove_zspage(page, class, i);
1666 break;
1667 }
1668 }
1669
1670 return page;
1671}
1672
1673static void putback_zspage(struct zs_pool *pool, struct size_class *class,
1674 struct page *first_page)
1675{
1676 enum fullness_group fullness;
1677
1678 BUG_ON(!is_first_page(first_page));
1679
1680 fullness = get_fullness_group(first_page);
1681 insert_zspage(first_page, class, fullness);
1682 set_zspage_mapping(first_page, class->index, fullness);
1329 1683
1330 if (fullness == ZS_EMPTY) { 1684 if (fullness == ZS_EMPTY) {
1685 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
1686 class->size, class->pages_per_zspage));
1331 atomic_long_sub(class->pages_per_zspage, 1687 atomic_long_sub(class->pages_per_zspage,
1332 &pool->pages_allocated); 1688 &pool->pages_allocated);
1689
1333 free_zspage(first_page); 1690 free_zspage(first_page);
1334 } 1691 }
1335} 1692}
1336EXPORT_SYMBOL_GPL(zs_free); 1693
1694static struct page *isolate_source_page(struct size_class *class)
1695{
1696 struct page *page;
1697
1698 page = class->fullness_list[ZS_ALMOST_EMPTY];
1699 if (page)
1700 remove_zspage(page, class, ZS_ALMOST_EMPTY);
1701
1702 return page;
1703}
1704
1705static unsigned long __zs_compact(struct zs_pool *pool,
1706 struct size_class *class)
1707{
1708 int nr_to_migrate;
1709 struct zs_compact_control cc;
1710 struct page *src_page;
1711 struct page *dst_page = NULL;
1712 unsigned long nr_total_migrated = 0;
1713
1714 spin_lock(&class->lock);
1715 while ((src_page = isolate_source_page(class))) {
1716
1717 BUG_ON(!is_first_page(src_page));
1718
1719 /* The goal is to migrate all live objects in the source page */
1720 nr_to_migrate = src_page->inuse;
1721 cc.index = 0;
1722 cc.s_page = src_page;
1723
1724 while ((dst_page = alloc_target_page(class))) {
1725 cc.d_page = dst_page;
1726 /*
1727 * If there is no more space in dst_page, try to
1728 * allocate another zspage.
1729 */
1730 if (!migrate_zspage(pool, class, &cc))
1731 break;
1732
1733 putback_zspage(pool, class, dst_page);
1734 nr_total_migrated += cc.nr_migrated;
1735 nr_to_migrate -= cc.nr_migrated;
1736 }
1737
1738 /* Stop if we couldn't find a destination page */
1739 if (dst_page == NULL)
1740 break;
1741
1742 putback_zspage(pool, class, dst_page);
1743 putback_zspage(pool, class, src_page);
1744 spin_unlock(&class->lock);
1745 nr_total_migrated += cc.nr_migrated;
1746 cond_resched();
1747 spin_lock(&class->lock);
1748 }
1749
1750 if (src_page)
1751 putback_zspage(pool, class, src_page);
1752
1753 spin_unlock(&class->lock);
1754
1755 return nr_total_migrated;
1756}
1757
1758unsigned long zs_compact(struct zs_pool *pool)
1759{
1760 int i;
1761 unsigned long nr_migrated = 0;
1762 struct size_class *class;
1763
1764 for (i = zs_size_classes - 1; i >= 0; i--) {
1765 class = pool->size_class[i];
1766 if (!class)
1767 continue;
1768 if (class->index != i)
1769 continue;
1770 nr_migrated += __zs_compact(pool, class);
1771 }
1772
1773 return nr_migrated;
1774}
1775EXPORT_SYMBOL_GPL(zs_compact);
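zs_compact() is deliberately simple to drive: it walks the size classes from largest to smallest, repeatedly draining a ZS_ALMOST_EMPTY source zspage into partially full targets, and returns the total number of objects migrated. A minimal caller sketch (the message wording is illustrative):

	unsigned long migrated = zs_compact(pool);

	pr_info("zsmalloc: compaction migrated %lu objects\n", migrated);

Emptied zspages are released inside putback_zspage(), so a successful pass directly lowers pool->pages_allocated.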
1337 1776
1338/** 1777/**
1339 * zs_create_pool - Creates an allocation pool to work from. 1778 * zs_create_pool - Creates an allocation pool to work from.
@@ -1355,20 +1794,20 @@ struct zs_pool *zs_create_pool(char *name, gfp_t flags)
1355 if (!pool) 1794 if (!pool)
1356 return NULL; 1795 return NULL;
1357 1796
1358 pool->name = kstrdup(name, GFP_KERNEL);
1359 if (!pool->name) {
1360 kfree(pool);
1361 return NULL;
1362 }
1363
1364 pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), 1797 pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
1365 GFP_KERNEL); 1798 GFP_KERNEL);
1366 if (!pool->size_class) { 1799 if (!pool->size_class) {
1367 kfree(pool->name);
1368 kfree(pool); 1800 kfree(pool);
1369 return NULL; 1801 return NULL;
1370 } 1802 }
1371 1803
1804 pool->name = kstrdup(name, GFP_KERNEL);
1805 if (!pool->name)
1806 goto err;
1807
1808 if (create_handle_cache(pool))
1809 goto err;
1810
1372 /* 1811 /*
1373 * Iterate reversly, because, size of size_class that we want to use 1812 * Iterate reversly, because, size of size_class that we want to use
1374 * for merging should be larger or equal to current size. 1813 * for merging should be larger or equal to current size.
@@ -1406,6 +1845,9 @@ struct zs_pool *zs_create_pool(char *name, gfp_t flags)
1406 class->size = size; 1845 class->size = size;
1407 class->index = i; 1846 class->index = i;
1408 class->pages_per_zspage = pages_per_zspage; 1847 class->pages_per_zspage = pages_per_zspage;
1848 if (pages_per_zspage == 1 &&
1849 get_maxobj_per_zspage(size, pages_per_zspage) == 1)
1850 class->huge = true;
1409 spin_lock_init(&class->lock); 1851 spin_lock_init(&class->lock);
1410 pool->size_class[i] = class; 1852 pool->size_class[i] = class;
1411 1853
@@ -1450,6 +1892,7 @@ void zs_destroy_pool(struct zs_pool *pool)
1450 kfree(class); 1892 kfree(class);
1451 } 1893 }
1452 1894
1895 destroy_handle_cache(pool);
1453 kfree(pool->size_class); 1896 kfree(pool->size_class);
1454 kfree(pool->name); 1897 kfree(pool->name);
1455 kfree(pool); 1898 kfree(pool);
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index fb78117b896c..9068e72aa73c 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -1,9 +1,11 @@
1config SUNRPC 1config SUNRPC
2 tristate 2 tristate
3 depends on MULTIUSER
3 4
4config SUNRPC_GSS 5config SUNRPC_GSS
5 tristate 6 tristate
6 select OID_REGISTRY 7 select OID_REGISTRY
8 depends on MULTIUSER
7 9
8config SUNRPC_BACKCHANNEL 10config SUNRPC_BACKCHANNEL
9 bool 11 bool
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 5199bb1a017e..2928afffbb81 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1072,10 +1072,12 @@ void qword_add(char **bpp, int *lp, char *str)
1072 1072
1073 if (len < 0) return; 1073 if (len < 0) return;
1074 1074
1075 ret = string_escape_str(str, &bp, len, ESCAPE_OCTAL, "\\ \n\t"); 1075 ret = string_escape_str(str, bp, len, ESCAPE_OCTAL, "\\ \n\t");
1076 if (ret < 0 || ret == len) 1076 if (ret >= len) {
1077 bp += len;
1077 len = -1; 1078 len = -1;
1078 else { 1079 } else {
1080 bp += ret;
1079 len -= ret; 1081 len -= ret;
1080 *bp++ = ' '; 1082 *bp++ = ' ';
1081 len--; 1083 len--;
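The qword_add() rework above tracks a behavioural change to string_escape_str() made elsewhere in this series: it now writes through the buffer pointer directly instead of taking a char **, and the "ret >= len" test implies an snprintf-like contract in which the return value is the length the escaped string needs, even when that exceeds the buffer. A sketch of the new calling pattern under that assumption:

	char buf[64];
	int len = sizeof(buf);
	int ret = string_escape_str("a b\tc", buf, len, ESCAPE_OCTAL, "\\ \n\t");

	if (ret >= len) {
		/* escaped form did not fit; ret is the length it would need */
	} else {
		buf[ret] = '\0';	/* exactly ret escaped bytes were written */
	}

Note that the function does not NUL-terminate the output itself, which is why qword_add() keeps tracking the remaining length manually.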
diff --git a/security/Kconfig b/security/Kconfig
index beb86b500adf..bf4ec46474b6 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -21,6 +21,7 @@ config SECURITY_DMESG_RESTRICT
21config SECURITY 21config SECURITY
22 bool "Enable different security models" 22 bool "Enable different security models"
23 depends on SYSFS 23 depends on SYSFS
24 depends on MULTIUSER
24 help 25 help
25 This allows you to choose different security modules to be 26 This allows you to choose different security modules to be
26 configured into your kernel. 27 configured into your kernel.
diff --git a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
index 3d8e5b033e1d..49003674de4f 100644
--- a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
+++ b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
@@ -21,9 +21,13 @@ static int test_body(void)
21 * Typically the mmap will fail because no huge pages are 21 * Typically the mmap will fail because no huge pages are
22 * allocated on the system. But if there are huge pages 22 * allocated on the system. But if there are huge pages
23 * allocated the mmap will succeed. That's fine too, we just 23 * allocated the mmap will succeed. That's fine too, we just
24 * munmap here before continuing. 24 * munmap here before continuing. munmap() length of
25 * MAP_HUGETLB memory must be hugepage aligned.
25 */ 26 */
26 munmap(addr, SIZE); 27 if (munmap(addr, SIZE)) {
28 perror("munmap");
29 return 1;
30 }
27 } 31 }
28 32
29 p = mmap(addr, SIZE, PROT_READ | PROT_WRITE, 33 p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
diff --git a/tools/testing/selftests/vm/hugetlbfstest.c b/tools/testing/selftests/vm/hugetlbfstest.c
index ea40ff8c2391..02e1072ec187 100644
--- a/tools/testing/selftests/vm/hugetlbfstest.c
+++ b/tools/testing/selftests/vm/hugetlbfstest.c
@@ -34,6 +34,7 @@ static void do_mmap(int fd, int extra_flags, int unmap)
34 int *p; 34 int *p;
35 int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags; 35 int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags;
36 u64 before, after; 36 u64 before, after;
37 int ret;
37 38
38 before = read_rss(); 39 before = read_rss();
39 p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0); 40 p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0);
@@ -44,7 +45,8 @@ static void do_mmap(int fd, int extra_flags, int unmap)
44 !"rss didn't grow as expected"); 45 !"rss didn't grow as expected");
45 if (!unmap) 46 if (!unmap)
46 return; 47 return;
47 munmap(p, length); 48 ret = munmap(p, length);
49 assert(!ret || !"munmap returned an unexpected error");
48 after = read_rss(); 50 after = read_rss();
49 assert(llabs(after - before) < 0x40000 || 51 assert(llabs(after - before) < 0x40000 ||
50 !"rss didn't shrink as expected"); 52 !"rss didn't shrink as expected");
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index ac56639dd4a9..addcd6fc1ecc 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -73,7 +73,11 @@ int main(void)
73 write_bytes(addr); 73 write_bytes(addr);
74 ret = read_bytes(addr); 74 ret = read_bytes(addr);
75 75
76 munmap(addr, LENGTH); 76 /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
77 if (munmap(addr, LENGTH)) {
78 perror("munmap");
79 exit(1);
80 }
77 81
78 return ret; 82 return ret;
79} 83}