247 files changed, 8796 insertions, 2889 deletions
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt index 8686e789542e..1f06daf03f5b 100644 --- a/Documentation/aoe/udev.txt +++ b/Documentation/aoe/udev.txt | |||
@@ -23,4 +23,4 @@ SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="02 | |||
23 | SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220" | 23 | SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220" |
24 | 24 | ||
25 | # aoe block devices | 25 | # aoe block devices |
26 | KERNEL=="etherd*", NAME="%k", GROUP="disk" | 26 | KERNEL=="etherd*", GROUP="disk" |
diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.txt new file mode 100644 index 000000000000..2bbf4cc40c3f --- /dev/null +++ b/Documentation/block/cmdline-partition.txt | |||
@@ -0,0 +1,39 @@ | |||
1 | Embedded device command line partition | ||
2 | ===================================================================== | ||
3 | |||
4 | Read the block device partition table from the kernel command line. | ||
5 | This is intended for embedded devices that use a fixed block device | ||
6 | (eMMC). No MBR is needed, which saves storage space, and the bootloader | ||
7 | can access data on the block device by absolute address. | ||
8 | Users can easily change the partition layout. | ||
9 | |||
10 | The format for the command line is just like mtdparts: | ||
11 | |||
12 | blkdevparts=<blkdev-def>[;<blkdev-def>] | ||
13 | <blkdev-def> := <blkdev-id>:<partdef>[,<partdef>] | ||
14 | <partdef> := <size>[@<offset>](part-name) | ||
15 | |||
16 | <blkdev-id> | ||
17 | block device disk name. Embedded devices use a fixed block device, | ||
18 | so its disk name is also fixed, e.g. mmcblk0, mmcblk1, mmcblk0boot0. | ||
19 | |||
20 | <size> | ||
21 | partition size, in bytes, such as: 512, 1m, 1G. | ||
22 | |||
23 | <offset> | ||
24 | partition start address, in bytes. | ||
25 | |||
26 | (part-name) | ||
27 | partition name. The kernel sends a uevent with "PARTNAME"; an application | ||
28 | can create a link to the block device partition using that name, so | ||
29 | user space applications can access the partition by partition name. | ||
30 | |||
31 | Example: | ||
32 | the eMMC disk names are "mmcblk0" and "mmcblk0boot0" | ||
33 | |||
34 | bootargs: | ||
35 | 'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)' | ||
36 | |||
37 | dmesg: | ||
38 | mmcblk0: p1(data0) p2(data1) p3() | ||
39 | mmcblk0boot0: p1(boot) p2(kernel) | ||
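
As an aside, the <size>[@<offset>](part-name) grammar above is simple enough to decode by hand. Below is a minimal user-space sketch (our own illustration, not the in-kernel parser) for a single partdef; the k/m/g suffix handling follows the same convention as the kernel's memparse():

#include <stdio.h>
#include <stdlib.h>

/* Parse a size with an optional k/m/g suffix, like the kernel's memparse(). */
static unsigned long long parse_size(const char *s, const char **endp)
{
	char *end;
	unsigned long long v = strtoull(s, &end, 0);

	switch (*end) {
	case 'g': case 'G': v <<= 10; /* fall through */
	case 'm': case 'M': v <<= 10; /* fall through */
	case 'k': case 'K': v <<= 10; end++;
	}
	*endp = end;
	return v;
}

int main(void)
{
	const char *partdef = "1G@0(data0)";   /* hypothetical example entry */
	const char *end;
	unsigned long long size, offset = 0;
	char name[32] = "";

	size = parse_size(partdef, &end);
	if (*end == '@')                        /* optional start offset */
		offset = parse_size(end + 1, &end);
	if (*end == '(')                        /* optional partition name */
		sscanf(end, "(%31[^)])", name);
	printf("size=%llu offset=%llu name=%s\n", size, offset, name);
	return 0;
}

Run on the hypothetical entry "1G@0(data0)", this prints size=1073741824 offset=0 name=data0.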
diff --git a/Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt b/Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt new file mode 100644 index 000000000000..c9d3ac1477fe --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | MOXA ART real-time clock | ||
2 | |||
3 | Required properties: | ||
4 | |||
5 | - compatible : Should be "moxa,moxart-rtc" | ||
6 | - gpio-rtc-sclk : RTC sclk gpio, with zero flags | ||
7 | - gpio-rtc-data : RTC data gpio, with zero flags | ||
8 | - gpio-rtc-reset : RTC reset gpio, with zero flags | ||
9 | |||
10 | Example: | ||
11 | |||
12 | rtc: rtc { | ||
13 | compatible = "moxa,moxart-rtc"; | ||
14 | gpio-rtc-sclk = <&gpio 5 0>; | ||
15 | gpio-rtc-data = <&gpio 6 0>; | ||
16 | gpio-rtc-reset = <&gpio 7 0>; | ||
17 | }; | ||
diff --git a/Documentation/devicetree/bindings/rtc/rtc-omap.txt b/Documentation/devicetree/bindings/rtc/rtc-omap.txt index b47aa415c820..5a0f02d34d95 100644 --- a/Documentation/devicetree/bindings/rtc/rtc-omap.txt +++ b/Documentation/devicetree/bindings/rtc/rtc-omap.txt | |||
@@ -1,7 +1,11 @@ | |||
1 | TI Real Time Clock | 1 | TI Real Time Clock |
2 | 2 | ||
3 | Required properties: | 3 | Required properties: |
4 | - compatible: "ti,da830-rtc" | 4 | - compatible: |
5 | - "ti,da830-rtc" - for RTC IP used similar to that on DA8xx SoC family. | ||
6 | - "ti,am3352-rtc" - for RTC IP used similar to that on AM335x SoC family. | ||
7 | This RTC IP has special WAKE-EN Register to enable | ||
8 | Wakeup generation for event Alarm. | ||
5 | - reg: Address range of rtc register set | 9 | - reg: Address range of rtc register set |
6 | - interrupts: rtc timer, alarm interrupts in order | 10 | - interrupts: rtc timer, alarm interrupts in order |
7 | - interrupt-parent: phandle for the interrupt controller | 11 | - interrupt-parent: phandle for the interrupt controller |
diff --git a/Documentation/devicetree/bindings/rtc/rtc-palmas.txt b/Documentation/devicetree/bindings/rtc/rtc-palmas.txt new file mode 100644 index 000000000000..adbccc0a51e1 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/rtc-palmas.txt | |||
@@ -0,0 +1,33 @@ | |||
1 | Palmas RTC controller bindings | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: | ||
5 | - "ti,palmas-rtc" for palma series of the RTC controller | ||
6 | - interrupt-parent: Parent interrupt device, must be handle of palmas node. | ||
7 | - interrupts: Interrupt number of RTC submodule on device. | ||
8 | |||
9 | Optional properties: | ||
10 | |||
11 | - ti,backup-battery-chargeable: Palmas series devices like the TPS65913 or | ||
12 | TPS80036 support a backup battery for powering the RTC when the main | ||
13 | battery is removed or in a very low power state. The backup battery | ||
14 | can be chargeable or non-chargeable. This flag tells whether the | ||
15 | battery is chargeable or not. If it is chargeable, the driver can | ||
16 | enable charging. | ||
17 | - ti,backup-battery-charge-high-current: Enable high-current charging of | ||
18 | the backup battery. The device supports charging at < 100mA or > 100mA; | ||
19 | high current means > 100mA. If this property is absent, the battery is | ||
20 | charged at the lower current, i.e. < 100mA. | ||
21 | |||
22 | Example: | ||
23 | palmas: tps65913@58 { | ||
24 | ... | ||
25 | palmas_rtc: rtc { | ||
26 | compatible = "ti,palmas-rtc"; | ||
27 | interrupt-parent = <&palmas>; | ||
28 | interrupts = <8 0>; | ||
29 | ti,backup-battery-chargeable; | ||
30 | ti,backup-battery-charge-high-current; | ||
31 | }; | ||
32 | ... | ||
33 | }; | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fcc22c982a25..823c95faebd2 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -854,16 +854,15 @@ Committed_AS: The amount of memory presently allocated on the system. | |||
854 | The committed memory is a sum of all of the memory which | 854 | The committed memory is a sum of all of the memory which |
855 | has been allocated by processes, even if it has not been | 855 | has been allocated by processes, even if it has not been |
856 | "used" by them as of yet. A process which malloc()'s 1G | 856 | "used" by them as of yet. A process which malloc()'s 1G |
857 | of memory, but only touches 300M of it will only show up | 857 | of memory, but only touches 300M of it will show up as |
858 | as using 300M of memory even if it has the address space | 858 | using 1G. This 1G is memory which has been "committed" to |
859 | allocated for the entire 1G. This 1G is memory which has | 859 | by the VM and can be used at any time by the allocating |
860 | been "committed" to by the VM and can be used at any time | 860 | application. With strict overcommit enabled on the system |
861 | by the allocating application. With strict overcommit | 861 | (mode 2 in 'vm.overcommit_memory'),allocations which would |
862 | enabled on the system (mode 2 in 'vm.overcommit_memory'), | 862 | exceed the CommitLimit (detailed above) will not be permitted. |
863 | allocations which would exceed the CommitLimit (detailed | 863 | This is useful if one needs to guarantee that processes will |
864 | above) will not be permitted. This is useful if one needs | 864 | not fail due to lack of memory once that memory has been |
865 | to guarantee that processes will not fail due to lack of | 865 | successfully allocated. |
866 | memory once that memory has been successfully allocated. | ||
867 | VmallocTotal: total size of vmalloc memory area | 866 | VmallocTotal: total size of vmalloc memory area |
868 | VmallocUsed: amount of vmalloc area which is used | 867 | VmallocUsed: amount of vmalloc area which is used |
869 | VmallocChunk: largest contiguous block of vmalloc area which is free | 868 | VmallocChunk: largest contiguous block of vmalloc area which is free |
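
The commit-charge behaviour described above is easy to observe. The following sketch (our own demo code, not from the kernel tree) prints the Committed_AS line from /proc/meminfo around a large malloc(); the full 1G is charged at allocation time, and touching 300M of it afterwards changes nothing:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Print the Committed_AS line from /proc/meminfo. */
static void show_committed(const char *tag)
{
	char line[128];
	FILE *f = fopen("/proc/meminfo", "r");

	while (f && fgets(line, sizeof(line), f))
		if (!strncmp(line, "Committed_AS:", 13))
			printf("%-8s %s", tag, line);
	if (f)
		fclose(f);
}

int main(void)
{
	size_t i, one_gig = 1UL << 30;
	char *p;

	show_committed("before");
	p = malloc(one_gig);       /* the whole 1G is committed right here */
	if (!p)
		return 1;
	show_committed("malloc");
	for (i = 0; i < 300UL << 20; i += 4096)
		p[i] = 1;          /* touch only 300M; commit charge unchanged */
	show_committed("touched");
	free(p);
	return 0;
}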
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt index 59b4a0962e0f..b176928e6963 100644 --- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt +++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt | |||
@@ -79,6 +79,10 @@ to just make sure certain lists can't become empty. | |||
79 | Most systems just mount another filesystem over rootfs and ignore it. The | 79 | Most systems just mount another filesystem over rootfs and ignore it. The |
80 | amount of space an empty instance of ramfs takes up is tiny. | 80 | amount of space an empty instance of ramfs takes up is tiny. |
81 | 81 | ||
82 | If CONFIG_TMPFS is enabled, rootfs will use tmpfs instead of ramfs by | ||
83 | default. To force ramfs, add "rootfstype=ramfs" to the kernel command | ||
84 | line. | ||
85 | |||
82 | What is initramfs? | 86 | What is initramfs? |
83 | ------------------ | 87 | ------------------ |
84 | 88 | ||
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index ab7d16efa96b..9d4c1d18ad44 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt | |||
@@ -182,6 +182,7 @@ core_pattern is used to specify a core dumpfile pattern name. | |||
182 | %<NUL> '%' is dropped | 182 | %<NUL> '%' is dropped |
183 | %% output one '%' | 183 | %% output one '%' |
184 | %p pid | 184 | %p pid |
185 | %P global pid (init PID namespace) | ||
185 | %u uid | 186 | %u uid |
186 | %g gid | 187 | %g gid |
187 | %d dump mode, matches PR_SET_DUMPABLE and | 188 | %d dump mode, matches PR_SET_DUMPABLE and |
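
As a quick illustration of the new %P specifier, the snippet below (our own sketch; the /tmp path and the exact pattern are made up for the example, and root is required) sets a core pattern that records both the in-namespace pid (%p) and the global pid (%P):

#include <stdio.h>

/*
 * %p is the pid as seen in the crashing task's PID namespace;
 * %P is the global pid in the init PID namespace.
 */
int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/core_pattern", "w");

	if (!f) {
		perror("core_pattern");
		return 1;
	}
	fputs("/tmp/core.%p.%P.%u", f);
	fclose(f);
	return 0;
}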
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 36ecc26c7433..79a797eb3e87 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -200,17 +200,25 @@ fragmentation index is <= extfrag_threshold. The default value is 500. | |||
200 | 200 | ||
201 | hugepages_treat_as_movable | 201 | hugepages_treat_as_movable |
202 | 202 | ||
203 | This parameter is only useful when kernelcore= is specified at boot time to | 203 | This parameter controls whether we can allocate hugepages from ZONE_MOVABLE |
204 | create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages | 204 | or not. If set to non-zero, hugepages can be allocated from ZONE_MOVABLE. |
205 | are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero | 205 | ZONE_MOVABLE is created when kernel boot parameter kernelcore= is specified, |
206 | value written to hugepages_treat_as_movable allows huge pages to be allocated | 206 | so this parameter has no effect if used without kernelcore=. |
207 | from ZONE_MOVABLE. | 207 | |
208 | 208 | Hugepage migration is now available in some situations which depend on the | |
209 | Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge | 209 | architecture and/or the hugepage size. If a hugepage supports migration, |
210 | pages pool can easily grow or shrink within. Assuming that applications are | 210 | allocation from ZONE_MOVABLE is always enabled for the hugepage regardless |
211 | not running that mlock() a lot of memory, it is likely the huge pages pool | 211 | of the value of this parameter. |
212 | can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value | 212 | IOW, this parameter affects only non-migratable hugepages. |
213 | into nr_hugepages and triggering page reclaim. | 213 | |
214 | Assuming that hugepages are not migratable on your system, one use case of | ||
215 | this parameter is that users can make the hugepage pool more extensible by | ||
216 | enabling allocation from ZONE_MOVABLE. This is because page reclaim, | ||
217 | migration, and compaction work harder on ZONE_MOVABLE, so contiguous | ||
218 | memory is more likely to be found there. Note that using ZONE_MOVABLE for | ||
219 | non-migratable hugepages can harm other features such as memory hot-remove | ||
220 | (which expects that memory blocks on ZONE_MOVABLE are always removable), | ||
221 | so it is a trade-off for which the user is responsible. | ||
214 | 222 | ||
215 | ============================================================== | 223 | ============================================================== |
216 | 224 | ||
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt index 4ac359b7aa17..bdd4bb97fff7 100644 --- a/Documentation/vm/hugetlbpage.txt +++ b/Documentation/vm/hugetlbpage.txt | |||
@@ -165,6 +165,7 @@ which function as described above for the default huge page-sized case. | |||
165 | 165 | ||
166 | 166 | ||
167 | Interaction of Task Memory Policy with Huge Page Allocation/Freeing | 167 | Interaction of Task Memory Policy with Huge Page Allocation/Freeing |
168 | =================================================================== | ||
168 | 169 | ||
169 | Whether huge pages are allocated and freed via the /proc interface or | 170 | Whether huge pages are allocated and freed via the /proc interface or |
170 | the /sysfs interface using the nr_hugepages_mempolicy attribute, the NUMA | 171 | the /sysfs interface using the nr_hugepages_mempolicy attribute, the NUMA |
@@ -229,6 +230,7 @@ resulting effect on persistent huge page allocation is as follows: | |||
229 | of huge pages over all on-line nodes with memory. | 230 | of huge pages over all on-line nodes with memory. |
230 | 231 | ||
231 | Per Node Hugepages Attributes | 232 | Per Node Hugepages Attributes |
233 | ============================= | ||
232 | 234 | ||
233 | A subset of the contents of the root huge page control directory in sysfs, | 235 | A subset of the contents of the root huge page control directory in sysfs, |
234 | described above, will be replicated under the system device of each | 236 | described above, will be replicated under the system device of each |
@@ -258,6 +260,7 @@ applied, from which node the huge page allocation will be attempted. | |||
258 | 260 | ||
259 | 261 | ||
260 | Using Huge Pages | 262 | Using Huge Pages |
263 | ================ | ||
261 | 264 | ||
262 | If the user applications are going to request huge pages using mmap system | 265 | If the user applications are going to request huge pages using mmap system |
263 | call, then it is required that system administrator mount a file system of | 266 | call, then it is required that system administrator mount a file system of |
@@ -296,20 +299,16 @@ calls, though the mount of filesystem will be required for using mmap calls | |||
296 | without MAP_HUGETLB. For an example of how to use mmap with MAP_HUGETLB see | 299 | without MAP_HUGETLB. For an example of how to use mmap with MAP_HUGETLB see |
297 | map_hugetlb.c. | 300 | map_hugetlb.c. |
298 | 301 | ||
299 | ******************************************************************* | 302 | Examples |
303 | ======== | ||
300 | 304 | ||
301 | /* | 305 | 1) map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c |
302 | * map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c | ||
303 | */ | ||
304 | 306 | ||
305 | ******************************************************************* | 307 | 2) hugepage-shm: see tools/testing/selftests/vm/hugepage-shm.c |
306 | 308 | ||
307 | /* | 309 | 3) hugepage-mmap: see tools/testing/selftests/vm/hugepage-mmap.c |
308 | * hugepage-shm: see tools/testing/selftests/vm/hugepage-shm.c | ||
309 | */ | ||
310 | 310 | ||
311 | ******************************************************************* | 311 | 4) The libhugetlbfs (http://libhugetlbfs.sourceforge.net) library provides a |
312 | 312 | wide range of userspace tools to help with huge page usability, environment | |
313 | /* | 313 | setup, and control. Furthermore it provides useful test cases that should be |
314 | * hugepage-mmap: see tools/testing/selftests/vm/hugepage-mmap.c | 314 | used when modifying code to ensure no regressions are introduced. |
315 | */ | ||
diff --git a/Documentation/vm/soft-dirty.txt b/Documentation/vm/soft-dirty.txt index 9a12a5956bc0..55684d11a1e8 100644 --- a/Documentation/vm/soft-dirty.txt +++ b/Documentation/vm/soft-dirty.txt | |||
@@ -28,6 +28,13 @@ This is so, since the pages are still mapped to physical memory, and thus all | |||
28 | the kernel does is finds this fact out and puts both writable and soft-dirty | 28 | the kernel does is finds this fact out and puts both writable and soft-dirty |
29 | bits on the PTE. | 29 | bits on the PTE. |
30 | 30 | ||
31 | While in most cases tracking memory changes by #PF-s is more than enough, | ||
32 | there is still a scenario in which we can lose soft-dirty bits -- a task | ||
33 | unmaps a previously mapped memory region and then maps a new one at exactly | ||
34 | the same place. When unmap is called, the kernel internally clears PTE values, | ||
35 | including soft-dirty bits. To notify the user space application about such | ||
36 | memory region renewal, the kernel always marks new memory regions (and | ||
37 | expanded regions) as soft-dirty. | ||
31 | 38 | ||
32 | This feature is actively used by the checkpoint-restore project. You | 39 | This feature is actively used by the checkpoint-restore project. You |
33 | can find more details about it on http://criu.org | 40 | can find more details about it on http://criu.org |
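
The soft-dirty bit itself is exposed through bit 55 of each /proc/PID/pagemap entry, and writing "4" to /proc/PID/clear_refs clears it. A minimal self-check sketch (our own; assumes CONFIG_MEM_SOFT_DIRTY, 4K pages, and sufficient privileges to read pagemap):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define PAGE_SIZE 4096	/* assumed; use sysconf(_SC_PAGESIZE) in real code */

/* Read this process's pagemap entry for addr; bit 55 is soft-dirty. */
static int soft_dirty(void *addr)
{
	uint64_t entry = 0;
	int fd = open("/proc/self/pagemap", O_RDONLY);
	off_t off = ((uintptr_t)addr / PAGE_SIZE) * sizeof(entry);

	pread(fd, &entry, sizeof(entry), off);
	close(fd);
	return (entry >> 55) & 1;
}

int main(void)
{
	char *p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int fd = open("/proc/self/clear_refs", O_WRONLY);

	p[0] = 1;		/* fault the page in (marks it soft-dirty) */
	write(fd, "4", 1);	/* clear all soft-dirty bits for this task */
	close(fd);
	printf("after clear: %d\n", soft_dirty(p));	/* expect 0 */
	p[0] = 2;		/* write again -> #PF sets soft-dirty anew */
	printf("after write: %d\n", soft_dirty(p));	/* expect 1 */
	return 0;
}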
diff --git a/MAINTAINERS b/MAINTAINERS index be70759e51c5..e61c2e83fc2b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -1028,7 +1028,7 @@ F: arch/arm/mach-orion5x/ts78xx-* | |||
1028 | ARM/MICREL KS8695 ARCHITECTURE | 1028 | ARM/MICREL KS8695 ARCHITECTURE |
1029 | M: Greg Ungerer <gerg@uclinux.org> | 1029 | M: Greg Ungerer <gerg@uclinux.org> |
1030 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | 1030 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) |
1031 | F: arch/arm/mach-ks8695 | 1031 | F: arch/arm/mach-ks8695/ |
1032 | S: Odd Fixes | 1032 | S: Odd Fixes |
1033 | 1033 | ||
1034 | ARM/MIOA701 MACHINE SUPPORT | 1034 | ARM/MIOA701 MACHINE SUPPORT |
@@ -1048,7 +1048,6 @@ M: STEricsson <STEricsson_nomadik_linux@list.st.com> | |||
1048 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | 1048 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) |
1049 | S: Maintained | 1049 | S: Maintained |
1050 | F: arch/arm/mach-nomadik/ | 1050 | F: arch/arm/mach-nomadik/ |
1051 | F: arch/arm/plat-nomadik/ | ||
1052 | F: drivers/i2c/busses/i2c-nomadik.c | 1051 | F: drivers/i2c/busses/i2c-nomadik.c |
1053 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git | 1052 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git |
1054 | 1053 | ||
@@ -1070,7 +1069,7 @@ F: drivers/mmc/host/msm_sdcc.h | |||
1070 | F: drivers/tty/serial/msm_serial.h | 1069 | F: drivers/tty/serial/msm_serial.h |
1071 | F: drivers/tty/serial/msm_serial.c | 1070 | F: drivers/tty/serial/msm_serial.c |
1072 | F: drivers/*/pm8???-* | 1071 | F: drivers/*/pm8???-* |
1073 | F: drivers/ssbi/ | 1072 | F: drivers/mfd/ssbi/ |
1074 | F: include/linux/mfd/pm8xxx/ | 1073 | F: include/linux/mfd/pm8xxx/ |
1075 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/davidb/linux-msm.git | 1074 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/davidb/linux-msm.git |
1076 | S: Maintained | 1075 | S: Maintained |
@@ -1156,7 +1155,6 @@ L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) | |||
1156 | W: http://www.fluff.org/ben/linux/ | 1155 | W: http://www.fluff.org/ben/linux/ |
1157 | S: Maintained | 1156 | S: Maintained |
1158 | F: arch/arm/plat-samsung/ | 1157 | F: arch/arm/plat-samsung/ |
1159 | F: arch/arm/plat-s3c24xx/ | ||
1160 | F: arch/arm/mach-s3c24*/ | 1158 | F: arch/arm/mach-s3c24*/ |
1161 | F: arch/arm/mach-s3c64xx/ | 1159 | F: arch/arm/mach-s3c64xx/ |
1162 | F: drivers/*/*s3c2410* | 1160 | F: drivers/*/*s3c2410* |
@@ -1179,8 +1177,6 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | |||
1179 | S: Maintained | 1177 | S: Maintained |
1180 | F: arch/arm/mach-s5pv210/mach-aquila.c | 1178 | F: arch/arm/mach-s5pv210/mach-aquila.c |
1181 | F: arch/arm/mach-s5pv210/mach-goni.c | 1179 | F: arch/arm/mach-s5pv210/mach-goni.c |
1182 | F: arch/arm/mach-exynos/mach-universal_c210.c | ||
1183 | F: arch/arm/mach-exynos/mach-nuri.c | ||
1184 | 1180 | ||
1185 | ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT | 1181 | ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT |
1186 | M: Kyungmin Park <kyungmin.park@samsung.com> | 1182 | M: Kyungmin Park <kyungmin.park@samsung.com> |
@@ -1325,7 +1321,7 @@ F: drivers/mmc/host/wmt-sdmmc.c | |||
1325 | F: drivers/pwm/pwm-vt8500.c | 1321 | F: drivers/pwm/pwm-vt8500.c |
1326 | F: drivers/rtc/rtc-vt8500.c | 1322 | F: drivers/rtc/rtc-vt8500.c |
1327 | F: drivers/tty/serial/vt8500_serial.c | 1323 | F: drivers/tty/serial/vt8500_serial.c |
1328 | F: drivers/usb/host/ehci-vt8500.c | 1324 | F: drivers/usb/host/ehci-platform.c |
1329 | F: drivers/usb/host/uhci-platform.c | 1325 | F: drivers/usb/host/uhci-platform.c |
1330 | F: drivers/video/vt8500lcdfb.* | 1326 | F: drivers/video/vt8500lcdfb.* |
1331 | F: drivers/video/wm8505fb* | 1327 | F: drivers/video/wm8505fb* |
@@ -1815,6 +1811,17 @@ L: netdev@vger.kernel.org | |||
1815 | S: Supported | 1811 | S: Supported |
1816 | F: drivers/net/ethernet/broadcom/bnx2x/ | 1812 | F: drivers/net/ethernet/broadcom/bnx2x/ |
1817 | 1813 | ||
1814 | BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE | ||
1815 | M: Christian Daudt <csd@broadcom.com> | ||
1816 | T: git git://git.github.com/broadcom/bcm11351 | ||
1817 | S: Maintained | ||
1818 | F: arch/arm/mach-bcm/ | ||
1819 | F: arch/arm/boot/dts/bcm113* | ||
1820 | F: arch/arm/boot/dts/bcm281* | ||
1821 | F: arch/arm/configs/bcm_defconfig | ||
1822 | F: drivers/mmc/host/sdhci_bcm_kona.c | ||
1823 | F: drivers/clocksource/bcm_kona_timer.c | ||
1824 | |||
1818 | BROADCOM BCM2835 ARM ARCHITECTURE | 1825 | BROADCOM BCM2835 ARM ARCHITECTURE |
1819 | M: Stephen Warren <swarren@wwwdotorg.org> | 1826 | M: Stephen Warren <swarren@wwwdotorg.org> |
1820 | L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) | 1827 | L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) |
@@ -2035,10 +2042,10 @@ W: http://ceph.com/ | |||
2035 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git | 2042 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git |
2036 | S: Supported | 2043 | S: Supported |
2037 | F: Documentation/filesystems/ceph.txt | 2044 | F: Documentation/filesystems/ceph.txt |
2038 | F: fs/ceph | 2045 | F: fs/ceph/ |
2039 | F: net/ceph | 2046 | F: net/ceph/ |
2040 | F: include/linux/ceph | 2047 | F: include/linux/ceph/ |
2041 | F: include/linux/crush | 2048 | F: include/linux/crush/ |
2042 | 2049 | ||
2043 | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: | 2050 | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: |
2044 | L: linux-usb@vger.kernel.org | 2051 | L: linux-usb@vger.kernel.org |
@@ -2335,7 +2342,7 @@ CPU POWER MONITORING SUBSYSTEM | |||
2335 | M: Dominik Brodowski <linux@dominikbrodowski.net> | 2342 | M: Dominik Brodowski <linux@dominikbrodowski.net> |
2336 | M: Thomas Renninger <trenn@suse.de> | 2343 | M: Thomas Renninger <trenn@suse.de> |
2337 | S: Maintained | 2344 | S: Maintained |
2338 | F: tools/power/cpupower | 2345 | F: tools/power/cpupower/ |
2339 | 2346 | ||
2340 | CPUSETS | 2347 | CPUSETS |
2341 | M: Li Zefan <lizefan@huawei.com> | 2348 | M: Li Zefan <lizefan@huawei.com> |
@@ -2773,7 +2780,7 @@ L: intel-gfx@lists.freedesktop.org | |||
2773 | L: dri-devel@lists.freedesktop.org | 2780 | L: dri-devel@lists.freedesktop.org |
2774 | T: git git://people.freedesktop.org/~danvet/drm-intel | 2781 | T: git git://people.freedesktop.org/~danvet/drm-intel |
2775 | S: Supported | 2782 | S: Supported |
2776 | F: drivers/gpu/drm/i915 | 2783 | F: drivers/gpu/drm/i915/ |
2777 | F: include/drm/i915* | 2784 | F: include/drm/i915* |
2778 | F: include/uapi/drm/i915* | 2785 | F: include/uapi/drm/i915* |
2779 | 2786 | ||
@@ -2785,7 +2792,7 @@ M: Kyungmin Park <kyungmin.park@samsung.com> | |||
2785 | L: dri-devel@lists.freedesktop.org | 2792 | L: dri-devel@lists.freedesktop.org |
2786 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos.git | 2793 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos.git |
2787 | S: Supported | 2794 | S: Supported |
2788 | F: drivers/gpu/drm/exynos | 2795 | F: drivers/gpu/drm/exynos/ |
2789 | F: include/drm/exynos* | 2796 | F: include/drm/exynos* |
2790 | F: include/uapi/drm/exynos* | 2797 | F: include/uapi/drm/exynos* |
2791 | 2798 | ||
@@ -3038,7 +3045,7 @@ M: Mauro Carvalho Chehab <m.chehab@samsung.com> | |||
3038 | L: linux-edac@vger.kernel.org | 3045 | L: linux-edac@vger.kernel.org |
3039 | W: bluesmoke.sourceforge.net | 3046 | W: bluesmoke.sourceforge.net |
3040 | S: Maintained | 3047 | S: Maintained |
3041 | F: drivers/edac/ghes-edac.c | 3048 | F: drivers/edac/ghes_edac.c |
3042 | 3049 | ||
3043 | EDAC-I82443BXGX | 3050 | EDAC-I82443BXGX |
3044 | M: Tim Small <tim@buttersideup.com> | 3051 | M: Tim Small <tim@buttersideup.com> |
@@ -3644,8 +3651,8 @@ M: Arnd Bergmann <arnd@arndb.de> | |||
3644 | L: linux-arch@vger.kernel.org | 3651 | L: linux-arch@vger.kernel.org |
3645 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git | 3652 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git |
3646 | S: Maintained | 3653 | S: Maintained |
3647 | F: include/asm-generic | 3654 | F: include/asm-generic/ |
3648 | F: include/uapi/asm-generic | 3655 | F: include/uapi/asm-generic/ |
3649 | 3656 | ||
3650 | GENERIC UIO DRIVER FOR PCI DEVICES | 3657 | GENERIC UIO DRIVER FOR PCI DEVICES |
3651 | M: "Michael S. Tsirkin" <mst@redhat.com> | 3658 | M: "Michael S. Tsirkin" <mst@redhat.com> |
@@ -3687,7 +3694,8 @@ GRE DEMULTIPLEXER DRIVER | |||
3687 | M: Dmitry Kozlov <xeb@mail.ru> | 3694 | M: Dmitry Kozlov <xeb@mail.ru> |
3688 | L: netdev@vger.kernel.org | 3695 | L: netdev@vger.kernel.org |
3689 | S: Maintained | 3696 | S: Maintained |
3690 | F: net/ipv4/gre.c | 3697 | F: net/ipv4/gre_demux.c |
3698 | F: net/ipv4/gre_offload.c | ||
3691 | F: include/net/gre.h | 3699 | F: include/net/gre.h |
3692 | 3700 | ||
3693 | GRETH 10/100/1G Ethernet MAC device driver | 3701 | GRETH 10/100/1G Ethernet MAC device driver |
@@ -3765,7 +3773,7 @@ L: linux-media@vger.kernel.org | |||
3765 | T: git git://linuxtv.org/media_tree.git | 3773 | T: git git://linuxtv.org/media_tree.git |
3766 | W: http://linuxtv.org | 3774 | W: http://linuxtv.org |
3767 | S: Odd Fixes | 3775 | S: Odd Fixes |
3768 | F: drivers/media/usb/hdpvr | 3776 | F: drivers/media/usb/hdpvr/ |
3769 | 3777 | ||
3770 | HWPOISON MEMORY FAILURE HANDLING | 3778 | HWPOISON MEMORY FAILURE HANDLING |
3771 | M: Andi Kleen <andi@firstfloor.org> | 3779 | M: Andi Kleen <andi@firstfloor.org> |
@@ -4574,7 +4582,7 @@ S: Supported | |||
4574 | W: http://www.openfabrics.org | 4582 | W: http://www.openfabrics.org |
4575 | W: www.open-iscsi.org | 4583 | W: www.open-iscsi.org |
4576 | Q: http://patchwork.kernel.org/project/linux-rdma/list/ | 4584 | Q: http://patchwork.kernel.org/project/linux-rdma/list/ |
4577 | F: drivers/infiniband/ulp/iser | 4585 | F: drivers/infiniband/ulp/iser/ |
4578 | 4586 | ||
4579 | ISDN SUBSYSTEM | 4587 | ISDN SUBSYSTEM |
4580 | M: Karsten Keil <isdn@linux-pingi.de> | 4588 | M: Karsten Keil <isdn@linux-pingi.de> |
@@ -4628,7 +4636,7 @@ W: http://palosaari.fi/linux/ | |||
4628 | Q: http://patchwork.linuxtv.org/project/linux-media/list/ | 4636 | Q: http://patchwork.linuxtv.org/project/linux-media/list/ |
4629 | T: git git://linuxtv.org/anttip/media_tree.git | 4637 | T: git git://linuxtv.org/anttip/media_tree.git |
4630 | S: Maintained | 4638 | S: Maintained |
4631 | F: drivers/media/tuners/it913x* | 4639 | F: drivers/media/tuners/tuner_it913x* |
4632 | 4640 | ||
4633 | IVTV VIDEO4LINUX DRIVER | 4641 | IVTV VIDEO4LINUX DRIVER |
4634 | M: Andy Walls <awalls@md.metrocast.net> | 4642 | M: Andy Walls <awalls@md.metrocast.net> |
@@ -5964,15 +5972,12 @@ S: Maintained | |||
5964 | F: arch/arm/*omap*/*pm* | 5972 | F: arch/arm/*omap*/*pm* |
5965 | F: drivers/cpufreq/omap-cpufreq.c | 5973 | F: drivers/cpufreq/omap-cpufreq.c |
5966 | 5974 | ||
5967 | OMAP POWERDOMAIN/CLOCKDOMAIN SOC ADAPTATION LAYER SUPPORT | 5975 | OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT |
5968 | M: Rajendra Nayak <rnayak@ti.com> | 5976 | M: Rajendra Nayak <rnayak@ti.com> |
5969 | M: Paul Walmsley <paul@pwsan.com> | 5977 | M: Paul Walmsley <paul@pwsan.com> |
5970 | L: linux-omap@vger.kernel.org | 5978 | L: linux-omap@vger.kernel.org |
5971 | S: Maintained | 5979 | S: Maintained |
5972 | F: arch/arm/mach-omap2/powerdomain2xxx_3xxx.c | 5980 | F: arch/arm/mach-omap2/prm* |
5973 | F: arch/arm/mach-omap2/powerdomain44xx.c | ||
5974 | F: arch/arm/mach-omap2/clockdomain2xxx_3xxx.c | ||
5975 | F: arch/arm/mach-omap2/clockdomain44xx.c | ||
5976 | 5981 | ||
5977 | OMAP AUDIO SUPPORT | 5982 | OMAP AUDIO SUPPORT |
5978 | M: Peter Ujfalusi <peter.ujfalusi@ti.com> | 5983 | M: Peter Ujfalusi <peter.ujfalusi@ti.com> |
@@ -6138,7 +6143,7 @@ W: http://openrisc.net | |||
6138 | L: linux@lists.openrisc.net (moderated for non-subscribers) | 6143 | L: linux@lists.openrisc.net (moderated for non-subscribers) |
6139 | S: Maintained | 6144 | S: Maintained |
6140 | T: git git://openrisc.net/~jonas/linux | 6145 | T: git git://openrisc.net/~jonas/linux |
6141 | F: arch/openrisc | 6146 | F: arch/openrisc/ |
6142 | 6147 | ||
6143 | OPENVSWITCH | 6148 | OPENVSWITCH |
6144 | M: Jesse Gross <jesse@nicira.com> | 6149 | M: Jesse Gross <jesse@nicira.com> |
@@ -6429,7 +6434,7 @@ M: Jamie Iles <jamie@jamieiles.com> | |||
6429 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | 6434 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) |
6430 | T: git git://github.com/jamieiles/linux-2.6-ji.git | 6435 | T: git git://github.com/jamieiles/linux-2.6-ji.git |
6431 | S: Supported | 6436 | S: Supported |
6432 | F: arch/arm/mach-picoxcell | 6437 | F: arch/arm/mach-picoxcell/ |
6433 | F: drivers/*/picoxcell* | 6438 | F: drivers/*/picoxcell* |
6434 | F: drivers/*/*/picoxcell* | 6439 | F: drivers/*/*/picoxcell* |
6435 | 6440 | ||
@@ -6702,7 +6707,7 @@ F: drivers/spi/spi-pxa2xx* | |||
6702 | F: drivers/usb/gadget/pxa2* | 6707 | F: drivers/usb/gadget/pxa2* |
6703 | F: include/sound/pxa2xx-lib.h | 6708 | F: include/sound/pxa2xx-lib.h |
6704 | F: sound/arm/pxa* | 6709 | F: sound/arm/pxa* |
6705 | F: sound/soc/pxa | 6710 | F: sound/soc/pxa/ |
6706 | 6711 | ||
6707 | MMP SUPPORT | 6712 | MMP SUPPORT |
6708 | M: Eric Miao <eric.y.miao@gmail.com> | 6713 | M: Eric Miao <eric.y.miao@gmail.com> |
@@ -7155,7 +7160,7 @@ SAMSUNG AUDIO (ASoC) DRIVERS | |||
7155 | M: Sangbeom Kim <sbkim73@samsung.com> | 7160 | M: Sangbeom Kim <sbkim73@samsung.com> |
7156 | L: alsa-devel@alsa-project.org (moderated for non-subscribers) | 7161 | L: alsa-devel@alsa-project.org (moderated for non-subscribers) |
7157 | S: Supported | 7162 | S: Supported |
7158 | F: sound/soc/samsung | 7163 | F: sound/soc/samsung/ |
7159 | 7164 | ||
7160 | SAMSUNG FRAMEBUFFER DRIVER | 7165 | SAMSUNG FRAMEBUFFER DRIVER |
7161 | M: Jingoo Han <jg1.han@samsung.com> | 7166 | M: Jingoo Han <jg1.han@samsung.com> |
@@ -7201,7 +7206,7 @@ SERIAL DRIVERS | |||
7201 | M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 7206 | M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
7202 | L: linux-serial@vger.kernel.org | 7207 | L: linux-serial@vger.kernel.org |
7203 | S: Maintained | 7208 | S: Maintained |
7204 | F: drivers/tty/serial | 7209 | F: drivers/tty/serial/ |
7205 | 7210 | ||
7206 | SYNOPSYS DESIGNWARE DMAC DRIVER | 7211 | SYNOPSYS DESIGNWARE DMAC DRIVER |
7207 | M: Viresh Kumar <viresh.linux@gmail.com> | 7212 | M: Viresh Kumar <viresh.linux@gmail.com> |
@@ -7236,7 +7241,7 @@ TLG2300 VIDEO4LINUX-2 DRIVER | |||
7236 | M: Huang Shijie <shijie8@gmail.com> | 7241 | M: Huang Shijie <shijie8@gmail.com> |
7237 | M: Hans Verkuil <hverkuil@xs4all.nl> | 7242 | M: Hans Verkuil <hverkuil@xs4all.nl> |
7238 | S: Odd Fixes | 7243 | S: Odd Fixes |
7239 | F: drivers/media/usb/tlg2300 | 7244 | F: drivers/media/usb/tlg2300/ |
7240 | 7245 | ||
7241 | SC1200 WDT DRIVER | 7246 | SC1200 WDT DRIVER |
7242 | M: Zwane Mwaikambo <zwane@arm.linux.org.uk> | 7247 | M: Zwane Mwaikambo <zwane@arm.linux.org.uk> |
@@ -7497,7 +7502,7 @@ L: linux-media@vger.kernel.org | |||
7497 | T: git git://linuxtv.org/media_tree.git | 7502 | T: git git://linuxtv.org/media_tree.git |
7498 | W: http://linuxtv.org | 7503 | W: http://linuxtv.org |
7499 | S: Odd Fixes | 7504 | S: Odd Fixes |
7500 | F: drivers/media/radio/radio-si4713.h | 7505 | F: drivers/media/radio/radio-si4713.c |
7501 | 7506 | ||
7502 | SIANO DVB DRIVER | 7507 | SIANO DVB DRIVER |
7503 | M: Mauro Carvalho Chehab <m.chehab@samsung.com> | 7508 | M: Mauro Carvalho Chehab <m.chehab@samsung.com> |
@@ -7506,9 +7511,9 @@ W: http://linuxtv.org | |||
7506 | T: git git://linuxtv.org/media_tree.git | 7511 | T: git git://linuxtv.org/media_tree.git |
7507 | S: Odd fixes | 7512 | S: Odd fixes |
7508 | F: drivers/media/common/siano/ | 7513 | F: drivers/media/common/siano/ |
7509 | F: drivers/media/dvb/siano/ | ||
7510 | F: drivers/media/usb/siano/ | 7514 | F: drivers/media/usb/siano/ |
7511 | F: drivers/media/mmc/siano | 7515 | F: drivers/media/usb/siano/ |
7516 | F: drivers/media/mmc/siano/ | ||
7512 | 7517 | ||
7513 | SH_VEU V4L2 MEM2MEM DRIVER | 7518 | SH_VEU V4L2 MEM2MEM DRIVER |
7514 | M: Guennadi Liakhovetski <g.liakhovetski@gmx.de> | 7519 | M: Guennadi Liakhovetski <g.liakhovetski@gmx.de> |
@@ -7546,9 +7551,9 @@ P: Vincent Sanders <vince@simtec.co.uk> | |||
7546 | M: Simtec Linux Team <linux@simtec.co.uk> | 7551 | M: Simtec Linux Team <linux@simtec.co.uk> |
7547 | W: http://www.simtec.co.uk/products/EB2410ITX/ | 7552 | W: http://www.simtec.co.uk/products/EB2410ITX/ |
7548 | S: Supported | 7553 | S: Supported |
7549 | F: arch/arm/mach-s3c2410/mach-bast.c | 7554 | F: arch/arm/mach-s3c24xx/mach-bast.c |
7550 | F: arch/arm/mach-s3c2410/bast-ide.c | 7555 | F: arch/arm/mach-s3c24xx/bast-ide.c |
7551 | F: arch/arm/mach-s3c2410/bast-irq.c | 7556 | F: arch/arm/mach-s3c24xx/bast-irq.c |
7552 | 7557 | ||
7553 | TI DAVINCI MACHINE SUPPORT | 7558 | TI DAVINCI MACHINE SUPPORT |
7554 | M: Sekhar Nori <nsekhar@ti.com> | 7559 | M: Sekhar Nori <nsekhar@ti.com> |
@@ -7557,7 +7562,7 @@ L: davinci-linux-open-source@linux.davincidsp.com (moderated for non-subscribers | |||
7557 | T: git git://gitorious.org/linux-davinci/linux-davinci.git | 7562 | T: git git://gitorious.org/linux-davinci/linux-davinci.git |
7558 | Q: http://patchwork.kernel.org/project/linux-davinci/list/ | 7563 | Q: http://patchwork.kernel.org/project/linux-davinci/list/ |
7559 | S: Supported | 7564 | S: Supported |
7560 | F: arch/arm/mach-davinci | 7565 | F: arch/arm/mach-davinci/ |
7561 | F: drivers/i2c/busses/i2c-davinci.c | 7566 | F: drivers/i2c/busses/i2c-davinci.c |
7562 | 7567 | ||
7563 | TI DAVINCI SERIES MEDIA DRIVER | 7568 | TI DAVINCI SERIES MEDIA DRIVER |
@@ -7642,7 +7647,7 @@ SMIA AND SMIA++ IMAGE SENSOR DRIVER | |||
7642 | M: Sakari Ailus <sakari.ailus@iki.fi> | 7647 | M: Sakari Ailus <sakari.ailus@iki.fi> |
7643 | L: linux-media@vger.kernel.org | 7648 | L: linux-media@vger.kernel.org |
7644 | S: Maintained | 7649 | S: Maintained |
7645 | F: drivers/media/i2c/smiapp | 7650 | F: drivers/media/i2c/smiapp/ |
7646 | F: include/media/smiapp.h | 7651 | F: include/media/smiapp.h |
7647 | F: drivers/media/i2c/smiapp-pll.c | 7652 | F: drivers/media/i2c/smiapp-pll.c |
7648 | F: drivers/media/i2c/smiapp-pll.h | 7653 | F: drivers/media/i2c/smiapp-pll.h |
@@ -7745,6 +7750,11 @@ W: http://tifmxx.berlios.de/ | |||
7745 | S: Maintained | 7750 | S: Maintained |
7746 | F: drivers/memstick/host/tifm_ms.c | 7751 | F: drivers/memstick/host/tifm_ms.c |
7747 | 7752 | ||
7753 | SONY MEMORYSTICK STANDARD SUPPORT | ||
7754 | M: Maxim Levitsky <maximlevitsky@gmail.com> | ||
7755 | S: Maintained | ||
7756 | F: drivers/memstick/core/ms_block.* | ||
7757 | |||
7748 | SOUND | 7758 | SOUND |
7749 | M: Jaroslav Kysela <perex@perex.cz> | 7759 | M: Jaroslav Kysela <perex@perex.cz> |
7750 | M: Takashi Iwai <tiwai@suse.de> | 7760 | M: Takashi Iwai <tiwai@suse.de> |
@@ -7821,35 +7831,7 @@ L: spear-devel@list.st.com | |||
7821 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | 7831 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) |
7822 | W: http://www.st.com/spear | 7832 | W: http://www.st.com/spear |
7823 | S: Maintained | 7833 | S: Maintained |
7824 | F: arch/arm/plat-spear/ | 7834 | F: arch/arm/mach-spear/ |
7825 | |||
7826 | SPEAR13XX MACHINE SUPPORT | ||
7827 | M: Viresh Kumar <viresh.linux@gmail.com> | ||
7828 | M: Shiraz Hashim <shiraz.hashim@st.com> | ||
7829 | L: spear-devel@list.st.com | ||
7830 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | ||
7831 | W: http://www.st.com/spear | ||
7832 | S: Maintained | ||
7833 | F: arch/arm/mach-spear13xx/ | ||
7834 | |||
7835 | SPEAR3XX MACHINE SUPPORT | ||
7836 | M: Viresh Kumar <viresh.linux@gmail.com> | ||
7837 | M: Shiraz Hashim <shiraz.hashim@st.com> | ||
7838 | L: spear-devel@list.st.com | ||
7839 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | ||
7840 | W: http://www.st.com/spear | ||
7841 | S: Maintained | ||
7842 | F: arch/arm/mach-spear3xx/ | ||
7843 | |||
7844 | SPEAR6XX MACHINE SUPPORT | ||
7845 | M: Rajeev Kumar <rajeev-dlh.kumar@st.com> | ||
7846 | M: Shiraz Hashim <shiraz.hashim@st.com> | ||
7847 | M: Viresh Kumar <viresh.linux@gmail.com> | ||
7848 | L: spear-devel@list.st.com | ||
7849 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | ||
7850 | W: http://www.st.com/spear | ||
7851 | S: Maintained | ||
7852 | F: arch/arm/mach-spear6xx/ | ||
7853 | 7835 | ||
7854 | SPEAR CLOCK FRAMEWORK SUPPORT | 7836 | SPEAR CLOCK FRAMEWORK SUPPORT |
7855 | M: Viresh Kumar <viresh.linux@gmail.com> | 7837 | M: Viresh Kumar <viresh.linux@gmail.com> |
@@ -8118,7 +8100,7 @@ M: Vineet Gupta <vgupta@synopsys.com> | |||
8118 | S: Supported | 8100 | S: Supported |
8119 | F: arch/arc/ | 8101 | F: arch/arc/ |
8120 | F: Documentation/devicetree/bindings/arc/ | 8102 | F: Documentation/devicetree/bindings/arc/ |
8121 | F: drivers/tty/serial/arc-uart.c | 8103 | F: drivers/tty/serial/arc_uart.c |
8122 | 8104 | ||
8123 | SYSV FILESYSTEM | 8105 | SYSV FILESYSTEM |
8124 | M: Christoph Hellwig <hch@infradead.org> | 8106 | M: Christoph Hellwig <hch@infradead.org> |
@@ -8808,7 +8790,6 @@ L: linux-usb@vger.kernel.org | |||
8808 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git | 8790 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git |
8809 | S: Maintained | 8791 | S: Maintained |
8810 | F: drivers/usb/phy/ | 8792 | F: drivers/usb/phy/ |
8811 | F: drivers/usb/otg/ | ||
8812 | 8793 | ||
8813 | USB PRINTER DRIVER (usblp) | 8794 | USB PRINTER DRIVER (usblp) |
8814 | M: Pete Zaitcev <zaitcev@redhat.com> | 8795 | M: Pete Zaitcev <zaitcev@redhat.com> |
@@ -9339,7 +9320,7 @@ M: Matthew Garrett <matthew.garrett@nebula.com> | |||
9339 | L: platform-driver-x86@vger.kernel.org | 9320 | L: platform-driver-x86@vger.kernel.org |
9340 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86.git | 9321 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86.git |
9341 | S: Maintained | 9322 | S: Maintained |
9342 | F: drivers/platform/x86 | 9323 | F: drivers/platform/x86/ |
9343 | 9324 | ||
9344 | X86 MCE INFRASTRUCTURE | 9325 | X86 MCE INFRASTRUCTURE |
9345 | M: Tony Luck <tony.luck@intel.com> | 9326 | M: Tony Luck <tony.luck@intel.com> |
diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c index 40736da9bea8..ffb19b7da999 100644 --- a/arch/alpha/lib/csum_partial_copy.c +++ b/arch/alpha/lib/csum_partial_copy.c | |||
@@ -338,6 +338,11 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, | |||
338 | unsigned long doff = 7 & (unsigned long) dst; | 338 | unsigned long doff = 7 & (unsigned long) dst; |
339 | 339 | ||
340 | if (len) { | 340 | if (len) { |
341 | if (!access_ok(VERIFY_READ, src, len)) { | ||
342 | *errp = -EFAULT; | ||
343 | memset(dst, 0, len); | ||
344 | return sum; | ||
345 | } | ||
341 | if (!doff) { | 346 | if (!doff) { |
342 | if (!soff) | 347 | if (!soff) |
343 | checksum = csum_partial_cfu_aligned( | 348 | checksum = csum_partial_cfu_aligned( |
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index 66781bf34077..54ee6163c181 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c | |||
@@ -56,3 +56,8 @@ int pmd_huge(pmd_t pmd) | |||
56 | { | 56 | { |
57 | return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); | 57 | return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); |
58 | } | 58 | } |
59 | |||
60 | int pmd_huge_support(void) | ||
61 | { | ||
62 | return 1; | ||
63 | } | ||
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2fc8258bab2d..5e9aec358306 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c | |||
@@ -54,6 +54,11 @@ int pud_huge(pud_t pud) | |||
54 | return !(pud_val(pud) & PUD_TABLE_BIT); | 54 | return !(pud_val(pud) & PUD_TABLE_BIT); |
55 | } | 55 | } |
56 | 56 | ||
57 | int pmd_huge_support(void) | ||
58 | { | ||
59 | return 1; | ||
60 | } | ||
61 | |||
57 | static __init int setup_hugepagesz(char *opt) | 62 | static __init int setup_hugepagesz(char *opt) |
58 | { | 63 | { |
59 | unsigned long ps = memparse(opt, &opt); | 64 | unsigned long ps = memparse(opt, &opt); |
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 76069c18ee42..68232db98baa 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c | |||
@@ -114,6 +114,11 @@ int pud_huge(pud_t pud) | |||
114 | return 0; | 114 | return 0; |
115 | } | 115 | } |
116 | 116 | ||
117 | int pmd_huge_support(void) | ||
118 | { | ||
119 | return 0; | ||
120 | } | ||
121 | |||
117 | struct page * | 122 | struct page * |
118 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) | 123 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) |
119 | { | 124 | { |
diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c index 3c52fa6d0f8e..042431509b56 100644 --- a/arch/metag/mm/hugetlbpage.c +++ b/arch/metag/mm/hugetlbpage.c | |||
@@ -110,6 +110,11 @@ int pud_huge(pud_t pud) | |||
110 | return 0; | 110 | return 0; |
111 | } | 111 | } |
112 | 112 | ||
113 | int pmd_huge_support(void) | ||
114 | { | ||
115 | return 1; | ||
116 | } | ||
117 | |||
113 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 118 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
114 | pmd_t *pmd, int write) | 119 | pmd_t *pmd, int write) |
115 | { | 120 | { |
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index a7fee0dfb7a9..01fda4419ed0 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c | |||
@@ -85,6 +85,11 @@ int pud_huge(pud_t pud) | |||
85 | return (pud_val(pud) & _PAGE_HUGE) != 0; | 85 | return (pud_val(pud) & _PAGE_HUGE) != 0; |
86 | } | 86 | } |
87 | 87 | ||
88 | int pmd_huge_support(void) | ||
89 | { | ||
90 | return 1; | ||
91 | } | ||
92 | |||
88 | struct page * | 93 | struct page * |
89 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 94 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
90 | pmd_t *pmd, int write) | 95 | pmd_t *pmd, int write) |
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 834ca8eb38f2..d67db4bd672d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
@@ -86,6 +86,11 @@ int pgd_huge(pgd_t pgd) | |||
86 | */ | 86 | */ |
87 | return ((pgd_val(pgd) & 0x3) != 0x0); | 87 | return ((pgd_val(pgd) & 0x3) != 0x0); |
88 | } | 88 | } |
89 | |||
90 | int pmd_huge_support(void) | ||
91 | { | ||
92 | return 1; | ||
93 | } | ||
89 | #else | 94 | #else |
90 | int pmd_huge(pmd_t pmd) | 95 | int pmd_huge(pmd_t pmd) |
91 | { | 96 | { |
@@ -101,6 +106,11 @@ int pgd_huge(pgd_t pgd) | |||
101 | { | 106 | { |
102 | return 0; | 107 | return 0; |
103 | } | 108 | } |
109 | |||
110 | int pmd_huge_support(void) | ||
111 | { | ||
112 | return 0; | ||
113 | } | ||
104 | #endif | 114 | #endif |
105 | 115 | ||
106 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 116 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index fb2723e8ba65..3ec272859e1e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -526,6 +526,7 @@ config CRASH_DUMP | |||
526 | bool "kernel crash dumps" | 526 | bool "kernel crash dumps" |
527 | depends on 64BIT && SMP | 527 | depends on 64BIT && SMP |
528 | select KEXEC | 528 | select KEXEC |
529 | select ZFCPDUMP | ||
529 | help | 530 | help |
530 | Generate crash dump after being started by kexec. | 531 | Generate crash dump after being started by kexec. |
531 | Crash dump kernels are loaded in the main kernel with kexec-tools | 532 | Crash dump kernels are loaded in the main kernel with kexec-tools |
@@ -536,7 +537,7 @@ config CRASH_DUMP | |||
536 | config ZFCPDUMP | 537 | config ZFCPDUMP |
537 | def_bool n | 538 | def_bool n |
538 | prompt "zfcpdump support" | 539 | prompt "zfcpdump support" |
539 | select SMP | 540 | depends on SMP |
540 | help | 541 | help |
541 | Select this option if you want to build an zfcpdump enabled kernel. | 542 | Select this option if you want to build an zfcpdump enabled kernel. |
542 | Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. | 543 | Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. |
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index dcf6948a875c..4176dfe0fba1 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <linux/ptrace.h> | 31 | #include <linux/ptrace.h> |
32 | #include <linux/percpu.h> | 32 | #include <linux/percpu.h> |
33 | 33 | ||
34 | #define __ARCH_WANT_KPROBES_INSN_SLOT | ||
35 | |||
34 | struct pt_regs; | 36 | struct pt_regs; |
35 | struct kprobe; | 37 | struct kprobe; |
36 | 38 | ||
@@ -57,7 +59,7 @@ typedef u16 kprobe_opcode_t; | |||
57 | /* Architecture specific copy of original instruction */ | 59 | /* Architecture specific copy of original instruction */ |
58 | struct arch_specific_insn { | 60 | struct arch_specific_insn { |
59 | /* copy of original instruction */ | 61 | /* copy of original instruction */ |
60 | kprobe_opcode_t insn[MAX_INSN_SIZE]; | 62 | kprobe_opcode_t *insn; |
61 | }; | 63 | }; |
62 | 64 | ||
63 | struct prev_kprobe { | 65 | struct prev_kprobe { |
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 06a136136047..7dc7f9c63b65 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h | |||
@@ -56,5 +56,6 @@ bool sclp_has_linemode(void); | |||
56 | bool sclp_has_vt220(void); | 56 | bool sclp_has_vt220(void); |
57 | int sclp_pci_configure(u32 fid); | 57 | int sclp_pci_configure(u32 fid); |
58 | int sclp_pci_deconfigure(u32 fid); | 58 | int sclp_pci_deconfigure(u32 fid); |
59 | int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); | ||
59 | 60 | ||
60 | #endif /* _ASM_S390_SCLP_H */ | 61 | #endif /* _ASM_S390_SCLP_H */ |
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index d8f355657171..c84f33d51f7b 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/os_info.h> | 16 | #include <asm/os_info.h> |
17 | #include <asm/elf.h> | 17 | #include <asm/elf.h> |
18 | #include <asm/ipl.h> | 18 | #include <asm/ipl.h> |
19 | #include <asm/sclp.h> | ||
19 | 20 | ||
20 | #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) | 21 | #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) |
21 | #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) | 22 | #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) |
@@ -64,22 +65,46 @@ static ssize_t copy_page_real(void *buf, void *src, size_t csize) | |||
64 | } | 65 | } |
65 | 66 | ||
66 | /* | 67 | /* |
67 | * Copy one page from "oldmem" | 68 | * Pointer to ELF header in new kernel |
69 | */ | ||
70 | static void *elfcorehdr_newmem; | ||
71 | |||
72 | /* | ||
73 | * Copy one page from zfcpdump "oldmem" | ||
74 | * | ||
75 | * For pages below ZFCPDUMP_HSA_SIZE memory from the HSA is copied. Otherwise | ||
76 | * real memory copy is used. | ||
77 | */ | ||
78 | static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, | ||
79 | unsigned long src, int userbuf) | ||
80 | { | ||
81 | int rc; | ||
82 | |||
83 | if (src < ZFCPDUMP_HSA_SIZE) { | ||
84 | rc = memcpy_hsa(buf, src, csize, userbuf); | ||
85 | } else { | ||
86 | if (userbuf) | ||
87 | rc = copy_to_user_real((void __force __user *) buf, | ||
88 | (void *) src, csize); | ||
89 | else | ||
90 | rc = memcpy_real(buf, (void *) src, csize); | ||
91 | } | ||
92 | return rc ? rc : csize; | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Copy one page from kdump "oldmem" | ||
68 | * | 97 | * |
69 | * For the kdump reserved memory this function performs a swap operation: | 98 | * For the kdump reserved memory this function performs a swap operation: |
70 | * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. | 99 | * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. |
71 | * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] | 100 | * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] |
72 | */ | 101 | */ |
73 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | 102 | static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, |
74 | size_t csize, unsigned long offset, int userbuf) | 103 | unsigned long src, int userbuf) |
104 | |||
75 | { | 105 | { |
76 | unsigned long src; | ||
77 | int rc; | 106 | int rc; |
78 | 107 | ||
79 | if (!csize) | ||
80 | return 0; | ||
81 | |||
82 | src = (pfn << PAGE_SHIFT) + offset; | ||
83 | if (src < OLDMEM_SIZE) | 108 | if (src < OLDMEM_SIZE) |
84 | src += OLDMEM_BASE; | 109 | src += OLDMEM_BASE; |
85 | else if (src > OLDMEM_BASE && | 110 | else if (src > OLDMEM_BASE && |
@@ -90,7 +115,88 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
90 | (void *) src, csize); | 115 | (void *) src, csize); |
91 | else | 116 | else |
92 | rc = copy_page_real(buf, (void *) src, csize); | 117 | rc = copy_page_real(buf, (void *) src, csize); |
93 | return (rc == 0) ? csize : rc; | 118 | return (rc == 0) ? csize : rc; |
119 | } | ||
120 | |||
121 | /* | ||
122 | * Copy one page from "oldmem" | ||
123 | */ | ||
124 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, | ||
125 | unsigned long offset, int userbuf) | ||
126 | { | ||
127 | unsigned long src; | ||
128 | |||
129 | if (!csize) | ||
130 | return 0; | ||
131 | src = (pfn << PAGE_SHIFT) + offset; | ||
132 | if (OLDMEM_BASE) | ||
133 | return copy_oldmem_page_kdump(buf, csize, src, userbuf); | ||
134 | else | ||
135 | return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Remap "oldmem" for kdump | ||
140 | * | ||
141 | * For the kdump reserved memory this function performs a swap operation: | ||
142 | * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] | ||
143 | */ | ||
144 | static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, | ||
145 | unsigned long from, unsigned long pfn, | ||
146 | unsigned long size, pgprot_t prot) | ||
147 | { | ||
148 | unsigned long size_old; | ||
149 | int rc; | ||
150 | |||
151 | if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { | ||
152 | size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); | ||
153 | rc = remap_pfn_range(vma, from, | ||
154 | pfn + (OLDMEM_BASE >> PAGE_SHIFT), | ||
155 | size_old, prot); | ||
156 | if (rc || size == size_old) | ||
157 | return rc; | ||
158 | size -= size_old; | ||
159 | from += size_old; | ||
160 | pfn += size_old >> PAGE_SHIFT; | ||
161 | } | ||
162 | return remap_pfn_range(vma, from, pfn, size, prot); | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Remap "oldmem" for zfcpdump | ||
167 | * | ||
168 | * We only map available memory above ZFCPDUMP_HSA_SIZE. Memory below | ||
169 | * ZFCPDUMP_HSA_SIZE is read on demand using the copy_oldmem_page() function. | ||
170 | */ | ||
171 | static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, | ||
172 | unsigned long from, | ||
173 | unsigned long pfn, | ||
174 | unsigned long size, pgprot_t prot) | ||
175 | { | ||
176 | unsigned long size_hsa; | ||
177 | |||
178 | if (pfn < ZFCPDUMP_HSA_SIZE >> PAGE_SHIFT) { | ||
179 | size_hsa = min(size, ZFCPDUMP_HSA_SIZE - (pfn << PAGE_SHIFT)); | ||
180 | if (size == size_hsa) | ||
181 | return 0; | ||
182 | size -= size_hsa; | ||
183 | from += size_hsa; | ||
184 | pfn += size_hsa >> PAGE_SHIFT; | ||
185 | } | ||
186 | return remap_pfn_range(vma, from, pfn, size, prot); | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Remap "oldmem" for kdump or zfcpdump | ||
191 | */ | ||
192 | int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, | ||
193 | unsigned long pfn, unsigned long size, pgprot_t prot) | ||
194 | { | ||
195 | if (OLDMEM_BASE) | ||
196 | return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); | ||
197 | else | ||
198 | return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, | ||
199 | prot); | ||
94 | } | 200 | } |
95 | 201 | ||
96 | /* | 202 | /* |
@@ -101,11 +207,21 @@ int copy_from_oldmem(void *dest, void *src, size_t count) | |||
101 | unsigned long copied = 0; | 207 | unsigned long copied = 0; |
102 | int rc; | 208 | int rc; |
103 | 209 | ||
104 | if ((unsigned long) src < OLDMEM_SIZE) { | 210 | if (OLDMEM_BASE) { |
105 | copied = min(count, OLDMEM_SIZE - (unsigned long) src); | 211 | if ((unsigned long) src < OLDMEM_SIZE) { |
106 | rc = memcpy_real(dest, src + OLDMEM_BASE, copied); | 212 | copied = min(count, OLDMEM_SIZE - (unsigned long) src); |
107 | if (rc) | 213 | rc = memcpy_real(dest, src + OLDMEM_BASE, copied); |
108 | return rc; | 214 | if (rc) |
215 | return rc; | ||
216 | } | ||
217 | } else { | ||
218 | if ((unsigned long) src < ZFCPDUMP_HSA_SIZE) { | ||
219 | copied = min(count, | ||
220 | ZFCPDUMP_HSA_SIZE - (unsigned long) src); | ||
221 | rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); | ||
222 | if (rc) | ||
223 | return rc; | ||
224 | } | ||
109 | } | 225 | } |
110 | return memcpy_real(dest + copied, src + copied, count - copied); | 226 | return memcpy_real(dest + copied, src + copied, count - copied); |
111 | } | 227 | } |
@@ -368,14 +484,6 @@ static int get_mem_chunk_cnt(void) | |||
368 | } | 484 | } |
369 | 485 | ||
370 | /* | 486 | /* |
371 | * Relocate pointer in order to allow vmcore code access the data | ||
372 | */ | ||
373 | static inline unsigned long relocate(unsigned long addr) | ||
374 | { | ||
375 | return OLDMEM_BASE + addr; | ||
376 | } | ||
377 | |||
378 | /* | ||
379 | * Initialize ELF loads (new kernel) | 487 | * Initialize ELF loads (new kernel) |
380 | */ | 488 | */ |
381 | static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) | 489 | static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) |
@@ -426,7 +534,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) | |||
426 | ptr = nt_vmcoreinfo(ptr); | 534 | ptr = nt_vmcoreinfo(ptr); |
427 | memset(phdr, 0, sizeof(*phdr)); | 535 | memset(phdr, 0, sizeof(*phdr)); |
428 | phdr->p_type = PT_NOTE; | 536 | phdr->p_type = PT_NOTE; |
429 | phdr->p_offset = relocate(notes_offset); | 537 | phdr->p_offset = notes_offset; |
430 | phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); | 538 | phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); |
431 | phdr->p_memsz = phdr->p_filesz; | 539 | phdr->p_memsz = phdr->p_filesz; |
432 | return ptr; | 540 | return ptr; |
@@ -435,7 +543,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) | |||
435 | /* | 543 | /* |
436 | * Create ELF core header (new kernel) | 544 | * Create ELF core header (new kernel) |
437 | */ | 545 | */ |
438 | static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) | 546 | int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) |
439 | { | 547 | { |
440 | Elf64_Phdr *phdr_notes, *phdr_loads; | 548 | Elf64_Phdr *phdr_notes, *phdr_loads; |
441 | int mem_chunk_cnt; | 549 | int mem_chunk_cnt; |
@@ -443,6 +551,12 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) | |||
443 | u32 alloc_size; | 551 | u32 alloc_size; |
444 | u64 hdr_off; | 552 | u64 hdr_off; |
445 | 553 | ||
554 | /* If we are not in kdump or zfcpdump mode, return */ | ||
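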
555 | if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) | ||
556 | return 0; | ||
557 | /* If elfcorehdr= has been passed via cmdline, we use that one */ | ||
558 | if (elfcorehdr_addr != ELFCORE_ADDR_MAX) | ||
559 | return 0; | ||
446 | mem_chunk_cnt = get_mem_chunk_cnt(); | 560 | mem_chunk_cnt = get_mem_chunk_cnt(); |
447 | 561 | ||
448 | alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + | 562 | alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + |
@@ -460,27 +574,52 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) | |||
460 | ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); | 574 | ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); |
461 | /* Init loads */ | 575 | /* Init loads */ |
462 | hdr_off = PTR_DIFF(ptr, hdr); | 576 | hdr_off = PTR_DIFF(ptr, hdr); |
463 | loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); | 577 | loads_init(phdr_loads, hdr_off); |
464 | *elfcorebuf_sz = hdr_off; | 578 | *addr = (unsigned long long) hdr; |
465 | *elfcorebuf = (void *) relocate((unsigned long) hdr); | 579 | elfcorehdr_newmem = hdr; |
466 | BUG_ON(*elfcorebuf_sz > alloc_size); | 580 | *size = (unsigned long long) hdr_off; |
581 | BUG_ON(elfcorehdr_size > alloc_size); | ||
582 | return 0; | ||
467 | } | 583 | } |
468 | 584 | ||
469 | /* | 585 | /* |
470 | * Create kdump ELF core header in new kernel, if it has not been passed via | 586 | * Free ELF core header (new kernel) |
471 | * the "elfcorehdr" kernel parameter | ||
472 | */ | 587 | */ |
473 | static int setup_kdump_elfcorehdr(void) | 588 | void elfcorehdr_free(unsigned long long addr) |
474 | { | 589 | { |
475 | size_t elfcorebuf_sz; | 590 | if (!elfcorehdr_newmem) |
476 | char *elfcorebuf; | 591 | return; |
592 | kfree((void *)(unsigned long)addr); | ||
593 | } | ||
477 | 594 | ||
478 | if (!OLDMEM_BASE || is_kdump_kernel()) | 595 | /* |
479 | return -EINVAL; | 596 | * Read from ELF header |
480 | s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); | 597 | */ |
481 | elfcorehdr_addr = (unsigned long long) elfcorebuf; | 598 | ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) |
482 | elfcorehdr_size = elfcorebuf_sz; | 599 | { |
483 | return 0; | 600 | void *src = (void *)(unsigned long)*ppos; |
601 | |||
602 | src = elfcorehdr_newmem ? src : src - OLDMEM_BASE; | ||
603 | memcpy(buf, src, count); | ||
604 | *ppos += count; | ||
605 | return count; | ||
484 | } | 606 | } |
485 | 607 | ||
486 | subsys_initcall(setup_kdump_elfcorehdr); | 608 | /* |
609 | * Read from ELF notes data | ||
610 | */ | ||
611 | ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) | ||
612 | { | ||
613 | void *src = (void *)(unsigned long)*ppos; | ||
614 | int rc; | ||
615 | |||
616 | if (elfcorehdr_newmem) { | ||
617 | memcpy(buf, src, count); | ||
618 | } else { | ||
619 | rc = copy_from_oldmem(buf, src, count); | ||
620 | if (rc) | ||
621 | return rc; | ||
622 | } | ||
623 | *ppos += count; | ||
624 | return count; | ||
625 | } | ||
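In both readers above, elfcorehdr_newmem decides how *ppos is interpreted: when set, the header was allocated by this kernel and the address can be dereferenced directly; when clear, it was passed in via "elfcorehdr=", so elfcorehdr_read() undoes the kdump address swap by subtracting OLDMEM_BASE, while elfcorehdr_read_notes() fetches through copy_from_oldmem(). The header-path translation could be factored like this (a sketch, not part of the patch):

static void *elfcorehdr_src(u64 pos)
{
	/* newmem: plain kernel address; otherwise undo the kdump swap */
	return elfcorehdr_newmem ? (void *)(unsigned long)pos
				 : (void *)(unsigned long)(pos - OLDMEM_BASE);
}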
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index adbbe7f1cb0d..0ce9fb245034 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c | |||
@@ -37,6 +37,26 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | |||
37 | 37 | ||
38 | struct kretprobe_blackpoint kretprobe_blacklist[] = { }; | 38 | struct kretprobe_blackpoint kretprobe_blacklist[] = { }; |
39 | 39 | ||
40 | DEFINE_INSN_CACHE_OPS(dmainsn); | ||
41 | |||
42 | static void *alloc_dmainsn_page(void) | ||
43 | { | ||
44 | return (void *)__get_free_page(GFP_KERNEL | GFP_DMA); | ||
45 | } | ||
46 | |||
47 | static void free_dmainsn_page(void *page) | ||
48 | { | ||
49 | free_page((unsigned long)page); | ||
50 | } | ||
51 | |||
52 | struct kprobe_insn_cache kprobe_dmainsn_slots = { | ||
53 | .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex), | ||
54 | .alloc = alloc_dmainsn_page, | ||
55 | .free = free_dmainsn_page, | ||
56 | .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages), | ||
57 | .insn_size = MAX_INSN_SIZE, | ||
58 | }; | ||
59 | |||
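DEFINE_INSN_CACHE_OPS(dmainsn) hooks this GFP_DMA-backed cache into the generic kprobes insn-slot machinery; the get_dmainsn_slot()/free_dmainsn_slot() pair used further down is what the macro generates. Roughly (a sketch of the generic expansion, not s390-specific code):

extern struct kprobe_insn_cache kprobe_dmainsn_slots;

static inline kprobe_opcode_t *get_dmainsn_slot(void)
{
	return __get_insn_slot(&kprobe_dmainsn_slots);
}

static inline void free_dmainsn_slot(kprobe_opcode_t *slot, int dirty)
{
	__free_insn_slot(&kprobe_dmainsn_slots, slot, dirty);
}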
40 | static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) | 60 | static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) |
41 | { | 61 | { |
42 | switch (insn[0] >> 8) { | 62 | switch (insn[0] >> 8) { |
@@ -100,9 +120,8 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn) | |||
100 | fixup |= FIXUP_RETURN_REGISTER; | 120 | fixup |= FIXUP_RETURN_REGISTER; |
101 | break; | 121 | break; |
102 | case 0xc0: | 122 | case 0xc0: |
103 | if ((insn[0] & 0x0f) == 0x00 || /* larl */ | 123 | if ((insn[0] & 0x0f) == 0x05) /* brasl */ |
104 | (insn[0] & 0x0f) == 0x05) /* brasl */ | 124 | fixup |= FIXUP_RETURN_REGISTER; |
105 | fixup |= FIXUP_RETURN_REGISTER; | ||
106 | break; | 125 | break; |
107 | case 0xeb: | 126 | case 0xeb: |
108 | switch (insn[2] & 0xff) { | 127 | switch (insn[2] & 0xff) { |
@@ -134,18 +153,128 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn) | |||
134 | return fixup; | 153 | return fixup; |
135 | } | 154 | } |
136 | 155 | ||
156 | static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) | ||
157 | { | ||
158 | /* Check if we have a RIL-b or RIL-c format instruction which | ||
159 | * we need to modify in order to avoid instruction emulation. */ | ||
160 | switch (insn[0] >> 8) { | ||
161 | case 0xc0: | ||
162 | if ((insn[0] & 0x0f) == 0x00) /* larl */ | ||
163 | return true; | ||
164 | break; | ||
165 | case 0xc4: | ||
166 | switch (insn[0] & 0x0f) { | ||
167 | case 0x02: /* llhrl */ | ||
168 | case 0x04: /* lghrl */ | ||
169 | case 0x05: /* lhrl */ | ||
170 | case 0x06: /* llghrl */ | ||
171 | case 0x07: /* sthrl */ | ||
172 | case 0x08: /* lgrl */ | ||
173 | case 0x0b: /* stgrl */ | ||
174 | case 0x0c: /* lgfrl */ | ||
175 | case 0x0d: /* lrl */ | ||
176 | case 0x0e: /* llgfrl */ | ||
177 | case 0x0f: /* strl */ | ||
178 | return true; | ||
179 | } | ||
180 | break; | ||
181 | case 0xc6: | ||
182 | switch (insn[0] & 0x0f) { | ||
183 | case 0x00: /* exrl */ | ||
184 | case 0x02: /* pfdrl */ | ||
185 | case 0x04: /* cghrl */ | ||
186 | case 0x05: /* chrl */ | ||
187 | case 0x06: /* clghrl */ | ||
188 | case 0x07: /* clhrl */ | ||
189 | case 0x08: /* cgrl */ | ||
190 | case 0x0a: /* clgrl */ | ||
191 | case 0x0c: /* cgfrl */ | ||
192 | case 0x0d: /* crl */ | ||
193 | case 0x0e: /* clgfrl */ | ||
194 | case 0x0f: /* clrl */ | ||
195 | return true; | ||
196 | } | ||
197 | break; | ||
198 | } | ||
199 | return false; | ||
200 | } | ||
201 | |||
202 | static void __kprobes copy_instruction(struct kprobe *p) | ||
203 | { | ||
204 | s64 disp, new_disp; | ||
205 | u64 addr, new_addr; | ||
206 | |||
207 | memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2); | ||
208 | if (!is_insn_relative_long(p->ainsn.insn)) | ||
209 | return; | ||
210 | /* | ||
211 | * For pc-relative instructions in RIL-b or RIL-c format, patch the | ||
212 | * RI2 displacement field. We have already made sure that the insn | ||
213 | * slot for the patched instruction is within the same 2GB area | ||
214 | * as the original instruction (either kernel image or module area). | ||
215 | * Therefore the new displacement will always fit. | ||
216 | */ | ||
217 | disp = *(s32 *)&p->ainsn.insn[1]; | ||
218 | addr = (u64)(unsigned long)p->addr; | ||
219 | new_addr = (u64)(unsigned long)p->ainsn.insn; | ||
220 | new_disp = ((addr + (disp * 2)) - new_addr) / 2; | ||
221 | *(s32 *)&p->ainsn.insn[1] = new_disp; | ||
222 | } | ||
223 | |||
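The displacement fix-up in copy_instruction() is plain halfword arithmetic: the target addr + 2*disp of the original instruction must still be reached from the insn slot. A worked example with illustrative addresses:

	s64 disp = 0x100;		/* RI2 of the original larl */
	u64 addr = 0x1000;		/* original instruction address */
	u64 new_addr = 0x3000;		/* insn slot address */
	u64 target = addr + disp * 2;	/* 0x1200 */
	s64 new_disp = (s64)(target - new_addr) / 2;	/* -0xf00 halfwords */

Because the slot is guaranteed to lie within the same 2GB area, any such difference fits the 32-bit signed RI2 field.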
224 | static inline int is_kernel_addr(void *addr) | ||
225 | { | ||
226 | return addr < (void *)_end; | ||
227 | } | ||
228 | |||
229 | static inline int is_module_addr(void *addr) | ||
230 | { | ||
231 | #ifdef CONFIG_64BIT | ||
232 | BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); | ||
233 | if (addr < (void *)MODULES_VADDR) | ||
234 | return 0; | ||
235 | if (addr > (void *)MODULES_END) | ||
236 | return 0; | ||
237 | #endif | ||
238 | return 1; | ||
239 | } | ||
240 | |||
241 | static int __kprobes s390_get_insn_slot(struct kprobe *p) | ||
242 | { | ||
243 | /* | ||
244 | * Get an insn slot that is within the same 2GB area as the original | ||
245 | * instruction. That way instructions with a 32bit signed displacement | ||
246 | * field can be patched and executed within the insn slot. | ||
247 | */ | ||
248 | p->ainsn.insn = NULL; | ||
249 | if (is_kernel_addr(p->addr)) | ||
250 | p->ainsn.insn = get_dmainsn_slot(); | ||
251 | if (is_module_addr(p->addr)) | ||
252 | p->ainsn.insn = get_insn_slot(); | ||
253 | return p->ainsn.insn ? 0 : -ENOMEM; | ||
254 | } | ||
255 | |||
256 | static void __kprobes s390_free_insn_slot(struct kprobe *p) | ||
257 | { | ||
258 | if (!p->ainsn.insn) | ||
259 | return; | ||
260 | if (is_kernel_addr(p->addr)) | ||
261 | free_dmainsn_slot(p->ainsn.insn, 0); | ||
262 | else | ||
263 | free_insn_slot(p->ainsn.insn, 0); | ||
264 | p->ainsn.insn = NULL; | ||
265 | } | ||
266 | |||
137 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 267 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
138 | { | 268 | { |
139 | if ((unsigned long) p->addr & 0x01) | 269 | if ((unsigned long) p->addr & 0x01) |
140 | return -EINVAL; | 270 | return -EINVAL; |
141 | |||
142 | /* Make sure the probe isn't going on a difficult instruction */ | 271 | /* Make sure the probe isn't going on a difficult instruction */ |
143 | if (is_prohibited_opcode(p->addr)) | 272 | if (is_prohibited_opcode(p->addr)) |
144 | return -EINVAL; | 273 | return -EINVAL; |
145 | 274 | if (s390_get_insn_slot(p)) | |
275 | return -ENOMEM; | ||
146 | p->opcode = *p->addr; | 276 | p->opcode = *p->addr; |
147 | memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2); | 277 | copy_instruction(p); |
148 | |||
149 | return 0; | 278 | return 0; |
150 | } | 279 | } |
151 | 280 | ||
@@ -186,6 +315,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) | |||
186 | 315 | ||
187 | void __kprobes arch_remove_kprobe(struct kprobe *p) | 316 | void __kprobes arch_remove_kprobe(struct kprobe *p) |
188 | { | 317 | { |
318 | s390_free_insn_slot(p); | ||
189 | } | 319 | } |
190 | 320 | ||
191 | static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, | 321 | static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, |
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 248445f92604..d261c62e40a6 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c | |||
@@ -223,6 +223,11 @@ int pud_huge(pud_t pud) | |||
223 | return 0; | 223 | return 0; |
224 | } | 224 | } |
225 | 225 | ||
226 | int pmd_huge_support(void) | ||
227 | { | ||
228 | return 1; | ||
229 | } | ||
230 | |||
226 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 231 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
227 | pmd_t *pmdp, int write) | 232 | pmd_t *pmdp, int write) |
228 | { | 233 | { |
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index d7762349ea48..0d676a41081e 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c | |||
@@ -83,6 +83,11 @@ int pud_huge(pud_t pud) | |||
83 | return 0; | 83 | return 0; |
84 | } | 84 | } |
85 | 85 | ||
86 | int pmd_huge_support(void) | ||
87 | { | ||
88 | return 0; | ||
89 | } | ||
90 | |||
86 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 91 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
87 | pmd_t *pmd, int write) | 92 | pmd_t *pmd, int write) |
88 | { | 93 | { |
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 3d0ddbc005fe..71368850dfc0 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c | |||
@@ -169,10 +169,10 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig, | |||
169 | new_ka.ka_restorer = restorer; | 169 | new_ka.ka_restorer = restorer; |
170 | ret = get_user(u_handler, &act->sa_handler); | 170 | ret = get_user(u_handler, &act->sa_handler); |
171 | new_ka.sa.sa_handler = compat_ptr(u_handler); | 171 | new_ka.sa.sa_handler = compat_ptr(u_handler); |
172 | ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); | 172 | ret |= copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); |
173 | sigset_from_compat(&new_ka.sa.sa_mask, &set32); | 173 | sigset_from_compat(&new_ka.sa.sa_mask, &set32); |
174 | ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); | 174 | ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); |
175 | ret |= __get_user(u_restorer, &act->sa_restorer); | 175 | ret |= get_user(u_restorer, &act->sa_restorer); |
176 | new_ka.sa.sa_restorer = compat_ptr(u_restorer); | 176 | new_ka.sa.sa_restorer = compat_ptr(u_restorer); |
177 | if (ret) | 177 | if (ret) |
178 | return -EFAULT; | 178 | return -EFAULT; |
@@ -183,9 +183,9 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig, | |||
183 | if (!ret && oact) { | 183 | if (!ret && oact) { |
184 | sigset_to_compat(&set32, &old_ka.sa.sa_mask); | 184 | sigset_to_compat(&set32, &old_ka.sa.sa_mask); |
185 | ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); | 185 | ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); |
186 | ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); | 186 | ret |= copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); |
187 | ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | 187 | ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); |
188 | ret |= __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); | 188 | ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); |
189 | if (ret) | 189 | if (ret) |
190 | ret = -EFAULT; | 190 | ret = -EFAULT; |
191 | } | 191 | } |
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index d2b59441ebdd..96399646570a 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c | |||
@@ -234,6 +234,11 @@ int pud_huge(pud_t pud) | |||
234 | return 0; | 234 | return 0; |
235 | } | 235 | } |
236 | 236 | ||
237 | int pmd_huge_support(void) | ||
238 | { | ||
239 | return 0; | ||
240 | } | ||
241 | |||
237 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 242 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
238 | pmd_t *pmd, int write) | 243 | pmd_t *pmd, int write) |
239 | { | 244 | { |
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index e514899e1100..0cb3bbaa580c 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c | |||
@@ -166,6 +166,11 @@ int pud_huge(pud_t pud) | |||
166 | return !!(pud_val(pud) & _PAGE_HUGE_PAGE); | 166 | return !!(pud_val(pud) & _PAGE_HUGE_PAGE); |
167 | } | 167 | } |
168 | 168 | ||
169 | int pmd_huge_support(void) | ||
170 | { | ||
171 | return 1; | ||
172 | } | ||
173 | |||
169 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 174 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
170 | pmd_t *pmd, int write) | 175 | pmd_t *pmd, int write) |
171 | { | 176 | { |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 8d16befdec88..3d1999458709 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -315,21 +315,6 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) | |||
315 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); | 315 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); |
316 | } | 316 | } |
317 | 317 | ||
318 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) | ||
319 | { | ||
320 | return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
321 | } | ||
322 | |||
323 | static inline int pte_swp_soft_dirty(pte_t pte) | ||
324 | { | ||
325 | return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; | ||
326 | } | ||
327 | |||
328 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) | ||
329 | { | ||
330 | return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
331 | } | ||
332 | |||
333 | static inline pte_t pte_file_clear_soft_dirty(pte_t pte) | 318 | static inline pte_t pte_file_clear_soft_dirty(pte_t pte) |
334 | { | 319 | { |
335 | return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); | 320 | return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); |
@@ -446,6 +431,7 @@ pte_t *populate_extra_pte(unsigned long vaddr); | |||
446 | 431 | ||
447 | #ifndef __ASSEMBLY__ | 432 | #ifndef __ASSEMBLY__ |
448 | #include <linux/mm_types.h> | 433 | #include <linux/mm_types.h> |
434 | #include <linux/mmdebug.h> | ||
449 | #include <linux/log2.h> | 435 | #include <linux/log2.h> |
450 | 436 | ||
451 | static inline int pte_none(pte_t pte) | 437 | static inline int pte_none(pte_t pte) |
@@ -864,6 +850,24 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, | |||
864 | { | 850 | { |
865 | } | 851 | } |
866 | 852 | ||
853 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) | ||
854 | { | ||
855 | VM_BUG_ON(pte_present(pte)); | ||
856 | return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
857 | } | ||
858 | |||
859 | static inline int pte_swp_soft_dirty(pte_t pte) | ||
860 | { | ||
861 | VM_BUG_ON(pte_present(pte)); | ||
862 | return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; | ||
863 | } | ||
864 | |||
865 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) | ||
866 | { | ||
867 | VM_BUG_ON(pte_present(pte)); | ||
868 | return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
869 | } | ||
870 | |||
867 | #include <asm-generic/pgtable.h> | 871 | #include <asm-generic/pgtable.h> |
868 | #endif /* __ASSEMBLY__ */ | 872 | #endif /* __ASSEMBLY__ */ |
869 | 873 | ||
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f4843e031131..0ecac257fb26 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -75,6 +75,9 @@ | |||
75 | * with swap entry format. On x86 bits 6 and 7 are *not* involved | 75 | * with swap entry format. On x86 bits 6 and 7 are *not* involved |
76 | * into swap entry computation, but bit 6 is used for nonlinear | 76 | * into swap entry computation, but bit 6 is used for nonlinear |
77 | * file mapping, so we borrow bit 7 for soft dirty tracking. | 77 | * file mapping, so we borrow bit 7 for soft dirty tracking. |
78 | * | ||
79 | * Please note that this bit must be treated as the swap dirty page | ||
80 | * mark if and only if the PTE has its present bit clear! | ||
78 | */ | 81 | */ |
79 | #ifdef CONFIG_MEM_SOFT_DIRTY | 82 | #ifdef CONFIG_MEM_SOFT_DIRTY |
80 | #define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE | 83 | #define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE |
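This alias is also why the pte_swp_* helpers shown earlier in pgtable.h gained VM_BUG_ON(pte_present(pte)) checks: once the present bit is set, bit 7 belongs to the hardware again. Schematically:

/*
 * bit 7:
 *   present entry -> PSE/PAT, interpreted by the MMU
 *   swap entry    -> _PAGE_SWP_SOFT_DIRTY, free for software bookkeeping
 */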
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index cf512003e663..e6d90babc245 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -62,6 +62,7 @@ static inline void __flush_tlb_all(void) | |||
62 | 62 | ||
63 | static inline void __flush_tlb_one(unsigned long addr) | 63 | static inline void __flush_tlb_one(unsigned long addr) |
64 | { | 64 | { |
65 | count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); | ||
65 | __flush_tlb_single(addr); | 66 | __flush_tlb_single(addr); |
66 | } | 67 | } |
67 | 68 | ||
@@ -84,14 +85,38 @@ static inline void __flush_tlb_one(unsigned long addr) | |||
84 | 85 | ||
85 | #ifndef CONFIG_SMP | 86 | #ifndef CONFIG_SMP |
86 | 87 | ||
87 | #define flush_tlb() __flush_tlb() | 88 | /* "_up" is for UniProcessor. |
88 | #define flush_tlb_all() __flush_tlb_all() | 89 | * |
89 | #define local_flush_tlb() __flush_tlb() | 90 | * This is a helper for other header functions. *Not* intended to be called |
91 | * directly. All global TLB flushes need to either call this, or to bump the | ||
92 | * vm statistics themselves. | ||
93 | */ | ||
94 | static inline void __flush_tlb_up(void) | ||
95 | { | ||
96 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
97 | __flush_tlb(); | ||
98 | } | ||
99 | |||
100 | static inline void flush_tlb_all(void) | ||
101 | { | ||
102 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
103 | __flush_tlb_all(); | ||
104 | } | ||
105 | |||
106 | static inline void flush_tlb(void) | ||
107 | { | ||
108 | __flush_tlb_up(); | ||
109 | } | ||
110 | |||
111 | static inline void local_flush_tlb(void) | ||
112 | { | ||
113 | __flush_tlb_up(); | ||
114 | } | ||
90 | 115 | ||
91 | static inline void flush_tlb_mm(struct mm_struct *mm) | 116 | static inline void flush_tlb_mm(struct mm_struct *mm) |
92 | { | 117 | { |
93 | if (mm == current->active_mm) | 118 | if (mm == current->active_mm) |
94 | __flush_tlb(); | 119 | __flush_tlb_up(); |
95 | } | 120 | } |
96 | 121 | ||
97 | static inline void flush_tlb_page(struct vm_area_struct *vma, | 122 | static inline void flush_tlb_page(struct vm_area_struct *vma, |
@@ -105,14 +130,14 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, | |||
105 | unsigned long start, unsigned long end) | 130 | unsigned long start, unsigned long end) |
106 | { | 131 | { |
107 | if (vma->vm_mm == current->active_mm) | 132 | if (vma->vm_mm == current->active_mm) |
108 | __flush_tlb(); | 133 | __flush_tlb_up(); |
109 | } | 134 | } |
110 | 135 | ||
111 | static inline void flush_tlb_mm_range(struct mm_struct *mm, | 136 | static inline void flush_tlb_mm_range(struct mm_struct *mm, |
112 | unsigned long start, unsigned long end, unsigned long vmflag) | 137 | unsigned long start, unsigned long end, unsigned long vmflag) |
113 | { | 138 | { |
114 | if (mm == current->active_mm) | 139 | if (mm == current->active_mm) |
115 | __flush_tlb(); | 140 | __flush_tlb_up(); |
116 | } | 141 | } |
117 | 142 | ||
118 | static inline void native_flush_tlb_others(const struct cpumask *cpumask, | 143 | static inline void native_flush_tlb_others(const struct cpumask *cpumask, |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index d4cdfa67509e..ce2d0a2c3e4f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -683,6 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
683 | } | 683 | } |
684 | 684 | ||
685 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ | 685 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ |
686 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
686 | __flush_tlb(); | 687 | __flush_tlb(); |
687 | 688 | ||
688 | /* Save MTRR state */ | 689 | /* Save MTRR state */ |
@@ -696,6 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
696 | static void post_set(void) __releases(set_atomicity_lock) | 697 | static void post_set(void) __releases(set_atomicity_lock) |
697 | { | 698 | { |
698 | /* Flush TLBs (no need to flush caches - they are disabled) */ | 699 | /* Flush TLBs (no need to flush caches - they are disabled) */ |
700 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
699 | __flush_tlb(); | 701 | __flush_tlb(); |
700 | 702 | ||
701 | /* Intel (P6) standard MTRRs */ | 703 | /* Intel (P6) standard MTRRs */ |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 7e73e8c69096..9d980d88b747 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -59,6 +59,10 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
59 | return NULL; | 59 | return NULL; |
60 | } | 60 | } |
61 | 61 | ||
62 | int pmd_huge_support(void) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
62 | #else | 66 | #else |
63 | 67 | ||
64 | struct page * | 68 | struct page * |
@@ -77,6 +81,10 @@ int pud_huge(pud_t pud) | |||
77 | return !!(pud_val(pud) & _PAGE_PSE); | 81 | return !!(pud_val(pud) & _PAGE_PSE); |
78 | } | 82 | } |
79 | 83 | ||
84 | int pmd_huge_support(void) | ||
85 | { | ||
86 | return 1; | ||
87 | } | ||
80 | #endif | 88 | #endif |
81 | 89 | ||
82 | /* x86_64 also uses this file */ | 90 | /* x86_64 also uses this file */ |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 282375f13c7e..ae699b3bbac8 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -103,6 +103,7 @@ static void flush_tlb_func(void *info) | |||
103 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) | 103 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) |
104 | return; | 104 | return; |
105 | 105 | ||
106 | count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); | ||
106 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { | 107 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { |
107 | if (f->flush_end == TLB_FLUSH_ALL) | 108 | if (f->flush_end == TLB_FLUSH_ALL) |
108 | local_flush_tlb(); | 109 | local_flush_tlb(); |
@@ -130,6 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
130 | info.flush_start = start; | 131 | info.flush_start = start; |
131 | info.flush_end = end; | 132 | info.flush_end = end; |
132 | 133 | ||
134 | count_vm_event(NR_TLB_REMOTE_FLUSH); | ||
133 | if (is_uv_system()) { | 135 | if (is_uv_system()) { |
134 | unsigned int cpu; | 136 | unsigned int cpu; |
135 | 137 | ||
@@ -149,6 +151,7 @@ void flush_tlb_current_task(void) | |||
149 | 151 | ||
150 | preempt_disable(); | 152 | preempt_disable(); |
151 | 153 | ||
154 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
152 | local_flush_tlb(); | 155 | local_flush_tlb(); |
153 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 156 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
154 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); | 157 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); |
@@ -211,16 +214,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | |||
211 | act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; | 214 | act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; |
212 | 215 | ||
213 | /* tlb_flushall_shift is on balance point, details in commit log */ | 216 | /* tlb_flushall_shift is on balance point, details in commit log */ |
214 | if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) | 217 | if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { |
218 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
215 | local_flush_tlb(); | 219 | local_flush_tlb(); |
216 | else { | 220 | } else { |
217 | if (has_large_page(mm, start, end)) { | 221 | if (has_large_page(mm, start, end)) { |
218 | local_flush_tlb(); | 222 | local_flush_tlb(); |
219 | goto flush_all; | 223 | goto flush_all; |
220 | } | 224 | } |
221 | /* flush range by one by one 'invlpg' */ | 225 | /* flush range by one by one 'invlpg' */ |
222 | for (addr = start; addr < end; addr += PAGE_SIZE) | 226 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
227 | count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); | ||
223 | __flush_tlb_single(addr); | 228 | __flush_tlb_single(addr); |
229 | } | ||
224 | 230 | ||
225 | if (cpumask_any_but(mm_cpumask(mm), | 231 | if (cpumask_any_but(mm_cpumask(mm), |
226 | smp_processor_id()) < nr_cpu_ids) | 232 | smp_processor_id()) < nr_cpu_ids) |
@@ -256,6 +262,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) | |||
256 | 262 | ||
257 | static void do_flush_tlb_all(void *info) | 263 | static void do_flush_tlb_all(void *info) |
258 | { | 264 | { |
265 | count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); | ||
259 | __flush_tlb_all(); | 266 | __flush_tlb_all(); |
260 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) | 267 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) |
261 | leave_mm(smp_processor_id()); | 268 | leave_mm(smp_processor_id()); |
@@ -263,6 +270,7 @@ static void do_flush_tlb_all(void *info) | |||
263 | 270 | ||
264 | void flush_tlb_all(void) | 271 | void flush_tlb_all(void) |
265 | { | 272 | { |
273 | count_vm_event(NR_TLB_REMOTE_FLUSH); | ||
266 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 274 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
267 | } | 275 | } |
268 | 276 | ||
diff --git a/block/Kconfig b/block/Kconfig index a7e40a7c8214..7f38e40fee08 100644 --- a/block/Kconfig +++ b/block/Kconfig | |||
@@ -99,6 +99,12 @@ config BLK_DEV_THROTTLING | |||
99 | 99 | ||
100 | See Documentation/cgroups/blkio-controller.txt for more information. | 100 | See Documentation/cgroups/blkio-controller.txt for more information. |
101 | 101 | ||
102 | config CMDLINE_PARSER | ||
103 | bool "Block device command line partition parser" | ||
104 | default n | ||
105 | ---help--- | ||
106 | Parse the block device partition table from the kernel command line. | ||
107 | |||
102 | menu "Partition Types" | 108 | menu "Partition Types" |
103 | 109 | ||
104 | source "block/partitions/Kconfig" | 110 | source "block/partitions/Kconfig" |
diff --git a/block/Makefile b/block/Makefile index 39b76ba66ffd..4fa4be544ece 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -18,3 +18,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o | |||
18 | 18 | ||
19 | obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o | 19 | obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o |
20 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o | 20 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o |
21 | obj-$(CONFIG_CMDLINE_PARSER) += cmdline-parser.o | ||
diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 4464c823cff2..46cd7bd18b34 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c | |||
@@ -367,7 +367,7 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, | |||
367 | if (!icq) | 367 | if (!icq) |
368 | return NULL; | 368 | return NULL; |
369 | 369 | ||
370 | if (radix_tree_preload(gfp_mask) < 0) { | 370 | if (radix_tree_maybe_preload(gfp_mask) < 0) { |
371 | kmem_cache_free(et->icq_cache, icq); | 371 | kmem_cache_free(et->icq_cache, icq); |
372 | return NULL; | 372 | return NULL; |
373 | } | 373 | } |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5efc5a647183..3aa5b195f4dd 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -29,7 +29,7 @@ queue_var_store(unsigned long *var, const char *page, size_t count) | |||
29 | int err; | 29 | int err; |
30 | unsigned long v; | 30 | unsigned long v; |
31 | 31 | ||
32 | err = strict_strtoul(page, 10, &v); | 32 | err = kstrtoul(page, 10, &v); |
33 | if (err || v > UINT_MAX) | 33 | if (err || v > UINT_MAX) |
34 | return -EINVAL; | 34 | return -EINVAL; |
35 | 35 | ||
diff --git a/block/cmdline-parser.c b/block/cmdline-parser.c new file mode 100644 index 000000000000..cc2637f8674e --- /dev/null +++ b/block/cmdline-parser.c | |||
@@ -0,0 +1,250 @@ | |||
1 | /* | ||
2 | * Parse command line, get partition information | ||
3 | * | ||
4 | * Written by Cai Zhiyong <caizhiyong@huawei.com> | ||
5 | * | ||
6 | */ | ||
7 | #include <linux/buffer_head.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/cmdline-parser.h> | ||
10 | |||
11 | static int parse_subpart(struct cmdline_subpart **subpart, char *partdef) | ||
12 | { | ||
13 | int ret = 0; | ||
14 | struct cmdline_subpart *new_subpart; | ||
15 | |||
16 | *subpart = NULL; | ||
17 | |||
18 | new_subpart = kzalloc(sizeof(struct cmdline_subpart), GFP_KERNEL); | ||
19 | if (!new_subpart) | ||
20 | return -ENOMEM; | ||
21 | |||
22 | if (*partdef == '-') { | ||
23 | new_subpart->size = (sector_t)(~0ULL); | ||
24 | partdef++; | ||
25 | } else { | ||
26 | new_subpart->size = (sector_t)memparse(partdef, &partdef); | ||
27 | if (new_subpart->size < (sector_t)PAGE_SIZE) { | ||
28 | pr_warn("cmdline partition size is invalid."); | ||
29 | ret = -EINVAL; | ||
30 | goto fail; | ||
31 | } | ||
32 | } | ||
33 | |||
34 | if (*partdef == '@') { | ||
35 | partdef++; | ||
36 | new_subpart->from = (sector_t)memparse(partdef, &partdef); | ||
37 | } else { | ||
38 | new_subpart->from = (sector_t)(~0ULL); | ||
39 | } | ||
40 | |||
41 | if (*partdef == '(') { | ||
42 | int length; | ||
43 | char *next = strchr(++partdef, ')'); | ||
44 | |||
45 | if (!next) { | ||
46 | pr_warn("cmdline partition format is invalid."); | ||
47 | ret = -EINVAL; | ||
48 | goto fail; | ||
49 | } | ||
50 | |||
51 | length = min_t(int, next - partdef, | ||
52 | sizeof(new_subpart->name) - 1); | ||
53 | strncpy(new_subpart->name, partdef, length); | ||
54 | new_subpart->name[length] = '\0'; | ||
55 | |||
56 | partdef = ++next; | ||
57 | } else | ||
58 | new_subpart->name[0] = '\0'; | ||
59 | |||
60 | new_subpart->flags = 0; | ||
61 | |||
62 | if (!strncmp(partdef, "ro", 2)) { | ||
63 | new_subpart->flags |= PF_RDONLY; | ||
64 | partdef += 2; | ||
65 | } | ||
66 | |||
67 | if (!strncmp(partdef, "lk", 2)) { | ||
68 | new_subpart->flags |= PF_POWERUP_LOCK; | ||
69 | partdef += 2; | ||
70 | } | ||
71 | |||
72 | *subpart = new_subpart; | ||
73 | return 0; | ||
74 | fail: | ||
75 | kfree(new_subpart); | ||
76 | return ret; | ||
77 | } | ||
78 | |||
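A few illustrative partdef strings and the fields parse_subpart() fills; sizes and offsets stay in bytes here, and add_part() in partitions/cmdline.c later shifts them down to 512-byte sectors:

/*
 *  "1G(data)"              size = 1 << 30,  from = ~0ULL (auto), name "data"
 *  "512k@0x80000(boot)ro"  size = 0x80000,  from = 0x80000,
 *                          name "boot",     flags = PF_RDONLY
 *  "-(rest)"               size = ~0ULL (remainder), from = ~0ULL (auto)
 */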
79 | static void free_subpart(struct cmdline_parts *parts) | ||
80 | { | ||
81 | struct cmdline_subpart *subpart; | ||
82 | |||
83 | while (parts->subpart) { | ||
84 | subpart = parts->subpart; | ||
85 | parts->subpart = subpart->next_subpart; | ||
86 | kfree(subpart); | ||
87 | } | ||
88 | } | ||
89 | |||
90 | static int parse_parts(struct cmdline_parts **parts, const char *bdevdef) | ||
91 | { | ||
92 | int ret = -EINVAL; | ||
93 | char *next; | ||
94 | int length; | ||
95 | struct cmdline_subpart **next_subpart; | ||
96 | struct cmdline_parts *newparts; | ||
97 | char buf[BDEVNAME_SIZE + 32 + 4]; | ||
98 | |||
99 | *parts = NULL; | ||
100 | |||
101 | newparts = kzalloc(sizeof(struct cmdline_parts), GFP_KERNEL); | ||
102 | if (!newparts) | ||
103 | return -ENOMEM; | ||
104 | |||
105 | next = strchr(bdevdef, ':'); | ||
106 | if (!next) { | ||
107 | pr_warn("cmdline partition has no block device."); | ||
108 | goto fail; | ||
109 | } | ||
110 | |||
111 | length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1); | ||
112 | strncpy(newparts->name, bdevdef, length); | ||
113 | newparts->name[length] = '\0'; | ||
114 | newparts->nr_subparts = 0; | ||
115 | |||
116 | next_subpart = &newparts->subpart; | ||
117 | |||
118 | while (next && *(++next)) { | ||
119 | bdevdef = next; | ||
120 | next = strchr(bdevdef, ','); | ||
121 | |||
122 | length = (!next) ? (sizeof(buf) - 1) : | ||
123 | min_t(int, next - bdevdef, sizeof(buf) - 1); | ||
124 | |||
125 | strncpy(buf, bdevdef, length); | ||
126 | buf[length] = '\0'; | ||
127 | |||
128 | ret = parse_subpart(next_subpart, buf); | ||
129 | if (ret) | ||
130 | goto fail; | ||
131 | |||
132 | newparts->nr_subparts++; | ||
133 | next_subpart = &(*next_subpart)->next_subpart; | ||
134 | } | ||
135 | |||
136 | if (!newparts->subpart) { | ||
137 | pr_warn("cmdline partition has no valid partition."); | ||
138 | ret = -EINVAL; | ||
139 | goto fail; | ||
140 | } | ||
141 | |||
142 | *parts = newparts; | ||
143 | |||
144 | return 0; | ||
145 | fail: | ||
146 | free_subpart(newparts); | ||
147 | kfree(newparts); | ||
148 | return ret; | ||
149 | } | ||
150 | |||
151 | void cmdline_parts_free(struct cmdline_parts **parts) | ||
152 | { | ||
153 | struct cmdline_parts *next_parts; | ||
154 | |||
155 | while (*parts) { | ||
156 | next_parts = (*parts)->next_parts; | ||
157 | free_subpart(*parts); | ||
158 | kfree(*parts); | ||
159 | *parts = next_parts; | ||
160 | } | ||
161 | } | ||
162 | |||
163 | int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline) | ||
164 | { | ||
165 | int ret; | ||
166 | char *buf; | ||
167 | char *pbuf; | ||
168 | char *next; | ||
169 | struct cmdline_parts **next_parts; | ||
170 | |||
171 | *parts = NULL; | ||
172 | |||
173 | next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL); | ||
174 | if (!buf) | ||
175 | return -ENOMEM; | ||
176 | |||
177 | next_parts = parts; | ||
178 | |||
179 | while (next && *pbuf) { | ||
180 | next = strchr(pbuf, ';'); | ||
181 | if (next) | ||
182 | *next = '\0'; | ||
183 | |||
184 | ret = parse_parts(next_parts, pbuf); | ||
185 | if (ret) | ||
186 | goto fail; | ||
187 | |||
188 | if (next) | ||
189 | pbuf = ++next; | ||
190 | |||
191 | next_parts = &(*next_parts)->next_parts; | ||
192 | } | ||
193 | |||
194 | if (!*parts) { | ||
195 | pr_warn("cmdline partition has no valid partition."); | ||
196 | ret = -EINVAL; | ||
197 | goto fail; | ||
198 | } | ||
199 | |||
200 | ret = 0; | ||
201 | done: | ||
202 | kfree(buf); | ||
203 | return ret; | ||
204 | |||
205 | fail: | ||
206 | cmdline_parts_free(parts); | ||
207 | goto done; | ||
208 | } | ||
209 | |||
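A minimal usage sketch tying parse, lookup and free together (illustrative only: error handling is trimmed and the partition string is made up):

static int cmdline_parts_demo(void)
{
	struct cmdline_parts *parts, *p;
	int ret;

	ret = cmdline_parts_parse(&parts, "sda:256M(boot),-(root)");
	if (ret)
		return ret;

	p = cmdline_parts_find(parts, "sda");
	if (p)
		pr_info("%s: %d subpart(s)\n", p->name, p->nr_subparts);

	cmdline_parts_free(&parts);
	return 0;
}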
210 | struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, | ||
211 | const char *bdev) | ||
212 | { | ||
213 | while (parts && strncmp(bdev, parts->name, sizeof(parts->name))) | ||
214 | parts = parts->next_parts; | ||
215 | return parts; | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * add_part() | ||
220 | *    0 on success. | ||
221 | *    1 if no more partitions can be added. | ||
222 | */ | ||
223 | void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, | ||
224 | int slot, | ||
225 | int (*add_part)(int, struct cmdline_subpart *, void *), | ||
226 | void *param) | ||
227 | |||
228 | { | ||
229 | sector_t from = 0; | ||
230 | struct cmdline_subpart *subpart; | ||
231 | |||
232 | for (subpart = parts->subpart; subpart; | ||
233 | subpart = subpart->next_subpart, slot++) { | ||
234 | if (subpart->from == (sector_t)(~0ULL)) | ||
235 | subpart->from = from; | ||
236 | else | ||
237 | from = subpart->from; | ||
238 | |||
239 | if (from >= disk_size) | ||
240 | break; | ||
241 | |||
242 | if (subpart->size > (disk_size - from)) | ||
243 | subpart->size = disk_size - from; | ||
244 | |||
245 | from += subpart->size; | ||
246 | |||
247 | if (add_part(slot, subpart, param)) | ||
248 | break; | ||
249 | } | ||
250 | } | ||
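A worked pass through the loop above, with toy byte counts and disk_size = 1024:

/*
 *  "256(a)"      from auto -> 0,    size 256,                cursor -> 256
 *  "512@256(b)"  from = 256,        size 512,                cursor -> 768
 *  "-(c)"        from auto -> 768,  size ~0 clamped to 256,  cursor -> 1024
 *
 * A subpart whose offset lands at or beyond disk_size ends the loop, as
 * does an add_part() callback returning nonzero (partition table full).
 */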
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 7e5d474dc6ba..fbd5a67cb773 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c | |||
@@ -70,7 +70,7 @@ static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev, | |||
70 | return ret; | 70 | return ret; |
71 | 71 | ||
72 | ret = copy_to_user(ugeo, &geo, 4); | 72 | ret = copy_to_user(ugeo, &geo, 4); |
73 | ret |= __put_user(geo.start, &ugeo->start); | 73 | ret |= put_user(geo.start, &ugeo->start); |
74 | if (ret) | 74 | if (ret) |
75 | ret = -EFAULT; | 75 | ret = -EFAULT; |
76 | 76 | ||
diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig index 4cebb2f0d2f4..87a32086535d 100644 --- a/block/partitions/Kconfig +++ b/block/partitions/Kconfig | |||
@@ -260,3 +260,10 @@ config SYSV68_PARTITION | |||
260 | partition table format used by Motorola Delta machines (using | 260 | partition table format used by Motorola Delta machines (using |
261 | sysv68). | 261 | sysv68). |
262 | Otherwise, say N. | 262 | Otherwise, say N. |
263 | |||
264 | config CMDLINE_PARTITION | ||
265 | bool "Command line partition support" if PARTITION_ADVANCED | ||
266 | select CMDLINE_PARSER | ||
267 | help | ||
268 | Say Y here if you want to read the partition table from bootargs. | ||
269 | The format for the command line is just like mtdparts. | ||
diff --git a/block/partitions/Makefile b/block/partitions/Makefile index 2be4d7ba4e3a..37a95270503c 100644 --- a/block/partitions/Makefile +++ b/block/partitions/Makefile | |||
@@ -8,6 +8,7 @@ obj-$(CONFIG_ACORN_PARTITION) += acorn.o | |||
8 | obj-$(CONFIG_AMIGA_PARTITION) += amiga.o | 8 | obj-$(CONFIG_AMIGA_PARTITION) += amiga.o |
9 | obj-$(CONFIG_ATARI_PARTITION) += atari.o | 9 | obj-$(CONFIG_ATARI_PARTITION) += atari.o |
10 | obj-$(CONFIG_AIX_PARTITION) += aix.o | 10 | obj-$(CONFIG_AIX_PARTITION) += aix.o |
11 | obj-$(CONFIG_CMDLINE_PARTITION) += cmdline.o | ||
11 | obj-$(CONFIG_MAC_PARTITION) += mac.o | 12 | obj-$(CONFIG_MAC_PARTITION) += mac.o |
12 | obj-$(CONFIG_LDM_PARTITION) += ldm.o | 13 | obj-$(CONFIG_LDM_PARTITION) += ldm.o |
13 | obj-$(CONFIG_MSDOS_PARTITION) += msdos.o | 14 | obj-$(CONFIG_MSDOS_PARTITION) += msdos.o |
diff --git a/block/partitions/check.c b/block/partitions/check.c index 19ba207ea7d1..9ac1df74f699 100644 --- a/block/partitions/check.c +++ b/block/partitions/check.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "efi.h" | 34 | #include "efi.h" |
35 | #include "karma.h" | 35 | #include "karma.h" |
36 | #include "sysv68.h" | 36 | #include "sysv68.h" |
37 | #include "cmdline.h" | ||
37 | 38 | ||
38 | int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ | 39 | int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ |
39 | 40 | ||
@@ -65,6 +66,9 @@ static int (*check_part[])(struct parsed_partitions *) = { | |||
65 | adfspart_check_ADFS, | 66 | adfspart_check_ADFS, |
66 | #endif | 67 | #endif |
67 | 68 | ||
69 | #ifdef CONFIG_CMDLINE_PARTITION | ||
70 | cmdline_partition, | ||
71 | #endif | ||
68 | #ifdef CONFIG_EFI_PARTITION | 72 | #ifdef CONFIG_EFI_PARTITION |
69 | efi_partition, /* this must come before msdos */ | 73 | efi_partition, /* this must come before msdos */ |
70 | #endif | 74 | #endif |
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c new file mode 100644 index 000000000000..56cf4ffad51e --- /dev/null +++ b/block/partitions/cmdline.c | |||
@@ -0,0 +1,99 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2013 HUAWEI | ||
3 | * Author: Cai Zhiyong <caizhiyong@huawei.com> | ||
4 | * | ||
5 | * Read block device partition table from command line. | ||
6 | * The partitions are used on embedded devices with a fixed block | ||
7 | * device (eMMC). There is no MBR, which saves storage space, and the | ||
8 | * bootloader can access data on the block device by absolute address. | ||
9 | * Users can easily change the partitions. | ||
10 | * | ||
11 | * The format for the command line is just like mtdparts. | ||
12 | * | ||
13 | * For detailed configuration, see "Documentation/block/cmdline-partition.txt" | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #include <linux/cmdline-parser.h> | ||
18 | |||
19 | #include "check.h" | ||
20 | #include "cmdline.h" | ||
21 | |||
22 | static char *cmdline; | ||
23 | static struct cmdline_parts *bdev_parts; | ||
24 | |||
25 | static int add_part(int slot, struct cmdline_subpart *subpart, void *param) | ||
26 | { | ||
27 | int label_min; | ||
28 | struct partition_meta_info *info; | ||
29 | char tmp[sizeof(info->volname) + 4]; | ||
30 | struct parsed_partitions *state = (struct parsed_partitions *)param; | ||
31 | |||
32 | if (slot >= state->limit) | ||
33 | return 1; | ||
34 | |||
35 | put_partition(state, slot, subpart->from >> 9, | ||
36 | subpart->size >> 9); | ||
37 | |||
38 | info = &state->parts[slot].info; | ||
39 | |||
40 | label_min = min_t(int, sizeof(info->volname) - 1, | ||
41 | sizeof(subpart->name)); | ||
42 | strncpy(info->volname, subpart->name, label_min); | ||
43 | info->volname[label_min] = '\0'; | ||
44 | |||
45 | snprintf(tmp, sizeof(tmp), "(%s)", info->volname); | ||
46 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
47 | |||
48 | state->parts[slot].has_info = true; | ||
49 | |||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | static int __init cmdline_parts_setup(char *s) | ||
54 | { | ||
55 | cmdline = s; | ||
56 | return 1; | ||
57 | } | ||
58 | __setup("blkdevparts=", cmdline_parts_setup); | ||
59 | |||
60 | /* | ||
61 | * Purpose: allocate cmdline partitions. | ||
62 | * Returns: | ||
63 | * -1 if unable to read the partition table | ||
64 | * 0 if this isn't our partition table | ||
65 | * 1 if successful | ||
66 | */ | ||
67 | int cmdline_partition(struct parsed_partitions *state) | ||
68 | { | ||
69 | sector_t disk_size; | ||
70 | char bdev[BDEVNAME_SIZE]; | ||
71 | struct cmdline_parts *parts; | ||
72 | |||
73 | if (cmdline) { | ||
74 | if (bdev_parts) | ||
75 | cmdline_parts_free(&bdev_parts); | ||
76 | |||
77 | if (cmdline_parts_parse(&bdev_parts, cmdline)) { | ||
78 | cmdline = NULL; | ||
79 | return -1; | ||
80 | } | ||
81 | cmdline = NULL; | ||
82 | } | ||
83 | |||
84 | if (!bdev_parts) | ||
85 | return 0; | ||
86 | |||
87 | bdevname(state->bdev, bdev); | ||
88 | parts = cmdline_parts_find(bdev_parts, bdev); | ||
89 | if (!parts) | ||
90 | return 0; | ||
91 | |||
92 | disk_size = get_capacity(state->bdev->bd_disk) << 9; | ||
93 | |||
94 | cmdline_parts_set(parts, disk_size, 1, add_part, (void *)state); | ||
95 | |||
96 | strlcat(state->pp_buf, "\n", PAGE_SIZE); | ||
97 | |||
98 | return 1; | ||
99 | } | ||
diff --git a/block/partitions/cmdline.h b/block/partitions/cmdline.h new file mode 100644 index 000000000000..26e0f8da1414 --- /dev/null +++ b/block/partitions/cmdline.h | |||
@@ -0,0 +1,2 @@ | |||
1 | |||
2 | int cmdline_partition(struct parsed_partitions *state); | ||
diff --git a/block/partitions/efi.c b/block/partitions/efi.c index c85fc895ecdb..1a5ec9a03c00 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c | |||
@@ -25,6 +25,9 @@ | |||
25 | * TODO: | 25 | * TODO: |
26 | * | 26 | * |
27 | * Changelog: | 27 | * Changelog: |
28 | * Mon August 5th, 2013 Davidlohr Bueso <davidlohr@hp.com> | ||
29 | * - detect hybrid MBRs, tighter pMBR checking & cleanups. | ||
30 | * | ||
28 | * Mon Nov 09 2004 Matt Domsch <Matt_Domsch@dell.com> | 31 | * Mon Nov 09 2004 Matt Domsch <Matt_Domsch@dell.com> |
29 | * - test for valid PMBR and valid PGPT before ever reading | 32 | * - test for valid PMBR and valid PGPT before ever reading |
30 | * AGPT, allow override with 'gpt' kernel command line option. | 33 | * AGPT, allow override with 'gpt' kernel command line option. |
@@ -149,34 +152,80 @@ static u64 last_lba(struct block_device *bdev) | |||
149 | bdev_logical_block_size(bdev)) - 1ULL; | 152 | bdev_logical_block_size(bdev)) - 1ULL; |
150 | } | 153 | } |
151 | 154 | ||
152 | static inline int | 155 | static inline int pmbr_part_valid(gpt_mbr_record *part) |
153 | pmbr_part_valid(struct partition *part) | ||
154 | { | 156 | { |
155 | if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT && | 157 | if (part->os_type != EFI_PMBR_OSTYPE_EFI_GPT) |
156 | le32_to_cpu(part->start_sect) == 1UL) | 158 | goto invalid; |
157 | return 1; | 159 | |
158 | return 0; | 160 | /* set to 0x00000001 (i.e., the LBA of the GPT Partition Header) */ |
161 | if (le32_to_cpu(part->starting_lba) != GPT_PRIMARY_PARTITION_TABLE_LBA) | ||
162 | goto invalid; | ||
163 | |||
164 | return GPT_MBR_PROTECTIVE; | ||
165 | invalid: | ||
166 | return 0; | ||
159 | } | 167 | } |
160 | 168 | ||
161 | /** | 169 | /** |
162 | * is_pmbr_valid(): test Protective MBR for validity | 170 | * is_pmbr_valid(): test Protective MBR for validity |
163 | * @mbr: pointer to a legacy mbr structure | 171 | * @mbr: pointer to a legacy mbr structure |
172 | * @total_sectors: number of sectors in the device | ||
164 | * | 173 | * |
165 | * Description: Returns 1 if PMBR is valid, 0 otherwise. | 174 | * Description: Checks for a valid protective or hybrid |
166 | * Validity depends on two things: | 175 | * master boot record (MBR). The validity of a pMBR depends |
176 | * on all of the following properties: | ||
167 | * 1) MSDOS signature is in the last two bytes of the MBR | 177 | * 1) MSDOS signature is in the last two bytes of the MBR |
168 | * 2) One partition of type 0xEE is found | 178 | * 2) One partition of type 0xEE is found |
179 | * | ||
180 | * In addition, a hybrid MBR will have up to three additional | ||
181 | * primary partitions, which point to the same space that's | ||
182 | * marked out by up to three GPT partitions. | ||
183 | * | ||
184 | * Returns 0 upon invalid MBR, or GPT_MBR_PROTECTIVE or | ||
185 | * GPT_MBR_HYBRID depending on the device layout. | ||
169 | */ | 186 | */ |
170 | static int | 187 | static int is_pmbr_valid(legacy_mbr *mbr, sector_t total_sectors) |
171 | is_pmbr_valid(legacy_mbr *mbr) | ||
172 | { | 188 | { |
173 | int i; | 189 | int i, part = 0, ret = 0; /* invalid by default */ |
190 | |||
174 | if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE) | 191 | if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE) |
175 | return 0; | 192 | goto done; |
193 | |||
194 | for (i = 0; i < 4; i++) { | ||
195 | ret = pmbr_part_valid(&mbr->partition_record[i]); | ||
196 | if (ret == GPT_MBR_PROTECTIVE) { | ||
197 | part = i; | ||
198 | /* | ||
199 | * Ok, we at least know that there's a protective MBR, | ||
200 | * now check if there are other partition types for | ||
201 | * hybrid MBR. | ||
202 | */ | ||
203 | goto check_hybrid; | ||
204 | } | ||
205 | } | ||
206 | |||
207 | if (ret != GPT_MBR_PROTECTIVE) | ||
208 | goto done; | ||
209 | check_hybrid: | ||
176 | for (i = 0; i < 4; i++) | 210 | for (i = 0; i < 4; i++) |
177 | if (pmbr_part_valid(&mbr->partition_record[i])) | 211 | if ((mbr->partition_record[i].os_type != |
178 | return 1; | 212 | EFI_PMBR_OSTYPE_EFI_GPT) && |
179 | return 0; | 213 | (mbr->partition_record[i].os_type != 0x00)) |
214 | ret = GPT_MBR_HYBRID; | ||
215 | |||
216 | /* | ||
217 | * Protective MBRs take up the lesser of the whole disk | ||
218 | * or 2 TiB (32bit LBA), ignoring the rest of the disk. | ||
219 | * | ||
220 | * Hybrid MBRs do not necessarily comply with this. | ||
221 | */ | ||
222 | if (ret == GPT_MBR_PROTECTIVE) { | ||
223 | if (le32_to_cpu(mbr->partition_record[part].size_in_lba) != | ||
224 | min((uint32_t) total_sectors - 1, 0xFFFFFFFF)) | ||
225 | ret = 0; | ||
226 | } | ||
227 | done: | ||
228 | return ret; | ||
180 | } | 229 | } |
181 | 230 | ||
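A worked example of the protective-size rule enforced above, assuming 512-byte sectors:

/*
 *  4 GiB disk : total_sectors = 0x800000  -> size_in_lba must be 0x7fffff
 *  8 TiB disk : total_sectors = 2^34      -> 32-bit LBA field saturates,
 *                                            size_in_lba must be 0xffffffff
 *
 * Hybrid MBRs are deliberately exempt from this size check.
 */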
182 | /** | 231 | /** |
@@ -243,8 +292,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, | |||
243 | return NULL; | 292 | return NULL; |
244 | 293 | ||
245 | if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba), | 294 | if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba), |
246 | (u8 *) pte, | 295 | (u8 *) pte, count) < count) { |
247 | count) < count) { | ||
248 | kfree(pte); | 296 | kfree(pte); |
249 | pte=NULL; | 297 | pte=NULL; |
250 | return NULL; | 298 | return NULL; |
@@ -364,7 +412,12 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, | |||
364 | (unsigned long long)lastlba); | 412 | (unsigned long long)lastlba); |
365 | goto fail; | 413 | goto fail; |
366 | } | 414 | } |
367 | 415 | if (le64_to_cpu((*gpt)->last_usable_lba) < le64_to_cpu((*gpt)->first_usable_lba)) { | |
416 | pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n", | ||
417 | (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), | ||
418 | (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba)); | ||
419 | goto fail; | ||
420 | } | ||
368 | /* Check that sizeof_partition_entry has the correct value */ | 421 | /* Check that sizeof_partition_entry has the correct value */ |
369 | if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) { | 422 | if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) { |
370 | pr_debug("GUID Partitition Entry Size check failed.\n"); | 423 | pr_debug("GUID Partitition Entry Size check failed.\n"); |
@@ -429,44 +482,42 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) | |||
429 | if (!pgpt || !agpt) | 482 | if (!pgpt || !agpt) |
430 | return; | 483 | return; |
431 | if (le64_to_cpu(pgpt->my_lba) != le64_to_cpu(agpt->alternate_lba)) { | 484 | if (le64_to_cpu(pgpt->my_lba) != le64_to_cpu(agpt->alternate_lba)) { |
432 | printk(KERN_WARNING | 485 | pr_warn("GPT:Primary header LBA != Alt. header alternate_lba\n"); |
433 | "GPT:Primary header LBA != Alt. header alternate_lba\n"); | 486 | pr_warn("GPT:%lld != %lld\n", |
434 | printk(KERN_WARNING "GPT:%lld != %lld\n", | ||
435 | (unsigned long long)le64_to_cpu(pgpt->my_lba), | 487 | (unsigned long long)le64_to_cpu(pgpt->my_lba), |
436 | (unsigned long long)le64_to_cpu(agpt->alternate_lba)); | 488 | (unsigned long long)le64_to_cpu(agpt->alternate_lba)); |
437 | error_found++; | 489 | error_found++; |
438 | } | 490 | } |
439 | if (le64_to_cpu(pgpt->alternate_lba) != le64_to_cpu(agpt->my_lba)) { | 491 | if (le64_to_cpu(pgpt->alternate_lba) != le64_to_cpu(agpt->my_lba)) { |
440 | printk(KERN_WARNING | 492 | pr_warn("GPT:Primary header alternate_lba != Alt. header my_lba\n"); |
441 | "GPT:Primary header alternate_lba != Alt. header my_lba\n"); | 493 | pr_warn("GPT:%lld != %lld\n", |
442 | printk(KERN_WARNING "GPT:%lld != %lld\n", | ||
443 | (unsigned long long)le64_to_cpu(pgpt->alternate_lba), | 494 | (unsigned long long)le64_to_cpu(pgpt->alternate_lba), |
444 | (unsigned long long)le64_to_cpu(agpt->my_lba)); | 495 | (unsigned long long)le64_to_cpu(agpt->my_lba)); |
445 | error_found++; | 496 | error_found++; |
446 | } | 497 | } |
447 | if (le64_to_cpu(pgpt->first_usable_lba) != | 498 | if (le64_to_cpu(pgpt->first_usable_lba) != |
448 | le64_to_cpu(agpt->first_usable_lba)) { | 499 | le64_to_cpu(agpt->first_usable_lba)) { |
449 | printk(KERN_WARNING "GPT:first_usable_lbas don't match.\n"); | 500 | pr_warn("GPT:first_usable_lbas don't match.\n"); |
450 | printk(KERN_WARNING "GPT:%lld != %lld\n", | 501 | pr_warn("GPT:%lld != %lld\n", |
451 | (unsigned long long)le64_to_cpu(pgpt->first_usable_lba), | 502 | (unsigned long long)le64_to_cpu(pgpt->first_usable_lba), |
452 | (unsigned long long)le64_to_cpu(agpt->first_usable_lba)); | 503 | (unsigned long long)le64_to_cpu(agpt->first_usable_lba)); |
453 | error_found++; | 504 | error_found++; |
454 | } | 505 | } |
455 | if (le64_to_cpu(pgpt->last_usable_lba) != | 506 | if (le64_to_cpu(pgpt->last_usable_lba) != |
456 | le64_to_cpu(agpt->last_usable_lba)) { | 507 | le64_to_cpu(agpt->last_usable_lba)) { |
457 | printk(KERN_WARNING "GPT:last_usable_lbas don't match.\n"); | 508 | pr_warn("GPT:last_usable_lbas don't match.\n"); |
458 | printk(KERN_WARNING "GPT:%lld != %lld\n", | 509 | pr_warn("GPT:%lld != %lld\n", |
459 | (unsigned long long)le64_to_cpu(pgpt->last_usable_lba), | 510 | (unsigned long long)le64_to_cpu(pgpt->last_usable_lba), |
460 | (unsigned long long)le64_to_cpu(agpt->last_usable_lba)); | 511 | (unsigned long long)le64_to_cpu(agpt->last_usable_lba)); |
461 | error_found++; | 512 | error_found++; |
462 | } | 513 | } |
463 | if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) { | 514 | if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) { |
464 | printk(KERN_WARNING "GPT:disk_guids don't match.\n"); | 515 | pr_warn("GPT:disk_guids don't match.\n"); |
465 | error_found++; | 516 | error_found++; |
466 | } | 517 | } |
467 | if (le32_to_cpu(pgpt->num_partition_entries) != | 518 | if (le32_to_cpu(pgpt->num_partition_entries) != |
468 | le32_to_cpu(agpt->num_partition_entries)) { | 519 | le32_to_cpu(agpt->num_partition_entries)) { |
469 | printk(KERN_WARNING "GPT:num_partition_entries don't match: " | 520 | pr_warn("GPT:num_partition_entries don't match: " |
470 | "0x%x != 0x%x\n", | 521 | "0x%x != 0x%x\n", |
471 | le32_to_cpu(pgpt->num_partition_entries), | 522 | le32_to_cpu(pgpt->num_partition_entries), |
472 | le32_to_cpu(agpt->num_partition_entries)); | 523 | le32_to_cpu(agpt->num_partition_entries)); |
@@ -474,8 +525,7 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) | |||
474 | } | 525 | } |
475 | if (le32_to_cpu(pgpt->sizeof_partition_entry) != | 526 | if (le32_to_cpu(pgpt->sizeof_partition_entry) != |
476 | le32_to_cpu(agpt->sizeof_partition_entry)) { | 527 | le32_to_cpu(agpt->sizeof_partition_entry)) { |
477 | printk(KERN_WARNING | 528 | pr_warn("GPT:sizeof_partition_entry values don't match: " |
478 | "GPT:sizeof_partition_entry values don't match: " | ||
479 | "0x%x != 0x%x\n", | 529 | "0x%x != 0x%x\n", |
480 | le32_to_cpu(pgpt->sizeof_partition_entry), | 530 | le32_to_cpu(pgpt->sizeof_partition_entry), |
481 | le32_to_cpu(agpt->sizeof_partition_entry)); | 531 | le32_to_cpu(agpt->sizeof_partition_entry)); |
@@ -483,34 +533,30 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) | |||
483 | } | 533 | } |
484 | if (le32_to_cpu(pgpt->partition_entry_array_crc32) != | 534 | if (le32_to_cpu(pgpt->partition_entry_array_crc32) != |
485 | le32_to_cpu(agpt->partition_entry_array_crc32)) { | 535 | le32_to_cpu(agpt->partition_entry_array_crc32)) { |
486 | printk(KERN_WARNING | 536 | pr_warn("GPT:partition_entry_array_crc32 values don't match: " |
487 | "GPT:partition_entry_array_crc32 values don't match: " | ||
488 | "0x%x != 0x%x\n", | 537 | "0x%x != 0x%x\n", |
489 | le32_to_cpu(pgpt->partition_entry_array_crc32), | 538 | le32_to_cpu(pgpt->partition_entry_array_crc32), |
490 | le32_to_cpu(agpt->partition_entry_array_crc32)); | 539 | le32_to_cpu(agpt->partition_entry_array_crc32)); |
491 | error_found++; | 540 | error_found++; |
492 | } | 541 | } |
493 | if (le64_to_cpu(pgpt->alternate_lba) != lastlba) { | 542 | if (le64_to_cpu(pgpt->alternate_lba) != lastlba) { |
494 | printk(KERN_WARNING | 543 | pr_warn("GPT:Primary header thinks Alt. header is not at the end of the disk.\n"); |
495 | "GPT:Primary header thinks Alt. header is not at the end of the disk.\n"); | 544 | pr_warn("GPT:%lld != %lld\n", |
496 | printk(KERN_WARNING "GPT:%lld != %lld\n", | ||
497 | (unsigned long long)le64_to_cpu(pgpt->alternate_lba), | 545 | (unsigned long long)le64_to_cpu(pgpt->alternate_lba), |
498 | (unsigned long long)lastlba); | 546 | (unsigned long long)lastlba); |
499 | error_found++; | 547 | error_found++; |
500 | } | 548 | } |
501 | 549 | ||
502 | if (le64_to_cpu(agpt->my_lba) != lastlba) { | 550 | if (le64_to_cpu(agpt->my_lba) != lastlba) { |
503 | printk(KERN_WARNING | 551 | pr_warn("GPT:Alternate GPT header not at the end of the disk.\n"); |
504 | "GPT:Alternate GPT header not at the end of the disk.\n"); | 552 | pr_warn("GPT:%lld != %lld\n", |
505 | printk(KERN_WARNING "GPT:%lld != %lld\n", | ||
506 | (unsigned long long)le64_to_cpu(agpt->my_lba), | 553 | (unsigned long long)le64_to_cpu(agpt->my_lba), |
507 | (unsigned long long)lastlba); | 554 | (unsigned long long)lastlba); |
508 | error_found++; | 555 | error_found++; |
509 | } | 556 | } |
510 | 557 | ||
511 | if (error_found) | 558 | if (error_found) |
512 | printk(KERN_WARNING | 559 | pr_warn("GPT: Use GNU Parted to correct GPT errors.\n"); |
513 | "GPT: Use GNU Parted to correct GPT errors.\n"); | ||
514 | return; | 560 | return; |
515 | } | 561 | } |
516 | 562 | ||
@@ -536,6 +582,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, | |||
536 | gpt_header *pgpt = NULL, *agpt = NULL; | 582 | gpt_header *pgpt = NULL, *agpt = NULL; |
537 | gpt_entry *pptes = NULL, *aptes = NULL; | 583 | gpt_entry *pptes = NULL, *aptes = NULL; |
538 | legacy_mbr *legacymbr; | 584 | legacy_mbr *legacymbr; |
585 | sector_t total_sectors = i_size_read(state->bdev->bd_inode) >> 9; | ||
539 | u64 lastlba; | 586 | u64 lastlba; |
540 | 587 | ||
541 | if (!ptes) | 588 | if (!ptes) |
@@ -543,17 +590,22 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, | |||
543 | 590 | ||
544 | lastlba = last_lba(state->bdev); | 591 | lastlba = last_lba(state->bdev); |
545 | if (!force_gpt) { | 592 | if (!force_gpt) { |
546 | /* This will be added to the EFI Spec. per Intel after v1.02. */ | 593 | /* This will be added to the EFI Spec. per Intel after v1.02. */ |
547 | legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL); | 594 | legacymbr = kzalloc(sizeof(*legacymbr), GFP_KERNEL); |
548 | if (legacymbr) { | 595 | if (!legacymbr) |
549 | read_lba(state, 0, (u8 *) legacymbr, | 596 | goto fail; |
550 | sizeof (*legacymbr)); | 597 | |
551 | good_pmbr = is_pmbr_valid(legacymbr); | 598 | read_lba(state, 0, (u8 *)legacymbr, sizeof(*legacymbr)); |
552 | kfree(legacymbr); | 599 | good_pmbr = is_pmbr_valid(legacymbr, total_sectors); |
553 | } | 600 | kfree(legacymbr); |
554 | if (!good_pmbr) | 601 | |
555 | goto fail; | 602 | if (!good_pmbr) |
556 | } | 603 | goto fail; |
604 | |||
605 | pr_debug("Device has a %s MBR\n", | ||
606 | good_pmbr == GPT_MBR_PROTECTIVE ? | ||
607 | "protective" : "hybrid"); | ||
608 | } | ||
557 | 609 | ||
558 | good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA, | 610 | good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA, |
559 | &pgpt, &pptes); | 611 | &pgpt, &pptes); |
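The rework above lets is_pmbr_valid() report which of the two legal MBR layouts it found: protective (a single 0xEE record covering the whole disk) or hybrid (an 0xEE record coexisting with real partitions). A compilable userspace sketch of that classification follows; the record layout and acceptance rules are simplified from the patch, and the names are illustrative rather than the kernel's.

```c
#include <stdint.h>
#include <stdio.h>

#define MBR_PROTECTIVE 1
#define MBR_HYBRID     2

struct mbr_rec {
	uint8_t  os_type;       /* 0xEE marks a GPT protective entry */
	uint32_t starting_lba;  /* already converted from little endian */
	uint32_t size_in_lba;
};

/* Classify a 4-entry record table; returns 0 if neither layout fits. */
static int classify_pmbr(const struct mbr_rec r[4], uint64_t total_sectors)
{
	int i, part = -1;

	for (i = 0; i < 4; i++)
		if (r[i].os_type == 0xEE) { part = i; break; }
	if (part < 0)
		return 0;

	/* Protective: the 0xEE entry starts at LBA 1 and covers the disk
	 * (0xFFFFFFFF when the disk is too big for a 32-bit count). */
	if (r[part].starting_lba == 1 &&
	    (r[part].size_in_lba == total_sectors - 1 ||
	     r[part].size_in_lba == 0xFFFFFFFF))
		return MBR_PROTECTIVE;

	/* Otherwise treat a well-formed 0xEE entry as part of a hybrid MBR. */
	return MBR_HYBRID;
}

int main(void)
{
	struct mbr_rec r[4] = { { 0xEE, 1, 0xFFFFFFFF } };

	printf("layout = %d\n", classify_pmbr(r, 1 << 30)); /* prints 1 */
	return 0;
}
```

Passing total_sectors down, as the new find_valid_gpt() does, is what allows the protective case to insist the 0xEE entry actually spans the disk.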
@@ -576,11 +628,8 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, | |||
576 | *ptes = pptes; | 628 | *ptes = pptes; |
577 | kfree(agpt); | 629 | kfree(agpt); |
578 | kfree(aptes); | 630 | kfree(aptes); |
579 | if (!good_agpt) { | 631 | if (!good_agpt) |
580 | printk(KERN_WARNING | 632 | pr_warn("Alternate GPT is invalid, using primary GPT.\n"); |
581 | "Alternate GPT is invalid, " | ||
582 | "using primary GPT.\n"); | ||
583 | } | ||
584 | return 1; | 633 | return 1; |
585 | } | 634 | } |
586 | else if (good_agpt) { | 635 | else if (good_agpt) { |
@@ -588,8 +637,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, | |||
588 | *ptes = aptes; | 637 | *ptes = aptes; |
589 | kfree(pgpt); | 638 | kfree(pgpt); |
590 | kfree(pptes); | 639 | kfree(pptes); |
591 | printk(KERN_WARNING | 640 | pr_warn("Primary GPT is invalid, using alternate GPT.\n"); |
592 | "Primary GPT is invalid, using alternate GPT.\n"); | ||
593 | return 1; | 641 | return 1; |
594 | } | 642 | } |
595 | 643 | ||
@@ -651,8 +699,7 @@ int efi_partition(struct parsed_partitions *state) | |||
651 | put_partition(state, i+1, start * ssz, size * ssz); | 699 | put_partition(state, i+1, start * ssz, size * ssz); |
652 | 700 | ||
653 | /* If this is a RAID volume, tell md */ | 701 | /* If this is a RAID volume, tell md */ |
654 | if (!efi_guidcmp(ptes[i].partition_type_guid, | 702 | if (!efi_guidcmp(ptes[i].partition_type_guid, PARTITION_LINUX_RAID_GUID)) |
655 | PARTITION_LINUX_RAID_GUID)) | ||
656 | state->parts[i + 1].flags = ADDPART_FLAG_RAID; | 703 | state->parts[i + 1].flags = ADDPART_FLAG_RAID; |
657 | 704 | ||
658 | info = &state->parts[i + 1].info; | 705 | info = &state->parts[i + 1].info; |
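The RAID hook above hinges on efi_guidcmp(), which is in effect a 16-byte memcmp of the on-disk partition type GUID. A standalone sketch of the same test; the constant below is the widely documented Linux RAID type GUID (A19D880F-05FC-4D3B-A006-743F0F84911E), written in the mixed-endian byte order GPT uses on disk, and is included here for illustration.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* GUIDs are stored on disk as 16 raw bytes; equality is just memcmp. */
typedef struct { uint8_t b[16]; } efi_guid_t;

static int guid_equal(const efi_guid_t *a, const efi_guid_t *b)
{
	return memcmp(a->b, b->b, sizeof(a->b)) == 0;
}

/* Linux RAID partition type: the first three fields are little-endian
 * on disk, the final eight bytes are stored as written. */
static const efi_guid_t linux_raid_guid = { {
	0x0f, 0x88, 0x9d, 0xa1, 0xfc, 0x05, 0x3b, 0x4d,
	0xa0, 0x06, 0x74, 0x3f, 0x0f, 0x84, 0x91, 0x1e
} };

int main(void)
{
	efi_guid_t pt = linux_raid_guid;

	if (guid_equal(&pt, &linux_raid_guid))
		printf("RAID member: flag partition for md autodetect\n");
	return 0;
}
```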
diff --git a/block/partitions/efi.h b/block/partitions/efi.h index b69ab729558f..4efcafba7e64 100644 --- a/block/partitions/efi.h +++ b/block/partitions/efi.h | |||
@@ -37,6 +37,9 @@ | |||
37 | #define EFI_PMBR_OSTYPE_EFI 0xEF | 37 | #define EFI_PMBR_OSTYPE_EFI 0xEF |
38 | #define EFI_PMBR_OSTYPE_EFI_GPT 0xEE | 38 | #define EFI_PMBR_OSTYPE_EFI_GPT 0xEE |
39 | 39 | ||
40 | #define GPT_MBR_PROTECTIVE 1 | ||
41 | #define GPT_MBR_HYBRID 2 | ||
42 | |||
40 | #define GPT_HEADER_SIGNATURE 0x5452415020494645ULL | 43 | #define GPT_HEADER_SIGNATURE 0x5452415020494645ULL |
41 | #define GPT_HEADER_REVISION_V1 0x00010000 | 44 | #define GPT_HEADER_REVISION_V1 0x00010000 |
42 | #define GPT_PRIMARY_PARTITION_TABLE_LBA 1 | 45 | #define GPT_PRIMARY_PARTITION_TABLE_LBA 1 |
@@ -101,11 +104,25 @@ typedef struct _gpt_entry { | |||
101 | efi_char16_t partition_name[72 / sizeof (efi_char16_t)]; | 104 | efi_char16_t partition_name[72 / sizeof (efi_char16_t)]; |
102 | } __attribute__ ((packed)) gpt_entry; | 105 | } __attribute__ ((packed)) gpt_entry; |
103 | 106 | ||
107 | typedef struct _gpt_mbr_record { | ||
108 | u8 boot_indicator; /* unused by EFI, set to 0x80 for bootable */ | ||
109 | u8 start_head; /* unused by EFI, pt start in CHS */ | ||
110 | u8 start_sector; /* unused by EFI, pt start in CHS */ | ||
111 | u8 start_track; /* unused by EFI, pt start in CHS */ | ||
112 | u8 os_type; /* EFI and legacy non-EFI OS types */ | ||
113 | u8 end_head; /* unused by EFI, pt end in CHS */ | ||
114 | u8 end_sector; /* unused by EFI, pt end in CHS */ | ||
115 | u8 end_track; /* unused by EFI, pt end in CHS */ | ||
116 | __le32 starting_lba; /* used by EFI - start addr of the on disk pt */ | ||
117 | __le32 size_in_lba; /* used by EFI - size of pt in LBA */ | ||
118 | } __packed gpt_mbr_record; | ||
119 | |||
120 | |||
104 | typedef struct _legacy_mbr { | 121 | typedef struct _legacy_mbr { |
105 | u8 boot_code[440]; | 122 | u8 boot_code[440]; |
106 | __le32 unique_mbr_signature; | 123 | __le32 unique_mbr_signature; |
107 | __le16 unknown; | 124 | __le16 unknown; |
108 | struct partition partition_record[4]; | 125 | gpt_mbr_record partition_record[4]; |
109 | __le16 signature; | 126 | __le16 signature; |
110 | } __attribute__ ((packed)) legacy_mbr; | 127 | } __attribute__ ((packed)) legacy_mbr; |
111 | 128 | ||
@@ -113,22 +130,3 @@ typedef struct _legacy_mbr { | |||
113 | extern int efi_partition(struct parsed_partitions *state); | 130 | extern int efi_partition(struct parsed_partitions *state); |
114 | 131 | ||
115 | #endif | 132 | #endif |
116 | |||
117 | /* | ||
118 | * Overrides for Emacs so that we follow Linus's tabbing style. | ||
119 | * Emacs will notice this stuff at the end of the file and automatically | ||
120 | * adjust the settings for this buffer only. This must remain at the end | ||
121 | * of the file. | ||
122 | * -------------------------------------------------------------------------- | ||
123 | * Local variables: | ||
124 | * c-indent-level: 4 | ||
125 | * c-brace-imaginary-offset: 0 | ||
126 | * c-brace-offset: -4 | ||
127 | * c-argdecl-indent: 4 | ||
128 | * c-label-offset: -4 | ||
129 | * c-continued-statement-offset: 4 | ||
130 | * c-continued-brace-offset: 0 | ||
131 | * indent-tabs-mode: nil | ||
132 | * tab-width: 8 | ||
133 | * End: | ||
134 | */ | ||
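The new gpt_mbr_record replaces the generic struct partition so the EFI code can name the two fields it actually consumes, starting_lba and size_in_lba, as explicit little-endian values. A compilable sketch of the same 16-byte layout with standard C types; the le32() helper stands in for le32_to_cpu(), and the byte-array fields make the struct naturally packed.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct mbr_record {
	uint8_t  boot_indicator;
	uint8_t  start_head, start_sector, start_track;
	uint8_t  os_type;
	uint8_t  end_head, end_sector, end_track;
	uint8_t  starting_lba[4];  /* little-endian on disk */
	uint8_t  size_in_lba[4];   /* little-endian on disk */
};

/* Endian-safe load: correct regardless of host byte order. */
static uint32_t le32(const uint8_t p[4])
{
	return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
	       (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

int main(void)
{
	/* Each MBR slot is exactly 16 bytes; four of them follow the
	 * 440-byte boot code, 4-byte signature, and 2 unknown bytes. */
	static_assert(sizeof(struct mbr_record) == 16, "layout");

	struct mbr_record r = { .os_type = 0xEE,
				.starting_lba = { 1, 0, 0, 0 },
				.size_in_lba  = { 0xFF, 0xFF, 0xFF, 0xFF } };

	printf("start=%u size=%u\n", le32(r.starting_lba), le32(r.size_in_lba));
	return 0;
}
```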
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 025c41d3cb33..14a9d1912318 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ |
2 | #define VERSION "83" | 2 | #define VERSION "85" |
3 | #define AOE_MAJOR 152 | 3 | #define AOE_MAJOR 152 |
4 | #define DEVICE_NAME "aoe" | 4 | #define DEVICE_NAME "aoe" |
5 | 5 | ||
@@ -169,6 +169,7 @@ struct aoedev { | |||
169 | ulong ref; | 169 | ulong ref; |
170 | struct work_struct work;/* disk create work struct */ | 170 | struct work_struct work;/* disk create work struct */ |
171 | struct gendisk *gd; | 171 | struct gendisk *gd; |
172 | struct dentry *debugfs; | ||
172 | struct request_queue *blkq; | 173 | struct request_queue *blkq; |
173 | struct hd_geometry geo; | 174 | struct hd_geometry geo; |
174 | sector_t ssize; | 175 | sector_t ssize; |
@@ -206,6 +207,7 @@ struct ktstate { | |||
206 | int aoeblk_init(void); | 207 | int aoeblk_init(void); |
207 | void aoeblk_exit(void); | 208 | void aoeblk_exit(void); |
208 | void aoeblk_gdalloc(void *); | 209 | void aoeblk_gdalloc(void *); |
210 | void aoedisk_rm_debugfs(struct aoedev *d); | ||
209 | void aoedisk_rm_sysfs(struct aoedev *d); | 211 | void aoedisk_rm_sysfs(struct aoedev *d); |
210 | 212 | ||
211 | int aoechr_init(void); | 213 | int aoechr_init(void); |
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 916d9ed5c8aa..dd73e1ff1759 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ |
2 | /* | 2 | /* |
3 | * aoeblk.c | 3 | * aoeblk.c |
4 | * block device routines | 4 | * block device routines |
@@ -17,11 +17,13 @@ | |||
17 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
18 | #include <linux/export.h> | 18 | #include <linux/export.h> |
19 | #include <linux/moduleparam.h> | 19 | #include <linux/moduleparam.h> |
20 | #include <linux/debugfs.h> | ||
20 | #include <scsi/sg.h> | 21 | #include <scsi/sg.h> |
21 | #include "aoe.h" | 22 | #include "aoe.h" |
22 | 23 | ||
23 | static DEFINE_MUTEX(aoeblk_mutex); | 24 | static DEFINE_MUTEX(aoeblk_mutex); |
24 | static struct kmem_cache *buf_pool_cache; | 25 | static struct kmem_cache *buf_pool_cache; |
26 | static struct dentry *aoe_debugfs_dir; | ||
25 | 27 | ||
26 | /* GPFS needs a larger value than the default. */ | 28 | /* GPFS needs a larger value than the default. */ |
27 | static int aoe_maxsectors; | 29 | static int aoe_maxsectors; |
@@ -108,6 +110,55 @@ static ssize_t aoedisk_show_payload(struct device *dev, | |||
108 | return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt); | 110 | return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt); |
109 | } | 111 | } |
110 | 112 | ||
113 | static int aoedisk_debugfs_show(struct seq_file *s, void *ignored) | ||
114 | { | ||
115 | struct aoedev *d; | ||
116 | struct aoetgt **t, **te; | ||
117 | struct aoeif *ifp, *ife; | ||
118 | unsigned long flags; | ||
119 | char c; | ||
120 | |||
121 | d = s->private; | ||
122 | seq_printf(s, "rttavg: %d rttdev: %d\n", | ||
123 | d->rttavg >> RTTSCALE, | ||
124 | d->rttdev >> RTTDSCALE); | ||
125 | seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool)); | ||
126 | seq_printf(s, "kicked: %ld\n", d->kicked); | ||
127 | seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt); | ||
128 | seq_printf(s, "ref: %ld\n", d->ref); | ||
129 | |||
130 | spin_lock_irqsave(&d->lock, flags); | ||
131 | t = d->targets; | ||
132 | te = t + d->ntargets; | ||
133 | for (; t < te && *t; t++) { | ||
134 | c = '\t'; | ||
135 | seq_printf(s, "falloc: %ld\n", (*t)->falloc); | ||
136 | seq_printf(s, "ffree: %p\n", | ||
137 | list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next); | ||
138 | seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout, | ||
139 | (*t)->maxout, (*t)->nframes); | ||
140 | seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh); | ||
141 | seq_printf(s, "\ttaint:%d\n", (*t)->taint); | ||
142 | seq_printf(s, "\tr:%d\n", (*t)->rpkts); | ||
143 | seq_printf(s, "\tw:%d\n", (*t)->wpkts); | ||
144 | ifp = (*t)->ifs; | ||
145 | ife = ifp + ARRAY_SIZE((*t)->ifs); | ||
146 | for (; ifp->nd && ifp < ife; ifp++) { | ||
147 | seq_printf(s, "%c%s", c, ifp->nd->name); | ||
148 | c = ','; | ||
149 | } | ||
150 | seq_puts(s, "\n"); | ||
151 | } | ||
152 | spin_unlock_irqrestore(&d->lock, flags); | ||
153 | |||
154 | return 0; | ||
155 | } | ||
156 | |||
157 | static int aoe_debugfs_open(struct inode *inode, struct file *file) | ||
158 | { | ||
159 | return single_open(file, aoedisk_debugfs_show, inode->i_private); | ||
160 | } | ||
161 | |||
111 | static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL); | 162 | static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL); |
112 | static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); | 163 | static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); |
113 | static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL); | 164 | static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL); |
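aoedisk_debugfs_show() follows the stock seq_file shape: single_open() stashes the device pointer, and the show callback renders one complete snapshot, taking the device lock around the target walk so readers never see a half-updated list. A userspace analogue of that shape, reduced to pthreads and a FILE *; the structure fields are invented for illustration.

```c
#include <pthread.h>
#include <stdio.h>

struct target { const char *name; int nout, maxout; };

struct dev {
	pthread_mutex_t lock;
	int rttavg;
	struct target tgts[2];
	int ntargets;
};

/* The "show" callback: emit the whole state in one pass, with the
 * mutable target list read under the lock. */
static int dev_show(FILE *s, void *priv)
{
	struct dev *d = priv;
	int i;

	fprintf(s, "rttavg: %d\n", d->rttavg);
	pthread_mutex_lock(&d->lock);
	for (i = 0; i < d->ntargets; i++)
		fprintf(s, "%s:%d:%d\n", d->tgts[i].name,
			d->tgts[i].nout, d->tgts[i].maxout);
	pthread_mutex_unlock(&d->lock);
	return 0;
}

int main(void)
{
	struct dev d = { PTHREAD_MUTEX_INITIALIZER, 42,
			 { { "eth0", 1, 8 }, { "eth1", 0, 8 } }, 2 };

	return dev_show(stdout, &d);
}
```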
@@ -130,6 +181,44 @@ static const struct attribute_group attr_group = { | |||
130 | .attrs = aoe_attrs, | 181 | .attrs = aoe_attrs, |
131 | }; | 182 | }; |
132 | 183 | ||
184 | static const struct file_operations aoe_debugfs_fops = { | ||
185 | .open = aoe_debugfs_open, | ||
186 | .read = seq_read, | ||
187 | .llseek = seq_lseek, | ||
188 | .release = single_release, | ||
189 | }; | ||
190 | |||
191 | static void | ||
192 | aoedisk_add_debugfs(struct aoedev *d) | ||
193 | { | ||
194 | struct dentry *entry; | ||
195 | char *p; | ||
196 | |||
197 | if (aoe_debugfs_dir == NULL) | ||
198 | return; | ||
199 | p = strchr(d->gd->disk_name, '/'); | ||
200 | if (p == NULL) | ||
201 | p = d->gd->disk_name; | ||
202 | else | ||
203 | p++; | ||
204 | BUG_ON(*p == '\0'); | ||
205 | entry = debugfs_create_file(p, 0444, aoe_debugfs_dir, d, | ||
206 | &aoe_debugfs_fops); | ||
207 | if (IS_ERR_OR_NULL(entry)) { | ||
208 | pr_info("aoe: cannot create debugfs file for %s\n", | ||
209 | d->gd->disk_name); | ||
210 | return; | ||
211 | } | ||
212 | BUG_ON(d->debugfs); | ||
213 | d->debugfs = entry; | ||
214 | } | ||
215 | void | ||
216 | aoedisk_rm_debugfs(struct aoedev *d) | ||
217 | { | ||
218 | debugfs_remove(d->debugfs); | ||
219 | d->debugfs = NULL; | ||
220 | } | ||
221 | |||
133 | static int | 222 | static int |
134 | aoedisk_add_sysfs(struct aoedev *d) | 223 | aoedisk_add_sysfs(struct aoedev *d) |
135 | { | 224 | { |
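aoedisk_add_debugfs() and aoedisk_rm_debugfs() above keep exactly one handle per device: add refuses to double-register (the BUG_ON) and remove clears the pointer, while debugfs_remove() itself tolerates NULL, so teardown is safe from any state. A small sketch of that lifecycle, with strdup()/free() standing in for the debugfs calls:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct dev {
	char *debugfs;  /* stands in for the driver's struct dentry * */
};

/* Mirror of the remove path: drop whatever is there and clear the
 * handle. free(), like debugfs_remove(), accepts NULL, so a second
 * call is harmless. */
static void dev_rm_debugfs(struct dev *d)
{
	free(d->debugfs);
	d->debugfs = NULL;
}

/* Mirror of the add path: never overwrite an existing handle. */
static int dev_add_debugfs(struct dev *d, const char *name)
{
	if (d->debugfs)
		return -1;
	d->debugfs = strdup(name);
	return d->debugfs ? 0 : -1;
}

int main(void)
{
	struct dev d = { NULL };

	dev_add_debugfs(&d, "e0.0");
	dev_rm_debugfs(&d);
	dev_rm_debugfs(&d); /* safe: handle already NULL */
	return 0;
}
```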
@@ -330,6 +419,7 @@ aoeblk_gdalloc(void *vp) | |||
330 | 419 | ||
331 | add_disk(gd); | 420 | add_disk(gd); |
332 | aoedisk_add_sysfs(d); | 421 | aoedisk_add_sysfs(d); |
422 | aoedisk_add_debugfs(d); | ||
333 | 423 | ||
334 | spin_lock_irqsave(&d->lock, flags); | 424 | spin_lock_irqsave(&d->lock, flags); |
335 | WARN_ON(!(d->flags & DEVFL_GD_NOW)); | 425 | WARN_ON(!(d->flags & DEVFL_GD_NOW)); |
@@ -351,6 +441,8 @@ err: | |||
351 | void | 441 | void |
352 | aoeblk_exit(void) | 442 | aoeblk_exit(void) |
353 | { | 443 | { |
444 | debugfs_remove_recursive(aoe_debugfs_dir); | ||
445 | aoe_debugfs_dir = NULL; | ||
354 | kmem_cache_destroy(buf_pool_cache); | 446 | kmem_cache_destroy(buf_pool_cache); |
355 | } | 447 | } |
356 | 448 | ||
@@ -362,7 +454,11 @@ aoeblk_init(void) | |||
362 | 0, 0, NULL); | 454 | 0, 0, NULL); |
363 | if (buf_pool_cache == NULL) | 455 | if (buf_pool_cache == NULL) |
364 | return -ENOMEM; | 456 | return -ENOMEM; |
365 | 457 | aoe_debugfs_dir = debugfs_create_dir("aoe", NULL); | |
458 | if (IS_ERR_OR_NULL(aoe_debugfs_dir)) { | ||
459 | pr_info("aoe: cannot create debugfs directory\n"); | ||
460 | aoe_debugfs_dir = NULL; | ||
461 | } | ||
366 | return 0; | 462 | return 0; |
367 | } | 463 | } |
368 | 464 | ||
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 4d45dba7fb8f..d2515435e23f 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c | |||
@@ -380,7 +380,6 @@ aoecmd_ata_rw(struct aoedev *d) | |||
380 | { | 380 | { |
381 | struct frame *f; | 381 | struct frame *f; |
382 | struct buf *buf; | 382 | struct buf *buf; |
383 | struct aoetgt *t; | ||
384 | struct sk_buff *skb; | 383 | struct sk_buff *skb; |
385 | struct sk_buff_head queue; | 384 | struct sk_buff_head queue; |
386 | ulong bcnt, fbcnt; | 385 | ulong bcnt, fbcnt; |
@@ -391,7 +390,6 @@ aoecmd_ata_rw(struct aoedev *d) | |||
391 | f = newframe(d); | 390 | f = newframe(d); |
392 | if (f == NULL) | 391 | if (f == NULL) |
393 | return 0; | 392 | return 0; |
394 | t = *d->tgt; | ||
395 | bcnt = d->maxbcnt; | 393 | bcnt = d->maxbcnt; |
396 | if (bcnt == 0) | 394 | if (bcnt == 0) |
397 | bcnt = DEFAULTBCNT; | 395 | bcnt = DEFAULTBCNT; |
@@ -485,7 +483,6 @@ resend(struct aoedev *d, struct frame *f) | |||
485 | struct sk_buff *skb; | 483 | struct sk_buff *skb; |
486 | struct sk_buff_head queue; | 484 | struct sk_buff_head queue; |
487 | struct aoe_hdr *h; | 485 | struct aoe_hdr *h; |
488 | struct aoe_atahdr *ah; | ||
489 | struct aoetgt *t; | 486 | struct aoetgt *t; |
490 | char buf[128]; | 487 | char buf[128]; |
491 | u32 n; | 488 | u32 n; |
@@ -500,7 +497,6 @@ resend(struct aoedev *d, struct frame *f) | |||
500 | return; | 497 | return; |
501 | } | 498 | } |
502 | h = (struct aoe_hdr *) skb_mac_header(skb); | 499 | h = (struct aoe_hdr *) skb_mac_header(skb); |
503 | ah = (struct aoe_atahdr *) (h+1); | ||
504 | 500 | ||
505 | if (!(f->flags & FFL_PROBE)) { | 501 | if (!(f->flags & FFL_PROBE)) { |
506 | snprintf(buf, sizeof(buf), | 502 | snprintf(buf, sizeof(buf), |
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 784c92e038d1..e774c50b6842 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/bitmap.h> | 12 | #include <linux/bitmap.h> |
13 | #include <linux/kdev_t.h> | 13 | #include <linux/kdev_t.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/string.h> | ||
15 | #include "aoe.h" | 16 | #include "aoe.h" |
16 | 17 | ||
17 | static void dummy_timer(ulong); | 18 | static void dummy_timer(ulong); |
@@ -241,16 +242,12 @@ aoedev_downdev(struct aoedev *d) | |||
241 | static int | 242 | static int |
242 | user_req(char *s, size_t slen, struct aoedev *d) | 243 | user_req(char *s, size_t slen, struct aoedev *d) |
243 | { | 244 | { |
244 | char *p; | 245 | const char *p; |
245 | size_t lim; | 246 | size_t lim; |
246 | 247 | ||
247 | if (!d->gd) | 248 | if (!d->gd) |
248 | return 0; | 249 | return 0; |
249 | p = strrchr(d->gd->disk_name, '/'); | 250 | p = kbasename(d->gd->disk_name); |
250 | if (!p) | ||
251 | p = d->gd->disk_name; | ||
252 | else | ||
253 | p += 1; | ||
254 | lim = sizeof(d->gd->disk_name); | 251 | lim = sizeof(d->gd->disk_name); |
255 | lim -= p - d->gd->disk_name; | 252 | lim -= p - d->gd->disk_name; |
256 | if (slen < lim) | 253 | if (slen < lim) |
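kbasename() collapses the removed four-line strrchr() dance into one call. Its semantics are easy to state in portable C; this is a sketch of the behavior, not the kernel's implementation:

```c
#include <stdio.h>
#include <string.h>

/* Semantics of kbasename(): the part after the final '/', or the
 * whole string when there is no '/' at all. */
static const char *kbasename(const char *path)
{
	const char *tail = strrchr(path, '/');
	return tail ? tail + 1 : path;
}

int main(void)
{
	printf("%s\n", kbasename("etherd/e0.0")); /* e0.0 */
	printf("%s\n", kbasename("sda"));         /* sda  */
	return 0;
}
```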
@@ -278,6 +275,7 @@ freedev(struct aoedev *d) | |||
278 | 275 | ||
279 | del_timer_sync(&d->timer); | 276 | del_timer_sync(&d->timer); |
280 | if (d->gd) { | 277 | if (d->gd) { |
278 | aoedisk_rm_debugfs(d); | ||
281 | aoedisk_rm_sysfs(d); | 279 | aoedisk_rm_sysfs(d); |
282 | del_gendisk(d->gd); | 280 | del_gendisk(d->gd); |
283 | put_disk(d->gd); | 281 | put_disk(d->gd); |
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 62b6c2cc80b5..d2d95ff5353b 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c | |||
@@ -4258,6 +4258,13 @@ static void cciss_find_board_params(ctlr_info_t *h) | |||
4258 | h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds; | 4258 | h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds; |
4259 | h->maxsgentries = readl(&(h->cfgtable->MaxSGElements)); | 4259 | h->maxsgentries = readl(&(h->cfgtable->MaxSGElements)); |
4260 | /* | 4260 | /* |
4261 | * The P600 may exhibit poor performance under some workloads | ||
4262 | * if we use the value in the configuration table. Limit this | ||
4263 | * controller to MAXSGENTRIES (32) instead. | ||
4264 | */ | ||
4265 | if (h->board_id == 0x3225103C) | ||
4266 | h->maxsgentries = MAXSGENTRIES; | ||
4267 | /* | ||
4261 | * Limit in-command s/g elements to 32 to save dma'able memory. | 4268 |
4262 | * However spec says if 0, use 31 | 4269 |
4263 | */ | 4270 | */ |
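The fix above hard-codes a single board_id test. Once such quirks accumulate, they are commonly factored into a lookup table; the sketch below shows that pattern, with only the P600 ID (0x3225103C) taken from the patch and everything else invented for illustration.

```c
#include <stdint.h>
#include <stdio.h>

struct sg_quirk {
	uint32_t board_id;
	int max_sg; /* override for the controller's advertised value */
};

static const struct sg_quirk quirks[] = {
	{ 0x3225103C, 32 },  /* P600: config-table value performs poorly */
};

static int quirk_max_sg(uint32_t board_id, int cfg_value)
{
	size_t i;

	for (i = 0; i < sizeof(quirks) / sizeof(quirks[0]); i++)
		if (quirks[i].board_id == board_id)
			return quirks[i].max_sg;
	return cfg_value; /* no quirk: trust the controller */
}

int main(void)
{
	printf("%d\n", quirk_max_sg(0x3225103C, 128)); /* 32  */
	printf("%d\n", quirk_max_sg(0x12345678, 128)); /* 128 */
	return 0;
}
```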
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index a56cfcd5d648..77a60bedd7a3 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c | |||
@@ -636,7 +636,7 @@ ok_to_write: | |||
636 | mg_request(host->breq); | 636 | mg_request(host->breq); |
637 | } | 637 | } |
638 | 638 | ||
639 | void mg_times_out(unsigned long data) | 639 | static void mg_times_out(unsigned long data) |
640 | { | 640 | { |
641 | struct mg_host *host = (struct mg_host *)data; | 641 | struct mg_host *host = (struct mg_host *)data; |
642 | char *name; | 642 | char *name; |
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 1bbc681688e4..79aa179305b5 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c | |||
@@ -598,7 +598,7 @@ static ssize_t class_osdblk_remove(struct class *c, | |||
598 | unsigned long ul; | 598 | unsigned long ul; |
599 | struct list_head *tmp; | 599 | struct list_head *tmp; |
600 | 600 | ||
601 | rc = strict_strtoul(buf, 10, &ul); | 601 | rc = kstrtoul(buf, 10, &ul); |
602 | if (rc) | 602 | if (rc) |
603 | return rc; | 603 | return rc; |
604 | 604 | ||
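kstrtoul() keeps the strictness that the deprecated strict_strtoul() had over a bare strtoul(): the whole string must parse (one trailing newline allowed, which suits sysfs writes) and overflow is an error rather than a silent clamp. A userspace sketch of that contract:

```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse an entire string as an unsigned long, kstrtoul-style:
 * 0 on success, -EINVAL for junk, -ERANGE for overflow. */
static int strict_parse_ul(const char *s, int base, unsigned long *res)
{
	char *end;

	errno = 0;
	*res = strtoul(s, &end, base);
	if (errno == ERANGE)
		return -ERANGE;
	if (end == s || (*end != '\0' && *end != '\n'))
		return -EINVAL;
	return 0;
}

int main(void)
{
	unsigned long ul;

	printf("%d\n", strict_parse_ul("42\n", 10, &ul)); /* 0, sysfs-style */
	printf("%d\n", strict_parse_ul("42x", 10, &ul));  /* -EINVAL */
	return 0;
}
```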
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index f5d0ea11d9fd..56188475cfd3 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c | |||
@@ -44,6 +44,8 @@ | |||
44 | * | 44 | * |
45 | *************************************************************************/ | 45 | *************************************************************************/ |
46 | 46 | ||
47 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
48 | |||
47 | #include <linux/pktcdvd.h> | 49 | #include <linux/pktcdvd.h> |
48 | #include <linux/module.h> | 50 | #include <linux/module.h> |
49 | #include <linux/types.h> | 51 | #include <linux/types.h> |
@@ -69,23 +71,24 @@ | |||
69 | 71 | ||
70 | #define DRIVER_NAME "pktcdvd" | 72 | #define DRIVER_NAME "pktcdvd" |
71 | 73 | ||
72 | #if PACKET_DEBUG | 74 | #define pkt_err(pd, fmt, ...) \ |
73 | #define DPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args) | 75 | pr_err("%s: " fmt, pd->name, ##__VA_ARGS__) |
74 | #else | 76 | #define pkt_notice(pd, fmt, ...) \ |
75 | #define DPRINTK(fmt, args...) | 77 | pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__) |
76 | #endif | 78 | #define pkt_info(pd, fmt, ...) \ |
77 | 79 | pr_info("%s: " fmt, pd->name, ##__VA_ARGS__) | |
78 | #if PACKET_DEBUG > 1 | 80 | |
79 | #define VPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args) | 81 | #define pkt_dbg(level, pd, fmt, ...) \ |
80 | #else | 82 | do { \ |
81 | #define VPRINTK(fmt, args...) | 83 | if (level == 2 && PACKET_DEBUG >= 2) \ |
82 | #endif | 84 | pr_notice("%s: %s():" fmt, \ |
85 | pd->name, __func__, ##__VA_ARGS__); \ | ||
86 | else if (level == 1 && PACKET_DEBUG >= 1) \ | ||
87 | pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__); \ | ||
88 | } while (0) | ||
83 | 89 | ||
84 | #define MAX_SPEED 0xffff | 90 | #define MAX_SPEED 0xffff |
85 | 91 | ||
86 | #define ZONE(sector, pd) (((sector) + (pd)->offset) & \ | ||
87 | ~(sector_t)((pd)->settings.size - 1)) | ||
88 | |||
89 | static DEFINE_MUTEX(pktcdvd_mutex); | 92 | static DEFINE_MUTEX(pktcdvd_mutex); |
90 | static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; | 93 | static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; |
91 | static struct proc_dir_entry *pkt_proc; | 94 | static struct proc_dir_entry *pkt_proc; |
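pkt_dbg() folds the old DPRINTK/VPRINTK pair into one macro whose level test against the compile-time PACKET_DEBUG constant lets the compiler discard disabled messages entirely, with level 2 additionally printing the calling function. A standalone sketch of the same construction (##__VA_ARGS__ is the same GNU cpp extension the kernel macro relies on):

```c
#include <stdio.h>

#define PACKET_DEBUG 1  /* compile-time verbosity, as in the driver */

/* Level 2 also prints the calling function, as pkt_dbg() does. */
#define pkt_dbg(level, name, fmt, ...)                                   \
do {                                                                     \
	if ((level) == 2 && PACKET_DEBUG >= 2)                            \
		printf("%s: %s(): " fmt, name, __func__, ##__VA_ARGS__); \
	else if ((level) == 1 && PACKET_DEBUG >= 1)                       \
		printf("%s: " fmt, name, ##__VA_ARGS__);                  \
} while (0)

int main(void)
{
	pkt_dbg(1, "pktcdvd0", "write speed %ukB/s\n", 2822u); /* printed */
	pkt_dbg(2, "pktcdvd0", "queue empty\n"); /* dropped: level 2 off */
	return 0;
}
```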
@@ -103,7 +106,10 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev); | |||
103 | static int pkt_remove_dev(dev_t pkt_dev); | 106 | static int pkt_remove_dev(dev_t pkt_dev); |
104 | static int pkt_seq_show(struct seq_file *m, void *p); | 107 | static int pkt_seq_show(struct seq_file *m, void *p); |
105 | 108 | ||
106 | 109 | static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd) | |
110 | { | ||
111 | return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1); | ||
112 | } | ||
107 | 113 | ||
108 | /* | 114 | /* |
109 | * create and register a pktcdvd kernel object. | 115 | * create and register a pktcdvd kernel object. |
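get_zone() gives the old ZONE() macro a typed home: it rounds (sector + offset) down to the start of its packet zone, which is valid only because settings.size, the packet size in sectors, is a power of two. A short demonstration of the arithmetic, with made-up values:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

/* Round (sector + offset) down to a multiple of zone_size sectors.
 * zone_size must be a power of two for the mask to be valid. */
static sector_t get_zone(sector_t sector, sector_t offset, sector_t zone_size)
{
	return (sector + offset) & ~(sector_t)(zone_size - 1);
}

int main(void)
{
	/* 32 sectors per packet (16 KiB zones with 512-byte sectors). */
	printf("%" PRIu64 "\n", get_zone(100, 0, 32)); /* 96 */
	printf("%" PRIu64 "\n", get_zone(96, 0, 32));  /* 96 */
	return 0;
}
```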
@@ -424,7 +430,7 @@ static int pkt_sysfs_init(void) | |||
424 | if (ret) { | 430 | if (ret) { |
425 | kfree(class_pktcdvd); | 431 | kfree(class_pktcdvd); |
426 | class_pktcdvd = NULL; | 432 | class_pktcdvd = NULL; |
427 | printk(DRIVER_NAME": failed to create class pktcdvd\n"); | 433 | pr_err("failed to create class pktcdvd\n"); |
428 | return ret; | 434 | return ret; |
429 | } | 435 | } |
430 | return 0; | 436 | return 0; |
@@ -517,7 +523,7 @@ static void pkt_bio_finished(struct pktcdvd_device *pd) | |||
517 | { | 523 | { |
518 | BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0); | 524 | BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0); |
519 | if (atomic_dec_and_test(&pd->cdrw.pending_bios)) { | 525 | if (atomic_dec_and_test(&pd->cdrw.pending_bios)) { |
520 | VPRINTK(DRIVER_NAME": queue empty\n"); | 526 | pkt_dbg(2, pd, "queue empty\n"); |
521 | atomic_set(&pd->iosched.attention, 1); | 527 | atomic_set(&pd->iosched.attention, 1); |
522 | wake_up(&pd->wqueue); | 528 | wake_up(&pd->wqueue); |
523 | } | 529 | } |
@@ -734,36 +740,33 @@ out: | |||
734 | return ret; | 740 | return ret; |
735 | } | 741 | } |
736 | 742 | ||
743 | static const char *sense_key_string(__u8 index) | ||
744 | { | ||
745 | static const char * const info[] = { | ||
746 | "No sense", "Recovered error", "Not ready", | ||
747 | "Medium error", "Hardware error", "Illegal request", | ||
748 | "Unit attention", "Data protect", "Blank check", | ||
749 | }; | ||
750 | |||
751 | return index < ARRAY_SIZE(info) ? info[index] : "INVALID"; | ||
752 | } | ||
753 | |||
737 | /* | 754 | /* |
738 | * A generic sense dump / resolve mechanism should be implemented across | 755 | * A generic sense dump / resolve mechanism should be implemented across |
739 | * all ATAPI + SCSI devices. | 756 | * all ATAPI + SCSI devices. |
740 | */ | 757 | */ |
741 | static void pkt_dump_sense(struct packet_command *cgc) | 758 | static void pkt_dump_sense(struct pktcdvd_device *pd, |
759 | struct packet_command *cgc) | ||
742 | { | 760 | { |
743 | static char *info[9] = { "No sense", "Recovered error", "Not ready", | ||
744 | "Medium error", "Hardware error", "Illegal request", | ||
745 | "Unit attention", "Data protect", "Blank check" }; | ||
746 | int i; | ||
747 | struct request_sense *sense = cgc->sense; | 761 | struct request_sense *sense = cgc->sense; |
748 | 762 | ||
749 | printk(DRIVER_NAME":"); | 763 | if (sense) |
750 | for (i = 0; i < CDROM_PACKET_SIZE; i++) | 764 | pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n", |
751 | printk(" %02x", cgc->cmd[i]); | 765 | CDROM_PACKET_SIZE, cgc->cmd, |
752 | printk(" - "); | 766 | sense->sense_key, sense->asc, sense->ascq, |
753 | 767 | sense_key_string(sense->sense_key)); | |
754 | if (sense == NULL) { | 768 | else |
755 | printk("no sense\n"); | 769 | pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd); |
756 | return; | ||
757 | } | ||
758 | |||
759 | printk("sense %02x.%02x.%02x", sense->sense_key, sense->asc, sense->ascq); | ||
760 | |||
761 | if (sense->sense_key > 8) { | ||
762 | printk(" (INVALID)\n"); | ||
763 | return; | ||
764 | } | ||
765 | |||
766 | printk(" (%s)\n", info[sense->sense_key]); | ||
767 | } | 770 | } |
768 | 771 | ||
769 | /* | 772 | /* |
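sense_key_string() turns the former inline array walk into a bounds-checked table lookup, while %*ph replaces the byte-at-a-time CDB dump. The lookup in isolation, with a userspace ARRAY_SIZE():

```c
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char *sense_key_string(unsigned char index)
{
	static const char * const info[] = {
		"No sense", "Recovered error", "Not ready",
		"Medium error", "Hardware error", "Illegal request",
		"Unit attention", "Data protect", "Blank check",
	};

	/* Out-of-range keys come back labeled instead of indexing past
	 * the table, which is what the old "> 8" check guarded against. */
	return index < ARRAY_SIZE(info) ? info[index] : "INVALID";
}

int main(void)
{
	printf("%s\n", sense_key_string(3));  /* Medium error */
	printf("%s\n", sense_key_string(12)); /* INVALID */
	return 0;
}
```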
@@ -806,7 +809,7 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, | |||
806 | cgc.cmd[5] = write_speed & 0xff; | 809 | cgc.cmd[5] = write_speed & 0xff; |
807 | 810 | ||
808 | if ((ret = pkt_generic_packet(pd, &cgc))) | 811 | if ((ret = pkt_generic_packet(pd, &cgc))) |
809 | pkt_dump_sense(&cgc); | 812 | pkt_dump_sense(pd, &cgc); |
810 | 813 | ||
811 | return ret; | 814 | return ret; |
812 | } | 815 | } |
@@ -872,7 +875,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd) | |||
872 | need_write_seek = 0; | 875 | need_write_seek = 0; |
873 | if (need_write_seek && reads_queued) { | 876 | if (need_write_seek && reads_queued) { |
874 | if (atomic_read(&pd->cdrw.pending_bios) > 0) { | 877 | if (atomic_read(&pd->cdrw.pending_bios) > 0) { |
875 | VPRINTK(DRIVER_NAME": write, waiting\n"); | 878 | pkt_dbg(2, pd, "write, waiting\n"); |
876 | break; | 879 | break; |
877 | } | 880 | } |
878 | pkt_flush_cache(pd); | 881 | pkt_flush_cache(pd); |
@@ -881,7 +884,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd) | |||
881 | } else { | 884 | } else { |
882 | if (!reads_queued && writes_queued) { | 885 | if (!reads_queued && writes_queued) { |
883 | if (atomic_read(&pd->cdrw.pending_bios) > 0) { | 886 | if (atomic_read(&pd->cdrw.pending_bios) > 0) { |
884 | VPRINTK(DRIVER_NAME": read, waiting\n"); | 887 | pkt_dbg(2, pd, "read, waiting\n"); |
885 | break; | 888 | break; |
886 | } | 889 | } |
887 | pd->iosched.writing = 1; | 890 | pd->iosched.writing = 1; |
@@ -943,7 +946,7 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que | |||
943 | set_bit(PACKET_MERGE_SEGS, &pd->flags); | 946 | set_bit(PACKET_MERGE_SEGS, &pd->flags); |
944 | return 0; | 947 | return 0; |
945 | } else { | 948 | } else { |
946 | printk(DRIVER_NAME": cdrom max_phys_segments too small\n"); | 949 | pkt_err(pd, "cdrom max_phys_segments too small\n"); |
947 | return -EIO; | 950 | return -EIO; |
948 | } | 951 | } |
949 | } | 952 | } |
@@ -987,8 +990,9 @@ static void pkt_end_io_read(struct bio *bio, int err) | |||
987 | struct pktcdvd_device *pd = pkt->pd; | 990 | struct pktcdvd_device *pd = pkt->pd; |
988 | BUG_ON(!pd); | 991 | BUG_ON(!pd); |
989 | 992 | ||
990 | VPRINTK("pkt_end_io_read: bio=%p sec0=%llx sec=%llx err=%d\n", bio, | 993 | pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", |
991 | (unsigned long long)pkt->sector, (unsigned long long)bio->bi_sector, err); | 994 | bio, (unsigned long long)pkt->sector, |
995 | (unsigned long long)bio->bi_sector, err); | ||
992 | 996 | ||
993 | if (err) | 997 | if (err) |
994 | atomic_inc(&pkt->io_errors); | 998 | atomic_inc(&pkt->io_errors); |
@@ -1005,7 +1009,7 @@ static void pkt_end_io_packet_write(struct bio *bio, int err) | |||
1005 | struct pktcdvd_device *pd = pkt->pd; | 1009 | struct pktcdvd_device *pd = pkt->pd; |
1006 | BUG_ON(!pd); | 1010 | BUG_ON(!pd); |
1007 | 1011 | ||
1008 | VPRINTK("pkt_end_io_packet_write: id=%d, err=%d\n", pkt->id, err); | 1012 | pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, err); |
1009 | 1013 | ||
1010 | pd->stats.pkt_ended++; | 1014 | pd->stats.pkt_ended++; |
1011 | 1015 | ||
@@ -1047,7 +1051,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
1047 | spin_unlock(&pkt->lock); | 1051 | spin_unlock(&pkt->lock); |
1048 | 1052 | ||
1049 | if (pkt->cache_valid) { | 1053 | if (pkt->cache_valid) { |
1050 | VPRINTK("pkt_gather_data: zone %llx cached\n", | 1054 | pkt_dbg(2, pd, "zone %llx cached\n", |
1051 | (unsigned long long)pkt->sector); | 1055 | (unsigned long long)pkt->sector); |
1052 | goto out_account; | 1056 | goto out_account; |
1053 | } | 1057 | } |
@@ -1070,7 +1074,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
1070 | 1074 | ||
1071 | p = (f * CD_FRAMESIZE) / PAGE_SIZE; | 1075 | p = (f * CD_FRAMESIZE) / PAGE_SIZE; |
1072 | offset = (f * CD_FRAMESIZE) % PAGE_SIZE; | 1076 | offset = (f * CD_FRAMESIZE) % PAGE_SIZE; |
1073 | VPRINTK("pkt_gather_data: Adding frame %d, page:%p offs:%d\n", | 1077 | pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n", |
1074 | f, pkt->pages[p], offset); | 1078 | f, pkt->pages[p], offset); |
1075 | if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset)) | 1079 | if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset)) |
1076 | BUG(); | 1080 | BUG(); |
@@ -1082,7 +1086,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
1082 | } | 1086 | } |
1083 | 1087 | ||
1084 | out_account: | 1088 | out_account: |
1085 | VPRINTK("pkt_gather_data: need %d frames for zone %llx\n", | 1089 | pkt_dbg(2, pd, "need %d frames for zone %llx\n", |
1086 | frames_read, (unsigned long long)pkt->sector); | 1090 | frames_read, (unsigned long long)pkt->sector); |
1087 | pd->stats.pkt_started++; | 1091 | pd->stats.pkt_started++; |
1088 | pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9); | 1092 | pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9); |
@@ -1183,7 +1187,8 @@ static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state | |||
1183 | "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED" | 1187 | "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED" |
1184 | }; | 1188 | }; |
1185 | enum packet_data_state old_state = pkt->state; | 1189 | enum packet_data_state old_state = pkt->state; |
1186 | VPRINTK("pkt %2d : s=%6llx %s -> %s\n", pkt->id, (unsigned long long)pkt->sector, | 1190 | pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n", |
1191 | pkt->id, (unsigned long long)pkt->sector, | ||
1187 | state_name[old_state], state_name[state]); | 1192 | state_name[old_state], state_name[state]); |
1188 | #endif | 1193 | #endif |
1189 | pkt->state = state; | 1194 | pkt->state = state; |
@@ -1202,12 +1207,10 @@ static int pkt_handle_queue(struct pktcdvd_device *pd) | |||
1202 | struct rb_node *n; | 1207 | struct rb_node *n; |
1203 | int wakeup; | 1208 | int wakeup; |
1204 | 1209 | ||
1205 | VPRINTK("handle_queue\n"); | ||
1206 | |||
1207 | atomic_set(&pd->scan_queue, 0); | 1210 | atomic_set(&pd->scan_queue, 0); |
1208 | 1211 | ||
1209 | if (list_empty(&pd->cdrw.pkt_free_list)) { | 1212 | if (list_empty(&pd->cdrw.pkt_free_list)) { |
1210 | VPRINTK("handle_queue: no pkt\n"); | 1213 | pkt_dbg(2, pd, "no pkt\n"); |
1211 | return 0; | 1214 | return 0; |
1212 | } | 1215 | } |
1213 | 1216 | ||
@@ -1224,7 +1227,7 @@ static int pkt_handle_queue(struct pktcdvd_device *pd) | |||
1224 | node = first_node; | 1227 | node = first_node; |
1225 | while (node) { | 1228 | while (node) { |
1226 | bio = node->bio; | 1229 | bio = node->bio; |
1227 | zone = ZONE(bio->bi_sector, pd); | 1230 | zone = get_zone(bio->bi_sector, pd); |
1228 | list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { | 1231 | list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { |
1229 | if (p->sector == zone) { | 1232 | if (p->sector == zone) { |
1230 | bio = NULL; | 1233 | bio = NULL; |
@@ -1244,7 +1247,7 @@ try_next_bio: | |||
1244 | } | 1247 | } |
1245 | spin_unlock(&pd->lock); | 1248 | spin_unlock(&pd->lock); |
1246 | if (!bio) { | 1249 | if (!bio) { |
1247 | VPRINTK("handle_queue: no bio\n"); | 1250 | pkt_dbg(2, pd, "no bio\n"); |
1248 | return 0; | 1251 | return 0; |
1249 | } | 1252 | } |
1250 | 1253 | ||
@@ -1260,12 +1263,12 @@ try_next_bio: | |||
1260 | * to this packet. | 1263 | * to this packet. |
1261 | */ | 1264 | */ |
1262 | spin_lock(&pd->lock); | 1265 | spin_lock(&pd->lock); |
1263 | VPRINTK("pkt_handle_queue: looking for zone %llx\n", (unsigned long long)zone); | 1266 | pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone); |
1264 | while ((node = pkt_rbtree_find(pd, zone)) != NULL) { | 1267 | while ((node = pkt_rbtree_find(pd, zone)) != NULL) { |
1265 | bio = node->bio; | 1268 | bio = node->bio; |
1266 | VPRINTK("pkt_handle_queue: found zone=%llx\n", | 1269 | pkt_dbg(2, pd, "found zone=%llx\n", |
1267 | (unsigned long long)ZONE(bio->bi_sector, pd)); | 1270 | (unsigned long long)get_zone(bio->bi_sector, pd)); |
1268 | if (ZONE(bio->bi_sector, pd) != zone) | 1271 | if (get_zone(bio->bi_sector, pd) != zone) |
1269 | break; | 1272 | break; |
1270 | pkt_rbtree_erase(pd, node); | 1273 | pkt_rbtree_erase(pd, node); |
1271 | spin_lock(&pkt->lock); | 1274 | spin_lock(&pkt->lock); |
@@ -1316,7 +1319,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
1316 | if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) | 1319 | if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) |
1317 | BUG(); | 1320 | BUG(); |
1318 | } | 1321 | } |
1319 | VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt); | 1322 | pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt); |
1320 | 1323 | ||
1321 | /* | 1324 | /* |
1322 | * Fill-in bvec with data from orig_bios. | 1325 | * Fill-in bvec with data from orig_bios. |
@@ -1327,7 +1330,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
1327 | pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); | 1330 | pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); |
1328 | spin_unlock(&pkt->lock); | 1331 | spin_unlock(&pkt->lock); |
1329 | 1332 | ||
1330 | VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n", | 1333 | pkt_dbg(2, pd, "Writing %d frames for zone %llx\n", |
1331 | pkt->write_size, (unsigned long long)pkt->sector); | 1334 | pkt->write_size, (unsigned long long)pkt->sector); |
1332 | 1335 | ||
1333 | if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) { | 1336 | if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) { |
@@ -1359,7 +1362,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data | |||
1359 | { | 1362 | { |
1360 | int uptodate; | 1363 | int uptodate; |
1361 | 1364 | ||
1362 | VPRINTK("run_state_machine: pkt %d\n", pkt->id); | 1365 | pkt_dbg(2, pd, "pkt %d\n", pkt->id); |
1363 | 1366 | ||
1364 | for (;;) { | 1367 | for (;;) { |
1365 | switch (pkt->state) { | 1368 | switch (pkt->state) { |
@@ -1398,7 +1401,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data | |||
1398 | if (pkt_start_recovery(pkt)) { | 1401 | if (pkt_start_recovery(pkt)) { |
1399 | pkt_start_write(pd, pkt); | 1402 | pkt_start_write(pd, pkt); |
1400 | } else { | 1403 | } else { |
1401 | VPRINTK("No recovery possible\n"); | 1404 | pkt_dbg(2, pd, "No recovery possible\n"); |
1402 | pkt_set_state(pkt, PACKET_FINISHED_STATE); | 1405 | pkt_set_state(pkt, PACKET_FINISHED_STATE); |
1403 | } | 1406 | } |
1404 | break; | 1407 | break; |
@@ -1419,8 +1422,6 @@ static void pkt_handle_packets(struct pktcdvd_device *pd) | |||
1419 | { | 1422 | { |
1420 | struct packet_data *pkt, *next; | 1423 | struct packet_data *pkt, *next; |
1421 | 1424 | ||
1422 | VPRINTK("pkt_handle_packets\n"); | ||
1423 | |||
1424 | /* | 1425 | /* |
1425 | * Run state machine for active packets | 1426 | * Run state machine for active packets |
1426 | */ | 1427 | */ |
@@ -1502,9 +1503,9 @@ static int kcdrwd(void *foobar) | |||
1502 | if (PACKET_DEBUG > 1) { | 1503 | if (PACKET_DEBUG > 1) { |
1503 | int states[PACKET_NUM_STATES]; | 1504 | int states[PACKET_NUM_STATES]; |
1504 | pkt_count_states(pd, states); | 1505 | pkt_count_states(pd, states); |
1505 | VPRINTK("kcdrwd: i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", | 1506 | pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", |
1506 | states[0], states[1], states[2], states[3], | 1507 | states[0], states[1], states[2], |
1507 | states[4], states[5]); | 1508 | states[3], states[4], states[5]); |
1508 | } | 1509 | } |
1509 | 1510 | ||
1510 | min_sleep_time = MAX_SCHEDULE_TIMEOUT; | 1511 | min_sleep_time = MAX_SCHEDULE_TIMEOUT; |
@@ -1513,9 +1514,9 @@ static int kcdrwd(void *foobar) | |||
1513 | min_sleep_time = pkt->sleep_time; | 1514 | min_sleep_time = pkt->sleep_time; |
1514 | } | 1515 | } |
1515 | 1516 | ||
1516 | VPRINTK("kcdrwd: sleeping\n"); | 1517 | pkt_dbg(2, pd, "sleeping\n"); |
1517 | residue = schedule_timeout(min_sleep_time); | 1518 | residue = schedule_timeout(min_sleep_time); |
1518 | VPRINTK("kcdrwd: wake up\n"); | 1519 | pkt_dbg(2, pd, "wake up\n"); |
1519 | 1520 | ||
1520 | /* make swsusp happy with our thread */ | 1521 | /* make swsusp happy with our thread */ |
1521 | try_to_freeze(); | 1522 | try_to_freeze(); |
@@ -1563,9 +1564,10 @@ work_to_do: | |||
1563 | 1564 | ||
1564 | static void pkt_print_settings(struct pktcdvd_device *pd) | 1565 | static void pkt_print_settings(struct pktcdvd_device *pd) |
1565 | { | 1566 | { |
1566 | printk(DRIVER_NAME": %s packets, ", pd->settings.fp ? "Fixed" : "Variable"); | 1567 | pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n", |
1567 | printk("%u blocks, ", pd->settings.size >> 2); | 1568 | pd->settings.fp ? "Fixed" : "Variable", |
1568 | printk("Mode-%c disc\n", pd->settings.block_mode == 8 ? '1' : '2'); | 1569 | pd->settings.size >> 2, |
1570 | pd->settings.block_mode == 8 ? '1' : '2'); | ||
1569 | } | 1571 | } |
1570 | 1572 | ||
1571 | static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control) | 1573 | static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control) |
@@ -1699,7 +1701,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) | |||
1699 | init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); | 1701 | init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); |
1700 | cgc.sense = &sense; | 1702 | cgc.sense = &sense; |
1701 | if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { | 1703 | if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { |
1702 | pkt_dump_sense(&cgc); | 1704 | pkt_dump_sense(pd, &cgc); |
1703 | return ret; | 1705 | return ret; |
1704 | } | 1706 | } |
1705 | 1707 | ||
@@ -1714,7 +1716,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) | |||
1714 | init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); | 1716 | init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); |
1715 | cgc.sense = &sense; | 1717 | cgc.sense = &sense; |
1716 | if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { | 1718 | if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { |
1717 | pkt_dump_sense(&cgc); | 1719 | pkt_dump_sense(pd, &cgc); |
1718 | return ret; | 1720 | return ret; |
1719 | } | 1721 | } |
1720 | 1722 | ||
@@ -1749,14 +1751,14 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) | |||
1749 | /* | 1751 | /* |
1750 | * paranoia | 1752 | * paranoia |
1751 | */ | 1753 | */ |
1752 | printk(DRIVER_NAME": write mode wrong %d\n", wp->data_block_type); | 1754 | pkt_err(pd, "write mode wrong %d\n", wp->data_block_type); |
1753 | return 1; | 1755 | return 1; |
1754 | } | 1756 | } |
1755 | wp->packet_size = cpu_to_be32(pd->settings.size >> 2); | 1757 | wp->packet_size = cpu_to_be32(pd->settings.size >> 2); |
1756 | 1758 | ||
1757 | cgc.buflen = cgc.cmd[8] = size; | 1759 | cgc.buflen = cgc.cmd[8] = size; |
1758 | if ((ret = pkt_mode_select(pd, &cgc))) { | 1760 | if ((ret = pkt_mode_select(pd, &cgc))) { |
1759 | pkt_dump_sense(&cgc); | 1761 | pkt_dump_sense(pd, &cgc); |
1760 | return ret; | 1762 | return ret; |
1761 | } | 1763 | } |
1762 | 1764 | ||
@@ -1793,7 +1795,7 @@ static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti) | |||
1793 | if (ti->rt == 1 && ti->blank == 0) | 1795 | if (ti->rt == 1 && ti->blank == 0) |
1794 | return 1; | 1796 | return 1; |
1795 | 1797 | ||
1796 | printk(DRIVER_NAME": bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet); | 1798 | pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet); |
1797 | return 0; | 1799 | return 0; |
1798 | } | 1800 | } |
1799 | 1801 | ||
@@ -1811,7 +1813,8 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di) | |||
1811 | case 0x12: /* DVD-RAM */ | 1813 | case 0x12: /* DVD-RAM */ |
1812 | return 1; | 1814 | return 1; |
1813 | default: | 1815 | default: |
1814 | VPRINTK(DRIVER_NAME": Wrong disc profile (%x)\n", pd->mmc3_profile); | 1816 | pkt_dbg(2, pd, "Wrong disc profile (%x)\n", |
1817 | pd->mmc3_profile); | ||
1815 | return 0; | 1818 | return 0; |
1816 | } | 1819 | } |
1817 | 1820 | ||
@@ -1820,22 +1823,22 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di) | |||
1820 | * but I'm not sure, should we leave this to user apps? probably. | 1823 |
1821 | */ | 1824 | */ |
1822 | if (di->disc_type == 0xff) { | 1825 | if (di->disc_type == 0xff) { |
1823 | printk(DRIVER_NAME": Unknown disc. No track?\n"); | 1826 | pkt_notice(pd, "unknown disc - no track?\n"); |
1824 | return 0; | 1827 | return 0; |
1825 | } | 1828 | } |
1826 | 1829 | ||
1827 | if (di->disc_type != 0x20 && di->disc_type != 0) { | 1830 | if (di->disc_type != 0x20 && di->disc_type != 0) { |
1828 | printk(DRIVER_NAME": Wrong disc type (%x)\n", di->disc_type); | 1831 | pkt_err(pd, "wrong disc type (%x)\n", di->disc_type); |
1829 | return 0; | 1832 | return 0; |
1830 | } | 1833 | } |
1831 | 1834 | ||
1832 | if (di->erasable == 0) { | 1835 | if (di->erasable == 0) { |
1833 | printk(DRIVER_NAME": Disc not erasable\n"); | 1836 | pkt_notice(pd, "disc not erasable\n"); |
1834 | return 0; | 1837 | return 0; |
1835 | } | 1838 | } |
1836 | 1839 | ||
1837 | if (di->border_status == PACKET_SESSION_RESERVED) { | 1840 | if (di->border_status == PACKET_SESSION_RESERVED) { |
1838 | printk(DRIVER_NAME": Can't write to last track (reserved)\n"); | 1841 | pkt_err(pd, "can't write to last track (reserved)\n"); |
1839 | return 0; | 1842 | return 0; |
1840 | } | 1843 | } |
1841 | 1844 | ||
@@ -1860,7 +1863,7 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) | |||
1860 | memset(&ti, 0, sizeof(track_information)); | 1863 | memset(&ti, 0, sizeof(track_information)); |
1861 | 1864 | ||
1862 | if ((ret = pkt_get_disc_info(pd, &di))) { | 1865 | if ((ret = pkt_get_disc_info(pd, &di))) { |
1863 | printk("failed get_disc\n"); | 1866 | pkt_err(pd, "failed get_disc\n"); |
1864 | return ret; | 1867 | return ret; |
1865 | } | 1868 | } |
1866 | 1869 | ||
@@ -1871,12 +1874,12 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) | |||
1871 | 1874 | ||
1872 | track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */ | 1875 | track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */ |
1873 | if ((ret = pkt_get_track_info(pd, track, 1, &ti))) { | 1876 | if ((ret = pkt_get_track_info(pd, track, 1, &ti))) { |
1874 | printk(DRIVER_NAME": failed get_track\n"); | 1877 | pkt_err(pd, "failed get_track\n"); |
1875 | return ret; | 1878 | return ret; |
1876 | } | 1879 | } |
1877 | 1880 | ||
1878 | if (!pkt_writable_track(pd, &ti)) { | 1881 | if (!pkt_writable_track(pd, &ti)) { |
1879 | printk(DRIVER_NAME": can't write to this track\n"); | 1882 | pkt_err(pd, "can't write to this track\n"); |
1880 | return -EROFS; | 1883 | return -EROFS; |
1881 | } | 1884 | } |
1882 | 1885 | ||
@@ -1886,11 +1889,11 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) | |||
1886 | */ | 1889 | */ |
1887 | pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2; | 1890 | pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2; |
1888 | if (pd->settings.size == 0) { | 1891 | if (pd->settings.size == 0) { |
1889 | printk(DRIVER_NAME": detected zero packet size!\n"); | 1892 | pkt_notice(pd, "detected zero packet size!\n"); |
1890 | return -ENXIO; | 1893 | return -ENXIO; |
1891 | } | 1894 | } |
1892 | if (pd->settings.size > PACKET_MAX_SECTORS) { | 1895 | if (pd->settings.size > PACKET_MAX_SECTORS) { |
1893 | printk(DRIVER_NAME": packet size is too big\n"); | 1896 | pkt_err(pd, "packet size is too big\n"); |
1894 | return -EROFS; | 1897 | return -EROFS; |
1895 | } | 1898 | } |
1896 | pd->settings.fp = ti.fp; | 1899 | pd->settings.fp = ti.fp; |
@@ -1932,7 +1935,7 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) | |||
1932 | pd->settings.block_mode = PACKET_BLOCK_MODE2; | 1935 | pd->settings.block_mode = PACKET_BLOCK_MODE2; |
1933 | break; | 1936 | break; |
1934 | default: | 1937 | default: |
1935 | printk(DRIVER_NAME": unknown data mode\n"); | 1938 | pkt_err(pd, "unknown data mode\n"); |
1936 | return -EROFS; | 1939 | return -EROFS; |
1937 | } | 1940 | } |
1938 | return 0; | 1941 | return 0; |
@@ -1966,10 +1969,10 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd, | |||
1966 | cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff)); | 1969 | cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff)); |
1967 | ret = pkt_mode_select(pd, &cgc); | 1970 | ret = pkt_mode_select(pd, &cgc); |
1968 | if (ret) { | 1971 | if (ret) { |
1969 | printk(DRIVER_NAME": write caching control failed\n"); | 1972 | pkt_err(pd, "write caching control failed\n"); |
1970 | pkt_dump_sense(&cgc); | 1973 | pkt_dump_sense(pd, &cgc); |
1971 | } else if (!ret && set) | 1974 | } else if (!ret && set) |
1972 | printk(DRIVER_NAME": enabled write caching on %s\n", pd->name); | 1975 | pkt_notice(pd, "enabled write caching\n"); |
1973 | return ret; | 1976 | return ret; |
1974 | } | 1977 | } |
1975 | 1978 | ||
@@ -2005,7 +2008,7 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd, | |||
2005 | sizeof(struct mode_page_header); | 2008 | sizeof(struct mode_page_header); |
2006 | ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); | 2009 | ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); |
2007 | if (ret) { | 2010 | if (ret) { |
2008 | pkt_dump_sense(&cgc); | 2011 | pkt_dump_sense(pd, &cgc); |
2009 | return ret; | 2012 | return ret; |
2010 | } | 2013 | } |
2011 | } | 2014 | } |
@@ -2064,7 +2067,7 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, | |||
2064 | cgc.cmd[8] = 2; | 2067 | cgc.cmd[8] = 2; |
2065 | ret = pkt_generic_packet(pd, &cgc); | 2068 | ret = pkt_generic_packet(pd, &cgc); |
2066 | if (ret) { | 2069 | if (ret) { |
2067 | pkt_dump_sense(&cgc); | 2070 | pkt_dump_sense(pd, &cgc); |
2068 | return ret; | 2071 | return ret; |
2069 | } | 2072 | } |
2070 | size = ((unsigned int) buf[0]<<8) + buf[1] + 2; | 2073 | size = ((unsigned int) buf[0]<<8) + buf[1] + 2; |
@@ -2079,16 +2082,16 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, | |||
2079 | cgc.cmd[8] = size; | 2082 | cgc.cmd[8] = size; |
2080 | ret = pkt_generic_packet(pd, &cgc); | 2083 | ret = pkt_generic_packet(pd, &cgc); |
2081 | if (ret) { | 2084 | if (ret) { |
2082 | pkt_dump_sense(&cgc); | 2085 | pkt_dump_sense(pd, &cgc); |
2083 | return ret; | 2086 | return ret; |
2084 | } | 2087 | } |
2085 | 2088 | ||
2086 | if (!(buf[6] & 0x40)) { | 2089 | if (!(buf[6] & 0x40)) { |
2087 | printk(DRIVER_NAME": Disc type is not CD-RW\n"); | 2090 | pkt_notice(pd, "disc type is not CD-RW\n"); |
2088 | return 1; | 2091 | return 1; |
2089 | } | 2092 | } |
2090 | if (!(buf[6] & 0x4)) { | 2093 | if (!(buf[6] & 0x4)) { |
2091 | printk(DRIVER_NAME": A1 values on media are not valid, maybe not CDRW?\n"); | 2094 | pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n"); |
2092 | return 1; | 2095 | return 1; |
2093 | } | 2096 | } |
2094 | 2097 | ||
@@ -2108,14 +2111,14 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, | |||
2108 | *speed = us_clv_to_speed[sp]; | 2111 | *speed = us_clv_to_speed[sp]; |
2109 | break; | 2112 | break; |
2110 | default: | 2113 | default: |
2111 | printk(DRIVER_NAME": Unknown disc sub-type %d\n",st); | 2114 | pkt_notice(pd, "unknown disc sub-type %d\n", st); |
2112 | return 1; | 2115 | return 1; |
2113 | } | 2116 | } |
2114 | if (*speed) { | 2117 | if (*speed) { |
2115 | printk(DRIVER_NAME": Max. media speed: %d\n",*speed); | 2118 | pkt_info(pd, "maximum media speed: %d\n", *speed); |
2116 | return 0; | 2119 | return 0; |
2117 | } else { | 2120 | } else { |
2118 | printk(DRIVER_NAME": Unknown speed %d for sub-type %d\n",sp,st); | 2121 | pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st); |
2119 | return 1; | 2122 | return 1; |
2120 | } | 2123 | } |
2121 | } | 2124 | } |
@@ -2126,7 +2129,7 @@ static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd) | |||
2126 | struct request_sense sense; | 2129 | struct request_sense sense; |
2127 | int ret; | 2130 | int ret; |
2128 | 2131 | ||
2129 | VPRINTK(DRIVER_NAME": Performing OPC\n"); | 2132 | pkt_dbg(2, pd, "Performing OPC\n"); |
2130 | 2133 | ||
2131 | init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); | 2134 | init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); |
2132 | cgc.sense = &sense; | 2135 | cgc.sense = &sense; |
@@ -2134,7 +2137,7 @@ static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd) | |||
2134 | cgc.cmd[0] = GPCMD_SEND_OPC; | 2137 | cgc.cmd[0] = GPCMD_SEND_OPC; |
2135 | cgc.cmd[1] = 1; | 2138 | cgc.cmd[1] = 1; |
2136 | if ((ret = pkt_generic_packet(pd, &cgc))) | 2139 | if ((ret = pkt_generic_packet(pd, &cgc))) |
2137 | pkt_dump_sense(&cgc); | 2140 | pkt_dump_sense(pd, &cgc); |
2138 | return ret; | 2141 | return ret; |
2139 | } | 2142 | } |
2140 | 2143 | ||
@@ -2144,12 +2147,12 @@ static int pkt_open_write(struct pktcdvd_device *pd) | |||
2144 | unsigned int write_speed, media_write_speed, read_speed; | 2147 | unsigned int write_speed, media_write_speed, read_speed; |
2145 | 2148 | ||
2146 | if ((ret = pkt_probe_settings(pd))) { | 2149 | if ((ret = pkt_probe_settings(pd))) { |
2147 | VPRINTK(DRIVER_NAME": %s failed probe\n", pd->name); | 2150 | pkt_dbg(2, pd, "failed probe\n"); |
2148 | return ret; | 2151 | return ret; |
2149 | } | 2152 | } |
2150 | 2153 | ||
2151 | if ((ret = pkt_set_write_settings(pd))) { | 2154 | if ((ret = pkt_set_write_settings(pd))) { |
2152 | DPRINTK(DRIVER_NAME": %s failed saving write settings\n", pd->name); | 2155 | pkt_dbg(1, pd, "failed saving write settings\n"); |
2153 | return -EIO; | 2156 | return -EIO; |
2154 | } | 2157 | } |
2155 | 2158 | ||
@@ -2161,26 +2164,26 @@ static int pkt_open_write(struct pktcdvd_device *pd) | |||
2161 | case 0x13: /* DVD-RW */ | 2164 | case 0x13: /* DVD-RW */ |
2162 | case 0x1a: /* DVD+RW */ | 2165 | case 0x1a: /* DVD+RW */ |
2163 | case 0x12: /* DVD-RAM */ | 2166 | case 0x12: /* DVD-RAM */ |
2164 | DPRINTK(DRIVER_NAME": write speed %ukB/s\n", write_speed); | 2167 | pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed); |
2165 | break; | 2168 | break; |
2166 | default: | 2169 | default: |
2167 | if ((ret = pkt_media_speed(pd, &media_write_speed))) | 2170 | if ((ret = pkt_media_speed(pd, &media_write_speed))) |
2168 | media_write_speed = 16; | 2171 | media_write_speed = 16; |
2169 | write_speed = min(write_speed, media_write_speed * 177); | 2172 | write_speed = min(write_speed, media_write_speed * 177); |
2170 | DPRINTK(DRIVER_NAME": write speed %ux\n", write_speed / 176); | 2173 | pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176); |
2171 | break; | 2174 | break; |
2172 | } | 2175 | } |
2173 | read_speed = write_speed; | 2176 | read_speed = write_speed; |
2174 | 2177 | ||
2175 | if ((ret = pkt_set_speed(pd, write_speed, read_speed))) { | 2178 | if ((ret = pkt_set_speed(pd, write_speed, read_speed))) { |
2176 | DPRINTK(DRIVER_NAME": %s couldn't set write speed\n", pd->name); | 2179 | pkt_dbg(1, pd, "couldn't set write speed\n"); |
2177 | return -EIO; | 2180 | return -EIO; |
2178 | } | 2181 | } |
2179 | pd->write_speed = write_speed; | 2182 | pd->write_speed = write_speed; |
2180 | pd->read_speed = read_speed; | 2183 | pd->read_speed = read_speed; |
2181 | 2184 | ||
2182 | if ((ret = pkt_perform_opc(pd))) { | 2185 | if ((ret = pkt_perform_opc(pd))) { |
2183 | DPRINTK(DRIVER_NAME": %s Optimum Power Calibration failed\n", pd->name); | 2186 | pkt_dbg(1, pd, "Optimum Power Calibration failed\n"); |
2184 | } | 2187 | } |
2185 | 2188 | ||
2186 | return 0; | 2189 | return 0; |
@@ -2205,7 +2208,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) | |||
2205 | goto out; | 2208 | goto out; |
2206 | 2209 | ||
2207 | if ((ret = pkt_get_last_written(pd, &lba))) { | 2210 | if ((ret = pkt_get_last_written(pd, &lba))) { |
2208 | printk(DRIVER_NAME": pkt_get_last_written failed\n"); | 2211 | pkt_err(pd, "pkt_get_last_written failed\n"); |
2209 | goto out_putdev; | 2212 | goto out_putdev; |
2210 | } | 2213 | } |
2211 | 2214 | ||
@@ -2235,11 +2238,11 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) | |||
2235 | 2238 | ||
2236 | if (write) { | 2239 | if (write) { |
2237 | if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { | 2240 | if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { |
2238 | printk(DRIVER_NAME": not enough memory for buffers\n"); | 2241 | pkt_err(pd, "not enough memory for buffers\n"); |
2239 | ret = -ENOMEM; | 2242 | ret = -ENOMEM; |
2240 | goto out_putdev; | 2243 | goto out_putdev; |
2241 | } | 2244 | } |
2242 | printk(DRIVER_NAME": %lukB available on disc\n", lba << 1); | 2245 | pkt_info(pd, "%lukB available on disc\n", lba << 1); |
2243 | } | 2246 | } |
2244 | 2247 | ||
2245 | return 0; | 2248 | return 0; |
@@ -2257,7 +2260,7 @@ out: | |||
2257 | static void pkt_release_dev(struct pktcdvd_device *pd, int flush) | 2260 | static void pkt_release_dev(struct pktcdvd_device *pd, int flush) |
2258 | { | 2261 | { |
2259 | if (flush && pkt_flush_cache(pd)) | 2262 | if (flush && pkt_flush_cache(pd)) |
2260 | DPRINTK(DRIVER_NAME": %s not flushing cache\n", pd->name); | 2263 | pkt_dbg(1, pd, "not flushing cache\n"); |
2261 | 2264 | ||
2262 | pkt_lock_door(pd, 0); | 2265 | pkt_lock_door(pd, 0); |
2263 | 2266 | ||
@@ -2279,8 +2282,6 @@ static int pkt_open(struct block_device *bdev, fmode_t mode) | |||
2279 | struct pktcdvd_device *pd = NULL; | 2282 | struct pktcdvd_device *pd = NULL; |
2280 | int ret; | 2283 | int ret; |
2281 | 2284 | ||
2282 | VPRINTK(DRIVER_NAME": entering open\n"); | ||
2283 | |||
2284 | mutex_lock(&pktcdvd_mutex); | 2285 | mutex_lock(&pktcdvd_mutex); |
2285 | mutex_lock(&ctl_mutex); | 2286 | mutex_lock(&ctl_mutex); |
2286 | pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev)); | 2287 | pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev)); |
@@ -2315,7 +2316,6 @@ static int pkt_open(struct block_device *bdev, fmode_t mode) | |||
2315 | out_dec: | 2316 | out_dec: |
2316 | pd->refcnt--; | 2317 | pd->refcnt--; |
2317 | out: | 2318 | out: |
2318 | VPRINTK(DRIVER_NAME": failed open (%d)\n", ret); | ||
2319 | mutex_unlock(&ctl_mutex); | 2319 | mutex_unlock(&ctl_mutex); |
2320 | mutex_unlock(&pktcdvd_mutex); | 2320 | mutex_unlock(&pktcdvd_mutex); |
2321 | return ret; | 2321 | return ret; |
@@ -2360,7 +2360,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) | |||
2360 | 2360 | ||
2361 | pd = q->queuedata; | 2361 | pd = q->queuedata; |
2362 | if (!pd) { | 2362 | if (!pd) { |
2363 | printk(DRIVER_NAME": %s incorrect request queue\n", bdevname(bio->bi_bdev, b)); | 2363 | pr_err("%s incorrect request queue\n", |
2364 | bdevname(bio->bi_bdev, b)); | ||
2364 | goto end_io; | 2365 | goto end_io; |
2365 | } | 2366 | } |
2366 | 2367 | ||
@@ -2382,20 +2383,20 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) | |||
2382 | } | 2383 | } |
2383 | 2384 | ||
2384 | if (!test_bit(PACKET_WRITABLE, &pd->flags)) { | 2385 | if (!test_bit(PACKET_WRITABLE, &pd->flags)) { |
2385 | printk(DRIVER_NAME": WRITE for ro device %s (%llu)\n", | 2386 | pkt_notice(pd, "WRITE for ro device (%llu)\n", |
2386 | pd->name, (unsigned long long)bio->bi_sector); | 2387 | (unsigned long long)bio->bi_sector); |
2387 | goto end_io; | 2388 | goto end_io; |
2388 | } | 2389 | } |
2389 | 2390 | ||
2390 | if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) { | 2391 | if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) { |
2391 | printk(DRIVER_NAME": wrong bio size\n"); | 2392 | pkt_err(pd, "wrong bio size\n"); |
2392 | goto end_io; | 2393 | goto end_io; |
2393 | } | 2394 | } |
2394 | 2395 | ||
2395 | blk_queue_bounce(q, &bio); | 2396 | blk_queue_bounce(q, &bio); |
2396 | 2397 | ||
2397 | zone = ZONE(bio->bi_sector, pd); | 2398 | zone = get_zone(bio->bi_sector, pd); |
2398 | VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n", | 2399 | pkt_dbg(2, pd, "start = %6llx stop = %6llx\n", |
2399 | (unsigned long long)bio->bi_sector, | 2400 | (unsigned long long)bio->bi_sector, |
2400 | (unsigned long long)bio_end_sector(bio)); | 2401 | (unsigned long long)bio_end_sector(bio)); |
2401 | 2402 | ||
@@ -2405,7 +2406,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) | |||
2405 | sector_t last_zone; | 2406 | sector_t last_zone; |
2406 | int first_sectors; | 2407 | int first_sectors; |
2407 | 2408 | ||
2408 | last_zone = ZONE(bio_end_sector(bio) - 1, pd); | 2409 | last_zone = get_zone(bio_end_sector(bio) - 1, pd); |
2409 | if (last_zone != zone) { | 2410 | if (last_zone != zone) { |
2410 | BUG_ON(last_zone != zone + pd->settings.size); | 2411 | BUG_ON(last_zone != zone + pd->settings.size); |
2411 | first_sectors = last_zone - bio->bi_sector; | 2412 | first_sectors = last_zone - bio->bi_sector; |
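The ZONE() macro these hunks replace becomes a plain helper. A minimal sketch of what a get_zone() like this computes, assuming (as the surrounding arithmetic implies) that pd->settings.size is the packet length in 512-byte sectors and a power of two; the driver's real helper may also fold in a track offset:

	static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
	{
		/* round the sector down to the start of its packet-sized zone */
		return sector & ~(sector_t)(pd->settings.size - 1);
	}

With that definition, the split above hands the first last_zone - bio->bi_sector sectors to a separate bio, so no single request ever spans two packet zones.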
@@ -2500,7 +2501,7 @@ static int pkt_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, | |||
2500 | struct bio_vec *bvec) | 2501 | struct bio_vec *bvec) |
2501 | { | 2502 | { |
2502 | struct pktcdvd_device *pd = q->queuedata; | 2503 | struct pktcdvd_device *pd = q->queuedata; |
2503 | sector_t zone = ZONE(bmd->bi_sector, pd); | 2504 | sector_t zone = get_zone(bmd->bi_sector, pd); |
2504 | int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size; | 2505 | int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size; |
2505 | int remaining = (pd->settings.size << 9) - used; | 2506 | int remaining = (pd->settings.size << 9) - used; |
2506 | int remaining2; | 2507 | int remaining2; |
@@ -2609,7 +2610,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) | |||
2609 | struct block_device *bdev; | 2610 | struct block_device *bdev; |
2610 | 2611 | ||
2611 | if (pd->pkt_dev == dev) { | 2612 | if (pd->pkt_dev == dev) { |
2612 | printk(DRIVER_NAME": Recursive setup not allowed\n"); | 2613 | pkt_err(pd, "recursive setup not allowed\n"); |
2613 | return -EBUSY; | 2614 | return -EBUSY; |
2614 | } | 2615 | } |
2615 | for (i = 0; i < MAX_WRITERS; i++) { | 2616 | for (i = 0; i < MAX_WRITERS; i++) { |
@@ -2617,11 +2618,12 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) | |||
2617 | if (!pd2) | 2618 | if (!pd2) |
2618 | continue; | 2619 | continue; |
2619 | if (pd2->bdev->bd_dev == dev) { | 2620 | if (pd2->bdev->bd_dev == dev) { |
2620 | printk(DRIVER_NAME": %s already setup\n", bdevname(pd2->bdev, b)); | 2621 | pkt_err(pd, "%s already setup\n", |
2622 | bdevname(pd2->bdev, b)); | ||
2621 | return -EBUSY; | 2623 | return -EBUSY; |
2622 | } | 2624 | } |
2623 | if (pd2->pkt_dev == dev) { | 2625 | if (pd2->pkt_dev == dev) { |
2624 | printk(DRIVER_NAME": Can't chain pktcdvd devices\n"); | 2626 | pkt_err(pd, "can't chain pktcdvd devices\n"); |
2625 | return -EBUSY; | 2627 | return -EBUSY; |
2626 | } | 2628 | } |
2627 | } | 2629 | } |
@@ -2644,13 +2646,13 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) | |||
2644 | atomic_set(&pd->cdrw.pending_bios, 0); | 2646 | atomic_set(&pd->cdrw.pending_bios, 0); |
2645 | pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); | 2647 | pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); |
2646 | if (IS_ERR(pd->cdrw.thread)) { | 2648 | if (IS_ERR(pd->cdrw.thread)) { |
2647 | printk(DRIVER_NAME": can't start kernel thread\n"); | 2649 | pkt_err(pd, "can't start kernel thread\n"); |
2648 | ret = -ENOMEM; | 2650 | ret = -ENOMEM; |
2649 | goto out_mem; | 2651 | goto out_mem; |
2650 | } | 2652 | } |
2651 | 2653 | ||
2652 | proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd); | 2654 | proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd); |
2653 | DPRINTK(DRIVER_NAME": writer %s mapped to %s\n", pd->name, bdevname(bdev, b)); | 2655 | pkt_dbg(1, pd, "writer mapped to %s\n", bdevname(bdev, b)); |
2654 | return 0; | 2656 | return 0; |
2655 | 2657 | ||
2656 | out_mem: | 2658 | out_mem: |
@@ -2665,8 +2667,8 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, | |||
2665 | struct pktcdvd_device *pd = bdev->bd_disk->private_data; | 2667 | struct pktcdvd_device *pd = bdev->bd_disk->private_data; |
2666 | int ret; | 2668 | int ret; |
2667 | 2669 | ||
2668 | VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, | 2670 | pkt_dbg(2, pd, "cmd %x, dev %d:%d\n", |
2669 | MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); | 2671 | cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); |
2670 | 2672 | ||
2671 | mutex_lock(&pktcdvd_mutex); | 2673 | mutex_lock(&pktcdvd_mutex); |
2672 | switch (cmd) { | 2674 | switch (cmd) { |
@@ -2690,7 +2692,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, | |||
2690 | break; | 2692 | break; |
2691 | 2693 | ||
2692 | default: | 2694 | default: |
2693 | VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd); | 2695 | pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); |
2694 | ret = -ENOTTY; | 2696 | ret = -ENOTTY; |
2695 | } | 2697 | } |
2696 | mutex_unlock(&pktcdvd_mutex); | 2698 | mutex_unlock(&pktcdvd_mutex); |
@@ -2743,7 +2745,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) | |||
2743 | if (!pkt_devs[idx]) | 2745 | if (!pkt_devs[idx]) |
2744 | break; | 2746 | break; |
2745 | if (idx == MAX_WRITERS) { | 2747 | if (idx == MAX_WRITERS) { |
2746 | printk(DRIVER_NAME": max %d writers supported\n", MAX_WRITERS); | 2748 | pr_err("max %d writers supported\n", MAX_WRITERS); |
2747 | ret = -EBUSY; | 2749 | ret = -EBUSY; |
2748 | goto out_mutex; | 2750 | goto out_mutex; |
2749 | } | 2751 | } |
@@ -2818,7 +2820,7 @@ out_mem: | |||
2818 | kfree(pd); | 2820 | kfree(pd); |
2819 | out_mutex: | 2821 | out_mutex: |
2820 | mutex_unlock(&ctl_mutex); | 2822 | mutex_unlock(&ctl_mutex); |
2821 | printk(DRIVER_NAME": setup of pktcdvd device failed\n"); | 2823 | pr_err("setup of pktcdvd device failed\n"); |
2822 | return ret; | 2824 | return ret; |
2823 | } | 2825 | } |
2824 | 2826 | ||
@@ -2839,7 +2841,7 @@ static int pkt_remove_dev(dev_t pkt_dev) | |||
2839 | break; | 2841 | break; |
2840 | } | 2842 | } |
2841 | if (idx == MAX_WRITERS) { | 2843 | if (idx == MAX_WRITERS) { |
2842 | DPRINTK(DRIVER_NAME": dev not setup\n"); | 2844 | pr_debug("dev not setup\n"); |
2843 | ret = -ENXIO; | 2845 | ret = -ENXIO; |
2844 | goto out; | 2846 | goto out; |
2845 | } | 2847 | } |
@@ -2859,7 +2861,7 @@ static int pkt_remove_dev(dev_t pkt_dev) | |||
2859 | blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY); | 2861 | blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY); |
2860 | 2862 | ||
2861 | remove_proc_entry(pd->name, pkt_proc); | 2863 | remove_proc_entry(pd->name, pkt_proc); |
2862 | DPRINTK(DRIVER_NAME": writer %s unmapped\n", pd->name); | 2864 | pkt_dbg(1, pd, "writer unmapped\n"); |
2863 | 2865 | ||
2864 | del_gendisk(pd->disk); | 2866 | del_gendisk(pd->disk); |
2865 | blk_cleanup_queue(pd->disk->queue); | 2867 | blk_cleanup_queue(pd->disk->queue); |
@@ -2969,7 +2971,7 @@ static int __init pkt_init(void) | |||
2969 | 2971 | ||
2970 | ret = register_blkdev(pktdev_major, DRIVER_NAME); | 2972 | ret = register_blkdev(pktdev_major, DRIVER_NAME); |
2971 | if (ret < 0) { | 2973 | if (ret < 0) { |
2972 | printk(DRIVER_NAME": Unable to register block device\n"); | 2974 | pr_err("unable to register block device\n"); |
2973 | goto out2; | 2975 | goto out2; |
2974 | } | 2976 | } |
2975 | if (!pktdev_major) | 2977 | if (!pktdev_major) |
@@ -2983,7 +2985,7 @@ static int __init pkt_init(void) | |||
2983 | 2985 | ||
2984 | ret = misc_register(&pkt_misc); | 2986 | ret = misc_register(&pkt_misc); |
2985 | if (ret) { | 2987 | if (ret) { |
2986 | printk(DRIVER_NAME": Unable to register misc device\n"); | 2988 | pr_err("unable to register misc device\n"); |
2987 | goto out_misc; | 2989 | goto out_misc; |
2988 | } | 2990 | } |
2989 | 2991 | ||
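All of the pktcdvd hunks above funnel through a small family of message helpers that prepend the device name automatically. Their exact definitions live elsewhere in this patch; a sketch of the shape such macros take, assuming a pktcdvd_device with the name field used throughout:

	#define pkt_err(pd, fmt, ...) \
		pr_err("%s: " fmt, (pd)->name, ##__VA_ARGS__)
	#define pkt_notice(pd, fmt, ...) \
		pr_notice("%s: " fmt, (pd)->name, ##__VA_ARGS__)
	#define pkt_info(pd, fmt, ...) \
		pr_info("%s: " fmt, (pd)->name, ##__VA_ARGS__)
	/* pkt_dbg(1, ...) replaces DPRINTK, pkt_dbg(2, ...) replaces VPRINTK;
	 * level 2 messages typically also print __func__ */

This removes the hand-rolled DRIVER_NAME": %s ..." prefixes and makes every message name the device consistently.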
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 39c51cc7fabc..b22a7d0fe5b7 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -5132,7 +5132,7 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
5132 | bool already = false; | 5132 | bool already = false; |
5133 | int ret; | 5133 | int ret; |
5134 | 5134 | ||
5135 | ret = strict_strtoul(buf, 10, &ul); | 5135 | ret = kstrtoul(buf, 10, &ul); |
5136 | if (ret) | 5136 | if (ret) |
5137 | return ret; | 5137 | return ret; |
5138 | 5138 | ||
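strict_strtoul() was a deprecated wrapper; kstrtoul() takes the same (string, base, result) arguments but rejects trailing garbage (a single trailing newline is tolerated) and returns -EINVAL or -ERANGE on failure. A minimal sysfs-store sketch of the idiom, with a hypothetical example_store():

	static ssize_t example_store(const char *buf, size_t count)
	{
		unsigned long val;
		int err;

		err = kstrtoul(buf, 10, &val);	/* base 0 would auto-detect 0x/0 prefixes */
		if (err)
			return err;
		/* ... use val ... */
		return count;
	}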
diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 8ed6ccb748cf..b02d53a399f3 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c | |||
@@ -924,7 +924,6 @@ static int swim_probe(struct platform_device *dev) | |||
924 | return 0; | 924 | return 0; |
925 | 925 | ||
926 | out_kfree: | 926 | out_kfree: |
927 | platform_set_drvdata(dev, NULL); | ||
928 | kfree(swd); | 927 | kfree(swd); |
929 | out_iounmap: | 928 | out_iounmap: |
930 | iounmap(swim_base); | 929 | iounmap(swim_base); |
@@ -962,7 +961,6 @@ static int swim_remove(struct platform_device *dev) | |||
962 | if (res) | 961 | if (res) |
963 | release_mem_region(res->start, resource_size(res)); | 962 | release_mem_region(res->start, resource_size(res)); |
964 | 963 | ||
965 | platform_set_drvdata(dev, NULL); | ||
966 | kfree(swd); | 964 | kfree(swd); |
967 | 965 | ||
968 | return 0; | 966 | return 0; |
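This deletion (and the matching msm-iommu and omap-iommu hunks further down) leans on the driver core, which clears drvdata itself once a device is unbound, so remove() and probe-error paths no longer need to NULL it by hand. A sketch of the resulting minimal remove(), with hypothetical foo_* names:

	static int foo_remove(struct platform_device *pdev)
	{
		struct foo_priv *priv = platform_get_drvdata(pdev);

		kfree(priv);	/* no platform_set_drvdata(pdev, NULL) needed */
		return 0;
	}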
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index fe5c3cd10c34..c2014a0aa206 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c | |||
@@ -620,7 +620,7 @@ static void backend_changed(struct xenbus_watch *watch, | |||
620 | } | 620 | } |
621 | 621 | ||
622 | /* Front end dir is a number, which is used as the handle. */ | 622 | /* Front end dir is a number, which is used as the handle. */ |
623 | err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); | 623 | err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); |
624 | if (err) | 624 | if (err) |
625 | return; | 625 | return; |
626 | 626 | ||
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 4519cb332987..5796d0157ce0 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c | |||
@@ -766,6 +766,25 @@ static void tpm_tis_reenable_interrupts(struct tpm_chip *chip) | |||
766 | } | 766 | } |
767 | #endif | 767 | #endif |
768 | 768 | ||
769 | #ifdef CONFIG_PM_SLEEP | ||
770 | static int tpm_tis_resume(struct device *dev) | ||
771 | { | ||
772 | struct tpm_chip *chip = dev_get_drvdata(dev); | ||
773 | int ret; | ||
774 | |||
775 | if (chip->vendor.irq) | ||
776 | tpm_tis_reenable_interrupts(chip); | ||
777 | |||
778 | ret = tpm_pm_resume(dev); | ||
779 | if (!ret) | ||
780 | tpm_do_selftest(chip); | ||
781 | |||
782 | return ret; | ||
783 | } | ||
784 | #endif | ||
785 | |||
786 | static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_resume); | ||
787 | |||
769 | #ifdef CONFIG_PNP | 788 | #ifdef CONFIG_PNP |
770 | static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, | 789 | static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, |
771 | const struct pnp_device_id *pnp_id) | 790 | const struct pnp_device_id *pnp_id) |
@@ -787,26 +806,6 @@ static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, | |||
787 | return tpm_tis_init(&pnp_dev->dev, start, len, irq); | 806 | return tpm_tis_init(&pnp_dev->dev, start, len, irq); |
788 | } | 807 | } |
789 | 808 | ||
790 | static int tpm_tis_pnp_suspend(struct pnp_dev *dev, pm_message_t msg) | ||
791 | { | ||
792 | return tpm_pm_suspend(&dev->dev); | ||
793 | } | ||
794 | |||
795 | static int tpm_tis_pnp_resume(struct pnp_dev *dev) | ||
796 | { | ||
797 | struct tpm_chip *chip = pnp_get_drvdata(dev); | ||
798 | int ret; | ||
799 | |||
800 | if (chip->vendor.irq) | ||
801 | tpm_tis_reenable_interrupts(chip); | ||
802 | |||
803 | ret = tpm_pm_resume(&dev->dev); | ||
804 | if (!ret) | ||
805 | tpm_do_selftest(chip); | ||
806 | |||
807 | return ret; | ||
808 | } | ||
809 | |||
810 | static struct pnp_device_id tpm_pnp_tbl[] = { | 809 | static struct pnp_device_id tpm_pnp_tbl[] = { |
811 | {"PNP0C31", 0}, /* TPM */ | 810 | {"PNP0C31", 0}, /* TPM */ |
812 | {"ATM1200", 0}, /* Atmel */ | 811 | {"ATM1200", 0}, /* Atmel */ |
@@ -835,9 +834,12 @@ static struct pnp_driver tis_pnp_driver = { | |||
835 | .name = "tpm_tis", | 834 | .name = "tpm_tis", |
836 | .id_table = tpm_pnp_tbl, | 835 | .id_table = tpm_pnp_tbl, |
837 | .probe = tpm_tis_pnp_init, | 836 | .probe = tpm_tis_pnp_init, |
838 | .suspend = tpm_tis_pnp_suspend, | ||
839 | .resume = tpm_tis_pnp_resume, | ||
840 | .remove = tpm_tis_pnp_remove, | 837 | .remove = tpm_tis_pnp_remove, |
838 | #ifdef CONFIG_PM_SLEEP | ||
839 | .driver = { | ||
840 | .pm = &tpm_tis_pm, | ||
841 | }, | ||
842 | #endif | ||
841 | }; | 843 | }; |
842 | 844 | ||
843 | #define TIS_HID_USR_IDX sizeof(tpm_pnp_tbl)/sizeof(struct pnp_device_id) -2 | 845 | #define TIS_HID_USR_IDX sizeof(tpm_pnp_tbl)/sizeof(struct pnp_device_id) -2 |
@@ -846,20 +848,6 @@ module_param_string(hid, tpm_pnp_tbl[TIS_HID_USR_IDX].id, | |||
846 | MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe"); | 848 | MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe"); |
847 | #endif | 849 | #endif |
848 | 850 | ||
849 | #ifdef CONFIG_PM_SLEEP | ||
850 | static int tpm_tis_resume(struct device *dev) | ||
851 | { | ||
852 | struct tpm_chip *chip = dev_get_drvdata(dev); | ||
853 | |||
854 | if (chip->vendor.irq) | ||
855 | tpm_tis_reenable_interrupts(chip); | ||
856 | |||
857 | return tpm_pm_resume(dev); | ||
858 | } | ||
859 | #endif | ||
860 | |||
861 | static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_resume); | ||
862 | |||
863 | static struct platform_driver tis_drv = { | 851 | static struct platform_driver tis_drv = { |
864 | .driver = { | 852 | .driver = { |
865 | .name = "tpm_tis", | 853 | .name = "tpm_tis", |
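Net effect of the tpm_tis hunks: the PNP-only suspend/resume callbacks are deleted and the tpm_tis_pm ops set (already used by the platform driver) is wired into the PNP driver as well, so both bus bindings share one resume path that re-enables interrupts and re-runs the selftest. The general shape of the pattern, with hypothetical foo_* names:

	static SIMPLE_DEV_PM_OPS(foo_pm, foo_suspend, foo_resume);

	static struct pnp_driver foo_pnp_driver = {
		.name	= "foo",
		.driver	= { .pm = &foo_pm },	/* shared with the platform driver */
	};

	static struct platform_driver foo_platform_driver = {
		.driver	= { .name = "foo", .pm = &foo_pm },
	};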
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 232fa8fce26a..fa0affb699b4 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c | |||
@@ -14,7 +14,7 @@ | |||
14 | * of and an antecedent to, SMBIOS, which stands for System | 14 | * of and an antecedent to, SMBIOS, which stands for System |
15 | * Management BIOS. See further: http://www.dmtf.org/standards | 15 | * Management BIOS. See further: http://www.dmtf.org/standards |
16 | */ | 16 | */ |
17 | static char dmi_empty_string[] = " "; | 17 | static const char dmi_empty_string[] = " "; |
18 | 18 | ||
19 | static u16 __initdata dmi_ver; | 19 | static u16 __initdata dmi_ver; |
20 | /* | 20 | /* |
@@ -49,7 +49,7 @@ static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s) | |||
49 | return ""; | 49 | return ""; |
50 | } | 50 | } |
51 | 51 | ||
52 | static char * __init dmi_string(const struct dmi_header *dm, u8 s) | 52 | static const char * __init dmi_string(const struct dmi_header *dm, u8 s) |
53 | { | 53 | { |
54 | const char *bp = dmi_string_nosave(dm, s); | 54 | const char *bp = dmi_string_nosave(dm, s); |
55 | char *str; | 55 | char *str; |
@@ -62,8 +62,6 @@ static char * __init dmi_string(const struct dmi_header *dm, u8 s) | |||
62 | str = dmi_alloc(len); | 62 | str = dmi_alloc(len); |
63 | if (str != NULL) | 63 | if (str != NULL) |
64 | strcpy(str, bp); | 64 | strcpy(str, bp); |
65 | else | ||
66 | printk(KERN_ERR "dmi_string: cannot allocate %Zu bytes.\n", len); | ||
67 | 65 | ||
68 | return str; | 66 | return str; |
69 | } | 67 | } |
@@ -133,17 +131,18 @@ static int __init dmi_checksum(const u8 *buf, u8 len) | |||
133 | return sum == 0; | 131 | return sum == 0; |
134 | } | 132 | } |
135 | 133 | ||
136 | static char *dmi_ident[DMI_STRING_MAX]; | 134 | static const char *dmi_ident[DMI_STRING_MAX]; |
137 | static LIST_HEAD(dmi_devices); | 135 | static LIST_HEAD(dmi_devices); |
138 | int dmi_available; | 136 | int dmi_available; |
139 | 137 | ||
140 | /* | 138 | /* |
141 | * Save a DMI string | 139 | * Save a DMI string |
142 | */ | 140 | */ |
143 | static void __init dmi_save_ident(const struct dmi_header *dm, int slot, int string) | 141 | static void __init dmi_save_ident(const struct dmi_header *dm, int slot, |
142 | int string) | ||
144 | { | 143 | { |
145 | const char *d = (const char*) dm; | 144 | const char *d = (const char *) dm; |
146 | char *p; | 145 | const char *p; |
147 | 146 | ||
148 | if (dmi_ident[slot]) | 147 | if (dmi_ident[slot]) |
149 | return; | 148 | return; |
@@ -155,9 +154,10 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot, int str | |||
155 | dmi_ident[slot] = p; | 154 | dmi_ident[slot] = p; |
156 | } | 155 | } |
157 | 156 | ||
158 | static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int index) | 157 | static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, |
158 | int index) | ||
159 | { | 159 | { |
160 | const u8 *d = (u8*) dm + index; | 160 | const u8 *d = (u8 *) dm + index; |
161 | char *s; | 161 | char *s; |
162 | int is_ff = 1, is_00 = 1, i; | 162 | int is_ff = 1, is_00 = 1, i; |
163 | 163 | ||
@@ -188,12 +188,13 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde | |||
188 | else | 188 | else |
189 | sprintf(s, "%pUB", d); | 189 | sprintf(s, "%pUB", d); |
190 | 190 | ||
191 | dmi_ident[slot] = s; | 191 | dmi_ident[slot] = s; |
192 | } | 192 | } |
193 | 193 | ||
194 | static void __init dmi_save_type(const struct dmi_header *dm, int slot, int index) | 194 | static void __init dmi_save_type(const struct dmi_header *dm, int slot, |
195 | int index) | ||
195 | { | 196 | { |
196 | const u8 *d = (u8*) dm + index; | 197 | const u8 *d = (u8 *) dm + index; |
197 | char *s; | 198 | char *s; |
198 | 199 | ||
199 | if (dmi_ident[slot]) | 200 | if (dmi_ident[slot]) |
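The sprintf() above relies on the kernel's %pU printf extension, which formats a 16-byte buffer as a UUID; the trailing letter selects byte order and case (B/b big-endian, L/l little-endian). A quick illustration:

	u8 uuid[16];

	get_random_bytes(uuid, sizeof(uuid));	/* stand-in for the DMI bytes */
	pr_info("uuid: %pUB\n", uuid);		/* 00112233-4455-6677-8899-AABBCCDDEEFF style */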
@@ -216,10 +217,8 @@ static void __init dmi_save_one_device(int type, const char *name) | |||
216 | return; | 217 | return; |
217 | 218 | ||
218 | dev = dmi_alloc(sizeof(*dev) + strlen(name) + 1); | 219 | dev = dmi_alloc(sizeof(*dev) + strlen(name) + 1); |
219 | if (!dev) { | 220 | if (!dev) |
220 | printk(KERN_ERR "dmi_save_one_device: out of memory.\n"); | ||
221 | return; | 221 | return; |
222 | } | ||
223 | 222 | ||
224 | dev->type = type; | 223 | dev->type = type; |
225 | strcpy((char *)(dev + 1), name); | 224 | strcpy((char *)(dev + 1), name); |
@@ -249,17 +248,14 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm) | |||
249 | struct dmi_device *dev; | 248 | struct dmi_device *dev; |
250 | 249 | ||
251 | for (i = 1; i <= count; i++) { | 250 | for (i = 1; i <= count; i++) { |
252 | char *devname = dmi_string(dm, i); | 251 | const char *devname = dmi_string(dm, i); |
253 | 252 | ||
254 | if (devname == dmi_empty_string) | 253 | if (devname == dmi_empty_string) |
255 | continue; | 254 | continue; |
256 | 255 | ||
257 | dev = dmi_alloc(sizeof(*dev)); | 256 | dev = dmi_alloc(sizeof(*dev)); |
258 | if (!dev) { | 257 | if (!dev) |
259 | printk(KERN_ERR | ||
260 | "dmi_save_oem_strings_devices: out of memory.\n"); | ||
261 | break; | 258 | break; |
262 | } | ||
263 | 259 | ||
264 | dev->type = DMI_DEV_TYPE_OEM_STRING; | 260 | dev->type = DMI_DEV_TYPE_OEM_STRING; |
265 | dev->name = devname; | 261 | dev->name = devname; |
@@ -272,21 +268,17 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm) | |||
272 | static void __init dmi_save_ipmi_device(const struct dmi_header *dm) | 268 | static void __init dmi_save_ipmi_device(const struct dmi_header *dm) |
273 | { | 269 | { |
274 | struct dmi_device *dev; | 270 | struct dmi_device *dev; |
275 | void * data; | 271 | void *data; |
276 | 272 | ||
277 | data = dmi_alloc(dm->length); | 273 | data = dmi_alloc(dm->length); |
278 | if (data == NULL) { | 274 | if (data == NULL) |
279 | printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); | ||
280 | return; | 275 | return; |
281 | } | ||
282 | 276 | ||
283 | memcpy(data, dm, dm->length); | 277 | memcpy(data, dm, dm->length); |
284 | 278 | ||
285 | dev = dmi_alloc(sizeof(*dev)); | 279 | dev = dmi_alloc(sizeof(*dev)); |
286 | if (!dev) { | 280 | if (!dev) |
287 | printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); | ||
288 | return; | 281 | return; |
289 | } | ||
290 | 282 | ||
291 | dev->type = DMI_DEV_TYPE_IPMI; | 283 | dev->type = DMI_DEV_TYPE_IPMI; |
292 | dev->name = "IPMI controller"; | 284 | dev->name = "IPMI controller"; |
@@ -301,10 +293,9 @@ static void __init dmi_save_dev_onboard(int instance, int segment, int bus, | |||
301 | struct dmi_dev_onboard *onboard_dev; | 293 | struct dmi_dev_onboard *onboard_dev; |
302 | 294 | ||
303 | onboard_dev = dmi_alloc(sizeof(*onboard_dev) + strlen(name) + 1); | 295 | onboard_dev = dmi_alloc(sizeof(*onboard_dev) + strlen(name) + 1); |
304 | if (!onboard_dev) { | 296 | if (!onboard_dev) |
305 | printk(KERN_ERR "dmi_save_dev_onboard: out of memory.\n"); | ||
306 | return; | 297 | return; |
307 | } | 298 | |
308 | onboard_dev->instance = instance; | 299 | onboard_dev->instance = instance; |
309 | onboard_dev->segment = segment; | 300 | onboard_dev->segment = segment; |
310 | onboard_dev->bus = bus; | 301 | onboard_dev->bus = bus; |
@@ -320,7 +311,7 @@ static void __init dmi_save_dev_onboard(int instance, int segment, int bus, | |||
320 | 311 | ||
321 | static void __init dmi_save_extended_devices(const struct dmi_header *dm) | 312 | static void __init dmi_save_extended_devices(const struct dmi_header *dm) |
322 | { | 313 | { |
323 | const u8 *d = (u8*) dm + 5; | 314 | const u8 *d = (u8 *) dm + 5; |
324 | 315 | ||
325 | /* Skip disabled device */ | 316 | /* Skip disabled device */ |
326 | if ((*d & 0x80) == 0) | 317 | if ((*d & 0x80) == 0) |
@@ -338,7 +329,7 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm) | |||
338 | */ | 329 | */ |
339 | static void __init dmi_decode(const struct dmi_header *dm, void *dummy) | 330 | static void __init dmi_decode(const struct dmi_header *dm, void *dummy) |
340 | { | 331 | { |
341 | switch(dm->type) { | 332 | switch (dm->type) { |
342 | case 0: /* BIOS Information */ | 333 | case 0: /* BIOS Information */ |
343 | dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); | 334 | dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); |
344 | dmi_save_ident(dm, DMI_BIOS_VERSION, 5); | 335 | dmi_save_ident(dm, DMI_BIOS_VERSION, 5); |
@@ -502,13 +493,7 @@ void __init dmi_scan_machine(void) | |||
502 | dmi_available = 1; | 493 | dmi_available = 1; |
503 | goto out; | 494 | goto out; |
504 | } | 495 | } |
505 | } | 496 | } else { |
506 | else { | ||
507 | /* | ||
508 | * no iounmap() for that ioremap(); it would be a no-op, but | ||
509 | * it's so early in setup that sucker gets confused into doing | ||
510 | * what it shouldn't if we actually call it. | ||
511 | */ | ||
512 | p = dmi_ioremap(0xF0000, 0x10000); | 497 | p = dmi_ioremap(0xF0000, 0x10000); |
513 | if (p == NULL) | 498 | if (p == NULL) |
514 | goto error; | 499 | goto error; |
@@ -533,7 +518,7 @@ void __init dmi_scan_machine(void) | |||
533 | dmi_iounmap(p, 0x10000); | 518 | dmi_iounmap(p, 0x10000); |
534 | } | 519 | } |
535 | error: | 520 | error: |
536 | printk(KERN_INFO "DMI not present or invalid.\n"); | 521 | pr_info("DMI not present or invalid.\n"); |
537 | out: | 522 | out: |
538 | dmi_initialized = 1; | 523 | dmi_initialized = 1; |
539 | } | 524 | } |
@@ -669,7 +654,7 @@ int dmi_name_in_serial(const char *str) | |||
669 | 654 | ||
670 | /** | 655 | /** |
671 | * dmi_name_in_vendors - Check if string is in the DMI system or board vendor name | 656 | * dmi_name_in_vendors - Check if string is in the DMI system or board vendor name |
672 | * @str: Case sensitive Name | 657 | * @str: Case sensitive Name |
673 | */ | 658 | */ |
674 | int dmi_name_in_vendors(const char *str) | 659 | int dmi_name_in_vendors(const char *str) |
675 | { | 660 | { |
@@ -696,13 +681,13 @@ EXPORT_SYMBOL(dmi_name_in_vendors); | |||
696 | * A new search is initiated by passing %NULL as the @from argument. | 681 | * A new search is initiated by passing %NULL as the @from argument. |
697 | * If @from is not %NULL, searches continue from next device. | 682 | * If @from is not %NULL, searches continue from next device. |
698 | */ | 683 | */ |
699 | const struct dmi_device * dmi_find_device(int type, const char *name, | 684 | const struct dmi_device *dmi_find_device(int type, const char *name, |
700 | const struct dmi_device *from) | 685 | const struct dmi_device *from) |
701 | { | 686 | { |
702 | const struct list_head *head = from ? &from->list : &dmi_devices; | 687 | const struct list_head *head = from ? &from->list : &dmi_devices; |
703 | struct list_head *d; | 688 | struct list_head *d; |
704 | 689 | ||
705 | for(d = head->next; d != &dmi_devices; d = d->next) { | 690 | for (d = head->next; d != &dmi_devices; d = d->next) { |
706 | const struct dmi_device *dev = | 691 | const struct dmi_device *dev = |
707 | list_entry(d, struct dmi_device, list); | 692 | list_entry(d, struct dmi_device, list); |
708 | 693 | ||
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index acba0b9f4406..6eb535ffeddc 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c | |||
@@ -525,7 +525,7 @@ static ssize_t gsmi_clear_eventlog_store(struct kobject *kobj, | |||
525 | u32 data_type; | 525 | u32 data_type; |
526 | } param; | 526 | } param; |
527 | 527 | ||
528 | rc = strict_strtoul(buf, 0, &val); | 528 | rc = kstrtoul(buf, 0, &val); |
529 | if (rc) | 529 | if (rc) |
530 | return rc; | 530 | return rc; |
531 | 531 | ||
diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c index 0a1c9626aa9e..08ba4972da9d 100644 --- a/drivers/iommu/msm_iommu_dev.c +++ b/drivers/iommu/msm_iommu_dev.c | |||
@@ -282,7 +282,6 @@ static int msm_iommu_remove(struct platform_device *pdev) | |||
282 | clk_put(drv->pclk); | 282 | clk_put(drv->pclk); |
283 | memset(drv, 0, sizeof(*drv)); | 283 | memset(drv, 0, sizeof(*drv)); |
284 | kfree(drv); | 284 | kfree(drv); |
285 | platform_set_drvdata(pdev, NULL); | ||
286 | } | 285 | } |
287 | return 0; | 286 | return 0; |
288 | } | 287 | } |
@@ -366,7 +365,6 @@ static int msm_iommu_ctx_remove(struct platform_device *pdev) | |||
366 | if (drv) { | 365 | if (drv) { |
367 | memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata)); | 366 | memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata)); |
368 | kfree(drv); | 367 | kfree(drv); |
369 | platform_set_drvdata(pdev, NULL); | ||
370 | } | 368 | } |
371 | return 0; | 369 | return 0; |
372 | } | 370 | } |
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 0ba3766240d5..bcd78a720630 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c | |||
@@ -1008,8 +1008,6 @@ static int omap_iommu_remove(struct platform_device *pdev) | |||
1008 | struct resource *res; | 1008 | struct resource *res; |
1009 | struct omap_iommu *obj = platform_get_drvdata(pdev); | 1009 | struct omap_iommu *obj = platform_get_drvdata(pdev); |
1010 | 1010 | ||
1011 | platform_set_drvdata(pdev, NULL); | ||
1012 | |||
1013 | iopgtable_clear_entry_all(obj); | 1011 | iopgtable_clear_entry_all(obj); |
1014 | 1012 | ||
1015 | irq = platform_get_irq(pdev, 0); | 1013 | irq = platform_get_irq(pdev, 0); |
diff --git a/drivers/memstick/core/Kconfig b/drivers/memstick/core/Kconfig index 95f1814b5368..1d389491d5fd 100644 --- a/drivers/memstick/core/Kconfig +++ b/drivers/memstick/core/Kconfig | |||
@@ -24,3 +24,15 @@ config MSPRO_BLOCK | |||
24 | support. This provides a block device driver, which you can use | 24 | support. This provides a block device driver, which you can use |
25 | to mount the filesystem. Almost everyone wishing MemoryStick | 25 | to mount the filesystem. Almost everyone wishing MemoryStick |
26 | support should say Y or M here. | 26 | support should say Y or M here. |
27 | |||
28 | config MS_BLOCK | ||
29 | tristate "MemoryStick Standard device driver" | ||
30 | depends on BLOCK | ||
31 | help | ||
32 | Say Y here to enable the MemoryStick Standard device driver | ||
33 | support. This provides a block device driver, which you can use | ||
34 | to mount the filesystem. | ||
35 | This driver works with old (bulky) MemoryStick and MemoryStick Duo ||
36 | cards, but not with MemoryStick PRO. Say Y if you have such a card. ||
37 | The driver is new and not yet well tested, so it may damage your ||
38 | card (even permanently). ||
diff --git a/drivers/memstick/core/Makefile b/drivers/memstick/core/Makefile index ecd029937738..0d7f90c0ff25 100644 --- a/drivers/memstick/core/Makefile +++ b/drivers/memstick/core/Makefile | |||
@@ -3,5 +3,5 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_MEMSTICK) += memstick.o | 5 | obj-$(CONFIG_MEMSTICK) += memstick.o |
6 | 6 | obj-$(CONFIG_MS_BLOCK) += ms_block.o | |
7 | obj-$(CONFIG_MSPRO_BLOCK) += mspro_block.o | 7 | obj-$(CONFIG_MSPRO_BLOCK) += mspro_block.o |
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c new file mode 100644 index 000000000000..08e70232062f --- /dev/null +++ b/drivers/memstick/core/ms_block.c | |||
@@ -0,0 +1,2385 @@ | |||
1 | /* | ||
2 | * ms_block.c - Sony MemoryStick (legacy) storage support | ||
3 | |||
4 | * Copyright (C) 2013 Maxim Levitsky <maximlevitsky@gmail.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * Minor portions of the driver were copied from mspro_block.c which is | ||
11 | * Copyright (C) 2007 Alex Dubov <oakad@yahoo.com> | ||
12 | * | ||
13 | */ | ||
14 | #define DRIVER_NAME "ms_block" | ||
15 | #define pr_fmt(fmt) DRIVER_NAME ": " fmt | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/blkdev.h> | ||
19 | #include <linux/memstick.h> | ||
20 | #include <linux/idr.h> | ||
21 | #include <linux/hdreg.h> | ||
22 | #include <linux/delay.h> | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/random.h> | ||
25 | #include <linux/bitmap.h> | ||
26 | #include <linux/scatterlist.h> | ||
27 | #include <linux/jiffies.h> | ||
28 | #include <linux/workqueue.h> | ||
29 | #include <linux/mutex.h> | ||
30 | #include "ms_block.h" | ||
31 | |||
32 | static int debug; | ||
33 | static int cache_flush_timeout = 1000; | ||
34 | static bool verify_writes; | ||
35 | |||
36 | /* | ||
37 | * Copies a section of 'sg_from' starting at offset 'offset' and of length ||
38 | * 'len' to another scatterlist of 'to_nents' entries ||
39 | */ | ||
40 | static size_t msb_sg_copy(struct scatterlist *sg_from, | ||
41 | struct scatterlist *sg_to, int to_nents, size_t offset, size_t len) | ||
42 | { | ||
43 | size_t copied = 0; | ||
44 | |||
45 | while (offset > 0) { | ||
46 | if (offset >= sg_from->length) { | ||
47 | if (sg_is_last(sg_from)) | ||
48 | return 0; | ||
49 | |||
50 | offset -= sg_from->length; | ||
51 | sg_from = sg_next(sg_from); | ||
52 | continue; | ||
53 | } | ||
54 | |||
55 | copied = min(len, sg_from->length - offset); | ||
56 | sg_set_page(sg_to, sg_page(sg_from), | ||
57 | copied, sg_from->offset + offset); | ||
58 | |||
59 | len -= copied; | ||
60 | offset = 0; | ||
61 | |||
62 | if (sg_is_last(sg_from) || !len) | ||
63 | goto out; | ||
64 | |||
65 | sg_to = sg_next(sg_to); | ||
66 | to_nents--; | ||
67 | sg_from = sg_next(sg_from); | ||
68 | } | ||
69 | |||
70 | while (len > sg_from->length && to_nents--) { | ||
71 | len -= sg_from->length; | ||
72 | copied += sg_from->length; | ||
73 | |||
74 | sg_set_page(sg_to, sg_page(sg_from), | ||
75 | sg_from->length, sg_from->offset); | ||
76 | |||
77 | if (sg_is_last(sg_from) || !len) | ||
78 | goto out; | ||
79 | |||
80 | sg_from = sg_next(sg_from); | ||
81 | sg_to = sg_next(sg_to); | ||
82 | } | ||
83 | |||
84 | if (len && to_nents) { | ||
85 | sg_set_page(sg_to, sg_page(sg_from), len, sg_from->offset); | ||
86 | copied += len; | ||
87 | } | ||
88 | out: | ||
89 | sg_mark_end(sg_to); | ||
90 | return copied; | ||
91 | } | ||
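Usage sketch: the read and write handlers below use msb_sg_copy() to carve one page-sized window out of the request's scatterlist into a small on-stack list:

	struct scatterlist win[2];

	sg_init_table(win, ARRAY_SIZE(win));
	msb_sg_copy(msb->current_sg, win, ARRAY_SIZE(win),
		    msb->current_sg_offset, msb->page_size);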
92 | |||
93 | /* | ||
94 | * Compares a section of 'sg' starting at offset 'offset' and of length 'len' ||
95 | * to a linear buffer of length 'len' at address 'buffer'. ||
96 | * Returns 0 if equal and -1 otherwise ||
97 | */ | ||
98 | static int msb_sg_compare_to_buffer(struct scatterlist *sg, | ||
99 | size_t offset, u8 *buffer, size_t len) | ||
100 | { | ||
101 | int retval = 0, cmplen; | ||
102 | struct sg_mapping_iter miter; | ||
103 | |||
104 | sg_miter_start(&miter, sg, sg_nents(sg), | ||
105 | SG_MITER_ATOMIC | SG_MITER_FROM_SG); | ||
106 | |||
107 | while (sg_miter_next(&miter) && len > 0) { | ||
108 | if (offset >= miter.length) { | ||
109 | offset -= miter.length; | ||
110 | continue; | ||
111 | } | ||
112 | |||
113 | cmplen = min(miter.length - offset, len); | ||
114 | retval = memcmp(miter.addr + offset, buffer, cmplen) ? -1 : 0; | ||
115 | if (retval) | ||
116 | break; | ||
117 | |||
118 | buffer += cmplen; | ||
119 | len -= cmplen; | ||
120 | offset = 0; | ||
121 | } | ||
122 | |||
123 | if (!retval && len) | ||
124 | retval = -1; | ||
125 | |||
126 | sg_miter_stop(&miter); | ||
127 | return retval; | ||
128 | } | ||
129 | |||
130 | |||
131 | /* Get the zone in which the block with logical address 'lba' lives. ||
132 | * Flash is broken into zones. ||
133 | * Each zone consists of 512 eraseblocks; the first zone holds 494 ||
134 | * logical blocks and every following zone holds 496. ||
135 | * Therefore zone #0 hosts blocks 0-493, zone #1 blocks 494-989, etc... ||
136 | */ | ||
137 | static int msb_get_zone_from_lba(int lba) | ||
138 | { | ||
139 | if (lba < 494) | ||
140 | return 0; | ||
141 | return ((lba - 494) / 496) + 1; | ||
142 | } | ||
143 | |||
144 | /* Get zone of physical block. Trivial */ | ||
145 | static int msb_get_zone_from_pba(int pba) | ||
146 | { | ||
147 | return pba / MS_BLOCKS_IN_ZONE; | ||
148 | } | ||
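A worked check of the mapping described above:

	/*
	 * msb_get_zone_from_lba(0)   == 0  (zone 0 holds lbas 0..493)
	 * msb_get_zone_from_lba(493) == 0
	 * msb_get_zone_from_lba(494) == 1  ((494 - 494) / 496 + 1)
	 * msb_get_zone_from_lba(989) == 1  (zones 1+ hold 496 lbas each)
	 * msb_get_zone_from_lba(990) == 2
	 */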
149 | |||
150 | /* Debug test to validate free block counts */ | ||
151 | static int msb_validate_used_block_bitmap(struct msb_data *msb) | ||
152 | { | ||
153 | int total_free_blocks = 0; | ||
154 | int i; | ||
155 | |||
156 | if (!debug) | ||
157 | return 0; | ||
158 | |||
159 | for (i = 0; i < msb->zone_count; i++) | ||
160 | total_free_blocks += msb->free_block_count[i]; | ||
161 | |||
162 | if (msb->block_count - bitmap_weight(msb->used_blocks_bitmap, | ||
163 | msb->block_count) == total_free_blocks) | ||
164 | return 0; | ||
165 | |||
166 | pr_err("BUG: free block counts don't match the bitmap"); | ||
167 | msb->read_only = true; | ||
168 | return -EINVAL; | ||
169 | } | ||
170 | |||
171 | /* Mark physical block as used */ | ||
172 | static void msb_mark_block_used(struct msb_data *msb, int pba) | ||
173 | { | ||
174 | int zone = msb_get_zone_from_pba(pba); | ||
175 | |||
176 | if (test_bit(pba, msb->used_blocks_bitmap)) { | ||
177 | pr_err( | ||
178 | "BUG: attempt to mark already used pba %d as used", pba); | ||
179 | msb->read_only = true; | ||
180 | return; | ||
181 | } | ||
182 | |||
183 | if (msb_validate_used_block_bitmap(msb)) | ||
184 | return; | ||
185 | |||
186 | /* No races because all IO is single threaded */ | ||
187 | __set_bit(pba, msb->used_blocks_bitmap); | ||
188 | msb->free_block_count[zone]--; | ||
189 | } | ||
190 | |||
191 | /* Mark physical block as free */ | ||
192 | static void msb_mark_block_unused(struct msb_data *msb, int pba) | ||
193 | { | ||
194 | int zone = msb_get_zone_from_pba(pba); | ||
195 | |||
196 | if (!test_bit(pba, msb->used_blocks_bitmap)) { | ||
197 | pr_err("BUG: attempt to mark already unused pba %d as unused", pba); ||
198 | msb->read_only = true; | ||
199 | return; | ||
200 | } | ||
201 | |||
202 | if (msb_validate_used_block_bitmap(msb)) | ||
203 | return; | ||
204 | |||
205 | /* No races because all IO is single threaded */ | ||
206 | __clear_bit(pba, msb->used_blocks_bitmap); | ||
207 | msb->free_block_count[zone]++; | ||
208 | } | ||
209 | |||
210 | /* Invalidate current register window */ | ||
211 | static void msb_invalidate_reg_window(struct msb_data *msb) | ||
212 | { | ||
213 | msb->reg_addr.w_offset = offsetof(struct ms_register, id); | ||
214 | msb->reg_addr.w_length = sizeof(struct ms_id_register); | ||
215 | msb->reg_addr.r_offset = offsetof(struct ms_register, id); | ||
216 | msb->reg_addr.r_length = sizeof(struct ms_id_register); | ||
217 | msb->addr_valid = false; | ||
218 | } | ||
219 | |||
220 | /* Start a state machine */ | ||
221 | static int msb_run_state_machine(struct msb_data *msb, int (*state_func) | ||
222 | (struct memstick_dev *card, struct memstick_request **req)) | ||
223 | { | ||
224 | struct memstick_dev *card = msb->card; | ||
225 | |||
226 | WARN_ON(msb->state != -1); | ||
227 | msb->int_polling = false; | ||
228 | msb->state = 0; | ||
229 | msb->exit_error = 0; | ||
230 | |||
231 | memset(&card->current_mrq, 0, sizeof(card->current_mrq)); | ||
232 | |||
233 | card->next_request = state_func; | ||
234 | memstick_new_req(card->host); | ||
235 | wait_for_completion(&card->mrq_complete); | ||
236 | |||
237 | WARN_ON(msb->state != -1); | ||
238 | return msb->exit_error; | ||
239 | } | ||
240 | |||
241 | /* State machine handlers call this to exit */ ||
242 | static int msb_exit_state_machine(struct msb_data *msb, int error) | ||
243 | { | ||
244 | WARN_ON(msb->state == -1); | ||
245 | |||
246 | msb->state = -1; | ||
247 | msb->exit_error = error; | ||
248 | msb->card->next_request = h_msb_default_bad; | ||
249 | |||
250 | /* Invalidate reg window on errors */ | ||
251 | if (error) | ||
252 | msb_invalidate_reg_window(msb); | ||
253 | |||
254 | complete(&msb->card->mrq_complete); | ||
255 | return -ENXIO; | ||
256 | } | ||
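Every hardware conversation in the driver is bracketed by this pair: msb_run_state_machine() queues the first request and sleeps on mrq_complete, and the handler is then re-entered once per completed request, either advancing msb->state and returning 0 to queue the next TPC, or calling msb_exit_state_machine() to wake the caller. A minimal hypothetical handler following that contract:

	static int h_example(struct memstick_dev *card,
			     struct memstick_request **out_mrq)
	{
		struct msb_data *msb = memstick_get_drvdata(card);
		struct memstick_request *mrq = *out_mrq = &card->current_mrq;

		if (mrq->error)
			return msb_exit_state_machine(msb, mrq->error);

		switch (msb->state) {
		case 0:				/* queue one TPC */
			memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1);
			msb->state = 1;
			return 0;
		case 1:				/* TPC completed, leave */
			return msb_exit_state_machine(msb, 0);
		}
		BUG();
	}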
257 | |||
258 | /* read INT register */ | ||
259 | static int msb_read_int_reg(struct msb_data *msb, long timeout) | ||
260 | { | ||
261 | struct memstick_request *mrq = &msb->card->current_mrq; | ||
262 | |||
263 | WARN_ON(msb->state == -1); | ||
264 | |||
265 | if (!msb->int_polling) { | ||
266 | msb->int_timeout = jiffies + | ||
267 | msecs_to_jiffies(timeout == -1 ? 500 : timeout); | ||
268 | msb->int_polling = true; | ||
269 | } else if (time_after(jiffies, msb->int_timeout)) { | ||
270 | mrq->data[0] = MEMSTICK_INT_CMDNAK; | ||
271 | return 0; | ||
272 | } | ||
273 | |||
274 | if ((msb->caps & MEMSTICK_CAP_AUTO_GET_INT) && | ||
275 | mrq->need_card_int && !mrq->error) { | ||
276 | mrq->data[0] = mrq->int_reg; | ||
277 | mrq->need_card_int = false; | ||
278 | return 0; | ||
279 | } else { | ||
280 | memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1); | ||
281 | return 1; | ||
282 | } | ||
283 | } | ||
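msb_read_int_reg() uses the standard jiffies deadline idiom: record the expiry when polling starts, then test it with the overflow-safe time_after() on every retry. In isolation, assuming a hypothetical condition():

	unsigned long deadline = jiffies + msecs_to_jiffies(500);

	while (!condition()) {
		if (time_after(jiffies, deadline))
			return -ETIMEDOUT;
		/* poll again */
	}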
284 | |||
285 | /* Read a register */ | ||
286 | static int msb_read_regs(struct msb_data *msb, int offset, int len) | ||
287 | { | ||
288 | struct memstick_request *req = &msb->card->current_mrq; | ||
289 | |||
290 | if (msb->reg_addr.r_offset != offset || | ||
291 | msb->reg_addr.r_length != len || !msb->addr_valid) { | ||
292 | |||
293 | msb->reg_addr.r_offset = offset; | ||
294 | msb->reg_addr.r_length = len; | ||
295 | msb->addr_valid = true; | ||
296 | |||
297 | memstick_init_req(req, MS_TPC_SET_RW_REG_ADRS, | ||
298 | &msb->reg_addr, sizeof(msb->reg_addr)); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | memstick_init_req(req, MS_TPC_READ_REG, NULL, len); | ||
303 | return 1; | ||
304 | } | ||
305 | |||
306 | /* Write a card register */ | ||
307 | static int msb_write_regs(struct msb_data *msb, int offset, int len, void *buf) | ||
308 | { | ||
309 | struct memstick_request *req = &msb->card->current_mrq; | ||
310 | |||
311 | if (msb->reg_addr.w_offset != offset || | ||
312 | msb->reg_addr.w_length != len || !msb->addr_valid) { | ||
313 | |||
314 | msb->reg_addr.w_offset = offset; | ||
315 | msb->reg_addr.w_length = len; | ||
316 | msb->addr_valid = true; | ||
317 | |||
318 | memstick_init_req(req, MS_TPC_SET_RW_REG_ADRS, | ||
319 | &msb->reg_addr, sizeof(msb->reg_addr)); | ||
320 | return 0; | ||
321 | } | ||
322 | |||
323 | memstick_init_req(req, MS_TPC_WRITE_REG, buf, len); | ||
324 | return 1; | ||
325 | } | ||
326 | |||
327 | /* Handler for absence of IO */ | ||
328 | static int h_msb_default_bad(struct memstick_dev *card, | ||
329 | struct memstick_request **mrq) | ||
330 | { | ||
331 | return -ENXIO; | ||
332 | } | ||
333 | |||
334 | /* | ||
335 | * This function is a handler for reads of one page from the device. ||
336 | * Writes output to msb->current_sg, takes the sector address from msb->regs.param. ||
337 | * Can also be used to read extra data only. Set the params accordingly. ||
338 | */ | ||
339 | static int h_msb_read_page(struct memstick_dev *card, | ||
340 | struct memstick_request **out_mrq) | ||
341 | { | ||
342 | struct msb_data *msb = memstick_get_drvdata(card); | ||
343 | struct memstick_request *mrq = *out_mrq = &card->current_mrq; | ||
344 | struct scatterlist sg[2]; | ||
345 | u8 command, intreg; | ||
346 | |||
347 | if (mrq->error) { | ||
348 | dbg("read_page, unknown error"); | ||
349 | return msb_exit_state_machine(msb, mrq->error); | ||
350 | } | ||
351 | again: | ||
352 | switch (msb->state) { | ||
353 | case MSB_RP_SEND_BLOCK_ADDRESS: | ||
354 | /* msb_write_regs sometimes "fails" because it needs to update ||
355 | the reg window, and thus it returns a request for that. ||
356 | Then we stay in this state and retry */ ||
357 | if (!msb_write_regs(msb, | ||
358 | offsetof(struct ms_register, param), | ||
359 | sizeof(struct ms_param_register), | ||
360 | (unsigned char *)&msb->regs.param)) | ||
361 | return 0; | ||
362 | |||
363 | msb->state = MSB_RP_SEND_READ_COMMAND; | ||
364 | return 0; | ||
365 | |||
366 | case MSB_RP_SEND_READ_COMMAND: | ||
367 | command = MS_CMD_BLOCK_READ; | ||
368 | memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1); | ||
369 | msb->state = MSB_RP_SEND_INT_REQ; | ||
370 | return 0; | ||
371 | |||
372 | case MSB_RP_SEND_INT_REQ: | ||
373 | msb->state = MSB_RP_RECEIVE_INT_REQ_RESULT; | ||
374 | /* If we don't actually need to send the INT read request (only ||
375 | needed in serial mode), then just fall through */ ||
376 | if (msb_read_int_reg(msb, -1)) | ||
377 | return 0; | ||
378 | /* fallthrough */ | ||
379 | |||
380 | case MSB_RP_RECEIVE_INT_REQ_RESULT: | ||
381 | intreg = mrq->data[0]; | ||
382 | msb->regs.status.interrupt = intreg; | ||
383 | |||
384 | if (intreg & MEMSTICK_INT_CMDNAK) | ||
385 | return msb_exit_state_machine(msb, -EIO); | ||
386 | |||
387 | if (!(intreg & MEMSTICK_INT_CED)) { | ||
388 | msb->state = MSB_RP_SEND_INT_REQ; | ||
389 | goto again; | ||
390 | } | ||
391 | |||
392 | msb->int_polling = false; | ||
393 | msb->state = (intreg & MEMSTICK_INT_ERR) ? | ||
394 | MSB_RP_SEND_READ_STATUS_REG : MSB_RP_SEND_OOB_READ; | ||
395 | goto again; | ||
396 | |||
397 | case MSB_RP_SEND_READ_STATUS_REG: | ||
398 | /* read the status register to understand the source of the INT_ERR */ ||
399 | if (!msb_read_regs(msb, | ||
400 | offsetof(struct ms_register, status), | ||
401 | sizeof(struct ms_status_register))) | ||
402 | return 0; | ||
403 | |||
404 | msb->state = MSB_RP_RECIVE_STATUS_REG; ||
405 | return 0; | ||
406 | |||
407 | case MSB_RP_RECIVE_STATUS_REG: | ||
408 | msb->regs.status = *(struct ms_status_register *)mrq->data; | ||
409 | msb->state = MSB_RP_SEND_OOB_READ; | ||
410 | /* fallthrough */ | ||
411 | |||
412 | case MSB_RP_SEND_OOB_READ: | ||
413 | if (!msb_read_regs(msb, | ||
414 | offsetof(struct ms_register, extra_data), | ||
415 | sizeof(struct ms_extra_data_register))) | ||
416 | return 0; | ||
417 | |||
418 | msb->state = MSB_RP_RECEIVE_OOB_READ; | ||
419 | return 0; | ||
420 | |||
421 | case MSB_RP_RECEIVE_OOB_READ: | ||
422 | msb->regs.extra_data = | ||
423 | *(struct ms_extra_data_register *) mrq->data; | ||
424 | msb->state = MSB_RP_SEND_READ_DATA; | ||
425 | /* fallthrough */ | ||
426 | |||
427 | case MSB_RP_SEND_READ_DATA: | ||
428 | /* Skip this state if we only read the OOB */ ||
429 | if (msb->regs.param.cp == MEMSTICK_CP_EXTRA) { | ||
430 | msb->state = MSB_RP_RECEIVE_READ_DATA; | ||
431 | goto again; | ||
432 | } | ||
433 | |||
434 | sg_init_table(sg, ARRAY_SIZE(sg)); | ||
435 | msb_sg_copy(msb->current_sg, sg, ARRAY_SIZE(sg), | ||
436 | msb->current_sg_offset, | ||
437 | msb->page_size); | ||
438 | |||
439 | memstick_init_req_sg(mrq, MS_TPC_READ_LONG_DATA, sg); | ||
440 | msb->state = MSB_RP_RECEIVE_READ_DATA; | ||
441 | return 0; | ||
442 | |||
443 | case MSB_RP_RECEIVE_READ_DATA: | ||
444 | if (!(msb->regs.status.interrupt & MEMSTICK_INT_ERR)) { | ||
445 | msb->current_sg_offset += msb->page_size; | ||
446 | return msb_exit_state_machine(msb, 0); | ||
447 | } | ||
448 | |||
449 | if (msb->regs.status.status1 & MEMSTICK_UNCORR_ERROR) { | ||
450 | dbg("read_page: uncorrectable error"); | ||
451 | return msb_exit_state_machine(msb, -EBADMSG); | ||
452 | } | ||
453 | |||
454 | if (msb->regs.status.status1 & MEMSTICK_CORR_ERROR) { | ||
455 | dbg("read_page: correctable error"); | ||
456 | msb->current_sg_offset += msb->page_size; | ||
457 | return msb_exit_state_machine(msb, -EUCLEAN); | ||
458 | } else { | ||
459 | dbg("read_page: INT error, but no status error bits"); | ||
460 | return msb_exit_state_machine(msb, -EIO); | ||
461 | } | ||
462 | } | ||
463 | |||
464 | BUG(); | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * Handler of writes of exactly one block. | ||
469 | * Takes address from msb->regs.param. | ||
470 | * Writes same extra data to blocks, also taken | ||
471 | * from msb->regs.extra | ||
472 | * Returns -EBADMSG if write fails due to uncorrectable error, or -EIO if | ||
473 | * device refuses to take the command or something else | ||
474 | */ | ||
475 | static int h_msb_write_block(struct memstick_dev *card, | ||
476 | struct memstick_request **out_mrq) | ||
477 | { | ||
478 | struct msb_data *msb = memstick_get_drvdata(card); | ||
479 | struct memstick_request *mrq = *out_mrq = &card->current_mrq; | ||
480 | struct scatterlist sg[2]; | ||
481 | u8 intreg, command; | ||
482 | |||
483 | if (mrq->error) | ||
484 | return msb_exit_state_machine(msb, mrq->error); | ||
485 | |||
486 | again: | ||
487 | switch (msb->state) { | ||
488 | |||
489 | /* HACK: JMicron handling of TPCs between 8 and ||
490 | * sizeof(memstick_request.data) is broken due to a hardware ||
491 | * bug in the PIO mode that is used for these TPCs. ||
492 | * Therefore split the write. ||
493 | */ | ||
494 | |||
495 | case MSB_WB_SEND_WRITE_PARAMS: | ||
496 | if (!msb_write_regs(msb, | ||
497 | offsetof(struct ms_register, param), | ||
498 | sizeof(struct ms_param_register), | ||
499 | &msb->regs.param)) | ||
500 | return 0; | ||
501 | |||
502 | msb->state = MSB_WB_SEND_WRITE_OOB; | ||
503 | return 0; | ||
504 | |||
505 | case MSB_WB_SEND_WRITE_OOB: | ||
506 | if (!msb_write_regs(msb, | ||
507 | offsetof(struct ms_register, extra_data), | ||
508 | sizeof(struct ms_extra_data_register), | ||
509 | &msb->regs.extra_data)) | ||
510 | return 0; | ||
511 | msb->state = MSB_WB_SEND_WRITE_COMMAND; | ||
512 | return 0; | ||
513 | |||
514 | |||
515 | case MSB_WB_SEND_WRITE_COMMAND: | ||
516 | command = MS_CMD_BLOCK_WRITE; | ||
517 | memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1); | ||
518 | msb->state = MSB_WB_SEND_INT_REQ; | ||
519 | return 0; | ||
520 | |||
521 | case MSB_WB_SEND_INT_REQ: | ||
522 | msb->state = MSB_WB_RECEIVE_INT_REQ; | ||
523 | if (msb_read_int_reg(msb, -1)) | ||
524 | return 0; | ||
525 | /* fallthrough */ | ||
526 | |||
527 | case MSB_WB_RECEIVE_INT_REQ: | ||
528 | intreg = mrq->data[0]; | ||
529 | msb->regs.status.interrupt = intreg; | ||
530 | |||
531 | /* errors mean out of here, and fast... */ | ||
532 | if (intreg & (MEMSTICK_INT_CMDNAK)) | ||
533 | return msb_exit_state_machine(msb, -EIO); | ||
534 | |||
535 | if (intreg & MEMSTICK_INT_ERR) | ||
536 | return msb_exit_state_machine(msb, -EBADMSG); | ||
537 | |||
538 | |||
539 | /* for last page we need to poll CED */ | ||
540 | if (msb->current_page == msb->pages_in_block) { | ||
541 | if (intreg & MEMSTICK_INT_CED) | ||
542 | return msb_exit_state_machine(msb, 0); | ||
543 | msb->state = MSB_WB_SEND_INT_REQ; | ||
544 | goto again; | ||
545 | |||
546 | } | ||
547 | |||
548 | /* for non-last page we need BREQ before writing next chunk */ | ||
549 | if (!(intreg & MEMSTICK_INT_BREQ)) { | ||
550 | msb->state = MSB_WB_SEND_INT_REQ; | ||
551 | goto again; | ||
552 | } | ||
553 | |||
554 | msb->int_polling = false; | ||
555 | msb->state = MSB_WB_SEND_WRITE_DATA; | ||
556 | /* fallthrough */ | ||
557 | |||
558 | case MSB_WB_SEND_WRITE_DATA: | ||
559 | sg_init_table(sg, ARRAY_SIZE(sg)); | ||
560 | |||
561 | if (msb_sg_copy(msb->current_sg, sg, ARRAY_SIZE(sg), | ||
562 | msb->current_sg_offset, | ||
563 | msb->page_size) < msb->page_size) | ||
564 | return msb_exit_state_machine(msb, -EIO); | ||
565 | |||
566 | memstick_init_req_sg(mrq, MS_TPC_WRITE_LONG_DATA, sg); | ||
567 | mrq->need_card_int = 1; | ||
568 | msb->state = MSB_WB_RECEIVE_WRITE_CONFIRMATION; | ||
569 | return 0; | ||
570 | |||
571 | case MSB_WB_RECEIVE_WRITE_CONFIRMATION: | ||
572 | msb->current_page++; | ||
573 | msb->current_sg_offset += msb->page_size; | ||
574 | msb->state = MSB_WB_SEND_INT_REQ; | ||
575 | goto again; | ||
576 | default: | ||
577 | BUG(); | ||
578 | } | ||
579 | |||
580 | return 0; | ||
581 | } | ||
582 | |||
583 | /* | ||
584 | * This function is used to send simple IO requests to the device, consisting ||
585 | * of a register write plus a command ||
586 | */ | ||
587 | static int h_msb_send_command(struct memstick_dev *card, | ||
588 | struct memstick_request **out_mrq) | ||
589 | { | ||
590 | struct msb_data *msb = memstick_get_drvdata(card); | ||
591 | struct memstick_request *mrq = *out_mrq = &card->current_mrq; | ||
592 | u8 intreg; | ||
593 | |||
594 | if (mrq->error) { | ||
595 | dbg("send_command: unknown error"); | ||
596 | return msb_exit_state_machine(msb, mrq->error); | ||
597 | } | ||
598 | again: | ||
599 | switch (msb->state) { | ||
600 | |||
601 | /* HACK: see h_msb_write_block */ | ||
602 | case MSB_SC_SEND_WRITE_PARAMS: /* write param register*/ | ||
603 | if (!msb_write_regs(msb, | ||
604 | offsetof(struct ms_register, param), | ||
605 | sizeof(struct ms_param_register), | ||
606 | &msb->regs.param)) | ||
607 | return 0; | ||
608 | msb->state = MSB_SC_SEND_WRITE_OOB; | ||
609 | return 0; | ||
610 | |||
611 | case MSB_SC_SEND_WRITE_OOB: | ||
612 | if (!msb->command_need_oob) { | ||
613 | msb->state = MSB_SC_SEND_COMMAND; | ||
614 | goto again; | ||
615 | } | ||
616 | |||
617 | if (!msb_write_regs(msb, | ||
618 | offsetof(struct ms_register, extra_data), | ||
619 | sizeof(struct ms_extra_data_register), | ||
620 | &msb->regs.extra_data)) | ||
621 | return 0; | ||
622 | |||
623 | msb->state = MSB_SC_SEND_COMMAND; | ||
624 | return 0; | ||
625 | |||
626 | case MSB_SC_SEND_COMMAND: | ||
627 | memstick_init_req(mrq, MS_TPC_SET_CMD, &msb->command_value, 1); | ||
628 | msb->state = MSB_SC_SEND_INT_REQ; | ||
629 | return 0; | ||
630 | |||
631 | case MSB_SC_SEND_INT_REQ: | ||
632 | msb->state = MSB_SC_RECEIVE_INT_REQ; | ||
633 | if (msb_read_int_reg(msb, -1)) | ||
634 | return 0; | ||
635 | /* fallthrough */ | ||
636 | |||
637 | case MSB_SC_RECEIVE_INT_REQ: | ||
638 | intreg = mrq->data[0]; | ||
639 | |||
640 | if (intreg & MEMSTICK_INT_CMDNAK) | ||
641 | return msb_exit_state_machine(msb, -EIO); | ||
642 | if (intreg & MEMSTICK_INT_ERR) | ||
643 | return msb_exit_state_machine(msb, -EBADMSG); | ||
644 | |||
645 | if (!(intreg & MEMSTICK_INT_CED)) { | ||
646 | msb->state = MSB_SC_SEND_INT_REQ; | ||
647 | goto again; | ||
648 | } | ||
649 | |||
650 | return msb_exit_state_machine(msb, 0); | ||
651 | } | ||
652 | |||
653 | BUG(); | ||
654 | } | ||
655 | |||
656 | /* Small handler for card reset */ | ||
657 | static int h_msb_reset(struct memstick_dev *card, | ||
658 | struct memstick_request **out_mrq) | ||
659 | { | ||
660 | u8 command = MS_CMD_RESET; | ||
661 | struct msb_data *msb = memstick_get_drvdata(card); | ||
662 | struct memstick_request *mrq = *out_mrq = &card->current_mrq; | ||
663 | |||
664 | if (mrq->error) | ||
665 | return msb_exit_state_machine(msb, mrq->error); | ||
666 | |||
667 | switch (msb->state) { | ||
668 | case MSB_RS_SEND: | ||
669 | memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1); | ||
670 | mrq->need_card_int = 0; | ||
671 | msb->state = MSB_RS_CONFIRM; | ||
672 | return 0; | ||
673 | case MSB_RS_CONFIRM: | ||
674 | return msb_exit_state_machine(msb, 0); | ||
675 | } | ||
676 | BUG(); | ||
677 | } | ||
678 | |||
679 | /* This handler is used to do serial->parallel switch */ | ||
680 | static int h_msb_parallel_switch(struct memstick_dev *card, | ||
681 | struct memstick_request **out_mrq) | ||
682 | { | ||
683 | struct msb_data *msb = memstick_get_drvdata(card); | ||
684 | struct memstick_request *mrq = *out_mrq = &card->current_mrq; | ||
685 | struct memstick_host *host = card->host; | ||
686 | |||
687 | if (mrq->error) { | ||
688 | dbg("parallel_switch: error"); | ||
689 | msb->regs.param.system &= ~MEMSTICK_SYS_PAM; | ||
690 | return msb_exit_state_machine(msb, mrq->error); | ||
691 | } | ||
692 | |||
693 | switch (msb->state) { | ||
694 | case MSB_PS_SEND_SWITCH_COMMAND: | ||
695 | /* Set the parallel interface on memstick side */ | ||
696 | msb->regs.param.system |= MEMSTICK_SYS_PAM; | ||
697 | |||
698 | if (!msb_write_regs(msb, | ||
699 | offsetof(struct ms_register, param), | ||
700 | 1, | ||
701 | (unsigned char *)&msb->regs.param)) | ||
702 | return 0; | ||
703 | |||
704 | msb->state = MSB_PS_SWICH_HOST; | ||
705 | return 0; | ||
706 | |||
707 | case MSB_PS_SWICH_HOST: | ||
708 | /* Set parallel interface on our side + send a dummy request | ||
709 | to see if card responds */ | ||
710 | host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_PAR4); | ||
711 | memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1); | ||
712 | msb->state = MSB_PS_CONFIRM; | ||
713 | return 0; | ||
714 | |||
715 | case MSB_PS_CONFIRM: | ||
716 | return msb_exit_state_machine(msb, 0); | ||
717 | } | ||
718 | |||
719 | BUG(); | ||
720 | } | ||
721 | |||
722 | static int msb_switch_to_parallel(struct msb_data *msb); | ||
723 | |||
724 | /* Reset the card, to guard against hw errors being treated as bad blocks */ | ||
725 | static int msb_reset(struct msb_data *msb, bool full) | ||
726 | { | ||
727 | |||
728 | bool was_parallel = msb->regs.param.system & MEMSTICK_SYS_PAM; | ||
729 | struct memstick_dev *card = msb->card; | ||
730 | struct memstick_host *host = card->host; | ||
731 | int error; | ||
732 | |||
733 | /* Reset the card */ | ||
734 | msb->regs.param.system = MEMSTICK_SYS_BAMD; | ||
735 | |||
736 | if (full) { | ||
737 | error = host->set_param(host, | ||
738 | MEMSTICK_POWER, MEMSTICK_POWER_OFF); | ||
739 | if (error) | ||
740 | goto out_error; | ||
741 | |||
742 | msb_invalidate_reg_window(msb); | ||
743 | |||
744 | error = host->set_param(host, | ||
745 | MEMSTICK_POWER, MEMSTICK_POWER_ON); | ||
746 | if (error) | ||
747 | goto out_error; | ||
748 | |||
749 | error = host->set_param(host, | ||
750 | MEMSTICK_INTERFACE, MEMSTICK_SERIAL); | ||
751 | if (error) { | ||
752 | out_error: | ||
753 | dbg("Failed to reset the host controller"); | ||
754 | msb->read_only = true; | ||
755 | return -EFAULT; | ||
756 | } | ||
757 | } | ||
758 | |||
759 | error = msb_run_state_machine(msb, h_msb_reset); | ||
760 | if (error) { | ||
761 | dbg("Failed to reset the card"); | ||
762 | msb->read_only = true; | ||
763 | return -ENODEV; | ||
764 | } | ||
765 | |||
766 | /* Set parallel mode */ | ||
767 | if (was_parallel) | ||
768 | msb_switch_to_parallel(msb); | ||
769 | return 0; | ||
770 | } | ||
771 | |||
772 | /* Attempts to switch interface to parallel mode */ | ||
773 | static int msb_switch_to_parallel(struct msb_data *msb) | ||
774 | { | ||
775 | int error; | ||
776 | |||
777 | error = msb_run_state_machine(msb, h_msb_parallel_switch); | ||
778 | if (error) { | ||
779 | pr_err("Switch to parallel failed"); | ||
780 | msb->regs.param.system &= ~MEMSTICK_SYS_PAM; | ||
781 | msb_reset(msb, true); | ||
782 | return -EFAULT; | ||
783 | } | ||
784 | |||
785 | msb->caps |= MEMSTICK_CAP_AUTO_GET_INT; | ||
786 | return 0; | ||
787 | } | ||
788 | |||
789 | /* Changes overwrite flag on a page */ | ||
790 | static int msb_set_overwrite_flag(struct msb_data *msb, | ||
791 | u16 pba, u8 page, u8 flag) | ||
792 | { | ||
793 | if (msb->read_only) | ||
794 | return -EROFS; | ||
795 | |||
796 | msb->regs.param.block_address = cpu_to_be16(pba); | ||
797 | msb->regs.param.page_address = page; | ||
798 | msb->regs.param.cp = MEMSTICK_CP_OVERWRITE; | ||
799 | msb->regs.extra_data.overwrite_flag = flag; | ||
800 | msb->command_value = MS_CMD_BLOCK_WRITE; | ||
801 | msb->command_need_oob = true; | ||
802 | |||
803 | dbg_verbose("changing overwrite flag to %02x for sector %d, page %d", | ||
804 | flag, pba, page); | ||
805 | return msb_run_state_machine(msb, h_msb_send_command); | ||
806 | } | ||
807 | |||
808 | static int msb_mark_bad(struct msb_data *msb, int pba) | ||
809 | { | ||
810 | pr_notice("marking pba %d as bad", pba); | ||
811 | msb_reset(msb, true); | ||
812 | return msb_set_overwrite_flag( | ||
813 | msb, pba, 0, 0xFF & ~MEMSTICK_OVERWRITE_BKST); | ||
814 | } | ||
815 | |||
816 | static int msb_mark_page_bad(struct msb_data *msb, int pba, int page) | ||
817 | { | ||
818 | dbg("marking page %d of pba %d as bad", page, pba); | ||
819 | msb_reset(msb, true); | ||
820 | return msb_set_overwrite_flag(msb, | ||
821 | pba, page, ~MEMSTICK_OVERWRITE_PGST0); | ||
822 | } | ||
823 | |||
824 | /* Erases one physical block */ | ||
825 | static int msb_erase_block(struct msb_data *msb, u16 pba) | ||
826 | { | ||
827 | int error, try; | ||
828 | if (msb->read_only) | ||
829 | return -EROFS; | ||
830 | |||
831 | dbg_verbose("erasing pba %d", pba); | ||
832 | |||
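| /* Make up to two attempts: if the erase command fails, reset the | ||
| card and retry once; a failed reset aborts the loop early. */ | ||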
833 | for (try = 1; try < 3; try++) { | ||
834 | msb->regs.param.block_address = cpu_to_be16(pba); | ||
835 | msb->regs.param.page_address = 0; | ||
836 | msb->regs.param.cp = MEMSTICK_CP_BLOCK; | ||
837 | msb->command_value = MS_CMD_BLOCK_ERASE; | ||
838 | msb->command_need_oob = false; | ||
839 | |||
840 | |||
841 | error = msb_run_state_machine(msb, h_msb_send_command); | ||
842 | if (!error || msb_reset(msb, true)) | ||
843 | break; | ||
844 | } | ||
845 | |||
846 | if (error) { | ||
847 | pr_err("erase failed, marking pba %d as bad", pba); | ||
848 | msb_mark_bad(msb, pba); | ||
849 | } | ||
850 | |||
851 | dbg_verbose("erase success, marking pba %d as unused", pba); | ||
852 | msb_mark_block_unused(msb, pba); | ||
853 | __set_bit(pba, msb->erased_blocks_bitmap); | ||
854 | return error; | ||
855 | } | ||
856 | |||
857 | /* Reads one page from device */ | ||
858 | static int msb_read_page(struct msb_data *msb, | ||
859 | u16 pba, u8 page, struct ms_extra_data_register *extra, | ||
860 | struct scatterlist *sg, int offset) | ||
861 | { | ||
862 | int try, error; | ||
863 | |||
864 | if (pba == MS_BLOCK_INVALID) { | ||
865 | unsigned long flags; | ||
866 | struct sg_mapping_iter miter; | ||
867 | size_t len = msb->page_size; | ||
868 | |||
869 | dbg_verbose("read unmapped sector. returning 0xFF"); | ||
870 | |||
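| /* An unmapped LBA reads back as all 0xFF. The scatterlist is filled | ||
| under an atomic sg_miter mapping, which must not be preempted, | ||
| hence the local_irq_save/restore pair around the copy loop. */ | ||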
871 | local_irq_save(flags); | ||
872 | sg_miter_start(&miter, sg, sg_nents(sg), | ||
873 | SG_MITER_ATOMIC | SG_MITER_TO_SG); | ||
874 | |||
875 | while (sg_miter_next(&miter) && len > 0) { | ||
876 | |||
877 | int chunklen; | ||
878 | |||
879 | if (offset && offset >= miter.length) { | ||
880 | offset -= miter.length; | ||
881 | continue; | ||
882 | } | ||
883 | |||
884 | chunklen = min(miter.length - offset, len); | ||
885 | memset(miter.addr + offset, 0xFF, chunklen); | ||
886 | len -= chunklen; | ||
887 | offset = 0; | ||
888 | } | ||
889 | |||
890 | sg_miter_stop(&miter); | ||
891 | local_irq_restore(flags); | ||
892 | |||
893 | if (offset) | ||
894 | return -EFAULT; | ||
895 | |||
896 | if (extra) | ||
897 | memset(extra, 0xFF, sizeof(*extra)); | ||
898 | return 0; | ||
899 | } | ||
900 | |||
901 | if (pba >= msb->block_count) { | ||
902 | pr_err("BUG: attempt to read beyond the end of the card at pba %d", pba); | ||
903 | return -EINVAL; | ||
904 | } | ||
905 | |||
906 | for (try = 1; try < 3; try++) { | ||
907 | msb->regs.param.block_address = cpu_to_be16(pba); | ||
908 | msb->regs.param.page_address = page; | ||
909 | msb->regs.param.cp = MEMSTICK_CP_PAGE; | ||
910 | |||
911 | msb->current_sg = sg; | ||
912 | msb->current_sg_offset = offset; | ||
913 | error = msb_run_state_machine(msb, h_msb_read_page); | ||
914 | |||
915 | |||
916 | if (error == -EUCLEAN) { | ||
917 | pr_notice("correctable error on pba %d, page %d", | ||
918 | pba, page); | ||
919 | error = 0; | ||
920 | } | ||
921 | |||
922 | if (!error && extra) | ||
923 | *extra = msb->regs.extra_data; | ||
924 | |||
925 | if (!error || msb_reset(msb, true)) | ||
926 | break; | ||
927 | |||
928 | } | ||
929 | |||
930 | /* Mark bad pages */ | ||
931 | if (error == -EBADMSG) { | ||
932 | pr_err("uncorrectable error on read of pba %d, page %d", | ||
933 | pba, page); | ||
934 | |||
935 | if (msb->regs.extra_data.overwrite_flag & | ||
936 | MEMSTICK_OVERWRITE_PGST0) | ||
937 | msb_mark_page_bad(msb, pba, page); | ||
938 | return -EBADMSG; | ||
939 | } | ||
940 | |||
941 | if (error) | ||
942 | pr_err("read of pba %d, page %d failed with error %d", | ||
943 | pba, page, error); | ||
944 | return error; | ||
945 | } | ||
946 | |||
947 | /* Reads oob of page only */ | ||
948 | static int msb_read_oob(struct msb_data *msb, u16 pba, u16 page, | ||
949 | struct ms_extra_data_register *extra) | ||
950 | { | ||
951 | int error; | ||
952 | |||
953 | BUG_ON(!extra); | ||
954 | msb->regs.param.block_address = cpu_to_be16(pba); | ||
955 | msb->regs.param.page_address = page; | ||
956 | msb->regs.param.cp = MEMSTICK_CP_EXTRA; | ||
957 | |||
958 | if (pba >= msb->block_count) { | ||
959 | pr_err("BUG: attempt to read beyond the end of the card at pba %d", pba); | ||
960 | return -EINVAL; | ||
961 | } | ||
962 | |||
963 | error = msb_run_state_machine(msb, h_msb_read_page); | ||
964 | *extra = msb->regs.extra_data; | ||
965 | |||
966 | if (error == -EUCLEAN) { | ||
967 | pr_notice("correctable error on pba %d, page %d", | ||
968 | pba, page); | ||
969 | return 0; | ||
970 | } | ||
971 | |||
972 | return error; | ||
973 | } | ||
974 | |||
975 | /* Reads a block and compares it with data contained in scatterlist orig_sg */ | ||
976 | static int msb_verify_block(struct msb_data *msb, u16 pba, | ||
977 | struct scatterlist *orig_sg, int offset) | ||
978 | { | ||
979 | struct scatterlist sg; | ||
980 | int page = 0, error; | ||
981 | |||
982 | sg_init_one(&sg, msb->block_buffer, msb->block_size); | ||
983 | |||
984 | while (page < msb->pages_in_block) { | ||
985 | |||
986 | error = msb_read_page(msb, pba, page, | ||
987 | NULL, &sg, page * msb->page_size); | ||
988 | if (error) | ||
989 | return error; | ||
990 | page++; | ||
991 | } | ||
992 | |||
993 | if (msb_sg_compare_to_buffer(orig_sg, offset, | ||
994 | msb->block_buffer, msb->block_size)) | ||
995 | return -EIO; | ||
996 | return 0; | ||
997 | } | ||
998 | |||
999 | /* Writes exactly one block + oob */ | ||
1000 | static int msb_write_block(struct msb_data *msb, | ||
1001 | u16 pba, u32 lba, struct scatterlist *sg, int offset) | ||
1002 | { | ||
1003 | int error, current_try = 1; | ||
1004 | BUG_ON(sg->length < msb->page_size); | ||
1005 | |||
1006 | if (msb->read_only) | ||
1007 | return -EROFS; | ||
1008 | |||
1009 | if (pba == MS_BLOCK_INVALID) { | ||
1010 | pr_err( | ||
1011 | "BUG: write: attempt to write MS_BLOCK_INVALID block"); | ||
1012 | return -EINVAL; | ||
1013 | } | ||
1014 | |||
1015 | if (pba >= msb->block_count || lba >= msb->logical_block_count) { | ||
1016 | pr_err( | ||
1017 | "BUG: write: attempt to write beyond the end of device"); | ||
1018 | return -EINVAL; | ||
1019 | } | ||
1020 | |||
1021 | if (msb_get_zone_from_lba(lba) != msb_get_zone_from_pba(pba)) { | ||
1022 | pr_err("BUG: write: lba zone mismatch"); | ||
1023 | return -EINVAL; | ||
1024 | } | ||
1025 | |||
1026 | if (pba == msb->boot_block_locations[0] || | ||
1027 | pba == msb->boot_block_locations[1]) { | ||
1028 | pr_err("BUG: write: attempt to write to boot blocks!"); | ||
1029 | return -EINVAL; | ||
1030 | } | ||
1031 | |||
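| /* Write loop: program the whole block and, if required, verify it. | ||
| On failure, try once to erase the pba and reprogram it; a second | ||
| failure is returned to the caller, which may retry with another | ||
| free block. */ | ||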
1032 | while (1) { | ||
1033 | |||
1034 | if (msb->read_only) | ||
1035 | return -EROFS; | ||
1036 | |||
1037 | msb->regs.param.cp = MEMSTICK_CP_BLOCK; | ||
1038 | msb->regs.param.page_address = 0; | ||
1039 | msb->regs.param.block_address = cpu_to_be16(pba); | ||
1040 | |||
1041 | msb->regs.extra_data.management_flag = 0xFF; | ||
1042 | msb->regs.extra_data.overwrite_flag = 0xF8; | ||
1043 | msb->regs.extra_data.logical_address = cpu_to_be16(lba); | ||
1044 | |||
1045 | msb->current_sg = sg; | ||
1046 | msb->current_sg_offset = offset; | ||
1047 | msb->current_page = 0; | ||
1048 | |||
1049 | error = msb_run_state_machine(msb, h_msb_write_block); | ||
1050 | |||
1051 | /* The block we just wrote is assumed to be erased, since its | ||
1052 | pba was erased earlier. If it wasn't actually erased, the | ||
1053 | write still succeeds and merely clears bits, so read the | ||
1054 | data back and check that it matches what we wrote. Blocks | ||
1055 | we erased ourselves are trusted and skip this check, | ||
1056 | unless verify_writes is set. */ | ||
1057 | if (!error && (verify_writes || | ||
1058 | !test_bit(pba, msb->erased_blocks_bitmap))) | ||
1059 | error = msb_verify_block(msb, pba, sg, offset); | ||
1060 | |||
1061 | if (!error) | ||
1062 | break; | ||
1063 | |||
1064 | if (current_try > 1 || msb_reset(msb, true)) | ||
1065 | break; | ||
1066 | |||
1067 | pr_err("write failed, trying to erase the pba %d", pba); | ||
1068 | error = msb_erase_block(msb, pba); | ||
1069 | if (error) | ||
1070 | break; | ||
1071 | |||
1072 | current_try++; | ||
1073 | } | ||
1074 | return error; | ||
1075 | } | ||
1076 | |||
1077 | /* Finds a free block for write replacement */ | ||
1078 | static u16 msb_get_free_block(struct msb_data *msb, int zone) | ||
1079 | { | ||
1080 | u16 pos; | ||
1081 | int pba = zone * MS_BLOCKS_IN_ZONE; | ||
1082 | int i; | ||
1083 | |||
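| /* Pick the pos-th free block of the zone at random; spreading the | ||
| choice over the zone presumably doubles as simple wear leveling. */ | ||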
1084 | get_random_bytes(&pos, sizeof(pos)); | ||
1085 | |||
1086 | if (!msb->free_block_count[zone]) { | ||
1087 | pr_err("NO free blocks in the zone %d, to use for a write, (media is WORN out) switching to RO mode", zone); | ||
1088 | msb->read_only = true; | ||
1089 | return MS_BLOCK_INVALID; | ||
1090 | } | ||
1091 | |||
1092 | pos %= msb->free_block_count[zone]; | ||
1093 | |||
1094 | dbg_verbose("have %d choices for a free block, selected randomally: %d", | ||
1095 | msb->free_block_count[zone], pos); | ||
1096 | |||
1097 | pba = find_next_zero_bit(msb->used_blocks_bitmap, | ||
1098 | msb->block_count, pba); | ||
1099 | for (i = 0; i < pos; ++i) | ||
1100 | pba = find_next_zero_bit(msb->used_blocks_bitmap, | ||
1101 | msb->block_count, pba + 1); | ||
1102 | |||
1103 | dbg_verbose("result of the free blocks scan: pba %d", pba); | ||
1104 | |||
1105 | if (pba == msb->block_count || (msb_get_zone_from_pba(pba)) != zone) { | ||
1106 | pr_err("BUG: cant get a free block"); | ||
1107 | msb->read_only = true; | ||
1108 | return MS_BLOCK_INVALID; | ||
1109 | } | ||
1110 | |||
1111 | msb_mark_block_used(msb, pba); | ||
1112 | return pba; | ||
1113 | } | ||
1114 | |||
1115 | static int msb_update_block(struct msb_data *msb, u16 lba, | ||
1116 | struct scatterlist *sg, int offset) | ||
1117 | { | ||
1118 | u16 pba, new_pba; | ||
1119 | int error, try; | ||
1120 | |||
1121 | pba = msb->lba_to_pba_table[lba]; | ||
1122 | dbg_verbose("start of a block update at lba %d, pba %d", lba, pba); | ||
1123 | |||
1124 | if (pba != MS_BLOCK_INVALID) { | ||
1125 | dbg_verbose("setting the update flag on the block"); | ||
1126 | msb_set_overwrite_flag(msb, pba, 0, | ||
1127 | 0xFF & ~MEMSTICK_OVERWRITE_UDST); | ||
1128 | } | ||
1129 | |||
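| /* Try up to three free blocks: a write that fails with -EBADMSG | ||
| marks the new pba as bad and moves on to the next candidate. */ | ||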
1130 | for (try = 0; try < 3; try++) { | ||
1131 | new_pba = msb_get_free_block(msb, | ||
1132 | msb_get_zone_from_lba(lba)); | ||
1133 | |||
1134 | if (new_pba == MS_BLOCK_INVALID) { | ||
1135 | error = -EIO; | ||
1136 | goto out; | ||
1137 | } | ||
1138 | |||
1139 | dbg_verbose("block update: writing updated block to the pba %d", | ||
1140 | new_pba); | ||
1141 | error = msb_write_block(msb, new_pba, lba, sg, offset); | ||
1142 | if (error == -EBADMSG) { | ||
1143 | msb_mark_bad(msb, new_pba); | ||
1144 | continue; | ||
1145 | } | ||
1146 | |||
1147 | if (error) | ||
1148 | goto out; | ||
1149 | |||
1150 | dbg_verbose("block update: erasing the old block"); | ||
1151 | msb_erase_block(msb, pba); | ||
1152 | msb->lba_to_pba_table[lba] = new_pba; | ||
1153 | return 0; | ||
1154 | } | ||
1155 | out: | ||
1156 | if (error) { | ||
1157 | pr_err("block update error after %d tries, switching to r/o mode", try); | ||
1158 | msb->read_only = true; | ||
1159 | } | ||
1160 | return error; | ||
1161 | } | ||
1162 | |||
1163 | /* Converts endianness of the boot block fields for easy use */ | ||
1164 | static void msb_fix_boot_page_endianness(struct ms_boot_page *p) | ||
1165 | { | ||
1166 | p->header.block_id = be16_to_cpu(p->header.block_id); | ||
1167 | p->header.format_reserved = be16_to_cpu(p->header.format_reserved); | ||
1168 | p->entry.disabled_block.start_addr | ||
1169 | = be32_to_cpu(p->entry.disabled_block.start_addr); | ||
1170 | p->entry.disabled_block.data_size | ||
1171 | = be32_to_cpu(p->entry.disabled_block.data_size); | ||
1172 | p->entry.cis_idi.start_addr | ||
1173 | = be32_to_cpu(p->entry.cis_idi.start_addr); | ||
1174 | p->entry.cis_idi.data_size | ||
1175 | = be32_to_cpu(p->entry.cis_idi.data_size); | ||
1176 | p->attr.block_size = be16_to_cpu(p->attr.block_size); | ||
1177 | p->attr.number_of_blocks = be16_to_cpu(p->attr.number_of_blocks); | ||
1178 | p->attr.number_of_effective_blocks | ||
1179 | = be16_to_cpu(p->attr.number_of_effective_blocks); | ||
1180 | p->attr.page_size = be16_to_cpu(p->attr.page_size); | ||
1181 | p->attr.memory_manufacturer_code | ||
1182 | = be16_to_cpu(p->attr.memory_manufacturer_code); | ||
1183 | p->attr.memory_device_code = be16_to_cpu(p->attr.memory_device_code); | ||
1184 | p->attr.implemented_capacity | ||
1185 | = be16_to_cpu(p->attr.implemented_capacity); | ||
1186 | p->attr.controller_number = be16_to_cpu(p->attr.controller_number); | ||
1187 | p->attr.controller_function = be16_to_cpu(p->attr.controller_function); | ||
1188 | } | ||
1189 | |||
1190 | static int msb_read_boot_blocks(struct msb_data *msb) | ||
1191 | { | ||
1192 | int pba = 0; | ||
1193 | struct scatterlist sg; | ||
1194 | struct ms_extra_data_register extra; | ||
1195 | struct ms_boot_page *page; | ||
1196 | |||
1197 | msb->boot_block_locations[0] = MS_BLOCK_INVALID; | ||
1198 | msb->boot_block_locations[1] = MS_BLOCK_INVALID; | ||
1199 | msb->boot_block_count = 0; | ||
1200 | |||
1201 | dbg_verbose("Start of a scan for the boot blocks"); | ||
1202 | |||
1203 | if (!msb->boot_page) { | ||
1204 | page = kmalloc(sizeof(struct ms_boot_page)*2, GFP_KERNEL); | ||
1205 | if (!page) | ||
1206 | return -ENOMEM; | ||
1207 | |||
1208 | msb->boot_page = page; | ||
1209 | } else | ||
1210 | page = msb->boot_page; | ||
1211 | |||
1212 | msb->block_count = MS_BLOCK_MAX_BOOT_ADDR; | ||
1213 | |||
1214 | for (pba = 0; pba < MS_BLOCK_MAX_BOOT_ADDR; pba++) { | ||
1215 | |||
1216 | sg_init_one(&sg, page, sizeof(*page)); | ||
1217 | if (msb_read_page(msb, pba, 0, &extra, &sg, 0)) { | ||
1218 | dbg("boot scan: can't read pba %d", pba); | ||
1219 | continue; | ||
1220 | } | ||
1221 | |||
1222 | if (extra.management_flag & MEMSTICK_MANAGEMENT_SYSFLG) { | ||
1223 | dbg("managment flag doesn't indicate boot block %d", | ||
1224 | pba); | ||
1225 | continue; | ||
1226 | } | ||
1227 | |||
1228 | if (be16_to_cpu(page->header.block_id) != MS_BLOCK_BOOT_ID) { | ||
1229 | dbg("the pba at %d doesn' contain boot block ID", pba); | ||
1230 | continue; | ||
1231 | } | ||
1232 | |||
1233 | msb_fix_boot_page_endianness(page); | ||
1234 | msb->boot_block_locations[msb->boot_block_count] = pba; | ||
1235 | |||
1236 | page++; | ||
1237 | msb->boot_block_count++; | ||
1238 | |||
1239 | if (msb->boot_block_count == 2) | ||
1240 | break; | ||
1241 | } | ||
1242 | |||
1243 | if (!msb->boot_block_count) { | ||
1244 | pr_err("media doesn't contain master page, aborting"); | ||
1245 | return -EIO; | ||
1246 | } | ||
1247 | |||
1248 | dbg_verbose("End of scan for boot blocks"); | ||
1249 | return 0; | ||
1250 | } | ||
1251 | |||
1252 | static int msb_read_bad_block_table(struct msb_data *msb, int block_nr) | ||
1253 | { | ||
1254 | struct ms_boot_page *boot_block; | ||
1255 | struct scatterlist sg; | ||
1256 | u16 *buffer = NULL; | ||
1257 | int offset = 0; | ||
1258 | int i, error = 0; | ||
1259 | int data_size, data_offset, page, page_offset, size_to_read; | ||
1260 | u16 pba; | ||
1261 | |||
1262 | BUG_ON(block_nr > 1); | ||
1263 | boot_block = &msb->boot_page[block_nr]; | ||
1264 | pba = msb->boot_block_locations[block_nr]; | ||
1265 | |||
1266 | if (msb->boot_block_locations[block_nr] == MS_BLOCK_INVALID) | ||
1267 | return -EINVAL; | ||
1268 | |||
1269 | data_size = boot_block->entry.disabled_block.data_size; | ||
1270 | data_offset = sizeof(struct ms_boot_page) + | ||
1271 | boot_block->entry.disabled_block.start_addr; | ||
1272 | if (!data_size) | ||
1273 | return 0; | ||
1274 | |||
1275 | page = data_offset / msb->page_size; | ||
1276 | page_offset = data_offset % msb->page_size; | ||
1277 | size_to_read = | ||
1278 | DIV_ROUND_UP(data_size + page_offset, msb->page_size) * | ||
1279 | msb->page_size; | ||
1280 | |||
1281 | dbg("reading bad block of boot block at pba %d, offset %d len %d", | ||
1282 | pba, data_offset, data_size); | ||
1283 | |||
1284 | buffer = kzalloc(size_to_read, GFP_KERNEL); | ||
1285 | if (!buffer) | ||
1286 | return -ENOMEM; | ||
1287 | |||
1288 | /* Read the buffer */ | ||
1289 | sg_init_one(&sg, buffer, size_to_read); | ||
1290 | |||
1291 | while (offset < size_to_read) { | ||
1292 | error = msb_read_page(msb, pba, page, NULL, &sg, offset); | ||
1293 | if (error) | ||
1294 | goto out; | ||
1295 | |||
1296 | page++; | ||
1297 | offset += msb->page_size; | ||
1298 | |||
1299 | if (page == msb->pages_in_block) { | ||
1300 | pr_err( | ||
1301 | "bad block table extends beyond the boot block"); | ||
1302 | break; | ||
1303 | } | ||
1304 | } | ||
1305 | |||
1306 | /* Process the bad block table */ | ||
1307 | for (i = page_offset; i < data_size / sizeof(u16); i++) { | ||
1308 | |||
1309 | u16 bad_block = be16_to_cpu(buffer[i]); | ||
1310 | |||
1311 | if (bad_block >= msb->block_count) { | ||
1312 | dbg("bad block table contains invalid block %d", | ||
1313 | bad_block); | ||
1314 | continue; | ||
1315 | } | ||
1316 | |||
1317 | if (test_bit(bad_block, msb->used_blocks_bitmap)) { | ||
1318 | dbg("duplicate bad block %d in the table", | ||
1319 | bad_block); | ||
1320 | continue; | ||
1321 | } | ||
1322 | |||
1323 | dbg("block %d is marked as factory bad", bad_block); | ||
1324 | msb_mark_block_used(msb, bad_block); | ||
1325 | } | ||
1326 | out: | ||
1327 | kfree(buffer); | ||
1328 | return error; | ||
1329 | } | ||
1330 | |||
1331 | static int msb_ftl_initialize(struct msb_data *msb) | ||
1332 | { | ||
1333 | int i; | ||
1334 | |||
1335 | if (msb->ftl_initialized) | ||
1336 | return 0; | ||
1337 | |||
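| /* Each 512-block zone exports 496 logical blocks (the rest are | ||
| spares); two more are subtracted, presumably for the boot blocks | ||
| in zone 0. */ | ||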
1338 | msb->zone_count = msb->block_count / MS_BLOCKS_IN_ZONE; | ||
1339 | msb->logical_block_count = msb->zone_count * 496 - 2; | ||
1340 | |||
1341 | msb->used_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL); | ||
1342 | msb->erased_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL); | ||
1343 | msb->lba_to_pba_table = | ||
1344 | kmalloc(msb->logical_block_count * sizeof(u16), GFP_KERNEL); | ||
1345 | |||
1346 | if (!msb->used_blocks_bitmap || !msb->lba_to_pba_table || | ||
1347 | !msb->erased_blocks_bitmap) { | ||
1348 | kfree(msb->used_blocks_bitmap); | ||
1349 | kfree(msb->lba_to_pba_table); | ||
1350 | kfree(msb->erased_blocks_bitmap); | ||
1351 | return -ENOMEM; | ||
1352 | } | ||
1353 | |||
1354 | for (i = 0; i < msb->zone_count; i++) | ||
1355 | msb->free_block_count[i] = MS_BLOCKS_IN_ZONE; | ||
1356 | |||
1357 | memset(msb->lba_to_pba_table, MS_BLOCK_INVALID, | ||
1358 | msb->logical_block_count * sizeof(u16)); | ||
1359 | |||
1360 | dbg("initial FTL tables created. Zone count = %d, Logical block count = %d", | ||
1361 | msb->zone_count, msb->logical_block_count); | ||
1362 | |||
1363 | msb->ftl_initialized = true; | ||
1364 | return 0; | ||
1365 | } | ||
1366 | |||
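| /* Build the lba -> pba table by reading the OOB area of every block. | ||
| Boot, factory-bad, system/DRM and temporary blocks are marked used; | ||
| when two blocks claim the same lba, the copy whose update (UDST) | ||
| bit is still clear is treated as the stable one and kept. */ | ||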
1367 | static int msb_ftl_scan(struct msb_data *msb) | ||
1368 | { | ||
1369 | u16 pba, lba, other_block; | ||
1370 | u8 overwrite_flag, management_flag, other_overwrite_flag; | ||
1371 | int error; | ||
1372 | struct ms_extra_data_register extra; | ||
1373 | u8 *overwrite_flags = kzalloc(msb->block_count, GFP_KERNEL); | ||
1374 | |||
1375 | if (!overwrite_flags) | ||
1376 | return -ENOMEM; | ||
1377 | |||
1378 | dbg("Start of media scanning"); | ||
1379 | for (pba = 0; pba < msb->block_count; pba++) { | ||
1380 | |||
1381 | if (pba == msb->boot_block_locations[0] || | ||
1382 | pba == msb->boot_block_locations[1]) { | ||
1383 | dbg_verbose("pba %05d -> [boot block]", pba); | ||
1384 | msb_mark_block_used(msb, pba); | ||
1385 | continue; | ||
1386 | } | ||
1387 | |||
1388 | if (test_bit(pba, msb->used_blocks_bitmap)) { | ||
1389 | dbg_verbose("pba %05d -> [factory bad]", pba); | ||
1390 | continue; | ||
1391 | } | ||
1392 | |||
1393 | memset(&extra, 0, sizeof(extra)); | ||
1394 | error = msb_read_oob(msb, pba, 0, &extra); | ||
1395 | |||
1396 | /* can't trust the page if we can't read the oob */ | ||
1397 | if (error == -EBADMSG) { | ||
1398 | pr_notice( | ||
1399 | "oob of pba %d damaged, will try to erase it", pba); | ||
1400 | msb_mark_block_used(msb, pba); | ||
1401 | msb_erase_block(msb, pba); | ||
1402 | continue; | ||
1403 | } else if (error) { | ||
1404 | pr_err("unknown error %d on read of oob of pba %d - aborting", | ||
1405 | error, pba); | ||
1406 | |||
1407 | kfree(overwrite_flags); | ||
1408 | return error; | ||
1409 | } | ||
1410 | |||
1411 | lba = be16_to_cpu(extra.logical_address); | ||
1412 | management_flag = extra.management_flag; | ||
1413 | overwrite_flag = extra.overwrite_flag; | ||
1414 | overwrite_flags[pba] = overwrite_flag; | ||
1415 | |||
1416 | /* Skip bad blocks */ | ||
1417 | if (!(overwrite_flag & MEMSTICK_OVERWRITE_BKST)) { | ||
1418 | dbg("pba %05d -> [BAD]", pba); | ||
1419 | msb_mark_block_used(msb, pba); | ||
1420 | continue; | ||
1421 | } | ||
1422 | |||
1423 | /* Skip system/drm blocks */ | ||
1424 | if ((management_flag & MEMSTICK_MANAGMENT_FLAG_NORMAL) != | ||
1425 | MEMSTICK_MANAGMENT_FLAG_NORMAL) { | ||
1426 | dbg("pba %05d -> [reserved management flag %02x]", | ||
1427 | pba, management_flag); | ||
1428 | msb_mark_block_used(msb, pba); | ||
1429 | continue; | ||
1430 | } | ||
1431 | |||
1432 | /* Erase temporary tables */ | ||
1433 | if (!(managment_flag & MEMSTICK_MANAGEMENT_ATFLG)) { | ||
1434 | dbg("pba %05d -> [temp table] - will erase", pba); | ||
1435 | |||
1436 | msb_mark_block_used(msb, pba); | ||
1437 | msb_erase_block(msb, pba); | ||
1438 | continue; | ||
1439 | } | ||
1440 | |||
1441 | if (lba == MS_BLOCK_INVALID) { | ||
1442 | dbg_verbose("pba %05d -> [free]", pba); | ||
1443 | continue; | ||
1444 | } | ||
1445 | |||
1446 | msb_mark_block_used(msb, pba); | ||
1447 | |||
1448 | /* Block has LBA not according to zoning*/ | ||
1449 | if (msb_get_zone_from_lba(lba) != msb_get_zone_from_pba(pba)) { | ||
1450 | pr_notice("pba %05d -> [bad lba %05d] - will erase", | ||
1451 | pba, lba); | ||
1452 | msb_erase_block(msb, pba); | ||
1453 | continue; | ||
1454 | } | ||
1455 | |||
1456 | /* No collisions - great */ | ||
1457 | if (msb->lba_to_pba_table[lba] == MS_BLOCK_INVALID) { | ||
1458 | dbg_verbose("pba %05d -> [lba %05d]", pba, lba); | ||
1459 | msb->lba_to_pba_table[lba] = pba; | ||
1460 | continue; | ||
1461 | } | ||
1462 | |||
1463 | other_block = msb->lba_to_pba_table[lba]; | ||
1464 | other_overwrite_flag = overwrite_flags[other_block]; | ||
1465 | |||
1466 | pr_notice("Collision between pba %d and pba %d", | ||
1467 | pba, other_block); | ||
1468 | |||
1469 | if (!(overwrite_flag & MEMSTICK_OVERWRITE_UDST)) { | ||
1470 | pr_notice("pba %d is marked as stable, use it", pba); | ||
1471 | msb_erase_block(msb, other_block); | ||
1472 | msb->lba_to_pba_table[lba] = pba; | ||
1473 | continue; | ||
1474 | } | ||
1475 | |||
1476 | if (!(other_overwrite_flag & MEMSTICK_OVERWRITE_UDST)) { | ||
1477 | pr_notice("pba %d is marked as stable, use it", | ||
1478 | other_block); | ||
1479 | msb_erase_block(msb, pba); | ||
1480 | continue; | ||
1481 | } | ||
1482 | |||
1483 | pr_notice("collision between blocks %d and %d, without stable flag set on both, erasing pba %d", | ||
1484 | pba, other_block, other_block); | ||
1485 | |||
1486 | msb_erase_block(msb, other_block); | ||
1487 | msb->lba_to_pba_table[lba] = pba; | ||
1488 | } | ||
1489 | |||
1490 | dbg("End of media scanning"); | ||
1491 | kfree(overwrite_flags); | ||
1492 | return 0; | ||
1493 | } | ||
1494 | |||
1495 | static void msb_cache_flush_timer(unsigned long data) | ||
1496 | { | ||
1497 | struct msb_data *msb = (struct msb_data *)data; | ||
1498 | msb->need_flush_cache = true; | ||
1499 | queue_work(msb->io_queue, &msb->io_work); | ||
1500 | } | ||
1501 | |||
1502 | |||
1503 | static void msb_cache_discard(struct msb_data *msb) | ||
1504 | { | ||
1505 | if (msb->cache_block_lba == MS_BLOCK_INVALID) | ||
1506 | return; | ||
1507 | |||
1508 | del_timer_sync(&msb->cache_flush_timer); | ||
1509 | |||
1510 | dbg_verbose("Discarding the write cache"); | ||
1511 | msb->cache_block_lba = MS_BLOCK_INVALID; | ||
1512 | bitmap_zero(&msb->valid_cache_bitmap, msb->pages_in_block); | ||
1513 | } | ||
1514 | |||
1515 | static int msb_cache_init(struct msb_data *msb) | ||
1516 | { | ||
1517 | setup_timer(&msb->cache_flush_timer, msb_cache_flush_timer, | ||
1518 | (unsigned long)msb); | ||
1519 | |||
1520 | if (!msb->cache) | ||
1521 | msb->cache = kzalloc(msb->block_size, GFP_KERNEL); | ||
1522 | if (!msb->cache) | ||
1523 | return -ENOMEM; | ||
1524 | |||
1525 | msb_cache_discard(msb); | ||
1526 | return 0; | ||
1527 | } | ||
1528 | |||
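| /* Flush the single-block write cache: pages the host never wrote | ||
| are first read back from the old pba so that the whole block can | ||
| be rewritten in one update; pages that fail to read are afterwards | ||
| marked as damaged via the overwrite flag. */ | ||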
1529 | static int msb_cache_flush(struct msb_data *msb) | ||
1530 | { | ||
1531 | struct scatterlist sg; | ||
1532 | struct ms_extra_data_register extra; | ||
1533 | int page, offset, error; | ||
1534 | u16 pba, lba; | ||
1535 | |||
1536 | if (msb->read_only) | ||
1537 | return -EROFS; | ||
1538 | |||
1539 | if (msb->cache_block_lba == MS_BLOCK_INVALID) | ||
1540 | return 0; | ||
1541 | |||
1542 | lba = msb->cache_block_lba; | ||
1543 | pba = msb->lba_to_pba_table[lba]; | ||
1544 | |||
1545 | dbg_verbose("Flushing the write cache of pba %d (LBA %d)", | ||
1546 | pba, msb->cache_block_lba); | ||
1547 | |||
1548 | sg_init_one(&sg, msb->cache, msb->block_size); | ||
1549 | |||
1550 | /* Read all missing pages in cache */ | ||
1551 | for (page = 0; page < msb->pages_in_block; page++) { | ||
1552 | |||
1553 | if (test_bit(page, &msb->valid_cache_bitmap)) | ||
1554 | continue; | ||
1555 | |||
1556 | offset = page * msb->page_size; | ||
1557 | |||
1558 | dbg_verbose("reading non-present sector %d of cache block %d", | ||
1559 | page, lba); | ||
1560 | error = msb_read_page(msb, pba, page, &extra, &sg, offset); | ||
1561 | |||
1562 | /* Bad pages are copied with 00 page status */ | ||
1563 | if (error == -EBADMSG) { | ||
1564 | pr_err("read error on sector %d, contents probably damaged", page); | ||
1565 | continue; | ||
1566 | } | ||
1567 | |||
1568 | if (error) | ||
1569 | return error; | ||
1570 | |||
1571 | if ((extra.overwrite_flag & MEMSTICK_OV_PG_NORMAL) != | ||
1572 | MEMSTICK_OV_PG_NORMAL) { | ||
1573 | dbg("page %d is marked as bad", page); | ||
1574 | continue; | ||
1575 | } | ||
1576 | |||
1577 | set_bit(page, &msb->valid_cache_bitmap); | ||
1578 | } | ||
1579 | |||
1580 | /* Write the cache now */ | ||
1581 | error = msb_update_block(msb, msb->cache_block_lba, &sg, 0); | ||
1582 | pba = msb->lba_to_pba_table[msb->cache_block_lba]; | ||
1583 | |||
1584 | /* Mark invalid pages */ | ||
1585 | if (!error) { | ||
1586 | for (page = 0; page < msb->pages_in_block; page++) { | ||
1587 | |||
1588 | if (test_bit(page, &msb->valid_cache_bitmap)) | ||
1589 | continue; | ||
1590 | |||
1591 | dbg("marking page %d as containing damaged data", | ||
1592 | page); | ||
1593 | msb_set_overwrite_flag(msb, | ||
1594 | pba, page, 0xFF & ~MEMSTICK_OV_PG_NORMAL); | ||
1595 | } | ||
1596 | } | ||
1597 | |||
1598 | msb_cache_discard(msb); | ||
1599 | return error; | ||
1600 | } | ||
1601 | |||
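| /* Store one page in the write cache. With add_to_cache_only set | ||
| (used on the read path) the page is cached only if it belongs to | ||
| the block that is already being cached. */ | ||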
1602 | static int msb_cache_write(struct msb_data *msb, int lba, | ||
1603 | int page, bool add_to_cache_only, struct scatterlist *sg, int offset) | ||
1604 | { | ||
1605 | int error; | ||
1606 | struct scatterlist sg_tmp[10]; | ||
1607 | |||
1608 | if (msb->read_only) | ||
1609 | return -EROFS; | ||
1610 | |||
1611 | if (msb->cache_block_lba == MS_BLOCK_INVALID || | ||
1612 | lba != msb->cache_block_lba) | ||
1613 | if (add_to_cache_only) | ||
1614 | return 0; | ||
1615 | |||
1616 | /* If we need to write to a different block */ | ||
1617 | if (msb->cache_block_lba != MS_BLOCK_INVALID && | ||
1618 | lba != msb->cache_block_lba) { | ||
1619 | dbg_verbose("first flush the cache"); | ||
1620 | error = msb_cache_flush(msb); | ||
1621 | if (error) | ||
1622 | return error; | ||
1623 | } | ||
1624 | |||
1625 | if (msb->cache_block_lba == MS_BLOCK_INVALID) { | ||
1626 | msb->cache_block_lba = lba; | ||
1627 | mod_timer(&msb->cache_flush_timer, | ||
1628 | jiffies + msecs_to_jiffies(cache_flush_timeout)); | ||
1629 | } | ||
1630 | |||
1631 | dbg_verbose("Write of LBA %d page %d to cache ", lba, page); | ||
1632 | |||
1633 | sg_init_table(sg_tmp, ARRAY_SIZE(sg_tmp)); | ||
1634 | msb_sg_copy(sg, sg_tmp, ARRAY_SIZE(sg_tmp), offset, msb->page_size); | ||
1635 | |||
1636 | sg_copy_to_buffer(sg_tmp, sg_nents(sg_tmp), | ||
1637 | msb->cache + page * msb->page_size, msb->page_size); | ||
1638 | |||
1639 | set_bit(page, &msb->valid_cache_bitmap); | ||
1640 | return 0; | ||
1641 | } | ||
1642 | |||
1643 | static int msb_cache_read(struct msb_data *msb, int lba, | ||
1644 | int page, struct scatterlist *sg, int offset) | ||
1645 | { | ||
1646 | int pba = msb->lba_to_pba_table[lba]; | ||
1647 | struct scatterlist sg_tmp[10]; | ||
1648 | int error = 0; | ||
1649 | |||
1650 | if (lba == msb->cache_block_lba && | ||
1651 | test_bit(page, &msb->valid_cache_bitmap)) { | ||
1652 | |||
1653 | dbg_verbose("Read of LBA %d (pba %d) sector %d from cache", | ||
1654 | lba, pba, page); | ||
1655 | |||
1656 | sg_init_table(sg_tmp, ARRAY_SIZE(sg_tmp)); | ||
1657 | msb_sg_copy(sg, sg_tmp, ARRAY_SIZE(sg_tmp), | ||
1658 | offset, msb->page_size); | ||
1659 | sg_copy_from_buffer(sg_tmp, sg_nents(sg_tmp), | ||
1660 | msb->cache + msb->page_size * page, | ||
1661 | msb->page_size); | ||
1662 | } else { | ||
1663 | dbg_verbose("Read of LBA %d (pba %d) sector %d from device", | ||
1664 | lba, pba, page); | ||
1665 | |||
1666 | error = msb_read_page(msb, pba, page, NULL, sg, offset); | ||
1667 | if (error) | ||
1668 | return error; | ||
1669 | |||
1670 | msb_cache_write(msb, lba, page, true, sg, offset); | ||
1671 | } | ||
1672 | return error; | ||
1673 | } | ||
1674 | |||
1675 | /* Emulated geometry table | ||
1676 | * The exact contents of this table aren't that important; | ||
1677 | * one could put different values here, provided that they still | ||
1678 | * cover the whole disk. | ||
1679 | * The 64 MB entry is what Windows reports for my 64M memstick */ | ||
1680 | |||
1681 | static const struct chs_entry chs_table[] = { | ||
1682 | /* size sectors cylinders heads */ | ||
1683 | { 4, 16, 247, 2 }, | ||
1684 | { 8, 16, 495, 2 }, | ||
1685 | { 16, 16, 495, 4 }, | ||
1686 | { 32, 16, 991, 4 }, | ||
1687 | { 64, 16, 991, 8 }, | ||
1688 | {128, 16, 991, 16 }, | ||
1689 | { 0 } | ||
1690 | }; | ||
1691 | |||
1692 | /* Load information about the card */ | ||
1693 | static int msb_init_card(struct memstick_dev *card) | ||
1694 | { | ||
1695 | struct msb_data *msb = memstick_get_drvdata(card); | ||
1696 | struct memstick_host *host = card->host; | ||
1697 | struct ms_boot_page *boot_block; | ||
1698 | int error = 0, i, raw_size_in_megs; | ||
1699 | |||
1700 | msb->caps = 0; | ||
1701 | |||
1702 | if (card->id.class >= MEMSTICK_CLASS_ROM && | ||
1703 | card->id.class <= MEMSTICK_CLASS_WP) | ||
1704 | msb->read_only = true; | ||
1705 | |||
1706 | msb->state = -1; | ||
1707 | error = msb_reset(msb, false); | ||
1708 | if (error) | ||
1709 | return error; | ||
1710 | |||
1711 | /* Due to a bug in the JMicron driver written by Alex Dubov, | ||
1712 | its serial mode barely works, | ||
1713 | so we switch to parallel mode right away */ | ||
1714 | if (host->caps & MEMSTICK_CAP_PAR4) | ||
1715 | msb_switch_to_parallel(msb); | ||
1716 | |||
1717 | msb->page_size = sizeof(struct ms_boot_page); | ||
1718 | |||
1719 | /* Read the boot page */ | ||
1720 | error = msb_read_boot_blocks(msb); | ||
1721 | if (error) | ||
1722 | return -EIO; | ||
1723 | |||
1724 | boot_block = &msb->boot_page[0]; | ||
1725 | |||
1726 | /* Save interesting attributes from the boot page */ | ||
1727 | msb->block_count = boot_block->attr.number_of_blocks; | ||
1728 | msb->page_size = boot_block->attr.page_size; | ||
1729 | |||
1730 | msb->pages_in_block = boot_block->attr.block_size * 2; | ||
1731 | msb->block_size = msb->page_size * msb->pages_in_block; | ||
1732 | |||
1733 | if (msb->page_size > PAGE_SIZE) { | ||
1734 | /* this isn't supported by Linux at all, anyway */ | ||
1735 | dbg("device page %d size isn't supported", msb->page_size); | ||
1736 | return -EINVAL; | ||
1737 | } | ||
1738 | |||
1739 | msb->block_buffer = kzalloc(msb->block_size, GFP_KERNEL); | ||
1740 | if (!msb->block_buffer) | ||
1741 | return -ENOMEM; | ||
1742 | |||
1743 | raw_size_in_megs = (msb->block_size * msb->block_count) >> 20; | ||
1744 | |||
1745 | for (i = 0; chs_table[i].size; i++) { | ||
1746 | |||
1747 | if (chs_table[i].size != raw_size_in_megs) | ||
1748 | continue; | ||
1749 | |||
1750 | msb->geometry.cylinders = chs_table[i].cyl; | ||
1751 | msb->geometry.heads = chs_table[i].head; | ||
1752 | msb->geometry.sectors = chs_table[i].sec; | ||
1753 | break; | ||
1754 | } | ||
1755 | |||
1756 | if (boot_block->attr.transfer_supporting == 1) | ||
1757 | msb->caps |= MEMSTICK_CAP_PAR4; | ||
1758 | |||
1759 | if (boot_block->attr.device_type & 0x03) | ||
1760 | msb->read_only = true; | ||
1761 | |||
1762 | dbg("Total block count = %d", msb->block_count); | ||
1763 | dbg("Each block consists of %d pages", msb->pages_in_block); | ||
1764 | dbg("Page size = %d bytes", msb->page_size); | ||
1765 | dbg("Parallel mode supported: %d", !!(msb->caps & MEMSTICK_CAP_PAR4)); | ||
1766 | dbg("Read only: %d", msb->read_only); | ||
1767 | |||
1768 | #if 0 | ||
1769 | /* Now we can switch the interface */ | ||
1770 | if (host->caps & msb->caps & MEMSTICK_CAP_PAR4) | ||
1771 | msb_switch_to_parallel(msb); | ||
1772 | #endif | ||
1773 | |||
1774 | error = msb_cache_init(msb); | ||
1775 | if (error) | ||
1776 | return error; | ||
1777 | |||
1778 | error = msb_ftl_initialize(msb); | ||
1779 | if (error) | ||
1780 | return error; | ||
1781 | |||
1782 | |||
1783 | /* Read the bad block table */ | ||
1784 | error = msb_read_bad_block_table(msb, 0); | ||
1785 | |||
1786 | if (error && error != -ENOMEM) { | ||
1787 | dbg("failed to read bad block table from primary boot block, trying from backup"); | ||
1788 | error = msb_read_bad_block_table(msb, 1); | ||
1789 | } | ||
1790 | |||
1791 | if (error) | ||
1792 | return error; | ||
1793 | |||
1794 | /* *drum roll* Scan the media */ | ||
1795 | error = msb_ftl_scan(msb); | ||
1796 | if (error) { | ||
1797 | pr_err("Scan of media failed"); | ||
1798 | return error; | ||
1799 | } | ||
1800 | |||
1801 | return 0; | ||
1802 | |||
1803 | } | ||
1804 | |||
1805 | static int msb_do_write_request(struct msb_data *msb, int lba, | ||
1806 | int page, struct scatterlist *sg, size_t len, int *successfully_written) | ||
1807 | { | ||
1808 | int error = 0; | ||
1809 | off_t offset = 0; | ||
1810 | *successfully_written = 0; | ||
1811 | |||
1812 | while (offset < len) { | ||
1813 | if (page == 0 && len - offset >= msb->block_size) { | ||
1814 | |||
1815 | if (msb->cache_block_lba == lba) | ||
1816 | msb_cache_discard(msb); | ||
1817 | |||
1818 | dbg_verbose("Writing whole lba %d", lba); | ||
1819 | error = msb_update_block(msb, lba, sg, offset); | ||
1820 | if (error) | ||
1821 | return error; | ||
1822 | |||
1823 | offset += msb->block_size; | ||
1824 | *successfully_written += msb->block_size; | ||
1825 | lba++; | ||
1826 | continue; | ||
1827 | } | ||
1828 | |||
1829 | error = msb_cache_write(msb, lba, page, false, sg, offset); | ||
1830 | if (error) | ||
1831 | return error; | ||
1832 | |||
1833 | offset += msb->page_size; | ||
1834 | *successfully_written += msb->page_size; | ||
1835 | |||
1836 | page++; | ||
1837 | if (page == msb->pages_in_block) { | ||
1838 | page = 0; | ||
1839 | lba++; | ||
1840 | } | ||
1841 | } | ||
1842 | return 0; | ||
1843 | } | ||
1844 | |||
1845 | static int msb_do_read_request(struct msb_data *msb, int lba, | ||
1846 | int page, struct scatterlist *sg, int len, int *successfully_read) | ||
1847 | { | ||
1848 | int error = 0; | ||
1849 | int offset = 0; | ||
1850 | *successfully_read = 0; | ||
1851 | |||
1852 | while (offset < len) { | ||
1853 | |||
1854 | error = msb_cache_read(msb, lba, page, sg, offset); | ||
1855 | if (error) | ||
1856 | return error; | ||
1857 | |||
1858 | offset += msb->page_size; | ||
1859 | *successfully_read += msb->page_size; | ||
1860 | |||
1861 | page++; | ||
1862 | if (page == msb->pages_in_block) { | ||
1863 | page = 0; | ||
1864 | lba++; | ||
1865 | } | ||
1866 | } | ||
1867 | return 0; | ||
1868 | } | ||
1869 | |||
1870 | static void msb_io_work(struct work_struct *work) | ||
1871 | { | ||
1872 | struct msb_data *msb = container_of(work, struct msb_data, io_work); | ||
1873 | int page, error, len; | ||
1874 | sector_t lba; | ||
1875 | unsigned long flags; | ||
1876 | struct scatterlist *sg = msb->prealloc_sg; | ||
1877 | |||
1878 | dbg_verbose("IO: work started"); | ||
1879 | |||
1880 | while (1) { | ||
1881 | spin_lock_irqsave(&msb->q_lock, flags); | ||
1882 | |||
1883 | if (msb->need_flush_cache) { | ||
1884 | msb->need_flush_cache = false; | ||
1885 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
1886 | msb_cache_flush(msb); | ||
1887 | continue; | ||
1888 | } | ||
1889 | |||
1890 | if (!msb->req) { | ||
1891 | msb->req = blk_fetch_request(msb->queue); | ||
1892 | if (!msb->req) { | ||
1893 | dbg_verbose("IO: no more requests exiting"); | ||
1894 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
1895 | return; | ||
1896 | } | ||
1897 | } | ||
1898 | |||
1899 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
1900 | |||
1901 | /* If card was removed meanwhile */ | ||
1902 | if (!msb->req) | ||
1903 | return; | ||
1904 | |||
1905 | /* process the request */ | ||
1906 | dbg_verbose("IO: processing new request"); | ||
1907 | blk_rq_map_sg(msb->queue, msb->req, sg); | ||
1908 | |||
1909 | lba = blk_rq_pos(msb->req); | ||
1910 | |||
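| /* Convert the starting 512-byte sector into (lba, page): first scale | ||
| sectors to device pages, then split the page index into a block | ||
| number and a page within that block. */ | ||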
1911 | sector_div(lba, msb->page_size / 512); | ||
1912 | page = do_div(lba, msb->pages_in_block); | ||
1913 | |||
1914 | if (rq_data_dir(msb->req) == READ) | ||
1915 | error = msb_do_read_request(msb, lba, page, sg, | ||
1916 | blk_rq_bytes(msb->req), &len); | ||
1917 | else | ||
1918 | error = msb_do_write_request(msb, lba, page, sg, | ||
1919 | blk_rq_bytes(msb->req), &len); | ||
1920 | |||
1921 | spin_lock_irqsave(&msb->q_lock, flags); | ||
1922 | |||
1923 | if (len) | ||
1924 | if (!__blk_end_request(msb->req, 0, len)) | ||
1925 | msb->req = NULL; | ||
1926 | |||
1927 | if (error && msb->req) { | ||
1928 | dbg_verbose("IO: ending one sector of the request with error"); | ||
1929 | if (!__blk_end_request(msb->req, error, msb->page_size)) | ||
1930 | msb->req = NULL; | ||
1931 | } | ||
1932 | |||
1933 | if (msb->req) | ||
1934 | dbg_verbose("IO: request still pending"); | ||
1935 | |||
1936 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
1937 | } | ||
1938 | } | ||
1939 | |||
1940 | static DEFINE_IDR(msb_disk_idr); /* set of used disk numbers */ | ||
1941 | static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */ | ||
1942 | |||
1943 | static int msb_bd_open(struct block_device *bdev, fmode_t mode) | ||
1944 | { | ||
1945 | struct gendisk *disk = bdev->bd_disk; | ||
1946 | struct msb_data *msb = disk->private_data; | ||
1947 | |||
1948 | dbg_verbose("block device open"); | ||
1949 | |||
1950 | mutex_lock(&msb_disk_lock); | ||
1951 | |||
1952 | if (msb && msb->card) | ||
1953 | msb->usage_count++; | ||
1954 | |||
1955 | mutex_unlock(&msb_disk_lock); | ||
1956 | return 0; | ||
1957 | } | ||
1958 | |||
1959 | static void msb_data_clear(struct msb_data *msb) | ||
1960 | { | ||
1961 | kfree(msb->boot_page); | ||
1962 | kfree(msb->used_blocks_bitmap); | ||
1963 | kfree(msb->lba_to_pba_table); | ||
1964 | kfree(msb->cache); | ||
1965 | msb->card = NULL; | ||
1966 | } | ||
1967 | |||
1968 | static int msb_disk_release(struct gendisk *disk) | ||
1969 | { | ||
1970 | struct msb_data *msb = disk->private_data; | ||
1971 | |||
1972 | dbg_verbose("block device release"); | ||
1973 | mutex_lock(&msb_disk_lock); | ||
1974 | |||
1975 | if (msb) { | ||
1976 | if (msb->usage_count) | ||
1977 | msb->usage_count--; | ||
1978 | |||
1979 | if (!msb->usage_count) { | ||
1980 | disk->private_data = NULL; | ||
1981 | idr_remove(&msb_disk_idr, msb->disk_id); | ||
1982 | put_disk(disk); | ||
1983 | kfree(msb); | ||
1984 | } | ||
1985 | } | ||
1986 | mutex_unlock(&msb_disk_lock); | ||
1987 | return 0; | ||
1988 | } | ||
1989 | |||
1990 | static void msb_bd_release(struct gendisk *disk, fmode_t mode) | ||
1991 | { | ||
1992 | msb_disk_release(disk); | ||
1993 | } | ||
1994 | |||
1995 | static int msb_bd_getgeo(struct block_device *bdev, | ||
1996 | struct hd_geometry *geo) | ||
1997 | { | ||
1998 | struct msb_data *msb = bdev->bd_disk->private_data; | ||
1999 | *geo = msb->geometry; | ||
2000 | return 0; | ||
2001 | } | ||
2002 | |||
2003 | static int msb_prepare_req(struct request_queue *q, struct request *req) | ||
2004 | { | ||
2005 | if (req->cmd_type != REQ_TYPE_FS && | ||
2006 | req->cmd_type != REQ_TYPE_BLOCK_PC) { | ||
2007 | blk_dump_rq_flags(req, "MS unsupported request"); | ||
2008 | return BLKPREP_KILL; | ||
2009 | } | ||
2010 | req->cmd_flags |= REQ_DONTPREP; | ||
2011 | return BLKPREP_OK; | ||
2012 | } | ||
2013 | |||
2014 | static void msb_submit_req(struct request_queue *q) | ||
2015 | { | ||
2016 | struct memstick_dev *card = q->queuedata; | ||
2017 | struct msb_data *msb = memstick_get_drvdata(card); | ||
2018 | struct request *req = NULL; | ||
2019 | |||
2020 | dbg_verbose("Submit request"); | ||
2021 | |||
2022 | if (msb->card_dead) { | ||
2023 | dbg("Refusing requests on removed card"); | ||
2024 | |||
2025 | WARN_ON(!msb->io_queue_stopped); | ||
2026 | |||
2027 | while ((req = blk_fetch_request(q)) != NULL) | ||
2028 | __blk_end_request_all(req, -ENODEV); | ||
2029 | return; | ||
2030 | } | ||
2031 | |||
2032 | if (msb->req) | ||
2033 | return; | ||
2034 | |||
2035 | if (!msb->io_queue_stopped) | ||
2036 | queue_work(msb->io_queue, &msb->io_work); | ||
2037 | } | ||
2038 | |||
2039 | static int msb_check_card(struct memstick_dev *card) | ||
2040 | { | ||
2041 | struct msb_data *msb = memstick_get_drvdata(card); | ||
2042 | return (msb->card_dead == 0); | ||
2043 | } | ||
2044 | |||
2045 | static void msb_stop(struct memstick_dev *card) | ||
2046 | { | ||
2047 | struct msb_data *msb = memstick_get_drvdata(card); | ||
2048 | unsigned long flags; | ||
2049 | |||
2050 | dbg("Stopping all msblock IO"); | ||
2051 | |||
2052 | spin_lock_irqsave(&msb->q_lock, flags); | ||
2053 | blk_stop_queue(msb->queue); | ||
2054 | msb->io_queue_stopped = true; | ||
2055 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
2056 | |||
2057 | del_timer_sync(&msb->cache_flush_timer); | ||
2058 | flush_workqueue(msb->io_queue); | ||
2059 | |||
2060 | if (msb->req) { | ||
2061 | spin_lock_irqsave(&msb->q_lock, flags); | ||
2062 | blk_requeue_request(msb->queue, msb->req); | ||
2063 | msb->req = NULL; | ||
2064 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
2065 | } | ||
2066 | |||
2067 | } | ||
2068 | |||
2069 | static void msb_start(struct memstick_dev *card) | ||
2070 | { | ||
2071 | struct msb_data *msb = memstick_get_drvdata(card); | ||
2072 | unsigned long flags; | ||
2073 | |||
2074 | dbg("Resuming IO from msblock"); | ||
2075 | |||
2076 | msb_invalidate_reg_window(msb); | ||
2077 | |||
2078 | spin_lock_irqsave(&msb->q_lock, flags); | ||
2079 | if (!msb->io_queue_stopped || msb->card_dead) { | ||
2080 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
2081 | return; | ||
2082 | } | ||
2083 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
2084 | |||
2085 | /* Kick a cache flush anyway, it's harmless */ | ||
2086 | msb->need_flush_cache = true; | ||
2087 | msb->io_queue_stopped = false; | ||
2088 | |||
2089 | spin_lock_irqsave(&msb->q_lock, flags); | ||
2090 | blk_start_queue(msb->queue); | ||
2091 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
2092 | |||
2093 | queue_work(msb->io_queue, &msb->io_work); | ||
2094 | |||
2095 | } | ||
2096 | |||
2097 | static const struct block_device_operations msb_bdops = { | ||
2098 | .open = msb_bd_open, | ||
2099 | .release = msb_bd_release, | ||
2100 | .getgeo = msb_bd_getgeo, | ||
2101 | .owner = THIS_MODULE | ||
2102 | }; | ||
2103 | |||
2104 | /* Registers the block device */ | ||
2105 | static int msb_init_disk(struct memstick_dev *card) | ||
2106 | { | ||
2107 | struct msb_data *msb = memstick_get_drvdata(card); | ||
2108 | struct memstick_host *host = card->host; | ||
2109 | int rc; | ||
2110 | u64 limit = BLK_BOUNCE_HIGH; | ||
2111 | unsigned long capacity; | ||
2112 | |||
2113 | if (host->dev.dma_mask && *(host->dev.dma_mask)) | ||
2114 | limit = *(host->dev.dma_mask); | ||
2115 | |||
2116 | mutex_lock(&msb_disk_lock); | ||
2117 | msb->disk_id = idr_alloc(&msb_disk_idr, card, 0, 256, GFP_KERNEL); | ||
2118 | mutex_unlock(&msb_disk_lock); | ||
2119 | |||
2120 | if (msb->disk_id < 0) | ||
2121 | return msb->disk_id; | ||
2122 | |||
2123 | msb->disk = alloc_disk(0); | ||
2124 | if (!msb->disk) { | ||
2125 | rc = -ENOMEM; | ||
2126 | goto out_release_id; | ||
2127 | } | ||
2128 | |||
2129 | msb->queue = blk_init_queue(msb_submit_req, &msb->q_lock); | ||
2130 | if (!msb->queue) { | ||
2131 | rc = -ENOMEM; | ||
2132 | goto out_put_disk; | ||
2133 | } | ||
2134 | |||
2135 | msb->queue->queuedata = card; | ||
2136 | blk_queue_prep_rq(msb->queue, msb_prepare_req); | ||
2137 | |||
2138 | blk_queue_bounce_limit(msb->queue, limit); | ||
2139 | blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES); | ||
2140 | blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS); | ||
2141 | blk_queue_max_segment_size(msb->queue, | ||
2142 | MS_BLOCK_MAX_PAGES * msb->page_size); | ||
2143 | blk_queue_logical_block_size(msb->queue, msb->page_size); | ||
2144 | |||
2145 | sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id); | ||
2146 | msb->disk->fops = &msb_bdops; | ||
2147 | msb->disk->private_data = msb; | ||
2148 | msb->disk->queue = msb->queue; | ||
2149 | msb->disk->driverfs_dev = &card->dev; | ||
2150 | msb->disk->flags |= GENHD_FL_EXT_DEVT; | ||
2151 | |||
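| /* The exported capacity covers only the logical blocks; the spare | ||
| physical blocks stay hidden inside the FTL. */ | ||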
2152 | capacity = msb->pages_in_block * msb->logical_block_count; | ||
2153 | capacity *= (msb->page_size / 512); | ||
2154 | set_capacity(msb->disk, capacity); | ||
2155 | dbg("Set total disk size to %lu sectors", capacity); | ||
2156 | |||
2157 | msb->usage_count = 1; | ||
2158 | msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM); | ||
2159 | INIT_WORK(&msb->io_work, msb_io_work); | ||
2160 | sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1); | ||
2161 | |||
2162 | if (msb->read_only) | ||
2163 | set_disk_ro(msb->disk, 1); | ||
2164 | |||
2165 | msb_start(card); | ||
2166 | add_disk(msb->disk); | ||
2167 | dbg("Disk added"); | ||
2168 | return 0; | ||
2169 | |||
2170 | out_put_disk: | ||
2171 | put_disk(msb->disk); | ||
2172 | out_release_id: | ||
2173 | mutex_lock(&msb_disk_lock); | ||
2174 | idr_remove(&msb_disk_idr, msb->disk_id); | ||
2175 | mutex_unlock(&msb_disk_lock); | ||
2176 | return rc; | ||
2177 | } | ||
2178 | |||
2179 | static int msb_probe(struct memstick_dev *card) | ||
2180 | { | ||
2181 | struct msb_data *msb; | ||
2182 | int rc = 0; | ||
2183 | |||
2184 | msb = kzalloc(sizeof(struct msb_data), GFP_KERNEL); | ||
2185 | if (!msb) | ||
2186 | return -ENOMEM; | ||
2187 | memstick_set_drvdata(card, msb); | ||
2188 | msb->card = card; | ||
2189 | spin_lock_init(&msb->q_lock); | ||
2190 | |||
2191 | rc = msb_init_card(card); | ||
2192 | if (rc) | ||
2193 | goto out_free; | ||
2194 | |||
2195 | rc = msb_init_disk(card); | ||
2196 | if (!rc) { | ||
2197 | card->check = msb_check_card; | ||
2198 | card->stop = msb_stop; | ||
2199 | card->start = msb_start; | ||
2200 | return 0; | ||
2201 | } | ||
2202 | out_free: | ||
2203 | memstick_set_drvdata(card, NULL); | ||
2204 | msb_data_clear(msb); | ||
2205 | kfree(msb); | ||
2206 | return rc; | ||
2207 | } | ||
2208 | |||
2209 | static void msb_remove(struct memstick_dev *card) | ||
2210 | { | ||
2211 | struct msb_data *msb = memstick_get_drvdata(card); | ||
2212 | unsigned long flags; | ||
2213 | |||
2214 | if (!msb->io_queue_stopped) | ||
2215 | msb_stop(card); | ||
2216 | |||
2217 | dbg("Removing the disk device"); | ||
2218 | |||
2219 | /* Take care of unhandled + new requests from now on */ | ||
2220 | spin_lock_irqsave(&msb->q_lock, flags); | ||
2221 | msb->card_dead = true; | ||
2222 | blk_start_queue(msb->queue); | ||
2223 | spin_unlock_irqrestore(&msb->q_lock, flags); | ||
2224 | |||
2225 | /* Remove the disk */ | ||
2226 | del_gendisk(msb->disk); | ||
2227 | blk_cleanup_queue(msb->queue); | ||
2228 | msb->queue = NULL; | ||
2229 | |||
2230 | mutex_lock(&msb_disk_lock); | ||
2231 | msb_data_clear(msb); | ||
2232 | mutex_unlock(&msb_disk_lock); | ||
2233 | |||
2234 | msb_disk_release(msb->disk); | ||
2235 | memstick_set_drvdata(card, NULL); | ||
2236 | } | ||
2237 | |||
2238 | #ifdef CONFIG_PM | ||
2239 | |||
2240 | static int msb_suspend(struct memstick_dev *card, pm_message_t state) | ||
2241 | { | ||
2242 | msb_stop(card); | ||
2243 | return 0; | ||
2244 | } | ||
2245 | |||
2246 | static int msb_resume(struct memstick_dev *card) | ||
2247 | { | ||
2248 | struct msb_data *msb = memstick_get_drvdata(card); | ||
2249 | struct msb_data *new_msb = NULL; | ||
2250 | bool card_dead = true; | ||
2251 | |||
2252 | #ifndef CONFIG_MEMSTICK_UNSAFE_RESUME | ||
2253 | msb->card_dead = true; | ||
2254 | return 0; | ||
2255 | #endif | ||
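| /* Safe resume: reinitialize the card into a scratch msb_data and | ||
| compare the boot page, FTL tables and geometry with the state | ||
| saved before suspend; any mismatch means the card was removed or | ||
| replaced, and it is then marked dead. */ | ||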
2256 | mutex_lock(&card->host->lock); | ||
2257 | |||
2258 | new_msb = kzalloc(sizeof(struct msb_data), GFP_KERNEL); | ||
2259 | if (!new_msb) | ||
2260 | goto out; | ||
2261 | |||
2262 | new_msb->card = card; | ||
2263 | memstick_set_drvdata(card, new_msb); | ||
2264 | spin_lock_init(&new_msb->q_lock); | ||
2265 | sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1); | ||
2266 | |||
2267 | if (msb_init_card(card)) | ||
2268 | goto out; | ||
2269 | |||
2270 | if (msb->block_size != new_msb->block_size) | ||
2271 | goto out; | ||
2272 | |||
2273 | if (memcmp(msb->boot_page, new_msb->boot_page, | ||
2274 | sizeof(struct ms_boot_page))) | ||
2275 | goto out; | ||
2276 | |||
2277 | if (msb->logical_block_count != new_msb->logical_block_count || | ||
2278 | memcmp(msb->lba_to_pba_table, new_msb->lba_to_pba_table, | ||
2279 | msb->logical_block_count * sizeof(u16))) | ||
2280 | goto out; | ||
2281 | |||
2282 | if (msb->block_count != new_msb->block_count || | ||
2283 | memcmp(msb->used_blocks_bitmap, new_msb->used_blocks_bitmap, | ||
2284 | msb->block_count / 8)) | ||
2285 | goto out; | ||
2286 | |||
2287 | card_dead = false; | ||
2288 | out: | ||
2289 | if (card_dead) | ||
2290 | dbg("Card was removed/replaced during suspend"); | ||
2291 | |||
2292 | msb->card_dead = card_dead; | ||
2293 | memstick_set_drvdata(card, msb); | ||
2294 | |||
2295 | if (new_msb) { | ||
2296 | msb_data_clear(new_msb); | ||
2297 | kfree(new_msb); | ||
2298 | } | ||
2299 | |||
2300 | msb_start(card); | ||
2301 | mutex_unlock(&card->host->lock); | ||
2302 | return 0; | ||
2303 | } | ||
2304 | #else | ||
2305 | |||
2306 | #define msb_suspend NULL | ||
2307 | #define msb_resume NULL | ||
2308 | |||
2309 | #endif /* CONFIG_PM */ | ||
2310 | |||
2311 | static struct memstick_device_id msb_id_tbl[] = { | ||
2312 | {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, | ||
2313 | MEMSTICK_CLASS_FLASH}, | ||
2314 | |||
2315 | {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, | ||
2316 | MEMSTICK_CLASS_ROM}, | ||
2317 | |||
2318 | {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, | ||
2319 | MEMSTICK_CLASS_RO}, | ||
2320 | |||
2321 | {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, | ||
2322 | MEMSTICK_CLASS_WP}, | ||
2323 | |||
2324 | {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_DUO, MEMSTICK_CATEGORY_STORAGE_DUO, | ||
2325 | MEMSTICK_CLASS_DUO}, | ||
2326 | {} | ||
2327 | }; | ||
2328 | MODULE_DEVICE_TABLE(memstick, msb_id_tbl); | ||
2329 | |||
2330 | |||
2331 | static struct memstick_driver msb_driver = { | ||
2332 | .driver = { | ||
2333 | .name = DRIVER_NAME, | ||
2334 | .owner = THIS_MODULE | ||
2335 | }, | ||
2336 | .id_table = msb_id_tbl, | ||
2337 | .probe = msb_probe, | ||
2338 | .remove = msb_remove, | ||
2339 | .suspend = msb_suspend, | ||
2340 | .resume = msb_resume | ||
2341 | }; | ||
2342 | |||
2343 | static int major; | ||
2344 | |||
2345 | static int __init msb_init(void) | ||
2346 | { | ||
2347 | int rc = register_blkdev(0, DRIVER_NAME); | ||
2348 | |||
2349 | if (rc < 0) { | ||
2350 | pr_err("failed to register major (error %d)\n", rc); | ||
2351 | return rc; | ||
2352 | } | ||
2353 | |||
2354 | major = rc; | ||
2355 | rc = memstick_register_driver(&msb_driver); | ||
2356 | if (rc) { | ||
2357 | unregister_blkdev(major, DRIVER_NAME); | ||
2358 | pr_err("failed to register memstick driver (error %d)\n", rc); | ||
2359 | } | ||
2360 | |||
2361 | return rc; | ||
2362 | } | ||
2363 | |||
2364 | static void __exit msb_exit(void) | ||
2365 | { | ||
2366 | memstick_unregister_driver(&msb_driver); | ||
2367 | unregister_blkdev(major, DRIVER_NAME); | ||
2368 | idr_destroy(&msb_disk_idr); | ||
2369 | } | ||
2370 | |||
2371 | module_init(msb_init); | ||
2372 | module_exit(msb_exit); | ||
2373 | |||
2374 | module_param(cache_flush_timeout, int, S_IRUGO); | ||
2375 | MODULE_PARM_DESC(cache_flush_timeout, | ||
2376 | "Cache flush timeout in msec (1000 default)"); | ||
2377 | module_param(debug, int, S_IRUGO | S_IWUSR); | ||
2378 | MODULE_PARM_DESC(debug, "Debug level (0-2)"); | ||
2379 | |||
2380 | module_param(verify_writes, bool, S_IRUGO); | ||
2381 | MODULE_PARM_DESC(verify_writes, "Read back and check all data that is written"); | ||
2382 | |||
2383 | MODULE_LICENSE("GPL"); | ||
2384 | MODULE_AUTHOR("Maxim Levitsky"); | ||
2385 | MODULE_DESCRIPTION("Sony MemoryStick block device driver"); | ||
diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h new file mode 100644 index 000000000000..96e637550988 --- /dev/null +++ b/drivers/memstick/core/ms_block.h | |||
@@ -0,0 +1,290 @@ | |||
1 | /* | ||
2 | * ms_block.h - Sony MemoryStick (legacy) storage support | ||
3 | * | ||
4 | * Copyright (C) 2013 Maxim Levitsky <maximlevitsky@gmail.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * Minor portions of the driver are copied from mspro_block.c which is | ||
11 | * Copyright (C) 2007 Alex Dubov <oakad@yahoo.com> | ||
12 | * | ||
13 | * The MemoryStick structures below were also copied from an old, broken driver | ||
14 | * by the same author; they most likely come from the MemoryStick specification. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #ifndef MS_BLOCK_NEW_H | ||
19 | #define MS_BLOCK_NEW_H | ||
20 | |||
21 | #define MS_BLOCK_MAX_SEGS 32 | ||
22 | #define MS_BLOCK_MAX_PAGES ((2 << 16) - 1) | ||
23 | |||
24 | #define MS_BLOCK_MAX_BOOT_ADDR 0x000c | ||
25 | #define MS_BLOCK_BOOT_ID 0x0001 | ||
26 | #define MS_BLOCK_INVALID 0xffff | ||
27 | #define MS_MAX_ZONES 16 | ||
28 | #define MS_BLOCKS_IN_ZONE 512 | ||
29 | |||
30 | #define MS_BLOCK_MAP_LINE_SZ 16 | ||
31 | #define MS_BLOCK_PART_SHIFT 3 | ||
32 | |||
33 | |||
34 | #define MEMSTICK_UNCORR_ERROR (MEMSTICK_STATUS1_UCFG | \ | ||
35 | MEMSTICK_STATUS1_UCEX | MEMSTICK_STATUS1_UCDT) | ||
36 | |||
37 | #define MEMSTICK_CORR_ERROR (MEMSTICK_STATUS1_FGER | MEMSTICK_STATUS1_EXER | \ | ||
38 | MEMSTICK_STATUS1_DTER) | ||
39 | |||
40 | #define MEMSTICK_INT_ERROR (MEMSTICK_INT_CMDNAK | MEMSTICK_INT_ERR) | ||
41 | |||
42 | #define MEMSTICK_OVERWRITE_FLAG_NORMAL \ | ||
43 | (MEMSTICK_OVERWRITE_PGST1 | \ | ||
44 | MEMSTICK_OVERWRITE_PGST0 | \ | ||
45 | MEMSTICK_OVERWRITE_BKST) | ||
46 | |||
47 | #define MEMSTICK_OV_PG_NORMAL \ | ||
48 | (MEMSTICK_OVERWRITE_PGST1 | MEMSTICK_OVERWRITE_PGST0) | ||
49 | |||
50 | #define MEMSTICK_MANAGMENT_FLAG_NORMAL \ | ||
51 | (MEMSTICK_MANAGEMENT_SYSFLG | \ | ||
52 | MEMSTICK_MANAGEMENT_SCMS1 | \ | ||
53 | 	MEMSTICK_MANAGEMENT_SCMS0) | ||
54 | |||
55 | struct ms_boot_header { | ||
56 | unsigned short block_id; | ||
57 | unsigned short format_reserved; | ||
58 | unsigned char reserved0[184]; | ||
59 | unsigned char data_entry; | ||
60 | unsigned char reserved1[179]; | ||
61 | } __packed; | ||
62 | |||
63 | |||
64 | struct ms_system_item { | ||
65 | unsigned int start_addr; | ||
66 | unsigned int data_size; | ||
67 | unsigned char data_type_id; | ||
68 | unsigned char reserved[3]; | ||
69 | } __packed; | ||
70 | |||
71 | struct ms_system_entry { | ||
72 | struct ms_system_item disabled_block; | ||
73 | struct ms_system_item cis_idi; | ||
74 | unsigned char reserved[24]; | ||
75 | } __packed; | ||
76 | |||
77 | struct ms_boot_attr_info { | ||
78 | unsigned char memorystick_class; | ||
79 | unsigned char format_unique_value1; | ||
80 | unsigned short block_size; | ||
81 | unsigned short number_of_blocks; | ||
82 | unsigned short number_of_effective_blocks; | ||
83 | unsigned short page_size; | ||
84 | unsigned char extra_data_size; | ||
85 | unsigned char format_unique_value2; | ||
86 | unsigned char assembly_time[8]; | ||
87 | unsigned char format_unique_value3; | ||
88 | unsigned char serial_number[3]; | ||
89 | unsigned char assembly_manufacturer_code; | ||
90 | unsigned char assembly_model_code[3]; | ||
91 | unsigned short memory_manufacturer_code; | ||
92 | unsigned short memory_device_code; | ||
93 | unsigned short implemented_capacity; | ||
94 | unsigned char format_unique_value4[2]; | ||
95 | unsigned char vcc; | ||
96 | unsigned char vpp; | ||
97 | unsigned short controller_number; | ||
98 | unsigned short controller_function; | ||
99 | unsigned char reserved0[9]; | ||
100 | unsigned char transfer_supporting; | ||
101 | unsigned short format_unique_value5; | ||
102 | unsigned char format_type; | ||
103 | unsigned char memorystick_application; | ||
104 | unsigned char device_type; | ||
105 | unsigned char reserved1[22]; | ||
106 | unsigned char format_uniqure_value6[2]; | ||
107 | unsigned char reserved2[15]; | ||
108 | } __packed; | ||
109 | |||
110 | struct ms_cis_idi { | ||
111 | unsigned short general_config; | ||
112 | unsigned short logical_cylinders; | ||
113 | unsigned short reserved0; | ||
114 | unsigned short logical_heads; | ||
115 | unsigned short track_size; | ||
116 | unsigned short page_size; | ||
117 | unsigned short pages_per_track; | ||
118 | unsigned short msw; | ||
119 | unsigned short lsw; | ||
120 | unsigned short reserved1; | ||
121 | unsigned char serial_number[20]; | ||
122 | unsigned short buffer_type; | ||
123 | unsigned short buffer_size_increments; | ||
124 | unsigned short long_command_ecc; | ||
125 | unsigned char firmware_version[28]; | ||
126 | unsigned char model_name[18]; | ||
127 | unsigned short reserved2[5]; | ||
128 | unsigned short pio_mode_number; | ||
129 | unsigned short dma_mode_number; | ||
130 | unsigned short field_validity; | ||
131 | unsigned short current_logical_cylinders; | ||
132 | unsigned short current_logical_heads; | ||
133 | unsigned short current_pages_per_track; | ||
134 | unsigned int current_page_capacity; | ||
135 | unsigned short mutiple_page_setting; | ||
136 | unsigned int addressable_pages; | ||
137 | unsigned short single_word_dma; | ||
138 | unsigned short multi_word_dma; | ||
139 | unsigned char reserved3[128]; | ||
140 | } __packed; | ||
141 | |||
142 | |||
143 | struct ms_boot_page { | ||
144 | struct ms_boot_header header; | ||
145 | struct ms_system_entry entry; | ||
146 | struct ms_boot_attr_info attr; | ||
147 | } __packed; | ||
148 | |||
149 | struct msb_data { | ||
150 | unsigned int usage_count; | ||
151 | struct memstick_dev *card; | ||
152 | struct gendisk *disk; | ||
153 | struct request_queue *queue; | ||
154 | spinlock_t q_lock; | ||
155 | struct hd_geometry geometry; | ||
156 | struct attribute_group attr_group; | ||
157 | struct request *req; | ||
158 | int caps; | ||
159 | int disk_id; | ||
160 | |||
161 | /* IO */ | ||
162 | struct workqueue_struct *io_queue; | ||
163 | bool io_queue_stopped; | ||
164 | struct work_struct io_work; | ||
165 | bool card_dead; | ||
166 | |||
167 | /* Media properties */ | ||
168 | struct ms_boot_page *boot_page; | ||
169 | u16 boot_block_locations[2]; | ||
170 | int boot_block_count; | ||
171 | |||
172 | bool read_only; | ||
173 | unsigned short page_size; | ||
174 | int block_size; | ||
175 | int pages_in_block; | ||
176 | int zone_count; | ||
177 | int block_count; | ||
178 | int logical_block_count; | ||
179 | |||
180 | /* FTL tables */ | ||
181 | unsigned long *used_blocks_bitmap; | ||
182 | unsigned long *erased_blocks_bitmap; | ||
183 | u16 *lba_to_pba_table; | ||
184 | int free_block_count[MS_MAX_ZONES]; | ||
185 | bool ftl_initialized; | ||
186 | |||
187 | /* Cache */ | ||
188 | unsigned char *cache; | ||
189 | unsigned long valid_cache_bitmap; | ||
190 | int cache_block_lba; | ||
191 | bool need_flush_cache; | ||
192 | struct timer_list cache_flush_timer; | ||
193 | |||
194 | /* Preallocated buffers */ | ||
195 | unsigned char *block_buffer; | ||
196 | struct scatterlist prealloc_sg[MS_BLOCK_MAX_SEGS+1]; | ||
197 | |||
198 | |||
199 | /* handler's local data */ | ||
200 | struct ms_register_addr reg_addr; | ||
201 | bool addr_valid; | ||
202 | |||
203 | u8 command_value; | ||
204 | bool command_need_oob; | ||
205 | struct scatterlist *current_sg; | ||
206 | int current_sg_offset; | ||
207 | |||
208 | struct ms_register regs; | ||
209 | int current_page; | ||
210 | |||
211 | int state; | ||
212 | int exit_error; | ||
213 | bool int_polling; | ||
214 | unsigned long int_timeout; | ||
215 | |||
216 | }; | ||
217 | |||
218 | enum msb_readpage_states { | ||
219 | MSB_RP_SEND_BLOCK_ADDRESS = 0, | ||
220 | MSB_RP_SEND_READ_COMMAND, | ||
221 | |||
222 | MSB_RP_SEND_INT_REQ, | ||
223 | MSB_RP_RECEIVE_INT_REQ_RESULT, | ||
224 | |||
225 | MSB_RP_SEND_READ_STATUS_REG, | ||
226 | MSB_RP_RECIVE_STATUS_REG, | ||
227 | |||
228 | MSB_RP_SEND_OOB_READ, | ||
229 | MSB_RP_RECEIVE_OOB_READ, | ||
230 | |||
231 | MSB_RP_SEND_READ_DATA, | ||
232 | MSB_RP_RECEIVE_READ_DATA, | ||
233 | }; | ||
234 | |||
235 | enum msb_write_block_states { | ||
236 | MSB_WB_SEND_WRITE_PARAMS = 0, | ||
237 | MSB_WB_SEND_WRITE_OOB, | ||
238 | MSB_WB_SEND_WRITE_COMMAND, | ||
239 | |||
240 | MSB_WB_SEND_INT_REQ, | ||
241 | MSB_WB_RECEIVE_INT_REQ, | ||
242 | |||
243 | MSB_WB_SEND_WRITE_DATA, | ||
244 | MSB_WB_RECEIVE_WRITE_CONFIRMATION, | ||
245 | }; | ||
246 | |||
247 | enum msb_send_command_states { | ||
248 | MSB_SC_SEND_WRITE_PARAMS, | ||
249 | MSB_SC_SEND_WRITE_OOB, | ||
250 | MSB_SC_SEND_COMMAND, | ||
251 | |||
252 | MSB_SC_SEND_INT_REQ, | ||
253 | MSB_SC_RECEIVE_INT_REQ, | ||
254 | |||
255 | }; | ||
256 | |||
257 | enum msb_reset_states { | ||
258 | MSB_RS_SEND, | ||
259 | MSB_RS_CONFIRM, | ||
260 | }; | ||
261 | |||
262 | enum msb_par_switch_states { | ||
263 | MSB_PS_SEND_SWITCH_COMMAND, | ||
264 | MSB_PS_SWICH_HOST, | ||
265 | MSB_PS_CONFIRM, | ||
266 | }; | ||
267 | |||
268 | struct chs_entry { | ||
269 | unsigned long size; | ||
270 | unsigned char sec; | ||
271 | unsigned short cyl; | ||
272 | unsigned char head; | ||
273 | }; | ||
274 | |||
275 | static int msb_reset(struct msb_data *msb, bool full); | ||
276 | |||
277 | static int h_msb_default_bad(struct memstick_dev *card, | ||
278 | struct memstick_request **mrq); | ||
279 | |||
280 | #define __dbg(level, format, ...) \ | ||
281 | do { \ | ||
282 | if (debug >= level) \ | ||
283 | pr_err(format "\n", ## __VA_ARGS__); \ | ||
284 | } while (0) | ||
285 | |||
286 | |||
287 | #define dbg(format, ...) __dbg(1, format, ## __VA_ARGS__) | ||
288 | #define dbg_verbose(format, ...) __dbg(2, format, ## __VA_ARGS__) | ||
289 | |||
290 | #endif | ||
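The __dbg() macro above gates output on the module-level debug parameter and, as the definition shows, routes every level through pr_err(). A user-space analogue of the same level-gated pattern (the names mirror the header but everything is re-declared here):

	#include <stdio.h>

	static int debug = 1;	/* 0 = quiet, 1 = dbg, 2 = verbose */

	/* do/while (0) keeps the macro usable as a single statement. */
	#define __dbg(level, fmt, ...) \
		do { \
			if (debug >= (level)) \
				fprintf(stderr, fmt "\n", ##__VA_ARGS__); \
		} while (0)

	#define dbg(fmt, ...)		__dbg(1, fmt, ##__VA_ARGS__)
	#define dbg_verbose(fmt, ...)	__dbg(2, fmt, ##__VA_ARGS__)

	int main(void)
	{
		dbg("printed at debug level %d", debug);
		dbg_verbose("suppressed unless debug >= 2");
		return 0;
	}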
diff --git a/drivers/memstick/host/rtsx_pci_ms.c b/drivers/memstick/host/rtsx_pci_ms.c index cf8bd727dfc7..25f8f93decb6 100644 --- a/drivers/memstick/host/rtsx_pci_ms.c +++ b/drivers/memstick/host/rtsx_pci_ms.c | |||
@@ -612,8 +612,6 @@ static int rtsx_pci_ms_drv_remove(struct platform_device *pdev) | |||
612 | memstick_remove_host(msh); | 612 | memstick_remove_host(msh); |
613 | memstick_free_host(msh); | 613 | memstick_free_host(msh); |
614 | 614 | ||
615 | platform_set_drvdata(pdev, NULL); | ||
616 | |||
617 | dev_dbg(&(pdev->dev), | 615 | dev_dbg(&(pdev->dev), |
618 | ": Realtek PCI-E Memstick controller has been removed\n"); | 616 | ": Realtek PCI-E Memstick controller has been removed\n"); |
619 | 617 | ||
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index f74bfcbb7bad..8eea2efbbb6d 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c | |||
@@ -393,17 +393,21 @@ static void gmux_notify_handler(acpi_handle device, u32 value, void *context) | |||
393 | complete(&gmux_data->powerchange_done); | 393 | complete(&gmux_data->powerchange_done); |
394 | } | 394 | } |
395 | 395 | ||
396 | static int gmux_suspend(struct pnp_dev *pnp, pm_message_t state) | 396 | static int gmux_suspend(struct device *dev) |
397 | { | 397 | { |
398 | struct pnp_dev *pnp = to_pnp_dev(dev); | ||
398 | struct apple_gmux_data *gmux_data = pnp_get_drvdata(pnp); | 399 | struct apple_gmux_data *gmux_data = pnp_get_drvdata(pnp); |
400 | |||
399 | gmux_data->resume_client_id = gmux_active_client(gmux_data); | 401 | gmux_data->resume_client_id = gmux_active_client(gmux_data); |
400 | gmux_disable_interrupts(gmux_data); | 402 | gmux_disable_interrupts(gmux_data); |
401 | return 0; | 403 | return 0; |
402 | } | 404 | } |
403 | 405 | ||
404 | static int gmux_resume(struct pnp_dev *pnp) | 406 | static int gmux_resume(struct device *dev) |
405 | { | 407 | { |
408 | struct pnp_dev *pnp = to_pnp_dev(dev); | ||
406 | struct apple_gmux_data *gmux_data = pnp_get_drvdata(pnp); | 409 | struct apple_gmux_data *gmux_data = pnp_get_drvdata(pnp); |
410 | |||
407 | gmux_enable_interrupts(gmux_data); | 411 | gmux_enable_interrupts(gmux_data); |
408 | gmux_switchto(gmux_data->resume_client_id); | 412 | gmux_switchto(gmux_data->resume_client_id); |
409 | if (gmux_data->power_state == VGA_SWITCHEROO_OFF) | 413 | if (gmux_data->power_state == VGA_SWITCHEROO_OFF) |
@@ -605,13 +609,19 @@ static const struct pnp_device_id gmux_device_ids[] = { | |||
605 | {"", 0} | 609 | {"", 0} |
606 | }; | 610 | }; |
607 | 611 | ||
612 | static const struct dev_pm_ops gmux_dev_pm_ops = { | ||
613 | .suspend = gmux_suspend, | ||
614 | .resume = gmux_resume, | ||
615 | }; | ||
616 | |||
608 | static struct pnp_driver gmux_pnp_driver = { | 617 | static struct pnp_driver gmux_pnp_driver = { |
609 | .name = "apple-gmux", | 618 | .name = "apple-gmux", |
610 | .probe = gmux_probe, | 619 | .probe = gmux_probe, |
611 | .remove = gmux_remove, | 620 | .remove = gmux_remove, |
612 | .id_table = gmux_device_ids, | 621 | .id_table = gmux_device_ids, |
613 | .suspend = gmux_suspend, | 622 | .driver = { |
614 | .resume = gmux_resume | 623 | .pm = &gmux_dev_pm_ops, |
624 | }, | ||
615 | }; | 625 | }; |
616 | 626 | ||
617 | static int __init apple_gmux_init(void) | 627 | static int __init apple_gmux_init(void) |
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c index 12adb43a0693..a39ee38a9414 100644 --- a/drivers/pnp/driver.c +++ b/drivers/pnp/driver.c | |||
@@ -163,6 +163,13 @@ static int __pnp_bus_suspend(struct device *dev, pm_message_t state) | |||
163 | if (!pnp_drv) | 163 | if (!pnp_drv) |
164 | return 0; | 164 | return 0; |
165 | 165 | ||
166 | if (pnp_drv->driver.pm && pnp_drv->driver.pm->suspend) { | ||
167 | error = pnp_drv->driver.pm->suspend(dev); | ||
168 | suspend_report_result(pnp_drv->driver.pm->suspend, error); | ||
169 | if (error) | ||
170 | return error; | ||
171 | } | ||
172 | |||
166 | if (pnp_drv->suspend) { | 173 | if (pnp_drv->suspend) { |
167 | error = pnp_drv->suspend(pnp_dev, state); | 174 | error = pnp_drv->suspend(pnp_dev, state); |
168 | if (error) | 175 | if (error) |
@@ -211,6 +218,12 @@ static int pnp_bus_resume(struct device *dev) | |||
211 | return error; | 218 | return error; |
212 | } | 219 | } |
213 | 220 | ||
221 | if (pnp_drv->driver.pm && pnp_drv->driver.pm->resume) { | ||
222 | error = pnp_drv->driver.pm->resume(dev); | ||
223 | if (error) | ||
224 | return error; | ||
225 | } | ||
226 | |||
214 | if (pnp_drv->resume) { | 227 | if (pnp_drv->resume) { |
215 | error = pnp_drv->resume(pnp_dev); | 228 | error = pnp_drv->resume(pnp_dev); |
216 | if (error) | 229 | if (error) |
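These two hunks teach the PNP bus to honour struct dev_pm_ops: the pm callbacks are tried first and, on success, the legacy pnp_driver ones still run if present, so converted and unconverted drivers coexist. The dispatch order can be modelled in plain C (all types and names below are illustrative stand-ins):

	#include <stdio.h>

	struct pm_ops_like {
		int (*suspend)(void);	/* kernel version takes a struct device * */
	};

	struct pnp_driver_like {
		const struct pm_ops_like *pm;	/* new-style, tried first */
		int (*legacy_suspend)(void);	/* old pnp-specific callback */
	};

	static int bus_suspend(const struct pnp_driver_like *drv)
	{
		int error;

		if (drv->pm && drv->pm->suspend) {
			error = drv->pm->suspend();
			if (error)
				return error;	/* abort, as the hunk does */
		}
		if (drv->legacy_suspend)
			return drv->legacy_suspend();
		return 0;
	}

	static int new_suspend(void) { puts("dev_pm_ops suspend"); return 0; }
	static int old_suspend(void) { puts("legacy pnp suspend"); return 0; }

	static const struct pm_ops_like pm = { .suspend = new_suspend };

	int main(void)
	{
		struct pnp_driver_like converted = { .pm = &pm };
		struct pnp_driver_like unconverted = { .legacy_suspend = old_suspend };

		bus_suspend(&converted);
		bus_suspend(&unconverted);
		return 0;
	}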
diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index eae0eda9ff39..9966124ad988 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c | |||
@@ -184,7 +184,6 @@ static int pps_gpio_remove(struct platform_device *pdev) | |||
184 | { | 184 | { |
185 | struct pps_gpio_device_data *data = platform_get_drvdata(pdev); | 185 | struct pps_gpio_device_data *data = platform_get_drvdata(pdev); |
186 | 186 | ||
187 | platform_set_drvdata(pdev, NULL); | ||
188 | pps_unregister_source(data->pps); | 187 | pps_unregister_source(data->pps); |
189 | dev_info(&pdev->dev, "removed IRQ %d as PPS source\n", data->irq); | 188 | dev_info(&pdev->dev, "removed IRQ %d as PPS source\n", data->irq); |
190 | return 0; | 189 | return 0; |
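This hunk, like the rtsx_pci_ms one above, drops the explicit platform_set_drvdata(pdev, NULL) from the remove path: the driver core now clears drvdata itself once probing fails or remove() returns, so the store is redundant. A remove() then reduces to driver-specific teardown, roughly as follows (foo_priv and foo_teardown are hypothetical):

	static int foo_remove(struct platform_device *pdev)
	{
		struct foo_priv *priv = platform_get_drvdata(pdev);

		foo_teardown(priv);	/* only the driver's own cleanup */
		/* no platform_set_drvdata(pdev, NULL): the core handles it */
		return 0;
	}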
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 9e3498bf302b..9654aa3c05cb 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig | |||
@@ -1249,6 +1249,15 @@ config RTC_DRV_SIRFSOC | |||
1249 | Say "yes" here to support the real time clock on SiRF SOC chips. | 1249 | Say "yes" here to support the real time clock on SiRF SOC chips. |
1250 | This driver can also be built as a module called rtc-sirfsoc. | 1250 | This driver can also be built as a module called rtc-sirfsoc. |
1251 | 1251 | ||
1252 | config RTC_DRV_MOXART | ||
1253 | tristate "MOXA ART RTC" | ||
1254 | help | ||
1255 | If you say yes here you get support for the MOXA ART | ||
1256 | RTC module. | ||
1257 | |||
1258 | This driver can also be built as a module. If so, the module | ||
1259 | 	  will be called rtc-moxart. | ||
1260 | |||
1252 | comment "HID Sensor RTC drivers" | 1261 | comment "HID Sensor RTC drivers" |
1253 | 1262 | ||
1254 | config RTC_DRV_HID_SENSOR_TIME | 1263 | config RTC_DRV_HID_SENSOR_TIME |
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index d3b4488f48f2..2dff3d2009b5 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile | |||
@@ -130,3 +130,4 @@ obj-$(CONFIG_RTC_DRV_WM831X) += rtc-wm831x.o | |||
130 | obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o | 130 | obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o |
131 | obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o | 131 | obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o |
132 | obj-$(CONFIG_RTC_DRV_SIRFSOC) += rtc-sirfsoc.o | 132 | obj-$(CONFIG_RTC_DRV_SIRFSOC) += rtc-sirfsoc.o |
133 | obj-$(CONFIG_RTC_DRV_MOXART) += rtc-moxart.o | ||
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index be06d7150de5..24e733c98f8b 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c | |||
@@ -1018,23 +1018,6 @@ static void __exit cmos_pnp_remove(struct pnp_dev *pnp) | |||
1018 | cmos_do_remove(&pnp->dev); | 1018 | cmos_do_remove(&pnp->dev); |
1019 | } | 1019 | } |
1020 | 1020 | ||
1021 | #ifdef CONFIG_PM | ||
1022 | |||
1023 | static int cmos_pnp_suspend(struct pnp_dev *pnp, pm_message_t mesg) | ||
1024 | { | ||
1025 | return cmos_suspend(&pnp->dev); | ||
1026 | } | ||
1027 | |||
1028 | static int cmos_pnp_resume(struct pnp_dev *pnp) | ||
1029 | { | ||
1030 | return cmos_resume(&pnp->dev); | ||
1031 | } | ||
1032 | |||
1033 | #else | ||
1034 | #define cmos_pnp_suspend NULL | ||
1035 | #define cmos_pnp_resume NULL | ||
1036 | #endif | ||
1037 | |||
1038 | static void cmos_pnp_shutdown(struct pnp_dev *pnp) | 1021 | static void cmos_pnp_shutdown(struct pnp_dev *pnp) |
1039 | { | 1022 | { |
1040 | if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(&pnp->dev)) | 1023 | if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(&pnp->dev)) |
@@ -1060,8 +1043,11 @@ static struct pnp_driver cmos_pnp_driver = { | |||
1060 | 1043 | ||
1061 | /* flag ensures resume() gets called, and stops syslog spam */ | 1044 | /* flag ensures resume() gets called, and stops syslog spam */ |
1062 | .flags = PNP_DRIVER_RES_DO_NOT_CHANGE, | 1045 | .flags = PNP_DRIVER_RES_DO_NOT_CHANGE, |
1063 | .suspend = cmos_pnp_suspend, | 1046 | #ifdef CONFIG_PM_SLEEP |
1064 | .resume = cmos_pnp_resume, | 1047 | .driver = { |
1048 | .pm = &cmos_pm_ops, | ||
1049 | }, | ||
1050 | #endif | ||
1065 | }; | 1051 | }; |
1066 | 1052 | ||
1067 | #endif /* CONFIG_PNP */ | 1053 | #endif /* CONFIG_PNP */ |
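With the wrappers gone, the pnp driver points at cmos_pm_ops, whose definition sits outside this hunk; since the deleted wrappers simply forwarded &pnp->dev, it presumably wires cmos_suspend()/cmos_resume() into a struct dev_pm_ops, most likely via the stock helper. A hedged sketch of that idiom:

	/* Sketch only -- assumes cmos_suspend()/cmos_resume() take a struct
	 * device *, as the deleted pnp wrappers suggest.  SIMPLE_DEV_PM_OPS
	 * expands to a dev_pm_ops with the system-sleep entries filled in. */
	static SIMPLE_DEV_PM_OPS(cmos_pm_ops, cmos_suspend, cmos_resume);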
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c index 308a8fefe76f..bc7b4fcf603c 100644 --- a/drivers/rtc/rtc-ds1511.c +++ b/drivers/rtc/rtc-ds1511.c | |||
@@ -89,7 +89,6 @@ enum ds1511reg { | |||
89 | struct rtc_plat_data { | 89 | struct rtc_plat_data { |
90 | struct rtc_device *rtc; | 90 | struct rtc_device *rtc; |
91 | void __iomem *ioaddr; /* virtual base address */ | 91 | void __iomem *ioaddr; /* virtual base address */ |
92 | int size; /* amount of memory mapped */ | ||
93 | int irq; | 92 | int irq; |
94 | unsigned int irqen; | 93 | unsigned int irqen; |
95 | int alrm_sec; | 94 | int alrm_sec; |
@@ -479,20 +478,14 @@ static int ds1511_rtc_probe(struct platform_device *pdev) | |||
479 | struct rtc_plat_data *pdata; | 478 | struct rtc_plat_data *pdata; |
480 | int ret = 0; | 479 | int ret = 0; |
481 | 480 | ||
482 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
483 | if (!res) | ||
484 | return -ENODEV; | ||
485 | |||
486 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); | 481 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); |
487 | if (!pdata) | 482 | if (!pdata) |
488 | return -ENOMEM; | 483 | return -ENOMEM; |
489 | pdata->size = resource_size(res); | 484 | |
490 | if (!devm_request_mem_region(&pdev->dev, res->start, pdata->size, | 485 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
491 | pdev->name)) | 486 | ds1511_base = devm_ioremap_resource(&pdev->dev, res); |
492 | return -EBUSY; | 487 | if (IS_ERR(ds1511_base)) |
493 | ds1511_base = devm_ioremap(&pdev->dev, res->start, pdata->size); | 488 | return PTR_ERR(ds1511_base); |
494 | if (!ds1511_base) | ||
495 | return -ENOMEM; | ||
496 | pdata->ioaddr = ds1511_base; | 489 | pdata->ioaddr = ds1511_base; |
497 | pdata->irq = platform_get_irq(pdev, 0); | 490 | pdata->irq = platform_get_irq(pdev, 0); |
498 | 491 | ||
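This conversion, repeated across the RTC drivers below, folds the platform_get_resource()/request_mem_region()/ioremap() triple into devm_ioremap_resource(), which validates the resource (including a NULL one), requests the region, maps it, and reports failure as an ERR_PTR. The resulting probe idiom, sketched with a hypothetical priv struct:

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	priv->base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(priv->base))
		return PTR_ERR(priv->base);	/* -EINVAL, -EBUSY or -ENOMEM */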
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index 8c6c952e90b1..fd31571941f5 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c | |||
@@ -285,19 +285,14 @@ static int ds1553_rtc_probe(struct platform_device *pdev) | |||
285 | void __iomem *ioaddr; | 285 | void __iomem *ioaddr; |
286 | int ret = 0; | 286 | int ret = 0; |
287 | 287 | ||
288 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
289 | if (!res) | ||
290 | return -ENODEV; | ||
291 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); | 288 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); |
292 | if (!pdata) | 289 | if (!pdata) |
293 | return -ENOMEM; | 290 | return -ENOMEM; |
294 | if (!devm_request_mem_region(&pdev->dev, res->start, RTC_REG_SIZE, | ||
295 | pdev->name)) | ||
296 | return -EBUSY; | ||
297 | 291 | ||
298 | ioaddr = devm_ioremap(&pdev->dev, res->start, RTC_REG_SIZE); | 292 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
299 | if (!ioaddr) | 293 | ioaddr = devm_ioremap_resource(&pdev->dev, res); |
300 | return -ENOMEM; | 294 | if (IS_ERR(ioaddr)) |
295 | return PTR_ERR(ioaddr); | ||
301 | pdata->ioaddr = ioaddr; | 296 | pdata->ioaddr = ioaddr; |
302 | pdata->irq = platform_get_irq(pdev, 0); | 297 | pdata->irq = platform_get_irq(pdev, 0); |
303 | 298 | ||
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index eccdc62ae1c0..17b73fdc3b6e 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c | |||
@@ -52,11 +52,9 @@ | |||
52 | #define RTC_BATT_FLAG 0x80 | 52 | #define RTC_BATT_FLAG 0x80 |
53 | 53 | ||
54 | struct rtc_plat_data { | 54 | struct rtc_plat_data { |
55 | struct rtc_device *rtc; | ||
56 | void __iomem *ioaddr_nvram; | 55 | void __iomem *ioaddr_nvram; |
57 | void __iomem *ioaddr_rtc; | 56 | void __iomem *ioaddr_rtc; |
58 | size_t size_nvram; | 57 | size_t size_nvram; |
59 | size_t size; | ||
60 | unsigned long last_jiffies; | 58 | unsigned long last_jiffies; |
61 | struct bin_attribute nvram_attr; | 59 | struct bin_attribute nvram_attr; |
62 | }; | 60 | }; |
@@ -117,11 +115,7 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm) | |||
117 | /* year is 1900 + tm->tm_year */ | 115 | /* year is 1900 + tm->tm_year */ |
118 | tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900; | 116 | tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900; |
119 | 117 | ||
120 | if (rtc_valid_tm(tm) < 0) { | 118 | return rtc_valid_tm(tm); |
121 | dev_err(dev, "retrieved date/time is not valid.\n"); | ||
122 | rtc_time_to_tm(0, tm); | ||
123 | } | ||
124 | return 0; | ||
125 | } | 119 | } |
126 | 120 | ||
127 | static const struct rtc_class_ops ds1742_rtc_ops = { | 121 | static const struct rtc_class_ops ds1742_rtc_ops = { |
@@ -168,22 +162,17 @@ static int ds1742_rtc_probe(struct platform_device *pdev) | |||
168 | void __iomem *ioaddr; | 162 | void __iomem *ioaddr; |
169 | int ret = 0; | 163 | int ret = 0; |
170 | 164 | ||
171 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
172 | if (!res) | ||
173 | return -ENODEV; | ||
174 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); | 165 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); |
175 | if (!pdata) | 166 | if (!pdata) |
176 | return -ENOMEM; | 167 | return -ENOMEM; |
177 | pdata->size = resource_size(res); | 168 | |
178 | if (!devm_request_mem_region(&pdev->dev, res->start, pdata->size, | 169 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
179 | pdev->name)) | 170 | ioaddr = devm_ioremap_resource(&pdev->dev, res); |
180 | return -EBUSY; | 171 | if (IS_ERR(ioaddr)) |
181 | ioaddr = devm_ioremap(&pdev->dev, res->start, pdata->size); | 172 | return PTR_ERR(ioaddr); |
182 | if (!ioaddr) | ||
183 | return -ENOMEM; | ||
184 | 173 | ||
185 | pdata->ioaddr_nvram = ioaddr; | 174 | pdata->ioaddr_nvram = ioaddr; |
186 | pdata->size_nvram = pdata->size - RTC_SIZE; | 175 | pdata->size_nvram = resource_size(res) - RTC_SIZE; |
187 | pdata->ioaddr_rtc = ioaddr + pdata->size_nvram; | 176 | pdata->ioaddr_rtc = ioaddr + pdata->size_nvram; |
188 | 177 | ||
189 | sysfs_bin_attr_init(&pdata->nvram_attr); | 178 | sysfs_bin_attr_init(&pdata->nvram_attr); |
@@ -212,7 +201,6 @@ static int ds1742_rtc_probe(struct platform_device *pdev) | |||
212 | &ds1742_rtc_ops, THIS_MODULE); | 201 | &ds1742_rtc_ops, THIS_MODULE); |
213 | if (IS_ERR(rtc)) | 202 | if (IS_ERR(rtc)) |
214 | return PTR_ERR(rtc); | 203 | return PTR_ERR(rtc); |
215 | pdata->rtc = rtc; | ||
216 | 204 | ||
217 | ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr); | 205 | ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr); |
218 | 206 | ||
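ds1742_rtc_read_time() now returns rtc_valid_tm() directly instead of silently resetting an invalid clock to the epoch, so callers see the error. The kind of range checking rtc_valid_tm() performs can be mirrored standalone (simplified; the real helper also checks days per month):

	#include <stdio.h>

	struct tm_like { int sec, min, hour, mday, mon, year; /* year since 1900 */ };

	/* Simplified analogue of rtc_valid_tm(): 0 when sane, -1 otherwise. */
	static int valid_tm(const struct tm_like *t)
	{
		if (t->sec < 0 || t->sec > 59 || t->min < 0 || t->min > 59 ||
		    t->hour < 0 || t->hour > 23 || t->mday < 1 || t->mday > 31 ||
		    t->mon < 0 || t->mon > 11 || t->year < 70)
			return -1;
		return 0;
	}

	int main(void)
	{
		struct tm_like ok = { 30, 15, 12, 1, 8, 113 };	/* 2013-09-01 */
		struct tm_like bad = { 61, 15, 12, 1, 8, 113 };	/* 61 seconds */

		printf("ok: %d, bad: %d\n", valid_tm(&ok), valid_tm(&bad));
		return 0;
	}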
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 549b3c3792d2..580e7b56bde8 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c | |||
@@ -138,17 +138,9 @@ static int ep93xx_rtc_probe(struct platform_device *pdev) | |||
138 | return -ENOMEM; | 138 | return -ENOMEM; |
139 | 139 | ||
140 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | 140 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
141 | if (!res) | 141 | ep93xx_rtc->mmio_base = devm_ioremap_resource(&pdev->dev, res); |
142 | return -ENXIO; | 142 | if (IS_ERR(ep93xx_rtc->mmio_base)) |
143 | 143 | return PTR_ERR(ep93xx_rtc->mmio_base); | |
144 | if (!devm_request_mem_region(&pdev->dev, res->start, | ||
145 | resource_size(res), pdev->name)) | ||
146 | return -EBUSY; | ||
147 | |||
148 | ep93xx_rtc->mmio_base = devm_ioremap(&pdev->dev, res->start, | ||
149 | resource_size(res)); | ||
150 | if (!ep93xx_rtc->mmio_base) | ||
151 | return -ENXIO; | ||
152 | 144 | ||
153 | pdev->dev.platform_data = ep93xx_rtc; | 145 | pdev->dev.platform_data = ep93xx_rtc; |
154 | platform_set_drvdata(pdev, ep93xx_rtc); | 146 | platform_set_drvdata(pdev, ep93xx_rtc); |
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 7273b0139e5c..4e2a81854f51 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c | |||
@@ -23,10 +23,6 @@ | |||
23 | #include <linux/iio/iio.h> | 23 | #include <linux/iio/iio.h> |
24 | #include <linux/rtc.h> | 24 | #include <linux/rtc.h> |
25 | 25 | ||
26 | /* Format: HID-SENSOR-usage_id_in_hex */ | ||
27 | /* Usage ID from spec for Time: 0x2000A0 */ | ||
28 | #define DRIVER_NAME "HID-SENSOR-2000a0" /* must be lowercase */ | ||
29 | |||
30 | enum hid_time_channel { | 26 | enum hid_time_channel { |
31 | CHANNEL_SCAN_INDEX_YEAR, | 27 | CHANNEL_SCAN_INDEX_YEAR, |
32 | CHANNEL_SCAN_INDEX_MONTH, | 28 | CHANNEL_SCAN_INDEX_MONTH, |
@@ -283,9 +279,11 @@ static int hid_time_probe(struct platform_device *pdev) | |||
283 | "hid-sensor-time", &hid_time_rtc_ops, | 279 | "hid-sensor-time", &hid_time_rtc_ops, |
284 | THIS_MODULE); | 280 | THIS_MODULE); |
285 | 281 | ||
286 | if (IS_ERR(time_state->rtc)) { | 282 | if (IS_ERR_OR_NULL(time_state->rtc)) { |
283 | ret = time_state->rtc ? PTR_ERR(time_state->rtc) : -ENODEV; | ||
284 | time_state->rtc = NULL; | ||
285 | sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_TIME); | ||
287 | dev_err(&pdev->dev, "rtc device register failed!\n"); | 286 | dev_err(&pdev->dev, "rtc device register failed!\n"); |
288 | return PTR_ERR(time_state->rtc); | ||
289 | } | 287 | } |
290 | 288 | ||
291 | return ret; | 289 | return ret; |
@@ -300,9 +298,19 @@ static int hid_time_remove(struct platform_device *pdev) | |||
300 | return 0; | 298 | return 0; |
301 | } | 299 | } |
302 | 300 | ||
301 | static struct platform_device_id hid_time_ids[] = { | ||
302 | { | ||
303 | /* Format: HID-SENSOR-usage_id_in_hex_lowercase */ | ||
304 | .name = "HID-SENSOR-2000a0", | ||
305 | }, | ||
306 | { /* sentinel */ } | ||
307 | }; | ||
308 | MODULE_DEVICE_TABLE(platform, hid_time_ids); | ||
309 | |||
303 | static struct platform_driver hid_time_platform_driver = { | 310 | static struct platform_driver hid_time_platform_driver = { |
311 | .id_table = hid_time_ids, | ||
304 | .driver = { | 312 | .driver = { |
305 | .name = DRIVER_NAME, | 313 | .name = KBUILD_MODNAME, |
306 | .owner = THIS_MODULE, | 314 | .owner = THIS_MODULE, |
307 | }, | 315 | }, |
308 | .probe = hid_time_probe, | 316 | .probe = hid_time_probe, |
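Moving the usage-ID string out of a DRIVER_NAME define into a platform_device_id table lets the platform bus match it like any other device ID (with MODULE_DEVICE_TABLE exporting the alias for module autoloading), while KBUILD_MODNAME keeps driver.name tied to the module name. The shape of the pattern, with an illustrative table name:

	static struct platform_device_id example_ids[] = {
		{ .name = "HID-SENSOR-2000a0" },	/* matched against pdev->name */
		{ /* sentinel */ }
	};
	MODULE_DEVICE_TABLE(platform, example_ids);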
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index d3a8c8e255de..abd7f9091f34 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c | |||
@@ -375,24 +375,16 @@ static int __init dryice_rtc_probe(struct platform_device *pdev) | |||
375 | struct imxdi_dev *imxdi; | 375 | struct imxdi_dev *imxdi; |
376 | int rc; | 376 | int rc; |
377 | 377 | ||
378 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
379 | if (!res) | ||
380 | return -ENODEV; | ||
381 | |||
382 | imxdi = devm_kzalloc(&pdev->dev, sizeof(*imxdi), GFP_KERNEL); | 378 | imxdi = devm_kzalloc(&pdev->dev, sizeof(*imxdi), GFP_KERNEL); |
383 | if (!imxdi) | 379 | if (!imxdi) |
384 | return -ENOMEM; | 380 | return -ENOMEM; |
385 | 381 | ||
386 | imxdi->pdev = pdev; | 382 | imxdi->pdev = pdev; |
387 | 383 | ||
388 | if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res), | 384 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
389 | pdev->name)) | 385 | imxdi->ioaddr = devm_ioremap_resource(&pdev->dev, res); |
390 | return -EBUSY; | 386 | if (IS_ERR(imxdi->ioaddr)) |
391 | 387 | return PTR_ERR(imxdi->ioaddr); | |
392 | imxdi->ioaddr = devm_ioremap(&pdev->dev, res->start, | ||
393 | resource_size(res)); | ||
394 | if (imxdi->ioaddr == NULL) | ||
395 | return -ENOMEM; | ||
396 | 388 | ||
397 | spin_lock_init(&imxdi->irq_lock); | 389 | spin_lock_init(&imxdi->irq_lock); |
398 | 390 | ||
diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c index 8276ae94a2a9..bfdbcb82d069 100644 --- a/drivers/rtc/rtc-lpc32xx.c +++ b/drivers/rtc/rtc-lpc32xx.c | |||
@@ -201,16 +201,9 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev) | |||
201 | { | 201 | { |
202 | struct resource *res; | 202 | struct resource *res; |
203 | struct lpc32xx_rtc *rtc; | 203 | struct lpc32xx_rtc *rtc; |
204 | resource_size_t size; | ||
205 | int rtcirq; | 204 | int rtcirq; |
206 | u32 tmp; | 205 | u32 tmp; |
207 | 206 | ||
208 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
209 | if (!res) { | ||
210 | dev_err(&pdev->dev, "Can't get memory resource\n"); | ||
211 | return -ENOENT; | ||
212 | } | ||
213 | |||
214 | rtcirq = platform_get_irq(pdev, 0); | 207 | rtcirq = platform_get_irq(pdev, 0); |
215 | if (rtcirq < 0 || rtcirq >= NR_IRQS) { | 208 | if (rtcirq < 0 || rtcirq >= NR_IRQS) { |
216 | dev_warn(&pdev->dev, "Can't get interrupt resource\n"); | 209 | dev_warn(&pdev->dev, "Can't get interrupt resource\n"); |
@@ -224,19 +217,10 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev) | |||
224 | } | 217 | } |
225 | rtc->irq = rtcirq; | 218 | rtc->irq = rtcirq; |
226 | 219 | ||
227 | size = resource_size(res); | 220 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
228 | 221 | rtc->rtc_base = devm_ioremap_resource(&pdev->dev, res); | |
229 | if (!devm_request_mem_region(&pdev->dev, res->start, size, | 222 | if (IS_ERR(rtc->rtc_base)) |
230 | pdev->name)) { | 223 | return PTR_ERR(rtc->rtc_base); |
231 | dev_err(&pdev->dev, "RTC registers are not free\n"); | ||
232 | return -EBUSY; | ||
233 | } | ||
234 | |||
235 | rtc->rtc_base = devm_ioremap(&pdev->dev, res->start, size); | ||
236 | if (!rtc->rtc_base) { | ||
237 | dev_err(&pdev->dev, "Can't map memory\n"); | ||
238 | return -ENOMEM; | ||
239 | } | ||
240 | 224 | ||
241 | spin_lock_init(&rtc->lock); | 225 | spin_lock_init(&rtc->lock); |
242 | 226 | ||
diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 9915cb96014b..9efe118a28ba 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c | |||
@@ -240,9 +240,9 @@ static int max77686_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) | |||
240 | } | 240 | } |
241 | 241 | ||
242 | alrm->pending = 0; | 242 | alrm->pending = 0; |
243 | ret = regmap_read(info->max77686->regmap, MAX77686_REG_STATUS1, &val); | 243 | ret = regmap_read(info->max77686->regmap, MAX77686_REG_STATUS2, &val); |
244 | if (ret < 0) { | 244 | if (ret < 0) { |
245 | dev_err(info->dev, "%s:%d fail to read status1 reg(%d)\n", | 245 | dev_err(info->dev, "%s:%d fail to read status2 reg(%d)\n", |
246 | __func__, __LINE__, ret); | 246 | __func__, __LINE__, ret); |
247 | goto out; | 247 | goto out; |
248 | } | 248 | } |
diff --git a/drivers/rtc/rtc-moxart.c b/drivers/rtc/rtc-moxart.c new file mode 100644 index 000000000000..c29dee0946e6 --- /dev/null +++ b/drivers/rtc/rtc-moxart.c | |||
@@ -0,0 +1,330 @@ | |||
1 | /* | ||
2 | * MOXA ART RTC driver. | ||
3 | * | ||
4 | * Copyright (C) 2013 Jonas Jensen | ||
5 | * | ||
6 | * Jonas Jensen <jonas.jensen@gmail.com> | ||
7 | * | ||
8 | * Based on code from | ||
9 | * Moxa Technology Co., Ltd. <www.moxa.com> | ||
10 | * | ||
11 | * This file is licensed under the terms of the GNU General Public | ||
12 | * License version 2. This program is licensed "as is" without any | ||
13 | * warranty of any kind, whether express or implied. | ||
14 | */ | ||
15 | |||
16 | #include <linux/init.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/delay.h> | ||
19 | #include <linux/rtc.h> | ||
20 | #include <linux/platform_device.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/gpio.h> | ||
23 | #include <linux/of_gpio.h> | ||
24 | |||
25 | #define GPIO_RTC_RESERVED 0x0C | ||
26 | #define GPIO_RTC_DATA_SET 0x10 | ||
27 | #define GPIO_RTC_DATA_CLEAR 0x14 | ||
28 | #define GPIO_RTC_PIN_PULL_ENABLE 0x18 | ||
29 | #define GPIO_RTC_PIN_PULL_TYPE 0x1C | ||
30 | #define GPIO_RTC_INT_ENABLE 0x20 | ||
31 | #define GPIO_RTC_INT_RAW_STATE 0x24 | ||
32 | #define GPIO_RTC_INT_MASKED_STATE 0x28 | ||
33 | #define GPIO_RTC_INT_MASK 0x2C | ||
34 | #define GPIO_RTC_INT_CLEAR 0x30 | ||
35 | #define GPIO_RTC_INT_TRIGGER 0x34 | ||
36 | #define GPIO_RTC_INT_BOTH 0x38 | ||
37 | #define GPIO_RTC_INT_RISE_NEG 0x3C | ||
38 | #define GPIO_RTC_BOUNCE_ENABLE 0x40 | ||
39 | #define GPIO_RTC_BOUNCE_PRE_SCALE 0x44 | ||
40 | #define GPIO_RTC_PROTECT_W 0x8E | ||
41 | #define GPIO_RTC_PROTECT_R 0x8F | ||
42 | #define GPIO_RTC_YEAR_W 0x8C | ||
43 | #define GPIO_RTC_YEAR_R 0x8D | ||
44 | #define GPIO_RTC_DAY_W 0x8A | ||
45 | #define GPIO_RTC_DAY_R 0x8B | ||
46 | #define GPIO_RTC_MONTH_W 0x88 | ||
47 | #define GPIO_RTC_MONTH_R 0x89 | ||
48 | #define GPIO_RTC_DATE_W 0x86 | ||
49 | #define GPIO_RTC_DATE_R 0x87 | ||
50 | #define GPIO_RTC_HOURS_W 0x84 | ||
51 | #define GPIO_RTC_HOURS_R 0x85 | ||
52 | #define GPIO_RTC_MINUTES_W 0x82 | ||
53 | #define GPIO_RTC_MINUTES_R 0x83 | ||
54 | #define GPIO_RTC_SECONDS_W 0x80 | ||
55 | #define GPIO_RTC_SECONDS_R 0x81 | ||
56 | #define GPIO_RTC_DELAY_TIME 8 | ||
57 | |||
58 | struct moxart_rtc { | ||
59 | struct rtc_device *rtc; | ||
60 | spinlock_t rtc_lock; | ||
61 | int gpio_data, gpio_sclk, gpio_reset; | ||
62 | }; | ||
63 | |||
64 | static const int day_of_year[12] = { 0, 31, 59, 90, 120, 151, 181, | ||
65 | 212, 243, 273, 304, 334 }; | ||
66 | |||
67 | static void moxart_rtc_write_byte(struct device *dev, u8 data) | ||
68 | { | ||
69 | struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev); | ||
70 | int i; | ||
71 | |||
72 | for (i = 0; i < 8; i++, data >>= 1) { | ||
73 | gpio_set_value(moxart_rtc->gpio_sclk, 0); | ||
74 | gpio_set_value(moxart_rtc->gpio_data, ((data & 1) == 1)); | ||
75 | udelay(GPIO_RTC_DELAY_TIME); | ||
76 | gpio_set_value(moxart_rtc->gpio_sclk, 1); | ||
77 | udelay(GPIO_RTC_DELAY_TIME); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | static u8 moxart_rtc_read_byte(struct device *dev) | ||
82 | { | ||
83 | struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev); | ||
84 | int i; | ||
85 | u8 data = 0; | ||
86 | |||
87 | for (i = 0; i < 8; i++) { | ||
88 | gpio_set_value(moxart_rtc->gpio_sclk, 0); | ||
89 | udelay(GPIO_RTC_DELAY_TIME); | ||
90 | gpio_set_value(moxart_rtc->gpio_sclk, 1); | ||
91 | udelay(GPIO_RTC_DELAY_TIME); | ||
92 | if (gpio_get_value(moxart_rtc->gpio_data)) | ||
93 | data |= (1 << i); | ||
94 | udelay(GPIO_RTC_DELAY_TIME); | ||
95 | } | ||
96 | return data; | ||
97 | } | ||
98 | |||
99 | static u8 moxart_rtc_read_register(struct device *dev, u8 cmd) | ||
100 | { | ||
101 | struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev); | ||
102 | u8 data; | ||
103 | unsigned long flags; | ||
104 | |||
105 | local_irq_save(flags); | ||
106 | |||
107 | gpio_direction_output(moxart_rtc->gpio_data, 0); | ||
108 | gpio_set_value(moxart_rtc->gpio_reset, 1); | ||
109 | udelay(GPIO_RTC_DELAY_TIME); | ||
110 | moxart_rtc_write_byte(dev, cmd); | ||
111 | gpio_direction_input(moxart_rtc->gpio_data); | ||
112 | udelay(GPIO_RTC_DELAY_TIME); | ||
113 | data = moxart_rtc_read_byte(dev); | ||
114 | gpio_set_value(moxart_rtc->gpio_sclk, 0); | ||
115 | gpio_set_value(moxart_rtc->gpio_reset, 0); | ||
116 | udelay(GPIO_RTC_DELAY_TIME); | ||
117 | |||
118 | local_irq_restore(flags); | ||
119 | |||
120 | return data; | ||
121 | } | ||
122 | |||
123 | static void moxart_rtc_write_register(struct device *dev, u8 cmd, u8 data) | ||
124 | { | ||
125 | struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev); | ||
126 | unsigned long flags; | ||
127 | |||
128 | local_irq_save(flags); | ||
129 | |||
130 | gpio_direction_output(moxart_rtc->gpio_data, 0); | ||
131 | gpio_set_value(moxart_rtc->gpio_reset, 1); | ||
132 | udelay(GPIO_RTC_DELAY_TIME); | ||
133 | moxart_rtc_write_byte(dev, cmd); | ||
134 | moxart_rtc_write_byte(dev, data); | ||
135 | gpio_set_value(moxart_rtc->gpio_sclk, 0); | ||
136 | gpio_set_value(moxart_rtc->gpio_reset, 0); | ||
137 | udelay(GPIO_RTC_DELAY_TIME); | ||
138 | |||
139 | local_irq_restore(flags); | ||
140 | } | ||
141 | |||
142 | static int moxart_rtc_set_time(struct device *dev, struct rtc_time *tm) | ||
143 | { | ||
144 | struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev); | ||
145 | |||
146 | spin_lock_irq(&moxart_rtc->rtc_lock); | ||
147 | |||
148 | moxart_rtc_write_register(dev, GPIO_RTC_PROTECT_W, 0); | ||
149 | moxart_rtc_write_register(dev, GPIO_RTC_YEAR_W, | ||
150 | (((tm->tm_year - 100) / 10) << 4) | | ||
151 | ((tm->tm_year - 100) % 10)); | ||
152 | |||
153 | moxart_rtc_write_register(dev, GPIO_RTC_MONTH_W, | ||
154 | (((tm->tm_mon + 1) / 10) << 4) | | ||
155 | ((tm->tm_mon + 1) % 10)); | ||
156 | |||
157 | moxart_rtc_write_register(dev, GPIO_RTC_DATE_W, | ||
158 | ((tm->tm_mday / 10) << 4) | | ||
159 | (tm->tm_mday % 10)); | ||
160 | |||
161 | moxart_rtc_write_register(dev, GPIO_RTC_HOURS_W, | ||
162 | ((tm->tm_hour / 10) << 4) | | ||
163 | (tm->tm_hour % 10)); | ||
164 | |||
165 | moxart_rtc_write_register(dev, GPIO_RTC_MINUTES_W, | ||
166 | ((tm->tm_min / 10) << 4) | | ||
167 | (tm->tm_min % 10)); | ||
168 | |||
169 | moxart_rtc_write_register(dev, GPIO_RTC_SECONDS_W, | ||
170 | ((tm->tm_sec / 10) << 4) | | ||
171 | (tm->tm_sec % 10)); | ||
172 | |||
173 | moxart_rtc_write_register(dev, GPIO_RTC_PROTECT_W, 0x80); | ||
174 | |||
175 | spin_unlock_irq(&moxart_rtc->rtc_lock); | ||
176 | |||
177 | 	dev_dbg(dev, "%s: success tm_year=%d tm_mon=%d " | ||
178 | "tm_mday=%d tm_hour=%d tm_min=%d tm_sec=%d\n", | ||
179 | __func__, tm->tm_year, tm->tm_mon, tm->tm_mday, | ||
180 | tm->tm_hour, tm->tm_min, tm->tm_sec); | ||
181 | |||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | static int moxart_rtc_read_time(struct device *dev, struct rtc_time *tm) | ||
186 | { | ||
187 | struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev); | ||
188 | unsigned char v; | ||
189 | |||
190 | spin_lock_irq(&moxart_rtc->rtc_lock); | ||
191 | |||
192 | v = moxart_rtc_read_register(dev, GPIO_RTC_SECONDS_R); | ||
193 | tm->tm_sec = (((v & 0x70) >> 4) * 10) + (v & 0x0F); | ||
194 | |||
195 | v = moxart_rtc_read_register(dev, GPIO_RTC_MINUTES_R); | ||
196 | tm->tm_min = (((v & 0x70) >> 4) * 10) + (v & 0x0F); | ||
197 | |||
198 | v = moxart_rtc_read_register(dev, GPIO_RTC_HOURS_R); | ||
199 | if (v & 0x80) { /* 12-hour mode */ | ||
200 | tm->tm_hour = (((v & 0x10) >> 4) * 10) + (v & 0x0F); | ||
201 | if (v & 0x20) { /* PM mode */ | ||
202 | tm->tm_hour += 12; | ||
203 | if (tm->tm_hour >= 24) | ||
204 | tm->tm_hour = 0; | ||
205 | } | ||
206 | } else { /* 24-hour mode */ | ||
207 | tm->tm_hour = (((v & 0x30) >> 4) * 10) + (v & 0x0F); | ||
208 | } | ||
209 | |||
210 | v = moxart_rtc_read_register(dev, GPIO_RTC_DATE_R); | ||
211 | tm->tm_mday = (((v & 0x30) >> 4) * 10) + (v & 0x0F); | ||
212 | |||
213 | v = moxart_rtc_read_register(dev, GPIO_RTC_MONTH_R); | ||
214 | tm->tm_mon = (((v & 0x10) >> 4) * 10) + (v & 0x0F); | ||
215 | tm->tm_mon--; | ||
216 | |||
217 | v = moxart_rtc_read_register(dev, GPIO_RTC_YEAR_R); | ||
218 | tm->tm_year = (((v & 0xF0) >> 4) * 10) + (v & 0x0F); | ||
219 | tm->tm_year += 100; | ||
220 | 	/* the two-digit hardware year counts from 2000, so | ||
221 | 	 * the value is already correct after the += 100 */ | ||
222 | |||
223 | v = moxart_rtc_read_register(dev, GPIO_RTC_DAY_R); | ||
224 | tm->tm_wday = (v & 0x0f) - 1; | ||
225 | tm->tm_yday = day_of_year[tm->tm_mon]; | ||
226 | tm->tm_yday += (tm->tm_mday - 1); | ||
227 | if (tm->tm_mon >= 2) { | ||
228 | 		if (!(tm->tm_year % 4) && (tm->tm_year % 100 || !((tm->tm_year + 1900) % 400))) | ||
229 | tm->tm_yday++; | ||
230 | } | ||
231 | |||
232 | tm->tm_isdst = 0; | ||
233 | |||
234 | spin_unlock_irq(&moxart_rtc->rtc_lock); | ||
235 | |||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | static const struct rtc_class_ops moxart_rtc_ops = { | ||
240 | .read_time = moxart_rtc_read_time, | ||
241 | .set_time = moxart_rtc_set_time, | ||
242 | }; | ||
243 | |||
244 | static int moxart_rtc_probe(struct platform_device *pdev) | ||
245 | { | ||
246 | struct moxart_rtc *moxart_rtc; | ||
247 | int ret = 0; | ||
248 | |||
249 | moxart_rtc = devm_kzalloc(&pdev->dev, sizeof(*moxart_rtc), GFP_KERNEL); | ||
250 | if (!moxart_rtc) { | ||
251 | dev_err(&pdev->dev, "devm_kzalloc failed\n"); | ||
252 | return -ENOMEM; | ||
253 | } | ||
254 | |||
255 | moxart_rtc->gpio_data = of_get_named_gpio(pdev->dev.of_node, | ||
256 | "gpio-rtc-data", 0); | ||
257 | if (!gpio_is_valid(moxart_rtc->gpio_data)) { | ||
258 | dev_err(&pdev->dev, "invalid gpio (data): %d\n", | ||
259 | moxart_rtc->gpio_data); | ||
260 | return moxart_rtc->gpio_data; | ||
261 | } | ||
262 | |||
263 | moxart_rtc->gpio_sclk = of_get_named_gpio(pdev->dev.of_node, | ||
264 | "gpio-rtc-sclk", 0); | ||
265 | if (!gpio_is_valid(moxart_rtc->gpio_sclk)) { | ||
266 | dev_err(&pdev->dev, "invalid gpio (sclk): %d\n", | ||
267 | moxart_rtc->gpio_sclk); | ||
268 | return moxart_rtc->gpio_sclk; | ||
269 | } | ||
270 | |||
271 | moxart_rtc->gpio_reset = of_get_named_gpio(pdev->dev.of_node, | ||
272 | "gpio-rtc-reset", 0); | ||
273 | if (!gpio_is_valid(moxart_rtc->gpio_reset)) { | ||
274 | dev_err(&pdev->dev, "invalid gpio (reset): %d\n", | ||
275 | moxart_rtc->gpio_reset); | ||
276 | return moxart_rtc->gpio_reset; | ||
277 | } | ||
278 | |||
279 | spin_lock_init(&moxart_rtc->rtc_lock); | ||
280 | platform_set_drvdata(pdev, moxart_rtc); | ||
281 | |||
282 | ret = devm_gpio_request(&pdev->dev, moxart_rtc->gpio_data, "rtc_data"); | ||
283 | if (ret) { | ||
284 | dev_err(&pdev->dev, "can't get rtc_data gpio\n"); | ||
285 | return ret; | ||
286 | } | ||
287 | |||
288 | ret = devm_gpio_request_one(&pdev->dev, moxart_rtc->gpio_sclk, | ||
289 | GPIOF_DIR_OUT, "rtc_sclk"); | ||
290 | if (ret) { | ||
291 | dev_err(&pdev->dev, "can't get rtc_sclk gpio\n"); | ||
292 | return ret; | ||
293 | } | ||
294 | |||
295 | ret = devm_gpio_request_one(&pdev->dev, moxart_rtc->gpio_reset, | ||
296 | GPIOF_DIR_OUT, "rtc_reset"); | ||
297 | if (ret) { | ||
298 | dev_err(&pdev->dev, "can't get rtc_reset gpio\n"); | ||
299 | return ret; | ||
300 | } | ||
301 | |||
302 | moxart_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, | ||
303 | &moxart_rtc_ops, | ||
304 | THIS_MODULE); | ||
305 | if (IS_ERR(moxart_rtc->rtc)) { | ||
306 | dev_err(&pdev->dev, "devm_rtc_device_register failed\n"); | ||
307 | return PTR_ERR(moxart_rtc->rtc); | ||
308 | } | ||
309 | |||
310 | return 0; | ||
311 | } | ||
312 | |||
313 | static const struct of_device_id moxart_rtc_match[] = { | ||
314 | { .compatible = "moxa,moxart-rtc" }, | ||
315 | { }, | ||
316 | }; | ||
317 | |||
318 | static struct platform_driver moxart_rtc_driver = { | ||
319 | .probe = moxart_rtc_probe, | ||
320 | .driver = { | ||
321 | .name = "moxart-rtc", | ||
322 | .owner = THIS_MODULE, | ||
323 | .of_match_table = moxart_rtc_match, | ||
324 | }, | ||
325 | }; | ||
326 | module_platform_driver(moxart_rtc_driver); | ||
327 | |||
328 | MODULE_DESCRIPTION("MOXART RTC driver"); | ||
329 | MODULE_LICENSE("GPL"); | ||
330 | MODULE_AUTHOR("Jonas Jensen <jonas.jensen@gmail.com>"); | ||
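moxart_rtc_set_time() packs every field as binary-coded decimal, (v / 10) << 4 | v % 10, and read_time() unpacks it the same way; the kernel's linux/bcd.h offers bin2bcd()/bcd2bin() for exactly this. A standalone round-trip of the identical arithmetic:

	#include <assert.h>
	#include <stdio.h>

	/* The packing the driver open-codes; linux/bcd.h names these bin2bcd/bcd2bin. */
	static unsigned char to_bcd(unsigned int v) { return ((v / 10) << 4) | (v % 10); }
	static unsigned int from_bcd(unsigned char b) { return ((b >> 4) * 10) + (b & 0x0f); }

	int main(void)
	{
		for (unsigned int v = 0; v < 100; v++)
			assert(from_bcd(to_bcd(v)) == v);

		printf("59 -> 0x%02x -> %u\n", to_bcd(59), from_bcd(to_bcd(59)));
		return 0;
	}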
diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c index baab802f2153..d536c5962c99 100644 --- a/drivers/rtc/rtc-mv.c +++ b/drivers/rtc/rtc-mv.c | |||
@@ -221,26 +221,17 @@ static int __init mv_rtc_probe(struct platform_device *pdev) | |||
221 | { | 221 | { |
222 | struct resource *res; | 222 | struct resource *res; |
223 | struct rtc_plat_data *pdata; | 223 | struct rtc_plat_data *pdata; |
224 | resource_size_t size; | ||
225 | u32 rtc_time; | 224 | u32 rtc_time; |
226 | int ret = 0; | 225 | int ret = 0; |
227 | 226 | ||
228 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
229 | if (!res) | ||
230 | return -ENODEV; | ||
231 | |||
232 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); | 227 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); |
233 | if (!pdata) | 228 | if (!pdata) |
234 | return -ENOMEM; | 229 | return -ENOMEM; |
235 | 230 | ||
236 | size = resource_size(res); | 231 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
237 | if (!devm_request_mem_region(&pdev->dev, res->start, size, | 232 | pdata->ioaddr = devm_ioremap_resource(&pdev->dev, res); |
238 | pdev->name)) | 233 | if (IS_ERR(pdata->ioaddr)) |
239 | return -EBUSY; | 234 | return PTR_ERR(pdata->ioaddr); |
240 | |||
241 | pdata->ioaddr = devm_ioremap(&pdev->dev, res->start, size); | ||
242 | if (!pdata->ioaddr) | ||
243 | return -ENOMEM; | ||
244 | 235 | ||
245 | pdata->clk = devm_clk_get(&pdev->dev, NULL); | 236 | pdata->clk = devm_clk_get(&pdev->dev, NULL); |
246 | /* Not all SoCs require a clock.*/ | 237 | /* Not all SoCs require a clock.*/ |
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index ab87bacb8f88..50c572645546 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c | |||
@@ -377,22 +377,16 @@ static int mxc_rtc_probe(struct platform_device *pdev) | |||
377 | unsigned long rate; | 377 | unsigned long rate; |
378 | int ret; | 378 | int ret; |
379 | 379 | ||
380 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
381 | if (!res) | ||
382 | return -ENODEV; | ||
383 | |||
384 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); | 380 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); |
385 | if (!pdata) | 381 | if (!pdata) |
386 | return -ENOMEM; | 382 | return -ENOMEM; |
387 | 383 | ||
388 | pdata->devtype = pdev->id_entry->driver_data; | 384 | pdata->devtype = pdev->id_entry->driver_data; |
389 | 385 | ||
390 | if (!devm_request_mem_region(&pdev->dev, res->start, | 386 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
391 | resource_size(res), pdev->name)) | 387 | pdata->ioaddr = devm_ioremap_resource(&pdev->dev, res); |
392 | return -EBUSY; | 388 | if (IS_ERR(pdata->ioaddr)) |
393 | 389 | return PTR_ERR(pdata->ioaddr); | |
394 | pdata->ioaddr = devm_ioremap(&pdev->dev, res->start, | ||
395 | resource_size(res)); | ||
396 | 390 | ||
397 | pdata->clk = devm_clk_get(&pdev->dev, NULL); | 391 | pdata->clk = devm_clk_get(&pdev->dev, NULL); |
398 | if (IS_ERR(pdata->clk)) { | 392 | if (IS_ERR(pdata->clk)) { |
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c index 22861c5e0c59..248653c74b80 100644 --- a/drivers/rtc/rtc-nuc900.c +++ b/drivers/rtc/rtc-nuc900.c | |||
@@ -99,7 +99,7 @@ static int *check_rtc_access_enable(struct nuc900_rtc *nuc900_rtc) | |||
99 | if (!timeout) | 99 | if (!timeout) |
100 | return ERR_PTR(-EPERM); | 100 | return ERR_PTR(-EPERM); |
101 | 101 | ||
102 | return 0; | 102 | return NULL; |
103 | } | 103 | } |
104 | 104 | ||
105 | static int nuc900_rtc_bcd2bin(unsigned int timereg, | 105 | static int nuc900_rtc_bcd2bin(unsigned int timereg, |
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index c6ffbaec32a4..c7d97ee59327 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c | |||
@@ -70,6 +70,8 @@ | |||
70 | #define OMAP_RTC_KICK0_REG 0x6c | 70 | #define OMAP_RTC_KICK0_REG 0x6c |
71 | #define OMAP_RTC_KICK1_REG 0x70 | 71 | #define OMAP_RTC_KICK1_REG 0x70 |
72 | 72 | ||
73 | #define OMAP_RTC_IRQWAKEEN 0x7c | ||
74 | |||
73 | /* OMAP_RTC_CTRL_REG bit fields: */ | 75 | /* OMAP_RTC_CTRL_REG bit fields: */ |
74 | #define OMAP_RTC_CTRL_SPLIT (1<<7) | 76 | #define OMAP_RTC_CTRL_SPLIT (1<<7) |
75 | #define OMAP_RTC_CTRL_DISABLE (1<<6) | 77 | #define OMAP_RTC_CTRL_DISABLE (1<<6) |
@@ -94,12 +96,21 @@ | |||
94 | #define OMAP_RTC_INTERRUPTS_IT_ALARM (1<<3) | 96 | #define OMAP_RTC_INTERRUPTS_IT_ALARM (1<<3) |
95 | #define OMAP_RTC_INTERRUPTS_IT_TIMER (1<<2) | 97 | #define OMAP_RTC_INTERRUPTS_IT_TIMER (1<<2) |
96 | 98 | ||
99 | /* OMAP_RTC_IRQWAKEEN bit fields: */ | ||
100 | #define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN (1<<1) | ||
101 | |||
97 | /* OMAP_RTC_KICKER values */ | 102 | /* OMAP_RTC_KICKER values */ |
98 | #define KICK0_VALUE 0x83e70b13 | 103 | #define KICK0_VALUE 0x83e70b13 |
99 | #define KICK1_VALUE 0x95a4f1e0 | 104 | #define KICK1_VALUE 0x95a4f1e0 |
100 | 105 | ||
101 | #define OMAP_RTC_HAS_KICKER 0x1 | 106 | #define OMAP_RTC_HAS_KICKER 0x1 |
102 | 107 | ||
108 | /* | ||
109 | * Some RTC IP revisions have a dedicated WAKE-EN register to enable | ||
110 | * wakeup generation for the alarm event. | ||
111 | */ | ||
112 | #define OMAP_RTC_HAS_IRQWAKEEN 0x2 | ||
113 | |||
103 | static void __iomem *rtc_base; | 114 | static void __iomem *rtc_base; |
104 | 115 | ||
105 | #define rtc_read(addr) readb(rtc_base + (addr)) | 116 | #define rtc_read(addr) readb(rtc_base + (addr)) |
@@ -299,12 +310,18 @@ static struct rtc_class_ops omap_rtc_ops = { | |||
299 | static int omap_rtc_alarm; | 310 | static int omap_rtc_alarm; |
300 | static int omap_rtc_timer; | 311 | static int omap_rtc_timer; |
301 | 312 | ||
302 | #define OMAP_RTC_DATA_DA830_IDX 1 | 313 | #define OMAP_RTC_DATA_AM3352_IDX 1 |
314 | #define OMAP_RTC_DATA_DA830_IDX 2 | ||
303 | 315 | ||
304 | static struct platform_device_id omap_rtc_devtype[] = { | 316 | static struct platform_device_id omap_rtc_devtype[] = { |
305 | { | 317 | { |
306 | .name = DRIVER_NAME, | 318 | .name = DRIVER_NAME, |
307 | }, { | 319 | }, |
320 | [OMAP_RTC_DATA_AM3352_IDX] = { | ||
321 | .name = "am3352-rtc", | ||
322 | .driver_data = OMAP_RTC_HAS_KICKER | OMAP_RTC_HAS_IRQWAKEEN, | ||
323 | }, | ||
324 | [OMAP_RTC_DATA_DA830_IDX] = { | ||
308 | .name = "da830-rtc", | 325 | .name = "da830-rtc", |
309 | .driver_data = OMAP_RTC_HAS_KICKER, | 326 | .driver_data = OMAP_RTC_HAS_KICKER, |
310 | }, | 327 | }, |
@@ -316,6 +333,9 @@ static const struct of_device_id omap_rtc_of_match[] = { | |||
316 | { .compatible = "ti,da830-rtc", | 333 | { .compatible = "ti,da830-rtc", |
317 | .data = &omap_rtc_devtype[OMAP_RTC_DATA_DA830_IDX], | 334 | .data = &omap_rtc_devtype[OMAP_RTC_DATA_DA830_IDX], |
318 | }, | 335 | }, |
336 | { .compatible = "ti,am3352-rtc", | ||
337 | .data = &omap_rtc_devtype[OMAP_RTC_DATA_AM3352_IDX], | ||
338 | }, | ||
319 | {}, | 339 | {}, |
320 | }; | 340 | }; |
321 | MODULE_DEVICE_TABLE(of, omap_rtc_of_match); | 341 | MODULE_DEVICE_TABLE(of, omap_rtc_of_match); |
@@ -464,16 +484,28 @@ static u8 irqstat; | |||
464 | 484 | ||
465 | static int omap_rtc_suspend(struct device *dev) | 485 | static int omap_rtc_suspend(struct device *dev) |
466 | { | 486 | { |
487 | u8 irqwake_stat; | ||
488 | struct platform_device *pdev = to_platform_device(dev); | ||
489 | const struct platform_device_id *id_entry = | ||
490 | platform_get_device_id(pdev); | ||
491 | |||
467 | irqstat = rtc_read(OMAP_RTC_INTERRUPTS_REG); | 492 | irqstat = rtc_read(OMAP_RTC_INTERRUPTS_REG); |
468 | 493 | ||
469 | /* FIXME the RTC alarm is not currently acting as a wakeup event | 494 | /* FIXME the RTC alarm is not currently acting as a wakeup event |
470 | * source, and in fact this enable() call is just saving a flag | 495 | * source on some platforms, and in fact this enable() call is just |
471 | * that's never used... | 496 | * saving a flag that's never used... |
472 | */ | 497 | */ |
473 | if (device_may_wakeup(dev)) | 498 | if (device_may_wakeup(dev)) { |
474 | enable_irq_wake(omap_rtc_alarm); | 499 | enable_irq_wake(omap_rtc_alarm); |
475 | else | 500 | |
501 | if (id_entry->driver_data & OMAP_RTC_HAS_IRQWAKEEN) { | ||
502 | irqwake_stat = rtc_read(OMAP_RTC_IRQWAKEEN); | ||
503 | irqwake_stat |= OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN; | ||
504 | rtc_write(irqwake_stat, OMAP_RTC_IRQWAKEEN); | ||
505 | } | ||
506 | } else { | ||
476 | rtc_write(0, OMAP_RTC_INTERRUPTS_REG); | 507 | rtc_write(0, OMAP_RTC_INTERRUPTS_REG); |
508 | } | ||
477 | 509 | ||
478 | /* Disable the clock/module */ | 510 | /* Disable the clock/module */ |
479 | pm_runtime_put_sync(dev); | 511 | pm_runtime_put_sync(dev); |
@@ -483,13 +515,25 @@ static int omap_rtc_suspend(struct device *dev) | |||
483 | 515 | ||
484 | static int omap_rtc_resume(struct device *dev) | 516 | static int omap_rtc_resume(struct device *dev) |
485 | { | 517 | { |
518 | u8 irqwake_stat; | ||
519 | struct platform_device *pdev = to_platform_device(dev); | ||
520 | const struct platform_device_id *id_entry = | ||
521 | platform_get_device_id(pdev); | ||
522 | |||
486 | /* Enable the clock/module so that we can access the registers */ | 523 | /* Enable the clock/module so that we can access the registers */ |
487 | pm_runtime_get_sync(dev); | 524 | pm_runtime_get_sync(dev); |
488 | 525 | ||
489 | if (device_may_wakeup(dev)) | 526 | if (device_may_wakeup(dev)) { |
490 | disable_irq_wake(omap_rtc_alarm); | 527 | disable_irq_wake(omap_rtc_alarm); |
491 | else | 528 | |
529 | if (id_entry->driver_data & OMAP_RTC_HAS_IRQWAKEEN) { | ||
530 | irqwake_stat = rtc_read(OMAP_RTC_IRQWAKEEN); | ||
531 | irqwake_stat &= ~OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN; | ||
532 | rtc_write(irqwake_stat, OMAP_RTC_IRQWAKEEN); | ||
533 | } | ||
534 | } else { | ||
492 | rtc_write(irqstat, OMAP_RTC_INTERRUPTS_REG); | 535 | rtc_write(irqstat, OMAP_RTC_INTERRUPTS_REG); |
536 | } | ||
493 | return 0; | 537 | return 0; |
494 | } | 538 | } |
495 | #endif | 539 | #endif |
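On suspend the wakeup-capable path now ORs OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN into the wake-enable register and resume clears it again: a plain set/clear read-modify-write of one bit. Modelled over a register image (names illustrative):

	#include <stdio.h>

	#define ALARM_WAKEEN (1 << 1)	/* mirrors OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN */

	static unsigned char irqwakeen;	/* stand-in for the mapped register */

	static void suspend_path(int may_wakeup)
	{
		if (may_wakeup)
			irqwakeen |= ALARM_WAKEEN;	/* arm the alarm as wake source */
	}

	static void resume_path(int may_wakeup)
	{
		if (may_wakeup)
			irqwakeen &= ~ALARM_WAKEEN;	/* disarm after waking */
	}

	int main(void)
	{
		suspend_path(1);
		printf("suspended: 0x%02x\n", irqwakeen);
		resume_path(1);
		printf("resumed:   0x%02x\n", irqwakeen);
		return 0;
	}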
diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index a1fecc8d97fc..fffb7d3449d7 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c | |||
@@ -238,6 +238,15 @@ static int palmas_rtc_probe(struct platform_device *pdev) | |||
238 | struct palmas *palmas = dev_get_drvdata(pdev->dev.parent); | 238 | struct palmas *palmas = dev_get_drvdata(pdev->dev.parent); |
239 | struct palmas_rtc *palmas_rtc = NULL; | 239 | struct palmas_rtc *palmas_rtc = NULL; |
240 | int ret; | 240 | int ret; |
241 | bool enable_bb_charging = false; | ||
242 | bool high_bb_charging; | ||
243 | |||
244 | if (pdev->dev.of_node) { | ||
245 | enable_bb_charging = of_property_read_bool(pdev->dev.of_node, | ||
246 | "ti,backup-battery-chargeable"); | ||
247 | high_bb_charging = of_property_read_bool(pdev->dev.of_node, | ||
248 | "ti,backup-battery-charge-high-current"); | ||
249 | } | ||
241 | 250 | ||
242 | palmas_rtc = devm_kzalloc(&pdev->dev, sizeof(struct palmas_rtc), | 251 | palmas_rtc = devm_kzalloc(&pdev->dev, sizeof(struct palmas_rtc), |
243 | GFP_KERNEL); | 252 | GFP_KERNEL); |
@@ -254,6 +263,32 @@ static int palmas_rtc_probe(struct platform_device *pdev) | |||
254 | palmas_rtc->dev = &pdev->dev; | 263 | palmas_rtc->dev = &pdev->dev; |
255 | platform_set_drvdata(pdev, palmas_rtc); | 264 | platform_set_drvdata(pdev, palmas_rtc); |
256 | 265 | ||
266 | if (enable_bb_charging) { | ||
267 | unsigned reg = PALMAS_BACKUP_BATTERY_CTRL_BBS_BBC_LOW_ICHRG; | ||
268 | |||
269 | if (high_bb_charging) | ||
270 | reg = 0; | ||
271 | |||
272 | ret = palmas_update_bits(palmas, PALMAS_PMU_CONTROL_BASE, | ||
273 | PALMAS_BACKUP_BATTERY_CTRL, | ||
274 | PALMAS_BACKUP_BATTERY_CTRL_BBS_BBC_LOW_ICHRG, reg); | ||
275 | if (ret < 0) { | ||
276 | dev_err(&pdev->dev, | ||
277 | "BACKUP_BATTERY_CTRL update failed, %d\n", ret); | ||
278 | return ret; | ||
279 | } | ||
280 | |||
281 | ret = palmas_update_bits(palmas, PALMAS_PMU_CONTROL_BASE, | ||
282 | PALMAS_BACKUP_BATTERY_CTRL, | ||
283 | PALMAS_BACKUP_BATTERY_CTRL_BB_CHG_EN, | ||
284 | PALMAS_BACKUP_BATTERY_CTRL_BB_CHG_EN); | ||
285 | if (ret < 0) { | ||
286 | dev_err(&pdev->dev, | ||
287 | "BACKUP_BATTERY_CTRL update failed, %d\n", ret); | ||
288 | return ret; | ||
289 | } | ||
290 | } | ||
291 | |||
257 | /* Start RTC */ | 292 | /* Start RTC */ |
258 | ret = palmas_update_bits(palmas, PALMAS_RTC_BASE, PALMAS_RTC_CTRL_REG, | 293 | ret = palmas_update_bits(palmas, PALMAS_RTC_BASE, PALMAS_RTC_CTRL_REG, |
259 | PALMAS_RTC_CTRL_REG_STOP_RTC, | 294 | PALMAS_RTC_CTRL_REG_STOP_RTC, |
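The probe hunk above gates the charger setup on two optional device-tree booleans; note that high_bb_charging is assigned only when an of_node is present, so a defensive variant would default both flags first. A sketch of that lookup under the binding names used in the patch (the helper is illustrative):

#include <linux/of.h>

/* Sketch: parse optional charger properties, defaulting both to false. */
static void palmas_rtc_parse_dt(struct device_node *np,
				bool *enable_bb, bool *high_current)
{
	*enable_bb = false;
	*high_current = false;
	if (!np)
		return;
	*enable_bb = of_property_read_bool(np,
			"ti,backup-battery-chargeable");
	*high_current = of_property_read_bool(np,
			"ti,backup-battery-charge-high-current");
}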
diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 205b9f7da1b8..1ee514a3972c 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c | |||
@@ -203,11 +203,6 @@ static int pcf2127_probe(struct i2c_client *client, | |||
203 | return 0; | 203 | return 0; |
204 | } | 204 | } |
205 | 205 | ||
206 | static int pcf2127_remove(struct i2c_client *client) | ||
207 | { | ||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | static const struct i2c_device_id pcf2127_id[] = { | 206 | static const struct i2c_device_id pcf2127_id[] = { |
212 | { "pcf2127", 0 }, | 207 | { "pcf2127", 0 }, |
213 | { } | 208 | { } |
@@ -229,7 +224,6 @@ static struct i2c_driver pcf2127_driver = { | |||
229 | .of_match_table = of_match_ptr(pcf2127_of_match), | 224 | .of_match_table = of_match_ptr(pcf2127_of_match), |
230 | }, | 225 | }, |
231 | .probe = pcf2127_probe, | 226 | .probe = pcf2127_probe, |
232 | .remove = pcf2127_remove, | ||
233 | .id_table = pcf2127_id, | 227 | .id_table = pcf2127_id, |
234 | }; | 228 | }; |
235 | 229 | ||
diff --git a/drivers/rtc/rtc-sirfsoc.c b/drivers/rtc/rtc-sirfsoc.c index aa7ed4b5f7f0..63460cf80f1b 100644 --- a/drivers/rtc/rtc-sirfsoc.c +++ b/drivers/rtc/rtc-sirfsoc.c | |||
@@ -44,6 +44,7 @@ struct sirfsoc_rtc_drv { | |||
44 | struct rtc_device *rtc; | 44 | struct rtc_device *rtc; |
45 | u32 rtc_base; | 45 | u32 rtc_base; |
46 | u32 irq; | 46 | u32 irq; |
47 | unsigned irq_wake; | ||
47 | /* Overflow for every 8 years extra time */ | 48 | /* Overflow for every 8 years extra time */ |
48 | u32 overflow_rtc; | 49 | u32 overflow_rtc; |
49 | #ifdef CONFIG_PM | 50 | #ifdef CONFIG_PM |
@@ -355,8 +356,8 @@ static int sirfsoc_rtc_suspend(struct device *dev) | |||
355 | rtcdrv->saved_counter = | 356 | rtcdrv->saved_counter = |
356 | sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN); | 357 | sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN); |
357 | rtcdrv->saved_overflow_rtc = rtcdrv->overflow_rtc; | 358 | rtcdrv->saved_overflow_rtc = rtcdrv->overflow_rtc; |
358 | if (device_may_wakeup(&pdev->dev)) | 359 | if (device_may_wakeup(&pdev->dev) && !enable_irq_wake(rtcdrv->irq)) |
359 | enable_irq_wake(rtcdrv->irq); | 360 | rtcdrv->irq_wake = 1; |
360 | 361 | ||
361 | return 0; | 362 | return 0; |
362 | } | 363 | } |
@@ -423,8 +424,10 @@ static int sirfsoc_rtc_resume(struct device *dev) | |||
423 | struct platform_device *pdev = to_platform_device(dev); | 424 | struct platform_device *pdev = to_platform_device(dev); |
424 | struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev); | 425 | struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev); |
425 | sirfsoc_rtc_thaw(dev); | 426 | sirfsoc_rtc_thaw(dev); |
426 | if (device_may_wakeup(&pdev->dev)) | 427 | if (device_may_wakeup(&pdev->dev) && rtcdrv->irq_wake) { |
427 | disable_irq_wake(rtcdrv->irq); | 428 | disable_irq_wake(rtcdrv->irq); |
429 | rtcdrv->irq_wake = 0; | ||
430 | } | ||
428 | 431 | ||
429 | return 0; | 432 | return 0; |
430 | } | 433 | } |
@@ -434,8 +437,10 @@ static int sirfsoc_rtc_restore(struct device *dev) | |||
434 | struct platform_device *pdev = to_platform_device(dev); | 437 | struct platform_device *pdev = to_platform_device(dev); |
435 | struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev); | 438 | struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev); |
436 | 439 | ||
437 | if (device_may_wakeup(&pdev->dev)) | 440 | if (device_may_wakeup(&pdev->dev) && rtcdrv->irq_wake) { |
438 | disable_irq_wake(rtcdrv->irq); | 441 | disable_irq_wake(rtcdrv->irq); |
442 | rtcdrv->irq_wake = 0; | ||
443 | } | ||
439 | return 0; | 444 | return 0; |
440 | } | 445 | } |
441 | 446 | ||
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c index af5e97e3f272..a176ba614683 100644 --- a/drivers/rtc/rtc-stk17ta8.c +++ b/drivers/rtc/rtc-stk17ta8.c | |||
@@ -294,19 +294,14 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev) | |||
294 | void __iomem *ioaddr; | 294 | void __iomem *ioaddr; |
295 | int ret = 0; | 295 | int ret = 0; |
296 | 296 | ||
297 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
298 | if (!res) | ||
299 | return -ENODEV; | ||
300 | |||
301 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); | 297 | pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); |
302 | if (!pdata) | 298 | if (!pdata) |
303 | return -ENOMEM; | 299 | return -ENOMEM; |
304 | if (!devm_request_mem_region(&pdev->dev, res->start, RTC_REG_SIZE, | 300 | |
305 | pdev->name)) | 301 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
306 | return -EBUSY; | 302 | ioaddr = devm_ioremap_resource(&pdev->dev, res); |
307 | ioaddr = devm_ioremap(&pdev->dev, res->start, RTC_REG_SIZE); | 303 | if (IS_ERR(ioaddr)) |
308 | if (!ioaddr) | 304 | return PTR_ERR(ioaddr); |
309 | return -ENOMEM; | ||
310 | pdata->ioaddr = ioaddr; | 305 | pdata->ioaddr = ioaddr; |
311 | pdata->irq = platform_get_irq(pdev, 0); | 306 | pdata->irq = platform_get_irq(pdev, 0); |
312 | 307 | ||
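This conversion (and the rtc-tx4939 one below) folds the NULL-resource check, devm_request_mem_region() and devm_ioremap() into a single devm_ioremap_resource() call, which reports failure as an ERR_PTR-encoded error instead of NULL. A minimal probe sketch of the resulting shape (driver names are illustrative):

#include <linux/err.h>
#include <linux/io.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	/* a NULL res is rejected inside devm_ioremap_resource() */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);

	/* base is a live mapping here, unmapped automatically on unbind */
	return 0;
}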
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c index f9a0677e4e3b..4f87234e0dee 100644 --- a/drivers/rtc/rtc-tx4939.c +++ b/drivers/rtc/rtc-tx4939.c | |||
@@ -244,9 +244,6 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev) | |||
244 | struct resource *res; | 244 | struct resource *res; |
245 | int irq, ret; | 245 | int irq, ret; |
246 | 246 | ||
247 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
248 | if (!res) | ||
249 | return -ENODEV; | ||
250 | irq = platform_get_irq(pdev, 0); | 247 | irq = platform_get_irq(pdev, 0); |
251 | if (irq < 0) | 248 | if (irq < 0) |
252 | return -ENODEV; | 249 | return -ENODEV; |
@@ -255,13 +252,10 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev) | |||
255 | return -ENOMEM; | 252 | return -ENOMEM; |
256 | platform_set_drvdata(pdev, pdata); | 253 | platform_set_drvdata(pdev, pdata); |
257 | 254 | ||
258 | if (!devm_request_mem_region(&pdev->dev, res->start, | 255 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
259 | resource_size(res), pdev->name)) | 256 | pdata->rtcreg = devm_ioremap_resource(&pdev->dev, res); |
260 | return -EBUSY; | 257 | if (IS_ERR(pdata->rtcreg)) |
261 | pdata->rtcreg = devm_ioremap(&pdev->dev, res->start, | 258 | return PTR_ERR(pdata->rtcreg); |
262 | resource_size(res)); | ||
263 | if (!pdata->rtcreg) | ||
264 | return -EBUSY; | ||
265 | 259 | ||
266 | spin_lock_init(&pdata->lock); | 260 | spin_lock_init(&pdata->lock); |
267 | tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP); | 261 | tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP); |
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 9e5e14686e75..794820a123d0 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c | |||
@@ -30,8 +30,8 @@ | |||
30 | 30 | ||
31 | #define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x) | 31 | #define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x) |
32 | 32 | ||
33 | #define TO_USER 0 | 33 | #define TO_USER 1 |
34 | #define TO_KERNEL 1 | 34 | #define TO_KERNEL 0 |
35 | #define CHUNK_INFO_SIZE 34 /* 2 16-byte char, each followed by blank */ | 35 | #define CHUNK_INFO_SIZE 34 /* 2 16-byte char, each followed by blank */ |
36 | 36 | ||
37 | enum arch_id { | 37 | enum arch_id { |
@@ -73,7 +73,7 @@ static struct ipl_parameter_block *ipl_block; | |||
73 | * @count: Size of buffer, which should be copied | 73 | * @count: Size of buffer, which should be copied |
74 | * @mode: Either TO_KERNEL or TO_USER | 74 | * @mode: Either TO_KERNEL or TO_USER |
75 | */ | 75 | */ |
76 | static int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode) | 76 | int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode) |
77 | { | 77 | { |
78 | int offs, blk_num; | 78 | int offs, blk_num; |
79 | static char buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); | 79 | static char buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); |
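The zcore hunks swap the TO_USER/TO_KERNEL values and drop the static from memcpy_hsa() so code outside this file can call it; the mode flag merely selects the copy primitive for the destination. A hedged sketch of that dispatch (staging and HSA details elided; names are illustrative):

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/uaccess.h>

/* Sketch: route an already-staged chunk to a user or kernel buffer. */
static int copy_chunk(void *dest, const void *src, size_t count, int mode)
{
	if (mode == TO_USER) {
		if (copy_to_user((void __force __user *)dest, src, count))
			return -EFAULT;
	} else {
		memcpy(dest, src, count);
	}
	return 0;
}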
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c index 6488a7351a60..7e8346ec9cdc 100644 --- a/drivers/video/acornfb.c +++ b/drivers/video/acornfb.c | |||
@@ -38,14 +38,6 @@ | |||
38 | #include "acornfb.h" | 38 | #include "acornfb.h" |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * VIDC machines can't do 16 or 32BPP modes. | ||
42 | */ | ||
43 | #ifdef HAS_VIDC | ||
44 | #undef FBCON_HAS_CFB16 | ||
45 | #undef FBCON_HAS_CFB32 | ||
46 | #endif | ||
47 | |||
48 | /* | ||
49 | * Default resolution. | 41 | * Default resolution. |
50 | * NOTE that it has to be supported in the table towards | 42 | * NOTE that it has to be supported in the table towards |
51 | * the end of this file. | 43 | * the end of this file. |
@@ -106,238 +98,6 @@ static struct vidc_timing current_vidc; | |||
106 | 98 | ||
107 | extern unsigned int vram_size; /* set by setup.c */ | 99 | extern unsigned int vram_size; /* set by setup.c */ |
108 | 100 | ||
109 | #ifdef HAS_VIDC | ||
110 | |||
111 | #define MAX_SIZE 480*1024 | ||
112 | |||
113 | /* CTL VIDC Actual | ||
114 | * 24.000 0 8.000 | ||
115 | * 25.175 0 8.392 | ||
116 | * 36.000 0 12.000 | ||
117 | * 24.000 1 12.000 | ||
118 | * 25.175 1 12.588 | ||
119 | * 24.000 2 16.000 | ||
120 | * 25.175 2 16.783 | ||
121 | * 36.000 1 18.000 | ||
122 | * 24.000 3 24.000 | ||
123 | * 36.000 2 24.000 | ||
124 | * 25.175 3 25.175 | ||
125 | * 36.000 3 36.000 | ||
126 | */ | ||
127 | struct pixclock { | ||
128 | u_long min_clock; | ||
129 | u_long max_clock; | ||
130 | u_int vidc_ctl; | ||
131 | u_int vid_ctl; | ||
132 | }; | ||
133 | |||
134 | static struct pixclock arc_clocks[] = { | ||
135 | /* we allow +/-1% on these */ | ||
136 | { 123750, 126250, VIDC_CTRL_DIV3, VID_CTL_24MHz }, /* 8.000MHz */ | ||
137 | { 82500, 84167, VIDC_CTRL_DIV2, VID_CTL_24MHz }, /* 12.000MHz */ | ||
138 | { 61875, 63125, VIDC_CTRL_DIV1_5, VID_CTL_24MHz }, /* 16.000MHz */ | ||
139 | { 41250, 42083, VIDC_CTRL_DIV1, VID_CTL_24MHz }, /* 24.000MHz */ | ||
140 | }; | ||
141 | |||
142 | static struct pixclock * | ||
143 | acornfb_valid_pixrate(struct fb_var_screeninfo *var) | ||
144 | { | ||
145 | u_long pixclock = var->pixclock; | ||
146 | u_int i; | ||
147 | |||
148 | if (!var->pixclock) | ||
149 | return NULL; | ||
150 | |||
151 | for (i = 0; i < ARRAY_SIZE(arc_clocks); i++) | ||
152 | if (pixclock > arc_clocks[i].min_clock && | ||
153 | pixclock < arc_clocks[i].max_clock) | ||
154 | return arc_clocks + i; | ||
155 | |||
156 | return NULL; | ||
157 | } | ||
158 | |||
159 | /* VIDC Rules: | ||
160 | * hcr : must be even (interlace, hcr/2 must be even) | ||
161 | * hswr : must be even | ||
162 | * hdsr : must be odd | ||
163 | * hder : must be odd | ||
164 | * | ||
165 | * vcr : must be odd | ||
166 | * vswr : >= 1 | ||
167 | * vdsr : >= 1 | ||
168 | * vder : >= vdsr | ||
169 | * if interlaced, then hcr/2 must be even | ||
170 | */ | ||
171 | static void | ||
172 | acornfb_set_timing(struct fb_var_screeninfo *var) | ||
173 | { | ||
174 | struct pixclock *pclk; | ||
175 | struct vidc_timing vidc; | ||
176 | u_int horiz_correction; | ||
177 | u_int sync_len, display_start, display_end, cycle; | ||
178 | u_int is_interlaced; | ||
179 | u_int vid_ctl, vidc_ctl; | ||
180 | u_int bandwidth; | ||
181 | |||
182 | memset(&vidc, 0, sizeof(vidc)); | ||
183 | |||
184 | pclk = acornfb_valid_pixrate(var); | ||
185 | vidc_ctl = pclk->vidc_ctl; | ||
186 | vid_ctl = pclk->vid_ctl; | ||
187 | |||
188 | bandwidth = var->pixclock * 8 / var->bits_per_pixel; | ||
189 | /* 25.175, 4bpp = 79.444ns per byte, 317.776ns per word: fifo = 2,6 */ | ||
190 | if (bandwidth > 143500) | ||
191 | vidc_ctl |= VIDC_CTRL_FIFO_3_7; | ||
192 | else if (bandwidth > 71750) | ||
193 | vidc_ctl |= VIDC_CTRL_FIFO_2_6; | ||
194 | else if (bandwidth > 35875) | ||
195 | vidc_ctl |= VIDC_CTRL_FIFO_1_5; | ||
196 | else | ||
197 | vidc_ctl |= VIDC_CTRL_FIFO_0_4; | ||
198 | |||
199 | switch (var->bits_per_pixel) { | ||
200 | case 1: | ||
201 | horiz_correction = 19; | ||
202 | vidc_ctl |= VIDC_CTRL_1BPP; | ||
203 | break; | ||
204 | |||
205 | case 2: | ||
206 | horiz_correction = 11; | ||
207 | vidc_ctl |= VIDC_CTRL_2BPP; | ||
208 | break; | ||
209 | |||
210 | case 4: | ||
211 | horiz_correction = 7; | ||
212 | vidc_ctl |= VIDC_CTRL_4BPP; | ||
213 | break; | ||
214 | |||
215 | default: | ||
216 | case 8: | ||
217 | horiz_correction = 5; | ||
218 | vidc_ctl |= VIDC_CTRL_8BPP; | ||
219 | break; | ||
220 | } | ||
221 | |||
222 | if (var->sync & FB_SYNC_COMP_HIGH_ACT) /* should be FB_SYNC_COMP */ | ||
223 | vidc_ctl |= VIDC_CTRL_CSYNC; | ||
224 | else { | ||
225 | if (!(var->sync & FB_SYNC_HOR_HIGH_ACT)) | ||
226 | vid_ctl |= VID_CTL_HS_NHSYNC; | ||
227 | |||
228 | if (!(var->sync & FB_SYNC_VERT_HIGH_ACT)) | ||
229 | vid_ctl |= VID_CTL_VS_NVSYNC; | ||
230 | } | ||
231 | |||
232 | sync_len = var->hsync_len; | ||
233 | display_start = sync_len + var->left_margin; | ||
234 | display_end = display_start + var->xres; | ||
235 | cycle = display_end + var->right_margin; | ||
236 | |||
237 | /* if interlaced, then hcr/2 must be even */ | ||
238 | is_interlaced = (var->vmode & FB_VMODE_MASK) == FB_VMODE_INTERLACED; | ||
239 | |||
240 | if (is_interlaced) { | ||
241 | vidc_ctl |= VIDC_CTRL_INTERLACE; | ||
242 | if (cycle & 2) { | ||
243 | cycle += 2; | ||
244 | var->right_margin += 2; | ||
245 | } | ||
246 | } | ||
247 | |||
248 | vidc.h_cycle = (cycle - 2) / 2; | ||
249 | vidc.h_sync_width = (sync_len - 2) / 2; | ||
250 | vidc.h_border_start = (display_start - 1) / 2; | ||
251 | vidc.h_display_start = (display_start - horiz_correction) / 2; | ||
252 | vidc.h_display_end = (display_end - horiz_correction) / 2; | ||
253 | vidc.h_border_end = (display_end - 1) / 2; | ||
254 | vidc.h_interlace = (vidc.h_cycle + 1) / 2; | ||
255 | |||
256 | sync_len = var->vsync_len; | ||
257 | display_start = sync_len + var->upper_margin; | ||
258 | display_end = display_start + var->yres; | ||
259 | cycle = display_end + var->lower_margin; | ||
260 | |||
261 | if (is_interlaced) | ||
262 | cycle = (cycle - 3) / 2; | ||
263 | else | ||
264 | cycle = cycle - 1; | ||
265 | |||
266 | vidc.v_cycle = cycle; | ||
267 | vidc.v_sync_width = sync_len - 1; | ||
268 | vidc.v_border_start = display_start - 1; | ||
269 | vidc.v_display_start = vidc.v_border_start; | ||
270 | vidc.v_display_end = display_end - 1; | ||
271 | vidc.v_border_end = vidc.v_display_end; | ||
272 | |||
273 | if (machine_is_a5k()) | ||
274 | __raw_writeb(vid_ctl, IOEB_VID_CTL); | ||
275 | |||
276 | if (memcmp(¤t_vidc, &vidc, sizeof(vidc))) { | ||
277 | current_vidc = vidc; | ||
278 | |||
279 | vidc_writel(0xe0000000 | vidc_ctl); | ||
280 | vidc_writel(0x80000000 | (vidc.h_cycle << 14)); | ||
281 | vidc_writel(0x84000000 | (vidc.h_sync_width << 14)); | ||
282 | vidc_writel(0x88000000 | (vidc.h_border_start << 14)); | ||
283 | vidc_writel(0x8c000000 | (vidc.h_display_start << 14)); | ||
284 | vidc_writel(0x90000000 | (vidc.h_display_end << 14)); | ||
285 | vidc_writel(0x94000000 | (vidc.h_border_end << 14)); | ||
286 | vidc_writel(0x98000000); | ||
287 | vidc_writel(0x9c000000 | (vidc.h_interlace << 14)); | ||
288 | vidc_writel(0xa0000000 | (vidc.v_cycle << 14)); | ||
289 | vidc_writel(0xa4000000 | (vidc.v_sync_width << 14)); | ||
290 | vidc_writel(0xa8000000 | (vidc.v_border_start << 14)); | ||
291 | vidc_writel(0xac000000 | (vidc.v_display_start << 14)); | ||
292 | vidc_writel(0xb0000000 | (vidc.v_display_end << 14)); | ||
293 | vidc_writel(0xb4000000 | (vidc.v_border_end << 14)); | ||
294 | vidc_writel(0xb8000000); | ||
295 | vidc_writel(0xbc000000); | ||
296 | } | ||
297 | #ifdef DEBUG_MODE_SELECTION | ||
298 | printk(KERN_DEBUG "VIDC registers for %dx%dx%d:\n", var->xres, | ||
299 | var->yres, var->bits_per_pixel); | ||
300 | printk(KERN_DEBUG " H-cycle : %d\n", vidc.h_cycle); | ||
301 | printk(KERN_DEBUG " H-sync-width : %d\n", vidc.h_sync_width); | ||
302 | printk(KERN_DEBUG " H-border-start : %d\n", vidc.h_border_start); | ||
303 | printk(KERN_DEBUG " H-display-start : %d\n", vidc.h_display_start); | ||
304 | printk(KERN_DEBUG " H-display-end : %d\n", vidc.h_display_end); | ||
305 | printk(KERN_DEBUG " H-border-end : %d\n", vidc.h_border_end); | ||
306 | printk(KERN_DEBUG " H-interlace : %d\n", vidc.h_interlace); | ||
307 | printk(KERN_DEBUG " V-cycle : %d\n", vidc.v_cycle); | ||
308 | printk(KERN_DEBUG " V-sync-width : %d\n", vidc.v_sync_width); | ||
309 | printk(KERN_DEBUG " V-border-start : %d\n", vidc.v_border_start); | ||
310 | printk(KERN_DEBUG " V-display-start : %d\n", vidc.v_display_start); | ||
311 | printk(KERN_DEBUG " V-display-end : %d\n", vidc.v_display_end); | ||
312 | printk(KERN_DEBUG " V-border-end : %d\n", vidc.v_border_end); | ||
313 | printk(KERN_DEBUG " VIDC Ctrl (E) : 0x%08X\n", vidc_ctl); | ||
314 | printk(KERN_DEBUG " IOEB Ctrl : 0x%08X\n", vid_ctl); | ||
315 | #endif | ||
316 | } | ||
317 | |||
318 | static int | ||
319 | acornfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, | ||
320 | u_int trans, struct fb_info *info) | ||
321 | { | ||
322 | union palette pal; | ||
323 | |||
324 | if (regno >= current_par.palette_size) | ||
325 | return 1; | ||
326 | |||
327 | pal.p = 0; | ||
328 | pal.vidc.reg = regno; | ||
329 | pal.vidc.red = red >> 12; | ||
330 | pal.vidc.green = green >> 12; | ||
331 | pal.vidc.blue = blue >> 12; | ||
332 | |||
333 | current_par.palette[regno] = pal; | ||
334 | |||
335 | vidc_writel(pal.p); | ||
336 | |||
337 | return 0; | ||
338 | } | ||
339 | #endif | ||
340 | |||
341 | #ifdef HAS_VIDC20 | 101 | #ifdef HAS_VIDC20 |
342 | #include <mach/acornfb.h> | 102 | #include <mach/acornfb.h> |
343 | 103 | ||
@@ -634,16 +394,7 @@ acornfb_adjust_timing(struct fb_info *info, struct fb_var_screeninfo *var, u_int | |||
634 | /* hsync_len must be even */ | 394 | /* hsync_len must be even */ |
635 | var->hsync_len = (var->hsync_len + 1) & ~1; | 395 | var->hsync_len = (var->hsync_len + 1) & ~1; |
636 | 396 | ||
637 | #ifdef HAS_VIDC | 397 | #if defined(HAS_VIDC20) |
638 | /* left_margin must be odd */ | ||
639 | if ((var->left_margin & 1) == 0) { | ||
640 | var->left_margin -= 1; | ||
641 | var->right_margin += 1; | ||
642 | } | ||
643 | |||
644 | /* right_margin must be odd */ | ||
645 | var->right_margin |= 1; | ||
646 | #elif defined(HAS_VIDC20) | ||
647 | /* left_margin must be even */ | 398 | /* left_margin must be even */ |
648 | if (var->left_margin & 1) { | 399 | if (var->left_margin & 1) { |
649 | var->left_margin += 1; | 400 | var->left_margin += 1; |
@@ -787,11 +538,7 @@ static int acornfb_set_par(struct fb_info *info) | |||
787 | break; | 538 | break; |
788 | case 8: | 539 | case 8: |
789 | current_par.palette_size = VIDC_PALETTE_SIZE; | 540 | current_par.palette_size = VIDC_PALETTE_SIZE; |
790 | #ifdef HAS_VIDC | ||
791 | info->fix.visual = FB_VISUAL_STATIC_PSEUDOCOLOR; | ||
792 | #else | ||
793 | info->fix.visual = FB_VISUAL_PSEUDOCOLOR; | 541 | info->fix.visual = FB_VISUAL_PSEUDOCOLOR; |
794 | #endif | ||
795 | break; | 542 | break; |
796 | #ifdef HAS_VIDC20 | 543 | #ifdef HAS_VIDC20 |
797 | case 16: | 544 | case 16: |
@@ -971,9 +718,6 @@ static void acornfb_init_fbinfo(void) | |||
971 | #if defined(HAS_VIDC20) | 718 | #if defined(HAS_VIDC20) |
972 | fb_info.var.red.length = 8; | 719 | fb_info.var.red.length = 8; |
973 | fb_info.var.transp.length = 4; | 720 | fb_info.var.transp.length = 4; |
974 | #elif defined(HAS_VIDC) | ||
975 | fb_info.var.red.length = 4; | ||
976 | fb_info.var.transp.length = 1; | ||
977 | #endif | 721 | #endif |
978 | fb_info.var.green = fb_info.var.red; | 722 | fb_info.var.green = fb_info.var.red; |
979 | fb_info.var.blue = fb_info.var.red; | 723 | fb_info.var.blue = fb_info.var.red; |
@@ -1310,14 +1054,6 @@ static int acornfb_probe(struct platform_device *dev) | |||
1310 | fb_info.fix.smem_start = handle; | 1054 | fb_info.fix.smem_start = handle; |
1311 | } | 1055 | } |
1312 | #endif | 1056 | #endif |
1313 | #if defined(HAS_VIDC) | ||
1314 | /* | ||
1315 | * Archimedes/A5000 machines use a fixed address for their | ||
1316 | * framebuffers. Free unused pages | ||
1317 | */ | ||
1318 | free_unused_pages(PAGE_OFFSET + size, PAGE_OFFSET + MAX_SIZE); | ||
1319 | #endif | ||
1320 | |||
1321 | fb_info.fix.smem_len = size; | 1057 | fb_info.fix.smem_len = size; |
1322 | current_par.palette_size = VIDC_PALETTE_SIZE; | 1058 | current_par.palette_size = VIDC_PALETTE_SIZE; |
1323 | 1059 | ||
diff --git a/drivers/video/acornfb.h b/drivers/video/acornfb.h index fb2a7fffe506..175c8ff3367c 100644 --- a/drivers/video/acornfb.h +++ b/drivers/video/acornfb.h | |||
@@ -13,10 +13,6 @@ | |||
13 | #include <asm/hardware/iomd.h> | 13 | #include <asm/hardware/iomd.h> |
14 | #define VIDC_PALETTE_SIZE 256 | 14 | #define VIDC_PALETTE_SIZE 256 |
15 | #define VIDC_NAME "VIDC20" | 15 | #define VIDC_NAME "VIDC20" |
16 | #elif defined(HAS_VIDC) | ||
17 | #include <asm/hardware/memc.h> | ||
18 | #define VIDC_PALETTE_SIZE 16 | ||
19 | #define VIDC_NAME "VIDC" | ||
20 | #endif | 16 | #endif |
21 | 17 | ||
22 | #define EXTEND8(x) ((x)|(x)<<8) | 18 | #define EXTEND8(x) ((x)|(x)<<8) |
@@ -101,31 +97,6 @@ struct modex_params { | |||
101 | const struct modey_params *modey; | 97 | const struct modey_params *modey; |
102 | }; | 98 | }; |
103 | 99 | ||
104 | #ifdef HAS_VIDC | ||
105 | |||
106 | #define VID_CTL_VS_NVSYNC (1 << 3) | ||
107 | #define VID_CTL_HS_NHSYNC (1 << 2) | ||
108 | #define VID_CTL_24MHz (0) | ||
109 | #define VID_CTL_25MHz (1) | ||
110 | #define VID_CTL_36MHz (2) | ||
111 | |||
112 | #define VIDC_CTRL_CSYNC (1 << 7) | ||
113 | #define VIDC_CTRL_INTERLACE (1 << 6) | ||
114 | #define VIDC_CTRL_FIFO_0_4 (0 << 4) | ||
115 | #define VIDC_CTRL_FIFO_1_5 (1 << 4) | ||
116 | #define VIDC_CTRL_FIFO_2_6 (2 << 4) | ||
117 | #define VIDC_CTRL_FIFO_3_7 (3 << 4) | ||
118 | #define VIDC_CTRL_1BPP (0 << 2) | ||
119 | #define VIDC_CTRL_2BPP (1 << 2) | ||
120 | #define VIDC_CTRL_4BPP (2 << 2) | ||
121 | #define VIDC_CTRL_8BPP (3 << 2) | ||
122 | #define VIDC_CTRL_DIV3 (0 << 0) | ||
123 | #define VIDC_CTRL_DIV2 (1 << 0) | ||
124 | #define VIDC_CTRL_DIV1_5 (2 << 0) | ||
125 | #define VIDC_CTRL_DIV1 (3 << 0) | ||
126 | |||
127 | #endif | ||
128 | |||
129 | #ifdef HAS_VIDC20 | 100 | #ifdef HAS_VIDC20 |
130 | /* | 101 | /* |
131 | * VIDC20 registers | 102 | * VIDC20 registers |
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index 47e12cfc2a57..15c7251b0556 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c | |||
@@ -152,8 +152,6 @@ static int mxc_w1_remove(struct platform_device *pdev) | |||
152 | 152 | ||
153 | clk_disable_unprepare(mdev->clk); | 153 | clk_disable_unprepare(mdev->clk); |
154 | 154 | ||
155 | platform_set_drvdata(pdev, NULL); | ||
156 | |||
157 | return 0; | 155 | return 0; |
158 | } | 156 | } |
159 | 157 | ||
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 22013ca2119c..c7c64f18773d 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c | |||
@@ -234,9 +234,11 @@ static ssize_t w1_master_attribute_store_search(struct device * dev, | |||
234 | { | 234 | { |
235 | long tmp; | 235 | long tmp; |
236 | struct w1_master *md = dev_to_w1_master(dev); | 236 | struct w1_master *md = dev_to_w1_master(dev); |
237 | int ret; | ||
237 | 238 | ||
238 | if (strict_strtol(buf, 0, &tmp) == -EINVAL) | 239 | ret = kstrtol(buf, 0, &tmp); |
239 | return -EINVAL; | 240 | if (ret) |
241 | return ret; | ||
240 | 242 | ||
241 | mutex_lock(&md->mutex); | 243 | mutex_lock(&md->mutex); |
242 | md->search_count = tmp; | 244 | md->search_count = tmp; |
@@ -266,9 +268,11 @@ static ssize_t w1_master_attribute_store_pullup(struct device *dev, | |||
266 | { | 268 | { |
267 | long tmp; | 269 | long tmp; |
268 | struct w1_master *md = dev_to_w1_master(dev); | 270 | struct w1_master *md = dev_to_w1_master(dev); |
271 | int ret; | ||
269 | 272 | ||
270 | if (strict_strtol(buf, 0, &tmp) == -EINVAL) | 273 | ret = kstrtol(buf, 0, &tmp); |
271 | return -EINVAL; | 274 | if (ret) |
275 | return ret; | ||
272 | 276 | ||
273 | mutex_lock(&md->mutex); | 277 | mutex_lock(&md->mutex); |
274 | md->enable_pullup = tmp; | 278 | md->enable_pullup = tmp; |
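Unlike strict_strtol(), which these stores treated as yielding only -EINVAL, kstrtol() distinguishes a malformed string (-EINVAL) from an out-of-range value (-ERANGE), and the code above now forwards whichever errno it gets. The resulting sysfs store shape (attribute and locking details are illustrative):

#include <linux/device.h>
#include <linux/kernel.h>

static ssize_t example_store(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	long val;
	int ret;

	ret = kstrtol(buf, 0, &val);	/* base 0 accepts 0x/0 prefixes */
	if (ret)
		return ret;		/* -EINVAL or -ERANGE */

	/* ... publish val under the subsystem's lock ... */
	return count;
}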
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index de7e4f497222..5be5e3d14f79 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c | |||
@@ -162,7 +162,8 @@ extern asmlinkage void asminline_call(struct cmn_registers *pi86Regs, | |||
162 | #define HPWDT_ARCH 32 | 162 | #define HPWDT_ARCH 32 |
163 | 163 | ||
164 | asm(".text \n\t" | 164 | asm(".text \n\t" |
165 | ".align 4 \n" | 165 | ".align 4 \n\t" |
166 | ".globl asminline_call \n" | ||
166 | "asminline_call: \n\t" | 167 | "asminline_call: \n\t" |
167 | "pushl %ebp \n\t" | 168 | "pushl %ebp \n\t" |
168 | "movl %esp, %ebp \n\t" | 169 | "movl %esp, %ebp \n\t" |
@@ -352,7 +353,8 @@ static int detect_cru_service(void) | |||
352 | #define HPWDT_ARCH 64 | 353 | #define HPWDT_ARCH 64 |
353 | 354 | ||
354 | asm(".text \n\t" | 355 | asm(".text \n\t" |
355 | ".align 4 \n" | 356 | ".align 4 \n\t" |
357 | ".globl asminline_call \n" | ||
356 | "asminline_call: \n\t" | 358 | "asminline_call: \n\t" |
357 | "pushq %rbp \n\t" | 359 | "pushq %rbp \n\t" |
358 | "movq %rsp, %rbp \n\t" | 360 | "movq %rsp, %rbp \n\t" |
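A label defined in top-level inline asm is a local symbol unless the assembly itself marks it .globl, and with some toolchains the extern C declaration of asminline_call may then fail to bind. The general shape of the fix, with an illustrative symbol name:

extern void my_asm_func(void);	/* illustrative prototype */

asm(".text \n\t"
    ".align 4 \n\t"
    ".globl my_asm_func \n"	/* make the label visible to the linker */
    "my_asm_func: \n\t"
    "ret \n\t");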
diff --git a/fs/affs/file.c b/fs/affs/file.c index af3261b78102..776e3935a758 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
@@ -836,7 +836,7 @@ affs_truncate(struct inode *inode) | |||
836 | struct address_space *mapping = inode->i_mapping; | 836 | struct address_space *mapping = inode->i_mapping; |
837 | struct page *page; | 837 | struct page *page; |
838 | void *fsdata; | 838 | void *fsdata; |
839 | u32 size = inode->i_size; | 839 | loff_t size = inode->i_size; |
840 | int res; | 840 | int res; |
841 | 841 | ||
842 | res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); | 842 | res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); |
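The affs change fixes a silent truncation: i_size is a 64-bit loff_t, so storing it in a u32 discards the upper bits for files of 4 GiB and larger, and truncate would then operate at the wrong offset. A small, runnable userspace demonstration of the effect:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t i_size = 0x100000400LL;		/* 4 GiB + 1 KiB */
	uint32_t truncated = (uint32_t)i_size;	/* keeps the low 32 bits */

	/* prints: i_size=4294968320 truncated=1024 */
	printf("i_size=%lld truncated=%u\n", (long long)i_size, truncated);
	return 0;
}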
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8fb42916d8a2..60250847929f 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -716,13 +716,14 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size) | |||
716 | return 0; | 716 | return 0; |
717 | 717 | ||
718 | bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); | 718 | bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); |
719 | 719 | if (!bs->bio_integrity_pool) | |
720 | bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); | ||
721 | if (!bs->bvec_integrity_pool) | ||
722 | return -1; | 720 | return -1; |
723 | 721 | ||
724 | if (!bs->bio_integrity_pool) | 722 | bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); |
723 | if (!bs->bvec_integrity_pool) { | ||
724 | mempool_destroy(bs->bio_integrity_pool); | ||
725 | return -1; | 725 | return -1; |
726 | } | ||
726 | 727 | ||
727 | return 0; | 728 | return 0; |
728 | } | 729 | } |
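The reordering above checks each allocation before the next is attempted and destroys the first pool if the second fails; the old ordering checked bio_integrity_pool too late and leaked it on the bvec-pool failure path. The canonical unwind shape for paired allocations, as a sketch:

#include <linux/errno.h>
#include <linux/mempool.h>
#include <linux/slab.h>

/* Sketch: allocate two pools, releasing the first if the second fails. */
static int create_pools(mempool_t **a, mempool_t **b, int min_nr,
			struct kmem_cache *cache)
{
	*a = mempool_create_slab_pool(min_nr, cache);
	if (!*a)
		return -ENOMEM;

	*b = mempool_create_slab_pool(min_nr, cache);
	if (!*b) {
		mempool_destroy(*a);	/* undo the earlier success */
		*a = NULL;
		return -ENOMEM;
	}
	return 0;
}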
diff --git a/fs/coredump.c b/fs/coredump.c index 72f816d6cad9..9bdeca12ae0e 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
@@ -190,6 +190,11 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
190 | err = cn_printf(cn, "%d", | 190 | err = cn_printf(cn, "%d", |
191 | task_tgid_vnr(current)); | 191 | task_tgid_vnr(current)); |
192 | break; | 192 | break; |
193 | /* global pid */ | ||
194 | case 'P': | ||
195 | err = cn_printf(cn, "%d", | ||
196 | task_tgid_nr(current)); | ||
197 | break; | ||
193 | /* uid */ | 198 | /* uid */ |
194 | case 'u': | 199 | case 'u': |
195 | err = cn_printf(cn, "%d", cred->uid); | 200 | err = cn_printf(cn, "%d", cred->uid); |
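The new %P specifier expands via task_tgid_nr() to the dumping task's pid in the initial namespace, whereas the existing %p stays namespace-relative (task_tgid_vnr()); a host-side collector can therefore tell apart cores produced inside containers. A hedged userspace sketch that enables both in core_pattern (needs root):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/core_pattern", "w");

	if (!f) {
		perror("core_pattern");
		return 1;
	}
	/* %p: pid as seen in its own namespace, %P: global pid */
	fputs("core.%p.%P", f);
	return fclose(f) ? 1 : 0;
}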
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 293f86741ddb..473e09da7d02 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -740,6 +740,7 @@ static void ep_free(struct eventpoll *ep) | |||
740 | epi = rb_entry(rbp, struct epitem, rbn); | 740 | epi = rb_entry(rbp, struct epitem, rbn); |
741 | 741 | ||
742 | ep_unregister_pollwait(ep, epi); | 742 | ep_unregister_pollwait(ep, epi); |
743 | cond_resched(); | ||
743 | } | 744 | } |
744 | 745 | ||
745 | /* | 746 | /* |
@@ -754,6 +755,7 @@ static void ep_free(struct eventpoll *ep) | |||
754 | while ((rbp = rb_first(&ep->rbr)) != NULL) { | 755 | while ((rbp = rb_first(&ep->rbr)) != NULL) { |
755 | epi = rb_entry(rbp, struct epitem, rbn); | 756 | epi = rb_entry(rbp, struct epitem, rbn); |
756 | ep_remove(ep, epi); | 757 | ep_remove(ep, epi); |
758 | cond_resched(); | ||
757 | } | 759 | } |
758 | mutex_unlock(&ep->mtx); | 760 | mutex_unlock(&ep->mtx); |
759 | 761 | ||
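ep_free() runs in process context but may walk an rbtree holding a very large number of epitems, so without a resched point the loops above can monopolize a CPU. The fix is the usual cond_resched()-per-iteration pattern for long, sleepable loops; a self-contained sketch:

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/slab.h>

struct item {
	struct list_head node;
};

/* Sketch: tear down a long list without hogging the CPU. */
static void drain_items(struct list_head *head)
{
	struct item *it, *tmp;

	list_for_each_entry_safe(it, tmp, head, node) {
		list_del(&it->node);
		kfree(it);
		cond_resched();	/* may sleep; fine in process context */
	}
}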
@@ -74,6 +74,8 @@ static DEFINE_RWLOCK(binfmt_lock); | |||
74 | void __register_binfmt(struct linux_binfmt * fmt, int insert) | 74 | void __register_binfmt(struct linux_binfmt * fmt, int insert) |
75 | { | 75 | { |
76 | BUG_ON(!fmt); | 76 | BUG_ON(!fmt); |
77 | if (WARN_ON(!fmt->load_binary)) | ||
78 | return; | ||
77 | write_lock(&binfmt_lock); | 79 | write_lock(&binfmt_lock); |
78 | insert ? list_add(&fmt->lh, &formats) : | 80 | insert ? list_add(&fmt->lh, &formats) : |
79 | list_add_tail(&fmt->lh, &formats); | 81 | list_add_tail(&fmt->lh, &formats); |
@@ -266,7 +268,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) | |||
266 | BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); | 268 | BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); |
267 | vma->vm_end = STACK_TOP_MAX; | 269 | vma->vm_end = STACK_TOP_MAX; |
268 | vma->vm_start = vma->vm_end - PAGE_SIZE; | 270 | vma->vm_start = vma->vm_end - PAGE_SIZE; |
269 | vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; | 271 | vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; |
270 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); | 272 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); |
271 | INIT_LIST_HEAD(&vma->anon_vma_chain); | 273 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
272 | 274 | ||
@@ -1365,18 +1367,18 @@ out: | |||
1365 | } | 1367 | } |
1366 | EXPORT_SYMBOL(remove_arg_zero); | 1368 | EXPORT_SYMBOL(remove_arg_zero); |
1367 | 1369 | ||
1370 | #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) | ||
1368 | /* | 1371 | /* |
1369 | * cycle through the list of binary format handlers until one recognizes the image | 1372 | * cycle through the list of binary format handlers until one recognizes the image |
1370 | */ | 1373 | */ |
1371 | int search_binary_handler(struct linux_binprm *bprm) | 1374 | int search_binary_handler(struct linux_binprm *bprm) |
1372 | { | 1375 | { |
1373 | unsigned int depth = bprm->recursion_depth; | 1376 | bool need_retry = IS_ENABLED(CONFIG_MODULES); |
1374 | int try,retval; | ||
1375 | struct linux_binfmt *fmt; | 1377 | struct linux_binfmt *fmt; |
1376 | pid_t old_pid, old_vpid; | 1378 | int retval; |
1377 | 1379 | ||
1378 | /* This allows 4 levels of binfmt rewrites before failing hard. */ | 1380 | /* This allows 4 levels of binfmt rewrites before failing hard. */ |
1379 | if (depth > 5) | 1381 | if (bprm->recursion_depth > 5) |
1380 | return -ELOOP; | 1382 | return -ELOOP; |
1381 | 1383 | ||
1382 | retval = security_bprm_check(bprm); | 1384 | retval = security_bprm_check(bprm); |
@@ -1387,71 +1389,67 @@ int search_binary_handler(struct linux_binprm *bprm) | |||
1387 | if (retval) | 1389 | if (retval) |
1388 | return retval; | 1390 | return retval; |
1389 | 1391 | ||
1392 | retval = -ENOENT; | ||
1393 | retry: | ||
1394 | read_lock(&binfmt_lock); | ||
1395 | list_for_each_entry(fmt, &formats, lh) { | ||
1396 | if (!try_module_get(fmt->module)) | ||
1397 | continue; | ||
1398 | read_unlock(&binfmt_lock); | ||
1399 | bprm->recursion_depth++; | ||
1400 | retval = fmt->load_binary(bprm); | ||
1401 | bprm->recursion_depth--; | ||
1402 | if (retval >= 0 || retval != -ENOEXEC || | ||
1403 | bprm->mm == NULL || bprm->file == NULL) { | ||
1404 | put_binfmt(fmt); | ||
1405 | return retval; | ||
1406 | } | ||
1407 | read_lock(&binfmt_lock); | ||
1408 | put_binfmt(fmt); | ||
1409 | } | ||
1410 | read_unlock(&binfmt_lock); | ||
1411 | |||
1412 | if (need_retry && retval == -ENOEXEC) { | ||
1413 | if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && | ||
1414 | printable(bprm->buf[2]) && printable(bprm->buf[3])) | ||
1415 | return retval; | ||
1416 | if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0) | ||
1417 | return retval; | ||
1418 | need_retry = false; | ||
1419 | goto retry; | ||
1420 | } | ||
1421 | |||
1422 | return retval; | ||
1423 | } | ||
1424 | EXPORT_SYMBOL(search_binary_handler); | ||
1425 | |||
1426 | static int exec_binprm(struct linux_binprm *bprm) | ||
1427 | { | ||
1428 | pid_t old_pid, old_vpid; | ||
1429 | int ret; | ||
1430 | |||
1390 | /* Need to fetch pid before load_binary changes it */ | 1431 | /* Need to fetch pid before load_binary changes it */ |
1391 | old_pid = current->pid; | 1432 | old_pid = current->pid; |
1392 | rcu_read_lock(); | 1433 | rcu_read_lock(); |
1393 | old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); | 1434 | old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); |
1394 | rcu_read_unlock(); | 1435 | rcu_read_unlock(); |
1395 | 1436 | ||
1396 | retval = -ENOENT; | 1437 | ret = search_binary_handler(bprm); |
1397 | for (try=0; try<2; try++) { | 1438 | if (ret >= 0) { |
1398 | read_lock(&binfmt_lock); | 1439 | trace_sched_process_exec(current, old_pid, bprm); |
1399 | list_for_each_entry(fmt, &formats, lh) { | 1440 | ptrace_event(PTRACE_EVENT_EXEC, old_vpid); |
1400 | int (*fn)(struct linux_binprm *) = fmt->load_binary; | 1441 | current->did_exec = 1; |
1401 | if (!fn) | 1442 | proc_exec_connector(current); |
1402 | continue; | 1443 | |
1403 | if (!try_module_get(fmt->module)) | 1444 | if (bprm->file) { |
1404 | continue; | 1445 | allow_write_access(bprm->file); |
1405 | read_unlock(&binfmt_lock); | 1446 | fput(bprm->file); |
1406 | bprm->recursion_depth = depth + 1; | 1447 | bprm->file = NULL; /* to catch use-after-free */ |
1407 | retval = fn(bprm); | ||
1408 | bprm->recursion_depth = depth; | ||
1409 | if (retval >= 0) { | ||
1410 | if (depth == 0) { | ||
1411 | trace_sched_process_exec(current, old_pid, bprm); | ||
1412 | ptrace_event(PTRACE_EVENT_EXEC, old_vpid); | ||
1413 | } | ||
1414 | put_binfmt(fmt); | ||
1415 | allow_write_access(bprm->file); | ||
1416 | if (bprm->file) | ||
1417 | fput(bprm->file); | ||
1418 | bprm->file = NULL; | ||
1419 | current->did_exec = 1; | ||
1420 | proc_exec_connector(current); | ||
1421 | return retval; | ||
1422 | } | ||
1423 | read_lock(&binfmt_lock); | ||
1424 | put_binfmt(fmt); | ||
1425 | if (retval != -ENOEXEC || bprm->mm == NULL) | ||
1426 | break; | ||
1427 | if (!bprm->file) { | ||
1428 | read_unlock(&binfmt_lock); | ||
1429 | return retval; | ||
1430 | } | ||
1431 | } | 1448 | } |
1432 | read_unlock(&binfmt_lock); | ||
1433 | #ifdef CONFIG_MODULES | ||
1434 | if (retval != -ENOEXEC || bprm->mm == NULL) { | ||
1435 | break; | ||
1436 | } else { | ||
1437 | #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) | ||
1438 | if (printable(bprm->buf[0]) && | ||
1439 | printable(bprm->buf[1]) && | ||
1440 | printable(bprm->buf[2]) && | ||
1441 | printable(bprm->buf[3])) | ||
1442 | break; /* -ENOEXEC */ | ||
1443 | if (try) | ||
1444 | break; /* -ENOEXEC */ | ||
1445 | request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); | ||
1446 | } | ||
1447 | #else | ||
1448 | break; | ||
1449 | #endif | ||
1450 | } | 1449 | } |
1451 | return retval; | ||
1452 | } | ||
1453 | 1450 | ||
1454 | EXPORT_SYMBOL(search_binary_handler); | 1451 | return ret; |
1452 | } | ||
1455 | 1453 | ||
1456 | /* | 1454 | /* |
1457 | * sys_execve() executes a new program. | 1455 | * sys_execve() executes a new program. |
@@ -1541,7 +1539,7 @@ static int do_execve_common(const char *filename, | |||
1541 | if (retval < 0) | 1539 | if (retval < 0) |
1542 | goto out; | 1540 | goto out; |
1543 | 1541 | ||
1544 | retval = search_binary_handler(bprm); | 1542 | retval = exec_binprm(bprm); |
1545 | if (retval < 0) | 1543 | if (retval < 0) |
1546 | goto out; | 1544 | goto out; |
1547 | 1545 | ||
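The rewritten search_binary_handler() keeps a single list walk plus at most one retry: if every handler returns -ENOEXEC and the header bytes do not look like printable text, it requests a binfmt-XXXX module once and rescans, while the new exec_binprm() owns the pid bookkeeping and ptrace notification. The retry control flow in isolation (every name below is an illustrative stand-in, not the kernel's):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kmod.h>

struct image { unsigned short magic; };

static int try_each_handler(struct image *img)
{
	return -ENOEXEC;	/* stub: no handler recognized the image */
}

static int scan_with_one_retry(struct image *img)
{
	bool need_retry = IS_ENABLED(CONFIG_MODULES);
	int retval;

retry:
	retval = try_each_handler(img);
	if (need_retry && retval == -ENOEXEC) {
		if (request_module("binfmt-%04x", img->magic) < 0)
			return retval;
		need_retry = false;	/* at most one module load */
		goto retry;
	}
	return retval;
}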
diff --git a/fs/file_table.c b/fs/file_table.c index 322cd37626cb..abdd15ad13c9 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -311,8 +311,7 @@ void fput(struct file *file) | |||
311 | return; | 311 | return; |
312 | /* | 312 | /* |
313 | * After this task has run exit_task_work(), | 313 | * After this task has run exit_task_work(), |
314 | * task_work_add() will fail. free_ipc_ns()-> | 314 | * task_work_add() will fail. Fall through to delayed |
315 | * shm_destroy() can do this. Fall through to delayed | ||
316 | * fput to avoid leaking *file. | 315 | * fput to avoid leaking *file. |
317 | */ | 316 | */ |
318 | } | 317 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 68851ff2fd41..30f6f27d5a59 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -723,7 +723,7 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb, | |||
723 | return wrote; | 723 | return wrote; |
724 | } | 724 | } |
725 | 725 | ||
726 | long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, | 726 | static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, |
727 | enum wb_reason reason) | 727 | enum wb_reason reason) |
728 | { | 728 | { |
729 | struct wb_writeback_work work = { | 729 | struct wb_writeback_work work = { |
@@ -1049,10 +1049,8 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) | |||
1049 | { | 1049 | { |
1050 | struct backing_dev_info *bdi; | 1050 | struct backing_dev_info *bdi; |
1051 | 1051 | ||
1052 | if (!nr_pages) { | 1052 | if (!nr_pages) |
1053 | nr_pages = global_page_state(NR_FILE_DIRTY) + | 1053 | nr_pages = get_nr_dirty_pages(); |
1054 | global_page_state(NR_UNSTABLE_NFS); | ||
1055 | } | ||
1056 | 1054 | ||
1057 | rcu_read_lock(); | 1055 | rcu_read_lock(); |
1058 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | 1056 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { |
@@ -1173,6 +1171,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1173 | bool wakeup_bdi = false; | 1171 | bool wakeup_bdi = false; |
1174 | bdi = inode_to_bdi(inode); | 1172 | bdi = inode_to_bdi(inode); |
1175 | 1173 | ||
1174 | spin_unlock(&inode->i_lock); | ||
1175 | spin_lock(&bdi->wb.list_lock); | ||
1176 | if (bdi_cap_writeback_dirty(bdi)) { | 1176 | if (bdi_cap_writeback_dirty(bdi)) { |
1177 | WARN(!test_bit(BDI_registered, &bdi->state), | 1177 | WARN(!test_bit(BDI_registered, &bdi->state), |
1178 | "bdi-%s not registered\n", bdi->name); | 1178 | "bdi-%s not registered\n", bdi->name); |
@@ -1187,8 +1187,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1187 | wakeup_bdi = true; | 1187 | wakeup_bdi = true; |
1188 | } | 1188 | } |
1189 | 1189 | ||
1190 | spin_unlock(&inode->i_lock); | ||
1191 | spin_lock(&bdi->wb.list_lock); | ||
1192 | inode->dirtied_when = jiffies; | 1190 | inode->dirtied_when = jiffies; |
1193 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | 1191 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
1194 | spin_unlock(&bdi->wb.list_lock); | 1192 | spin_unlock(&bdi->wb.list_lock); |
diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 8702b732109a..73899c1c3449 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c | |||
@@ -913,7 +913,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
913 | (1 << FSCACHE_OP_WAITING) | | 913 | (1 << FSCACHE_OP_WAITING) | |
914 | (1 << FSCACHE_OP_UNUSE_COOKIE); | 914 | (1 << FSCACHE_OP_UNUSE_COOKIE); |
915 | 915 | ||
916 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); | 916 | ret = radix_tree_maybe_preload(gfp & ~__GFP_HIGHMEM); |
917 | if (ret < 0) | 917 | if (ret < 0) |
918 | goto nomem_free; | 918 | goto nomem_free; |
919 | 919 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e0fe703ee3d6..84434594e80e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -930,7 +930,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) | |||
930 | fc->bdi.name = "fuse"; | 930 | fc->bdi.name = "fuse"; |
931 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 931 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
932 | /* fuse does its own writeback accounting */ | 932 | /* fuse does its own writeback accounting */ |
933 | fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; | 933 | fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; |
934 | 934 | ||
935 | err = bdi_init(&fc->bdi); | 935 | err = bdi_init(&fc->bdi); |
936 | if (err) | 936 | if (err) |
diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig index a63371815aab..24bc20fd42f7 100644 --- a/fs/hfsplus/Kconfig +++ b/fs/hfsplus/Kconfig | |||
@@ -11,3 +11,21 @@ config HFSPLUS_FS | |||
11 | MacOS 8. It includes all Mac specific filesystem data such as | 11 | MacOS 8. It includes all Mac specific filesystem data such as |
12 | data forks and creator codes, but it also has several UNIX | 12 | data forks and creator codes, but it also has several UNIX |
13 | style features such as file ownership and permissions. | 13 | style features such as file ownership and permissions. |
14 | |||
15 | config HFSPLUS_FS_POSIX_ACL | ||
16 | bool "HFS+ POSIX Access Control Lists" | ||
17 | depends on HFSPLUS_FS | ||
18 | select FS_POSIX_ACL | ||
19 | help | ||
20 | POSIX Access Control Lists (ACLs) support permissions for users and | ||
21 | groups beyond the owner/group/world scheme. | ||
22 | |||
23 | To learn more about Access Control Lists, visit the POSIX ACLs for | ||
24 | Linux website <http://acl.bestbits.at/>. | ||
25 | |||
26 | Note that POSIX ACLs are honoured only under Linux; they mean | ||
27 | nothing under Mac OS X. Beginning with version 10.4 ("Tiger"), | ||
28 | Mac OS X instead supports NFSv4 ACLs, which are part of the | ||
29 | NFSv4 standard. | ||
30 | |||
31 | If you don't know what Access Control Lists are, say N. | ||
diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile index 09d278bb7b91..683fca2e5e65 100644 --- a/fs/hfsplus/Makefile +++ b/fs/hfsplus/Makefile | |||
@@ -7,3 +7,5 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o | |||
7 | hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ | 7 | hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ |
8 | bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \ | 8 | bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \ |
9 | attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o | 9 | attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o |
10 | |||
11 | hfsplus-$(CONFIG_HFSPLUS_FS_POSIX_ACL) += posix_acl.o | ||
diff --git a/fs/hfsplus/acl.h b/fs/hfsplus/acl.h new file mode 100644 index 000000000000..07c0d4947527 --- /dev/null +++ b/fs/hfsplus/acl.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * linux/fs/hfsplus/acl.h | ||
3 | * | ||
4 | * Vyacheslav Dubeyko <slava@dubeyko.com> | ||
5 | * | ||
6 | * Handling of POSIX Access Control Lists (ACLs). | ||
7 | */ | ||
8 | |||
9 | #include <linux/posix_acl_xattr.h> | ||
10 | |||
11 | #ifdef CONFIG_HFSPLUS_FS_POSIX_ACL | ||
12 | |||
13 | /* posix_acl.c */ | ||
14 | struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type); | ||
15 | extern int hfsplus_posix_acl_chmod(struct inode *); | ||
16 | extern int hfsplus_init_posix_acl(struct inode *, struct inode *); | ||
17 | |||
18 | #else /* CONFIG_HFSPLUS_FS_POSIX_ACL */ | ||
19 | #define hfsplus_get_posix_acl NULL | ||
20 | |||
21 | static inline int hfsplus_posix_acl_chmod(struct inode *inode) | ||
22 | { | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | static inline int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) | ||
27 | { | ||
28 | return 0; | ||
29 | } | ||
30 | #endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */ | ||
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index d8ce4bd17fc5..4a4fea002673 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include "hfsplus_fs.h" | 16 | #include "hfsplus_fs.h" |
17 | #include "hfsplus_raw.h" | 17 | #include "hfsplus_raw.h" |
18 | #include "xattr.h" | 18 | #include "xattr.h" |
19 | #include "acl.h" | ||
19 | 20 | ||
20 | static inline void hfsplus_instantiate(struct dentry *dentry, | 21 | static inline void hfsplus_instantiate(struct dentry *dentry, |
21 | struct inode *inode, u32 cnid) | 22 | struct inode *inode, u32 cnid) |
@@ -529,6 +530,9 @@ const struct inode_operations hfsplus_dir_inode_operations = { | |||
529 | .getxattr = generic_getxattr, | 530 | .getxattr = generic_getxattr, |
530 | .listxattr = hfsplus_listxattr, | 531 | .listxattr = hfsplus_listxattr, |
531 | .removexattr = hfsplus_removexattr, | 532 | .removexattr = hfsplus_removexattr, |
533 | #ifdef CONFIG_HFSPLUS_FS_POSIX_ACL | ||
534 | .get_acl = hfsplus_get_posix_acl, | ||
535 | #endif | ||
532 | }; | 536 | }; |
533 | 537 | ||
534 | const struct file_operations hfsplus_dir_operations = { | 538 | const struct file_operations hfsplus_dir_operations = { |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index ede79317cfb8..2b9cd01696e2 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -30,6 +30,7 @@ | |||
30 | #define DBG_EXTENT 0x00000020 | 30 | #define DBG_EXTENT 0x00000020 |
31 | #define DBG_BITMAP 0x00000040 | 31 | #define DBG_BITMAP 0x00000040 |
32 | #define DBG_ATTR_MOD 0x00000080 | 32 | #define DBG_ATTR_MOD 0x00000080 |
33 | #define DBG_ACL_MOD 0x00000100 | ||
33 | 34 | ||
34 | #if 0 | 35 | #if 0 |
35 | #define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) | 36 | #define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index f833d35630ab..4d2edaea891c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include "hfsplus_fs.h" | 19 | #include "hfsplus_fs.h" |
20 | #include "hfsplus_raw.h" | 20 | #include "hfsplus_raw.h" |
21 | #include "xattr.h" | 21 | #include "xattr.h" |
22 | #include "acl.h" | ||
22 | 23 | ||
23 | static int hfsplus_readpage(struct file *file, struct page *page) | 24 | static int hfsplus_readpage(struct file *file, struct page *page) |
24 | { | 25 | { |
@@ -316,6 +317,13 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) | |||
316 | 317 | ||
317 | setattr_copy(inode, attr); | 318 | setattr_copy(inode, attr); |
318 | mark_inode_dirty(inode); | 319 | mark_inode_dirty(inode); |
320 | |||
321 | if (attr->ia_valid & ATTR_MODE) { | ||
322 | error = hfsplus_posix_acl_chmod(inode); | ||
323 | if (unlikely(error)) | ||
324 | return error; | ||
325 | } | ||
326 | |||
319 | return 0; | 327 | return 0; |
320 | } | 328 | } |
321 | 329 | ||
@@ -383,6 +391,9 @@ static const struct inode_operations hfsplus_file_inode_operations = { | |||
383 | .getxattr = generic_getxattr, | 391 | .getxattr = generic_getxattr, |
384 | .listxattr = hfsplus_listxattr, | 392 | .listxattr = hfsplus_listxattr, |
385 | .removexattr = hfsplus_removexattr, | 393 | .removexattr = hfsplus_removexattr, |
394 | #ifdef CONFIG_HFSPLUS_FS_POSIX_ACL | ||
395 | .get_acl = hfsplus_get_posix_acl, | ||
396 | #endif | ||
386 | }; | 397 | }; |
387 | 398 | ||
388 | static const struct file_operations hfsplus_file_operations = { | 399 | static const struct file_operations hfsplus_file_operations = { |
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c new file mode 100644 index 000000000000..b609cc14c72e --- /dev/null +++ b/fs/hfsplus/posix_acl.c | |||
@@ -0,0 +1,274 @@ | |||
1 | /* | ||
2 | * linux/fs/hfsplus/posix_acl.c | ||
3 | * | ||
4 | * Vyacheslav Dubeyko <slava@dubeyko.com> | ||
5 | * | ||
6 | * Handling of POSIX Access Control Lists (ACLs). | ||
7 | */ | ||
8 | |||
9 | #include "hfsplus_fs.h" | ||
10 | #include "xattr.h" | ||
11 | #include "acl.h" | ||
12 | |||
13 | struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) | ||
14 | { | ||
15 | struct posix_acl *acl; | ||
16 | char *xattr_name; | ||
17 | char *value = NULL; | ||
18 | ssize_t size; | ||
19 | |||
20 | acl = get_cached_acl(inode, type); | ||
21 | if (acl != ACL_NOT_CACHED) | ||
22 | return acl; | ||
23 | |||
24 | switch (type) { | ||
25 | case ACL_TYPE_ACCESS: | ||
26 | xattr_name = POSIX_ACL_XATTR_ACCESS; | ||
27 | break; | ||
28 | case ACL_TYPE_DEFAULT: | ||
29 | xattr_name = POSIX_ACL_XATTR_DEFAULT; | ||
30 | break; | ||
31 | default: | ||
32 | return ERR_PTR(-EINVAL); | ||
33 | } | ||
34 | |||
35 | size = __hfsplus_getxattr(inode, xattr_name, NULL, 0); | ||
36 | |||
37 | if (size > 0) { | ||
38 | value = (char *)hfsplus_alloc_attr_entry(); | ||
39 | if (unlikely(!value)) | ||
40 | return ERR_PTR(-ENOMEM); | ||
41 | size = __hfsplus_getxattr(inode, xattr_name, value, size); | ||
42 | } | ||
43 | |||
44 | if (size > 0) | ||
45 | acl = posix_acl_from_xattr(&init_user_ns, value, size); | ||
46 | else if (size == -ENODATA) | ||
47 | acl = NULL; | ||
48 | else | ||
49 | acl = ERR_PTR(size); | ||
50 | |||
51 | hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); | ||
52 | |||
53 | if (!IS_ERR(acl)) | ||
54 | set_cached_acl(inode, type, acl); | ||
55 | |||
56 | return acl; | ||
57 | } | ||
58 | |||
59 | static int hfsplus_set_posix_acl(struct inode *inode, | ||
60 | int type, | ||
61 | struct posix_acl *acl) | ||
62 | { | ||
63 | int err; | ||
64 | char *xattr_name; | ||
65 | size_t size = 0; | ||
66 | char *value = NULL; | ||
67 | |||
68 | if (S_ISLNK(inode->i_mode)) | ||
69 | return -EOPNOTSUPP; | ||
70 | |||
71 | switch (type) { | ||
72 | case ACL_TYPE_ACCESS: | ||
73 | xattr_name = POSIX_ACL_XATTR_ACCESS; | ||
74 | if (acl) { | ||
75 | err = posix_acl_equiv_mode(acl, &inode->i_mode); | ||
76 | if (err < 0) | ||
77 | return err; | ||
78 | } | ||
79 | err = 0; | ||
80 | break; | ||
81 | |||
82 | case ACL_TYPE_DEFAULT: | ||
83 | xattr_name = POSIX_ACL_XATTR_DEFAULT; | ||
84 | if (!S_ISDIR(inode->i_mode)) | ||
85 | return acl ? -EACCES : 0; | ||
86 | break; | ||
87 | |||
88 | default: | ||
89 | return -EINVAL; | ||
90 | } | ||
91 | |||
92 | if (acl) { | ||
93 | size = posix_acl_xattr_size(acl->a_count); | ||
94 | if (unlikely(size > HFSPLUS_MAX_INLINE_DATA_SIZE)) | ||
95 | return -ENOMEM; | ||
96 | value = (char *)hfsplus_alloc_attr_entry(); | ||
97 | if (unlikely(!value)) | ||
98 | return -ENOMEM; | ||
99 | err = posix_acl_to_xattr(&init_user_ns, acl, value, size); | ||
100 | if (unlikely(err < 0)) | ||
101 | goto end_set_acl; | ||
102 | } | ||
103 | |||
104 | err = __hfsplus_setxattr(inode, xattr_name, value, size, 0); | ||
105 | |||
106 | end_set_acl: | ||
107 | hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); | ||
108 | |||
109 | if (!err) | ||
110 | set_cached_acl(inode, type, acl); | ||
111 | |||
112 | return err; | ||
113 | } | ||
114 | |||
115 | int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) | ||
116 | { | ||
117 | int err = 0; | ||
118 | struct posix_acl *acl = NULL; | ||
119 | |||
120 | hfs_dbg(ACL_MOD, | ||
121 | "[%s]: ino %lu, dir->ino %lu\n", | ||
122 | __func__, inode->i_ino, dir->i_ino); | ||
123 | |||
124 | if (S_ISLNK(inode->i_mode)) | ||
125 | return 0; | ||
126 | |||
127 | acl = hfsplus_get_posix_acl(dir, ACL_TYPE_DEFAULT); | ||
128 | if (IS_ERR(acl)) | ||
129 | return PTR_ERR(acl); | ||
130 | |||
131 | if (acl) { | ||
132 | if (S_ISDIR(inode->i_mode)) { | ||
133 | err = hfsplus_set_posix_acl(inode, | ||
134 | ACL_TYPE_DEFAULT, | ||
135 | acl); | ||
136 | if (unlikely(err)) | ||
137 | goto init_acl_cleanup; | ||
138 | } | ||
139 | |||
140 | err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); | ||
141 | if (unlikely(err < 0)) | ||
142 | return err; | ||
143 | |||
144 | if (err > 0) | ||
145 | err = hfsplus_set_posix_acl(inode, | ||
146 | ACL_TYPE_ACCESS, | ||
147 | acl); | ||
148 | } else | ||
149 | inode->i_mode &= ~current_umask(); | ||
150 | |||
151 | init_acl_cleanup: | ||
152 | posix_acl_release(acl); | ||
153 | return err; | ||
154 | } | ||
155 | |||
156 | int hfsplus_posix_acl_chmod(struct inode *inode) | ||
157 | { | ||
158 | int err; | ||
159 | struct posix_acl *acl; | ||
160 | |||
161 | hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino); | ||
162 | |||
163 | if (S_ISLNK(inode->i_mode)) | ||
164 | return -EOPNOTSUPP; | ||
165 | |||
166 | acl = hfsplus_get_posix_acl(inode, ACL_TYPE_ACCESS); | ||
167 | if (IS_ERR(acl) || !acl) | ||
168 | return PTR_ERR(acl); | ||
169 | |||
170 | err = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); | ||
171 | if (unlikely(err)) | ||
172 | return err; | ||
173 | |||
174 | err = hfsplus_set_posix_acl(inode, ACL_TYPE_ACCESS, acl); | ||
175 | posix_acl_release(acl); | ||
176 | return err; | ||
177 | } | ||
178 | |||
179 | static int hfsplus_xattr_get_posix_acl(struct dentry *dentry, | ||
180 | const char *name, | ||
181 | void *buffer, | ||
182 | size_t size, | ||
183 | int type) | ||
184 | { | ||
185 | int err = 0; | ||
186 | struct posix_acl *acl; | ||
187 | |||
188 | hfs_dbg(ACL_MOD, | ||
189 | "[%s]: ino %lu, buffer %p, size %zu, type %#x\n", | ||
190 | __func__, dentry->d_inode->i_ino, buffer, size, type); | ||
191 | |||
192 | if (strcmp(name, "") != 0) | ||
193 | return -EINVAL; | ||
194 | |||
195 | acl = hfsplus_get_posix_acl(dentry->d_inode, type); | ||
196 | if (IS_ERR(acl)) | ||
197 | return PTR_ERR(acl); | ||
198 | if (acl == NULL) | ||
199 | return -ENODATA; | ||
200 | |||
201 | err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); | ||
202 | posix_acl_release(acl); | ||
203 | |||
204 | return err; | ||
205 | } | ||
206 | |||
207 | static int hfsplus_xattr_set_posix_acl(struct dentry *dentry, | ||
208 | const char *name, | ||
209 | const void *value, | ||
210 | size_t size, | ||
211 | int flags, | ||
212 | int type) | ||
213 | { | ||
214 | int err = 0; | ||
215 | struct inode *inode = dentry->d_inode; | ||
216 | struct posix_acl *acl = NULL; | ||
217 | |||
218 | hfs_dbg(ACL_MOD, | ||
219 | "[%s]: ino %lu, value %p, size %zu, flags %#x, type %#x\n", | ||
220 | __func__, inode->i_ino, value, size, flags, type); | ||
221 | |||
222 | if (strcmp(name, "") != 0) | ||
223 | return -EINVAL; | ||
224 | |||
225 | if (!inode_owner_or_capable(inode)) | ||
226 | return -EPERM; | ||
227 | |||
228 | if (value) { | ||
229 | acl = posix_acl_from_xattr(&init_user_ns, value, size); | ||
230 | if (IS_ERR(acl)) | ||
231 | return PTR_ERR(acl); | ||
232 | else if (acl) { | ||
233 | err = posix_acl_valid(acl); | ||
234 | if (err) | ||
235 | goto end_xattr_set_acl; | ||
236 | } | ||
237 | } | ||
238 | |||
239 | err = hfsplus_set_posix_acl(inode, type, acl); | ||
240 | |||
241 | end_xattr_set_acl: | ||
242 | posix_acl_release(acl); | ||
243 | return err; | ||
244 | } | ||
245 | |||
246 | static size_t hfsplus_xattr_list_posix_acl(struct dentry *dentry, | ||
247 | char *list, | ||
248 | size_t list_size, | ||
249 | const char *name, | ||
250 | size_t name_len, | ||
251 | int type) | ||
252 | { | ||
253 | /* | ||
254 | * This method is never called: hfsplus_listxattr() is used | ||
255 | * instead of generic_listxattr(). | ||
256 | */ | ||
257 | return -EOPNOTSUPP; | ||
258 | } | ||
259 | |||
260 | const struct xattr_handler hfsplus_xattr_acl_access_handler = { | ||
261 | .prefix = POSIX_ACL_XATTR_ACCESS, | ||
262 | .flags = ACL_TYPE_ACCESS, | ||
263 | .list = hfsplus_xattr_list_posix_acl, | ||
264 | .get = hfsplus_xattr_get_posix_acl, | ||
265 | .set = hfsplus_xattr_set_posix_acl, | ||
266 | }; | ||
267 | |||
268 | const struct xattr_handler hfsplus_xattr_acl_default_handler = { | ||
269 | .prefix = POSIX_ACL_XATTR_DEFAULT, | ||
270 | .flags = ACL_TYPE_DEFAULT, | ||
271 | .list = hfsplus_xattr_list_posix_acl, | ||
272 | .get = hfsplus_xattr_get_posix_acl, | ||
273 | .set = hfsplus_xattr_set_posix_acl, | ||
274 | }; | ||
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index f66346155df5..bd8471fb9a6a 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c | |||
@@ -8,11 +8,16 @@ | |||
8 | 8 | ||
9 | #include "hfsplus_fs.h" | 9 | #include "hfsplus_fs.h" |
10 | #include "xattr.h" | 10 | #include "xattr.h" |
11 | #include "acl.h" | ||
11 | 12 | ||
12 | const struct xattr_handler *hfsplus_xattr_handlers[] = { | 13 | const struct xattr_handler *hfsplus_xattr_handlers[] = { |
13 | &hfsplus_xattr_osx_handler, | 14 | &hfsplus_xattr_osx_handler, |
14 | &hfsplus_xattr_user_handler, | 15 | &hfsplus_xattr_user_handler, |
15 | &hfsplus_xattr_trusted_handler, | 16 | &hfsplus_xattr_trusted_handler, |
17 | #ifdef CONFIG_HFSPLUS_FS_POSIX_ACL | ||
18 | &hfsplus_xattr_acl_access_handler, | ||
19 | &hfsplus_xattr_acl_default_handler, | ||
20 | #endif | ||
16 | &hfsplus_xattr_security_handler, | 21 | &hfsplus_xattr_security_handler, |
17 | NULL | 22 | NULL |
18 | }; | 23 | }; |
@@ -46,11 +51,58 @@ static inline int is_known_namespace(const char *name) | |||
46 | return true; | 51 | return true; |
47 | } | 52 | } |
48 | 53 | ||
54 | static int can_set_system_xattr(struct inode *inode, const char *name, | ||
55 | const void *value, size_t size) | ||
56 | { | ||
57 | #ifdef CONFIG_HFSPLUS_FS_POSIX_ACL | ||
58 | struct posix_acl *acl; | ||
59 | int err; | ||
60 | |||
61 | if (!inode_owner_or_capable(inode)) | ||
62 | return -EPERM; | ||
63 | |||
64 | /* | ||
65 | * POSIX_ACL_XATTR_ACCESS is tied to i_mode | ||
66 | */ | ||
67 | if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { | ||
68 | acl = posix_acl_from_xattr(&init_user_ns, value, size); | ||
69 | if (IS_ERR(acl)) | ||
70 | return PTR_ERR(acl); | ||
71 | if (acl) { | ||
72 | err = posix_acl_equiv_mode(acl, &inode->i_mode); | ||
73 | posix_acl_release(acl); | ||
74 | if (err < 0) | ||
75 | return err; | ||
76 | mark_inode_dirty(inode); | ||
77 | } | ||
78 | /* | ||
79 | * We're changing the ACL. Get rid of the cached one | ||
80 | */ | ||
81 | forget_cached_acl(inode, ACL_TYPE_ACCESS); | ||
82 | |||
83 | return 0; | ||
84 | } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { | ||
85 | acl = posix_acl_from_xattr(&init_user_ns, value, size); | ||
86 | if (IS_ERR(acl)) | ||
87 | return PTR_ERR(acl); | ||
88 | posix_acl_release(acl); | ||
89 | |||
90 | /* | ||
91 | * We're changing the default ACL. Get rid of the cached one | ||
92 | */ | ||
93 | forget_cached_acl(inode, ACL_TYPE_DEFAULT); | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | #endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */ | ||
98 | return -EOPNOTSUPP; | ||
99 | } | ||
100 | |||
49 | static int can_set_xattr(struct inode *inode, const char *name, | 101 | static int can_set_xattr(struct inode *inode, const char *name, |
50 | const void *value, size_t value_len) | 102 | const void *value, size_t value_len) |
51 | { | 103 | { |
52 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | 104 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) |
53 | return -EOPNOTSUPP; /* TODO: implement ACL support */ | 105 | return can_set_system_xattr(inode, name, value, value_len); |
54 | 106 | ||
55 | if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) { | 107 | if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) { |
56 | /* | 108 | /* |
@@ -253,11 +305,10 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len) | |||
253 | return len; | 305 | return len; |
254 | } | 306 | } |
255 | 307 | ||
256 | static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry, | 308 | static ssize_t hfsplus_getxattr_finder_info(struct inode *inode, |
257 | void *value, size_t size) | 309 | void *value, size_t size) |
258 | { | 310 | { |
259 | ssize_t res = 0; | 311 | ssize_t res = 0; |
260 | struct inode *inode = dentry->d_inode; | ||
261 | struct hfs_find_data fd; | 312 | struct hfs_find_data fd; |
262 | u16 entry_type; | 313 | u16 entry_type; |
263 | u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo); | 314 | u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo); |
@@ -304,10 +355,9 @@ end_getxattr_finder_info: | |||
304 | return res; | 355 | return res; |
305 | } | 356 | } |
306 | 357 | ||
307 | ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | 358 | ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, |
308 | void *value, size_t size) | 359 | void *value, size_t size) |
309 | { | 360 | { |
310 | struct inode *inode = dentry->d_inode; | ||
311 | struct hfs_find_data fd; | 361 | struct hfs_find_data fd; |
312 | hfsplus_attr_entry *entry; | 362 | hfsplus_attr_entry *entry; |
313 | __be32 xattr_record_type; | 363 | __be32 xattr_record_type; |
@@ -333,7 +383,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
333 | } | 383 | } |
334 | 384 | ||
335 | if (!strcmp_xattr_finder_info(name)) | 385 | if (!strcmp_xattr_finder_info(name)) |
336 | return hfsplus_getxattr_finder_info(dentry, value, size); | 386 | return hfsplus_getxattr_finder_info(inode, value, size); |
337 | 387 | ||
338 | if (!HFSPLUS_SB(inode->i_sb)->attr_tree) | 388 | if (!HFSPLUS_SB(inode->i_sb)->attr_tree) |
339 | return -EOPNOTSUPP; | 389 | return -EOPNOTSUPP; |
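The new can_set_system_xattr() folds an access ACL back into i_mode via posix_acl_equiv_mode(), which succeeds only when the ACL contains nothing beyond the owner/group/other entries. A hedged userspace sketch of that equivalence test (the entry tags and mode layout mirror the POSIX ACL draft, but the types here are local stand-ins, not the kernel's):

#include <stdio.h>

/* Local stand-ins for the POSIX ACL entry tags. */
enum { ACL_USER_OBJ, ACL_GROUP_OBJ, ACL_OTHER, ACL_USER, ACL_GROUP, ACL_MASK };

struct acl_entry { int tag; unsigned perm; /* rwx bits, 0..7 */ };

/*
 * Return 0 and fill *mode if the ACL is fully representable as a
 * classic mode; return 1 if it carries extended entries.
 */
static int acl_equiv_mode(const struct acl_entry *e, int n, unsigned *mode)
{
	unsigned m = 0;
	int i;

	for (i = 0; i < n; i++) {
		switch (e[i].tag) {
		case ACL_USER_OBJ:  m |= e[i].perm << 6; break;
		case ACL_GROUP_OBJ: m |= e[i].perm << 3; break;
		case ACL_OTHER:     m |= e[i].perm;      break;
		default:            return 1; /* named user/group or mask */
		}
	}
	*mode = m;
	return 0;
}

int main(void)
{
	struct acl_entry acl[] = {
		{ ACL_USER_OBJ, 7 }, { ACL_GROUP_OBJ, 5 }, { ACL_OTHER, 5 },
	};
	unsigned mode;

	if (acl_equiv_mode(acl, 3, &mode) == 0)
		printf("equivalent mode: %o\n", mode); /* prints 755 */
	return 0;
}
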
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h index 847b695b984d..841b5698c0fc 100644 --- a/fs/hfsplus/xattr.h +++ b/fs/hfsplus/xattr.h | |||
@@ -14,8 +14,8 @@ | |||
14 | extern const struct xattr_handler hfsplus_xattr_osx_handler; | 14 | extern const struct xattr_handler hfsplus_xattr_osx_handler; |
15 | extern const struct xattr_handler hfsplus_xattr_user_handler; | 15 | extern const struct xattr_handler hfsplus_xattr_user_handler; |
16 | extern const struct xattr_handler hfsplus_xattr_trusted_handler; | 16 | extern const struct xattr_handler hfsplus_xattr_trusted_handler; |
17 | /*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/ | 17 | extern const struct xattr_handler hfsplus_xattr_acl_access_handler; |
18 | /*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/ | 18 | extern const struct xattr_handler hfsplus_xattr_acl_default_handler; |
19 | extern const struct xattr_handler hfsplus_xattr_security_handler; | 19 | extern const struct xattr_handler hfsplus_xattr_security_handler; |
20 | 20 | ||
21 | extern const struct xattr_handler *hfsplus_xattr_handlers[]; | 21 | extern const struct xattr_handler *hfsplus_xattr_handlers[]; |
@@ -29,9 +29,17 @@ static inline int hfsplus_setxattr(struct dentry *dentry, const char *name, | |||
29 | return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags); | 29 | return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags); |
30 | } | 30 | } |
31 | 31 | ||
32 | ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | 32 | ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, |
33 | void *value, size_t size); | 33 | void *value, size_t size); |
34 | 34 | ||
35 | static inline ssize_t hfsplus_getxattr(struct dentry *dentry, | ||
36 | const char *name, | ||
37 | void *value, | ||
38 | size_t size) | ||
39 | { | ||
40 | return __hfsplus_getxattr(dentry->d_inode, name, value, size); | ||
41 | } | ||
42 | |||
35 | ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); | 43 | ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); |
36 | 44 | ||
37 | int hfsplus_removexattr(struct dentry *dentry, const char *name); | 45 | int hfsplus_removexattr(struct dentry *dentry, const char *name); |
@@ -39,22 +47,7 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name); | |||
39 | int hfsplus_init_security(struct inode *inode, struct inode *dir, | 47 | int hfsplus_init_security(struct inode *inode, struct inode *dir, |
40 | const struct qstr *qstr); | 48 | const struct qstr *qstr); |
41 | 49 | ||
42 | static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir) | 50 | int hfsplus_init_inode_security(struct inode *inode, struct inode *dir, |
43 | { | 51 | const struct qstr *qstr); |
44 | /*TODO: implement*/ | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | static inline int hfsplus_init_inode_security(struct inode *inode, | ||
49 | struct inode *dir, | ||
50 | const struct qstr *qstr) | ||
51 | { | ||
52 | int err; | ||
53 | |||
54 | err = hfsplus_init_acl(inode, dir); | ||
55 | if (!err) | ||
56 | err = hfsplus_init_security(inode, dir, qstr); | ||
57 | return err; | ||
58 | } | ||
59 | 52 | ||
60 | #endif | 53 | #endif |
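The header change mirrors the xattr.c refactor: the core __hfsplus_getxattr() now takes an inode, and hfsplus_getxattr() survives as a thin dentry adapter, so callers that only hold an inode (the ACL code) share one implementation. The shape of that refactor, as a minimal sketch with hypothetical stand-in types:

#include <stdio.h>

/* Hypothetical stand-ins for the VFS objects. */
struct inode  { unsigned long i_ino; };
struct dentry { struct inode *d_inode; };

/* Core logic lives at the lowest-level object it needs. */
static long __getxattr(struct inode *inode, const char *name)
{
	printf("lookup %s on ino %lu\n", name, inode->i_ino);
	return 0;
}

/* Thin adapter for callers that hold the higher-level object. */
static inline long getxattr(struct dentry *dentry, const char *name)
{
	return __getxattr(dentry->d_inode, name);
}

int main(void)
{
	struct inode i = { .i_ino = 42 };
	struct dentry d = { .d_inode = &i };

	return (int)getxattr(&d, "security.capability");
}
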
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index 83b842f113c5..00722765ea79 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/security.h> | 9 | #include <linux/security.h> |
10 | #include "hfsplus_fs.h" | 10 | #include "hfsplus_fs.h" |
11 | #include "xattr.h" | 11 | #include "xattr.h" |
12 | #include "acl.h" | ||
12 | 13 | ||
13 | static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, | 14 | static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, |
14 | void *buffer, size_t size, int type) | 15 | void *buffer, size_t size, int type) |
@@ -96,6 +97,18 @@ int hfsplus_init_security(struct inode *inode, struct inode *dir, | |||
96 | &hfsplus_initxattrs, NULL); | 97 | &hfsplus_initxattrs, NULL); |
97 | } | 98 | } |
98 | 99 | ||
100 | int hfsplus_init_inode_security(struct inode *inode, | ||
101 | struct inode *dir, | ||
102 | const struct qstr *qstr) | ||
103 | { | ||
104 | int err; | ||
105 | |||
106 | err = hfsplus_init_posix_acl(inode, dir); | ||
107 | if (!err) | ||
108 | err = hfsplus_init_security(inode, dir, qstr); | ||
109 | return err; | ||
110 | } | ||
111 | |||
99 | const struct xattr_handler hfsplus_xattr_security_handler = { | 112 | const struct xattr_handler hfsplus_xattr_security_handler = { |
100 | .prefix = XATTR_SECURITY_PREFIX, | 113 | .prefix = XATTR_SECURITY_PREFIX, |
101 | .list = hfsplus_security_listxattr, | 114 | .list = hfsplus_security_listxattr, |
diff --git a/fs/namespace.c b/fs/namespace.c index 25845d1b300b..da5c49483430 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <linux/security.h> | 17 | #include <linux/security.h> |
18 | #include <linux/idr.h> | 18 | #include <linux/idr.h> |
19 | #include <linux/acct.h> /* acct_auto_close_mnt */ | 19 | #include <linux/acct.h> /* acct_auto_close_mnt */ |
20 | #include <linux/ramfs.h> /* init_rootfs */ | 20 | #include <linux/init.h> /* init_rootfs */ |
21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ | 21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ |
22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ | 22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ |
23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 8a404576fb26..b4f788e0ca31 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -51,10 +51,6 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size) | |||
51 | return ERR_PTR(-EINVAL); | 51 | return ERR_PTR(-EINVAL); |
52 | 52 | ||
53 | count = size / sizeof(struct posix_acl_entry); | 53 | count = size / sizeof(struct posix_acl_entry); |
54 | if (count < 0) | ||
55 | return ERR_PTR(-EINVAL); | ||
56 | if (count == 0) | ||
57 | return NULL; | ||
58 | 54 | ||
59 | acl = posix_acl_alloc(count, GFP_NOFS); | 55 | acl = posix_acl_alloc(count, GFP_NOFS); |
60 | if (!acl) | 56 | if (!acl) |
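The checks dropped from ocfs2_acl_from_xattr() were unreachable: the context line just above rejects any size smaller than one posix_acl_entry, so the unsigned division always yields a count of at least 1, and neither a negative nor a zero count can occur. A quick demonstration of that invariant, assuming the same guard:

#include <stdio.h>

struct posix_acl_entry { short tag; unsigned short perm; unsigned id; };

int main(void)
{
	size_t size = 24; /* any size >= sizeof(struct posix_acl_entry) */
	size_t count;

	if (size < sizeof(struct posix_acl_entry))
		return 1; /* the function bails out before dividing */

	count = size / sizeof(struct posix_acl_entry);
	/* With the guard above, count is always >= 1 here. */
	printf("count = %zu\n", count);
	return 0;
}
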
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 94417a85ce6e..f37d3c0e2053 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -2044,7 +2044,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
2044 | 2044 | ||
2045 | out_write_size: | 2045 | out_write_size: |
2046 | pos += copied; | 2046 | pos += copied; |
2047 | if (pos > inode->i_size) { | 2047 | if (pos > i_size_read(inode)) { |
2048 | i_size_write(inode, pos); | 2048 | i_size_write(inode, pos); |
2049 | mark_inode_dirty(inode); | 2049 | mark_inode_dirty(inode); |
2050 | } | 2050 | } |
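Several hunks in this series replace direct inode->i_size reads with i_size_read(). On 32-bit SMP kernels the 64-bit size is protected by a seqcount, so a reader that races a writer retries instead of seeing a torn value. A minimal userspace sketch of that retry loop using C11 atomics (a simplification of the kernel's seqcount, not its actual implementation):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct sized {
	atomic_uint seq;     /* odd while a writer is mid-update */
	int64_t size;        /* the value 32-bit readers could tear */
};

static int64_t size_read(struct sized *s)
{
	unsigned start;
	int64_t v;

	for (;;) {
		start = atomic_load(&s->seq);
		if (start & 1)
			continue;          /* writer mid-update: reload seq */
		v = s->size;
		atomic_thread_fence(memory_order_acquire);
		if (atomic_load(&s->seq) == start)
			return v;          /* no writer slipped in: v is consistent */
	}
}

static void size_write(struct sized *s, int64_t v)
{
	atomic_fetch_add(&s->seq, 1);      /* seq becomes odd */
	atomic_thread_fence(memory_order_release);
	s->size = v;
	atomic_thread_fence(memory_order_release);
	atomic_fetch_add(&s->seq, 1);      /* seq even again */
}

int main(void)
{
	struct sized s = { 0, 0 };

	size_write(&s, 1 << 20);
	printf("size = %lld\n", (long long)size_read(&s));
	return 0;
}
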
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5c1c864e81cc..363f0dcc924f 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -628,11 +628,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, | |||
628 | struct o2nm_node *node, | 628 | struct o2nm_node *node, |
629 | int idx) | 629 | int idx) |
630 | { | 630 | { |
631 | struct list_head *iter; | ||
632 | struct o2hb_callback_func *f; | 631 | struct o2hb_callback_func *f; |
633 | 632 | ||
634 | list_for_each(iter, &hbcall->list) { | 633 | list_for_each_entry(f, &hbcall->list, hc_item) { |
635 | f = list_entry(iter, struct o2hb_callback_func, hc_item); | ||
636 | mlog(ML_HEARTBEAT, "calling funcs %p\n", f); | 634 | mlog(ML_HEARTBEAT, "calling funcs %p\n", f); |
637 | (f->hc_func)(node, idx, f->hc_data); | 635 | (f->hc_func)(node, idx, f->hc_data); |
638 | } | 636 | } |
@@ -641,16 +639,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, | |||
641 | /* Will run the list in order until we process the passed event */ | 639 | /* Will run the list in order until we process the passed event */ |
642 | static void o2hb_run_event_list(struct o2hb_node_event *queued_event) | 640 | static void o2hb_run_event_list(struct o2hb_node_event *queued_event) |
643 | { | 641 | { |
644 | int empty; | ||
645 | struct o2hb_callback *hbcall; | 642 | struct o2hb_callback *hbcall; |
646 | struct o2hb_node_event *event; | 643 | struct o2hb_node_event *event; |
647 | 644 | ||
648 | spin_lock(&o2hb_live_lock); | ||
649 | empty = list_empty(&queued_event->hn_item); | ||
650 | spin_unlock(&o2hb_live_lock); | ||
651 | if (empty) | ||
652 | return; | ||
653 | |||
654 | /* Holding callback sem assures we don't alter the callback | 645 | /* Holding callback sem assures we don't alter the callback |
655 | * lists when doing this, and serializes ourselves with other | 646 | * lists when doing this, and serializes ourselves with other |
656 | * processes wanting callbacks. */ | 647 | * processes wanting callbacks. */ |
@@ -709,6 +700,7 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
709 | struct o2hb_node_event event = | 700 | struct o2hb_node_event event = |
710 | { .hn_item = LIST_HEAD_INIT(event.hn_item), }; | 701 | { .hn_item = LIST_HEAD_INIT(event.hn_item), }; |
711 | struct o2nm_node *node; | 702 | struct o2nm_node *node; |
703 | int queued = 0; | ||
712 | 704 | ||
713 | node = o2nm_get_node_by_num(slot->ds_node_num); | 705 | node = o2nm_get_node_by_num(slot->ds_node_num); |
714 | if (!node) | 706 | if (!node) |
@@ -726,11 +718,13 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
726 | 718 | ||
727 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, | 719 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, |
728 | slot->ds_node_num); | 720 | slot->ds_node_num); |
721 | queued = 1; | ||
729 | } | 722 | } |
730 | } | 723 | } |
731 | spin_unlock(&o2hb_live_lock); | 724 | spin_unlock(&o2hb_live_lock); |
732 | 725 | ||
733 | o2hb_run_event_list(&event); | 726 | if (queued) |
727 | o2hb_run_event_list(&event); | ||
734 | 728 | ||
735 | o2nm_node_put(node); | 729 | o2nm_node_put(node); |
736 | } | 730 | } |
@@ -790,6 +784,7 @@ static int o2hb_check_slot(struct o2hb_region *reg, | |||
790 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; | 784 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; |
791 | unsigned int slot_dead_ms; | 785 | unsigned int slot_dead_ms; |
792 | int tmp; | 786 | int tmp; |
787 | int queued = 0; | ||
793 | 788 | ||
794 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); | 789 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); |
795 | 790 | ||
@@ -883,6 +878,7 @@ fire_callbacks: | |||
883 | slot->ds_node_num); | 878 | slot->ds_node_num); |
884 | 879 | ||
885 | changed = 1; | 880 | changed = 1; |
881 | queued = 1; | ||
886 | } | 882 | } |
887 | 883 | ||
888 | list_add_tail(&slot->ds_live_item, | 884 | list_add_tail(&slot->ds_live_item, |
@@ -934,6 +930,7 @@ fire_callbacks: | |||
934 | node, slot->ds_node_num); | 930 | node, slot->ds_node_num); |
935 | 931 | ||
936 | changed = 1; | 932 | changed = 1; |
933 | queued = 1; | ||
937 | } | 934 | } |
938 | 935 | ||
939 | /* We don't clear this because the node is still | 936 | /* We don't clear this because the node is still |
@@ -949,7 +946,8 @@ fire_callbacks: | |||
949 | out: | 946 | out: |
950 | spin_unlock(&o2hb_live_lock); | 947 | spin_unlock(&o2hb_live_lock); |
951 | 948 | ||
952 | o2hb_run_event_list(&event); | 949 | if (queued) |
950 | o2hb_run_event_list(&event); | ||
953 | 951 | ||
954 | if (node) | 952 | if (node) |
955 | o2nm_node_put(node); | 953 | o2nm_node_put(node); |
@@ -2516,8 +2514,7 @@ unlock: | |||
2516 | int o2hb_register_callback(const char *region_uuid, | 2514 | int o2hb_register_callback(const char *region_uuid, |
2517 | struct o2hb_callback_func *hc) | 2515 | struct o2hb_callback_func *hc) |
2518 | { | 2516 | { |
2519 | struct o2hb_callback_func *tmp; | 2517 | struct o2hb_callback_func *f; |
2520 | struct list_head *iter; | ||
2521 | struct o2hb_callback *hbcall; | 2518 | struct o2hb_callback *hbcall; |
2522 | int ret; | 2519 | int ret; |
2523 | 2520 | ||
@@ -2540,10 +2537,9 @@ int o2hb_register_callback(const char *region_uuid, | |||
2540 | 2537 | ||
2541 | down_write(&o2hb_callback_sem); | 2538 | down_write(&o2hb_callback_sem); |
2542 | 2539 | ||
2543 | list_for_each(iter, &hbcall->list) { | 2540 | list_for_each_entry(f, &hbcall->list, hc_item) { |
2544 | tmp = list_entry(iter, struct o2hb_callback_func, hc_item); | 2541 | if (hc->hc_priority < f->hc_priority) { |
2545 | if (hc->hc_priority < tmp->hc_priority) { | 2542 | list_add_tail(&hc->hc_item, &f->hc_item); |
2546 | list_add_tail(&hc->hc_item, iter); | ||
2547 | break; | 2543 | break; |
2548 | } | 2544 | } |
2549 | } | 2545 | } |
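The recurring cleanup in this series swaps the two-step list_for_each() + list_entry() idiom for list_for_each_entry(), which folds the container_of() arithmetic into the loop header and drops the separate iterator variable. A compact userspace rendition of the macros involved (simplified from the kernel's list.h):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

#define list_for_each_entry(pos, head, member)                            \
	for (pos = container_of((head)->next, __typeof__(*pos), member);     \
	     &pos->member != (head);                                         \
	     pos = container_of(pos->member.next, __typeof__(*pos), member))

struct callback { struct list_head item; const char *name; };

static void list_add_tail_(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

int main(void)
{
	struct list_head head = { &head, &head };
	struct callback a = { .name = "hb" }, b = { .name = "net" };
	struct callback *f;

	list_add_tail_(&a.item, &head);
	list_add_tail_(&b.item, &head);

	/* One declaration fewer and no list_entry() in the body. */
	list_for_each_entry(f, &head, item)
		printf("calling %s\n", f->name);
	return 0;
}
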
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d644dc611425..2cd2406b4140 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -543,8 +543,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
543 | } | 543 | } |
544 | 544 | ||
545 | if (was_valid && !valid) { | 545 | if (was_valid && !valid) { |
546 | printk(KERN_NOTICE "o2net: No longer connected to " | 546 | if (old_sc) |
547 | SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); | 547 | printk(KERN_NOTICE "o2net: No longer connected to " |
548 | SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); | ||
548 | o2net_complete_nodes_nsw(nn); | 549 | o2net_complete_nodes_nsw(nn); |
549 | } | 550 | } |
550 | 551 | ||
@@ -765,32 +766,32 @@ static struct o2net_msg_handler * | |||
765 | o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p, | 766 | o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p, |
766 | struct rb_node **ret_parent) | 767 | struct rb_node **ret_parent) |
767 | { | 768 | { |
768 | struct rb_node **p = &o2net_handler_tree.rb_node; | 769 | struct rb_node **p = &o2net_handler_tree.rb_node; |
769 | struct rb_node *parent = NULL; | 770 | struct rb_node *parent = NULL; |
770 | struct o2net_msg_handler *nmh, *ret = NULL; | 771 | struct o2net_msg_handler *nmh, *ret = NULL; |
771 | int cmp; | 772 | int cmp; |
772 | 773 | ||
773 | while (*p) { | 774 | while (*p) { |
774 | parent = *p; | 775 | parent = *p; |
775 | nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); | 776 | nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); |
776 | cmp = o2net_handler_cmp(nmh, msg_type, key); | 777 | cmp = o2net_handler_cmp(nmh, msg_type, key); |
777 | 778 | ||
778 | if (cmp < 0) | 779 | if (cmp < 0) |
779 | p = &(*p)->rb_left; | 780 | p = &(*p)->rb_left; |
780 | else if (cmp > 0) | 781 | else if (cmp > 0) |
781 | p = &(*p)->rb_right; | 782 | p = &(*p)->rb_right; |
782 | else { | 783 | else { |
783 | ret = nmh; | 784 | ret = nmh; |
784 | break; | 785 | break; |
785 | } | 786 | } |
786 | } | 787 | } |
787 | 788 | ||
788 | if (ret_p != NULL) | 789 | if (ret_p != NULL) |
789 | *ret_p = p; | 790 | *ret_p = p; |
790 | if (ret_parent != NULL) | 791 | if (ret_parent != NULL) |
791 | *ret_parent = parent; | 792 | *ret_parent = parent; |
792 | 793 | ||
793 | return ret; | 794 | return ret; |
794 | } | 795 | } |
795 | 796 | ||
796 | static void o2net_handler_kref_release(struct kref *kref) | 797 | static void o2net_handler_kref_release(struct kref *kref) |
@@ -1695,13 +1696,12 @@ static void o2net_start_connect(struct work_struct *work) | |||
1695 | ret = 0; | 1696 | ret = 0; |
1696 | 1697 | ||
1697 | out: | 1698 | out: |
1698 | if (ret) { | 1699 | if (ret && sc) { |
1699 | printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT | 1700 | printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT |
1700 | " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); | 1701 | " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); |
1701 | /* 0 err so that another will be queued and attempted | 1702 | /* 0 err so that another will be queued and attempted |
1702 | * from set_nn_state */ | 1703 | * from set_nn_state */ |
1703 | if (sc) | 1704 | o2net_ensure_shutdown(nn, sc, 0); |
1704 | o2net_ensure_shutdown(nn, sc, 0); | ||
1705 | } | 1705 | } |
1706 | if (sc) | 1706 | if (sc) |
1707 | sc_put(sc); | 1707 | sc_put(sc); |
@@ -1873,12 +1873,16 @@ static int o2net_accept_one(struct socket *sock) | |||
1873 | 1873 | ||
1874 | if (o2nm_this_node() >= node->nd_num) { | 1874 | if (o2nm_this_node() >= node->nd_num) { |
1875 | local_node = o2nm_get_node_by_num(o2nm_this_node()); | 1875 | local_node = o2nm_get_node_by_num(o2nm_this_node()); |
1876 | printk(KERN_NOTICE "o2net: Unexpected connect attempt seen " | 1876 | if (local_node) |
1877 | "at node '%s' (%u, %pI4:%d) from node '%s' (%u, " | 1877 | printk(KERN_NOTICE "o2net: Unexpected connect attempt " |
1878 | "%pI4:%d)\n", local_node->nd_name, local_node->nd_num, | 1878 | "seen at node '%s' (%u, %pI4:%d) from " |
1879 | &(local_node->nd_ipv4_address), | 1879 | "node '%s' (%u, %pI4:%d)\n", |
1880 | ntohs(local_node->nd_ipv4_port), node->nd_name, | 1880 | local_node->nd_name, local_node->nd_num, |
1881 | node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port)); | 1881 | &(local_node->nd_ipv4_address), |
1882 | ntohs(local_node->nd_ipv4_port), | ||
1883 | node->nd_name, | ||
1884 | node->nd_num, &sin.sin_addr.s_addr, | ||
1885 | ntohs(sin.sin_port)); | ||
1882 | ret = -EINVAL; | 1886 | ret = -EINVAL; |
1883 | goto out; | 1887 | goto out; |
1884 | } | 1888 | } |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index fbec0be62326..b46278f9ae44 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -292,7 +292,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, | |||
292 | struct dlm_lock *lock = NULL; | 292 | struct dlm_lock *lock = NULL; |
293 | struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; | 293 | struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; |
294 | char *name; | 294 | char *name; |
295 | struct list_head *iter, *head=NULL; | 295 | struct list_head *head = NULL; |
296 | __be64 cookie; | 296 | __be64 cookie; |
297 | u32 flags; | 297 | u32 flags; |
298 | u8 node; | 298 | u8 node; |
@@ -373,8 +373,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, | |||
373 | /* try convert queue for both ast/bast */ | 373 | /* try convert queue for both ast/bast */ |
374 | head = &res->converting; | 374 | head = &res->converting; |
375 | lock = NULL; | 375 | lock = NULL; |
376 | list_for_each(iter, head) { | 376 | list_for_each_entry(lock, head, list) { |
377 | lock = list_entry (iter, struct dlm_lock, list); | ||
378 | if (lock->ml.cookie == cookie) | 377 | if (lock->ml.cookie == cookie) |
379 | goto do_ast; | 378 | goto do_ast; |
380 | } | 379 | } |
@@ -385,8 +384,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, | |||
385 | else | 384 | else |
386 | head = &res->granted; | 385 | head = &res->granted; |
387 | 386 | ||
388 | list_for_each(iter, head) { | 387 | list_for_each_entry(lock, head, list) { |
389 | lock = list_entry (iter, struct dlm_lock, list); | ||
390 | if (lock->ml.cookie == cookie) | 388 | if (lock->ml.cookie == cookie) |
391 | goto do_ast; | 389 | goto do_ast; |
392 | } | 390 | } |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index de854cca12a2..e0517762fcc0 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -1079,11 +1079,9 @@ static inline int dlm_lock_compatible(int existing, int request) | |||
1079 | static inline int dlm_lock_on_list(struct list_head *head, | 1079 | static inline int dlm_lock_on_list(struct list_head *head, |
1080 | struct dlm_lock *lock) | 1080 | struct dlm_lock *lock) |
1081 | { | 1081 | { |
1082 | struct list_head *iter; | ||
1083 | struct dlm_lock *tmplock; | 1082 | struct dlm_lock *tmplock; |
1084 | 1083 | ||
1085 | list_for_each(iter, head) { | 1084 | list_for_each_entry(tmplock, head, list) { |
1086 | tmplock = list_entry(iter, struct dlm_lock, list); | ||
1087 | if (tmplock == lock) | 1085 | if (tmplock == lock) |
1088 | return 1; | 1086 | return 1; |
1089 | } | 1087 | } |
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 29a886d1e82c..e36d63ff1783 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c | |||
@@ -123,7 +123,6 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, | |||
123 | int *kick_thread) | 123 | int *kick_thread) |
124 | { | 124 | { |
125 | enum dlm_status status = DLM_NORMAL; | 125 | enum dlm_status status = DLM_NORMAL; |
126 | struct list_head *iter; | ||
127 | struct dlm_lock *tmplock=NULL; | 126 | struct dlm_lock *tmplock=NULL; |
128 | 127 | ||
129 | assert_spin_locked(&res->spinlock); | 128 | assert_spin_locked(&res->spinlock); |
@@ -185,16 +184,14 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, | |||
185 | 184 | ||
186 | /* upconvert from here on */ | 185 | /* upconvert from here on */ |
187 | status = DLM_NORMAL; | 186 | status = DLM_NORMAL; |
188 | list_for_each(iter, &res->granted) { | 187 | list_for_each_entry(tmplock, &res->granted, list) { |
189 | tmplock = list_entry(iter, struct dlm_lock, list); | ||
190 | if (tmplock == lock) | 188 | if (tmplock == lock) |
191 | continue; | 189 | continue; |
192 | if (!dlm_lock_compatible(tmplock->ml.type, type)) | 190 | if (!dlm_lock_compatible(tmplock->ml.type, type)) |
193 | goto switch_queues; | 191 | goto switch_queues; |
194 | } | 192 | } |
195 | 193 | ||
196 | list_for_each(iter, &res->converting) { | 194 | list_for_each_entry(tmplock, &res->converting, list) { |
197 | tmplock = list_entry(iter, struct dlm_lock, list); | ||
198 | if (!dlm_lock_compatible(tmplock->ml.type, type)) | 195 | if (!dlm_lock_compatible(tmplock->ml.type, type)) |
199 | goto switch_queues; | 196 | goto switch_queues; |
200 | /* existing conversion requests take precedence */ | 197 | /* existing conversion requests take precedence */ |
@@ -424,8 +421,8 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, | |||
424 | struct dlm_ctxt *dlm = data; | 421 | struct dlm_ctxt *dlm = data; |
425 | struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; | 422 | struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; |
426 | struct dlm_lock_resource *res = NULL; | 423 | struct dlm_lock_resource *res = NULL; |
427 | struct list_head *iter; | ||
428 | struct dlm_lock *lock = NULL; | 424 | struct dlm_lock *lock = NULL; |
425 | struct dlm_lock *tmp_lock; | ||
429 | struct dlm_lockstatus *lksb; | 426 | struct dlm_lockstatus *lksb; |
430 | enum dlm_status status = DLM_NORMAL; | 427 | enum dlm_status status = DLM_NORMAL; |
431 | u32 flags; | 428 | u32 flags; |
@@ -471,14 +468,13 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, | |||
471 | dlm_error(status); | 468 | dlm_error(status); |
472 | goto leave; | 469 | goto leave; |
473 | } | 470 | } |
474 | list_for_each(iter, &res->granted) { | 471 | list_for_each_entry(tmp_lock, &res->granted, list) { |
475 | lock = list_entry(iter, struct dlm_lock, list); | 472 | if (tmp_lock->ml.cookie == cnv->cookie && |
476 | if (lock->ml.cookie == cnv->cookie && | 473 | tmp_lock->ml.node == cnv->node_idx) { |
477 | lock->ml.node == cnv->node_idx) { | 474 | lock = tmp_lock; |
478 | dlm_lock_get(lock); | 475 | dlm_lock_get(lock); |
479 | break; | 476 | break; |
480 | } | 477 | } |
481 | lock = NULL; | ||
482 | } | 478 | } |
483 | spin_unlock(&res->spinlock); | 479 | spin_unlock(&res->spinlock); |
484 | if (!lock) { | 480 | if (!lock) { |
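The dlm_convert_lock_handler() change is more than a mechanical conversion: with list_for_each_entry() the cursor is not a valid entry once the loop falls off the end, so the search now walks a separate tmp_lock and assigns lock only on a match. A sketch of that assign-on-match pattern over a plain linked list:

#include <stdio.h>

struct lock { unsigned long cookie; struct lock *next; };

/* Assign the result only when the predicate matches; the cursor
 * itself must never be trusted after the loop terminates. */
static struct lock *find_lock(struct lock *head, unsigned long cookie)
{
	struct lock *tmp, *found = NULL;

	for (tmp = head; tmp; tmp = tmp->next) {
		if (tmp->cookie == cookie) {
			found = tmp;
			break;
		}
	}
	return found;
}

int main(void)
{
	struct lock c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct lock *l = find_lock(&a, 2);

	printf(l ? "found %lu\n" : "not found\n", l ? l->cookie : 0);
	return 0;
}
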
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 0e28e242226d..e33cd7a3c582 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -96,7 +96,6 @@ static void __dlm_print_lock(struct dlm_lock *lock) | |||
96 | 96 | ||
97 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 97 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
98 | { | 98 | { |
99 | struct list_head *iter2; | ||
100 | struct dlm_lock *lock; | 99 | struct dlm_lock *lock; |
101 | char buf[DLM_LOCKID_NAME_MAX]; | 100 | char buf[DLM_LOCKID_NAME_MAX]; |
102 | 101 | ||
@@ -118,18 +117,15 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
118 | res->inflight_locks, atomic_read(&res->asts_reserved)); | 117 | res->inflight_locks, atomic_read(&res->asts_reserved)); |
119 | dlm_print_lockres_refmap(res); | 118 | dlm_print_lockres_refmap(res); |
120 | printk(" granted queue:\n"); | 119 | printk(" granted queue:\n"); |
121 | list_for_each(iter2, &res->granted) { | 120 | list_for_each_entry(lock, &res->granted, list) { |
122 | lock = list_entry(iter2, struct dlm_lock, list); | ||
123 | __dlm_print_lock(lock); | 121 | __dlm_print_lock(lock); |
124 | } | 122 | } |
125 | printk(" converting queue:\n"); | 123 | printk(" converting queue:\n"); |
126 | list_for_each(iter2, &res->converting) { | 124 | list_for_each_entry(lock, &res->converting, list) { |
127 | lock = list_entry(iter2, struct dlm_lock, list); | ||
128 | __dlm_print_lock(lock); | 125 | __dlm_print_lock(lock); |
129 | } | 126 | } |
130 | printk(" blocked queue:\n"); | 127 | printk(" blocked queue:\n"); |
131 | list_for_each(iter2, &res->blocked) { | 128 | list_for_each_entry(lock, &res->blocked, list) { |
132 | lock = list_entry(iter2, struct dlm_lock, list); | ||
133 | __dlm_print_lock(lock); | 129 | __dlm_print_lock(lock); |
134 | } | 130 | } |
135 | } | 131 | } |
@@ -446,7 +442,6 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) | |||
446 | { | 442 | { |
447 | struct dlm_master_list_entry *mle; | 443 | struct dlm_master_list_entry *mle; |
448 | struct hlist_head *bucket; | 444 | struct hlist_head *bucket; |
449 | struct hlist_node *list; | ||
450 | int i, out = 0; | 445 | int i, out = 0; |
451 | unsigned long total = 0, longest = 0, bucket_count = 0; | 446 | unsigned long total = 0, longest = 0, bucket_count = 0; |
452 | 447 | ||
@@ -456,9 +451,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) | |||
456 | spin_lock(&dlm->master_lock); | 451 | spin_lock(&dlm->master_lock); |
457 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | 452 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
458 | bucket = dlm_master_hash(dlm, i); | 453 | bucket = dlm_master_hash(dlm, i); |
459 | hlist_for_each(list, bucket) { | 454 | hlist_for_each_entry(mle, bucket, master_hash_node) { |
460 | mle = hlist_entry(list, struct dlm_master_list_entry, | ||
461 | master_hash_node); | ||
462 | ++total; | 455 | ++total; |
463 | ++bucket_count; | 456 | ++bucket_count; |
464 | if (len - out < 200) | 457 | if (len - out < 200) |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index dbb17c07656a..8b3382abf840 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -193,7 +193,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, | |||
193 | unsigned int hash) | 193 | unsigned int hash) |
194 | { | 194 | { |
195 | struct hlist_head *bucket; | 195 | struct hlist_head *bucket; |
196 | struct hlist_node *list; | 196 | struct dlm_lock_resource *res; |
197 | 197 | ||
198 | mlog(0, "%.*s\n", len, name); | 198 | mlog(0, "%.*s\n", len, name); |
199 | 199 | ||
@@ -201,9 +201,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, | |||
201 | 201 | ||
202 | bucket = dlm_lockres_hash(dlm, hash); | 202 | bucket = dlm_lockres_hash(dlm, hash); |
203 | 203 | ||
204 | hlist_for_each(list, bucket) { | 204 | hlist_for_each_entry(res, bucket, hash_node) { |
205 | struct dlm_lock_resource *res = hlist_entry(list, | ||
206 | struct dlm_lock_resource, hash_node); | ||
207 | if (res->lockname.name[0] != name[0]) | 205 | if (res->lockname.name[0] != name[0]) |
208 | continue; | 206 | continue; |
209 | if (unlikely(res->lockname.len != len)) | 207 | if (unlikely(res->lockname.len != len)) |
@@ -262,22 +260,19 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, | |||
262 | 260 | ||
263 | static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) | 261 | static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) |
264 | { | 262 | { |
265 | struct dlm_ctxt *tmp = NULL; | 263 | struct dlm_ctxt *tmp; |
266 | struct list_head *iter; | ||
267 | 264 | ||
268 | assert_spin_locked(&dlm_domain_lock); | 265 | assert_spin_locked(&dlm_domain_lock); |
269 | 266 | ||
270 | /* tmp->name here is always NULL terminated, | 267 | /* tmp->name here is always NULL terminated, |
271 | * but domain may not be! */ | 268 | * but domain may not be! */ |
272 | list_for_each(iter, &dlm_domains) { | 269 | list_for_each_entry(tmp, &dlm_domains, list) { |
273 | tmp = list_entry (iter, struct dlm_ctxt, list); | ||
274 | if (strlen(tmp->name) == len && | 270 | if (strlen(tmp->name) == len && |
275 | memcmp(tmp->name, domain, len)==0) | 271 | memcmp(tmp->name, domain, len)==0) |
276 | break; | 272 | return tmp; |
277 | tmp = NULL; | ||
278 | } | 273 | } |
279 | 274 | ||
280 | return tmp; | 275 | return NULL; |
281 | } | 276 | } |
282 | 277 | ||
283 | /* For null terminated domain strings ONLY */ | 278 | /* For null terminated domain strings ONLY */ |
@@ -366,25 +361,22 @@ static void __dlm_get(struct dlm_ctxt *dlm) | |||
366 | * you shouldn't trust your pointer. */ | 361 | * you shouldn't trust your pointer. */ |
367 | struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) | 362 | struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) |
368 | { | 363 | { |
369 | struct list_head *iter; | 364 | struct dlm_ctxt *target; |
370 | struct dlm_ctxt *target = NULL; | 365 | struct dlm_ctxt *ret = NULL; |
371 | 366 | ||
372 | spin_lock(&dlm_domain_lock); | 367 | spin_lock(&dlm_domain_lock); |
373 | 368 | ||
374 | list_for_each(iter, &dlm_domains) { | 369 | list_for_each_entry(target, &dlm_domains, list) { |
375 | target = list_entry (iter, struct dlm_ctxt, list); | ||
376 | |||
377 | if (target == dlm) { | 370 | if (target == dlm) { |
378 | __dlm_get(target); | 371 | __dlm_get(target); |
372 | ret = target; | ||
379 | break; | 373 | break; |
380 | } | 374 | } |
381 | |||
382 | target = NULL; | ||
383 | } | 375 | } |
384 | 376 | ||
385 | spin_unlock(&dlm_domain_lock); | 377 | spin_unlock(&dlm_domain_lock); |
386 | 378 | ||
387 | return target; | 379 | return ret; |
388 | } | 380 | } |
389 | 381 | ||
390 | int dlm_domain_fully_joined(struct dlm_ctxt *dlm) | 382 | int dlm_domain_fully_joined(struct dlm_ctxt *dlm) |
@@ -2296,13 +2288,10 @@ static DECLARE_RWSEM(dlm_callback_sem); | |||
2296 | void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, | 2288 | void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, |
2297 | int node_num) | 2289 | int node_num) |
2298 | { | 2290 | { |
2299 | struct list_head *iter; | ||
2300 | struct dlm_eviction_cb *cb; | 2291 | struct dlm_eviction_cb *cb; |
2301 | 2292 | ||
2302 | down_read(&dlm_callback_sem); | 2293 | down_read(&dlm_callback_sem); |
2303 | list_for_each(iter, &dlm->dlm_eviction_callbacks) { | 2294 | list_for_each_entry(cb, &dlm->dlm_eviction_callbacks, ec_item) { |
2304 | cb = list_entry(iter, struct dlm_eviction_cb, ec_item); | ||
2305 | |||
2306 | cb->ec_func(node_num, cb->ec_data); | 2295 | cb->ec_func(node_num, cb->ec_data); |
2307 | } | 2296 | } |
2308 | up_read(&dlm_callback_sem); | 2297 | up_read(&dlm_callback_sem); |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 47e67c2d228f..5d32f7511f74 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -91,19 +91,14 @@ void dlm_destroy_lock_cache(void) | |||
91 | static int dlm_can_grant_new_lock(struct dlm_lock_resource *res, | 91 | static int dlm_can_grant_new_lock(struct dlm_lock_resource *res, |
92 | struct dlm_lock *lock) | 92 | struct dlm_lock *lock) |
93 | { | 93 | { |
94 | struct list_head *iter; | ||
95 | struct dlm_lock *tmplock; | 94 | struct dlm_lock *tmplock; |
96 | 95 | ||
97 | list_for_each(iter, &res->granted) { | 96 | list_for_each_entry(tmplock, &res->granted, list) { |
98 | tmplock = list_entry(iter, struct dlm_lock, list); | ||
99 | |||
100 | if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) | 97 | if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) |
101 | return 0; | 98 | return 0; |
102 | } | 99 | } |
103 | 100 | ||
104 | list_for_each(iter, &res->converting) { | 101 | list_for_each_entry(tmplock, &res->converting, list) { |
105 | tmplock = list_entry(iter, struct dlm_lock, list); | ||
106 | |||
107 | if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) | 102 | if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) |
108 | return 0; | 103 | return 0; |
109 | if (!dlm_lock_compatible(tmplock->ml.convert_type, | 104 | if (!dlm_lock_compatible(tmplock->ml.convert_type, |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 33ecbe0e6734..cf0f103963b1 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -342,16 +342,13 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, | |||
342 | { | 342 | { |
343 | struct dlm_master_list_entry *tmpmle; | 343 | struct dlm_master_list_entry *tmpmle; |
344 | struct hlist_head *bucket; | 344 | struct hlist_head *bucket; |
345 | struct hlist_node *list; | ||
346 | unsigned int hash; | 345 | unsigned int hash; |
347 | 346 | ||
348 | assert_spin_locked(&dlm->master_lock); | 347 | assert_spin_locked(&dlm->master_lock); |
349 | 348 | ||
350 | hash = dlm_lockid_hash(name, namelen); | 349 | hash = dlm_lockid_hash(name, namelen); |
351 | bucket = dlm_master_hash(dlm, hash); | 350 | bucket = dlm_master_hash(dlm, hash); |
352 | hlist_for_each(list, bucket) { | 351 | hlist_for_each_entry(tmpmle, bucket, master_hash_node) { |
353 | tmpmle = hlist_entry(list, struct dlm_master_list_entry, | ||
354 | master_hash_node); | ||
355 | if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) | 352 | if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) |
356 | continue; | 353 | continue; |
357 | dlm_get_mle(tmpmle); | 354 | dlm_get_mle(tmpmle); |
@@ -3183,7 +3180,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) | |||
3183 | struct dlm_master_list_entry *mle; | 3180 | struct dlm_master_list_entry *mle; |
3184 | struct dlm_lock_resource *res; | 3181 | struct dlm_lock_resource *res; |
3185 | struct hlist_head *bucket; | 3182 | struct hlist_head *bucket; |
3186 | struct hlist_node *list; | 3183 | struct hlist_node *tmp; |
3187 | unsigned int i; | 3184 | unsigned int i; |
3188 | 3185 | ||
3189 | mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node); | 3186 | mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node); |
@@ -3194,10 +3191,7 @@ top: | |||
3194 | spin_lock(&dlm->master_lock); | 3191 | spin_lock(&dlm->master_lock); |
3195 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | 3192 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
3196 | bucket = dlm_master_hash(dlm, i); | 3193 | bucket = dlm_master_hash(dlm, i); |
3197 | hlist_for_each(list, bucket) { | 3194 | hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { |
3198 | mle = hlist_entry(list, struct dlm_master_list_entry, | ||
3199 | master_hash_node); | ||
3200 | |||
3201 | BUG_ON(mle->type != DLM_MLE_BLOCK && | 3195 | BUG_ON(mle->type != DLM_MLE_BLOCK && |
3202 | mle->type != DLM_MLE_MASTER && | 3196 | mle->type != DLM_MLE_MASTER && |
3203 | mle->type != DLM_MLE_MIGRATION); | 3197 | mle->type != DLM_MLE_MIGRATION); |
@@ -3378,7 +3372,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) | |||
3378 | int i; | 3372 | int i; |
3379 | struct hlist_head *bucket; | 3373 | struct hlist_head *bucket; |
3380 | struct dlm_master_list_entry *mle; | 3374 | struct dlm_master_list_entry *mle; |
3381 | struct hlist_node *tmp, *list; | 3375 | struct hlist_node *tmp; |
3382 | 3376 | ||
3383 | /* | 3377 | /* |
3384 | * We notified all other nodes that we are exiting the domain and | 3378 | * We notified all other nodes that we are exiting the domain and |
@@ -3394,9 +3388,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) | |||
3394 | 3388 | ||
3395 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | 3389 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
3396 | bucket = dlm_master_hash(dlm, i); | 3390 | bucket = dlm_master_hash(dlm, i); |
3397 | hlist_for_each_safe(list, tmp, bucket) { | 3391 | hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { |
3398 | mle = hlist_entry(list, struct dlm_master_list_entry, | ||
3399 | master_hash_node); | ||
3400 | if (mle->type != DLM_MLE_BLOCK) { | 3392 | if (mle->type != DLM_MLE_BLOCK) { |
3401 | mlog(ML_ERROR, "bad mle: %p\n", mle); | 3393 | mlog(ML_ERROR, "bad mle: %p\n", mle); |
3402 | dlm_print_one_mle(mle); | 3394 | dlm_print_one_mle(mle); |
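dlm_clean_master_list() and dlm_force_free_mles() can unlink entries while walking a hash bucket, so they move to hlist_for_each_entry_safe(), which caches the next node before the loop body runs. The same idea in plain C over a singly linked bucket (a simplification; the kernel's hlist additionally keeps pprev back-pointers):

#include <stdio.h>
#include <stdlib.h>

struct node { int key; struct node *next; };

/* Iterate with a saved 'next' so the body may free the current node. */
#define for_each_safe(pos, n, head) \
	for (pos = (head); pos && ((n = pos->next), 1); pos = n)

int main(void)
{
	struct node *head = NULL, *pos, *n;
	int i;

	for (i = 0; i < 4; i++) {
		struct node *e = malloc(sizeof(*e));
		e->key = i; e->next = head; head = e;
	}

	for_each_safe(pos, n, head) {
		printf("freeing %d\n", pos->key);
		free(pos);          /* safe: 'n' was read before this */
	}
	return 0;
}
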
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 773bd32bfd8c..0b5adca1b178 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -787,6 +787,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
787 | { | 787 | { |
788 | struct dlm_lock_request lr; | 788 | struct dlm_lock_request lr; |
789 | int ret; | 789 | int ret; |
790 | int status; | ||
790 | 791 | ||
791 | mlog(0, "\n"); | 792 | mlog(0, "\n"); |
792 | 793 | ||
@@ -800,13 +801,15 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
800 | 801 | ||
801 | // send message | 802 | // send message |
802 | ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, | 803 | ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, |
803 | &lr, sizeof(lr), request_from, NULL); | 804 | &lr, sizeof(lr), request_from, &status); |
804 | 805 | ||
805 | /* negative status is handled by caller */ | 806 | /* negative status is handled by caller */ |
806 | if (ret < 0) | 807 | if (ret < 0) |
807 | mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " | 808 | mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " |
808 | "to recover dead node %u\n", dlm->name, ret, | 809 | "to recover dead node %u\n", dlm->name, ret, |
809 | request_from, dead_node); | 810 | request_from, dead_node); |
811 | else | ||
812 | ret = status; | ||
810 | // return from here, then | 813 | // return from here, then |
811 | // sleep until all received or error | 814 | // sleep until all received or error |
812 | return ret; | 815 | return ret; |
@@ -2328,6 +2331,14 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2328 | } else if (res->owner == dlm->node_num) { | 2331 | } else if (res->owner == dlm->node_num) { |
2329 | dlm_free_dead_locks(dlm, res, dead_node); | 2332 | dlm_free_dead_locks(dlm, res, dead_node); |
2330 | __dlm_lockres_calc_usage(dlm, res); | 2333 | __dlm_lockres_calc_usage(dlm, res); |
2334 | } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { | ||
2335 | if (test_bit(dead_node, res->refmap)) { | ||
2336 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " | ||
2337 | "no locks and had not purged before dying\n", | ||
2338 | dlm->name, res->lockname.len, | ||
2339 | res->lockname.name, dead_node); | ||
2340 | dlm_lockres_clear_refmap_bit(dlm, res, dead_node); | ||
2341 | } | ||
2331 | } | 2342 | } |
2332 | spin_unlock(&res->spinlock); | 2343 | spin_unlock(&res->spinlock); |
2333 | } | 2344 | } |
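The dlm_request_all_locks() fix distinguishes the transport return code from the remote handler's status: o2net_send_message() can return 0 (message delivered) while the peer reports failure through the out-parameter, and before this change that status was discarded. The control flow, sketched with a hypothetical send function:

#include <stdio.h>

/* Hypothetical transport: returns <0 on send failure, 0 on delivery,
 * and stores the remote handler's verdict in *status. */
static int send_message(int *status)
{
	*status = -11;   /* peer said "try again" (-EAGAIN-like) */
	return 0;        /* but the wire transfer itself succeeded */
}

static int request_all_locks(void)
{
	int status;
	int ret = send_message(&status);

	if (ret < 0)
		fprintf(stderr, "transport error %d\n", ret);
	else
		ret = status;   /* propagate what the peer decided */
	return ret;
}

int main(void)
{
	printf("ret = %d\n", request_all_locks());
	return 0;
}
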
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index e73c833fc2a1..9db869de829d 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -286,8 +286,6 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, | |||
286 | struct dlm_lock_resource *res) | 286 | struct dlm_lock_resource *res) |
287 | { | 287 | { |
288 | struct dlm_lock *lock, *target; | 288 | struct dlm_lock *lock, *target; |
289 | struct list_head *iter; | ||
290 | struct list_head *head; | ||
291 | int can_grant = 1; | 289 | int can_grant = 1; |
292 | 290 | ||
293 | /* | 291 | /* |
@@ -314,9 +312,7 @@ converting: | |||
314 | dlm->name, res->lockname.len, res->lockname.name); | 312 | dlm->name, res->lockname.len, res->lockname.name); |
315 | BUG(); | 313 | BUG(); |
316 | } | 314 | } |
317 | head = &res->granted; | 315 | list_for_each_entry(lock, &res->granted, list) { |
318 | list_for_each(iter, head) { | ||
319 | lock = list_entry(iter, struct dlm_lock, list); | ||
320 | if (lock==target) | 316 | if (lock==target) |
321 | continue; | 317 | continue; |
322 | if (!dlm_lock_compatible(lock->ml.type, | 318 | if (!dlm_lock_compatible(lock->ml.type, |
@@ -333,9 +329,8 @@ converting: | |||
333 | target->ml.convert_type; | 329 | target->ml.convert_type; |
334 | } | 330 | } |
335 | } | 331 | } |
336 | head = &res->converting; | 332 | |
337 | list_for_each(iter, head) { | 333 | list_for_each_entry(lock, &res->converting, list) { |
338 | lock = list_entry(iter, struct dlm_lock, list); | ||
339 | if (lock==target) | 334 | if (lock==target) |
340 | continue; | 335 | continue; |
341 | if (!dlm_lock_compatible(lock->ml.type, | 336 | if (!dlm_lock_compatible(lock->ml.type, |
@@ -384,9 +379,7 @@ blocked: | |||
384 | goto leave; | 379 | goto leave; |
385 | target = list_entry(res->blocked.next, struct dlm_lock, list); | 380 | target = list_entry(res->blocked.next, struct dlm_lock, list); |
386 | 381 | ||
387 | head = &res->granted; | 382 | list_for_each_entry(lock, &res->granted, list) { |
388 | list_for_each(iter, head) { | ||
389 | lock = list_entry(iter, struct dlm_lock, list); | ||
390 | if (lock==target) | 383 | if (lock==target) |
391 | continue; | 384 | continue; |
392 | if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { | 385 | if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { |
@@ -400,9 +393,7 @@ blocked: | |||
400 | } | 393 | } |
401 | } | 394 | } |
402 | 395 | ||
403 | head = &res->converting; | 396 | list_for_each_entry(lock, &res->converting, list) { |
404 | list_for_each(iter, head) { | ||
405 | lock = list_entry(iter, struct dlm_lock, list); | ||
406 | if (lock==target) | 397 | if (lock==target) |
407 | continue; | 398 | continue; |
408 | if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { | 399 | if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 850aa7e87537..5698b52cf5c9 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -388,7 +388,6 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, | |||
388 | struct dlm_ctxt *dlm = data; | 388 | struct dlm_ctxt *dlm = data; |
389 | struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; | 389 | struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; |
390 | struct dlm_lock_resource *res = NULL; | 390 | struct dlm_lock_resource *res = NULL; |
391 | struct list_head *iter; | ||
392 | struct dlm_lock *lock = NULL; | 391 | struct dlm_lock *lock = NULL; |
393 | enum dlm_status status = DLM_NORMAL; | 392 | enum dlm_status status = DLM_NORMAL; |
394 | int found = 0, i; | 393 | int found = 0, i; |
@@ -458,8 +457,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, | |||
458 | } | 457 | } |
459 | 458 | ||
460 | for (i=0; i<3; i++) { | 459 | for (i=0; i<3; i++) { |
461 | list_for_each(iter, queue) { | 460 | list_for_each_entry(lock, queue, list) { |
462 | lock = list_entry(iter, struct dlm_lock, list); | ||
463 | if (lock->ml.cookie == unlock->cookie && | 461 | if (lock->ml.cookie == unlock->cookie && |
464 | lock->ml.node == unlock->node_idx) { | 462 | lock->ml.node == unlock->node_idx) { |
465 | dlm_lock_get(lock); | 463 | dlm_lock_get(lock); |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 12bafb7265ce..efa2b3d339e3 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -401,11 +401,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) | |||
401 | { | 401 | { |
402 | struct inode *inode = new_inode(sb); | 402 | struct inode *inode = new_inode(sb); |
403 | umode_t mode = S_IFDIR | 0755; | 403 | umode_t mode = S_IFDIR | 0755; |
404 | struct dlmfs_inode_private *ip; | ||
405 | 404 | ||
406 | if (inode) { | 405 | if (inode) { |
407 | ip = DLMFS_I(inode); | ||
408 | |||
409 | inode->i_ino = get_next_ino(); | 406 | inode->i_ino = get_next_ino(); |
410 | inode_init_owner(inode, NULL, mode); | 407 | inode_init_owner(inode, NULL, mode); |
411 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | 408 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2487116d0d33..767370b656ca 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
@@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
781 | cpos = map_start >> osb->s_clustersize_bits; | 781 | cpos = map_start >> osb->s_clustersize_bits; |
782 | mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, | 782 | mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, |
783 | map_start + map_len); | 783 | map_start + map_len); |
784 | mapping_end -= cpos; | ||
785 | is_last = 0; | 784 | is_last = 0; |
786 | while (cpos < mapping_end && !is_last) { | 785 | while (cpos < mapping_end && !is_last) { |
787 | u32 fe_flags; | 786 | u32 fe_flags; |
@@ -852,20 +851,20 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) | |||
852 | 851 | ||
853 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | 852 | down_read(&OCFS2_I(inode)->ip_alloc_sem); |
854 | 853 | ||
855 | if (*offset >= inode->i_size) { | 854 | if (*offset >= i_size_read(inode)) { |
856 | ret = -ENXIO; | 855 | ret = -ENXIO; |
857 | goto out_unlock; | 856 | goto out_unlock; |
858 | } | 857 | } |
859 | 858 | ||
860 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 859 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
861 | if (whence == SEEK_HOLE) | 860 | if (whence == SEEK_HOLE) |
862 | *offset = inode->i_size; | 861 | *offset = i_size_read(inode); |
863 | goto out_unlock; | 862 | goto out_unlock; |
864 | } | 863 | } |
865 | 864 | ||
866 | clen = 0; | 865 | clen = 0; |
867 | cpos = *offset >> cs_bits; | 866 | cpos = *offset >> cs_bits; |
868 | cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size); | 867 | cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); |
869 | 868 | ||
870 | while (cpos < cend && !is_last) { | 869 | while (cpos < cend && !is_last) { |
871 | ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, | 870 | ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, |
@@ -904,8 +903,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) | |||
904 | extlen = clen; | 903 | extlen = clen; |
905 | extlen <<= cs_bits; | 904 | extlen <<= cs_bits; |
906 | 905 | ||
907 | if ((extoff + extlen) > inode->i_size) | 906 | if ((extoff + extlen) > i_size_read(inode)) |
908 | extlen = inode->i_size - extoff; | 907 | extlen = i_size_read(inode) - extoff; |
909 | extoff += extlen; | 908 | extoff += extlen; |
910 | if (extoff > *offset) | 909 | if (extoff > *offset) |
911 | *offset = extoff; | 910 | *offset = extoff; |
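The one-line removal in ocfs2_fiemap() fixes the loop bound: mapping_end is the absolute last cluster of the requested range, and the walk already compares the absolute cpos against it, so pre-subtracting cpos shortened the mapped range whenever map_start was non-zero. The arithmetic spelled out (the cluster size and offsets are hypothetical):

#include <stdio.h>

int main(void)
{
	unsigned cluster_bits = 20;              /* 1 MiB clusters, say */
	unsigned long long map_start = 5ULL << 20, map_len = 3ULL << 20;

	unsigned long long cpos = map_start >> cluster_bits;        /* 5 */
	unsigned long long mapping_end =
		(map_start + map_len + (1ULL << cluster_bits) - 1)
		>> cluster_bits;                                     /* 8 */

	/* Buggy version: mapping_end -= cpos would leave 3, and the
	 * walk "while (cpos < mapping_end)" (5 < 3) would map nothing. */
	while (cpos < mapping_end) {
		printf("map cluster %llu\n", cpos);
		cpos++;
	}
	return 0;
}
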
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3261d71319ee..4f8197caa487 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -671,11 +671,7 @@ restarted_transaction: | |||
671 | } else { | 671 | } else { |
672 | BUG_ON(why != RESTART_TRANS); | 672 | BUG_ON(why != RESTART_TRANS); |
673 | 673 | ||
674 | /* TODO: This can be more intelligent. */ | 674 | status = ocfs2_allocate_extend_trans(handle, 1); |
675 | credits = ocfs2_calc_extend_credits(osb->sb, | ||
676 | &fe->id2.i_list, | ||
677 | clusters_to_add); | ||
678 | status = ocfs2_extend_trans(handle, credits); | ||
679 | if (status < 0) { | 675 | if (status < 0) { |
680 | /* handle still has to be committed at | 676 | /* handle still has to be committed at |
681 | * this point. */ | 677 | * this point. */ |
@@ -1800,6 +1796,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, | |||
1800 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); | 1796 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); |
1801 | 1797 | ||
1802 | out: | 1798 | out: |
1799 | ocfs2_free_path(path); | ||
1803 | ocfs2_schedule_truncate_log_flush(osb, 1); | 1800 | ocfs2_schedule_truncate_log_flush(osb, 1); |
1804 | ocfs2_run_deallocs(osb, &dealloc); | 1801 | ocfs2_run_deallocs(osb, &dealloc); |
1805 | 1802 | ||
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 0c60ef2d8056..fa32ce9b455d 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -303,7 +303,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode, | |||
303 | if (o2info_from_user(oij, req)) | 303 | if (o2info_from_user(oij, req)) |
304 | goto bail; | 304 | goto bail; |
305 | 305 | ||
306 | oij.ij_journal_size = osb->journal->j_inode->i_size; | 306 | oij.ij_journal_size = i_size_read(osb->journal->j_inode); |
307 | 307 | ||
308 | o2info_set_request_filled(&oij.ij_req); | 308 | o2info_set_request_filled(&oij.ij_req); |
309 | 309 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 242170d83971..44fc3e530c3d 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -455,6 +455,41 @@ bail: | |||
455 | return status; | 455 | return status; |
456 | } | 456 | } |
457 | 457 | ||
458 | /* | ||
459 | * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA. | ||
460 | * If that fails, restart the transaction & regain write access for the | ||
461 | * buffer head which is used for metadata modifications. | ||
462 | * Taken from Ext4: extend_or_restart_transaction() | ||
463 | */ | ||
464 | int ocfs2_allocate_extend_trans(handle_t *handle, int thresh) | ||
465 | { | ||
466 | int status, old_nblks; | ||
467 | |||
468 | BUG_ON(!handle); | ||
469 | |||
470 | old_nblks = handle->h_buffer_credits; | ||
471 | trace_ocfs2_allocate_extend_trans(old_nblks, thresh); | ||
472 | |||
473 | if (old_nblks < thresh) | ||
474 | return 0; | ||
475 | |||
476 | status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA); | ||
477 | if (status < 0) { | ||
478 | mlog_errno(status); | ||
479 | goto bail; | ||
480 | } | ||
481 | |||
482 | if (status > 0) { | ||
483 | status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA); | ||
484 | if (status < 0) | ||
485 | mlog_errno(status); | ||
486 | } | ||
487 | |||
488 | bail: | ||
489 | return status; | ||
490 | } | ||
491 | |||
492 | |||
458 | struct ocfs2_triggers { | 493 | struct ocfs2_triggers { |
459 | struct jbd2_buffer_trigger_type ot_triggers; | 494 | struct jbd2_buffer_trigger_type ot_triggers; |
460 | int ot_offset; | 495 | int ot_offset; |
@@ -801,14 +836,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
801 | inode_lock = 1; | 836 | inode_lock = 1; |
802 | di = (struct ocfs2_dinode *)bh->b_data; | 837 | di = (struct ocfs2_dinode *)bh->b_data; |
803 | 838 | ||
804 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { | 839 | if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) { |
805 | mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", | 840 | mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", |
806 | inode->i_size); | 841 | i_size_read(inode)); |
807 | status = -EINVAL; | 842 | status = -EINVAL; |
808 | goto done; | 843 | goto done; |
809 | } | 844 | } |
810 | 845 | ||
811 | trace_ocfs2_journal_init(inode->i_size, | 846 | trace_ocfs2_journal_init(i_size_read(inode), |
812 | (unsigned long long)inode->i_blocks, | 847 | (unsigned long long)inode->i_blocks, |
813 | OCFS2_I(inode)->ip_clusters); | 848 | OCFS2_I(inode)->ip_clusters); |
814 | 849 | ||
@@ -1096,7 +1131,7 @@ static int ocfs2_force_read_journal(struct inode *inode) | |||
1096 | 1131 | ||
1097 | memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); | 1132 | memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); |
1098 | 1133 | ||
1099 | num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); | 1134 | num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); |
1100 | v_blkno = 0; | 1135 | v_blkno = 0; |
1101 | while (v_blkno < num_blocks) { | 1136 | while (v_blkno < num_blocks) { |
1102 | status = ocfs2_extent_map_get_blocks(inode, v_blkno, | 1137 | status = ocfs2_extent_map_get_blocks(inode, v_blkno, |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 0a992737dcaf..0b479bab3671 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -258,6 +258,17 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, | |||
258 | int ocfs2_commit_trans(struct ocfs2_super *osb, | 258 | int ocfs2_commit_trans(struct ocfs2_super *osb, |
259 | handle_t *handle); | 259 | handle_t *handle); |
260 | int ocfs2_extend_trans(handle_t *handle, int nblocks); | 260 | int ocfs2_extend_trans(handle_t *handle, int nblocks); |
261 | int ocfs2_allocate_extend_trans(handle_t *handle, | ||
262 | int thresh); | ||
263 | |||
264 | /* | ||
265 | * Define an arbitrary limit for the amount of data we will anticipate | ||
266 | * writing to any given transaction. For unbounded transactions such as | ||
267 | * fallocate(2) we can write more than this, but we always | ||
268 | * start off at the maximum transaction size and grow the transaction | ||
269 | * optimistically as we go. | ||
270 | */ | ||
271 | #define OCFS2_MAX_TRANS_DATA 64U | ||
261 | 272 | ||
262 | /* | 273 | /* |
263 | * Create access is for when we get a newly created buffer and we're | 274 | * Create access is for when we get a newly created buffer and we're |
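A hedged usage sketch (not part of this patch) of how a multi-step ocfs2 operation might drive the new helper: extend or restart before each unit of work, then re-request journal access, since jbd2_journal_restart() commits the old handle. The step count, credit threshold, and buffer head below are illustrative.

static int example_walk(handle_t *handle, struct inode *inode,
			struct buffer_head *di_bh, int nr_steps,
			int credits_per_step)
{
	int i, status = 0;

	for (i = 0; i < nr_steps; i++) {
		status = ocfs2_allocate_extend_trans(handle, credits_per_step);
		if (status < 0)
			break;
		/* the handle may have been restarted: re-take write access */
		status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
						 di_bh,
						 OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0)
			break;
		/* ... modify di_bh for this step ... */
	}
	if (status < 0)
		mlog_errno(status);
	return status;
}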
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index aebeacd807c3..cd5496b7a0a3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -1082,7 +1082,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | |||
1082 | } | 1082 | } |
1083 | 1083 | ||
1084 | retry_enospc: | 1084 | retry_enospc: |
1085 | (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; | 1085 | (*ac)->ac_bits_wanted = osb->local_alloc_bits; |
1086 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | 1086 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); |
1087 | if (status == -ENOSPC) { | 1087 | if (status == -ENOSPC) { |
1088 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == | 1088 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == |
@@ -1154,7 +1154,7 @@ retry_enospc: | |||
1154 | OCFS2_LA_DISABLED) | 1154 | OCFS2_LA_DISABLED) |
1155 | goto bail; | 1155 | goto bail; |
1156 | 1156 | ||
1157 | ac->ac_bits_wanted = osb->local_alloc_default_bits; | 1157 | ac->ac_bits_wanted = osb->local_alloc_bits; |
1158 | status = ocfs2_claim_clusters(handle, ac, | 1158 | status = ocfs2_claim_clusters(handle, ac, |
1159 | osb->local_alloc_bits, | 1159 | osb->local_alloc_bits, |
1160 | &cluster_off, | 1160 | &cluster_off, |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 452068b45749..3d3f3c83065c 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -152,6 +152,7 @@ static int __ocfs2_move_extent(handle_t *handle, | |||
152 | } | 152 | } |
153 | 153 | ||
154 | out: | 154 | out: |
155 | ocfs2_free_path(path); | ||
155 | return ret; | 156 | return ret; |
156 | } | 157 | } |
157 | 158 | ||
@@ -845,7 +846,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, | |||
845 | struct ocfs2_move_extents *range = context->range; | 846 | struct ocfs2_move_extents *range = context->range; |
846 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 847 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
847 | 848 | ||
848 | if ((inode->i_size == 0) || (range->me_len == 0)) | 849 | if ((i_size_read(inode) == 0) || (range->me_len == 0)) |
849 | return 0; | 850 | return 0; |
850 | 851 | ||
851 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 852 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 3b481f490633..1b60c62aa9d6 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h | |||
@@ -2579,6 +2579,8 @@ DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans); | |||
2579 | 2579 | ||
2580 | DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); | 2580 | DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); |
2581 | 2581 | ||
2582 | DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans); | ||
2583 | |||
2582 | DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access); | 2584 | DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access); |
2583 | 2585 | ||
2584 | DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty); | 2586 | DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty); |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 332a281f217e..aaa50611ec66 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, | |||
234 | len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; | 234 | len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; |
235 | } | 235 | } |
236 | 236 | ||
237 | if (gqinode->i_size < off + len) { | 237 | if (i_size_read(gqinode) < off + len) { |
238 | loff_t rounded_end = | 238 | loff_t rounded_end = |
239 | ocfs2_align_bytes_to_blocks(sb, off + len); | 239 | ocfs2_align_bytes_to_blocks(sb, off + len); |
240 | 240 | ||
@@ -778,8 +778,8 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) | |||
778 | */ | 778 | */ |
779 | WARN_ON(journal_current_handle()); | 779 | WARN_ON(journal_current_handle()); |
780 | status = ocfs2_extend_no_holes(gqinode, NULL, | 780 | status = ocfs2_extend_no_holes(gqinode, NULL, |
781 | gqinode->i_size + (need_alloc << sb->s_blocksize_bits), | 781 | i_size_read(gqinode) + (need_alloc << sb->s_blocksize_bits), |
782 | gqinode->i_size); | 782 | i_size_read(gqinode)); |
783 | if (status < 0) | 783 | if (status < 0) |
784 | goto out_dq; | 784 | goto out_dq; |
785 | } | 785 | } |
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 27fe7ee4874c..2e4344be3b96 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -982,14 +982,14 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
982 | 982 | ||
983 | /* We are protected by dqio_sem so no locking needed */ | 983 | /* We are protected by dqio_sem so no locking needed */ |
984 | status = ocfs2_extend_no_holes(lqinode, NULL, | 984 | status = ocfs2_extend_no_holes(lqinode, NULL, |
985 | lqinode->i_size + 2 * sb->s_blocksize, | 985 | i_size_read(lqinode) + 2 * sb->s_blocksize, |
986 | lqinode->i_size); | 986 | i_size_read(lqinode)); |
987 | if (status < 0) { | 987 | if (status < 0) { |
988 | mlog_errno(status); | 988 | mlog_errno(status); |
989 | goto out; | 989 | goto out; |
990 | } | 990 | } |
991 | status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, | 991 | status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, |
992 | lqinode->i_size + 2 * sb->s_blocksize); | 992 | i_size_read(lqinode) + 2 * sb->s_blocksize); |
993 | if (status < 0) { | 993 | if (status < 0) { |
994 | mlog_errno(status); | 994 | mlog_errno(status); |
995 | goto out; | 995 | goto out; |
@@ -1125,14 +1125,14 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1125 | 1125 | ||
1126 | /* We are protected by dqio_sem so no locking needed */ | 1126 | /* We are protected by dqio_sem so no locking needed */ |
1127 | status = ocfs2_extend_no_holes(lqinode, NULL, | 1127 | status = ocfs2_extend_no_holes(lqinode, NULL, |
1128 | lqinode->i_size + sb->s_blocksize, | 1128 | i_size_read(lqinode) + sb->s_blocksize, |
1129 | lqinode->i_size); | 1129 | i_size_read(lqinode)); |
1130 | if (status < 0) { | 1130 | if (status < 0) { |
1131 | mlog_errno(status); | 1131 | mlog_errno(status); |
1132 | goto out; | 1132 | goto out; |
1133 | } | 1133 | } |
1134 | status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, | 1134 | status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, |
1135 | lqinode->i_size + sb->s_blocksize); | 1135 | i_size_read(lqinode) + sb->s_blocksize); |
1136 | if (status < 0) { | 1136 | if (status < 0) { |
1137 | mlog_errno(status); | 1137 | mlog_errno(status); |
1138 | goto out; | 1138 | goto out; |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index a70d604593b6..bf4dfc14bb2c 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -3854,7 +3854,10 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, | |||
3854 | while (cpos < clusters) { | 3854 | while (cpos < clusters) { |
3855 | ret = ocfs2_get_clusters(inode, cpos, &p_cluster, | 3855 | ret = ocfs2_get_clusters(inode, cpos, &p_cluster, |
3856 | &num_clusters, &ext_flags); | 3856 | &num_clusters, &ext_flags); |
3857 | 3857 | if (ret) { | |
3858 | mlog_errno(ret); | ||
3859 | goto unlock; | ||
3860 | } | ||
3858 | if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) { | 3861 | if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) { |
3859 | ret = ocfs2_add_refcount_flag(inode, &di_et, | 3862 | ret = ocfs2_add_refcount_flag(inode, &di_et, |
3860 | &ref_tree->rf_ci, | 3863 | &ref_tree->rf_ci, |
@@ -4025,7 +4028,10 @@ static int ocfs2_duplicate_extent_list(struct inode *s_inode, | |||
4025 | while (cpos < clusters) { | 4028 | while (cpos < clusters) { |
4026 | ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, | 4029 | ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, |
4027 | &num_clusters, &ext_flags); | 4030 | &num_clusters, &ext_flags); |
4028 | 4031 | if (ret) { | |
4032 | mlog_errno(ret); | ||
4033 | goto out; | ||
4034 | } | ||
4029 | if (p_cluster) { | 4035 | if (p_cluster) { |
4030 | ret = ocfs2_add_refcounted_extent(t_inode, &et, | 4036 | ret = ocfs2_add_refcounted_extent(t_inode, &et, |
4031 | ref_ci, ref_root_bh, | 4037 | ref_ci, ref_root_bh, |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 317ef0abccbb..6ce0686eab72 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -3505,7 +3505,7 @@ int ocfs2_xattr_set(struct inode *inode, | |||
3505 | int ret, credits, ref_meta = 0, ref_credits = 0; | 3505 | int ret, credits, ref_meta = 0, ref_credits = 0; |
3506 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 3506 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
3507 | struct inode *tl_inode = osb->osb_tl_inode; | 3507 | struct inode *tl_inode = osb->osb_tl_inode; |
3508 | struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; | 3508 | struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; |
3509 | struct ocfs2_refcount_tree *ref_tree = NULL; | 3509 | struct ocfs2_refcount_tree *ref_tree = NULL; |
3510 | 3510 | ||
3511 | struct ocfs2_xattr_info xi = { | 3511 | struct ocfs2_xattr_info xi = { |
@@ -3609,13 +3609,14 @@ int ocfs2_xattr_set(struct inode *inode, | |||
3609 | if (IS_ERR(ctxt.handle)) { | 3609 | if (IS_ERR(ctxt.handle)) { |
3610 | ret = PTR_ERR(ctxt.handle); | 3610 | ret = PTR_ERR(ctxt.handle); |
3611 | mlog_errno(ret); | 3611 | mlog_errno(ret); |
3612 | goto cleanup; | 3612 | goto out_free_ac; |
3613 | } | 3613 | } |
3614 | 3614 | ||
3615 | ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); | 3615 | ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); |
3616 | 3616 | ||
3617 | ocfs2_commit_trans(osb, ctxt.handle); | 3617 | ocfs2_commit_trans(osb, ctxt.handle); |
3618 | 3618 | ||
3619 | out_free_ac: | ||
3619 | if (ctxt.data_ac) | 3620 | if (ctxt.data_ac) |
3620 | ocfs2_free_alloc_context(ctxt.data_ac); | 3621 | ocfs2_free_alloc_context(ctxt.data_ac); |
3621 | if (ctxt.meta_ac) | 3622 | if (ctxt.meta_ac) |
@@ -5881,6 +5882,10 @@ static int ocfs2_xattr_value_attach_refcount(struct inode *inode, | |||
5881 | while (cpos < clusters) { | 5882 | while (cpos < clusters) { |
5882 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | 5883 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, |
5883 | &num_clusters, el, &ext_flags); | 5884 | &num_clusters, el, &ext_flags); |
5885 | if (ret) { | ||
5886 | mlog_errno(ret); | ||
5887 | break; | ||
5888 | } | ||
5884 | 5889 | ||
5885 | cpos += num_clusters; | 5890 | cpos += num_clusters; |
5886 | if ((ext_flags & OCFS2_EXT_REFCOUNTED)) | 5891 | if ((ext_flags & OCFS2_EXT_REFCOUNTED)) |
@@ -6797,7 +6802,7 @@ out: | |||
6797 | if (ret) { | 6802 | if (ret) { |
6798 | if (*meta_ac) { | 6803 | if (*meta_ac) { |
6799 | ocfs2_free_alloc_context(*meta_ac); | 6804 | ocfs2_free_alloc_context(*meta_ac); |
6800 | meta_ac = NULL; | 6805 | *meta_ac = NULL; |
6801 | } | 6806 | } |
6802 | } | 6807 | } |
6803 | 6808 | ||
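The last xattr.c hunk deserves a note: the old "meta_ac = NULL" only cleared the local copy of the double pointer, so the caller kept a reference to the freed context. A minimal illustration of the bug class, using a hypothetical wrapper:

static void release_ctx(struct ocfs2_alloc_context **acp)
{
	ocfs2_free_alloc_context(*acp);
	/*
	 * "acp = NULL" would only clear this function's copy and leave
	 * the caller's pointer dangling; writing through the double
	 * pointer, as the fix above now does, clears the caller's view.
	 */
	*acp = NULL;
}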
diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 0ff80f9b930f..985ea881b5bc 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c | |||
@@ -286,7 +286,7 @@ int proc_fd_permission(struct inode *inode, int mask) | |||
286 | int rv = generic_permission(inode, mask); | 286 | int rv = generic_permission(inode, mask); |
287 | if (rv == 0) | 287 | if (rv == 0) |
288 | return 0; | 288 | return 0; |
289 | if (task_pid(current) == proc_pid(inode)) | 289 | if (task_tgid(current) == proc_pid(inode)) |
290 | rv = 0; | 290 | rv = 0; |
291 | return rv; | 291 | return rv; |
292 | } | 292 | } |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 107d026f5d6e..7366e9d63cee 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -740,6 +740,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, | |||
740 | ptent = pte_file_clear_soft_dirty(ptent); | 740 | ptent = pte_file_clear_soft_dirty(ptent); |
741 | } | 741 | } |
742 | 742 | ||
743 | if (vma->vm_flags & VM_SOFTDIRTY) | ||
744 | vma->vm_flags &= ~VM_SOFTDIRTY; | ||
745 | |||
743 | set_pte_at(vma->vm_mm, addr, pte, ptent); | 746 | set_pte_at(vma->vm_mm, addr, pte, ptent); |
744 | #endif | 747 | #endif |
745 | } | 748 | } |
@@ -949,13 +952,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, | |||
949 | if (is_migration_entry(entry)) | 952 | if (is_migration_entry(entry)) |
950 | page = migration_entry_to_page(entry); | 953 | page = migration_entry_to_page(entry); |
951 | } else { | 954 | } else { |
952 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); | 955 | if (vma->vm_flags & VM_SOFTDIRTY) |
956 | flags2 |= __PM_SOFT_DIRTY; | ||
957 | *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); | ||
953 | return; | 958 | return; |
954 | } | 959 | } |
955 | 960 | ||
956 | if (page && !PageAnon(page)) | 961 | if (page && !PageAnon(page)) |
957 | flags |= PM_FILE; | 962 | flags |= PM_FILE; |
958 | if (pte_soft_dirty(pte)) | 963 | if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte)) |
959 | flags2 |= __PM_SOFT_DIRTY; | 964 | flags2 |= __PM_SOFT_DIRTY; |
960 | 965 | ||
961 | *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); | 966 | *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); |
@@ -974,7 +979,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p | |||
974 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | 979 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) |
975 | | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); | 980 | | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); |
976 | else | 981 | else |
977 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); | 982 | *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2)); |
978 | } | 983 | } |
979 | #else | 984 | #else |
980 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, | 985 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
@@ -997,7 +1002,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
997 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { | 1002 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { |
998 | int pmd_flags2; | 1003 | int pmd_flags2; |
999 | 1004 | ||
1000 | pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); | 1005 | if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) |
1006 | pmd_flags2 = __PM_SOFT_DIRTY; | ||
1007 | else | ||
1008 | pmd_flags2 = 0; | ||
1009 | |||
1001 | for (; addr != end; addr += PAGE_SIZE) { | 1010 | for (; addr != end; addr += PAGE_SIZE) { |
1002 | unsigned long offset; | 1011 | unsigned long offset; |
1003 | 1012 | ||
@@ -1015,12 +1024,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
1015 | if (pmd_trans_unstable(pmd)) | 1024 | if (pmd_trans_unstable(pmd)) |
1016 | return 0; | 1025 | return 0; |
1017 | for (; addr != end; addr += PAGE_SIZE) { | 1026 | for (; addr != end; addr += PAGE_SIZE) { |
1027 | int flags2; | ||
1018 | 1028 | ||
1019 | /* check to see if we've left 'vma' behind | 1029 | /* check to see if we've left 'vma' behind |
1020 | * and need a new, higher one */ | 1030 | * and need a new, higher one */ |
1021 | if (vma && (addr >= vma->vm_end)) { | 1031 | if (vma && (addr >= vma->vm_end)) { |
1022 | vma = find_vma(walk->mm, addr); | 1032 | vma = find_vma(walk->mm, addr); |
1023 | pme = make_pme(PM_NOT_PRESENT(pm->v2)); | 1033 | if (vma && (vma->vm_flags & VM_SOFTDIRTY)) |
1034 | flags2 = __PM_SOFT_DIRTY; | ||
1035 | else | ||
1036 | flags2 = 0; | ||
1037 | pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); | ||
1024 | } | 1038 | } |
1025 | 1039 | ||
1026 | /* check that 'vma' actually covers this address, | 1040 | /* check that 'vma' actually covers this address, |
@@ -1044,13 +1058,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
1044 | 1058 | ||
1045 | #ifdef CONFIG_HUGETLB_PAGE | 1059 | #ifdef CONFIG_HUGETLB_PAGE |
1046 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, | 1060 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
1047 | pte_t pte, int offset) | 1061 | pte_t pte, int offset, int flags2) |
1048 | { | 1062 | { |
1049 | if (pte_present(pte)) | 1063 | if (pte_present(pte)) |
1050 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | 1064 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | |
1051 | | PM_STATUS2(pm->v2, 0) | PM_PRESENT); | 1065 | PM_STATUS2(pm->v2, flags2) | |
1066 | PM_PRESENT); | ||
1052 | else | 1067 | else |
1053 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); | 1068 | *pme = make_pme(PM_NOT_PRESENT(pm->v2) | |
1069 | PM_STATUS2(pm->v2, flags2)); | ||
1054 | } | 1070 | } |
1055 | 1071 | ||
1056 | /* This function walks within one hugetlb entry in the single call */ | 1072 | /* This function walks within one hugetlb entry in the single call */ |
@@ -1059,12 +1075,22 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
1059 | struct mm_walk *walk) | 1075 | struct mm_walk *walk) |
1060 | { | 1076 | { |
1061 | struct pagemapread *pm = walk->private; | 1077 | struct pagemapread *pm = walk->private; |
1078 | struct vm_area_struct *vma; | ||
1062 | int err = 0; | 1079 | int err = 0; |
1080 | int flags2; | ||
1063 | pagemap_entry_t pme; | 1081 | pagemap_entry_t pme; |
1064 | 1082 | ||
1083 | vma = find_vma(walk->mm, addr); | ||
1084 | WARN_ON_ONCE(!vma); | ||
1085 | |||
1086 | if (vma && (vma->vm_flags & VM_SOFTDIRTY)) | ||
1087 | flags2 = __PM_SOFT_DIRTY; | ||
1088 | else | ||
1089 | flags2 = 0; | ||
1090 | |||
1065 | for (; addr != end; addr += PAGE_SIZE) { | 1091 | for (; addr != end; addr += PAGE_SIZE) { |
1066 | int offset = (addr & ~hmask) >> PAGE_SHIFT; | 1092 | int offset = (addr & ~hmask) >> PAGE_SHIFT; |
1067 | huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); | 1093 | huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2); |
1068 | err = add_to_pagemap(addr, &pme, pm); | 1094 | err = add_to_pagemap(addr, &pme, pm); |
1069 | if (err) | 1095 | if (err) |
1070 | return err; | 1096 | return err; |
@@ -1376,8 +1402,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1376 | walk.mm = mm; | 1402 | walk.mm = mm; |
1377 | 1403 | ||
1378 | pol = get_vma_policy(task, vma, vma->vm_start); | 1404 | pol = get_vma_policy(task, vma, vma->vm_start); |
1379 | mpol_to_str(buffer, sizeof(buffer), pol); | 1405 | n = mpol_to_str(buffer, sizeof(buffer), pol); |
1380 | mpol_cond_put(pol); | 1406 | mpol_cond_put(pol); |
1407 | if (n < 0) | ||
1408 | return n; | ||
1381 | 1409 | ||
1382 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 1410 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
1383 | 1411 | ||
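The task_mmu.c hunks make /proc/pid/pagemap report soft-dirty state for an entire VM_SOFTDIRTY vma, including its not-present pages. A hedged userspace sketch of consuming that state, assuming the documented pagemap layout (64-bit entries, soft-dirty in bit 55); error handling is trimmed:

#include <stdint.h>
#include <unistd.h>

static int page_soft_dirty(int pagemap_fd, unsigned long vaddr)
{
	uint64_t entry;
	off_t off = (off_t)(vaddr / sysconf(_SC_PAGESIZE)) * sizeof(entry);

	if (pread(pagemap_fd, &entry, sizeof(entry), off) != sizeof(entry))
		return -1;
	return (int)((entry >> 55) & 1);	/* soft-dirty bit */
}

Clearing the bits (and, after the clear_soft_dirty() hunk above, the per-vma VM_SOFTDIRTY flag) is done by writing "4" to /proc/<pid>/clear_refs.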
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index a1a16eb97c7b..9100d6959886 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/crash_dump.h> | 21 | #include <linux/crash_dump.h> |
22 | #include <linux/list.h> | 22 | #include <linux/list.h> |
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/pagemap.h> | ||
24 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
25 | #include <asm/io.h> | 26 | #include <asm/io.h> |
26 | #include "internal.h" | 27 | #include "internal.h" |
@@ -123,11 +124,65 @@ static ssize_t read_from_oldmem(char *buf, size_t count, | |||
123 | return read; | 124 | return read; |
124 | } | 125 | } |
125 | 126 | ||
127 | /* | ||
128 | * Architectures may override this function to allocate ELF header in 2nd kernel | ||
129 | */ | ||
130 | int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) | ||
131 | { | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Architectures may override this function to free the header ||
137 | */ | ||
138 | void __weak elfcorehdr_free(unsigned long long addr) | ||
139 | {} | ||
140 | |||
141 | /* | ||
142 | * Architectures may override this function to read from the ELF header ||
143 | */ | ||
144 | ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) | ||
145 | { | ||
146 | return read_from_oldmem(buf, count, ppos, 0); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Architectures may override this function to read from the notes sections ||
151 | */ | ||
152 | ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) | ||
153 | { | ||
154 | return read_from_oldmem(buf, count, ppos, 0); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Architectures may override this function to map oldmem | ||
159 | */ | ||
160 | int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, | ||
161 | unsigned long from, unsigned long pfn, | ||
162 | unsigned long size, pgprot_t prot) | ||
163 | { | ||
164 | return remap_pfn_range(vma, from, pfn, size, prot); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Copy to either kernel or user space | ||
169 | */ | ||
170 | static int copy_to(void *target, void *src, size_t size, int userbuf) | ||
171 | { | ||
172 | if (userbuf) { | ||
173 | if (copy_to_user((char __user *) target, src, size)) | ||
174 | return -EFAULT; | ||
175 | } else { | ||
176 | memcpy(target, src, size); | ||
177 | } | ||
178 | return 0; | ||
179 | } | ||
180 | |||
126 | /* Read from the ELF header and then the crash dump. On error, a negative | 181 | /* Read from the ELF header and then the crash dump. On error, a negative |
127 | * value is returned; otherwise, the number of bytes read is returned. | 182 | * value is returned; otherwise, the number of bytes read is returned. |
128 | */ | 183 | */ |
129 | static ssize_t read_vmcore(struct file *file, char __user *buffer, | 184 | static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, |
130 | size_t buflen, loff_t *fpos) | 185 | int userbuf) |
131 | { | 186 | { |
132 | ssize_t acc = 0, tmp; | 187 | ssize_t acc = 0, tmp; |
133 | size_t tsz; | 188 | size_t tsz; |
@@ -144,7 +199,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
144 | /* Read ELF core header */ | 199 | /* Read ELF core header */ |
145 | if (*fpos < elfcorebuf_sz) { | 200 | if (*fpos < elfcorebuf_sz) { |
146 | tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); | 201 | tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); |
147 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) | 202 | if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf)) |
148 | return -EFAULT; | 203 | return -EFAULT; |
149 | buflen -= tsz; | 204 | buflen -= tsz; |
150 | *fpos += tsz; | 205 | *fpos += tsz; |
@@ -162,7 +217,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
162 | 217 | ||
163 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); | 218 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); |
164 | kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; | 219 | kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; |
165 | if (copy_to_user(buffer, kaddr, tsz)) | 220 | if (copy_to(buffer, kaddr, tsz, userbuf)) |
166 | return -EFAULT; | 221 | return -EFAULT; |
167 | buflen -= tsz; | 222 | buflen -= tsz; |
168 | *fpos += tsz; | 223 | *fpos += tsz; |
@@ -178,7 +233,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
178 | if (*fpos < m->offset + m->size) { | 233 | if (*fpos < m->offset + m->size) { |
179 | tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); | 234 | tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); |
180 | start = m->paddr + *fpos - m->offset; | 235 | start = m->paddr + *fpos - m->offset; |
181 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | 236 | tmp = read_from_oldmem(buffer, tsz, &start, userbuf); |
182 | if (tmp < 0) | 237 | if (tmp < 0) |
183 | return tmp; | 238 | return tmp; |
184 | buflen -= tsz; | 239 | buflen -= tsz; |
@@ -195,6 +250,55 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
195 | return acc; | 250 | return acc; |
196 | } | 251 | } |
197 | 252 | ||
253 | static ssize_t read_vmcore(struct file *file, char __user *buffer, | ||
254 | size_t buflen, loff_t *fpos) | ||
255 | { | ||
256 | return __read_vmcore((__force char *) buffer, buflen, fpos, 1); | ||
257 | } | ||
258 | |||
259 | /* | ||
260 | * The vmcore fault handler uses the page cache and fills data using the | ||
261 | * standard __read_vmcore() function. ||
262 | * | ||
263 | * On s390 the fault handler is used for memory regions that can't be mapped | ||
264 | * directly with remap_pfn_range(). | ||
265 | */ | ||
266 | static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
267 | { | ||
268 | #ifdef CONFIG_S390 | ||
269 | struct address_space *mapping = vma->vm_file->f_mapping; | ||
270 | pgoff_t index = vmf->pgoff; | ||
271 | struct page *page; | ||
272 | loff_t offset; | ||
273 | char *buf; | ||
274 | int rc; | ||
275 | |||
276 | page = find_or_create_page(mapping, index, GFP_KERNEL); | ||
277 | if (!page) | ||
278 | return VM_FAULT_OOM; | ||
279 | if (!PageUptodate(page)) { | ||
280 | offset = (loff_t) index << PAGE_CACHE_SHIFT; | ||
281 | buf = __va((page_to_pfn(page) << PAGE_SHIFT)); | ||
282 | rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0); | ||
283 | if (rc < 0) { | ||
284 | unlock_page(page); | ||
285 | page_cache_release(page); | ||
286 | return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; | ||
287 | } | ||
288 | SetPageUptodate(page); | ||
289 | } | ||
290 | unlock_page(page); | ||
291 | vmf->page = page; | ||
292 | return 0; | ||
293 | #else | ||
294 | return VM_FAULT_SIGBUS; | ||
295 | #endif | ||
296 | } | ||
297 | |||
298 | static const struct vm_operations_struct vmcore_mmap_ops = { | ||
299 | .fault = mmap_vmcore_fault, | ||
300 | }; | ||
301 | |||
198 | /** | 302 | /** |
199 | * alloc_elfnotes_buf - allocate buffer for ELF note segment in | 303 | * alloc_elfnotes_buf - allocate buffer for ELF note segment in |
200 | * vmalloc memory | 304 | * vmalloc memory |
@@ -223,7 +327,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz) | |||
223 | * regions in the 1st kernel pointed to by PT_LOAD entries) into | 327 | * regions in the 1st kernel pointed to by PT_LOAD entries) into |
224 | * virtually contiguous user-space in ELF layout. | 328 | * virtually contiguous user-space in ELF layout. |
225 | */ | 329 | */ |
226 | #if defined(CONFIG_MMU) && !defined(CONFIG_S390) | 330 | #ifdef CONFIG_MMU |
227 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | 331 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) |
228 | { | 332 | { |
229 | size_t size = vma->vm_end - vma->vm_start; | 333 | size_t size = vma->vm_end - vma->vm_start; |
@@ -241,6 +345,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | |||
241 | 345 | ||
242 | vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); | 346 | vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); |
243 | vma->vm_flags |= VM_MIXEDMAP; | 347 | vma->vm_flags |= VM_MIXEDMAP; |
348 | vma->vm_ops = &vmcore_mmap_ops; | ||
244 | 349 | ||
245 | len = 0; | 350 | len = 0; |
246 | 351 | ||
@@ -282,9 +387,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | |||
282 | 387 | ||
283 | tsz = min_t(size_t, m->offset + m->size - start, size); | 388 | tsz = min_t(size_t, m->offset + m->size - start, size); |
284 | paddr = m->paddr + start - m->offset; | 389 | paddr = m->paddr + start - m->offset; |
285 | if (remap_pfn_range(vma, vma->vm_start + len, | 390 | if (remap_oldmem_pfn_range(vma, vma->vm_start + len, |
286 | paddr >> PAGE_SHIFT, tsz, | 391 | paddr >> PAGE_SHIFT, tsz, |
287 | vma->vm_page_prot)) | 392 | vma->vm_page_prot)) |
288 | goto fail; | 393 | goto fail; |
289 | size -= tsz; | 394 | size -= tsz; |
290 | start += tsz; | 395 | start += tsz; |
@@ -357,7 +462,7 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) | |||
357 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 462 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
358 | if (!notes_section) | 463 | if (!notes_section) |
359 | return -ENOMEM; | 464 | return -ENOMEM; |
360 | rc = read_from_oldmem(notes_section, max_sz, &offset, 0); | 465 | rc = elfcorehdr_read_notes(notes_section, max_sz, &offset); |
361 | if (rc < 0) { | 466 | if (rc < 0) { |
362 | kfree(notes_section); | 467 | kfree(notes_section); |
363 | return rc; | 468 | return rc; |
@@ -444,7 +549,8 @@ static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) | |||
444 | if (phdr_ptr->p_type != PT_NOTE) | 549 | if (phdr_ptr->p_type != PT_NOTE) |
445 | continue; | 550 | continue; |
446 | offset = phdr_ptr->p_offset; | 551 | offset = phdr_ptr->p_offset; |
447 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | 552 | rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz, |
553 | &offset); | ||
448 | if (rc < 0) | 554 | if (rc < 0) |
449 | return rc; | 555 | return rc; |
450 | notes_buf += phdr_ptr->p_memsz; | 556 | notes_buf += phdr_ptr->p_memsz; |
@@ -536,7 +642,7 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) | |||
536 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 642 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
537 | if (!notes_section) | 643 | if (!notes_section) |
538 | return -ENOMEM; | 644 | return -ENOMEM; |
539 | rc = read_from_oldmem(notes_section, max_sz, &offset, 0); | 645 | rc = elfcorehdr_read_notes(notes_section, max_sz, &offset); |
540 | if (rc < 0) { | 646 | if (rc < 0) { |
541 | kfree(notes_section); | 647 | kfree(notes_section); |
542 | return rc; | 648 | return rc; |
@@ -623,7 +729,8 @@ static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) | |||
623 | if (phdr_ptr->p_type != PT_NOTE) | 729 | if (phdr_ptr->p_type != PT_NOTE) |
624 | continue; | 730 | continue; |
625 | offset = phdr_ptr->p_offset; | 731 | offset = phdr_ptr->p_offset; |
626 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | 732 | rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz, |
733 | &offset); | ||
627 | if (rc < 0) | 734 | if (rc < 0) |
628 | return rc; | 735 | return rc; |
629 | notes_buf += phdr_ptr->p_memsz; | 736 | notes_buf += phdr_ptr->p_memsz; |
@@ -810,7 +917,7 @@ static int __init parse_crash_elf64_headers(void) | |||
810 | addr = elfcorehdr_addr; | 917 | addr = elfcorehdr_addr; |
811 | 918 | ||
812 | /* Read Elf header */ | 919 | /* Read Elf header */ |
813 | rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0); | 920 | rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr); |
814 | if (rc < 0) | 921 | if (rc < 0) |
815 | return rc; | 922 | return rc; |
816 | 923 | ||
@@ -837,7 +944,7 @@ static int __init parse_crash_elf64_headers(void) | |||
837 | if (!elfcorebuf) | 944 | if (!elfcorebuf) |
838 | return -ENOMEM; | 945 | return -ENOMEM; |
839 | addr = elfcorehdr_addr; | 946 | addr = elfcorehdr_addr; |
840 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); | 947 | rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr); |
841 | if (rc < 0) | 948 | if (rc < 0) |
842 | goto fail; | 949 | goto fail; |
843 | 950 | ||
@@ -866,7 +973,7 @@ static int __init parse_crash_elf32_headers(void) | |||
866 | addr = elfcorehdr_addr; | 973 | addr = elfcorehdr_addr; |
867 | 974 | ||
868 | /* Read Elf header */ | 975 | /* Read Elf header */ |
869 | rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0); | 976 | rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr); |
870 | if (rc < 0) | 977 | if (rc < 0) |
871 | return rc; | 978 | return rc; |
872 | 979 | ||
@@ -892,7 +999,7 @@ static int __init parse_crash_elf32_headers(void) | |||
892 | if (!elfcorebuf) | 999 | if (!elfcorebuf) |
893 | return -ENOMEM; | 1000 | return -ENOMEM; |
894 | addr = elfcorehdr_addr; | 1001 | addr = elfcorehdr_addr; |
895 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); | 1002 | rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr); |
896 | if (rc < 0) | 1003 | if (rc < 0) |
897 | goto fail; | 1004 | goto fail; |
898 | 1005 | ||
@@ -919,7 +1026,7 @@ static int __init parse_crash_elf_headers(void) | |||
919 | int rc=0; | 1026 | int rc=0; |
920 | 1027 | ||
921 | addr = elfcorehdr_addr; | 1028 | addr = elfcorehdr_addr; |
922 | rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0); | 1029 | rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr); |
923 | if (rc < 0) | 1030 | if (rc < 0) |
924 | return rc; | 1031 | return rc; |
925 | if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { | 1032 | if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { |
@@ -952,7 +1059,14 @@ static int __init vmcore_init(void) | |||
952 | { | 1059 | { |
953 | int rc = 0; | 1060 | int rc = 0; |
954 | 1061 | ||
955 | /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ | 1062 | /* Allow architectures to allocate ELF header in 2nd kernel */ |
1063 | rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size); | ||
1064 | if (rc) | ||
1065 | return rc; | ||
1066 | /* | ||
1067 | * If elfcorehdr= has been passed in cmdline or created in 2nd kernel, | ||
1068 | * then capture the dump. | ||
1069 | */ | ||
956 | if (!(is_vmcore_usable())) | 1070 | if (!(is_vmcore_usable())) |
957 | return rc; | 1071 | return rc; |
958 | rc = parse_crash_elf_headers(); | 1072 | rc = parse_crash_elf_headers(); |
@@ -960,6 +1074,8 @@ static int __init vmcore_init(void) | |||
960 | pr_warn("Kdump: vmcore not initialized\n"); | 1074 | pr_warn("Kdump: vmcore not initialized\n"); |
961 | return rc; | 1075 | return rc; |
962 | } | 1076 | } |
1077 | elfcorehdr_free(elfcorehdr_addr); | ||
1078 | elfcorehdr_addr = ELFCORE_ADDR_ERR; | ||
963 | 1079 | ||
964 | proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); | 1080 | proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); |
965 | if (proc_vmcore) | 1081 | if (proc_vmcore) |
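The vmcore rework turns the oldmem accessors into __weak defaults so that an architecture (s390 is the motivating case) can substitute strong definitions at link time. A hedged sketch of such an override; arch_build_elfcorehdr() is a hypothetical helper, not an API added here:

/* in an architecture's crash_dump.c: a strong definition replaces __weak */
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
	u64 hdr_size;
	void *hdr = arch_build_elfcorehdr(&hdr_size);	/* hypothetical */

	if (!hdr)
		return -ENOMEM;
	*addr = (unsigned long long) hdr;
	*size = hdr_size;
	return 0;
}

void elfcorehdr_free(unsigned long long addr)
{
	kfree((void *) (unsigned long) addr);
}

An architecture publishing a kernel-virtual address this way must pair it with elfcorehdr_read()/elfcorehdr_read_notes() overrides that understand that address, which is exactly the split the weak functions permit.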
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index c24f1e10b946..39d14659a8d3 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -244,12 +244,6 @@ struct dentry *ramfs_mount(struct file_system_type *fs_type, | |||
244 | return mount_nodev(fs_type, flags, data, ramfs_fill_super); | 244 | return mount_nodev(fs_type, flags, data, ramfs_fill_super); |
245 | } | 245 | } |
246 | 246 | ||
247 | static struct dentry *rootfs_mount(struct file_system_type *fs_type, | ||
248 | int flags, const char *dev_name, void *data) | ||
249 | { | ||
250 | return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super); | ||
251 | } | ||
252 | |||
253 | static void ramfs_kill_sb(struct super_block *sb) | 247 | static void ramfs_kill_sb(struct super_block *sb) |
254 | { | 248 | { |
255 | kfree(sb->s_fs_info); | 249 | kfree(sb->s_fs_info); |
@@ -262,29 +256,23 @@ static struct file_system_type ramfs_fs_type = { | |||
262 | .kill_sb = ramfs_kill_sb, | 256 | .kill_sb = ramfs_kill_sb, |
263 | .fs_flags = FS_USERNS_MOUNT, | 257 | .fs_flags = FS_USERNS_MOUNT, |
264 | }; | 258 | }; |
265 | static struct file_system_type rootfs_fs_type = { | ||
266 | .name = "rootfs", | ||
267 | .mount = rootfs_mount, | ||
268 | .kill_sb = kill_litter_super, | ||
269 | }; | ||
270 | 259 | ||
271 | static int __init init_ramfs_fs(void) | 260 | int __init init_ramfs_fs(void) |
272 | { | ||
273 | return register_filesystem(&ramfs_fs_type); | ||
274 | } | ||
275 | module_init(init_ramfs_fs) | ||
276 | |||
277 | int __init init_rootfs(void) | ||
278 | { | 261 | { |
262 | static unsigned long once; | ||
279 | int err; | 263 | int err; |
280 | 264 | ||
265 | if (test_and_set_bit(0, &once)) | ||
266 | return 0; | ||
267 | |||
281 | err = bdi_init(&ramfs_backing_dev_info); | 268 | err = bdi_init(&ramfs_backing_dev_info); |
282 | if (err) | 269 | if (err) |
283 | return err; | 270 | return err; |
284 | 271 | ||
285 | err = register_filesystem(&rootfs_fs_type); | 272 | err = register_filesystem(&ramfs_fs_type); |
286 | if (err) | 273 | if (err) |
287 | bdi_destroy(&ramfs_backing_dev_info); | 274 | bdi_destroy(&ramfs_backing_dev_info); |
288 | 275 | ||
289 | return err; | 276 | return err; |
290 | } | 277 | } |
278 | module_init(init_ramfs_fs) | ||
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c3881553f7d1..5f66d519a726 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -243,6 +243,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); | |||
243 | * BDI_CAP_EXEC_MAP: Can be mapped for execution | 243 | * BDI_CAP_EXEC_MAP: Can be mapped for execution |
244 | * | 244 | * |
245 | * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed. | 245 | * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed. |
246 | * | ||
247 | * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. | ||
246 | */ | 248 | */ |
247 | #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 | 249 | #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 |
248 | #define BDI_CAP_NO_WRITEBACK 0x00000002 | 250 | #define BDI_CAP_NO_WRITEBACK 0x00000002 |
@@ -254,6 +256,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); | |||
254 | #define BDI_CAP_NO_ACCT_WB 0x00000080 | 256 | #define BDI_CAP_NO_ACCT_WB 0x00000080 |
255 | #define BDI_CAP_SWAP_BACKED 0x00000100 | 257 | #define BDI_CAP_SWAP_BACKED 0x00000100 |
256 | #define BDI_CAP_STABLE_WRITES 0x00000200 | 258 | #define BDI_CAP_STABLE_WRITES 0x00000200 |
259 | #define BDI_CAP_STRICTLIMIT 0x00000400 | ||
257 | 260 | ||
258 | #define BDI_CAP_VMFLAGS \ | 261 | #define BDI_CAP_VMFLAGS \ |
259 | (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) | 262 | (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) |
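BDI_CAP_STRICTLIMIT was added with FUSE-style writers in mind; a hedged fragment of how such a driver opts in (modeled on the FUSE use case that motivated the flag, not taken from this hunk):

	bdi->name = "fuse";
	/* the driver does its own writeback accounting */
	bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
	err = bdi_init(bdi);
	if (err)
		return err;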
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 70cf138690e9..e8112ae50531 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h | |||
@@ -31,7 +31,7 @@ struct linux_binprm { | |||
31 | #ifdef __alpha__ | 31 | #ifdef __alpha__ |
32 | unsigned int taso:1; | 32 | unsigned int taso:1; |
33 | #endif | 33 | #endif |
34 | unsigned int recursion_depth; | 34 | unsigned int recursion_depth; /* only for search_binary_handler() */ |
35 | struct file * file; | 35 | struct file * file; |
36 | struct cred *cred; /* new credentials */ | 36 | struct cred *cred; /* new credentials */ |
37 | int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ | 37 | int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ |
diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h new file mode 100644 index 000000000000..98e892ef6d5a --- /dev/null +++ b/include/linux/cmdline-parser.h | |||
@@ -0,0 +1,43 @@ | |||
1 | /* | ||
2 | * Parse the command line to get partition information. ||
3 | * | ||
4 | * Written by Cai Zhiyong <caizhiyong@huawei.com> | ||
5 | * | ||
6 | */ | ||
7 | #ifndef CMDLINEPARSEH | ||
8 | #define CMDLINEPARSEH | ||
9 | |||
10 | #include <linux/blkdev.h> | ||
11 | |||
12 | /* partition flags */ | ||
13 | #define PF_RDONLY 0x01 /* Device is read only */ | ||
14 | #define PF_POWERUP_LOCK 0x02 /* Always locked after reset */ | ||
15 | |||
16 | struct cmdline_subpart { | ||
17 | char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */ | ||
18 | sector_t from; | ||
19 | sector_t size; | ||
20 | int flags; | ||
21 | struct cmdline_subpart *next_subpart; | ||
22 | }; | ||
23 | |||
24 | struct cmdline_parts { | ||
25 | char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */ | ||
26 | unsigned int nr_subparts; | ||
27 | struct cmdline_subpart *subpart; | ||
28 | struct cmdline_parts *next_parts; | ||
29 | }; | ||
30 | |||
31 | void cmdline_parts_free(struct cmdline_parts **parts); | ||
32 | |||
33 | int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline); | ||
34 | |||
35 | struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, | ||
36 | const char *bdev); | ||
37 | |||
38 | void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, | ||
39 | int slot, | ||
40 | int (*add_part)(int, struct cmdline_subpart *, void *), | ||
41 | void *param); | ||
42 | |||
43 | #endif /* CMDLINEPARSEH */ | ||
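A hedged usage sketch of the new parser API: parse a blkdevparts=-style string, look up one disk, then hand each subpart to a callback. show_part(), example_parse(), and the command-line string are illustrative only.

static int show_part(int slot, struct cmdline_subpart *subpart, void *param)
{
	pr_info("p%d: %s from %llu size %llu\n", slot, subpart->name,
		(unsigned long long)subpart->from,
		(unsigned long long)subpart->size);
	return 0;	/* a non-zero return stops the walk */
}

static void example_parse(sector_t disk_size)
{
	struct cmdline_parts *parts, *disk;

	if (cmdline_parts_parse(&parts, "mmcblk0:1G(data0),-(rest)"))
		return;
	disk = cmdline_parts_find(parts, "mmcblk0");
	if (disk)
		cmdline_parts_set(disk, disk_size, 1, show_part, NULL);
	cmdline_parts_free(&parts);
}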
diff --git a/include/linux/compat.h b/include/linux/compat.h index ec1aee4aec9c..345da00a86e0 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h | |||
@@ -43,6 +43,7 @@ | |||
43 | #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ | 43 | #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ |
44 | asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ | 44 | asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ |
45 | static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ | 45 | static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ |
46 | asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ | ||
46 | asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ | 47 | asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ |
47 | { \ | 48 | { \ |
48 | return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ | 49 | return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ |
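For clarity, a simplified expansion of COMPAT_SYSCALL_DEFINE2(foo, int, a, int, b) after this change, with the __SC_LONG/__SC_DELOUSE machinery reduced to plain long parameters and casts; the third line is the new forward declaration that satisfies -Wmissing-prototypes:

asmlinkage long compat_sys_foo(int a, int b);
static inline long C_SYSC_foo(int a, int b);
asmlinkage long compat_SyS_foo(long a, long b);	/* new: prototype */
asmlinkage long compat_SyS_foo(long a, long b)
{
	return C_SYSC_foo((int)a, (int)b);
}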
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 37e4f8da7cdf..fe68a5a98583 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h | |||
@@ -12,6 +12,15 @@ | |||
12 | extern unsigned long long elfcorehdr_addr; | 12 | extern unsigned long long elfcorehdr_addr; |
13 | extern unsigned long long elfcorehdr_size; | 13 | extern unsigned long long elfcorehdr_size; |
14 | 14 | ||
15 | extern int __weak elfcorehdr_alloc(unsigned long long *addr, | ||
16 | unsigned long long *size); | ||
17 | extern void __weak elfcorehdr_free(unsigned long long addr); | ||
18 | extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos); | ||
19 | extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); | ||
20 | extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, | ||
21 | unsigned long from, unsigned long pfn, | ||
22 | unsigned long size, pgprot_t prot); | ||
23 | |||
15 | extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, | 24 | extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, |
16 | unsigned long, int); | 25 | unsigned long, int); |
17 | 26 | ||
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 661d374aeb2d..f8d41cb1cbe0 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h | |||
@@ -66,8 +66,8 @@ struct gen_pool_chunk { | |||
66 | struct list_head next_chunk; /* next chunk in pool */ | 66 | struct list_head next_chunk; /* next chunk in pool */ |
67 | atomic_t avail; | 67 | atomic_t avail; |
68 | phys_addr_t phys_addr; /* physical starting address of memory chunk */ | 68 | phys_addr_t phys_addr; /* physical starting address of memory chunk */ |
69 | unsigned long start_addr; /* starting address of memory chunk */ | 69 | unsigned long start_addr; /* start address of memory chunk */ |
70 | unsigned long end_addr; /* ending address of memory chunk */ | 70 | unsigned long end_addr; /* end address of memory chunk (inclusive) */ |
71 | unsigned long bits[0]; /* bitmap for allocating memory chunk */ | 71 | unsigned long bits[0]; /* bitmap for allocating memory chunk */ |
72 | }; | 72 | }; |
73 | 73 | ||
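With the inclusive convention documented above, chunk setup stores the last valid address rather than one past it; a schematic fragment of why that matters:

	chunk->start_addr = virt;
	/*
	 * inclusive end: no wrap to 0 when a chunk ends at the very top
	 * of the address space (~0UL), which an exclusive end could not
	 * represent in an unsigned long
	 */
	chunk->end_addr = virt + size - 1;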
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c2b1801a160b..0393270466c3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -66,6 +66,9 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, | |||
66 | vm_flags_t vm_flags); | 66 | vm_flags_t vm_flags); |
67 | void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); | 67 | void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); |
68 | int dequeue_hwpoisoned_huge_page(struct page *page); | 68 | int dequeue_hwpoisoned_huge_page(struct page *page); |
69 | bool isolate_huge_page(struct page *page, struct list_head *list); | ||
70 | void putback_active_hugepage(struct page *page); | ||
71 | bool is_hugepage_active(struct page *page); | ||
69 | void copy_huge_page(struct page *dst, struct page *src); | 72 | void copy_huge_page(struct page *dst, struct page *src); |
70 | 73 | ||
71 | #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE | 74 | #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE |
@@ -134,6 +137,9 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page) | |||
134 | return 0; | 137 | return 0; |
135 | } | 138 | } |
136 | 139 | ||
140 | #define isolate_huge_page(p, l) false | ||
141 | #define putback_active_hugepage(p) do {} while (0) | ||
142 | #define is_hugepage_active(x) false | ||
137 | static inline void copy_huge_page(struct page *dst, struct page *src) | 143 | static inline void copy_huge_page(struct page *dst, struct page *src) |
138 | { | 144 | { |
139 | } | 145 | } |
@@ -261,6 +267,8 @@ struct huge_bootmem_page { | |||
261 | }; | 267 | }; |
262 | 268 | ||
263 | struct page *alloc_huge_page_node(struct hstate *h, int nid); | 269 | struct page *alloc_huge_page_node(struct hstate *h, int nid); |
270 | struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, | ||
271 | unsigned long addr, int avoid_reserve); | ||
264 | 272 | ||
265 | /* arch callback */ | 273 | /* arch callback */ |
266 | int __init alloc_bootmem_huge_page(struct hstate *h); | 274 | int __init alloc_bootmem_huge_page(struct hstate *h); |
@@ -371,9 +379,23 @@ static inline pgoff_t basepage_index(struct page *page) | |||
371 | return __basepage_index(page); | 379 | return __basepage_index(page); |
372 | } | 380 | } |
373 | 381 | ||
382 | extern void dissolve_free_huge_pages(unsigned long start_pfn, | ||
383 | unsigned long end_pfn); | ||
384 | int pmd_huge_support(void); | ||
385 | /* | ||
386 | * Currently hugepage migration is enabled only for pmd-based hugepage. | ||
387 | * This function will be updated when hugepage migration is more widely | ||
388 | * supported. | ||
389 | */ | ||
390 | static inline int hugepage_migration_support(struct hstate *h) | ||
391 | { | ||
392 | return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT); | ||
393 | } | ||
394 | |||
374 | #else /* CONFIG_HUGETLB_PAGE */ | 395 | #else /* CONFIG_HUGETLB_PAGE */ |
375 | struct hstate {}; | 396 | struct hstate {}; |
376 | #define alloc_huge_page_node(h, nid) NULL | 397 | #define alloc_huge_page_node(h, nid) NULL |
398 | #define alloc_huge_page_noerr(v, a, r) NULL | ||
377 | #define alloc_bootmem_huge_page(h) NULL | 399 | #define alloc_bootmem_huge_page(h) NULL |
378 | #define hstate_file(f) NULL | 400 | #define hstate_file(f) NULL |
379 | #define hstate_sizelog(s) NULL | 401 | #define hstate_sizelog(s) NULL |
@@ -396,6 +418,9 @@ static inline pgoff_t basepage_index(struct page *page) | |||
396 | { | 418 | { |
397 | return page->index; | 419 | return page->index; |
398 | } | 420 | } |
421 | #define dissolve_free_huge_pages(s, e) do {} while (0) | ||
422 | #define pmd_huge_support() 0 | ||
423 | #define hugepage_migration_support(h) 0 | ||
399 | #endif /* CONFIG_HUGETLB_PAGE */ | 424 | #endif /* CONFIG_HUGETLB_PAGE */ |
400 | 425 | ||
401 | #endif /* _LINUX_HUGETLB_H */ | 426 | #endif /* _LINUX_HUGETLB_H */ |
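A hedged sketch of the pattern the new hugetlb helpers enable, loosely modeled on the soft-offline and hot-remove paths: check that the page's hstate is migratable, isolate it onto a private list, and put it back if migration fails. alloc_func stands in for a caller-supplied new_page_t.

static int try_migrate_hugepage(struct page *page, new_page_t alloc_func)
{
	LIST_HEAD(pagelist);
	int ret = -EBUSY;

	if (!hugepage_migration_support(page_hstate(page)))
		return -EBUSY;

	if (isolate_huge_page(page, &pagelist)) {
		ret = migrate_pages(&pagelist, alloc_func, 0,
				    MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
		if (ret)
			putback_active_hugepage(page);
	}
	return ret;
}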
diff --git a/include/linux/init.h b/include/linux/init.h index e73f2b708525..f1c27a71d03c 100644 --- a/include/linux/init.h +++ b/include/linux/init.h | |||
@@ -153,6 +153,7 @@ extern unsigned int reset_devices; | |||
153 | void setup_arch(char **); | 153 | void setup_arch(char **); |
154 | void prepare_namespace(void); | 154 | void prepare_namespace(void); |
155 | void __init load_default_modules(void); | 155 | void __init load_default_modules(void); |
156 | int __init init_rootfs(void); | ||
156 | 157 | ||
157 | extern void (*late_time_init)(void); | 158 | extern void (*late_time_init)(void); |
158 | 159 | ||
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index c4d870b0d5e6..19c19a5eee29 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h | |||
@@ -22,7 +22,7 @@ struct ipc_ids { | |||
22 | int in_use; | 22 | int in_use; |
23 | unsigned short seq; | 23 | unsigned short seq; |
24 | unsigned short seq_max; | 24 | unsigned short seq_max; |
25 | struct rw_semaphore rw_mutex; | 25 | struct rw_semaphore rwsem; |
26 | struct idr ipcs_idr; | 26 | struct idr ipcs_idr; |
27 | int next_id; | 27 | int next_id; |
28 | }; | 28 | }; |
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index ca1d27a0d6a6..925eaf28fca9 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h | |||
@@ -264,10 +264,36 @@ extern void arch_arm_kprobe(struct kprobe *p); | |||
264 | extern void arch_disarm_kprobe(struct kprobe *p); | 264 | extern void arch_disarm_kprobe(struct kprobe *p); |
265 | extern int arch_init_kprobes(void); | 265 | extern int arch_init_kprobes(void); |
266 | extern void show_registers(struct pt_regs *regs); | 266 | extern void show_registers(struct pt_regs *regs); |
267 | extern kprobe_opcode_t *get_insn_slot(void); | ||
268 | extern void free_insn_slot(kprobe_opcode_t *slot, int dirty); | ||
269 | extern void kprobes_inc_nmissed_count(struct kprobe *p); | 267 | extern void kprobes_inc_nmissed_count(struct kprobe *p); |
270 | 268 | ||
269 | struct kprobe_insn_cache { | ||
270 | struct mutex mutex; | ||
271 | void *(*alloc)(void); /* allocate insn page */ | ||
272 | void (*free)(void *); /* free insn page */ | ||
273 | struct list_head pages; /* list of kprobe_insn_page */ | ||
274 | size_t insn_size; /* size of instruction slot */ | ||
275 | int nr_garbage; | ||
276 | }; | ||
277 | |||
278 | extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c); | ||
279 | extern void __free_insn_slot(struct kprobe_insn_cache *c, | ||
280 | kprobe_opcode_t *slot, int dirty); | ||
281 | |||
282 | #define DEFINE_INSN_CACHE_OPS(__name) \ | ||
283 | extern struct kprobe_insn_cache kprobe_##__name##_slots; \ | ||
284 | \ | ||
285 | static inline kprobe_opcode_t *get_##__name##_slot(void) \ | ||
286 | { \ | ||
287 | return __get_insn_slot(&kprobe_##__name##_slots); \ | ||
288 | } \ | ||
289 | \ | ||
290 | static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\ | ||
291 | { \ | ||
292 | __free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \ | ||
293 | } \ | ||
294 | |||
295 | DEFINE_INSN_CACHE_OPS(insn); | ||
296 | |||
271 | #ifdef CONFIG_OPTPROBES | 297 | #ifdef CONFIG_OPTPROBES |
272 | /* | 298 | /* |
273 | * Internal structure for direct jump optimized probe | 299 | * Internal structure for direct jump optimized probe |
@@ -287,13 +313,13 @@ extern void arch_optimize_kprobes(struct list_head *oplist); | |||
287 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | 313 | extern void arch_unoptimize_kprobes(struct list_head *oplist, |
288 | struct list_head *done_list); | 314 | struct list_head *done_list); |
289 | extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); | 315 | extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); |
290 | extern kprobe_opcode_t *get_optinsn_slot(void); | ||
291 | extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); | ||
292 | extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, | 316 | extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, |
293 | unsigned long addr); | 317 | unsigned long addr); |
294 | 318 | ||
295 | extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); | 319 | extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); |
296 | 320 | ||
321 | DEFINE_INSN_CACHE_OPS(optinsn); | ||
322 | |||
297 | #ifdef CONFIG_SYSCTL | 323 | #ifdef CONFIG_SYSCTL |
298 | extern int sysctl_kprobes_optimization; | 324 | extern int sysctl_kprobes_optimization; |
299 | extern int proc_kprobes_optimization_handler(struct ctl_table *table, | 325 | extern int proc_kprobes_optimization_handler(struct ctl_table *table, |
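Spelled out, DEFINE_INSN_CACHE_OPS(insn) generates exactly the wrappers whose prototypes were removed above, so existing get_insn_slot()/free_insn_slot() callers compile unchanged:

extern struct kprobe_insn_cache kprobe_insn_slots;

static inline kprobe_opcode_t *get_insn_slot(void)
{
	return __get_insn_slot(&kprobe_insn_slots);
}

static inline void free_insn_slot(kprobe_opcode_t *slot, int dirty)
{
	__free_insn_slot(&kprobe_insn_slots, slot, dirty);
}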
diff --git a/include/linux/lz4.h b/include/linux/lz4.h index d21c13f10a64..4356686b0a39 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h | |||
@@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len, | |||
67 | * note : Destination buffer must be already allocated. | 67 | * note : Destination buffer must be already allocated. |
68 | * slightly faster than lz4_decompress_unknownoutputsize() | 68 | * slightly faster than lz4_decompress_unknownoutputsize() |
69 | */ | 69 | */ |
70 | int lz4_decompress(const char *src, size_t *src_len, char *dest, | 70 | int lz4_decompress(const unsigned char *src, size_t *src_len, |
71 | size_t actual_dest_len); | 71 | unsigned char *dest, size_t actual_dest_len); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * lz4_decompress_unknownoutputsize() | 74 | * lz4_decompress_unknownoutputsize() |
@@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, | |||
82 | * Error if return (< 0) | 82 | * Error if return (< 0) |
83 | * note : Destination buffer must be already allocated. | 83 | * note : Destination buffer must be already allocated. |
84 | */ | 84 | */ |
85 | int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, | 85 | int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, |
86 | char *dest, size_t *dest_len); | 86 | unsigned char *dest, size_t *dest_len); |
87 | #endif | 87 | #endif |
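A hedged call-site fragment for the retyped API; per the surrounding kernel-doc, the destination must already be allocated to the known original size, and *src_len appears to be in/out, returning the number of compressed bytes consumed. Buffer names are illustrative.

	size_t src_len = comp_len;	/* in: size of the compressed input */
	int ret;

	ret = lz4_decompress(comp_buf, &src_len, out_buf, orig_len);
	if (ret < 0)
		return -EINVAL;
	/* on success, src_len holds the number of input bytes consumed */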
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f388203db7e8..31e95acddb4d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h | |||
@@ -60,6 +60,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); | |||
60 | void memblock_trim_memory(phys_addr_t align); | 60 | void memblock_trim_memory(phys_addr_t align); |
61 | 61 | ||
62 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 62 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
63 | int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, | ||
64 | unsigned long *end_pfn); | ||
63 | void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, | 65 | void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, |
64 | unsigned long *out_end_pfn, int *out_nid); | 66 | unsigned long *out_end_pfn, int *out_nid); |
65 | 67 | ||
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0d7df39a5885..da6716b9e3fe 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -91,7 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol) | |||
91 | } | 91 | } |
92 | 92 | ||
93 | #define vma_policy(vma) ((vma)->vm_policy) | 93 | #define vma_policy(vma) ((vma)->vm_policy) |
94 | #define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol)) | ||
95 | 94 | ||
96 | static inline void mpol_get(struct mempolicy *pol) | 95 | static inline void mpol_get(struct mempolicy *pol) |
97 | { | 96 | { |
@@ -126,6 +125,7 @@ struct shared_policy { | |||
126 | spinlock_t lock; | 125 | spinlock_t lock; |
127 | }; | 126 | }; |
128 | 127 | ||
128 | int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); | ||
129 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); | 129 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); |
130 | int mpol_set_shared_policy(struct shared_policy *info, | 130 | int mpol_set_shared_policy(struct shared_policy *info, |
131 | struct vm_area_struct *vma, | 131 | struct vm_area_struct *vma, |
@@ -173,7 +173,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); | |||
173 | /* Check if a vma is migratable */ | 173 | /* Check if a vma is migratable */ |
174 | static inline int vma_migratable(struct vm_area_struct *vma) | 174 | static inline int vma_migratable(struct vm_area_struct *vma) |
175 | { | 175 | { |
176 | if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP)) | 176 | if (vma->vm_flags & (VM_IO | VM_PFNMAP)) |
177 | return 0; | 177 | return 0; |
178 | /* | 178 | /* |
179 | * Migration allocates pages in the highest zone. If we cannot | 179 | * Migration allocates pages in the highest zone. If we cannot |
@@ -240,7 +240,12 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) | |||
240 | } | 240 | } |
241 | 241 | ||
242 | #define vma_policy(vma) NULL | 242 | #define vma_policy(vma) NULL |
243 | #define vma_set_policy(vma, pol) do {} while(0) | 243 | |
244 | static inline int | ||
245 | vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) | ||
246 | { | ||
247 | return 0; | ||
248 | } | ||
244 | 249 | ||
245 | static inline void numa_policy_init(void) | 250 | static inline void numa_policy_init(void) |
246 | { | 251 | { |
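Callers that used to pair mpol_dup() with the removed vma_set_policy() macro can switch to the single helper, which also propagates allocation failure; a sketch under that assumption (the error label is illustrative):

	/* Before: pol = mpol_dup(vma_policy(old_vma));
	 *         if (IS_ERR(pol)) ...; vma_set_policy(new_vma, pol);
	 * After: */
	if (vma_dup_policy(old_vma, new_vma))
		goto out_free_vma;	/* mpol_dup() failed, e.g. -ENOMEM */

On !CONFIG_NUMA builds the inline stub above returns 0, so the error branch is eliminated at compile time.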
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a405d3dc0f61..6fe521420631 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
@@ -41,8 +41,6 @@ extern int migrate_page(struct address_space *, | |||
41 | struct page *, struct page *, enum migrate_mode); | 41 | struct page *, struct page *, enum migrate_mode); |
42 | extern int migrate_pages(struct list_head *l, new_page_t x, | 42 | extern int migrate_pages(struct list_head *l, new_page_t x, |
43 | unsigned long private, enum migrate_mode mode, int reason); | 43 | unsigned long private, enum migrate_mode mode, int reason); |
44 | extern int migrate_huge_page(struct page *, new_page_t x, | ||
45 | unsigned long private, enum migrate_mode mode); | ||
46 | 44 | ||
47 | extern int fail_migrate_page(struct address_space *, | 45 | extern int fail_migrate_page(struct address_space *, |
48 | struct page *, struct page *); | 46 | struct page *, struct page *); |
@@ -62,9 +60,6 @@ static inline void putback_movable_pages(struct list_head *l) {} | |||
62 | static inline int migrate_pages(struct list_head *l, new_page_t x, | 60 | static inline int migrate_pages(struct list_head *l, new_page_t x, |
63 | unsigned long private, enum migrate_mode mode, int reason) | 61 | unsigned long private, enum migrate_mode mode, int reason) |
64 | { return -ENOSYS; } | 62 | { return -ENOSYS; } |
65 | static inline int migrate_huge_page(struct page *page, new_page_t x, | ||
66 | unsigned long private, enum migrate_mode mode) | ||
67 | { return -ENOSYS; } | ||
68 | 63 | ||
69 | static inline int migrate_prep(void) { return -ENOSYS; } | 64 | static inline int migrate_prep(void) { return -ENOSYS; } |
70 | static inline int migrate_prep_local(void) { return -ENOSYS; } | 65 | static inline int migrate_prep_local(void) { return -ENOSYS; } |
diff --git a/include/linux/mm.h b/include/linux/mm.h index d2d59b4149d0..caf543c7eaa7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -115,6 +115,12 @@ extern unsigned int kobjsize(const void *objp); | |||
115 | #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ | 115 | #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ |
116 | #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ | 116 | #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ |
117 | 117 | ||
118 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
119 | # define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */ | ||
120 | #else | ||
121 | # define VM_SOFTDIRTY 0 | ||
122 | #endif | ||
123 | |||
118 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ | 124 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ |
119 | #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ | 125 | #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ |
120 | #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ | 126 | #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ |
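Defining VM_SOFTDIRTY to 0 when CONFIG_MEM_SOFT_DIRTY is off lets flag tests compile away without sprinkling #ifdefs over the call sites; a sketch (the callee is hypothetical):

	/* With the config disabled, VM_SOFTDIRTY is 0 and the compiler
	 * eliminates the whole branch. */
	if (vma->vm_flags & VM_SOFTDIRTY)
		clear_soft_dirty(vma);	/* hypothetical helper */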
@@ -489,20 +495,6 @@ static inline int compound_order(struct page *page) | |||
489 | return (unsigned long)page[1].lru.prev; | 495 | return (unsigned long)page[1].lru.prev; |
490 | } | 496 | } |
491 | 497 | ||
492 | static inline int compound_trans_order(struct page *page) | ||
493 | { | ||
494 | int order; | ||
495 | unsigned long flags; | ||
496 | |||
497 | if (!PageHead(page)) | ||
498 | return 0; | ||
499 | |||
500 | flags = compound_lock_irqsave(page); | ||
501 | order = compound_order(page); | ||
502 | compound_unlock_irqrestore(page, flags); | ||
503 | return order; | ||
504 | } | ||
505 | |||
506 | static inline void set_compound_order(struct page *page, unsigned long order) | 498 | static inline void set_compound_order(struct page *page, unsigned long order) |
507 | { | 499 | { |
508 | page[1].lru.prev = (void *)order; | 500 | page[1].lru.prev = (void *)order; |
@@ -637,12 +629,12 @@ static inline enum zone_type page_zonenum(const struct page *page) | |||
637 | #endif | 629 | #endif |
638 | 630 | ||
639 | /* | 631 | /* |
640 | * The identification function is only used by the buddy allocator for | 632 | * The identification function is mainly used by the buddy allocator for |
641 | * determining if two pages could be buddies. We are not really | 633 | * determining if two pages could be buddies. We are not really identifying |
642 | * identifying a zone since we could be using a the section number | 634 | * the zone since we could be using the section number id if we do not have |
643 | * id if we have not node id available in page flags. | 635 | * node id available in page flags. |
644 | * We guarantee only that it will return the same value for two | 636 | * We only guarantee that it will return the same value for two combinable |
645 | * combinable pages in a zone. | 637 | * pages in a zone. |
646 | */ | 638 | */ |
647 | static inline int page_zone_id(struct page *page) | 639 | static inline int page_zone_id(struct page *page) |
648 | { | 640 | { |
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 1397ccf81e91..cf55945c83fb 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define LINUX_MM_INLINE_H | 2 | #define LINUX_MM_INLINE_H |
3 | 3 | ||
4 | #include <linux/huge_mm.h> | 4 | #include <linux/huge_mm.h> |
5 | #include <linux/swap.h> | ||
5 | 6 | ||
6 | /** | 7 | /** |
7 | * page_is_file_cache - should the page be on a file LRU or anon LRU? | 8 | * page_is_file_cache - should the page be on a file LRU or anon LRU? |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index af4a3b77a8de..bd791e452ad7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -105,6 +105,7 @@ struct zone_padding { | |||
105 | enum zone_stat_item { | 105 | enum zone_stat_item { |
106 | /* First 128 byte cacheline (assuming 64 bit words) */ | 106 | /* First 128 byte cacheline (assuming 64 bit words) */ |
107 | NR_FREE_PAGES, | 107 | NR_FREE_PAGES, |
108 | NR_ALLOC_BATCH, | ||
108 | NR_LRU_BASE, | 109 | NR_LRU_BASE, |
109 | NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ | 110 | NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ |
110 | NR_ACTIVE_ANON, /* " " " " " */ | 111 | NR_ACTIVE_ANON, /* " " " " " */ |
@@ -352,7 +353,6 @@ struct zone { | |||
352 | * free areas of different sizes | 353 | * free areas of different sizes |
353 | */ | 354 | */ |
354 | spinlock_t lock; | 355 | spinlock_t lock; |
355 | int all_unreclaimable; /* All pages pinned */ | ||
356 | #if defined CONFIG_COMPACTION || defined CONFIG_CMA | 356 | #if defined CONFIG_COMPACTION || defined CONFIG_CMA |
357 | /* Set to true when the PG_migrate_skip bits should be cleared */ | 357 | /* Set to true when the PG_migrate_skip bits should be cleared */ |
358 | bool compact_blockskip_flush; | 358 | bool compact_blockskip_flush; |
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index ffc444c38b0a..403940787be1 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h | |||
@@ -231,6 +231,7 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root, | |||
231 | unsigned long radix_tree_prev_hole(struct radix_tree_root *root, | 231 | unsigned long radix_tree_prev_hole(struct radix_tree_root *root, |
232 | unsigned long index, unsigned long max_scan); | 232 | unsigned long index, unsigned long max_scan); |
233 | int radix_tree_preload(gfp_t gfp_mask); | 233 | int radix_tree_preload(gfp_t gfp_mask); |
234 | int radix_tree_maybe_preload(gfp_t gfp_mask); | ||
234 | void radix_tree_init(void); | 235 | void radix_tree_init(void); |
235 | void *radix_tree_tag_set(struct radix_tree_root *root, | 236 | void *radix_tree_tag_set(struct radix_tree_root *root, |
236 | unsigned long index, unsigned int tag); | 237 | unsigned long index, unsigned int tag); |
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 69e37c2d1ea5..753207c8ce20 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h | |||
@@ -25,7 +25,7 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); | |||
25 | 25 | ||
26 | extern const struct file_operations ramfs_file_operations; | 26 | extern const struct file_operations ramfs_file_operations; |
27 | extern const struct vm_operations_struct generic_file_vm_ops; | 27 | extern const struct vm_operations_struct generic_file_vm_ops; |
28 | extern int __init init_rootfs(void); | 28 | extern int __init init_ramfs_fs(void); |
29 | 29 | ||
30 | int ramfs_fill_super(struct super_block *sb, void *data, int silent); | 30 | int ramfs_fill_super(struct super_block *sb, void *data, int silent); |
31 | 31 | ||
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 0022c1bb1e26..aa870a4ddf54 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h | |||
@@ -68,6 +68,10 @@ extern struct rb_node *rb_prev(const struct rb_node *); | |||
68 | extern struct rb_node *rb_first(const struct rb_root *); | 68 | extern struct rb_node *rb_first(const struct rb_root *); |
69 | extern struct rb_node *rb_last(const struct rb_root *); | 69 | extern struct rb_node *rb_last(const struct rb_root *); |
70 | 70 | ||
71 | /* Postorder iteration - always visit the parent after its children */ | ||
72 | extern struct rb_node *rb_first_postorder(const struct rb_root *); | ||
73 | extern struct rb_node *rb_next_postorder(const struct rb_node *); | ||
74 | |||
71 | /* Fast replacement of a single node without remove/rebalance/add/rebalance */ | 75 | /* Fast replacement of a single node without remove/rebalance/add/rebalance */ |
72 | extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, | 76 | extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, |
73 | struct rb_root *root); | 77 | struct rb_root *root); |
@@ -81,4 +85,22 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, | |||
81 | *rb_link = node; | 85 | *rb_link = node; |
82 | } | 86 | } |
83 | 87 | ||
88 | /** | ||
89 | * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of | ||
90 | * given type safe against removal of rb_node entry | ||
91 | * | ||
92 | * @pos: the 'type *' to use as a loop cursor. | ||
93 | * @n: another 'type *' to use as temporary storage | ||
94 | * @root: 'rb_root *' of the rbtree. | ||
95 | * @field: the name of the rb_node field within 'type'. | ||
96 | */ | ||
97 | #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ | ||
98 | for (pos = rb_entry(rb_first_postorder(root), typeof(*pos), field),\ | ||
99 | n = rb_entry(rb_next_postorder(&pos->field), \ | ||
100 | typeof(*pos), field); \ | ||
101 | &pos->field; \ | ||
102 | pos = n, \ | ||
103 | n = rb_entry(rb_next_postorder(&pos->field), \ | ||
104 | typeof(*pos), field)) | ||
105 | |||
84 | #endif /* _LINUX_RBTREE_H */ | 106 | #endif /* _LINUX_RBTREE_H */ |
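The natural use of the postorder walk is tearing down an entire tree without rebalancing after each erase, since every node is visited only after both of its children. A minimal sketch, assuming a struct foo that embeds the rb_node:

	struct foo {
		struct rb_node node;
		/* ... payload ... */
	};

	static void free_foo_tree(struct rb_root *root)
	{
		struct foo *pos, *n;

		/* children are visited first, so freeing pos is safe */
		rbtree_postorder_for_each_entry_safe(pos, n, root, node)
			kfree(pos);
	}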
diff --git a/include/linux/sched.h b/include/linux/sched.h index ce1e1c0aaa33..45f254dddafc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -2169,15 +2169,15 @@ static inline bool thread_group_leader(struct task_struct *p) | |||
2169 | * all we care about is that we have a task with the appropriate | 2169 | * all we care about is that we have a task with the appropriate |
2170 | * pid, we don't actually care if we have the right task. | 2170 | * pid, we don't actually care if we have the right task. |
2171 | */ | 2171 | */ |
2172 | static inline int has_group_leader_pid(struct task_struct *p) | 2172 | static inline bool has_group_leader_pid(struct task_struct *p) |
2173 | { | 2173 | { |
2174 | return p->pid == p->tgid; | 2174 | return task_pid(p) == p->signal->leader_pid; |
2175 | } | 2175 | } |
2176 | 2176 | ||
2177 | static inline | 2177 | static inline |
2178 | int same_thread_group(struct task_struct *p1, struct task_struct *p2) | 2178 | bool same_thread_group(struct task_struct *p1, struct task_struct *p2) |
2179 | { | 2179 | { |
2180 | return p1->tgid == p2->tgid; | 2180 | return p1->signal == p2->signal; |
2181 | } | 2181 | } |
2182 | 2182 | ||
2183 | static inline struct task_struct *next_thread(const struct task_struct *p) | 2183 | static inline struct task_struct *next_thread(const struct task_struct *p) |
diff --git a/include/linux/smp.h b/include/linux/smp.h index c181399f2c20..cfb7ca094b38 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -28,6 +28,27 @@ extern unsigned int total_cpus; | |||
28 | int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, | 28 | int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, |
29 | int wait); | 29 | int wait); |
30 | 30 | ||
31 | /* | ||
32 | * Call a function on all processors | ||
33 | */ | ||
34 | int on_each_cpu(smp_call_func_t func, void *info, int wait); | ||
35 | |||
36 | /* | ||
37 | * Call a function on processors specified by mask, which might include | ||
38 | * the local one. | ||
39 | */ | ||
40 | void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, | ||
41 | void *info, bool wait); | ||
42 | |||
43 | /* | ||
44 | * Call a function on each processor for which the supplied function | ||
45 | * cond_func returns a positive value. This may include the local | ||
46 | * processor. | ||
47 | */ | ||
48 | void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), | ||
49 | smp_call_func_t func, void *info, bool wait, | ||
50 | gfp_t gfp_flags); | ||
51 | |||
31 | #ifdef CONFIG_SMP | 52 | #ifdef CONFIG_SMP |
32 | 53 | ||
33 | #include <linux/preempt.h> | 54 | #include <linux/preempt.h> |
@@ -95,27 +116,6 @@ static inline void call_function_init(void) { } | |||
95 | #endif | 116 | #endif |
96 | 117 | ||
97 | /* | 118 | /* |
98 | * Call a function on all processors | ||
99 | */ | ||
100 | int on_each_cpu(smp_call_func_t func, void *info, int wait); | ||
101 | |||
102 | /* | ||
103 | * Call a function on processors specified by mask, which might include | ||
104 | * the local one. | ||
105 | */ | ||
106 | void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, | ||
107 | void *info, bool wait); | ||
108 | |||
109 | /* | ||
110 | * Call a function on each processor for which the supplied function | ||
111 | * cond_func returns a positive value. This may include the local | ||
112 | * processor. | ||
113 | */ | ||
114 | void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), | ||
115 | smp_call_func_t func, void *info, bool wait, | ||
116 | gfp_t gfp_flags); | ||
117 | |||
118 | /* | ||
119 | * Mark the boot cpu "online" so that it can call console drivers in | 119 | * Mark the boot cpu "online" so that it can call console drivers in |
120 | * printk() and can access its per-cpu storage. | 120 | * printk() and can access its per-cpu storage. |
121 | */ | 121 | */ |
@@ -139,43 +139,6 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info) | |||
139 | } | 139 | } |
140 | #define smp_call_function(func, info, wait) \ | 140 | #define smp_call_function(func, info, wait) \ |
141 | (up_smp_call_function(func, info)) | 141 | (up_smp_call_function(func, info)) |
142 | #define on_each_cpu(func, info, wait) \ | ||
143 | ({ \ | ||
144 | unsigned long __flags; \ | ||
145 | local_irq_save(__flags); \ | ||
146 | func(info); \ | ||
147 | local_irq_restore(__flags); \ | ||
148 | 0; \ | ||
149 | }) | ||
150 | /* | ||
151 | * Note we still need to test the mask even for UP | ||
152 | * because we actually can get an empty mask from | ||
153 | * code that on SMP might call us without the local | ||
154 | * CPU in the mask. | ||
155 | */ | ||
156 | #define on_each_cpu_mask(mask, func, info, wait) \ | ||
157 | do { \ | ||
158 | if (cpumask_test_cpu(0, (mask))) { \ | ||
159 | local_irq_disable(); \ | ||
160 | (func)(info); \ | ||
161 | local_irq_enable(); \ | ||
162 | } \ | ||
163 | } while (0) | ||
164 | /* | ||
165 | * Preemption is disabled here to make sure the cond_func is called under the | ||
166 | * same condtions in UP and SMP. | ||
167 | */ | ||
168 | #define on_each_cpu_cond(cond_func, func, info, wait, gfp_flags)\ | ||
169 | do { \ | ||
170 | void *__info = (info); \ | ||
171 | preempt_disable(); \ | ||
172 | if ((cond_func)(0, __info)) { \ | ||
173 | local_irq_disable(); \ | ||
174 | (func)(__info); \ | ||
175 | local_irq_enable(); \ | ||
176 | } \ | ||
177 | preempt_enable(); \ | ||
178 | } while (0) | ||
179 | 142 | ||
180 | static inline void smp_send_reschedule(int cpu) { } | 143 | static inline void smp_send_reschedule(int cpu) { } |
181 | #define smp_prepare_boot_cpu() do {} while (0) | 144 | #define smp_prepare_boot_cpu() do {} while (0) |
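With the declarations hoisted out of the CONFIG_SMP block and the UP macro fallbacks removed, callers no longer need conditional code of their own. Typical use of the conditional variant, with an illustrative per-cpu predicate:

	static DEFINE_PER_CPU(unsigned int, pending_count);	/* illustrative */

	static bool cpu_has_pending(int cpu, void *info)
	{
		return per_cpu(pending_count, cpu) != 0;
	}

	static void do_flush(void *info)
	{
		/* runs with interrupts disabled on each selected CPU */
	}

	/* from process context: */
	on_each_cpu_cond(cpu_has_pending, do_flush, NULL, true, GFP_KERNEL);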
diff --git a/include/linux/swap.h b/include/linux/swap.h index d95cde5e257d..c03c139219c9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -182,6 +182,33 @@ enum { | |||
182 | #define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs, in first swap_map */ | 182 | #define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs, in first swap_map */ |
183 | 183 | ||
184 | /* | 184 | /* |
185 | * We use this to track usage of a cluster. A cluster is a block of swap disk | ||
186 | * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All | ||
187 | * free clusters are organized into a list. We fetch an entry from the list to | ||
188 | * get a free cluster. | ||
189 | * | ||
190 | * The data field stores next cluster if the cluster is free or cluster usage | ||
191 | * counter otherwise. The flags field determines if a cluster is free. This is | ||
192 | * protected by swap_info_struct.lock. | ||
193 | */ | ||
194 | struct swap_cluster_info { | ||
195 | unsigned int data:24; | ||
196 | unsigned int flags:8; | ||
197 | }; | ||
198 | #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ | ||
199 | #define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ | ||
200 | |||
201 | /* | ||
202 | * We assign a cluster to each CPU, so each CPU can allocate swap entry from | ||
203 | * its own cluster and swapout sequentially. The purpose is to optimize swapout | ||
204 | * throughput. | ||
205 | */ | ||
206 | struct percpu_cluster { | ||
207 | struct swap_cluster_info index; /* Current cluster index */ | ||
208 | unsigned int next; /* Likely next allocation offset */ | ||
209 | }; | ||
210 | |||
211 | /* | ||
185 | * The in-memory structure used to track swap areas. | 212 | * The in-memory structure used to track swap areas. |
186 | */ | 213 | */ |
187 | struct swap_info_struct { | 214 | struct swap_info_struct { |
@@ -191,14 +218,16 @@ struct swap_info_struct { | |||
191 | signed char next; /* next type on the swap list */ | 218 | signed char next; /* next type on the swap list */ |
192 | unsigned int max; /* extent of the swap_map */ | 219 | unsigned int max; /* extent of the swap_map */ |
193 | unsigned char *swap_map; /* vmalloc'ed array of usage counts */ | 220 | unsigned char *swap_map; /* vmalloc'ed array of usage counts */ |
221 | struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ | ||
222 | struct swap_cluster_info free_cluster_head; /* free cluster list head */ | ||
223 | struct swap_cluster_info free_cluster_tail; /* free cluster list tail */ | ||
194 | unsigned int lowest_bit; /* index of first free in swap_map */ | 224 | unsigned int lowest_bit; /* index of first free in swap_map */ |
195 | unsigned int highest_bit; /* index of last free in swap_map */ | 225 | unsigned int highest_bit; /* index of last free in swap_map */ |
196 | unsigned int pages; /* total of usable pages of swap */ | 226 | unsigned int pages; /* total of usable pages of swap */ |
197 | unsigned int inuse_pages; /* number of those currently in use */ | 227 | unsigned int inuse_pages; /* number of those currently in use */ |
198 | unsigned int cluster_next; /* likely index for next allocation */ | 228 | unsigned int cluster_next; /* likely index for next allocation */ |
199 | unsigned int cluster_nr; /* countdown to next cluster search */ | 229 | unsigned int cluster_nr; /* countdown to next cluster search */ |
200 | unsigned int lowest_alloc; /* while preparing discard cluster */ | 230 | struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ |
201 | unsigned int highest_alloc; /* while preparing discard cluster */ | ||
202 | struct swap_extent *curr_swap_extent; | 231 | struct swap_extent *curr_swap_extent; |
203 | struct swap_extent first_swap_extent; | 232 | struct swap_extent first_swap_extent; |
204 | struct block_device *bdev; /* swap device or bdev of swap file */ | 233 | struct block_device *bdev; /* swap device or bdev of swap file */ |
@@ -212,14 +241,18 @@ struct swap_info_struct { | |||
212 | * protect map scan related fields like | 241 | * protect map scan related fields like |
213 | * swap_map, lowest_bit, highest_bit, | 242 | * swap_map, lowest_bit, highest_bit, |
214 | * inuse_pages, cluster_next, | 243 | * inuse_pages, cluster_next, |
215 | * cluster_nr, lowest_alloc and | 244 | * cluster_nr, lowest_alloc, |
216 | * highest_alloc. other fields are only | 245 | * highest_alloc, free/discard cluster |
217 | * changed at swapon/swapoff, so are | 246 | * list. other fields are only changed |
218 | * protected by swap_lock. changing | 247 | * at swapon/swapoff, so are protected |
219 | * flags need hold this lock and | 248 | * by swap_lock. changing flags need |
220 | * swap_lock. If both locks need hold, | 249 | * hold this lock and swap_lock. If |
221 | * hold swap_lock first. | 250 | * both locks need hold, hold swap_lock |
251 | * first. | ||
222 | */ | 252 | */ |
253 | struct work_struct discard_work; /* discard worker */ | ||
254 | struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */ | ||
255 | struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */ | ||
223 | }; | 256 | }; |
224 | 257 | ||
225 | struct swap_list_t { | 258 | struct swap_list_t { |
@@ -414,6 +447,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) | |||
414 | 447 | ||
415 | #else /* CONFIG_SWAP */ | 448 | #else /* CONFIG_SWAP */ |
416 | 449 | ||
450 | #define swap_address_space(entry) (NULL) | ||
417 | #define get_nr_swap_pages() 0L | 451 | #define get_nr_swap_pages() 0L |
418 | #define total_swap_pages 0L | 452 | #define total_swap_pages 0L |
419 | #define total_swapcache_pages() 0UL | 453 | #define total_swapcache_pages() 0UL |
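Because swap_cluster_info.data is overloaded — next-free-cluster index while the cluster is free, usage count once it is allocated — this series keeps accesses behind small helpers in mm/swapfile.c; a sketch of the idea (helper names are illustrative):

	static inline bool cluster_is_free(struct swap_cluster_info *info)
	{
		return info->flags & CLUSTER_FLAG_FREE;
	}

	static inline unsigned int cluster_next(struct swap_cluster_info *info)
	{
		return info->data;	/* meaningful only while free */
	}

	static inline void cluster_set_count(struct swap_cluster_info *info,
					     unsigned int c)
	{
		info->data = c;		/* meaningful once allocated */
	}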
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 84662ecc7b51..7fac04e7ff6e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -186,6 +186,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; | |||
186 | #define __SYSCALL_DEFINEx(x, name, ...) \ | 186 | #define __SYSCALL_DEFINEx(x, name, ...) \ |
187 | asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ | 187 | asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ |
188 | static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ | 188 | static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ |
189 | asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ | ||
189 | asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ | 190 | asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ |
190 | { \ | 191 | { \ |
191 | long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ | 192 | long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ |
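The extra line emits a prototype for the SyS##name stub immediately before its definition, quieting missing-declaration warnings from sparse and -Wmissing-prototypes builds. Expanded by hand for a hypothetical two-argument syscall, the generated code is shaped roughly like this (asmlinkage_protect and error checks elided):

	asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd);
	static inline long SYSC_dup2(unsigned int oldfd, unsigned int newfd);
	asmlinkage long SyS_dup2(long oldfd, long newfd);	/* the new line */
	asmlinkage long SyS_dup2(long oldfd, long newfd)
	{
		return SYSC_dup2((unsigned int)oldfd, (unsigned int)newfd);
	}
	/* SYSC_dup2() body follows the macro invocation */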
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index bd6cf61142be..1855f0a22add 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h | |||
@@ -70,6 +70,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, | |||
70 | THP_ZERO_PAGE_ALLOC, | 70 | THP_ZERO_PAGE_ALLOC, |
71 | THP_ZERO_PAGE_ALLOC_FAILED, | 71 | THP_ZERO_PAGE_ALLOC_FAILED, |
72 | #endif | 72 | #endif |
73 | #ifdef CONFIG_SMP | ||
74 | NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ | ||
75 | NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ | ||
76 | #endif | ||
77 | NR_TLB_LOCAL_FLUSH_ALL, | ||
78 | NR_TLB_LOCAL_FLUSH_ONE, | ||
73 | NR_VM_EVENT_ITEMS | 79 | NR_VM_EVENT_ITEMS |
74 | }; | 80 | }; |
75 | 81 | ||
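The new counters are bumped at the flush sites with the usual vm-event helper; in sketch form, a local full flush in the arch code would do something like:

	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
	local_flush_tlb();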
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index c586679b6fef..e4b948080d20 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h | |||
@@ -143,7 +143,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, | |||
143 | } | 143 | } |
144 | 144 | ||
145 | extern unsigned long global_reclaimable_pages(void); | 145 | extern unsigned long global_reclaimable_pages(void); |
146 | extern unsigned long zone_reclaimable_pages(struct zone *zone); | ||
147 | 146 | ||
148 | #ifdef CONFIG_NUMA | 147 | #ifdef CONFIG_NUMA |
149 | /* | 148 | /* |
@@ -198,7 +197,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item); | |||
198 | extern void dec_zone_state(struct zone *, enum zone_stat_item); | 197 | extern void dec_zone_state(struct zone *, enum zone_stat_item); |
199 | extern void __dec_zone_state(struct zone *, enum zone_stat_item); | 198 | extern void __dec_zone_state(struct zone *, enum zone_stat_item); |
200 | 199 | ||
201 | void refresh_cpu_vm_stats(int); | 200 | void cpu_vm_stats_fold(int cpu); |
202 | void refresh_zone_stat_thresholds(void); | 201 | void refresh_zone_stat_thresholds(void); |
203 | 202 | ||
204 | void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); | 203 | void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); |
@@ -255,6 +254,7 @@ static inline void __dec_zone_page_state(struct page *page, | |||
255 | 254 | ||
256 | static inline void refresh_cpu_vm_stats(int cpu) { } | 255 | static inline void refresh_cpu_vm_stats(int cpu) { } |
257 | static inline void refresh_zone_stat_thresholds(void) { } | 256 | static inline void refresh_zone_stat_thresholds(void) { } |
257 | static inline void cpu_vm_stats_fold(int cpu) { } | ||
258 | 258 | ||
259 | static inline void drain_zonestat(struct zone *zone, | 259 | static inline void drain_zonestat(struct zone *zone, |
260 | struct per_cpu_pageset *pset) { } | 260 | struct per_cpu_pageset *pset) { } |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 4e198ca1f685..021b8a319b9e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -98,8 +98,6 @@ int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); | |||
98 | int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, | 98 | int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, |
99 | enum wb_reason reason); | 99 | enum wb_reason reason); |
100 | void sync_inodes_sb(struct super_block *); | 100 | void sync_inodes_sb(struct super_block *); |
101 | long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, | ||
102 | enum wb_reason reason); | ||
103 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); | 101 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); |
104 | void inode_wait_for_writeback(struct inode *inode); | 102 | void inode_wait_for_writeback(struct inode *inode); |
105 | 103 | ||
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 6bc943ecb841..d0c613476620 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h | |||
@@ -268,11 +268,13 @@ TRACE_EVENT(mm_page_alloc_extfrag, | |||
268 | 268 | ||
269 | TP_PROTO(struct page *page, | 269 | TP_PROTO(struct page *page, |
270 | int alloc_order, int fallback_order, | 270 | int alloc_order, int fallback_order, |
271 | int alloc_migratetype, int fallback_migratetype), | 271 | int alloc_migratetype, int fallback_migratetype, |
272 | int change_ownership), | ||
272 | 273 | ||
273 | TP_ARGS(page, | 274 | TP_ARGS(page, |
274 | alloc_order, fallback_order, | 275 | alloc_order, fallback_order, |
275 | alloc_migratetype, fallback_migratetype), | 276 | alloc_migratetype, fallback_migratetype, |
277 | change_ownership), | ||
276 | 278 | ||
277 | TP_STRUCT__entry( | 279 | TP_STRUCT__entry( |
278 | __field( struct page *, page ) | 280 | __field( struct page *, page ) |
@@ -280,6 +282,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, | |||
280 | __field( int, fallback_order ) | 282 | __field( int, fallback_order ) |
281 | __field( int, alloc_migratetype ) | 283 | __field( int, alloc_migratetype ) |
282 | __field( int, fallback_migratetype ) | 284 | __field( int, fallback_migratetype ) |
285 | __field( int, change_ownership ) | ||
283 | ), | 286 | ), |
284 | 287 | ||
285 | TP_fast_assign( | 288 | TP_fast_assign( |
@@ -288,6 +291,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, | |||
288 | __entry->fallback_order = fallback_order; | 291 | __entry->fallback_order = fallback_order; |
289 | __entry->alloc_migratetype = alloc_migratetype; | 292 | __entry->alloc_migratetype = alloc_migratetype; |
290 | __entry->fallback_migratetype = fallback_migratetype; | 293 | __entry->fallback_migratetype = fallback_migratetype; |
294 | __entry->change_ownership = change_ownership; | ||
291 | ), | 295 | ), |
292 | 296 | ||
293 | TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", | 297 | TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", |
@@ -299,7 +303,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, | |||
299 | __entry->alloc_migratetype, | 303 | __entry->alloc_migratetype, |
300 | __entry->fallback_migratetype, | 304 | __entry->fallback_migratetype, |
301 | __entry->fallback_order < pageblock_order, | 305 | __entry->fallback_order < pageblock_order, |
302 | __entry->alloc_migratetype == __entry->fallback_migratetype) | 306 | __entry->change_ownership) |
303 | ); | 307 | ); |
304 | 308 | ||
305 | #endif /* _TRACE_KMEM_H */ | 309 | #endif /* _TRACE_KMEM_H */ |
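With change_ownership passed in by the caller, the TP_printk no longer re-derives it from the two migratetypes. A sketch of the adjusted call site (variable names are illustrative):

	trace_mm_page_alloc_extfrag(page, order, current_order,
				    start_migratetype, migratetype,
				    new_type == start_migratetype);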
diff --git a/init/do_mounts.c b/init/do_mounts.c index 816014c4627e..a51cddc2ff8c 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/async.h> | 26 | #include <linux/async.h> |
27 | #include <linux/fs_struct.h> | 27 | #include <linux/fs_struct.h> |
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/ramfs.h> | ||
30 | #include <linux/shmem_fs.h> | ||
29 | 31 | ||
30 | #include <linux/nfs_fs.h> | 32 | #include <linux/nfs_fs.h> |
31 | #include <linux/nfs_fs_sb.h> | 33 | #include <linux/nfs_fs_sb.h> |
@@ -588,3 +590,46 @@ out: | |||
588 | sys_mount(".", "/", NULL, MS_MOVE, NULL); | 590 | sys_mount(".", "/", NULL, MS_MOVE, NULL); |
589 | sys_chroot("."); | 591 | sys_chroot("."); |
590 | } | 592 | } |
593 | |||
594 | static bool is_tmpfs; | ||
595 | static struct dentry *rootfs_mount(struct file_system_type *fs_type, | ||
596 | int flags, const char *dev_name, void *data) | ||
597 | { | ||
598 | static unsigned long once; | ||
599 | void *fill = ramfs_fill_super; | ||
600 | |||
601 | if (test_and_set_bit(0, &once)) | ||
602 | return ERR_PTR(-ENODEV); | ||
603 | |||
604 | if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs) | ||
605 | fill = shmem_fill_super; | ||
606 | |||
607 | return mount_nodev(fs_type, flags, data, fill); | ||
608 | } | ||
609 | |||
610 | static struct file_system_type rootfs_fs_type = { | ||
611 | .name = "rootfs", | ||
612 | .mount = rootfs_mount, | ||
613 | .kill_sb = kill_litter_super, | ||
614 | }; | ||
615 | |||
616 | int __init init_rootfs(void) | ||
617 | { | ||
618 | int err = register_filesystem(&rootfs_fs_type); | ||
619 | |||
620 | if (err) | ||
621 | return err; | ||
622 | |||
623 | if (IS_ENABLED(CONFIG_TMPFS) && !saved_root_name[0] && | ||
624 | (!root_fs_names || strstr(root_fs_names, "tmpfs"))) { | ||
625 | err = shmem_init(); | ||
626 | is_tmpfs = true; | ||
627 | } else { | ||
628 | err = init_ramfs_fs(); | ||
629 | } | ||
630 | |||
631 | if (err) | ||
632 | unregister_filesystem(&rootfs_fs_type); | ||
633 | |||
634 | return err; | ||
635 | } | ||
@@ -70,8 +70,6 @@ struct msg_sender { | |||
70 | 70 | ||
71 | #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) | 71 | #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) |
72 | 72 | ||
73 | #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) | ||
74 | |||
75 | static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); | 73 | static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); |
76 | static int newque(struct ipc_namespace *, struct ipc_params *); | 74 | static int newque(struct ipc_namespace *, struct ipc_params *); |
77 | #ifdef CONFIG_PROC_FS | 75 | #ifdef CONFIG_PROC_FS |
@@ -172,7 +170,7 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) | |||
172 | * @ns: namespace | 170 | * @ns: namespace |
173 | * @params: ptr to the structure that contains the key and msgflg | 171 | * @params: ptr to the structure that contains the key and msgflg |
174 | * | 172 | * |
175 | * Called with msg_ids.rw_mutex held (writer) | 173 | * Called with msg_ids.rwsem held (writer) |
176 | */ | 174 | */ |
177 | static int newque(struct ipc_namespace *ns, struct ipc_params *params) | 175 | static int newque(struct ipc_namespace *ns, struct ipc_params *params) |
178 | { | 176 | { |
@@ -259,8 +257,8 @@ static void expunge_all(struct msg_queue *msq, int res) | |||
259 | * removes the message queue from message queue ID IDR, and cleans up all the | 257 | * removes the message queue from message queue ID IDR, and cleans up all the |
260 | * messages associated with this queue. | 258 | * messages associated with this queue. |
261 | * | 259 | * |
262 | * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held | 260 | * msg_ids.rwsem (writer) and the spinlock for this message queue are held |
263 | * before freeque() is called. msg_ids.rw_mutex remains locked on exit. | 261 | * before freeque() is called. msg_ids.rwsem remains locked on exit. |
264 | */ | 262 | */ |
265 | static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | 263 | static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
266 | { | 264 | { |
@@ -270,7 +268,8 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |||
270 | expunge_all(msq, -EIDRM); | 268 | expunge_all(msq, -EIDRM); |
271 | ss_wakeup(&msq->q_senders, 1); | 269 | ss_wakeup(&msq->q_senders, 1); |
272 | msg_rmid(ns, msq); | 270 | msg_rmid(ns, msq); |
273 | msg_unlock(msq); | 271 | ipc_unlock_object(&msq->q_perm); |
272 | rcu_read_unlock(); | ||
274 | 273 | ||
275 | list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { | 274 | list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { |
276 | atomic_dec(&ns->msg_hdrs); | 275 | atomic_dec(&ns->msg_hdrs); |
@@ -282,7 +281,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |||
282 | } | 281 | } |
283 | 282 | ||
284 | /* | 283 | /* |
285 | * Called with msg_ids.rw_mutex and ipcp locked. | 284 | * Called with msg_ids.rwsem and ipcp locked. |
286 | */ | 285 | */ |
287 | static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) | 286 | static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) |
288 | { | 287 | { |
@@ -386,9 +385,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) | |||
386 | } | 385 | } |
387 | 386 | ||
388 | /* | 387 | /* |
389 | * This function handles some msgctl commands which require the rw_mutex | 388 | * This function handles some msgctl commands which require the rwsem |
390 | * to be held in write mode. | 389 | * to be held in write mode. |
391 | * NOTE: no locks must be held, the rw_mutex is taken inside this function. | 390 | * NOTE: no locks must be held, the rwsem is taken inside this function. |
392 | */ | 391 | */ |
393 | static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, | 392 | static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, |
394 | struct msqid_ds __user *buf, int version) | 393 | struct msqid_ds __user *buf, int version) |
@@ -403,7 +402,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, | |||
403 | return -EFAULT; | 402 | return -EFAULT; |
404 | } | 403 | } |
405 | 404 | ||
406 | down_write(&msg_ids(ns).rw_mutex); | 405 | down_write(&msg_ids(ns).rwsem); |
407 | rcu_read_lock(); | 406 | rcu_read_lock(); |
408 | 407 | ||
409 | ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, | 408 | ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, |
@@ -459,7 +458,7 @@ out_unlock0: | |||
459 | out_unlock1: | 458 | out_unlock1: |
460 | rcu_read_unlock(); | 459 | rcu_read_unlock(); |
461 | out_up: | 460 | out_up: |
462 | up_write(&msg_ids(ns).rw_mutex); | 461 | up_write(&msg_ids(ns).rwsem); |
463 | return err; | 462 | return err; |
464 | } | 463 | } |
465 | 464 | ||
@@ -494,7 +493,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid, | |||
494 | msginfo.msgmnb = ns->msg_ctlmnb; | 493 | msginfo.msgmnb = ns->msg_ctlmnb; |
495 | msginfo.msgssz = MSGSSZ; | 494 | msginfo.msgssz = MSGSSZ; |
496 | msginfo.msgseg = MSGSEG; | 495 | msginfo.msgseg = MSGSEG; |
497 | down_read(&msg_ids(ns).rw_mutex); | 496 | down_read(&msg_ids(ns).rwsem); |
498 | if (cmd == MSG_INFO) { | 497 | if (cmd == MSG_INFO) { |
499 | msginfo.msgpool = msg_ids(ns).in_use; | 498 | msginfo.msgpool = msg_ids(ns).in_use; |
500 | msginfo.msgmap = atomic_read(&ns->msg_hdrs); | 499 | msginfo.msgmap = atomic_read(&ns->msg_hdrs); |
@@ -505,7 +504,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid, | |||
505 | msginfo.msgtql = MSGTQL; | 504 | msginfo.msgtql = MSGTQL; |
506 | } | 505 | } |
507 | max_id = ipc_get_maxid(&msg_ids(ns)); | 506 | max_id = ipc_get_maxid(&msg_ids(ns)); |
508 | up_read(&msg_ids(ns).rw_mutex); | 507 | up_read(&msg_ids(ns).rwsem); |
509 | if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) | 508 | if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) |
510 | return -EFAULT; | 509 | return -EFAULT; |
511 | return (max_id < 0) ? 0 : max_id; | 510 | return (max_id < 0) ? 0 : max_id; |
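The s/rw_mutex/rwsem/ rename tracks the actual type in struct ipc_ids, and the surrounding changes establish the fine-grained pattern used across ipc/: the rwsem serializes id-table updates, while per-object work happens under RCU plus the object's own spinlock. In sketch form:

	down_write(&msg_ids(ns).rwsem);		/* idr / id-table updates */
	rcu_read_lock();
	/* ... look up the object ... */
	ipc_lock_object(&msq->q_perm);		/* per-object spinlock */
	/* ... modify the queue ... */
	ipc_unlock_object(&msq->q_perm);
	rcu_read_unlock();
	up_write(&msg_ids(ns).rwsem);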
diff --git a/ipc/namespace.c b/ipc/namespace.c index 4be6581d3b7f..59451c1e214d 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c | |||
@@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
81 | int next_id; | 81 | int next_id; |
82 | int total, in_use; | 82 | int total, in_use; |
83 | 83 | ||
84 | down_write(&ids->rw_mutex); | 84 | down_write(&ids->rwsem); |
85 | 85 | ||
86 | in_use = ids->in_use; | 86 | in_use = ids->in_use; |
87 | 87 | ||
@@ -89,11 +89,12 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
89 | perm = idr_find(&ids->ipcs_idr, next_id); | 89 | perm = idr_find(&ids->ipcs_idr, next_id); |
90 | if (perm == NULL) | 90 | if (perm == NULL) |
91 | continue; | 91 | continue; |
92 | ipc_lock_by_ptr(perm); | 92 | rcu_read_lock(); |
93 | ipc_lock_object(perm); | ||
93 | free(ns, perm); | 94 | free(ns, perm); |
94 | total++; | 95 | total++; |
95 | } | 96 | } |
96 | up_write(&ids->rw_mutex); | 97 | up_write(&ids->rwsem); |
97 | } | 98 | } |
98 | 99 | ||
99 | static void free_ipc_ns(struct ipc_namespace *ns) | 100 | static void free_ipc_ns(struct ipc_namespace *ns) |
diff --git a/ipc/sem.c b/ipc/sem.c | |||
@@ -322,7 +322,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum) | |||
322 | } | 322 | } |
323 | 323 | ||
324 | /* | 324 | /* |
325 | * sem_lock_(check_) routines are called in the paths where the rw_mutex | 325 | * sem_lock_(check_) routines are called in the paths where the rwsem |
326 | * is not held. | 326 | * is not held. |
327 | * | 327 | * |
328 | * The caller holds the RCU read lock. | 328 | * The caller holds the RCU read lock. |
@@ -426,7 +426,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) | |||
426 | * @ns: namespace | 426 | * @ns: namespace |
427 | * @params: ptr to the structure that contains key, semflg and nsems | 427 | * @params: ptr to the structure that contains key, semflg and nsems |
428 | * | 428 | * |
429 | * Called with sem_ids.rw_mutex held (as a writer) | 429 | * Called with sem_ids.rwsem held (as a writer) |
430 | */ | 430 | */ |
431 | 431 | ||
432 | static int newary(struct ipc_namespace *ns, struct ipc_params *params) | 432 | static int newary(struct ipc_namespace *ns, struct ipc_params *params) |
@@ -492,7 +492,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) | |||
492 | 492 | ||
493 | 493 | ||
494 | /* | 494 | /* |
495 | * Called with sem_ids.rw_mutex and ipcp locked. | 495 | * Called with sem_ids.rwsem and ipcp locked. |
496 | */ | 496 | */ |
497 | static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) | 497 | static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) |
498 | { | 498 | { |
@@ -503,7 +503,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) | |||
503 | } | 503 | } |
504 | 504 | ||
505 | /* | 505 | /* |
506 | * Called with sem_ids.rw_mutex and ipcp locked. | 506 | * Called with sem_ids.rwsem and ipcp locked. |
507 | */ | 507 | */ |
508 | static inline int sem_more_checks(struct kern_ipc_perm *ipcp, | 508 | static inline int sem_more_checks(struct kern_ipc_perm *ipcp, |
509 | struct ipc_params *params) | 509 | struct ipc_params *params) |
@@ -994,8 +994,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) | |||
994 | return semzcnt; | 994 | return semzcnt; |
995 | } | 995 | } |
996 | 996 | ||
997 | /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked | 997 | /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked |
998 | * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex | 998 | * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem |
999 | * remains locked on exit. | 999 | * remains locked on exit. |
1000 | */ | 1000 | */ |
1001 | static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | 1001 | static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
@@ -1116,7 +1116,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, | |||
1116 | seminfo.semmnu = SEMMNU; | 1116 | seminfo.semmnu = SEMMNU; |
1117 | seminfo.semmap = SEMMAP; | 1117 | seminfo.semmap = SEMMAP; |
1118 | seminfo.semume = SEMUME; | 1118 | seminfo.semume = SEMUME; |
1119 | down_read(&sem_ids(ns).rw_mutex); | 1119 | down_read(&sem_ids(ns).rwsem); |
1120 | if (cmd == SEM_INFO) { | 1120 | if (cmd == SEM_INFO) { |
1121 | seminfo.semusz = sem_ids(ns).in_use; | 1121 | seminfo.semusz = sem_ids(ns).in_use; |
1122 | seminfo.semaem = ns->used_sems; | 1122 | seminfo.semaem = ns->used_sems; |
@@ -1125,7 +1125,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, | |||
1125 | seminfo.semaem = SEMAEM; | 1125 | seminfo.semaem = SEMAEM; |
1126 | } | 1126 | } |
1127 | max_id = ipc_get_maxid(&sem_ids(ns)); | 1127 | max_id = ipc_get_maxid(&sem_ids(ns)); |
1128 | up_read(&sem_ids(ns).rw_mutex); | 1128 | up_read(&sem_ids(ns).rwsem); |
1129 | if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) | 1129 | if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) |
1130 | return -EFAULT; | 1130 | return -EFAULT; |
1131 | return (max_id < 0) ? 0: max_id; | 1131 | return (max_id < 0) ? 0: max_id; |
@@ -1431,9 +1431,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) | |||
1431 | } | 1431 | } |
1432 | 1432 | ||
1433 | /* | 1433 | /* |
1434 | * This function handles some semctl commands which require the rw_mutex | 1434 | * This function handles some semctl commands which require the rwsem |
1435 | * to be held in write mode. | 1435 | * to be held in write mode. |
1436 | * NOTE: no locks must be held, the rw_mutex is taken inside this function. | 1436 | * NOTE: no locks must be held, the rwsem is taken inside this function. |
1437 | */ | 1437 | */ |
1438 | static int semctl_down(struct ipc_namespace *ns, int semid, | 1438 | static int semctl_down(struct ipc_namespace *ns, int semid, |
1439 | int cmd, int version, void __user *p) | 1439 | int cmd, int version, void __user *p) |
@@ -1448,7 +1448,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, | |||
1448 | return -EFAULT; | 1448 | return -EFAULT; |
1449 | } | 1449 | } |
1450 | 1450 | ||
1451 | down_write(&sem_ids(ns).rw_mutex); | 1451 | down_write(&sem_ids(ns).rwsem); |
1452 | rcu_read_lock(); | 1452 | rcu_read_lock(); |
1453 | 1453 | ||
1454 | ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, | 1454 | ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, |
@@ -1487,7 +1487,7 @@ out_unlock0: | |||
1487 | out_unlock1: | 1487 | out_unlock1: |
1488 | rcu_read_unlock(); | 1488 | rcu_read_unlock(); |
1489 | out_up: | 1489 | out_up: |
1490 | up_write(&sem_ids(ns).rw_mutex); | 1490 | up_write(&sem_ids(ns).rwsem); |
1491 | return err; | 1491 | return err; |
1492 | } | 1492 | } |
1493 | 1493 | ||
diff --git a/ipc/shm.c b/ipc/shm.c | |||
@@ -19,6 +19,9 @@ | |||
19 | * namespaces support | 19 | * namespaces support |
20 | * OpenVZ, SWsoft Inc. | 20 | * OpenVZ, SWsoft Inc. |
21 | * Pavel Emelianov <xemul@openvz.org> | 21 | * Pavel Emelianov <xemul@openvz.org> |
22 | * | ||
23 | * Better ipc lock (kern_ipc_perm.lock) handling | ||
24 | * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. | ||
22 | */ | 25 | */ |
23 | 26 | ||
24 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
@@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns) | |||
80 | } | 83 | } |
81 | 84 | ||
82 | /* | 85 | /* |
83 | * Called with shm_ids.rw_mutex (writer) and the shp structure locked. | 86 | * Called with shm_ids.rwsem (writer) and the shp structure locked. |
84 | * Only shm_ids.rw_mutex remains locked on exit. | 87 | * Only shm_ids.rwsem remains locked on exit. |
85 | */ | 88 | */ |
86 | static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | 89 | static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
87 | { | 90 | { |
@@ -124,8 +127,28 @@ void __init shm_init (void) | |||
124 | IPC_SHM_IDS, sysvipc_shm_proc_show); | 127 | IPC_SHM_IDS, sysvipc_shm_proc_show); |
125 | } | 128 | } |
126 | 129 | ||
130 | static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) | ||
131 | { | ||
132 | struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id); | ||
133 | |||
134 | if (IS_ERR(ipcp)) | ||
135 | return ERR_CAST(ipcp); | ||
136 | |||
137 | return container_of(ipcp, struct shmid_kernel, shm_perm); | ||
138 | } | ||
139 | |||
140 | static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) | ||
141 | { | ||
142 | struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); | ||
143 | |||
144 | if (IS_ERR(ipcp)) | ||
145 | return ERR_CAST(ipcp); | ||
146 | |||
147 | return container_of(ipcp, struct shmid_kernel, shm_perm); | ||
148 | } | ||
149 | |||
127 | /* | 150 | /* |
128 | * shm_lock_(check_) routines are called in the paths where the rw_mutex | 151 | * shm_lock_(check_) routines are called in the paths where the rwsem |
129 | * is not necessarily held. | 152 | * is not necessarily held. |
130 | */ | 153 | */ |
131 | static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) | 154 | static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) |
@@ -144,17 +167,6 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) | |||
144 | ipc_lock_object(&ipcp->shm_perm); | 167 | ipc_lock_object(&ipcp->shm_perm); |
145 | } | 168 | } |
146 | 169 | ||
147 | static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, | ||
148 | int id) | ||
149 | { | ||
150 | struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id); | ||
151 | |||
152 | if (IS_ERR(ipcp)) | ||
153 | return (struct shmid_kernel *)ipcp; | ||
154 | |||
155 | return container_of(ipcp, struct shmid_kernel, shm_perm); | ||
156 | } | ||
157 | |||
158 | static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) | 170 | static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) |
159 | { | 171 | { |
160 | ipc_rmid(&shm_ids(ns), &s->shm_perm); | 172 | ipc_rmid(&shm_ids(ns), &s->shm_perm); |
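shm_obtain_object_check() takes over from the removed shm_lock_check(): lookups become lockless under RCU, and callers take the object spinlock only around the window that actually modifies the segment. A sketch of a converted path:

	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);	/* no lock taken */
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock;
	}
	/* read-only checks (permissions, sizes, ...) can run here */
	ipc_lock_object(&shp->shm_perm);	/* only if modifying */
	/* ... */
	ipc_unlock_object(&shp->shm_perm);
out_unlock:
	rcu_read_unlock();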
@@ -182,7 +194,7 @@ static void shm_open(struct vm_area_struct *vma) | |||
182 | * @ns: namespace | 194 | * @ns: namespace |
183 | * @shp: struct to free | 195 | * @shp: struct to free |
184 | * | 196 | * |
185 | * It has to be called with shp and shm_ids.rw_mutex (writer) locked, | 197 | * It has to be called with shp and shm_ids.rwsem (writer) locked, |
186 | * but returns with shp unlocked and freed. | 198 | * but returns with shp unlocked and freed. |
187 | */ | 199 | */ |
188 | static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) | 200 | static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) |
@@ -230,7 +242,7 @@ static void shm_close(struct vm_area_struct *vma) | |||
230 | struct shmid_kernel *shp; | 242 | struct shmid_kernel *shp; |
231 | struct ipc_namespace *ns = sfd->ns; | 243 | struct ipc_namespace *ns = sfd->ns; |
232 | 244 | ||
233 | down_write(&shm_ids(ns).rw_mutex); | 245 | down_write(&shm_ids(ns).rwsem); |
234 | /* remove from the list of attaches of the shm segment */ | 246 | /* remove from the list of attaches of the shm segment */ |
235 | shp = shm_lock(ns, sfd->id); | 247 | shp = shm_lock(ns, sfd->id); |
236 | BUG_ON(IS_ERR(shp)); | 248 | BUG_ON(IS_ERR(shp)); |
@@ -241,10 +253,10 @@ static void shm_close(struct vm_area_struct *vma) | |||
241 | shm_destroy(ns, shp); | 253 | shm_destroy(ns, shp); |
242 | else | 254 | else |
243 | shm_unlock(shp); | 255 | shm_unlock(shp); |
244 | up_write(&shm_ids(ns).rw_mutex); | 256 | up_write(&shm_ids(ns).rwsem); |
245 | } | 257 | } |
246 | 258 | ||
247 | /* Called with ns->shm_ids(ns).rw_mutex locked */ | 259 | /* Called with ns->shm_ids(ns).rwsem locked */ |
248 | static int shm_try_destroy_current(int id, void *p, void *data) | 260 | static int shm_try_destroy_current(int id, void *p, void *data) |
249 | { | 261 | { |
250 | struct ipc_namespace *ns = data; | 262 | struct ipc_namespace *ns = data; |
@@ -275,7 +287,7 @@ static int shm_try_destroy_current(int id, void *p, void *data) | |||
275 | return 0; | 287 | return 0; |
276 | } | 288 | } |
277 | 289 | ||
278 | /* Called with ns->shm_ids(ns).rw_mutex locked */ | 290 | /* Called with ns->shm_ids(ns).rwsem locked */ |
279 | static int shm_try_destroy_orphaned(int id, void *p, void *data) | 291 | static int shm_try_destroy_orphaned(int id, void *p, void *data) |
280 | { | 292 | { |
281 | struct ipc_namespace *ns = data; | 293 | struct ipc_namespace *ns = data; |
@@ -286,7 +298,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) | |||
286 | * We want to destroy segments without users and with already | 298 | * We want to destroy segments without users and with already |
287 | * exit'ed originating process. | 299 | * exit'ed originating process. |
288 | * | 300 | * |
289 | * As shp->* are changed under rw_mutex, it's safe to skip shp locking. | 301 | * As shp->* are changed under rwsem, it's safe to skip shp locking. |
290 | */ | 302 | */ |
291 | if (shp->shm_creator != NULL) | 303 | if (shp->shm_creator != NULL) |
292 | return 0; | 304 | return 0; |
@@ -300,10 +312,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) | |||
300 | 312 | ||
301 | void shm_destroy_orphaned(struct ipc_namespace *ns) | 313 | void shm_destroy_orphaned(struct ipc_namespace *ns) |
302 | { | 314 | { |
303 | down_write(&shm_ids(ns).rw_mutex); | 315 | down_write(&shm_ids(ns).rwsem); |
304 | if (shm_ids(ns).in_use) | 316 | if (shm_ids(ns).in_use) |
305 | idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); | 317 | idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); |
306 | up_write(&shm_ids(ns).rw_mutex); | 318 | up_write(&shm_ids(ns).rwsem); |
307 | } | 319 | } |
308 | 320 | ||
309 | 321 | ||
@@ -315,10 +327,10 @@ void exit_shm(struct task_struct *task) | |||
315 | return; | 327 | return; |
316 | 328 | ||
317 | /* Destroy all already created segments, but not mapped yet */ | 329 | /* Destroy all already created segments, but not mapped yet */ |
318 | down_write(&shm_ids(ns).rw_mutex); | 330 | down_write(&shm_ids(ns).rwsem); |
319 | if (shm_ids(ns).in_use) | 331 | if (shm_ids(ns).in_use) |
320 | idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); | 332 | idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); |
321 | up_write(&shm_ids(ns).rw_mutex); | 333 | up_write(&shm_ids(ns).rwsem); |
322 | } | 334 | } |
323 | 335 | ||
324 | static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 336 | static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
@@ -452,7 +464,7 @@ static const struct vm_operations_struct shm_vm_ops = { | |||
452 | * @ns: namespace | 464 | * @ns: namespace |
453 | * @params: ptr to the structure that contains key, size and shmflg | 465 | * @params: ptr to the structure that contains key, size and shmflg |
454 | * | 466 | * |
455 | * Called with shm_ids.rw_mutex held as a writer. | 467 | * Called with shm_ids.rwsem held as a writer. |
456 | */ | 468 | */ |
457 | 469 | ||
458 | static int newseg(struct ipc_namespace *ns, struct ipc_params *params) | 470 | static int newseg(struct ipc_namespace *ns, struct ipc_params *params) |
@@ -560,7 +572,7 @@ no_file: | |||
560 | } | 572 | } |
561 | 573 | ||
562 | /* | 574 | /* |
563 | * Called with shm_ids.rw_mutex and ipcp locked. | 575 | * Called with shm_ids.rwsem and ipcp locked. |
564 | */ | 576 | */ |
565 | static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) | 577 | static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) |
566 | { | 578 | { |
@@ -571,7 +583,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) | |||
571 | } | 583 | } |
572 | 584 | ||
573 | /* | 585 | /* |
574 | * Called with shm_ids.rw_mutex and ipcp locked. | 586 | * Called with shm_ids.rwsem and ipcp locked. |
575 | */ | 587 | */ |
576 | static inline int shm_more_checks(struct kern_ipc_perm *ipcp, | 588 | static inline int shm_more_checks(struct kern_ipc_perm *ipcp, |
577 | struct ipc_params *params) | 589 | struct ipc_params *params) |
@@ -684,7 +696,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf | |||
684 | 696 | ||
685 | /* | 697 | /* |
686 | * Calculate and add used RSS and swap pages of a shm. | 698 | * Calculate and add used RSS and swap pages of a shm. |
687 | * Called with shm_ids.rw_mutex held as a reader | 699 | * Called with shm_ids.rwsem held as a reader |
688 | */ | 700 | */ |
689 | static void shm_add_rss_swap(struct shmid_kernel *shp, | 701 | static void shm_add_rss_swap(struct shmid_kernel *shp, |
690 | unsigned long *rss_add, unsigned long *swp_add) | 702 | unsigned long *rss_add, unsigned long *swp_add) |
@@ -711,7 +723,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp, | |||
711 | } | 723 | } |
712 | 724 | ||
713 | /* | 725 | /* |
714 | * Called with shm_ids.rw_mutex held as a reader | 726 | * Called with shm_ids.rwsem held as a reader |
715 | */ | 727 | */ |
716 | static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, | 728 | static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, |
717 | unsigned long *swp) | 729 | unsigned long *swp) |
@@ -740,9 +752,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, | |||
740 | } | 752 | } |
741 | 753 | ||
742 | /* | 754 | /* |
743 | * This function handles some shmctl commands which require the rw_mutex | 755 | * This function handles some shmctl commands which require the rwsem |
744 | * to be held in write mode. | 756 | * to be held in write mode. |
745 | * NOTE: no locks must be held, the rw_mutex is taken inside this function. | 757 | * NOTE: no locks must be held, the rwsem is taken inside this function. |
746 | */ | 758 | */ |
747 | static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, | 759 | static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, |
748 | struct shmid_ds __user *buf, int version) | 760 | struct shmid_ds __user *buf, int version) |
@@ -757,14 +769,13 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, | |||
757 | return -EFAULT; | 769 | return -EFAULT; |
758 | } | 770 | } |
759 | 771 | ||
760 | down_write(&shm_ids(ns).rw_mutex); | 772 | down_write(&shm_ids(ns).rwsem); |
761 | rcu_read_lock(); | 773 | rcu_read_lock(); |
762 | 774 | ||
763 | ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, | 775 | ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd, |
764 | &shmid64.shm_perm, 0); | 776 | &shmid64.shm_perm, 0); |
765 | if (IS_ERR(ipcp)) { | 777 | if (IS_ERR(ipcp)) { |
766 | err = PTR_ERR(ipcp); | 778 | err = PTR_ERR(ipcp); |
767 | /* the ipc lock is not held upon failure */ | ||
768 | goto out_unlock1; | 779 | goto out_unlock1; |
769 | } | 780 | } |
770 | 781 | ||
@@ -772,14 +783,16 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, | |||
772 | 783 | ||
773 | err = security_shm_shmctl(shp, cmd); | 784 | err = security_shm_shmctl(shp, cmd); |
774 | if (err) | 785 | if (err) |
775 | goto out_unlock0; | 786 | goto out_unlock1; |
776 | 787 | ||
777 | switch (cmd) { | 788 | switch (cmd) { |
778 | case IPC_RMID: | 789 | case IPC_RMID: |
790 | ipc_lock_object(&shp->shm_perm); | ||
779 | /* do_shm_rmid unlocks the ipc object and rcu */ | 791 | /* do_shm_rmid unlocks the ipc object and rcu */ |
780 | do_shm_rmid(ns, ipcp); | 792 | do_shm_rmid(ns, ipcp); |
781 | goto out_up; | 793 | goto out_up; |
782 | case IPC_SET: | 794 | case IPC_SET: |
795 | ipc_lock_object(&shp->shm_perm); | ||
783 | err = ipc_update_perm(&shmid64.shm_perm, ipcp); | 796 | err = ipc_update_perm(&shmid64.shm_perm, ipcp); |
784 | if (err) | 797 | if (err) |
785 | goto out_unlock0; | 798 | goto out_unlock0; |
@@ -787,6 +800,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, | |||
787 | break; | 800 | break; |
788 | default: | 801 | default: |
789 | err = -EINVAL; | 802 | err = -EINVAL; |
803 | goto out_unlock1; | ||
790 | } | 804 | } |
791 | 805 | ||
792 | out_unlock0: | 806 | out_unlock0: |
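The net effect of the shmctl_down() hunks: the rwsem is taken as a writer, the object is looked up under RCU, and the audit/security checks run with no spinlock held at all; ipc_lock_object() is then taken per command, only around the actual modification. As a sketch of the flow above (error unwinding omitted):

	down_write(&shm_ids(ns).rwsem);
	rcu_read_lock();
	ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
				      &shmid64.shm_perm, 0);
	/* audit + security_shm_shmctl() run here, under RCU only */
	if (cmd == IPC_SET) {
		ipc_lock_object(&shp->shm_perm);  /* spinlock just for the update */
		err = ipc_update_perm(&shmid64.shm_perm, ipcp);
		ipc_unlock_object(&shp->shm_perm);
	}
	rcu_read_unlock();
	up_write(&shm_ids(ns).rwsem);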
@@ -794,33 +808,28 @@ out_unlock0: | |||
794 | out_unlock1: | 808 | out_unlock1: |
795 | rcu_read_unlock(); | 809 | rcu_read_unlock(); |
796 | out_up: | 810 | out_up: |
797 | up_write(&shm_ids(ns).rw_mutex); | 811 | up_write(&shm_ids(ns).rwsem); |
798 | return err; | 812 | return err; |
799 | } | 813 | } |
800 | 814 | ||
801 | SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) | 815 | static int shmctl_nolock(struct ipc_namespace *ns, int shmid, |
816 | int cmd, int version, void __user *buf) | ||
802 | { | 817 | { |
818 | int err; | ||
803 | struct shmid_kernel *shp; | 819 | struct shmid_kernel *shp; |
804 | int err, version; | ||
805 | struct ipc_namespace *ns; | ||
806 | 820 | ||
807 | if (cmd < 0 || shmid < 0) { | 821 | /* preliminary security checks for *_INFO */ |
808 | err = -EINVAL; | 822 | if (cmd == IPC_INFO || cmd == SHM_INFO) { |
809 | goto out; | 823 | err = security_shm_shmctl(NULL, cmd); |
824 | if (err) | ||
825 | return err; | ||
810 | } | 826 | } |
811 | 827 | ||
812 | version = ipc_parse_version(&cmd); | 828 | switch (cmd) { |
813 | ns = current->nsproxy->ipc_ns; | ||
814 | |||
815 | switch (cmd) { /* replace with proc interface ? */ | ||
816 | case IPC_INFO: | 829 | case IPC_INFO: |
817 | { | 830 | { |
818 | struct shminfo64 shminfo; | 831 | struct shminfo64 shminfo; |
819 | 832 | ||
820 | err = security_shm_shmctl(NULL, cmd); | ||
821 | if (err) | ||
822 | return err; | ||
823 | |||
824 | memset(&shminfo, 0, sizeof(shminfo)); | 833 | memset(&shminfo, 0, sizeof(shminfo)); |
825 | shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; | 834 | shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; |
826 | shminfo.shmmax = ns->shm_ctlmax; | 835 | shminfo.shmmax = ns->shm_ctlmax; |
@@ -830,9 +839,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) | |||
830 | if(copy_shminfo_to_user (buf, &shminfo, version)) | 839 | if(copy_shminfo_to_user (buf, &shminfo, version)) |
831 | return -EFAULT; | 840 | return -EFAULT; |
832 | 841 | ||
833 | down_read(&shm_ids(ns).rw_mutex); | 842 | down_read(&shm_ids(ns).rwsem); |
834 | err = ipc_get_maxid(&shm_ids(ns)); | 843 | err = ipc_get_maxid(&shm_ids(ns)); |
835 | up_read(&shm_ids(ns).rw_mutex); | 844 | up_read(&shm_ids(ns).rwsem); |
836 | 845 | ||
837 | if(err<0) | 846 | if(err<0) |
838 | err = 0; | 847 | err = 0; |
@@ -842,19 +851,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) | |||
842 | { | 851 | { |
843 | struct shm_info shm_info; | 852 | struct shm_info shm_info; |
844 | 853 | ||
845 | err = security_shm_shmctl(NULL, cmd); | ||
846 | if (err) | ||
847 | return err; | ||
848 | |||
849 | memset(&shm_info, 0, sizeof(shm_info)); | 854 | memset(&shm_info, 0, sizeof(shm_info)); |
850 | down_read(&shm_ids(ns).rw_mutex); | 855 | down_read(&shm_ids(ns).rwsem); |
851 | shm_info.used_ids = shm_ids(ns).in_use; | 856 | shm_info.used_ids = shm_ids(ns).in_use; |
852 | shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); | 857 | shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); |
853 | shm_info.shm_tot = ns->shm_tot; | 858 | shm_info.shm_tot = ns->shm_tot; |
854 | shm_info.swap_attempts = 0; | 859 | shm_info.swap_attempts = 0; |
855 | shm_info.swap_successes = 0; | 860 | shm_info.swap_successes = 0; |
856 | err = ipc_get_maxid(&shm_ids(ns)); | 861 | err = ipc_get_maxid(&shm_ids(ns)); |
857 | up_read(&shm_ids(ns).rw_mutex); | 862 | up_read(&shm_ids(ns).rwsem); |
858 | if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { | 863 | if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { |
859 | err = -EFAULT; | 864 | err = -EFAULT; |
860 | goto out; | 865 | goto out; |
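IPC_INFO and SHM_INFO only read namespace-wide counters, so they never touch a per-object lock; as the hunks above show, the whole critical section is a reader-side rwsem hold:

	down_read(&shm_ids(ns).rwsem);
	/* snapshot in_use, shm_tot and the highest id under the reader lock */
	err = ipc_get_maxid(&shm_ids(ns));
	up_read(&shm_ids(ns).rwsem);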
@@ -869,27 +874,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) | |||
869 | struct shmid64_ds tbuf; | 874 | struct shmid64_ds tbuf; |
870 | int result; | 875 | int result; |
871 | 876 | ||
877 | rcu_read_lock(); | ||
872 | if (cmd == SHM_STAT) { | 878 | if (cmd == SHM_STAT) { |
873 | shp = shm_lock(ns, shmid); | 879 | shp = shm_obtain_object(ns, shmid); |
874 | if (IS_ERR(shp)) { | 880 | if (IS_ERR(shp)) { |
875 | err = PTR_ERR(shp); | 881 | err = PTR_ERR(shp); |
876 | goto out; | 882 | goto out_unlock; |
877 | } | 883 | } |
878 | result = shp->shm_perm.id; | 884 | result = shp->shm_perm.id; |
879 | } else { | 885 | } else { |
880 | shp = shm_lock_check(ns, shmid); | 886 | shp = shm_obtain_object_check(ns, shmid); |
881 | if (IS_ERR(shp)) { | 887 | if (IS_ERR(shp)) { |
882 | err = PTR_ERR(shp); | 888 | err = PTR_ERR(shp); |
883 | goto out; | 889 | goto out_unlock; |
884 | } | 890 | } |
885 | result = 0; | 891 | result = 0; |
886 | } | 892 | } |
893 | |||
887 | err = -EACCES; | 894 | err = -EACCES; |
888 | if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) | 895 | if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) |
889 | goto out_unlock; | 896 | goto out_unlock; |
897 | |||
890 | err = security_shm_shmctl(shp, cmd); | 898 | err = security_shm_shmctl(shp, cmd); |
891 | if (err) | 899 | if (err) |
892 | goto out_unlock; | 900 | goto out_unlock; |
901 | |||
893 | memset(&tbuf, 0, sizeof(tbuf)); | 902 | memset(&tbuf, 0, sizeof(tbuf)); |
894 | kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); | 903 | kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); |
895 | tbuf.shm_segsz = shp->shm_segsz; | 904 | tbuf.shm_segsz = shp->shm_segsz; |
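shm_obtain_object() and shm_obtain_object_check() are thin RCU-side lookup wrappers introduced earlier in this patch (outside this excerpt); presumably they just cast the generic ipc object, along these lines:

	static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
	{
		struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);

		if (IS_ERR(ipcp))
			return ERR_CAST(ipcp);

		return container_of(ipcp, struct shmid_kernel, shm_perm);
	}

Unlike the old shm_lock()/shm_lock_check(), the caller holds only rcu_read_lock(), which is why the STAT path above can copy the fields into tbuf and then simply rcu_read_unlock().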
@@ -899,43 +908,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) | |||
899 | tbuf.shm_cpid = shp->shm_cprid; | 908 | tbuf.shm_cpid = shp->shm_cprid; |
900 | tbuf.shm_lpid = shp->shm_lprid; | 909 | tbuf.shm_lpid = shp->shm_lprid; |
901 | tbuf.shm_nattch = shp->shm_nattch; | 910 | tbuf.shm_nattch = shp->shm_nattch; |
902 | shm_unlock(shp); | 911 | rcu_read_unlock(); |
903 | if(copy_shmid_to_user (buf, &tbuf, version)) | 912 | |
913 | if (copy_shmid_to_user(buf, &tbuf, version)) | ||
904 | err = -EFAULT; | 914 | err = -EFAULT; |
905 | else | 915 | else |
906 | err = result; | 916 | err = result; |
907 | goto out; | 917 | goto out; |
908 | } | 918 | } |
919 | default: | ||
920 | return -EINVAL; | ||
921 | } | ||
922 | |||
923 | out_unlock: | ||
924 | rcu_read_unlock(); | ||
925 | out: | ||
926 | return err; | ||
927 | } | ||
928 | |||
929 | SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) | ||
930 | { | ||
931 | struct shmid_kernel *shp; | ||
932 | int err, version; | ||
933 | struct ipc_namespace *ns; | ||
934 | |||
935 | if (cmd < 0 || shmid < 0) | ||
936 | return -EINVAL; | ||
937 | |||
938 | version = ipc_parse_version(&cmd); | ||
939 | ns = current->nsproxy->ipc_ns; | ||
940 | |||
941 | switch (cmd) { | ||
942 | case IPC_INFO: | ||
943 | case SHM_INFO: | ||
944 | case SHM_STAT: | ||
945 | case IPC_STAT: | ||
946 | return shmctl_nolock(ns, shmid, cmd, version, buf); | ||
947 | case IPC_RMID: | ||
948 | case IPC_SET: | ||
949 | return shmctl_down(ns, shmid, cmd, buf, version); | ||
909 | case SHM_LOCK: | 950 | case SHM_LOCK: |
910 | case SHM_UNLOCK: | 951 | case SHM_UNLOCK: |
911 | { | 952 | { |
912 | struct file *shm_file; | 953 | struct file *shm_file; |
913 | 954 | ||
914 | shp = shm_lock_check(ns, shmid); | 955 | rcu_read_lock(); |
956 | shp = shm_obtain_object_check(ns, shmid); | ||
915 | if (IS_ERR(shp)) { | 957 | if (IS_ERR(shp)) { |
916 | err = PTR_ERR(shp); | 958 | err = PTR_ERR(shp); |
917 | goto out; | 959 | goto out_unlock1; |
918 | } | 960 | } |
919 | 961 | ||
920 | audit_ipc_obj(&(shp->shm_perm)); | 962 | audit_ipc_obj(&(shp->shm_perm)); |
963 | err = security_shm_shmctl(shp, cmd); | ||
964 | if (err) | ||
965 | goto out_unlock1; | ||
921 | 966 | ||
967 | ipc_lock_object(&shp->shm_perm); | ||
922 | if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { | 968 | if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { |
923 | kuid_t euid = current_euid(); | 969 | kuid_t euid = current_euid(); |
924 | err = -EPERM; | 970 | err = -EPERM; |
925 | if (!uid_eq(euid, shp->shm_perm.uid) && | 971 | if (!uid_eq(euid, shp->shm_perm.uid) && |
926 | !uid_eq(euid, shp->shm_perm.cuid)) | 972 | !uid_eq(euid, shp->shm_perm.cuid)) |
927 | goto out_unlock; | 973 | goto out_unlock0; |
928 | if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) | 974 | if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) |
929 | goto out_unlock; | 975 | goto out_unlock0; |
930 | } | 976 | } |
931 | 977 | ||
932 | err = security_shm_shmctl(shp, cmd); | ||
933 | if (err) | ||
934 | goto out_unlock; | ||
935 | |||
936 | shm_file = shp->shm_file; | 978 | shm_file = shp->shm_file; |
937 | if (is_file_hugepages(shm_file)) | 979 | if (is_file_hugepages(shm_file)) |
938 | goto out_unlock; | 980 | goto out_unlock0; |
939 | 981 | ||
940 | if (cmd == SHM_LOCK) { | 982 | if (cmd == SHM_LOCK) { |
941 | struct user_struct *user = current_user(); | 983 | struct user_struct *user = current_user(); |
@@ -944,32 +986,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) | |||
944 | shp->shm_perm.mode |= SHM_LOCKED; | 986 | shp->shm_perm.mode |= SHM_LOCKED; |
945 | shp->mlock_user = user; | 987 | shp->mlock_user = user; |
946 | } | 988 | } |
947 | goto out_unlock; | 989 | goto out_unlock0; |
948 | } | 990 | } |
949 | 991 | ||
950 | /* SHM_UNLOCK */ | 992 | /* SHM_UNLOCK */ |
951 | if (!(shp->shm_perm.mode & SHM_LOCKED)) | 993 | if (!(shp->shm_perm.mode & SHM_LOCKED)) |
952 | goto out_unlock; | 994 | goto out_unlock0; |
953 | shmem_lock(shm_file, 0, shp->mlock_user); | 995 | shmem_lock(shm_file, 0, shp->mlock_user); |
954 | shp->shm_perm.mode &= ~SHM_LOCKED; | 996 | shp->shm_perm.mode &= ~SHM_LOCKED; |
955 | shp->mlock_user = NULL; | 997 | shp->mlock_user = NULL; |
956 | get_file(shm_file); | 998 | get_file(shm_file); |
957 | shm_unlock(shp); | 999 | ipc_unlock_object(&shp->shm_perm); |
1000 | rcu_read_unlock(); | ||
958 | shmem_unlock_mapping(shm_file->f_mapping); | 1001 | shmem_unlock_mapping(shm_file->f_mapping); |
1002 | |||
959 | fput(shm_file); | 1003 | fput(shm_file); |
960 | goto out; | ||
961 | } | ||
962 | case IPC_RMID: | ||
963 | case IPC_SET: | ||
964 | err = shmctl_down(ns, shmid, cmd, buf, version); | ||
965 | return err; | 1004 | return err; |
1005 | } | ||
966 | default: | 1006 | default: |
967 | return -EINVAL; | 1007 | return -EINVAL; |
968 | } | 1008 | } |
969 | 1009 | ||
970 | out_unlock: | 1010 | out_unlock0: |
971 | shm_unlock(shp); | 1011 | ipc_unlock_object(&shp->shm_perm); |
972 | out: | 1012 | out_unlock1: |
1013 | rcu_read_unlock(); | ||
973 | return err; | 1014 | return err; |
974 | } | 1015 | } |
975 | 1016 | ||
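The out_unlock0/out_unlock1 labels follow the convention used throughout this series: 0 drops the object spinlock, 1 drops only RCU. Note also the SHM_UNLOCK ordering: shmem_unlock_mapping() can sleep, so the file is pinned with get_file() and both the spinlock and RCU are dropped first. Reduced to the essential pattern:

	get_file(shm_file);			/* pin the file across the sleep */
	ipc_unlock_object(&shp->shm_perm);	/* no spinlock while sleeping */
	rcu_read_unlock();
	shmem_unlock_mapping(shm_file->f_mapping);	/* may sleep */
	fput(shm_file);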
@@ -1037,10 +1078,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, | |||
1037 | * additional creator id... | 1078 | * additional creator id... |
1038 | */ | 1079 | */ |
1039 | ns = current->nsproxy->ipc_ns; | 1080 | ns = current->nsproxy->ipc_ns; |
1040 | shp = shm_lock_check(ns, shmid); | 1081 | rcu_read_lock(); |
1082 | shp = shm_obtain_object_check(ns, shmid); | ||
1041 | if (IS_ERR(shp)) { | 1083 | if (IS_ERR(shp)) { |
1042 | err = PTR_ERR(shp); | 1084 | err = PTR_ERR(shp); |
1043 | goto out; | 1085 | goto out_unlock; |
1044 | } | 1086 | } |
1045 | 1087 | ||
1046 | err = -EACCES; | 1088 | err = -EACCES; |
@@ -1051,24 +1093,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, | |||
1051 | if (err) | 1093 | if (err) |
1052 | goto out_unlock; | 1094 | goto out_unlock; |
1053 | 1095 | ||
1096 | ipc_lock_object(&shp->shm_perm); | ||
1054 | path = shp->shm_file->f_path; | 1097 | path = shp->shm_file->f_path; |
1055 | path_get(&path); | 1098 | path_get(&path); |
1056 | shp->shm_nattch++; | 1099 | shp->shm_nattch++; |
1057 | size = i_size_read(path.dentry->d_inode); | 1100 | size = i_size_read(path.dentry->d_inode); |
1058 | shm_unlock(shp); | 1101 | ipc_unlock_object(&shp->shm_perm); |
1102 | rcu_read_unlock(); | ||
1059 | 1103 | ||
1060 | err = -ENOMEM; | 1104 | err = -ENOMEM; |
1061 | sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); | 1105 | sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); |
1062 | if (!sfd) | 1106 | if (!sfd) { |
1063 | goto out_put_dentry; | 1107 | path_put(&path); |
1108 | goto out_nattch; | ||
1109 | } | ||
1064 | 1110 | ||
1065 | file = alloc_file(&path, f_mode, | 1111 | file = alloc_file(&path, f_mode, |
1066 | is_file_hugepages(shp->shm_file) ? | 1112 | is_file_hugepages(shp->shm_file) ? |
1067 | &shm_file_operations_huge : | 1113 | &shm_file_operations_huge : |
1068 | &shm_file_operations); | 1114 | &shm_file_operations); |
1069 | err = PTR_ERR(file); | 1115 | err = PTR_ERR(file); |
1070 | if (IS_ERR(file)) | 1116 | if (IS_ERR(file)) { |
1071 | goto out_free; | 1117 | kfree(sfd); |
1118 | path_put(&path); | ||
1119 | goto out_nattch; | ||
1120 | } | ||
1072 | 1121 | ||
1073 | file->private_data = sfd; | 1122 | file->private_data = sfd; |
1074 | file->f_mapping = shp->shm_file->f_mapping; | 1123 | file->f_mapping = shp->shm_file->f_mapping; |
@@ -1094,7 +1143,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, | |||
1094 | addr > current->mm->start_stack - size - PAGE_SIZE * 5) | 1143 | addr > current->mm->start_stack - size - PAGE_SIZE * 5) |
1095 | goto invalid; | 1144 | goto invalid; |
1096 | } | 1145 | } |
1097 | 1146 | ||
1098 | addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); | 1147 | addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); |
1099 | *raddr = addr; | 1148 | *raddr = addr; |
1100 | err = 0; | 1149 | err = 0; |
@@ -1109,7 +1158,7 @@ out_fput: | |||
1109 | fput(file); | 1158 | fput(file); |
1110 | 1159 | ||
1111 | out_nattch: | 1160 | out_nattch: |
1112 | down_write(&shm_ids(ns).rw_mutex); | 1161 | down_write(&shm_ids(ns).rwsem); |
1113 | shp = shm_lock(ns, shmid); | 1162 | shp = shm_lock(ns, shmid); |
1114 | BUG_ON(IS_ERR(shp)); | 1163 | BUG_ON(IS_ERR(shp)); |
1115 | shp->shm_nattch--; | 1164 | shp->shm_nattch--; |
@@ -1117,20 +1166,13 @@ out_nattch: | |||
1117 | shm_destroy(ns, shp); | 1166 | shm_destroy(ns, shp); |
1118 | else | 1167 | else |
1119 | shm_unlock(shp); | 1168 | shm_unlock(shp); |
1120 | up_write(&shm_ids(ns).rw_mutex); | 1169 | up_write(&shm_ids(ns).rwsem); |
1121 | |||
1122 | out: | ||
1123 | return err; | 1170 | return err; |
1124 | 1171 | ||
1125 | out_unlock: | 1172 | out_unlock: |
1126 | shm_unlock(shp); | 1173 | rcu_read_unlock(); |
1127 | goto out; | 1174 | out: |
1128 | 1175 | return err; | |
1129 | out_free: | ||
1130 | kfree(sfd); | ||
1131 | out_put_dentry: | ||
1132 | path_put(&path); | ||
1133 | goto out_nattch; | ||
1134 | } | 1176 | } |
1135 | 1177 | ||
1136 | SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) | 1178 | SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) |
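In do_shmat() the segment is pinned by bumping shm_nattch under the object lock before everything is dropped for the sleeping allocations; every later failure now funnels to out_nattch, which re-locks, drops the count, and destroys the segment if it was already marked for deletion. In outline:

	ipc_lock_object(&shp->shm_perm);
	shp->shm_nattch++;			/* segment cannot be freed under us */
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();

	/* the GFP_KERNEL allocation and do_mmap_pgoff() happen locklessly
	 * here; on any failure: goto out_nattch to undo the pin */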
@@ -1235,8 +1277,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) | |||
1235 | #else /* CONFIG_MMU */ | 1277 | #else /* CONFIG_MMU */ |
1236 | /* under NOMMU conditions, the exact address to be destroyed must be | 1278 | /* under NOMMU conditions, the exact address to be destroyed must be |
1237 | * given */ | 1279 | * given */ |
1238 | retval = -EINVAL; | 1280 | if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { |
1239 | if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { | ||
1240 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); | 1281 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); |
1241 | retval = 0; | 1282 | retval = 0; |
1242 | } | 1283 | } |
diff --git a/ipc/util.c b/ipc/util.c index 4704223bfad4..e829da9ed01f 100644 --- a/ipc/util.c +++ b/ipc/util.c | |||
@@ -15,6 +15,14 @@ | |||
15 | * Jun 2006 - namespaces support | 15 | * Jun 2006 - namespaces support |
16 | * OpenVZ, SWsoft Inc. | 16 | * OpenVZ, SWsoft Inc. |
17 | * Pavel Emelianov <xemul@openvz.org> | 17 | * Pavel Emelianov <xemul@openvz.org> |
18 | * | ||
19 | * General sysv ipc locking scheme: | ||
20 | * when doing ipc id lookups, take the ids->rwsem | ||
21 | * rcu_read_lock() | ||
22 | * obtain the ipc object (kern_ipc_perm) | ||
23 | * perform security, capabilities, auditing and permission checks, etc. | ||
24 | * acquire the ipc lock (kern_ipc_perm.lock) through ipc_lock_object() | ||
25 | * perform data updates (ie: SET, RMID, LOCK/UNLOCK commands) | ||
18 | */ | 26 | */ |
19 | 27 | ||
20 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
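As a sketch, a typical modifying command now follows exactly the sequence the new comment describes (names as used in this patch; error unwinding omitted):

	rcu_read_lock();
	ipcp = ipc_obtain_object_check(ids, id);	/* lookup, no lock yet */
	/* security, capability, audit and permission checks: RCU only */
	ipc_lock_object(ipcp);				/* spinlock for the update */
	/* perform the SET/RMID/LOCK-style modification */
	ipc_unlock_object(ipcp);
	rcu_read_unlock();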
@@ -119,7 +127,7 @@ __initcall(ipc_init); | |||
119 | 127 | ||
120 | void ipc_init_ids(struct ipc_ids *ids) | 128 | void ipc_init_ids(struct ipc_ids *ids) |
121 | { | 129 | { |
122 | init_rwsem(&ids->rw_mutex); | 130 | init_rwsem(&ids->rwsem); |
123 | 131 | ||
124 | ids->in_use = 0; | 132 | ids->in_use = 0; |
125 | ids->seq = 0; | 133 | ids->seq = 0; |
@@ -174,7 +182,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header, | |||
174 | * @ids: Identifier set | 182 | * @ids: Identifier set |
175 | * @key: The key to find | 183 | * @key: The key to find |
176 | * | 184 | * |
177 | * Requires ipc_ids.rw_mutex locked. | 185 | * Requires ipc_ids.rwsem locked. |
178 | * Returns the LOCKED pointer to the ipc structure if found or NULL | 186 | * Returns the LOCKED pointer to the ipc structure if found or NULL |
179 | * if not. | 187 | * if not. |
180 | * If key is found ipc points to the owning ipc structure | 188 | * If key is found ipc points to the owning ipc structure |
@@ -197,7 +205,8 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) | |||
197 | continue; | 205 | continue; |
198 | } | 206 | } |
199 | 207 | ||
200 | ipc_lock_by_ptr(ipc); | 208 | rcu_read_lock(); |
209 | ipc_lock_object(ipc); | ||
201 | return ipc; | 210 | return ipc; |
202 | } | 211 | } |
203 | 212 | ||
@@ -208,7 +217,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) | |||
208 | * ipc_get_maxid - get the last assigned id | 217 | * ipc_get_maxid - get the last assigned id |
209 | * @ids: IPC identifier set | 218 | * @ids: IPC identifier set |
210 | * | 219 | * |
211 | * Called with ipc_ids.rw_mutex held. | 220 | * Called with ipc_ids.rwsem held. |
212 | */ | 221 | */ |
213 | 222 | ||
214 | int ipc_get_maxid(struct ipc_ids *ids) | 223 | int ipc_get_maxid(struct ipc_ids *ids) |
@@ -246,7 +255,7 @@ int ipc_get_maxid(struct ipc_ids *ids) | |||
246 | * is returned. The 'new' entry is returned in a locked state on success. | 255 | * is returned. The 'new' entry is returned in a locked state on success. |
247 | * On failure the entry is not locked and a negative err-code is returned. | 256 | * On failure the entry is not locked and a negative err-code is returned. |
248 | * | 257 | * |
249 | * Called with writer ipc_ids.rw_mutex held. | 258 | * Called with writer ipc_ids.rwsem held. |
250 | */ | 259 | */ |
251 | int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) | 260 | int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) |
252 | { | 261 | { |
@@ -312,9 +321,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
312 | { | 321 | { |
313 | int err; | 322 | int err; |
314 | 323 | ||
315 | down_write(&ids->rw_mutex); | 324 | down_write(&ids->rwsem); |
316 | err = ops->getnew(ns, params); | 325 | err = ops->getnew(ns, params); |
317 | up_write(&ids->rw_mutex); | 326 | up_write(&ids->rwsem); |
318 | return err; | 327 | return err; |
319 | } | 328 | } |
320 | 329 | ||
@@ -331,7 +340,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
331 | * | 340 | * |
332 | * On success, the IPC id is returned. | 341 | * On success, the IPC id is returned. |
333 | * | 342 | * |
334 | * It is called with ipc_ids.rw_mutex and ipcp->lock held. | 343 | * It is called with ipc_ids.rwsem and ipcp->lock held. |
335 | */ | 344 | */ |
336 | static int ipc_check_perms(struct ipc_namespace *ns, | 345 | static int ipc_check_perms(struct ipc_namespace *ns, |
337 | struct kern_ipc_perm *ipcp, | 346 | struct kern_ipc_perm *ipcp, |
@@ -376,7 +385,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
376 | * Take the lock as a writer since we are potentially going to add | 385 | * Take the lock as a writer since we are potentially going to add |
377 | * a new entry + read locks are not "upgradable" | 386 | * a new entry + read locks are not "upgradable" |
378 | */ | 387 | */ |
379 | down_write(&ids->rw_mutex); | 388 | down_write(&ids->rwsem); |
380 | ipcp = ipc_findkey(ids, params->key); | 389 | ipcp = ipc_findkey(ids, params->key); |
381 | if (ipcp == NULL) { | 390 | if (ipcp == NULL) { |
382 | /* key not used */ | 391 | /* key not used */ |
@@ -402,7 +411,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
402 | } | 411 | } |
403 | ipc_unlock(ipcp); | 412 | ipc_unlock(ipcp); |
404 | } | 413 | } |
405 | up_write(&ids->rw_mutex); | 414 | up_write(&ids->rwsem); |
406 | 415 | ||
407 | return err; | 416 | return err; |
408 | } | 417 | } |
@@ -413,7 +422,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
413 | * @ids: IPC identifier set | 422 | * @ids: IPC identifier set |
414 | * @ipcp: ipc perm structure containing the identifier to remove | 423 | * @ipcp: ipc perm structure containing the identifier to remove |
415 | * | 424 | * |
416 | * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held | 425 | * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held |
417 | * before this function is called, and remain locked on the exit. | 426 | * before this function is called, and remain locked on the exit. |
418 | */ | 427 | */ |
419 | 428 | ||
@@ -621,7 +630,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id) | |||
621 | } | 630 | } |
622 | 631 | ||
623 | /** | 632 | /** |
624 | * ipc_lock - Lock an ipc structure without rw_mutex held | 633 | * ipc_lock - Lock an ipc structure without rwsem held |
625 | * @ids: IPC identifier set | 634 | * @ids: IPC identifier set |
626 | * @id: ipc id to look for | 635 | * @id: ipc id to look for |
627 | * | 636 | * |
@@ -677,22 +686,6 @@ out: | |||
677 | return out; | 686 | return out; |
678 | } | 687 | } |
679 | 688 | ||
680 | struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id) | ||
681 | { | ||
682 | struct kern_ipc_perm *out; | ||
683 | |||
684 | out = ipc_lock(ids, id); | ||
685 | if (IS_ERR(out)) | ||
686 | return out; | ||
687 | |||
688 | if (ipc_checkid(out, id)) { | ||
689 | ipc_unlock(out); | ||
690 | return ERR_PTR(-EIDRM); | ||
691 | } | ||
692 | |||
693 | return out; | ||
694 | } | ||
695 | |||
696 | /** | 689 | /** |
697 | * ipcget - Common sys_*get() code | 690 | * ipcget - Common sys_*get() code |
698 | * @ns : namespace | 691 | * @ns : namespace |
@@ -733,7 +726,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) | |||
733 | } | 726 | } |
734 | 727 | ||
735 | /** | 728 | /** |
736 | * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd | 729 | * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd |
737 | * @ns: the ipc namespace | 730 | * @ns: the ipc namespace |
738 | * @ids: the table of ids where to look for the ipc | 731 | * @ids: the table of ids where to look for the ipc |
739 | * @id: the id of the ipc to retrieve | 732 | * @id: the id of the ipc to retrieve |
@@ -746,29 +739,13 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) | |||
746 | * It must be called without any lock held and | 739 | * It must be called without any lock held and |
747 | * - retrieves the ipc with the given id in the given table. | 740 | * - retrieves the ipc with the given id in the given table. |
748 | * - performs some audit and permission check, depending on the given cmd | 741 | * - performs some audit and permission check, depending on the given cmd |
749 | * - returns the ipc with the ipc lock held in case of success | 742 | * - returns a pointer to the ipc object or otherwise, the corresponding error. |
750 | * or an err-code without any lock held otherwise. | ||
751 | * | 743 | * |
752 | * Call holding the both the rw_mutex and the rcu read lock. | 744 | * Call holding both the rwsem and the rcu read lock. |
753 | */ | 745 | */ |
754 | struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, | ||
755 | struct ipc_ids *ids, int id, int cmd, | ||
756 | struct ipc64_perm *perm, int extra_perm) | ||
757 | { | ||
758 | struct kern_ipc_perm *ipcp; | ||
759 | |||
760 | ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm); | ||
761 | if (IS_ERR(ipcp)) | ||
762 | goto out; | ||
763 | |||
764 | spin_lock(&ipcp->lock); | ||
765 | out: | ||
766 | return ipcp; | ||
767 | } | ||
768 | |||
769 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, | 746 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, |
770 | struct ipc_ids *ids, int id, int cmd, | 747 | struct ipc_ids *ids, int id, int cmd, |
771 | struct ipc64_perm *perm, int extra_perm) | 748 | struct ipc64_perm *perm, int extra_perm) |
772 | { | 749 | { |
773 | kuid_t euid; | 750 | kuid_t euid; |
774 | int err = -EPERM; | 751 | int err = -EPERM; |
@@ -846,7 +823,8 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, | |||
846 | ipc = idr_find(&ids->ipcs_idr, pos); | 823 | ipc = idr_find(&ids->ipcs_idr, pos); |
847 | if (ipc != NULL) { | 824 | if (ipc != NULL) { |
848 | *new_pos = pos + 1; | 825 | *new_pos = pos + 1; |
849 | ipc_lock_by_ptr(ipc); | 826 | rcu_read_lock(); |
827 | ipc_lock_object(ipc); | ||
850 | return ipc; | 828 | return ipc; |
851 | } | 829 | } |
852 | } | 830 | } |
@@ -884,7 +862,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) | |||
884 | * Take the lock - this will be released by the corresponding | 862 | * Take the lock - this will be released by the corresponding |
885 | * call to stop(). | 863 | * call to stop(). |
886 | */ | 864 | */ |
887 | down_read(&ids->rw_mutex); | 865 | down_read(&ids->rwsem); |
888 | 866 | ||
889 | /* pos < 0 is invalid */ | 867 | /* pos < 0 is invalid */ |
890 | if (*pos < 0) | 868 | if (*pos < 0) |
@@ -911,7 +889,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it) | |||
911 | 889 | ||
912 | ids = &iter->ns->ids[iface->ids]; | 890 | ids = &iter->ns->ids[iface->ids]; |
913 | /* Release the lock we took in start() */ | 891 | /* Release the lock we took in start() */ |
914 | up_read(&ids->rw_mutex); | 892 | up_read(&ids->rwsem); |
915 | } | 893 | } |
916 | 894 | ||
917 | static int sysvipc_proc_show(struct seq_file *s, void *it) | 895 | static int sysvipc_proc_show(struct seq_file *s, void *it) |
diff --git a/ipc/util.h b/ipc/util.h index b6a6a88f3002..c5f3338ba1fa 100644 --- a/ipc/util.h +++ b/ipc/util.h | |||
@@ -94,10 +94,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header, | |||
94 | #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) | 94 | #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) |
95 | #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) | 95 | #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) |
96 | 96 | ||
97 | /* must be called with ids->rw_mutex acquired for writing */ | 97 | /* must be called with ids->rwsem acquired for writing */ |
98 | int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); | 98 | int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); |
99 | 99 | ||
100 | /* must be called with ids->rw_mutex acquired for reading */ | 100 | /* must be called with ids->rwsem acquired for reading */ |
101 | int ipc_get_maxid(struct ipc_ids *); | 101 | int ipc_get_maxid(struct ipc_ids *); |
102 | 102 | ||
103 | /* must be called with both locks acquired. */ | 103 | /* must be called with both locks acquired. */ |
@@ -131,9 +131,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); | |||
131 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, | 131 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, |
132 | struct ipc_ids *ids, int id, int cmd, | 132 | struct ipc_ids *ids, int id, int cmd, |
133 | struct ipc64_perm *perm, int extra_perm); | 133 | struct ipc64_perm *perm, int extra_perm); |
134 | struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, | ||
135 | struct ipc_ids *ids, int id, int cmd, | ||
136 | struct ipc64_perm *perm, int extra_perm); | ||
137 | 134 | ||
138 | #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION | 135 | #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION |
139 | /* On IA-64, we always use the "64-bit version" of the IPC structures. */ | 136 | /* On IA-64, we always use the "64-bit version" of the IPC structures. */ |
@@ -174,19 +171,12 @@ static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) | |||
174 | assert_spin_locked(&perm->lock); | 171 | assert_spin_locked(&perm->lock); |
175 | } | 172 | } |
176 | 173 | ||
177 | static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) | ||
178 | { | ||
179 | rcu_read_lock(); | ||
180 | ipc_lock_object(perm); | ||
181 | } | ||
182 | |||
183 | static inline void ipc_unlock(struct kern_ipc_perm *perm) | 174 | static inline void ipc_unlock(struct kern_ipc_perm *perm) |
184 | { | 175 | { |
185 | ipc_unlock_object(perm); | 176 | ipc_unlock_object(perm); |
186 | rcu_read_unlock(); | 177 | rcu_read_unlock(); |
187 | } | 178 | } |
188 | 179 | ||
189 | struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); | ||
190 | struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); | 180 | struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); |
191 | int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, | 181 | int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, |
192 | struct ipc_ops *ops, struct ipc_params *params); | 182 | struct ipc_ops *ops, struct ipc_params *params); |
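With ipc_lock_by_ptr() gone, its two remaining callers (ipc_findkey() and sysvipc_find_ipc(), both updated above) open-code the pair, which makes the RCU/spinlock nesting explicit at the call site:

	rcu_read_lock();
	ipc_lock_object(ipc);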
diff --git a/kernel/extable.c b/kernel/extable.c index 67460b93b1a1..832cb28105bb 100644 --- a/kernel/extable.c +++ b/kernel/extable.c | |||
@@ -41,7 +41,7 @@ u32 __initdata main_extable_sort_needed = 1; | |||
41 | /* Sort the kernel's built-in exception table */ | 41 | /* Sort the kernel's built-in exception table */ |
42 | void __init sort_main_extable(void) | 42 | void __init sort_main_extable(void) |
43 | { | 43 | { |
44 | if (main_extable_sort_needed) { | 44 | if (main_extable_sort_needed && __stop___ex_table > __start___ex_table) { |
45 | pr_notice("Sorting __ex_table...\n"); | 45 | pr_notice("Sorting __ex_table...\n"); |
46 | sort_extable(__start___ex_table, __stop___ex_table); | 46 | sort_extable(__start___ex_table, __stop___ex_table); |
47 | } | 47 | } |
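__start___ex_table and __stop___ex_table are linker-provided bounds of the built-in exception table, so the new test is a plain emptiness check that skips the sort (and its pr_notice) when the table has no entries:

	extern struct exception_table_entry __start___ex_table[];
	extern struct exception_table_entry __stop___ex_table[];

	size_t nentries = __stop___ex_table - __start___ex_table;	/* 0 == empty */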
diff --git a/kernel/fork.c b/kernel/fork.c index c9eaf2013002..81ccb4f010c2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -351,7 +351,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
351 | struct rb_node **rb_link, *rb_parent; | 351 | struct rb_node **rb_link, *rb_parent; |
352 | int retval; | 352 | int retval; |
353 | unsigned long charge; | 353 | unsigned long charge; |
354 | struct mempolicy *pol; | ||
355 | 354 | ||
356 | uprobe_start_dup_mmap(); | 355 | uprobe_start_dup_mmap(); |
357 | down_write(&oldmm->mmap_sem); | 356 | down_write(&oldmm->mmap_sem); |
@@ -400,11 +399,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
400 | goto fail_nomem; | 399 | goto fail_nomem; |
401 | *tmp = *mpnt; | 400 | *tmp = *mpnt; |
402 | INIT_LIST_HEAD(&tmp->anon_vma_chain); | 401 | INIT_LIST_HEAD(&tmp->anon_vma_chain); |
403 | pol = mpol_dup(vma_policy(mpnt)); | 402 | retval = vma_dup_policy(mpnt, tmp); |
404 | retval = PTR_ERR(pol); | 403 | if (retval) |
405 | if (IS_ERR(pol)) | ||
406 | goto fail_nomem_policy; | 404 | goto fail_nomem_policy; |
407 | vma_set_policy(tmp, pol); | ||
408 | tmp->vm_mm = mm; | 405 | tmp->vm_mm = mm; |
409 | if (anon_vma_fork(tmp, mpnt)) | 406 | if (anon_vma_fork(tmp, mpnt)) |
410 | goto fail_nomem_anon_vma_fork; | 407 | goto fail_nomem_anon_vma_fork; |
@@ -472,7 +469,7 @@ out: | |||
472 | uprobe_end_dup_mmap(); | 469 | uprobe_end_dup_mmap(); |
473 | return retval; | 470 | return retval; |
474 | fail_nomem_anon_vma_fork: | 471 | fail_nomem_anon_vma_fork: |
475 | mpol_put(pol); | 472 | mpol_put(vma_policy(tmp)); |
476 | fail_nomem_policy: | 473 | fail_nomem_policy: |
477 | kmem_cache_free(vm_area_cachep, tmp); | 474 | kmem_cache_free(vm_area_cachep, tmp); |
478 | fail_nomem: | 475 | fail_nomem: |
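vma_dup_policy() folds the mpol_dup()/vma_set_policy() pair into one helper with an int return, which also lets the error path free the policy via mpol_put(vma_policy(tmp)) since it is already attached. The helper itself lives outside this excerpt; presumably it is along these lines:

	int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
	{
		struct mempolicy *pol = mpol_dup(vma_policy(src));

		if (IS_ERR(pol))
			return PTR_ERR(pol);
		dst->vm_policy = pol;
		return 0;
	}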
@@ -1173,13 +1170,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1173 | return ERR_PTR(-EINVAL); | 1170 | return ERR_PTR(-EINVAL); |
1174 | 1171 | ||
1175 | /* | 1172 | /* |
1176 | * If the new process will be in a different pid namespace | 1173 | * If the new process will be in a different pid or user namespace |
1177 | * don't allow the creation of threads. | 1174 | * do not allow it to share a thread group, signal handlers, or a |
1175 | * parent with the forking task. | ||
1178 | */ | 1176 | */ |
1179 | if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && | 1177 | if (clone_flags & (CLONE_SIGHAND | CLONE_PARENT)) { |
1180 | (task_active_pid_ns(current) != | 1178 | if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || |
1181 | current->nsproxy->pid_ns_for_children)) | 1179 | (task_active_pid_ns(current) != |
1182 | return ERR_PTR(-EINVAL); | 1180 | current->nsproxy->pid_ns_for_children)) |
1181 | return ERR_PTR(-EINVAL); | ||
1182 | } | ||
1183 | 1183 | ||
1184 | retval = security_task_create(clone_flags); | 1184 | retval = security_task_create(clone_flags); |
1185 | if (retval) | 1185 | if (retval) |
@@ -1576,15 +1576,6 @@ long do_fork(unsigned long clone_flags, | |||
1576 | long nr; | 1576 | long nr; |
1577 | 1577 | ||
1578 | /* | 1578 | /* |
1579 | * Do some preliminary argument and permissions checking before we | ||
1580 | * actually start allocating stuff | ||
1581 | */ | ||
1582 | if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) { | ||
1583 | if (clone_flags & (CLONE_THREAD|CLONE_PARENT)) | ||
1584 | return -EINVAL; | ||
1585 | } | ||
1586 | |||
1587 | /* | ||
1588 | * Determine whether and which event to report to ptracer. When | 1579 | * Determine whether and which event to report to ptracer. When |
1589 | * called from kernel_thread or CLONE_UNTRACED is explicitly | 1580 | * called from kernel_thread or CLONE_UNTRACED is explicitly |
1590 | * requested, no event is reported; otherwise, report if the event | 1581 | * requested, no event is reported; otherwise, report if the event |
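The check moves from do_fork() into copy_process() and is recast: what is actually unsafe is sharing signal handlers or a parent across a pid or user namespace boundary. For instance, a call shaped like this hypothetical userspace sketch now draws -EINVAL from copy_process():

	/* CLONE_PARENT combined with a fresh pid namespace: rejected */
	pid = syscall(SYS_clone, CLONE_NEWPID | CLONE_PARENT | SIGCHLD, 0, 0, 0);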
diff --git a/kernel/kexec.c b/kernel/kexec.c index 59f7b55ba745..2a74f307c5ec 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -1474,11 +1474,8 @@ static int __init __parse_crashkernel(char *cmdline, | |||
1474 | if (first_colon && (!first_space || first_colon < first_space)) | 1474 | if (first_colon && (!first_space || first_colon < first_space)) |
1475 | return parse_crashkernel_mem(ck_cmdline, system_ram, | 1475 | return parse_crashkernel_mem(ck_cmdline, system_ram, |
1476 | crash_size, crash_base); | 1476 | crash_size, crash_base); |
1477 | else | ||
1478 | return parse_crashkernel_simple(ck_cmdline, crash_size, | ||
1479 | crash_base); | ||
1480 | 1477 | ||
1481 | return 0; | 1478 | return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); |
1482 | } | 1479 | } |
1483 | 1480 | ||
1484 | /* | 1481 | /* |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6e33498d665c..a0d367a49122 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -112,6 +112,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = { | |||
112 | struct kprobe_insn_page { | 112 | struct kprobe_insn_page { |
113 | struct list_head list; | 113 | struct list_head list; |
114 | kprobe_opcode_t *insns; /* Page of instruction slots */ | 114 | kprobe_opcode_t *insns; /* Page of instruction slots */ |
115 | struct kprobe_insn_cache *cache; | ||
115 | int nused; | 116 | int nused; |
116 | int ngarbage; | 117 | int ngarbage; |
117 | char slot_used[]; | 118 | char slot_used[]; |
@@ -121,12 +122,6 @@ struct kprobe_insn_page { | |||
121 | (offsetof(struct kprobe_insn_page, slot_used) + \ | 122 | (offsetof(struct kprobe_insn_page, slot_used) + \ |
122 | (sizeof(char) * (slots))) | 123 | (sizeof(char) * (slots))) |
123 | 124 | ||
124 | struct kprobe_insn_cache { | ||
125 | struct list_head pages; /* list of kprobe_insn_page */ | ||
126 | size_t insn_size; /* size of instruction slot */ | ||
127 | int nr_garbage; | ||
128 | }; | ||
129 | |||
130 | static int slots_per_page(struct kprobe_insn_cache *c) | 125 | static int slots_per_page(struct kprobe_insn_cache *c) |
131 | { | 126 | { |
132 | return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t)); | 127 | return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t)); |
@@ -138,8 +133,20 @@ enum kprobe_slot_state { | |||
138 | SLOT_USED = 2, | 133 | SLOT_USED = 2, |
139 | }; | 134 | }; |
140 | 135 | ||
141 | static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_slots */ | 136 | static void *alloc_insn_page(void) |
142 | static struct kprobe_insn_cache kprobe_insn_slots = { | 137 | { |
138 | return module_alloc(PAGE_SIZE); | ||
139 | } | ||
140 | |||
141 | static void free_insn_page(void *page) | ||
142 | { | ||
143 | module_free(NULL, page); | ||
144 | } | ||
145 | |||
146 | struct kprobe_insn_cache kprobe_insn_slots = { | ||
147 | .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex), | ||
148 | .alloc = alloc_insn_page, | ||
149 | .free = free_insn_page, | ||
143 | .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages), | 150 | .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages), |
144 | .insn_size = MAX_INSN_SIZE, | 151 | .insn_size = MAX_INSN_SIZE, |
145 | .nr_garbage = 0, | 152 | .nr_garbage = 0, |
@@ -150,10 +157,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c); | |||
150 | * __get_insn_slot() - Find a slot on an executable page for an instruction. | 157 | * __get_insn_slot() - Find a slot on an executable page for an instruction. |
151 | * We allocate an executable page if there's no room on existing ones. | 158 | * We allocate an executable page if there's no room on existing ones. |
152 | */ | 159 | */ |
153 | static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) | 160 | kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) |
154 | { | 161 | { |
155 | struct kprobe_insn_page *kip; | 162 | struct kprobe_insn_page *kip; |
163 | kprobe_opcode_t *slot = NULL; | ||
156 | 164 | ||
165 | mutex_lock(&c->mutex); | ||
157 | retry: | 166 | retry: |
158 | list_for_each_entry(kip, &c->pages, list) { | 167 | list_for_each_entry(kip, &c->pages, list) { |
159 | if (kip->nused < slots_per_page(c)) { | 168 | if (kip->nused < slots_per_page(c)) { |
@@ -162,7 +171,8 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) | |||
162 | if (kip->slot_used[i] == SLOT_CLEAN) { | 171 | if (kip->slot_used[i] == SLOT_CLEAN) { |
163 | kip->slot_used[i] = SLOT_USED; | 172 | kip->slot_used[i] = SLOT_USED; |
164 | kip->nused++; | 173 | kip->nused++; |
165 | return kip->insns + (i * c->insn_size); | 174 | slot = kip->insns + (i * c->insn_size); |
175 | goto out; | ||
166 | } | 176 | } |
167 | } | 177 | } |
168 | /* kip->nused is broken. Fix it. */ | 178 | /* kip->nused is broken. Fix it. */ |
@@ -178,37 +188,29 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) | |||
178 | /* All out of space. Need to allocate a new page. */ | 188 | /* All out of space. Need to allocate a new page. */ |
179 | kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL); | 189 | kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL); |
180 | if (!kip) | 190 | if (!kip) |
181 | return NULL; | 191 | goto out; |
182 | 192 | ||
183 | /* | 193 | /* |
184 | * Use module_alloc so this page is within +/- 2GB of where the | 194 | * Use module_alloc so this page is within +/- 2GB of where the |
185 | * kernel image and loaded module images reside. This is required | 195 | * kernel image and loaded module images reside. This is required |
186 | * so x86_64 can correctly handle the %rip-relative fixups. | 196 | * so x86_64 can correctly handle the %rip-relative fixups. |
187 | */ | 197 | */ |
188 | kip->insns = module_alloc(PAGE_SIZE); | 198 | kip->insns = c->alloc(); |
189 | if (!kip->insns) { | 199 | if (!kip->insns) { |
190 | kfree(kip); | 200 | kfree(kip); |
191 | return NULL; | 201 | goto out; |
192 | } | 202 | } |
193 | INIT_LIST_HEAD(&kip->list); | 203 | INIT_LIST_HEAD(&kip->list); |
194 | memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c)); | 204 | memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c)); |
195 | kip->slot_used[0] = SLOT_USED; | 205 | kip->slot_used[0] = SLOT_USED; |
196 | kip->nused = 1; | 206 | kip->nused = 1; |
197 | kip->ngarbage = 0; | 207 | kip->ngarbage = 0; |
208 | kip->cache = c; | ||
198 | list_add(&kip->list, &c->pages); | 209 | list_add(&kip->list, &c->pages); |
199 | return kip->insns; | 210 | slot = kip->insns; |
200 | } | 211 | out: |
201 | 212 | mutex_unlock(&c->mutex); | |
202 | 213 | return slot; | |
203 | kprobe_opcode_t __kprobes *get_insn_slot(void) | ||
204 | { | ||
205 | kprobe_opcode_t *ret = NULL; | ||
206 | |||
207 | mutex_lock(&kprobe_insn_mutex); | ||
208 | ret = __get_insn_slot(&kprobe_insn_slots); | ||
209 | mutex_unlock(&kprobe_insn_mutex); | ||
210 | |||
211 | return ret; | ||
212 | } | 214 | } |
213 | 215 | ||
214 | /* Return 1 if all garbage is collected, otherwise 0. */ | 216 |
@@ -225,7 +227,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) | |||
225 | */ | 227 | */ |
226 | if (!list_is_singular(&kip->list)) { | 228 | if (!list_is_singular(&kip->list)) { |
227 | list_del(&kip->list); | 229 | list_del(&kip->list); |
228 | module_free(NULL, kip->insns); | 230 | kip->cache->free(kip->insns); |
229 | kfree(kip); | 231 | kfree(kip); |
230 | } | 232 | } |
231 | return 1; | 233 | return 1; |
@@ -255,11 +257,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c) | |||
255 | return 0; | 257 | return 0; |
256 | } | 258 | } |
257 | 259 | ||
258 | static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, | 260 | void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, |
259 | kprobe_opcode_t *slot, int dirty) | 261 | kprobe_opcode_t *slot, int dirty) |
260 | { | 262 | { |
261 | struct kprobe_insn_page *kip; | 263 | struct kprobe_insn_page *kip; |
262 | 264 | ||
265 | mutex_lock(&c->mutex); | ||
263 | list_for_each_entry(kip, &c->pages, list) { | 266 | list_for_each_entry(kip, &c->pages, list) { |
264 | long idx = ((long)slot - (long)kip->insns) / | 267 | long idx = ((long)slot - (long)kip->insns) / |
265 | (c->insn_size * sizeof(kprobe_opcode_t)); | 268 | (c->insn_size * sizeof(kprobe_opcode_t)); |
@@ -272,45 +275,25 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, | |||
272 | collect_garbage_slots(c); | 275 | collect_garbage_slots(c); |
273 | } else | 276 | } else |
274 | collect_one_slot(kip, idx); | 277 | collect_one_slot(kip, idx); |
275 | return; | 278 | goto out; |
276 | } | 279 | } |
277 | } | 280 | } |
278 | /* Could not free this slot. */ | 281 | /* Could not free this slot. */ |
279 | WARN_ON(1); | 282 | WARN_ON(1); |
283 | out: | ||
284 | mutex_unlock(&c->mutex); | ||
280 | } | 285 | } |
281 | 286 | ||
282 | void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) | ||
283 | { | ||
284 | mutex_lock(&kprobe_insn_mutex); | ||
285 | __free_insn_slot(&kprobe_insn_slots, slot, dirty); | ||
286 | mutex_unlock(&kprobe_insn_mutex); | ||
287 | } | ||
288 | #ifdef CONFIG_OPTPROBES | 287 | #ifdef CONFIG_OPTPROBES |
289 | /* For optimized_kprobe buffer */ | 288 | /* For optimized_kprobe buffer */ |
290 | static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */ | 289 | struct kprobe_insn_cache kprobe_optinsn_slots = { |
291 | static struct kprobe_insn_cache kprobe_optinsn_slots = { | 290 | .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex), |
291 | .alloc = alloc_insn_page, | ||
292 | .free = free_insn_page, | ||
292 | .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages), | 293 | .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages), |
293 | /* .insn_size is initialized later */ | 294 | /* .insn_size is initialized later */ |
294 | .nr_garbage = 0, | 295 | .nr_garbage = 0, |
295 | }; | 296 | }; |
296 | /* Get a slot for optimized_kprobe buffer */ | ||
297 | kprobe_opcode_t __kprobes *get_optinsn_slot(void) | ||
298 | { | ||
299 | kprobe_opcode_t *ret = NULL; | ||
300 | |||
301 | mutex_lock(&kprobe_optinsn_mutex); | ||
302 | ret = __get_insn_slot(&kprobe_optinsn_slots); | ||
303 | mutex_unlock(&kprobe_optinsn_mutex); | ||
304 | |||
305 | return ret; | ||
306 | } | ||
307 | |||
308 | void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty) | ||
309 | { | ||
310 | mutex_lock(&kprobe_optinsn_mutex); | ||
311 | __free_insn_slot(&kprobe_optinsn_slots, slot, dirty); | ||
312 | mutex_unlock(&kprobe_optinsn_mutex); | ||
313 | } | ||
314 | #endif | 297 | #endif |
315 | #endif | 298 | #endif |
316 | 299 | ||
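struct kprobe_insn_cache moves to the header (outside this excerpt) so both slot caches can share one implementation, picking up a per-cache mutex plus alloc/free callbacks; presumably roughly:

	struct kprobe_insn_cache {
		struct mutex mutex;
		void *(*alloc)(void);		/* allocate an insn page */
		void (*free)(void *page);	/* free an insn page */
		struct list_head pages;		/* list of kprobe_insn_page */
		size_t insn_size;		/* size of an instruction slot */
		int nr_garbage;
	};

get_insn_slot()/free_insn_slot() and their optinsn twins then reduce to one-line wrappers, e.g.:

	static inline kprobe_opcode_t *get_insn_slot(void)
	{
		return __get_insn_slot(&kprobe_insn_slots);
	}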
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c index 2b6e69909c39..7cbd4507a7e6 100644 --- a/kernel/modsign_pubkey.c +++ b/kernel/modsign_pubkey.c | |||
@@ -18,14 +18,14 @@ | |||
18 | 18 | ||
19 | struct key *modsign_keyring; | 19 | struct key *modsign_keyring; |
20 | 20 | ||
21 | extern __initdata const u8 modsign_certificate_list[]; | 21 | extern __initconst const u8 modsign_certificate_list[]; |
22 | extern __initdata const u8 modsign_certificate_list_end[]; | 22 | extern __initconst const u8 modsign_certificate_list_end[]; |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * We need to make sure ccache doesn't cache the .o file as it doesn't notice | 25 | * We need to make sure ccache doesn't cache the .o file as it doesn't notice |
26 | * if modsign.pub changes. | 26 | * if modsign.pub changes. |
27 | */ | 27 | */ |
28 | static __initdata const char annoy_ccache[] = __TIME__ "foo"; | 28 | static __initconst const char annoy_ccache[] = __TIME__ "foo"; |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * Load the compiled-in keys | 31 | * Load the compiled-in keys |
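__initdata on a const object places read-only data in a writable init section, which current toolchains reject as a section type conflict; __initconst is the matching annotation for const init-time data:

	static __initconst const char banner[] = __TIME__ "foo";	/* correct */
	/* static __initdata const char banner[] = ...;  -- section conflict */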
diff --git a/kernel/panic.c b/kernel/panic.c index 801864600514..b6c482ccc5db 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -123,10 +123,14 @@ void panic(const char *fmt, ...) | |||
123 | */ | 123 | */ |
124 | smp_send_stop(); | 124 | smp_send_stop(); |
125 | 125 | ||
126 | kmsg_dump(KMSG_DUMP_PANIC); | 126 | /* |
127 | 127 | * Run any panic handlers, including those that might need to | |
128 | * add information to the kmsg dump output. | ||
129 | */ | ||
128 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); | 130 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); |
129 | 131 | ||
132 | kmsg_dump(KMSG_DUMP_PANIC); | ||
133 | |||
130 | bust_spinlocks(0); | 134 | bust_spinlocks(0); |
131 | 135 | ||
132 | if (!panic_blink) | 136 | if (!panic_blink) |
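With the notifier chain run before kmsg_dump(), whatever a panic handler prints becomes part of the dump. A handler registered like this hypothetical example benefits directly:

	static int board_panic(struct notifier_block *nb, unsigned long event, void *msg)
	{
		pr_emerg("board: last-gasp state for \"%s\"\n", (char *)msg);
		return NOTIFY_DONE;	/* this output now lands in the kmsg dump */
	}

	static struct notifier_block board_panic_nb = {
		.notifier_call = board_panic,
	};

	/* at init time: */
	atomic_notifier_chain_register(&panic_notifier_list, &board_panic_nb);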
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 349587bb03e1..358a146fd4da 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -352,7 +352,7 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) | |||
352 | struct mem_extent *ext, *cur, *aux; | 352 | struct mem_extent *ext, *cur, *aux; |
353 | 353 | ||
354 | zone_start = zone->zone_start_pfn; | 354 | zone_start = zone->zone_start_pfn; |
355 | zone_end = zone->zone_start_pfn + zone->spanned_pages; | 355 | zone_end = zone_end_pfn(zone); |
356 | 356 | ||
357 | list_for_each_entry(ext, list, hook) | 357 | list_for_each_entry(ext, list, hook) |
358 | if (zone_start <= ext->end) | 358 | if (zone_start <= ext->end) |
@@ -884,7 +884,7 @@ static unsigned int count_highmem_pages(void) | |||
884 | continue; | 884 | continue; |
885 | 885 | ||
886 | mark_free_pages(zone); | 886 | mark_free_pages(zone); |
887 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 887 | max_zone_pfn = zone_end_pfn(zone); |
888 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 888 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
889 | if (saveable_highmem_page(zone, pfn)) | 889 | if (saveable_highmem_page(zone, pfn)) |
890 | n++; | 890 | n++; |
@@ -948,7 +948,7 @@ static unsigned int count_data_pages(void) | |||
948 | continue; | 948 | continue; |
949 | 949 | ||
950 | mark_free_pages(zone); | 950 | mark_free_pages(zone); |
951 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 951 | max_zone_pfn = zone_end_pfn(zone); |
952 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 952 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
953 | if (saveable_page(zone, pfn)) | 953 | if (saveable_page(zone, pfn)) |
954 | n++; | 954 | n++; |
@@ -1041,7 +1041,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) | |||
1041 | unsigned long max_zone_pfn; | 1041 | unsigned long max_zone_pfn; |
1042 | 1042 | ||
1043 | mark_free_pages(zone); | 1043 | mark_free_pages(zone); |
1044 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 1044 | max_zone_pfn = zone_end_pfn(zone); |
1045 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1045 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
1046 | if (page_is_saveable(zone, pfn)) | 1046 | if (page_is_saveable(zone, pfn)) |
1047 | memory_bm_set_bit(orig_bm, pfn); | 1047 | memory_bm_set_bit(orig_bm, pfn); |
@@ -1093,7 +1093,7 @@ void swsusp_free(void) | |||
1093 | unsigned long pfn, max_zone_pfn; | 1093 | unsigned long pfn, max_zone_pfn; |
1094 | 1094 | ||
1095 | for_each_populated_zone(zone) { | 1095 | for_each_populated_zone(zone) { |
1096 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 1096 | max_zone_pfn = zone_end_pfn(zone); |
1097 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1097 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
1098 | if (pfn_valid(pfn)) { | 1098 | if (pfn_valid(pfn)) { |
1099 | struct page *page = pfn_to_page(pfn); | 1099 | struct page *page = pfn_to_page(pfn); |
@@ -1755,7 +1755,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) | |||
1755 | 1755 | ||
1756 | /* Clear page flags */ | 1756 | /* Clear page flags */ |
1757 | for_each_populated_zone(zone) { | 1757 | for_each_populated_zone(zone) { |
1758 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 1758 | max_zone_pfn = zone_end_pfn(zone); |
1759 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1759 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
1760 | if (pfn_valid(pfn)) | 1760 | if (pfn_valid(pfn)) |
1761 | swsusp_unset_page_free(pfn_to_page(pfn)); | 1761 | swsusp_unset_page_free(pfn_to_page(pfn)); |
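All five hunks replace the same open-coded sum with the zone_end_pfn() accessor from <linux/mmzone.h>, which is simply:

	static inline unsigned long zone_end_pfn(const struct zone *zone)
	{
		return zone->zone_start_pfn + zone->spanned_pages;
	}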
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index a146ee327f6a..dd562e9aa2c8 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -236,7 +236,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
236 | */ | 236 | */ |
237 | int dumpable = 0; | 237 | int dumpable = 0; |
238 | /* Don't let security modules deny introspection */ | 238 | /* Don't let security modules deny introspection */ |
239 | if (task == current) | 239 | if (same_thread_group(task, current)) |
240 | return 0; | 240 | return 0; |
241 | rcu_read_lock(); | 241 | rcu_read_lock(); |
242 | tcred = __task_cred(task); | 242 | tcred = __task_cred(task); |
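Introspection is now waived for any thread in the caller's own thread group, not just for task == current; same_thread_group() amounts to a thread-group identity test, roughly:

	static inline int same_thread_group(struct task_struct *p1, struct task_struct *p2)
	{
		return p1->signal == p2->signal;  /* one shared signal struct per group */
	}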
diff --git a/kernel/signal.c b/kernel/signal.c index 50e41075ac77..ded28b91fa53 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -3394,7 +3394,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, | |||
3394 | new_ka.sa.sa_restorer = compat_ptr(restorer); | 3394 | new_ka.sa.sa_restorer = compat_ptr(restorer); |
3395 | #endif | 3395 | #endif |
3396 | ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask)); | 3396 | ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask)); |
3397 | ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); | 3397 | ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); |
3398 | if (ret) | 3398 | if (ret) |
3399 | return -EFAULT; | 3399 | return -EFAULT; |
3400 | sigset_from_compat(&new_ka.sa.sa_mask, &mask); | 3400 | sigset_from_compat(&new_ka.sa.sa_mask, &mask); |
@@ -3406,7 +3406,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, | |||
3406 | ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), | 3406 | ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), |
3407 | &oact->sa_handler); | 3407 | &oact->sa_handler); |
3408 | ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask)); | 3408 | ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask)); |
3409 | ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | 3409 | ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); |
3410 | #ifdef __ARCH_HAS_SA_RESTORER | 3410 | #ifdef __ARCH_HAS_SA_RESTORER |
3411 | ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), | 3411 | ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), |
3412 | &oact->sa_restorer); | 3412 | &oact->sa_restorer); |
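get_user()/put_user() perform the access_ok() range check that the double-underscore variants deliberately skip, so the checked forms are the conservative choice here. The contract, in outline:

	/* __get_user(): the caller must have validated the pointer */
	if (!access_ok(VERIFY_READ, &act->sa_flags, sizeof(act->sa_flags)))
		return -EFAULT;
	ret = __get_user(flags, &act->sa_flags);

	/* get_user(): the range check is folded in */
	ret = get_user(flags, &act->sa_flags);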
diff --git a/kernel/smp.c b/kernel/smp.c index 449b707fc20d..0564571dcdf7 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -48,10 +48,13 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
48 | cpu_to_node(cpu))) | 48 | cpu_to_node(cpu))) |
49 | return notifier_from_errno(-ENOMEM); | 49 | return notifier_from_errno(-ENOMEM); |
50 | if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, | 50 | if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, |
51 | cpu_to_node(cpu))) | 51 | cpu_to_node(cpu))) { |
52 | free_cpumask_var(cfd->cpumask); | ||
52 | return notifier_from_errno(-ENOMEM); | 53 | return notifier_from_errno(-ENOMEM); |
54 | } | ||
53 | cfd->csd = alloc_percpu(struct call_single_data); | 55 | cfd->csd = alloc_percpu(struct call_single_data); |
54 | if (!cfd->csd) { | 56 | if (!cfd->csd) { |
57 | free_cpumask_var(cfd->cpumask_ipi); | ||
55 | free_cpumask_var(cfd->cpumask); | 58 | free_cpumask_var(cfd->cpumask); |
56 | return notifier_from_errno(-ENOMEM); | 59 | return notifier_from_errno(-ENOMEM); |
57 | } | 60 | } |
@@ -572,8 +575,10 @@ EXPORT_SYMBOL(on_each_cpu); | |||
572 | * | 575 | * |
573 | * If @wait is true, then returns once @func has returned. | 576 | * If @wait is true, then returns once @func has returned. |
574 | * | 577 | * |
575 | * You must not call this function with disabled interrupts or | 578 | * You must not call this function with disabled interrupts or from a |
576 | * from a hardware interrupt handler or from a bottom half handler. | 579 | * hardware interrupt handler or from a bottom half handler. The |
580 | * exception is that it may be used during early boot while | ||
581 | * early_boot_irqs_disabled is set. | ||
577 | */ | 582 | */ |
578 | void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, | 583 | void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, |
579 | void *info, bool wait) | 584 | void *info, bool wait) |
@@ -582,9 +587,10 @@ void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, | |||
582 | 587 | ||
583 | smp_call_function_many(mask, func, info, wait); | 588 | smp_call_function_many(mask, func, info, wait); |
584 | if (cpumask_test_cpu(cpu, mask)) { | 589 | if (cpumask_test_cpu(cpu, mask)) { |
585 | local_irq_disable(); | 590 | unsigned long flags; |
591 | local_irq_save(flags); | ||
586 | func(info); | 592 | func(info); |
587 | local_irq_enable(); | 593 | local_irq_restore(flags); |
588 | } | 594 | } |
589 | put_cpu(); | 595 | put_cpu(); |
590 | } | 596 | } |
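
Two independent fixes land in kernel/smp.c: the hotplug path now frees cfd->cpumask when the cpumask_ipi allocation fails (and frees both masks when the per-cpu csd allocation fails), and on_each_cpu_mask() saves and restores the interrupt flags instead of blindly re-enabling them, so callers that already run with interrupts off (e.g. during early boot while early_boot_irqs_disabled is set) keep them off. A minimal sketch of the save/restore idiom, with a hypothetical function name:

#include <linux/irqflags.h>
#include <linux/smp.h>

static void run_on_this_cpu(smp_call_func_t func, void *info)
{
	unsigned long flags;

	/* Preserve the caller's interrupt state: if IRQs were already
	 * disabled, local_irq_restore() leaves them disabled, whereas
	 * local_irq_enable() would turn them back on unconditionally. */
	local_irq_save(flags);
	func(info);
	local_irq_restore(flags);
}
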
diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 5cdd8065a3ce..4b082b5cac9e 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c | |||
@@ -34,6 +34,20 @@ | |||
34 | #else | 34 | #else |
35 | #define raw_read_can_lock(l) read_can_lock(l) | 35 | #define raw_read_can_lock(l) read_can_lock(l) |
36 | #define raw_write_can_lock(l) write_can_lock(l) | 36 | #define raw_write_can_lock(l) write_can_lock(l) |
37 | |||
38 | /* | ||
39 | * Some architectures can relax in favour of the CPU owning the lock. | ||
40 | */ | ||
41 | #ifndef arch_read_relax | ||
42 | # define arch_read_relax(l) cpu_relax() | ||
43 | #endif | ||
44 | #ifndef arch_write_relax | ||
45 | # define arch_write_relax(l) cpu_relax() | ||
46 | #endif | ||
47 | #ifndef arch_spin_relax | ||
48 | # define arch_spin_relax(l) cpu_relax() | ||
49 | #endif | ||
50 | |||
37 | /* | 51 | /* |
38 | * We build the __lock_function inlines here. They are too large for | 52 | * We build the __lock_function inlines here. They are too large for |
39 | * inlining all over the place, but here is only one user per function | 53 | * inlining all over the place, but here is only one user per function |
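
The arch_read_relax()/arch_write_relax()/arch_spin_relax() fallbacks only take effect on architectures that do not provide their own hooks; they degrade to cpu_relax(), the usual busy-wait hint. A simplified sketch of where such a hook is consumed — a loose model of the generated __lock_function loops, not their exact body:

#include <linux/spinlock.h>

static void spin_until_acquired(arch_spinlock_t *lock)
{
	/* Back off politely while another CPU owns the lock;
	 * arch_spin_relax() is cpu_relax() unless overridden. */
	while (!arch_spin_trylock(lock))
		arch_spin_relax(lock);
}
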
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 07f6fc468e17..dc69093a8ec4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1225,7 +1225,7 @@ static struct ctl_table vm_table[] = { | |||
1225 | .data = &hugepages_treat_as_movable, | 1225 | .data = &hugepages_treat_as_movable, |
1226 | .maxlen = sizeof(int), | 1226 | .maxlen = sizeof(int), |
1227 | .mode = 0644, | 1227 | .mode = 0644, |
1228 | .proc_handler = hugetlb_treat_movable_handler, | 1228 | .proc_handler = proc_dointvec, |
1229 | }, | 1229 | }, |
1230 | { | 1230 | { |
1231 | .procname = "nr_overcommit_hugepages", | 1231 | .procname = "nr_overcommit_hugepages", |
diff --git a/kernel/task_work.c b/kernel/task_work.c index 65bd3c92d6f3..8727032e3a6f 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c | |||
@@ -4,6 +4,23 @@ | |||
4 | 4 | ||
5 | static struct callback_head work_exited; /* all we need is ->next == NULL */ | 5 | static struct callback_head work_exited; /* all we need is ->next == NULL */ |
6 | 6 | ||
7 | /** | ||
8 | * task_work_add - ask the @task to execute @work->func() | ||
9 | * @task: the task which should run the callback | ||
10 | * @work: the callback to run | ||
11 | * @notify: send the notification if true | ||
12 | * | ||
13 | * Queue @work for task_work_run() below and notify the @task if @notify. | ||
14 | * Fails if the @task is exiting/exited and thus it can't process this @work. | ||
15 | * Otherwise @work->func() will be called when the @task returns from kernel | ||
16 | * mode or exits. | ||
17 | * | ||
18 | * This is like a signal handler that runs in kernel mode, but it doesn't | ||
19 | * try to wake up the @task. | ||
20 | * | ||
21 | * RETURNS: | ||
22 | * 0 on success, or -ESRCH if @task is already exiting. | ||
23 | */ | ||
7 | int | 24 | int |
8 | task_work_add(struct task_struct *task, struct callback_head *work, bool notify) | 25 | task_work_add(struct task_struct *task, struct callback_head *work, bool notify) |
9 | { | 26 | { |
@@ -21,11 +38,22 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify) | |||
21 | return 0; | 38 | return 0; |
22 | } | 39 | } |
23 | 40 | ||
41 | /** | ||
42 | * task_work_cancel - cancel a pending work added by task_work_add() | ||
43 | * @task: the task which should execute the work | ||
44 | * @func: identifies the work to remove | ||
45 | * | ||
46 | * Find the last queued pending work with ->func == @func and remove | ||
47 | * it from the queue. | ||
48 | * | ||
49 | * RETURNS: | ||
50 | * The found work or NULL if not found. | ||
51 | */ | ||
24 | struct callback_head * | 52 | struct callback_head * |
25 | task_work_cancel(struct task_struct *task, task_work_func_t func) | 53 | task_work_cancel(struct task_struct *task, task_work_func_t func) |
26 | { | 54 | { |
27 | struct callback_head **pprev = &task->task_works; | 55 | struct callback_head **pprev = &task->task_works; |
28 | struct callback_head *work = NULL; | 56 | struct callback_head *work; |
29 | unsigned long flags; | 57 | unsigned long flags; |
30 | /* | 58 | /* |
31 | * If cmpxchg() fails we continue without updating pprev. | 59 | * If cmpxchg() fails we continue without updating pprev. |
@@ -35,7 +63,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) | |||
35 | */ | 63 | */ |
36 | raw_spin_lock_irqsave(&task->pi_lock, flags); | 64 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
37 | while ((work = ACCESS_ONCE(*pprev))) { | 65 | while ((work = ACCESS_ONCE(*pprev))) { |
38 | read_barrier_depends(); | 66 | smp_read_barrier_depends(); |
39 | if (work->func != func) | 67 | if (work->func != func) |
40 | pprev = &work->next; | 68 | pprev = &work->next; |
41 | else if (cmpxchg(pprev, work, work->next) == work) | 69 | else if (cmpxchg(pprev, work, work->next) == work) |
@@ -46,6 +74,14 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) | |||
46 | return work; | 74 | return work; |
47 | } | 75 | } |
48 | 76 | ||
77 | /** | ||
78 | * task_work_run - execute the works added by task_work_add() | ||
79 | * | ||
80 | * Flush the pending works. Should be used by the core kernel code. | ||
81 | * Called before the task returns to user mode or stops, or when it | ||
82 | * exits. In the latter case task_work_add() can no longer add new | ||
83 | * work after task_work_run() returns. | ||
84 | */ | ||
49 | void task_work_run(void) | 85 | void task_work_run(void) |
50 | { | 86 | { |
51 | struct task_struct *task = current; | 87 | struct task_struct *task = current; |
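
The new kerneldoc spells out the task_work contract: a producer queues a callback_head on a target task, and the callback runs in that task's context on its next return to user mode (or at exit). A hedged usage sketch — the embedding struct, callback and helper below are hypothetical:

#include <linux/task_work.h>
#include <linux/slab.h>
#include <linux/sched.h>

struct my_deferred {
	struct callback_head cb;
	int payload;
};

static void my_deferred_fn(struct callback_head *cb)
{
	struct my_deferred *d = container_of(cb, struct my_deferred, cb);

	/* Runs in the target task's context, in kernel mode. */
	pr_info("deferred payload %d\n", d->payload);
	kfree(d);
}

static int queue_deferred(struct task_struct *task, int payload)
{
	struct my_deferred *d = kmalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return -ENOMEM;
	d->payload = payload;
	init_task_work(&d->cb, my_deferred_fn);
	if (task_work_add(task, &d->cb, true)) {
		kfree(d);		/* task already exiting: -ESRCH */
		return -ESRCH;
	}
	return 0;
}
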
diff --git a/kernel/up.c b/kernel/up.c index c54c75e9faf7..630d72bf7e41 100644 --- a/kernel/up.c +++ b/kernel/up.c | |||
@@ -10,12 +10,64 @@ | |||
10 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | 10 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
11 | int wait) | 11 | int wait) |
12 | { | 12 | { |
13 | unsigned long flags; | ||
14 | |||
13 | WARN_ON(cpu != 0); | 15 | WARN_ON(cpu != 0); |
14 | 16 | ||
15 | local_irq_disable(); | 17 | local_irq_save(flags); |
16 | (func)(info); | 18 | func(info); |
17 | local_irq_enable(); | 19 | local_irq_restore(flags); |
18 | 20 | ||
19 | return 0; | 21 | return 0; |
20 | } | 22 | } |
21 | EXPORT_SYMBOL(smp_call_function_single); | 23 | EXPORT_SYMBOL(smp_call_function_single); |
24 | |||
25 | int on_each_cpu(smp_call_func_t func, void *info, int wait) | ||
26 | { | ||
27 | unsigned long flags; | ||
28 | |||
29 | local_irq_save(flags); | ||
30 | func(info); | ||
31 | local_irq_restore(flags); | ||
32 | return 0; | ||
33 | } | ||
34 | EXPORT_SYMBOL(on_each_cpu); | ||
35 | |||
36 | /* | ||
37 | * Note that we still need to test the mask even on UP, | ||
38 | * because code that would run on SMP may call us with | ||
39 | * a mask that is empty or that excludes the local | ||
40 | * CPU. | ||
41 | */ | ||
42 | void on_each_cpu_mask(const struct cpumask *mask, | ||
43 | smp_call_func_t func, void *info, bool wait) | ||
44 | { | ||
45 | unsigned long flags; | ||
46 | |||
47 | if (cpumask_test_cpu(0, mask)) { | ||
48 | local_irq_save(flags); | ||
49 | func(info); | ||
50 | local_irq_restore(flags); | ||
51 | } | ||
52 | } | ||
53 | EXPORT_SYMBOL(on_each_cpu_mask); | ||
54 | |||
55 | /* | ||
56 | * Preemption is disabled here to make sure the cond_func is called under the | ||
57 | * same conditions in UP and SMP. | ||
58 | */ | ||
59 | void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), | ||
60 | smp_call_func_t func, void *info, bool wait, | ||
61 | gfp_t gfp_flags) | ||
62 | { | ||
63 | unsigned long flags; | ||
64 | |||
65 | preempt_disable(); | ||
66 | if (cond_func(0, info)) { | ||
67 | local_irq_save(flags); | ||
68 | func(info); | ||
69 | local_irq_restore(flags); | ||
70 | } | ||
71 | preempt_enable(); | ||
72 | } | ||
73 | EXPORT_SYMBOL(on_each_cpu_cond); | ||
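
These UP stubs give on_each_cpu(), on_each_cpu_mask() and on_each_cpu_cond() the same observable semantics as their SMP counterparts: the callback runs with interrupts disabled, the mask is honoured (it may legitimately exclude CPU 0), and cond_func is evaluated with preemption disabled. A hedged caller-side sketch with hypothetical names and a hypothetical per-cpu counter:

#include <linux/smp.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, dirty_count);

static bool cache_is_dirty(int cpu, void *info)
{
	/* Hypothetical predicate: decide per CPU whether to bother. */
	return per_cpu(dirty_count, cpu) != 0;
}

static void flush_local_cache(void *info)
{
	/* Runs with IRQs disabled on each CPU the predicate selected. */
	__this_cpu_write(dirty_count, 0);
}

static void flush_dirty_caches(void)
{
	on_each_cpu_cond(cache_is_dirty, flush_local_cache,
			 NULL, true, GFP_KERNEL);
}
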
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 652bea9054f0..c9eef36739a9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -1461,7 +1461,7 @@ config BACKTRACE_SELF_TEST | |||
1461 | 1461 | ||
1462 | config RBTREE_TEST | 1462 | config RBTREE_TEST |
1463 | tristate "Red-Black tree test" | 1463 | tristate "Red-Black tree test" |
1464 | depends on m && DEBUG_KERNEL | 1464 | depends on DEBUG_KERNEL |
1465 | help | 1465 | help |
1466 | A benchmark measuring the performance of the rbtree library. | 1466 | A benchmark measuring the performance of the rbtree library. |
1467 | Also includes rbtree invariant checks. | 1467 | Also includes rbtree invariant checks. |
diff --git a/lib/crc32.c b/lib/crc32.c index 072fbd8234d5..410093dbe51c 100644 --- a/lib/crc32.c +++ b/lib/crc32.c | |||
@@ -131,11 +131,14 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) | |||
131 | #endif | 131 | #endif |
132 | 132 | ||
133 | /** | 133 | /** |
134 | * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 | 134 | * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II |
135 | * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for | 135 | * CRC32/CRC32C |
136 | * other uses, or the previous crc32 value if computing incrementally. | 136 | * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for other |
137 | * @p: pointer to buffer over which CRC is run | 137 | * uses, or the previous crc32/crc32c value if computing incrementally. |
138 | * @p: pointer to buffer over which CRC32/CRC32C is run | ||
138 | * @len: length of buffer @p | 139 | * @len: length of buffer @p |
140 | * @tab: little-endian Ethernet table | ||
141 | * @polynomial: CRC32/CRC32C LE polynomial | ||
139 | */ | 142 | */ |
140 | static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, | 143 | static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, |
141 | size_t len, const u32 (*tab)[256], | 144 | size_t len, const u32 (*tab)[256], |
@@ -201,11 +204,13 @@ EXPORT_SYMBOL(crc32_le); | |||
201 | EXPORT_SYMBOL(__crc32c_le); | 204 | EXPORT_SYMBOL(__crc32c_le); |
202 | 205 | ||
203 | /** | 206 | /** |
204 | * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 | 207 | * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 |
205 | * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for | 208 | * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for |
206 | * other uses, or the previous crc32 value if computing incrementally. | 209 | * other uses, or the previous crc32 value if computing incrementally. |
207 | * @p: pointer to buffer over which CRC is run | 210 | * @p: pointer to buffer over which CRC32 is run |
208 | * @len: length of buffer @p | 211 | * @len: length of buffer @p |
212 | * @tab: big-endian Ethernet table | ||
213 | * @polynomial: CRC32 BE polynomial | ||
209 | */ | 214 | */ |
210 | static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, | 215 | static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, |
211 | size_t len, const u32 (*tab)[256], | 216 | size_t len, const u32 (*tab)[256], |
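
The kerneldoc fixes above make the comments name the functions they actually sit on (crc32_le_generic()/crc32_be_generic()) and document the extra @tab and @polynomial parameters. For readers unfamiliar with the seeding convention described in @crc, here is a hedged, standalone bit-at-a-time model of the little-endian variant; the table-driven kernel code computes the same values, just faster:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define CRC32_POLY_LE 0xedb88320u	/* Ethernet AUTODIN II, reflected */

/* Bitwise little-endian CRC32: @crc is the seed (~0 for Ethernet,
 * or a previous return value when computing incrementally). */
static uint32_t crc32_le_bitwise(uint32_t crc, const unsigned char *p,
				 size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (CRC32_POLY_LE & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	const unsigned char msg[] = "123456789";

	/* Ethernet convention: seed with ~0, invert the result. */
	uint32_t crc = ~crc32_le_bitwise(~0u, msg, 9);

	printf("%08x\n", crc);	/* prints cbf43926, the CRC-32 check value */
	return 0;
}
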
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 19ff89e34eec..d619b28c456f 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c | |||
@@ -48,7 +48,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, | |||
48 | out_len = 0x8000; /* 32 K */ | 48 | out_len = 0x8000; /* 32 K */ |
49 | out_buf = malloc(out_len); | 49 | out_buf = malloc(out_len); |
50 | } else { | 50 | } else { |
51 | out_len = 0x7fffffff; /* no limit */ | 51 | out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ |
52 | } | 52 | } |
53 | if (!out_buf) { | 53 | if (!out_buf) { |
54 | error("Out of memory while allocating output buffer"); | 54 | error("Out of memory while allocating output buffer"); |
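
The old "no limit" value of 0x7fffffff could make out_buf + out_len wrap past the top of the address space on 32-bit targets when out_buf sits high in memory, silently defeating the bound; the new expression caps the length at exactly the number of bytes remaining below the top of the address space. A small standalone illustration of the arithmetic, assuming a 32-bit size_t and a hypothetical buffer address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t out_buf = 0x90000000u;		/* hypothetical address */

	/* Old cap: the pointer-sized addition wraps around. */
	printf("%08x\n", out_buf + 0x7fffffffu);	/* 0fffffff */

	/* New cap: largest len for which out_buf + len cannot wrap. */
	printf("%08x\n", 0xffffffffu - out_buf);	/* 6fffffff */
	return 0;
}
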
diff --git a/lib/genalloc.c b/lib/genalloc.c index b35cfa9bc3d4..26cf20be72b7 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c | |||
@@ -37,6 +37,11 @@ | |||
37 | #include <linux/of_address.h> | 37 | #include <linux/of_address.h> |
38 | #include <linux/of_device.h> | 38 | #include <linux/of_device.h> |
39 | 39 | ||
40 | static inline size_t chunk_size(const struct gen_pool_chunk *chunk) | ||
41 | { | ||
42 | return chunk->end_addr - chunk->start_addr + 1; | ||
43 | } | ||
44 | |||
40 | static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) | 45 | static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) |
41 | { | 46 | { |
42 | unsigned long val, nval; | 47 | unsigned long val, nval; |
@@ -182,13 +187,13 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy | |||
182 | int nbytes = sizeof(struct gen_pool_chunk) + | 187 | int nbytes = sizeof(struct gen_pool_chunk) + |
183 | BITS_TO_LONGS(nbits) * sizeof(long); | 188 | BITS_TO_LONGS(nbits) * sizeof(long); |
184 | 189 | ||
185 | chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid); | 190 | chunk = kzalloc_node(nbytes, GFP_KERNEL, nid); |
186 | if (unlikely(chunk == NULL)) | 191 | if (unlikely(chunk == NULL)) |
187 | return -ENOMEM; | 192 | return -ENOMEM; |
188 | 193 | ||
189 | chunk->phys_addr = phys; | 194 | chunk->phys_addr = phys; |
190 | chunk->start_addr = virt; | 195 | chunk->start_addr = virt; |
191 | chunk->end_addr = virt + size; | 196 | chunk->end_addr = virt + size - 1; |
192 | atomic_set(&chunk->avail, size); | 197 | atomic_set(&chunk->avail, size); |
193 | 198 | ||
194 | spin_lock(&pool->lock); | 199 | spin_lock(&pool->lock); |
@@ -213,7 +218,7 @@ phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) | |||
213 | 218 | ||
214 | rcu_read_lock(); | 219 | rcu_read_lock(); |
215 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { | 220 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { |
216 | if (addr >= chunk->start_addr && addr < chunk->end_addr) { | 221 | if (addr >= chunk->start_addr && addr <= chunk->end_addr) { |
217 | paddr = chunk->phys_addr + (addr - chunk->start_addr); | 222 | paddr = chunk->phys_addr + (addr - chunk->start_addr); |
218 | break; | 223 | break; |
219 | } | 224 | } |
@@ -242,7 +247,7 @@ void gen_pool_destroy(struct gen_pool *pool) | |||
242 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | 247 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); |
243 | list_del(&chunk->next_chunk); | 248 | list_del(&chunk->next_chunk); |
244 | 249 | ||
245 | end_bit = (chunk->end_addr - chunk->start_addr) >> order; | 250 | end_bit = chunk_size(chunk) >> order; |
246 | bit = find_next_bit(chunk->bits, end_bit, 0); | 251 | bit = find_next_bit(chunk->bits, end_bit, 0); |
247 | BUG_ON(bit < end_bit); | 252 | BUG_ON(bit < end_bit); |
248 | 253 | ||
@@ -283,7 +288,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) | |||
283 | if (size > atomic_read(&chunk->avail)) | 288 | if (size > atomic_read(&chunk->avail)) |
284 | continue; | 289 | continue; |
285 | 290 | ||
286 | end_bit = (chunk->end_addr - chunk->start_addr) >> order; | 291 | end_bit = chunk_size(chunk) >> order; |
287 | retry: | 292 | retry: |
288 | start_bit = pool->algo(chunk->bits, end_bit, start_bit, nbits, | 293 | start_bit = pool->algo(chunk->bits, end_bit, start_bit, nbits, |
289 | pool->data); | 294 | pool->data); |
@@ -330,8 +335,8 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) | |||
330 | nbits = (size + (1UL << order) - 1) >> order; | 335 | nbits = (size + (1UL << order) - 1) >> order; |
331 | rcu_read_lock(); | 336 | rcu_read_lock(); |
332 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { | 337 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { |
333 | if (addr >= chunk->start_addr && addr < chunk->end_addr) { | 338 | if (addr >= chunk->start_addr && addr <= chunk->end_addr) { |
334 | BUG_ON(addr + size > chunk->end_addr); | 339 | BUG_ON(addr + size - 1 > chunk->end_addr); |
335 | start_bit = (addr - chunk->start_addr) >> order; | 340 | start_bit = (addr - chunk->start_addr) >> order; |
336 | remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); | 341 | remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); |
337 | BUG_ON(remain); | 342 | BUG_ON(remain); |
@@ -400,7 +405,7 @@ size_t gen_pool_size(struct gen_pool *pool) | |||
400 | 405 | ||
401 | rcu_read_lock(); | 406 | rcu_read_lock(); |
402 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) | 407 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) |
403 | size += chunk->end_addr - chunk->start_addr; | 408 | size += chunk_size(chunk); |
404 | rcu_read_unlock(); | 409 | rcu_read_unlock(); |
405 | return size; | 410 | return size; |
406 | } | 411 | } |
@@ -519,7 +524,6 @@ struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, | |||
519 | /** | 524 | /** |
520 | * dev_get_gen_pool - Obtain the gen_pool (if any) for a device | 525 | * dev_get_gen_pool - Obtain the gen_pool (if any) for a device |
521 | * @dev: device to retrieve the gen_pool from | 526 | * @dev: device to retrieve the gen_pool from |
522 | * @name: Optional name for the gen_pool, usually NULL | ||
523 | * | 527 | * |
524 | * Returns the gen_pool for the device if one is present, or NULL. | 528 | * Returns the gen_pool for the device if one is present, or NULL. |
525 | */ | 529 | */ |
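
The genalloc change redefines end_addr as the last valid byte of a chunk rather than one past it, which is why the range tests gain "<=", sizes come from the new chunk_size() helper, and gen_pool_add_virt() stores "virt + size - 1". The exclusive form could not represent a chunk ending exactly at the top of the address space, because start + size wraps to 0 there. A standalone sketch of the failure mode, using 32-bit arithmetic for brevity:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t start = 0xfffff000u;	/* chunk ends at 2^32 - 1 */
	uint32_t size  = 0x1000u;
	uint32_t addr  = 0xfffff800u;	/* clearly inside the chunk */

	uint32_t end_excl = start + size;	/* wraps to 0 */
	uint32_t end_incl = start + size - 1;	/* 0xffffffff */

	/* The exclusive end makes the containment test vacuously false
	 * for every address; the inclusive end behaves correctly. */
	printf("excl: %d  incl: %d\n",
	       addr >= start && addr < end_excl,
	       addr >= start && addr <= end_incl);	/* excl: 0  incl: 1 */
	return 0;
}
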
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 411be80ddb46..df6839e3ce08 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c | |||
@@ -283,8 +283,8 @@ _output_error: | |||
283 | return (int) (-(((char *) ip) - source)); | 283 | return (int) (-(((char *) ip) - source)); |
284 | } | 284 | } |
285 | 285 | ||
286 | int lz4_decompress(const char *src, size_t *src_len, char *dest, | 286 | int lz4_decompress(const unsigned char *src, size_t *src_len, |
287 | size_t actual_dest_len) | 287 | unsigned char *dest, size_t actual_dest_len) |
288 | { | 288 | { |
289 | int ret = -1; | 289 | int ret = -1; |
290 | int input_len = 0; | 290 | int input_len = 0; |
@@ -302,8 +302,8 @@ exit_0: | |||
302 | EXPORT_SYMBOL(lz4_decompress); | 302 | EXPORT_SYMBOL(lz4_decompress); |
303 | #endif | 303 | #endif |
304 | 304 | ||
305 | int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, | 305 | int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, |
306 | char *dest, size_t *dest_len) | 306 | unsigned char *dest, size_t *dest_len) |
307 | { | 307 | { |
308 | int ret = -1; | 308 | int ret = -1; |
309 | int out_len = 0; | 309 | int out_len = 0; |
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e7964296fd50..7811ed3b4e70 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/string.h> | 32 | #include <linux/string.h> |
33 | #include <linux/bitops.h> | 33 | #include <linux/bitops.h> |
34 | #include <linux/rcupdate.h> | 34 | #include <linux/rcupdate.h> |
35 | #include <linux/hardirq.h> /* in_interrupt() */ | ||
35 | 36 | ||
36 | 37 | ||
37 | #ifdef __KERNEL__ | 38 | #ifdef __KERNEL__ |
@@ -207,7 +208,12 @@ radix_tree_node_alloc(struct radix_tree_root *root) | |||
207 | struct radix_tree_node *ret = NULL; | 208 | struct radix_tree_node *ret = NULL; |
208 | gfp_t gfp_mask = root_gfp_mask(root); | 209 | gfp_t gfp_mask = root_gfp_mask(root); |
209 | 210 | ||
210 | if (!(gfp_mask & __GFP_WAIT)) { | 211 | /* |
212 | * Preload code isn't irq safe, and it doesn't make sense to use | ||
213 | * preloading in interrupt context anyway, as all the allocations | ||
214 | * have to be atomic. So just do a normal allocation when in interrupt. | ||
215 | */ | ||
216 | if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) { | ||
211 | struct radix_tree_preload *rtp; | 217 | struct radix_tree_preload *rtp; |
212 | 218 | ||
213 | /* | 219 | /* |
@@ -264,7 +270,7 @@ radix_tree_node_free(struct radix_tree_node *node) | |||
264 | * To make use of this facility, the radix tree must be initialised without | 270 | * To make use of this facility, the radix tree must be initialised without |
265 | * __GFP_WAIT being passed to INIT_RADIX_TREE(). | 271 | * __GFP_WAIT being passed to INIT_RADIX_TREE(). |
266 | */ | 272 | */ |
267 | int radix_tree_preload(gfp_t gfp_mask) | 273 | static int __radix_tree_preload(gfp_t gfp_mask) |
268 | { | 274 | { |
269 | struct radix_tree_preload *rtp; | 275 | struct radix_tree_preload *rtp; |
270 | struct radix_tree_node *node; | 276 | struct radix_tree_node *node; |
@@ -288,9 +294,40 @@ int radix_tree_preload(gfp_t gfp_mask) | |||
288 | out: | 294 | out: |
289 | return ret; | 295 | return ret; |
290 | } | 296 | } |
297 | |||
298 | /* | ||
299 | * Load up this CPU's radix_tree_node buffer with sufficient objects to | ||
300 | * ensure that the addition of a single element in the tree cannot fail. On | ||
301 | * success, return zero, with preemption disabled. On error, return -ENOMEM | ||
302 | * with preemption not disabled. | ||
303 | * | ||
304 | * To make use of this facility, the radix tree must be initialised without | ||
305 | * __GFP_WAIT being passed to INIT_RADIX_TREE(). | ||
306 | */ | ||
307 | int radix_tree_preload(gfp_t gfp_mask) | ||
308 | { | ||
309 | /* Warn on nonsensical use... */ | ||
310 | WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT)); | ||
311 | return __radix_tree_preload(gfp_mask); | ||
312 | } | ||
291 | EXPORT_SYMBOL(radix_tree_preload); | 313 | EXPORT_SYMBOL(radix_tree_preload); |
292 | 314 | ||
293 | /* | 315 | /* |
316 | * Same as the function above, except we don't guarantee that preloading | ||
317 | * happens. We do it if we decide it helps. On success, return zero with | ||
318 | * preemption disabled. On error, return -ENOMEM with preemption not disabled. | ||
319 | */ | ||
320 | int radix_tree_maybe_preload(gfp_t gfp_mask) | ||
321 | { | ||
322 | if (gfp_mask & __GFP_WAIT) | ||
323 | return __radix_tree_preload(gfp_mask); | ||
324 | /* Preloading doesn't help anything with this gfp mask, skip it */ | ||
325 | preempt_disable(); | ||
326 | return 0; | ||
327 | } | ||
328 | EXPORT_SYMBOL(radix_tree_maybe_preload); | ||
329 | |||
330 | /* | ||
294 | * Return the maximum key which can be stored into a | 331 | * Return the maximum key which can be stored into a |
295 | * radix tree with height HEIGHT. | 332 | * radix tree with height HEIGHT. |
296 | */ | 333 | */ |
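
The preload split leaves radix_tree_preload() for callers whose gfp mask allows sleeping (it now warns otherwise) and adds radix_tree_maybe_preload() for callers whose mask may be atomic: preloading cannot help there, but preemption is still disabled so the pairing with radix_tree_preload_end() stays uniform, which the mm/filemap.c hunk below relies on. A hedged sketch of the intended calling pattern, with a hypothetical lock and helper:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(tree_lock);	/* hypothetical */

static int insert_item(struct radix_tree_root *tree, unsigned long index,
		       void *item, gfp_t gfp)
{
	int err;

	/* Preloads per-CPU nodes only if 'gfp' allows sleeping; either
	 * way it returns with preemption disabled on success. */
	err = radix_tree_maybe_preload(gfp & ~__GFP_HIGHMEM);
	if (err)
		return err;

	spin_lock_irq(&tree_lock);
	err = radix_tree_insert(tree, index, item);
	spin_unlock_irq(&tree_lock);

	radix_tree_preload_end();	/* re-enables preemption */
	return err;
}
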
diff --git a/lib/rbtree.c b/lib/rbtree.c index c0e31fe2fabf..65f4effd117f 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c | |||
@@ -518,3 +518,43 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new, | |||
518 | *new = *victim; | 518 | *new = *victim; |
519 | } | 519 | } |
520 | EXPORT_SYMBOL(rb_replace_node); | 520 | EXPORT_SYMBOL(rb_replace_node); |
521 | |||
522 | static struct rb_node *rb_left_deepest_node(const struct rb_node *node) | ||
523 | { | ||
524 | for (;;) { | ||
525 | if (node->rb_left) | ||
526 | node = node->rb_left; | ||
527 | else if (node->rb_right) | ||
528 | node = node->rb_right; | ||
529 | else | ||
530 | return (struct rb_node *)node; | ||
531 | } | ||
532 | } | ||
533 | |||
534 | struct rb_node *rb_next_postorder(const struct rb_node *node) | ||
535 | { | ||
536 | const struct rb_node *parent; | ||
537 | if (!node) | ||
538 | return NULL; | ||
539 | parent = rb_parent(node); | ||
540 | |||
541 | /* If we're sitting on node, we've already seen our children */ | ||
542 | if (parent && node == parent->rb_left && parent->rb_right) { | ||
543 | /* If we are the parent's left node, go to the parent's right | ||
544 | * node then all the way down to the left */ | ||
545 | return rb_left_deepest_node(parent->rb_right); | ||
546 | } else | ||
547 | /* Otherwise we are the parent's right node, and the parent | ||
548 | * should be next */ | ||
549 | return (struct rb_node *)parent; | ||
550 | } | ||
551 | EXPORT_SYMBOL(rb_next_postorder); | ||
552 | |||
553 | struct rb_node *rb_first_postorder(const struct rb_root *root) | ||
554 | { | ||
555 | if (!root->rb_node) | ||
556 | return NULL; | ||
557 | |||
558 | return rb_left_deepest_node(root->rb_node); | ||
559 | } | ||
560 | EXPORT_SYMBOL(rb_first_postorder); | ||
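
The value of postorder iteration is that both children are visited before their parent, so the current node may be freed mid-walk as long as its successor is fetched first: rb_next_postorder() only consults the parent and the parent's not-yet-visited right subtree, never a freed child. A hedged sketch of the canonical use, tearing a tree down without any rebalancing, with a hypothetical entry type:

#include <linux/rbtree.h>
#include <linux/slab.h>

struct item {
	struct rb_node node;
	/* payload ... */
};

static void free_all(struct rb_root *root)
{
	struct rb_node *pos = rb_first_postorder(root);

	while (pos) {
		/* Fetch the successor before freeing the current node. */
		struct rb_node *next = rb_next_postorder(pos);

		kfree(rb_entry(pos, struct item, node));
		pos = next;
	}
	*root = RB_ROOT;
}
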
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 122f02f9941b..31dd4ccd3baa 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c | |||
@@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb) | |||
114 | return count; | 114 | return count; |
115 | } | 115 | } |
116 | 116 | ||
117 | static void check_postorder(int nr_nodes) | ||
118 | { | ||
119 | struct rb_node *rb; | ||
120 | int count = 0; | ||
121 | for (rb = rb_first_postorder(&root); rb; rb = rb_next_postorder(rb)) | ||
122 | count++; | ||
123 | |||
124 | WARN_ON_ONCE(count != nr_nodes); | ||
125 | } | ||
126 | |||
117 | static void check(int nr_nodes) | 127 | static void check(int nr_nodes) |
118 | { | 128 | { |
119 | struct rb_node *rb; | 129 | struct rb_node *rb; |
@@ -136,6 +146,8 @@ static void check(int nr_nodes) | |||
136 | 146 | ||
137 | WARN_ON_ONCE(count != nr_nodes); | 147 | WARN_ON_ONCE(count != nr_nodes); |
138 | WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); | 148 | WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); |
149 | |||
150 | check_postorder(nr_nodes); | ||
139 | } | 151 | } |
140 | 152 | ||
141 | static void check_augmented(int nr_nodes) | 153 | static void check_augmented(int nr_nodes) |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 37d9edcd14cf..ce682f7a4f29 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -652,7 +652,7 @@ int pdflush_proc_obsolete(struct ctl_table *table, int write, | |||
652 | { | 652 | { |
653 | char kbuf[] = "0\n"; | 653 | char kbuf[] = "0\n"; |
654 | 654 | ||
655 | if (*ppos) { | 655 | if (*ppos || *lenp < sizeof(kbuf)) { |
656 | *lenp = 0; | 656 | *lenp = 0; |
657 | return 0; | 657 | return 0; |
658 | } | 658 | } |
diff --git a/mm/compaction.c b/mm/compaction.c index 05ccb4cc0bdb..c43789388cd8 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -1131,6 +1131,9 @@ void compact_pgdat(pg_data_t *pgdat, int order) | |||
1131 | .sync = false, | 1131 | .sync = false, |
1132 | }; | 1132 | }; |
1133 | 1133 | ||
1134 | if (!order) | ||
1135 | return; | ||
1136 | |||
1134 | __compact_pgdat(pgdat, &cc); | 1137 | __compact_pgdat(pgdat, &cc); |
1135 | } | 1138 | } |
1136 | 1139 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 731a2c24532d..e607728db4a8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -469,7 +469,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, | |||
469 | if (error) | 469 | if (error) |
470 | goto out; | 470 | goto out; |
471 | 471 | ||
472 | error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); | 472 | error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); |
473 | if (error == 0) { | 473 | if (error == 0) { |
474 | page_cache_get(page); | 474 | page_cache_get(page); |
475 | page->mapping = mapping; | 475 | page->mapping = mapping; |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a92012a71702..963e14c0486f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -417,7 +417,7 @@ static ssize_t scan_sleep_millisecs_store(struct kobject *kobj, | |||
417 | unsigned long msecs; | 417 | unsigned long msecs; |
418 | int err; | 418 | int err; |
419 | 419 | ||
420 | err = strict_strtoul(buf, 10, &msecs); | 420 | err = kstrtoul(buf, 10, &msecs); |
421 | if (err || msecs > UINT_MAX) | 421 | if (err || msecs > UINT_MAX) |
422 | return -EINVAL; | 422 | return -EINVAL; |
423 | 423 | ||
@@ -444,7 +444,7 @@ static ssize_t alloc_sleep_millisecs_store(struct kobject *kobj, | |||
444 | unsigned long msecs; | 444 | unsigned long msecs; |
445 | int err; | 445 | int err; |
446 | 446 | ||
447 | err = strict_strtoul(buf, 10, &msecs); | 447 | err = kstrtoul(buf, 10, &msecs); |
448 | if (err || msecs > UINT_MAX) | 448 | if (err || msecs > UINT_MAX) |
449 | return -EINVAL; | 449 | return -EINVAL; |
450 | 450 | ||
@@ -470,7 +470,7 @@ static ssize_t pages_to_scan_store(struct kobject *kobj, | |||
470 | int err; | 470 | int err; |
471 | unsigned long pages; | 471 | unsigned long pages; |
472 | 472 | ||
473 | err = strict_strtoul(buf, 10, &pages); | 473 | err = kstrtoul(buf, 10, &pages); |
474 | if (err || !pages || pages > UINT_MAX) | 474 | if (err || !pages || pages > UINT_MAX) |
475 | return -EINVAL; | 475 | return -EINVAL; |
476 | 476 | ||
@@ -538,7 +538,7 @@ static ssize_t khugepaged_max_ptes_none_store(struct kobject *kobj, | |||
538 | int err; | 538 | int err; |
539 | unsigned long max_ptes_none; | 539 | unsigned long max_ptes_none; |
540 | 540 | ||
541 | err = strict_strtoul(buf, 10, &max_ptes_none); | 541 | err = kstrtoul(buf, 10, &max_ptes_none); |
542 | if (err || max_ptes_none > HPAGE_PMD_NR-1) | 542 | if (err || max_ptes_none > HPAGE_PMD_NR-1) |
543 | return -EINVAL; | 543 | return -EINVAL; |
544 | 544 | ||
@@ -2296,6 +2296,8 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2296 | goto out; | 2296 | goto out; |
2297 | 2297 | ||
2298 | vma = find_vma(mm, address); | 2298 | vma = find_vma(mm, address); |
2299 | if (!vma) | ||
2300 | goto out; | ||
2299 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 2301 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
2300 | hend = vma->vm_end & HPAGE_PMD_MASK; | 2302 | hend = vma->vm_end & HPAGE_PMD_MASK; |
2301 | if (address < hstart || address + HPAGE_PMD_SIZE > hend) | 2303 | if (address < hstart || address + HPAGE_PMD_SIZE > hend) |
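
Besides the NULL-vma check in collapse_huge_page(), this hunk mechanically converts strict_strtoul() — by this point a deprecated alias — to kstrtoul(); the semantics (whole-string parse, -EINVAL/-ERANGE on bad input) are unchanged. A minimal sketch of the pattern the sysfs store handlers above follow, with a hypothetical attribute and bound:

#include <linux/kernel.h>
#include <linux/kobject.h>

static ssize_t example_store(struct kobject *kobj,
			     struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	unsigned long val;
	int err;

	/* Base-10, whole-string parse; trailing garbage is rejected. */
	err = kstrtoul(buf, 10, &val);
	if (err || val > UINT_MAX)	/* hypothetical upper bound */
		return -EINVAL;

	/* ... apply 'val' ... */
	return count;
}
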
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b60f33080a28..b49579c7f2a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/rmap.h> | 21 | #include <linux/rmap.h> |
22 | #include <linux/swap.h> | 22 | #include <linux/swap.h> |
23 | #include <linux/swapops.h> | 23 | #include <linux/swapops.h> |
24 | #include <linux/page-isolation.h> | ||
24 | 25 | ||
25 | #include <asm/page.h> | 26 | #include <asm/page.h> |
26 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
@@ -33,7 +34,6 @@ | |||
33 | #include "internal.h" | 34 | #include "internal.h" |
34 | 35 | ||
35 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | 36 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; |
36 | static gfp_t htlb_alloc_mask = GFP_HIGHUSER; | ||
37 | unsigned long hugepages_treat_as_movable; | 37 | unsigned long hugepages_treat_as_movable; |
38 | 38 | ||
39 | int hugetlb_max_hstate __read_mostly; | 39 | int hugetlb_max_hstate __read_mostly; |
@@ -48,7 +48,8 @@ static unsigned long __initdata default_hstate_max_huge_pages; | |||
48 | static unsigned long __initdata default_hstate_size; | 48 | static unsigned long __initdata default_hstate_size; |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages | 51 | * Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages, |
52 | * free_huge_pages, and surplus_huge_pages. | ||
52 | */ | 53 | */ |
53 | DEFINE_SPINLOCK(hugetlb_lock); | 54 | DEFINE_SPINLOCK(hugetlb_lock); |
54 | 55 | ||
@@ -135,9 +136,9 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) | |||
135 | * across the pages in a mapping. | 136 | * across the pages in a mapping. |
136 | * | 137 | * |
137 | * The region data structures are protected by a combination of the mmap_sem | 138 | * The region data structures are protected by a combination of the mmap_sem |
138 | * and the hugetlb_instantion_mutex. To access or modify a region the caller | 139 | * and the hugetlb_instantiation_mutex. To access or modify a region the caller |
139 | * must either hold the mmap_sem for write, or the mmap_sem for read and | 140 | * must either hold the mmap_sem for write, or the mmap_sem for read and |
140 | * the hugetlb_instantiation mutex: | 141 | * the hugetlb_instantiation_mutex: |
141 | * | 142 | * |
142 | * down_write(&mm->mmap_sem); | 143 | * down_write(&mm->mmap_sem); |
143 | * or | 144 | * or |
@@ -434,25 +435,6 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag) | |||
434 | return (get_vma_private_data(vma) & flag) != 0; | 435 | return (get_vma_private_data(vma) & flag) != 0; |
435 | } | 436 | } |
436 | 437 | ||
437 | /* Decrement the reserved pages in the hugepage pool by one */ | ||
438 | static void decrement_hugepage_resv_vma(struct hstate *h, | ||
439 | struct vm_area_struct *vma) | ||
440 | { | ||
441 | if (vma->vm_flags & VM_NORESERVE) | ||
442 | return; | ||
443 | |||
444 | if (vma->vm_flags & VM_MAYSHARE) { | ||
445 | /* Shared mappings always use reserves */ | ||
446 | h->resv_huge_pages--; | ||
447 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { | ||
448 | /* | ||
449 | * Only the process that called mmap() has reserves for | ||
450 | * private mappings. | ||
451 | */ | ||
452 | h->resv_huge_pages--; | ||
453 | } | ||
454 | } | ||
455 | |||
456 | /* Reset counters to 0 and clear all HPAGE_RESV_* flags */ | 438 | /* Reset counters to 0 and clear all HPAGE_RESV_* flags */ |
457 | void reset_vma_resv_huge_pages(struct vm_area_struct *vma) | 439 | void reset_vma_resv_huge_pages(struct vm_area_struct *vma) |
458 | { | 440 | { |
@@ -462,12 +444,35 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma) | |||
462 | } | 444 | } |
463 | 445 | ||
464 | /* Returns true if the VMA has associated reserve pages */ | 446 | /* Returns true if the VMA has associated reserve pages */ |
465 | static int vma_has_reserves(struct vm_area_struct *vma) | 447 | static int vma_has_reserves(struct vm_area_struct *vma, long chg) |
466 | { | 448 | { |
449 | if (vma->vm_flags & VM_NORESERVE) { | ||
450 | /* | ||
451 | * This address is already reserved by another process (chg == 0), | ||
452 | * so we should decrement the reserved count. Without decrementing, | ||
453 | * the reserve count remains after the inode is released, because the | ||
454 | * allocated page goes into the page cache and is regarded as coming | ||
455 | * from the reserved pool in the release step. We currently have no | ||
456 | * better way to deal with this situation, so add this | ||
457 | * work-around here. | ||
458 | */ | ||
459 | if (vma->vm_flags & VM_MAYSHARE && chg == 0) | ||
460 | return 1; | ||
461 | else | ||
462 | return 0; | ||
463 | } | ||
464 | |||
465 | /* Shared mappings always use reserves */ | ||
467 | if (vma->vm_flags & VM_MAYSHARE) | 466 | if (vma->vm_flags & VM_MAYSHARE) |
468 | return 1; | 467 | return 1; |
468 | |||
469 | /* | ||
470 | * Only the process that called mmap() has reserves for | ||
471 | * private mappings. | ||
472 | */ | ||
469 | if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) | 473 | if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) |
470 | return 1; | 474 | return 1; |
475 | |||
471 | return 0; | 476 | return 0; |
472 | } | 477 | } |
473 | 478 | ||
@@ -517,9 +522,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid) | |||
517 | { | 522 | { |
518 | struct page *page; | 523 | struct page *page; |
519 | 524 | ||
520 | if (list_empty(&h->hugepage_freelists[nid])) | 525 | list_for_each_entry(page, &h->hugepage_freelists[nid], lru) |
526 | if (!is_migrate_isolate_page(page)) | ||
527 | break; | ||
528 | /* | ||
529 | * If no non-isolated free hugepage is found on the list, | ||
530 | * the allocation fails. | ||
531 | */ | ||
532 | if (&h->hugepage_freelists[nid] == &page->lru) | ||
521 | return NULL; | 533 | return NULL; |
522 | page = list_entry(h->hugepage_freelists[nid].next, struct page, lru); | ||
523 | list_move(&page->lru, &h->hugepage_activelist); | 534 | list_move(&page->lru, &h->hugepage_activelist); |
524 | set_page_refcounted(page); | 535 | set_page_refcounted(page); |
525 | h->free_huge_pages--; | 536 | h->free_huge_pages--; |
@@ -527,9 +538,19 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid) | |||
527 | return page; | 538 | return page; |
528 | } | 539 | } |
529 | 540 | ||
541 | /* Movability of hugepages depends on migration support. */ | ||
542 | static inline gfp_t htlb_alloc_mask(struct hstate *h) | ||
543 | { | ||
544 | if (hugepages_treat_as_movable || hugepage_migration_support(h)) | ||
545 | return GFP_HIGHUSER_MOVABLE; | ||
546 | else | ||
547 | return GFP_HIGHUSER; | ||
548 | } | ||
549 | |||
530 | static struct page *dequeue_huge_page_vma(struct hstate *h, | 550 | static struct page *dequeue_huge_page_vma(struct hstate *h, |
531 | struct vm_area_struct *vma, | 551 | struct vm_area_struct *vma, |
532 | unsigned long address, int avoid_reserve) | 552 | unsigned long address, int avoid_reserve, |
553 | long chg) | ||
533 | { | 554 | { |
534 | struct page *page = NULL; | 555 | struct page *page = NULL; |
535 | struct mempolicy *mpol; | 556 | struct mempolicy *mpol; |
@@ -539,16 +560,12 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, | |||
539 | struct zoneref *z; | 560 | struct zoneref *z; |
540 | unsigned int cpuset_mems_cookie; | 561 | unsigned int cpuset_mems_cookie; |
541 | 562 | ||
542 | retry_cpuset: | ||
543 | cpuset_mems_cookie = get_mems_allowed(); | ||
544 | zonelist = huge_zonelist(vma, address, | ||
545 | htlb_alloc_mask, &mpol, &nodemask); | ||
546 | /* | 563 | /* |
547 | * A child process with MAP_PRIVATE mappings created by its parent | 564 |
548 | * has no page reserves. This check ensures that reservations are | 565 |
549 | * not "stolen". The child may still get SIGKILLed | 566 |
550 | */ | 567 | */ |
551 | if (!vma_has_reserves(vma) && | 568 | if (!vma_has_reserves(vma, chg) && |
552 | h->free_huge_pages - h->resv_huge_pages == 0) | 569 | h->free_huge_pages - h->resv_huge_pages == 0) |
553 | goto err; | 570 | goto err; |
554 | 571 | ||
@@ -556,13 +573,23 @@ retry_cpuset: | |||
556 | if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0) | 573 | if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0) |
557 | goto err; | 574 | goto err; |
558 | 575 | ||
576 | retry_cpuset: | ||
577 | cpuset_mems_cookie = get_mems_allowed(); | ||
578 | zonelist = huge_zonelist(vma, address, | ||
579 | htlb_alloc_mask(h), &mpol, &nodemask); | ||
580 | |||
559 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 581 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
560 | MAX_NR_ZONES - 1, nodemask) { | 582 | MAX_NR_ZONES - 1, nodemask) { |
561 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) { | 583 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) { |
562 | page = dequeue_huge_page_node(h, zone_to_nid(zone)); | 584 | page = dequeue_huge_page_node(h, zone_to_nid(zone)); |
563 | if (page) { | 585 | if (page) { |
564 | if (!avoid_reserve) | 586 | if (avoid_reserve) |
565 | decrement_hugepage_resv_vma(h, vma); | 587 | break; |
588 | if (!vma_has_reserves(vma, chg)) | ||
589 | break; | ||
590 | |||
591 | SetPagePrivate(page); | ||
592 | h->resv_huge_pages--; | ||
566 | break; | 593 | break; |
567 | } | 594 | } |
568 | } | 595 | } |
@@ -574,7 +601,6 @@ retry_cpuset: | |||
574 | return page; | 601 | return page; |
575 | 602 | ||
576 | err: | 603 | err: |
577 | mpol_cond_put(mpol); | ||
578 | return NULL; | 604 | return NULL; |
579 | } | 605 | } |
580 | 606 | ||
@@ -620,15 +646,20 @@ static void free_huge_page(struct page *page) | |||
620 | int nid = page_to_nid(page); | 646 | int nid = page_to_nid(page); |
621 | struct hugepage_subpool *spool = | 647 | struct hugepage_subpool *spool = |
622 | (struct hugepage_subpool *)page_private(page); | 648 | (struct hugepage_subpool *)page_private(page); |
649 | bool restore_reserve; | ||
623 | 650 | ||
624 | set_page_private(page, 0); | 651 | set_page_private(page, 0); |
625 | page->mapping = NULL; | 652 | page->mapping = NULL; |
626 | BUG_ON(page_count(page)); | 653 | BUG_ON(page_count(page)); |
627 | BUG_ON(page_mapcount(page)); | 654 | BUG_ON(page_mapcount(page)); |
655 | restore_reserve = PagePrivate(page); | ||
628 | 656 | ||
629 | spin_lock(&hugetlb_lock); | 657 | spin_lock(&hugetlb_lock); |
630 | hugetlb_cgroup_uncharge_page(hstate_index(h), | 658 | hugetlb_cgroup_uncharge_page(hstate_index(h), |
631 | pages_per_huge_page(h), page); | 659 | pages_per_huge_page(h), page); |
660 | if (restore_reserve) | ||
661 | h->resv_huge_pages++; | ||
662 | |||
632 | if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) { | 663 | if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) { |
633 | /* remove the page from active list */ | 664 | /* remove the page from active list */ |
634 | list_del(&page->lru); | 665 | list_del(&page->lru); |
@@ -715,7 +746,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) | |||
715 | return NULL; | 746 | return NULL; |
716 | 747 | ||
717 | page = alloc_pages_exact_node(nid, | 748 | page = alloc_pages_exact_node(nid, |
718 | htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| | 749 | htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| |
719 | __GFP_REPEAT|__GFP_NOWARN, | 750 | __GFP_REPEAT|__GFP_NOWARN, |
720 | huge_page_order(h)); | 751 | huge_page_order(h)); |
721 | if (page) { | 752 | if (page) { |
@@ -772,33 +803,6 @@ static int hstate_next_node_to_alloc(struct hstate *h, | |||
772 | return nid; | 803 | return nid; |
773 | } | 804 | } |
774 | 805 | ||
775 | static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed) | ||
776 | { | ||
777 | struct page *page; | ||
778 | int start_nid; | ||
779 | int next_nid; | ||
780 | int ret = 0; | ||
781 | |||
782 | start_nid = hstate_next_node_to_alloc(h, nodes_allowed); | ||
783 | next_nid = start_nid; | ||
784 | |||
785 | do { | ||
786 | page = alloc_fresh_huge_page_node(h, next_nid); | ||
787 | if (page) { | ||
788 | ret = 1; | ||
789 | break; | ||
790 | } | ||
791 | next_nid = hstate_next_node_to_alloc(h, nodes_allowed); | ||
792 | } while (next_nid != start_nid); | ||
793 | |||
794 | if (ret) | ||
795 | count_vm_event(HTLB_BUDDY_PGALLOC); | ||
796 | else | ||
797 | count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); | ||
798 | |||
799 | return ret; | ||
800 | } | ||
801 | |||
802 | /* | 806 | /* |
803 | * helper for free_pool_huge_page() - return the previously saved | 807 | * helper for free_pool_huge_page() - return the previously saved |
804 | * node ["this node"] from which to free a huge page. Advance the | 808 | * node ["this node"] from which to free a huge page. Advance the |
@@ -817,6 +821,40 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) | |||
817 | return nid; | 821 | return nid; |
818 | } | 822 | } |
819 | 823 | ||
824 | #define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask) \ | ||
825 | for (nr_nodes = nodes_weight(*mask); \ | ||
826 | nr_nodes > 0 && \ | ||
827 | ((node = hstate_next_node_to_alloc(hs, mask)) || 1); \ | ||
828 | nr_nodes--) | ||
829 | |||
830 | #define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \ | ||
831 | for (nr_nodes = nodes_weight(*mask); \ | ||
832 | nr_nodes > 0 && \ | ||
833 | ((node = hstate_next_node_to_free(hs, mask)) || 1); \ | ||
834 | nr_nodes--) | ||
835 | |||
836 | static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed) | ||
837 | { | ||
838 | struct page *page; | ||
839 | int nr_nodes, node; | ||
840 | int ret = 0; | ||
841 | |||
842 | for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) { | ||
843 | page = alloc_fresh_huge_page_node(h, node); | ||
844 | if (page) { | ||
845 | ret = 1; | ||
846 | break; | ||
847 | } | ||
848 | } | ||
849 | |||
850 | if (ret) | ||
851 | count_vm_event(HTLB_BUDDY_PGALLOC); | ||
852 | else | ||
853 | count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); | ||
854 | |||
855 | return ret; | ||
856 | } | ||
857 | |||
820 | /* | 858 | /* |
821 | * Free huge page from pool from next node to free. | 859 | * Free huge page from pool from next node to free. |
822 | * Attempt to keep persistent huge pages more or less | 860 | * Attempt to keep persistent huge pages more or less |
@@ -826,40 +864,73 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) | |||
826 | static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, | 864 | static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, |
827 | bool acct_surplus) | 865 | bool acct_surplus) |
828 | { | 866 | { |
829 | int start_nid; | 867 | int nr_nodes, node; |
830 | int next_nid; | ||
831 | int ret = 0; | 868 | int ret = 0; |
832 | 869 | ||
833 | start_nid = hstate_next_node_to_free(h, nodes_allowed); | 870 | for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { |
834 | next_nid = start_nid; | ||
835 | |||
836 | do { | ||
837 | /* | 871 | /* |
838 | * If we're returning unused surplus pages, only examine | 872 | * If we're returning unused surplus pages, only examine |
839 | * nodes with surplus pages. | 873 | * nodes with surplus pages. |
840 | */ | 874 | */ |
841 | if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) && | 875 | if ((!acct_surplus || h->surplus_huge_pages_node[node]) && |
842 | !list_empty(&h->hugepage_freelists[next_nid])) { | 876 | !list_empty(&h->hugepage_freelists[node])) { |
843 | struct page *page = | 877 | struct page *page = |
844 | list_entry(h->hugepage_freelists[next_nid].next, | 878 | list_entry(h->hugepage_freelists[node].next, |
845 | struct page, lru); | 879 | struct page, lru); |
846 | list_del(&page->lru); | 880 | list_del(&page->lru); |
847 | h->free_huge_pages--; | 881 | h->free_huge_pages--; |
848 | h->free_huge_pages_node[next_nid]--; | 882 | h->free_huge_pages_node[node]--; |
849 | if (acct_surplus) { | 883 | if (acct_surplus) { |
850 | h->surplus_huge_pages--; | 884 | h->surplus_huge_pages--; |
851 | h->surplus_huge_pages_node[next_nid]--; | 885 | h->surplus_huge_pages_node[node]--; |
852 | } | 886 | } |
853 | update_and_free_page(h, page); | 887 | update_and_free_page(h, page); |
854 | ret = 1; | 888 | ret = 1; |
855 | break; | 889 | break; |
856 | } | 890 | } |
857 | next_nid = hstate_next_node_to_free(h, nodes_allowed); | 891 | } |
858 | } while (next_nid != start_nid); | ||
859 | 892 | ||
860 | return ret; | 893 | return ret; |
861 | } | 894 | } |
862 | 895 | ||
896 | /* | ||
897 | * Dissolve a given free hugepage into free buddy pages. This function does | ||
898 | * nothing for in-use (including surplus) hugepages. | ||
899 | */ | ||
900 | static void dissolve_free_huge_page(struct page *page) | ||
901 | { | ||
902 | spin_lock(&hugetlb_lock); | ||
903 | if (PageHuge(page) && !page_count(page)) { | ||
904 | struct hstate *h = page_hstate(page); | ||
905 | int nid = page_to_nid(page); | ||
906 | list_del(&page->lru); | ||
907 | h->free_huge_pages--; | ||
908 | h->free_huge_pages_node[nid]--; | ||
909 | update_and_free_page(h, page); | ||
910 | } | ||
911 | spin_unlock(&hugetlb_lock); | ||
912 | } | ||
913 | |||
914 | /* | ||
915 | * Dissolve free hugepages in a given pfn range. Used by memory hotplug to | ||
916 | * make specified memory blocks removable from the system. | ||
917 | * Note that start_pfn should be aligned with the (minimum) hugepage size. | ||
918 | */ | ||
919 | void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) | ||
920 | { | ||
921 | unsigned int order = 8 * sizeof(void *); | ||
922 | unsigned long pfn; | ||
923 | struct hstate *h; | ||
924 | |||
925 | /* Set scan step to minimum hugepage size */ | ||
926 | for_each_hstate(h) | ||
927 | if (order > huge_page_order(h)) | ||
928 | order = huge_page_order(h); | ||
929 | VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order)); | ||
930 | for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order) | ||
931 | dissolve_free_huge_page(pfn_to_page(pfn)); | ||
932 | } | ||
933 | |||
863 | static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) | 934 | static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) |
864 | { | 935 | { |
865 | struct page *page; | 936 | struct page *page; |
@@ -902,12 +973,12 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) | |||
902 | spin_unlock(&hugetlb_lock); | 973 | spin_unlock(&hugetlb_lock); |
903 | 974 | ||
904 | if (nid == NUMA_NO_NODE) | 975 | if (nid == NUMA_NO_NODE) |
905 | page = alloc_pages(htlb_alloc_mask|__GFP_COMP| | 976 | page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP| |
906 | __GFP_REPEAT|__GFP_NOWARN, | 977 | __GFP_REPEAT|__GFP_NOWARN, |
907 | huge_page_order(h)); | 978 | huge_page_order(h)); |
908 | else | 979 | else |
909 | page = alloc_pages_exact_node(nid, | 980 | page = alloc_pages_exact_node(nid, |
910 | htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| | 981 | htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| |
911 | __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); | 982 | __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); |
912 | 983 | ||
913 | if (page && arch_prepare_hugepage(page)) { | 984 | if (page && arch_prepare_hugepage(page)) { |
@@ -944,10 +1015,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) | |||
944 | */ | 1015 | */ |
945 | struct page *alloc_huge_page_node(struct hstate *h, int nid) | 1016 | struct page *alloc_huge_page_node(struct hstate *h, int nid) |
946 | { | 1017 | { |
947 | struct page *page; | 1018 | struct page *page = NULL; |
948 | 1019 | ||
949 | spin_lock(&hugetlb_lock); | 1020 | spin_lock(&hugetlb_lock); |
950 | page = dequeue_huge_page_node(h, nid); | 1021 | if (h->free_huge_pages - h->resv_huge_pages > 0) |
1022 | page = dequeue_huge_page_node(h, nid); | ||
951 | spin_unlock(&hugetlb_lock); | 1023 | spin_unlock(&hugetlb_lock); |
952 | 1024 | ||
953 | if (!page) | 1025 | if (!page) |
@@ -1035,11 +1107,8 @@ free: | |||
1035 | spin_unlock(&hugetlb_lock); | 1107 | spin_unlock(&hugetlb_lock); |
1036 | 1108 | ||
1037 | /* Free unnecessary surplus pages to the buddy allocator */ | 1109 | /* Free unnecessary surplus pages to the buddy allocator */ |
1038 | if (!list_empty(&surplus_list)) { | 1110 | list_for_each_entry_safe(page, tmp, &surplus_list, lru) |
1039 | list_for_each_entry_safe(page, tmp, &surplus_list, lru) { | 1111 | put_page(page); |
1040 | put_page(page); | ||
1041 | } | ||
1042 | } | ||
1043 | spin_lock(&hugetlb_lock); | 1112 | spin_lock(&hugetlb_lock); |
1044 | 1113 | ||
1045 | return ret; | 1114 | return ret; |
@@ -1106,9 +1175,9 @@ static long vma_needs_reservation(struct hstate *h, | |||
1106 | } else { | 1175 | } else { |
1107 | long err; | 1176 | long err; |
1108 | pgoff_t idx = vma_hugecache_offset(h, vma, addr); | 1177 | pgoff_t idx = vma_hugecache_offset(h, vma, addr); |
1109 | struct resv_map *reservations = vma_resv_map(vma); | 1178 | struct resv_map *resv = vma_resv_map(vma); |
1110 | 1179 | ||
1111 | err = region_chg(&reservations->regions, idx, idx + 1); | 1180 | err = region_chg(&resv->regions, idx, idx + 1); |
1112 | if (err < 0) | 1181 | if (err < 0) |
1113 | return err; | 1182 | return err; |
1114 | return 0; | 1183 | return 0; |
@@ -1126,10 +1195,10 @@ static void vma_commit_reservation(struct hstate *h, | |||
1126 | 1195 | ||
1127 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { | 1196 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { |
1128 | pgoff_t idx = vma_hugecache_offset(h, vma, addr); | 1197 | pgoff_t idx = vma_hugecache_offset(h, vma, addr); |
1129 | struct resv_map *reservations = vma_resv_map(vma); | 1198 | struct resv_map *resv = vma_resv_map(vma); |
1130 | 1199 | ||
1131 | /* Mark this page used in the map. */ | 1200 | /* Mark this page used in the map. */ |
1132 | region_add(&reservations->regions, idx, idx + 1); | 1201 | region_add(&resv->regions, idx, idx + 1); |
1133 | } | 1202 | } |
1134 | } | 1203 | } |
1135 | 1204 | ||
@@ -1155,38 +1224,35 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
1155 | chg = vma_needs_reservation(h, vma, addr); | 1224 | chg = vma_needs_reservation(h, vma, addr); |
1156 | if (chg < 0) | 1225 | if (chg < 0) |
1157 | return ERR_PTR(-ENOMEM); | 1226 | return ERR_PTR(-ENOMEM); |
1158 | if (chg) | 1227 | if (chg || avoid_reserve) |
1159 | if (hugepage_subpool_get_pages(spool, chg)) | 1228 | if (hugepage_subpool_get_pages(spool, 1)) |
1160 | return ERR_PTR(-ENOSPC); | 1229 | return ERR_PTR(-ENOSPC); |
1161 | 1230 | ||
1162 | ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg); | 1231 | ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg); |
1163 | if (ret) { | 1232 | if (ret) { |
1164 | hugepage_subpool_put_pages(spool, chg); | 1233 | if (chg || avoid_reserve) |
1234 | hugepage_subpool_put_pages(spool, 1); | ||
1165 | return ERR_PTR(-ENOSPC); | 1235 | return ERR_PTR(-ENOSPC); |
1166 | } | 1236 | } |
1167 | spin_lock(&hugetlb_lock); | 1237 | spin_lock(&hugetlb_lock); |
1168 | page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve); | 1238 | page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg); |
1169 | if (page) { | 1239 | if (!page) { |
1170 | /* update page cgroup details */ | ||
1171 | hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), | ||
1172 | h_cg, page); | ||
1173 | spin_unlock(&hugetlb_lock); | ||
1174 | } else { | ||
1175 | spin_unlock(&hugetlb_lock); | 1240 | spin_unlock(&hugetlb_lock); |
1176 | page = alloc_buddy_huge_page(h, NUMA_NO_NODE); | 1241 | page = alloc_buddy_huge_page(h, NUMA_NO_NODE); |
1177 | if (!page) { | 1242 | if (!page) { |
1178 | hugetlb_cgroup_uncharge_cgroup(idx, | 1243 | hugetlb_cgroup_uncharge_cgroup(idx, |
1179 | pages_per_huge_page(h), | 1244 | pages_per_huge_page(h), |
1180 | h_cg); | 1245 | h_cg); |
1181 | hugepage_subpool_put_pages(spool, chg); | 1246 | if (chg || avoid_reserve) |
1247 | hugepage_subpool_put_pages(spool, 1); | ||
1182 | return ERR_PTR(-ENOSPC); | 1248 | return ERR_PTR(-ENOSPC); |
1183 | } | 1249 | } |
1184 | spin_lock(&hugetlb_lock); | 1250 | spin_lock(&hugetlb_lock); |
1185 | hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), | ||
1186 | h_cg, page); | ||
1187 | list_move(&page->lru, &h->hugepage_activelist); | 1251 | list_move(&page->lru, &h->hugepage_activelist); |
1188 | spin_unlock(&hugetlb_lock); | 1252 | /* Fall through */ |
1189 | } | 1253 | } |
1254 | hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page); | ||
1255 | spin_unlock(&hugetlb_lock); | ||
1190 | 1256 | ||
1191 | set_page_private(page, (unsigned long)spool); | 1257 | set_page_private(page, (unsigned long)spool); |
1192 | 1258 | ||
@@ -1194,17 +1260,29 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
1194 | return page; | 1260 | return page; |
1195 | } | 1261 | } |
1196 | 1262 | ||
1263 | /* | ||
1264 | * A wrapper for alloc_huge_page() that simply returns the page if the | ||
1265 | * allocation succeeds, otherwise NULL. It is called from new_vma_page(), | ||
1266 | * where no ERR_PTR() value is expected to be returned. | ||
1267 | */ | ||
1268 | struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, | ||
1269 | unsigned long addr, int avoid_reserve) | ||
1270 | { | ||
1271 | struct page *page = alloc_huge_page(vma, addr, avoid_reserve); | ||
1272 | if (IS_ERR(page)) | ||
1273 | page = NULL; | ||
1274 | return page; | ||
1275 | } | ||
1276 | |||
1197 | int __weak alloc_bootmem_huge_page(struct hstate *h) | 1277 | int __weak alloc_bootmem_huge_page(struct hstate *h) |
1198 | { | 1278 | { |
1199 | struct huge_bootmem_page *m; | 1279 | struct huge_bootmem_page *m; |
1200 | int nr_nodes = nodes_weight(node_states[N_MEMORY]); | 1280 | int nr_nodes, node; |
1201 | 1281 | ||
1202 | while (nr_nodes) { | 1282 | for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) { |
1203 | void *addr; | 1283 | void *addr; |
1204 | 1284 | ||
1205 | addr = __alloc_bootmem_node_nopanic( | 1285 | addr = __alloc_bootmem_node_nopanic(NODE_DATA(node), |
1206 | NODE_DATA(hstate_next_node_to_alloc(h, | ||
1207 | &node_states[N_MEMORY])), | ||
1208 | huge_page_size(h), huge_page_size(h), 0); | 1286 | huge_page_size(h), huge_page_size(h), 0); |
1209 | 1287 | ||
1210 | if (addr) { | 1288 | if (addr) { |
@@ -1216,7 +1294,6 @@ int __weak alloc_bootmem_huge_page(struct hstate *h) | |||
1216 | m = addr; | 1294 | m = addr; |
1217 | goto found; | 1295 | goto found; |
1218 | } | 1296 | } |
1219 | nr_nodes--; | ||
1220 | } | 1297 | } |
1221 | return 0; | 1298 | return 0; |
1222 | 1299 | ||
@@ -1355,48 +1432,28 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count, | |||
1355 | static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed, | 1432 | static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed, |
1356 | int delta) | 1433 | int delta) |
1357 | { | 1434 | { |
1358 | int start_nid, next_nid; | 1435 | int nr_nodes, node; |
1359 | int ret = 0; | ||
1360 | 1436 | ||
1361 | VM_BUG_ON(delta != -1 && delta != 1); | 1437 | VM_BUG_ON(delta != -1 && delta != 1); |
1362 | 1438 | ||
1363 | if (delta < 0) | 1439 | if (delta < 0) { |
1364 | start_nid = hstate_next_node_to_alloc(h, nodes_allowed); | 1440 | for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) { |
1365 | else | 1441 | if (h->surplus_huge_pages_node[node]) |
1366 | start_nid = hstate_next_node_to_free(h, nodes_allowed); | 1442 | goto found; |
1367 | next_nid = start_nid; | ||
1368 | |||
1369 | do { | ||
1370 | int nid = next_nid; | ||
1371 | if (delta < 0) { | ||
1372 | /* | ||
1373 | * To shrink on this node, there must be a surplus page | ||
1374 | */ | ||
1375 | if (!h->surplus_huge_pages_node[nid]) { | ||
1376 | next_nid = hstate_next_node_to_alloc(h, | ||
1377 | nodes_allowed); | ||
1378 | continue; | ||
1379 | } | ||
1380 | } | 1443 | } |
1381 | if (delta > 0) { | 1444 | } else { |
1382 | /* | 1445 | for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { |
1383 | * Surplus cannot exceed the total number of pages | 1446 | if (h->surplus_huge_pages_node[node] < |
1384 | */ | 1447 | h->nr_huge_pages_node[node]) |
1385 | if (h->surplus_huge_pages_node[nid] >= | 1448 | goto found; |
1386 | h->nr_huge_pages_node[nid]) { | ||
1387 | next_nid = hstate_next_node_to_free(h, | ||
1388 | nodes_allowed); | ||
1389 | continue; | ||
1390 | } | ||
1391 | } | 1449 | } |
1450 | } | ||
1451 | return 0; | ||
1392 | 1452 | ||
1393 | h->surplus_huge_pages += delta; | 1453 | found: |
1394 | h->surplus_huge_pages_node[nid] += delta; | 1454 | h->surplus_huge_pages += delta; |
1395 | ret = 1; | 1455 | h->surplus_huge_pages_node[node] += delta; |
1396 | break; | 1456 | return 1; |
1397 | } while (next_nid != start_nid); | ||
1398 | |||
1399 | return ret; | ||
1400 | } | 1457 | } |
1401 | 1458 | ||
1402 | #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages) | 1459 | #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages) |
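The adjust_pool_surplus() rewrite trades the do/while cursor bookkeeping for two straight-line scans and a goto found exit, so the not-found case is the visible fallthrough to return 0. The same shape in a standalone sketch; data and names are invented for illustration:

#include <stdio.h>

/* The goto-found shape of the rewritten adjust_pool_surplus():
 * scan, jump out on the first usable node, and make the "no
 * candidate" fallthrough explicit. */
static int adjust_first_nonzero(int counts[], int n, int delta)
{
        int i;

        for (i = 0; i < n; i++)
                if (counts[i] > 0)
                        goto found;
        return 0;                       /* nothing to adjust */
found:
        counts[i] += delta;
        return 1;
}

int main(void)
{
        int surplus[3] = { 0, 2, 5 };

        printf("adjusted=%d node1=%d\n",
               adjust_first_nonzero(surplus, 3, -1), surplus[1]);
        return 0;
}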
@@ -1526,7 +1583,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy, | |||
1526 | struct hstate *h; | 1583 | struct hstate *h; |
1527 | NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY); | 1584 | NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY); |
1528 | 1585 | ||
1529 | err = strict_strtoul(buf, 10, &count); | 1586 | err = kstrtoul(buf, 10, &count); |
1530 | if (err) | 1587 | if (err) |
1531 | goto out; | 1588 | goto out; |
1532 | 1589 | ||
@@ -1617,7 +1674,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, | |||
1617 | if (h->order >= MAX_ORDER) | 1674 | if (h->order >= MAX_ORDER) |
1618 | return -EINVAL; | 1675 | return -EINVAL; |
1619 | 1676 | ||
1620 | err = strict_strtoul(buf, 10, &input); | 1677 | err = kstrtoul(buf, 10, &input); |
1621 | if (err) | 1678 | if (err) |
1622 | return err; | 1679 | return err; |
1623 | 1680 | ||
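strict_strtoul() was a deprecated alias for kstrtoul(); the replacement keeps the (buf, base, &result) call shape but parses the whole string, tolerating only a single trailing newline. A userspace approximation of that contract; the real kernel helper handles more cases (bases, overflow), so treat this as a model only:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Userspace approximation of kstrtoul(): whole-string parse that
 * rejects trailing junk, tolerating one trailing newline the way the
 * kernel helper does. */
static int my_kstrtoul(const char *s, int base, unsigned long *res)
{
        char *end;

        errno = 0;
        *res = strtoul(s, &end, base);
        if (errno)
                return -ERANGE;
        if (end == s || (*end != '\0' && strcmp(end, "\n") != 0))
                return -EINVAL;
        return 0;
}

int main(void)
{
        unsigned long v;

        printf("%d\n", my_kstrtoul("42\n", 10, &v));    /* 0, v == 42 */
        printf("%d\n", my_kstrtoul("42x", 10, &v));     /* -EINVAL */
        return 0;
}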
@@ -2068,18 +2125,6 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write, | |||
2068 | } | 2125 | } |
2069 | #endif /* CONFIG_NUMA */ | 2126 | #endif /* CONFIG_NUMA */ |
2070 | 2127 | ||
2071 | int hugetlb_treat_movable_handler(struct ctl_table *table, int write, | ||
2072 | void __user *buffer, | ||
2073 | size_t *length, loff_t *ppos) | ||
2074 | { | ||
2075 | proc_dointvec(table, write, buffer, length, ppos); | ||
2076 | if (hugepages_treat_as_movable) | ||
2077 | htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; | ||
2078 | else | ||
2079 | htlb_alloc_mask = GFP_HIGHUSER; | ||
2080 | return 0; | ||
2081 | } | ||
2082 | |||
2083 | int hugetlb_overcommit_handler(struct ctl_table *table, int write, | 2128 | int hugetlb_overcommit_handler(struct ctl_table *table, int write, |
2084 | void __user *buffer, | 2129 | void __user *buffer, |
2085 | size_t *length, loff_t *ppos) | 2130 | size_t *length, loff_t *ppos) |
@@ -2207,7 +2252,7 @@ out: | |||
2207 | 2252 | ||
2208 | static void hugetlb_vm_op_open(struct vm_area_struct *vma) | 2253 | static void hugetlb_vm_op_open(struct vm_area_struct *vma) |
2209 | { | 2254 | { |
2210 | struct resv_map *reservations = vma_resv_map(vma); | 2255 | struct resv_map *resv = vma_resv_map(vma); |
2211 | 2256 | ||
2212 | /* | 2257 | /* |
2213 | * This new VMA should share its sibling's reservation map if present. | 2258 | * This new VMA should share its sibling's reservation map if present. |
@@ -2217,34 +2262,34 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) | |||
2217 | * after this open call completes. It is therefore safe to take a | 2262 | * after this open call completes. It is therefore safe to take a |
2218 | * new reference here without additional locking. | 2263 | * new reference here without additional locking. |
2219 | */ | 2264 | */ |
2220 | if (reservations) | 2265 | if (resv) |
2221 | kref_get(&reservations->refs); | 2266 | kref_get(&resv->refs); |
2222 | } | 2267 | } |
2223 | 2268 | ||
2224 | static void resv_map_put(struct vm_area_struct *vma) | 2269 | static void resv_map_put(struct vm_area_struct *vma) |
2225 | { | 2270 | { |
2226 | struct resv_map *reservations = vma_resv_map(vma); | 2271 | struct resv_map *resv = vma_resv_map(vma); |
2227 | 2272 | ||
2228 | if (!reservations) | 2273 | if (!resv) |
2229 | return; | 2274 | return; |
2230 | kref_put(&reservations->refs, resv_map_release); | 2275 | kref_put(&resv->refs, resv_map_release); |
2231 | } | 2276 | } |
2232 | 2277 | ||
2233 | static void hugetlb_vm_op_close(struct vm_area_struct *vma) | 2278 | static void hugetlb_vm_op_close(struct vm_area_struct *vma) |
2234 | { | 2279 | { |
2235 | struct hstate *h = hstate_vma(vma); | 2280 | struct hstate *h = hstate_vma(vma); |
2236 | struct resv_map *reservations = vma_resv_map(vma); | 2281 | struct resv_map *resv = vma_resv_map(vma); |
2237 | struct hugepage_subpool *spool = subpool_vma(vma); | 2282 | struct hugepage_subpool *spool = subpool_vma(vma); |
2238 | unsigned long reserve; | 2283 | unsigned long reserve; |
2239 | unsigned long start; | 2284 | unsigned long start; |
2240 | unsigned long end; | 2285 | unsigned long end; |
2241 | 2286 | ||
2242 | if (reservations) { | 2287 | if (resv) { |
2243 | start = vma_hugecache_offset(h, vma, vma->vm_start); | 2288 | start = vma_hugecache_offset(h, vma, vma->vm_start); |
2244 | end = vma_hugecache_offset(h, vma, vma->vm_end); | 2289 | end = vma_hugecache_offset(h, vma, vma->vm_end); |
2245 | 2290 | ||
2246 | reserve = (end - start) - | 2291 | reserve = (end - start) - |
2247 | region_count(&reservations->regions, start, end); | 2292 | region_count(&resv->regions, start, end); |
2248 | 2293 | ||
2249 | resv_map_put(vma); | 2294 | resv_map_put(vma); |
2250 | 2295 | ||
@@ -2557,7 +2602,6 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2557 | { | 2602 | { |
2558 | struct hstate *h = hstate_vma(vma); | 2603 | struct hstate *h = hstate_vma(vma); |
2559 | struct page *old_page, *new_page; | 2604 | struct page *old_page, *new_page; |
2560 | int avoidcopy; | ||
2561 | int outside_reserve = 0; | 2605 | int outside_reserve = 0; |
2562 | unsigned long mmun_start; /* For mmu_notifiers */ | 2606 | unsigned long mmun_start; /* For mmu_notifiers */ |
2563 | unsigned long mmun_end; /* For mmu_notifiers */ | 2607 | unsigned long mmun_end; /* For mmu_notifiers */ |
@@ -2567,10 +2611,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2567 | retry_avoidcopy: | 2611 | retry_avoidcopy: |
2568 | /* If no-one else is actually using this page, avoid the copy | 2612 | /* If no-one else is actually using this page, avoid the copy |
2569 | * and just make the page writable */ | 2613 | * and just make the page writable */ |
2570 | avoidcopy = (page_mapcount(old_page) == 1); | 2614 | if (page_mapcount(old_page) == 1 && PageAnon(old_page)) { |
2571 | if (avoidcopy) { | 2615 | page_move_anon_rmap(old_page, vma, address); |
2572 | if (PageAnon(old_page)) | ||
2573 | page_move_anon_rmap(old_page, vma, address); | ||
2574 | set_huge_ptep_writable(vma, address, ptep); | 2616 | set_huge_ptep_writable(vma, address, ptep); |
2575 | return 0; | 2617 | return 0; |
2576 | } | 2618 | } |
@@ -2584,8 +2626,7 @@ retry_avoidcopy: | |||
2584 | * at the time of fork() could consume its reserves on COW instead | 2626 | * at the time of fork() could consume its reserves on COW instead |
2585 | * of the full address range. | 2627 | * of the full address range. |
2586 | */ | 2628 | */ |
2587 | if (!(vma->vm_flags & VM_MAYSHARE) && | 2629 | if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && |
2588 | is_vma_resv_set(vma, HPAGE_RESV_OWNER) && | ||
2589 | old_page != pagecache_page) | 2630 | old_page != pagecache_page) |
2590 | outside_reserve = 1; | 2631 | outside_reserve = 1; |
2591 | 2632 | ||
@@ -2657,6 +2698,8 @@ retry_avoidcopy: | |||
2657 | spin_lock(&mm->page_table_lock); | 2698 | spin_lock(&mm->page_table_lock); |
2658 | ptep = huge_pte_offset(mm, address & huge_page_mask(h)); | 2699 | ptep = huge_pte_offset(mm, address & huge_page_mask(h)); |
2659 | if (likely(pte_same(huge_ptep_get(ptep), pte))) { | 2700 | if (likely(pte_same(huge_ptep_get(ptep), pte))) { |
2701 | ClearPagePrivate(new_page); | ||
2702 | |||
2660 | /* Break COW */ | 2703 | /* Break COW */ |
2661 | huge_ptep_clear_flush(vma, address, ptep); | 2704 | huge_ptep_clear_flush(vma, address, ptep); |
2662 | set_huge_pte_at(mm, address, ptep, | 2705 | set_huge_pte_at(mm, address, ptep, |
@@ -2668,10 +2711,11 @@ retry_avoidcopy: | |||
2668 | } | 2711 | } |
2669 | spin_unlock(&mm->page_table_lock); | 2712 | spin_unlock(&mm->page_table_lock); |
2670 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | 2713 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
2671 | /* Caller expects lock to be held */ | ||
2672 | spin_lock(&mm->page_table_lock); | ||
2673 | page_cache_release(new_page); | 2714 | page_cache_release(new_page); |
2674 | page_cache_release(old_page); | 2715 | page_cache_release(old_page); |
2716 | |||
2717 | /* Caller expects lock to be held */ | ||
2718 | spin_lock(&mm->page_table_lock); | ||
2675 | return 0; | 2719 | return 0; |
2676 | } | 2720 | } |
2677 | 2721 | ||
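The reordering above matters for lock hold time: the two page_cache_release() calls can do real work (freeing a hugepage), so they are now issued after dropping page_table_lock and only then is the lock re-taken for the caller. The general unlock, clean up, relock shape, sketched with pthreads (illustrative only; the kernel code uses a spinlock, not a mutex; build with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void heavyweight_release(void) { puts("release pages"); }

static int finish(void)
{
        pthread_mutex_unlock(&lock);    /* locked work is done */
        heavyweight_release();          /* do the releases unlocked */
        pthread_mutex_lock(&lock);      /* caller expects lock held */
        return 0;
}

int main(void)
{
        pthread_mutex_lock(&lock);
        finish();
        pthread_mutex_unlock(&lock);
        return 0;
}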
@@ -2767,6 +2811,7 @@ retry: | |||
2767 | goto retry; | 2811 | goto retry; |
2768 | goto out; | 2812 | goto out; |
2769 | } | 2813 | } |
2814 | ClearPagePrivate(page); | ||
2770 | 2815 | ||
2771 | spin_lock(&inode->i_lock); | 2816 | spin_lock(&inode->i_lock); |
2772 | inode->i_blocks += blocks_per_huge_page(h); | 2817 | inode->i_blocks += blocks_per_huge_page(h); |
@@ -2813,8 +2858,10 @@ retry: | |||
2813 | if (!huge_pte_none(huge_ptep_get(ptep))) | 2858 | if (!huge_pte_none(huge_ptep_get(ptep))) |
2814 | goto backout; | 2859 | goto backout; |
2815 | 2860 | ||
2816 | if (anon_rmap) | 2861 | if (anon_rmap) { |
2862 | ClearPagePrivate(page); | ||
2817 | hugepage_add_new_anon_rmap(page, vma, address); | 2863 | hugepage_add_new_anon_rmap(page, vma, address); |
2864 | } | ||
2818 | else | 2865 | else |
2819 | page_dup_rmap(page); | 2866 | page_dup_rmap(page); |
2820 | new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) | 2867 | new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) |
@@ -3431,3 +3478,45 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage) | |||
3431 | return ret; | 3478 | return ret; |
3432 | } | 3479 | } |
3433 | #endif | 3480 | #endif |
3481 | |||
3482 | bool isolate_huge_page(struct page *page, struct list_head *list) | ||
3483 | { | ||
3484 | VM_BUG_ON(!PageHead(page)); | ||
3485 | if (!get_page_unless_zero(page)) | ||
3486 | return false; | ||
3487 | spin_lock(&hugetlb_lock); | ||
3488 | list_move_tail(&page->lru, list); | ||
3489 | spin_unlock(&hugetlb_lock); | ||
3490 | return true; | ||
3491 | } | ||
3492 | |||
3493 | void putback_active_hugepage(struct page *page) | ||
3494 | { | ||
3495 | VM_BUG_ON(!PageHead(page)); | ||
3496 | spin_lock(&hugetlb_lock); | ||
3497 | list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist); | ||
3498 | spin_unlock(&hugetlb_lock); | ||
3499 | put_page(page); | ||
3500 | } | ||
3501 | |||
3502 | bool is_hugepage_active(struct page *page) | ||
3503 | { | ||
3504 | VM_BUG_ON(!PageHuge(page)); | ||
3505 | /* | ||
3506 | * This function can be called for a tail page because the caller, | ||
3507 | * scan_movable_pages, scans through a given pfn-range which typically | ||
3508 | * covers one memory block. In systems using gigantic hugepages (1GB | ||
3509 | * for x86_64) a hugepage is larger than a memory block, and we don't | ||
3510 | * support migrating such large hugepages for now, so return false | ||
3511 | * when called for tail pages. | ||
3512 | */ | ||
3513 | if (PageTail(page)) | ||
3514 | return false; | ||
3515 | /* | ||
3516 | * The refcount of a hwpoisoned hugepage is 1, but such pages are not active, | ||
3517 | * so we should return false for them. | ||
3518 | */ | ||
3519 | if (unlikely(PageHWPoison(page))) | ||
3520 | return false; | ||
3521 | return page_count(page) > 0; | ||
3522 | } | ||
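The new isolate_huge_page() and putback_active_hugepage() give hugepage migration the same move-to-a-private-list protocol that LRU pages already had: isolation pins the page and moves it to the caller's list; putback returns it to the tail of the active list and drops the pin. A toy, runnable model of the list movement only; the real functions also take hugetlb_lock and manage the page refcount:

#include <stdio.h>

struct node { struct node *prev, *next; int id; };

static void list_init(struct node *h) { h->prev = h->next = h; }

static void list_add_tail(struct node *n, struct node *head)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

static void list_move_tail(struct node *n, struct node *head)
{
        n->prev->next = n->next;        /* unlink from current list */
        n->next->prev = n->prev;
        list_add_tail(n, head);
}

int main(void)
{
        struct node active, isolated, page = { .id = 1 };

        list_init(&active);
        list_init(&isolated);
        list_add_tail(&page, &active);      /* page starts active */
        list_move_tail(&page, &isolated);   /* isolate_huge_page() */
        list_move_tail(&page, &active);     /* putback_active_hugepage() */
        printf("tail of active list: id %d\n", active.prev->id);
        return 0;
}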
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 3a61efc518d5..afc2daa91c60 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c | |||
@@ -88,12 +88,12 @@ static int pfn_inject_init(void) | |||
88 | * hardware status change, hence do not require hardware support. | 88 | * hardware status change, hence do not require hardware support. |
89 | * They are mainly for testing hwpoison in software level. | 89 | * They are mainly for testing hwpoison in software level. |
90 | */ | 90 | */ |
91 | dentry = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir, | 91 | dentry = debugfs_create_file("corrupt-pfn", 0200, hwpoison_dir, |
92 | NULL, &hwpoison_fops); | 92 | NULL, &hwpoison_fops); |
93 | if (!dentry) | 93 | if (!dentry) |
94 | goto fail; | 94 | goto fail; |
95 | 95 | ||
96 | dentry = debugfs_create_file("unpoison-pfn", 0600, hwpoison_dir, | 96 | dentry = debugfs_create_file("unpoison-pfn", 0200, hwpoison_dir, |
97 | NULL, &unpoison_fops); | 97 | NULL, &unpoison_fops); |
98 | if (!dentry) | 98 | if (!dentry) |
99 | goto fail; | 99 | goto fail; |
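Dropping the read bit (0600 to 0200) makes the debugfs permissions match reality: corrupt-pfn and unpoison-pfn are write-only triggers with no readable content. A quick check of what the octal mode encodes:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
        mode_t mode = 0200;     /* owner write bit only */

        printf("owner r=%d w=%d x=%d\n",
               !!(mode & S_IRUSR), !!(mode & S_IWUSR), !!(mode & S_IXUSR));
        return 0;               /* prints: owner r=0 w=1 x=0 */
}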
diff --git a/mm/internal.h b/mm/internal.h index 4390ac6c106e..684f7aa9692a 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -85,6 +85,8 @@ extern unsigned long highest_memmap_pfn; | |||
85 | */ | 85 | */ |
86 | extern int isolate_lru_page(struct page *page); | 86 | extern int isolate_lru_page(struct page *page); |
87 | extern void putback_lru_page(struct page *page); | 87 | extern void putback_lru_page(struct page *page); |
88 | extern unsigned long zone_reclaimable_pages(struct zone *zone); | ||
89 | extern bool zone_reclaimable(struct zone *zone); | ||
88 | 90 | ||
89 | /* | 91 | /* |
90 | * in mm/rmap.c: | 92 | * in mm/rmap.c: |
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c8d7f3110fd0..e126b0ef9ad2 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -1639,7 +1639,7 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, | |||
1639 | else if (strncmp(buf, "scan=", 5) == 0) { | 1639 | else if (strncmp(buf, "scan=", 5) == 0) { |
1640 | unsigned long secs; | 1640 | unsigned long secs; |
1641 | 1641 | ||
1642 | ret = strict_strtoul(buf + 5, 0, &secs); | 1642 | ret = kstrtoul(buf + 5, 0, &secs); |
1643 | if (ret < 0) | 1643 | if (ret < 0) |
1644 | goto out; | 1644 | goto out; |
1645 | stop_scan_thread(); | 1645 | stop_scan_thread(); |
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c | |||
@@ -2194,7 +2194,7 @@ static ssize_t sleep_millisecs_store(struct kobject *kobj, | |||
2194 | unsigned long msecs; | 2194 | unsigned long msecs; |
2195 | int err; | 2195 | int err; |
2196 | 2196 | ||
2197 | err = strict_strtoul(buf, 10, &msecs); | 2197 | err = kstrtoul(buf, 10, &msecs); |
2198 | if (err || msecs > UINT_MAX) | 2198 | if (err || msecs > UINT_MAX) |
2199 | return -EINVAL; | 2199 | return -EINVAL; |
2200 | 2200 | ||
@@ -2217,7 +2217,7 @@ static ssize_t pages_to_scan_store(struct kobject *kobj, | |||
2217 | int err; | 2217 | int err; |
2218 | unsigned long nr_pages; | 2218 | unsigned long nr_pages; |
2219 | 2219 | ||
2220 | err = strict_strtoul(buf, 10, &nr_pages); | 2220 | err = kstrtoul(buf, 10, &nr_pages); |
2221 | if (err || nr_pages > UINT_MAX) | 2221 | if (err || nr_pages > UINT_MAX) |
2222 | return -EINVAL; | 2222 | return -EINVAL; |
2223 | 2223 | ||
@@ -2239,7 +2239,7 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
2239 | int err; | 2239 | int err; |
2240 | unsigned long flags; | 2240 | unsigned long flags; |
2241 | 2241 | ||
2242 | err = strict_strtoul(buf, 10, &flags); | 2242 | err = kstrtoul(buf, 10, &flags); |
2243 | if (err || flags > UINT_MAX) | 2243 | if (err || flags > UINT_MAX) |
2244 | return -EINVAL; | 2244 | return -EINVAL; |
2245 | if (flags > KSM_RUN_UNMERGE) | 2245 | if (flags > KSM_RUN_UNMERGE) |
diff --git a/mm/madvise.c b/mm/madvise.c index 7055883e6e25..6975bc812542 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -42,11 +42,11 @@ static int madvise_need_mmap_write(int behavior) | |||
42 | * We can potentially split a vm area into separate | 42 | * We can potentially split a vm area into separate |
43 | * areas, each area with its own behavior. | 43 | * areas, each area with its own behavior. |
44 | */ | 44 | */ |
45 | static long madvise_behavior(struct vm_area_struct * vma, | 45 | static long madvise_behavior(struct vm_area_struct *vma, |
46 | struct vm_area_struct **prev, | 46 | struct vm_area_struct **prev, |
47 | unsigned long start, unsigned long end, int behavior) | 47 | unsigned long start, unsigned long end, int behavior) |
48 | { | 48 | { |
49 | struct mm_struct * mm = vma->vm_mm; | 49 | struct mm_struct *mm = vma->vm_mm; |
50 | int error = 0; | 50 | int error = 0; |
51 | pgoff_t pgoff; | 51 | pgoff_t pgoff; |
52 | unsigned long new_flags = vma->vm_flags; | 52 | unsigned long new_flags = vma->vm_flags; |
@@ -215,8 +215,8 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, | |||
215 | /* | 215 | /* |
216 | * Schedule all required I/O operations. Do not wait for completion. | 216 | * Schedule all required I/O operations. Do not wait for completion. |
217 | */ | 217 | */ |
218 | static long madvise_willneed(struct vm_area_struct * vma, | 218 | static long madvise_willneed(struct vm_area_struct *vma, |
219 | struct vm_area_struct ** prev, | 219 | struct vm_area_struct **prev, |
220 | unsigned long start, unsigned long end) | 220 | unsigned long start, unsigned long end) |
221 | { | 221 | { |
222 | struct file *file = vma->vm_file; | 222 | struct file *file = vma->vm_file; |
@@ -270,8 +270,8 @@ static long madvise_willneed(struct vm_area_struct * vma, | |||
270 | * An interface that causes the system to free clean pages and flush | 270 | * An interface that causes the system to free clean pages and flush |
271 | * dirty pages is already available as msync(MS_INVALIDATE). | 271 | * dirty pages is already available as msync(MS_INVALIDATE). |
272 | */ | 272 | */ |
273 | static long madvise_dontneed(struct vm_area_struct * vma, | 273 | static long madvise_dontneed(struct vm_area_struct *vma, |
274 | struct vm_area_struct ** prev, | 274 | struct vm_area_struct **prev, |
275 | unsigned long start, unsigned long end) | 275 | unsigned long start, unsigned long end) |
276 | { | 276 | { |
277 | *prev = vma; | 277 | *prev = vma; |
@@ -343,29 +343,34 @@ static long madvise_remove(struct vm_area_struct *vma, | |||
343 | */ | 343 | */ |
344 | static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) | 344 | static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) |
345 | { | 345 | { |
346 | int ret = 0; | ||
347 | |||
348 | if (!capable(CAP_SYS_ADMIN)) | 346 | if (!capable(CAP_SYS_ADMIN)) |
349 | return -EPERM; | 347 | return -EPERM; |
350 | for (; start < end; start += PAGE_SIZE) { | 348 | for (; start < end; start += PAGE_SIZE) { |
351 | struct page *p; | 349 | struct page *p; |
352 | int ret = get_user_pages_fast(start, 1, 0, &p); | 350 | int ret; |
351 | |||
352 | ret = get_user_pages_fast(start, 1, 0, &p); | ||
353 | if (ret != 1) | 353 | if (ret != 1) |
354 | return ret; | 354 | return ret; |
355 | |||
356 | if (PageHWPoison(p)) { | ||
357 | put_page(p); | ||
358 | continue; | ||
359 | } | ||
355 | if (bhv == MADV_SOFT_OFFLINE) { | 360 | if (bhv == MADV_SOFT_OFFLINE) { |
356 | printk(KERN_INFO "Soft offlining page %lx at %lx\n", | 361 | pr_info("Soft offlining page %#lx at %#lx\n", |
357 | page_to_pfn(p), start); | 362 | page_to_pfn(p), start); |
358 | ret = soft_offline_page(p, MF_COUNT_INCREASED); | 363 | ret = soft_offline_page(p, MF_COUNT_INCREASED); |
359 | if (ret) | 364 | if (ret) |
360 | break; | 365 | return ret; |
361 | continue; | 366 | continue; |
362 | } | 367 | } |
363 | printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", | 368 | pr_info("Injecting memory failure for page %#lx at %#lx\n", |
364 | page_to_pfn(p), start); | 369 | page_to_pfn(p), start); |
365 | /* Ignore return value for now */ | 370 | /* Ignore return value for now */ |
366 | memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); | 371 | memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); |
367 | } | 372 | } |
368 | return ret; | 373 | return 0; |
369 | } | 374 | } |
370 | #endif | 375 | #endif |
371 | 376 | ||
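The reworked madvise_hwpoison() loop skips pages that are already poisoned (dropping the extra reference first), propagates a soft-offline failure immediately instead of breaking out with a stale ret, and returns 0 on clean completion. The loop's decision shape, modelled standalone with invented state values (1 = already poisoned, negative = failure):

#include <stdio.h>

static int process_range(const int state[], int n)
{
        for (int i = 0; i < n; i++) {
                if (state[i] == 1)      /* already poisoned: skip */
                        continue;
                if (state[i] < 0)       /* offline failed: propagate */
                        return state[i];
                printf("injected at %d\n", i);
        }
        return 0;
}

int main(void)
{
        int state[] = { 0, 1, 0 };

        return process_range(state, 3);
}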
@@ -459,7 +464,7 @@ madvise_behavior_valid(int behavior) | |||
459 | SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) | 464 | SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) |
460 | { | 465 | { |
461 | unsigned long end, tmp; | 466 | unsigned long end, tmp; |
462 | struct vm_area_struct * vma, *prev; | 467 | struct vm_area_struct *vma, *prev; |
463 | int unmapped_error = 0; | 468 | int unmapped_error = 0; |
464 | int error = -EINVAL; | 469 | int error = -EINVAL; |
465 | int write; | 470 | int write; |
diff --git a/mm/memblock.c b/mm/memblock.c index a847bfe6f3ba..0ac412a0a7ee 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -914,6 +914,24 @@ int __init_memblock memblock_is_memory(phys_addr_t addr) | |||
914 | return memblock_search(&memblock.memory, addr) != -1; | 914 | return memblock_search(&memblock.memory, addr) != -1; |
915 | } | 915 | } |
916 | 916 | ||
917 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | ||
918 | int __init_memblock memblock_search_pfn_nid(unsigned long pfn, | ||
919 | unsigned long *start_pfn, unsigned long *end_pfn) | ||
920 | { | ||
921 | struct memblock_type *type = &memblock.memory; | ||
922 | int mid = memblock_search(type, (phys_addr_t)pfn << PAGE_SHIFT); | ||
923 | |||
924 | if (mid == -1) | ||
925 | return -1; | ||
926 | |||
927 | *start_pfn = type->regions[mid].base >> PAGE_SHIFT; | ||
928 | *end_pfn = (type->regions[mid].base + type->regions[mid].size) | ||
929 | >> PAGE_SHIFT; | ||
930 | |||
931 | return type->regions[mid].nid; | ||
932 | } | ||
933 | #endif | ||
934 | |||
917 | /** | 935 | /** |
918 | * memblock_is_region_memory - check if a region is a subset of memory | 936 | * memblock_is_region_memory - check if a region is a subset of memory |
919 | * @base: base of region to check | 937 | * @base: base of region to check |
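The new memblock_search_pfn_nid() reuses memblock_search()'s binary search over the sorted, non-overlapping memory regions, then reports the matched region's pfn span and node id back to the caller. A userspace model of the lookup; the region table is invented for the example:

#include <stdio.h>

struct region { unsigned long base, size; int nid; };

static int search_pfn_nid(const struct region *r, int n, unsigned long pfn,
                          unsigned long *start, unsigned long *end)
{
        int lo = 0, hi = n - 1;

        while (lo <= hi) {
                int mid = lo + (hi - lo) / 2;

                if (pfn < r[mid].base)
                        hi = mid - 1;
                else if (pfn >= r[mid].base + r[mid].size)
                        lo = mid + 1;
                else {
                        *start = r[mid].base;
                        *end = r[mid].base + r[mid].size;
                        return r[mid].nid;
                }
        }
        return -1;                      /* pfn not in any region */
}

int main(void)
{
        struct region mem[] = { { 0, 256, 0 }, { 512, 256, 1 } };
        unsigned long s, e;
        int nid = search_pfn_nid(mem, 2, 600, &s, &e);

        printf("pfn 600 -> nid %d, span [%lu, %lu)\n", nid, s, e);
        return 0;
}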
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3b83957b6439..c6bd28edd533 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -3121,7 +3121,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) | |||
3121 | ssize_t size = memcg_caches_array_size(num_groups); | 3121 | ssize_t size = memcg_caches_array_size(num_groups); |
3122 | 3122 | ||
3123 | size *= sizeof(void *); | 3123 | size *= sizeof(void *); |
3124 | size += sizeof(struct memcg_cache_params); | 3124 | size += offsetof(struct memcg_cache_params, memcg_caches); |
3125 | 3125 | ||
3126 | s->memcg_params = kzalloc(size, GFP_KERNEL); | 3126 | s->memcg_params = kzalloc(size, GFP_KERNEL); |
3127 | if (!s->memcg_params) { | 3127 | if (!s->memcg_params) { |
@@ -3164,13 +3164,16 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) | |||
3164 | int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, | 3164 | int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, |
3165 | struct kmem_cache *root_cache) | 3165 | struct kmem_cache *root_cache) |
3166 | { | 3166 | { |
3167 | size_t size = sizeof(struct memcg_cache_params); | 3167 | size_t size; |
3168 | 3168 | ||
3169 | if (!memcg_kmem_enabled()) | 3169 | if (!memcg_kmem_enabled()) |
3170 | return 0; | 3170 | return 0; |
3171 | 3171 | ||
3172 | if (!memcg) | 3172 | if (!memcg) { |
3173 | size = offsetof(struct memcg_cache_params, memcg_caches); | ||
3173 | size += memcg_limited_groups_array_size * sizeof(void *); | 3174 | size += memcg_limited_groups_array_size * sizeof(void *); |
3175 | } else | ||
3176 | size = sizeof(struct memcg_cache_params); | ||
3174 | 3177 | ||
3175 | s->memcg_params = kzalloc(size, GFP_KERNEL); | 3178 | s->memcg_params = kzalloc(size, GFP_KERNEL); |
3176 | if (!s->memcg_params) | 3179 | if (!s->memcg_params) |
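Both memcontrol.c hunks size the allocation with offsetof(struct memcg_cache_params, memcg_caches) instead of sizeof(): the per-memcg cache array shares space with the child-cache bookkeeping fields, so sizeof() counts union members a root cache never uses. An illustrative reconstruction (GNU C, not the exact kernel layout) of why the two sizes differ:

#include <stddef.h>
#include <stdio.h>

struct params {
        void *owner;
        union {
                void *caches[0];        /* root: trailing array */
                struct {                /* child: bookkeeping */
                        void *root_cache;
                        long refcount;
                        long dead;
                };
        };
};

int main(void)
{
        printf("offsetof=%zu sizeof=%zu\n",
               offsetof(struct params, caches),
               sizeof(struct params));  /* e.g. offsetof=8 sizeof=32 on LP64 */
        return 0;
}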
@@ -5588,7 +5591,13 @@ static int compare_thresholds(const void *a, const void *b) | |||
5588 | const struct mem_cgroup_threshold *_a = a; | 5591 | const struct mem_cgroup_threshold *_a = a; |
5589 | const struct mem_cgroup_threshold *_b = b; | 5592 | const struct mem_cgroup_threshold *_b = b; |
5590 | 5593 | ||
5591 | return _a->threshold - _b->threshold; | 5594 | if (_a->threshold > _b->threshold) |
5595 | return 1; | ||
5596 | |||
5597 | if (_a->threshold < _b->threshold) | ||
5598 | return -1; | ||
5599 | |||
5600 | return 0; | ||
5592 | } | 5601 | } |
5593 | 5602 | ||
5594 | static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) | 5603 | static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) |
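The compare_thresholds() fix avoids a classic sort bug: the thresholds are unsigned long, so returning their difference as an int can truncate and flip sign, and sort() then orders large values wrongly. A demonstration, assuming LP64 (64-bit unsigned long):

#include <stdio.h>

int main(void)
{
        unsigned long a = 0x100000000UL;        /* 2^32 */
        unsigned long b = 1;

        printf("broken: %d\n", (int)(a - b));   /* -1: wrong sign, a > b */
        printf("fixed:  %d\n", (a > b) - (a < b));
        return 0;
}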
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d84c5e5331bb..d472e14c6808 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -206,7 +206,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, | |||
206 | #ifdef __ARCH_SI_TRAPNO | 206 | #ifdef __ARCH_SI_TRAPNO |
207 | si.si_trapno = trapno; | 207 | si.si_trapno = trapno; |
208 | #endif | 208 | #endif |
209 | si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; | 209 | si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; |
210 | 210 | ||
211 | if ((flags & MF_ACTION_REQUIRED) && t == current) { | 211 | if ((flags & MF_ACTION_REQUIRED) && t == current) { |
212 | si.si_code = BUS_MCEERR_AR; | 212 | si.si_code = BUS_MCEERR_AR; |
@@ -983,7 +983,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
983 | static void set_page_hwpoison_huge_page(struct page *hpage) | 983 | static void set_page_hwpoison_huge_page(struct page *hpage) |
984 | { | 984 | { |
985 | int i; | 985 | int i; |
986 | int nr_pages = 1 << compound_trans_order(hpage); | 986 | int nr_pages = 1 << compound_order(hpage); |
987 | for (i = 0; i < nr_pages; i++) | 987 | for (i = 0; i < nr_pages; i++) |
988 | SetPageHWPoison(hpage + i); | 988 | SetPageHWPoison(hpage + i); |
989 | } | 989 | } |
@@ -991,7 +991,7 @@ static void set_page_hwpoison_huge_page(struct page *hpage) | |||
991 | static void clear_page_hwpoison_huge_page(struct page *hpage) | 991 | static void clear_page_hwpoison_huge_page(struct page *hpage) |
992 | { | 992 | { |
993 | int i; | 993 | int i; |
994 | int nr_pages = 1 << compound_trans_order(hpage); | 994 | int nr_pages = 1 << compound_order(hpage); |
995 | for (i = 0; i < nr_pages; i++) | 995 | for (i = 0; i < nr_pages; i++) |
996 | ClearPageHWPoison(hpage + i); | 996 | ClearPageHWPoison(hpage + i); |
997 | } | 997 | } |
@@ -1204,6 +1204,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1204 | for (ps = error_states;; ps++) | 1204 | for (ps = error_states;; ps++) |
1205 | if ((p->flags & ps->mask) == ps->res) | 1205 | if ((p->flags & ps->mask) == ps->res) |
1206 | break; | 1206 | break; |
1207 | |||
1208 | page_flags |= (p->flags & (1UL << PG_dirty)); | ||
1209 | |||
1207 | if (!ps->mask) | 1210 | if (!ps->mask) |
1208 | for (ps = error_states;; ps++) | 1211 | for (ps = error_states;; ps++) |
1209 | if ((page_flags & ps->mask) == ps->res) | 1212 | if ((page_flags & ps->mask) == ps->res) |
@@ -1339,7 +1342,17 @@ int unpoison_memory(unsigned long pfn) | |||
1339 | return 0; | 1342 | return 0; |
1340 | } | 1343 | } |
1341 | 1344 | ||
1342 | nr_pages = 1 << compound_trans_order(page); | 1345 | /* |
1346 | * unpoison_memory() can encounter thp only when the thp is being | ||
1347 | * handled by memory_failure() and the page lock is not held yet. | ||
1348 | * In such a case, we yield to memory_failure() and make unpoison fail. | ||
1349 | */ | ||
1350 | if (PageTransHuge(page)) { | ||
1351 | pr_info("MCE: Memory failure is now running on %#lx\n", pfn); | ||
1352 | return 0; | ||
1353 | } | ||
1354 | |||
1355 | nr_pages = 1 << compound_order(page); | ||
1343 | 1356 | ||
1344 | if (!get_page_unless_zero(page)) { | 1357 | if (!get_page_unless_zero(page)) { |
1345 | /* | 1358 | /* |
@@ -1353,7 +1366,7 @@ int unpoison_memory(unsigned long pfn) | |||
1353 | return 0; | 1366 | return 0; |
1354 | } | 1367 | } |
1355 | if (TestClearPageHWPoison(p)) | 1368 | if (TestClearPageHWPoison(p)) |
1356 | atomic_long_sub(nr_pages, &num_poisoned_pages); | 1369 | atomic_long_dec(&num_poisoned_pages); |
1357 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); | 1370 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); |
1358 | return 0; | 1371 | return 0; |
1359 | } | 1372 | } |
@@ -1375,7 +1388,7 @@ int unpoison_memory(unsigned long pfn) | |||
1375 | unlock_page(page); | 1388 | unlock_page(page); |
1376 | 1389 | ||
1377 | put_page(page); | 1390 | put_page(page); |
1378 | if (freeit) | 1391 | if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) |
1379 | put_page(page); | 1392 | put_page(page); |
1380 | 1393 | ||
1381 | return 0; | 1394 | return 0; |
@@ -1416,7 +1429,8 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1416 | * was free. This flag should be kept set until the source page | 1429 | * was free. This flag should be kept set until the source page |
1417 | * is freed and PG_hwpoison on it is set. | 1430 | * is freed and PG_hwpoison on it is set. |
1418 | */ | 1431 | */ |
1419 | set_migratetype_isolate(p, true); | 1432 | if (get_pageblock_migratetype(p) != MIGRATE_ISOLATE) |
1433 | set_migratetype_isolate(p, true); | ||
1420 | /* | 1434 | /* |
1421 | * When the target page is a free hugepage, just remove it | 1435 | * When the target page is a free hugepage, just remove it |
1422 | * from free hugepage list. | 1436 | * from free hugepage list. |
@@ -1470,6 +1484,7 @@ static int soft_offline_huge_page(struct page *page, int flags) | |||
1470 | int ret; | 1484 | int ret; |
1471 | unsigned long pfn = page_to_pfn(page); | 1485 | unsigned long pfn = page_to_pfn(page); |
1472 | struct page *hpage = compound_head(page); | 1486 | struct page *hpage = compound_head(page); |
1487 | LIST_HEAD(pagelist); | ||
1473 | 1488 | ||
1474 | /* | 1489 | /* |
1475 | * This double-check of PageHWPoison is to avoid the race with | 1490 | * This double-check of PageHWPoison is to avoid the race with |
@@ -1485,86 +1500,29 @@ static int soft_offline_huge_page(struct page *page, int flags) | |||
1485 | unlock_page(hpage); | 1500 | unlock_page(hpage); |
1486 | 1501 | ||
1487 | /* Keep page count to indicate a given hugepage is isolated. */ | 1502 | /* Keep page count to indicate a given hugepage is isolated. */ |
1488 | ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, | 1503 | list_move(&hpage->lru, &pagelist); |
1489 | MIGRATE_SYNC); | 1504 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, |
1490 | put_page(hpage); | 1505 | MIGRATE_SYNC, MR_MEMORY_FAILURE); |
1491 | if (ret) { | 1506 | if (ret) { |
1492 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", | 1507 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", |
1493 | pfn, ret, page->flags); | 1508 | pfn, ret, page->flags); |
1509 | /* | ||
1510 | * We know that soft_offline_huge_page() tries to migrate | ||
1511 | * only one hugepage pointed to by hpage, so we need not | ||
1512 | * run through the pagelist here. | ||
1513 | */ | ||
1514 | putback_active_hugepage(hpage); | ||
1515 | if (ret > 0) | ||
1516 | ret = -EIO; | ||
1494 | } else { | 1517 | } else { |
1495 | set_page_hwpoison_huge_page(hpage); | 1518 | set_page_hwpoison_huge_page(hpage); |
1496 | dequeue_hwpoisoned_huge_page(hpage); | 1519 | dequeue_hwpoisoned_huge_page(hpage); |
1497 | atomic_long_add(1 << compound_trans_order(hpage), | 1520 | atomic_long_add(1 << compound_order(hpage), |
1498 | &num_poisoned_pages); | 1521 | &num_poisoned_pages); |
1499 | } | 1522 | } |
1500 | return ret; | 1523 | return ret; |
1501 | } | 1524 | } |
1502 | 1525 | ||
1503 | static int __soft_offline_page(struct page *page, int flags); | ||
1504 | |||
1505 | /** | ||
1506 | * soft_offline_page - Soft offline a page. | ||
1507 | * @page: page to offline | ||
1508 | * @flags: flags. Same as memory_failure(). | ||
1509 | * | ||
1510 | * Returns 0 on success, otherwise negated errno. | ||
1511 | * | ||
1512 | * Soft offline a page, by migration or invalidation, | ||
1513 | * without killing anything. This is for the case when | ||
1514 | * a page is not corrupted yet (so it's still valid to access), | ||
1515 | * but has had a number of corrected errors and is better taken | ||
1516 | * out. | ||
1517 | * | ||
1518 | * The actual policy on when to do that is maintained by | ||
1519 | * user space. | ||
1520 | * | ||
1521 | * This should never impact any application or cause data loss, | ||
1522 | * however it might take some time. | ||
1523 | * | ||
1524 | * This is not a 100% solution for all memory, but tries to be | ||
1525 | * ``good enough'' for the majority of memory. | ||
1526 | */ | ||
1527 | int soft_offline_page(struct page *page, int flags) | ||
1528 | { | ||
1529 | int ret; | ||
1530 | unsigned long pfn = page_to_pfn(page); | ||
1531 | struct page *hpage = compound_trans_head(page); | ||
1532 | |||
1533 | if (PageHWPoison(page)) { | ||
1534 | pr_info("soft offline: %#lx page already poisoned\n", pfn); | ||
1535 | return -EBUSY; | ||
1536 | } | ||
1537 | if (!PageHuge(page) && PageTransHuge(hpage)) { | ||
1538 | if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { | ||
1539 | pr_info("soft offline: %#lx: failed to split THP\n", | ||
1540 | pfn); | ||
1541 | return -EBUSY; | ||
1542 | } | ||
1543 | } | ||
1544 | |||
1545 | ret = get_any_page(page, pfn, flags); | ||
1546 | if (ret < 0) | ||
1547 | return ret; | ||
1548 | if (ret) { /* for in-use pages */ | ||
1549 | if (PageHuge(page)) | ||
1550 | ret = soft_offline_huge_page(page, flags); | ||
1551 | else | ||
1552 | ret = __soft_offline_page(page, flags); | ||
1553 | } else { /* for free pages */ | ||
1554 | if (PageHuge(page)) { | ||
1555 | set_page_hwpoison_huge_page(hpage); | ||
1556 | dequeue_hwpoisoned_huge_page(hpage); | ||
1557 | atomic_long_add(1 << compound_trans_order(hpage), | ||
1558 | &num_poisoned_pages); | ||
1559 | } else { | ||
1560 | SetPageHWPoison(page); | ||
1561 | atomic_long_inc(&num_poisoned_pages); | ||
1562 | } | ||
1563 | } | ||
1564 | unset_migratetype_isolate(page, MIGRATE_MOVABLE); | ||
1565 | return ret; | ||
1566 | } | ||
1567 | |||
1568 | static int __soft_offline_page(struct page *page, int flags) | 1526 | static int __soft_offline_page(struct page *page, int flags) |
1569 | { | 1527 | { |
1570 | int ret; | 1528 | int ret; |
@@ -1651,3 +1609,67 @@ static int __soft_offline_page(struct page *page, int flags) | |||
1651 | } | 1609 | } |
1652 | return ret; | 1610 | return ret; |
1653 | } | 1611 | } |
1612 | |||
1613 | /** | ||
1614 | * soft_offline_page - Soft offline a page. | ||
1615 | * @page: page to offline | ||
1616 | * @flags: flags. Same as memory_failure(). | ||
1617 | * | ||
1618 | * Returns 0 on success, otherwise negated errno. | ||
1619 | * | ||
1620 | * Soft offline a page, by migration or invalidation, | ||
1621 | * without killing anything. This is for the case when | ||
1622 | * a page is not corrupted yet (so it's still valid to access), | ||
1623 | * but has had a number of corrected errors and is better taken | ||
1624 | * out. | ||
1625 | * | ||
1626 | * The actual policy on when to do that is maintained by | ||
1627 | * user space. | ||
1628 | * | ||
1629 | * This should never impact any application or cause data loss, | ||
1630 | * however it might take some time. | ||
1631 | * | ||
1632 | * This is not a 100% solution for all memory, but tries to be | ||
1633 | * ``good enough'' for the majority of memory. | ||
1634 | */ | ||
1635 | int soft_offline_page(struct page *page, int flags) | ||
1636 | { | ||
1637 | int ret; | ||
1638 | unsigned long pfn = page_to_pfn(page); | ||
1639 | struct page *hpage = compound_trans_head(page); | ||
1640 | |||
1641 | if (PageHWPoison(page)) { | ||
1642 | pr_info("soft offline: %#lx page already poisoned\n", pfn); | ||
1643 | return -EBUSY; | ||
1644 | } | ||
1645 | if (!PageHuge(page) && PageTransHuge(hpage)) { | ||
1646 | if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { | ||
1647 | pr_info("soft offline: %#lx: failed to split THP\n", | ||
1648 | pfn); | ||
1649 | return -EBUSY; | ||
1650 | } | ||
1651 | } | ||
1652 | |||
1653 | ret = get_any_page(page, pfn, flags); | ||
1654 | if (ret < 0) | ||
1655 | goto unset; | ||
1656 | if (ret) { /* for in-use pages */ | ||
1657 | if (PageHuge(page)) | ||
1658 | ret = soft_offline_huge_page(page, flags); | ||
1659 | else | ||
1660 | ret = __soft_offline_page(page, flags); | ||
1661 | } else { /* for free pages */ | ||
1662 | if (PageHuge(page)) { | ||
1663 | set_page_hwpoison_huge_page(hpage); | ||
1664 | dequeue_hwpoisoned_huge_page(hpage); | ||
1665 | atomic_long_add(1 << compound_order(hpage), | ||
1666 | &num_poisoned_pages); | ||
1667 | } else { | ||
1668 | SetPageHWPoison(page); | ||
1669 | atomic_long_inc(&num_poisoned_pages); | ||
1670 | } | ||
1671 | } | ||
1672 | unset: | ||
1673 | unset_migratetype_isolate(page, MIGRATE_MOVABLE); | ||
1674 | return ret; | ||
1675 | } | ||
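Relocating soft_offline_page() below __soft_offline_page() removes the need for a forward declaration, and the new unset: label fixes a real leak: get_any_page() isolates the page block's migratetype, and the old early error return skipped unset_migratetype_isolate(). The standard goto-cleanup shape, sketched standalone with illustrative names:

#include <stdio.h>

static int do_work(int fail)
{
        int ret = 0;

        puts("isolate");                /* set_migratetype_isolate() */
        if (fail) {
                ret = -1;
                goto unset;             /* old code returned here, leaking */
        }
        puts("work");
unset:
        puts("unisolate");              /* unset_migratetype_isolate() */
        return ret;
}

int main(void)
{
        printf("ret=%d\n", do_work(1));
        return 0;
}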
diff --git a/mm/memory.c b/mm/memory.c index b3c6bf9a398e..2b73dbde2274 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -373,30 +373,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) | |||
373 | #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ | 373 | #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ |
374 | 374 | ||
375 | /* | 375 | /* |
376 | * If a p?d_bad entry is found while walking page tables, report | ||
377 | * the error, before resetting entry to p?d_none. Usually (but | ||
378 | * very seldom) called out from the p?d_none_or_clear_bad macros. | ||
379 | */ | ||
380 | |||
381 | void pgd_clear_bad(pgd_t *pgd) | ||
382 | { | ||
383 | pgd_ERROR(*pgd); | ||
384 | pgd_clear(pgd); | ||
385 | } | ||
386 | |||
387 | void pud_clear_bad(pud_t *pud) | ||
388 | { | ||
389 | pud_ERROR(*pud); | ||
390 | pud_clear(pud); | ||
391 | } | ||
392 | |||
393 | void pmd_clear_bad(pmd_t *pmd) | ||
394 | { | ||
395 | pmd_ERROR(*pmd); | ||
396 | pmd_clear(pmd); | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Note: this doesn't free the actual pages themselves. That | 376 | * Note: this doesn't free the actual pages themselves. That |
401 | * has been handled earlier when unmapping all the memory regions. | 377 | * has been handled earlier when unmapping all the memory regions. |
402 | */ | 378 | */ |
@@ -1505,7 +1481,8 @@ struct page *follow_page_mask(struct vm_area_struct *vma, | |||
1505 | if (pud_none(*pud)) | 1481 | if (pud_none(*pud)) |
1506 | goto no_page_table; | 1482 | goto no_page_table; |
1507 | if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { | 1483 | if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { |
1508 | BUG_ON(flags & FOLL_GET); | 1484 | if (flags & FOLL_GET) |
1485 | goto out; | ||
1509 | page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE); | 1486 | page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE); |
1510 | goto out; | 1487 | goto out; |
1511 | } | 1488 | } |
@@ -1516,8 +1493,20 @@ struct page *follow_page_mask(struct vm_area_struct *vma, | |||
1516 | if (pmd_none(*pmd)) | 1493 | if (pmd_none(*pmd)) |
1517 | goto no_page_table; | 1494 | goto no_page_table; |
1518 | if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { | 1495 | if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { |
1519 | BUG_ON(flags & FOLL_GET); | ||
1520 | page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); | 1496 | page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); |
1497 | if (flags & FOLL_GET) { | ||
1498 | /* | ||
1499 | * Refcounts on tail pages are not well-defined and | ||
1500 | * shouldn't be taken. The caller should handle a NULL | ||
1501 | * return when trying to follow tail pages. | ||
1502 | */ | ||
1503 | if (PageHead(page)) | ||
1504 | get_page(page); | ||
1505 | else { | ||
1506 | page = NULL; | ||
1507 | goto out; | ||
1508 | } | ||
1509 | } | ||
1521 | goto out; | 1510 | goto out; |
1522 | } | 1511 | } |
1523 | if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) | 1512 | if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) |
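Replacing BUG_ON(flags & FOLL_GET) makes the huge-PMD path of follow_page_mask() usable under FOLL_GET: a head page can be pinned with get_page(), while tail pages, whose refcounts are not well-defined, make the function return NULL so the caller can fall back. A toy model of that branch; no real mm structures are involved:

#include <stdbool.h>
#include <stdio.h>

struct page { bool head; int refcount; };

static struct page *follow_huge(struct page *page, bool want_ref)
{
        if (want_ref) {
                if (!page->head)
                        return NULL;    /* tail: refcount not well-defined */
                page->refcount++;       /* get_page() on the head */
        }
        return page;
}

int main(void)
{
        struct page head = { .head = true }, tail = { .head = false };

        printf("head pinned: %p (ref %d)\n",
               (void *)follow_huge(&head, true), head.refcount);
        printf("tail result: %p\n", (void *)follow_huge(&tail, true));
        return 0;
}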
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ca1dd3aa5eee..0eb1a1df649d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/mm_inline.h> | 30 | #include <linux/mm_inline.h> |
31 | #include <linux/firmware-map.h> | 31 | #include <linux/firmware-map.h> |
32 | #include <linux/stop_machine.h> | 32 | #include <linux/stop_machine.h> |
33 | #include <linux/hugetlb.h> | ||
33 | 34 | ||
34 | #include <asm/tlbflush.h> | 35 | #include <asm/tlbflush.h> |
35 | 36 | ||
@@ -194,7 +195,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat) | |||
194 | 195 | ||
195 | zone = &pgdat->node_zones[0]; | 196 | zone = &pgdat->node_zones[0]; |
196 | for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) { | 197 | for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) { |
197 | if (zone->wait_table) { | 198 | if (zone_is_initialized(zone)) { |
198 | nr_pages = zone->wait_table_hash_nr_entries | 199 | nr_pages = zone->wait_table_hash_nr_entries |
199 | * sizeof(wait_queue_head_t); | 200 | * sizeof(wait_queue_head_t); |
200 | nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT; | 201 | nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT; |
@@ -229,8 +230,8 @@ static void grow_zone_span(struct zone *zone, unsigned long start_pfn, | |||
229 | 230 | ||
230 | zone_span_writelock(zone); | 231 | zone_span_writelock(zone); |
231 | 232 | ||
232 | old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | 233 | old_zone_end_pfn = zone_end_pfn(zone); |
233 | if (!zone->spanned_pages || start_pfn < zone->zone_start_pfn) | 234 | if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn) |
234 | zone->zone_start_pfn = start_pfn; | 235 | zone->zone_start_pfn = start_pfn; |
235 | 236 | ||
236 | zone->spanned_pages = max(old_zone_end_pfn, end_pfn) - | 237 | zone->spanned_pages = max(old_zone_end_pfn, end_pfn) - |
@@ -305,7 +306,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2, | |||
305 | goto out_fail; | 306 | goto out_fail; |
306 | 307 | ||
307 | /* use start_pfn for z1's start_pfn if z1 is empty */ | 308 | /* use start_pfn for z1's start_pfn if z1 is empty */ |
308 | if (z1->spanned_pages) | 309 | if (!zone_is_empty(z1)) |
309 | z1_start_pfn = z1->zone_start_pfn; | 310 | z1_start_pfn = z1->zone_start_pfn; |
310 | else | 311 | else |
311 | z1_start_pfn = start_pfn; | 312 | z1_start_pfn = start_pfn; |
@@ -347,7 +348,7 @@ static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2, | |||
347 | goto out_fail; | 348 | goto out_fail; |
348 | 349 | ||
349 | /* use end_pfn for z2's end_pfn if z2 is empty */ | 350 | /* use end_pfn for z2's end_pfn if z2 is empty */ |
350 | if (z2->spanned_pages) | 351 | if (!zone_is_empty(z2)) |
351 | z2_end_pfn = zone_end_pfn(z2); | 352 | z2_end_pfn = zone_end_pfn(z2); |
352 | else | 353 | else |
353 | z2_end_pfn = end_pfn; | 354 | z2_end_pfn = end_pfn; |
@@ -514,8 +515,9 @@ static int find_biggest_section_pfn(int nid, struct zone *zone, | |||
514 | static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, | 515 | static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, |
515 | unsigned long end_pfn) | 516 | unsigned long end_pfn) |
516 | { | 517 | { |
517 | unsigned long zone_start_pfn = zone->zone_start_pfn; | 518 | unsigned long zone_start_pfn = zone->zone_start_pfn; |
518 | unsigned long zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | 519 | unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */ |
520 | unsigned long zone_end_pfn = z; | ||
519 | unsigned long pfn; | 521 | unsigned long pfn; |
520 | struct mem_section *ms; | 522 | struct mem_section *ms; |
521 | int nid = zone_to_nid(zone); | 523 | int nid = zone_to_nid(zone); |
@@ -1069,6 +1071,23 @@ out: | |||
1069 | return ret; | 1071 | return ret; |
1070 | } | 1072 | } |
1071 | 1073 | ||
1074 | static int check_hotplug_memory_range(u64 start, u64 size) | ||
1075 | { | ||
1076 | u64 start_pfn = start >> PAGE_SHIFT; | ||
1077 | u64 nr_pages = size >> PAGE_SHIFT; | ||
1078 | |||
1079 | /* Memory range must be aligned with section */ | ||
1080 | if ((start_pfn & ~PAGE_SECTION_MASK) || | ||
1081 | (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) { | ||
1082 | pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n", | ||
1083 | (unsigned long long)start, | ||
1084 | (unsigned long long)size); | ||
1085 | return -EINVAL; | ||
1086 | } | ||
1087 | |||
1088 | return 0; | ||
1089 | } | ||
1090 | |||
1072 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ | 1091 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ |
1073 | int __ref add_memory(int nid, u64 start, u64 size) | 1092 | int __ref add_memory(int nid, u64 start, u64 size) |
1074 | { | 1093 | { |
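The new check_hotplug_memory_range() centralizes the sanity check that a hot-added range starts and ends on memory-section boundaries. The same arithmetic in a standalone model; the section size is an assumption (128 MiB sections with 4 KiB pages, i.e. 32768 pages, as on x86_64; other configs differ):

#include <stdio.h>

#define PAGE_SHIFT        12
#define PAGES_PER_SECTION (1ULL << 15)
#define PAGE_SECTION_MASK (~(PAGES_PER_SECTION - 1))

static int check_range(unsigned long long start, unsigned long long size)
{
        unsigned long long start_pfn = start >> PAGE_SHIFT;
        unsigned long long nr_pages = size >> PAGE_SHIFT;

        if ((start_pfn & ~PAGE_SECTION_MASK) ||
            (nr_pages % PAGES_PER_SECTION) || !nr_pages)
                return -22;                     /* -EINVAL */
        return 0;
}

int main(void)
{
        printf("%d\n", check_range(0x8000000ULL, 0x8000000ULL)); /* 0 */
        printf("%d\n", check_range(0x8000000ULL, 0x1000ULL));    /* -22 */
        return 0;
}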
@@ -1078,6 +1097,10 @@ int __ref add_memory(int nid, u64 start, u64 size) | |||
1078 | struct resource *res; | 1097 | struct resource *res; |
1079 | int ret; | 1098 | int ret; |
1080 | 1099 | ||
1100 | ret = check_hotplug_memory_range(start, size); | ||
1101 | if (ret) | ||
1102 | return ret; | ||
1103 | |||
1081 | lock_memory_hotplug(); | 1104 | lock_memory_hotplug(); |
1082 | 1105 | ||
1083 | res = register_memory_resource(start, size); | 1106 | res = register_memory_resource(start, size); |
@@ -1208,10 +1231,12 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) | |||
1208 | } | 1231 | } |
1209 | 1232 | ||
1210 | /* | 1233 | /* |
1211 | * Scanning pfn is much easier than scanning lru list. | 1234 | * Scan pfn range [start,end) to find movable/migratable pages (LRU pages |
1212 | * Scan pfn from start to end and Find LRU page. | 1235 | * and hugepages). We scan by pfn because it's much easier than scanning over |
1236 | * a linked list. This function returns the pfn of the first movable | ||
1237 | * page found, or 0 if there is none. | ||
1213 | */ | 1238 | */ |
1214 | static unsigned long scan_lru_pages(unsigned long start, unsigned long end) | 1239 | static unsigned long scan_movable_pages(unsigned long start, unsigned long end) |
1215 | { | 1240 | { |
1216 | unsigned long pfn; | 1241 | unsigned long pfn; |
1217 | struct page *page; | 1242 | struct page *page; |
@@ -1220,6 +1245,13 @@ static unsigned long scan_lru_pages(unsigned long start, unsigned long end) | |||
1220 | page = pfn_to_page(pfn); | 1245 | page = pfn_to_page(pfn); |
1221 | if (PageLRU(page)) | 1246 | if (PageLRU(page)) |
1222 | return pfn; | 1247 | return pfn; |
1248 | if (PageHuge(page)) { | ||
1249 | if (is_hugepage_active(page)) | ||
1250 | return pfn; | ||
1251 | else | ||
1252 | pfn = round_up(pfn + 1, | ||
1253 | 1 << compound_order(page)) - 1; | ||
1254 | } | ||
1223 | } | 1255 | } |
1224 | } | 1256 | } |
1225 | return 0; | 1257 | return 0; |
@@ -1240,6 +1272,19 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | |||
1240 | if (!pfn_valid(pfn)) | 1272 | if (!pfn_valid(pfn)) |
1241 | continue; | 1273 | continue; |
1242 | page = pfn_to_page(pfn); | 1274 | page = pfn_to_page(pfn); |
1275 | |||
1276 | if (PageHuge(page)) { | ||
1277 | struct page *head = compound_head(page); | ||
1278 | pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1; | ||
1279 | if (compound_order(head) > PFN_SECTION_SHIFT) { | ||
1280 | ret = -EBUSY; | ||
1281 | break; | ||
1282 | } | ||
1283 | if (isolate_huge_page(page, &source)) | ||
1284 | move_pages -= 1 << compound_order(head); | ||
1285 | continue; | ||
1286 | } | ||
1287 | |||
1243 | if (!get_page_unless_zero(page)) | 1288 | if (!get_page_unless_zero(page)) |
1244 | continue; | 1289 | continue; |
1245 | /* | 1290 | /* |
@@ -1272,7 +1317,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | |||
1272 | } | 1317 | } |
1273 | if (!list_empty(&source)) { | 1318 | if (!list_empty(&source)) { |
1274 | if (not_managed) { | 1319 | if (not_managed) { |
1275 | putback_lru_pages(&source); | 1320 | putback_movable_pages(&source); |
1276 | goto out; | 1321 | goto out; |
1277 | } | 1322 | } |
1278 | 1323 | ||
@@ -1283,7 +1328,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | |||
1283 | ret = migrate_pages(&source, alloc_migrate_target, 0, | 1328 | ret = migrate_pages(&source, alloc_migrate_target, 0, |
1284 | MIGRATE_SYNC, MR_MEMORY_HOTPLUG); | 1329 | MIGRATE_SYNC, MR_MEMORY_HOTPLUG); |
1285 | if (ret) | 1330 | if (ret) |
1286 | putback_lru_pages(&source); | 1331 | putback_movable_pages(&source); |
1287 | } | 1332 | } |
1288 | out: | 1333 | out: |
1289 | return ret; | 1334 | return ret; |
@@ -1472,7 +1517,6 @@ static int __ref __offline_pages(unsigned long start_pfn, | |||
1472 | struct zone *zone; | 1517 | struct zone *zone; |
1473 | struct memory_notify arg; | 1518 | struct memory_notify arg; |
1474 | 1519 | ||
1475 | BUG_ON(start_pfn >= end_pfn); | ||
1476 | /* at least, alignment against pageblock is necessary */ | 1520 | /* at least, alignment against pageblock is necessary */ |
1477 | if (!IS_ALIGNED(start_pfn, pageblock_nr_pages)) | 1521 | if (!IS_ALIGNED(start_pfn, pageblock_nr_pages)) |
1478 | return -EINVAL; | 1522 | return -EINVAL; |
@@ -1527,8 +1571,8 @@ repeat: | |||
1527 | drain_all_pages(); | 1571 | drain_all_pages(); |
1528 | } | 1572 | } |
1529 | 1573 | ||
1530 | pfn = scan_lru_pages(start_pfn, end_pfn); | 1574 | pfn = scan_movable_pages(start_pfn, end_pfn); |
1531 | if (pfn) { /* We have page on LRU */ | 1575 | if (pfn) { /* We have movable pages */ |
1532 | ret = do_migrate_range(pfn, end_pfn); | 1576 | ret = do_migrate_range(pfn, end_pfn); |
1533 | if (!ret) { | 1577 | if (!ret) { |
1534 | drain = 1; | 1578 | drain = 1; |
@@ -1547,6 +1591,11 @@ repeat: | |||
1547 | yield(); | 1591 | yield(); |
1548 | /* drain pcp pages, this is synchronous. */ | 1592 | /* drain pcp pages, this is synchronous. */ |
1549 | drain_all_pages(); | 1593 | drain_all_pages(); |
1594 | /* | ||
1595 | * Dissolve free hugepages in the memory block before actually | ||
1596 | * offlining it, to keep hugetlbfs's object counting consistent. | ||
1597 | */ | ||
1598 | dissolve_free_huge_pages(start_pfn, end_pfn); | ||
1550 | /* check again */ | 1599 | /* check again */ |
1551 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); | 1600 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); |
1552 | if (offlined_pages < 0) { | 1601 | if (offlined_pages < 0) { |
@@ -1674,9 +1723,8 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg) | |||
1674 | return ret; | 1723 | return ret; |
1675 | } | 1724 | } |
1676 | 1725 | ||
1677 | static int check_cpu_on_node(void *data) | 1726 | static int check_cpu_on_node(pg_data_t *pgdat) |
1678 | { | 1727 | { |
1679 | struct pglist_data *pgdat = data; | ||
1680 | int cpu; | 1728 | int cpu; |
1681 | 1729 | ||
1682 | for_each_present_cpu(cpu) { | 1730 | for_each_present_cpu(cpu) { |
@@ -1691,10 +1739,9 @@ static int check_cpu_on_node(void *data) | |||
1691 | return 0; | 1739 | return 0; |
1692 | } | 1740 | } |
1693 | 1741 | ||
1694 | static void unmap_cpu_on_node(void *data) | 1742 | static void unmap_cpu_on_node(pg_data_t *pgdat) |
1695 | { | 1743 | { |
1696 | #ifdef CONFIG_ACPI_NUMA | 1744 | #ifdef CONFIG_ACPI_NUMA |
1697 | struct pglist_data *pgdat = data; | ||
1698 | int cpu; | 1745 | int cpu; |
1699 | 1746 | ||
1700 | for_each_possible_cpu(cpu) | 1747 | for_each_possible_cpu(cpu) |
@@ -1703,10 +1750,11 @@ static void unmap_cpu_on_node(void *data) | |||
1703 | #endif | 1750 | #endif |
1704 | } | 1751 | } |
1705 | 1752 | ||
1706 | static int check_and_unmap_cpu_on_node(void *data) | 1753 | static int check_and_unmap_cpu_on_node(pg_data_t *pgdat) |
1707 | { | 1754 | { |
1708 | int ret = check_cpu_on_node(data); | 1755 | int ret; |
1709 | 1756 | ||
1757 | ret = check_cpu_on_node(pgdat); | ||
1710 | if (ret) | 1758 | if (ret) |
1711 | return ret; | 1759 | return ret; |
1712 | 1760 | ||
@@ -1715,11 +1763,18 @@ static int check_and_unmap_cpu_on_node(void *data) | |||
1715 | * the cpu_to_node() now. | 1763 | * the cpu_to_node() now. |
1716 | */ | 1764 | */ |
1717 | 1765 | ||
1718 | unmap_cpu_on_node(data); | 1766 | unmap_cpu_on_node(pgdat); |
1719 | return 0; | 1767 | return 0; |
1720 | } | 1768 | } |
1721 | 1769 | ||
1722 | /* offline the node if all memory sections of this node are removed */ | 1770 | /** |
1771 | * try_offline_node | ||
1772 | * | ||
1773 | * Offline a node if all memory sections and cpus of the node are removed. | ||
1774 | * | ||
1775 | * NOTE: The caller must call lock_device_hotplug() to serialize hotplug | ||
1776 | * and online/offline operations before this call. | ||
1777 | */ | ||
1723 | void try_offline_node(int nid) | 1778 | void try_offline_node(int nid) |
1724 | { | 1779 | { |
1725 | pg_data_t *pgdat = NODE_DATA(nid); | 1780 | pg_data_t *pgdat = NODE_DATA(nid); |
@@ -1745,7 +1800,7 @@ void try_offline_node(int nid) | |||
1745 | return; | 1800 | return; |
1746 | } | 1801 | } |
1747 | 1802 | ||
1748 | if (stop_machine(check_and_unmap_cpu_on_node, pgdat, NULL)) | 1803 | if (check_and_unmap_cpu_on_node(pgdat)) |
1749 | return; | 1804 | return; |
1750 | 1805 | ||
1751 | /* | 1806 | /* |
@@ -1782,10 +1837,19 @@ void try_offline_node(int nid) | |||
1782 | } | 1837 | } |
1783 | EXPORT_SYMBOL(try_offline_node); | 1838 | EXPORT_SYMBOL(try_offline_node); |
1784 | 1839 | ||
1840 | /** | ||
1841 | * remove_memory | ||
1842 | * | ||
1843 | * NOTE: The caller must call lock_device_hotplug() to serialize hotplug | ||
1844 | * and online/offline operations before this call, as required by | ||
1845 | * try_offline_node(). | ||
1846 | */ | ||
1785 | void __ref remove_memory(int nid, u64 start, u64 size) | 1847 | void __ref remove_memory(int nid, u64 start, u64 size) |
1786 | { | 1848 | { |
1787 | int ret; | 1849 | int ret; |
1788 | 1850 | ||
1851 | BUG_ON(check_hotplug_memory_range(start, size)); | ||
1852 | |||
1789 | lock_memory_hotplug(); | 1853 | lock_memory_hotplug(); |
1790 | 1854 | ||
1791 | /* | 1855 | /* |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4baf12e534d1..04729647f359 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -123,16 +123,19 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES]; | |||
123 | static struct mempolicy *get_task_policy(struct task_struct *p) | 123 | static struct mempolicy *get_task_policy(struct task_struct *p) |
124 | { | 124 | { |
125 | struct mempolicy *pol = p->mempolicy; | 125 | struct mempolicy *pol = p->mempolicy; |
126 | int node; | ||
127 | 126 | ||
128 | if (!pol) { | 127 | if (!pol) { |
129 | node = numa_node_id(); | 128 | int node = numa_node_id(); |
130 | if (node != NUMA_NO_NODE) | ||
131 | pol = &preferred_node_policy[node]; | ||
132 | 129 | ||
133 | /* preferred_node_policy is not initialised early in boot */ | 130 | if (node != NUMA_NO_NODE) { |
134 | if (!pol->mode) | 131 | pol = &preferred_node_policy[node]; |
135 | pol = NULL; | 132 | /* |
133 | * preferred_node_policy is not initialised early in | ||
134 | * boot | ||
135 | */ | ||
136 | if (!pol->mode) | ||
137 | pol = NULL; | ||
138 | } | ||
136 | } | 139 | } |
137 | 140 | ||
138 | return pol; | 141 | return pol; |
@@ -473,8 +476,11 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { | |||
473 | static void migrate_page_add(struct page *page, struct list_head *pagelist, | 476 | static void migrate_page_add(struct page *page, struct list_head *pagelist, |
474 | unsigned long flags); | 477 | unsigned long flags); |
475 | 478 | ||
476 | /* Scan through pages checking if pages follow certain conditions. */ | 479 | /* |
477 | static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | 480 | * Scan through pages checking if pages follow certain conditions, |
481 | * and move them to the pagelist if they do. | ||
482 | */ | ||
483 | static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | ||
478 | unsigned long addr, unsigned long end, | 484 | unsigned long addr, unsigned long end, |
479 | const nodemask_t *nodes, unsigned long flags, | 485 | const nodemask_t *nodes, unsigned long flags, |
480 | void *private) | 486 | void *private) |
@@ -512,7 +518,31 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
512 | return addr != end; | 518 | return addr != end; |
513 | } | 519 | } |
514 | 520 | ||
515 | static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, | 521 | static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, |
522 | pmd_t *pmd, const nodemask_t *nodes, unsigned long flags, | ||
523 | void *private) | ||
524 | { | ||
525 | #ifdef CONFIG_HUGETLB_PAGE | ||
526 | int nid; | ||
527 | struct page *page; | ||
528 | |||
529 | spin_lock(&vma->vm_mm->page_table_lock); | ||
530 | page = pte_page(huge_ptep_get((pte_t *)pmd)); | ||
531 | nid = page_to_nid(page); | ||
532 | if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) | ||
533 | goto unlock; | ||
534 | /* With MPOL_MF_MOVE, we migrate only unshared hugepages. */ | ||
535 | if (flags & (MPOL_MF_MOVE_ALL) || | ||
536 | (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) | ||
537 | isolate_huge_page(page, private); | ||
538 | unlock: | ||
539 | spin_unlock(&vma->vm_mm->page_table_lock); | ||
540 | #else | ||
541 | BUG(); | ||
542 | #endif | ||
543 | } | ||
544 | |||
545 | static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud, | ||
516 | unsigned long addr, unsigned long end, | 546 | unsigned long addr, unsigned long end, |
517 | const nodemask_t *nodes, unsigned long flags, | 547 | const nodemask_t *nodes, unsigned long flags, |
518 | void *private) | 548 | void *private) |
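The node check in queue_pages_hugetlb_pmd_range() above is easy to misread:
do_mbind() calls queue_pages_range() with MPOL_MF_INVERT (see the hunk
further down), so pages already sitting on an allowed node are skipped and
only misplaced pages get queued. A restatement of the predicate, with a
hypothetical helper name:

        /*
         * Hypothetical restatement of the skip test. With MPOL_MF_INVERT
         * set, a page whose node is already in @nodes stays put; without
         * it, exactly those pages would be the ones queued.
         */
        static bool queue_pages_should_skip(int nid, const nodemask_t *nodes,
                                            unsigned long flags)
        {
                bool on_listed_node = node_isset(nid, *nodes);
                bool invert = !!(flags & MPOL_MF_INVERT);

                return on_listed_node == invert;
        }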
@@ -523,17 +553,24 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, | |||
523 | pmd = pmd_offset(pud, addr); | 553 | pmd = pmd_offset(pud, addr); |
524 | do { | 554 | do { |
525 | next = pmd_addr_end(addr, end); | 555 | next = pmd_addr_end(addr, end); |
556 | if (!pmd_present(*pmd)) | ||
557 | continue; | ||
558 | if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) { | ||
559 | queue_pages_hugetlb_pmd_range(vma, pmd, nodes, | ||
560 | flags, private); | ||
561 | continue; | ||
562 | } | ||
526 | split_huge_page_pmd(vma, addr, pmd); | 563 | split_huge_page_pmd(vma, addr, pmd); |
527 | if (pmd_none_or_trans_huge_or_clear_bad(pmd)) | 564 | if (pmd_none_or_trans_huge_or_clear_bad(pmd)) |
528 | continue; | 565 | continue; |
529 | if (check_pte_range(vma, pmd, addr, next, nodes, | 566 | if (queue_pages_pte_range(vma, pmd, addr, next, nodes, |
530 | flags, private)) | 567 | flags, private)) |
531 | return -EIO; | 568 | return -EIO; |
532 | } while (pmd++, addr = next, addr != end); | 569 | } while (pmd++, addr = next, addr != end); |
533 | return 0; | 570 | return 0; |
534 | } | 571 | } |
535 | 572 | ||
536 | static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, | 573 | static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd, |
537 | unsigned long addr, unsigned long end, | 574 | unsigned long addr, unsigned long end, |
538 | const nodemask_t *nodes, unsigned long flags, | 575 | const nodemask_t *nodes, unsigned long flags, |
539 | void *private) | 576 | void *private) |
@@ -544,16 +581,18 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, | |||
544 | pud = pud_offset(pgd, addr); | 581 | pud = pud_offset(pgd, addr); |
545 | do { | 582 | do { |
546 | next = pud_addr_end(addr, end); | 583 | next = pud_addr_end(addr, end); |
584 | if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) | ||
585 | continue; | ||
547 | if (pud_none_or_clear_bad(pud)) | 586 | if (pud_none_or_clear_bad(pud)) |
548 | continue; | 587 | continue; |
549 | if (check_pmd_range(vma, pud, addr, next, nodes, | 588 | if (queue_pages_pmd_range(vma, pud, addr, next, nodes, |
550 | flags, private)) | 589 | flags, private)) |
551 | return -EIO; | 590 | return -EIO; |
552 | } while (pud++, addr = next, addr != end); | 591 | } while (pud++, addr = next, addr != end); |
553 | return 0; | 592 | return 0; |
554 | } | 593 | } |
555 | 594 | ||
556 | static inline int check_pgd_range(struct vm_area_struct *vma, | 595 | static inline int queue_pages_pgd_range(struct vm_area_struct *vma, |
557 | unsigned long addr, unsigned long end, | 596 | unsigned long addr, unsigned long end, |
558 | const nodemask_t *nodes, unsigned long flags, | 597 | const nodemask_t *nodes, unsigned long flags, |
559 | void *private) | 598 | void *private) |
@@ -566,7 +605,7 @@ static inline int check_pgd_range(struct vm_area_struct *vma, | |||
566 | next = pgd_addr_end(addr, end); | 605 | next = pgd_addr_end(addr, end); |
567 | if (pgd_none_or_clear_bad(pgd)) | 606 | if (pgd_none_or_clear_bad(pgd)) |
568 | continue; | 607 | continue; |
569 | if (check_pud_range(vma, pgd, addr, next, nodes, | 608 | if (queue_pages_pud_range(vma, pgd, addr, next, nodes, |
570 | flags, private)) | 609 | flags, private)) |
571 | return -EIO; | 610 | return -EIO; |
572 | } while (pgd++, addr = next, addr != end); | 611 | } while (pgd++, addr = next, addr != end); |
@@ -604,12 +643,14 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, | |||
604 | #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ | 643 | #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ |
605 | 644 | ||
606 | /* | 645 | /* |
607 | * Check if all pages in a range are on a set of nodes. | 646 | * Walk through page tables and collect pages to be migrated. |
608 | * If pagelist != NULL then isolate pages from the LRU and | 647 | * |
609 | * put them on the pagelist. | 648 | * If pages found in a given range are on a set of nodes (determined by |
649 | * @nodes and @flags), they are isolated and queued to the pagelist, | ||
650 | * which is passed via @private. | ||
610 | */ | 651 | */ |
611 | static struct vm_area_struct * | 652 | static struct vm_area_struct * |
612 | check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | 653 | queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, |
613 | const nodemask_t *nodes, unsigned long flags, void *private) | 654 | const nodemask_t *nodes, unsigned long flags, void *private) |
614 | { | 655 | { |
615 | int err; | 656 | int err; |
@@ -635,9 +676,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
635 | return ERR_PTR(-EFAULT); | 676 | return ERR_PTR(-EFAULT); |
636 | } | 677 | } |
637 | 678 | ||
638 | if (is_vm_hugetlb_page(vma)) | ||
639 | goto next; | ||
640 | |||
641 | if (flags & MPOL_MF_LAZY) { | 679 | if (flags & MPOL_MF_LAZY) { |
642 | change_prot_numa(vma, start, endvma); | 680 | change_prot_numa(vma, start, endvma); |
643 | goto next; | 681 | goto next; |
@@ -647,7 +685,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
647 | ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && | 685 | ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && |
648 | vma_migratable(vma))) { | 686 | vma_migratable(vma))) { |
649 | 687 | ||
650 | err = check_pgd_range(vma, start, endvma, nodes, | 688 | err = queue_pages_pgd_range(vma, start, endvma, nodes, |
651 | flags, private); | 689 | flags, private); |
652 | if (err) { | 690 | if (err) { |
653 | first = ERR_PTR(err); | 691 | first = ERR_PTR(err); |
@@ -990,7 +1028,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist, | |||
990 | 1028 | ||
991 | static struct page *new_node_page(struct page *page, unsigned long node, int **x) | 1029 | static struct page *new_node_page(struct page *page, unsigned long node, int **x) |
992 | { | 1030 | { |
993 | return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0); | 1031 | if (PageHuge(page)) |
1032 | return alloc_huge_page_node(page_hstate(compound_head(page)), | ||
1033 | node); | ||
1034 | else | ||
1035 | return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0); | ||
994 | } | 1036 | } |
995 | 1037 | ||
996 | /* | 1038 | /* |
@@ -1013,14 +1055,14 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, | |||
1013 | * space range and MPOL_MF_DISCONTIG_OK, this call can not fail. | 1055 | * space range and MPOL_MF_DISCONTIG_OK, this call can not fail. |
1014 | */ | 1056 | */ |
1015 | VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); | 1057 | VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); |
1016 | check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask, | 1058 | queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask, |
1017 | flags | MPOL_MF_DISCONTIG_OK, &pagelist); | 1059 | flags | MPOL_MF_DISCONTIG_OK, &pagelist); |
1018 | 1060 | ||
1019 | if (!list_empty(&pagelist)) { | 1061 | if (!list_empty(&pagelist)) { |
1020 | err = migrate_pages(&pagelist, new_node_page, dest, | 1062 | err = migrate_pages(&pagelist, new_node_page, dest, |
1021 | MIGRATE_SYNC, MR_SYSCALL); | 1063 | MIGRATE_SYNC, MR_SYSCALL); |
1022 | if (err) | 1064 | if (err) |
1023 | putback_lru_pages(&pagelist); | 1065 | putback_movable_pages(&pagelist); |
1024 | } | 1066 | } |
1025 | 1067 | ||
1026 | return err; | 1068 | return err; |
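Together with the new_node_page() hunk above, node-to-node migration now
funnels regular and huge pages through a single path. Condensed from the
code in this hunk (declarations and error handling trimmed):

        LIST_HEAD(pagelist);

        /* collect every misplaced page in the task's address space */
        queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
                          flags | MPOL_MF_DISCONTIG_OK, &pagelist);

        if (!list_empty(&pagelist)) {
                /* new_node_page() allocates a hugepage for PageHuge() sources */
                err = migrate_pages(&pagelist, new_node_page, dest,
                                    MIGRATE_SYNC, MR_SYSCALL);
                if (err)
                        /* hugepage-aware putback, per the mm/migrate.c hunks below */
                        putback_movable_pages(&pagelist);
        }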
@@ -1154,10 +1196,14 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int * | |||
1154 | break; | 1196 | break; |
1155 | vma = vma->vm_next; | 1197 | vma = vma->vm_next; |
1156 | } | 1198 | } |
1157 | |||
1158 | /* | 1199 | /* |
1159 | * if !vma, alloc_page_vma() will use task or system default policy | 1200 | * queue_pages_range() confirms that @page belongs to some vma, |
1201 | * so vma shouldn't be NULL. | ||
1160 | */ | 1202 | */ |
1203 | BUG_ON(!vma); | ||
1204 | |||
1205 | if (PageHuge(page)) | ||
1206 | return alloc_huge_page_noerr(vma, address, 1); | ||
1161 | return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); | 1207 | return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); |
1162 | } | 1208 | } |
1163 | #else | 1209 | #else |
@@ -1249,7 +1295,7 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1249 | if (err) | 1295 | if (err) |
1250 | goto mpol_out; | 1296 | goto mpol_out; |
1251 | 1297 | ||
1252 | vma = check_range(mm, start, end, nmask, | 1298 | vma = queue_pages_range(mm, start, end, nmask, |
1253 | flags | MPOL_MF_INVERT, &pagelist); | 1299 | flags | MPOL_MF_INVERT, &pagelist); |
1254 | 1300 | ||
1255 | err = PTR_ERR(vma); /* maybe ... */ | 1301 | err = PTR_ERR(vma); /* maybe ... */ |
@@ -1265,7 +1311,7 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1265 | (unsigned long)vma, | 1311 | (unsigned long)vma, |
1266 | MIGRATE_SYNC, MR_MEMPOLICY_MBIND); | 1312 | MIGRATE_SYNC, MR_MEMPOLICY_MBIND); |
1267 | if (nr_failed) | 1313 | if (nr_failed) |
1268 | putback_lru_pages(&pagelist); | 1314 | putback_movable_pages(&pagelist); |
1269 | } | 1315 | } |
1270 | 1316 | ||
1271 | if (nr_failed && (flags & MPOL_MF_STRICT)) | 1317 | if (nr_failed && (flags & MPOL_MF_STRICT)) |
@@ -2065,6 +2111,16 @@ retry_cpuset: | |||
2065 | } | 2111 | } |
2066 | EXPORT_SYMBOL(alloc_pages_current); | 2112 | EXPORT_SYMBOL(alloc_pages_current); |
2067 | 2113 | ||
2114 | int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) | ||
2115 | { | ||
2116 | struct mempolicy *pol = mpol_dup(vma_policy(src)); | ||
2117 | |||
2118 | if (IS_ERR(pol)) | ||
2119 | return PTR_ERR(pol); | ||
2120 | dst->vm_policy = pol; | ||
2121 | return 0; | ||
2122 | } | ||
2123 | |||
2068 | /* | 2124 | /* |
2069 | * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it | 2125 | * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it |
2070 | * rebinds the mempolicy it's copying by calling mpol_rebind_policy() | 2126 | * rebinds the mempolicy it's copying by calling mpol_rebind_policy() |
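vma_dup_policy() above folds the old mpol_dup()/IS_ERR()/vma_set_policy()
sequence into one call; the mm/mmap.c hunks further down convert
__split_vma() and copy_vma() to it. The call-site shape reduces to the
following (a sketch with a hypothetical wrapper name, mirroring those
hunks):

        /* Hypothetical wrapper showing the new call-site pattern. */
        static int example_clone_vma_policy(struct vm_area_struct *old,
                                            struct vm_area_struct *new)
        {
                int err = vma_dup_policy(old, new);     /* dup + assign */

                if (err)
                        return err;
                /*
                 * On any later failure, mpol_put(vma_policy(new)) releases
                 * the duplicated policy, as the converted error paths do.
                 */
                return 0;
        }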
diff --git a/mm/mempool.c b/mm/mempool.c index 54990476c049..659aa42bad16 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
@@ -73,7 +73,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, | |||
73 | gfp_t gfp_mask, int node_id) | 73 | gfp_t gfp_mask, int node_id) |
74 | { | 74 | { |
75 | mempool_t *pool; | 75 | mempool_t *pool; |
76 | pool = kmalloc_node(sizeof(*pool), gfp_mask | __GFP_ZERO, node_id); | 76 | pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id); |
77 | if (!pool) | 77 | if (!pool) |
78 | return NULL; | 78 | return NULL; |
79 | pool->elements = kmalloc_node(min_nr * sizeof(void *), | 79 | pool->elements = kmalloc_node(min_nr * sizeof(void *), |
diff --git a/mm/migrate.c b/mm/migrate.c index 6f0c24438bba..b7ded7eafe3a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -100,6 +100,10 @@ void putback_movable_pages(struct list_head *l) | |||
100 | struct page *page2; | 100 | struct page *page2; |
101 | 101 | ||
102 | list_for_each_entry_safe(page, page2, l, lru) { | 102 | list_for_each_entry_safe(page, page2, l, lru) { |
103 | if (unlikely(PageHuge(page))) { | ||
104 | putback_active_hugepage(page); | ||
105 | continue; | ||
106 | } | ||
103 | list_del(&page->lru); | 107 | list_del(&page->lru); |
104 | dec_zone_page_state(page, NR_ISOLATED_ANON + | 108 | dec_zone_page_state(page, NR_ISOLATED_ANON + |
105 | page_is_file_cache(page)); | 109 | page_is_file_cache(page)); |
@@ -945,6 +949,16 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, | |||
945 | struct page *new_hpage = get_new_page(hpage, private, &result); | 949 | struct page *new_hpage = get_new_page(hpage, private, &result); |
946 | struct anon_vma *anon_vma = NULL; | 950 | struct anon_vma *anon_vma = NULL; |
947 | 951 | ||
952 | /* | ||
953 | * Movability of hugepages depends on the architecture and hugepage size. | ||
954 | * This check is necessary because some callers of hugepage migration | ||
955 | * like soft offline and memory hotremove don't walk through page | ||
956 | * tables or check whether the hugepage is pmd-based or not before | ||
957 | * kicking migration. | ||
958 | */ | ||
959 | if (!hugepage_migration_support(page_hstate(hpage))) | ||
960 | return -ENOSYS; | ||
961 | |||
948 | if (!new_hpage) | 962 | if (!new_hpage) |
949 | return -ENOMEM; | 963 | return -ENOMEM; |
950 | 964 | ||
@@ -975,6 +989,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, | |||
975 | 989 | ||
976 | unlock_page(hpage); | 990 | unlock_page(hpage); |
977 | out: | 991 | out: |
992 | if (rc != -EAGAIN) | ||
993 | putback_active_hugepage(hpage); | ||
978 | put_page(new_hpage); | 994 | put_page(new_hpage); |
979 | if (result) { | 995 | if (result) { |
980 | if (rc) | 996 | if (rc) |
@@ -1025,7 +1041,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, | |||
1025 | list_for_each_entry_safe(page, page2, from, lru) { | 1041 | list_for_each_entry_safe(page, page2, from, lru) { |
1026 | cond_resched(); | 1042 | cond_resched(); |
1027 | 1043 | ||
1028 | rc = unmap_and_move(get_new_page, private, | 1044 | if (PageHuge(page)) |
1045 | rc = unmap_and_move_huge_page(get_new_page, | ||
1046 | private, page, pass > 2, mode); | ||
1047 | else | ||
1048 | rc = unmap_and_move(get_new_page, private, | ||
1029 | page, pass > 2, mode); | 1049 | page, pass > 2, mode); |
1030 | 1050 | ||
1031 | switch(rc) { | 1051 | switch(rc) { |
@@ -1058,32 +1078,6 @@ out: | |||
1058 | return rc; | 1078 | return rc; |
1059 | } | 1079 | } |
1060 | 1080 | ||
1061 | int migrate_huge_page(struct page *hpage, new_page_t get_new_page, | ||
1062 | unsigned long private, enum migrate_mode mode) | ||
1063 | { | ||
1064 | int pass, rc; | ||
1065 | |||
1066 | for (pass = 0; pass < 10; pass++) { | ||
1067 | rc = unmap_and_move_huge_page(get_new_page, private, | ||
1068 | hpage, pass > 2, mode); | ||
1069 | switch (rc) { | ||
1070 | case -ENOMEM: | ||
1071 | goto out; | ||
1072 | case -EAGAIN: | ||
1073 | /* try again */ | ||
1074 | cond_resched(); | ||
1075 | break; | ||
1076 | case MIGRATEPAGE_SUCCESS: | ||
1077 | goto out; | ||
1078 | default: | ||
1079 | rc = -EIO; | ||
1080 | goto out; | ||
1081 | } | ||
1082 | } | ||
1083 | out: | ||
1084 | return rc; | ||
1085 | } | ||
1086 | |||
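With the PageHuge() dispatch above, the dedicated migrate_huge_page() loop
just removed has no remaining callers: a mixed list of LRU pages and active
hugepages can be handed to migrate_pages() directly. Sketch (nid stands in
for whatever the caller passes as private to its get_new_page callback):

        LIST_HEAD(pagelist);

        /* ... filled via isolate_lru_page() / isolate_huge_page() ... */

        if (migrate_pages(&pagelist, new_node_page, nid,
                          MIGRATE_SYNC, MR_SYSCALL))
                /* putback_movable_pages() understands hugepages too */
                putback_movable_pages(&pagelist);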
1087 | #ifdef CONFIG_NUMA | 1081 | #ifdef CONFIG_NUMA |
1088 | /* | 1082 | /* |
1089 | * Move a list of individual pages | 1083 | * Move a list of individual pages |
@@ -1108,7 +1102,11 @@ static struct page *new_page_node(struct page *p, unsigned long private, | |||
1108 | 1102 | ||
1109 | *result = &pm->status; | 1103 | *result = &pm->status; |
1110 | 1104 | ||
1111 | return alloc_pages_exact_node(pm->node, | 1105 | if (PageHuge(p)) |
1106 | return alloc_huge_page_node(page_hstate(compound_head(p)), | ||
1107 | pm->node); | ||
1108 | else | ||
1109 | return alloc_pages_exact_node(pm->node, | ||
1112 | GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); | 1110 | GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); |
1113 | } | 1111 | } |
1114 | 1112 | ||
@@ -1168,6 +1166,11 @@ static int do_move_page_to_node_array(struct mm_struct *mm, | |||
1168 | !migrate_all) | 1166 | !migrate_all) |
1169 | goto put_and_set; | 1167 | goto put_and_set; |
1170 | 1168 | ||
1169 | if (PageHuge(page)) { | ||
1170 | isolate_huge_page(page, &pagelist); | ||
1171 | goto put_and_set; | ||
1172 | } | ||
1173 | |||
1171 | err = isolate_lru_page(page); | 1174 | err = isolate_lru_page(page); |
1172 | if (!err) { | 1175 | if (!err) { |
1173 | list_add_tail(&page->lru, &pagelist); | 1176 | list_add_tail(&page->lru, &pagelist); |
@@ -1190,7 +1193,7 @@ set_status: | |||
1190 | err = migrate_pages(&pagelist, new_page_node, | 1193 | err = migrate_pages(&pagelist, new_page_node, |
1191 | (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL); | 1194 | (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL); |
1192 | if (err) | 1195 | if (err) |
1193 | putback_lru_pages(&pagelist); | 1196 | putback_movable_pages(&pagelist); |
1194 | } | 1197 | } |
1195 | 1198 | ||
1196 | up_read(&mm->mmap_sem); | 1199 | up_read(&mm->mmap_sem); |
@@ -1468,7 +1471,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat, | |||
1468 | if (!populated_zone(zone)) | 1471 | if (!populated_zone(zone)) |
1469 | continue; | 1472 | continue; |
1470 | 1473 | ||
1471 | if (zone->all_unreclaimable) | 1474 | if (!zone_reclaimable(zone)) |
1472 | continue; | 1475 | continue; |
1473 | 1476 | ||
1474 | /* Avoid waking kswapd by allocating pages_to_migrate pages. */ | 1477 | /* Avoid waking kswapd by allocating pages_to_migrate pages. */ |
diff --git a/mm/mlock.c b/mm/mlock.c index 79b7cf7d1bca..d63802663242 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/swap.h> | 11 | #include <linux/swap.h> |
12 | #include <linux/swapops.h> | 12 | #include <linux/swapops.h> |
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/pagevec.h> | ||
14 | #include <linux/mempolicy.h> | 15 | #include <linux/mempolicy.h> |
15 | #include <linux/syscalls.h> | 16 | #include <linux/syscalls.h> |
16 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
@@ -18,6 +19,8 @@ | |||
18 | #include <linux/rmap.h> | 19 | #include <linux/rmap.h> |
19 | #include <linux/mmzone.h> | 20 | #include <linux/mmzone.h> |
20 | #include <linux/hugetlb.h> | 21 | #include <linux/hugetlb.h> |
22 | #include <linux/memcontrol.h> | ||
23 | #include <linux/mm_inline.h> | ||
21 | 24 | ||
22 | #include "internal.h" | 25 | #include "internal.h" |
23 | 26 | ||
@@ -87,6 +90,47 @@ void mlock_vma_page(struct page *page) | |||
87 | } | 90 | } |
88 | } | 91 | } |
89 | 92 | ||
93 | /* | ||
94 | * Finish munlock after successful page isolation | ||
95 | * | ||
96 | * Page must be locked. This is a wrapper for try_to_munlock() | ||
97 | * and putback_lru_page() with munlock accounting. | ||
98 | */ | ||
99 | static void __munlock_isolated_page(struct page *page) | ||
100 | { | ||
101 | int ret = SWAP_AGAIN; | ||
102 | |||
103 | /* | ||
104 | * Optimization: if the page was mapped just once, that's our mapping | ||
105 | * and we don't need to check all the other vmas. | ||
106 | */ | ||
107 | if (page_mapcount(page) > 1) | ||
108 | ret = try_to_munlock(page); | ||
109 | |||
110 | /* Did try_to_munlock() succeed or punt? */ | ||
111 | if (ret != SWAP_MLOCK) | ||
112 | count_vm_event(UNEVICTABLE_PGMUNLOCKED); | ||
113 | |||
114 | putback_lru_page(page); | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * Accounting for page isolation fail during munlock | ||
119 | * | ||
120 | * Performs accounting when page isolation fails in munlock. There is nothing | ||
121 | * else to do because it means some other task has already removed the page | ||
122 | * from the LRU. putback_lru_page() will take care of removing the page from | ||
123 | * the unevictable list, if necessary. vmscan [page_referenced()] will move | ||
124 | * the page back to the unevictable list if some other vma has it mlocked. | ||
125 | */ | ||
126 | static void __munlock_isolation_failed(struct page *page) | ||
127 | { | ||
128 | if (PageUnevictable(page)) | ||
129 | count_vm_event(UNEVICTABLE_PGSTRANDED); | ||
130 | else | ||
131 | count_vm_event(UNEVICTABLE_PGMUNLOCKED); | ||
132 | } | ||
133 | |||
90 | /** | 134 | /** |
91 | * munlock_vma_page - munlock a vma page | 135 | * munlock_vma_page - munlock a vma page |
92 | * @page - page to be unlocked | 136 | * @page - page to be unlocked |
@@ -112,37 +156,10 @@ unsigned int munlock_vma_page(struct page *page) | |||
112 | unsigned int nr_pages = hpage_nr_pages(page); | 156 | unsigned int nr_pages = hpage_nr_pages(page); |
113 | mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); | 157 | mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); |
114 | page_mask = nr_pages - 1; | 158 | page_mask = nr_pages - 1; |
115 | if (!isolate_lru_page(page)) { | 159 | if (!isolate_lru_page(page)) |
116 | int ret = SWAP_AGAIN; | 160 | __munlock_isolated_page(page); |
117 | 161 | else | |
118 | /* | 162 | __munlock_isolation_failed(page); |
119 | * Optimization: if the page was mapped just once, | ||
120 | * that's our mapping and we don't need to check all the | ||
121 | * other vmas. | ||
122 | */ | ||
123 | if (page_mapcount(page) > 1) | ||
124 | ret = try_to_munlock(page); | ||
125 | /* | ||
126 | * did try_to_unlock() succeed or punt? | ||
127 | */ | ||
128 | if (ret != SWAP_MLOCK) | ||
129 | count_vm_event(UNEVICTABLE_PGMUNLOCKED); | ||
130 | |||
131 | putback_lru_page(page); | ||
132 | } else { | ||
133 | /* | ||
134 | * Some other task has removed the page from the LRU. | ||
135 | * putback_lru_page() will take care of removing the | ||
136 | * page from the unevictable list, if necessary. | ||
137 | * vmscan [page_referenced()] will move the page back | ||
138 | * to the unevictable list if some other vma has it | ||
139 | * mlocked. | ||
140 | */ | ||
141 | if (PageUnevictable(page)) | ||
142 | count_vm_event(UNEVICTABLE_PGSTRANDED); | ||
143 | else | ||
144 | count_vm_event(UNEVICTABLE_PGMUNLOCKED); | ||
145 | } | ||
146 | } | 163 | } |
147 | 164 | ||
148 | return page_mask; | 165 | return page_mask; |
@@ -210,6 +227,191 @@ static int __mlock_posix_error_return(long retval) | |||
210 | } | 227 | } |
211 | 228 | ||
212 | /* | 229 | /* |
230 | * Prepare page for fast batched LRU putback via __putback_lru_fast() | ||
231 | * | ||
232 | * The fast path is available only for evictable pages with single mapping. | ||
233 | * Then we can bypass the per-cpu pvec and get better performance. | ||
234 | * When mapcount > 1 we need try_to_munlock(), which can fail. | ||
235 | * When !page_evictable(), we need the full redo logic of putback_lru_page to | ||
236 | * avoid leaving an evictable page on the unevictable list. | ||
237 | * | ||
238 | * In case of success, @page is added to @pvec and @pgrescued is incremented | ||
239 | * if the page was previously unevictable. @page is also unlocked. | ||
240 | */ | ||
241 | static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec, | ||
242 | int *pgrescued) | ||
243 | { | ||
244 | VM_BUG_ON(PageLRU(page)); | ||
245 | VM_BUG_ON(!PageLocked(page)); | ||
246 | |||
247 | if (page_mapcount(page) <= 1 && page_evictable(page)) { | ||
248 | pagevec_add(pvec, page); | ||
249 | if (TestClearPageUnevictable(page)) | ||
250 | (*pgrescued)++; | ||
251 | unlock_page(page); | ||
252 | return true; | ||
253 | } | ||
254 | |||
255 | return false; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * Putback multiple evictable pages to the LRU | ||
260 | * | ||
261 | * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of | ||
262 | * the pages might have meanwhile become unevictable but that is OK. | ||
263 | */ | ||
264 | static void __putback_lru_fast(struct pagevec *pvec, int pgrescued) | ||
265 | { | ||
266 | count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec)); | ||
267 | /* | ||
268 | *__pagevec_lru_add() calls release_pages() so we don't call | ||
269 | * put_page() explicitly | ||
270 | */ | ||
271 | __pagevec_lru_add(pvec); | ||
272 | count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Munlock a batch of pages from the same zone | ||
277 | * | ||
278 | * The work is split to two main phases. First phase clears the Mlocked flag | ||
279 | * and attempts to isolate the pages, all under a single zone lru lock. | ||
280 | * The second phase finishes the munlock only for pages where isolation | ||
281 | * succeeded. | ||
282 | * | ||
283 | * Note that the pagevec may be modified during the process. | ||
284 | */ | ||
285 | static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) | ||
286 | { | ||
287 | int i; | ||
288 | int nr = pagevec_count(pvec); | ||
289 | int delta_munlocked = -nr; | ||
290 | struct pagevec pvec_putback; | ||
291 | int pgrescued = 0; | ||
292 | |||
293 | /* Phase 1: page isolation */ | ||
294 | spin_lock_irq(&zone->lru_lock); | ||
295 | for (i = 0; i < nr; i++) { | ||
296 | struct page *page = pvec->pages[i]; | ||
297 | |||
298 | if (TestClearPageMlocked(page)) { | ||
299 | struct lruvec *lruvec; | ||
300 | int lru; | ||
301 | |||
302 | if (PageLRU(page)) { | ||
303 | lruvec = mem_cgroup_page_lruvec(page, zone); | ||
304 | lru = page_lru(page); | ||
305 | /* | ||
306 | * We already have pin from follow_page_mask() | ||
307 | * so we can spare the get_page() here. | ||
308 | */ | ||
309 | ClearPageLRU(page); | ||
310 | del_page_from_lru_list(page, lruvec, lru); | ||
311 | } else { | ||
312 | __munlock_isolation_failed(page); | ||
313 | goto skip_munlock; | ||
314 | } | ||
315 | |||
316 | } else { | ||
317 | skip_munlock: | ||
318 | /* | ||
319 | * We won't be munlocking this page in the next phase | ||
320 | * but we still need to release the follow_page_mask() | ||
321 | * pin. | ||
322 | */ | ||
323 | pvec->pages[i] = NULL; | ||
324 | put_page(page); | ||
325 | delta_munlocked++; | ||
326 | } | ||
327 | } | ||
328 | __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); | ||
329 | spin_unlock_irq(&zone->lru_lock); | ||
330 | |||
331 | /* Phase 2: page munlock */ | ||
332 | pagevec_init(&pvec_putback, 0); | ||
333 | for (i = 0; i < nr; i++) { | ||
334 | struct page *page = pvec->pages[i]; | ||
335 | |||
336 | if (page) { | ||
337 | lock_page(page); | ||
338 | if (!__putback_lru_fast_prepare(page, &pvec_putback, | ||
339 | &pgrescued)) { | ||
340 | /* | ||
341 | * Slow path. We don't want to lose the last | ||
342 | * pin before unlock_page() | ||
343 | */ | ||
344 | get_page(page); /* for putback_lru_page() */ | ||
345 | __munlock_isolated_page(page); | ||
346 | unlock_page(page); | ||
347 | put_page(page); /* from follow_page_mask() */ | ||
348 | } | ||
349 | } | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * Phase 3: page putback for pages that qualified for the fast path | ||
354 | * This will also call put_page() to return pin from follow_page_mask() | ||
355 | */ | ||
356 | if (pagevec_count(&pvec_putback)) | ||
357 | __putback_lru_fast(&pvec_putback, pgrescued); | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * Fill up pagevec for __munlock_pagevec using pte walk | ||
362 | * | ||
363 | * The function expects that the struct page corresponding to @start address is | ||
364 | * a non-THP page already pinned and in the @pvec, and that it belongs to @zone. | ||
365 | * | ||
366 | * The rest of @pvec is filled by subsequent pages within the same pmd and same | ||
367 | * zone, as long as the ptes are present and vm_normal_page() succeeds. These | ||
368 | * pages also get pinned. | ||
369 | * | ||
370 | * Returns the address of the next page that should be scanned. This equals | ||
371 | * @start + PAGE_SIZE when no page could be added by the pte walk. | ||
372 | */ | ||
373 | static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, | ||
374 | struct vm_area_struct *vma, int zoneid, unsigned long start, | ||
375 | unsigned long end) | ||
376 | { | ||
377 | pte_t *pte; | ||
378 | spinlock_t *ptl; | ||
379 | |||
380 | /* | ||
381 | * Initialize pte walk starting at the already pinned page where we | ||
382 | * are sure that there is a pte. | ||
383 | */ | ||
384 | pte = get_locked_pte(vma->vm_mm, start, &ptl); | ||
385 | end = min(end, pmd_addr_end(start, end)); | ||
386 | |||
387 | /* The page next to the pinned page is the first we will try to get */ | ||
388 | start += PAGE_SIZE; | ||
389 | while (start < end) { | ||
390 | struct page *page = NULL; | ||
391 | pte++; | ||
392 | if (pte_present(*pte)) | ||
393 | page = vm_normal_page(vma, start, *pte); | ||
394 | /* | ||
395 | * Break if the page could not be obtained or if the page's node+zone | ||
396 | * does not match. | ||
397 | */ | ||
398 | if (!page || page_zone_id(page) != zoneid) | ||
399 | break; | ||
400 | |||
401 | get_page(page); | ||
402 | /* | ||
403 | * Increase the address that will be returned *before* the eventual | ||
404 | * break caused by the pvec becoming full once the page is added. | ||
405 | */ | ||
406 | start += PAGE_SIZE; | ||
407 | if (pagevec_add(pvec, page) == 0) | ||
408 | break; | ||
409 | } | ||
410 | pte_unmap_unlock(pte, ptl); | ||
411 | return start; | ||
412 | } | ||
413 | |||
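The three helpers above form a fixed pipeline, wired up in
munlock_vma_pages_range() below. One batch, compressed (with @pvec already
primed with the page pinned by follow_page_mask()):

        /*
         * 1. extend the pagevec with neighbouring pages via a locked pte
         *    walk, which also advances the scan position;
         * 2. clear PG_mlocked and isolate the whole batch under a single
         *    zone->lru_lock, munlocking each isolated page;
         * 3. fast-path pages return to the LRU in one __pagevec_lru_add().
         */
        start = __munlock_pagevec_fill(&pvec, vma, zoneid, start, end);
        __munlock_pagevec(&pvec, zone);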
414 | /* | ||
213 | * munlock_vma_pages_range() - munlock all pages in the vma range. | 415 | * munlock_vma_pages_range() - munlock all pages in the vma range. |
214 | * @vma - vma containing range to be munlock()ed. | 416 | * @vma - vma containing range to be munlock()ed. |
215 | * @start - start address in @vma of the range | 417 | * @start - start address in @vma of the range |
@@ -233,9 +435,13 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, | |||
233 | vma->vm_flags &= ~VM_LOCKED; | 435 | vma->vm_flags &= ~VM_LOCKED; |
234 | 436 | ||
235 | while (start < end) { | 437 | while (start < end) { |
236 | struct page *page; | 438 | struct page *page = NULL; |
237 | unsigned int page_mask, page_increm; | 439 | unsigned int page_mask, page_increm; |
440 | struct pagevec pvec; | ||
441 | struct zone *zone; | ||
442 | int zoneid; | ||
238 | 443 | ||
444 | pagevec_init(&pvec, 0); | ||
239 | /* | 445 | /* |
240 | * Although FOLL_DUMP is intended for get_dump_page(), | 446 | * Although FOLL_DUMP is intended for get_dump_page(), |
241 | * it just so happens that its special treatment of the | 447 | * it just so happens that its special treatment of the |
@@ -244,21 +450,45 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, | |||
244 | * has sneaked into the range, we won't oops here: great). | 450 | * has sneaked into the range, we won't oops here: great). |
245 | */ | 451 | */ |
246 | page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP, | 452 | page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP, |
247 | &page_mask); | 453 | &page_mask); |
454 | |||
248 | if (page && !IS_ERR(page)) { | 455 | if (page && !IS_ERR(page)) { |
249 | lock_page(page); | 456 | if (PageTransHuge(page)) { |
250 | lru_add_drain(); | 457 | lock_page(page); |
251 | /* | 458 | /* |
252 | * Any THP page found by follow_page_mask() may have | 459 | * Any THP page found by follow_page_mask() may |
253 | * gotten split before reaching munlock_vma_page(), | 460 | * have gotten split before reaching |
254 | * so we need to recompute the page_mask here. | 461 | * munlock_vma_page(), so we need to recompute |
255 | */ | 462 | * the page_mask here. |
256 | page_mask = munlock_vma_page(page); | 463 | */ |
257 | unlock_page(page); | 464 | page_mask = munlock_vma_page(page); |
258 | put_page(page); | 465 | unlock_page(page); |
466 | put_page(page); /* follow_page_mask() */ | ||
467 | } else { | ||
468 | /* | ||
469 | * Non-huge pages are handled in batches via | ||
470 | * pagevec. The pin from follow_page_mask() | ||
471 | * prevents them from being collapsed into a THP. | ||
472 | */ | ||
473 | pagevec_add(&pvec, page); | ||
474 | zone = page_zone(page); | ||
475 | zoneid = page_zone_id(page); | ||
476 | |||
477 | /* | ||
478 | * Try to fill the rest of pagevec using fast | ||
479 | * pte walk. This will also update start to | ||
480 | * the next page to process. Then munlock the | ||
481 | * pagevec. | ||
482 | */ | ||
483 | start = __munlock_pagevec_fill(&pvec, vma, | ||
484 | zoneid, start, end); | ||
485 | __munlock_pagevec(&pvec, zone); | ||
486 | goto next; | ||
487 | } | ||
259 | } | 488 | } |
260 | page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); | 489 | page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); |
261 | start += page_increm * PAGE_SIZE; | 490 | start += page_increm * PAGE_SIZE; |
491 | next: | ||
262 | cond_resched(); | 492 | cond_resched(); |
263 | } | 493 | } |
264 | } | 494 | } |
@@ -1202,7 +1202,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1202 | unsigned long *populate) | 1202 | unsigned long *populate) |
1203 | { | 1203 | { |
1204 | struct mm_struct * mm = current->mm; | 1204 | struct mm_struct * mm = current->mm; |
1205 | struct inode *inode; | ||
1206 | vm_flags_t vm_flags; | 1205 | vm_flags_t vm_flags; |
1207 | 1206 | ||
1208 | *populate = 0; | 1207 | *populate = 0; |
@@ -1265,9 +1264,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1265 | return -EAGAIN; | 1264 | return -EAGAIN; |
1266 | } | 1265 | } |
1267 | 1266 | ||
1268 | inode = file ? file_inode(file) : NULL; | ||
1269 | |||
1270 | if (file) { | 1267 | if (file) { |
1268 | struct inode *inode = file_inode(file); | ||
1269 | |||
1271 | switch (flags & MAP_TYPE) { | 1270 | switch (flags & MAP_TYPE) { |
1272 | case MAP_SHARED: | 1271 | case MAP_SHARED: |
1273 | if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) | 1272 | if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) |
@@ -1302,6 +1301,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1302 | 1301 | ||
1303 | if (!file->f_op || !file->f_op->mmap) | 1302 | if (!file->f_op || !file->f_op->mmap) |
1304 | return -ENODEV; | 1303 | return -ENODEV; |
1304 | if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) | ||
1305 | return -EINVAL; | ||
1305 | break; | 1306 | break; |
1306 | 1307 | ||
1307 | default: | 1308 | default: |
@@ -1310,6 +1311,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1310 | } else { | 1311 | } else { |
1311 | switch (flags & MAP_TYPE) { | 1312 | switch (flags & MAP_TYPE) { |
1312 | case MAP_SHARED: | 1313 | case MAP_SHARED: |
1314 | if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) | ||
1315 | return -EINVAL; | ||
1313 | /* | 1316 | /* |
1314 | * Ignore pgoff. | 1317 | * Ignore pgoff. |
1315 | */ | 1318 | */ |
@@ -1476,11 +1479,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr, | |||
1476 | { | 1479 | { |
1477 | struct mm_struct *mm = current->mm; | 1480 | struct mm_struct *mm = current->mm; |
1478 | struct vm_area_struct *vma, *prev; | 1481 | struct vm_area_struct *vma, *prev; |
1479 | int correct_wcount = 0; | ||
1480 | int error; | 1482 | int error; |
1481 | struct rb_node **rb_link, *rb_parent; | 1483 | struct rb_node **rb_link, *rb_parent; |
1482 | unsigned long charged = 0; | 1484 | unsigned long charged = 0; |
1483 | struct inode *inode = file ? file_inode(file) : NULL; | ||
1484 | 1485 | ||
1485 | /* Check against address space limit. */ | 1486 | /* Check against address space limit. */ |
1486 | if (!may_expand_vm(mm, len >> PAGE_SHIFT)) { | 1487 | if (!may_expand_vm(mm, len >> PAGE_SHIFT)) { |
@@ -1544,16 +1545,11 @@ munmap_back: | |||
1544 | vma->vm_pgoff = pgoff; | 1545 | vma->vm_pgoff = pgoff; |
1545 | INIT_LIST_HEAD(&vma->anon_vma_chain); | 1546 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
1546 | 1547 | ||
1547 | error = -EINVAL; /* when rejecting VM_GROWSDOWN|VM_GROWSUP */ | ||
1548 | |||
1549 | if (file) { | 1548 | if (file) { |
1550 | if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) | ||
1551 | goto free_vma; | ||
1552 | if (vm_flags & VM_DENYWRITE) { | 1549 | if (vm_flags & VM_DENYWRITE) { |
1553 | error = deny_write_access(file); | 1550 | error = deny_write_access(file); |
1554 | if (error) | 1551 | if (error) |
1555 | goto free_vma; | 1552 | goto free_vma; |
1556 | correct_wcount = 1; | ||
1557 | } | 1553 | } |
1558 | vma->vm_file = get_file(file); | 1554 | vma->vm_file = get_file(file); |
1559 | error = file->f_op->mmap(file, vma); | 1555 | error = file->f_op->mmap(file, vma); |
@@ -1570,11 +1566,8 @@ munmap_back: | |||
1570 | WARN_ON_ONCE(addr != vma->vm_start); | 1566 | WARN_ON_ONCE(addr != vma->vm_start); |
1571 | 1567 | ||
1572 | addr = vma->vm_start; | 1568 | addr = vma->vm_start; |
1573 | pgoff = vma->vm_pgoff; | ||
1574 | vm_flags = vma->vm_flags; | 1569 | vm_flags = vma->vm_flags; |
1575 | } else if (vm_flags & VM_SHARED) { | 1570 | } else if (vm_flags & VM_SHARED) { |
1576 | if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP))) | ||
1577 | goto free_vma; | ||
1578 | error = shmem_zero_setup(vma); | 1571 | error = shmem_zero_setup(vma); |
1579 | if (error) | 1572 | if (error) |
1580 | goto free_vma; | 1573 | goto free_vma; |
@@ -1596,11 +1589,10 @@ munmap_back: | |||
1596 | } | 1589 | } |
1597 | 1590 | ||
1598 | vma_link(mm, vma, prev, rb_link, rb_parent); | 1591 | vma_link(mm, vma, prev, rb_link, rb_parent); |
1599 | file = vma->vm_file; | ||
1600 | |||
1601 | /* Once vma denies write, undo our temporary denial count */ | 1592 | /* Once vma denies write, undo our temporary denial count */ |
1602 | if (correct_wcount) | 1593 | if (vm_flags & VM_DENYWRITE) |
1603 | atomic_inc(&inode->i_writecount); | 1594 | allow_write_access(file); |
1595 | file = vma->vm_file; | ||
1604 | out: | 1596 | out: |
1605 | perf_event_mmap(vma); | 1597 | perf_event_mmap(vma); |
1606 | 1598 | ||
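The correct_wcount flag can go away because the temporary write denial now
has a symmetric undo helper: allow_write_access() reverses
deny_write_access(). The pairing the refactor leans on, in isolation:

        if (vm_flags & VM_DENYWRITE) {
                error = deny_write_access(file);  /* -ETXTBSY if writers exist */
                if (error)
                        goto free_vma;
        }
        /* ... file->f_op->mmap(), vma_link() ... */
        if (vm_flags & VM_DENYWRITE)
                allow_write_access(file);         /* drop i_writecount denial */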
@@ -1616,11 +1608,20 @@ out: | |||
1616 | if (file) | 1608 | if (file) |
1617 | uprobe_mmap(vma); | 1609 | uprobe_mmap(vma); |
1618 | 1610 | ||
1611 | /* | ||
1612 | * New (or expanded) vma always get soft dirty status. | ||
1613 | * Otherwise user-space soft-dirty page tracker won't | ||
1614 | * be able to distinguish situation when vma area unmapped, | ||
1615 | * then new mapped in-place (which must be aimed as | ||
1616 | * a completely new data area). | ||
1617 | */ | ||
1618 | vma->vm_flags |= VM_SOFTDIRTY; | ||
1619 | |||
1619 | return addr; | 1620 | return addr; |
1620 | 1621 | ||
1621 | unmap_and_free_vma: | 1622 | unmap_and_free_vma: |
1622 | if (correct_wcount) | 1623 | if (vm_flags & VM_DENYWRITE) |
1623 | atomic_inc(&inode->i_writecount); | 1624 | allow_write_access(file); |
1624 | vma->vm_file = NULL; | 1625 | vma->vm_file = NULL; |
1625 | fput(file); | 1626 | fput(file); |
1626 | 1627 | ||
@@ -2380,7 +2381,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2380 | static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, | 2381 | static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, |
2381 | unsigned long addr, int new_below) | 2382 | unsigned long addr, int new_below) |
2382 | { | 2383 | { |
2383 | struct mempolicy *pol; | ||
2384 | struct vm_area_struct *new; | 2384 | struct vm_area_struct *new; |
2385 | int err = -ENOMEM; | 2385 | int err = -ENOMEM; |
2386 | 2386 | ||
@@ -2404,12 +2404,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, | |||
2404 | new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); | 2404 | new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); |
2405 | } | 2405 | } |
2406 | 2406 | ||
2407 | pol = mpol_dup(vma_policy(vma)); | 2407 | err = vma_dup_policy(vma, new); |
2408 | if (IS_ERR(pol)) { | 2408 | if (err) |
2409 | err = PTR_ERR(pol); | ||
2410 | goto out_free_vma; | 2409 | goto out_free_vma; |
2411 | } | ||
2412 | vma_set_policy(new, pol); | ||
2413 | 2410 | ||
2414 | if (anon_vma_clone(new, vma)) | 2411 | if (anon_vma_clone(new, vma)) |
2415 | goto out_free_mpol; | 2412 | goto out_free_mpol; |
@@ -2437,7 +2434,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, | |||
2437 | fput(new->vm_file); | 2434 | fput(new->vm_file); |
2438 | unlink_anon_vmas(new); | 2435 | unlink_anon_vmas(new); |
2439 | out_free_mpol: | 2436 | out_free_mpol: |
2440 | mpol_put(pol); | 2437 | mpol_put(vma_policy(new)); |
2441 | out_free_vma: | 2438 | out_free_vma: |
2442 | kmem_cache_free(vm_area_cachep, new); | 2439 | kmem_cache_free(vm_area_cachep, new); |
2443 | out_err: | 2440 | out_err: |
@@ -2663,6 +2660,7 @@ out: | |||
2663 | mm->total_vm += len >> PAGE_SHIFT; | 2660 | mm->total_vm += len >> PAGE_SHIFT; |
2664 | if (flags & VM_LOCKED) | 2661 | if (flags & VM_LOCKED) |
2665 | mm->locked_vm += (len >> PAGE_SHIFT); | 2662 | mm->locked_vm += (len >> PAGE_SHIFT); |
2663 | vma->vm_flags |= VM_SOFTDIRTY; | ||
2666 | return addr; | 2664 | return addr; |
2667 | } | 2665 | } |
2668 | 2666 | ||
@@ -2780,7 +2778,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, | |||
2780 | struct mm_struct *mm = vma->vm_mm; | 2778 | struct mm_struct *mm = vma->vm_mm; |
2781 | struct vm_area_struct *new_vma, *prev; | 2779 | struct vm_area_struct *new_vma, *prev; |
2782 | struct rb_node **rb_link, *rb_parent; | 2780 | struct rb_node **rb_link, *rb_parent; |
2783 | struct mempolicy *pol; | ||
2784 | bool faulted_in_anon_vma = true; | 2781 | bool faulted_in_anon_vma = true; |
2785 | 2782 | ||
2786 | /* | 2783 | /* |
@@ -2825,10 +2822,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, | |||
2825 | new_vma->vm_start = addr; | 2822 | new_vma->vm_start = addr; |
2826 | new_vma->vm_end = addr + len; | 2823 | new_vma->vm_end = addr + len; |
2827 | new_vma->vm_pgoff = pgoff; | 2824 | new_vma->vm_pgoff = pgoff; |
2828 | pol = mpol_dup(vma_policy(vma)); | 2825 | if (vma_dup_policy(vma, new_vma)) |
2829 | if (IS_ERR(pol)) | ||
2830 | goto out_free_vma; | 2826 | goto out_free_vma; |
2831 | vma_set_policy(new_vma, pol); | ||
2832 | INIT_LIST_HEAD(&new_vma->anon_vma_chain); | 2827 | INIT_LIST_HEAD(&new_vma->anon_vma_chain); |
2833 | if (anon_vma_clone(new_vma, vma)) | 2828 | if (anon_vma_clone(new_vma, vma)) |
2834 | goto out_free_mempol; | 2829 | goto out_free_mempol; |
@@ -2843,7 +2838,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, | |||
2843 | return new_vma; | 2838 | return new_vma; |
2844 | 2839 | ||
2845 | out_free_mempol: | 2840 | out_free_mempol: |
2846 | mpol_put(pol); | 2841 | mpol_put(vma_policy(new_vma)); |
2847 | out_free_vma: | 2842 | out_free_vma: |
2848 | kmem_cache_free(vm_area_cachep, new_vma); | 2843 | kmem_cache_free(vm_area_cachep, new_vma); |
2849 | return NULL; | 2844 | return NULL; |
@@ -2930,7 +2925,7 @@ int install_special_mapping(struct mm_struct *mm, | |||
2930 | vma->vm_start = addr; | 2925 | vma->vm_start = addr; |
2931 | vma->vm_end = addr + len; | 2926 | vma->vm_end = addr + len; |
2932 | 2927 | ||
2933 | vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND; | 2928 | vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY; |
2934 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); | 2929 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); |
2935 | 2930 | ||
2936 | vma->vm_ops = &special_mapping_vmops; | 2931 | vma->vm_ops = &special_mapping_vmops; |
diff --git a/mm/mremap.c b/mm/mremap.c index 0843feb66f3d..91b13d6a16d4 100644 --- a/mm/mremap.c +++ b/mm/mremap.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
26 | #include <asm/cacheflush.h> | 26 | #include <asm/cacheflush.h> |
27 | #include <asm/tlbflush.h> | 27 | #include <asm/tlbflush.h> |
28 | #include <asm/pgalloc.h> | ||
28 | 29 | ||
29 | #include "internal.h" | 30 | #include "internal.h" |
30 | 31 | ||
@@ -62,8 +63,10 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, | |||
62 | return NULL; | 63 | return NULL; |
63 | 64 | ||
64 | pmd = pmd_alloc(mm, pud, addr); | 65 | pmd = pmd_alloc(mm, pud, addr); |
65 | if (!pmd) | 66 | if (!pmd) { |
67 | pud_free(mm, pud); | ||
66 | return NULL; | 68 | return NULL; |
69 | } | ||
67 | 70 | ||
68 | VM_BUG_ON(pmd_trans_huge(*pmd)); | 71 | VM_BUG_ON(pmd_trans_huge(*pmd)); |
69 | 72 | ||
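The alloc_new_pmd() change above is a small error-path fix: when
pmd_alloc() fails, the pud allocated one step earlier used to be leaked.
The unwind shape, in isolation:

        pud = pud_alloc(mm, pgd, addr);
        if (!pud)
                return NULL;

        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd) {
                pud_free(mm, pud);  /* undo the allocation from the step before */
                return NULL;
        }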
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3f0c895c71fe..6c7b0187be8e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -36,8 +36,11 @@ | |||
36 | #include <linux/pagevec.h> | 36 | #include <linux/pagevec.h> |
37 | #include <linux/timer.h> | 37 | #include <linux/timer.h> |
38 | #include <linux/sched/rt.h> | 38 | #include <linux/sched/rt.h> |
39 | #include <linux/mm_inline.h> | ||
39 | #include <trace/events/writeback.h> | 40 | #include <trace/events/writeback.h> |
40 | 41 | ||
42 | #include "internal.h" | ||
43 | |||
41 | /* | 44 | /* |
42 | * Sleep at most 200ms at a time in balance_dirty_pages(). | 45 | * Sleep at most 200ms at a time in balance_dirty_pages(). |
43 | */ | 46 | */ |
@@ -241,9 +244,6 @@ static unsigned long global_dirtyable_memory(void) | |||
241 | if (!vm_highmem_is_dirtyable) | 244 | if (!vm_highmem_is_dirtyable) |
242 | x -= highmem_dirtyable_memory(x); | 245 | x -= highmem_dirtyable_memory(x); |
243 | 246 | ||
244 | /* Subtract min_free_kbytes */ | ||
245 | x -= min_t(unsigned long, x, min_free_kbytes >> (PAGE_SHIFT - 10)); | ||
246 | |||
247 | return x + 1; /* Ensure that we never return 0 */ | 247 | return x + 1; /* Ensure that we never return 0 */ |
248 | } | 248 | } |
249 | 249 | ||
@@ -585,6 +585,37 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty) | |||
585 | } | 585 | } |
586 | 586 | ||
587 | /* | 587 | /* |
588 | * setpoint - dirty 3 | ||
589 | * f(dirty) := 1.0 + (----------------) | ||
590 | * limit - setpoint | ||
591 | * | ||
592 | * it's a 3rd order polynomial subject to | ||
593 | * | ||
594 | * (1) f(freerun) = 2.0 => ramp up dirty_ratelimit reasonably fast | ||
595 | * (2) f(setpoint) = 1.0 => the balance point | ||
596 | * (3) f(limit) = 0 => the hard limit | ||
597 | * (4) df/dx <= 0 => negative feedback control | ||
598 | * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) | ||
599 | * => fast response on large errors; small oscillation near setpoint | ||
600 | */ | ||
601 | static inline long long pos_ratio_polynom(unsigned long setpoint, | ||
602 | unsigned long dirty, | ||
603 | unsigned long limit) | ||
604 | { | ||
605 | long long pos_ratio; | ||
606 | long x; | ||
607 | |||
608 | x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, | ||
609 | limit - setpoint + 1); | ||
610 | pos_ratio = x; | ||
611 | pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; | ||
612 | pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; | ||
613 | pos_ratio += 1 << RATELIMIT_CALC_SHIFT; | ||
614 | |||
615 | return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT); | ||
616 | } | ||
617 | |||
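pos_ratio_polynom() evaluates f(dirty) = 1 + x^3 in fixed point, where
x = (setpoint - dirty) / (limit - setpoint + 1) is scaled by
RATELIMIT_CALC_SHIFT. A floating-point model (not kernel code) makes the
boundary conditions easy to check:

        /* f(freerun) ~= 2.0, f(setpoint) = 1.0, f(limit) ~= 0.0 */
        static double pos_ratio_model(double setpoint, double dirty,
                                      double limit)
        {
                double x = (setpoint - dirty) / (limit - setpoint + 1);
                double f = 1.0 + x * x * x;

                return f < 0.0 ? 0.0 : (f > 2.0 ? 2.0 : f);
        }

With setpoint midway between freerun and limit, dirty = freerun gives
x ~= 1 and f ~= 2; dirty = setpoint gives f = 1; dirty = limit gives
x ~= -1 and f ~= 0, matching constraints (1)-(3) in the comment.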
618 | /* | ||
588 | * Dirty position control. | 619 | * Dirty position control. |
589 | * | 620 | * |
590 | * (o) global/bdi setpoints | 621 | * (o) global/bdi setpoints |
@@ -682,26 +713,80 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, | |||
682 | /* | 713 | /* |
683 | * global setpoint | 714 | * global setpoint |
684 | * | 715 | * |
685 | * setpoint - dirty 3 | 716 | * See comment for pos_ratio_polynom(). |
686 | * f(dirty) := 1.0 + (----------------) | 717 | */ |
687 | * limit - setpoint | 718 | setpoint = (freerun + limit) / 2; |
719 | pos_ratio = pos_ratio_polynom(setpoint, dirty, limit); | ||
720 | |||
721 | /* | ||
722 | * The strictlimit feature is a tool preventing mistrusted filesystems | ||
723 | * from growing a large number of dirty pages before throttling. For | ||
724 | * such filesystems balance_dirty_pages always checks bdi counters | ||
725 | * against bdi limits, even if global "nr_dirty" is under "freerun". | ||
726 | * This is especially important for fuse, which sets bdi->max_ratio to | ||
727 | * 1% by default. Without the strictlimit feature, fuse writeback may | ||
728 | * consume an arbitrary amount of RAM because it is accounted in | ||
729 | * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty". | ||
688 | * | 730 | * |
689 | * it's a 3rd order polynomial that subjects to | 731 | * Here, in bdi_position_ratio(), we calculate pos_ratio based on |
732 | * two values: bdi_dirty and bdi_thresh. Let's consider an example: | ||
733 | * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global | ||
734 | * limits are set by default to 10% and 20% (background and throttle). | ||
735 | * Then bdi_thresh is 1% of 20% of 16GB. This amounts to ~8K pages. | ||
736 | * bdi_dirty_limit(bdi, bg_thresh) is about ~4K pages. bdi_setpoint is | ||
737 | * about ~6K pages (as the average of background and throttle bdi | ||
738 | * limits). The 3rd order polynomial will provide positive feedback if | ||
739 | * bdi_dirty is under bdi_setpoint and vice versa. | ||
690 | * | 740 | * |
691 | * (1) f(freerun) = 2.0 => rampup dirty_ratelimit reasonably fast | 741 | * Note, that we cannot use global counters in these calculations |
692 | * (2) f(setpoint) = 1.0 => the balance point | 742 | * because we want to throttle process writing to a strictlimit BDI |
693 | * (3) f(limit) = 0 => the hard limit | 743 | * much earlier than global "freerun" is reached (~23MB vs. ~2.3GB |
694 | * (4) df/dx <= 0 => negative feedback control | 744 | * in the example above). |
695 | * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) | ||
696 | * => fast response on large errors; small oscillation near setpoint | ||
697 | */ | 745 | */ |
698 | setpoint = (freerun + limit) / 2; | 746 | if (unlikely(bdi->capabilities & BDI_CAP_STRICTLIMIT)) { |
699 | x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, | 747 | long long bdi_pos_ratio; |
700 | limit - setpoint + 1); | 748 | unsigned long bdi_bg_thresh; |
701 | pos_ratio = x; | 749 | |
702 | pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; | 750 | if (bdi_dirty < 8) |
703 | pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; | 751 | return min_t(long long, pos_ratio * 2, |
704 | pos_ratio += 1 << RATELIMIT_CALC_SHIFT; | 752 | 2 << RATELIMIT_CALC_SHIFT); |
753 | |||
754 | if (bdi_dirty >= bdi_thresh) | ||
755 | return 0; | ||
756 | |||
757 | bdi_bg_thresh = div_u64((u64)bdi_thresh * bg_thresh, thresh); | ||
758 | bdi_setpoint = dirty_freerun_ceiling(bdi_thresh, | ||
759 | bdi_bg_thresh); | ||
760 | |||
761 | if (bdi_setpoint == 0 || bdi_setpoint == bdi_thresh) | ||
762 | return 0; | ||
763 | |||
764 | bdi_pos_ratio = pos_ratio_polynom(bdi_setpoint, bdi_dirty, | ||
765 | bdi_thresh); | ||
766 | |||
767 | /* | ||
768 | * Typically, for the strictlimit case, bdi_setpoint << setpoint | ||
769 | * and pos_ratio >> bdi_pos_ratio. In other words, global | ||
770 | * state ("dirty") is not the limiting factor and we have to | ||
771 | * make the decision based on bdi counters. But there is an | ||
772 | * important case when global pos_ratio should get precedence: | ||
773 | * global limits are exceeded (e.g. due to activities on other | ||
774 | * BDIs) while given strictlimit BDI is below limit. | ||
775 | * | ||
776 | * "pos_ratio * bdi_pos_ratio" would work for the case above, | ||
777 | * but it would look too non-natural for the case of all | ||
778 | * activity in the system coming from a single strictlimit BDI | ||
779 | * with bdi->max_ratio == 100%. | ||
780 | * | ||
781 | * Note that min() below somewhat changes the dynamics of the | ||
782 | * control system. Normally, pos_ratio value can be well over 3 | ||
783 | * (when globally we are at freerun and bdi is well below bdi | ||
784 | * setpoint). Now the maximum pos_ratio in the same situation | ||
785 | * is 2. We might want to tweak this if we observe the control | ||
786 | * system is too slow to adapt. | ||
787 | */ | ||
788 | return min(pos_ratio, bdi_pos_ratio); | ||
789 | } | ||
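The worked example in the comment above checks out (4KiB pages assumed):

        bdi_thresh                      = 1% of 20% of 16GiB = 32MiB -> ~8K pages
        bdi_dirty_limit(bdi, bg_thresh) = 1% of 10% of 16GiB = 16MiB -> ~4K pages
        bdi_setpoint                    = (~8K + ~4K) / 2            -> ~6K pages

which matches the ~8K/~4K/~6K figures quoted there.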
705 | 790 | ||
706 | /* | 791 | /* |
707 | * We have computed basic pos_ratio above based on global situation. If | 792 | * We have computed basic pos_ratio above based on global situation. If |
@@ -994,6 +1079,27 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi, | |||
994 | * keep that period small to reduce time lags). | 1079 | * keep that period small to reduce time lags). |
995 | */ | 1080 | */ |
996 | step = 0; | 1081 | step = 0; |
1082 | |||
1083 | /* | ||
1084 | * For the strictlimit case, the calculations above were based on bdi counters | ||
1085 | * and limits (starting from pos_ratio = bdi_position_ratio() and up to | ||
1086 | * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate). | ||
1087 | * Hence, to calculate "step" properly, we have to use bdi_dirty as | ||
1088 | * "dirty" and bdi_setpoint as "setpoint". | ||
1089 | * | ||
1090 | * We ramp up dirty_ratelimit forcibly if bdi_dirty is low because | ||
1091 | * it's possible that bdi_thresh is close to zero due to inactivity | ||
1092 | * of the backing device (see the implementation of bdi_dirty_limit()). | ||
1093 | */ | ||
1094 | if (unlikely(bdi->capabilities & BDI_CAP_STRICTLIMIT)) { | ||
1095 | dirty = bdi_dirty; | ||
1096 | if (bdi_dirty < 8) | ||
1097 | setpoint = bdi_dirty + 1; | ||
1098 | else | ||
1099 | setpoint = (bdi_thresh + | ||
1100 | bdi_dirty_limit(bdi, bg_thresh)) / 2; | ||
1101 | } | ||
1102 | |||
997 | if (dirty < setpoint) { | 1103 | if (dirty < setpoint) { |
998 | x = min(bdi->balanced_dirty_ratelimit, | 1104 | x = min(bdi->balanced_dirty_ratelimit, |
999 | min(balanced_dirty_ratelimit, task_ratelimit)); | 1105 | min(balanced_dirty_ratelimit, task_ratelimit)); |
@@ -1198,6 +1304,56 @@ static long bdi_min_pause(struct backing_dev_info *bdi, | |||
1198 | return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t; | 1304 | return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t; |
1199 | } | 1305 | } |
1200 | 1306 | ||
1307 | static inline void bdi_dirty_limits(struct backing_dev_info *bdi, | ||
1308 | unsigned long dirty_thresh, | ||
1309 | unsigned long background_thresh, | ||
1310 | unsigned long *bdi_dirty, | ||
1311 | unsigned long *bdi_thresh, | ||
1312 | unsigned long *bdi_bg_thresh) | ||
1313 | { | ||
1314 | unsigned long bdi_reclaimable; | ||
1315 | |||
1316 | /* | ||
1317 | * bdi_thresh is not treated as a hard limiting factor the way | ||
1318 | * dirty_thresh is, for two reasons: | ||
1319 | * - in JBOD setup, bdi_thresh can fluctuate a lot | ||
1320 | * - in a system with HDD and USB key, the USB key may somehow | ||
1321 | * go into a state where (bdi_dirty >> bdi_thresh), either because | ||
1322 | * bdi_dirty starts high, or because bdi_thresh drops low. | ||
1323 | * In this case we don't want to hard throttle the USB key | ||
1324 | * dirtiers for 100 seconds until bdi_dirty drops under | ||
1325 | * bdi_thresh. Instead the auxiliary bdi control line in | ||
1326 | * bdi_position_ratio() will let the dirtier task progress | ||
1327 | * at some rate <= (write_bw / 2) for bringing down bdi_dirty. | ||
1328 | */ | ||
1329 | *bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); | ||
1330 | |||
1331 | if (bdi_bg_thresh) | ||
1332 | *bdi_bg_thresh = div_u64((u64)*bdi_thresh * | ||
1333 | background_thresh, | ||
1334 | dirty_thresh); | ||
1335 | |||
1336 | /* | ||
1337 | * In order to avoid the stacked BDI deadlock we need | ||
1338 | * to ensure we accurately count the 'dirty' pages when | ||
1339 | * the threshold is low. | ||
1340 | * | ||
1341 | * Otherwise it would be possible to get thresh+n pages | ||
1342 | * reported dirty, even though there are thresh-m pages | ||
1343 | * actually dirty; with m+n sitting in the percpu | ||
1344 | * deltas. | ||
1345 | */ | ||
1346 | if (*bdi_thresh < 2 * bdi_stat_error(bdi)) { | ||
1347 | bdi_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); | ||
1348 | *bdi_dirty = bdi_reclaimable + | ||
1349 | bdi_stat_sum(bdi, BDI_WRITEBACK); | ||
1350 | } else { | ||
1351 | bdi_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); | ||
1352 | *bdi_dirty = bdi_reclaimable + | ||
1353 | bdi_stat(bdi, BDI_WRITEBACK); | ||
1354 | } | ||
1355 | } | ||
1356 | |||
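The bdi_stat()/bdi_stat_sum() split above is the usual percpu-counter
accuracy/cost trade-off: a plain read can be off by up to bdi_stat_error()
per counter, so the exact but O(nr_cpus) sum is taken only when the
threshold is small enough for that error to flip the throttling decision.
In isolation:

        if (*bdi_thresh < 2 * bdi_stat_error(bdi))
                *bdi_dirty = bdi_stat_sum(bdi, BDI_RECLAIMABLE) +
                             bdi_stat_sum(bdi, BDI_WRITEBACK);  /* exact, slow */
        else
                *bdi_dirty = bdi_stat(bdi, BDI_RECLAIMABLE) +
                             bdi_stat(bdi, BDI_WRITEBACK);      /* approximate */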
1201 | /* | 1357 | /* |
1202 | * balance_dirty_pages() must be called by processes which are generating dirty | 1358 | * balance_dirty_pages() must be called by processes which are generating dirty |
1203 | * data. It looks at the number of dirty pages in the machine and will force | 1359 | * data. It looks at the number of dirty pages in the machine and will force |
@@ -1209,13 +1365,9 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
1209 | unsigned long pages_dirtied) | 1365 | unsigned long pages_dirtied) |
1210 | { | 1366 | { |
1211 | unsigned long nr_reclaimable; /* = file_dirty + unstable_nfs */ | 1367 | unsigned long nr_reclaimable; /* = file_dirty + unstable_nfs */ |
1212 | unsigned long bdi_reclaimable; | ||
1213 | unsigned long nr_dirty; /* = file_dirty + writeback + unstable_nfs */ | 1368 | unsigned long nr_dirty; /* = file_dirty + writeback + unstable_nfs */ |
1214 | unsigned long bdi_dirty; | ||
1215 | unsigned long freerun; | ||
1216 | unsigned long background_thresh; | 1369 | unsigned long background_thresh; |
1217 | unsigned long dirty_thresh; | 1370 | unsigned long dirty_thresh; |
1218 | unsigned long bdi_thresh; | ||
1219 | long period; | 1371 | long period; |
1220 | long pause; | 1372 | long pause; |
1221 | long max_pause; | 1373 | long max_pause; |
@@ -1226,10 +1378,16 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
1226 | unsigned long dirty_ratelimit; | 1378 | unsigned long dirty_ratelimit; |
1227 | unsigned long pos_ratio; | 1379 | unsigned long pos_ratio; |
1228 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 1380 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
1381 | bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT; | ||
1229 | unsigned long start_time = jiffies; | 1382 | unsigned long start_time = jiffies; |
1230 | 1383 | ||
1231 | for (;;) { | 1384 | for (;;) { |
1232 | unsigned long now = jiffies; | 1385 | unsigned long now = jiffies; |
1386 | unsigned long uninitialized_var(bdi_thresh); | ||
1387 | unsigned long thresh; | ||
1388 | unsigned long uninitialized_var(bdi_dirty); | ||
1389 | unsigned long dirty; | ||
1390 | unsigned long bg_thresh; | ||
1233 | 1391 | ||
1234 | /* | 1392 | /* |
1235 | * Unstable writes are a feature of certain networked | 1393 | * Unstable writes are a feature of certain networked |
@@ -1243,61 +1401,44 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
1243 | 1401 | ||
1244 | global_dirty_limits(&background_thresh, &dirty_thresh); | 1402 | global_dirty_limits(&background_thresh, &dirty_thresh); |
1245 | 1403 | ||
1404 | if (unlikely(strictlimit)) { | ||
1405 | bdi_dirty_limits(bdi, dirty_thresh, background_thresh, | ||
1406 | &bdi_dirty, &bdi_thresh, &bg_thresh); | ||
1407 | |||
1408 | dirty = bdi_dirty; | ||
1409 | thresh = bdi_thresh; | ||
1410 | } else { | ||
1411 | dirty = nr_dirty; | ||
1412 | thresh = dirty_thresh; | ||
1413 | bg_thresh = background_thresh; | ||
1414 | } | ||
1415 | |||
1246 | /* | 1416 | /* |
1247 | * Throttle it only when the background writeback cannot | 1417 | * Throttle it only when the background writeback cannot |
1248 | * catch-up. This avoids (excessively) small writeouts | 1418 | * catch-up. This avoids (excessively) small writeouts |
1249 | * when the bdi limits are ramping up. | 1419 | * when the bdi limits are ramping up in case of !strictlimit. |
1420 | * | ||
1421 | * In strictlimit case make decision based on the bdi counters | ||
1422 | * and limits. Small writeouts when the bdi limits are ramping | ||
1423 | * up are the price we consciously pay for strictlimit-ing. | ||
1250 | */ | 1424 | */ |
1251 | freerun = dirty_freerun_ceiling(dirty_thresh, | 1425 | if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh)) { |
1252 | background_thresh); | ||
1253 | if (nr_dirty <= freerun) { | ||
1254 | current->dirty_paused_when = now; | 1426 | current->dirty_paused_when = now; |
1255 | current->nr_dirtied = 0; | 1427 | current->nr_dirtied = 0; |
1256 | current->nr_dirtied_pause = | 1428 | current->nr_dirtied_pause = |
1257 | dirty_poll_interval(nr_dirty, dirty_thresh); | 1429 | dirty_poll_interval(dirty, thresh); |
1258 | break; | 1430 | break; |
1259 | } | 1431 | } |
1260 | 1432 | ||
1261 | if (unlikely(!writeback_in_progress(bdi))) | 1433 | if (unlikely(!writeback_in_progress(bdi))) |
1262 | bdi_start_background_writeback(bdi); | 1434 | bdi_start_background_writeback(bdi); |
1263 | 1435 | ||
1264 | /* | 1436 | if (!strictlimit) |
1265 | * bdi_thresh is not treated as some limiting factor as | 1437 | bdi_dirty_limits(bdi, dirty_thresh, background_thresh, |
1266 | * dirty_thresh, due to reasons | 1438 | &bdi_dirty, &bdi_thresh, NULL); |
1267 | * - in JBOD setup, bdi_thresh can fluctuate a lot | ||
1268 | * - in a system with HDD and USB key, the USB key may somehow | ||
1269 | * go into state (bdi_dirty >> bdi_thresh) either because | ||
1270 | * bdi_dirty starts high, or because bdi_thresh drops low. | ||
1271 | * In this case we don't want to hard throttle the USB key | ||
1272 | * dirtiers for 100 seconds until bdi_dirty drops under | ||
1273 | * bdi_thresh. Instead the auxiliary bdi control line in | ||
1274 | * bdi_position_ratio() will let the dirtier task progress | ||
1275 | * at some rate <= (write_bw / 2) for bringing down bdi_dirty. | ||
1276 | */ | ||
1277 | bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); | ||
1278 | |||
1279 | /* | ||
1280 | * In order to avoid the stacked BDI deadlock we need | ||
1281 | * to ensure we accurately count the 'dirty' pages when | ||
1282 | * the threshold is low. | ||
1283 | * | ||
1284 | * Otherwise it would be possible to get thresh+n pages | ||
1285 | * reported dirty, even though there are thresh-m pages | ||
1286 | * actually dirty; with m+n sitting in the percpu | ||
1287 | * deltas. | ||
1288 | */ | ||
1289 | if (bdi_thresh < 2 * bdi_stat_error(bdi)) { | ||
1290 | bdi_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); | ||
1291 | bdi_dirty = bdi_reclaimable + | ||
1292 | bdi_stat_sum(bdi, BDI_WRITEBACK); | ||
1293 | } else { | ||
1294 | bdi_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); | ||
1295 | bdi_dirty = bdi_reclaimable + | ||
1296 | bdi_stat(bdi, BDI_WRITEBACK); | ||
1297 | } | ||
1298 | 1439 | ||
1299 | dirty_exceeded = (bdi_dirty > bdi_thresh) && | 1440 | dirty_exceeded = (bdi_dirty > bdi_thresh) && |
1300 | (nr_dirty > dirty_thresh); | 1441 | ((nr_dirty > dirty_thresh) || strictlimit); |
1301 | if (dirty_exceeded && !bdi->dirty_exceeded) | 1442 | if (dirty_exceeded && !bdi->dirty_exceeded) |
1302 | bdi->dirty_exceeded = 1; | 1443 | bdi->dirty_exceeded = 1; |
1303 | 1444 | ||
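The new strictlimit branch chooses which counters feed the freerun check: global counters in the normal case, per-bdi counters for BDI_CAP_STRICTLIMIT. A minimal userspace model of that selection, with toy page counts; in the kernel, dirty_freerun_ceiling() is likewise the midpoint of the two thresholds:

#include <stdbool.h>
#include <stdio.h>

/* In the kernel this is (thresh + bg_thresh) / 2. */
static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	return (thresh + bg_thresh) / 2;
}

static bool should_throttle(bool strictlimit,
			    unsigned long nr_dirty, unsigned long dirty_thresh,
			    unsigned long background_thresh,
			    unsigned long bdi_dirty, unsigned long bdi_thresh,
			    unsigned long bdi_bg_thresh)
{
	unsigned long dirty, thresh, bg_thresh;

	if (strictlimit) {
		/* Decide on the bdi's own counters and limits. */
		dirty = bdi_dirty;
		thresh = bdi_thresh;
		bg_thresh = bdi_bg_thresh;
	} else {
		dirty = nr_dirty;
		thresh = dirty_thresh;
		bg_thresh = background_thresh;
	}
	return dirty > dirty_freerun_ceiling(thresh, bg_thresh);
}

int main(void)
{
	/* Toy numbers: 1000-page global limit, 500-page background. */
	printf("global: %d\n",
	       should_throttle(false, 800, 1000, 500, 60, 100, 50));
	printf("strict: %d\n",
	       should_throttle(true, 800, 1000, 500, 60, 100, 50));
	return 0;
}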
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c2b59dbda196..0ee638f76ebe 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/ftrace_event.h> | 56 | #include <linux/ftrace_event.h> |
57 | #include <linux/memcontrol.h> | 57 | #include <linux/memcontrol.h> |
58 | #include <linux/prefetch.h> | 58 | #include <linux/prefetch.h> |
59 | #include <linux/mm_inline.h> | ||
59 | #include <linux/migrate.h> | 60 | #include <linux/migrate.h> |
60 | #include <linux/page-debug-flags.h> | 61 | #include <linux/page-debug-flags.h> |
61 | #include <linux/hugetlb.h> | 62 | #include <linux/hugetlb.h> |
@@ -488,8 +489,10 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) | |||
488 | * (c) a page and its buddy have the same order && | 489 | * (c) a page and its buddy have the same order && |
489 | * (d) a page and its buddy are in the same zone. | 490 | * (d) a page and its buddy are in the same zone. |
490 | * | 491 | * |
491 | * For recording whether a page is in the buddy system, we set ->_mapcount -2. | 492 | * For recording whether a page is in the buddy system, we set ->_mapcount |
492 | * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock. | 493 | * PAGE_BUDDY_MAPCOUNT_VALUE. |
494 | * Setting, clearing, and testing _mapcount PAGE_BUDDY_MAPCOUNT_VALUE is | ||
495 | * serialized by zone->lock. | ||
493 | * | 496 | * |
494 | * For recording page's order, we use page_private(page). | 497 | * For recording page's order, we use page_private(page). |
495 | */ | 498 | */ |
@@ -527,8 +530,9 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, | |||
527 | * as necessary, plus some accounting needed to play nicely with other | 530 | * as necessary, plus some accounting needed to play nicely with other |
528 | * parts of the VM system. | 531 | * parts of the VM system. |
529 | * At each level, we keep a list of pages, which are heads of continuous | 532 | * At each level, we keep a list of pages, which are heads of continuous |
530 | * free pages of length of (1 << order) and marked with _mapcount -2. Page's | 533 | * free pages of length of (1 << order) and marked with _mapcount |
531 | * order is recorded in page_private(page) field. | 534 | * PAGE_BUDDY_MAPCOUNT_VALUE. Page's order is recorded in page_private(page) |
535 | * field. | ||
532 | * So when we are allocating or freeing one, we can derive the state of the | 536 | * So when we are allocating or freeing one, we can derive the state of the |
533 | * other. That is, if we allocate a small block, and both were | 537 | * other. That is, if we allocate a small block, and both were |
534 | * free, the remainder of the region must be split into blocks. | 538 | * free, the remainder of the region must be split into blocks. |
@@ -647,7 +651,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
647 | int to_free = count; | 651 | int to_free = count; |
648 | 652 | ||
649 | spin_lock(&zone->lock); | 653 | spin_lock(&zone->lock); |
650 | zone->all_unreclaimable = 0; | ||
651 | zone->pages_scanned = 0; | 654 | zone->pages_scanned = 0; |
652 | 655 | ||
653 | while (to_free) { | 656 | while (to_free) { |
@@ -696,7 +699,6 @@ static void free_one_page(struct zone *zone, struct page *page, int order, | |||
696 | int migratetype) | 699 | int migratetype) |
697 | { | 700 | { |
698 | spin_lock(&zone->lock); | 701 | spin_lock(&zone->lock); |
699 | zone->all_unreclaimable = 0; | ||
700 | zone->pages_scanned = 0; | 702 | zone->pages_scanned = 0; |
701 | 703 | ||
702 | __free_one_page(page, zone, order, migratetype); | 704 | __free_one_page(page, zone, order, migratetype); |
@@ -721,7 +723,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order) | |||
721 | return false; | 723 | return false; |
722 | 724 | ||
723 | if (!PageHighMem(page)) { | 725 | if (!PageHighMem(page)) { |
724 | debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order); | 726 | debug_check_no_locks_freed(page_address(page), |
727 | PAGE_SIZE << order); | ||
725 | debug_check_no_obj_freed(page_address(page), | 728 | debug_check_no_obj_freed(page_address(page), |
726 | PAGE_SIZE << order); | 729 | PAGE_SIZE << order); |
727 | } | 730 | } |
@@ -750,19 +753,19 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
750 | void __init __free_pages_bootmem(struct page *page, unsigned int order) | 753 | void __init __free_pages_bootmem(struct page *page, unsigned int order) |
751 | { | 754 | { |
752 | unsigned int nr_pages = 1 << order; | 755 | unsigned int nr_pages = 1 << order; |
756 | struct page *p = page; | ||
753 | unsigned int loop; | 757 | unsigned int loop; |
754 | 758 | ||
755 | prefetchw(page); | 759 | prefetchw(p); |
756 | for (loop = 0; loop < nr_pages; loop++) { | 760 | for (loop = 0; loop < (nr_pages - 1); loop++, p++) { |
757 | struct page *p = &page[loop]; | 761 | prefetchw(p + 1); |
758 | |||
759 | if (loop + 1 < nr_pages) | ||
760 | prefetchw(p + 1); | ||
761 | __ClearPageReserved(p); | 762 | __ClearPageReserved(p); |
762 | set_page_count(p, 0); | 763 | set_page_count(p, 0); |
763 | } | 764 | } |
765 | __ClearPageReserved(p); | ||
766 | set_page_count(p, 0); | ||
764 | 767 | ||
765 | page_zone(page)->managed_pages += 1 << order; | 768 | page_zone(page)->managed_pages += nr_pages; |
766 | set_page_refcounted(page); | 769 | set_page_refcounted(page); |
767 | __free_pages(page, order); | 770 | __free_pages(page, order); |
768 | } | 771 | } |
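The rewritten __free_pages_bootmem() loop above drops the per-iteration "loop + 1 < nr_pages" test by peeling the final element out of the loop. The same restructuring in a self-contained sketch, with clear_one() standing in for the two page initializers (assumes nr >= 1):

#include <stdio.h>

static void clear_one(int *p)
{
	*p = 0;
}

static void clear_all(int *base, unsigned int nr)
{
	int *p = base;
	unsigned int loop;

	/* Body runs nr - 1 times; prefetching p + 1 is always safe here. */
	for (loop = 0; loop < nr - 1; loop++, p++) {
		__builtin_prefetch(p + 1, 1);
		clear_one(p);
	}
	clear_one(p);	/* last element, nothing left to prefetch */
}

int main(void)
{
	int pages[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };

	clear_all(pages, 8);
	printf("%d\n", pages[7]);
	return 0;
}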
@@ -885,7 +888,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | |||
885 | int migratetype) | 888 | int migratetype) |
886 | { | 889 | { |
887 | unsigned int current_order; | 890 | unsigned int current_order; |
888 | struct free_area * area; | 891 | struct free_area *area; |
889 | struct page *page; | 892 | struct page *page; |
890 | 893 | ||
891 | /* Find a page of the appropriate size in the preferred list */ | 894 | /* Find a page of the appropriate size in the preferred list */ |
@@ -1007,14 +1010,60 @@ static void change_pageblock_range(struct page *pageblock_page, | |||
1007 | } | 1010 | } |
1008 | } | 1011 | } |
1009 | 1012 | ||
1013 | /* | ||
1014 | * If breaking a large block of pages, move all free pages to the preferred | ||
1015 | * allocation list. If falling back for a reclaimable kernel allocation, be | ||
1016 | * more aggressive about taking ownership of free pages. | ||
1017 | * | ||
1018 | * On the other hand, never change migration type of MIGRATE_CMA pageblocks | ||
1019 | * nor move CMA pages to different free lists. We don't want unmovable pages | ||
1020 | * to be allocated from MIGRATE_CMA areas. | ||
1021 | * | ||
1022 | * Returns the new migratetype of the pageblock (or the same old migratetype | ||
1023 | * if it was unchanged). | ||
1024 | */ | ||
1025 | static int try_to_steal_freepages(struct zone *zone, struct page *page, | ||
1026 | int start_type, int fallback_type) | ||
1027 | { | ||
1028 | int current_order = page_order(page); | ||
1029 | |||
1030 | if (is_migrate_cma(fallback_type)) | ||
1031 | return fallback_type; | ||
1032 | |||
1033 | /* Take ownership for orders >= pageblock_order */ | ||
1034 | if (current_order >= pageblock_order) { | ||
1035 | change_pageblock_range(page, current_order, start_type); | ||
1036 | return start_type; | ||
1037 | } | ||
1038 | |||
1039 | if (current_order >= pageblock_order / 2 || | ||
1040 | start_type == MIGRATE_RECLAIMABLE || | ||
1041 | page_group_by_mobility_disabled) { | ||
1042 | int pages; | ||
1043 | |||
1044 | pages = move_freepages_block(zone, page, start_type); | ||
1045 | |||
1046 | /* Claim the whole block if over half of it is free */ | ||
1047 | if (pages >= (1 << (pageblock_order-1)) || | ||
1048 | page_group_by_mobility_disabled) { | ||
1049 | |||
1050 | set_pageblock_migratetype(page, start_type); | ||
1051 | return start_type; | ||
1052 | } | ||
1053 | |||
1054 | } | ||
1055 | |||
1056 | return fallback_type; | ||
1057 | } | ||
1058 | |||
1010 | /* Remove an element from the buddy allocator from the fallback list */ | 1059 | /* Remove an element from the buddy allocator from the fallback list */ |
1011 | static inline struct page * | 1060 | static inline struct page * |
1012 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | 1061 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) |
1013 | { | 1062 | { |
1014 | struct free_area * area; | 1063 | struct free_area *area; |
1015 | int current_order; | 1064 | int current_order; |
1016 | struct page *page; | 1065 | struct page *page; |
1017 | int migratetype, i; | 1066 | int migratetype, new_type, i; |
1018 | 1067 | ||
1019 | /* Find the largest possible block of pages in the other list */ | 1068 | /* Find the largest possible block of pages in the other list */ |
1020 | for (current_order = MAX_ORDER-1; current_order >= order; | 1069 | for (current_order = MAX_ORDER-1; current_order >= order; |
@@ -1034,51 +1083,29 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
1034 | struct page, lru); | 1083 | struct page, lru); |
1035 | area->nr_free--; | 1084 | area->nr_free--; |
1036 | 1085 | ||
1037 | /* | 1086 | new_type = try_to_steal_freepages(zone, page, |
1038 | * If breaking a large block of pages, move all free | 1087 | start_migratetype, |
1039 | * pages to the preferred allocation list. If falling | 1088 | migratetype); |
1040 | * back for a reclaimable kernel allocation, be more | ||
1041 | * aggressive about taking ownership of free pages | ||
1042 | * | ||
1043 | * On the other hand, never change migration | ||
1044 | * type of MIGRATE_CMA pageblocks nor move CMA | ||
1045 | * pages on different free lists. We don't | ||
1046 | * want unmovable pages to be allocated from | ||
1047 | * MIGRATE_CMA areas. | ||
1048 | */ | ||
1049 | if (!is_migrate_cma(migratetype) && | ||
1050 | (current_order >= pageblock_order / 2 || | ||
1051 | start_migratetype == MIGRATE_RECLAIMABLE || | ||
1052 | page_group_by_mobility_disabled)) { | ||
1053 | int pages; | ||
1054 | pages = move_freepages_block(zone, page, | ||
1055 | start_migratetype); | ||
1056 | |||
1057 | /* Claim the whole block if over half of it is free */ | ||
1058 | if (pages >= (1 << (pageblock_order-1)) || | ||
1059 | page_group_by_mobility_disabled) | ||
1060 | set_pageblock_migratetype(page, | ||
1061 | start_migratetype); | ||
1062 | |||
1063 | migratetype = start_migratetype; | ||
1064 | } | ||
1065 | 1089 | ||
1066 | /* Remove the page from the freelists */ | 1090 | /* Remove the page from the freelists */ |
1067 | list_del(&page->lru); | 1091 | list_del(&page->lru); |
1068 | rmv_page_order(page); | 1092 | rmv_page_order(page); |
1069 | 1093 | ||
1070 | /* Take ownership for orders >= pageblock_order */ | 1094 | /* |
1071 | if (current_order >= pageblock_order && | 1095 | * Borrow the excess buddy pages as well, irrespective |
1072 | !is_migrate_cma(migratetype)) | 1096 | * of whether we stole freepages, or took ownership of |
1073 | change_pageblock_range(page, current_order, | 1097 | * the pageblock or not. |
1074 | start_migratetype); | 1098 | * |
1075 | 1099 | * Exception: When borrowing from MIGRATE_CMA, release | |
1100 | * the excess buddy pages to CMA itself. | ||
1101 | */ | ||
1076 | expand(zone, page, order, current_order, area, | 1102 | expand(zone, page, order, current_order, area, |
1077 | is_migrate_cma(migratetype) | 1103 | is_migrate_cma(migratetype) |
1078 | ? migratetype : start_migratetype); | 1104 | ? migratetype : start_migratetype); |
1079 | 1105 | ||
1080 | trace_mm_page_alloc_extfrag(page, order, current_order, | 1106 | trace_mm_page_alloc_extfrag(page, order, |
1081 | start_migratetype, migratetype); | 1107 | current_order, start_migratetype, migratetype, |
1108 | new_type == start_migratetype); | ||
1082 | 1109 | ||
1083 | return page; | 1110 | return page; |
1084 | } | 1111 | } |
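try_to_steal_freepages() above concentrates the fallback policy in one place. A simplified decision table in plain C: it keeps the CMA and whole-pageblock rules but omits move_freepages_block() and the over-half-free claim, and the MIGRATE_* values and pageblock order are local stand-ins, not the kernel's:

#include <stdio.h>

enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_CMA };
#define PAGEBLOCK_ORDER 9

static int is_cma(int t) { return t == MIGRATE_CMA; }

static int steal_decision(int current_order, int start_type, int fallback_type)
{
	if (is_cma(fallback_type))		/* never repurpose CMA */
		return fallback_type;

	if (current_order >= PAGEBLOCK_ORDER)	/* whole block(s): take them */
		return start_type;

	if (current_order >= PAGEBLOCK_ORDER / 2 ||
	    start_type == MIGRATE_RECLAIMABLE)
		return start_type;		/* aggressive enough to steal */

	return fallback_type;
}

int main(void)
{
	printf("%d\n", steal_decision(3, MIGRATE_UNMOVABLE, MIGRATE_MOVABLE));
	printf("%d\n", steal_decision(5, MIGRATE_UNMOVABLE, MIGRATE_MOVABLE));
	printf("%d\n", steal_decision(5, MIGRATE_UNMOVABLE, MIGRATE_CMA));
	return 0;
}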
@@ -1281,7 +1308,7 @@ void mark_free_pages(struct zone *zone) | |||
1281 | int order, t; | 1308 | int order, t; |
1282 | struct list_head *curr; | 1309 | struct list_head *curr; |
1283 | 1310 | ||
1284 | if (!zone->spanned_pages) | 1311 | if (zone_is_empty(zone)) |
1285 | return; | 1312 | return; |
1286 | 1313 | ||
1287 | spin_lock_irqsave(&zone->lock, flags); | 1314 | spin_lock_irqsave(&zone->lock, flags); |
@@ -1526,6 +1553,7 @@ again: | |||
1526 | get_pageblock_migratetype(page)); | 1553 | get_pageblock_migratetype(page)); |
1527 | } | 1554 | } |
1528 | 1555 | ||
1556 | __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); | ||
1529 | __count_zone_vm_events(PGALLOC, zone, 1 << order); | 1557 | __count_zone_vm_events(PGALLOC, zone, 1 << order); |
1530 | zone_statistics(preferred_zone, zone, gfp_flags); | 1558 | zone_statistics(preferred_zone, zone, gfp_flags); |
1531 | local_irq_restore(flags); | 1559 | local_irq_restore(flags); |
@@ -1792,6 +1820,11 @@ static void zlc_clear_zones_full(struct zonelist *zonelist) | |||
1792 | bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); | 1820 | bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); |
1793 | } | 1821 | } |
1794 | 1822 | ||
1823 | static bool zone_local(struct zone *local_zone, struct zone *zone) | ||
1824 | { | ||
1825 | return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE; | ||
1826 | } | ||
1827 | |||
1795 | static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) | 1828 | static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) |
1796 | { | 1829 | { |
1797 | return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes); | 1830 | return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes); |
@@ -1829,6 +1862,11 @@ static void zlc_clear_zones_full(struct zonelist *zonelist) | |||
1829 | { | 1862 | { |
1830 | } | 1863 | } |
1831 | 1864 | ||
1865 | static bool zone_local(struct zone *local_zone, struct zone *zone) | ||
1866 | { | ||
1867 | return true; | ||
1868 | } | ||
1869 | |||
1832 | static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) | 1870 | static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) |
1833 | { | 1871 | { |
1834 | return true; | 1872 | return true; |
@@ -1860,16 +1898,41 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, | |||
1860 | zonelist_scan: | 1898 | zonelist_scan: |
1861 | /* | 1899 | /* |
1862 | * Scan zonelist, looking for a zone with enough free. | 1900 | * Scan zonelist, looking for a zone with enough free. |
1863 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | 1901 | * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. |
1864 | */ | 1902 | */ |
1865 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 1903 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
1866 | high_zoneidx, nodemask) { | 1904 | high_zoneidx, nodemask) { |
1905 | unsigned long mark; | ||
1906 | |||
1867 | if (IS_ENABLED(CONFIG_NUMA) && zlc_active && | 1907 | if (IS_ENABLED(CONFIG_NUMA) && zlc_active && |
1868 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) | 1908 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) |
1869 | continue; | 1909 | continue; |
1870 | if ((alloc_flags & ALLOC_CPUSET) && | 1910 | if ((alloc_flags & ALLOC_CPUSET) && |
1871 | !cpuset_zone_allowed_softwall(zone, gfp_mask)) | 1911 | !cpuset_zone_allowed_softwall(zone, gfp_mask)) |
1872 | continue; | 1912 | continue; |
1913 | BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); | ||
1914 | if (unlikely(alloc_flags & ALLOC_NO_WATERMARKS)) | ||
1915 | goto try_this_zone; | ||
1916 | /* | ||
1917 | * Distribute pages in proportion to the individual | ||
1918 | * zone size to ensure fair page aging. The zone a | ||
1919 | * page was allocated in should have no effect on the | ||
1920 | * time the page has in memory before being reclaimed. | ||
1921 | * | ||
1922 | * When zone_reclaim_mode is enabled, try to stay in | ||
1923 | * local zones in the fastpath. If that fails, the | ||
1924 | * slowpath is entered, which will do another pass | ||
1925 | * starting with the local zones, but ultimately fall | ||
1926 | * back to remote zones that do not partake in the | ||
1927 | * fairness round-robin cycle of this zonelist. | ||
1928 | */ | ||
1929 | if (alloc_flags & ALLOC_WMARK_LOW) { | ||
1930 | if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) | ||
1931 | continue; | ||
1932 | if (zone_reclaim_mode && | ||
1933 | !zone_local(preferred_zone, zone)) | ||
1934 | continue; | ||
1935 | } | ||
1873 | /* | 1936 | /* |
1874 | * When allocating a page cache page for writing, we | 1937 | * When allocating a page cache page for writing, we |
1875 | * want to get it from a zone that is within its dirty | 1938 | * want to get it from a zone that is within its dirty |
@@ -1900,16 +1963,11 @@ zonelist_scan: | |||
1900 | (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone)) | 1963 | (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone)) |
1901 | goto this_zone_full; | 1964 | goto this_zone_full; |
1902 | 1965 | ||
1903 | BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); | 1966 | mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; |
1904 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { | 1967 | if (!zone_watermark_ok(zone, order, mark, |
1905 | unsigned long mark; | 1968 | classzone_idx, alloc_flags)) { |
1906 | int ret; | 1969 | int ret; |
1907 | 1970 | ||
1908 | mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; | ||
1909 | if (zone_watermark_ok(zone, order, mark, | ||
1910 | classzone_idx, alloc_flags)) | ||
1911 | goto try_this_zone; | ||
1912 | |||
1913 | if (IS_ENABLED(CONFIG_NUMA) && | 1971 | if (IS_ENABLED(CONFIG_NUMA) && |
1914 | !did_zlc_setup && nr_online_nodes > 1) { | 1972 | !did_zlc_setup && nr_online_nodes > 1) { |
1915 | /* | 1973 | /* |
@@ -2321,16 +2379,30 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, | |||
2321 | return page; | 2379 | return page; |
2322 | } | 2380 | } |
2323 | 2381 | ||
2324 | static inline | 2382 | static void prepare_slowpath(gfp_t gfp_mask, unsigned int order, |
2325 | void wake_all_kswapd(unsigned int order, struct zonelist *zonelist, | 2383 | struct zonelist *zonelist, |
2326 | enum zone_type high_zoneidx, | 2384 | enum zone_type high_zoneidx, |
2327 | enum zone_type classzone_idx) | 2385 | struct zone *preferred_zone) |
2328 | { | 2386 | { |
2329 | struct zoneref *z; | 2387 | struct zoneref *z; |
2330 | struct zone *zone; | 2388 | struct zone *zone; |
2331 | 2389 | ||
2332 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) | 2390 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
2333 | wakeup_kswapd(zone, order, classzone_idx); | 2391 | if (!(gfp_mask & __GFP_NO_KSWAPD)) |
2392 | wakeup_kswapd(zone, order, zone_idx(preferred_zone)); | ||
2393 | /* | ||
2394 | * Only reset the batches of zones that were actually | ||
2395 | * considered in the fast path, we don't want to | ||
2396 | * thrash fairness information for zones that are not | ||
2397 | * actually part of this zonelist's round-robin cycle. | ||
2398 | */ | ||
2399 | if (zone_reclaim_mode && !zone_local(preferred_zone, zone)) | ||
2400 | continue; | ||
2401 | mod_zone_page_state(zone, NR_ALLOC_BATCH, | ||
2402 | high_wmark_pages(zone) - | ||
2403 | low_wmark_pages(zone) - | ||
2404 | zone_page_state(zone, NR_ALLOC_BATCH)); | ||
2405 | } | ||
2334 | } | 2406 | } |
2335 | 2407 | ||
2336 | static inline int | 2408 | static inline int |
@@ -2426,9 +2498,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2426 | goto nopage; | 2498 | goto nopage; |
2427 | 2499 | ||
2428 | restart: | 2500 | restart: |
2429 | if (!(gfp_mask & __GFP_NO_KSWAPD)) | 2501 | prepare_slowpath(gfp_mask, order, zonelist, |
2430 | wake_all_kswapd(order, zonelist, high_zoneidx, | 2502 | high_zoneidx, preferred_zone); |
2431 | zone_idx(preferred_zone)); | ||
2432 | 2503 | ||
2433 | /* | 2504 | /* |
2434 | * OK, we're below the kswapd watermark and have kicked background | 2505 | * OK, we're below the kswapd watermark and have kicked background |
@@ -3095,7 +3166,7 @@ void show_free_areas(unsigned int filter) | |||
3095 | K(zone_page_state(zone, NR_FREE_CMA_PAGES)), | 3166 | K(zone_page_state(zone, NR_FREE_CMA_PAGES)), |
3096 | K(zone_page_state(zone, NR_WRITEBACK_TEMP)), | 3167 | K(zone_page_state(zone, NR_WRITEBACK_TEMP)), |
3097 | zone->pages_scanned, | 3168 | zone->pages_scanned, |
3098 | (zone->all_unreclaimable ? "yes" : "no") | 3169 | (!zone_reclaimable(zone) ? "yes" : "no") |
3099 | ); | 3170 | ); |
3100 | printk("lowmem_reserve[]:"); | 3171 | printk("lowmem_reserve[]:"); |
3101 | for (i = 0; i < MAX_NR_ZONES; i++) | 3172 | for (i = 0; i < MAX_NR_ZONES; i++) |
@@ -3104,7 +3175,7 @@ void show_free_areas(unsigned int filter) | |||
3104 | } | 3175 | } |
3105 | 3176 | ||
3106 | for_each_populated_zone(zone) { | 3177 | for_each_populated_zone(zone) { |
3107 | unsigned long nr[MAX_ORDER], flags, order, total = 0; | 3178 | unsigned long nr[MAX_ORDER], flags, order, total = 0; |
3108 | unsigned char types[MAX_ORDER]; | 3179 | unsigned char types[MAX_ORDER]; |
3109 | 3180 | ||
3110 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 3181 | if (skip_free_areas_node(filter, zone_to_nid(zone))) |
@@ -3416,11 +3487,11 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) | |||
3416 | static int default_zonelist_order(void) | 3487 | static int default_zonelist_order(void) |
3417 | { | 3488 | { |
3418 | int nid, zone_type; | 3489 | int nid, zone_type; |
3419 | unsigned long low_kmem_size,total_size; | 3490 | unsigned long low_kmem_size, total_size; |
3420 | struct zone *z; | 3491 | struct zone *z; |
3421 | int average_size; | 3492 | int average_size; |
3422 | /* | 3493 | /* |
3423 | * ZONE_DMA and ZONE_DMA32 can be very small area in the system. | 3494 | * ZONE_DMA and ZONE_DMA32 can be very small area in the system. |
3424 | * If they are really small and used heavily, the system can fall | 3495 | * If they are really small and used heavily, the system can fall |
3425 | * into OOM very easily. | 3496 | * into OOM very easily. |
3426 | * This function detect ZONE_DMA/DMA32 size and configures zone order. | 3497 | * This function detect ZONE_DMA/DMA32 size and configures zone order. |
@@ -3452,9 +3523,9 @@ static int default_zonelist_order(void) | |||
3452 | return ZONELIST_ORDER_NODE; | 3523 | return ZONELIST_ORDER_NODE; |
3453 | /* | 3524 | /* |
3454 | * look into each node's config. | 3525 | * look into each node's config. |
3455 | * If there is a node whose DMA/DMA32 memory is very big area on | 3526 | * If there is a node whose DMA/DMA32 memory is very big area on |
3456 | * local memory, NODE_ORDER may be suitable. | 3527 | * local memory, NODE_ORDER may be suitable. |
3457 | */ | 3528 | */ |
3458 | average_size = total_size / | 3529 | average_size = total_size / |
3459 | (nodes_weight(node_states[N_MEMORY]) + 1); | 3530 | (nodes_weight(node_states[N_MEMORY]) + 1); |
3460 | for_each_online_node(nid) { | 3531 | for_each_online_node(nid) { |
@@ -4180,7 +4251,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) | |||
4180 | if (!zone->wait_table) | 4251 | if (!zone->wait_table) |
4181 | return -ENOMEM; | 4252 | return -ENOMEM; |
4182 | 4253 | ||
4183 | for(i = 0; i < zone->wait_table_hash_nr_entries; ++i) | 4254 | for (i = 0; i < zone->wait_table_hash_nr_entries; ++i) |
4184 | init_waitqueue_head(zone->wait_table + i); | 4255 | init_waitqueue_head(zone->wait_table + i); |
4185 | 4256 | ||
4186 | return 0; | 4257 | return 0; |
@@ -4237,7 +4308,7 @@ int __meminit init_currently_empty_zone(struct zone *zone, | |||
4237 | int __meminit __early_pfn_to_nid(unsigned long pfn) | 4308 | int __meminit __early_pfn_to_nid(unsigned long pfn) |
4238 | { | 4309 | { |
4239 | unsigned long start_pfn, end_pfn; | 4310 | unsigned long start_pfn, end_pfn; |
4240 | int i, nid; | 4311 | int nid; |
4241 | /* | 4312 | /* |
4242 | * NOTE: The following SMP-unsafe globals are only used early in boot | 4313 | * NOTE: The following SMP-unsafe globals are only used early in boot |
4243 | * when the kernel is running single-threaded. | 4314 | * when the kernel is running single-threaded. |
@@ -4248,15 +4319,14 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) | |||
4248 | if (last_start_pfn <= pfn && pfn < last_end_pfn) | 4319 | if (last_start_pfn <= pfn && pfn < last_end_pfn) |
4249 | return last_nid; | 4320 | return last_nid; |
4250 | 4321 | ||
4251 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) | 4322 | nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn); |
4252 | if (start_pfn <= pfn && pfn < end_pfn) { | 4323 | if (nid != -1) { |
4253 | last_start_pfn = start_pfn; | 4324 | last_start_pfn = start_pfn; |
4254 | last_end_pfn = end_pfn; | 4325 | last_end_pfn = end_pfn; |
4255 | last_nid = nid; | 4326 | last_nid = nid; |
4256 | return nid; | 4327 | } |
4257 | } | 4328 | |
4258 | /* This is a memory hole */ | 4329 | return nid; |
4259 | return -1; | ||
4260 | } | 4330 | } |
4261 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ | 4331 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ |
4262 | 4332 | ||
@@ -4586,7 +4656,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone, | |||
4586 | #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE | 4656 | #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE |
4587 | 4657 | ||
4588 | /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ | 4658 | /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ |
4589 | void __init set_pageblock_order(void) | 4659 | void __paginginit set_pageblock_order(void) |
4590 | { | 4660 | { |
4591 | unsigned int order; | 4661 | unsigned int order; |
4592 | 4662 | ||
@@ -4614,7 +4684,7 @@ void __init set_pageblock_order(void) | |||
4614 | * include/linux/pageblock-flags.h for the values of pageblock_order based on | 4684 | * include/linux/pageblock-flags.h for the values of pageblock_order based on |
4615 | * the kernel config | 4685 | * the kernel config |
4616 | */ | 4686 | */ |
4617 | void __init set_pageblock_order(void) | 4687 | void __paginginit set_pageblock_order(void) |
4618 | { | 4688 | { |
4619 | } | 4689 | } |
4620 | 4690 | ||
@@ -4728,8 +4798,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4728 | spin_lock_init(&zone->lru_lock); | 4798 | spin_lock_init(&zone->lru_lock); |
4729 | zone_seqlock_init(zone); | 4799 | zone_seqlock_init(zone); |
4730 | zone->zone_pgdat = pgdat; | 4800 | zone->zone_pgdat = pgdat; |
4731 | |||
4732 | zone_pcp_init(zone); | 4801 | zone_pcp_init(zone); |
4802 | |||
4803 | /* For bootup, initialized properly in watermark setup */ | ||
4804 | mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages); | ||
4805 | |||
4733 | lruvec_init(&zone->lruvec); | 4806 | lruvec_init(&zone->lruvec); |
4734 | if (!size) | 4807 | if (!size) |
4735 | continue; | 4808 | continue; |
@@ -4930,7 +5003,7 @@ static unsigned long __init early_calculate_totalpages(void) | |||
4930 | if (pages) | 5003 | if (pages) |
4931 | node_set_state(nid, N_MEMORY); | 5004 | node_set_state(nid, N_MEMORY); |
4932 | } | 5005 | } |
4933 | return totalpages; | 5006 | return totalpages; |
4934 | } | 5007 | } |
4935 | 5008 | ||
4936 | /* | 5009 | /* |
@@ -5047,7 +5120,7 @@ restart: | |||
5047 | /* | 5120 | /* |
5048 | * Some kernelcore has been met, update counts and | 5121 | * Some kernelcore has been met, update counts and |
5049 | * break if the kernelcore for this node has been | 5122 | * break if the kernelcore for this node has been |
5050 | * satisified | 5123 | * satisfied |
5051 | */ | 5124 | */ |
5052 | required_kernelcore -= min(required_kernelcore, | 5125 | required_kernelcore -= min(required_kernelcore, |
5053 | size_pages); | 5126 | size_pages); |
@@ -5061,7 +5134,7 @@ restart: | |||
5061 | * If there is still required_kernelcore, we do another pass with one | 5134 | * If there is still required_kernelcore, we do another pass with one |
5062 | * less node in the count. This will push zone_movable_pfn[nid] further | 5135 | * less node in the count. This will push zone_movable_pfn[nid] further |
5063 | * along on the nodes that still have memory until kernelcore is | 5136 | * along on the nodes that still have memory until kernelcore is |
5064 | * satisified | 5137 | * satisfied |
5065 | */ | 5138 | */ |
5066 | usable_nodes--; | 5139 | usable_nodes--; |
5067 | if (usable_nodes && required_kernelcore > usable_nodes) | 5140 | if (usable_nodes && required_kernelcore > usable_nodes) |
@@ -5286,8 +5359,10 @@ void __init mem_init_print_info(const char *str) | |||
5286 | * 3) .rodata.* may be embedded into .text or .data sections. | 5359 | * 3) .rodata.* may be embedded into .text or .data sections. |
5287 | */ | 5360 | */ |
5288 | #define adj_init_size(start, end, size, pos, adj) \ | 5361 | #define adj_init_size(start, end, size, pos, adj) \ |
5289 | if (start <= pos && pos < end && size > adj) \ | 5362 | do { \ |
5290 | size -= adj; | 5363 | if (start <= pos && pos < end && size > adj) \ |
5364 | size -= adj; \ | ||
5365 | } while (0) | ||
5291 | 5366 | ||
5292 | adj_init_size(__init_begin, __init_end, init_data_size, | 5367 | adj_init_size(__init_begin, __init_end, init_data_size, |
5293 | _sinittext, init_code_size); | 5368 | _sinittext, init_code_size); |
@@ -5361,7 +5436,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, | |||
5361 | * This is only okay since the processor is dead and cannot | 5436 | * This is only okay since the processor is dead and cannot |
5362 | * race with what we are doing. | 5437 | * race with what we are doing. |
5363 | */ | 5438 | */ |
5364 | refresh_cpu_vm_stats(cpu); | 5439 | cpu_vm_stats_fold(cpu); |
5365 | } | 5440 | } |
5366 | return NOTIFY_OK; | 5441 | return NOTIFY_OK; |
5367 | } | 5442 | } |
@@ -5498,6 +5573,11 @@ static void __setup_per_zone_wmarks(void) | |||
5498 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); | 5573 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); |
5499 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); | 5574 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); |
5500 | 5575 | ||
5576 | __mod_zone_page_state(zone, NR_ALLOC_BATCH, | ||
5577 | high_wmark_pages(zone) - | ||
5578 | low_wmark_pages(zone) - | ||
5579 | zone_page_state(zone, NR_ALLOC_BATCH)); | ||
5580 | |||
5501 | setup_zone_migrate_reserve(zone); | 5581 | setup_zone_migrate_reserve(zone); |
5502 | spin_unlock_irqrestore(&zone->lock, flags); | 5582 | spin_unlock_irqrestore(&zone->lock, flags); |
5503 | } | 5583 | } |
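Both this hunk and prepare_slowpath() above reseed NR_ALLOC_BATCH with the high-minus-low watermark gap, which the fast path then drains one page at a time. A toy round-robin model of that fairness scheme; the zone sizes are invented and only the skip-when-exhausted/refill cycle carries over from the kernel:

#include <stdio.h>

struct zone { const char *name; long batch, high, low; };

static struct zone zones[] = {
	{ "Normal", 0, 1000, 800 },
	{ "DMA32",  0,  500, 400 },
};
#define NZONES 2

static void refill_batches(void)		/* prepare_slowpath() analogue */
{
	for (int i = 0; i < NZONES; i++)
		zones[i].batch = zones[i].high - zones[i].low;
}

static struct zone *alloc_fastpath(void)
{
	for (int i = 0; i < NZONES; i++) {
		if (zones[i].batch <= 0)
			continue;	/* batch exhausted: be fair, try next */
		zones[i].batch--;
		return &zones[i];
	}
	return NULL;			/* would enter the slow path */
}

int main(void)
{
	int from[NZONES] = { 0 };
	struct zone *z;

	refill_batches();
	while ((z = alloc_fastpath()))
		from[z - zones]++;
	/* Totals are proportional to each zone's batch: 200 vs 100. */
	printf("Normal: %d, DMA32: %d\n", from[0], from[1]);
	return 0;
}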
@@ -5570,7 +5650,7 @@ static void __meminit setup_per_zone_inactive_ratio(void) | |||
5570 | * we want it large (64MB max). But it is not linear, because network | 5650 | * we want it large (64MB max). But it is not linear, because network |
5571 | * bandwidth does not increase linearly with machine size. We use | 5651 | * bandwidth does not increase linearly with machine size. We use |
5572 | * | 5652 | * |
5573 | * min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy: | 5653 | * min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy: |
5574 | * min_free_kbytes = sqrt(lowmem_kbytes * 16) | 5654 | * min_free_kbytes = sqrt(lowmem_kbytes * 16) |
5575 | * | 5655 | * |
5576 | * which yields | 5656 | * which yields |
@@ -5614,11 +5694,11 @@ int __meminit init_per_zone_wmark_min(void) | |||
5614 | module_init(init_per_zone_wmark_min) | 5694 | module_init(init_per_zone_wmark_min) |
5615 | 5695 | ||
5616 | /* | 5696 | /* |
5617 | * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so | 5697 | * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so |
5618 | * that we can call two helper functions whenever min_free_kbytes | 5698 | * that we can call two helper functions whenever min_free_kbytes |
5619 | * changes. | 5699 | * changes. |
5620 | */ | 5700 | */ |
5621 | int min_free_kbytes_sysctl_handler(ctl_table *table, int write, | 5701 | int min_free_kbytes_sysctl_handler(ctl_table *table, int write, |
5622 | void __user *buffer, size_t *length, loff_t *ppos) | 5702 | void __user *buffer, size_t *length, loff_t *ppos) |
5623 | { | 5703 | { |
5624 | proc_dointvec(table, write, buffer, length, ppos); | 5704 | proc_dointvec(table, write, buffer, length, ppos); |
@@ -5682,8 +5762,8 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, | |||
5682 | 5762 | ||
5683 | /* | 5763 | /* |
5684 | * percpu_pagelist_fraction - changes the pcp->high for each zone on each | 5764 | * percpu_pagelist_fraction - changes the pcp->high for each zone on each |
5685 | * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist | 5765 | * cpu. It is the fraction of total pages in each zone that a hot per cpu |
5686 | * can have before it gets flushed back to buddy allocator. | 5766 | * pagelist can have before it gets flushed back to buddy allocator. |
5687 | */ | 5767 | */ |
5688 | int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, | 5768 | int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, |
5689 | void __user *buffer, size_t *length, loff_t *ppos) | 5769 | void __user *buffer, size_t *length, loff_t *ppos) |
@@ -5745,9 +5825,10 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
5745 | if (!numentries) { | 5825 | if (!numentries) { |
5746 | /* round applicable memory size up to nearest megabyte */ | 5826 | /* round applicable memory size up to nearest megabyte */ |
5747 | numentries = nr_kernel_pages; | 5827 | numentries = nr_kernel_pages; |
5748 | numentries += (1UL << (20 - PAGE_SHIFT)) - 1; | 5828 | |
5749 | numentries >>= 20 - PAGE_SHIFT; | 5829 | /* It isn't necessary when PAGE_SIZE >= 1MB */ |
5750 | numentries <<= 20 - PAGE_SHIFT; | 5830 | if (PAGE_SHIFT < 20) |
5831 | numentries = round_up(numentries, (1<<20)/PAGE_SIZE); | ||
5751 | 5832 | ||
5752 | /* limit to 1 bucket per 2^scale bytes of low memory */ | 5833 | /* limit to 1 bucket per 2^scale bytes of low memory */ |
5753 | if (scale > PAGE_SHIFT) | 5834 | if (scale > PAGE_SHIFT) |
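The replacement above rounds the page count up to a whole megabyte only when a page is smaller than 1MB; with PAGE_SHIFT >= 20 the old add/shift/shift sequence would shift by a non-positive count. A sketch of the guarded rounding, with a local PAGE_SHIFT rather than the kernel's:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static unsigned long round_up_ul(unsigned long x, unsigned long step)
{
	return (x + step - 1) / step * step;
}

int main(void)
{
	unsigned long numentries = 100000;	/* pages */

#if PAGE_SHIFT < 20	/* not necessary when PAGE_SIZE >= 1MB */
	numentries = round_up_ul(numentries, (1UL << 20) / PAGE_SIZE);
#endif
	printf("%lu\n", numentries);	/* 100096 = 391 * 256 */
	return 0;
}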
@@ -5900,7 +5981,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, | |||
5900 | * This function checks whether pageblock includes unmovable pages or not. | 5981 | * This function checks whether pageblock includes unmovable pages or not. |
5901 | * If @count is not zero, it is okay to include less @count unmovable pages | 5982 | * If @count is not zero, it is okay to include less @count unmovable pages |
5902 | * | 5983 | * |
5903 | * PageLRU check wihtout isolation or lru_lock could race so that | 5984 | * PageLRU check without isolation or lru_lock could race so that |
5904 | * MIGRATE_MOVABLE block might include unmovable pages. It means you can't | 5985 | * MIGRATE_MOVABLE block might include unmovable pages. It means you can't |
5905 | * expect this function should be exact. | 5986 | * expect this function should be exact. |
5906 | */ | 5987 | */ |
@@ -5928,6 +6009,17 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
5928 | continue; | 6009 | continue; |
5929 | 6010 | ||
5930 | page = pfn_to_page(check); | 6011 | page = pfn_to_page(check); |
6012 | |||
6013 | /* | ||
6014 | * Hugepages are not in LRU lists, but they're movable. | ||
6015 | * We need not scan over tail pages because we don't | ||
6016 | * handle each tail page individually in migration. | ||
6017 | */ | ||
6018 | if (PageHuge(page)) { | ||
6019 | iter = round_up(iter + 1, 1<<compound_order(page)) - 1; | ||
6020 | continue; | ||
6021 | } | ||
6022 | |||
5931 | /* | 6023 | /* |
5932 | * We can't use page_count without pin a page | 6024 | * We can't use page_count without pin a page |
5933 | * because another CPU can free compound page. | 6025 | * because another CPU can free compound page. |
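The PageHuge() branch above advances the scan past every tail page in one step: set iter to the last slot of the compound page so the loop increment lands on the next page. The arithmetic in isolation, with toy order data standing in for real compound pages:

#include <stdio.h>

static unsigned long round_up_ul(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);	/* a must be a power of two */
}

int main(void)
{
	/* order[i] > 0 marks the head of a compound page of that order. */
	int order[16] = { 0 };
	order[4] = 2;	/* 4-page "hugepage" covering pfns 4..7 */

	for (unsigned long iter = 0; iter < 16; iter++) {
		if (order[iter]) {
			/* jump to last pfn; loop's iter++ moves past it */
			iter = round_up_ul(iter + 1, 1UL << order[iter]) - 1;
			continue;
		}
		printf("scan %lu\n", iter);	/* pfns 4..7 never print */
	}
	return 0;
}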
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 0cee10ffb98d..d1473b2e9481 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/page-isolation.h> | 6 | #include <linux/page-isolation.h> |
7 | #include <linux/pageblock-flags.h> | 7 | #include <linux/pageblock-flags.h> |
8 | #include <linux/memory.h> | 8 | #include <linux/memory.h> |
9 | #include <linux/hugetlb.h> | ||
9 | #include "internal.h" | 10 | #include "internal.h" |
10 | 11 | ||
11 | int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages) | 12 | int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages) |
@@ -252,6 +253,19 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private, | |||
252 | { | 253 | { |
253 | gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; | 254 | gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; |
254 | 255 | ||
256 | /* | ||
257 | * TODO: allocate a destination hugepage from a nearest neighbor node, | ||
258 | * in accordance with the memory policy of the user process if possible. For | ||
259 | * now as a simple work-around, we use the next node for destination. | ||
260 | */ | ||
261 | if (PageHuge(page)) { | ||
262 | nodemask_t src = nodemask_of_node(page_to_nid(page)); | ||
263 | nodemask_t dst; | ||
264 | nodes_complement(dst, src); | ||
265 | return alloc_huge_page_node(page_hstate(compound_head(page)), | ||
266 | next_node(page_to_nid(page), dst)); | ||
267 | } | ||
268 | |||
255 | if (PageHighMem(page)) | 269 | if (PageHighMem(page)) |
256 | gfp_mask |= __GFP_HIGHMEM; | 270 | gfp_mask |= __GFP_HIGHMEM; |
257 | 271 | ||
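The hugepage case above picks a destination by complementing the source node's mask and taking the next node after it. A userspace model with a plain bitmask standing in for nodemask_t; the kernel's next_node() has slightly different edge semantics, so this only illustrates the wrap-around pick:

#include <stdio.h>

#define NR_NODES 4

static int next_node_in_mask(int n, unsigned int mask)
{
	for (int i = 1; i <= NR_NODES; i++) {
		int cand = (n + i) % NR_NODES;
		if (mask & (1u << cand))
			return cand;
	}
	return -1;			/* mask empty */
}

int main(void)
{
	int src_nid = 2;
	unsigned int src = 1u << src_nid;
	unsigned int dst = ~src & ((1u << NR_NODES) - 1);	/* complement */

	printf("migrate node %d -> node %d\n",
	       src_nid, next_node_in_mask(src_nid, dst));
	return 0;
}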
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index e1a6e4fab016..3929a40bd6c0 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c | |||
@@ -10,6 +10,30 @@ | |||
10 | #include <asm/tlb.h> | 10 | #include <asm/tlb.h> |
11 | #include <asm-generic/pgtable.h> | 11 | #include <asm-generic/pgtable.h> |
12 | 12 | ||
13 | /* | ||
14 | * If a p?d_bad entry is found while walking page tables, report | ||
15 | * the error, before resetting entry to p?d_none. Usually (but | ||
16 | * very seldom) called out from the p?d_none_or_clear_bad macros. | ||
17 | */ | ||
18 | |||
19 | void pgd_clear_bad(pgd_t *pgd) | ||
20 | { | ||
21 | pgd_ERROR(*pgd); | ||
22 | pgd_clear(pgd); | ||
23 | } | ||
24 | |||
25 | void pud_clear_bad(pud_t *pud) | ||
26 | { | ||
27 | pud_ERROR(*pud); | ||
28 | pud_clear(pud); | ||
29 | } | ||
30 | |||
31 | void pmd_clear_bad(pmd_t *pmd) | ||
32 | { | ||
33 | pmd_ERROR(*pmd); | ||
34 | pmd_clear(pmd); | ||
35 | } | ||
36 | |||
13 | #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | 37 | #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
14 | /* | 38 | /* |
15 | * Only sets the access flags (dirty, accessed), as well as write | 39 | * Only sets the access flags (dirty, accessed), as well as write |
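The three clear_bad() helpers consolidated here all follow one pattern: log the corrupt entry, then reset it to none so the page-table walk can make forward progress. A toy version of that report-then-clear pattern with an invented entry type:

#include <stdio.h>

typedef unsigned long entry_t;

#define ENTRY_NONE   0UL
#define ENTRY_BAD(e) ((e) == 0xdeadUL)	/* toy corruption test */

static void entry_clear_bad(entry_t *e)
{
	fprintf(stderr, "bad entry %#lx\n", *e);	/* p?d_ERROR() */
	*e = ENTRY_NONE;				/* p?d_clear() */
}

static int entry_none_or_clear_bad(entry_t *e)
{
	if (*e == ENTRY_NONE)
		return 1;
	if (ENTRY_BAD(*e)) {
		entry_clear_bad(e);
		return 1;
	}
	return 0;
}

int main(void)
{
	entry_t table[3] = { ENTRY_NONE, 0xdeadUL, 0x1000UL };

	for (int i = 0; i < 3; i++)
		printf("slot %d: %s\n", i,
		       entry_none_or_clear_bad(&table[i]) ? "skip" : "walk");
	return 0;
}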
diff --git a/mm/readahead.c b/mm/readahead.c index 829a77c62834..e4ed04149785 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -371,10 +371,10 @@ static int try_context_readahead(struct address_space *mapping, | |||
371 | size = count_history_pages(mapping, ra, offset, max); | 371 | size = count_history_pages(mapping, ra, offset, max); |
372 | 372 | ||
373 | /* | 373 | /* |
374 | * no history pages: | 374 | * not enough history pages: |
375 | * it could be a random read | 375 | * it could be a random read |
376 | */ | 376 | */ |
377 | if (!size) | 377 | if (size <= req_size) |
378 | return 0; | 378 | return 0; |
379 | 379 | ||
380 | /* | 380 | /* |
@@ -385,8 +385,8 @@ static int try_context_readahead(struct address_space *mapping, | |||
385 | size *= 2; | 385 | size *= 2; |
386 | 386 | ||
387 | ra->start = offset; | 387 | ra->start = offset; |
388 | ra->size = get_init_ra_size(size + req_size, max); | 388 | ra->size = min(size + req_size, max); |
389 | ra->async_size = ra->size; | 389 | ra->async_size = 1; |
390 | 390 | ||
391 | return 1; | 391 | return 1; |
392 | } | 392 | } |
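The readahead change above stops trusting a history shorter than the request itself, extends the window by req_size capped at max, and leaves a single async page instead of a full async window. The resized logic as a standalone sketch; count_history_pages() is replaced by a plain argument:

#include <stdio.h>

struct ra { unsigned long start, size, async_size; };

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

static int context_readahead(struct ra *ra, unsigned long offset,
			     unsigned long req_size, unsigned long max,
			     unsigned long history)
{
	if (history <= req_size)	/* could just be a random read */
		return 0;

	if (history >= offset)		/* sequential from start: trust it */
		history *= 2;

	ra->start = offset;
	ra->size = min_ul(history + req_size, max);
	ra->async_size = 1;
	return 1;
}

int main(void)
{
	struct ra ra = { 0 };

	if (context_readahead(&ra, 100, 8, 128, 32))
		printf("start=%lu size=%lu async=%lu\n",
		       ra.start, ra.size, ra.async_size);
	return 0;
}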
diff --git a/mm/shmem.c b/mm/shmem.c index 526149846d0a..8297623fcaed 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -1205,7 +1205,7 @@ repeat: | |||
1205 | gfp & GFP_RECLAIM_MASK); | 1205 | gfp & GFP_RECLAIM_MASK); |
1206 | if (error) | 1206 | if (error) |
1207 | goto decused; | 1207 | goto decused; |
1208 | error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); | 1208 | error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); |
1209 | if (!error) { | 1209 | if (!error) { |
1210 | error = shmem_add_to_page_cache(page, mapping, index, | 1210 | error = shmem_add_to_page_cache(page, mapping, index, |
1211 | gfp, NULL); | 1211 | gfp, NULL); |
@@ -2819,6 +2819,10 @@ int __init shmem_init(void) | |||
2819 | { | 2819 | { |
2820 | int error; | 2820 | int error; |
2821 | 2821 | ||
2822 | /* If rootfs called this, don't re-init */ | ||
2823 | if (shmem_inode_cachep) | ||
2824 | return 0; | ||
2825 | |||
2822 | error = bdi_init(&shmem_backing_dev_info); | 2826 | error = bdi_init(&shmem_backing_dev_info); |
2823 | if (error) | 2827 | if (error) |
2824 | goto out4; | 2828 | goto out4; |
@@ -4420,7 +4420,7 @@ static ssize_t order_store(struct kmem_cache *s, | |||
4420 | unsigned long order; | 4420 | unsigned long order; |
4421 | int err; | 4421 | int err; |
4422 | 4422 | ||
4423 | err = strict_strtoul(buf, 10, &order); | 4423 | err = kstrtoul(buf, 10, &order); |
4424 | if (err) | 4424 | if (err) |
4425 | return err; | 4425 | return err; |
4426 | 4426 | ||
@@ -4448,7 +4448,7 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, | |||
4448 | unsigned long min; | 4448 | unsigned long min; |
4449 | int err; | 4449 | int err; |
4450 | 4450 | ||
4451 | err = strict_strtoul(buf, 10, &min); | 4451 | err = kstrtoul(buf, 10, &min); |
4452 | if (err) | 4452 | if (err) |
4453 | return err; | 4453 | return err; |
4454 | 4454 | ||
@@ -4468,7 +4468,7 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, | |||
4468 | unsigned long objects; | 4468 | unsigned long objects; |
4469 | int err; | 4469 | int err; |
4470 | 4470 | ||
4471 | err = strict_strtoul(buf, 10, &objects); | 4471 | err = kstrtoul(buf, 10, &objects); |
4472 | if (err) | 4472 | if (err) |
4473 | return err; | 4473 | return err; |
4474 | if (objects && !kmem_cache_has_cpu_partial(s)) | 4474 | if (objects && !kmem_cache_has_cpu_partial(s)) |
@@ -4784,7 +4784,7 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, | |||
4784 | unsigned long ratio; | 4784 | unsigned long ratio; |
4785 | int err; | 4785 | int err; |
4786 | 4786 | ||
4787 | err = strict_strtoul(buf, 10, &ratio); | 4787 | err = kstrtoul(buf, 10, &ratio); |
4788 | if (err) | 4788 | if (err) |
4789 | return err; | 4789 | return err; |
4790 | 4790 | ||
diff --git a/mm/sparse.c b/mm/sparse.c index 308d50331bc3..4ac1d7ef548f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -339,13 +339,14 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | |||
339 | } | 339 | } |
340 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 340 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
341 | 341 | ||
342 | static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map, | 342 | static void __init sparse_early_usemaps_alloc_node(void *data, |
343 | unsigned long pnum_begin, | 343 | unsigned long pnum_begin, |
344 | unsigned long pnum_end, | 344 | unsigned long pnum_end, |
345 | unsigned long usemap_count, int nodeid) | 345 | unsigned long usemap_count, int nodeid) |
346 | { | 346 | { |
347 | void *usemap; | 347 | void *usemap; |
348 | unsigned long pnum; | 348 | unsigned long pnum; |
349 | unsigned long **usemap_map = (unsigned long **)data; | ||
349 | int size = usemap_size(); | 350 | int size = usemap_size(); |
350 | 351 | ||
351 | usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), | 352 | usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), |
@@ -430,11 +431,12 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, | |||
430 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 431 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
431 | 432 | ||
432 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | 433 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER |
433 | static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, | 434 | static void __init sparse_early_mem_maps_alloc_node(void *data, |
434 | unsigned long pnum_begin, | 435 | unsigned long pnum_begin, |
435 | unsigned long pnum_end, | 436 | unsigned long pnum_end, |
436 | unsigned long map_count, int nodeid) | 437 | unsigned long map_count, int nodeid) |
437 | { | 438 | { |
439 | struct page **map_map = (struct page **)data; | ||
438 | sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, | 440 | sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, |
439 | map_count, nodeid); | 441 | map_count, nodeid); |
440 | } | 442 | } |
@@ -460,6 +462,55 @@ void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) | |||
460 | { | 462 | { |
461 | } | 463 | } |
462 | 464 | ||
465 | /** | ||
466 | * alloc_usemap_and_memmap - memory allocation for pageblock flags and vmemmap | ||
467 | * @data: usemap_map for pageblock flags or map_map for vmemmap | ||
468 | */ | ||
469 | static void __init alloc_usemap_and_memmap(void (*alloc_func) | ||
470 | (void *, unsigned long, unsigned long, | ||
471 | unsigned long, int), void *data) | ||
472 | { | ||
473 | unsigned long pnum; | ||
474 | unsigned long map_count; | ||
475 | int nodeid_begin = 0; | ||
476 | unsigned long pnum_begin = 0; | ||
477 | |||
478 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | ||
479 | struct mem_section *ms; | ||
480 | |||
481 | if (!present_section_nr(pnum)) | ||
482 | continue; | ||
483 | ms = __nr_to_section(pnum); | ||
484 | nodeid_begin = sparse_early_nid(ms); | ||
485 | pnum_begin = pnum; | ||
486 | break; | ||
487 | } | ||
488 | map_count = 1; | ||
489 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
490 | struct mem_section *ms; | ||
491 | int nodeid; | ||
492 | |||
493 | if (!present_section_nr(pnum)) | ||
494 | continue; | ||
495 | ms = __nr_to_section(pnum); | ||
496 | nodeid = sparse_early_nid(ms); | ||
497 | if (nodeid == nodeid_begin) { | ||
498 | map_count++; | ||
499 | continue; | ||
500 | } | ||
501 | /* ok, we need to take care of pnum_begin to pnum - 1 */ | ||
502 | alloc_func(data, pnum_begin, pnum, | ||
503 | map_count, nodeid_begin); | ||
504 | /* new start, update count etc. */ | ||
505 | nodeid_begin = nodeid; | ||
506 | pnum_begin = pnum; | ||
507 | map_count = 1; | ||
508 | } | ||
509 | /* ok, last chunk */ | ||
510 | alloc_func(data, pnum_begin, NR_MEM_SECTIONS, | ||
511 | map_count, nodeid_begin); | ||
512 | } | ||
513 | |||
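alloc_usemap_and_memmap() above replaces two nearly identical open-coded loops in sparse_init(). A userspace model of the same grouping: walk the sections, batch consecutive present ones by node id, and flush each run to a callback. The section data here is invented:

#include <stdio.h>

#define NSEC 10
static int present[NSEC] = { 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 };
static int nid_of[NSEC]  = { 0, 0, 0, 1, 1, 1, 1, 1, 2, 2 };

static void alloc_range(void *data, unsigned long begin, unsigned long end,
			unsigned long count, int nid)
{
	printf("node %d: sections [%lu,%lu), count %lu\n",
	       nid, begin, end, count);
}

static void for_each_node_run(void (*alloc_func)(void *, unsigned long,
						 unsigned long, unsigned long,
						 int), void *data)
{
	unsigned long pnum, pnum_begin = 0, count = 0;
	int nid_begin = -1;

	for (pnum = 0; pnum < NSEC; pnum++) {
		if (!present[pnum])
			continue;
		if (nid_begin == -1) {		/* first present section */
			nid_begin = nid_of[pnum];
			pnum_begin = pnum;
			count = 1;
		} else if (nid_of[pnum] == nid_begin) {
			count++;
		} else {			/* node changed: flush run */
			alloc_func(data, pnum_begin, pnum, count, nid_begin);
			nid_begin = nid_of[pnum];
			pnum_begin = pnum;
			count = 1;
		}
	}
	if (nid_begin != -1)			/* last run */
		alloc_func(data, pnum_begin, NSEC, count, nid_begin);
}

int main(void)
{
	for_each_node_run(alloc_range, NULL);
	return 0;
}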
463 | /* | 514 | /* |
464 | * Allocate the accumulated non-linear sections, allocate a mem_map | 515 | * Allocate the accumulated non-linear sections, allocate a mem_map |
465 | * for each and record the physical to section mapping. | 516 | * for each and record the physical to section mapping. |
@@ -471,11 +522,7 @@ void __init sparse_init(void) | |||
471 | unsigned long *usemap; | 522 | unsigned long *usemap; |
472 | unsigned long **usemap_map; | 523 | unsigned long **usemap_map; |
473 | int size; | 524 | int size; |
474 | int nodeid_begin = 0; | ||
475 | unsigned long pnum_begin = 0; | ||
476 | unsigned long usemap_count; | ||
477 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | 525 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER |
478 | unsigned long map_count; | ||
479 | int size2; | 526 | int size2; |
480 | struct page **map_map; | 527 | struct page **map_map; |
481 | #endif | 528 | #endif |
@@ -501,82 +548,16 @@ void __init sparse_init(void) | |||
501 | usemap_map = alloc_bootmem(size); | 548 | usemap_map = alloc_bootmem(size); |
502 | if (!usemap_map) | 549 | if (!usemap_map) |
503 | panic("can not allocate usemap_map\n"); | 550 | panic("can not allocate usemap_map\n"); |
504 | 551 | alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node, | |
505 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 552 | (void *)usemap_map); |
506 | struct mem_section *ms; | ||
507 | |||
508 | if (!present_section_nr(pnum)) | ||
509 | continue; | ||
510 | ms = __nr_to_section(pnum); | ||
511 | nodeid_begin = sparse_early_nid(ms); | ||
512 | pnum_begin = pnum; | ||
513 | break; | ||
514 | } | ||
515 | usemap_count = 1; | ||
516 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
517 | struct mem_section *ms; | ||
518 | int nodeid; | ||
519 | |||
520 | if (!present_section_nr(pnum)) | ||
521 | continue; | ||
522 | ms = __nr_to_section(pnum); | ||
523 | nodeid = sparse_early_nid(ms); | ||
524 | if (nodeid == nodeid_begin) { | ||
525 | usemap_count++; | ||
526 | continue; | ||
527 | } | ||
528 | /* ok, we need to take cake of from pnum_begin to pnum - 1*/ | ||
529 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum, | ||
530 | usemap_count, nodeid_begin); | ||
531 | /* new start, update count etc*/ | ||
532 | nodeid_begin = nodeid; | ||
533 | pnum_begin = pnum; | ||
534 | usemap_count = 1; | ||
535 | } | ||
536 | /* ok, last chunk */ | ||
537 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, | ||
538 | usemap_count, nodeid_begin); | ||
539 | 553 | ||
540 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | 554 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER |
541 | size2 = sizeof(struct page *) * NR_MEM_SECTIONS; | 555 | size2 = sizeof(struct page *) * NR_MEM_SECTIONS; |
542 | map_map = alloc_bootmem(size2); | 556 | map_map = alloc_bootmem(size2); |
543 | if (!map_map) | 557 | if (!map_map) |
544 | panic("can not allocate map_map\n"); | 558 | panic("can not allocate map_map\n"); |
545 | 559 | alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node, | |
546 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 560 | (void *)map_map); |
547 | struct mem_section *ms; | ||
548 | |||
549 | if (!present_section_nr(pnum)) | ||
550 | continue; | ||
551 | ms = __nr_to_section(pnum); | ||
552 | nodeid_begin = sparse_early_nid(ms); | ||
553 | pnum_begin = pnum; | ||
554 | break; | ||
555 | } | ||
556 | map_count = 1; | ||
557 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
558 | struct mem_section *ms; | ||
559 | int nodeid; | ||
560 | |||
561 | if (!present_section_nr(pnum)) | ||
562 | continue; | ||
563 | ms = __nr_to_section(pnum); | ||
564 | nodeid = sparse_early_nid(ms); | ||
565 | if (nodeid == nodeid_begin) { | ||
566 | map_count++; | ||
567 | continue; | ||
568 | } | ||
569 | /* ok, we need to take cake of from pnum_begin to pnum - 1*/ | ||
570 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum, | ||
571 | map_count, nodeid_begin); | ||
572 | /* new start, update count etc*/ | ||
573 | nodeid_begin = nodeid; | ||
574 | pnum_begin = pnum; | ||
575 | map_count = 1; | ||
576 | } | ||
577 | /* ok, last chunk */ | ||
578 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS, | ||
579 | map_count, nodeid_begin); | ||
580 | #endif | 561 | #endif |
581 | 562 | ||
582 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 563 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
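
The two per-node walks removed above are identical except for the allocation callback, which is why they collapse into the single alloc_usemap_and_memmap() helper the new code calls. A minimal sketch of that helper, reconstructed from the removed loops (the exact kernel body may differ):

    static void __init alloc_usemap_and_memmap(void (*alloc_func)(void *,
                            unsigned long, unsigned long,
                            unsigned long, int), void *data)
    {
            unsigned long pnum, pnum_begin = 0;
            unsigned long map_count = 1;
            int nodeid_begin = 0;

            /* find the first present section and remember its node */
            for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
                    if (!present_section_nr(pnum))
                            continue;
                    nodeid_begin = sparse_early_nid(__nr_to_section(pnum));
                    pnum_begin = pnum;
                    break;
            }
            /* batch runs of present sections that share a node */
            for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
                    int nodeid;

                    if (!present_section_nr(pnum))
                            continue;
                    nodeid = sparse_early_nid(__nr_to_section(pnum));
                    if (nodeid == nodeid_begin) {
                            map_count++;
                            continue;
                    }
                    /* allocate for pnum_begin .. pnum - 1 on one node */
                    alloc_func(data, pnum_begin, pnum, map_count, nodeid_begin);
                    nodeid_begin = nodeid;
                    pnum_begin = pnum;
                    map_count = 1;
            }
            /* last chunk */
            alloc_func(data, pnum_begin, NR_MEM_SECTIONS, map_count, nodeid_begin);
    }
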
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/memcontrol.h> | 31 | #include <linux/memcontrol.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/uio.h> | 33 | #include <linux/uio.h> |
34 | #include <linux/hugetlb.h> | ||
34 | 35 | ||
35 | #include "internal.h" | 36 | #include "internal.h" |
36 | 37 | ||
@@ -81,6 +82,19 @@ static void __put_compound_page(struct page *page) | |||
81 | 82 | ||
82 | static void put_compound_page(struct page *page) | 83 | static void put_compound_page(struct page *page) |
83 | { | 84 | { |
85 | /* | ||
86 | * hugetlbfs pages cannot be split from under us. If this is a | ||
87 | * hugetlbfs page, check refcount on head page and release the page if | ||
88 | * the refcount becomes zero. | ||
89 | */ | ||
90 | if (PageHuge(page)) { | ||
91 | page = compound_head(page); | ||
92 | if (put_page_testzero(page)) | ||
93 | __put_compound_page(page); | ||
94 | |||
95 | return; | ||
96 | } | ||
97 | |||
84 | if (unlikely(PageTail(page))) { | 98 | if (unlikely(PageTail(page))) { |
85 | /* __split_huge_page_refcount can run under us */ | 99 | /* __split_huge_page_refcount can run under us */ |
86 | struct page *page_head = compound_trans_head(page); | 100 | struct page *page_head = compound_trans_head(page); |
@@ -184,38 +198,51 @@ bool __get_page_tail(struct page *page) | |||
184 | * proper PT lock that already serializes against | 198 | * proper PT lock that already serializes against |
185 | * split_huge_page(). | 199 | * split_huge_page(). |
186 | */ | 200 | */ |
187 | unsigned long flags; | ||
188 | bool got = false; | 201 | bool got = false; |
189 | struct page *page_head = compound_trans_head(page); | 202 | struct page *page_head; |
190 | 203 | ||
191 | if (likely(page != page_head && get_page_unless_zero(page_head))) { | 204 | /* |
205 | * If this is a hugetlbfs page it cannot be split under us. Simply | ||
206 | * increment refcount for the head page. | ||
207 | */ | ||
208 | if (PageHuge(page)) { | ||
209 | page_head = compound_head(page); | ||
210 | atomic_inc(&page_head->_count); | ||
211 | got = true; | ||
212 | } else { | ||
213 | unsigned long flags; | ||
214 | |||
215 | page_head = compound_trans_head(page); | ||
216 | if (likely(page != page_head && | ||
217 | get_page_unless_zero(page_head))) { | ||
218 | |||
219 | /* Ref to put_compound_page() comment. */ | ||
220 | if (PageSlab(page_head)) { | ||
221 | if (likely(PageTail(page))) { | ||
222 | __get_page_tail_foll(page, false); | ||
223 | return true; | ||
224 | } else { | ||
225 | put_page(page_head); | ||
226 | return false; | ||
227 | } | ||
228 | } | ||
192 | 229 | ||
193 | /* Ref to put_compound_page() comment. */ | 230 | /* |
194 | if (PageSlab(page_head)) { | 231 | * page_head wasn't a dangling pointer but it |
232 | * may not be a head page anymore by the time | ||
233 | * we obtain the lock. That is ok as long as it | ||
234 | * can't be freed from under us. | ||
235 | */ | ||
236 | flags = compound_lock_irqsave(page_head); | ||
237 | /* here __split_huge_page_refcount won't run anymore */ | ||
195 | if (likely(PageTail(page))) { | 238 | if (likely(PageTail(page))) { |
196 | __get_page_tail_foll(page, false); | 239 | __get_page_tail_foll(page, false); |
197 | return true; | 240 | got = true; |
198 | } else { | ||
199 | put_page(page_head); | ||
200 | return false; | ||
201 | } | 241 | } |
242 | compound_unlock_irqrestore(page_head, flags); | ||
243 | if (unlikely(!got)) | ||
244 | put_page(page_head); | ||
202 | } | 245 | } |
203 | |||
204 | /* | ||
205 | * page_head wasn't a dangling pointer but it | ||
206 | * may not be a head page anymore by the time | ||
207 | * we obtain the lock. That is ok as long as it | ||
208 | * can't be freed from under us. | ||
209 | */ | ||
210 | flags = compound_lock_irqsave(page_head); | ||
211 | /* here __split_huge_page_refcount won't run anymore */ | ||
212 | if (likely(PageTail(page))) { | ||
213 | __get_page_tail_foll(page, false); | ||
214 | got = true; | ||
215 | } | ||
216 | compound_unlock_irqrestore(page_head, flags); | ||
217 | if (unlikely(!got)) | ||
218 | put_page(page_head); | ||
219 | } | 246 | } |
220 | return got; | 247 | return got; |
221 | } | 248 | } |
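
Both hunks above rely on the same invariant: a hugetlbfs page is never split, so compound_head() is stable and neither the compound lock nor the get_page_unless_zero() dance is needed. Reduced to a sketch (a restatement of the diff, not a separate kernel function), the put-side fast path is:

    static void put_hugetlb_page_sketch(struct page *page)
    {
            /* head pointer cannot change: hugetlbfs pages are never split */
            struct page *head = compound_head(page);

            if (put_page_testzero(head))
                    __put_compound_page(head);
    }

The get side is the mirror image: compound_head() followed by a plain atomic_inc() of the head page's _count.
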
diff --git a/mm/swap_state.c b/mm/swap_state.c index f24ab0dff554..e6f15f8ca2af 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -122,7 +122,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) | |||
122 | { | 122 | { |
123 | int error; | 123 | int error; |
124 | 124 | ||
125 | error = radix_tree_preload(gfp_mask); | 125 | error = radix_tree_maybe_preload(gfp_mask); |
126 | if (!error) { | 126 | if (!error) { |
127 | error = __add_to_swap_cache(page, entry); | 127 | error = __add_to_swap_cache(page, entry); |
128 | radix_tree_preload_end(); | 128 | radix_tree_preload_end(); |
@@ -328,7 +328,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, | |||
328 | /* | 328 | /* |
329 | * call radix_tree_preload() while we can wait. | 329 | * call radix_tree_preload() while we can wait. |
330 | */ | 330 | */ |
331 | err = radix_tree_preload(gfp_mask & GFP_KERNEL); | 331 | err = radix_tree_maybe_preload(gfp_mask & GFP_KERNEL); |
332 | if (err) | 332 | if (err) |
333 | break; | 333 | break; |
334 | 334 | ||
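
Both call sites switch to radix_tree_maybe_preload(), whose point is to preload only when the gfp mask allows sleeping, and otherwise just disable preemption so the later radix_tree_preload_end() stays balanced. A sketch of the assumed contract (reconstructed from the call sites, not copied from lib/radix-tree.c):

    int radix_tree_maybe_preload(gfp_t gfp_mask)
    {
            if (gfp_mask & __GFP_WAIT)      /* may sleep: preload for real */
                    return radix_tree_preload(gfp_mask);
            /* atomic context: preloading can't help; keep _end() balanced */
            preempt_disable();
            return 0;
    }

This lets atomic-context callers share the same preload/preload_end bracket without tripping the may-sleep expectation of radix_tree_preload().
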
diff --git a/mm/swapfile.c b/mm/swapfile.c index 6cf2e60983b7..3963fc24fcc1 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -175,14 +175,296 @@ static void discard_swap_cluster(struct swap_info_struct *si, | |||
175 | } | 175 | } |
176 | } | 176 | } |
177 | 177 | ||
178 | static int wait_for_discard(void *word) | 178 | #define SWAPFILE_CLUSTER 256 |
179 | #define LATENCY_LIMIT 256 | ||
180 | |||
181 | static inline void cluster_set_flag(struct swap_cluster_info *info, | ||
182 | unsigned int flag) | ||
179 | { | 183 | { |
180 | schedule(); | 184 | info->flags = flag; |
181 | return 0; | ||
182 | } | 185 | } |
183 | 186 | ||
184 | #define SWAPFILE_CLUSTER 256 | 187 | static inline unsigned int cluster_count(struct swap_cluster_info *info) |
185 | #define LATENCY_LIMIT 256 | 188 | { |
189 | return info->data; | ||
190 | } | ||
191 | |||
192 | static inline void cluster_set_count(struct swap_cluster_info *info, | ||
193 | unsigned int c) | ||
194 | { | ||
195 | info->data = c; | ||
196 | } | ||
197 | |||
198 | static inline void cluster_set_count_flag(struct swap_cluster_info *info, | ||
199 | unsigned int c, unsigned int f) | ||
200 | { | ||
201 | info->flags = f; | ||
202 | info->data = c; | ||
203 | } | ||
204 | |||
205 | static inline unsigned int cluster_next(struct swap_cluster_info *info) | ||
206 | { | ||
207 | return info->data; | ||
208 | } | ||
209 | |||
210 | static inline void cluster_set_next(struct swap_cluster_info *info, | ||
211 | unsigned int n) | ||
212 | { | ||
213 | info->data = n; | ||
214 | } | ||
215 | |||
216 | static inline void cluster_set_next_flag(struct swap_cluster_info *info, | ||
217 | unsigned int n, unsigned int f) | ||
218 | { | ||
219 | info->flags = f; | ||
220 | info->data = n; | ||
221 | } | ||
222 | |||
223 | static inline bool cluster_is_free(struct swap_cluster_info *info) | ||
224 | { | ||
225 | return info->flags & CLUSTER_FLAG_FREE; | ||
226 | } | ||
227 | |||
228 | static inline bool cluster_is_null(struct swap_cluster_info *info) | ||
229 | { | ||
230 | return info->flags & CLUSTER_FLAG_NEXT_NULL; | ||
231 | } | ||
232 | |||
233 | static inline void cluster_set_null(struct swap_cluster_info *info) | ||
234 | { | ||
235 | info->flags = CLUSTER_FLAG_NEXT_NULL; | ||
236 | info->data = 0; | ||
237 | } | ||
238 | |||
239 | /* Add a cluster to discard list and schedule it to do discard */ | ||
240 | static void swap_cluster_schedule_discard(struct swap_info_struct *si, | ||
241 | unsigned int idx) | ||
242 | { | ||
243 | /* | ||
244 | * If scan_swap_map() can't find a free cluster, it will check | ||
245 | * si->swap_map directly. To make sure the discarding cluster isn't | ||
246 | * taken by scan_swap_map(), mark the swap entries bad (occupied). They ||
247 | * will be cleared after the discard ||
248 | */ | ||
249 | memset(si->swap_map + idx * SWAPFILE_CLUSTER, | ||
250 | SWAP_MAP_BAD, SWAPFILE_CLUSTER); | ||
251 | |||
252 | if (cluster_is_null(&si->discard_cluster_head)) { | ||
253 | cluster_set_next_flag(&si->discard_cluster_head, | ||
254 | idx, 0); | ||
255 | cluster_set_next_flag(&si->discard_cluster_tail, | ||
256 | idx, 0); | ||
257 | } else { | ||
258 | unsigned int tail = cluster_next(&si->discard_cluster_tail); | ||
259 | cluster_set_next(&si->cluster_info[tail], idx); | ||
260 | cluster_set_next_flag(&si->discard_cluster_tail, | ||
261 | idx, 0); | ||
262 | } | ||
263 | |||
264 | schedule_work(&si->discard_work); | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * Do the actual discard. After a cluster discard is finished, the cluster ||
269 | * will be added to the free cluster list. The caller should hold si->lock. ||
270 | */ | ||
271 | static void swap_do_scheduled_discard(struct swap_info_struct *si) | ||
272 | { | ||
273 | struct swap_cluster_info *info; | ||
274 | unsigned int idx; | ||
275 | |||
276 | info = si->cluster_info; | ||
277 | |||
278 | while (!cluster_is_null(&si->discard_cluster_head)) { | ||
279 | idx = cluster_next(&si->discard_cluster_head); | ||
280 | |||
281 | cluster_set_next_flag(&si->discard_cluster_head, | ||
282 | cluster_next(&info[idx]), 0); | ||
283 | if (cluster_next(&si->discard_cluster_tail) == idx) { | ||
284 | cluster_set_null(&si->discard_cluster_head); | ||
285 | cluster_set_null(&si->discard_cluster_tail); | ||
286 | } | ||
287 | spin_unlock(&si->lock); | ||
288 | |||
289 | discard_swap_cluster(si, idx * SWAPFILE_CLUSTER, | ||
290 | SWAPFILE_CLUSTER); | ||
291 | |||
292 | spin_lock(&si->lock); | ||
293 | cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE); | ||
294 | if (cluster_is_null(&si->free_cluster_head)) { | ||
295 | cluster_set_next_flag(&si->free_cluster_head, | ||
296 | idx, 0); | ||
297 | cluster_set_next_flag(&si->free_cluster_tail, | ||
298 | idx, 0); | ||
299 | } else { | ||
300 | unsigned int tail; | ||
301 | |||
302 | tail = cluster_next(&si->free_cluster_tail); | ||
303 | cluster_set_next(&info[tail], idx); | ||
304 | cluster_set_next_flag(&si->free_cluster_tail, | ||
305 | idx, 0); | ||
306 | } | ||
307 | memset(si->swap_map + idx * SWAPFILE_CLUSTER, | ||
308 | 0, SWAPFILE_CLUSTER); | ||
309 | } | ||
310 | } | ||
311 | |||
312 | static void swap_discard_work(struct work_struct *work) | ||
313 | { | ||
314 | struct swap_info_struct *si; | ||
315 | |||
316 | si = container_of(work, struct swap_info_struct, discard_work); | ||
317 | |||
318 | spin_lock(&si->lock); | ||
319 | swap_do_scheduled_discard(si); | ||
320 | spin_unlock(&si->lock); | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * The cluster corresponding to page_nr will be used. The cluster will be | ||
325 | * removed from the free cluster list and its usage counter increased. ||
326 | */ | ||
327 | static void inc_cluster_info_page(struct swap_info_struct *p, | ||
328 | struct swap_cluster_info *cluster_info, unsigned long page_nr) | ||
329 | { | ||
330 | unsigned long idx = page_nr / SWAPFILE_CLUSTER; | ||
331 | |||
332 | if (!cluster_info) | ||
333 | return; | ||
334 | if (cluster_is_free(&cluster_info[idx])) { | ||
335 | VM_BUG_ON(cluster_next(&p->free_cluster_head) != idx); | ||
336 | cluster_set_next_flag(&p->free_cluster_head, | ||
337 | cluster_next(&cluster_info[idx]), 0); | ||
338 | if (cluster_next(&p->free_cluster_tail) == idx) { | ||
339 | cluster_set_null(&p->free_cluster_tail); | ||
340 | cluster_set_null(&p->free_cluster_head); | ||
341 | } | ||
342 | cluster_set_count_flag(&cluster_info[idx], 0, 0); | ||
343 | } | ||
344 | |||
345 | VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER); | ||
346 | cluster_set_count(&cluster_info[idx], | ||
347 | cluster_count(&cluster_info[idx]) + 1); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * Decrement the usage count of the cluster corresponding to page_nr. If the ||
352 | * counter becomes 0, no page in the cluster is in use and we can ||
353 | * optionally discard the cluster and add it to the free cluster list. ||
354 | */ | ||
355 | static void dec_cluster_info_page(struct swap_info_struct *p, | ||
356 | struct swap_cluster_info *cluster_info, unsigned long page_nr) | ||
357 | { | ||
358 | unsigned long idx = page_nr / SWAPFILE_CLUSTER; | ||
359 | |||
360 | if (!cluster_info) | ||
361 | return; | ||
362 | |||
363 | VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0); | ||
364 | cluster_set_count(&cluster_info[idx], | ||
365 | cluster_count(&cluster_info[idx]) - 1); | ||
366 | |||
367 | if (cluster_count(&cluster_info[idx]) == 0) { | ||
368 | /* | ||
369 | * If the swap is discardable, prepare to discard the cluster ||
370 | * instead of freeing it immediately. The cluster will be freed ||
371 | * after the discard. ||
372 | */ | ||
373 | if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == | ||
374 | (SWP_WRITEOK | SWP_PAGE_DISCARD)) { | ||
375 | swap_cluster_schedule_discard(p, idx); | ||
376 | return; | ||
377 | } | ||
378 | |||
379 | cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE); | ||
380 | if (cluster_is_null(&p->free_cluster_head)) { | ||
381 | cluster_set_next_flag(&p->free_cluster_head, idx, 0); | ||
382 | cluster_set_next_flag(&p->free_cluster_tail, idx, 0); | ||
383 | } else { | ||
384 | unsigned int tail = cluster_next(&p->free_cluster_tail); | ||
385 | cluster_set_next(&cluster_info[tail], idx); | ||
386 | cluster_set_next_flag(&p->free_cluster_tail, idx, 0); | ||
387 | } | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * It's possible scan_swap_map() uses a free cluster in the middle of the free ||
393 | * cluster list. Avoid such abuse to prevent list corruption. ||
394 | */ | ||
395 | static bool | ||
396 | scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si, | ||
397 | unsigned long offset) | ||
398 | { | ||
399 | struct percpu_cluster *percpu_cluster; | ||
400 | bool conflict; | ||
401 | |||
402 | offset /= SWAPFILE_CLUSTER; | ||
403 | conflict = !cluster_is_null(&si->free_cluster_head) && | ||
404 | offset != cluster_next(&si->free_cluster_head) && | ||
405 | cluster_is_free(&si->cluster_info[offset]); | ||
406 | |||
407 | if (!conflict) | ||
408 | return false; | ||
409 | |||
410 | percpu_cluster = this_cpu_ptr(si->percpu_cluster); | ||
411 | cluster_set_null(&percpu_cluster->index); | ||
412 | return true; | ||
413 | } | ||
414 | |||
415 | /* | ||
416 | * Try to get a swap entry from the current CPU's swap entry pool (a cluster). ||
417 | * This might involve allocating a new cluster for the current CPU too. ||
418 | */ | ||
419 | static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, | ||
420 | unsigned long *offset, unsigned long *scan_base) | ||
421 | { | ||
422 | struct percpu_cluster *cluster; | ||
423 | bool found_free; | ||
424 | unsigned long tmp; | ||
425 | |||
426 | new_cluster: | ||
427 | cluster = this_cpu_ptr(si->percpu_cluster); | ||
428 | if (cluster_is_null(&cluster->index)) { | ||
429 | if (!cluster_is_null(&si->free_cluster_head)) { | ||
430 | cluster->index = si->free_cluster_head; | ||
431 | cluster->next = cluster_next(&cluster->index) * | ||
432 | SWAPFILE_CLUSTER; | ||
433 | } else if (!cluster_is_null(&si->discard_cluster_head)) { | ||
434 | /* | ||
435 | * we have no free cluster, but some clusters are being ||
436 | * discarded; do the discard now and reclaim them ||
437 | */ | ||
438 | swap_do_scheduled_discard(si); | ||
439 | *scan_base = *offset = si->cluster_next; | ||
440 | goto new_cluster; | ||
441 | } else | ||
442 | return; | ||
443 | } | ||
444 | |||
445 | found_free = false; | ||
446 | |||
447 | /* | ||
448 | * Other CPUs can use our cluster if they can't find a free cluster, | ||
449 | * so check whether there is still a free entry in the cluster ||
450 | */ | ||
451 | tmp = cluster->next; | ||
452 | while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) * | ||
453 | SWAPFILE_CLUSTER) { | ||
454 | if (!si->swap_map[tmp]) { | ||
455 | found_free = true; | ||
456 | break; | ||
457 | } | ||
458 | tmp++; | ||
459 | } | ||
460 | if (!found_free) { | ||
461 | cluster_set_null(&cluster->index); | ||
462 | goto new_cluster; | ||
463 | } | ||
464 | cluster->next = tmp + 1; | ||
465 | *offset = tmp; | ||
466 | *scan_base = tmp; | ||
467 | } | ||
186 | 468 | ||
187 | static unsigned long scan_swap_map(struct swap_info_struct *si, | 469 | static unsigned long scan_swap_map(struct swap_info_struct *si, |
188 | unsigned char usage) | 470 | unsigned char usage) |
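
All of the accessors above multiplex one small structure: while a cluster is allocated, data holds its usage count; while it sits on the free or discard list, data holds the index of the next cluster. The assumed layout (the flags/data split is visible in this hunk; the exact field widths are a guess):

    struct swap_cluster_info {
            unsigned int data:24;   /* usage count, or next-cluster index */
            unsigned int flags:8;   /* CLUSTER_FLAG_FREE, CLUSTER_FLAG_NEXT_NULL */
    };

This is why cluster_count() and cluster_next() read the same field, and why a cluster must leave the free list before its counter is meaningful.
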
@@ -191,7 +473,6 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
191 | unsigned long scan_base; | 473 | unsigned long scan_base; |
192 | unsigned long last_in_cluster = 0; | 474 | unsigned long last_in_cluster = 0; |
193 | int latency_ration = LATENCY_LIMIT; | 475 | int latency_ration = LATENCY_LIMIT; |
194 | int found_free_cluster = 0; | ||
195 | 476 | ||
196 | /* | 477 | /* |
197 | * We try to cluster swap pages by allocating them sequentially | 478 | * We try to cluster swap pages by allocating them sequentially |
@@ -207,24 +488,18 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
207 | si->flags += SWP_SCANNING; | 488 | si->flags += SWP_SCANNING; |
208 | scan_base = offset = si->cluster_next; | 489 | scan_base = offset = si->cluster_next; |
209 | 490 | ||
491 | /* SSD algorithm */ | ||
492 | if (si->cluster_info) { | ||
493 | scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); | ||
494 | goto checks; | ||
495 | } | ||
496 | |||
210 | if (unlikely(!si->cluster_nr--)) { | 497 | if (unlikely(!si->cluster_nr--)) { |
211 | if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { | 498 | if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { |
212 | si->cluster_nr = SWAPFILE_CLUSTER - 1; | 499 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
213 | goto checks; | 500 | goto checks; |
214 | } | 501 | } |
215 | if (si->flags & SWP_PAGE_DISCARD) { | 502 | |
216 | /* | ||
217 | * Start range check on racing allocations, in case | ||
218 | * they overlap the cluster we eventually decide on | ||
219 | * (we scan without swap_lock to allow preemption). | ||
220 | * It's hardly conceivable that cluster_nr could be | ||
221 | * wrapped during our scan, but don't depend on it. | ||
222 | */ | ||
223 | if (si->lowest_alloc) | ||
224 | goto checks; | ||
225 | si->lowest_alloc = si->max; | ||
226 | si->highest_alloc = 0; | ||
227 | } | ||
228 | spin_unlock(&si->lock); | 503 | spin_unlock(&si->lock); |
229 | 504 | ||
230 | /* | 505 | /* |
@@ -248,7 +523,6 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
248 | offset -= SWAPFILE_CLUSTER - 1; | 523 | offset -= SWAPFILE_CLUSTER - 1; |
249 | si->cluster_next = offset; | 524 | si->cluster_next = offset; |
250 | si->cluster_nr = SWAPFILE_CLUSTER - 1; | 525 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
251 | found_free_cluster = 1; | ||
252 | goto checks; | 526 | goto checks; |
253 | } | 527 | } |
254 | if (unlikely(--latency_ration < 0)) { | 528 | if (unlikely(--latency_ration < 0)) { |
@@ -269,7 +543,6 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
269 | offset -= SWAPFILE_CLUSTER - 1; | 543 | offset -= SWAPFILE_CLUSTER - 1; |
270 | si->cluster_next = offset; | 544 | si->cluster_next = offset; |
271 | si->cluster_nr = SWAPFILE_CLUSTER - 1; | 545 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
272 | found_free_cluster = 1; | ||
273 | goto checks; | 546 | goto checks; |
274 | } | 547 | } |
275 | if (unlikely(--latency_ration < 0)) { | 548 | if (unlikely(--latency_ration < 0)) { |
@@ -281,10 +554,13 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, | |||
281 | offset = scan_base; | 554 | offset = scan_base; |
282 | spin_lock(&si->lock); | 555 | spin_lock(&si->lock); |
283 | si->cluster_nr = SWAPFILE_CLUSTER - 1; | 556 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
284 | si->lowest_alloc = 0; | ||
285 | } | 557 | } |
286 | 558 | ||
287 | checks: | 559 | checks: |
560 | if (si->cluster_info) { | ||
561 | while (scan_swap_map_ssd_cluster_conflict(si, offset)) | ||
562 | scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); | ||
563 | } | ||
288 | if (!(si->flags & SWP_WRITEOK)) | 564 | if (!(si->flags & SWP_WRITEOK)) |
289 | goto no_page; | 565 | goto no_page; |
290 | if (!si->highest_bit) | 566 | if (!si->highest_bit) |
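
scan_swap_map_try_ssd_cluster() above hands each CPU a private cluster to allocate from, so concurrent swapouts to an SSD don't interleave inside one cluster. The assumed shape of that per-CPU state (reconstructed from its uses here; the real declaration lives in the swap headers):

    struct percpu_cluster {
            struct swap_cluster_info index; /* current cluster, or null flag */
            unsigned int next;              /* next swap offset to try */
    };
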
@@ -317,62 +593,10 @@ checks: | |||
317 | si->highest_bit = 0; | 593 | si->highest_bit = 0; |
318 | } | 594 | } |
319 | si->swap_map[offset] = usage; | 595 | si->swap_map[offset] = usage; |
596 | inc_cluster_info_page(si, si->cluster_info, offset); | ||
320 | si->cluster_next = offset + 1; | 597 | si->cluster_next = offset + 1; |
321 | si->flags -= SWP_SCANNING; | 598 | si->flags -= SWP_SCANNING; |
322 | 599 | ||
323 | if (si->lowest_alloc) { | ||
324 | /* | ||
325 | * Only set when SWP_PAGE_DISCARD, and there's a scan | ||
326 | * for a free cluster in progress or just completed. | ||
327 | */ | ||
328 | if (found_free_cluster) { | ||
329 | /* | ||
330 | * To optimize wear-levelling, discard the | ||
331 | * old data of the cluster, taking care not to | ||
332 | * discard any of its pages that have already | ||
333 | * been allocated by racing tasks (offset has | ||
334 | * already stepped over any at the beginning). | ||
335 | */ | ||
336 | if (offset < si->highest_alloc && | ||
337 | si->lowest_alloc <= last_in_cluster) | ||
338 | last_in_cluster = si->lowest_alloc - 1; | ||
339 | si->flags |= SWP_DISCARDING; | ||
340 | spin_unlock(&si->lock); | ||
341 | |||
342 | if (offset < last_in_cluster) | ||
343 | discard_swap_cluster(si, offset, | ||
344 | last_in_cluster - offset + 1); | ||
345 | |||
346 | spin_lock(&si->lock); | ||
347 | si->lowest_alloc = 0; | ||
348 | si->flags &= ~SWP_DISCARDING; | ||
349 | |||
350 | smp_mb(); /* wake_up_bit advises this */ | ||
351 | wake_up_bit(&si->flags, ilog2(SWP_DISCARDING)); | ||
352 | |||
353 | } else if (si->flags & SWP_DISCARDING) { | ||
354 | /* | ||
355 | * Delay using pages allocated by racing tasks | ||
356 | * until the whole discard has been issued. We | ||
357 | * could defer that delay until swap_writepage, | ||
358 | * but it's easier to keep this self-contained. | ||
359 | */ | ||
360 | spin_unlock(&si->lock); | ||
361 | wait_on_bit(&si->flags, ilog2(SWP_DISCARDING), | ||
362 | wait_for_discard, TASK_UNINTERRUPTIBLE); | ||
363 | spin_lock(&si->lock); | ||
364 | } else { | ||
365 | /* | ||
366 | * Note pages allocated by racing tasks while | ||
367 | * scan for a free cluster is in progress, so | ||
368 | * that its final discard can exclude them. | ||
369 | */ | ||
370 | if (offset < si->lowest_alloc) | ||
371 | si->lowest_alloc = offset; | ||
372 | if (offset > si->highest_alloc) | ||
373 | si->highest_alloc = offset; | ||
374 | } | ||
375 | } | ||
376 | return offset; | 600 | return offset; |
377 | 601 | ||
378 | scan: | 602 | scan: |
@@ -527,16 +751,16 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry) | |||
527 | return p; | 751 | return p; |
528 | 752 | ||
529 | bad_free: | 753 | bad_free: |
530 | printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val); | 754 | pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val); |
531 | goto out; | 755 | goto out; |
532 | bad_offset: | 756 | bad_offset: |
533 | printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val); | 757 | pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val); |
534 | goto out; | 758 | goto out; |
535 | bad_device: | 759 | bad_device: |
536 | printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val); | 760 | pr_err("swap_free: %s%08lx\n", Unused_file, entry.val); |
537 | goto out; | 761 | goto out; |
538 | bad_nofile: | 762 | bad_nofile: |
539 | printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val); | 763 | pr_err("swap_free: %s%08lx\n", Bad_file, entry.val); |
540 | out: | 764 | out: |
541 | return NULL; | 765 | return NULL; |
542 | } | 766 | } |
@@ -600,6 +824,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, | |||
600 | 824 | ||
601 | /* free if no reference */ | 825 | /* free if no reference */ |
602 | if (!usage) { | 826 | if (!usage) { |
827 | dec_cluster_info_page(p, p->cluster_info, offset); | ||
603 | if (offset < p->lowest_bit) | 828 | if (offset < p->lowest_bit) |
604 | p->lowest_bit = offset; | 829 | p->lowest_bit = offset; |
605 | if (offset > p->highest_bit) | 830 | if (offset > p->highest_bit) |
@@ -1107,7 +1332,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
1107 | else | 1332 | else |
1108 | continue; | 1333 | continue; |
1109 | } | 1334 | } |
1110 | count = si->swap_map[i]; | 1335 | count = ACCESS_ONCE(si->swap_map[i]); |
1111 | if (count && swap_count(count) != SWAP_MAP_BAD) | 1336 | if (count && swap_count(count) != SWAP_MAP_BAD) |
1112 | break; | 1337 | break; |
1113 | } | 1338 | } |
@@ -1127,7 +1352,11 @@ int try_to_unuse(unsigned int type, bool frontswap, | |||
1127 | { | 1352 | { |
1128 | struct swap_info_struct *si = swap_info[type]; | 1353 | struct swap_info_struct *si = swap_info[type]; |
1129 | struct mm_struct *start_mm; | 1354 | struct mm_struct *start_mm; |
1130 | unsigned char *swap_map; | 1355 | volatile unsigned char *swap_map; /* swap_map is accessed without |
1356 | * locking. Mark it as volatile | ||
1357 | * to prevent the compiler from ||
1358 | * doing something odd. ||
1359 | */ | ||
1131 | unsigned char swcount; | 1360 | unsigned char swcount; |
1132 | struct page *page; | 1361 | struct page *page; |
1133 | swp_entry_t entry; | 1362 | swp_entry_t entry; |
@@ -1178,7 +1407,15 @@ int try_to_unuse(unsigned int type, bool frontswap, | |||
1178 | * reused since sys_swapoff() already disabled | 1407 | * reused since sys_swapoff() already disabled |
1179 | * allocation from here, or alloc_page() failed. | 1408 | * allocation from here, or alloc_page() failed. |
1180 | */ | 1409 | */ |
1181 | if (!*swap_map) | 1410 | swcount = *swap_map; |
1411 | /* | ||
1412 | * We don't hold the lock here, so the swap entry could be ||
1413 | * SWAP_MAP_BAD (when the cluster is being discarded). ||
1414 | * Instead of failing out, we can just skip the swap ||
1415 | * entry because swapoff will wait for the discard to ||
1416 | * finish anyway. ||
1417 | */ | ||
1418 | if (!swcount || swcount == SWAP_MAP_BAD) | ||
1182 | continue; | 1419 | continue; |
1183 | retval = -ENOMEM; | 1420 | retval = -ENOMEM; |
1184 | break; | 1421 | break; |
@@ -1524,7 +1761,8 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) | |||
1524 | } | 1761 | } |
1525 | 1762 | ||
1526 | static void _enable_swap_info(struct swap_info_struct *p, int prio, | 1763 | static void _enable_swap_info(struct swap_info_struct *p, int prio, |
1527 | unsigned char *swap_map) | 1764 | unsigned char *swap_map, |
1765 | struct swap_cluster_info *cluster_info) | ||
1528 | { | 1766 | { |
1529 | int i, prev; | 1767 | int i, prev; |
1530 | 1768 | ||
@@ -1533,6 +1771,7 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio, | |||
1533 | else | 1771 | else |
1534 | p->prio = --least_priority; | 1772 | p->prio = --least_priority; |
1535 | p->swap_map = swap_map; | 1773 | p->swap_map = swap_map; |
1774 | p->cluster_info = cluster_info; | ||
1536 | p->flags |= SWP_WRITEOK; | 1775 | p->flags |= SWP_WRITEOK; |
1537 | atomic_long_add(p->pages, &nr_swap_pages); | 1776 | atomic_long_add(p->pages, &nr_swap_pages); |
1538 | total_swap_pages += p->pages; | 1777 | total_swap_pages += p->pages; |
@@ -1553,12 +1792,13 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio, | |||
1553 | 1792 | ||
1554 | static void enable_swap_info(struct swap_info_struct *p, int prio, | 1793 | static void enable_swap_info(struct swap_info_struct *p, int prio, |
1555 | unsigned char *swap_map, | 1794 | unsigned char *swap_map, |
1795 | struct swap_cluster_info *cluster_info, | ||
1556 | unsigned long *frontswap_map) | 1796 | unsigned long *frontswap_map) |
1557 | { | 1797 | { |
1558 | frontswap_init(p->type, frontswap_map); | 1798 | frontswap_init(p->type, frontswap_map); |
1559 | spin_lock(&swap_lock); | 1799 | spin_lock(&swap_lock); |
1560 | spin_lock(&p->lock); | 1800 | spin_lock(&p->lock); |
1561 | _enable_swap_info(p, prio, swap_map); | 1801 | _enable_swap_info(p, prio, swap_map, cluster_info); |
1562 | spin_unlock(&p->lock); | 1802 | spin_unlock(&p->lock); |
1563 | spin_unlock(&swap_lock); | 1803 | spin_unlock(&swap_lock); |
1564 | } | 1804 | } |
@@ -1567,7 +1807,7 @@ static void reinsert_swap_info(struct swap_info_struct *p) | |||
1567 | { | 1807 | { |
1568 | spin_lock(&swap_lock); | 1808 | spin_lock(&swap_lock); |
1569 | spin_lock(&p->lock); | 1809 | spin_lock(&p->lock); |
1570 | _enable_swap_info(p, p->prio, p->swap_map); | 1810 | _enable_swap_info(p, p->prio, p->swap_map, p->cluster_info); |
1571 | spin_unlock(&p->lock); | 1811 | spin_unlock(&p->lock); |
1572 | spin_unlock(&swap_lock); | 1812 | spin_unlock(&swap_lock); |
1573 | } | 1813 | } |
@@ -1576,6 +1816,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1576 | { | 1816 | { |
1577 | struct swap_info_struct *p = NULL; | 1817 | struct swap_info_struct *p = NULL; |
1578 | unsigned char *swap_map; | 1818 | unsigned char *swap_map; |
1819 | struct swap_cluster_info *cluster_info; | ||
1579 | unsigned long *frontswap_map; | 1820 | unsigned long *frontswap_map; |
1580 | struct file *swap_file, *victim; | 1821 | struct file *swap_file, *victim; |
1581 | struct address_space *mapping; | 1822 | struct address_space *mapping; |
@@ -1651,6 +1892,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1651 | goto out_dput; | 1892 | goto out_dput; |
1652 | } | 1893 | } |
1653 | 1894 | ||
1895 | flush_work(&p->discard_work); | ||
1896 | |||
1654 | destroy_swap_extents(p); | 1897 | destroy_swap_extents(p); |
1655 | if (p->flags & SWP_CONTINUED) | 1898 | if (p->flags & SWP_CONTINUED) |
1656 | free_swap_count_continuations(p); | 1899 | free_swap_count_continuations(p); |
@@ -1675,6 +1918,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1675 | p->max = 0; | 1918 | p->max = 0; |
1676 | swap_map = p->swap_map; | 1919 | swap_map = p->swap_map; |
1677 | p->swap_map = NULL; | 1920 | p->swap_map = NULL; |
1921 | cluster_info = p->cluster_info; | ||
1922 | p->cluster_info = NULL; | ||
1678 | p->flags = 0; | 1923 | p->flags = 0; |
1679 | frontswap_map = frontswap_map_get(p); | 1924 | frontswap_map = frontswap_map_get(p); |
1680 | frontswap_map_set(p, NULL); | 1925 | frontswap_map_set(p, NULL); |
@@ -1682,7 +1927,10 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1682 | spin_unlock(&swap_lock); | 1927 | spin_unlock(&swap_lock); |
1683 | frontswap_invalidate_area(type); | 1928 | frontswap_invalidate_area(type); |
1684 | mutex_unlock(&swapon_mutex); | 1929 | mutex_unlock(&swapon_mutex); |
1930 | free_percpu(p->percpu_cluster); | ||
1931 | p->percpu_cluster = NULL; | ||
1685 | vfree(swap_map); | 1932 | vfree(swap_map); |
1933 | vfree(cluster_info); | ||
1686 | vfree(frontswap_map); | 1934 | vfree(frontswap_map); |
1687 | /* Destroy swap account information */ | 1935 |
1688 | swap_cgroup_swapoff(type); | 1936 | swap_cgroup_swapoff(type); |
@@ -1926,9 +2174,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p, | |||
1926 | int i; | 2174 | int i; |
1927 | unsigned long maxpages; | 2175 | unsigned long maxpages; |
1928 | unsigned long swapfilepages; | 2176 | unsigned long swapfilepages; |
2177 | unsigned long last_page; | ||
1929 | 2178 | ||
1930 | if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { | 2179 | if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { |
1931 | printk(KERN_ERR "Unable to find swap-space signature\n"); | 2180 | pr_err("Unable to find swap-space signature\n"); |
1932 | return 0; | 2181 | return 0; |
1933 | } | 2182 | } |
1934 | 2183 | ||
@@ -1942,9 +2191,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p, | |||
1942 | } | 2191 | } |
1943 | /* Check the swap header's sub-version */ | 2192 | /* Check the swap header's sub-version */ |
1944 | if (swap_header->info.version != 1) { | 2193 | if (swap_header->info.version != 1) { |
1945 | printk(KERN_WARNING | 2194 | pr_warn("Unable to handle swap header version %d\n", |
1946 | "Unable to handle swap header version %d\n", | 2195 | swap_header->info.version); |
1947 | swap_header->info.version); | ||
1948 | return 0; | 2196 | return 0; |
1949 | } | 2197 | } |
1950 | 2198 | ||
@@ -1968,8 +2216,14 @@ static unsigned long read_swap_header(struct swap_info_struct *p, | |||
1968 | */ | 2216 | */ |
1969 | maxpages = swp_offset(pte_to_swp_entry( | 2217 | maxpages = swp_offset(pte_to_swp_entry( |
1970 | swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; | 2218 | swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; |
1971 | if (maxpages > swap_header->info.last_page) { | 2219 | last_page = swap_header->info.last_page; |
1972 | maxpages = swap_header->info.last_page + 1; | 2220 | if (last_page > maxpages) { |
2221 | pr_warn("Truncating oversized swap area, only using %luk out of %luk\n", | ||
2222 | maxpages << (PAGE_SHIFT - 10), | ||
2223 | last_page << (PAGE_SHIFT - 10)); | ||
2224 | } | ||
2225 | if (maxpages > last_page) { | ||
2226 | maxpages = last_page + 1; | ||
1973 | /* p->max is an unsigned int: don't overflow it */ | 2227 | /* p->max is an unsigned int: don't overflow it */ |
1974 | if ((unsigned int)maxpages == 0) | 2228 | if ((unsigned int)maxpages == 0) |
1975 | maxpages = UINT_MAX; | 2229 | maxpages = UINT_MAX; |
@@ -1980,8 +2234,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p, | |||
1980 | return 0; | 2234 | return 0; |
1981 | swapfilepages = i_size_read(inode) >> PAGE_SHIFT; | 2235 | swapfilepages = i_size_read(inode) >> PAGE_SHIFT; |
1982 | if (swapfilepages && maxpages > swapfilepages) { | 2236 | if (swapfilepages && maxpages > swapfilepages) { |
1983 | printk(KERN_WARNING | 2237 | pr_warn("Swap area shorter than signature indicates\n"); |
1984 | "Swap area shorter than signature indicates\n"); | ||
1985 | return 0; | 2238 | return 0; |
1986 | } | 2239 | } |
1987 | if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) | 2240 | if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) |
@@ -1995,15 +2248,23 @@ static unsigned long read_swap_header(struct swap_info_struct *p, | |||
1995 | static int setup_swap_map_and_extents(struct swap_info_struct *p, | 2248 | static int setup_swap_map_and_extents(struct swap_info_struct *p, |
1996 | union swap_header *swap_header, | 2249 | union swap_header *swap_header, |
1997 | unsigned char *swap_map, | 2250 | unsigned char *swap_map, |
2251 | struct swap_cluster_info *cluster_info, | ||
1998 | unsigned long maxpages, | 2252 | unsigned long maxpages, |
1999 | sector_t *span) | 2253 | sector_t *span) |
2000 | { | 2254 | { |
2001 | int i; | 2255 | int i; |
2002 | unsigned int nr_good_pages; | 2256 | unsigned int nr_good_pages; |
2003 | int nr_extents; | 2257 | int nr_extents; |
2258 | unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); | ||
2259 | unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER; | ||
2004 | 2260 | ||
2005 | nr_good_pages = maxpages - 1; /* omit header page */ | 2261 | nr_good_pages = maxpages - 1; /* omit header page */ |
2006 | 2262 | ||
2263 | cluster_set_null(&p->free_cluster_head); | ||
2264 | cluster_set_null(&p->free_cluster_tail); | ||
2265 | cluster_set_null(&p->discard_cluster_head); | ||
2266 | cluster_set_null(&p->discard_cluster_tail); | ||
2267 | |||
2007 | for (i = 0; i < swap_header->info.nr_badpages; i++) { | 2268 | for (i = 0; i < swap_header->info.nr_badpages; i++) { |
2008 | unsigned int page_nr = swap_header->info.badpages[i]; | 2269 | unsigned int page_nr = swap_header->info.badpages[i]; |
2009 | if (page_nr == 0 || page_nr > swap_header->info.last_page) | 2270 | if (page_nr == 0 || page_nr > swap_header->info.last_page) |
@@ -2011,11 +2272,25 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, | |||
2011 | if (page_nr < maxpages) { | 2272 | if (page_nr < maxpages) { |
2012 | swap_map[page_nr] = SWAP_MAP_BAD; | 2273 | swap_map[page_nr] = SWAP_MAP_BAD; |
2013 | nr_good_pages--; | 2274 | nr_good_pages--; |
2275 | /* | ||
2276 | * Haven't marked the cluster free yet, no list | ||
2277 | * operation involved | ||
2278 | */ | ||
2279 | inc_cluster_info_page(p, cluster_info, page_nr); | ||
2014 | } | 2280 | } |
2015 | } | 2281 | } |
2016 | 2282 | ||
2283 | /* Haven't marked the cluster free yet, no list operation involved */ | ||
2284 | for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++) | ||
2285 | inc_cluster_info_page(p, cluster_info, i); | ||
2286 | |||
2017 | if (nr_good_pages) { | 2287 | if (nr_good_pages) { |
2018 | swap_map[0] = SWAP_MAP_BAD; | 2288 | swap_map[0] = SWAP_MAP_BAD; |
2289 | /* | ||
2290 | * Don't mark the cluster free yet; no list ||
2291 | * operation involved ||
2292 | */ | ||
2293 | inc_cluster_info_page(p, cluster_info, 0); | ||
2019 | p->max = maxpages; | 2294 | p->max = maxpages; |
2020 | p->pages = nr_good_pages; | 2295 | p->pages = nr_good_pages; |
2021 | nr_extents = setup_swap_extents(p, span); | 2296 | nr_extents = setup_swap_extents(p, span); |
@@ -2024,10 +2299,34 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, | |||
2024 | nr_good_pages = p->pages; | 2299 | nr_good_pages = p->pages; |
2025 | } | 2300 | } |
2026 | if (!nr_good_pages) { | 2301 | if (!nr_good_pages) { |
2027 | printk(KERN_WARNING "Empty swap-file\n"); | 2302 | pr_warn("Empty swap-file\n"); |
2028 | return -EINVAL; | 2303 | return -EINVAL; |
2029 | } | 2304 | } |
2030 | 2305 | ||
2306 | if (!cluster_info) | ||
2307 | return nr_extents; | ||
2308 | |||
2309 | for (i = 0; i < nr_clusters; i++) { | ||
2310 | if (!cluster_count(&cluster_info[idx])) { | ||
2311 | cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE); | ||
2312 | if (cluster_is_null(&p->free_cluster_head)) { | ||
2313 | cluster_set_next_flag(&p->free_cluster_head, | ||
2314 | idx, 0); | ||
2315 | cluster_set_next_flag(&p->free_cluster_tail, | ||
2316 | idx, 0); | ||
2317 | } else { | ||
2318 | unsigned int tail; | ||
2319 | |||
2320 | tail = cluster_next(&p->free_cluster_tail); | ||
2321 | cluster_set_next(&cluster_info[tail], idx); | ||
2322 | cluster_set_next_flag(&p->free_cluster_tail, | ||
2323 | idx, 0); | ||
2324 | } | ||
2325 | } | ||
2326 | idx++; | ||
2327 | if (idx == nr_clusters) | ||
2328 | idx = 0; | ||
2329 | } | ||
2031 | return nr_extents; | 2330 | return nr_extents; |
2032 | } | 2331 | } |
2033 | 2332 | ||
@@ -2059,6 +2358,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2059 | sector_t span; | 2358 | sector_t span; |
2060 | unsigned long maxpages; | 2359 | unsigned long maxpages; |
2061 | unsigned char *swap_map = NULL; | 2360 | unsigned char *swap_map = NULL; |
2361 | struct swap_cluster_info *cluster_info = NULL; | ||
2062 | unsigned long *frontswap_map = NULL; | 2362 | unsigned long *frontswap_map = NULL; |
2063 | struct page *page = NULL; | 2363 | struct page *page = NULL; |
2064 | struct inode *inode = NULL; | 2364 | struct inode *inode = NULL; |
@@ -2073,6 +2373,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2073 | if (IS_ERR(p)) | 2373 | if (IS_ERR(p)) |
2074 | return PTR_ERR(p); | 2374 | return PTR_ERR(p); |
2075 | 2375 | ||
2376 | INIT_WORK(&p->discard_work, swap_discard_work); | ||
2377 | |||
2076 | name = getname(specialfile); | 2378 | name = getname(specialfile); |
2077 | if (IS_ERR(name)) { | 2379 | if (IS_ERR(name)) { |
2078 | error = PTR_ERR(name); | 2380 | error = PTR_ERR(name); |
@@ -2132,13 +2434,38 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2132 | error = -ENOMEM; | 2434 | error = -ENOMEM; |
2133 | goto bad_swap; | 2435 | goto bad_swap; |
2134 | } | 2436 | } |
2437 | if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { | ||
2438 | p->flags |= SWP_SOLIDSTATE; | ||
2439 | /* | ||
2440 | * select a random position to start with to help the wear ||
2441 | * leveling of SSDs ||
2442 | */ | ||
2443 | p->cluster_next = 1 + (prandom_u32() % p->highest_bit); | ||
2444 | |||
2445 | cluster_info = vzalloc(DIV_ROUND_UP(maxpages, | ||
2446 | SWAPFILE_CLUSTER) * sizeof(*cluster_info)); | ||
2447 | if (!cluster_info) { | ||
2448 | error = -ENOMEM; | ||
2449 | goto bad_swap; | ||
2450 | } | ||
2451 | p->percpu_cluster = alloc_percpu(struct percpu_cluster); | ||
2452 | if (!p->percpu_cluster) { | ||
2453 | error = -ENOMEM; | ||
2454 | goto bad_swap; | ||
2455 | } | ||
2456 | for_each_possible_cpu(i) { | ||
2457 | struct percpu_cluster *cluster; | ||
2458 | cluster = per_cpu_ptr(p->percpu_cluster, i); | ||
2459 | cluster_set_null(&cluster->index); | ||
2460 | } | ||
2461 | } | ||
2135 | 2462 | ||
2136 | error = swap_cgroup_swapon(p->type, maxpages); | 2463 | error = swap_cgroup_swapon(p->type, maxpages); |
2137 | if (error) | 2464 | if (error) |
2138 | goto bad_swap; | 2465 | goto bad_swap; |
2139 | 2466 | ||
2140 | nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, | 2467 | nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, |
2141 | maxpages, &span); | 2468 | cluster_info, maxpages, &span); |
2142 | if (unlikely(nr_extents < 0)) { | 2469 | if (unlikely(nr_extents < 0)) { |
2143 | error = nr_extents; | 2470 | error = nr_extents; |
2144 | goto bad_swap; | 2471 | goto bad_swap; |
@@ -2147,41 +2474,33 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2147 | if (frontswap_enabled) | 2474 | if (frontswap_enabled) |
2148 | frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long)); | 2475 | frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long)); |
2149 | 2476 | ||
2150 | if (p->bdev) { | 2477 | if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) { |
2151 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { | 2478 | /* |
2152 | p->flags |= SWP_SOLIDSTATE; | 2479 | * When discard is enabled for swap with no particular |
2153 | p->cluster_next = 1 + (prandom_u32() % p->highest_bit); | 2480 | * policy flagged, we set all swap discard flags here in |
2154 | } | 2481 | * order to sustain backward compatibility with older |
2155 | 2482 | * swapon(8) releases. | |
2156 | if ((swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) { | 2483 | */ |
2157 | /* | 2484 | p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | |
2158 | * When discard is enabled for swap with no particular | 2485 | SWP_PAGE_DISCARD); |
2159 | * policy flagged, we set all swap discard flags here in | ||
2160 | * order to sustain backward compatibility with older | ||
2161 | * swapon(8) releases. | ||
2162 | */ | ||
2163 | p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | | ||
2164 | SWP_PAGE_DISCARD); | ||
2165 | 2486 | ||
2166 | /* | 2487 | /* |
2167 | * By flagging sys_swapon, a sysadmin can tell us to | 2488 | * By flagging sys_swapon, a sysadmin can tell us to |
2168 | * either do single-time area discards only, or to just | 2489 | * either do single-time area discards only, or to just |
2169 | * perform discards for released swap page-clusters. | 2490 | * perform discards for released swap page-clusters. |
2170 | * Now it's time to adjust the p->flags accordingly. | 2491 | * Now it's time to adjust the p->flags accordingly. |
2171 | */ | 2492 | */ |
2172 | if (swap_flags & SWAP_FLAG_DISCARD_ONCE) | 2493 | if (swap_flags & SWAP_FLAG_DISCARD_ONCE) |
2173 | p->flags &= ~SWP_PAGE_DISCARD; | 2494 | p->flags &= ~SWP_PAGE_DISCARD; |
2174 | else if (swap_flags & SWAP_FLAG_DISCARD_PAGES) | 2495 | else if (swap_flags & SWAP_FLAG_DISCARD_PAGES) |
2175 | p->flags &= ~SWP_AREA_DISCARD; | 2496 | p->flags &= ~SWP_AREA_DISCARD; |
2176 | 2497 | ||
2177 | /* issue a swapon-time discard if it's still required */ | 2498 | /* issue a swapon-time discard if it's still required */ |
2178 | if (p->flags & SWP_AREA_DISCARD) { | 2499 | if (p->flags & SWP_AREA_DISCARD) { |
2179 | int err = discard_swap(p); | 2500 | int err = discard_swap(p); |
2180 | if (unlikely(err)) | 2501 | if (unlikely(err)) |
2181 | printk(KERN_ERR | 2502 | pr_err("swapon: discard_swap(%p): %d\n", |
2182 | "swapon: discard_swap(%p): %d\n", | 2503 | p, err); |
2183 | p, err); | ||
2184 | } | ||
2185 | } | 2504 | } |
2186 | } | 2505 | } |
2187 | 2506 | ||
@@ -2190,9 +2509,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2190 | if (swap_flags & SWAP_FLAG_PREFER) | 2509 | if (swap_flags & SWAP_FLAG_PREFER) |
2191 | prio = | 2510 | prio = |
2192 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; | 2511 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; |
2193 | enable_swap_info(p, prio, swap_map, frontswap_map); | 2512 | enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map); |
2194 | 2513 | ||
2195 | printk(KERN_INFO "Adding %uk swap on %s. " | 2514 | pr_info("Adding %uk swap on %s. " |
2196 | "Priority:%d extents:%d across:%lluk %s%s%s%s%s\n", | 2515 | "Priority:%d extents:%d across:%lluk %s%s%s%s%s\n", |
2197 | p->pages<<(PAGE_SHIFT-10), name->name, p->prio, | 2516 | p->pages<<(PAGE_SHIFT-10), name->name, p->prio, |
2198 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), | 2517 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), |
@@ -2211,6 +2530,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2211 | error = 0; | 2530 | error = 0; |
2212 | goto out; | 2531 | goto out; |
2213 | bad_swap: | 2532 | bad_swap: |
2533 | free_percpu(p->percpu_cluster); | ||
2534 | p->percpu_cluster = NULL; | ||
2214 | if (inode && S_ISBLK(inode->i_mode) && p->bdev) { | 2535 | if (inode && S_ISBLK(inode->i_mode) && p->bdev) { |
2215 | set_blocksize(p->bdev, p->old_block_size); | 2536 | set_blocksize(p->bdev, p->old_block_size); |
2216 | blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | 2537 | blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); |
@@ -2222,6 +2543,7 @@ bad_swap: | |||
2222 | p->flags = 0; | 2543 | p->flags = 0; |
2223 | spin_unlock(&swap_lock); | 2544 | spin_unlock(&swap_lock); |
2224 | vfree(swap_map); | 2545 | vfree(swap_map); |
2546 | vfree(cluster_info); | ||
2225 | if (swap_file) { | 2547 | if (swap_file) { |
2226 | if (inode && S_ISREG(inode->i_mode)) { | 2548 | if (inode && S_ISREG(inode->i_mode)) { |
2227 | mutex_unlock(&inode->i_mutex); | 2549 | mutex_unlock(&inode->i_mutex); |
@@ -2291,6 +2613,16 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) | |||
2291 | goto unlock_out; | 2613 | goto unlock_out; |
2292 | 2614 | ||
2293 | count = p->swap_map[offset]; | 2615 | count = p->swap_map[offset]; |
2616 | |||
2617 | /* | ||
2618 | * swapin_readahead() doesn't check if a swap entry is valid, so the | ||
2619 | * swap entry could be SWAP_MAP_BAD. Check here with the lock held. ||
2620 | */ | ||
2621 | if (unlikely(swap_count(count) == SWAP_MAP_BAD)) { | ||
2622 | err = -ENOENT; | ||
2623 | goto unlock_out; | ||
2624 | } | ||
2625 | |||
2294 | has_cache = count & SWAP_HAS_CACHE; | 2626 | has_cache = count & SWAP_HAS_CACHE; |
2295 | count &= ~SWAP_HAS_CACHE; | 2627 | count &= ~SWAP_HAS_CACHE; |
2296 | err = 0; | 2628 | err = 0; |
@@ -2326,7 +2658,7 @@ out: | |||
2326 | return err; | 2658 | return err; |
2327 | 2659 | ||
2328 | bad_file: | 2660 | bad_file: |
2329 | printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val); | 2661 | pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val); |
2330 | goto out; | 2662 | goto out; |
2331 | } | 2663 | } |
2332 | 2664 | ||
@@ -388,15 +388,12 @@ struct address_space *page_mapping(struct page *page) | |||
388 | struct address_space *mapping = page->mapping; | 388 | struct address_space *mapping = page->mapping; |
389 | 389 | ||
390 | VM_BUG_ON(PageSlab(page)); | 390 | VM_BUG_ON(PageSlab(page)); |
391 | #ifdef CONFIG_SWAP | ||
392 | if (unlikely(PageSwapCache(page))) { | 391 | if (unlikely(PageSwapCache(page))) { |
393 | swp_entry_t entry; | 392 | swp_entry_t entry; |
394 | 393 | ||
395 | entry.val = page_private(page); | 394 | entry.val = page_private(page); |
396 | mapping = swap_address_space(entry); | 395 | mapping = swap_address_space(entry); |
397 | } else | 396 | } else if ((unsigned long)mapping & PAGE_MAPPING_ANON) |
398 | #endif | ||
399 | if ((unsigned long)mapping & PAGE_MAPPING_ANON) | ||
400 | mapping = NULL; | 397 | mapping = NULL; |
401 | return mapping; | 398 | return mapping; |
402 | } | 399 | } |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 13a54953a273..107454312d5e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -752,7 +752,6 @@ struct vmap_block_queue { | |||
752 | struct vmap_block { | 752 | struct vmap_block { |
753 | spinlock_t lock; | 753 | spinlock_t lock; |
754 | struct vmap_area *va; | 754 | struct vmap_area *va; |
755 | struct vmap_block_queue *vbq; | ||
756 | unsigned long free, dirty; | 755 | unsigned long free, dirty; |
757 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); | 756 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); |
758 | struct list_head free_list; | 757 | struct list_head free_list; |
@@ -830,7 +829,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) | |||
830 | radix_tree_preload_end(); | 829 | radix_tree_preload_end(); |
831 | 830 | ||
832 | vbq = &get_cpu_var(vmap_block_queue); | 831 | vbq = &get_cpu_var(vmap_block_queue); |
833 | vb->vbq = vbq; | ||
834 | spin_lock(&vbq->lock); | 832 | spin_lock(&vbq->lock); |
835 | list_add_rcu(&vb->free_list, &vbq->free); | 833 | list_add_rcu(&vb->free_list, &vbq->free); |
836 | spin_unlock(&vbq->lock); | 834 | spin_unlock(&vbq->lock); |
@@ -1018,15 +1016,16 @@ void vm_unmap_aliases(void) | |||
1018 | 1016 | ||
1019 | rcu_read_lock(); | 1017 | rcu_read_lock(); |
1020 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | 1018 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { |
1021 | int i; | 1019 | int i, j; |
1022 | 1020 | ||
1023 | spin_lock(&vb->lock); | 1021 | spin_lock(&vb->lock); |
1024 | i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS); | 1022 | i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS); |
1025 | while (i < VMAP_BBMAP_BITS) { | 1023 | if (i < VMAP_BBMAP_BITS) { |
1026 | unsigned long s, e; | 1024 | unsigned long s, e; |
1027 | int j; | 1025 | |
1028 | j = find_next_zero_bit(vb->dirty_map, | 1026 | j = find_last_bit(vb->dirty_map, |
1029 | VMAP_BBMAP_BITS, i); | 1027 | VMAP_BBMAP_BITS); |
1028 | j = j + 1; /* need exclusive index */ | ||
1030 | 1029 | ||
1031 | s = vb->va->va_start + (i << PAGE_SHIFT); | 1030 | s = vb->va->va_start + (i << PAGE_SHIFT); |
1032 | e = vb->va->va_start + (j << PAGE_SHIFT); | 1031 | e = vb->va->va_start + (j << PAGE_SHIFT); |
@@ -1036,10 +1035,6 @@ void vm_unmap_aliases(void) | |||
1036 | start = s; | 1035 | start = s; |
1037 | if (e > end) | 1036 | if (e > end) |
1038 | end = e; | 1037 | end = e; |
1039 | |||
1040 | i = j; | ||
1041 | i = find_next_bit(vb->dirty_map, | ||
1042 | VMAP_BBMAP_BITS, i); | ||
1043 | } | 1038 | } |
1044 | spin_unlock(&vb->lock); | 1039 | spin_unlock(&vb->lock); |
1045 | } | 1040 | } |
@@ -1263,7 +1258,7 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) | |||
1263 | int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) | 1258 | int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) |
1264 | { | 1259 | { |
1265 | unsigned long addr = (unsigned long)area->addr; | 1260 | unsigned long addr = (unsigned long)area->addr; |
1266 | unsigned long end = addr + area->size - PAGE_SIZE; | 1261 | unsigned long end = addr + get_vm_area_size(area); |
1267 | int err; | 1262 | int err; |
1268 | 1263 | ||
1269 | err = vmap_page_range(addr, end, prot, *pages); | 1264 | err = vmap_page_range(addr, end, prot, *pages); |
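
The hunks in this file replace open-coded "area->size - PAGE_SIZE" with get_vm_area_size(); presumably that helper is just the named form of the same expression, accounting for the one-page guard at the end of each vmalloc area:

    static inline size_t get_vm_area_size(const struct vm_struct *area)
    {
            /* exclude the trailing guard page */
            return area->size - PAGE_SIZE;
    }
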
@@ -1558,7 +1553,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | |||
1558 | unsigned int nr_pages, array_size, i; | 1553 | unsigned int nr_pages, array_size, i; |
1559 | gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; | 1554 | gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; |
1560 | 1555 | ||
1561 | nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; | 1556 | nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; |
1562 | array_size = (nr_pages * sizeof(struct page *)); | 1557 | array_size = (nr_pages * sizeof(struct page *)); |
1563 | 1558 | ||
1564 | area->nr_pages = nr_pages; | 1559 | area->nr_pages = nr_pages; |
@@ -1990,7 +1985,7 @@ long vread(char *buf, char *addr, unsigned long count) | |||
1990 | 1985 | ||
1991 | vm = va->vm; | 1986 | vm = va->vm; |
1992 | vaddr = (char *) vm->addr; | 1987 | vaddr = (char *) vm->addr; |
1993 | if (addr >= vaddr + vm->size - PAGE_SIZE) | 1988 | if (addr >= vaddr + get_vm_area_size(vm)) |
1994 | continue; | 1989 | continue; |
1995 | while (addr < vaddr) { | 1990 | while (addr < vaddr) { |
1996 | if (count == 0) | 1991 | if (count == 0) |
@@ -2000,7 +1995,7 @@ long vread(char *buf, char *addr, unsigned long count) | |||
2000 | addr++; | 1995 | addr++; |
2001 | count--; | 1996 | count--; |
2002 | } | 1997 | } |
2003 | n = vaddr + vm->size - PAGE_SIZE - addr; | 1998 | n = vaddr + get_vm_area_size(vm) - addr; |
2004 | if (n > count) | 1999 | if (n > count) |
2005 | n = count; | 2000 | n = count; |
2006 | if (!(vm->flags & VM_IOREMAP)) | 2001 | if (!(vm->flags & VM_IOREMAP)) |
@@ -2072,7 +2067,7 @@ long vwrite(char *buf, char *addr, unsigned long count) | |||
2072 | 2067 | ||
2073 | vm = va->vm; | 2068 | vm = va->vm; |
2074 | vaddr = (char *) vm->addr; | 2069 | vaddr = (char *) vm->addr; |
2075 | if (addr >= vaddr + vm->size - PAGE_SIZE) | 2070 | if (addr >= vaddr + get_vm_area_size(vm)) |
2076 | continue; | 2071 | continue; |
2077 | while (addr < vaddr) { | 2072 | while (addr < vaddr) { |
2078 | if (count == 0) | 2073 | if (count == 0) |
@@ -2081,7 +2076,7 @@ long vwrite(char *buf, char *addr, unsigned long count) | |||
2081 | addr++; | 2076 | addr++; |
2082 | count--; | 2077 | count--; |
2083 | } | 2078 | } |
2084 | n = vaddr + vm->size - PAGE_SIZE - addr; | 2079 | n = vaddr + get_vm_area_size(vm) - addr; |
2085 | if (n > count) | 2080 | if (n > count) |
2086 | n = count; | 2081 | n = count; |
2087 | if (!(vm->flags & VM_IOREMAP)) { | 2082 | if (!(vm->flags & VM_IOREMAP)) { |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 2cff0d491c6d..fe715daeb8bc 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -146,6 +146,25 @@ static bool global_reclaim(struct scan_control *sc) | |||
146 | } | 146 | } |
147 | #endif | 147 | #endif |
148 | 148 | ||
149 | unsigned long zone_reclaimable_pages(struct zone *zone) | ||
150 | { | ||
151 | int nr; | ||
152 | |||
153 | nr = zone_page_state(zone, NR_ACTIVE_FILE) + | ||
154 | zone_page_state(zone, NR_INACTIVE_FILE); | ||
155 | |||
156 | if (get_nr_swap_pages() > 0) | ||
157 | nr += zone_page_state(zone, NR_ACTIVE_ANON) + | ||
158 | zone_page_state(zone, NR_INACTIVE_ANON); | ||
159 | |||
160 | return nr; | ||
161 | } | ||
162 | |||
163 | bool zone_reclaimable(struct zone *zone) | ||
164 | { | ||
165 | return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; | ||
166 | } | ||
167 | |||
149 | static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) | 168 | static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) |
150 | { | 169 | { |
151 | if (!mem_cgroup_disabled()) | 170 | if (!mem_cgroup_disabled()) |
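
The *6 threshold gives a concrete cutoff: a zone with, say, 100,000 reclaimable pages keeps reporting zone_reclaimable() as true until pages_scanned exceeds 600,000; past that point the callers patched below treat the zone the way the removed zone->all_unreclaimable flag used to.
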
@@ -545,7 +564,7 @@ int remove_mapping(struct address_space *mapping, struct page *page) | |||
545 | */ | 564 | */ |
546 | void putback_lru_page(struct page *page) | 565 | void putback_lru_page(struct page *page) |
547 | { | 566 | { |
548 | int lru; | 567 | bool is_unevictable; |
549 | int was_unevictable = PageUnevictable(page); | 568 | int was_unevictable = PageUnevictable(page); |
550 | 569 | ||
551 | VM_BUG_ON(PageLRU(page)); | 570 | VM_BUG_ON(PageLRU(page)); |
@@ -560,14 +579,14 @@ redo: | |||
560 | * unevictable page on [in]active list. | 579 | * unevictable page on [in]active list. |
561 | * We know how to handle that. | 580 | * We know how to handle that. |
562 | */ | 581 | */ |
563 | lru = page_lru_base_type(page); | 582 | is_unevictable = false; |
564 | lru_cache_add(page); | 583 | lru_cache_add(page); |
565 | } else { | 584 | } else { |
566 | /* | 585 | /* |
567 | * Put unevictable pages directly on zone's unevictable | 586 | * Put unevictable pages directly on zone's unevictable |
568 | * list. | 587 | * list. |
569 | */ | 588 | */ |
570 | lru = LRU_UNEVICTABLE; | 589 | is_unevictable = true; |
571 | add_page_to_unevictable_list(page); | 590 | add_page_to_unevictable_list(page); |
572 | /* | 591 | /* |
573 | * When racing with an mlock or AS_UNEVICTABLE clearing | 592 | * When racing with an mlock or AS_UNEVICTABLE clearing |
@@ -587,7 +606,7 @@ redo: | |||
587 | * page is on unevictable list, it will never be freed. To avoid that, | 606 | * page is on unevictable list, it will never be freed. To avoid that, |
588 | * check after we added it to the list, again. | 607 | * check after we added it to the list, again. |
589 | */ | 608 | */ |
590 | if (lru == LRU_UNEVICTABLE && page_evictable(page)) { | 609 | if (is_unevictable && page_evictable(page)) { |
591 | if (!isolate_lru_page(page)) { | 610 | if (!isolate_lru_page(page)) { |
592 | put_page(page); | 611 | put_page(page); |
593 | goto redo; | 612 | goto redo; |
@@ -598,9 +617,9 @@ redo: | |||
598 | */ | 617 | */ |
599 | } | 618 | } |
600 | 619 | ||
601 | if (was_unevictable && lru != LRU_UNEVICTABLE) | 620 | if (was_unevictable && !is_unevictable) |
602 | count_vm_event(UNEVICTABLE_PGRESCUED); | 621 | count_vm_event(UNEVICTABLE_PGRESCUED); |
603 | else if (!was_unevictable && lru == LRU_UNEVICTABLE) | 622 | else if (!was_unevictable && is_unevictable) |
604 | count_vm_event(UNEVICTABLE_PGCULLED); | 623 | count_vm_event(UNEVICTABLE_PGCULLED); |
605 | 624 | ||
606 | put_page(page); /* drop ref from isolate */ | 625 | put_page(page); /* drop ref from isolate */ |
@@ -1789,7 +1808,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, | |||
1789 | * latencies, so it's better to scan a minimum amount there as | 1808 | * latencies, so it's better to scan a minimum amount there as |
1790 | * well. | 1809 | * well. |
1791 | */ | 1810 | */ |
1792 | if (current_is_kswapd() && zone->all_unreclaimable) | 1811 | if (current_is_kswapd() && !zone_reclaimable(zone)) |
1793 | force_scan = true; | 1812 | force_scan = true; |
1794 | if (!global_reclaim(sc)) | 1813 | if (!global_reclaim(sc)) |
1795 | force_scan = true; | 1814 | force_scan = true; |
@@ -2244,8 +2263,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) | |||
2244 | if (global_reclaim(sc)) { | 2263 | if (global_reclaim(sc)) { |
2245 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 2264 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
2246 | continue; | 2265 | continue; |
2247 | if (zone->all_unreclaimable && | 2266 | if (sc->priority != DEF_PRIORITY && |
2248 | sc->priority != DEF_PRIORITY) | 2267 | !zone_reclaimable(zone)) |
2249 | continue; /* Let kswapd poll it */ | 2268 | continue; /* Let kswapd poll it */ |
2250 | if (IS_ENABLED(CONFIG_COMPACTION)) { | 2269 | if (IS_ENABLED(CONFIG_COMPACTION)) { |
2251 | /* | 2270 | /* |
@@ -2283,11 +2302,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) | |||
2283 | return aborted_reclaim; | 2302 | return aborted_reclaim; |
2284 | } | 2303 | } |
2285 | 2304 | ||
2286 | static bool zone_reclaimable(struct zone *zone) | ||
2287 | { | ||
2288 | return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; | ||
2289 | } | ||
2290 | |||
2291 | /* All zones in zonelist are unreclaimable? */ | 2305 | /* All zones in zonelist are unreclaimable? */ |
2292 | static bool all_unreclaimable(struct zonelist *zonelist, | 2306 | static bool all_unreclaimable(struct zonelist *zonelist, |
2293 | struct scan_control *sc) | 2307 | struct scan_control *sc) |
@@ -2301,7 +2315,7 @@ static bool all_unreclaimable(struct zonelist *zonelist, | |||
2301 | continue; | 2315 | continue; |
2302 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 2316 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
2303 | continue; | 2317 | continue; |
2304 | if (!zone->all_unreclaimable) | 2318 | if (zone_reclaimable(zone)) |
2305 | return false; | 2319 | return false; |
2306 | } | 2320 | } |
2307 | 2321 | ||
@@ -2712,7 +2726,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx) | |||
2712 | * DEF_PRIORITY. Effectively, it considers them balanced so | 2726 | * DEF_PRIORITY. Effectively, it considers them balanced so |
2713 | * they must be considered balanced here as well! | 2727 | * they must be considered balanced here as well! |
2714 | */ | 2728 | */ |
2715 | if (zone->all_unreclaimable) { | 2729 | if (!zone_reclaimable(zone)) { |
2716 | balanced_pages += zone->managed_pages; | 2730 | balanced_pages += zone->managed_pages; |
2717 | continue; | 2731 | continue; |
2718 | } | 2732 | } |
@@ -2773,7 +2787,6 @@ static bool kswapd_shrink_zone(struct zone *zone, | |||
2773 | unsigned long lru_pages, | 2787 | unsigned long lru_pages, |
2774 | unsigned long *nr_attempted) | 2788 | unsigned long *nr_attempted) |
2775 | { | 2789 | { |
2776 | unsigned long nr_slab; | ||
2777 | int testorder = sc->order; | 2790 | int testorder = sc->order; |
2778 | unsigned long balance_gap; | 2791 | unsigned long balance_gap; |
2779 | struct reclaim_state *reclaim_state = current->reclaim_state; | 2792 | struct reclaim_state *reclaim_state = current->reclaim_state; |
@@ -2818,15 +2831,12 @@ static bool kswapd_shrink_zone(struct zone *zone, | |||
2818 | shrink_zone(zone, sc); | 2831 | shrink_zone(zone, sc); |
2819 | 2832 | ||
2820 | reclaim_state->reclaimed_slab = 0; | 2833 | reclaim_state->reclaimed_slab = 0; |
2821 | nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages); | 2834 | shrink_slab(&shrink, sc->nr_scanned, lru_pages); |
2822 | sc->nr_reclaimed += reclaim_state->reclaimed_slab; | 2835 | sc->nr_reclaimed += reclaim_state->reclaimed_slab; |
2823 | 2836 | ||
2824 | /* Account for the number of pages attempted to reclaim */ | 2837 | /* Account for the number of pages attempted to reclaim */ |
2825 | *nr_attempted += sc->nr_to_reclaim; | 2838 | *nr_attempted += sc->nr_to_reclaim; |
2826 | 2839 | ||
2827 | if (nr_slab == 0 && !zone_reclaimable(zone)) | ||
2828 | zone->all_unreclaimable = 1; | ||
2829 | |||
2830 | zone_clear_flag(zone, ZONE_WRITEBACK); | 2840 | zone_clear_flag(zone, ZONE_WRITEBACK); |
2831 | 2841 | ||
2832 | /* | 2842 | /* |
@@ -2835,7 +2845,7 @@ static bool kswapd_shrink_zone(struct zone *zone, | |||
2835 | * BDIs but as pressure is relieved, speculatively avoid congestion | 2845 | * BDIs but as pressure is relieved, speculatively avoid congestion |
2836 | * waits. | 2846 | * waits. |
2837 | */ | 2847 | */ |
2838 | if (!zone->all_unreclaimable && | 2848 | if (zone_reclaimable(zone) && |
2839 | zone_balanced(zone, testorder, 0, classzone_idx)) { | 2849 | zone_balanced(zone, testorder, 0, classzone_idx)) { |
2840 | zone_clear_flag(zone, ZONE_CONGESTED); | 2850 | zone_clear_flag(zone, ZONE_CONGESTED); |
2841 | zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); | 2851 | zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); |
@@ -2901,8 +2911,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2901 | if (!populated_zone(zone)) | 2911 | if (!populated_zone(zone)) |
2902 | continue; | 2912 | continue; |
2903 | 2913 | ||
2904 | if (zone->all_unreclaimable && | 2914 | if (sc.priority != DEF_PRIORITY && |
2905 | sc.priority != DEF_PRIORITY) | 2915 | !zone_reclaimable(zone)) |
2906 | continue; | 2916 | continue; |
2907 | 2917 | ||
2908 | /* | 2918 | /* |
@@ -2980,8 +2990,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2980 | if (!populated_zone(zone)) | 2990 | if (!populated_zone(zone)) |
2981 | continue; | 2991 | continue; |
2982 | 2992 | ||
2983 | if (zone->all_unreclaimable && | 2993 | if (sc.priority != DEF_PRIORITY && |
2984 | sc.priority != DEF_PRIORITY) | 2994 | !zone_reclaimable(zone)) |
2985 | continue; | 2995 | continue; |
2986 | 2996 | ||
2987 | sc.nr_scanned = 0; | 2997 | sc.nr_scanned = 0; |
@@ -3237,7 +3247,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) | |||
3237 | } | 3247 | } |
3238 | if (!waitqueue_active(&pgdat->kswapd_wait)) | 3248 | if (!waitqueue_active(&pgdat->kswapd_wait)) |
3239 | return; | 3249 | return; |
3240 | if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0)) | 3250 | if (zone_balanced(zone, order, 0, 0)) |
3241 | return; | 3251 | return; |
3242 | 3252 | ||
3243 | trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order); | 3253 | trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order); |
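Editor's note: wakeup_kswapd() now gates the wakeup on zone_balanced() rather than a bare low-watermark test, so the condition that wakes kswapd matches the condition kswapd itself uses to stop. Roughly, as the helper reads in this tree (note it checks the high watermark and, for higher orders, compaction suitability):

    static bool zone_balanced(struct zone *zone, int order,
                              unsigned long balance_gap, int classzone_idx)
    {
            if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
                                        balance_gap, classzone_idx, 0))
                    return false;

            if (IS_ENABLED(CONFIG_COMPACTION) && order &&
                !compaction_suitable(zone, order))
                    return false;

            return true;
    }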
@@ -3265,20 +3275,6 @@ unsigned long global_reclaimable_pages(void) | |||
3265 | return nr; | 3275 | return nr; |
3266 | } | 3276 | } |
3267 | 3277 | ||
3268 | unsigned long zone_reclaimable_pages(struct zone *zone) | ||
3269 | { | ||
3270 | int nr; | ||
3271 | |||
3272 | nr = zone_page_state(zone, NR_ACTIVE_FILE) + | ||
3273 | zone_page_state(zone, NR_INACTIVE_FILE); | ||
3274 | |||
3275 | if (get_nr_swap_pages() > 0) | ||
3276 | nr += zone_page_state(zone, NR_ACTIVE_ANON) + | ||
3277 | zone_page_state(zone, NR_INACTIVE_ANON); | ||
3278 | |||
3279 | return nr; | ||
3280 | } | ||
3281 | |||
3282 | #ifdef CONFIG_HIBERNATION | 3278 | #ifdef CONFIG_HIBERNATION |
3283 | /* | 3279 | /* |
3284 | * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of | 3280 | * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of |
@@ -3576,7 +3572,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
3576 | zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages) | 3572 | zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages) |
3577 | return ZONE_RECLAIM_FULL; | 3573 | return ZONE_RECLAIM_FULL; |
3578 | 3574 | ||
3579 | if (zone->all_unreclaimable) | 3575 | if (!zone_reclaimable(zone)) |
3580 | return ZONE_RECLAIM_FULL; | 3576 | return ZONE_RECLAIM_FULL; |
3581 | 3577 | ||
3582 | /* | 3578 | /* |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 20c2ef4458fa..9bb314577911 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -19,6 +19,9 @@ | |||
19 | #include <linux/math64.h> | 19 | #include <linux/math64.h> |
20 | #include <linux/writeback.h> | 20 | #include <linux/writeback.h> |
21 | #include <linux/compaction.h> | 21 | #include <linux/compaction.h> |
22 | #include <linux/mm_inline.h> | ||
23 | |||
24 | #include "internal.h" | ||
22 | 25 | ||
23 | #ifdef CONFIG_VM_EVENT_COUNTERS | 26 | #ifdef CONFIG_VM_EVENT_COUNTERS |
24 | DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; | 27 | DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; |
@@ -414,12 +417,17 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item) | |||
414 | EXPORT_SYMBOL(dec_zone_page_state); | 417 | EXPORT_SYMBOL(dec_zone_page_state); |
415 | #endif | 418 | #endif |
416 | 419 | ||
420 | static inline void fold_diff(int *diff) | ||
421 | { | ||
422 | int i; | ||
423 | |||
424 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | ||
425 | if (diff[i]) | ||
426 | atomic_long_add(diff[i], &vm_stat[i]); | ||
427 | } | ||
428 | |||
417 | /* | 429 | /* |
418 | * Update the zone counters for one cpu. | 430 | * Update the zone counters for the current cpu. |
419 | * | ||
420 | * The cpu specified must be either the current cpu or a processor that | ||
421 | * is not online. If it is the current cpu then the execution thread must | ||
422 | * be pinned to the current cpu. | ||
423 | * | 431 | * |
424 | * Note that refresh_cpu_vm_stats strives to only access | 432 | * Note that refresh_cpu_vm_stats strives to only access |
425 | * node local memory. The per cpu pagesets on remote zones are placed | 433 | * node local memory. The per cpu pagesets on remote zones are placed |
@@ -432,33 +440,29 @@ EXPORT_SYMBOL(dec_zone_page_state); | |||
432 | * with the global counters. These could cause remote node cache line | 440 | * with the global counters. These could cause remote node cache line |
433 | * bouncing and will have to be only done when necessary. | 441 | * bouncing and will have to be only done when necessary. |
434 | */ | 442 | */ |
435 | void refresh_cpu_vm_stats(int cpu) | 443 | static void refresh_cpu_vm_stats(void) |
436 | { | 444 | { |
437 | struct zone *zone; | 445 | struct zone *zone; |
438 | int i; | 446 | int i; |
439 | int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; | 447 | int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; |
440 | 448 | ||
441 | for_each_populated_zone(zone) { | 449 | for_each_populated_zone(zone) { |
442 | struct per_cpu_pageset *p; | 450 | struct per_cpu_pageset __percpu *p = zone->pageset; |
443 | 451 | ||
444 | p = per_cpu_ptr(zone->pageset, cpu); | 452 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { |
453 | int v; | ||
445 | 454 | ||
446 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | 455 | v = this_cpu_xchg(p->vm_stat_diff[i], 0); |
447 | if (p->vm_stat_diff[i]) { | 456 | if (v) { |
448 | unsigned long flags; | ||
449 | int v; | ||
450 | 457 | ||
451 | local_irq_save(flags); | ||
452 | v = p->vm_stat_diff[i]; | ||
453 | p->vm_stat_diff[i] = 0; | ||
454 | local_irq_restore(flags); | ||
455 | atomic_long_add(v, &zone->vm_stat[i]); | 458 | atomic_long_add(v, &zone->vm_stat[i]); |
456 | global_diff[i] += v; | 459 | global_diff[i] += v; |
457 | #ifdef CONFIG_NUMA | 460 | #ifdef CONFIG_NUMA |
458 | /* 3 seconds idle till flush */ | 461 | /* 3 seconds idle till flush */ |
459 | p->expire = 3; | 462 | __this_cpu_write(p->expire, 3); |
460 | #endif | 463 | #endif |
461 | } | 464 | } |
465 | } | ||
462 | cond_resched(); | 466 | cond_resched(); |
463 | #ifdef CONFIG_NUMA | 467 | #ifdef CONFIG_NUMA |
464 | /* | 468 | /* |
@@ -468,29 +472,57 @@ void refresh_cpu_vm_stats(int cpu) | |||
468 | * Check if there are pages remaining in this pageset | 472 | * Check if there are pages remaining in this pageset |
469 | * if not then there is nothing to expire. | 473 | * if not then there is nothing to expire. |
470 | */ | 474 | */ |
471 | if (!p->expire || !p->pcp.count) | 475 | if (!__this_cpu_read(p->expire) || |
476 | !__this_cpu_read(p->pcp.count)) | ||
472 | continue; | 477 | continue; |
473 | 478 | ||
474 | /* | 479 | /* |
475 | * We never drain zones local to this processor. | 480 | * We never drain zones local to this processor. |
476 | */ | 481 | */ |
477 | if (zone_to_nid(zone) == numa_node_id()) { | 482 | if (zone_to_nid(zone) == numa_node_id()) { |
478 | p->expire = 0; | 483 | __this_cpu_write(p->expire, 0); |
479 | continue; | 484 | continue; |
480 | } | 485 | } |
481 | 486 | ||
482 | p->expire--; | 487 | |
483 | if (p->expire) | 488 | if (__this_cpu_dec_return(p->expire)) |
484 | continue; | 489 | continue; |
485 | 490 | ||
486 | if (p->pcp.count) | 491 | if (__this_cpu_read(p->pcp.count)) |
487 | drain_zone_pages(zone, &p->pcp); | 492 | drain_zone_pages(zone, __this_cpu_ptr(&p->pcp)); |
488 | #endif | 493 | #endif |
489 | } | 494 | } |
495 | fold_diff(global_diff); | ||
496 | } | ||
490 | 497 | ||
491 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | 498 | /* |
492 | if (global_diff[i]) | 499 | * Fold the data for an offline cpu into the global array. |
493 | atomic_long_add(global_diff[i], &vm_stat[i]); | 500 | * There cannot be any access by the offline cpu and therefore |
501 | * synchronization is simplified. | ||
502 | */ | ||
503 | void cpu_vm_stats_fold(int cpu) | ||
504 | { | ||
505 | struct zone *zone; | ||
506 | int i; | ||
507 | int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; | ||
508 | |||
509 | for_each_populated_zone(zone) { | ||
510 | struct per_cpu_pageset *p; | ||
511 | |||
512 | p = per_cpu_ptr(zone->pageset, cpu); | ||
513 | |||
514 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | ||
515 | if (p->vm_stat_diff[i]) { | ||
516 | int v; | ||
517 | |||
518 | v = p->vm_stat_diff[i]; | ||
519 | p->vm_stat_diff[i] = 0; | ||
520 | atomic_long_add(v, &zone->vm_stat[i]); | ||
521 | global_diff[i] += v; | ||
522 | } | ||
523 | } | ||
524 | |||
525 | fold_diff(global_diff); | ||
494 | } | 526 | } |
495 | 527 | ||
496 | /* | 528 | /* |
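Editor's note: the refresh path above now uses this_cpu_xchg() to read and zero each per-cpu delta as a single per-cpu operation, keeping the interrupt safety of the old local_irq_save() window without the cost of disabling interrupts. The new cpu_vm_stats_fold() keeps plain reads because its target cpu is offline and cannot race with itself. A sketch of the intended caller, modelled on the cpu-hotplug notifier updated elsewhere in this series (details hedged):

    /* sketch: fold a dead cpu's leftover counter deltas exactly once */
    static int page_alloc_cpu_notify(struct notifier_block *self,
                                     unsigned long action, void *hcpu)
    {
            int cpu = (unsigned long)hcpu;

            if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
                    /* drain per-cpu pages, then fold the stat deltas */
                    cpu_vm_stats_fold(cpu);
            }
            return NOTIFY_OK;
    }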
@@ -703,6 +735,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, | |||
703 | const char * const vmstat_text[] = { | 735 | const char * const vmstat_text[] = { |
704 | /* Zoned VM counters */ | 736 | /* Zoned VM counters */ |
705 | "nr_free_pages", | 737 | "nr_free_pages", |
738 | "nr_alloc_batch", | ||
706 | "nr_inactive_anon", | 739 | "nr_inactive_anon", |
707 | "nr_active_anon", | 740 | "nr_active_anon", |
708 | "nr_inactive_file", | 741 | "nr_inactive_file", |
@@ -817,6 +850,12 @@ const char * const vmstat_text[] = { | |||
817 | "thp_zero_page_alloc", | 850 | "thp_zero_page_alloc", |
818 | "thp_zero_page_alloc_failed", | 851 | "thp_zero_page_alloc_failed", |
819 | #endif | 852 | #endif |
853 | #ifdef CONFIG_SMP | ||
854 | "nr_tlb_remote_flush", | ||
855 | "nr_tlb_remote_flush_received", | ||
856 | #endif | ||
857 | "nr_tlb_local_flush_all", | ||
858 | "nr_tlb_local_flush_one", | ||
820 | 859 | ||
821 | #endif /* CONFIG_VM_EVENT_COUNTERS */ | 860 | #endif /* CONFIG_VM_EVENT_COUNTERS */ |
822 | }; | 861 | }; |
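Editor's note: vmstat_text[] is positional, so "nr_alloc_batch" must occupy the same index as the new NR_ALLOC_BATCH entry in enum zone_stat_item, and the four TLB-flush strings must track the matching vm_event_item entries, or every later /proc/vmstat line prints under the wrong name. A hypothetical guard (not in this tree) stating the invariant:

    /* hypothetical: the name table may never be shorter than the
     * zone counter enum that indexes its first section */
    BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VM_ZONE_STAT_ITEMS);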
@@ -1052,7 +1091,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, | |||
1052 | "\n all_unreclaimable: %u" | 1091 | "\n all_unreclaimable: %u" |
1053 | "\n start_pfn: %lu" | 1092 | "\n start_pfn: %lu" |
1054 | "\n inactive_ratio: %u", | 1093 | "\n inactive_ratio: %u", |
1055 | zone->all_unreclaimable, | 1094 | !zone_reclaimable(zone), |
1056 | zone->zone_start_pfn, | 1095 | zone->zone_start_pfn, |
1057 | zone->inactive_ratio); | 1096 | zone->inactive_ratio); |
1058 | seq_putc(m, '\n'); | 1097 | seq_putc(m, '\n'); |
@@ -1177,7 +1216,7 @@ int sysctl_stat_interval __read_mostly = HZ; | |||
1177 | 1216 | ||
1178 | static void vmstat_update(struct work_struct *w) | 1217 | static void vmstat_update(struct work_struct *w) |
1179 | { | 1218 | { |
1180 | refresh_cpu_vm_stats(smp_processor_id()); | 1219 | refresh_cpu_vm_stats(); |
1181 | schedule_delayed_work(&__get_cpu_var(vmstat_work), | 1220 | schedule_delayed_work(&__get_cpu_var(vmstat_work), |
1182 | round_jiffies_relative(sysctl_stat_interval)); | 1221 | round_jiffies_relative(sysctl_stat_interval)); |
1183 | } | 1222 | } |
diff --git a/mm/zbud.c b/mm/zbud.c --- a/mm/zbud.c +++ b/mm/zbud.c | |||
@@ -16,7 +16,7 @@ | |||
16 | * | 16 | * |
17 | * zbud works by storing compressed pages, or "zpages", together in pairs in a | 17 | * zbud works by storing compressed pages, or "zpages", together in pairs in a |
18 | * single memory page called a "zbud page". The first buddy is "left | 18 | * single memory page called a "zbud page". The first buddy is "left |
19 | * justifed" at the beginning of the zbud page, and the last buddy is "right | 19 | * justified" at the beginning of the zbud page, and the last buddy is "right |
20 | * justified" at the end of the zbud page. The benefit is that if either | 20 | * justified" at the end of the zbud page. The benefit is that if either |
21 | * buddy is freed, the freed buddy space, coalesced with whatever slack space | 21 | * buddy is freed, the freed buddy space, coalesced with whatever slack space |
22 | * that existed between the buddies, results in the largest possible free region | 22 | * that existed between the buddies, results in the largest possible free region |
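Editor's note, for context on the typo fix above: zbud packs two compressed pages per page, the first against offset zero and the last against PAGE_SIZE, so a freed buddy always leaves one contiguous hole in the middle. Sketched below; the identifiers are illustrative, not zbud's actual field names:

    /*   0                                            PAGE_SIZE
     *   |<- first zpage ->|      free hole      |<- last zpage ->|
     */
    size_t last_zpage_size = 400;                       /* hypothetical */
    size_t first_offset = 0;                            /* left justified */
    size_t last_offset = PAGE_SIZE - last_zpage_size;   /* right justified */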
@@ -243,7 +243,7 @@ void zbud_destroy_pool(struct zbud_pool *pool) | |||
243 | * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used | 243 | * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used |
244 | * as zbud pool pages. | 244 | * as zbud pool pages. |
245 | * | 245 | * |
246 | * Return: 0 if success and handle is set, otherwise -EINVAL is the size or | 246 | * Return: 0 if success and handle is set, otherwise -EINVAL if the size or |
247 | * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate | 247 | * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate |
248 | * a new page. | 248 | * a new page. |
249 | */ | 249 | */ |
diff --git a/mm/zswap.c b/mm/zswap.c index deda2b671e12..841e35f1db22 100644 --- a/mm/zswap.c +++ b/mm/zswap.c | |||
@@ -409,7 +409,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry, | |||
409 | struct page **retpage) | 409 | struct page **retpage) |
410 | { | 410 | { |
411 | struct page *found_page, *new_page = NULL; | 411 | struct page *found_page, *new_page = NULL; |
412 | struct address_space *swapper_space = &swapper_spaces[swp_type(entry)]; | 412 | struct address_space *swapper_space = swap_address_space(entry); |
413 | int err; | 413 | int err; |
414 | 414 | ||
415 | *retpage = NULL; | 415 | *retpage = NULL; |
@@ -790,26 +790,14 @@ static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset) | |||
790 | static void zswap_frontswap_invalidate_area(unsigned type) | 790 | static void zswap_frontswap_invalidate_area(unsigned type) |
791 | { | 791 | { |
792 | struct zswap_tree *tree = zswap_trees[type]; | 792 | struct zswap_tree *tree = zswap_trees[type]; |
793 | struct rb_node *node; | 793 | struct zswap_entry *entry, *n; |
794 | struct zswap_entry *entry; | ||
795 | 794 | ||
796 | if (!tree) | 795 | if (!tree) |
797 | return; | 796 | return; |
798 | 797 | ||
799 | /* walk the tree and free everything */ | 798 | /* walk the tree and free everything */ |
800 | spin_lock(&tree->lock); | 799 | spin_lock(&tree->lock); |
801 | /* | 800 | rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) { |
802 | * TODO: Even though this code should not be executed because | ||
803 | * the try_to_unuse() in swapoff should have emptied the tree, | ||
804 | * it is very wasteful to rebalance the tree after every | ||
805 | * removal when we are freeing the whole tree. | ||
806 | * | ||
807 | * If post-order traversal code is ever added to the rbtree | ||
808 | * implementation, it should be used here. | ||
809 | */ | ||
810 | while ((node = rb_first(&tree->rbroot))) { | ||
811 | entry = rb_entry(node, struct zswap_entry, rbnode); | ||
812 | rb_erase(&entry->rbnode, &tree->rbroot); | ||
813 | zbud_free(tree->pool, entry->handle); | 801 | zbud_free(tree->pool, entry->handle); |
814 | zswap_entry_cache_free(entry); | 802 | zswap_entry_cache_free(entry); |
815 | atomic_dec(&zswap_stored_pages); | 803 | atomic_dec(&zswap_stored_pages); |
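Editor's note: the rewritten loop uses rbtree_postorder_for_each_entry_safe(), new in this release, which visits children before parents; whole trees can then be freed without the per-node rb_erase() rebalancing the removed TODO complained about. Generic usage, sketched with an illustrative struct:

    struct item {
            struct rb_node rbnode;
    };

    static void free_tree(struct rb_root *root)
    {
            struct item *pos, *n;

            /* postorder: a node is visited only after both children,
             * so freeing pos never frees memory the walk still needs */
            rbtree_postorder_for_each_entry_safe(pos, n, root, rbnode)
                    kfree(pos);

            *root = RB_ROOT;        /* the tree is gone; reset the root */
    }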
diff --git a/net/socket.c b/net/socket.c index b2d7c629eeb9..0ceaa5cb9ead 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -3072,12 +3072,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, | |||
3072 | 3072 | ||
3073 | uifmap32 = &uifr32->ifr_ifru.ifru_map; | 3073 | uifmap32 = &uifr32->ifr_ifru.ifru_map; |
3074 | err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); | 3074 | err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); |
3075 | err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); | 3075 | err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); |
3076 | err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); | 3076 | err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); |
3077 | err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); | 3077 | err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); |
3078 | err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); | 3078 | err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); |
3079 | err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); | 3079 | err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); |
3080 | err |= __get_user(ifr.ifr_map.port, &uifmap32->port); | 3080 | err |= get_user(ifr.ifr_map.port, &uifmap32->port); |
3081 | if (err) | 3081 | if (err) |
3082 | return -EFAULT; | 3082 | return -EFAULT; |
3083 | 3083 | ||
@@ -3088,12 +3088,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, | |||
3088 | 3088 | ||
3089 | if (cmd == SIOCGIFMAP && !err) { | 3089 | if (cmd == SIOCGIFMAP && !err) { |
3090 | err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); | 3090 | err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); |
3091 | err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); | 3091 | err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); |
3092 | err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); | 3092 | err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); |
3093 | err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); | 3093 | err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); |
3094 | err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); | 3094 | err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); |
3095 | err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); | 3095 | err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); |
3096 | err |= __put_user(ifr.ifr_map.port, &uifmap32->port); | 3096 | err |= put_user(ifr.ifr_map.port, &uifmap32->port); |
3097 | if (err) | 3097 | if (err) |
3098 | err = -EFAULT; | 3098 | err = -EFAULT; |
3099 | } | 3099 | } |
@@ -3167,25 +3167,25 @@ static int routing_ioctl(struct net *net, struct socket *sock, | |||
3167 | struct in6_rtmsg32 __user *ur6 = argp; | 3167 | struct in6_rtmsg32 __user *ur6 = argp; |
3168 | ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), | 3168 | ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), |
3169 | 3 * sizeof(struct in6_addr)); | 3169 | 3 * sizeof(struct in6_addr)); |
3170 | ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); | 3170 | ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); |
3171 | ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); | 3171 | ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); |
3172 | ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); | 3172 | ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); |
3173 | ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); | 3173 | ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); |
3174 | ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); | 3174 | ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); |
3175 | ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); | 3175 | ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); |
3176 | ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); | 3176 | ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); |
3177 | 3177 | ||
3178 | r = (void *) &r6; | 3178 | r = (void *) &r6; |
3179 | } else { /* ipv4 */ | 3179 | } else { /* ipv4 */ |
3180 | struct rtentry32 __user *ur4 = argp; | 3180 | struct rtentry32 __user *ur4 = argp; |
3181 | ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), | 3181 | ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), |
3182 | 3 * sizeof(struct sockaddr)); | 3182 | 3 * sizeof(struct sockaddr)); |
3183 | ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); | 3183 | ret |= get_user(r4.rt_flags, &(ur4->rt_flags)); |
3184 | ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); | 3184 | ret |= get_user(r4.rt_metric, &(ur4->rt_metric)); |
3185 | ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); | 3185 | ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu)); |
3186 | ret |= __get_user(r4.rt_window, &(ur4->rt_window)); | 3186 | ret |= get_user(r4.rt_window, &(ur4->rt_window)); |
3187 | ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); | 3187 | ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt)); |
3188 | ret |= __get_user(rtdev, &(ur4->rt_dev)); | 3188 | ret |= get_user(rtdev, &(ur4->rt_dev)); |
3189 | if (rtdev) { | 3189 | if (rtdev) { |
3190 | ret |= copy_from_user(devname, compat_ptr(rtdev), 15); | 3190 | ret |= copy_from_user(devname, compat_ptr(rtdev), 15); |
3191 | r4.rt_dev = (char __user __force *)devname; | 3191 | r4.rt_dev = (char __user __force *)devname; |
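Editor's note: the socket compat-ioctl hunks drop the double-underscore accessors. __get_user()/__put_user() skip the access_ok() range check and are only safe after the caller has validated the exact range; here the preceding copy_from_user() covers only ifr_name, not the fields that follow, so the checked variants are the safe choice. The distinction, sketched:

    /* get_user(): validates the user pointer, then fetches */
    if (get_user(val, uptr))
            return -EFAULT;         /* bad pointer reported here */

    /* __get_user(): fetch only; the caller must already have done
     * access_ok(VERIFY_READ, uptr, sizeof(*uptr)) for this range */
    if (__get_user(val, uptr))
            return -EFAULT;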
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2ee9eb750560..47016c304c84 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
@@ -31,12 +31,16 @@ my $show_types = 0; | |||
31 | my $fix = 0; | 31 | my $fix = 0; |
32 | my $root; | 32 | my $root; |
33 | my %debug; | 33 | my %debug; |
34 | my %ignore_type = (); | ||
35 | my %camelcase = (); | 34 | my %camelcase = (); |
35 | my %use_type = (); | ||
36 | my @use = (); | ||
37 | my %ignore_type = (); | ||
36 | my @ignore = (); | 38 | my @ignore = (); |
37 | my $help = 0; | 39 | my $help = 0; |
38 | my $configuration_file = ".checkpatch.conf"; | 40 | my $configuration_file = ".checkpatch.conf"; |
39 | my $max_line_length = 80; | 41 | my $max_line_length = 80; |
42 | my $ignore_perl_version = 0; | ||
43 | my $minimum_perl_version = 5.10.0; | ||
40 | 44 | ||
41 | sub help { | 45 | sub help { |
42 | my ($exitcode) = @_; | 46 | my ($exitcode) = @_; |
@@ -54,6 +58,7 @@ Options: | |||
54 | --terse one line per report | 58 | --terse one line per report |
55 | -f, --file treat FILE as regular source file | 59 | -f, --file treat FILE as regular source file |
56 | --subjective, --strict enable more subjective tests | 60 | --subjective, --strict enable more subjective tests |
61 | --types TYPE(,TYPE2...) show only these comma separated message types | ||
57 | --ignore TYPE(,TYPE2...) ignore various comma separated message types | 62 | --ignore TYPE(,TYPE2...) ignore various comma separated message types |
58 | --max-line-length=n set the maximum line length, if exceeded, warn | 63 | --max-line-length=n set the maximum line length, if exceeded, warn |
59 | --show-types show the message "types" in the output | 64 | --show-types show the message "types" in the output |
@@ -71,6 +76,8 @@ Options: | |||
71 | "<inputfile>.EXPERIMENTAL-checkpatch-fixes" | 76 | "<inputfile>.EXPERIMENTAL-checkpatch-fixes" |
72 | with potential errors corrected to the preferred | 77 | with potential errors corrected to the preferred |
73 | checkpatch style | 78 | checkpatch style |
79 | --ignore-perl-version override checking of perl version. expect | ||
80 | runtime errors. | ||
74 | -h, --help, --version display this help and exit | 81 | -h, --help, --version display this help and exit |
75 | 82 | ||
76 | When FILE is - read standard input. | 83 | When FILE is - read standard input. |
@@ -116,6 +123,7 @@ GetOptions( | |||
116 | 'subjective!' => \$check, | 123 | 'subjective!' => \$check, |
117 | 'strict!' => \$check, | 124 | 'strict!' => \$check, |
118 | 'ignore=s' => \@ignore, | 125 | 'ignore=s' => \@ignore, |
126 | 'types=s' => \@use, | ||
119 | 'show-types!' => \$show_types, | 127 | 'show-types!' => \$show_types, |
120 | 'max-line-length=i' => \$max_line_length, | 128 | 'max-line-length=i' => \$max_line_length, |
121 | 'root=s' => \$root, | 129 | 'root=s' => \$root, |
@@ -123,6 +131,7 @@ GetOptions( | |||
123 | 'mailback!' => \$mailback, | 131 | 'mailback!' => \$mailback, |
124 | 'summary-file!' => \$summary_file, | 132 | 'summary-file!' => \$summary_file, |
125 | 'fix!' => \$fix, | 133 | 'fix!' => \$fix, |
134 | 'ignore-perl-version!' => \$ignore_perl_version, | ||
126 | 'debug=s' => \%debug, | 135 | 'debug=s' => \%debug, |
127 | 'test-only=s' => \$tst_only, | 136 | 'test-only=s' => \$tst_only, |
128 | 'h|help' => \$help, | 137 | 'h|help' => \$help, |
@@ -133,24 +142,50 @@ help(0) if ($help); | |||
133 | 142 | ||
134 | my $exit = 0; | 143 | my $exit = 0; |
135 | 144 | ||
145 | if ($^V && $^V lt $minimum_perl_version) { | ||
146 | printf "$P: requires at least perl version %vd\n", $minimum_perl_version; | ||
147 | if (!$ignore_perl_version) { | ||
148 | exit(1); | ||
149 | } | ||
150 | } | ||
151 | |||
136 | if ($#ARGV < 0) { | 152 | if ($#ARGV < 0) { |
137 | print "$P: no input files\n"; | 153 | print "$P: no input files\n"; |
138 | exit(1); | 154 | exit(1); |
139 | } | 155 | } |
140 | 156 | ||
141 | @ignore = split(/,/, join(',',@ignore)); | 157 | sub hash_save_array_words { |
142 | foreach my $word (@ignore) { | 158 | my ($hashRef, $arrayRef) = @_; |
143 | $word =~ s/\s*\n?$//g; | 159 | |
144 | $word =~ s/^\s*//g; | 160 | my @array = split(/,/, join(',', @$arrayRef)); |
145 | $word =~ s/\s+/ /g; | 161 | foreach my $word (@array) { |
146 | $word =~ tr/[a-z]/[A-Z]/; | 162 | $word =~ s/\s*\n?$//g; |
163 | $word =~ s/^\s*//g; | ||
164 | $word =~ s/\s+/ /g; | ||
165 | $word =~ tr/[a-z]/[A-Z]/; | ||
166 | |||
167 | next if ($word =~ m/^\s*#/); | ||
168 | next if ($word =~ m/^\s*$/); | ||
147 | 169 | ||
148 | next if ($word =~ m/^\s*#/); | 170 | $hashRef->{$word}++; |
149 | next if ($word =~ m/^\s*$/); | 171 | } |
172 | } | ||
150 | 173 | ||
151 | $ignore_type{$word}++; | 174 | sub hash_show_words { |
175 | my ($hashRef, $prefix) = @_; | ||
176 | |||
177 | if ($quiet == 0 && keys %$hashRef) { | ||
178 | print "NOTE: $prefix message types:"; | ||
179 | foreach my $word (sort keys %$hashRef) { | ||
180 | print " $word"; | ||
181 | } | ||
182 | print "\n\n"; | ||
183 | } | ||
152 | } | 184 | } |
153 | 185 | ||
186 | hash_save_array_words(\%ignore_type, \@ignore); | ||
187 | hash_save_array_words(\%use_type, \@use); | ||
188 | |||
154 | my $dbg_values = 0; | 189 | my $dbg_values = 0; |
155 | my $dbg_possible = 0; | 190 | my $dbg_possible = 0; |
156 | my $dbg_type = 0; | 191 | my $dbg_type = 0; |
@@ -207,6 +242,8 @@ our $Sparse = qr{ | |||
207 | __rcu | 242 | __rcu |
208 | }x; | 243 | }x; |
209 | 244 | ||
245 | our $InitAttribute = qr{__(?:mem|cpu|dev|net_|)(?:initdata|initconst|init\b)}; | ||
246 | |||
210 | # Notes to $Attribute: | 247 | # Notes to $Attribute: |
211 | # We need \b after 'init' otherwise 'initconst' will cause a false positive in a check | 248 | # We need \b after 'init' otherwise 'initconst' will cause a false positive in a check |
212 | our $Attribute = qr{ | 249 | our $Attribute = qr{ |
@@ -227,7 +264,7 @@ our $Attribute = qr{ | |||
227 | __deprecated| | 264 | __deprecated| |
228 | __read_mostly| | 265 | __read_mostly| |
229 | __kprobes| | 266 | __kprobes| |
230 | __(?:mem|cpu|dev|)(?:initdata|initconst|init\b)| | 267 | $InitAttribute| |
231 | ____cacheline_aligned| | 268 | ____cacheline_aligned| |
232 | ____cacheline_aligned_in_smp| | 269 | ____cacheline_aligned_in_smp| |
233 | ____cacheline_internodealigned_in_smp| | 270 | ____cacheline_internodealigned_in_smp| |
@@ -257,6 +294,7 @@ our $Operators = qr{ | |||
257 | }x; | 294 | }x; |
258 | 295 | ||
259 | our $NonptrType; | 296 | our $NonptrType; |
297 | our $NonptrTypeWithAttr; | ||
260 | our $Type; | 298 | our $Type; |
261 | our $Declare; | 299 | our $Declare; |
262 | 300 | ||
@@ -319,6 +357,12 @@ our @typeList = ( | |||
319 | qr{${Ident}_handler}, | 357 | qr{${Ident}_handler}, |
320 | qr{${Ident}_handler_fn}, | 358 | qr{${Ident}_handler_fn}, |
321 | ); | 359 | ); |
360 | our @typeListWithAttr = ( | ||
361 | @typeList, | ||
362 | qr{struct\s+$InitAttribute\s+$Ident}, | ||
363 | qr{union\s+$InitAttribute\s+$Ident}, | ||
364 | ); | ||
365 | |||
322 | our @modifierList = ( | 366 | our @modifierList = ( |
323 | qr{fastcall}, | 367 | qr{fastcall}, |
324 | ); | 368 | ); |
@@ -332,6 +376,7 @@ our $allowed_asm_includes = qr{(?x: | |||
332 | sub build_types { | 376 | sub build_types { |
333 | my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; | 377 | my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; |
334 | my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; | 378 | my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; |
379 | my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)"; | ||
335 | $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; | 380 | $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; |
336 | $NonptrType = qr{ | 381 | $NonptrType = qr{ |
337 | (?:$Modifier\s+|const\s+)* | 382 | (?:$Modifier\s+|const\s+)* |
@@ -342,6 +387,15 @@ sub build_types { | |||
342 | ) | 387 | ) |
343 | (?:\s+$Modifier|\s+const)* | 388 | (?:\s+$Modifier|\s+const)* |
344 | }x; | 389 | }x; |
390 | $NonptrTypeWithAttr = qr{ | ||
391 | (?:$Modifier\s+|const\s+)* | ||
392 | (?: | ||
393 | (?:typeof|__typeof__)\s*\([^\)]*\)| | ||
394 | (?:$typeTypedefs\b)| | ||
395 | (?:${allWithAttr}\b) | ||
396 | ) | ||
397 | (?:\s+$Modifier|\s+const)* | ||
398 | }x; | ||
345 | $Type = qr{ | 399 | $Type = qr{ |
346 | $NonptrType | 400 | $NonptrType |
347 | (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)? | 401 | (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)? |
@@ -1355,7 +1409,9 @@ sub possible { | |||
1355 | my $prefix = ''; | 1409 | my $prefix = ''; |
1356 | 1410 | ||
1357 | sub show_type { | 1411 | sub show_type { |
1358 | return !defined $ignore_type{$_[0]}; | 1412 | return defined $use_type{$_[0]} if (scalar keys %use_type > 0); |
1413 | |||
1414 | return !defined $ignore_type{$_[0]}; | ||
1359 | } | 1415 | } |
1360 | 1416 | ||
1361 | sub report { | 1417 | sub report { |
@@ -1435,7 +1491,23 @@ sub check_absolute_file { | |||
1435 | sub trim { | 1491 | sub trim { |
1436 | my ($string) = @_; | 1492 | my ($string) = @_; |
1437 | 1493 | ||
1438 | $string =~ s/(^\s+|\s+$)//g; | 1494 | $string =~ s/^\s+|\s+$//g; |
1495 | |||
1496 | return $string; | ||
1497 | } | ||
1498 | |||
1499 | sub ltrim { | ||
1500 | my ($string) = @_; | ||
1501 | |||
1502 | $string =~ s/^\s+//; | ||
1503 | |||
1504 | return $string; | ||
1505 | } | ||
1506 | |||
1507 | sub rtrim { | ||
1508 | my ($string) = @_; | ||
1509 | |||
1510 | $string =~ s/\s+$//; | ||
1439 | 1511 | ||
1440 | return $string; | 1512 | return $string; |
1441 | } | 1513 | } |
@@ -1532,6 +1604,7 @@ sub process { | |||
1532 | my %suppress_export; | 1604 | my %suppress_export; |
1533 | my $suppress_statement = 0; | 1605 | my $suppress_statement = 0; |
1534 | 1606 | ||
1607 | my %signatures = (); | ||
1535 | 1608 | ||
1536 | # Pre-scan the patch sanitizing the lines. | 1609 | # Pre-scan the patch sanitizing the lines. |
1537 | # Pre-scan the patch looking for any __setup documentation. | 1610 | # Pre-scan the patch looking for any __setup documentation. |
@@ -1624,6 +1697,8 @@ sub process { | |||
1624 | $linenr = 0; | 1697 | $linenr = 0; |
1625 | foreach my $line (@lines) { | 1698 | foreach my $line (@lines) { |
1626 | $linenr++; | 1699 | $linenr++; |
1700 | my $sline = $line; #copy of $line | ||
1701 | $sline =~ s/$;/ /g; #with comments as spaces | ||
1627 | 1702 | ||
1628 | my $rawline = $rawlines[$linenr - 1]; | 1703 | my $rawline = $rawlines[$linenr - 1]; |
1629 | 1704 | ||
@@ -1781,6 +1856,17 @@ sub process { | |||
1781 | "email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr); | 1856 | "email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr); |
1782 | } | 1857 | } |
1783 | } | 1858 | } |
1859 | |||
1860 | # Check for duplicate signatures | ||
1861 | my $sig_nospace = $line; | ||
1862 | $sig_nospace =~ s/\s//g; | ||
1863 | $sig_nospace = lc($sig_nospace); | ||
1864 | if (defined $signatures{$sig_nospace}) { | ||
1865 | WARN("BAD_SIGN_OFF", | ||
1866 | "Duplicate signature\n" . $herecurr); | ||
1867 | } else { | ||
1868 | $signatures{$sig_nospace} = 1; | ||
1869 | } | ||
1784 | } | 1870 | } |
1785 | 1871 | ||
1786 | # Check for wrappage within a valid hunk of the file | 1872 | # Check for wrappage within a valid hunk of the file |
@@ -1845,15 +1931,17 @@ sub process { | |||
1845 | #trailing whitespace | 1931 | #trailing whitespace |
1846 | if ($line =~ /^\+.*\015/) { | 1932 | if ($line =~ /^\+.*\015/) { |
1847 | my $herevet = "$here\n" . cat_vet($rawline) . "\n"; | 1933 | my $herevet = "$here\n" . cat_vet($rawline) . "\n"; |
1848 | ERROR("DOS_LINE_ENDINGS", | 1934 | if (ERROR("DOS_LINE_ENDINGS", |
1849 | "DOS line endings\n" . $herevet); | 1935 | "DOS line endings\n" . $herevet) && |
1850 | 1936 | $fix) { | |
1937 | $fixed[$linenr - 1] =~ s/[\s\015]+$//; | ||
1938 | } | ||
1851 | } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { | 1939 | } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { |
1852 | my $herevet = "$here\n" . cat_vet($rawline) . "\n"; | 1940 | my $herevet = "$here\n" . cat_vet($rawline) . "\n"; |
1853 | if (ERROR("TRAILING_WHITESPACE", | 1941 | if (ERROR("TRAILING_WHITESPACE", |
1854 | "trailing whitespace\n" . $herevet) && | 1942 | "trailing whitespace\n" . $herevet) && |
1855 | $fix) { | 1943 | $fix) { |
1856 | $fixed[$linenr - 1] =~ s/^(\+.*?)\s+$/$1/; | 1944 | $fixed[$linenr - 1] =~ s/\s+$//; |
1857 | } | 1945 | } |
1858 | 1946 | ||
1859 | $rpt_cleaners = 1; | 1947 | $rpt_cleaners = 1; |
@@ -2060,6 +2148,7 @@ sub process { | |||
2060 | if ($realfile =~ m@^(drivers/net/|net/)@ && | 2148 | if ($realfile =~ m@^(drivers/net/|net/)@ && |
2061 | $prevrawline =~ /^\+[ \t]*\/\*/ && #starting /* | 2149 | $prevrawline =~ /^\+[ \t]*\/\*/ && #starting /* |
2062 | $prevrawline !~ /\*\/[ \t]*$/ && #no trailing */ | 2150 | $prevrawline !~ /\*\/[ \t]*$/ && #no trailing */ |
2151 | $rawline =~ /^\+/ && #line is new | ||
2063 | $rawline !~ /^\+[ \t]*\*/) { #no leading * | 2152 | $rawline !~ /^\+[ \t]*\*/) { #no leading * |
2064 | WARN("NETWORKING_BLOCK_COMMENT_STYLE", | 2153 | WARN("NETWORKING_BLOCK_COMMENT_STYLE", |
2065 | "networking block comments start with * on subsequent lines\n" . $hereprev); | 2154 | "networking block comments start with * on subsequent lines\n" . $hereprev); |
@@ -2126,7 +2215,7 @@ sub process { | |||
2126 | $realline_next); | 2215 | $realline_next); |
2127 | #print "LINE<$line>\n"; | 2216 | #print "LINE<$line>\n"; |
2128 | if ($linenr >= $suppress_statement && | 2217 | if ($linenr >= $suppress_statement && |
2129 | $realcnt && $line =~ /.\s*\S/) { | 2218 | $realcnt && $sline =~ /.\s*\S/) { |
2130 | ($stat, $cond, $line_nr_next, $remain_next, $off_next) = | 2219 | ($stat, $cond, $line_nr_next, $remain_next, $off_next) = |
2131 | ctx_statement_block($linenr, $realcnt, 0); | 2220 | ctx_statement_block($linenr, $realcnt, 0); |
2132 | $stat =~ s/\n./\n /g; | 2221 | $stat =~ s/\n./\n /g; |
@@ -2486,16 +2575,22 @@ sub process { | |||
2486 | } | 2575 | } |
2487 | 2576 | ||
2488 | # check for global initialisers. | 2577 | # check for global initialisers. |
2489 | if ($line =~ /^.$Type\s*$Ident\s*(?:\s+$Modifier)*\s*=\s*(0|NULL|false)\s*;/) { | 2578 | if ($line =~ /^\+(\s*$Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/) { |
2490 | ERROR("GLOBAL_INITIALISERS", | 2579 | if (ERROR("GLOBAL_INITIALISERS", |
2491 | "do not initialise globals to 0 or NULL\n" . | 2580 | "do not initialise globals to 0 or NULL\n" . |
2492 | $herecurr); | 2581 | $herecurr) && |
2582 | $fix) { | ||
2583 | $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/; | ||
2584 | } | ||
2493 | } | 2585 | } |
2494 | # check for static initialisers. | 2586 | # check for static initialisers. |
2495 | if ($line =~ /\bstatic\s.*=\s*(0|NULL|false)\s*;/) { | 2587 | if ($line =~ /^\+.*\bstatic\s.*=\s*(0|NULL|false)\s*;/) { |
2496 | ERROR("INITIALISED_STATIC", | 2588 | if (ERROR("INITIALISED_STATIC", |
2497 | "do not initialise statics to 0 or NULL\n" . | 2589 | "do not initialise statics to 0 or NULL\n" . |
2498 | $herecurr); | 2590 | $herecurr) && |
2591 | $fix) { | ||
2592 | $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/; | ||
2593 | } | ||
2499 | } | 2594 | } |
2500 | 2595 | ||
2501 | # check for static const char * arrays. | 2596 | # check for static const char * arrays. |
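Editor's note: both initialiser checks are now anchored to added lines (the leading ^\+) and gain --fix support. On a hypothetical input, checkpatch now rewrites:

    /* flagged by GLOBAL_INITIALISERS / INITIALISED_STATIC */
    int global_counter = 0;
    static char *name = NULL;

    /* as emitted by --fix: the BSS zeroes these anyway */
    int global_counter;
    static char *name;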
@@ -2638,8 +2733,12 @@ sub process { | |||
2638 | } | 2733 | } |
2639 | 2734 | ||
2640 | if ($line =~ /\bpr_warning\s*\(/) { | 2735 | if ($line =~ /\bpr_warning\s*\(/) { |
2641 | WARN("PREFER_PR_LEVEL", | 2736 | if (WARN("PREFER_PR_LEVEL", |
2642 | "Prefer pr_warn(... to pr_warning(...\n" . $herecurr); | 2737 | "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) && |
2738 | $fix) { | ||
2739 | $fixed[$linenr - 1] =~ | ||
2740 | s/\bpr_warning\b/pr_warn/; | ||
2741 | } | ||
2643 | } | 2742 | } |
2644 | 2743 | ||
2645 | if ($line =~ /\bdev_printk\s*\(\s*KERN_([A-Z]+)/) { | 2744 | if ($line =~ /\bdev_printk\s*\(\s*KERN_([A-Z]+)/) { |
@@ -2759,6 +2858,7 @@ sub process { | |||
2759 | $off = 0; | 2858 | $off = 0; |
2760 | 2859 | ||
2761 | my $blank = copy_spacing($opline); | 2860 | my $blank = copy_spacing($opline); |
2861 | my $last_after = -1; | ||
2762 | 2862 | ||
2763 | for (my $n = 0; $n < $#elements; $n += 2) { | 2863 | for (my $n = 0; $n < $#elements; $n += 2) { |
2764 | 2864 | ||
@@ -2824,7 +2924,7 @@ sub process { | |||
2824 | $cc !~ /^\\/ && $cc !~ /^;/) { | 2924 | $cc !~ /^\\/ && $cc !~ /^;/) { |
2825 | if (ERROR("SPACING", | 2925 | if (ERROR("SPACING", |
2826 | "space required after that '$op' $at\n" . $hereptr)) { | 2926 | "space required after that '$op' $at\n" . $hereptr)) { |
2827 | $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; | 2927 | $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " "; |
2828 | $line_fixed = 1; | 2928 | $line_fixed = 1; |
2829 | } | 2929 | } |
2830 | } | 2930 | } |
@@ -2839,11 +2939,11 @@ sub process { | |||
2839 | if ($ctx =~ /Wx.|.xW/) { | 2939 | if ($ctx =~ /Wx.|.xW/) { |
2840 | if (ERROR("SPACING", | 2940 | if (ERROR("SPACING", |
2841 | "spaces prohibited around that '$op' $at\n" . $hereptr)) { | 2941 | "spaces prohibited around that '$op' $at\n" . $hereptr)) { |
2842 | $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); | 2942 | $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]); |
2843 | $line_fixed = 1; | ||
2844 | if (defined $fix_elements[$n + 2]) { | 2943 | if (defined $fix_elements[$n + 2]) { |
2845 | $fix_elements[$n + 2] =~ s/^\s+//; | 2944 | $fix_elements[$n + 2] =~ s/^\s+//; |
2846 | } | 2945 | } |
2946 | $line_fixed = 1; | ||
2847 | } | 2947 | } |
2848 | } | 2948 | } |
2849 | 2949 | ||
@@ -2852,8 +2952,9 @@ sub process { | |||
2852 | if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) { | 2952 | if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) { |
2853 | if (ERROR("SPACING", | 2953 | if (ERROR("SPACING", |
2854 | "space required after that '$op' $at\n" . $hereptr)) { | 2954 | "space required after that '$op' $at\n" . $hereptr)) { |
2855 | $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]) . " "; | 2955 | $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " "; |
2856 | $line_fixed = 1; | 2956 | $line_fixed = 1; |
2957 | $last_after = $n; | ||
2857 | } | 2958 | } |
2858 | } | 2959 | } |
2859 | 2960 | ||
@@ -2870,8 +2971,10 @@ sub process { | |||
2870 | if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { | 2971 | if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { |
2871 | if (ERROR("SPACING", | 2972 | if (ERROR("SPACING", |
2872 | "space required before that '$op' $at\n" . $hereptr)) { | 2973 | "space required before that '$op' $at\n" . $hereptr)) { |
2873 | $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]); | 2974 | if ($n != $last_after + 2) { |
2874 | $line_fixed = 1; | 2975 | $good = $fix_elements[$n] . " " . ltrim($fix_elements[$n + 1]); |
2976 | $line_fixed = 1; | ||
2977 | } | ||
2875 | } | 2978 | } |
2876 | } | 2979 | } |
2877 | if ($op eq '*' && $cc =~/\s*$Modifier\b/) { | 2980 | if ($op eq '*' && $cc =~/\s*$Modifier\b/) { |
@@ -2880,12 +2983,11 @@ sub process { | |||
2880 | } elsif ($ctx =~ /.xW/) { | 2983 | } elsif ($ctx =~ /.xW/) { |
2881 | if (ERROR("SPACING", | 2984 | if (ERROR("SPACING", |
2882 | "space prohibited after that '$op' $at\n" . $hereptr)) { | 2985 | "space prohibited after that '$op' $at\n" . $hereptr)) { |
2883 | $fixed_line =~ s/\s+$//; | 2986 | $good = $fix_elements[$n] . rtrim($fix_elements[$n + 1]); |
2884 | $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); | ||
2885 | $line_fixed = 1; | ||
2886 | if (defined $fix_elements[$n + 2]) { | 2987 | if (defined $fix_elements[$n + 2]) { |
2887 | $fix_elements[$n + 2] =~ s/^\s+//; | 2988 | $fix_elements[$n + 2] =~ s/^\s+//; |
2888 | } | 2989 | } |
2990 | $line_fixed = 1; | ||
2889 | } | 2991 | } |
2890 | } | 2992 | } |
2891 | 2993 | ||
@@ -2894,8 +2996,7 @@ sub process { | |||
2894 | if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) { | 2996 | if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) { |
2895 | if (ERROR("SPACING", | 2997 | if (ERROR("SPACING", |
2896 | "space required one side of that '$op' $at\n" . $hereptr)) { | 2998 | "space required one side of that '$op' $at\n" . $hereptr)) { |
2897 | $fixed_line =~ s/\s+$//; | 2999 | $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " "; |
2898 | $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]) . " "; | ||
2899 | $line_fixed = 1; | 3000 | $line_fixed = 1; |
2900 | } | 3001 | } |
2901 | } | 3002 | } |
@@ -2903,20 +3004,18 @@ sub process { | |||
2903 | ($ctx =~ /Wx./ && $cc =~ /^;/)) { | 3004 | ($ctx =~ /Wx./ && $cc =~ /^;/)) { |
2904 | if (ERROR("SPACING", | 3005 | if (ERROR("SPACING", |
2905 | "space prohibited before that '$op' $at\n" . $hereptr)) { | 3006 | "space prohibited before that '$op' $at\n" . $hereptr)) { |
2906 | $fixed_line =~ s/\s+$//; | 3007 | $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]); |
2907 | $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); | ||
2908 | $line_fixed = 1; | 3008 | $line_fixed = 1; |
2909 | } | 3009 | } |
2910 | } | 3010 | } |
2911 | if ($ctx =~ /ExW/) { | 3011 | if ($ctx =~ /ExW/) { |
2912 | if (ERROR("SPACING", | 3012 | if (ERROR("SPACING", |
2913 | "space prohibited after that '$op' $at\n" . $hereptr)) { | 3013 | "space prohibited after that '$op' $at\n" . $hereptr)) { |
2914 | $fixed_line =~ s/\s+$//; | 3014 | $good = $fix_elements[$n] . trim($fix_elements[$n + 1]); |
2915 | $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); | ||
2916 | $line_fixed = 1; | ||
2917 | if (defined $fix_elements[$n + 2]) { | 3015 | if (defined $fix_elements[$n + 2]) { |
2918 | $fix_elements[$n + 2] =~ s/^\s+//; | 3016 | $fix_elements[$n + 2] =~ s/^\s+//; |
2919 | } | 3017 | } |
3018 | $line_fixed = 1; | ||
2920 | } | 3019 | } |
2921 | } | 3020 | } |
2922 | 3021 | ||
@@ -2930,8 +3029,10 @@ sub process { | |||
2930 | if ($ctx =~ /Wx[^WCE]|[^WCE]xW/) { | 3029 | if ($ctx =~ /Wx[^WCE]|[^WCE]xW/) { |
2931 | if (ERROR("SPACING", | 3030 | if (ERROR("SPACING", |
2932 | "need consistent spacing around '$op' $at\n" . $hereptr)) { | 3031 | "need consistent spacing around '$op' $at\n" . $hereptr)) { |
2933 | $fixed_line =~ s/\s+$//; | 3032 | $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; |
2934 | $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; | 3033 | if (defined $fix_elements[$n + 2]) { |
3034 | $fix_elements[$n + 2] =~ s/^\s+//; | ||
3035 | } | ||
2935 | $line_fixed = 1; | 3036 | $line_fixed = 1; |
2936 | } | 3037 | } |
2937 | } | 3038 | } |
@@ -2942,7 +3043,7 @@ sub process { | |||
2942 | if ($ctx =~ /Wx./) { | 3043 | if ($ctx =~ /Wx./) { |
2943 | if (ERROR("SPACING", | 3044 | if (ERROR("SPACING", |
2944 | "space prohibited before that '$op' $at\n" . $hereptr)) { | 3045 | "space prohibited before that '$op' $at\n" . $hereptr)) { |
2945 | $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); | 3046 | $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]); |
2946 | $line_fixed = 1; | 3047 | $line_fixed = 1; |
2947 | } | 3048 | } |
2948 | } | 3049 | } |
@@ -2969,8 +3070,10 @@ sub process { | |||
2969 | if ($ok == 0) { | 3070 | if ($ok == 0) { |
2970 | if (ERROR("SPACING", | 3071 | if (ERROR("SPACING", |
2971 | "spaces required around that '$op' $at\n" . $hereptr)) { | 3072 | "spaces required around that '$op' $at\n" . $hereptr)) { |
2972 | $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; | 3073 | $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; |
2973 | $good = $fix_elements[$n] . " " . trim($fix_elements[$n + 1]) . " "; | 3074 | if (defined $fix_elements[$n + 2]) { |
3075 | $fix_elements[$n + 2] =~ s/^\s+//; | ||
3076 | } | ||
2974 | $line_fixed = 1; | 3077 | $line_fixed = 1; |
2975 | } | 3078 | } |
2976 | } | 3079 | } |
@@ -3031,8 +3134,7 @@ sub process { | |||
3031 | if (ERROR("SPACING", | 3134 | if (ERROR("SPACING", |
3032 | "space required before the open brace '{'\n" . $herecurr) && | 3135 | "space required before the open brace '{'\n" . $herecurr) && |
3033 | $fix) { | 3136 | $fix) { |
3034 | $fixed[$linenr - 1] =~ | 3137 | $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\))){/$1 {/; |
3035 | s/^(\+.*(?:do|\))){/$1 {/; | ||
3036 | } | 3138 | } |
3037 | } | 3139 | } |
3038 | 3140 | ||
@@ -3047,8 +3149,12 @@ sub process { | |||
3047 | # closing brace should have a space following it when it has anything | 3149 | # closing brace should have a space following it when it has anything |
3048 | # on the line | 3150 | # on the line |
3049 | if ($line =~ /}(?!(?:,|;|\)))\S/) { | 3151 | if ($line =~ /}(?!(?:,|;|\)))\S/) { |
3050 | ERROR("SPACING", | 3152 | if (ERROR("SPACING", |
3051 | "space required after that close brace '}'\n" . $herecurr); | 3153 | "space required after that close brace '}'\n" . $herecurr) && |
3154 | $fix) { | ||
3155 | $fixed[$linenr - 1] =~ | ||
3156 | s/}((?!(?:,|;|\)))\S)/} $1/; | ||
3157 | } | ||
3052 | } | 3158 | } |
3053 | 3159 | ||
3054 | # check spacing on square brackets | 3160 | # check spacing on square brackets |
@@ -3271,8 +3377,13 @@ sub process { | |||
3271 | 3377 | ||
3272 | #gcc binary extension | 3378 | #gcc binary extension |
3273 | if ($var =~ /^$Binary$/) { | 3379 | if ($var =~ /^$Binary$/) { |
3274 | WARN("GCC_BINARY_CONSTANT", | 3380 | if (WARN("GCC_BINARY_CONSTANT", |
3275 | "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr); | 3381 | "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) && |
3382 | $fix) { | ||
3383 | my $hexval = sprintf("0x%x", oct($var)); | ||
3384 | $fixed[$linenr - 1] =~ | ||
3385 | s/\b$var\b/$hexval/; | ||
3386 | } | ||
3276 | } | 3387 | } |
3277 | 3388 | ||
3278 | #CamelCase | 3389 | #CamelCase |
@@ -3282,19 +3393,26 @@ sub process { | |||
3282 | $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && | 3393 | $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && |
3283 | #Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show) | 3394 | #Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show) |
3284 | $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/) { | 3395 | $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/) { |
3285 | seed_camelcase_includes() if ($check); | 3396 | while ($var =~ m{($Ident)}g) { |
3286 | if (!defined $camelcase{$var}) { | 3397 | my $word = $1; |
3287 | $camelcase{$var} = 1; | 3398 | next if ($word !~ /[A-Z][a-z]|[a-z][A-Z]/); |
3288 | CHK("CAMELCASE", | 3399 | seed_camelcase_includes() if ($check); |
3289 | "Avoid CamelCase: <$var>\n" . $herecurr); | 3400 | if (!defined $camelcase{$word}) { |
3401 | $camelcase{$word} = 1; | ||
3402 | CHK("CAMELCASE", | ||
3403 | "Avoid CamelCase: <$word>\n" . $herecurr); | ||
3404 | } | ||
3290 | } | 3405 | } |
3291 | } | 3406 | } |
3292 | } | 3407 | } |
3293 | 3408 | ||
3294 | #no spaces allowed after \ in define | 3409 | #no spaces allowed after \ in define |
3295 | if ($line=~/\#\s*define.*\\\s$/) { | 3410 | if ($line =~ /\#\s*define.*\\\s+$/) { |
3296 | WARN("WHITESPACE_AFTER_LINE_CONTINUATION", | 3411 | if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION", |
3297 | "Whitepspace after \\ makes next lines useless\n" . $herecurr); | 3412 | "Whitespace after \\ makes next lines useless\n" . $herecurr) && |
3413 | $fix) { | ||
3414 | $fixed[$linenr - 1] =~ s/\s+$//; | ||
3415 | } | ||
3298 | } | 3416 | } |
3299 | 3417 | ||
3300 | #warn if <asm/foo.h> is #included and <linux/foo.h> is available (uses RAW line) | 3418 | #warn if <asm/foo.h> is #included and <linux/foo.h> is available (uses RAW line) |
@@ -3374,7 +3492,8 @@ sub process { | |||
3374 | $dstat !~ /^for\s*$Constant$/ && # for (...) | 3492 | $dstat !~ /^for\s*$Constant$/ && # for (...) |
3375 | $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar() | 3493 | $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar() |
3376 | $dstat !~ /^do\s*{/ && # do {... | 3494 | $dstat !~ /^do\s*{/ && # do {... |
3377 | $dstat !~ /^\({/) # ({... | 3495 | $dstat !~ /^\({/ && # ({... |
3496 | $ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/) | ||
3378 | { | 3497 | { |
3379 | $ctx =~ s/\n*$//; | 3498 | $ctx =~ s/\n*$//; |
3380 | my $herectx = $here . "\n"; | 3499 | my $herectx = $here . "\n"; |
@@ -3606,6 +3725,32 @@ sub process { | |||
3606 | } | 3725 | } |
3607 | } | 3726 | } |
3608 | 3727 | ||
3728 | sub string_find_replace { | ||
3729 | my ($string, $find, $replace) = @_; | ||
3730 | |||
3731 | $string =~ s/$find/$replace/g; | ||
3732 | |||
3733 | return $string; | ||
3734 | } | ||
3735 | |||
3736 | # check for bad placement of section $InitAttribute (e.g.: __initdata) | ||
3737 | if ($line =~ /(\b$InitAttribute\b)/) { | ||
3738 | my $attr = $1; | ||
3739 | if ($line =~ /^\+\s*static\s+(?:const\s+)?(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*[=;]/) { | ||
3740 | my $ptr = $1; | ||
3741 | my $var = $2; | ||
3742 | if ((($ptr =~ /\b(union|struct)\s+$attr\b/ && | ||
3743 | ERROR("MISPLACED_INIT", | ||
3744 | "$attr should be placed after $var\n" . $herecurr)) || | ||
3745 | ($ptr !~ /\b(union|struct)\s+$attr\b/ && | ||
3746 | WARN("MISPLACED_INIT", | ||
3747 | "$attr should be placed after $var\n" . $herecurr))) && | ||
3748 | $fix) { | ||
3749 | $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e; | ||
3750 | } | ||
3751 | } | ||
3752 | } | ||
3753 | |||
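
Example (not part of the patch): a heavily simplified sketch of the new MISPLACED_INIT rewrite on one hypothetical declaration. The real check matches $NonptrTypeWithAttr/$Ident, distinguishes ERROR (attribute inside a struct/union type) from WARN, and edits $fixed[] in place; here the type pattern is reduced to "struct \w+" and trim() is elided because the sample produces no stray whitespace.

    #!/usr/bin/perl
    use strict;
    use warnings;

    # Same helper the patch introduces.
    sub string_find_replace {
        my ($string, $find, $replace) = @_;

        $string =~ s/$find/$replace/g;

        return $string;
    }

    my $attr = "__initdata";
    my $line = "static __initdata struct foo bar = { 0 };";

    # Move the section attribute after the variable name.
    $line =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?(struct\s+\w+)\s+(?:$attr\s+)?(\w+)\s*([=;])\s*/"$1" . string_find_replace($2, "\\s*$attr\\s*", " ") . " " . string_find_replace($3, "\\s*$attr\\s*", "") . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;

    print "$line\n";    # static struct foo bar __initdata = { 0 };
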
3609 | # prefer usleep_range over udelay | 3754 | # prefer usleep_range over udelay |
3610 | if ($line =~ /\budelay\s*\(\s*(\d+)\s*\)/) { | 3755 | if ($line =~ /\budelay\s*\(\s*(\d+)\s*\)/) { |
3611 | # ignore udelay's < 10, however | 3756 | # ignore udelay's < 10, however |
@@ -3691,8 +3836,12 @@ sub process { | |||
3691 | 3836 | ||
3692 | # Check for __inline__ and __inline, prefer inline | 3837 | # Check for __inline__ and __inline, prefer inline |
3693 | if ($line =~ /\b(__inline__|__inline)\b/) { | 3838 | if ($line =~ /\b(__inline__|__inline)\b/) { |
3694 | WARN("INLINE", | 3839 | if (WARN("INLINE", |
3695 | "plain inline is preferred over $1\n" . $herecurr); | 3840 | "plain inline is preferred over $1\n" . $herecurr) && |
3841 | $fix) { | ||
3842 | $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/; | ||
3843 | |||
3844 | } | ||
3696 | } | 3845 | } |
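
Example (not part of the patch): the new INLINE --fix substitution, exactly as added by the patch, applied to a hypothetical line.

    #!/usr/bin/perl
    use strict;
    use warnings;

    my $fixed = "static __inline__ int foo(void)";
    $fixed =~ s/\b(__inline__|__inline)\b/inline/;
    print "$fixed\n";    # static inline int foo(void)
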
3697 | 3846 | ||
3698 | # Check for __attribute__ packed, prefer __packed | 3847 | # Check for __attribute__ packed, prefer __packed |
@@ -3709,14 +3858,21 @@ sub process { | |||
3709 | 3858 | ||
3710 | # Check for __attribute__ format(printf, prefer __printf | 3859 | # Check for __attribute__ format(printf, prefer __printf |
3711 | if ($line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) { | 3860 | if ($line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) { |
3712 | WARN("PREFER_PRINTF", | 3861 | if (WARN("PREFER_PRINTF", |
3713 | "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr); | 3862 | "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) && |
3863 | $fix) { | ||
3864 | $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex; | ||
3865 | |||
3866 | } | ||
3714 | } | 3867 | } |
3715 | 3868 | ||
3716 | # Check for __attribute__ format(scanf, prefer __scanf | 3869 | # Check for __attribute__ format(scanf, prefer __scanf |
3717 | if ($line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\b/) { | 3870 | if ($line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\b/) { |
3718 | WARN("PREFER_SCANF", | 3871 | if (WARN("PREFER_SCANF", |
3719 | "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr); | 3872 | "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) && |
3873 | $fix) { | ||
3874 | $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex; | ||
3875 | } | ||
3720 | } | 3876 | } |
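
Example (not part of the patch): the PREFER_PRINTF --fix rewrite on a hypothetical attribute; the PREFER_SCANF rewrite above is identical with printf/__printf replaced by scanf/__scanf. checkpatch's trim() helper is approximated with a simple strip.

    #!/usr/bin/perl
    use strict;
    use warnings;

    # Approximation of checkpatch's trim(): strip leading/trailing whitespace.
    sub trim {
        my ($str) = @_;
        $str =~ s/^\s+|\s+$//g;
        return $str;
    }

    my $line = "__attribute__((format(printf, 1, 2)))";
    $line =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
    print "$line\n";    # __printf(1, 2)
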
3721 | 3877 | ||
3722 | # check for sizeof(&) | 3878 | # check for sizeof(&) |
@@ -3727,8 +3883,11 @@ sub process { | |||
3727 | 3883 | ||
3728 | # check for sizeof without parenthesis | 3884 | # check for sizeof without parenthesis |
3729 | if ($line =~ /\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/) { | 3885 | if ($line =~ /\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/) { |
3730 | WARN("SIZEOF_PARENTHESIS", | 3886 | if (WARN("SIZEOF_PARENTHESIS", |
3731 | "sizeof $1 should be sizeof($1)\n" . $herecurr); | 3887 | "sizeof $1 should be sizeof($1)\n" . $herecurr) && |
3888 | $fix) { | ||
3889 | $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex; | ||
3890 | } | ||
3732 | } | 3891 | } |
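
Example (not part of the patch): the SIZEOF_PARENTHESIS --fix rewrite; the $Lval|$Type alternation is reduced to a simple operand pattern here, and the sample statement is hypothetical.

    #!/usr/bin/perl
    use strict;
    use warnings;

    sub trim { my ($s) = @_; $s =~ s/^\s+|\s+$//g; return $s; }

    my $line = "len = sizeof *skb + 4;";
    # Wrap the bare sizeof operand in parentheses.
    $line =~ s/\bsizeof\s+((?:\*\s*|)\w+)/"sizeof(" . trim($1) . ")"/e;
    print "$line\n";    # len = sizeof(*skb) + 4;
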
3733 | 3892 | ||
3734 | # check for line continuations in quoted strings with odd counts of " | 3893 | # check for line continuations in quoted strings with odd counts of " |
@@ -3747,8 +3906,11 @@ sub process { | |||
3747 | if ($line =~ /\bseq_printf\s*\(/) { | 3906 | if ($line =~ /\bseq_printf\s*\(/) { |
3748 | my $fmt = get_quoted_string($line, $rawline); | 3907 | my $fmt = get_quoted_string($line, $rawline); |
3749 | if ($fmt !~ /[^\\]\%/) { | 3908 | if ($fmt !~ /[^\\]\%/) { |
3750 | WARN("PREFER_SEQ_PUTS", | 3909 | if (WARN("PREFER_SEQ_PUTS", |
3751 | "Prefer seq_puts to seq_printf\n" . $herecurr); | 3910 | "Prefer seq_puts to seq_printf\n" . $herecurr) && |
3911 | $fix) { | ||
3912 | $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/; | ||
3913 | } | ||
3752 | } | 3914 | } |
3753 | } | 3915 | } |
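
Example (not part of the patch): a sketch of the PREFER_SEQ_PUTS --fix path — only calls whose format string contains no unescaped '%' are rewritten. get_quoted_string(), which returns the format string including its surrounding quotes, is approximated with a plain capture; both sample calls are hypothetical.

    #!/usr/bin/perl
    use strict;
    use warnings;

    for my $sample ('seq_printf(m, "constant text\n");',
                    'seq_printf(m, "%d\n", v);') {
        my $line = $sample;
        my ($fmt) = $line =~ /("[^"]*")/;     # keep the quotes, as checkpatch does
        if ($fmt !~ /[^\\]\%/) {
            $line =~ s/\bseq_printf\b/seq_puts/;
        }
        print "$line\n";
    }
    # seq_puts(m, "constant text\n");
    # seq_printf(m, "%d\n", v);
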
3754 | 3916 | ||
@@ -3810,6 +3972,16 @@ sub process { | |||
3810 | } | 3972 | } |
3811 | } | 3973 | } |
3812 | 3974 | ||
3975 | # check for new externs in .h files. | ||
3976 | if ($realfile =~ /\.h$/ && | ||
3977 | $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) { | ||
3978 | if (WARN("AVOID_EXTERNS", | ||
3979 | "extern prototypes should be avoided in .h files\n" . $herecurr) && | ||
3980 | $fix) { | ||
3981 | $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; | ||
3982 | } | ||
3983 | } | ||
3984 | |||
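
Example (not part of the patch): a sketch of the new header-file AVOID_EXTERNS check and its --fix action; $Type and $Ident are reduced to \w+ and the prototype is hypothetical. The leading '+' is the added-line marker checkpatch matches on.

    #!/usr/bin/perl
    use strict;
    use warnings;

    my $line = "+extern int foo_init(void);";
    if ($line =~ /^\+\s*(extern\s+)\w+\s+\w+\s*\(/) {
        print "extern prototypes should be avoided in .h files\n";
        $line =~ s/(.*)\bextern\b\s*(.*)/$1$2/;   # drop the redundant keyword
    }
    print "$line\n";    # +int foo_init(void);
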
3813 | # check for new externs in .c files. | 3985 | # check for new externs in .c files. |
3814 | if ($realfile =~ /\.c$/ && defined $stat && | 3986 | if ($realfile =~ /\.c$/ && defined $stat && |
3815 | $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s) | 3987 | $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s) |
@@ -3879,8 +4051,11 @@ sub process { | |||
3879 | 4051 | ||
3880 | # check for multiple semicolons | 4052 | # check for multiple semicolons |
3881 | if ($line =~ /;\s*;\s*$/) { | 4053 | if ($line =~ /;\s*;\s*$/) { |
3882 | WARN("ONE_SEMICOLON", | 4054 | if (WARN("ONE_SEMICOLON", |
3883 | "Statements terminations use 1 semicolon\n" . $herecurr); | 4055 | "Statements terminations use 1 semicolon\n" . $herecurr) && |
4056 | $fix) { | ||
4057 | $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g; | ||
4058 | } | ||
3884 | } | 4059 | } |
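
Example (not part of the patch): the ONE_SEMICOLON --fix substitution, exactly as added by the patch, on a hypothetical line.

    #!/usr/bin/perl
    use strict;
    use warnings;

    my $line = "return 0;;";
    $line =~ s/(\s*;\s*){2,}$/;/g;
    print "$line\n";    # return 0;
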
3885 | 4060 | ||
3886 | # check for switch/default statements without a break; | 4061 | # check for switch/default statements without a break; |
@@ -3898,9 +4073,12 @@ sub process { | |||
3898 | } | 4073 | } |
3899 | 4074 | ||
3900 | # check for gcc specific __FUNCTION__ | 4075 | # check for gcc specific __FUNCTION__ |
3901 | if ($line =~ /__FUNCTION__/) { | 4076 | if ($line =~ /\b__FUNCTION__\b/) { |
3902 | WARN("USE_FUNC", | 4077 | if (WARN("USE_FUNC", |
3903 | "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr); | 4078 | "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) && |
4079 | $fix) { | ||
4080 | $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g; | ||
4081 | } | ||
3904 | } | 4082 | } |
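
Example (not part of the patch): the USE_FUNC --fix substitution; the \b anchors added by the patch keep the check from firing inside longer identifiers such as a hypothetical MY__FUNCTION__X.

    #!/usr/bin/perl
    use strict;
    use warnings;

    my $line = 'pr_err("%s: failed\n", __FUNCTION__);';
    if ($line =~ /\b__FUNCTION__\b/) {
        $line =~ s/\b__FUNCTION__\b/__func__/g;
    }
    print "$line\n";    # pr_err("%s: failed\n", __func__);
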
3905 | 4083 | ||
3906 | # check for use of yield() | 4084 | # check for use of yield() |
@@ -4105,13 +4283,8 @@ sub process { | |||
4105 | } | 4283 | } |
4106 | } | 4284 | } |
4107 | 4285 | ||
4108 | if ($quiet == 0 && keys %ignore_type) { | 4286 | hash_show_words(\%use_type, "Used"); |
4109 | print "NOTE: Ignored message types:"; | 4287 | hash_show_words(\%ignore_type, "Ignored"); |
4110 | foreach my $ignore (sort keys %ignore_type) { | ||
4111 | print " $ignore"; | ||
4112 | } | ||
4113 | print "\n\n"; | ||
4114 | } | ||
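
Example (not part of the patch): a hypothetical reconstruction of the hash_show_words() helper that replaces the removed inline loop. Its real body is defined elsewhere in the patch and is not shown in this hunk, so this is only the removed loop generalized over a hash reference and a label, with the --quiet gate folded in as a plain variable to keep the sketch self-contained.

    #!/usr/bin/perl
    use strict;
    use warnings;

    my $quiet = 0;

    # Hypothetical body: the old %ignore_type loop, parameterized.
    sub hash_show_words {
        my ($hashRef, $prefix) = @_;

        if ($quiet == 0 && keys %$hashRef) {
            print "NOTE: $prefix message types:";
            foreach my $word (sort keys %$hashRef) {
                print " $word";
            }
            print "\n\n";
        }
    }

    my %ignore_type = ("CAMELCASE" => 1);
    hash_show_words(\%ignore_type, "Ignored");
    # NOTE: Ignored message types: CAMELCASE
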
4115 | 4288 | ||
4116 | if ($clean == 0 && $fix && "@rawlines" ne "@fixed") { | 4289 | if ($clean == 0 && $fix && "@rawlines" ne "@fixed") { |
4117 | my $newfile = $filename . ".EXPERIMENTAL-checkpatch-fixes"; | 4290 | my $newfile = $filename . ".EXPERIMENTAL-checkpatch-fixes"; |