-rw-r--r--  Documentation/acpi/apei/einj.txt  11
-rw-r--r--  Documentation/devicetree/bindings/gpio/gpio_keys.txt  2
-rw-r--r--  Documentation/devicetree/bindings/input/fsl-mma8450.txt  11
-rw-r--r--  Documentation/fault-injection/fault-injection.txt  3
-rw-r--r--  Documentation/feature-removal-schedule.txt  9
-rw-r--r--  Documentation/frv/booting.txt  13
-rw-r--r--  Documentation/ioctl/ioctl-number.txt  1
-rw-r--r--  Documentation/kernel-parameters.txt  9
-rw-r--r--  Documentation/m68k/kernel-options.txt  14
-rw-r--r--  MAINTAINERS  7
-rw-r--r--  arch/Kconfig  3
-rw-r--r--  arch/alpha/Kconfig  1
-rw-r--r--  arch/arm/kernel/process.c  4
-rw-r--r--  arch/avr32/Kconfig  1
-rw-r--r--  arch/cris/arch-v10/drivers/sync_serial.c  6
-rw-r--r--  arch/cris/arch-v10/kernel/irq.c  3
-rw-r--r--  arch/cris/include/asm/thread_info.h  6
-rw-r--r--  arch/frv/Kconfig  1
-rw-r--r--  arch/ia64/Kconfig  1
-rw-r--r--  arch/m68k/Kconfig  1
-rw-r--r--  arch/parisc/Kconfig  1
-rw-r--r--  arch/powerpc/Kconfig  1
-rw-r--r--  arch/s390/Kconfig  1
-rw-r--r--  arch/sh/Kconfig  1
-rw-r--r--  arch/sh/kernel/idle.c  6
-rw-r--r--  arch/sparc/Kconfig  1
-rw-r--r--  arch/tile/Kconfig  1
-rw-r--r--  arch/tile/include/asm/Kbuild  38
-rw-r--r--  arch/tile/include/asm/bug.h  1
-rw-r--r--  arch/tile/include/asm/bugs.h  1
-rw-r--r--  arch/tile/include/asm/cputime.h  1
-rw-r--r--  arch/tile/include/asm/device.h  1
-rw-r--r--  arch/tile/include/asm/div64.h  1
-rw-r--r--  arch/tile/include/asm/emergency-restart.h  1
-rw-r--r--  arch/tile/include/asm/errno.h  1
-rw-r--r--  arch/tile/include/asm/fb.h  1
-rw-r--r--  arch/tile/include/asm/fcntl.h  1
-rw-r--r--  arch/tile/include/asm/fixmap.h  6
-rw-r--r--  arch/tile/include/asm/ioctl.h  1
-rw-r--r--  arch/tile/include/asm/ioctls.h  1
-rw-r--r--  arch/tile/include/asm/ipc.h  1
-rw-r--r--  arch/tile/include/asm/ipcbuf.h  1
-rw-r--r--  arch/tile/include/asm/irq_regs.h  1
-rw-r--r--  arch/tile/include/asm/kdebug.h  1
-rw-r--r--  arch/tile/include/asm/local.h  1
-rw-r--r--  arch/tile/include/asm/module.h  1
-rw-r--r--  arch/tile/include/asm/msgbuf.h  1
-rw-r--r--  arch/tile/include/asm/mutex.h  1
-rw-r--r--  arch/tile/include/asm/param.h  1
-rw-r--r--  arch/tile/include/asm/parport.h  1
-rw-r--r--  arch/tile/include/asm/poll.h  1
-rw-r--r--  arch/tile/include/asm/posix_types.h  1
-rw-r--r--  arch/tile/include/asm/resource.h  1
-rw-r--r--  arch/tile/include/asm/scatterlist.h  1
-rw-r--r--  arch/tile/include/asm/sembuf.h  1
-rw-r--r--  arch/tile/include/asm/serial.h  1
-rw-r--r--  arch/tile/include/asm/shmbuf.h  1
-rw-r--r--  arch/tile/include/asm/shmparam.h  1
-rw-r--r--  arch/tile/include/asm/socket.h  1
-rw-r--r--  arch/tile/include/asm/sockios.h  1
-rw-r--r--  arch/tile/include/asm/statfs.h  1
-rw-r--r--  arch/tile/include/asm/termbits.h  1
-rw-r--r--  arch/tile/include/asm/termios.h  1
-rw-r--r--  arch/tile/include/asm/types.h  1
-rw-r--r--  arch/tile/include/asm/ucontext.h  1
-rw-r--r--  arch/tile/include/asm/xor.h  1
-rw-r--r--  arch/tile/include/hv/drv_srom_intf.h  41
-rw-r--r--  arch/tile/kernel/time.c  5
-rw-r--r--  arch/tile/mm/init.c  3
-rw-r--r--  arch/x86/Kconfig  1
-rw-r--r--  arch/x86/include/asm/io.h  3
-rw-r--r--  arch/x86/include/asm/processor.h  2
-rw-r--r--  arch/x86/kernel/acpi/cstate.c  23
-rw-r--r--  arch/x86/kernel/process.c  23
-rw-r--r--  arch/x86/kernel/process_32.c  4
-rw-r--r--  arch/x86/kernel/process_64.c  4
-rw-r--r--  arch/x86/platform/mrst/Makefile  1
-rw-r--r--  arch/x86/platform/mrst/pmu.c  817
-rw-r--r--  arch/x86/platform/mrst/pmu.h  234
-rw-r--r--  arch/x86/xen/setup.c  3
-rw-r--r--  block/blk-core.c  6
-rw-r--r--  block/blk-timeout.c  5
-rw-r--r--  drivers/acpi/acpica/acglobal.h  6
-rw-r--r--  drivers/acpi/acpica/aclocal.h  1
-rw-r--r--  drivers/acpi/acpica/acpredef.h  1
-rw-r--r--  drivers/acpi/acpica/nspredef.c  19
-rw-r--r--  drivers/acpi/acpica/nsrepair2.c  15
-rw-r--r--  drivers/acpi/acpica/tbinstal.c  27
-rw-r--r--  drivers/acpi/apei/Kconfig  11
-rw-r--r--  drivers/acpi/apei/apei-base.c  35
-rw-r--r--  drivers/acpi/apei/apei-internal.h  15
-rw-r--r--  drivers/acpi/apei/einj.c  43
-rw-r--r--  drivers/acpi/apei/erst-dbg.c  6
-rw-r--r--  drivers/acpi/apei/erst.c  12
-rw-r--r--  drivers/acpi/apei/ghes.c  431
-rw-r--r--  drivers/acpi/apei/hest.c  17
-rw-r--r--  drivers/acpi/battery.c  82
-rw-r--r--  drivers/acpi/bus.c  14
-rw-r--r--  drivers/acpi/dock.c  4
-rw-r--r--  drivers/acpi/ec_sys.c  2
-rw-r--r--  drivers/acpi/fan.c  2
-rw-r--r--  drivers/acpi/osl.c  25
-rw-r--r--  drivers/acpi/pci_irq.c  58
-rw-r--r--  drivers/acpi/pci_root.c  3
-rw-r--r--  drivers/acpi/processor_thermal.c  2
-rw-r--r--  drivers/acpi/sbs.c  13
-rw-r--r--  drivers/acpi/sleep.c  16
-rw-r--r--  drivers/acpi/sysfs.c  4
-rw-r--r--  drivers/acpi/thermal.c  2
-rw-r--r--  drivers/acpi/video.c  2
-rw-r--r--  drivers/ata/libata-acpi.c  4
-rw-r--r--  drivers/char/Kconfig  11
-rw-r--r--  drivers/char/Makefile  2
-rw-r--r--  drivers/char/ramoops.c  8
-rw-r--r--  drivers/char/tile-srom.c  481
-rw-r--r--  drivers/char/tpm/tpm_tis.c  7
-rw-r--r--  drivers/cpuidle/cpuidle.c  50
-rw-r--r--  drivers/cpuidle/cpuidle.h  1
-rw-r--r--  drivers/cpuidle/driver.c  3
-rw-r--r--  drivers/cpuidle/governor.c  3
-rw-r--r--  drivers/eisa/pci_eisa.c  4
-rw-r--r--  drivers/firmware/efivars.c  6
-rw-r--r--  drivers/input/keyboard/gpio_keys.c  2
-rw-r--r--  drivers/input/keyboard/lm8323.c  9
-rw-r--r--  drivers/input/keyboard/tegra-kbc.c  5
-rw-r--r--  drivers/input/misc/kxtj9.c  1
-rw-r--r--  drivers/input/misc/mma8450.c  8
-rw-r--r--  drivers/input/mouse/hgpk.c  1
-rw-r--r--  drivers/input/touchscreen/ad7879.c  4
-rw-r--r--  drivers/of/base.c  130
-rw-r--r--  drivers/of/fdt.c  4
-rw-r--r--  drivers/pci/hotplug/acpiphp_glue.c  2
-rw-r--r--  drivers/rtc/rtc-omap.c  2
-rw-r--r--  drivers/target/iscsi/Kconfig  1
-rw-r--r--  drivers/target/iscsi/iscsi_target.c  15
-rw-r--r--  drivers/target/iscsi/iscsi_target_configfs.c  2
-rw-r--r--  drivers/target/iscsi/iscsi_target_nego.c  2
-rw-r--r--  drivers/target/target_core_transport.c  33
-rw-r--r--  drivers/target/tcm_fc/tcm_fc.h  5
-rw-r--r--  drivers/target/tcm_fc/tfc_cmd.c  1
-rw-r--r--  drivers/target/tcm_fc/tfc_io.c  121
-rw-r--r--  drivers/thermal/Kconfig  8
-rw-r--r--  drivers/thermal/thermal_sys.c  142
-rw-r--r--  drivers/video/backlight/Kconfig  2
-rw-r--r--  drivers/video/backlight/aat2870_bl.c  8
-rw-r--r--  fs/Kconfig  15
-rw-r--r--  fs/dcache.c  1
-rw-r--r--  fs/ext4/super.c  2
-rw-r--r--  fs/stack.c  5
-rw-r--r--  include/acpi/acpi_drivers.h  2
-rw-r--r--  include/acpi/acpixf.h  3
-rw-r--r--  include/acpi/apei.h  5
-rw-r--r--  include/acpi/processor.h  2
-rw-r--r--  include/linux/acpi.h  3
-rw-r--r--  include/linux/bitmap.h  1
-rw-r--r--  include/linux/cpuidle.h  4
-rw-r--r--  include/linux/fault-inject.h  18
-rw-r--r--  include/linux/genalloc.h  34
-rw-r--r--  include/linux/gfp.h  2
-rw-r--r--  include/linux/idr.h  4
-rw-r--r--  include/linux/llist.h  126
-rw-r--r--  include/linux/memcontrol.h  8
-rw-r--r--  include/linux/mfd/aat2870.h  2
-rw-r--r--  include/linux/mm.h  1
-rw-r--r--  include/linux/of.h  8
-rw-r--r--  include/linux/of_fdt.h  1
-rw-r--r--  include/linux/radix-tree.h  37
-rw-r--r--  include/linux/shmem_fs.h  17
-rw-r--r--  include/linux/swapops.h  23
-rw-r--r--  include/linux/thermal.h  22
-rw-r--r--  init/main.c  7
-rw-r--r--  ipc/shm.c  7
-rw-r--r--  kernel/kmod.c  2
-rw-r--r--  kernel/taskstats.c  18
-rw-r--r--  lib/Kconfig  3
-rw-r--r--  lib/Makefile  2
-rw-r--r--  lib/bitmap.c  2
-rw-r--r--  lib/fault-inject.c  20
-rw-r--r--  lib/genalloc.c  300
-rw-r--r--  lib/idr.c  67
-rw-r--r--  lib/llist.c  129
-rw-r--r--  lib/radix-tree.c  121
-rw-r--r--  mm/failslab.c  14
-rw-r--r--  mm/filemap.c  106
-rw-r--r--  mm/memcontrol.c  66
-rw-r--r--  mm/memory-failure.c  92
-rw-r--r--  mm/mincore.c  11
-rw-r--r--  mm/page_alloc.c  13
-rw-r--r--  mm/shmem.c  1493
-rw-r--r--  mm/swapfile.c  20
-rw-r--r--  mm/truncate.c  8
-rw-r--r--  tools/power/x86/turbostat/turbostat.c  46
-rw-r--r--  tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c  5
193 files changed, 4525 insertions(+), 1704 deletions(-)
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index dfab71848dc8..5cc699ba5453 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -48,12 +48,19 @@ directory apei/einj. The following files are provided.
 - param1
 	This file is used to set the first error parameter value. Effect of
 	parameter depends on error_type specified. For memory error, this is
-	physical memory address.
+	physical memory address. Only available if param_extension module
+	parameter is specified.
 
 - param2
 	This file is used to set the second error parameter value. Effect of
 	parameter depends on error_type specified. For memory error, this is
-	physical memory address mask.
+	physical memory address mask. Only available if param_extension
+	module parameter is specified.
+
+Injecting parameter support is a BIOS version specific extension, that
+is, it only works on some BIOS version. If you want to use it, please
+make sure your BIOS version has the proper support and specify
+"param_extension=y" in module parameter.
 
 For more information about EINJ, please refer to ACPI specification
 version 4.0, section 17.5.
diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
index 7190c99d7611..5c2c02140a62 100644
--- a/Documentation/devicetree/bindings/gpio/gpio_keys.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
@@ -10,7 +10,7 @@ Optional properties:
 Each button (key) is represented as a sub-node of "gpio-keys":
 Subnode properties:
 
-	- gpios: OF devcie-tree gpio specificatin.
+	- gpios: OF device-tree gpio specification.
 	- label: Descriptive name of the key.
 	- linux,code: Keycode to emit.
 
diff --git a/Documentation/devicetree/bindings/input/fsl-mma8450.txt b/Documentation/devicetree/bindings/input/fsl-mma8450.txt
new file mode 100644
index 000000000000..a00c94ccbdee
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/fsl-mma8450.txt
@@ -0,0 +1,11 @@
+* Freescale MMA8450 3-Axis Accelerometer
+
+Required properties:
+- compatible : "fsl,mma8450".
+
+Example:
+
+accelerometer: mma8450@1c {
+	compatible = "fsl,mma8450";
+	reg = <0x1c>;
+};
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 7be15e44d481..82a5d250d75e 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -143,8 +143,7 @@ o provide a way to configure fault attributes
 failslab, fail_page_alloc, and fail_make_request use this way.
 Helper functions:
 
-	init_fault_attr_dentries(entries, attr, name);
-	void cleanup_fault_attr_dentries(entries);
+	fault_create_debugfs_attr(name, parent, attr);
 
 - module parameters
 
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ea0bace0124a..43f48098220d 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -296,15 +296,6 @@ Who: Ravikiran Thirumalai <kiran@scalex86.org>
 
 ---------------------------
 
-What:	CONFIG_THERMAL_HWMON
-When:	January 2009
-Why:	This option was introduced just to allow older lm-sensors userspace
-	to keep working over the upgrade to 2.6.26. At the scheduled time of
-	removal fixed lm-sensors (2.x or 3.x) should be readily available.
-Who:	Rene Herman <rene.herman@gmail.com>
-
----------------------------
-
 What:	Code that is now under CONFIG_WIRELESS_EXT_SYSFS
 	(in net/core/net-sysfs.c)
 When:	After the only user (hal) has seen a release with the patches
diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt
index ace200b7c214..37c4d84a0e57 100644
--- a/Documentation/frv/booting.txt
+++ b/Documentation/frv/booting.txt
@@ -106,13 +106,20 @@ separated by spaces:
      To use the first on-chip serial port at baud rate 115200, no parity, 8
      bits, and no flow control.
 
- (*) root=/dev/<xxxx>
+ (*) root=<xxxx>
 
-     This specifies the device upon which the root filesystem resides. For
-     example:
+     This specifies the device upon which the root filesystem resides. It
+     may be specified by major and minor number, device path, or even
+     partition uuid, if supported. For example:
 
 	/dev/nfs	NFS root filesystem
 	/dev/mtdblock3	Fourth RedBoot partition on the System Flash
+	PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1
+		first partition after the partition with the given UUID
+	253:0		Device with major 253 and minor 0
+
+     Authoritative information can be found in
+     "Documentation/kernel-parameters.txt".
 
  (*) rw
 
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 72ba8d51dbc1..845a191004b1 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -292,6 +292,7 @@ Code Seq#(hex) Include File Comments
 					<mailto:buk@buks.ipn.de>
 0xA0	all	linux/sdp/sdp.h		Industrial Device Project
 					<mailto:kenji@bitgate.com>
+0xA2	00-0F	arch/tile/include/asm/hardwall.h
 0xA3	80-8F	Port ACL		in development:
 					<mailto:tlewis@mindspring.com>
 0xA3	90-9F	linux/dtlk.h
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 26a83743af19..e279b7242912 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -163,6 +163,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 			See also Documentation/power/pm.txt, pci=noacpi
 
+	acpi_rsdp=	[ACPI,EFI,KEXEC]
+			Pass the RSDP address to the kernel, mostly used
+			on machines running EFI runtime service to boot the
+			second kernel for kdump.
+
 	acpi_apic_instance=	[ACPI, IOAPIC]
 			Format: <int>
 			2: use 2nd APIC table, if available
@@ -546,6 +551,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			/proc/<pid>/coredump_filter.
 			See also Documentation/filesystems/proc.txt.
 
+	cpuidle.off=1	[CPU_IDLE]
+			disable the cpuidle sub-system
+
 	cpcihp_generic=	[HW,PCI] Generic port I/O CompactPCI driver
 			Format:
 			<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
@@ -2240,6 +2248,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	ro		[KNL] Mount root device read-only on boot
 
 	root=		[KNL] Root filesystem
+			See name_to_dev_t comment in init/do_mounts.c.
 
 	rootdelay=	[KNL] Delay (in seconds) to pause before attempting to
 			mount the root filesystem
diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt
index c93bed66e25d..97d45f276fe6 100644
--- a/Documentation/m68k/kernel-options.txt
+++ b/Documentation/m68k/kernel-options.txt
@@ -129,6 +129,20 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
 the first of these. You can find out all valid major numbers by
 looking into include/linux/major.h.
 
+In addition to major and minor numbers, if the device containing your
+root partition uses a partition table format with unique partition
+identifiers, then you may use them. For instance,
+"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also
+possible to reference another partition on the same device using a
+known partition UUID as the starting point. For example,
+if partition 5 of the device has the UUID of
+00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
+follows:
+	PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
+
+Authoritative information can be found in
+"Documentation/kernel-parameters.txt".
+
 
 2.2) ro, rw
 -----------
diff --git a/MAINTAINERS b/MAINTAINERS
index 0d2fcda465eb..07cfd8deaad5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3367,6 +3367,12 @@ F: drivers/net/ixgb/
 F:	drivers/net/ixgbe/
 F:	drivers/net/ixgbevf/
 
+INTEL MRST PMU DRIVER
+M:	Len Brown <len.brown@intel.com>
+L:	linux-pm@lists.linux-foundation.org
+S:	Supported
+F:	arch/x86/platform/mrst/pmu.*
+
 INTEL PRO/WIRELESS 2100 NETWORK CONNECTION SUPPORT
 L:	linux-wireless@vger.kernel.org
 S:	Orphan
@@ -6319,6 +6325,7 @@ F: include/linux/sysv_fs.h
 TARGET SUBSYSTEM
 M:	Nicholas A. Bellinger <nab@linux-iscsi.org>
 L:	linux-scsi@vger.kernel.org
+L:	target-devel@vger.kernel.org
 L:	http://groups.google.com/group/linux-iscsi-target-dev
 W:	http://www.linux-iscsi.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master
diff --git a/arch/Kconfig b/arch/Kconfig
index 26b0e2397a57..4b0669cbb3b0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -178,4 +178,7 @@ config HAVE_ARCH_MUTEX_CPU_RELAX
 config HAVE_RCU_TABLE_FREE
 	bool
 
+config ARCH_HAVE_NMI_SAFE_CMPXCHG
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index ca2da8da6e9c..60cde53d266c 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -14,6 +14,7 @@ config ALPHA
 	select AUTO_IRQ_AFFINITY if SMP
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 5e1e54197227..d7ee0d4c072d 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -30,6 +30,7 @@
 #include <linux/uaccess.h>
 #include <linux/random.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/cpuidle.h>
 
 #include <asm/cacheflush.h>
 #include <asm/leds.h>
@@ -196,7 +197,8 @@ void cpu_idle(void)
 				cpu_relax();
 			} else {
 				stop_critical_timings();
-				pm_idle();
+				if (cpuidle_call_idle())
+					pm_idle();
 				start_critical_timings();
 				/*
 				 * This will eventually be removed - pm_idle
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index e9d689b7c833..197e96f70405 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -10,6 +10,7 @@ config AVR32
 	select GENERIC_IRQ_PROBE
 	select HARDIRQS_SW_RESEND
 	select GENERIC_IRQ_SHOW
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c
index 850265373611..466af40c5822 100644
--- a/arch/cris/arch-v10/drivers/sync_serial.c
+++ b/arch/cris/arch-v10/drivers/sync_serial.c
@@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file);
 static int sync_serial_release(struct inode *inode, struct file *file);
 static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
 
-static int sync_serial_ioctl(struct file *file,
+static long sync_serial_ioctl(struct file *file,
 		unsigned int cmd, unsigned long arg);
 static ssize_t sync_serial_write(struct file *file, const char *buf,
 	size_t count, loff_t *ppos);
@@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file)
 		*R_IRQ_MASK1_SET = 1 << port->data_avail_bit;
 		DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev));
 	}
-	ret = 0;
+	err = 0;
 
 out:
 	mutex_unlock(&sync_serial_mutex);
-	return ret;
+	return err;
 }
 
 static int sync_serial_release(struct inode *inode, struct file *file)
diff --git a/arch/cris/arch-v10/kernel/irq.c b/arch/cris/arch-v10/kernel/irq.c
index 907cfb5a873d..ba0e5965d6e3 100644
--- a/arch/cris/arch-v10/kernel/irq.c
+++ b/arch/cris/arch-v10/kernel/irq.c
@@ -20,6 +20,9 @@
 #define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr));
 #define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr));
 
+extern void kgdb_init(void);
+extern void breakpoint(void);
+
 /* don't use set_int_vector, it bypasses the linux interrupt handlers. it is
  * global just so that the kernel gdb can use it.
  */
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 29b74a105830..332f19c54557 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -11,8 +11,6 @@
 
 #ifdef __KERNEL__
 
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 #include <asm/processor.h>
@@ -67,8 +65,10 @@ struct thread_info {
 
 #define init_thread_info	(init_thread_union.thread_info)
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 /* thread information allocation */
-#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
+#define alloc_thread_info_node(tsk, node) \
+	((struct thread_info *) __get_free_pages(GFP_KERNEL, 1))
 #define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index cb884e489425..bad27a6ff407 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,6 +7,7 @@ config FRV
 	select HAVE_PERF_EVENTS
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ZONE_DMA
 	bool
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 64c7ab7e7a81..124854714958 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,6 +28,7 @@ config IA64
 	select IRQ_PER_CPU
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 284cd3771eaa..9e8ee9d2b8ca 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -6,6 +6,7 @@ config M68K
 	select GENERIC_ATOMIC64 if MMU
 	select HAVE_GENERIC_HARDIRQS if !MMU
 	select GENERIC_IRQ_SHOW if !MMU
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 
 config RWSEM_GENERIC_SPINLOCK
 	bool
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 65adc86a230e..e077b0bf56ca 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -15,6 +15,7 @@ config PARISC
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select IRQ_PER_CPU
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 374c475e56a3..6926b61acfea 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -136,6 +136,7 @@ config PPC
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_BPF_JIT if (PPC64 && NET)
 	select HAVE_ARCH_JUMP_LABEL
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 17dbb4318261..ed5cb5af5281 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -81,6 +81,7 @@ config S390
 	select INIT_ALL_POSSIBLE
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 748ff1920068..ff9177c8f643 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -11,6 +11,7 @@ config SUPERH
 	select HAVE_DMA_ATTRS
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
 	select PERF_USE_VMALLOC
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 84db0d6ccd0d..3c45de1db716 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -16,12 +16,13 @@
 #include <linux/thread_info.h>
 #include <linux/irqflags.h>
 #include <linux/smp.h>
+#include <linux/cpuidle.h>
 #include <asm/pgalloc.h>
 #include <asm/system.h>
 #include <linux/atomic.h>
 #include <asm/smp.h>
 
-void (*pm_idle)(void) = NULL;
+static void (*pm_idle)(void);
 
 static int hlt_counter;
 
@@ -100,7 +101,8 @@ void cpu_idle(void)
 			local_irq_disable();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			pm_idle();
+			if (cpuidle_call_idle())
+				pm_idle();
 			/*
 			 * Sanity check to ensure that pm_idle() returns
 			 * with IRQs enabled
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 1074dddcb104..42c67beadcae 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -54,6 +54,7 @@ config SPARC64
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select IRQ_PREFLOW_FASTEOI
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 0249b8b4db54..b30f71ac0d06 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -12,6 +12,7 @@ config TILE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
 	select SYS_HYPERVISOR
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 849ab2fa1f5c..aec60dc06007 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -2,3 +2,41 @@ include include/asm-generic/Kbuild.asm
 
 header-y += ucontext.h
 header-y += hardwall.h
+
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipc.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += local.h
+generic-y += module.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += scatterlist.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += xor.h
diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h
deleted file mode 100644
index b12fd89e42e9..000000000000
--- a/arch/tile/include/asm/bug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bug.h>
diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h
deleted file mode 100644
index 61791e1ad9f5..000000000000
--- a/arch/tile/include/asm/bugs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bugs.h>
diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h
deleted file mode 100644
index 6d68ad7e0ea3..000000000000
--- a/arch/tile/include/asm/cputime.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/cputime.h>
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
deleted file mode 100644
index f0a4c256403b..000000000000
--- a/arch/tile/include/asm/device.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/device.h>
diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/tile/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h
deleted file mode 100644
index 3711bd9d50bd..000000000000
--- a/arch/tile/include/asm/emergency-restart.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/emergency-restart.h>
diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h
deleted file mode 100644
index 4c82b503d92f..000000000000
--- a/arch/tile/include/asm/errno.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/errno.h>
diff --git a/arch/tile/include/asm/fb.h b/arch/tile/include/asm/fb.h
deleted file mode 100644
index 3a4988e8df45..000000000000
--- a/arch/tile/include/asm/fb.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fb.h>
diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/tile/include/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index 51537ff9265a..c66f7933beaa 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -75,12 +75,6 @@ extern void __set_fixmap(enum fixed_addresses idx,
 
 #define set_fixmap(idx, phys) \
 		__set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
-		__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
-
 #define clear_fixmap(idx) \
 		__set_fixmap(idx, 0, __pgprot(0))
 
diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/tile/include/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h
deleted file mode 100644
index ec34c760665e..000000000000
--- a/arch/tile/include/asm/ioctls.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctls.h>
diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h
deleted file mode 100644
index a46e3d9c2a3f..000000000000
--- a/arch/tile/include/asm/ipc.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipc.h>
diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/tile/include/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/tile/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/arch/tile/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/tile/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h
deleted file mode 100644
index 1e4b79fe8584..000000000000
--- a/arch/tile/include/asm/module.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/module.h>
diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h
deleted file mode 100644
index 809134c644a6..000000000000
--- a/arch/tile/include/asm/msgbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/msgbuf.h>
diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h
deleted file mode 100644
index ff6101aa2c71..000000000000
--- a/arch/tile/include/asm/mutex.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mutex-dec.h>
diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/tile/include/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/tile/include/asm/parport.h b/arch/tile/include/asm/parport.h
deleted file mode 100644
index cf252af64590..000000000000
--- a/arch/tile/include/asm/parport.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/parport.h>
diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/tile/include/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h
deleted file mode 100644
index 22cae6230ceb..000000000000
--- a/arch/tile/include/asm/posix_types.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/posix_types.h>
diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/tile/include/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h
deleted file mode 100644
index 35d786fe93ae..000000000000
--- a/arch/tile/include/asm/scatterlist.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/scatterlist.h>
diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h
deleted file mode 100644
index 7673b83cfef7..000000000000
--- a/arch/tile/include/asm/sembuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sembuf.h>
diff --git a/arch/tile/include/asm/serial.h b/arch/tile/include/asm/serial.h
deleted file mode 100644
index a0cb0caff152..000000000000
--- a/arch/tile/include/asm/serial.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/serial.h>
diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h
deleted file mode 100644
index 83c05fc2de38..000000000000
--- a/arch/tile/include/asm/shmbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmbuf.h>
diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h
deleted file mode 100644
index 93f30deb95d0..000000000000
--- a/arch/tile/include/asm/shmparam.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmparam.h>
diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h
deleted file mode 100644
index 6b71384b9d8b..000000000000
--- a/arch/tile/include/asm/socket.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/socket.h>
diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h
deleted file mode 100644
index def6d4746ee7..000000000000
--- a/arch/tile/include/asm/sockios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sockios.h>
diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h
deleted file mode 100644
index 0b91fe198c20..000000000000
--- a/arch/tile/include/asm/statfs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/statfs.h>
diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h
deleted file mode 100644
index 3935b106de79..000000000000
--- a/arch/tile/include/asm/termbits.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termbits.h>
diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h
deleted file mode 100644
index 280d78a9d966..000000000000
--- a/arch/tile/include/asm/termios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termios.h>
diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h
deleted file mode 100644
index b9e79bc580dd..000000000000
--- a/arch/tile/include/asm/types.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/types.h>
diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h
deleted file mode 100644
index 9bc07b9f30fb..000000000000
--- a/arch/tile/include/asm/ucontext.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ucontext.h>
diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h
deleted file mode 100644
index c82eb12a5b18..000000000000
--- a/arch/tile/include/asm/xor.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/xor.h>
diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h
new file mode 100644
index 000000000000..6395faa6d9e6
--- /dev/null
+++ b/arch/tile/include/hv/drv_srom_intf.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file drv_srom_intf.h
+ * Interface definitions for the SPI Flash ROM driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+
+/** Read this offset to get the total device size. */
+#define SROM_TOTAL_SIZE_OFF	0xF0000000
+
+/** Read this offset to get the device sector size. */
+#define SROM_SECTOR_SIZE_OFF	0xF0000004
+
+/** Read this offset to get the device page size. */
+#define SROM_PAGE_SIZE_OFF	0xF0000008
+
+/** Write this offset to flush any pending writes. */
+#define SROM_FLUSH_OFF		0xF1000000
+
+/** Write this offset, plus the byte offset of the start of a sector, to
+ * erase a sector.  Any write data is ignored, but there must be at least
+ * one byte of write data.  Only applies when the driver is in MTD mode.
+ */
+#define SROM_ERASE_OFF		0xF2000000
+
+#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index c4be58cc5d50..f6f50f2a5e37 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -78,7 +78,6 @@ static struct clocksource cycle_counter_cs = {
 	.rating = 300,
 	.read = clocksource_get_cycles,
 	.mask = CLOCKSOURCE_MASK(64),
-	.shift = 22,   /* typical value, e.g. x86 tsc uses this */
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -91,8 +90,6 @@ void __init setup_clock(void)
 	cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED);
 	sched_clock_mult =
 		clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT);
-	cycle_counter_cs.mult =
-		clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift);
 }
 
 void __init calibrate_delay(void)
@@ -107,7 +104,7 @@ void __init calibrate_delay(void)
 void __init time_init(void)
 {
 	/* Initialize and register the clock source. */
-	clocksource_register(&cycle_counter_cs);
+	clocksource_register_hz(&cycle_counter_cs, cycles_per_sec);
 
 	/* Start up the tile-timer interrupt source on the boot cpu. */
 	setup_tile_timer();
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 4e10c4023028..7309988c9794 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -836,8 +836,7 @@ void __init mem_init(void)
 #endif
 
 #ifdef CONFIG_FLATMEM
-	if (!mem_map)
-		BUG();
+	BUG_ON(!mem_map);
 #endif
 
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7cf916fc1ce7..6a47bb22657f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,7 @@ config X86
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if (X86_64 && NET)
 	select CLKEVT_I8253
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d02804d650c4..d8e8eefbe24c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -40,8 +40,6 @@
 #include <linux/compiler.h>
 #include <asm/page.h>
 
-#include <xen/xen.h>
-
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
 { type ret; asm volatile("mov" size " %1,%0":reg (ret) \
@@ -334,6 +332,7 @@ extern void fixup_early_ioremap(void);
 extern bool is_early_ioremap_ptep(pte_t *ptep);
 
 #ifdef CONFIG_XEN
+#include <xen/xen.h>
 struct bio_vec;
 
 extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 219371546afd..0d1171c97729 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -751,8 +751,6 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 		     :: "a" (eax), "c" (ecx));
 }
 
-extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
-
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 extern void init_amd_e400_c1e_mask(void);
 
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 5812404a0d4c..f50e7fb2a201 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -149,6 +149,29 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
 
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+	if (!need_resched()) {
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (!need_resched())
+			__mwait(ax, cx);
+	}
+}
+
 void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e1ba8cb24e4e..e7e3b019c439 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -438,29 +438,6 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-	if (!need_resched()) {
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__mwait(ax, cx);
-	}
-}
-
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3d0dc59067b..7a3b65107a27 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/cpuidle.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -109,7 +110,8 @@ void cpu_idle(void)
 			local_irq_disable();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			pm_idle();
+			if (cpuidle_idle_call())
+				pm_idle();
 			start_critical_timings();
 		}
 		tick_nohz_restart_sched_tick();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca6f7ab8df33..f693e44e1bf6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
+#include <linux/cpuidle.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -136,7 +137,8 @@ void cpu_idle(void)
 			enter_idle();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			pm_idle();
+			if (cpuidle_idle_call())
+				pm_idle();
 			start_critical_timings();
 
 			/* In many cases the interrupt that ended idle
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index f61ccdd49341..1ea38775a6d3 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_X86_MRST)		+= mrst.o
 obj-$(CONFIG_X86_MRST)		+= vrtc.o
 obj-$(CONFIG_EARLY_PRINTK_MRST)	+= early_printk_mrst.o
+obj-$(CONFIG_X86_MRST)		+= pmu.o
diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c
new file mode 100644
index 000000000000..9281da7d91bd
--- /dev/null
+++ b/arch/x86/platform/mrst/pmu.c
@@ -0,0 +1,817 @@
1/*
2 * mrst/pmu.c - driver for MRST Power Management Unit
3 *
4 * Copyright (c) 2011, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#include <linux/cpuidle.h>
21#include <linux/debugfs.h>
22#include <linux/delay.h>
23#include <linux/interrupt.h>
24#include <linux/module.h>
25#include <linux/pci.h>
26#include <linux/seq_file.h>
27#include <linux/sfi.h>
28#include <asm/intel_scu_ipc.h>
29#include "pmu.h"
30
31#define IPCMSG_FW_REVISION 0xF4
32
33struct mrst_device {
34 u16 pci_dev_num; /* DEBUG only */
35 u16 lss;
36 u16 latest_request;
37 unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */
38};
39
40/*
41 * comlete list of MRST PCI devices
42 */
43static struct mrst_device mrst_devs[] = {
44/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */
45/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */
46/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */
47/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */
48/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */
49/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */
50/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */
51/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */
52/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */
53/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */
54/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */
55/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */
56/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */
57/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */
58/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */
59/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */
60/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */
61/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */
62/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */
63/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */
64/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */
65/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */
66
67/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */
68
69/* 23 */ { 0x4102, 0 }, /* Lincroft */
70/* 24 */ { 0x4110, 0 }, /* Lincroft */
71};
72
73/* n.b. We ignore PCI-id 0x815 in LSS9 b/c MeeGo has no driver for it */
74static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
75static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
76 0x0804, 0x0805, 0x080f, 0};
77
78/* handle concurrent SMP invokations of pmu_pci_set_power_state() */
79static spinlock_t mrst_pmu_power_state_lock;
80
81static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */
82static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */
83
84static int graphics_is_off;
85static int lss_s0i3_enabled;
86static bool mrst_pmu_s0i3_enable;
87
88/* debug counters */
89static u32 pmu_wait_ready_calls;
90static u32 pmu_wait_ready_udelays;
91static u32 pmu_wait_ready_udelays_max;
92static u32 pmu_wait_done_calls;
93static u32 pmu_wait_done_udelays;
94static u32 pmu_wait_done_udelays_max;
95static u32 pmu_set_power_state_entry;
96static u32 pmu_set_power_state_send_cmd;
97
98static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
99{
100 int index = 0;
101
102 if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
103 index = pci_dev_num - 0x800;
104 else if (pci_dev_num == 0x084F)
105 index = 22;
106 else if (pci_dev_num == 0x4102)
107 index = 23;
108 else if (pci_dev_num == 0x4110)
109 index = 24;
110
111 if (pci_dev_num != mrst_devs[index].pci_dev_num) {
112 WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
113 return 0;
114 }
115
116 return &mrst_devs[index];
117}
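/*
 * Example (illustrative, not from the driver): pci_dev_num 0x0805 falls
 * in the contiguous 0x0800..0x0815 range, so index = 5 and the keyboard
 * entry (LSS_KBD) is returned; 0x084F, 0x4102 and 0x4110 are the only
 * IDs handled outside that range.
 */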
118
 119/**
 120 * mrst_pmu_invalid_cstates
 121 *
 122 * Certain states are not appropriate for the governor to pick in some
 123 * cases. This function is called before idle-state selection and
 124 * returns a bitmask of states the governor must ignore when selecting
 125 * the next state to enter.
 126 */
128
129#define IDLE_STATE4_IS_C6 4
130#define IDLE_STATE5_IS_S0I3 5
131
132int mrst_pmu_invalid_cstates(void)
133{
134 int cpu = smp_processor_id();
135
136 /*
137 * Demote to C4 if the PMU is busy.
138 * Since LSS changes leave the busy bit clear...
139 * busy means either the PMU is waiting for an ACK-C6 that
140 * isn't coming due to an MWAIT that returned immediately;
141 * or we returned from S0i3 successfully, and the PMU
142 * is not done sending us interrupts.
143 */
144 if (pmu_read_busy_status())
145 return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
146
147 /*
148 * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
149 * or if device LSS is insufficient, or the GPU is active,
150 * or if it has been explicitly disabled.
151 */
152 if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
153 !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
154 return 1 << IDLE_STATE5_IS_S0I3;
155 else
156 return 0;
157}
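/*
 * A minimal sketch of a hypothetical caller, to show how the bitmask
 * above is meant to be consumed: bit N set means cpuidle state N must
 * be skipped on this entry. The function name and nr_states parameter
 * are assumptions for illustration, not part of this driver.
 */
static int example_pick_deepest_state(int nr_states)
{
	int ignore = mrst_pmu_invalid_cstates();	/* e.g. 1 << 5 */
	int state;

	for (state = nr_states - 1; state > 0; state--)
		if (!(ignore & (1 << state)))
			return state;	/* deepest state not vetoed */
	return 0;			/* shallowest state is always allowed */
}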
158
159/*
160 * pmu_update_wake_counters(): read PM_WKS, update wake_counters[]
161 * DEBUG only.
162 */
163static void pmu_update_wake_counters(void)
164{
165 int lss;
166 u32 wake_status;
167
168 wake_status = pmu_read_wks();
169
170 for (lss = 0; lss < MRST_NUM_LSS; ++lss) {
171 if (wake_status & (1 << lss))
172 wake_counters[lss]++;
173 }
174}
175
176int mrst_pmu_s0i3_entry(void)
177{
178 int status;
179
180 /* Clear any possible error conditions */
181 pmu_write_ics(0x300);
182
183 /* set wake control to current D-states */
184 pmu_write_wssc(S0I3_SSS_TARGET);
185
186 status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
187 pmu_update_wake_counters();
188 return status;
189}
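/*
 * For reference: with the pm_ics layout in pmu.h (cause:8, enable:1,
 * pending:1), the 0x300 written above sets bit 8 (enable) and bit 9
 * (pending), i.e. it keeps the PMU interrupt enabled while
 * acknowledging any stale pending/error status; this assumes the
 * pending bit is write-1-to-clear, as the "Clear any possible error
 * conditions" comment suggests.
 */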
190
191/* poll for maximum of 5ms for busy bit to clear */
192static int pmu_wait_ready(void)
193{
194 int udelays;
195
196 pmu_wait_ready_calls++;
197
198 for (udelays = 0; udelays < 500; ++udelays) {
199 if (udelays > pmu_wait_ready_udelays_max)
200 pmu_wait_ready_udelays_max = udelays;
201
202 if (pmu_read_busy_status() == 0)
203 return 0;
204
205 udelay(10);
206 pmu_wait_ready_udelays++;
207 }
208
209 /*
210 * if this fires, observe
211 * /sys/kernel/debug/mrst_pmu_wait_ready_calls
212 * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
213 */
214 WARN_ONCE(1, "SCU not ready for 5ms");
215 return -EBUSY;
216}
 217/* poll for maximum of 50ms for busy bit to clear */
218static int pmu_wait_done(void)
219{
220 int udelays;
221
222 pmu_wait_done_calls++;
223
224 for (udelays = 0; udelays < 500; ++udelays) {
225 if (udelays > pmu_wait_done_udelays_max)
226 pmu_wait_done_udelays_max = udelays;
227
228 if (pmu_read_busy_status() == 0)
229 return 0;
230
231 udelay(100);
232 pmu_wait_done_udelays++;
233 }
234
235 /*
236 * if this fires, observe
237 * /sys/kernel/debug/mrst_pmu_wait_done_calls
238 * /sys/kernel/debug/mrst_pmu_wait_done_udelays
239 */
240 WARN_ONCE(1, "SCU not done for 50ms");
241 return -EBUSY;
242}
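/*
 * Budget check on the two pollers: pmu_wait_ready() spins for at most
 * 500 * udelay(10) = 5 ms, while pmu_wait_done() spins for at most
 * 500 * udelay(100) = 50 ms, matching the comments above each loop.
 */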
243
244u32 mrst_pmu_msi_is_disabled(void)
245{
246 return pmu_msi_is_disabled();
247}
248
249void mrst_pmu_enable_msi(void)
250{
251 pmu_msi_enable();
252}
253
254/**
255 * pmu_irq - pmu driver interrupt handler
256 * Context: interrupt context
257 */
258static irqreturn_t pmu_irq(int irq, void *dummy)
259{
260 union pmu_pm_ics pmu_ics;
261
262 pmu_ics.value = pmu_read_ics();
263
264 if (!pmu_ics.bits.pending)
265 return IRQ_NONE;
266
267 switch (pmu_ics.bits.cause) {
268 case INT_SPURIOUS:
269 case INT_CMD_DONE:
270 case INT_CMD_ERR:
271 case INT_WAKE_RX:
272 case INT_SS_ERROR:
273 case INT_S0IX_MISS:
274 case INT_NO_ACKC6:
275 pmu_irq_stats[pmu_ics.bits.cause]++;
276 break;
277 default:
278 pmu_irq_stats[INT_INVALID]++;
279 }
280
281 pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
282
283 return IRQ_HANDLED;
284}
285
286/*
287 * Translate PCI power management to MRST LSS D-states
288 */
289static int pci_2_mrst_state(int lss, pci_power_t pci_state)
290{
291 switch (pci_state) {
292 case PCI_D0:
293 if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
294 return D0i1;
295 else
296 return D0;
297 case PCI_D1:
298 return D0i1;
299 case PCI_D2:
300 return D0i2;
301 case PCI_D3hot:
302 case PCI_D3cold:
303 return D0i3;
304 default:
305 WARN(1, "pci_state %d\n", pci_state);
306 return 0;
307 }
308}
309
310static int pmu_issue_command(u32 pm_ssc)
311{
312 union pmu_pm_set_cfg_cmd_t command;
313
314 if (pmu_read_busy_status()) {
 315 pr_debug("PMU is busy, operation not permitted\n");
316 return -1;
317 }
318
319 /*
320 * enable interrupts in PMU so that interrupts are
321 * propagated when ioc bit for a particular set
322 * command is set
323 */
324
325 pmu_irq_enable();
326
 327 /* Configure the subsystems for pmu2 */
328
329 pmu_write_ssc(pm_ssc);
330
 331 /*
 332 * Send the set config command to the PMU; it is configured
 333 * for mode CM_IMMEDIATE and hence needs no trigger
 334 */
335
336 command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
337 command.pmu2_params.d_param.cfg_delay = 0;
338 command.pmu2_params.d_param.rsvd = 0;
339
340 /* construct the command to send SET_CFG to particular PMU */
341 command.pmu2_params.d_param.cmd = SET_CFG_CMD;
342 command.pmu2_params.d_param.ioc = 0;
343 command.pmu2_params.d_param.mode_id = 0;
344 command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
345
 346 /* write the value into the PMU's PM_CMD register */
347 pr_debug("pmu command being written %x\n",
348 command.pmu_pm_set_cfg_cmd_value);
349
350 pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value);
351
352 return 0;
353}
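/*
 * For reference (derived from the bitfield layout in pmu.h, assuming
 * the usual little-endian bitfield packing): with cmd = SET_CFG_CMD (1)
 * in bits 7:0 and cfg_mode = CM_IMMEDIATE (1) in bits 12:9, the word
 * written above is 0x201, the same literal that pmu_probe() writes
 * directly after programming D0I1_ACG_SSS_TARGET.
 */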
354
355static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
356{
357 u16 existing_request;
358 int i;
359
360 for (i = 0; ids[i]; ++i) {
361 struct mrst_device *mrst_dev;
362
363 mrst_dev = pci_id_2_mrst_dev(ids[i]);
364 if (unlikely(!mrst_dev))
365 continue;
366
367 existing_request = mrst_dev->latest_request;
368 if (existing_request < pci_state)
369 pci_state = existing_request;
370 }
371 return pci_state;
372}
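/*
 * Worked example (illustrative): LSS10 is shared by the SPI, I2C,
 * keyboard and GPIO devices. If GPIO last requested PCI_D0 while I2C0
 * now requests PCI_D3hot, the scan above returns PCI_D0, so a shared
 * LSS stays at the highest-power (lowest-numbered) state any of its
 * member devices still needs.
 */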
373
 374/**
 375 * pmu_pci_set_power_state - callback used by all the PCI devices
 376 * for platform-specific device power-on/shutdown.
 377 */
378
379int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
380{
381 u32 old_sss, new_sss;
382 int status = 0;
383 struct mrst_device *mrst_dev;
384
385 pmu_set_power_state_entry++;
386
387 BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
388 BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
389
390 mrst_dev = pci_id_2_mrst_dev(pdev->device);
391 if (unlikely(!mrst_dev))
392 return -ENODEV;
393
394 mrst_dev->pci_state_counts[pci_state]++; /* count invocations */
395
396 /* PMU driver calls self as part of PCI initialization, ignore */
397 if (pdev->device == PCI_DEV_ID_MRST_PMU)
398 return 0;
399
400 BUG_ON(!pmu_reg); /* SW bug if called before initialized */
401
402 spin_lock(&mrst_pmu_power_state_lock);
403
404 if (pdev->d3_delay) {
405 dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
406 pdev->d3_delay);
407 pdev->d3_delay = 0;
408 }
409 /*
410 * If Lincroft graphics, simply remember state
411 */
412 if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY
413 && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) {
414 if (pci_state == PCI_D0)
415 graphics_is_off = 0;
416 else
417 graphics_is_off = 1;
418 goto ret;
419 }
420
421 if (!mrst_dev->lss)
422 goto ret; /* device with no LSS */
423
424 if (mrst_dev->latest_request == pci_state)
425 goto ret; /* no change */
426
427 mrst_dev->latest_request = pci_state; /* record latest request */
428
429 /*
430 * LSS9 and LSS10 contain multiple PCI devices.
431 * Use the lowest numbered (highest power) state in the LSS
432 */
433 if (mrst_dev->lss == 9)
434 pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state);
435 else if (mrst_dev->lss == 10)
436 pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
437
438 status = pmu_wait_ready();
439 if (status)
440 goto ret;
441
442 old_sss = pmu_read_sss();
443 new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
444 new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state),
445 mrst_dev->lss);
446
447 if (new_sss == old_sss)
448 goto ret; /* nothing to do */
449
450 pmu_set_power_state_send_cmd++;
451
452 status = pmu_issue_command(new_sss);
453
454 if (unlikely(status != 0)) {
 455 dev_err(&pdev->dev, "Failed to issue a PM command\n");
456 goto ret;
457 }
458
459 if (pmu_wait_done())
460 goto ret;
461
462 lss_s0i3_enabled =
463 ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
464ret:
465 spin_unlock(&mrst_pmu_power_state_lock);
466 return status;
467}
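/*
 * Worked example (illustrative) of the read-modify-write above: for
 * LSS_DISPLAY (lss 6) going to D0i3, SSMSK(3, 6) = 3 << 12 = 0x3000,
 * so bits 13:12 of the old SSS are cleared and then set to D0i3; each
 * logical subsystem owns exactly one such two-bit field.
 */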
468
469#ifdef CONFIG_DEBUG_FS
470static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"};
471
472static inline const char *d0ix_name(int state)
473{
474 return d0ix_names[(int) state];
475}
476
477static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
478{
479 struct pci_dev *pdev = NULL;
480 u32 cur_pmsss;
481 int lss;
482
483 seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
484
485 cur_pmsss = pmu_read_sss();
486
487 seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
488
489 seq_printf(s, "0x%08X Current SSS ", cur_pmsss);
490 seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n");
491
492 if (cpumask_equal(cpu_online_mask, cpumask_of(0)))
493 seq_printf(s, "cpu0 is only cpu online\n");
494 else
495 seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n");
496
497 seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]");
498
499
500 for_each_pci_dev(pdev) {
501 int pos;
502 u16 pmcsr;
503 struct mrst_device *mrst_dev;
504 int i;
505
506 mrst_dev = pci_id_2_mrst_dev(pdev->device);
507
508 seq_printf(s, "%s %04x/%04X %-16.16s ",
509 dev_name(&pdev->dev),
510 pdev->vendor, pdev->device,
511 dev_driver_string(&pdev->dev));
512
 513 if (unlikely(!mrst_dev)) {
514 seq_printf(s, " UNKNOWN\n");
515 continue;
516 }
517
518 if (mrst_dev->lss)
519 seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
520 d0ix_name(((cur_pmsss >>
521 (mrst_dev->lss * 2)) & 0x3)));
522 else
523 seq_printf(s, " ");
524
525 /* PCI PM config space setting */
526 pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
527 if (pos != 0) {
528 pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
529 seq_printf(s, "PCI-%-4s",
530 pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK));
531 } else {
532 seq_printf(s, " ");
533 }
534
535 seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request));
536 for (i = 0; i <= PCI_D3cold; ++i)
537 seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
538
539 if (mrst_dev->lss) {
540 unsigned int lssmask;
541
542 lssmask = SSMSK(D0i3, mrst_dev->lss);
543
544 if ((lssmask & S0I3_SSS_TARGET) &&
545 ((lssmask & cur_pmsss) !=
546 (lssmask & S0I3_SSS_TARGET)))
 547 seq_printf(s, "[BLOCKS s0i3]");
548 }
549
550 seq_printf(s, "\n");
551 }
552 seq_printf(s, "Wake Counters:\n");
553 for (lss = 0; lss < MRST_NUM_LSS; ++lss)
554 seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]);
555
556 seq_printf(s, "Interrupt Counters:\n");
557 seq_printf(s,
558 "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n"
559 "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n"
560 "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n"
561 "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n",
562 pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE],
563 pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
564 pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
565 pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
566
567 seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n",
568 pmu_wait_ready_calls);
569 seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n",
570 pmu_wait_ready_udelays);
571 seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n",
572 pmu_wait_ready_udelays_max);
573 seq_printf(s, "mrst_pmu_wait_done_calls %8d\n",
574 pmu_wait_done_calls);
575 seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n",
576 pmu_wait_done_udelays);
577 seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n",
578 pmu_wait_done_udelays_max);
579 seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n",
580 pmu_set_power_state_entry);
581 seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n",
582 pmu_set_power_state_send_cmd);
583 seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
584
585 return 0;
586}
587
588static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
589{
590 return single_open(file, debug_mrst_pmu_show, NULL);
591}
592
593static const struct file_operations devices_state_operations = {
594 .open = debug_mrst_pmu_open,
595 .read = seq_read,
596 .llseek = seq_lseek,
597 .release = single_release,
598};
 599#endif /* CONFIG_DEBUG_FS */
600
601/*
602 * Validate SCU PCI shim PCI vendor capability byte
603 * against LSS hard-coded in mrst_devs[] above.
604 * DEBUG only.
605 */
606static void pmu_scu_firmware_debug(void)
607{
608 struct pci_dev *pdev = NULL;
609
610 for_each_pci_dev(pdev) {
611 struct mrst_device *mrst_dev;
612 u8 pci_config_lss;
613 int pos;
614
615 mrst_dev = pci_id_2_mrst_dev(pdev->device);
616 if (unlikely(!mrst_dev)) {
617 printk(KERN_ERR FW_BUG "pmu: Unknown "
618 "PCI device 0x%04X\n", pdev->device);
619 continue;
620 }
621
622 if (mrst_dev->lss == 0)
623 continue; /* no LSS in our table */
624
625 pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
 626 if (pos == 0) {
627 printk(KERN_ERR FW_BUG "pmu: 0x%04X "
628 "missing PCI Vendor Capability\n",
629 pdev->device);
630 continue;
631 }
632 pci_read_config_byte(pdev, pos + 4, &pci_config_lss);
633 if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) {
634 printk(KERN_ERR FW_BUG "pmu: 0x%04X "
635 "invalid PCI Vendor Capability 0x%x "
636 " expected LSS 0x%X\n",
637 pdev->device, pci_config_lss, mrst_dev->lss);
638 continue;
639 }
640 pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK;
641
642 if (mrst_dev->lss == pci_config_lss)
643 continue;
644
645 printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n",
646 pdev->device, pci_config_lss, mrst_dev->lss);
647 }
648}
649
650/**
651 * pmu_probe
652 */
653static int __devinit pmu_probe(struct pci_dev *pdev,
654 const struct pci_device_id *pci_id)
655{
656 int ret;
657 struct mrst_pmu_reg *pmu;
658
659 /* Init the device */
660 ret = pci_enable_device(pdev);
661 if (ret) {
 662 dev_err(&pdev->dev, "Unable to enable PCI device\n");
663 return ret;
664 }
665
666 ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME);
667 if (ret < 0) {
668 dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n");
669 goto out_err1;
670 }
671
672 /* Map the memory of PMU reg base */
673 pmu = pci_iomap(pdev, 0, 0);
674 if (!pmu) {
675 dev_err(&pdev->dev, "Unable to map the PMU address space\n");
676 ret = -ENOMEM;
677 goto out_err2;
678 }
679
680#ifdef CONFIG_DEBUG_FS
681 /* /sys/kernel/debug/mrst_pmu */
682 (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO,
683 NULL, NULL, &devices_state_operations);
684#endif
685 pmu_reg = pmu; /* success */
686
687 if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) {
 688 dev_err(&pdev->dev, "Failed to register the PMU interrupt handler\n");
 689 ret = -EIO;
690 goto out_err3;
691 }
692
693 pmu_scu_firmware_debug();
694
695 pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */
696
697 pmu_wait_ready();
698
699 pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */
700 pmu_write_cmd(0x201);
701
702 spin_lock_init(&mrst_pmu_power_state_lock);
703
704 /* Enable the hardware interrupt */
705 pmu_irq_enable();
706 return 0;
707
 708out_err3:
 709 pci_iounmap(pdev, pmu_reg);
 710 pmu_reg = NULL;
712out_err2:
 713 pci_release_regions(pdev);
714out_err1:
715 pci_disable_device(pdev);
716 return ret;
717}
718
719static void __devexit pmu_remove(struct pci_dev *pdev)
720{
 721 dev_dbg(&pdev->dev, "MID PM pmu_remove called\n");
722
723 /* Freeing up the irq */
724 free_irq(pdev->irq, NULL);
725
726 pci_iounmap(pdev, pmu_reg);
727 pmu_reg = NULL;
728
729 /* disable the current PCI device */
 730 pci_release_regions(pdev);
731 pci_disable_device(pdev);
732}
733
734static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = {
735 { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 },
736 { }
737};
738
739MODULE_DEVICE_TABLE(pci, pmu_pci_ids);
740
741static struct pci_driver driver = {
742 .name = MRST_PMU_DRV_NAME,
743 .id_table = pmu_pci_ids,
744 .probe = pmu_probe,
745 .remove = __devexit_p(pmu_remove),
746};
747
748/**
749 * pmu_pci_register - register the PMU driver as PCI device
750 */
751static int __init pmu_pci_register(void)
752{
753 return pci_register_driver(&driver);
754}
755
 756/* Register and probe via fs_initcall() to precede device_initcall() */
757fs_initcall(pmu_pci_register);
758
759static void __exit mid_pci_cleanup(void)
760{
761 pci_unregister_driver(&driver);
762}
763
764static int ia_major;
765static int ia_minor;
766
767static int pmu_sfi_parse_oem(struct sfi_table_header *table)
768{
769 struct sfi_table_simple *sb;
770
771 sb = (struct sfi_table_simple *)table;
772 ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
773 ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
774 printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
775 ia_major, ia_minor);
776
777 return 0;
778}
779
780static int __init scu_fw_check(void)
781{
782 int ret;
783 u32 fw_version;
784
785 if (!pmu_reg)
786 return 0; /* this driver didn't probe-out */
787
788 sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
789
790 if (ia_major < 0x6005 || ia_minor < 0x1525) {
791 WARN(1, "mrst_pmu: IA FW version too old\n");
792 return -1;
793 }
794
795 ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
796 &fw_version, 1);
797
798 if (ret) {
799 WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
800 } else {
801 int scu_major = (fw_version >> 8) & 0xFF;
802 int scu_minor = (fw_version >> 0) & 0xFF;
803
804 printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
805
806 if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
807 printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
808 mrst_pmu_s0i3_enable = true;
809 } else {
810 WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
811 scu_major, scu_minor);
812 }
813 }
814 return 0;
815}
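/*
 * Worked example (illustrative): an IPC firmware revision of 0xC049
 * decodes above as scu_major = 0xC0 and scu_minor = 0x49, the oldest
 * firmware for which S0i3 is enabled by this check.
 */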
816late_initcall(scu_fw_check);
817module_exit(mid_pci_cleanup);
diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h
new file mode 100644
index 000000000000..bfbfe64b167b
--- /dev/null
+++ b/arch/x86/platform/mrst/pmu.h
@@ -0,0 +1,234 @@
1/*
 2 * mrst/pmu.h - private definitions for the MRST Power Management Unit driver, mrst/pmu.c
3 *
4 * Copyright (c) 2011, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#ifndef _MRST_PMU_H_
21#define _MRST_PMU_H_
22
23#define PCI_DEV_ID_MRST_PMU 0x0810
24#define MRST_PMU_DRV_NAME "mrst_pmu"
25#define PCI_SUB_CLASS_MASK 0xFF00
26
27#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F
28#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80
29
30#define SUB_SYS_ALL_D0I1 0x01155555
31#define S0I3_WAKE_SOURCES 0x00001FFF
32
33#define PM_S0I3_COMMAND \
34 ((0 << 31) | /* Reserved */ \
35 (0 << 30) | /* Core must be idle */ \
36 (0xc2 << 22) | /* ACK C6 trigger */ \
37 (3 << 19) | /* Trigger on DMI message */ \
38 (3 << 16) | /* Enter S0i3 */ \
39 (0 << 13) | /* Numeric mode ID (sw) */ \
40 (3 << 9) | /* Trigger mode */ \
41 (0 << 8) | /* Do not interrupt */ \
42 (1 << 0)) /* Set configuration */
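/*
 * For reference (arithmetic shown for checking, not from the source):
 * the fields above collapse to
 *   (0xc2 << 22) | (3 << 19) | (3 << 16) | (3 << 9) | (1 << 0)
 *   = 0x30800000 | 0x00180000 | 0x00030000 | 0x00000600 | 0x00000001
 *   = 0x309B0601
 * which is the literal value mrst_pmu_s0i3_entry() hands to pm_cmd.
 */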
43
44#define LSS_DMI 0
45#define LSS_SD_HC0 1
46#define LSS_SD_HC1 2
47#define LSS_NAND 3
48#define LSS_IMAGING 4
49#define LSS_SECURITY 5
50#define LSS_DISPLAY 6
51#define LSS_USB_HC 7
52#define LSS_USB_OTG 8
53#define LSS_AUDIO 9
54#define LSS_AUDIO_LPE 9
55#define LSS_AUDIO_SSP 9
56#define LSS_I2C0 10
57#define LSS_I2C1 10
58#define LSS_I2C2 10
59#define LSS_KBD 10
60#define LSS_SPI0 10
61#define LSS_SPI1 10
62#define LSS_SPI2 10
63#define LSS_GPIO 10
64#define LSS_SRAM 11 /* used by SCU, do not touch */
65#define LSS_SD_HC2 12
66/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
67#define MRST_NUM_LSS 13
68
69#define MIN(a, b) (((a) < (b)) ? (a) : (b))
70
71#define SSMSK(mask, lss) ((mask) << ((lss) * 2))
72#define D0 0
73#define D0i1 1
74#define D0i2 2
75#define D0i3 3
76
77#define S0I3_SSS_TARGET ( \
78 SSMSK(D0i1, LSS_DMI) | \
79 SSMSK(D0i3, LSS_SD_HC0) | \
80 SSMSK(D0i3, LSS_SD_HC1) | \
81 SSMSK(D0i3, LSS_NAND) | \
82 SSMSK(D0i3, LSS_SD_HC2) | \
83 SSMSK(D0i3, LSS_IMAGING) | \
84 SSMSK(D0i3, LSS_SECURITY) | \
85 SSMSK(D0i3, LSS_DISPLAY) | \
86 SSMSK(D0i3, LSS_USB_HC) | \
87 SSMSK(D0i3, LSS_USB_OTG) | \
88 SSMSK(D0i3, LSS_AUDIO) | \
89 SSMSK(D0i1, LSS_I2C0))
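/*
 * Example (illustrative): SSMSK(D0i3, LSS_SD_HC0) = 3 << (1 * 2) = 0xc,
 * so each LSS owns a two-bit field and S0I3_SSS_TARGET is simply the OR
 * of one such field per subsystem that must be down before S0i3 entry.
 */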
90
91/*
92 * D0i1 on Langwell is Autonomous Clock Gating (ACG).
93 * Enable ACG on every LSS except camera and audio
94 */
95#define D0I1_ACG_SSS_TARGET \
96 (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
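/*
 * For reference (arithmetic shown for checking): LSS_IMAGING is 4 and
 * LSS_AUDIO is 9, so the two cleared fields are SSMSK(D0i1, 4) = 1 << 8
 * and SSMSK(D0i1, 9) = 1 << 18, giving
 *   0x01155555 & ~0x00000100 & ~0x00040000 = 0x01115455
 * Note SUB_SYS_ALL_D0I1 already leaves LSS_SRAM (11, bits 23:22) at D0.
 */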
97
98enum cm_mode {
99 CM_NOP, /* ignore the config mode value */
100 CM_IMMEDIATE,
101 CM_DELAY,
102 CM_TRIGGER,
103 CM_INVALID
104};
105
106enum sys_state {
107 SYS_STATE_S0I0,
108 SYS_STATE_S0I1,
109 SYS_STATE_S0I2,
110 SYS_STATE_S0I3,
111 SYS_STATE_S3,
112 SYS_STATE_S5
113};
114
115#define SET_CFG_CMD 1
116
117enum int_status {
118 INT_SPURIOUS = 0,
119 INT_CMD_DONE = 1,
120 INT_CMD_ERR = 2,
121 INT_WAKE_RX = 3,
122 INT_SS_ERROR = 4,
123 INT_S0IX_MISS = 5,
124 INT_NO_ACKC6 = 6,
125 INT_INVALID = 7,
126};
127
128/* PMU register interface */
129static struct mrst_pmu_reg {
130 u32 pm_sts; /* 0x00 */
131 u32 pm_cmd; /* 0x04 */
132 u32 pm_ics; /* 0x08 */
133 u32 _resv1; /* 0x0C */
134 u32 pm_wkc[2]; /* 0x10 */
135 u32 pm_wks[2]; /* 0x18 */
136 u32 pm_ssc[4]; /* 0x20 */
137 u32 pm_sss[4]; /* 0x30 */
138 u32 pm_wssc[4]; /* 0x40 */
139 u32 pm_c3c4; /* 0x50 */
140 u32 pm_c5c6; /* 0x54 */
141 u32 pm_msi_disable; /* 0x58 */
142} *pmu_reg;
143
144static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); }
145static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); }
146static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); }
147static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); }
148
149static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
150static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
151static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
152static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
153static inline void pmu_write_wssc(u32 arg)
154 { writel(arg, &pmu_reg->pm_wssc[0]); }
155
156static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
157static inline u32 pmu_msi_is_disabled(void)
158 { return readl(&pmu_reg->pm_msi_disable); }
159
160union pmu_pm_ics {
161 struct {
162 u32 cause:8;
163 u32 enable:1;
164 u32 pending:1;
165 u32 reserved:22;
166 } bits;
167 u32 value;
168};
169
170static inline void pmu_irq_enable(void)
171{
172 union pmu_pm_ics pmu_ics;
173
174 pmu_ics.value = pmu_read_ics();
175 pmu_ics.bits.enable = 1;
176 pmu_write_ics(pmu_ics.value);
177}
178
179union pmu_pm_status {
180 struct {
181 u32 pmu_rev:8;
182 u32 pmu_busy:1;
183 u32 mode_id:4;
 184 u32 reserved:19;
185 } pmu_status_parts;
186 u32 pmu_status_value;
187};
188
189static inline int pmu_read_busy_status(void)
190{
191 union pmu_pm_status result;
192
193 result.pmu_status_value = pmu_read_sts();
194
195 return result.pmu_status_parts.pmu_busy;
196}
197
198/* pmu set config parameters */
199struct cfg_delay_param_t {
200 u32 cmd:8;
201 u32 ioc:1;
202 u32 cfg_mode:4;
203 u32 mode_id:3;
204 u32 sys_state:3;
205 u32 cfg_delay:8;
206 u32 rsvd:5;
207};
208
209struct cfg_trig_param_t {
210 u32 cmd:8;
211 u32 ioc:1;
212 u32 cfg_mode:4;
213 u32 mode_id:3;
214 u32 sys_state:3;
215 u32 cfg_trig_type:3;
216 u32 cfg_trig_val:8;
217 u32 cmbi:1;
218 u32 rsvd1:1;
219};
220
221union pmu_pm_set_cfg_cmd_t {
222 union {
223 struct cfg_delay_param_t d_param;
224 struct cfg_trig_param_t t_param;
225 } pmu2_params;
226 u32 pmu_pm_set_cfg_cmd_value;
227};
228
229#ifdef FUTURE_PATCH
230extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
231#else
232static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
233#endif
234#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 60aeeb56948f..a9627e2e3295 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/pm.h>
 #include <linux/memblock.h>
+#include <linux/cpuidle.h>
 
 #include <asm/elf.h>
 #include <asm/vdso.h>
@@ -426,7 +427,7 @@ void __init xen_arch_setup(void)
 #ifdef CONFIG_X86_32
 	boot_cpu_data.hlt_works_ok = 1;
 #endif
-	pm_idle = default_idle;
+	disable_cpuidle();
 	boot_option_idle_override = IDLE_HALT;
 
 	fiddle_vdso();
diff --git a/block/blk-core.c b/block/blk-core.c
index b850bedad229..b627558c461f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1368,8 +1368,10 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
 
 static int __init fail_make_request_debugfs(void)
 {
-	return init_fault_attr_dentries(&fail_make_request,
-					"fail_make_request");
+	struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
+						NULL, &fail_make_request);
+
+	return IS_ERR(dir) ? PTR_ERR(dir) : 0;
 }
 
 late_initcall(fail_make_request_debugfs);
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 4f0c06c7a338..780354888958 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q)
 
 static int __init fail_io_timeout_debugfs(void)
 {
-	return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
+	struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout",
+						NULL, &fail_io_timeout);
+
+	return IS_ERR(dir) ? PTR_ERR(dir) : 0;
 }
 
 late_initcall(fail_io_timeout_debugfs);
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 73863d86f022..76dc02f15574 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -126,6 +126,12 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE);
  */
 u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE);
 
+/*
+ * Disable runtime checking and repair of values returned by control methods.
+ * Use only if the repair is causing a problem on a particular machine.
+ */
+u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE);
+
 /* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */
 
 struct acpi_table_fadt acpi_gbl_FADT;
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index c7f743ca395b..5552125d8340 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -357,6 +357,7 @@ struct acpi_predefined_data {
 	char *pathname;
 	const union acpi_predefined_info *predefined;
 	union acpi_operand_object *parent_package;
+	struct acpi_namespace_node *node;
 	u32 flags;
 	u8 node_flags;
 };
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 94e73c97cf85..c445cca490ea 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -468,6 +468,7 @@ static const union acpi_predefined_info predefined_names[] =
 	{{"_SWS", 0, ACPI_RTYPE_INTEGER}},
 	{{"_TC1", 0, ACPI_RTYPE_INTEGER}},
 	{{"_TC2", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TDL", 0, ACPI_RTYPE_INTEGER}},
 	{{"_TIP", 1, ACPI_RTYPE_INTEGER}},
 	{{"_TIV", 1, ACPI_RTYPE_INTEGER}},
 	{{"_TMP", 0, ACPI_RTYPE_INTEGER}},
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 9fb03fa8ffde..c845c8089f39 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -193,14 +193,20 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
 	}
 
 	/*
-	 * 1) We have a return value, but if one wasn't expected, just exit, this is
-	 *    not a problem. For example, if the "Implicit Return" feature is
-	 *    enabled, methods will always return a value.
+	 * Return value validation and possible repair.
 	 *
-	 * 2) If the return value can be of any type, then we cannot perform any
-	 *    validation, exit.
+	 * 1) Don't perform return value validation/repair if this feature
+	 *    has been disabled via a global option.
+	 *
+	 * 2) We have a return value, but if one wasn't expected, just exit,
+	 *    this is not a problem. For example, if the "Implicit Return"
+	 *    feature is enabled, methods will always return a value.
+	 *
+	 * 3) If the return value can be of any type, then we cannot perform
+	 *    any validation, just exit.
 	 */
-	if ((!predefined->info.expected_btypes) ||
+	if (acpi_gbl_disable_auto_repair ||
+	    (!predefined->info.expected_btypes) ||
 	    (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) {
 		goto cleanup;
 	}
@@ -212,6 +218,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
 		goto cleanup;
 	}
 	data->predefined = predefined;
+	data->node = node;
 	data->node_flags = node->flags;
 	data->pathname = pathname;
 
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 973883babee1..024c4f263f87 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -503,6 +503,21 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data,
 {
 	union acpi_operand_object *return_object = *return_object_ptr;
 	acpi_status status;
+	struct acpi_namespace_node *node;
+
+	/*
+	 * We can only sort the _TSS return package if there is no _PSS in the
+	 * same scope. This is because if _PSS is present, the ACPI specification
+	 * dictates that the _TSS Power Dissipation field is to be ignored, and
+	 * therefore some BIOSs leave garbage values in the _TSS Power field(s).
+	 * In this case, it is best to just return the _TSS package as-is.
+	 * (May, 2011)
+	 */
+	status =
+	    acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node);
+	if (ACPI_SUCCESS(status)) {
+		return (AE_OK);
+	}
 
 	status = acpi_ns_check_sorted_list(data, return_object, 5, 1,
 					   ACPI_SORT_DESCENDING,
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 48db0944ce4a..62365f6075dd 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -126,12 +126,29 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index)
 	}
 
 	/*
-	 * Originally, we checked the table signature for "SSDT" or "PSDT" here.
-	 * Next, we added support for OEMx tables, signature "OEM".
-	 * Valid tables were encountered with a null signature, so we've just
-	 * given up on validating the signature, since it seems to be a waste
-	 * of code. The original code was removed (05/2008).
+	 * Validate the incoming table signature.
+	 *
+	 * 1) Originally, we checked the table signature for "SSDT" or "PSDT".
+	 * 2) We added support for OEMx tables, signature "OEM".
+	 * 3) Valid tables were encountered with a null signature, so we just
+	 *    gave up on validating the signature, (05/2008).
+	 * 4) We encountered non-AML tables such as the MADT, which caused
+	 *    interpreter errors and kernel faults. So now, we once again allow
+	 *    only "SSDT", "OEMx", and now, also a null signature. (05/2011).
 	 */
+	if ((table_desc->pointer->signature[0] != 0x00) &&
+	    (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT))
+	    && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) {
+		ACPI_ERROR((AE_INFO,
+			    "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx",
+			    acpi_ut_valid_acpi_name(*(u32 *)table_desc->
+						    pointer->
+						    signature) ? table_desc->
+			    pointer->signature : "????",
+			    *(u32 *)table_desc->pointer->signature));
+
+		return_ACPI_STATUS(AE_BAD_SIGNATURE);
+	}
 
 	(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index f739a70b1c70..c34aa51af4ee 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -10,9 +10,11 @@ config ACPI_APEI
 	  error injection.
 
 config ACPI_APEI_GHES
-	tristate "APEI Generic Hardware Error Source"
+	bool "APEI Generic Hardware Error Source"
 	depends on ACPI_APEI && X86
 	select ACPI_HED
+	select LLIST
+	select GENERIC_ALLOCATOR
 	help
 	  Generic Hardware Error Source provides a way to report
 	  platform hardware errors (such as that from chipset). It
@@ -30,6 +32,13 @@ config ACPI_APEI_PCIEAER
 	  PCIe AER errors may be reported via APEI firmware first mode.
 	  Turn on this option to enable the corresponding support.
 
+config ACPI_APEI_MEMORY_FAILURE
+	bool "APEI memory error recovering support"
+	depends on ACPI_APEI && MEMORY_FAILURE
+	help
+	  Memory errors may be reported via APEI firmware first mode.
+	  Turn on this option to enable the memory recovering support.
+
 config ACPI_APEI_EINJ
 	tristate "APEI Error INJection (EINJ)"
 	depends on ACPI_APEI && DEBUG_FS
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 4a904a4bf05f..8041248fce9b 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop);
  * Interpret the specified action. Go through whole action table,
  * execute all instructions belong to the action.
  */
-int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
+		    bool optional)
 {
-	int rc;
+	int rc = -ENOENT;
 	u32 i, ip;
 	struct acpi_whea_header *entry;
 	apei_exec_ins_func_t run;
@@ -198,9 +199,9 @@ rewind:
 		goto rewind;
 	}
 
-	return 0;
+	return !optional && rc < 0 ? rc : 0;
 }
-EXPORT_SYMBOL_GPL(apei_exec_run);
+EXPORT_SYMBOL_GPL(__apei_exec_run);
 
 typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
 				      struct acpi_whea_header *entry,
@@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void)
 	return dapei;
 }
 EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+
+int apei_osc_setup(void)
+{
+	static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
+	acpi_handle handle;
+	u32 capbuf[3];
+	struct acpi_osc_context context = {
+		.uuid_str	= whea_uuid_str,
+		.rev		= 1,
+		.cap.length	= sizeof(capbuf),
+		.cap.pointer	= capbuf,
+	};
+
+	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_TYPE] = 0;
+	capbuf[OSC_CONTROL_TYPE] = 0;
+
+	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
+	    || ACPI_FAILURE(acpi_run_osc(handle, &context)))
+		return -EIO;
+	else {
+		kfree(context.ret.pointer);
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index ef0581f2094d..f57050e7a5e7 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
 	return ctx->value;
 }
 
-int apei_exec_run(struct apei_exec_context *ctx, u8 action);
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
+
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+	return __apei_exec_run(ctx, action, 0);
+}
+
+/* It is optional whether the firmware provides the action */
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
+{
+	return __apei_exec_run(ctx, action, 1);
+}
 
 /* Common instruction implementation */
 
@@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx,
 			const struct acpi_hest_generic_status *estatus);
 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
 int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
+
+int apei_osc_setup(void);
 #endif
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index f74b2ea11f21..589b96c38704 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -46,7 +46,8 @@
  * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
  * EINJ table through an unpublished extension. Use with caution as
  * most will ignore the parameter and make their own choice of address
- * for error injection.
+ * for error injection. This extension is used only if
+ * param_extension module parameter is specified.
  */
 struct einj_parameter {
 	u64 type;
@@ -65,6 +66,9 @@ struct einj_parameter {
 	((struct acpi_whea_header *)((char *)(tab) +			\
 	 sizeof(struct acpi_table_einj)))
 
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
 static struct acpi_table_einj *einj_tab;
 
 static struct apei_resources einj_resources;
@@ -285,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
 
 	einj_exec_ctx_init(&ctx);
 
-	rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, type);
@@ -323,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
 	rc = __einj_error_trigger(trigger_paddr);
 	if (rc)
 		return rc;
-	rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION);
+	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
 
 	return rc;
 }
@@ -489,14 +493,6 @@ static int __init einj_init(void)
 				    einj_debug_dir, NULL, &error_type_fops);
 	if (!fentry)
 		goto err_cleanup;
-	fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
-				    einj_debug_dir, &error_param1);
-	if (!fentry)
-		goto err_cleanup;
-	fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
-				    einj_debug_dir, &error_param2);
-	if (!fentry)
-		goto err_cleanup;
 	fentry = debugfs_create_file("error_inject", S_IWUSR,
 				     einj_debug_dir, NULL, &error_inject_fops);
 	if (!fentry)
@@ -513,12 +509,23 @@ static int __init einj_init(void)
 	rc = apei_exec_pre_map_gars(&ctx);
 	if (rc)
 		goto err_release;
-	param_paddr = einj_get_parameter_address();
-	if (param_paddr) {
-		einj_param = ioremap(param_paddr, sizeof(*einj_param));
-		rc = -ENOMEM;
-		if (!einj_param)
-			goto err_unmap;
+	if (param_extension) {
+		param_paddr = einj_get_parameter_address();
+		if (param_paddr) {
+			einj_param = ioremap(param_paddr, sizeof(*einj_param));
+			rc = -ENOMEM;
+			if (!einj_param)
+				goto err_unmap;
+			fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
+						    einj_debug_dir, &error_param1);
+			if (!fentry)
+				goto err_unmap;
+			fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
+						    einj_debug_dir, &error_param2);
+			if (!fentry)
+				goto err_unmap;
+		} else
+			pr_warn(EINJ_PFX "Parameter extension is not supported.\n");
 	}
 
 	pr_info(EINJ_PFX "Error INJection is initialized.\n");
@@ -526,6 +533,8 @@ static int __init einj_init(void)
 	return 0;
 
 err_unmap:
+	if (einj_param)
+		iounmap(einj_param);
 	apei_exec_post_unmap_gars(&ctx);
 err_release:
 	apei_resources_release(&einj_resources);
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index a4cfb64c86a1..903549df809b 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -33,7 +33,7 @@
 
 #define ERST_DBG_PFX			"ERST DBG: "
 
-#define ERST_DBG_RECORD_LEN_MAX		4096
+#define ERST_DBG_RECORD_LEN_MAX		0x4000
 
 static void *erst_dbg_buf;
 static unsigned int erst_dbg_buf_len;
@@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = {
 
 static __init int erst_dbg_init(void)
 {
+	if (erst_disable) {
+		pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
+		return -ENODEV;
+	}
 	return misc_register(&erst_dbg_dev);
 }
 
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6053f4780df9..2ca59dc69f7f 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, offset);
@@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
@@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, offset);
@@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
@@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, record_id);
@@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index f703b2881153..0784f99a4665 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,7 +12,7 @@
12 * For more information about Generic Hardware Error Source, please 12 * For more information about Generic Hardware Error Source, please
13 * refer to ACPI Specification version 4.0, section 17.3.2.6 13 * refer to ACPI Specification version 4.0, section 17.3.2.6
14 * 14 *
15 * Copyright 2010 Intel Corp. 15 * Copyright 2010,2011 Intel Corp.
16 * Author: Huang Ying <ying.huang@intel.com> 16 * Author: Huang Ying <ying.huang@intel.com>
17 * 17 *
18 * This program is free software; you can redistribute it and/or 18 * This program is free software; you can redistribute it and/or
@@ -42,6 +42,9 @@
42#include <linux/mutex.h> 42#include <linux/mutex.h>
43#include <linux/ratelimit.h> 43#include <linux/ratelimit.h>
44#include <linux/vmalloc.h> 44#include <linux/vmalloc.h>
45#include <linux/irq_work.h>
46#include <linux/llist.h>
47#include <linux/genalloc.h>
45#include <acpi/apei.h> 48#include <acpi/apei.h>
46#include <acpi/atomicio.h> 49#include <acpi/atomicio.h>
47#include <acpi/hed.h> 50#include <acpi/hed.h>
@@ -53,6 +56,30 @@
53#define GHES_PFX "GHES: " 56#define GHES_PFX "GHES: "
54 57
55#define GHES_ESTATUS_MAX_SIZE 65536 58#define GHES_ESTATUS_MAX_SIZE 65536
59#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
60
61#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
62
63/* This is just an estimation for memory pool allocation */
64#define GHES_ESTATUS_CACHE_AVG_SIZE 512
65
66#define GHES_ESTATUS_CACHES_SIZE 4
67
68#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
69/* Prevent too many caches are allocated because of RCU */
70#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
71
72#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
73 (sizeof(struct ghes_estatus_cache) + (estatus_len))
74#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
75 ((struct acpi_hest_generic_status *) \
76 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
77
78#define GHES_ESTATUS_NODE_LEN(estatus_len) \
79 (sizeof(struct ghes_estatus_node) + (estatus_len))
80#define GHES_ESTATUS_FROM_NODE(estatus_node) \
81 ((struct acpi_hest_generic_status *) \
82 ((struct ghes_estatus_node *)(estatus_node) + 1))
56 83
57/* 84/*
58 * One struct ghes is created for each generic hardware error source. 85 * One struct ghes is created for each generic hardware error source.
@@ -77,6 +104,22 @@ struct ghes {
77 }; 104 };
78}; 105};
79 106
107struct ghes_estatus_node {
108 struct llist_node llnode;
109 struct acpi_hest_generic *generic;
110};
111
112struct ghes_estatus_cache {
113 u32 estatus_len;
114 atomic_t count;
115 struct acpi_hest_generic *generic;
116 unsigned long long time_in;
117 struct rcu_head rcu;
118};
119
120int ghes_disable;
121module_param_named(disable, ghes_disable, bool, 0);
122
80static int ghes_panic_timeout __read_mostly = 30; 123static int ghes_panic_timeout __read_mostly = 30;
81 124
82/* 125/*
@@ -121,6 +164,22 @@ static struct vm_struct *ghes_ioremap_area;
121static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 164static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
122static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 165static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
123 166
167/*
168 * printk is not safe in NMI context. So in NMI handler, we allocate
169 * required memory from lock-less memory allocator
170 * (ghes_estatus_pool), save estatus into it, put them into lock-less
171 * list (ghes_estatus_llist), then delay printk into IRQ context via
172 * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
173 * required pool size by all NMI error source.
174 */
175static struct gen_pool *ghes_estatus_pool;
176static unsigned long ghes_estatus_pool_size_request;
177static struct llist_head ghes_estatus_llist;
178static struct irq_work ghes_proc_irq_work;
179
180struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
181static atomic_t ghes_estatus_cache_alloced;
182
124static int ghes_ioremap_init(void) 183static int ghes_ioremap_init(void)
125{ 184{
126 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, 185 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -180,6 +239,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
180 __flush_tlb_one(vaddr); 239 __flush_tlb_one(vaddr);
181} 240}
182 241
242static int ghes_estatus_pool_init(void)
243{
244 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
245 if (!ghes_estatus_pool)
246 return -ENOMEM;
247 return 0;
248}
249
250static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
251 struct gen_pool_chunk *chunk,
252 void *data)
253{
254 free_page(chunk->start_addr);
255}
256
257static void ghes_estatus_pool_exit(void)
258{
259 gen_pool_for_each_chunk(ghes_estatus_pool,
260 ghes_estatus_pool_free_chunk_page, NULL);
261 gen_pool_destroy(ghes_estatus_pool);
262}
263
264static int ghes_estatus_pool_expand(unsigned long len)
265{
266 unsigned long i, pages, size, addr;
267 int ret;
268
269 ghes_estatus_pool_size_request += PAGE_ALIGN(len);
270 size = gen_pool_size(ghes_estatus_pool);
271 if (size >= ghes_estatus_pool_size_request)
272 return 0;
273 pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
274 for (i = 0; i < pages; i++) {
275 addr = __get_free_page(GFP_KERNEL);
276 if (!addr)
277 return -ENOMEM;
278 ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
279 if (ret)
280 return ret;
281 }
282
283 return 0;
284}
285
286static void ghes_estatus_pool_shrink(unsigned long len)
287{
288 ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
289}
290
183static struct ghes *ghes_new(struct acpi_hest_generic *generic) 291static struct ghes *ghes_new(struct acpi_hest_generic *generic)
184{ 292{
185 struct ghes *ghes; 293 struct ghes *ghes;
@@ -341,43 +449,196 @@ static void ghes_clear_estatus(struct ghes *ghes)
341 ghes->flags &= ~GHES_TO_CLEAR; 449 ghes->flags &= ~GHES_TO_CLEAR;
342} 450}
343 451
344static void ghes_do_proc(struct ghes *ghes) 452static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
345{ 453{
346 int sev, processed = 0; 454 int sev, sec_sev;
347 struct acpi_hest_generic_data *gdata; 455 struct acpi_hest_generic_data *gdata;
348 456
349 sev = ghes_severity(ghes->estatus->error_severity); 457 sev = ghes_severity(estatus->error_severity);
350 apei_estatus_for_each_section(ghes->estatus, gdata) { 458 apei_estatus_for_each_section(estatus, gdata) {
351#ifdef CONFIG_X86_MCE 459 sec_sev = ghes_severity(gdata->error_severity);
352 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 460 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
353 CPER_SEC_PLATFORM_MEM)) { 461 CPER_SEC_PLATFORM_MEM)) {
354 apei_mce_report_mem_error( 462 struct cper_sec_mem_err *mem_err;
355 sev == GHES_SEV_CORRECTED, 463 mem_err = (struct cper_sec_mem_err *)(gdata+1);
356 (struct cper_sec_mem_err *)(gdata+1)); 464#ifdef CONFIG_X86_MCE
357 processed = 1; 465 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
358 } 466 mem_err);
359#endif 467#endif
468#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
469 if (sev == GHES_SEV_RECOVERABLE &&
470 sec_sev == GHES_SEV_RECOVERABLE &&
471 mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
472 unsigned long pfn;
473 pfn = mem_err->physical_addr >> PAGE_SHIFT;
474 memory_failure_queue(pfn, 0, 0);
475 }
476#endif
477 }
360 } 478 }
361} 479}
362 480
363static void ghes_print_estatus(const char *pfx, struct ghes *ghes) 481static void __ghes_print_estatus(const char *pfx,
482 const struct acpi_hest_generic *generic,
483 const struct acpi_hest_generic_status *estatus)
364{ 484{
365 /* Not more than 2 messages every 5 seconds */
366 static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
367
368 if (pfx == NULL) { 485 if (pfx == NULL) {
369 if (ghes_severity(ghes->estatus->error_severity) <= 486 if (ghes_severity(estatus->error_severity) <=
370 GHES_SEV_CORRECTED) 487 GHES_SEV_CORRECTED)
371 pfx = KERN_WARNING HW_ERR; 488 pfx = KERN_WARNING HW_ERR;
372 else 489 else
373 pfx = KERN_ERR HW_ERR; 490 pfx = KERN_ERR HW_ERR;
374 } 491 }
375 if (__ratelimit(&ratelimit)) { 492 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
376 printk( 493 pfx, generic->header.source_id);
377 "%s""Hardware error from APEI Generic Hardware Error Source: %d\n", 494 apei_estatus_print(pfx, estatus);
378 pfx, ghes->generic->header.source_id); 495}
379 apei_estatus_print(pfx, ghes->estatus); 496
497static int ghes_print_estatus(const char *pfx,
498 const struct acpi_hest_generic *generic,
499 const struct acpi_hest_generic_status *estatus)
500{
501 /* Not more than 2 messages every 5 seconds */
502 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
503 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
504 struct ratelimit_state *ratelimit;
505
506 if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
507 ratelimit = &ratelimit_corrected;
508 else
509 ratelimit = &ratelimit_uncorrected;
510 if (__ratelimit(ratelimit)) {
511 __ghes_print_estatus(pfx, generic, estatus);
512 return 1;
380 } 513 }
514 return 0;
515}
516
517/*
518 * GHES error status reporting throttle, to report more kinds of
519 * errors, instead of just most frequently occurred errors.
520 */
521static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
522{
523 u32 len;
524 int i, cached = 0;
525 unsigned long long now;
526 struct ghes_estatus_cache *cache;
527 struct acpi_hest_generic_status *cache_estatus;
528
529 len = apei_estatus_len(estatus);
530 rcu_read_lock();
531 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
532 cache = rcu_dereference(ghes_estatus_caches[i]);
533 if (cache == NULL)
534 continue;
535 if (len != cache->estatus_len)
536 continue;
537 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
538 if (memcmp(estatus, cache_estatus, len))
539 continue;
540 atomic_inc(&cache->count);
541 now = sched_clock();
542 if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
543 cached = 1;
544 break;
545 }
546 rcu_read_unlock();
547 return cached;
548}
549
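The return value of ghes_estatus_cached() boils down to "an identical estatus was cached less than GHES_ESTATUS_IN_CACHE_MAX_NSEC ago". A runnable toy of the window test; the 10 s window here is an illustrative assumption, the real constant is defined elsewhere in ghes.c:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WINDOW_NSEC 10000000000ULL	/* assumed 10 s window */

static bool cached(uint64_t now, uint64_t time_in)
{
	return now - time_in < WINDOW_NSEC;
}

int main(void)
{
	printf("%d\n", cached(3000000000ULL, 0));	/* 1: suppressed */
	printf("%d\n", cached(12000000000ULL, 0));	/* 0: reported again */
	return 0;
}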
550static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
551 struct acpi_hest_generic *generic,
552 struct acpi_hest_generic_status *estatus)
553{
554 int alloced;
555 u32 len, cache_len;
556 struct ghes_estatus_cache *cache;
557 struct acpi_hest_generic_status *cache_estatus;
558
559 alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
560 if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
561 atomic_dec(&ghes_estatus_cache_alloced);
562 return NULL;
563 }
564 len = apei_estatus_len(estatus);
565 cache_len = GHES_ESTATUS_CACHE_LEN(len);
566 cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
567 if (!cache) {
568 atomic_dec(&ghes_estatus_cache_alloced);
569 return NULL;
570 }
571 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
572 memcpy(cache_estatus, estatus, len);
573 cache->estatus_len = len;
574 atomic_set(&cache->count, 0);
575 cache->generic = generic;
576 cache->time_in = sched_clock();
577 return cache;
578}
579
580static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
581{
582 u32 len;
583
584 len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
585 len = GHES_ESTATUS_CACHE_LEN(len);
586 gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
587 atomic_dec(&ghes_estatus_cache_alloced);
588}
589
590static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
591{
592 struct ghes_estatus_cache *cache;
593
594 cache = container_of(head, struct ghes_estatus_cache, rcu);
595 ghes_estatus_cache_free(cache);
596}
597
598static void ghes_estatus_cache_add(
599 struct acpi_hest_generic *generic,
600 struct acpi_hest_generic_status *estatus)
601{
602 int i, slot = -1, count;
603 unsigned long long now, duration, period, max_period = 0;
604 struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
605
606 new_cache = ghes_estatus_cache_alloc(generic, estatus);
607 if (new_cache == NULL)
608 return;
609 rcu_read_lock();
610 now = sched_clock();
611 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
612 cache = rcu_dereference(ghes_estatus_caches[i]);
613 if (cache == NULL) {
614 slot = i;
615 slot_cache = NULL;
616 break;
617 }
618 duration = now - cache->time_in;
619 if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
620 slot = i;
621 slot_cache = cache;
622 break;
623 }
624 count = atomic_read(&cache->count);
625 period = duration;
626 do_div(period, (count + 1));
627 if (period > max_period) {
628 max_period = period;
629 slot = i;
630 slot_cache = cache;
631 }
632 }
633 /* new_cache must be put into the array only after its contents are written */
634 smp_wmb();
635 if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
636 slot_cache, new_cache) == slot_cache) {
637 if (slot_cache)
638 call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
639 } else
640 ghes_estatus_cache_free(new_cache);
641 rcu_read_unlock();
381} 642}
382 643
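The victim-selection loop in ghes_estatus_cache_add() scores each slot by its mean inter-hit period, duration / (count + 1), and evicts the slot with the largest period, i.e. the coldest entry; empty or expired slots win immediately. A runnable toy of the scoring, with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned long long dur = 8000000000ULL;	/* both slots 8 s old */
	int count_a = 40, count_b = 1;

	/* A is hot (~195 ms between hits), B is cold (4 s): evict B. */
	printf("A: %llu ns\n", dur / (count_a + 1));
	printf("B: %llu ns\n", dur / (count_b + 1));
	return 0;
}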
383static int ghes_proc(struct ghes *ghes) 644static int ghes_proc(struct ghes *ghes)
@@ -387,9 +648,11 @@ static int ghes_proc(struct ghes *ghes)
387 rc = ghes_read_estatus(ghes, 0); 648 rc = ghes_read_estatus(ghes, 0);
388 if (rc) 649 if (rc)
389 goto out; 650 goto out;
390 ghes_print_estatus(NULL, ghes); 651 if (!ghes_estatus_cached(ghes->estatus)) {
391 ghes_do_proc(ghes); 652 if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
392 653 ghes_estatus_cache_add(ghes->generic, ghes->estatus);
654 }
655 ghes_do_proc(ghes->estatus);
393out: 656out:
394 ghes_clear_estatus(ghes); 657 ghes_clear_estatus(ghes);
395 return 0; 658 return 0;
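Note the ordering in the rewritten ghes_proc(): only the reporting path is throttled by the cache, while ghes_do_proc() still runs for every polled event, so recovery is never suppressed. A runnable toy of that policy:

#include <stdbool.h>
#include <stdio.h>

static bool cached;	/* stands in for ghes_estatus_cached() */

static void proc_one(void)
{
	if (!cached)
		printf("report + cache\n");	/* print path, throttled */
	printf("handle\n");			/* ghes_do_proc always runs */
}

int main(void)
{
	proc_one();		/* first sighting: reported and handled */
	cached = true;
	proc_one();		/* duplicate: silenced but still handled */
	return 0;
}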
@@ -447,6 +710,45 @@ static int ghes_notify_sci(struct notifier_block *this,
447 return ret; 710 return ret;
448} 711}
449 712
713static void ghes_proc_in_irq(struct irq_work *irq_work)
714{
715 struct llist_node *llnode, *next, *tail = NULL;
716 struct ghes_estatus_node *estatus_node;
717 struct acpi_hest_generic *generic;
718 struct acpi_hest_generic_status *estatus;
719 u32 len, node_len;
720
721 /*
722 * The estatus entries in the list are in reverse time order;
723 * restore them to the proper order before processing.
724 */
725 llnode = llist_del_all(&ghes_estatus_llist);
726 while (llnode) {
727 next = llnode->next;
728 llnode->next = tail;
729 tail = llnode;
730 llnode = next;
731 }
732 llnode = tail;
733 while (llnode) {
734 next = llnode->next;
735 estatus_node = llist_entry(llnode, struct ghes_estatus_node,
736 llnode);
737 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
738 len = apei_estatus_len(estatus);
739 node_len = GHES_ESTATUS_NODE_LEN(len);
740 ghes_do_proc(estatus);
741 if (!ghes_estatus_cached(estatus)) {
742 generic = estatus_node->generic;
743 if (ghes_print_estatus(NULL, generic, estatus))
744 ghes_estatus_cache_add(generic, estatus);
745 }
746 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
747 node_len);
748 llnode = next;
749 }
750}
751
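llist_add() pushes at the head, so llist_del_all() hands ghes_proc_in_irq() the entries newest-first; the first loop is a standard in-place singly-linked-list reversal that restores arrival order before processing. A runnable toy of the same reversal:

#include <stdio.h>

struct node { struct node *next; int seq; };

static struct node *reverse(struct node *head)
{
	struct node *tail = NULL;

	while (head) {
		struct node *next = head->next;

		head->next = tail;
		tail = head;
		head = next;
	}
	return tail;
}

int main(void)
{
	struct node c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };	/* 1->2->3 */

	for (struct node *n = reverse(&a); n; n = n->next)
		printf("%d\n", n->seq);	/* prints 3 2 1 */
	return 0;
}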
450static int ghes_notify_nmi(struct notifier_block *this, 752static int ghes_notify_nmi(struct notifier_block *this,
451 unsigned long cmd, void *data) 753 unsigned long cmd, void *data)
452{ 754{
@@ -476,7 +778,8 @@ static int ghes_notify_nmi(struct notifier_block *this,
476 778
477 if (sev_global >= GHES_SEV_PANIC) { 779 if (sev_global >= GHES_SEV_PANIC) {
478 oops_begin(); 780 oops_begin();
479 ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); 781 __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
782 ghes_global->estatus);
480 /* reboot to log the error! */ 783 /* reboot to log the error! */
481 if (panic_timeout == 0) 784 if (panic_timeout == 0)
482 panic_timeout = ghes_panic_timeout; 785 panic_timeout = ghes_panic_timeout;
@@ -484,12 +787,34 @@ static int ghes_notify_nmi(struct notifier_block *this,
484 } 787 }
485 788
486 list_for_each_entry_rcu(ghes, &ghes_nmi, list) { 789 list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
790#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
791 u32 len, node_len;
792 struct ghes_estatus_node *estatus_node;
793 struct acpi_hest_generic_status *estatus;
794#endif
487 if (!(ghes->flags & GHES_TO_CLEAR)) 795 if (!(ghes->flags & GHES_TO_CLEAR))
488 continue; 796 continue;
489 /* Do not print estatus because printk is not NMI safe */ 797#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
490 ghes_do_proc(ghes); 798 if (ghes_estatus_cached(ghes->estatus))
799 goto next;
800 /* Save estatus for further processing in IRQ context */
801 len = apei_estatus_len(ghes->estatus);
802 node_len = GHES_ESTATUS_NODE_LEN(len);
803 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
804 node_len);
805 if (estatus_node) {
806 estatus_node->generic = ghes->generic;
807 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
808 memcpy(estatus, ghes->estatus, len);
809 llist_add(&estatus_node->llnode, &ghes_estatus_llist);
810 }
811next:
812#endif
491 ghes_clear_estatus(ghes); 813 ghes_clear_estatus(ghes);
492 } 814 }
815#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
816 irq_work_queue(&ghes_proc_irq_work);
817#endif
493 818
494out: 819out:
495 raw_spin_unlock(&ghes_nmi_lock); 820 raw_spin_unlock(&ghes_nmi_lock);
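The NMI path above only copies the estatus into a pre-allocated pool node and queues irq_work; printk(), the cache bookkeeping, and gen_pool_free() all happen later in ghes_proc_in_irq(), in IRQ context where they are safe. A minimal sketch of that handoff, mirroring the init_irq_work()/irq_work_queue() calls this patch uses:

#include <linux/irq_work.h>

static struct irq_work deferred;

static void deferred_fn(struct irq_work *work)
{
	/* IRQ context: printk() and gen_pool_free() are safe here. */
}

static void example_init(void)
{
	init_irq_work(&deferred, deferred_fn);
}

static void example_nmi_handler(void)
{
	/* NMI context: queue only; the callback runs shortly after. */
	irq_work_queue(&deferred);
}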
@@ -504,10 +829,26 @@ static struct notifier_block ghes_notifier_nmi = {
504 .notifier_call = ghes_notify_nmi, 829 .notifier_call = ghes_notify_nmi,
505}; 830};
506 831
832static unsigned long ghes_esource_prealloc_size(
833 const struct acpi_hest_generic *generic)
834{
835 unsigned long block_length, prealloc_records, prealloc_size;
836
837 block_length = min_t(unsigned long, generic->error_block_length,
838 GHES_ESTATUS_MAX_SIZE);
839 prealloc_records = max_t(unsigned long,
840 generic->records_to_preallocate, 1);
841 prealloc_size = min_t(unsigned long, block_length * prealloc_records,
842 GHES_ESOURCE_PREALLOC_MAX_SIZE);
843
844 return prealloc_size;
845}
846
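ghes_esource_prealloc_size() clamps the firmware-supplied values from both sides: the block length is capped, the record count is raised to at least 1, and the product is capped again. A runnable toy with illustrative caps (the real GHES_ESTATUS_MAX_SIZE and GHES_ESOURCE_PREALLOC_MAX_SIZE live in ghes.c):

#include <stdio.h>

#define MAX_BLOCK 65536UL	/* assumed per-block cap */
#define MAX_TOTAL 65536UL	/* assumed per-source cap */

static unsigned long prealloc(unsigned long block_len, unsigned long records)
{
	unsigned long b = block_len < MAX_BLOCK ? block_len : MAX_BLOCK;
	unsigned long r = records > 1 ? records : 1;
	unsigned long s = b * r;

	return s < MAX_TOTAL ? s : MAX_TOTAL;
}

int main(void)
{
	printf("%lu\n", prealloc(4096, 4));	/* 16384 */
	printf("%lu\n", prealloc(4096, 0));	/* 4096: count clamped up to 1 */
	return 0;
}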
507static int __devinit ghes_probe(struct platform_device *ghes_dev) 847static int __devinit ghes_probe(struct platform_device *ghes_dev)
508{ 848{
509 struct acpi_hest_generic *generic; 849 struct acpi_hest_generic *generic;
510 struct ghes *ghes = NULL; 850 struct ghes *ghes = NULL;
851 unsigned long len;
511 int rc = -EINVAL; 852 int rc = -EINVAL;
512 853
513 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 854 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -573,6 +914,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
573 mutex_unlock(&ghes_list_mutex); 914 mutex_unlock(&ghes_list_mutex);
574 break; 915 break;
575 case ACPI_HEST_NOTIFY_NMI: 916 case ACPI_HEST_NOTIFY_NMI:
917 len = ghes_esource_prealloc_size(generic);
918 ghes_estatus_pool_expand(len);
576 mutex_lock(&ghes_list_mutex); 919 mutex_lock(&ghes_list_mutex);
577 if (list_empty(&ghes_nmi)) 920 if (list_empty(&ghes_nmi))
578 register_die_notifier(&ghes_notifier_nmi); 921 register_die_notifier(&ghes_notifier_nmi);
@@ -597,6 +940,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
597{ 940{
598 struct ghes *ghes; 941 struct ghes *ghes;
599 struct acpi_hest_generic *generic; 942 struct acpi_hest_generic *generic;
943 unsigned long len;
600 944
601 ghes = platform_get_drvdata(ghes_dev); 945 ghes = platform_get_drvdata(ghes_dev);
602 generic = ghes->generic; 946 generic = ghes->generic;
@@ -627,6 +971,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
627 * freed after NMI handler finishes. 971 * freed after NMI handler finishes.
628 */ 972 */
629 synchronize_rcu(); 973 synchronize_rcu();
974 len = ghes_esource_prealloc_size(generic);
975 ghes_estatus_pool_shrink(len);
630 break; 976 break;
631 default: 977 default:
632 BUG(); 978 BUG();
@@ -662,15 +1008,43 @@ static int __init ghes_init(void)
662 return -EINVAL; 1008 return -EINVAL;
663 } 1009 }
664 1010
1011 if (ghes_disable) {
1012 pr_info(GHES_PFX "GHES is not enabled!\n");
1013 return -EINVAL;
1014 }
1015
1016 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
1017
665 rc = ghes_ioremap_init(); 1018 rc = ghes_ioremap_init();
666 if (rc) 1019 if (rc)
667 goto err; 1020 goto err;
668 1021
669 rc = platform_driver_register(&ghes_platform_driver); 1022 rc = ghes_estatus_pool_init();
670 if (rc) 1023 if (rc)
671 goto err_ioremap_exit; 1024 goto err_ioremap_exit;
672 1025
1026 rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
1027 GHES_ESTATUS_CACHE_ALLOCED_MAX);
1028 if (rc)
1029 goto err_pool_exit;
1030
1031 rc = platform_driver_register(&ghes_platform_driver);
1032 if (rc)
1033 goto err_pool_exit;
1034
1035 rc = apei_osc_setup();
1036 if (rc == 0 && osc_sb_apei_support_acked)
1037 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
1038 else if (rc == 0 && !osc_sb_apei_support_acked)
1039 pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
1040 else if (rc && osc_sb_apei_support_acked)
1041 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
1042 else
1043 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
1044
673 return 0; 1045 return 0;
1046err_pool_exit:
1047 ghes_estatus_pool_exit();
674err_ioremap_exit: 1048err_ioremap_exit:
675 ghes_ioremap_exit(); 1049 ghes_ioremap_exit();
676err: 1050err:
@@ -680,6 +1054,7 @@ err:
680static void __exit ghes_exit(void) 1054static void __exit ghes_exit(void)
681{ 1055{
682 platform_driver_unregister(&ghes_platform_driver); 1056 platform_driver_unregister(&ghes_platform_driver);
1057 ghes_estatus_pool_exit();
683 ghes_ioremap_exit(); 1058 ghes_ioremap_exit();
684} 1059}
685 1060
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 181bc2f7bb74..05fee06f4d6e 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -231,16 +231,17 @@ void __init acpi_hest_init(void)
231 goto err; 231 goto err;
232 } 232 }
233 233
234 rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); 234 if (!ghes_disable) {
235 if (rc) 235 rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
236 goto err; 236 if (rc)
237 237 goto err;
238 rc = hest_ghes_dev_register(ghes_count); 238 rc = hest_ghes_dev_register(ghes_count);
239 if (!rc) { 239 if (rc)
240 pr_info(HEST_PFX "Table parsing has been initialized.\n"); 240 goto err;
241 return;
242 } 241 }
243 242
243 pr_info(HEST_PFX "Table parsing has been initialized.\n");
244 return;
244err: 245err:
245 hest_disable = 1; 246 hest_disable = 1;
246} 247}
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 2c661353e8f2..87c0a8daa99a 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -55,6 +55,9 @@
55#define ACPI_BATTERY_NOTIFY_INFO 0x81 55#define ACPI_BATTERY_NOTIFY_INFO 0x81
56#define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82 56#define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82
57 57
58/* Battery power unit: 0 means mW, 1 means mA */
59#define ACPI_BATTERY_POWER_UNIT_MA 1
60
58#define _COMPONENT ACPI_BATTERY_COMPONENT 61#define _COMPONENT ACPI_BATTERY_COMPONENT
59 62
60ACPI_MODULE_NAME("battery"); 63ACPI_MODULE_NAME("battery");
@@ -91,11 +94,6 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids);
91enum { 94enum {
92 ACPI_BATTERY_ALARM_PRESENT, 95 ACPI_BATTERY_ALARM_PRESENT,
93 ACPI_BATTERY_XINFO_PRESENT, 96 ACPI_BATTERY_XINFO_PRESENT,
94 /* For buggy DSDTs that report negative 16-bit values for either
95 * charging or discharging current and/or report 0 as 65536
96 * due to bad math.
97 */
98 ACPI_BATTERY_QUIRK_SIGNED16_CURRENT,
99 ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, 97 ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY,
100}; 98};
101 99
@@ -301,7 +299,8 @@ static enum power_supply_property energy_battery_props[] = {
301#ifdef CONFIG_ACPI_PROCFS_POWER 299#ifdef CONFIG_ACPI_PROCFS_POWER
302inline char *acpi_battery_units(struct acpi_battery *battery) 300inline char *acpi_battery_units(struct acpi_battery *battery)
303{ 301{
304 return (battery->power_unit)?"mA":"mW"; 302 return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ?
303 "mA" : "mW";
305} 304}
306#endif 305#endif
307 306
@@ -461,9 +460,17 @@ static int acpi_battery_get_state(struct acpi_battery *battery)
461 battery->update_time = jiffies; 460 battery->update_time = jiffies;
462 kfree(buffer.pointer); 461 kfree(buffer.pointer);
463 462
464 if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) && 463 /* For buggy DSDTs that report negative 16-bit values for either
465 battery->rate_now != -1) 464 * charging or discharging current and/or report 0 as 65536
465 * due to bad math.
466 */
467 if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA &&
468 battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN &&
469 (s16)(battery->rate_now) < 0) {
466 battery->rate_now = abs((s16)battery->rate_now); 470 battery->rate_now = abs((s16)battery->rate_now);
471 printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate"
472 " invalid.\n");
473 }
467 474
468 if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags) 475 if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)
469 && battery->capacity_now >= 0 && battery->capacity_now <= 100) 476 && battery->capacity_now >= 0 && battery->capacity_now <= 100)
@@ -544,7 +551,7 @@ static int sysfs_add_battery(struct acpi_battery *battery)
544{ 551{
545 int result; 552 int result;
546 553
547 if (battery->power_unit) { 554 if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) {
548 battery->bat.properties = charge_battery_props; 555 battery->bat.properties = charge_battery_props;
549 battery->bat.num_properties = 556 battery->bat.num_properties =
550 ARRAY_SIZE(charge_battery_props); 557 ARRAY_SIZE(charge_battery_props);
@@ -566,18 +573,16 @@ static int sysfs_add_battery(struct acpi_battery *battery)
566 573
567static void sysfs_remove_battery(struct acpi_battery *battery) 574static void sysfs_remove_battery(struct acpi_battery *battery)
568{ 575{
569 if (!battery->bat.dev) 576 mutex_lock(&battery->lock);
577 if (!battery->bat.dev) {
578 mutex_unlock(&battery->lock);
570 return; 579 return;
580 }
581
571 device_remove_file(battery->bat.dev, &alarm_attr); 582 device_remove_file(battery->bat.dev, &alarm_attr);
572 power_supply_unregister(&battery->bat); 583 power_supply_unregister(&battery->bat);
573 battery->bat.dev = NULL; 584 battery->bat.dev = NULL;
574} 585 mutex_unlock(&battery->lock);
575
576static void acpi_battery_quirks(struct acpi_battery *battery)
577{
578 if (dmi_name_in_vendors("Acer") && battery->power_unit) {
579 set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags);
580 }
581} 586}
582 587
583/* 588/*
@@ -592,7 +597,7 @@ static void acpi_battery_quirks(struct acpi_battery *battery)
592 * 597 *
593 * Handle this correctly so that they won't break userspace. 598 * Handle this correctly so that they won't break userspace.
594 */ 599 */
595static void acpi_battery_quirks2(struct acpi_battery *battery) 600static void acpi_battery_quirks(struct acpi_battery *battery)
596{ 601{
597 if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)) 602 if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags))
598 return ; 603 return ;
@@ -623,13 +628,15 @@ static int acpi_battery_update(struct acpi_battery *battery)
623 result = acpi_battery_get_info(battery); 628 result = acpi_battery_get_info(battery);
624 if (result) 629 if (result)
625 return result; 630 return result;
626 acpi_battery_quirks(battery);
627 acpi_battery_init_alarm(battery); 631 acpi_battery_init_alarm(battery);
628 } 632 }
629 if (!battery->bat.dev) 633 if (!battery->bat.dev) {
630 sysfs_add_battery(battery); 634 result = sysfs_add_battery(battery);
635 if (result)
636 return result;
637 }
631 result = acpi_battery_get_state(battery); 638 result = acpi_battery_get_state(battery);
632 acpi_battery_quirks2(battery); 639 acpi_battery_quirks(battery);
633 return result; 640 return result;
634} 641}
635 642
@@ -863,7 +870,7 @@ DECLARE_FILE_FUNCTIONS(alarm);
863 }, \ 870 }, \
864 } 871 }
865 872
866static struct battery_file { 873static const struct battery_file {
867 struct file_operations ops; 874 struct file_operations ops;
868 mode_t mode; 875 mode_t mode;
869 const char *name; 876 const char *name;
@@ -948,9 +955,12 @@ static int battery_notify(struct notifier_block *nb,
948 struct acpi_battery *battery = container_of(nb, struct acpi_battery, 955 struct acpi_battery *battery = container_of(nb, struct acpi_battery,
949 pm_nb); 956 pm_nb);
950 switch (mode) { 957 switch (mode) {
958 case PM_POST_HIBERNATION:
951 case PM_POST_SUSPEND: 959 case PM_POST_SUSPEND:
952 sysfs_remove_battery(battery); 960 if (battery->bat.dev) {
953 sysfs_add_battery(battery); 961 sysfs_remove_battery(battery);
962 sysfs_add_battery(battery);
963 }
954 break; 964 break;
955 } 965 }
956 966
@@ -975,25 +985,33 @@ static int acpi_battery_add(struct acpi_device *device)
975 if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, 985 if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle,
976 "_BIX", &handle))) 986 "_BIX", &handle)))
977 set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); 987 set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags);
978 acpi_battery_update(battery); 988 result = acpi_battery_update(battery);
989 if (result)
990 goto fail;
979#ifdef CONFIG_ACPI_PROCFS_POWER 991#ifdef CONFIG_ACPI_PROCFS_POWER
980 result = acpi_battery_add_fs(device); 992 result = acpi_battery_add_fs(device);
981#endif 993#endif
982 if (!result) { 994 if (result) {
983 printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
984 ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
985 device->status.battery_present ? "present" : "absent");
986 } else {
987#ifdef CONFIG_ACPI_PROCFS_POWER 995#ifdef CONFIG_ACPI_PROCFS_POWER
988 acpi_battery_remove_fs(device); 996 acpi_battery_remove_fs(device);
989#endif 997#endif
990 kfree(battery); 998 goto fail;
991 } 999 }
992 1000
1001 printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
1002 ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
1003 device->status.battery_present ? "present" : "absent");
1004
993 battery->pm_nb.notifier_call = battery_notify; 1005 battery->pm_nb.notifier_call = battery_notify;
994 register_pm_notifier(&battery->pm_nb); 1006 register_pm_notifier(&battery->pm_nb);
995 1007
996 return result; 1008 return result;
1009
1010fail:
1011 sysfs_remove_battery(battery);
1012 mutex_destroy(&battery->lock);
1013 kfree(battery);
1014 return result;
997} 1015}
998 1016
999static int acpi_battery_remove(struct acpi_device *device, int type) 1017static int acpi_battery_remove(struct acpi_device *device, int type)
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d1e06c182cdb..437ddbf0c49a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -39,6 +39,7 @@
39#include <linux/pci.h> 39#include <linux/pci.h>
40#include <acpi/acpi_bus.h> 40#include <acpi/acpi_bus.h>
41#include <acpi/acpi_drivers.h> 41#include <acpi/acpi_drivers.h>
42#include <acpi/apei.h>
42#include <linux/dmi.h> 43#include <linux/dmi.h>
43#include <linux/suspend.h> 44#include <linux/suspend.h>
44 45
@@ -519,6 +520,7 @@ out_kfree:
519} 520}
520EXPORT_SYMBOL(acpi_run_osc); 521EXPORT_SYMBOL(acpi_run_osc);
521 522
523bool osc_sb_apei_support_acked;
522static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; 524static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
523static void acpi_bus_osc_support(void) 525static void acpi_bus_osc_support(void)
524{ 526{
@@ -541,11 +543,19 @@ static void acpi_bus_osc_support(void)
541#if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE) 543#if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
542 capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT; 544 capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT;
543#endif 545#endif
546
547 if (!ghes_disable)
548 capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT;
544 if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) 549 if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
545 return; 550 return;
546 if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) 551 if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) {
552 u32 *capbuf_ret = context.ret.pointer;
553 if (context.ret.length > OSC_SUPPORT_TYPE)
554 osc_sb_apei_support_acked =
555 capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT;
547 kfree(context.ret.pointer); 556 kfree(context.ret.pointer);
548 /* do we need to check the returned cap? Sounds no */ 557 }
558 /* do we need to check any of the other returned caps? Probably not. */
549} 559}
550 560
551/* -------------------------------------------------------------------------- 561/* --------------------------------------------------------------------------
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index 1864ad3cf895..19a61136d848 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -77,7 +77,7 @@ struct dock_dependent_device {
77 struct list_head list; 77 struct list_head list;
78 struct list_head hotplug_list; 78 struct list_head hotplug_list;
79 acpi_handle handle; 79 acpi_handle handle;
80 struct acpi_dock_ops *ops; 80 const struct acpi_dock_ops *ops;
81 void *context; 81 void *context;
82}; 82};
83 83
@@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier);
589 * the dock driver after _DCK is executed. 589 * the dock driver after _DCK is executed.
590 */ 590 */
591int 591int
592register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, 592register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops,
593 void *context) 593 void *context)
594{ 594{
595 struct dock_dependent_device *dd; 595 struct dock_dependent_device *dd;
diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c
index 05b44201a614..22f918bacd35 100644
--- a/drivers/acpi/ec_sys.c
+++ b/drivers/acpi/ec_sys.c
@@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf,
92 return count; 92 return count;
93} 93}
94 94
95static struct file_operations acpi_ec_io_ops = { 95static const struct file_operations acpi_ec_io_ops = {
96 .owner = THIS_MODULE, 96 .owner = THIS_MODULE,
97 .open = acpi_ec_open_io, 97 .open = acpi_ec_open_io,
98 .read = acpi_ec_read_io, 98 .read = acpi_ec_read_io,
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 467479f07c1f..0f0356ca1a9e 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
110 return result; 110 return result;
111} 111}
112 112
113static struct thermal_cooling_device_ops fan_cooling_ops = { 113static const struct thermal_cooling_device_ops fan_cooling_ops = {
114 .get_max_state = fan_get_max_state, 114 .get_max_state = fan_get_max_state,
115 .get_cur_state = fan_get_cur_state, 115 .get_cur_state = fan_get_cur_state,
116 .set_cur_state = fan_set_cur_state, 116 .set_cur_state = fan_set_cur_state,
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 372f9b70f7f4..fa32f584229f 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported)
155{ 155{
156 if (!strcmp("Linux", interface)) { 156 if (!strcmp("Linux", interface)) {
157 157
158 printk(KERN_NOTICE FW_BUG PREFIX 158 printk_once(KERN_NOTICE FW_BUG PREFIX
159 "BIOS _OSI(Linux) query %s%s\n", 159 "BIOS _OSI(Linux) query %s%s\n",
160 osi_linux.enable ? "honored" : "ignored", 160 osi_linux.enable ? "honored" : "ignored",
161 osi_linux.cmdline ? " via cmdline" : 161 osi_linux.cmdline ? " via cmdline" :
@@ -237,8 +237,23 @@ void acpi_os_vprintf(const char *fmt, va_list args)
237#endif 237#endif
238} 238}
239 239
240#ifdef CONFIG_KEXEC
241static unsigned long acpi_rsdp;
242static int __init setup_acpi_rsdp(char *arg)
243{
244 acpi_rsdp = simple_strtoul(arg, NULL, 16);
245 return 0;
246}
247early_param("acpi_rsdp", setup_acpi_rsdp);
248#endif
249
240acpi_physical_address __init acpi_os_get_root_pointer(void) 250acpi_physical_address __init acpi_os_get_root_pointer(void)
241{ 251{
252#ifdef CONFIG_KEXEC
253 if (acpi_rsdp)
254 return acpi_rsdp;
255#endif
256
242 if (efi_enabled) { 257 if (efi_enabled) {
243 if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) 258 if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
244 return efi.acpi20; 259 return efi.acpi20;
@@ -1083,7 +1098,13 @@ struct osi_setup_entry {
1083 bool enable; 1098 bool enable;
1084}; 1099};
1085 1100
1086static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX]; 1101static struct osi_setup_entry __initdata
1102 osi_setup_entries[OSI_STRING_ENTRIES_MAX] = {
1103 {"Module Device", true},
1104 {"Processor Device", true},
1105 {"3.0 _SCP Extensions", true},
1106 {"Processor Aggregator Device", true},
1107};
1087 1108
1088void __init acpi_osi_setup(char *str) 1109void __init acpi_osi_setup(char *str)
1089{ 1110{
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index f907cfbfa13c..7f9eba9a0b02 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -303,6 +303,61 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus)
303/* -------------------------------------------------------------------------- 303/* --------------------------------------------------------------------------
304 PCI Interrupt Routing Support 304 PCI Interrupt Routing Support
305 -------------------------------------------------------------------------- */ 305 -------------------------------------------------------------------------- */
306#ifdef CONFIG_X86_IO_APIC
307extern int noioapicquirk;
308extern int noioapicreroute;
309
310static int bridge_has_boot_interrupt_variant(struct pci_bus *bus)
311{
312 struct pci_bus *bus_it;
313
314 for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) {
315 if (!bus_it->self)
316 return 0;
317 if (bus_it->self->irq_reroute_variant)
318 return bus_it->self->irq_reroute_variant;
319 }
320 return 0;
321}
322
323/*
324 * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ
325 * entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does
326 * during interrupt handling). When this INTx generation cannot be disabled,
327 * we reroute these interrupts to their legacy equivalent to get rid of
328 * spurious interrupts.
329 */
330static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
331 struct acpi_prt_entry *entry)
332{
333 if (noioapicquirk || noioapicreroute) {
334 return 0;
335 } else {
336 switch (bridge_has_boot_interrupt_variant(dev->bus)) {
337 case 0:
338 /* no rerouting necessary */
339 return 0;
340 case INTEL_IRQ_REROUTE_VARIANT:
341 /*
342 * Remap according to INTx routing table in 6700PXH
343 * specs, intel order number 302628-002, section
344 * 2.15.2. Other chipsets (80332, ...) have the same
345 * mapping and are handled here as well.
346 */
347 dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy "
348 "IRQ %d\n", entry->index,
349 (entry->index % 4) + 16);
350 entry->index = (entry->index % 4) + 16;
351 return 1;
352 default:
353 dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy "
354 "IRQ: unknown mapping\n", entry->index);
355 return -1;
356 }
357 }
358}
359#endif /* CONFIG_X86_IO_APIC */
360
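The 6700PXH remap above folds the PRT entry index into one of the four legacy boot IRQs. A runnable toy of that mapping:

#include <stdio.h>

int main(void)
{
	for (int index = 0; index < 8; index++)
		printf("PRT entry %d -> legacy IRQ %d\n",
		       index, (index % 4) + 16);
	return 0;
}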
306static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) 361static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
307{ 362{
308 struct acpi_prt_entry *entry; 363 struct acpi_prt_entry *entry;
@@ -311,6 +366,9 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
311 366
312 entry = acpi_pci_irq_find_prt_entry(dev, pin); 367 entry = acpi_pci_irq_find_prt_entry(dev, pin);
313 if (entry) { 368 if (entry) {
369#ifdef CONFIG_X86_IO_APIC
370 acpi_reroute_boot_interrupt(dev, entry);
371#endif /* CONFIG_X86_IO_APIC */
314 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n", 372 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n",
315 pci_name(dev), pin_name(pin))); 373 pci_name(dev), pin_name(pin)));
316 return entry; 374 return entry;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d06078d660ad..2672c798272f 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -485,7 +485,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
485 root->secondary.end = 0xFF; 485 root->secondary.end = 0xFF;
486 printk(KERN_WARNING FW_BUG PREFIX 486 printk(KERN_WARNING FW_BUG PREFIX
487 "no secondary bus range in _CRS\n"); 487 "no secondary bus range in _CRS\n");
488 status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus); 488 status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN,
489 NULL, &bus);
489 if (ACPI_SUCCESS(status)) 490 if (ACPI_SUCCESS(status))
490 root->secondary.start = bus; 491 root->secondary.start = bus;
491 else if (status == AE_NOT_FOUND) 492 else if (status == AE_NOT_FOUND)
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 79cb65332894..870550d6a4bf 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev,
244 return result; 244 return result;
245} 245}
246 246
247struct thermal_cooling_device_ops processor_cooling_ops = { 247const struct thermal_cooling_device_ops processor_cooling_ops = {
248 .get_max_state = processor_get_max_state, 248 .get_max_state = processor_get_max_state,
249 .get_cur_state = processor_get_cur_state, 249 .get_cur_state = processor_get_cur_state,
250 .set_cur_state = processor_set_cur_state, 250 .set_cur_state = processor_set_cur_state,
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 50658ff887d9..6e36d0c0057c 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -130,6 +130,9 @@ struct acpi_sbs {
130 130
131#define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger) 131#define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger)
132 132
133static int acpi_sbs_remove(struct acpi_device *device, int type);
134static int acpi_battery_get_state(struct acpi_battery *battery);
135
133static inline int battery_scale(int log) 136static inline int battery_scale(int log)
134{ 137{
135 int scale = 1; 138 int scale = 1;
@@ -195,6 +198,8 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy,
195 198
196 if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT) 199 if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT)
197 return -ENODEV; 200 return -ENODEV;
201
202 acpi_battery_get_state(battery);
198 switch (psp) { 203 switch (psp) {
199 case POWER_SUPPLY_PROP_STATUS: 204 case POWER_SUPPLY_PROP_STATUS:
200 if (battery->rate_now < 0) 205 if (battery->rate_now < 0)
@@ -225,11 +230,17 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy,
225 case POWER_SUPPLY_PROP_POWER_NOW: 230 case POWER_SUPPLY_PROP_POWER_NOW:
226 val->intval = abs(battery->rate_now) * 231 val->intval = abs(battery->rate_now) *
227 acpi_battery_ipscale(battery) * 1000; 232 acpi_battery_ipscale(battery) * 1000;
233 val->intval *= (acpi_battery_mode(battery)) ?
234 (battery->voltage_now *
235 acpi_battery_vscale(battery) / 1000) : 1;
228 break; 236 break;
229 case POWER_SUPPLY_PROP_CURRENT_AVG: 237 case POWER_SUPPLY_PROP_CURRENT_AVG:
230 case POWER_SUPPLY_PROP_POWER_AVG: 238 case POWER_SUPPLY_PROP_POWER_AVG:
231 val->intval = abs(battery->rate_avg) * 239 val->intval = abs(battery->rate_avg) *
232 acpi_battery_ipscale(battery) * 1000; 240 acpi_battery_ipscale(battery) * 1000;
241 val->intval *= (acpi_battery_mode(battery)) ?
242 (battery->voltage_now *
243 acpi_battery_vscale(battery) / 1000) : 1;
233 break; 244 break;
234 case POWER_SUPPLY_PROP_CAPACITY: 245 case POWER_SUPPLY_PROP_CAPACITY:
235 val->intval = battery->state_of_charge; 246 val->intval = battery->state_of_charge;
@@ -903,8 +914,6 @@ static void acpi_sbs_callback(void *context)
903 } 914 }
904} 915}
905 916
906static int acpi_sbs_remove(struct acpi_device *device, int type);
907
908static int acpi_sbs_add(struct acpi_device *device) 917static int acpi_sbs_add(struct acpi_device *device)
909{ 918{
910 struct acpi_sbs *sbs; 919 struct acpi_sbs *sbs;
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 6c949602cbd1..3ed80b2ca907 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -428,6 +428,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
428 DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"), 428 DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"),
429 }, 429 },
430 }, 430 },
431 {
432 .callback = init_old_suspend_ordering,
433 .ident = "Asus A8N-SLI DELUXE",
434 .matches = {
435 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
436 DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"),
437 },
438 },
439 {
440 .callback = init_old_suspend_ordering,
441 .ident = "Asus A8N-SLI Premium",
442 .matches = {
443 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
444 DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"),
445 },
446 },
431 {}, 447 {},
432}; 448};
433#endif /* CONFIG_SUSPEND */ 449#endif /* CONFIG_SUSPEND */
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 77255f250dbb..c538d0ef10ff 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp)
149 return result; 149 return result;
150} 150}
151 151
152static struct kernel_param_ops param_ops_debug_layer = { 152static const struct kernel_param_ops param_ops_debug_layer = {
153 .set = param_set_uint, 153 .set = param_set_uint,
154 .get = param_get_debug_layer, 154 .get = param_get_debug_layer,
155}; 155};
156 156
157static struct kernel_param_ops param_ops_debug_level = { 157static const struct kernel_param_ops param_ops_debug_level = {
158 .set = param_set_uint, 158 .set = param_set_uint,
159 .get = param_get_debug_level, 159 .get = param_get_debug_level,
160}; 160};
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 2607e17b520f..48fbc647b178 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal,
812 thermal_zone_unbind_cooling_device); 812 thermal_zone_unbind_cooling_device);
813} 813}
814 814
815static struct thermal_zone_device_ops acpi_thermal_zone_ops = { 815static const struct thermal_zone_device_ops acpi_thermal_zone_ops = {
816 .bind = acpi_thermal_bind_cooling_device, 816 .bind = acpi_thermal_bind_cooling_device,
817 .unbind = acpi_thermal_unbind_cooling_device, 817 .unbind = acpi_thermal_unbind_cooling_device,
818 .get_temp = thermal_get_temp, 818 .get_temp = thermal_get_temp,
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index ada4b4d9bdc8..08a44b532f7c 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -307,7 +307,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st
307 return acpi_video_device_lcd_set_level(video, level); 307 return acpi_video_device_lcd_set_level(video, level);
308} 308}
309 309
310static struct thermal_cooling_device_ops video_cooling_ops = { 310static const struct thermal_cooling_device_ops video_cooling_ops = {
311 .get_max_state = video_get_max_state, 311 .get_max_state = video_get_max_state,
312 .get_cur_state = video_get_cur_state, 312 .get_cur_state = video_get_cur_state,
313 .set_cur_state = video_set_cur_state, 313 .set_cur_state = video_set_cur_state,
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index e0a5b555cee1..bb7c5f1085cc 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data)
218 ata_acpi_uevent(dev->link->ap, dev, event); 218 ata_acpi_uevent(dev->link->ap, dev, event);
219} 219}
220 220
221static struct acpi_dock_ops ata_acpi_dev_dock_ops = { 221static const struct acpi_dock_ops ata_acpi_dev_dock_ops = {
222 .handler = ata_acpi_dev_notify_dock, 222 .handler = ata_acpi_dev_notify_dock,
223 .uevent = ata_acpi_dev_uevent, 223 .uevent = ata_acpi_dev_uevent,
224}; 224};
225 225
226static struct acpi_dock_ops ata_acpi_ap_dock_ops = { 226static const struct acpi_dock_ops ata_acpi_ap_dock_ops = {
227 .handler = ata_acpi_ap_notify_dock, 227 .handler = ata_acpi_ap_notify_dock,
228 .uevent = ata_acpi_ap_uevent, 228 .uevent = ata_acpi_ap_uevent,
229}; 229};
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 49502bc5360a..423fd56bf612 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -616,5 +616,16 @@ config MSM_SMD_PKT
616 Enables userspace clients to read and write to some packet SMD 616 Enables userspace clients to read and write to some packet SMD
617 ports via device interface for MSM chipset. 617 ports via device interface for MSM chipset.
618 618
619config TILE_SROM
620 bool "Character-device access via hypervisor to the Tilera SPI ROM"
621 depends on TILE
622 default y
623 ---help---
624 This device provides character-level read-write access
625 to the SROM, typically via the "0", "1", and "2" devices
626 in /dev/srom/. The Tilera hypervisor makes the flash
627 device appear much like a simple EEPROM, and knows
628 how to partition a single ROM for multiple purposes.
629
619endmenu 630endmenu
620 631
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 7a00672bd85d..32762ba769c2 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -63,3 +63,5 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o
63 63
64obj-$(CONFIG_JS_RTC) += js-rtc.o 64obj-$(CONFIG_JS_RTC) += js-rtc.o
65js-rtc-y = rtc.o 65js-rtc-y = rtc.o
66
67obj-$(CONFIG_TILE_SROM) += tile-srom.o
diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c
index fca0c51bbc90..810aff9e750f 100644
--- a/drivers/char/ramoops.c
+++ b/drivers/char/ramoops.c
@@ -147,6 +147,14 @@ static int __init ramoops_probe(struct platform_device *pdev)
147 cxt->phys_addr = pdata->mem_address; 147 cxt->phys_addr = pdata->mem_address;
148 cxt->record_size = pdata->record_size; 148 cxt->record_size = pdata->record_size;
149 cxt->dump_oops = pdata->dump_oops; 149 cxt->dump_oops = pdata->dump_oops;
150 /*
151 * Update the module parameter variables as well so they are visible
152 * through /sys/module/ramoops/parameters/
153 */
154 mem_size = pdata->mem_size;
155 mem_address = pdata->mem_address;
156 record_size = pdata->record_size;
157 dump_oops = pdata->dump_oops;
150 158
151 if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { 159 if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) {
152 pr_err("request mem region failed\n"); 160 pr_err("request mem region failed\n");
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
new file mode 100644
index 000000000000..cf3ee008dca2
--- /dev/null
+++ b/drivers/char/tile-srom.c
@@ -0,0 +1,481 @@
1/*
2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 *
14 * SPI Flash ROM driver
15 *
16 * This source code is derived from code provided in "Linux Device
17 * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and
18 * Greg Kroah-Hartman, published by O'Reilly Media, Inc.
19 */
20
21#include <linux/module.h>
22#include <linux/moduleparam.h>
23#include <linux/init.h>
24#include <linux/kernel.h> /* printk() */
25#include <linux/slab.h> /* kmalloc() */
26#include <linux/fs.h> /* everything... */
27#include <linux/errno.h> /* error codes */
28#include <linux/types.h> /* size_t */
29#include <linux/proc_fs.h>
30#include <linux/fcntl.h> /* O_ACCMODE */
31#include <linux/aio.h>
32#include <linux/pagemap.h>
33#include <linux/hugetlb.h>
34#include <linux/uaccess.h>
35#include <linux/platform_device.h>
36#include <hv/hypervisor.h>
37#include <linux/ioctl.h>
38#include <linux/cdev.h>
39#include <linux/delay.h>
40#include <hv/drv_srom_intf.h>
41
42/*
43 * Size of our hypervisor I/O requests. We break up large transfers
44 * so that we don't spend large uninterrupted spans of time in the
45 * hypervisor. Erasing an SROM sector takes a significant fraction of
46 * a second, so if we allowed the user to, say, do one I/O to write the
47 * entire ROM, we'd get soft lockup timeouts, or worse.
48 */
49#define SROM_CHUNK_SIZE ((size_t)4096)
50
51/*
52 * When the hypervisor is busy (e.g. erasing), poll the status periodically.
53 */
54
55/*
56 * Interval between status polls, in msec
57 */
58#define SROM_WAIT_TRY_INTERVAL 20
59
60/*
61 * Maximum number of times to poll the status before giving up
62 */
63#define SROM_MAX_WAIT_TRY_TIMES 1000
64
65struct srom_dev {
66 int hv_devhdl; /* Handle for hypervisor device */
67 u32 total_size; /* Size of this device */
68 u32 sector_size; /* Size of a sector */
69 u32 page_size; /* Size of a page */
70 struct mutex lock; /* Allow only one accessor at a time */
71};
72
73static int srom_major; /* Dynamic major by default */
74module_param(srom_major, int, 0);
75MODULE_AUTHOR("Tilera Corporation");
76MODULE_LICENSE("GPL");
77
78static int srom_devs; /* Number of SROM partitions */
79static struct cdev srom_cdev;
80static struct class *srom_class;
81static struct srom_dev *srom_devices;
82
83/*
84 * Handle calling the hypervisor and managing EAGAIN/EBUSY.
85 */
86
87static ssize_t _srom_read(int hv_devhdl, void *buf,
88 loff_t off, size_t count)
89{
90 int retval, retries = SROM_MAX_WAIT_TRY_TIMES;
91 for (;;) {
92 retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf,
93 count, off);
94 if (retval >= 0)
95 return retval;
96 if (retval == HV_EAGAIN)
97 continue;
98 if (retval == HV_EBUSY && --retries > 0) {
99 msleep(SROM_WAIT_TRY_INTERVAL);
100 continue;
101 }
102 pr_err("_srom_read: error %d\n", retval);
103 return -EIO;
104 }
105}
106
107static ssize_t _srom_write(int hv_devhdl, const void *buf,
108 loff_t off, size_t count)
109{
110 int retval, retries = SROM_MAX_WAIT_TRY_TIMES;
111 for (;;) {
112 retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf,
113 count, off);
114 if (retval >= 0)
115 return retval;
116 if (retval == HV_EAGAIN)
117 continue;
118 if (retval == HV_EBUSY && --retries > 0) {
119 msleep(SROM_WAIT_TRY_INTERVAL);
120 continue;
121 }
122 pr_err("_srom_write: error %d\n", retval);
123 return -EIO;
124 }
125}
126
127/**
128 * srom_open() - Device open routine.
129 * @inode: Inode for this device.
130 * @filp: File for this specific open of the device.
131 *
132 * Returns zero, or an error code.
133 */
134static int srom_open(struct inode *inode, struct file *filp)
135{
136 filp->private_data = &srom_devices[iminor(inode)];
137 return 0;
138}
139
140
141/**
142 * srom_release() - Device release routine.
143 * @inode: Inode for this device.
144 * @filp: File for this specific open of the device.
145 *
146 * Returns zero, or an error code.
147 */
148static int srom_release(struct inode *inode, struct file *filp)
149{
150 struct srom_dev *srom = filp->private_data;
151 char dummy;
152
153 /* Make sure we've flushed anything written to the ROM. */
154 mutex_lock(&srom->lock);
155 if (srom->hv_devhdl >= 0)
156 _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1);
157 mutex_unlock(&srom->lock);
158
159 filp->private_data = NULL;
160
161 return 0;
162}
163
164
165/**
166 * srom_read() - Read data from the device.
167 * @filp: File for this specific open of the device.
168 * @buf: User's data buffer.
169 * @count: Number of bytes requested.
170 * @f_pos: File position.
171 *
172 * Returns number of bytes read, or an error code.
173 */
174static ssize_t srom_read(struct file *filp, char __user *buf,
175 size_t count, loff_t *f_pos)
176{
177 int retval = 0;
178 void *kernbuf;
179 struct srom_dev *srom = filp->private_data;
180
181 kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL);
182 if (!kernbuf)
183 return -ENOMEM;
184
185 if (mutex_lock_interruptible(&srom->lock)) {
186 retval = -ERESTARTSYS;
187 kfree(kernbuf);
188 return retval;
189 }
190
191 while (count) {
192 int hv_retval;
193 int bytes_this_pass = min(count, SROM_CHUNK_SIZE);
194
195 hv_retval = _srom_read(srom->hv_devhdl, kernbuf,
196 *f_pos, bytes_this_pass);
197 if (hv_retval > 0) {
198 if (copy_to_user(buf, kernbuf, hv_retval) != 0) {
199 retval = -EFAULT;
200 break;
201 }
202 } else if (hv_retval <= 0) {
203 if (retval == 0)
204 retval = hv_retval;
205 break;
206 }
207
208 retval += hv_retval;
209 *f_pos += hv_retval;
210 buf += hv_retval;
211 count -= hv_retval;
212 }
213
214 mutex_unlock(&srom->lock);
215 kfree(kernbuf);
216
217 return retval;
218}
219
220/**
221 * srom_write() - Write data to the device.
222 * @filp: File for this specific open of the device.
223 * @buf: User's data buffer.
224 * @count: Number of bytes requested.
225 * @f_pos: File position.
226 *
227 * Returns number of bytes written, or an error code.
228 */
229static ssize_t srom_write(struct file *filp, const char __user *buf,
230 size_t count, loff_t *f_pos)
231{
232 int retval = 0;
233 void *kernbuf;
234 struct srom_dev *srom = filp->private_data;
235
236 kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL);
237 if (!kernbuf)
238 return -ENOMEM;
239
240 if (mutex_lock_interruptible(&srom->lock)) {
241 retval = -ERESTARTSYS;
242 kfree(kernbuf);
243 return retval;
244 }
245
246 while (count) {
247 int hv_retval;
248 int bytes_this_pass = min(count, SROM_CHUNK_SIZE);
249
250 if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) {
251 retval = -EFAULT;
252 break;
253 }
254
255 hv_retval = _srom_write(srom->hv_devhdl, kernbuf,
256 *f_pos, bytes_this_pass);
257 if (hv_retval <= 0) {
258 if (retval == 0)
259 retval = hv_retval;
260 break;
261 }
262
263 retval += hv_retval;
264 *f_pos += hv_retval;
265 buf += hv_retval;
266 count -= hv_retval;
267 }
268
269 mutex_unlock(&srom->lock);
270 kfree(kernbuf);
271
272 return retval;
273}
274
275/* Provide our own implementation so we can use srom->total_size. */
276loff_t srom_llseek(struct file *filp, loff_t offset, int origin)
277{
278 struct srom_dev *srom = filp->private_data;
279
280 if (mutex_lock_interruptible(&srom->lock))
281 return -ERESTARTSYS;
282
283 switch (origin) {
284 case SEEK_END:
285 offset += srom->total_size;
286 break;
287 case SEEK_CUR:
288 offset += filp->f_pos;
289 break;
290 }
291
292 if (offset < 0 || offset > srom->total_size) {
293 offset = -EINVAL;
294 } else {
295 filp->f_pos = offset;
296 filp->f_version = 0;
297 }
298
299 mutex_unlock(&srom->lock);
300
301 return offset;
302}
303
304static ssize_t total_show(struct device *dev,
305 struct device_attribute *attr, char *buf)
306{
307 struct srom_dev *srom = dev_get_drvdata(dev);
308 return sprintf(buf, "%u\n", srom->total_size);
309}
310
311static ssize_t sector_show(struct device *dev,
312 struct device_attribute *attr, char *buf)
313{
314 struct srom_dev *srom = dev_get_drvdata(dev);
315 return sprintf(buf, "%u\n", srom->sector_size);
316}
317
318static ssize_t page_show(struct device *dev,
319 struct device_attribute *attr, char *buf)
320{
321 struct srom_dev *srom = dev_get_drvdata(dev);
322 return sprintf(buf, "%u\n", srom->page_size);
323}
324
325static struct device_attribute srom_dev_attrs[] = {
326 __ATTR(total_size, S_IRUGO, total_show, NULL),
327 __ATTR(sector_size, S_IRUGO, sector_show, NULL),
328 __ATTR(page_size, S_IRUGO, page_show, NULL),
329 __ATTR_NULL
330};
331
332static char *srom_devnode(struct device *dev, mode_t *mode)
333{
334 *mode = S_IRUGO | S_IWUSR;
335 return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev));
336}
337
338/*
339 * The fops
340 */
341static const struct file_operations srom_fops = {
342 .owner = THIS_MODULE,
343 .llseek = srom_llseek,
344 .read = srom_read,
345 .write = srom_write,
346 .open = srom_open,
347 .release = srom_release,
348};
349
350/**
351 * srom_setup_minor() - Initialize per-minor information.
352 * @srom: Per-device SROM state.
353 * @index: Device to set up.
354 */
355static int srom_setup_minor(struct srom_dev *srom, int index)
356{
357 struct device *dev;
358 int devhdl = srom->hv_devhdl;
359
360 mutex_init(&srom->lock);
361
362 if (_srom_read(devhdl, &srom->total_size,
363 SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0)
364 return -EIO;
365 if (_srom_read(devhdl, &srom->sector_size,
366 SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0)
367 return -EIO;
368 if (_srom_read(devhdl, &srom->page_size,
369 SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0)
370 return -EIO;
371
372 dev = device_create(srom_class, &platform_bus,
373 MKDEV(srom_major, index), srom, "%d", index);
374 return IS_ERR(dev) ? PTR_ERR(dev) : 0;
375}
376
377/** srom_init() - Initialize the driver's module. */
378static int srom_init(void)
379{
380 int result, i;
381 dev_t dev = MKDEV(srom_major, 0);
382
383 /*
384 * Start with a plausible number of partitions; the krealloc() call
385 * below will yield about log(srom_devs) additional allocations.
386 */
387 srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL);
388
389 /* Discover the number of srom partitions. */
390 for (i = 0; ; i++) {
391 int devhdl;
392 char buf[20];
393 struct srom_dev *new_srom_devices =
394 krealloc(srom_devices, (i+1) * sizeof(struct srom_dev),
395 GFP_KERNEL | __GFP_ZERO);
396 if (!new_srom_devices) {
397 result = -ENOMEM;
398 goto fail_mem;
399 }
400 srom_devices = new_srom_devices;
401 sprintf(buf, "srom/0/%d", i);
402 devhdl = hv_dev_open((HV_VirtAddr)buf, 0);
403 if (devhdl < 0) {
404 if (devhdl != HV_ENODEV)
405 pr_notice("srom/%d: hv_dev_open failed: %d.\n",
406 i, devhdl);
407 break;
408 }
409 srom_devices[i].hv_devhdl = devhdl;
410 }
411 srom_devs = i;
412
413 /* Bail out early if we have no partitions at all. */
414 if (srom_devs == 0) {
415 result = -ENODEV;
416 goto fail_mem;
417 }
418
419 /* Register our major, and accept a dynamic number. */
420 if (srom_major)
421 result = register_chrdev_region(dev, srom_devs, "srom");
422 else {
423 result = alloc_chrdev_region(&dev, 0, srom_devs, "srom");
424 srom_major = MAJOR(dev);
425 }
426 if (result < 0)
427 goto fail_mem;
428
429 /* Register a character device. */
430 cdev_init(&srom_cdev, &srom_fops);
431 srom_cdev.owner = THIS_MODULE;
432 srom_cdev.ops = &srom_fops;
433 result = cdev_add(&srom_cdev, dev, srom_devs);
434 if (result < 0)
435 goto fail_chrdev;
436
437 /* Create a sysfs class. */
438 srom_class = class_create(THIS_MODULE, "srom");
439 if (IS_ERR(srom_class)) {
440 result = PTR_ERR(srom_class);
441 goto fail_cdev;
442 }
443 srom_class->dev_attrs = srom_dev_attrs;
444 srom_class->devnode = srom_devnode;
445
446 /* Do per-partition initialization */
447 for (i = 0; i < srom_devs; i++) {
448 result = srom_setup_minor(srom_devices + i, i);
449 if (result < 0)
450 goto fail_class;
451 }
452
453 return 0;
454
455fail_class:
456 for (i = 0; i < srom_devs; i++)
457 device_destroy(srom_class, MKDEV(srom_major, i));
458 class_destroy(srom_class);
459fail_cdev:
460 cdev_del(&srom_cdev);
461fail_chrdev:
462 unregister_chrdev_region(dev, srom_devs);
463fail_mem:
464 kfree(srom_devices);
465 return result;
466}
467
468/** srom_cleanup() - Clean up the driver's module. */
469static void srom_cleanup(void)
470{
471 int i;
472 for (i = 0; i < srom_devs; i++)
473 device_destroy(srom_class, MKDEV(srom_major, i));
474 class_destroy(srom_class);
475 cdev_del(&srom_cdev);
476 unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs);
477 kfree(srom_devices);
478}
479
480module_init(srom_init);
481module_exit(srom_cleanup);
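A userspace sketch of how the nodes created by this driver might be used, assuming the /dev/srom/0 name produced by srom_devnode() above; the geometry appears under the device's sysfs directory as total_size, sector_size, and page_size:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd = open("/dev/srom/0", O_RDONLY);

	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf));	/* first 64 bytes of the SROM */
	printf("read %zd bytes\n", n);
	close(fd);
	return 0;
}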
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 7fc2f108f490..3f4051a7c5a7 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -80,7 +80,7 @@ enum tis_defaults {
80static LIST_HEAD(tis_chips); 80static LIST_HEAD(tis_chips);
81static DEFINE_SPINLOCK(tis_lock); 81static DEFINE_SPINLOCK(tis_lock);
82 82
83#ifdef CONFIG_PNP 83#if defined(CONFIG_PNP) && defined(CONFIG_ACPI)
84static int is_itpm(struct pnp_dev *dev) 84static int is_itpm(struct pnp_dev *dev)
85{ 85{
86 struct acpi_device *acpi = pnp_acpi_device(dev); 86 struct acpi_device *acpi = pnp_acpi_device(dev);
@@ -93,6 +93,11 @@ static int is_itpm(struct pnp_dev *dev)
93 93
94 return 0; 94 return 0;
95} 95}
96#else
97static inline int is_itpm(struct pnp_dev *dev)
98{
99 return 0;
100}
96#endif 101#endif
97 102
98static int check_locality(struct tpm_chip *chip, int l) 103static int check_locality(struct tpm_chip *chip, int l)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index bf5092455a8f..d4c542372886 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -25,9 +25,19 @@ DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
25 25
26DEFINE_MUTEX(cpuidle_lock); 26DEFINE_MUTEX(cpuidle_lock);
27LIST_HEAD(cpuidle_detected_devices); 27LIST_HEAD(cpuidle_detected_devices);
28static void (*pm_idle_old)(void);
29 28
30static int enabled_devices; 29static int enabled_devices;
30static int off __read_mostly;
31static int initialized __read_mostly;
32
33int cpuidle_disabled(void)
34{
35 return off;
36}
37void disable_cpuidle(void)
38{
39 off = 1;
40}
31 41
32#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT) 42#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
33static void cpuidle_kick_cpus(void) 43static void cpuidle_kick_cpus(void)
@@ -46,25 +56,23 @@ static int __cpuidle_register_device(struct cpuidle_device *dev);
46 * cpuidle_idle_call - the main idle loop 56 * cpuidle_idle_call - the main idle loop
47 * 57 *
48 * NOTE: no locks or semaphores should be used here 58 * NOTE: no locks or semaphores should be used here
59 * return non-zero on failure
49 */ 60 */
50static void cpuidle_idle_call(void) 61int cpuidle_idle_call(void)
51{ 62{
52 struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); 63 struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
53 struct cpuidle_state *target_state; 64 struct cpuidle_state *target_state;
54 int next_state; 65 int next_state;
55 66
67 if (off)
68 return -ENODEV;
69
70 if (!initialized)
71 return -ENODEV;
72
56 /* check if the device is ready */ 73 /* check if the device is ready */
57 if (!dev || !dev->enabled) { 74 if (!dev || !dev->enabled)
58 if (pm_idle_old) 75 return -EBUSY;
59 pm_idle_old();
60 else
61#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
62 default_idle();
63#else
64 local_irq_enable();
65#endif
66 return;
67 }
68 76
69#if 0 77#if 0
70 /* shows regressions, re-enable for 2.6.29 */ 78 /* shows regressions, re-enable for 2.6.29 */
@@ -89,7 +97,7 @@ static void cpuidle_idle_call(void)
89 next_state = cpuidle_curr_governor->select(dev); 97 next_state = cpuidle_curr_governor->select(dev);
90 if (need_resched()) { 98 if (need_resched()) {
91 local_irq_enable(); 99 local_irq_enable();
92 return; 100 return 0;
93 } 101 }
94 102
95 target_state = &dev->states[next_state]; 103 target_state = &dev->states[next_state];
@@ -114,6 +122,8 @@ static void cpuidle_idle_call(void)
 	/* give the governor an opportunity to reflect on the outcome */
 	if (cpuidle_curr_governor->reflect)
 		cpuidle_curr_governor->reflect(dev);
+
+	return 0;
 }
 
 /**
@@ -121,10 +131,10 @@ static void cpuidle_idle_call(void)
  */
 void cpuidle_install_idle_handler(void)
 {
-	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
+	if (enabled_devices) {
 		/* Make sure all changes finished before we switch to new idle */
 		smp_wmb();
-		pm_idle = cpuidle_idle_call;
+		initialized = 1;
 	}
 }
 
@@ -133,8 +143,8 @@ void cpuidle_install_idle_handler(void)
  */
 void cpuidle_uninstall_idle_handler(void)
 {
-	if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
-		pm_idle = pm_idle_old;
+	if (enabled_devices) {
+		initialized = 0;
 		cpuidle_kick_cpus();
 	}
 }
@@ -427,7 +437,8 @@ static int __init cpuidle_init(void)
 {
 	int ret;
 
-	pm_idle_old = pm_idle;
+	if (cpuidle_disabled())
+		return -ENODEV;
 
 	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
 	if (ret)
@@ -438,4 +449,5 @@ static int __init cpuidle_init(void)
 	return 0;
 }
 
+module_param(off, int, 0444);
 core_initcall(cpuidle_init);
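
Taken together, the cpuidle changes above stop hijacking the pm_idle function pointer: cpuidle_idle_call() now returns a negative errno when cpuidle is off, uninitialized, or the device is not ready, and the architecture's idle loop is expected to fall back to its own default. A runnable user-space sketch of that contract (the flags and the fallback are simplified stand-ins, not kernel code):

	#include <stdio.h>
	#include <errno.h>

	static int off, initialized = 1, enabled = 1;

	/* mirrors the new contract: 0 when an idle state was entered,
	 * negative errno when the caller must use its own idle routine */
	static int cpuidle_idle_call(void)
	{
		if (off || !initialized)
			return -ENODEV;
		if (!enabled)
			return -EBUSY;
		puts("entered cpuidle state");
		return 0;
	}

	static void default_idle(void)
	{
		puts("fell back to default idle");
	}

	int main(void)
	{
		if (cpuidle_idle_call())
			default_idle();	/* not taken: cpuidle available */
		off = 1;
		if (cpuidle_idle_call())
			default_idle();	/* taken: cpuidle disabled */
		return 0;
	}
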
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index 33e50d556f17..38c3fd8b9d76 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -13,6 +13,7 @@ extern struct list_head cpuidle_governors;
 extern struct list_head cpuidle_detected_devices;
 extern struct mutex cpuidle_lock;
 extern spinlock_t cpuidle_driver_lock;
+extern int cpuidle_disabled(void);
 
 /* idle loop */
 extern void cpuidle_install_idle_handler(void);
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index fd1601e3d125..3f7e3cedd133 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -26,6 +26,9 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
 	if (!drv)
 		return -EINVAL;
 
+	if (cpuidle_disabled())
+		return -ENODEV;
+
 	spin_lock(&cpuidle_driver_lock);
 	if (cpuidle_curr_driver) {
 		spin_unlock(&cpuidle_driver_lock);
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 724c164d31c9..ea2f8e7aa24a 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -81,6 +81,9 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
 	if (!gov || !gov->select)
 		return -EINVAL;
 
+	if (cpuidle_disabled())
+		return -ENODEV;
+
 	mutex_lock(&cpuidle_lock);
 	if (__cpuidle_find_governor(gov->name) == NULL) {
 		ret = 0;
diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c
index 30da70d06a6d..cdae207028a7 100644
--- a/drivers/eisa/pci_eisa.c
+++ b/drivers/eisa/pci_eisa.c
@@ -45,13 +45,13 @@ static int __init pci_eisa_init(struct pci_dev *pdev,
 	return 0;
 }
 
-static struct pci_device_id __initdata pci_eisa_pci_tbl[] = {
+static struct pci_device_id pci_eisa_pci_tbl[] = {
 	{ PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
 	  PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 },
 	{ 0, }
 };
 
-static struct pci_driver __initdata pci_eisa_driver = {
+static struct pci_driver __refdata pci_eisa_driver = {
 	.name		= "pci_eisa",
 	.id_table	= pci_eisa_pci_tbl,
 	.probe		= pci_eisa_init,
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index eacb05e6cfb3..eb80b549ed8d 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -157,7 +157,7 @@ utf16_strnlen(efi_char16_t *s, size_t maxlength)
 	return length;
 }
 
-static unsigned long
+static inline unsigned long
 utf16_strlen(efi_char16_t *s)
 {
 	return utf16_strnlen(s, ~0UL);
@@ -580,8 +580,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 	return -1;
 }
 
-static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size,
-			    struct pstore_info *psi)
+static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
+			    size_t size, struct pstore_info *psi)
 {
 	return 0;
 }
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index ce281d152275..67df91af8424 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -483,7 +483,7 @@ static int gpio_keys_get_devtree_pdata(struct device *dev,
 
 	buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL);
 	if (!buttons)
-		return -ENODEV;
+		return -ENOMEM;
 
 	pp = NULL;
 	i = 0;
diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
index ab0acaf7fe8f..756348a7f93a 100644
--- a/drivers/input/keyboard/lm8323.c
+++ b/drivers/input/keyboard/lm8323.c
@@ -754,8 +754,11 @@ fail3:
 	device_remove_file(&client->dev, &dev_attr_disable_kp);
 fail2:
 	while (--pwm >= 0)
-		if (lm->pwm[pwm].enabled)
+		if (lm->pwm[pwm].enabled) {
+			device_remove_file(lm->pwm[pwm].cdev.dev,
+					   &dev_attr_time);
 			led_classdev_unregister(&lm->pwm[pwm].cdev);
+		}
 fail1:
 	input_free_device(idev);
 	kfree(lm);
@@ -775,8 +778,10 @@ static int __devexit lm8323_remove(struct i2c_client *client)
 	device_remove_file(&lm->client->dev, &dev_attr_disable_kp);
 
 	for (i = 0; i < 3; i++)
-		if (lm->pwm[i].enabled)
+		if (lm->pwm[i].enabled) {
+			device_remove_file(lm->pwm[i].cdev.dev, &dev_attr_time);
 			led_classdev_unregister(&lm->pwm[i].cdev);
+		}
 
 	kfree(lm);
 
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index da3828fc2c09..f270447ba951 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -19,6 +19,7 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/input.h>
 #include <linux/platform_device.h>
@@ -37,7 +38,7 @@
 #define KBC_ROW_SCAN_DLY	5
 
 /* KBC uses a 32KHz clock so a cycle = 1/32Khz */
-#define KBC_CYCLE_USEC	32
+#define KBC_CYCLE_MS	32
 
 /* KBC Registers */
 
@@ -647,7 +648,7 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev)
 	debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
 	scan_time_rows = (KBC_ROW_SCAN_TIME + debounce_cnt) * num_rows;
 	kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + pdata->repeat_cnt;
-	kbc->repoll_dly = ((kbc->repoll_dly * KBC_CYCLE_USEC) + 999) / 1000;
+	kbc->repoll_dly = DIV_ROUND_UP(kbc->repoll_dly, KBC_CYCLE_MS);
 
 	input_dev->name = pdev->name;
 	input_dev->id.bustype = BUS_HOST;
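
The tegra-kbc fix above replaces an open-coded microsecond conversion with DIV_ROUND_UP() over a cycles-per-millisecond constant: the KBC runs from a 32 kHz clock, so 32 cycles elapse per millisecond and the delay needs only one rounding-up division. A standalone check of the arithmetic (DIV_ROUND_UP is copied from the kernel's <linux/kernel.h>):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		/* 100 KBC cycles at 32 cycles/ms rounds up to 4 ms */
		printf("%d\n", DIV_ROUND_UP(100, 32));	/* prints 4 */
		return 0;
	}
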
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
index c456f63b6bae..783597a9a64a 100644
--- a/drivers/input/misc/kxtj9.c
+++ b/drivers/input/misc/kxtj9.c
@@ -21,6 +21,7 @@
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/input/kxtj9.h>
 #include <linux/input-polldev.h>
diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c
index 20f8f9284f02..6c76cf792991 100644
--- a/drivers/input/misc/mma8450.c
+++ b/drivers/input/misc/mma8450.c
@@ -24,6 +24,7 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/input-polldev.h>
+#include <linux/of_device.h>
 
 #define MMA8450_DRV_NAME	"mma8450"
 
@@ -229,10 +230,17 @@ static const struct i2c_device_id mma8450_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, mma8450_id);
 
+static const struct of_device_id mma8450_dt_ids[] = {
+	{ .compatible = "fsl,mma8450", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids);
+
 static struct i2c_driver mma8450_driver = {
 	.driver = {
 		.name	= MMA8450_DRV_NAME,
 		.owner	= THIS_MODULE,
+		.of_match_table = mma8450_dt_ids,
 	},
 	.probe		= mma8450_probe,
 	.remove		= __devexit_p(mma8450_remove),
diff --git a/drivers/input/mouse/hgpk.c b/drivers/input/mouse/hgpk.c
index 95577c15ae56..4d17d9f3320b 100644
--- a/drivers/input/mouse/hgpk.c
+++ b/drivers/input/mouse/hgpk.c
@@ -32,6 +32,7 @@
 #define DEBUG
 #include <linux/slab.h>
 #include <linux/input.h>
+#include <linux/module.h>
 #include <linux/serio.h>
 #include <linux/libps2.h>
 #include <linux/delay.h>
diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index bc3b5187f3a3..131f9d1c921b 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -249,12 +249,14 @@ static void __ad7879_enable(struct ad7879 *ts)
 
 static void __ad7879_disable(struct ad7879 *ts)
 {
+	u16 reg = (ts->cmd_crtl2 & ~AD7879_PM(-1)) |
+		AD7879_PM(AD7879_PM_SHUTDOWN);
 	disable_irq(ts->irq);
 
 	if (del_timer_sync(&ts->timer))
 		ad7879_ts_event_release(ts);
 
-	ad7879_write(ts, AD7879_REG_CTRL2, AD7879_PM(AD7879_PM_SHUTDOWN));
+	ad7879_write(ts, AD7879_REG_CTRL2, reg);
 }
 
 
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 3ff22e32b602..fb28b5af733b 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -17,14 +17,39 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 
+/**
+ * struct alias_prop - Alias property in 'aliases' node
+ * @link:	List node to link the structure in aliases_lookup list
+ * @alias:	Alias property name
+ * @np:		Pointer to device_node that the alias stands for
+ * @id:		Index value from end of alias name
+ * @stem:	Alias string without the index
+ *
+ * The structure represents one alias property of 'aliases' node as
+ * an entry in aliases_lookup list.
+ */
+struct alias_prop {
+	struct list_head link;
+	const char *alias;
+	struct device_node *np;
+	int id;
+	char stem[0];
+};
+
+static LIST_HEAD(aliases_lookup);
+
 struct device_node *allnodes;
 struct device_node *of_chosen;
+struct device_node *of_aliases;
+
+static DEFINE_MUTEX(of_aliases_mutex);
 
 /* use when traversing tree through the allnext, child, sibling,
  * or parent members of struct device_node.
@@ -988,3 +1013,108 @@ out_unlock:
 }
 #endif /* defined(CONFIG_OF_DYNAMIC) */
 
+static void of_alias_add(struct alias_prop *ap, struct device_node *np,
+			 int id, const char *stem, int stem_len)
+{
+	ap->id = id;
+	ap->np = np;
+	strncpy(ap->stem, stem, stem_len);
+	ap->stem[stem_len] = 0;
+	list_add_tail(&ap->link, &aliases_lookup);
+	pr_debug("adding DT alias:%s: stem=%s id=%i node=%s\n",
+		 ap->alias, ap->stem, ap->id, np ? np->full_name : NULL);
+}
+
+/**
+ * of_alias_scan() - Scan all properties of 'aliases' node
+ *
+ * The function scans all the properties of the 'aliases' node and
+ * populates the global lookup table with them.
+ */
+__init void of_alias_scan(void)
+{
+	struct property *pp;
+
+	if (!of_aliases)
+		return;
+
+	for_each_property(pp, of_aliases->properties) {
+		const char *start = pp->name;
+		const char *end = start + strlen(start);
+		struct device_node *np;
+		struct alias_prop *ap;
+		int id, len;
+
+		/* Skip those we do not want to proceed */
+		if (!strcmp(pp->name, "name") ||
+		    !strcmp(pp->name, "phandle") ||
+		    !strcmp(pp->name, "linux,phandle"))
+			continue;
+
+		np = of_find_node_by_path(pp->value);
+		if (!np)
+			continue;
+
+		/* walk alias backwards to extract the id and 'stem' string */
+		while (isdigit(*(end-1)) && end > start)
+			end--;
+		len = end - start;
+		id = strlen(end) ? simple_strtoul(end, NULL, 10) : -1;
+
+		/* Allocate an alias_prop with enough space for the stem */
+		ap = early_init_dt_alloc_memory_arch(sizeof(*ap) + len + 1, 4);
+		if (!ap)
+			continue;
+		ap->alias = start;
+		of_alias_add(ap, np, id, start, len);
+	}
+}
+
+/**
+ * of_alias_get_id() - Get alias id for the given device_node
+ * @np:		Pointer to the given device_node
+ * @stem:	Alias stem of the given device_node
+ *
+ * The function walks the lookup table to get the alias id for the given
+ * device_node and alias stem. It returns the alias id if one is found.
+ * If not, it dynamically creates one in the lookup table and returns it,
+ * or returns an error code if the creation fails.
+ */
+int of_alias_get_id(struct device_node *np, const char *stem)
+{
+	struct alias_prop *app;
+	int id = 0;
+	bool found = false;
+
+	mutex_lock(&of_aliases_mutex);
+	list_for_each_entry(app, &aliases_lookup, link) {
+		if (strcmp(app->stem, stem) != 0)
+			continue;
+
+		if (np == app->np) {
+			found = true;
+			id = app->id;
+			break;
+		}
+
+		if (id <= app->id)
+			id = app->id + 1;
+	}
+
+	/* If an id is not found, then allocate a new one */
+	if (!found) {
+		app = kzalloc(sizeof(*app) + strlen(stem) + 1, 4);
+		if (!app) {
+			id = -ENODEV;
+			goto out;
+		}
+		of_alias_add(app, np, id, stem, strlen(stem));
+	}
+
+ out:
+	mutex_unlock(&of_aliases_mutex);
+
+	return id;
+}
+EXPORT_SYMBOL_GPL(of_alias_get_id);
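
The new of_alias_get_id() lets a driver recover the stable index a board assigned to a device through the 'aliases' node, instead of relying on probe order. A hedged sketch of a typical caller, assuming a device tree containing "aliases { serial0 = &uarta; };" (the probe function and names are hypothetical):

	#include <linux/of.h>
	#include <linux/platform_device.h>

	static int foo_uart_probe(struct platform_device *pdev)
	{
		int id = of_alias_get_id(pdev->dev.of_node, "serial");

		if (id < 0)
			id = 0;	/* no alias: fall back to a default index */
		dev_info(&pdev->dev, "registering port %d\n", id);
		return 0;
	}
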
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 65200af29c52..13d6d3a96b31 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -707,10 +707,12 @@ void __init unflatten_device_tree(void)
 	__unflatten_device_tree(initial_boot_params, &allnodes,
 				early_init_dt_alloc_memory_arch);
 
-	/* Get pointer to OF "/chosen" node for use everywhere */
+	/* Get pointers to "/chosen" and "/aliases" nodes for use everywhere */
 	of_chosen = of_find_node_by_path("/chosen");
 	if (of_chosen == NULL)
 		of_chosen = of_find_node_by_path("/chosen@0");
+	of_aliases = of_find_node_by_path("/aliases");
+	of_alias_scan();
 }
 
 #endif /* CONFIG_OF_EARLY_FLATTREE */
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index a70fa89f76fd..220285760b68 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val,
 }
 
 
-static struct acpi_dock_ops acpiphp_dock_ops = {
+static const struct acpi_dock_ops acpiphp_dock_ops = {
 	.handler = handle_hotplug_event_func,
 };
 
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index bcae8dd41496..7789002bdd5c 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
 		pr_info("%s: already running\n", pdev->name);
 
 	/* force to 24 hour mode */
-	new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
+	new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
 	new_ctrl |= OMAP_RTC_CTRL_STOP;
 
 	/* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE:
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 564ff4e0dbc4..8345fb457a40 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -1,5 +1,6 @@
 config ISCSI_TARGET
 	tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
+	depends on NET
 	select CRYPTO
 	select CRYPTO_CRC32C
 	select CRYPTO_CRC32C_INTEL if X86
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 14c81c4265bd..c24fb10de60b 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -120,7 +120,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
 	struct iscsi_tiqn *tiqn = NULL;
 	int ret;
 
-	if (strlen(buf) > ISCSI_IQN_LEN) {
+	if (strlen(buf) >= ISCSI_IQN_LEN) {
 		pr_err("Target IQN exceeds %d bytes\n",
 				ISCSI_IQN_LEN);
 		return ERR_PTR(-EINVAL);
@@ -1857,7 +1857,7 @@ static int iscsit_handle_text_cmd(
 	char *text_ptr, *text_in;
 	int cmdsn_ret, niov = 0, rx_got, rx_size;
 	u32 checksum = 0, data_crc = 0, payload_length;
-	u32 padding = 0, text_length = 0;
+	u32 padding = 0, pad_bytes = 0, text_length = 0;
 	struct iscsi_cmd *cmd;
 	struct kvec iov[3];
 	struct iscsi_text *hdr;
@@ -1896,7 +1896,7 @@ static int iscsit_handle_text_cmd(
 
 	padding = ((-payload_length) & 3);
 	if (padding != 0) {
-		iov[niov].iov_base = cmd->pad_bytes;
+		iov[niov].iov_base = &pad_bytes;
 		iov[niov++].iov_len = padding;
 		rx_size += padding;
 		pr_debug("Receiving %u additional bytes"
@@ -1917,7 +1917,7 @@ static int iscsit_handle_text_cmd(
 	if (conn->conn_ops->DataDigest) {
 		iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
 				text_in, text_length,
-				padding, cmd->pad_bytes,
+				padding, (u8 *)&pad_bytes,
 				(u8 *)&data_crc);
 
 		if (checksum != data_crc) {
@@ -3468,7 +3468,12 @@ static inline void iscsit_thread_check_cpumask(
 }
 
 #else
-#define iscsit_thread_get_cpumask(X) ({})
+
+void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
+{
+	return;
+}
+
 #define iscsit_thread_check_cpumask(X, Y, Z) ({})
 #endif /* CONFIG_SMP */
 
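
Several of the iscsi_target changes above revolve around PDU padding: iSCSI pads every payload to a 4-byte boundary, and ((-len) & 3) computes the pad count without a division or branch. A standalone demonstration of the trick (unsigned wrap-around makes the negation well defined):

	#include <stdio.h>

	int main(void)
	{
		for (unsigned int len = 0; len < 6; len++)
			printf("len=%u pad=%u\n", len, (-len) & 3u);
		/* prints pads 0, 3, 2, 1, 0, 3 */
		return 0;
	}
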
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 32bb92c44450..f095e65b1ccf 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -181,7 +181,7 @@ struct se_tpg_np *lio_target_call_addnptotpg(
 		return ERR_PTR(-EOVERFLOW);
 	}
 	memset(buf, 0, MAX_PORTAL_LEN + 1);
-	snprintf(buf, MAX_PORTAL_LEN, "%s", name);
+	snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name);
 
 	memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage));
 
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 713a4d23557a..4d087ac11067 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -978,7 +978,7 @@ struct iscsi_login *iscsi_target_init_negotiation(
 		pr_err("Unable to allocate memory for struct iscsi_login.\n");
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
-		goto out;
+		return NULL;
 	}
 
 	login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index c75a01a1c475..89760329d5d0 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1747,6 +1747,8 @@ int transport_generic_handle_cdb(
 }
 EXPORT_SYMBOL(transport_generic_handle_cdb);
 
+static void transport_generic_request_failure(struct se_cmd *,
+			struct se_device *, int, int);
 /*
  * Used by fabric module frontends to queue tasks directly.
  * May only be used from process context.
@@ -1754,6 +1756,8 @@ EXPORT_SYMBOL(transport_generic_handle_cdb);
 int transport_handle_cdb_direct(
 	struct se_cmd *cmd)
 {
+	int ret;
+
 	if (!cmd->se_lun) {
 		dump_stack();
 		pr_err("cmd->se_lun is NULL\n");
@@ -1765,8 +1769,31 @@ int transport_handle_cdb_direct(
 			" from interrupt context\n");
 		return -EINVAL;
 	}
-
-	return transport_generic_new_cmd(cmd);
+	/*
+	 * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following
+	 * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue()
+	 * in existing usage to ensure that outstanding descriptors are handled
+	 * correctly during shutdown via transport_generic_wait_for_tasks()
+	 *
+	 * Also, we don't take cmd->t_state_lock here as we only expect
+	 * this to be called for initial descriptor submission.
+	 */
+	cmd->t_state = TRANSPORT_NEW_CMD;
+	atomic_set(&cmd->t_transport_active, 1);
+	/*
+	 * transport_generic_new_cmd() is already handling QUEUE_FULL,
+	 * so follow TRANSPORT_NEW_CMD processing thread context usage
+	 * and call transport_generic_request_failure() if necessary.
+	 */
+	ret = transport_generic_new_cmd(cmd);
+	if (ret == -EAGAIN)
+		return 0;
+	else if (ret < 0) {
+		cmd->transport_error_status = ret;
+		transport_generic_request_failure(cmd, NULL, 0,
+				(cmd->data_direction != DMA_TO_DEVICE));
+	}
+	return 0;
 }
 EXPORT_SYMBOL(transport_handle_cdb_direct);
 
@@ -3324,7 +3351,7 @@ static int transport_generic_cmd_sequencer(
 			goto out_invalid_cdb_field;
 		}
 
-		cmd->t_task_lba = get_unaligned_be16(&cdb[2]);
+		cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
 		passthrough = (dev->transport->transport_type ==
 				TRANSPORT_PLUGIN_PHBA_PDEV);
 		/*
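
The get_unaligned_be16 -> get_unaligned_be64 fix above matters because this branch decodes a 16-byte CDB whose logical block address field is 64 bits wide; reading only two bytes silently truncated large LBAs. A standalone sketch of the big-endian decode (a portable stand-in for the kernel's get_unaligned_be64()):

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t get_be64(const unsigned char *p)
	{
		uint64_t v = 0;
		for (int i = 0; i < 8; i++)
			v = (v << 8) | p[i];	/* big-endian: MSB first */
		return v;
	}

	int main(void)
	{
		/* bytes 2..9 of a READ(16)-style CDB carry the 64-bit LBA */
		unsigned char cdb[16] = { 0x88, 0, 0, 0, 0, 1, 0, 0, 0, 0 };
		printf("lba=%llu\n", (unsigned long long)get_be64(&cdb[2]));
		/* prints 4294967296, which a 16-bit read would truncate to 0 */
		return 0;
	}
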
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h
index f7fff7ed63c3..bd4fe21a23b8 100644
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -187,4 +187,9 @@ void ft_dump_cmd(struct ft_cmd *, const char *caller);
 
 ssize_t ft_format_wwn(char *, size_t, u64);
 
+/*
+ * Underlying HW specific helper function
+ */
+void ft_invl_hw_context(struct ft_cmd *);
+
 #endif /* __TCM_FC_H__ */
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 09df38b4610c..5654dc22f7ae 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -320,6 +320,7 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg)
 	default:
 		pr_debug("%s: unhandled frame r_ctl %x\n",
 		       __func__, fh->fh_r_ctl);
+		ft_invl_hw_context(cmd);
 		fc_frame_free(fp);
 		transport_generic_free_cmd(&cmd->se_cmd, 0, 0);
 		break;
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
index 8e2a46ddcccb..c37f4cd96452 100644
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -213,62 +213,49 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp)
 	if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF))
 		goto drop;
 
+	f_ctl = ntoh24(fh->fh_f_ctl);
+	ep = fc_seq_exch(seq);
+	lport = ep->lp;
+	if (cmd->was_ddp_setup) {
+		BUG_ON(!ep);
+		BUG_ON(!lport);
+	}
+
 	/*
-	 * Doesn't expect even single byte of payload. Payload
+	 * Doesn't expect payload if DDP is setup. Payload
 	 * is expected to be copied directly to user buffers
-	 * due to DDP (Large Rx offload) feature, hence
-	 * BUG_ON if BUF is non-NULL
+	 * due to DDP (Large Rx offload).
 	 */
 	buf = fc_frame_payload_get(fp, 1);
-	if (cmd->was_ddp_setup && buf) {
-		pr_debug("%s: When DDP was setup, not expected to"
-			"receive frame with payload, Payload shall be"
-			"copied directly to buffer instead of coming "
-			"via. legacy receive queues\n", __func__);
-		BUG_ON(buf);
-	}
+	if (buf)
+		pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, "
+			"cmd->sg_cnt 0x%x. DDP was setup"
+			" hence not expected to receive frame with "
+			"payload, Frame will be dropped if "
+			"'Sequence Initiative' bit in f_ctl is "
+			"not set\n", __func__, ep->xid, f_ctl,
+			cmd->sg, cmd->sg_cnt);
+	/*
+	 * Invalidate HW DDP context if it was setup for respective
+	 * command. Invalidation of HW DDP context is required in both
+	 * situations (success and error).
+	 */
+	ft_invl_hw_context(cmd);
 
 	/*
-	 * If ft_cmd indicated 'ddp_setup', in that case only the last frame
-	 * should come with 'TSI bit being set'. If 'TSI bit is not set and if
-	 * data frame appears here, means error condition. In both the cases
-	 * release the DDP context (ddp_put) and in error case, as well
-	 * initiate error recovery mechanism.
+	 * If "Sequence Initiative (TSI)" bit set in f_ctl, means last
+	 * write data frame is received successfully where payload is
+	 * posted directly to user buffer and only the last frame's
+	 * header is posted in receive queue.
+	 *
+	 * If "Sequence Initiative (TSI)" bit is not set, means error
+	 * condition w.r.t. DDP, hence drop the packet and let explicit
+	 * ABORTS from other end of exchange timer trigger the recovery.
 	 */
-	ep = fc_seq_exch(seq);
-	if (cmd->was_ddp_setup) {
-		BUG_ON(!ep);
-		lport = ep->lp;
-		BUG_ON(!lport);
-	}
-	if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) {
-		f_ctl = ntoh24(fh->fh_f_ctl);
-		/*
-		 * If TSI bit set in f_ctl, means last write data frame is
-		 * received successfully where payload is posted directly
-		 * to user buffer and only the last frame's header is posted
-		 * in legacy receive queue
-		 */
-		if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */
-			cmd->write_data_len = lport->tt.ddp_done(lport,
-						ep->xid);
-			goto last_frame;
-		} else {
-			/*
-			 * Updating the write_data_len may be meaningless at
-			 * this point, but just in case if required in future
-			 * for debugging or any other purpose
-			 */
-			pr_err("%s: Received frame with TSI bit not"
-				" being SET, dropping the frame, "
-				"cmd->sg <%p>, cmd->sg_cnt <0x%x>\n",
-				__func__, cmd->sg, cmd->sg_cnt);
-			cmd->write_data_len = lport->tt.ddp_done(lport,
-						ep->xid);
-			lport->tt.seq_exch_abort(cmd->seq, 0);
-			goto drop;
-		}
-	}
+	if (f_ctl & FC_FC_SEQ_INIT)
+		goto last_frame;
+	else
+		goto drop;
 
 	rel_off = ntohl(fh->fh_parm_offset);
 	frame_len = fr_len(fp);
@@ -331,3 +318,39 @@ last_frame:
 drop:
 	fc_frame_free(fp);
 }
+
+/*
+ * Handle and cleanup any HW specific resources if
+ * received ABORTS, errors, timeouts.
+ */
+void ft_invl_hw_context(struct ft_cmd *cmd)
+{
+	struct fc_seq *seq = cmd->seq;
+	struct fc_exch *ep = NULL;
+	struct fc_lport *lport = NULL;
+
+	BUG_ON(!cmd);
+
+	/* Cleanup the DDP context in HW if DDP was setup */
+	if (cmd->was_ddp_setup && seq) {
+		ep = fc_seq_exch(seq);
+		if (ep) {
+			lport = ep->lp;
+			if (lport && (ep->xid <= lport->lro_xid))
+				/*
+				 * "ddp_done" triggers invalidation of HW
+				 * specific DDP context
+				 */
+				cmd->write_data_len = lport->tt.ddp_done(lport,
+						      ep->xid);
+
+			/*
+			 * Resetting same variable to indicate HW's
+			 * DDP context has been invalidated to avoid
+			 * re_invalidation of same context (context is
+			 * identified using ep->xid)
+			 */
+			cmd->was_ddp_setup = 0;
+		}
+	}
+}
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index bf7c687519ef..f7f71b2d3101 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -14,11 +14,7 @@ menuconfig THERMAL
 	  If you want this support, you should say Y or M here.
 
 config THERMAL_HWMON
-	bool "Hardware monitoring support"
+	bool
 	depends on THERMAL
 	depends on HWMON=y || HWMON=THERMAL
-	help
-	  The generic thermal sysfs driver's hardware monitoring support
-	  requires a 2.10.7/3.0.2 or later lm-sensors userspace.
-
-	  Say Y if your user-space is new enough.
+	default y
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 0b1c82ad6805..708f8e92771a 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -420,6 +420,29 @@ thermal_cooling_device_trip_point_show(struct device *dev,
 
 /* hwmon sys I/F */
 #include <linux/hwmon.h>
+
+/* thermal zone devices with the same type share one hwmon device */
+struct thermal_hwmon_device {
+	char type[THERMAL_NAME_LENGTH];
+	struct device *device;
+	int count;
+	struct list_head tz_list;
+	struct list_head node;
+};
+
+struct thermal_hwmon_attr {
+	struct device_attribute attr;
+	char name[16];
+};
+
+/* one temperature input for each thermal zone */
+struct thermal_hwmon_temp {
+	struct list_head hwmon_node;
+	struct thermal_zone_device *tz;
+	struct thermal_hwmon_attr temp_input;	/* hwmon sys attr */
+	struct thermal_hwmon_attr temp_crit;	/* hwmon sys attr */
+};
+
 static LIST_HEAD(thermal_hwmon_list);
 
 static ssize_t
@@ -437,9 +460,10 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
 	int ret;
 	struct thermal_hwmon_attr *hwmon_attr
 			= container_of(attr, struct thermal_hwmon_attr, attr);
-	struct thermal_zone_device *tz
-			= container_of(hwmon_attr, struct thermal_zone_device,
+	struct thermal_hwmon_temp *temp
+			= container_of(hwmon_attr, struct thermal_hwmon_temp,
 				       temp_input);
+	struct thermal_zone_device *tz = temp->tz;
 
 	ret = tz->ops->get_temp(tz, &temperature);
 
@@ -455,9 +479,10 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
 {
 	struct thermal_hwmon_attr *hwmon_attr
 			= container_of(attr, struct thermal_hwmon_attr, attr);
-	struct thermal_zone_device *tz
-			= container_of(hwmon_attr, struct thermal_zone_device,
+	struct thermal_hwmon_temp *temp
+			= container_of(hwmon_attr, struct thermal_hwmon_temp,
 				       temp_crit);
+	struct thermal_zone_device *tz = temp->tz;
 	long temperature;
 	int ret;
 
@@ -469,22 +494,54 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
 }
 
 
-static int
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+static struct thermal_hwmon_device *
+thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
 {
 	struct thermal_hwmon_device *hwmon;
-	int new_hwmon_device = 1;
-	int result;
 
 	mutex_lock(&thermal_list_lock);
 	list_for_each_entry(hwmon, &thermal_hwmon_list, node)
 		if (!strcmp(hwmon->type, tz->type)) {
-			new_hwmon_device = 0;
 			mutex_unlock(&thermal_list_lock);
-			goto register_sys_interface;
+			return hwmon;
+		}
+	mutex_unlock(&thermal_list_lock);
+
+	return NULL;
+}
+
+/* Find the temperature input matching a given thermal zone */
+static struct thermal_hwmon_temp *
+thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon,
+			  const struct thermal_zone_device *tz)
+{
+	struct thermal_hwmon_temp *temp;
+
+	mutex_lock(&thermal_list_lock);
+	list_for_each_entry(temp, &hwmon->tz_list, hwmon_node)
+		if (temp->tz == tz) {
+			mutex_unlock(&thermal_list_lock);
+			return temp;
 		}
 	mutex_unlock(&thermal_list_lock);
 
+	return NULL;
+}
+
+static int
+thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+	struct thermal_hwmon_device *hwmon;
+	struct thermal_hwmon_temp *temp;
+	int new_hwmon_device = 1;
+	int result;
+
+	hwmon = thermal_hwmon_lookup_by_type(tz);
+	if (hwmon) {
+		new_hwmon_device = 0;
+		goto register_sys_interface;
+	}
+
 	hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL);
 	if (!hwmon)
 		return -ENOMEM;
@@ -502,30 +559,36 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
 		goto free_mem;
 
  register_sys_interface:
-	tz->hwmon = hwmon;
+	temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL);
+	if (!temp) {
+		result = -ENOMEM;
+		goto unregister_name;
+	}
+
+	temp->tz = tz;
 	hwmon->count++;
 
-	snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH,
+	snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH,
 		 "temp%d_input", hwmon->count);
-	tz->temp_input.attr.attr.name = tz->temp_input.name;
-	tz->temp_input.attr.attr.mode = 0444;
-	tz->temp_input.attr.show = temp_input_show;
-	sysfs_attr_init(&tz->temp_input.attr.attr);
-	result = device_create_file(hwmon->device, &tz->temp_input.attr);
+	temp->temp_input.attr.attr.name = temp->temp_input.name;
+	temp->temp_input.attr.attr.mode = 0444;
+	temp->temp_input.attr.show = temp_input_show;
+	sysfs_attr_init(&temp->temp_input.attr.attr);
+	result = device_create_file(hwmon->device, &temp->temp_input.attr);
 	if (result)
-		goto unregister_name;
+		goto free_temp_mem;
 
 	if (tz->ops->get_crit_temp) {
 		unsigned long temperature;
 		if (!tz->ops->get_crit_temp(tz, &temperature)) {
-			snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH,
+			snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH,
 				 "temp%d_crit", hwmon->count);
-			tz->temp_crit.attr.attr.name = tz->temp_crit.name;
-			tz->temp_crit.attr.attr.mode = 0444;
-			tz->temp_crit.attr.show = temp_crit_show;
-			sysfs_attr_init(&tz->temp_crit.attr.attr);
+			temp->temp_crit.attr.attr.name = temp->temp_crit.name;
+			temp->temp_crit.attr.attr.mode = 0444;
+			temp->temp_crit.attr.show = temp_crit_show;
+			sysfs_attr_init(&temp->temp_crit.attr.attr);
 			result = device_create_file(hwmon->device,
-						    &tz->temp_crit.attr);
+						    &temp->temp_crit.attr);
 			if (result)
 				goto unregister_input;
 		}
@@ -534,13 +597,15 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
 	mutex_lock(&thermal_list_lock);
 	if (new_hwmon_device)
 		list_add_tail(&hwmon->node, &thermal_hwmon_list);
-	list_add_tail(&tz->hwmon_node, &hwmon->tz_list);
+	list_add_tail(&temp->hwmon_node, &hwmon->tz_list);
 	mutex_unlock(&thermal_list_lock);
 
 	return 0;
 
  unregister_input:
-	device_remove_file(hwmon->device, &tz->temp_input.attr);
+	device_remove_file(hwmon->device, &temp->temp_input.attr);
+ free_temp_mem:
+	kfree(temp);
  unregister_name:
 	if (new_hwmon_device) {
 		device_remove_file(hwmon->device, &dev_attr_name);
@@ -556,15 +621,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
 static void
 thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
 {
-	struct thermal_hwmon_device *hwmon = tz->hwmon;
+	struct thermal_hwmon_device *hwmon;
+	struct thermal_hwmon_temp *temp;
+
+	hwmon = thermal_hwmon_lookup_by_type(tz);
+	if (unlikely(!hwmon)) {
+		/* Should never happen... */
+		dev_dbg(&tz->device, "hwmon device lookup failed!\n");
+		return;
+	}
+
+	temp = thermal_hwmon_lookup_temp(hwmon, tz);
+	if (unlikely(!temp)) {
+		/* Should never happen... */
+		dev_dbg(&tz->device, "temperature input lookup failed!\n");
+		return;
+	}
 
-	tz->hwmon = NULL;
-	device_remove_file(hwmon->device, &tz->temp_input.attr);
+	device_remove_file(hwmon->device, &temp->temp_input.attr);
 	if (tz->ops->get_crit_temp)
-		device_remove_file(hwmon->device, &tz->temp_crit.attr);
+		device_remove_file(hwmon->device, &temp->temp_crit.attr);
 
 	mutex_lock(&thermal_list_lock);
-	list_del(&tz->hwmon_node);
+	list_del(&temp->hwmon_node);
+	kfree(temp);
 	if (!list_empty(&hwmon->tz_list)) {
 		mutex_unlock(&thermal_list_lock);
 		return;
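
The thermal_sys rework above hinges on container_of(): the sysfs show callback only receives a pointer to the embedded device_attribute, and the driver walks back to the enclosing per-zone structure. A standalone illustration of that pointer arithmetic (the structures are simplified stand-ins for thermal_hwmon_temp):

	#include <stdio.h>
	#include <stddef.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct attr { const char *name; };
	struct temp_input { int zone_id; struct attr attr; };

	int main(void)
	{
		struct temp_input t = { 3, { "temp3_input" } };
		struct attr *a = &t.attr;	/* what a callback receives */
		struct temp_input *back =
			container_of(a, struct temp_input, attr);
		printf("%s -> zone %d\n", back->attr.name, back->zone_id);
		return 0;
	}
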
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 69407e72aac1..278aeaa92505 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633
 	  enable its driver.
 
 config BACKLIGHT_AAT2870
-	bool "AnalogicTech AAT2870 Backlight"
+	tristate "AnalogicTech AAT2870 Backlight"
 	depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE
 	help
 	  If you have a AnalogicTech AAT2870 say Y to enable the
diff --git a/drivers/video/backlight/aat2870_bl.c b/drivers/video/backlight/aat2870_bl.c
index 4952a617563d..331f1ef1dad5 100644
--- a/drivers/video/backlight/aat2870_bl.c
+++ b/drivers/video/backlight/aat2870_bl.c
@@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl,
 	struct backlight_device *bd = aat2870_bl->bd;
 	int val;
 
-	val = brightness * aat2870_bl->max_current;
+	val = brightness * (aat2870_bl->max_current - 1);
 	val /= bd->props.max_brightness;
 
 	return val;
@@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev)
 	props.type = BACKLIGHT_RAW;
 	bd = backlight_device_register("aat2870-backlight", &pdev->dev,
 				       aat2870_bl, &aat2870_bl_ops, &props);
-	if (!bd) {
+	if (IS_ERR(bd)) {
 		dev_err(&pdev->dev,
 			"Failed allocate memory for backlight device\n");
-		ret = -ENOMEM;
+		ret = PTR_ERR(bd);
 		goto out_kfree;
 	}
 
@@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev)
 	else
 		aat2870_bl->channels = AAT2870_BL_CH_ALL;
 
-	if (pdata->max_brightness > 0)
+	if (pdata->max_current > 0)
 		aat2870_bl->max_current = pdata->max_current;
 	else
 		aat2870_bl->max_current = AAT2870_CURRENT_27_9;
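
The IS_ERR()/PTR_ERR() fix above is needed because backlight_device_register() reports failure through an error-encoded pointer, never NULL. A user-space re-creation of that convention (the kernel reserves the top page of the address space for encoded errnos; MAX_ERRNO and the helpers mirror <linux/err.h>):

	#include <stdio.h>
	#include <errno.h>

	#define MAX_ERRNO 4095
	static inline void *ERR_PTR(long err) { return (void *)err; }
	static inline long PTR_ERR(const void *p) { return (long)p; }
	static inline int IS_ERR(const void *p)
	{
		return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
	}

	static void *register_device(int fail)
	{
		static int dev = 42;
		return fail ? ERR_PTR(-ENOMEM) : (void *)&dev;
	}

	int main(void)
	{
		void *d = register_device(1);
		if (IS_ERR(d))
			printf("error: %ld\n", PTR_ERR(d));	/* -12 */
		return 0;
	}
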
diff --git a/fs/Kconfig b/fs/Kconfig
index 19891aab9c6e..9fe0b349f4cd 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL
 	select TMPFS_XATTR
 	select GENERIC_ACL
 	help
-	  POSIX Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
+	  POSIX Access Control Lists (ACLs) support additional access rights
+	  for users and groups beyond the standard owner/group/world scheme,
+	  and this option selects support for ACLs specifically for tmpfs
+	  filesystems.
+
+	  If you've selected TMPFS, it's possible that you'll also need
+	  this option as there are a number of Linux distros that require
+	  POSIX ACL support under /dev for certain features to work properly.
+	  For example, some distros need this feature for ALSA-related /dev
+	  files for sound to work properly. In short, if you're not sure,
+	  say Y.
 
 	  To learn more about Access Control Lists, visit the POSIX ACLs for
 	  Linux website <http://acl.bestbits.at/>.
 
-	  If you don't know what Access Control Lists are, say N.
-
 config TMPFS_XATTR
 	bool "Tmpfs extended attributes"
 	depends on TMPFS
diff --git a/fs/dcache.c b/fs/dcache.c
index 2347cdb15abb..c83cae19161e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -795,6 +795,7 @@ relock:
 
 /**
  * prune_dcache_sb - shrink the dcache
+ * @sb: superblock
  * @nr_to_scan: number of entries to try to free
  *
  * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e2d88baf91d3..4687fea0c00f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags)
 {
 	void *ret;
 
-	ret = kmalloc(size, flags);
+	ret = kzalloc(size, flags);
 	if (!ret)
 		ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
 	return ret;
diff --git a/fs/stack.c b/fs/stack.c
index 4a6f7f440658..b4f2ab48a61f 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
  *
  * We don't actually know what locking is used at the lower level;
  * but if it's a filesystem that supports quotas, it will be using
- * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
- * its 32-bit is (just) able to exceed 2TB i_size with the aid of
- * holes; but its i_blocks cannot carry into the upper long without
- * almost 2TB swap - let's ignore that case.
+ * i_lock as in inode_add_bytes().
  */
 	if (sizeof(i_blocks) > sizeof(long))
 		spin_lock(&src->i_lock);
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 3090471b2a5e..e49c36d38d7e 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -128,7 +128,7 @@ extern int is_dock_device(acpi_handle handle);
 extern int register_dock_notifier(struct notifier_block *nb);
 extern void unregister_dock_notifier(struct notifier_block *nb);
 extern int register_hotplug_dock_device(acpi_handle handle,
-					struct acpi_dock_ops *ops,
+					const struct acpi_dock_ops *ops,
 					void *context);
 extern void unregister_hotplug_dock_device(acpi_handle handle);
 #else
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 2ed0a8486c19..f554a9313b43 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION			0x20110413
+#define ACPI_CA_VERSION			0x20110623
 
 #include "actypes.h"
 #include "actbl.h"
@@ -69,6 +69,7 @@ extern u32 acpi_gbl_trace_flags;
 extern u32 acpi_gbl_enable_aml_debug_object;
 extern u8 acpi_gbl_copy_dsdt_locally;
 extern u8 acpi_gbl_truncate_io_addresses;
+extern u8 acpi_gbl_disable_auto_repair;
 
 extern u32 acpi_current_gpe_count;
 extern struct acpi_table_fadt acpi_gbl_FADT;
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index e67b523a50e1..51a527d24a8a 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -18,6 +18,11 @@
 
 extern int hest_disable;
 extern int erst_disable;
+#ifdef CONFIG_ACPI_APEI_GHES
+extern int ghes_disable;
+#else
+#define ghes_disable 1
+#endif
 
 #ifdef CONFIG_ACPI_APEI
 void __init acpi_hest_init(void);
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index ba4928cae473..67055f180330 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -337,7 +337,7 @@ extern struct cpuidle_driver acpi_idle_driver;
 
 /* in processor_thermal.c */
 int acpi_processor_get_limit_info(struct acpi_processor *pr);
-extern struct thermal_cooling_device_ops processor_cooling_ops;
+extern const struct thermal_cooling_device_ops processor_cooling_ops;
 #ifdef CONFIG_CPU_FREQ
 void acpi_thermal_cpufreq_init(void);
 void acpi_thermal_cpufreq_exit(void);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1deb2a73c2da..6001b4da39dd 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -238,7 +238,6 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size);
 extern int pnpacpi_disabled;
 
 #define PXM_INVAL	(-1)
-#define NID_INVAL	(-1)
 
 int acpi_check_resource_conflict(const struct resource *res);
 
@@ -280,6 +279,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_CPUHP_OST_SUPPORT	8
 #define OSC_SB_APEI_SUPPORT		16
 
+extern bool osc_sb_apei_support_acked;
+
 /* PCI defined _OSC bits */
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT	1
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3bac44cce142..7ad634501e48 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -146,6 +146,7 @@ extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
 extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
 extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
 
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
 #define BITMAP_LAST_WORD_MASK(nbits)					\
 (									\
 	((nbits) % BITS_PER_LONG) ?					\
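
BITMAP_FIRST_WORD_MASK(start) is the complement of the existing last-word helper: it keeps every bit from the range start upward within one word, so range operations can mask off the low bits in the first word they touch. A quick standalone check:

	#include <stdio.h>

	#define BITS_PER_LONG (8 * (int)sizeof(long))
	#define BITMAP_FIRST_WORD_MASK(start) \
		(~0UL << ((start) % BITS_PER_LONG))

	int main(void)
	{
		/* start = 4: bits 0..3 cleared, everything above kept */
		printf("%#lx\n", BITMAP_FIRST_WORD_MASK(4));
		return 0;
	}
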
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 36719ead50e8..b51629e15cfc 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -122,6 +122,8 @@ struct cpuidle_driver {
 };
 
 #ifdef CONFIG_CPU_IDLE
+extern void disable_cpuidle(void);
+extern int cpuidle_idle_call(void);
 
 extern int cpuidle_register_driver(struct cpuidle_driver *drv);
 struct cpuidle_driver *cpuidle_get_driver(void);
@@ -135,6 +137,8 @@ extern int cpuidle_enable_device(struct cpuidle_device *dev);
 extern void cpuidle_disable_device(struct cpuidle_device *dev);
 
 #else
+static inline void disable_cpuidle(void) { }
+static inline int cpuidle_idle_call(void) { return -ENODEV; }
 
 static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
 {return -ENODEV; }
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 3ff060ac7810..c6f996f2abb6 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -25,10 +25,6 @@ struct fault_attr {
 	unsigned long reject_end;
 
 	unsigned long count;
-
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-	struct dentry *dir;
-#endif
 };
 
 #define FAULT_ATTR_INITIALIZER {				\
@@ -45,19 +41,15 @@ bool should_fail(struct fault_attr *attr, ssize_t size);
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
 
-int init_fault_attr_dentries(struct fault_attr *attr, const char *name);
-void cleanup_fault_attr_dentries(struct fault_attr *attr);
+struct dentry *fault_create_debugfs_attr(const char *name,
+			struct dentry *parent, struct fault_attr *attr);
 
 #else /* CONFIG_FAULT_INJECTION_DEBUG_FS */
 
-static inline int init_fault_attr_dentries(struct fault_attr *attr,
-					 const char *name)
-{
-	return -ENODEV;
-}
-
-static inline void cleanup_fault_attr_dentries(struct fault_attr *attr)
+static inline struct dentry *fault_create_debugfs_attr(const char *name,
+			struct dentry *parent, struct fault_attr *attr)
 {
+	return ERR_PTR(-ENODEV);
 }
 
 #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 5bbebda78b02..5e98eeb2af3b 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -1,8 +1,26 @@
 /*
- * Basic general purpose allocator for managing special purpose memory
- * not managed by the regular kmalloc/kfree interface.
- * Uses for this includes on-device special memory, uncached memory
- * etc.
+ * Basic general purpose allocator for managing special purpose
+ * memory, for example, memory that is not managed by the regular
+ * kmalloc/kfree interface. Uses for this includes on-device special
+ * memory, uncached memory etc.
+ *
+ * It is safe to use the allocator in NMI handlers and other special
+ * unblockable contexts that could otherwise deadlock on locks. This
+ * is implemented by using atomic operations and retries on any
+ * conflicts. The disadvantage is that there may be livelocks in
+ * extreme cases. For better scalability, one allocator can be used
+ * for each CPU.
+ *
+ * The lockless operation only works if there is enough memory
+ * available. If new memory is added to the pool a lock still has to
+ * be taken. So any user relying on locklessness has to ensure that
+ * sufficient memory is preallocated.
+ *
+ * The basic atomic operation of this allocator is cmpxchg on long.
+ * On architectures that don't have NMI-safe cmpxchg implementation,
+ * the allocator can NOT be used in NMI handler. So code using the
+ * allocator in NMI handler should depend on
+ * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
  *
  * This source code is licensed under the GNU General Public License,
  * Version 2. See the file COPYING for more details.
@@ -15,7 +33,7 @@
  * General purpose special memory pool descriptor.
  */
 struct gen_pool {
-	rwlock_t lock;
+	spinlock_t lock;
 	struct list_head chunks;	/* list of chunks in this pool */
 	int min_alloc_order;		/* minimum allocation order */
 };
@@ -24,8 +42,8 @@ struct gen_pool {
  * General purpose special memory pool chunk descriptor.
  */
 struct gen_pool_chunk {
-	spinlock_t lock;
 	struct list_head next_chunk;	/* next chunk in pool */
+	atomic_t avail;
 	phys_addr_t phys_addr;		/* physical starting address of memory chunk */
 	unsigned long start_addr;	/* starting address of memory chunk */
 	unsigned long end_addr;		/* ending address of memory chunk */
@@ -56,4 +74,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
 extern void gen_pool_destroy(struct gen_pool *);
 extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
 extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
+extern void gen_pool_for_each_chunk(struct gen_pool *,
+	void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
+extern size_t gen_pool_avail(struct gen_pool *);
+extern size_t gen_pool_size(struct gen_pool *);
 #endif /* __GENALLOC_H__ */
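[Editor's note] For context, a typical use of the (unchanged) core API together with the new statistics helpers might look like the sketch below; sram_base and sram_size are illustrative placeholders:

	/* pool of 32-byte granules, any NUMA node */
	struct gen_pool *pool = gen_pool_create(5, -1);
	unsigned long vaddr;

	gen_pool_add(pool, (unsigned long)sram_base, sram_size, -1);
	vaddr = gen_pool_alloc(pool, 256);
	/* ... use the region ... */
	gen_pool_free(pool, vaddr, 256);
	pr_info("sram: %zu of %zu bytes free\n",
		gen_pool_avail(pool), gen_pool_size(pool));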
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cb4089254f01..3a76faf6a3ee 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -92,7 +92,7 @@ struct vm_area_struct;
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 23	/* Room for 23 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 24	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 13a801f3d028..255491cf522e 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -146,6 +146,10 @@ void ida_remove(struct ida *ida, int id);
 void ida_destroy(struct ida *ida);
 void ida_init(struct ida *ida);
 
+int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
+		   gfp_t gfp_mask);
+void ida_simple_remove(struct ida *ida, unsigned int id);
+
 void __init idr_init_cache(void);
 
 #endif /* __IDR_H__ */
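[Editor's note] The simple interface wraps ida_pre_get(), ida_get_new_above() and the retry loop that every caller previously had to open-code. A usage sketch; the ida name is illustrative:

	static DEFINE_IDA(foo_ida);

	int id = ida_simple_get(&foo_ida, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;	/* -ENOMEM or -ENOSPC */
	/* ... use id ... */
	ida_simple_remove(&foo_ida, id);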
diff --git a/include/linux/llist.h b/include/linux/llist.h
new file mode 100644
index 000000000000..aa0c8b5b3cd0
--- /dev/null
+++ b/include/linux/llist.h
@@ -0,0 +1,126 @@
+#ifndef LLIST_H
+#define LLIST_H
+/*
+ * Lock-less NULL terminated single linked list
+ *
+ * If there are multiple producers and multiple consumers, llist_add
+ * can be used in producers and llist_del_all can be used in
+ * consumers. They can work simultaneously without a lock. But
+ * llist_del_first can not be used here, because llist_del_first
+ * depends on list->first->next not changing while list->first is
+ * unchanged during its operation; a llist_del_first, llist_add,
+ * llist_add (or llist_del_all, llist_add, llist_add) sequence in
+ * another consumer may violate that.
+ *
+ * If there are multiple producers and one consumer, llist_add can be
+ * used in producers and llist_del_all or llist_del_first can be used
+ * in the consumer.
+ *
+ * This can be summarized as follows:
+ *
+ *           |   add    | del_first |  del_all
+ * add       |    -     |     -     |     -
+ * del_first |          |     L     |     L
+ * del_all   |          |           |     -
+ *
+ * where "-" means no lock is needed and "L" means a lock is needed.
+ *
+ * The list entries deleted via llist_del_all can be traversed with
+ * traversal functions such as llist_for_each. But the list entries
+ * can not be traversed safely before being deleted from the list.
+ * The order of deleted entries is from the newest to the oldest
+ * added one. If you want to traverse from the oldest to the newest,
+ * you must reverse the order by yourself before traversing.
+ *
+ * The basic atomic operation of this list is cmpxchg on long. On
+ * architectures that don't have an NMI-safe cmpxchg implementation,
+ * the list can NOT be used in NMI handlers. So code using the list
+ * in an NMI handler should depend on
+ * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ */
+
+struct llist_head {
+	struct llist_node *first;
+};
+
+struct llist_node {
+	struct llist_node *next;
+};
+
+#define LLIST_HEAD_INIT(name)	{ NULL }
+#define LLIST_HEAD(name)	struct llist_head name = LLIST_HEAD_INIT(name)
+
+/**
+ * init_llist_head - initialize lock-less list head
+ * @list:	the head for your lock-less list
+ */
+static inline void init_llist_head(struct llist_head *list)
+{
+	list->first = NULL;
+}
+
+/**
+ * llist_entry - get the struct of this entry
+ * @ptr:	the &struct llist_node pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the llist_node within the struct.
+ */
+#define llist_entry(ptr, type, member)		\
+	container_of(ptr, type, member)
+
+/**
+ * llist_for_each - iterate over some deleted entries of a lock-less list
+ * @pos:	the &struct llist_node to use as a loop cursor
+ * @node:	the first entry of deleted list entries
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being deleted from the list, so start with an
+ * entry instead of the list head.
+ *
+ * If being used on entries deleted from a lock-less list directly,
+ * the traverse order is from the newest to the oldest added entry.
+ * If you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each(pos, node)			\
+	for ((pos) = (node); pos; (pos) = (pos)->next)
+
+/**
+ * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @node:	the first entry of deleted list entries.
+ * @member:	the name of the llist_node within the struct.
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being removed from the list, so start with an
+ * entry instead of the list head.
+ *
+ * If being used on entries deleted from a lock-less list directly,
+ * the traverse order is from the newest to the oldest added entry.
+ * If you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_entry(pos, node, member)				\
+	for ((pos) = llist_entry((node), typeof(*(pos)), member);	\
+	     &(pos)->member != NULL;					\
+	     (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
+
+/**
+ * llist_empty - tests whether a lock-less list is empty
+ * @head:	the list to test
+ *
+ * Not guaranteed to be accurate or up to date. Just a quick way to
+ * test whether the list is empty without deleting something from the
+ * list.
+ */
+static inline int llist_empty(const struct llist_head *head)
+{
+	return ACCESS_ONCE(head->first) == NULL;
+}
+
+void llist_add(struct llist_node *new, struct llist_head *head);
+void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+		     struct llist_head *head);
+struct llist_node *llist_del_first(struct llist_head *head);
+struct llist_node *llist_del_all(struct llist_head *head);
+#endif /* LLIST_H */
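[Editor's note] A sketch of the multiple-producer/single-consumer pattern the table above allows without a lock; struct foo, f and process() are illustrative:

	struct foo {
		struct llist_node llnode;
		int data;
	};
	static LLIST_HEAD(foo_pending);

	/* producers, possibly in IRQ context; f points to a struct foo */
	llist_add(&f->llnode, &foo_pending);

	/* single consumer: detach everything, traverse newest first */
	struct llist_node *first = llist_del_all(&foo_pending);
	struct foo *pos;

	llist_for_each_entry(pos, first, llnode)
		process(pos->data);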
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b96600786913..3b535db00a94 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -86,8 +86,6 @@ extern void mem_cgroup_uncharge_end(void);
 
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern int mem_cgroup_shmem_charge_fallback(struct page *page,
-			struct mm_struct *mm, gfp_t gfp_mask);
 
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
@@ -225,12 +223,6 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
-static inline int mem_cgroup_shmem_charge_fallback(struct page *page,
-			struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
 static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
 {
 }
diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h
index 89212df05622..f7316c29bdec 100644
--- a/include/linux/mfd/aat2870.h
+++ b/include/linux/mfd/aat2870.h
@@ -89,7 +89,7 @@ enum aat2870_id {
 
 /* Backlight current magnitude (mA) */
 enum aat2870_current {
-	AAT2870_CURRENT_0_45,
+	AAT2870_CURRENT_0_45 = 1,
 	AAT2870_CURRENT_0_90,
 	AAT2870_CURRENT_1_80,
 	AAT2870_CURRENT_2_70,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3172a1c0f08e..f2690cf49827 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1600,6 +1600,7 @@ enum mf_flags {
 };
 extern void memory_failure(unsigned long pfn, int trapno);
 extern int __memory_failure(unsigned long pfn, int trapno, int flags);
+extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
 extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
diff --git a/include/linux/of.h b/include/linux/of.h
index 0085bb01c041..bc3dc6399547 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -68,6 +68,7 @@ struct device_node {
 /* Pointer for first entry in chain of all nodes. */
 extern struct device_node *allnodes;
 extern struct device_node *of_chosen;
+extern struct device_node *of_aliases;
 extern rwlock_t devtree_lock;
 
 static inline bool of_have_populated_dt(void)
@@ -209,6 +210,9 @@ extern int of_device_is_available(const struct device_node *device);
 extern const void *of_get_property(const struct device_node *node,
 				const char *name,
 				int *lenp);
+#define for_each_property(pp, properties) \
+	for (pp = properties; pp != NULL; pp = pp->next)
+
 extern int of_n_addr_cells(struct device_node *np);
 extern int of_n_size_cells(struct device_node *np);
 extern const struct of_device_id *of_match_node(
@@ -221,6 +225,10 @@ extern int of_parse_phandles_with_args(struct device_node *np,
 		const char *list_name, const char *cells_name, int index,
 		struct device_node **out_node, const void **out_args);
 
+extern void *early_init_dt_alloc_memory_arch(u64 size, u64 align);
+extern void of_alias_scan(void);
+extern int of_alias_get_id(struct device_node *np, const char *stem);
+
 extern int of_machine_is_compatible(const char *compat);
 
 extern int prom_add_property(struct device_node* np, struct property* prop);
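[Editor's note] of_alias_get_id() is what lets a driver recover a stable index from a device-tree /aliases entry. A hedged sketch; the fallback helper is illustrative:

	/* with e.g. "serial0 = &uart0;" in the /aliases node */
	int id = of_alias_get_id(pdev->dev.of_node, "serial");

	if (id < 0)
		id = pick_dynamic_id();	/* no alias: fall back */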
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index c84d900fbbb3..b74b74ffe0e7 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -97,7 +97,6 @@ extern void early_init_dt_check_for_initrd(unsigned long node);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
-extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
 extern u64 dt_mem_next_cell(int s, __be32 **cellp);
 
 /*
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 23241c2fecce..9d4539c52e53 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -39,7 +39,15 @@
  * when it is shrunk, before we rcu free the node. See shrink code for
  * details.
  */
 #define RADIX_TREE_INDIRECT_PTR		1
+/*
+ * A common use of the radix tree is to store pointers to struct pages;
+ * but shmem/tmpfs also needs to store swap entries in the same tree:
+ * those are marked as exceptional entries to distinguish them.
+ * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it.
+ */
+#define RADIX_TREE_EXCEPTIONAL_ENTRY	2
+#define RADIX_TREE_EXCEPTIONAL_SHIFT	2
 
 #define radix_tree_indirect_to_ptr(ptr) \
 	radix_tree_indirect_to_ptr((void __force *)(ptr))
@@ -174,6 +182,28 @@ static inline int radix_tree_deref_retry(void *arg)
 }
 
 /**
+ * radix_tree_exceptional_entry	- radix_tree_deref_slot gave exceptional entry?
+ * @arg:	value returned by radix_tree_deref_slot
+ * Returns:	0 if well-aligned pointer, non-0 if exceptional entry.
+ */
+static inline int radix_tree_exceptional_entry(void *arg)
+{
+	/* Not unlikely because radix_tree_exception often tested first */
+	return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
+}
+
+/**
+ * radix_tree_exception	- radix_tree_deref_slot returned either exception?
+ * @arg:	value returned by radix_tree_deref_slot
+ * Returns:	0 if well-aligned pointer, non-0 if either kind of exception.
+ */
+static inline int radix_tree_exception(void *arg)
+{
+	return unlikely((unsigned long)arg &
+		(RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY));
+}
+
+/**
  * radix_tree_replace_slot	- replace item in a slot
  * @pslot:	pointer to slot, returned by radix_tree_lookup_slot
  * @item:	new item to store in the slot.
@@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
 unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			unsigned long first_index, unsigned int max_items);
-unsigned int
-radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
-			unsigned long first_index, unsigned int max_items);
+unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
+			void ***results, unsigned long *indices,
+			unsigned long first_index, unsigned int max_items);
 unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 			unsigned long index, unsigned long max_scan);
@@ -222,6 +252,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
 			unsigned long nr_to_tag,
 			unsigned int fromtag, unsigned int totag);
 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
 
 static inline void radix_tree_preload_end(void)
 {
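[Editor's note] The two predicates are meant to be layered: radix_tree_exception() is the cheap "anything unusual?" test, after which the caller distinguishes an RCU-raced indirect pointer from a genuine exceptional entry. A sketch of the expected lookup pattern, modeled on how shmem is expected to use it:

	void *item = radix_tree_deref_slot(slot);

	if (radix_tree_exception(item)) {
		if (radix_tree_deref_retry(item))
			goto restart;		/* raced with tree shrink */
		/* else: exceptional entry, e.g. an encoded swap entry */
	}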
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index aa08fa8fd79b..9291ac3cc627 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -8,22 +8,15 @@
 
 /* inode in-kernel data */
 
-#define SHMEM_NR_DIRECT 16
-
-#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t))
-
 struct shmem_inode_info {
 	spinlock_t		lock;
 	unsigned long		flags;
 	unsigned long		alloced;	/* data pages alloced to file */
-	unsigned long		swapped;	/* subtotal assigned to swap */
-	unsigned long		next_index;	/* highest alloced index + 1 */
-	struct shared_policy	policy;		/* NUMA memory alloc policy */
-	struct page		*i_indirect;	/* top indirect blocks page */
 	union {
-		swp_entry_t	i_direct[SHMEM_NR_DIRECT]; /* first blocks */
-		char		inline_symlink[SHMEM_SYMLINK_INLINE_LEN];
+		unsigned long	swapped;	/* subtotal assigned to swap */
+		char		*symlink;	/* unswappable short symlink */
 	};
+	struct shared_policy	policy;		/* NUMA memory alloc policy */
 	struct list_head	swaplist;	/* chain of maybes on swap */
 	struct list_head	xattr_list;	/* list of shmem_xattr */
 	struct inode		vfs_inode;
@@ -49,7 +42,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 /*
  * Functions in mm/shmem.c called directly from elsewhere:
  */
-extern int init_tmpfs(void);
+extern int shmem_init(void);
 extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
 extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
@@ -59,8 +52,6 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
-extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
-					struct page **pagep, swp_entry_t *ent);
 
 static inline struct page *shmem_read_mapping_page(
 				struct address_space *mapping, pgoff_t index)
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index cd42e30b7c6e..2189d3ffc85d 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -1,3 +1,8 @@
+#ifndef _LINUX_SWAPOPS_H
+#define _LINUX_SWAPOPS_H
+
+#include <linux/radix-tree.h>
+
 /*
  * swapcache pages are stored in the swapper_space radix tree.  We want to
  * get good packing density in that tree, so the index should be dense in
@@ -76,6 +81,22 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry)
 	return __swp_entry_to_pte(arch_entry);
 }
 
+static inline swp_entry_t radix_to_swp_entry(void *arg)
+{
+	swp_entry_t entry;
+
+	entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+	return entry;
+}
+
+static inline void *swp_to_radix_entry(swp_entry_t entry)
+{
+	unsigned long value;
+
+	value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;
+	return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
+}
+
 #ifdef CONFIG_MIGRATION
 static inline swp_entry_t make_migration_entry(struct page *page, int write)
 {
@@ -169,3 +190,5 @@ static inline int non_swap_entry(swp_entry_t entry)
 	return 0;
 }
 #endif
+
+#endif /* _LINUX_SWAPOPS_H */
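[Editor's note] The two helpers are inverses as long as the top RADIX_TREE_EXCEPTIONAL_SHIFT bits of the swap entry value are unused: encoding shifts the value up and sets the low tag bit, decoding shifts it back down. A small round-trip check as illustration:

	swp_entry_t swap = { .val = 0x1234 };
	void *entry = swp_to_radix_entry(swap);

	BUG_ON(!radix_tree_exceptional_entry(entry));
	BUG_ON(radix_to_swp_entry(entry).val != swap.val);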
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index d3ec89fb4122..47b4a27e6e97 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -85,22 +85,6 @@ struct thermal_cooling_device {
 				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
 #define CELSIUS_TO_KELVIN(t) ((t)*10+2732)
 
-#if defined(CONFIG_THERMAL_HWMON)
-/* thermal zone devices with the same type share one hwmon device */
-struct thermal_hwmon_device {
-	char type[THERMAL_NAME_LENGTH];
-	struct device *device;
-	int count;
-	struct list_head tz_list;
-	struct list_head node;
-};
-
-struct thermal_hwmon_attr {
-	struct device_attribute attr;
-	char name[16];
-};
-#endif
-
 struct thermal_zone_device {
 	int id;
 	char type[THERMAL_NAME_LENGTH];
@@ -120,12 +104,6 @@ struct thermal_zone_device {
 	struct mutex lock;	/* protect cooling devices list */
 	struct list_head node;
 	struct delayed_work poll_queue;
-#if defined(CONFIG_THERMAL_HWMON)
-	struct list_head hwmon_node;
-	struct thermal_hwmon_device *hwmon;
-	struct thermal_hwmon_attr temp_input;	/* hwmon sys attr */
-	struct thermal_hwmon_attr temp_crit;	/* hwmon sys attr */
-#endif
 };
 /* Adding event notification support elements */
 #define THERMAL_GENL_FAMILY_NAME "thermal_event"
diff --git a/init/main.c b/init/main.c
index d7211faed2ad..9c51ee7adf3d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -369,9 +369,12 @@ static noinline void __init_refok rest_init(void)
 	init_idle_bootup_task(current);
 	preempt_enable_no_resched();
 	schedule();
-	preempt_disable();
+
+	/* At this point, we can enable user mode helper functionality */
+	usermodehelper_enable();
 
 	/* Call into cpu_idle with preempt disabled */
+	preempt_disable();
 	cpu_idle();
 }
 
@@ -715,7 +718,7 @@ static void __init do_basic_setup(void)
 {
 	cpuset_init_smp();
 	usermodehelper_init();
-	init_tmpfs();
+	shmem_init();
 	driver_init();
 	init_irq_proc();
 	do_ctors();
diff --git a/ipc/shm.c b/ipc/shm.c
index 9fb044f3b345..b5bae9d945b6 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -294,7 +294,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 void shm_destroy_orphaned(struct ipc_namespace *ns)
 {
 	down_write(&shm_ids(ns).rw_mutex);
-	if (&shm_ids(ns).in_use)
+	if (shm_ids(ns).in_use)
 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
 	up_write(&shm_ids(ns).rw_mutex);
 }
@@ -304,9 +304,12 @@ void exit_shm(struct task_struct *task)
 {
 	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
 
+	if (shm_ids(ns).in_use == 0)
+		return;
+
 	/* Destroy all already created segments, but not mapped yet */
 	down_write(&shm_ids(ns).rw_mutex);
-	if (&shm_ids(ns).in_use)
+	if (shm_ids(ns).in_use)
 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
 	up_write(&shm_ids(ns).rw_mutex);
 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 47613dfb7b28..ddc7644c1305 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -274,7 +274,7 @@ static void __call_usermodehelper(struct work_struct *work)
  * (used for preventing user land processes from being created after the user
  * land has been frozen during a system-wide hibernation or suspend operation).
  */
-static int usermodehelper_disabled;
+static int usermodehelper_disabled = 1;
 
 /* Number of helpers running */
 static atomic_t running_helpers = ATOMIC_INIT(0);
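[Editor's note] This pairs with the init/main.c hunk above: the helper machinery now boots disabled, and rest_init() enables it only once kthreadd is running, so an early call_usermodehelper() fails fast instead of queueing work nobody can run yet. A hedged sketch of the visible effect for an early caller (path illustrative; the early failure is expected to be -EBUSY in this era's implementation):

	char *argv[] = { "/sbin/hotplug", NULL };

	/* before usermodehelper_enable(): expected to fail fast */
	int ret = call_usermodehelper(argv[0], argv, NULL, UMH_WAIT_EXEC);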
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index d1db2880d1cf..e19ce1454ee1 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
 	if (!cpumask_subset(mask, cpu_possible_mask))
 		return -EINVAL;
 
-	s = NULL;
 	if (isadd == REGISTER) {
 		for_each_cpu(cpu, mask) {
-			if (!s)
-				s = kmalloc_node(sizeof(struct listener),
-						 GFP_KERNEL, cpu_to_node(cpu));
+			s = kmalloc_node(sizeof(struct listener),
+					GFP_KERNEL, cpu_to_node(cpu));
 			if (!s)
 				goto cleanup;
+
 			s->pid = pid;
-			INIT_LIST_HEAD(&s->list);
 			s->valid = 1;
 
 			listeners = &per_cpu(listener_array, cpu);
 			down_write(&listeners->sem);
-			list_for_each_entry_safe(s2, tmp, &listeners->list, list) {
-				if (s2->pid == pid)
-					goto next_cpu;
+			list_for_each_entry(s2, &listeners->list, list) {
+				if (s2->pid == pid && s2->valid)
+					goto exists;
 			}
 			list_add(&s->list, &listeners->list);
 			s = NULL;
-next_cpu:
+exists:
 			up_write(&listeners->sem);
+			kfree(s); /* nop if NULL */
 		}
-		kfree(s);
 		return 0;
 	}
 
diff --git a/lib/Kconfig b/lib/Kconfig
index 32f3e5ae2be5..6c695ff9caba 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -276,4 +276,7 @@ config CORDIC
 	  so its calculations are in fixed point. Modules can select this
 	  when they require this function. Module will be called cordic.
 
+config LLIST
+	bool
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 892f4e282ea1..6457af4a7caf 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 
 obj-$(CONFIG_CORDIC) += cordic.o
 
+obj-$(CONFIG_LLIST) += llist.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 37ef4b048795..2f4412e4d071 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
-
 void bitmap_set(unsigned long *map, int start, int nr)
 {
 	unsigned long *p = map + BIT_WORD(start);
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 2577b121c7c1..f193b7796449 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -197,21 +197,15 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
 	return debugfs_create_file(name, mode, parent, value, &fops_atomic_t);
 }
 
-void cleanup_fault_attr_dentries(struct fault_attr *attr)
-{
-	debugfs_remove_recursive(attr->dir);
-}
-
-int init_fault_attr_dentries(struct fault_attr *attr, const char *name)
+struct dentry *fault_create_debugfs_attr(const char *name,
+			struct dentry *parent, struct fault_attr *attr)
 {
 	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 	struct dentry *dir;
 
-	dir = debugfs_create_dir(name, NULL);
+	dir = debugfs_create_dir(name, parent);
 	if (!dir)
-		return -ENOMEM;
-
-	attr->dir = dir;
+		return ERR_PTR(-ENOMEM);
 
 	if (!debugfs_create_ul("probability", mode, dir, &attr->probability))
 		goto fail;
@@ -243,11 +237,11 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name)
 
 #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
 
-	return 0;
+	return dir;
 fail:
-	debugfs_remove_recursive(attr->dir);
+	debugfs_remove_recursive(dir);
 
-	return -ENOMEM;
+	return ERR_PTR(-ENOMEM);
 }
 
 #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 577ddf805975..f352cc42f4f8 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -1,8 +1,26 @@
 /*
- * Basic general purpose allocator for managing special purpose memory
- * not managed by the regular kmalloc/kfree interface.
- * Uses for this includes on-device special memory, uncached memory
- * etc.
+ * Basic general purpose allocator for managing special purpose
+ * memory, for example, memory that is not managed by the regular
+ * kmalloc/kfree interface. Uses for this include on-device special
+ * memory, uncached memory etc.
+ *
+ * It is safe to use the allocator in NMI handlers and other special
+ * unblockable contexts that could otherwise deadlock on locks. This
+ * is implemented by using atomic operations and retries on any
+ * conflicts. The disadvantage is that there may be livelocks in
+ * extreme cases. For better scalability, one allocator can be used
+ * for each CPU.
+ *
+ * The lockless operation only works if there is enough memory
+ * available. If new memory is added to the pool, a lock still has
+ * to be taken. So any user relying on locklessness has to ensure
+ * that sufficient memory is preallocated.
+ *
+ * The basic atomic operation of this allocator is cmpxchg on long.
+ * On architectures that don't have an NMI-safe cmpxchg
+ * implementation, the allocator can NOT be used in NMI handlers. So
+ * code using the allocator in an NMI handler should depend on
+ * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
  *
  * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org>
  *
@@ -13,8 +31,109 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/bitmap.h>
+#include <linux/rculist.h>
+#include <linux/interrupt.h>
 #include <linux/genalloc.h>
 
+static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set)
+{
+	unsigned long val, nval;
+
+	nval = *addr;
+	do {
+		val = nval;
+		if (val & mask_to_set)
+			return -EBUSY;
+		cpu_relax();
+	} while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val);
+
+	return 0;
+}
+
+static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
+{
+	unsigned long val, nval;
+
+	nval = *addr;
+	do {
+		val = nval;
+		if ((val & mask_to_clear) != mask_to_clear)
+			return -EBUSY;
+		cpu_relax();
+	} while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val);
+
+	return 0;
+}
+
+/*
+ * bitmap_set_ll - set the specified number of bits at the specified position
+ * @map: pointer to a bitmap
+ * @start: a bit position in @map
+ * @nr: number of bits to set
+ *
+ * Set @nr bits starting from @start in @map lock-lessly. Several users
+ * can set/clear the same bitmap simultaneously without a lock. If two
+ * users set the same bit, one of them will return the number of bits
+ * still to be set, otherwise zero.
+ */
+static int bitmap_set_ll(unsigned long *map, int start, int nr)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const int size = start + nr;
+	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+	while (nr - bits_to_set >= 0) {
+		if (set_bits_ll(p, mask_to_set))
+			return nr;
+		nr -= bits_to_set;
+		bits_to_set = BITS_PER_LONG;
+		mask_to_set = ~0UL;
+		p++;
+	}
+	if (nr) {
+		mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+		if (set_bits_ll(p, mask_to_set))
+			return nr;
+	}
+
+	return 0;
+}
+
+/*
+ * bitmap_clear_ll - clear the specified number of bits at the specified position
+ * @map: pointer to a bitmap
+ * @start: a bit position in @map
+ * @nr: number of bits to clear
+ *
+ * Clear @nr bits starting from @start in @map lock-lessly. Several users
+ * can set/clear the same bitmap simultaneously without a lock. If two
+ * users clear the same bit, one of them will return the number of bits
+ * still to be cleared, otherwise zero.
+ */
+static int bitmap_clear_ll(unsigned long *map, int start, int nr)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const int size = start + nr;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (nr - bits_to_clear >= 0) {
+		if (clear_bits_ll(p, mask_to_clear))
+			return nr;
+		nr -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (nr) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		if (clear_bits_ll(p, mask_to_clear))
+			return nr;
+	}
+
+	return 0;
+}
 
 /**
  * gen_pool_create - create a new special memory pool
@@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
 
 	pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid);
 	if (pool != NULL) {
-		rwlock_init(&pool->lock);
+		spin_lock_init(&pool->lock);
 		INIT_LIST_HEAD(&pool->chunks);
 		pool->min_alloc_order = min_alloc_order;
 	}
@@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
 	if (unlikely(chunk == NULL))
 		return -ENOMEM;
 
-	spin_lock_init(&chunk->lock);
 	chunk->phys_addr = phys;
 	chunk->start_addr = virt;
 	chunk->end_addr = virt + size;
+	atomic_set(&chunk->avail, size);
 
-	write_lock(&pool->lock);
-	list_add(&chunk->next_chunk, &pool->chunks);
-	write_unlock(&pool->lock);
+	spin_lock(&pool->lock);
+	list_add_rcu(&chunk->next_chunk, &pool->chunks);
+	spin_unlock(&pool->lock);
 
 	return 0;
 }
@@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt);
  */
 phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
 {
-	struct list_head *_chunk;
 	struct gen_pool_chunk *chunk;
+	phys_addr_t paddr = -1;
 
-	read_lock(&pool->lock);
-	list_for_each(_chunk, &pool->chunks) {
-		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
-
-		if (addr >= chunk->start_addr && addr < chunk->end_addr)
-			return chunk->phys_addr + addr - chunk->start_addr;
+	rcu_read_lock();
+	list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
+		if (addr >= chunk->start_addr && addr < chunk->end_addr) {
+			paddr = chunk->phys_addr + (addr - chunk->start_addr);
+			break;
+		}
 	}
-	read_unlock(&pool->lock);
+	rcu_read_unlock();
 
-	return -1;
+	return paddr;
 }
 EXPORT_SYMBOL(gen_pool_virt_to_phys);
 
@@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool)
 	int order = pool->min_alloc_order;
 	int bit, end_bit;
 
-
 	list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
 		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
 		list_del(&chunk->next_chunk);
@@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy);
  * @size: number of bytes to allocate from the pool
  *
  * Allocate the requested number of bytes from the specified pool.
- * Uses a first-fit algorithm.
+ * Uses a first-fit algorithm. Can not be used in NMI handlers on
+ * architectures without an NMI-safe cmpxchg implementation.
  */
 unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
 {
-	struct list_head *_chunk;
 	struct gen_pool_chunk *chunk;
-	unsigned long addr, flags;
+	unsigned long addr = 0;
 	int order = pool->min_alloc_order;
-	int nbits, start_bit, end_bit;
+	int nbits, start_bit = 0, end_bit, remain;
+
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+	BUG_ON(in_nmi());
+#endif
 
 	if (size == 0)
 		return 0;
 
 	nbits = (size + (1UL << order) - 1) >> order;
-
-	read_lock(&pool->lock);
-	list_for_each(_chunk, &pool->chunks) {
-		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
+	rcu_read_lock();
+	list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
+		if (size > atomic_read(&chunk->avail))
+			continue;
 
 		end_bit = (chunk->end_addr - chunk->start_addr) >> order;
-
-		spin_lock_irqsave(&chunk->lock, flags);
-		start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0,
-						nbits, 0);
-		if (start_bit >= end_bit) {
-			spin_unlock_irqrestore(&chunk->lock, flags);
+retry:
+		start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit,
+						       start_bit, nbits, 0);
+		if (start_bit >= end_bit)
 			continue;
+		remain = bitmap_set_ll(chunk->bits, start_bit, nbits);
+		if (remain) {
+			remain = bitmap_clear_ll(chunk->bits, start_bit,
+						 nbits - remain);
+			BUG_ON(remain);
+			goto retry;
 		}
 
 		addr = chunk->start_addr + ((unsigned long)start_bit << order);
-
-		bitmap_set(chunk->bits, start_bit, nbits);
-		spin_unlock_irqrestore(&chunk->lock, flags);
-		read_unlock(&pool->lock);
-		return addr;
+		size = nbits << order;
+		atomic_sub(size, &chunk->avail);
+		break;
 	}
-	read_unlock(&pool->lock);
-	return 0;
+	rcu_read_unlock();
+	return addr;
 }
 EXPORT_SYMBOL(gen_pool_alloc);
 
@@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc);
  * @addr: starting address of memory to free back to pool
  * @size: size in bytes of memory to free
  *
- * Free previously allocated special memory back to the specified pool.
+ * Free previously allocated special memory back to the specified
+ * pool. Can not be used in NMI handlers on architectures without
+ * an NMI-safe cmpxchg implementation.
  */
 void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
 {
-	struct list_head *_chunk;
 	struct gen_pool_chunk *chunk;
-	unsigned long flags;
 	int order = pool->min_alloc_order;
-	int bit, nbits;
+	int start_bit, nbits, remain;
 
-	nbits = (size + (1UL << order) - 1) >> order;
-
-	read_lock(&pool->lock);
-	list_for_each(_chunk, &pool->chunks) {
-		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+	BUG_ON(in_nmi());
+#endif
 
+	nbits = (size + (1UL << order) - 1) >> order;
+	rcu_read_lock();
+	list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
 		if (addr >= chunk->start_addr && addr < chunk->end_addr) {
 			BUG_ON(addr + size > chunk->end_addr);
-			spin_lock_irqsave(&chunk->lock, flags);
-			bit = (addr - chunk->start_addr) >> order;
-			while (nbits--)
-				__clear_bit(bit++, chunk->bits);
-			spin_unlock_irqrestore(&chunk->lock, flags);
-			break;
+			start_bit = (addr - chunk->start_addr) >> order;
+			remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
+			BUG_ON(remain);
+			size = nbits << order;
+			atomic_add(size, &chunk->avail);
+			rcu_read_unlock();
+			return;
 		}
 	}
-	BUG_ON(nbits > 0);
-	read_unlock(&pool->lock);
+	rcu_read_unlock();
+	BUG();
 }
 EXPORT_SYMBOL(gen_pool_free);
+
+/**
+ * gen_pool_for_each_chunk - call func for every chunk of generic memory pool
+ * @pool:	the generic memory pool
+ * @func:	func to call
+ * @data:	additional data used by @func
+ *
+ * Call @func for every chunk of the generic memory pool. The @func is
+ * called with rcu_read_lock held.
+ */
+void gen_pool_for_each_chunk(struct gen_pool *pool,
+	void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data),
+	void *data)
+{
+	struct gen_pool_chunk *chunk;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk)
+		func(pool, chunk, data);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(gen_pool_for_each_chunk);
+
+/**
+ * gen_pool_avail - get available free space of the pool
+ * @pool: pool to get available free space of
+ *
+ * Return available free space of the specified pool.
+ */
+size_t gen_pool_avail(struct gen_pool *pool)
+{
+	struct gen_pool_chunk *chunk;
+	size_t avail = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
+		avail += atomic_read(&chunk->avail);
+	rcu_read_unlock();
+	return avail;
+}
+EXPORT_SYMBOL_GPL(gen_pool_avail);
+
+/**
+ * gen_pool_size - get size in bytes of memory managed by the pool
+ * @pool: pool to get size of
+ *
+ * Return size in bytes of memory managed by the pool.
+ */
+size_t gen_pool_size(struct gen_pool *pool)
+{
+	struct gen_pool_chunk *chunk;
+	size_t size = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
+		size += chunk->end_addr - chunk->start_addr;
+	rcu_read_unlock();
+	return size;
+}
+EXPORT_SYMBOL_GPL(gen_pool_size);
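[Editor's note] As a usage note, gen_pool_for_each_chunk() runs the callback under rcu_read_lock, so the callback must not sleep. A hedged sketch of a dump helper; the names are illustrative:

	static void chunk_show(struct gen_pool *pool,
			       struct gen_pool_chunk *chunk, void *data)
	{
		pr_info("chunk %#lx-%#lx: %d bytes free\n",
			chunk->start_addr, chunk->end_addr,
			atomic_read(&chunk->avail));
	}

	gen_pool_for_each_chunk(pool, chunk_show, NULL);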
diff --git a/lib/idr.c b/lib/idr.c
index e15502e8b21e..db040ce3fa73 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -34,8 +34,10 @@
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/idr.h>
+#include <linux/spinlock.h>
 
 static struct kmem_cache *idr_layer_cache;
+static DEFINE_SPINLOCK(simple_ida_lock);
 
 static struct idr_layer *get_from_free_list(struct idr *idp)
 {
@@ -926,6 +928,71 @@ void ida_destroy(struct ida *ida)
 EXPORT_SYMBOL(ida_destroy);
 
 /**
+ * ida_simple_get - get a new id.
+ * @ida: the (initialized) ida.
+ * @start: the minimum id (inclusive, < 0x80000000)
+ * @end: the maximum id (exclusive, < 0x80000000 or 0)
+ * @gfp_mask: memory allocation flags
+ *
+ * Allocates an id in the range start <= id < end, or returns -ENOSPC.
+ * On memory allocation failure, returns -ENOMEM.
+ *
+ * Use ida_simple_remove() to get rid of an id.
+ */
+int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
+		   gfp_t gfp_mask)
+{
+	int ret, id;
+	unsigned int max;
+
+	BUG_ON((int)start < 0);
+	BUG_ON((int)end < 0);
+
+	if (end == 0)
+		max = 0x80000000;
+	else {
+		BUG_ON(end < start);
+		max = end - 1;
+	}
+
+again:
+	if (!ida_pre_get(ida, gfp_mask))
+		return -ENOMEM;
+
+	spin_lock(&simple_ida_lock);
+	ret = ida_get_new_above(ida, start, &id);
+	if (!ret) {
+		if (id > max) {
+			ida_remove(ida, id);
+			ret = -ENOSPC;
+		} else {
+			ret = id;
+		}
+	}
+	spin_unlock(&simple_ida_lock);
+
+	if (unlikely(ret == -EAGAIN))
+		goto again;
+
+	return ret;
+}
+EXPORT_SYMBOL(ida_simple_get);
+
+/**
+ * ida_simple_remove - remove an allocated id.
+ * @ida: the (initialized) ida.
+ * @id: the id returned by ida_simple_get.
+ */
+void ida_simple_remove(struct ida *ida, unsigned int id)
+{
+	BUG_ON((int)id < 0);
+	spin_lock(&simple_ida_lock);
+	ida_remove(ida, id);
+	spin_unlock(&simple_ida_lock);
+}
+EXPORT_SYMBOL(ida_simple_remove);
+
+/**
  * ida_init - initialize ida handle
  * @ida:	ida handle
  *
diff --git a/lib/llist.c b/lib/llist.c
new file mode 100644
index 000000000000..da445724fa1f
--- /dev/null
+++ b/lib/llist.c
@@ -0,0 +1,129 @@
+/*
+ * Lock-less NULL terminated single linked list
+ *
+ * The basic atomic operation of this list is cmpxchg on long. On
+ * architectures that don't have an NMI-safe cmpxchg implementation,
+ * the list can NOT be used in NMI handlers. So code using the list
+ * in an NMI handler should depend on
+ * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/llist.h>
+
+#include <asm/system.h>
+
+/**
+ * llist_add - add a new entry
+ * @new:	new entry to be added
+ * @head:	the head for your lock-less list
+ */
+void llist_add(struct llist_node *new, struct llist_head *head)
+{
+	struct llist_node *entry, *old_entry;
+
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+	BUG_ON(in_nmi());
+#endif
+
+	entry = head->first;
+	do {
+		old_entry = entry;
+		new->next = entry;
+		cpu_relax();
+	} while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry);
+}
+EXPORT_SYMBOL_GPL(llist_add);
+
+/**
+ * llist_add_batch - add several linked entries in batch
+ * @new_first:	first entry in batch to be added
+ * @new_last:	last entry in batch to be added
+ * @head:	the head for your lock-less list
+ */
+void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+		     struct llist_head *head)
+{
+	struct llist_node *entry, *old_entry;
+
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+	BUG_ON(in_nmi());
+#endif
+
+	entry = head->first;
+	do {
+		old_entry = entry;
+		new_last->next = entry;
+		cpu_relax();
+	} while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry);
+}
+EXPORT_SYMBOL_GPL(llist_add_batch);
+
+/**
+ * llist_del_first - delete the first entry of lock-less list
+ * @head:	the head for your lock-less list
+ *
+ * If the list is empty, return NULL; otherwise return the first
+ * entry deleted, which is the newest added one.
+ *
+ * Only one llist_del_first user can run simultaneously with multiple
+ * llist_add users without a lock, because otherwise a
+ * llist_del_first, llist_add, llist_add (or llist_del_all,
+ * llist_add, llist_add) sequence in another user may change
+ * @head->first->next while keeping @head->first. If multiple
+ * consumers are needed, please use llist_del_all or use a lock
+ * between consumers.
+ */
+struct llist_node *llist_del_first(struct llist_head *head)
+{
+	struct llist_node *entry, *old_entry, *next;
+
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+	BUG_ON(in_nmi());
+#endif
+
+	entry = head->first;
+	do {
+		if (entry == NULL)
+			return NULL;
+		old_entry = entry;
+		next = entry->next;
+		cpu_relax();
+	} while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry);
+
+	return entry;
+}
+EXPORT_SYMBOL_GPL(llist_del_first);
+
+/**
+ * llist_del_all - delete all entries from lock-less list
+ * @head:	the head of the lock-less list to delete all entries from
+ *
+ * If the list is empty, return NULL; otherwise delete all entries
+ * and return a pointer to the first entry. Deleted entries are
+ * ordered from the newest to the oldest added one.
+ */
+struct llist_node *llist_del_all(struct llist_head *head)
+{
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+	BUG_ON(in_nmi());
+#endif
+
+	return xchg(&head->first, NULL);
+}
+EXPORT_SYMBOL_GPL(llist_del_all);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7ea2e033d715..a2f9da59c197 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
 EXPORT_SYMBOL(radix_tree_prev_hole);
 
 static unsigned int
-__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
-	unsigned int max_items, unsigned long *next_index)
+__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
+	unsigned long index, unsigned int max_items, unsigned long *next_index)
 {
 	unsigned int nr_found = 0;
 	unsigned int shift, height;
@@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
 
 	/* Bottom level: grab some items */
 	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
-		index++;
 		if (slot->slots[i]) {
-			results[nr_found++] = &(slot->slots[i]);
-			if (nr_found == max_items)
+			results[nr_found] = &(slot->slots[i]);
+			if (indices)
+				indices[nr_found] = index;
+			if (++nr_found == max_items) {
+				index++;
 				goto out;
+			}
 		}
+		index++;
 	}
 out:
 	*next_index = index;
@@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 
 		if (cur_index > max_index)
 			break;
-		slots_found = __lookup(node, (void ***)results + ret, cur_index,
-					max_items - ret, &next_index);
+		slots_found = __lookup(node, (void ***)results + ret, NULL,
+				cur_index, max_items - ret, &next_index);
 		nr_found = 0;
 		for (i = 0; i < slots_found; i++) {
 			struct radix_tree_node *slot;
@@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
  * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
  * @root:	radix tree root
  * @results:	where the results of the lookup are placed
+ * @indices:	where their indices should be placed (but usually NULL)
  * @first_index:	start the lookup from this key
  * @max_items:	place up to this many items at *results
  *
@@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
958 * protection, radix_tree_deref_slot may fail requiring a retry. 963 * protection, radix_tree_deref_slot may fail requiring a retry.
959 */ 964 */
960unsigned int 965unsigned int
961radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 966radix_tree_gang_lookup_slot(struct radix_tree_root *root,
967 void ***results, unsigned long *indices,
962 unsigned long first_index, unsigned int max_items) 968 unsigned long first_index, unsigned int max_items)
963{ 969{
964 unsigned long max_index; 970 unsigned long max_index;
@@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
974 if (first_index > 0) 980 if (first_index > 0)
975 return 0; 981 return 0;
976 results[0] = (void **)&root->rnode; 982 results[0] = (void **)&root->rnode;
983 if (indices)
984 indices[0] = 0;
977 return 1; 985 return 1;
978 } 986 }
979 node = indirect_to_ptr(node); 987 node = indirect_to_ptr(node);
@@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
987 995
988 if (cur_index > max_index) 996 if (cur_index > max_index)
989 break; 997 break;
990 slots_found = __lookup(node, results + ret, cur_index, 998 slots_found = __lookup(node, results + ret,
991 max_items - ret, &next_index); 999 indices ? indices + ret : NULL,
1000 cur_index, max_items - ret, &next_index);
992 ret += slots_found; 1001 ret += slots_found;
993 if (next_index == 0) 1002 if (next_index == 0)
994 break; 1003 break;
@@ -1194,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1194} 1203}
1195EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1204EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1196 1205
1206#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
1207#include <linux/sched.h> /* for cond_resched() */
1208
1209/*
1210 * This linear search is at present only useful to shmem_unuse_inode().
1211 */
1212static unsigned long __locate(struct radix_tree_node *slot, void *item,
1213 unsigned long index, unsigned long *found_index)
1214{
1215 unsigned int shift, height;
1216 unsigned long i;
1217
1218 height = slot->height;
1219 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1220
1221 for ( ; height > 1; height--) {
1222 i = (index >> shift) & RADIX_TREE_MAP_MASK;
1223 for (;;) {
1224 if (slot->slots[i] != NULL)
1225 break;
1226 index &= ~((1UL << shift) - 1);
1227 index += 1UL << shift;
1228 if (index == 0)
1229 goto out; /* 32-bit wraparound */
1230 i++;
1231 if (i == RADIX_TREE_MAP_SIZE)
1232 goto out;
1233 }
1234
1235 shift -= RADIX_TREE_MAP_SHIFT;
1236 slot = rcu_dereference_raw(slot->slots[i]);
1237 if (slot == NULL)
1238 goto out;
1239 }
1240
1241 /* Bottom level: check items */
1242 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
1243 if (slot->slots[i] == item) {
1244 *found_index = index + i;
1245 index = 0;
1246 goto out;
1247 }
1248 }
1249 index += RADIX_TREE_MAP_SIZE;
1250out:
1251 return index;
1252}
1253
1254/**
1255 * radix_tree_locate_item - search through radix tree for item
1256 * @root: radix tree root
1257 * @item: item to be found
1258 *
1259 * Returns index where item was found, or -1 if not found.
1260 * Caller must hold no lock (since this time-consuming function needs
1261 * to be preemptible), and must check afterwards if item is still there.
1262 */
1263unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1264{
1265 struct radix_tree_node *node;
1266 unsigned long max_index;
1267 unsigned long cur_index = 0;
1268 unsigned long found_index = -1;
1269
1270 do {
1271 rcu_read_lock();
1272 node = rcu_dereference_raw(root->rnode);
1273 if (!radix_tree_is_indirect_ptr(node)) {
1274 rcu_read_unlock();
1275 if (node == item)
1276 found_index = 0;
1277 break;
1278 }
1279
1280 node = indirect_to_ptr(node);
1281 max_index = radix_tree_maxindex(node->height);
1282 if (cur_index > max_index)
1283 break;
1284
1285 cur_index = __locate(node, item, cur_index, &found_index);
1286 rcu_read_unlock();
1287 cond_resched();
1288 } while (cur_index != 0 && cur_index <= max_index);
1289
1290 return found_index;
1291}
1292#else
1293unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1294{
1295 return -1;
1296}
1297#endif /* CONFIG_SHMEM && CONFIG_SWAP */
1197 1298
1198/** 1299/**
1199 * radix_tree_shrink - shrink height of a radix tree to minimal 1300 * radix_tree_shrink - shrink height of a radix tree to minimal
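
The extra argument to __lookup() and radix_tree_gang_lookup_slot() lets callers learn at which index each slot was found. A sketch of a caller using it; dump_slot_indices() is illustrative, and the mapping's page_tree stands in for any radix tree:

#include <linux/fs.h>
#include <linux/pagevec.h>
#include <linux/radix-tree.h>

static void dump_slot_indices(struct address_space *mapping, pgoff_t start)
{
	void **slots[PAGEVEC_SIZE];
	unsigned long indices[PAGEVEC_SIZE];
	unsigned int i, nr;

	rcu_read_lock();
	nr = radix_tree_gang_lookup_slot(&mapping->page_tree, slots,
					 indices, start, PAGEVEC_SIZE);
	for (i = 0; i < nr; i++)
		pr_debug("slot %u found at index %lu\n", i, indices[i]);
	rcu_read_unlock();
}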
diff --git a/mm/failslab.c b/mm/failslab.c
index 1ce58c201dca..0dd7b8fec71c 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab);
34#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS 34#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
35static int __init failslab_debugfs_init(void) 35static int __init failslab_debugfs_init(void)
36{ 36{
37 struct dentry *dir;
37 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 38 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
38 int err;
39 39
40 err = init_fault_attr_dentries(&failslab.attr, "failslab"); 40 dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
41 if (err) 41 if (IS_ERR(dir))
42 return err; 42 return PTR_ERR(dir);
43 43
44 if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir, 44 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
45 &failslab.ignore_gfp_wait)) 45 &failslab.ignore_gfp_wait))
46 goto fail; 46 goto fail;
47 if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir, 47 if (!debugfs_create_bool("cache-filter", mode, dir,
48 &failslab.cache_filter)) 48 &failslab.cache_filter))
49 goto fail; 49 goto fail;
50 50
51 return 0; 51 return 0;
52fail: 52fail:
53 cleanup_fault_attr_dentries(&failslab.attr); 53 debugfs_remove_recursive(dir);
54 54
55 return -ENOMEM; 55 return -ENOMEM;
56} 56}
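
The same conversion is applied to fail_page_alloc in mm/page_alloc.c below: fault_create_debugfs_attr() now creates the directory and hands back its dentry, extra knobs hang off that dentry, and cleanup is a single debugfs_remove_recursive(). The pattern in isolation, assuming a hypothetical fault_attr called my_fail (all names here are illustrative):

#include <linux/debugfs.h>
#include <linux/fault-inject.h>

static struct fault_attr my_fail;
static u32 my_ignore_gfp_wait;

static int __init my_fail_debugfs_init(void)
{
	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("my-fail", NULL, &my_fail);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
				 &my_ignore_gfp_wait))
		goto fail;
	return 0;
fail:
	debugfs_remove_recursive(dir);
	return -ENOMEM;
}
late_initcall(my_fail_debugfs_init);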
diff --git a/mm/filemap.c b/mm/filemap.c
index 867d40222ec7..645a080ba4df 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,7 +33,6 @@
33#include <linux/cpuset.h> 33#include <linux/cpuset.h>
34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
35#include <linux/memcontrol.h> 35#include <linux/memcontrol.h>
36#include <linux/mm_inline.h> /* for page_is_file_cache() */
37#include <linux/cleancache.h> 36#include <linux/cleancache.h>
38#include "internal.h" 37#include "internal.h"
39 38
@@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
462 int error; 461 int error;
463 462
464 VM_BUG_ON(!PageLocked(page)); 463 VM_BUG_ON(!PageLocked(page));
464 VM_BUG_ON(PageSwapBacked(page));
465 465
466 error = mem_cgroup_cache_charge(page, current->mm, 466 error = mem_cgroup_cache_charge(page, current->mm,
467 gfp_mask & GFP_RECLAIM_MASK); 467 gfp_mask & GFP_RECLAIM_MASK);
@@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
479 if (likely(!error)) { 479 if (likely(!error)) {
480 mapping->nrpages++; 480 mapping->nrpages++;
481 __inc_zone_page_state(page, NR_FILE_PAGES); 481 __inc_zone_page_state(page, NR_FILE_PAGES);
482 if (PageSwapBacked(page))
483 __inc_zone_page_state(page, NR_SHMEM);
484 spin_unlock_irq(&mapping->tree_lock); 482 spin_unlock_irq(&mapping->tree_lock);
485 } else { 483 } else {
486 page->mapping = NULL; 484 page->mapping = NULL;
@@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
502{ 500{
503 int ret; 501 int ret;
504 502
505 /*
506 * Splice_read and readahead add shmem/tmpfs pages into the page cache
507 * before shmem_readpage has a chance to mark them as SwapBacked: they
508 * need to go on the anon lru below, and mem_cgroup_cache_charge
509 * (called in add_to_page_cache) needs to know where they're going too.
510 */
511 if (mapping_cap_swap_backed(mapping))
512 SetPageSwapBacked(page);
513
514 ret = add_to_page_cache(page, mapping, offset, gfp_mask); 503 ret = add_to_page_cache(page, mapping, offset, gfp_mask);
515 if (ret == 0) { 504 if (ret == 0)
516 if (page_is_file_cache(page)) 505 lru_cache_add_file(page);
517 lru_cache_add_file(page);
518 else
519 lru_cache_add_anon(page);
520 }
521 return ret; 506 return ret;
522} 507}
523EXPORT_SYMBOL_GPL(add_to_page_cache_lru); 508EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
@@ -714,9 +699,16 @@ repeat:
714 page = radix_tree_deref_slot(pagep); 699 page = radix_tree_deref_slot(pagep);
715 if (unlikely(!page)) 700 if (unlikely(!page))
716 goto out; 701 goto out;
717 if (radix_tree_deref_retry(page)) 702 if (radix_tree_exception(page)) {
718 goto repeat; 703 if (radix_tree_deref_retry(page))
719 704 goto repeat;
705 /*
706 * Otherwise, shmem/tmpfs must be storing a swap entry
707 * here as an exceptional entry: so return it without
708 * attempting to raise page count.
709 */
710 goto out;
711 }
720 if (!page_cache_get_speculative(page)) 712 if (!page_cache_get_speculative(page))
721 goto repeat; 713 goto repeat;
722 714
@@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
753 745
754repeat: 746repeat:
755 page = find_get_page(mapping, offset); 747 page = find_get_page(mapping, offset);
756 if (page) { 748 if (page && !radix_tree_exception(page)) {
757 lock_page(page); 749 lock_page(page);
758 /* Has the page been truncated? */ 750 /* Has the page been truncated? */
759 if (unlikely(page->mapping != mapping)) { 751 if (unlikely(page->mapping != mapping)) {
@@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
840 rcu_read_lock(); 832 rcu_read_lock();
841restart: 833restart:
842 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, 834 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
843 (void ***)pages, start, nr_pages); 835 (void ***)pages, NULL, start, nr_pages);
844 ret = 0; 836 ret = 0;
845 for (i = 0; i < nr_found; i++) { 837 for (i = 0; i < nr_found; i++) {
846 struct page *page; 838 struct page *page;
@@ -849,13 +841,22 @@ repeat:
849 if (unlikely(!page)) 841 if (unlikely(!page))
850 continue; 842 continue;
851 843
852 /* 844 if (radix_tree_exception(page)) {
853 * This can only trigger when the entry at index 0 moves out 845 if (radix_tree_deref_retry(page)) {
854 * of or back to the root: none yet gotten, safe to restart. 846 /*
855 */ 847 * Transient condition which can only trigger
856 if (radix_tree_deref_retry(page)) { 848 * when entry at index 0 moves out of or back
857 WARN_ON(start | i); 849 * to root: none yet gotten, safe to restart.
858 goto restart; 850 */
851 WARN_ON(start | i);
852 goto restart;
853 }
854 /*
855 * Otherwise, shmem/tmpfs must be storing a swap entry
856 * here as an exceptional entry: so skip over it -
857 * we only reach this from invalidate_mapping_pages().
858 */
859 continue;
859 } 860 }
860 861
861 if (!page_cache_get_speculative(page)) 862 if (!page_cache_get_speculative(page))
@@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
903 rcu_read_lock(); 904 rcu_read_lock();
904restart: 905restart:
905 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, 906 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
906 (void ***)pages, index, nr_pages); 907 (void ***)pages, NULL, index, nr_pages);
907 ret = 0; 908 ret = 0;
908 for (i = 0; i < nr_found; i++) { 909 for (i = 0; i < nr_found; i++) {
909 struct page *page; 910 struct page *page;
@@ -912,12 +913,22 @@ repeat:
912 if (unlikely(!page)) 913 if (unlikely(!page))
913 continue; 914 continue;
914 915
915 /* 916 if (radix_tree_exception(page)) {
916 * This can only trigger when the entry at index 0 moves out 917 if (radix_tree_deref_retry(page)) {
917 * of or back to the root: none yet gotten, safe to restart. 918 /*
918 */ 919 * Transient condition which can only trigger
919 if (radix_tree_deref_retry(page)) 920 * when entry at index 0 moves out of or back
920 goto restart; 921 * to root: none yet gotten, safe to restart.
922 */
923 goto restart;
924 }
925 /*
926 * Otherwise, shmem/tmpfs must be storing a swap entry
927 * here as an exceptional entry: so stop looking for
928 * contiguous pages.
929 */
930 break;
931 }
921 932
922 if (!page_cache_get_speculative(page)) 933 if (!page_cache_get_speculative(page))
923 goto repeat; 934 goto repeat;
@@ -977,12 +988,21 @@ repeat:
977 if (unlikely(!page)) 988 if (unlikely(!page))
978 continue; 989 continue;
979 990
980 /* 991 if (radix_tree_exception(page)) {
981 * This can only trigger when the entry at index 0 moves out 992 if (radix_tree_deref_retry(page)) {
982 * of or back to the root: none yet gotten, safe to restart. 993 /*
983 */ 994 * Transient condition which can only trigger
984 if (radix_tree_deref_retry(page)) 995 * when entry at index 0 moves out of or back
985 goto restart; 996 * to root: none yet gotten, safe to restart.
997 */
998 goto restart;
999 }
1000 /*
1001 * This function is never used on a shmem/tmpfs
1002 * mapping, so a swap entry won't be found here.
1003 */
1004 BUG();
1005 }
986 1006
987 if (!page_cache_get_speculative(page)) 1007 if (!page_cache_get_speculative(page))
988 goto repeat; 1008 goto repeat;
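
All three lookup paths above now share one shape: a slot deref may yield a real page, a transient retry marker, or a shmem swap entry stored as an exceptional entry. Condensed into a single helper for clarity; my_get_page() is an illustrative sketch built from the same calls used above, not a kernel API:

#include <linux/pagemap.h>
#include <linux/radix-tree.h>

static struct page *my_get_page(struct address_space *mapping, pgoff_t index)
{
	struct page *page;
	void **slot;

	rcu_read_lock();
repeat:
	page = NULL;
	slot = radix_tree_lookup_slot(&mapping->page_tree, index);
	if (!slot)
		goto out;
	page = radix_tree_deref_slot(slot);
	if (unlikely(!page))
		goto out;
	if (radix_tree_exception(page)) {
		if (radix_tree_deref_retry(page))
			goto repeat;	/* root node moved: transient */
		page = NULL;		/* shmem swap entry, not a page */
		goto out;
	}
	if (!page_cache_get_speculative(page))
		goto repeat;
	if (unlikely(page != *slot)) {	/* page moved: drop ref, retry */
		page_cache_release(page);
		goto repeat;
	}
out:
	rcu_read_unlock();
	return page;
}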
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5f84d2351ddb..f4ec4e7ca4cd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,7 +35,6 @@
35#include <linux/limits.h> 35#include <linux/limits.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/rbtree.h> 37#include <linux/rbtree.h>
38#include <linux/shmem_fs.h>
39#include <linux/slab.h> 38#include <linux/slab.h>
40#include <linux/swap.h> 39#include <linux/swap.h>
41#include <linux/swapops.h> 40#include <linux/swapops.h>
@@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2873 return 0; 2872 return 0;
2874 if (PageCompound(page)) 2873 if (PageCompound(page))
2875 return 0; 2874 return 0;
2876 /*
2877 * Corner case handling. This is called from add_to_page_cache()
2878 * in usual. But some FS (shmem) precharges this page before calling it
2879 * and call add_to_page_cache() with GFP_NOWAIT.
2880 *
2881 * For GFP_NOWAIT case, the page may be pre-charged before calling
2882 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
2883 * charge twice. (It works but has to pay a bit larger cost.)
2884 * And when the page is SwapCache, it should take swap information
2885 * into account. This is under lock_page() now.
2886 */
2887 if (!(gfp_mask & __GFP_WAIT)) {
2888 struct page_cgroup *pc;
2889
2890 pc = lookup_page_cgroup(page);
2891 if (!pc)
2892 return 0;
2893 lock_page_cgroup(pc);
2894 if (PageCgroupUsed(pc)) {
2895 unlock_page_cgroup(pc);
2896 return 0;
2897 }
2898 unlock_page_cgroup(pc);
2899 }
2900 2875
2901 if (unlikely(!mm)) 2876 if (unlikely(!mm))
2902 mm = &init_mm; 2877 mm = &init_mm;
@@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
3486 cgroup_release_and_wakeup_rmdir(&mem->css); 3461 cgroup_release_and_wakeup_rmdir(&mem->css);
3487} 3462}
3488 3463
3489/*
3490 * A call to try to shrink memory usage on charge failure at shmem's swapin.
3491 * Calling hierarchical_reclaim is not enough because we should update
3492 * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
3493 * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
3494 * not from the memcg which this page would be charged to.
3495 * try_charge_swapin does all of these works properly.
3496 */
3497int mem_cgroup_shmem_charge_fallback(struct page *page,
3498 struct mm_struct *mm,
3499 gfp_t gfp_mask)
3500{
3501 struct mem_cgroup *mem;
3502 int ret;
3503
3504 if (mem_cgroup_disabled())
3505 return 0;
3506
3507 ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
3508 if (!ret)
3509 mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
3510
3511 return ret;
3512}
3513
3514#ifdef CONFIG_DEBUG_VM 3464#ifdef CONFIG_DEBUG_VM
3515static struct page_cgroup *lookup_page_cgroup_used(struct page *page) 3465static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
3516{ 3466{
@@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
5330 pgoff = pte_to_pgoff(ptent); 5280 pgoff = pte_to_pgoff(ptent);
5331 5281
5332 /* page is moved even if it's not RSS of this task(page-faulted). */ 5282 /* page is moved even if it's not RSS of this task(page-faulted). */
5333 if (!mapping_cap_swap_backed(mapping)) { /* normal file */ 5283 page = find_get_page(mapping, pgoff);
5334 page = find_get_page(mapping, pgoff); 5284
5335 } else { /* shmem/tmpfs file. we should take account of swap too. */ 5285#ifdef CONFIG_SWAP
5336 swp_entry_t ent; 5286 /* shmem/tmpfs may report page out on swap: account for that too. */
5337 mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); 5287 if (radix_tree_exceptional_entry(page)) {
5288 swp_entry_t swap = radix_to_swp_entry(page);
5338 if (do_swap_account) 5289 if (do_swap_account)
5339 entry->val = ent.val; 5290 *entry = swap;
5291 page = find_get_page(&swapper_space, swap.val);
5340 } 5292 }
5341 5293#endif
5342 return page; 5294 return page;
5343} 5295}
5344 5296
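
mc_handle_file_pte() can now meet a swap entry where it expected a page, because shmem stores swap entries directly in the page-cache radix tree as exceptional entries. The encoding round-trips through two helpers added by this series; a self-contained sketch (the type and offset values are illustrative):

#include <linux/radix-tree.h>
#include <linux/swapops.h>

static void my_radswap_roundtrip(void)
{
	unsigned long type = 0;			/* illustrative */
	pgoff_t offset = 0x1234;		/* illustrative */
	swp_entry_t swap = swp_entry(type, offset);
	void *radswap = swp_to_radix_entry(swap);

	/* Low bits mark it exceptional: it can never alias a page pointer. */
	BUG_ON(!radix_tree_exceptional_entry(radswap));
	BUG_ON(radix_to_swp_entry(radswap).val != swap.val);
}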
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 740c4f52059c..2b43ba051ac9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -53,6 +53,7 @@
53#include <linux/hugetlb.h> 53#include <linux/hugetlb.h>
54#include <linux/memory_hotplug.h> 54#include <linux/memory_hotplug.h>
55#include <linux/mm_inline.h> 55#include <linux/mm_inline.h>
56#include <linux/kfifo.h>
56#include "internal.h" 57#include "internal.h"
57 58
58int sysctl_memory_failure_early_kill __read_mostly = 0; 59int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno)
1178 __memory_failure(pfn, trapno, 0); 1179 __memory_failure(pfn, trapno, 0);
1179} 1180}
1180 1181
1182#define MEMORY_FAILURE_FIFO_ORDER 4
1183#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
1184
1185struct memory_failure_entry {
1186 unsigned long pfn;
1187 int trapno;
1188 int flags;
1189};
1190
1191struct memory_failure_cpu {
1192 DECLARE_KFIFO(fifo, struct memory_failure_entry,
1193 MEMORY_FAILURE_FIFO_SIZE);
1194 spinlock_t lock;
1195 struct work_struct work;
1196};
1197
1198static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);
1199
1200/**
1201 * memory_failure_queue - Schedule handling memory failure of a page.
1202 * @pfn: Page Number of the corrupted page
1203 * @trapno: Trap number reported in the signal to user space.
1204 * @flags: Flags for memory failure handling
1205 *
1206 * This function is called by the low level hardware error handler
1207 * when it detects hardware memory corruption of a page. It schedules
 1208 * recovery of the error page, including dropping pages, killing
 1209 * processes, etc.
1210 *
1211 * The function is primarily of use for corruptions that
1212 * happen outside the current execution context (e.g. when
 1213 * detected by a background scrubber).
1214 *
1215 * Can run in IRQ context.
1216 */
1217void memory_failure_queue(unsigned long pfn, int trapno, int flags)
1218{
1219 struct memory_failure_cpu *mf_cpu;
1220 unsigned long proc_flags;
1221 struct memory_failure_entry entry = {
1222 .pfn = pfn,
1223 .trapno = trapno,
1224 .flags = flags,
1225 };
1226
1227 mf_cpu = &get_cpu_var(memory_failure_cpu);
1228 spin_lock_irqsave(&mf_cpu->lock, proc_flags);
1229 if (kfifo_put(&mf_cpu->fifo, &entry))
1230 schedule_work_on(smp_processor_id(), &mf_cpu->work);
1231 else
 1232 pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n",
1233 pfn);
1234 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
1235 put_cpu_var(memory_failure_cpu);
1236}
1237EXPORT_SYMBOL_GPL(memory_failure_queue);
1238
1239static void memory_failure_work_func(struct work_struct *work)
1240{
1241 struct memory_failure_cpu *mf_cpu;
1242 struct memory_failure_entry entry = { 0, };
1243 unsigned long proc_flags;
1244 int gotten;
1245
1246 mf_cpu = &__get_cpu_var(memory_failure_cpu);
1247 for (;;) {
1248 spin_lock_irqsave(&mf_cpu->lock, proc_flags);
1249 gotten = kfifo_get(&mf_cpu->fifo, &entry);
1250 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
1251 if (!gotten)
1252 break;
1253 __memory_failure(entry.pfn, entry.trapno, entry.flags);
1254 }
1255}
1256
1257static int __init memory_failure_init(void)
1258{
1259 struct memory_failure_cpu *mf_cpu;
1260 int cpu;
1261
1262 for_each_possible_cpu(cpu) {
1263 mf_cpu = &per_cpu(memory_failure_cpu, cpu);
1264 spin_lock_init(&mf_cpu->lock);
1265 INIT_KFIFO(mf_cpu->fifo);
1266 INIT_WORK(&mf_cpu->work, memory_failure_work_func);
1267 }
1268
1269 return 0;
1270}
1271core_initcall(memory_failure_init);
1272
1181/** 1273/**
1182 * unpoison_memory - Unpoison a previously poisoned page 1274 * unpoison_memory - Unpoison a previously poisoned page
1183 * @pfn: Page number of the to be unpoisoned page 1275 * @pfn: Page number of the to be unpoisoned page
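
memory_failure_queue() is meant to be callable from the error-reporting interrupt itself; the per-CPU kfifo and work item defer the heavyweight __memory_failure() handling to process context. An illustrative caller, where my_scrubber_isr() and my_read_bad_pfn() are hypothetical driver pieces:

#include <linux/interrupt.h>
#include <linux/mm.h>

static unsigned long my_read_bad_pfn(void *dev);	/* hypothetical hw accessor */

static irqreturn_t my_scrubber_isr(int irq, void *dev)
{
	unsigned long pfn = my_read_bad_pfn(dev);

	/* trapno 0: no user-visible trap; flags 0: default handling */
	memory_failure_queue(pfn, 0, 0);
	return IRQ_HANDLED;
}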
diff --git a/mm/mincore.c b/mm/mincore.c
index a4e6b9d75c76..636a86876ff2 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
69 * file will not get a swp_entry_t in its pte, but rather it is like 69 * file will not get a swp_entry_t in its pte, but rather it is like
70 * any other file mapping (ie. marked !present and faulted in with 70 * any other file mapping (ie. marked !present and faulted in with
71 * tmpfs's .fault). So swapped out tmpfs mappings are tested here. 71 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
72 *
73 * However when tmpfs moves the page from pagecache and into swapcache,
74 * it is still in core, but the find_get_page below won't find it.
75 * No big deal, but make a note of it.
76 */ 72 */
77 page = find_get_page(mapping, pgoff); 73 page = find_get_page(mapping, pgoff);
74#ifdef CONFIG_SWAP
75 /* shmem/tmpfs may return swap: account for swapcache page too. */
76 if (radix_tree_exceptional_entry(page)) {
77 swp_entry_t swap = radix_to_swp_entry(page);
78 page = find_get_page(&swapper_space, swap.val);
79 }
80#endif
78 if (page) { 81 if (page) {
79 present = PageUptodate(page); 82 present = PageUptodate(page);
80 page_cache_release(page); 83 page_cache_release(page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1dbcf8888f14..6e8ecb6e021c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void)
1409{ 1409{
1410 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 1410 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
1411 struct dentry *dir; 1411 struct dentry *dir;
1412 int err;
1413 1412
1414 err = init_fault_attr_dentries(&fail_page_alloc.attr, 1413 dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
1415 "fail_page_alloc"); 1414 &fail_page_alloc.attr);
1416 if (err) 1415 if (IS_ERR(dir))
1417 return err; 1416 return PTR_ERR(dir);
1418
1419 dir = fail_page_alloc.attr.dir;
1420 1417
1421 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, 1418 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
1422 &fail_page_alloc.ignore_gfp_wait)) 1419 &fail_page_alloc.ignore_gfp_wait))
@@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void)
1430 1427
1431 return 0; 1428 return 0;
1432fail: 1429fail:
1433 cleanup_fault_attr_dentries(&fail_page_alloc.attr); 1430 debugfs_remove_recursive(dir);
1434 1431
1435 return -ENOMEM; 1432 return -ENOMEM;
1436} 1433}
diff --git a/mm/shmem.c b/mm/shmem.c
index 5cc21f8b4cd3..32f6763f16fb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -6,7 +6,8 @@
6 * 2000-2001 Christoph Rohland 6 * 2000-2001 Christoph Rohland
7 * 2000-2001 SAP AG 7 * 2000-2001 SAP AG
8 * 2002 Red Hat Inc. 8 * 2002 Red Hat Inc.
9 * Copyright (C) 2002-2005 Hugh Dickins. 9 * Copyright (C) 2002-2011 Hugh Dickins.
10 * Copyright (C) 2011 Google Inc.
10 * Copyright (C) 2002-2005 VERITAS Software Corporation. 11 * Copyright (C) 2002-2005 VERITAS Software Corporation.
11 * Copyright (C) 2004 Andi Kleen, SuSE Labs 12 * Copyright (C) 2004 Andi Kleen, SuSE Labs
12 * 13 *
@@ -28,7 +29,6 @@
28#include <linux/file.h> 29#include <linux/file.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
30#include <linux/module.h> 31#include <linux/module.h>
31#include <linux/percpu_counter.h>
32#include <linux/swap.h> 32#include <linux/swap.h>
33 33
34static struct vfsmount *shm_mnt; 34static struct vfsmount *shm_mnt;
@@ -51,6 +51,8 @@ static struct vfsmount *shm_mnt;
51#include <linux/shmem_fs.h> 51#include <linux/shmem_fs.h>
52#include <linux/writeback.h> 52#include <linux/writeback.h>
53#include <linux/blkdev.h> 53#include <linux/blkdev.h>
54#include <linux/pagevec.h>
55#include <linux/percpu_counter.h>
54#include <linux/splice.h> 56#include <linux/splice.h>
55#include <linux/security.h> 57#include <linux/security.h>
56#include <linux/swapops.h> 58#include <linux/swapops.h>
@@ -63,43 +65,17 @@ static struct vfsmount *shm_mnt;
63#include <linux/magic.h> 65#include <linux/magic.h>
64 66
65#include <asm/uaccess.h> 67#include <asm/uaccess.h>
66#include <asm/div64.h>
67#include <asm/pgtable.h> 68#include <asm/pgtable.h>
68 69
69/*
70 * The maximum size of a shmem/tmpfs file is limited by the maximum size of
71 * its triple-indirect swap vector - see illustration at shmem_swp_entry().
72 *
73 * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
74 * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum
75 * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
76 * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
77 *
78 * We use / and * instead of shifts in the definitions below, so that the swap
79 * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
80 */
81#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
82#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
83
84#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
85#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
86
87#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
88#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
89
90#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) 70#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
91#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) 71#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
92 72
93/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
94#define SHMEM_PAGEIN VM_READ
95#define SHMEM_TRUNCATE VM_WRITE
96
97/* Definition to limit shmem_truncate's steps between cond_rescheds */
98#define LATENCY_LIMIT 64
99
100/* Pretend that each entry is of this size in directory's i_size */ 73/* Pretend that each entry is of this size in directory's i_size */
101#define BOGO_DIRENT_SIZE 20 74#define BOGO_DIRENT_SIZE 20
102 75
76/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
77#define SHORT_SYMLINK_LEN 128
78
103struct shmem_xattr { 79struct shmem_xattr {
104 struct list_head list; /* anchored by shmem_inode_info->xattr_list */ 80 struct list_head list; /* anchored by shmem_inode_info->xattr_list */
105 char *name; /* xattr name */ 81 char *name; /* xattr name */
@@ -107,7 +83,7 @@ struct shmem_xattr {
107 char value[0]; 83 char value[0];
108}; 84};
109 85
110/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ 86/* Flag allocation requirements to shmem_getpage */
111enum sgp_type { 87enum sgp_type {
112 SGP_READ, /* don't exceed i_size, don't allocate page */ 88 SGP_READ, /* don't exceed i_size, don't allocate page */
113 SGP_CACHE, /* don't exceed i_size, may allocate page */ 89 SGP_CACHE, /* don't exceed i_size, may allocate page */
@@ -137,56 +113,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index,
137 mapping_gfp_mask(inode->i_mapping), fault_type); 113 mapping_gfp_mask(inode->i_mapping), fault_type);
138} 114}
139 115
140static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
141{
142 /*
143 * The above definition of ENTRIES_PER_PAGE, and the use of
144 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
145 * might be reconsidered if it ever diverges from PAGE_SIZE.
146 *
147 * Mobility flags are masked out as swap vectors cannot move
148 */
149 return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
150 PAGE_CACHE_SHIFT-PAGE_SHIFT);
151}
152
153static inline void shmem_dir_free(struct page *page)
154{
155 __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
156}
157
158static struct page **shmem_dir_map(struct page *page)
159{
160 return (struct page **)kmap_atomic(page, KM_USER0);
161}
162
163static inline void shmem_dir_unmap(struct page **dir)
164{
165 kunmap_atomic(dir, KM_USER0);
166}
167
168static swp_entry_t *shmem_swp_map(struct page *page)
169{
170 return (swp_entry_t *)kmap_atomic(page, KM_USER1);
171}
172
173static inline void shmem_swp_balance_unmap(void)
174{
175 /*
176 * When passing a pointer to an i_direct entry, to code which
177 * also handles indirect entries and so will shmem_swp_unmap,
178 * we must arrange for the preempt count to remain in balance.
179 * What kmap_atomic of a lowmem page does depends on config
180 * and architecture, so pretend to kmap_atomic some lowmem page.
181 */
182 (void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
183}
184
185static inline void shmem_swp_unmap(swp_entry_t *entry)
186{
187 kunmap_atomic(entry, KM_USER1);
188}
189
190static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) 116static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
191{ 117{
192 return sb->s_fs_info; 118 return sb->s_fs_info;
@@ -244,15 +170,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
244static LIST_HEAD(shmem_swaplist); 170static LIST_HEAD(shmem_swaplist);
245static DEFINE_MUTEX(shmem_swaplist_mutex); 171static DEFINE_MUTEX(shmem_swaplist_mutex);
246 172
247static void shmem_free_blocks(struct inode *inode, long pages)
248{
249 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
250 if (sbinfo->max_blocks) {
251 percpu_counter_add(&sbinfo->used_blocks, -pages);
252 inode->i_blocks -= pages*BLOCKS_PER_PAGE;
253 }
254}
255
256static int shmem_reserve_inode(struct super_block *sb) 173static int shmem_reserve_inode(struct super_block *sb)
257{ 174{
258 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 175 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
@@ -279,7 +196,7 @@ static void shmem_free_inode(struct super_block *sb)
279} 196}
280 197
281/** 198/**
282 * shmem_recalc_inode - recalculate the size of an inode 199 * shmem_recalc_inode - recalculate the block usage of an inode
283 * @inode: inode to recalc 200 * @inode: inode to recalc
284 * 201 *
285 * We have to calculate the free blocks since the mm can drop 202 * We have to calculate the free blocks since the mm can drop
@@ -297,474 +214,297 @@ static void shmem_recalc_inode(struct inode *inode)
297 214
298 freed = info->alloced - info->swapped - inode->i_mapping->nrpages; 215 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
299 if (freed > 0) { 216 if (freed > 0) {
217 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
218 if (sbinfo->max_blocks)
219 percpu_counter_add(&sbinfo->used_blocks, -freed);
300 info->alloced -= freed; 220 info->alloced -= freed;
221 inode->i_blocks -= freed * BLOCKS_PER_PAGE;
301 shmem_unacct_blocks(info->flags, freed); 222 shmem_unacct_blocks(info->flags, freed);
302 shmem_free_blocks(inode, freed);
303 } 223 }
304} 224}
305 225
306/** 226/*
307 * shmem_swp_entry - find the swap vector position in the info structure 227 * Replace item expected in radix tree by a new item, while holding tree lock.
308 * @info: info structure for the inode
309 * @index: index of the page to find
310 * @page: optional page to add to the structure. Has to be preset to
311 * all zeros
312 *
313 * If there is no space allocated yet it will return NULL when
314 * page is NULL, else it will use the page for the needed block,
315 * setting it to NULL on return to indicate that it has been used.
316 *
317 * The swap vector is organized the following way:
318 *
319 * There are SHMEM_NR_DIRECT entries directly stored in the
 320 * shmem_inode_info structure. So small files do not need an additional
321 * allocation.
322 *
323 * For pages with index > SHMEM_NR_DIRECT there is the pointer
324 * i_indirect which points to a page which holds in the first half
325 * doubly indirect blocks, in the second half triple indirect blocks:
326 *
327 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
328 * following layout (for SHMEM_NR_DIRECT == 16):
329 *
330 * i_indirect -> dir --> 16-19
331 * | +-> 20-23
332 * |
333 * +-->dir2 --> 24-27
334 * | +-> 28-31
335 * | +-> 32-35
336 * | +-> 36-39
337 * |
338 * +-->dir3 --> 40-43
339 * +-> 44-47
340 * +-> 48-51
341 * +-> 52-55
342 */ 228 */
343static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) 229static int shmem_radix_tree_replace(struct address_space *mapping,
344{ 230 pgoff_t index, void *expected, void *replacement)
345 unsigned long offset; 231{
346 struct page **dir; 232 void **pslot;
347 struct page *subdir; 233 void *item = NULL;
348 234
349 if (index < SHMEM_NR_DIRECT) { 235 VM_BUG_ON(!expected);
350 shmem_swp_balance_unmap(); 236 pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
351 return info->i_direct+index; 237 if (pslot)
352 } 238 item = radix_tree_deref_slot_protected(pslot,
353 if (!info->i_indirect) { 239 &mapping->tree_lock);
354 if (page) { 240 if (item != expected)
355 info->i_indirect = *page; 241 return -ENOENT;
356 *page = NULL; 242 if (replacement)
357 } 243 radix_tree_replace_slot(pslot, replacement);
358 return NULL; /* need another page */ 244 else
359 } 245 radix_tree_delete(&mapping->page_tree, index);
360 246 return 0;
361 index -= SHMEM_NR_DIRECT; 247}
362 offset = index % ENTRIES_PER_PAGE;
363 index /= ENTRIES_PER_PAGE;
364 dir = shmem_dir_map(info->i_indirect);
365
366 if (index >= ENTRIES_PER_PAGE/2) {
367 index -= ENTRIES_PER_PAGE/2;
368 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
369 index %= ENTRIES_PER_PAGE;
370 subdir = *dir;
371 if (!subdir) {
372 if (page) {
373 *dir = *page;
374 *page = NULL;
375 }
376 shmem_dir_unmap(dir);
377 return NULL; /* need another page */
378 }
379 shmem_dir_unmap(dir);
380 dir = shmem_dir_map(subdir);
381 }
382 248
383 dir += index; 249/*
384 subdir = *dir; 250 * Like add_to_page_cache_locked, but error if expected item has gone.
385 if (!subdir) { 251 */
386 if (!page || !(subdir = *page)) { 252static int shmem_add_to_page_cache(struct page *page,
387 shmem_dir_unmap(dir); 253 struct address_space *mapping,
388 return NULL; /* need a page */ 254 pgoff_t index, gfp_t gfp, void *expected)
255{
256 int error = 0;
257
258 VM_BUG_ON(!PageLocked(page));
259 VM_BUG_ON(!PageSwapBacked(page));
260
261 if (!expected)
262 error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
263 if (!error) {
264 page_cache_get(page);
265 page->mapping = mapping;
266 page->index = index;
267
268 spin_lock_irq(&mapping->tree_lock);
269 if (!expected)
270 error = radix_tree_insert(&mapping->page_tree,
271 index, page);
272 else
273 error = shmem_radix_tree_replace(mapping, index,
274 expected, page);
275 if (!error) {
276 mapping->nrpages++;
277 __inc_zone_page_state(page, NR_FILE_PAGES);
278 __inc_zone_page_state(page, NR_SHMEM);
279 spin_unlock_irq(&mapping->tree_lock);
280 } else {
281 page->mapping = NULL;
282 spin_unlock_irq(&mapping->tree_lock);
283 page_cache_release(page);
389 } 284 }
390 *dir = subdir; 285 if (!expected)
391 *page = NULL; 286 radix_tree_preload_end();
392 } 287 }
393 shmem_dir_unmap(dir); 288 if (error)
394 return shmem_swp_map(subdir) + offset; 289 mem_cgroup_uncharge_cache_page(page);
290 return error;
395} 291}
396 292
397static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) 293/*
294 * Like delete_from_page_cache, but substitutes swap for page.
295 */
296static void shmem_delete_from_page_cache(struct page *page, void *radswap)
398{ 297{
399 long incdec = value? 1: -1; 298 struct address_space *mapping = page->mapping;
299 int error;
400 300
401 entry->val = value; 301 spin_lock_irq(&mapping->tree_lock);
402 info->swapped += incdec; 302 error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
403 if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { 303 page->mapping = NULL;
404 struct page *page = kmap_atomic_to_page(entry); 304 mapping->nrpages--;
405 set_page_private(page, page_private(page) + incdec); 305 __dec_zone_page_state(page, NR_FILE_PAGES);
406 } 306 __dec_zone_page_state(page, NR_SHMEM);
307 spin_unlock_irq(&mapping->tree_lock);
308 page_cache_release(page);
309 BUG_ON(error);
407} 310}
408 311
409/** 312/*
410 * shmem_swp_alloc - get the position of the swap entry for the page. 313 * Like find_get_pages, but collecting swap entries as well as pages.
411 * @info: info structure for the inode
412 * @index: index of the page to find
413 * @sgp: check and recheck i_size? skip allocation?
414 * @gfp: gfp mask to use for any page allocation
415 *
416 * If the entry does not exist, allocate it.
417 */ 314 */
418static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, 315static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
419 unsigned long index, enum sgp_type sgp, gfp_t gfp) 316 pgoff_t start, unsigned int nr_pages,
420{ 317 struct page **pages, pgoff_t *indices)
421 struct inode *inode = &info->vfs_inode; 318{
422 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 319 unsigned int i;
423 struct page *page = NULL; 320 unsigned int ret;
424 swp_entry_t *entry; 321 unsigned int nr_found;
425 322
426 if (sgp != SGP_WRITE && 323 rcu_read_lock();
427 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) 324restart:
428 return ERR_PTR(-EINVAL); 325 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
429 326 (void ***)pages, indices, start, nr_pages);
430 while (!(entry = shmem_swp_entry(info, index, &page))) { 327 ret = 0;
431 if (sgp == SGP_READ) 328 for (i = 0; i < nr_found; i++) {
432 return shmem_swp_map(ZERO_PAGE(0)); 329 struct page *page;
433 /* 330repeat:
434 * Test used_blocks against 1 less max_blocks, since we have 1 data 331 page = radix_tree_deref_slot((void **)pages[i]);
435 * page (and perhaps indirect index pages) yet to allocate: 332 if (unlikely(!page))
436 * a waste to allocate index if we cannot allocate data. 333 continue;
437 */ 334 if (radix_tree_exception(page)) {
438 if (sbinfo->max_blocks) { 335 if (radix_tree_deref_retry(page))
439 if (percpu_counter_compare(&sbinfo->used_blocks, 336 goto restart;
440 sbinfo->max_blocks - 1) >= 0) 337 /*
441 return ERR_PTR(-ENOSPC); 338 * Otherwise, we must be storing a swap entry
442 percpu_counter_inc(&sbinfo->used_blocks); 339 * here as an exceptional entry: so return it
443 inode->i_blocks += BLOCKS_PER_PAGE; 340 * without attempting to raise page count.
341 */
342 goto export;
444 } 343 }
344 if (!page_cache_get_speculative(page))
345 goto repeat;
445 346
446 spin_unlock(&info->lock); 347 /* Has the page moved? */
447 page = shmem_dir_alloc(gfp); 348 if (unlikely(page != *((void **)pages[i]))) {
448 spin_lock(&info->lock); 349 page_cache_release(page);
449 350 goto repeat;
450 if (!page) {
451 shmem_free_blocks(inode, 1);
452 return ERR_PTR(-ENOMEM);
453 }
454 if (sgp != SGP_WRITE &&
455 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
456 entry = ERR_PTR(-EINVAL);
457 break;
458 } 351 }
459 if (info->next_index <= index) 352export:
460 info->next_index = index + 1; 353 indices[ret] = indices[i];
461 } 354 pages[ret] = page;
462 if (page) { 355 ret++;
463 /* another task gave its page, or truncated the file */ 356 }
464 shmem_free_blocks(inode, 1); 357 if (unlikely(!ret && nr_found))
465 shmem_dir_free(page); 358 goto restart;
466 } 359 rcu_read_unlock();
467 if (info->next_index <= index && !IS_ERR(entry)) 360 return ret;
468 info->next_index = index + 1;
469 return entry;
470} 361}
471 362
472/** 363/*
473 * shmem_free_swp - free some swap entries in a directory 364 * Remove swap entry from radix tree, free the swap and its page cache.
474 * @dir: pointer to the directory
475 * @edir: pointer after last entry of the directory
476 * @punch_lock: pointer to spinlock when needed for the holepunch case
477 */ 365 */
478static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, 366static int shmem_free_swap(struct address_space *mapping,
479 spinlock_t *punch_lock) 367 pgoff_t index, void *radswap)
480{ 368{
481 spinlock_t *punch_unlock = NULL; 369 int error;
482 swp_entry_t *ptr; 370
483 int freed = 0; 371 spin_lock_irq(&mapping->tree_lock);
484 372 error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
485 for (ptr = dir; ptr < edir; ptr++) { 373 spin_unlock_irq(&mapping->tree_lock);
486 if (ptr->val) { 374 if (!error)
487 if (unlikely(punch_lock)) { 375 free_swap_and_cache(radix_to_swp_entry(radswap));
488 punch_unlock = punch_lock; 376 return error;
489 punch_lock = NULL;
490 spin_lock(punch_unlock);
491 if (!ptr->val)
492 continue;
493 }
494 free_swap_and_cache(*ptr);
495 *ptr = (swp_entry_t){0};
496 freed++;
497 }
498 }
499 if (punch_unlock)
500 spin_unlock(punch_unlock);
501 return freed;
502}
503
504static int shmem_map_and_free_swp(struct page *subdir, int offset,
505 int limit, struct page ***dir, spinlock_t *punch_lock)
506{
507 swp_entry_t *ptr;
508 int freed = 0;
509
510 ptr = shmem_swp_map(subdir);
511 for (; offset < limit; offset += LATENCY_LIMIT) {
512 int size = limit - offset;
513 if (size > LATENCY_LIMIT)
514 size = LATENCY_LIMIT;
515 freed += shmem_free_swp(ptr+offset, ptr+offset+size,
516 punch_lock);
517 if (need_resched()) {
518 shmem_swp_unmap(ptr);
519 if (*dir) {
520 shmem_dir_unmap(*dir);
521 *dir = NULL;
522 }
523 cond_resched();
524 ptr = shmem_swp_map(subdir);
525 }
526 }
527 shmem_swp_unmap(ptr);
528 return freed;
529} 377}
530 378
531static void shmem_free_pages(struct list_head *next) 379/*
380 * Pagevec may contain swap entries, so shuffle up pages before releasing.
381 */
382static void shmem_pagevec_release(struct pagevec *pvec)
532{ 383{
533 struct page *page; 384 int i, j;
534 int freed = 0; 385
535 386 for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
536 do { 387 struct page *page = pvec->pages[i];
537 page = container_of(next, struct page, lru); 388 if (!radix_tree_exceptional_entry(page))
538 next = next->next; 389 pvec->pages[j++] = page;
539 shmem_dir_free(page); 390 }
540 freed++; 391 pvec->nr = j;
541 if (freed >= LATENCY_LIMIT) { 392 pagevec_release(pvec);
542 cond_resched();
543 freed = 0;
544 }
545 } while (next);
546} 393}
547 394
548void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) 395/*
396 * Remove range of pages and swap entries from radix tree, and free them.
397 */
398void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
549{ 399{
400 struct address_space *mapping = inode->i_mapping;
550 struct shmem_inode_info *info = SHMEM_I(inode); 401 struct shmem_inode_info *info = SHMEM_I(inode);
551 unsigned long idx; 402 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
552 unsigned long size; 403 unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
553 unsigned long limit; 404 pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
554 unsigned long stage; 405 struct pagevec pvec;
555 unsigned long diroff; 406 pgoff_t indices[PAGEVEC_SIZE];
556 struct page **dir;
557 struct page *topdir;
558 struct page *middir;
559 struct page *subdir;
560 swp_entry_t *ptr;
561 LIST_HEAD(pages_to_free);
562 long nr_pages_to_free = 0;
563 long nr_swaps_freed = 0; 407 long nr_swaps_freed = 0;
564 int offset; 408 pgoff_t index;
565 int freed; 409 int i;
566 int punch_hole;
567 spinlock_t *needs_lock;
568 spinlock_t *punch_lock;
569 unsigned long upper_limit;
570 410
571 truncate_inode_pages_range(inode->i_mapping, start, end); 411 BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
572 412
573 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 413 pagevec_init(&pvec, 0);
574 idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 414 index = start;
575 if (idx >= info->next_index) 415 while (index <= end) {
576 return; 416 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
417 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
418 pvec.pages, indices);
419 if (!pvec.nr)
420 break;
421 mem_cgroup_uncharge_start();
422 for (i = 0; i < pagevec_count(&pvec); i++) {
423 struct page *page = pvec.pages[i];
577 424
578 spin_lock(&info->lock); 425 index = indices[i];
579 info->flags |= SHMEM_TRUNCATE; 426 if (index > end)
580 if (likely(end == (loff_t) -1)) { 427 break;
581 limit = info->next_index; 428
582 upper_limit = SHMEM_MAX_INDEX; 429 if (radix_tree_exceptional_entry(page)) {
583 info->next_index = idx; 430 nr_swaps_freed += !shmem_free_swap(mapping,
584 needs_lock = NULL; 431 index, page);
585 punch_hole = 0; 432 continue;
586 } else { 433 }
587 if (end + 1 >= inode->i_size) { /* we may free a little more */
588 limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
589 PAGE_CACHE_SHIFT;
590 upper_limit = SHMEM_MAX_INDEX;
591 } else {
592 limit = (end + 1) >> PAGE_CACHE_SHIFT;
593 upper_limit = limit;
594 }
595 needs_lock = &info->lock;
596 punch_hole = 1;
597 }
598 434
599 topdir = info->i_indirect; 435 if (!trylock_page(page))
600 if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { 436 continue;
601 info->i_indirect = NULL; 437 if (page->mapping == mapping) {
602 nr_pages_to_free++; 438 VM_BUG_ON(PageWriteback(page));
603 list_add(&topdir->lru, &pages_to_free); 439 truncate_inode_page(mapping, page);
440 }
441 unlock_page(page);
442 }
443 shmem_pagevec_release(&pvec);
444 mem_cgroup_uncharge_end();
445 cond_resched();
446 index++;
604 } 447 }
605 spin_unlock(&info->lock);
606 448
607 if (info->swapped && idx < SHMEM_NR_DIRECT) { 449 if (partial) {
608 ptr = info->i_direct; 450 struct page *page = NULL;
609 size = limit; 451 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
610 if (size > SHMEM_NR_DIRECT) 452 if (page) {
611 size = SHMEM_NR_DIRECT; 453 zero_user_segment(page, partial, PAGE_CACHE_SIZE);
612 nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); 454 set_page_dirty(page);
455 unlock_page(page);
456 page_cache_release(page);
457 }
613 } 458 }
614 459
615 /* 460 index = start;
616 * If there are no indirect blocks or we are punching a hole 461 for ( ; ; ) {
617 * below indirect blocks, nothing to be done. 462 cond_resched();
618 */ 463 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
619 if (!topdir || limit <= SHMEM_NR_DIRECT) 464 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
620 goto done2; 465 pvec.pages, indices);
466 if (!pvec.nr) {
467 if (index == start)
468 break;
469 index = start;
470 continue;
471 }
472 if (index == start && indices[0] > end) {
473 shmem_pagevec_release(&pvec);
474 break;
475 }
476 mem_cgroup_uncharge_start();
477 for (i = 0; i < pagevec_count(&pvec); i++) {
478 struct page *page = pvec.pages[i];
621 479
622 /* 480 index = indices[i];
623 * The truncation case has already dropped info->lock, and we're safe 481 if (index > end)
624 * because i_size and next_index have already been lowered, preventing 482 break;
625 * access beyond. But in the punch_hole case, we still need to take
626 * the lock when updating the swap directory, because there might be
627 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
628 * shmem_writepage. However, whenever we find we can remove a whole
629 * directory page (not at the misaligned start or end of the range),
630 * we first NULLify its pointer in the level above, and then have no
631 * need to take the lock when updating its contents: needs_lock and
632 * punch_lock (either pointing to info->lock or NULL) manage this.
633 */
634 483
635 upper_limit -= SHMEM_NR_DIRECT; 484 if (radix_tree_exceptional_entry(page)) {
636 limit -= SHMEM_NR_DIRECT; 485 nr_swaps_freed += !shmem_free_swap(mapping,
637 idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; 486 index, page);
638 offset = idx % ENTRIES_PER_PAGE; 487 continue;
639 idx -= offset;
640
641 dir = shmem_dir_map(topdir);
642 stage = ENTRIES_PER_PAGEPAGE/2;
643 if (idx < ENTRIES_PER_PAGEPAGE/2) {
644 middir = topdir;
645 diroff = idx/ENTRIES_PER_PAGE;
646 } else {
647 dir += ENTRIES_PER_PAGE/2;
648 dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
649 while (stage <= idx)
650 stage += ENTRIES_PER_PAGEPAGE;
651 middir = *dir;
652 if (*dir) {
653 diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
654 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
655 if (!diroff && !offset && upper_limit >= stage) {
656 if (needs_lock) {
657 spin_lock(needs_lock);
658 *dir = NULL;
659 spin_unlock(needs_lock);
660 needs_lock = NULL;
661 } else
662 *dir = NULL;
663 nr_pages_to_free++;
664 list_add(&middir->lru, &pages_to_free);
665 } 488 }
666 shmem_dir_unmap(dir);
667 dir = shmem_dir_map(middir);
668 } else {
669 diroff = 0;
670 offset = 0;
671 idx = stage;
672 }
673 }
674 489
675 for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { 490 lock_page(page);
676 if (unlikely(idx == stage)) { 491 if (page->mapping == mapping) {
677 shmem_dir_unmap(dir); 492 VM_BUG_ON(PageWriteback(page));
678 dir = shmem_dir_map(topdir) + 493 truncate_inode_page(mapping, page);
679 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
680 while (!*dir) {
681 dir++;
682 idx += ENTRIES_PER_PAGEPAGE;
683 if (idx >= limit)
684 goto done1;
685 }
686 stage = idx + ENTRIES_PER_PAGEPAGE;
687 middir = *dir;
688 if (punch_hole)
689 needs_lock = &info->lock;
690 if (upper_limit >= stage) {
691 if (needs_lock) {
692 spin_lock(needs_lock);
693 *dir = NULL;
694 spin_unlock(needs_lock);
695 needs_lock = NULL;
696 } else
697 *dir = NULL;
698 nr_pages_to_free++;
699 list_add(&middir->lru, &pages_to_free);
700 } 494 }
701 shmem_dir_unmap(dir); 495 unlock_page(page);
702 cond_resched();
703 dir = shmem_dir_map(middir);
704 diroff = 0;
705 }
706 punch_lock = needs_lock;
707 subdir = dir[diroff];
708 if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
709 if (needs_lock) {
710 spin_lock(needs_lock);
711 dir[diroff] = NULL;
712 spin_unlock(needs_lock);
713 punch_lock = NULL;
714 } else
715 dir[diroff] = NULL;
716 nr_pages_to_free++;
717 list_add(&subdir->lru, &pages_to_free);
718 }
719 if (subdir && page_private(subdir) /* has swap entries */) {
720 size = limit - idx;
721 if (size > ENTRIES_PER_PAGE)
722 size = ENTRIES_PER_PAGE;
723 freed = shmem_map_and_free_swp(subdir,
724 offset, size, &dir, punch_lock);
725 if (!dir)
726 dir = shmem_dir_map(middir);
727 nr_swaps_freed += freed;
728 if (offset || punch_lock) {
729 spin_lock(&info->lock);
730 set_page_private(subdir,
731 page_private(subdir) - freed);
732 spin_unlock(&info->lock);
733 } else
734 BUG_ON(page_private(subdir) != freed);
735 } 496 }
736 offset = 0; 497 shmem_pagevec_release(&pvec);
737 } 498 mem_cgroup_uncharge_end();
738done1: 499 index++;
739 shmem_dir_unmap(dir);
740done2:
741 if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
742 /*
743 * Call truncate_inode_pages again: racing shmem_unuse_inode
744 * may have swizzled a page in from swap since
745 * truncate_pagecache or generic_delete_inode did it, before we
746 * lowered next_index. Also, though shmem_getpage checks
747 * i_size before adding to cache, no recheck after: so fix the
748 * narrow window there too.
749 */
750 truncate_inode_pages_range(inode->i_mapping, start, end);
751 } 500 }
752 501
753 spin_lock(&info->lock); 502 spin_lock(&info->lock);
754 info->flags &= ~SHMEM_TRUNCATE;
755 info->swapped -= nr_swaps_freed; 503 info->swapped -= nr_swaps_freed;
756 if (nr_pages_to_free)
757 shmem_free_blocks(inode, nr_pages_to_free);
758 shmem_recalc_inode(inode); 504 shmem_recalc_inode(inode);
759 spin_unlock(&info->lock); 505 spin_unlock(&info->lock);
760 506
761 /* 507 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
762 * Empty swap vector directory pages to be freed?
763 */
764 if (!list_empty(&pages_to_free)) {
765 pages_to_free.prev->next = NULL;
766 shmem_free_pages(pages_to_free.next);
767 }
768} 508}
769EXPORT_SYMBOL_GPL(shmem_truncate_range); 509EXPORT_SYMBOL_GPL(shmem_truncate_range);
770 510
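
Putting the new shmem helpers together: shmem_find_get_pages_and_swap() returns pages and exceptional swap entries side by side, along with their indices, and shmem_pagevec_release() drops references only on the real pages. A sketch of that idiom outside truncation; my_shmem_stats() is illustrative and would have to live in mm/shmem.c, since the helpers are static:

static void my_shmem_stats(struct address_space *mapping,
			   unsigned long *pages, unsigned long *swapped)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	pgoff_t index = 0;
	int i;

	*pages = *swapped = 0;
	pagevec_init(&pvec, 0);
	while ((pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
				PAGEVEC_SIZE, pvec.pages, indices))) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			index = indices[i] + 1;
			if (radix_tree_exceptional_entry(pvec.pages[i]))
				(*swapped)++;
			else
				(*pages)++;
		}
		shmem_pagevec_release(&pvec);
		cond_resched();
	}
}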
@@ -780,37 +520,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
780 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 520 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
781 loff_t oldsize = inode->i_size; 521 loff_t oldsize = inode->i_size;
782 loff_t newsize = attr->ia_size; 522 loff_t newsize = attr->ia_size;
783 struct page *page = NULL;
784 523
785 if (newsize < oldsize) {
786 /*
787 * If truncating down to a partial page, then
788 * if that page is already allocated, hold it
789 * in memory until the truncation is over, so
790 * truncate_partial_page cannot miss it were
791 * it assigned to swap.
792 */
793 if (newsize & (PAGE_CACHE_SIZE-1)) {
794 (void) shmem_getpage(inode,
795 newsize >> PAGE_CACHE_SHIFT,
796 &page, SGP_READ, NULL);
797 if (page)
798 unlock_page(page);
799 }
800 /*
801 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
802 * detect if any pages might have been added to cache
803 * after truncate_inode_pages. But we needn't bother
804 * if it's being fully truncated to zero-length: the
805 * nrpages check is efficient enough in that case.
806 */
807 if (newsize) {
808 struct shmem_inode_info *info = SHMEM_I(inode);
809 spin_lock(&info->lock);
810 info->flags &= ~SHMEM_PAGEIN;
811 spin_unlock(&info->lock);
812 }
813 }
814 if (newsize != oldsize) { 524 if (newsize != oldsize) {
815 i_size_write(inode, newsize); 525 i_size_write(inode, newsize);
816 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 526 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -822,8 +532,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
822 /* unmap again to remove racily COWed private pages */ 532 /* unmap again to remove racily COWed private pages */
823 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); 533 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
824 } 534 }
825 if (page)
826 page_cache_release(page);
827 } 535 }
828 536
829 setattr_copy(inode, attr); 537 setattr_copy(inode, attr);
@@ -848,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode)
848 list_del_init(&info->swaplist); 556 list_del_init(&info->swaplist);
849 mutex_unlock(&shmem_swaplist_mutex); 557 mutex_unlock(&shmem_swaplist_mutex);
850 } 558 }
851 } 559 } else
560 kfree(info->symlink);
852 561
853 list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { 562 list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
854 kfree(xattr->name); 563 kfree(xattr->name);
@@ -859,106 +568,27 @@ static void shmem_evict_inode(struct inode *inode)
859 end_writeback(inode); 568 end_writeback(inode);
860} 569}
861 570
862static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) 571/*
863{ 572 * If swap found in inode, free it and move page from swapcache to filecache.
864 swp_entry_t *ptr; 573 */
865 574static int shmem_unuse_inode(struct shmem_inode_info *info,
866 for (ptr = dir; ptr < edir; ptr++) { 575 swp_entry_t swap, struct page *page)
867 if (ptr->val == entry.val)
868 return ptr - dir;
869 }
870 return -1;
871}
872
873static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
874{ 576{
875 struct address_space *mapping; 577 struct address_space *mapping = info->vfs_inode.i_mapping;
876 unsigned long idx; 578 void *radswap;
877 unsigned long size; 579 pgoff_t index;
878 unsigned long limit;
879 unsigned long stage;
880 struct page **dir;
881 struct page *subdir;
882 swp_entry_t *ptr;
883 int offset;
884 int error; 580 int error;
885 581
886 idx = 0; 582 radswap = swp_to_radix_entry(swap);
887 ptr = info->i_direct; 583 index = radix_tree_locate_item(&mapping->page_tree, radswap);
888 spin_lock(&info->lock); 584 if (index == -1)
889 if (!info->swapped) { 585 return 0;
890 list_del_init(&info->swaplist);
891 goto lost2;
892 }
893 limit = info->next_index;
894 size = limit;
895 if (size > SHMEM_NR_DIRECT)
896 size = SHMEM_NR_DIRECT;
897 offset = shmem_find_swp(entry, ptr, ptr+size);
898 if (offset >= 0) {
899 shmem_swp_balance_unmap();
900 goto found;
901 }
902 if (!info->i_indirect)
903 goto lost2;
904
905 dir = shmem_dir_map(info->i_indirect);
906 stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
907
908 for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
909 if (unlikely(idx == stage)) {
910 shmem_dir_unmap(dir-1);
911 if (cond_resched_lock(&info->lock)) {
912 /* check it has not been truncated */
913 if (limit > info->next_index) {
914 limit = info->next_index;
915 if (idx >= limit)
916 goto lost2;
917 }
918 }
919 dir = shmem_dir_map(info->i_indirect) +
920 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
921 while (!*dir) {
922 dir++;
923 idx += ENTRIES_PER_PAGEPAGE;
924 if (idx >= limit)
925 goto lost1;
926 }
927 stage = idx + ENTRIES_PER_PAGEPAGE;
928 subdir = *dir;
929 shmem_dir_unmap(dir);
930 dir = shmem_dir_map(subdir);
931 }
932 subdir = *dir;
933 if (subdir && page_private(subdir)) {
934 ptr = shmem_swp_map(subdir);
935 size = limit - idx;
936 if (size > ENTRIES_PER_PAGE)
937 size = ENTRIES_PER_PAGE;
938 offset = shmem_find_swp(entry, ptr, ptr+size);
939 shmem_swp_unmap(ptr);
940 if (offset >= 0) {
941 shmem_dir_unmap(dir);
942 ptr = shmem_swp_map(subdir);
943 goto found;
944 }
945 }
946 }
947lost1:
948 shmem_dir_unmap(dir-1);
949lost2:
950 spin_unlock(&info->lock);
951 return 0;
952found:
953 idx += offset;
954 ptr += offset;
955 586
956 /* 587 /*
957 * Move _head_ to start search for next from here. 588 * Move _head_ to start search for next from here.
958 * But be careful: shmem_evict_inode checks list_empty without taking 589 * But be careful: shmem_evict_inode checks list_empty without taking
959 * mutex, and there's an instant in list_move_tail when info->swaplist 590 * mutex, and there's an instant in list_move_tail when info->swaplist
960 * would appear empty, if it were the only one on shmem_swaplist. We 591 * would appear empty, if it were the only one on shmem_swaplist.
961 * could avoid doing it if inode NULL; or use this minor optimization.
962 */ 592 */
963 if (shmem_swaplist.next != &info->swaplist) 593 if (shmem_swaplist.next != &info->swaplist)
964 list_move_tail(&shmem_swaplist, &info->swaplist); 594 list_move_tail(&shmem_swaplist, &info->swaplist);
@@ -968,29 +598,34 @@ found:
968 * but also to hold up shmem_evict_inode(): so inode cannot be freed 598 * but also to hold up shmem_evict_inode(): so inode cannot be freed
969 * beneath us (pagelock doesn't help until the page is in pagecache). 599 * beneath us (pagelock doesn't help until the page is in pagecache).
970 */ 600 */
971 mapping = info->vfs_inode.i_mapping; 601 error = shmem_add_to_page_cache(page, mapping, index,
972 error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); 602 GFP_NOWAIT, radswap);
973 /* which does mem_cgroup_uncharge_cache_page on error */ 603 /* which does mem_cgroup_uncharge_cache_page on error */
974 604
975 if (error != -ENOMEM) { 605 if (error != -ENOMEM) {
606 /*
607 * Truncation and eviction use free_swap_and_cache(), which
608 * only does trylock page: if we raced, best clean up here.
609 */
976 delete_from_swap_cache(page); 610 delete_from_swap_cache(page);
977 set_page_dirty(page); 611 set_page_dirty(page);
978 info->flags |= SHMEM_PAGEIN; 612 if (!error) {
979 shmem_swp_set(info, ptr, 0); 613 spin_lock(&info->lock);
980 swap_free(entry); 614 info->swapped--;
615 spin_unlock(&info->lock);
616 swap_free(swap);
617 }
981 error = 1; /* not an error, but entry was found */ 618 error = 1; /* not an error, but entry was found */
982 } 619 }
983 shmem_swp_unmap(ptr);
984 spin_unlock(&info->lock);
985 return error; 620 return error;
986} 621}
987 622
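The rewritten shmem_unuse_inode() above leans on one small encoding trick used throughout this series: a swap entry is stored directly in the mapping's radix tree as an "exceptional" entry, tagged so lookups can tell it apart from a struct page pointer. A standalone sketch of that packing is below; it mirrors the swp_to_radix_entry()/radix_to_swp_entry() helpers the patch calls, with the tag values as in the 3.1-era <linux/radix-tree.h>, rendered as a compilable userspace toy rather than the kernel's own file:

	#include <assert.h>

	#define RADIX_TREE_EXCEPTIONAL_ENTRY	2	/* bit 1: slot is not a page */
	#define RADIX_TREE_EXCEPTIONAL_SHIFT	2	/* payload sits above the tag bits */

	typedef struct { unsigned long val; } swp_entry_t;

	static void *swp_to_radix_entry(swp_entry_t entry)
	{
		/* shift the swap value past the tag bits, then set the tag */
		return (void *)((entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT) |
				RADIX_TREE_EXCEPTIONAL_ENTRY);
	}

	static swp_entry_t radix_to_swp_entry(void *arg)
	{
		/* drop the tag bits to recover the original swap value */
		swp_entry_t entry = { (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT };
		return entry;
	}

	static int radix_tree_exceptional_entry(void *arg)
	{
		return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
	}

	int main(void)
	{
		swp_entry_t swap = { 12345 };
		void *slot = swp_to_radix_entry(swap);

		assert(radix_tree_exceptional_entry(slot));
		assert(radix_to_swp_entry(slot).val == swap.val);
		return 0;
	}

This is also why the hunk above needs no radix_tree_preload(): the swap entry already occupies the slot, so swapping the page back in replaces an existing item instead of allocating tree nodes.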
988/* 623/*
989 * shmem_unuse() search for an eventually swapped out shmem page. 624 * Search through swapped inodes to find and replace swap by page.
990 */ 625 */
991int shmem_unuse(swp_entry_t entry, struct page *page) 626int shmem_unuse(swp_entry_t swap, struct page *page)
992{ 627{
993 struct list_head *p, *next; 628 struct list_head *this, *next;
994 struct shmem_inode_info *info; 629 struct shmem_inode_info *info;
995 int found = 0; 630 int found = 0;
996 int error; 631 int error;
@@ -999,32 +634,25 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
999 * Charge page using GFP_KERNEL while we can wait, before taking 634 * Charge page using GFP_KERNEL while we can wait, before taking
1000 * the shmem_swaplist_mutex which might hold up shmem_writepage(). 635 * the shmem_swaplist_mutex which might hold up shmem_writepage().
1001 * Charged back to the user (not to caller) when swap account is used. 636 * Charged back to the user (not to caller) when swap account is used.
1002 * add_to_page_cache() will be called with GFP_NOWAIT.
1003 */ 637 */
1004 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); 638 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
1005 if (error) 639 if (error)
1006 goto out; 640 goto out;
1007 /* 641 /* No radix_tree_preload: swap entry keeps a place for page in tree */
1008 * Try to preload while we can wait, to not make a habit of
1009 * draining atomic reserves; but don't latch on to this cpu,
1010 * it's okay if sometimes we get rescheduled after this.
1011 */
1012 error = radix_tree_preload(GFP_KERNEL);
1013 if (error)
1014 goto uncharge;
1015 radix_tree_preload_end();
1016 642
1017 mutex_lock(&shmem_swaplist_mutex); 643 mutex_lock(&shmem_swaplist_mutex);
1018 list_for_each_safe(p, next, &shmem_swaplist) { 644 list_for_each_safe(this, next, &shmem_swaplist) {
1019 info = list_entry(p, struct shmem_inode_info, swaplist); 645 info = list_entry(this, struct shmem_inode_info, swaplist);
1020 found = shmem_unuse_inode(info, entry, page); 646 if (info->swapped)
647 found = shmem_unuse_inode(info, swap, page);
648 else
649 list_del_init(&info->swaplist);
1021 cond_resched(); 650 cond_resched();
1022 if (found) 651 if (found)
1023 break; 652 break;
1024 } 653 }
1025 mutex_unlock(&shmem_swaplist_mutex); 654 mutex_unlock(&shmem_swaplist_mutex);
1026 655
1027uncharge:
1028 if (!found) 656 if (!found)
1029 mem_cgroup_uncharge_cache_page(page); 657 mem_cgroup_uncharge_cache_page(page);
1030 if (found < 0) 658 if (found < 0)
@@ -1041,10 +669,10 @@ out:
1041static int shmem_writepage(struct page *page, struct writeback_control *wbc) 669static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1042{ 670{
1043 struct shmem_inode_info *info; 671 struct shmem_inode_info *info;
1044 swp_entry_t *entry, swap;
1045 struct address_space *mapping; 672 struct address_space *mapping;
1046 unsigned long index;
1047 struct inode *inode; 673 struct inode *inode;
674 swp_entry_t swap;
675 pgoff_t index;
1048 676
1049 BUG_ON(!PageLocked(page)); 677 BUG_ON(!PageLocked(page));
1050 mapping = page->mapping; 678 mapping = page->mapping;
@@ -1073,50 +701,32 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1073 701
1074 /* 702 /*
1075 * Add inode to shmem_unuse()'s list of swapped-out inodes, 703 * Add inode to shmem_unuse()'s list of swapped-out inodes,
1076 * if it's not already there. Do it now because we cannot take 704 * if it's not already there. Do it now before the page is
1077 * mutex while holding spinlock, and must do so before the page 705 * moved to swap cache, when its pagelock no longer protects
1078 * is moved to swap cache, when its pagelock no longer protects
1079 * the inode from eviction. But don't unlock the mutex until 706 * the inode from eviction. But don't unlock the mutex until
1080 * we've taken the spinlock, because shmem_unuse_inode() will 707 * we've incremented swapped, because shmem_unuse_inode() will
1081 * prune a !swapped inode from the swaplist under both locks. 708 * prune a !swapped inode from the swaplist under this mutex.
1082 */ 709 */
1083 mutex_lock(&shmem_swaplist_mutex); 710 mutex_lock(&shmem_swaplist_mutex);
1084 if (list_empty(&info->swaplist)) 711 if (list_empty(&info->swaplist))
1085 list_add_tail(&info->swaplist, &shmem_swaplist); 712 list_add_tail(&info->swaplist, &shmem_swaplist);
1086 713
1087 spin_lock(&info->lock);
1088 mutex_unlock(&shmem_swaplist_mutex);
1089
1090 if (index >= info->next_index) {
1091 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
1092 goto unlock;
1093 }
1094 entry = shmem_swp_entry(info, index, NULL);
1095 if (entry->val) {
1096 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
1097 free_swap_and_cache(*entry);
1098 shmem_swp_set(info, entry, 0);
1099 }
1100 shmem_recalc_inode(inode);
1101
1102 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { 714 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
1103 delete_from_page_cache(page);
1104 shmem_swp_set(info, entry, swap.val);
1105 shmem_swp_unmap(entry);
1106 swap_shmem_alloc(swap); 715 swap_shmem_alloc(swap);
716 shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
717
718 spin_lock(&info->lock);
719 info->swapped++;
720 shmem_recalc_inode(inode);
1107 spin_unlock(&info->lock); 721 spin_unlock(&info->lock);
722
723 mutex_unlock(&shmem_swaplist_mutex);
1108 BUG_ON(page_mapped(page)); 724 BUG_ON(page_mapped(page));
1109 swap_writepage(page, wbc); 725 swap_writepage(page, wbc);
1110 return 0; 726 return 0;
1111 } 727 }
1112 728
1113 shmem_swp_unmap(entry); 729 mutex_unlock(&shmem_swaplist_mutex);
1114unlock:
1115 spin_unlock(&info->lock);
1116 /*
1117 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
1118 * clear SWAP_HAS_CACHE flag.
1119 */
1120 swapcache_free(swap, NULL); 730 swapcache_free(swap, NULL);
1121redirty: 731redirty:
1122 set_page_dirty(page); 732 set_page_dirty(page);
@@ -1153,35 +763,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1153} 763}
1154#endif /* CONFIG_TMPFS */ 764#endif /* CONFIG_TMPFS */
1155 765
1156static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, 766static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1157 struct shmem_inode_info *info, unsigned long idx) 767 struct shmem_inode_info *info, pgoff_t index)
1158{ 768{
1159 struct mempolicy mpol, *spol; 769 struct mempolicy mpol, *spol;
1160 struct vm_area_struct pvma; 770 struct vm_area_struct pvma;
1161 struct page *page;
1162 771
1163 spol = mpol_cond_copy(&mpol, 772 spol = mpol_cond_copy(&mpol,
1164 mpol_shared_policy_lookup(&info->policy, idx)); 773 mpol_shared_policy_lookup(&info->policy, index));
1165 774
1166 /* Create a pseudo vma that just contains the policy */ 775 /* Create a pseudo vma that just contains the policy */
1167 pvma.vm_start = 0; 776 pvma.vm_start = 0;
1168 pvma.vm_pgoff = idx; 777 pvma.vm_pgoff = index;
1169 pvma.vm_ops = NULL; 778 pvma.vm_ops = NULL;
1170 pvma.vm_policy = spol; 779 pvma.vm_policy = spol;
1171 page = swapin_readahead(entry, gfp, &pvma, 0); 780 return swapin_readahead(swap, gfp, &pvma, 0);
1172 return page;
1173} 781}
1174 782
1175static struct page *shmem_alloc_page(gfp_t gfp, 783static struct page *shmem_alloc_page(gfp_t gfp,
1176 struct shmem_inode_info *info, unsigned long idx) 784 struct shmem_inode_info *info, pgoff_t index)
1177{ 785{
1178 struct vm_area_struct pvma; 786 struct vm_area_struct pvma;
1179 787
1180 /* Create a pseudo vma that just contains the policy */ 788 /* Create a pseudo vma that just contains the policy */
1181 pvma.vm_start = 0; 789 pvma.vm_start = 0;
1182 pvma.vm_pgoff = idx; 790 pvma.vm_pgoff = index;
1183 pvma.vm_ops = NULL; 791 pvma.vm_ops = NULL;
1184 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); 792 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
1185 793
1186 /* 794 /*
1187 * alloc_page_vma() will drop the shared policy reference 795 * alloc_page_vma() will drop the shared policy reference
@@ -1190,19 +798,19 @@ static struct page *shmem_alloc_page(gfp_t gfp,
1190} 798}
1191#else /* !CONFIG_NUMA */ 799#else /* !CONFIG_NUMA */
1192#ifdef CONFIG_TMPFS 800#ifdef CONFIG_TMPFS
1193static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) 801static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
1194{ 802{
1195} 803}
1196#endif /* CONFIG_TMPFS */ 804#endif /* CONFIG_TMPFS */
1197 805
1198static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, 806static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1199 struct shmem_inode_info *info, unsigned long idx) 807 struct shmem_inode_info *info, pgoff_t index)
1200{ 808{
1201 return swapin_readahead(entry, gfp, NULL, 0); 809 return swapin_readahead(swap, gfp, NULL, 0);
1202} 810}
1203 811
1204static inline struct page *shmem_alloc_page(gfp_t gfp, 812static inline struct page *shmem_alloc_page(gfp_t gfp,
1205 struct shmem_inode_info *info, unsigned long idx) 813 struct shmem_inode_info *info, pgoff_t index)
1206{ 814{
1207 return alloc_page(gfp); 815 return alloc_page(gfp);
1208} 816}
@@ -1222,243 +830,190 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1222 * vm. If we swap it in we mark it dirty since we also free the swap 830 * vm. If we swap it in we mark it dirty since we also free the swap
1223 * entry since a page cannot live in both the swap and page cache 831 * entry since a page cannot live in both the swap and page cache
1224 */ 832 */
1225static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, 833static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1226 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) 834 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
1227{ 835{
1228 struct address_space *mapping = inode->i_mapping; 836 struct address_space *mapping = inode->i_mapping;
1229 struct shmem_inode_info *info = SHMEM_I(inode); 837 struct shmem_inode_info *info;
1230 struct shmem_sb_info *sbinfo; 838 struct shmem_sb_info *sbinfo;
1231 struct page *page; 839 struct page *page;
1232 struct page *prealloc_page = NULL;
1233 swp_entry_t *entry;
1234 swp_entry_t swap; 840 swp_entry_t swap;
1235 int error; 841 int error;
1236 int ret; 842 int once = 0;
1237 843
1238 if (idx >= SHMEM_MAX_INDEX) 844 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1239 return -EFBIG; 845 return -EFBIG;
1240repeat: 846repeat:
1241 page = find_lock_page(mapping, idx); 847 swap.val = 0;
1242 if (page) { 848 page = find_lock_page(mapping, index);
849 if (radix_tree_exceptional_entry(page)) {
850 swap = radix_to_swp_entry(page);
851 page = NULL;
852 }
853
854 if (sgp != SGP_WRITE &&
855 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
856 error = -EINVAL;
857 goto failed;
858 }
859
860 if (page || (sgp == SGP_READ && !swap.val)) {
1243 /* 861 /*
1244 * Once we can get the page lock, it must be uptodate: 862 * Once we can get the page lock, it must be uptodate:
1245 * if there were an error in reading back from swap, 863 * if there were an error in reading back from swap,
1246 * the page would not be inserted into the filecache. 864 * the page would not be inserted into the filecache.
1247 */ 865 */
1248 BUG_ON(!PageUptodate(page)); 866 BUG_ON(page && !PageUptodate(page));
1249 goto done; 867 *pagep = page;
868 return 0;
1250 } 869 }
1251 870
1252 /* 871 /*
1253 * Try to preload while we can wait, to not make a habit of 872 * Fast cache lookup did not find it:
1254 * draining atomic reserves; but don't latch on to this cpu. 873 * bring it back from swap or allocate.
1255 */ 874 */
1256 error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); 875 info = SHMEM_I(inode);
1257 if (error) 876 sbinfo = SHMEM_SB(inode->i_sb);
1258 goto out;
1259 radix_tree_preload_end();
1260
1261 if (sgp != SGP_READ && !prealloc_page) {
1262 prealloc_page = shmem_alloc_page(gfp, info, idx);
1263 if (prealloc_page) {
1264 SetPageSwapBacked(prealloc_page);
1265 if (mem_cgroup_cache_charge(prealloc_page,
1266 current->mm, GFP_KERNEL)) {
1267 page_cache_release(prealloc_page);
1268 prealloc_page = NULL;
1269 }
1270 }
1271 }
1272
1273 spin_lock(&info->lock);
1274 shmem_recalc_inode(inode);
1275 entry = shmem_swp_alloc(info, idx, sgp, gfp);
1276 if (IS_ERR(entry)) {
1277 spin_unlock(&info->lock);
1278 error = PTR_ERR(entry);
1279 goto out;
1280 }
1281 swap = *entry;
1282 877
1283 if (swap.val) { 878 if (swap.val) {
1284 /* Look it up and read it in.. */ 879 /* Look it up and read it in.. */
1285 page = lookup_swap_cache(swap); 880 page = lookup_swap_cache(swap);
1286 if (!page) { 881 if (!page) {
1287 shmem_swp_unmap(entry);
1288 spin_unlock(&info->lock);
1289 /* here we actually do the io */ 882 /* here we actually do the io */
1290 if (fault_type) 883 if (fault_type)
1291 *fault_type |= VM_FAULT_MAJOR; 884 *fault_type |= VM_FAULT_MAJOR;
1292 page = shmem_swapin(swap, gfp, info, idx); 885 page = shmem_swapin(swap, gfp, info, index);
1293 if (!page) { 886 if (!page) {
1294 spin_lock(&info->lock); 887 error = -ENOMEM;
1295 entry = shmem_swp_alloc(info, idx, sgp, gfp); 888 goto failed;
1296 if (IS_ERR(entry))
1297 error = PTR_ERR(entry);
1298 else {
1299 if (entry->val == swap.val)
1300 error = -ENOMEM;
1301 shmem_swp_unmap(entry);
1302 }
1303 spin_unlock(&info->lock);
1304 if (error)
1305 goto out;
1306 goto repeat;
1307 } 889 }
1308 wait_on_page_locked(page);
1309 page_cache_release(page);
1310 goto repeat;
1311 } 890 }
1312 891
1313 /* We have to do this with page locked to prevent races */ 892 /* We have to do this with page locked to prevent races */
1314 if (!trylock_page(page)) { 893 lock_page(page);
1315 shmem_swp_unmap(entry);
1316 spin_unlock(&info->lock);
1317 wait_on_page_locked(page);
1318 page_cache_release(page);
1319 goto repeat;
1320 }
1321 if (PageWriteback(page)) {
1322 shmem_swp_unmap(entry);
1323 spin_unlock(&info->lock);
1324 wait_on_page_writeback(page);
1325 unlock_page(page);
1326 page_cache_release(page);
1327 goto repeat;
1328 }
1329 if (!PageUptodate(page)) { 894 if (!PageUptodate(page)) {
1330 shmem_swp_unmap(entry);
1331 spin_unlock(&info->lock);
1332 unlock_page(page);
1333 page_cache_release(page);
1334 error = -EIO; 895 error = -EIO;
1335 goto out; 896 goto failed;
1336 } 897 }
1337 898 wait_on_page_writeback(page);
1338 error = add_to_page_cache_locked(page, mapping, 899
1339 idx, GFP_NOWAIT); 900 /* Someone may have already done it for us */
1340 if (error) { 901 if (page->mapping) {
1341 shmem_swp_unmap(entry); 902 if (page->mapping == mapping &&
1342 spin_unlock(&info->lock); 903 page->index == index)
1343 if (error == -ENOMEM) { 904 goto done;
1344 /* 905 error = -EEXIST;
1345 * reclaim from proper memory cgroup and 906 goto failed;
1346 * call memcg's OOM if needed.
1347 */
1348 error = mem_cgroup_shmem_charge_fallback(
1349 page, current->mm, gfp);
1350 if (error) {
1351 unlock_page(page);
1352 page_cache_release(page);
1353 goto out;
1354 }
1355 }
1356 unlock_page(page);
1357 page_cache_release(page);
1358 goto repeat;
1359 } 907 }
1360 908
1361 info->flags |= SHMEM_PAGEIN; 909 error = mem_cgroup_cache_charge(page, current->mm,
1362 shmem_swp_set(info, entry, 0); 910 gfp & GFP_RECLAIM_MASK);
1363 shmem_swp_unmap(entry); 911 if (!error)
1364 delete_from_swap_cache(page); 912 error = shmem_add_to_page_cache(page, mapping, index,
913 gfp, swp_to_radix_entry(swap));
914 if (error)
915 goto failed;
916
917 spin_lock(&info->lock);
918 info->swapped--;
919 shmem_recalc_inode(inode);
1365 spin_unlock(&info->lock); 920 spin_unlock(&info->lock);
921
922 delete_from_swap_cache(page);
1366 set_page_dirty(page); 923 set_page_dirty(page);
1367 swap_free(swap); 924 swap_free(swap);
1368 925
1369 } else if (sgp == SGP_READ) { 926 } else {
1370 shmem_swp_unmap(entry); 927 if (shmem_acct_block(info->flags)) {
1371 page = find_get_page(mapping, idx); 928 error = -ENOSPC;
1372 if (page && !trylock_page(page)) { 929 goto failed;
1373 spin_unlock(&info->lock);
1374 wait_on_page_locked(page);
1375 page_cache_release(page);
1376 goto repeat;
1377 } 930 }
1378 spin_unlock(&info->lock);
1379
1380 } else if (prealloc_page) {
1381 shmem_swp_unmap(entry);
1382 sbinfo = SHMEM_SB(inode->i_sb);
1383 if (sbinfo->max_blocks) { 931 if (sbinfo->max_blocks) {
1384 if (percpu_counter_compare(&sbinfo->used_blocks, 932 if (percpu_counter_compare(&sbinfo->used_blocks,
1385 sbinfo->max_blocks) >= 0 || 933 sbinfo->max_blocks) >= 0) {
1386 shmem_acct_block(info->flags)) 934 error = -ENOSPC;
1387 goto nospace; 935 goto unacct;
936 }
1388 percpu_counter_inc(&sbinfo->used_blocks); 937 percpu_counter_inc(&sbinfo->used_blocks);
1389 inode->i_blocks += BLOCKS_PER_PAGE;
1390 } else if (shmem_acct_block(info->flags))
1391 goto nospace;
1392
1393 page = prealloc_page;
1394 prealloc_page = NULL;
1395
1396 entry = shmem_swp_alloc(info, idx, sgp, gfp);
1397 if (IS_ERR(entry))
1398 error = PTR_ERR(entry);
1399 else {
1400 swap = *entry;
1401 shmem_swp_unmap(entry);
1402 } 938 }
1403 ret = error || swap.val; 939
1404 if (ret) 940 page = shmem_alloc_page(gfp, info, index);
1405 mem_cgroup_uncharge_cache_page(page); 941 if (!page) {
1406 else 942 error = -ENOMEM;
1407 ret = add_to_page_cache_lru(page, mapping, 943 goto decused;
1408 idx, GFP_NOWAIT);
1409 /*
1410 * At add_to_page_cache_lru() failure,
1411 * uncharge will be done automatically.
1412 */
1413 if (ret) {
1414 shmem_unacct_blocks(info->flags, 1);
1415 shmem_free_blocks(inode, 1);
1416 spin_unlock(&info->lock);
1417 page_cache_release(page);
1418 if (error)
1419 goto out;
1420 goto repeat;
1421 } 944 }
1422 945
1423 info->flags |= SHMEM_PAGEIN; 946 SetPageSwapBacked(page);
947 __set_page_locked(page);
948 error = mem_cgroup_cache_charge(page, current->mm,
949 gfp & GFP_RECLAIM_MASK);
950 if (!error)
951 error = shmem_add_to_page_cache(page, mapping, index,
952 gfp, NULL);
953 if (error)
954 goto decused;
955 lru_cache_add_anon(page);
956
957 spin_lock(&info->lock);
1424 info->alloced++; 958 info->alloced++;
959 inode->i_blocks += BLOCKS_PER_PAGE;
960 shmem_recalc_inode(inode);
1425 spin_unlock(&info->lock); 961 spin_unlock(&info->lock);
962
1426 clear_highpage(page); 963 clear_highpage(page);
1427 flush_dcache_page(page); 964 flush_dcache_page(page);
1428 SetPageUptodate(page); 965 SetPageUptodate(page);
1429 if (sgp == SGP_DIRTY) 966 if (sgp == SGP_DIRTY)
1430 set_page_dirty(page); 967 set_page_dirty(page);
1431
1432 } else {
1433 spin_unlock(&info->lock);
1434 error = -ENOMEM;
1435 goto out;
1436 } 968 }
1437done: 969done:
1438 *pagep = page; 970 /* Perhaps the file has been truncated since we checked */
1439 error = 0; 971 if (sgp != SGP_WRITE &&
1440out: 972 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1441 if (prealloc_page) { 973 error = -EINVAL;
1442 mem_cgroup_uncharge_cache_page(prealloc_page); 974 goto trunc;
1443 page_cache_release(prealloc_page);
1444 } 975 }
1445 return error; 976 *pagep = page;
977 return 0;
1446 978
1447nospace:
1448 /* 979 /*
1449 * Perhaps the page was brought in from swap between find_lock_page 980 * Error recovery.
1450 * and taking info->lock? We allow for that at add_to_page_cache_lru,
1451 * but must also avoid reporting a spurious ENOSPC while working on a
1452 * full tmpfs.
1453 */ 981 */
1454 page = find_get_page(mapping, idx); 982trunc:
983 ClearPageDirty(page);
984 delete_from_page_cache(page);
985 spin_lock(&info->lock);
986 info->alloced--;
987 inode->i_blocks -= BLOCKS_PER_PAGE;
1455 spin_unlock(&info->lock); 988 spin_unlock(&info->lock);
989decused:
990 if (sbinfo->max_blocks)
991 percpu_counter_add(&sbinfo->used_blocks, -1);
992unacct:
993 shmem_unacct_blocks(info->flags, 1);
994failed:
995 if (swap.val && error != -EINVAL) {
996 struct page *test = find_get_page(mapping, index);
997 if (test && !radix_tree_exceptional_entry(test))
998 page_cache_release(test);
999 /* Have another try if the entry has changed */
1000 if (test != swp_to_radix_entry(swap))
1001 error = -EEXIST;
1002 }
1456 if (page) { 1003 if (page) {
1004 unlock_page(page);
1457 page_cache_release(page); 1005 page_cache_release(page);
1006 }
1007 if (error == -ENOSPC && !once++) {
1008 info = SHMEM_I(inode);
1009 spin_lock(&info->lock);
1010 shmem_recalc_inode(inode);
1011 spin_unlock(&info->lock);
1458 goto repeat; 1012 goto repeat;
1459 } 1013 }
1460 error = -ENOSPC; 1014 if (error == -EEXIST)
1461 goto out; 1015 goto repeat;
1016 return error;
1462} 1017}
1463 1018
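One subtlety in the failed: path above deserves a gloss: a swap-in failure is only reported if the mapping slot still holds the very swap entry we started from; any change means another thread made progress, and the function loops back to repeat instead. A minimal sketch of that rule follows (struct slot and should_retry are invented names for illustration; the patch itself re-reads the slot with find_get_page() and compares against swp_to_radix_entry(swap)):

	#include <stdbool.h>

	struct slot { void *entry; };	/* stand-in for one radix-tree slot */

	/*
	 * After a failed swap-in, re-read the slot: if it changed, someone
	 * else made progress, so retry (the patch's -EEXIST / goto repeat);
	 * only an unchanged slot turns the failure into a real error.
	 */
	static bool should_retry(const struct slot *s, void *expected_swap_entry)
	{
		return s->entry != expected_swap_entry;
	}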
1464static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1019static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -1467,9 +1022,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1467 int error; 1022 int error;
1468 int ret = VM_FAULT_LOCKED; 1023 int ret = VM_FAULT_LOCKED;
1469 1024
1470 if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
1471 return VM_FAULT_SIGBUS;
1472
1473 error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); 1025 error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
1474 if (error) 1026 if (error)
1475 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); 1027 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
@@ -1482,20 +1034,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1482} 1034}
1483 1035
1484#ifdef CONFIG_NUMA 1036#ifdef CONFIG_NUMA
1485static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 1037static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1486{ 1038{
1487 struct inode *i = vma->vm_file->f_path.dentry->d_inode; 1039 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1488 return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); 1040 return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1489} 1041}
1490 1042
1491static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, 1043static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1492 unsigned long addr) 1044 unsigned long addr)
1493{ 1045{
1494 struct inode *i = vma->vm_file->f_path.dentry->d_inode; 1046 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1495 unsigned long idx; 1047 pgoff_t index;
1496 1048
1497 idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 1049 index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1498 return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); 1050 return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1499} 1051}
1500#endif 1052#endif
1501 1053
@@ -1593,7 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
1593 1145
1594#ifdef CONFIG_TMPFS 1146#ifdef CONFIG_TMPFS
1595static const struct inode_operations shmem_symlink_inode_operations; 1147static const struct inode_operations shmem_symlink_inode_operations;
1596static const struct inode_operations shmem_symlink_inline_operations; 1148static const struct inode_operations shmem_short_symlink_operations;
1597 1149
1598static int 1150static int
1599shmem_write_begin(struct file *file, struct address_space *mapping, 1151shmem_write_begin(struct file *file, struct address_space *mapping,
@@ -1626,7 +1178,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
1626{ 1178{
1627 struct inode *inode = filp->f_path.dentry->d_inode; 1179 struct inode *inode = filp->f_path.dentry->d_inode;
1628 struct address_space *mapping = inode->i_mapping; 1180 struct address_space *mapping = inode->i_mapping;
1629 unsigned long index, offset; 1181 pgoff_t index;
1182 unsigned long offset;
1630 enum sgp_type sgp = SGP_READ; 1183 enum sgp_type sgp = SGP_READ;
1631 1184
1632 /* 1185 /*
@@ -1642,7 +1195,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
1642 1195
1643 for (;;) { 1196 for (;;) {
1644 struct page *page = NULL; 1197 struct page *page = NULL;
1645 unsigned long end_index, nr, ret; 1198 pgoff_t end_index;
1199 unsigned long nr, ret;
1646 loff_t i_size = i_size_read(inode); 1200 loff_t i_size = i_size_read(inode);
1647 1201
1648 end_index = i_size >> PAGE_CACHE_SHIFT; 1202 end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -1880,8 +1434,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1880 buf->f_namelen = NAME_MAX; 1434 buf->f_namelen = NAME_MAX;
1881 if (sbinfo->max_blocks) { 1435 if (sbinfo->max_blocks) {
1882 buf->f_blocks = sbinfo->max_blocks; 1436 buf->f_blocks = sbinfo->max_blocks;
1883 buf->f_bavail = buf->f_bfree = 1437 buf->f_bavail =
1884 sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); 1438 buf->f_bfree = sbinfo->max_blocks -
1439 percpu_counter_sum(&sbinfo->used_blocks);
1885 } 1440 }
1886 if (sbinfo->max_inodes) { 1441 if (sbinfo->max_inodes) {
1887 buf->f_files = sbinfo->max_inodes; 1442 buf->f_files = sbinfo->max_inodes;
@@ -2055,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
2055 1610
2056 info = SHMEM_I(inode); 1611 info = SHMEM_I(inode);
2057 inode->i_size = len-1; 1612 inode->i_size = len-1;
2058 if (len <= SHMEM_SYMLINK_INLINE_LEN) { 1613 if (len <= SHORT_SYMLINK_LEN) {
2059 /* do it inline */ 1614 info->symlink = kmemdup(symname, len, GFP_KERNEL);
2060 memcpy(info->inline_symlink, symname, len); 1615 if (!info->symlink) {
2061 inode->i_op = &shmem_symlink_inline_operations; 1616 iput(inode);
1617 return -ENOMEM;
1618 }
1619 inode->i_op = &shmem_short_symlink_operations;
2062 } else { 1620 } else {
2063 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); 1621 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2064 if (error) { 1622 if (error) {
@@ -2081,17 +1639,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
2081 return 0; 1639 return 0;
2082} 1640}
2083 1641
2084static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) 1642static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
2085{ 1643{
2086 nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); 1644 nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
2087 return NULL; 1645 return NULL;
2088} 1646}
2089 1647
2090static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) 1648static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2091{ 1649{
2092 struct page *page = NULL; 1650 struct page *page = NULL;
2093 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); 1651 int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
2094 nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); 1652 nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
2095 if (page) 1653 if (page)
2096 unlock_page(page); 1654 unlock_page(page);
2097 return page; 1655 return page;
@@ -2202,7 +1760,6 @@ out:
2202 return err; 1760 return err;
2203} 1761}
2204 1762
2205
2206static const struct xattr_handler *shmem_xattr_handlers[] = { 1763static const struct xattr_handler *shmem_xattr_handlers[] = {
2207#ifdef CONFIG_TMPFS_POSIX_ACL 1764#ifdef CONFIG_TMPFS_POSIX_ACL
2208 &generic_acl_access_handler, 1765 &generic_acl_access_handler,
@@ -2332,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2332} 1889}
2333#endif /* CONFIG_TMPFS_XATTR */ 1890#endif /* CONFIG_TMPFS_XATTR */
2334 1891
2335static const struct inode_operations shmem_symlink_inline_operations = { 1892static const struct inode_operations shmem_short_symlink_operations = {
2336 .readlink = generic_readlink, 1893 .readlink = generic_readlink,
2337 .follow_link = shmem_follow_link_inline, 1894 .follow_link = shmem_follow_short_symlink,
2338#ifdef CONFIG_TMPFS_XATTR 1895#ifdef CONFIG_TMPFS_XATTR
2339 .setxattr = shmem_setxattr, 1896 .setxattr = shmem_setxattr,
2340 .getxattr = shmem_getxattr, 1897 .getxattr = shmem_getxattr,
@@ -2534,8 +2091,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2534 if (config.max_inodes < inodes) 2091 if (config.max_inodes < inodes)
2535 goto out; 2092 goto out;
2536 /* 2093 /*
2537 * Those tests also disallow limited->unlimited while any are in 2094 * Those tests disallow limited->unlimited while any are in use;
2538 * use, so i_blocks will always be zero when max_blocks is zero;
2539 * but we must separately disallow unlimited->limited, because 2095 * but we must separately disallow unlimited->limited, because
2540 * in that case we have no record of how much is already in use. 2096 * in that case we have no record of how much is already in use.
2541 */ 2097 */
@@ -2627,7 +2183,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
2627 goto failed; 2183 goto failed;
2628 sbinfo->free_inodes = sbinfo->max_inodes; 2184 sbinfo->free_inodes = sbinfo->max_inodes;
2629 2185
2630 sb->s_maxbytes = SHMEM_MAX_BYTES; 2186 sb->s_maxbytes = MAX_LFS_FILESIZE;
2631 sb->s_blocksize = PAGE_CACHE_SIZE; 2187 sb->s_blocksize = PAGE_CACHE_SIZE;
2632 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 2188 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
2633 sb->s_magic = TMPFS_MAGIC; 2189 sb->s_magic = TMPFS_MAGIC;
@@ -2662,14 +2218,14 @@ static struct kmem_cache *shmem_inode_cachep;
2662 2218
2663static struct inode *shmem_alloc_inode(struct super_block *sb) 2219static struct inode *shmem_alloc_inode(struct super_block *sb)
2664{ 2220{
2665 struct shmem_inode_info *p; 2221 struct shmem_inode_info *info;
2666 p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); 2222 info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
2667 if (!p) 2223 if (!info)
2668 return NULL; 2224 return NULL;
2669 return &p->vfs_inode; 2225 return &info->vfs_inode;
2670} 2226}
2671 2227
2672static void shmem_i_callback(struct rcu_head *head) 2228static void shmem_destroy_callback(struct rcu_head *head)
2673{ 2229{
2674 struct inode *inode = container_of(head, struct inode, i_rcu); 2230 struct inode *inode = container_of(head, struct inode, i_rcu);
2675 INIT_LIST_HEAD(&inode->i_dentry); 2231 INIT_LIST_HEAD(&inode->i_dentry);
@@ -2678,29 +2234,26 @@ static void shmem_i_callback(struct rcu_head *head)
2678 2234
2679static void shmem_destroy_inode(struct inode *inode) 2235static void shmem_destroy_inode(struct inode *inode)
2680{ 2236{
2681 if ((inode->i_mode & S_IFMT) == S_IFREG) { 2237 if ((inode->i_mode & S_IFMT) == S_IFREG)
2682 /* only struct inode is valid if it's an inline symlink */
2683 mpol_free_shared_policy(&SHMEM_I(inode)->policy); 2238 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2684 } 2239 call_rcu(&inode->i_rcu, shmem_destroy_callback);
2685 call_rcu(&inode->i_rcu, shmem_i_callback);
2686} 2240}
2687 2241
2688static void init_once(void *foo) 2242static void shmem_init_inode(void *foo)
2689{ 2243{
2690 struct shmem_inode_info *p = (struct shmem_inode_info *) foo; 2244 struct shmem_inode_info *info = foo;
2691 2245 inode_init_once(&info->vfs_inode);
2692 inode_init_once(&p->vfs_inode);
2693} 2246}
2694 2247
2695static int init_inodecache(void) 2248static int shmem_init_inodecache(void)
2696{ 2249{
2697 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", 2250 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
2698 sizeof(struct shmem_inode_info), 2251 sizeof(struct shmem_inode_info),
2699 0, SLAB_PANIC, init_once); 2252 0, SLAB_PANIC, shmem_init_inode);
2700 return 0; 2253 return 0;
2701} 2254}
2702 2255
2703static void destroy_inodecache(void) 2256static void shmem_destroy_inodecache(void)
2704{ 2257{
2705 kmem_cache_destroy(shmem_inode_cachep); 2258 kmem_cache_destroy(shmem_inode_cachep);
2706} 2259}
@@ -2797,21 +2350,20 @@ static const struct vm_operations_struct shmem_vm_ops = {
2797#endif 2350#endif
2798}; 2351};
2799 2352
2800
2801static struct dentry *shmem_mount(struct file_system_type *fs_type, 2353static struct dentry *shmem_mount(struct file_system_type *fs_type,
2802 int flags, const char *dev_name, void *data) 2354 int flags, const char *dev_name, void *data)
2803{ 2355{
2804 return mount_nodev(fs_type, flags, data, shmem_fill_super); 2356 return mount_nodev(fs_type, flags, data, shmem_fill_super);
2805} 2357}
2806 2358
2807static struct file_system_type tmpfs_fs_type = { 2359static struct file_system_type shmem_fs_type = {
2808 .owner = THIS_MODULE, 2360 .owner = THIS_MODULE,
2809 .name = "tmpfs", 2361 .name = "tmpfs",
2810 .mount = shmem_mount, 2362 .mount = shmem_mount,
2811 .kill_sb = kill_litter_super, 2363 .kill_sb = kill_litter_super,
2812}; 2364};
2813 2365
2814int __init init_tmpfs(void) 2366int __init shmem_init(void)
2815{ 2367{
2816 int error; 2368 int error;
2817 2369
@@ -2819,18 +2371,18 @@ int __init init_tmpfs(void)
2819 if (error) 2371 if (error)
2820 goto out4; 2372 goto out4;
2821 2373
2822 error = init_inodecache(); 2374 error = shmem_init_inodecache();
2823 if (error) 2375 if (error)
2824 goto out3; 2376 goto out3;
2825 2377
2826 error = register_filesystem(&tmpfs_fs_type); 2378 error = register_filesystem(&shmem_fs_type);
2827 if (error) { 2379 if (error) {
2828 printk(KERN_ERR "Could not register tmpfs\n"); 2380 printk(KERN_ERR "Could not register tmpfs\n");
2829 goto out2; 2381 goto out2;
2830 } 2382 }
2831 2383
2832 shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, 2384 shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER,
2833 tmpfs_fs_type.name, NULL); 2385 shmem_fs_type.name, NULL);
2834 if (IS_ERR(shm_mnt)) { 2386 if (IS_ERR(shm_mnt)) {
2835 error = PTR_ERR(shm_mnt); 2387 error = PTR_ERR(shm_mnt);
2836 printk(KERN_ERR "Could not kern_mount tmpfs\n"); 2388 printk(KERN_ERR "Could not kern_mount tmpfs\n");
@@ -2839,9 +2391,9 @@ int __init init_tmpfs(void)
2839 return 0; 2391 return 0;
2840 2392
2841out1: 2393out1:
2842 unregister_filesystem(&tmpfs_fs_type); 2394 unregister_filesystem(&shmem_fs_type);
2843out2: 2395out2:
2844 destroy_inodecache(); 2396 shmem_destroy_inodecache();
2845out3: 2397out3:
2846 bdi_destroy(&shmem_backing_dev_info); 2398 bdi_destroy(&shmem_backing_dev_info);
2847out4: 2399out4:
@@ -2849,45 +2401,6 @@ out4:
2849 return error; 2401 return error;
2850} 2402}
2851 2403
2852#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2853/**
2854 * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
2855 * @inode: the inode to be searched
2856 * @pgoff: the offset to be searched
2857 * @pagep: the pointer for the found page to be stored
2858 * @ent: the pointer for the found swap entry to be stored
2859 *
2860 * If a page is found, refcount of it is incremented. Callers should handle
2861 * these refcount.
2862 */
2863void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2864 struct page **pagep, swp_entry_t *ent)
2865{
2866 swp_entry_t entry = { .val = 0 }, *ptr;
2867 struct page *page = NULL;
2868 struct shmem_inode_info *info = SHMEM_I(inode);
2869
2870 if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
2871 goto out;
2872
2873 spin_lock(&info->lock);
2874 ptr = shmem_swp_entry(info, pgoff, NULL);
2875#ifdef CONFIG_SWAP
2876 if (ptr && ptr->val) {
2877 entry.val = ptr->val;
2878 page = find_get_page(&swapper_space, entry.val);
2879 } else
2880#endif
2881 page = find_get_page(inode->i_mapping, pgoff);
2882 if (ptr)
2883 shmem_swp_unmap(ptr);
2884 spin_unlock(&info->lock);
2885out:
2886 *pagep = page;
2887 *ent = entry;
2888}
2889#endif
2890
2891#else /* !CONFIG_SHMEM */ 2404#else /* !CONFIG_SHMEM */
2892 2405
2893/* 2406/*
@@ -2901,23 +2414,23 @@ out:
2901 2414
2902#include <linux/ramfs.h> 2415#include <linux/ramfs.h>
2903 2416
2904static struct file_system_type tmpfs_fs_type = { 2417static struct file_system_type shmem_fs_type = {
2905 .name = "tmpfs", 2418 .name = "tmpfs",
2906 .mount = ramfs_mount, 2419 .mount = ramfs_mount,
2907 .kill_sb = kill_litter_super, 2420 .kill_sb = kill_litter_super,
2908}; 2421};
2909 2422
2910int __init init_tmpfs(void) 2423int __init shmem_init(void)
2911{ 2424{
2912 BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); 2425 BUG_ON(register_filesystem(&shmem_fs_type) != 0);
2913 2426
2914 shm_mnt = kern_mount(&tmpfs_fs_type); 2427 shm_mnt = kern_mount(&shmem_fs_type);
2915 BUG_ON(IS_ERR(shm_mnt)); 2428 BUG_ON(IS_ERR(shm_mnt));
2916 2429
2917 return 0; 2430 return 0;
2918} 2431}
2919 2432
2920int shmem_unuse(swp_entry_t entry, struct page *page) 2433int shmem_unuse(swp_entry_t swap, struct page *page)
2921{ 2434{
2922 return 0; 2435 return 0;
2923} 2436}
@@ -2927,43 +2440,17 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
2927 return 0; 2440 return 0;
2928} 2441}
2929 2442
2930void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) 2443void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
2931{ 2444{
2932 truncate_inode_pages_range(inode->i_mapping, start, end); 2445 truncate_inode_pages_range(inode->i_mapping, lstart, lend);
2933} 2446}
2934EXPORT_SYMBOL_GPL(shmem_truncate_range); 2447EXPORT_SYMBOL_GPL(shmem_truncate_range);
2935 2448
2936#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2937/**
2938 * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
2939 * @inode: the inode to be searched
2940 * @pgoff: the offset to be searched
2941 * @pagep: the pointer for the found page to be stored
2942 * @ent: the pointer for the found swap entry to be stored
2943 *
2944 * If a page is found, refcount of it is incremented. Callers should handle
2945 * these refcount.
2946 */
2947void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2948 struct page **pagep, swp_entry_t *ent)
2949{
2950 struct page *page = NULL;
2951
2952 if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
2953 goto out;
2954 page = find_get_page(inode->i_mapping, pgoff);
2955out:
2956 *pagep = page;
2957 *ent = (swp_entry_t){ .val = 0 };
2958}
2959#endif
2960
2961#define shmem_vm_ops generic_file_vm_ops 2449#define shmem_vm_ops generic_file_vm_ops
2962#define shmem_file_operations ramfs_file_operations 2450#define shmem_file_operations ramfs_file_operations
2963#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) 2451#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
2964#define shmem_acct_size(flags, size) 0 2452#define shmem_acct_size(flags, size) 0
2965#define shmem_unacct_size(flags, size) do {} while (0) 2453#define shmem_unacct_size(flags, size) do {} while (0)
2966#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE
2967 2454
2968#endif /* CONFIG_SHMEM */ 2455#endif /* CONFIG_SHMEM */
2969 2456
@@ -2987,7 +2474,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
2987 if (IS_ERR(shm_mnt)) 2474 if (IS_ERR(shm_mnt))
2988 return (void *)shm_mnt; 2475 return (void *)shm_mnt;
2989 2476
2990 if (size < 0 || size > SHMEM_MAX_BYTES) 2477 if (size < 0 || size > MAX_LFS_FILESIZE)
2991 return ERR_PTR(-EINVAL); 2478 return ERR_PTR(-EINVAL);
2992 2479
2993 if (shmem_acct_size(flags, size)) 2480 if (shmem_acct_size(flags, size))
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1b8c33907242..17bc224bce68 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
1924 1924
1925 /* 1925 /*
1926 * Find out how many pages are allowed for a single swap 1926 * Find out how many pages are allowed for a single swap
1927 * device. There are two limiting factors: 1) the number of 1927 * device. There are three limiting factors: 1) the number
1928 * bits for the swap offset in the swp_entry_t type and 1928 * of bits for the swap offset in the swp_entry_t type, and
1929 * 2) the number of bits in the a swap pte as defined by 1929 * 2) the number of bits in the swap pte as defined by
1930 * the different architectures. In order to find the 1930 * the different architectures, and 3) the number of free bits
1931 * largest possible bit mask a swap entry with swap type 0 1931 * in an exceptional radix_tree entry. In order to find the
1932 * largest possible bit mask, a swap entry with swap type 0
1932 * and swap offset ~0UL is created, encoded to a swap pte, 1933 * and swap offset ~0UL is created, encoded to a swap pte,
1933 * decoded to a swp_entry_t again and finally the swap 1934 * decoded to a swp_entry_t again, and finally the swap
1934 * offset is extracted. This will mask all the bits from 1935 * offset is extracted. This will mask all the bits from
1935 * the initial ~0UL mask that can't be encoded in either 1936 * the initial ~0UL mask that can't be encoded in either
1936 * the swp_entry_t or the architecture definition of a 1937 * the swp_entry_t or the architecture definition of a
1937 * swap pte. 1938 * swap pte. Then the same is done for a radix_tree entry.
1938 */ 1939 */
1939 maxpages = swp_offset(pte_to_swp_entry( 1940 maxpages = swp_offset(pte_to_swp_entry(
1940 swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; 1941 swp_entry_to_pte(swp_entry(0, ~0UL))));
1942 maxpages = swp_offset(radix_to_swp_entry(
1943 swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
1944
1941 if (maxpages > swap_header->info.last_page) { 1945 if (maxpages > swap_header->info.last_page) {
1942 maxpages = swap_header->info.last_page + 1; 1946 maxpages = swap_header->info.last_page + 1;
1943 /* p->max is an unsigned int: don't overflow it */ 1947 /* p->max is an unsigned int: don't overflow it */
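The masking idiom the rewritten comment describes can be tried standalone. The sketch below uses invented bit widths (the real limits come from the architecture's swap pte layout and RADIX_TREE_EXCEPTIONAL_SHIFT), so it shows the shape of the computation rather than any particular machine: push an all-ones offset through each narrower encoding and keep whatever survives.

	#include <stdio.h>

	#define PTE_OFFSET_BITS	40	/* pretend: offset bits a swap pte can keep */
	#define RADIX_TAG_BITS	2	/* pretend: bits an exceptional entry spends */

	int main(void)
	{
		unsigned long maxoff = ~0UL;

		/* round trip through the "swap pte": bits it can't hold fall away */
		maxoff &= (1UL << PTE_OFFSET_BITS) - 1;

		/* round trip through the "radix entry": tag bits cost headroom */
		maxoff &= ~0UL >> RADIX_TAG_BITS;

		printf("maxpages = %lu\n", maxoff + 1);
		return 0;
	}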
diff --git a/mm/truncate.c b/mm/truncate.c
index 232eb2736a79..b40ac6d4e86e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
336 unsigned long count = 0; 336 unsigned long count = 0;
337 int i; 337 int i;
338 338
339 /*
340 * Note: this function may get called on a shmem/tmpfs mapping:
341 * pagevec_lookup() might then return 0 prematurely (because it
342 * got a gangful of swap entries); but it's hardly worth worrying
343 * about - it can rarely have anything to free from such a mapping
344 * (most pages are dirty), and already skips over any difficulties.
345 */
346
339 pagevec_init(&pvec, 0); 347 pagevec_init(&pvec, 0);
340 while (index <= end && pagevec_lookup(&pvec, mapping, index, 348 while (index <= end && pagevec_lookup(&pvec, mapping, index,
341 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 349 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 6d8ef4a3a9b5..8b2d37b59c9e 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -128,34 +128,34 @@ unsigned long long get_msr(int cpu, off_t offset)
128void print_header(void) 128void print_header(void)
129{ 129{
130 if (show_pkg) 130 if (show_pkg)
131 fprintf(stderr, "pkg "); 131 fprintf(stderr, "pk");
132 if (show_core) 132 if (show_core)
133 fprintf(stderr, "core"); 133 fprintf(stderr, " cr");
134 if (show_cpu) 134 if (show_cpu)
135 fprintf(stderr, " CPU"); 135 fprintf(stderr, " CPU");
136 if (do_nhm_cstates) 136 if (do_nhm_cstates)
137 fprintf(stderr, " %%c0 "); 137 fprintf(stderr, " %%c0 ");
138 if (has_aperf) 138 if (has_aperf)
139 fprintf(stderr, " GHz"); 139 fprintf(stderr, " GHz");
140 fprintf(stderr, " TSC"); 140 fprintf(stderr, " TSC");
141 if (do_nhm_cstates) 141 if (do_nhm_cstates)
142 fprintf(stderr, " %%c1 "); 142 fprintf(stderr, " %%c1");
143 if (do_nhm_cstates) 143 if (do_nhm_cstates)
144 fprintf(stderr, " %%c3 "); 144 fprintf(stderr, " %%c3");
145 if (do_nhm_cstates) 145 if (do_nhm_cstates)
146 fprintf(stderr, " %%c6 "); 146 fprintf(stderr, " %%c6");
147 if (do_snb_cstates) 147 if (do_snb_cstates)
148 fprintf(stderr, " %%c7 "); 148 fprintf(stderr, " %%c7");
149 if (do_snb_cstates) 149 if (do_snb_cstates)
150 fprintf(stderr, " %%pc2 "); 150 fprintf(stderr, " %%pc2");
151 if (do_nhm_cstates) 151 if (do_nhm_cstates)
152 fprintf(stderr, " %%pc3 "); 152 fprintf(stderr, " %%pc3");
153 if (do_nhm_cstates) 153 if (do_nhm_cstates)
154 fprintf(stderr, " %%pc6 "); 154 fprintf(stderr, " %%pc6");
155 if (do_snb_cstates) 155 if (do_snb_cstates)
156 fprintf(stderr, " %%pc7 "); 156 fprintf(stderr, " %%pc7");
157 if (extra_msr_offset) 157 if (extra_msr_offset)
158 fprintf(stderr, " MSR 0x%x ", extra_msr_offset); 158 fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
159 159
160 putc('\n', stderr); 160 putc('\n', stderr);
161} 161}
@@ -194,14 +194,14 @@ void print_cnt(struct counters *p)
194 /* topology columns, print blanks on 1st (average) line */ 194 /* topology columns, print blanks on 1st (average) line */
195 if (p == cnt_average) { 195 if (p == cnt_average) {
196 if (show_pkg) 196 if (show_pkg)
197 fprintf(stderr, " "); 197 fprintf(stderr, " ");
198 if (show_core) 198 if (show_core)
199 fprintf(stderr, " "); 199 fprintf(stderr, " ");
200 if (show_cpu) 200 if (show_cpu)
201 fprintf(stderr, " "); 201 fprintf(stderr, " ");
202 } else { 202 } else {
203 if (show_pkg) 203 if (show_pkg)
204 fprintf(stderr, "%4d", p->pkg); 204 fprintf(stderr, "%d", p->pkg);
205 if (show_core) 205 if (show_core)
206 fprintf(stderr, "%4d", p->core); 206 fprintf(stderr, "%4d", p->core);
207 if (show_cpu) 207 if (show_cpu)
@@ -241,22 +241,22 @@ void print_cnt(struct counters *p)
241 if (!skip_c1) 241 if (!skip_c1)
242 fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); 242 fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc);
243 else 243 else
244 fprintf(stderr, " ****"); 244 fprintf(stderr, " ****");
245 } 245 }
246 if (do_nhm_cstates) 246 if (do_nhm_cstates)
247 fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc); 247 fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc);
248 if (do_nhm_cstates) 248 if (do_nhm_cstates)
249 fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc); 249 fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc);
250 if (do_snb_cstates) 250 if (do_snb_cstates)
251 fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc); 251 fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc);
252 if (do_snb_cstates) 252 if (do_snb_cstates)
253 fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc); 253 fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc);
254 if (do_nhm_cstates) 254 if (do_nhm_cstates)
255 fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc); 255 fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc);
256 if (do_nhm_cstates) 256 if (do_nhm_cstates)
257 fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc); 257 fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc);
258 if (do_snb_cstates) 258 if (do_snb_cstates)
259 fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc); 259 fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc);
260 if (extra_msr_offset) 260 if (extra_msr_offset)
261 fprintf(stderr, " 0x%016llx", p->extra_msr); 261 fprintf(stderr, " 0x%016llx", p->extra_msr);
262 putc('\n', stderr); 262 putc('\n', stderr);
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 2618ef2ba31f..33c5c7ee148f 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -137,7 +137,6 @@ void cmdline(int argc, char **argv)
137void validate_cpuid(void) 137void validate_cpuid(void)
138{ 138{
139 unsigned int eax, ebx, ecx, edx, max_level; 139 unsigned int eax, ebx, ecx, edx, max_level;
140 char brand[16];
141 unsigned int fms, family, model, stepping; 140 unsigned int fms, family, model, stepping;
142 141
143 eax = ebx = ecx = edx = 0; 142 eax = ebx = ecx = edx = 0;
@@ -160,8 +159,8 @@ void validate_cpuid(void)
160 model += ((fms >> 16) & 0xf) << 4; 159 model += ((fms >> 16) & 0xf) << 4;
161 160
162 if (verbose > 1) 161 if (verbose > 1)
163 printf("CPUID %s %d levels family:model:stepping " 162 printf("CPUID %d levels family:model:stepping "
164 "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level, 163 "0x%x:%x:%x (%d:%d:%d)\n", max_level,
165 family, model, stepping, family, model, stepping); 164 family, model, stepping, family, model, stepping);
166 165
167 if (!(edx & (1 << 5))) { 166 if (!(edx & (1 << 5))) {