aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-lm353315
-rw-r--r--Documentation/cgroups/memory.txt24
-rw-r--r--Documentation/devicetree/bindings/mfd/da9052-i2c.txt60
-rw-r--r--Documentation/devicetree/bindings/mfd/tps65910.txt133
-rw-r--r--Documentation/devicetree/bindings/mfd/twl6040.txt62
-rw-r--r--Documentation/filesystems/Locking2
-rw-r--r--Documentation/filesystems/proc.txt2
-rw-r--r--Documentation/filesystems/vfs.txt13
-rw-r--r--Documentation/vm/transhuge.txt62
-rw-r--r--MAINTAINERS6
-rw-r--r--arch/arm/mach-ux500/board-mop500.c2
-rw-r--r--arch/cris/Kconfig1
-rw-r--r--arch/x86/include/asm/pgtable-3level.h50
-rw-r--r--arch/x86/include/asm/sta2x11.h12
-rw-r--r--arch/x86/kernel/e820.c53
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/setup.c16
-rw-r--r--arch/x86/mm/init.c16
-rw-r--r--arch/x86/mm/numa.c32
-rw-r--r--arch/x86/mm/numa_emulation.c4
-rw-r--r--arch/x86/mm/pat.c42
-rw-r--r--arch/x86/mm/srat.c5
-rw-r--r--drivers/base/node.c8
-rw-r--r--drivers/gpio/Kconfig30
-rw-r--r--drivers/gpio/Makefile2
-rw-r--r--drivers/gpio/gpio-ich.c419
-rw-r--r--drivers/gpio/gpio-sch.c8
-rw-r--r--drivers/gpio/gpio-sta2x11.c435
-rw-r--r--drivers/gpio/gpio-tps65910.c188
-rw-r--r--drivers/gpio/gpio-wm831x.c6
-rw-r--r--drivers/input/misc/wm831x-on.c2
-rw-r--r--drivers/input/touchscreen/wm831x-ts.c9
-rw-r--r--drivers/mfd/Kconfig76
-rw-r--r--drivers/mfd/Makefile10
-rw-r--r--drivers/mfd/ab8500-core.c423
-rw-r--r--drivers/mfd/ab8500-debugfs.c6
-rw-r--r--drivers/mfd/ab8500-gpadc.c8
-rw-r--r--drivers/mfd/ab8500-i2c.c128
-rw-r--r--drivers/mfd/ab8500-sysctrl.c6
-rw-r--r--drivers/mfd/anatop-mfd.c35
-rw-r--r--drivers/mfd/asic3.c33
-rw-r--r--drivers/mfd/cs5535-mfd.c13
-rw-r--r--drivers/mfd/da9052-core.c140
-rw-r--r--drivers/mfd/da9052-i2c.c72
-rw-r--r--drivers/mfd/da9052-spi.c19
-rw-r--r--drivers/mfd/db8500-prcmu.c35
-rw-r--r--drivers/mfd/intel_msic.c31
-rw-r--r--drivers/mfd/janz-cmodio.c17
-rw-r--r--drivers/mfd/lm3533-core.c667
-rw-r--r--drivers/mfd/lm3533-ctrlbank.c148
-rw-r--r--drivers/mfd/lpc_ich.c888
-rw-r--r--drivers/mfd/lpc_sch.c26
-rw-r--r--drivers/mfd/max77693-irq.c309
-rw-r--r--drivers/mfd/max77693.c249
-rw-r--r--drivers/mfd/mc13xxx-core.c239
-rw-r--r--drivers/mfd/mc13xxx-i2c.c128
-rw-r--r--drivers/mfd/mc13xxx-spi.c140
-rw-r--r--drivers/mfd/mc13xxx.h45
-rw-r--r--drivers/mfd/pcf50633-core.c36
-rw-r--r--drivers/mfd/rc5t583.c8
-rw-r--r--drivers/mfd/rdc321x-southbridge.c13
-rw-r--r--drivers/mfd/s5m-core.c6
-rw-r--r--drivers/mfd/sta2x11-mfd.c467
-rw-r--r--drivers/mfd/stmpe-spi.c1
-rw-r--r--drivers/mfd/tps65090.c33
-rw-r--r--drivers/mfd/tps65217.c17
-rw-r--r--drivers/mfd/tps65910-irq.c130
-rw-r--r--drivers/mfd/tps65910.c205
-rw-r--r--drivers/mfd/twl4030-irq.c1
-rw-r--r--drivers/mfd/twl6040-core.c120
-rw-r--r--drivers/mfd/twl6040-irq.c32
-rw-r--r--drivers/mfd/vx855.c12
-rw-r--r--drivers/mfd/wm831x-auxadc.c6
-rw-r--r--drivers/mfd/wm831x-core.c45
-rw-r--r--drivers/mfd/wm831x-irq.c148
-rw-r--r--drivers/mfd/wm8350-core.c31
-rw-r--r--drivers/mfd/wm8350-i2c.c61
-rw-r--r--drivers/mfd/wm8400-core.c250
-rw-r--r--drivers/mfd/wm8994-core.c25
-rw-r--r--drivers/mfd/wm8994-regmap.c1
-rw-r--r--drivers/misc/ab8500-pwm.c6
-rw-r--r--drivers/power/wm831x_power.c21
-rw-r--r--drivers/regulator/anatop-regulator.c18
-rw-r--r--drivers/regulator/tps65910-regulator.c82
-rw-r--r--drivers/regulator/wm831x-dcdc.c24
-rw-r--r--drivers/regulator/wm831x-isink.c4
-rw-r--r--drivers/regulator/wm831x-ldo.c10
-rw-r--r--drivers/rtc/rtc-wm831x.c2
-rw-r--r--drivers/staging/android/ashmem.c8
-rw-r--r--drivers/tty/pty.c2
-rw-r--r--drivers/tty/tty_ldisc.c41
-rw-r--r--drivers/watchdog/Kconfig1
-rw-r--r--drivers/watchdog/iTCO_vendor.h6
-rw-r--r--drivers/watchdog/iTCO_vendor_support.c43
-rw-r--r--drivers/watchdog/iTCO_wdt.c529
-rw-r--r--fs/bad_inode.c1
-rw-r--r--fs/exofs/Kbuild2
-rw-r--r--fs/exofs/exofs.h14
-rw-r--r--fs/exofs/super.c16
-rw-r--r--fs/exofs/sys.c200
-rw-r--r--fs/nfs/Kconfig11
-rw-r--r--fs/nfs/Makefile5
-rw-r--r--fs/nfs/blocklayout/blocklayout.c90
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c2
-rw-r--r--fs/nfs/client.c268
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c39
-rw-r--r--fs/nfs/direct.c746
-rw-r--r--fs/nfs/file.c8
-rw-r--r--fs/nfs/fscache.c15
-rw-r--r--fs/nfs/fscache.h10
-rw-r--r--fs/nfs/getroot.c85
-rw-r--r--fs/nfs/idmap.c30
-rw-r--r--fs/nfs/inode.c118
-rw-r--r--fs/nfs/internal.h135
-rw-r--r--fs/nfs/namespace.c103
-rw-r--r--fs/nfs/netns.h5
-rw-r--r--fs/nfs/nfs2xdr.c5
-rw-r--r--fs/nfs/nfs3proc.c27
-rw-r--r--fs/nfs/nfs3xdr.c112
-rw-r--r--fs/nfs/nfs4_fs.h23
-rw-r--r--fs/nfs/nfs4filelayout.c688
-rw-r--r--fs/nfs/nfs4filelayout.h63
-rw-r--r--fs/nfs/nfs4filelayoutdev.c102
-rw-r--r--fs/nfs/nfs4namespace.c55
-rw-r--r--fs/nfs/nfs4proc.c537
-rw-r--r--fs/nfs/nfs4renewd.c2
-rw-r--r--fs/nfs/nfs4state.c225
-rw-r--r--fs/nfs/nfs4xdr.c399
-rw-r--r--fs/nfs/objlayout/objio_osd.c18
-rw-r--r--fs/nfs/objlayout/objlayout.c19
-rw-r--r--fs/nfs/pagelist.c61
-rw-r--r--fs/nfs/pnfs.c352
-rw-r--r--fs/nfs/pnfs.h127
-rw-r--r--fs/nfs/proc.c21
-rw-r--r--fs/nfs/read.c437
-rw-r--r--fs/nfs/super.c760
-rw-r--r--fs/nfs/write.c809
-rw-r--r--fs/proc/base.c5
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--include/asm-generic/pgtable.h22
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/bootmem.h3
-rw-r--r--include/linux/bug.h7
-rw-r--r--include/linux/compaction.h19
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/kernel-page-flags.h4
-rw-r--r--include/linux/memcontrol.h16
-rw-r--r--include/linux/mempolicy.h9
-rw-r--r--include/linux/mfd/abx500/ab8500.h18
-rw-r--r--include/linux/mfd/anatop.h4
-rw-r--r--include/linux/mfd/asic3.h2
-rw-r--r--include/linux/mfd/da9052/da9052.h19
-rw-r--r--include/linux/mfd/lm3533.h104
-rw-r--r--include/linux/mfd/lpc_ich.h48
-rw-r--r--include/linux/mfd/max77693-private.h227
-rw-r--r--include/linux/mfd/max77693.h36
-rw-r--r--include/linux/mfd/sta2x11-mfd.h324
-rw-r--r--include/linux/mfd/stmpe.h2
-rw-r--r--include/linux/mfd/tps65910.h49
-rw-r--r--include/linux/mfd/twl6040.h2
-rw-r--r--include/linux/mfd/wm831x/core.h12
-rw-r--r--include/linux/mfd/wm8350/core.h9
-rw-r--r--include/linux/mfd/wm8400-private.h14
-rw-r--r--include/linux/mfd/wm8994/core.h1
-rw-r--r--include/linux/mfd/wm8994/registers.h3
-rw-r--r--include/linux/mm.h6
-rw-r--r--include/linux/mm_types.h11
-rw-r--r--include/linux/mmdebug.h2
-rw-r--r--include/linux/mmzone.h29
-rw-r--r--include/linux/nfs4.h13
-rw-r--r--include/linux/nfs_fs.h31
-rw-r--r--include/linux/nfs_fs_sb.h17
-rw-r--r--include/linux/nfs_page.h20
-rw-r--r--include/linux/nfs_xdr.h210
-rw-r--r--include/linux/oom.h5
-rw-r--r--include/linux/pagemap.h8
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/rmap.h2
-rw-r--r--include/linux/swap.h50
-rw-r--r--include/trace/events/vmscan.h122
-rw-r--r--kernel/cgroup.c20
-rw-r--r--kernel/fork.c12
-rw-r--r--lib/swiotlb.c8
-rw-r--r--mm/Kconfig10
-rw-r--r--mm/Makefile9
-rw-r--r--mm/bootmem.c134
-rw-r--r--mm/compaction.c142
-rw-r--r--mm/filemap.c39
-rw-r--r--mm/huge_memory.c21
-rw-r--r--mm/hugetlb.c32
-rw-r--r--mm/internal.h14
-rw-r--r--mm/madvise.c15
-rw-r--r--mm/memblock.c42
-rw-r--r--mm/memcontrol.c127
-rw-r--r--mm/memory-failure.c8
-rw-r--r--mm/memory.c20
-rw-r--r--mm/memory_hotplug.c14
-rw-r--r--mm/mempolicy.c36
-rw-r--r--mm/mmap.c53
-rw-r--r--mm/nobootmem.c112
-rw-r--r--mm/oom_kill.c44
-rw-r--r--mm/page_alloc.c78
-rw-r--r--mm/readahead.c40
-rw-r--r--mm/rmap.c6
-rw-r--r--mm/shmem.c513
-rw-r--r--mm/sparse.c25
-rw-r--r--mm/swap.c51
-rw-r--r--mm/swapfile.c33
-rw-r--r--mm/thrash.c155
-rw-r--r--mm/truncate.c25
-rw-r--r--mm/vmalloc.c7
-rw-r--r--mm/vmscan.c306
-rw-r--r--mm/vmstat.c10
-rw-r--r--net/sunrpc/clnt.c2
-rw-r--r--net/sunrpc/rpc_pipe.c10
-rw-r--r--net/sunrpc/rpcb_clnt.c2
-rw-r--r--net/sunrpc/xprt.c7
-rw-r--r--tools/vm/page-types.c50
221 files changed, 12515 insertions, 6036 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533
new file mode 100644
index 000000000000..1b62230b33b9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533
@@ -0,0 +1,15 @@
1What: /sys/bus/i2c/devices/.../output_hvled[n]
2Date: April 2012
3KernelVersion: 3.5
4Contact: Johan Hovold <jhovold@gmail.com>
5Description:
6 Set the controlling backlight device for high-voltage current
7 sink HVLED[n] (n = 1, 2) (0, 1).
8
9What: /sys/bus/i2c/devices/.../output_lvled[n]
10Date: April 2012
11KernelVersion: 3.5
12Contact: Johan Hovold <jhovold@gmail.com>
13Description:
14 Set the controlling led device for low-voltage current sink
15 LVLED[n] (n = 1..5) (0..3).
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 9b1067afb224..6a066a270fc5 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -184,12 +184,14 @@ behind this approach is that a cgroup that aggressively uses a shared
184page will eventually get charged for it (once it is uncharged from 184page will eventually get charged for it (once it is uncharged from
185the cgroup that brought it in -- this will happen on memory pressure). 185the cgroup that brought it in -- this will happen on memory pressure).
186 186
187But see section 8.2: when moving a task to another cgroup, its pages may
188be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
189
187Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. 190Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.
188When you do swapoff and make swapped-out pages of shmem(tmpfs) to 191When you do swapoff and make swapped-out pages of shmem(tmpfs) to
189be backed into memory in force, charges for pages are accounted against the 192be backed into memory in force, charges for pages are accounted against the
190caller of swapoff rather than the users of shmem. 193caller of swapoff rather than the users of shmem.
191 194
192
1932.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) 1952.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP)
194 196
195Swap Extension allows you to record charge for swap. A swapped-in page is 197Swap Extension allows you to record charge for swap. A swapped-in page is
@@ -430,17 +432,10 @@ hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy
430hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to 432hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
431 hierarchy under which memory cgroup is. 433 hierarchy under which memory cgroup is.
432 434
433total_cache - sum of all children's "cache" 435total_<counter> - # hierarchical version of <counter>, which in
434total_rss - sum of all children's "rss" 436 addition to the cgroup's own value includes the
435total_mapped_file - sum of all children's "cache" 437 sum of all hierarchical children's values of
436total_pgpgin - sum of all children's "pgpgin" 438 <counter>, i.e. total_cache
437total_pgpgout - sum of all children's "pgpgout"
438total_swap - sum of all children's "swap"
439total_inactive_anon - sum of all children's "inactive_anon"
440total_active_anon - sum of all children's "active_anon"
441total_inactive_file - sum of all children's "inactive_file"
442total_active_file - sum of all children's "active_file"
443total_unevictable - sum of all children's "unevictable"
444 439
445# The following additional stats are dependent on CONFIG_DEBUG_VM. 440# The following additional stats are dependent on CONFIG_DEBUG_VM.
446 441
@@ -622,8 +617,7 @@ memory cgroup.
622 bit | what type of charges would be moved ? 617 bit | what type of charges would be moved ?
623 -----+------------------------------------------------------------------------ 618 -----+------------------------------------------------------------------------
624 0 | A charge of an anonymous page(or swap of it) used by the target task. 619 0 | A charge of an anonymous page(or swap of it) used by the target task.
625 | Those pages and swaps must be used only by the target task. You must 620 | You must enable Swap Extension(see 2.4) to enable move of swap charges.
626 | enable Swap Extension(see 2.4) to enable move of swap charges.
627 -----+------------------------------------------------------------------------ 621 -----+------------------------------------------------------------------------
628 1 | A charge of file pages(normal file, tmpfs file(e.g. ipc shared memory) 622 1 | A charge of file pages(normal file, tmpfs file(e.g. ipc shared memory)
629 | and swaps of tmpfs file) mmapped by the target task. Unlike the case of 623 | and swaps of tmpfs file) mmapped by the target task. Unlike the case of
@@ -636,8 +630,6 @@ memory cgroup.
636 630
6378.3 TODO 6318.3 TODO
638 632
639- Implement madvise(2) to let users decide the vma to be moved or not to be
640 moved.
641- All of moving charge operations are done under cgroup_mutex. It's not good 633- All of moving charge operations are done under cgroup_mutex. It's not good
642 behavior to hold the mutex too long, so we may need some trick. 634 behavior to hold the mutex too long, so we may need some trick.
643 635
diff --git a/Documentation/devicetree/bindings/mfd/da9052-i2c.txt b/Documentation/devicetree/bindings/mfd/da9052-i2c.txt
new file mode 100644
index 000000000000..1857f4a6b9a9
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/da9052-i2c.txt
@@ -0,0 +1,60 @@
1* Dialog DA9052/53 Power Management Integrated Circuit (PMIC)
2
3Required properties:
4- compatible : Should be "dlg,da9052", "dlg,da9053-aa",
5 "dlg,da9053-ab", or "dlg,da9053-bb"
6
7Sub-nodes:
8- regulators : Contain the regulator nodes. The DA9052/53 regulators are
9 bound using their names as listed below:
10
11 buck0 : regulator BUCK0
12 buck1 : regulator BUCK1
13 buck2 : regulator BUCK2
14 buck3 : regulator BUCK3
15 ldo4 : regulator LDO4
16 ldo5 : regulator LDO5
17 ldo6 : regulator LDO6
18 ldo7 : regulator LDO7
19 ldo8 : regulator LDO8
20 ldo9 : regulator LDO9
21 ldo10 : regulator LDO10
22 ldo11 : regulator LDO11
23 ldo12 : regulator LDO12
24 ldo13 : regulator LDO13
25
26 The bindings details of individual regulator device can be found in:
27 Documentation/devicetree/bindings/regulator/regulator.txt
28
29Examples:
30
31i2c@63fc8000 { /* I2C1 */
32 status = "okay";
33
34 pmic: dialog@48 {
35 compatible = "dlg,da9053-aa";
36 reg = <0x48>;
37
38 regulators {
39 buck0 {
40 regulator-min-microvolt = <500000>;
41 regulator-max-microvolt = <2075000>;
42 };
43
44 buck1 {
45 regulator-min-microvolt = <500000>;
46 regulator-max-microvolt = <2075000>;
47 };
48
49 buck2 {
50 regulator-min-microvolt = <925000>;
51 regulator-max-microvolt = <2500000>;
52 };
53
54 buck3 {
55 regulator-min-microvolt = <925000>;
56 regulator-max-microvolt = <2500000>;
57 };
58 };
59 };
60};
diff --git a/Documentation/devicetree/bindings/mfd/tps65910.txt b/Documentation/devicetree/bindings/mfd/tps65910.txt
new file mode 100644
index 000000000000..645f5eaadb3f
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/tps65910.txt
@@ -0,0 +1,133 @@
1TPS65910 Power Management Integrated Circuit
2
3Required properties:
4- compatible: "ti,tps65910" or "ti,tps65911"
5- reg: I2C slave address
6- interrupts: the interrupt outputs of the controller
7- #gpio-cells: number of cells to describe a GPIO, this should be 2.
8 The first cell is the GPIO number.
9 The second cell is used to specify additional options <unused>.
10- gpio-controller: mark the device as a GPIO controller
11- #interrupt-cells: the number of cells to describe an IRQ, this should be 2.
12 The first cell is the IRQ number.
13 The second cell is the flags, encoded as the trigger masks from
14 Documentation/devicetree/bindings/interrupts.txt
15- regulators: This is the list of child nodes that specify the regulator
16 initialization data for defined regulators. Not all regulators for the given
17 device need to be present. The definition for each of these nodes is defined
18 using the standard binding for regulators found at
19 Documentation/devicetree/bindings/regulator/regulator.txt.
20
21 The valid names for regulators are:
22 tps65910: vrtc, vio, vdd1, vdd2, vdd3, vdig1, vdig2, vpll, vdac, vaux1,
23 vaux2, vaux33, vmmc
24 tps65911: vrtc, vio, vdd1, vdd3, vddctrl, ldo1, ldo2, ldo3, ldo4, ldo5,
25 ldo6, ldo7, ldo8
26
27Optional properties:
28- ti,vmbch-threshold: (tps65911) main battery charged threshold
29 comparator. (see VMBCH_VSEL in TPS65910 datasheet)
30- ti,vmbch2-threshold: (tps65911) main battery discharged threshold
31 comparator. (see VMBCH_VSEL in TPS65910 datasheet)
32- ti,en-gpio-sleep: enable sleep control for gpios
33 There should be 9 entries here, one for each gpio.
34
35Regulator Optional properties:
36- ti,regulator-ext-sleep-control: enable external sleep
37 control through external inputs [0 (not enabled), 1 (EN1), 2 (EN2) or 4(EN3)]
38 If this property is not defined, it defaults to 0 (not enabled).
39
40Example:
41
42 pmu: tps65910@d2 {
43 compatible = "ti,tps65910";
44 reg = <0xd2>;
45 interrupt-parent = <&intc>;
46 interrupts = < 0 118 0x04 >;
47
48 #gpio-cells = <2>;
49 gpio-controller;
50
51 #interrupt-cells = <2>;
52 interrupt-controller;
53
54 ti,vmbch-threshold = 0;
55 ti,vmbch2-threshold = 0;
56
57 ti,en-gpio-sleep = <0 0 1 0 0 0 0 0 0>;
58
59 regulators {
60 vdd1_reg: vdd1 {
61 regulator-min-microvolt = < 600000>;
62 regulator-max-microvolt = <1500000>;
63 regulator-always-on;
64 regulator-boot-on;
65 ti,regulator-ext-sleep-control = <0>;
66 };
67 vdd2_reg: vdd2 {
68 regulator-min-microvolt = < 600000>;
69 regulator-max-microvolt = <1500000>;
70 regulator-always-on;
71 regulator-boot-on;
72 ti,regulator-ext-sleep-control = <4>;
73 };
74 vddctrl_reg: vddctrl {
75 regulator-min-microvolt = < 600000>;
76 regulator-max-microvolt = <1400000>;
77 regulator-always-on;
78 regulator-boot-on;
79 ti,regulator-ext-sleep-control = <0>;
80 };
81 vio_reg: vio {
82 regulator-min-microvolt = <1500000>;
83 regulator-max-microvolt = <1800000>;
84 regulator-always-on;
85 regulator-boot-on;
86 ti,regulator-ext-sleep-control = <1>;
87 };
88 ldo1_reg: ldo1 {
89 regulator-min-microvolt = <1000000>;
90 regulator-max-microvolt = <3300000>;
91 ti,regulator-ext-sleep-control = <0>;
92 };
93 ldo2_reg: ldo2 {
94 regulator-min-microvolt = <1050000>;
95 regulator-max-microvolt = <1050000>;
96 ti,regulator-ext-sleep-control = <0>;
97 };
98 ldo3_reg: ldo3 {
99 regulator-min-microvolt = <1000000>;
100 regulator-max-microvolt = <3300000>;
101 ti,regulator-ext-sleep-control = <0>;
102 };
103 ldo4_reg: ldo4 {
104 regulator-min-microvolt = <1000000>;
105 regulator-max-microvolt = <3300000>;
106 regulator-always-on;
107 ti,regulator-ext-sleep-control = <0>;
108 };
109 ldo5_reg: ldo5 {
110 regulator-min-microvolt = <1000000>;
111 regulator-max-microvolt = <3300000>;
112 ti,regulator-ext-sleep-control = <0>;
113 };
114 ldo6_reg: ldo6 {
115 regulator-min-microvolt = <1200000>;
116 regulator-max-microvolt = <1200000>;
117 ti,regulator-ext-sleep-control = <0>;
118 };
119 ldo7_reg: ldo7 {
120 regulator-min-microvolt = <1200000>;
121 regulator-max-microvolt = <1200000>;
122 regulator-always-on;
123 regulator-boot-on;
124 ti,regulator-ext-sleep-control = <1>;
125 };
126 ldo8_reg: ldo8 {
127 regulator-min-microvolt = <1000000>;
128 regulator-max-microvolt = <3300000>;
129 regulator-always-on;
130 ti,regulator-ext-sleep-control = <1>;
131 };
132 };
133 };
diff --git a/Documentation/devicetree/bindings/mfd/twl6040.txt b/Documentation/devicetree/bindings/mfd/twl6040.txt
new file mode 100644
index 000000000000..bc67c6f424aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/twl6040.txt
@@ -0,0 +1,62 @@
1Texas Instruments TWL6040 family
2
3The TWL6040s are 8-channel high quality low-power audio codecs providing audio
4and vibra functionality on OMAP4+ platforms.
5They are connected ot the host processor via i2c for commands, McPDM for audio
6data and commands.
7
8Required properties:
9- compatible : Must be "ti,twl6040";
10- reg: must be 0x4b for i2c address
11- interrupts: twl6040 has one interrupt line connecteded to the main SoC
12- interrupt-parent: The parent interrupt controller
13- twl6040,audpwron-gpio: Power on GPIO line for the twl6040
14
15- vio-supply: Regulator for the twl6040 VIO supply
16- v2v1-supply: Regulator for the twl6040 V2V1 supply
17
18Optional properties, nodes:
19- enable-active-high: To power on the twl6040 during boot.
20
21Vibra functionality
22Required properties:
23- vddvibl-supply: Regulator for the left vibra motor
24- vddvibr-supply: Regulator for the right vibra motor
25- vibra { }: Configuration section for vibra parameters containing the following
26 properties:
27- ti,vibldrv-res: Resistance parameter for left driver
28- ti,vibrdrv-res: Resistance parameter for right driver
29- ti,viblmotor-res: Resistance parameter for left motor
30- ti,viblmotor-res: Resistance parameter for right motor
31
32Optional properties within vibra { } section:
33- vddvibl_uV: If the vddvibl default voltage need to be changed
34- vddvibr_uV: If the vddvibr default voltage need to be changed
35
36Example:
37&i2c1 {
38 twl6040: twl@4b {
39 compatible = "ti,twl6040";
40 reg = <0x4b>;
41
42 interrupts = <0 119 4>;
43 interrupt-parent = <&gic>;
44 twl6040,audpwron-gpio = <&gpio4 31 0>;
45
46 vio-supply = <&v1v8>;
47 v2v1-supply = <&v2v1>;
48 enable-active-high;
49
50 /* regulators for vibra motor */
51 vddvibl-supply = <&vbat>;
52 vddvibr-supply = <&vbat>;
53
54 vibra {
55 /* Vibra driver, motor resistance parameters */
56 ti,vibldrv-res = <8>;
57 ti,vibrdrv-res = <3>;
58 ti,viblmotor-res = <10>;
59 ti,vibrmotor-res = <10>;
60 };
61 };
62};
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 4fca82e5276e..d449e632e6a0 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -60,7 +60,6 @@ ata *);
60 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 60 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
61 ssize_t (*listxattr) (struct dentry *, char *, size_t); 61 ssize_t (*listxattr) (struct dentry *, char *, size_t);
62 int (*removexattr) (struct dentry *, const char *); 62 int (*removexattr) (struct dentry *, const char *);
63 void (*truncate_range)(struct inode *, loff_t, loff_t);
64 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); 63 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
65 64
66locking rules: 65locking rules:
@@ -87,7 +86,6 @@ setxattr: yes
87getxattr: no 86getxattr: no
88listxattr: no 87listxattr: no
89removexattr: yes 88removexattr: yes
90truncate_range: yes
91fiemap: no 89fiemap: no
92 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on 90 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
93victim. 91victim.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ef088e55ab2e..912af6ce5626 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -743,6 +743,7 @@ Committed_AS: 100056 kB
743VmallocTotal: 112216 kB 743VmallocTotal: 112216 kB
744VmallocUsed: 428 kB 744VmallocUsed: 428 kB
745VmallocChunk: 111088 kB 745VmallocChunk: 111088 kB
746AnonHugePages: 49152 kB
746 747
747 MemTotal: Total usable ram (i.e. physical ram minus a few reserved 748 MemTotal: Total usable ram (i.e. physical ram minus a few reserved
748 bits and the kernel binary code) 749 bits and the kernel binary code)
@@ -776,6 +777,7 @@ VmallocChunk: 111088 kB
776 Dirty: Memory which is waiting to get written back to the disk 777 Dirty: Memory which is waiting to get written back to the disk
777 Writeback: Memory which is actively being written back to the disk 778 Writeback: Memory which is actively being written back to the disk
778 AnonPages: Non-file backed pages mapped into userspace page tables 779 AnonPages: Non-file backed pages mapped into userspace page tables
780AnonHugePages: Non-file backed huge pages mapped into userspace page tables
779 Mapped: files which have been mmaped, such as libraries 781 Mapped: files which have been mmaped, such as libraries
780 Slab: in-kernel data structures cache 782 Slab: in-kernel data structures cache
781SReclaimable: Part of Slab, that might be reclaimed, such as caches 783SReclaimable: Part of Slab, that might be reclaimed, such as caches
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 0d0492028082..ef19f91a0f12 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -363,7 +363,6 @@ struct inode_operations {
363 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 363 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
364 ssize_t (*listxattr) (struct dentry *, char *, size_t); 364 ssize_t (*listxattr) (struct dentry *, char *, size_t);
365 int (*removexattr) (struct dentry *, const char *); 365 int (*removexattr) (struct dentry *, const char *);
366 void (*truncate_range)(struct inode *, loff_t, loff_t);
367}; 366};
368 367
369Again, all methods are called without any locks being held, unless 368Again, all methods are called without any locks being held, unless
@@ -472,9 +471,6 @@ otherwise noted.
472 removexattr: called by the VFS to remove an extended attribute from 471 removexattr: called by the VFS to remove an extended attribute from
473 a file. This method is called by removexattr(2) system call. 472 a file. This method is called by removexattr(2) system call.
474 473
475 truncate_range: a method provided by the underlying filesystem to truncate a
476 range of blocks , i.e. punch a hole somewhere in a file.
477
478 474
479The Address Space Object 475The Address Space Object
480======================== 476========================
@@ -760,7 +756,7 @@ struct file_operations
760---------------------- 756----------------------
761 757
762This describes how the VFS can manipulate an open file. As of kernel 758This describes how the VFS can manipulate an open file. As of kernel
7632.6.22, the following members are defined: 7593.5, the following members are defined:
764 760
765struct file_operations { 761struct file_operations {
766 struct module *owner; 762 struct module *owner;
@@ -790,6 +786,8 @@ struct file_operations {
790 int (*flock) (struct file *, int, struct file_lock *); 786 int (*flock) (struct file *, int, struct file_lock *);
791 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int); 787 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
792 ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int); 788 ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
789 int (*setlease)(struct file *, long arg, struct file_lock **);
790 long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
793}; 791};
794 792
795Again, all methods are called without any locks being held, unless 793Again, all methods are called without any locks being held, unless
@@ -858,6 +856,11 @@ otherwise noted.
858 splice_read: called by the VFS to splice data from file to a pipe. This 856 splice_read: called by the VFS to splice data from file to a pipe. This
859 method is used by the splice(2) system call 857 method is used by the splice(2) system call
860 858
859 setlease: called by the VFS to set or release a file lock lease.
860 setlease has the file_lock_lock held and must not sleep.
861
862 fallocate: called by the VFS to preallocate blocks or punch a hole.
863
861Note that the file operations are implemented by the specific 864Note that the file operations are implemented by the specific
862filesystem in which the inode resides. When opening a device node 865filesystem in which the inode resides. When opening a device node
863(character or block special) most filesystems will call special 866(character or block special) most filesystems will call special
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index 29bdf62aac09..f734bb2a78dc 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -166,6 +166,68 @@ behavior. So to make them effective you need to restart any
166application that could have been using hugepages. This also applies to 166application that could have been using hugepages. This also applies to
167the regions registered in khugepaged. 167the regions registered in khugepaged.
168 168
169== Monitoring usage ==
170
171The number of transparent huge pages currently used by the system is
172available by reading the AnonHugePages field in /proc/meminfo. To
173identify what applications are using transparent huge pages, it is
174necessary to read /proc/PID/smaps and count the AnonHugePages fields
175for each mapping. Note that reading the smaps file is expensive and
176reading it frequently will incur overhead.
177
178There are a number of counters in /proc/vmstat that may be used to
179monitor how successfully the system is providing huge pages for use.
180
181thp_fault_alloc is incremented every time a huge page is successfully
182 allocated to handle a page fault. This applies to both the
183 first time a page is faulted and for COW faults.
184
185thp_collapse_alloc is incremented by khugepaged when it has found
186 a range of pages to collapse into one huge page and has
187 successfully allocated a new huge page to store the data.
188
189thp_fault_fallback is incremented if a page fault fails to allocate
190 a huge page and instead falls back to using small pages.
191
192thp_collapse_alloc_failed is incremented if khugepaged found a range
193 of pages that should be collapsed into one huge page but failed
194 the allocation.
195
196thp_split is incremented every time a huge page is split into base
197 pages. This can happen for a variety of reasons but a common
198 reason is that a huge page is old and is being reclaimed.
199
200As the system ages, allocating huge pages may be expensive as the
201system uses memory compaction to copy data around memory to free a
202huge page for use. There are some counters in /proc/vmstat to help
203monitor this overhead.
204
205compact_stall is incremented every time a process stalls to run
206 memory compaction so that a huge page is free for use.
207
208compact_success is incremented if the system compacted memory and
209 freed a huge page for use.
210
211compact_fail is incremented if the system tries to compact memory
212 but failed.
213
214compact_pages_moved is incremented each time a page is moved. If
215 this value is increasing rapidly, it implies that the system
216 is copying a lot of data to satisfy the huge page allocation.
217 It is possible that the cost of copying exceeds any savings
218 from reduced TLB misses.
219
220compact_pagemigrate_failed is incremented when the underlying mechanism
221 for moving a page failed.
222
223compact_blocks_moved is incremented each time memory compaction examines
224 a huge page aligned range of pages.
225
226It is possible to establish how long the stalls were using the function
227tracer to record how long was spent in __alloc_pages_nodemask and
228using the mm_page_alloc tracepoint to identify which allocations were
229for huge pages.
230
169== get_user_pages and follow_page == 231== get_user_pages and follow_page ==
170 232
171get_user_pages and follow_page if run on a hugepage, will return the 233get_user_pages and follow_page if run on a hugepage, will return the
diff --git a/MAINTAINERS b/MAINTAINERS
index 6f90c64a2539..cc710d2ef009 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3382,6 +3382,12 @@ W: http://www.developer.ibm.com/welcome/netfinity/serveraid.html
3382S: Supported 3382S: Supported
3383F: drivers/scsi/ips.* 3383F: drivers/scsi/ips.*
3384 3384
3385ICH LPC AND GPIO DRIVER
3386M: Peter Tyser <ptyser@xes-inc.com>
3387S: Maintained
3388F: drivers/mfd/lpc_ich.c
3389F: drivers/gpio/gpio-ich.c
3390
3385IDE SUBSYSTEM 3391IDE SUBSYSTEM
3386M: "David S. Miller" <davem@davemloft.net> 3392M: "David S. Miller" <davem@davemloft.net>
3387L: linux-ide@vger.kernel.org 3393L: linux-ide@vger.kernel.org
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index b23a643f03f4..fba8adea421e 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -206,7 +206,7 @@ static struct resource ab8500_resources[] = {
206}; 206};
207 207
208struct platform_device ab8500_device = { 208struct platform_device ab8500_device = {
209 .name = "ab8500-i2c", 209 .name = "ab8500-core",
210 .id = 0, 210 .id = 0,
211 .dev = { 211 .dev = {
212 .platform_data = &ab8500_platdata, 212 .platform_data = &ab8500_platdata,
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 22d34d64cc81..bb344650a14f 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -40,6 +40,7 @@ config CRIS
40 bool 40 bool
41 default y 41 default y
42 select HAVE_IDE 42 select HAVE_IDE
43 select GENERIC_ATOMIC64
43 select HAVE_GENERIC_HARDIRQS 44 select HAVE_GENERIC_HARDIRQS
44 select GENERIC_IRQ_SHOW 45 select GENERIC_IRQ_SHOW
45 select GENERIC_IOMAP 46 select GENERIC_IOMAP
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index effff47a3c82..43876f16caf1 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
31 ptep->pte_low = pte.pte_low; 31 ptep->pte_low = pte.pte_low;
32} 32}
33 33
34#define pmd_read_atomic pmd_read_atomic
35/*
36 * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
37 * a "*pmdp" dereference done by gcc. Problem is, in certain places
38 * where pte_offset_map_lock is called, concurrent page faults are
39 * allowed, if the mmap_sem is hold for reading. An example is mincore
40 * vs page faults vs MADV_DONTNEED. On the page fault side
41 * pmd_populate rightfully does a set_64bit, but if we're reading the
42 * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
43 * because gcc will not read the 64bit of the pmd atomically. To fix
44 * this all places running pmd_offset_map_lock() while holding the
45 * mmap_sem in read mode, shall read the pmdp pointer using this
46 * function to know if the pmd is null nor not, and in turn to know if
47 * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
48 * operations.
49 *
50 * Without THP if the mmap_sem is hold for reading, the
51 * pmd can only transition from null to not null while pmd_read_atomic runs.
52 * So there's no need of literally reading it atomically.
53 *
54 * With THP if the mmap_sem is hold for reading, the pmd can become
55 * THP or null or point to a pte (and in turn become "stable") at any
56 * time under pmd_read_atomic, so it's mandatory to read it atomically
57 * with cmpxchg8b.
58 */
59#ifndef CONFIG_TRANSPARENT_HUGEPAGE
60static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
61{
62 pmdval_t ret;
63 u32 *tmp = (u32 *)pmdp;
64
65 ret = (pmdval_t) (*tmp);
66 if (ret) {
67 /*
68 * If the low part is null, we must not read the high part
69 * or we can end up with a partial pmd.
70 */
71 smp_rmb();
72 ret |= ((pmdval_t)*(tmp + 1)) << 32;
73 }
74
75 return (pmd_t) { ret };
76}
77#else /* CONFIG_TRANSPARENT_HUGEPAGE */
78static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
79{
80 return (pmd_t) { atomic64_read((atomic64_t *)pmdp) };
81}
82#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
83
34static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) 84static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
35{ 85{
36 set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); 86 set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h
new file mode 100644
index 000000000000..e9d32df89ccc
--- /dev/null
+++ b/arch/x86/include/asm/sta2x11.h
@@ -0,0 +1,12 @@
1/*
2 * Header file for STMicroelectronics ConneXt (STA2X11) IOHub
3 */
4#ifndef __ASM_STA2X11_H
5#define __ASM_STA2X11_H
6
7#include <linux/pci.h>
8
9/* This needs to be called from the MFD to configure its sub-devices */
10struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev);
11
12#endif /* __ASM_STA2X11_H */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 62d61e9976eb..41857970517f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -113,7 +113,9 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
113 int x = e820x->nr_map; 113 int x = e820x->nr_map;
114 114
115 if (x >= ARRAY_SIZE(e820x->map)) { 115 if (x >= ARRAY_SIZE(e820x->map)) {
116 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); 116 printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n",
117 (unsigned long long) start,
118 (unsigned long long) (start + size - 1));
117 return; 119 return;
118 } 120 }
119 121
@@ -133,19 +135,19 @@ static void __init e820_print_type(u32 type)
133 switch (type) { 135 switch (type) {
134 case E820_RAM: 136 case E820_RAM:
135 case E820_RESERVED_KERN: 137 case E820_RESERVED_KERN:
136 printk(KERN_CONT "(usable)"); 138 printk(KERN_CONT "usable");
137 break; 139 break;
138 case E820_RESERVED: 140 case E820_RESERVED:
139 printk(KERN_CONT "(reserved)"); 141 printk(KERN_CONT "reserved");
140 break; 142 break;
141 case E820_ACPI: 143 case E820_ACPI:
142 printk(KERN_CONT "(ACPI data)"); 144 printk(KERN_CONT "ACPI data");
143 break; 145 break;
144 case E820_NVS: 146 case E820_NVS:
145 printk(KERN_CONT "(ACPI NVS)"); 147 printk(KERN_CONT "ACPI NVS");
146 break; 148 break;
147 case E820_UNUSABLE: 149 case E820_UNUSABLE:
148 printk(KERN_CONT "(unusable)"); 150 printk(KERN_CONT "unusable");
149 break; 151 break;
150 default: 152 default:
151 printk(KERN_CONT "type %u", type); 153 printk(KERN_CONT "type %u", type);
@@ -158,10 +160,10 @@ void __init e820_print_map(char *who)
158 int i; 160 int i;
159 161
160 for (i = 0; i < e820.nr_map; i++) { 162 for (i = 0; i < e820.nr_map; i++) {
161 printk(KERN_INFO " %s: %016Lx - %016Lx ", who, 163 printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who,
162 (unsigned long long) e820.map[i].addr, 164 (unsigned long long) e820.map[i].addr,
163 (unsigned long long) 165 (unsigned long long)
164 (e820.map[i].addr + e820.map[i].size)); 166 (e820.map[i].addr + e820.map[i].size - 1));
165 e820_print_type(e820.map[i].type); 167 e820_print_type(e820.map[i].type);
166 printk(KERN_CONT "\n"); 168 printk(KERN_CONT "\n");
167 } 169 }
@@ -428,9 +430,8 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
428 size = ULLONG_MAX - start; 430 size = ULLONG_MAX - start;
429 431
430 end = start + size; 432 end = start + size;
431 printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", 433 printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ",
432 (unsigned long long) start, 434 (unsigned long long) start, (unsigned long long) (end - 1));
433 (unsigned long long) end);
434 e820_print_type(old_type); 435 e820_print_type(old_type);
435 printk(KERN_CONT " ==> "); 436 printk(KERN_CONT " ==> ");
436 e820_print_type(new_type); 437 e820_print_type(new_type);
@@ -509,9 +510,8 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
509 size = ULLONG_MAX - start; 510 size = ULLONG_MAX - start;
510 511
511 end = start + size; 512 end = start + size;
512 printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", 513 printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ",
513 (unsigned long long) start, 514 (unsigned long long) start, (unsigned long long) (end - 1));
514 (unsigned long long) end);
515 if (checktype) 515 if (checktype)
516 e820_print_type(old_type); 516 e820_print_type(old_type);
517 printk(KERN_CONT "\n"); 517 printk(KERN_CONT "\n");
@@ -567,7 +567,7 @@ void __init update_e820(void)
567 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) 567 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
568 return; 568 return;
569 e820.nr_map = nr_map; 569 e820.nr_map = nr_map;
570 printk(KERN_INFO "modified physical RAM map:\n"); 570 printk(KERN_INFO "e820: modified physical RAM map:\n");
571 e820_print_map("modified"); 571 e820_print_map("modified");
572} 572}
573static void __init update_e820_saved(void) 573static void __init update_e820_saved(void)
@@ -637,8 +637,8 @@ __init void e820_setup_gap(void)
637 if (!found) { 637 if (!found) {
638 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; 638 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
639 printk(KERN_ERR 639 printk(KERN_ERR
640 "PCI: Warning: Cannot find a gap in the 32bit address range\n" 640 "e820: cannot find a gap in the 32bit address range\n"
641 "PCI: Unassigned devices with 32bit resource registers may break!\n"); 641 "e820: PCI devices with unassigned 32bit BARs may break!\n");
642 } 642 }
643#endif 643#endif
644 644
@@ -648,8 +648,8 @@ __init void e820_setup_gap(void)
648 pci_mem_start = gapstart; 648 pci_mem_start = gapstart;
649 649
650 printk(KERN_INFO 650 printk(KERN_INFO
651 "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", 651 "e820: [mem %#010lx-%#010lx] available for PCI devices\n",
652 pci_mem_start, gapstart, gapsize); 652 gapstart, gapstart + gapsize - 1);
653} 653}
654 654
655/** 655/**
@@ -667,7 +667,7 @@ void __init parse_e820_ext(struct setup_data *sdata)
667 extmap = (struct e820entry *)(sdata->data); 667 extmap = (struct e820entry *)(sdata->data);
668 __append_e820_map(extmap, entries); 668 __append_e820_map(extmap, entries);
669 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 669 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
670 printk(KERN_INFO "extended physical RAM map:\n"); 670 printk(KERN_INFO "e820: extended physical RAM map:\n");
671 e820_print_map("extended"); 671 e820_print_map("extended");
672} 672}
673 673
@@ -734,7 +734,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
734 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 734 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
735 if (addr) { 735 if (addr) {
736 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); 736 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
737 printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); 737 printk(KERN_INFO "e820: update e820_saved for early_reserve_e820\n");
738 update_e820_saved(); 738 update_e820_saved();
739 } 739 }
740 740
@@ -784,7 +784,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
784 if (last_pfn > max_arch_pfn) 784 if (last_pfn > max_arch_pfn)
785 last_pfn = max_arch_pfn; 785 last_pfn = max_arch_pfn;
786 786
787 printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", 787 printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
788 last_pfn, max_arch_pfn); 788 last_pfn, max_arch_pfn);
789 return last_pfn; 789 return last_pfn;
790} 790}
@@ -888,7 +888,7 @@ void __init finish_e820_parsing(void)
888 early_panic("Invalid user supplied memory map"); 888 early_panic("Invalid user supplied memory map");
889 e820.nr_map = nr; 889 e820.nr_map = nr;
890 890
891 printk(KERN_INFO "user-defined physical RAM map:\n"); 891 printk(KERN_INFO "e820: user-defined physical RAM map:\n");
892 e820_print_map("user"); 892 e820_print_map("user");
893 } 893 }
894} 894}
@@ -996,8 +996,9 @@ void __init e820_reserve_resources_late(void)
996 end = MAX_RESOURCE_SIZE; 996 end = MAX_RESOURCE_SIZE;
997 if (start >= end) 997 if (start >= end)
998 continue; 998 continue;
999 printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", 999 printk(KERN_DEBUG
1000 start, end); 1000 "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n",
1001 start, end);
1001 reserve_region_with_split(&iomem_resource, start, end, 1002 reserve_region_with_split(&iomem_resource, start, end,
1002 "RAM buffer"); 1003 "RAM buffer");
1003 } 1004 }
@@ -1047,7 +1048,7 @@ void __init setup_memory_map(void)
1047 1048
1048 who = x86_init.resources.memory_setup(); 1049 who = x86_init.resources.memory_setup();
1049 memcpy(&e820_saved, &e820, sizeof(struct e820map)); 1050 memcpy(&e820_saved, &e820, sizeof(struct e820map));
1050 printk(KERN_INFO "BIOS-provided physical RAM map:\n"); 1051 printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n");
1051 e820_print_map(who); 1052 e820_print_map(who);
1052} 1053}
1053 1054
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b02d4dd6b8a3..fbca2e6223bf 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -568,8 +568,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
568 struct mpf_intel *mpf; 568 struct mpf_intel *mpf;
569 unsigned long mem; 569 unsigned long mem;
570 570
571 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", 571 apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
572 bp, length); 572 base, base + length - 1);
573 BUILD_BUG_ON(sizeof(*mpf) != 16); 573 BUILD_BUG_ON(sizeof(*mpf) != 16);
574 574
575 while (length > 0) { 575 while (length > 0) {
@@ -584,8 +584,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
584#endif 584#endif
585 mpf_found = mpf; 585 mpf_found = mpf;
586 586
587 printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", 587 printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
588 mpf, (u64)virt_to_phys(mpf)); 588 (unsigned long long) virt_to_phys(mpf),
589 (unsigned long long) virt_to_phys(mpf) +
590 sizeof(*mpf) - 1, mpf);
589 591
590 mem = virt_to_phys(mpf); 592 mem = virt_to_phys(mpf);
591 memblock_reserve(mem, sizeof(*mpf)); 593 memblock_reserve(mem, sizeof(*mpf));
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f2afee6a19c1..982e44f960db 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -334,8 +334,8 @@ static void __init relocate_initrd(void)
334 memblock_reserve(ramdisk_here, area_size); 334 memblock_reserve(ramdisk_here, area_size);
335 initrd_start = ramdisk_here + PAGE_OFFSET; 335 initrd_start = ramdisk_here + PAGE_OFFSET;
336 initrd_end = initrd_start + ramdisk_size; 336 initrd_end = initrd_start + ramdisk_size;
337 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", 337 printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
338 ramdisk_here, ramdisk_here + ramdisk_size); 338 ramdisk_here, ramdisk_here + ramdisk_size - 1);
339 339
340 q = (char *)initrd_start; 340 q = (char *)initrd_start;
341 341
@@ -366,8 +366,8 @@ static void __init relocate_initrd(void)
366 /* high pages is not converted by early_res_to_bootmem */ 366 /* high pages is not converted by early_res_to_bootmem */
367 ramdisk_image = boot_params.hdr.ramdisk_image; 367 ramdisk_image = boot_params.hdr.ramdisk_image;
368 ramdisk_size = boot_params.hdr.ramdisk_size; 368 ramdisk_size = boot_params.hdr.ramdisk_size;
369 printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to" 369 printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
370 " %08llx - %08llx\n", 370 " [mem %#010llx-%#010llx]\n",
371 ramdisk_image, ramdisk_image + ramdisk_size - 1, 371 ramdisk_image, ramdisk_image + ramdisk_size - 1,
372 ramdisk_here, ramdisk_here + ramdisk_size - 1); 372 ramdisk_here, ramdisk_here + ramdisk_size - 1);
373} 373}
@@ -392,8 +392,8 @@ static void __init reserve_initrd(void)
392 ramdisk_size, end_of_lowmem>>1); 392 ramdisk_size, end_of_lowmem>>1);
393 } 393 }
394 394
395 printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, 395 printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
396 ramdisk_end); 396 ramdisk_end - 1);
397 397
398 398
399 if (ramdisk_end <= end_of_lowmem) { 399 if (ramdisk_end <= end_of_lowmem) {
@@ -906,8 +906,8 @@ void __init setup_arch(char **cmdline_p)
906 setup_bios_corruption_check(); 906 setup_bios_corruption_check();
907#endif 907#endif
908 908
909 printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", 909 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
910 max_pfn_mapped<<PAGE_SHIFT); 910 (max_pfn_mapped<<PAGE_SHIFT) - 1);
911 911
912 setup_trampolines(); 912 setup_trampolines();
913 913
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 319b6f2fb8b9..97141c26a13a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -84,8 +84,9 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en
84 pgt_buf_end = pgt_buf_start; 84 pgt_buf_end = pgt_buf_start;
85 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); 85 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
86 86
87 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", 87 printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
88 end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); 88 end - 1, pgt_buf_start << PAGE_SHIFT,
89 (pgt_buf_top << PAGE_SHIFT) - 1);
89} 90}
90 91
91void __init native_pagetable_reserve(u64 start, u64 end) 92void __init native_pagetable_reserve(u64 start, u64 end)
@@ -132,7 +133,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
132 int nr_range, i; 133 int nr_range, i;
133 int use_pse, use_gbpages; 134 int use_pse, use_gbpages;
134 135
135 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); 136 printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n",
137 start, end - 1);
136 138
137#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) 139#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
138 /* 140 /*
@@ -251,8 +253,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
251 } 253 }
252 254
253 for (i = 0; i < nr_range; i++) 255 for (i = 0; i < nr_range; i++)
254 printk(KERN_DEBUG " %010lx - %010lx page %s\n", 256 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
255 mr[i].start, mr[i].end, 257 mr[i].start, mr[i].end - 1,
256 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( 258 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
257 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); 259 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
258 260
@@ -350,8 +352,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
350 * create a kernel page fault: 352 * create a kernel page fault:
351 */ 353 */
352#ifdef CONFIG_DEBUG_PAGEALLOC 354#ifdef CONFIG_DEBUG_PAGEALLOC
353 printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", 355 printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n",
354 begin, end); 356 begin, end - 1);
355 set_memory_np(begin, (end - begin) >> PAGE_SHIFT); 357 set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
356#else 358#else
357 /* 359 /*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 19d3fa08b119..2d125be1bae9 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -141,8 +141,8 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
141 141
142 /* whine about and ignore invalid blks */ 142 /* whine about and ignore invalid blks */
143 if (start > end || nid < 0 || nid >= MAX_NUMNODES) { 143 if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
144 pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", 144 pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
145 nid, start, end); 145 nid, start, end - 1);
146 return 0; 146 return 0;
147 } 147 }
148 148
@@ -210,8 +210,8 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
210 210
211 start = roundup(start, ZONE_ALIGN); 211 start = roundup(start, ZONE_ALIGN);
212 212
213 printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", 213 printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
214 nid, start, end); 214 nid, start, end - 1);
215 215
216 /* 216 /*
217 * Allocate node data. Try remap allocator first, node-local 217 * Allocate node data. Try remap allocator first, node-local
@@ -232,7 +232,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
232 } 232 }
233 233
234 /* report and initialize */ 234 /* report and initialize */
235 printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", 235 printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
236 nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); 236 nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
237 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 237 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
238 if (!remapped && tnid != nid) 238 if (!remapped && tnid != nid)
@@ -291,14 +291,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
291 */ 291 */
292 if (bi->end > bj->start && bi->start < bj->end) { 292 if (bi->end > bj->start && bi->start < bj->end) {
293 if (bi->nid != bj->nid) { 293 if (bi->nid != bj->nid) {
294 pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", 294 pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
295 bi->nid, bi->start, bi->end, 295 bi->nid, bi->start, bi->end - 1,
296 bj->nid, bj->start, bj->end); 296 bj->nid, bj->start, bj->end - 1);
297 return -EINVAL; 297 return -EINVAL;
298 } 298 }
299 pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", 299 pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
300 bi->nid, bi->start, bi->end, 300 bi->nid, bi->start, bi->end - 1,
301 bj->start, bj->end); 301 bj->start, bj->end - 1);
302 } 302 }
303 303
304 /* 304 /*
@@ -320,9 +320,9 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
320 } 320 }
321 if (k < mi->nr_blks) 321 if (k < mi->nr_blks)
322 continue; 322 continue;
323 printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", 323 printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
324 bi->nid, bi->start, bi->end, bj->start, bj->end, 324 bi->nid, bi->start, bi->end - 1, bj->start,
325 start, end); 325 bj->end - 1, start, end - 1);
326 bi->start = start; 326 bi->start = start;
327 bi->end = end; 327 bi->end = end;
328 numa_remove_memblk_from(j--, mi); 328 numa_remove_memblk_from(j--, mi);
@@ -616,8 +616,8 @@ static int __init dummy_numa_init(void)
616{ 616{
617 printk(KERN_INFO "%s\n", 617 printk(KERN_INFO "%s\n",
618 numa_off ? "NUMA turned off" : "No NUMA configuration found"); 618 numa_off ? "NUMA turned off" : "No NUMA configuration found");
619 printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", 619 printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n",
620 0LLU, PFN_PHYS(max_pfn)); 620 0LLU, PFN_PHYS(max_pfn) - 1);
621 621
622 node_set(0, numa_nodes_parsed); 622 node_set(0, numa_nodes_parsed);
623 numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); 623 numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 871dd8868170..dbbbb47260cc 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -68,8 +68,8 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
68 numa_remove_memblk_from(phys_blk, pi); 68 numa_remove_memblk_from(phys_blk, pi);
69 } 69 }
70 70
71 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, 71 printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n",
72 eb->start, eb->end, (eb->end - eb->start) >> 20); 72 nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20);
73 return 0; 73 return 0;
74} 74}
75 75
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f6ff57b7efa5..f11729fd019c 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -209,9 +209,8 @@ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
209 page = pfn_to_page(pfn); 209 page = pfn_to_page(pfn);
210 type = get_page_memtype(page); 210 type = get_page_memtype(page);
211 if (type != -1) { 211 if (type != -1) {
212 printk(KERN_INFO "reserve_ram_pages_type failed " 212 printk(KERN_INFO "reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%lx, req 0x%lx\n",
213 "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", 213 start, end - 1, type, req_type);
214 start, end, type, req_type);
215 if (new_type) 214 if (new_type)
216 *new_type = type; 215 *new_type = type;
217 216
@@ -314,9 +313,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
314 313
315 err = rbt_memtype_check_insert(new, new_type); 314 err = rbt_memtype_check_insert(new, new_type);
316 if (err) { 315 if (err) {
317 printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " 316 printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
318 "track %s, req %s\n", 317 start, end - 1,
319 start, end, cattr_name(new->type), cattr_name(req_type)); 318 cattr_name(new->type), cattr_name(req_type));
320 kfree(new); 319 kfree(new);
321 spin_unlock(&memtype_lock); 320 spin_unlock(&memtype_lock);
322 321
@@ -325,8 +324,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
325 324
326 spin_unlock(&memtype_lock); 325 spin_unlock(&memtype_lock);
327 326
328 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", 327 dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
329 start, end, cattr_name(new->type), cattr_name(req_type), 328 start, end - 1, cattr_name(new->type), cattr_name(req_type),
330 new_type ? cattr_name(*new_type) : "-"); 329 new_type ? cattr_name(*new_type) : "-");
331 330
332 return err; 331 return err;
@@ -360,14 +359,14 @@ int free_memtype(u64 start, u64 end)
360 spin_unlock(&memtype_lock); 359 spin_unlock(&memtype_lock);
361 360
362 if (!entry) { 361 if (!entry) {
363 printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", 362 printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
364 current->comm, current->pid, start, end); 363 current->comm, current->pid, start, end - 1);
365 return -EINVAL; 364 return -EINVAL;
366 } 365 }
367 366
368 kfree(entry); 367 kfree(entry);
369 368
370 dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); 369 dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
371 370
372 return 0; 371 return 0;
373} 372}
@@ -491,9 +490,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
491 490
492 while (cursor < to) { 491 while (cursor < to) {
493 if (!devmem_is_allowed(pfn)) { 492 if (!devmem_is_allowed(pfn)) {
494 printk(KERN_INFO 493 printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
495 "Program %s tried to access /dev/mem between %Lx->%Lx.\n", 494 current->comm, from, to - 1);
496 current->comm, from, to);
497 return 0; 495 return 0;
498 } 496 }
499 cursor += PAGE_SIZE; 497 cursor += PAGE_SIZE;
@@ -554,12 +552,11 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
554 size; 552 size;
555 553
556 if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { 554 if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
557 printk(KERN_INFO 555 printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
558 "%s:%d ioremap_change_attr failed %s " 556 "for [mem %#010Lx-%#010Lx]\n",
559 "for %Lx-%Lx\n",
560 current->comm, current->pid, 557 current->comm, current->pid,
561 cattr_name(flags), 558 cattr_name(flags),
562 base, (unsigned long long)(base + size)); 559 base, (unsigned long long)(base + size-1));
563 return -EINVAL; 560 return -EINVAL;
564 } 561 }
565 return 0; 562 return 0;
@@ -591,12 +588,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
591 588
592 flags = lookup_memtype(paddr); 589 flags = lookup_memtype(paddr);
593 if (want_flags != flags) { 590 if (want_flags != flags) {
594 printk(KERN_WARNING 591 printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
595 "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
596 current->comm, current->pid, 592 current->comm, current->pid,
597 cattr_name(want_flags), 593 cattr_name(want_flags),
598 (unsigned long long)paddr, 594 (unsigned long long)paddr,
599 (unsigned long long)(paddr + size), 595 (unsigned long long)(paddr + size - 1),
600 cattr_name(flags)); 596 cattr_name(flags));
601 *vma_prot = __pgprot((pgprot_val(*vma_prot) & 597 *vma_prot = __pgprot((pgprot_val(*vma_prot) &
602 (~_PAGE_CACHE_MASK)) | 598 (~_PAGE_CACHE_MASK)) |
@@ -614,11 +610,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
614 !is_new_memtype_allowed(paddr, size, want_flags, flags)) { 610 !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
615 free_memtype(paddr, paddr + size); 611 free_memtype(paddr, paddr + size);
616 printk(KERN_ERR "%s:%d map pfn expected mapping type %s" 612 printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
617 " for %Lx-%Lx, got %s\n", 613 " for [mem %#010Lx-%#010Lx], got %s\n",
618 current->comm, current->pid, 614 current->comm, current->pid,
619 cattr_name(want_flags), 615 cattr_name(want_flags),
620 (unsigned long long)paddr, 616 (unsigned long long)paddr,
621 (unsigned long long)(paddr + size), 617 (unsigned long long)(paddr + size - 1),
622 cattr_name(flags)); 618 cattr_name(flags));
623 return -EINVAL; 619 return -EINVAL;
624 } 620 }
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index efb5b4b93711..732af3a96183 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -176,8 +176,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
176 return; 176 return;
177 } 177 }
178 178
179 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, 179 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
180 start, end); 180 node, pxm,
181 (unsigned long long) start, (unsigned long long) end - 1);
181} 182}
182 183
183void __init acpi_numa_arch_fixup(void) {} 184void __init acpi_numa_arch_fixup(void) {}
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 90aa2a11a933..af1a177216f1 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -592,11 +592,9 @@ static ssize_t print_nodes_state(enum node_states state, char *buf)
592{ 592{
593 int n; 593 int n;
594 594
595 n = nodelist_scnprintf(buf, PAGE_SIZE, node_states[state]); 595 n = nodelist_scnprintf(buf, PAGE_SIZE-2, node_states[state]);
596 if (n > 0 && PAGE_SIZE > n + 1) { 596 buf[n++] = '\n';
597 *(buf + n++) = '\n'; 597 buf[n] = '\0';
598 *(buf + n++) = '\0';
599 }
600 return n; 598 return n;
601} 599}
602 600
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index aa3642cb8209..0356099ae040 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -167,6 +167,14 @@ config GPIO_PXA
167 help 167 help
168 Say yes here to support the PXA GPIO device 168 Say yes here to support the PXA GPIO device
169 169
170config GPIO_STA2X11
171 bool "STA2x11/ConneXt GPIO support"
172 depends on MFD_STA2X11
173 select GENERIC_IRQ_CHIP
174 help
175 Say yes here to support the STA2x11/ConneXt GPIO device.
176 The GPIO module has 128 GPIO pins with alternate functions.
177
170config GPIO_XILINX 178config GPIO_XILINX
171 bool "Xilinx GPIO support" 179 bool "Xilinx GPIO support"
172 depends on PPC_OF || MICROBLAZE 180 depends on PPC_OF || MICROBLAZE
@@ -180,13 +188,13 @@ config GPIO_VR41XX
180 Say yes here to support the NEC VR4100 series General-purpose I/O Uint 188 Say yes here to support the NEC VR4100 series General-purpose I/O Uint
181 189
182config GPIO_SCH 190config GPIO_SCH
183 tristate "Intel SCH/TunnelCreek GPIO" 191 tristate "Intel SCH/TunnelCreek/Centerton GPIO"
184 depends on PCI && X86 192 depends on PCI && X86
185 select MFD_CORE 193 select MFD_CORE
186 select LPC_SCH 194 select LPC_SCH
187 help 195 help
188 Say yes here to support GPIO interface on Intel Poulsbo SCH 196 Say yes here to support GPIO interface on Intel Poulsbo SCH,
189 or Intel Tunnel Creek processor. 197 Intel Tunnel Creek processor or Intel Centerton processor.
190 The Intel SCH contains a total of 14 GPIO pins. Ten GPIOs are 198 The Intel SCH contains a total of 14 GPIO pins. Ten GPIOs are
191 powered by the core power rail and are turned off during sleep 199 powered by the core power rail and are turned off during sleep
192 modes (S3 and higher). The remaining four GPIOs are powered by 200 modes (S3 and higher). The remaining four GPIOs are powered by
@@ -195,6 +203,22 @@ config GPIO_SCH
195 system from the Suspend-to-RAM state. 203 system from the Suspend-to-RAM state.
196 The Intel Tunnel Creek processor has 5 GPIOs powered by the 204 The Intel Tunnel Creek processor has 5 GPIOs powered by the
197 core power rail and 9 from suspend power supply. 205 core power rail and 9 from suspend power supply.
206 The Intel Centerton processor has a total of 30 GPIO pins.
207 Twenty-one are powered by the core power rail and 9 from the
208 suspend power supply.
209
210config GPIO_ICH
211 tristate "Intel ICH GPIO"
212 depends on PCI && X86
213 select MFD_CORE
214 select LPC_ICH
215 help
216 Say yes here to support the GPIO functionality of a number of Intel
217 ICH-based chipsets. Currently supported devices: ICH6, ICH7, ICH8
218 ICH9, ICH10, Series 5/3400 (eg Ibex Peak), Series 6/C200 (eg
219 Cougar Point), NM10 (Tiger Point), and 3100 (Whitmore Lake).
220
221 If unsure, say N.
198 222
199config GPIO_VX855 223config GPIO_VX855
200 tristate "VIA VX855/VX875 GPIO" 224 tristate "VIA VX855/VX875 GPIO"
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 07a79e245407..fde36e5e3537 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_ARCH_DAVINCI) += gpio-davinci.o
19obj-$(CONFIG_GPIO_EM) += gpio-em.o 19obj-$(CONFIG_GPIO_EM) += gpio-em.o
20obj-$(CONFIG_GPIO_EP93XX) += gpio-ep93xx.o 20obj-$(CONFIG_GPIO_EP93XX) += gpio-ep93xx.o
21obj-$(CONFIG_GPIO_GE_FPGA) += gpio-ge.o 21obj-$(CONFIG_GPIO_GE_FPGA) += gpio-ge.o
22obj-$(CONFIG_GPIO_ICH) += gpio-ich.o
22obj-$(CONFIG_GPIO_IT8761E) += gpio-it8761e.o 23obj-$(CONFIG_GPIO_IT8761E) += gpio-it8761e.o
23obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o 24obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o
24obj-$(CONFIG_ARCH_KS8695) += gpio-ks8695.o 25obj-$(CONFIG_ARCH_KS8695) += gpio-ks8695.o
@@ -51,6 +52,7 @@ obj-$(CONFIG_PLAT_SAMSUNG) += gpio-samsung.o
51obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o 52obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o
52obj-$(CONFIG_GPIO_SCH) += gpio-sch.o 53obj-$(CONFIG_GPIO_SCH) += gpio-sch.o
53obj-$(CONFIG_GPIO_SODAVILLE) += gpio-sodaville.o 54obj-$(CONFIG_GPIO_SODAVILLE) += gpio-sodaville.o
55obj-$(CONFIG_GPIO_STA2X11) += gpio-sta2x11.o
54obj-$(CONFIG_GPIO_STMPE) += gpio-stmpe.o 56obj-$(CONFIG_GPIO_STMPE) += gpio-stmpe.o
55obj-$(CONFIG_GPIO_SX150X) += gpio-sx150x.o 57obj-$(CONFIG_GPIO_SX150X) += gpio-sx150x.o
56obj-$(CONFIG_GPIO_TC3589X) += gpio-tc3589x.o 58obj-$(CONFIG_GPIO_TC3589X) += gpio-tc3589x.o
diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c
new file mode 100644
index 000000000000..b7c06517403d
--- /dev/null
+++ b/drivers/gpio/gpio-ich.c
@@ -0,0 +1,419 @@
1/*
2 * Intel ICH6-10, Series 5 and 6 GPIO driver
3 *
4 * Copyright (C) 2010 Extreme Engineering Solutions.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/module.h>
24#include <linux/pci.h>
25#include <linux/gpio.h>
26#include <linux/platform_device.h>
27#include <linux/mfd/lpc_ich.h>
28
29#define DRV_NAME "gpio_ich"
30
31/*
32 * GPIO register offsets in GPIO I/O space.
33 * Each chunk of 32 GPIOs is manipulated via its own USE_SELx, IO_SELx, and
34 * LVLx registers. Logic in the read/write functions takes a register and
35 * an absolute bit number and determines the proper register offset and bit
36 * number in that register. For example, to read the value of GPIO bit 50
37 * the code would access offset ichx_regs[2(=GPIO_LVL)][1(=50/32)],
38 * bit 18 (50%32).
39 */
40enum GPIO_REG {
41 GPIO_USE_SEL = 0,
42 GPIO_IO_SEL,
43 GPIO_LVL,
44};
45
46static const u8 ichx_regs[3][3] = {
47 {0x00, 0x30, 0x40}, /* USE_SEL[1-3] offsets */
48 {0x04, 0x34, 0x44}, /* IO_SEL[1-3] offsets */
49 {0x0c, 0x38, 0x48}, /* LVL[1-3] offsets */
50};
51
52#define ICHX_WRITE(val, reg, base_res) outl(val, (reg) + (base_res)->start)
53#define ICHX_READ(reg, base_res) inl((reg) + (base_res)->start)
54
55struct ichx_desc {
56 /* Max GPIO pins the chipset can have */
57 uint ngpio;
58
59 /* Whether the chipset has GPIO in GPE0_STS in the PM IO region */
60 bool uses_gpe0;
61
62 /* USE_SEL is bogus on some chipsets, eg 3100 */
63 u32 use_sel_ignore[3];
64
65 /* Some chipsets have quirks, let these use their own request/get */
66 int (*request)(struct gpio_chip *chip, unsigned offset);
67 int (*get)(struct gpio_chip *chip, unsigned offset);
68};
69
/*
 * Driver-wide state: this driver supports a single ICH GPIO controller
 * per system, so everything lives in one file-scope singleton.
 */
static struct {
	spinlock_t lock;		/* serializes all register accesses */
	struct platform_device *dev;
	struct gpio_chip chip;
	struct resource *gpio_base;	/* GPIO IO base */
	struct resource *pm_base;	/* Power Management IO base */
	struct ichx_desc *desc;	/* Pointer to chipset-specific description */
	u32 orig_gpio_ctrl;	/* Orig CTRL value, used to restore on exit */
} ichx_priv;
79
80static int modparam_gpiobase = -1; /* dynamic */
81module_param_named(gpiobase, modparam_gpiobase, int, 0444);
82MODULE_PARM_DESC(gpiobase, "The GPIO number base. -1 means dynamic, "
83 "which is the default.");
84
85static int ichx_write_bit(int reg, unsigned nr, int val, int verify)
86{
87 unsigned long flags;
88 u32 data, tmp;
89 int reg_nr = nr / 32;
90 int bit = nr & 0x1f;
91 int ret = 0;
92
93 spin_lock_irqsave(&ichx_priv.lock, flags);
94
95 data = ICHX_READ(ichx_regs[reg][reg_nr], ichx_priv.gpio_base);
96 if (val)
97 data |= 1 << bit;
98 else
99 data &= ~(1 << bit);
100 ICHX_WRITE(data, ichx_regs[reg][reg_nr], ichx_priv.gpio_base);
101 tmp = ICHX_READ(ichx_regs[reg][reg_nr], ichx_priv.gpio_base);
102 if (verify && data != tmp)
103 ret = -EPERM;
104
105 spin_unlock_irqrestore(&ichx_priv.lock, flags);
106
107 return ret;
108}
109
110static int ichx_read_bit(int reg, unsigned nr)
111{
112 unsigned long flags;
113 u32 data;
114 int reg_nr = nr / 32;
115 int bit = nr & 0x1f;
116
117 spin_lock_irqsave(&ichx_priv.lock, flags);
118
119 data = ICHX_READ(ichx_regs[reg][reg_nr], ichx_priv.gpio_base);
120
121 spin_unlock_irqrestore(&ichx_priv.lock, flags);
122
123 return data & (1 << bit) ? 1 : 0;
124}
125
126static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
127{
128 /*
129 * Try setting pin as an input and verify it worked since many pins
130 * are output-only.
131 */
132 if (ichx_write_bit(GPIO_IO_SEL, nr, 1, 1))
133 return -EINVAL;
134
135 return 0;
136}
137
138static int ichx_gpio_direction_output(struct gpio_chip *gpio, unsigned nr,
139 int val)
140{
141 /* Set GPIO output value. */
142 ichx_write_bit(GPIO_LVL, nr, val, 0);
143
144 /*
145 * Try setting pin as an output and verify it worked since many pins
146 * are input-only.
147 */
148 if (ichx_write_bit(GPIO_IO_SEL, nr, 0, 1))
149 return -EINVAL;
150
151 return 0;
152}
153
/* Default get() callback: read the current pin level from the LVL bank. */
static int ichx_gpio_get(struct gpio_chip *chip, unsigned nr)
{
	return ichx_read_bit(GPIO_LVL, nr);
}
158
159static int ich6_gpio_get(struct gpio_chip *chip, unsigned nr)
160{
161 unsigned long flags;
162 u32 data;
163
164 /*
165 * GPI 0 - 15 need to be read from the power management registers on
166 * a ICH6/3100 bridge.
167 */
168 if (nr < 16) {
169 if (!ichx_priv.pm_base)
170 return -ENXIO;
171
172 spin_lock_irqsave(&ichx_priv.lock, flags);
173
174 /* GPI 0 - 15 are latched, write 1 to clear*/
175 ICHX_WRITE(1 << (16 + nr), 0, ichx_priv.pm_base);
176 data = ICHX_READ(0, ichx_priv.pm_base);
177
178 spin_unlock_irqrestore(&ichx_priv.lock, flags);
179
180 return (data >> 16) & (1 << nr) ? 1 : 0;
181 } else {
182 return ichx_gpio_get(chip, nr);
183 }
184}
185
186static int ichx_gpio_request(struct gpio_chip *chip, unsigned nr)
187{
188 /*
189 * Note we assume the BIOS properly set a bridge's USE value. Some
190 * chips (eg Intel 3100) have bogus USE values though, so first see if
191 * the chipset's USE value can be trusted for this specific bit.
192 * If it can't be trusted, assume that the pin can be used as a GPIO.
193 */
194 if (ichx_priv.desc->use_sel_ignore[nr / 32] & (1 << (nr & 0x1f)))
195 return 1;
196
197 return ichx_read_bit(GPIO_USE_SEL, nr) ? 0 : -ENODEV;
198}
199
static int ich6_gpio_request(struct gpio_chip *chip, unsigned nr)
{
	/*
	 * On the Intel ICH6/3100 bridge, GPIOs 16 and 17 are controlled
	 * by USE register bits 0 and 1 (see "Table 704. GPIO_USE_SEL1
	 * register" in the i3100 datasheet), so remap them before the
	 * generic check.
	 */
	return ichx_gpio_request(chip, (nr == 16 || nr == 17) ? nr - 16 : nr);
}
213
/* Drive an output pin's level via the LVL register bank (no verify). */
static void ichx_gpio_set(struct gpio_chip *chip, unsigned nr, int val)
{
	ichx_write_bit(GPIO_LVL, nr, val, 0);
}
218
219static void __devinit ichx_gpiolib_setup(struct gpio_chip *chip)
220{
221 chip->owner = THIS_MODULE;
222 chip->label = DRV_NAME;
223 chip->dev = &ichx_priv.dev->dev;
224
225 /* Allow chip-specific overrides of request()/get() */
226 chip->request = ichx_priv.desc->request ?
227 ichx_priv.desc->request : ichx_gpio_request;
228 chip->get = ichx_priv.desc->get ?
229 ichx_priv.desc->get : ichx_gpio_get;
230
231 chip->set = ichx_gpio_set;
232 chip->direction_input = ichx_gpio_direction_input;
233 chip->direction_output = ichx_gpio_direction_output;
234 chip->base = modparam_gpiobase;
235 chip->ngpio = ichx_priv.desc->ngpio;
236 chip->can_sleep = 0;
237 chip->dbg_show = NULL;
238}
239
240/* ICH6-based, 631xesb-based */
241static struct ichx_desc ich6_desc = {
242 /* Bridges using the ICH6 controller need fixups for GPIO 0 - 17 */
243 .request = ich6_gpio_request,
244 .get = ich6_gpio_get,
245
246 /* GPIO 0-15 are read in the GPE0_STS PM register */
247 .uses_gpe0 = true,
248
249 .ngpio = 50,
250};
251
252/* Intel 3100 */
253static struct ichx_desc i3100_desc = {
254 /*
255 * Bits 16,17, 20 of USE_SEL and bit 16 of USE_SEL2 always read 0 on
256 * the Intel 3100. See "Table 712. GPIO Summary Table" of 3100
257 * Datasheet for more info.
258 */
259 .use_sel_ignore = {0x00130000, 0x00010000, 0x0},
260
261 /* The 3100 needs fixups for GPIO 0 - 17 */
262 .request = ich6_gpio_request,
263 .get = ich6_gpio_get,
264
265 /* GPIO 0-15 are read in the GPE0_STS PM register */
266 .uses_gpe0 = true,
267
268 .ngpio = 50,
269};
270
271/* ICH7 and ICH8-based */
272static struct ichx_desc ich7_desc = {
273 .ngpio = 50,
274};
275
276/* ICH9-based */
277static struct ichx_desc ich9_desc = {
278 .ngpio = 61,
279};
280
281/* ICH10-based - Consumer/corporate versions have different amount of GPIO */
282static struct ichx_desc ich10_cons_desc = {
283 .ngpio = 61,
284};
285static struct ichx_desc ich10_corp_desc = {
286 .ngpio = 72,
287};
288
289/* Intel 5 series, 6 series, 3400 series, and C200 series */
290static struct ichx_desc intel5_desc = {
291 .ngpio = 76,
292};
293
/*
 * Probe: bind to the platform device instantiated by the lpc_ich MFD
 * driver, claim the GPIO (and optionally ACPI/PM) I/O regions, and
 * register the gpio_chip.  GPE0 access is best-effort: if the ACPI BAR
 * is missing or busy we still register, just without GPI 0 - 15.
 */
static int __devinit ichx_gpio_probe(struct platform_device *pdev)
{
	struct resource *res_base, *res_pm;
	int err;
	struct lpc_ich_info *ich_info = pdev->dev.platform_data;

	if (!ich_info)
		return -ENODEV;

	ichx_priv.dev = pdev;

	/* Map the MFD-reported GPIO version to a chipset descriptor. */
	switch (ich_info->gpio_version) {
	case ICH_I3100_GPIO:
		ichx_priv.desc = &i3100_desc;
		break;
	case ICH_V5_GPIO:
		ichx_priv.desc = &intel5_desc;
		break;
	case ICH_V6_GPIO:
		ichx_priv.desc = &ich6_desc;
		break;
	case ICH_V7_GPIO:
		ichx_priv.desc = &ich7_desc;
		break;
	case ICH_V9_GPIO:
		ichx_priv.desc = &ich9_desc;
		break;
	case ICH_V10CORP_GPIO:
		ichx_priv.desc = &ich10_corp_desc;
		break;
	case ICH_V10CONS_GPIO:
		ichx_priv.desc = &ich10_cons_desc;
		break;
	default:
		return -ENODEV;
	}

	res_base = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_GPIO);
	if (!res_base || !res_base->start || !res_base->end)
		return -ENODEV;

	if (!request_region(res_base->start, resource_size(res_base),
			pdev->name))
		return -EBUSY;

	ichx_priv.gpio_base = res_base;

	/*
	 * If necessary, determine the I/O address of ACPI/power management
	 * registers which are needed to read the GPE0 register for GPI pins
	 * 0 - 15 on some chipsets.
	 */
	if (!ichx_priv.desc->uses_gpe0)
		goto init;

	res_pm = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_GPE0);
	if (!res_pm) {
		pr_warn("ACPI BAR is unavailable, GPI 0 - 15 unavailable\n");
		goto init;
	}

	if (!request_region(res_pm->start, resource_size(res_pm),
			pdev->name)) {
		pr_warn("ACPI BAR is busy, GPI 0 - 15 unavailable\n");
		goto init;
	}

	ichx_priv.pm_base = res_pm;

init:
	ichx_gpiolib_setup(&ichx_priv.chip);
	err = gpiochip_add(&ichx_priv.chip);
	if (err) {
		pr_err("Failed to register GPIOs\n");
		goto add_err;
	}

	pr_info("GPIO from %d to %d on %s\n", ichx_priv.chip.base,
	       ichx_priv.chip.base + ichx_priv.chip.ngpio - 1, DRV_NAME);

	return 0;

add_err:
	/* Unwind: release whichever I/O regions were claimed above. */
	release_region(ichx_priv.gpio_base->start,
			resource_size(ichx_priv.gpio_base));
	if (ichx_priv.pm_base)
		release_region(ichx_priv.pm_base->start,
				resource_size(ichx_priv.pm_base));
	return err;
}
384
385static int __devexit ichx_gpio_remove(struct platform_device *pdev)
386{
387 int err;
388
389 err = gpiochip_remove(&ichx_priv.chip);
390 if (err) {
391 dev_err(&pdev->dev, "%s failed, %d\n",
392 "gpiochip_remove()", err);
393 return err;
394 }
395
396 release_region(ichx_priv.gpio_base->start,
397 resource_size(ichx_priv.gpio_base));
398 if (ichx_priv.pm_base)
399 release_region(ichx_priv.pm_base->start,
400 resource_size(ichx_priv.pm_base));
401
402 return 0;
403}
404
405static struct platform_driver ichx_gpio_driver = {
406 .driver = {
407 .owner = THIS_MODULE,
408 .name = DRV_NAME,
409 },
410 .probe = ichx_gpio_probe,
411 .remove = __devexit_p(ichx_gpio_remove),
412};
413
414module_platform_driver(ichx_gpio_driver);
415
416MODULE_AUTHOR("Peter Tyser <ptyser@xes-inc.com>");
417MODULE_DESCRIPTION("GPIO interface for Intel ICH series");
418MODULE_LICENSE("GPL");
419MODULE_ALIAS("platform:"DRV_NAME);
diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c
index 8cadf4d683a8..424dce8e3f30 100644
--- a/drivers/gpio/gpio-sch.c
+++ b/drivers/gpio/gpio-sch.c
@@ -232,6 +232,14 @@ static int __devinit sch_gpio_probe(struct platform_device *pdev)
232 sch_gpio_resume.ngpio = 9; 232 sch_gpio_resume.ngpio = 9;
233 break; 233 break;
234 234
235 case PCI_DEVICE_ID_INTEL_CENTERTON_ILB:
236 sch_gpio_core.base = 0;
237 sch_gpio_core.ngpio = 21;
238
239 sch_gpio_resume.base = 21;
240 sch_gpio_resume.ngpio = 9;
241 break;
242
235 default: 243 default:
236 return -ENODEV; 244 return -ENODEV;
237 } 245 }
diff --git a/drivers/gpio/gpio-sta2x11.c b/drivers/gpio/gpio-sta2x11.c
new file mode 100644
index 000000000000..38416be8ba11
--- /dev/null
+++ b/drivers/gpio/gpio-sta2x11.c
@@ -0,0 +1,435 @@
1/*
2 * STMicroelectronics ConneXt (STA2X11) GPIO driver
3 *
4 * Copyright 2012 ST Microelectronics (Alessandro Rubini)
5 * Based on gpio-ml-ioh.c, Copyright 2010 OKI Semiconductors Ltd.
6 * Also based on previous sta2x11 work, Copyright 2011 Wind River Systems, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
15 * See the GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/gpio.h>
27#include <linux/interrupt.h>
28#include <linux/irq.h>
29#include <linux/pci.h>
30#include <linux/platform_device.h>
31#include <linux/mfd/sta2x11-mfd.h>
32
33struct gsta_regs {
34 u32 dat; /* 0x00 */
35 u32 dats;
36 u32 datc;
37 u32 pdis;
38 u32 dir; /* 0x10 */
39 u32 dirs;
40 u32 dirc;
41 u32 unused_1c;
42 u32 afsela; /* 0x20 */
43 u32 unused_24[7];
44 u32 rimsc; /* 0x40 */
45 u32 fimsc;
46 u32 is;
47 u32 ic;
48};
49
50struct gsta_gpio {
51 spinlock_t lock;
52 struct device *dev;
53 void __iomem *reg_base;
54 struct gsta_regs __iomem *regs[GSTA_NR_BLOCKS];
55 struct gpio_chip gpio;
56 int irq_base;
57 /* FIXME: save the whole config here (AF, ...) */
58 unsigned irq_type[GSTA_NR_GPIO];
59};
60
/* Register block serving gpio number @nr (one block per 32 gpios). */
static inline struct gsta_regs __iomem *__regs(struct gsta_gpio *chip, int nr)
{
	return chip->regs[nr / GSTA_GPIO_PER_BLOCK];
}
65
/* Bit mask for gpio number @nr within its 32-gpio register block. */
static inline u32 __bit(int nr)
{
	return 1U << (nr % GSTA_GPIO_PER_BLOCK);
}
70
71/*
72 * gpio methods
73 */
74
75static void gsta_gpio_set(struct gpio_chip *gpio, unsigned nr, int val)
76{
77 struct gsta_gpio *chip = container_of(gpio, struct gsta_gpio, gpio);
78 struct gsta_regs __iomem *regs = __regs(chip, nr);
79 u32 bit = __bit(nr);
80
81 if (val)
82 writel(bit, &regs->dats);
83 else
84 writel(bit, &regs->datc);
85}
86
87static int gsta_gpio_get(struct gpio_chip *gpio, unsigned nr)
88{
89 struct gsta_gpio *chip = container_of(gpio, struct gsta_gpio, gpio);
90 struct gsta_regs __iomem *regs = __regs(chip, nr);
91 u32 bit = __bit(nr);
92
93 return readl(&regs->dat) & bit;
94}
95
96static int gsta_gpio_direction_output(struct gpio_chip *gpio, unsigned nr,
97 int val)
98{
99 struct gsta_gpio *chip = container_of(gpio, struct gsta_gpio, gpio);
100 struct gsta_regs __iomem *regs = __regs(chip, nr);
101 u32 bit = __bit(nr);
102
103 writel(bit, &regs->dirs);
104 /* Data register after direction, otherwise pullup/down is selected */
105 if (val)
106 writel(bit, &regs->dats);
107 else
108 writel(bit, &regs->datc);
109 return 0;
110}
111
112static int gsta_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
113{
114 struct gsta_gpio *chip = container_of(gpio, struct gsta_gpio, gpio);
115 struct gsta_regs __iomem *regs = __regs(chip, nr);
116 u32 bit = __bit(nr);
117
118 writel(bit, &regs->dirc);
119 return 0;
120}
121
/* Linear mapping: gpio offset -> irq number within our allocated range. */
static int gsta_gpio_to_irq(struct gpio_chip *gpio, unsigned offset)
{
	struct gsta_gpio *chip = container_of(gpio, struct gsta_gpio, gpio);
	return chip->irq_base + offset;
}
127
128static void gsta_gpio_setup(struct gsta_gpio *chip) /* called from probe */
129{
130 struct gpio_chip *gpio = &chip->gpio;
131
132 /*
133 * ARCH_NR_GPIOS is currently 256 and dynamic allocation starts
134 * from the end. However, for compatibility, we need the first
135 * ConneXt device to start from gpio 0: it's the main chipset
136 * on most boards so documents and drivers assume gpio0..gpio127
137 */
138 static int gpio_base;
139
140 gpio->label = dev_name(chip->dev);
141 gpio->owner = THIS_MODULE;
142 gpio->direction_input = gsta_gpio_direction_input;
143 gpio->get = gsta_gpio_get;
144 gpio->direction_output = gsta_gpio_direction_output;
145 gpio->set = gsta_gpio_set;
146 gpio->dbg_show = NULL;
147 gpio->base = gpio_base;
148 gpio->ngpio = GSTA_NR_GPIO;
149 gpio->can_sleep = 0;
150 gpio->to_irq = gsta_gpio_to_irq;
151
152 /*
153 * After the first device, turn to dynamic gpio numbers.
154 * For example, with ARCH_NR_GPIOS = 256 we can fit two cards
155 */
156 if (!gpio_base)
157 gpio_base = -1;
158}
159
160/*
161 * Special method: alternate functions and pullup/pulldown. This is only
162 * invoked on startup to configure gpio's according to platform data.
163 * FIXME : this functionality shall be managed (and exported to other drivers)
164 * via the pin control subsystem.
165 */
166static void gsta_set_config(struct gsta_gpio *chip, int nr, unsigned cfg)
167{
168 struct gsta_regs __iomem *regs = __regs(chip, nr);
169 unsigned long flags;
170 u32 bit = __bit(nr);
171 u32 val;
172 int err = 0;
173
174 pr_info("%s: %p %i %i\n", __func__, chip, nr, cfg);
175
176 if (cfg == PINMUX_TYPE_NONE)
177 return;
178
179 /* Alternate function or not? */
180 spin_lock_irqsave(&chip->lock, flags);
181 val = readl(&regs->afsela);
182 if (cfg == PINMUX_TYPE_FUNCTION)
183 val |= bit;
184 else
185 val &= ~bit;
186 writel(val | bit, &regs->afsela);
187 if (cfg == PINMUX_TYPE_FUNCTION) {
188 spin_unlock_irqrestore(&chip->lock, flags);
189 return;
190 }
191
192 /* not alternate function: set details */
193 switch (cfg) {
194 case PINMUX_TYPE_OUTPUT_LOW:
195 writel(bit, &regs->dirs);
196 writel(bit, &regs->datc);
197 break;
198 case PINMUX_TYPE_OUTPUT_HIGH:
199 writel(bit, &regs->dirs);
200 writel(bit, &regs->dats);
201 break;
202 case PINMUX_TYPE_INPUT:
203 writel(bit, &regs->dirc);
204 val = readl(&regs->pdis) | bit;
205 writel(val, &regs->pdis);
206 break;
207 case PINMUX_TYPE_INPUT_PULLUP:
208 writel(bit, &regs->dirc);
209 val = readl(&regs->pdis) & ~bit;
210 writel(val, &regs->pdis);
211 writel(bit, &regs->dats);
212 break;
213 case PINMUX_TYPE_INPUT_PULLDOWN:
214 writel(bit, &regs->dirc);
215 val = readl(&regs->pdis) & ~bit;
216 writel(val, &regs->pdis);
217 writel(bit, &regs->datc);
218 break;
219 default:
220 err = 1;
221 }
222 spin_unlock_irqrestore(&chip->lock, flags);
223 if (err)
224 pr_err("%s: chip %p, pin %i, cfg %i is invalid\n",
225 __func__, chip, nr, cfg);
226}
227
228/*
229 * Irq methods
230 */
231
232static void gsta_irq_disable(struct irq_data *data)
233{
234 struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
235 struct gsta_gpio *chip = gc->private;
236 int nr = data->irq - chip->irq_base;
237 struct gsta_regs __iomem *regs = __regs(chip, nr);
238 u32 bit = __bit(nr);
239 u32 val;
240 unsigned long flags;
241
242 spin_lock_irqsave(&chip->lock, flags);
243 if (chip->irq_type[nr] & IRQ_TYPE_EDGE_RISING) {
244 val = readl(&regs->rimsc) & ~bit;
245 writel(val, &regs->rimsc);
246 }
247 if (chip->irq_type[nr] & IRQ_TYPE_EDGE_FALLING) {
248 val = readl(&regs->fimsc) & ~bit;
249 writel(val, &regs->fimsc);
250 }
251 spin_unlock_irqrestore(&chip->lock, flags);
252 return;
253}
254
255static void gsta_irq_enable(struct irq_data *data)
256{
257 struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
258 struct gsta_gpio *chip = gc->private;
259 int nr = data->irq - chip->irq_base;
260 struct gsta_regs __iomem *regs = __regs(chip, nr);
261 u32 bit = __bit(nr);
262 u32 val;
263 int type;
264 unsigned long flags;
265
266 type = chip->irq_type[nr];
267
268 spin_lock_irqsave(&chip->lock, flags);
269 val = readl(&regs->rimsc);
270 if (type & IRQ_TYPE_EDGE_RISING)
271 writel(val | bit, &regs->rimsc);
272 else
273 writel(val & ~bit, &regs->rimsc);
274 val = readl(&regs->rimsc);
275 if (type & IRQ_TYPE_EDGE_FALLING)
276 writel(val | bit, &regs->fimsc);
277 else
278 writel(val & ~bit, &regs->fimsc);
279 spin_unlock_irqrestore(&chip->lock, flags);
280 return;
281}
282
283static int gsta_irq_type(struct irq_data *d, unsigned int type)
284{
285 struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
286 struct gsta_gpio *chip = gc->private;
287 int nr = d->irq - chip->irq_base;
288
289 /* We only support edge interrupts */
290 if (!(type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING))) {
291 pr_debug("%s: unsupported type 0x%x\n", __func__, type);
292 return -EINVAL;
293 }
294
295 chip->irq_type[nr] = type; /* used for enable/disable */
296
297 gsta_irq_enable(d);
298 return 0;
299}
300
301static irqreturn_t gsta_gpio_handler(int irq, void *dev_id)
302{
303 struct gsta_gpio *chip = dev_id;
304 struct gsta_regs __iomem *regs;
305 u32 is;
306 int i, nr, base;
307 irqreturn_t ret = IRQ_NONE;
308
309 for (i = 0; i < GSTA_NR_BLOCKS; i++) {
310 regs = chip->regs[i];
311 base = chip->irq_base + i * GSTA_GPIO_PER_BLOCK;
312 while ((is = readl(&regs->is))) {
313 nr = __ffs(is);
314 irq = base + nr;
315 generic_handle_irq(irq);
316 writel(1 << nr, &regs->ic);
317 ret = IRQ_HANDLED;
318 }
319 }
320 return ret;
321}
322
/*
 * Allocate and install a generic irq chip over this device's gpio irq
 * range.  Called once from probe, hence __devinit.
 */
static __devinit void gsta_alloc_irq_chip(struct gsta_gpio *chip)
{
	struct irq_chip_generic *gc;
	struct irq_chip_type *ct;

	gc = irq_alloc_generic_chip(KBUILD_MODNAME, 1, chip->irq_base,
				     chip->reg_base, handle_simple_irq);
	gc->private = chip;
	ct = gc->chip_types;

	ct->chip.irq_set_type = gsta_irq_type;
	ct->chip.irq_disable = gsta_irq_disable;
	ct->chip.irq_enable = gsta_irq_enable;

	/* FIXME: this makes at most 32 interrupts. Request 0 by now */
	irq_setup_generic_chip(gc, 0 /* IRQ_MSK(GSTA_GPIO_PER_BLOCK) */, 0,
			       IRQ_NOREQUEST | IRQ_NOPROBE, 0);

	/* Set up all 128 interrupts: code from setup_generic_chip */
	{
		/* deliberately shadows the outer ct with the same value */
		struct irq_chip_type *ct = gc->chip_types;
		int i, j;
		for (j = 0; j < GSTA_NR_GPIO; j++) {
			i = chip->irq_base + j;
			irq_set_chip_and_handler(i, &ct->chip, ct->handler);
			irq_set_chip_data(i, gc);
			irq_modify_status(i, IRQ_NOREQUEST | IRQ_NOPROBE, 0);
		}
		/*
		 * NOTE(review): i holds the LAST irq number after the loop,
		 * so this stores GSTA_NR_GPIO - 1 rather than GSTA_NR_GPIO —
		 * looks like an off-by-one; confirm against how irq_cnt is
		 * consumed (e.g. irq_remove_generic_chip) before changing.
		 */
		gc->irq_cnt = i - gc->irq_base;
	}
}
354
355/* The platform device used here is instantiated by the MFD device */
356static int __devinit gsta_probe(struct platform_device *dev)
357{
358 int i, err;
359 struct pci_dev *pdev;
360 struct sta2x11_gpio_pdata *gpio_pdata;
361 struct gsta_gpio *chip;
362 struct resource *res;
363
364 pdev = *(struct pci_dev **)(dev->dev.platform_data);
365 gpio_pdata = dev_get_platdata(&pdev->dev);
366
367 if (gpio_pdata == NULL)
368 dev_err(&dev->dev, "no gpio config\n");
369 pr_debug("gpio config: %p\n", gpio_pdata);
370
371 res = platform_get_resource(dev, IORESOURCE_MEM, 0);
372
373 chip = devm_kzalloc(&dev->dev, sizeof(*chip), GFP_KERNEL);
374 chip->dev = &dev->dev;
375 chip->reg_base = devm_request_and_ioremap(&dev->dev, res);
376
377 for (i = 0; i < GSTA_NR_BLOCKS; i++) {
378 chip->regs[i] = chip->reg_base + i * 4096;
379 /* disable all irqs */
380 writel(0, &chip->regs[i]->rimsc);
381 writel(0, &chip->regs[i]->fimsc);
382 writel(~0, &chip->regs[i]->ic);
383 }
384 spin_lock_init(&chip->lock);
385 gsta_gpio_setup(chip);
386 for (i = 0; i < GSTA_NR_GPIO; i++)
387 gsta_set_config(chip, i, gpio_pdata->pinconfig[i]);
388
389 /* 384 was used in previous code: be compatible for other drivers */
390 err = irq_alloc_descs(-1, 384, GSTA_NR_GPIO, NUMA_NO_NODE);
391 if (err < 0) {
392 dev_warn(&dev->dev, "sta2x11 gpio: Can't get irq base (%i)\n",
393 -err);
394 return err;
395 }
396 chip->irq_base = err;
397 gsta_alloc_irq_chip(chip);
398
399 err = request_irq(pdev->irq, gsta_gpio_handler,
400 IRQF_SHARED, KBUILD_MODNAME, chip);
401 if (err < 0) {
402 dev_err(&dev->dev, "sta2x11 gpio: Can't request irq (%i)\n",
403 -err);
404 goto err_free_descs;
405 }
406
407 err = gpiochip_add(&chip->gpio);
408 if (err < 0) {
409 dev_err(&dev->dev, "sta2x11 gpio: Can't register (%i)\n",
410 -err);
411 goto err_free_irq;
412 }
413
414 platform_set_drvdata(dev, chip);
415 return 0;
416
417err_free_irq:
418 free_irq(pdev->irq, chip);
419err_free_descs:
420 irq_free_descs(chip->irq_base, GSTA_NR_GPIO);
421 return err;
422}
423
424static struct platform_driver sta2x11_gpio_platform_driver = {
425 .driver = {
426 .name = "sta2x11-gpio",
427 .owner = THIS_MODULE,
428 },
429 .probe = gsta_probe,
430};
431
432module_platform_driver(sta2x11_gpio_platform_driver);
433
434MODULE_LICENSE("GPL v2");
435MODULE_DESCRIPTION("sta2x11_gpio GPIO driver");
diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c
index 7eef648a3351..c1ad2884f2ed 100644
--- a/drivers/gpio/gpio-tps65910.c
+++ b/drivers/gpio/gpio-tps65910.c
@@ -18,14 +18,27 @@
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/gpio.h> 19#include <linux/gpio.h>
20#include <linux/i2c.h> 20#include <linux/i2c.h>
21#include <linux/platform_device.h>
21#include <linux/mfd/tps65910.h> 22#include <linux/mfd/tps65910.h>
23#include <linux/of_device.h>
24
25struct tps65910_gpio {
26 struct gpio_chip gpio_chip;
27 struct tps65910 *tps65910;
28};
29
30static inline struct tps65910_gpio *to_tps65910_gpio(struct gpio_chip *chip)
31{
32 return container_of(chip, struct tps65910_gpio, gpio_chip);
33}
22 34
23static int tps65910_gpio_get(struct gpio_chip *gc, unsigned offset) 35static int tps65910_gpio_get(struct gpio_chip *gc, unsigned offset)
24{ 36{
25 struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); 37 struct tps65910_gpio *tps65910_gpio = to_tps65910_gpio(gc);
26 uint8_t val; 38 struct tps65910 *tps65910 = tps65910_gpio->tps65910;
39 unsigned int val;
27 40
28 tps65910->read(tps65910, TPS65910_GPIO0 + offset, 1, &val); 41 tps65910_reg_read(tps65910, TPS65910_GPIO0 + offset, &val);
29 42
30 if (val & GPIO_STS_MASK) 43 if (val & GPIO_STS_MASK)
31 return 1; 44 return 1;
@@ -36,83 +49,170 @@ static int tps65910_gpio_get(struct gpio_chip *gc, unsigned offset)
36static void tps65910_gpio_set(struct gpio_chip *gc, unsigned offset, 49static void tps65910_gpio_set(struct gpio_chip *gc, unsigned offset,
37 int value) 50 int value)
38{ 51{
39 struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); 52 struct tps65910_gpio *tps65910_gpio = to_tps65910_gpio(gc);
53 struct tps65910 *tps65910 = tps65910_gpio->tps65910;
40 54
41 if (value) 55 if (value)
42 tps65910_set_bits(tps65910, TPS65910_GPIO0 + offset, 56 tps65910_reg_set_bits(tps65910, TPS65910_GPIO0 + offset,
43 GPIO_SET_MASK); 57 GPIO_SET_MASK);
44 else 58 else
45 tps65910_clear_bits(tps65910, TPS65910_GPIO0 + offset, 59 tps65910_reg_clear_bits(tps65910, TPS65910_GPIO0 + offset,
46 GPIO_SET_MASK); 60 GPIO_SET_MASK);
47} 61}
48 62
49static int tps65910_gpio_output(struct gpio_chip *gc, unsigned offset, 63static int tps65910_gpio_output(struct gpio_chip *gc, unsigned offset,
50 int value) 64 int value)
51{ 65{
52 struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); 66 struct tps65910_gpio *tps65910_gpio = to_tps65910_gpio(gc);
67 struct tps65910 *tps65910 = tps65910_gpio->tps65910;
53 68
54 /* Set the initial value */ 69 /* Set the initial value */
55 tps65910_gpio_set(gc, offset, value); 70 tps65910_gpio_set(gc, offset, value);
56 71
57 return tps65910_set_bits(tps65910, TPS65910_GPIO0 + offset, 72 return tps65910_reg_set_bits(tps65910, TPS65910_GPIO0 + offset,
58 GPIO_CFG_MASK); 73 GPIO_CFG_MASK);
59} 74}
60 75
61static int tps65910_gpio_input(struct gpio_chip *gc, unsigned offset) 76static int tps65910_gpio_input(struct gpio_chip *gc, unsigned offset)
62{ 77{
63 struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); 78 struct tps65910_gpio *tps65910_gpio = to_tps65910_gpio(gc);
79 struct tps65910 *tps65910 = tps65910_gpio->tps65910;
64 80
65 return tps65910_clear_bits(tps65910, TPS65910_GPIO0 + offset, 81 return tps65910_reg_clear_bits(tps65910, TPS65910_GPIO0 + offset,
66 GPIO_CFG_MASK); 82 GPIO_CFG_MASK);
67} 83}
68 84
69void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base) 85#ifdef CONFIG_OF
86static struct tps65910_board *tps65910_parse_dt_for_gpio(struct device *dev,
87 struct tps65910 *tps65910, int chip_ngpio)
70{ 88{
89 struct tps65910_board *tps65910_board = tps65910->of_plat_data;
90 unsigned int prop_array[TPS6591X_MAX_NUM_GPIO];
91 int ngpio = min(chip_ngpio, TPS6591X_MAX_NUM_GPIO);
71 int ret; 92 int ret;
72 struct tps65910_board *board_data; 93 int idx;
94
95 tps65910_board->gpio_base = -1;
96 ret = of_property_read_u32_array(tps65910->dev->of_node,
97 "ti,en-gpio-sleep", prop_array, ngpio);
98 if (ret < 0) {
99 dev_dbg(dev, "ti,en-gpio-sleep not specified\n");
100 return tps65910_board;
101 }
73 102
74 if (!gpio_base) 103 for (idx = 0; idx < ngpio; idx++)
75 return; 104 tps65910_board->en_gpio_sleep[idx] = (prop_array[idx] != 0);
76 105
77 tps65910->gpio.owner = THIS_MODULE; 106 return tps65910_board;
78 tps65910->gpio.label = tps65910->i2c_client->name; 107}
79 tps65910->gpio.dev = tps65910->dev; 108#else
80 tps65910->gpio.base = gpio_base; 109static struct tps65910_board *tps65910_parse_dt_for_gpio(struct device *dev,
110 struct tps65910 *tps65910, int chip_ngpio)
111{
112 return NULL;
113}
114#endif
115
116static int __devinit tps65910_gpio_probe(struct platform_device *pdev)
117{
118 struct tps65910 *tps65910 = dev_get_drvdata(pdev->dev.parent);
119 struct tps65910_board *pdata = dev_get_platdata(tps65910->dev);
120 struct tps65910_gpio *tps65910_gpio;
121 int ret;
122 int i;
123
124 tps65910_gpio = devm_kzalloc(&pdev->dev,
125 sizeof(*tps65910_gpio), GFP_KERNEL);
126 if (!tps65910_gpio) {
127 dev_err(&pdev->dev, "Could not allocate tps65910_gpio\n");
128 return -ENOMEM;
129 }
130
131 tps65910_gpio->tps65910 = tps65910;
132
133 tps65910_gpio->gpio_chip.owner = THIS_MODULE;
134 tps65910_gpio->gpio_chip.label = tps65910->i2c_client->name;
81 135
82 switch(tps65910_chip_id(tps65910)) { 136 switch(tps65910_chip_id(tps65910)) {
83 case TPS65910: 137 case TPS65910:
84 tps65910->gpio.ngpio = TPS65910_NUM_GPIO; 138 tps65910_gpio->gpio_chip.ngpio = TPS65910_NUM_GPIO;
85 break; 139 break;
86 case TPS65911: 140 case TPS65911:
87 tps65910->gpio.ngpio = TPS65911_NUM_GPIO; 141 tps65910_gpio->gpio_chip.ngpio = TPS65911_NUM_GPIO;
88 break; 142 break;
89 default: 143 default:
90 return; 144 return -EINVAL;
145 }
146 tps65910_gpio->gpio_chip.can_sleep = 1;
147 tps65910_gpio->gpio_chip.direction_input = tps65910_gpio_input;
148 tps65910_gpio->gpio_chip.direction_output = tps65910_gpio_output;
149 tps65910_gpio->gpio_chip.set = tps65910_gpio_set;
150 tps65910_gpio->gpio_chip.get = tps65910_gpio_get;
151 tps65910_gpio->gpio_chip.dev = &pdev->dev;
152 if (pdata && pdata->gpio_base)
153 tps65910_gpio->gpio_chip.base = pdata->gpio_base;
154 else
155 tps65910_gpio->gpio_chip.base = -1;
156
157 if (!pdata && tps65910->dev->of_node)
158 pdata = tps65910_parse_dt_for_gpio(&pdev->dev, tps65910,
159 tps65910_gpio->gpio_chip.ngpio);
160
161 if (!pdata)
162 goto skip_init;
163
164 /* Configure sleep control for gpios if provided */
165 for (i = 0; i < tps65910_gpio->gpio_chip.ngpio; ++i) {
166 if (!pdata->en_gpio_sleep[i])
167 continue;
168
169 ret = tps65910_reg_set_bits(tps65910,
170 TPS65910_GPIO0 + i, GPIO_SLEEP_MASK);
171 if (ret < 0)
172 dev_warn(tps65910->dev,
173 "GPIO Sleep setting failed with err %d\n", ret);
91 } 174 }
92 tps65910->gpio.can_sleep = 1; 175
93 176skip_init:
94 tps65910->gpio.direction_input = tps65910_gpio_input; 177 ret = gpiochip_add(&tps65910_gpio->gpio_chip);
95 tps65910->gpio.direction_output = tps65910_gpio_output; 178 if (ret < 0) {
96 tps65910->gpio.set = tps65910_gpio_set; 179 dev_err(&pdev->dev, "Could not register gpiochip, %d\n", ret);
97 tps65910->gpio.get = tps65910_gpio_get; 180 return ret;
98
99 /* Configure sleep control for gpios */
100 board_data = dev_get_platdata(tps65910->dev);
101 if (board_data) {
102 int i;
103 for (i = 0; i < tps65910->gpio.ngpio; ++i) {
104 if (board_data->en_gpio_sleep[i]) {
105 ret = tps65910_set_bits(tps65910,
106 TPS65910_GPIO0 + i, GPIO_SLEEP_MASK);
107 if (ret < 0)
108 dev_warn(tps65910->dev,
109 "GPIO Sleep setting failed\n");
110 }
111 }
112 } 181 }
113 182
114 ret = gpiochip_add(&tps65910->gpio); 183 platform_set_drvdata(pdev, tps65910_gpio);
184
185 return ret;
186}
187
188static int __devexit tps65910_gpio_remove(struct platform_device *pdev)
189{
190 struct tps65910_gpio *tps65910_gpio = platform_get_drvdata(pdev);
115 191
116 if (ret) 192 return gpiochip_remove(&tps65910_gpio->gpio_chip);
117 dev_warn(tps65910->dev, "GPIO registration failed: %d\n", ret);
118} 193}
194
195static struct platform_driver tps65910_gpio_driver = {
196 .driver.name = "tps65910-gpio",
197 .driver.owner = THIS_MODULE,
198 .probe = tps65910_gpio_probe,
199 .remove = __devexit_p(tps65910_gpio_remove),
200};
201
202static int __init tps65910_gpio_init(void)
203{
204 return platform_driver_register(&tps65910_gpio_driver);
205}
206subsys_initcall(tps65910_gpio_init);
207
208static void __exit tps65910_gpio_exit(void)
209{
210 platform_driver_unregister(&tps65910_gpio_driver);
211}
212module_exit(tps65910_gpio_exit);
213
214MODULE_AUTHOR("Graeme Gregory <gg@slimlogic.co.uk>");
215MODULE_AUTHOR("Jorge Eduardo Candelaria jedu@slimlogic.co.uk>");
216MODULE_DESCRIPTION("GPIO interface for TPS65910/TPS6511 PMICs");
217MODULE_LICENSE("GPL v2");
218MODULE_ALIAS("platform:tps65910-gpio");
diff --git a/drivers/gpio/gpio-wm831x.c b/drivers/gpio/gpio-wm831x.c
index deb949e75ec1..e56a2165641c 100644
--- a/drivers/gpio/gpio-wm831x.c
+++ b/drivers/gpio/gpio-wm831x.c
@@ -102,10 +102,8 @@ static int wm831x_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
102 struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip); 102 struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip);
103 struct wm831x *wm831x = wm831x_gpio->wm831x; 103 struct wm831x *wm831x = wm831x_gpio->wm831x;
104 104
105 if (!wm831x->irq_base) 105 return irq_create_mapping(wm831x->irq_domain,
106 return -EINVAL; 106 WM831X_IRQ_GPIO_1 + offset);
107
108 return wm831x->irq_base + WM831X_IRQ_GPIO_1 + offset;
109} 107}
110 108
111static int wm831x_gpio_set_debounce(struct gpio_chip *chip, unsigned offset, 109static int wm831x_gpio_set_debounce(struct gpio_chip *chip, unsigned offset,
diff --git a/drivers/input/misc/wm831x-on.c b/drivers/input/misc/wm831x-on.c
index 47f18d6bce46..6790a812a1db 100644
--- a/drivers/input/misc/wm831x-on.c
+++ b/drivers/input/misc/wm831x-on.c
@@ -73,7 +73,7 @@ static int __devinit wm831x_on_probe(struct platform_device *pdev)
73{ 73{
74 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); 74 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
75 struct wm831x_on *wm831x_on; 75 struct wm831x_on *wm831x_on;
76 int irq = platform_get_irq(pdev, 0); 76 int irq = wm831x_irq(wm831x, platform_get_irq(pdev, 0));
77 int ret; 77 int ret;
78 78
79 wm831x_on = kzalloc(sizeof(struct wm831x_on), GFP_KERNEL); 79 wm831x_on = kzalloc(sizeof(struct wm831x_on), GFP_KERNEL);
diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c
index 4bc851a9dc3d..e83410721e38 100644
--- a/drivers/input/touchscreen/wm831x-ts.c
+++ b/drivers/input/touchscreen/wm831x-ts.c
@@ -260,15 +260,16 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev)
260 * If we have a direct IRQ use it, otherwise use the interrupt 260 * If we have a direct IRQ use it, otherwise use the interrupt
261 * from the WM831x IRQ controller. 261 * from the WM831x IRQ controller.
262 */ 262 */
263 wm831x_ts->data_irq = wm831x_irq(wm831x,
264 platform_get_irq_byname(pdev,
265 "TCHDATA"));
263 if (pdata && pdata->data_irq) 266 if (pdata && pdata->data_irq)
264 wm831x_ts->data_irq = pdata->data_irq; 267 wm831x_ts->data_irq = pdata->data_irq;
265 else
266 wm831x_ts->data_irq = platform_get_irq_byname(pdev, "TCHDATA");
267 268
269 wm831x_ts->pd_irq = wm831x_irq(wm831x,
270 platform_get_irq_byname(pdev, "TCHPD"));
268 if (pdata && pdata->pd_irq) 271 if (pdata && pdata->pd_irq)
269 wm831x_ts->pd_irq = pdata->pd_irq; 272 wm831x_ts->pd_irq = pdata->pd_irq;
270 else
271 wm831x_ts->pd_irq = platform_get_irq_byname(pdev, "TCHPD");
272 273
273 if (pdata) 274 if (pdata)
274 wm831x_ts->pressure = pdata->pressure; 275 wm831x_ts->pressure = pdata->pressure;
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f4b4dad77391..e129c820df7d 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -106,6 +106,19 @@ config UCB1400_CORE
106 To compile this driver as a module, choose M here: the 106 To compile this driver as a module, choose M here: the
107 module will be called ucb1400_core. 107 module will be called ucb1400_core.
108 108
109config MFD_LM3533
110 tristate "LM3533 Lighting Power chip"
111 depends on I2C
112 select MFD_CORE
113 select REGMAP_I2C
114 help
115 Say yes here to enable support for National Semiconductor / TI
116 LM3533 Lighting Power chips.
117
118 This driver provides common support for accessing the device;
119 additional drivers must be enabled in order to use the LED,
120 backlight or ambient-light-sensor functionality of the device.
121
109config TPS6105X 122config TPS6105X
110 tristate "TPS61050/61052 Boost Converters" 123 tristate "TPS61050/61052 Boost Converters"
111 depends on I2C 124 depends on I2C
@@ -177,8 +190,8 @@ config MFD_TPS65910
177 bool "TPS65910 Power Management chip" 190 bool "TPS65910 Power Management chip"
178 depends on I2C=y && GPIOLIB 191 depends on I2C=y && GPIOLIB
179 select MFD_CORE 192 select MFD_CORE
180 select GPIO_TPS65910
181 select REGMAP_I2C 193 select REGMAP_I2C
194 select IRQ_DOMAIN
182 help 195 help
183 if you say yes here you get support for the TPS65910 series of 196 if you say yes here you get support for the TPS65910 series of
184 Power Management chips. 197 Power Management chips.
@@ -409,6 +422,19 @@ config PMIC_ADP5520
409 individual components like LCD backlight, LEDs, GPIOs and Kepad 422 individual components like LCD backlight, LEDs, GPIOs and Kepad
410 under the corresponding menus. 423 under the corresponding menus.
411 424
425config MFD_MAX77693
426 bool "Maxim Semiconductor MAX77693 PMIC Support"
427 depends on I2C=y && GENERIC_HARDIRQS
428 select MFD_CORE
429 select REGMAP_I2C
430 help
431 Say yes here to support for Maxim Semiconductor MAX77693.
432 This is a companion Power Management IC with Flash, Haptic, Charger,
433 and MUIC(Micro USB Interface Controller) controls on chip.
434 This driver provides common support for accessing the device;
435 additional drivers must be enabled in order to use the functionality
436 of the device.
437
412config MFD_MAX8925 438config MFD_MAX8925
413 bool "Maxim Semiconductor MAX8925 PMIC Support" 439 bool "Maxim Semiconductor MAX8925 PMIC Support"
414 depends on I2C=y && GENERIC_HARDIRQS 440 depends on I2C=y && GENERIC_HARDIRQS
@@ -454,9 +480,9 @@ config MFD_S5M_CORE
454 of the device 480 of the device
455 481
456config MFD_WM8400 482config MFD_WM8400
457 tristate "Support Wolfson Microelectronics WM8400" 483 bool "Support Wolfson Microelectronics WM8400"
458 select MFD_CORE 484 select MFD_CORE
459 depends on I2C 485 depends on I2C=y
460 select REGMAP_I2C 486 select REGMAP_I2C
461 help 487 help
462 Support for the Wolfson Microelecronics WM8400 PMIC and audio 488 Support for the Wolfson Microelecronics WM8400 PMIC and audio
@@ -473,6 +499,7 @@ config MFD_WM831X_I2C
473 select MFD_CORE 499 select MFD_CORE
474 select MFD_WM831X 500 select MFD_WM831X
475 select REGMAP_I2C 501 select REGMAP_I2C
502 select IRQ_DOMAIN
476 depends on I2C=y && GENERIC_HARDIRQS 503 depends on I2C=y && GENERIC_HARDIRQS
477 help 504 help
478 Support for the Wolfson Microelecronics WM831x and WM832x PMICs 505 Support for the Wolfson Microelecronics WM831x and WM832x PMICs
@@ -485,6 +512,7 @@ config MFD_WM831X_SPI
485 select MFD_CORE 512 select MFD_CORE
486 select MFD_WM831X 513 select MFD_WM831X
487 select REGMAP_SPI 514 select REGMAP_SPI
515 select IRQ_DOMAIN
488 depends on SPI_MASTER && GENERIC_HARDIRQS 516 depends on SPI_MASTER && GENERIC_HARDIRQS
489 help 517 help
490 Support for the Wolfson Microelecronics WM831x and WM832x PMICs 518 Support for the Wolfson Microelecronics WM831x and WM832x PMICs
@@ -597,17 +625,32 @@ config MFD_MC13783
597 tristate 625 tristate
598 626
599config MFD_MC13XXX 627config MFD_MC13XXX
600 tristate "Support Freescale MC13783 and MC13892" 628 tristate
601 depends on SPI_MASTER 629 depends on SPI_MASTER || I2C
602 select MFD_CORE 630 select MFD_CORE
603 select MFD_MC13783 631 select MFD_MC13783
604 help 632 help
605 Support for the Freescale (Atlas) PMIC and audio CODECs 633 Enable support for the Freescale MC13783 and MC13892 PMICs.
606 MC13783 and MC13892. 634 This driver provides common support for accessing the device,
607 This driver provides common support for accessing the device,
608 additional drivers must be enabled in order to use the 635 additional drivers must be enabled in order to use the
609 functionality of the device. 636 functionality of the device.
610 637
638config MFD_MC13XXX_SPI
639 tristate "Freescale MC13783 and MC13892 SPI interface"
640 depends on SPI_MASTER
641 select REGMAP_SPI
642 select MFD_MC13XXX
643 help
644 Select this if your MC13xxx is connected via an SPI bus.
645
646config MFD_MC13XXX_I2C
647 tristate "Freescale MC13892 I2C interface"
648 depends on I2C
649 select REGMAP_I2C
650 select MFD_MC13XXX
651 help
652 Select this if your MC13xxx is connected via an I2C bus.
653
611config ABX500_CORE 654config ABX500_CORE
612 bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions" 655 bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions"
613 default y if ARCH_U300 || ARCH_U8500 656 default y if ARCH_U300 || ARCH_U8500
@@ -651,7 +694,7 @@ config EZX_PCAP
651 694
652config AB8500_CORE 695config AB8500_CORE
653 bool "ST-Ericsson AB8500 Mixed Signal Power Management chip" 696 bool "ST-Ericsson AB8500 Mixed Signal Power Management chip"
654 depends on GENERIC_HARDIRQS && ABX500_CORE 697 depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU
655 select MFD_CORE 698 select MFD_CORE
656 help 699 help
657 Select this option to enable access to AB8500 power management 700 Select this option to enable access to AB8500 power management
@@ -722,6 +765,16 @@ config LPC_SCH
722 LPC bridge function of the Intel SCH provides support for 765 LPC bridge function of the Intel SCH provides support for
723 System Management Bus and General Purpose I/O. 766 System Management Bus and General Purpose I/O.
724 767
768config LPC_ICH
769 tristate "Intel ICH LPC"
770 depends on PCI
771 select MFD_CORE
772 help
773 The LPC bridge function of the Intel ICH provides support for
774 many functional units. This driver provides needed support for
775 other drivers to control these functions, currently GPIO and
776 watchdog.
777
725config MFD_RDC321X 778config MFD_RDC321X
726 tristate "Support for RDC-R321x southbridge" 779 tristate "Support for RDC-R321x southbridge"
727 select MFD_CORE 780 select MFD_CORE
@@ -854,6 +907,11 @@ config MFD_RC5T583
854 Additional drivers must be enabled in order to use the 907 Additional drivers must be enabled in order to use the
855 different functionality of the device. 908 different functionality of the device.
856 909
910config MFD_STA2X11
911 bool "STA2X11 multi function device support"
912 depends on STA2X11
913 select MFD_CORE
914
857config MFD_ANATOP 915config MFD_ANATOP
858 bool "Support for Freescale i.MX on-chip ANATOP controller" 916 bool "Support for Freescale i.MX on-chip ANATOP controller"
859 depends on SOC_IMX6Q 917 depends on SOC_IMX6Q
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 43672b87805a..75f6ed68a4b9 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_MFD_DAVINCI_VOICECODEC) += davinci_voicecodec.o
15obj-$(CONFIG_MFD_DM355EVM_MSP) += dm355evm_msp.o 15obj-$(CONFIG_MFD_DM355EVM_MSP) += dm355evm_msp.o
16obj-$(CONFIG_MFD_TI_SSP) += ti-ssp.o 16obj-$(CONFIG_MFD_TI_SSP) += ti-ssp.o
17 17
18obj-$(CONFIG_MFD_STA2X11) += sta2x11-mfd.o
18obj-$(CONFIG_MFD_STMPE) += stmpe.o 19obj-$(CONFIG_MFD_STMPE) += stmpe.o
19obj-$(CONFIG_STMPE_I2C) += stmpe-i2c.o 20obj-$(CONFIG_STMPE_I2C) += stmpe-i2c.o
20obj-$(CONFIG_STMPE_SPI) += stmpe-spi.o 21obj-$(CONFIG_STMPE_SPI) += stmpe-spi.o
@@ -54,6 +55,8 @@ obj-$(CONFIG_TWL6030_PWM) += twl6030-pwm.o
54obj-$(CONFIG_TWL6040_CORE) += twl6040-core.o twl6040-irq.o 55obj-$(CONFIG_TWL6040_CORE) += twl6040-core.o twl6040-irq.o
55 56
56obj-$(CONFIG_MFD_MC13XXX) += mc13xxx-core.o 57obj-$(CONFIG_MFD_MC13XXX) += mc13xxx-core.o
58obj-$(CONFIG_MFD_MC13XXX_SPI) += mc13xxx-spi.o
59obj-$(CONFIG_MFD_MC13XXX_I2C) += mc13xxx-i2c.o
57 60
58obj-$(CONFIG_MFD_CORE) += mfd-core.o 61obj-$(CONFIG_MFD_CORE) += mfd-core.o
59 62
@@ -75,6 +78,7 @@ obj-$(CONFIG_PMIC_DA9052) += da9052-core.o
75obj-$(CONFIG_MFD_DA9052_SPI) += da9052-spi.o 78obj-$(CONFIG_MFD_DA9052_SPI) += da9052-spi.o
76obj-$(CONFIG_MFD_DA9052_I2C) += da9052-i2c.o 79obj-$(CONFIG_MFD_DA9052_I2C) += da9052-i2c.o
77 80
81obj-$(CONFIG_MFD_MAX77693) += max77693.o max77693-irq.o
78max8925-objs := max8925-core.o max8925-i2c.o 82max8925-objs := max8925-core.o max8925-i2c.o
79obj-$(CONFIG_MFD_MAX8925) += max8925.o 83obj-$(CONFIG_MFD_MAX8925) += max8925.o
80obj-$(CONFIG_MFD_MAX8997) += max8997.o max8997-irq.o 84obj-$(CONFIG_MFD_MAX8997) += max8997.o max8997-irq.o
@@ -87,15 +91,15 @@ obj-$(CONFIG_PCF50633_GPIO) += pcf50633-gpio.o
87obj-$(CONFIG_ABX500_CORE) += abx500-core.o 91obj-$(CONFIG_ABX500_CORE) += abx500-core.o
88obj-$(CONFIG_AB3100_CORE) += ab3100-core.o 92obj-$(CONFIG_AB3100_CORE) += ab3100-core.o
89obj-$(CONFIG_AB3100_OTP) += ab3100-otp.o 93obj-$(CONFIG_AB3100_OTP) += ab3100-otp.o
90obj-$(CONFIG_AB8500_CORE) += ab8500-core.o ab8500-sysctrl.o
91obj-$(CONFIG_AB8500_DEBUG) += ab8500-debugfs.o 94obj-$(CONFIG_AB8500_DEBUG) += ab8500-debugfs.o
92obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o 95obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o
93obj-$(CONFIG_MFD_DB8500_PRCMU) += db8500-prcmu.o 96obj-$(CONFIG_MFD_DB8500_PRCMU) += db8500-prcmu.o
94# ab8500-i2c need to come after db8500-prcmu (which provides the channel) 97# ab8500-core need to come after db8500-prcmu (which provides the channel)
95obj-$(CONFIG_AB8500_I2C_CORE) += ab8500-i2c.o 98obj-$(CONFIG_AB8500_CORE) += ab8500-core.o ab8500-sysctrl.o
96obj-$(CONFIG_MFD_TIMBERDALE) += timberdale.o 99obj-$(CONFIG_MFD_TIMBERDALE) += timberdale.o
97obj-$(CONFIG_PMIC_ADP5520) += adp5520.o 100obj-$(CONFIG_PMIC_ADP5520) += adp5520.o
98obj-$(CONFIG_LPC_SCH) += lpc_sch.o 101obj-$(CONFIG_LPC_SCH) += lpc_sch.o
102obj-$(CONFIG_LPC_ICH) += lpc_ich.o
99obj-$(CONFIG_MFD_RDC321X) += rdc321x-southbridge.o 103obj-$(CONFIG_MFD_RDC321X) += rdc321x-southbridge.o
100obj-$(CONFIG_MFD_JANZ_CMODIO) += janz-cmodio.o 104obj-$(CONFIG_MFD_JANZ_CMODIO) += janz-cmodio.o
101obj-$(CONFIG_MFD_JZ4740_ADC) += jz4740-adc.o 105obj-$(CONFIG_MFD_JZ4740_ADC) += jz4740-adc.o
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 1f08704f7ae8..dac0e2998603 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -18,7 +18,10 @@
18#include <linux/mfd/core.h> 18#include <linux/mfd/core.h>
19#include <linux/mfd/abx500.h> 19#include <linux/mfd/abx500.h>
20#include <linux/mfd/abx500/ab8500.h> 20#include <linux/mfd/abx500/ab8500.h>
21#include <linux/mfd/dbx500-prcmu.h>
21#include <linux/regulator/ab8500.h> 22#include <linux/regulator/ab8500.h>
23#include <linux/of.h>
24#include <linux/of_device.h>
22 25
23/* 26/*
24 * Interrupt register offsets 27 * Interrupt register offsets
@@ -91,12 +94,24 @@
91#define AB8500_IT_MASK23_REG 0x56 94#define AB8500_IT_MASK23_REG 0x56
92#define AB8500_IT_MASK24_REG 0x57 95#define AB8500_IT_MASK24_REG 0x57
93 96
97/*
98 * latch hierarchy registers
99 */
100#define AB8500_IT_LATCHHIER1_REG 0x60
101#define AB8500_IT_LATCHHIER2_REG 0x61
102#define AB8500_IT_LATCHHIER3_REG 0x62
103
104#define AB8500_IT_LATCHHIER_NUM 3
105
94#define AB8500_REV_REG 0x80 106#define AB8500_REV_REG 0x80
95#define AB8500_IC_NAME_REG 0x82 107#define AB8500_IC_NAME_REG 0x82
96#define AB8500_SWITCH_OFF_STATUS 0x00 108#define AB8500_SWITCH_OFF_STATUS 0x00
97 109
98#define AB8500_TURN_ON_STATUS 0x00 110#define AB8500_TURN_ON_STATUS 0x00
99 111
112static bool no_bm; /* No battery management */
113module_param(no_bm, bool, S_IRUGO);
114
100#define AB9540_MODEM_CTRL2_REG 0x23 115#define AB9540_MODEM_CTRL2_REG 0x23
101#define AB9540_MODEM_CTRL2_SWDBBRSTN_BIT BIT(2) 116#define AB9540_MODEM_CTRL2_SWDBBRSTN_BIT BIT(2)
102 117
@@ -125,6 +140,41 @@ static const char ab8500_version_str[][7] = {
125 [AB8500_VERSION_AB8540] = "AB8540", 140 [AB8500_VERSION_AB8540] = "AB8540",
126}; 141};
127 142
143static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data)
144{
145 int ret;
146
147 ret = prcmu_abb_write((u8)(addr >> 8), (u8)(addr & 0xFF), &data, 1);
148 if (ret < 0)
149 dev_err(ab8500->dev, "prcmu i2c error %d\n", ret);
150 return ret;
151}
152
153static int ab8500_i2c_write_masked(struct ab8500 *ab8500, u16 addr, u8 mask,
154 u8 data)
155{
156 int ret;
157
158 ret = prcmu_abb_write_masked((u8)(addr >> 8), (u8)(addr & 0xFF), &data,
159 &mask, 1);
160 if (ret < 0)
161 dev_err(ab8500->dev, "prcmu i2c error %d\n", ret);
162 return ret;
163}
164
165static int ab8500_i2c_read(struct ab8500 *ab8500, u16 addr)
166{
167 int ret;
168 u8 data;
169
170 ret = prcmu_abb_read((u8)(addr >> 8), (u8)(addr & 0xFF), &data, 1);
171 if (ret < 0) {
172 dev_err(ab8500->dev, "prcmu i2c error %d\n", ret);
173 return ret;
174 }
175 return (int)data;
176}
177
128static int ab8500_get_chip_id(struct device *dev) 178static int ab8500_get_chip_id(struct device *dev)
129{ 179{
130 struct ab8500 *ab8500; 180 struct ab8500 *ab8500;
@@ -161,9 +211,13 @@ static int set_register_interruptible(struct ab8500 *ab8500, u8 bank,
161static int ab8500_set_register(struct device *dev, u8 bank, 211static int ab8500_set_register(struct device *dev, u8 bank,
162 u8 reg, u8 value) 212 u8 reg, u8 value)
163{ 213{
214 int ret;
164 struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); 215 struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
165 216
166 return set_register_interruptible(ab8500, bank, reg, value); 217 atomic_inc(&ab8500->transfer_ongoing);
218 ret = set_register_interruptible(ab8500, bank, reg, value);
219 atomic_dec(&ab8500->transfer_ongoing);
220 return ret;
167} 221}
168 222
169static int get_register_interruptible(struct ab8500 *ab8500, u8 bank, 223static int get_register_interruptible(struct ab8500 *ab8500, u8 bank,
@@ -192,9 +246,13 @@ static int get_register_interruptible(struct ab8500 *ab8500, u8 bank,
192static int ab8500_get_register(struct device *dev, u8 bank, 246static int ab8500_get_register(struct device *dev, u8 bank,
193 u8 reg, u8 *value) 247 u8 reg, u8 *value)
194{ 248{
249 int ret;
195 struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); 250 struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
196 251
197 return get_register_interruptible(ab8500, bank, reg, value); 252 atomic_inc(&ab8500->transfer_ongoing);
253 ret = get_register_interruptible(ab8500, bank, reg, value);
254 atomic_dec(&ab8500->transfer_ongoing);
255 return ret;
198} 256}
199 257
200static int mask_and_set_register_interruptible(struct ab8500 *ab8500, u8 bank, 258static int mask_and_set_register_interruptible(struct ab8500 *ab8500, u8 bank,
@@ -241,11 +299,14 @@ out:
241static int ab8500_mask_and_set_register(struct device *dev, 299static int ab8500_mask_and_set_register(struct device *dev,
242 u8 bank, u8 reg, u8 bitmask, u8 bitvalues) 300 u8 bank, u8 reg, u8 bitmask, u8 bitvalues)
243{ 301{
302 int ret;
244 struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); 303 struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
245 304
246 return mask_and_set_register_interruptible(ab8500, bank, reg, 305 atomic_inc(&ab8500->transfer_ongoing);
247 bitmask, bitvalues); 306 ret= mask_and_set_register_interruptible(ab8500, bank, reg,
248 307 bitmask, bitvalues);
308 atomic_dec(&ab8500->transfer_ongoing);
309 return ret;
249} 310}
250 311
251static struct abx500_ops ab8500_ops = { 312static struct abx500_ops ab8500_ops = {
@@ -264,6 +325,7 @@ static void ab8500_irq_lock(struct irq_data *data)
264 struct ab8500 *ab8500 = irq_data_get_irq_chip_data(data); 325 struct ab8500 *ab8500 = irq_data_get_irq_chip_data(data);
265 326
266 mutex_lock(&ab8500->irq_lock); 327 mutex_lock(&ab8500->irq_lock);
328 atomic_inc(&ab8500->transfer_ongoing);
267} 329}
268 330
269static void ab8500_irq_sync_unlock(struct irq_data *data) 331static void ab8500_irq_sync_unlock(struct irq_data *data)
@@ -292,7 +354,7 @@ static void ab8500_irq_sync_unlock(struct irq_data *data)
292 reg = AB8500_IT_MASK1_REG + ab8500->irq_reg_offset[i]; 354 reg = AB8500_IT_MASK1_REG + ab8500->irq_reg_offset[i];
293 set_register_interruptible(ab8500, AB8500_INTERRUPT, reg, new); 355 set_register_interruptible(ab8500, AB8500_INTERRUPT, reg, new);
294 } 356 }
295 357 atomic_dec(&ab8500->transfer_ongoing);
296 mutex_unlock(&ab8500->irq_lock); 358 mutex_unlock(&ab8500->irq_lock);
297} 359}
298 360
@@ -325,6 +387,90 @@ static struct irq_chip ab8500_irq_chip = {
325 .irq_unmask = ab8500_irq_unmask, 387 .irq_unmask = ab8500_irq_unmask,
326}; 388};
327 389
390static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500,
391 int latch_offset, u8 latch_val)
392{
393 int int_bit = __ffs(latch_val);
394 int line, i;
395
396 do {
397 int_bit = __ffs(latch_val);
398
399 for (i = 0; i < ab8500->mask_size; i++)
400 if (ab8500->irq_reg_offset[i] == latch_offset)
401 break;
402
403 if (i >= ab8500->mask_size) {
404 dev_err(ab8500->dev, "Register offset 0x%2x not declared\n",
405 latch_offset);
406 return -ENXIO;
407 }
408
409 line = (i << 3) + int_bit;
410 latch_val &= ~(1 << int_bit);
411
412 handle_nested_irq(ab8500->irq_base + line);
413 } while (latch_val);
414
415 return 0;
416}
417
418static int ab8500_handle_hierarchical_latch(struct ab8500 *ab8500,
419 int hier_offset, u8 hier_val)
420{
421 int latch_bit, status;
422 u8 latch_offset, latch_val;
423
424 do {
425 latch_bit = __ffs(hier_val);
426 latch_offset = (hier_offset << 3) + latch_bit;
427
428 /* Fix inconsistent ITFromLatch25 bit mapping... */
429 if (unlikely(latch_offset == 17))
430 latch_offset = 24;
431
432 status = get_register_interruptible(ab8500,
433 AB8500_INTERRUPT,
434 AB8500_IT_LATCH1_REG + latch_offset,
435 &latch_val);
436 if (status < 0 || latch_val == 0)
437 goto discard;
438
439 status = ab8500_handle_hierarchical_line(ab8500,
440 latch_offset, latch_val);
441 if (status < 0)
442 return status;
443discard:
444 hier_val &= ~(1 << latch_bit);
445 } while (hier_val);
446
447 return 0;
448}
449
450static irqreturn_t ab8500_hierarchical_irq(int irq, void *dev)
451{
452 struct ab8500 *ab8500 = dev;
453 u8 i;
454
455 dev_vdbg(ab8500->dev, "interrupt\n");
456
457 /* Hierarchical interrupt version */
458 for (i = 0; i < AB8500_IT_LATCHHIER_NUM; i++) {
459 int status;
460 u8 hier_val;
461
462 status = get_register_interruptible(ab8500, AB8500_INTERRUPT,
463 AB8500_IT_LATCHHIER1_REG + i, &hier_val);
464 if (status < 0 || hier_val == 0)
465 continue;
466
467 status = ab8500_handle_hierarchical_latch(ab8500, i, hier_val);
468 if (status < 0)
469 break;
470 }
471 return IRQ_HANDLED;
472}
473
328static irqreturn_t ab8500_irq(int irq, void *dev) 474static irqreturn_t ab8500_irq(int irq, void *dev)
329{ 475{
330 struct ab8500 *ab8500 = dev; 476 struct ab8500 *ab8500 = dev;
@@ -332,6 +478,8 @@ static irqreturn_t ab8500_irq(int irq, void *dev)
332 478
333 dev_vdbg(ab8500->dev, "interrupt\n"); 479 dev_vdbg(ab8500->dev, "interrupt\n");
334 480
481 atomic_inc(&ab8500->transfer_ongoing);
482
335 for (i = 0; i < ab8500->mask_size; i++) { 483 for (i = 0; i < ab8500->mask_size; i++) {
336 int regoffset = ab8500->irq_reg_offset[i]; 484 int regoffset = ab8500->irq_reg_offset[i];
337 int status; 485 int status;
@@ -355,9 +503,10 @@ static irqreturn_t ab8500_irq(int irq, void *dev)
355 503
356 handle_nested_irq(ab8500->irq_base + line); 504 handle_nested_irq(ab8500->irq_base + line);
357 value &= ~(1 << bit); 505 value &= ~(1 << bit);
506
358 } while (value); 507 } while (value);
359 } 508 }
360 509 atomic_dec(&ab8500->transfer_ongoing);
361 return IRQ_HANDLED; 510 return IRQ_HANDLED;
362} 511}
363 512
@@ -411,6 +560,14 @@ static void ab8500_irq_remove(struct ab8500 *ab8500)
411 } 560 }
412} 561}
413 562
563int ab8500_suspend(struct ab8500 *ab8500)
564{
565 if (atomic_read(&ab8500->transfer_ongoing))
566 return -EINVAL;
567 else
568 return 0;
569}
570
414/* AB8500 GPIO Resources */ 571/* AB8500 GPIO Resources */
415static struct resource __devinitdata ab8500_gpio_resources[] = { 572static struct resource __devinitdata ab8500_gpio_resources[] = {
416 { 573 {
@@ -744,6 +901,39 @@ static struct resource __devinitdata ab8500_usb_resources[] = {
744 }, 901 },
745}; 902};
746 903
904static struct resource __devinitdata ab8505_iddet_resources[] = {
905 {
906 .name = "KeyDeglitch",
907 .start = AB8505_INT_KEYDEGLITCH,
908 .end = AB8505_INT_KEYDEGLITCH,
909 .flags = IORESOURCE_IRQ,
910 },
911 {
912 .name = "KP",
913 .start = AB8505_INT_KP,
914 .end = AB8505_INT_KP,
915 .flags = IORESOURCE_IRQ,
916 },
917 {
918 .name = "IKP",
919 .start = AB8505_INT_IKP,
920 .end = AB8505_INT_IKP,
921 .flags = IORESOURCE_IRQ,
922 },
923 {
924 .name = "IKR",
925 .start = AB8505_INT_IKR,
926 .end = AB8505_INT_IKR,
927 .flags = IORESOURCE_IRQ,
928 },
929 {
930 .name = "KeyStuck",
931 .start = AB8505_INT_KEYSTUCK,
932 .end = AB8505_INT_KEYSTUCK,
933 .flags = IORESOURCE_IRQ,
934 },
935};
936
747static struct resource __devinitdata ab8500_temp_resources[] = { 937static struct resource __devinitdata ab8500_temp_resources[] = {
748 { 938 {
749 .name = "AB8500_TEMP_WARM", 939 .name = "AB8500_TEMP_WARM",
@@ -778,35 +968,11 @@ static struct mfd_cell __devinitdata abx500_common_devs[] = {
778 .resources = ab8500_rtc_resources, 968 .resources = ab8500_rtc_resources,
779 }, 969 },
780 { 970 {
781 .name = "ab8500-charger",
782 .num_resources = ARRAY_SIZE(ab8500_charger_resources),
783 .resources = ab8500_charger_resources,
784 },
785 {
786 .name = "ab8500-btemp",
787 .num_resources = ARRAY_SIZE(ab8500_btemp_resources),
788 .resources = ab8500_btemp_resources,
789 },
790 {
791 .name = "ab8500-fg",
792 .num_resources = ARRAY_SIZE(ab8500_fg_resources),
793 .resources = ab8500_fg_resources,
794 },
795 {
796 .name = "ab8500-chargalg",
797 .num_resources = ARRAY_SIZE(ab8500_chargalg_resources),
798 .resources = ab8500_chargalg_resources,
799 },
800 {
801 .name = "ab8500-acc-det", 971 .name = "ab8500-acc-det",
802 .num_resources = ARRAY_SIZE(ab8500_av_acc_detect_resources), 972 .num_resources = ARRAY_SIZE(ab8500_av_acc_detect_resources),
803 .resources = ab8500_av_acc_detect_resources, 973 .resources = ab8500_av_acc_detect_resources,
804 }, 974 },
805 { 975 {
806 .name = "ab8500-codec",
807 },
808
809 {
810 .name = "ab8500-poweron-key", 976 .name = "ab8500-poweron-key",
811 .num_resources = ARRAY_SIZE(ab8500_poweronkey_db_resources), 977 .num_resources = ARRAY_SIZE(ab8500_poweronkey_db_resources),
812 .resources = ab8500_poweronkey_db_resources, 978 .resources = ab8500_poweronkey_db_resources,
@@ -834,6 +1000,29 @@ static struct mfd_cell __devinitdata abx500_common_devs[] = {
834 }, 1000 },
835}; 1001};
836 1002
1003static struct mfd_cell __devinitdata ab8500_bm_devs[] = {
1004 {
1005 .name = "ab8500-charger",
1006 .num_resources = ARRAY_SIZE(ab8500_charger_resources),
1007 .resources = ab8500_charger_resources,
1008 },
1009 {
1010 .name = "ab8500-btemp",
1011 .num_resources = ARRAY_SIZE(ab8500_btemp_resources),
1012 .resources = ab8500_btemp_resources,
1013 },
1014 {
1015 .name = "ab8500-fg",
1016 .num_resources = ARRAY_SIZE(ab8500_fg_resources),
1017 .resources = ab8500_fg_resources,
1018 },
1019 {
1020 .name = "ab8500-chargalg",
1021 .num_resources = ARRAY_SIZE(ab8500_chargalg_resources),
1022 .resources = ab8500_chargalg_resources,
1023 },
1024};
1025
837static struct mfd_cell __devinitdata ab8500_devs[] = { 1026static struct mfd_cell __devinitdata ab8500_devs[] = {
838 { 1027 {
839 .name = "ab8500-gpio", 1028 .name = "ab8500-gpio",
@@ -845,6 +1034,9 @@ static struct mfd_cell __devinitdata ab8500_devs[] = {
845 .num_resources = ARRAY_SIZE(ab8500_usb_resources), 1034 .num_resources = ARRAY_SIZE(ab8500_usb_resources),
846 .resources = ab8500_usb_resources, 1035 .resources = ab8500_usb_resources,
847 }, 1036 },
1037 {
1038 .name = "ab8500-codec",
1039 },
848}; 1040};
849 1041
850static struct mfd_cell __devinitdata ab9540_devs[] = { 1042static struct mfd_cell __devinitdata ab9540_devs[] = {
@@ -858,6 +1050,18 @@ static struct mfd_cell __devinitdata ab9540_devs[] = {
858 .num_resources = ARRAY_SIZE(ab8500_usb_resources), 1050 .num_resources = ARRAY_SIZE(ab8500_usb_resources),
859 .resources = ab8500_usb_resources, 1051 .resources = ab8500_usb_resources,
860 }, 1052 },
1053 {
1054 .name = "ab9540-codec",
1055 },
1056};
1057
1058/* Device list common to ab9540 and ab8505 */
1059static struct mfd_cell __devinitdata ab9540_ab8505_devs[] = {
1060 {
1061 .name = "ab-iddet",
1062 .num_resources = ARRAY_SIZE(ab8505_iddet_resources),
1063 .resources = ab8505_iddet_resources,
1064 },
861}; 1065};
862 1066
863static ssize_t show_chip_id(struct device *dev, 1067static ssize_t show_chip_id(struct device *dev,
@@ -1003,18 +1207,66 @@ static struct attribute_group ab9540_attr_group = {
1003 .attrs = ab9540_sysfs_entries, 1207 .attrs = ab9540_sysfs_entries,
1004}; 1208};
1005 1209
1006int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version) 1210static const struct of_device_id ab8500_match[] = {
1211 {
1212 .compatible = "stericsson,ab8500",
1213 .data = (void *)AB8500_VERSION_AB8500,
1214 },
1215 {},
1216};
1217
1218static int __devinit ab8500_probe(struct platform_device *pdev)
1007{ 1219{
1008 struct ab8500_platform_data *plat = dev_get_platdata(ab8500->dev); 1220 struct ab8500_platform_data *plat = dev_get_platdata(&pdev->dev);
1221 const struct platform_device_id *platid = platform_get_device_id(pdev);
1222 enum ab8500_version version = AB8500_VERSION_UNDEFINED;
1223 struct device_node *np = pdev->dev.of_node;
1224 struct ab8500 *ab8500;
1225 struct resource *resource;
1009 int ret; 1226 int ret;
1010 int i; 1227 int i;
1011 u8 value; 1228 u8 value;
1012 1229
1230 ab8500 = kzalloc(sizeof *ab8500, GFP_KERNEL);
1231 if (!ab8500)
1232 return -ENOMEM;
1233
1013 if (plat) 1234 if (plat)
1014 ab8500->irq_base = plat->irq_base; 1235 ab8500->irq_base = plat->irq_base;
1236 else if (np)
1237 ret = of_property_read_u32(np, "stericsson,irq-base", &ab8500->irq_base);
1238
1239 if (!ab8500->irq_base) {
1240 dev_info(&pdev->dev, "couldn't find irq-base\n");
1241 ret = -EINVAL;
1242 goto out_free_ab8500;
1243 }
1244
1245 ab8500->dev = &pdev->dev;
1246
1247 resource = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
1248 if (!resource) {
1249 ret = -ENODEV;
1250 goto out_free_ab8500;
1251 }
1252
1253 ab8500->irq = resource->start;
1254
1255 ab8500->read = ab8500_i2c_read;
1256 ab8500->write = ab8500_i2c_write;
1257 ab8500->write_masked = ab8500_i2c_write_masked;
1015 1258
1016 mutex_init(&ab8500->lock); 1259 mutex_init(&ab8500->lock);
1017 mutex_init(&ab8500->irq_lock); 1260 mutex_init(&ab8500->irq_lock);
1261 atomic_set(&ab8500->transfer_ongoing, 0);
1262
1263 platform_set_drvdata(pdev, ab8500);
1264
1265 if (platid)
1266 version = platid->driver_data;
1267 else if (np)
1268 version = (unsigned int)
1269 of_match_device(ab8500_match, &pdev->dev)->data;
1018 1270
1019 if (version != AB8500_VERSION_UNDEFINED) 1271 if (version != AB8500_VERSION_UNDEFINED)
1020 ab8500->version = version; 1272 ab8500->version = version;
@@ -1022,7 +1274,7 @@ int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version)
1022 ret = get_register_interruptible(ab8500, AB8500_MISC, 1274 ret = get_register_interruptible(ab8500, AB8500_MISC,
1023 AB8500_IC_NAME_REG, &value); 1275 AB8500_IC_NAME_REG, &value);
1024 if (ret < 0) 1276 if (ret < 0)
1025 return ret; 1277 goto out_free_ab8500;
1026 1278
1027 ab8500->version = value; 1279 ab8500->version = value;
1028 } 1280 }
@@ -1030,7 +1282,7 @@ int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version)
1030 ret = get_register_interruptible(ab8500, AB8500_MISC, 1282 ret = get_register_interruptible(ab8500, AB8500_MISC,
1031 AB8500_REV_REG, &value); 1283 AB8500_REV_REG, &value);
1032 if (ret < 0) 1284 if (ret < 0)
1033 return ret; 1285 goto out_free_ab8500;
1034 1286
1035 ab8500->chip_id = value; 1287 ab8500->chip_id = value;
1036 1288
@@ -1105,30 +1357,57 @@ int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version)
1105 if (ret) 1357 if (ret)
1106 goto out_freeoldmask; 1358 goto out_freeoldmask;
1107 1359
1108 ret = request_threaded_irq(ab8500->irq, NULL, ab8500_irq, 1360 /* Activate this feature only in ab9540 */
1109 IRQF_ONESHOT | IRQF_NO_SUSPEND, 1361 /* till tests are done on ab8500 1p2 or later*/
1110 "ab8500", ab8500); 1362 if (is_ab9540(ab8500))
1363 ret = request_threaded_irq(ab8500->irq, NULL,
1364 ab8500_hierarchical_irq,
1365 IRQF_ONESHOT | IRQF_NO_SUSPEND,
1366 "ab8500", ab8500);
1367 else
1368 ret = request_threaded_irq(ab8500->irq, NULL,
1369 ab8500_irq,
1370 IRQF_ONESHOT | IRQF_NO_SUSPEND,
1371 "ab8500", ab8500);
1111 if (ret) 1372 if (ret)
1112 goto out_removeirq; 1373 goto out_removeirq;
1113 } 1374 }
1114 1375
1115 ret = mfd_add_devices(ab8500->dev, 0, abx500_common_devs, 1376 if (!np) {
1116 ARRAY_SIZE(abx500_common_devs), NULL, 1377 ret = mfd_add_devices(ab8500->dev, 0, abx500_common_devs,
1117 ab8500->irq_base); 1378 ARRAY_SIZE(abx500_common_devs), NULL,
1379 ab8500->irq_base);
1118 1380
1119 if (ret) 1381 if (ret)
1120 goto out_freeirq; 1382 goto out_freeirq;
1383
1384 if (is_ab9540(ab8500))
1385 ret = mfd_add_devices(ab8500->dev, 0, ab9540_devs,
1386 ARRAY_SIZE(ab9540_devs), NULL,
1387 ab8500->irq_base);
1388 else
1389 ret = mfd_add_devices(ab8500->dev, 0, ab8500_devs,
1390 ARRAY_SIZE(ab8500_devs), NULL,
1391 ab8500->irq_base);
1392 if (ret)
1393 goto out_freeirq;
1121 1394
1122 if (is_ab9540(ab8500)) 1395 if (is_ab9540(ab8500) || is_ab8505(ab8500))
1123 ret = mfd_add_devices(ab8500->dev, 0, ab9540_devs, 1396 ret = mfd_add_devices(ab8500->dev, 0, ab9540_ab8505_devs,
1124 ARRAY_SIZE(ab9540_devs), NULL, 1397 ARRAY_SIZE(ab9540_ab8505_devs), NULL,
1125 ab8500->irq_base); 1398 ab8500->irq_base);
1126 else 1399 if (ret)
1127 ret = mfd_add_devices(ab8500->dev, 0, ab8500_devs, 1400 goto out_freeirq;
1128 ARRAY_SIZE(ab9540_devs), NULL, 1401 }
1129 ab8500->irq_base); 1402
1130 if (ret) 1403 if (!no_bm) {
1131 goto out_freeirq; 1404 /* Add battery management devices */
1405 ret = mfd_add_devices(ab8500->dev, 0, ab8500_bm_devs,
1406 ARRAY_SIZE(ab8500_bm_devs), NULL,
1407 ab8500->irq_base);
1408 if (ret)
1409 dev_err(ab8500->dev, "error adding bm devices\n");
1410 }
1132 1411
1133 if (is_ab9540(ab8500)) 1412 if (is_ab9540(ab8500))
1134 ret = sysfs_create_group(&ab8500->dev->kobj, 1413 ret = sysfs_create_group(&ab8500->dev->kobj,
@@ -1151,12 +1430,16 @@ out_freeoldmask:
1151 kfree(ab8500->oldmask); 1430 kfree(ab8500->oldmask);
1152out_freemask: 1431out_freemask:
1153 kfree(ab8500->mask); 1432 kfree(ab8500->mask);
1433out_free_ab8500:
1434 kfree(ab8500);
1154 1435
1155 return ret; 1436 return ret;
1156} 1437}
1157 1438
1158int __devexit ab8500_exit(struct ab8500 *ab8500) 1439static int __devexit ab8500_remove(struct platform_device *pdev)
1159{ 1440{
1441 struct ab8500 *ab8500 = platform_get_drvdata(pdev);
1442
1160 if (is_ab9540(ab8500)) 1443 if (is_ab9540(ab8500))
1161 sysfs_remove_group(&ab8500->dev->kobj, &ab9540_attr_group); 1444 sysfs_remove_group(&ab8500->dev->kobj, &ab9540_attr_group);
1162 else 1445 else
@@ -1168,10 +1451,42 @@ int __devexit ab8500_exit(struct ab8500 *ab8500)
1168 } 1451 }
1169 kfree(ab8500->oldmask); 1452 kfree(ab8500->oldmask);
1170 kfree(ab8500->mask); 1453 kfree(ab8500->mask);
1454 kfree(ab8500);
1171 1455
1172 return 0; 1456 return 0;
1173} 1457}
1174 1458
1459static const struct platform_device_id ab8500_id[] = {
1460 { "ab8500-core", AB8500_VERSION_AB8500 },
1461 { "ab8505-i2c", AB8500_VERSION_AB8505 },
1462 { "ab9540-i2c", AB8500_VERSION_AB9540 },
1463 { "ab8540-i2c", AB8500_VERSION_AB8540 },
1464 { }
1465};
1466
1467static struct platform_driver ab8500_core_driver = {
1468 .driver = {
1469 .name = "ab8500-core",
1470 .owner = THIS_MODULE,
1471 .of_match_table = ab8500_match,
1472 },
1473 .probe = ab8500_probe,
1474 .remove = __devexit_p(ab8500_remove),
1475 .id_table = ab8500_id,
1476};
1477
1478static int __init ab8500_core_init(void)
1479{
1480 return platform_driver_register(&ab8500_core_driver);
1481}
1482
1483static void __exit ab8500_core_exit(void)
1484{
1485 platform_driver_unregister(&ab8500_core_driver);
1486}
1487arch_initcall(ab8500_core_init);
1488module_exit(ab8500_core_exit);
1489
1175MODULE_AUTHOR("Mattias Wallin, Srinidhi Kasagar, Rabin Vincent"); 1490MODULE_AUTHOR("Mattias Wallin, Srinidhi Kasagar, Rabin Vincent");
1176MODULE_DESCRIPTION("AB8500 MFD core"); 1491MODULE_DESCRIPTION("AB8500 MFD core");
1177MODULE_LICENSE("GPL v2"); 1492MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index 9a0211aa8897..50c4c89ab220 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -608,10 +608,16 @@ static int __devexit ab8500_debug_remove(struct platform_device *plf)
608 return 0; 608 return 0;
609} 609}
610 610
611static const struct of_device_id ab8500_debug_match[] = {
612 { .compatible = "stericsson,ab8500-debug", },
613 {}
614};
615
611static struct platform_driver ab8500_debug_driver = { 616static struct platform_driver ab8500_debug_driver = {
612 .driver = { 617 .driver = {
613 .name = "ab8500-debug", 618 .name = "ab8500-debug",
614 .owner = THIS_MODULE, 619 .owner = THIS_MODULE,
620 .of_match_table = ab8500_debug_match,
615 }, 621 },
616 .probe = ab8500_debug_probe, 622 .probe = ab8500_debug_probe,
617 .remove = __devexit_p(ab8500_debug_remove) 623 .remove = __devexit_p(ab8500_debug_remove)
diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c
index c39fc716e1dc..b86fd8e1ec3f 100644
--- a/drivers/mfd/ab8500-gpadc.c
+++ b/drivers/mfd/ab8500-gpadc.c
@@ -584,7 +584,7 @@ static int __devinit ab8500_gpadc_probe(struct platform_device *pdev)
584 584
585 gpadc->irq = platform_get_irq_byname(pdev, "SW_CONV_END"); 585 gpadc->irq = platform_get_irq_byname(pdev, "SW_CONV_END");
586 if (gpadc->irq < 0) { 586 if (gpadc->irq < 0) {
587 dev_err(gpadc->dev, "failed to get platform irq-%d\n", 587 dev_err(&pdev->dev, "failed to get platform irq-%d\n",
588 gpadc->irq); 588 gpadc->irq);
589 ret = gpadc->irq; 589 ret = gpadc->irq;
590 goto fail; 590 goto fail;
@@ -648,12 +648,18 @@ static int __devexit ab8500_gpadc_remove(struct platform_device *pdev)
648 return 0; 648 return 0;
649} 649}
650 650
651static const struct of_device_id ab8500_gpadc_match[] = {
652 { .compatible = "stericsson,ab8500-gpadc", },
653 {}
654};
655
651static struct platform_driver ab8500_gpadc_driver = { 656static struct platform_driver ab8500_gpadc_driver = {
652 .probe = ab8500_gpadc_probe, 657 .probe = ab8500_gpadc_probe,
653 .remove = __devexit_p(ab8500_gpadc_remove), 658 .remove = __devexit_p(ab8500_gpadc_remove),
654 .driver = { 659 .driver = {
655 .name = "ab8500-gpadc", 660 .name = "ab8500-gpadc",
656 .owner = THIS_MODULE, 661 .owner = THIS_MODULE,
662 .of_match_table = ab8500_gpadc_match,
657 }, 663 },
658}; 664};
659 665
diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c
deleted file mode 100644
index b83045f102be..000000000000
--- a/drivers/mfd/ab8500-i2c.c
+++ /dev/null
@@ -1,128 +0,0 @@
1/*
2 * Copyright (C) ST-Ericsson SA 2010
3 * Author: Mattias Wallin <mattias.wallin@stericsson.com> for ST-Ericsson.
4 * License Terms: GNU General Public License v2
5 * This file was based on drivers/mfd/ab8500-spi.c
6 */
7
8#include <linux/kernel.h>
9#include <linux/slab.h>
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/platform_device.h>
13#include <linux/mfd/abx500/ab8500.h>
14#include <linux/mfd/dbx500-prcmu.h>
15
16static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data)
17{
18 int ret;
19
20 ret = prcmu_abb_write((u8)(addr >> 8), (u8)(addr & 0xFF), &data, 1);
21 if (ret < 0)
22 dev_err(ab8500->dev, "prcmu i2c error %d\n", ret);
23 return ret;
24}
25
26static int ab8500_i2c_write_masked(struct ab8500 *ab8500, u16 addr, u8 mask,
27 u8 data)
28{
29 int ret;
30
31 ret = prcmu_abb_write_masked((u8)(addr >> 8), (u8)(addr & 0xFF), &data,
32 &mask, 1);
33 if (ret < 0)
34 dev_err(ab8500->dev, "prcmu i2c error %d\n", ret);
35 return ret;
36}
37
38static int ab8500_i2c_read(struct ab8500 *ab8500, u16 addr)
39{
40 int ret;
41 u8 data;
42
43 ret = prcmu_abb_read((u8)(addr >> 8), (u8)(addr & 0xFF), &data, 1);
44 if (ret < 0) {
45 dev_err(ab8500->dev, "prcmu i2c error %d\n", ret);
46 return ret;
47 }
48 return (int)data;
49}
50
51static int __devinit ab8500_i2c_probe(struct platform_device *plf)
52{
53 const struct platform_device_id *platid = platform_get_device_id(plf);
54 struct ab8500 *ab8500;
55 struct resource *resource;
56 int ret;
57
58 ab8500 = kzalloc(sizeof *ab8500, GFP_KERNEL);
59 if (!ab8500)
60 return -ENOMEM;
61
62 ab8500->dev = &plf->dev;
63
64 resource = platform_get_resource(plf, IORESOURCE_IRQ, 0);
65 if (!resource) {
66 kfree(ab8500);
67 return -ENODEV;
68 }
69
70 ab8500->irq = resource->start;
71
72 ab8500->read = ab8500_i2c_read;
73 ab8500->write = ab8500_i2c_write;
74 ab8500->write_masked = ab8500_i2c_write_masked;
75
76 platform_set_drvdata(plf, ab8500);
77
78 ret = ab8500_init(ab8500, platid->driver_data);
79 if (ret)
80 kfree(ab8500);
81
82
83 return ret;
84}
85
86static int __devexit ab8500_i2c_remove(struct platform_device *plf)
87{
88 struct ab8500 *ab8500 = platform_get_drvdata(plf);
89
90 ab8500_exit(ab8500);
91 kfree(ab8500);
92
93 return 0;
94}
95
96static const struct platform_device_id ab8500_id[] = {
97 { "ab8500-i2c", AB8500_VERSION_AB8500 },
98 { "ab8505-i2c", AB8500_VERSION_AB8505 },
99 { "ab9540-i2c", AB8500_VERSION_AB9540 },
100 { "ab8540-i2c", AB8500_VERSION_AB8540 },
101 { }
102};
103
104static struct platform_driver ab8500_i2c_driver = {
105 .driver = {
106 .name = "ab8500-i2c",
107 .owner = THIS_MODULE,
108 },
109 .probe = ab8500_i2c_probe,
110 .remove = __devexit_p(ab8500_i2c_remove),
111 .id_table = ab8500_id,
112};
113
114static int __init ab8500_i2c_init(void)
115{
116 return platform_driver_register(&ab8500_i2c_driver);
117}
118
119static void __exit ab8500_i2c_exit(void)
120{
121 platform_driver_unregister(&ab8500_i2c_driver);
122}
123arch_initcall(ab8500_i2c_init);
124module_exit(ab8500_i2c_exit);
125
126MODULE_AUTHOR("Mattias WALLIN <mattias.wallin@stericsson.com");
127MODULE_DESCRIPTION("AB8500 Core access via PRCMU I2C");
128MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index c28d4eb1eff0..5a3e51ccf258 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -61,10 +61,16 @@ static int __devexit ab8500_sysctrl_remove(struct platform_device *pdev)
61 return 0; 61 return 0;
62} 62}
63 63
64static const struct of_device_id ab8500_sysctrl_match[] = {
65 { .compatible = "stericsson,ab8500-sysctrl", },
66 {}
67};
68
64static struct platform_driver ab8500_sysctrl_driver = { 69static struct platform_driver ab8500_sysctrl_driver = {
65 .driver = { 70 .driver = {
66 .name = "ab8500-sysctrl", 71 .name = "ab8500-sysctrl",
67 .owner = THIS_MODULE, 72 .owner = THIS_MODULE,
73 .of_match_table = ab8500_sysctrl_match,
68 }, 74 },
69 .probe = ab8500_sysctrl_probe, 75 .probe = ab8500_sysctrl_probe,
70 .remove = __devexit_p(ab8500_sysctrl_remove), 76 .remove = __devexit_p(ab8500_sysctrl_remove),
diff --git a/drivers/mfd/anatop-mfd.c b/drivers/mfd/anatop-mfd.c
index 2af42480635e..6da06341f6c9 100644
--- a/drivers/mfd/anatop-mfd.c
+++ b/drivers/mfd/anatop-mfd.c
@@ -41,39 +41,26 @@
41#include <linux/of_address.h> 41#include <linux/of_address.h>
42#include <linux/mfd/anatop.h> 42#include <linux/mfd/anatop.h>
43 43
44u32 anatop_get_bits(struct anatop *adata, u32 addr, int bit_shift, 44u32 anatop_read_reg(struct anatop *adata, u32 addr)
45 int bit_width)
46{ 45{
47 u32 val, mask; 46 return readl(adata->ioreg + addr);
48
49 if (bit_width == 32)
50 mask = ~0;
51 else
52 mask = (1 << bit_width) - 1;
53
54 val = readl(adata->ioreg + addr);
55 val = (val >> bit_shift) & mask;
56
57 return val;
58} 47}
59EXPORT_SYMBOL_GPL(anatop_get_bits); 48EXPORT_SYMBOL_GPL(anatop_read_reg);
60 49
61void anatop_set_bits(struct anatop *adata, u32 addr, int bit_shift, 50void anatop_write_reg(struct anatop *adata, u32 addr, u32 data, u32 mask)
62 int bit_width, u32 data)
63{ 51{
64 u32 val, mask; 52 u32 val;
65 53
66 if (bit_width == 32) 54 data &= mask;
67 mask = ~0;
68 else
69 mask = (1 << bit_width) - 1;
70 55
71 spin_lock(&adata->reglock); 56 spin_lock(&adata->reglock);
72 val = readl(adata->ioreg + addr) & ~(mask << bit_shift); 57 val = readl(adata->ioreg + addr);
73 writel((data << bit_shift) | val, adata->ioreg + addr); 58 val &= ~mask;
59 val |= data;
60 writel(val, adata->ioreg + addr);
74 spin_unlock(&adata->reglock); 61 spin_unlock(&adata->reglock);
75} 62}
76EXPORT_SYMBOL_GPL(anatop_set_bits); 63EXPORT_SYMBOL_GPL(anatop_write_reg);
77 64
78static const struct of_device_id of_anatop_match[] = { 65static const struct of_device_id of_anatop_match[] = {
79 { .compatible = "fsl,imx6q-anatop", }, 66 { .compatible = "fsl,imx6q-anatop", },
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 1582c3d95257..383421bf5760 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -353,12 +353,28 @@ static int asic3_gpio_irq_type(struct irq_data *data, unsigned int type)
353 return 0; 353 return 0;
354} 354}
355 355
356static int asic3_gpio_irq_set_wake(struct irq_data *data, unsigned int on)
357{
358 struct asic3 *asic = irq_data_get_irq_chip_data(data);
359 u32 bank, index;
360 u16 bit;
361
362 bank = asic3_irq_to_bank(asic, data->irq);
363 index = asic3_irq_to_index(asic, data->irq);
364 bit = 1<<index;
365
366 asic3_set_register(asic, bank + ASIC3_GPIO_SLEEP_MASK, bit, !on);
367
368 return 0;
369}
370
356static struct irq_chip asic3_gpio_irq_chip = { 371static struct irq_chip asic3_gpio_irq_chip = {
357 .name = "ASIC3-GPIO", 372 .name = "ASIC3-GPIO",
358 .irq_ack = asic3_mask_gpio_irq, 373 .irq_ack = asic3_mask_gpio_irq,
359 .irq_mask = asic3_mask_gpio_irq, 374 .irq_mask = asic3_mask_gpio_irq,
360 .irq_unmask = asic3_unmask_gpio_irq, 375 .irq_unmask = asic3_unmask_gpio_irq,
361 .irq_set_type = asic3_gpio_irq_type, 376 .irq_set_type = asic3_gpio_irq_type,
377 .irq_set_wake = asic3_gpio_irq_set_wake,
362}; 378};
363 379
364static struct irq_chip asic3_irq_chip = { 380static struct irq_chip asic3_irq_chip = {
@@ -529,7 +545,7 @@ static int asic3_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
529{ 545{
530 struct asic3 *asic = container_of(chip, struct asic3, gpio); 546 struct asic3 *asic = container_of(chip, struct asic3, gpio);
531 547
532 return (offset < ASIC3_NUM_GPIOS) ? asic->irq_base + offset : -ENXIO; 548 return asic->irq_base + offset;
533} 549}
534 550
535static __init int asic3_gpio_probe(struct platform_device *pdev, 551static __init int asic3_gpio_probe(struct platform_device *pdev,
@@ -894,10 +910,13 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
894 asic3_mmc_resources[0].start >>= asic->bus_shift; 910 asic3_mmc_resources[0].start >>= asic->bus_shift;
895 asic3_mmc_resources[0].end >>= asic->bus_shift; 911 asic3_mmc_resources[0].end >>= asic->bus_shift;
896 912
897 ret = mfd_add_devices(&pdev->dev, pdev->id, 913 if (pdata->clock_rate) {
914 ds1wm_pdata.clock_rate = pdata->clock_rate;
915 ret = mfd_add_devices(&pdev->dev, pdev->id,
898 &asic3_cell_ds1wm, 1, mem, asic->irq_base); 916 &asic3_cell_ds1wm, 1, mem, asic->irq_base);
899 if (ret < 0) 917 if (ret < 0)
900 goto out; 918 goto out;
919 }
901 920
902 if (mem_sdio && (irq >= 0)) { 921 if (mem_sdio && (irq >= 0)) {
903 ret = mfd_add_devices(&pdev->dev, pdev->id, 922 ret = mfd_add_devices(&pdev->dev, pdev->id,
@@ -1000,6 +1019,9 @@ static int __init asic3_probe(struct platform_device *pdev)
1000 1019
1001 asic3_mfd_probe(pdev, pdata, mem); 1020 asic3_mfd_probe(pdev, pdata, mem);
1002 1021
1022 asic3_set_register(asic, ASIC3_OFFSET(EXTCF, SELECT),
1023 (ASIC3_EXTCF_CF0_BUF_EN|ASIC3_EXTCF_CF0_PWAIT_EN), 1);
1024
1003 dev_info(asic->dev, "ASIC3 Core driver\n"); 1025 dev_info(asic->dev, "ASIC3 Core driver\n");
1004 1026
1005 return 0; 1027 return 0;
@@ -1021,6 +1043,9 @@ static int __devexit asic3_remove(struct platform_device *pdev)
1021 int ret; 1043 int ret;
1022 struct asic3 *asic = platform_get_drvdata(pdev); 1044 struct asic3 *asic = platform_get_drvdata(pdev);
1023 1045
1046 asic3_set_register(asic, ASIC3_OFFSET(EXTCF, SELECT),
1047 (ASIC3_EXTCF_CF0_BUF_EN|ASIC3_EXTCF_CF0_PWAIT_EN), 0);
1048
1024 asic3_mfd_remove(pdev); 1049 asic3_mfd_remove(pdev);
1025 1050
1026 ret = asic3_gpio_remove(pdev); 1051 ret = asic3_gpio_remove(pdev);
diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c
index 315fef5d466a..3419e726de47 100644
--- a/drivers/mfd/cs5535-mfd.c
+++ b/drivers/mfd/cs5535-mfd.c
@@ -186,18 +186,7 @@ static struct pci_driver cs5535_mfd_driver = {
186 .remove = __devexit_p(cs5535_mfd_remove), 186 .remove = __devexit_p(cs5535_mfd_remove),
187}; 187};
188 188
189static int __init cs5535_mfd_init(void) 189module_pci_driver(cs5535_mfd_driver);
190{
191 return pci_register_driver(&cs5535_mfd_driver);
192}
193
194static void __exit cs5535_mfd_exit(void)
195{
196 pci_unregister_driver(&cs5535_mfd_driver);
197}
198
199module_init(cs5535_mfd_init);
200module_exit(cs5535_mfd_exit);
201 190
202MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>"); 191MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
203MODULE_DESCRIPTION("MFD driver for CS5535/CS5536 southbridge's ISA PCI device"); 192MODULE_DESCRIPTION("MFD driver for CS5535/CS5536 southbridge's ISA PCI device");
diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index 7776aff46269..1f1313c90573 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -318,6 +318,135 @@ static bool da9052_reg_volatile(struct device *dev, unsigned int reg)
318 } 318 }
319} 319}
320 320
321/*
322 * TBAT look-up table is computed from the R90 reg (8 bit register)
323 * reading as below. The battery temperature is in milliCentigrade
324 * TBAT = (1/(t1+1/298) - 273) * 1000 mC
325 * where t1 = (1/B)* ln(( ADCval * 2.5)/(R25*ITBAT*255))
326 * Default values are R25 = 10e3, B = 3380, ITBAT = 50e-6
327 * Example:
328 * R25=10E3, B=3380, ITBAT=50e-6, ADCVAL=62d calculates
329 * TBAT = 20015 mili degrees Centrigrade
330 *
331*/
332static const int32_t tbat_lookup[255] = {
333 183258, 144221, 124334, 111336, 101826, 94397, 88343, 83257,
334 78889, 75071, 71688, 68656, 65914, 63414, 61120, 59001,
335 570366, 55204, 53490, 51881, 50364, 48931, 47574, 46285,
336 45059, 43889, 42772, 41703, 40678, 39694, 38748, 37838,
337 36961, 36115, 35297, 34507, 33743, 33002, 32284, 31588,
338 30911, 30254, 29615, 28994, 28389, 27799, 27225, 26664,
339 26117, 25584, 25062, 24553, 24054, 23567, 23091, 22624,
340 22167, 21719, 21281, 20851, 20429, 20015, 19610, 19211,
341 18820, 18436, 18058, 17688, 17323, 16965, 16612, 16266,
342 15925, 15589, 15259, 14933, 14613, 14298, 13987, 13681,
343 13379, 13082, 12788, 12499, 12214, 11933, 11655, 11382,
344 11112, 10845, 10582, 10322, 10066, 9812, 9562, 9315,
345 9071, 8830, 8591, 8356, 8123, 7893, 7665, 7440,
346 7218, 6998, 6780, 6565, 6352, 6141, 5933, 5726,
347 5522, 5320, 5120, 4922, 4726, 4532, 4340, 4149,
348 3961, 3774, 3589, 3406, 3225, 3045, 2867, 2690,
349 2516, 2342, 2170, 2000, 1831, 1664, 1498, 1334,
350 1171, 1009, 849, 690, 532, 376, 221, 67,
351 -84, -236, -386, -535, -683, -830, -975, -1119,
352 -1263, -1405, -1546, -1686, -1825, -1964, -2101, -2237,
353 -2372, -2506, -2639, -2771, -2902, -3033, -3162, -3291,
354 -3418, -3545, -3671, -3796, -3920, -4044, -4166, -4288,
355 -4409, -4529, -4649, -4767, -4885, -5002, -5119, -5235,
356 -5349, -5464, -5577, -5690, -5802, -5913, -6024, -6134,
357 -6244, -6352, -6461, -6568, -6675, -6781, -6887, -6992,
358 -7096, -7200, -7303, -7406, -7508, -7609, -7710, -7810,
359 -7910, -8009, -8108, -8206, -8304, -8401, -8497, -8593,
360 -8689, -8784, -8878, -8972, -9066, -9159, -9251, -9343,
361 -9435, -9526, -9617, -9707, -9796, -9886, -9975, -10063,
362 -10151, -10238, -10325, -10412, -10839, -10923, -11007, -11090,
363 -11173, -11256, -11338, -11420, -11501, -11583, -11663, -11744,
364 -11823, -11903, -11982
365};
366
367static const u8 chan_mux[DA9052_ADC_VBBAT + 1] = {
368 [DA9052_ADC_VDDOUT] = DA9052_ADC_MAN_MUXSEL_VDDOUT,
369 [DA9052_ADC_ICH] = DA9052_ADC_MAN_MUXSEL_ICH,
370 [DA9052_ADC_TBAT] = DA9052_ADC_MAN_MUXSEL_TBAT,
371 [DA9052_ADC_VBAT] = DA9052_ADC_MAN_MUXSEL_VBAT,
372 [DA9052_ADC_IN4] = DA9052_ADC_MAN_MUXSEL_AD4,
373 [DA9052_ADC_IN5] = DA9052_ADC_MAN_MUXSEL_AD5,
374 [DA9052_ADC_IN6] = DA9052_ADC_MAN_MUXSEL_AD6,
375 [DA9052_ADC_VBBAT] = DA9052_ADC_MAN_MUXSEL_VBBAT
376};
377
378int da9052_adc_manual_read(struct da9052 *da9052, unsigned char channel)
379{
380 int ret;
381 unsigned short calc_data;
382 unsigned short data;
383 unsigned char mux_sel;
384
385 if (channel > DA9052_ADC_VBBAT)
386 return -EINVAL;
387
388 mutex_lock(&da9052->auxadc_lock);
389
390 /* Channel gets activated on enabling the Conversion bit */
391 mux_sel = chan_mux[channel] | DA9052_ADC_MAN_MAN_CONV;
392
393 ret = da9052_reg_write(da9052, DA9052_ADC_MAN_REG, mux_sel);
394 if (ret < 0)
395 goto err;
396
397 /* Wait for an interrupt */
398 if (!wait_for_completion_timeout(&da9052->done,
399 msecs_to_jiffies(500))) {
400 dev_err(da9052->dev,
401 "timeout waiting for ADC conversion interrupt\n");
402 ret = -ETIMEDOUT;
403 goto err;
404 }
405
406 ret = da9052_reg_read(da9052, DA9052_ADC_RES_H_REG);
407 if (ret < 0)
408 goto err;
409
410 calc_data = (unsigned short)ret;
411 data = calc_data << 2;
412
413 ret = da9052_reg_read(da9052, DA9052_ADC_RES_L_REG);
414 if (ret < 0)
415 goto err;
416
417 calc_data = (unsigned short)(ret & DA9052_ADC_RES_LSB);
418 data |= calc_data;
419
420 ret = data;
421
422err:
423 mutex_unlock(&da9052->auxadc_lock);
424 return ret;
425}
426EXPORT_SYMBOL_GPL(da9052_adc_manual_read);
427
428static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data)
429{
430 struct da9052 *da9052 = irq_data;
431
432 complete(&da9052->done);
433
434 return IRQ_HANDLED;
435}
436
437int da9052_adc_read_temp(struct da9052 *da9052)
438{
439 int tbat;
440
441 tbat = da9052_reg_read(da9052, DA9052_TBAT_RES_REG);
442 if (tbat <= 0)
443 return tbat;
444
445 /* ARRAY_SIZE check is not needed since TBAT is a 8-bit register */
446 return tbat_lookup[tbat - 1];
447}
448EXPORT_SYMBOL_GPL(da9052_adc_read_temp);
449
321static struct resource da9052_rtc_resource = { 450static struct resource da9052_rtc_resource = {
322 .name = "ALM", 451 .name = "ALM",
323 .start = DA9052_IRQ_ALARM, 452 .start = DA9052_IRQ_ALARM,
@@ -646,6 +775,9 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
646 struct irq_desc *desc; 775 struct irq_desc *desc;
647 int ret; 776 int ret;
648 777
778 mutex_init(&da9052->auxadc_lock);
779 init_completion(&da9052->done);
780
649 if (pdata && pdata->init != NULL) 781 if (pdata && pdata->init != NULL)
650 pdata->init(da9052); 782 pdata->init(da9052);
651 783
@@ -665,6 +797,12 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
665 797
666 da9052->irq_base = regmap_irq_chip_get_base(da9052->irq_data); 798 da9052->irq_base = regmap_irq_chip_get_base(da9052->irq_data);
667 799
800 ret = request_threaded_irq(DA9052_IRQ_ADC_EOM, NULL, da9052_auxadc_irq,
801 IRQF_TRIGGER_LOW | IRQF_ONESHOT,
802 "adc irq", da9052);
803 if (ret != 0)
804 dev_err(da9052->dev, "DA9052 ADC IRQ failed ret=%d\n", ret);
805
668 ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info, 806 ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info,
669 ARRAY_SIZE(da9052_subdev_info), NULL, 0); 807 ARRAY_SIZE(da9052_subdev_info), NULL, 0);
670 if (ret) 808 if (ret)
@@ -673,6 +811,7 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
673 return 0; 811 return 0;
674 812
675err: 813err:
814 free_irq(DA9052_IRQ_ADC_EOM, da9052);
676 mfd_remove_devices(da9052->dev); 815 mfd_remove_devices(da9052->dev);
677regmap_err: 816regmap_err:
678 return ret; 817 return ret;
@@ -680,6 +819,7 @@ regmap_err:
680 819
681void da9052_device_exit(struct da9052 *da9052) 820void da9052_device_exit(struct da9052 *da9052)
682{ 821{
822 free_irq(DA9052_IRQ_ADC_EOM, da9052);
683 regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data); 823 regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
684 mfd_remove_devices(da9052->dev); 824 mfd_remove_devices(da9052->dev);
685} 825}
diff --git a/drivers/mfd/da9052-i2c.c b/drivers/mfd/da9052-i2c.c
index 36b88e395499..82c9d6450286 100644
--- a/drivers/mfd/da9052-i2c.c
+++ b/drivers/mfd/da9052-i2c.c
@@ -22,6 +22,11 @@
22#include <linux/mfd/da9052/da9052.h> 22#include <linux/mfd/da9052/da9052.h>
23#include <linux/mfd/da9052/reg.h> 23#include <linux/mfd/da9052/reg.h>
24 24
25#ifdef CONFIG_OF
26#include <linux/of.h>
27#include <linux/of_device.h>
28#endif
29
25static int da9052_i2c_enable_multiwrite(struct da9052 *da9052) 30static int da9052_i2c_enable_multiwrite(struct da9052 *da9052)
26{ 31{
27 int reg_val, ret; 32 int reg_val, ret;
@@ -41,13 +46,31 @@ static int da9052_i2c_enable_multiwrite(struct da9052 *da9052)
41 return 0; 46 return 0;
42} 47}
43 48
49static struct i2c_device_id da9052_i2c_id[] = {
50 {"da9052", DA9052},
51 {"da9053-aa", DA9053_AA},
52 {"da9053-ba", DA9053_BA},
53 {"da9053-bb", DA9053_BB},
54 {}
55};
56
57#ifdef CONFIG_OF
58static const struct of_device_id dialog_dt_ids[] = {
59 { .compatible = "dlg,da9052", .data = &da9052_i2c_id[0] },
60 { .compatible = "dlg,da9053-aa", .data = &da9052_i2c_id[1] },
61 { .compatible = "dlg,da9053-ab", .data = &da9052_i2c_id[2] },
62 { .compatible = "dlg,da9053-bb", .data = &da9052_i2c_id[3] },
63 { /* sentinel */ }
64};
65#endif
66
44static int __devinit da9052_i2c_probe(struct i2c_client *client, 67static int __devinit da9052_i2c_probe(struct i2c_client *client,
45 const struct i2c_device_id *id) 68 const struct i2c_device_id *id)
46{ 69{
47 struct da9052 *da9052; 70 struct da9052 *da9052;
48 int ret; 71 int ret;
49 72
50 da9052 = kzalloc(sizeof(struct da9052), GFP_KERNEL); 73 da9052 = devm_kzalloc(&client->dev, sizeof(struct da9052), GFP_KERNEL);
51 if (!da9052) 74 if (!da9052)
52 return -ENOMEM; 75 return -ENOMEM;
53 76
@@ -55,8 +78,7 @@ static int __devinit da9052_i2c_probe(struct i2c_client *client,
55 I2C_FUNC_SMBUS_BYTE_DATA)) { 78 I2C_FUNC_SMBUS_BYTE_DATA)) {
56 dev_info(&client->dev, "Error in %s:i2c_check_functionality\n", 79 dev_info(&client->dev, "Error in %s:i2c_check_functionality\n",
57 __func__); 80 __func__);
58 ret = -ENODEV; 81 return -ENODEV;
59 goto err;
60 } 82 }
61 83
62 da9052->dev = &client->dev; 84 da9052->dev = &client->dev;
@@ -64,29 +86,39 @@ static int __devinit da9052_i2c_probe(struct i2c_client *client,
64 86
65 i2c_set_clientdata(client, da9052); 87 i2c_set_clientdata(client, da9052);
66 88
67 da9052->regmap = regmap_init_i2c(client, &da9052_regmap_config); 89 da9052->regmap = devm_regmap_init_i2c(client, &da9052_regmap_config);
68 if (IS_ERR(da9052->regmap)) { 90 if (IS_ERR(da9052->regmap)) {
69 ret = PTR_ERR(da9052->regmap); 91 ret = PTR_ERR(da9052->regmap);
70 dev_err(&client->dev, "Failed to allocate register map: %d\n", 92 dev_err(&client->dev, "Failed to allocate register map: %d\n",
71 ret); 93 ret);
72 goto err; 94 return ret;
73 } 95 }
74 96
75 ret = da9052_i2c_enable_multiwrite(da9052); 97 ret = da9052_i2c_enable_multiwrite(da9052);
76 if (ret < 0) 98 if (ret < 0)
77 goto err_regmap; 99 return ret;
100
101#ifdef CONFIG_OF
102 if (!id) {
103 struct device_node *np = client->dev.of_node;
104 const struct of_device_id *deviceid;
105
106 deviceid = of_match_node(dialog_dt_ids, np);
107 id = (const struct i2c_device_id *)deviceid->data;
108 }
109#endif
110
111 if (!id) {
112 ret = -ENODEV;
113 dev_err(&client->dev, "id is null.\n");
114 return ret;
115 }
78 116
79 ret = da9052_device_init(da9052, id->driver_data); 117 ret = da9052_device_init(da9052, id->driver_data);
80 if (ret != 0) 118 if (ret != 0)
81 goto err_regmap; 119 return ret;
82 120
83 return 0; 121 return 0;
84
85err_regmap:
86 regmap_exit(da9052->regmap);
87err:
88 kfree(da9052);
89 return ret;
90} 122}
91 123
92static int __devexit da9052_i2c_remove(struct i2c_client *client) 124static int __devexit da9052_i2c_remove(struct i2c_client *client)
@@ -94,20 +126,9 @@ static int __devexit da9052_i2c_remove(struct i2c_client *client)
94 struct da9052 *da9052 = i2c_get_clientdata(client); 126 struct da9052 *da9052 = i2c_get_clientdata(client);
95 127
96 da9052_device_exit(da9052); 128 da9052_device_exit(da9052);
97 regmap_exit(da9052->regmap);
98 kfree(da9052);
99
100 return 0; 129 return 0;
101} 130}
102 131
103static struct i2c_device_id da9052_i2c_id[] = {
104 {"da9052", DA9052},
105 {"da9053-aa", DA9053_AA},
106 {"da9053-ba", DA9053_BA},
107 {"da9053-bb", DA9053_BB},
108 {}
109};
110
111static struct i2c_driver da9052_i2c_driver = { 132static struct i2c_driver da9052_i2c_driver = {
112 .probe = da9052_i2c_probe, 133 .probe = da9052_i2c_probe,
113 .remove = __devexit_p(da9052_i2c_remove), 134 .remove = __devexit_p(da9052_i2c_remove),
@@ -115,6 +136,9 @@ static struct i2c_driver da9052_i2c_driver = {
115 .driver = { 136 .driver = {
116 .name = "da9052", 137 .name = "da9052",
117 .owner = THIS_MODULE, 138 .owner = THIS_MODULE,
139#ifdef CONFIG_OF
140 .of_match_table = dialog_dt_ids,
141#endif
118 }, 142 },
119}; 143};
120 144
diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c
index 6faf149e8d94..dbeadc5a6436 100644
--- a/drivers/mfd/da9052-spi.c
+++ b/drivers/mfd/da9052-spi.c
@@ -25,8 +25,9 @@ static int __devinit da9052_spi_probe(struct spi_device *spi)
25{ 25{
26 int ret; 26 int ret;
27 const struct spi_device_id *id = spi_get_device_id(spi); 27 const struct spi_device_id *id = spi_get_device_id(spi);
28 struct da9052 *da9052 = kzalloc(sizeof(struct da9052), GFP_KERNEL); 28 struct da9052 *da9052;
29 29
30 da9052 = devm_kzalloc(&spi->dev, sizeof(struct da9052), GFP_KERNEL);
30 if (!da9052) 31 if (!da9052)
31 return -ENOMEM; 32 return -ENOMEM;
32 33
@@ -42,25 +43,19 @@ static int __devinit da9052_spi_probe(struct spi_device *spi)
42 da9052_regmap_config.read_flag_mask = 1; 43 da9052_regmap_config.read_flag_mask = 1;
43 da9052_regmap_config.write_flag_mask = 0; 44 da9052_regmap_config.write_flag_mask = 0;
44 45
45 da9052->regmap = regmap_init_spi(spi, &da9052_regmap_config); 46 da9052->regmap = devm_regmap_init_spi(spi, &da9052_regmap_config);
46 if (IS_ERR(da9052->regmap)) { 47 if (IS_ERR(da9052->regmap)) {
47 ret = PTR_ERR(da9052->regmap); 48 ret = PTR_ERR(da9052->regmap);
48 dev_err(&spi->dev, "Failed to allocate register map: %d\n", 49 dev_err(&spi->dev, "Failed to allocate register map: %d\n",
49 ret); 50 ret);
50 goto err; 51 return ret;
51 } 52 }
52 53
53 ret = da9052_device_init(da9052, id->driver_data); 54 ret = da9052_device_init(da9052, id->driver_data);
54 if (ret != 0) 55 if (ret != 0)
55 goto err_regmap; 56 return ret;
56 57
57 return 0; 58 return 0;
58
59err_regmap:
60 regmap_exit(da9052->regmap);
61err:
62 kfree(da9052);
63 return ret;
64} 59}
65 60
66static int __devexit da9052_spi_remove(struct spi_device *spi) 61static int __devexit da9052_spi_remove(struct spi_device *spi)
@@ -68,9 +63,6 @@ static int __devexit da9052_spi_remove(struct spi_device *spi)
68 struct da9052 *da9052 = dev_get_drvdata(&spi->dev); 63 struct da9052 *da9052 = dev_get_drvdata(&spi->dev);
69 64
70 da9052_device_exit(da9052); 65 da9052_device_exit(da9052);
71 regmap_exit(da9052->regmap);
72 kfree(da9052);
73
74 return 0; 66 return 0;
75} 67}
76 68
@@ -88,7 +80,6 @@ static struct spi_driver da9052_spi_driver = {
88 .id_table = da9052_spi_id, 80 .id_table = da9052_spi_id,
89 .driver = { 81 .driver = {
90 .name = "da9052", 82 .name = "da9052",
91 .bus = &spi_bus_type,
92 .owner = THIS_MODULE, 83 .owner = THIS_MODULE,
93 }, 84 },
94}; 85};
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 5be32489714f..671c8bc14bbc 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -2720,6 +2720,7 @@ static struct regulator_consumer_supply db8500_vape_consumers[] = {
2720 REGULATOR_SUPPLY("v-i2c", "nmk-i2c.1"), 2720 REGULATOR_SUPPLY("v-i2c", "nmk-i2c.1"),
2721 REGULATOR_SUPPLY("v-i2c", "nmk-i2c.2"), 2721 REGULATOR_SUPPLY("v-i2c", "nmk-i2c.2"),
2722 REGULATOR_SUPPLY("v-i2c", "nmk-i2c.3"), 2722 REGULATOR_SUPPLY("v-i2c", "nmk-i2c.3"),
2723 REGULATOR_SUPPLY("v-i2c", "nmk-i2c.4"),
2723 /* "v-mmc" changed to "vcore" in the mainline kernel */ 2724 /* "v-mmc" changed to "vcore" in the mainline kernel */
2724 REGULATOR_SUPPLY("vcore", "sdi0"), 2725 REGULATOR_SUPPLY("vcore", "sdi0"),
2725 REGULATOR_SUPPLY("vcore", "sdi1"), 2726 REGULATOR_SUPPLY("vcore", "sdi1"),
@@ -2958,9 +2959,10 @@ static struct mfd_cell db8500_prcmu_devs[] = {
2958 * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic 2959 * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic
2959 * 2960 *
2960 */ 2961 */
2961static int __init db8500_prcmu_probe(struct platform_device *pdev) 2962static int __devinit db8500_prcmu_probe(struct platform_device *pdev)
2962{ 2963{
2963 int err = 0; 2964 struct device_node *np = pdev->dev.of_node;
2965 int irq = 0, err = 0;
2964 2966
2965 if (ux500_is_svp()) 2967 if (ux500_is_svp())
2966 return -ENODEV; 2968 return -ENODEV;
@@ -2970,8 +2972,14 @@ static int __init db8500_prcmu_probe(struct platform_device *pdev)
2970 /* Clean up the mailbox interrupts after pre-kernel code. */ 2972 /* Clean up the mailbox interrupts after pre-kernel code. */
2971 writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLR); 2973 writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLR);
2972 2974
2973 err = request_threaded_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 2975 if (np)
2974 prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL); 2976 irq = platform_get_irq(pdev, 0);
2977
2978 if (!np || irq <= 0)
2979 irq = IRQ_DB8500_PRCMU1;
2980
2981 err = request_threaded_irq(irq, prcmu_irq_handler,
2982 prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL);
2975 if (err < 0) { 2983 if (err < 0) {
2976 pr_err("prcmu: Failed to allocate IRQ_DB8500_PRCMU1.\n"); 2984 pr_err("prcmu: Failed to allocate IRQ_DB8500_PRCMU1.\n");
2977 err = -EBUSY; 2985 err = -EBUSY;
@@ -2981,14 +2989,16 @@ static int __init db8500_prcmu_probe(struct platform_device *pdev)
2981 if (cpu_is_u8500v20_or_later()) 2989 if (cpu_is_u8500v20_or_later())
2982 prcmu_config_esram0_deep_sleep(ESRAM0_DEEP_SLEEP_STATE_RET); 2990 prcmu_config_esram0_deep_sleep(ESRAM0_DEEP_SLEEP_STATE_RET);
2983 2991
2984 err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs, 2992 if (!np) {
2985 ARRAY_SIZE(db8500_prcmu_devs), NULL, 2993 err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs,
2986 0); 2994 ARRAY_SIZE(db8500_prcmu_devs), NULL, 0);
2995 if (err) {
2996 pr_err("prcmu: Failed to add subdevices\n");
2997 return err;
2998 }
2999 }
2987 3000
2988 if (err) 3001 pr_info("DB8500 PRCMU initialized\n");
2989 pr_err("prcmu: Failed to add subdevices\n");
2990 else
2991 pr_info("DB8500 PRCMU initialized\n");
2992 3002
2993no_irq_return: 3003no_irq_return:
2994 return err; 3004 return err;
@@ -2999,11 +3009,12 @@ static struct platform_driver db8500_prcmu_driver = {
2999 .name = "db8500-prcmu", 3009 .name = "db8500-prcmu",
3000 .owner = THIS_MODULE, 3010 .owner = THIS_MODULE,
3001 }, 3011 },
3012 .probe = db8500_prcmu_probe,
3002}; 3013};
3003 3014
3004static int __init db8500_prcmu_init(void) 3015static int __init db8500_prcmu_init(void)
3005{ 3016{
3006 return platform_driver_probe(&db8500_prcmu_driver, db8500_prcmu_probe); 3017 return platform_driver_register(&db8500_prcmu_driver);
3007} 3018}
3008 3019
3009arch_initcall(db8500_prcmu_init); 3020arch_initcall(db8500_prcmu_init);
diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
index b76657eb0c51..59df5584cb58 100644
--- a/drivers/mfd/intel_msic.c
+++ b/drivers/mfd/intel_msic.c
@@ -406,7 +406,7 @@ static int __devinit intel_msic_probe(struct platform_device *pdev)
406 return -ENXIO; 406 return -ENXIO;
407 } 407 }
408 408
409 msic = kzalloc(sizeof(*msic), GFP_KERNEL); 409 msic = devm_kzalloc(&pdev->dev, sizeof(*msic), GFP_KERNEL);
410 if (!msic) 410 if (!msic)
411 return -ENOMEM; 411 return -ENOMEM;
412 412
@@ -421,21 +421,13 @@ static int __devinit intel_msic_probe(struct platform_device *pdev)
421 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 421 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
422 if (!res) { 422 if (!res) {
423 dev_err(&pdev->dev, "failed to get SRAM iomem resource\n"); 423 dev_err(&pdev->dev, "failed to get SRAM iomem resource\n");
424 ret = -ENODEV; 424 return -ENODEV;
425 goto fail_free_msic;
426 } 425 }
427 426
428 res = request_mem_region(res->start, resource_size(res), pdev->name); 427 msic->irq_base = devm_request_and_ioremap(&pdev->dev, res);
429 if (!res) {
430 ret = -EBUSY;
431 goto fail_free_msic;
432 }
433
434 msic->irq_base = ioremap_nocache(res->start, resource_size(res));
435 if (!msic->irq_base) { 428 if (!msic->irq_base) {
436 dev_err(&pdev->dev, "failed to map SRAM memory\n"); 429 dev_err(&pdev->dev, "failed to map SRAM memory\n");
437 ret = -ENOMEM; 430 return -ENOMEM;
438 goto fail_release_region;
439 } 431 }
440 432
441 platform_set_drvdata(pdev, msic); 433 platform_set_drvdata(pdev, msic);
@@ -443,7 +435,7 @@ static int __devinit intel_msic_probe(struct platform_device *pdev)
443 ret = intel_msic_init_devices(msic); 435 ret = intel_msic_init_devices(msic);
444 if (ret) { 436 if (ret) {
445 dev_err(&pdev->dev, "failed to initialize MSIC devices\n"); 437 dev_err(&pdev->dev, "failed to initialize MSIC devices\n");
446 goto fail_unmap_mem; 438 return ret;
447 } 439 }
448 440
449 dev_info(&pdev->dev, "Intel MSIC version %c%d (vendor %#x)\n", 441 dev_info(&pdev->dev, "Intel MSIC version %c%d (vendor %#x)\n",
@@ -451,27 +443,14 @@ static int __devinit intel_msic_probe(struct platform_device *pdev)
451 msic->vendor); 443 msic->vendor);
452 444
453 return 0; 445 return 0;
454
455fail_unmap_mem:
456 iounmap(msic->irq_base);
457fail_release_region:
458 release_mem_region(res->start, resource_size(res));
459fail_free_msic:
460 kfree(msic);
461
462 return ret;
463} 446}
464 447
465static int __devexit intel_msic_remove(struct platform_device *pdev) 448static int __devexit intel_msic_remove(struct platform_device *pdev)
466{ 449{
467 struct intel_msic *msic = platform_get_drvdata(pdev); 450 struct intel_msic *msic = platform_get_drvdata(pdev);
468 struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
469 451
470 intel_msic_remove_devices(msic); 452 intel_msic_remove_devices(msic);
471 platform_set_drvdata(pdev, NULL); 453 platform_set_drvdata(pdev, NULL);
472 iounmap(msic->irq_base);
473 release_mem_region(res->start, resource_size(res));
474 kfree(msic);
475 454
476 return 0; 455 return 0;
477} 456}
diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c
index a9223ed1b7c5..2ea99989551a 100644
--- a/drivers/mfd/janz-cmodio.c
+++ b/drivers/mfd/janz-cmodio.c
@@ -283,23 +283,8 @@ static struct pci_driver cmodio_pci_driver = {
283 .remove = __devexit_p(cmodio_pci_remove), 283 .remove = __devexit_p(cmodio_pci_remove),
284}; 284};
285 285
286/* 286module_pci_driver(cmodio_pci_driver);
287 * Module Init / Exit
288 */
289
290static int __init cmodio_init(void)
291{
292 return pci_register_driver(&cmodio_pci_driver);
293}
294
295static void __exit cmodio_exit(void)
296{
297 pci_unregister_driver(&cmodio_pci_driver);
298}
299 287
300MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>"); 288MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>");
301MODULE_DESCRIPTION("Janz CMOD-IO PCI MODULbus Carrier Board Driver"); 289MODULE_DESCRIPTION("Janz CMOD-IO PCI MODULbus Carrier Board Driver");
302MODULE_LICENSE("GPL"); 290MODULE_LICENSE("GPL");
303
304module_init(cmodio_init);
305module_exit(cmodio_exit);
diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c
new file mode 100644
index 000000000000..0b2879b87fd9
--- /dev/null
+++ b/drivers/mfd/lm3533-core.c
@@ -0,0 +1,667 @@
1/*
2 * lm3533-core.c -- LM3533 Core
3 *
4 * Copyright (C) 2011-2012 Texas Instruments
5 *
6 * Author: Johan Hovold <jhovold@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/init.h>
16#include <linux/kernel.h>
17#include <linux/err.h>
18#include <linux/gpio.h>
19#include <linux/i2c.h>
20#include <linux/mfd/core.h>
21#include <linux/regmap.h>
22#include <linux/seq_file.h>
23#include <linux/slab.h>
24#include <linux/uaccess.h>
25
26#include <linux/mfd/lm3533.h>
27
28
29#define LM3533_BOOST_OVP_MASK 0x06
30#define LM3533_BOOST_OVP_SHIFT 1
31
32#define LM3533_BOOST_FREQ_MASK 0x01
33#define LM3533_BOOST_FREQ_SHIFT 0
34
35#define LM3533_BL_ID_MASK 1
36#define LM3533_LED_ID_MASK 3
37#define LM3533_BL_ID_MAX 1
38#define LM3533_LED_ID_MAX 3
39
40#define LM3533_HVLED_ID_MAX 2
41#define LM3533_LVLED_ID_MAX 5
42
43#define LM3533_REG_OUTPUT_CONF1 0x10
44#define LM3533_REG_OUTPUT_CONF2 0x11
45#define LM3533_REG_BOOST_PWM 0x2c
46
47#define LM3533_REG_MAX 0xb2
48
49
50static struct mfd_cell lm3533_als_devs[] = {
51 {
52 .name = "lm3533-als",
53 .id = -1,
54 },
55};
56
57static struct mfd_cell lm3533_bl_devs[] = {
58 {
59 .name = "lm3533-backlight",
60 .id = 0,
61 },
62 {
63 .name = "lm3533-backlight",
64 .id = 1,
65 },
66};
67
68static struct mfd_cell lm3533_led_devs[] = {
69 {
70 .name = "lm3533-leds",
71 .id = 0,
72 },
73 {
74 .name = "lm3533-leds",
75 .id = 1,
76 },
77 {
78 .name = "lm3533-leds",
79 .id = 2,
80 },
81 {
82 .name = "lm3533-leds",
83 .id = 3,
84 },
85};
86
87int lm3533_read(struct lm3533 *lm3533, u8 reg, u8 *val)
88{
89 int tmp;
90 int ret;
91
92 ret = regmap_read(lm3533->regmap, reg, &tmp);
93 if (ret < 0) {
94 dev_err(lm3533->dev, "failed to read register %02x: %d\n",
95 reg, ret);
96 return ret;
97 }
98
99 *val = tmp;
100
101 dev_dbg(lm3533->dev, "read [%02x]: %02x\n", reg, *val);
102
103 return ret;
104}
105EXPORT_SYMBOL_GPL(lm3533_read);
106
107int lm3533_write(struct lm3533 *lm3533, u8 reg, u8 val)
108{
109 int ret;
110
111 dev_dbg(lm3533->dev, "write [%02x]: %02x\n", reg, val);
112
113 ret = regmap_write(lm3533->regmap, reg, val);
114 if (ret < 0) {
115 dev_err(lm3533->dev, "failed to write register %02x: %d\n",
116 reg, ret);
117 }
118
119 return ret;
120}
121EXPORT_SYMBOL_GPL(lm3533_write);
122
123int lm3533_update(struct lm3533 *lm3533, u8 reg, u8 val, u8 mask)
124{
125 int ret;
126
127 dev_dbg(lm3533->dev, "update [%02x]: %02x/%02x\n", reg, val, mask);
128
129 ret = regmap_update_bits(lm3533->regmap, reg, mask, val);
130 if (ret < 0) {
131 dev_err(lm3533->dev, "failed to update register %02x: %d\n",
132 reg, ret);
133 }
134
135 return ret;
136}
137EXPORT_SYMBOL_GPL(lm3533_update);
138
139static int lm3533_set_boost_freq(struct lm3533 *lm3533,
140 enum lm3533_boost_freq freq)
141{
142 int ret;
143
144 ret = lm3533_update(lm3533, LM3533_REG_BOOST_PWM,
145 freq << LM3533_BOOST_FREQ_SHIFT,
146 LM3533_BOOST_FREQ_MASK);
147 if (ret)
148 dev_err(lm3533->dev, "failed to set boost frequency\n");
149
150 return ret;
151}
152
153
154static int lm3533_set_boost_ovp(struct lm3533 *lm3533,
155 enum lm3533_boost_ovp ovp)
156{
157 int ret;
158
159 ret = lm3533_update(lm3533, LM3533_REG_BOOST_PWM,
160 ovp << LM3533_BOOST_OVP_SHIFT,
161 LM3533_BOOST_OVP_MASK);
162 if (ret)
163 dev_err(lm3533->dev, "failed to set boost ovp\n");
164
165 return ret;
166}
167
168/*
169 * HVLED output config -- output hvled controlled by backlight bl
170 */
171static int lm3533_set_hvled_config(struct lm3533 *lm3533, u8 hvled, u8 bl)
172{
173 u8 val;
174 u8 mask;
175 int shift;
176 int ret;
177
178 if (hvled == 0 || hvled > LM3533_HVLED_ID_MAX)
179 return -EINVAL;
180
181 if (bl > LM3533_BL_ID_MAX)
182 return -EINVAL;
183
184 shift = hvled - 1;
185 mask = LM3533_BL_ID_MASK << shift;
186 val = bl << shift;
187
188 ret = lm3533_update(lm3533, LM3533_REG_OUTPUT_CONF1, val, mask);
189 if (ret)
190 dev_err(lm3533->dev, "failed to set hvled config\n");
191
192 return ret;
193}
194
195/*
196 * LVLED output config -- output lvled controlled by LED led
197 */
198static int lm3533_set_lvled_config(struct lm3533 *lm3533, u8 lvled, u8 led)
199{
200 u8 reg;
201 u8 val;
202 u8 mask;
203 int shift;
204 int ret;
205
206 if (lvled == 0 || lvled > LM3533_LVLED_ID_MAX)
207 return -EINVAL;
208
209 if (led > LM3533_LED_ID_MAX)
210 return -EINVAL;
211
212 if (lvled < 4) {
213 reg = LM3533_REG_OUTPUT_CONF1;
214 shift = 2 * lvled;
215 } else {
216 reg = LM3533_REG_OUTPUT_CONF2;
217 shift = 2 * (lvled - 4);
218 }
219
220 mask = LM3533_LED_ID_MASK << shift;
221 val = led << shift;
222
223 ret = lm3533_update(lm3533, reg, val, mask);
224 if (ret)
225 dev_err(lm3533->dev, "failed to set lvled config\n");
226
227 return ret;
228}
229
230static void lm3533_enable(struct lm3533 *lm3533)
231{
232 if (gpio_is_valid(lm3533->gpio_hwen))
233 gpio_set_value(lm3533->gpio_hwen, 1);
234}
235
236static void lm3533_disable(struct lm3533 *lm3533)
237{
238 if (gpio_is_valid(lm3533->gpio_hwen))
239 gpio_set_value(lm3533->gpio_hwen, 0);
240}
241
242enum lm3533_attribute_type {
243 LM3533_ATTR_TYPE_BACKLIGHT,
244 LM3533_ATTR_TYPE_LED,
245};
246
247struct lm3533_device_attribute {
248 struct device_attribute dev_attr;
249 enum lm3533_attribute_type type;
250 union {
251 struct {
252 u8 id;
253 } output;
254 } u;
255};
256
257#define to_lm3533_dev_attr(_attr) \
258 container_of(_attr, struct lm3533_device_attribute, dev_attr)
259
260static ssize_t show_output(struct device *dev,
261 struct device_attribute *attr, char *buf)
262{
263 struct lm3533 *lm3533 = dev_get_drvdata(dev);
264 struct lm3533_device_attribute *lattr = to_lm3533_dev_attr(attr);
265 int id = lattr->u.output.id;
266 u8 reg;
267 u8 val;
268 u8 mask;
269 int shift;
270 int ret;
271
272 if (lattr->type == LM3533_ATTR_TYPE_BACKLIGHT) {
273 reg = LM3533_REG_OUTPUT_CONF1;
274 shift = id - 1;
275 mask = LM3533_BL_ID_MASK << shift;
276 } else {
277 if (id < 4) {
278 reg = LM3533_REG_OUTPUT_CONF1;
279 shift = 2 * id;
280 } else {
281 reg = LM3533_REG_OUTPUT_CONF2;
282 shift = 2 * (id - 4);
283 }
284 mask = LM3533_LED_ID_MASK << shift;
285 }
286
287 ret = lm3533_read(lm3533, reg, &val);
288 if (ret)
289 return ret;
290
291 val = (val & mask) >> shift;
292
293 return scnprintf(buf, PAGE_SIZE, "%u\n", val);
294}
295
296static ssize_t store_output(struct device *dev,
297 struct device_attribute *attr,
298 const char *buf, size_t len)
299{
300 struct lm3533 *lm3533 = dev_get_drvdata(dev);
301 struct lm3533_device_attribute *lattr = to_lm3533_dev_attr(attr);
302 int id = lattr->u.output.id;
303 u8 val;
304 int ret;
305
306 if (kstrtou8(buf, 0, &val))
307 return -EINVAL;
308
309 if (lattr->type == LM3533_ATTR_TYPE_BACKLIGHT)
310 ret = lm3533_set_hvled_config(lm3533, id, val);
311 else
312 ret = lm3533_set_lvled_config(lm3533, id, val);
313
314 if (ret)
315 return ret;
316
317 return len;
318}
319
320#define LM3533_OUTPUT_ATTR(_name, _mode, _show, _store, _type, _id) \
321 struct lm3533_device_attribute lm3533_dev_attr_##_name = \
322 { .dev_attr = __ATTR(_name, _mode, _show, _store), \
323 .type = _type, \
324 .u.output = { .id = _id }, }
325
326#define LM3533_OUTPUT_ATTR_RW(_name, _type, _id) \
327 LM3533_OUTPUT_ATTR(output_##_name, S_IRUGO | S_IWUSR, \
328 show_output, store_output, _type, _id)
329
330#define LM3533_OUTPUT_HVLED_ATTR_RW(_nr) \
331 LM3533_OUTPUT_ATTR_RW(hvled##_nr, LM3533_ATTR_TYPE_BACKLIGHT, _nr)
332#define LM3533_OUTPUT_LVLED_ATTR_RW(_nr) \
333 LM3533_OUTPUT_ATTR_RW(lvled##_nr, LM3533_ATTR_TYPE_LED, _nr)
334/*
335 * Output config:
336 *
337 * output_hvled<nr> 0-1
338 * output_lvled<nr> 0-3
339 */
340static LM3533_OUTPUT_HVLED_ATTR_RW(1);
341static LM3533_OUTPUT_HVLED_ATTR_RW(2);
342static LM3533_OUTPUT_LVLED_ATTR_RW(1);
343static LM3533_OUTPUT_LVLED_ATTR_RW(2);
344static LM3533_OUTPUT_LVLED_ATTR_RW(3);
345static LM3533_OUTPUT_LVLED_ATTR_RW(4);
346static LM3533_OUTPUT_LVLED_ATTR_RW(5);
347
348static struct attribute *lm3533_attributes[] = {
349 &lm3533_dev_attr_output_hvled1.dev_attr.attr,
350 &lm3533_dev_attr_output_hvled2.dev_attr.attr,
351 &lm3533_dev_attr_output_lvled1.dev_attr.attr,
352 &lm3533_dev_attr_output_lvled2.dev_attr.attr,
353 &lm3533_dev_attr_output_lvled3.dev_attr.attr,
354 &lm3533_dev_attr_output_lvled4.dev_attr.attr,
355 &lm3533_dev_attr_output_lvled5.dev_attr.attr,
356 NULL,
357};
358
359#define to_dev_attr(_attr) \
360 container_of(_attr, struct device_attribute, attr)
361
362static umode_t lm3533_attr_is_visible(struct kobject *kobj,
363 struct attribute *attr, int n)
364{
365 struct device *dev = container_of(kobj, struct device, kobj);
366 struct lm3533 *lm3533 = dev_get_drvdata(dev);
367 struct device_attribute *dattr = to_dev_attr(attr);
368 struct lm3533_device_attribute *lattr = to_lm3533_dev_attr(dattr);
369 enum lm3533_attribute_type type = lattr->type;
370 umode_t mode = attr->mode;
371
372 if (!lm3533->have_backlights && type == LM3533_ATTR_TYPE_BACKLIGHT)
373 mode = 0;
374 else if (!lm3533->have_leds && type == LM3533_ATTR_TYPE_LED)
375 mode = 0;
376
377 return mode;
378};
379
380static struct attribute_group lm3533_attribute_group = {
381 .is_visible = lm3533_attr_is_visible,
382 .attrs = lm3533_attributes
383};
384
385static int __devinit lm3533_device_als_init(struct lm3533 *lm3533)
386{
387 struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
388 int ret;
389
390 if (!pdata->als)
391 return 0;
392
393 lm3533_als_devs[0].platform_data = pdata->als;
394 lm3533_als_devs[0].pdata_size = sizeof(*pdata->als);
395
396 ret = mfd_add_devices(lm3533->dev, 0, lm3533_als_devs, 1, NULL, 0);
397 if (ret) {
398 dev_err(lm3533->dev, "failed to add ALS device\n");
399 return ret;
400 }
401
402 lm3533->have_als = 1;
403
404 return 0;
405}
406
407static int __devinit lm3533_device_bl_init(struct lm3533 *lm3533)
408{
409 struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
410 int i;
411 int ret;
412
413 if (!pdata->backlights || pdata->num_backlights == 0)
414 return 0;
415
416 if (pdata->num_backlights > ARRAY_SIZE(lm3533_bl_devs))
417 pdata->num_backlights = ARRAY_SIZE(lm3533_bl_devs);
418
419 for (i = 0; i < pdata->num_backlights; ++i) {
420 lm3533_bl_devs[i].platform_data = &pdata->backlights[i];
421 lm3533_bl_devs[i].pdata_size = sizeof(pdata->backlights[i]);
422 }
423
424 ret = mfd_add_devices(lm3533->dev, 0, lm3533_bl_devs,
425 pdata->num_backlights, NULL, 0);
426 if (ret) {
427 dev_err(lm3533->dev, "failed to add backlight devices\n");
428 return ret;
429 }
430
431 lm3533->have_backlights = 1;
432
433 return 0;
434}
435
436static int __devinit lm3533_device_led_init(struct lm3533 *lm3533)
437{
438 struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
439 int i;
440 int ret;
441
442 if (!pdata->leds || pdata->num_leds == 0)
443 return 0;
444
445 if (pdata->num_leds > ARRAY_SIZE(lm3533_led_devs))
446 pdata->num_leds = ARRAY_SIZE(lm3533_led_devs);
447
448 for (i = 0; i < pdata->num_leds; ++i) {
449 lm3533_led_devs[i].platform_data = &pdata->leds[i];
450 lm3533_led_devs[i].pdata_size = sizeof(pdata->leds[i]);
451 }
452
453 ret = mfd_add_devices(lm3533->dev, 0, lm3533_led_devs,
454 pdata->num_leds, NULL, 0);
455 if (ret) {
456 dev_err(lm3533->dev, "failed to add LED devices\n");
457 return ret;
458 }
459
460 lm3533->have_leds = 1;
461
462 return 0;
463}
464
465static int __devinit lm3533_device_setup(struct lm3533 *lm3533,
466 struct lm3533_platform_data *pdata)
467{
468 int ret;
469
470 ret = lm3533_set_boost_freq(lm3533, pdata->boost_freq);
471 if (ret)
472 return ret;
473
474 ret = lm3533_set_boost_ovp(lm3533, pdata->boost_ovp);
475 if (ret)
476 return ret;
477
478 return 0;
479}
480
481static int __devinit lm3533_device_init(struct lm3533 *lm3533)
482{
483 struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
484 int ret;
485
486 dev_dbg(lm3533->dev, "%s\n", __func__);
487
488 if (!pdata) {
489 dev_err(lm3533->dev, "no platform data\n");
490 return -EINVAL;
491 }
492
493 lm3533->gpio_hwen = pdata->gpio_hwen;
494
495 dev_set_drvdata(lm3533->dev, lm3533);
496
497 if (gpio_is_valid(lm3533->gpio_hwen)) {
498 ret = gpio_request_one(lm3533->gpio_hwen, GPIOF_OUT_INIT_LOW,
499 "lm3533-hwen");
500 if (ret < 0) {
501 dev_err(lm3533->dev,
502 "failed to request HWEN GPIO %d\n",
503 lm3533->gpio_hwen);
504 return ret;
505 }
506 }
507
508 lm3533_enable(lm3533);
509
510 ret = lm3533_device_setup(lm3533, pdata);
511 if (ret)
512 goto err_disable;
513
514 lm3533_device_als_init(lm3533);
515 lm3533_device_bl_init(lm3533);
516 lm3533_device_led_init(lm3533);
517
518 ret = sysfs_create_group(&lm3533->dev->kobj, &lm3533_attribute_group);
519 if (ret < 0) {
520 dev_err(lm3533->dev, "failed to create sysfs attributes\n");
521 goto err_unregister;
522 }
523
524 return 0;
525
526err_unregister:
527 mfd_remove_devices(lm3533->dev);
528err_disable:
529 lm3533_disable(lm3533);
530 if (gpio_is_valid(lm3533->gpio_hwen))
531 gpio_free(lm3533->gpio_hwen);
532
533 return ret;
534}
535
536static void __devexit lm3533_device_exit(struct lm3533 *lm3533)
537{
538 dev_dbg(lm3533->dev, "%s\n", __func__);
539
540 sysfs_remove_group(&lm3533->dev->kobj, &lm3533_attribute_group);
541
542 mfd_remove_devices(lm3533->dev);
543 lm3533_disable(lm3533);
544 if (gpio_is_valid(lm3533->gpio_hwen))
545 gpio_free(lm3533->gpio_hwen);
546}
547
548static bool lm3533_readable_register(struct device *dev, unsigned int reg)
549{
550 switch (reg) {
551 case 0x10 ... 0x2c:
552 case 0x30 ... 0x38:
553 case 0x40 ... 0x45:
554 case 0x50 ... 0x57:
555 case 0x60 ... 0x6e:
556 case 0x70 ... 0x75:
557 case 0x80 ... 0x85:
558 case 0x90 ... 0x95:
559 case 0xa0 ... 0xa5:
560 case 0xb0 ... 0xb2:
561 return true;
562 default:
563 return false;
564 }
565}
566
567static bool lm3533_volatile_register(struct device *dev, unsigned int reg)
568{
569 switch (reg) {
570 case 0x34 ... 0x36: /* zone */
571 case 0x37 ... 0x38: /* adc */
572 case 0xb0 ... 0xb1: /* fault */
573 return true;
574 default:
575 return false;
576 }
577}
578
579static bool lm3533_precious_register(struct device *dev, unsigned int reg)
580{
581 switch (reg) {
582 case 0x34: /* zone */
583 return true;
584 default:
585 return false;
586 }
587}
588
589static struct regmap_config regmap_config = {
590 .reg_bits = 8,
591 .val_bits = 8,
592 .max_register = LM3533_REG_MAX,
593 .readable_reg = lm3533_readable_register,
594 .volatile_reg = lm3533_volatile_register,
595 .precious_reg = lm3533_precious_register,
596};
597
598static int __devinit lm3533_i2c_probe(struct i2c_client *i2c,
599 const struct i2c_device_id *id)
600{
601 struct lm3533 *lm3533;
602 int ret;
603
604 dev_dbg(&i2c->dev, "%s\n", __func__);
605
606 lm3533 = devm_kzalloc(&i2c->dev, sizeof(*lm3533), GFP_KERNEL);
607 if (!lm3533)
608 return -ENOMEM;
609
610 i2c_set_clientdata(i2c, lm3533);
611
612 lm3533->regmap = devm_regmap_init_i2c(i2c, &regmap_config);
613 if (IS_ERR(lm3533->regmap))
614 return PTR_ERR(lm3533->regmap);
615
616 lm3533->dev = &i2c->dev;
617 lm3533->irq = i2c->irq;
618
619 ret = lm3533_device_init(lm3533);
620 if (ret)
621 return ret;
622
623 return 0;
624}
625
626static int __devexit lm3533_i2c_remove(struct i2c_client *i2c)
627{
628 struct lm3533 *lm3533 = i2c_get_clientdata(i2c);
629
630 dev_dbg(&i2c->dev, "%s\n", __func__);
631
632 lm3533_device_exit(lm3533);
633
634 return 0;
635}
636
637static const struct i2c_device_id lm3533_i2c_ids[] = {
638 { "lm3533", 0 },
639 { },
640};
641MODULE_DEVICE_TABLE(i2c, lm3533_i2c_ids);
642
643static struct i2c_driver lm3533_i2c_driver = {
644 .driver = {
645 .name = "lm3533",
646 .owner = THIS_MODULE,
647 },
648 .id_table = lm3533_i2c_ids,
649 .probe = lm3533_i2c_probe,
650 .remove = __devexit_p(lm3533_i2c_remove),
651};
652
653static int __init lm3533_i2c_init(void)
654{
655 return i2c_add_driver(&lm3533_i2c_driver);
656}
657subsys_initcall(lm3533_i2c_init);
658
659static void __exit lm3533_i2c_exit(void)
660{
661 i2c_del_driver(&lm3533_i2c_driver);
662}
663module_exit(lm3533_i2c_exit);
664
665MODULE_AUTHOR("Johan Hovold <jhovold@gmail.com>");
666MODULE_DESCRIPTION("LM3533 Core");
667MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/lm3533-ctrlbank.c b/drivers/mfd/lm3533-ctrlbank.c
new file mode 100644
index 000000000000..a4cb7a5220a7
--- /dev/null
+++ b/drivers/mfd/lm3533-ctrlbank.c
@@ -0,0 +1,148 @@
1/*
2 * lm3533-ctrlbank.c -- LM3533 Generic Control Bank interface
3 *
4 * Copyright (C) 2011-2012 Texas Instruments
5 *
6 * Author: Johan Hovold <jhovold@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/device.h>
15#include <linux/module.h>
16
17#include <linux/mfd/lm3533.h>
18
19
20#define LM3533_MAX_CURRENT_MIN 5000
21#define LM3533_MAX_CURRENT_MAX 29800
22#define LM3533_MAX_CURRENT_STEP 800
23
24#define LM3533_BRIGHTNESS_MAX 255
25#define LM3533_PWM_MAX 0x3f
26
27#define LM3533_REG_PWM_BASE 0x14
28#define LM3533_REG_MAX_CURRENT_BASE 0x1f
29#define LM3533_REG_CTRLBANK_ENABLE 0x27
30#define LM3533_REG_BRIGHTNESS_BASE 0x40
31
32
/*
 * Map a register base to the bank-specific register: per-bank
 * registers are laid out consecutively, one per control-bank id.
 */
static inline u8 lm3533_ctrlbank_get_reg(struct lm3533_ctrlbank *cb, u8 base)
{
	return base + cb->id;
}
37
38int lm3533_ctrlbank_enable(struct lm3533_ctrlbank *cb)
39{
40 u8 mask;
41 int ret;
42
43 dev_dbg(cb->dev, "%s - %d\n", __func__, cb->id);
44
45 mask = 1 << cb->id;
46 ret = lm3533_update(cb->lm3533, LM3533_REG_CTRLBANK_ENABLE,
47 mask, mask);
48 if (ret)
49 dev_err(cb->dev, "failed to enable ctrlbank %d\n", cb->id);
50
51 return ret;
52}
53EXPORT_SYMBOL_GPL(lm3533_ctrlbank_enable);
54
55int lm3533_ctrlbank_disable(struct lm3533_ctrlbank *cb)
56{
57 u8 mask;
58 int ret;
59
60 dev_dbg(cb->dev, "%s - %d\n", __func__, cb->id);
61
62 mask = 1 << cb->id;
63 ret = lm3533_update(cb->lm3533, LM3533_REG_CTRLBANK_ENABLE, 0, mask);
64 if (ret)
65 dev_err(cb->dev, "failed to disable ctrlbank %d\n", cb->id);
66
67 return ret;
68}
69EXPORT_SYMBOL_GPL(lm3533_ctrlbank_disable);
70
71/*
72 * Full-scale current.
73 *
74 * imax 5000 - 29800 uA (800 uA step)
75 */
76int lm3533_ctrlbank_set_max_current(struct lm3533_ctrlbank *cb, u16 imax)
77{
78 u8 reg;
79 u8 val;
80 int ret;
81
82 if (imax < LM3533_MAX_CURRENT_MIN || imax > LM3533_MAX_CURRENT_MAX)
83 return -EINVAL;
84
85 val = (imax - LM3533_MAX_CURRENT_MIN) / LM3533_MAX_CURRENT_STEP;
86
87 reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_MAX_CURRENT_BASE);
88 ret = lm3533_write(cb->lm3533, reg, val);
89 if (ret)
90 dev_err(cb->dev, "failed to set max current\n");
91
92 return ret;
93}
94EXPORT_SYMBOL_GPL(lm3533_ctrlbank_set_max_current);
95
/*
 * lm3533_ctrlbank_set() - generate an exported per-bank setter.
 *
 * The expanded lm3533_ctrlbank_set_<_name>() validates val against
 * LM3533_<_NAME>_MAX and writes it to this bank's register at
 * LM3533_REG_<_NAME>_BASE + bank id.  Returns 0, -EINVAL on range
 * error, or a negative errno from the register write.
 */
#define lm3533_ctrlbank_set(_name, _NAME)				\
int lm3533_ctrlbank_set_##_name(struct lm3533_ctrlbank *cb, u8 val)	\
{									\
	u8 reg;								\
	int ret;							\
									\
	if (val > LM3533_##_NAME##_MAX)					\
		return -EINVAL;						\
									\
	reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_##_NAME##_BASE);	\
	ret = lm3533_write(cb->lm3533, reg, val);			\
	if (ret)							\
		dev_err(cb->dev, "failed to set " #_name "\n");		\
									\
	return ret;							\
}									\
EXPORT_SYMBOL_GPL(lm3533_ctrlbank_set_##_name);
113
/*
 * lm3533_ctrlbank_get() - generate an exported per-bank getter.
 *
 * The expanded lm3533_ctrlbank_get_<_name>() reads this bank's
 * register at LM3533_REG_<_NAME>_BASE + bank id into *val.  Returns 0
 * or a negative errno from the register read.
 */
#define lm3533_ctrlbank_get(_name, _NAME)				\
int lm3533_ctrlbank_get_##_name(struct lm3533_ctrlbank *cb, u8 *val)	\
{									\
	u8 reg;								\
	int ret;							\
									\
	reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_##_NAME##_BASE);	\
	ret = lm3533_read(cb->lm3533, reg, val);			\
	if (ret)							\
		dev_err(cb->dev, "failed to get " #_name "\n");		\
									\
	return ret;							\
}									\
EXPORT_SYMBOL_GPL(lm3533_ctrlbank_get_##_name);
128
/* Brightness accessors (0..LM3533_BRIGHTNESS_MAX). */
lm3533_ctrlbank_set(brightness, BRIGHTNESS);
lm3533_ctrlbank_get(brightness, BRIGHTNESS);

/*
 * PWM-input control mask (0..LM3533_PWM_MAX):
 *
 * bit 5 - PWM-input enabled in Zone 4
 * bit 4 - PWM-input enabled in Zone 3
 * bit 3 - PWM-input enabled in Zone 2
 * bit 2 - PWM-input enabled in Zone 1
 * bit 1 - PWM-input enabled in Zone 0
 * bit 0 - PWM-input enabled
 */
lm3533_ctrlbank_set(pwm, PWM);
lm3533_ctrlbank_get(pwm, PWM);
144
145
146MODULE_AUTHOR("Johan Hovold <jhovold@gmail.com>");
147MODULE_DESCRIPTION("LM3533 Control Bank interface");
148MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
new file mode 100644
index 000000000000..027cc8f86132
--- /dev/null
+++ b/drivers/mfd/lpc_ich.c
@@ -0,0 +1,888 @@
1/*
2 * lpc_ich.c - LPC interface for Intel ICH
3 *
4 * LPC bridge function of the Intel ICH contains many other
5 * functional units, such as Interrupt controllers, Timers,
6 * Power Management, System Management, GPIO, RTC, and LPC
7 * Configuration Registers.
8 *
9 * This driver is derived from lpc_sch.
10
11 * Copyright (c) 2011 Extreme Engineering Solution, Inc.
12 * Author: Aaron Sierra <asierra@xes-inc.com>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License 2 as published
16 * by the Free Software Foundation.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; see the file COPYING. If not, write to
25 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
26 *
27 * This driver supports the following I/O Controller hubs:
28 * (See the intel documentation on http://developer.intel.com.)
29 * document number 290655-003, 290677-014: 82801AA (ICH), 82801AB (ICHO)
30 * document number 290687-002, 298242-027: 82801BA (ICH2)
31 * document number 290733-003, 290739-013: 82801CA (ICH3-S)
32 * document number 290716-001, 290718-007: 82801CAM (ICH3-M)
33 * document number 290744-001, 290745-025: 82801DB (ICH4)
34 * document number 252337-001, 252663-008: 82801DBM (ICH4-M)
35 * document number 273599-001, 273645-002: 82801E (C-ICH)
36 * document number 252516-001, 252517-028: 82801EB (ICH5), 82801ER (ICH5R)
37 * document number 300641-004, 300884-013: 6300ESB
38 * document number 301473-002, 301474-026: 82801F (ICH6)
39 * document number 313082-001, 313075-006: 631xESB, 632xESB
40 * document number 307013-003, 307014-024: 82801G (ICH7)
41 * document number 322896-001, 322897-001: NM10
42 * document number 313056-003, 313057-017: 82801H (ICH8)
43 * document number 316972-004, 316973-012: 82801I (ICH9)
44 * document number 319973-002, 319974-002: 82801J (ICH10)
45 * document number 322169-001, 322170-003: 5 Series, 3400 Series (PCH)
46 * document number 320066-003, 320257-008: EP80597 (IICH)
47 * document number 324645-001, 324646-001: Cougar Point (CPT)
48 * document number TBD : Patsburg (PBG)
49 * document number TBD : DH89xxCC
50 * document number TBD : Panther Point
51 * document number TBD : Lynx Point
52 */
53
54#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
55
56#include <linux/init.h>
57#include <linux/kernel.h>
58#include <linux/module.h>
59#include <linux/errno.h>
60#include <linux/acpi.h>
61#include <linux/pci.h>
62#include <linux/mfd/core.h>
63#include <linux/mfd/lpc_ich.h>
64
65#define ACPIBASE 0x40
66#define ACPIBASE_GPE_OFF 0x28
67#define ACPIBASE_GPE_END 0x2f
68#define ACPIBASE_SMI_OFF 0x30
69#define ACPIBASE_SMI_END 0x33
70#define ACPIBASE_TCO_OFF 0x60
71#define ACPIBASE_TCO_END 0x7f
72#define ACPICTRL 0x44
73
74#define ACPIBASE_GCS_OFF 0x3410
75#define ACPIBASE_GCS_END 0x3414
76
77#define GPIOBASE 0x48
78#define GPIOCTRL 0x4C
79
80#define RCBABASE 0xf0
81
82#define wdt_io_res(i) wdt_res(0, i)
83#define wdt_mem_res(i) wdt_res(ICH_RES_MEM_OFF, i)
84#define wdt_res(b, i) (&wdt_ich_res[(b) + (i)])
85
86static int lpc_ich_acpi_save = -1;
87static int lpc_ich_gpio_save = -1;
88
/*
 * Resources handed to the iTCO_wdt cell.  start/end are filled in at
 * probe time from the chipset's ACPI and RCBA base registers; entry
 * order must match the ICH_RES_* indices used via the wdt_res()
 * macros above.
 */
static struct resource wdt_ich_res[] = {
	/* ACPI - TCO */
	{
		.flags = IORESOURCE_IO,
	},
	/* ACPI - SMI */
	{
		.flags = IORESOURCE_IO,
	},
	/* GCS */
	{
		.flags = IORESOURCE_MEM,
	},
};
103
/*
 * Resources handed to the gpio_ich cell.  start/end are filled in at
 * probe time; entry order must match the ICH_RES_GPIO/ICH_RES_GPE0
 * indices used in lpc_ich_init_gpio().
 */
static struct resource gpio_ich_res[] = {
	/* GPIO */
	{
		.flags = IORESOURCE_IO,
	},
	/* ACPI - GPE0 */
	{
		.flags = IORESOURCE_IO,
	},
};
114
/* Indices into lpc_ich_cells[]. */
enum lpc_cells {
	LPC_WDT = 0,
	LPC_GPIO,
};

/*
 * MFD cells exposed by the LPC bridge.  Resource tables are populated
 * at probe time; conflict checking is done explicitly there via
 * acpi_check_resource_conflict(), hence ignore_resource_conflicts.
 */
static struct mfd_cell lpc_ich_cells[] = {
	[LPC_WDT] = {
		.name = "iTCO_wdt",
		.num_resources = ARRAY_SIZE(wdt_ich_res),
		.resources = wdt_ich_res,
		.ignore_resource_conflicts = true,
	},
	[LPC_GPIO] = {
		.name = "gpio_ich",
		.num_resources = ARRAY_SIZE(gpio_ich_res),
		.resources = gpio_ich_res,
		.ignore_resource_conflicts = true,
	},
};
134
/*
 * Chipset identifiers: index into lpc_chipset_info[] and the
 * driver_data value of the lpc_ich_ids[] PCI table entries.
 */
enum lpc_chipsets {
	LPC_ICH = 0,	/* ICH */
	LPC_ICH0,	/* ICH0 */
	LPC_ICH2,	/* ICH2 */
	LPC_ICH2M,	/* ICH2-M */
	LPC_ICH3,	/* ICH3-S */
	LPC_ICH3M,	/* ICH3-M */
	LPC_ICH4,	/* ICH4 */
	LPC_ICH4M,	/* ICH4-M */
	LPC_CICH,	/* C-ICH */
	LPC_ICH5,	/* ICH5 & ICH5R */
	LPC_6300ESB,	/* 6300ESB */
	LPC_ICH6,	/* ICH6 & ICH6R */
	LPC_ICH6M,	/* ICH6-M */
	LPC_ICH6W,	/* ICH6W & ICH6RW */
	LPC_631XESB,	/* 631xESB/632xESB */
	LPC_ICH7,	/* ICH7 & ICH7R */
	LPC_ICH7DH,	/* ICH7DH */
	LPC_ICH7M,	/* ICH7-M & ICH7-U */
	LPC_ICH7MDH,	/* ICH7-M DH */
	LPC_NM10,	/* NM10 */
	LPC_ICH8,	/* ICH8 & ICH8R */
	LPC_ICH8DH,	/* ICH8DH */
	LPC_ICH8DO,	/* ICH8DO */
	LPC_ICH8M,	/* ICH8M */
	LPC_ICH8ME,	/* ICH8M-E */
	LPC_ICH9,	/* ICH9 */
	LPC_ICH9R,	/* ICH9R */
	LPC_ICH9DH,	/* ICH9DH */
	LPC_ICH9DO,	/* ICH9DO */
	LPC_ICH9M,	/* ICH9M */
	LPC_ICH9ME,	/* ICH9M-E */
	LPC_ICH10,	/* ICH10 */
	LPC_ICH10R,	/* ICH10R */
	LPC_ICH10D,	/* ICH10D */
	LPC_ICH10DO,	/* ICH10DO */
	LPC_PCH,	/* PCH Desktop Full Featured */
	LPC_PCHM,	/* PCH Mobile Full Featured */
	LPC_P55,	/* P55 */
	LPC_PM55,	/* PM55 */
	LPC_H55,	/* H55 */
	LPC_QM57,	/* QM57 */
	LPC_H57,	/* H57 */
	LPC_HM55,	/* HM55 */
	LPC_Q57,	/* Q57 */
	LPC_HM57,	/* HM57 */
	LPC_PCHMSFF,	/* PCH Mobile SFF Full Featured */
	LPC_QS57,	/* QS57 */
	LPC_3400,	/* 3400 */
	LPC_3420,	/* 3420 */
	LPC_3450,	/* 3450 */
	LPC_EP80579,	/* EP80579 */
	LPC_CPT,	/* Cougar Point */
	LPC_CPTD,	/* Cougar Point Desktop */
	LPC_CPTM,	/* Cougar Point Mobile */
	LPC_PBG,	/* Patsburg */
	LPC_DH89XXCC,	/* DH89xxCC */
	LPC_PPT,	/* Panther Point */
	LPC_LPT,	/* Lynx Point */
};
196
/*
 * Per-chipset feature data, indexed by enum lpc_chipsets and passed
 * to the MFD cells as platform data (see lpc_ich_finalize_cell()).
 * iTCO_version == 2 additionally enables the RCBA/GCS lookup in
 * lpc_ich_init_wdt(); gpio_version (when set) selects the GPIO
 * resource size in lpc_ich_init_gpio().  A zero gpio_version means no
 * GPIO cell is usable for that chipset.
 *
 * NOTE(review): not declared static - confirm whether another
 * translation unit references this symbol before narrowing linkage.
 */
struct lpc_ich_info lpc_chipset_info[] __devinitdata = {
	[LPC_ICH] = {
		.name = "ICH",
		.iTCO_version = 1,
	},
	[LPC_ICH0] = {
		.name = "ICH0",
		.iTCO_version = 1,
	},
	[LPC_ICH2] = {
		.name = "ICH2",
		.iTCO_version = 1,
	},
	[LPC_ICH2M] = {
		.name = "ICH2-M",
		.iTCO_version = 1,
	},
	[LPC_ICH3] = {
		.name = "ICH3-S",
		.iTCO_version = 1,
	},
	[LPC_ICH3M] = {
		.name = "ICH3-M",
		.iTCO_version = 1,
	},
	[LPC_ICH4] = {
		.name = "ICH4",
		.iTCO_version = 1,
	},
	[LPC_ICH4M] = {
		.name = "ICH4-M",
		.iTCO_version = 1,
	},
	[LPC_CICH] = {
		.name = "C-ICH",
		.iTCO_version = 1,
	},
	[LPC_ICH5] = {
		.name = "ICH5 or ICH5R",
		.iTCO_version = 1,
	},
	[LPC_6300ESB] = {
		.name = "6300ESB",
		.iTCO_version = 1,
	},
	[LPC_ICH6] = {
		.name = "ICH6 or ICH6R",
		.iTCO_version = 2,
		.gpio_version = ICH_V6_GPIO,
	},
	[LPC_ICH6M] = {
		.name = "ICH6-M",
		.iTCO_version = 2,
		.gpio_version = ICH_V6_GPIO,
	},
	[LPC_ICH6W] = {
		.name = "ICH6W or ICH6RW",
		.iTCO_version = 2,
		.gpio_version = ICH_V6_GPIO,
	},
	[LPC_631XESB] = {
		.name = "631xESB/632xESB",
		.iTCO_version = 2,
		.gpio_version = ICH_V6_GPIO,
	},
	[LPC_ICH7] = {
		.name = "ICH7 or ICH7R",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_ICH7DH] = {
		.name = "ICH7DH",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_ICH7M] = {
		.name = "ICH7-M or ICH7-U",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_ICH7MDH] = {
		.name = "ICH7-M DH",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_NM10] = {
		.name = "NM10",
		.iTCO_version = 2,
	},
	[LPC_ICH8] = {
		.name = "ICH8 or ICH8R",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_ICH8DH] = {
		.name = "ICH8DH",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_ICH8DO] = {
		.name = "ICH8DO",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_ICH8M] = {
		.name = "ICH8M",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_ICH8ME] = {
		.name = "ICH8M-E",
		.iTCO_version = 2,
		.gpio_version = ICH_V7_GPIO,
	},
	[LPC_ICH9] = {
		.name = "ICH9",
		.iTCO_version = 2,
		.gpio_version = ICH_V9_GPIO,
	},
	[LPC_ICH9R] = {
		.name = "ICH9R",
		.iTCO_version = 2,
		.gpio_version = ICH_V9_GPIO,
	},
	[LPC_ICH9DH] = {
		.name = "ICH9DH",
		.iTCO_version = 2,
		.gpio_version = ICH_V9_GPIO,
	},
	[LPC_ICH9DO] = {
		.name = "ICH9DO",
		.iTCO_version = 2,
		.gpio_version = ICH_V9_GPIO,
	},
	[LPC_ICH9M] = {
		.name = "ICH9M",
		.iTCO_version = 2,
		.gpio_version = ICH_V9_GPIO,
	},
	[LPC_ICH9ME] = {
		.name = "ICH9M-E",
		.iTCO_version = 2,
		.gpio_version = ICH_V9_GPIO,
	},
	[LPC_ICH10] = {
		.name = "ICH10",
		.iTCO_version = 2,
		.gpio_version = ICH_V10CONS_GPIO,
	},
	[LPC_ICH10R] = {
		.name = "ICH10R",
		.iTCO_version = 2,
		.gpio_version = ICH_V10CONS_GPIO,
	},
	[LPC_ICH10D] = {
		.name = "ICH10D",
		.iTCO_version = 2,
		.gpio_version = ICH_V10CORP_GPIO,
	},
	[LPC_ICH10DO] = {
		.name = "ICH10DO",
		.iTCO_version = 2,
		.gpio_version = ICH_V10CORP_GPIO,
	},
	[LPC_PCH] = {
		.name = "PCH Desktop Full Featured",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_PCHM] = {
		.name = "PCH Mobile Full Featured",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_P55] = {
		.name = "P55",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_PM55] = {
		.name = "PM55",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_H55] = {
		.name = "H55",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_QM57] = {
		.name = "QM57",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_H57] = {
		.name = "H57",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_HM55] = {
		.name = "HM55",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_Q57] = {
		.name = "Q57",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_HM57] = {
		.name = "HM57",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_PCHMSFF] = {
		.name = "PCH Mobile SFF Full Featured",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_QS57] = {
		.name = "QS57",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_3400] = {
		.name = "3400",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_3420] = {
		.name = "3420",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_3450] = {
		.name = "3450",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_EP80579] = {
		.name = "EP80579",
		.iTCO_version = 2,
	},
	[LPC_CPT] = {
		.name = "Cougar Point",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_CPTD] = {
		.name = "Cougar Point Desktop",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_CPTM] = {
		.name = "Cougar Point Mobile",
		.iTCO_version = 2,
		.gpio_version = ICH_V5_GPIO,
	},
	[LPC_PBG] = {
		.name = "Patsburg",
		.iTCO_version = 2,
	},
	[LPC_DH89XXCC] = {
		.name = "DH89xxCC",
		.iTCO_version = 2,
	},
	[LPC_PPT] = {
		.name = "Panther Point",
		.iTCO_version = 2,
	},
	[LPC_LPT] = {
		.name = "Lynx Point",
		.iTCO_version = 2,
	},
};
472
/*
 * This data only exists for exporting the supported PCI ids
 * via MODULE_DEVICE_TABLE.  We do not actually register a
 * pci_driver, because the I/O Controller Hub has also other
 * functions that probably will be registered by other drivers.
 *
 * driver_data is the enum lpc_chipsets index used to look up the
 * chipset's features in lpc_chipset_info[].
 */
static DEFINE_PCI_DEVICE_TABLE(lpc_ich_ids) = {
	{ PCI_VDEVICE(INTEL, 0x2410), LPC_ICH},
	{ PCI_VDEVICE(INTEL, 0x2420), LPC_ICH0},
	{ PCI_VDEVICE(INTEL, 0x2440), LPC_ICH2},
	{ PCI_VDEVICE(INTEL, 0x244c), LPC_ICH2M},
	{ PCI_VDEVICE(INTEL, 0x2480), LPC_ICH3},
	{ PCI_VDEVICE(INTEL, 0x248c), LPC_ICH3M},
	{ PCI_VDEVICE(INTEL, 0x24c0), LPC_ICH4},
	{ PCI_VDEVICE(INTEL, 0x24cc), LPC_ICH4M},
	{ PCI_VDEVICE(INTEL, 0x2450), LPC_CICH},
	{ PCI_VDEVICE(INTEL, 0x24d0), LPC_ICH5},
	{ PCI_VDEVICE(INTEL, 0x25a1), LPC_6300ESB},
	{ PCI_VDEVICE(INTEL, 0x2640), LPC_ICH6},
	{ PCI_VDEVICE(INTEL, 0x2641), LPC_ICH6M},
	{ PCI_VDEVICE(INTEL, 0x2642), LPC_ICH6W},
	{ PCI_VDEVICE(INTEL, 0x2670), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2671), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2672), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2673), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2674), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2675), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2676), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2677), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2678), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x2679), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x267a), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x267b), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x267c), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x267d), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x267e), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x267f), LPC_631XESB},
	{ PCI_VDEVICE(INTEL, 0x27b8), LPC_ICH7},
	{ PCI_VDEVICE(INTEL, 0x27b0), LPC_ICH7DH},
	{ PCI_VDEVICE(INTEL, 0x27b9), LPC_ICH7M},
	{ PCI_VDEVICE(INTEL, 0x27bd), LPC_ICH7MDH},
	{ PCI_VDEVICE(INTEL, 0x27bc), LPC_NM10},
	{ PCI_VDEVICE(INTEL, 0x2810), LPC_ICH8},
	{ PCI_VDEVICE(INTEL, 0x2812), LPC_ICH8DH},
	{ PCI_VDEVICE(INTEL, 0x2814), LPC_ICH8DO},
	{ PCI_VDEVICE(INTEL, 0x2815), LPC_ICH8M},
	{ PCI_VDEVICE(INTEL, 0x2811), LPC_ICH8ME},
	{ PCI_VDEVICE(INTEL, 0x2918), LPC_ICH9},
	{ PCI_VDEVICE(INTEL, 0x2916), LPC_ICH9R},
	{ PCI_VDEVICE(INTEL, 0x2912), LPC_ICH9DH},
	{ PCI_VDEVICE(INTEL, 0x2914), LPC_ICH9DO},
	{ PCI_VDEVICE(INTEL, 0x2919), LPC_ICH9M},
	{ PCI_VDEVICE(INTEL, 0x2917), LPC_ICH9ME},
	{ PCI_VDEVICE(INTEL, 0x3a18), LPC_ICH10},
	{ PCI_VDEVICE(INTEL, 0x3a16), LPC_ICH10R},
	{ PCI_VDEVICE(INTEL, 0x3a1a), LPC_ICH10D},
	{ PCI_VDEVICE(INTEL, 0x3a14), LPC_ICH10DO},
	{ PCI_VDEVICE(INTEL, 0x3b00), LPC_PCH},
	{ PCI_VDEVICE(INTEL, 0x3b01), LPC_PCHM},
	{ PCI_VDEVICE(INTEL, 0x3b02), LPC_P55},
	{ PCI_VDEVICE(INTEL, 0x3b03), LPC_PM55},
	{ PCI_VDEVICE(INTEL, 0x3b06), LPC_H55},
	{ PCI_VDEVICE(INTEL, 0x3b07), LPC_QM57},
	{ PCI_VDEVICE(INTEL, 0x3b08), LPC_H57},
	{ PCI_VDEVICE(INTEL, 0x3b09), LPC_HM55},
	{ PCI_VDEVICE(INTEL, 0x3b0a), LPC_Q57},
	{ PCI_VDEVICE(INTEL, 0x3b0b), LPC_HM57},
	{ PCI_VDEVICE(INTEL, 0x3b0d), LPC_PCHMSFF},
	{ PCI_VDEVICE(INTEL, 0x3b0f), LPC_QS57},
	{ PCI_VDEVICE(INTEL, 0x3b12), LPC_3400},
	{ PCI_VDEVICE(INTEL, 0x3b14), LPC_3420},
	{ PCI_VDEVICE(INTEL, 0x3b16), LPC_3450},
	{ PCI_VDEVICE(INTEL, 0x5031), LPC_EP80579},
	{ PCI_VDEVICE(INTEL, 0x1c41), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c42), LPC_CPTD},
	{ PCI_VDEVICE(INTEL, 0x1c43), LPC_CPTM},
	{ PCI_VDEVICE(INTEL, 0x1c44), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c45), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c46), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c47), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c48), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c49), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c4a), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c4b), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c4c), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c4d), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c4e), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c4f), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c50), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c51), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c52), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c53), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c54), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c55), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c56), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c57), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c58), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c59), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c5a), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c5b), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c5c), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c5d), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c5e), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1c5f), LPC_CPT},
	{ PCI_VDEVICE(INTEL, 0x1d40), LPC_PBG},
	{ PCI_VDEVICE(INTEL, 0x1d41), LPC_PBG},
	{ PCI_VDEVICE(INTEL, 0x2310), LPC_DH89XXCC},
	{ PCI_VDEVICE(INTEL, 0x1e40), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e41), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e42), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e43), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e44), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e45), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e46), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e47), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e48), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e49), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e4a), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e4b), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e4c), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e4d), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e4e), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e4f), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e50), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e51), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e52), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e53), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e54), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e55), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e56), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e57), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e58), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e59), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e5a), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e5b), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e5c), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e5d), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e5e), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x1e5f), LPC_PPT},
	{ PCI_VDEVICE(INTEL, 0x8c40), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c41), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c42), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c43), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c44), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c45), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c46), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c47), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c48), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c49), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c4a), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c4b), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c4c), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c4d), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c4e), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c4f), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c50), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c51), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c52), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c53), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c54), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c55), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c56), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c57), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c58), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c59), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c5a), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c5b), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c5c), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c5d), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c5e), LPC_LPT},
	{ PCI_VDEVICE(INTEL, 0x8c5f), LPC_LPT},
	{ 0, },			/* End of list */
};
MODULE_DEVICE_TABLE(pci, lpc_ich_ids);
647
/*
 * Undo the I/O-space decode enables done during probe by writing back
 * the saved ACPICTRL/GPIOCTRL config bytes.  A negative saved value
 * means "nothing to restore"; each slot is reset after use so the
 * restore is one-shot.
 */
static void lpc_ich_restore_config_space(struct pci_dev *dev)
{
	if (lpc_ich_acpi_save >= 0) {
		pci_write_config_byte(dev, ACPICTRL, lpc_ich_acpi_save);
		lpc_ich_acpi_save = -1;
	}

	if (lpc_ich_gpio_save >= 0) {
		pci_write_config_byte(dev, GPIOCTRL, lpc_ich_gpio_save);
		lpc_ich_gpio_save = -1;
	}
}
660
661static void __devinit lpc_ich_enable_acpi_space(struct pci_dev *dev)
662{
663 u8 reg_save;
664
665 pci_read_config_byte(dev, ACPICTRL, &reg_save);
666 pci_write_config_byte(dev, ACPICTRL, reg_save | 0x10);
667 lpc_ich_acpi_save = reg_save;
668}
669
670static void __devinit lpc_ich_enable_gpio_space(struct pci_dev *dev)
671{
672 u8 reg_save;
673
674 pci_read_config_byte(dev, GPIOCTRL, &reg_save);
675 pci_write_config_byte(dev, GPIOCTRL, reg_save | 0x10);
676 lpc_ich_gpio_save = reg_save;
677}
678
/*
 * Attach the matched chipset's lpc_chipset_info entry to a cell as
 * platform data before it is registered with mfd_add_devices().
 */
static void __devinit lpc_ich_finalize_cell(struct mfd_cell *cell,
				const struct pci_device_id *id)
{
	cell->platform_data = &lpc_chipset_info[id->driver_data];
	cell->pdata_size = sizeof(struct lpc_ich_info);
}
685
/*
 * Locate the GPIO-related I/O regions (GPE0 inside the ACPI block and
 * the GPIO bank itself), check each against ACPI for conflicts, enable
 * decode and register the gpio_ich MFD cell.
 *
 * Returns 0 on success, -ENODEV if the GPIO base is unset, or a
 * negative errno from the conflict check / mfd_add_devices().
 *
 * NOTE(review): this mutates the file-static gpio_ich_res[] and
 * lpc_ich_cells[] (including num_resources--) in place, so it assumes
 * at most one matching LPC bridge per system - confirm before
 * supporting multiple devices.
 */
static int __devinit lpc_ich_init_gpio(struct pci_dev *dev,
				const struct pci_device_id *id)
{
	u32 base_addr_cfg;
	u32 base_addr;
	int ret;
	bool acpi_conflict = false;
	struct resource *res;

	/* Setup power management base register */
	pci_read_config_dword(dev, ACPIBASE, &base_addr_cfg);
	base_addr = base_addr_cfg & 0x0000ff80;
	if (!base_addr) {
		dev_err(&dev->dev, "I/O space for ACPI uninitialized\n");
		/* GPE0 unavailable; drop its slot from the cell */
		lpc_ich_cells[LPC_GPIO].num_resources--;
		goto gpe0_done;
	}

	res = &gpio_ich_res[ICH_RES_GPE0];
	res->start = base_addr + ACPIBASE_GPE_OFF;
	res->end = base_addr + ACPIBASE_GPE_END;
	ret = acpi_check_resource_conflict(res);
	if (ret) {
		/*
		 * This isn't fatal for the GPIO, but we have to make sure that
		 * the platform_device subsystem doesn't see this resource
		 * or it will register an invalid region.
		 */
		lpc_ich_cells[LPC_GPIO].num_resources--;
		acpi_conflict = true;
	} else {
		lpc_ich_enable_acpi_space(dev);
	}

gpe0_done:
	/* Setup GPIO base register */
	pci_read_config_dword(dev, GPIOBASE, &base_addr_cfg);
	base_addr = base_addr_cfg & 0x0000ff80;
	if (!base_addr) {
		dev_err(&dev->dev, "I/O space for GPIO uninitialized\n");
		ret = -ENODEV;
		goto gpio_done;
	}

	/* Older devices provide fewer GPIO and have a smaller resource size. */
	res = &gpio_ich_res[ICH_RES_GPIO];
	res->start = base_addr;
	switch (lpc_chipset_info[id->driver_data].gpio_version) {
	case ICH_V5_GPIO:
	case ICH_V10CORP_GPIO:
		res->end = res->start + 128 - 1;
		break;
	default:
		res->end = res->start + 64 - 1;
		break;
	}

	ret = acpi_check_resource_conflict(res);
	if (ret) {
		/* this isn't necessarily fatal for the GPIO */
		acpi_conflict = true;
		goto gpio_done;
	}
	lpc_ich_enable_gpio_space(dev);

	lpc_ich_finalize_cell(&lpc_ich_cells[LPC_GPIO], id);
	ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_GPIO],
				1, NULL, 0);

gpio_done:
	if (acpi_conflict)
		pr_warn("Resource conflict(s) found affecting %s\n",
				lpc_ich_cells[LPC_GPIO].name);
	return ret;
}
761
/*
 * Locate the watchdog I/O regions (TCO and SMI inside the ACPI block,
 * plus the memory-mapped GCS register on iTCO version 2 parts), check
 * them against ACPI for conflicts, enable decode and register the
 * iTCO_wdt MFD cell.
 *
 * Returns 0 on success, -ENODEV if a required base is unset/disabled,
 * or a negative errno from the conflict check / mfd_add_devices().
 */
static int __devinit lpc_ich_init_wdt(struct pci_dev *dev,
				const struct pci_device_id *id)
{
	u32 base_addr_cfg;
	u32 base_addr;
	int ret;
	bool acpi_conflict = false;
	struct resource *res;

	/* Setup power management base register */
	pci_read_config_dword(dev, ACPIBASE, &base_addr_cfg);
	base_addr = base_addr_cfg & 0x0000ff80;
	if (!base_addr) {
		dev_err(&dev->dev, "I/O space for ACPI uninitialized\n");
		ret = -ENODEV;
		goto wdt_done;
	}

	res = wdt_io_res(ICH_RES_IO_TCO);
	res->start = base_addr + ACPIBASE_TCO_OFF;
	res->end = base_addr + ACPIBASE_TCO_END;
	ret = acpi_check_resource_conflict(res);
	if (ret) {
		acpi_conflict = true;
		goto wdt_done;
	}

	res = wdt_io_res(ICH_RES_IO_SMI);
	res->start = base_addr + ACPIBASE_SMI_OFF;
	res->end = base_addr + ACPIBASE_SMI_END;
	ret = acpi_check_resource_conflict(res);
	if (ret) {
		acpi_conflict = true;
		goto wdt_done;
	}
	lpc_ich_enable_acpi_space(dev);

	/*
	 * Get the Memory-Mapped GCS register. To get access to it
	 * we have to read RCBA from PCI Config space 0xf0 and use
	 * it as base. GCS = RCBA + ICH6_GCS(0x3410).
	 */
	if (lpc_chipset_info[id->driver_data].iTCO_version == 2) {
		pci_read_config_dword(dev, RCBABASE, &base_addr_cfg);
		base_addr = base_addr_cfg & 0xffffc000;
		/* Bit 0 of RCBA is the enable bit. */
		if (!(base_addr_cfg & 1)) {
			pr_err("RCBA is disabled by hardware/BIOS, "
					"device disabled\n");
			ret = -ENODEV;
			goto wdt_done;
		}
		res = wdt_mem_res(ICH_RES_MEM_GCS);
		res->start = base_addr + ACPIBASE_GCS_OFF;
		res->end = base_addr + ACPIBASE_GCS_END;
		ret = acpi_check_resource_conflict(res);
		if (ret) {
			acpi_conflict = true;
			goto wdt_done;
		}
	}

	lpc_ich_finalize_cell(&lpc_ich_cells[LPC_WDT], id);
	ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_WDT],
				1, NULL, 0);

wdt_done:
	if (acpi_conflict)
		pr_warn("Resource conflict(s) found affecting %s\n",
				lpc_ich_cells[LPC_WDT].name);
	return ret;
}
833
834static int __devinit lpc_ich_probe(struct pci_dev *dev,
835 const struct pci_device_id *id)
836{
837 int ret;
838 bool cell_added = false;
839
840 ret = lpc_ich_init_wdt(dev, id);
841 if (!ret)
842 cell_added = true;
843
844 ret = lpc_ich_init_gpio(dev, id);
845 if (!ret)
846 cell_added = true;
847
848 /*
849 * We only care if at least one or none of the cells registered
850 * successfully.
851 */
852 if (!cell_added) {
853 lpc_ich_restore_config_space(dev);
854 return -ENODEV;
855 }
856
857 return 0;
858}
859
/* Unbind: remove the MFD children and restore the saved config bits. */
static void __devexit lpc_ich_remove(struct pci_dev *dev)
{
	mfd_remove_devices(&dev->dev);
	lpc_ich_restore_config_space(dev);
}
865
/* PCI driver glue; probe/remove above do the real work. */
static struct pci_driver lpc_ich_driver = {
	.name		= "lpc_ich",
	.id_table	= lpc_ich_ids,
	.probe		= lpc_ich_probe,
	.remove		= __devexit_p(lpc_ich_remove),
};
872
/*
 * Module registration.  module_pci_driver() expands to exactly the
 * pci_register_driver()/pci_unregister_driver() init/exit pair that
 * was previously open-coded here - same behavior, less boilerplate,
 * and consistent with the sibling lpc_sch driver's conversion.
 */
module_pci_driver(lpc_ich_driver);
885
886MODULE_AUTHOR("Aaron Sierra <asierra@xes-inc.com>");
887MODULE_DESCRIPTION("LPC interface for Intel ICH");
888MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/lpc_sch.c b/drivers/mfd/lpc_sch.c
index abc421364a45..9f20abc5e393 100644
--- a/drivers/mfd/lpc_sch.c
+++ b/drivers/mfd/lpc_sch.c
@@ -36,6 +36,7 @@
36 36
37#define GPIOBASE 0x44 37#define GPIOBASE 0x44
38#define GPIO_IO_SIZE 64 38#define GPIO_IO_SIZE 64
39#define GPIO_IO_SIZE_CENTERTON 128
39 40
40#define WDTBASE 0x84 41#define WDTBASE 0x84
41#define WDT_IO_SIZE 64 42#define WDT_IO_SIZE 64
@@ -68,7 +69,7 @@ static struct resource wdt_sch_resource = {
68 69
69static struct mfd_cell tunnelcreek_cells[] = { 70static struct mfd_cell tunnelcreek_cells[] = {
70 { 71 {
71 .name = "tunnelcreek_wdt", 72 .name = "ie6xx_wdt",
72 .num_resources = 1, 73 .num_resources = 1,
73 .resources = &wdt_sch_resource, 74 .resources = &wdt_sch_resource,
74 }, 75 },
@@ -77,6 +78,7 @@ static struct mfd_cell tunnelcreek_cells[] = {
77static DEFINE_PCI_DEVICE_TABLE(lpc_sch_ids) = { 78static DEFINE_PCI_DEVICE_TABLE(lpc_sch_ids) = {
78 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SCH_LPC) }, 79 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SCH_LPC) },
79 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ITC_LPC) }, 80 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ITC_LPC) },
81 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CENTERTON_ILB) },
80 { 0, } 82 { 0, }
81}; 83};
82MODULE_DEVICE_TABLE(pci, lpc_sch_ids); 84MODULE_DEVICE_TABLE(pci, lpc_sch_ids);
@@ -115,7 +117,11 @@ static int __devinit lpc_sch_probe(struct pci_dev *dev,
115 } 117 }
116 118
117 gpio_sch_resource.start = base_addr; 119 gpio_sch_resource.start = base_addr;
118 gpio_sch_resource.end = base_addr + GPIO_IO_SIZE - 1; 120
121 if (id->device == PCI_DEVICE_ID_INTEL_CENTERTON_ILB)
122 gpio_sch_resource.end = base_addr + GPIO_IO_SIZE_CENTERTON - 1;
123 else
124 gpio_sch_resource.end = base_addr + GPIO_IO_SIZE - 1;
119 125
120 for (i=0; i < ARRAY_SIZE(lpc_sch_cells); i++) 126 for (i=0; i < ARRAY_SIZE(lpc_sch_cells); i++)
121 lpc_sch_cells[i].id = id->device; 127 lpc_sch_cells[i].id = id->device;
@@ -125,7 +131,8 @@ static int __devinit lpc_sch_probe(struct pci_dev *dev,
125 if (ret) 131 if (ret)
126 goto out_dev; 132 goto out_dev;
127 133
128 if (id->device == PCI_DEVICE_ID_INTEL_ITC_LPC) { 134 if (id->device == PCI_DEVICE_ID_INTEL_ITC_LPC
135 || id->device == PCI_DEVICE_ID_INTEL_CENTERTON_ILB) {
129 pci_read_config_dword(dev, WDTBASE, &base_addr_cfg); 136 pci_read_config_dword(dev, WDTBASE, &base_addr_cfg);
130 if (!(base_addr_cfg & (1 << 31))) { 137 if (!(base_addr_cfg & (1 << 31))) {
131 dev_err(&dev->dev, "Decode of the WDT I/O range disabled\n"); 138 dev_err(&dev->dev, "Decode of the WDT I/O range disabled\n");
@@ -167,18 +174,7 @@ static struct pci_driver lpc_sch_driver = {
167 .remove = __devexit_p(lpc_sch_remove), 174 .remove = __devexit_p(lpc_sch_remove),
168}; 175};
169 176
170static int __init lpc_sch_init(void) 177module_pci_driver(lpc_sch_driver);
171{
172 return pci_register_driver(&lpc_sch_driver);
173}
174
175static void __exit lpc_sch_exit(void)
176{
177 pci_unregister_driver(&lpc_sch_driver);
178}
179
180module_init(lpc_sch_init);
181module_exit(lpc_sch_exit);
182 178
183MODULE_AUTHOR("Denis Turischev <denis@compulab.co.il>"); 179MODULE_AUTHOR("Denis Turischev <denis@compulab.co.il>");
184MODULE_DESCRIPTION("LPC interface for Intel Poulsbo SCH"); 180MODULE_DESCRIPTION("LPC interface for Intel Poulsbo SCH");
diff --git a/drivers/mfd/max77693-irq.c b/drivers/mfd/max77693-irq.c
new file mode 100644
index 000000000000..2b403569e0a6
--- /dev/null
+++ b/drivers/mfd/max77693-irq.c
@@ -0,0 +1,309 @@
1/*
2 * max77693-irq.c - Interrupt controller support for MAX77693
3 *
4 * Copyright (C) 2012 Samsung Electronics Co.Ltd
5 * SangYoung Son <hello.son@samsung.com>
6 *
7 * This program is not provided / owned by Maxim Integrated Products.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 * This driver is based on max8997-irq.c
24 */
25
26#include <linux/err.h>
27#include <linux/irq.h>
28#include <linux/interrupt.h>
29#include <linux/module.h>
30#include <linux/irqdomain.h>
31#include <linux/mfd/max77693.h>
32#include <linux/mfd/max77693-private.h>
33
/*
 * Per-group interrupt mask register, indexed by enum max77693_irq_source.
 * Entries may be MAX77693_REG_INVALID (checked by users of this table)
 * for groups with no mask register.
 */
static const u8 max77693_mask_reg[] = {
	[LED_INT] = MAX77693_LED_REG_FLASH_INT_MASK,
	[TOPSYS_INT] = MAX77693_PMIC_REG_TOPSYS_INT_MASK,
	[CHG_INT] = MAX77693_CHG_REG_CHG_INT_MASK,
	[MUIC_INT1] = MAX77693_MUIC_REG_INTMASK1,
	[MUIC_INT2] = MAX77693_MUIC_REG_INTMASK2,
	[MUIC_INT3] = MAX77693_MUIC_REG_INTMASK3,
};
42
43static struct regmap *max77693_get_regmap(struct max77693_dev *max77693,
44 enum max77693_irq_source src)
45{
46 switch (src) {
47 case LED_INT ... CHG_INT:
48 return max77693->regmap;
49 case MUIC_INT1 ... MUIC_INT3:
50 return max77693->regmap_muic;
51 default:
52 return ERR_PTR(-EINVAL);
53 }
54}
55
/* Static description of one irq: which source group it belongs to and
 * which bit it occupies in that group's status/mask register. */
struct max77693_irq_data {
	int mask;
	enum max77693_irq_source group;
};
60
/* Build one table entry: irq index -> (group, bit mask within group). */
#define DECLARE_IRQ(idx, _group, _mask)		\
	[(idx)] = { .group = (_group), .mask = (_mask) }
/* One entry per hardware interrupt, indexed by MAX77693_*_IRQ_* number. */
static const struct max77693_irq_data max77693_irqs[] = {
	DECLARE_IRQ(MAX77693_LED_IRQ_FLED2_OPEN,	LED_INT, 1 << 0),
	DECLARE_IRQ(MAX77693_LED_IRQ_FLED2_SHORT,	LED_INT, 1 << 1),
	DECLARE_IRQ(MAX77693_LED_IRQ_FLED1_OPEN,	LED_INT, 1 << 2),
	DECLARE_IRQ(MAX77693_LED_IRQ_FLED1_SHORT,	LED_INT, 1 << 3),
	DECLARE_IRQ(MAX77693_LED_IRQ_MAX_FLASH,		LED_INT, 1 << 4),

	DECLARE_IRQ(MAX77693_TOPSYS_IRQ_T120C_INT,	TOPSYS_INT, 1 << 0),
	DECLARE_IRQ(MAX77693_TOPSYS_IRQ_T140C_INT,	TOPSYS_INT, 1 << 1),
	DECLARE_IRQ(MAX77693_TOPSYS_IRQ_LOWSYS_INT,	TOPSYS_INT, 1 << 3),

	DECLARE_IRQ(MAX77693_CHG_IRQ_BYP_I,	CHG_INT, 1 << 0),
	DECLARE_IRQ(MAX77693_CHG_IRQ_THM_I,	CHG_INT, 1 << 2),
	DECLARE_IRQ(MAX77693_CHG_IRQ_BAT_I,	CHG_INT, 1 << 3),
	DECLARE_IRQ(MAX77693_CHG_IRQ_CHG_I,	CHG_INT, 1 << 4),
	DECLARE_IRQ(MAX77693_CHG_IRQ_CHGIN_I,	CHG_INT, 1 << 6),

	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC,		MUIC_INT1, 1 << 0),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC_LOW,	MUIC_INT1, 1 << 1),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC_ERR,	MUIC_INT1, 1 << 2),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC1K,	MUIC_INT1, 1 << 3),

	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_CHGTYP,	MUIC_INT2, 1 << 0),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_CHGDETREUN,	MUIC_INT2, 1 << 1),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_DCDTMR,	MUIC_INT2, 1 << 2),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_DXOVP,	MUIC_INT2, 1 << 3),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_VBVOLT,	MUIC_INT2, 1 << 4),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_VIDRM,	MUIC_INT2, 1 << 5),

	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_EOC,		MUIC_INT3, 1 << 0),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_CGMBC,	MUIC_INT3, 1 << 1),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_OVP,		MUIC_INT3, 1 << 2),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_MBCCHG_ERR,	MUIC_INT3, 1 << 3),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_CHG_ENABLED,	MUIC_INT3, 1 << 4),
	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_BAT_DET,	MUIC_INT3, 1 << 5),
};
99
/* irq_chip bus-lock hook: serialize mask register updates until
 * max77693_irq_sync_unlock() pushes them out over I2C. */
static void max77693_irq_lock(struct irq_data *data)
{
	struct max77693_dev *max77693 = irq_get_chip_data(data->irq);

	mutex_lock(&max77693->irqlock);
}
106
107static void max77693_irq_sync_unlock(struct irq_data *data)
108{
109 struct max77693_dev *max77693 = irq_get_chip_data(data->irq);
110 int i;
111
112 for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) {
113 u8 mask_reg = max77693_mask_reg[i];
114 struct regmap *map = max77693_get_regmap(max77693, i);
115
116 if (mask_reg == MAX77693_REG_INVALID ||
117 IS_ERR_OR_NULL(map))
118 continue;
119 max77693->irq_masks_cache[i] = max77693->irq_masks_cur[i];
120
121 max77693_write_reg(map, max77693_mask_reg[i],
122 max77693->irq_masks_cur[i]);
123 }
124
125 mutex_unlock(&max77693->irqlock);
126}
127
128static const inline struct max77693_irq_data *
129irq_to_max77693_irq(struct max77693_dev *max77693, int irq)
130{
131 return &max77693_irqs[irq];
132}
133
/*
 * irq_chip mask hook: update the cached mask for this irq's group.
 * Note the inverted polarity of the MUIC block relative to the others
 * (per the init code: MUIC 0 = masked, other groups 1 = masked), so
 * masking clears the bit for MUIC and sets it elsewhere.  The hardware
 * write happens later in max77693_irq_sync_unlock().
 */
static void max77693_irq_mask(struct irq_data *data)
{
	struct max77693_dev *max77693 = irq_get_chip_data(data->irq);
	const struct max77693_irq_data *irq_data =
				irq_to_max77693_irq(max77693, data->irq);

	if (irq_data->group >= MUIC_INT1 && irq_data->group <= MUIC_INT3)
		max77693->irq_masks_cur[irq_data->group] &= ~irq_data->mask;
	else
		max77693->irq_masks_cur[irq_data->group] |= irq_data->mask;
}
145
/*
 * irq_chip unmask hook: mirror image of max77693_irq_mask() — set the
 * MUIC bit (0 = masked there) or clear it for the other groups.  Only
 * the cache is touched; sync happens in max77693_irq_sync_unlock().
 */
static void max77693_irq_unmask(struct irq_data *data)
{
	struct max77693_dev *max77693 = irq_get_chip_data(data->irq);
	const struct max77693_irq_data *irq_data =
	    irq_to_max77693_irq(max77693, data->irq);

	if (irq_data->group >= MUIC_INT1 && irq_data->group <= MUIC_INT3)
		max77693->irq_masks_cur[irq_data->group] |= irq_data->mask;
	else
		max77693->irq_masks_cur[irq_data->group] &= ~irq_data->mask;
}
157
/* irq_chip for the nested interrupts; mask changes are cached under
 * irqlock and flushed to the chip on bus-sync-unlock. */
static struct irq_chip max77693_irq_chip = {
	.name			= "max77693",
	.irq_bus_lock		= max77693_irq_lock,
	.irq_bus_sync_unlock	= max77693_irq_sync_unlock,
	.irq_mask		= max77693_irq_mask,
	.irq_unmask		= max77693_irq_unmask,
};
165
/* Bits of MAX77693_PMIC_REG_INTSRC: which functional block asserted. */
#define MAX77693_IRQSRC_CHG		(1 << 0)
#define MAX77693_IRQSRC_TOP		(1 << 1)
#define MAX77693_IRQSRC_FLASH		(1 << 2)
#define MAX77693_IRQSRC_MUIC		(1 << 3)
/*
 * Threaded irq handler: read the top-level source register, then the
 * per-group status registers for each asserted block, apply the cached
 * masks (MUIC polarity inverted, as elsewhere in this file) and dispatch
 * each pending bit to its nested virtual irq.
 *
 * NOTE(review): ret from the per-group max77693_read_reg() calls is
 * assigned but never checked; on a failed read the zero-initialized
 * irq_reg entry silently reports "no interrupts" for that group.
 */
static irqreturn_t max77693_irq_thread(int irq, void *data)
{
	struct max77693_dev *max77693 = data;
	u8 irq_reg[MAX77693_IRQ_GROUP_NR] = {};
	u8 irq_src;
	int ret;
	int i, cur_irq;

	ret = max77693_read_reg(max77693->regmap, MAX77693_PMIC_REG_INTSRC,
				&irq_src);
	if (ret < 0) {
		dev_err(max77693->dev, "Failed to read interrupt source: %d\n",
				ret);
		return IRQ_NONE;
	}

	if (irq_src & MAX77693_IRQSRC_CHG)
		/* CHG_INT */
		ret = max77693_read_reg(max77693->regmap, MAX77693_CHG_REG_CHG_INT,
				&irq_reg[CHG_INT]);

	if (irq_src & MAX77693_IRQSRC_TOP)
		/* TOPSYS_INT */
		ret = max77693_read_reg(max77693->regmap,
			MAX77693_PMIC_REG_TOPSYS_INT, &irq_reg[TOPSYS_INT]);

	if (irq_src & MAX77693_IRQSRC_FLASH)
		/* LED_INT */
		ret = max77693_read_reg(max77693->regmap,
				MAX77693_LED_REG_FLASH_INT, &irq_reg[LED_INT]);

	if (irq_src & MAX77693_IRQSRC_MUIC)
		/* MUIC INT1 ~ INT3 */
		max77693_bulk_read(max77693->regmap, MAX77693_MUIC_REG_INT1,
			MAX77693_NUM_IRQ_MUIC_REGS, &irq_reg[MUIC_INT1]);

	/* Apply masking */
	for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) {
		if (i >= MUIC_INT1 && i <= MUIC_INT3)
			irq_reg[i] &= max77693->irq_masks_cur[i];
		else
			irq_reg[i] &= ~max77693->irq_masks_cur[i];
	}

	/* Report: fan each surviving status bit out to its mapped virq. */
	for (i = 0; i < MAX77693_IRQ_NR; i++) {
		if (irq_reg[max77693_irqs[i].group] & max77693_irqs[i].mask) {
			cur_irq = irq_find_mapping(max77693->irq_domain, i);
			if (cur_irq)
				handle_nested_irq(cur_irq);
		}
	}

	return IRQ_HANDLED;
}
225
226int max77693_irq_resume(struct max77693_dev *max77693)
227{
228 if (max77693->irq)
229 max77693_irq_thread(0, max77693);
230
231 return 0;
232}
233
/*
 * irq_domain .map callback: wire a freshly created virtual irq to our
 * irq_chip, mark it nested (handled from the parent's irq thread), and
 * make it usable per-arch (ARM needs IRQF_VALID; others get noprobe).
 */
static int max77693_irq_domain_map(struct irq_domain *d, unsigned int irq,
					irq_hw_number_t hw)
{
	struct max77693_dev *max77693 = d->host_data;

	irq_set_chip_data(irq, max77693);
	irq_set_chip_and_handler(irq, &max77693_irq_chip, handle_edge_irq);
	irq_set_nested_thread(irq, 1);
#ifdef CONFIG_ARM
	set_irq_flags(irq, IRQF_VALID);
#else
	irq_set_noprobe(irq);
#endif
	return 0;
}
249
250static struct irq_domain_ops max77693_irq_domain_ops = {
251 .map = max77693_irq_domain_map,
252};
253
254int max77693_irq_init(struct max77693_dev *max77693)
255{
256 struct irq_domain *domain;
257 int i;
258 int ret;
259
260 mutex_init(&max77693->irqlock);
261
262 /* Mask individual interrupt sources */
263 for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) {
264 struct regmap *map;
265 /* MUIC IRQ 0:MASK 1:NOT MASK */
266 /* Other IRQ 1:MASK 0:NOT MASK */
267 if (i >= MUIC_INT1 && i <= MUIC_INT3) {
268 max77693->irq_masks_cur[i] = 0x00;
269 max77693->irq_masks_cache[i] = 0x00;
270 } else {
271 max77693->irq_masks_cur[i] = 0xff;
272 max77693->irq_masks_cache[i] = 0xff;
273 }
274 map = max77693_get_regmap(max77693, i);
275
276 if (IS_ERR_OR_NULL(map))
277 continue;
278 if (max77693_mask_reg[i] == MAX77693_REG_INVALID)
279 continue;
280 if (i >= MUIC_INT1 && i <= MUIC_INT3)
281 max77693_write_reg(map, max77693_mask_reg[i], 0x00);
282 else
283 max77693_write_reg(map, max77693_mask_reg[i], 0xff);
284 }
285
286 domain = irq_domain_add_linear(NULL, MAX77693_IRQ_NR,
287 &max77693_irq_domain_ops, max77693);
288 if (!domain) {
289 dev_err(max77693->dev, "could not create irq domain\n");
290 return -ENODEV;
291 }
292 max77693->irq_domain = domain;
293
294 ret = request_threaded_irq(max77693->irq, NULL, max77693_irq_thread,
295 IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
296 "max77693-irq", max77693);
297
298 if (ret)
299 dev_err(max77693->dev, "Failed to request IRQ %d: %d\n",
300 max77693->irq, ret);
301
302 return 0;
303}
304
/* Tear-down counterpart of max77693_irq_init(): release the irq line
 * (only if one was configured). */
void max77693_irq_exit(struct max77693_dev *max77693)
{
	if (max77693->irq)
		free_irq(max77693->irq, max77693);
}
diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c
new file mode 100644
index 000000000000..e9e4278722f3
--- /dev/null
+++ b/drivers/mfd/max77693.c
@@ -0,0 +1,249 @@
1/*
2 * max77693.c - mfd core driver for the MAX 77693
3 *
4 * Copyright (C) 2012 Samsung Electronics
 * SangYoung Son <hello.son@samsung.com>
6 *
7 * This program is not provided / owned by Maxim Integrated Products.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 * This driver is based on max8997.c
24 */
25
26#include <linux/module.h>
27#include <linux/slab.h>
28#include <linux/i2c.h>
29#include <linux/err.h>
30#include <linux/interrupt.h>
31#include <linux/pm_runtime.h>
32#include <linux/mutex.h>
33#include <linux/mfd/core.h>
34#include <linux/mfd/max77693.h>
35#include <linux/mfd/max77693-private.h>
36#include <linux/regulator/machine.h>
37#include <linux/regmap.h>
38
39#define I2C_ADDR_PMIC (0xCC >> 1) /* Charger, Flash LED */
40#define I2C_ADDR_MUIC (0x4A >> 1)
41#define I2C_ADDR_HAPTIC (0x90 >> 1)
42
/* Child platform devices registered via mfd_add_devices() in probe. */
static struct mfd_cell max77693_devs[] = {
	{ .name = "max77693-pmic", },
	{ .name = "max77693-charger", },
	{ .name = "max77693-flash", },
	{ .name = "max77693-muic", },
	{ .name = "max77693-haptic", },
};
50
51int max77693_read_reg(struct regmap *map, u8 reg, u8 *dest)
52{
53 unsigned int val;
54 int ret;
55
56 ret = regmap_read(map, reg, &val);
57 *dest = val;
58
59 return ret;
60}
61EXPORT_SYMBOL_GPL(max77693_read_reg);
62
63int max77693_bulk_read(struct regmap *map, u8 reg, int count, u8 *buf)
64{
65 int ret;
66
67 ret = regmap_bulk_read(map, reg, buf, count);
68
69 return ret;
70}
71EXPORT_SYMBOL_GPL(max77693_bulk_read);
72
73int max77693_write_reg(struct regmap *map, u8 reg, u8 value)
74{
75 int ret;
76
77 ret = regmap_write(map, reg, value);
78
79 return ret;
80}
81EXPORT_SYMBOL_GPL(max77693_write_reg);
82
83int max77693_bulk_write(struct regmap *map, u8 reg, int count, u8 *buf)
84{
85 int ret;
86
87 ret = regmap_bulk_write(map, reg, buf, count);
88
89 return ret;
90}
91EXPORT_SYMBOL_GPL(max77693_bulk_write);
92
93int max77693_update_reg(struct regmap *map, u8 reg, u8 val, u8 mask)
94{
95 int ret;
96
97 ret = regmap_update_bits(map, reg, mask, val);
98
99 return ret;
100}
101EXPORT_SYMBOL_GPL(max77693_update_reg);
102
/* 8-bit address / 8-bit value register map for the PMIC I2C page. */
static const struct regmap_config max77693_regmap_config = {
	.reg_bits = 8,
	.val_bits = 8,
	.max_register = MAX77693_PMIC_REG_END,
};
108
109static int max77693_i2c_probe(struct i2c_client *i2c,
110 const struct i2c_device_id *id)
111{
112 struct max77693_dev *max77693;
113 struct max77693_platform_data *pdata = i2c->dev.platform_data;
114 u8 reg_data;
115 int ret = 0;
116
117 max77693 = devm_kzalloc(&i2c->dev,
118 sizeof(struct max77693_dev), GFP_KERNEL);
119 if (max77693 == NULL)
120 return -ENOMEM;
121
122 max77693->regmap = devm_regmap_init_i2c(i2c, &max77693_regmap_config);
123 if (IS_ERR(max77693->regmap)) {
124 ret = PTR_ERR(max77693->regmap);
125 dev_err(max77693->dev,"failed to allocate register map: %d\n",
126 ret);
127 goto err_regmap;
128 }
129
130 i2c_set_clientdata(i2c, max77693);
131 max77693->dev = &i2c->dev;
132 max77693->i2c = i2c;
133 max77693->irq = i2c->irq;
134 max77693->type = id->driver_data;
135
136 if (!pdata)
137 goto err_regmap;
138
139 max77693->wakeup = pdata->wakeup;
140
141 mutex_init(&max77693->iolock);
142
143 if (max77693_read_reg(max77693->regmap,
144 MAX77693_PMIC_REG_PMIC_ID2, &reg_data) < 0) {
145 dev_err(max77693->dev, "device not found on this channel\n");
146 ret = -ENODEV;
147 goto err_regmap;
148 } else
149 dev_info(max77693->dev, "device ID: 0x%x\n", reg_data);
150
151 max77693->muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC);
152 i2c_set_clientdata(max77693->muic, max77693);
153
154 max77693->haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC);
155 i2c_set_clientdata(max77693->haptic, max77693);
156
157 ret = max77693_irq_init(max77693);
158 if (ret < 0)
159 goto err_mfd;
160
161 pm_runtime_set_active(max77693->dev);
162
163 ret = mfd_add_devices(max77693->dev, -1, max77693_devs,
164 ARRAY_SIZE(max77693_devs), NULL, 0);
165 if (ret < 0)
166 goto err_mfd;
167
168 device_init_wakeup(max77693->dev, pdata->wakeup);
169
170 return ret;
171
172err_mfd:
173 i2c_unregister_device(max77693->muic);
174 i2c_unregister_device(max77693->haptic);
175err_regmap:
176 kfree(max77693);
177
178 return ret;
179}
180
181static int max77693_i2c_remove(struct i2c_client *i2c)
182{
183 struct max77693_dev *max77693 = i2c_get_clientdata(i2c);
184
185 mfd_remove_devices(max77693->dev);
186 i2c_unregister_device(max77693->muic);
187 i2c_unregister_device(max77693->haptic);
188
189 return 0;
190}
191
/* I2C device id table; driver_data carries the chip type enum. */
static const struct i2c_device_id max77693_i2c_id[] = {
	{ "max77693", TYPE_MAX77693 },
	{ }
};
MODULE_DEVICE_TABLE(i2c, max77693_i2c_id);
197
198static int max77693_suspend(struct device *dev)
199{
200 struct i2c_client *i2c = container_of(dev, struct i2c_client, dev);
201 struct max77693_dev *max77693 = i2c_get_clientdata(i2c);
202
203 if (device_may_wakeup(dev))
204 irq_set_irq_wake(max77693->irq, 1);
205 return 0;
206}
207
208static int max77693_resume(struct device *dev)
209{
210 struct i2c_client *i2c = container_of(dev, struct i2c_client, dev);
211 struct max77693_dev *max77693 = i2c_get_clientdata(i2c);
212
213 if (device_may_wakeup(dev))
214 irq_set_irq_wake(max77693->irq, 0);
215 return max77693_irq_resume(max77693);
216}
217
218const struct dev_pm_ops max77693_pm = {
219 .suspend = max77693_suspend,
220 .resume = max77693_resume,
221};
222
/* I2C driver glue: ties together probe/remove, the id table and PM ops. */
static struct i2c_driver max77693_i2c_driver = {
	.driver = {
		   .name = "max77693",
		   .owner = THIS_MODULE,
		   .pm = &max77693_pm,
	},
	.probe = max77693_i2c_probe,
	.remove = max77693_i2c_remove,
	.id_table = max77693_i2c_id,
};
233
static int __init max77693_i2c_init(void)
{
	return i2c_add_driver(&max77693_i2c_driver);
}
/* init early so consumer devices can complete system boot */
/* (subsys_initcall, not module_init/module_i2c_driver: the mfd children
 * registered in probe must find this core driver already present) */
subsys_initcall(max77693_i2c_init);

static void __exit max77693_i2c_exit(void)
{
	i2c_del_driver(&max77693_i2c_driver);
}
module_exit(max77693_i2c_exit);
246
247MODULE_DESCRIPTION("MAXIM 77693 multi-function core driver");
248MODULE_AUTHOR("SangYoung, Son <hello.son@samsung.com>");
249MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index 738722cdecaa..f0ea3b8b3e4a 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -15,24 +15,13 @@
15#include <linux/platform_device.h> 15#include <linux/platform_device.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/spi/spi.h>
19#include <linux/mfd/core.h> 18#include <linux/mfd/core.h>
20#include <linux/mfd/mc13xxx.h> 19#include <linux/mfd/mc13xxx.h>
21#include <linux/of.h> 20#include <linux/of.h>
22#include <linux/of_device.h> 21#include <linux/of_device.h>
23#include <linux/of_gpio.h> 22#include <linux/of_gpio.h>
24 23
25struct mc13xxx { 24#include "mc13xxx.h"
26 struct spi_device *spidev;
27 struct mutex lock;
28 int irq;
29 int flags;
30
31 irq_handler_t irqhandler[MC13XXX_NUM_IRQ];
32 void *irqdata[MC13XXX_NUM_IRQ];
33
34 int adcflags;
35};
36 25
37#define MC13XXX_IRQSTAT0 0 26#define MC13XXX_IRQSTAT0 0
38#define MC13XXX_IRQSTAT0_ADCDONEI (1 << 0) 27#define MC13XXX_IRQSTAT0_ADCDONEI (1 << 0)
@@ -139,34 +128,29 @@ struct mc13xxx {
139 128
140#define MC13XXX_ADC2 45 129#define MC13XXX_ADC2 45
141 130
142#define MC13XXX_NUMREGS 0x3f
143
144void mc13xxx_lock(struct mc13xxx *mc13xxx) 131void mc13xxx_lock(struct mc13xxx *mc13xxx)
145{ 132{
146 if (!mutex_trylock(&mc13xxx->lock)) { 133 if (!mutex_trylock(&mc13xxx->lock)) {
147 dev_dbg(&mc13xxx->spidev->dev, "wait for %s from %pf\n", 134 dev_dbg(mc13xxx->dev, "wait for %s from %pf\n",
148 __func__, __builtin_return_address(0)); 135 __func__, __builtin_return_address(0));
149 136
150 mutex_lock(&mc13xxx->lock); 137 mutex_lock(&mc13xxx->lock);
151 } 138 }
152 dev_dbg(&mc13xxx->spidev->dev, "%s from %pf\n", 139 dev_dbg(mc13xxx->dev, "%s from %pf\n",
153 __func__, __builtin_return_address(0)); 140 __func__, __builtin_return_address(0));
154} 141}
155EXPORT_SYMBOL(mc13xxx_lock); 142EXPORT_SYMBOL(mc13xxx_lock);
156 143
157void mc13xxx_unlock(struct mc13xxx *mc13xxx) 144void mc13xxx_unlock(struct mc13xxx *mc13xxx)
158{ 145{
159 dev_dbg(&mc13xxx->spidev->dev, "%s from %pf\n", 146 dev_dbg(mc13xxx->dev, "%s from %pf\n",
160 __func__, __builtin_return_address(0)); 147 __func__, __builtin_return_address(0));
161 mutex_unlock(&mc13xxx->lock); 148 mutex_unlock(&mc13xxx->lock);
162} 149}
163EXPORT_SYMBOL(mc13xxx_unlock); 150EXPORT_SYMBOL(mc13xxx_unlock);
164 151
165#define MC13XXX_REGOFFSET_SHIFT 25
166int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val) 152int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val)
167{ 153{
168 struct spi_transfer t;
169 struct spi_message m;
170 int ret; 154 int ret;
171 155
172 BUG_ON(!mutex_is_locked(&mc13xxx->lock)); 156 BUG_ON(!mutex_is_locked(&mc13xxx->lock));
@@ -174,84 +158,35 @@ int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val)
174 if (offset > MC13XXX_NUMREGS) 158 if (offset > MC13XXX_NUMREGS)
175 return -EINVAL; 159 return -EINVAL;
176 160
177 *val = offset << MC13XXX_REGOFFSET_SHIFT; 161 ret = regmap_read(mc13xxx->regmap, offset, val);
178 162 dev_vdbg(mc13xxx->dev, "[0x%02x] -> 0x%06x\n", offset, *val);
179 memset(&t, 0, sizeof(t));
180
181 t.tx_buf = val;
182 t.rx_buf = val;
183 t.len = sizeof(u32);
184
185 spi_message_init(&m);
186 spi_message_add_tail(&t, &m);
187
188 ret = spi_sync(mc13xxx->spidev, &m);
189
190 /* error in message.status implies error return from spi_sync */
191 BUG_ON(!ret && m.status);
192 163
193 if (ret) 164 return ret;
194 return ret;
195
196 *val &= 0xffffff;
197
198 dev_vdbg(&mc13xxx->spidev->dev, "[0x%02x] -> 0x%06x\n", offset, *val);
199
200 return 0;
201} 165}
202EXPORT_SYMBOL(mc13xxx_reg_read); 166EXPORT_SYMBOL(mc13xxx_reg_read);
203 167
204int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val) 168int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val)
205{ 169{
206 u32 buf;
207 struct spi_transfer t;
208 struct spi_message m;
209 int ret;
210
211 BUG_ON(!mutex_is_locked(&mc13xxx->lock)); 170 BUG_ON(!mutex_is_locked(&mc13xxx->lock));
212 171
213 dev_vdbg(&mc13xxx->spidev->dev, "[0x%02x] <- 0x%06x\n", offset, val); 172 dev_vdbg(mc13xxx->dev, "[0x%02x] <- 0x%06x\n", offset, val);
214 173
215 if (offset > MC13XXX_NUMREGS || val > 0xffffff) 174 if (offset > MC13XXX_NUMREGS || val > 0xffffff)
216 return -EINVAL; 175 return -EINVAL;
217 176
218 buf = 1 << 31 | offset << MC13XXX_REGOFFSET_SHIFT | val; 177 return regmap_write(mc13xxx->regmap, offset, val);
219
220 memset(&t, 0, sizeof(t));
221
222 t.tx_buf = &buf;
223 t.rx_buf = &buf;
224 t.len = sizeof(u32);
225
226 spi_message_init(&m);
227 spi_message_add_tail(&t, &m);
228
229 ret = spi_sync(mc13xxx->spidev, &m);
230
231 BUG_ON(!ret && m.status);
232
233 if (ret)
234 return ret;
235
236 return 0;
237} 178}
238EXPORT_SYMBOL(mc13xxx_reg_write); 179EXPORT_SYMBOL(mc13xxx_reg_write);
239 180
240int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, 181int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset,
241 u32 mask, u32 val) 182 u32 mask, u32 val)
242{ 183{
243 int ret; 184 BUG_ON(!mutex_is_locked(&mc13xxx->lock));
244 u32 valread;
245
246 BUG_ON(val & ~mask); 185 BUG_ON(val & ~mask);
186 dev_vdbg(mc13xxx->dev, "[0x%02x] <- 0x%06x (mask: 0x%06x)\n",
187 offset, val, mask);
247 188
248 ret = mc13xxx_reg_read(mc13xxx, offset, &valread); 189 return regmap_update_bits(mc13xxx->regmap, offset, mask, val);
249 if (ret)
250 return ret;
251
252 valread = (valread & ~mask) | val;
253
254 return mc13xxx_reg_write(mc13xxx, offset, valread);
255} 190}
256EXPORT_SYMBOL(mc13xxx_reg_rmw); 191EXPORT_SYMBOL(mc13xxx_reg_rmw);
257 192
@@ -439,7 +374,7 @@ static int mc13xxx_irq_handle(struct mc13xxx *mc13xxx,
439 if (handled == IRQ_HANDLED) 374 if (handled == IRQ_HANDLED)
440 num_handled++; 375 num_handled++;
441 } else { 376 } else {
442 dev_err(&mc13xxx->spidev->dev, 377 dev_err(mc13xxx->dev,
443 "BUG: irq %u but no handler\n", 378 "BUG: irq %u but no handler\n",
444 baseirq + irq); 379 baseirq + irq);
445 380
@@ -475,25 +410,23 @@ static irqreturn_t mc13xxx_irq_thread(int irq, void *data)
475 return IRQ_RETVAL(handled); 410 return IRQ_RETVAL(handled);
476} 411}
477 412
478enum mc13xxx_id {
479 MC13XXX_ID_MC13783,
480 MC13XXX_ID_MC13892,
481 MC13XXX_ID_INVALID,
482};
483
484static const char *mc13xxx_chipname[] = { 413static const char *mc13xxx_chipname[] = {
485 [MC13XXX_ID_MC13783] = "mc13783", 414 [MC13XXX_ID_MC13783] = "mc13783",
486 [MC13XXX_ID_MC13892] = "mc13892", 415 [MC13XXX_ID_MC13892] = "mc13892",
487}; 416};
488 417
489#define maskval(reg, mask) (((reg) & (mask)) >> __ffs(mask)) 418#define maskval(reg, mask) (((reg) & (mask)) >> __ffs(mask))
490static int mc13xxx_identify(struct mc13xxx *mc13xxx, enum mc13xxx_id *id) 419static int mc13xxx_identify(struct mc13xxx *mc13xxx)
491{ 420{
492 u32 icid; 421 u32 icid;
493 u32 revision; 422 u32 revision;
494 const char *name;
495 int ret; 423 int ret;
496 424
425 /*
426 * Get the generation ID from register 46, as apparently some older
427 * IC revisions only have this info at this location. Newer ICs seem to
428 * have both.
429 */
497 ret = mc13xxx_reg_read(mc13xxx, 46, &icid); 430 ret = mc13xxx_reg_read(mc13xxx, 46, &icid);
498 if (ret) 431 if (ret)
499 return ret; 432 return ret;
@@ -502,26 +435,23 @@ static int mc13xxx_identify(struct mc13xxx *mc13xxx, enum mc13xxx_id *id)
502 435
503 switch (icid) { 436 switch (icid) {
504 case 2: 437 case 2:
505 *id = MC13XXX_ID_MC13783; 438 mc13xxx->ictype = MC13XXX_ID_MC13783;
506 name = "mc13783";
507 break; 439 break;
508 case 7: 440 case 7:
509 *id = MC13XXX_ID_MC13892; 441 mc13xxx->ictype = MC13XXX_ID_MC13892;
510 name = "mc13892";
511 break; 442 break;
512 default: 443 default:
513 *id = MC13XXX_ID_INVALID; 444 mc13xxx->ictype = MC13XXX_ID_INVALID;
514 break; 445 break;
515 } 446 }
516 447
517 if (*id == MC13XXX_ID_MC13783 || *id == MC13XXX_ID_MC13892) { 448 if (mc13xxx->ictype == MC13XXX_ID_MC13783 ||
449 mc13xxx->ictype == MC13XXX_ID_MC13892) {
518 ret = mc13xxx_reg_read(mc13xxx, MC13XXX_REVISION, &revision); 450 ret = mc13xxx_reg_read(mc13xxx, MC13XXX_REVISION, &revision);
519 if (ret)
520 return ret;
521 451
522 dev_info(&mc13xxx->spidev->dev, "%s: rev: %d.%d, " 452 dev_info(mc13xxx->dev, "%s: rev: %d.%d, "
523 "fin: %d, fab: %d, icid: %d/%d\n", 453 "fin: %d, fab: %d, icid: %d/%d\n",
524 mc13xxx_chipname[*id], 454 mc13xxx_chipname[mc13xxx->ictype],
525 maskval(revision, MC13XXX_REVISION_REVFULL), 455 maskval(revision, MC13XXX_REVISION_REVFULL),
526 maskval(revision, MC13XXX_REVISION_REVMETAL), 456 maskval(revision, MC13XXX_REVISION_REVMETAL),
527 maskval(revision, MC13XXX_REVISION_FIN), 457 maskval(revision, MC13XXX_REVISION_FIN),
@@ -530,26 +460,12 @@ static int mc13xxx_identify(struct mc13xxx *mc13xxx, enum mc13xxx_id *id)
530 maskval(revision, MC13XXX_REVISION_ICIDCODE)); 460 maskval(revision, MC13XXX_REVISION_ICIDCODE));
531 } 461 }
532 462
533 if (*id != MC13XXX_ID_INVALID) { 463 return (mc13xxx->ictype == MC13XXX_ID_INVALID) ? -ENODEV : 0;
534 const struct spi_device_id *devid =
535 spi_get_device_id(mc13xxx->spidev);
536 if (!devid || devid->driver_data != *id)
537 dev_warn(&mc13xxx->spidev->dev, "device id doesn't "
538 "match auto detection!\n");
539 }
540
541 return 0;
542} 464}
543 465
544static const char *mc13xxx_get_chipname(struct mc13xxx *mc13xxx) 466static const char *mc13xxx_get_chipname(struct mc13xxx *mc13xxx)
545{ 467{
546 const struct spi_device_id *devid = 468 return mc13xxx_chipname[mc13xxx->ictype];
547 spi_get_device_id(mc13xxx->spidev);
548
549 if (!devid)
550 return NULL;
551
552 return mc13xxx_chipname[devid->driver_data];
553} 469}
554 470
555int mc13xxx_get_flags(struct mc13xxx *mc13xxx) 471int mc13xxx_get_flags(struct mc13xxx *mc13xxx)
@@ -592,7 +508,7 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
592 }; 508 };
593 init_completion(&adcdone_data.done); 509 init_completion(&adcdone_data.done);
594 510
595 dev_dbg(&mc13xxx->spidev->dev, "%s\n", __func__); 511 dev_dbg(mc13xxx->dev, "%s\n", __func__);
596 512
597 mc13xxx_lock(mc13xxx); 513 mc13xxx_lock(mc13xxx);
598 514
@@ -637,7 +553,8 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
637 adc1 |= ato << MC13783_ADC1_ATO_SHIFT; 553 adc1 |= ato << MC13783_ADC1_ATO_SHIFT;
638 if (atox) 554 if (atox)
639 adc1 |= MC13783_ADC1_ATOX; 555 adc1 |= MC13783_ADC1_ATOX;
640 dev_dbg(&mc13xxx->spidev->dev, "%s: request irq\n", __func__); 556
557 dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__);
641 mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, 558 mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE,
642 mc13xxx_handler_adcdone, __func__, &adcdone_data); 559 mc13xxx_handler_adcdone, __func__, &adcdone_data);
643 mc13xxx_irq_ack(mc13xxx, MC13XXX_IRQ_ADCDONE); 560 mc13xxx_irq_ack(mc13xxx, MC13XXX_IRQ_ADCDONE);
@@ -695,7 +612,7 @@ static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx,
695 if (!cell.name) 612 if (!cell.name)
696 return -ENOMEM; 613 return -ENOMEM;
697 614
698 return mfd_add_devices(&mc13xxx->spidev->dev, -1, &cell, 1, NULL, 0); 615 return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0);
699} 616}
700 617
701static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) 618static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format)
@@ -706,7 +623,7 @@ static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format)
706#ifdef CONFIG_OF 623#ifdef CONFIG_OF
707static int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx) 624static int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx)
708{ 625{
709 struct device_node *np = mc13xxx->spidev->dev.of_node; 626 struct device_node *np = mc13xxx->dev->of_node;
710 627
711 if (!np) 628 if (!np)
712 return -ENODEV; 629 return -ENODEV;
@@ -732,55 +649,15 @@ static inline int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx)
732} 649}
733#endif 650#endif
734 651
735static const struct spi_device_id mc13xxx_device_id[] = { 652int mc13xxx_common_init(struct mc13xxx *mc13xxx,
736 { 653 struct mc13xxx_platform_data *pdata, int irq)
737 .name = "mc13783",
738 .driver_data = MC13XXX_ID_MC13783,
739 }, {
740 .name = "mc13892",
741 .driver_data = MC13XXX_ID_MC13892,
742 }, {
743 /* sentinel */
744 }
745};
746MODULE_DEVICE_TABLE(spi, mc13xxx_device_id);
747
748static const struct of_device_id mc13xxx_dt_ids[] = {
749 { .compatible = "fsl,mc13783", .data = (void *) MC13XXX_ID_MC13783, },
750 { .compatible = "fsl,mc13892", .data = (void *) MC13XXX_ID_MC13892, },
751 { /* sentinel */ }
752};
753MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids);
754
755static int mc13xxx_probe(struct spi_device *spi)
756{ 654{
757 const struct of_device_id *of_id;
758 struct spi_driver *sdrv = to_spi_driver(spi->dev.driver);
759 struct mc13xxx *mc13xxx;
760 struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev);
761 enum mc13xxx_id id;
762 int ret; 655 int ret;
763 656
764 of_id = of_match_device(mc13xxx_dt_ids, &spi->dev);
765 if (of_id)
766 sdrv->id_table = &mc13xxx_device_id[(enum mc13xxx_id) of_id->data];
767
768 mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL);
769 if (!mc13xxx)
770 return -ENOMEM;
771
772 dev_set_drvdata(&spi->dev, mc13xxx);
773 spi->mode = SPI_MODE_0 | SPI_CS_HIGH;
774 spi->bits_per_word = 32;
775 spi_setup(spi);
776
777 mc13xxx->spidev = spi;
778
779 mutex_init(&mc13xxx->lock);
780 mc13xxx_lock(mc13xxx); 657 mc13xxx_lock(mc13xxx);
781 658
782 ret = mc13xxx_identify(mc13xxx, &id); 659 ret = mc13xxx_identify(mc13xxx);
783 if (ret || id == MC13XXX_ID_INVALID) 660 if (ret)
784 goto err_revision; 661 goto err_revision;
785 662
786 /* mask all irqs */ 663 /* mask all irqs */
@@ -792,18 +669,19 @@ static int mc13xxx_probe(struct spi_device *spi)
792 if (ret) 669 if (ret)
793 goto err_mask; 670 goto err_mask;
794 671
795 ret = request_threaded_irq(spi->irq, NULL, mc13xxx_irq_thread, 672 ret = request_threaded_irq(irq, NULL, mc13xxx_irq_thread,
796 IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13xxx", mc13xxx); 673 IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13xxx", mc13xxx);
797 674
798 if (ret) { 675 if (ret) {
799err_mask: 676err_mask:
800err_revision: 677err_revision:
801 mc13xxx_unlock(mc13xxx); 678 mc13xxx_unlock(mc13xxx);
802 dev_set_drvdata(&spi->dev, NULL);
803 kfree(mc13xxx); 679 kfree(mc13xxx);
804 return ret; 680 return ret;
805 } 681 }
806 682
683 mc13xxx->irq = irq;
684
807 mc13xxx_unlock(mc13xxx); 685 mc13xxx_unlock(mc13xxx);
808 686
809 if (mc13xxx_probe_flags_dt(mc13xxx) < 0 && pdata) 687 if (mc13xxx_probe_flags_dt(mc13xxx) < 0 && pdata)
@@ -838,42 +716,19 @@ err_revision:
838 716
839 return 0; 717 return 0;
840} 718}
719EXPORT_SYMBOL_GPL(mc13xxx_common_init);
841 720
842static int __devexit mc13xxx_remove(struct spi_device *spi) 721void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx)
843{ 722{
844 struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev); 723 free_irq(mc13xxx->irq, mc13xxx);
845 724
846 free_irq(mc13xxx->spidev->irq, mc13xxx); 725 mfd_remove_devices(mc13xxx->dev);
847 726
848 mfd_remove_devices(&spi->dev); 727 regmap_exit(mc13xxx->regmap);
849 728
850 kfree(mc13xxx); 729 kfree(mc13xxx);
851
852 return 0;
853}
854
855static struct spi_driver mc13xxx_driver = {
856 .id_table = mc13xxx_device_id,
857 .driver = {
858 .name = "mc13xxx",
859 .owner = THIS_MODULE,
860 .of_match_table = mc13xxx_dt_ids,
861 },
862 .probe = mc13xxx_probe,
863 .remove = __devexit_p(mc13xxx_remove),
864};
865
866static int __init mc13xxx_init(void)
867{
868 return spi_register_driver(&mc13xxx_driver);
869}
870subsys_initcall(mc13xxx_init);
871
872static void __exit mc13xxx_exit(void)
873{
874 spi_unregister_driver(&mc13xxx_driver);
875} 730}
876module_exit(mc13xxx_exit); 731EXPORT_SYMBOL_GPL(mc13xxx_common_cleanup);
877 732
878MODULE_DESCRIPTION("Core driver for Freescale MC13XXX PMIC"); 733MODULE_DESCRIPTION("Core driver for Freescale MC13XXX PMIC");
879MODULE_AUTHOR("Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>"); 734MODULE_AUTHOR("Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>");
diff --git a/drivers/mfd/mc13xxx-i2c.c b/drivers/mfd/mc13xxx-i2c.c
new file mode 100644
index 000000000000..d22501dad6a6
--- /dev/null
+++ b/drivers/mfd/mc13xxx-i2c.c
@@ -0,0 +1,128 @@
1/*
2 * Copyright 2009-2010 Creative Product Design
3 * Marc Reilly marc@cpdesign.com.au
4 *
5 * This program is free software; you can redistribute it and/or modify it under
6 * the terms of the GNU General Public License version 2 as published by the
7 * Free Software Foundation.
8 */
9
10#include <linux/slab.h>
11#include <linux/module.h>
12#include <linux/platform_device.h>
13#include <linux/mutex.h>
14#include <linux/mfd/core.h>
15#include <linux/mfd/mc13xxx.h>
16#include <linux/of.h>
17#include <linux/of_device.h>
18#include <linux/of_gpio.h>
19#include <linux/i2c.h>
20#include <linux/err.h>
21
22#include "mc13xxx.h"
23
24static const struct i2c_device_id mc13xxx_i2c_device_id[] = {
25 {
26 .name = "mc13892",
27 .driver_data = MC13XXX_ID_MC13892,
28 }, {
29 /* sentinel */
30 }
31};
32MODULE_DEVICE_TABLE(i2c, mc13xxx_i2c_device_id);
33
34static const struct of_device_id mc13xxx_dt_ids[] = {
35 {
36 .compatible = "fsl,mc13892",
37 .data = (void *) &mc13xxx_i2c_device_id[0],
38 }, {
39 /* sentinel */
40 }
41};
42MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids);
43
44static struct regmap_config mc13xxx_regmap_i2c_config = {
45 .reg_bits = 8,
46 .val_bits = 24,
47
48 .max_register = MC13XXX_NUMREGS,
49
50 .cache_type = REGCACHE_NONE,
51};
52
53static int mc13xxx_i2c_probe(struct i2c_client *client,
54 const struct i2c_device_id *id)
55{
56 const struct of_device_id *of_id;
57 struct i2c_driver *idrv = to_i2c_driver(client->dev.driver);
58 struct mc13xxx *mc13xxx;
59 struct mc13xxx_platform_data *pdata = dev_get_platdata(&client->dev);
60 int ret;
61
62 of_id = of_match_device(mc13xxx_dt_ids, &client->dev);
63 if (of_id)
64 idrv->id_table = (const struct i2c_device_id*) of_id->data;
65
66 mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL);
67 if (!mc13xxx)
68 return -ENOMEM;
69
70 dev_set_drvdata(&client->dev, mc13xxx);
71
72 mc13xxx->dev = &client->dev;
73 mutex_init(&mc13xxx->lock);
74
75 mc13xxx->regmap = regmap_init_i2c(client, &mc13xxx_regmap_i2c_config);
76 if (IS_ERR(mc13xxx->regmap)) {
77 ret = PTR_ERR(mc13xxx->regmap);
78 dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n",
79 ret);
80 dev_set_drvdata(&client->dev, NULL);
81 kfree(mc13xxx);
82 return ret;
83 }
84
85 ret = mc13xxx_common_init(mc13xxx, pdata, client->irq);
86
87 if (ret == 0 && (id->driver_data != mc13xxx->ictype))
88 dev_warn(mc13xxx->dev,
89 "device id doesn't match auto detection!\n");
90
91 return ret;
92}
93
94static int __devexit mc13xxx_i2c_remove(struct i2c_client *client)
95{
96 struct mc13xxx *mc13xxx = dev_get_drvdata(&client->dev);
97
98 mc13xxx_common_cleanup(mc13xxx);
99
100 return 0;
101}
102
103static struct i2c_driver mc13xxx_i2c_driver = {
104 .id_table = mc13xxx_i2c_device_id,
105 .driver = {
106 .owner = THIS_MODULE,
107 .name = "mc13xxx",
108 .of_match_table = mc13xxx_dt_ids,
109 },
110 .probe = mc13xxx_i2c_probe,
111 .remove = __devexit_p(mc13xxx_i2c_remove),
112};
113
114static int __init mc13xxx_i2c_init(void)
115{
116 return i2c_add_driver(&mc13xxx_i2c_driver);
117}
118subsys_initcall(mc13xxx_i2c_init);
119
120static void __exit mc13xxx_i2c_exit(void)
121{
122 i2c_del_driver(&mc13xxx_i2c_driver);
123}
124module_exit(mc13xxx_i2c_exit);
125
126MODULE_DESCRIPTION("i2c driver for Freescale MC13XXX PMIC");
127MODULE_AUTHOR("Marc Reilly <marc@cpdesign.com.au");
128MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
new file mode 100644
index 000000000000..3fcdab3eb8eb
--- /dev/null
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -0,0 +1,140 @@
1/*
2 * Copyright 2009-2010 Pengutronix
3 * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
4 *
5 * loosely based on an earlier driver that has
6 * Copyright 2009 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
7 *
8 * This program is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License version 2 as published by the
10 * Free Software Foundation.
11 */
12
13#include <linux/slab.h>
14#include <linux/module.h>
15#include <linux/platform_device.h>
16#include <linux/mutex.h>
17#include <linux/interrupt.h>
18#include <linux/mfd/core.h>
19#include <linux/mfd/mc13xxx.h>
20#include <linux/of.h>
21#include <linux/of_device.h>
22#include <linux/of_gpio.h>
23#include <linux/err.h>
24#include <linux/spi/spi.h>
25
26#include "mc13xxx.h"
27
28static const struct spi_device_id mc13xxx_device_id[] = {
29 {
30 .name = "mc13783",
31 .driver_data = MC13XXX_ID_MC13783,
32 }, {
33 .name = "mc13892",
34 .driver_data = MC13XXX_ID_MC13892,
35 }, {
36 /* sentinel */
37 }
38};
39MODULE_DEVICE_TABLE(spi, mc13xxx_device_id);
40
41static const struct of_device_id mc13xxx_dt_ids[] = {
42 { .compatible = "fsl,mc13783", .data = (void *) MC13XXX_ID_MC13783, },
43 { .compatible = "fsl,mc13892", .data = (void *) MC13XXX_ID_MC13892, },
44 { /* sentinel */ }
45};
46MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids);
47
48static struct regmap_config mc13xxx_regmap_spi_config = {
49 .reg_bits = 7,
50 .pad_bits = 1,
51 .val_bits = 24,
52
53 .max_register = MC13XXX_NUMREGS,
54
55 .cache_type = REGCACHE_NONE,
56};
57
58static int mc13xxx_spi_probe(struct spi_device *spi)
59{
60 const struct of_device_id *of_id;
61 struct spi_driver *sdrv = to_spi_driver(spi->dev.driver);
62 struct mc13xxx *mc13xxx;
63 struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev);
64 int ret;
65
66 of_id = of_match_device(mc13xxx_dt_ids, &spi->dev);
67 if (of_id)
68 sdrv->id_table = &mc13xxx_device_id[(enum mc13xxx_id) of_id->data];
69
70 mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL);
71 if (!mc13xxx)
72 return -ENOMEM;
73
74 dev_set_drvdata(&spi->dev, mc13xxx);
75 spi->mode = SPI_MODE_0 | SPI_CS_HIGH;
76 spi->bits_per_word = 32;
77
78 mc13xxx->dev = &spi->dev;
79 mutex_init(&mc13xxx->lock);
80
81 mc13xxx->regmap = regmap_init_spi(spi, &mc13xxx_regmap_spi_config);
82 if (IS_ERR(mc13xxx->regmap)) {
83 ret = PTR_ERR(mc13xxx->regmap);
84 dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n",
85 ret);
86 dev_set_drvdata(&spi->dev, NULL);
87 kfree(mc13xxx);
88 return ret;
89 }
90
91 ret = mc13xxx_common_init(mc13xxx, pdata, spi->irq);
92
93 if (ret) {
94 dev_set_drvdata(&spi->dev, NULL);
95 } else {
96 const struct spi_device_id *devid =
97 spi_get_device_id(spi);
98 if (!devid || devid->driver_data != mc13xxx->ictype)
99 dev_warn(mc13xxx->dev,
100 "device id doesn't match auto detection!\n");
101 }
102
103 return ret;
104}
105
106static int __devexit mc13xxx_spi_remove(struct spi_device *spi)
107{
108 struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev);
109
110 mc13xxx_common_cleanup(mc13xxx);
111
112 return 0;
113}
114
115static struct spi_driver mc13xxx_spi_driver = {
116 .id_table = mc13xxx_device_id,
117 .driver = {
118 .name = "mc13xxx",
119 .owner = THIS_MODULE,
120 .of_match_table = mc13xxx_dt_ids,
121 },
122 .probe = mc13xxx_spi_probe,
123 .remove = __devexit_p(mc13xxx_spi_remove),
124};
125
126static int __init mc13xxx_init(void)
127{
128 return spi_register_driver(&mc13xxx_spi_driver);
129}
130subsys_initcall(mc13xxx_init);
131
132static void __exit mc13xxx_exit(void)
133{
134 spi_unregister_driver(&mc13xxx_spi_driver);
135}
136module_exit(mc13xxx_exit);
137
138MODULE_DESCRIPTION("Core driver for Freescale MC13XXX PMIC");
139MODULE_AUTHOR("Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>");
140MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/mc13xxx.h b/drivers/mfd/mc13xxx.h
new file mode 100644
index 000000000000..bbba06feea06
--- /dev/null
+++ b/drivers/mfd/mc13xxx.h
@@ -0,0 +1,45 @@
1/*
2 * Copyright 2012 Creative Product Design
3 * Marc Reilly <marc@cpdesign.com.au>
4 *
5 * This program is free software; you can redistribute it and/or modify it under
6 * the terms of the GNU General Public License version 2 as published by the
7 * Free Software Foundation.
8 */
9#ifndef __DRIVERS_MFD_MC13XXX_H
10#define __DRIVERS_MFD_MC13XXX_H
11
12#include <linux/mutex.h>
13#include <linux/regmap.h>
14#include <linux/mfd/mc13xxx.h>
15
16enum mc13xxx_id {
17 MC13XXX_ID_MC13783,
18 MC13XXX_ID_MC13892,
19 MC13XXX_ID_INVALID,
20};
21
22#define MC13XXX_NUMREGS 0x3f
23
24struct mc13xxx {
25 struct regmap *regmap;
26
27 struct device *dev;
28 enum mc13xxx_id ictype;
29
30 struct mutex lock;
31 int irq;
32 int flags;
33
34 irq_handler_t irqhandler[MC13XXX_NUM_IRQ];
35 void *irqdata[MC13XXX_NUM_IRQ];
36
37 int adcflags;
38};
39
40int mc13xxx_common_init(struct mc13xxx *mc13xxx,
41 struct mc13xxx_platform_data *pdata, int irq);
42
43void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx);
44
45#endif /* __DRIVERS_MFD_MC13XXX_H */
diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c
index 189c2f07b83f..29c122bf28ea 100644
--- a/drivers/mfd/pcf50633-core.c
+++ b/drivers/mfd/pcf50633-core.c
@@ -204,7 +204,7 @@ static int __devinit pcf50633_probe(struct i2c_client *client,
204 return -ENOENT; 204 return -ENOENT;
205 } 205 }
206 206
207 pcf = kzalloc(sizeof(*pcf), GFP_KERNEL); 207 pcf = devm_kzalloc(&client->dev, sizeof(*pcf), GFP_KERNEL);
208 if (!pcf) 208 if (!pcf)
209 return -ENOMEM; 209 return -ENOMEM;
210 210
@@ -212,12 +212,11 @@ static int __devinit pcf50633_probe(struct i2c_client *client,
212 212
213 mutex_init(&pcf->lock); 213 mutex_init(&pcf->lock);
214 214
215 pcf->regmap = regmap_init_i2c(client, &pcf50633_regmap_config); 215 pcf->regmap = devm_regmap_init_i2c(client, &pcf50633_regmap_config);
216 if (IS_ERR(pcf->regmap)) { 216 if (IS_ERR(pcf->regmap)) {
217 ret = PTR_ERR(pcf->regmap); 217 ret = PTR_ERR(pcf->regmap);
218 dev_err(pcf->dev, "Failed to allocate register map: %d\n", 218 dev_err(pcf->dev, "Failed to allocate register map: %d\n", ret);
219 ret); 219 return ret;
220 goto err_free;
221 } 220 }
222 221
223 i2c_set_clientdata(client, pcf); 222 i2c_set_clientdata(client, pcf);
@@ -228,7 +227,7 @@ static int __devinit pcf50633_probe(struct i2c_client *client,
228 if (version < 0 || variant < 0) { 227 if (version < 0 || variant < 0) {
229 dev_err(pcf->dev, "Unable to probe pcf50633\n"); 228 dev_err(pcf->dev, "Unable to probe pcf50633\n");
230 ret = -ENODEV; 229 ret = -ENODEV;
231 goto err_regmap; 230 return ret;
232 } 231 }
233 232
234 dev_info(pcf->dev, "Probed device version %d variant %d\n", 233 dev_info(pcf->dev, "Probed device version %d variant %d\n",
@@ -237,16 +236,11 @@ static int __devinit pcf50633_probe(struct i2c_client *client,
237 pcf50633_irq_init(pcf, client->irq); 236 pcf50633_irq_init(pcf, client->irq);
238 237
239 /* Create sub devices */ 238 /* Create sub devices */
240 pcf50633_client_dev_register(pcf, "pcf50633-input", 239 pcf50633_client_dev_register(pcf, "pcf50633-input", &pcf->input_pdev);
241 &pcf->input_pdev); 240 pcf50633_client_dev_register(pcf, "pcf50633-rtc", &pcf->rtc_pdev);
242 pcf50633_client_dev_register(pcf, "pcf50633-rtc", 241 pcf50633_client_dev_register(pcf, "pcf50633-mbc", &pcf->mbc_pdev);
243 &pcf->rtc_pdev); 242 pcf50633_client_dev_register(pcf, "pcf50633-adc", &pcf->adc_pdev);
244 pcf50633_client_dev_register(pcf, "pcf50633-mbc", 243 pcf50633_client_dev_register(pcf, "pcf50633-backlight", &pcf->bl_pdev);
245 &pcf->mbc_pdev);
246 pcf50633_client_dev_register(pcf, "pcf50633-adc",
247 &pcf->adc_pdev);
248 pcf50633_client_dev_register(pcf, "pcf50633-backlight",
249 &pcf->bl_pdev);
250 244
251 245
252 for (i = 0; i < PCF50633_NUM_REGULATORS; i++) { 246 for (i = 0; i < PCF50633_NUM_REGULATORS; i++) {
@@ -274,13 +268,6 @@ static int __devinit pcf50633_probe(struct i2c_client *client,
274 pdata->probe_done(pcf); 268 pdata->probe_done(pcf);
275 269
276 return 0; 270 return 0;
277
278err_regmap:
279 regmap_exit(pcf->regmap);
280err_free:
281 kfree(pcf);
282
283 return ret;
284} 271}
285 272
286static int __devexit pcf50633_remove(struct i2c_client *client) 273static int __devexit pcf50633_remove(struct i2c_client *client)
@@ -300,9 +287,6 @@ static int __devexit pcf50633_remove(struct i2c_client *client)
300 for (i = 0; i < PCF50633_NUM_REGULATORS; i++) 287 for (i = 0; i < PCF50633_NUM_REGULATORS; i++)
301 platform_device_unregister(pcf->regulator_pdev[i]); 288 platform_device_unregister(pcf->regulator_pdev[i]);
302 289
303 regmap_exit(pcf->regmap);
304 kfree(pcf);
305
306 return 0; 290 return 0;
307} 291}
308 292
diff --git a/drivers/mfd/rc5t583.c b/drivers/mfd/rc5t583.c
index 44afae0a69ce..cdc1df7fa0e9 100644
--- a/drivers/mfd/rc5t583.c
+++ b/drivers/mfd/rc5t583.c
@@ -75,6 +75,7 @@ static struct deepsleep_control_data deepsleep_data[] = {
75 (RC5T583_EXT_PWRREQ1_CONTROL | RC5T583_EXT_PWRREQ2_CONTROL) 75 (RC5T583_EXT_PWRREQ1_CONTROL | RC5T583_EXT_PWRREQ2_CONTROL)
76 76
77static struct mfd_cell rc5t583_subdevs[] = { 77static struct mfd_cell rc5t583_subdevs[] = {
78 {.name = "rc5t583-gpio",},
78 {.name = "rc5t583-regulator",}, 79 {.name = "rc5t583-regulator",},
79 {.name = "rc5t583-rtc", }, 80 {.name = "rc5t583-rtc", },
80 {.name = "rc5t583-key", } 81 {.name = "rc5t583-key", }
@@ -267,7 +268,7 @@ static int __devinit rc5t583_i2c_probe(struct i2c_client *i2c,
267 rc5t583->dev = &i2c->dev; 268 rc5t583->dev = &i2c->dev;
268 i2c_set_clientdata(i2c, rc5t583); 269 i2c_set_clientdata(i2c, rc5t583);
269 270
270 rc5t583->regmap = regmap_init_i2c(i2c, &rc5t583_regmap_config); 271 rc5t583->regmap = devm_regmap_init_i2c(i2c, &rc5t583_regmap_config);
271 if (IS_ERR(rc5t583->regmap)) { 272 if (IS_ERR(rc5t583->regmap)) {
272 ret = PTR_ERR(rc5t583->regmap); 273 ret = PTR_ERR(rc5t583->regmap);
273 dev_err(&i2c->dev, "regmap initialization failed: %d\n", ret); 274 dev_err(&i2c->dev, "regmap initialization failed: %d\n", ret);
@@ -276,7 +277,7 @@ static int __devinit rc5t583_i2c_probe(struct i2c_client *i2c,
276 277
277 ret = rc5t583_clear_ext_power_req(rc5t583, pdata); 278 ret = rc5t583_clear_ext_power_req(rc5t583, pdata);
278 if (ret < 0) 279 if (ret < 0)
279 goto err_irq_init; 280 return ret;
280 281
281 if (i2c->irq) { 282 if (i2c->irq) {
282 ret = rc5t583_irq_init(rc5t583, i2c->irq, pdata->irq_base); 283 ret = rc5t583_irq_init(rc5t583, i2c->irq, pdata->irq_base);
@@ -299,8 +300,6 @@ static int __devinit rc5t583_i2c_probe(struct i2c_client *i2c,
299err_add_devs: 300err_add_devs:
300 if (irq_init_success) 301 if (irq_init_success)
301 rc5t583_irq_exit(rc5t583); 302 rc5t583_irq_exit(rc5t583);
302err_irq_init:
303 regmap_exit(rc5t583->regmap);
304 return ret; 303 return ret;
305} 304}
306 305
@@ -310,7 +309,6 @@ static int __devexit rc5t583_i2c_remove(struct i2c_client *i2c)
310 309
311 mfd_remove_devices(rc5t583->dev); 310 mfd_remove_devices(rc5t583->dev);
312 rc5t583_irq_exit(rc5t583); 311 rc5t583_irq_exit(rc5t583);
313 regmap_exit(rc5t583->regmap);
314 return 0; 312 return 0;
315} 313}
316 314
diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c
index 809bd4a61089..685d61e431ad 100644
--- a/drivers/mfd/rdc321x-southbridge.c
+++ b/drivers/mfd/rdc321x-southbridge.c
@@ -108,18 +108,7 @@ static struct pci_driver rdc321x_sb_driver = {
108 .remove = __devexit_p(rdc321x_sb_remove), 108 .remove = __devexit_p(rdc321x_sb_remove),
109}; 109};
110 110
111static int __init rdc321x_sb_init(void) 111module_pci_driver(rdc321x_sb_driver);
112{
113 return pci_register_driver(&rdc321x_sb_driver);
114}
115
116static void __exit rdc321x_sb_exit(void)
117{
118 pci_unregister_driver(&rdc321x_sb_driver);
119}
120
121module_init(rdc321x_sb_init);
122module_exit(rdc321x_sb_exit);
123 112
124MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>"); 113MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
125MODULE_LICENSE("GPL"); 114MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/s5m-core.c b/drivers/mfd/s5m-core.c
index 48949d998d10..dd170307e60e 100644
--- a/drivers/mfd/s5m-core.c
+++ b/drivers/mfd/s5m-core.c
@@ -114,12 +114,12 @@ static int s5m87xx_i2c_probe(struct i2c_client *i2c,
114 s5m87xx->wakeup = pdata->wakeup; 114 s5m87xx->wakeup = pdata->wakeup;
115 } 115 }
116 116
117 s5m87xx->regmap = regmap_init_i2c(i2c, &s5m_regmap_config); 117 s5m87xx->regmap = devm_regmap_init_i2c(i2c, &s5m_regmap_config);
118 if (IS_ERR(s5m87xx->regmap)) { 118 if (IS_ERR(s5m87xx->regmap)) {
119 ret = PTR_ERR(s5m87xx->regmap); 119 ret = PTR_ERR(s5m87xx->regmap);
120 dev_err(&i2c->dev, "Failed to allocate register map: %d\n", 120 dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
121 ret); 121 ret);
122 goto err; 122 return ret;
123 } 123 }
124 124
125 s5m87xx->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR); 125 s5m87xx->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
@@ -159,7 +159,6 @@ err:
159 mfd_remove_devices(s5m87xx->dev); 159 mfd_remove_devices(s5m87xx->dev);
160 s5m_irq_exit(s5m87xx); 160 s5m_irq_exit(s5m87xx);
161 i2c_unregister_device(s5m87xx->rtc); 161 i2c_unregister_device(s5m87xx->rtc);
162 regmap_exit(s5m87xx->regmap);
163 return ret; 162 return ret;
164} 163}
165 164
@@ -170,7 +169,6 @@ static int s5m87xx_i2c_remove(struct i2c_client *i2c)
170 mfd_remove_devices(s5m87xx->dev); 169 mfd_remove_devices(s5m87xx->dev);
171 s5m_irq_exit(s5m87xx); 170 s5m_irq_exit(s5m87xx);
172 i2c_unregister_device(s5m87xx->rtc); 171 i2c_unregister_device(s5m87xx->rtc);
173 regmap_exit(s5m87xx->regmap);
174 return 0; 172 return 0;
175} 173}
176 174
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
new file mode 100644
index 000000000000..d31fed07aefb
--- /dev/null
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -0,0 +1,467 @@
1/*
2 * Copyright (c) 2009-2011 Wind River Systems, Inc.
3 * Copyright (c) 2011 ST Microelectronics (Alessandro Rubini)
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 * See the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/spinlock.h>
23#include <linux/errno.h>
24#include <linux/device.h>
25#include <linux/slab.h>
26#include <linux/list.h>
27#include <linux/io.h>
28#include <linux/ioport.h>
29#include <linux/pci.h>
30#include <linux/debugfs.h>
31#include <linux/seq_file.h>
32#include <linux/platform_device.h>
33#include <linux/mfd/core.h>
34#include <linux/mfd/sta2x11-mfd.h>
35
36#include <asm/sta2x11.h>
37
38/* This describes STA2X11 MFD chip for us, we may have several */
39struct sta2x11_mfd {
40 struct sta2x11_instance *instance;
41 spinlock_t lock;
42 struct list_head list;
43 void __iomem *sctl_regs;
44 void __iomem *apbreg_regs;
45};
46
47static LIST_HEAD(sta2x11_mfd_list);
48
49/* Three functions to act on the list */
50static struct sta2x11_mfd *sta2x11_mfd_find(struct pci_dev *pdev)
51{
52 struct sta2x11_instance *instance;
53 struct sta2x11_mfd *mfd;
54
55 if (!pdev && !list_empty(&sta2x11_mfd_list)) {
56 pr_warning("%s: Unspecified device, "
57 "using first instance\n", __func__);
58 return list_entry(sta2x11_mfd_list.next,
59 struct sta2x11_mfd, list);
60 }
61
62 instance = sta2x11_get_instance(pdev);
63 if (!instance)
64 return NULL;
65 list_for_each_entry(mfd, &sta2x11_mfd_list, list) {
66 if (mfd->instance == instance)
67 return mfd;
68 }
69 return NULL;
70}
71
72static int __devinit sta2x11_mfd_add(struct pci_dev *pdev, gfp_t flags)
73{
74 struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
75 struct sta2x11_instance *instance;
76
77 if (mfd)
78 return -EBUSY;
79 instance = sta2x11_get_instance(pdev);
80 if (!instance)
81 return -EINVAL;
82 mfd = kzalloc(sizeof(*mfd), flags);
83 if (!mfd)
84 return -ENOMEM;
85 INIT_LIST_HEAD(&mfd->list);
86 spin_lock_init(&mfd->lock);
87 mfd->instance = instance;
88 list_add(&mfd->list, &sta2x11_mfd_list);
89 return 0;
90}
91
92static int __devexit mfd_remove(struct pci_dev *pdev)
93{
94 struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
95
96 if (!mfd)
97 return -ENODEV;
98 list_del(&mfd->list);
99 kfree(mfd);
100 return 0;
101}
102
103/* These two functions are exported and are not expected to fail */
104u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
105{
106 struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
107 u32 r;
108 unsigned long flags;
109
110 if (!mfd) {
111 dev_warn(&pdev->dev, ": can't access sctl regs\n");
112 return 0;
113 }
114 if (!mfd->sctl_regs) {
115 dev_warn(&pdev->dev, ": system ctl not initialized\n");
116 return 0;
117 }
118 spin_lock_irqsave(&mfd->lock, flags);
119 r = readl(mfd->sctl_regs + reg);
120 r &= ~mask;
121 r |= val;
122 if (mask)
123 writel(r, mfd->sctl_regs + reg);
124 spin_unlock_irqrestore(&mfd->lock, flags);
125 return r;
126}
127EXPORT_SYMBOL(sta2x11_sctl_mask);
128
129u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
130{
131 struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
132 u32 r;
133 unsigned long flags;
134
135 if (!mfd) {
136 dev_warn(&pdev->dev, ": can't access apb regs\n");
137 return 0;
138 }
139 if (!mfd->apbreg_regs) {
140 dev_warn(&pdev->dev, ": apb bridge not initialized\n");
141 return 0;
142 }
143 spin_lock_irqsave(&mfd->lock, flags);
144 r = readl(mfd->apbreg_regs + reg);
145 r &= ~mask;
146 r |= val;
147 if (mask)
148 writel(r, mfd->apbreg_regs + reg);
149 spin_unlock_irqrestore(&mfd->lock, flags);
150 return r;
151}
152EXPORT_SYMBOL(sta2x11_apbreg_mask);
153
154/* Two debugfs files, for our registers (FIXME: one instance only) */
155#define REG(regname) {.name = #regname, .offset = SCTL_ ## regname}
156static struct debugfs_reg32 sta2x11_sctl_regs[] = {
157 REG(SCCTL), REG(ARMCFG), REG(SCPLLCTL), REG(SCPLLFCTRL),
158 REG(SCRESFRACT), REG(SCRESCTRL1), REG(SCRESXTRL2), REG(SCPEREN0),
159 REG(SCPEREN1), REG(SCPEREN2), REG(SCGRST), REG(SCPCIPMCR1),
160 REG(SCPCIPMCR2), REG(SCPCIPMSR1), REG(SCPCIPMSR2), REG(SCPCIPMSR3),
161 REG(SCINTREN), REG(SCRISR), REG(SCCLKSTAT0), REG(SCCLKSTAT1),
162 REG(SCCLKSTAT2), REG(SCRSTSTA),
163};
164#undef REG
165
166static struct debugfs_regset32 sctl_regset = {
167 .regs = sta2x11_sctl_regs,
168 .nregs = ARRAY_SIZE(sta2x11_sctl_regs),
169};
170
171#define REG(regname) {.name = #regname, .offset = regname}
172static struct debugfs_reg32 sta2x11_apbreg_regs[] = {
173 REG(APBREG_BSR), REG(APBREG_PAER), REG(APBREG_PWAC), REG(APBREG_PRAC),
174 REG(APBREG_PCG), REG(APBREG_PUR), REG(APBREG_EMU_PCG),
175};
176#undef REG
177
178static struct debugfs_regset32 apbreg_regset = {
179 .regs = sta2x11_apbreg_regs,
180 .nregs = ARRAY_SIZE(sta2x11_apbreg_regs),
181};
182
183static struct dentry *sta2x11_sctl_debugfs;
184static struct dentry *sta2x11_apbreg_debugfs;
185
186/* Probe for the two platform devices */
187static int sta2x11_sctl_probe(struct platform_device *dev)
188{
189 struct pci_dev **pdev;
190 struct sta2x11_mfd *mfd;
191 struct resource *res;
192
193 pdev = dev->dev.platform_data;
194 mfd = sta2x11_mfd_find(*pdev);
195 if (!mfd)
196 return -ENODEV;
197
198 res = platform_get_resource(dev, IORESOURCE_MEM, 0);
199 if (!res)
200 return -ENOMEM;
201
202 if (!request_mem_region(res->start, resource_size(res),
203 "sta2x11-sctl"))
204 return -EBUSY;
205
206 mfd->sctl_regs = ioremap(res->start, resource_size(res));
207 if (!mfd->sctl_regs) {
208 release_mem_region(res->start, resource_size(res));
209 return -ENOMEM;
210 }
211 sctl_regset.base = mfd->sctl_regs;
212 sta2x11_sctl_debugfs = debugfs_create_regset32("sta2x11-sctl",
213 S_IFREG | S_IRUGO,
214 NULL, &sctl_regset);
215 return 0;
216}
217
218static int sta2x11_apbreg_probe(struct platform_device *dev)
219{
220 struct pci_dev **pdev;
221 struct sta2x11_mfd *mfd;
222 struct resource *res;
223
224 pdev = dev->dev.platform_data;
225 dev_dbg(&dev->dev, "%s: pdata is %p\n", __func__, pdev);
226 dev_dbg(&dev->dev, "%s: *pdata is %p\n", __func__, *pdev);
227
228 mfd = sta2x11_mfd_find(*pdev);
229 if (!mfd)
230 return -ENODEV;
231
232 res = platform_get_resource(dev, IORESOURCE_MEM, 0);
233 if (!res)
234 return -ENOMEM;
235
236 if (!request_mem_region(res->start, resource_size(res),
237 "sta2x11-apbreg"))
238 return -EBUSY;
239
240 mfd->apbreg_regs = ioremap(res->start, resource_size(res));
241 if (!mfd->apbreg_regs) {
242 release_mem_region(res->start, resource_size(res));
243 return -ENOMEM;
244 }
245 dev_dbg(&dev->dev, "%s: regbase %p\n", __func__, mfd->apbreg_regs);
246
247 apbreg_regset.base = mfd->apbreg_regs;
248 sta2x11_apbreg_debugfs = debugfs_create_regset32("sta2x11-apbreg",
249 S_IFREG | S_IRUGO,
250 NULL, &apbreg_regset);
251 return 0;
252}
253
254/* The two platform drivers */
255static struct platform_driver sta2x11_sctl_platform_driver = {
256 .driver = {
257 .name = "sta2x11-sctl",
258 .owner = THIS_MODULE,
259 },
260 .probe = sta2x11_sctl_probe,
261};
262
263static int __init sta2x11_sctl_init(void)
264{
265 pr_info("%s\n", __func__);
266 return platform_driver_register(&sta2x11_sctl_platform_driver);
267}
268
269static struct platform_driver sta2x11_platform_driver = {
270 .driver = {
271 .name = "sta2x11-apbreg",
272 .owner = THIS_MODULE,
273 },
274 .probe = sta2x11_apbreg_probe,
275};
276
277static int __init sta2x11_apbreg_init(void)
278{
279 pr_info("%s\n", __func__);
280 return platform_driver_register(&sta2x11_platform_driver);
281}
282
283/*
284 * What follows is the PCI device that hosts the above two pdevs.
285 * Each logic block is 4kB and they are all consecutive: we use this info.
286 */
287
288/* Bar 0 */
289enum bar0_cells {
290 STA2X11_GPIO_0 = 0,
291 STA2X11_GPIO_1,
292 STA2X11_GPIO_2,
293 STA2X11_GPIO_3,
294 STA2X11_SCTL,
295 STA2X11_SCR,
296 STA2X11_TIME,
297};
298/* Bar 1 */
299enum bar1_cells {
300 STA2X11_APBREG = 0,
301};
302#define CELL_4K(_name, _cell) { \
303 .name = _name, \
304 .start = _cell * 4096, .end = _cell * 4096 + 4095, \
305 .flags = IORESOURCE_MEM, \
306 }
307
308static const __devinitconst struct resource gpio_resources[] = {
309 {
310 .name = "sta2x11_gpio", /* 4 consecutive cells, 1 driver */
311 .start = 0,
312 .end = (4 * 4096) - 1,
313 .flags = IORESOURCE_MEM,
314 }
315};
316static const __devinitconst struct resource sctl_resources[] = {
317 CELL_4K("sta2x11-sctl", STA2X11_SCTL),
318};
319static const __devinitconst struct resource scr_resources[] = {
320 CELL_4K("sta2x11-scr", STA2X11_SCR),
321};
322static const __devinitconst struct resource time_resources[] = {
323 CELL_4K("sta2x11-time", STA2X11_TIME),
324};
325
326static const __devinitconst struct resource apbreg_resources[] = {
327 CELL_4K("sta2x11-apbreg", STA2X11_APBREG),
328};
329
330#define DEV(_name, _r) \
331 { .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, }
332
333static __devinitdata struct mfd_cell sta2x11_mfd_bar0[] = {
334 DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */
335 DEV("sta2x11-sctl", sctl_resources),
336 DEV("sta2x11-scr", scr_resources),
337 DEV("sta2x11-time", time_resources),
338};
339
340static __devinitdata struct mfd_cell sta2x11_mfd_bar1[] = {
341 DEV("sta2x11-apbreg", apbreg_resources),
342};
343
344static int sta2x11_mfd_suspend(struct pci_dev *pdev, pm_message_t state)
345{
346 pci_save_state(pdev);
347 pci_disable_device(pdev);
348 pci_set_power_state(pdev, pci_choose_state(pdev, state));
349
350 return 0;
351}
352
353static int sta2x11_mfd_resume(struct pci_dev *pdev)
354{
355 int err;
356
357 pci_set_power_state(pdev, 0);
358 err = pci_enable_device(pdev);
359 if (err)
360 return err;
361 pci_restore_state(pdev);
362
363 return 0;
364}
365
366static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
367 const struct pci_device_id *pci_id)
368{
369 int err, i;
370 struct sta2x11_gpio_pdata *gpio_data;
371
372 dev_info(&pdev->dev, "%s\n", __func__);
373
374 err = pci_enable_device(pdev);
375 if (err) {
376 dev_err(&pdev->dev, "Can't enable device.\n");
377 return err;
378 }
379
380 err = pci_enable_msi(pdev);
381 if (err)
382 dev_info(&pdev->dev, "Enable msi failed\n");
383
384 /* Read gpio config data as pci device's platform data */
385 gpio_data = dev_get_platdata(&pdev->dev);
386 if (!gpio_data)
387 dev_warn(&pdev->dev, "no gpio configuration\n");
388
389 dev_dbg(&pdev->dev, "%s, gpio_data = %p (%p)\n", __func__,
390 gpio_data, &gpio_data);
391 dev_dbg(&pdev->dev, "%s, pdev = %p (%p)\n", __func__,
392 pdev, &pdev);
393
394 /* platform data is the pci device for all of them */
395 for (i = 0; i < ARRAY_SIZE(sta2x11_mfd_bar0); i++) {
396 sta2x11_mfd_bar0[i].pdata_size = sizeof(pdev);
397 sta2x11_mfd_bar0[i].platform_data = &pdev;
398 }
399 sta2x11_mfd_bar1[0].pdata_size = sizeof(pdev);
400 sta2x11_mfd_bar1[0].platform_data = &pdev;
401
402 /* Record this pdev before mfd_add_devices: their probe looks for it */
403 sta2x11_mfd_add(pdev, GFP_ATOMIC);
404
405
406 err = mfd_add_devices(&pdev->dev, -1,
407 sta2x11_mfd_bar0,
408 ARRAY_SIZE(sta2x11_mfd_bar0),
409 &pdev->resource[0],
410 0);
411 if (err) {
412 dev_err(&pdev->dev, "mfd_add_devices[0] failed: %d\n", err);
413 goto err_disable;
414 }
415
416 err = mfd_add_devices(&pdev->dev, -1,
417 sta2x11_mfd_bar1,
418 ARRAY_SIZE(sta2x11_mfd_bar1),
419 &pdev->resource[1],
420 0);
421 if (err) {
422 dev_err(&pdev->dev, "mfd_add_devices[1] failed: %d\n", err);
423 goto err_disable;
424 }
425
426 return 0;
427
428err_disable:
429 mfd_remove_devices(&pdev->dev);
430 pci_disable_device(pdev);
431 pci_disable_msi(pdev);
432 return err;
433}
434
435static DEFINE_PCI_DEVICE_TABLE(sta2x11_mfd_tbl) = {
436 {PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_GPIO)},
437 {0,},
438};
439
440static struct pci_driver sta2x11_mfd_driver = {
441 .name = "sta2x11-mfd",
442 .id_table = sta2x11_mfd_tbl,
443 .probe = sta2x11_mfd_probe,
444 .suspend = sta2x11_mfd_suspend,
445 .resume = sta2x11_mfd_resume,
446};
447
448static int __init sta2x11_mfd_init(void)
449{
450 pr_info("%s\n", __func__);
451 return pci_register_driver(&sta2x11_mfd_driver);
452}
453
454/*
455 * All of this must be ready before "normal" devices like MMCI appear.
456 * But MFD (the pci device) can't be too early. The following choice
457 * prepares platform drivers very early and probe the PCI device later,
458 * but before other PCI devices.
459 */
460subsys_initcall(sta2x11_apbreg_init);
461subsys_initcall(sta2x11_sctl_init);
462rootfs_initcall(sta2x11_mfd_init);
463
464MODULE_LICENSE("GPL v2");
465MODULE_AUTHOR("Wind River");
466MODULE_DESCRIPTION("STA2x11 mfd for GPIO, SCTL and APBREG");
467MODULE_DEVICE_TABLE(pci, sta2x11_mfd_tbl);
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index b58c43c7ea93..afd459013ecb 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -122,7 +122,6 @@ MODULE_DEVICE_TABLE(spi, stmpe_id);
122static struct spi_driver stmpe_spi_driver = { 122static struct spi_driver stmpe_spi_driver = {
123 .driver = { 123 .driver = {
124 .name = "stmpe-spi", 124 .name = "stmpe-spi",
125 .bus = &spi_bus_type,
126 .owner = THIS_MODULE, 125 .owner = THIS_MODULE,
127#ifdef CONFIG_PM 126#ifdef CONFIG_PM
128 .pm = &stmpe_dev_pm_ops, 127 .pm = &stmpe_dev_pm_ops,
diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 47f802bf1848..396b9d1b6bd6 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -283,27 +283,24 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
283 } 283 }
284 } 284 }
285 285
286 tps65090->rmap = regmap_init_i2c(tps65090->client, 286 tps65090->rmap = devm_regmap_init_i2c(tps65090->client,
287 &tps65090_regmap_config); 287 &tps65090_regmap_config);
288 if (IS_ERR(tps65090->rmap)) { 288 if (IS_ERR(tps65090->rmap)) {
289 dev_err(&client->dev, "regmap_init failed with err: %ld\n", 289 ret = PTR_ERR(tps65090->rmap);
290 PTR_ERR(tps65090->rmap)); 290 dev_err(&client->dev, "regmap_init failed with err: %d\n", ret);
291 goto err_irq_exit; 291 goto err_irq_exit;
292 }; 292 }
293 293
294 ret = mfd_add_devices(tps65090->dev, -1, tps65090s, 294 ret = mfd_add_devices(tps65090->dev, -1, tps65090s,
295 ARRAY_SIZE(tps65090s), NULL, 0); 295 ARRAY_SIZE(tps65090s), NULL, 0);
296 if (ret) { 296 if (ret) {
297 dev_err(&client->dev, "add mfd devices failed with err: %d\n", 297 dev_err(&client->dev, "add mfd devices failed with err: %d\n",
298 ret); 298 ret);
299 goto err_regmap_exit; 299 goto err_irq_exit;
300 } 300 }
301 301
302 return 0; 302 return 0;
303 303
304err_regmap_exit:
305 regmap_exit(tps65090->rmap);
306
307err_irq_exit: 304err_irq_exit:
308 if (client->irq) 305 if (client->irq)
309 free_irq(client->irq, tps65090); 306 free_irq(client->irq, tps65090);
@@ -316,29 +313,34 @@ static int __devexit tps65090_i2c_remove(struct i2c_client *client)
316 struct tps65090 *tps65090 = i2c_get_clientdata(client); 313 struct tps65090 *tps65090 = i2c_get_clientdata(client);
317 314
318 mfd_remove_devices(tps65090->dev); 315 mfd_remove_devices(tps65090->dev);
319 regmap_exit(tps65090->rmap);
320 if (client->irq) 316 if (client->irq)
321 free_irq(client->irq, tps65090); 317 free_irq(client->irq, tps65090);
322 318
323 return 0; 319 return 0;
324} 320}
325 321
326#ifdef CONFIG_PM 322#ifdef CONFIG_PM_SLEEP
327static int tps65090_i2c_suspend(struct i2c_client *client, pm_message_t state) 323static int tps65090_suspend(struct device *dev)
328{ 324{
325 struct i2c_client *client = to_i2c_client(dev);
329 if (client->irq) 326 if (client->irq)
330 disable_irq(client->irq); 327 disable_irq(client->irq);
331 return 0; 328 return 0;
332} 329}
333 330
334static int tps65090_i2c_resume(struct i2c_client *client) 331static int tps65090_resume(struct device *dev)
335{ 332{
333 struct i2c_client *client = to_i2c_client(dev);
336 if (client->irq) 334 if (client->irq)
337 enable_irq(client->irq); 335 enable_irq(client->irq);
338 return 0; 336 return 0;
339} 337}
340#endif 338#endif
341 339
340static const struct dev_pm_ops tps65090_pm_ops = {
341 SET_SYSTEM_SLEEP_PM_OPS(tps65090_suspend, tps65090_resume)
342};
343
342static const struct i2c_device_id tps65090_id_table[] = { 344static const struct i2c_device_id tps65090_id_table[] = {
343 { "tps65090", 0 }, 345 { "tps65090", 0 },
344 { }, 346 { },
@@ -349,13 +351,10 @@ static struct i2c_driver tps65090_driver = {
349 .driver = { 351 .driver = {
350 .name = "tps65090", 352 .name = "tps65090",
351 .owner = THIS_MODULE, 353 .owner = THIS_MODULE,
354 .pm = &tps65090_pm_ops,
352 }, 355 },
353 .probe = tps65090_i2c_probe, 356 .probe = tps65090_i2c_probe,
354 .remove = __devexit_p(tps65090_i2c_remove), 357 .remove = __devexit_p(tps65090_i2c_remove),
355#ifdef CONFIG_PM
356 .suspend = tps65090_i2c_suspend,
357 .resume = tps65090_i2c_resume,
358#endif
359 .id_table = tps65090_id_table, 358 .id_table = tps65090_id_table,
360}; 359};
361 360
diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index f7d854e4cc62..db194e433c08 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(tps65217_reg_write);
96 * @val: Value to write. 96 * @val: Value to write.
97 * @level: Password protected level 97 * @level: Password protected level
98 */ 98 */
99int tps65217_update_bits(struct tps65217 *tps, unsigned int reg, 99static int tps65217_update_bits(struct tps65217 *tps, unsigned int reg,
100 unsigned int mask, unsigned int val, unsigned int level) 100 unsigned int mask, unsigned int val, unsigned int level)
101{ 101{
102 int ret; 102 int ret;
@@ -150,7 +150,7 @@ static int __devinit tps65217_probe(struct i2c_client *client,
150 return -ENOMEM; 150 return -ENOMEM;
151 151
152 tps->pdata = pdata; 152 tps->pdata = pdata;
153 tps->regmap = regmap_init_i2c(client, &tps65217_regmap_config); 153 tps->regmap = devm_regmap_init_i2c(client, &tps65217_regmap_config);
154 if (IS_ERR(tps->regmap)) { 154 if (IS_ERR(tps->regmap)) {
155 ret = PTR_ERR(tps->regmap); 155 ret = PTR_ERR(tps->regmap);
156 dev_err(tps->dev, "Failed to allocate register map: %d\n", 156 dev_err(tps->dev, "Failed to allocate register map: %d\n",
@@ -163,9 +163,9 @@ static int __devinit tps65217_probe(struct i2c_client *client,
163 163
164 ret = tps65217_reg_read(tps, TPS65217_REG_CHIPID, &version); 164 ret = tps65217_reg_read(tps, TPS65217_REG_CHIPID, &version);
165 if (ret < 0) { 165 if (ret < 0) {
166 dev_err(tps->dev, "Failed to read revision" 166 dev_err(tps->dev, "Failed to read revision register: %d\n",
167 " register: %d\n", ret); 167 ret);
168 goto err_regmap; 168 return ret;
169 } 169 }
170 170
171 dev_info(tps->dev, "TPS65217 ID %#x version 1.%d\n", 171 dev_info(tps->dev, "TPS65217 ID %#x version 1.%d\n",
@@ -190,11 +190,6 @@ static int __devinit tps65217_probe(struct i2c_client *client,
190 } 190 }
191 191
192 return 0; 192 return 0;
193
194err_regmap:
195 regmap_exit(tps->regmap);
196
197 return ret;
198} 193}
199 194
200static int __devexit tps65217_remove(struct i2c_client *client) 195static int __devexit tps65217_remove(struct i2c_client *client)
@@ -205,8 +200,6 @@ static int __devexit tps65217_remove(struct i2c_client *client)
205 for (i = 0; i < TPS65217_NUM_REGULATOR; i++) 200 for (i = 0; i < TPS65217_NUM_REGULATOR; i++)
206 platform_device_unregister(tps->regulator_pdev[i]); 201 platform_device_unregister(tps->regulator_pdev[i]);
207 202
208 regmap_exit(tps->regmap);
209
210 return 0; 203 return 0;
211} 204}
212 205
diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
index c9ed5c00a621..09aab3e4776d 100644
--- a/drivers/mfd/tps65910-irq.c
+++ b/drivers/mfd/tps65910-irq.c
@@ -20,15 +20,10 @@
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/irq.h> 22#include <linux/irq.h>
23#include <linux/irqdomain.h>
23#include <linux/gpio.h> 24#include <linux/gpio.h>
24#include <linux/mfd/tps65910.h> 25#include <linux/mfd/tps65910.h>
25 26
26static inline int irq_to_tps65910_irq(struct tps65910 *tps65910,
27 int irq)
28{
29 return (irq - tps65910->irq_base);
30}
31
32/* 27/*
33 * This is a threaded IRQ handler so can access I2C/SPI. Since all 28 * This is a threaded IRQ handler so can access I2C/SPI. Since all
34 * interrupts are clear on read the IRQ line will be reasserted and 29 * interrupts are clear on read the IRQ line will be reasserted and
@@ -41,28 +36,28 @@ static inline int irq_to_tps65910_irq(struct tps65910 *tps65910,
41static irqreturn_t tps65910_irq(int irq, void *irq_data) 36static irqreturn_t tps65910_irq(int irq, void *irq_data)
42{ 37{
43 struct tps65910 *tps65910 = irq_data; 38 struct tps65910 *tps65910 = irq_data;
39 unsigned int reg;
44 u32 irq_sts; 40 u32 irq_sts;
45 u32 irq_mask; 41 u32 irq_mask;
46 u8 reg;
47 int i; 42 int i;
48 43
49 tps65910->read(tps65910, TPS65910_INT_STS, 1, &reg); 44 tps65910_reg_read(tps65910, TPS65910_INT_STS, &reg);
50 irq_sts = reg; 45 irq_sts = reg;
51 tps65910->read(tps65910, TPS65910_INT_STS2, 1, &reg); 46 tps65910_reg_read(tps65910, TPS65910_INT_STS2, &reg);
52 irq_sts |= reg << 8; 47 irq_sts |= reg << 8;
53 switch (tps65910_chip_id(tps65910)) { 48 switch (tps65910_chip_id(tps65910)) {
54 case TPS65911: 49 case TPS65911:
55 tps65910->read(tps65910, TPS65910_INT_STS3, 1, &reg); 50 tps65910_reg_read(tps65910, TPS65910_INT_STS3, &reg);
56 irq_sts |= reg << 16; 51 irq_sts |= reg << 16;
57 } 52 }
58 53
59 tps65910->read(tps65910, TPS65910_INT_MSK, 1, &reg); 54 tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
60 irq_mask = reg; 55 irq_mask = reg;
61 tps65910->read(tps65910, TPS65910_INT_MSK2, 1, &reg); 56 tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
62 irq_mask |= reg << 8; 57 irq_mask |= reg << 8;
63 switch (tps65910_chip_id(tps65910)) { 58 switch (tps65910_chip_id(tps65910)) {
64 case TPS65911: 59 case TPS65911:
65 tps65910->read(tps65910, TPS65910_INT_MSK3, 1, &reg); 60 tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
66 irq_mask |= reg << 16; 61 irq_mask |= reg << 16;
67 } 62 }
68 63
@@ -76,19 +71,19 @@ static irqreturn_t tps65910_irq(int irq, void *irq_data)
76 if (!(irq_sts & (1 << i))) 71 if (!(irq_sts & (1 << i)))
77 continue; 72 continue;
78 73
79 handle_nested_irq(tps65910->irq_base + i); 74 handle_nested_irq(irq_find_mapping(tps65910->domain, i));
80 } 75 }
81 76
82 /* Write the STS register back to clear IRQs we handled */ 77 /* Write the STS register back to clear IRQs we handled */
83 reg = irq_sts & 0xFF; 78 reg = irq_sts & 0xFF;
84 irq_sts >>= 8; 79 irq_sts >>= 8;
85 tps65910->write(tps65910, TPS65910_INT_STS, 1, &reg); 80 tps65910_reg_write(tps65910, TPS65910_INT_STS, reg);
86 reg = irq_sts & 0xFF; 81 reg = irq_sts & 0xFF;
87 tps65910->write(tps65910, TPS65910_INT_STS2, 1, &reg); 82 tps65910_reg_write(tps65910, TPS65910_INT_STS2, reg);
88 switch (tps65910_chip_id(tps65910)) { 83 switch (tps65910_chip_id(tps65910)) {
89 case TPS65911: 84 case TPS65911:
90 reg = irq_sts >> 8; 85 reg = irq_sts >> 8;
91 tps65910->write(tps65910, TPS65910_INT_STS3, 1, &reg); 86 tps65910_reg_write(tps65910, TPS65910_INT_STS3, reg);
92 } 87 }
93 88
94 return IRQ_HANDLED; 89 return IRQ_HANDLED;
@@ -105,27 +100,27 @@ static void tps65910_irq_sync_unlock(struct irq_data *data)
105{ 100{
106 struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); 101 struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
107 u32 reg_mask; 102 u32 reg_mask;
108 u8 reg; 103 unsigned int reg;
109 104
110 tps65910->read(tps65910, TPS65910_INT_MSK, 1, &reg); 105 tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
111 reg_mask = reg; 106 reg_mask = reg;
112 tps65910->read(tps65910, TPS65910_INT_MSK2, 1, &reg); 107 tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
113 reg_mask |= reg << 8; 108 reg_mask |= reg << 8;
114 switch (tps65910_chip_id(tps65910)) { 109 switch (tps65910_chip_id(tps65910)) {
115 case TPS65911: 110 case TPS65911:
116 tps65910->read(tps65910, TPS65910_INT_MSK3, 1, &reg); 111 tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
117 reg_mask |= reg << 16; 112 reg_mask |= reg << 16;
118 } 113 }
119 114
120 if (tps65910->irq_mask != reg_mask) { 115 if (tps65910->irq_mask != reg_mask) {
121 reg = tps65910->irq_mask & 0xFF; 116 reg = tps65910->irq_mask & 0xFF;
122 tps65910->write(tps65910, TPS65910_INT_MSK, 1, &reg); 117 tps65910_reg_write(tps65910, TPS65910_INT_MSK, reg);
123 reg = tps65910->irq_mask >> 8 & 0xFF; 118 reg = tps65910->irq_mask >> 8 & 0xFF;
124 tps65910->write(tps65910, TPS65910_INT_MSK2, 1, &reg); 119 tps65910_reg_write(tps65910, TPS65910_INT_MSK2, reg);
125 switch (tps65910_chip_id(tps65910)) { 120 switch (tps65910_chip_id(tps65910)) {
126 case TPS65911: 121 case TPS65911:
127 reg = tps65910->irq_mask >> 16; 122 reg = tps65910->irq_mask >> 16;
128 tps65910->write(tps65910, TPS65910_INT_MSK3, 1, &reg); 123 tps65910_reg_write(tps65910, TPS65910_INT_MSK3, reg);
129 } 124 }
130 } 125 }
131 mutex_unlock(&tps65910->irq_lock); 126 mutex_unlock(&tps65910->irq_lock);
@@ -135,14 +130,14 @@ static void tps65910_irq_enable(struct irq_data *data)
135{ 130{
136 struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); 131 struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
137 132
138 tps65910->irq_mask &= ~( 1 << irq_to_tps65910_irq(tps65910, data->irq)); 133 tps65910->irq_mask &= ~(1 << data->hwirq);
139} 134}
140 135
141static void tps65910_irq_disable(struct irq_data *data) 136static void tps65910_irq_disable(struct irq_data *data)
142{ 137{
143 struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); 138 struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
144 139
145 tps65910->irq_mask |= ( 1 << irq_to_tps65910_irq(tps65910, data->irq)); 140 tps65910->irq_mask |= (1 << data->hwirq);
146} 141}
147 142
148#ifdef CONFIG_PM_SLEEP 143#ifdef CONFIG_PM_SLEEP
@@ -164,10 +159,35 @@ static struct irq_chip tps65910_irq_chip = {
164 .irq_set_wake = tps65910_irq_set_wake, 159 .irq_set_wake = tps65910_irq_set_wake,
165}; 160};
166 161
162static int tps65910_irq_map(struct irq_domain *h, unsigned int virq,
163 irq_hw_number_t hw)
164{
165 struct tps65910 *tps65910 = h->host_data;
166
167 irq_set_chip_data(virq, tps65910);
168 irq_set_chip_and_handler(virq, &tps65910_irq_chip, handle_edge_irq);
169 irq_set_nested_thread(virq, 1);
170
171 /* ARM needs us to explicitly flag the IRQ as valid
172 * and will set them noprobe when we do so. */
173#ifdef CONFIG_ARM
174 set_irq_flags(virq, IRQF_VALID);
175#else
176 irq_set_noprobe(virq);
177#endif
178
179 return 0;
180}
181
182static struct irq_domain_ops tps65910_domain_ops = {
183 .map = tps65910_irq_map,
184 .xlate = irq_domain_xlate_twocell,
185};
186
167int tps65910_irq_init(struct tps65910 *tps65910, int irq, 187int tps65910_irq_init(struct tps65910 *tps65910, int irq,
168 struct tps65910_platform_data *pdata) 188 struct tps65910_platform_data *pdata)
169{ 189{
170 int ret, cur_irq; 190 int ret;
171 int flags = IRQF_ONESHOT; 191 int flags = IRQF_ONESHOT;
172 192
173 if (!irq) { 193 if (!irq) {
@@ -175,17 +195,11 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq,
175 return -EINVAL; 195 return -EINVAL;
176 } 196 }
177 197
178 if (!pdata || !pdata->irq_base) { 198 if (!pdata) {
179 dev_warn(tps65910->dev, "No interrupt support, no IRQ base\n"); 199 dev_warn(tps65910->dev, "No interrupt support, no pdata\n");
180 return -EINVAL; 200 return -EINVAL;
181 } 201 }
182 202
183 tps65910->irq_mask = 0xFFFFFF;
184
185 mutex_init(&tps65910->irq_lock);
186 tps65910->chip_irq = irq;
187 tps65910->irq_base = pdata->irq_base;
188
189 switch (tps65910_chip_id(tps65910)) { 203 switch (tps65910_chip_id(tps65910)) {
190 case TPS65910: 204 case TPS65910:
191 tps65910->irq_num = TPS65910_NUM_IRQ; 205 tps65910->irq_num = TPS65910_NUM_IRQ;
@@ -195,22 +209,36 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq,
195 break; 209 break;
196 } 210 }
197 211
198 /* Register with genirq */ 212 if (pdata->irq_base > 0) {
199 for (cur_irq = tps65910->irq_base; 213 pdata->irq_base = irq_alloc_descs(pdata->irq_base, 0,
200 cur_irq < tps65910->irq_num + tps65910->irq_base; 214 tps65910->irq_num, -1);
201 cur_irq++) { 215 if (pdata->irq_base < 0) {
202 irq_set_chip_data(cur_irq, tps65910); 216 dev_warn(tps65910->dev, "Failed to alloc IRQs: %d\n",
203 irq_set_chip_and_handler(cur_irq, &tps65910_irq_chip, 217 pdata->irq_base);
204 handle_edge_irq); 218 return pdata->irq_base;
205 irq_set_nested_thread(cur_irq, 1); 219 }
206 220 }
207 /* ARM needs us to explicitly flag the IRQ as valid 221
208 * and will set them noprobe when we do so. */ 222 tps65910->irq_mask = 0xFFFFFF;
209#ifdef CONFIG_ARM 223
210 set_irq_flags(cur_irq, IRQF_VALID); 224 mutex_init(&tps65910->irq_lock);
211#else 225 tps65910->chip_irq = irq;
212 irq_set_noprobe(cur_irq); 226 tps65910->irq_base = pdata->irq_base;
213#endif 227
228 if (pdata->irq_base > 0)
229 tps65910->domain = irq_domain_add_legacy(tps65910->dev->of_node,
230 tps65910->irq_num,
231 pdata->irq_base,
232 0,
233 &tps65910_domain_ops, tps65910);
234 else
235 tps65910->domain = irq_domain_add_linear(tps65910->dev->of_node,
236 tps65910->irq_num,
237 &tps65910_domain_ops, tps65910);
238
239 if (!tps65910->domain) {
240 dev_err(tps65910->dev, "Failed to create IRQ domain\n");
241 return -ENOMEM;
214 } 242 }
215 243
216 ret = request_threaded_irq(irq, NULL, tps65910_irq, flags, 244 ret = request_threaded_irq(irq, NULL, tps65910_irq, flags,
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index bf2b25ebf2ca..be9e07b77325 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -19,13 +19,16 @@
19#include <linux/err.h> 19#include <linux/err.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/i2c.h> 21#include <linux/i2c.h>
22#include <linux/gpio.h>
23#include <linux/mfd/core.h> 22#include <linux/mfd/core.h>
24#include <linux/regmap.h> 23#include <linux/regmap.h>
25#include <linux/mfd/tps65910.h> 24#include <linux/mfd/tps65910.h>
25#include <linux/of_device.h>
26 26
27static struct mfd_cell tps65910s[] = { 27static struct mfd_cell tps65910s[] = {
28 { 28 {
29 .name = "tps65910-gpio",
30 },
31 {
29 .name = "tps65910-pmic", 32 .name = "tps65910-pmic",
30 }, 33 },
31 { 34 {
@@ -37,30 +40,6 @@ static struct mfd_cell tps65910s[] = {
37}; 40};
38 41
39 42
40static int tps65910_i2c_read(struct tps65910 *tps65910, u8 reg,
41 int bytes, void *dest)
42{
43 return regmap_bulk_read(tps65910->regmap, reg, dest, bytes);
44}
45
46static int tps65910_i2c_write(struct tps65910 *tps65910, u8 reg,
47 int bytes, void *src)
48{
49 return regmap_bulk_write(tps65910->regmap, reg, src, bytes);
50}
51
52int tps65910_set_bits(struct tps65910 *tps65910, u8 reg, u8 mask)
53{
54 return regmap_update_bits(tps65910->regmap, reg, mask, mask);
55}
56EXPORT_SYMBOL_GPL(tps65910_set_bits);
57
58int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask)
59{
60 return regmap_update_bits(tps65910->regmap, reg, mask, 0);
61}
62EXPORT_SYMBOL_GPL(tps65910_clear_bits);
63
64static bool is_volatile_reg(struct device *dev, unsigned int reg) 43static bool is_volatile_reg(struct device *dev, unsigned int reg)
65{ 44{
66 struct tps65910 *tps65910 = dev_get_drvdata(dev); 45 struct tps65910 *tps65910 = dev_get_drvdata(dev);
@@ -85,80 +64,197 @@ static const struct regmap_config tps65910_regmap_config = {
85 .reg_bits = 8, 64 .reg_bits = 8,
86 .val_bits = 8, 65 .val_bits = 8,
87 .volatile_reg = is_volatile_reg, 66 .volatile_reg = is_volatile_reg,
88 .max_register = TPS65910_MAX_REGISTER, 67 .max_register = TPS65910_MAX_REGISTER - 1,
89 .num_reg_defaults_raw = TPS65910_MAX_REGISTER,
90 .cache_type = REGCACHE_RBTREE, 68 .cache_type = REGCACHE_RBTREE,
91}; 69};
92 70
93static int tps65910_i2c_probe(struct i2c_client *i2c, 71static int __devinit tps65910_sleepinit(struct tps65910 *tps65910,
94 const struct i2c_device_id *id) 72 struct tps65910_board *pmic_pdata)
73{
74 struct device *dev = NULL;
75 int ret = 0;
76
77 dev = tps65910->dev;
78
79 if (!pmic_pdata->en_dev_slp)
80 return 0;
81
82 /* enabling SLEEP device state */
83 ret = tps65910_reg_set_bits(tps65910, TPS65910_DEVCTRL,
84 DEVCTRL_DEV_SLP_MASK);
85 if (ret < 0) {
86 dev_err(dev, "set dev_slp failed: %d\n", ret);
87 goto err_sleep_init;
88 }
89
90 /* Return if there is no sleep keepon data. */
91 if (!pmic_pdata->slp_keepon)
92 return 0;
93
94 if (pmic_pdata->slp_keepon->therm_keepon) {
95 ret = tps65910_reg_set_bits(tps65910,
96 TPS65910_SLEEP_KEEP_RES_ON,
97 SLEEP_KEEP_RES_ON_THERM_KEEPON_MASK);
98 if (ret < 0) {
99 dev_err(dev, "set therm_keepon failed: %d\n", ret);
100 goto disable_dev_slp;
101 }
102 }
103
104 if (pmic_pdata->slp_keepon->clkout32k_keepon) {
105 ret = tps65910_reg_set_bits(tps65910,
106 TPS65910_SLEEP_KEEP_RES_ON,
107 SLEEP_KEEP_RES_ON_CLKOUT32K_KEEPON_MASK);
108 if (ret < 0) {
109 dev_err(dev, "set clkout32k_keepon failed: %d\n", ret);
110 goto disable_dev_slp;
111 }
112 }
113
114 if (pmic_pdata->slp_keepon->i2chs_keepon) {
115 ret = tps65910_reg_set_bits(tps65910,
116 TPS65910_SLEEP_KEEP_RES_ON,
117 SLEEP_KEEP_RES_ON_I2CHS_KEEPON_MASK);
118 if (ret < 0) {
119 dev_err(dev, "set i2chs_keepon failed: %d\n", ret);
120 goto disable_dev_slp;
121 }
122 }
123
124 return 0;
125
126disable_dev_slp:
127 tps65910_reg_clear_bits(tps65910, TPS65910_DEVCTRL,
128 DEVCTRL_DEV_SLP_MASK);
129
130err_sleep_init:
131 return ret;
132}
133
134#ifdef CONFIG_OF
135static struct of_device_id tps65910_of_match[] = {
136 { .compatible = "ti,tps65910", .data = (void *)TPS65910},
137 { .compatible = "ti,tps65911", .data = (void *)TPS65911},
138 { },
139};
140MODULE_DEVICE_TABLE(of, tps65910_of_match);
141
142static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client,
143 int *chip_id)
144{
145 struct device_node *np = client->dev.of_node;
146 struct tps65910_board *board_info;
147 unsigned int prop;
148 const struct of_device_id *match;
149 int ret = 0;
150
151 match = of_match_device(tps65910_of_match, &client->dev);
152 if (!match) {
153 dev_err(&client->dev, "Failed to find matching dt id\n");
154 return NULL;
155 }
156
157 *chip_id = (int)match->data;
158
159 board_info = devm_kzalloc(&client->dev, sizeof(*board_info),
160 GFP_KERNEL);
161 if (!board_info) {
162 dev_err(&client->dev, "Failed to allocate pdata\n");
163 return NULL;
164 }
165
166 ret = of_property_read_u32(np, "ti,vmbch-threshold", &prop);
167 if (!ret)
168 board_info->vmbch_threshold = prop;
169 else if (*chip_id == TPS65911)
170 dev_warn(&client->dev, "VMBCH-Threshold not specified");
171
172 ret = of_property_read_u32(np, "ti,vmbch2-threshold", &prop);
173 if (!ret)
174 board_info->vmbch2_threshold = prop;
175 else if (*chip_id == TPS65911)
176 dev_warn(&client->dev, "VMBCH2-Threshold not specified");
177
178 board_info->irq = client->irq;
179 board_info->irq_base = -1;
180
181 return board_info;
182}
183#else
184static inline
185struct tps65910_board *tps65910_parse_dt(struct i2c_client *client,
186 int *chip_id)
187{
188 return NULL;
189}
190#endif
191
192static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
193 const struct i2c_device_id *id)
95{ 194{
96 struct tps65910 *tps65910; 195 struct tps65910 *tps65910;
97 struct tps65910_board *pmic_plat_data; 196 struct tps65910_board *pmic_plat_data;
197 struct tps65910_board *of_pmic_plat_data = NULL;
98 struct tps65910_platform_data *init_data; 198 struct tps65910_platform_data *init_data;
99 int ret = 0; 199 int ret = 0;
200 int chip_id = id->driver_data;
100 201
101 pmic_plat_data = dev_get_platdata(&i2c->dev); 202 pmic_plat_data = dev_get_platdata(&i2c->dev);
203
204 if (!pmic_plat_data && i2c->dev.of_node) {
205 pmic_plat_data = tps65910_parse_dt(i2c, &chip_id);
206 of_pmic_plat_data = pmic_plat_data;
207 }
208
102 if (!pmic_plat_data) 209 if (!pmic_plat_data)
103 return -EINVAL; 210 return -EINVAL;
104 211
105 init_data = kzalloc(sizeof(struct tps65910_platform_data), GFP_KERNEL); 212 init_data = devm_kzalloc(&i2c->dev, sizeof(*init_data), GFP_KERNEL);
106 if (init_data == NULL) 213 if (init_data == NULL)
107 return -ENOMEM; 214 return -ENOMEM;
108 215
109 tps65910 = kzalloc(sizeof(struct tps65910), GFP_KERNEL); 216 tps65910 = devm_kzalloc(&i2c->dev, sizeof(*tps65910), GFP_KERNEL);
110 if (tps65910 == NULL) { 217 if (tps65910 == NULL)
111 kfree(init_data);
112 return -ENOMEM; 218 return -ENOMEM;
113 }
114 219
220 tps65910->of_plat_data = of_pmic_plat_data;
115 i2c_set_clientdata(i2c, tps65910); 221 i2c_set_clientdata(i2c, tps65910);
116 tps65910->dev = &i2c->dev; 222 tps65910->dev = &i2c->dev;
117 tps65910->i2c_client = i2c; 223 tps65910->i2c_client = i2c;
118 tps65910->id = id->driver_data; 224 tps65910->id = chip_id;
119 tps65910->read = tps65910_i2c_read;
120 tps65910->write = tps65910_i2c_write;
121 mutex_init(&tps65910->io_mutex); 225 mutex_init(&tps65910->io_mutex);
122 226
123 tps65910->regmap = regmap_init_i2c(i2c, &tps65910_regmap_config); 227 tps65910->regmap = devm_regmap_init_i2c(i2c, &tps65910_regmap_config);
124 if (IS_ERR(tps65910->regmap)) { 228 if (IS_ERR(tps65910->regmap)) {
125 ret = PTR_ERR(tps65910->regmap); 229 ret = PTR_ERR(tps65910->regmap);
126 dev_err(&i2c->dev, "regmap initialization failed: %d\n", ret); 230 dev_err(&i2c->dev, "regmap initialization failed: %d\n", ret);
127 goto regmap_err; 231 return ret;
128 } 232 }
129 233
130 ret = mfd_add_devices(tps65910->dev, -1, 234 ret = mfd_add_devices(tps65910->dev, -1,
131 tps65910s, ARRAY_SIZE(tps65910s), 235 tps65910s, ARRAY_SIZE(tps65910s),
132 NULL, 0); 236 NULL, 0);
133 if (ret < 0) 237 if (ret < 0) {
134 goto err; 238 dev_err(&i2c->dev, "mfd_add_devices failed: %d\n", ret);
239 return ret;
240 }
135 241
136 init_data->irq = pmic_plat_data->irq; 242 init_data->irq = pmic_plat_data->irq;
137 init_data->irq_base = pmic_plat_data->irq_base; 243 init_data->irq_base = pmic_plat_data->irq_base;
138 244
139 tps65910_gpio_init(tps65910, pmic_plat_data->gpio_base);
140
141 tps65910_irq_init(tps65910, init_data->irq, init_data); 245 tps65910_irq_init(tps65910, init_data->irq, init_data);
142 246
143 kfree(init_data); 247 tps65910_sleepinit(tps65910, pmic_plat_data);
144 return ret;
145 248
146err:
147 regmap_exit(tps65910->regmap);
148regmap_err:
149 kfree(tps65910);
150 kfree(init_data);
151 return ret; 249 return ret;
152} 250}
153 251
154static int tps65910_i2c_remove(struct i2c_client *i2c) 252static __devexit int tps65910_i2c_remove(struct i2c_client *i2c)
155{ 253{
156 struct tps65910 *tps65910 = i2c_get_clientdata(i2c); 254 struct tps65910 *tps65910 = i2c_get_clientdata(i2c);
157 255
158 tps65910_irq_exit(tps65910); 256 tps65910_irq_exit(tps65910);
159 mfd_remove_devices(tps65910->dev); 257 mfd_remove_devices(tps65910->dev);
160 regmap_exit(tps65910->regmap);
161 kfree(tps65910);
162 258
163 return 0; 259 return 0;
164} 260}
@@ -175,9 +271,10 @@ static struct i2c_driver tps65910_i2c_driver = {
175 .driver = { 271 .driver = {
176 .name = "tps65910", 272 .name = "tps65910",
177 .owner = THIS_MODULE, 273 .owner = THIS_MODULE,
274 .of_match_table = of_match_ptr(tps65910_of_match),
178 }, 275 },
179 .probe = tps65910_i2c_probe, 276 .probe = tps65910_i2c_probe,
180 .remove = tps65910_i2c_remove, 277 .remove = __devexit_p(tps65910_i2c_remove),
181 .id_table = tps65910_i2c_id, 278 .id_table = tps65910_i2c_id,
182}; 279};
183 280
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 5d656e814358..ad733d76207a 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -757,6 +757,7 @@ int twl4030_init_irq(struct device *dev, int irq_num)
757 dev_err(dev, "could not claim irq%d: %d\n", irq_num, status); 757 dev_err(dev, "could not claim irq%d: %d\n", irq_num, status);
758 goto fail_rqirq; 758 goto fail_rqirq;
759 } 759 }
760 enable_irq_wake(irq_num);
760 761
761 return irq_base; 762 return irq_base;
762fail_rqirq: 763fail_rqirq:
diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c
index 2d6bedadca09..4ded9e7aa246 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040-core.c
@@ -27,7 +27,12 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/kernel.h> 29#include <linux/kernel.h>
30#include <linux/err.h>
30#include <linux/platform_device.h> 31#include <linux/platform_device.h>
32#include <linux/of.h>
33#include <linux/of_irq.h>
34#include <linux/of_gpio.h>
35#include <linux/of_platform.h>
31#include <linux/gpio.h> 36#include <linux/gpio.h>
32#include <linux/delay.h> 37#include <linux/delay.h>
33#include <linux/i2c.h> 38#include <linux/i2c.h>
@@ -35,8 +40,24 @@
35#include <linux/err.h> 40#include <linux/err.h>
36#include <linux/mfd/core.h> 41#include <linux/mfd/core.h>
37#include <linux/mfd/twl6040.h> 42#include <linux/mfd/twl6040.h>
43#include <linux/regulator/consumer.h>
38 44
39#define VIBRACTRL_MEMBER(reg) ((reg == TWL6040_REG_VIBCTLL) ? 0 : 1) 45#define VIBRACTRL_MEMBER(reg) ((reg == TWL6040_REG_VIBCTLL) ? 0 : 1)
46#define TWL6040_NUM_SUPPLIES (2)
47
48static bool twl6040_has_vibra(struct twl6040_platform_data *pdata,
49 struct device_node *node)
50{
51 if (pdata && pdata->vibra)
52 return true;
53
54#ifdef CONFIG_OF
55 if (of_find_node_by_name(node, "vibra"))
56 return true;
57#endif
58
59 return false;
60}
40 61
41int twl6040_reg_read(struct twl6040 *twl6040, unsigned int reg) 62int twl6040_reg_read(struct twl6040 *twl6040, unsigned int reg)
42{ 63{
@@ -502,17 +523,18 @@ static int __devinit twl6040_probe(struct i2c_client *client,
502 const struct i2c_device_id *id) 523 const struct i2c_device_id *id)
503{ 524{
504 struct twl6040_platform_data *pdata = client->dev.platform_data; 525 struct twl6040_platform_data *pdata = client->dev.platform_data;
526 struct device_node *node = client->dev.of_node;
505 struct twl6040 *twl6040; 527 struct twl6040 *twl6040;
506 struct mfd_cell *cell = NULL; 528 struct mfd_cell *cell = NULL;
507 int ret, children = 0; 529 int irq, ret, children = 0;
508 530
509 if (!pdata) { 531 if (!pdata && !node) {
510 dev_err(&client->dev, "Platform data is missing\n"); 532 dev_err(&client->dev, "Platform data is missing\n");
511 return -EINVAL; 533 return -EINVAL;
512 } 534 }
513 535
514 /* In order to operate correctly we need valid interrupt config */ 536 /* In order to operate correctly we need valid interrupt config */
515 if (!client->irq || !pdata->irq_base) { 537 if (!client->irq) {
516 dev_err(&client->dev, "Invalid IRQ configuration\n"); 538 dev_err(&client->dev, "Invalid IRQ configuration\n");
517 return -EINVAL; 539 return -EINVAL;
518 } 540 }
@@ -524,7 +546,7 @@ static int __devinit twl6040_probe(struct i2c_client *client,
524 goto err; 546 goto err;
525 } 547 }
526 548
527 twl6040->regmap = regmap_init_i2c(client, &twl6040_regmap_config); 549 twl6040->regmap = devm_regmap_init_i2c(client, &twl6040_regmap_config);
528 if (IS_ERR(twl6040->regmap)) { 550 if (IS_ERR(twl6040->regmap)) {
529 ret = PTR_ERR(twl6040->regmap); 551 ret = PTR_ERR(twl6040->regmap);
530 goto err; 552 goto err;
@@ -532,9 +554,23 @@ static int __devinit twl6040_probe(struct i2c_client *client,
532 554
533 i2c_set_clientdata(client, twl6040); 555 i2c_set_clientdata(client, twl6040);
534 556
557 twl6040->supplies[0].supply = "vio";
558 twl6040->supplies[1].supply = "v2v1";
559 ret = regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES,
560 twl6040->supplies);
561 if (ret != 0) {
562 dev_err(&client->dev, "Failed to get supplies: %d\n", ret);
563 goto regulator_get_err;
564 }
565
566 ret = regulator_bulk_enable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
567 if (ret != 0) {
568 dev_err(&client->dev, "Failed to enable supplies: %d\n", ret);
569 goto power_err;
570 }
571
535 twl6040->dev = &client->dev; 572 twl6040->dev = &client->dev;
536 twl6040->irq = client->irq; 573 twl6040->irq = client->irq;
537 twl6040->irq_base = pdata->irq_base;
538 574
539 mutex_init(&twl6040->mutex); 575 mutex_init(&twl6040->mutex);
540 mutex_init(&twl6040->io_mutex); 576 mutex_init(&twl6040->io_mutex);
@@ -543,22 +579,26 @@ static int __devinit twl6040_probe(struct i2c_client *client,
543 twl6040->rev = twl6040_reg_read(twl6040, TWL6040_REG_ASICREV); 579 twl6040->rev = twl6040_reg_read(twl6040, TWL6040_REG_ASICREV);
544 580
545 /* ERRATA: Automatic power-up is not possible in ES1.0 */ 581 /* ERRATA: Automatic power-up is not possible in ES1.0 */
546 if (twl6040_get_revid(twl6040) > TWL6040_REV_ES1_0) 582 if (twl6040_get_revid(twl6040) > TWL6040_REV_ES1_0) {
547 twl6040->audpwron = pdata->audpwron_gpio; 583 if (pdata)
548 else 584 twl6040->audpwron = pdata->audpwron_gpio;
585 else
586 twl6040->audpwron = of_get_named_gpio(node,
587 "ti,audpwron-gpio", 0);
588 } else
549 twl6040->audpwron = -EINVAL; 589 twl6040->audpwron = -EINVAL;
550 590
551 if (gpio_is_valid(twl6040->audpwron)) { 591 if (gpio_is_valid(twl6040->audpwron)) {
552 ret = gpio_request_one(twl6040->audpwron, GPIOF_OUT_INIT_LOW, 592 ret = gpio_request_one(twl6040->audpwron, GPIOF_OUT_INIT_LOW,
553 "audpwron"); 593 "audpwron");
554 if (ret) 594 if (ret)
555 goto gpio1_err; 595 goto gpio_err;
556 } 596 }
557 597
558 /* codec interrupt */ 598 /* codec interrupt */
559 ret = twl6040_irq_init(twl6040); 599 ret = twl6040_irq_init(twl6040);
560 if (ret) 600 if (ret)
561 goto gpio2_err; 601 goto irq_init_err;
562 602
563 ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_READY, 603 ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_READY,
564 NULL, twl6040_naudint_handler, 0, 604 NULL, twl6040_naudint_handler, 0,
@@ -572,22 +612,27 @@ static int __devinit twl6040_probe(struct i2c_client *client,
572 /* dual-access registers controlled by I2C only */ 612 /* dual-access registers controlled by I2C only */
573 twl6040_set_bits(twl6040, TWL6040_REG_ACCCTL, TWL6040_I2CSEL); 613 twl6040_set_bits(twl6040, TWL6040_REG_ACCCTL, TWL6040_I2CSEL);
574 614
575 if (pdata->codec) { 615 /*
576 int irq = twl6040->irq_base + TWL6040_IRQ_PLUG; 616 * The main functionality of twl6040 to provide audio on OMAP4+ systems.
577 617 * We can add the ASoC codec child whenever this driver has been loaded.
578 cell = &twl6040->cells[children]; 618 * The ASoC codec can work without pdata, pass the platform_data only if
579 cell->name = "twl6040-codec"; 619 * it has been provided.
580 twl6040_codec_rsrc[0].start = irq; 620 */
581 twl6040_codec_rsrc[0].end = irq; 621 irq = twl6040->irq_base + TWL6040_IRQ_PLUG;
582 cell->resources = twl6040_codec_rsrc; 622 cell = &twl6040->cells[children];
583 cell->num_resources = ARRAY_SIZE(twl6040_codec_rsrc); 623 cell->name = "twl6040-codec";
624 twl6040_codec_rsrc[0].start = irq;
625 twl6040_codec_rsrc[0].end = irq;
626 cell->resources = twl6040_codec_rsrc;
627 cell->num_resources = ARRAY_SIZE(twl6040_codec_rsrc);
628 if (pdata && pdata->codec) {
584 cell->platform_data = pdata->codec; 629 cell->platform_data = pdata->codec;
585 cell->pdata_size = sizeof(*pdata->codec); 630 cell->pdata_size = sizeof(*pdata->codec);
586 children++;
587 } 631 }
632 children++;
588 633
589 if (pdata->vibra) { 634 if (twl6040_has_vibra(pdata, node)) {
590 int irq = twl6040->irq_base + TWL6040_IRQ_VIB; 635 irq = twl6040->irq_base + TWL6040_IRQ_VIB;
591 636
592 cell = &twl6040->cells[children]; 637 cell = &twl6040->cells[children];
593 cell->name = "twl6040-vibra"; 638 cell->name = "twl6040-vibra";
@@ -596,21 +641,17 @@ static int __devinit twl6040_probe(struct i2c_client *client,
596 cell->resources = twl6040_vibra_rsrc; 641 cell->resources = twl6040_vibra_rsrc;
597 cell->num_resources = ARRAY_SIZE(twl6040_vibra_rsrc); 642 cell->num_resources = ARRAY_SIZE(twl6040_vibra_rsrc);
598 643
599 cell->platform_data = pdata->vibra; 644 if (pdata && pdata->vibra) {
600 cell->pdata_size = sizeof(*pdata->vibra); 645 cell->platform_data = pdata->vibra;
646 cell->pdata_size = sizeof(*pdata->vibra);
647 }
601 children++; 648 children++;
602 } 649 }
603 650
604 if (children) { 651 ret = mfd_add_devices(&client->dev, -1, twl6040->cells, children,
605 ret = mfd_add_devices(&client->dev, -1, twl6040->cells, 652 NULL, 0);
606 children, NULL, 0); 653 if (ret)
607 if (ret)
608 goto mfd_err;
609 } else {
610 dev_err(&client->dev, "No platform data found for children\n");
611 ret = -ENODEV;
612 goto mfd_err; 654 goto mfd_err;
613 }
614 655
615 return 0; 656 return 0;
616 657
@@ -618,12 +659,15 @@ mfd_err:
618 free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040); 659 free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040);
619irq_err: 660irq_err:
620 twl6040_irq_exit(twl6040); 661 twl6040_irq_exit(twl6040);
621gpio2_err: 662irq_init_err:
622 if (gpio_is_valid(twl6040->audpwron)) 663 if (gpio_is_valid(twl6040->audpwron))
623 gpio_free(twl6040->audpwron); 664 gpio_free(twl6040->audpwron);
624gpio1_err: 665gpio_err:
666 regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
667power_err:
668 regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies);
669regulator_get_err:
625 i2c_set_clientdata(client, NULL); 670 i2c_set_clientdata(client, NULL);
626 regmap_exit(twl6040->regmap);
627err: 671err:
628 return ret; 672 return ret;
629} 673}
@@ -643,7 +687,9 @@ static int __devexit twl6040_remove(struct i2c_client *client)
643 687
644 mfd_remove_devices(&client->dev); 688 mfd_remove_devices(&client->dev);
645 i2c_set_clientdata(client, NULL); 689 i2c_set_clientdata(client, NULL);
646 regmap_exit(twl6040->regmap); 690
691 regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
692 regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies);
647 693
648 return 0; 694 return 0;
649} 695}
diff --git a/drivers/mfd/twl6040-irq.c b/drivers/mfd/twl6040-irq.c
index b3f8ddaa28a8..4b42543da228 100644
--- a/drivers/mfd/twl6040-irq.c
+++ b/drivers/mfd/twl6040-irq.c
@@ -23,7 +23,10 @@
23 23
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/err.h>
26#include <linux/irq.h> 27#include <linux/irq.h>
28#include <linux/of.h>
29#include <linux/irqdomain.h>
27#include <linux/interrupt.h> 30#include <linux/interrupt.h>
28#include <linux/mfd/core.h> 31#include <linux/mfd/core.h>
29#include <linux/mfd/twl6040.h> 32#include <linux/mfd/twl6040.h>
@@ -138,7 +141,8 @@ static irqreturn_t twl6040_irq_thread(int irq, void *data)
138 141
139int twl6040_irq_init(struct twl6040 *twl6040) 142int twl6040_irq_init(struct twl6040 *twl6040)
140{ 143{
141 int cur_irq, ret; 144 struct device_node *node = twl6040->dev->of_node;
145 int i, nr_irqs, irq_base, ret;
142 u8 val; 146 u8 val;
143 147
144 mutex_init(&twl6040->irq_mutex); 148 mutex_init(&twl6040->irq_mutex);
@@ -148,21 +152,31 @@ int twl6040_irq_init(struct twl6040 *twl6040)
148 twl6040->irq_masks_cache = TWL6040_ALLINT_MSK; 152 twl6040->irq_masks_cache = TWL6040_ALLINT_MSK;
149 twl6040_reg_write(twl6040, TWL6040_REG_INTMR, TWL6040_ALLINT_MSK); 153 twl6040_reg_write(twl6040, TWL6040_REG_INTMR, TWL6040_ALLINT_MSK);
150 154
155 nr_irqs = ARRAY_SIZE(twl6040_irqs);
156
157 irq_base = irq_alloc_descs(-1, 0, nr_irqs, 0);
158 if (IS_ERR_VALUE(irq_base)) {
159 dev_err(twl6040->dev, "Fail to allocate IRQ descs\n");
160 return irq_base;
161 }
162 twl6040->irq_base = irq_base;
163
164 irq_domain_add_legacy(node, ARRAY_SIZE(twl6040_irqs), irq_base, 0,
165 &irq_domain_simple_ops, NULL);
166
151 /* Register them with genirq */ 167 /* Register them with genirq */
152 for (cur_irq = twl6040->irq_base; 168 for (i = irq_base; i < irq_base + nr_irqs; i++) {
153 cur_irq < twl6040->irq_base + ARRAY_SIZE(twl6040_irqs); 169 irq_set_chip_data(i, twl6040);
154 cur_irq++) { 170 irq_set_chip_and_handler(i, &twl6040_irq_chip,
155 irq_set_chip_data(cur_irq, twl6040);
156 irq_set_chip_and_handler(cur_irq, &twl6040_irq_chip,
157 handle_level_irq); 171 handle_level_irq);
158 irq_set_nested_thread(cur_irq, 1); 172 irq_set_nested_thread(i, 1);
159 173
160 /* ARM needs us to explicitly flag the IRQ as valid 174 /* ARM needs us to explicitly flag the IRQ as valid
161 * and will set them noprobe when we do so. */ 175 * and will set them noprobe when we do so. */
162#ifdef CONFIG_ARM 176#ifdef CONFIG_ARM
163 set_irq_flags(cur_irq, IRQF_VALID); 177 set_irq_flags(i, IRQF_VALID);
164#else 178#else
165 irq_set_noprobe(cur_irq); 179 irq_set_noprobe(i);
166#endif 180#endif
167 } 181 }
168 182
diff --git a/drivers/mfd/vx855.c b/drivers/mfd/vx855.c
index b73cc15e0081..872aff21e4be 100644
--- a/drivers/mfd/vx855.c
+++ b/drivers/mfd/vx855.c
@@ -131,17 +131,7 @@ static struct pci_driver vx855_pci_driver = {
131 .remove = __devexit_p(vx855_remove), 131 .remove = __devexit_p(vx855_remove),
132}; 132};
133 133
134static int vx855_init(void) 134module_pci_driver(vx855_pci_driver);
135{
136 return pci_register_driver(&vx855_pci_driver);
137}
138module_init(vx855_init);
139
140static void vx855_exit(void)
141{
142 pci_unregister_driver(&vx855_pci_driver);
143}
144module_exit(vx855_exit);
145 135
146MODULE_LICENSE("GPL"); 136MODULE_LICENSE("GPL");
147MODULE_AUTHOR("Harald Welte <HaraldWelte@viatech.com>"); 137MODULE_AUTHOR("Harald Welte <HaraldWelte@viatech.com>");
diff --git a/drivers/mfd/wm831x-auxadc.c b/drivers/mfd/wm831x-auxadc.c
index 87210954a066..6ee3018d8653 100644
--- a/drivers/mfd/wm831x-auxadc.c
+++ b/drivers/mfd/wm831x-auxadc.c
@@ -280,11 +280,11 @@ void wm831x_auxadc_init(struct wm831x *wm831x)
280 mutex_init(&wm831x->auxadc_lock); 280 mutex_init(&wm831x->auxadc_lock);
281 INIT_LIST_HEAD(&wm831x->auxadc_pending); 281 INIT_LIST_HEAD(&wm831x->auxadc_pending);
282 282
283 if (wm831x->irq && wm831x->irq_base) { 283 if (wm831x->irq) {
284 wm831x->auxadc_read = wm831x_auxadc_read_irq; 284 wm831x->auxadc_read = wm831x_auxadc_read_irq;
285 285
286 ret = request_threaded_irq(wm831x->irq_base + 286 ret = request_threaded_irq(wm831x_irq(wm831x,
287 WM831X_IRQ_AUXADC_DATA, 287 WM831X_IRQ_AUXADC_DATA),
288 NULL, wm831x_auxadc_irq, 0, 288 NULL, wm831x_auxadc_irq, 0,
289 "auxadc", wm831x); 289 "auxadc", wm831x);
290 if (ret < 0) { 290 if (ret < 0) {
diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 838056c3493a..946698fd2dc6 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -614,8 +614,15 @@ int wm831x_set_bits(struct wm831x *wm831x, unsigned short reg,
614} 614}
615EXPORT_SYMBOL_GPL(wm831x_set_bits); 615EXPORT_SYMBOL_GPL(wm831x_set_bits);
616 616
617static struct resource wm831x_io_parent = {
618 .start = 0,
619 .end = 0xffffffff,
620 .flags = IORESOURCE_IO,
621};
622
617static struct resource wm831x_dcdc1_resources[] = { 623static struct resource wm831x_dcdc1_resources[] = {
618 { 624 {
625 .parent = &wm831x_io_parent,
619 .start = WM831X_DC1_CONTROL_1, 626 .start = WM831X_DC1_CONTROL_1,
620 .end = WM831X_DC1_DVS_CONTROL, 627 .end = WM831X_DC1_DVS_CONTROL,
621 .flags = IORESOURCE_IO, 628 .flags = IORESOURCE_IO,
@@ -637,6 +644,7 @@ static struct resource wm831x_dcdc1_resources[] = {
637 644
638static struct resource wm831x_dcdc2_resources[] = { 645static struct resource wm831x_dcdc2_resources[] = {
639 { 646 {
647 .parent = &wm831x_io_parent,
640 .start = WM831X_DC2_CONTROL_1, 648 .start = WM831X_DC2_CONTROL_1,
641 .end = WM831X_DC2_DVS_CONTROL, 649 .end = WM831X_DC2_DVS_CONTROL,
642 .flags = IORESOURCE_IO, 650 .flags = IORESOURCE_IO,
@@ -657,6 +665,7 @@ static struct resource wm831x_dcdc2_resources[] = {
657 665
658static struct resource wm831x_dcdc3_resources[] = { 666static struct resource wm831x_dcdc3_resources[] = {
659 { 667 {
668 .parent = &wm831x_io_parent,
660 .start = WM831X_DC3_CONTROL_1, 669 .start = WM831X_DC3_CONTROL_1,
661 .end = WM831X_DC3_SLEEP_CONTROL, 670 .end = WM831X_DC3_SLEEP_CONTROL,
662 .flags = IORESOURCE_IO, 671 .flags = IORESOURCE_IO,
@@ -671,6 +680,7 @@ static struct resource wm831x_dcdc3_resources[] = {
671 680
672static struct resource wm831x_dcdc4_resources[] = { 681static struct resource wm831x_dcdc4_resources[] = {
673 { 682 {
683 .parent = &wm831x_io_parent,
674 .start = WM831X_DC4_CONTROL, 684 .start = WM831X_DC4_CONTROL,
675 .end = WM831X_DC4_SLEEP_CONTROL, 685 .end = WM831X_DC4_SLEEP_CONTROL,
676 .flags = IORESOURCE_IO, 686 .flags = IORESOURCE_IO,
@@ -685,6 +695,7 @@ static struct resource wm831x_dcdc4_resources[] = {
685 695
686static struct resource wm8320_dcdc4_buck_resources[] = { 696static struct resource wm8320_dcdc4_buck_resources[] = {
687 { 697 {
698 .parent = &wm831x_io_parent,
688 .start = WM831X_DC4_CONTROL, 699 .start = WM831X_DC4_CONTROL,
689 .end = WM832X_DC4_SLEEP_CONTROL, 700 .end = WM832X_DC4_SLEEP_CONTROL,
690 .flags = IORESOURCE_IO, 701 .flags = IORESOURCE_IO,
@@ -707,6 +718,7 @@ static struct resource wm831x_gpio_resources[] = {
707 718
708static struct resource wm831x_isink1_resources[] = { 719static struct resource wm831x_isink1_resources[] = {
709 { 720 {
721 .parent = &wm831x_io_parent,
710 .start = WM831X_CURRENT_SINK_1, 722 .start = WM831X_CURRENT_SINK_1,
711 .end = WM831X_CURRENT_SINK_1, 723 .end = WM831X_CURRENT_SINK_1,
712 .flags = IORESOURCE_IO, 724 .flags = IORESOURCE_IO,
@@ -720,6 +732,7 @@ static struct resource wm831x_isink1_resources[] = {
720 732
721static struct resource wm831x_isink2_resources[] = { 733static struct resource wm831x_isink2_resources[] = {
722 { 734 {
735 .parent = &wm831x_io_parent,
723 .start = WM831X_CURRENT_SINK_2, 736 .start = WM831X_CURRENT_SINK_2,
724 .end = WM831X_CURRENT_SINK_2, 737 .end = WM831X_CURRENT_SINK_2,
725 .flags = IORESOURCE_IO, 738 .flags = IORESOURCE_IO,
@@ -733,6 +746,7 @@ static struct resource wm831x_isink2_resources[] = {
733 746
734static struct resource wm831x_ldo1_resources[] = { 747static struct resource wm831x_ldo1_resources[] = {
735 { 748 {
749 .parent = &wm831x_io_parent,
736 .start = WM831X_LDO1_CONTROL, 750 .start = WM831X_LDO1_CONTROL,
737 .end = WM831X_LDO1_SLEEP_CONTROL, 751 .end = WM831X_LDO1_SLEEP_CONTROL,
738 .flags = IORESOURCE_IO, 752 .flags = IORESOURCE_IO,
@@ -747,6 +761,7 @@ static struct resource wm831x_ldo1_resources[] = {
747 761
748static struct resource wm831x_ldo2_resources[] = { 762static struct resource wm831x_ldo2_resources[] = {
749 { 763 {
764 .parent = &wm831x_io_parent,
750 .start = WM831X_LDO2_CONTROL, 765 .start = WM831X_LDO2_CONTROL,
751 .end = WM831X_LDO2_SLEEP_CONTROL, 766 .end = WM831X_LDO2_SLEEP_CONTROL,
752 .flags = IORESOURCE_IO, 767 .flags = IORESOURCE_IO,
@@ -761,6 +776,7 @@ static struct resource wm831x_ldo2_resources[] = {
761 776
762static struct resource wm831x_ldo3_resources[] = { 777static struct resource wm831x_ldo3_resources[] = {
763 { 778 {
779 .parent = &wm831x_io_parent,
764 .start = WM831X_LDO3_CONTROL, 780 .start = WM831X_LDO3_CONTROL,
765 .end = WM831X_LDO3_SLEEP_CONTROL, 781 .end = WM831X_LDO3_SLEEP_CONTROL,
766 .flags = IORESOURCE_IO, 782 .flags = IORESOURCE_IO,
@@ -775,6 +791,7 @@ static struct resource wm831x_ldo3_resources[] = {
775 791
776static struct resource wm831x_ldo4_resources[] = { 792static struct resource wm831x_ldo4_resources[] = {
777 { 793 {
794 .parent = &wm831x_io_parent,
778 .start = WM831X_LDO4_CONTROL, 795 .start = WM831X_LDO4_CONTROL,
779 .end = WM831X_LDO4_SLEEP_CONTROL, 796 .end = WM831X_LDO4_SLEEP_CONTROL,
780 .flags = IORESOURCE_IO, 797 .flags = IORESOURCE_IO,
@@ -789,6 +806,7 @@ static struct resource wm831x_ldo4_resources[] = {
789 806
790static struct resource wm831x_ldo5_resources[] = { 807static struct resource wm831x_ldo5_resources[] = {
791 { 808 {
809 .parent = &wm831x_io_parent,
792 .start = WM831X_LDO5_CONTROL, 810 .start = WM831X_LDO5_CONTROL,
793 .end = WM831X_LDO5_SLEEP_CONTROL, 811 .end = WM831X_LDO5_SLEEP_CONTROL,
794 .flags = IORESOURCE_IO, 812 .flags = IORESOURCE_IO,
@@ -803,6 +821,7 @@ static struct resource wm831x_ldo5_resources[] = {
803 821
804static struct resource wm831x_ldo6_resources[] = { 822static struct resource wm831x_ldo6_resources[] = {
805 { 823 {
824 .parent = &wm831x_io_parent,
806 .start = WM831X_LDO6_CONTROL, 825 .start = WM831X_LDO6_CONTROL,
807 .end = WM831X_LDO6_SLEEP_CONTROL, 826 .end = WM831X_LDO6_SLEEP_CONTROL,
808 .flags = IORESOURCE_IO, 827 .flags = IORESOURCE_IO,
@@ -817,6 +836,7 @@ static struct resource wm831x_ldo6_resources[] = {
817 836
818static struct resource wm831x_ldo7_resources[] = { 837static struct resource wm831x_ldo7_resources[] = {
819 { 838 {
839 .parent = &wm831x_io_parent,
820 .start = WM831X_LDO7_CONTROL, 840 .start = WM831X_LDO7_CONTROL,
821 .end = WM831X_LDO7_SLEEP_CONTROL, 841 .end = WM831X_LDO7_SLEEP_CONTROL,
822 .flags = IORESOURCE_IO, 842 .flags = IORESOURCE_IO,
@@ -831,6 +851,7 @@ static struct resource wm831x_ldo7_resources[] = {
831 851
832static struct resource wm831x_ldo8_resources[] = { 852static struct resource wm831x_ldo8_resources[] = {
833 { 853 {
854 .parent = &wm831x_io_parent,
834 .start = WM831X_LDO8_CONTROL, 855 .start = WM831X_LDO8_CONTROL,
835 .end = WM831X_LDO8_SLEEP_CONTROL, 856 .end = WM831X_LDO8_SLEEP_CONTROL,
836 .flags = IORESOURCE_IO, 857 .flags = IORESOURCE_IO,
@@ -845,6 +866,7 @@ static struct resource wm831x_ldo8_resources[] = {
845 866
846static struct resource wm831x_ldo9_resources[] = { 867static struct resource wm831x_ldo9_resources[] = {
847 { 868 {
869 .parent = &wm831x_io_parent,
848 .start = WM831X_LDO9_CONTROL, 870 .start = WM831X_LDO9_CONTROL,
849 .end = WM831X_LDO9_SLEEP_CONTROL, 871 .end = WM831X_LDO9_SLEEP_CONTROL,
850 .flags = IORESOURCE_IO, 872 .flags = IORESOURCE_IO,
@@ -859,6 +881,7 @@ static struct resource wm831x_ldo9_resources[] = {
859 881
860static struct resource wm831x_ldo10_resources[] = { 882static struct resource wm831x_ldo10_resources[] = {
861 { 883 {
884 .parent = &wm831x_io_parent,
862 .start = WM831X_LDO10_CONTROL, 885 .start = WM831X_LDO10_CONTROL,
863 .end = WM831X_LDO10_SLEEP_CONTROL, 886 .end = WM831X_LDO10_SLEEP_CONTROL,
864 .flags = IORESOURCE_IO, 887 .flags = IORESOURCE_IO,
@@ -873,6 +896,7 @@ static struct resource wm831x_ldo10_resources[] = {
873 896
874static struct resource wm831x_ldo11_resources[] = { 897static struct resource wm831x_ldo11_resources[] = {
875 { 898 {
899 .parent = &wm831x_io_parent,
876 .start = WM831X_LDO11_ON_CONTROL, 900 .start = WM831X_LDO11_ON_CONTROL,
877 .end = WM831X_LDO11_SLEEP_CONTROL, 901 .end = WM831X_LDO11_SLEEP_CONTROL,
878 .flags = IORESOURCE_IO, 902 .flags = IORESOURCE_IO,
@@ -974,6 +998,7 @@ static struct resource wm831x_rtc_resources[] = {
974 998
975static struct resource wm831x_status1_resources[] = { 999static struct resource wm831x_status1_resources[] = {
976 { 1000 {
1001 .parent = &wm831x_io_parent,
977 .start = WM831X_STATUS_LED_1, 1002 .start = WM831X_STATUS_LED_1,
978 .end = WM831X_STATUS_LED_1, 1003 .end = WM831X_STATUS_LED_1,
979 .flags = IORESOURCE_IO, 1004 .flags = IORESOURCE_IO,
@@ -982,6 +1007,7 @@ static struct resource wm831x_status1_resources[] = {
982 1007
983static struct resource wm831x_status2_resources[] = { 1008static struct resource wm831x_status2_resources[] = {
984 { 1009 {
1010 .parent = &wm831x_io_parent,
985 .start = WM831X_STATUS_LED_2, 1011 .start = WM831X_STATUS_LED_2,
986 .end = WM831X_STATUS_LED_2, 1012 .end = WM831X_STATUS_LED_2,
987 .flags = IORESOURCE_IO, 1013 .flags = IORESOURCE_IO,
@@ -1787,27 +1813,27 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
1787 case WM8310: 1813 case WM8310:
1788 ret = mfd_add_devices(wm831x->dev, wm831x_num, 1814 ret = mfd_add_devices(wm831x->dev, wm831x_num,
1789 wm8310_devs, ARRAY_SIZE(wm8310_devs), 1815 wm8310_devs, ARRAY_SIZE(wm8310_devs),
1790 NULL, wm831x->irq_base); 1816 NULL, 0);
1791 break; 1817 break;
1792 1818
1793 case WM8311: 1819 case WM8311:
1794 ret = mfd_add_devices(wm831x->dev, wm831x_num, 1820 ret = mfd_add_devices(wm831x->dev, wm831x_num,
1795 wm8311_devs, ARRAY_SIZE(wm8311_devs), 1821 wm8311_devs, ARRAY_SIZE(wm8311_devs),
1796 NULL, wm831x->irq_base); 1822 NULL, 0);
1797 if (!pdata || !pdata->disable_touch) 1823 if (!pdata || !pdata->disable_touch)
1798 mfd_add_devices(wm831x->dev, wm831x_num, 1824 mfd_add_devices(wm831x->dev, wm831x_num,
1799 touch_devs, ARRAY_SIZE(touch_devs), 1825 touch_devs, ARRAY_SIZE(touch_devs),
1800 NULL, wm831x->irq_base); 1826 NULL, 0);
1801 break; 1827 break;
1802 1828
1803 case WM8312: 1829 case WM8312:
1804 ret = mfd_add_devices(wm831x->dev, wm831x_num, 1830 ret = mfd_add_devices(wm831x->dev, wm831x_num,
1805 wm8312_devs, ARRAY_SIZE(wm8312_devs), 1831 wm8312_devs, ARRAY_SIZE(wm8312_devs),
1806 NULL, wm831x->irq_base); 1832 NULL, 0);
1807 if (!pdata || !pdata->disable_touch) 1833 if (!pdata || !pdata->disable_touch)
1808 mfd_add_devices(wm831x->dev, wm831x_num, 1834 mfd_add_devices(wm831x->dev, wm831x_num,
1809 touch_devs, ARRAY_SIZE(touch_devs), 1835 touch_devs, ARRAY_SIZE(touch_devs),
1810 NULL, wm831x->irq_base); 1836 NULL, 0);
1811 break; 1837 break;
1812 1838
1813 case WM8320: 1839 case WM8320:
@@ -1816,7 +1842,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
1816 case WM8326: 1842 case WM8326:
1817 ret = mfd_add_devices(wm831x->dev, wm831x_num, 1843 ret = mfd_add_devices(wm831x->dev, wm831x_num,
1818 wm8320_devs, ARRAY_SIZE(wm8320_devs), 1844 wm8320_devs, ARRAY_SIZE(wm8320_devs),
1819 NULL, wm831x->irq_base); 1845 NULL, 0);
1820 break; 1846 break;
1821 1847
1822 default: 1848 default:
@@ -1841,7 +1867,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
1841 if (ret & WM831X_XTAL_ENA) { 1867 if (ret & WM831X_XTAL_ENA) {
1842 ret = mfd_add_devices(wm831x->dev, wm831x_num, 1868 ret = mfd_add_devices(wm831x->dev, wm831x_num,
1843 rtc_devs, ARRAY_SIZE(rtc_devs), 1869 rtc_devs, ARRAY_SIZE(rtc_devs),
1844 NULL, wm831x->irq_base); 1870 NULL, 0);
1845 if (ret != 0) { 1871 if (ret != 0) {
1846 dev_err(wm831x->dev, "Failed to add RTC: %d\n", ret); 1872 dev_err(wm831x->dev, "Failed to add RTC: %d\n", ret);
1847 goto err_irq; 1873 goto err_irq;
@@ -1854,7 +1880,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
1854 /* Treat errors as non-critical */ 1880 /* Treat errors as non-critical */
1855 ret = mfd_add_devices(wm831x->dev, wm831x_num, backlight_devs, 1881 ret = mfd_add_devices(wm831x->dev, wm831x_num, backlight_devs,
1856 ARRAY_SIZE(backlight_devs), NULL, 1882 ARRAY_SIZE(backlight_devs), NULL,
1857 wm831x->irq_base); 1883 0);
1858 if (ret < 0) 1884 if (ret < 0)
1859 dev_err(wm831x->dev, "Failed to add backlight: %d\n", 1885 dev_err(wm831x->dev, "Failed to add backlight: %d\n",
1860 ret); 1886 ret);
@@ -1883,8 +1909,7 @@ void wm831x_device_exit(struct wm831x *wm831x)
1883{ 1909{
1884 wm831x_otp_exit(wm831x); 1910 wm831x_otp_exit(wm831x);
1885 mfd_remove_devices(wm831x->dev); 1911 mfd_remove_devices(wm831x->dev);
1886 if (wm831x->irq_base) 1912 free_irq(wm831x_irq(wm831x, WM831X_IRQ_AUXADC_DATA), wm831x);
1887 free_irq(wm831x->irq_base + WM831X_IRQ_AUXADC_DATA, wm831x);
1888 wm831x_irq_exit(wm831x); 1913 wm831x_irq_exit(wm831x);
1889} 1914}
1890 1915
diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c
index bec4d0539160..804e56ec99eb 100644
--- a/drivers/mfd/wm831x-irq.c
+++ b/drivers/mfd/wm831x-irq.c
@@ -18,6 +18,7 @@
18#include <linux/irq.h> 18#include <linux/irq.h>
19#include <linux/mfd/core.h> 19#include <linux/mfd/core.h>
20#include <linux/interrupt.h> 20#include <linux/interrupt.h>
21#include <linux/irqdomain.h>
21 22
22#include <linux/mfd/wm831x/core.h> 23#include <linux/mfd/wm831x/core.h>
23#include <linux/mfd/wm831x/pdata.h> 24#include <linux/mfd/wm831x/pdata.h>
@@ -328,7 +329,7 @@ static inline int irq_data_to_status_reg(struct wm831x_irq_data *irq_data)
328static inline struct wm831x_irq_data *irq_to_wm831x_irq(struct wm831x *wm831x, 329static inline struct wm831x_irq_data *irq_to_wm831x_irq(struct wm831x *wm831x,
329 int irq) 330 int irq)
330{ 331{
331 return &wm831x_irqs[irq - wm831x->irq_base]; 332 return &wm831x_irqs[irq];
332} 333}
333 334
334static void wm831x_irq_lock(struct irq_data *data) 335static void wm831x_irq_lock(struct irq_data *data)
@@ -374,7 +375,7 @@ static void wm831x_irq_enable(struct irq_data *data)
374{ 375{
375 struct wm831x *wm831x = irq_data_get_irq_chip_data(data); 376 struct wm831x *wm831x = irq_data_get_irq_chip_data(data);
376 struct wm831x_irq_data *irq_data = irq_to_wm831x_irq(wm831x, 377 struct wm831x_irq_data *irq_data = irq_to_wm831x_irq(wm831x,
377 data->irq); 378 data->hwirq);
378 379
379 wm831x->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask; 380 wm831x->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask;
380} 381}
@@ -383,7 +384,7 @@ static void wm831x_irq_disable(struct irq_data *data)
383{ 384{
384 struct wm831x *wm831x = irq_data_get_irq_chip_data(data); 385 struct wm831x *wm831x = irq_data_get_irq_chip_data(data);
385 struct wm831x_irq_data *irq_data = irq_to_wm831x_irq(wm831x, 386 struct wm831x_irq_data *irq_data = irq_to_wm831x_irq(wm831x,
386 data->irq); 387 data->hwirq);
387 388
388 wm831x->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask; 389 wm831x->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask;
389} 390}
@@ -393,7 +394,7 @@ static int wm831x_irq_set_type(struct irq_data *data, unsigned int type)
393 struct wm831x *wm831x = irq_data_get_irq_chip_data(data); 394 struct wm831x *wm831x = irq_data_get_irq_chip_data(data);
394 int irq; 395 int irq;
395 396
396 irq = data->irq - wm831x->irq_base; 397 irq = data->hwirq;
397 398
398 if (irq < WM831X_IRQ_GPIO_1 || irq > WM831X_IRQ_GPIO_11) { 399 if (irq < WM831X_IRQ_GPIO_1 || irq > WM831X_IRQ_GPIO_11) {
399 /* Ignore internal-only IRQs */ 400 /* Ignore internal-only IRQs */
@@ -412,22 +413,25 @@ static int wm831x_irq_set_type(struct irq_data *data, unsigned int type)
412 * do the update here as we can be called with the bus lock 413 * do the update here as we can be called with the bus lock
413 * held. 414 * held.
414 */ 415 */
416 wm831x->gpio_level_low[irq] = false;
417 wm831x->gpio_level_high[irq] = false;
415 switch (type) { 418 switch (type) {
416 case IRQ_TYPE_EDGE_BOTH: 419 case IRQ_TYPE_EDGE_BOTH:
417 wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_INT_MODE; 420 wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_INT_MODE;
418 wm831x->gpio_level[irq] = false;
419 break; 421 break;
420 case IRQ_TYPE_EDGE_RISING: 422 case IRQ_TYPE_EDGE_RISING:
421 wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_POL; 423 wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_POL;
422 wm831x->gpio_level[irq] = false;
423 break; 424 break;
424 case IRQ_TYPE_EDGE_FALLING: 425 case IRQ_TYPE_EDGE_FALLING:
425 wm831x->gpio_update[irq] = 0x10000; 426 wm831x->gpio_update[irq] = 0x10000;
426 wm831x->gpio_level[irq] = false;
427 break; 427 break;
428 case IRQ_TYPE_LEVEL_HIGH: 428 case IRQ_TYPE_LEVEL_HIGH:
429 wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_POL; 429 wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_POL;
430 wm831x->gpio_level[irq] = true; 430 wm831x->gpio_level_high[irq] = true;
431 break;
432 case IRQ_TYPE_LEVEL_LOW:
433 wm831x->gpio_update[irq] = 0x10000;
434 wm831x->gpio_level_low[irq] = true;
431 break; 435 break;
432 default: 436 default:
433 return -EINVAL; 437 return -EINVAL;
@@ -469,9 +473,11 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
469 * descriptors. 473 * descriptors.
470 */ 474 */
471 if (primary & WM831X_TCHPD_INT) 475 if (primary & WM831X_TCHPD_INT)
472 handle_nested_irq(wm831x->irq_base + WM831X_IRQ_TCHPD); 476 handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
477 WM831X_IRQ_TCHPD));
473 if (primary & WM831X_TCHDATA_INT) 478 if (primary & WM831X_TCHDATA_INT)
474 handle_nested_irq(wm831x->irq_base + WM831X_IRQ_TCHDATA); 479 handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
480 WM831X_IRQ_TCHDATA));
475 primary &= ~(WM831X_TCHDATA_EINT | WM831X_TCHPD_EINT); 481 primary &= ~(WM831X_TCHDATA_EINT | WM831X_TCHPD_EINT);
476 482
477 for (i = 0; i < ARRAY_SIZE(wm831x_irqs); i++) { 483 for (i = 0; i < ARRAY_SIZE(wm831x_irqs); i++) {
@@ -507,16 +513,29 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
507 } 513 }
508 514
509 if (*status & wm831x_irqs[i].mask) 515 if (*status & wm831x_irqs[i].mask)
510 handle_nested_irq(wm831x->irq_base + i); 516 handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
517 i));
511 518
512 /* Simulate an edge triggered IRQ by polling the input 519 /* Simulate an edge triggered IRQ by polling the input
513 * status. This is sucky but improves interoperability. 520 * status. This is sucky but improves interoperability.
514 */ 521 */
515 if (primary == WM831X_GP_INT && 522 if (primary == WM831X_GP_INT &&
516 wm831x->gpio_level[i - WM831X_IRQ_GPIO_1]) { 523 wm831x->gpio_level_high[i - WM831X_IRQ_GPIO_1]) {
517 ret = wm831x_reg_read(wm831x, WM831X_GPIO_LEVEL); 524 ret = wm831x_reg_read(wm831x, WM831X_GPIO_LEVEL);
518 while (ret & 1 << (i - WM831X_IRQ_GPIO_1)) { 525 while (ret & 1 << (i - WM831X_IRQ_GPIO_1)) {
519 handle_nested_irq(wm831x->irq_base + i); 526 handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
527 i));
528 ret = wm831x_reg_read(wm831x,
529 WM831X_GPIO_LEVEL);
530 }
531 }
532
533 if (primary == WM831X_GP_INT &&
534 wm831x->gpio_level_low[i - WM831X_IRQ_GPIO_1]) {
535 ret = wm831x_reg_read(wm831x, WM831X_GPIO_LEVEL);
536 while (!(ret & 1 << (i - WM831X_IRQ_GPIO_1))) {
537 handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
538 i));
520 ret = wm831x_reg_read(wm831x, 539 ret = wm831x_reg_read(wm831x,
521 WM831X_GPIO_LEVEL); 540 WM831X_GPIO_LEVEL);
522 } 541 }
@@ -527,10 +546,34 @@ out:
527 return IRQ_HANDLED; 546 return IRQ_HANDLED;
528} 547}
529 548
549static int wm831x_irq_map(struct irq_domain *h, unsigned int virq,
550 irq_hw_number_t hw)
551{
552 irq_set_chip_data(virq, h->host_data);
553 irq_set_chip_and_handler(virq, &wm831x_irq_chip, handle_edge_irq);
554 irq_set_nested_thread(virq, 1);
555
556 /* ARM needs us to explicitly flag the IRQ as valid
557 * and will set them noprobe when we do so. */
558#ifdef CONFIG_ARM
559 set_irq_flags(virq, IRQF_VALID);
560#else
561 irq_set_noprobe(virq);
562#endif
563
564 return 0;
565}
566
567static struct irq_domain_ops wm831x_irq_domain_ops = {
568 .map = wm831x_irq_map,
569 .xlate = irq_domain_xlate_twocell,
570};
571
530int wm831x_irq_init(struct wm831x *wm831x, int irq) 572int wm831x_irq_init(struct wm831x *wm831x, int irq)
531{ 573{
532 struct wm831x_pdata *pdata = wm831x->dev->platform_data; 574 struct wm831x_pdata *pdata = wm831x->dev->platform_data;
533 int i, cur_irq, ret; 575 struct irq_domain *domain;
576 int i, ret, irq_base;
534 577
535 mutex_init(&wm831x->irq_lock); 578 mutex_init(&wm831x->irq_lock);
536 579
@@ -543,18 +586,33 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq)
543 } 586 }
544 587
545 /* Try to dynamically allocate IRQs if no base is specified */ 588 /* Try to dynamically allocate IRQs if no base is specified */
546 if (!pdata || !pdata->irq_base) 589 if (pdata && pdata->irq_base) {
547 wm831x->irq_base = -1; 590 irq_base = irq_alloc_descs(pdata->irq_base, 0,
591 WM831X_NUM_IRQS, 0);
592 if (irq_base < 0) {
593 dev_warn(wm831x->dev, "Failed to allocate IRQs: %d\n",
594 irq_base);
595 irq_base = 0;
596 }
597 } else {
598 irq_base = 0;
599 }
600
601 if (irq_base)
602 domain = irq_domain_add_legacy(wm831x->dev->of_node,
603 ARRAY_SIZE(wm831x_irqs),
604 irq_base, 0,
605 &wm831x_irq_domain_ops,
606 wm831x);
548 else 607 else
549 wm831x->irq_base = pdata->irq_base; 608 domain = irq_domain_add_linear(wm831x->dev->of_node,
609 ARRAY_SIZE(wm831x_irqs),
610 &wm831x_irq_domain_ops,
611 wm831x);
550 612
551 wm831x->irq_base = irq_alloc_descs(wm831x->irq_base, 0, 613 if (!domain) {
552 WM831X_NUM_IRQS, 0); 614 dev_warn(wm831x->dev, "Failed to allocate IRQ domain\n");
553 if (wm831x->irq_base < 0) { 615 return -EINVAL;
554 dev_warn(wm831x->dev, "Failed to allocate IRQs: %d\n",
555 wm831x->irq_base);
556 wm831x->irq_base = 0;
557 return 0;
558 } 616 }
559 617
560 if (pdata && pdata->irq_cmos) 618 if (pdata && pdata->irq_cmos)
@@ -565,38 +623,22 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq)
565 wm831x_set_bits(wm831x, WM831X_IRQ_CONFIG, 623 wm831x_set_bits(wm831x, WM831X_IRQ_CONFIG,
566 WM831X_IRQ_OD, i); 624 WM831X_IRQ_OD, i);
567 625
568 /* Try to flag /IRQ as a wake source; there are a number of
569 * unconditional wake sources in the PMIC so this isn't
570 * conditional but we don't actually care *too* much if it
571 * fails.
572 */
573 ret = enable_irq_wake(irq);
574 if (ret != 0) {
575 dev_warn(wm831x->dev, "Can't enable IRQ as wake source: %d\n",
576 ret);
577 }
578
579 wm831x->irq = irq; 626 wm831x->irq = irq;
580 627 wm831x->irq_domain = domain;
581 /* Register them with genirq */
582 for (cur_irq = wm831x->irq_base;
583 cur_irq < ARRAY_SIZE(wm831x_irqs) + wm831x->irq_base;
584 cur_irq++) {
585 irq_set_chip_data(cur_irq, wm831x);
586 irq_set_chip_and_handler(cur_irq, &wm831x_irq_chip,
587 handle_edge_irq);
588 irq_set_nested_thread(cur_irq, 1);
589
590 /* ARM needs us to explicitly flag the IRQ as valid
591 * and will set them noprobe when we do so. */
592#ifdef CONFIG_ARM
593 set_irq_flags(cur_irq, IRQF_VALID);
594#else
595 irq_set_noprobe(cur_irq);
596#endif
597 }
598 628
599 if (irq) { 629 if (irq) {
630 /* Try to flag /IRQ as a wake source; there are a number of
631 * unconditional wake sources in the PMIC so this isn't
632 * conditional but we don't actually care *too* much if it
633 * fails.
634 */
635 ret = enable_irq_wake(irq);
636 if (ret != 0) {
637 dev_warn(wm831x->dev,
638 "Can't enable IRQ as wake source: %d\n",
639 ret);
640 }
641
600 ret = request_threaded_irq(irq, NULL, wm831x_irq_thread, 642 ret = request_threaded_irq(irq, NULL, wm831x_irq_thread,
601 IRQF_TRIGGER_LOW | IRQF_ONESHOT, 643 IRQF_TRIGGER_LOW | IRQF_ONESHOT,
602 "wm831x", wm831x); 644 "wm831x", wm831x);
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index dd1caaac55e4..8a9b11ca076a 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -20,6 +20,7 @@
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/interrupt.h> 22#include <linux/interrupt.h>
23#include <linux/regmap.h>
23#include <linux/workqueue.h> 24#include <linux/workqueue.h>
24 25
25#include <linux/mfd/wm8350/core.h> 26#include <linux/mfd/wm8350/core.h>
@@ -74,7 +75,7 @@ static int wm8350_phys_read(struct wm8350 *wm8350, u8 reg, int num_regs,
74 int bytes = num_regs * 2; 75 int bytes = num_regs * 2;
75 76
76 dev_dbg(wm8350->dev, "volatile read\n"); 77 dev_dbg(wm8350->dev, "volatile read\n");
77 ret = wm8350->read_dev(wm8350, reg, bytes, (char *)dest); 78 ret = regmap_raw_read(wm8350->regmap, reg, dest, bytes);
78 79
79 for (i = reg; i < reg + num_regs; i++) { 80 for (i = reg; i < reg + num_regs; i++) {
80 /* Cache is CPU endian */ 81 /* Cache is CPU endian */
@@ -96,9 +97,6 @@ static int wm8350_read(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *dest)
96 int ret = 0; 97 int ret = 0;
97 int bytes = num_regs * 2; 98 int bytes = num_regs * 2;
98 99
99 if (wm8350->read_dev == NULL)
100 return -ENODEV;
101
102 if ((reg + num_regs - 1) > WM8350_MAX_REGISTER) { 100 if ((reg + num_regs - 1) > WM8350_MAX_REGISTER) {
103 dev_err(wm8350->dev, "invalid reg %x\n", 101 dev_err(wm8350->dev, "invalid reg %x\n",
104 reg + num_regs - 1); 102 reg + num_regs - 1);
@@ -149,9 +147,6 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src)
149 int end = reg + num_regs; 147 int end = reg + num_regs;
150 int bytes = num_regs * 2; 148 int bytes = num_regs * 2;
151 149
152 if (wm8350->write_dev == NULL)
153 return -ENODEV;
154
155 if ((reg + num_regs - 1) > WM8350_MAX_REGISTER) { 150 if ((reg + num_regs - 1) > WM8350_MAX_REGISTER) {
156 dev_err(wm8350->dev, "invalid reg %x\n", 151 dev_err(wm8350->dev, "invalid reg %x\n",
157 reg + num_regs - 1); 152 reg + num_regs - 1);
@@ -182,7 +177,7 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src)
182 } 177 }
183 178
184 /* Actually write it out */ 179 /* Actually write it out */
185 return wm8350->write_dev(wm8350, reg, bytes, (char *)src); 180 return regmap_raw_write(wm8350->regmap, reg, src, bytes);
186} 181}
187 182
188/* 183/*
@@ -515,9 +510,8 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode)
515 * a PMIC so the device many not be in a virgin state and we 510 * a PMIC so the device many not be in a virgin state and we
516 * can't rely on the silicon values. 511 * can't rely on the silicon values.
517 */ 512 */
518 ret = wm8350->read_dev(wm8350, 0, 513 ret = regmap_raw_read(wm8350->regmap, 0, wm8350->reg_cache,
519 sizeof(u16) * (WM8350_MAX_REGISTER + 1), 514 sizeof(u16) * (WM8350_MAX_REGISTER + 1));
520 wm8350->reg_cache);
521 if (ret < 0) { 515 if (ret < 0) {
522 dev_err(wm8350->dev, 516 dev_err(wm8350->dev,
523 "failed to read initial cache values\n"); 517 "failed to read initial cache values\n");
@@ -570,35 +564,30 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
570 struct wm8350_platform_data *pdata) 564 struct wm8350_platform_data *pdata)
571{ 565{
572 int ret; 566 int ret;
573 u16 id1, id2, mask_rev; 567 unsigned int id1, id2, mask_rev;
574 u16 cust_id, mode, chip_rev; 568 unsigned int cust_id, mode, chip_rev;
575 569
576 dev_set_drvdata(wm8350->dev, wm8350); 570 dev_set_drvdata(wm8350->dev, wm8350);
577 571
578 /* get WM8350 revision and config mode */ 572 /* get WM8350 revision and config mode */
579 ret = wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1); 573 ret = regmap_read(wm8350->regmap, WM8350_RESET_ID, &id1);
580 if (ret != 0) { 574 if (ret != 0) {
581 dev_err(wm8350->dev, "Failed to read ID: %d\n", ret); 575 dev_err(wm8350->dev, "Failed to read ID: %d\n", ret);
582 goto err; 576 goto err;
583 } 577 }
584 578
585 ret = wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2); 579 ret = regmap_read(wm8350->regmap, WM8350_ID, &id2);
586 if (ret != 0) { 580 if (ret != 0) {
587 dev_err(wm8350->dev, "Failed to read ID: %d\n", ret); 581 dev_err(wm8350->dev, "Failed to read ID: %d\n", ret);
588 goto err; 582 goto err;
589 } 583 }
590 584
591 ret = wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev), 585 ret = regmap_read(wm8350->regmap, WM8350_REVISION, &mask_rev);
592 &mask_rev);
593 if (ret != 0) { 586 if (ret != 0) {
594 dev_err(wm8350->dev, "Failed to read revision: %d\n", ret); 587 dev_err(wm8350->dev, "Failed to read revision: %d\n", ret);
595 goto err; 588 goto err;
596 } 589 }
597 590
598 id1 = be16_to_cpu(id1);
599 id2 = be16_to_cpu(id2);
600 mask_rev = be16_to_cpu(mask_rev);
601
602 if (id1 != 0x6143) { 591 if (id1 != 0x6143) {
603 dev_err(wm8350->dev, 592 dev_err(wm8350->dev,
604 "Device with ID %x is not a WM8350\n", id1); 593 "Device with ID %x is not a WM8350\n", id1);
diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c
index d955faaf27c4..a68aceb4e48c 100644
--- a/drivers/mfd/wm8350-i2c.c
+++ b/drivers/mfd/wm8350-i2c.c
@@ -15,47 +15,18 @@
15 15
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/moduleparam.h> 17#include <linux/moduleparam.h>
18#include <linux/err.h>
18#include <linux/init.h> 19#include <linux/init.h>
19#include <linux/i2c.h> 20#include <linux/i2c.h>
20#include <linux/platform_device.h> 21#include <linux/platform_device.h>
21#include <linux/mfd/wm8350/core.h> 22#include <linux/mfd/wm8350/core.h>
23#include <linux/regmap.h>
22#include <linux/slab.h> 24#include <linux/slab.h>
23 25
24static int wm8350_i2c_read_device(struct wm8350 *wm8350, char reg, 26static const struct regmap_config wm8350_regmap = {
25 int bytes, void *dest) 27 .reg_bits = 8,
26{ 28 .val_bits = 16,
27 int ret; 29};
28
29 ret = i2c_master_send(wm8350->i2c_client, &reg, 1);
30 if (ret < 0)
31 return ret;
32 ret = i2c_master_recv(wm8350->i2c_client, dest, bytes);
33 if (ret < 0)
34 return ret;
35 if (ret != bytes)
36 return -EIO;
37 return 0;
38}
39
40static int wm8350_i2c_write_device(struct wm8350 *wm8350, char reg,
41 int bytes, void *src)
42{
43 /* we add 1 byte for device register */
44 u8 msg[(WM8350_MAX_REGISTER << 1) + 1];
45 int ret;
46
47 if (bytes > ((WM8350_MAX_REGISTER << 1) + 1))
48 return -EINVAL;
49
50 msg[0] = reg;
51 memcpy(&msg[1], src, bytes);
52 ret = i2c_master_send(wm8350->i2c_client, msg, bytes + 1);
53 if (ret < 0)
54 return ret;
55 if (ret != bytes + 1)
56 return -EIO;
57 return 0;
58}
59 30
60static int wm8350_i2c_probe(struct i2c_client *i2c, 31static int wm8350_i2c_probe(struct i2c_client *i2c,
61 const struct i2c_device_id *id) 32 const struct i2c_device_id *id)
@@ -67,20 +38,18 @@ static int wm8350_i2c_probe(struct i2c_client *i2c,
67 if (wm8350 == NULL) 38 if (wm8350 == NULL)
68 return -ENOMEM; 39 return -ENOMEM;
69 40
41 wm8350->regmap = devm_regmap_init_i2c(i2c, &wm8350_regmap);
42 if (IS_ERR(wm8350->regmap)) {
43 ret = PTR_ERR(wm8350->regmap);
44 dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
45 ret);
46 return ret;
47 }
48
70 i2c_set_clientdata(i2c, wm8350); 49 i2c_set_clientdata(i2c, wm8350);
71 wm8350->dev = &i2c->dev; 50 wm8350->dev = &i2c->dev;
72 wm8350->i2c_client = i2c;
73 wm8350->read_dev = wm8350_i2c_read_device;
74 wm8350->write_dev = wm8350_i2c_write_device;
75
76 ret = wm8350_device_init(wm8350, i2c->irq, i2c->dev.platform_data);
77 if (ret < 0)
78 goto err;
79
80 return ret;
81 51
82err: 52 return wm8350_device_init(wm8350, i2c->irq, i2c->dev.platform_data);
83 return ret;
84} 53}
85 54
86static int wm8350_i2c_remove(struct i2c_client *i2c) 55static int wm8350_i2c_remove(struct i2c_client *i2c)
diff --git a/drivers/mfd/wm8400-core.c b/drivers/mfd/wm8400-core.c
index 1189a17f0f25..4b7d378551d5 100644
--- a/drivers/mfd/wm8400-core.c
+++ b/drivers/mfd/wm8400-core.c
@@ -23,136 +23,16 @@
23#include <linux/regmap.h> 23#include <linux/regmap.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25 25
26static struct { 26static bool wm8400_volatile(struct device *dev, unsigned int reg)
27 u16 readable; /* Mask of readable bits */
28 u16 writable; /* Mask of writable bits */
29 u16 vol; /* Mask of volatile bits */
30 int is_codec; /* Register controlled by codec reset */
31 u16 default_val; /* Value on reset */
32} reg_data[] = {
33 { 0xFFFF, 0xFFFF, 0x0000, 0, 0x6172 }, /* R0 */
34 { 0x7000, 0x0000, 0x8000, 0, 0x0000 }, /* R1 */
35 { 0xFF17, 0xFF17, 0x0000, 0, 0x0000 }, /* R2 */
36 { 0xEBF3, 0xEBF3, 0x0000, 1, 0x6000 }, /* R3 */
37 { 0x3CF3, 0x3CF3, 0x0000, 1, 0x0000 }, /* R4 */
38 { 0xF1F8, 0xF1F8, 0x0000, 1, 0x4050 }, /* R5 */
39 { 0xFC1F, 0xFC1F, 0x0000, 1, 0x4000 }, /* R6 */
40 { 0xDFDE, 0xDFDE, 0x0000, 1, 0x01C8 }, /* R7 */
41 { 0xFCFC, 0xFCFC, 0x0000, 1, 0x0000 }, /* R8 */
42 { 0xEFFF, 0xEFFF, 0x0000, 1, 0x0040 }, /* R9 */
43 { 0xEFFF, 0xEFFF, 0x0000, 1, 0x0040 }, /* R10 */
44 { 0x27F7, 0x27F7, 0x0000, 1, 0x0004 }, /* R11 */
45 { 0x01FF, 0x01FF, 0x0000, 1, 0x00C0 }, /* R12 */
46 { 0x01FF, 0x01FF, 0x0000, 1, 0x00C0 }, /* R13 */
47 { 0x1FEF, 0x1FEF, 0x0000, 1, 0x0000 }, /* R14 */
48 { 0x0163, 0x0163, 0x0000, 1, 0x0100 }, /* R15 */
49 { 0x01FF, 0x01FF, 0x0000, 1, 0x00C0 }, /* R16 */
50 { 0x01FF, 0x01FF, 0x0000, 1, 0x00C0 }, /* R17 */
51 { 0x1FFF, 0x0FFF, 0x0000, 1, 0x0000 }, /* R18 */
52 { 0xFFFF, 0xFFFF, 0x0000, 1, 0x1000 }, /* R19 */
53 { 0xFFFF, 0xFFFF, 0x0000, 1, 0x1010 }, /* R20 */
54 { 0xFFFF, 0xFFFF, 0x0000, 1, 0x1010 }, /* R21 */
55 { 0x0FDD, 0x0FDD, 0x0000, 1, 0x8000 }, /* R22 */
56 { 0x1FFF, 0x1FFF, 0x0000, 1, 0x0800 }, /* R23 */
57 { 0x0000, 0x01DF, 0x0000, 1, 0x008B }, /* R24 */
58 { 0x0000, 0x01DF, 0x0000, 1, 0x008B }, /* R25 */
59 { 0x0000, 0x01DF, 0x0000, 1, 0x008B }, /* R26 */
60 { 0x0000, 0x01DF, 0x0000, 1, 0x008B }, /* R27 */
61 { 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R28 */
62 { 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R29 */
63 { 0x0000, 0x0077, 0x0000, 1, 0x0066 }, /* R30 */
64 { 0x0000, 0x0033, 0x0000, 1, 0x0022 }, /* R31 */
65 { 0x0000, 0x01FF, 0x0000, 1, 0x0079 }, /* R32 */
66 { 0x0000, 0x01FF, 0x0000, 1, 0x0079 }, /* R33 */
67 { 0x0000, 0x0003, 0x0000, 1, 0x0003 }, /* R34 */
68 { 0x0000, 0x01FF, 0x0000, 1, 0x0003 }, /* R35 */
69 { 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R36 */
70 { 0x0000, 0x003F, 0x0000, 1, 0x0100 }, /* R37 */
71 { 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R38 */
72 { 0x0000, 0x000F, 0x0000, 0, 0x0000 }, /* R39 */
73 { 0x0000, 0x00FF, 0x0000, 1, 0x0000 }, /* R40 */
74 { 0x0000, 0x01B7, 0x0000, 1, 0x0000 }, /* R41 */
75 { 0x0000, 0x01B7, 0x0000, 1, 0x0000 }, /* R42 */
76 { 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R43 */
77 { 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R44 */
78 { 0x0000, 0x00FD, 0x0000, 1, 0x0000 }, /* R45 */
79 { 0x0000, 0x00FD, 0x0000, 1, 0x0000 }, /* R46 */
80 { 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R47 */
81 { 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R48 */
82 { 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R49 */
83 { 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R50 */
84 { 0x0000, 0x01B3, 0x0000, 1, 0x0180 }, /* R51 */
85 { 0x0000, 0x0077, 0x0000, 1, 0x0000 }, /* R52 */
86 { 0x0000, 0x0077, 0x0000, 1, 0x0000 }, /* R53 */
87 { 0x0000, 0x00FF, 0x0000, 1, 0x0000 }, /* R54 */
88 { 0x0000, 0x0001, 0x0000, 1, 0x0000 }, /* R55 */
89 { 0x0000, 0x003F, 0x0000, 1, 0x0000 }, /* R56 */
90 { 0x0000, 0x004F, 0x0000, 1, 0x0000 }, /* R57 */
91 { 0x0000, 0x00FD, 0x0000, 1, 0x0000 }, /* R58 */
92 { 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R59 */
93 { 0x1FFF, 0x1FFF, 0x0000, 1, 0x0000 }, /* R60 */
94 { 0xFFFF, 0xFFFF, 0x0000, 1, 0x0000 }, /* R61 */
95 { 0x03FF, 0x03FF, 0x0000, 1, 0x0000 }, /* R62 */
96 { 0x007F, 0x007F, 0x0000, 1, 0x0000 }, /* R63 */
97 { 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R64 */
98 { 0xDFFF, 0xDFFF, 0x0000, 0, 0x0000 }, /* R65 */
99 { 0xDFFF, 0xDFFF, 0x0000, 0, 0x0000 }, /* R66 */
100 { 0xDFFF, 0xDFFF, 0x0000, 0, 0x0000 }, /* R67 */
101 { 0xDFFF, 0xDFFF, 0x0000, 0, 0x0000 }, /* R68 */
102 { 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R69 */
103 { 0xFFFF, 0xFFFF, 0x0000, 0, 0x4400 }, /* R70 */
104 { 0x23FF, 0x23FF, 0x0000, 0, 0x0000 }, /* R71 */
105 { 0xFFFF, 0xFFFF, 0x0000, 0, 0x4400 }, /* R72 */
106 { 0x23FF, 0x23FF, 0x0000, 0, 0x0000 }, /* R73 */
107 { 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R74 */
108 { 0x000E, 0x000E, 0x0000, 0, 0x0008 }, /* R75 */
109 { 0xE00F, 0xE00F, 0x0000, 0, 0x0000 }, /* R76 */
110 { 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R77 */
111 { 0x03C0, 0x03C0, 0x0000, 0, 0x02C0 }, /* R78 */
112 { 0xFFFF, 0x0000, 0xffff, 0, 0x0000 }, /* R79 */
113 { 0xFFFF, 0xFFFF, 0x0000, 0, 0x0000 }, /* R80 */
114 { 0xFFFF, 0x0000, 0xffff, 0, 0x0000 }, /* R81 */
115 { 0x2BFF, 0x0000, 0xffff, 0, 0x0000 }, /* R82 */
116 { 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R83 */
117 { 0x80FF, 0x80FF, 0x0000, 0, 0x00ff }, /* R84 */
118};
119
120static int wm8400_read(struct wm8400 *wm8400, u8 reg, int num_regs, u16 *dest)
121{ 27{
122 int i, ret = 0; 28 switch (reg) {
123 29 case WM8400_INTERRUPT_STATUS_1:
124 BUG_ON(reg + num_regs > ARRAY_SIZE(wm8400->reg_cache)); 30 case WM8400_INTERRUPT_LEVELS:
125 31 case WM8400_SHUTDOWN_REASON:
126 /* If there are any volatile reads then read back the entire block */ 32 return true;
127 for (i = reg; i < reg + num_regs; i++) 33 default:
128 if (reg_data[i].vol) { 34 return false;
129 ret = regmap_bulk_read(wm8400->regmap, reg, dest,
130 num_regs);
131 return ret;
132 }
133
134 /* Otherwise use the cache */
135 memcpy(dest, &wm8400->reg_cache[reg], num_regs * sizeof(u16));
136
137 return 0;
138}
139
140static int wm8400_write(struct wm8400 *wm8400, u8 reg, int num_regs,
141 u16 *src)
142{
143 int ret, i;
144
145 BUG_ON(reg + num_regs > ARRAY_SIZE(wm8400->reg_cache));
146
147 for (i = 0; i < num_regs; i++) {
148 BUG_ON(!reg_data[reg + i].writable);
149 wm8400->reg_cache[reg + i] = src[i];
150 ret = regmap_write(wm8400->regmap, reg, src[i]);
151 if (ret != 0)
152 return ret;
153 } 35 }
154
155 return 0;
156} 36}
157 37
158/** 38/**
@@ -165,13 +45,12 @@ static int wm8400_write(struct wm8400 *wm8400, u8 reg, int num_regs,
165 */ 45 */
166u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg) 46u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg)
167{ 47{
168 u16 val; 48 unsigned int val;
169 49 int ret;
170 mutex_lock(&wm8400->io_lock);
171
172 wm8400_read(wm8400, reg, 1, &val);
173 50
174 mutex_unlock(&wm8400->io_lock); 51 ret = regmap_read(wm8400->regmap, reg, &val);
52 if (ret < 0)
53 return ret;
175 54
176 return val; 55 return val;
177} 56}
@@ -179,63 +58,10 @@ EXPORT_SYMBOL_GPL(wm8400_reg_read);
179 58
180int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data) 59int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data)
181{ 60{
182 int ret; 61 return regmap_bulk_read(wm8400->regmap, reg, data, count);
183
184 mutex_lock(&wm8400->io_lock);
185
186 ret = wm8400_read(wm8400, reg, count, data);
187
188 mutex_unlock(&wm8400->io_lock);
189
190 return ret;
191} 62}
192EXPORT_SYMBOL_GPL(wm8400_block_read); 63EXPORT_SYMBOL_GPL(wm8400_block_read);
193 64
194/**
195 * wm8400_set_bits - Bitmask write
196 *
197 * @wm8400: Pointer to wm8400 control structure
198 * @reg: Register to access
199 * @mask: Mask of bits to change
200 * @val: Value to set for masked bits
201 */
202int wm8400_set_bits(struct wm8400 *wm8400, u8 reg, u16 mask, u16 val)
203{
204 u16 tmp;
205 int ret;
206
207 mutex_lock(&wm8400->io_lock);
208
209 ret = wm8400_read(wm8400, reg, 1, &tmp);
210 tmp = (tmp & ~mask) | val;
211 if (ret == 0)
212 ret = wm8400_write(wm8400, reg, 1, &tmp);
213
214 mutex_unlock(&wm8400->io_lock);
215
216 return ret;
217}
218EXPORT_SYMBOL_GPL(wm8400_set_bits);
219
220/**
221 * wm8400_reset_codec_reg_cache - Reset cached codec registers to
222 * their default values.
223 */
224void wm8400_reset_codec_reg_cache(struct wm8400 *wm8400)
225{
226 int i;
227
228 mutex_lock(&wm8400->io_lock);
229
230 /* Reset all codec registers to their initial value */
231 for (i = 0; i < ARRAY_SIZE(wm8400->reg_cache); i++)
232 if (reg_data[i].is_codec)
233 wm8400->reg_cache[i] = reg_data[i].default_val;
234
235 mutex_unlock(&wm8400->io_lock);
236}
237EXPORT_SYMBOL_GPL(wm8400_reset_codec_reg_cache);
238
239static int wm8400_register_codec(struct wm8400 *wm8400) 65static int wm8400_register_codec(struct wm8400 *wm8400)
240{ 66{
241 struct mfd_cell cell = { 67 struct mfd_cell cell = {
@@ -257,44 +83,24 @@ static int wm8400_register_codec(struct wm8400 *wm8400)
257static int wm8400_init(struct wm8400 *wm8400, 83static int wm8400_init(struct wm8400 *wm8400,
258 struct wm8400_platform_data *pdata) 84 struct wm8400_platform_data *pdata)
259{ 85{
260 u16 reg; 86 unsigned int reg;
261 int ret, i; 87 int ret;
262
263 mutex_init(&wm8400->io_lock);
264 88
265 dev_set_drvdata(wm8400->dev, wm8400); 89 dev_set_drvdata(wm8400->dev, wm8400);
266 90
267 /* Check that this is actually a WM8400 */ 91 /* Check that this is actually a WM8400 */
268 ret = regmap_read(wm8400->regmap, WM8400_RESET_ID, &i); 92 ret = regmap_read(wm8400->regmap, WM8400_RESET_ID, &reg);
269 if (ret != 0) { 93 if (ret != 0) {
270 dev_err(wm8400->dev, "Chip ID register read failed\n"); 94 dev_err(wm8400->dev, "Chip ID register read failed\n");
271 return -EIO; 95 return -EIO;
272 } 96 }
273 if (i != reg_data[WM8400_RESET_ID].default_val) { 97 if (reg != 0x6172) {
274 dev_err(wm8400->dev, "Device is not a WM8400, ID is %x\n", i); 98 dev_err(wm8400->dev, "Device is not a WM8400, ID is %x\n",
99 reg);
275 return -ENODEV; 100 return -ENODEV;
276 } 101 }
277 102
278 /* We don't know what state the hardware is in and since this 103 ret = regmap_read(wm8400->regmap, WM8400_ID, &reg);
279 * is a PMIC we can't reset it safely so initialise the register
280 * cache from the hardware.
281 */
282 ret = regmap_raw_read(wm8400->regmap, 0, wm8400->reg_cache,
283 ARRAY_SIZE(wm8400->reg_cache));
284 if (ret != 0) {
285 dev_err(wm8400->dev, "Register cache read failed\n");
286 return -EIO;
287 }
288 for (i = 0; i < ARRAY_SIZE(wm8400->reg_cache); i++)
289 wm8400->reg_cache[i] = be16_to_cpu(wm8400->reg_cache[i]);
290
291 /* If the codec is in reset use hard coded values */
292 if (!(wm8400->reg_cache[WM8400_POWER_MANAGEMENT_1] & WM8400_CODEC_ENA))
293 for (i = 0; i < ARRAY_SIZE(wm8400->reg_cache); i++)
294 if (reg_data[i].is_codec)
295 wm8400->reg_cache[i] = reg_data[i].default_val;
296
297 ret = wm8400_read(wm8400, WM8400_ID, 1, &reg);
298 if (ret != 0) { 104 if (ret != 0) {
299 dev_err(wm8400->dev, "ID register read failed: %d\n", ret); 105 dev_err(wm8400->dev, "ID register read failed: %d\n", ret);
300 return ret; 106 return ret;
@@ -334,8 +140,22 @@ static const struct regmap_config wm8400_regmap_config = {
334 .reg_bits = 8, 140 .reg_bits = 8,
335 .val_bits = 16, 141 .val_bits = 16,
336 .max_register = WM8400_REGISTER_COUNT - 1, 142 .max_register = WM8400_REGISTER_COUNT - 1,
143
144 .volatile_reg = wm8400_volatile,
145
146 .cache_type = REGCACHE_RBTREE,
337}; 147};
338 148
149/**
150 * wm8400_reset_codec_reg_cache - Reset cached codec registers to
151 * their default values.
152 */
153void wm8400_reset_codec_reg_cache(struct wm8400 *wm8400)
154{
155 regmap_reinit_cache(wm8400->regmap, &wm8400_regmap_config);
156}
157EXPORT_SYMBOL_GPL(wm8400_reset_codec_reg_cache);
158
339#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) 159#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
340static int wm8400_i2c_probe(struct i2c_client *i2c, 160static int wm8400_i2c_probe(struct i2c_client *i2c,
341 const struct i2c_device_id *id) 161 const struct i2c_device_id *id)
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 9d7ca1e978fa..1e321d349777 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -500,7 +500,8 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
500 ret); 500 ret);
501 goto err_enable; 501 goto err_enable;
502 } 502 }
503 wm8994->revision = ret; 503 wm8994->revision = ret & WM8994_CHIP_REV_MASK;
504 wm8994->cust_id = (ret & WM8994_CUST_ID_MASK) >> WM8994_CUST_ID_SHIFT;
504 505
505 switch (wm8994->type) { 506 switch (wm8994->type) {
506 case WM8994: 507 case WM8994:
@@ -553,8 +554,8 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
553 break; 554 break;
554 } 555 }
555 556
556 dev_info(wm8994->dev, "%s revision %c\n", devname, 557 dev_info(wm8994->dev, "%s revision %c CUST_ID %02x\n", devname,
557 'A' + wm8994->revision); 558 'A' + wm8994->revision, wm8994->cust_id);
558 559
559 switch (wm8994->type) { 560 switch (wm8994->type) {
560 case WM1811: 561 case WM1811:
@@ -732,23 +733,7 @@ static struct i2c_driver wm8994_i2c_driver = {
732 .id_table = wm8994_i2c_id, 733 .id_table = wm8994_i2c_id,
733}; 734};
734 735
735static int __init wm8994_i2c_init(void) 736module_i2c_driver(wm8994_i2c_driver);
736{
737 int ret;
738
739 ret = i2c_add_driver(&wm8994_i2c_driver);
740 if (ret != 0)
741 pr_err("Failed to register wm8994 I2C driver: %d\n", ret);
742
743 return ret;
744}
745module_init(wm8994_i2c_init);
746
747static void __exit wm8994_i2c_exit(void)
748{
749 i2c_del_driver(&wm8994_i2c_driver);
750}
751module_exit(wm8994_i2c_exit);
752 737
753MODULE_DESCRIPTION("Core support for the WM8994 audio CODEC"); 738MODULE_DESCRIPTION("Core support for the WM8994 audio CODEC");
754MODULE_LICENSE("GPL"); 739MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/wm8994-regmap.c b/drivers/mfd/wm8994-regmap.c
index bfd25af6ecb1..52e9e2944940 100644
--- a/drivers/mfd/wm8994-regmap.c
+++ b/drivers/mfd/wm8994-regmap.c
@@ -1122,7 +1122,6 @@ static bool wm8994_volatile_register(struct device *dev, unsigned int reg)
1122 case WM8994_RATE_STATUS: 1122 case WM8994_RATE_STATUS:
1123 case WM8958_MIC_DETECT_3: 1123 case WM8958_MIC_DETECT_3:
1124 case WM8994_DC_SERVO_4E: 1124 case WM8994_DC_SERVO_4E:
1125 case WM8994_CHIP_REVISION:
1126 case WM8994_INTERRUPT_STATUS_1: 1125 case WM8994_INTERRUPT_STATUS_1:
1127 case WM8994_INTERRUPT_STATUS_2: 1126 case WM8994_INTERRUPT_STATUS_2:
1128 return true; 1127 return true;
diff --git a/drivers/misc/ab8500-pwm.c b/drivers/misc/ab8500-pwm.c
index d7a9aa14e5d5..042a8fe4efaa 100644
--- a/drivers/misc/ab8500-pwm.c
+++ b/drivers/misc/ab8500-pwm.c
@@ -142,10 +142,16 @@ static int __devexit ab8500_pwm_remove(struct platform_device *pdev)
142 return 0; 142 return 0;
143} 143}
144 144
145static const struct of_device_id ab8500_pwm_match[] = {
146 { .compatible = "stericsson,ab8500-pwm", },
147 {}
148};
149
145static struct platform_driver ab8500_pwm_driver = { 150static struct platform_driver ab8500_pwm_driver = {
146 .driver = { 151 .driver = {
147 .name = "ab8500-pwm", 152 .name = "ab8500-pwm",
148 .owner = THIS_MODULE, 153 .owner = THIS_MODULE,
154 .of_match_table = ab8500_pwm_match,
149 }, 155 },
150 .probe = ab8500_pwm_probe, 156 .probe = ab8500_pwm_probe,
151 .remove = __devexit_p(ab8500_pwm_remove), 157 .remove = __devexit_p(ab8500_pwm_remove),
diff --git a/drivers/power/wm831x_power.c b/drivers/power/wm831x_power.c
index 987332b71d8d..fc1ad9551182 100644
--- a/drivers/power/wm831x_power.c
+++ b/drivers/power/wm831x_power.c
@@ -565,7 +565,7 @@ static __devinit int wm831x_power_probe(struct platform_device *pdev)
565 goto err_usb; 565 goto err_usb;
566 } 566 }
567 567
568 irq = platform_get_irq_byname(pdev, "SYSLO"); 568 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "SYSLO"));
569 ret = request_threaded_irq(irq, NULL, wm831x_syslo_irq, 569 ret = request_threaded_irq(irq, NULL, wm831x_syslo_irq,
570 IRQF_TRIGGER_RISING, "System power low", 570 IRQF_TRIGGER_RISING, "System power low",
571 power); 571 power);
@@ -575,7 +575,7 @@ static __devinit int wm831x_power_probe(struct platform_device *pdev)
575 goto err_battery; 575 goto err_battery;
576 } 576 }
577 577
578 irq = platform_get_irq_byname(pdev, "PWR SRC"); 578 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "PWR SRC"));
579 ret = request_threaded_irq(irq, NULL, wm831x_pwr_src_irq, 579 ret = request_threaded_irq(irq, NULL, wm831x_pwr_src_irq,
580 IRQF_TRIGGER_RISING, "Power source", 580 IRQF_TRIGGER_RISING, "Power source",
581 power); 581 power);
@@ -586,7 +586,9 @@ static __devinit int wm831x_power_probe(struct platform_device *pdev)
586 } 586 }
587 587
588 for (i = 0; i < ARRAY_SIZE(wm831x_bat_irqs); i++) { 588 for (i = 0; i < ARRAY_SIZE(wm831x_bat_irqs); i++) {
589 irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]); 589 irq = wm831x_irq(wm831x,
590 platform_get_irq_byname(pdev,
591 wm831x_bat_irqs[i]));
590 ret = request_threaded_irq(irq, NULL, wm831x_bat_irq, 592 ret = request_threaded_irq(irq, NULL, wm831x_bat_irq,
591 IRQF_TRIGGER_RISING, 593 IRQF_TRIGGER_RISING,
592 wm831x_bat_irqs[i], 594 wm831x_bat_irqs[i],
@@ -606,10 +608,10 @@ err_bat_irq:
606 irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]); 608 irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]);
607 free_irq(irq, power); 609 free_irq(irq, power);
608 } 610 }
609 irq = platform_get_irq_byname(pdev, "PWR SRC"); 611 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "PWR SRC"));
610 free_irq(irq, power); 612 free_irq(irq, power);
611err_syslo: 613err_syslo:
612 irq = platform_get_irq_byname(pdev, "SYSLO"); 614 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "SYSLO"));
613 free_irq(irq, power); 615 free_irq(irq, power);
614err_battery: 616err_battery:
615 if (power->have_battery) 617 if (power->have_battery)
@@ -626,17 +628,20 @@ err_kmalloc:
626static __devexit int wm831x_power_remove(struct platform_device *pdev) 628static __devexit int wm831x_power_remove(struct platform_device *pdev)
627{ 629{
628 struct wm831x_power *wm831x_power = platform_get_drvdata(pdev); 630 struct wm831x_power *wm831x_power = platform_get_drvdata(pdev);
631 struct wm831x *wm831x = wm831x_power->wm831x;
629 int irq, i; 632 int irq, i;
630 633
631 for (i = 0; i < ARRAY_SIZE(wm831x_bat_irqs); i++) { 634 for (i = 0; i < ARRAY_SIZE(wm831x_bat_irqs); i++) {
632 irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]); 635 irq = wm831x_irq(wm831x,
636 platform_get_irq_byname(pdev,
637 wm831x_bat_irqs[i]));
633 free_irq(irq, wm831x_power); 638 free_irq(irq, wm831x_power);
634 } 639 }
635 640
636 irq = platform_get_irq_byname(pdev, "PWR SRC"); 641 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "PWR SRC"));
637 free_irq(irq, wm831x_power); 642 free_irq(irq, wm831x_power);
638 643
639 irq = platform_get_irq_byname(pdev, "SYSLO"); 644 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "SYSLO"));
640 free_irq(irq, wm831x_power); 645 free_irq(irq, wm831x_power);
641 646
642 if (wm831x_power->have_battery) 647 if (wm831x_power->have_battery)
diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c
index 49b2112b0486..3660bace123c 100644
--- a/drivers/regulator/anatop-regulator.c
+++ b/drivers/regulator/anatop-regulator.c
@@ -47,7 +47,7 @@ static int anatop_set_voltage(struct regulator_dev *reg, int min_uV,
47 int max_uV, unsigned *selector) 47 int max_uV, unsigned *selector)
48{ 48{
49 struct anatop_regulator *anatop_reg = rdev_get_drvdata(reg); 49 struct anatop_regulator *anatop_reg = rdev_get_drvdata(reg);
50 u32 val, sel; 50 u32 val, sel, mask;
51 int uv; 51 int uv;
52 52
53 uv = min_uV; 53 uv = min_uV;
@@ -71,11 +71,10 @@ static int anatop_set_voltage(struct regulator_dev *reg, int min_uV,
71 val = anatop_reg->min_bit_val + sel; 71 val = anatop_reg->min_bit_val + sel;
72 *selector = sel; 72 *selector = sel;
73 dev_dbg(&reg->dev, "%s: calculated val %d\n", __func__, val); 73 dev_dbg(&reg->dev, "%s: calculated val %d\n", __func__, val);
74 anatop_set_bits(anatop_reg->mfd, 74 mask = ((1 << anatop_reg->vol_bit_width) - 1) <<
75 anatop_reg->control_reg, 75 anatop_reg->vol_bit_shift;
76 anatop_reg->vol_bit_shift, 76 val <<= anatop_reg->vol_bit_shift;
77 anatop_reg->vol_bit_width, 77 anatop_write_reg(anatop_reg->mfd, anatop_reg->control_reg, val, mask);
78 val);
79 78
80 return 0; 79 return 0;
81} 80}
@@ -88,10 +87,9 @@ static int anatop_get_voltage_sel(struct regulator_dev *reg)
88 if (!anatop_reg->control_reg) 87 if (!anatop_reg->control_reg)
89 return -ENOTSUPP; 88 return -ENOTSUPP;
90 89
91 val = anatop_get_bits(anatop_reg->mfd, 90 val = anatop_read_reg(anatop_reg->mfd, anatop_reg->control_reg);
92 anatop_reg->control_reg, 91 val = (val & ((1 << anatop_reg->vol_bit_width) - 1)) >>
93 anatop_reg->vol_bit_shift, 92 anatop_reg->vol_bit_shift;
94 anatop_reg->vol_bit_width);
95 93
96 return val - anatop_reg->min_bit_val; 94 return val - anatop_reg->min_bit_val;
97} 95}
diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 4e01a423471b..6bf864b4bdf6 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -331,21 +331,16 @@ struct tps65910_reg {
331 331
332static inline int tps65910_read(struct tps65910_reg *pmic, u8 reg) 332static inline int tps65910_read(struct tps65910_reg *pmic, u8 reg)
333{ 333{
334 u8 val; 334 unsigned int val;
335 int err; 335 int err;
336 336
337 err = pmic->mfd->read(pmic->mfd, reg, 1, &val); 337 err = tps65910_reg_read(pmic->mfd, reg, &val);
338 if (err) 338 if (err)
339 return err; 339 return err;
340 340
341 return val; 341 return val;
342} 342}
343 343
344static inline int tps65910_write(struct tps65910_reg *pmic, u8 reg, u8 val)
345{
346 return pmic->mfd->write(pmic->mfd, reg, 1, &val);
347}
348
349static int tps65910_modify_bits(struct tps65910_reg *pmic, u8 reg, 344static int tps65910_modify_bits(struct tps65910_reg *pmic, u8 reg,
350 u8 set_mask, u8 clear_mask) 345 u8 set_mask, u8 clear_mask)
351{ 346{
@@ -362,7 +357,7 @@ static int tps65910_modify_bits(struct tps65910_reg *pmic, u8 reg,
362 357
363 data &= ~clear_mask; 358 data &= ~clear_mask;
364 data |= set_mask; 359 data |= set_mask;
365 err = tps65910_write(pmic, reg, data); 360 err = tps65910_reg_write(pmic->mfd, reg, data);
366 if (err) 361 if (err)
367 dev_err(pmic->mfd->dev, "Write for reg 0x%x failed\n", reg); 362 dev_err(pmic->mfd->dev, "Write for reg 0x%x failed\n", reg);
368 363
@@ -371,7 +366,7 @@ out:
371 return err; 366 return err;
372} 367}
373 368
374static int tps65910_reg_read(struct tps65910_reg *pmic, u8 reg) 369static int tps65910_reg_read_locked(struct tps65910_reg *pmic, u8 reg)
375{ 370{
376 int data; 371 int data;
377 372
@@ -385,13 +380,13 @@ static int tps65910_reg_read(struct tps65910_reg *pmic, u8 reg)
385 return data; 380 return data;
386} 381}
387 382
388static int tps65910_reg_write(struct tps65910_reg *pmic, u8 reg, u8 val) 383static int tps65910_reg_write_locked(struct tps65910_reg *pmic, u8 reg, u8 val)
389{ 384{
390 int err; 385 int err;
391 386
392 mutex_lock(&pmic->mutex); 387 mutex_lock(&pmic->mutex);
393 388
394 err = tps65910_write(pmic, reg, val); 389 err = tps65910_reg_write(pmic->mfd, reg, val);
395 if (err < 0) 390 if (err < 0)
396 dev_err(pmic->mfd->dev, "Write for reg 0x%x failed\n", reg); 391 dev_err(pmic->mfd->dev, "Write for reg 0x%x failed\n", reg);
397 392
@@ -490,9 +485,9 @@ static int tps65910_set_mode(struct regulator_dev *dev, unsigned int mode)
490 LDO_ST_MODE_BIT); 485 LDO_ST_MODE_BIT);
491 case REGULATOR_MODE_IDLE: 486 case REGULATOR_MODE_IDLE:
492 value = LDO_ST_ON_BIT | LDO_ST_MODE_BIT; 487 value = LDO_ST_ON_BIT | LDO_ST_MODE_BIT;
493 return tps65910_set_bits(mfd, reg, value); 488 return tps65910_reg_set_bits(mfd, reg, value);
494 case REGULATOR_MODE_STANDBY: 489 case REGULATOR_MODE_STANDBY:
495 return tps65910_clear_bits(mfd, reg, LDO_ST_ON_BIT); 490 return tps65910_reg_clear_bits(mfd, reg, LDO_ST_ON_BIT);
496 } 491 }
497 492
498 return -EINVAL; 493 return -EINVAL;
@@ -507,7 +502,7 @@ static unsigned int tps65910_get_mode(struct regulator_dev *dev)
507 if (reg < 0) 502 if (reg < 0)
508 return reg; 503 return reg;
509 504
510 value = tps65910_reg_read(pmic, reg); 505 value = tps65910_reg_read_locked(pmic, reg);
511 if (value < 0) 506 if (value < 0)
512 return value; 507 return value;
513 508
@@ -527,28 +522,28 @@ static int tps65910_get_voltage_dcdc_sel(struct regulator_dev *dev)
527 522
528 switch (id) { 523 switch (id) {
529 case TPS65910_REG_VDD1: 524 case TPS65910_REG_VDD1:
530 opvsel = tps65910_reg_read(pmic, TPS65910_VDD1_OP); 525 opvsel = tps65910_reg_read_locked(pmic, TPS65910_VDD1_OP);
531 mult = tps65910_reg_read(pmic, TPS65910_VDD1); 526 mult = tps65910_reg_read_locked(pmic, TPS65910_VDD1);
532 mult = (mult & VDD1_VGAIN_SEL_MASK) >> VDD1_VGAIN_SEL_SHIFT; 527 mult = (mult & VDD1_VGAIN_SEL_MASK) >> VDD1_VGAIN_SEL_SHIFT;
533 srvsel = tps65910_reg_read(pmic, TPS65910_VDD1_SR); 528 srvsel = tps65910_reg_read_locked(pmic, TPS65910_VDD1_SR);
534 sr = opvsel & VDD1_OP_CMD_MASK; 529 sr = opvsel & VDD1_OP_CMD_MASK;
535 opvsel &= VDD1_OP_SEL_MASK; 530 opvsel &= VDD1_OP_SEL_MASK;
536 srvsel &= VDD1_SR_SEL_MASK; 531 srvsel &= VDD1_SR_SEL_MASK;
537 vselmax = 75; 532 vselmax = 75;
538 break; 533 break;
539 case TPS65910_REG_VDD2: 534 case TPS65910_REG_VDD2:
540 opvsel = tps65910_reg_read(pmic, TPS65910_VDD2_OP); 535 opvsel = tps65910_reg_read_locked(pmic, TPS65910_VDD2_OP);
541 mult = tps65910_reg_read(pmic, TPS65910_VDD2); 536 mult = tps65910_reg_read_locked(pmic, TPS65910_VDD2);
542 mult = (mult & VDD2_VGAIN_SEL_MASK) >> VDD2_VGAIN_SEL_SHIFT; 537 mult = (mult & VDD2_VGAIN_SEL_MASK) >> VDD2_VGAIN_SEL_SHIFT;
543 srvsel = tps65910_reg_read(pmic, TPS65910_VDD2_SR); 538 srvsel = tps65910_reg_read_locked(pmic, TPS65910_VDD2_SR);
544 sr = opvsel & VDD2_OP_CMD_MASK; 539 sr = opvsel & VDD2_OP_CMD_MASK;
545 opvsel &= VDD2_OP_SEL_MASK; 540 opvsel &= VDD2_OP_SEL_MASK;
546 srvsel &= VDD2_SR_SEL_MASK; 541 srvsel &= VDD2_SR_SEL_MASK;
547 vselmax = 75; 542 vselmax = 75;
548 break; 543 break;
549 case TPS65911_REG_VDDCTRL: 544 case TPS65911_REG_VDDCTRL:
550 opvsel = tps65910_reg_read(pmic, TPS65911_VDDCTRL_OP); 545 opvsel = tps65910_reg_read_locked(pmic, TPS65911_VDDCTRL_OP);
551 srvsel = tps65910_reg_read(pmic, TPS65911_VDDCTRL_SR); 546 srvsel = tps65910_reg_read_locked(pmic, TPS65911_VDDCTRL_SR);
552 sr = opvsel & VDDCTRL_OP_CMD_MASK; 547 sr = opvsel & VDDCTRL_OP_CMD_MASK;
553 opvsel &= VDDCTRL_OP_SEL_MASK; 548 opvsel &= VDDCTRL_OP_SEL_MASK;
554 srvsel &= VDDCTRL_SR_SEL_MASK; 549 srvsel &= VDDCTRL_SR_SEL_MASK;
@@ -588,7 +583,7 @@ static int tps65910_get_voltage_sel(struct regulator_dev *dev)
588 if (reg < 0) 583 if (reg < 0)
589 return reg; 584 return reg;
590 585
591 value = tps65910_reg_read(pmic, reg); 586 value = tps65910_reg_read_locked(pmic, reg);
592 if (value < 0) 587 if (value < 0)
593 return value; 588 return value;
594 589
@@ -625,7 +620,7 @@ static int tps65911_get_voltage_sel(struct regulator_dev *dev)
625 620
626 reg = pmic->get_ctrl_reg(id); 621 reg = pmic->get_ctrl_reg(id);
627 622
628 value = tps65910_reg_read(pmic, reg); 623 value = tps65910_reg_read_locked(pmic, reg);
629 624
630 switch (id) { 625 switch (id) {
631 case TPS65911_REG_LDO1: 626 case TPS65911_REG_LDO1:
@@ -670,7 +665,7 @@ static int tps65910_set_voltage_dcdc_sel(struct regulator_dev *dev,
670 tps65910_modify_bits(pmic, TPS65910_VDD1, 665 tps65910_modify_bits(pmic, TPS65910_VDD1,
671 (dcdc_mult << VDD1_VGAIN_SEL_SHIFT), 666 (dcdc_mult << VDD1_VGAIN_SEL_SHIFT),
672 VDD1_VGAIN_SEL_MASK); 667 VDD1_VGAIN_SEL_MASK);
673 tps65910_reg_write(pmic, TPS65910_VDD1_OP, vsel); 668 tps65910_reg_write_locked(pmic, TPS65910_VDD1_OP, vsel);
674 break; 669 break;
675 case TPS65910_REG_VDD2: 670 case TPS65910_REG_VDD2:
676 dcdc_mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1; 671 dcdc_mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1;
@@ -681,11 +676,11 @@ static int tps65910_set_voltage_dcdc_sel(struct regulator_dev *dev,
681 tps65910_modify_bits(pmic, TPS65910_VDD2, 676 tps65910_modify_bits(pmic, TPS65910_VDD2,
682 (dcdc_mult << VDD2_VGAIN_SEL_SHIFT), 677 (dcdc_mult << VDD2_VGAIN_SEL_SHIFT),
683 VDD1_VGAIN_SEL_MASK); 678 VDD1_VGAIN_SEL_MASK);
684 tps65910_reg_write(pmic, TPS65910_VDD2_OP, vsel); 679 tps65910_reg_write_locked(pmic, TPS65910_VDD2_OP, vsel);
685 break; 680 break;
686 case TPS65911_REG_VDDCTRL: 681 case TPS65911_REG_VDDCTRL:
687 vsel = selector + 3; 682 vsel = selector + 3;
688 tps65910_reg_write(pmic, TPS65911_VDDCTRL_OP, vsel); 683 tps65910_reg_write_locked(pmic, TPS65911_VDDCTRL_OP, vsel);
689 } 684 }
690 685
691 return 0; 686 return 0;
@@ -936,10 +931,10 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
936 931
937 /* External EN1 control */ 932 /* External EN1 control */
938 if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN1) 933 if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN1)
939 ret = tps65910_set_bits(mfd, 934 ret = tps65910_reg_set_bits(mfd,
940 TPS65910_EN1_LDO_ASS + regoffs, bit_pos); 935 TPS65910_EN1_LDO_ASS + regoffs, bit_pos);
941 else 936 else
942 ret = tps65910_clear_bits(mfd, 937 ret = tps65910_reg_clear_bits(mfd,
943 TPS65910_EN1_LDO_ASS + regoffs, bit_pos); 938 TPS65910_EN1_LDO_ASS + regoffs, bit_pos);
944 if (ret < 0) { 939 if (ret < 0) {
945 dev_err(mfd->dev, 940 dev_err(mfd->dev,
@@ -949,10 +944,10 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
949 944
950 /* External EN2 control */ 945 /* External EN2 control */
951 if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN2) 946 if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN2)
952 ret = tps65910_set_bits(mfd, 947 ret = tps65910_reg_set_bits(mfd,
953 TPS65910_EN2_LDO_ASS + regoffs, bit_pos); 948 TPS65910_EN2_LDO_ASS + regoffs, bit_pos);
954 else 949 else
955 ret = tps65910_clear_bits(mfd, 950 ret = tps65910_reg_clear_bits(mfd,
956 TPS65910_EN2_LDO_ASS + regoffs, bit_pos); 951 TPS65910_EN2_LDO_ASS + regoffs, bit_pos);
957 if (ret < 0) { 952 if (ret < 0) {
958 dev_err(mfd->dev, 953 dev_err(mfd->dev,
@@ -964,10 +959,10 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
964 if ((tps65910_chip_id(mfd) == TPS65910) && 959 if ((tps65910_chip_id(mfd) == TPS65910) &&
965 (id >= TPS65910_REG_VDIG1)) { 960 (id >= TPS65910_REG_VDIG1)) {
966 if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN3) 961 if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN3)
967 ret = tps65910_set_bits(mfd, 962 ret = tps65910_reg_set_bits(mfd,
968 TPS65910_EN3_LDO_ASS + regoffs, bit_pos); 963 TPS65910_EN3_LDO_ASS + regoffs, bit_pos);
969 else 964 else
970 ret = tps65910_clear_bits(mfd, 965 ret = tps65910_reg_clear_bits(mfd,
971 TPS65910_EN3_LDO_ASS + regoffs, bit_pos); 966 TPS65910_EN3_LDO_ASS + regoffs, bit_pos);
972 if (ret < 0) { 967 if (ret < 0) {
973 dev_err(mfd->dev, 968 dev_err(mfd->dev,
@@ -979,10 +974,10 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
979 /* Return if no external control is selected */ 974 /* Return if no external control is selected */
980 if (!(ext_sleep_config & EXT_SLEEP_CONTROL)) { 975 if (!(ext_sleep_config & EXT_SLEEP_CONTROL)) {
981 /* Clear all sleep controls */ 976 /* Clear all sleep controls */
982 ret = tps65910_clear_bits(mfd, 977 ret = tps65910_reg_clear_bits(mfd,
983 TPS65910_SLEEP_KEEP_LDO_ON + regoffs, bit_pos); 978 TPS65910_SLEEP_KEEP_LDO_ON + regoffs, bit_pos);
984 if (!ret) 979 if (!ret)
985 ret = tps65910_clear_bits(mfd, 980 ret = tps65910_reg_clear_bits(mfd,
986 TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos); 981 TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos);
987 if (ret < 0) 982 if (ret < 0)
988 dev_err(mfd->dev, 983 dev_err(mfd->dev,
@@ -1001,32 +996,33 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
1001 (tps65910_chip_id(mfd) == TPS65911))) { 996 (tps65910_chip_id(mfd) == TPS65911))) {
1002 int op_reg_add = pmic->get_ctrl_reg(id) + 1; 997 int op_reg_add = pmic->get_ctrl_reg(id) + 1;
1003 int sr_reg_add = pmic->get_ctrl_reg(id) + 2; 998 int sr_reg_add = pmic->get_ctrl_reg(id) + 2;
1004 int opvsel = tps65910_reg_read(pmic, op_reg_add); 999 int opvsel = tps65910_reg_read_locked(pmic, op_reg_add);
1005 int srvsel = tps65910_reg_read(pmic, sr_reg_add); 1000 int srvsel = tps65910_reg_read_locked(pmic, sr_reg_add);
1006 if (opvsel & VDD1_OP_CMD_MASK) { 1001 if (opvsel & VDD1_OP_CMD_MASK) {
1007 u8 reg_val = srvsel & VDD1_OP_SEL_MASK; 1002 u8 reg_val = srvsel & VDD1_OP_SEL_MASK;
1008 ret = tps65910_reg_write(pmic, op_reg_add, reg_val); 1003 ret = tps65910_reg_write_locked(pmic, op_reg_add,
1004 reg_val);
1009 if (ret < 0) { 1005 if (ret < 0) {
1010 dev_err(mfd->dev, 1006 dev_err(mfd->dev,
1011 "Error in configuring op register\n"); 1007 "Error in configuring op register\n");
1012 return ret; 1008 return ret;
1013 } 1009 }
1014 } 1010 }
1015 ret = tps65910_reg_write(pmic, sr_reg_add, 0); 1011 ret = tps65910_reg_write_locked(pmic, sr_reg_add, 0);
1016 if (ret < 0) { 1012 if (ret < 0) {
1017 dev_err(mfd->dev, "Error in settting sr register\n"); 1013 dev_err(mfd->dev, "Error in settting sr register\n");
1018 return ret; 1014 return ret;
1019 } 1015 }
1020 } 1016 }
1021 1017
1022 ret = tps65910_clear_bits(mfd, 1018 ret = tps65910_reg_clear_bits(mfd,
1023 TPS65910_SLEEP_KEEP_LDO_ON + regoffs, bit_pos); 1019 TPS65910_SLEEP_KEEP_LDO_ON + regoffs, bit_pos);
1024 if (!ret) { 1020 if (!ret) {
1025 if (ext_sleep_config & TPS65911_SLEEP_CONTROL_EXT_INPUT_SLEEP) 1021 if (ext_sleep_config & TPS65911_SLEEP_CONTROL_EXT_INPUT_SLEEP)
1026 ret = tps65910_set_bits(mfd, 1022 ret = tps65910_reg_set_bits(mfd,
1027 TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos); 1023 TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos);
1028 else 1024 else
1029 ret = tps65910_clear_bits(mfd, 1025 ret = tps65910_reg_clear_bits(mfd,
1030 TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos); 1026 TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos);
1031 } 1027 }
1032 if (ret < 0) 1028 if (ret < 0)
@@ -1177,7 +1173,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
1177 platform_set_drvdata(pdev, pmic); 1173 platform_set_drvdata(pdev, pmic);
1178 1174
1179 /* Give control of all register to control port */ 1175 /* Give control of all register to control port */
1180 tps65910_set_bits(pmic->mfd, TPS65910_DEVCTRL, 1176 tps65910_reg_set_bits(pmic->mfd, TPS65910_DEVCTRL,
1181 DEVCTRL_SR_CTL_I2C_SEL_MASK); 1177 DEVCTRL_SR_CTL_I2C_SEL_MASK);
1182 1178
1183 switch(tps65910_chip_id(tps65910)) { 1179 switch(tps65910_chip_id(tps65910)) {
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index a885911bb5fc..099da11e989f 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -535,7 +535,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
535 goto err; 535 goto err;
536 } 536 }
537 537
538 irq = platform_get_irq_byname(pdev, "UV"); 538 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
539 ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq, 539 ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
540 IRQF_TRIGGER_RISING, dcdc->name, dcdc); 540 IRQF_TRIGGER_RISING, dcdc->name, dcdc);
541 if (ret != 0) { 541 if (ret != 0) {
@@ -544,7 +544,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
544 goto err_regulator; 544 goto err_regulator;
545 } 545 }
546 546
547 irq = platform_get_irq_byname(pdev, "HC"); 547 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "HC"));
548 ret = request_threaded_irq(irq, NULL, wm831x_dcdc_oc_irq, 548 ret = request_threaded_irq(irq, NULL, wm831x_dcdc_oc_irq,
549 IRQF_TRIGGER_RISING, dcdc->name, dcdc); 549 IRQF_TRIGGER_RISING, dcdc->name, dcdc);
550 if (ret != 0) { 550 if (ret != 0) {
@@ -558,7 +558,8 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
558 return 0; 558 return 0;
559 559
560err_uv: 560err_uv:
561 free_irq(platform_get_irq_byname(pdev, "UV"), dcdc); 561 free_irq(wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV")),
562 dcdc);
562err_regulator: 563err_regulator:
563 regulator_unregister(dcdc->regulator); 564 regulator_unregister(dcdc->regulator);
564err: 565err:
@@ -570,11 +571,14 @@ err:
570static __devexit int wm831x_buckv_remove(struct platform_device *pdev) 571static __devexit int wm831x_buckv_remove(struct platform_device *pdev)
571{ 572{
572 struct wm831x_dcdc *dcdc = platform_get_drvdata(pdev); 573 struct wm831x_dcdc *dcdc = platform_get_drvdata(pdev);
574 struct wm831x *wm831x = dcdc->wm831x;
573 575
574 platform_set_drvdata(pdev, NULL); 576 platform_set_drvdata(pdev, NULL);
575 577
576 free_irq(platform_get_irq_byname(pdev, "HC"), dcdc); 578 free_irq(wm831x_irq(wm831x, platform_get_irq_byname(pdev, "HC")),
577 free_irq(platform_get_irq_byname(pdev, "UV"), dcdc); 579 dcdc);
580 free_irq(wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV")),
581 dcdc);
578 regulator_unregister(dcdc->regulator); 582 regulator_unregister(dcdc->regulator);
579 if (dcdc->dvs_gpio) 583 if (dcdc->dvs_gpio)
580 gpio_free(dcdc->dvs_gpio); 584 gpio_free(dcdc->dvs_gpio);
@@ -726,7 +730,7 @@ static __devinit int wm831x_buckp_probe(struct platform_device *pdev)
726 goto err; 730 goto err;
727 } 731 }
728 732
729 irq = platform_get_irq_byname(pdev, "UV"); 733 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
730 ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq, 734 ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
731 IRQF_TRIGGER_RISING, dcdc->name, dcdc); 735 IRQF_TRIGGER_RISING, dcdc->name, dcdc);
732 if (ret != 0) { 736 if (ret != 0) {
@@ -751,7 +755,8 @@ static __devexit int wm831x_buckp_remove(struct platform_device *pdev)
751 755
752 platform_set_drvdata(pdev, NULL); 756 platform_set_drvdata(pdev, NULL);
753 757
754 free_irq(platform_get_irq_byname(pdev, "UV"), dcdc); 758 free_irq(wm831x_irq(dcdc->wm831x, platform_get_irq_byname(pdev, "UV")),
759 dcdc);
755 regulator_unregister(dcdc->regulator); 760 regulator_unregister(dcdc->regulator);
756 761
757 return 0; 762 return 0;
@@ -859,7 +864,7 @@ static __devinit int wm831x_boostp_probe(struct platform_device *pdev)
859 goto err; 864 goto err;
860 } 865 }
861 866
862 irq = platform_get_irq_byname(pdev, "UV"); 867 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
863 ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq, 868 ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
864 IRQF_TRIGGER_RISING, dcdc->name, 869 IRQF_TRIGGER_RISING, dcdc->name,
865 dcdc); 870 dcdc);
@@ -885,7 +890,8 @@ static __devexit int wm831x_boostp_remove(struct platform_device *pdev)
885 890
886 platform_set_drvdata(pdev, NULL); 891 platform_set_drvdata(pdev, NULL);
887 892
888 free_irq(platform_get_irq_byname(pdev, "UV"), dcdc); 893 free_irq(wm831x_irq(dcdc->wm831x, platform_get_irq_byname(pdev, "UV")),
894 dcdc);
889 regulator_unregister(dcdc->regulator); 895 regulator_unregister(dcdc->regulator);
890 896
891 return 0; 897 return 0;
diff --git a/drivers/regulator/wm831x-isink.c b/drivers/regulator/wm831x-isink.c
index b50ab778b098..0d207c297714 100644
--- a/drivers/regulator/wm831x-isink.c
+++ b/drivers/regulator/wm831x-isink.c
@@ -202,7 +202,7 @@ static __devinit int wm831x_isink_probe(struct platform_device *pdev)
202 goto err; 202 goto err;
203 } 203 }
204 204
205 irq = platform_get_irq(pdev, 0); 205 irq = wm831x_irq(wm831x, platform_get_irq(pdev, 0));
206 ret = request_threaded_irq(irq, NULL, wm831x_isink_irq, 206 ret = request_threaded_irq(irq, NULL, wm831x_isink_irq,
207 IRQF_TRIGGER_RISING, isink->name, isink); 207 IRQF_TRIGGER_RISING, isink->name, isink);
208 if (ret != 0) { 208 if (ret != 0) {
@@ -227,7 +227,7 @@ static __devexit int wm831x_isink_remove(struct platform_device *pdev)
227 227
228 platform_set_drvdata(pdev, NULL); 228 platform_set_drvdata(pdev, NULL);
229 229
230 free_irq(platform_get_irq(pdev, 0), isink); 230 free_irq(wm831x_irq(isink->wm831x, platform_get_irq(pdev, 0)), isink);
231 231
232 regulator_unregister(isink->regulator); 232 regulator_unregister(isink->regulator);
233 233
diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index aa1f8b3fbe16..a9a28d8ac185 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c
@@ -321,7 +321,7 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev)
321 goto err; 321 goto err;
322 } 322 }
323 323
324 irq = platform_get_irq_byname(pdev, "UV"); 324 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
325 ret = request_threaded_irq(irq, NULL, wm831x_ldo_uv_irq, 325 ret = request_threaded_irq(irq, NULL, wm831x_ldo_uv_irq,
326 IRQF_TRIGGER_RISING, ldo->name, 326 IRQF_TRIGGER_RISING, ldo->name,
327 ldo); 327 ldo);
@@ -347,7 +347,8 @@ static __devexit int wm831x_gp_ldo_remove(struct platform_device *pdev)
347 347
348 platform_set_drvdata(pdev, NULL); 348 platform_set_drvdata(pdev, NULL);
349 349
350 free_irq(platform_get_irq_byname(pdev, "UV"), ldo); 350 free_irq(wm831x_irq(ldo->wm831x,
351 platform_get_irq_byname(pdev, "UV")), ldo);
351 regulator_unregister(ldo->regulator); 352 regulator_unregister(ldo->regulator);
352 353
353 return 0; 354 return 0;
@@ -582,7 +583,7 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev)
582 goto err; 583 goto err;
583 } 584 }
584 585
585 irq = platform_get_irq_byname(pdev, "UV"); 586 irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
586 ret = request_threaded_irq(irq, NULL, wm831x_ldo_uv_irq, 587 ret = request_threaded_irq(irq, NULL, wm831x_ldo_uv_irq,
587 IRQF_TRIGGER_RISING, ldo->name, ldo); 588 IRQF_TRIGGER_RISING, ldo->name, ldo);
588 if (ret != 0) { 589 if (ret != 0) {
@@ -605,7 +606,8 @@ static __devexit int wm831x_aldo_remove(struct platform_device *pdev)
605{ 606{
606 struct wm831x_ldo *ldo = platform_get_drvdata(pdev); 607 struct wm831x_ldo *ldo = platform_get_drvdata(pdev);
607 608
608 free_irq(platform_get_irq_byname(pdev, "UV"), ldo); 609 free_irq(wm831x_irq(ldo->wm831x, platform_get_irq_byname(pdev, "UV")),
610 ldo);
609 regulator_unregister(ldo->regulator); 611 regulator_unregister(ldo->regulator);
610 612
611 return 0; 613 return 0;
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 3b6e6a67e765..59c6245e0421 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -396,7 +396,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev)
396{ 396{
397 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); 397 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
398 struct wm831x_rtc *wm831x_rtc; 398 struct wm831x_rtc *wm831x_rtc;
399 int alm_irq = platform_get_irq_byname(pdev, "ALM"); 399 int alm_irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "ALM"));
400 int ret = 0; 400 int ret = 0;
401 401
402 wm831x_rtc = devm_kzalloc(&pdev->dev, sizeof(*wm831x_rtc), GFP_KERNEL); 402 wm831x_rtc = devm_kzalloc(&pdev->dev, sizeof(*wm831x_rtc), GFP_KERNEL);
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 4511420849bc..e84dbecd0991 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/file.h> 20#include <linux/file.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/falloc.h>
22#include <linux/miscdevice.h> 23#include <linux/miscdevice.h>
23#include <linux/security.h> 24#include <linux/security.h>
24#include <linux/mm.h> 25#include <linux/mm.h>
@@ -363,11 +364,12 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
363 364
364 mutex_lock(&ashmem_mutex); 365 mutex_lock(&ashmem_mutex);
365 list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { 366 list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
366 struct inode *inode = range->asma->file->f_dentry->d_inode;
367 loff_t start = range->pgstart * PAGE_SIZE; 367 loff_t start = range->pgstart * PAGE_SIZE;
368 loff_t end = (range->pgend + 1) * PAGE_SIZE - 1; 368 loff_t end = (range->pgend + 1) * PAGE_SIZE;
369 369
370 vmtruncate_range(inode, start, end); 370 do_fallocate(range->asma->file,
371 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
372 start, end - start);
371 range->purged = ASHMEM_WAS_PURGED; 373 range->purged = ASHMEM_WAS_PURGED;
372 lru_del(range); 374 lru_del(range);
373 375
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 59af3945ea85..65c7c62c7aae 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -633,7 +633,6 @@ static int ptmx_open(struct inode *inode, struct file *filp)
633 mutex_unlock(&devpts_mutex); 633 mutex_unlock(&devpts_mutex);
634 634
635 mutex_lock(&tty_mutex); 635 mutex_lock(&tty_mutex);
636 mutex_lock(&devpts_mutex);
637 tty = tty_init_dev(ptm_driver, index); 636 tty = tty_init_dev(ptm_driver, index);
638 637
639 if (IS_ERR(tty)) { 638 if (IS_ERR(tty)) {
@@ -643,7 +642,6 @@ static int ptmx_open(struct inode *inode, struct file *filp)
643 642
644 /* The tty returned here is locked so we can safely 643 /* The tty returned here is locked so we can safely
645 drop the mutex */ 644 drop the mutex */
646 mutex_unlock(&devpts_mutex);
647 mutex_unlock(&tty_mutex); 645 mutex_unlock(&tty_mutex);
648 646
649 set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ 647 set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 173a9000a6cb..ba8be396a621 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -894,6 +894,23 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
894 tty_ldisc_enable(tty); 894 tty_ldisc_enable(tty);
895 return 0; 895 return 0;
896} 896}
897
898static void tty_ldisc_kill(struct tty_struct *tty)
899{
900 mutex_lock(&tty->ldisc_mutex);
901 /*
902 * Now kill off the ldisc
903 */
904 tty_ldisc_close(tty, tty->ldisc);
905 tty_ldisc_put(tty->ldisc);
906 /* Force an oops if we mess this up */
907 tty->ldisc = NULL;
908
909 /* Ensure the next open requests the N_TTY ldisc */
910 tty_set_termios_ldisc(tty, N_TTY);
911 mutex_unlock(&tty->ldisc_mutex);
912}
913
897/** 914/**
898 * tty_ldisc_release - release line discipline 915 * tty_ldisc_release - release line discipline
899 * @tty: tty being shut down 916 * @tty: tty being shut down
@@ -912,27 +929,19 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
912 * race with the set_ldisc code path. 929 * race with the set_ldisc code path.
913 */ 930 */
914 931
915 tty_unlock(tty); 932 tty_unlock_pair(tty, o_tty);
916 tty_ldisc_halt(tty); 933 tty_ldisc_halt(tty);
917 tty_ldisc_flush_works(tty); 934 tty_ldisc_flush_works(tty);
918 tty_lock(tty); 935 if (o_tty) {
919 936 tty_ldisc_halt(o_tty);
920 mutex_lock(&tty->ldisc_mutex); 937 tty_ldisc_flush_works(o_tty);
921 /* 938 }
922 * Now kill off the ldisc 939 tty_lock_pair(tty, o_tty);
923 */
924 tty_ldisc_close(tty, tty->ldisc);
925 tty_ldisc_put(tty->ldisc);
926 /* Force an oops if we mess this up */
927 tty->ldisc = NULL;
928 940
929 /* Ensure the next open requests the N_TTY ldisc */
930 tty_set_termios_ldisc(tty, N_TTY);
931 mutex_unlock(&tty->ldisc_mutex);
932 941
933 /* This will need doing differently if we need to lock */ 942 tty_ldisc_kill(tty);
934 if (o_tty) 943 if (o_tty)
935 tty_ldisc_release(o_tty, NULL); 944 tty_ldisc_kill(o_tty);
936 945
937 /* And the memory resources remaining (buffers, termios) will be 946 /* And the memory resources remaining (buffers, termios) will be
938 disposed of when the kref hits zero */ 947 disposed of when the kref hits zero */
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index a18bf6358eb8..d92d7488be16 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -565,6 +565,7 @@ config INTEL_SCU_WATCHDOG
565config ITCO_WDT 565config ITCO_WDT
566 tristate "Intel TCO Timer/Watchdog" 566 tristate "Intel TCO Timer/Watchdog"
567 depends on (X86 || IA64) && PCI 567 depends on (X86 || IA64) && PCI
568 select LPC_ICH
568 ---help--- 569 ---help---
569 Hardware driver for the intel TCO timer based watchdog devices. 570 Hardware driver for the intel TCO timer based watchdog devices.
570 These drivers are included in the Intel 82801 I/O Controller 571 These drivers are included in the Intel 82801 I/O Controller
diff --git a/drivers/watchdog/iTCO_vendor.h b/drivers/watchdog/iTCO_vendor.h
index 9e27e6422f66..3c57b45537a2 100644
--- a/drivers/watchdog/iTCO_vendor.h
+++ b/drivers/watchdog/iTCO_vendor.h
@@ -1,8 +1,8 @@
1/* iTCO Vendor Specific Support hooks */ 1/* iTCO Vendor Specific Support hooks */
2#ifdef CONFIG_ITCO_VENDOR_SUPPORT 2#ifdef CONFIG_ITCO_VENDOR_SUPPORT
3extern void iTCO_vendor_pre_start(unsigned long, unsigned int); 3extern void iTCO_vendor_pre_start(struct resource *, unsigned int);
4extern void iTCO_vendor_pre_stop(unsigned long); 4extern void iTCO_vendor_pre_stop(struct resource *);
5extern void iTCO_vendor_pre_keepalive(unsigned long, unsigned int); 5extern void iTCO_vendor_pre_keepalive(struct resource *, unsigned int);
6extern void iTCO_vendor_pre_set_heartbeat(unsigned int); 6extern void iTCO_vendor_pre_set_heartbeat(unsigned int);
7extern int iTCO_vendor_check_noreboot_on(void); 7extern int iTCO_vendor_check_noreboot_on(void);
8#else 8#else
diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c
index 2721d29ce243..b6b2f90b5d44 100644
--- a/drivers/watchdog/iTCO_vendor_support.c
+++ b/drivers/watchdog/iTCO_vendor_support.c
@@ -35,11 +35,6 @@
35 35
36#include "iTCO_vendor.h" 36#include "iTCO_vendor.h"
37 37
38/* iTCO defines */
39#define SMI_EN (acpibase + 0x30) /* SMI Control and Enable Register */
40#define TCOBASE (acpibase + 0x60) /* TCO base address */
41#define TCO1_STS (TCOBASE + 0x04) /* TCO1 Status Register */
42
43/* List of vendor support modes */ 38/* List of vendor support modes */
44/* SuperMicro Pentium 3 Era 370SSE+-OEM1/P3TSSE */ 39/* SuperMicro Pentium 3 Era 370SSE+-OEM1/P3TSSE */
45#define SUPERMICRO_OLD_BOARD 1 40#define SUPERMICRO_OLD_BOARD 1
@@ -82,24 +77,24 @@ MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default="
82 * 20.6 seconds. 77 * 20.6 seconds.
83 */ 78 */
84 79
85static void supermicro_old_pre_start(unsigned long acpibase) 80static void supermicro_old_pre_start(struct resource *smires)
86{ 81{
87 unsigned long val32; 82 unsigned long val32;
88 83
89 /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */ 84 /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */
90 val32 = inl(SMI_EN); 85 val32 = inl(smires->start);
91 val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ 86 val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */
92 outl(val32, SMI_EN); /* Needed to activate watchdog */ 87 outl(val32, smires->start); /* Needed to activate watchdog */
93} 88}
94 89
95static void supermicro_old_pre_stop(unsigned long acpibase) 90static void supermicro_old_pre_stop(struct resource *smires)
96{ 91{
97 unsigned long val32; 92 unsigned long val32;
98 93
99 /* Bit 13: TCO_EN -> 1 = Enables the TCO logic to generate SMI# */ 94 /* Bit 13: TCO_EN -> 1 = Enables the TCO logic to generate SMI# */
100 val32 = inl(SMI_EN); 95 val32 = inl(smires->start);
101 val32 |= 0x00002000; /* Turn on SMI clearing watchdog */ 96 val32 |= 0x00002000; /* Turn on SMI clearing watchdog */
102 outl(val32, SMI_EN); /* Needed to deactivate watchdog */ 97 outl(val32, smires->start); /* Needed to deactivate watchdog */
103} 98}
104 99
105/* 100/*
@@ -270,66 +265,66 @@ static void supermicro_new_pre_set_heartbeat(unsigned int heartbeat)
270 * Don't use this fix if you don't need to!!! 265 * Don't use this fix if you don't need to!!!
271 */ 266 */
272 267
273static void broken_bios_start(unsigned long acpibase) 268static void broken_bios_start(struct resource *smires)
274{ 269{
275 unsigned long val32; 270 unsigned long val32;
276 271
277 val32 = inl(SMI_EN); 272 val32 = inl(smires->start);
278 /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# 273 /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI#
279 Bit 0: GBL_SMI_EN -> 0 = No SMI# will be generated by ICH. */ 274 Bit 0: GBL_SMI_EN -> 0 = No SMI# will be generated by ICH. */
280 val32 &= 0xffffdffe; 275 val32 &= 0xffffdffe;
281 outl(val32, SMI_EN); 276 outl(val32, smires->start);
282} 277}
283 278
284static void broken_bios_stop(unsigned long acpibase) 279static void broken_bios_stop(struct resource *smires)
285{ 280{
286 unsigned long val32; 281 unsigned long val32;
287 282
288 val32 = inl(SMI_EN); 283 val32 = inl(smires->start);
289 /* Bit 13: TCO_EN -> 1 = Enables TCO logic generating an SMI# 284 /* Bit 13: TCO_EN -> 1 = Enables TCO logic generating an SMI#
290 Bit 0: GBL_SMI_EN -> 1 = Turn global SMI on again. */ 285 Bit 0: GBL_SMI_EN -> 1 = Turn global SMI on again. */
291 val32 |= 0x00002001; 286 val32 |= 0x00002001;
292 outl(val32, SMI_EN); 287 outl(val32, smires->start);
293} 288}
294 289
295/* 290/*
296 * Generic Support Functions 291 * Generic Support Functions
297 */ 292 */
298 293
299void iTCO_vendor_pre_start(unsigned long acpibase, 294void iTCO_vendor_pre_start(struct resource *smires,
300 unsigned int heartbeat) 295 unsigned int heartbeat)
301{ 296{
302 switch (vendorsupport) { 297 switch (vendorsupport) {
303 case SUPERMICRO_OLD_BOARD: 298 case SUPERMICRO_OLD_BOARD:
304 supermicro_old_pre_start(acpibase); 299 supermicro_old_pre_start(smires);
305 break; 300 break;
306 case SUPERMICRO_NEW_BOARD: 301 case SUPERMICRO_NEW_BOARD:
307 supermicro_new_pre_start(heartbeat); 302 supermicro_new_pre_start(heartbeat);
308 break; 303 break;
309 case BROKEN_BIOS: 304 case BROKEN_BIOS:
310 broken_bios_start(acpibase); 305 broken_bios_start(smires);
311 break; 306 break;
312 } 307 }
313} 308}
314EXPORT_SYMBOL(iTCO_vendor_pre_start); 309EXPORT_SYMBOL(iTCO_vendor_pre_start);
315 310
316void iTCO_vendor_pre_stop(unsigned long acpibase) 311void iTCO_vendor_pre_stop(struct resource *smires)
317{ 312{
318 switch (vendorsupport) { 313 switch (vendorsupport) {
319 case SUPERMICRO_OLD_BOARD: 314 case SUPERMICRO_OLD_BOARD:
320 supermicro_old_pre_stop(acpibase); 315 supermicro_old_pre_stop(smires);
321 break; 316 break;
322 case SUPERMICRO_NEW_BOARD: 317 case SUPERMICRO_NEW_BOARD:
323 supermicro_new_pre_stop(); 318 supermicro_new_pre_stop();
324 break; 319 break;
325 case BROKEN_BIOS: 320 case BROKEN_BIOS:
326 broken_bios_stop(acpibase); 321 broken_bios_stop(smires);
327 break; 322 break;
328 } 323 }
329} 324}
330EXPORT_SYMBOL(iTCO_vendor_pre_stop); 325EXPORT_SYMBOL(iTCO_vendor_pre_stop);
331 326
332void iTCO_vendor_pre_keepalive(unsigned long acpibase, unsigned int heartbeat) 327void iTCO_vendor_pre_keepalive(struct resource *smires, unsigned int heartbeat)
333{ 328{
334 if (vendorsupport == SUPERMICRO_NEW_BOARD) 329 if (vendorsupport == SUPERMICRO_NEW_BOARD)
335 supermicro_new_pre_set_heartbeat(heartbeat); 330 supermicro_new_pre_set_heartbeat(heartbeat);
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 9fecb95645a3..741528b032e2 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -66,316 +66,16 @@
66#include <linux/spinlock.h> /* For spin_lock/spin_unlock/... */ 66#include <linux/spinlock.h> /* For spin_lock/spin_unlock/... */
67#include <linux/uaccess.h> /* For copy_to_user/put_user/... */ 67#include <linux/uaccess.h> /* For copy_to_user/put_user/... */
68#include <linux/io.h> /* For inb/outb/... */ 68#include <linux/io.h> /* For inb/outb/... */
69#include <linux/mfd/core.h>
70#include <linux/mfd/lpc_ich.h>
69 71
70#include "iTCO_vendor.h" 72#include "iTCO_vendor.h"
71 73
72/* TCO related info */
73enum iTCO_chipsets {
74 TCO_ICH = 0, /* ICH */
75 TCO_ICH0, /* ICH0 */
76 TCO_ICH2, /* ICH2 */
77 TCO_ICH2M, /* ICH2-M */
78 TCO_ICH3, /* ICH3-S */
79 TCO_ICH3M, /* ICH3-M */
80 TCO_ICH4, /* ICH4 */
81 TCO_ICH4M, /* ICH4-M */
82 TCO_CICH, /* C-ICH */
83 TCO_ICH5, /* ICH5 & ICH5R */
84 TCO_6300ESB, /* 6300ESB */
85 TCO_ICH6, /* ICH6 & ICH6R */
86 TCO_ICH6M, /* ICH6-M */
87 TCO_ICH6W, /* ICH6W & ICH6RW */
88 TCO_631XESB, /* 631xESB/632xESB */
89 TCO_ICH7, /* ICH7 & ICH7R */
90 TCO_ICH7DH, /* ICH7DH */
91 TCO_ICH7M, /* ICH7-M & ICH7-U */
92 TCO_ICH7MDH, /* ICH7-M DH */
93 TCO_NM10, /* NM10 */
94 TCO_ICH8, /* ICH8 & ICH8R */
95 TCO_ICH8DH, /* ICH8DH */
96 TCO_ICH8DO, /* ICH8DO */
97 TCO_ICH8M, /* ICH8M */
98 TCO_ICH8ME, /* ICH8M-E */
99 TCO_ICH9, /* ICH9 */
100 TCO_ICH9R, /* ICH9R */
101 TCO_ICH9DH, /* ICH9DH */
102 TCO_ICH9DO, /* ICH9DO */
103 TCO_ICH9M, /* ICH9M */
104 TCO_ICH9ME, /* ICH9M-E */
105 TCO_ICH10, /* ICH10 */
106 TCO_ICH10R, /* ICH10R */
107 TCO_ICH10D, /* ICH10D */
108 TCO_ICH10DO, /* ICH10DO */
109 TCO_PCH, /* PCH Desktop Full Featured */
110 TCO_PCHM, /* PCH Mobile Full Featured */
111 TCO_P55, /* P55 */
112 TCO_PM55, /* PM55 */
113 TCO_H55, /* H55 */
114 TCO_QM57, /* QM57 */
115 TCO_H57, /* H57 */
116 TCO_HM55, /* HM55 */
117 TCO_Q57, /* Q57 */
118 TCO_HM57, /* HM57 */
119 TCO_PCHMSFF, /* PCH Mobile SFF Full Featured */
120 TCO_QS57, /* QS57 */
121 TCO_3400, /* 3400 */
122 TCO_3420, /* 3420 */
123 TCO_3450, /* 3450 */
124 TCO_EP80579, /* EP80579 */
125 TCO_CPT, /* Cougar Point */
126 TCO_CPTD, /* Cougar Point Desktop */
127 TCO_CPTM, /* Cougar Point Mobile */
128 TCO_PBG, /* Patsburg */
129 TCO_DH89XXCC, /* DH89xxCC */
130 TCO_PPT, /* Panther Point */
131 TCO_LPT, /* Lynx Point */
132};
133
134static struct {
135 char *name;
136 unsigned int iTCO_version;
137} iTCO_chipset_info[] __devinitdata = {
138 {"ICH", 1},
139 {"ICH0", 1},
140 {"ICH2", 1},
141 {"ICH2-M", 1},
142 {"ICH3-S", 1},
143 {"ICH3-M", 1},
144 {"ICH4", 1},
145 {"ICH4-M", 1},
146 {"C-ICH", 1},
147 {"ICH5 or ICH5R", 1},
148 {"6300ESB", 1},
149 {"ICH6 or ICH6R", 2},
150 {"ICH6-M", 2},
151 {"ICH6W or ICH6RW", 2},
152 {"631xESB/632xESB", 2},
153 {"ICH7 or ICH7R", 2},
154 {"ICH7DH", 2},
155 {"ICH7-M or ICH7-U", 2},
156 {"ICH7-M DH", 2},
157 {"NM10", 2},
158 {"ICH8 or ICH8R", 2},
159 {"ICH8DH", 2},
160 {"ICH8DO", 2},
161 {"ICH8M", 2},
162 {"ICH8M-E", 2},
163 {"ICH9", 2},
164 {"ICH9R", 2},
165 {"ICH9DH", 2},
166 {"ICH9DO", 2},
167 {"ICH9M", 2},
168 {"ICH9M-E", 2},
169 {"ICH10", 2},
170 {"ICH10R", 2},
171 {"ICH10D", 2},
172 {"ICH10DO", 2},
173 {"PCH Desktop Full Featured", 2},
174 {"PCH Mobile Full Featured", 2},
175 {"P55", 2},
176 {"PM55", 2},
177 {"H55", 2},
178 {"QM57", 2},
179 {"H57", 2},
180 {"HM55", 2},
181 {"Q57", 2},
182 {"HM57", 2},
183 {"PCH Mobile SFF Full Featured", 2},
184 {"QS57", 2},
185 {"3400", 2},
186 {"3420", 2},
187 {"3450", 2},
188 {"EP80579", 2},
189 {"Cougar Point", 2},
190 {"Cougar Point Desktop", 2},
191 {"Cougar Point Mobile", 2},
192 {"Patsburg", 2},
193 {"DH89xxCC", 2},
194 {"Panther Point", 2},
195 {"Lynx Point", 2},
196 {NULL, 0}
197};
198
199/*
200 * This data only exists for exporting the supported PCI ids
201 * via MODULE_DEVICE_TABLE. We do not actually register a
202 * pci_driver, because the I/O Controller Hub has also other
203 * functions that probably will be registered by other drivers.
204 */
205static DEFINE_PCI_DEVICE_TABLE(iTCO_wdt_pci_tbl) = {
206 { PCI_VDEVICE(INTEL, 0x2410), TCO_ICH},
207 { PCI_VDEVICE(INTEL, 0x2420), TCO_ICH0},
208 { PCI_VDEVICE(INTEL, 0x2440), TCO_ICH2},
209 { PCI_VDEVICE(INTEL, 0x244c), TCO_ICH2M},
210 { PCI_VDEVICE(INTEL, 0x2480), TCO_ICH3},
211 { PCI_VDEVICE(INTEL, 0x248c), TCO_ICH3M},
212 { PCI_VDEVICE(INTEL, 0x24c0), TCO_ICH4},
213 { PCI_VDEVICE(INTEL, 0x24cc), TCO_ICH4M},
214 { PCI_VDEVICE(INTEL, 0x2450), TCO_CICH},
215 { PCI_VDEVICE(INTEL, 0x24d0), TCO_ICH5},
216 { PCI_VDEVICE(INTEL, 0x25a1), TCO_6300ESB},
217 { PCI_VDEVICE(INTEL, 0x2640), TCO_ICH6},
218 { PCI_VDEVICE(INTEL, 0x2641), TCO_ICH6M},
219 { PCI_VDEVICE(INTEL, 0x2642), TCO_ICH6W},
220 { PCI_VDEVICE(INTEL, 0x2670), TCO_631XESB},
221 { PCI_VDEVICE(INTEL, 0x2671), TCO_631XESB},
222 { PCI_VDEVICE(INTEL, 0x2672), TCO_631XESB},
223 { PCI_VDEVICE(INTEL, 0x2673), TCO_631XESB},
224 { PCI_VDEVICE(INTEL, 0x2674), TCO_631XESB},
225 { PCI_VDEVICE(INTEL, 0x2675), TCO_631XESB},
226 { PCI_VDEVICE(INTEL, 0x2676), TCO_631XESB},
227 { PCI_VDEVICE(INTEL, 0x2677), TCO_631XESB},
228 { PCI_VDEVICE(INTEL, 0x2678), TCO_631XESB},
229 { PCI_VDEVICE(INTEL, 0x2679), TCO_631XESB},
230 { PCI_VDEVICE(INTEL, 0x267a), TCO_631XESB},
231 { PCI_VDEVICE(INTEL, 0x267b), TCO_631XESB},
232 { PCI_VDEVICE(INTEL, 0x267c), TCO_631XESB},
233 { PCI_VDEVICE(INTEL, 0x267d), TCO_631XESB},
234 { PCI_VDEVICE(INTEL, 0x267e), TCO_631XESB},
235 { PCI_VDEVICE(INTEL, 0x267f), TCO_631XESB},
236 { PCI_VDEVICE(INTEL, 0x27b8), TCO_ICH7},
237 { PCI_VDEVICE(INTEL, 0x27b0), TCO_ICH7DH},
238 { PCI_VDEVICE(INTEL, 0x27b9), TCO_ICH7M},
239 { PCI_VDEVICE(INTEL, 0x27bd), TCO_ICH7MDH},
240 { PCI_VDEVICE(INTEL, 0x27bc), TCO_NM10},
241 { PCI_VDEVICE(INTEL, 0x2810), TCO_ICH8},
242 { PCI_VDEVICE(INTEL, 0x2812), TCO_ICH8DH},
243 { PCI_VDEVICE(INTEL, 0x2814), TCO_ICH8DO},
244 { PCI_VDEVICE(INTEL, 0x2815), TCO_ICH8M},
245 { PCI_VDEVICE(INTEL, 0x2811), TCO_ICH8ME},
246 { PCI_VDEVICE(INTEL, 0x2918), TCO_ICH9},
247 { PCI_VDEVICE(INTEL, 0x2916), TCO_ICH9R},
248 { PCI_VDEVICE(INTEL, 0x2912), TCO_ICH9DH},
249 { PCI_VDEVICE(INTEL, 0x2914), TCO_ICH9DO},
250 { PCI_VDEVICE(INTEL, 0x2919), TCO_ICH9M},
251 { PCI_VDEVICE(INTEL, 0x2917), TCO_ICH9ME},
252 { PCI_VDEVICE(INTEL, 0x3a18), TCO_ICH10},
253 { PCI_VDEVICE(INTEL, 0x3a16), TCO_ICH10R},
254 { PCI_VDEVICE(INTEL, 0x3a1a), TCO_ICH10D},
255 { PCI_VDEVICE(INTEL, 0x3a14), TCO_ICH10DO},
256 { PCI_VDEVICE(INTEL, 0x3b00), TCO_PCH},
257 { PCI_VDEVICE(INTEL, 0x3b01), TCO_PCHM},
258 { PCI_VDEVICE(INTEL, 0x3b02), TCO_P55},
259 { PCI_VDEVICE(INTEL, 0x3b03), TCO_PM55},
260 { PCI_VDEVICE(INTEL, 0x3b06), TCO_H55},
261 { PCI_VDEVICE(INTEL, 0x3b07), TCO_QM57},
262 { PCI_VDEVICE(INTEL, 0x3b08), TCO_H57},
263 { PCI_VDEVICE(INTEL, 0x3b09), TCO_HM55},
264 { PCI_VDEVICE(INTEL, 0x3b0a), TCO_Q57},
265 { PCI_VDEVICE(INTEL, 0x3b0b), TCO_HM57},
266 { PCI_VDEVICE(INTEL, 0x3b0d), TCO_PCHMSFF},
267 { PCI_VDEVICE(INTEL, 0x3b0f), TCO_QS57},
268 { PCI_VDEVICE(INTEL, 0x3b12), TCO_3400},
269 { PCI_VDEVICE(INTEL, 0x3b14), TCO_3420},
270 { PCI_VDEVICE(INTEL, 0x3b16), TCO_3450},
271 { PCI_VDEVICE(INTEL, 0x5031), TCO_EP80579},
272 { PCI_VDEVICE(INTEL, 0x1c41), TCO_CPT},
273 { PCI_VDEVICE(INTEL, 0x1c42), TCO_CPTD},
274 { PCI_VDEVICE(INTEL, 0x1c43), TCO_CPTM},
275 { PCI_VDEVICE(INTEL, 0x1c44), TCO_CPT},
276 { PCI_VDEVICE(INTEL, 0x1c45), TCO_CPT},
277 { PCI_VDEVICE(INTEL, 0x1c46), TCO_CPT},
278 { PCI_VDEVICE(INTEL, 0x1c47), TCO_CPT},
279 { PCI_VDEVICE(INTEL, 0x1c48), TCO_CPT},
280 { PCI_VDEVICE(INTEL, 0x1c49), TCO_CPT},
281 { PCI_VDEVICE(INTEL, 0x1c4a), TCO_CPT},
282 { PCI_VDEVICE(INTEL, 0x1c4b), TCO_CPT},
283 { PCI_VDEVICE(INTEL, 0x1c4c), TCO_CPT},
284 { PCI_VDEVICE(INTEL, 0x1c4d), TCO_CPT},
285 { PCI_VDEVICE(INTEL, 0x1c4e), TCO_CPT},
286 { PCI_VDEVICE(INTEL, 0x1c4f), TCO_CPT},
287 { PCI_VDEVICE(INTEL, 0x1c50), TCO_CPT},
288 { PCI_VDEVICE(INTEL, 0x1c51), TCO_CPT},
289 { PCI_VDEVICE(INTEL, 0x1c52), TCO_CPT},
290 { PCI_VDEVICE(INTEL, 0x1c53), TCO_CPT},
291 { PCI_VDEVICE(INTEL, 0x1c54), TCO_CPT},
292 { PCI_VDEVICE(INTEL, 0x1c55), TCO_CPT},
293 { PCI_VDEVICE(INTEL, 0x1c56), TCO_CPT},
294 { PCI_VDEVICE(INTEL, 0x1c57), TCO_CPT},
295 { PCI_VDEVICE(INTEL, 0x1c58), TCO_CPT},
296 { PCI_VDEVICE(INTEL, 0x1c59), TCO_CPT},
297 { PCI_VDEVICE(INTEL, 0x1c5a), TCO_CPT},
298 { PCI_VDEVICE(INTEL, 0x1c5b), TCO_CPT},
299 { PCI_VDEVICE(INTEL, 0x1c5c), TCO_CPT},
300 { PCI_VDEVICE(INTEL, 0x1c5d), TCO_CPT},
301 { PCI_VDEVICE(INTEL, 0x1c5e), TCO_CPT},
302 { PCI_VDEVICE(INTEL, 0x1c5f), TCO_CPT},
303 { PCI_VDEVICE(INTEL, 0x1d40), TCO_PBG},
304 { PCI_VDEVICE(INTEL, 0x1d41), TCO_PBG},
305 { PCI_VDEVICE(INTEL, 0x2310), TCO_DH89XXCC},
306 { PCI_VDEVICE(INTEL, 0x1e40), TCO_PPT},
307 { PCI_VDEVICE(INTEL, 0x1e41), TCO_PPT},
308 { PCI_VDEVICE(INTEL, 0x1e42), TCO_PPT},
309 { PCI_VDEVICE(INTEL, 0x1e43), TCO_PPT},
310 { PCI_VDEVICE(INTEL, 0x1e44), TCO_PPT},
311 { PCI_VDEVICE(INTEL, 0x1e45), TCO_PPT},
312 { PCI_VDEVICE(INTEL, 0x1e46), TCO_PPT},
313 { PCI_VDEVICE(INTEL, 0x1e47), TCO_PPT},
314 { PCI_VDEVICE(INTEL, 0x1e48), TCO_PPT},
315 { PCI_VDEVICE(INTEL, 0x1e49), TCO_PPT},
316 { PCI_VDEVICE(INTEL, 0x1e4a), TCO_PPT},
317 { PCI_VDEVICE(INTEL, 0x1e4b), TCO_PPT},
318 { PCI_VDEVICE(INTEL, 0x1e4c), TCO_PPT},
319 { PCI_VDEVICE(INTEL, 0x1e4d), TCO_PPT},
320 { PCI_VDEVICE(INTEL, 0x1e4e), TCO_PPT},
321 { PCI_VDEVICE(INTEL, 0x1e4f), TCO_PPT},
322 { PCI_VDEVICE(INTEL, 0x1e50), TCO_PPT},
323 { PCI_VDEVICE(INTEL, 0x1e51), TCO_PPT},
324 { PCI_VDEVICE(INTEL, 0x1e52), TCO_PPT},
325 { PCI_VDEVICE(INTEL, 0x1e53), TCO_PPT},
326 { PCI_VDEVICE(INTEL, 0x1e54), TCO_PPT},
327 { PCI_VDEVICE(INTEL, 0x1e55), TCO_PPT},
328 { PCI_VDEVICE(INTEL, 0x1e56), TCO_PPT},
329 { PCI_VDEVICE(INTEL, 0x1e57), TCO_PPT},
330 { PCI_VDEVICE(INTEL, 0x1e58), TCO_PPT},
331 { PCI_VDEVICE(INTEL, 0x1e59), TCO_PPT},
332 { PCI_VDEVICE(INTEL, 0x1e5a), TCO_PPT},
333 { PCI_VDEVICE(INTEL, 0x1e5b), TCO_PPT},
334 { PCI_VDEVICE(INTEL, 0x1e5c), TCO_PPT},
335 { PCI_VDEVICE(INTEL, 0x1e5d), TCO_PPT},
336 { PCI_VDEVICE(INTEL, 0x1e5e), TCO_PPT},
337 { PCI_VDEVICE(INTEL, 0x1e5f), TCO_PPT},
338 { PCI_VDEVICE(INTEL, 0x8c40), TCO_LPT},
339 { PCI_VDEVICE(INTEL, 0x8c41), TCO_LPT},
340 { PCI_VDEVICE(INTEL, 0x8c42), TCO_LPT},
341 { PCI_VDEVICE(INTEL, 0x8c43), TCO_LPT},
342 { PCI_VDEVICE(INTEL, 0x8c44), TCO_LPT},
343 { PCI_VDEVICE(INTEL, 0x8c45), TCO_LPT},
344 { PCI_VDEVICE(INTEL, 0x8c46), TCO_LPT},
345 { PCI_VDEVICE(INTEL, 0x8c47), TCO_LPT},
346 { PCI_VDEVICE(INTEL, 0x8c48), TCO_LPT},
347 { PCI_VDEVICE(INTEL, 0x8c49), TCO_LPT},
348 { PCI_VDEVICE(INTEL, 0x8c4a), TCO_LPT},
349 { PCI_VDEVICE(INTEL, 0x8c4b), TCO_LPT},
350 { PCI_VDEVICE(INTEL, 0x8c4c), TCO_LPT},
351 { PCI_VDEVICE(INTEL, 0x8c4d), TCO_LPT},
352 { PCI_VDEVICE(INTEL, 0x8c4e), TCO_LPT},
353 { PCI_VDEVICE(INTEL, 0x8c4f), TCO_LPT},
354 { PCI_VDEVICE(INTEL, 0x8c50), TCO_LPT},
355 { PCI_VDEVICE(INTEL, 0x8c51), TCO_LPT},
356 { PCI_VDEVICE(INTEL, 0x8c52), TCO_LPT},
357 { PCI_VDEVICE(INTEL, 0x8c53), TCO_LPT},
358 { PCI_VDEVICE(INTEL, 0x8c54), TCO_LPT},
359 { PCI_VDEVICE(INTEL, 0x8c55), TCO_LPT},
360 { PCI_VDEVICE(INTEL, 0x8c56), TCO_LPT},
361 { PCI_VDEVICE(INTEL, 0x8c57), TCO_LPT},
362 { PCI_VDEVICE(INTEL, 0x8c58), TCO_LPT},
363 { PCI_VDEVICE(INTEL, 0x8c59), TCO_LPT},
364 { PCI_VDEVICE(INTEL, 0x8c5a), TCO_LPT},
365 { PCI_VDEVICE(INTEL, 0x8c5b), TCO_LPT},
366 { PCI_VDEVICE(INTEL, 0x8c5c), TCO_LPT},
367 { PCI_VDEVICE(INTEL, 0x8c5d), TCO_LPT},
368 { PCI_VDEVICE(INTEL, 0x8c5e), TCO_LPT},
369 { PCI_VDEVICE(INTEL, 0x8c5f), TCO_LPT},
370 { 0, }, /* End of list */
371};
372MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl);
373
374/* Address definitions for the TCO */ 74/* Address definitions for the TCO */
375/* TCO base address */ 75/* TCO base address */
376#define TCOBASE (iTCO_wdt_private.ACPIBASE + 0x60) 76#define TCOBASE (iTCO_wdt_private.tco_res->start)
377/* SMI Control and Enable Register */ 77/* SMI Control and Enable Register */
378#define SMI_EN (iTCO_wdt_private.ACPIBASE + 0x30) 78#define SMI_EN (iTCO_wdt_private.smi_res->start)
379 79
380#define TCO_RLD (TCOBASE + 0x00) /* TCO Timer Reload and Curr. Value */ 80#define TCO_RLD (TCOBASE + 0x00) /* TCO Timer Reload and Curr. Value */
381#define TCOv1_TMR (TCOBASE + 0x01) /* TCOv1 Timer Initial Value */ 81#define TCOv1_TMR (TCOBASE + 0x01) /* TCOv1 Timer Initial Value */
@@ -393,19 +93,18 @@ static char expect_release;
393static struct { /* this is private data for the iTCO_wdt device */ 93static struct { /* this is private data for the iTCO_wdt device */
394 /* TCO version/generation */ 94 /* TCO version/generation */
395 unsigned int iTCO_version; 95 unsigned int iTCO_version;
396 /* The device's ACPIBASE address (TCOBASE = ACPIBASE+0x60) */ 96 struct resource *tco_res;
397 unsigned long ACPIBASE; 97 struct resource *smi_res;
98 struct resource *gcs_res;
398 /* NO_REBOOT flag is Memory-Mapped GCS register bit 5 (TCO version 2)*/ 99 /* NO_REBOOT flag is Memory-Mapped GCS register bit 5 (TCO version 2)*/
399 unsigned long __iomem *gcs; 100 unsigned long __iomem *gcs;
400 /* the lock for io operations */ 101 /* the lock for io operations */
401 spinlock_t io_lock; 102 spinlock_t io_lock;
103 struct platform_device *dev;
402 /* the PCI-device */ 104 /* the PCI-device */
403 struct pci_dev *pdev; 105 struct pci_dev *pdev;
404} iTCO_wdt_private; 106} iTCO_wdt_private;
405 107
406/* the watchdog platform device */
407static struct platform_device *iTCO_wdt_platform_device;
408
409/* module parameters */ 108/* module parameters */
410#define WATCHDOG_HEARTBEAT 30 /* 30 sec default heartbeat */ 109#define WATCHDOG_HEARTBEAT 30 /* 30 sec default heartbeat */
411static int heartbeat = WATCHDOG_HEARTBEAT; /* in seconds */ 110static int heartbeat = WATCHDOG_HEARTBEAT; /* in seconds */
@@ -485,7 +184,7 @@ static int iTCO_wdt_start(void)
485 184
486 spin_lock(&iTCO_wdt_private.io_lock); 185 spin_lock(&iTCO_wdt_private.io_lock);
487 186
488 iTCO_vendor_pre_start(iTCO_wdt_private.ACPIBASE, heartbeat); 187 iTCO_vendor_pre_start(iTCO_wdt_private.smi_res, heartbeat);
489 188
490 /* disable chipset's NO_REBOOT bit */ 189 /* disable chipset's NO_REBOOT bit */
491 if (iTCO_wdt_unset_NO_REBOOT_bit()) { 190 if (iTCO_wdt_unset_NO_REBOOT_bit()) {
@@ -519,7 +218,7 @@ static int iTCO_wdt_stop(void)
519 218
520 spin_lock(&iTCO_wdt_private.io_lock); 219 spin_lock(&iTCO_wdt_private.io_lock);
521 220
522 iTCO_vendor_pre_stop(iTCO_wdt_private.ACPIBASE); 221 iTCO_vendor_pre_stop(iTCO_wdt_private.smi_res);
523 222
524 /* Bit 11: TCO Timer Halt -> 1 = The TCO timer is disabled */ 223 /* Bit 11: TCO Timer Halt -> 1 = The TCO timer is disabled */
525 val = inw(TCO1_CNT); 224 val = inw(TCO1_CNT);
@@ -541,7 +240,7 @@ static int iTCO_wdt_keepalive(void)
541{ 240{
542 spin_lock(&iTCO_wdt_private.io_lock); 241 spin_lock(&iTCO_wdt_private.io_lock);
543 242
544 iTCO_vendor_pre_keepalive(iTCO_wdt_private.ACPIBASE, heartbeat); 243 iTCO_vendor_pre_keepalive(iTCO_wdt_private.smi_res, heartbeat);
545 244
546 /* Reload the timer by writing to the TCO Timer Counter register */ 245 /* Reload the timer by writing to the TCO Timer Counter register */
547 if (iTCO_wdt_private.iTCO_version == 2) 246 if (iTCO_wdt_private.iTCO_version == 2)
@@ -786,83 +485,120 @@ static struct miscdevice iTCO_wdt_miscdev = {
786 * Init & exit routines 485 * Init & exit routines
787 */ 486 */
788 487
789static int __devinit iTCO_wdt_init(struct pci_dev *pdev, 488static void __devexit iTCO_wdt_cleanup(void)
790 const struct pci_device_id *ent, struct platform_device *dev) 489{
490 /* Stop the timer before we leave */
491 if (!nowayout)
492 iTCO_wdt_stop();
493
494 /* Deregister */
495 misc_deregister(&iTCO_wdt_miscdev);
496
497 /* release resources */
498 release_region(iTCO_wdt_private.tco_res->start,
499 resource_size(iTCO_wdt_private.tco_res));
500 release_region(iTCO_wdt_private.smi_res->start,
501 resource_size(iTCO_wdt_private.smi_res));
502 if (iTCO_wdt_private.iTCO_version == 2) {
503 iounmap(iTCO_wdt_private.gcs);
504 release_mem_region(iTCO_wdt_private.gcs_res->start,
505 resource_size(iTCO_wdt_private.gcs_res));
506 }
507
508 iTCO_wdt_private.tco_res = NULL;
509 iTCO_wdt_private.smi_res = NULL;
510 iTCO_wdt_private.gcs_res = NULL;
511 iTCO_wdt_private.gcs = NULL;
512}
513
514static int __devinit iTCO_wdt_probe(struct platform_device *dev)
791{ 515{
792 int ret; 516 int ret = -ENODEV;
793 u32 base_address;
794 unsigned long RCBA;
795 unsigned long val32; 517 unsigned long val32;
518 struct lpc_ich_info *ich_info = dev->dev.platform_data;
519
520 if (!ich_info)
521 goto out;
522
523 spin_lock_init(&iTCO_wdt_private.io_lock);
524
525 iTCO_wdt_private.tco_res =
526 platform_get_resource(dev, IORESOURCE_IO, ICH_RES_IO_TCO);
527 if (!iTCO_wdt_private.tco_res)
528 goto out;
529
530 iTCO_wdt_private.smi_res =
531 platform_get_resource(dev, IORESOURCE_IO, ICH_RES_IO_SMI);
532 if (!iTCO_wdt_private.smi_res)
533 goto out;
534
535 iTCO_wdt_private.iTCO_version = ich_info->iTCO_version;
536 iTCO_wdt_private.dev = dev;
537 iTCO_wdt_private.pdev = to_pci_dev(dev->dev.parent);
796 538
797 /* 539 /*
798 * Find the ACPI/PM base I/O address which is the base 540 * Get the Memory-Mapped GCS register, we need it for the
799 * for the TCO registers (TCOBASE=ACPIBASE + 0x60) 541 * NO_REBOOT flag (TCO v2).
800 * ACPIBASE is bits [15:7] from 0x40-0x43
801 */ 542 */
802 pci_read_config_dword(pdev, 0x40, &base_address);
803 base_address &= 0x0000ff80;
804 if (base_address == 0x00000000) {
805 /* Something's wrong here, ACPIBASE has to be set */
806 pr_err("failed to get TCOBASE address, device disabled by hardware/BIOS\n");
807 return -ENODEV;
808 }
809 iTCO_wdt_private.iTCO_version =
810 iTCO_chipset_info[ent->driver_data].iTCO_version;
811 iTCO_wdt_private.ACPIBASE = base_address;
812 iTCO_wdt_private.pdev = pdev;
813
814 /* Get the Memory-Mapped GCS register, we need it for the
815 NO_REBOOT flag (TCO v2). To get access to it you have to
816 read RCBA from PCI Config space 0xf0 and use it as base.
817 GCS = RCBA + ICH6_GCS(0x3410). */
818 if (iTCO_wdt_private.iTCO_version == 2) { 543 if (iTCO_wdt_private.iTCO_version == 2) {
819 pci_read_config_dword(pdev, 0xf0, &base_address); 544 iTCO_wdt_private.gcs_res = platform_get_resource(dev,
820 if ((base_address & 1) == 0) { 545 IORESOURCE_MEM,
821 pr_err("RCBA is disabled by hardware/BIOS, device disabled\n"); 546 ICH_RES_MEM_GCS);
822 ret = -ENODEV; 547
548 if (!iTCO_wdt_private.gcs_res)
549 goto out;
550
551 if (!request_mem_region(iTCO_wdt_private.gcs_res->start,
552 resource_size(iTCO_wdt_private.gcs_res), dev->name)) {
553 ret = -EBUSY;
823 goto out; 554 goto out;
824 } 555 }
825 RCBA = base_address & 0xffffc000; 556 iTCO_wdt_private.gcs = ioremap(iTCO_wdt_private.gcs_res->start,
826 iTCO_wdt_private.gcs = ioremap((RCBA + 0x3410), 4); 557 resource_size(iTCO_wdt_private.gcs_res));
558 if (!iTCO_wdt_private.gcs) {
559 ret = -EIO;
560 goto unreg_gcs;
561 }
827 } 562 }
828 563
829 /* Check chipset's NO_REBOOT bit */ 564 /* Check chipset's NO_REBOOT bit */
830 if (iTCO_wdt_unset_NO_REBOOT_bit() && iTCO_vendor_check_noreboot_on()) { 565 if (iTCO_wdt_unset_NO_REBOOT_bit() && iTCO_vendor_check_noreboot_on()) {
831 pr_info("unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n"); 566 pr_info("unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n");
832 ret = -ENODEV; /* Cannot reset NO_REBOOT bit */ 567 ret = -ENODEV; /* Cannot reset NO_REBOOT bit */
833 goto out_unmap; 568 goto unmap_gcs;
834 } 569 }
835 570
836 /* Set the NO_REBOOT bit to prevent later reboots, just for sure */ 571 /* Set the NO_REBOOT bit to prevent later reboots, just for sure */
837 iTCO_wdt_set_NO_REBOOT_bit(); 572 iTCO_wdt_set_NO_REBOOT_bit();
838 573
839 /* The TCO logic uses the TCO_EN bit in the SMI_EN register */ 574 /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
840 if (!request_region(SMI_EN, 4, "iTCO_wdt")) { 575 if (!request_region(iTCO_wdt_private.smi_res->start,
841 pr_err("I/O address 0x%04lx already in use, device disabled\n", 576 resource_size(iTCO_wdt_private.smi_res), dev->name)) {
577 pr_err("I/O address 0x%04llx already in use, device disabled\n",
842 SMI_EN); 578 SMI_EN);
843 ret = -EIO; 579 ret = -EBUSY;
844 goto out_unmap; 580 goto unmap_gcs;
845 } 581 }
846 if (turn_SMI_watchdog_clear_off >= iTCO_wdt_private.iTCO_version) { 582 if (turn_SMI_watchdog_clear_off >= iTCO_wdt_private.iTCO_version) {
847 /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */ 583 /*
584 * Bit 13: TCO_EN -> 0
585 * Disables TCO logic generating an SMI#
586 */
848 val32 = inl(SMI_EN); 587 val32 = inl(SMI_EN);
849 val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ 588 val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */
850 outl(val32, SMI_EN); 589 outl(val32, SMI_EN);
851 } 590 }
852 591
853 /* The TCO I/O registers reside in a 32-byte range pointed to 592 if (!request_region(iTCO_wdt_private.tco_res->start,
854 by the TCOBASE value */ 593 resource_size(iTCO_wdt_private.tco_res), dev->name)) {
855 if (!request_region(TCOBASE, 0x20, "iTCO_wdt")) { 594 pr_err("I/O address 0x%04llx already in use, device disabled\n",
856 pr_err("I/O address 0x%04lx already in use, device disabled\n",
857 TCOBASE); 595 TCOBASE);
858 ret = -EIO; 596 ret = -EBUSY;
859 goto unreg_smi_en; 597 goto unreg_smi;
860 } 598 }
861 599
862 pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04lx)\n", 600 pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n",
863 iTCO_chipset_info[ent->driver_data].name, 601 ich_info->name, ich_info->iTCO_version, TCOBASE);
864 iTCO_chipset_info[ent->driver_data].iTCO_version,
865 TCOBASE);
866 602
867 /* Clear out the (probably old) status */ 603 /* Clear out the (probably old) status */
868 outw(0x0008, TCO1_STS); /* Clear the Time Out Status bit */ 604 outw(0x0008, TCO1_STS); /* Clear the Time Out Status bit */
@@ -883,7 +619,7 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
883 if (ret != 0) { 619 if (ret != 0) {
884 pr_err("cannot register miscdev on minor=%d (err=%d)\n", 620 pr_err("cannot register miscdev on minor=%d (err=%d)\n",
885 WATCHDOG_MINOR, ret); 621 WATCHDOG_MINOR, ret);
886 goto unreg_region; 622 goto unreg_tco;
887 } 623 }
888 624
889 pr_info("initialized. heartbeat=%d sec (nowayout=%d)\n", 625 pr_info("initialized. heartbeat=%d sec (nowayout=%d)\n",
@@ -891,62 +627,31 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
891 627
892 return 0; 628 return 0;
893 629
894unreg_region: 630unreg_tco:
895 release_region(TCOBASE, 0x20); 631 release_region(iTCO_wdt_private.tco_res->start,
896unreg_smi_en: 632 resource_size(iTCO_wdt_private.tco_res));
897 release_region(SMI_EN, 4); 633unreg_smi:
898out_unmap: 634 release_region(iTCO_wdt_private.smi_res->start,
635 resource_size(iTCO_wdt_private.smi_res));
636unmap_gcs:
899 if (iTCO_wdt_private.iTCO_version == 2) 637 if (iTCO_wdt_private.iTCO_version == 2)
900 iounmap(iTCO_wdt_private.gcs); 638 iounmap(iTCO_wdt_private.gcs);
901out: 639unreg_gcs:
902 iTCO_wdt_private.ACPIBASE = 0;
903 return ret;
904}
905
906static void __devexit iTCO_wdt_cleanup(void)
907{
908 /* Stop the timer before we leave */
909 if (!nowayout)
910 iTCO_wdt_stop();
911
912 /* Deregister */
913 misc_deregister(&iTCO_wdt_miscdev);
914 release_region(TCOBASE, 0x20);
915 release_region(SMI_EN, 4);
916 if (iTCO_wdt_private.iTCO_version == 2) 640 if (iTCO_wdt_private.iTCO_version == 2)
917 iounmap(iTCO_wdt_private.gcs); 641 release_mem_region(iTCO_wdt_private.gcs_res->start,
918 pci_dev_put(iTCO_wdt_private.pdev); 642 resource_size(iTCO_wdt_private.gcs_res));
919 iTCO_wdt_private.ACPIBASE = 0; 643out:
920} 644 iTCO_wdt_private.tco_res = NULL;
921 645 iTCO_wdt_private.smi_res = NULL;
922static int __devinit iTCO_wdt_probe(struct platform_device *dev) 646 iTCO_wdt_private.gcs_res = NULL;
923{ 647 iTCO_wdt_private.gcs = NULL;
924 int ret = -ENODEV;
925 int found = 0;
926 struct pci_dev *pdev = NULL;
927 const struct pci_device_id *ent;
928
929 spin_lock_init(&iTCO_wdt_private.io_lock);
930
931 for_each_pci_dev(pdev) {
932 ent = pci_match_id(iTCO_wdt_pci_tbl, pdev);
933 if (ent) {
934 found++;
935 ret = iTCO_wdt_init(pdev, ent, dev);
936 if (!ret)
937 break;
938 }
939 }
940
941 if (!found)
942 pr_info("No device detected\n");
943 648
944 return ret; 649 return ret;
945} 650}
946 651
947static int __devexit iTCO_wdt_remove(struct platform_device *dev) 652static int __devexit iTCO_wdt_remove(struct platform_device *dev)
948{ 653{
949 if (iTCO_wdt_private.ACPIBASE) 654 if (iTCO_wdt_private.tco_res || iTCO_wdt_private.smi_res)
950 iTCO_wdt_cleanup(); 655 iTCO_wdt_cleanup();
951 656
952 return 0; 657 return 0;
@@ -977,23 +682,11 @@ static int __init iTCO_wdt_init_module(void)
977 if (err) 682 if (err)
978 return err; 683 return err;
979 684
980 iTCO_wdt_platform_device = platform_device_register_simple(DRV_NAME,
981 -1, NULL, 0);
982 if (IS_ERR(iTCO_wdt_platform_device)) {
983 err = PTR_ERR(iTCO_wdt_platform_device);
984 goto unreg_platform_driver;
985 }
986
987 return 0; 685 return 0;
988
989unreg_platform_driver:
990 platform_driver_unregister(&iTCO_wdt_driver);
991 return err;
992} 686}
993 687
994static void __exit iTCO_wdt_cleanup_module(void) 688static void __exit iTCO_wdt_cleanup_module(void)
995{ 689{
996 platform_device_unregister(iTCO_wdt_platform_device);
997 platform_driver_unregister(&iTCO_wdt_driver); 690 platform_driver_unregister(&iTCO_wdt_driver);
998 pr_info("Watchdog Module Unloaded\n"); 691 pr_info("Watchdog Module Unloaded\n");
999} 692}
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 37268c5bb98b..1b35d6bd06b0 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -292,7 +292,6 @@ static const struct inode_operations bad_inode_ops =
292 .getxattr = bad_inode_getxattr, 292 .getxattr = bad_inode_getxattr,
293 .listxattr = bad_inode_listxattr, 293 .listxattr = bad_inode_listxattr,
294 .removexattr = bad_inode_removexattr, 294 .removexattr = bad_inode_removexattr,
295 /* truncate_range returns void */
296}; 295};
297 296
298 297
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 352ba149d23e..389ba8312d5d 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -16,5 +16,5 @@
16libore-y := ore.o ore_raid.o 16libore-y := ore.o ore_raid.o
17obj-$(CONFIG_ORE) += libore.o 17obj-$(CONFIG_ORE) += libore.o
18 18
19exofs-y := inode.o file.o symlink.o namei.o dir.o super.o 19exofs-y := inode.o file.o symlink.o namei.o dir.o super.o sys.o
20obj-$(CONFIG_EXOFS_FS) += exofs.o 20obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index ca9d49665ef6..fffe86fd7a42 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -56,6 +56,9 @@
56struct exofs_dev { 56struct exofs_dev {
57 struct ore_dev ored; 57 struct ore_dev ored;
58 unsigned did; 58 unsigned did;
59 unsigned urilen;
60 uint8_t *uri;
61 struct kobject ed_kobj;
59}; 62};
60/* 63/*
61 * our extension to the in-memory superblock 64 * our extension to the in-memory superblock
@@ -73,6 +76,7 @@ struct exofs_sb_info {
73 struct ore_layout layout; /* Default files layout */ 76 struct ore_layout layout; /* Default files layout */
74 struct ore_comp one_comp; /* id & cred of partition id=0*/ 77 struct ore_comp one_comp; /* id & cred of partition id=0*/
75 struct ore_components oc; /* comps for the partition */ 78 struct ore_components oc; /* comps for the partition */
79 struct kobject s_kobj; /* holds per-sbi kobject */
76}; 80};
77 81
78/* 82/*
@@ -176,6 +180,16 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
176 const struct osd_obj_id *obj); 180 const struct osd_obj_id *obj);
177int exofs_sbi_write_stats(struct exofs_sb_info *sbi); 181int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
178 182
183/* sys.c */
184int exofs_sysfs_init(void);
185void exofs_sysfs_uninit(void);
186int exofs_sysfs_sb_add(struct exofs_sb_info *sbi,
187 struct exofs_dt_device_info *dt_dev);
188void exofs_sysfs_sb_del(struct exofs_sb_info *sbi);
189int exofs_sysfs_odev_add(struct exofs_dev *edev,
190 struct exofs_sb_info *sbi);
191void exofs_sysfs_dbg_print(void);
192
179/********************* 193/*********************
180 * operation vectors * 194 * operation vectors *
181 *********************/ 195 *********************/
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 735ca06430ac..433783624d10 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -472,6 +472,7 @@ static void exofs_put_super(struct super_block *sb)
472 _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0), 472 _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0),
473 sbi->one_comp.obj.partition); 473 sbi->one_comp.obj.partition);
474 474
475 exofs_sysfs_sb_del(sbi);
475 bdi_destroy(&sbi->bdi); 476 bdi_destroy(&sbi->bdi);
476 exofs_free_sbi(sbi); 477 exofs_free_sbi(sbi);
477 sb->s_fs_info = NULL; 478 sb->s_fs_info = NULL;
@@ -632,6 +633,12 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
632 memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0], 633 memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0],
633 (numdevs - 1) * sizeof(sbi->oc.ods[0])); 634 (numdevs - 1) * sizeof(sbi->oc.ods[0]));
634 635
636 /* create sysfs subdir under which we put the device table
637 * And cluster layout. A Superblock is identified by the string:
638 * "dev[0].osdname"_"pid"
639 */
640 exofs_sysfs_sb_add(sbi, &dt->dt_dev_table[0]);
641
635 for (i = 0; i < numdevs; i++) { 642 for (i = 0; i < numdevs; i++) {
636 struct exofs_fscb fscb; 643 struct exofs_fscb fscb;
637 struct osd_dev_info odi; 644 struct osd_dev_info odi;
@@ -657,6 +664,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
657 eds[i].ored.od = fscb_od; 664 eds[i].ored.od = fscb_od;
658 ++sbi->oc.numdevs; 665 ++sbi->oc.numdevs;
659 fscb_od = NULL; 666 fscb_od = NULL;
667 exofs_sysfs_odev_add(&eds[i], sbi);
660 continue; 668 continue;
661 } 669 }
662 670
@@ -682,6 +690,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
682 odi.osdname); 690 odi.osdname);
683 goto out; 691 goto out;
684 } 692 }
693 exofs_sysfs_odev_add(&eds[i], sbi);
685 694
686 /* TODO: verify other information is correct and FS-uuid 695 /* TODO: verify other information is correct and FS-uuid
687 * matches. Benny what did you say about device table 696 * matches. Benny what did you say about device table
@@ -745,7 +754,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
745 sbi->one_comp.obj.partition = opts->pid; 754 sbi->one_comp.obj.partition = opts->pid;
746 sbi->one_comp.obj.id = 0; 755 sbi->one_comp.obj.id = 0;
747 exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); 756 exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
748 sbi->oc.numdevs = 1;
749 sbi->oc.single_comp = EC_SINGLE_COMP; 757 sbi->oc.single_comp = EC_SINGLE_COMP;
750 sbi->oc.comps = &sbi->one_comp; 758 sbi->oc.comps = &sbi->one_comp;
751 759
@@ -804,6 +812,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
804 goto free_sbi; 812 goto free_sbi;
805 813
806 ore_comp_set_dev(&sbi->oc, 0, od); 814 ore_comp_set_dev(&sbi->oc, 0, od);
815 sbi->oc.numdevs = 1;
807 } 816 }
808 817
809 __sbi_read_stats(sbi); 818 __sbi_read_stats(sbi);
@@ -844,6 +853,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
844 goto free_sbi; 853 goto free_sbi;
845 } 854 }
846 855
856 exofs_sysfs_dbg_print();
847 _exofs_print_device("Mounting", opts->dev_name, 857 _exofs_print_device("Mounting", opts->dev_name,
848 ore_comp_dev(&sbi->oc, 0), 858 ore_comp_dev(&sbi->oc, 0),
849 sbi->one_comp.obj.partition); 859 sbi->one_comp.obj.partition);
@@ -1023,6 +1033,9 @@ static int __init init_exofs(void)
1023 if (err) 1033 if (err)
1024 goto out_d; 1034 goto out_d;
1025 1035
1036 /* We don't fail if sysfs creation failed */
1037 exofs_sysfs_init();
1038
1026 return 0; 1039 return 0;
1027out_d: 1040out_d:
1028 destroy_inodecache(); 1041 destroy_inodecache();
@@ -1032,6 +1045,7 @@ out:
1032 1045
1033static void __exit exit_exofs(void) 1046static void __exit exit_exofs(void)
1034{ 1047{
1048 exofs_sysfs_uninit();
1035 unregister_filesystem(&exofs_type); 1049 unregister_filesystem(&exofs_type);
1036 destroy_inodecache(); 1050 destroy_inodecache();
1037} 1051}
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
new file mode 100644
index 000000000000..e32bc919e4e3
--- /dev/null
+++ b/fs/exofs/sys.c
@@ -0,0 +1,200 @@
1/*
2 * Copyright (C) 2012
3 * Sachin Bhamare <sbhamare@panasas.com>
4 * Boaz Harrosh <bharrosh@panasas.com>
5 *
6 * This file is part of exofs.
7 *
8 * exofs is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License 2 as published by
10 * the Free Software Foundation.
11 *
12 * exofs is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with exofs; if not, write to the:
19 * Free Software Foundation <licensing@fsf.org>
20 */
21
22#include <linux/kobject.h>
23#include <linux/device.h>
24
25#include "exofs.h"
26
27struct odev_attr {
28 struct attribute attr;
29 ssize_t (*show)(struct exofs_dev *, char *);
30 ssize_t (*store)(struct exofs_dev *, const char *, size_t);
31};
32
33static ssize_t odev_attr_show(struct kobject *kobj, struct attribute *attr,
34 char *buf)
35{
36 struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj);
37 struct odev_attr *a = container_of(attr, struct odev_attr, attr);
38
39 return a->show ? a->show(edp, buf) : 0;
40}
41
42static ssize_t odev_attr_store(struct kobject *kobj, struct attribute *attr,
43 const char *buf, size_t len)
44{
45 struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj);
46 struct odev_attr *a = container_of(attr, struct odev_attr, attr);
47
48 return a->store ? a->store(edp, buf, len) : len;
49}
50
51static const struct sysfs_ops odev_attr_ops = {
52 .show = odev_attr_show,
53 .store = odev_attr_store,
54};
55
56
57static struct kset *exofs_kset;
58
59static ssize_t osdname_show(struct exofs_dev *edp, char *buf)
60{
61 struct osd_dev *odev = edp->ored.od;
62 const struct osd_dev_info *odi = osduld_device_info(odev);
63
64 return snprintf(buf, odi->osdname_len + 1, "%s", odi->osdname);
65}
66
67static ssize_t systemid_show(struct exofs_dev *edp, char *buf)
68{
69 struct osd_dev *odev = edp->ored.od;
70 const struct osd_dev_info *odi = osduld_device_info(odev);
71
72 memcpy(buf, odi->systemid, odi->systemid_len);
73 return odi->systemid_len;
74}
75
76static ssize_t uri_show(struct exofs_dev *edp, char *buf)
77{
78 return snprintf(buf, edp->urilen, "%s", edp->uri);
79}
80
81static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len)
82{
83 edp->urilen = strlen(buf) + 1;
84 edp->uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL);
85 strncpy(edp->uri, buf, edp->urilen);
86 return edp->urilen;
87}
88
89#define OSD_ATTR(name, mode, show, store) \
90 static struct odev_attr odev_attr_##name = \
91 __ATTR(name, mode, show, store)
92
93OSD_ATTR(osdname, S_IRUGO, osdname_show, NULL);
94OSD_ATTR(systemid, S_IRUGO, systemid_show, NULL);
95OSD_ATTR(uri, S_IRWXU, uri_show, uri_store);
96
97static struct attribute *odev_attrs[] = {
98 &odev_attr_osdname.attr,
99 &odev_attr_systemid.attr,
100 &odev_attr_uri.attr,
101 NULL,
102};
103
104static struct kobj_type odev_ktype = {
105 .default_attrs = odev_attrs,
106 .sysfs_ops = &odev_attr_ops,
107};
108
109static struct kobj_type uuid_ktype = {
110};
111
112void exofs_sysfs_dbg_print()
113{
114#ifdef CONFIG_EXOFS_DEBUG
115 struct kobject *k_name, *k_tmp;
116
117 list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) {
118 printk(KERN_INFO "%s: name %s ref %d\n",
119 __func__, kobject_name(k_name),
120 (int)atomic_read(&k_name->kref.refcount));
121 }
122#endif
123}
124/*
125 * This function removes all kobjects under exofs_kset
126 * At the end of it, exofs_kset kobject will have a refcount
127 * of 1 which gets decremented only on exofs module unload
128 */
129void exofs_sysfs_sb_del(struct exofs_sb_info *sbi)
130{
131 struct kobject *k_name, *k_tmp;
132 struct kobject *s_kobj = &sbi->s_kobj;
133
134 list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) {
135 /* Remove all that are children of this SBI */
136 if (k_name->parent == s_kobj)
137 kobject_put(k_name);
138 }
139 kobject_put(s_kobj);
140}
141
142/*
143 * This function creates sysfs entries to hold the current exofs cluster
144 * instance (uniquely identified by osdname,pid tuple).
145 * This function gets called once per exofs mount instance.
146 */
147int exofs_sysfs_sb_add(struct exofs_sb_info *sbi,
148 struct exofs_dt_device_info *dt_dev)
149{
150 struct kobject *s_kobj;
151 int retval = 0;
152 uint64_t pid = sbi->one_comp.obj.partition;
153
154 /* allocate new uuid dirent */
155 s_kobj = &sbi->s_kobj;
156 s_kobj->kset = exofs_kset;
157 retval = kobject_init_and_add(s_kobj, &uuid_ktype,
158 &exofs_kset->kobj, "%s_%llx", dt_dev->osdname, pid);
159 if (retval) {
160 EXOFS_ERR("ERROR: Failed to create sysfs entry for "
161 "uuid-%s_%llx => %d\n", dt_dev->osdname, pid, retval);
162 return -ENOMEM;
163 }
164 return 0;
165}
166
167int exofs_sysfs_odev_add(struct exofs_dev *edev, struct exofs_sb_info *sbi)
168{
169 struct kobject *d_kobj;
170 int retval = 0;
171
172 /* create osd device group which contains following attributes
173 * osdname, systemid & uri
174 */
175 d_kobj = &edev->ed_kobj;
176 d_kobj->kset = exofs_kset;
177 retval = kobject_init_and_add(d_kobj, &odev_ktype,
178 &sbi->s_kobj, "dev%u", edev->did);
179 if (retval) {
180 EXOFS_ERR("ERROR: Failed to create sysfs entry for "
181 "device dev%u\n", edev->did);
182 return retval;
183 }
184 return 0;
185}
186
187int exofs_sysfs_init(void)
188{
189 exofs_kset = kset_create_and_add("exofs", NULL, fs_kobj);
190 if (!exofs_kset) {
191 EXOFS_ERR("ERROR: kset_create_and_add exofs failed\n");
192 return -ENOMEM;
193 }
194 return 0;
195}
196
197void exofs_sysfs_uninit(void)
198{
199 kset_unregister(exofs_kset);
200}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a0e6c599147..f90f4f5cd421 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -29,9 +29,20 @@ config NFS_FS
29 29
30 If unsure, say N. 30 If unsure, say N.
31 31
32config NFS_V2
33 bool "NFS client support for NFS version 2"
34 depends on NFS_FS
35 default y
36 help
37 This option enables support for version 2 of the NFS protocol
38 (RFC 1094) in the kernel's NFS client.
39
40 If unsure, say Y.
41
32config NFS_V3 42config NFS_V3
33 bool "NFS client support for NFS version 3" 43 bool "NFS client support for NFS version 3"
34 depends on NFS_FS 44 depends on NFS_FS
45 default y
35 help 46 help
36 This option enables support for version 3 of the NFS protocol 47 This option enables support for version 3 of the NFS protocol
37 (RFC 1813) in the kernel's NFS client. 48 (RFC 1813) in the kernel's NFS client.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b58613d0abb3..7ddd45d9f170 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,11 +4,12 @@
4 4
5obj-$(CONFIG_NFS_FS) += nfs.o 5obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \ 8 direct.o pagelist.o read.o symlink.o unlink.o \
9 write.o namespace.o mount_clnt.o \ 9 write.o namespace.o mount_clnt.o \
10 dns_resolve.o cache_lib.o 10 dns_resolve.o cache_lib.o
11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
12nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o
12nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 13nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
13nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 14nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
14nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ 15nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7f6a23f0244e..7ae8a608956f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)
187 struct parallel_io *par = bio->bi_private; 187 struct parallel_io *par = bio->bi_private;
188 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 188 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
189 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 189 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
190 struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
191 190
192 do { 191 do {
193 struct page *page = bvec->bv_page; 192 struct page *page = bvec->bv_page;
@@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)
198 SetPageUptodate(page); 197 SetPageUptodate(page);
199 } while (bvec >= bio->bi_io_vec); 198 } while (bvec >= bio->bi_io_vec);
200 if (!uptodate) { 199 if (!uptodate) {
201 if (!rdata->pnfs_error) 200 struct nfs_read_data *rdata = par->data;
202 rdata->pnfs_error = -EIO; 201 struct nfs_pgio_header *header = rdata->header;
203 pnfs_set_lo_fail(rdata->lseg); 202
203 if (!header->pnfs_error)
204 header->pnfs_error = -EIO;
205 pnfs_set_lo_fail(header->lseg);
204 } 206 }
205 bio_put(bio); 207 bio_put(bio);
206 put_parallel(par); 208 put_parallel(par);
@@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused)
221{ 223{
222 struct nfs_read_data *rdata = data; 224 struct nfs_read_data *rdata = data;
223 225
224 rdata->task.tk_status = rdata->pnfs_error; 226 rdata->task.tk_status = rdata->header->pnfs_error;
225 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 227 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
226 schedule_work(&rdata->task.u.tk_work); 228 schedule_work(&rdata->task.u.tk_work);
227} 229}
@@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused)
229static enum pnfs_try_status 231static enum pnfs_try_status
230bl_read_pagelist(struct nfs_read_data *rdata) 232bl_read_pagelist(struct nfs_read_data *rdata)
231{ 233{
234 struct nfs_pgio_header *header = rdata->header;
232 int i, hole; 235 int i, hole;
233 struct bio *bio = NULL; 236 struct bio *bio = NULL;
234 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 237 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -239,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
239 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 242 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
240 243
241 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 244 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
242 rdata->npages, f_offset, (unsigned int)rdata->args.count); 245 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
243 246
244 par = alloc_parallel(rdata); 247 par = alloc_parallel(rdata);
245 if (!par) 248 if (!par)
@@ -249,17 +252,17 @@ bl_read_pagelist(struct nfs_read_data *rdata)
249 252
250 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 253 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
251 /* Code assumes extents are page-aligned */ 254 /* Code assumes extents are page-aligned */
252 for (i = pg_index; i < rdata->npages; i++) { 255 for (i = pg_index; i < rdata->pages.npages; i++) {
253 if (!extent_length) { 256 if (!extent_length) {
254 /* We've used up the previous extent */ 257 /* We've used up the previous extent */
255 bl_put_extent(be); 258 bl_put_extent(be);
256 bl_put_extent(cow_read); 259 bl_put_extent(cow_read);
257 bio = bl_submit_bio(READ, bio); 260 bio = bl_submit_bio(READ, bio);
258 /* Get the next one */ 261 /* Get the next one */
259 be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg), 262 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
260 isect, &cow_read); 263 isect, &cow_read);
261 if (!be) { 264 if (!be) {
262 rdata->pnfs_error = -EIO; 265 header->pnfs_error = -EIO;
263 goto out; 266 goto out;
264 } 267 }
265 extent_length = be->be_length - 268 extent_length = be->be_length -
@@ -282,11 +285,12 @@ bl_read_pagelist(struct nfs_read_data *rdata)
282 struct pnfs_block_extent *be_read; 285 struct pnfs_block_extent *be_read;
283 286
284 be_read = (hole && cow_read) ? cow_read : be; 287 be_read = (hole && cow_read) ? cow_read : be;
285 bio = bl_add_page_to_bio(bio, rdata->npages - i, READ, 288 bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
289 READ,
286 isect, pages[i], be_read, 290 isect, pages[i], be_read,
287 bl_end_io_read, par); 291 bl_end_io_read, par);
288 if (IS_ERR(bio)) { 292 if (IS_ERR(bio)) {
289 rdata->pnfs_error = PTR_ERR(bio); 293 header->pnfs_error = PTR_ERR(bio);
290 bio = NULL; 294 bio = NULL;
291 goto out; 295 goto out;
292 } 296 }
@@ -294,9 +298,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
294 isect += PAGE_CACHE_SECTORS; 298 isect += PAGE_CACHE_SECTORS;
295 extent_length -= PAGE_CACHE_SECTORS; 299 extent_length -= PAGE_CACHE_SECTORS;
296 } 300 }
297 if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) { 301 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
298 rdata->res.eof = 1; 302 rdata->res.eof = 1;
299 rdata->res.count = rdata->inode->i_size - f_offset; 303 rdata->res.count = header->inode->i_size - f_offset;
300 } else { 304 } else {
301 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; 305 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
302 } 306 }
@@ -345,7 +349,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
345 struct parallel_io *par = bio->bi_private; 349 struct parallel_io *par = bio->bi_private;
346 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 350 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
347 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 351 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
348 struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
349 352
350 do { 353 do {
351 struct page *page = bvec->bv_page; 354 struct page *page = bvec->bv_page;
@@ -358,9 +361,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
358 } while (bvec >= bio->bi_io_vec); 361 } while (bvec >= bio->bi_io_vec);
359 362
360 if (unlikely(!uptodate)) { 363 if (unlikely(!uptodate)) {
361 if (!wdata->pnfs_error) 364 struct nfs_write_data *data = par->data;
362 wdata->pnfs_error = -EIO; 365 struct nfs_pgio_header *header = data->header;
363 pnfs_set_lo_fail(wdata->lseg); 366
367 if (!header->pnfs_error)
368 header->pnfs_error = -EIO;
369 pnfs_set_lo_fail(header->lseg);
364 } 370 }
365 bio_put(bio); 371 bio_put(bio);
366 put_parallel(par); 372 put_parallel(par);
@@ -370,12 +376,13 @@ static void bl_end_io_write(struct bio *bio, int err)
370{ 376{
371 struct parallel_io *par = bio->bi_private; 377 struct parallel_io *par = bio->bi_private;
372 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 378 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
373 struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; 379 struct nfs_write_data *data = par->data;
380 struct nfs_pgio_header *header = data->header;
374 381
375 if (!uptodate) { 382 if (!uptodate) {
376 if (!wdata->pnfs_error) 383 if (!header->pnfs_error)
377 wdata->pnfs_error = -EIO; 384 header->pnfs_error = -EIO;
378 pnfs_set_lo_fail(wdata->lseg); 385 pnfs_set_lo_fail(header->lseg);
379 } 386 }
380 bio_put(bio); 387 bio_put(bio);
381 put_parallel(par); 388 put_parallel(par);
@@ -391,9 +398,9 @@ static void bl_write_cleanup(struct work_struct *work)
391 dprintk("%s enter\n", __func__); 398 dprintk("%s enter\n", __func__);
392 task = container_of(work, struct rpc_task, u.tk_work); 399 task = container_of(work, struct rpc_task, u.tk_work);
393 wdata = container_of(task, struct nfs_write_data, task); 400 wdata = container_of(task, struct nfs_write_data, task);
394 if (likely(!wdata->pnfs_error)) { 401 if (likely(!wdata->header->pnfs_error)) {
395 /* Marks for LAYOUTCOMMIT */ 402 /* Marks for LAYOUTCOMMIT */
396 mark_extents_written(BLK_LSEG2EXT(wdata->lseg), 403 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
397 wdata->args.offset, wdata->args.count); 404 wdata->args.offset, wdata->args.count);
398 } 405 }
399 pnfs_ld_write_done(wdata); 406 pnfs_ld_write_done(wdata);
@@ -404,12 +411,12 @@ static void bl_end_par_io_write(void *data, int num_se)
404{ 411{
405 struct nfs_write_data *wdata = data; 412 struct nfs_write_data *wdata = data;
406 413
407 if (unlikely(wdata->pnfs_error)) { 414 if (unlikely(wdata->header->pnfs_error)) {
408 bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, 415 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
409 num_se); 416 num_se);
410 } 417 }
411 418
412 wdata->task.tk_status = wdata->pnfs_error; 419 wdata->task.tk_status = wdata->header->pnfs_error;
413 wdata->verf.committed = NFS_FILE_SYNC; 420 wdata->verf.committed = NFS_FILE_SYNC;
414 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 421 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
415 schedule_work(&wdata->task.u.tk_work); 422 schedule_work(&wdata->task.u.tk_work);
@@ -540,6 +547,7 @@ check_page:
540static enum pnfs_try_status 547static enum pnfs_try_status
541bl_write_pagelist(struct nfs_write_data *wdata, int sync) 548bl_write_pagelist(struct nfs_write_data *wdata, int sync)
542{ 549{
550 struct nfs_pgio_header *header = wdata->header;
543 int i, ret, npg_zero, pg_index, last = 0; 551 int i, ret, npg_zero, pg_index, last = 0;
544 struct bio *bio = NULL; 552 struct bio *bio = NULL;
545 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 553 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -552,7 +560,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
552 pgoff_t index; 560 pgoff_t index;
553 u64 temp; 561 u64 temp;
554 int npg_per_block = 562 int npg_per_block =
555 NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; 563 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
556 564
557 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); 565 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
558 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 566 /* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -566,7 +574,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
566 /* At this point, have to be more careful with error handling */ 574 /* At this point, have to be more careful with error handling */
567 575
568 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); 576 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
569 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); 577 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
570 if (!be || !is_writable(be, isect)) { 578 if (!be || !is_writable(be, isect)) {
571 dprintk("%s no matching extents!\n", __func__); 579 dprintk("%s no matching extents!\n", __func__);
572 goto out_mds; 580 goto out_mds;
@@ -597,10 +605,10 @@ fill_invalid_ext:
597 dprintk("%s zero %dth page: index %lu isect %llu\n", 605 dprintk("%s zero %dth page: index %lu isect %llu\n",
598 __func__, npg_zero, index, 606 __func__, npg_zero, index,
599 (unsigned long long)isect); 607 (unsigned long long)isect);
600 page = bl_find_get_zeroing_page(wdata->inode, index, 608 page = bl_find_get_zeroing_page(header->inode, index,
601 cow_read); 609 cow_read);
602 if (unlikely(IS_ERR(page))) { 610 if (unlikely(IS_ERR(page))) {
603 wdata->pnfs_error = PTR_ERR(page); 611 header->pnfs_error = PTR_ERR(page);
604 goto out; 612 goto out;
605 } else if (page == NULL) 613 } else if (page == NULL)
606 goto next_page; 614 goto next_page;
@@ -612,7 +620,7 @@ fill_invalid_ext:
612 __func__, ret); 620 __func__, ret);
613 end_page_writeback(page); 621 end_page_writeback(page);
614 page_cache_release(page); 622 page_cache_release(page);
615 wdata->pnfs_error = ret; 623 header->pnfs_error = ret;
616 goto out; 624 goto out;
617 } 625 }
618 if (likely(!bl_push_one_short_extent(be->be_inval))) 626 if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -620,11 +628,11 @@ fill_invalid_ext:
620 else { 628 else {
621 end_page_writeback(page); 629 end_page_writeback(page);
622 page_cache_release(page); 630 page_cache_release(page);
623 wdata->pnfs_error = -ENOMEM; 631 header->pnfs_error = -ENOMEM;
624 goto out; 632 goto out;
625 } 633 }
626 /* FIXME: This should be done in bi_end_io */ 634 /* FIXME: This should be done in bi_end_io */
627 mark_extents_written(BLK_LSEG2EXT(wdata->lseg), 635 mark_extents_written(BLK_LSEG2EXT(header->lseg),
628 page->index << PAGE_CACHE_SHIFT, 636 page->index << PAGE_CACHE_SHIFT,
629 PAGE_CACHE_SIZE); 637 PAGE_CACHE_SIZE);
630 638
@@ -632,7 +640,7 @@ fill_invalid_ext:
632 isect, page, be, 640 isect, page, be,
633 bl_end_io_write_zero, par); 641 bl_end_io_write_zero, par);
634 if (IS_ERR(bio)) { 642 if (IS_ERR(bio)) {
635 wdata->pnfs_error = PTR_ERR(bio); 643 header->pnfs_error = PTR_ERR(bio);
636 bio = NULL; 644 bio = NULL;
637 goto out; 645 goto out;
638 } 646 }
@@ -647,16 +655,16 @@ next_page:
647 655
648 /* Middle pages */ 656 /* Middle pages */
649 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 657 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
650 for (i = pg_index; i < wdata->npages; i++) { 658 for (i = pg_index; i < wdata->pages.npages; i++) {
651 if (!extent_length) { 659 if (!extent_length) {
652 /* We've used up the previous extent */ 660 /* We've used up the previous extent */
653 bl_put_extent(be); 661 bl_put_extent(be);
654 bio = bl_submit_bio(WRITE, bio); 662 bio = bl_submit_bio(WRITE, bio);
655 /* Get the next one */ 663 /* Get the next one */
656 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), 664 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
657 isect, NULL); 665 isect, NULL);
658 if (!be || !is_writable(be, isect)) { 666 if (!be || !is_writable(be, isect)) {
659 wdata->pnfs_error = -EINVAL; 667 header->pnfs_error = -EINVAL;
660 goto out; 668 goto out;
661 } 669 }
662 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 670 if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -664,7 +672,7 @@ next_page:
664 be->be_inval))) 672 be->be_inval)))
665 par->bse_count++; 673 par->bse_count++;
666 else { 674 else {
667 wdata->pnfs_error = -ENOMEM; 675 header->pnfs_error = -ENOMEM;
668 goto out; 676 goto out;
669 } 677 }
670 } 678 }
@@ -677,15 +685,15 @@ next_page:
677 if (unlikely(ret)) { 685 if (unlikely(ret)) {
678 dprintk("%s bl_mark_sectors_init fail %d\n", 686 dprintk("%s bl_mark_sectors_init fail %d\n",
679 __func__, ret); 687 __func__, ret);
680 wdata->pnfs_error = ret; 688 header->pnfs_error = ret;
681 goto out; 689 goto out;
682 } 690 }
683 } 691 }
684 bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, 692 bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
685 isect, pages[i], be, 693 isect, pages[i], be,
686 bl_end_io_write, par); 694 bl_end_io_write, par);
687 if (IS_ERR(bio)) { 695 if (IS_ERR(bio)) {
688 wdata->pnfs_error = PTR_ERR(bio); 696 header->pnfs_error = PTR_ERR(bio);
689 bio = NULL; 697 bio = NULL;
690 goto out; 698 goto out;
691 } 699 }
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a5c88a554d92..c96554245ccf 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -123,7 +123,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
123 uint8_t *dataptr; 123 uint8_t *dataptr;
124 DECLARE_WAITQUEUE(wq, current); 124 DECLARE_WAITQUEUE(wq, current);
125 int offset, len, i, rc; 125 int offset, len, i, rc;
126 struct net *net = server->nfs_client->net; 126 struct net *net = server->nfs_client->cl_net;
127 struct nfs_net *nn = net_generic(net, nfs_net_id); 127 struct nfs_net *nn = net_generic(net, nfs_net_id);
128 struct bl_dev_msg *reply = &nn->bl_mount_reply; 128 struct bl_dev_msg *reply = &nn->bl_mount_reply;
129 129
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 60f7e4ec842c..7d108753af81 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -65,7 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
65static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) 65static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
66{ 66{
67 int ret = 0; 67 int ret = 0;
68 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 68 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
69 69
70 if (clp->rpc_ops->version != 4 || minorversion != 0) 70 if (clp->rpc_ops->version != 4 || minorversion != 0)
71 return ret; 71 return ret;
@@ -90,7 +90,9 @@ static bool nfs4_disable_idmapping = true;
90 * RPC cruft for NFS 90 * RPC cruft for NFS
91 */ 91 */
92static const struct rpc_version *nfs_version[5] = { 92static const struct rpc_version *nfs_version[5] = {
93#ifdef CONFIG_NFS_V2
93 [2] = &nfs_version2, 94 [2] = &nfs_version2,
95#endif
94#ifdef CONFIG_NFS_V3 96#ifdef CONFIG_NFS_V3
95 [3] = &nfs_version3, 97 [3] = &nfs_version3,
96#endif 98#endif
@@ -129,6 +131,7 @@ const struct rpc_program nfsacl_program = {
129#endif /* CONFIG_NFS_V3_ACL */ 131#endif /* CONFIG_NFS_V3_ACL */
130 132
131struct nfs_client_initdata { 133struct nfs_client_initdata {
134 unsigned long init_flags;
132 const char *hostname; 135 const char *hostname;
133 const struct sockaddr *addr; 136 const struct sockaddr *addr;
134 size_t addrlen; 137 size_t addrlen;
@@ -172,7 +175,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
172 clp->cl_rpcclient = ERR_PTR(-EINVAL); 175 clp->cl_rpcclient = ERR_PTR(-EINVAL);
173 176
174 clp->cl_proto = cl_init->proto; 177 clp->cl_proto = cl_init->proto;
175 clp->net = get_net(cl_init->net); 178 clp->cl_net = get_net(cl_init->net);
176 179
177#ifdef CONFIG_NFS_V4 180#ifdef CONFIG_NFS_V4
178 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); 181 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -182,7 +185,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
182 spin_lock_init(&clp->cl_lock); 185 spin_lock_init(&clp->cl_lock);
183 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 186 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
184 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 187 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
185 clp->cl_boot_time = CURRENT_TIME;
186 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; 188 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
187 clp->cl_minorversion = cl_init->minorversion; 189 clp->cl_minorversion = cl_init->minorversion;
188 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; 190 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
@@ -207,6 +209,7 @@ static void nfs4_shutdown_session(struct nfs_client *clp)
207 if (nfs4_has_session(clp)) { 209 if (nfs4_has_session(clp)) {
208 nfs4_deviceid_purge_client(clp); 210 nfs4_deviceid_purge_client(clp);
209 nfs4_destroy_session(clp->cl_session); 211 nfs4_destroy_session(clp->cl_session);
212 nfs4_destroy_clientid(clp);
210 } 213 }
211 214
212} 215}
@@ -235,6 +238,9 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
235 nfs_idmap_delete(clp); 238 nfs_idmap_delete(clp);
236 239
237 rpc_destroy_wait_queue(&clp->cl_rpcwaitq); 240 rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
241 kfree(clp->cl_serverowner);
242 kfree(clp->cl_serverscope);
243 kfree(clp->cl_implid);
238} 244}
239 245
240/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ 246/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
@@ -248,7 +254,7 @@ void nfs_cleanup_cb_ident_idr(struct net *net)
248/* nfs_client_lock held */ 254/* nfs_client_lock held */
249static void nfs_cb_idr_remove_locked(struct nfs_client *clp) 255static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
250{ 256{
251 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 257 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
252 258
253 if (clp->cl_cb_ident) 259 if (clp->cl_cb_ident)
254 idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); 260 idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
@@ -301,10 +307,8 @@ static void nfs_free_client(struct nfs_client *clp)
301 if (clp->cl_machine_cred != NULL) 307 if (clp->cl_machine_cred != NULL)
302 put_rpccred(clp->cl_machine_cred); 308 put_rpccred(clp->cl_machine_cred);
303 309
304 put_net(clp->net); 310 put_net(clp->cl_net);
305 kfree(clp->cl_hostname); 311 kfree(clp->cl_hostname);
306 kfree(clp->server_scope);
307 kfree(clp->impl_id);
308 kfree(clp); 312 kfree(clp);
309 313
310 dprintk("<-- nfs_free_client()\n"); 314 dprintk("<-- nfs_free_client()\n");
@@ -321,7 +325,7 @@ void nfs_put_client(struct nfs_client *clp)
321 return; 325 return;
322 326
323 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); 327 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
324 nn = net_generic(clp->net, nfs_net_id); 328 nn = net_generic(clp->cl_net, nfs_net_id);
325 329
326 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { 330 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
327 list_del(&clp->cl_share_link); 331 list_del(&clp->cl_share_link);
@@ -456,6 +460,8 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr,
456 clp->cl_cons_state == NFS_CS_SESSION_INITING)) 460 clp->cl_cons_state == NFS_CS_SESSION_INITING))
457 return false; 461 return false;
458 462
463 smp_rmb();
464
459 /* Match the version and minorversion */ 465 /* Match the version and minorversion */
460 if (clp->rpc_ops->version != 4 || 466 if (clp->rpc_ops->version != 4 ||
461 clp->cl_minorversion != minorversion) 467 clp->cl_minorversion != minorversion)
@@ -504,6 +510,47 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
504 return NULL; 510 return NULL;
505} 511}
506 512
513static bool nfs_client_init_is_complete(const struct nfs_client *clp)
514{
515 return clp->cl_cons_state != NFS_CS_INITING;
516}
517
518int nfs_wait_client_init_complete(const struct nfs_client *clp)
519{
520 return wait_event_killable(nfs_client_active_wq,
521 nfs_client_init_is_complete(clp));
522}
523
524/*
525 * Found an existing client. Make sure it's ready before returning.
526 */
527static struct nfs_client *
528nfs_found_client(const struct nfs_client_initdata *cl_init,
529 struct nfs_client *clp)
530{
531 int error;
532
533 error = nfs_wait_client_init_complete(clp);
534 if (error < 0) {
535 nfs_put_client(clp);
536 return ERR_PTR(-ERESTARTSYS);
537 }
538
539 if (clp->cl_cons_state < NFS_CS_READY) {
540 error = clp->cl_cons_state;
541 nfs_put_client(clp);
542 return ERR_PTR(error);
543 }
544
545 smp_rmb();
546
547 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
548
549 dprintk("<-- %s found nfs_client %p for %s\n",
550 __func__, clp, cl_init->hostname ?: "");
551 return clp;
552}
553
507/* 554/*
508 * Look up a client by IP address and protocol version 555 * Look up a client by IP address and protocol version
509 * - creates a new record if one doesn't yet exist 556 * - creates a new record if one doesn't yet exist
@@ -512,11 +559,9 @@ static struct nfs_client *
512nfs_get_client(const struct nfs_client_initdata *cl_init, 559nfs_get_client(const struct nfs_client_initdata *cl_init,
513 const struct rpc_timeout *timeparms, 560 const struct rpc_timeout *timeparms,
514 const char *ip_addr, 561 const char *ip_addr,
515 rpc_authflavor_t authflavour, 562 rpc_authflavor_t authflavour)
516 int noresvport)
517{ 563{
518 struct nfs_client *clp, *new = NULL; 564 struct nfs_client *clp, *new = NULL;
519 int error;
520 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); 565 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
521 566
522 dprintk("--> nfs_get_client(%s,v%u)\n", 567 dprintk("--> nfs_get_client(%s,v%u)\n",
@@ -527,60 +572,29 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
527 spin_lock(&nn->nfs_client_lock); 572 spin_lock(&nn->nfs_client_lock);
528 573
529 clp = nfs_match_client(cl_init); 574 clp = nfs_match_client(cl_init);
530 if (clp) 575 if (clp) {
531 goto found_client; 576 spin_unlock(&nn->nfs_client_lock);
532 if (new) 577 if (new)
533 goto install_client; 578 nfs_free_client(new);
579 return nfs_found_client(cl_init, clp);
580 }
581 if (new) {
582 list_add(&new->cl_share_link, &nn->nfs_client_list);
583 spin_unlock(&nn->nfs_client_lock);
584 new->cl_flags = cl_init->init_flags;
585 return cl_init->rpc_ops->init_client(new,
586 timeparms, ip_addr,
587 authflavour);
588 }
534 589
535 spin_unlock(&nn->nfs_client_lock); 590 spin_unlock(&nn->nfs_client_lock);
536 591
537 new = nfs_alloc_client(cl_init); 592 new = nfs_alloc_client(cl_init);
538 } while (!IS_ERR(new)); 593 } while (!IS_ERR(new));
539 594
540 dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new)); 595 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
596 cl_init->hostname ?: "", PTR_ERR(new));
541 return new; 597 return new;
542
543 /* install a new client and return with it unready */
544install_client:
545 clp = new;
546 list_add(&clp->cl_share_link, &nn->nfs_client_list);
547 spin_unlock(&nn->nfs_client_lock);
548
549 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
550 authflavour, noresvport);
551 if (error < 0) {
552 nfs_put_client(clp);
553 return ERR_PTR(error);
554 }
555 dprintk("--> nfs_get_client() = %p [new]\n", clp);
556 return clp;
557
558 /* found an existing client
559 * - make sure it's ready before returning
560 */
561found_client:
562 spin_unlock(&nn->nfs_client_lock);
563
564 if (new)
565 nfs_free_client(new);
566
567 error = wait_event_killable(nfs_client_active_wq,
568 clp->cl_cons_state < NFS_CS_INITING);
569 if (error < 0) {
570 nfs_put_client(clp);
571 return ERR_PTR(-ERESTARTSYS);
572 }
573
574 if (clp->cl_cons_state < NFS_CS_READY) {
575 error = clp->cl_cons_state;
576 nfs_put_client(clp);
577 return ERR_PTR(error);
578 }
579
580 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
581
582 dprintk("--> nfs_get_client() = %p [share]\n", clp);
583 return clp;
584} 598}
585 599
586/* 600/*
@@ -588,27 +602,12 @@ found_client:
588 */ 602 */
589void nfs_mark_client_ready(struct nfs_client *clp, int state) 603void nfs_mark_client_ready(struct nfs_client *clp, int state)
590{ 604{
605 smp_wmb();
591 clp->cl_cons_state = state; 606 clp->cl_cons_state = state;
592 wake_up_all(&nfs_client_active_wq); 607 wake_up_all(&nfs_client_active_wq);
593} 608}
594 609
595/* 610/*
596 * With sessions, the client is not marked ready until after a
597 * successful EXCHANGE_ID and CREATE_SESSION.
598 *
599 * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
600 * other versions of NFS can be tried.
601 */
602int nfs4_check_client_ready(struct nfs_client *clp)
603{
604 if (!nfs4_has_session(clp))
605 return 0;
606 if (clp->cl_cons_state < NFS_CS_READY)
607 return -EPROTONOSUPPORT;
608 return 0;
609}
610
611/*
612 * Initialise the timeout values for a connection 611 * Initialise the timeout values for a connection
613 */ 612 */
614static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, 613static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
@@ -654,12 +653,11 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
654 */ 653 */
655static int nfs_create_rpc_client(struct nfs_client *clp, 654static int nfs_create_rpc_client(struct nfs_client *clp,
656 const struct rpc_timeout *timeparms, 655 const struct rpc_timeout *timeparms,
657 rpc_authflavor_t flavor, 656 rpc_authflavor_t flavor)
658 int discrtry, int noresvport)
659{ 657{
660 struct rpc_clnt *clnt = NULL; 658 struct rpc_clnt *clnt = NULL;
661 struct rpc_create_args args = { 659 struct rpc_create_args args = {
662 .net = clp->net, 660 .net = clp->cl_net,
663 .protocol = clp->cl_proto, 661 .protocol = clp->cl_proto,
664 .address = (struct sockaddr *)&clp->cl_addr, 662 .address = (struct sockaddr *)&clp->cl_addr,
665 .addrsize = clp->cl_addrlen, 663 .addrsize = clp->cl_addrlen,
@@ -670,9 +668,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
670 .authflavor = flavor, 668 .authflavor = flavor,
671 }; 669 };
672 670
673 if (discrtry) 671 if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
674 args.flags |= RPC_CLNT_CREATE_DISCRTRY; 672 args.flags |= RPC_CLNT_CREATE_DISCRTRY;
675 if (noresvport) 673 if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
676 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; 674 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
677 675
678 if (!IS_ERR(clp->cl_rpcclient)) 676 if (!IS_ERR(clp->cl_rpcclient))
@@ -713,7 +711,7 @@ static int nfs_start_lockd(struct nfs_server *server)
713 .nfs_version = clp->rpc_ops->version, 711 .nfs_version = clp->rpc_ops->version,
714 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 712 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
715 1 : 0, 713 1 : 0,
716 .net = clp->net, 714 .net = clp->cl_net,
717 }; 715 };
718 716
719 if (nlm_init.nfs_version > 3) 717 if (nlm_init.nfs_version > 3)
@@ -805,36 +803,43 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
805 return 0; 803 return 0;
806} 804}
807 805
808/* 806/**
809 * Initialise an NFS2 or NFS3 client 807 * nfs_init_client - Initialise an NFS2 or NFS3 client
808 *
809 * @clp: nfs_client to initialise
810 * @timeparms: timeout parameters for underlying RPC transport
811 * @ip_addr: IP presentation address (not used)
812 * @authflavor: authentication flavor for underlying RPC transport
813 *
814 * Returns pointer to an NFS client, or an ERR_PTR value.
810 */ 815 */
811int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, 816struct nfs_client *nfs_init_client(struct nfs_client *clp,
812 const char *ip_addr, rpc_authflavor_t authflavour, 817 const struct rpc_timeout *timeparms,
813 int noresvport) 818 const char *ip_addr, rpc_authflavor_t authflavour)
814{ 819{
815 int error; 820 int error;
816 821
817 if (clp->cl_cons_state == NFS_CS_READY) { 822 if (clp->cl_cons_state == NFS_CS_READY) {
818 /* the client is already initialised */ 823 /* the client is already initialised */
819 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); 824 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
820 return 0; 825 return clp;
821 } 826 }
822 827
823 /* 828 /*
824 * Create a client RPC handle for doing FSSTAT with UNIX auth only 829 * Create a client RPC handle for doing FSSTAT with UNIX auth only
825 * - RFC 2623, sec 2.3.2 830 * - RFC 2623, sec 2.3.2
826 */ 831 */
827 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 832 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
828 0, noresvport);
829 if (error < 0) 833 if (error < 0)
830 goto error; 834 goto error;
831 nfs_mark_client_ready(clp, NFS_CS_READY); 835 nfs_mark_client_ready(clp, NFS_CS_READY);
832 return 0; 836 return clp;
833 837
834error: 838error:
835 nfs_mark_client_ready(clp, error); 839 nfs_mark_client_ready(clp, error);
840 nfs_put_client(clp);
836 dprintk("<-- nfs_init_client() = xerror %d\n", error); 841 dprintk("<-- nfs_init_client() = xerror %d\n", error);
837 return error; 842 return ERR_PTR(error);
838} 843}
839 844
840/* 845/*
@@ -847,7 +852,7 @@ static int nfs_init_server(struct nfs_server *server,
847 .hostname = data->nfs_server.hostname, 852 .hostname = data->nfs_server.hostname,
848 .addr = (const struct sockaddr *)&data->nfs_server.address, 853 .addr = (const struct sockaddr *)&data->nfs_server.address,
849 .addrlen = data->nfs_server.addrlen, 854 .addrlen = data->nfs_server.addrlen,
850 .rpc_ops = &nfs_v2_clientops, 855 .rpc_ops = NULL,
851 .proto = data->nfs_server.protocol, 856 .proto = data->nfs_server.protocol,
852 .net = data->net, 857 .net = data->net,
853 }; 858 };
@@ -857,17 +862,28 @@ static int nfs_init_server(struct nfs_server *server,
857 862
858 dprintk("--> nfs_init_server()\n"); 863 dprintk("--> nfs_init_server()\n");
859 864
865 switch (data->version) {
866#ifdef CONFIG_NFS_V2
867 case 2:
868 cl_init.rpc_ops = &nfs_v2_clientops;
869 break;
870#endif
860#ifdef CONFIG_NFS_V3 871#ifdef CONFIG_NFS_V3
861 if (data->version == 3) 872 case 3:
862 cl_init.rpc_ops = &nfs_v3_clientops; 873 cl_init.rpc_ops = &nfs_v3_clientops;
874 break;
863#endif 875#endif
876 default:
877 return -EPROTONOSUPPORT;
878 }
864 879
865 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, 880 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
866 data->timeo, data->retrans); 881 data->timeo, data->retrans);
882 if (data->flags & NFS_MOUNT_NORESVPORT)
883 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
867 884
868 /* Allocate or find a client reference we can use */ 885 /* Allocate or find a client reference we can use */
869 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX, 886 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
870 data->flags & NFS_MOUNT_NORESVPORT);
871 if (IS_ERR(clp)) { 887 if (IS_ERR(clp)) {
872 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); 888 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
873 return PTR_ERR(clp); 889 return PTR_ERR(clp);
@@ -880,7 +896,7 @@ static int nfs_init_server(struct nfs_server *server,
880 server->options = data->options; 896 server->options = data->options;
881 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| 897 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
882 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| 898 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
883 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; 899 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
884 900
885 if (data->rsize) 901 if (data->rsize)
886 server->rsize = nfs_block_size(data->rsize, NULL); 902 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1048,7 +1064,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
1048static void nfs_server_insert_lists(struct nfs_server *server) 1064static void nfs_server_insert_lists(struct nfs_server *server)
1049{ 1065{
1050 struct nfs_client *clp = server->nfs_client; 1066 struct nfs_client *clp = server->nfs_client;
1051 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 1067 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
1052 1068
1053 spin_lock(&nn->nfs_client_lock); 1069 spin_lock(&nn->nfs_client_lock);
1054 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1070 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
@@ -1065,7 +1081,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
1065 1081
1066 if (clp == NULL) 1082 if (clp == NULL)
1067 return; 1083 return;
1068 nn = net_generic(clp->net, nfs_net_id); 1084 nn = net_generic(clp->cl_net, nfs_net_id);
1069 spin_lock(&nn->nfs_client_lock); 1085 spin_lock(&nn->nfs_client_lock);
1070 list_del_rcu(&server->client_link); 1086 list_del_rcu(&server->client_link);
1071 if (list_empty(&clp->cl_superblocks)) 1087 if (list_empty(&clp->cl_superblocks))
@@ -1333,21 +1349,27 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
1333 * so that the client back channel can find the 1349 * so that the client back channel can find the
1334 * nfs_client struct 1350 * nfs_client struct
1335 */ 1351 */
1336 clp->cl_cons_state = NFS_CS_SESSION_INITING; 1352 nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
1337 } 1353 }
1338#endif /* CONFIG_NFS_V4_1 */ 1354#endif /* CONFIG_NFS_V4_1 */
1339 1355
1340 return nfs4_init_callback(clp); 1356 return nfs4_init_callback(clp);
1341} 1357}
1342 1358
1343/* 1359/**
1344 * Initialise an NFS4 client record 1360 * nfs4_init_client - Initialise an NFS4 client record
1361 *
1362 * @clp: nfs_client to initialise
1363 * @timeparms: timeout parameters for underlying RPC transport
1364 * @ip_addr: callback IP address in presentation format
1365 * @authflavor: authentication flavor for underlying RPC transport
1366 *
1367 * Returns pointer to an NFS client, or an ERR_PTR value.
1345 */ 1368 */
1346int nfs4_init_client(struct nfs_client *clp, 1369struct nfs_client *nfs4_init_client(struct nfs_client *clp,
1347 const struct rpc_timeout *timeparms, 1370 const struct rpc_timeout *timeparms,
1348 const char *ip_addr, 1371 const char *ip_addr,
1349 rpc_authflavor_t authflavour, 1372 rpc_authflavor_t authflavour)
1350 int noresvport)
1351{ 1373{
1352 char buf[INET6_ADDRSTRLEN + 1]; 1374 char buf[INET6_ADDRSTRLEN + 1];
1353 int error; 1375 int error;
@@ -1355,14 +1377,14 @@ int nfs4_init_client(struct nfs_client *clp,
1355 if (clp->cl_cons_state == NFS_CS_READY) { 1377 if (clp->cl_cons_state == NFS_CS_READY) {
1356 /* the client is initialised already */ 1378 /* the client is initialised already */
1357 dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); 1379 dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
1358 return 0; 1380 return clp;
1359 } 1381 }
1360 1382
1361 /* Check NFS protocol revision and initialize RPC op vector */ 1383 /* Check NFS protocol revision and initialize RPC op vector */
1362 clp->rpc_ops = &nfs_v4_clientops; 1384 clp->rpc_ops = &nfs_v4_clientops;
1363 1385
1364 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1386 __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
1365 1, noresvport); 1387 error = nfs_create_rpc_client(clp, timeparms, authflavour);
1366 if (error < 0) 1388 if (error < 0)
1367 goto error; 1389 goto error;
1368 1390
@@ -1395,12 +1417,13 @@ int nfs4_init_client(struct nfs_client *clp,
1395 1417
1396 if (!nfs4_has_session(clp)) 1418 if (!nfs4_has_session(clp))
1397 nfs_mark_client_ready(clp, NFS_CS_READY); 1419 nfs_mark_client_ready(clp, NFS_CS_READY);
1398 return 0; 1420 return clp;
1399 1421
1400error: 1422error:
1401 nfs_mark_client_ready(clp, error); 1423 nfs_mark_client_ready(clp, error);
1424 nfs_put_client(clp);
1402 dprintk("<-- nfs4_init_client() = xerror %d\n", error); 1425 dprintk("<-- nfs4_init_client() = xerror %d\n", error);
1403 return error; 1426 return ERR_PTR(error);
1404} 1427}
1405 1428
1406/* 1429/*
@@ -1429,9 +1452,11 @@ static int nfs4_set_client(struct nfs_server *server,
1429 1452
1430 dprintk("--> nfs4_set_client()\n"); 1453 dprintk("--> nfs4_set_client()\n");
1431 1454
1455 if (server->flags & NFS_MOUNT_NORESVPORT)
1456 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
1457
1432 /* Allocate or find a client reference we can use */ 1458 /* Allocate or find a client reference we can use */
1433 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour, 1459 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
1434 server->flags & NFS_MOUNT_NORESVPORT);
1435 if (IS_ERR(clp)) { 1460 if (IS_ERR(clp)) {
1436 error = PTR_ERR(clp); 1461 error = PTR_ERR(clp);
1437 goto error; 1462 goto error;
@@ -1465,8 +1490,8 @@ error:
1465 * the MDS. 1490 * the MDS.
1466 */ 1491 */
1467struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 1492struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1468 const struct sockaddr *ds_addr, 1493 const struct sockaddr *ds_addr, int ds_addrlen,
1469 int ds_addrlen, int ds_proto) 1494 int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
1470{ 1495{
1471 struct nfs_client_initdata cl_init = { 1496 struct nfs_client_initdata cl_init = {
1472 .addr = ds_addr, 1497 .addr = ds_addr,
@@ -1474,14 +1499,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1474 .rpc_ops = &nfs_v4_clientops, 1499 .rpc_ops = &nfs_v4_clientops,
1475 .proto = ds_proto, 1500 .proto = ds_proto,
1476 .minorversion = mds_clp->cl_minorversion, 1501 .minorversion = mds_clp->cl_minorversion,
1477 .net = mds_clp->net, 1502 .net = mds_clp->cl_net,
1478 };
1479 struct rpc_timeout ds_timeout = {
1480 .to_initval = 15 * HZ,
1481 .to_maxval = 15 * HZ,
1482 .to_retries = 1,
1483 .to_exponential = 1,
1484 }; 1503 };
1504 struct rpc_timeout ds_timeout;
1485 struct nfs_client *clp; 1505 struct nfs_client *clp;
1486 1506
1487 /* 1507 /*
@@ -1489,8 +1509,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1489 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS 1509 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
1490 * (section 13.1 RFC 5661). 1510 * (section 13.1 RFC 5661).
1491 */ 1511 */
1512 nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
1492 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, 1513 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
1493 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); 1514 mds_clp->cl_rpcclient->cl_auth->au_flavor);
1494 1515
1495 dprintk("<-- %s %p\n", __func__, clp); 1516 dprintk("<-- %s %p\n", __func__, clp);
1496 return clp; 1517 return clp;
@@ -1701,7 +1722,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1701 rpc_protocol(parent_server->client), 1722 rpc_protocol(parent_server->client),
1702 parent_server->client->cl_timeout, 1723 parent_server->client->cl_timeout,
1703 parent_client->cl_mvops->minor_version, 1724 parent_client->cl_mvops->minor_version,
1704 parent_client->net); 1725 parent_client->cl_net);
1705 if (error < 0) 1726 if (error < 0)
1706 goto error; 1727 goto error;
1707 1728
@@ -1805,6 +1826,7 @@ void nfs_clients_init(struct net *net)
1805 idr_init(&nn->cb_ident_idr); 1826 idr_init(&nn->cb_ident_idr);
1806#endif 1827#endif
1807 spin_lock_init(&nn->nfs_client_lock); 1828 spin_lock_init(&nn->nfs_client_lock);
1829 nn->boot_time = CURRENT_TIME;
1808} 1830}
1809 1831
1810#ifdef CONFIG_PROC_FS 1832#ifdef CONFIG_PROC_FS
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 89af1d269274..bd3a9601d32d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -316,6 +316,10 @@ out:
316 * nfs_client_return_marked_delegations - return previously marked delegations 316 * nfs_client_return_marked_delegations - return previously marked delegations
317 * @clp: nfs_client to process 317 * @clp: nfs_client to process
318 * 318 *
319 * Note that this function is designed to be called by the state
320 * manager thread. For this reason, it cannot flush the dirty data,
321 * since that could deadlock in case of a state recovery error.
322 *
319 * Returns zero on success, or a negative errno value. 323 * Returns zero on success, or a negative errno value.
320 */ 324 */
321int nfs_client_return_marked_delegations(struct nfs_client *clp) 325int nfs_client_return_marked_delegations(struct nfs_client *clp)
@@ -340,11 +344,9 @@ restart:
340 server); 344 server);
341 rcu_read_unlock(); 345 rcu_read_unlock();
342 346
343 if (delegation != NULL) { 347 if (delegation != NULL)
344 filemap_flush(inode->i_mapping);
345 err = __nfs_inode_return_delegation(inode, 348 err = __nfs_inode_return_delegation(inode,
346 delegation, 0); 349 delegation, 0);
347 }
348 iput(inode); 350 iput(inode);
349 if (!err) 351 if (!err)
350 goto restart; 352 goto restart;
@@ -380,6 +382,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
380 * nfs_inode_return_delegation - synchronously return a delegation 382 * nfs_inode_return_delegation - synchronously return a delegation
381 * @inode: inode to process 383 * @inode: inode to process
382 * 384 *
385 * This routine will always flush any dirty data to disk on the
386 * assumption that if we need to return the delegation, then
387 * we should stop caching.
388 *
383 * Returns zero on success, or a negative errno value. 389 * Returns zero on success, or a negative errno value.
384 */ 390 */
385int nfs_inode_return_delegation(struct inode *inode) 391int nfs_inode_return_delegation(struct inode *inode)
@@ -389,10 +395,10 @@ int nfs_inode_return_delegation(struct inode *inode)
389 struct nfs_delegation *delegation; 395 struct nfs_delegation *delegation;
390 int err = 0; 396 int err = 0;
391 397
398 nfs_wb_all(inode);
392 if (rcu_access_pointer(nfsi->delegation) != NULL) { 399 if (rcu_access_pointer(nfsi->delegation) != NULL) {
393 delegation = nfs_detach_delegation(nfsi, server); 400 delegation = nfs_detach_delegation(nfsi, server);
394 if (delegation != NULL) { 401 if (delegation != NULL) {
395 nfs_wb_all(inode);
396 err = __nfs_inode_return_delegation(inode, delegation, 1); 402 err = __nfs_inode_return_delegation(inode, delegation, 1);
397 } 403 }
398 } 404 }
@@ -538,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode,
538 struct nfs_client *clp = server->nfs_client; 544 struct nfs_client *clp = server->nfs_client;
539 struct nfs_delegation *delegation; 545 struct nfs_delegation *delegation;
540 546
547 filemap_flush(inode->i_mapping);
548
541 rcu_read_lock(); 549 rcu_read_lock();
542 delegation = rcu_dereference(NFS_I(inode)->delegation); 550 delegation = rcu_dereference(NFS_I(inode)->delegation);
543 551
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index cd6a7a8dadae..72709c4193fa 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
66 66
67static inline int nfs_inode_return_delegation(struct inode *inode) 67static inline int nfs_inode_return_delegation(struct inode *inode)
68{ 68{
69 nfs_wb_all(inode);
69 return 0; 70 return 0;
70} 71}
71#endif 72#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index eedd24d0ad2e..0989a2099688 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -475,6 +475,29 @@ different:
475} 475}
476 476
477static 477static
478bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
479{
480 if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
481 return false;
482 if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
483 return true;
484 if (filp->f_pos == 0)
485 return true;
486 return false;
487}
488
489/*
490 * This function is called by the lookup code to request the use of
491 * readdirplus to accelerate any future lookups in the same
492 * directory.
493 */
494static
495void nfs_advise_use_readdirplus(struct inode *dir)
496{
497 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
498}
499
500static
478void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) 501void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
479{ 502{
480 struct qstr filename = QSTR_INIT(entry->name, entry->len); 503 struct qstr filename = QSTR_INIT(entry->name, entry->len);
@@ -871,7 +894,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
871 desc->file = filp; 894 desc->file = filp;
872 desc->dir_cookie = &dir_ctx->dir_cookie; 895 desc->dir_cookie = &dir_ctx->dir_cookie;
873 desc->decode = NFS_PROTO(inode)->decode_dirent; 896 desc->decode = NFS_PROTO(inode)->decode_dirent;
874 desc->plus = NFS_USE_READDIRPLUS(inode); 897 desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
875 898
876 nfs_block_sillyrename(dentry); 899 nfs_block_sillyrename(dentry);
877 res = nfs_revalidate_mapping(inode, filp->f_mapping); 900 res = nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -1111,7 +1134,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1111 if (!inode) { 1134 if (!inode) {
1112 if (nfs_neg_need_reval(dir, dentry, nd)) 1135 if (nfs_neg_need_reval(dir, dentry, nd))
1113 goto out_bad; 1136 goto out_bad;
1114 goto out_valid; 1137 goto out_valid_noent;
1115 } 1138 }
1116 1139
1117 if (is_bad_inode(inode)) { 1140 if (is_bad_inode(inode)) {
@@ -1140,7 +1163,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1140 if (fhandle == NULL || fattr == NULL) 1163 if (fhandle == NULL || fattr == NULL)
1141 goto out_error; 1164 goto out_error;
1142 1165
1143 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1166 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1144 if (error) 1167 if (error)
1145 goto out_bad; 1168 goto out_bad;
1146 if (nfs_compare_fh(NFS_FH(inode), fhandle)) 1169 if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1153,6 +1176,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1153out_set_verifier: 1176out_set_verifier:
1154 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1177 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1155 out_valid: 1178 out_valid:
1179 /* Success: notify readdir to use READDIRPLUS */
1180 nfs_advise_use_readdirplus(dir);
1181 out_valid_noent:
1156 dput(parent); 1182 dput(parent);
1157 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", 1183 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
1158 __func__, dentry->d_parent->d_name.name, 1184 __func__, dentry->d_parent->d_name.name,
@@ -1296,7 +1322,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1296 parent = dentry->d_parent; 1322 parent = dentry->d_parent;
1297 /* Protect against concurrent sillydeletes */ 1323 /* Protect against concurrent sillydeletes */
1298 nfs_block_sillyrename(parent); 1324 nfs_block_sillyrename(parent);
1299 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1325 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1300 if (error == -ENOENT) 1326 if (error == -ENOENT)
1301 goto no_entry; 1327 goto no_entry;
1302 if (error < 0) { 1328 if (error < 0) {
@@ -1308,6 +1334,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1308 if (IS_ERR(res)) 1334 if (IS_ERR(res))
1309 goto out_unblock_sillyrename; 1335 goto out_unblock_sillyrename;
1310 1336
1337 /* Success: notify readdir to use READDIRPLUS */
1338 nfs_advise_use_readdirplus(dir);
1339
1311no_entry: 1340no_entry:
1312 res = d_materialise_unique(dentry, inode); 1341 res = d_materialise_unique(dentry, inode);
1313 if (res != NULL) { 1342 if (res != NULL) {
@@ -1643,7 +1672,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1643 if (dentry->d_inode) 1672 if (dentry->d_inode)
1644 goto out; 1673 goto out;
1645 if (fhandle->size == 0) { 1674 if (fhandle->size == 0) {
1646 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1675 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1647 if (error) 1676 if (error)
1648 goto out_error; 1677 goto out_error;
1649 } 1678 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 481be7f7bdd3..23d170bc44f4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
56 56
57#include "internal.h" 57#include "internal.h"
58#include "iostat.h" 58#include "iostat.h"
59#include "pnfs.h"
59 60
60#define NFSDBG_FACILITY NFSDBG_VFS 61#define NFSDBG_FACILITY NFSDBG_VFS
61 62
@@ -81,16 +82,19 @@ struct nfs_direct_req {
81 struct completion completion; /* wait for i/o completion */ 82 struct completion completion; /* wait for i/o completion */
82 83
83 /* commit state */ 84 /* commit state */
84 struct list_head rewrite_list; /* saved nfs_write_data structs */ 85 struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
85 struct nfs_write_data * commit_data; /* special write_data for commits */ 86 struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
87 struct work_struct work;
86 int flags; 88 int flags;
87#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ 89#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
88#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ 90#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
89 struct nfs_writeverf verf; /* unstable write verifier */ 91 struct nfs_writeverf verf; /* unstable write verifier */
90}; 92};
91 93
94static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
95static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
92static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); 96static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
93static const struct rpc_call_ops nfs_write_direct_ops; 97static void nfs_direct_write_schedule_work(struct work_struct *work);
94 98
95static inline void get_dreq(struct nfs_direct_req *dreq) 99static inline void get_dreq(struct nfs_direct_req *dreq)
96{ 100{
@@ -124,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
124 return -EINVAL; 128 return -EINVAL;
125} 129}
126 130
127static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
128{
129 unsigned int npages;
130 unsigned int i;
131
132 if (count == 0)
133 return;
134 pages += (pgbase >> PAGE_SHIFT);
135 npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
136 for (i = 0; i < npages; i++) {
137 struct page *page = pages[i];
138 if (!PageCompound(page))
139 set_page_dirty(page);
140 }
141}
142
143static void nfs_direct_release_pages(struct page **pages, unsigned int npages) 131static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
144{ 132{
145 unsigned int i; 133 unsigned int i;
@@ -147,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
147 page_cache_release(pages[i]); 135 page_cache_release(pages[i]);
148} 136}
149 137
138void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
139 struct nfs_direct_req *dreq)
140{
141 cinfo->lock = &dreq->lock;
142 cinfo->mds = &dreq->mds_cinfo;
143 cinfo->ds = &dreq->ds_cinfo;
144 cinfo->dreq = dreq;
145 cinfo->completion_ops = &nfs_direct_commit_completion_ops;
146}
147
150static inline struct nfs_direct_req *nfs_direct_req_alloc(void) 148static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
151{ 149{
152 struct nfs_direct_req *dreq; 150 struct nfs_direct_req *dreq;
153 151
154 dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); 152 dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
155 if (!dreq) 153 if (!dreq)
156 return NULL; 154 return NULL;
157 155
158 kref_init(&dreq->kref); 156 kref_init(&dreq->kref);
159 kref_get(&dreq->kref); 157 kref_get(&dreq->kref);
160 init_completion(&dreq->completion); 158 init_completion(&dreq->completion);
161 INIT_LIST_HEAD(&dreq->rewrite_list); 159 INIT_LIST_HEAD(&dreq->mds_cinfo.list);
162 dreq->iocb = NULL; 160 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
163 dreq->ctx = NULL;
164 dreq->l_ctx = NULL;
165 spin_lock_init(&dreq->lock); 161 spin_lock_init(&dreq->lock);
166 atomic_set(&dreq->io_count, 0);
167 dreq->count = 0;
168 dreq->error = 0;
169 dreq->flags = 0;
170 162
171 return dreq; 163 return dreq;
172} 164}
@@ -226,47 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
226 nfs_direct_req_release(dreq); 218 nfs_direct_req_release(dreq);
227} 219}
228 220
229/* 221static void nfs_direct_readpage_release(struct nfs_page *req)
230 * We must hold a reference to all the pages in this direct read request
231 * until the RPCs complete. This could be long *after* we are woken up in
232 * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
233 */
234static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
235{ 222{
236 struct nfs_read_data *data = calldata; 223 dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
237 224 req->wb_context->dentry->d_inode->i_sb->s_id,
238 nfs_readpage_result(task, data); 225 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
226 req->wb_bytes,
227 (long long)req_offset(req));
228 nfs_release_request(req);
239} 229}
240 230
241static void nfs_direct_read_release(void *calldata) 231static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
242{ 232{
233 unsigned long bytes = 0;
234 struct nfs_direct_req *dreq = hdr->dreq;
243 235
244 struct nfs_read_data *data = calldata; 236 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
245 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 237 goto out_put;
246 int status = data->task.tk_status;
247 238
248 spin_lock(&dreq->lock); 239 spin_lock(&dreq->lock);
249 if (unlikely(status < 0)) { 240 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
250 dreq->error = status; 241 dreq->error = hdr->error;
251 spin_unlock(&dreq->lock); 242 else
252 } else { 243 dreq->count += hdr->good_bytes;
253 dreq->count += data->res.count; 244 spin_unlock(&dreq->lock);
254 spin_unlock(&dreq->lock);
255 nfs_direct_dirty_pages(data->pagevec,
256 data->args.pgbase,
257 data->res.count);
258 }
259 nfs_direct_release_pages(data->pagevec, data->npages);
260 245
246 while (!list_empty(&hdr->pages)) {
247 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
248 struct page *page = req->wb_page;
249
250 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
251 if (bytes > hdr->good_bytes)
252 zero_user(page, 0, PAGE_SIZE);
253 else if (hdr->good_bytes - bytes < PAGE_SIZE)
254 zero_user_segment(page,
255 hdr->good_bytes & ~PAGE_MASK,
256 PAGE_SIZE);
257 }
258 if (!PageCompound(page)) {
259 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
260 if (bytes < hdr->good_bytes)
261 set_page_dirty(page);
262 } else
263 set_page_dirty(page);
264 }
265 bytes += req->wb_bytes;
266 nfs_list_remove_request(req);
267 nfs_direct_readpage_release(req);
268 }
269out_put:
261 if (put_dreq(dreq)) 270 if (put_dreq(dreq))
262 nfs_direct_complete(dreq); 271 nfs_direct_complete(dreq);
263 nfs_readdata_free(data); 272 hdr->release(hdr);
273}
274
275static void nfs_read_sync_pgio_error(struct list_head *head)
276{
277 struct nfs_page *req;
278
279 while (!list_empty(head)) {
280 req = nfs_list_entry(head->next);
281 nfs_list_remove_request(req);
282 nfs_release_request(req);
283 }
264} 284}
265 285
266static const struct rpc_call_ops nfs_read_direct_ops = { 286static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
267 .rpc_call_prepare = nfs_read_prepare, 287{
268 .rpc_call_done = nfs_direct_read_result, 288 get_dreq(hdr->dreq);
269 .rpc_release = nfs_direct_read_release, 289}
290
291static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
292 .error_cleanup = nfs_read_sync_pgio_error,
293 .init_hdr = nfs_direct_pgio_init,
294 .completion = nfs_direct_read_completion,
270}; 295};
271 296
272/* 297/*
@@ -276,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
276 * handled automatically by nfs_direct_read_result(). Otherwise, if 301 * handled automatically by nfs_direct_read_result(). Otherwise, if
277 * no requests have been sent, just return an error. 302 * no requests have been sent, just return an error.
278 */ 303 */
279static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, 304static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
280 const struct iovec *iov, 305 const struct iovec *iov,
281 loff_t pos) 306 loff_t pos)
282{ 307{
308 struct nfs_direct_req *dreq = desc->pg_dreq;
283 struct nfs_open_context *ctx = dreq->ctx; 309 struct nfs_open_context *ctx = dreq->ctx;
284 struct inode *inode = ctx->dentry->d_inode; 310 struct inode *inode = ctx->dentry->d_inode;
285 unsigned long user_addr = (unsigned long)iov->iov_base; 311 unsigned long user_addr = (unsigned long)iov->iov_base;
286 size_t count = iov->iov_len; 312 size_t count = iov->iov_len;
287 size_t rsize = NFS_SERVER(inode)->rsize; 313 size_t rsize = NFS_SERVER(inode)->rsize;
288 struct rpc_task *task;
289 struct rpc_message msg = {
290 .rpc_cred = ctx->cred,
291 };
292 struct rpc_task_setup task_setup_data = {
293 .rpc_client = NFS_CLIENT(inode),
294 .rpc_message = &msg,
295 .callback_ops = &nfs_read_direct_ops,
296 .workqueue = nfsiod_workqueue,
297 .flags = RPC_TASK_ASYNC,
298 };
299 unsigned int pgbase; 314 unsigned int pgbase;
300 int result; 315 int result;
301 ssize_t started = 0; 316 ssize_t started = 0;
317 struct page **pagevec = NULL;
318 unsigned int npages;
302 319
303 do { 320 do {
304 struct nfs_read_data *data;
305 size_t bytes; 321 size_t bytes;
322 int i;
306 323
307 pgbase = user_addr & ~PAGE_MASK; 324 pgbase = user_addr & ~PAGE_MASK;
308 bytes = min(rsize,count); 325 bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
309 326
310 result = -ENOMEM; 327 result = -ENOMEM;
311 data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); 328 npages = nfs_page_array_len(pgbase, bytes);
312 if (unlikely(!data)) 329 if (!pagevec)
330 pagevec = kmalloc(npages * sizeof(struct page *),
331 GFP_KERNEL);
332 if (!pagevec)
313 break; 333 break;
314
315 down_read(&current->mm->mmap_sem); 334 down_read(&current->mm->mmap_sem);
316 result = get_user_pages(current, current->mm, user_addr, 335 result = get_user_pages(current, current->mm, user_addr,
317 data->npages, 1, 0, data->pagevec, NULL); 336 npages, 1, 0, pagevec, NULL);
318 up_read(&current->mm->mmap_sem); 337 up_read(&current->mm->mmap_sem);
319 if (result < 0) { 338 if (result < 0)
320 nfs_readdata_free(data);
321 break; 339 break;
322 } 340 if ((unsigned)result < npages) {
323 if ((unsigned)result < data->npages) {
324 bytes = result * PAGE_SIZE; 341 bytes = result * PAGE_SIZE;
325 if (bytes <= pgbase) { 342 if (bytes <= pgbase) {
326 nfs_direct_release_pages(data->pagevec, result); 343 nfs_direct_release_pages(pagevec, result);
327 nfs_readdata_free(data);
328 break; 344 break;
329 } 345 }
330 bytes -= pgbase; 346 bytes -= pgbase;
331 data->npages = result; 347 npages = result;
332 } 348 }
333 349
334 get_dreq(dreq); 350 for (i = 0; i < npages; i++) {
335 351 struct nfs_page *req;
336 data->req = (struct nfs_page *) dreq; 352 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
337 data->inode = inode; 353 /* XXX do we need to do the eof zeroing found in async_filler? */
338 data->cred = msg.rpc_cred; 354 req = nfs_create_request(dreq->ctx, dreq->inode,
339 data->args.fh = NFS_FH(inode); 355 pagevec[i],
340 data->args.context = ctx; 356 pgbase, req_len);
341 data->args.lock_context = dreq->l_ctx; 357 if (IS_ERR(req)) {
342 data->args.offset = pos; 358 result = PTR_ERR(req);
343 data->args.pgbase = pgbase; 359 break;
344 data->args.pages = data->pagevec; 360 }
345 data->args.count = bytes; 361 req->wb_index = pos >> PAGE_SHIFT;
346 data->res.fattr = &data->fattr; 362 req->wb_offset = pos & ~PAGE_MASK;
347 data->res.eof = 0; 363 if (!nfs_pageio_add_request(desc, req)) {
348 data->res.count = bytes; 364 result = desc->pg_error;
349 nfs_fattr_init(&data->fattr); 365 nfs_release_request(req);
350 msg.rpc_argp = &data->args; 366 break;
351 msg.rpc_resp = &data->res; 367 }
352 368 pgbase = 0;
353 task_setup_data.task = &data->task; 369 bytes -= req_len;
354 task_setup_data.callback_data = data; 370 started += req_len;
355 NFS_PROTO(inode)->read_setup(data, &msg); 371 user_addr += req_len;
356 372 pos += req_len;
357 task = rpc_run_task(&task_setup_data); 373 count -= req_len;
358 if (IS_ERR(task)) 374 }
359 break; 375 /* The nfs_page now hold references to these pages */
360 rpc_put_task(task); 376 nfs_direct_release_pages(pagevec, npages);
361 377 } while (count != 0 && result >= 0);
362 dprintk("NFS: %5u initiated direct read call " 378
363 "(req %s/%Ld, %zu bytes @ offset %Lu)\n", 379 kfree(pagevec);
364 data->task.tk_pid,
365 inode->i_sb->s_id,
366 (long long)NFS_FILEID(inode),
367 bytes,
368 (unsigned long long)data->args.offset);
369
370 started += bytes;
371 user_addr += bytes;
372 pos += bytes;
373 /* FIXME: Remove this unnecessary math from final patch */
374 pgbase += bytes;
375 pgbase &= ~PAGE_MASK;
376 BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
377
378 count -= bytes;
379 } while (count != 0);
380 380
381 if (started) 381 if (started)
382 return started; 382 return started;
@@ -388,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
388 unsigned long nr_segs, 388 unsigned long nr_segs,
389 loff_t pos) 389 loff_t pos)
390{ 390{
391 struct nfs_pageio_descriptor desc;
391 ssize_t result = -EINVAL; 392 ssize_t result = -EINVAL;
392 size_t requested_bytes = 0; 393 size_t requested_bytes = 0;
393 unsigned long seg; 394 unsigned long seg;
394 395
396 nfs_pageio_init_read(&desc, dreq->inode,
397 &nfs_direct_read_completion_ops);
395 get_dreq(dreq); 398 get_dreq(dreq);
399 desc.pg_dreq = dreq;
396 400
397 for (seg = 0; seg < nr_segs; seg++) { 401 for (seg = 0; seg < nr_segs; seg++) {
398 const struct iovec *vec = &iov[seg]; 402 const struct iovec *vec = &iov[seg];
399 result = nfs_direct_read_schedule_segment(dreq, vec, pos); 403 result = nfs_direct_read_schedule_segment(&desc, vec, pos);
400 if (result < 0) 404 if (result < 0)
401 break; 405 break;
402 requested_bytes += result; 406 requested_bytes += result;
@@ -405,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
405 pos += vec->iov_len; 409 pos += vec->iov_len;
406 } 410 }
407 411
412 nfs_pageio_complete(&desc);
413
408 /* 414 /*
409 * If no bytes were started, return the error, and let the 415 * If no bytes were started, return the error, and let the
410 * generic layer handle the completion. 416 * generic layer handle the completion.
@@ -441,104 +447,64 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
441 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); 447 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
442 if (!result) 448 if (!result)
443 result = nfs_direct_wait(dreq); 449 result = nfs_direct_wait(dreq);
450 NFS_I(inode)->read_io += result;
444out_release: 451out_release:
445 nfs_direct_req_release(dreq); 452 nfs_direct_req_release(dreq);
446out: 453out:
447 return result; 454 return result;
448} 455}
449 456
450static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
451{
452 while (!list_empty(&dreq->rewrite_list)) {
453 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
454 list_del(&data->pages);
455 nfs_direct_release_pages(data->pagevec, data->npages);
456 nfs_writedata_free(data);
457 }
458}
459
460#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 457#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
461static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 458static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
462{ 459{
463 struct inode *inode = dreq->inode; 460 struct nfs_pageio_descriptor desc;
464 struct list_head *p; 461 struct nfs_page *req, *tmp;
465 struct nfs_write_data *data; 462 LIST_HEAD(reqs);
466 struct rpc_task *task; 463 struct nfs_commit_info cinfo;
467 struct rpc_message msg = { 464 LIST_HEAD(failed);
468 .rpc_cred = dreq->ctx->cred, 465
469 }; 466 nfs_init_cinfo_from_dreq(&cinfo, dreq);
470 struct rpc_task_setup task_setup_data = { 467 pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
471 .rpc_client = NFS_CLIENT(inode), 468 spin_lock(cinfo.lock);
472 .rpc_message = &msg, 469 nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
473 .callback_ops = &nfs_write_direct_ops, 470 spin_unlock(cinfo.lock);
474 .workqueue = nfsiod_workqueue,
475 .flags = RPC_TASK_ASYNC,
476 };
477 471
478 dreq->count = 0; 472 dreq->count = 0;
479 get_dreq(dreq); 473 get_dreq(dreq);
480 474
481 list_for_each(p, &dreq->rewrite_list) { 475 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
482 data = list_entry(p, struct nfs_write_data, pages); 476 &nfs_direct_write_completion_ops);
483 477 desc.pg_dreq = dreq;
484 get_dreq(dreq); 478
485 479 list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
486 /* Use stable writes */ 480 if (!nfs_pageio_add_request(&desc, req)) {
487 data->args.stable = NFS_FILE_SYNC; 481 nfs_list_add_request(req, &failed);
488 482 spin_lock(cinfo.lock);
489 /* 483 dreq->flags = 0;
490 * Reset data->res. 484 dreq->error = -EIO;
491 */ 485 spin_unlock(cinfo.lock);
492 nfs_fattr_init(&data->fattr); 486 }
493 data->res.count = data->args.count;
494 memset(&data->verf, 0, sizeof(data->verf));
495
496 /*
497 * Reuse data->task; data->args should not have changed
498 * since the original request was sent.
499 */
500 task_setup_data.task = &data->task;
501 task_setup_data.callback_data = data;
502 msg.rpc_argp = &data->args;
503 msg.rpc_resp = &data->res;
504 NFS_PROTO(inode)->write_setup(data, &msg);
505
506 /*
507 * We're called via an RPC callback, so BKL is already held.
508 */
509 task = rpc_run_task(&task_setup_data);
510 if (!IS_ERR(task))
511 rpc_put_task(task);
512
513 dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
514 data->task.tk_pid,
515 inode->i_sb->s_id,
516 (long long)NFS_FILEID(inode),
517 data->args.count,
518 (unsigned long long)data->args.offset);
519 } 487 }
488 nfs_pageio_complete(&desc);
520 489
521 if (put_dreq(dreq)) 490 while (!list_empty(&failed))
522 nfs_direct_write_complete(dreq, inode); 491 nfs_unlock_and_release_request(req);
523}
524
525static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
526{
527 struct nfs_write_data *data = calldata;
528 492
529 /* Call the NFS version-specific code */ 493 if (put_dreq(dreq))
530 NFS_PROTO(data->inode)->commit_done(task, data); 494 nfs_direct_write_complete(dreq, dreq->inode);
531} 495}
532 496
533static void nfs_direct_commit_release(void *calldata) 497static void nfs_direct_commit_complete(struct nfs_commit_data *data)
534{ 498{
535 struct nfs_write_data *data = calldata; 499 struct nfs_direct_req *dreq = data->dreq;
536 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 500 struct nfs_commit_info cinfo;
501 struct nfs_page *req;
537 int status = data->task.tk_status; 502 int status = data->task.tk_status;
538 503
504 nfs_init_cinfo_from_dreq(&cinfo, dreq);
539 if (status < 0) { 505 if (status < 0) {
540 dprintk("NFS: %5u commit failed with error %d.\n", 506 dprintk("NFS: %5u commit failed with error %d.\n",
541 data->task.tk_pid, status); 507 data->task.tk_pid, status);
542 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 508 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
543 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { 509 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
544 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); 510 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -546,62 +512,47 @@ static void nfs_direct_commit_release(void *calldata)
546 } 512 }
547 513
548 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 514 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
549 nfs_direct_write_complete(dreq, data->inode); 515 while (!list_empty(&data->pages)) {
550 nfs_commit_free(data); 516 req = nfs_list_entry(data->pages.next);
517 nfs_list_remove_request(req);
518 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
519 /* Note the rewrite will go through mds */
520 kref_get(&req->wb_kref);
521 nfs_mark_request_commit(req, NULL, &cinfo);
522 }
523 nfs_unlock_and_release_request(req);
524 }
525
526 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
527 nfs_direct_write_complete(dreq, data->inode);
528}
529
530static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
531{
532 /* There is no lock to clear */
551} 533}
552 534
553static const struct rpc_call_ops nfs_commit_direct_ops = { 535static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
554 .rpc_call_prepare = nfs_write_prepare, 536 .completion = nfs_direct_commit_complete,
555 .rpc_call_done = nfs_direct_commit_result, 537 .error_cleanup = nfs_direct_error_cleanup,
556 .rpc_release = nfs_direct_commit_release,
557}; 538};
558 539
559static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 540static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
560{ 541{
561 struct nfs_write_data *data = dreq->commit_data; 542 int res;
562 struct rpc_task *task; 543 struct nfs_commit_info cinfo;
563 struct rpc_message msg = { 544 LIST_HEAD(mds_list);
564 .rpc_argp = &data->args, 545
565 .rpc_resp = &data->res, 546 nfs_init_cinfo_from_dreq(&cinfo, dreq);
566 .rpc_cred = dreq->ctx->cred, 547 nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
567 }; 548 res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
568 struct rpc_task_setup task_setup_data = { 549 if (res < 0) /* res == -ENOMEM */
569 .task = &data->task, 550 nfs_direct_write_reschedule(dreq);
570 .rpc_client = NFS_CLIENT(dreq->inode),
571 .rpc_message = &msg,
572 .callback_ops = &nfs_commit_direct_ops,
573 .callback_data = data,
574 .workqueue = nfsiod_workqueue,
575 .flags = RPC_TASK_ASYNC,
576 };
577
578 data->inode = dreq->inode;
579 data->cred = msg.rpc_cred;
580
581 data->args.fh = NFS_FH(data->inode);
582 data->args.offset = 0;
583 data->args.count = 0;
584 data->args.context = dreq->ctx;
585 data->args.lock_context = dreq->l_ctx;
586 data->res.count = 0;
587 data->res.fattr = &data->fattr;
588 data->res.verf = &data->verf;
589 nfs_fattr_init(&data->fattr);
590
591 NFS_PROTO(data->inode)->commit_setup(data, &msg);
592
593 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
594 dreq->commit_data = NULL;
595
596 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
597
598 task = rpc_run_task(&task_setup_data);
599 if (!IS_ERR(task))
600 rpc_put_task(task);
601} 551}
602 552
603static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 553static void nfs_direct_write_schedule_work(struct work_struct *work)
604{ 554{
555 struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
605 int flags = dreq->flags; 556 int flags = dreq->flags;
606 557
607 dreq->flags = 0; 558 dreq->flags = 0;
@@ -613,89 +564,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
613 nfs_direct_write_reschedule(dreq); 564 nfs_direct_write_reschedule(dreq);
614 break; 565 break;
615 default: 566 default:
616 if (dreq->commit_data != NULL) 567 nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
617 nfs_commit_free(dreq->commit_data);
618 nfs_direct_free_writedata(dreq);
619 nfs_zap_mapping(inode, inode->i_mapping);
620 nfs_direct_complete(dreq); 568 nfs_direct_complete(dreq);
621 } 569 }
622} 570}
623 571
624static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 572static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
625{ 573{
626 dreq->commit_data = nfs_commitdata_alloc(); 574 schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
627 if (dreq->commit_data != NULL)
628 dreq->commit_data->req = (struct nfs_page *) dreq;
629} 575}
576
630#else 577#else
631static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 578static void nfs_direct_write_schedule_work(struct work_struct *work)
632{ 579{
633 dreq->commit_data = NULL;
634} 580}
635 581
636static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 582static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
637{ 583{
638 nfs_direct_free_writedata(dreq);
639 nfs_zap_mapping(inode, inode->i_mapping); 584 nfs_zap_mapping(inode, inode->i_mapping);
640 nfs_direct_complete(dreq); 585 nfs_direct_complete(dreq);
641} 586}
642#endif 587#endif
643 588
644static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
645{
646 struct nfs_write_data *data = calldata;
647
648 nfs_writeback_done(task, data);
649}
650
651/* 589/*
652 * NB: Return the value of the first error return code. Subsequent 590 * NB: Return the value of the first error return code. Subsequent
653 * errors after the first one are ignored. 591 * errors after the first one are ignored.
654 */ 592 */
655static void nfs_direct_write_release(void *calldata)
656{
657 struct nfs_write_data *data = calldata;
658 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
659 int status = data->task.tk_status;
660
661 spin_lock(&dreq->lock);
662
663 if (unlikely(status < 0)) {
664 /* An error has occurred, so we should not commit */
665 dreq->flags = 0;
666 dreq->error = status;
667 }
668 if (unlikely(dreq->error != 0))
669 goto out_unlock;
670
671 dreq->count += data->res.count;
672
673 if (data->res.verf->committed != NFS_FILE_SYNC) {
674 switch (dreq->flags) {
675 case 0:
676 memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
677 dreq->flags = NFS_ODIRECT_DO_COMMIT;
678 break;
679 case NFS_ODIRECT_DO_COMMIT:
680 if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
681 dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
682 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
683 }
684 }
685 }
686out_unlock:
687 spin_unlock(&dreq->lock);
688
689 if (put_dreq(dreq))
690 nfs_direct_write_complete(dreq, data->inode);
691}
692
693static const struct rpc_call_ops nfs_write_direct_ops = {
694 .rpc_call_prepare = nfs_write_prepare,
695 .rpc_call_done = nfs_direct_write_result,
696 .rpc_release = nfs_direct_write_release,
697};
698
699/* 593/*
700 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE 594 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
701 * operation. If nfs_writedata_alloc() or get_user_pages() fails, 595 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
@@ -703,132 +597,187 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
703 * handled automatically by nfs_direct_write_result(). Otherwise, if 597 * handled automatically by nfs_direct_write_result(). Otherwise, if
704 * no requests have been sent, just return an error. 598 * no requests have been sent, just return an error.
705 */ 599 */
706static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, 600static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
707 const struct iovec *iov, 601 const struct iovec *iov,
708 loff_t pos, int sync) 602 loff_t pos)
709{ 603{
604 struct nfs_direct_req *dreq = desc->pg_dreq;
710 struct nfs_open_context *ctx = dreq->ctx; 605 struct nfs_open_context *ctx = dreq->ctx;
711 struct inode *inode = ctx->dentry->d_inode; 606 struct inode *inode = ctx->dentry->d_inode;
712 unsigned long user_addr = (unsigned long)iov->iov_base; 607 unsigned long user_addr = (unsigned long)iov->iov_base;
713 size_t count = iov->iov_len; 608 size_t count = iov->iov_len;
714 struct rpc_task *task;
715 struct rpc_message msg = {
716 .rpc_cred = ctx->cred,
717 };
718 struct rpc_task_setup task_setup_data = {
719 .rpc_client = NFS_CLIENT(inode),
720 .rpc_message = &msg,
721 .callback_ops = &nfs_write_direct_ops,
722 .workqueue = nfsiod_workqueue,
723 .flags = RPC_TASK_ASYNC,
724 };
725 size_t wsize = NFS_SERVER(inode)->wsize; 609 size_t wsize = NFS_SERVER(inode)->wsize;
726 unsigned int pgbase; 610 unsigned int pgbase;
727 int result; 611 int result;
728 ssize_t started = 0; 612 ssize_t started = 0;
613 struct page **pagevec = NULL;
614 unsigned int npages;
729 615
730 do { 616 do {
731 struct nfs_write_data *data;
732 size_t bytes; 617 size_t bytes;
618 int i;
733 619
734 pgbase = user_addr & ~PAGE_MASK; 620 pgbase = user_addr & ~PAGE_MASK;
735 bytes = min(wsize,count); 621 bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
736 622
737 result = -ENOMEM; 623 result = -ENOMEM;
738 data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); 624 npages = nfs_page_array_len(pgbase, bytes);
739 if (unlikely(!data)) 625 if (!pagevec)
626 pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
627 if (!pagevec)
740 break; 628 break;
741 629
742 down_read(&current->mm->mmap_sem); 630 down_read(&current->mm->mmap_sem);
743 result = get_user_pages(current, current->mm, user_addr, 631 result = get_user_pages(current, current->mm, user_addr,
744 data->npages, 0, 0, data->pagevec, NULL); 632 npages, 0, 0, pagevec, NULL);
745 up_read(&current->mm->mmap_sem); 633 up_read(&current->mm->mmap_sem);
746 if (result < 0) { 634 if (result < 0)
747 nfs_writedata_free(data);
748 break; 635 break;
749 } 636
750 if ((unsigned)result < data->npages) { 637 if ((unsigned)result < npages) {
751 bytes = result * PAGE_SIZE; 638 bytes = result * PAGE_SIZE;
752 if (bytes <= pgbase) { 639 if (bytes <= pgbase) {
753 nfs_direct_release_pages(data->pagevec, result); 640 nfs_direct_release_pages(pagevec, result);
754 nfs_writedata_free(data);
755 break; 641 break;
756 } 642 }
757 bytes -= pgbase; 643 bytes -= pgbase;
758 data->npages = result; 644 npages = result;
759 } 645 }
760 646
761 get_dreq(dreq); 647 for (i = 0; i < npages; i++) {
762 648 struct nfs_page *req;
763 list_move_tail(&data->pages, &dreq->rewrite_list); 649 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
764
765 data->req = (struct nfs_page *) dreq;
766 data->inode = inode;
767 data->cred = msg.rpc_cred;
768 data->args.fh = NFS_FH(inode);
769 data->args.context = ctx;
770 data->args.lock_context = dreq->l_ctx;
771 data->args.offset = pos;
772 data->args.pgbase = pgbase;
773 data->args.pages = data->pagevec;
774 data->args.count = bytes;
775 data->args.stable = sync;
776 data->res.fattr = &data->fattr;
777 data->res.count = bytes;
778 data->res.verf = &data->verf;
779 nfs_fattr_init(&data->fattr);
780
781 task_setup_data.task = &data->task;
782 task_setup_data.callback_data = data;
783 msg.rpc_argp = &data->args;
784 msg.rpc_resp = &data->res;
785 NFS_PROTO(inode)->write_setup(data, &msg);
786
787 task = rpc_run_task(&task_setup_data);
788 if (IS_ERR(task))
789 break;
790 rpc_put_task(task);
791
792 dprintk("NFS: %5u initiated direct write call "
793 "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
794 data->task.tk_pid,
795 inode->i_sb->s_id,
796 (long long)NFS_FILEID(inode),
797 bytes,
798 (unsigned long long)data->args.offset);
799 650
800 started += bytes; 651 req = nfs_create_request(dreq->ctx, dreq->inode,
801 user_addr += bytes; 652 pagevec[i],
802 pos += bytes; 653 pgbase, req_len);
803 654 if (IS_ERR(req)) {
804 /* FIXME: Remove this useless math from the final patch */ 655 result = PTR_ERR(req);
805 pgbase += bytes; 656 break;
806 pgbase &= ~PAGE_MASK; 657 }
807 BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); 658 nfs_lock_request(req);
659 req->wb_index = pos >> PAGE_SHIFT;
660 req->wb_offset = pos & ~PAGE_MASK;
661 if (!nfs_pageio_add_request(desc, req)) {
662 result = desc->pg_error;
663 nfs_unlock_and_release_request(req);
664 break;
665 }
666 pgbase = 0;
667 bytes -= req_len;
668 started += req_len;
669 user_addr += req_len;
670 pos += req_len;
671 count -= req_len;
672 }
673 /* The nfs_page now hold references to these pages */
674 nfs_direct_release_pages(pagevec, npages);
675 } while (count != 0 && result >= 0);
808 676
809 count -= bytes; 677 kfree(pagevec);
810 } while (count != 0);
811 678
812 if (started) 679 if (started)
813 return started; 680 return started;
814 return result < 0 ? (ssize_t) result : -EFAULT; 681 return result < 0 ? (ssize_t) result : -EFAULT;
815} 682}
816 683
684static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
685{
686 struct nfs_direct_req *dreq = hdr->dreq;
687 struct nfs_commit_info cinfo;
688 int bit = -1;
689 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
690
691 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
692 goto out_put;
693
694 nfs_init_cinfo_from_dreq(&cinfo, dreq);
695
696 spin_lock(&dreq->lock);
697
698 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
699 dreq->flags = 0;
700 dreq->error = hdr->error;
701 }
702 if (dreq->error != 0)
703 bit = NFS_IOHDR_ERROR;
704 else {
705 dreq->count += hdr->good_bytes;
706 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
707 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
708 bit = NFS_IOHDR_NEED_RESCHED;
709 } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
710 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
711 bit = NFS_IOHDR_NEED_RESCHED;
712 else if (dreq->flags == 0) {
713 memcpy(&dreq->verf, &req->wb_verf,
714 sizeof(dreq->verf));
715 bit = NFS_IOHDR_NEED_COMMIT;
716 dreq->flags = NFS_ODIRECT_DO_COMMIT;
717 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
718 if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
719 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
720 bit = NFS_IOHDR_NEED_RESCHED;
721 } else
722 bit = NFS_IOHDR_NEED_COMMIT;
723 }
724 }
725 }
726 spin_unlock(&dreq->lock);
727
728 while (!list_empty(&hdr->pages)) {
729 req = nfs_list_entry(hdr->pages.next);
730 nfs_list_remove_request(req);
731 switch (bit) {
732 case NFS_IOHDR_NEED_RESCHED:
733 case NFS_IOHDR_NEED_COMMIT:
734 kref_get(&req->wb_kref);
735 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
736 }
737 nfs_unlock_and_release_request(req);
738 }
739
740out_put:
741 if (put_dreq(dreq))
742 nfs_direct_write_complete(dreq, hdr->inode);
743 hdr->release(hdr);
744}
745
746static void nfs_write_sync_pgio_error(struct list_head *head)
747{
748 struct nfs_page *req;
749
750 while (!list_empty(head)) {
751 req = nfs_list_entry(head->next);
752 nfs_list_remove_request(req);
753 nfs_unlock_and_release_request(req);
754 }
755}
756
757static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
758 .error_cleanup = nfs_write_sync_pgio_error,
759 .init_hdr = nfs_direct_pgio_init,
760 .completion = nfs_direct_write_completion,
761};
762
817static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 763static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
818 const struct iovec *iov, 764 const struct iovec *iov,
819 unsigned long nr_segs, 765 unsigned long nr_segs,
820 loff_t pos, int sync) 766 loff_t pos)
821{ 767{
768 struct nfs_pageio_descriptor desc;
822 ssize_t result = 0; 769 ssize_t result = 0;
823 size_t requested_bytes = 0; 770 size_t requested_bytes = 0;
824 unsigned long seg; 771 unsigned long seg;
825 772
773 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
774 &nfs_direct_write_completion_ops);
775 desc.pg_dreq = dreq;
826 get_dreq(dreq); 776 get_dreq(dreq);
827 777
828 for (seg = 0; seg < nr_segs; seg++) { 778 for (seg = 0; seg < nr_segs; seg++) {
829 const struct iovec *vec = &iov[seg]; 779 const struct iovec *vec = &iov[seg];
830 result = nfs_direct_write_schedule_segment(dreq, vec, 780 result = nfs_direct_write_schedule_segment(&desc, vec, pos);
831 pos, sync);
832 if (result < 0) 781 if (result < 0)
833 break; 782 break;
834 requested_bytes += result; 783 requested_bytes += result;
@@ -836,6 +785,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
836 break; 785 break;
837 pos += vec->iov_len; 786 pos += vec->iov_len;
838 } 787 }
788 nfs_pageio_complete(&desc);
789 NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
839 790
840 /* 791 /*
841 * If no bytes were started, return the error, and let the 792 * If no bytes were started, return the error, and let the
@@ -858,16 +809,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
858 ssize_t result = -ENOMEM; 809 ssize_t result = -ENOMEM;
859 struct inode *inode = iocb->ki_filp->f_mapping->host; 810 struct inode *inode = iocb->ki_filp->f_mapping->host;
860 struct nfs_direct_req *dreq; 811 struct nfs_direct_req *dreq;
861 size_t wsize = NFS_SERVER(inode)->wsize;
862 int sync = NFS_UNSTABLE;
863 812
864 dreq = nfs_direct_req_alloc(); 813 dreq = nfs_direct_req_alloc();
865 if (!dreq) 814 if (!dreq)
866 goto out; 815 goto out;
867 nfs_alloc_commit_data(dreq);
868
869 if (dreq->commit_data == NULL || count <= wsize)
870 sync = NFS_FILE_SYNC;
871 816
872 dreq->inode = inode; 817 dreq->inode = inode;
873 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 818 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -877,7 +822,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
877 if (!is_sync_kiocb(iocb)) 822 if (!is_sync_kiocb(iocb))
878 dreq->iocb = iocb; 823 dreq->iocb = iocb;
879 824
880 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); 825 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
881 if (!result) 826 if (!result)
882 result = nfs_direct_wait(dreq); 827 result = nfs_direct_wait(dreq);
883out_release: 828out_release:
@@ -997,10 +942,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
997 task_io_account_write(count); 942 task_io_account_write(count);
998 943
999 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 944 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
945 if (retval > 0) {
946 struct inode *inode = mapping->host;
1000 947
1001 if (retval > 0)
1002 iocb->ki_pos = pos + retval; 948 iocb->ki_pos = pos + retval;
1003 949 spin_lock(&inode->i_lock);
950 if (i_size_read(inode) < iocb->ki_pos)
951 i_size_write(inode, iocb->ki_pos);
952 spin_unlock(&inode->i_lock);
953 }
1004out: 954out:
1005 return retval; 955 return retval;
1006} 956}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9b709fd328..56311ca5f9f8 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id)
174 if ((file->f_mode & FMODE_WRITE) == 0) 174 if ((file->f_mode & FMODE_WRITE) == 0)
175 return 0; 175 return 0;
176 176
177 /*
178 * If we're holding a write delegation, then just start the i/o
179 * but don't wait for completion (or send a commit).
180 */
181 if (nfs_have_delegation(inode, FMODE_WRITE))
182 return filemap_fdatawrite(file->f_mapping);
183
177 /* Flush writes to the server and return any errors */ 184 /* Flush writes to the server and return any errors */
178 return vfs_fsync(file, 0); 185 return vfs_fsync(file, 0);
179} 186}
@@ -417,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
417 424
418 if (status < 0) 425 if (status < 0)
419 return status; 426 return status;
427 NFS_I(mapping->host)->write_io += copied;
420 return copied; 428 return copied;
421} 429}
422 430
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ae65c16b3670..c817787fbdb4 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
64 * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent 64 * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
65 * superblock across an automount point of some nature. 65 * superblock across an automount point of some nature.
66 */ 66 */
67void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, 67void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
68 struct nfs_clone_mount *mntdata)
69{ 68{
70 struct nfs_fscache_key *key, *xkey; 69 struct nfs_fscache_key *key, *xkey;
71 struct nfs_server *nfss = NFS_SB(sb); 70 struct nfs_server *nfss = NFS_SB(sb);
72 struct rb_node **p, *parent; 71 struct rb_node **p, *parent;
73 int diff, ulen; 72 int diff;
74
75 if (uniq) {
76 ulen = strlen(uniq);
77 } else if (mntdata) {
78 struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
79 if (mnt_s->fscache_key) {
80 uniq = mnt_s->fscache_key->key.uniquifier;
81 ulen = mnt_s->fscache_key->key.uniq_len;
82 }
83 }
84 73
85 if (!uniq) { 74 if (!uniq) {
86 uniq = ""; 75 uniq = "";
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index b9c572d0679f..c5b11b53ff33 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void);
73extern void nfs_fscache_get_client_cookie(struct nfs_client *); 73extern void nfs_fscache_get_client_cookie(struct nfs_client *);
74extern void nfs_fscache_release_client_cookie(struct nfs_client *); 74extern void nfs_fscache_release_client_cookie(struct nfs_client *);
75 75
76extern void nfs_fscache_get_super_cookie(struct super_block *, 76extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
77 const char *,
78 struct nfs_clone_mount *);
79extern void nfs_fscache_release_super_cookie(struct super_block *); 77extern void nfs_fscache_release_super_cookie(struct super_block *);
80 78
81extern void nfs_fscache_init_inode_cookie(struct inode *); 79extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -172,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {}
172static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} 170static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
173static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} 171static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
174 172
175static inline void nfs_fscache_get_super_cookie(
176 struct super_block *sb,
177 const char *uniq,
178 struct nfs_clone_mount *mntdata)
179{
180}
181static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} 173static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
182 174
183static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} 175static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 4ca6f5c8038e..8abfb19bd3aa 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -150,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
150 goto out; 150 goto out;
151 151
152 /* Start by getting the root filehandle from the server */ 152 /* Start by getting the root filehandle from the server */
153 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 153 ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
154 if (ret < 0) { 154 if (ret < 0) {
155 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); 155 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
156 goto out; 156 goto out;
@@ -178,87 +178,4 @@ out:
178 return ret; 178 return ret;
179} 179}
180 180
181/*
182 * get an NFS4 root dentry from the root filehandle
183 */
184struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
185 const char *devname)
186{
187 struct nfs_server *server = NFS_SB(sb);
188 struct nfs_fattr *fattr = NULL;
189 struct dentry *ret;
190 struct inode *inode;
191 void *name = kstrdup(devname, GFP_KERNEL);
192 int error;
193
194 dprintk("--> nfs4_get_root()\n");
195
196 if (!name)
197 return ERR_PTR(-ENOMEM);
198
199 /* get the info about the server and filesystem */
200 error = nfs4_server_capabilities(server, mntfh);
201 if (error < 0) {
202 dprintk("nfs_get_root: getcaps error = %d\n",
203 -error);
204 kfree(name);
205 return ERR_PTR(error);
206 }
207
208 fattr = nfs_alloc_fattr();
209 if (fattr == NULL) {
210 kfree(name);
211 return ERR_PTR(-ENOMEM);
212 }
213
214 /* get the actual root for this mount */
215 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
216 if (error < 0) {
217 dprintk("nfs_get_root: getattr error = %d\n", -error);
218 ret = ERR_PTR(error);
219 goto out;
220 }
221
222 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
223 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
224 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
225
226 inode = nfs_fhget(sb, mntfh, fattr);
227 if (IS_ERR(inode)) {
228 dprintk("nfs_get_root: get root inode failed\n");
229 ret = ERR_CAST(inode);
230 goto out;
231 }
232
233 error = nfs_superblock_set_dummy_root(sb, inode);
234 if (error != 0) {
235 ret = ERR_PTR(error);
236 goto out;
237 }
238
239 /* root dentries normally start off anonymous and get spliced in later
240 * if the dentry tree reaches them; however if the dentry already
241 * exists, we'll pick it up at this point and use it as the root
242 */
243 ret = d_obtain_alias(inode);
244 if (IS_ERR(ret)) {
245 dprintk("nfs_get_root: get root dentry failed\n");
246 goto out;
247 }
248
249 security_d_instantiate(ret, inode);
250 spin_lock(&ret->d_lock);
251 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
252 ret->d_fsdata = name;
253 name = NULL;
254 }
255 spin_unlock(&ret->d_lock);
256out:
257 if (name)
258 kfree(name);
259 nfs_free_fattr(fattr);
260 dprintk("<-- nfs4_get_root()\n");
261 return ret;
262}
263
264#endif /* CONFIG_NFS_V4 */ 181#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index ba3019f5934c..b5b86a05059c 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -415,7 +415,7 @@ static int __nfs_idmap_register(struct dentry *dir,
415static void nfs_idmap_unregister(struct nfs_client *clp, 415static void nfs_idmap_unregister(struct nfs_client *clp,
416 struct rpc_pipe *pipe) 416 struct rpc_pipe *pipe)
417{ 417{
418 struct net *net = clp->net; 418 struct net *net = clp->cl_net;
419 struct super_block *pipefs_sb; 419 struct super_block *pipefs_sb;
420 420
421 pipefs_sb = rpc_get_sb_net(net); 421 pipefs_sb = rpc_get_sb_net(net);
@@ -429,7 +429,7 @@ static int nfs_idmap_register(struct nfs_client *clp,
429 struct idmap *idmap, 429 struct idmap *idmap,
430 struct rpc_pipe *pipe) 430 struct rpc_pipe *pipe)
431{ 431{
432 struct net *net = clp->net; 432 struct net *net = clp->cl_net;
433 struct super_block *pipefs_sb; 433 struct super_block *pipefs_sb;
434 int err = 0; 434 int err = 0;
435 435
@@ -530,9 +530,25 @@ static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
530 struct nfs_net *nn = net_generic(net, nfs_net_id); 530 struct nfs_net *nn = net_generic(net, nfs_net_id);
531 struct dentry *cl_dentry; 531 struct dentry *cl_dentry;
532 struct nfs_client *clp; 532 struct nfs_client *clp;
533 int err;
533 534
535restart:
534 spin_lock(&nn->nfs_client_lock); 536 spin_lock(&nn->nfs_client_lock);
535 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { 537 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
538 /* Wait for initialisation to finish */
539 if (clp->cl_cons_state == NFS_CS_INITING) {
540 atomic_inc(&clp->cl_count);
541 spin_unlock(&nn->nfs_client_lock);
542 err = nfs_wait_client_init_complete(clp);
543 nfs_put_client(clp);
544 if (err)
545 return NULL;
546 goto restart;
547 }
548 /* Skip nfs_clients that failed to initialise */
549 if (clp->cl_cons_state < 0)
550 continue;
551 smp_rmb();
536 if (clp->rpc_ops != &nfs_v4_clientops) 552 if (clp->rpc_ops != &nfs_v4_clientops)
537 continue; 553 continue;
538 cl_dentry = clp->cl_idmap->idmap_pipe->dentry; 554 cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
@@ -640,20 +656,16 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
640 struct idmap_msg *im; 656 struct idmap_msg *im;
641 struct idmap *idmap = (struct idmap *)aux; 657 struct idmap *idmap = (struct idmap *)aux;
642 struct key *key = cons->key; 658 struct key *key = cons->key;
643 int ret; 659 int ret = -ENOMEM;
644 660
645 /* msg and im are freed in idmap_pipe_destroy_msg */ 661 /* msg and im are freed in idmap_pipe_destroy_msg */
646 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 662 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
647 if (IS_ERR(msg)) { 663 if (!msg)
648 ret = PTR_ERR(msg);
649 goto out0; 664 goto out0;
650 }
651 665
652 im = kmalloc(sizeof(*im), GFP_KERNEL); 666 im = kmalloc(sizeof(*im), GFP_KERNEL);
653 if (IS_ERR(im)) { 667 if (!im)
654 ret = PTR_ERR(im);
655 goto out1; 668 goto out1;
656 }
657 669
658 ret = nfs_idmap_prepare_message(key->description, im, msg); 670 ret = nfs_idmap_prepare_message(key->description, im, msg);
659 if (ret < 0) 671 if (ret < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c6073139b402..2f6f78c4b42d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
285 inode->i_mode = fattr->mode; 285 inode->i_mode = fattr->mode;
286 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 286 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
287 && nfs_server_capable(inode, NFS_CAP_MODE)) 287 && nfs_server_capable(inode, NFS_CAP_MODE))
288 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 288 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
289 | NFS_INO_INVALID_ACCESS
290 | NFS_INO_INVALID_ACL;
291 /* Why so? Because we want revalidate for devices/FIFOs, and 289 /* Why so? Because we want revalidate for devices/FIFOs, and
292 * that's precisely what we have in nfs_file_inode_operations. 290 * that's precisely what we have in nfs_file_inode_operations.
293 */ 291 */
@@ -300,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
300 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 298 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
301 inode->i_fop = &nfs_dir_operations; 299 inode->i_fop = &nfs_dir_operations;
302 inode->i_data.a_ops = &nfs_dir_aops; 300 inode->i_data.a_ops = &nfs_dir_aops;
303 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
304 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
305 /* Deal with crossing mountpoints */ 301 /* Deal with crossing mountpoints */
306 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || 302 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
307 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { 303 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
@@ -327,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
327 inode->i_gid = -2; 323 inode->i_gid = -2;
328 inode->i_blocks = 0; 324 inode->i_blocks = 0;
329 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 325 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
326 nfsi->write_io = 0;
327 nfsi->read_io = 0;
330 328
331 nfsi->read_cache_jiffies = fattr->time_start; 329 nfsi->read_cache_jiffies = fattr->time_start;
332 nfsi->attr_gencount = fattr->gencount; 330 nfsi->attr_gencount = fattr->gencount;
@@ -337,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
337 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 335 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
338 inode->i_mtime = fattr->mtime; 336 inode->i_mtime = fattr->mtime;
339 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 337 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
340 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 338 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
341 | NFS_INO_INVALID_DATA;
342 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 339 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
343 inode->i_ctime = fattr->ctime; 340 inode->i_ctime = fattr->ctime;
344 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 341 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
345 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 342 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
346 | NFS_INO_INVALID_ACCESS
347 | NFS_INO_INVALID_ACL;
348 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 343 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
349 inode->i_version = fattr->change_attr; 344 inode->i_version = fattr->change_attr;
350 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) 345 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
351 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 346 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
352 | NFS_INO_INVALID_DATA;
353 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 347 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
354 inode->i_size = nfs_size_to_loff_t(fattr->size); 348 inode->i_size = nfs_size_to_loff_t(fattr->size);
355 else 349 else
356 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 350 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
357 | NFS_INO_INVALID_DATA
358 | NFS_INO_REVAL_PAGECACHE; 351 | NFS_INO_REVAL_PAGECACHE;
359 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 352 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
360 set_nlink(inode, fattr->nlink); 353 set_nlink(inode, fattr->nlink);
@@ -363,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
363 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 356 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
364 inode->i_uid = fattr->uid; 357 inode->i_uid = fattr->uid;
365 else if (nfs_server_capable(inode, NFS_CAP_OWNER)) 358 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
366 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 359 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
367 | NFS_INO_INVALID_ACCESS
368 | NFS_INO_INVALID_ACL;
369 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 360 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
370 inode->i_gid = fattr->gid; 361 inode->i_gid = fattr->gid;
371 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 362 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
372 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 363 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
373 | NFS_INO_INVALID_ACCESS
374 | NFS_INO_INVALID_ACL;
375 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 364 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
376 inode->i_blocks = fattr->du.nfs2.blocks; 365 inode->i_blocks = fattr->du.nfs2.blocks;
377 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 366 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -654,6 +643,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
654 nfs_init_lock_context(&ctx->lock_context); 643 nfs_init_lock_context(&ctx->lock_context);
655 ctx->lock_context.open_context = ctx; 644 ctx->lock_context.open_context = ctx;
656 INIT_LIST_HEAD(&ctx->list); 645 INIT_LIST_HEAD(&ctx->list);
646 ctx->mdsthreshold = NULL;
657 return ctx; 647 return ctx;
658} 648}
659 649
@@ -682,6 +672,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
682 put_rpccred(ctx->cred); 672 put_rpccred(ctx->cred);
683 dput(ctx->dentry); 673 dput(ctx->dentry);
684 nfs_sb_deactive(sb); 674 nfs_sb_deactive(sb);
675 kfree(ctx->mdsthreshold);
685 kfree(ctx); 676 kfree(ctx);
686} 677}
687 678
@@ -870,6 +861,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
870 return 0; 861 return 0;
871} 862}
872 863
864static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
865{
866 if (nfs_have_delegated_attributes(inode))
867 return false;
868 return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
869 || nfs_attribute_timeout(inode)
870 || NFS_STALE(inode);
871}
872
873/** 873/**
874 * nfs_revalidate_mapping - Revalidate the pagecache 874 * nfs_revalidate_mapping - Revalidate the pagecache
875 * @inode - pointer to host inode 875 * @inode - pointer to host inode
@@ -880,9 +880,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
880 struct nfs_inode *nfsi = NFS_I(inode); 880 struct nfs_inode *nfsi = NFS_I(inode);
881 int ret = 0; 881 int ret = 0;
882 882
883 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) 883 if (nfs_mapping_need_revalidate_inode(inode)) {
884 || nfs_attribute_cache_expired(inode)
885 || NFS_STALE(inode)) {
886 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 884 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
887 if (ret < 0) 885 if (ret < 0)
888 goto out; 886 goto out;
@@ -948,6 +946,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
948 unsigned long invalid = 0; 946 unsigned long invalid = 0;
949 947
950 948
949 if (nfs_have_delegated_attributes(inode))
950 return 0;
951 /* Has the inode gone and changed behind our back? */ 951 /* Has the inode gone and changed behind our back? */
952 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 952 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
953 return -EIO; 953 return -EIO;
@@ -960,7 +960,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
960 960
961 /* Verify a few of the more important attributes */ 961 /* Verify a few of the more important attributes */
962 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) 962 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
963 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 963 invalid |= NFS_INO_INVALID_ATTR;
964 964
965 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 965 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
966 cur_size = i_size_read(inode); 966 cur_size = i_size_read(inode);
@@ -1279,14 +1279,26 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1279 nfs_display_fhandle_hash(NFS_FH(inode)), 1279 nfs_display_fhandle_hash(NFS_FH(inode)),
1280 atomic_read(&inode->i_count), fattr->valid); 1280 atomic_read(&inode->i_count), fattr->valid);
1281 1281
1282 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 1282 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
1283 goto out_fileid; 1283 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1284 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1285 NFS_SERVER(inode)->nfs_client->cl_hostname,
1286 inode->i_sb->s_id, (long long)nfsi->fileid,
1287 (long long)fattr->fileid);
1288 goto out_err;
1289 }
1284 1290
1285 /* 1291 /*
1286 * Make sure the inode's type hasn't changed. 1292 * Make sure the inode's type hasn't changed.
1287 */ 1293 */
1288 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1294 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1289 goto out_changed; 1295 /*
1296 * Big trouble! The inode has become a different object.
1297 */
1298 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1299 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1300 goto out_err;
1301 }
1290 1302
1291 server = NFS_SERVER(inode); 1303 server = NFS_SERVER(inode);
1292 /* Update the fsid? */ 1304 /* Update the fsid? */
@@ -1314,7 +1326,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1314 if (inode->i_version != fattr->change_attr) { 1326 if (inode->i_version != fattr->change_attr) {
1315 dprintk("NFS: change_attr change on server for file %s/%ld\n", 1327 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1316 inode->i_sb->s_id, inode->i_ino); 1328 inode->i_sb->s_id, inode->i_ino);
1317 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1329 invalid |= NFS_INO_INVALID_ATTR
1330 | NFS_INO_INVALID_DATA
1331 | NFS_INO_INVALID_ACCESS
1332 | NFS_INO_INVALID_ACL
1333 | NFS_INO_REVAL_PAGECACHE;
1318 if (S_ISDIR(inode->i_mode)) 1334 if (S_ISDIR(inode->i_mode))
1319 nfs_force_lookup_revalidate(inode); 1335 nfs_force_lookup_revalidate(inode);
1320 inode->i_version = fattr->change_attr; 1336 inode->i_version = fattr->change_attr;
@@ -1323,38 +1339,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1323 invalid |= save_cache_validity; 1339 invalid |= save_cache_validity;
1324 1340
1325 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1341 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1326 /* NFSv2/v3: Check if the mtime agrees */ 1342 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1327 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1328 dprintk("NFS: mtime change on server for file %s/%ld\n",
1329 inode->i_sb->s_id, inode->i_ino);
1330 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1331 if (S_ISDIR(inode->i_mode))
1332 nfs_force_lookup_revalidate(inode);
1333 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1334 }
1335 } else if (server->caps & NFS_CAP_MTIME) 1343 } else if (server->caps & NFS_CAP_MTIME)
1336 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1344 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1337 | NFS_INO_INVALID_DATA
1338 | NFS_INO_REVAL_PAGECACHE
1339 | NFS_INO_REVAL_FORCED); 1345 | NFS_INO_REVAL_FORCED);
1340 1346
1341 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1347 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1342 /* If ctime has changed we should definitely clear access+acl caches */ 1348 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1343 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
1344 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1345 /* and probably clear data for a directory too as utimes can cause
1346 * havoc with our cache.
1347 */
1348 if (S_ISDIR(inode->i_mode)) {
1349 invalid |= NFS_INO_INVALID_DATA;
1350 nfs_force_lookup_revalidate(inode);
1351 }
1352 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1353 }
1354 } else if (server->caps & NFS_CAP_CTIME) 1349 } else if (server->caps & NFS_CAP_CTIME)
1355 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1350 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1356 | NFS_INO_INVALID_ACCESS
1357 | NFS_INO_INVALID_ACL
1358 | NFS_INO_REVAL_FORCED); 1351 | NFS_INO_REVAL_FORCED);
1359 1352
1360 /* Check if our cached file size is stale */ 1353 /* Check if our cached file size is stale */
@@ -1466,12 +1459,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1466 nfsi->cache_validity |= invalid; 1459 nfsi->cache_validity |= invalid;
1467 1460
1468 return 0; 1461 return 0;
1469 out_changed:
1470 /*
1471 * Big trouble! The inode has become a different object.
1472 */
1473 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1474 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1475 out_err: 1462 out_err:
1476 /* 1463 /*
1477 * No need to worry about unhashing the dentry, as the 1464 * No need to worry about unhashing the dentry, as the
@@ -1480,13 +1467,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1480 */ 1467 */
1481 nfs_invalidate_inode(inode); 1468 nfs_invalidate_inode(inode);
1482 return -ESTALE; 1469 return -ESTALE;
1483
1484 out_fileid:
1485 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1486 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1487 NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
1488 (long long)nfsi->fileid, (long long)fattr->fileid);
1489 goto out_err;
1490} 1470}
1491 1471
1492 1472
@@ -1547,7 +1527,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1547 nfsi->delegation_state = 0; 1527 nfsi->delegation_state = 0;
1548 init_rwsem(&nfsi->rwsem); 1528 init_rwsem(&nfsi->rwsem);
1549 nfsi->layout = NULL; 1529 nfsi->layout = NULL;
1550 atomic_set(&nfsi->commits_outstanding, 0); 1530 atomic_set(&nfsi->commit_info.rpcs_out, 0);
1551#endif 1531#endif
1552} 1532}
1553 1533
@@ -1559,9 +1539,9 @@ static void init_once(void *foo)
1559 INIT_LIST_HEAD(&nfsi->open_files); 1539 INIT_LIST_HEAD(&nfsi->open_files);
1560 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1540 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1561 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1541 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1562 INIT_LIST_HEAD(&nfsi->commit_list); 1542 INIT_LIST_HEAD(&nfsi->commit_info.list);
1563 nfsi->npages = 0; 1543 nfsi->npages = 0;
1564 nfsi->ncommit = 0; 1544 nfsi->commit_info.ncommit = 0;
1565 atomic_set(&nfsi->silly_count, 1); 1545 atomic_set(&nfsi->silly_count, 1);
1566 INIT_HLIST_HEAD(&nfsi->silly_list); 1546 INIT_HLIST_HEAD(&nfsi->silly_list);
1567 init_waitqueue_head(&nfsi->waitqueue); 1547 init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b777bdaba4c5..1848a7275592 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -103,6 +103,7 @@ struct nfs_parsed_mount_data {
103 unsigned int version; 103 unsigned int version;
104 unsigned int minorversion; 104 unsigned int minorversion;
105 char *fscache_uniq; 105 char *fscache_uniq;
106 bool need_mount;
106 107
107 struct { 108 struct {
108 struct sockaddr_storage address; 109 struct sockaddr_storage address;
@@ -167,11 +168,13 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
167 struct nfs_fh *, 168 struct nfs_fh *,
168 struct nfs_fattr *, 169 struct nfs_fattr *,
169 rpc_authflavor_t); 170 rpc_authflavor_t);
171extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
170extern void nfs_mark_client_ready(struct nfs_client *clp, int state); 172extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
171extern int nfs4_check_client_ready(struct nfs_client *clp);
172extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 173extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
173 const struct sockaddr *ds_addr, 174 const struct sockaddr *ds_addr,
174 int ds_addrlen, int ds_proto); 175 int ds_addrlen, int ds_proto,
176 unsigned int ds_timeo,
177 unsigned int ds_retrans);
175#ifdef CONFIG_PROC_FS 178#ifdef CONFIG_PROC_FS
176extern int __init nfs_fs_proc_init(void); 179extern int __init nfs_fs_proc_init(void);
177extern void nfs_fs_proc_exit(void); 180extern void nfs_fs_proc_exit(void);
@@ -185,21 +188,11 @@ static inline void nfs_fs_proc_exit(void)
185} 188}
186#endif 189#endif
187 190
188/* nfs4namespace.c */
189#ifdef CONFIG_NFS_V4
190extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
191#else
192static inline
193struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
194{
195 return ERR_PTR(-ENOENT);
196}
197#endif
198
199/* callback_xdr.c */ 191/* callback_xdr.c */
200extern struct svc_version nfs4_callback_version1; 192extern struct svc_version nfs4_callback_version1;
201extern struct svc_version nfs4_callback_version4; 193extern struct svc_version nfs4_callback_version4;
202 194
195struct nfs_pageio_descriptor;
203/* pagelist.c */ 196/* pagelist.c */
204extern int __init nfs_init_nfspagecache(void); 197extern int __init nfs_init_nfspagecache(void);
205extern void nfs_destroy_nfspagecache(void); 198extern void nfs_destroy_nfspagecache(void);
@@ -210,9 +203,13 @@ extern void nfs_destroy_writepagecache(void);
210 203
211extern int __init nfs_init_directcache(void); 204extern int __init nfs_init_directcache(void);
212extern void nfs_destroy_directcache(void); 205extern void nfs_destroy_directcache(void);
206extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
207extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
208 struct nfs_pgio_header *hdr,
209 void (*release)(struct nfs_pgio_header *hdr));
210void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
213 211
214/* nfs2xdr.c */ 212/* nfs2xdr.c */
215extern int nfs_stat_to_errno(enum nfs_stat);
216extern struct rpc_procinfo nfs_procedures[]; 213extern struct rpc_procinfo nfs_procedures[];
217extern int nfs2_decode_dirent(struct xdr_stream *, 214extern int nfs2_decode_dirent(struct xdr_stream *,
218 struct nfs_entry *, int); 215 struct nfs_entry *, int);
@@ -237,14 +234,13 @@ extern const u32 nfs41_maxwrite_overhead;
237extern struct rpc_procinfo nfs4_procedures[]; 234extern struct rpc_procinfo nfs4_procedures[];
238#endif 235#endif
239 236
240extern int nfs4_init_ds_session(struct nfs_client *clp); 237extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
241 238
242/* proc.c */ 239/* proc.c */
243void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 240void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
244extern int nfs_init_client(struct nfs_client *clp, 241extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
245 const struct rpc_timeout *timeparms, 242 const struct rpc_timeout *timeparms,
246 const char *ip_addr, rpc_authflavor_t authflavour, 243 const char *ip_addr, rpc_authflavor_t authflavour);
247 int noresvport);
248 244
249/* dir.c */ 245/* dir.c */
250extern int nfs_access_cache_shrinker(struct shrinker *shrink, 246extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -280,9 +276,10 @@ extern void nfs_sb_deactive(struct super_block *sb);
280extern char *nfs_path(char **p, struct dentry *dentry, 276extern char *nfs_path(char **p, struct dentry *dentry,
281 char *buffer, ssize_t buflen); 277 char *buffer, ssize_t buflen);
282extern struct vfsmount *nfs_d_automount(struct path *path); 278extern struct vfsmount *nfs_d_automount(struct path *path);
283#ifdef CONFIG_NFS_V4 279struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
284rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); 280 struct nfs_fh *, struct nfs_fattr *);
285#endif 281struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
282 struct nfs_fattr *, rpc_authflavor_t);
286 283
287/* getroot.c */ 284/* getroot.c */
288extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, 285extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -294,46 +291,73 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
294extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 291extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
295#endif 292#endif
296 293
297struct nfs_pageio_descriptor; 294struct nfs_pgio_completion_ops;
298/* read.c */ 295/* read.c */
299extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 296extern struct nfs_read_header *nfs_readhdr_alloc(void);
300 const struct rpc_call_ops *call_ops); 297extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
298extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
299 struct inode *inode,
300 const struct nfs_pgio_completion_ops *compl_ops);
301extern int nfs_initiate_read(struct rpc_clnt *clnt,
302 struct nfs_read_data *data,
303 const struct rpc_call_ops *call_ops, int flags);
301extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 304extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
302extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, 305extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
303 struct list_head *head); 306 struct nfs_pgio_header *hdr);
304
305extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 307extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
306 struct inode *inode); 308 struct inode *inode,
309 const struct nfs_pgio_completion_ops *compl_ops);
307extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 310extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
308extern void nfs_readdata_release(struct nfs_read_data *rdata); 311extern void nfs_readdata_release(struct nfs_read_data *rdata);
309 312
310/* write.c */ 313/* write.c */
314extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
315 struct inode *inode, int ioflags,
316 const struct nfs_pgio_completion_ops *compl_ops);
317extern struct nfs_write_header *nfs_writehdr_alloc(void);
318extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
311extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, 319extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
312 struct list_head *head); 320 struct nfs_pgio_header *hdr);
313extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 321extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
314 struct inode *inode, int ioflags); 322 struct inode *inode, int ioflags,
323 const struct nfs_pgio_completion_ops *compl_ops);
315extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); 324extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
316extern void nfs_writedata_release(struct nfs_write_data *wdata); 325extern void nfs_writedata_release(struct nfs_write_data *wdata);
317extern void nfs_commit_free(struct nfs_write_data *p); 326extern void nfs_commit_free(struct nfs_commit_data *p);
318extern int nfs_initiate_write(struct nfs_write_data *data, 327extern int nfs_initiate_write(struct rpc_clnt *clnt,
319 struct rpc_clnt *clnt, 328 struct nfs_write_data *data,
320 const struct rpc_call_ops *call_ops, 329 const struct rpc_call_ops *call_ops,
321 int how); 330 int how, int flags);
322extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 331extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
323extern int nfs_initiate_commit(struct nfs_write_data *data, 332extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
324 struct rpc_clnt *clnt, 333extern int nfs_initiate_commit(struct rpc_clnt *clnt,
334 struct nfs_commit_data *data,
325 const struct rpc_call_ops *call_ops, 335 const struct rpc_call_ops *call_ops,
326 int how); 336 int how, int flags);
327extern void nfs_init_commit(struct nfs_write_data *data, 337extern void nfs_init_commit(struct nfs_commit_data *data,
328 struct list_head *head, 338 struct list_head *head,
329 struct pnfs_layout_segment *lseg); 339 struct pnfs_layout_segment *lseg,
340 struct nfs_commit_info *cinfo);
341int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
342 struct nfs_commit_info *cinfo, int max);
343int nfs_scan_commit(struct inode *inode, struct list_head *dst,
344 struct nfs_commit_info *cinfo);
345void nfs_mark_request_commit(struct nfs_page *req,
346 struct pnfs_layout_segment *lseg,
347 struct nfs_commit_info *cinfo);
348int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
349 int how, struct nfs_commit_info *cinfo);
330void nfs_retry_commit(struct list_head *page_list, 350void nfs_retry_commit(struct list_head *page_list,
331 struct pnfs_layout_segment *lseg); 351 struct pnfs_layout_segment *lseg,
332void nfs_commit_clear_lock(struct nfs_inode *nfsi); 352 struct nfs_commit_info *cinfo);
333void nfs_commitdata_release(void *data); 353void nfs_commitdata_release(struct nfs_commit_data *data);
334void nfs_commit_release_pages(struct nfs_write_data *data); 354void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
335void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); 355 struct nfs_commit_info *cinfo);
336void nfs_request_remove_commit_list(struct nfs_page *req); 356void nfs_request_remove_commit_list(struct nfs_page *req,
357 struct nfs_commit_info *cinfo);
358void nfs_init_cinfo(struct nfs_commit_info *cinfo,
359 struct inode *inode,
360 struct nfs_direct_req *dreq);
337 361
338#ifdef CONFIG_MIGRATION 362#ifdef CONFIG_MIGRATION
339extern int nfs_migrate_page(struct address_space *, 363extern int nfs_migrate_page(struct address_space *,
@@ -342,15 +366,16 @@ extern int nfs_migrate_page(struct address_space *,
342#define nfs_migrate_page NULL 366#define nfs_migrate_page NULL
343#endif 367#endif
344 368
369/* direct.c */
370void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
371 struct nfs_direct_req *dreq);
372
345/* nfs4proc.c */ 373/* nfs4proc.c */
346extern void __nfs4_read_done_cb(struct nfs_read_data *); 374extern void __nfs4_read_done_cb(struct nfs_read_data *);
347extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); 375extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
348extern int nfs4_init_client(struct nfs_client *clp,
349 const struct rpc_timeout *timeparms, 376 const struct rpc_timeout *timeparms,
350 const char *ip_addr, 377 const char *ip_addr,
351 rpc_authflavor_t authflavour, 378 rpc_authflavor_t authflavour);
352 int noresvport);
353extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
354extern int _nfs4_call_sync(struct rpc_clnt *clnt, 379extern int _nfs4_call_sync(struct rpc_clnt *clnt,
355 struct nfs_server *server, 380 struct nfs_server *server,
356 struct rpc_message *msg, 381 struct rpc_message *msg,
@@ -466,3 +491,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
466 PAGE_SIZE - 1) >> PAGE_SHIFT; 491 PAGE_SIZE - 1) >> PAGE_SHIFT;
467} 492}
468 493
494/*
495 * Convert a struct timespec into a 64-bit change attribute
496 *
497 * This does approximately the same thing as timespec_to_ns(),
498 * but for calculation efficiency, we multiply the seconds by
499 * 1024*1024*1024.
500 */
501static inline
502u64 nfs_timespec_to_change_attr(const struct timespec *ts)
503{
504 return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
505}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d51868e5683c..08b9c93675da 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);
26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
27int nfs_mountpoint_expiry_timeout = 500 * HZ; 27int nfs_mountpoint_expiry_timeout = 500 * HZ;
28 28
29static struct vfsmount *nfs_do_submount(struct dentry *dentry,
30 struct nfs_fh *fh,
31 struct nfs_fattr *fattr,
32 rpc_authflavor_t authflavor);
33
34/* 29/*
35 * nfs_path - reconstruct the path given an arbitrary dentry 30 * nfs_path - reconstruct the path given an arbitrary dentry
36 * @base - used to return pointer to the end of devname part of path 31 * @base - used to return pointer to the end of devname part of path
@@ -118,64 +113,6 @@ Elong:
118 return ERR_PTR(-ENAMETOOLONG); 113 return ERR_PTR(-ENAMETOOLONG);
119} 114}
120 115
121#ifdef CONFIG_NFS_V4
122rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{
124 struct gss_api_mech *mech;
125 struct xdr_netobj oid;
126 int i;
127 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
128
129 for (i = 0; i < flavors->num_flavors; i++) {
130 struct nfs4_secinfo_flavor *flavor;
131 flavor = &flavors->flavors[i];
132
133 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
134 pseudoflavor = flavor->flavor;
135 break;
136 } else if (flavor->flavor == RPC_AUTH_GSS) {
137 oid.len = flavor->gss.sec_oid4.len;
138 oid.data = flavor->gss.sec_oid4.data;
139 mech = gss_mech_get_by_OID(&oid);
140 if (!mech)
141 continue;
142 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
143 gss_mech_put(mech);
144 break;
145 }
146 }
147
148 return pseudoflavor;
149}
150
151static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
152 struct qstr *name,
153 struct nfs_fh *fh,
154 struct nfs_fattr *fattr)
155{
156 int err;
157
158 if (NFS_PROTO(dir)->version == 4)
159 return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
160
161 err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
162 if (err)
163 return ERR_PTR(err);
164 return rpc_clone_client(NFS_SERVER(dir)->client);
165}
166#else /* CONFIG_NFS_V4 */
167static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
168 struct qstr *name,
169 struct nfs_fh *fh,
170 struct nfs_fattr *fattr)
171{
172 int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
173 if (err)
174 return ERR_PTR(err);
175 return rpc_clone_client(NFS_SERVER(dir)->client);
176}
177#endif /* CONFIG_NFS_V4 */
178
179/* 116/*
180 * nfs_d_automount - Handle crossing a mountpoint on the server 117 * nfs_d_automount - Handle crossing a mountpoint on the server
181 * @path - The mountpoint 118 * @path - The mountpoint
@@ -191,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
191struct vfsmount *nfs_d_automount(struct path *path) 128struct vfsmount *nfs_d_automount(struct path *path)
192{ 129{
193 struct vfsmount *mnt; 130 struct vfsmount *mnt;
194 struct dentry *parent; 131 struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
195 struct nfs_fh *fh = NULL; 132 struct nfs_fh *fh = NULL;
196 struct nfs_fattr *fattr = NULL; 133 struct nfs_fattr *fattr = NULL;
197 struct rpc_clnt *client;
198 134
199 dprintk("--> nfs_d_automount()\n"); 135 dprintk("--> nfs_d_automount()\n");
200 136
@@ -210,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
210 146
211 dprintk("%s: enter\n", __func__); 147 dprintk("%s: enter\n", __func__);
212 148
213 /* Look it up again to get its attributes */ 149 mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
214 parent = dget_parent(path->dentry);
215 client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
216 dput(parent);
217 if (IS_ERR(client)) {
218 mnt = ERR_CAST(client);
219 goto out;
220 }
221
222 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
223 mnt = nfs_do_refmount(client, path->dentry);
224 else
225 mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
226 rpc_shutdown_client(client);
227
228 if (IS_ERR(mnt)) 150 if (IS_ERR(mnt))
229 goto out; 151 goto out;
230 152
@@ -297,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
297 * @authflavor - security flavor to use when performing the mount 219 * @authflavor - security flavor to use when performing the mount
298 * 220 *
299 */ 221 */
300static struct vfsmount *nfs_do_submount(struct dentry *dentry, 222struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
301 struct nfs_fh *fh, 223 struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
302 struct nfs_fattr *fattr,
303 rpc_authflavor_t authflavor)
304{ 224{
305 struct nfs_clone_mount mountdata = { 225 struct nfs_clone_mount mountdata = {
306 .sb = dentry->d_sb, 226 .sb = dentry->d_sb,
@@ -333,3 +253,18 @@ out:
333 dprintk("<-- nfs_do_submount() = %p\n", mnt); 253 dprintk("<-- nfs_do_submount() = %p\n", mnt);
334 return mnt; 254 return mnt;
335} 255}
256
257struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
258 struct nfs_fh *fh, struct nfs_fattr *fattr)
259{
260 int err;
261 struct dentry *parent = dget_parent(dentry);
262
263 /* Look it up again to get its attributes */
264 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
265 dput(parent);
266 if (err != 0)
267 return ERR_PTR(err);
268
269 return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
270}
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index aa14ec303e94..8a6394edb8b0 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -1,3 +1,7 @@
1/*
2 * NFS-private data for each "struct net". Accessed with net_generic().
3 */
4
1#ifndef __NFS_NETNS_H__ 5#ifndef __NFS_NETNS_H__
2#define __NFS_NETNS_H__ 6#define __NFS_NETNS_H__
3 7
@@ -20,6 +24,7 @@ struct nfs_net {
20 struct idr cb_ident_idr; /* Protected by nfs_client_lock */ 24 struct idr cb_ident_idr; /* Protected by nfs_client_lock */
21#endif 25#endif
22 spinlock_t nfs_client_lock; 26 spinlock_t nfs_client_lock;
27 struct timespec boot_time;
23}; 28};
24 29
25extern int nfs_net_id; 30extern int nfs_net_id;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f56000fabbd..baf759bccd05 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,6 +61,7 @@
61#define NFS_readdirres_sz (1) 61#define NFS_readdirres_sz (1)
62#define NFS_statfsres_sz (1+NFS_info_sz) 62#define NFS_statfsres_sz (1+NFS_info_sz)
63 63
64static int nfs_stat_to_errno(enum nfs_stat);
64 65
65/* 66/*
66 * While encoding arguments, set up the reply buffer in advance to 67 * While encoding arguments, set up the reply buffer in advance to
@@ -313,6 +314,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
313 p = xdr_decode_time(p, &fattr->atime); 314 p = xdr_decode_time(p, &fattr->atime);
314 p = xdr_decode_time(p, &fattr->mtime); 315 p = xdr_decode_time(p, &fattr->mtime);
315 xdr_decode_time(p, &fattr->ctime); 316 xdr_decode_time(p, &fattr->ctime);
317 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
318
316 return 0; 319 return 0;
317out_overflow: 320out_overflow:
318 print_overflow_msg(__func__, xdr); 321 print_overflow_msg(__func__, xdr);
@@ -1109,7 +1112,7 @@ static const struct {
1109 * Returns a local errno value, or -EIO if the NFS status code is 1112 * Returns a local errno value, or -EIO if the NFS status code is
1110 * not recognized. This function is used jointly by NFSv2 and NFSv3. 1113 * not recognized. This function is used jointly by NFSv2 and NFSv3.
1111 */ 1114 */
1112int nfs_stat_to_errno(enum nfs_stat status) 1115static int nfs_stat_to_errno(enum nfs_stat status)
1113{ 1116{
1114 int i; 1117 int i;
1115 1118
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 75c68299358e..2292a0fd2bff 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
142} 142}
143 143
144static int 144static int
145nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 145nfs3_proc_lookup(struct inode *dir, struct qstr *name,
146 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 146 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
147{ 147{
148 struct nfs3_diropargs arg = { 148 struct nfs3_diropargs arg = {
@@ -810,11 +810,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
810 810
811static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) 811static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
812{ 812{
813 if (nfs3_async_handle_jukebox(task, data->inode)) 813 struct inode *inode = data->header->inode;
814
815 if (nfs3_async_handle_jukebox(task, inode))
814 return -EAGAIN; 816 return -EAGAIN;
815 817
816 nfs_invalidate_atime(data->inode); 818 nfs_invalidate_atime(inode);
817 nfs_refresh_inode(data->inode, &data->fattr); 819 nfs_refresh_inode(inode, &data->fattr);
818 return 0; 820 return 0;
819} 821}
820 822
@@ -830,10 +832,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
830 832
831static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) 833static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
832{ 834{
833 if (nfs3_async_handle_jukebox(task, data->inode)) 835 struct inode *inode = data->header->inode;
836
837 if (nfs3_async_handle_jukebox(task, inode))
834 return -EAGAIN; 838 return -EAGAIN;
835 if (task->tk_status >= 0) 839 if (task->tk_status >= 0)
836 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 840 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
837 return 0; 841 return 0;
838} 842}
839 843
@@ -847,7 +851,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
847 rpc_call_start(task); 851 rpc_call_start(task);
848} 852}
849 853
850static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) 854static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
855{
856 rpc_call_start(task);
857}
858
859static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
851{ 860{
852 if (nfs3_async_handle_jukebox(task, data->inode)) 861 if (nfs3_async_handle_jukebox(task, data->inode))
853 return -EAGAIN; 862 return -EAGAIN;
@@ -855,7 +864,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
855 return 0; 864 return 0;
856} 865}
857 866
858static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 867static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
859{ 868{
860 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; 869 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
861} 870}
@@ -875,6 +884,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
875 .file_inode_ops = &nfs3_file_inode_operations, 884 .file_inode_ops = &nfs3_file_inode_operations,
876 .file_ops = &nfs_file_operations, 885 .file_ops = &nfs_file_operations,
877 .getroot = nfs3_proc_get_root, 886 .getroot = nfs3_proc_get_root,
887 .submount = nfs_submount,
878 .getattr = nfs3_proc_getattr, 888 .getattr = nfs3_proc_getattr,
879 .setattr = nfs3_proc_setattr, 889 .setattr = nfs3_proc_setattr,
880 .lookup = nfs3_proc_lookup, 890 .lookup = nfs3_proc_lookup,
@@ -906,6 +916,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
906 .write_rpc_prepare = nfs3_proc_write_rpc_prepare, 916 .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
907 .write_done = nfs3_write_done, 917 .write_done = nfs3_write_done,
908 .commit_setup = nfs3_proc_commit_setup, 918 .commit_setup = nfs3_proc_commit_setup,
919 .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
909 .commit_done = nfs3_commit_done, 920 .commit_done = nfs3_commit_done,
910 .lock = nfs3_proc_lock, 921 .lock = nfs3_proc_lock,
911 .clear_acl_cache = nfs3_forget_cached_acls, 922 .clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a77cc9a3ce55..902de489ec9b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -86,6 +86,8 @@
86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) 86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) 87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
88 88
89static int nfs3_stat_to_errno(enum nfs_stat);
90
89/* 91/*
90 * Map file type to S_IFMT bits 92 * Map file type to S_IFMT bits
91 */ 93 */
@@ -675,6 +677,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
675 p = xdr_decode_nfstime3(p, &fattr->atime); 677 p = xdr_decode_nfstime3(p, &fattr->atime);
676 p = xdr_decode_nfstime3(p, &fattr->mtime); 678 p = xdr_decode_nfstime3(p, &fattr->mtime);
677 xdr_decode_nfstime3(p, &fattr->ctime); 679 xdr_decode_nfstime3(p, &fattr->ctime);
680 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
678 681
679 fattr->valid |= NFS_ATTR_FATTR_V3; 682 fattr->valid |= NFS_ATTR_FATTR_V3;
680 return 0; 683 return 0;
@@ -725,12 +728,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
725 goto out_overflow; 728 goto out_overflow;
726 729
727 fattr->valid |= NFS_ATTR_FATTR_PRESIZE 730 fattr->valid |= NFS_ATTR_FATTR_PRESIZE
731 | NFS_ATTR_FATTR_PRECHANGE
728 | NFS_ATTR_FATTR_PREMTIME 732 | NFS_ATTR_FATTR_PREMTIME
729 | NFS_ATTR_FATTR_PRECTIME; 733 | NFS_ATTR_FATTR_PRECTIME;
730 734
731 p = xdr_decode_size3(p, &fattr->pre_size); 735 p = xdr_decode_size3(p, &fattr->pre_size);
732 p = xdr_decode_nfstime3(p, &fattr->pre_mtime); 736 p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
733 xdr_decode_nfstime3(p, &fattr->pre_ctime); 737 xdr_decode_nfstime3(p, &fattr->pre_ctime);
738 fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
734 739
735 return 0; 740 return 0;
736out_overflow: 741out_overflow:
@@ -1287,7 +1292,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
1287 * }; 1292 * };
1288 */ 1293 */
1289static void encode_commit3args(struct xdr_stream *xdr, 1294static void encode_commit3args(struct xdr_stream *xdr,
1290 const struct nfs_writeargs *args) 1295 const struct nfs_commitargs *args)
1291{ 1296{
1292 __be32 *p; 1297 __be32 *p;
1293 1298
@@ -1300,7 +1305,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
1300 1305
1301static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req, 1306static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
1302 struct xdr_stream *xdr, 1307 struct xdr_stream *xdr,
1303 const struct nfs_writeargs *args) 1308 const struct nfs_commitargs *args)
1304{ 1309{
1305 encode_commit3args(xdr, args); 1310 encode_commit3args(xdr, args);
1306} 1311}
@@ -1385,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
1385out: 1390out:
1386 return error; 1391 return error;
1387out_default: 1392out_default:
1388 return nfs_stat_to_errno(status); 1393 return nfs3_stat_to_errno(status);
1389} 1394}
1390 1395
1391/* 1396/*
@@ -1424,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
1424out: 1429out:
1425 return error; 1430 return error;
1426out_status: 1431out_status:
1427 return nfs_stat_to_errno(status); 1432 return nfs3_stat_to_errno(status);
1428} 1433}
1429 1434
1430/* 1435/*
@@ -1472,7 +1477,7 @@ out_default:
1472 error = decode_post_op_attr(xdr, result->dir_attr); 1477 error = decode_post_op_attr(xdr, result->dir_attr);
1473 if (unlikely(error)) 1478 if (unlikely(error))
1474 goto out; 1479 goto out;
1475 return nfs_stat_to_errno(status); 1480 return nfs3_stat_to_errno(status);
1476} 1481}
1477 1482
1478/* 1483/*
@@ -1513,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
1513out: 1518out:
1514 return error; 1519 return error;
1515out_default: 1520out_default:
1516 return nfs_stat_to_errno(status); 1521 return nfs3_stat_to_errno(status);
1517} 1522}
1518 1523
1519/* 1524/*
@@ -1554,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
1554out: 1559out:
1555 return error; 1560 return error;
1556out_default: 1561out_default:
1557 return nfs_stat_to_errno(status); 1562 return nfs3_stat_to_errno(status);
1558} 1563}
1559 1564
1560/* 1565/*
@@ -1636,7 +1641,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1636out: 1641out:
1637 return error; 1642 return error;
1638out_status: 1643out_status:
1639 return nfs_stat_to_errno(status); 1644 return nfs3_stat_to_errno(status);
1640} 1645}
1641 1646
1642/* 1647/*
@@ -1706,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1706out: 1711out:
1707 return error; 1712 return error;
1708out_status: 1713out_status:
1709 return nfs_stat_to_errno(status); 1714 return nfs3_stat_to_errno(status);
1710} 1715}
1711 1716
1712/* 1717/*
@@ -1770,7 +1775,7 @@ out_default:
1770 error = decode_wcc_data(xdr, result->dir_attr); 1775 error = decode_wcc_data(xdr, result->dir_attr);
1771 if (unlikely(error)) 1776 if (unlikely(error))
1772 goto out; 1777 goto out;
1773 return nfs_stat_to_errno(status); 1778 return nfs3_stat_to_errno(status);
1774} 1779}
1775 1780
1776/* 1781/*
@@ -1809,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
1809out: 1814out:
1810 return error; 1815 return error;
1811out_status: 1816out_status:
1812 return nfs_stat_to_errno(status); 1817 return nfs3_stat_to_errno(status);
1813} 1818}
1814 1819
1815/* 1820/*
@@ -1853,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
1853out: 1858out:
1854 return error; 1859 return error;
1855out_status: 1860out_status:
1856 return nfs_stat_to_errno(status); 1861 return nfs3_stat_to_errno(status);
1857} 1862}
1858 1863
1859/* 1864/*
@@ -1896,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1896out: 1901out:
1897 return error; 1902 return error;
1898out_status: 1903out_status:
1899 return nfs_stat_to_errno(status); 1904 return nfs3_stat_to_errno(status);
1900} 1905}
1901 1906
1902/** 1907/**
@@ -2088,7 +2093,7 @@ out_default:
2088 error = decode_post_op_attr(xdr, result->dir_attr); 2093 error = decode_post_op_attr(xdr, result->dir_attr);
2089 if (unlikely(error)) 2094 if (unlikely(error))
2090 goto out; 2095 goto out;
2091 return nfs_stat_to_errno(status); 2096 return nfs3_stat_to_errno(status);
2092} 2097}
2093 2098
2094/* 2099/*
@@ -2156,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
2156out: 2161out:
2157 return error; 2162 return error;
2158out_status: 2163out_status:
2159 return nfs_stat_to_errno(status); 2164 return nfs3_stat_to_errno(status);
2160} 2165}
2161 2166
2162/* 2167/*
@@ -2232,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
2232out: 2237out:
2233 return error; 2238 return error;
2234out_status: 2239out_status:
2235 return nfs_stat_to_errno(status); 2240 return nfs3_stat_to_errno(status);
2236} 2241}
2237 2242
2238/* 2243/*
@@ -2295,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
2295out: 2300out:
2296 return error; 2301 return error;
2297out_status: 2302out_status:
2298 return nfs_stat_to_errno(status); 2303 return nfs3_stat_to_errno(status);
2299} 2304}
2300 2305
2301/* 2306/*
@@ -2319,7 +2324,7 @@ out_status:
2319 */ 2324 */
2320static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, 2325static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2321 struct xdr_stream *xdr, 2326 struct xdr_stream *xdr,
2322 struct nfs_writeres *result) 2327 struct nfs_commitres *result)
2323{ 2328{
2324 enum nfs_stat status; 2329 enum nfs_stat status;
2325 int error; 2330 int error;
@@ -2336,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2336out: 2341out:
2337 return error; 2342 return error;
2338out_status: 2343out_status:
2339 return nfs_stat_to_errno(status); 2344 return nfs3_stat_to_errno(status);
2340} 2345}
2341 2346
2342#ifdef CONFIG_NFS_V3_ACL 2347#ifdef CONFIG_NFS_V3_ACL
@@ -2401,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
2401out: 2406out:
2402 return error; 2407 return error;
2403out_default: 2408out_default:
2404 return nfs_stat_to_errno(status); 2409 return nfs3_stat_to_errno(status);
2405} 2410}
2406 2411
2407static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, 2412static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2420,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
2420out: 2425out:
2421 return error; 2426 return error;
2422out_default: 2427out_default:
2423 return nfs_stat_to_errno(status); 2428 return nfs3_stat_to_errno(status);
2424} 2429}
2425 2430
2426#endif /* CONFIG_NFS_V3_ACL */ 2431#endif /* CONFIG_NFS_V3_ACL */
2427 2432
2433
2434/*
2435 * We need to translate between nfs status return values and
2436 * the local errno values which may not be the same.
2437 */
2438static const struct {
2439 int stat;
2440 int errno;
2441} nfs_errtbl[] = {
2442 { NFS_OK, 0 },
2443 { NFSERR_PERM, -EPERM },
2444 { NFSERR_NOENT, -ENOENT },
2445 { NFSERR_IO, -errno_NFSERR_IO},
2446 { NFSERR_NXIO, -ENXIO },
2447/* { NFSERR_EAGAIN, -EAGAIN }, */
2448 { NFSERR_ACCES, -EACCES },
2449 { NFSERR_EXIST, -EEXIST },
2450 { NFSERR_XDEV, -EXDEV },
2451 { NFSERR_NODEV, -ENODEV },
2452 { NFSERR_NOTDIR, -ENOTDIR },
2453 { NFSERR_ISDIR, -EISDIR },
2454 { NFSERR_INVAL, -EINVAL },
2455 { NFSERR_FBIG, -EFBIG },
2456 { NFSERR_NOSPC, -ENOSPC },
2457 { NFSERR_ROFS, -EROFS },
2458 { NFSERR_MLINK, -EMLINK },
2459 { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
2460 { NFSERR_NOTEMPTY, -ENOTEMPTY },
2461 { NFSERR_DQUOT, -EDQUOT },
2462 { NFSERR_STALE, -ESTALE },
2463 { NFSERR_REMOTE, -EREMOTE },
2464#ifdef EWFLUSH
2465 { NFSERR_WFLUSH, -EWFLUSH },
2466#endif
2467 { NFSERR_BADHANDLE, -EBADHANDLE },
2468 { NFSERR_NOT_SYNC, -ENOTSYNC },
2469 { NFSERR_BAD_COOKIE, -EBADCOOKIE },
2470 { NFSERR_NOTSUPP, -ENOTSUPP },
2471 { NFSERR_TOOSMALL, -ETOOSMALL },
2472 { NFSERR_SERVERFAULT, -EREMOTEIO },
2473 { NFSERR_BADTYPE, -EBADTYPE },
2474 { NFSERR_JUKEBOX, -EJUKEBOX },
2475 { -1, -EIO }
2476};
2477
2478/**
2479 * nfs3_stat_to_errno - convert an NFS status code to a local errno
2480 * @status: NFS status code to convert
2481 *
2482 * Returns a local errno value, or -EIO if the NFS status code is
2483 * not recognized. This function is used jointly by NFSv2 and NFSv3.
2484 */
2485static int nfs3_stat_to_errno(enum nfs_stat status)
2486{
2487 int i;
2488
2489 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
2490 if (nfs_errtbl[i].stat == (int)status)
2491 return nfs_errtbl[i].errno;
2492 }
2493 dprintk("NFS: Unrecognized nfs status value: %u\n", status);
2494 return nfs_errtbl[i].errno;
2495}
2496
2497
2428#define PROC(proc, argtype, restype, timer) \ 2498#define PROC(proc, argtype, restype, timer) \
2429[NFS3PROC_##proc] = { \ 2499[NFS3PROC_##proc] = { \
2430 .p_proc = NFS3PROC_##proc, \ 2500 .p_proc = NFS3PROC_##proc, \
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d75021020b3..c6827f93ab57 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -24,6 +24,8 @@ enum nfs4_client_state {
24 NFS4CLNT_RECALL_SLOT, 24 NFS4CLNT_RECALL_SLOT,
25 NFS4CLNT_LEASE_CONFIRM, 25 NFS4CLNT_LEASE_CONFIRM,
26 NFS4CLNT_SERVER_SCOPE_MISMATCH, 26 NFS4CLNT_SERVER_SCOPE_MISMATCH,
27 NFS4CLNT_PURGE_STATE,
28 NFS4CLNT_BIND_CONN_TO_SESSION,
27}; 29};
28 30
29enum nfs4_session_state { 31enum nfs4_session_state {
@@ -52,11 +54,6 @@ struct nfs4_minor_version_ops {
52 const struct nfs4_state_maintenance_ops *state_renewal_ops; 54 const struct nfs4_state_maintenance_ops *state_renewal_ops;
53}; 55};
54 56
55struct nfs_unique_id {
56 struct rb_node rb_node;
57 __u64 id;
58};
59
60#define NFS_SEQID_CONFIRMED 1 57#define NFS_SEQID_CONFIRMED 1
61struct nfs_seqid_counter { 58struct nfs_seqid_counter {
62 ktime_t create_time; 59 ktime_t create_time;
@@ -206,12 +203,18 @@ extern const struct dentry_operations nfs4_dentry_operations;
206extern const struct inode_operations nfs4_dir_inode_operations; 203extern const struct inode_operations nfs4_dir_inode_operations;
207 204
208/* nfs4namespace.c */ 205/* nfs4namespace.c */
206rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
209struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 207struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
208struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
209 struct nfs_fh *, struct nfs_fattr *);
210 210
211/* nfs4proc.c */ 211/* nfs4proc.c */
212extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); 212extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
213extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); 213extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
214extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
215extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
214extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); 216extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
217extern int nfs4_destroy_clientid(struct nfs_client *clp);
215extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 218extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
216extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 219extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
217extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); 220extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
@@ -239,8 +242,8 @@ extern int nfs41_setup_sequence(struct nfs4_session *session,
239 struct rpc_task *task); 242 struct rpc_task *task);
240extern void nfs4_destroy_session(struct nfs4_session *session); 243extern void nfs4_destroy_session(struct nfs4_session *session);
241extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 244extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
242extern int nfs4_proc_create_session(struct nfs_client *); 245extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
243extern int nfs4_proc_destroy_session(struct nfs4_session *); 246extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
244extern int nfs4_init_session(struct nfs_server *server); 247extern int nfs4_init_session(struct nfs_server *server);
245extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 248extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
246 struct nfs_fsinfo *fsinfo); 249 struct nfs_fsinfo *fsinfo);
@@ -310,9 +313,9 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
310#if defined(CONFIG_NFS_V4_1) 313#if defined(CONFIG_NFS_V4_1)
311struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 314struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
312struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 315struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
313extern void nfs4_schedule_session_recovery(struct nfs4_session *); 316extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
314#else 317#else
315static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) 318static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
316{ 319{
317} 320}
318#endif /* CONFIG_NFS_V4_1 */ 321#endif /* CONFIG_NFS_V4_1 */
@@ -334,7 +337,7 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs
334extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 337extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
335extern void nfs41_handle_recall_slot(struct nfs_client *clp); 338extern void nfs41_handle_recall_slot(struct nfs_client *clp);
336extern void nfs41_handle_server_scope(struct nfs_client *, 339extern void nfs41_handle_server_scope(struct nfs_client *,
337 struct server_scope **); 340 struct nfs41_server_scope **);
338extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 341extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
339extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 342extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
340extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, 343extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5acfd9ea8a31..e1340293872c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,29 +82,76 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
82 BUG(); 82 BUG();
83} 83}
84 84
85static void filelayout_reset_write(struct nfs_write_data *data)
86{
87 struct nfs_pgio_header *hdr = data->header;
88 struct rpc_task *task = &data->task;
89
90 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
91 dprintk("%s Reset task %5u for i/o through MDS "
92 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
93 data->task.tk_pid,
94 hdr->inode->i_sb->s_id,
95 (long long)NFS_FILEID(hdr->inode),
96 data->args.count,
97 (unsigned long long)data->args.offset);
98
99 task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
100 &hdr->pages,
101 hdr->completion_ops);
102 }
103}
104
105static void filelayout_reset_read(struct nfs_read_data *data)
106{
107 struct nfs_pgio_header *hdr = data->header;
108 struct rpc_task *task = &data->task;
109
110 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
111 dprintk("%s Reset task %5u for i/o through MDS "
112 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
113 data->task.tk_pid,
114 hdr->inode->i_sb->s_id,
115 (long long)NFS_FILEID(hdr->inode),
116 data->args.count,
117 (unsigned long long)data->args.offset);
118
119 task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
120 &hdr->pages,
121 hdr->completion_ops);
122 }
123}
124
85static int filelayout_async_handle_error(struct rpc_task *task, 125static int filelayout_async_handle_error(struct rpc_task *task,
86 struct nfs4_state *state, 126 struct nfs4_state *state,
87 struct nfs_client *clp, 127 struct nfs_client *clp,
88 int *reset) 128 struct pnfs_layout_segment *lseg)
89{ 129{
90 struct nfs_server *mds_server = NFS_SERVER(state->inode); 130 struct inode *inode = lseg->pls_layout->plh_inode;
131 struct nfs_server *mds_server = NFS_SERVER(inode);
132 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
91 struct nfs_client *mds_client = mds_server->nfs_client; 133 struct nfs_client *mds_client = mds_server->nfs_client;
134 struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
92 135
93 if (task->tk_status >= 0) 136 if (task->tk_status >= 0)
94 return 0; 137 return 0;
95 *reset = 0;
96 138
97 switch (task->tk_status) { 139 switch (task->tk_status) {
98 /* MDS state errors */ 140 /* MDS state errors */
99 case -NFS4ERR_DELEG_REVOKED: 141 case -NFS4ERR_DELEG_REVOKED:
100 case -NFS4ERR_ADMIN_REVOKED: 142 case -NFS4ERR_ADMIN_REVOKED:
101 case -NFS4ERR_BAD_STATEID: 143 case -NFS4ERR_BAD_STATEID:
144 if (state == NULL)
145 break;
102 nfs_remove_bad_delegation(state->inode); 146 nfs_remove_bad_delegation(state->inode);
103 case -NFS4ERR_OPENMODE: 147 case -NFS4ERR_OPENMODE:
148 if (state == NULL)
149 break;
104 nfs4_schedule_stateid_recovery(mds_server, state); 150 nfs4_schedule_stateid_recovery(mds_server, state);
105 goto wait_on_recovery; 151 goto wait_on_recovery;
106 case -NFS4ERR_EXPIRED: 152 case -NFS4ERR_EXPIRED:
107 nfs4_schedule_stateid_recovery(mds_server, state); 153 if (state != NULL)
154 nfs4_schedule_stateid_recovery(mds_server, state);
108 nfs4_schedule_lease_recovery(mds_client); 155 nfs4_schedule_lease_recovery(mds_client);
109 goto wait_on_recovery; 156 goto wait_on_recovery;
110 /* DS session errors */ 157 /* DS session errors */
@@ -118,7 +165,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
118 dprintk("%s ERROR %d, Reset session. Exchangeid " 165 dprintk("%s ERROR %d, Reset session. Exchangeid "
119 "flags 0x%x\n", __func__, task->tk_status, 166 "flags 0x%x\n", __func__, task->tk_status,
120 clp->cl_exchange_flags); 167 clp->cl_exchange_flags);
121 nfs4_schedule_session_recovery(clp->cl_session); 168 nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
122 break; 169 break;
123 case -NFS4ERR_DELAY: 170 case -NFS4ERR_DELAY:
124 case -NFS4ERR_GRACE: 171 case -NFS4ERR_GRACE:
@@ -127,11 +174,48 @@ static int filelayout_async_handle_error(struct rpc_task *task,
127 break; 174 break;
128 case -NFS4ERR_RETRY_UNCACHED_REP: 175 case -NFS4ERR_RETRY_UNCACHED_REP:
129 break; 176 break;
177 /* Invalidate Layout errors */
178 case -NFS4ERR_PNFS_NO_LAYOUT:
179 case -ESTALE: /* mapped NFS4ERR_STALE */
180 case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
181 case -EISDIR: /* mapped NFS4ERR_ISDIR */
182 case -NFS4ERR_FHEXPIRED:
183 case -NFS4ERR_WRONG_TYPE:
184 dprintk("%s Invalid layout error %d\n", __func__,
185 task->tk_status);
186 /*
187 * Destroy layout so new i/o will get a new layout.
188 * Layout will not be destroyed until all current lseg
189 * references are put. Mark layout as invalid to resend failed
190 * i/o and all i/o waiting on the slot table to the MDS until
191 * layout is destroyed and a new valid layout is obtained.
192 */
193 set_bit(NFS_LAYOUT_INVALID,
194 &NFS_I(inode)->layout->plh_flags);
195 pnfs_destroy_layout(NFS_I(inode));
196 rpc_wake_up(&tbl->slot_tbl_waitq);
197 goto reset;
198 /* RPC connection errors */
199 case -ECONNREFUSED:
200 case -EHOSTDOWN:
201 case -EHOSTUNREACH:
202 case -ENETUNREACH:
203 case -EIO:
204 case -ETIMEDOUT:
205 case -EPIPE:
206 dprintk("%s DS connection error %d\n", __func__,
207 task->tk_status);
208 if (!filelayout_test_devid_invalid(devid))
209 _pnfs_return_layout(inode);
210 filelayout_mark_devid_invalid(devid);
211 rpc_wake_up(&tbl->slot_tbl_waitq);
212 nfs4_ds_disconnect(clp);
213 /* fall through */
130 default: 214 default:
131 dprintk("%s DS error. Retry through MDS %d\n", __func__, 215reset:
216 dprintk("%s Retry through MDS. Error %d\n", __func__,
132 task->tk_status); 217 task->tk_status);
133 *reset = 1; 218 return -NFS4ERR_RESET_TO_MDS;
134 break;
135 } 219 }
136out: 220out:
137 task->tk_status = 0; 221 task->tk_status = 0;
@@ -148,18 +232,17 @@ wait_on_recovery:
148static int filelayout_read_done_cb(struct rpc_task *task, 232static int filelayout_read_done_cb(struct rpc_task *task,
149 struct nfs_read_data *data) 233 struct nfs_read_data *data)
150{ 234{
151 int reset = 0; 235 struct nfs_pgio_header *hdr = data->header;
236 int err;
152 237
153 dprintk("%s DS read\n", __func__); 238 err = filelayout_async_handle_error(task, data->args.context->state,
239 data->ds_clp, hdr->lseg);
154 240
155 if (filelayout_async_handle_error(task, data->args.context->state, 241 switch (err) {
156 data->ds_clp, &reset) == -EAGAIN) { 242 case -NFS4ERR_RESET_TO_MDS:
157 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 243 filelayout_reset_read(data);
158 __func__, data->ds_clp, data->ds_clp->cl_session); 244 return task->tk_status;
159 if (reset) { 245 case -EAGAIN:
160 pnfs_set_lo_fail(data->lseg);
161 nfs4_reset_read(task, data);
162 }
163 rpc_restart_call_prepare(task); 246 rpc_restart_call_prepare(task);
164 return -EAGAIN; 247 return -EAGAIN;
165 } 248 }
@@ -175,13 +258,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
175static void 258static void
176filelayout_set_layoutcommit(struct nfs_write_data *wdata) 259filelayout_set_layoutcommit(struct nfs_write_data *wdata)
177{ 260{
178 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || 261 struct nfs_pgio_header *hdr = wdata->header;
262
263 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
179 wdata->res.verf->committed == NFS_FILE_SYNC) 264 wdata->res.verf->committed == NFS_FILE_SYNC)
180 return; 265 return;
181 266
182 pnfs_set_layoutcommit(wdata); 267 pnfs_set_layoutcommit(wdata);
183 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, 268 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
184 (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); 269 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
185} 270}
186 271
187/* 272/*
@@ -191,8 +276,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
191 */ 276 */
192static void filelayout_read_prepare(struct rpc_task *task, void *data) 277static void filelayout_read_prepare(struct rpc_task *task, void *data)
193{ 278{
194 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 279 struct nfs_read_data *rdata = data;
195 280
281 if (filelayout_reset_to_mds(rdata->header->lseg)) {
282 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
283 filelayout_reset_read(rdata);
284 rpc_exit(task, 0);
285 return;
286 }
196 rdata->read_done_cb = filelayout_read_done_cb; 287 rdata->read_done_cb = filelayout_read_done_cb;
197 288
198 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 289 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
@@ -205,42 +296,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
205 296
206static void filelayout_read_call_done(struct rpc_task *task, void *data) 297static void filelayout_read_call_done(struct rpc_task *task, void *data)
207{ 298{
208 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 299 struct nfs_read_data *rdata = data;
209 300
210 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 301 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
211 302
303 if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
304 task->tk_status == 0)
305 return;
306
212 /* Note this may cause RPC to be resent */ 307 /* Note this may cause RPC to be resent */
213 rdata->mds_ops->rpc_call_done(task, data); 308 rdata->header->mds_ops->rpc_call_done(task, data);
214} 309}
215 310
216static void filelayout_read_count_stats(struct rpc_task *task, void *data) 311static void filelayout_read_count_stats(struct rpc_task *task, void *data)
217{ 312{
218 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 313 struct nfs_read_data *rdata = data;
219 314
220 rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); 315 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
221} 316}
222 317
223static void filelayout_read_release(void *data) 318static void filelayout_read_release(void *data)
224{ 319{
225 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 320 struct nfs_read_data *rdata = data;
226 321
227 put_lseg(rdata->lseg); 322 nfs_put_client(rdata->ds_clp);
228 rdata->mds_ops->rpc_release(data); 323 rdata->header->mds_ops->rpc_release(data);
229} 324}
230 325
231static int filelayout_write_done_cb(struct rpc_task *task, 326static int filelayout_write_done_cb(struct rpc_task *task,
232 struct nfs_write_data *data) 327 struct nfs_write_data *data)
233{ 328{
234 int reset = 0; 329 struct nfs_pgio_header *hdr = data->header;
235 330 int err;
236 if (filelayout_async_handle_error(task, data->args.context->state, 331
237 data->ds_clp, &reset) == -EAGAIN) { 332 err = filelayout_async_handle_error(task, data->args.context->state,
238 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 333 data->ds_clp, hdr->lseg);
239 __func__, data->ds_clp, data->ds_clp->cl_session); 334
240 if (reset) { 335 switch (err) {
241 pnfs_set_lo_fail(data->lseg); 336 case -NFS4ERR_RESET_TO_MDS:
242 nfs4_reset_write(task, data); 337 filelayout_reset_write(data);
243 } 338 return task->tk_status;
339 case -EAGAIN:
244 rpc_restart_call_prepare(task); 340 rpc_restart_call_prepare(task);
245 return -EAGAIN; 341 return -EAGAIN;
246 } 342 }
@@ -250,7 +346,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
250} 346}
251 347
252/* Fake up some data that will cause nfs_commit_release to retry the writes. */ 348/* Fake up some data that will cause nfs_commit_release to retry the writes. */
253static void prepare_to_resend_writes(struct nfs_write_data *data) 349static void prepare_to_resend_writes(struct nfs_commit_data *data)
254{ 350{
255 struct nfs_page *first = nfs_list_entry(data->pages.next); 351 struct nfs_page *first = nfs_list_entry(data->pages.next);
256 352
@@ -261,19 +357,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
261} 357}
262 358
263static int filelayout_commit_done_cb(struct rpc_task *task, 359static int filelayout_commit_done_cb(struct rpc_task *task,
264 struct nfs_write_data *data) 360 struct nfs_commit_data *data)
265{ 361{
266 int reset = 0; 362 int err;
267 363
268 if (filelayout_async_handle_error(task, data->args.context->state, 364 err = filelayout_async_handle_error(task, NULL, data->ds_clp,
269 data->ds_clp, &reset) == -EAGAIN) { 365 data->lseg);
270 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 366
271 __func__, data->ds_clp, data->ds_clp->cl_session); 367 switch (err) {
272 if (reset) { 368 case -NFS4ERR_RESET_TO_MDS:
273 prepare_to_resend_writes(data); 369 prepare_to_resend_writes(data);
274 pnfs_set_lo_fail(data->lseg); 370 return -EAGAIN;
275 } else 371 case -EAGAIN:
276 rpc_restart_call_prepare(task); 372 rpc_restart_call_prepare(task);
277 return -EAGAIN; 373 return -EAGAIN;
278 } 374 }
279 375
@@ -282,8 +378,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
282 378
283static void filelayout_write_prepare(struct rpc_task *task, void *data) 379static void filelayout_write_prepare(struct rpc_task *task, void *data)
284{ 380{
285 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 381 struct nfs_write_data *wdata = data;
286 382
383 if (filelayout_reset_to_mds(wdata->header->lseg)) {
384 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
385 filelayout_reset_write(wdata);
386 rpc_exit(task, 0);
387 return;
388 }
287 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 389 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
288 &wdata->args.seq_args, &wdata->res.seq_res, 390 &wdata->args.seq_args, &wdata->res.seq_res,
289 task)) 391 task))
@@ -294,36 +396,66 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
294 396
295static void filelayout_write_call_done(struct rpc_task *task, void *data) 397static void filelayout_write_call_done(struct rpc_task *task, void *data)
296{ 398{
297 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 399 struct nfs_write_data *wdata = data;
400
401 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
402 task->tk_status == 0)
403 return;
298 404
299 /* Note this may cause RPC to be resent */ 405 /* Note this may cause RPC to be resent */
300 wdata->mds_ops->rpc_call_done(task, data); 406 wdata->header->mds_ops->rpc_call_done(task, data);
301} 407}
302 408
303static void filelayout_write_count_stats(struct rpc_task *task, void *data) 409static void filelayout_write_count_stats(struct rpc_task *task, void *data)
304{ 410{
305 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 411 struct nfs_write_data *wdata = data;
306 412
307 rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); 413 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
308} 414}
309 415
310static void filelayout_write_release(void *data) 416static void filelayout_write_release(void *data)
311{ 417{
312 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 418 struct nfs_write_data *wdata = data;
419
420 nfs_put_client(wdata->ds_clp);
421 wdata->header->mds_ops->rpc_release(data);
422}
423
424static void filelayout_commit_prepare(struct rpc_task *task, void *data)
425{
426 struct nfs_commit_data *wdata = data;
313 427
314 put_lseg(wdata->lseg); 428 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
315 wdata->mds_ops->rpc_release(data); 429 &wdata->args.seq_args, &wdata->res.seq_res,
430 task))
431 return;
432
433 rpc_call_start(task);
434}
435
436static void filelayout_write_commit_done(struct rpc_task *task, void *data)
437{
438 struct nfs_commit_data *wdata = data;
439
440 /* Note this may cause RPC to be resent */
441 wdata->mds_ops->rpc_call_done(task, data);
442}
443
444static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
445{
446 struct nfs_commit_data *cdata = data;
447
448 rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
316} 449}
317 450
318static void filelayout_commit_release(void *data) 451static void filelayout_commit_release(void *calldata)
319{ 452{
320 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 453 struct nfs_commit_data *data = calldata;
321 454
322 nfs_commit_release_pages(wdata); 455 data->completion_ops->completion(data);
323 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) 456 put_lseg(data->lseg);
324 nfs_commit_clear_lock(NFS_I(wdata->inode)); 457 nfs_put_client(data->ds_clp);
325 put_lseg(wdata->lseg); 458 nfs_commitdata_release(data);
326 nfs_commitdata_release(wdata);
327} 459}
328 460
329static const struct rpc_call_ops filelayout_read_call_ops = { 461static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,16 +473,17 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
341}; 473};
342 474
343static const struct rpc_call_ops filelayout_commit_call_ops = { 475static const struct rpc_call_ops filelayout_commit_call_ops = {
344 .rpc_call_prepare = filelayout_write_prepare, 476 .rpc_call_prepare = filelayout_commit_prepare,
345 .rpc_call_done = filelayout_write_call_done, 477 .rpc_call_done = filelayout_write_commit_done,
346 .rpc_count_stats = filelayout_write_count_stats, 478 .rpc_count_stats = filelayout_commit_count_stats,
347 .rpc_release = filelayout_commit_release, 479 .rpc_release = filelayout_commit_release,
348}; 480};
349 481
350static enum pnfs_try_status 482static enum pnfs_try_status
351filelayout_read_pagelist(struct nfs_read_data *data) 483filelayout_read_pagelist(struct nfs_read_data *data)
352{ 484{
353 struct pnfs_layout_segment *lseg = data->lseg; 485 struct nfs_pgio_header *hdr = data->header;
486 struct pnfs_layout_segment *lseg = hdr->lseg;
354 struct nfs4_pnfs_ds *ds; 487 struct nfs4_pnfs_ds *ds;
355 loff_t offset = data->args.offset; 488 loff_t offset = data->args.offset;
356 u32 j, idx; 489 u32 j, idx;
@@ -358,25 +491,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)
358 int status; 491 int status;
359 492
360 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 493 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
361 __func__, data->inode->i_ino, 494 __func__, hdr->inode->i_ino,
362 data->args.pgbase, (size_t)data->args.count, offset); 495 data->args.pgbase, (size_t)data->args.count, offset);
363 496
364 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
365 return PNFS_NOT_ATTEMPTED;
366
367 /* Retrieve the correct rpc_client for the byte range */ 497 /* Retrieve the correct rpc_client for the byte range */
368 j = nfs4_fl_calc_j_index(lseg, offset); 498 j = nfs4_fl_calc_j_index(lseg, offset);
369 idx = nfs4_fl_calc_ds_index(lseg, j); 499 idx = nfs4_fl_calc_ds_index(lseg, j);
370 ds = nfs4_fl_prepare_ds(lseg, idx); 500 ds = nfs4_fl_prepare_ds(lseg, idx);
371 if (!ds) { 501 if (!ds)
372 /* Either layout fh index faulty, or ds connect failed */
373 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
374 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
375 return PNFS_NOT_ATTEMPTED; 502 return PNFS_NOT_ATTEMPTED;
376 } 503 dprintk("%s USE DS: %s cl_count %d\n", __func__,
377 dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); 504 ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
378 505
379 /* No multipath support. Use first DS */ 506 /* No multipath support. Use first DS */
507 atomic_inc(&ds->ds_clp->cl_count);
380 data->ds_clp = ds->ds_clp; 508 data->ds_clp = ds->ds_clp;
381 fh = nfs4_fl_select_ds_fh(lseg, j); 509 fh = nfs4_fl_select_ds_fh(lseg, j);
382 if (fh) 510 if (fh)
@@ -386,8 +514,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
386 data->mds_offset = offset; 514 data->mds_offset = offset;
387 515
388 /* Perform an asynchronous read to ds */ 516 /* Perform an asynchronous read to ds */
389 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, 517 status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
390 &filelayout_read_call_ops); 518 &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
391 BUG_ON(status != 0); 519 BUG_ON(status != 0);
392 return PNFS_ATTEMPTED; 520 return PNFS_ATTEMPTED;
393} 521}
@@ -396,32 +524,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)
396static enum pnfs_try_status 524static enum pnfs_try_status
397filelayout_write_pagelist(struct nfs_write_data *data, int sync) 525filelayout_write_pagelist(struct nfs_write_data *data, int sync)
398{ 526{
399 struct pnfs_layout_segment *lseg = data->lseg; 527 struct nfs_pgio_header *hdr = data->header;
528 struct pnfs_layout_segment *lseg = hdr->lseg;
400 struct nfs4_pnfs_ds *ds; 529 struct nfs4_pnfs_ds *ds;
401 loff_t offset = data->args.offset; 530 loff_t offset = data->args.offset;
402 u32 j, idx; 531 u32 j, idx;
403 struct nfs_fh *fh; 532 struct nfs_fh *fh;
404 int status; 533 int status;
405 534
406 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
407 return PNFS_NOT_ATTEMPTED;
408
409 /* Retrieve the correct rpc_client for the byte range */ 535 /* Retrieve the correct rpc_client for the byte range */
410 j = nfs4_fl_calc_j_index(lseg, offset); 536 j = nfs4_fl_calc_j_index(lseg, offset);
411 idx = nfs4_fl_calc_ds_index(lseg, j); 537 idx = nfs4_fl_calc_ds_index(lseg, j);
412 ds = nfs4_fl_prepare_ds(lseg, idx); 538 ds = nfs4_fl_prepare_ds(lseg, idx);
413 if (!ds) { 539 if (!ds)
414 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
415 __func__);
416 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
417 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
418 return PNFS_NOT_ATTEMPTED; 540 return PNFS_NOT_ATTEMPTED;
419 } 541 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
420 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, 542 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
421 data->inode->i_ino, sync, (size_t) data->args.count, offset, 543 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
422 ds->ds_remotestr);
423 544
424 data->write_done_cb = filelayout_write_done_cb; 545 data->write_done_cb = filelayout_write_done_cb;
546 atomic_inc(&ds->ds_clp->cl_count);
425 data->ds_clp = ds->ds_clp; 547 data->ds_clp = ds->ds_clp;
426 fh = nfs4_fl_select_ds_fh(lseg, j); 548 fh = nfs4_fl_select_ds_fh(lseg, j);
427 if (fh) 549 if (fh)
@@ -433,8 +555,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
433 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 555 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
434 556
435 /* Perform an asynchronous write */ 557 /* Perform an asynchronous write */
436 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, 558 status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
437 &filelayout_write_call_ops, sync); 559 &filelayout_write_call_ops, sync,
560 RPC_TASK_SOFTCONN);
438 BUG_ON(status != 0); 561 BUG_ON(status != 0);
439 return PNFS_ATTEMPTED; 562 return PNFS_ATTEMPTED;
440} 563}
@@ -650,10 +773,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
650 773
651 dprintk("--> %s\n", __func__); 774 dprintk("--> %s\n", __func__);
652 nfs4_fl_put_deviceid(fl->dsaddr); 775 nfs4_fl_put_deviceid(fl->dsaddr);
653 kfree(fl->commit_buckets); 776 /* This assumes a single RW lseg */
777 if (lseg->pls_range.iomode == IOMODE_RW) {
778 struct nfs4_filelayout *flo;
779
780 flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
781 flo->commit_info.nbuckets = 0;
782 kfree(flo->commit_info.buckets);
783 flo->commit_info.buckets = NULL;
784 }
654 _filelayout_free_lseg(fl); 785 _filelayout_free_lseg(fl);
655} 786}
656 787
788static int
789filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
790 struct nfs_commit_info *cinfo,
791 gfp_t gfp_flags)
792{
793 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
794 struct pnfs_commit_bucket *buckets;
795 int size;
796
797 if (fl->commit_through_mds)
798 return 0;
799 if (cinfo->ds->nbuckets != 0) {
800 /* This assumes there is only one IOMODE_RW lseg. What
801 * we really want to do is have a layout_hdr level
802 * dictionary of <multipath_list4, fh> keys, each
803 * associated with a struct list_head, populated by calls
804 * to filelayout_write_pagelist().
805 * */
806 return 0;
807 }
808
809 size = (fl->stripe_type == STRIPE_SPARSE) ?
810 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
811
812 buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
813 gfp_flags);
814 if (!buckets)
815 return -ENOMEM;
816 else {
817 int i;
818
819 spin_lock(cinfo->lock);
820 if (cinfo->ds->nbuckets != 0)
821 kfree(buckets);
822 else {
823 cinfo->ds->buckets = buckets;
824 cinfo->ds->nbuckets = size;
825 for (i = 0; i < size; i++) {
826 INIT_LIST_HEAD(&buckets[i].written);
827 INIT_LIST_HEAD(&buckets[i].committing);
828 }
829 }
830 spin_unlock(cinfo->lock);
831 return 0;
832 }
833}
834
657static struct pnfs_layout_segment * 835static struct pnfs_layout_segment *
658filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, 836filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
659 struct nfs4_layoutget_res *lgr, 837 struct nfs4_layoutget_res *lgr,
@@ -673,29 +851,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
673 _filelayout_free_lseg(fl); 851 _filelayout_free_lseg(fl);
674 return NULL; 852 return NULL;
675 } 853 }
676
677 /* This assumes there is only one IOMODE_RW lseg. What
678 * we really want to do is have a layout_hdr level
679 * dictionary of <multipath_list4, fh> keys, each
680 * associated with a struct list_head, populated by calls
681 * to filelayout_write_pagelist().
682 * */
683 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
684 int i;
685 int size = (fl->stripe_type == STRIPE_SPARSE) ?
686 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
687
688 fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
689 if (!fl->commit_buckets) {
690 filelayout_free_lseg(&fl->generic_hdr);
691 return NULL;
692 }
693 fl->number_of_buckets = size;
694 for (i = 0; i < size; i++) {
695 INIT_LIST_HEAD(&fl->commit_buckets[i].written);
696 INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
697 }
698 }
699 return &fl->generic_hdr; 854 return &fl->generic_hdr;
700} 855}
701 856
@@ -716,8 +871,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
716 !nfs_generic_pg_test(pgio, prev, req)) 871 !nfs_generic_pg_test(pgio, prev, req))
717 return false; 872 return false;
718 873
719 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; 874 p_stripe = (u64)req_offset(prev);
720 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; 875 r_stripe = (u64)req_offset(req);
721 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 876 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
722 877
723 do_div(p_stripe, stripe_unit); 878 do_div(p_stripe, stripe_unit);
@@ -732,6 +887,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
732{ 887{
733 BUG_ON(pgio->pg_lseg != NULL); 888 BUG_ON(pgio->pg_lseg != NULL);
734 889
890 if (req->wb_offset != req->wb_pgbase) {
891 /*
892 * Handling unaligned pages is difficult, because have to
893 * somehow split a req in two in certain cases in the
894 * pg.test code. Avoid this by just not using pnfs
895 * in this case.
896 */
897 nfs_pageio_reset_read_mds(pgio);
898 return;
899 }
735 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 900 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
736 req->wb_context, 901 req->wb_context,
737 0, 902 0,
@@ -747,8 +912,13 @@ static void
747filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, 912filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
748 struct nfs_page *req) 913 struct nfs_page *req)
749{ 914{
915 struct nfs_commit_info cinfo;
916 int status;
917
750 BUG_ON(pgio->pg_lseg != NULL); 918 BUG_ON(pgio->pg_lseg != NULL);
751 919
920 if (req->wb_offset != req->wb_pgbase)
921 goto out_mds;
752 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 922 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
753 req->wb_context, 923 req->wb_context,
754 0, 924 0,
@@ -757,7 +927,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
757 GFP_NOFS); 927 GFP_NOFS);
758 /* If no lseg, fall back to write through mds */ 928 /* If no lseg, fall back to write through mds */
759 if (pgio->pg_lseg == NULL) 929 if (pgio->pg_lseg == NULL)
760 nfs_pageio_reset_write_mds(pgio); 930 goto out_mds;
931 nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
932 status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
933 if (status < 0) {
934 put_lseg(pgio->pg_lseg);
935 pgio->pg_lseg = NULL;
936 goto out_mds;
937 }
938 return;
939out_mds:
940 nfs_pageio_reset_write_mds(pgio);
761} 941}
762 942
763static const struct nfs_pageio_ops filelayout_pg_read_ops = { 943static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -784,43 +964,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
784 * If this will make the bucket empty, it will need to put the lseg reference. 964 * If this will make the bucket empty, it will need to put the lseg reference.
785 */ 965 */
786static void 966static void
787filelayout_clear_request_commit(struct nfs_page *req) 967filelayout_clear_request_commit(struct nfs_page *req,
968 struct nfs_commit_info *cinfo)
788{ 969{
789 struct pnfs_layout_segment *freeme = NULL; 970 struct pnfs_layout_segment *freeme = NULL;
790 struct inode *inode = req->wb_context->dentry->d_inode;
791 971
792 spin_lock(&inode->i_lock); 972 spin_lock(cinfo->lock);
793 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 973 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
794 goto out; 974 goto out;
975 cinfo->ds->nwritten--;
795 if (list_is_singular(&req->wb_list)) { 976 if (list_is_singular(&req->wb_list)) {
796 struct pnfs_layout_segment *lseg; 977 struct pnfs_commit_bucket *bucket;
797 978
798 /* From here we can find the bucket, but for the moment, 979 bucket = list_first_entry(&req->wb_list,
799 * since there is only one relevant lseg... 980 struct pnfs_commit_bucket,
800 */ 981 written);
801 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 982 freeme = bucket->wlseg;
802 if (lseg->pls_range.iomode == IOMODE_RW) { 983 bucket->wlseg = NULL;
803 freeme = lseg;
804 break;
805 }
806 }
807 } 984 }
808out: 985out:
809 nfs_request_remove_commit_list(req); 986 nfs_request_remove_commit_list(req, cinfo);
810 spin_unlock(&inode->i_lock); 987 spin_unlock(cinfo->lock);
811 put_lseg(freeme); 988 put_lseg(freeme);
812} 989}
813 990
814static struct list_head * 991static struct list_head *
815filelayout_choose_commit_list(struct nfs_page *req, 992filelayout_choose_commit_list(struct nfs_page *req,
816 struct pnfs_layout_segment *lseg) 993 struct pnfs_layout_segment *lseg,
994 struct nfs_commit_info *cinfo)
817{ 995{
818 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 996 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
819 u32 i, j; 997 u32 i, j;
820 struct list_head *list; 998 struct list_head *list;
999 struct pnfs_commit_bucket *buckets;
821 1000
822 if (fl->commit_through_mds) 1001 if (fl->commit_through_mds)
823 return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; 1002 return &cinfo->mds->list;
824 1003
825 /* Note that we are calling nfs4_fl_calc_j_index on each page 1004 /* Note that we are calling nfs4_fl_calc_j_index on each page
826 * that ends up being committed to a data server. An attractive 1005 * that ends up being committed to a data server. An attractive
@@ -828,31 +1007,33 @@ filelayout_choose_commit_list(struct nfs_page *req,
828 * to store the value calculated in filelayout_write_pagelist 1007 * to store the value calculated in filelayout_write_pagelist
829 * and just use that here. 1008 * and just use that here.
830 */ 1009 */
831 j = nfs4_fl_calc_j_index(lseg, 1010 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
832 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
833 i = select_bucket_index(fl, j); 1011 i = select_bucket_index(fl, j);
834 list = &fl->commit_buckets[i].written; 1012 buckets = cinfo->ds->buckets;
1013 list = &buckets[i].written;
835 if (list_empty(list)) { 1014 if (list_empty(list)) {
836 /* Non-empty buckets hold a reference on the lseg. That ref 1015 /* Non-empty buckets hold a reference on the lseg. That ref
837 * is normally transferred to the COMMIT call and released 1016 * is normally transferred to the COMMIT call and released
838 * there. It could also be released if the last req is pulled 1017 * there. It could also be released if the last req is pulled
839 * off due to a rewrite, in which case it will be done in 1018 * off due to a rewrite, in which case it will be done in
840 * filelayout_remove_commit_req 1019 * filelayout_clear_request_commit
841 */ 1020 */
842 get_lseg(lseg); 1021 buckets[i].wlseg = get_lseg(lseg);
843 } 1022 }
844 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1023 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1024 cinfo->ds->nwritten++;
845 return list; 1025 return list;
846} 1026}
847 1027
848static void 1028static void
849filelayout_mark_request_commit(struct nfs_page *req, 1029filelayout_mark_request_commit(struct nfs_page *req,
850 struct pnfs_layout_segment *lseg) 1030 struct pnfs_layout_segment *lseg,
1031 struct nfs_commit_info *cinfo)
851{ 1032{
852 struct list_head *list; 1033 struct list_head *list;
853 1034
854 list = filelayout_choose_commit_list(req, lseg); 1035 list = filelayout_choose_commit_list(req, lseg, cinfo);
855 nfs_request_add_commit_list(req, list); 1036 nfs_request_add_commit_list(req, list, cinfo);
856} 1037}
857 1038
858static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1039static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -880,7 +1061,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
880 return flseg->fh_array[i]; 1061 return flseg->fh_array[i];
881} 1062}
882 1063
883static int filelayout_initiate_commit(struct nfs_write_data *data, int how) 1064static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
884{ 1065{
885 struct pnfs_layout_segment *lseg = data->lseg; 1066 struct pnfs_layout_segment *lseg = data->lseg;
886 struct nfs4_pnfs_ds *ds; 1067 struct nfs4_pnfs_ds *ds;
@@ -890,135 +1071,138 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
890 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); 1071 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
891 ds = nfs4_fl_prepare_ds(lseg, idx); 1072 ds = nfs4_fl_prepare_ds(lseg, idx);
892 if (!ds) { 1073 if (!ds) {
893 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
894 __func__);
895 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
896 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
897 prepare_to_resend_writes(data); 1074 prepare_to_resend_writes(data);
898 filelayout_commit_release(data); 1075 filelayout_commit_release(data);
899 return -EAGAIN; 1076 return -EAGAIN;
900 } 1077 }
901 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); 1078 dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
902 data->write_done_cb = filelayout_commit_done_cb; 1079 data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
1080 data->commit_done_cb = filelayout_commit_done_cb;
1081 atomic_inc(&ds->ds_clp->cl_count);
903 data->ds_clp = ds->ds_clp; 1082 data->ds_clp = ds->ds_clp;
904 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); 1083 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
905 if (fh) 1084 if (fh)
906 data->args.fh = fh; 1085 data->args.fh = fh;
907 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, 1086 return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
908 &filelayout_commit_call_ops, how); 1087 &filelayout_commit_call_ops, how,
909} 1088 RPC_TASK_SOFTCONN);
910
911/*
912 * This is only useful while we are using whole file layouts.
913 */
914static struct pnfs_layout_segment *
915find_only_write_lseg_locked(struct inode *inode)
916{
917 struct pnfs_layout_segment *lseg;
918
919 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
920 if (lseg->pls_range.iomode == IOMODE_RW)
921 return lseg;
922 return NULL;
923}
924
925static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
926{
927 struct pnfs_layout_segment *rv;
928
929 spin_lock(&inode->i_lock);
930 rv = find_only_write_lseg_locked(inode);
931 if (rv)
932 get_lseg(rv);
933 spin_unlock(&inode->i_lock);
934 return rv;
935} 1089}
936 1090
937static int 1091static int
938filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, 1092transfer_commit_list(struct list_head *src, struct list_head *dst,
939 spinlock_t *lock) 1093 struct nfs_commit_info *cinfo, int max)
940{ 1094{
941 struct list_head *src = &bucket->written;
942 struct list_head *dst = &bucket->committing;
943 struct nfs_page *req, *tmp; 1095 struct nfs_page *req, *tmp;
944 int ret = 0; 1096 int ret = 0;
945 1097
946 list_for_each_entry_safe(req, tmp, src, wb_list) { 1098 list_for_each_entry_safe(req, tmp, src, wb_list) {
947 if (!nfs_lock_request(req)) 1099 if (!nfs_lock_request(req))
948 continue; 1100 continue;
949 if (cond_resched_lock(lock)) 1101 kref_get(&req->wb_kref);
1102 if (cond_resched_lock(cinfo->lock))
950 list_safe_reset_next(req, tmp, wb_list); 1103 list_safe_reset_next(req, tmp, wb_list);
951 nfs_request_remove_commit_list(req); 1104 nfs_request_remove_commit_list(req, cinfo);
952 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1105 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
953 nfs_list_add_request(req, dst); 1106 nfs_list_add_request(req, dst);
954 ret++; 1107 ret++;
955 if (ret == max) 1108 if ((ret == max) && !cinfo->dreq)
956 break; 1109 break;
957 } 1110 }
958 return ret; 1111 return ret;
959} 1112}
960 1113
1114static int
1115filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
1116 struct nfs_commit_info *cinfo,
1117 int max)
1118{
1119 struct list_head *src = &bucket->written;
1120 struct list_head *dst = &bucket->committing;
1121 int ret;
1122
1123 ret = transfer_commit_list(src, dst, cinfo, max);
1124 if (ret) {
1125 cinfo->ds->nwritten -= ret;
1126 cinfo->ds->ncommitting += ret;
1127 bucket->clseg = bucket->wlseg;
1128 if (list_empty(src))
1129 bucket->wlseg = NULL;
1130 else
1131 get_lseg(bucket->clseg);
1132 }
1133 return ret;
1134}
1135
961/* Move reqs from written to committing lists, returning count of number moved. 1136/* Move reqs from written to committing lists, returning count of number moved.
962 * Note called with i_lock held. 1137 * Note called with cinfo->lock held.
963 */ 1138 */
964static int filelayout_scan_commit_lists(struct inode *inode, int max, 1139static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
965 spinlock_t *lock) 1140 int max)
966{ 1141{
967 struct pnfs_layout_segment *lseg;
968 struct nfs4_filelayout_segment *fl;
969 int i, rv = 0, cnt; 1142 int i, rv = 0, cnt;
970 1143
971 lseg = find_only_write_lseg_locked(inode); 1144 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
972 if (!lseg) 1145 cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
973 goto out_done; 1146 cinfo, max);
974 fl = FILELAYOUT_LSEG(lseg);
975 if (fl->commit_through_mds)
976 goto out_done;
977 for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
978 cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
979 max, lock);
980 max -= cnt; 1147 max -= cnt;
981 rv += cnt; 1148 rv += cnt;
982 } 1149 }
983out_done:
984 return rv; 1150 return rv;
985} 1151}
986 1152
1153/* Pull everything off the committing lists and dump into @dst */
1154static void filelayout_recover_commit_reqs(struct list_head *dst,
1155 struct nfs_commit_info *cinfo)
1156{
1157 struct pnfs_commit_bucket *b;
1158 int i;
1159
1160 /* NOTE cinfo->lock is NOT held, relying on fact that this is
1161 * only called on single thread per dreq.
1162 * Can't take the lock because need to do put_lseg
1163 */
1164 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1165 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
1166 BUG_ON(!list_empty(&b->written));
1167 put_lseg(b->wlseg);
1168 b->wlseg = NULL;
1169 }
1170 }
1171 cinfo->ds->nwritten = 0;
1172}
1173
987static unsigned int 1174static unsigned int
988alloc_ds_commits(struct inode *inode, struct list_head *list) 1175alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
989{ 1176{
990 struct pnfs_layout_segment *lseg; 1177 struct pnfs_ds_commit_info *fl_cinfo;
991 struct nfs4_filelayout_segment *fl; 1178 struct pnfs_commit_bucket *bucket;
992 struct nfs_write_data *data; 1179 struct nfs_commit_data *data;
993 int i, j; 1180 int i, j;
994 unsigned int nreq = 0; 1181 unsigned int nreq = 0;
995 1182
996 /* Won't need this when non-whole file layout segments are supported 1183 fl_cinfo = cinfo->ds;
997 * instead we will use a pnfs_layout_hdr structure */ 1184 bucket = fl_cinfo->buckets;
998 lseg = find_only_write_lseg(inode); 1185 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
999 if (!lseg) 1186 if (list_empty(&bucket->committing))
1000 return 0;
1001 fl = FILELAYOUT_LSEG(lseg);
1002 for (i = 0; i < fl->number_of_buckets; i++) {
1003 if (list_empty(&fl->commit_buckets[i].committing))
1004 continue; 1187 continue;
1005 data = nfs_commitdata_alloc(); 1188 data = nfs_commitdata_alloc();
1006 if (!data) 1189 if (!data)
1007 break; 1190 break;
1008 data->ds_commit_index = i; 1191 data->ds_commit_index = i;
1009 data->lseg = lseg; 1192 data->lseg = bucket->clseg;
1193 bucket->clseg = NULL;
1010 list_add(&data->pages, list); 1194 list_add(&data->pages, list);
1011 nreq++; 1195 nreq++;
1012 } 1196 }
1013 1197
1014 /* Clean up on error */ 1198 /* Clean up on error */
1015 for (j = i; j < fl->number_of_buckets; j++) { 1199 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
1016 if (list_empty(&fl->commit_buckets[i].committing)) 1200 if (list_empty(&bucket->committing))
1017 continue; 1201 continue;
1018 nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); 1202 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1019 put_lseg(lseg); /* associated with emptying bucket */ 1203 put_lseg(bucket->clseg);
1204 bucket->clseg = NULL;
1020 } 1205 }
1021 put_lseg(lseg);
1022 /* Caller will clean up entries put on list */ 1206 /* Caller will clean up entries put on list */
1023 return nreq; 1207 return nreq;
1024} 1208}
@@ -1026,9 +1210,9 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
1026/* This follows nfs_commit_list pretty closely */ 1210/* This follows nfs_commit_list pretty closely */
1027static int 1211static int
1028filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, 1212filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1029 int how) 1213 int how, struct nfs_commit_info *cinfo)
1030{ 1214{
1031 struct nfs_write_data *data, *tmp; 1215 struct nfs_commit_data *data, *tmp;
1032 LIST_HEAD(list); 1216 LIST_HEAD(list);
1033 unsigned int nreq = 0; 1217 unsigned int nreq = 0;
1034 1218
@@ -1039,30 +1223,34 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1039 list_add(&data->pages, &list); 1223 list_add(&data->pages, &list);
1040 nreq++; 1224 nreq++;
1041 } else 1225 } else
1042 nfs_retry_commit(mds_pages, NULL); 1226 nfs_retry_commit(mds_pages, NULL, cinfo);
1043 } 1227 }
1044 1228
1045 nreq += alloc_ds_commits(inode, &list); 1229 nreq += alloc_ds_commits(cinfo, &list);
1046 1230
1047 if (nreq == 0) { 1231 if (nreq == 0) {
1048 nfs_commit_clear_lock(NFS_I(inode)); 1232 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1049 goto out; 1233 goto out;
1050 } 1234 }
1051 1235
1052 atomic_add(nreq, &NFS_I(inode)->commits_outstanding); 1236 atomic_add(nreq, &cinfo->mds->rpcs_out);
1053 1237
1054 list_for_each_entry_safe(data, tmp, &list, pages) { 1238 list_for_each_entry_safe(data, tmp, &list, pages) {
1055 list_del_init(&data->pages); 1239 list_del_init(&data->pages);
1056 if (!data->lseg) { 1240 if (!data->lseg) {
1057 nfs_init_commit(data, mds_pages, NULL); 1241 nfs_init_commit(data, mds_pages, NULL, cinfo);
1058 nfs_initiate_commit(data, NFS_CLIENT(inode), 1242 nfs_initiate_commit(NFS_CLIENT(inode), data,
1059 data->mds_ops, how); 1243 data->mds_ops, how, 0);
1060 } else { 1244 } else {
1061 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); 1245 struct pnfs_commit_bucket *buckets;
1246
1247 buckets = cinfo->ds->buckets;
1248 nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
1062 filelayout_initiate_commit(data, how); 1249 filelayout_initiate_commit(data, how);
1063 } 1250 }
1064 } 1251 }
1065out: 1252out:
1253 cinfo->ds->ncommitting = 0;
1066 return PNFS_ATTEMPTED; 1254 return PNFS_ATTEMPTED;
1067} 1255}
1068 1256
@@ -1072,17 +1260,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
1072 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); 1260 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
1073} 1261}
1074 1262
1263static struct pnfs_layout_hdr *
1264filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
1265{
1266 struct nfs4_filelayout *flo;
1267
1268 flo = kzalloc(sizeof(*flo), gfp_flags);
1269 return &flo->generic_hdr;
1270}
1271
1272static void
1273filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
1274{
1275 kfree(FILELAYOUT_FROM_HDR(lo));
1276}
1277
1278static struct pnfs_ds_commit_info *
1279filelayout_get_ds_info(struct inode *inode)
1280{
1281 struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
1282
1283 if (layout == NULL)
1284 return NULL;
1285 else
1286 return &FILELAYOUT_FROM_HDR(layout)->commit_info;
1287}
1288
1075static struct pnfs_layoutdriver_type filelayout_type = { 1289static struct pnfs_layoutdriver_type filelayout_type = {
1076 .id = LAYOUT_NFSV4_1_FILES, 1290 .id = LAYOUT_NFSV4_1_FILES,
1077 .name = "LAYOUT_NFSV4_1_FILES", 1291 .name = "LAYOUT_NFSV4_1_FILES",
1078 .owner = THIS_MODULE, 1292 .owner = THIS_MODULE,
1293 .alloc_layout_hdr = filelayout_alloc_layout_hdr,
1294 .free_layout_hdr = filelayout_free_layout_hdr,
1079 .alloc_lseg = filelayout_alloc_lseg, 1295 .alloc_lseg = filelayout_alloc_lseg,
1080 .free_lseg = filelayout_free_lseg, 1296 .free_lseg = filelayout_free_lseg,
1081 .pg_read_ops = &filelayout_pg_read_ops, 1297 .pg_read_ops = &filelayout_pg_read_ops,
1082 .pg_write_ops = &filelayout_pg_write_ops, 1298 .pg_write_ops = &filelayout_pg_write_ops,
1299 .get_ds_info = &filelayout_get_ds_info,
1083 .mark_request_commit = filelayout_mark_request_commit, 1300 .mark_request_commit = filelayout_mark_request_commit,
1084 .clear_request_commit = filelayout_clear_request_commit, 1301 .clear_request_commit = filelayout_clear_request_commit,
1085 .scan_commit_lists = filelayout_scan_commit_lists, 1302 .scan_commit_lists = filelayout_scan_commit_lists,
1303 .recover_commit_reqs = filelayout_recover_commit_reqs,
1086 .commit_pagelist = filelayout_commit_pagelist, 1304 .commit_pagelist = filelayout_commit_pagelist,
1087 .read_pagelist = filelayout_read_pagelist, 1305 .read_pagelist = filelayout_read_pagelist,
1088 .write_pagelist = filelayout_write_pagelist, 1306 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 21190bb1f5e3..43fe802dd678 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,6 +33,13 @@
33#include "pnfs.h" 33#include "pnfs.h"
34 34
35/* 35/*
36 * Default data server connection timeout and retrans vaules.
37 * Set by module paramters dataserver_timeo and dataserver_retrans.
38 */
39#define NFS4_DEF_DS_TIMEO 60
40#define NFS4_DEF_DS_RETRANS 5
41
42/*
36 * Field testing shows we need to support up to 4096 stripe indices. 43 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint 44 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256 45 * reasonable. This in turn means we support a maximum of 256
@@ -41,6 +48,9 @@
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096 48#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ 49#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43 50
51/* error codes for internal use */
52#define NFS4ERR_RESET_TO_MDS 12001
53
44enum stripetype4 { 54enum stripetype4 {
45 STRIPE_SPARSE = 1, 55 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2 56 STRIPE_DENSE = 2
@@ -62,23 +72,14 @@ struct nfs4_pnfs_ds {
62 atomic_t ds_count; 72 atomic_t ds_count;
63}; 73};
64 74
65/* nfs4_file_layout_dsaddr flags */
66#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
67
68struct nfs4_file_layout_dsaddr { 75struct nfs4_file_layout_dsaddr {
69 struct nfs4_deviceid_node id_node; 76 struct nfs4_deviceid_node id_node;
70 unsigned long flags;
71 u32 stripe_count; 77 u32 stripe_count;
72 u8 *stripe_indices; 78 u8 *stripe_indices;
73 u32 ds_num; 79 u32 ds_num;
74 struct nfs4_pnfs_ds *ds_list[1]; 80 struct nfs4_pnfs_ds *ds_list[1];
75}; 81};
76 82
77struct nfs4_fl_commit_bucket {
78 struct list_head written;
79 struct list_head committing;
80};
81
82struct nfs4_filelayout_segment { 83struct nfs4_filelayout_segment {
83 struct pnfs_layout_segment generic_hdr; 84 struct pnfs_layout_segment generic_hdr;
84 u32 stripe_type; 85 u32 stripe_type;
@@ -89,10 +90,19 @@ struct nfs4_filelayout_segment {
89 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 90 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
90 unsigned int num_fh; 91 unsigned int num_fh;
91 struct nfs_fh **fh_array; 92 struct nfs_fh **fh_array;
92 struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
93 int number_of_buckets;
94}; 93};
95 94
95struct nfs4_filelayout {
96 struct pnfs_layout_hdr generic_hdr;
97 struct pnfs_ds_commit_info commit_info;
98};
99
100static inline struct nfs4_filelayout *
101FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
102{
103 return container_of(lo, struct nfs4_filelayout, generic_hdr);
104}
105
96static inline struct nfs4_filelayout_segment * 106static inline struct nfs4_filelayout_segment *
97FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) 107FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
98{ 108{
@@ -107,6 +117,36 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
107 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; 117 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
108} 118}
109 119
120static inline void
121filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
122{
123 u32 *p = (u32 *)&node->deviceid;
124
125 printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n",
126 p[0], p[1], p[2], p[3]);
127
128 set_bit(NFS_DEVICEID_INVALID, &node->flags);
129}
130
131static inline bool
132filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
133{
134 return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
135}
136
137static inline bool
138filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
139{
140 return test_bit(NFS_DEVICEID_INVALID, &node->flags);
141}
142
143static inline bool
144filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
145{
146 return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
147 filelayout_test_layout_invalid(lseg->pls_layout);
148}
149
110extern struct nfs_fh * 150extern struct nfs_fh *
111nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); 151nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
112 152
@@ -119,5 +159,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
119extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 159extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
120struct nfs4_file_layout_dsaddr * 160struct nfs4_file_layout_dsaddr *
121get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); 161get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
162void nfs4_ds_disconnect(struct nfs_client *clp);
122 163
123#endif /* FS_NFS_NFS4FILELAYOUT_H */ 164#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index c9cff9adb2d3..a1fab8da7f03 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -30,12 +30,16 @@
30 30
31#include <linux/nfs_fs.h> 31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
33#include <linux/module.h>
33 34
34#include "internal.h" 35#include "internal.h"
35#include "nfs4filelayout.h" 36#include "nfs4filelayout.h"
36 37
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD 38#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38 39
40static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
41static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
42
39/* 43/*
40 * Data server cache 44 * Data server cache
41 * 45 *
@@ -145,6 +149,28 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
145} 149}
146 150
147/* 151/*
152 * Lookup DS by nfs_client pointer. Zero data server client pointer
153 */
154void nfs4_ds_disconnect(struct nfs_client *clp)
155{
156 struct nfs4_pnfs_ds *ds;
157 struct nfs_client *found = NULL;
158
159 dprintk("%s clp %p\n", __func__, clp);
160 spin_lock(&nfs4_ds_cache_lock);
161 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
162 if (ds->ds_clp && ds->ds_clp == clp) {
163 found = ds->ds_clp;
164 ds->ds_clp = NULL;
165 }
166 spin_unlock(&nfs4_ds_cache_lock);
167 if (found) {
168 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
169 nfs_put_client(clp);
170 }
171}
172
173/*
148 * Create an rpc connection to the nfs4_pnfs_ds data server 174 * Create an rpc connection to the nfs4_pnfs_ds data server
149 * Currently only supports IPv4 and IPv6 addresses 175 * Currently only supports IPv4 and IPv6 addresses
150 */ 176 */
@@ -165,8 +191,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
165 __func__, ds->ds_remotestr, da->da_remotestr); 191 __func__, ds->ds_remotestr, da->da_remotestr);
166 192
167 clp = nfs4_set_ds_client(mds_srv->nfs_client, 193 clp = nfs4_set_ds_client(mds_srv->nfs_client,
168 (struct sockaddr *)&da->da_addr, 194 (struct sockaddr *)&da->da_addr,
169 da->da_addrlen, IPPROTO_TCP); 195 da->da_addrlen, IPPROTO_TCP,
196 dataserver_timeo, dataserver_retrans);
170 if (!IS_ERR(clp)) 197 if (!IS_ERR(clp))
171 break; 198 break;
172 } 199 }
@@ -176,28 +203,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
176 goto out; 203 goto out;
177 } 204 }
178 205
179 if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) { 206 status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
180 if (!is_ds_client(clp)) {
181 status = -ENODEV;
182 goto out_put;
183 }
184 ds->ds_clp = clp;
185 dprintk("%s [existing] server=%s\n", __func__,
186 ds->ds_remotestr);
187 goto out;
188 }
189
190 /*
191 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
192 * be equal to the MDS lease. Renewal is scheduled in create_session.
193 */
194 spin_lock(&mds_srv->nfs_client->cl_lock);
195 clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
196 spin_unlock(&mds_srv->nfs_client->cl_lock);
197 clp->cl_last_renewal = jiffies;
198
199 /* New nfs_client */
200 status = nfs4_init_ds_session(clp);
201 if (status) 207 if (status)
202 goto out_put; 208 goto out_put;
203 209
@@ -602,7 +608,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
602 608
603 mp_count = be32_to_cpup(p); /* multipath count */ 609 mp_count = be32_to_cpup(p); /* multipath count */
604 for (j = 0; j < mp_count; j++) { 610 for (j = 0; j < mp_count; j++) {
605 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, 611 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
606 &stream, gfp_flags); 612 &stream, gfp_flags);
607 if (da) 613 if (da)
608 list_add_tail(&da->da_node, &dsaddrs); 614 list_add_tail(&da->da_node, &dsaddrs);
@@ -791,48 +797,42 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
791 return flseg->fh_array[i]; 797 return flseg->fh_array[i];
792} 798}
793 799
794static void
795filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
796 int err, const char *ds_remotestr)
797{
798 u32 *p = (u32 *)&dsaddr->id_node.deviceid;
799
800 printk(KERN_ERR "NFS: data server %s connection error %d."
801 " Deviceid [%x%x%x%x] marked out of use.\n",
802 ds_remotestr, err, p[0], p[1], p[2], p[3]);
803
804 spin_lock(&nfs4_ds_cache_lock);
805 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
806 spin_unlock(&nfs4_ds_cache_lock);
807}
808
809struct nfs4_pnfs_ds * 800struct nfs4_pnfs_ds *
810nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) 801nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
811{ 802{
812 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; 803 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
813 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 804 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
805 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
806
807 if (filelayout_test_devid_invalid(devid))
808 return NULL;
814 809
815 if (ds == NULL) { 810 if (ds == NULL) {
816 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 811 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
817 __func__, ds_idx); 812 __func__, ds_idx);
818 return NULL; 813 goto mark_dev_invalid;
819 } 814 }
820 815
821 if (!ds->ds_clp) { 816 if (!ds->ds_clp) {
822 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 817 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
823 int err; 818 int err;
824 819
825 if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
826 /* Already tried to connect, don't try again */
827 dprintk("%s Deviceid marked out of use\n", __func__);
828 return NULL;
829 }
830 err = nfs4_ds_connect(s, ds); 820 err = nfs4_ds_connect(s, ds);
831 if (err) { 821 if (err)
832 filelayout_mark_devid_negative(dsaddr, err, 822 goto mark_dev_invalid;
833 ds->ds_remotestr);
834 return NULL;
835 }
836 } 823 }
837 return ds; 824 return ds;
825
826mark_dev_invalid:
827 filelayout_mark_devid_invalid(devid);
828 return NULL;
838} 829}
830
831module_param(dataserver_retrans, uint, 0644);
832MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
833 "retries a request before it attempts further "
834 " recovery action.");
835module_param(dataserver_timeo, uint, 0644);
836MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
837 "NFSv4.1 client waits for a response from a "
838 " data server before it retries an NFS request.");
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a7f3dedc4ec7..017b4b01a69c 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len,
132 return ret; 132 return ret;
133} 133}
134 134
135rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
136{
137 struct gss_api_mech *mech;
138 struct xdr_netobj oid;
139 int i;
140 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
141
142 for (i = 0; i < flavors->num_flavors; i++) {
143 struct nfs4_secinfo_flavor *flavor;
144 flavor = &flavors->flavors[i];
145
146 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
147 pseudoflavor = flavor->flavor;
148 break;
149 } else if (flavor->flavor == RPC_AUTH_GSS) {
150 oid.len = flavor->gss.sec_oid4.len;
151 oid.data = flavor->gss.sec_oid4.data;
152 mech = gss_mech_get_by_OID(&oid);
153 if (!mech)
154 continue;
155 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
156 gss_mech_put(mech);
157 break;
158 }
159 }
160
161 return pseudoflavor;
162}
163
135static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) 164static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
136{ 165{
137 struct page *page; 166 struct page *page;
@@ -168,7 +197,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino
168 rpc_authflavor_t flavor; 197 rpc_authflavor_t flavor;
169 198
170 flavor = nfs4_negotiate_security(inode, name); 199 flavor = nfs4_negotiate_security(inode, name);
171 if (flavor < 0) 200 if ((int)flavor < 0)
172 return ERR_PTR(flavor); 201 return ERR_PTR(flavor);
173 202
174 clone = rpc_clone_client(clnt); 203 clone = rpc_clone_client(clnt);
@@ -300,7 +329,7 @@ out:
300 * @dentry - dentry of referral 329 * @dentry - dentry of referral
301 * 330 *
302 */ 331 */
303struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) 332static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
304{ 333{
305 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 334 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
306 struct dentry *parent; 335 struct dentry *parent;
@@ -341,3 +370,25 @@ out:
341 dprintk("%s: done\n", __func__); 370 dprintk("%s: done\n", __func__);
342 return mnt; 371 return mnt;
343} 372}
373
374struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
375 struct nfs_fh *fh, struct nfs_fattr *fattr)
376{
377 struct dentry *parent = dget_parent(dentry);
378 struct rpc_clnt *client;
379 struct vfsmount *mnt;
380
381 /* Look it up again to get its attributes and sec flavor */
382 client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
383 dput(parent);
384 if (IS_ERR(client))
385 return ERR_CAST(client);
386
387 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
388 mnt = nfs_do_refmount(client, dentry);
389 else
390 mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
391
392 rpc_shutdown_client(client);
393 return mnt;
394}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ab985f6f0da8..d48dbefa0e71 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
64#include "iostat.h" 64#include "iostat.h"
65#include "callback.h" 65#include "callback.h"
66#include "pnfs.h" 66#include "pnfs.h"
67#include "netns.h"
67 68
68#define NFSDBG_FACILITY NFSDBG_PROC 69#define NFSDBG_FACILITY NFSDBG_PROC
69 70
@@ -80,6 +81,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
80static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 81static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
81static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 82static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
82static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); 83static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
84static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);
83static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 85static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
84static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 86static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
85 struct nfs_fattr *fattr, struct iattr *sattr, 87 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -101,6 +103,8 @@ static int nfs4_map_errors(int err)
101 case -NFS4ERR_BADOWNER: 103 case -NFS4ERR_BADOWNER:
102 case -NFS4ERR_BADNAME: 104 case -NFS4ERR_BADNAME:
103 return -EINVAL; 105 return -EINVAL;
106 case -NFS4ERR_SHARE_DENIED:
107 return -EACCES;
104 default: 108 default:
105 dprintk("%s could not handle NFSv4 error %d\n", 109 dprintk("%s could not handle NFSv4 error %d\n",
106 __func__, -err); 110 __func__, -err);
@@ -304,7 +308,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
304 case -NFS4ERR_SEQ_MISORDERED: 308 case -NFS4ERR_SEQ_MISORDERED:
305 dprintk("%s ERROR: %d Reset session\n", __func__, 309 dprintk("%s ERROR: %d Reset session\n", __func__,
306 errorcode); 310 errorcode);
307 nfs4_schedule_session_recovery(clp->cl_session); 311 nfs4_schedule_session_recovery(clp->cl_session, errorcode);
308 exception->retry = 1; 312 exception->retry = 1;
309 break; 313 break;
310#endif /* defined(CONFIG_NFS_V4_1) */ 314#endif /* defined(CONFIG_NFS_V4_1) */
@@ -772,7 +776,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
772 struct nfs_inode *nfsi = NFS_I(dir); 776 struct nfs_inode *nfsi = NFS_I(dir);
773 777
774 spin_lock(&dir->i_lock); 778 spin_lock(&dir->i_lock);
775 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; 779 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
776 if (!cinfo->atomic || cinfo->before != dir->i_version) 780 if (!cinfo->atomic || cinfo->before != dir->i_version)
777 nfs_force_lookup_revalidate(dir); 781 nfs_force_lookup_revalidate(dir);
778 dir->i_version = cinfo->after; 782 dir->i_version = cinfo->after;
@@ -788,7 +792,6 @@ struct nfs4_opendata {
788 struct nfs4_string owner_name; 792 struct nfs4_string owner_name;
789 struct nfs4_string group_name; 793 struct nfs4_string group_name;
790 struct nfs_fattr f_attr; 794 struct nfs_fattr f_attr;
791 struct nfs_fattr dir_attr;
792 struct dentry *dir; 795 struct dentry *dir;
793 struct dentry *dentry; 796 struct dentry *dentry;
794 struct nfs4_state_owner *owner; 797 struct nfs4_state_owner *owner;
@@ -804,12 +807,10 @@ struct nfs4_opendata {
804static void nfs4_init_opendata_res(struct nfs4_opendata *p) 807static void nfs4_init_opendata_res(struct nfs4_opendata *p)
805{ 808{
806 p->o_res.f_attr = &p->f_attr; 809 p->o_res.f_attr = &p->f_attr;
807 p->o_res.dir_attr = &p->dir_attr;
808 p->o_res.seqid = p->o_arg.seqid; 810 p->o_res.seqid = p->o_arg.seqid;
809 p->c_res.seqid = p->c_arg.seqid; 811 p->c_res.seqid = p->c_arg.seqid;
810 p->o_res.server = p->o_arg.server; 812 p->o_res.server = p->o_arg.server;
811 nfs_fattr_init(&p->f_attr); 813 nfs_fattr_init(&p->f_attr);
812 nfs_fattr_init(&p->dir_attr);
813 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); 814 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
814} 815}
815 816
@@ -843,7 +844,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
843 p->o_arg.name = &dentry->d_name; 844 p->o_arg.name = &dentry->d_name;
844 p->o_arg.server = server; 845 p->o_arg.server = server;
845 p->o_arg.bitmask = server->attr_bitmask; 846 p->o_arg.bitmask = server->attr_bitmask;
846 p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
848 if (attrs != NULL && attrs->ia_valid != 0) { 848 if (attrs != NULL && attrs->ia_valid != 0) {
849 __be32 verf[2]; 849 __be32 verf[2];
@@ -1332,7 +1332,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1332 case -NFS4ERR_BAD_HIGH_SLOT: 1332 case -NFS4ERR_BAD_HIGH_SLOT:
1333 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1333 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1334 case -NFS4ERR_DEADSESSION: 1334 case -NFS4ERR_DEADSESSION:
1335 nfs4_schedule_session_recovery(server->nfs_client->cl_session); 1335 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1336 goto out; 1336 goto out;
1337 case -NFS4ERR_STALE_CLIENTID: 1337 case -NFS4ERR_STALE_CLIENTID:
1338 case -NFS4ERR_STALE_STATEID: 1338 case -NFS4ERR_STALE_STATEID:
@@ -1611,8 +1611,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1611 1611
1612 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); 1612 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
1613 1613
1614 nfs_refresh_inode(dir, o_res->dir_attr);
1615
1616 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1614 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1617 status = _nfs4_proc_open_confirm(data); 1615 status = _nfs4_proc_open_confirm(data);
1618 if (status != 0) 1616 if (status != 0)
@@ -1645,11 +1643,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1645 1643
1646 nfs_fattr_map_and_free_names(server, &data->f_attr); 1644 nfs_fattr_map_and_free_names(server, &data->f_attr);
1647 1645
1648 if (o_arg->open_flags & O_CREAT) { 1646 if (o_arg->open_flags & O_CREAT)
1649 update_changeattr(dir, &o_res->cinfo); 1647 update_changeattr(dir, &o_res->cinfo);
1650 nfs_post_op_update_inode(dir, o_res->dir_attr);
1651 } else
1652 nfs_refresh_inode(dir, o_res->dir_attr);
1653 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) 1648 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
1654 server->caps &= ~NFS_CAP_POSIX_LOCK; 1649 server->caps &= ~NFS_CAP_POSIX_LOCK;
1655 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1650 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1789,7 +1784,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1789/* 1784/*
1790 * Returns a referenced nfs4_state 1785 * Returns a referenced nfs4_state
1791 */ 1786 */
1792static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1787static int _nfs4_do_open(struct inode *dir,
1788 struct dentry *dentry,
1789 fmode_t fmode,
1790 int flags,
1791 struct iattr *sattr,
1792 struct rpc_cred *cred,
1793 struct nfs4_state **res,
1794 struct nfs4_threshold **ctx_th)
1793{ 1795{
1794 struct nfs4_state_owner *sp; 1796 struct nfs4_state_owner *sp;
1795 struct nfs4_state *state = NULL; 1797 struct nfs4_state *state = NULL;
@@ -1814,6 +1816,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
1814 if (opendata == NULL) 1816 if (opendata == NULL)
1815 goto err_put_state_owner; 1817 goto err_put_state_owner;
1816 1818
1819 if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
1820 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
1821 if (!opendata->f_attr.mdsthreshold)
1822 goto err_opendata_put;
1823 }
1817 if (dentry->d_inode != NULL) 1824 if (dentry->d_inode != NULL)
1818 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1825 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1819 1826
@@ -1839,11 +1846,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
1839 nfs_setattr_update_inode(state->inode, sattr); 1846 nfs_setattr_update_inode(state->inode, sattr);
1840 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); 1847 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
1841 } 1848 }
1849
1850 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
1851 *ctx_th = opendata->f_attr.mdsthreshold;
1852 else
1853 kfree(opendata->f_attr.mdsthreshold);
1854 opendata->f_attr.mdsthreshold = NULL;
1855
1842 nfs4_opendata_put(opendata); 1856 nfs4_opendata_put(opendata);
1843 nfs4_put_state_owner(sp); 1857 nfs4_put_state_owner(sp);
1844 *res = state; 1858 *res = state;
1845 return 0; 1859 return 0;
1846err_opendata_put: 1860err_opendata_put:
1861 kfree(opendata->f_attr.mdsthreshold);
1847 nfs4_opendata_put(opendata); 1862 nfs4_opendata_put(opendata);
1848err_put_state_owner: 1863err_put_state_owner:
1849 nfs4_put_state_owner(sp); 1864 nfs4_put_state_owner(sp);
@@ -1853,14 +1868,21 @@ out_err:
1853} 1868}
1854 1869
1855 1870
1856static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) 1871static struct nfs4_state *nfs4_do_open(struct inode *dir,
1872 struct dentry *dentry,
1873 fmode_t fmode,
1874 int flags,
1875 struct iattr *sattr,
1876 struct rpc_cred *cred,
1877 struct nfs4_threshold **ctx_th)
1857{ 1878{
1858 struct nfs4_exception exception = { }; 1879 struct nfs4_exception exception = { };
1859 struct nfs4_state *res; 1880 struct nfs4_state *res;
1860 int status; 1881 int status;
1861 1882
1862 do { 1883 do {
1863 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); 1884 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
1885 &res, ctx_th);
1864 if (status == 0) 1886 if (status == 0)
1865 break; 1887 break;
1866 /* NOTE: BAD_SEQID means the server and client disagree about the 1888 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2184,7 +2206,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
2184 struct nfs4_state *state; 2206 struct nfs4_state *state;
2185 2207
2186 /* Protect against concurrent sillydeletes */ 2208 /* Protect against concurrent sillydeletes */
2187 state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred); 2209 state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
2210 ctx->cred, &ctx->mdsthreshold);
2188 if (IS_ERR(state)) 2211 if (IS_ERR(state))
2189 return ERR_CAST(state); 2212 return ERR_CAST(state);
2190 ctx->state = state; 2213 ctx->state = state;
@@ -2354,8 +2377,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2354/* 2377/*
2355 * get the file handle for the "/" directory on the server 2378 * get the file handle for the "/" directory on the server
2356 */ 2379 */
2357static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2380int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
2358 struct nfs_fsinfo *info) 2381 struct nfs_fsinfo *info)
2359{ 2382{
2360 int minor_version = server->nfs_client->cl_minorversion; 2383 int minor_version = server->nfs_client->cl_minorversion;
2361 int status = nfs4_lookup_root(server, fhandle, info); 2384 int status = nfs4_lookup_root(server, fhandle, info);
@@ -2372,6 +2395,31 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2372 return nfs4_map_errors(status); 2395 return nfs4_map_errors(status);
2373} 2396}
2374 2397
2398static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
2399 struct nfs_fsinfo *info)
2400{
2401 int error;
2402 struct nfs_fattr *fattr = info->fattr;
2403
2404 error = nfs4_server_capabilities(server, mntfh);
2405 if (error < 0) {
2406 dprintk("nfs4_get_root: getcaps error = %d\n", -error);
2407 return error;
2408 }
2409
2410 error = nfs4_proc_getattr(server, mntfh, fattr);
2411 if (error < 0) {
2412 dprintk("nfs4_get_root: getattr error = %d\n", -error);
2413 return error;
2414 }
2415
2416 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
2417 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
2418 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
2419
2420 return error;
2421}
2422
2375/* 2423/*
2376 * Get locations and (maybe) other attributes of a referral. 2424 * Get locations and (maybe) other attributes of a referral.
2377 * Note that we'll actually follow the referral later when 2425 * Note that we'll actually follow the referral later when
@@ -2578,7 +2626,7 @@ out:
2578 return err; 2626 return err;
2579} 2627}
2580 2628
2581static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 2629static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
2582 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2630 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2583{ 2631{
2584 int status; 2632 int status;
@@ -2761,7 +2809,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2761 fmode = ctx->mode; 2809 fmode = ctx->mode;
2762 } 2810 }
2763 sattr->ia_mode &= ~current_umask(); 2811 sattr->ia_mode &= ~current_umask();
2764 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred); 2812 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
2765 d_drop(dentry); 2813 d_drop(dentry);
2766 if (IS_ERR(state)) { 2814 if (IS_ERR(state)) {
2767 status = PTR_ERR(state); 2815 status = PTR_ERR(state);
@@ -2783,7 +2831,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2783 struct nfs_removeargs args = { 2831 struct nfs_removeargs args = {
2784 .fh = NFS_FH(dir), 2832 .fh = NFS_FH(dir),
2785 .name = *name, 2833 .name = *name,
2786 .bitmask = server->attr_bitmask,
2787 }; 2834 };
2788 struct nfs_removeres res = { 2835 struct nfs_removeres res = {
2789 .server = server, 2836 .server = server,
@@ -2793,19 +2840,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2793 .rpc_argp = &args, 2840 .rpc_argp = &args,
2794 .rpc_resp = &res, 2841 .rpc_resp = &res,
2795 }; 2842 };
2796 int status = -ENOMEM; 2843 int status;
2797
2798 res.dir_attr = nfs_alloc_fattr();
2799 if (res.dir_attr == NULL)
2800 goto out;
2801 2844
2802 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); 2845 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
2803 if (status == 0) { 2846 if (status == 0)
2804 update_changeattr(dir, &res.cinfo); 2847 update_changeattr(dir, &res.cinfo);
2805 nfs_post_op_update_inode(dir, res.dir_attr);
2806 }
2807 nfs_free_fattr(res.dir_attr);
2808out:
2809 return status; 2848 return status;
2810} 2849}
2811 2850
@@ -2827,7 +2866,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2827 struct nfs_removeargs *args = msg->rpc_argp; 2866 struct nfs_removeargs *args = msg->rpc_argp;
2828 struct nfs_removeres *res = msg->rpc_resp; 2867 struct nfs_removeres *res = msg->rpc_resp;
2829 2868
2830 args->bitmask = server->cache_consistency_bitmask;
2831 res->server = server; 2869 res->server = server;
2832 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2870 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2833 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); 2871 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
@@ -2852,7 +2890,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2852 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2890 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2853 return 0; 2891 return 0;
2854 update_changeattr(dir, &res->cinfo); 2892 update_changeattr(dir, &res->cinfo);
2855 nfs_post_op_update_inode(dir, res->dir_attr);
2856 return 1; 2893 return 1;
2857} 2894}
2858 2895
@@ -2863,7 +2900,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2863 struct nfs_renameres *res = msg->rpc_resp; 2900 struct nfs_renameres *res = msg->rpc_resp;
2864 2901
2865 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; 2902 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2866 arg->bitmask = server->attr_bitmask;
2867 res->server = server; 2903 res->server = server;
2868 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); 2904 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
2869} 2905}
@@ -2889,9 +2925,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
2889 return 0; 2925 return 0;
2890 2926
2891 update_changeattr(old_dir, &res->old_cinfo); 2927 update_changeattr(old_dir, &res->old_cinfo);
2892 nfs_post_op_update_inode(old_dir, res->old_fattr);
2893 update_changeattr(new_dir, &res->new_cinfo); 2928 update_changeattr(new_dir, &res->new_cinfo);
2894 nfs_post_op_update_inode(new_dir, res->new_fattr);
2895 return 1; 2929 return 1;
2896} 2930}
2897 2931
@@ -2904,7 +2938,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2904 .new_dir = NFS_FH(new_dir), 2938 .new_dir = NFS_FH(new_dir),
2905 .old_name = old_name, 2939 .old_name = old_name,
2906 .new_name = new_name, 2940 .new_name = new_name,
2907 .bitmask = server->attr_bitmask,
2908 }; 2941 };
2909 struct nfs_renameres res = { 2942 struct nfs_renameres res = {
2910 .server = server, 2943 .server = server,
@@ -2916,21 +2949,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2916 }; 2949 };
2917 int status = -ENOMEM; 2950 int status = -ENOMEM;
2918 2951
2919 res.old_fattr = nfs_alloc_fattr();
2920 res.new_fattr = nfs_alloc_fattr();
2921 if (res.old_fattr == NULL || res.new_fattr == NULL)
2922 goto out;
2923
2924 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2952 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2925 if (!status) { 2953 if (!status) {
2926 update_changeattr(old_dir, &res.old_cinfo); 2954 update_changeattr(old_dir, &res.old_cinfo);
2927 nfs_post_op_update_inode(old_dir, res.old_fattr);
2928 update_changeattr(new_dir, &res.new_cinfo); 2955 update_changeattr(new_dir, &res.new_cinfo);
2929 nfs_post_op_update_inode(new_dir, res.new_fattr);
2930 } 2956 }
2931out:
2932 nfs_free_fattr(res.new_fattr);
2933 nfs_free_fattr(res.old_fattr);
2934 return status; 2957 return status;
2935} 2958}
2936 2959
@@ -2968,18 +2991,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2968 int status = -ENOMEM; 2991 int status = -ENOMEM;
2969 2992
2970 res.fattr = nfs_alloc_fattr(); 2993 res.fattr = nfs_alloc_fattr();
2971 res.dir_attr = nfs_alloc_fattr(); 2994 if (res.fattr == NULL)
2972 if (res.fattr == NULL || res.dir_attr == NULL)
2973 goto out; 2995 goto out;
2974 2996
2975 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2997 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2976 if (!status) { 2998 if (!status) {
2977 update_changeattr(dir, &res.cinfo); 2999 update_changeattr(dir, &res.cinfo);
2978 nfs_post_op_update_inode(dir, res.dir_attr);
2979 nfs_post_op_update_inode(inode, res.fattr); 3000 nfs_post_op_update_inode(inode, res.fattr);
2980 } 3001 }
2981out: 3002out:
2982 nfs_free_fattr(res.dir_attr);
2983 nfs_free_fattr(res.fattr); 3003 nfs_free_fattr(res.fattr);
2984 return status; 3004 return status;
2985} 3005}
@@ -3002,7 +3022,6 @@ struct nfs4_createdata {
3002 struct nfs4_create_res res; 3022 struct nfs4_create_res res;
3003 struct nfs_fh fh; 3023 struct nfs_fh fh;
3004 struct nfs_fattr fattr; 3024 struct nfs_fattr fattr;
3005 struct nfs_fattr dir_fattr;
3006}; 3025};
3007 3026
3008static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, 3027static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -3026,9 +3045,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
3026 data->res.server = server; 3045 data->res.server = server;
3027 data->res.fh = &data->fh; 3046 data->res.fh = &data->fh;
3028 data->res.fattr = &data->fattr; 3047 data->res.fattr = &data->fattr;
3029 data->res.dir_fattr = &data->dir_fattr;
3030 nfs_fattr_init(data->res.fattr); 3048 nfs_fattr_init(data->res.fattr);
3031 nfs_fattr_init(data->res.dir_fattr);
3032 } 3049 }
3033 return data; 3050 return data;
3034} 3051}
@@ -3039,7 +3056,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
3039 &data->arg.seq_args, &data->res.seq_res, 1); 3056 &data->arg.seq_args, &data->res.seq_res, 1);
3040 if (status == 0) { 3057 if (status == 0) {
3041 update_changeattr(dir, &data->res.dir_cinfo); 3058 update_changeattr(dir, &data->res.dir_cinfo);
3042 nfs_post_op_update_inode(dir, data->res.dir_fattr);
3043 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); 3059 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
3044 } 3060 }
3045 return status; 3061 return status;
@@ -3335,12 +3351,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3335 3351
3336void __nfs4_read_done_cb(struct nfs_read_data *data) 3352void __nfs4_read_done_cb(struct nfs_read_data *data)
3337{ 3353{
3338 nfs_invalidate_atime(data->inode); 3354 nfs_invalidate_atime(data->header->inode);
3339} 3355}
3340 3356
3341static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) 3357static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3342{ 3358{
3343 struct nfs_server *server = NFS_SERVER(data->inode); 3359 struct nfs_server *server = NFS_SERVER(data->header->inode);
3344 3360
3345 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3361 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3346 rpc_restart_call_prepare(task); 3362 rpc_restart_call_prepare(task);
@@ -3375,7 +3391,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
3375 3391
3376static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 3392static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
3377{ 3393{
3378 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 3394 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3379 &data->args.seq_args, 3395 &data->args.seq_args,
3380 &data->res.seq_res, 3396 &data->res.seq_res,
3381 task)) 3397 task))
@@ -3383,25 +3399,9 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
3383 rpc_call_start(task); 3399 rpc_call_start(task);
3384} 3400}
3385 3401
3386/* Reset the the nfs_read_data to send the read to the MDS. */
3387void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3388{
3389 dprintk("%s Reset task for i/o through\n", __func__);
3390 put_lseg(data->lseg);
3391 data->lseg = NULL;
3392 /* offsets will differ in the dense stripe case */
3393 data->args.offset = data->mds_offset;
3394 data->ds_clp = NULL;
3395 data->args.fh = NFS_FH(data->inode);
3396 data->read_done_cb = nfs4_read_done_cb;
3397 task->tk_ops = data->mds_ops;
3398 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3399}
3400EXPORT_SYMBOL_GPL(nfs4_reset_read);
3401
3402static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3402static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3403{ 3403{
3404 struct inode *inode = data->inode; 3404 struct inode *inode = data->header->inode;
3405 3405
3406 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3406 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3407 rpc_restart_call_prepare(task); 3407 rpc_restart_call_prepare(task);
@@ -3409,7 +3409,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
3409 } 3409 }
3410 if (task->tk_status >= 0) { 3410 if (task->tk_status >= 0) {
3411 renew_lease(NFS_SERVER(inode), data->timestamp); 3411 renew_lease(NFS_SERVER(inode), data->timestamp);
3412 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 3412 nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
3413 } 3413 }
3414 return 0; 3414 return 0;
3415} 3415}
@@ -3422,32 +3422,30 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3422 nfs4_write_done_cb(task, data); 3422 nfs4_write_done_cb(task, data);
3423} 3423}
3424 3424
3425/* Reset the the nfs_write_data to send the write to the MDS. */ 3425static
3426void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) 3426bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
3427{ 3427{
3428 dprintk("%s Reset task for i/o through\n", __func__); 3428 const struct nfs_pgio_header *hdr = data->header;
3429 put_lseg(data->lseg); 3429
3430 data->lseg = NULL; 3430 /* Don't request attributes for pNFS or O_DIRECT writes */
3431 data->ds_clp = NULL; 3431 if (data->ds_clp != NULL || hdr->dreq != NULL)
3432 data->write_done_cb = nfs4_write_done_cb; 3432 return false;
3433 data->args.fh = NFS_FH(data->inode); 3433 /* Otherwise, request attributes if and only if we don't hold
3434 data->args.bitmask = data->res.server->cache_consistency_bitmask; 3434 * a delegation
3435 data->args.offset = data->mds_offset; 3435 */
3436 data->res.fattr = &data->fattr; 3436 return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;
3437 task->tk_ops = data->mds_ops;
3438 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3439} 3437}
3440EXPORT_SYMBOL_GPL(nfs4_reset_write);
3441 3438
3442static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 3439static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
3443{ 3440{
3444 struct nfs_server *server = NFS_SERVER(data->inode); 3441 struct nfs_server *server = NFS_SERVER(data->header->inode);
3445 3442
3446 if (data->lseg) { 3443 if (!nfs4_write_need_cache_consistency_data(data)) {
3447 data->args.bitmask = NULL; 3444 data->args.bitmask = NULL;
3448 data->res.fattr = NULL; 3445 data->res.fattr = NULL;
3449 } else 3446 } else
3450 data->args.bitmask = server->cache_consistency_bitmask; 3447 data->args.bitmask = server->cache_consistency_bitmask;
3448
3451 if (!data->write_done_cb) 3449 if (!data->write_done_cb)
3452 data->write_done_cb = nfs4_write_done_cb; 3450 data->write_done_cb = nfs4_write_done_cb;
3453 data->res.server = server; 3451 data->res.server = server;
@@ -3459,6 +3457,16 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3459 3457
3460static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) 3458static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
3461{ 3459{
3460 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3461 &data->args.seq_args,
3462 &data->res.seq_res,
3463 task))
3464 return;
3465 rpc_call_start(task);
3466}
3467
3468static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
3469{
3462 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 3470 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
3463 &data->args.seq_args, 3471 &data->args.seq_args,
3464 &data->res.seq_res, 3472 &data->res.seq_res,
@@ -3467,7 +3475,7 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
3467 rpc_call_start(task); 3475 rpc_call_start(task);
3468} 3476}
3469 3477
3470static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3478static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
3471{ 3479{
3472 struct inode *inode = data->inode; 3480 struct inode *inode = data->inode;
3473 3481
@@ -3475,28 +3483,22 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
3475 rpc_restart_call_prepare(task); 3483 rpc_restart_call_prepare(task);
3476 return -EAGAIN; 3484 return -EAGAIN;
3477 } 3485 }
3478 nfs_refresh_inode(inode, data->res.fattr);
3479 return 0; 3486 return 0;
3480} 3487}
3481 3488
3482static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3489static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
3483{ 3490{
3484 if (!nfs4_sequence_done(task, &data->res.seq_res)) 3491 if (!nfs4_sequence_done(task, &data->res.seq_res))
3485 return -EAGAIN; 3492 return -EAGAIN;
3486 return data->write_done_cb(task, data); 3493 return data->commit_done_cb(task, data);
3487} 3494}
3488 3495
3489static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3496static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
3490{ 3497{
3491 struct nfs_server *server = NFS_SERVER(data->inode); 3498 struct nfs_server *server = NFS_SERVER(data->inode);
3492 3499
3493 if (data->lseg) { 3500 if (data->commit_done_cb == NULL)
3494 data->args.bitmask = NULL; 3501 data->commit_done_cb = nfs4_commit_done_cb;
3495 data->res.fattr = NULL;
3496 } else
3497 data->args.bitmask = server->cache_consistency_bitmask;
3498 if (!data->write_done_cb)
3499 data->write_done_cb = nfs4_commit_done_cb;
3500 data->res.server = server; 3502 data->res.server = server;
3501 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3503 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3502 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 3504 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -3905,7 +3907,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3905 case -NFS4ERR_SEQ_MISORDERED: 3907 case -NFS4ERR_SEQ_MISORDERED:
3906 dprintk("%s ERROR %d, Reset session\n", __func__, 3908 dprintk("%s ERROR %d, Reset session\n", __func__,
3907 task->tk_status); 3909 task->tk_status);
3908 nfs4_schedule_session_recovery(clp->cl_session); 3910 nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
3909 task->tk_status = 0; 3911 task->tk_status = 0;
3910 return -EAGAIN; 3912 return -EAGAIN;
3911#endif /* CONFIG_NFS_V4_1 */ 3913#endif /* CONFIG_NFS_V4_1 */
@@ -3931,13 +3933,21 @@ wait_on_recovery:
3931 return -EAGAIN; 3933 return -EAGAIN;
3932} 3934}
3933 3935
3934static void nfs4_construct_boot_verifier(struct nfs_client *clp, 3936static void nfs4_init_boot_verifier(const struct nfs_client *clp,
3935 nfs4_verifier *bootverf) 3937 nfs4_verifier *bootverf)
3936{ 3938{
3937 __be32 verf[2]; 3939 __be32 verf[2];
3938 3940
3939 verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); 3941 if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
3940 verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); 3942 /* An impossible timestamp guarantees this value
3943 * will never match a generated boot time. */
3944 verf[0] = 0;
3945 verf[1] = (__be32)(NSEC_PER_SEC + 1);
3946 } else {
3947 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
3948 verf[0] = (__be32)nn->boot_time.tv_sec;
3949 verf[1] = (__be32)nn->boot_time.tv_nsec;
3950 }
3941 memcpy(bootverf->data, verf, sizeof(bootverf->data)); 3951 memcpy(bootverf->data, verf, sizeof(bootverf->data));
3942} 3952}
3943 3953
@@ -3960,7 +3970,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3960 int loop = 0; 3970 int loop = 0;
3961 int status; 3971 int status;
3962 3972
3963 nfs4_construct_boot_verifier(clp, &sc_verifier); 3973 nfs4_init_boot_verifier(clp, &sc_verifier);
3964 3974
3965 for(;;) { 3975 for(;;) {
3966 rcu_read_lock(); 3976 rcu_read_lock();
@@ -4104,7 +4114,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
4104 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4114 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
4105 data->args.fhandle = &data->fh; 4115 data->args.fhandle = &data->fh;
4106 data->args.stateid = &data->stateid; 4116 data->args.stateid = &data->stateid;
4107 data->args.bitmask = server->attr_bitmask; 4117 data->args.bitmask = server->cache_consistency_bitmask;
4108 nfs_copy_fh(&data->fh, NFS_FH(inode)); 4118 nfs_copy_fh(&data->fh, NFS_FH(inode));
4109 nfs4_stateid_copy(&data->stateid, stateid); 4119 nfs4_stateid_copy(&data->stateid, stateid);
4110 data->res.fattr = &data->fattr; 4120 data->res.fattr = &data->fattr;
@@ -4125,9 +4135,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
4125 if (status != 0) 4135 if (status != 0)
4126 goto out; 4136 goto out;
4127 status = data->rpc_status; 4137 status = data->rpc_status;
4128 if (status != 0) 4138 if (status == 0)
4129 goto out; 4139 nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
4130 nfs_refresh_inode(inode, &data->fattr); 4140 else
4141 nfs_refresh_inode(inode, &data->fattr);
4131out: 4142out:
4132 rpc_put_task(task); 4143 rpc_put_task(task);
4133 return status; 4144 return status;
@@ -4837,7 +4848,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4837 case -NFS4ERR_BAD_HIGH_SLOT: 4848 case -NFS4ERR_BAD_HIGH_SLOT:
4838 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4849 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4839 case -NFS4ERR_DEADSESSION: 4850 case -NFS4ERR_DEADSESSION:
4840 nfs4_schedule_session_recovery(server->nfs_client->cl_session); 4851 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
4841 goto out; 4852 goto out;
4842 case -ERESTARTSYS: 4853 case -ERESTARTSYS:
4843 /* 4854 /*
@@ -5079,7 +5090,8 @@ out_inval:
5079} 5090}
5080 5091
5081static bool 5092static bool
5082nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) 5093nfs41_same_server_scope(struct nfs41_server_scope *a,
5094 struct nfs41_server_scope *b)
5083{ 5095{
5084 if (a->server_scope_sz == b->server_scope_sz && 5096 if (a->server_scope_sz == b->server_scope_sz &&
5085 memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) 5097 memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
@@ -5089,6 +5101,61 @@ nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
5089} 5101}
5090 5102
5091/* 5103/*
5104 * nfs4_proc_bind_conn_to_session()
5105 *
5106 * The 4.1 client currently uses the same TCP connection for the
5107 * fore and backchannel.
5108 */
5109int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
5110{
5111 int status;
5112 struct nfs41_bind_conn_to_session_res res;
5113 struct rpc_message msg = {
5114 .rpc_proc =
5115 &nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION],
5116 .rpc_argp = clp,
5117 .rpc_resp = &res,
5118 .rpc_cred = cred,
5119 };
5120
5121 dprintk("--> %s\n", __func__);
5122 BUG_ON(clp == NULL);
5123
5124 res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
5125 if (unlikely(res.session == NULL)) {
5126 status = -ENOMEM;
5127 goto out;
5128 }
5129
5130 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5131 if (status == 0) {
5132 if (memcmp(res.session->sess_id.data,
5133 clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
5134 dprintk("NFS: %s: Session ID mismatch\n", __func__);
5135 status = -EIO;
5136 goto out_session;
5137 }
5138 if (res.dir != NFS4_CDFS4_BOTH) {
5139 dprintk("NFS: %s: Unexpected direction from server\n",
5140 __func__);
5141 status = -EIO;
5142 goto out_session;
5143 }
5144 if (res.use_conn_in_rdma_mode) {
5145 dprintk("NFS: %s: Server returned RDMA mode = true\n",
5146 __func__);
5147 status = -EIO;
5148 goto out_session;
5149 }
5150 }
5151out_session:
5152 kfree(res.session);
5153out:
5154 dprintk("<-- %s status= %d\n", __func__, status);
5155 return status;
5156}
5157
5158/*
5092 * nfs4_proc_exchange_id() 5159 * nfs4_proc_exchange_id()
5093 * 5160 *
5094 * Since the clientid has expired, all compounds using sessions 5161 * Since the clientid has expired, all compounds using sessions
@@ -5105,7 +5172,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5105 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, 5172 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
5106 }; 5173 };
5107 struct nfs41_exchange_id_res res = { 5174 struct nfs41_exchange_id_res res = {
5108 .client = clp, 5175 0
5109 }; 5176 };
5110 int status; 5177 int status;
5111 struct rpc_message msg = { 5178 struct rpc_message msg = {
@@ -5118,7 +5185,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5118 dprintk("--> %s\n", __func__); 5185 dprintk("--> %s\n", __func__);
5119 BUG_ON(clp == NULL); 5186 BUG_ON(clp == NULL);
5120 5187
5121 nfs4_construct_boot_verifier(clp, &verifier); 5188 nfs4_init_boot_verifier(clp, &verifier);
5122 5189
5123 args.id_len = scnprintf(args.id, sizeof(args.id), 5190 args.id_len = scnprintf(args.id, sizeof(args.id),
5124 "%s/%s/%u", 5191 "%s/%s/%u",
@@ -5126,59 +5193,135 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5126 clp->cl_rpcclient->cl_nodename, 5193 clp->cl_rpcclient->cl_nodename,
5127 clp->cl_rpcclient->cl_auth->au_flavor); 5194 clp->cl_rpcclient->cl_auth->au_flavor);
5128 5195
5129 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); 5196 res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
5130 if (unlikely(!res.server_scope)) { 5197 GFP_NOFS);
5198 if (unlikely(res.server_owner == NULL)) {
5131 status = -ENOMEM; 5199 status = -ENOMEM;
5132 goto out; 5200 goto out;
5133 } 5201 }
5134 5202
5135 res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); 5203 res.server_scope = kzalloc(sizeof(struct nfs41_server_scope),
5136 if (unlikely(!res.impl_id)) { 5204 GFP_NOFS);
5205 if (unlikely(res.server_scope == NULL)) {
5206 status = -ENOMEM;
5207 goto out_server_owner;
5208 }
5209
5210 res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS);
5211 if (unlikely(res.impl_id == NULL)) {
5137 status = -ENOMEM; 5212 status = -ENOMEM;
5138 goto out_server_scope; 5213 goto out_server_scope;
5139 } 5214 }
5140 5215
5141 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5216 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5142 if (!status) 5217 if (status == 0)
5143 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 5218 status = nfs4_check_cl_exchange_flags(res.flags);
5219
5220 if (status == 0) {
5221 clp->cl_clientid = res.clientid;
5222 clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
5223 if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
5224 clp->cl_seqid = res.seqid;
5225
5226 kfree(clp->cl_serverowner);
5227 clp->cl_serverowner = res.server_owner;
5228 res.server_owner = NULL;
5144 5229
5145 if (!status) {
5146 /* use the most recent implementation id */ 5230 /* use the most recent implementation id */
5147 kfree(clp->impl_id); 5231 kfree(clp->cl_implid);
5148 clp->impl_id = res.impl_id; 5232 clp->cl_implid = res.impl_id;
5149 } else
5150 kfree(res.impl_id);
5151 5233
5152 if (!status) { 5234 if (clp->cl_serverscope != NULL &&
5153 if (clp->server_scope && 5235 !nfs41_same_server_scope(clp->cl_serverscope,
5154 !nfs41_same_server_scope(clp->server_scope,
5155 res.server_scope)) { 5236 res.server_scope)) {
5156 dprintk("%s: server_scope mismatch detected\n", 5237 dprintk("%s: server_scope mismatch detected\n",
5157 __func__); 5238 __func__);
5158 set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); 5239 set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
5159 kfree(clp->server_scope); 5240 kfree(clp->cl_serverscope);
5160 clp->server_scope = NULL; 5241 clp->cl_serverscope = NULL;
5161 } 5242 }
5162 5243
5163 if (!clp->server_scope) { 5244 if (clp->cl_serverscope == NULL) {
5164 clp->server_scope = res.server_scope; 5245 clp->cl_serverscope = res.server_scope;
5165 goto out; 5246 goto out;
5166 } 5247 }
5167 } 5248 } else
5249 kfree(res.impl_id);
5168 5250
5251out_server_owner:
5252 kfree(res.server_owner);
5169out_server_scope: 5253out_server_scope:
5170 kfree(res.server_scope); 5254 kfree(res.server_scope);
5171out: 5255out:
5172 if (clp->impl_id) 5256 if (clp->cl_implid != NULL)
5173 dprintk("%s: Server Implementation ID: " 5257 dprintk("%s: Server Implementation ID: "
5174 "domain: %s, name: %s, date: %llu,%u\n", 5258 "domain: %s, name: %s, date: %llu,%u\n",
5175 __func__, clp->impl_id->domain, clp->impl_id->name, 5259 __func__, clp->cl_implid->domain, clp->cl_implid->name,
5176 clp->impl_id->date.seconds, 5260 clp->cl_implid->date.seconds,
5177 clp->impl_id->date.nseconds); 5261 clp->cl_implid->date.nseconds);
5178 dprintk("<-- %s status= %d\n", __func__, status); 5262 dprintk("<-- %s status= %d\n", __func__, status);
5179 return status; 5263 return status;
5180} 5264}
5181 5265
5266static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
5267 struct rpc_cred *cred)
5268{
5269 struct rpc_message msg = {
5270 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID],
5271 .rpc_argp = clp,
5272 .rpc_cred = cred,
5273 };
5274 int status;
5275
5276 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5277 if (status)
5278 pr_warn("NFS: Got error %d from the server %s on "
5279 "DESTROY_CLIENTID.", status, clp->cl_hostname);
5280 return status;
5281}
5282
5283static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
5284 struct rpc_cred *cred)
5285{
5286 unsigned int loop;
5287 int ret;
5288
5289 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
5290 ret = _nfs4_proc_destroy_clientid(clp, cred);
5291 switch (ret) {
5292 case -NFS4ERR_DELAY:
5293 case -NFS4ERR_CLIENTID_BUSY:
5294 ssleep(1);
5295 break;
5296 default:
5297 return ret;
5298 }
5299 }
5300 return 0;
5301}
5302
5303int nfs4_destroy_clientid(struct nfs_client *clp)
5304{
5305 struct rpc_cred *cred;
5306 int ret = 0;
5307
5308 if (clp->cl_mvops->minor_version < 1)
5309 goto out;
5310 if (clp->cl_exchange_flags == 0)
5311 goto out;
5312 cred = nfs4_get_exchange_id_cred(clp);
5313 ret = nfs4_proc_destroy_clientid(clp, cred);
5314 if (cred)
5315 put_rpccred(cred);
5316 switch (ret) {
5317 case 0:
5318 case -NFS4ERR_STALE_CLIENTID:
5319 clp->cl_exchange_flags = 0;
5320 }
5321out:
5322 return ret;
5323}
5324
5182struct nfs4_get_lease_time_data { 5325struct nfs4_get_lease_time_data {
5183 struct nfs4_get_lease_time_args *args; 5326 struct nfs4_get_lease_time_args *args;
5184 struct nfs4_get_lease_time_res *res; 5327 struct nfs4_get_lease_time_res *res;
@@ -5399,8 +5542,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
5399void nfs4_destroy_session(struct nfs4_session *session) 5542void nfs4_destroy_session(struct nfs4_session *session)
5400{ 5543{
5401 struct rpc_xprt *xprt; 5544 struct rpc_xprt *xprt;
5545 struct rpc_cred *cred;
5402 5546
5403 nfs4_proc_destroy_session(session); 5547 cred = nfs4_get_exchange_id_cred(session->clp);
5548 nfs4_proc_destroy_session(session, cred);
5549 if (cred)
5550 put_rpccred(cred);
5404 5551
5405 rcu_read_lock(); 5552 rcu_read_lock();
5406 xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); 5553 xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
@@ -5510,7 +5657,8 @@ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
5510 return nfs4_verify_back_channel_attrs(args, session); 5657 return nfs4_verify_back_channel_attrs(args, session);
5511} 5658}
5512 5659
5513static int _nfs4_proc_create_session(struct nfs_client *clp) 5660static int _nfs4_proc_create_session(struct nfs_client *clp,
5661 struct rpc_cred *cred)
5514{ 5662{
5515 struct nfs4_session *session = clp->cl_session; 5663 struct nfs4_session *session = clp->cl_session;
5516 struct nfs41_create_session_args args = { 5664 struct nfs41_create_session_args args = {
@@ -5524,6 +5672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
5524 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION], 5672 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
5525 .rpc_argp = &args, 5673 .rpc_argp = &args,
5526 .rpc_resp = &res, 5674 .rpc_resp = &res,
5675 .rpc_cred = cred,
5527 }; 5676 };
5528 int status; 5677 int status;
5529 5678
@@ -5548,7 +5697,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
5548 * It is the responsibility of the caller to verify the session is 5697 * It is the responsibility of the caller to verify the session is
5549 * expired before calling this routine. 5698 * expired before calling this routine.
5550 */ 5699 */
5551int nfs4_proc_create_session(struct nfs_client *clp) 5700int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred)
5552{ 5701{
5553 int status; 5702 int status;
5554 unsigned *ptr; 5703 unsigned *ptr;
@@ -5556,7 +5705,7 @@ int nfs4_proc_create_session(struct nfs_client *clp)
5556 5705
5557 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5706 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
5558 5707
5559 status = _nfs4_proc_create_session(clp); 5708 status = _nfs4_proc_create_session(clp, cred);
5560 if (status) 5709 if (status)
5561 goto out; 5710 goto out;
5562 5711
@@ -5578,10 +5727,15 @@ out:
5578 * Issue the over-the-wire RPC DESTROY_SESSION. 5727 * Issue the over-the-wire RPC DESTROY_SESSION.
5579 * The caller must serialize access to this routine. 5728 * The caller must serialize access to this routine.
5580 */ 5729 */
5581int nfs4_proc_destroy_session(struct nfs4_session *session) 5730int nfs4_proc_destroy_session(struct nfs4_session *session,
5731 struct rpc_cred *cred)
5582{ 5732{
5733 struct rpc_message msg = {
5734 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION],
5735 .rpc_argp = session,
5736 .rpc_cred = cred,
5737 };
5583 int status = 0; 5738 int status = 0;
5584 struct rpc_message msg;
5585 5739
5586 dprintk("--> nfs4_proc_destroy_session\n"); 5740 dprintk("--> nfs4_proc_destroy_session\n");
5587 5741
@@ -5589,10 +5743,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5589 if (session->clp->cl_cons_state != NFS_CS_READY) 5743 if (session->clp->cl_cons_state != NFS_CS_READY)
5590 return status; 5744 return status;
5591 5745
5592 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION];
5593 msg.rpc_argp = session;
5594 msg.rpc_resp = NULL;
5595 msg.rpc_cred = NULL;
5596 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5746 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5597 5747
5598 if (status) 5748 if (status)
@@ -5604,53 +5754,79 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5604 return status; 5754 return status;
5605} 5755}
5606 5756
5757/*
5758 * With sessions, the client is not marked ready until after a
5759 * successful EXCHANGE_ID and CREATE_SESSION.
5760 *
5761 * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
5762 * other versions of NFS can be tried.
5763 */
5764static int nfs41_check_session_ready(struct nfs_client *clp)
5765{
5766 int ret;
5767
5768 if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
5769 ret = nfs4_client_recover_expired_lease(clp);
5770 if (ret)
5771 return ret;
5772 }
5773 if (clp->cl_cons_state < NFS_CS_READY)
5774 return -EPROTONOSUPPORT;
5775 smp_rmb();
5776 return 0;
5777}
5778
5607int nfs4_init_session(struct nfs_server *server) 5779int nfs4_init_session(struct nfs_server *server)
5608{ 5780{
5609 struct nfs_client *clp = server->nfs_client; 5781 struct nfs_client *clp = server->nfs_client;
5610 struct nfs4_session *session; 5782 struct nfs4_session *session;
5611 unsigned int rsize, wsize; 5783 unsigned int rsize, wsize;
5612 int ret;
5613 5784
5614 if (!nfs4_has_session(clp)) 5785 if (!nfs4_has_session(clp))
5615 return 0; 5786 return 0;
5616 5787
5617 session = clp->cl_session; 5788 session = clp->cl_session;
5618 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) 5789 spin_lock(&clp->cl_lock);
5619 return 0; 5790 if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
5620 5791
5621 rsize = server->rsize; 5792 rsize = server->rsize;
5622 if (rsize == 0) 5793 if (rsize == 0)
5623 rsize = NFS_MAX_FILE_IO_SIZE; 5794 rsize = NFS_MAX_FILE_IO_SIZE;
5624 wsize = server->wsize; 5795 wsize = server->wsize;
5625 if (wsize == 0) 5796 if (wsize == 0)
5626 wsize = NFS_MAX_FILE_IO_SIZE; 5797 wsize = NFS_MAX_FILE_IO_SIZE;
5627 5798
5628 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; 5799 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
5629 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; 5800 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
5801 }
5802 spin_unlock(&clp->cl_lock);
5630 5803
5631 ret = nfs4_recover_expired_lease(server); 5804 return nfs41_check_session_ready(clp);
5632 if (!ret)
5633 ret = nfs4_check_client_ready(clp);
5634 return ret;
5635} 5805}
5636 5806
5637int nfs4_init_ds_session(struct nfs_client *clp) 5807int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
5638{ 5808{
5639 struct nfs4_session *session = clp->cl_session; 5809 struct nfs4_session *session = clp->cl_session;
5640 int ret; 5810 int ret;
5641 5811
5642 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) 5812 spin_lock(&clp->cl_lock);
5643 return 0; 5813 if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
5644 5814 /*
5645 ret = nfs4_client_recover_expired_lease(clp); 5815 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the
5646 if (!ret) 5816 * DS lease to be equal to the MDS lease.
5647 /* Test for the DS role */ 5817 */
5648 if (!is_ds_client(clp)) 5818 clp->cl_lease_time = lease_time;
5649 ret = -ENODEV; 5819 clp->cl_last_renewal = jiffies;
5650 if (!ret) 5820 }
5651 ret = nfs4_check_client_ready(clp); 5821 spin_unlock(&clp->cl_lock);
5652 return ret;
5653 5822
5823 ret = nfs41_check_session_ready(clp);
5824 if (ret)
5825 return ret;
5826 /* Test for the DS role */
5827 if (!is_ds_client(clp))
5828 return -ENODEV;
5829 return 0;
5654} 5830}
5655EXPORT_SYMBOL_GPL(nfs4_init_ds_session); 5831EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
5656 5832
@@ -6557,6 +6733,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6557 .file_inode_ops = &nfs4_file_inode_operations, 6733 .file_inode_ops = &nfs4_file_inode_operations,
6558 .file_ops = &nfs4_file_operations, 6734 .file_ops = &nfs4_file_operations,
6559 .getroot = nfs4_proc_get_root, 6735 .getroot = nfs4_proc_get_root,
6736 .submount = nfs4_submount,
6560 .getattr = nfs4_proc_getattr, 6737 .getattr = nfs4_proc_getattr,
6561 .setattr = nfs4_proc_setattr, 6738 .setattr = nfs4_proc_setattr,
6562 .lookup = nfs4_proc_lookup, 6739 .lookup = nfs4_proc_lookup,
@@ -6589,13 +6766,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6589 .write_rpc_prepare = nfs4_proc_write_rpc_prepare, 6766 .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
6590 .write_done = nfs4_write_done, 6767 .write_done = nfs4_write_done,
6591 .commit_setup = nfs4_proc_commit_setup, 6768 .commit_setup = nfs4_proc_commit_setup,
6769 .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
6592 .commit_done = nfs4_commit_done, 6770 .commit_done = nfs4_commit_done,
6593 .lock = nfs4_proc_lock, 6771 .lock = nfs4_proc_lock,
6594 .clear_acl_cache = nfs4_zap_acl_attr, 6772 .clear_acl_cache = nfs4_zap_acl_attr,
6595 .close_context = nfs4_close_context, 6773 .close_context = nfs4_close_context,
6596 .open_context = nfs4_atomic_open, 6774 .open_context = nfs4_atomic_open,
6597 .init_client = nfs4_init_client, 6775 .init_client = nfs4_init_client,
6598 .secinfo = nfs4_proc_secinfo,
6599}; 6776};
6600 6777
6601static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 6778static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index dc484c0eae7f..6930bec91bca 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -49,7 +49,7 @@
49#include "nfs4_fs.h" 49#include "nfs4_fs.h"
50#include "delegation.h" 50#include "delegation.h"
51 51
52#define NFSDBG_FACILITY NFSDBG_PROC 52#define NFSDBG_FACILITY NFSDBG_STATE
53 53
54void 54void
55nfs4_renew_state(struct work_struct *work) 55nfs4_renew_state(struct work_struct *work)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7f0fcfc1fe9d..c679b9ecef63 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -57,6 +57,8 @@
57#include "internal.h" 57#include "internal.h"
58#include "pnfs.h" 58#include "pnfs.h"
59 59
60#define NFSDBG_FACILITY NFSDBG_STATE
61
60#define OPENOWNER_POOL_SIZE 8 62#define OPENOWNER_POOL_SIZE 8
61 63
62const nfs4_stateid zero_stateid; 64const nfs4_stateid zero_stateid;
@@ -254,7 +256,7 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
254 goto out; 256 goto out;
255 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 257 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
256do_confirm: 258do_confirm:
257 status = nfs4_proc_create_session(clp); 259 status = nfs4_proc_create_session(clp, cred);
258 if (status != 0) 260 if (status != 0)
259 goto out; 261 goto out;
260 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 262 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
@@ -1106,6 +1108,8 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1106 return; 1108 return;
1107 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1109 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1108 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1110 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1111 dprintk("%s: scheduling lease recovery for server %s\n", __func__,
1112 clp->cl_hostname);
1109 nfs4_schedule_state_manager(clp); 1113 nfs4_schedule_state_manager(clp);
1110} 1114}
1111EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); 1115EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
@@ -1122,6 +1126,8 @@ static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
1122{ 1126{
1123 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1127 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1124 nfs_expire_all_delegations(clp); 1128 nfs_expire_all_delegations(clp);
1129 dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
1130 clp->cl_hostname);
1125} 1131}
1126 1132
1127void nfs4_schedule_path_down_recovery(struct nfs_client *clp) 1133void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
@@ -1158,6 +1164,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
1158 struct nfs_client *clp = server->nfs_client; 1164 struct nfs_client *clp = server->nfs_client;
1159 1165
1160 nfs4_state_mark_reclaim_nograce(clp, state); 1166 nfs4_state_mark_reclaim_nograce(clp, state);
1167 dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
1168 clp->cl_hostname);
1161 nfs4_schedule_state_manager(clp); 1169 nfs4_schedule_state_manager(clp);
1162} 1170}
1163EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); 1171EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
@@ -1491,19 +1499,25 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1491 case -NFS4ERR_BADSLOT: 1499 case -NFS4ERR_BADSLOT:
1492 case -NFS4ERR_BAD_HIGH_SLOT: 1500 case -NFS4ERR_BAD_HIGH_SLOT:
1493 case -NFS4ERR_DEADSESSION: 1501 case -NFS4ERR_DEADSESSION:
1494 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1495 case -NFS4ERR_SEQ_FALSE_RETRY: 1502 case -NFS4ERR_SEQ_FALSE_RETRY:
1496 case -NFS4ERR_SEQ_MISORDERED: 1503 case -NFS4ERR_SEQ_MISORDERED:
1497 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1504 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1498 /* Zero session reset errors */ 1505 /* Zero session reset errors */
1499 break; 1506 break;
1507 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1508 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1509 break;
1500 case -EKEYEXPIRED: 1510 case -EKEYEXPIRED:
1501 /* Nothing we can do */ 1511 /* Nothing we can do */
1502 nfs4_warn_keyexpired(clp->cl_hostname); 1512 nfs4_warn_keyexpired(clp->cl_hostname);
1503 break; 1513 break;
1504 default: 1514 default:
1515 dprintk("%s: failed to handle error %d for server %s\n",
1516 __func__, error, clp->cl_hostname);
1505 return error; 1517 return error;
1506 } 1518 }
1519 dprintk("%s: handled error %d for server %s\n", __func__, error,
1520 clp->cl_hostname);
1507 return 0; 1521 return 0;
1508} 1522}
1509 1523
@@ -1572,34 +1586,82 @@ out:
1572 return nfs4_recovery_handle_error(clp, status); 1586 return nfs4_recovery_handle_error(clp, status);
1573} 1587}
1574 1588
1589/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
1590 * on EXCHANGE_ID for v4.1
1591 */
1592static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1593{
1594 switch (status) {
1595 case -NFS4ERR_SEQ_MISORDERED:
1596 if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
1597 return -ESERVERFAULT;
1598 /* Lease confirmation error: retry after purging the lease */
1599 ssleep(1);
1600 case -NFS4ERR_CLID_INUSE:
1601 case -NFS4ERR_STALE_CLIENTID:
1602 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1603 break;
1604 case -EACCES:
1605 if (clp->cl_machine_cred == NULL)
1606 return -EACCES;
1607 /* Handle case where the user hasn't set up machine creds */
1608 nfs4_clear_machine_cred(clp);
1609 case -NFS4ERR_DELAY:
1610 case -ETIMEDOUT:
1611 case -EAGAIN:
1612 ssleep(1);
1613 break;
1614
1615 case -NFS4ERR_MINOR_VERS_MISMATCH:
1616 if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
1617 nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
1618 dprintk("%s: exit with error %d for server %s\n",
1619 __func__, -EPROTONOSUPPORT, clp->cl_hostname);
1620 return -EPROTONOSUPPORT;
1621 case -EKEYEXPIRED:
1622 nfs4_warn_keyexpired(clp->cl_hostname);
1623 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1624 * in nfs4_exchange_id */
1625 default:
1626 dprintk("%s: exit with error %d for server %s\n", __func__,
1627 status, clp->cl_hostname);
1628 return status;
1629 }
1630 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1631 dprintk("%s: handled error %d for server %s\n", __func__, status,
1632 clp->cl_hostname);
1633 return 0;
1634}
1635
1575static int nfs4_reclaim_lease(struct nfs_client *clp) 1636static int nfs4_reclaim_lease(struct nfs_client *clp)
1576{ 1637{
1577 struct rpc_cred *cred; 1638 struct rpc_cred *cred;
1578 const struct nfs4_state_recovery_ops *ops = 1639 const struct nfs4_state_recovery_ops *ops =
1579 clp->cl_mvops->reboot_recovery_ops; 1640 clp->cl_mvops->reboot_recovery_ops;
1580 int status = -ENOENT; 1641 int status;
1581 1642
1582 cred = ops->get_clid_cred(clp); 1643 cred = ops->get_clid_cred(clp);
1583 if (cred != NULL) { 1644 if (cred == NULL)
1584 status = ops->establish_clid(clp, cred); 1645 return -ENOENT;
1585 put_rpccred(cred); 1646 status = ops->establish_clid(clp, cred);
1586 /* Handle case where the user hasn't set up machine creds */ 1647 put_rpccred(cred);
1587 if (status == -EACCES && cred == clp->cl_machine_cred) { 1648 if (status != 0)
1588 nfs4_clear_machine_cred(clp); 1649 return nfs4_handle_reclaim_lease_error(clp, status);
1589 status = -EAGAIN; 1650 return 0;
1590 }
1591 if (status == -NFS4ERR_MINOR_VERS_MISMATCH)
1592 status = -EPROTONOSUPPORT;
1593 }
1594 return status;
1595} 1651}
1596 1652
1597#ifdef CONFIG_NFS_V4_1 1653#ifdef CONFIG_NFS_V4_1
1598void nfs4_schedule_session_recovery(struct nfs4_session *session) 1654void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
1599{ 1655{
1600 struct nfs_client *clp = session->clp; 1656 struct nfs_client *clp = session->clp;
1601 1657
1602 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1658 switch (err) {
1659 default:
1660 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1661 break;
1662 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1663 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1664 }
1603 nfs4_schedule_lease_recovery(clp); 1665 nfs4_schedule_lease_recovery(clp);
1604} 1666}
1605EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); 1667EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
@@ -1607,14 +1669,19 @@ EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
1607void nfs41_handle_recall_slot(struct nfs_client *clp) 1669void nfs41_handle_recall_slot(struct nfs_client *clp)
1608{ 1670{
1609 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1671 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1672 dprintk("%s: scheduling slot recall for server %s\n", __func__,
1673 clp->cl_hostname);
1610 nfs4_schedule_state_manager(clp); 1674 nfs4_schedule_state_manager(clp);
1611} 1675}
1612 1676
1613static void nfs4_reset_all_state(struct nfs_client *clp) 1677static void nfs4_reset_all_state(struct nfs_client *clp)
1614{ 1678{
1615 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1679 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1616 clp->cl_boot_time = CURRENT_TIME; 1680 set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1681 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1617 nfs4_state_start_reclaim_nograce(clp); 1682 nfs4_state_start_reclaim_nograce(clp);
1683 dprintk("%s: scheduling reset of all state for server %s!\n",
1684 __func__, clp->cl_hostname);
1618 nfs4_schedule_state_manager(clp); 1685 nfs4_schedule_state_manager(clp);
1619 } 1686 }
1620} 1687}
@@ -1623,33 +1690,50 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1623{ 1690{
1624 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1691 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1625 nfs4_state_start_reclaim_reboot(clp); 1692 nfs4_state_start_reclaim_reboot(clp);
1693 dprintk("%s: server %s rebooted!\n", __func__,
1694 clp->cl_hostname);
1626 nfs4_schedule_state_manager(clp); 1695 nfs4_schedule_state_manager(clp);
1627 } 1696 }
1628} 1697}
1629 1698
1630static void nfs41_handle_state_revoked(struct nfs_client *clp) 1699static void nfs41_handle_state_revoked(struct nfs_client *clp)
1631{ 1700{
1632 /* Temporary */
1633 nfs4_reset_all_state(clp); 1701 nfs4_reset_all_state(clp);
1702 dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
1634} 1703}
1635 1704
1636static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) 1705static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
1637{ 1706{
1638 /* This will need to handle layouts too */ 1707 /* This will need to handle layouts too */
1639 nfs_expire_all_delegations(clp); 1708 nfs_expire_all_delegations(clp);
1709 dprintk("%s: Recallable state revoked on server %s!\n", __func__,
1710 clp->cl_hostname);
1640} 1711}
1641 1712
1642static void nfs41_handle_cb_path_down(struct nfs_client *clp) 1713static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
1643{ 1714{
1644 nfs_expire_all_delegations(clp); 1715 nfs_expire_all_delegations(clp);
1645 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1716 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1646 nfs4_schedule_state_manager(clp); 1717 nfs4_schedule_state_manager(clp);
1718 dprintk("%s: server %s declared a backchannel fault\n", __func__,
1719 clp->cl_hostname);
1720}
1721
1722static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1723{
1724 if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
1725 &clp->cl_state) == 0)
1726 nfs4_schedule_state_manager(clp);
1647} 1727}
1648 1728
1649void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1729void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1650{ 1730{
1651 if (!flags) 1731 if (!flags)
1652 return; 1732 return;
1733
1734 dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
1735 __func__, clp->cl_hostname, clp->cl_clientid, flags);
1736
1653 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) 1737 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
1654 nfs41_handle_server_reboot(clp); 1738 nfs41_handle_server_reboot(clp);
1655 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | 1739 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
@@ -1659,18 +1743,21 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1659 nfs41_handle_state_revoked(clp); 1743 nfs41_handle_state_revoked(clp);
1660 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) 1744 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
1661 nfs41_handle_recallable_state_revoked(clp); 1745 nfs41_handle_recallable_state_revoked(clp);
1662 if (flags & (SEQ4_STATUS_CB_PATH_DOWN | 1746 if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
1663 SEQ4_STATUS_BACKCHANNEL_FAULT | 1747 nfs41_handle_backchannel_fault(clp);
1664 SEQ4_STATUS_CB_PATH_DOWN_SESSION)) 1748 else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1749 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1665 nfs41_handle_cb_path_down(clp); 1750 nfs41_handle_cb_path_down(clp);
1666} 1751}
1667 1752
1668static int nfs4_reset_session(struct nfs_client *clp) 1753static int nfs4_reset_session(struct nfs_client *clp)
1669{ 1754{
1755 struct rpc_cred *cred;
1670 int status; 1756 int status;
1671 1757
1672 nfs4_begin_drain_session(clp); 1758 nfs4_begin_drain_session(clp);
1673 status = nfs4_proc_destroy_session(clp->cl_session); 1759 cred = nfs4_get_exchange_id_cred(clp);
1760 status = nfs4_proc_destroy_session(clp->cl_session, cred);
1674 if (status && status != -NFS4ERR_BADSESSION && 1761 if (status && status != -NFS4ERR_BADSESSION &&
1675 status != -NFS4ERR_DEADSESSION) { 1762 status != -NFS4ERR_DEADSESSION) {
1676 status = nfs4_recovery_handle_error(clp, status); 1763 status = nfs4_recovery_handle_error(clp, status);
@@ -1678,19 +1765,26 @@ static int nfs4_reset_session(struct nfs_client *clp)
1678 } 1765 }
1679 1766
1680 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); 1767 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
1681 status = nfs4_proc_create_session(clp); 1768 status = nfs4_proc_create_session(clp, cred);
1682 if (status) { 1769 if (status) {
1683 status = nfs4_recovery_handle_error(clp, status); 1770 dprintk("%s: session reset failed with status %d for server %s!\n",
1771 __func__, status, clp->cl_hostname);
1772 status = nfs4_handle_reclaim_lease_error(clp, status);
1684 goto out; 1773 goto out;
1685 } 1774 }
1686 clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1775 clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1687 /* create_session negotiated new slot table */ 1776 /* create_session negotiated new slot table */
1688 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1777 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1778 clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1779 dprintk("%s: session reset was successful for server %s!\n",
1780 __func__, clp->cl_hostname);
1689 1781
1690 /* Let the state manager reestablish state */ 1782 /* Let the state manager reestablish state */
1691 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1783 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1692 nfs41_setup_state_renewal(clp); 1784 nfs41_setup_state_renewal(clp);
1693out: 1785out:
1786 if (cred)
1787 put_rpccred(cred);
1694 return status; 1788 return status;
1695} 1789}
1696 1790
@@ -1722,37 +1816,41 @@ static int nfs4_recall_slot(struct nfs_client *clp)
1722 return 0; 1816 return 0;
1723} 1817}
1724 1818
1725#else /* CONFIG_NFS_V4_1 */ 1819static int nfs4_bind_conn_to_session(struct nfs_client *clp)
1726static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1727static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1728static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1729#endif /* CONFIG_NFS_V4_1 */
1730
1731/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
1732 * on EXCHANGE_ID for v4.1
1733 */
1734static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1735{ 1820{
1736 switch (status) { 1821 struct rpc_cred *cred;
1737 case -NFS4ERR_CLID_INUSE: 1822 int ret;
1738 case -NFS4ERR_STALE_CLIENTID: 1823
1739 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 1824 nfs4_begin_drain_session(clp);
1825 cred = nfs4_get_exchange_id_cred(clp);
1826 ret = nfs4_proc_bind_conn_to_session(clp, cred);
1827 if (cred)
1828 put_rpccred(cred);
1829 clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1830 switch (ret) {
1831 case 0:
1832 dprintk("%s: bind_conn_to_session was successful for server %s!\n",
1833 __func__, clp->cl_hostname);
1740 break; 1834 break;
1741 case -NFS4ERR_DELAY: 1835 case -NFS4ERR_DELAY:
1742 case -ETIMEDOUT:
1743 case -EAGAIN:
1744 ssleep(1); 1836 ssleep(1);
1837 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1745 break; 1838 break;
1746
1747 case -EKEYEXPIRED:
1748 nfs4_warn_keyexpired(clp->cl_hostname);
1749 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1750 * in nfs4_exchange_id */
1751 default: 1839 default:
1752 return; 1840 return nfs4_recovery_handle_error(clp, ret);
1753 } 1841 }
1754 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1842 return 0;
1755} 1843}
1844#else /* CONFIG_NFS_V4_1 */
1845static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1846static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1847static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1848
1849static int nfs4_bind_conn_to_session(struct nfs_client *clp)
1850{
1851 return 0;
1852}
1853#endif /* CONFIG_NFS_V4_1 */
1756 1854
1757static void nfs4_state_manager(struct nfs_client *clp) 1855static void nfs4_state_manager(struct nfs_client *clp)
1758{ 1856{
@@ -1760,19 +1858,21 @@ static void nfs4_state_manager(struct nfs_client *clp)
1760 1858
1761 /* Ensure exclusive access to NFSv4 state */ 1859 /* Ensure exclusive access to NFSv4 state */
1762 do { 1860 do {
1861 if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
1862 status = nfs4_reclaim_lease(clp);
1863 if (status < 0)
1864 goto out_error;
1865 clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1866 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1867 }
1868
1763 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1869 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
1764 /* We're going to have to re-establish a clientid */ 1870 /* We're going to have to re-establish a clientid */
1765 status = nfs4_reclaim_lease(clp); 1871 status = nfs4_reclaim_lease(clp);
1766 if (status) { 1872 if (status < 0)
1767 nfs4_set_lease_expired(clp, status);
1768 if (test_bit(NFS4CLNT_LEASE_EXPIRED,
1769 &clp->cl_state))
1770 continue;
1771 if (clp->cl_cons_state ==
1772 NFS_CS_SESSION_INITING)
1773 nfs_mark_client_ready(clp, status);
1774 goto out_error; 1873 goto out_error;
1775 } 1874 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1875 continue;
1776 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1876 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1777 1877
1778 if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, 1878 if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
@@ -1803,6 +1903,15 @@ static void nfs4_state_manager(struct nfs_client *clp)
1803 goto out_error; 1903 goto out_error;
1804 } 1904 }
1805 1905
1906 /* Send BIND_CONN_TO_SESSION */
1907 if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
1908 &clp->cl_state) && nfs4_has_session(clp)) {
1909 status = nfs4_bind_conn_to_session(clp);
1910 if (status < 0)
1911 goto out_error;
1912 continue;
1913 }
1914
1806 /* First recover reboot state... */ 1915 /* First recover reboot state... */
1807 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { 1916 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1808 status = nfs4_do_reclaim(clp, 1917 status = nfs4_do_reclaim(clp,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c54aae364bee..ee4a74db95d0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -53,9 +53,11 @@
53#include <linux/nfs4.h> 53#include <linux/nfs4.h>
54#include <linux/nfs_fs.h> 54#include <linux/nfs_fs.h>
55#include <linux/nfs_idmap.h> 55#include <linux/nfs_idmap.h>
56
56#include "nfs4_fs.h" 57#include "nfs4_fs.h"
57#include "internal.h" 58#include "internal.h"
58#include "pnfs.h" 59#include "pnfs.h"
60#include "netns.h"
59 61
60#define NFSDBG_FACILITY NFSDBG_XDR 62#define NFSDBG_FACILITY NFSDBG_XDR
61 63
@@ -99,9 +101,12 @@ static int nfs4_stat_to_errno(int);
99#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) 101#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
100#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) 102#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
101#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) 103#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
104/* We support only one layout type per file system */
105#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
102/* This is based on getfattr, which uses the most attributes: */ 106/* This is based on getfattr, which uses the most attributes: */
103#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ 107#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
104 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) 108 3 + 3 + 3 + nfs4_owner_maxsz + \
109 nfs4_group_maxsz + decode_mdsthreshold_maxsz))
105#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ 110#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
106 nfs4_fattr_value_maxsz) 111 nfs4_fattr_value_maxsz)
107#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) 112#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -321,8 +326,20 @@ static int nfs4_stat_to_errno(int);
321 1 /* csr_flags */ + \ 326 1 /* csr_flags */ + \
322 decode_channel_attrs_maxsz + \ 327 decode_channel_attrs_maxsz + \
323 decode_channel_attrs_maxsz) 328 decode_channel_attrs_maxsz)
329#define encode_bind_conn_to_session_maxsz (op_encode_hdr_maxsz + \
330 /* bctsa_sessid */ \
331 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
332 1 /* bctsa_dir */ + \
333 1 /* bctsa_use_conn_in_rdma_mode */)
334#define decode_bind_conn_to_session_maxsz (op_decode_hdr_maxsz + \
335 /* bctsr_sessid */ \
336 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
337 1 /* bctsr_dir */ + \
338 1 /* bctsr_use_conn_in_rdma_mode */)
324#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4) 339#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4)
325#define decode_destroy_session_maxsz (op_decode_hdr_maxsz) 340#define decode_destroy_session_maxsz (op_decode_hdr_maxsz)
341#define encode_destroy_clientid_maxsz (op_encode_hdr_maxsz + 2)
342#define decode_destroy_clientid_maxsz (op_decode_hdr_maxsz)
326#define encode_sequence_maxsz (op_encode_hdr_maxsz + \ 343#define encode_sequence_maxsz (op_encode_hdr_maxsz + \
327 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) 344 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
328#define decode_sequence_maxsz (op_decode_hdr_maxsz + \ 345#define decode_sequence_maxsz (op_decode_hdr_maxsz + \
@@ -421,30 +438,22 @@ static int nfs4_stat_to_errno(int);
421#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ 438#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
422 encode_sequence_maxsz + \ 439 encode_sequence_maxsz + \
423 encode_putfh_maxsz + \ 440 encode_putfh_maxsz + \
424 encode_commit_maxsz + \ 441 encode_commit_maxsz)
425 encode_getattr_maxsz)
426#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ 442#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
427 decode_sequence_maxsz + \ 443 decode_sequence_maxsz + \
428 decode_putfh_maxsz + \ 444 decode_putfh_maxsz + \
429 decode_commit_maxsz + \ 445 decode_commit_maxsz)
430 decode_getattr_maxsz)
431#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 446#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
432 encode_sequence_maxsz + \ 447 encode_sequence_maxsz + \
433 encode_putfh_maxsz + \ 448 encode_putfh_maxsz + \
434 encode_savefh_maxsz + \
435 encode_open_maxsz + \ 449 encode_open_maxsz + \
436 encode_getfh_maxsz + \ 450 encode_getfh_maxsz + \
437 encode_getattr_maxsz + \
438 encode_restorefh_maxsz + \
439 encode_getattr_maxsz) 451 encode_getattr_maxsz)
440#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 452#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
441 decode_sequence_maxsz + \ 453 decode_sequence_maxsz + \
442 decode_putfh_maxsz + \ 454 decode_putfh_maxsz + \
443 decode_savefh_maxsz + \
444 decode_open_maxsz + \ 455 decode_open_maxsz + \
445 decode_getfh_maxsz + \ 456 decode_getfh_maxsz + \
446 decode_getattr_maxsz + \
447 decode_restorefh_maxsz + \
448 decode_getattr_maxsz) 457 decode_getattr_maxsz)
449#define NFS4_enc_open_confirm_sz \ 458#define NFS4_enc_open_confirm_sz \
450 (compound_encode_hdr_maxsz + \ 459 (compound_encode_hdr_maxsz + \
@@ -595,47 +604,37 @@ static int nfs4_stat_to_errno(int);
595#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ 604#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
596 encode_sequence_maxsz + \ 605 encode_sequence_maxsz + \
597 encode_putfh_maxsz + \ 606 encode_putfh_maxsz + \
598 encode_remove_maxsz + \ 607 encode_remove_maxsz)
599 encode_getattr_maxsz)
600#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ 608#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
601 decode_sequence_maxsz + \ 609 decode_sequence_maxsz + \
602 decode_putfh_maxsz + \ 610 decode_putfh_maxsz + \
603 decode_remove_maxsz + \ 611 decode_remove_maxsz)
604 decode_getattr_maxsz)
605#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ 612#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
606 encode_sequence_maxsz + \ 613 encode_sequence_maxsz + \
607 encode_putfh_maxsz + \ 614 encode_putfh_maxsz + \
608 encode_savefh_maxsz + \ 615 encode_savefh_maxsz + \
609 encode_putfh_maxsz + \ 616 encode_putfh_maxsz + \
610 encode_rename_maxsz + \ 617 encode_rename_maxsz)
611 encode_getattr_maxsz + \
612 encode_restorefh_maxsz + \
613 encode_getattr_maxsz)
614#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ 618#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
615 decode_sequence_maxsz + \ 619 decode_sequence_maxsz + \
616 decode_putfh_maxsz + \ 620 decode_putfh_maxsz + \
617 decode_savefh_maxsz + \ 621 decode_savefh_maxsz + \
618 decode_putfh_maxsz + \ 622 decode_putfh_maxsz + \
619 decode_rename_maxsz + \ 623 decode_rename_maxsz)
620 decode_getattr_maxsz + \
621 decode_restorefh_maxsz + \
622 decode_getattr_maxsz)
623#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ 624#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
624 encode_sequence_maxsz + \ 625 encode_sequence_maxsz + \
625 encode_putfh_maxsz + \ 626 encode_putfh_maxsz + \
626 encode_savefh_maxsz + \ 627 encode_savefh_maxsz + \
627 encode_putfh_maxsz + \ 628 encode_putfh_maxsz + \
628 encode_link_maxsz + \ 629 encode_link_maxsz + \
629 decode_getattr_maxsz + \
630 encode_restorefh_maxsz + \ 630 encode_restorefh_maxsz + \
631 decode_getattr_maxsz) 631 encode_getattr_maxsz)
632#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ 632#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
633 decode_sequence_maxsz + \ 633 decode_sequence_maxsz + \
634 decode_putfh_maxsz + \ 634 decode_putfh_maxsz + \
635 decode_savefh_maxsz + \ 635 decode_savefh_maxsz + \
636 decode_putfh_maxsz + \ 636 decode_putfh_maxsz + \
637 decode_link_maxsz + \ 637 decode_link_maxsz + \
638 decode_getattr_maxsz + \
639 decode_restorefh_maxsz + \ 638 decode_restorefh_maxsz + \
640 decode_getattr_maxsz) 639 decode_getattr_maxsz)
641#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ 640#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
@@ -653,20 +652,14 @@ static int nfs4_stat_to_errno(int);
653#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ 652#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
654 encode_sequence_maxsz + \ 653 encode_sequence_maxsz + \
655 encode_putfh_maxsz + \ 654 encode_putfh_maxsz + \
656 encode_savefh_maxsz + \
657 encode_create_maxsz + \ 655 encode_create_maxsz + \
658 encode_getfh_maxsz + \ 656 encode_getfh_maxsz + \
659 encode_getattr_maxsz + \
660 encode_restorefh_maxsz + \
661 encode_getattr_maxsz) 657 encode_getattr_maxsz)
662#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ 658#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
663 decode_sequence_maxsz + \ 659 decode_sequence_maxsz + \
664 decode_putfh_maxsz + \ 660 decode_putfh_maxsz + \
665 decode_savefh_maxsz + \
666 decode_create_maxsz + \ 661 decode_create_maxsz + \
667 decode_getfh_maxsz + \ 662 decode_getfh_maxsz + \
668 decode_getattr_maxsz + \
669 decode_restorefh_maxsz + \
670 decode_getattr_maxsz) 663 decode_getattr_maxsz)
671#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ 664#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
672 encode_sequence_maxsz + \ 665 encode_sequence_maxsz + \
@@ -738,6 +731,12 @@ static int nfs4_stat_to_errno(int);
738 decode_putfh_maxsz + \ 731 decode_putfh_maxsz + \
739 decode_secinfo_maxsz) 732 decode_secinfo_maxsz)
740#if defined(CONFIG_NFS_V4_1) 733#if defined(CONFIG_NFS_V4_1)
734#define NFS4_enc_bind_conn_to_session_sz \
735 (compound_encode_hdr_maxsz + \
736 encode_bind_conn_to_session_maxsz)
737#define NFS4_dec_bind_conn_to_session_sz \
738 (compound_decode_hdr_maxsz + \
739 decode_bind_conn_to_session_maxsz)
741#define NFS4_enc_exchange_id_sz \ 740#define NFS4_enc_exchange_id_sz \
742 (compound_encode_hdr_maxsz + \ 741 (compound_encode_hdr_maxsz + \
743 encode_exchange_id_maxsz) 742 encode_exchange_id_maxsz)
@@ -754,6 +753,10 @@ static int nfs4_stat_to_errno(int);
754 encode_destroy_session_maxsz) 753 encode_destroy_session_maxsz)
755#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \ 754#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \
756 decode_destroy_session_maxsz) 755 decode_destroy_session_maxsz)
756#define NFS4_enc_destroy_clientid_sz (compound_encode_hdr_maxsz + \
757 encode_destroy_clientid_maxsz)
758#define NFS4_dec_destroy_clientid_sz (compound_decode_hdr_maxsz + \
759 decode_destroy_clientid_maxsz)
757#define NFS4_enc_sequence_sz \ 760#define NFS4_enc_sequence_sz \
758 (compound_decode_hdr_maxsz + \ 761 (compound_decode_hdr_maxsz + \
759 encode_sequence_maxsz) 762 encode_sequence_maxsz)
@@ -1103,7 +1106,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
1103 encode_nfs4_stateid(xdr, arg->stateid); 1106 encode_nfs4_stateid(xdr, arg->stateid);
1104} 1107}
1105 1108
1106static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1109static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
1107{ 1110{
1108 __be32 *p; 1111 __be32 *p;
1109 1112
@@ -1194,6 +1197,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
1194 bitmask[1] & nfs4_fattr_bitmap[1], hdr); 1197 bitmask[1] & nfs4_fattr_bitmap[1], hdr);
1195} 1198}
1196 1199
1200static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
1201 struct compound_hdr *hdr)
1202{
1203 encode_getattr_three(xdr,
1204 bitmask[0] & nfs4_fattr_bitmap[0],
1205 bitmask[1] & nfs4_fattr_bitmap[1],
1206 bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
1207 hdr);
1208}
1209
1197static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 1210static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
1198{ 1211{
1199 encode_getattr_three(xdr, 1212 encode_getattr_three(xdr,
@@ -1678,6 +1691,20 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
1678 1691
1679#if defined(CONFIG_NFS_V4_1) 1692#if defined(CONFIG_NFS_V4_1)
1680/* NFSv4.1 operations */ 1693/* NFSv4.1 operations */
1694static void encode_bind_conn_to_session(struct xdr_stream *xdr,
1695 struct nfs4_session *session,
1696 struct compound_hdr *hdr)
1697{
1698 __be32 *p;
1699
1700 encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION,
1701 decode_bind_conn_to_session_maxsz, hdr);
1702 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1703 p = xdr_reserve_space(xdr, 8);
1704 *p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH);
1705 *p = 0; /* use_conn_in_rdma_mode = False */
1706}
1707
1681static void encode_exchange_id(struct xdr_stream *xdr, 1708static void encode_exchange_id(struct xdr_stream *xdr,
1682 struct nfs41_exchange_id_args *args, 1709 struct nfs41_exchange_id_args *args,
1683 struct compound_hdr *hdr) 1710 struct compound_hdr *hdr)
@@ -1726,6 +1753,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1726 char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; 1753 char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
1727 uint32_t len; 1754 uint32_t len;
1728 struct nfs_client *clp = args->client; 1755 struct nfs_client *clp = args->client;
1756 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
1729 u32 max_resp_sz_cached; 1757 u32 max_resp_sz_cached;
1730 1758
1731 /* 1759 /*
@@ -1767,7 +1795,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1767 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ 1795 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
1768 1796
1769 /* authsys_parms rfc1831 */ 1797 /* authsys_parms rfc1831 */
1770 *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ 1798 *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */
1771 p = xdr_encode_opaque(p, machine_name, len); 1799 p = xdr_encode_opaque(p, machine_name, len);
1772 *p++ = cpu_to_be32(0); /* UID */ 1800 *p++ = cpu_to_be32(0); /* UID */
1773 *p++ = cpu_to_be32(0); /* GID */ 1801 *p++ = cpu_to_be32(0); /* GID */
@@ -1782,6 +1810,14 @@ static void encode_destroy_session(struct xdr_stream *xdr,
1782 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1810 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1783} 1811}
1784 1812
1813static void encode_destroy_clientid(struct xdr_stream *xdr,
1814 uint64_t clientid,
1815 struct compound_hdr *hdr)
1816{
1817 encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr);
1818 encode_uint64(xdr, clientid);
1819}
1820
1785static void encode_reclaim_complete(struct xdr_stream *xdr, 1821static void encode_reclaim_complete(struct xdr_stream *xdr,
1786 struct nfs41_reclaim_complete_args *args, 1822 struct nfs41_reclaim_complete_args *args,
1787 struct compound_hdr *hdr) 1823 struct compound_hdr *hdr)
@@ -2064,7 +2100,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
2064 encode_sequence(xdr, &args->seq_args, &hdr); 2100 encode_sequence(xdr, &args->seq_args, &hdr);
2065 encode_putfh(xdr, args->fh, &hdr); 2101 encode_putfh(xdr, args->fh, &hdr);
2066 encode_remove(xdr, &args->name, &hdr); 2102 encode_remove(xdr, &args->name, &hdr);
2067 encode_getfattr(xdr, args->bitmask, &hdr);
2068 encode_nops(&hdr); 2103 encode_nops(&hdr);
2069} 2104}
2070 2105
@@ -2084,9 +2119,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
2084 encode_savefh(xdr, &hdr); 2119 encode_savefh(xdr, &hdr);
2085 encode_putfh(xdr, args->new_dir, &hdr); 2120 encode_putfh(xdr, args->new_dir, &hdr);
2086 encode_rename(xdr, args->old_name, args->new_name, &hdr); 2121 encode_rename(xdr, args->old_name, args->new_name, &hdr);
2087 encode_getfattr(xdr, args->bitmask, &hdr);
2088 encode_restorefh(xdr, &hdr);
2089 encode_getfattr(xdr, args->bitmask, &hdr);
2090 encode_nops(&hdr); 2122 encode_nops(&hdr);
2091} 2123}
2092 2124
@@ -2106,7 +2138,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
2106 encode_savefh(xdr, &hdr); 2138 encode_savefh(xdr, &hdr);
2107 encode_putfh(xdr, args->dir_fh, &hdr); 2139 encode_putfh(xdr, args->dir_fh, &hdr);
2108 encode_link(xdr, args->name, &hdr); 2140 encode_link(xdr, args->name, &hdr);
2109 encode_getfattr(xdr, args->bitmask, &hdr);
2110 encode_restorefh(xdr, &hdr); 2141 encode_restorefh(xdr, &hdr);
2111 encode_getfattr(xdr, args->bitmask, &hdr); 2142 encode_getfattr(xdr, args->bitmask, &hdr);
2112 encode_nops(&hdr); 2143 encode_nops(&hdr);
@@ -2125,12 +2156,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
2125 encode_compound_hdr(xdr, req, &hdr); 2156 encode_compound_hdr(xdr, req, &hdr);
2126 encode_sequence(xdr, &args->seq_args, &hdr); 2157 encode_sequence(xdr, &args->seq_args, &hdr);
2127 encode_putfh(xdr, args->dir_fh, &hdr); 2158 encode_putfh(xdr, args->dir_fh, &hdr);
2128 encode_savefh(xdr, &hdr);
2129 encode_create(xdr, args, &hdr); 2159 encode_create(xdr, args, &hdr);
2130 encode_getfh(xdr, &hdr); 2160 encode_getfh(xdr, &hdr);
2131 encode_getfattr(xdr, args->bitmask, &hdr); 2161 encode_getfattr(xdr, args->bitmask, &hdr);
2132 encode_restorefh(xdr, &hdr);
2133 encode_getfattr(xdr, args->bitmask, &hdr);
2134 encode_nops(&hdr); 2162 encode_nops(&hdr);
2135} 2163}
2136 2164
@@ -2191,12 +2219,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
2191 encode_compound_hdr(xdr, req, &hdr); 2219 encode_compound_hdr(xdr, req, &hdr);
2192 encode_sequence(xdr, &args->seq_args, &hdr); 2220 encode_sequence(xdr, &args->seq_args, &hdr);
2193 encode_putfh(xdr, args->fh, &hdr); 2221 encode_putfh(xdr, args->fh, &hdr);
2194 encode_savefh(xdr, &hdr);
2195 encode_open(xdr, args, &hdr); 2222 encode_open(xdr, args, &hdr);
2196 encode_getfh(xdr, &hdr); 2223 encode_getfh(xdr, &hdr);
2197 encode_getfattr(xdr, args->bitmask, &hdr); 2224 encode_getfattr_open(xdr, args->bitmask, &hdr);
2198 encode_restorefh(xdr, &hdr);
2199 encode_getfattr(xdr, args->dir_bitmask, &hdr);
2200 encode_nops(&hdr); 2225 encode_nops(&hdr);
2201} 2226}
2202 2227
@@ -2448,7 +2473,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2448 * a COMMIT request 2473 * a COMMIT request
2449 */ 2474 */
2450static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, 2475static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2451 struct nfs_writeargs *args) 2476 struct nfs_commitargs *args)
2452{ 2477{
2453 struct compound_hdr hdr = { 2478 struct compound_hdr hdr = {
2454 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2479 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2458,8 +2483,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2458 encode_sequence(xdr, &args->seq_args, &hdr); 2483 encode_sequence(xdr, &args->seq_args, &hdr);
2459 encode_putfh(xdr, args->fh, &hdr); 2484 encode_putfh(xdr, args->fh, &hdr);
2460 encode_commit(xdr, args, &hdr); 2485 encode_commit(xdr, args, &hdr);
2461 if (args->bitmask)
2462 encode_getfattr(xdr, args->bitmask, &hdr);
2463 encode_nops(&hdr); 2486 encode_nops(&hdr);
2464} 2487}
2465 2488
@@ -2602,8 +2625,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
2602 encode_compound_hdr(xdr, req, &hdr); 2625 encode_compound_hdr(xdr, req, &hdr);
2603 encode_sequence(xdr, &args->seq_args, &hdr); 2626 encode_sequence(xdr, &args->seq_args, &hdr);
2604 encode_putfh(xdr, args->fhandle, &hdr); 2627 encode_putfh(xdr, args->fhandle, &hdr);
2605 encode_delegreturn(xdr, args->stateid, &hdr);
2606 encode_getfattr(xdr, args->bitmask, &hdr); 2628 encode_getfattr(xdr, args->bitmask, &hdr);
2629 encode_delegreturn(xdr, args->stateid, &hdr);
2607 encode_nops(&hdr); 2630 encode_nops(&hdr);
2608} 2631}
2609 2632
@@ -2651,6 +2674,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
2651 2674
2652#if defined(CONFIG_NFS_V4_1) 2675#if defined(CONFIG_NFS_V4_1)
2653/* 2676/*
2677 * BIND_CONN_TO_SESSION request
2678 */
2679static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
2680 struct xdr_stream *xdr,
2681 struct nfs_client *clp)
2682{
2683 struct compound_hdr hdr = {
2684 .minorversion = clp->cl_mvops->minor_version,
2685 };
2686
2687 encode_compound_hdr(xdr, req, &hdr);
2688 encode_bind_conn_to_session(xdr, clp->cl_session, &hdr);
2689 encode_nops(&hdr);
2690}
2691
2692/*
2654 * EXCHANGE_ID request 2693 * EXCHANGE_ID request
2655 */ 2694 */
2656static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, 2695static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
@@ -2699,6 +2738,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
2699} 2738}
2700 2739
2701/* 2740/*
2741 * a DESTROY_CLIENTID request
2742 */
2743static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
2744 struct xdr_stream *xdr,
2745 struct nfs_client *clp)
2746{
2747 struct compound_hdr hdr = {
2748 .minorversion = clp->cl_mvops->minor_version,
2749 };
2750
2751 encode_compound_hdr(xdr, req, &hdr);
2752 encode_destroy_clientid(xdr, clp->cl_clientid, &hdr);
2753 encode_nops(&hdr);
2754}
2755
2756/*
2702 * a SEQUENCE request 2757 * a SEQUENCE request
2703 */ 2758 */
2704static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr, 2759static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -4102,7 +4157,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
4102 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); 4157 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
4103} 4158}
4104 4159
4105static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 4160static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
4106{ 4161{
4107 int status; 4162 int status;
4108 4163
@@ -4220,6 +4275,110 @@ xdr_error:
4220 return status; 4275 return status;
4221} 4276}
4222 4277
4278static int decode_threshold_hint(struct xdr_stream *xdr,
4279 uint32_t *bitmap,
4280 uint64_t *res,
4281 uint32_t hint_bit)
4282{
4283 __be32 *p;
4284
4285 *res = 0;
4286 if (likely(bitmap[0] & hint_bit)) {
4287 p = xdr_inline_decode(xdr, 8);
4288 if (unlikely(!p))
4289 goto out_overflow;
4290 xdr_decode_hyper(p, res);
4291 }
4292 return 0;
4293out_overflow:
4294 print_overflow_msg(__func__, xdr);
4295 return -EIO;
4296}
4297
4298static int decode_first_threshold_item4(struct xdr_stream *xdr,
4299 struct nfs4_threshold *res)
4300{
4301 __be32 *p, *savep;
4302 uint32_t bitmap[3] = {0,}, attrlen;
4303 int status;
4304
4305 /* layout type */
4306 p = xdr_inline_decode(xdr, 4);
4307 if (unlikely(!p)) {
4308 print_overflow_msg(__func__, xdr);
4309 return -EIO;
4310 }
4311 res->l_type = be32_to_cpup(p);
4312
4313 /* thi_hintset bitmap */
4314 status = decode_attr_bitmap(xdr, bitmap);
4315 if (status < 0)
4316 goto xdr_error;
4317
4318 /* thi_hintlist length */
4319 status = decode_attr_length(xdr, &attrlen, &savep);
4320 if (status < 0)
4321 goto xdr_error;
4322 /* thi_hintlist */
4323 status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
4324 if (status < 0)
4325 goto xdr_error;
4326 status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
4327 if (status < 0)
4328 goto xdr_error;
4329 status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
4330 THRESHOLD_RD_IO);
4331 if (status < 0)
4332 goto xdr_error;
4333 status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
4334 THRESHOLD_WR_IO);
4335 if (status < 0)
4336 goto xdr_error;
4337
4338 status = verify_attr_len(xdr, savep, attrlen);
4339 res->bm = bitmap[0];
4340
4341 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
4342 __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
4343 res->wr_io_sz);
4344xdr_error:
4345 dprintk("%s ret=%d!\n", __func__, status);
4346 return status;
4347}
4348
4349/*
4350 * Thresholds on pNFS direct I/O vrs MDS I/O
4351 */
4352static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
4353 uint32_t *bitmap,
4354 struct nfs4_threshold *res)
4355{
4356 __be32 *p;
4357 int status = 0;
4358 uint32_t num;
4359
4360 if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
4361 return -EIO;
4362 if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
4363 p = xdr_inline_decode(xdr, 4);
4364 if (unlikely(!p))
4365 goto out_overflow;
4366 num = be32_to_cpup(p);
4367 if (num == 0)
4368 return 0;
4369 if (num > 1)
4370 printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
4371 "drivers per filesystem not supported\n",
4372 __func__);
4373
4374 status = decode_first_threshold_item4(xdr, res);
4375 }
4376 return status;
4377out_overflow:
4378 print_overflow_msg(__func__, xdr);
4379 return -EIO;
4380}
4381
4223static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, 4382static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4224 struct nfs_fattr *fattr, struct nfs_fh *fh, 4383 struct nfs_fattr *fattr, struct nfs_fh *fh,
4225 struct nfs4_fs_locations *fs_loc, 4384 struct nfs4_fs_locations *fs_loc,
@@ -4326,6 +4485,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4326 goto xdr_error; 4485 goto xdr_error;
4327 fattr->valid |= status; 4486 fattr->valid |= status;
4328 4487
4488 status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
4489 if (status < 0)
4490 goto xdr_error;
4491
4329xdr_error: 4492xdr_error:
4330 dprintk("%s: xdr returned %d\n", __func__, -status); 4493 dprintk("%s: xdr returned %d\n", __func__, -status);
4331 return status; 4494 return status;
@@ -5156,7 +5319,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5156 uint32_t dummy; 5319 uint32_t dummy;
5157 char *dummy_str; 5320 char *dummy_str;
5158 int status; 5321 int status;
5159 struct nfs_client *clp = res->client;
5160 uint32_t impl_id_count; 5322 uint32_t impl_id_count;
5161 5323
5162 status = decode_op_hdr(xdr, OP_EXCHANGE_ID); 5324 status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
@@ -5166,36 +5328,39 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5166 p = xdr_inline_decode(xdr, 8); 5328 p = xdr_inline_decode(xdr, 8);
5167 if (unlikely(!p)) 5329 if (unlikely(!p))
5168 goto out_overflow; 5330 goto out_overflow;
5169 xdr_decode_hyper(p, &clp->cl_clientid); 5331 xdr_decode_hyper(p, &res->clientid);
5170 p = xdr_inline_decode(xdr, 12); 5332 p = xdr_inline_decode(xdr, 12);
5171 if (unlikely(!p)) 5333 if (unlikely(!p))
5172 goto out_overflow; 5334 goto out_overflow;
5173 clp->cl_seqid = be32_to_cpup(p++); 5335 res->seqid = be32_to_cpup(p++);
5174 clp->cl_exchange_flags = be32_to_cpup(p++); 5336 res->flags = be32_to_cpup(p++);
5175 5337
5176 /* We ask for SP4_NONE */ 5338 /* We ask for SP4_NONE */
5177 dummy = be32_to_cpup(p); 5339 dummy = be32_to_cpup(p);
5178 if (dummy != SP4_NONE) 5340 if (dummy != SP4_NONE)
5179 return -EIO; 5341 return -EIO;
5180 5342
5181 /* Throw away minor_id */ 5343 /* server_owner4.so_minor_id */
5182 p = xdr_inline_decode(xdr, 8); 5344 p = xdr_inline_decode(xdr, 8);
5183 if (unlikely(!p)) 5345 if (unlikely(!p))
5184 goto out_overflow; 5346 goto out_overflow;
5347 p = xdr_decode_hyper(p, &res->server_owner->minor_id);
5185 5348
5186 /* Throw away Major id */ 5349 /* server_owner4.so_major_id */
5187 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5350 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5188 if (unlikely(status)) 5351 if (unlikely(status))
5189 return status; 5352 return status;
5353 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5354 return -EIO;
5355 memcpy(res->server_owner->major_id, dummy_str, dummy);
5356 res->server_owner->major_id_sz = dummy;
5190 5357
5191 /* Save server_scope */ 5358 /* server_scope4 */
5192 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5359 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5193 if (unlikely(status)) 5360 if (unlikely(status))
5194 return status; 5361 return status;
5195
5196 if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) 5362 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5197 return -EIO; 5363 return -EIO;
5198
5199 memcpy(res->server_scope->server_scope, dummy_str, dummy); 5364 memcpy(res->server_scope->server_scope, dummy_str, dummy);
5200 res->server_scope->server_scope_sz = dummy; 5365 res->server_scope->server_scope_sz = dummy;
5201 5366
@@ -5276,6 +5441,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
5276 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN); 5441 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
5277} 5442}
5278 5443
5444static int decode_bind_conn_to_session(struct xdr_stream *xdr,
5445 struct nfs41_bind_conn_to_session_res *res)
5446{
5447 __be32 *p;
5448 int status;
5449
5450 status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION);
5451 if (!status)
5452 status = decode_sessionid(xdr, &res->session->sess_id);
5453 if (unlikely(status))
5454 return status;
5455
5456 /* dir flags, rdma mode bool */
5457 p = xdr_inline_decode(xdr, 8);
5458 if (unlikely(!p))
5459 goto out_overflow;
5460
5461 res->dir = be32_to_cpup(p++);
5462 if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH)
5463 return -EIO;
5464 if (be32_to_cpup(p) == 0)
5465 res->use_conn_in_rdma_mode = false;
5466 else
5467 res->use_conn_in_rdma_mode = true;
5468
5469 return 0;
5470out_overflow:
5471 print_overflow_msg(__func__, xdr);
5472 return -EIO;
5473}
5474
5279static int decode_create_session(struct xdr_stream *xdr, 5475static int decode_create_session(struct xdr_stream *xdr,
5280 struct nfs41_create_session_res *res) 5476 struct nfs41_create_session_res *res)
5281{ 5477{
@@ -5312,6 +5508,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
5312 return decode_op_hdr(xdr, OP_DESTROY_SESSION); 5508 return decode_op_hdr(xdr, OP_DESTROY_SESSION);
5313} 5509}
5314 5510
5511static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy)
5512{
5513 return decode_op_hdr(xdr, OP_DESTROY_CLIENTID);
5514}
5515
5315static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy) 5516static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
5316{ 5517{
5317 return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE); 5518 return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
@@ -5800,9 +6001,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5800 if (status) 6001 if (status)
5801 goto out; 6002 goto out;
5802 status = decode_remove(xdr, &res->cinfo); 6003 status = decode_remove(xdr, &res->cinfo);
5803 if (status)
5804 goto out;
5805 decode_getfattr(xdr, res->dir_attr, res->server);
5806out: 6004out:
5807 return status; 6005 return status;
5808} 6006}
@@ -5832,15 +6030,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5832 if (status) 6030 if (status)
5833 goto out; 6031 goto out;
5834 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo); 6032 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
5835 if (status)
5836 goto out;
5837 /* Current FH is target directory */
5838 if (decode_getfattr(xdr, res->new_fattr, res->server))
5839 goto out;
5840 status = decode_restorefh(xdr);
5841 if (status)
5842 goto out;
5843 decode_getfattr(xdr, res->old_fattr, res->server);
5844out: 6033out:
5845 return status; 6034 return status;
5846} 6035}
@@ -5876,8 +6065,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5876 * Note order: OP_LINK leaves the directory as the current 6065 * Note order: OP_LINK leaves the directory as the current
5877 * filehandle. 6066 * filehandle.
5878 */ 6067 */
5879 if (decode_getfattr(xdr, res->dir_attr, res->server))
5880 goto out;
5881 status = decode_restorefh(xdr); 6068 status = decode_restorefh(xdr);
5882 if (status) 6069 if (status)
5883 goto out; 6070 goto out;
@@ -5904,21 +6091,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5904 status = decode_putfh(xdr); 6091 status = decode_putfh(xdr);
5905 if (status) 6092 if (status)
5906 goto out; 6093 goto out;
5907 status = decode_savefh(xdr);
5908 if (status)
5909 goto out;
5910 status = decode_create(xdr, &res->dir_cinfo); 6094 status = decode_create(xdr, &res->dir_cinfo);
5911 if (status) 6095 if (status)
5912 goto out; 6096 goto out;
5913 status = decode_getfh(xdr, res->fh); 6097 status = decode_getfh(xdr, res->fh);
5914 if (status) 6098 if (status)
5915 goto out; 6099 goto out;
5916 if (decode_getfattr(xdr, res->fattr, res->server)) 6100 decode_getfattr(xdr, res->fattr, res->server);
5917 goto out;
5918 status = decode_restorefh(xdr);
5919 if (status)
5920 goto out;
5921 decode_getfattr(xdr, res->dir_fattr, res->server);
5922out: 6101out:
5923 return status; 6102 return status;
5924} 6103}
@@ -6075,19 +6254,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6075 status = decode_putfh(xdr); 6254 status = decode_putfh(xdr);
6076 if (status) 6255 if (status)
6077 goto out; 6256 goto out;
6078 status = decode_savefh(xdr);
6079 if (status)
6080 goto out;
6081 status = decode_open(xdr, res); 6257 status = decode_open(xdr, res);
6082 if (status) 6258 if (status)
6083 goto out; 6259 goto out;
6084 if (decode_getfh(xdr, &res->fh) != 0) 6260 if (decode_getfh(xdr, &res->fh) != 0)
6085 goto out; 6261 goto out;
6086 if (decode_getfattr(xdr, res->f_attr, res->server) != 0) 6262 decode_getfattr(xdr, res->f_attr, res->server);
6087 goto out;
6088 if (decode_restorefh(xdr) != 0)
6089 goto out;
6090 decode_getfattr(xdr, res->dir_attr, res->server);
6091out: 6263out:
6092 return status; 6264 return status;
6093} 6265}
@@ -6353,7 +6525,7 @@ out:
6353 * Decode COMMIT response 6525 * Decode COMMIT response
6354 */ 6526 */
6355static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 6527static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6356 struct nfs_writeres *res) 6528 struct nfs_commitres *res)
6357{ 6529{
6358 struct compound_hdr hdr; 6530 struct compound_hdr hdr;
6359 int status; 6531 int status;
@@ -6368,10 +6540,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6368 if (status) 6540 if (status)
6369 goto out; 6541 goto out;
6370 status = decode_commit(xdr, res); 6542 status = decode_commit(xdr, res);
6371 if (status)
6372 goto out;
6373 if (res->fattr)
6374 decode_getfattr(xdr, res->fattr, res->server);
6375out: 6543out:
6376 return status; 6544 return status;
6377} 6545}
@@ -6527,10 +6695,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
6527 status = decode_putfh(xdr); 6695 status = decode_putfh(xdr);
6528 if (status != 0) 6696 if (status != 0)
6529 goto out; 6697 goto out;
6530 status = decode_delegreturn(xdr); 6698 status = decode_getfattr(xdr, res->fattr, res->server);
6531 if (status != 0) 6699 if (status != 0)
6532 goto out; 6700 goto out;
6533 decode_getfattr(xdr, res->fattr, res->server); 6701 status = decode_delegreturn(xdr);
6534out: 6702out:
6535 return status; 6703 return status;
6536} 6704}
@@ -6591,6 +6759,22 @@ out:
6591 6759
6592#if defined(CONFIG_NFS_V4_1) 6760#if defined(CONFIG_NFS_V4_1)
6593/* 6761/*
6762 * Decode BIND_CONN_TO_SESSION response
6763 */
6764static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp,
6765 struct xdr_stream *xdr,
6766 void *res)
6767{
6768 struct compound_hdr hdr;
6769 int status;
6770
6771 status = decode_compound_hdr(xdr, &hdr);
6772 if (!status)
6773 status = decode_bind_conn_to_session(xdr, res);
6774 return status;
6775}
6776
6777/*
6594 * Decode EXCHANGE_ID response 6778 * Decode EXCHANGE_ID response
6595 */ 6779 */
6596static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, 6780static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
@@ -6639,6 +6823,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
6639} 6823}
6640 6824
6641/* 6825/*
6826 * Decode DESTROY_CLIENTID response
6827 */
6828static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
6829 struct xdr_stream *xdr,
6830 void *res)
6831{
6832 struct compound_hdr hdr;
6833 int status;
6834
6835 status = decode_compound_hdr(xdr, &hdr);
6836 if (!status)
6837 status = decode_destroy_clientid(xdr, res);
6838 return status;
6839}
6840
6841/*
6642 * Decode SEQUENCE response 6842 * Decode SEQUENCE response
6643 */ 6843 */
6644static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, 6844static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
@@ -7085,6 +7285,9 @@ struct rpc_procinfo nfs4_procedures[] = {
7085 PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), 7285 PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
7086 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), 7286 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
7087 PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), 7287 PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
7288 PROC(BIND_CONN_TO_SESSION,
7289 enc_bind_conn_to_session, dec_bind_conn_to_session),
7290 PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
7088#endif /* CONFIG_NFS_V4_1 */ 7291#endif /* CONFIG_NFS_V4_1 */
7089}; 7292};
7090 7293
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3dab46..b47277baebab 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
211 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); 211 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
212} 212}
213 213
214int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, 214static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
215 struct objio_segment **pseg) 215 struct objio_segment **pseg)
216{ 216{
217/* This is the in memory structure of the objio_segment 217/* This is the in memory structure of the objio_segment
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
440 440
441int objio_read_pagelist(struct nfs_read_data *rdata) 441int objio_read_pagelist(struct nfs_read_data *rdata)
442{ 442{
443 struct nfs_pgio_header *hdr = rdata->header;
443 struct objio_state *objios; 444 struct objio_state *objios;
444 int ret; 445 int ret;
445 446
446 ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, 447 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
447 rdata->lseg, rdata->args.pages, rdata->args.pgbase, 448 hdr->lseg, rdata->args.pages, rdata->args.pgbase,
448 rdata->args.offset, rdata->args.count, rdata, 449 rdata->args.offset, rdata->args.count, rdata,
449 GFP_KERNEL, &objios); 450 GFP_KERNEL, &objios);
450 if (unlikely(ret)) 451 if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
483{ 484{
484 struct objio_state *objios = priv; 485 struct objio_state *objios = priv;
485 struct nfs_write_data *wdata = objios->oir.rpcdata; 486 struct nfs_write_data *wdata = objios->oir.rpcdata;
487 struct address_space *mapping = wdata->header->inode->i_mapping;
486 pgoff_t index = offset / PAGE_SIZE; 488 pgoff_t index = offset / PAGE_SIZE;
487 struct page *page = find_get_page(wdata->inode->i_mapping, index); 489 struct page *page = find_get_page(mapping, index);
488 490
489 if (!page) { 491 if (!page) {
490 page = find_or_create_page(wdata->inode->i_mapping, 492 page = find_or_create_page(mapping, index, GFP_NOFS);
491 index, GFP_NOFS);
492 if (unlikely(!page)) { 493 if (unlikely(!page)) {
493 dprintk("%s: grab_cache_page Failed index=0x%lx\n", 494 dprintk("%s: grab_cache_page Failed index=0x%lx\n",
494 __func__, index); 495 __func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {
518 519
519int objio_write_pagelist(struct nfs_write_data *wdata, int how) 520int objio_write_pagelist(struct nfs_write_data *wdata, int how)
520{ 521{
522 struct nfs_pgio_header *hdr = wdata->header;
521 struct objio_state *objios; 523 struct objio_state *objios;
522 int ret; 524 int ret;
523 525
524 ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, 526 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
525 wdata->lseg, wdata->args.pages, wdata->args.pgbase, 527 hdr->lseg, wdata->args.pages, wdata->args.pgbase,
526 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, 528 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
527 &objios); 529 &objios);
528 if (unlikely(ret)) 530 if (unlikely(ret))
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 595c5fc21a19..874613545301 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
258 if (status >= 0) 258 if (status >= 0)
259 rdata->res.count = status; 259 rdata->res.count = status;
260 else 260 else
261 rdata->pnfs_error = status; 261 rdata->header->pnfs_error = status;
262 objlayout_iodone(oir); 262 objlayout_iodone(oir);
263 /* must not use oir after this point */ 263 /* must not use oir after this point */
264 264
@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
279enum pnfs_try_status 279enum pnfs_try_status
280objlayout_read_pagelist(struct nfs_read_data *rdata) 280objlayout_read_pagelist(struct nfs_read_data *rdata)
281{ 281{
282 struct nfs_pgio_header *hdr = rdata->header;
283 struct inode *inode = hdr->inode;
282 loff_t offset = rdata->args.offset; 284 loff_t offset = rdata->args.offset;
283 size_t count = rdata->args.count; 285 size_t count = rdata->args.count;
284 int err; 286 int err;
285 loff_t eof; 287 loff_t eof;
286 288
287 eof = i_size_read(rdata->inode); 289 eof = i_size_read(inode);
288 if (unlikely(offset + count > eof)) { 290 if (unlikely(offset + count > eof)) {
289 if (offset >= eof) { 291 if (offset >= eof) {
290 err = 0; 292 err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
297 } 299 }
298 300
299 rdata->res.eof = (offset + count) >= eof; 301 rdata->res.eof = (offset + count) >= eof;
300 _fix_verify_io_params(rdata->lseg, &rdata->args.pages, 302 _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
301 &rdata->args.pgbase, 303 &rdata->args.pgbase,
302 rdata->args.offset, rdata->args.count); 304 rdata->args.offset, rdata->args.count);
303 305
304 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", 306 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
305 __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); 307 __func__, inode->i_ino, offset, count, rdata->res.eof);
306 308
307 err = objio_read_pagelist(rdata); 309 err = objio_read_pagelist(rdata);
308 out: 310 out:
309 if (unlikely(err)) { 311 if (unlikely(err)) {
310 rdata->pnfs_error = err; 312 hdr->pnfs_error = err;
311 dprintk("%s: Returned Error %d\n", __func__, err); 313 dprintk("%s: Returned Error %d\n", __func__, err);
312 return PNFS_NOT_ATTEMPTED; 314 return PNFS_NOT_ATTEMPTED;
313 } 315 }
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
340 wdata->res.count = status; 342 wdata->res.count = status;
341 wdata->verf.committed = oir->committed; 343 wdata->verf.committed = oir->committed;
342 } else { 344 } else {
343 wdata->pnfs_error = status; 345 wdata->header->pnfs_error = status;
344 } 346 }
345 objlayout_iodone(oir); 347 objlayout_iodone(oir);
346 /* must not use oir after this point */ 348 /* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
363objlayout_write_pagelist(struct nfs_write_data *wdata, 365objlayout_write_pagelist(struct nfs_write_data *wdata,
364 int how) 366 int how)
365{ 367{
368 struct nfs_pgio_header *hdr = wdata->header;
366 int err; 369 int err;
367 370
368 _fix_verify_io_params(wdata->lseg, &wdata->args.pages, 371 _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
369 &wdata->args.pgbase, 372 &wdata->args.pgbase,
370 wdata->args.offset, wdata->args.count); 373 wdata->args.offset, wdata->args.count);
371 374
372 err = objio_write_pagelist(wdata, how); 375 err = objio_write_pagelist(wdata, how);
373 if (unlikely(err)) { 376 if (unlikely(err)) {
374 wdata->pnfs_error = err; 377 hdr->pnfs_error = err;
375 dprintk("%s: Returned Error %d\n", __func__, err); 378 dprintk("%s: Returned Error %d\n", __func__, err);
376 return PNFS_NOT_ATTEMPTED; 379 return PNFS_NOT_ATTEMPTED;
377 } 380 }
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d21fceaa9f62..aed913c833f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,6 +26,47 @@
26 26
27static struct kmem_cache *nfs_page_cachep; 27static struct kmem_cache *nfs_page_cachep;
28 28
29bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
30{
31 p->npages = pagecount;
32 if (pagecount <= ARRAY_SIZE(p->page_array))
33 p->pagevec = p->page_array;
34 else {
35 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
36 if (!p->pagevec)
37 p->npages = 0;
38 }
39 return p->pagevec != NULL;
40}
41
42void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
43 struct nfs_pgio_header *hdr,
44 void (*release)(struct nfs_pgio_header *hdr))
45{
46 hdr->req = nfs_list_entry(desc->pg_list.next);
47 hdr->inode = desc->pg_inode;
48 hdr->cred = hdr->req->wb_context->cred;
49 hdr->io_start = req_offset(hdr->req);
50 hdr->good_bytes = desc->pg_count;
51 hdr->dreq = desc->pg_dreq;
52 hdr->release = release;
53 hdr->completion_ops = desc->pg_completion_ops;
54 if (hdr->completion_ops->init_hdr)
55 hdr->completion_ops->init_hdr(hdr);
56}
57
58void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
59{
60 spin_lock(&hdr->lock);
61 if (pos < hdr->io_start + hdr->good_bytes) {
62 set_bit(NFS_IOHDR_ERROR, &hdr->flags);
63 clear_bit(NFS_IOHDR_EOF, &hdr->flags);
64 hdr->good_bytes = pos - hdr->io_start;
65 hdr->error = error;
66 }
67 spin_unlock(&hdr->lock);
68}
69
29static inline struct nfs_page * 70static inline struct nfs_page *
30nfs_page_alloc(void) 71nfs_page_alloc(void)
31{ 72{
@@ -76,12 +117,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
76 * long write-back delay. This will be adjusted in 117 * long write-back delay. This will be adjusted in
77 * update_nfs_request below if the region is not locked. */ 118 * update_nfs_request below if the region is not locked. */
78 req->wb_page = page; 119 req->wb_page = page;
79 atomic_set(&req->wb_complete, 0);
80 req->wb_index = page->index; 120 req->wb_index = page->index;
81 page_cache_get(page); 121 page_cache_get(page);
82 BUG_ON(PagePrivate(page));
83 BUG_ON(!PageLocked(page));
84 BUG_ON(page->mapping->host != inode);
85 req->wb_offset = offset; 122 req->wb_offset = offset;
86 req->wb_pgbase = offset; 123 req->wb_pgbase = offset;
87 req->wb_bytes = count; 124 req->wb_bytes = count;
@@ -104,6 +141,15 @@ void nfs_unlock_request(struct nfs_page *req)
104 clear_bit(PG_BUSY, &req->wb_flags); 141 clear_bit(PG_BUSY, &req->wb_flags);
105 smp_mb__after_clear_bit(); 142 smp_mb__after_clear_bit();
106 wake_up_bit(&req->wb_flags, PG_BUSY); 143 wake_up_bit(&req->wb_flags, PG_BUSY);
144}
145
146/**
147 * nfs_unlock_and_release_request - Unlock request and release the nfs_page
148 * @req:
149 */
150void nfs_unlock_and_release_request(struct nfs_page *req)
151{
152 nfs_unlock_request(req);
107 nfs_release_request(req); 153 nfs_release_request(req);
108} 154}
109 155
@@ -203,6 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
203void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 249void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
204 struct inode *inode, 250 struct inode *inode,
205 const struct nfs_pageio_ops *pg_ops, 251 const struct nfs_pageio_ops *pg_ops,
252 const struct nfs_pgio_completion_ops *compl_ops,
206 size_t bsize, 253 size_t bsize,
207 int io_flags) 254 int io_flags)
208{ 255{
@@ -215,9 +262,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
215 desc->pg_recoalesce = 0; 262 desc->pg_recoalesce = 0;
216 desc->pg_inode = inode; 263 desc->pg_inode = inode;
217 desc->pg_ops = pg_ops; 264 desc->pg_ops = pg_ops;
265 desc->pg_completion_ops = compl_ops;
218 desc->pg_ioflags = io_flags; 266 desc->pg_ioflags = io_flags;
219 desc->pg_error = 0; 267 desc->pg_error = 0;
220 desc->pg_lseg = NULL; 268 desc->pg_lseg = NULL;
269 desc->pg_dreq = NULL;
221} 270}
222 271
223/** 272/**
@@ -241,12 +290,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
241 return false; 290 return false;
242 if (req->wb_context->state != prev->wb_context->state) 291 if (req->wb_context->state != prev->wb_context->state)
243 return false; 292 return false;
244 if (req->wb_index != (prev->wb_index + 1))
245 return false;
246 if (req->wb_pgbase != 0) 293 if (req->wb_pgbase != 0)
247 return false; 294 return false;
248 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 295 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
249 return false; 296 return false;
297 if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
298 return false;
250 return pgio->pg_ops->pg_test(pgio, prev, req); 299 return pgio->pg_ops->pg_test(pgio, prev, req);
251} 300}
252 301
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 38512bcd2e98..b8323aa7b543 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
395 dprintk("%s:Begin lo %p\n", __func__, lo); 395 dprintk("%s:Begin lo %p\n", __func__, lo);
396 396
397 if (list_empty(&lo->plh_segs)) { 397 if (list_empty(&lo->plh_segs)) {
398 /* Reset MDS Threshold I/O counters */
399 NFS_I(lo->plh_inode)->write_io = 0;
400 NFS_I(lo->plh_inode)->read_io = 0;
398 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) 401 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
399 put_layout_hdr_locked(lo); 402 put_layout_hdr_locked(lo);
400 return 0; 403 return 0;
@@ -455,6 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
455 spin_unlock(&nfsi->vfs_inode.i_lock); 458 spin_unlock(&nfsi->vfs_inode.i_lock);
456 pnfs_free_lseg_list(&tmp_list); 459 pnfs_free_lseg_list(&tmp_list);
457} 460}
461EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
458 462
459/* 463/*
460 * Called by the state manger to remove all layouts established under an 464 * Called by the state manger to remove all layouts established under an
@@ -692,6 +696,7 @@ out:
692 dprintk("<-- %s status: %d\n", __func__, status); 696 dprintk("<-- %s status: %d\n", __func__, status);
693 return status; 697 return status;
694} 698}
699EXPORT_SYMBOL_GPL(_pnfs_return_layout);
695 700
696bool pnfs_roc(struct inode *ino) 701bool pnfs_roc(struct inode *ino)
697{ 702{
@@ -931,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
931} 936}
932 937
933/* 938/*
939 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
940 * to the MDS or over pNFS
941 *
942 * The nfs_inode read_io and write_io fields are cumulative counters reset
943 * when there are no layout segments. Note that in pnfs_update_layout iomode
944 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
945 * WRITE request.
946 *
947 * A return of true means use MDS I/O.
948 *
949 * From rfc 5661:
950 * If a file's size is smaller than the file size threshold, data accesses
951 * SHOULD be sent to the metadata server. If an I/O request has a length that
952 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
953 * server. If both file size and I/O size are provided, the client SHOULD
954 * reach or exceed both thresholds before sending its read or write
955 * requests to the data server.
956 */
957static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
958 struct inode *ino, int iomode)
959{
960 struct nfs4_threshold *t = ctx->mdsthreshold;
961 struct nfs_inode *nfsi = NFS_I(ino);
962 loff_t fsize = i_size_read(ino);
963 bool size = false, size_set = false, io = false, io_set = false, ret = false;
964
965 if (t == NULL)
966 return ret;
967
968 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
969 __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
970
971 switch (iomode) {
972 case IOMODE_READ:
973 if (t->bm & THRESHOLD_RD) {
974 dprintk("%s fsize %llu\n", __func__, fsize);
975 size_set = true;
976 if (fsize < t->rd_sz)
977 size = true;
978 }
979 if (t->bm & THRESHOLD_RD_IO) {
980 dprintk("%s nfsi->read_io %llu\n", __func__,
981 nfsi->read_io);
982 io_set = true;
983 if (nfsi->read_io < t->rd_io_sz)
984 io = true;
985 }
986 break;
987 case IOMODE_RW:
988 if (t->bm & THRESHOLD_WR) {
989 dprintk("%s fsize %llu\n", __func__, fsize);
990 size_set = true;
991 if (fsize < t->wr_sz)
992 size = true;
993 }
994 if (t->bm & THRESHOLD_WR_IO) {
995 dprintk("%s nfsi->write_io %llu\n", __func__,
996 nfsi->write_io);
997 io_set = true;
998 if (nfsi->write_io < t->wr_io_sz)
999 io = true;
1000 }
1001 break;
1002 }
1003 if (size_set && io_set) {
1004 if (size && io)
1005 ret = true;
1006 } else if (size || io)
1007 ret = true;
1008
1009 dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
1010 return ret;
1011}
1012
1013/*
934 * Layout segment is retreived from the server if not cached. 1014 * Layout segment is retreived from the server if not cached.
935 * The appropriate layout segment is referenced and returned to the caller. 1015 * The appropriate layout segment is referenced and returned to the caller.
936 */ 1016 */
@@ -957,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
957 1037
958 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 1038 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
959 return NULL; 1039 return NULL;
1040
1041 if (pnfs_within_mdsthreshold(ctx, ino, iomode))
1042 return NULL;
1043
960 spin_lock(&ino->i_lock); 1044 spin_lock(&ino->i_lock);
961 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1045 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
962 if (lo == NULL) { 1046 if (lo == NULL) {
@@ -1082,6 +1166,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1082{ 1166{
1083 BUG_ON(pgio->pg_lseg != NULL); 1167 BUG_ON(pgio->pg_lseg != NULL);
1084 1168
1169 if (req->wb_offset != req->wb_pgbase) {
1170 nfs_pageio_reset_read_mds(pgio);
1171 return;
1172 }
1085 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1173 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1086 req->wb_context, 1174 req->wb_context,
1087 req_offset(req), 1175 req_offset(req),
@@ -1100,6 +1188,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1100{ 1188{
1101 BUG_ON(pgio->pg_lseg != NULL); 1189 BUG_ON(pgio->pg_lseg != NULL);
1102 1190
1191 if (req->wb_offset != req->wb_pgbase) {
1192 nfs_pageio_reset_write_mds(pgio);
1193 return;
1194 }
1103 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1195 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1104 req->wb_context, 1196 req->wb_context,
1105 req_offset(req), 1197 req_offset(req),
@@ -1113,26 +1205,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1113EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1205EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1114 1206
1115bool 1207bool
1116pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 1208pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1209 const struct nfs_pgio_completion_ops *compl_ops)
1117{ 1210{
1118 struct nfs_server *server = NFS_SERVER(inode); 1211 struct nfs_server *server = NFS_SERVER(inode);
1119 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1212 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1120 1213
1121 if (ld == NULL) 1214 if (ld == NULL)
1122 return false; 1215 return false;
1123 nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); 1216 nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
1217 server->rsize, 0);
1124 return true; 1218 return true;
1125} 1219}
1126 1220
1127bool 1221bool
1128pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 1222pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1223 int ioflags,
1224 const struct nfs_pgio_completion_ops *compl_ops)
1129{ 1225{
1130 struct nfs_server *server = NFS_SERVER(inode); 1226 struct nfs_server *server = NFS_SERVER(inode);
1131 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1227 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1132 1228
1133 if (ld == NULL) 1229 if (ld == NULL)
1134 return false; 1230 return false;
1135 nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); 1231 nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
1232 server->wsize, ioflags);
1136 return true; 1233 return true;
1137} 1234}
1138 1235
@@ -1162,13 +1259,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1162} 1259}
1163EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1260EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1164 1261
1165static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) 1262int pnfs_write_done_resend_to_mds(struct inode *inode,
1263 struct list_head *head,
1264 const struct nfs_pgio_completion_ops *compl_ops)
1166{ 1265{
1167 struct nfs_pageio_descriptor pgio; 1266 struct nfs_pageio_descriptor pgio;
1168 LIST_HEAD(failed); 1267 LIST_HEAD(failed);
1169 1268
1170 /* Resend all requests through the MDS */ 1269 /* Resend all requests through the MDS */
1171 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); 1270 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
1172 while (!list_empty(head)) { 1271 while (!list_empty(head)) {
1173 struct nfs_page *req = nfs_list_entry(head->next); 1272 struct nfs_page *req = nfs_list_entry(head->next);
1174 1273
@@ -1188,30 +1287,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
1188 } 1287 }
1189 return 0; 1288 return 0;
1190} 1289}
1290EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1291
1292static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1293{
1294 struct nfs_pgio_header *hdr = data->header;
1295
1296 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1297 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1298 PNFS_LAYOUTRET_ON_ERROR) {
1299 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1300 pnfs_return_layout(hdr->inode);
1301 }
1302 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1303 data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
1304 &hdr->pages,
1305 hdr->completion_ops);
1306}
1191 1307
1192/* 1308/*
1193 * Called by non rpc-based layout drivers 1309 * Called by non rpc-based layout drivers
1194 */ 1310 */
1195void pnfs_ld_write_done(struct nfs_write_data *data) 1311void pnfs_ld_write_done(struct nfs_write_data *data)
1196{ 1312{
1197 if (likely(!data->pnfs_error)) { 1313 struct nfs_pgio_header *hdr = data->header;
1314
1315 if (!hdr->pnfs_error) {
1198 pnfs_set_layoutcommit(data); 1316 pnfs_set_layoutcommit(data);
1199 data->mds_ops->rpc_call_done(&data->task, data); 1317 hdr->mds_ops->rpc_call_done(&data->task, data);
1200 } else { 1318 } else
1201 dprintk("pnfs write error = %d\n", data->pnfs_error); 1319 pnfs_ld_handle_write_error(data);
1202 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1320 hdr->mds_ops->rpc_release(data);
1203 PNFS_LAYOUTRET_ON_ERROR) {
1204 /* Don't lo_commit on error, Server will needs to
1205 * preform a file recovery.
1206 */
1207 clear_bit(NFS_INO_LAYOUTCOMMIT,
1208 &NFS_I(data->inode)->flags);
1209 pnfs_return_layout(data->inode);
1210 }
1211 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
1212 }
1213 put_lseg(data->lseg);
1214 data->mds_ops->rpc_release(data);
1215} 1321}
1216EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1322EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1217 1323
@@ -1219,12 +1325,13 @@ static void
1219pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1325pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1220 struct nfs_write_data *data) 1326 struct nfs_write_data *data)
1221{ 1327{
1222 list_splice_tail_init(&data->pages, &desc->pg_list); 1328 struct nfs_pgio_header *hdr = data->header;
1223 if (data->req && list_empty(&data->req->wb_list)) 1329
1224 nfs_list_add_request(data->req, &desc->pg_list); 1330 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1225 nfs_pageio_reset_write_mds(desc); 1331 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1226 desc->pg_recoalesce = 1; 1332 nfs_pageio_reset_write_mds(desc);
1227 put_lseg(data->lseg); 1333 desc->pg_recoalesce = 1;
1334 }
1228 nfs_writedata_release(data); 1335 nfs_writedata_release(data);
1229} 1336}
1230 1337
@@ -1234,23 +1341,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1234 struct pnfs_layout_segment *lseg, 1341 struct pnfs_layout_segment *lseg,
1235 int how) 1342 int how)
1236{ 1343{
1237 struct inode *inode = wdata->inode; 1344 struct nfs_pgio_header *hdr = wdata->header;
1345 struct inode *inode = hdr->inode;
1238 enum pnfs_try_status trypnfs; 1346 enum pnfs_try_status trypnfs;
1239 struct nfs_server *nfss = NFS_SERVER(inode); 1347 struct nfs_server *nfss = NFS_SERVER(inode);
1240 1348
1241 wdata->mds_ops = call_ops; 1349 hdr->mds_ops = call_ops;
1242 wdata->lseg = get_lseg(lseg);
1243 1350
1244 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1351 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1245 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1352 inode->i_ino, wdata->args.count, wdata->args.offset, how);
1246
1247 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); 1353 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
1248 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1354 if (trypnfs != PNFS_NOT_ATTEMPTED)
1249 put_lseg(wdata->lseg);
1250 wdata->lseg = NULL;
1251 } else
1252 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 1355 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1253
1254 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1356 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1255 return trypnfs; 1357 return trypnfs;
1256} 1358}
@@ -1266,7 +1368,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1266 while (!list_empty(head)) { 1368 while (!list_empty(head)) {
1267 enum pnfs_try_status trypnfs; 1369 enum pnfs_try_status trypnfs;
1268 1370
1269 data = list_entry(head->next, struct nfs_write_data, list); 1371 data = list_first_entry(head, struct nfs_write_data, list);
1270 list_del_init(&data->list); 1372 list_del_init(&data->list);
1271 1373
1272 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1374 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1276,43 +1378,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1276 put_lseg(lseg); 1378 put_lseg(lseg);
1277} 1379}
1278 1380
1381static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1382{
1383 put_lseg(hdr->lseg);
1384 nfs_writehdr_free(hdr);
1385}
1386
1279int 1387int
1280pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1388pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1281{ 1389{
1282 LIST_HEAD(head); 1390 struct nfs_write_header *whdr;
1391 struct nfs_pgio_header *hdr;
1283 int ret; 1392 int ret;
1284 1393
1285 ret = nfs_generic_flush(desc, &head); 1394 whdr = nfs_writehdr_alloc();
1286 if (ret != 0) { 1395 if (!whdr) {
1396 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1287 put_lseg(desc->pg_lseg); 1397 put_lseg(desc->pg_lseg);
1288 desc->pg_lseg = NULL; 1398 desc->pg_lseg = NULL;
1289 return ret; 1399 return -ENOMEM;
1290 } 1400 }
1291 pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); 1401 hdr = &whdr->header;
1292 return 0; 1402 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1403 hdr->lseg = get_lseg(desc->pg_lseg);
1404 atomic_inc(&hdr->refcnt);
1405 ret = nfs_generic_flush(desc, hdr);
1406 if (ret != 0) {
1407 put_lseg(desc->pg_lseg);
1408 desc->pg_lseg = NULL;
1409 } else
1410 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
1411 if (atomic_dec_and_test(&hdr->refcnt))
1412 hdr->completion_ops->completion(hdr);
1413 return ret;
1293} 1414}
1294EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1415EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1295 1416
1296static void pnfs_ld_handle_read_error(struct nfs_read_data *data) 1417int pnfs_read_done_resend_to_mds(struct inode *inode,
1418 struct list_head *head,
1419 const struct nfs_pgio_completion_ops *compl_ops)
1297{ 1420{
1298 struct nfs_pageio_descriptor pgio; 1421 struct nfs_pageio_descriptor pgio;
1422 LIST_HEAD(failed);
1299 1423
1300 put_lseg(data->lseg); 1424 /* Resend all requests through the MDS */
1301 data->lseg = NULL; 1425 nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
1302 dprintk("pnfs write error = %d\n", data->pnfs_error); 1426 while (!list_empty(head)) {
1303 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1427 struct nfs_page *req = nfs_list_entry(head->next);
1304 PNFS_LAYOUTRET_ON_ERROR)
1305 pnfs_return_layout(data->inode);
1306
1307 nfs_pageio_init_read_mds(&pgio, data->inode);
1308
1309 while (!list_empty(&data->pages)) {
1310 struct nfs_page *req = nfs_list_entry(data->pages.next);
1311 1428
1312 nfs_list_remove_request(req); 1429 nfs_list_remove_request(req);
1313 nfs_pageio_add_request(&pgio, req); 1430 if (!nfs_pageio_add_request(&pgio, req))
1431 nfs_list_add_request(req, &failed);
1314 } 1432 }
1315 nfs_pageio_complete(&pgio); 1433 nfs_pageio_complete(&pgio);
1434
1435 if (!list_empty(&failed)) {
1436 list_move(&failed, head);
1437 return -EIO;
1438 }
1439 return 0;
1440}
1441EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1442
1443static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1444{
1445 struct nfs_pgio_header *hdr = data->header;
1446
1447 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1448 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1449 PNFS_LAYOUTRET_ON_ERROR) {
1450 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1451 pnfs_return_layout(hdr->inode);
1452 }
1453 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1454 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
1455 &hdr->pages,
1456 hdr->completion_ops);
1316} 1457}
1317 1458
1318/* 1459/*
@@ -1320,13 +1461,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1320 */ 1461 */
1321void pnfs_ld_read_done(struct nfs_read_data *data) 1462void pnfs_ld_read_done(struct nfs_read_data *data)
1322{ 1463{
1323 if (likely(!data->pnfs_error)) { 1464 struct nfs_pgio_header *hdr = data->header;
1465
1466 if (likely(!hdr->pnfs_error)) {
1324 __nfs4_read_done_cb(data); 1467 __nfs4_read_done_cb(data);
1325 data->mds_ops->rpc_call_done(&data->task, data); 1468 hdr->mds_ops->rpc_call_done(&data->task, data);
1326 } else 1469 } else
1327 pnfs_ld_handle_read_error(data); 1470 pnfs_ld_handle_read_error(data);
1328 put_lseg(data->lseg); 1471 hdr->mds_ops->rpc_release(data);
1329 data->mds_ops->rpc_release(data);
1330} 1472}
1331EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1473EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1332 1474
@@ -1334,11 +1476,13 @@ static void
1334pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1476pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1335 struct nfs_read_data *data) 1477 struct nfs_read_data *data)
1336{ 1478{
1337 list_splice_tail_init(&data->pages, &desc->pg_list); 1479 struct nfs_pgio_header *hdr = data->header;
1338 if (data->req && list_empty(&data->req->wb_list)) 1480
1339 nfs_list_add_request(data->req, &desc->pg_list); 1481 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1340 nfs_pageio_reset_read_mds(desc); 1482 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1341 desc->pg_recoalesce = 1; 1483 nfs_pageio_reset_read_mds(desc);
1484 desc->pg_recoalesce = 1;
1485 }
1342 nfs_readdata_release(data); 1486 nfs_readdata_release(data);
1343} 1487}
1344 1488
@@ -1350,23 +1494,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1350 const struct rpc_call_ops *call_ops, 1494 const struct rpc_call_ops *call_ops,
1351 struct pnfs_layout_segment *lseg) 1495 struct pnfs_layout_segment *lseg)
1352{ 1496{
1353 struct inode *inode = rdata->inode; 1497 struct nfs_pgio_header *hdr = rdata->header;
1498 struct inode *inode = hdr->inode;
1354 struct nfs_server *nfss = NFS_SERVER(inode); 1499 struct nfs_server *nfss = NFS_SERVER(inode);
1355 enum pnfs_try_status trypnfs; 1500 enum pnfs_try_status trypnfs;
1356 1501
1357 rdata->mds_ops = call_ops; 1502 hdr->mds_ops = call_ops;
1358 rdata->lseg = get_lseg(lseg);
1359 1503
1360 dprintk("%s: Reading ino:%lu %u@%llu\n", 1504 dprintk("%s: Reading ino:%lu %u@%llu\n",
1361 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1505 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
1362 1506
1363 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1507 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
1364 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1508 if (trypnfs != PNFS_NOT_ATTEMPTED)
1365 put_lseg(rdata->lseg);
1366 rdata->lseg = NULL;
1367 } else {
1368 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1509 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1369 }
1370 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1510 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1371 return trypnfs; 1511 return trypnfs;
1372} 1512}
@@ -1382,7 +1522,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1382 while (!list_empty(head)) { 1522 while (!list_empty(head)) {
1383 enum pnfs_try_status trypnfs; 1523 enum pnfs_try_status trypnfs;
1384 1524
1385 data = list_entry(head->next, struct nfs_read_data, list); 1525 data = list_first_entry(head, struct nfs_read_data, list);
1386 list_del_init(&data->list); 1526 list_del_init(&data->list);
1387 1527
1388 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1528 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1392,20 +1532,40 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1392 put_lseg(lseg); 1532 put_lseg(lseg);
1393} 1533}
1394 1534
1535static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1536{
1537 put_lseg(hdr->lseg);
1538 nfs_readhdr_free(hdr);
1539}
1540
1395int 1541int
1396pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1542pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1397{ 1543{
1398 LIST_HEAD(head); 1544 struct nfs_read_header *rhdr;
1545 struct nfs_pgio_header *hdr;
1399 int ret; 1546 int ret;
1400 1547
1401 ret = nfs_generic_pagein(desc, &head); 1548 rhdr = nfs_readhdr_alloc();
1402 if (ret != 0) { 1549 if (!rhdr) {
1550 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1551 ret = -ENOMEM;
1403 put_lseg(desc->pg_lseg); 1552 put_lseg(desc->pg_lseg);
1404 desc->pg_lseg = NULL; 1553 desc->pg_lseg = NULL;
1405 return ret; 1554 return ret;
1406 } 1555 }
1407 pnfs_do_multiple_reads(desc, &head); 1556 hdr = &rhdr->header;
1408 return 0; 1557 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1558 hdr->lseg = get_lseg(desc->pg_lseg);
1559 atomic_inc(&hdr->refcnt);
1560 ret = nfs_generic_pagein(desc, hdr);
1561 if (ret != 0) {
1562 put_lseg(desc->pg_lseg);
1563 desc->pg_lseg = NULL;
1564 } else
1565 pnfs_do_multiple_reads(desc, &hdr->rpc_list);
1566 if (atomic_dec_and_test(&hdr->refcnt))
1567 hdr->completion_ops->completion(hdr);
1568 return ret;
1409} 1569}
1410EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1570EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1411 1571
@@ -1438,30 +1598,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1438void 1598void
1439pnfs_set_layoutcommit(struct nfs_write_data *wdata) 1599pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1440{ 1600{
1441 struct nfs_inode *nfsi = NFS_I(wdata->inode); 1601 struct nfs_pgio_header *hdr = wdata->header;
1602 struct inode *inode = hdr->inode;
1603 struct nfs_inode *nfsi = NFS_I(inode);
1442 loff_t end_pos = wdata->mds_offset + wdata->res.count; 1604 loff_t end_pos = wdata->mds_offset + wdata->res.count;
1443 bool mark_as_dirty = false; 1605 bool mark_as_dirty = false;
1444 1606
1445 spin_lock(&nfsi->vfs_inode.i_lock); 1607 spin_lock(&inode->i_lock);
1446 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1608 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1447 mark_as_dirty = true; 1609 mark_as_dirty = true;
1448 dprintk("%s: Set layoutcommit for inode %lu ", 1610 dprintk("%s: Set layoutcommit for inode %lu ",
1449 __func__, wdata->inode->i_ino); 1611 __func__, inode->i_ino);
1450 } 1612 }
1451 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { 1613 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
1452 /* references matched in nfs4_layoutcommit_release */ 1614 /* references matched in nfs4_layoutcommit_release */
1453 get_lseg(wdata->lseg); 1615 get_lseg(hdr->lseg);
1454 } 1616 }
1455 if (end_pos > nfsi->layout->plh_lwb) 1617 if (end_pos > nfsi->layout->plh_lwb)
1456 nfsi->layout->plh_lwb = end_pos; 1618 nfsi->layout->plh_lwb = end_pos;
1457 spin_unlock(&nfsi->vfs_inode.i_lock); 1619 spin_unlock(&inode->i_lock);
1458 dprintk("%s: lseg %p end_pos %llu\n", 1620 dprintk("%s: lseg %p end_pos %llu\n",
1459 __func__, wdata->lseg, nfsi->layout->plh_lwb); 1621 __func__, hdr->lseg, nfsi->layout->plh_lwb);
1460 1622
1461 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 1623 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1462 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 1624 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1463 if (mark_as_dirty) 1625 if (mark_as_dirty)
1464 mark_inode_dirty_sync(wdata->inode); 1626 mark_inode_dirty_sync(inode);
1465} 1627}
1466EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 1628EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1467 1629
@@ -1550,3 +1712,15 @@ out_free:
1550 kfree(data); 1712 kfree(data);
1551 goto out; 1713 goto out;
1552} 1714}
1715
1716struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
1717{
1718 struct nfs4_threshold *thp;
1719
1720 thp = kzalloc(sizeof(*thp), GFP_NOFS);
1721 if (!thp) {
1722 dprintk("%s mdsthreshold allocation failed\n", __func__);
1723 return NULL;
1724 }
1725 return thp;
1726}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf68eeec..29fd23c0efdc 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -63,6 +63,7 @@ enum {
63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */ 64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ 65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
66 NFS_LAYOUT_INVALID, /* layout is being destroyed */
66}; 67};
67 68
68enum layoutdriver_policy_flags { 69enum layoutdriver_policy_flags {
@@ -94,11 +95,20 @@ struct pnfs_layoutdriver_type {
94 const struct nfs_pageio_ops *pg_read_ops; 95 const struct nfs_pageio_ops *pg_read_ops;
95 const struct nfs_pageio_ops *pg_write_ops; 96 const struct nfs_pageio_ops *pg_write_ops;
96 97
98 struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
97 void (*mark_request_commit) (struct nfs_page *req, 99 void (*mark_request_commit) (struct nfs_page *req,
98 struct pnfs_layout_segment *lseg); 100 struct pnfs_layout_segment *lseg,
99 void (*clear_request_commit) (struct nfs_page *req); 101 struct nfs_commit_info *cinfo);
100 int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); 102 void (*clear_request_commit) (struct nfs_page *req,
101 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); 103 struct nfs_commit_info *cinfo);
104 int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
105 int max);
106 void (*recover_commit_reqs) (struct list_head *list,
107 struct nfs_commit_info *cinfo);
108 int (*commit_pagelist)(struct inode *inode,
109 struct list_head *mds_pages,
110 int how,
111 struct nfs_commit_info *cinfo);
102 112
103 /* 113 /*
104 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 114 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -168,8 +178,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
168void get_layout_hdr(struct pnfs_layout_hdr *lo); 178void get_layout_hdr(struct pnfs_layout_hdr *lo);
169void put_lseg(struct pnfs_layout_segment *lseg); 179void put_lseg(struct pnfs_layout_segment *lseg);
170 180
171bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); 181bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
172bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); 182 const struct nfs_pgio_completion_ops *);
183bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
184 int, const struct nfs_pgio_completion_ops *);
173 185
174void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 186void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
175void unset_pnfs_layoutdriver(struct nfs_server *); 187void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -211,6 +223,11 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
211 gfp_t gfp_flags); 223 gfp_t gfp_flags);
212 224
213void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 225void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
226int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
227 const struct nfs_pgio_completion_ops *compl_ops);
228int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
229 const struct nfs_pgio_completion_ops *compl_ops);
230struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
214 231
215/* nfs4_deviceid_flags */ 232/* nfs4_deviceid_flags */
216enum { 233enum {
@@ -261,49 +278,66 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
261} 278}
262 279
263static inline int 280static inline int
264pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 281pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
282 struct nfs_commit_info *cinfo)
265{ 283{
266 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) 284 if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
267 return PNFS_NOT_ATTEMPTED; 285 return PNFS_NOT_ATTEMPTED;
268 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); 286 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
287}
288
289static inline struct pnfs_ds_commit_info *
290pnfs_get_ds_info(struct inode *inode)
291{
292 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
293
294 if (ld == NULL || ld->get_ds_info == NULL)
295 return NULL;
296 return ld->get_ds_info(inode);
269} 297}
270 298
271static inline bool 299static inline bool
272pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 300pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
301 struct nfs_commit_info *cinfo)
273{ 302{
274 struct inode *inode = req->wb_context->dentry->d_inode; 303 struct inode *inode = req->wb_context->dentry->d_inode;
275 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 304 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
276 305
277 if (lseg == NULL || ld->mark_request_commit == NULL) 306 if (lseg == NULL || ld->mark_request_commit == NULL)
278 return false; 307 return false;
279 ld->mark_request_commit(req, lseg); 308 ld->mark_request_commit(req, lseg, cinfo);
280 return true; 309 return true;
281} 310}
282 311
283static inline bool 312static inline bool
284pnfs_clear_request_commit(struct nfs_page *req) 313pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
285{ 314{
286 struct inode *inode = req->wb_context->dentry->d_inode; 315 struct inode *inode = req->wb_context->dentry->d_inode;
287 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 316 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
288 317
289 if (ld == NULL || ld->clear_request_commit == NULL) 318 if (ld == NULL || ld->clear_request_commit == NULL)
290 return false; 319 return false;
291 ld->clear_request_commit(req); 320 ld->clear_request_commit(req, cinfo);
292 return true; 321 return true;
293} 322}
294 323
295static inline int 324static inline int
296pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) 325pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
326 int max)
297{ 327{
298 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 328 if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
299 int ret;
300
301 if (ld == NULL || ld->scan_commit_lists == NULL)
302 return 0; 329 return 0;
303 ret = ld->scan_commit_lists(inode, max, lock); 330 else
304 if (ret != 0) 331 return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
305 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); 332}
306 return ret; 333
334static inline void
335pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
336 struct nfs_commit_info *cinfo)
337{
338 if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
339 return;
340 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
307} 341}
308 342
309/* Should the pNFS client commit and return the layout upon a setattr */ 343/* Should the pNFS client commit and return the layout upon a setattr */
@@ -327,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)
327 return 0; 361 return 0;
328} 362}
329 363
364static inline bool
365pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
366 struct nfs_server *nfss)
367{
368 return (dst && src && src->bm != 0 &&
369 nfss->pnfs_curr_ld->id == src->l_type);
370}
371
330#ifdef NFS_DEBUG 372#ifdef NFS_DEBUG
331void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); 373void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
332#else 374#else
@@ -396,45 +438,74 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
396{ 438{
397} 439}
398 440
399static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 441static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
442 const struct nfs_pgio_completion_ops *compl_ops)
400{ 443{
401 return false; 444 return false;
402} 445}
403 446
404static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 447static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
448 const struct nfs_pgio_completion_ops *compl_ops)
405{ 449{
406 return false; 450 return false;
407} 451}
408 452
409static inline int 453static inline int
410pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 454pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
455 struct nfs_commit_info *cinfo)
411{ 456{
412 return PNFS_NOT_ATTEMPTED; 457 return PNFS_NOT_ATTEMPTED;
413} 458}
414 459
460static inline struct pnfs_ds_commit_info *
461pnfs_get_ds_info(struct inode *inode)
462{
463 return NULL;
464}
465
415static inline bool 466static inline bool
416pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 467pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
468 struct nfs_commit_info *cinfo)
417{ 469{
418 return false; 470 return false;
419} 471}
420 472
421static inline bool 473static inline bool
422pnfs_clear_request_commit(struct nfs_page *req) 474pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
423{ 475{
424 return false; 476 return false;
425} 477}
426 478
427static inline int 479static inline int
428pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) 480pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
481 int max)
429{ 482{
430 return 0; 483 return 0;
431} 484}
432 485
486static inline void
487pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
488 struct nfs_commit_info *cinfo)
489{
490}
491
433static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 492static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
434{ 493{
435 return 0; 494 return 0;
436} 495}
437 496
497static inline bool
498pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
499 struct nfs_server *nfss)
500{
501 return false;
502}
503
504static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
505{
506 return NULL;
507}
508
438#endif /* CONFIG_NFS_V4_1 */ 509#endif /* CONFIG_NFS_V4_1 */
439 510
440#endif /* FS_NFS_PNFS_H */ 511#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index d6408b6437de..a706b6bcc286 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
178} 178}
179 179
180static int 180static int
181nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 181nfs_proc_lookup(struct inode *dir, struct qstr *name,
182 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 182 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
183{ 183{
184 struct nfs_diropargs arg = { 184 struct nfs_diropargs arg = {
@@ -640,12 +640,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
640 640
641static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 641static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
642{ 642{
643 struct inode *inode = data->header->inode;
644
643 if (nfs_async_handle_expired_key(task)) 645 if (nfs_async_handle_expired_key(task))
644 return -EAGAIN; 646 return -EAGAIN;
645 647
646 nfs_invalidate_atime(data->inode); 648 nfs_invalidate_atime(inode);
647 if (task->tk_status >= 0) { 649 if (task->tk_status >= 0) {
648 nfs_refresh_inode(data->inode, data->res.fattr); 650 nfs_refresh_inode(inode, data->res.fattr);
649 /* Emulate the eof flag, which isn't normally needed in NFSv2 651 /* Emulate the eof flag, which isn't normally needed in NFSv2
650 * as it is guaranteed to always return the file attributes 652 * as it is guaranteed to always return the file attributes
651 */ 653 */
@@ -667,11 +669,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
667 669
668static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 670static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
669{ 671{
672 struct inode *inode = data->header->inode;
673
670 if (nfs_async_handle_expired_key(task)) 674 if (nfs_async_handle_expired_key(task))
671 return -EAGAIN; 675 return -EAGAIN;
672 676
673 if (task->tk_status >= 0) 677 if (task->tk_status >= 0)
674 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 678 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
675 return 0; 679 return 0;
676} 680}
677 681
@@ -687,8 +691,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
687 rpc_call_start(task); 691 rpc_call_start(task);
688} 692}
689 693
694static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
695{
696 BUG();
697}
698
690static void 699static void
691nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 700nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
692{ 701{
693 BUG(); 702 BUG();
694} 703}
@@ -732,6 +741,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
732 .file_inode_ops = &nfs_file_inode_operations, 741 .file_inode_ops = &nfs_file_inode_operations,
733 .file_ops = &nfs_file_operations, 742 .file_ops = &nfs_file_operations,
734 .getroot = nfs_proc_get_root, 743 .getroot = nfs_proc_get_root,
744 .submount = nfs_submount,
735 .getattr = nfs_proc_getattr, 745 .getattr = nfs_proc_getattr,
736 .setattr = nfs_proc_setattr, 746 .setattr = nfs_proc_setattr,
737 .lookup = nfs_proc_lookup, 747 .lookup = nfs_proc_lookup,
@@ -763,6 +773,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
763 .write_rpc_prepare = nfs_proc_write_rpc_prepare, 773 .write_rpc_prepare = nfs_proc_write_rpc_prepare,
764 .write_done = nfs_write_done, 774 .write_done = nfs_write_done,
765 .commit_setup = nfs_proc_commit_setup, 775 .commit_setup = nfs_proc_commit_setup,
776 .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
766 .lock = nfs_proc_lock, 777 .lock = nfs_proc_lock,
767 .lock_check_bounds = nfs_lock_check_bounds, 778 .lock_check_bounds = nfs_lock_check_bounds,
768 .close_context = nfs_close_context, 779 .close_context = nfs_close_context,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a4be28c2ea3..86ced7836214 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,43 +30,73 @@
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 31
32static const struct nfs_pageio_ops nfs_pageio_read_ops; 32static const struct nfs_pageio_ops nfs_pageio_read_ops;
33static const struct rpc_call_ops nfs_read_partial_ops; 33static const struct rpc_call_ops nfs_read_common_ops;
34static const struct rpc_call_ops nfs_read_full_ops; 34static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
35 35
36static struct kmem_cache *nfs_rdata_cachep; 36static struct kmem_cache *nfs_rdata_cachep;
37 37
38struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 38struct nfs_read_header *nfs_readhdr_alloc(void)
39{ 39{
40 struct nfs_read_data *p; 40 struct nfs_read_header *rhdr;
41 41
42 p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 42 rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
43 if (p) { 43 if (rhdr) {
44 INIT_LIST_HEAD(&p->pages); 44 struct nfs_pgio_header *hdr = &rhdr->header;
45 p->npages = pagecount; 45
46 if (pagecount <= ARRAY_SIZE(p->page_array)) 46 INIT_LIST_HEAD(&hdr->pages);
47 p->pagevec = p->page_array; 47 INIT_LIST_HEAD(&hdr->rpc_list);
48 else { 48 spin_lock_init(&hdr->lock);
49 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); 49 atomic_set(&hdr->refcnt, 0);
50 if (!p->pagevec) { 50 }
51 kmem_cache_free(nfs_rdata_cachep, p); 51 return rhdr;
52 p = NULL; 52}
53 } 53
54 } 54static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
55 unsigned int pagecount)
56{
57 struct nfs_read_data *data, *prealloc;
58
59 prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
60 if (prealloc->header == NULL)
61 data = prealloc;
62 else
63 data = kzalloc(sizeof(*data), GFP_KERNEL);
64 if (!data)
65 goto out;
66
67 if (nfs_pgarray_set(&data->pages, pagecount)) {
68 data->header = hdr;
69 atomic_inc(&hdr->refcnt);
70 } else {
71 if (data != prealloc)
72 kfree(data);
73 data = NULL;
55 } 74 }
56 return p; 75out:
76 return data;
57} 77}
58 78
59void nfs_readdata_free(struct nfs_read_data *p) 79void nfs_readhdr_free(struct nfs_pgio_header *hdr)
60{ 80{
61 if (p && (p->pagevec != &p->page_array[0])) 81 struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
62 kfree(p->pagevec); 82
63 kmem_cache_free(nfs_rdata_cachep, p); 83 kmem_cache_free(nfs_rdata_cachep, rhdr);
64} 84}
65 85
66void nfs_readdata_release(struct nfs_read_data *rdata) 86void nfs_readdata_release(struct nfs_read_data *rdata)
67{ 87{
88 struct nfs_pgio_header *hdr = rdata->header;
89 struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
90
68 put_nfs_open_context(rdata->args.context); 91 put_nfs_open_context(rdata->args.context);
69 nfs_readdata_free(rdata); 92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec);
94 if (rdata != &read_header->rpc_data)
95 kfree(rdata);
96 else
97 rdata->header = NULL;
98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr);
70} 100}
71 101
72static 102static
@@ -78,39 +108,11 @@ int nfs_return_empty_page(struct page *page)
78 return 0; 108 return 0;
79} 109}
80 110
81static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
82{
83 unsigned int remainder = data->args.count - data->res.count;
84 unsigned int base = data->args.pgbase + data->res.count;
85 unsigned int pglen;
86 struct page **pages;
87
88 if (data->res.eof == 0 || remainder == 0)
89 return;
90 /*
91 * Note: "remainder" can never be negative, since we check for
92 * this in the XDR code.
93 */
94 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
95 base &= ~PAGE_CACHE_MASK;
96 pglen = PAGE_CACHE_SIZE - base;
97 for (;;) {
98 if (remainder <= pglen) {
99 zero_user(*pages, base, remainder);
100 break;
101 }
102 zero_user(*pages, base, pglen);
103 pages++;
104 remainder -= pglen;
105 pglen = PAGE_CACHE_SIZE;
106 base = 0;
107 }
108}
109
110void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 111void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
111 struct inode *inode) 112 struct inode *inode,
113 const struct nfs_pgio_completion_ops *compl_ops)
112{ 114{
113 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, 115 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
114 NFS_SERVER(inode)->rsize, 0); 116 NFS_SERVER(inode)->rsize, 0);
115} 117}
116 118
@@ -121,11 +123,12 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
121} 123}
122EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 124EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
123 125
124static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 126void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
125 struct inode *inode) 127 struct inode *inode,
128 const struct nfs_pgio_completion_ops *compl_ops)
126{ 129{
127 if (!pnfs_pageio_init_read(pgio, inode)) 130 if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
128 nfs_pageio_init_read_mds(pgio, inode); 131 nfs_pageio_init_read_mds(pgio, inode, compl_ops);
129} 132}
130 133
131int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 134int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -146,9 +149,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
146 if (len < PAGE_CACHE_SIZE) 149 if (len < PAGE_CACHE_SIZE)
147 zero_user_segment(page, len, PAGE_CACHE_SIZE); 150 zero_user_segment(page, len, PAGE_CACHE_SIZE);
148 151
149 nfs_pageio_init_read(&pgio, inode); 152 nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
150 nfs_pageio_add_request(&pgio, new); 153 nfs_pageio_add_request(&pgio, new);
151 nfs_pageio_complete(&pgio); 154 nfs_pageio_complete(&pgio);
155 NFS_I(inode)->read_io += pgio.pg_bytes_written;
152 return 0; 156 return 0;
153} 157}
154 158
@@ -169,16 +173,49 @@ static void nfs_readpage_release(struct nfs_page *req)
169 nfs_release_request(req); 173 nfs_release_request(req);
170} 174}
171 175
172int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 176/* Note io was page aligned */
173 const struct rpc_call_ops *call_ops) 177static void nfs_read_completion(struct nfs_pgio_header *hdr)
178{
179 unsigned long bytes = 0;
180
181 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
182 goto out;
183 while (!list_empty(&hdr->pages)) {
184 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
185 struct page *page = req->wb_page;
186
187 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
188 if (bytes > hdr->good_bytes)
189 zero_user(page, 0, PAGE_SIZE);
190 else if (hdr->good_bytes - bytes < PAGE_SIZE)
191 zero_user_segment(page,
192 hdr->good_bytes & ~PAGE_MASK,
193 PAGE_SIZE);
194 }
195 bytes += req->wb_bytes;
196 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
197 if (bytes <= hdr->good_bytes)
198 SetPageUptodate(page);
199 } else
200 SetPageUptodate(page);
201 nfs_list_remove_request(req);
202 nfs_readpage_release(req);
203 }
204out:
205 hdr->release(hdr);
206}
207
208int nfs_initiate_read(struct rpc_clnt *clnt,
209 struct nfs_read_data *data,
210 const struct rpc_call_ops *call_ops, int flags)
174{ 211{
175 struct inode *inode = data->inode; 212 struct inode *inode = data->header->inode;
176 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 213 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
177 struct rpc_task *task; 214 struct rpc_task *task;
178 struct rpc_message msg = { 215 struct rpc_message msg = {
179 .rpc_argp = &data->args, 216 .rpc_argp = &data->args,
180 .rpc_resp = &data->res, 217 .rpc_resp = &data->res,
181 .rpc_cred = data->cred, 218 .rpc_cred = data->header->cred,
182 }; 219 };
183 struct rpc_task_setup task_setup_data = { 220 struct rpc_task_setup task_setup_data = {
184 .task = &data->task, 221 .task = &data->task,
@@ -187,7 +224,7 @@ int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
187 .callback_ops = call_ops, 224 .callback_ops = call_ops,
188 .callback_data = data, 225 .callback_data = data,
189 .workqueue = nfsiod_workqueue, 226 .workqueue = nfsiod_workqueue,
190 .flags = RPC_TASK_ASYNC | swap_flags, 227 .flags = RPC_TASK_ASYNC | swap_flags | flags,
191 }; 228 };
192 229
193 /* Set up the initial task struct. */ 230 /* Set up the initial task struct. */
@@ -212,19 +249,15 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
212/* 249/*
213 * Set up the NFS read request struct 250 * Set up the NFS read request struct
214 */ 251 */
215static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 252static void nfs_read_rpcsetup(struct nfs_read_data *data,
216 unsigned int count, unsigned int offset) 253 unsigned int count, unsigned int offset)
217{ 254{
218 struct inode *inode = req->wb_context->dentry->d_inode; 255 struct nfs_page *req = data->header->req;
219
220 data->req = req;
221 data->inode = inode;
222 data->cred = req->wb_context->cred;
223 256
224 data->args.fh = NFS_FH(inode); 257 data->args.fh = NFS_FH(data->header->inode);
225 data->args.offset = req_offset(req) + offset; 258 data->args.offset = req_offset(req) + offset;
226 data->args.pgbase = req->wb_pgbase + offset; 259 data->args.pgbase = req->wb_pgbase + offset;
227 data->args.pages = data->pagevec; 260 data->args.pages = data->pages.pagevec;
228 data->args.count = count; 261 data->args.count = count;
229 data->args.context = get_nfs_open_context(req->wb_context); 262 data->args.context = get_nfs_open_context(req->wb_context);
230 data->args.lock_context = req->wb_lock_context; 263 data->args.lock_context = req->wb_lock_context;
@@ -238,9 +271,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
238static int nfs_do_read(struct nfs_read_data *data, 271static int nfs_do_read(struct nfs_read_data *data,
239 const struct rpc_call_ops *call_ops) 272 const struct rpc_call_ops *call_ops)
240{ 273{
241 struct inode *inode = data->args.context->dentry->d_inode; 274 struct inode *inode = data->header->inode;
242 275
243 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 276 return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
244} 277}
245 278
246static int 279static int
@@ -253,7 +286,7 @@ nfs_do_multiple_reads(struct list_head *head,
253 while (!list_empty(head)) { 286 while (!list_empty(head)) {
254 int ret2; 287 int ret2;
255 288
256 data = list_entry(head->next, struct nfs_read_data, list); 289 data = list_first_entry(head, struct nfs_read_data, list);
257 list_del_init(&data->list); 290 list_del_init(&data->list);
258 291
259 ret2 = nfs_do_read(data, call_ops); 292 ret2 = nfs_do_read(data, call_ops);
@@ -275,6 +308,24 @@ nfs_async_read_error(struct list_head *head)
275 } 308 }
276} 309}
277 310
311static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
312 .error_cleanup = nfs_async_read_error,
313 .completion = nfs_read_completion,
314};
315
316static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
317 struct nfs_pgio_header *hdr)
318{
319 set_bit(NFS_IOHDR_REDO, &hdr->flags);
320 while (!list_empty(&hdr->rpc_list)) {
321 struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
322 struct nfs_read_data, list);
323 list_del(&data->list);
324 nfs_readdata_release(data);
325 }
326 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
327}
328
278/* 329/*
279 * Generate multiple requests to fill a single page. 330 * Generate multiple requests to fill a single page.
280 * 331 *
@@ -288,93 +339,95 @@ nfs_async_read_error(struct list_head *head)
288 * won't see the new data until our attribute cache is updated. This is more 339 * won't see the new data until our attribute cache is updated. This is more
289 * or less conventional NFS client behavior. 340 * or less conventional NFS client behavior.
290 */ 341 */
291static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 342static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
343 struct nfs_pgio_header *hdr)
292{ 344{
293 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 345 struct nfs_page *req = hdr->req;
294 struct page *page = req->wb_page; 346 struct page *page = req->wb_page;
295 struct nfs_read_data *data; 347 struct nfs_read_data *data;
296 size_t rsize = desc->pg_bsize, nbytes; 348 size_t rsize = desc->pg_bsize, nbytes;
297 unsigned int offset; 349 unsigned int offset;
298 int requests = 0;
299 int ret = 0;
300
301 nfs_list_remove_request(req);
302 350
303 offset = 0; 351 offset = 0;
304 nbytes = desc->pg_count; 352 nbytes = desc->pg_count;
305 do { 353 do {
306 size_t len = min(nbytes,rsize); 354 size_t len = min(nbytes,rsize);
307 355
308 data = nfs_readdata_alloc(1); 356 data = nfs_readdata_alloc(hdr, 1);
309 if (!data) 357 if (!data) {
310 goto out_bad; 358 nfs_pagein_error(desc, hdr);
311 data->pagevec[0] = page; 359 return -ENOMEM;
312 nfs_read_rpcsetup(req, data, len, offset); 360 }
313 list_add(&data->list, res); 361 data->pages.pagevec[0] = page;
314 requests++; 362 nfs_read_rpcsetup(data, len, offset);
363 list_add(&data->list, &hdr->rpc_list);
315 nbytes -= len; 364 nbytes -= len;
316 offset += len; 365 offset += len;
317 } while(nbytes != 0); 366 } while (nbytes != 0);
318 atomic_set(&req->wb_complete, requests); 367
319 desc->pg_rpc_callops = &nfs_read_partial_ops; 368 nfs_list_remove_request(req);
320 return ret; 369 nfs_list_add_request(req, &hdr->pages);
321out_bad: 370 desc->pg_rpc_callops = &nfs_read_common_ops;
322 while (!list_empty(res)) { 371 return 0;
323 data = list_entry(res->next, struct nfs_read_data, list);
324 list_del(&data->list);
325 nfs_readdata_release(data);
326 }
327 nfs_readpage_release(req);
328 return -ENOMEM;
329} 372}
330 373
331static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 374static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
375 struct nfs_pgio_header *hdr)
332{ 376{
333 struct nfs_page *req; 377 struct nfs_page *req;
334 struct page **pages; 378 struct page **pages;
335 struct nfs_read_data *data; 379 struct nfs_read_data *data;
336 struct list_head *head = &desc->pg_list; 380 struct list_head *head = &desc->pg_list;
337 int ret = 0;
338 381
339 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 382 data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
340 desc->pg_count)); 383 desc->pg_count));
341 if (!data) { 384 if (!data) {
342 nfs_async_read_error(head); 385 nfs_pagein_error(desc, hdr);
343 ret = -ENOMEM; 386 return -ENOMEM;
344 goto out;
345 } 387 }
346 388
347 pages = data->pagevec; 389 pages = data->pages.pagevec;
348 while (!list_empty(head)) { 390 while (!list_empty(head)) {
349 req = nfs_list_entry(head->next); 391 req = nfs_list_entry(head->next);
350 nfs_list_remove_request(req); 392 nfs_list_remove_request(req);
351 nfs_list_add_request(req, &data->pages); 393 nfs_list_add_request(req, &hdr->pages);
352 *pages++ = req->wb_page; 394 *pages++ = req->wb_page;
353 } 395 }
354 req = nfs_list_entry(data->pages.next);
355 396
356 nfs_read_rpcsetup(req, data, desc->pg_count, 0); 397 nfs_read_rpcsetup(data, desc->pg_count, 0);
357 list_add(&data->list, res); 398 list_add(&data->list, &hdr->rpc_list);
358 desc->pg_rpc_callops = &nfs_read_full_ops; 399 desc->pg_rpc_callops = &nfs_read_common_ops;
359out: 400 return 0;
360 return ret;
361} 401}
362 402
363int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) 403int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
404 struct nfs_pgio_header *hdr)
364{ 405{
365 if (desc->pg_bsize < PAGE_CACHE_SIZE) 406 if (desc->pg_bsize < PAGE_CACHE_SIZE)
366 return nfs_pagein_multi(desc, head); 407 return nfs_pagein_multi(desc, hdr);
367 return nfs_pagein_one(desc, head); 408 return nfs_pagein_one(desc, hdr);
368} 409}
369 410
370static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 411static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
371{ 412{
372 LIST_HEAD(head); 413 struct nfs_read_header *rhdr;
414 struct nfs_pgio_header *hdr;
373 int ret; 415 int ret;
374 416
375 ret = nfs_generic_pagein(desc, &head); 417 rhdr = nfs_readhdr_alloc();
418 if (!rhdr) {
419 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
420 return -ENOMEM;
421 }
422 hdr = &rhdr->header;
423 nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
424 atomic_inc(&hdr->refcnt);
425 ret = nfs_generic_pagein(desc, hdr);
376 if (ret == 0) 426 if (ret == 0)
377 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); 427 ret = nfs_do_multiple_reads(&hdr->rpc_list,
428 desc->pg_rpc_callops);
429 if (atomic_dec_and_test(&hdr->refcnt))
430 hdr->completion_ops->completion(hdr);
378 return ret; 431 return ret;
379} 432}
380 433
@@ -389,20 +442,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
389 */ 442 */
390int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 443int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
391{ 444{
445 struct inode *inode = data->header->inode;
392 int status; 446 int status;
393 447
394 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, 448 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
395 task->tk_status); 449 task->tk_status);
396 450
397 status = NFS_PROTO(data->inode)->read_done(task, data); 451 status = NFS_PROTO(inode)->read_done(task, data);
398 if (status != 0) 452 if (status != 0)
399 return status; 453 return status;
400 454
401 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); 455 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
402 456
403 if (task->tk_status == -ESTALE) { 457 if (task->tk_status == -ESTALE) {
404 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); 458 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
405 nfs_mark_for_revalidate(data->inode); 459 nfs_mark_for_revalidate(inode);
406 } 460 }
407 return 0; 461 return 0;
408} 462}
@@ -412,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
412 struct nfs_readargs *argp = &data->args; 466 struct nfs_readargs *argp = &data->args;
413 struct nfs_readres *resp = &data->res; 467 struct nfs_readres *resp = &data->res;
414 468
415 if (resp->eof || resp->count == argp->count)
416 return;
417
418 /* This is a short read! */ 469 /* This is a short read! */
419 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 470 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
420 /* Has the server at least made some progress? */ 471 /* Has the server at least made some progress? */
421 if (resp->count == 0) 472 if (resp->count == 0) {
473 nfs_set_pgio_error(data->header, -EIO, argp->offset);
422 return; 474 return;
423 475 }
424 /* Yes, so retry the read at the end of the data */ 476 /* Yes, so retry the read at the end of the data */
425 data->mds_offset += resp->count; 477 data->mds_offset += resp->count;
426 argp->offset += resp->count; 478 argp->offset += resp->count;
@@ -429,114 +481,46 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
429 rpc_restart_call_prepare(task); 481 rpc_restart_call_prepare(task);
430} 482}
431 483
432/* 484static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
433 * Handle a read reply that fills part of a page.
434 */
435static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
436{ 485{
437 struct nfs_read_data *data = calldata; 486 struct nfs_read_data *data = calldata;
438 487 struct nfs_pgio_header *hdr = data->header;
488
489 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
439 if (nfs_readpage_result(task, data) != 0) 490 if (nfs_readpage_result(task, data) != 0)
440 return; 491 return;
441 if (task->tk_status < 0) 492 if (task->tk_status < 0)
442 return; 493 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
443 494 else if (data->res.eof) {
444 nfs_readpage_truncate_uninitialised_page(data); 495 loff_t bound;
445 nfs_readpage_retry(task, data); 496
497 bound = data->args.offset + data->res.count;
498 spin_lock(&hdr->lock);
499 if (bound < hdr->io_start + hdr->good_bytes) {
500 set_bit(NFS_IOHDR_EOF, &hdr->flags);
501 clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
502 hdr->good_bytes = bound - hdr->io_start;
503 }
504 spin_unlock(&hdr->lock);
505 } else if (data->res.count != data->args.count)
506 nfs_readpage_retry(task, data);
446} 507}
447 508
448static void nfs_readpage_release_partial(void *calldata) 509static void nfs_readpage_release_common(void *calldata)
449{ 510{
450 struct nfs_read_data *data = calldata;
451 struct nfs_page *req = data->req;
452 struct page *page = req->wb_page;
453 int status = data->task.tk_status;
454
455 if (status < 0)
456 set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
457
458 if (atomic_dec_and_test(&req->wb_complete)) {
459 if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
460 SetPageUptodate(page);
461 nfs_readpage_release(req);
462 }
463 nfs_readdata_release(calldata); 511 nfs_readdata_release(calldata);
464} 512}
465 513
466void nfs_read_prepare(struct rpc_task *task, void *calldata) 514void nfs_read_prepare(struct rpc_task *task, void *calldata)
467{ 515{
468 struct nfs_read_data *data = calldata; 516 struct nfs_read_data *data = calldata;
469 NFS_PROTO(data->inode)->read_rpc_prepare(task, data); 517 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
470}
471
472static const struct rpc_call_ops nfs_read_partial_ops = {
473 .rpc_call_prepare = nfs_read_prepare,
474 .rpc_call_done = nfs_readpage_result_partial,
475 .rpc_release = nfs_readpage_release_partial,
476};
477
478static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
479{
480 unsigned int count = data->res.count;
481 unsigned int base = data->args.pgbase;
482 struct page **pages;
483
484 if (data->res.eof)
485 count = data->args.count;
486 if (unlikely(count == 0))
487 return;
488 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
489 base &= ~PAGE_CACHE_MASK;
490 count += base;
491 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
492 SetPageUptodate(*pages);
493 if (count == 0)
494 return;
495 /* Was this a short read? */
496 if (data->res.eof || data->res.count == data->args.count)
497 SetPageUptodate(*pages);
498}
499
500/*
501 * This is the callback from RPC telling us whether a reply was
502 * received or some error occurred (timeout or socket shutdown).
503 */
504static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
505{
506 struct nfs_read_data *data = calldata;
507
508 if (nfs_readpage_result(task, data) != 0)
509 return;
510 if (task->tk_status < 0)
511 return;
512 /*
513 * Note: nfs_readpage_retry may change the values of
514 * data->args. In the multi-page case, we therefore need
515 * to ensure that we call nfs_readpage_set_pages_uptodate()
516 * first.
517 */
518 nfs_readpage_truncate_uninitialised_page(data);
519 nfs_readpage_set_pages_uptodate(data);
520 nfs_readpage_retry(task, data);
521}
522
523static void nfs_readpage_release_full(void *calldata)
524{
525 struct nfs_read_data *data = calldata;
526
527 while (!list_empty(&data->pages)) {
528 struct nfs_page *req = nfs_list_entry(data->pages.next);
529
530 nfs_list_remove_request(req);
531 nfs_readpage_release(req);
532 }
533 nfs_readdata_release(calldata);
534} 518}
535 519
536static const struct rpc_call_ops nfs_read_full_ops = { 520static const struct rpc_call_ops nfs_read_common_ops = {
537 .rpc_call_prepare = nfs_read_prepare, 521 .rpc_call_prepare = nfs_read_prepare,
538 .rpc_call_done = nfs_readpage_result_full, 522 .rpc_call_done = nfs_readpage_result_common,
539 .rpc_release = nfs_readpage_release_full, 523 .rpc_release = nfs_readpage_release_common,
540}; 524};
541 525
542/* 526/*
@@ -668,11 +652,12 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
668 if (ret == 0) 652 if (ret == 0)
669 goto read_complete; /* all pages were read */ 653 goto read_complete; /* all pages were read */
670 654
671 nfs_pageio_init_read(&pgio, inode); 655 nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
672 656
673 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 657 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
674 658
675 nfs_pageio_complete(&pgio); 659 nfs_pageio_complete(&pgio);
660 NFS_I(inode)->read_io += pgio.pg_bytes_written;
676 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 661 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
677 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 662 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
678read_complete: 663read_complete:
@@ -684,7 +669,7 @@ out:
684int __init nfs_init_readpagecache(void) 669int __init nfs_init_readpagecache(void)
685{ 670{
686 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 671 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
687 sizeof(struct nfs_read_data), 672 sizeof(struct nfs_read_header),
688 0, SLAB_HWCACHE_ALIGN, 673 0, SLAB_HWCACHE_ALIGN,
689 NULL); 674 NULL);
690 if (nfs_rdata_cachep == NULL) 675 if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4ac7fca7e4bf..ff656c022684 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -66,6 +66,7 @@
66#include "pnfs.h" 66#include "pnfs.h"
67 67
68#define NFSDBG_FACILITY NFSDBG_VFS 68#define NFSDBG_FACILITY NFSDBG_VFS
69#define NFS_TEXT_DATA 1
69 70
70#ifdef CONFIG_NFS_V3 71#ifdef CONFIG_NFS_V3
71#define NFS_DEFAULT_VERSION 3 72#define NFS_DEFAULT_VERSION 3
@@ -277,12 +278,22 @@ static match_table_t nfs_vers_tokens = {
277 { Opt_vers_err, NULL } 278 { Opt_vers_err, NULL }
278}; 279};
279 280
281struct nfs_mount_info {
282 void (*fill_super)(struct super_block *, struct nfs_mount_info *);
283 int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
284 struct nfs_parsed_mount_data *parsed;
285 struct nfs_clone_mount *cloned;
286 struct nfs_fh *mntfh;
287};
288
280static void nfs_umount_begin(struct super_block *); 289static void nfs_umount_begin(struct super_block *);
281static int nfs_statfs(struct dentry *, struct kstatfs *); 290static int nfs_statfs(struct dentry *, struct kstatfs *);
282static int nfs_show_options(struct seq_file *, struct dentry *); 291static int nfs_show_options(struct seq_file *, struct dentry *);
283static int nfs_show_devname(struct seq_file *, struct dentry *); 292static int nfs_show_devname(struct seq_file *, struct dentry *);
284static int nfs_show_path(struct seq_file *, struct dentry *); 293static int nfs_show_path(struct seq_file *, struct dentry *);
285static int nfs_show_stats(struct seq_file *, struct dentry *); 294static int nfs_show_stats(struct seq_file *, struct dentry *);
295static struct dentry *nfs_fs_mount_common(struct file_system_type *,
296 struct nfs_server *, int, const char *, struct nfs_mount_info *);
286static struct dentry *nfs_fs_mount(struct file_system_type *, 297static struct dentry *nfs_fs_mount(struct file_system_type *,
287 int, const char *, void *); 298 int, const char *, void *);
288static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 299static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -323,12 +334,11 @@ static const struct super_operations nfs_sops = {
323}; 334};
324 335
325#ifdef CONFIG_NFS_V4 336#ifdef CONFIG_NFS_V4
326static int nfs4_validate_text_mount_data(void *options, 337static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
338static int nfs4_validate_mount_data(void *options,
327 struct nfs_parsed_mount_data *args, const char *dev_name); 339 struct nfs_parsed_mount_data *args, const char *dev_name);
328static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 340static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
329 struct nfs_parsed_mount_data *data); 341 struct nfs_mount_info *mount_info);
330static struct dentry *nfs4_mount(struct file_system_type *fs_type,
331 int flags, const char *dev_name, void *raw_data);
332static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, 342static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
333 int flags, const char *dev_name, void *raw_data); 343 int flags, const char *dev_name, void *raw_data);
334static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, 344static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
@@ -342,7 +352,7 @@ static void nfs4_kill_super(struct super_block *sb);
342static struct file_system_type nfs4_fs_type = { 352static struct file_system_type nfs4_fs_type = {
343 .owner = THIS_MODULE, 353 .owner = THIS_MODULE,
344 .name = "nfs4", 354 .name = "nfs4",
345 .mount = nfs4_mount, 355 .mount = nfs_fs_mount,
346 .kill_sb = nfs4_kill_super, 356 .kill_sb = nfs4_kill_super,
347 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 357 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
348}; 358};
@@ -786,8 +796,8 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
786 796
787static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) 797static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
788{ 798{
789 if (nfss->nfs_client && nfss->nfs_client->impl_id) { 799 if (nfss->nfs_client && nfss->nfs_client->cl_implid) {
790 struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; 800 struct nfs41_impl_id *impl_id = nfss->nfs_client->cl_implid;
791 seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," 801 seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
792 "date='%llu,%u'", 802 "date='%llu,%u'",
793 impl_id->name, impl_id->domain, 803 impl_id->name, impl_id->domain,
@@ -938,7 +948,7 @@ static void nfs_umount_begin(struct super_block *sb)
938 rpc_killall_tasks(rpc); 948 rpc_killall_tasks(rpc);
939} 949}
940 950
941static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) 951static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
942{ 952{
943 struct nfs_parsed_mount_data *data; 953 struct nfs_parsed_mount_data *data;
944 954
@@ -953,8 +963,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
953 data->nfs_server.protocol = XPRT_TRANSPORT_TCP; 963 data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
954 data->auth_flavors[0] = RPC_AUTH_UNIX; 964 data->auth_flavors[0] = RPC_AUTH_UNIX;
955 data->auth_flavor_len = 1; 965 data->auth_flavor_len = 1;
956 data->version = version;
957 data->minorversion = 0; 966 data->minorversion = 0;
967 data->need_mount = true;
958 data->net = current->nsproxy->net_ns; 968 data->net = current->nsproxy->net_ns;
959 security_init_mnt_opts(&data->lsm_opts); 969 security_init_mnt_opts(&data->lsm_opts);
960 } 970 }
@@ -1674,8 +1684,8 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
1674 * Use the remote server's MOUNT service to request the NFS file handle 1684 * Use the remote server's MOUNT service to request the NFS file handle
1675 * corresponding to the provided path. 1685 * corresponding to the provided path.
1676 */ 1686 */
1677static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1687static int nfs_request_mount(struct nfs_parsed_mount_data *args,
1678 struct nfs_fh *root_fh) 1688 struct nfs_fh *root_fh)
1679{ 1689{
1680 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; 1690 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
1681 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); 1691 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
@@ -1738,6 +1748,26 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1738 return nfs_walk_authlist(args, &request); 1748 return nfs_walk_authlist(args, &request);
1739} 1749}
1740 1750
1751static struct dentry *nfs_try_mount(int flags, const char *dev_name,
1752 struct nfs_mount_info *mount_info)
1753{
1754 int status;
1755 struct nfs_server *server;
1756
1757 if (mount_info->parsed->need_mount) {
1758 status = nfs_request_mount(mount_info->parsed, mount_info->mntfh);
1759 if (status)
1760 return ERR_PTR(status);
1761 }
1762
1763 /* Get a volume representation */
1764 server = nfs_create_server(mount_info->parsed, mount_info->mntfh);
1765 if (IS_ERR(server))
1766 return ERR_CAST(server);
1767
1768 return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info);
1769}
1770
1741/* 1771/*
1742 * Split "dev_name" into "hostname:export_path". 1772 * Split "dev_name" into "hostname:export_path".
1743 * 1773 *
@@ -1826,10 +1856,10 @@ out_path:
1826 * + breaking back: trying proto=udp after proto=tcp, v2 after v3, 1856 * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
1827 * mountproto=tcp after mountproto=udp, and so on 1857 * mountproto=tcp after mountproto=udp, and so on
1828 */ 1858 */
1829static int nfs_validate_mount_data(void *options, 1859static int nfs23_validate_mount_data(void *options,
1830 struct nfs_parsed_mount_data *args, 1860 struct nfs_parsed_mount_data *args,
1831 struct nfs_fh *mntfh, 1861 struct nfs_fh *mntfh,
1832 const char *dev_name) 1862 const char *dev_name)
1833{ 1863{
1834 struct nfs_mount_data *data = (struct nfs_mount_data *)options; 1864 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1835 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; 1865 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
@@ -1883,6 +1913,7 @@ static int nfs_validate_mount_data(void *options,
1883 args->acregmax = data->acregmax; 1913 args->acregmax = data->acregmax;
1884 args->acdirmin = data->acdirmin; 1914 args->acdirmin = data->acdirmin;
1885 args->acdirmax = data->acdirmax; 1915 args->acdirmax = data->acdirmax;
1916 args->need_mount = false;
1886 1917
1887 memcpy(sap, &data->addr, sizeof(data->addr)); 1918 memcpy(sap, &data->addr, sizeof(data->addr));
1888 args->nfs_server.addrlen = sizeof(data->addr); 1919 args->nfs_server.addrlen = sizeof(data->addr);
@@ -1934,43 +1965,8 @@ static int nfs_validate_mount_data(void *options,
1934 } 1965 }
1935 1966
1936 break; 1967 break;
1937 default: { 1968 default:
1938 int status; 1969 return NFS_TEXT_DATA;
1939
1940 if (nfs_parse_mount_options((char *)options, args) == 0)
1941 return -EINVAL;
1942
1943 if (!nfs_verify_server_address(sap))
1944 goto out_no_address;
1945
1946 if (args->version == 4)
1947#ifdef CONFIG_NFS_V4
1948 return nfs4_validate_text_mount_data(options,
1949 args, dev_name);
1950#else
1951 goto out_v4_not_compiled;
1952#endif
1953
1954 nfs_set_port(sap, &args->nfs_server.port, 0);
1955
1956 nfs_set_mount_transport_protocol(args);
1957
1958 status = nfs_parse_devname(dev_name,
1959 &args->nfs_server.hostname,
1960 PAGE_SIZE,
1961 &args->nfs_server.export_path,
1962 NFS_MAXPATHLEN);
1963 if (!status)
1964 status = nfs_try_mount(args, mntfh);
1965
1966 kfree(args->nfs_server.export_path);
1967 args->nfs_server.export_path = NULL;
1968
1969 if (status)
1970 return status;
1971
1972 break;
1973 }
1974 } 1970 }
1975 1971
1976#ifndef CONFIG_NFS_V3 1972#ifndef CONFIG_NFS_V3
@@ -1999,12 +1995,6 @@ out_v3_not_compiled:
1999 return -EPROTONOSUPPORT; 1995 return -EPROTONOSUPPORT;
2000#endif /* !CONFIG_NFS_V3 */ 1996#endif /* !CONFIG_NFS_V3 */
2001 1997
2002#ifndef CONFIG_NFS_V4
2003out_v4_not_compiled:
2004 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
2005 return -EPROTONOSUPPORT;
2006#endif /* !CONFIG_NFS_V4 */
2007
2008out_nomem: 1998out_nomem:
2009 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); 1999 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
2010 return -ENOMEM; 2000 return -ENOMEM;
@@ -2018,6 +2008,82 @@ out_invalid_fh:
2018 return -EINVAL; 2008 return -EINVAL;
2019} 2009}
2020 2010
2011#ifdef CONFIG_NFS_V4
2012static int nfs_validate_mount_data(struct file_system_type *fs_type,
2013 void *options,
2014 struct nfs_parsed_mount_data *args,
2015 struct nfs_fh *mntfh,
2016 const char *dev_name)
2017{
2018 if (fs_type == &nfs_fs_type)
2019 return nfs23_validate_mount_data(options, args, mntfh, dev_name);
2020 return nfs4_validate_mount_data(options, args, dev_name);
2021}
2022#else
2023static int nfs_validate_mount_data(struct file_system_type *fs_type,
2024 void *options,
2025 struct nfs_parsed_mount_data *args,
2026 struct nfs_fh *mntfh,
2027 const char *dev_name)
2028{
2029 return nfs23_validate_mount_data(options, args, mntfh, dev_name);
2030}
2031#endif
2032
2033static int nfs_validate_text_mount_data(void *options,
2034 struct nfs_parsed_mount_data *args,
2035 const char *dev_name)
2036{
2037 int port = 0;
2038 int max_namelen = PAGE_SIZE;
2039 int max_pathlen = NFS_MAXPATHLEN;
2040 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2041
2042 if (nfs_parse_mount_options((char *)options, args) == 0)
2043 return -EINVAL;
2044
2045 if (!nfs_verify_server_address(sap))
2046 goto out_no_address;
2047
2048 if (args->version == 4) {
2049#ifdef CONFIG_NFS_V4
2050 port = NFS_PORT;
2051 max_namelen = NFS4_MAXNAMLEN;
2052 max_pathlen = NFS4_MAXPATHLEN;
2053 nfs_validate_transport_protocol(args);
2054 nfs4_validate_mount_flags(args);
2055#else
2056 goto out_v4_not_compiled;
2057#endif /* CONFIG_NFS_V4 */
2058 } else
2059 nfs_set_mount_transport_protocol(args);
2060
2061 nfs_set_port(sap, &args->nfs_server.port, port);
2062
2063 if (args->auth_flavor_len > 1)
2064 goto out_bad_auth;
2065
2066 return nfs_parse_devname(dev_name,
2067 &args->nfs_server.hostname,
2068 max_namelen,
2069 &args->nfs_server.export_path,
2070 max_pathlen);
2071
2072#ifndef CONFIG_NFS_V4
2073out_v4_not_compiled:
2074 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
2075 return -EPROTONOSUPPORT;
2076#endif /* !CONFIG_NFS_V4 */
2077
2078out_no_address:
2079 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
2080 return -EINVAL;
2081
2082out_bad_auth:
2083 dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
2084 return -EINVAL;
2085}
2086
2021static int 2087static int
2022nfs_compare_remount_data(struct nfs_server *nfss, 2088nfs_compare_remount_data(struct nfs_server *nfss,
2023 struct nfs_parsed_mount_data *data) 2089 struct nfs_parsed_mount_data *data)
@@ -2129,8 +2195,9 @@ static inline void nfs_initialise_sb(struct super_block *sb)
2129 * Finish setting up an NFS2/3 superblock 2195 * Finish setting up an NFS2/3 superblock
2130 */ 2196 */
2131static void nfs_fill_super(struct super_block *sb, 2197static void nfs_fill_super(struct super_block *sb,
2132 struct nfs_parsed_mount_data *data) 2198 struct nfs_mount_info *mount_info)
2133{ 2199{
2200 struct nfs_parsed_mount_data *data = mount_info->parsed;
2134 struct nfs_server *server = NFS_SB(sb); 2201 struct nfs_server *server = NFS_SB(sb);
2135 2202
2136 sb->s_blocksize_bits = 0; 2203 sb->s_blocksize_bits = 0;
@@ -2154,8 +2221,9 @@ static void nfs_fill_super(struct super_block *sb,
2154 * Finish setting up a cloned NFS2/3 superblock 2221 * Finish setting up a cloned NFS2/3 superblock
2155 */ 2222 */
2156static void nfs_clone_super(struct super_block *sb, 2223static void nfs_clone_super(struct super_block *sb,
2157 const struct super_block *old_sb) 2224 struct nfs_mount_info *mount_info)
2158{ 2225{
2226 const struct super_block *old_sb = mount_info->cloned->sb;
2159 struct nfs_server *server = NFS_SB(sb); 2227 struct nfs_server *server = NFS_SB(sb);
2160 2228
2161 sb->s_blocksize_bits = old_sb->s_blocksize_bits; 2229 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
@@ -2278,52 +2346,70 @@ static int nfs_compare_super(struct super_block *sb, void *data)
2278 return nfs_compare_mount_options(sb, server, mntflags); 2346 return nfs_compare_mount_options(sb, server, mntflags);
2279} 2347}
2280 2348
2349#ifdef CONFIG_NFS_FSCACHE
2350static void nfs_get_cache_cookie(struct super_block *sb,
2351 struct nfs_parsed_mount_data *parsed,
2352 struct nfs_clone_mount *cloned)
2353{
2354 char *uniq = NULL;
2355 int ulen = 0;
2356
2357 if (parsed && parsed->fscache_uniq) {
2358 uniq = parsed->fscache_uniq;
2359 ulen = strlen(parsed->fscache_uniq);
2360 } else if (cloned) {
2361 struct nfs_server *mnt_s = NFS_SB(cloned->sb);
2362 if (mnt_s->fscache_key) {
2363 uniq = mnt_s->fscache_key->key.uniquifier;
2364 ulen = mnt_s->fscache_key->key.uniq_len;
2365 };
2366 }
2367
2368 nfs_fscache_get_super_cookie(sb, uniq, ulen);
2369}
2370#else
2371static void nfs_get_cache_cookie(struct super_block *sb,
2372 struct nfs_parsed_mount_data *parsed,
2373 struct nfs_clone_mount *cloned)
2374{
2375}
2376#endif
2377
2281static int nfs_bdi_register(struct nfs_server *server) 2378static int nfs_bdi_register(struct nfs_server *server)
2282{ 2379{
2283 return bdi_register_dev(&server->backing_dev_info, server->s_dev); 2380 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
2284} 2381}
2285 2382
2286static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, 2383static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
2287 int flags, const char *dev_name, void *raw_data) 2384 struct nfs_mount_info *mount_info)
2385{
2386 return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts);
2387}
2388
2389static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
2390 struct nfs_mount_info *mount_info)
2391{
2392 /* clone any lsm security options from the parent to the new sb */
2393 security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
2394 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
2395 return -ESTALE;
2396 return 0;
2397}
2398
2399static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
2400 struct nfs_server *server,
2401 int flags, const char *dev_name,
2402 struct nfs_mount_info *mount_info)
2288{ 2403{
2289 struct nfs_server *server = NULL;
2290 struct super_block *s; 2404 struct super_block *s;
2291 struct nfs_parsed_mount_data *data;
2292 struct nfs_fh *mntfh;
2293 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2405 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2294 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2406 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2295 struct nfs_sb_mountdata sb_mntdata = { 2407 struct nfs_sb_mountdata sb_mntdata = {
2296 .mntflags = flags, 2408 .mntflags = flags,
2409 .server = server,
2297 }; 2410 };
2298 int error; 2411 int error;
2299 2412
2300 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2301 mntfh = nfs_alloc_fhandle();
2302 if (data == NULL || mntfh == NULL)
2303 goto out;
2304
2305 /* Validate the mount data */
2306 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
2307 if (error < 0) {
2308 mntroot = ERR_PTR(error);
2309 goto out;
2310 }
2311
2312#ifdef CONFIG_NFS_V4
2313 if (data->version == 4) {
2314 mntroot = nfs4_try_mount(flags, dev_name, data);
2315 goto out;
2316 }
2317#endif /* CONFIG_NFS_V4 */
2318
2319 /* Get a volume representation */
2320 server = nfs_create_server(data, mntfh);
2321 if (IS_ERR(server)) {
2322 mntroot = ERR_CAST(server);
2323 goto out;
2324 }
2325 sb_mntdata.server = server;
2326
2327 if (server->flags & NFS_MOUNT_UNSHARED) 2413 if (server->flags & NFS_MOUNT_UNSHARED)
2328 compare_super = NULL; 2414 compare_super = NULL;
2329 2415
@@ -2351,23 +2437,21 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2351 2437
2352 if (!s->s_root) { 2438 if (!s->s_root) {
2353 /* initial superblock/root creation */ 2439 /* initial superblock/root creation */
2354 nfs_fill_super(s, data); 2440 mount_info->fill_super(s, mount_info);
2355 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); 2441 nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
2356 } 2442 }
2357 2443
2358 mntroot = nfs_get_root(s, mntfh, dev_name); 2444 mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
2359 if (IS_ERR(mntroot)) 2445 if (IS_ERR(mntroot))
2360 goto error_splat_super; 2446 goto error_splat_super;
2361 2447
2362 error = security_sb_set_mnt_opts(s, &data->lsm_opts); 2448 error = mount_info->set_security(s, mntroot, mount_info);
2363 if (error) 2449 if (error)
2364 goto error_splat_root; 2450 goto error_splat_root;
2365 2451
2366 s->s_flags |= MS_ACTIVE; 2452 s->s_flags |= MS_ACTIVE;
2367 2453
2368out: 2454out:
2369 nfs_free_parsed_mount_data(data);
2370 nfs_free_fhandle(mntfh);
2371 return mntroot; 2455 return mntroot;
2372 2456
2373out_err_nosb: 2457out_err_nosb:
@@ -2385,6 +2469,43 @@ error_splat_bdi:
2385 goto out; 2469 goto out;
2386} 2470}
2387 2471
2472static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2473 int flags, const char *dev_name, void *raw_data)
2474{
2475 struct nfs_mount_info mount_info = {
2476 .fill_super = nfs_fill_super,
2477 .set_security = nfs_set_sb_security,
2478 };
2479 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2480 int error;
2481
2482 mount_info.parsed = nfs_alloc_parsed_mount_data();
2483 mount_info.mntfh = nfs_alloc_fhandle();
2484 if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
2485 goto out;
2486
2487 /* Validate the mount data */
2488 error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
2489 if (error == NFS_TEXT_DATA)
2490 error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
2491 if (error < 0) {
2492 mntroot = ERR_PTR(error);
2493 goto out;
2494 }
2495
2496#ifdef CONFIG_NFS_V4
2497 if (mount_info.parsed->version == 4)
2498 mntroot = nfs4_try_mount(flags, dev_name, &mount_info);
2499 else
2500#endif /* CONFIG_NFS_V4 */
2501 mntroot = nfs_try_mount(flags, dev_name, &mount_info);
2502
2503out:
2504 nfs_free_parsed_mount_data(mount_info.parsed);
2505 nfs_free_fhandle(mount_info.mntfh);
2506 return mntroot;
2507}
2508
2388/* 2509/*
2389 * Ensure that we unregister the bdi before kill_anon_super 2510 * Ensure that we unregister the bdi before kill_anon_super
2390 * releases the device name 2511 * releases the device name
@@ -2409,93 +2530,51 @@ static void nfs_kill_super(struct super_block *s)
2409} 2530}
2410 2531
2411/* 2532/*
2412 * Clone an NFS2/3 server record on xdev traversal (FSID-change) 2533 * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
2413 */ 2534 */
2414static struct dentry * 2535static struct dentry *
2415nfs_xdev_mount(struct file_system_type *fs_type, int flags, 2536nfs_xdev_mount_common(struct file_system_type *fs_type, int flags,
2416 const char *dev_name, void *raw_data) 2537 const char *dev_name, struct nfs_mount_info *mount_info)
2417{ 2538{
2418 struct nfs_clone_mount *data = raw_data; 2539 struct nfs_clone_mount *data = mount_info->cloned;
2419 struct super_block *s;
2420 struct nfs_server *server; 2540 struct nfs_server *server;
2421 struct dentry *mntroot; 2541 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2422 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2423 struct nfs_sb_mountdata sb_mntdata = {
2424 .mntflags = flags,
2425 };
2426 int error; 2542 int error;
2427 2543
2428 dprintk("--> nfs_xdev_mount()\n"); 2544 dprintk("--> nfs_xdev_mount_common()\n");
2545
2546 mount_info->mntfh = data->fh;
2429 2547
2430 /* create a new volume representation */ 2548 /* create a new volume representation */
2431 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2549 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2432 if (IS_ERR(server)) { 2550 if (IS_ERR(server)) {
2433 error = PTR_ERR(server); 2551 error = PTR_ERR(server);
2434 goto out_err_noserver; 2552 goto out_err;
2435 }
2436 sb_mntdata.server = server;
2437
2438 if (server->flags & NFS_MOUNT_UNSHARED)
2439 compare_super = NULL;
2440
2441 /* -o noac implies -o sync */
2442 if (server->flags & NFS_MOUNT_NOAC)
2443 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2444
2445 /* Get a superblock - note that we may end up sharing one that already exists */
2446 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2447 if (IS_ERR(s)) {
2448 error = PTR_ERR(s);
2449 goto out_err_nosb;
2450 }
2451
2452 if (s->s_fs_info != server) {
2453 nfs_free_server(server);
2454 server = NULL;
2455 } else {
2456 error = nfs_bdi_register(server);
2457 if (error)
2458 goto error_splat_bdi;
2459 }
2460
2461 if (!s->s_root) {
2462 /* initial superblock/root creation */
2463 nfs_clone_super(s, data->sb);
2464 nfs_fscache_get_super_cookie(s, NULL, data);
2465 }
2466
2467 mntroot = nfs_get_root(s, data->fh, dev_name);
2468 if (IS_ERR(mntroot)) {
2469 error = PTR_ERR(mntroot);
2470 goto error_splat_super;
2471 }
2472 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
2473 dput(mntroot);
2474 error = -ESTALE;
2475 goto error_splat_super;
2476 } 2553 }
2477 2554
2478 s->s_flags |= MS_ACTIVE; 2555 mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
2479 2556 dprintk("<-- nfs_xdev_mount_common() = 0\n");
2480 /* clone any lsm security options from the parent to the new sb */ 2557out:
2481 security_sb_clone_mnt_opts(data->sb, s);
2482
2483 dprintk("<-- nfs_xdev_mount() = 0\n");
2484 return mntroot; 2558 return mntroot;
2485 2559
2486out_err_nosb: 2560out_err:
2487 nfs_free_server(server); 2561 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
2488out_err_noserver: 2562 goto out;
2489 dprintk("<-- nfs_xdev_mount() = %d [error]\n", error); 2563}
2490 return ERR_PTR(error);
2491 2564
2492error_splat_super: 2565/*
2493 if (server && !s->s_root) 2566 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
2494 bdi_unregister(&server->backing_dev_info); 2567 */
2495error_splat_bdi: 2568static struct dentry *
2496 deactivate_locked_super(s); 2569nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2497 dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error); 2570 const char *dev_name, void *raw_data)
2498 return ERR_PTR(error); 2571{
2572 struct nfs_mount_info mount_info = {
2573 .fill_super = nfs_clone_super,
2574 .set_security = nfs_clone_sb_security,
2575 .cloned = raw_data,
2576 };
2577 return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info);
2499} 2578}
2500 2579
2501#ifdef CONFIG_NFS_V4 2580#ifdef CONFIG_NFS_V4
@@ -2504,8 +2583,9 @@ error_splat_bdi:
2504 * Finish setting up a cloned NFS4 superblock 2583 * Finish setting up a cloned NFS4 superblock
2505 */ 2584 */
2506static void nfs4_clone_super(struct super_block *sb, 2585static void nfs4_clone_super(struct super_block *sb,
2507 const struct super_block *old_sb) 2586 struct nfs_mount_info *mount_info)
2508{ 2587{
2588 const struct super_block *old_sb = mount_info->cloned->sb;
2509 sb->s_blocksize_bits = old_sb->s_blocksize_bits; 2589 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
2510 sb->s_blocksize = old_sb->s_blocksize; 2590 sb->s_blocksize = old_sb->s_blocksize;
2511 sb->s_maxbytes = old_sb->s_maxbytes; 2591 sb->s_maxbytes = old_sb->s_maxbytes;
@@ -2523,7 +2603,8 @@ static void nfs4_clone_super(struct super_block *sb,
2523/* 2603/*
2524 * Set up an NFS4 superblock 2604 * Set up an NFS4 superblock
2525 */ 2605 */
2526static void nfs4_fill_super(struct super_block *sb) 2606static void nfs4_fill_super(struct super_block *sb,
2607 struct nfs_mount_info *mount_info)
2527{ 2608{
2528 sb->s_time_gran = 1; 2609 sb->s_time_gran = 1;
2529 sb->s_op = &nfs4_sops; 2610 sb->s_op = &nfs4_sops;
@@ -2542,37 +2623,6 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2542 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); 2623 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
2543} 2624}
2544 2625
2545static int nfs4_validate_text_mount_data(void *options,
2546 struct nfs_parsed_mount_data *args,
2547 const char *dev_name)
2548{
2549 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2550
2551 nfs_set_port(sap, &args->nfs_server.port, NFS_PORT);
2552
2553 nfs_validate_transport_protocol(args);
2554
2555 nfs4_validate_mount_flags(args);
2556
2557 if (args->version != 4) {
2558 dfprintk(MOUNT,
2559 "NFS4: Illegal mount version\n");
2560 return -EINVAL;
2561 }
2562
2563 if (args->auth_flavor_len > 1) {
2564 dfprintk(MOUNT,
2565 "NFS4: Too many RPC auth flavours specified\n");
2566 return -EINVAL;
2567 }
2568
2569 return nfs_parse_devname(dev_name,
2570 &args->nfs_server.hostname,
2571 NFS4_MAXNAMLEN,
2572 &args->nfs_server.export_path,
2573 NFS4_MAXPATHLEN);
2574}
2575
2576/* 2626/*
2577 * Validate NFSv4 mount options 2627 * Validate NFSv4 mount options
2578 */ 2628 */
@@ -2643,13 +2693,7 @@ static int nfs4_validate_mount_data(void *options,
2643 2693
2644 break; 2694 break;
2645 default: 2695 default:
2646 if (nfs_parse_mount_options((char *)options, args) == 0) 2696 return NFS_TEXT_DATA;
2647 return -EINVAL;
2648
2649 if (!nfs_verify_server_address(sap))
2650 return -EINVAL;
2651
2652 return nfs4_validate_text_mount_data(options, args, dev_name);
2653 } 2697 }
2654 2698
2655 return 0; 2699 return 0;
@@ -2673,91 +2717,26 @@ out_no_address:
2673 */ 2717 */
2674static struct dentry * 2718static struct dentry *
2675nfs4_remote_mount(struct file_system_type *fs_type, int flags, 2719nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2676 const char *dev_name, void *raw_data) 2720 const char *dev_name, void *info)
2677{ 2721{
2678 struct nfs_parsed_mount_data *data = raw_data; 2722 struct nfs_mount_info *mount_info = info;
2679 struct super_block *s;
2680 struct nfs_server *server; 2723 struct nfs_server *server;
2681 struct nfs_fh *mntfh; 2724 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2682 struct dentry *mntroot;
2683 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2684 struct nfs_sb_mountdata sb_mntdata = {
2685 .mntflags = flags,
2686 };
2687 int error = -ENOMEM;
2688 2725
2689 mntfh = nfs_alloc_fhandle(); 2726 mount_info->fill_super = nfs4_fill_super;
2690 if (data == NULL || mntfh == NULL) 2727 mount_info->set_security = nfs_set_sb_security;
2691 goto out;
2692 2728
2693 /* Get a volume representation */ 2729 /* Get a volume representation */
2694 server = nfs4_create_server(data, mntfh); 2730 server = nfs4_create_server(mount_info->parsed, mount_info->mntfh);
2695 if (IS_ERR(server)) { 2731 if (IS_ERR(server)) {
2696 error = PTR_ERR(server); 2732 mntroot = ERR_CAST(server);
2697 goto out; 2733 goto out;
2698 } 2734 }
2699 sb_mntdata.server = server;
2700 2735
2701 if (server->flags & NFS4_MOUNT_UNSHARED) 2736 mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
2702 compare_super = NULL;
2703
2704 /* -o noac implies -o sync */
2705 if (server->flags & NFS_MOUNT_NOAC)
2706 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2707
2708 /* Get a superblock - note that we may end up sharing one that already exists */
2709 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2710 if (IS_ERR(s)) {
2711 error = PTR_ERR(s);
2712 goto out_free;
2713 }
2714
2715 if (s->s_fs_info != server) {
2716 nfs_free_server(server);
2717 server = NULL;
2718 } else {
2719 error = nfs_bdi_register(server);
2720 if (error)
2721 goto error_splat_bdi;
2722 }
2723
2724 if (!s->s_root) {
2725 /* initial superblock/root creation */
2726 nfs4_fill_super(s);
2727 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2728 }
2729
2730 mntroot = nfs4_get_root(s, mntfh, dev_name);
2731 if (IS_ERR(mntroot)) {
2732 error = PTR_ERR(mntroot);
2733 goto error_splat_super;
2734 }
2735
2736 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2737 if (error)
2738 goto error_splat_root;
2739
2740 s->s_flags |= MS_ACTIVE;
2741
2742 nfs_free_fhandle(mntfh);
2743 return mntroot;
2744 2737
2745out: 2738out:
2746 nfs_free_fhandle(mntfh); 2739 return mntroot;
2747 return ERR_PTR(error);
2748
2749out_free:
2750 nfs_free_server(server);
2751 goto out;
2752
2753error_splat_root:
2754 dput(mntroot);
2755error_splat_super:
2756 if (server && !s->s_root)
2757 bdi_unregister(&server->backing_dev_info);
2758error_splat_bdi:
2759 deactivate_locked_super(s);
2760 goto out;
2761} 2740}
2762 2741
2763static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, 2742static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
@@ -2869,17 +2848,18 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2869} 2848}
2870 2849
2871static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 2850static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2872 struct nfs_parsed_mount_data *data) 2851 struct nfs_mount_info *mount_info)
2873{ 2852{
2874 char *export_path; 2853 char *export_path;
2875 struct vfsmount *root_mnt; 2854 struct vfsmount *root_mnt;
2876 struct dentry *res; 2855 struct dentry *res;
2856 struct nfs_parsed_mount_data *data = mount_info->parsed;
2877 2857
2878 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2858 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2879 2859
2880 export_path = data->nfs_server.export_path; 2860 export_path = data->nfs_server.export_path;
2881 data->nfs_server.export_path = "/"; 2861 data->nfs_server.export_path = "/";
2882 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, 2862 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
2883 data->nfs_server.hostname); 2863 data->nfs_server.hostname);
2884 data->nfs_server.export_path = export_path; 2864 data->nfs_server.export_path = export_path;
2885 2865
@@ -2891,38 +2871,6 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2891 return res; 2871 return res;
2892} 2872}
2893 2873
2894/*
2895 * Get the superblock for an NFS4 mountpoint
2896 */
2897static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2898 int flags, const char *dev_name, void *raw_data)
2899{
2900 struct nfs_parsed_mount_data *data;
2901 int error = -ENOMEM;
2902 struct dentry *res = ERR_PTR(-ENOMEM);
2903
2904 data = nfs_alloc_parsed_mount_data(4);
2905 if (data == NULL)
2906 goto out;
2907
2908 /* Validate the mount data */
2909 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2910 if (error < 0) {
2911 res = ERR_PTR(error);
2912 goto out;
2913 }
2914
2915 res = nfs4_try_mount(flags, dev_name, data);
2916 if (IS_ERR(res))
2917 error = PTR_ERR(res);
2918
2919out:
2920 nfs_free_parsed_mount_data(data);
2921 dprintk("<-- nfs4_mount() = %d%s\n", error,
2922 error != 0 ? " [error]" : "");
2923 return res;
2924}
2925
2926static void nfs4_kill_super(struct super_block *sb) 2874static void nfs4_kill_super(struct super_block *sb)
2927{ 2875{
2928 struct nfs_server *server = NFS_SB(sb); 2876 struct nfs_server *server = NFS_SB(sb);
@@ -2942,181 +2890,43 @@ static struct dentry *
2942nfs4_xdev_mount(struct file_system_type *fs_type, int flags, 2890nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2943 const char *dev_name, void *raw_data) 2891 const char *dev_name, void *raw_data)
2944{ 2892{
2945 struct nfs_clone_mount *data = raw_data; 2893 struct nfs_mount_info mount_info = {
2946 struct super_block *s; 2894 .fill_super = nfs4_clone_super,
2947 struct nfs_server *server; 2895 .set_security = nfs_clone_sb_security,
2948 struct dentry *mntroot; 2896 .cloned = raw_data,
2949 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2950 struct nfs_sb_mountdata sb_mntdata = {
2951 .mntflags = flags,
2952 }; 2897 };
2953 int error; 2898 return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info);
2954
2955 dprintk("--> nfs4_xdev_mount()\n");
2956
2957 /* create a new volume representation */
2958 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2959 if (IS_ERR(server)) {
2960 error = PTR_ERR(server);
2961 goto out_err_noserver;
2962 }
2963 sb_mntdata.server = server;
2964
2965 if (server->flags & NFS4_MOUNT_UNSHARED)
2966 compare_super = NULL;
2967
2968 /* -o noac implies -o sync */
2969 if (server->flags & NFS_MOUNT_NOAC)
2970 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2971
2972 /* Get a superblock - note that we may end up sharing one that already exists */
2973 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2974 if (IS_ERR(s)) {
2975 error = PTR_ERR(s);
2976 goto out_err_nosb;
2977 }
2978
2979 if (s->s_fs_info != server) {
2980 nfs_free_server(server);
2981 server = NULL;
2982 } else {
2983 error = nfs_bdi_register(server);
2984 if (error)
2985 goto error_splat_bdi;
2986 }
2987
2988 if (!s->s_root) {
2989 /* initial superblock/root creation */
2990 nfs4_clone_super(s, data->sb);
2991 nfs_fscache_get_super_cookie(s, NULL, data);
2992 }
2993
2994 mntroot = nfs4_get_root(s, data->fh, dev_name);
2995 if (IS_ERR(mntroot)) {
2996 error = PTR_ERR(mntroot);
2997 goto error_splat_super;
2998 }
2999 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
3000 dput(mntroot);
3001 error = -ESTALE;
3002 goto error_splat_super;
3003 }
3004
3005 s->s_flags |= MS_ACTIVE;
3006
3007 security_sb_clone_mnt_opts(data->sb, s);
3008
3009 dprintk("<-- nfs4_xdev_mount() = 0\n");
3010 return mntroot;
3011
3012out_err_nosb:
3013 nfs_free_server(server);
3014out_err_noserver:
3015 dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
3016 return ERR_PTR(error);
3017
3018error_splat_super:
3019 if (server && !s->s_root)
3020 bdi_unregister(&server->backing_dev_info);
3021error_splat_bdi:
3022 deactivate_locked_super(s);
3023 dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
3024 return ERR_PTR(error);
3025} 2899}
3026 2900
3027static struct dentry * 2901static struct dentry *
3028nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, 2902nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3029 const char *dev_name, void *raw_data) 2903 const char *dev_name, void *raw_data)
3030{ 2904{
3031 struct nfs_clone_mount *data = raw_data; 2905 struct nfs_mount_info mount_info = {
3032 struct super_block *s; 2906 .fill_super = nfs4_fill_super,
3033 struct nfs_server *server; 2907 .set_security = nfs_clone_sb_security,
3034 struct dentry *mntroot; 2908 .cloned = raw_data,
3035 struct nfs_fh *mntfh;
3036 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
3037 struct nfs_sb_mountdata sb_mntdata = {
3038 .mntflags = flags,
3039 }; 2909 };
3040 int error = -ENOMEM; 2910 struct nfs_server *server;
2911 struct dentry *mntroot = ERR_PTR(-ENOMEM);
3041 2912
3042 dprintk("--> nfs4_referral_get_sb()\n"); 2913 dprintk("--> nfs4_referral_get_sb()\n");
3043 2914
3044 mntfh = nfs_alloc_fhandle(); 2915 mount_info.mntfh = nfs_alloc_fhandle();
3045 if (mntfh == NULL) 2916 if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
3046 goto out_err_nofh; 2917 goto out;
3047 2918
3048 /* create a new volume representation */ 2919 /* create a new volume representation */
3049 server = nfs4_create_referral_server(data, mntfh); 2920 server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
3050 if (IS_ERR(server)) { 2921 if (IS_ERR(server)) {
3051 error = PTR_ERR(server); 2922 mntroot = ERR_CAST(server);
3052 goto out_err_noserver; 2923 goto out;
3053 }
3054 sb_mntdata.server = server;
3055
3056 if (server->flags & NFS4_MOUNT_UNSHARED)
3057 compare_super = NULL;
3058
3059 /* -o noac implies -o sync */
3060 if (server->flags & NFS_MOUNT_NOAC)
3061 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
3062
3063 /* Get a superblock - note that we may end up sharing one that already exists */
3064 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
3065 if (IS_ERR(s)) {
3066 error = PTR_ERR(s);
3067 goto out_err_nosb;
3068 }
3069
3070 if (s->s_fs_info != server) {
3071 nfs_free_server(server);
3072 server = NULL;
3073 } else {
3074 error = nfs_bdi_register(server);
3075 if (error)
3076 goto error_splat_bdi;
3077 }
3078
3079 if (!s->s_root) {
3080 /* initial superblock/root creation */
3081 nfs4_fill_super(s);
3082 nfs_fscache_get_super_cookie(s, NULL, data);
3083 }
3084
3085 mntroot = nfs4_get_root(s, mntfh, dev_name);
3086 if (IS_ERR(mntroot)) {
3087 error = PTR_ERR(mntroot);
3088 goto error_splat_super;
3089 }
3090 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
3091 dput(mntroot);
3092 error = -ESTALE;
3093 goto error_splat_super;
3094 } 2924 }
3095 2925
3096 s->s_flags |= MS_ACTIVE; 2926 mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info);
3097 2927out:
3098 security_sb_clone_mnt_opts(data->sb, s); 2928 nfs_free_fhandle(mount_info.mntfh);
3099
3100 nfs_free_fhandle(mntfh);
3101 dprintk("<-- nfs4_referral_get_sb() = 0\n");
3102 return mntroot; 2929 return mntroot;
3103
3104out_err_nosb:
3105 nfs_free_server(server);
3106out_err_noserver:
3107 nfs_free_fhandle(mntfh);
3108out_err_nofh:
3109 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
3110 return ERR_PTR(error);
3111
3112error_splat_super:
3113 if (server && !s->s_root)
3114 bdi_unregister(&server->backing_dev_info);
3115error_splat_bdi:
3116 deactivate_locked_super(s);
3117 nfs_free_fhandle(mntfh);
3118 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
3119 return ERR_PTR(error);
3120} 2930}
3121 2931
3122/* 2932/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c07462320f6b..e6fe3d69d14c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,20 +39,20 @@
39/* 39/*
40 * Local function declarations 40 * Local function declarations
41 */ 41 */
42static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
43 struct inode *inode, int ioflags);
44static void nfs_redirty_request(struct nfs_page *req); 42static void nfs_redirty_request(struct nfs_page *req);
45static const struct rpc_call_ops nfs_write_partial_ops; 43static const struct rpc_call_ops nfs_write_common_ops;
46static const struct rpc_call_ops nfs_write_full_ops;
47static const struct rpc_call_ops nfs_commit_ops; 44static const struct rpc_call_ops nfs_commit_ops;
45static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
46static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48 47
49static struct kmem_cache *nfs_wdata_cachep; 48static struct kmem_cache *nfs_wdata_cachep;
50static mempool_t *nfs_wdata_mempool; 49static mempool_t *nfs_wdata_mempool;
50static struct kmem_cache *nfs_cdata_cachep;
51static mempool_t *nfs_commit_mempool; 51static mempool_t *nfs_commit_mempool;
52 52
53struct nfs_write_data *nfs_commitdata_alloc(void) 53struct nfs_commit_data *nfs_commitdata_alloc(void)
54{ 54{
55 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); 55 struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
56 56
57 if (p) { 57 if (p) {
58 memset(p, 0, sizeof(*p)); 58 memset(p, 0, sizeof(*p));
@@ -62,46 +62,73 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
62} 62}
63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); 63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
64 64
65void nfs_commit_free(struct nfs_write_data *p) 65void nfs_commit_free(struct nfs_commit_data *p)
66{ 66{
67 if (p && (p->pagevec != &p->page_array[0]))
68 kfree(p->pagevec);
69 mempool_free(p, nfs_commit_mempool); 67 mempool_free(p, nfs_commit_mempool);
70} 68}
71EXPORT_SYMBOL_GPL(nfs_commit_free); 69EXPORT_SYMBOL_GPL(nfs_commit_free);
72 70
73struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 71struct nfs_write_header *nfs_writehdr_alloc(void)
74{ 72{
75 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); 73 struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
76 74
77 if (p) { 75 if (p) {
76 struct nfs_pgio_header *hdr = &p->header;
77
78 memset(p, 0, sizeof(*p)); 78 memset(p, 0, sizeof(*p));
79 INIT_LIST_HEAD(&p->pages); 79 INIT_LIST_HEAD(&hdr->pages);
80 p->npages = pagecount; 80 INIT_LIST_HEAD(&hdr->rpc_list);
81 if (pagecount <= ARRAY_SIZE(p->page_array)) 81 spin_lock_init(&hdr->lock);
82 p->pagevec = p->page_array; 82 atomic_set(&hdr->refcnt, 0);
83 else {
84 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
85 if (!p->pagevec) {
86 mempool_free(p, nfs_wdata_mempool);
87 p = NULL;
88 }
89 }
90 } 83 }
91 return p; 84 return p;
92} 85}
93 86
94void nfs_writedata_free(struct nfs_write_data *p) 87static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
88 unsigned int pagecount)
89{
90 struct nfs_write_data *data, *prealloc;
91
92 prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
93 if (prealloc->header == NULL)
94 data = prealloc;
95 else
96 data = kzalloc(sizeof(*data), GFP_KERNEL);
97 if (!data)
98 goto out;
99
100 if (nfs_pgarray_set(&data->pages, pagecount)) {
101 data->header = hdr;
102 atomic_inc(&hdr->refcnt);
103 } else {
104 if (data != prealloc)
105 kfree(data);
106 data = NULL;
107 }
108out:
109 return data;
110}
111
112void nfs_writehdr_free(struct nfs_pgio_header *hdr)
95{ 113{
96 if (p && (p->pagevec != &p->page_array[0])) 114 struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
97 kfree(p->pagevec); 115 mempool_free(whdr, nfs_wdata_mempool);
98 mempool_free(p, nfs_wdata_mempool);
99} 116}
100 117
101void nfs_writedata_release(struct nfs_write_data *wdata) 118void nfs_writedata_release(struct nfs_write_data *wdata)
102{ 119{
120 struct nfs_pgio_header *hdr = wdata->header;
121 struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
122
103 put_nfs_open_context(wdata->args.context); 123 put_nfs_open_context(wdata->args.context);
104 nfs_writedata_free(wdata); 124 if (wdata->pages.pagevec != wdata->pages.page_array)
125 kfree(wdata->pages.pagevec);
126 if (wdata != &write_header->rpc_data)
127 kfree(wdata);
128 else
129 wdata->header = NULL;
130 if (atomic_dec_and_test(&hdr->refcnt))
131 hdr->completion_ops->completion(hdr);
105} 132}
106 133
107static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 134static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -203,7 +230,6 @@ static int nfs_set_page_writeback(struct page *page)
203 struct inode *inode = page->mapping->host; 230 struct inode *inode = page->mapping->host;
204 struct nfs_server *nfss = NFS_SERVER(inode); 231 struct nfs_server *nfss = NFS_SERVER(inode);
205 232
206 page_cache_get(page);
207 if (atomic_long_inc_return(&nfss->writeback) > 233 if (atomic_long_inc_return(&nfss->writeback) >
208 NFS_CONGESTION_ON_THRESH) { 234 NFS_CONGESTION_ON_THRESH) {
209 set_bdi_congested(&nfss->backing_dev_info, 235 set_bdi_congested(&nfss->backing_dev_info,
@@ -219,7 +245,6 @@ static void nfs_end_page_writeback(struct page *page)
219 struct nfs_server *nfss = NFS_SERVER(inode); 245 struct nfs_server *nfss = NFS_SERVER(inode);
220 246
221 end_page_writeback(page); 247 end_page_writeback(page);
222 page_cache_release(page);
223 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 248 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
224 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 249 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
225} 250}
@@ -235,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
235 req = nfs_page_find_request_locked(page); 260 req = nfs_page_find_request_locked(page);
236 if (req == NULL) 261 if (req == NULL)
237 break; 262 break;
238 if (nfs_lock_request_dontget(req)) 263 if (nfs_lock_request(req))
239 break; 264 break;
240 /* Note: If we hold the page lock, as is the case in nfs_writepage, 265 /* Note: If we hold the page lock, as is the case in nfs_writepage,
241 * then the call to nfs_lock_request_dontget() will always 266 * then the call to nfs_lock_request() will always
242 * succeed provided that someone hasn't already marked the 267 * succeed provided that someone hasn't already marked the
243 * request as dirty (in which case we don't care). 268 * request as dirty (in which case we don't care).
244 */ 269 */
@@ -310,7 +335,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
310 struct nfs_pageio_descriptor pgio; 335 struct nfs_pageio_descriptor pgio;
311 int err; 336 int err;
312 337
313 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); 338 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
339 &nfs_async_write_completion_ops);
314 err = nfs_do_writepage(page, wbc, &pgio); 340 err = nfs_do_writepage(page, wbc, &pgio);
315 nfs_pageio_complete(&pgio); 341 nfs_pageio_complete(&pgio);
316 if (err < 0) 342 if (err < 0)
@@ -353,7 +379,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
353 379
354 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 380 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
355 381
356 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); 382 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
383 &nfs_async_write_completion_ops);
357 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 384 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
358 nfs_pageio_complete(&pgio); 385 nfs_pageio_complete(&pgio);
359 386
@@ -379,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
379 struct nfs_inode *nfsi = NFS_I(inode); 406 struct nfs_inode *nfsi = NFS_I(inode);
380 407
381 /* Lock the request! */ 408 /* Lock the request! */
382 nfs_lock_request_dontget(req); 409 nfs_lock_request(req);
383 410
384 spin_lock(&inode->i_lock); 411 spin_lock(&inode->i_lock);
385 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) 412 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
@@ -421,65 +448,88 @@ nfs_mark_request_dirty(struct nfs_page *req)
421/** 448/**
422 * nfs_request_add_commit_list - add request to a commit list 449 * nfs_request_add_commit_list - add request to a commit list
423 * @req: pointer to a struct nfs_page 450 * @req: pointer to a struct nfs_page
424 * @head: commit list head 451 * @dst: commit list head
452 * @cinfo: holds list lock and accounting info
425 * 453 *
426 * This sets the PG_CLEAN bit, updates the inode global count of 454 * This sets the PG_CLEAN bit, updates the cinfo count of
427 * number of outstanding requests requiring a commit as well as 455 * number of outstanding requests requiring a commit as well as
428 * the MM page stats. 456 * the MM page stats.
429 * 457 *
430 * The caller must _not_ hold the inode->i_lock, but must be 458 * The caller must _not_ hold the cinfo->lock, but must be
431 * holding the nfs_page lock. 459 * holding the nfs_page lock.
432 */ 460 */
433void 461void
434nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) 462nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
463 struct nfs_commit_info *cinfo)
435{ 464{
436 struct inode *inode = req->wb_context->dentry->d_inode;
437
438 set_bit(PG_CLEAN, &(req)->wb_flags); 465 set_bit(PG_CLEAN, &(req)->wb_flags);
439 spin_lock(&inode->i_lock); 466 spin_lock(cinfo->lock);
440 nfs_list_add_request(req, head); 467 nfs_list_add_request(req, dst);
441 NFS_I(inode)->ncommit++; 468 cinfo->mds->ncommit++;
442 spin_unlock(&inode->i_lock); 469 spin_unlock(cinfo->lock);
443 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 470 if (!cinfo->dreq) {
444 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 471 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
445 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 472 inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
473 BDI_RECLAIMABLE);
474 __mark_inode_dirty(req->wb_context->dentry->d_inode,
475 I_DIRTY_DATASYNC);
476 }
446} 477}
447EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); 478EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
448 479
449/** 480/**
450 * nfs_request_remove_commit_list - Remove request from a commit list 481 * nfs_request_remove_commit_list - Remove request from a commit list
451 * @req: pointer to a nfs_page 482 * @req: pointer to a nfs_page
483 * @cinfo: holds list lock and accounting info
452 * 484 *
453 * This clears the PG_CLEAN bit, and updates the inode global count of 485 * This clears the PG_CLEAN bit, and updates the cinfo's count of
454 * number of outstanding requests requiring a commit 486 * number of outstanding requests requiring a commit
455 * It does not update the MM page stats. 487 * It does not update the MM page stats.
456 * 488 *
457 * The caller _must_ hold the inode->i_lock and the nfs_page lock. 489 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
458 */ 490 */
459void 491void
460nfs_request_remove_commit_list(struct nfs_page *req) 492nfs_request_remove_commit_list(struct nfs_page *req,
493 struct nfs_commit_info *cinfo)
461{ 494{
462 struct inode *inode = req->wb_context->dentry->d_inode;
463
464 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) 495 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
465 return; 496 return;
466 nfs_list_remove_request(req); 497 nfs_list_remove_request(req);
467 NFS_I(inode)->ncommit--; 498 cinfo->mds->ncommit--;
468} 499}
469EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); 500EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
470 501
502static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
503 struct inode *inode)
504{
505 cinfo->lock = &inode->i_lock;
506 cinfo->mds = &NFS_I(inode)->commit_info;
507 cinfo->ds = pnfs_get_ds_info(inode);
508 cinfo->dreq = NULL;
509 cinfo->completion_ops = &nfs_commit_completion_ops;
510}
511
512void nfs_init_cinfo(struct nfs_commit_info *cinfo,
513 struct inode *inode,
514 struct nfs_direct_req *dreq)
515{
516 if (dreq)
517 nfs_init_cinfo_from_dreq(cinfo, dreq);
518 else
519 nfs_init_cinfo_from_inode(cinfo, inode);
520}
521EXPORT_SYMBOL_GPL(nfs_init_cinfo);
471 522
472/* 523/*
473 * Add a request to the inode's commit list. 524 * Add a request to the inode's commit list.
474 */ 525 */
475static void 526void
476nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 527nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
528 struct nfs_commit_info *cinfo)
477{ 529{
478 struct inode *inode = req->wb_context->dentry->d_inode; 530 if (pnfs_mark_request_commit(req, lseg, cinfo))
479
480 if (pnfs_mark_request_commit(req, lseg))
481 return; 531 return;
482 nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); 532 nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
483} 533}
484 534
485static void 535static void
@@ -494,11 +544,13 @@ nfs_clear_request_commit(struct nfs_page *req)
494{ 544{
495 if (test_bit(PG_CLEAN, &req->wb_flags)) { 545 if (test_bit(PG_CLEAN, &req->wb_flags)) {
496 struct inode *inode = req->wb_context->dentry->d_inode; 546 struct inode *inode = req->wb_context->dentry->d_inode;
547 struct nfs_commit_info cinfo;
497 548
498 if (!pnfs_clear_request_commit(req)) { 549 nfs_init_cinfo_from_inode(&cinfo, inode);
499 spin_lock(&inode->i_lock); 550 if (!pnfs_clear_request_commit(req, &cinfo)) {
500 nfs_request_remove_commit_list(req); 551 spin_lock(cinfo.lock);
501 spin_unlock(&inode->i_lock); 552 nfs_request_remove_commit_list(req, &cinfo);
553 spin_unlock(cinfo.lock);
502 } 554 }
503 nfs_clear_page_commit(req->wb_page); 555 nfs_clear_page_commit(req->wb_page);
504 } 556 }
@@ -508,28 +560,25 @@ static inline
508int nfs_write_need_commit(struct nfs_write_data *data) 560int nfs_write_need_commit(struct nfs_write_data *data)
509{ 561{
510 if (data->verf.committed == NFS_DATA_SYNC) 562 if (data->verf.committed == NFS_DATA_SYNC)
511 return data->lseg == NULL; 563 return data->header->lseg == NULL;
512 else 564 return data->verf.committed != NFS_FILE_SYNC;
513 return data->verf.committed != NFS_FILE_SYNC;
514} 565}
515 566
516static inline 567#else
517int nfs_reschedule_unstable_write(struct nfs_page *req, 568static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
518 struct nfs_write_data *data) 569 struct inode *inode)
519{ 570{
520 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
521 nfs_mark_request_commit(req, data->lseg);
522 return 1;
523 }
524 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
525 nfs_mark_request_dirty(req);
526 return 1;
527 }
528 return 0;
529} 571}
530#else 572
531static void 573void nfs_init_cinfo(struct nfs_commit_info *cinfo,
532nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 574 struct inode *inode,
575 struct nfs_direct_req *dreq)
576{
577}
578
579void
580nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
581 struct nfs_commit_info *cinfo)
533{ 582{
534} 583}
535 584
@@ -544,25 +593,57 @@ int nfs_write_need_commit(struct nfs_write_data *data)
544 return 0; 593 return 0;
545} 594}
546 595
547static inline 596#endif
548int nfs_reschedule_unstable_write(struct nfs_page *req, 597
549 struct nfs_write_data *data) 598static void nfs_write_completion(struct nfs_pgio_header *hdr)
550{ 599{
551 return 0; 600 struct nfs_commit_info cinfo;
601 unsigned long bytes = 0;
602
603 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
604 goto out;
605 nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
606 while (!list_empty(&hdr->pages)) {
607 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
608
609 bytes += req->wb_bytes;
610 nfs_list_remove_request(req);
611 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
612 (hdr->good_bytes < bytes)) {
613 nfs_set_pageerror(req->wb_page);
614 nfs_context_set_write_error(req->wb_context, hdr->error);
615 goto remove_req;
616 }
617 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
618 nfs_mark_request_dirty(req);
619 goto next;
620 }
621 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
622 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
623 goto next;
624 }
625remove_req:
626 nfs_inode_remove_request(req);
627next:
628 nfs_unlock_request(req);
629 nfs_end_page_writeback(req->wb_page);
630 nfs_release_request(req);
631 }
632out:
633 hdr->release(hdr);
552} 634}
553#endif
554 635
555#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 636#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
556static int 637static unsigned long
557nfs_need_commit(struct nfs_inode *nfsi) 638nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
558{ 639{
559 return nfsi->ncommit > 0; 640 return cinfo->mds->ncommit;
560} 641}
561 642
562/* i_lock held by caller */ 643/* cinfo->lock held by caller */
563static int 644int
564nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, 645nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
565 spinlock_t *lock) 646 struct nfs_commit_info *cinfo, int max)
566{ 647{
567 struct nfs_page *req, *tmp; 648 struct nfs_page *req, *tmp;
568 int ret = 0; 649 int ret = 0;
@@ -570,12 +651,13 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
570 list_for_each_entry_safe(req, tmp, src, wb_list) { 651 list_for_each_entry_safe(req, tmp, src, wb_list) {
571 if (!nfs_lock_request(req)) 652 if (!nfs_lock_request(req))
572 continue; 653 continue;
573 if (cond_resched_lock(lock)) 654 kref_get(&req->wb_kref);
655 if (cond_resched_lock(cinfo->lock))
574 list_safe_reset_next(req, tmp, wb_list); 656 list_safe_reset_next(req, tmp, wb_list);
575 nfs_request_remove_commit_list(req); 657 nfs_request_remove_commit_list(req, cinfo);
576 nfs_list_add_request(req, dst); 658 nfs_list_add_request(req, dst);
577 ret++; 659 ret++;
578 if (ret == max) 660 if ((ret == max) && !cinfo->dreq)
579 break; 661 break;
580 } 662 }
581 return ret; 663 return ret;
@@ -584,37 +666,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
584/* 666/*
585 * nfs_scan_commit - Scan an inode for commit requests 667 * nfs_scan_commit - Scan an inode for commit requests
586 * @inode: NFS inode to scan 668 * @inode: NFS inode to scan
587 * @dst: destination list 669 * @dst: mds destination list
670 * @cinfo: mds and ds lists of reqs ready to commit
588 * 671 *
589 * Moves requests from the inode's 'commit' request list. 672 * Moves requests from the inode's 'commit' request list.
590 * The requests are *not* checked to ensure that they form a contiguous set. 673 * The requests are *not* checked to ensure that they form a contiguous set.
591 */ 674 */
592static int 675int
593nfs_scan_commit(struct inode *inode, struct list_head *dst) 676nfs_scan_commit(struct inode *inode, struct list_head *dst,
677 struct nfs_commit_info *cinfo)
594{ 678{
595 struct nfs_inode *nfsi = NFS_I(inode);
596 int ret = 0; 679 int ret = 0;
597 680
598 spin_lock(&inode->i_lock); 681 spin_lock(cinfo->lock);
599 if (nfsi->ncommit > 0) { 682 if (cinfo->mds->ncommit > 0) {
600 const int max = INT_MAX; 683 const int max = INT_MAX;
601 684
602 ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, 685 ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
603 &inode->i_lock); 686 cinfo, max);
604 ret += pnfs_scan_commit_lists(inode, max - ret, 687 ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
605 &inode->i_lock);
606 } 688 }
607 spin_unlock(&inode->i_lock); 689 spin_unlock(cinfo->lock);
608 return ret; 690 return ret;
609} 691}
610 692
611#else 693#else
612static inline int nfs_need_commit(struct nfs_inode *nfsi) 694static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
613{ 695{
614 return 0; 696 return 0;
615} 697}
616 698
617static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) 699int nfs_scan_commit(struct inode *inode, struct list_head *dst,
700 struct nfs_commit_info *cinfo)
618{ 701{
619 return 0; 702 return 0;
620} 703}
@@ -659,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
659 || end < req->wb_offset) 742 || end < req->wb_offset)
660 goto out_flushme; 743 goto out_flushme;
661 744
662 if (nfs_lock_request_dontget(req)) 745 if (nfs_lock_request(req))
663 break; 746 break;
664 747
665 /* The request is locked, so wait and then retry */ 748 /* The request is locked, so wait and then retry */
@@ -729,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
729 nfs_grow_file(page, offset, count); 812 nfs_grow_file(page, offset, count);
730 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 813 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
731 nfs_mark_request_dirty(req); 814 nfs_mark_request_dirty(req);
732 nfs_unlock_request(req); 815 nfs_unlock_and_release_request(req);
733 return 0; 816 return 0;
734} 817}
735 818
@@ -766,10 +849,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
766 * the PageUptodate() flag. In this case, we will need to turn off 849 * the PageUptodate() flag. In this case, we will need to turn off
767 * write optimisations that depend on the page contents being correct. 850 * write optimisations that depend on the page contents being correct.
768 */ 851 */
769static int nfs_write_pageuptodate(struct page *page, struct inode *inode) 852static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
770{ 853{
771 return PageUptodate(page) && 854 if (nfs_have_delegated_attributes(inode))
772 !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); 855 goto out;
856 if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
857 return false;
858out:
859 return PageUptodate(page) != 0;
773} 860}
774 861
775/* 862/*
@@ -815,17 +902,6 @@ int nfs_updatepage(struct file *file, struct page *page,
815 return status; 902 return status;
816} 903}
817 904
818static void nfs_writepage_release(struct nfs_page *req,
819 struct nfs_write_data *data)
820{
821 struct page *page = req->wb_page;
822
823 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
824 nfs_inode_remove_request(req);
825 nfs_unlock_request(req);
826 nfs_end_page_writeback(page);
827}
828
829static int flush_task_priority(int how) 905static int flush_task_priority(int how)
830{ 906{
831 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { 907 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -837,18 +913,18 @@ static int flush_task_priority(int how)
837 return RPC_PRIORITY_NORMAL; 913 return RPC_PRIORITY_NORMAL;
838} 914}
839 915
840int nfs_initiate_write(struct nfs_write_data *data, 916int nfs_initiate_write(struct rpc_clnt *clnt,
841 struct rpc_clnt *clnt, 917 struct nfs_write_data *data,
842 const struct rpc_call_ops *call_ops, 918 const struct rpc_call_ops *call_ops,
843 int how) 919 int how, int flags)
844{ 920{
845 struct inode *inode = data->inode; 921 struct inode *inode = data->header->inode;
846 int priority = flush_task_priority(how); 922 int priority = flush_task_priority(how);
847 struct rpc_task *task; 923 struct rpc_task *task;
848 struct rpc_message msg = { 924 struct rpc_message msg = {
849 .rpc_argp = &data->args, 925 .rpc_argp = &data->args,
850 .rpc_resp = &data->res, 926 .rpc_resp = &data->res,
851 .rpc_cred = data->cred, 927 .rpc_cred = data->header->cred,
852 }; 928 };
853 struct rpc_task_setup task_setup_data = { 929 struct rpc_task_setup task_setup_data = {
854 .rpc_client = clnt, 930 .rpc_client = clnt,
@@ -857,7 +933,7 @@ int nfs_initiate_write(struct nfs_write_data *data,
857 .callback_ops = call_ops, 933 .callback_ops = call_ops,
858 .callback_data = data, 934 .callback_data = data,
859 .workqueue = nfsiod_workqueue, 935 .workqueue = nfsiod_workqueue,
860 .flags = RPC_TASK_ASYNC, 936 .flags = RPC_TASK_ASYNC | flags,
861 .priority = priority, 937 .priority = priority,
862 }; 938 };
863 int ret = 0; 939 int ret = 0;
@@ -892,26 +968,21 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
892/* 968/*
893 * Set up the argument/result storage required for the RPC call. 969 * Set up the argument/result storage required for the RPC call.
894 */ 970 */
895static void nfs_write_rpcsetup(struct nfs_page *req, 971static void nfs_write_rpcsetup(struct nfs_write_data *data,
896 struct nfs_write_data *data,
897 unsigned int count, unsigned int offset, 972 unsigned int count, unsigned int offset,
898 int how) 973 int how, struct nfs_commit_info *cinfo)
899{ 974{
900 struct inode *inode = req->wb_context->dentry->d_inode; 975 struct nfs_page *req = data->header->req;
901 976
902 /* Set up the RPC argument and reply structs 977 /* Set up the RPC argument and reply structs
903 * NB: take care not to mess about with data->commit et al. */ 978 * NB: take care not to mess about with data->commit et al. */
904 979
905 data->req = req; 980 data->args.fh = NFS_FH(data->header->inode);
906 data->inode = inode = req->wb_context->dentry->d_inode;
907 data->cred = req->wb_context->cred;
908
909 data->args.fh = NFS_FH(inode);
910 data->args.offset = req_offset(req) + offset; 981 data->args.offset = req_offset(req) + offset;
911 /* pnfs_set_layoutcommit needs this */ 982 /* pnfs_set_layoutcommit needs this */
912 data->mds_offset = data->args.offset; 983 data->mds_offset = data->args.offset;
913 data->args.pgbase = req->wb_pgbase + offset; 984 data->args.pgbase = req->wb_pgbase + offset;
914 data->args.pages = data->pagevec; 985 data->args.pages = data->pages.pagevec;
915 data->args.count = count; 986 data->args.count = count;
916 data->args.context = get_nfs_open_context(req->wb_context); 987 data->args.context = get_nfs_open_context(req->wb_context);
917 data->args.lock_context = req->wb_lock_context; 988 data->args.lock_context = req->wb_lock_context;
@@ -920,7 +991,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
920 case 0: 991 case 0:
921 break; 992 break;
922 case FLUSH_COND_STABLE: 993 case FLUSH_COND_STABLE:
923 if (nfs_need_commit(NFS_I(inode))) 994 if (nfs_reqs_to_commit(cinfo))
924 break; 995 break;
925 default: 996 default:
926 data->args.stable = NFS_FILE_SYNC; 997 data->args.stable = NFS_FILE_SYNC;
@@ -936,9 +1007,9 @@ static int nfs_do_write(struct nfs_write_data *data,
936 const struct rpc_call_ops *call_ops, 1007 const struct rpc_call_ops *call_ops,
937 int how) 1008 int how)
938{ 1009{
939 struct inode *inode = data->args.context->dentry->d_inode; 1010 struct inode *inode = data->header->inode;
940 1011
941 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); 1012 return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
942} 1013}
943 1014
944static int nfs_do_multiple_writes(struct list_head *head, 1015static int nfs_do_multiple_writes(struct list_head *head,
@@ -951,7 +1022,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
951 while (!list_empty(head)) { 1022 while (!list_empty(head)) {
952 int ret2; 1023 int ret2;
953 1024
954 data = list_entry(head->next, struct nfs_write_data, list); 1025 data = list_first_entry(head, struct nfs_write_data, list);
955 list_del_init(&data->list); 1026 list_del_init(&data->list);
956 1027
957 ret2 = nfs_do_write(data, call_ops, how); 1028 ret2 = nfs_do_write(data, call_ops, how);
@@ -967,31 +1038,60 @@ static int nfs_do_multiple_writes(struct list_head *head,
967 */ 1038 */
968static void nfs_redirty_request(struct nfs_page *req) 1039static void nfs_redirty_request(struct nfs_page *req)
969{ 1040{
970 struct page *page = req->wb_page;
971
972 nfs_mark_request_dirty(req); 1041 nfs_mark_request_dirty(req);
973 nfs_unlock_request(req); 1042 nfs_unlock_request(req);
974 nfs_end_page_writeback(page); 1043 nfs_end_page_writeback(req->wb_page);
1044 nfs_release_request(req);
1045}
1046
1047static void nfs_async_write_error(struct list_head *head)
1048{
1049 struct nfs_page *req;
1050
1051 while (!list_empty(head)) {
1052 req = nfs_list_entry(head->next);
1053 nfs_list_remove_request(req);
1054 nfs_redirty_request(req);
1055 }
1056}
1057
1058static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1059 .error_cleanup = nfs_async_write_error,
1060 .completion = nfs_write_completion,
1061};
1062
1063static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
1064 struct nfs_pgio_header *hdr)
1065{
1066 set_bit(NFS_IOHDR_REDO, &hdr->flags);
1067 while (!list_empty(&hdr->rpc_list)) {
1068 struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
1069 struct nfs_write_data, list);
1070 list_del(&data->list);
1071 nfs_writedata_release(data);
1072 }
1073 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
975} 1074}
976 1075
977/* 1076/*
978 * Generate multiple small requests to write out a single 1077 * Generate multiple small requests to write out a single
979 * contiguous dirty area on one page. 1078 * contiguous dirty area on one page.
980 */ 1079 */
981static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 1080static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
1081 struct nfs_pgio_header *hdr)
982{ 1082{
983 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 1083 struct nfs_page *req = hdr->req;
984 struct page *page = req->wb_page; 1084 struct page *page = req->wb_page;
985 struct nfs_write_data *data; 1085 struct nfs_write_data *data;
986 size_t wsize = desc->pg_bsize, nbytes; 1086 size_t wsize = desc->pg_bsize, nbytes;
987 unsigned int offset; 1087 unsigned int offset;
988 int requests = 0; 1088 int requests = 0;
989 int ret = 0; 1089 struct nfs_commit_info cinfo;
990 1090
991 nfs_list_remove_request(req); 1091 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
992 1092
993 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1093 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
994 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || 1094 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
995 desc->pg_count > wsize)) 1095 desc->pg_count > wsize))
996 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1096 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
997 1097
@@ -1001,28 +1101,22 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
1001 do { 1101 do {
1002 size_t len = min(nbytes, wsize); 1102 size_t len = min(nbytes, wsize);
1003 1103
1004 data = nfs_writedata_alloc(1); 1104 data = nfs_writedata_alloc(hdr, 1);
1005 if (!data) 1105 if (!data) {
1006 goto out_bad; 1106 nfs_flush_error(desc, hdr);
1007 data->pagevec[0] = page; 1107 return -ENOMEM;
1008 nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); 1108 }
1009 list_add(&data->list, res); 1109 data->pages.pagevec[0] = page;
1110 nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
1111 list_add(&data->list, &hdr->rpc_list);
1010 requests++; 1112 requests++;
1011 nbytes -= len; 1113 nbytes -= len;
1012 offset += len; 1114 offset += len;
1013 } while (nbytes != 0); 1115 } while (nbytes != 0);
1014 atomic_set(&req->wb_complete, requests); 1116 nfs_list_remove_request(req);
1015 desc->pg_rpc_callops = &nfs_write_partial_ops; 1117 nfs_list_add_request(req, &hdr->pages);
1016 return ret; 1118 desc->pg_rpc_callops = &nfs_write_common_ops;
1017 1119 return 0;
1018out_bad:
1019 while (!list_empty(res)) {
1020 data = list_entry(res->next, struct nfs_write_data, list);
1021 list_del(&data->list);
1022 nfs_writedata_release(data);
1023 }
1024 nfs_redirty_request(req);
1025 return -ENOMEM;
1026} 1120}
1027 1121
1028/* 1122/*
@@ -1033,62 +1127,71 @@ out_bad:
1033 * This is the case if nfs_updatepage detects a conflicting request 1127 * This is the case if nfs_updatepage detects a conflicting request
1034 * that has been written but not committed. 1128 * that has been written but not committed.
1035 */ 1129 */
1036static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 1130static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
1131 struct nfs_pgio_header *hdr)
1037{ 1132{
1038 struct nfs_page *req; 1133 struct nfs_page *req;
1039 struct page **pages; 1134 struct page **pages;
1040 struct nfs_write_data *data; 1135 struct nfs_write_data *data;
1041 struct list_head *head = &desc->pg_list; 1136 struct list_head *head = &desc->pg_list;
1042 int ret = 0; 1137 struct nfs_commit_info cinfo;
1043 1138
1044 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, 1139 data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
1045 desc->pg_count)); 1140 desc->pg_count));
1046 if (!data) { 1141 if (!data) {
1047 while (!list_empty(head)) { 1142 nfs_flush_error(desc, hdr);
1048 req = nfs_list_entry(head->next); 1143 return -ENOMEM;
1049 nfs_list_remove_request(req);
1050 nfs_redirty_request(req);
1051 }
1052 ret = -ENOMEM;
1053 goto out;
1054 } 1144 }
1055 pages = data->pagevec; 1145
1146 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1147 pages = data->pages.pagevec;
1056 while (!list_empty(head)) { 1148 while (!list_empty(head)) {
1057 req = nfs_list_entry(head->next); 1149 req = nfs_list_entry(head->next);
1058 nfs_list_remove_request(req); 1150 nfs_list_remove_request(req);
1059 nfs_list_add_request(req, &data->pages); 1151 nfs_list_add_request(req, &hdr->pages);
1060 *pages++ = req->wb_page; 1152 *pages++ = req->wb_page;
1061 } 1153 }
1062 req = nfs_list_entry(data->pages.next);
1063 1154
1064 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1155 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1065 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1156 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
1066 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1157 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1067 1158
1068 /* Set up the argument struct */ 1159 /* Set up the argument struct */
1069 nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); 1160 nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
1070 list_add(&data->list, res); 1161 list_add(&data->list, &hdr->rpc_list);
1071 desc->pg_rpc_callops = &nfs_write_full_ops; 1162 desc->pg_rpc_callops = &nfs_write_common_ops;
1072out: 1163 return 0;
1073 return ret;
1074} 1164}
1075 1165
1076int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) 1166int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
1167 struct nfs_pgio_header *hdr)
1077{ 1168{
1078 if (desc->pg_bsize < PAGE_CACHE_SIZE) 1169 if (desc->pg_bsize < PAGE_CACHE_SIZE)
1079 return nfs_flush_multi(desc, head); 1170 return nfs_flush_multi(desc, hdr);
1080 return nfs_flush_one(desc, head); 1171 return nfs_flush_one(desc, hdr);
1081} 1172}
1082 1173
1083static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1174static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1084{ 1175{
1085 LIST_HEAD(head); 1176 struct nfs_write_header *whdr;
1177 struct nfs_pgio_header *hdr;
1086 int ret; 1178 int ret;
1087 1179
1088 ret = nfs_generic_flush(desc, &head); 1180 whdr = nfs_writehdr_alloc();
1181 if (!whdr) {
1182 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1183 return -ENOMEM;
1184 }
1185 hdr = &whdr->header;
1186 nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
1187 atomic_inc(&hdr->refcnt);
1188 ret = nfs_generic_flush(desc, hdr);
1089 if (ret == 0) 1189 if (ret == 0)
1090 ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, 1190 ret = nfs_do_multiple_writes(&hdr->rpc_list,
1091 desc->pg_ioflags); 1191 desc->pg_rpc_callops,
1192 desc->pg_ioflags);
1193 if (atomic_dec_and_test(&hdr->refcnt))
1194 hdr->completion_ops->completion(hdr);
1092 return ret; 1195 return ret;
1093} 1196}
1094 1197
@@ -1098,9 +1201,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1098}; 1201};
1099 1202
1100void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 1203void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
1101 struct inode *inode, int ioflags) 1204 struct inode *inode, int ioflags,
1205 const struct nfs_pgio_completion_ops *compl_ops)
1102{ 1206{
1103 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, 1207 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
1104 NFS_SERVER(inode)->wsize, ioflags); 1208 NFS_SERVER(inode)->wsize, ioflags);
1105} 1209}
1106 1210
@@ -1111,80 +1215,27 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1111} 1215}
1112EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); 1216EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1113 1217
1114static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1218void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1115 struct inode *inode, int ioflags) 1219 struct inode *inode, int ioflags,
1220 const struct nfs_pgio_completion_ops *compl_ops)
1116{ 1221{
1117 if (!pnfs_pageio_init_write(pgio, inode, ioflags)) 1222 if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
1118 nfs_pageio_init_write_mds(pgio, inode, ioflags); 1223 nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
1119} 1224}
1120 1225
1121/* 1226void nfs_write_prepare(struct rpc_task *task, void *calldata)
1122 * Handle a write reply that flushed part of a page.
1123 */
1124static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1125{ 1227{
1126 struct nfs_write_data *data = calldata; 1228 struct nfs_write_data *data = calldata;
1127 1229 NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
1128 dprintk("NFS: %5u write(%s/%lld %d@%lld)",
1129 task->tk_pid,
1130 data->req->wb_context->dentry->d_inode->i_sb->s_id,
1131 (long long)
1132 NFS_FILEID(data->req->wb_context->dentry->d_inode),
1133 data->req->wb_bytes, (long long)req_offset(data->req));
1134
1135 nfs_writeback_done(task, data);
1136} 1230}
1137 1231
1138static void nfs_writeback_release_partial(void *calldata) 1232void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1139{ 1233{
1140 struct nfs_write_data *data = calldata; 1234 struct nfs_commit_data *data = calldata;
1141 struct nfs_page *req = data->req;
1142 struct page *page = req->wb_page;
1143 int status = data->task.tk_status;
1144 1235
1145 if (status < 0) { 1236 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1146 nfs_set_pageerror(page);
1147 nfs_context_set_write_error(req->wb_context, status);
1148 dprintk(", error = %d\n", status);
1149 goto out;
1150 }
1151
1152 if (nfs_write_need_commit(data)) {
1153 struct inode *inode = page->mapping->host;
1154
1155 spin_lock(&inode->i_lock);
1156 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
1157 /* Do nothing we need to resend the writes */
1158 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
1159 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1160 dprintk(" defer commit\n");
1161 } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
1162 set_bit(PG_NEED_RESCHED, &req->wb_flags);
1163 clear_bit(PG_NEED_COMMIT, &req->wb_flags);
1164 dprintk(" server reboot detected\n");
1165 }
1166 spin_unlock(&inode->i_lock);
1167 } else
1168 dprintk(" OK\n");
1169
1170out:
1171 if (atomic_dec_and_test(&req->wb_complete))
1172 nfs_writepage_release(req, data);
1173 nfs_writedata_release(calldata);
1174} 1237}
1175 1238
1176void nfs_write_prepare(struct rpc_task *task, void *calldata)
1177{
1178 struct nfs_write_data *data = calldata;
1179 NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
1180}
1181
1182static const struct rpc_call_ops nfs_write_partial_ops = {
1183 .rpc_call_prepare = nfs_write_prepare,
1184 .rpc_call_done = nfs_writeback_done_partial,
1185 .rpc_release = nfs_writeback_release_partial,
1186};
1187
1188/* 1239/*
1189 * Handle a write reply that flushes a whole page. 1240 * Handle a write reply that flushes a whole page.
1190 * 1241 *
@@ -1192,59 +1243,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
1192 * writebacks since the page->count is kept > 1 for as long 1243 * writebacks since the page->count is kept > 1 for as long
1193 * as the page has a write request pending. 1244 * as the page has a write request pending.
1194 */ 1245 */
1195static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) 1246static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
1196{ 1247{
1197 struct nfs_write_data *data = calldata; 1248 struct nfs_write_data *data = calldata;
1198 1249
1199 nfs_writeback_done(task, data); 1250 nfs_writeback_done(task, data);
1200} 1251}
1201 1252
1202static void nfs_writeback_release_full(void *calldata) 1253static void nfs_writeback_release_common(void *calldata)
1203{ 1254{
1204 struct nfs_write_data *data = calldata; 1255 struct nfs_write_data *data = calldata;
1256 struct nfs_pgio_header *hdr = data->header;
1205 int status = data->task.tk_status; 1257 int status = data->task.tk_status;
1258 struct nfs_page *req = hdr->req;
1206 1259
1207 /* Update attributes as result of writeback. */ 1260 if ((status >= 0) && nfs_write_need_commit(data)) {
1208 while (!list_empty(&data->pages)) { 1261 spin_lock(&hdr->lock);
1209 struct nfs_page *req = nfs_list_entry(data->pages.next); 1262 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1210 struct page *page = req->wb_page; 1263 ; /* Do nothing */
1211 1264 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1212 nfs_list_remove_request(req);
1213
1214 dprintk("NFS: %5u write (%s/%lld %d@%lld)",
1215 data->task.tk_pid,
1216 req->wb_context->dentry->d_inode->i_sb->s_id,
1217 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1218 req->wb_bytes,
1219 (long long)req_offset(req));
1220
1221 if (status < 0) {
1222 nfs_set_pageerror(page);
1223 nfs_context_set_write_error(req->wb_context, status);
1224 dprintk(", error = %d\n", status);
1225 goto remove_request;
1226 }
1227
1228 if (nfs_write_need_commit(data)) {
1229 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1265 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1230 nfs_mark_request_commit(req, data->lseg); 1266 else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
1231 dprintk(" marked for commit\n"); 1267 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1232 goto next; 1268 spin_unlock(&hdr->lock);
1233 }
1234 dprintk(" OK\n");
1235remove_request:
1236 nfs_inode_remove_request(req);
1237 next:
1238 nfs_unlock_request(req);
1239 nfs_end_page_writeback(page);
1240 } 1269 }
1241 nfs_writedata_release(calldata); 1270 nfs_writedata_release(data);
1242} 1271}
1243 1272
1244static const struct rpc_call_ops nfs_write_full_ops = { 1273static const struct rpc_call_ops nfs_write_common_ops = {
1245 .rpc_call_prepare = nfs_write_prepare, 1274 .rpc_call_prepare = nfs_write_prepare,
1246 .rpc_call_done = nfs_writeback_done_full, 1275 .rpc_call_done = nfs_writeback_done_common,
1247 .rpc_release = nfs_writeback_release_full, 1276 .rpc_release = nfs_writeback_release_common,
1248}; 1277};
1249 1278
1250 1279
@@ -1255,6 +1284,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1255{ 1284{
1256 struct nfs_writeargs *argp = &data->args; 1285 struct nfs_writeargs *argp = &data->args;
1257 struct nfs_writeres *resp = &data->res; 1286 struct nfs_writeres *resp = &data->res;
1287 struct inode *inode = data->header->inode;
1258 int status; 1288 int status;
1259 1289
1260 dprintk("NFS: %5u nfs_writeback_done (status %d)\n", 1290 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1267,10 +1297,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1267 * another writer had changed the file, but some applications 1297 * another writer had changed the file, but some applications
1268 * depend on tighter cache coherency when writing. 1298 * depend on tighter cache coherency when writing.
1269 */ 1299 */
1270 status = NFS_PROTO(data->inode)->write_done(task, data); 1300 status = NFS_PROTO(inode)->write_done(task, data);
1271 if (status != 0) 1301 if (status != 0)
1272 return; 1302 return;
1273 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1303 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1274 1304
1275#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1305#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1276 if (resp->verf->committed < argp->stable && task->tk_status >= 0) { 1306 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1288,46 +1318,47 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1288 if (time_before(complain, jiffies)) { 1318 if (time_before(complain, jiffies)) {
1289 dprintk("NFS: faulty NFS server %s:" 1319 dprintk("NFS: faulty NFS server %s:"
1290 " (committed = %d) != (stable = %d)\n", 1320 " (committed = %d) != (stable = %d)\n",
1291 NFS_SERVER(data->inode)->nfs_client->cl_hostname, 1321 NFS_SERVER(inode)->nfs_client->cl_hostname,
1292 resp->verf->committed, argp->stable); 1322 resp->verf->committed, argp->stable);
1293 complain = jiffies + 300 * HZ; 1323 complain = jiffies + 300 * HZ;
1294 } 1324 }
1295 } 1325 }
1296#endif 1326#endif
1297 /* Is this a short write? */ 1327 if (task->tk_status < 0)
1298 if (task->tk_status >= 0 && resp->count < argp->count) { 1328 nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
1329 else if (resp->count < argp->count) {
1299 static unsigned long complain; 1330 static unsigned long complain;
1300 1331
1301 nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); 1332 /* This a short write! */
1333 nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
1302 1334
1303 /* Has the server at least made some progress? */ 1335 /* Has the server at least made some progress? */
1304 if (resp->count != 0) { 1336 if (resp->count == 0) {
1305 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1337 if (time_before(complain, jiffies)) {
1306 if (resp->verf->committed != NFS_UNSTABLE) { 1338 printk(KERN_WARNING
1307 /* Resend from where the server left off */ 1339 "NFS: Server wrote zero bytes, expected %u.\n",
1308 data->mds_offset += resp->count; 1340 argp->count);
1309 argp->offset += resp->count; 1341 complain = jiffies + 300 * HZ;
1310 argp->pgbase += resp->count;
1311 argp->count -= resp->count;
1312 } else {
1313 /* Resend as a stable write in order to avoid
1314 * headaches in the case of a server crash.
1315 */
1316 argp->stable = NFS_FILE_SYNC;
1317 } 1342 }
1318 rpc_restart_call_prepare(task); 1343 nfs_set_pgio_error(data->header, -EIO, argp->offset);
1344 task->tk_status = -EIO;
1319 return; 1345 return;
1320 } 1346 }
1321 if (time_before(complain, jiffies)) { 1347 /* Was this an NFSv2 write or an NFSv3 stable write? */
1322 printk(KERN_WARNING 1348 if (resp->verf->committed != NFS_UNSTABLE) {
1323 "NFS: Server wrote zero bytes, expected %u.\n", 1349 /* Resend from where the server left off */
1324 argp->count); 1350 data->mds_offset += resp->count;
1325 complain = jiffies + 300 * HZ; 1351 argp->offset += resp->count;
1352 argp->pgbase += resp->count;
1353 argp->count -= resp->count;
1354 } else {
1355 /* Resend as a stable write in order to avoid
1356 * headaches in the case of a server crash.
1357 */
1358 argp->stable = NFS_FILE_SYNC;
1326 } 1359 }
1327 /* Can't do anything about it except throw an error. */ 1360 rpc_restart_call_prepare(task);
1328 task->tk_status = -EIO;
1329 } 1361 }
1330 return;
1331} 1362}
1332 1363
1333 1364
@@ -1347,26 +1378,23 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1347 return (ret < 0) ? ret : 1; 1378 return (ret < 0) ? ret : 1;
1348} 1379}
1349 1380
1350void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1381static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1351{ 1382{
1352 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1383 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1353 smp_mb__after_clear_bit(); 1384 smp_mb__after_clear_bit();
1354 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1385 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1355} 1386}
1356EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1357 1387
1358void nfs_commitdata_release(void *data) 1388void nfs_commitdata_release(struct nfs_commit_data *data)
1359{ 1389{
1360 struct nfs_write_data *wdata = data; 1390 put_nfs_open_context(data->context);
1361 1391 nfs_commit_free(data);
1362 put_nfs_open_context(wdata->args.context);
1363 nfs_commit_free(wdata);
1364} 1392}
1365EXPORT_SYMBOL_GPL(nfs_commitdata_release); 1393EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1366 1394
1367int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, 1395int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1368 const struct rpc_call_ops *call_ops, 1396 const struct rpc_call_ops *call_ops,
1369 int how) 1397 int how, int flags)
1370{ 1398{
1371 struct rpc_task *task; 1399 struct rpc_task *task;
1372 int priority = flush_task_priority(how); 1400 int priority = flush_task_priority(how);
@@ -1382,7 +1410,7 @@ int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1382 .callback_ops = call_ops, 1410 .callback_ops = call_ops,
1383 .callback_data = data, 1411 .callback_data = data,
1384 .workqueue = nfsiod_workqueue, 1412 .workqueue = nfsiod_workqueue,
1385 .flags = RPC_TASK_ASYNC, 1413 .flags = RPC_TASK_ASYNC | flags,
1386 .priority = priority, 1414 .priority = priority,
1387 }; 1415 };
1388 /* Set up the initial task struct. */ 1416 /* Set up the initial task struct. */
@@ -1403,9 +1431,10 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1403/* 1431/*
1404 * Set up the argument/result storage required for the RPC call. 1432 * Set up the argument/result storage required for the RPC call.
1405 */ 1433 */
1406void nfs_init_commit(struct nfs_write_data *data, 1434void nfs_init_commit(struct nfs_commit_data *data,
1407 struct list_head *head, 1435 struct list_head *head,
1408 struct pnfs_layout_segment *lseg) 1436 struct pnfs_layout_segment *lseg,
1437 struct nfs_commit_info *cinfo)
1409{ 1438{
1410 struct nfs_page *first = nfs_list_entry(head->next); 1439 struct nfs_page *first = nfs_list_entry(head->next);
1411 struct inode *inode = first->wb_context->dentry->d_inode; 1440 struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1419,13 +1448,14 @@ void nfs_init_commit(struct nfs_write_data *data,
1419 data->cred = first->wb_context->cred; 1448 data->cred = first->wb_context->cred;
1420 data->lseg = lseg; /* reference transferred */ 1449 data->lseg = lseg; /* reference transferred */
1421 data->mds_ops = &nfs_commit_ops; 1450 data->mds_ops = &nfs_commit_ops;
1451 data->completion_ops = cinfo->completion_ops;
1452 data->dreq = cinfo->dreq;
1422 1453
1423 data->args.fh = NFS_FH(data->inode); 1454 data->args.fh = NFS_FH(data->inode);
1424 /* Note: we always request a commit of the entire inode */ 1455 /* Note: we always request a commit of the entire inode */
1425 data->args.offset = 0; 1456 data->args.offset = 0;
1426 data->args.count = 0; 1457 data->args.count = 0;
1427 data->args.context = get_nfs_open_context(first->wb_context); 1458 data->context = get_nfs_open_context(first->wb_context);
1428 data->res.count = 0;
1429 data->res.fattr = &data->fattr; 1459 data->res.fattr = &data->fattr;
1430 data->res.verf = &data->verf; 1460 data->res.verf = &data->verf;
1431 nfs_fattr_init(&data->fattr); 1461 nfs_fattr_init(&data->fattr);
@@ -1433,18 +1463,21 @@ void nfs_init_commit(struct nfs_write_data *data,
1433EXPORT_SYMBOL_GPL(nfs_init_commit); 1463EXPORT_SYMBOL_GPL(nfs_init_commit);
1434 1464
1435void nfs_retry_commit(struct list_head *page_list, 1465void nfs_retry_commit(struct list_head *page_list,
1436 struct pnfs_layout_segment *lseg) 1466 struct pnfs_layout_segment *lseg,
1467 struct nfs_commit_info *cinfo)
1437{ 1468{
1438 struct nfs_page *req; 1469 struct nfs_page *req;
1439 1470
1440 while (!list_empty(page_list)) { 1471 while (!list_empty(page_list)) {
1441 req = nfs_list_entry(page_list->next); 1472 req = nfs_list_entry(page_list->next);
1442 nfs_list_remove_request(req); 1473 nfs_list_remove_request(req);
1443 nfs_mark_request_commit(req, lseg); 1474 nfs_mark_request_commit(req, lseg, cinfo);
1444 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1475 if (!cinfo->dreq) {
1445 dec_bdi_stat(req->wb_page->mapping->backing_dev_info, 1476 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1446 BDI_RECLAIMABLE); 1477 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1447 nfs_unlock_request(req); 1478 BDI_RECLAIMABLE);
1479 }
1480 nfs_unlock_and_release_request(req);
1448 } 1481 }
1449} 1482}
1450EXPORT_SYMBOL_GPL(nfs_retry_commit); 1483EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1453,9 +1486,10 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
1453 * Commit dirty pages 1486 * Commit dirty pages
1454 */ 1487 */
1455static int 1488static int
1456nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1489nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1490 struct nfs_commit_info *cinfo)
1457{ 1491{
1458 struct nfs_write_data *data; 1492 struct nfs_commit_data *data;
1459 1493
1460 data = nfs_commitdata_alloc(); 1494 data = nfs_commitdata_alloc();
1461 1495
@@ -1463,11 +1497,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1463 goto out_bad; 1497 goto out_bad;
1464 1498
1465 /* Set up the argument struct */ 1499 /* Set up the argument struct */
1466 nfs_init_commit(data, head, NULL); 1500 nfs_init_commit(data, head, NULL, cinfo);
1467 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); 1501 atomic_inc(&cinfo->mds->rpcs_out);
1502 return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
1503 how, 0);
1468 out_bad: 1504 out_bad:
1469 nfs_retry_commit(head, NULL); 1505 nfs_retry_commit(head, NULL, cinfo);
1470 nfs_commit_clear_lock(NFS_I(inode)); 1506 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1471 return -ENOMEM; 1507 return -ENOMEM;
1472} 1508}
1473 1509
@@ -1476,7 +1512,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1476 */ 1512 */
1477static void nfs_commit_done(struct rpc_task *task, void *calldata) 1513static void nfs_commit_done(struct rpc_task *task, void *calldata)
1478{ 1514{
1479 struct nfs_write_data *data = calldata; 1515 struct nfs_commit_data *data = calldata;
1480 1516
1481 dprintk("NFS: %5u nfs_commit_done (status %d)\n", 1517 dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1482 task->tk_pid, task->tk_status); 1518 task->tk_pid, task->tk_status);
@@ -1485,10 +1521,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1485 NFS_PROTO(data->inode)->commit_done(task, data); 1521 NFS_PROTO(data->inode)->commit_done(task, data);
1486} 1522}
1487 1523
1488void nfs_commit_release_pages(struct nfs_write_data *data) 1524static void nfs_commit_release_pages(struct nfs_commit_data *data)
1489{ 1525{
1490 struct nfs_page *req; 1526 struct nfs_page *req;
1491 int status = data->task.tk_status; 1527 int status = data->task.tk_status;
1528 struct nfs_commit_info cinfo;
1492 1529
1493 while (!list_empty(&data->pages)) { 1530 while (!list_empty(&data->pages)) {
1494 req = nfs_list_entry(data->pages.next); 1531 req = nfs_list_entry(data->pages.next);
@@ -1519,42 +1556,59 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1519 dprintk(" mismatch\n"); 1556 dprintk(" mismatch\n");
1520 nfs_mark_request_dirty(req); 1557 nfs_mark_request_dirty(req);
1521 next: 1558 next:
1522 nfs_unlock_request(req); 1559 nfs_unlock_and_release_request(req);
1523 } 1560 }
1561 nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1562 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
1563 nfs_commit_clear_lock(NFS_I(data->inode));
1524} 1564}
1525EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1526 1565
1527static void nfs_commit_release(void *calldata) 1566static void nfs_commit_release(void *calldata)
1528{ 1567{
1529 struct nfs_write_data *data = calldata; 1568 struct nfs_commit_data *data = calldata;
1530 1569
1531 nfs_commit_release_pages(data); 1570 data->completion_ops->completion(data);
1532 nfs_commit_clear_lock(NFS_I(data->inode));
1533 nfs_commitdata_release(calldata); 1571 nfs_commitdata_release(calldata);
1534} 1572}
1535 1573
1536static const struct rpc_call_ops nfs_commit_ops = { 1574static const struct rpc_call_ops nfs_commit_ops = {
1537 .rpc_call_prepare = nfs_write_prepare, 1575 .rpc_call_prepare = nfs_commit_prepare,
1538 .rpc_call_done = nfs_commit_done, 1576 .rpc_call_done = nfs_commit_done,
1539 .rpc_release = nfs_commit_release, 1577 .rpc_release = nfs_commit_release,
1540}; 1578};
1541 1579
1580static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1581 .completion = nfs_commit_release_pages,
1582 .error_cleanup = nfs_commit_clear_lock,
1583};
1584
1585int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
1586 int how, struct nfs_commit_info *cinfo)
1587{
1588 int status;
1589
1590 status = pnfs_commit_list(inode, head, how, cinfo);
1591 if (status == PNFS_NOT_ATTEMPTED)
1592 status = nfs_commit_list(inode, head, how, cinfo);
1593 return status;
1594}
1595
1542int nfs_commit_inode(struct inode *inode, int how) 1596int nfs_commit_inode(struct inode *inode, int how)
1543{ 1597{
1544 LIST_HEAD(head); 1598 LIST_HEAD(head);
1599 struct nfs_commit_info cinfo;
1545 int may_wait = how & FLUSH_SYNC; 1600 int may_wait = how & FLUSH_SYNC;
1546 int res; 1601 int res;
1547 1602
1548 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1603 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1549 if (res <= 0) 1604 if (res <= 0)
1550 goto out_mark_dirty; 1605 goto out_mark_dirty;
1551 res = nfs_scan_commit(inode, &head); 1606 nfs_init_cinfo_from_inode(&cinfo, inode);
1607 res = nfs_scan_commit(inode, &head, &cinfo);
1552 if (res) { 1608 if (res) {
1553 int error; 1609 int error;
1554 1610
1555 error = pnfs_commit_list(inode, &head, how); 1611 error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1556 if (error == PNFS_NOT_ATTEMPTED)
1557 error = nfs_commit_list(inode, &head, how);
1558 if (error < 0) 1612 if (error < 0)
1559 return error; 1613 return error;
1560 if (!may_wait) 1614 if (!may_wait)
@@ -1585,14 +1639,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1585 int ret = 0; 1639 int ret = 0;
1586 1640
1587 /* no commits means nothing needs to be done */ 1641 /* no commits means nothing needs to be done */
1588 if (!nfsi->ncommit) 1642 if (!nfsi->commit_info.ncommit)
1589 return ret; 1643 return ret;
1590 1644
1591 if (wbc->sync_mode == WB_SYNC_NONE) { 1645 if (wbc->sync_mode == WB_SYNC_NONE) {
1592 /* Don't commit yet if this is a non-blocking flush and there 1646 /* Don't commit yet if this is a non-blocking flush and there
1593 * are a lot of outstanding writes for this mapping. 1647 * are a lot of outstanding writes for this mapping.
1594 */ 1648 */
1595 if (nfsi->ncommit <= (nfsi->npages >> 1)) 1649 if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
1596 goto out_mark_dirty; 1650 goto out_mark_dirty;
1597 1651
1598 /* don't wait for the COMMIT response */ 1652 /* don't wait for the COMMIT response */
@@ -1665,7 +1719,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1665 req = nfs_page_find_request(page); 1719 req = nfs_page_find_request(page);
1666 if (req == NULL) 1720 if (req == NULL)
1667 break; 1721 break;
1668 if (nfs_lock_request_dontget(req)) { 1722 if (nfs_lock_request(req)) {
1669 nfs_clear_request_commit(req); 1723 nfs_clear_request_commit(req);
1670 nfs_inode_remove_request(req); 1724 nfs_inode_remove_request(req);
1671 /* 1725 /*
@@ -1673,7 +1727,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1673 * page as being dirty 1727 * page as being dirty
1674 */ 1728 */
1675 cancel_dirty_page(page, PAGE_CACHE_SIZE); 1729 cancel_dirty_page(page, PAGE_CACHE_SIZE);
1676 nfs_unlock_request(req); 1730 nfs_unlock_and_release_request(req);
1677 break; 1731 break;
1678 } 1732 }
1679 ret = nfs_wait_on_request(req); 1733 ret = nfs_wait_on_request(req);
@@ -1742,7 +1796,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1742int __init nfs_init_writepagecache(void) 1796int __init nfs_init_writepagecache(void)
1743{ 1797{
1744 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1798 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1745 sizeof(struct nfs_write_data), 1799 sizeof(struct nfs_write_header),
1746 0, SLAB_HWCACHE_ALIGN, 1800 0, SLAB_HWCACHE_ALIGN,
1747 NULL); 1801 NULL);
1748 if (nfs_wdata_cachep == NULL) 1802 if (nfs_wdata_cachep == NULL)
@@ -1753,6 +1807,13 @@ int __init nfs_init_writepagecache(void)
1753 if (nfs_wdata_mempool == NULL) 1807 if (nfs_wdata_mempool == NULL)
1754 return -ENOMEM; 1808 return -ENOMEM;
1755 1809
1810 nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
1811 sizeof(struct nfs_commit_data),
1812 0, SLAB_HWCACHE_ALIGN,
1813 NULL);
1814 if (nfs_cdata_cachep == NULL)
1815 return -ENOMEM;
1816
1756 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, 1817 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1757 nfs_wdata_cachep); 1818 nfs_wdata_cachep);
1758 if (nfs_commit_mempool == NULL) 1819 if (nfs_commit_mempool == NULL)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d2d3108a611c..d7d711876b6a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -411,12 +411,13 @@ static const struct file_operations proc_lstats_operations = {
411 411
412static int proc_oom_score(struct task_struct *task, char *buffer) 412static int proc_oom_score(struct task_struct *task, char *buffer)
413{ 413{
414 unsigned long totalpages = totalram_pages + total_swap_pages;
414 unsigned long points = 0; 415 unsigned long points = 0;
415 416
416 read_lock(&tasklist_lock); 417 read_lock(&tasklist_lock);
417 if (pid_alive(task)) 418 if (pid_alive(task))
418 points = oom_badness(task, NULL, NULL, 419 points = oom_badness(task, NULL, NULL, totalpages) *
419 totalram_pages + total_swap_pages); 420 1000 / totalpages;
420 read_unlock(&tasklist_lock); 421 read_unlock(&tasklist_lock);
421 return sprintf(buffer, "%lu\n", points); 422 return sprintf(buffer, "%lu\n", points);
422} 423}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1030a716d155..7faaf2acc570 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -784,7 +784,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
784 784
785 /* find the first VMA at or above 'addr' */ 785 /* find the first VMA at or above 'addr' */
786 vma = find_vma(walk->mm, addr); 786 vma = find_vma(walk->mm, addr);
787 if (pmd_trans_huge_lock(pmd, vma) == 1) { 787 if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
788 for (; addr != end; addr += PAGE_SIZE) { 788 for (; addr != end; addr += PAGE_SIZE) {
789 unsigned long offset; 789 unsigned long offset;
790 790
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index e2768f188f55..6f2b45a9b6bc 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd)
445#endif /* __HAVE_ARCH_PMD_WRITE */ 445#endif /* __HAVE_ARCH_PMD_WRITE */
446#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 446#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
447 447
448#ifndef pmd_read_atomic
449static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
450{
451 /*
452 * Depend on compiler for an atomic pmd read. NOTE: this is
453 * only going to work, if the pmdval_t isn't larger than
454 * an unsigned long.
455 */
456 return *pmdp;
457}
458#endif
459
448/* 460/*
449 * This function is meant to be used by sites walking pagetables with 461 * This function is meant to be used by sites walking pagetables with
450 * the mmap_sem hold in read mode to protect against MADV_DONTNEED and 462 * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
@@ -458,11 +470,17 @@ static inline int pmd_write(pmd_t pmd)
458 * undefined so behaving like if the pmd was none is safe (because it 470 * undefined so behaving like if the pmd was none is safe (because it
459 * can return none anyway). The compiler level barrier() is critically 471 * can return none anyway). The compiler level barrier() is critically
460 * important to compute the two checks atomically on the same pmdval. 472 * important to compute the two checks atomically on the same pmdval.
473 *
474 * For 32bit kernels with a 64bit large pmd_t this automatically takes
475 * care of reading the pmd atomically to avoid SMP race conditions
476 * against pmd_populate() when the mmap_sem is hold for reading by the
477 * caller (a special atomic read not done by "gcc" as in the generic
478 * version above, is also needed when THP is disabled because the page
479 * fault can populate the pmd from under us).
461 */ 480 */
462static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) 481static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
463{ 482{
464 /* depend on compiler for an atomic pmd read */ 483 pmd_t pmdval = pmd_read_atomic(pmd);
465 pmd_t pmdval = *pmd;
466 /* 484 /*
467 * The barrier will stabilize the pmdval in a register or on 485 * The barrier will stabilize the pmdval in a register or on
468 * the stack so that it will stop changing under the code. 486 * the stack so that it will stop changing under the code.
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 4cd59b95858f..7185b8f15ced 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -225,6 +225,7 @@ header-y += kd.h
225header-y += kdev_t.h 225header-y += kdev_t.h
226header-y += kernel.h 226header-y += kernel.h
227header-y += kernelcapi.h 227header-y += kernelcapi.h
228header-y += kernel-page-flags.h
228header-y += keyboard.h 229header-y += keyboard.h
229header-y += keyctl.h 230header-y += keyctl.h
230header-y += l2tp.h 231header-y += l2tp.h
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1a0cd270bb7a..324fe08ea3b1 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -135,9 +135,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
135extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, 135extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
136 int flags); 136 int flags);
137 137
138extern void *alloc_bootmem_section(unsigned long size,
139 unsigned long section_nr);
140
141#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP 138#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
142extern void *alloc_remap(int nid, unsigned long size); 139extern void *alloc_remap(int nid, unsigned long size);
143#else 140#else
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 72961c39576a..aaac4bba6f5c 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -30,6 +30,13 @@ struct pt_regs;
30#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) 30#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
31#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) 31#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
32 32
33/*
34 * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
35 * expression but avoids the generation of any code, even if that expression
36 * has side-effects.
37 */
38#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
39
33/** 40/**
34 * BUILD_BUG_ON - break compile if a condition is true. 41 * BUILD_BUG_ON - break compile if a condition is true.
35 * @condition: the condition which the compiler should know is false. 42 * @condition: the condition which the compiler should know is false.
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 51a90b7f2d60..e988037abd2a 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,6 +1,8 @@
1#ifndef _LINUX_COMPACTION_H 1#ifndef _LINUX_COMPACTION_H
2#define _LINUX_COMPACTION_H 2#define _LINUX_COMPACTION_H
3 3
4#include <linux/node.h>
5
4/* Return values for compact_zone() and try_to_compact_pages() */ 6/* Return values for compact_zone() and try_to_compact_pages() */
5/* compaction didn't start as it was not possible or direct reclaim was more suitable */ 7/* compaction didn't start as it was not possible or direct reclaim was more suitable */
6#define COMPACT_SKIPPED 0 8#define COMPACT_SKIPPED 0
@@ -11,6 +13,23 @@
11/* The full zone was compacted */ 13/* The full zone was compacted */
12#define COMPACT_COMPLETE 3 14#define COMPACT_COMPLETE 3
13 15
16/*
17 * compaction supports three modes
18 *
19 * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans
20 * MIGRATE_MOVABLE pageblocks as migration sources and targets.
21 * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans
22 * MIGRATE_MOVABLE pageblocks as migration sources.
23 * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration
24 * targets and convers them to MIGRATE_MOVABLE if possible
25 * COMPACT_SYNC uses synchronous migration and scans all pageblocks
26 */
27enum compact_mode {
28 COMPACT_ASYNC_MOVABLE,
29 COMPACT_ASYNC_UNMOVABLE,
30 COMPACT_SYNC,
31};
32
14#ifdef CONFIG_COMPACTION 33#ifdef CONFIG_COMPACTION
15extern int sysctl_compact_memory; 34extern int sysctl_compact_memory;
16extern int sysctl_compaction_handler(struct ctl_table *table, int write, 35extern int sysctl_compaction_handler(struct ctl_table *table, int write,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdc1a9630948..038076b27ea4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1681,7 +1681,6 @@ struct inode_operations {
1681 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 1681 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
1682 ssize_t (*listxattr) (struct dentry *, char *, size_t); 1682 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1683 int (*removexattr) (struct dentry *, const char *); 1683 int (*removexattr) (struct dentry *, const char *);
1684 void (*truncate_range)(struct inode *, loff_t, loff_t);
1685 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, 1684 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1686 u64 len); 1685 u64 len);
1687} ____cacheline_aligned; 1686} ____cacheline_aligned;
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c8af7a2efb52..4c59b1131187 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -59,6 +59,8 @@ extern pmd_t *page_check_address_pmd(struct page *page,
59#define HPAGE_PMD_MASK HPAGE_MASK 59#define HPAGE_PMD_MASK HPAGE_MASK
60#define HPAGE_PMD_SIZE HPAGE_SIZE 60#define HPAGE_PMD_SIZE HPAGE_SIZE
61 61
62extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
63
62#define transparent_hugepage_enabled(__vma) \ 64#define transparent_hugepage_enabled(__vma) \
63 ((transparent_hugepage_flags & \ 65 ((transparent_hugepage_flags & \
64 (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ 66 (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
index 26a65711676f..a1bdf6966357 100644
--- a/include/linux/kernel-page-flags.h
+++ b/include/linux/kernel-page-flags.h
@@ -32,6 +32,8 @@
32#define KPF_KSM 21 32#define KPF_KSM 21
33#define KPF_THP 22 33#define KPF_THP 22
34 34
35#ifdef __KERNEL__
36
35/* kernel hacking assistances 37/* kernel hacking assistances
36 * WARNING: subject to change, never rely on them! 38 * WARNING: subject to change, never rely on them!
37 */ 39 */
@@ -44,4 +46,6 @@
44#define KPF_ARCH 38 46#define KPF_ARCH 38
45#define KPF_UNCACHED 39 47#define KPF_UNCACHED 39
46 48
49#endif /* __KERNEL__ */
50
47#endif /* LINUX_KERNEL_PAGE_FLAGS_H */ 51#endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f94efd2f6c27..cfe9050ad8da 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -79,6 +79,8 @@ extern void mem_cgroup_uncharge_cache_page(struct page *page);
79 79
80extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, 80extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
81 int order); 81 int order);
82bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
83 struct mem_cgroup *memcg);
82int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg); 84int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg);
83 85
84extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); 86extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
@@ -92,10 +94,13 @@ static inline
92int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) 94int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
93{ 95{
94 struct mem_cgroup *memcg; 96 struct mem_cgroup *memcg;
97 int match;
98
95 rcu_read_lock(); 99 rcu_read_lock();
96 memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner)); 100 memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner));
101 match = __mem_cgroup_same_or_subtree(cgroup, memcg);
97 rcu_read_unlock(); 102 rcu_read_unlock();
98 return cgroup == memcg; 103 return match;
99} 104}
100 105
101extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); 106extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
@@ -121,8 +126,6 @@ int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
121int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); 126int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
122unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, 127unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
123 int nid, int zid, unsigned int lrumask); 128 int nid, int zid, unsigned int lrumask);
124struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
125 struct zone *zone);
126struct zone_reclaim_stat* 129struct zone_reclaim_stat*
127mem_cgroup_get_reclaim_stat_from_page(struct page *page); 130mem_cgroup_get_reclaim_stat_from_page(struct page *page);
128extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 131extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
@@ -351,13 +354,6 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
351 return 0; 354 return 0;
352} 355}
353 356
354
355static inline struct zone_reclaim_stat*
356mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone)
357{
358 return NULL;
359}
360
361static inline struct zone_reclaim_stat* 357static inline struct zone_reclaim_stat*
362mem_cgroup_get_reclaim_stat_from_page(struct page *page) 358mem_cgroup_get_reclaim_stat_from_page(struct page *page)
363{ 359{
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 7c727a90d70d..4aa42732e47f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -225,8 +225,8 @@ static inline void check_highest_zone(enum zone_type k)
225 policy_zone = k; 225 policy_zone = k;
226} 226}
227 227
228int do_migrate_pages(struct mm_struct *mm, 228int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
229 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); 229 const nodemask_t *to, int flags);
230 230
231 231
232#ifdef CONFIG_TMPFS 232#ifdef CONFIG_TMPFS
@@ -354,9 +354,8 @@ static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
354 return false; 354 return false;
355} 355}
356 356
357static inline int do_migrate_pages(struct mm_struct *mm, 357static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
358 const nodemask_t *from_nodes, 358 const nodemask_t *to, int flags)
359 const nodemask_t *to_nodes, int flags)
360{ 359{
361 return 0; 360 return 0;
362} 361}
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index fccc3002f271..91dd3ef63e99 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -7,6 +7,7 @@
7#ifndef MFD_AB8500_H 7#ifndef MFD_AB8500_H
8#define MFD_AB8500_H 8#define MFD_AB8500_H
9 9
10#include <linux/atomic.h>
10#include <linux/mutex.h> 11#include <linux/mutex.h>
11 12
12struct device; 13struct device;
@@ -194,6 +195,14 @@ enum ab8500_version {
194#define AB9540_INT_GPIO52F 123 195#define AB9540_INT_GPIO52F 123
195#define AB9540_INT_GPIO53F 124 196#define AB9540_INT_GPIO53F 124
196#define AB9540_INT_GPIO54F 125 /* not 8505 */ 197#define AB9540_INT_GPIO54F 125 /* not 8505 */
198/* ab8500_irq_regoffset[16] -> IT[Source|Latch|Mask]25 */
199#define AB8505_INT_KEYSTUCK 128
200#define AB8505_INT_IKR 129
201#define AB8505_INT_IKP 130
202#define AB8505_INT_KP 131
203#define AB8505_INT_KEYDEGLITCH 132
204#define AB8505_INT_MODPWRSTATUSF 134
205#define AB8505_INT_MODPWRSTATUSR 135
197 206
198/* 207/*
199 * AB8500_AB9540_NR_IRQS is used when configuring the IRQ numbers for the 208 * AB8500_AB9540_NR_IRQS is used when configuring the IRQ numbers for the
@@ -203,8 +212,8 @@ enum ab8500_version {
203 * which is larger. 212 * which is larger.
204 */ 213 */
205#define AB8500_NR_IRQS 112 214#define AB8500_NR_IRQS 112
206#define AB8505_NR_IRQS 128 215#define AB8505_NR_IRQS 136
207#define AB9540_NR_IRQS 128 216#define AB9540_NR_IRQS 136
208/* This is set to the roof of any AB8500 chip variant IRQ counts */ 217/* This is set to the roof of any AB8500 chip variant IRQ counts */
209#define AB8500_MAX_NR_IRQS AB9540_NR_IRQS 218#define AB8500_MAX_NR_IRQS AB9540_NR_IRQS
210 219
@@ -216,6 +225,7 @@ enum ab8500_version {
216 * @dev: parent device 225 * @dev: parent device
217 * @lock: read/write operations lock 226 * @lock: read/write operations lock
218 * @irq_lock: genirq bus lock 227 * @irq_lock: genirq bus lock
228 * @transfer_ongoing: 0 if no transfer ongoing
219 * @irq: irq line 229 * @irq: irq line
220 * @version: chip version id (e.g. ab8500 or ab9540) 230 * @version: chip version id (e.g. ab8500 or ab9540)
221 * @chip_id: chip revision id 231 * @chip_id: chip revision id
@@ -234,7 +244,7 @@ struct ab8500 {
234 struct device *dev; 244 struct device *dev;
235 struct mutex lock; 245 struct mutex lock;
236 struct mutex irq_lock; 246 struct mutex irq_lock;
237 247 atomic_t transfer_ongoing;
238 int irq_base; 248 int irq_base;
239 int irq; 249 int irq;
240 enum ab8500_version version; 250 enum ab8500_version version;
@@ -280,6 +290,8 @@ extern int __devinit ab8500_init(struct ab8500 *ab8500,
280 enum ab8500_version version); 290 enum ab8500_version version);
281extern int __devexit ab8500_exit(struct ab8500 *ab8500); 291extern int __devexit ab8500_exit(struct ab8500 *ab8500);
282 292
293extern int ab8500_suspend(struct ab8500 *ab8500);
294
283static inline int is_ab8500(struct ab8500 *ab) 295static inline int is_ab8500(struct ab8500 *ab)
284{ 296{
285 return ab->version == AB8500_VERSION_AB8500; 297 return ab->version == AB8500_VERSION_AB8500;
diff --git a/include/linux/mfd/anatop.h b/include/linux/mfd/anatop.h
index 22c1007d3ec5..7f92acf03d9e 100644
--- a/include/linux/mfd/anatop.h
+++ b/include/linux/mfd/anatop.h
@@ -34,7 +34,7 @@ struct anatop {
34 spinlock_t reglock; 34 spinlock_t reglock;
35}; 35};
36 36
37extern u32 anatop_get_bits(struct anatop *, u32, int, int); 37extern u32 anatop_read_reg(struct anatop *, u32);
38extern void anatop_set_bits(struct anatop *, u32, int, int, u32); 38extern void anatop_write_reg(struct anatop *, u32, u32, u32);
39 39
40#endif /* __LINUX_MFD_ANATOP_H */ 40#endif /* __LINUX_MFD_ANATOP_H */
diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h
index ef6faa5cee46..e1148d037e7b 100644
--- a/include/linux/mfd/asic3.h
+++ b/include/linux/mfd/asic3.h
@@ -31,6 +31,8 @@ struct asic3_platform_data {
31 31
32 unsigned int gpio_base; 32 unsigned int gpio_base;
33 33
34 unsigned int clock_rate;
35
34 struct asic3_led *leds; 36 struct asic3_led *leds;
35}; 37};
36 38
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index 8313cd9658e3..0507c4c21a7d 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -33,6 +33,18 @@
33 33
34#include <linux/mfd/da9052/reg.h> 34#include <linux/mfd/da9052/reg.h>
35 35
36/* Common - HWMON Channel Definations */
37#define DA9052_ADC_VDDOUT 0
38#define DA9052_ADC_ICH 1
39#define DA9052_ADC_TBAT 2
40#define DA9052_ADC_VBAT 3
41#define DA9052_ADC_IN4 4
42#define DA9052_ADC_IN5 5
43#define DA9052_ADC_IN6 6
44#define DA9052_ADC_TSI 7
45#define DA9052_ADC_TJUNC 8
46#define DA9052_ADC_VBBAT 9
47
36#define DA9052_IRQ_DCIN 0 48#define DA9052_IRQ_DCIN 0
37#define DA9052_IRQ_VBUS 1 49#define DA9052_IRQ_VBUS 1
38#define DA9052_IRQ_DCINREM 2 50#define DA9052_IRQ_DCINREM 2
@@ -79,6 +91,9 @@ struct da9052 {
79 struct device *dev; 91 struct device *dev;
80 struct regmap *regmap; 92 struct regmap *regmap;
81 93
94 struct mutex auxadc_lock;
95 struct completion done;
96
82 int irq_base; 97 int irq_base;
83 struct regmap_irq_chip_data *irq_data; 98 struct regmap_irq_chip_data *irq_data;
84 u8 chip_id; 99 u8 chip_id;
@@ -86,6 +101,10 @@ struct da9052 {
86 int chip_irq; 101 int chip_irq;
87}; 102};
88 103
104/* ADC API */
105int da9052_adc_manual_read(struct da9052 *da9052, unsigned char channel);
106int da9052_adc_read_temp(struct da9052 *da9052);
107
89/* Device I/O API */ 108/* Device I/O API */
90static inline int da9052_reg_read(struct da9052 *da9052, unsigned char reg) 109static inline int da9052_reg_read(struct da9052 *da9052, unsigned char reg)
91{ 110{
diff --git a/include/linux/mfd/lm3533.h b/include/linux/mfd/lm3533.h
new file mode 100644
index 000000000000..594bc591f256
--- /dev/null
+++ b/include/linux/mfd/lm3533.h
@@ -0,0 +1,104 @@
1/*
2 * lm3533.h -- LM3533 interface
3 *
4 * Copyright (C) 2011-2012 Texas Instruments
5 *
6 * Author: Johan Hovold <jhovold@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#ifndef __LINUX_MFD_LM3533_H
15#define __LINUX_MFD_LM3533_H
16
17#define LM3533_ATTR_RO(_name) \
18 DEVICE_ATTR(_name, S_IRUGO, show_##_name, NULL)
19#define LM3533_ATTR_RW(_name) \
20 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR , show_##_name, store_##_name)
21
22struct device;
23struct regmap;
24
25struct lm3533 {
26 struct device *dev;
27
28 struct regmap *regmap;
29
30 int gpio_hwen;
31 int irq;
32
33 unsigned have_als:1;
34 unsigned have_backlights:1;
35 unsigned have_leds:1;
36};
37
38struct lm3533_ctrlbank {
39 struct lm3533 *lm3533;
40 struct device *dev;
41 int id;
42};
43
44struct lm3533_als_platform_data {
45 unsigned pwm_mode:1; /* PWM input mode (default analog) */
46 u8 r_select; /* 1 - 127 (ignored in PWM-mode) */
47};
48
49struct lm3533_bl_platform_data {
50 char *name;
51 u16 max_current; /* 5000 - 29800 uA (800 uA step) */
52 u8 default_brightness; /* 0 - 255 */
53 u8 pwm; /* 0 - 0x3f */
54};
55
56struct lm3533_led_platform_data {
57 char *name;
58 const char *default_trigger;
59 u16 max_current; /* 5000 - 29800 uA (800 uA step) */
60 u8 pwm; /* 0 - 0x3f */
61};
62
63enum lm3533_boost_freq {
64 LM3533_BOOST_FREQ_500KHZ,
65 LM3533_BOOST_FREQ_1000KHZ,
66};
67
68enum lm3533_boost_ovp {
69 LM3533_BOOST_OVP_16V,
70 LM3533_BOOST_OVP_24V,
71 LM3533_BOOST_OVP_32V,
72 LM3533_BOOST_OVP_40V,
73};
74
75struct lm3533_platform_data {
76 int gpio_hwen;
77
78 enum lm3533_boost_ovp boost_ovp;
79 enum lm3533_boost_freq boost_freq;
80
81 struct lm3533_als_platform_data *als;
82
83 struct lm3533_bl_platform_data *backlights;
84 int num_backlights;
85
86 struct lm3533_led_platform_data *leds;
87 int num_leds;
88};
89
90extern int lm3533_ctrlbank_enable(struct lm3533_ctrlbank *cb);
91extern int lm3533_ctrlbank_disable(struct lm3533_ctrlbank *cb);
92
93extern int lm3533_ctrlbank_set_brightness(struct lm3533_ctrlbank *cb, u8 val);
94extern int lm3533_ctrlbank_get_brightness(struct lm3533_ctrlbank *cb, u8 *val);
95extern int lm3533_ctrlbank_set_max_current(struct lm3533_ctrlbank *cb,
96 u16 imax);
97extern int lm3533_ctrlbank_set_pwm(struct lm3533_ctrlbank *cb, u8 val);
98extern int lm3533_ctrlbank_get_pwm(struct lm3533_ctrlbank *cb, u8 *val);
99
100extern int lm3533_read(struct lm3533 *lm3533, u8 reg, u8 *val);
101extern int lm3533_write(struct lm3533 *lm3533, u8 reg, u8 val);
102extern int lm3533_update(struct lm3533 *lm3533, u8 reg, u8 val, u8 mask);
103
104#endif /* __LINUX_MFD_LM3533_H */
diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h
new file mode 100644
index 000000000000..fec5256c3f5d
--- /dev/null
+++ b/include/linux/mfd/lpc_ich.h
@@ -0,0 +1,48 @@
1/*
2 * linux/drivers/mfd/lpc_ich.h
3 *
4 * Copyright (c) 2012 Extreme Engineering Solution, Inc.
5 * Author: Aaron Sierra <asierra@xes-inc.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License 2 as published
9 * by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#ifndef LPC_ICH_H
21#define LPC_ICH_H
22
23/* Watchdog resources */
24#define ICH_RES_IO_TCO 0
25#define ICH_RES_IO_SMI 1
26#define ICH_RES_MEM_OFF 2
27#define ICH_RES_MEM_GCS 0
28
29/* GPIO resources */
30#define ICH_RES_GPIO 0
31#define ICH_RES_GPE0 1
32
33/* GPIO compatibility */
34#define ICH_I3100_GPIO 0x401
35#define ICH_V5_GPIO 0x501
36#define ICH_V6_GPIO 0x601
37#define ICH_V7_GPIO 0x701
38#define ICH_V9_GPIO 0x801
39#define ICH_V10CORP_GPIO 0xa01
40#define ICH_V10CONS_GPIO 0xa11
41
42struct lpc_ich_info {
43 char name[32];
44 unsigned int iTCO_version;
45 unsigned int gpio_version;
46};
47
48#endif
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
new file mode 100644
index 000000000000..68263c5fa53c
--- /dev/null
+++ b/include/linux/mfd/max77693-private.h
@@ -0,0 +1,227 @@
1/*
2 * max77693-private.h - Voltage regulator driver for the Maxim 77693
3 *
4 * Copyright (C) 2012 Samsung Electrnoics
5 * SangYoung Son <hello.son@samsung.com>
6 *
7 * This program is not provided / owned by Maxim Integrated Products.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#ifndef __LINUX_MFD_MAX77693_PRIV_H
25#define __LINUX_MFD_MAX77693_PRIV_H
26
27#include <linux/i2c.h>
28
29#define MAX77693_NUM_IRQ_MUIC_REGS 3
30#define MAX77693_REG_INVALID (0xff)
31
32/* Slave addr = 0xCC: PMIC, Charger, Flash LED */
33enum max77693_pmic_reg {
34 MAX77693_LED_REG_IFLASH1 = 0x00,
35 MAX77693_LED_REG_IFLASH2 = 0x01,
36 MAX77693_LED_REG_ITORCH = 0x02,
37 MAX77693_LED_REG_ITORCHTIMER = 0x03,
38 MAX77693_LED_REG_FLASH_TIMER = 0x04,
39 MAX77693_LED_REG_FLASH_EN = 0x05,
40 MAX77693_LED_REG_MAX_FLASH1 = 0x06,
41 MAX77693_LED_REG_MAX_FLASH2 = 0x07,
42 MAX77693_LED_REG_MAX_FLASH3 = 0x08,
43 MAX77693_LED_REG_MAX_FLASH4 = 0x09,
44 MAX77693_LED_REG_VOUT_CNTL = 0x0A,
45 MAX77693_LED_REG_VOUT_FLASH1 = 0x0B,
46 MAX77693_LED_REG_VOUT_FLASH2 = 0x0C,
47 MAX77693_LED_REG_FLASH_INT = 0x0E,
48 MAX77693_LED_REG_FLASH_INT_MASK = 0x0F,
49 MAX77693_LED_REG_FLASH_INT_STATUS = 0x10,
50
51 MAX77693_PMIC_REG_PMIC_ID1 = 0x20,
52 MAX77693_PMIC_REG_PMIC_ID2 = 0x21,
53 MAX77693_PMIC_REG_INTSRC = 0x22,
54 MAX77693_PMIC_REG_INTSRC_MASK = 0x23,
55 MAX77693_PMIC_REG_TOPSYS_INT = 0x24,
56 MAX77693_PMIC_REG_TOPSYS_INT_MASK = 0x26,
57 MAX77693_PMIC_REG_TOPSYS_STAT = 0x28,
58 MAX77693_PMIC_REG_MAINCTRL1 = 0x2A,
59 MAX77693_PMIC_REG_LSCNFG = 0x2B,
60
61 MAX77693_CHG_REG_CHG_INT = 0xB0,
62 MAX77693_CHG_REG_CHG_INT_MASK = 0xB1,
63 MAX77693_CHG_REG_CHG_INT_OK = 0xB2,
64 MAX77693_CHG_REG_CHG_DETAILS_00 = 0xB3,
65 MAX77693_CHG_REG_CHG_DETAILS_01 = 0xB4,
66 MAX77693_CHG_REG_CHG_DETAILS_02 = 0xB5,
67 MAX77693_CHG_REG_CHG_DETAILS_03 = 0xB6,
68 MAX77693_CHG_REG_CHG_CNFG_00 = 0xB7,
69 MAX77693_CHG_REG_CHG_CNFG_01 = 0xB8,
70 MAX77693_CHG_REG_CHG_CNFG_02 = 0xB9,
71 MAX77693_CHG_REG_CHG_CNFG_03 = 0xBA,
72 MAX77693_CHG_REG_CHG_CNFG_04 = 0xBB,
73 MAX77693_CHG_REG_CHG_CNFG_05 = 0xBC,
74 MAX77693_CHG_REG_CHG_CNFG_06 = 0xBD,
75 MAX77693_CHG_REG_CHG_CNFG_07 = 0xBE,
76 MAX77693_CHG_REG_CHG_CNFG_08 = 0xBF,
77 MAX77693_CHG_REG_CHG_CNFG_09 = 0xC0,
78 MAX77693_CHG_REG_CHG_CNFG_10 = 0xC1,
79 MAX77693_CHG_REG_CHG_CNFG_11 = 0xC2,
80 MAX77693_CHG_REG_CHG_CNFG_12 = 0xC3,
81 MAX77693_CHG_REG_CHG_CNFG_13 = 0xC4,
82 MAX77693_CHG_REG_CHG_CNFG_14 = 0xC5,
83 MAX77693_CHG_REG_SAFEOUT_CTRL = 0xC6,
84
85 MAX77693_PMIC_REG_END,
86};
87
88/* Slave addr = 0x4A: MUIC */
89enum max77693_muic_reg {
90 MAX77693_MUIC_REG_ID = 0x00,
91 MAX77693_MUIC_REG_INT1 = 0x01,
92 MAX77693_MUIC_REG_INT2 = 0x02,
93 MAX77693_MUIC_REG_INT3 = 0x03,
94 MAX77693_MUIC_REG_STATUS1 = 0x04,
95 MAX77693_MUIC_REG_STATUS2 = 0x05,
96 MAX77693_MUIC_REG_STATUS3 = 0x06,
97 MAX77693_MUIC_REG_INTMASK1 = 0x07,
98 MAX77693_MUIC_REG_INTMASK2 = 0x08,
99 MAX77693_MUIC_REG_INTMASK3 = 0x09,
100 MAX77693_MUIC_REG_CDETCTRL1 = 0x0A,
101 MAX77693_MUIC_REG_CDETCTRL2 = 0x0B,
102 MAX77693_MUIC_REG_CTRL1 = 0x0C,
103 MAX77693_MUIC_REG_CTRL2 = 0x0D,
104 MAX77693_MUIC_REG_CTRL3 = 0x0E,
105
106 MAX77693_MUIC_REG_END,
107};
108
109/* Slave addr = 0x90: Haptic */
110enum max77693_haptic_reg {
111 MAX77693_HAPTIC_REG_STATUS = 0x00,
112 MAX77693_HAPTIC_REG_CONFIG1 = 0x01,
113 MAX77693_HAPTIC_REG_CONFIG2 = 0x02,
114 MAX77693_HAPTIC_REG_CONFIG_CHNL = 0x03,
115 MAX77693_HAPTIC_REG_CONFG_CYC1 = 0x04,
116 MAX77693_HAPTIC_REG_CONFG_CYC2 = 0x05,
117 MAX77693_HAPTIC_REG_CONFIG_PER1 = 0x06,
118 MAX77693_HAPTIC_REG_CONFIG_PER2 = 0x07,
119 MAX77693_HAPTIC_REG_CONFIG_PER3 = 0x08,
120 MAX77693_HAPTIC_REG_CONFIG_PER4 = 0x09,
121 MAX77693_HAPTIC_REG_CONFIG_DUTY1 = 0x0A,
122 MAX77693_HAPTIC_REG_CONFIG_DUTY2 = 0x0B,
123 MAX77693_HAPTIC_REG_CONFIG_PWM1 = 0x0C,
124 MAX77693_HAPTIC_REG_CONFIG_PWM2 = 0x0D,
125 MAX77693_HAPTIC_REG_CONFIG_PWM3 = 0x0E,
126 MAX77693_HAPTIC_REG_CONFIG_PWM4 = 0x0F,
127 MAX77693_HAPTIC_REG_REV = 0x10,
128
129 MAX77693_HAPTIC_REG_END,
130};
131
132enum max77693_irq_source {
133 LED_INT = 0,
134 TOPSYS_INT,
135 CHG_INT,
136 MUIC_INT1,
137 MUIC_INT2,
138 MUIC_INT3,
139
140 MAX77693_IRQ_GROUP_NR,
141};
142
143enum max77693_irq {
144 /* PMIC - FLASH */
145 MAX77693_LED_IRQ_FLED2_OPEN,
146 MAX77693_LED_IRQ_FLED2_SHORT,
147 MAX77693_LED_IRQ_FLED1_OPEN,
148 MAX77693_LED_IRQ_FLED1_SHORT,
149 MAX77693_LED_IRQ_MAX_FLASH,
150
151 /* PMIC - TOPSYS */
152 MAX77693_TOPSYS_IRQ_T120C_INT,
153 MAX77693_TOPSYS_IRQ_T140C_INT,
154 MAX77693_TOPSYS_IRQ_LOWSYS_INT,
155
156 /* PMIC - Charger */
157 MAX77693_CHG_IRQ_BYP_I,
158 MAX77693_CHG_IRQ_THM_I,
159 MAX77693_CHG_IRQ_BAT_I,
160 MAX77693_CHG_IRQ_CHG_I,
161 MAX77693_CHG_IRQ_CHGIN_I,
162
163 /* MUIC INT1 */
164 MAX77693_MUIC_IRQ_INT1_ADC,
165 MAX77693_MUIC_IRQ_INT1_ADC_LOW,
166 MAX77693_MUIC_IRQ_INT1_ADC_ERR,
167 MAX77693_MUIC_IRQ_INT1_ADC1K,
168
169 /* MUIC INT2 */
170 MAX77693_MUIC_IRQ_INT2_CHGTYP,
171 MAX77693_MUIC_IRQ_INT2_CHGDETREUN,
172 MAX77693_MUIC_IRQ_INT2_DCDTMR,
173 MAX77693_MUIC_IRQ_INT2_DXOVP,
174 MAX77693_MUIC_IRQ_INT2_VBVOLT,
175 MAX77693_MUIC_IRQ_INT2_VIDRM,
176
177 /* MUIC INT3 */
178 MAX77693_MUIC_IRQ_INT3_EOC,
179 MAX77693_MUIC_IRQ_INT3_CGMBC,
180 MAX77693_MUIC_IRQ_INT3_OVP,
181 MAX77693_MUIC_IRQ_INT3_MBCCHG_ERR,
182 MAX77693_MUIC_IRQ_INT3_CHG_ENABLED,
183 MAX77693_MUIC_IRQ_INT3_BAT_DET,
184
185 MAX77693_IRQ_NR,
186};
187
188struct max77693_dev {
189 struct device *dev;
190 struct i2c_client *i2c; /* 0xCC , PMIC, Charger, Flash LED */
191 struct i2c_client *muic; /* 0x4A , MUIC */
192 struct i2c_client *haptic; /* 0x90 , Haptic */
193 struct mutex iolock;
194
195 int type;
196
197 struct regmap *regmap;
198 struct regmap *regmap_muic;
199 struct regmap *regmap_haptic;
200
201 struct irq_domain *irq_domain;
202
203 int irq;
204 int irq_gpio;
205 bool wakeup;
206 struct mutex irqlock;
207 int irq_masks_cur[MAX77693_IRQ_GROUP_NR];
208 int irq_masks_cache[MAX77693_IRQ_GROUP_NR];
209};
210
211enum max77693_types {
212 TYPE_MAX77693,
213};
214
215extern int max77693_read_reg(struct regmap *map, u8 reg, u8 *dest);
216extern int max77693_bulk_read(struct regmap *map, u8 reg, int count,
217 u8 *buf);
218extern int max77693_write_reg(struct regmap *map, u8 reg, u8 value);
219extern int max77693_bulk_write(struct regmap *map, u8 reg, int count,
220 u8 *buf);
221extern int max77693_update_reg(struct regmap *map, u8 reg, u8 val, u8 mask);
222
223extern int max77693_irq_init(struct max77693_dev *max77686);
224extern void max77693_irq_exit(struct max77693_dev *max77686);
225extern int max77693_irq_resume(struct max77693_dev *max77686);
226
227#endif /* __LINUX_MFD_MAX77693_PRIV_H */
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
new file mode 100644
index 000000000000..1d28ae90384e
--- /dev/null
+++ b/include/linux/mfd/max77693.h
@@ -0,0 +1,36 @@
1/*
2 * max77693.h - Driver for the Maxim 77693
3 *
4 * Copyright (C) 2012 Samsung Electrnoics
5 * SangYoung Son <hello.son@samsung.com>
6 *
7 * This program is not provided / owned by Maxim Integrated Products.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 * This driver is based on max8997.h
24 *
25 * MAX77693 has PMIC, Charger, Flash LED, Haptic, MUIC devices.
26 * The devices share the same I2C bus and included in
27 * this mfd driver.
28 */
29
30#ifndef __LINUX_MFD_MAX77693_H
31#define __LINUX_MFD_MAX77693_H
32
33struct max77693_platform_data {
34 int wakeup;
35};
36#endif /* __LINUX_MFD_MAX77693_H */
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
new file mode 100644
index 000000000000..d179227e866f
--- /dev/null
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -0,0 +1,324 @@
1/*
2 * Copyright (c) 2009-2011 Wind River Systems, Inc.
3 * Copyright (c) 2011 ST Microelectronics (Alessandro Rubini)
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 * See the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * The STMicroelectronics ConneXt (STA2X11) chip has several unrelated
19 * functions in one PCI endpoint functions. This driver simply
20 * registers the platform devices in this iomemregion and exports a few
21 * functions to access common registers
22 */
23
24#ifndef __STA2X11_MFD_H
25#define __STA2X11_MFD_H
26#include <linux/types.h>
27#include <linux/pci.h>
28
29/*
30 * The MFD PCI block includes the GPIO peripherals and other register blocks.
31 * For GPIO, we have 32*4 bits (I use "gsta" for "gpio sta2x11".)
32 */
33#define GSTA_GPIO_PER_BLOCK 32
34#define GSTA_NR_BLOCKS 4
35#define GSTA_NR_GPIO (GSTA_GPIO_PER_BLOCK * GSTA_NR_BLOCKS)
36
37/* Pinconfig is set by the board definition: altfunc, pull-up, pull-down */
38struct sta2x11_gpio_pdata {
39 unsigned pinconfig[GSTA_NR_GPIO];
40};
41
42/* Macros below lifted from sh_pfc.h, with minor differences */
43#define PINMUX_TYPE_NONE 0
44#define PINMUX_TYPE_FUNCTION 1
45#define PINMUX_TYPE_OUTPUT_LOW 2
46#define PINMUX_TYPE_OUTPUT_HIGH 3
47#define PINMUX_TYPE_INPUT 4
48#define PINMUX_TYPE_INPUT_PULLUP 5
49#define PINMUX_TYPE_INPUT_PULLDOWN 6
50
51/* Give names to GPIO pins, like PXA does, taken from the manual */
52#define STA2X11_GPIO0 0
53#define STA2X11_GPIO1 1
54#define STA2X11_GPIO2 2
55#define STA2X11_GPIO3 3
56#define STA2X11_GPIO4 4
57#define STA2X11_GPIO5 5
58#define STA2X11_GPIO6 6
59#define STA2X11_GPIO7 7
60#define STA2X11_GPIO8_RGBOUT_RED7 8
61#define STA2X11_GPIO9_RGBOUT_RED6 9
62#define STA2X11_GPIO10_RGBOUT_RED5 10
63#define STA2X11_GPIO11_RGBOUT_RED4 11
64#define STA2X11_GPIO12_RGBOUT_RED3 12
65#define STA2X11_GPIO13_RGBOUT_RED2 13
66#define STA2X11_GPIO14_RGBOUT_RED1 14
67#define STA2X11_GPIO15_RGBOUT_RED0 15
68#define STA2X11_GPIO16_RGBOUT_GREEN7 16
69#define STA2X11_GPIO17_RGBOUT_GREEN6 17
70#define STA2X11_GPIO18_RGBOUT_GREEN5 18
71#define STA2X11_GPIO19_RGBOUT_GREEN4 19
72#define STA2X11_GPIO20_RGBOUT_GREEN3 20
73#define STA2X11_GPIO21_RGBOUT_GREEN2 21
74#define STA2X11_GPIO22_RGBOUT_GREEN1 22
75#define STA2X11_GPIO23_RGBOUT_GREEN0 23
76#define STA2X11_GPIO24_RGBOUT_BLUE7 24
77#define STA2X11_GPIO25_RGBOUT_BLUE6 25
78#define STA2X11_GPIO26_RGBOUT_BLUE5 26
79#define STA2X11_GPIO27_RGBOUT_BLUE4 27
80#define STA2X11_GPIO28_RGBOUT_BLUE3 28
81#define STA2X11_GPIO29_RGBOUT_BLUE2 29
82#define STA2X11_GPIO30_RGBOUT_BLUE1 30
83#define STA2X11_GPIO31_RGBOUT_BLUE0 31
84#define STA2X11_GPIO32_RGBOUT_VSYNCH 32
85#define STA2X11_GPIO33_RGBOUT_HSYNCH 33
86#define STA2X11_GPIO34_RGBOUT_DEN 34
87#define STA2X11_GPIO35_ETH_CRS_DV 35
88#define STA2X11_GPIO36_ETH_TXD1 36
89#define STA2X11_GPIO37_ETH_TXD0 37
90#define STA2X11_GPIO38_ETH_TX_EN 38
91#define STA2X11_GPIO39_MDIO 39
92#define STA2X11_GPIO40_ETH_REF_CLK 40
93#define STA2X11_GPIO41_ETH_RXD1 41
94#define STA2X11_GPIO42_ETH_RXD0 42
95#define STA2X11_GPIO43_MDC 43
96#define STA2X11_GPIO44_CAN_TX 44
97#define STA2X11_GPIO45_CAN_RX 45
98#define STA2X11_GPIO46_MLB_DAT 46
99#define STA2X11_GPIO47_MLB_SIG 47
100#define STA2X11_GPIO48_SPI0_CLK 48
101#define STA2X11_GPIO49_SPI0_TXD 49
102#define STA2X11_GPIO50_SPI0_RXD 50
103#define STA2X11_GPIO51_SPI0_FRM 51
104#define STA2X11_GPIO52_SPI1_CLK 52
105#define STA2X11_GPIO53_SPI1_TXD 53
106#define STA2X11_GPIO54_SPI1_RXD 54
107#define STA2X11_GPIO55_SPI1_FRM 55
108#define STA2X11_GPIO56_SPI2_CLK 56
109#define STA2X11_GPIO57_SPI2_TXD 57
110#define STA2X11_GPIO58_SPI2_RXD 58
111#define STA2X11_GPIO59_SPI2_FRM 59
112#define STA2X11_GPIO60_I2C0_SCL 60
113#define STA2X11_GPIO61_I2C0_SDA 61
114#define STA2X11_GPIO62_I2C1_SCL 62
115#define STA2X11_GPIO63_I2C1_SDA 63
116#define STA2X11_GPIO64_I2C2_SCL 64
117#define STA2X11_GPIO65_I2C2_SDA 65
118#define STA2X11_GPIO66_I2C3_SCL 66
119#define STA2X11_GPIO67_I2C3_SDA 67
120#define STA2X11_GPIO68_MSP0_RCK 68
121#define STA2X11_GPIO69_MSP0_RXD 69
122#define STA2X11_GPIO70_MSP0_RFS 70
123#define STA2X11_GPIO71_MSP0_TCK 71
124#define STA2X11_GPIO72_MSP0_TXD 72
125#define STA2X11_GPIO73_MSP0_TFS 73
126#define STA2X11_GPIO74_MSP0_SCK 74
127#define STA2X11_GPIO75_MSP1_CK 75
128#define STA2X11_GPIO76_MSP1_RXD 76
129#define STA2X11_GPIO77_MSP1_FS 77
130#define STA2X11_GPIO78_MSP1_TXD 78
131#define STA2X11_GPIO79_MSP2_CK 79
132#define STA2X11_GPIO80_MSP2_RXD 80
133#define STA2X11_GPIO81_MSP2_FS 81
134#define STA2X11_GPIO82_MSP2_TXD 82
135#define STA2X11_GPIO83_MSP3_CK 83
136#define STA2X11_GPIO84_MSP3_RXD 84
137#define STA2X11_GPIO85_MSP3_FS 85
138#define STA2X11_GPIO86_MSP3_TXD 86
139#define STA2X11_GPIO87_MSP4_CK 87
140#define STA2X11_GPIO88_MSP4_RXD 88
141#define STA2X11_GPIO89_MSP4_FS 89
142#define STA2X11_GPIO90_MSP4_TXD 90
143#define STA2X11_GPIO91_MSP5_CK 91
144#define STA2X11_GPIO92_MSP5_RXD 92
145#define STA2X11_GPIO93_MSP5_FS 93
146#define STA2X11_GPIO94_MSP5_TXD 94
147#define STA2X11_GPIO95_SDIO3_DAT3 95
148#define STA2X11_GPIO96_SDIO3_DAT2 96
149#define STA2X11_GPIO97_SDIO3_DAT1 97
150#define STA2X11_GPIO98_SDIO3_DAT0 98
151#define STA2X11_GPIO99_SDIO3_CLK 99
152#define STA2X11_GPIO100_SDIO3_CMD 100
153#define STA2X11_GPIO101 101
154#define STA2X11_GPIO102 102
155#define STA2X11_GPIO103 103
156#define STA2X11_GPIO104 104
157#define STA2X11_GPIO105_SDIO2_DAT3 105
158#define STA2X11_GPIO106_SDIO2_DAT2 106
159#define STA2X11_GPIO107_SDIO2_DAT1 107
160#define STA2X11_GPIO108_SDIO2_DAT0 108
161#define STA2X11_GPIO109_SDIO2_CLK 109
162#define STA2X11_GPIO110_SDIO2_CMD 110
163#define STA2X11_GPIO111 111
164#define STA2X11_GPIO112 112
165#define STA2X11_GPIO113 113
166#define STA2X11_GPIO114 114
167#define STA2X11_GPIO115_SDIO1_DAT3 115
168#define STA2X11_GPIO116_SDIO1_DAT2 116
169#define STA2X11_GPIO117_SDIO1_DAT1 117
170#define STA2X11_GPIO118_SDIO1_DAT0 118
171#define STA2X11_GPIO119_SDIO1_CLK 119
172#define STA2X11_GPIO120_SDIO1_CMD 120
173#define STA2X11_GPIO121 121
174#define STA2X11_GPIO122 122
175#define STA2X11_GPIO123 123
176#define STA2X11_GPIO124 124
177#define STA2X11_GPIO125_UART2_TXD 125
178#define STA2X11_GPIO126_UART2_RXD 126
179#define STA2X11_GPIO127_UART3_TXD 127
180
181/*
182 * The APB bridge has its own registers, needed by our users as well.
183 * They are accessed with the following read/mask/write function.
184 */
185u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
186
187/* CAN and MLB */
188#define APBREG_BSR 0x00 /* Bridge Status Reg */
189#define APBREG_PAER 0x08 /* Peripherals Address Error Reg */
190#define APBREG_PWAC 0x20 /* Peripheral Write Access Control reg */
191#define APBREG_PRAC 0x40 /* Peripheral Read Access Control reg */
192#define APBREG_PCG 0x60 /* Peripheral Clock Gating Reg */
193#define APBREG_PUR 0x80 /* Peripheral Under Reset Reg */
194#define APBREG_EMU_PCG 0xA0 /* Emulator Peripheral Clock Gating Reg */
195
196#define APBREG_CAN (1 << 1)
197#define APBREG_MLB (1 << 3)
198
199/* SARAC */
200#define APBREG_BSR_SARAC 0x100 /* Bridge Status Reg */
201#define APBREG_PAER_SARAC 0x108 /* Peripherals Address Error Reg */
202#define APBREG_PWAC_SARAC 0x120 /* Peripheral Write Access Control reg */
203#define APBREG_PRAC_SARAC 0x140 /* Peripheral Read Access Control reg */
204#define APBREG_PCG_SARAC 0x160 /* Peripheral Clock Gating Reg */
205#define APBREG_PUR_SARAC 0x180 /* Peripheral Under Reset Reg */
206#define APBREG_EMU_PCG_SARAC 0x1A0 /* Emulator Peripheral Clock Gating Reg */
207
208#define APBREG_SARAC (1 << 2)
209
210/*
211 * The system controller has its own registers. Some of these are accessed
212 * by out users as well, using the following read/mask/write/function
213 */
214u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
215
216#define SCTL_SCCTL 0x00 /* System controller control register */
217#define SCTL_ARMCFG 0x04 /* ARM configuration register */
218#define SCTL_SCPLLCTL 0x08 /* PLL control status register */
219#define SCTL_SCPLLFCTRL 0x0c /* PLL frequency control register */
220#define SCTL_SCRESFRACT 0x10 /* PLL fractional input register */
221#define SCTL_SCRESCTRL1 0x14 /* Peripheral reset control 1 */
222#define SCTL_SCRESXTRL2 0x18 /* Peripheral reset control 2 */
223#define SCTL_SCPEREN0 0x1c /* Peripheral clock enable register 0 */
224#define SCTL_SCPEREN1 0x20 /* Peripheral clock enable register 1 */
225#define SCTL_SCPEREN2 0x24 /* Peripheral clock enable register 2 */
226#define SCTL_SCGRST 0x28 /* Peripheral global reset */
227#define SCTL_SCPCIPMCR1 0x30 /* PCI power management control 1 */
228#define SCTL_SCPCIPMCR2 0x34 /* PCI power management control 2 */
229#define SCTL_SCPCIPMSR1 0x38 /* PCI power management status 1 */
230#define SCTL_SCPCIPMSR2 0x3c /* PCI power management status 2 */
231#define SCTL_SCPCIPMSR3 0x40 /* PCI power management status 3 */
232#define SCTL_SCINTREN 0x44 /* Interrupt enable */
233#define SCTL_SCRISR 0x48 /* RAW interrupt status */
234#define SCTL_SCCLKSTAT0 0x4c /* Peripheral clocks status 0 */
235#define SCTL_SCCLKSTAT1 0x50 /* Peripheral clocks status 1 */
236#define SCTL_SCCLKSTAT2 0x54 /* Peripheral clocks status 2 */
237#define SCTL_SCRSTSTA 0x58 /* Reset status register */
238
239#define SCTL_SCRESCTRL1_USB_PHY_POR (1 << 0)
240#define SCTL_SCRESCTRL1_USB_OTG (1 << 1)
241#define SCTL_SCRESCTRL1_USB_HRST (1 << 2)
242#define SCTL_SCRESCTRL1_USB_PHY_HOST (1 << 3)
243#define SCTL_SCRESCTRL1_SATAII (1 << 4)
244#define SCTL_SCRESCTRL1_VIP (1 << 5)
245#define SCTL_SCRESCTRL1_PER_MMC0 (1 << 6)
246#define SCTL_SCRESCTRL1_PER_MMC1 (1 << 7)
247#define SCTL_SCRESCTRL1_PER_GPIO0 (1 << 8)
248#define SCTL_SCRESCTRL1_PER_GPIO1 (1 << 9)
249#define SCTL_SCRESCTRL1_PER_GPIO2 (1 << 10)
250#define SCTL_SCRESCTRL1_PER_GPIO3 (1 << 11)
251#define SCTL_SCRESCTRL1_PER_MTU0 (1 << 12)
252#define SCTL_SCRESCTRL1_KER_SPI0 (1 << 13)
253#define SCTL_SCRESCTRL1_KER_SPI1 (1 << 14)
254#define SCTL_SCRESCTRL1_KER_SPI2 (1 << 15)
255#define SCTL_SCRESCTRL1_KER_MCI0 (1 << 16)
256#define SCTL_SCRESCTRL1_KER_MCI1 (1 << 17)
257#define SCTL_SCRESCTRL1_PRE_HSI2C0 (1 << 18)
258#define SCTL_SCRESCTRL1_PER_HSI2C1 (1 << 19)
259#define SCTL_SCRESCTRL1_PER_HSI2C2 (1 << 20)
260#define SCTL_SCRESCTRL1_PER_HSI2C3 (1 << 21)
261#define SCTL_SCRESCTRL1_PER_MSP0 (1 << 22)
262#define SCTL_SCRESCTRL1_PER_MSP1 (1 << 23)
263#define SCTL_SCRESCTRL1_PER_MSP2 (1 << 24)
264#define SCTL_SCRESCTRL1_PER_MSP3 (1 << 25)
265#define SCTL_SCRESCTRL1_PER_MSP4 (1 << 26)
266#define SCTL_SCRESCTRL1_PER_MSP5 (1 << 27)
267#define SCTL_SCRESCTRL1_PER_MMC (1 << 28)
268#define SCTL_SCRESCTRL1_KER_MSP0 (1 << 29)
269#define SCTL_SCRESCTRL1_KER_MSP1 (1 << 30)
270#define SCTL_SCRESCTRL1_KER_MSP2 (1 << 31)
271
272#define SCTL_SCPEREN0_UART0 (1 << 0)
273#define SCTL_SCPEREN0_UART1 (1 << 1)
274#define SCTL_SCPEREN0_UART2 (1 << 2)
275#define SCTL_SCPEREN0_UART3 (1 << 3)
276#define SCTL_SCPEREN0_MSP0 (1 << 4)
277#define SCTL_SCPEREN0_MSP1 (1 << 5)
278#define SCTL_SCPEREN0_MSP2 (1 << 6)
279#define SCTL_SCPEREN0_MSP3 (1 << 7)
280#define SCTL_SCPEREN0_MSP4 (1 << 8)
281#define SCTL_SCPEREN0_MSP5 (1 << 9)
282#define SCTL_SCPEREN0_SPI0 (1 << 10)
283#define SCTL_SCPEREN0_SPI1 (1 << 11)
284#define SCTL_SCPEREN0_SPI2 (1 << 12)
285#define SCTL_SCPEREN0_I2C0 (1 << 13)
286#define SCTL_SCPEREN0_I2C1 (1 << 14)
287#define SCTL_SCPEREN0_I2C2 (1 << 15)
288#define SCTL_SCPEREN0_I2C3 (1 << 16)
289#define SCTL_SCPEREN0_SVDO_LVDS (1 << 17)
290#define SCTL_SCPEREN0_USB_HOST (1 << 18)
291#define SCTL_SCPEREN0_USB_OTG (1 << 19)
292#define SCTL_SCPEREN0_MCI0 (1 << 20)
293#define SCTL_SCPEREN0_MCI1 (1 << 21)
294#define SCTL_SCPEREN0_MCI2 (1 << 22)
295#define SCTL_SCPEREN0_MCI3 (1 << 23)
296#define SCTL_SCPEREN0_SATA (1 << 24)
297#define SCTL_SCPEREN0_ETHERNET (1 << 25)
298#define SCTL_SCPEREN0_VIC (1 << 26)
299#define SCTL_SCPEREN0_DMA_AUDIO (1 << 27)
300#define SCTL_SCPEREN0_DMA_SOC (1 << 28)
301#define SCTL_SCPEREN0_RAM (1 << 29)
302#define SCTL_SCPEREN0_VIP (1 << 30)
303#define SCTL_SCPEREN0_ARM (1 << 31)
304
305#define SCTL_SCPEREN1_UART0 (1 << 0)
306#define SCTL_SCPEREN1_UART1 (1 << 1)
307#define SCTL_SCPEREN1_UART2 (1 << 2)
308#define SCTL_SCPEREN1_UART3 (1 << 3)
309#define SCTL_SCPEREN1_MSP0 (1 << 4)
310#define SCTL_SCPEREN1_MSP1 (1 << 5)
311#define SCTL_SCPEREN1_MSP2 (1 << 6)
312#define SCTL_SCPEREN1_MSP3 (1 << 7)
313#define SCTL_SCPEREN1_MSP4 (1 << 8)
314#define SCTL_SCPEREN1_MSP5 (1 << 9)
315#define SCTL_SCPEREN1_SPI0 (1 << 10)
316#define SCTL_SCPEREN1_SPI1 (1 << 11)
317#define SCTL_SCPEREN1_SPI2 (1 << 12)
318#define SCTL_SCPEREN1_I2C0 (1 << 13)
319#define SCTL_SCPEREN1_I2C1 (1 << 14)
320#define SCTL_SCPEREN1_I2C2 (1 << 15)
321#define SCTL_SCPEREN1_I2C3 (1 << 16)
322#define SCTL_SCPEREN1_USB_PHY (1 << 17)
323
324#endif /* __STA2X11_MFD_H */
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 8516fd1eaabc..f8d5b4d5843f 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -117,7 +117,7 @@ struct matrix_keymap_data;
117 * @no_autorepeat: disable key autorepeat 117 * @no_autorepeat: disable key autorepeat
118 */ 118 */
119struct stmpe_keypad_platform_data { 119struct stmpe_keypad_platform_data {
120 struct matrix_keymap_data *keymap_data; 120 const struct matrix_keymap_data *keymap_data;
121 unsigned int debounce_ms; 121 unsigned int debounce_ms;
122 unsigned int scan_count; 122 unsigned int scan_count;
123 bool no_autorepeat; 123 bool no_autorepeat;
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 1c6c2860d1a6..dd8dc0a6c462 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -18,6 +18,7 @@
18#define __LINUX_MFD_TPS65910_H 18#define __LINUX_MFD_TPS65910_H
19 19
20#include <linux/gpio.h> 20#include <linux/gpio.h>
21#include <linux/regmap.h>
21 22
22/* TPS chip id list */ 23/* TPS chip id list */
23#define TPS65910 0 24#define TPS65910 0
@@ -783,6 +784,18 @@
783#define TPS65910_SLEEP_CONTROL_EXT_INPUT_EN3 0x4 784#define TPS65910_SLEEP_CONTROL_EXT_INPUT_EN3 0x4
784#define TPS65911_SLEEP_CONTROL_EXT_INPUT_SLEEP 0x8 785#define TPS65911_SLEEP_CONTROL_EXT_INPUT_SLEEP 0x8
785 786
787/*
788 * Sleep keepon data: Maintains the state in sleep mode
789 * @therm_keepon: Keep on the thermal monitoring in sleep state.
790 * @clkout32k_keepon: Keep on the 32KHz clock output in sleep state.
791 * @i2chs_keepon: Keep on high speed internal clock in sleep state.
792 */
793struct tps65910_sleep_keepon_data {
794 unsigned therm_keepon:1;
795 unsigned clkout32k_keepon:1;
796 unsigned i2chs_keepon:1;
797};
798
786/** 799/**
787 * struct tps65910_board 800 * struct tps65910_board
788 * Board platform data may be used to initialize regulators. 801 * Board platform data may be used to initialize regulators.
@@ -794,6 +807,8 @@ struct tps65910_board {
794 int irq_base; 807 int irq_base;
795 int vmbch_threshold; 808 int vmbch_threshold;
796 int vmbch2_threshold; 809 int vmbch2_threshold;
810 bool en_dev_slp;
811 struct tps65910_sleep_keepon_data *slp_keepon;
797 bool en_gpio_sleep[TPS6591X_MAX_NUM_GPIO]; 812 bool en_gpio_sleep[TPS6591X_MAX_NUM_GPIO];
798 unsigned long regulator_ext_sleep_control[TPS65910_NUM_REGS]; 813 unsigned long regulator_ext_sleep_control[TPS65910_NUM_REGS];
799 struct regulator_init_data *tps65910_pmic_init_data[TPS65910_NUM_REGS]; 814 struct regulator_init_data *tps65910_pmic_init_data[TPS65910_NUM_REGS];
@@ -809,16 +824,14 @@ struct tps65910 {
809 struct regmap *regmap; 824 struct regmap *regmap;
810 struct mutex io_mutex; 825 struct mutex io_mutex;
811 unsigned int id; 826 unsigned int id;
812 int (*read)(struct tps65910 *tps65910, u8 reg, int size, void *dest);
813 int (*write)(struct tps65910 *tps65910, u8 reg, int size, void *src);
814 827
815 /* Client devices */ 828 /* Client devices */
816 struct tps65910_pmic *pmic; 829 struct tps65910_pmic *pmic;
817 struct tps65910_rtc *rtc; 830 struct tps65910_rtc *rtc;
818 struct tps65910_power *power; 831 struct tps65910_power *power;
819 832
820 /* GPIO Handling */ 833 /* Device node parsed board data */
821 struct gpio_chip gpio; 834 struct tps65910_board *of_plat_data;
822 835
823 /* IRQ Handling */ 836 /* IRQ Handling */
824 struct mutex irq_lock; 837 struct mutex irq_lock;
@@ -826,6 +839,7 @@ struct tps65910 {
826 int irq_base; 839 int irq_base;
827 int irq_num; 840 int irq_num;
828 u32 irq_mask; 841 u32 irq_mask;
842 struct irq_domain *domain;
829}; 843};
830 844
831struct tps65910_platform_data { 845struct tps65910_platform_data {
@@ -833,9 +847,6 @@ struct tps65910_platform_data {
833 int irq_base; 847 int irq_base;
834}; 848};
835 849
836int tps65910_set_bits(struct tps65910 *tps65910, u8 reg, u8 mask);
837int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask);
838void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base);
839int tps65910_irq_init(struct tps65910 *tps65910, int irq, 850int tps65910_irq_init(struct tps65910 *tps65910, int irq,
840 struct tps65910_platform_data *pdata); 851 struct tps65910_platform_data *pdata);
841int tps65910_irq_exit(struct tps65910 *tps65910); 852int tps65910_irq_exit(struct tps65910 *tps65910);
@@ -845,4 +856,28 @@ static inline int tps65910_chip_id(struct tps65910 *tps65910)
845 return tps65910->id; 856 return tps65910->id;
846} 857}
847 858
859static inline int tps65910_reg_read(struct tps65910 *tps65910, u8 reg,
860 unsigned int *val)
861{
862 return regmap_read(tps65910->regmap, reg, val);
863}
864
865static inline int tps65910_reg_write(struct tps65910 *tps65910, u8 reg,
866 unsigned int val)
867{
868 return regmap_write(tps65910->regmap, reg, val);
869}
870
871static inline int tps65910_reg_set_bits(struct tps65910 *tps65910, u8 reg,
872 u8 mask)
873{
874 return regmap_update_bits(tps65910->regmap, reg, mask, mask);
875}
876
877static inline int tps65910_reg_clear_bits(struct tps65910 *tps65910, u8 reg,
878 u8 mask)
879{
880 return regmap_update_bits(tps65910->regmap, reg, mask, 0);
881}
882
848#endif /* __LINUX_MFD_TPS65910_H */ 883#endif /* __LINUX_MFD_TPS65910_H */
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index b15b5f03f5c4..6659487c31e7 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -27,6 +27,7 @@
27 27
28#include <linux/interrupt.h> 28#include <linux/interrupt.h>
29#include <linux/mfd/core.h> 29#include <linux/mfd/core.h>
30#include <linux/regulator/consumer.h>
30 31
31#define TWL6040_REG_ASICID 0x01 32#define TWL6040_REG_ASICID 0x01
32#define TWL6040_REG_ASICREV 0x02 33#define TWL6040_REG_ASICREV 0x02
@@ -203,6 +204,7 @@ struct regmap;
203struct twl6040 { 204struct twl6040 {
204 struct device *dev; 205 struct device *dev;
205 struct regmap *regmap; 206 struct regmap *regmap;
207 struct regulator_bulk_data supplies[2]; /* supplies for vio, v2v1 */
206 struct mutex mutex; 208 struct mutex mutex;
207 struct mutex io_mutex; 209 struct mutex io_mutex;
208 struct mutex irq_mutex; 210 struct mutex irq_mutex;
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index 4b1211859f74..4a3b83a77614 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -17,6 +17,7 @@
17 17
18#include <linux/completion.h> 18#include <linux/completion.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/irqdomain.h>
20#include <linux/list.h> 21#include <linux/list.h>
21#include <linux/regmap.h> 22#include <linux/regmap.h>
22 23
@@ -338,6 +339,7 @@
338#define WM831X_FLL_CLK_SRC_WIDTH 2 /* FLL_CLK_SRC - [1:0] */ 339#define WM831X_FLL_CLK_SRC_WIDTH 2 /* FLL_CLK_SRC - [1:0] */
339 340
340struct regulator_dev; 341struct regulator_dev;
342struct irq_domain;
341 343
342#define WM831X_NUM_IRQ_REGS 5 344#define WM831X_NUM_IRQ_REGS 5
343#define WM831X_NUM_GPIO_REGS 16 345#define WM831X_NUM_GPIO_REGS 16
@@ -367,7 +369,7 @@ struct wm831x {
367 369
368 int irq; /* Our chip IRQ */ 370 int irq; /* Our chip IRQ */
369 struct mutex irq_lock; 371 struct mutex irq_lock;
370 int irq_base; 372 struct irq_domain *irq_domain;
371 int irq_masks_cur[WM831X_NUM_IRQ_REGS]; /* Currently active value */ 373 int irq_masks_cur[WM831X_NUM_IRQ_REGS]; /* Currently active value */
372 int irq_masks_cache[WM831X_NUM_IRQ_REGS]; /* Cached hardware value */ 374 int irq_masks_cache[WM831X_NUM_IRQ_REGS]; /* Cached hardware value */
373 375
@@ -382,7 +384,8 @@ struct wm831x {
382 384
383 /* Used by the interrupt controller code to post writes */ 385 /* Used by the interrupt controller code to post writes */
384 int gpio_update[WM831X_NUM_GPIO_REGS]; 386 int gpio_update[WM831X_NUM_GPIO_REGS];
385 bool gpio_level[WM831X_NUM_GPIO_REGS]; 387 bool gpio_level_high[WM831X_NUM_GPIO_REGS];
388 bool gpio_level_low[WM831X_NUM_GPIO_REGS];
386 389
387 struct mutex auxadc_lock; 390 struct mutex auxadc_lock;
388 struct list_head auxadc_pending; 391 struct list_head auxadc_pending;
@@ -417,6 +420,11 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq);
417void wm831x_irq_exit(struct wm831x *wm831x); 420void wm831x_irq_exit(struct wm831x *wm831x);
418void wm831x_auxadc_init(struct wm831x *wm831x); 421void wm831x_auxadc_init(struct wm831x *wm831x);
419 422
423static inline int wm831x_irq(struct wm831x *wm831x, int irq)
424{
425 return irq_create_mapping(wm831x->irq_domain, irq);
426}
427
420extern struct regmap_config wm831x_regmap_config; 428extern struct regmap_config wm831x_regmap_config;
421 429
422#endif 430#endif
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 98fcc977e82b..9192b6404a73 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -602,6 +602,7 @@ extern const u16 wm8352_mode2_defaults[];
602extern const u16 wm8352_mode3_defaults[]; 602extern const u16 wm8352_mode3_defaults[];
603 603
604struct wm8350; 604struct wm8350;
605struct regmap;
605 606
606struct wm8350_hwmon { 607struct wm8350_hwmon {
607 struct platform_device *pdev; 608 struct platform_device *pdev;
@@ -612,13 +613,7 @@ struct wm8350 {
612 struct device *dev; 613 struct device *dev;
613 614
614 /* device IO */ 615 /* device IO */
615 union { 616 struct regmap *regmap;
616 struct i2c_client *i2c_client;
617 struct spi_device *spi_device;
618 };
619 int (*read_dev)(struct wm8350 *wm8350, char reg, int size, void *dest);
620 int (*write_dev)(struct wm8350 *wm8350, char reg, int size,
621 void *src);
622 u16 *reg_cache; 617 u16 *reg_cache;
623 618
624 struct mutex auxadc_mutex; 619 struct mutex auxadc_mutex;
diff --git a/include/linux/mfd/wm8400-private.h b/include/linux/mfd/wm8400-private.h
index 0147b6968510..2de565b94d0c 100644
--- a/include/linux/mfd/wm8400-private.h
+++ b/include/linux/mfd/wm8400-private.h
@@ -24,19 +24,14 @@
24#include <linux/mfd/wm8400.h> 24#include <linux/mfd/wm8400.h>
25#include <linux/mutex.h> 25#include <linux/mutex.h>
26#include <linux/platform_device.h> 26#include <linux/platform_device.h>
27 27#include <linux/regmap.h>
28struct regmap;
29 28
30#define WM8400_REGISTER_COUNT 0x55 29#define WM8400_REGISTER_COUNT 0x55
31 30
32struct wm8400 { 31struct wm8400 {
33 struct device *dev; 32 struct device *dev;
34
35 struct mutex io_lock;
36 struct regmap *regmap; 33 struct regmap *regmap;
37 34
38 u16 reg_cache[WM8400_REGISTER_COUNT];
39
40 struct platform_device regulators[6]; 35 struct platform_device regulators[6];
41}; 36};
42 37
@@ -930,6 +925,11 @@ struct wm8400 {
930 925
931u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg); 926u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg);
932int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data); 927int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data);
933int wm8400_set_bits(struct wm8400 *wm8400, u8 reg, u16 mask, u16 val); 928
929static inline int wm8400_set_bits(struct wm8400 *wm8400, u8 reg,
930 u16 mask, u16 val)
931{
932 return regmap_update_bits(wm8400->regmap, reg, mask, val);
933}
934 934
935#endif 935#endif
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 6695c3ec4518..1f173306bf05 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -57,6 +57,7 @@ struct wm8994 {
57 57
58 enum wm8994_type type; 58 enum wm8994_type type;
59 int revision; 59 int revision;
60 int cust_id;
60 61
61 struct device *dev; 62 struct device *dev;
62 struct regmap *regmap; 63 struct regmap *regmap;
diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h
index 86e6a032a078..053548961c15 100644
--- a/include/linux/mfd/wm8994/registers.h
+++ b/include/linux/mfd/wm8994/registers.h
@@ -2212,6 +2212,9 @@
2212/* 2212/*
2213 * R256 (0x100) - Chip Revision 2213 * R256 (0x100) - Chip Revision
2214 */ 2214 */
2215#define WM8994_CUST_ID_MASK 0xFF00 /* CUST_ID - [15:8] */
2216#define WM8994_CUST_ID_SHIFT 8 /* CUST_ID - [15:8] */
2217#define WM8994_CUST_ID_WIDTH 8 /* CUST_ID - [15:8] */
2215#define WM8994_CHIP_REV_MASK 0x000F /* CHIP_REV - [3:0] */ 2218#define WM8994_CHIP_REV_MASK 0x000F /* CHIP_REV - [3:0] */
2216#define WM8994_CHIP_REV_SHIFT 0 /* CHIP_REV - [3:0] */ 2219#define WM8994_CHIP_REV_SHIFT 0 /* CHIP_REV - [3:0] */
2217#define WM8994_CHIP_REV_WIDTH 4 /* CHIP_REV - [3:0] */ 2220#define WM8994_CHIP_REV_WIDTH 4 /* CHIP_REV - [3:0] */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d5c37f24c63..ce26716238c3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -321,6 +321,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
321static inline void compound_lock(struct page *page) 321static inline void compound_lock(struct page *page)
322{ 322{
323#ifdef CONFIG_TRANSPARENT_HUGEPAGE 323#ifdef CONFIG_TRANSPARENT_HUGEPAGE
324 VM_BUG_ON(PageSlab(page));
324 bit_spin_lock(PG_compound_lock, &page->flags); 325 bit_spin_lock(PG_compound_lock, &page->flags);
325#endif 326#endif
326} 327}
@@ -328,6 +329,7 @@ static inline void compound_lock(struct page *page)
328static inline void compound_unlock(struct page *page) 329static inline void compound_unlock(struct page *page)
329{ 330{
330#ifdef CONFIG_TRANSPARENT_HUGEPAGE 331#ifdef CONFIG_TRANSPARENT_HUGEPAGE
332 VM_BUG_ON(PageSlab(page));
331 bit_spin_unlock(PG_compound_lock, &page->flags); 333 bit_spin_unlock(PG_compound_lock, &page->flags);
332#endif 334#endif
333} 335}
@@ -871,8 +873,6 @@ extern void pagefault_out_of_memory(void);
871extern void show_free_areas(unsigned int flags); 873extern void show_free_areas(unsigned int flags);
872extern bool skip_free_areas_node(unsigned int flags, int nid); 874extern bool skip_free_areas_node(unsigned int flags, int nid);
873 875
874int shmem_lock(struct file *file, int lock, struct user_struct *user);
875struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
876int shmem_zero_setup(struct vm_area_struct *); 876int shmem_zero_setup(struct vm_area_struct *);
877 877
878extern int can_do_mlock(void); 878extern int can_do_mlock(void);
@@ -951,11 +951,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
951extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); 951extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
952extern void truncate_setsize(struct inode *inode, loff_t newsize); 952extern void truncate_setsize(struct inode *inode, loff_t newsize);
953extern int vmtruncate(struct inode *inode, loff_t offset); 953extern int vmtruncate(struct inode *inode, loff_t offset);
954extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
955void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); 954void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
956int truncate_inode_page(struct address_space *mapping, struct page *page); 955int truncate_inode_page(struct address_space *mapping, struct page *page);
957int generic_error_remove_page(struct address_space *mapping, struct page *page); 956int generic_error_remove_page(struct address_space *mapping, struct page *page);
958
959int invalidate_inode_page(struct page *page); 957int invalidate_inode_page(struct page *page);
960 958
961#ifdef CONFIG_MMU 959#ifdef CONFIG_MMU
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 26574c726121..dad95bdd06d7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -345,17 +345,6 @@ struct mm_struct {
345 /* Architecture-specific MM context */ 345 /* Architecture-specific MM context */
346 mm_context_t context; 346 mm_context_t context;
347 347
348 /* Swap token stuff */
349 /*
350 * Last value of global fault stamp as seen by this process.
351 * In other words, this value gives an indication of how long
352 * it has been since this task got the token.
353 * Look at mm/thrash.c
354 */
355 unsigned int faultstamp;
356 unsigned int token_priority;
357 unsigned int last_interval;
358
359 unsigned long flags; /* Must use atomic bitops to access the bits */ 348 unsigned long flags; /* Must use atomic bitops to access the bits */
360 349
361 struct core_state *core_state; /* coredumping support */ 350 struct core_state *core_state; /* coredumping support */
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index c04ecfe03f7f..580bd587d916 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -4,7 +4,7 @@
4#ifdef CONFIG_DEBUG_VM 4#ifdef CONFIG_DEBUG_VM
5#define VM_BUG_ON(cond) BUG_ON(cond) 5#define VM_BUG_ON(cond) BUG_ON(cond)
6#else 6#else
7#define VM_BUG_ON(cond) do { (void)(cond); } while (0) 7#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
8#endif 8#endif
9 9
10#ifdef CONFIG_DEBUG_VIRTUAL 10#ifdef CONFIG_DEBUG_VIRTUAL
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 54631776dff2..588c5cb2851d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -185,8 +185,22 @@ static inline int is_unevictable_lru(enum lru_list lru)
185 return (lru == LRU_UNEVICTABLE); 185 return (lru == LRU_UNEVICTABLE);
186} 186}
187 187
188struct zone_reclaim_stat {
189 /*
190 * The pageout code in vmscan.c keeps track of how many of the
191 * mem/swap backed and file backed pages are referenced.
192 * The higher the rotated/scanned ratio, the more valuable
193 * that cache is.
194 *
195 * The anon LRU stats live in [0], file LRU stats in [1]
196 */
197 unsigned long recent_rotated[2];
198 unsigned long recent_scanned[2];
199};
200
188struct lruvec { 201struct lruvec {
189 struct list_head lists[NR_LRU_LISTS]; 202 struct list_head lists[NR_LRU_LISTS];
203 struct zone_reclaim_stat reclaim_stat;
190}; 204};
191 205
192/* Mask used at gathering information at once (see memcontrol.c) */ 206/* Mask used at gathering information at once (see memcontrol.c) */
@@ -313,19 +327,6 @@ enum zone_type {
313#error ZONES_SHIFT -- too many zones configured adjust calculation 327#error ZONES_SHIFT -- too many zones configured adjust calculation
314#endif 328#endif
315 329
316struct zone_reclaim_stat {
317 /*
318 * The pageout code in vmscan.c keeps track of how many of the
319 * mem/swap backed and file backed pages are referenced.
320 * The higher the rotated/scanned ratio, the more valuable
321 * that cache is.
322 *
323 * The anon LRU stats live in [0], file LRU stats in [1]
324 */
325 unsigned long recent_rotated[2];
326 unsigned long recent_scanned[2];
327};
328
329struct zone { 330struct zone {
330 /* Fields commonly accessed by the page allocator */ 331 /* Fields commonly accessed by the page allocator */
331 332
@@ -407,8 +408,6 @@ struct zone {
407 spinlock_t lru_lock; 408 spinlock_t lru_lock;
408 struct lruvec lruvec; 409 struct lruvec lruvec;
409 410
410 struct zone_reclaim_stat reclaim_stat;
411
412 unsigned long pages_scanned; /* since last reclaim */ 411 unsigned long pages_scanned; /* since last reclaim */
413 unsigned long flags; /* zone flags, see below */ 412 unsigned long flags; /* zone flags, see below */
414 413
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 0987146b0637..af2d2fa30eee 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -69,6 +69,10 @@
69#define NFS4_CDFC4_FORE_OR_BOTH 0x3 69#define NFS4_CDFC4_FORE_OR_BOTH 0x3
70#define NFS4_CDFC4_BACK_OR_BOTH 0x7 70#define NFS4_CDFC4_BACK_OR_BOTH 0x7
71 71
72#define NFS4_CDFS4_FORE 0x1
73#define NFS4_CDFS4_BACK 0x2
74#define NFS4_CDFS4_BOTH 0x3
75
72#define NFS4_SET_TO_SERVER_TIME 0 76#define NFS4_SET_TO_SERVER_TIME 0
73#define NFS4_SET_TO_CLIENT_TIME 1 77#define NFS4_SET_TO_CLIENT_TIME 1
74 78
@@ -526,6 +530,13 @@ enum lock_type4 {
526#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) 530#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23)
527#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) 531#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30)
528#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) 532#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
533#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
534
535/* MDS threshold bitmap bits */
536#define THRESHOLD_RD (1UL << 0)
537#define THRESHOLD_WR (1UL << 1)
538#define THRESHOLD_RD_IO (1UL << 2)
539#define THRESHOLD_WR_IO (1UL << 3)
529 540
530#define NFSPROC4_NULL 0 541#define NFSPROC4_NULL 0
531#define NFSPROC4_COMPOUND 1 542#define NFSPROC4_COMPOUND 1
@@ -596,6 +607,8 @@ enum {
596 NFSPROC4_CLNT_TEST_STATEID, 607 NFSPROC4_CLNT_TEST_STATEID,
597 NFSPROC4_CLNT_FREE_STATEID, 608 NFSPROC4_CLNT_FREE_STATEID,
598 NFSPROC4_CLNT_GETDEVICELIST, 609 NFSPROC4_CLNT_GETDEVICELIST,
610 NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
611 NFSPROC4_CLNT_DESTROY_CLIENTID,
599}; 612};
600 613
601/* nfs41 types */ 614/* nfs41 types */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 52a1bdb4ee2b..b23cfc120edb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -102,6 +102,7 @@ struct nfs_open_context {
102 int error; 102 int error;
103 103
104 struct list_head list; 104 struct list_head list;
105 struct nfs4_threshold *mdsthreshold;
105}; 106};
106 107
107struct nfs_open_dir_context { 108struct nfs_open_dir_context {
@@ -179,8 +180,7 @@ struct nfs_inode {
179 __be32 cookieverf[2]; 180 __be32 cookieverf[2];
180 181
181 unsigned long npages; 182 unsigned long npages;
182 unsigned long ncommit; 183 struct nfs_mds_commit_info commit_info;
183 struct list_head commit_list;
184 184
185 /* Open contexts for shared mmap writes */ 185 /* Open contexts for shared mmap writes */
186 struct list_head open_files; 186 struct list_head open_files;
@@ -201,8 +201,10 @@ struct nfs_inode {
201 201
202 /* pNFS layout information */ 202 /* pNFS layout information */
203 struct pnfs_layout_hdr *layout; 203 struct pnfs_layout_hdr *layout;
204 atomic_t commits_outstanding;
205#endif /* CONFIG_NFS_V4*/ 204#endif /* CONFIG_NFS_V4*/
205 /* how many bytes have been written/read and how many bytes queued up */
206 __u64 write_io;
207 __u64 read_io;
206#ifdef CONFIG_NFS_FSCACHE 208#ifdef CONFIG_NFS_FSCACHE
207 struct fscache_cookie *fscache; 209 struct fscache_cookie *fscache;
208#endif 210#endif
@@ -230,7 +232,6 @@ struct nfs_inode {
230#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ 232#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
231#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ 233#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
232#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ 234#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
233#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */
234#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ 235#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
235#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ 236#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
236 237
@@ -317,11 +318,6 @@ static inline int nfs_server_capable(struct inode *inode, int cap)
317 return NFS_SERVER(inode)->caps & cap; 318 return NFS_SERVER(inode)->caps & cap;
318} 319}
319 320
320static inline int NFS_USE_READDIRPLUS(struct inode *inode)
321{
322 return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
323}
324
325static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) 321static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
326{ 322{
327 dentry->d_time = verf; 323 dentry->d_time = verf;
@@ -552,8 +548,8 @@ extern int nfs_wb_page(struct inode *inode, struct page* page);
552extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); 548extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
553#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 549#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
554extern int nfs_commit_inode(struct inode *, int); 550extern int nfs_commit_inode(struct inode *, int);
555extern struct nfs_write_data *nfs_commitdata_alloc(void); 551extern struct nfs_commit_data *nfs_commitdata_alloc(void);
556extern void nfs_commit_free(struct nfs_write_data *wdata); 552extern void nfs_commit_free(struct nfs_commit_data *data);
557#else 553#else
558static inline int 554static inline int
559nfs_commit_inode(struct inode *inode, int how) 555nfs_commit_inode(struct inode *inode, int how)
@@ -569,12 +565,6 @@ nfs_have_writebacks(struct inode *inode)
569} 565}
570 566
571/* 567/*
572 * Allocate nfs_write_data structures
573 */
574extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
575extern void nfs_writedata_free(struct nfs_write_data *);
576
577/*
578 * linux/fs/nfs/read.c 568 * linux/fs/nfs/read.c
579 */ 569 */
580extern int nfs_readpage(struct file *, struct page *); 570extern int nfs_readpage(struct file *, struct page *);
@@ -585,12 +575,6 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
585 struct page *); 575 struct page *);
586 576
587/* 577/*
588 * Allocate nfs_read_data structures
589 */
590extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
591extern void nfs_readdata_free(struct nfs_read_data *);
592
593/*
594 * linux/fs/nfs3proc.c 578 * linux/fs/nfs3proc.c
595 */ 579 */
596#ifdef CONFIG_NFS_V3_ACL 580#ifdef CONFIG_NFS_V3_ACL
@@ -654,6 +638,7 @@ nfs_fileid_to_ino_t(u64 fileid)
654#define NFSDBG_FSCACHE 0x0800 638#define NFSDBG_FSCACHE 0x0800
655#define NFSDBG_PNFS 0x1000 639#define NFSDBG_PNFS 0x1000
656#define NFSDBG_PNFS_LD 0x2000 640#define NFSDBG_PNFS_LD 0x2000
641#define NFSDBG_STATE 0x4000
657#define NFSDBG_ALL 0xFFFF 642#define NFSDBG_ALL 0xFFFF
658 643
659#ifdef __KERNEL__ 644#ifdef __KERNEL__
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 7073fc74481c..fbb78fb09bd2 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -17,7 +17,7 @@ struct nfs4_sequence_args;
17struct nfs4_sequence_res; 17struct nfs4_sequence_res;
18struct nfs_server; 18struct nfs_server;
19struct nfs4_minor_version_ops; 19struct nfs4_minor_version_ops;
20struct server_scope; 20struct nfs41_server_scope;
21struct nfs41_impl_id; 21struct nfs41_impl_id;
22 22
23/* 23/*
@@ -35,6 +35,9 @@ struct nfs_client {
35#define NFS_CS_RENEWD 3 /* - renewd started */ 35#define NFS_CS_RENEWD 3 /* - renewd started */
36#define NFS_CS_STOP_RENEW 4 /* no more state to renew */ 36#define NFS_CS_STOP_RENEW 4 /* no more state to renew */
37#define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */ 37#define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */
38 unsigned long cl_flags; /* behavior switches */
39#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */
40#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
38 struct sockaddr_storage cl_addr; /* server identifier */ 41 struct sockaddr_storage cl_addr; /* server identifier */
39 size_t cl_addrlen; 42 size_t cl_addrlen;
40 char * cl_hostname; /* hostname of server */ 43 char * cl_hostname; /* hostname of server */
@@ -61,9 +64,6 @@ struct nfs_client {
61 64
62 struct rpc_wait_queue cl_rpcwaitq; 65 struct rpc_wait_queue cl_rpcwaitq;
63 66
64 /* used for the setclientid verifier */
65 struct timespec cl_boot_time;
66
67 /* idmapper */ 67 /* idmapper */
68 struct idmap * cl_idmap; 68 struct idmap * cl_idmap;
69 69
@@ -79,16 +79,17 @@ struct nfs_client {
79 u32 cl_seqid; 79 u32 cl_seqid;
80 /* The flags used for obtaining the clientid during EXCHANGE_ID */ 80 /* The flags used for obtaining the clientid during EXCHANGE_ID */
81 u32 cl_exchange_flags; 81 u32 cl_exchange_flags;
82 struct nfs4_session *cl_session; /* sharred session */ 82 struct nfs4_session *cl_session; /* shared session */
83 struct nfs41_server_owner *cl_serverowner;
84 struct nfs41_server_scope *cl_serverscope;
85 struct nfs41_impl_id *cl_implid;
83#endif /* CONFIG_NFS_V4 */ 86#endif /* CONFIG_NFS_V4 */
84 87
85#ifdef CONFIG_NFS_FSCACHE 88#ifdef CONFIG_NFS_FSCACHE
86 struct fscache_cookie *fscache; /* client index cache cookie */ 89 struct fscache_cookie *fscache; /* client index cache cookie */
87#endif 90#endif
88 91
89 struct server_scope *server_scope; /* from exchange_id */ 92 struct net *cl_net;
90 struct nfs41_impl_id *impl_id; /* from exchange_id */
91 struct net *net;
92}; 93};
93 94
94/* 95/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index eac30d6bec17..88d166b555e8 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -27,7 +27,6 @@ enum {
27 PG_CLEAN, 27 PG_CLEAN,
28 PG_NEED_COMMIT, 28 PG_NEED_COMMIT,
29 PG_NEED_RESCHED, 29 PG_NEED_RESCHED,
30 PG_PARTIAL_READ_FAILED,
31 PG_COMMIT_TO_DS, 30 PG_COMMIT_TO_DS,
32}; 31};
33 32
@@ -37,7 +36,6 @@ struct nfs_page {
37 struct page *wb_page; /* page to read in/write out */ 36 struct page *wb_page; /* page to read in/write out */
38 struct nfs_open_context *wb_context; /* File state context info */ 37 struct nfs_open_context *wb_context; /* File state context info */
39 struct nfs_lock_context *wb_lock_context; /* lock context info */ 38 struct nfs_lock_context *wb_lock_context; /* lock context info */
40 atomic_t wb_complete; /* i/os we're waiting for */
41 pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */ 39 pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */
42 unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ 40 unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */
43 wb_pgbase, /* Start of page data */ 41 wb_pgbase, /* Start of page data */
@@ -68,7 +66,9 @@ struct nfs_pageio_descriptor {
68 int pg_ioflags; 66 int pg_ioflags;
69 int pg_error; 67 int pg_error;
70 const struct rpc_call_ops *pg_rpc_callops; 68 const struct rpc_call_ops *pg_rpc_callops;
69 const struct nfs_pgio_completion_ops *pg_completion_ops;
71 struct pnfs_layout_segment *pg_lseg; 70 struct pnfs_layout_segment *pg_lseg;
71 struct nfs_direct_req *pg_dreq;
72}; 72};
73 73
74#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) 74#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -84,6 +84,7 @@ extern void nfs_release_request(struct nfs_page *req);
84extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 84extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
85 struct inode *inode, 85 struct inode *inode,
86 const struct nfs_pageio_ops *pg_ops, 86 const struct nfs_pageio_ops *pg_ops,
87 const struct nfs_pgio_completion_ops *compl_ops,
87 size_t bsize, 88 size_t bsize,
88 int how); 89 int how);
89extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, 90extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@@ -95,26 +96,17 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
95 struct nfs_page *req); 96 struct nfs_page *req);
96extern int nfs_wait_on_request(struct nfs_page *); 97extern int nfs_wait_on_request(struct nfs_page *);
97extern void nfs_unlock_request(struct nfs_page *req); 98extern void nfs_unlock_request(struct nfs_page *req);
99extern void nfs_unlock_and_release_request(struct nfs_page *req);
98 100
99/* 101/*
100 * Lock the page of an asynchronous request without getting a new reference 102 * Lock the page of an asynchronous request
101 */ 103 */
102static inline int 104static inline int
103nfs_lock_request_dontget(struct nfs_page *req)
104{
105 return !test_and_set_bit(PG_BUSY, &req->wb_flags);
106}
107
108static inline int
109nfs_lock_request(struct nfs_page *req) 105nfs_lock_request(struct nfs_page *req)
110{ 106{
111 if (test_and_set_bit(PG_BUSY, &req->wb_flags)) 107 return !test_and_set_bit(PG_BUSY, &req->wb_flags);
112 return 0;
113 kref_get(&req->wb_kref);
114 return 1;
115} 108}
116 109
117
118/** 110/**
119 * nfs_list_add_request - Insert a request into a list 111 * nfs_list_add_request - Insert a request into a list
120 * @req: request 112 * @req: request
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7ba3551a0414..d1a7bf51c326 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -35,6 +35,15 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid
35 return a->major == b->major && a->minor == b->minor; 35 return a->major == b->major && a->minor == b->minor;
36} 36}
37 37
38struct nfs4_threshold {
39 __u32 bm;
40 __u32 l_type;
41 __u64 rd_sz;
42 __u64 wr_sz;
43 __u64 rd_io_sz;
44 __u64 wr_io_sz;
45};
46
38struct nfs_fattr { 47struct nfs_fattr {
39 unsigned int valid; /* which fields are valid */ 48 unsigned int valid; /* which fields are valid */
40 umode_t mode; 49 umode_t mode;
@@ -67,6 +76,7 @@ struct nfs_fattr {
67 unsigned long gencount; 76 unsigned long gencount;
68 struct nfs4_string *owner_name; 77 struct nfs4_string *owner_name;
69 struct nfs4_string *group_name; 78 struct nfs4_string *group_name;
79 struct nfs4_threshold *mdsthreshold; /* pNFS threshold hints */
70}; 80};
71 81
72#define NFS_ATTR_FATTR_TYPE (1U << 0) 82#define NFS_ATTR_FATTR_TYPE (1U << 0)
@@ -106,14 +116,14 @@ struct nfs_fattr {
106 | NFS_ATTR_FATTR_FILEID \ 116 | NFS_ATTR_FATTR_FILEID \
107 | NFS_ATTR_FATTR_ATIME \ 117 | NFS_ATTR_FATTR_ATIME \
108 | NFS_ATTR_FATTR_MTIME \ 118 | NFS_ATTR_FATTR_MTIME \
109 | NFS_ATTR_FATTR_CTIME) 119 | NFS_ATTR_FATTR_CTIME \
120 | NFS_ATTR_FATTR_CHANGE)
110#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \ 121#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \
111 | NFS_ATTR_FATTR_BLOCKS_USED) 122 | NFS_ATTR_FATTR_BLOCKS_USED)
112#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \ 123#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \
113 | NFS_ATTR_FATTR_SPACE_USED) 124 | NFS_ATTR_FATTR_SPACE_USED)
114#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ 125#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \
115 | NFS_ATTR_FATTR_SPACE_USED \ 126 | NFS_ATTR_FATTR_SPACE_USED)
116 | NFS_ATTR_FATTR_CHANGE)
117 127
118/* 128/*
119 * Info on the file system 129 * Info on the file system
@@ -338,7 +348,6 @@ struct nfs_openargs {
338 const struct qstr * name; 348 const struct qstr * name;
339 const struct nfs_server *server; /* Needed for ID mapping */ 349 const struct nfs_server *server; /* Needed for ID mapping */
340 const u32 * bitmask; 350 const u32 * bitmask;
341 const u32 * dir_bitmask;
342 __u32 claim; 351 __u32 claim;
343 struct nfs4_sequence_args seq_args; 352 struct nfs4_sequence_args seq_args;
344}; 353};
@@ -349,7 +358,6 @@ struct nfs_openres {
349 struct nfs4_change_info cinfo; 358 struct nfs4_change_info cinfo;
350 __u32 rflags; 359 __u32 rflags;
351 struct nfs_fattr * f_attr; 360 struct nfs_fattr * f_attr;
352 struct nfs_fattr * dir_attr;
353 struct nfs_seqid * seqid; 361 struct nfs_seqid * seqid;
354 const struct nfs_server *server; 362 const struct nfs_server *server;
355 fmode_t delegation_type; 363 fmode_t delegation_type;
@@ -519,12 +527,29 @@ struct nfs_writeres {
519}; 527};
520 528
521/* 529/*
530 * Arguments to the commit call.
531 */
532struct nfs_commitargs {
533 struct nfs_fh *fh;
534 __u64 offset;
535 __u32 count;
536 const u32 *bitmask;
537 struct nfs4_sequence_args seq_args;
538};
539
540struct nfs_commitres {
541 struct nfs_fattr *fattr;
542 struct nfs_writeverf *verf;
543 const struct nfs_server *server;
544 struct nfs4_sequence_res seq_res;
545};
546
547/*
522 * Common arguments to the unlink call 548 * Common arguments to the unlink call
523 */ 549 */
524struct nfs_removeargs { 550struct nfs_removeargs {
525 const struct nfs_fh *fh; 551 const struct nfs_fh *fh;
526 struct qstr name; 552 struct qstr name;
527 const u32 * bitmask;
528 struct nfs4_sequence_args seq_args; 553 struct nfs4_sequence_args seq_args;
529}; 554};
530 555
@@ -543,7 +568,6 @@ struct nfs_renameargs {
543 const struct nfs_fh *new_dir; 568 const struct nfs_fh *new_dir;
544 const struct qstr *old_name; 569 const struct qstr *old_name;
545 const struct qstr *new_name; 570 const struct qstr *new_name;
546 const u32 *bitmask;
547 struct nfs4_sequence_args seq_args; 571 struct nfs4_sequence_args seq_args;
548}; 572};
549 573
@@ -839,7 +863,6 @@ struct nfs4_create_res {
839 struct nfs_fh * fh; 863 struct nfs_fh * fh;
840 struct nfs_fattr * fattr; 864 struct nfs_fattr * fattr;
841 struct nfs4_change_info dir_cinfo; 865 struct nfs4_change_info dir_cinfo;
842 struct nfs_fattr * dir_fattr;
843 struct nfs4_sequence_res seq_res; 866 struct nfs4_sequence_res seq_res;
844}; 867};
845 868
@@ -1061,6 +1084,21 @@ struct nfstime4 {
1061}; 1084};
1062 1085
1063#ifdef CONFIG_NFS_V4_1 1086#ifdef CONFIG_NFS_V4_1
1087
1088struct pnfs_commit_bucket {
1089 struct list_head written;
1090 struct list_head committing;
1091 struct pnfs_layout_segment *wlseg;
1092 struct pnfs_layout_segment *clseg;
1093};
1094
1095struct pnfs_ds_commit_info {
1096 int nwritten;
1097 int ncommitting;
1098 int nbuckets;
1099 struct pnfs_commit_bucket *buckets;
1100};
1101
1064#define NFS4_EXCHANGE_ID_LEN (48) 1102#define NFS4_EXCHANGE_ID_LEN (48)
1065struct nfs41_exchange_id_args { 1103struct nfs41_exchange_id_args {
1066 struct nfs_client *client; 1104 struct nfs_client *client;
@@ -1070,13 +1108,13 @@ struct nfs41_exchange_id_args {
1070 u32 flags; 1108 u32 flags;
1071}; 1109};
1072 1110
1073struct server_owner { 1111struct nfs41_server_owner {
1074 uint64_t minor_id; 1112 uint64_t minor_id;
1075 uint32_t major_id_sz; 1113 uint32_t major_id_sz;
1076 char major_id[NFS4_OPAQUE_LIMIT]; 1114 char major_id[NFS4_OPAQUE_LIMIT];
1077}; 1115};
1078 1116
1079struct server_scope { 1117struct nfs41_server_scope {
1080 uint32_t server_scope_sz; 1118 uint32_t server_scope_sz;
1081 char server_scope[NFS4_OPAQUE_LIMIT]; 1119 char server_scope[NFS4_OPAQUE_LIMIT];
1082}; 1120};
@@ -1087,10 +1125,18 @@ struct nfs41_impl_id {
1087 struct nfstime4 date; 1125 struct nfstime4 date;
1088}; 1126};
1089 1127
1128struct nfs41_bind_conn_to_session_res {
1129 struct nfs4_session *session;
1130 u32 dir;
1131 bool use_conn_in_rdma_mode;
1132};
1133
1090struct nfs41_exchange_id_res { 1134struct nfs41_exchange_id_res {
1091 struct nfs_client *client; 1135 u64 clientid;
1136 u32 seqid;
1092 u32 flags; 1137 u32 flags;
1093 struct server_scope *server_scope; 1138 struct nfs41_server_owner *server_owner;
1139 struct nfs41_server_scope *server_scope;
1094 struct nfs41_impl_id *impl_id; 1140 struct nfs41_impl_id *impl_id;
1095}; 1141};
1096 1142
@@ -1143,35 +1189,114 @@ struct nfs41_free_stateid_res {
1143 struct nfs4_sequence_res seq_res; 1189 struct nfs4_sequence_res seq_res;
1144}; 1190};
1145 1191
1192#else
1193
1194struct pnfs_ds_commit_info {
1195};
1196
1146#endif /* CONFIG_NFS_V4_1 */ 1197#endif /* CONFIG_NFS_V4_1 */
1147 1198
1148struct nfs_page; 1199struct nfs_page;
1149 1200
1150#define NFS_PAGEVEC_SIZE (8U) 1201#define NFS_PAGEVEC_SIZE (8U)
1151 1202
1203struct nfs_page_array {
1204 struct page **pagevec;
1205 unsigned int npages; /* Max length of pagevec */
1206 struct page *page_array[NFS_PAGEVEC_SIZE];
1207};
1208
1152struct nfs_read_data { 1209struct nfs_read_data {
1210 struct nfs_pgio_header *header;
1211 struct list_head list;
1153 struct rpc_task task; 1212 struct rpc_task task;
1154 struct inode *inode;
1155 struct rpc_cred *cred;
1156 struct nfs_fattr fattr; /* fattr storage */ 1213 struct nfs_fattr fattr; /* fattr storage */
1157 struct list_head pages; /* Coalesced read requests */
1158 struct list_head list; /* lists of struct nfs_read_data */
1159 struct nfs_page *req; /* multi ops per nfs_page */
1160 struct page **pagevec;
1161 unsigned int npages; /* Max length of pagevec */
1162 struct nfs_readargs args; 1214 struct nfs_readargs args;
1163 struct nfs_readres res; 1215 struct nfs_readres res;
1164 unsigned long timestamp; /* For lease renewal */ 1216 unsigned long timestamp; /* For lease renewal */
1165 struct pnfs_layout_segment *lseg;
1166 struct nfs_client *ds_clp; /* pNFS data server */
1167 const struct rpc_call_ops *mds_ops;
1168 int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); 1217 int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
1169 __u64 mds_offset; 1218 __u64 mds_offset;
1219 struct nfs_page_array pages;
1220 struct nfs_client *ds_clp; /* pNFS data server */
1221};
1222
1223/* used as flag bits in nfs_pgio_header */
1224enum {
1225 NFS_IOHDR_ERROR = 0,
1226 NFS_IOHDR_EOF,
1227 NFS_IOHDR_REDO,
1228 NFS_IOHDR_NEED_COMMIT,
1229 NFS_IOHDR_NEED_RESCHED,
1230};
1231
1232struct nfs_pgio_header {
1233 struct inode *inode;
1234 struct rpc_cred *cred;
1235 struct list_head pages;
1236 struct list_head rpc_list;
1237 atomic_t refcnt;
1238 struct nfs_page *req;
1239 struct pnfs_layout_segment *lseg;
1240 loff_t io_start;
1241 const struct rpc_call_ops *mds_ops;
1242 void (*release) (struct nfs_pgio_header *hdr);
1243 const struct nfs_pgio_completion_ops *completion_ops;
1244 struct nfs_direct_req *dreq;
1245 spinlock_t lock;
1246 /* fields protected by lock */
1170 int pnfs_error; 1247 int pnfs_error;
1171 struct page *page_array[NFS_PAGEVEC_SIZE]; 1248 int error; /* merge with pnfs_error */
1249 unsigned long good_bytes; /* boundary of good data */
1250 unsigned long flags;
1251};
1252
1253struct nfs_read_header {
1254 struct nfs_pgio_header header;
1255 struct nfs_read_data rpc_data;
1172}; 1256};
1173 1257
1174struct nfs_write_data { 1258struct nfs_write_data {
1259 struct nfs_pgio_header *header;
1260 struct list_head list;
1261 struct rpc_task task;
1262 struct nfs_fattr fattr;
1263 struct nfs_writeverf verf;
1264 struct nfs_writeargs args; /* argument struct */
1265 struct nfs_writeres res; /* result struct */
1266 unsigned long timestamp; /* For lease renewal */
1267 int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
1268 __u64 mds_offset; /* Filelayout dense stripe */
1269 struct nfs_page_array pages;
1270 struct nfs_client *ds_clp; /* pNFS data server */
1271};
1272
1273struct nfs_write_header {
1274 struct nfs_pgio_header header;
1275 struct nfs_write_data rpc_data;
1276};
1277
1278struct nfs_mds_commit_info {
1279 atomic_t rpcs_out;
1280 unsigned long ncommit;
1281 struct list_head list;
1282};
1283
1284struct nfs_commit_data;
1285struct nfs_inode;
1286struct nfs_commit_completion_ops {
1287 void (*error_cleanup) (struct nfs_inode *nfsi);
1288 void (*completion) (struct nfs_commit_data *data);
1289};
1290
1291struct nfs_commit_info {
1292 spinlock_t *lock;
1293 struct nfs_mds_commit_info *mds;
1294 struct pnfs_ds_commit_info *ds;
1295 struct nfs_direct_req *dreq; /* O_DIRECT request */
1296 const struct nfs_commit_completion_ops *completion_ops;
1297};
1298
1299struct nfs_commit_data {
1175 struct rpc_task task; 1300 struct rpc_task task;
1176 struct inode *inode; 1301 struct inode *inode;
1177 struct rpc_cred *cred; 1302 struct rpc_cred *cred;
@@ -1179,22 +1304,22 @@ struct nfs_write_data {
1179 struct nfs_writeverf verf; 1304 struct nfs_writeverf verf;
1180 struct list_head pages; /* Coalesced requests we wish to flush */ 1305 struct list_head pages; /* Coalesced requests we wish to flush */
1181 struct list_head list; /* lists of struct nfs_write_data */ 1306 struct list_head list; /* lists of struct nfs_write_data */
1182 struct nfs_page *req; /* multi ops per nfs_page */ 1307 struct nfs_direct_req *dreq; /* O_DIRECT request */
1183 struct page **pagevec; 1308 struct nfs_commitargs args; /* argument struct */
1184 unsigned int npages; /* Max length of pagevec */ 1309 struct nfs_commitres res; /* result struct */
1185 struct nfs_writeargs args; /* argument struct */ 1310 struct nfs_open_context *context;
1186 struct nfs_writeres res; /* result struct */
1187 struct pnfs_layout_segment *lseg; 1311 struct pnfs_layout_segment *lseg;
1188 struct nfs_client *ds_clp; /* pNFS data server */ 1312 struct nfs_client *ds_clp; /* pNFS data server */
1189 int ds_commit_index; 1313 int ds_commit_index;
1190 const struct rpc_call_ops *mds_ops; 1314 const struct rpc_call_ops *mds_ops;
1191 int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); 1315 const struct nfs_commit_completion_ops *completion_ops;
1192#ifdef CONFIG_NFS_V4 1316 int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
1193 unsigned long timestamp; /* For lease renewal */ 1317};
1194#endif 1318
1195 __u64 mds_offset; /* Filelayout dense stripe */ 1319struct nfs_pgio_completion_ops {
1196 int pnfs_error; 1320 void (*error_cleanup)(struct list_head *head);
1197 struct page *page_array[NFS_PAGEVEC_SIZE]; 1321 void (*init_hdr)(struct nfs_pgio_header *hdr);
1322 void (*completion)(struct nfs_pgio_header *hdr);
1198}; 1323};
1199 1324
1200struct nfs_unlinkdata { 1325struct nfs_unlinkdata {
@@ -1234,11 +1359,13 @@ struct nfs_rpc_ops {
1234 1359
1235 int (*getroot) (struct nfs_server *, struct nfs_fh *, 1360 int (*getroot) (struct nfs_server *, struct nfs_fh *,
1236 struct nfs_fsinfo *); 1361 struct nfs_fsinfo *);
1362 struct vfsmount *(*submount) (struct nfs_server *, struct dentry *,
1363 struct nfs_fh *, struct nfs_fattr *);
1237 int (*getattr) (struct nfs_server *, struct nfs_fh *, 1364 int (*getattr) (struct nfs_server *, struct nfs_fh *,
1238 struct nfs_fattr *); 1365 struct nfs_fattr *);
1239 int (*setattr) (struct dentry *, struct nfs_fattr *, 1366 int (*setattr) (struct dentry *, struct nfs_fattr *,
1240 struct iattr *); 1367 struct iattr *);
1241 int (*lookup) (struct rpc_clnt *clnt, struct inode *, struct qstr *, 1368 int (*lookup) (struct inode *, struct qstr *,
1242 struct nfs_fh *, struct nfs_fattr *); 1369 struct nfs_fh *, struct nfs_fattr *);
1243 int (*access) (struct inode *, struct nfs_access_entry *); 1370 int (*access) (struct inode *, struct nfs_access_entry *);
1244 int (*readlink)(struct inode *, struct page *, unsigned int, 1371 int (*readlink)(struct inode *, struct page *, unsigned int,
@@ -1277,8 +1404,9 @@ struct nfs_rpc_ops {
1277 void (*write_setup) (struct nfs_write_data *, struct rpc_message *); 1404 void (*write_setup) (struct nfs_write_data *, struct rpc_message *);
1278 void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); 1405 void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
1279 int (*write_done) (struct rpc_task *, struct nfs_write_data *); 1406 int (*write_done) (struct rpc_task *, struct nfs_write_data *);
1280 void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); 1407 void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
1281 int (*commit_done) (struct rpc_task *, struct nfs_write_data *); 1408 void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
1409 int (*commit_done) (struct rpc_task *, struct nfs_commit_data *);
1282 int (*lock)(struct file *, int, struct file_lock *); 1410 int (*lock)(struct file *, int, struct file_lock *);
1283 int (*lock_check_bounds)(const struct file_lock *); 1411 int (*lock_check_bounds)(const struct file_lock *);
1284 void (*clear_acl_cache)(struct inode *); 1412 void (*clear_acl_cache)(struct inode *);
@@ -1287,9 +1415,9 @@ struct nfs_rpc_ops {
1287 struct nfs_open_context *ctx, 1415 struct nfs_open_context *ctx,
1288 int open_flags, 1416 int open_flags,
1289 struct iattr *iattr); 1417 struct iattr *iattr);
1290 int (*init_client) (struct nfs_client *, const struct rpc_timeout *, 1418 struct nfs_client *
1291 const char *, rpc_authflavor_t, int); 1419 (*init_client) (struct nfs_client *, const struct rpc_timeout *,
1292 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); 1420 const char *, rpc_authflavor_t);
1293}; 1421};
1294 1422
1295/* 1423/*
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 3d7647536b03..e4c29bc72e70 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -43,8 +43,9 @@ enum oom_constraint {
43extern void compare_swap_oom_score_adj(int old_val, int new_val); 43extern void compare_swap_oom_score_adj(int old_val, int new_val);
44extern int test_set_oom_score_adj(int new_val); 44extern int test_set_oom_score_adj(int new_val);
45 45
46extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, 46extern unsigned long oom_badness(struct task_struct *p,
47 const nodemask_t *nodemask, unsigned long totalpages); 47 struct mem_cgroup *memcg, const nodemask_t *nodemask,
48 unsigned long totalpages);
48extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 49extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
49extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 50extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
50 51
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index efa26b4da8d2..7cfad3bbb0cc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -460,11 +460,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
460 */ 460 */
461static inline int fault_in_multipages_writeable(char __user *uaddr, int size) 461static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
462{ 462{
463 int ret; 463 int ret = 0;
464 char __user *end = uaddr + size - 1; 464 char __user *end = uaddr + size - 1;
465 465
466 if (unlikely(size == 0)) 466 if (unlikely(size == 0))
467 return 0; 467 return ret;
468 468
469 /* 469 /*
470 * Writing zeroes into userspace here is OK, because we know that if 470 * Writing zeroes into userspace here is OK, because we know that if
@@ -489,11 +489,11 @@ static inline int fault_in_multipages_readable(const char __user *uaddr,
489 int size) 489 int size)
490{ 490{
491 volatile char c; 491 volatile char c;
492 int ret; 492 int ret = 0;
493 const char __user *end = uaddr + size - 1; 493 const char __user *end = uaddr + size - 1;
494 494
495 if (unlikely(size == 0)) 495 if (unlikely(size == 0))
496 return 0; 496 return ret;
497 497
498 while (uaddr <= end) { 498 while (uaddr <= end) {
499 ret = __get_user(c, uaddr); 499 ret = __get_user(c, uaddr);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3329965ed63f..ab741b0d0074 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2506,6 +2506,7 @@
2506#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F 2506#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F
2507#define PCI_DEVICE_ID_INTEL_I960 0x0960 2507#define PCI_DEVICE_ID_INTEL_I960 0x0960
2508#define PCI_DEVICE_ID_INTEL_I960RM 0x0962 2508#define PCI_DEVICE_ID_INTEL_I960RM 0x0962
2509#define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60
2509#define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 2510#define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062
2510#define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085 2511#define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085
2511#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108F 2512#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108F
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index fd07c4542cee..3fce545df394 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -173,8 +173,6 @@ enum ttu_flags {
173}; 173};
174#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) 174#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
175 175
176bool is_vma_temporary_stack(struct vm_area_struct *vma);
177
178int try_to_unmap(struct page *, enum ttu_flags flags); 176int try_to_unmap(struct page *, enum ttu_flags flags);
179int try_to_unmap_one(struct page *, struct vm_area_struct *, 177int try_to_unmap_one(struct page *, struct vm_area_struct *,
180 unsigned long address, enum ttu_flags flags); 178 unsigned long address, enum ttu_flags flags);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b1fd5c7925fe..49c0fa9ef5cf 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -351,31 +351,14 @@ extern int swap_type_of(dev_t, sector_t, struct block_device **);
351extern unsigned int count_swap_pages(int, int); 351extern unsigned int count_swap_pages(int, int);
352extern sector_t map_swap_page(struct page *, struct block_device **); 352extern sector_t map_swap_page(struct page *, struct block_device **);
353extern sector_t swapdev_block(int, pgoff_t); 353extern sector_t swapdev_block(int, pgoff_t);
354extern int page_swapcount(struct page *);
354extern int reuse_swap_page(struct page *); 355extern int reuse_swap_page(struct page *);
355extern int try_to_free_swap(struct page *); 356extern int try_to_free_swap(struct page *);
356struct backing_dev_info; 357struct backing_dev_info;
357 358
358/* linux/mm/thrash.c */
359extern struct mm_struct *swap_token_mm;
360extern void grab_swap_token(struct mm_struct *);
361extern void __put_swap_token(struct mm_struct *);
362extern void disable_swap_token(struct mem_cgroup *memcg);
363
364static inline int has_swap_token(struct mm_struct *mm)
365{
366 return (mm == swap_token_mm);
367}
368
369static inline void put_swap_token(struct mm_struct *mm)
370{
371 if (has_swap_token(mm))
372 __put_swap_token(mm);
373}
374
375#ifdef CONFIG_CGROUP_MEM_RES_CTLR 359#ifdef CONFIG_CGROUP_MEM_RES_CTLR
376extern void 360extern void
377mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); 361mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
378extern int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep);
379#else 362#else
380static inline void 363static inline void
381mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) 364mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
@@ -462,6 +445,11 @@ static inline void delete_from_swap_cache(struct page *page)
462{ 445{
463} 446}
464 447
448static inline int page_swapcount(struct page *page)
449{
450 return 0;
451}
452
465#define reuse_swap_page(page) (page_mapcount(page) == 1) 453#define reuse_swap_page(page) (page_mapcount(page) == 1)
466 454
467static inline int try_to_free_swap(struct page *page) 455static inline int try_to_free_swap(struct page *page)
@@ -476,37 +464,11 @@ static inline swp_entry_t get_swap_page(void)
476 return entry; 464 return entry;
477} 465}
478 466
479/* linux/mm/thrash.c */
480static inline void put_swap_token(struct mm_struct *mm)
481{
482}
483
484static inline void grab_swap_token(struct mm_struct *mm)
485{
486}
487
488static inline int has_swap_token(struct mm_struct *mm)
489{
490 return 0;
491}
492
493static inline void disable_swap_token(struct mem_cgroup *memcg)
494{
495}
496
497static inline void 467static inline void
498mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) 468mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
499{ 469{
500} 470}
501 471
502#ifdef CONFIG_CGROUP_MEM_RES_CTLR
503static inline int
504mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
505{
506 return 0;
507}
508#endif
509
510#endif /* CONFIG_SWAP */ 472#endif /* CONFIG_SWAP */
511#endif /* __KERNEL__*/ 473#endif /* __KERNEL__*/
512#endif /* _LINUX_SWAP_H */ 474#endif /* _LINUX_SWAP_H */
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index f64560e204bc..bab3b87e4064 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -13,7 +13,7 @@
13#define RECLAIM_WB_ANON 0x0001u 13#define RECLAIM_WB_ANON 0x0001u
14#define RECLAIM_WB_FILE 0x0002u 14#define RECLAIM_WB_FILE 0x0002u
15#define RECLAIM_WB_MIXED 0x0010u 15#define RECLAIM_WB_MIXED 0x0010u
16#define RECLAIM_WB_SYNC 0x0004u 16#define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */
17#define RECLAIM_WB_ASYNC 0x0008u 17#define RECLAIM_WB_ASYNC 0x0008u
18 18
19#define show_reclaim_flags(flags) \ 19#define show_reclaim_flags(flags) \
@@ -25,15 +25,15 @@
25 {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ 25 {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \
26 ) : "RECLAIM_WB_NONE" 26 ) : "RECLAIM_WB_NONE"
27 27
28#define trace_reclaim_flags(page, sync) ( \ 28#define trace_reclaim_flags(page) ( \
29 (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ 29 (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
30 (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ 30 (RECLAIM_WB_ASYNC) \
31 ) 31 )
32 32
33#define trace_shrink_flags(file, sync) ( \ 33#define trace_shrink_flags(file) \
34 (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_MIXED : \ 34 ( \
35 (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \ 35 (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
36 (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ 36 (RECLAIM_WB_ASYNC) \
37 ) 37 )
38 38
39TRACE_EVENT(mm_vmscan_kswapd_sleep, 39TRACE_EVENT(mm_vmscan_kswapd_sleep,
@@ -263,22 +263,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
263 unsigned long nr_requested, 263 unsigned long nr_requested,
264 unsigned long nr_scanned, 264 unsigned long nr_scanned,
265 unsigned long nr_taken, 265 unsigned long nr_taken,
266 unsigned long nr_lumpy_taken,
267 unsigned long nr_lumpy_dirty,
268 unsigned long nr_lumpy_failed,
269 isolate_mode_t isolate_mode, 266 isolate_mode_t isolate_mode,
270 int file), 267 int file),
271 268
272 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file), 269 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file),
273 270
274 TP_STRUCT__entry( 271 TP_STRUCT__entry(
275 __field(int, order) 272 __field(int, order)
276 __field(unsigned long, nr_requested) 273 __field(unsigned long, nr_requested)
277 __field(unsigned long, nr_scanned) 274 __field(unsigned long, nr_scanned)
278 __field(unsigned long, nr_taken) 275 __field(unsigned long, nr_taken)
279 __field(unsigned long, nr_lumpy_taken)
280 __field(unsigned long, nr_lumpy_dirty)
281 __field(unsigned long, nr_lumpy_failed)
282 __field(isolate_mode_t, isolate_mode) 276 __field(isolate_mode_t, isolate_mode)
283 __field(int, file) 277 __field(int, file)
284 ), 278 ),
@@ -288,22 +282,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
288 __entry->nr_requested = nr_requested; 282 __entry->nr_requested = nr_requested;
289 __entry->nr_scanned = nr_scanned; 283 __entry->nr_scanned = nr_scanned;
290 __entry->nr_taken = nr_taken; 284 __entry->nr_taken = nr_taken;
291 __entry->nr_lumpy_taken = nr_lumpy_taken;
292 __entry->nr_lumpy_dirty = nr_lumpy_dirty;
293 __entry->nr_lumpy_failed = nr_lumpy_failed;
294 __entry->isolate_mode = isolate_mode; 285 __entry->isolate_mode = isolate_mode;
295 __entry->file = file; 286 __entry->file = file;
296 ), 287 ),
297 288
298 TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu file=%d", 289 TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d",
299 __entry->isolate_mode, 290 __entry->isolate_mode,
300 __entry->order, 291 __entry->order,
301 __entry->nr_requested, 292 __entry->nr_requested,
302 __entry->nr_scanned, 293 __entry->nr_scanned,
303 __entry->nr_taken, 294 __entry->nr_taken,
304 __entry->nr_lumpy_taken,
305 __entry->nr_lumpy_dirty,
306 __entry->nr_lumpy_failed,
307 __entry->file) 295 __entry->file)
308); 296);
309 297
@@ -313,13 +301,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
313 unsigned long nr_requested, 301 unsigned long nr_requested,
314 unsigned long nr_scanned, 302 unsigned long nr_scanned,
315 unsigned long nr_taken, 303 unsigned long nr_taken,
316 unsigned long nr_lumpy_taken,
317 unsigned long nr_lumpy_dirty,
318 unsigned long nr_lumpy_failed,
319 isolate_mode_t isolate_mode, 304 isolate_mode_t isolate_mode,
320 int file), 305 int file),
321 306
322 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) 307 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
323 308
324); 309);
325 310
@@ -329,13 +314,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
329 unsigned long nr_requested, 314 unsigned long nr_requested,
330 unsigned long nr_scanned, 315 unsigned long nr_scanned,
331 unsigned long nr_taken, 316 unsigned long nr_taken,
332 unsigned long nr_lumpy_taken,
333 unsigned long nr_lumpy_dirty,
334 unsigned long nr_lumpy_failed,
335 isolate_mode_t isolate_mode, 317 isolate_mode_t isolate_mode,
336 int file), 318 int file),
337 319
338 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) 320 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
339 321
340); 322);
341 323
@@ -395,88 +377,6 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
395 show_reclaim_flags(__entry->reclaim_flags)) 377 show_reclaim_flags(__entry->reclaim_flags))
396); 378);
397 379
398TRACE_EVENT(replace_swap_token,
399 TP_PROTO(struct mm_struct *old_mm,
400 struct mm_struct *new_mm),
401
402 TP_ARGS(old_mm, new_mm),
403
404 TP_STRUCT__entry(
405 __field(struct mm_struct*, old_mm)
406 __field(unsigned int, old_prio)
407 __field(struct mm_struct*, new_mm)
408 __field(unsigned int, new_prio)
409 ),
410
411 TP_fast_assign(
412 __entry->old_mm = old_mm;
413 __entry->old_prio = old_mm ? old_mm->token_priority : 0;
414 __entry->new_mm = new_mm;
415 __entry->new_prio = new_mm->token_priority;
416 ),
417
418 TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u",
419 __entry->old_mm, __entry->old_prio,
420 __entry->new_mm, __entry->new_prio)
421);
422
423DECLARE_EVENT_CLASS(put_swap_token_template,
424 TP_PROTO(struct mm_struct *swap_token_mm),
425
426 TP_ARGS(swap_token_mm),
427
428 TP_STRUCT__entry(
429 __field(struct mm_struct*, swap_token_mm)
430 ),
431
432 TP_fast_assign(
433 __entry->swap_token_mm = swap_token_mm;
434 ),
435
436 TP_printk("token_mm=%p", __entry->swap_token_mm)
437);
438
439DEFINE_EVENT(put_swap_token_template, put_swap_token,
440 TP_PROTO(struct mm_struct *swap_token_mm),
441 TP_ARGS(swap_token_mm)
442);
443
444DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
445 TP_PROTO(struct mm_struct *swap_token_mm),
446 TP_ARGS(swap_token_mm),
447 TP_CONDITION(swap_token_mm != NULL)
448);
449
450TRACE_EVENT_CONDITION(update_swap_token_priority,
451 TP_PROTO(struct mm_struct *mm,
452 unsigned int old_prio,
453 struct mm_struct *swap_token_mm),
454
455 TP_ARGS(mm, old_prio, swap_token_mm),
456
457 TP_CONDITION(mm->token_priority != old_prio),
458
459 TP_STRUCT__entry(
460 __field(struct mm_struct*, mm)
461 __field(unsigned int, old_prio)
462 __field(unsigned int, new_prio)
463 __field(struct mm_struct*, swap_token_mm)
464 __field(unsigned int, swap_token_prio)
465 ),
466
467 TP_fast_assign(
468 __entry->mm = mm;
469 __entry->old_prio = old_prio;
470 __entry->new_prio = mm->token_priority;
471 __entry->swap_token_mm = swap_token_mm;
472 __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
473 ),
474
475 TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u",
476 __entry->mm, __entry->old_prio, __entry->new_prio,
477 __entry->swap_token_mm, __entry->swap_token_prio)
478);
479
480#endif /* _TRACE_VMSCAN_H */ 380#endif /* _TRACE_VMSCAN_H */
481 381
482/* This part must be outside protection */ 382/* This part must be outside protection */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a0c6af34d500..0f3527d6184a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5132,7 +5132,7 @@ EXPORT_SYMBOL_GPL(css_depth);
5132 * @root: the css supporsed to be an ancestor of the child. 5132 * @root: the css supporsed to be an ancestor of the child.
5133 * 5133 *
5134 * Returns true if "root" is an ancestor of "child" in its hierarchy. Because 5134 * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
5135 * this function reads css->id, this use rcu_dereference() and rcu_read_lock(). 5135 * this function reads css->id, the caller must hold rcu_read_lock().
5136 * But, considering usual usage, the csses should be valid objects after test. 5136 * But, considering usual usage, the csses should be valid objects after test.
5137 * Assuming that the caller will do some action to the child if this returns 5137 * Assuming that the caller will do some action to the child if this returns
5138 * returns true, the caller must take "child";s reference count. 5138 * returns true, the caller must take "child";s reference count.
@@ -5144,18 +5144,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
5144{ 5144{
5145 struct css_id *child_id; 5145 struct css_id *child_id;
5146 struct css_id *root_id; 5146 struct css_id *root_id;
5147 bool ret = true;
5148 5147
5149 rcu_read_lock();
5150 child_id = rcu_dereference(child->id); 5148 child_id = rcu_dereference(child->id);
5149 if (!child_id)
5150 return false;
5151 root_id = rcu_dereference(root->id); 5151 root_id = rcu_dereference(root->id);
5152 if (!child_id 5152 if (!root_id)
5153 || !root_id 5153 return false;
5154 || (child_id->depth < root_id->depth) 5154 if (child_id->depth < root_id->depth)
5155 || (child_id->stack[root_id->depth] != root_id->id)) 5155 return false;
5156 ret = false; 5156 if (child_id->stack[root_id->depth] != root_id->id)
5157 rcu_read_unlock(); 5157 return false;
5158 return ret; 5158 return true;
5159} 5159}
5160 5160
5161void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) 5161void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
diff --git a/kernel/fork.c b/kernel/fork.c
index 47b4e4f379f9..017fb23d5983 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -386,7 +386,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
386 } 386 }
387 charge = 0; 387 charge = 0;
388 if (mpnt->vm_flags & VM_ACCOUNT) { 388 if (mpnt->vm_flags & VM_ACCOUNT) {
389 unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; 389 unsigned long len;
390 len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
390 if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ 391 if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
391 goto fail_nomem; 392 goto fail_nomem;
392 charge = len; 393 charge = len;
@@ -614,7 +615,6 @@ void mmput(struct mm_struct *mm)
614 list_del(&mm->mmlist); 615 list_del(&mm->mmlist);
615 spin_unlock(&mmlist_lock); 616 spin_unlock(&mmlist_lock);
616 } 617 }
617 put_swap_token(mm);
618 if (mm->binfmt) 618 if (mm->binfmt)
619 module_put(mm->binfmt->module); 619 module_put(mm->binfmt->module);
620 mmdrop(mm); 620 mmdrop(mm);
@@ -831,10 +831,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
831 memcpy(mm, oldmm, sizeof(*mm)); 831 memcpy(mm, oldmm, sizeof(*mm));
832 mm_init_cpumask(mm); 832 mm_init_cpumask(mm);
833 833
834 /* Initializing for Swap token stuff */
835 mm->token_priority = 0;
836 mm->last_interval = 0;
837
838#ifdef CONFIG_TRANSPARENT_HUGEPAGE 834#ifdef CONFIG_TRANSPARENT_HUGEPAGE
839 mm->pmd_huge_pte = NULL; 835 mm->pmd_huge_pte = NULL;
840#endif 836#endif
@@ -913,10 +909,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
913 goto fail_nomem; 909 goto fail_nomem;
914 910
915good_mm: 911good_mm:
916 /* Initializing for Swap token stuff */
917 mm->token_priority = 0;
918 mm->last_interval = 0;
919
920 tsk->mm = mm; 912 tsk->mm = mm;
921 tsk->active_mm = mm; 913 tsk->active_mm = mm;
922 return 0; 914 return 0;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 414f46ed1dcd..45bc1f83a5ad 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -130,11 +130,9 @@ void swiotlb_print_info(void)
130 pstart = virt_to_phys(io_tlb_start); 130 pstart = virt_to_phys(io_tlb_start);
131 pend = virt_to_phys(io_tlb_end); 131 pend = virt_to_phys(io_tlb_end);
132 132
133 printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n", 133 printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
134 bytes >> 20, io_tlb_start, io_tlb_end); 134 (unsigned long long)pstart, (unsigned long long)pend - 1,
135 printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n", 135 bytes >> 20, io_tlb_start, io_tlb_end - 1);
136 (unsigned long long)pstart,
137 (unsigned long long)pend);
138} 136}
139 137
140void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) 138void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
diff --git a/mm/Kconfig b/mm/Kconfig
index 39220026c797..b2176374b98e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -349,6 +349,16 @@ choice
349 benefit. 349 benefit.
350endchoice 350endchoice
351 351
352config CROSS_MEMORY_ATTACH
353 bool "Cross Memory Support"
354 depends on MMU
355 default y
356 help
357 Enabling this option adds the system calls process_vm_readv and
358 process_vm_writev which allow a process with the correct privileges
359 to directly read from or write to to another process's address space.
360 See the man page for more details.
361
352# 362#
353# UP and nommu archs use km based percpu allocator 363# UP and nommu archs use km based percpu allocator
354# 364#
diff --git a/mm/Makefile b/mm/Makefile
index 8aada89efbbb..a156285ce88d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -5,8 +5,11 @@
5mmu-y := nommu.o 5mmu-y := nommu.o
6mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ 6mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ 7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
8 vmalloc.o pagewalk.o pgtable-generic.o \ 8 vmalloc.o pagewalk.o pgtable-generic.o
9 process_vm_access.o 9
10ifdef CONFIG_CROSS_MEMORY_ATTACH
11mmu-$(CONFIG_MMU) += process_vm_access.o
12endif
10 13
11obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ 14obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
12 maccess.o page_alloc.o page-writeback.o \ 15 maccess.o page_alloc.o page-writeback.o \
@@ -25,7 +28,7 @@ endif
25obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 28obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
26 29
27obj-$(CONFIG_BOUNCE) += bounce.o 30obj-$(CONFIG_BOUNCE) += bounce.o
28obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 31obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
29obj-$(CONFIG_HAS_DMA) += dmapool.o 32obj-$(CONFIG_HAS_DMA) += dmapool.o
30obj-$(CONFIG_HUGETLBFS) += hugetlb.o 33obj-$(CONFIG_HUGETLBFS) += hugetlb.o
31obj-$(CONFIG_NUMA) += mempolicy.o 34obj-$(CONFIG_NUMA) += mempolicy.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 0131170c9d54..ec4fcb7a56c8 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -77,16 +77,16 @@ unsigned long __init bootmem_bootmap_pages(unsigned long pages)
77 */ 77 */
78static void __init link_bootmem(bootmem_data_t *bdata) 78static void __init link_bootmem(bootmem_data_t *bdata)
79{ 79{
80 struct list_head *iter; 80 bootmem_data_t *ent;
81 81
82 list_for_each(iter, &bdata_list) { 82 list_for_each_entry(ent, &bdata_list, list) {
83 bootmem_data_t *ent; 83 if (bdata->node_min_pfn < ent->node_min_pfn) {
84 84 list_add_tail(&bdata->list, &ent->list);
85 ent = list_entry(iter, bootmem_data_t, list); 85 return;
86 if (bdata->node_min_pfn < ent->node_min_pfn) 86 }
87 break;
88 } 87 }
89 list_add_tail(&bdata->list, iter); 88
89 list_add_tail(&bdata->list, &bdata_list);
90} 90}
91 91
92/* 92/*
@@ -203,7 +203,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
203 } else { 203 } else {
204 unsigned long off = 0; 204 unsigned long off = 0;
205 205
206 while (vec && off < BITS_PER_LONG) { 206 vec >>= start & (BITS_PER_LONG - 1);
207 while (vec) {
207 if (vec & 1) { 208 if (vec & 1) {
208 page = pfn_to_page(start + off); 209 page = pfn_to_page(start + off);
209 __free_pages_bootmem(page, 0); 210 __free_pages_bootmem(page, 0);
@@ -467,7 +468,7 @@ static unsigned long __init align_off(struct bootmem_data *bdata,
467 return ALIGN(base + off, align) - base; 468 return ALIGN(base + off, align) - base;
468} 469}
469 470
470static void * __init alloc_bootmem_core(struct bootmem_data *bdata, 471static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata,
471 unsigned long size, unsigned long align, 472 unsigned long size, unsigned long align,
472 unsigned long goal, unsigned long limit) 473 unsigned long goal, unsigned long limit)
473{ 474{
@@ -588,14 +589,14 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
588 p_bdata = bootmem_arch_preferred_node(bdata, size, align, 589 p_bdata = bootmem_arch_preferred_node(bdata, size, align,
589 goal, limit); 590 goal, limit);
590 if (p_bdata) 591 if (p_bdata)
591 return alloc_bootmem_core(p_bdata, size, align, 592 return alloc_bootmem_bdata(p_bdata, size, align,
592 goal, limit); 593 goal, limit);
593 } 594 }
594#endif 595#endif
595 return NULL; 596 return NULL;
596} 597}
597 598
598static void * __init ___alloc_bootmem_nopanic(unsigned long size, 599static void * __init alloc_bootmem_core(unsigned long size,
599 unsigned long align, 600 unsigned long align,
600 unsigned long goal, 601 unsigned long goal,
601 unsigned long limit) 602 unsigned long limit)
@@ -603,7 +604,6 @@ static void * __init ___alloc_bootmem_nopanic(unsigned long size,
603 bootmem_data_t *bdata; 604 bootmem_data_t *bdata;
604 void *region; 605 void *region;
605 606
606restart:
607 region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); 607 region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit);
608 if (region) 608 if (region)
609 return region; 609 return region;
@@ -614,11 +614,25 @@ restart:
614 if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) 614 if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
615 break; 615 break;
616 616
617 region = alloc_bootmem_core(bdata, size, align, goal, limit); 617 region = alloc_bootmem_bdata(bdata, size, align, goal, limit);
618 if (region) 618 if (region)
619 return region; 619 return region;
620 } 620 }
621 621
622 return NULL;
623}
624
625static void * __init ___alloc_bootmem_nopanic(unsigned long size,
626 unsigned long align,
627 unsigned long goal,
628 unsigned long limit)
629{
630 void *ptr;
631
632restart:
633 ptr = alloc_bootmem_core(size, align, goal, limit);
634 if (ptr)
635 return ptr;
622 if (goal) { 636 if (goal) {
623 goal = 0; 637 goal = 0;
624 goto restart; 638 goto restart;
@@ -684,21 +698,56 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
684 return ___alloc_bootmem(size, align, goal, limit); 698 return ___alloc_bootmem(size, align, goal, limit);
685} 699}
686 700
687static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, 701static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
688 unsigned long size, unsigned long align, 702 unsigned long size, unsigned long align,
689 unsigned long goal, unsigned long limit) 703 unsigned long goal, unsigned long limit)
690{ 704{
691 void *ptr; 705 void *ptr;
692 706
693 ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); 707again:
708 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size,
709 align, goal, limit);
694 if (ptr) 710 if (ptr)
695 return ptr; 711 return ptr;
696 712
697 ptr = alloc_bootmem_core(bdata, size, align, goal, limit); 713 ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
698 if (ptr) 714 if (ptr)
699 return ptr; 715 return ptr;
700 716
701 return ___alloc_bootmem(size, align, goal, limit); 717 ptr = alloc_bootmem_core(size, align, goal, limit);
718 if (ptr)
719 return ptr;
720
721 if (goal) {
722 goal = 0;
723 goto again;
724 }
725
726 return NULL;
727}
728
729void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
730 unsigned long align, unsigned long goal)
731{
732 if (WARN_ON_ONCE(slab_is_available()))
733 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
734
735 return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
736}
737
738void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
739 unsigned long align, unsigned long goal,
740 unsigned long limit)
741{
742 void *ptr;
743
744 ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
745 if (ptr)
746 return ptr;
747
748 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
749 panic("Out of memory");
750 return NULL;
702} 751}
703 752
704/** 753/**
@@ -722,7 +771,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
722 if (WARN_ON_ONCE(slab_is_available())) 771 if (WARN_ON_ONCE(slab_is_available()))
723 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 772 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
724 773
725 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); 774 return ___alloc_bootmem_node(pgdat, size, align, goal, 0);
726} 775}
727 776
728void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, 777void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -743,7 +792,7 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
743 unsigned long new_goal; 792 unsigned long new_goal;
744 793
745 new_goal = MAX_DMA32_PFN << PAGE_SHIFT; 794 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
746 ptr = alloc_bootmem_core(pgdat->bdata, size, align, 795 ptr = alloc_bootmem_bdata(pgdat->bdata, size, align,
747 new_goal, 0); 796 new_goal, 0);
748 if (ptr) 797 if (ptr)
749 return ptr; 798 return ptr;
@@ -754,47 +803,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
754 803
755} 804}
756 805
757#ifdef CONFIG_SPARSEMEM
758/**
759 * alloc_bootmem_section - allocate boot memory from a specific section
760 * @size: size of the request in bytes
761 * @section_nr: sparse map section to allocate from
762 *
763 * Return NULL on failure.
764 */
765void * __init alloc_bootmem_section(unsigned long size,
766 unsigned long section_nr)
767{
768 bootmem_data_t *bdata;
769 unsigned long pfn, goal;
770
771 pfn = section_nr_to_pfn(section_nr);
772 goal = pfn << PAGE_SHIFT;
773 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
774
775 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0);
776}
777#endif
778
779void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
780 unsigned long align, unsigned long goal)
781{
782 void *ptr;
783
784 if (WARN_ON_ONCE(slab_is_available()))
785 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
786
787 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
788 if (ptr)
789 return ptr;
790
791 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
792 if (ptr)
793 return ptr;
794
795 return __alloc_bootmem_nopanic(size, align, goal);
796}
797
798#ifndef ARCH_LOW_ADDRESS_LIMIT 806#ifndef ARCH_LOW_ADDRESS_LIMIT
799#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL 807#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
800#endif 808#endif
@@ -839,6 +847,6 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
839 if (WARN_ON_ONCE(slab_is_available())) 847 if (WARN_ON_ONCE(slab_is_available()))
840 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 848 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
841 849
842 return ___alloc_bootmem_node(pgdat->bdata, size, align, 850 return ___alloc_bootmem_node(pgdat, size, align,
843 goal, ARCH_LOW_ADDRESS_LIMIT); 851 goal, ARCH_LOW_ADDRESS_LIMIT);
844} 852}
diff --git a/mm/compaction.c b/mm/compaction.c
index da7d35ea5103..840ee288e296 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -235,7 +235,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
235 */ 235 */
236 while (unlikely(too_many_isolated(zone))) { 236 while (unlikely(too_many_isolated(zone))) {
237 /* async migration should just abort */ 237 /* async migration should just abort */
238 if (!cc->sync) 238 if (cc->mode != COMPACT_SYNC)
239 return 0; 239 return 0;
240 240
241 congestion_wait(BLK_RW_ASYNC, HZ/10); 241 congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -303,7 +303,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
303 * satisfies the allocation 303 * satisfies the allocation
304 */ 304 */
305 pageblock_nr = low_pfn >> pageblock_order; 305 pageblock_nr = low_pfn >> pageblock_order;
306 if (!cc->sync && last_pageblock_nr != pageblock_nr && 306 if (cc->mode != COMPACT_SYNC &&
307 last_pageblock_nr != pageblock_nr &&
307 !migrate_async_suitable(get_pageblock_migratetype(page))) { 308 !migrate_async_suitable(get_pageblock_migratetype(page))) {
308 low_pfn += pageblock_nr_pages; 309 low_pfn += pageblock_nr_pages;
309 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; 310 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
@@ -324,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
324 continue; 325 continue;
325 } 326 }
326 327
327 if (!cc->sync) 328 if (cc->mode != COMPACT_SYNC)
328 mode |= ISOLATE_ASYNC_MIGRATE; 329 mode |= ISOLATE_ASYNC_MIGRATE;
329 330
330 /* Try isolate the page */ 331 /* Try isolate the page */
@@ -357,27 +358,90 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
357 358
358#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 359#endif /* CONFIG_COMPACTION || CONFIG_CMA */
359#ifdef CONFIG_COMPACTION 360#ifdef CONFIG_COMPACTION
361/*
362 * Returns true if MIGRATE_UNMOVABLE pageblock was successfully
363 * converted to MIGRATE_MOVABLE type, false otherwise.
364 */
365static bool rescue_unmovable_pageblock(struct page *page)
366{
367 unsigned long pfn, start_pfn, end_pfn;
368 struct page *start_page, *end_page;
369
370 pfn = page_to_pfn(page);
371 start_pfn = pfn & ~(pageblock_nr_pages - 1);
372 end_pfn = start_pfn + pageblock_nr_pages;
373
374 start_page = pfn_to_page(start_pfn);
375 end_page = pfn_to_page(end_pfn);
376
377 /* Do not deal with pageblocks that overlap zones */
378 if (page_zone(start_page) != page_zone(end_page))
379 return false;
380
381 for (page = start_page, pfn = start_pfn; page < end_page; pfn++,
382 page++) {
383 if (!pfn_valid_within(pfn))
384 continue;
385
386 if (PageBuddy(page)) {
387 int order = page_order(page);
388
389 pfn += (1 << order) - 1;
390 page += (1 << order) - 1;
391
392 continue;
393 } else if (page_count(page) == 0 || PageLRU(page))
394 continue;
395
396 return false;
397 }
398
399 set_pageblock_migratetype(page, MIGRATE_MOVABLE);
400 move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE);
401 return true;
402}
360 403
361/* Returns true if the page is within a block suitable for migration to */ 404enum smt_result {
362static bool suitable_migration_target(struct page *page) 405 GOOD_AS_MIGRATION_TARGET,
406 FAIL_UNMOVABLE_TARGET,
407 FAIL_BAD_TARGET,
408};
409
410/*
411 * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block
412 * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page
413 * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise.
414 */
415static enum smt_result suitable_migration_target(struct page *page,
416 struct compact_control *cc)
363{ 417{
364 418
365 int migratetype = get_pageblock_migratetype(page); 419 int migratetype = get_pageblock_migratetype(page);
366 420
367 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ 421 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
368 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) 422 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
369 return false; 423 return FAIL_BAD_TARGET;
370 424
371 /* If the page is a large free page, then allow migration */ 425 /* If the page is a large free page, then allow migration */
372 if (PageBuddy(page) && page_order(page) >= pageblock_order) 426 if (PageBuddy(page) && page_order(page) >= pageblock_order)
373 return true; 427 return GOOD_AS_MIGRATION_TARGET;
374 428
375 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ 429 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
376 if (migrate_async_suitable(migratetype)) 430 if (cc->mode != COMPACT_ASYNC_UNMOVABLE &&
377 return true; 431 migrate_async_suitable(migratetype))
432 return GOOD_AS_MIGRATION_TARGET;
433
434 if (cc->mode == COMPACT_ASYNC_MOVABLE &&
435 migratetype == MIGRATE_UNMOVABLE)
436 return FAIL_UNMOVABLE_TARGET;
437
438 if (cc->mode != COMPACT_ASYNC_MOVABLE &&
439 migratetype == MIGRATE_UNMOVABLE &&
440 rescue_unmovable_pageblock(page))
441 return GOOD_AS_MIGRATION_TARGET;
378 442
379 /* Otherwise skip the block */ 443 /* Otherwise skip the block */
380 return false; 444 return FAIL_BAD_TARGET;
381} 445}
382 446
383/* 447/*
@@ -411,6 +475,13 @@ static void isolate_freepages(struct zone *zone,
411 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; 475 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
412 476
413 /* 477 /*
478 * isolate_freepages() may be called more than once during
479 * compact_zone_order() run and we want only the most recent
480 * count.
481 */
482 cc->nr_pageblocks_skipped = 0;
483
484 /*
414 * Isolate free pages until enough are available to migrate the 485 * Isolate free pages until enough are available to migrate the
415 * pages on cc->migratepages. We stop searching if the migrate 486 * pages on cc->migratepages. We stop searching if the migrate
416 * and free page scanners meet or enough free pages are isolated. 487 * and free page scanners meet or enough free pages are isolated.
@@ -418,6 +489,7 @@ static void isolate_freepages(struct zone *zone,
418 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; 489 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
419 pfn -= pageblock_nr_pages) { 490 pfn -= pageblock_nr_pages) {
420 unsigned long isolated; 491 unsigned long isolated;
492 enum smt_result ret;
421 493
422 if (!pfn_valid(pfn)) 494 if (!pfn_valid(pfn))
423 continue; 495 continue;
@@ -434,9 +506,12 @@ static void isolate_freepages(struct zone *zone,
434 continue; 506 continue;
435 507
436 /* Check the block is suitable for migration */ 508 /* Check the block is suitable for migration */
437 if (!suitable_migration_target(page)) 509 ret = suitable_migration_target(page, cc);
510 if (ret != GOOD_AS_MIGRATION_TARGET) {
511 if (ret == FAIL_UNMOVABLE_TARGET)
512 cc->nr_pageblocks_skipped++;
438 continue; 513 continue;
439 514 }
440 /* 515 /*
441 * Found a block suitable for isolating free pages from. Now 516 * Found a block suitable for isolating free pages from. Now
442 * we disabled interrupts, double check things are ok and 517 * we disabled interrupts, double check things are ok and
@@ -445,12 +520,14 @@ static void isolate_freepages(struct zone *zone,
445 */ 520 */
446 isolated = 0; 521 isolated = 0;
447 spin_lock_irqsave(&zone->lock, flags); 522 spin_lock_irqsave(&zone->lock, flags);
448 if (suitable_migration_target(page)) { 523 ret = suitable_migration_target(page, cc);
524 if (ret == GOOD_AS_MIGRATION_TARGET) {
449 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); 525 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
450 isolated = isolate_freepages_block(pfn, end_pfn, 526 isolated = isolate_freepages_block(pfn, end_pfn,
451 freelist, false); 527 freelist, false);
452 nr_freepages += isolated; 528 nr_freepages += isolated;
453 } 529 } else if (ret == FAIL_UNMOVABLE_TARGET)
530 cc->nr_pageblocks_skipped++;
454 spin_unlock_irqrestore(&zone->lock, flags); 531 spin_unlock_irqrestore(&zone->lock, flags);
455 532
456 /* 533 /*
@@ -682,8 +759,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
682 759
683 nr_migrate = cc->nr_migratepages; 760 nr_migrate = cc->nr_migratepages;
684 err = migrate_pages(&cc->migratepages, compaction_alloc, 761 err = migrate_pages(&cc->migratepages, compaction_alloc,
685 (unsigned long)cc, false, 762 (unsigned long)&cc->freepages, false,
686 cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); 763 (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT
764 : MIGRATE_ASYNC);
687 update_nr_listpages(cc); 765 update_nr_listpages(cc);
688 nr_remaining = cc->nr_migratepages; 766 nr_remaining = cc->nr_migratepages;
689 767
@@ -712,7 +790,8 @@ out:
712 790
713static unsigned long compact_zone_order(struct zone *zone, 791static unsigned long compact_zone_order(struct zone *zone,
714 int order, gfp_t gfp_mask, 792 int order, gfp_t gfp_mask,
715 bool sync) 793 enum compact_mode mode,
794 unsigned long *nr_pageblocks_skipped)
716{ 795{
717 struct compact_control cc = { 796 struct compact_control cc = {
718 .nr_freepages = 0, 797 .nr_freepages = 0,
@@ -720,12 +799,17 @@ static unsigned long compact_zone_order(struct zone *zone,
720 .order = order, 799 .order = order,
721 .migratetype = allocflags_to_migratetype(gfp_mask), 800 .migratetype = allocflags_to_migratetype(gfp_mask),
722 .zone = zone, 801 .zone = zone,
723 .sync = sync, 802 .mode = mode,
724 }; 803 };
804 unsigned long rc;
805
725 INIT_LIST_HEAD(&cc.freepages); 806 INIT_LIST_HEAD(&cc.freepages);
726 INIT_LIST_HEAD(&cc.migratepages); 807 INIT_LIST_HEAD(&cc.migratepages);
727 808
728 return compact_zone(zone, &cc); 809 rc = compact_zone(zone, &cc);
810 *nr_pageblocks_skipped = cc.nr_pageblocks_skipped;
811
812 return rc;
729} 813}
730 814
731int sysctl_extfrag_threshold = 500; 815int sysctl_extfrag_threshold = 500;
@@ -750,6 +834,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
750 struct zoneref *z; 834 struct zoneref *z;
751 struct zone *zone; 835 struct zone *zone;
752 int rc = COMPACT_SKIPPED; 836 int rc = COMPACT_SKIPPED;
837 unsigned long nr_pageblocks_skipped;
838 enum compact_mode mode;
753 839
754 /* 840 /*
755 * Check whether it is worth even starting compaction. The order check is 841 * Check whether it is worth even starting compaction. The order check is
@@ -766,12 +852,22 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
766 nodemask) { 852 nodemask) {
767 int status; 853 int status;
768 854
769 status = compact_zone_order(zone, order, gfp_mask, sync); 855 mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE;
856retry:
857 status = compact_zone_order(zone, order, gfp_mask, mode,
858 &nr_pageblocks_skipped);
770 rc = max(status, rc); 859 rc = max(status, rc);
771 860
772 /* If a normal allocation would succeed, stop compacting */ 861 /* If a normal allocation would succeed, stop compacting */
773 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) 862 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
774 break; 863 break;
864
865 if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) {
866 if (nr_pageblocks_skipped) {
867 mode = COMPACT_ASYNC_UNMOVABLE;
868 goto retry;
869 }
870 }
775 } 871 }
776 872
777 return rc; 873 return rc;
@@ -805,7 +901,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
805 if (ok && cc->order > zone->compact_order_failed) 901 if (ok && cc->order > zone->compact_order_failed)
806 zone->compact_order_failed = cc->order + 1; 902 zone->compact_order_failed = cc->order + 1;
807 /* Currently async compaction is never deferred. */ 903 /* Currently async compaction is never deferred. */
808 else if (!ok && cc->sync) 904 else if (!ok && cc->mode == COMPACT_SYNC)
809 defer_compaction(zone, cc->order); 905 defer_compaction(zone, cc->order);
810 } 906 }
811 907
@@ -820,7 +916,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
820{ 916{
821 struct compact_control cc = { 917 struct compact_control cc = {
822 .order = order, 918 .order = order,
823 .sync = false, 919 .mode = COMPACT_ASYNC_MOVABLE,
824 }; 920 };
825 921
826 return __compact_pgdat(pgdat, &cc); 922 return __compact_pgdat(pgdat, &cc);
@@ -830,7 +926,7 @@ static int compact_node(int nid)
830{ 926{
831 struct compact_control cc = { 927 struct compact_control cc = {
832 .order = -1, 928 .order = -1,
833 .sync = true, 929 .mode = COMPACT_SYNC,
834 }; 930 };
835 931
836 return __compact_pgdat(NODE_DATA(nid), &cc); 932 return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/filemap.c b/mm/filemap.c
index 79c4b2b0b14e..64b48f934b89 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -29,7 +29,6 @@
29#include <linux/pagevec.h> 29#include <linux/pagevec.h>
30#include <linux/blkdev.h> 30#include <linux/blkdev.h>
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/syscalls.h>
33#include <linux/cpuset.h> 32#include <linux/cpuset.h>
34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 33#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
35#include <linux/memcontrol.h> 34#include <linux/memcontrol.h>
@@ -1478,44 +1477,6 @@ out:
1478} 1477}
1479EXPORT_SYMBOL(generic_file_aio_read); 1478EXPORT_SYMBOL(generic_file_aio_read);
1480 1479
1481static ssize_t
1482do_readahead(struct address_space *mapping, struct file *filp,
1483 pgoff_t index, unsigned long nr)
1484{
1485 if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
1486 return -EINVAL;
1487
1488 force_page_cache_readahead(mapping, filp, index, nr);
1489 return 0;
1490}
1491
1492SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
1493{
1494 ssize_t ret;
1495 struct file *file;
1496
1497 ret = -EBADF;
1498 file = fget(fd);
1499 if (file) {
1500 if (file->f_mode & FMODE_READ) {
1501 struct address_space *mapping = file->f_mapping;
1502 pgoff_t start = offset >> PAGE_CACHE_SHIFT;
1503 pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
1504 unsigned long len = end - start + 1;
1505 ret = do_readahead(mapping, file, start, len);
1506 }
1507 fput(file);
1508 }
1509 return ret;
1510}
1511#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
1512asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
1513{
1514 return SYSC_readahead((int) fd, offset, (size_t) count);
1515}
1516SYSCALL_ALIAS(sys_readahead, SyS_readahead);
1517#endif
1518
1519#ifdef CONFIG_MMU 1480#ifdef CONFIG_MMU
1520/** 1481/**
1521 * page_cache_read - adds requested page to the page cache if not already there 1482 * page_cache_read - adds requested page to the page cache if not already there
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f0e5306eeb55..d0def42c121b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -636,16 +636,12 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
636 unsigned long haddr, pmd_t *pmd, 636 unsigned long haddr, pmd_t *pmd,
637 struct page *page) 637 struct page *page)
638{ 638{
639 int ret = 0;
640 pgtable_t pgtable; 639 pgtable_t pgtable;
641 640
642 VM_BUG_ON(!PageCompound(page)); 641 VM_BUG_ON(!PageCompound(page));
643 pgtable = pte_alloc_one(mm, haddr); 642 pgtable = pte_alloc_one(mm, haddr);
644 if (unlikely(!pgtable)) { 643 if (unlikely(!pgtable))
645 mem_cgroup_uncharge_page(page);
646 put_page(page);
647 return VM_FAULT_OOM; 644 return VM_FAULT_OOM;
648 }
649 645
650 clear_huge_page(page, haddr, HPAGE_PMD_NR); 646 clear_huge_page(page, haddr, HPAGE_PMD_NR);
651 __SetPageUptodate(page); 647 __SetPageUptodate(page);
@@ -675,7 +671,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
675 spin_unlock(&mm->page_table_lock); 671 spin_unlock(&mm->page_table_lock);
676 } 672 }
677 673
678 return ret; 674 return 0;
679} 675}
680 676
681static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) 677static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
@@ -724,8 +720,14 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
724 put_page(page); 720 put_page(page);
725 goto out; 721 goto out;
726 } 722 }
723 if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
724 page))) {
725 mem_cgroup_uncharge_page(page);
726 put_page(page);
727 goto out;
728 }
727 729
728 return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page); 730 return 0;
729 } 731 }
730out: 732out:
731 /* 733 /*
@@ -950,6 +952,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
950 count_vm_event(THP_FAULT_FALLBACK); 952 count_vm_event(THP_FAULT_FALLBACK);
951 ret = do_huge_pmd_wp_page_fallback(mm, vma, address, 953 ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
952 pmd, orig_pmd, page, haddr); 954 pmd, orig_pmd, page, haddr);
955 if (ret & VM_FAULT_OOM)
956 split_huge_page(page);
953 put_page(page); 957 put_page(page);
954 goto out; 958 goto out;
955 } 959 }
@@ -957,6 +961,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
957 961
958 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { 962 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
959 put_page(new_page); 963 put_page(new_page);
964 split_huge_page(page);
960 put_page(page); 965 put_page(page);
961 ret |= VM_FAULT_OOM; 966 ret |= VM_FAULT_OOM;
962 goto out; 967 goto out;
@@ -968,8 +973,10 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
968 spin_lock(&mm->page_table_lock); 973 spin_lock(&mm->page_table_lock);
969 put_page(page); 974 put_page(page);
970 if (unlikely(!pmd_same(*pmd, orig_pmd))) { 975 if (unlikely(!pmd_same(*pmd, orig_pmd))) {
976 spin_unlock(&mm->page_table_lock);
971 mem_cgroup_uncharge_page(new_page); 977 mem_cgroup_uncharge_page(new_page);
972 put_page(new_page); 978 put_page(new_page);
979 goto out;
973 } else { 980 } else {
974 pmd_t entry; 981 pmd_t entry;
975 VM_BUG_ON(!PageHead(page)); 982 VM_BUG_ON(!PageHead(page));
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4e28416c47fb..285a81e87ec8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -273,8 +273,8 @@ static long region_count(struct list_head *head, long f, long t)
273 273
274 /* Locate each segment we overlap with, and count that overlap. */ 274 /* Locate each segment we overlap with, and count that overlap. */
275 list_for_each_entry(rg, head, link) { 275 list_for_each_entry(rg, head, link) {
276 int seg_from; 276 long seg_from;
277 int seg_to; 277 long seg_to;
278 278
279 if (rg->to <= f) 279 if (rg->to <= f)
280 continue; 280 continue;
@@ -2157,6 +2157,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
2157 kref_get(&reservations->refs); 2157 kref_get(&reservations->refs);
2158} 2158}
2159 2159
2160static void resv_map_put(struct vm_area_struct *vma)
2161{
2162 struct resv_map *reservations = vma_resv_map(vma);
2163
2164 if (!reservations)
2165 return;
2166 kref_put(&reservations->refs, resv_map_release);
2167}
2168
2160static void hugetlb_vm_op_close(struct vm_area_struct *vma) 2169static void hugetlb_vm_op_close(struct vm_area_struct *vma)
2161{ 2170{
2162 struct hstate *h = hstate_vma(vma); 2171 struct hstate *h = hstate_vma(vma);
@@ -2173,7 +2182,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
2173 reserve = (end - start) - 2182 reserve = (end - start) -
2174 region_count(&reservations->regions, start, end); 2183 region_count(&reservations->regions, start, end);
2175 2184
2176 kref_put(&reservations->refs, resv_map_release); 2185 resv_map_put(vma);
2177 2186
2178 if (reserve) { 2187 if (reserve) {
2179 hugetlb_acct_memory(h, -reserve); 2188 hugetlb_acct_memory(h, -reserve);
@@ -2991,12 +3000,16 @@ int hugetlb_reserve_pages(struct inode *inode,
2991 set_vma_resv_flags(vma, HPAGE_RESV_OWNER); 3000 set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
2992 } 3001 }
2993 3002
2994 if (chg < 0) 3003 if (chg < 0) {
2995 return chg; 3004 ret = chg;
3005 goto out_err;
3006 }
2996 3007
2997 /* There must be enough pages in the subpool for the mapping */ 3008 /* There must be enough pages in the subpool for the mapping */
2998 if (hugepage_subpool_get_pages(spool, chg)) 3009 if (hugepage_subpool_get_pages(spool, chg)) {
2999 return -ENOSPC; 3010 ret = -ENOSPC;
3011 goto out_err;
3012 }
3000 3013
3001 /* 3014 /*
3002 * Check enough hugepages are available for the reservation. 3015 * Check enough hugepages are available for the reservation.
@@ -3005,7 +3018,7 @@ int hugetlb_reserve_pages(struct inode *inode,
3005 ret = hugetlb_acct_memory(h, chg); 3018 ret = hugetlb_acct_memory(h, chg);
3006 if (ret < 0) { 3019 if (ret < 0) {
3007 hugepage_subpool_put_pages(spool, chg); 3020 hugepage_subpool_put_pages(spool, chg);
3008 return ret; 3021 goto out_err;
3009 } 3022 }
3010 3023
3011 /* 3024 /*
@@ -3022,6 +3035,9 @@ int hugetlb_reserve_pages(struct inode *inode,
3022 if (!vma || vma->vm_flags & VM_MAYSHARE) 3035 if (!vma || vma->vm_flags & VM_MAYSHARE)
3023 region_add(&inode->i_mapping->private_list, from, to); 3036 region_add(&inode->i_mapping->private_list, from, to);
3024 return 0; 3037 return 0;
3038out_err:
3039 resv_map_put(vma);
3040 return ret;
3025} 3041}
3026 3042
3027void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) 3043void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
diff --git a/mm/internal.h b/mm/internal.h
index aee4761cf9a9..4194ab9dc19b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -94,6 +94,9 @@ extern void putback_lru_page(struct page *page);
94/* 94/*
95 * in mm/page_alloc.c 95 * in mm/page_alloc.c
96 */ 96 */
97extern void set_pageblock_migratetype(struct page *page, int migratetype);
98extern int move_freepages_block(struct zone *zone, struct page *page,
99 int migratetype);
97extern void __free_pages_bootmem(struct page *page, unsigned int order); 100extern void __free_pages_bootmem(struct page *page, unsigned int order);
98extern void prep_compound_page(struct page *page, unsigned long order); 101extern void prep_compound_page(struct page *page, unsigned long order);
99#ifdef CONFIG_MEMORY_FAILURE 102#ifdef CONFIG_MEMORY_FAILURE
@@ -101,6 +104,7 @@ extern bool is_free_buddy_page(struct page *page);
101#endif 104#endif
102 105
103#if defined CONFIG_COMPACTION || defined CONFIG_CMA 106#if defined CONFIG_COMPACTION || defined CONFIG_CMA
107#include <linux/compaction.h>
104 108
105/* 109/*
106 * in mm/compaction.c 110 * in mm/compaction.c
@@ -119,11 +123,14 @@ struct compact_control {
119 unsigned long nr_migratepages; /* Number of pages to migrate */ 123 unsigned long nr_migratepages; /* Number of pages to migrate */
120 unsigned long free_pfn; /* isolate_freepages search base */ 124 unsigned long free_pfn; /* isolate_freepages search base */
121 unsigned long migrate_pfn; /* isolate_migratepages search base */ 125 unsigned long migrate_pfn; /* isolate_migratepages search base */
122 bool sync; /* Synchronous migration */ 126 enum compact_mode mode; /* Compaction mode */
123 127
124 int order; /* order a direct compactor needs */ 128 int order; /* order a direct compactor needs */
125 int migratetype; /* MOVABLE, RECLAIMABLE etc */ 129 int migratetype; /* MOVABLE, RECLAIMABLE etc */
126 struct zone *zone; 130 struct zone *zone;
131
132 /* Number of UNMOVABLE destination pageblocks skipped during scan */
133 unsigned long nr_pageblocks_skipped;
127}; 134};
128 135
129unsigned long 136unsigned long
@@ -164,7 +171,8 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
164 * to determine if it's being mapped into a LOCKED vma. 171 * to determine if it's being mapped into a LOCKED vma.
165 * If so, mark page as mlocked. 172 * If so, mark page as mlocked.
166 */ 173 */
167static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) 174static inline int mlocked_vma_newpage(struct vm_area_struct *vma,
175 struct page *page)
168{ 176{
169 VM_BUG_ON(PageLRU(page)); 177 VM_BUG_ON(PageLRU(page));
170 178
@@ -222,7 +230,7 @@ extern unsigned long vma_address(struct page *page,
222 struct vm_area_struct *vma); 230 struct vm_area_struct *vma);
223#endif 231#endif
224#else /* !CONFIG_MMU */ 232#else /* !CONFIG_MMU */
225static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p) 233static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p)
226{ 234{
227 return 0; 235 return 0;
228} 236}
diff --git a/mm/madvise.c b/mm/madvise.c
index 1ccbba5b6674..deff1b64a08c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -11,8 +11,10 @@
11#include <linux/mempolicy.h> 11#include <linux/mempolicy.h>
12#include <linux/page-isolation.h> 12#include <linux/page-isolation.h>
13#include <linux/hugetlb.h> 13#include <linux/hugetlb.h>
14#include <linux/falloc.h>
14#include <linux/sched.h> 15#include <linux/sched.h>
15#include <linux/ksm.h> 16#include <linux/ksm.h>
17#include <linux/fs.h>
16 18
17/* 19/*
18 * Any behaviour which results in changes to the vma->vm_flags needs to 20 * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -200,8 +202,7 @@ static long madvise_remove(struct vm_area_struct *vma,
200 struct vm_area_struct **prev, 202 struct vm_area_struct **prev,
201 unsigned long start, unsigned long end) 203 unsigned long start, unsigned long end)
202{ 204{
203 struct address_space *mapping; 205 loff_t offset;
204 loff_t offset, endoff;
205 int error; 206 int error;
206 207
207 *prev = NULL; /* tell sys_madvise we drop mmap_sem */ 208 *prev = NULL; /* tell sys_madvise we drop mmap_sem */
@@ -217,16 +218,14 @@ static long madvise_remove(struct vm_area_struct *vma,
217 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)) 218 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
218 return -EACCES; 219 return -EACCES;
219 220
220 mapping = vma->vm_file->f_mapping;
221
222 offset = (loff_t)(start - vma->vm_start) 221 offset = (loff_t)(start - vma->vm_start)
223 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 222 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
224 endoff = (loff_t)(end - vma->vm_start - 1)
225 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
226 223
227 /* vmtruncate_range needs to take i_mutex */ 224 /* filesystem's fallocate may need to take i_mutex */
228 up_read(&current->mm->mmap_sem); 225 up_read(&current->mm->mmap_sem);
229 error = vmtruncate_range(mapping->host, offset, endoff); 226 error = do_fallocate(vma->vm_file,
227 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
228 offset, end - start);
230 down_read(&current->mm->mmap_sem); 229 down_read(&current->mm->mmap_sem);
231 return error; 230 return error;
232} 231}
diff --git a/mm/memblock.c b/mm/memblock.c
index a44eab3157f8..952123eba433 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -37,6 +37,8 @@ struct memblock memblock __initdata_memblock = {
37 37
38int memblock_debug __initdata_memblock; 38int memblock_debug __initdata_memblock;
39static int memblock_can_resize __initdata_memblock; 39static int memblock_can_resize __initdata_memblock;
40static int memblock_memory_in_slab __initdata_memblock = 0;
41static int memblock_reserved_in_slab __initdata_memblock = 0;
40 42
41/* inline so we don't get a warning when pr_debug is compiled out */ 43/* inline so we don't get a warning when pr_debug is compiled out */
42static inline const char *memblock_type_name(struct memblock_type *type) 44static inline const char *memblock_type_name(struct memblock_type *type)
@@ -187,6 +189,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
187 struct memblock_region *new_array, *old_array; 189 struct memblock_region *new_array, *old_array;
188 phys_addr_t old_size, new_size, addr; 190 phys_addr_t old_size, new_size, addr;
189 int use_slab = slab_is_available(); 191 int use_slab = slab_is_available();
192 int *in_slab;
190 193
191 /* We don't allow resizing until we know about the reserved regions 194 /* We don't allow resizing until we know about the reserved regions
192 * of memory that aren't suitable for allocation 195 * of memory that aren't suitable for allocation
@@ -198,6 +201,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
198 old_size = type->max * sizeof(struct memblock_region); 201 old_size = type->max * sizeof(struct memblock_region);
199 new_size = old_size << 1; 202 new_size = old_size << 1;
200 203
204 /* Retrieve the slab flag */
205 if (type == &memblock.memory)
206 in_slab = &memblock_memory_in_slab;
207 else
208 in_slab = &memblock_reserved_in_slab;
209
201 /* Try to find some space for it. 210 /* Try to find some space for it.
202 * 211 *
203 * WARNING: We assume that either slab_is_available() and we use it or 212 * WARNING: We assume that either slab_is_available() and we use it or
@@ -212,14 +221,15 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
212 if (use_slab) { 221 if (use_slab) {
213 new_array = kmalloc(new_size, GFP_KERNEL); 222 new_array = kmalloc(new_size, GFP_KERNEL);
214 addr = new_array ? __pa(new_array) : 0; 223 addr = new_array ? __pa(new_array) : 0;
215 } else 224 } else {
216 addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t)); 225 addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
226 new_array = addr ? __va(addr) : 0;
227 }
217 if (!addr) { 228 if (!addr) {
218 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", 229 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
219 memblock_type_name(type), type->max, type->max * 2); 230 memblock_type_name(type), type->max, type->max * 2);
220 return -1; 231 return -1;
221 } 232 }
222 new_array = __va(addr);
223 233
224 memblock_dbg("memblock: %s array is doubled to %ld at [%#010llx-%#010llx]", 234 memblock_dbg("memblock: %s array is doubled to %ld at [%#010llx-%#010llx]",
225 memblock_type_name(type), type->max * 2, (u64)addr, (u64)addr + new_size - 1); 235 memblock_type_name(type), type->max * 2, (u64)addr, (u64)addr + new_size - 1);
@@ -234,22 +244,24 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
234 type->regions = new_array; 244 type->regions = new_array;
235 type->max <<= 1; 245 type->max <<= 1;
236 246
237 /* If we use SLAB that's it, we are done */ 247 /* Free old array. We needn't free it if the array is the
238 if (use_slab) 248 * static one
239 return 0;
240
241 /* Add the new reserved region now. Should not fail ! */
242 BUG_ON(memblock_reserve(addr, new_size));
243
244 /* If the array wasn't our static init one, then free it. We only do
245 * that before SLAB is available as later on, we don't know whether
246 * to use kfree or free_bootmem_pages(). Shouldn't be a big deal
247 * anyways
248 */ 249 */
249 if (old_array != memblock_memory_init_regions && 250 if (*in_slab)
250 old_array != memblock_reserved_init_regions) 251 kfree(old_array);
252 else if (old_array != memblock_memory_init_regions &&
253 old_array != memblock_reserved_init_regions)
251 memblock_free(__pa(old_array), old_size); 254 memblock_free(__pa(old_array), old_size);
252 255
256 /* Reserve the new array if that comes from the memblock.
257 * Otherwise, we needn't do it
258 */
259 if (!use_slab)
260 BUG_ON(memblock_reserve(addr, new_size));
261
262 /* Update slab flag */
263 *in_slab = use_slab;
264
253 return 0; 265 return 0;
254} 266}
255 267
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f342778a0c0a..00c8898dbb81 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -138,7 +138,6 @@ struct mem_cgroup_per_zone {
138 138
139 struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; 139 struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
140 140
141 struct zone_reclaim_stat reclaim_stat;
142 struct rb_node tree_node; /* RB tree node */ 141 struct rb_node tree_node; /* RB tree node */
143 unsigned long long usage_in_excess;/* Set to the value by which */ 142 unsigned long long usage_in_excess;/* Set to the value by which */
144 /* the soft limit is exceeded*/ 143 /* the soft limit is exceeded*/
@@ -1149,15 +1148,25 @@ struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
1149 * Checks whether given mem is same or in the root_mem_cgroup's 1148 * Checks whether given mem is same or in the root_mem_cgroup's
1150 * hierarchy subtree 1149 * hierarchy subtree
1151 */ 1150 */
1151bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1152 struct mem_cgroup *memcg)
1153{
1154 if (root_memcg == memcg)
1155 return true;
1156 if (!root_memcg->use_hierarchy)
1157 return false;
1158 return css_is_ancestor(&memcg->css, &root_memcg->css);
1159}
1160
1152static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, 1161static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1153 struct mem_cgroup *memcg) 1162 struct mem_cgroup *memcg)
1154{ 1163{
1155 if (root_memcg != memcg) { 1164 bool ret;
1156 return (root_memcg->use_hierarchy &&
1157 css_is_ancestor(&memcg->css, &root_memcg->css));
1158 }
1159 1165
1160 return true; 1166 rcu_read_lock();
1167 ret = __mem_cgroup_same_or_subtree(root_memcg, memcg);
1168 rcu_read_unlock();
1169 return ret;
1161} 1170}
1162 1171
1163int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) 1172int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
@@ -1233,16 +1242,6 @@ int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
1233 return (active > inactive); 1242 return (active > inactive);
1234} 1243}
1235 1244
1236struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
1237 struct zone *zone)
1238{
1239 int nid = zone_to_nid(zone);
1240 int zid = zone_idx(zone);
1241 struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
1242
1243 return &mz->reclaim_stat;
1244}
1245
1246struct zone_reclaim_stat * 1245struct zone_reclaim_stat *
1247mem_cgroup_get_reclaim_stat_from_page(struct page *page) 1246mem_cgroup_get_reclaim_stat_from_page(struct page *page)
1248{ 1247{
@@ -1258,7 +1257,7 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
1258 /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ 1257 /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
1259 smp_rmb(); 1258 smp_rmb();
1260 mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); 1259 mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
1261 return &mz->reclaim_stat; 1260 return &mz->lruvec.reclaim_stat;
1262} 1261}
1263 1262
1264#define mem_cgroup_from_res_counter(counter, member) \ 1263#define mem_cgroup_from_res_counter(counter, member) \
@@ -2845,24 +2844,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
2845 */ 2844 */
2846 if (do_swap_account && PageSwapCache(page)) { 2845 if (do_swap_account && PageSwapCache(page)) {
2847 swp_entry_t ent = {.val = page_private(page)}; 2846 swp_entry_t ent = {.val = page_private(page)};
2848 struct mem_cgroup *swap_memcg; 2847 mem_cgroup_uncharge_swap(ent);
2849 unsigned short id;
2850
2851 id = swap_cgroup_record(ent, 0);
2852 rcu_read_lock();
2853 swap_memcg = mem_cgroup_lookup(id);
2854 if (swap_memcg) {
2855 /*
2856 * This recorded memcg can be obsolete one. So, avoid
2857 * calling css_tryget
2858 */
2859 if (!mem_cgroup_is_root(swap_memcg))
2860 res_counter_uncharge(&swap_memcg->memsw,
2861 PAGE_SIZE);
2862 mem_cgroup_swap_statistics(swap_memcg, false);
2863 mem_cgroup_put(swap_memcg);
2864 }
2865 rcu_read_unlock();
2866 } 2848 }
2867 /* 2849 /*
2868 * At swapin, we may charge account against cgroup which has no tasks. 2850 * At swapin, we may charge account against cgroup which has no tasks.
@@ -3155,7 +3137,6 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
3155 * @entry: swap entry to be moved 3137 * @entry: swap entry to be moved
3156 * @from: mem_cgroup which the entry is moved from 3138 * @from: mem_cgroup which the entry is moved from
3157 * @to: mem_cgroup which the entry is moved to 3139 * @to: mem_cgroup which the entry is moved to
3158 * @need_fixup: whether we should fixup res_counters and refcounts.
3159 * 3140 *
3160 * It succeeds only when the swap_cgroup's record for this entry is the same 3141 * It succeeds only when the swap_cgroup's record for this entry is the same
3161 * as the mem_cgroup's id of @from. 3142 * as the mem_cgroup's id of @from.
@@ -3166,7 +3147,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
3166 * both res and memsw, and called css_get(). 3147 * both res and memsw, and called css_get().
3167 */ 3148 */
3168static int mem_cgroup_move_swap_account(swp_entry_t entry, 3149static int mem_cgroup_move_swap_account(swp_entry_t entry,
3169 struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup) 3150 struct mem_cgroup *from, struct mem_cgroup *to)
3170{ 3151{
3171 unsigned short old_id, new_id; 3152 unsigned short old_id, new_id;
3172 3153
@@ -3185,24 +3166,13 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
3185 * swap-in, the refcount of @to might be decreased to 0. 3166 * swap-in, the refcount of @to might be decreased to 0.
3186 */ 3167 */
3187 mem_cgroup_get(to); 3168 mem_cgroup_get(to);
3188 if (need_fixup) {
3189 if (!mem_cgroup_is_root(from))
3190 res_counter_uncharge(&from->memsw, PAGE_SIZE);
3191 mem_cgroup_put(from);
3192 /*
3193 * we charged both to->res and to->memsw, so we should
3194 * uncharge to->res.
3195 */
3196 if (!mem_cgroup_is_root(to))
3197 res_counter_uncharge(&to->res, PAGE_SIZE);
3198 }
3199 return 0; 3169 return 0;
3200 } 3170 }
3201 return -EINVAL; 3171 return -EINVAL;
3202} 3172}
3203#else 3173#else
3204static inline int mem_cgroup_move_swap_account(swp_entry_t entry, 3174static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
3205 struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup) 3175 struct mem_cgroup *from, struct mem_cgroup *to)
3206{ 3176{
3207 return -EINVAL; 3177 return -EINVAL;
3208} 3178}
@@ -3363,7 +3333,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
3363void mem_cgroup_replace_page_cache(struct page *oldpage, 3333void mem_cgroup_replace_page_cache(struct page *oldpage,
3364 struct page *newpage) 3334 struct page *newpage)
3365{ 3335{
3366 struct mem_cgroup *memcg; 3336 struct mem_cgroup *memcg = NULL;
3367 struct page_cgroup *pc; 3337 struct page_cgroup *pc;
3368 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; 3338 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
3369 3339
@@ -3373,11 +3343,20 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
3373 pc = lookup_page_cgroup(oldpage); 3343 pc = lookup_page_cgroup(oldpage);
3374 /* fix accounting on old pages */ 3344 /* fix accounting on old pages */
3375 lock_page_cgroup(pc); 3345 lock_page_cgroup(pc);
3376 memcg = pc->mem_cgroup; 3346 if (PageCgroupUsed(pc)) {
3377 mem_cgroup_charge_statistics(memcg, false, -1); 3347 memcg = pc->mem_cgroup;
3378 ClearPageCgroupUsed(pc); 3348 mem_cgroup_charge_statistics(memcg, false, -1);
3349 ClearPageCgroupUsed(pc);
3350 }
3379 unlock_page_cgroup(pc); 3351 unlock_page_cgroup(pc);
3380 3352
3353 /*
3354 * When called from shmem_replace_page(), in some cases the
3355 * oldpage has already been charged, and in some cases not.
3356 */
3357 if (!memcg)
3358 return;
3359
3381 if (PageSwapBacked(oldpage)) 3360 if (PageSwapBacked(oldpage))
3382 type = MEM_CGROUP_CHARGE_TYPE_SHMEM; 3361 type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
3383 3362
@@ -4226,21 +4205,19 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
4226 { 4205 {
4227 int nid, zid; 4206 int nid, zid;
4228 struct mem_cgroup_per_zone *mz; 4207 struct mem_cgroup_per_zone *mz;
4208 struct zone_reclaim_stat *rstat;
4229 unsigned long recent_rotated[2] = {0, 0}; 4209 unsigned long recent_rotated[2] = {0, 0};
4230 unsigned long recent_scanned[2] = {0, 0}; 4210 unsigned long recent_scanned[2] = {0, 0};
4231 4211
4232 for_each_online_node(nid) 4212 for_each_online_node(nid)
4233 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 4213 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
4234 mz = mem_cgroup_zoneinfo(memcg, nid, zid); 4214 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
4215 rstat = &mz->lruvec.reclaim_stat;
4235 4216
4236 recent_rotated[0] += 4217 recent_rotated[0] += rstat->recent_rotated[0];
4237 mz->reclaim_stat.recent_rotated[0]; 4218 recent_rotated[1] += rstat->recent_rotated[1];
4238 recent_rotated[1] += 4219 recent_scanned[0] += rstat->recent_scanned[0];
4239 mz->reclaim_stat.recent_rotated[1]; 4220 recent_scanned[1] += rstat->recent_scanned[1];
4240 recent_scanned[0] +=
4241 mz->reclaim_stat.recent_scanned[0];
4242 recent_scanned[1] +=
4243 mz->reclaim_stat.recent_scanned[1];
4244 } 4221 }
4245 cb->fill(cb, "recent_rotated_anon", recent_rotated[0]); 4222 cb->fill(cb, "recent_rotated_anon", recent_rotated[0]);
4246 cb->fill(cb, "recent_rotated_file", recent_rotated[1]); 4223 cb->fill(cb, "recent_rotated_file", recent_rotated[1]);
@@ -5135,7 +5112,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
5135 return NULL; 5112 return NULL;
5136 if (PageAnon(page)) { 5113 if (PageAnon(page)) {
5137 /* we don't move shared anon */ 5114 /* we don't move shared anon */
5138 if (!move_anon() || page_mapcount(page) > 2) 5115 if (!move_anon())
5139 return NULL; 5116 return NULL;
5140 } else if (!move_file()) 5117 } else if (!move_file())
5141 /* we ignore mapcount for file pages */ 5118 /* we ignore mapcount for file pages */
@@ -5146,26 +5123,32 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
5146 return page; 5123 return page;
5147} 5124}
5148 5125
5126#ifdef CONFIG_SWAP
5149static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, 5127static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
5150 unsigned long addr, pte_t ptent, swp_entry_t *entry) 5128 unsigned long addr, pte_t ptent, swp_entry_t *entry)
5151{ 5129{
5152 int usage_count;
5153 struct page *page = NULL; 5130 struct page *page = NULL;
5154 swp_entry_t ent = pte_to_swp_entry(ptent); 5131 swp_entry_t ent = pte_to_swp_entry(ptent);
5155 5132
5156 if (!move_anon() || non_swap_entry(ent)) 5133 if (!move_anon() || non_swap_entry(ent))
5157 return NULL; 5134 return NULL;
5158 usage_count = mem_cgroup_count_swap_user(ent, &page); 5135 /*
5159 if (usage_count > 1) { /* we don't move shared anon */ 5136 * Because lookup_swap_cache() updates some statistics counter,
5160 if (page) 5137 * we call find_get_page() with swapper_space directly.
5161 put_page(page); 5138 */
5162 return NULL; 5139 page = find_get_page(&swapper_space, ent.val);
5163 }
5164 if (do_swap_account) 5140 if (do_swap_account)
5165 entry->val = ent.val; 5141 entry->val = ent.val;
5166 5142
5167 return page; 5143 return page;
5168} 5144}
5145#else
5146static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
5147 unsigned long addr, pte_t ptent, swp_entry_t *entry)
5148{
5149 return NULL;
5150}
5151#endif
5169 5152
5170static struct page *mc_handle_file_pte(struct vm_area_struct *vma, 5153static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
5171 unsigned long addr, pte_t ptent, swp_entry_t *entry) 5154 unsigned long addr, pte_t ptent, swp_entry_t *entry)
@@ -5521,8 +5504,7 @@ put: /* get_mctgt_type() gets the page */
5521 break; 5504 break;
5522 case MC_TARGET_SWAP: 5505 case MC_TARGET_SWAP:
5523 ent = target.ent; 5506 ent = target.ent;
5524 if (!mem_cgroup_move_swap_account(ent, 5507 if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
5525 mc.from, mc.to, false)) {
5526 mc.precharge--; 5508 mc.precharge--;
5527 /* we fixup refcnts and charges later. */ 5509 /* we fixup refcnts and charges later. */
5528 mc.moved_swap++; 5510 mc.moved_swap++;
@@ -5598,7 +5580,6 @@ static void mem_cgroup_move_task(struct cgroup *cont,
5598 if (mm) { 5580 if (mm) {
5599 if (mc.to) 5581 if (mc.to)
5600 mem_cgroup_move_charge(mm); 5582 mem_cgroup_move_charge(mm);
5601 put_swap_token(mm);
5602 mmput(mm); 5583 mmput(mm);
5603 } 5584 }
5604 if (mc.to) 5585 if (mc.to)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c99ad4e6b88c..ab1e7145e290 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1388,16 +1388,16 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1388 */ 1388 */
1389 if (!get_page_unless_zero(compound_head(p))) { 1389 if (!get_page_unless_zero(compound_head(p))) {
1390 if (PageHuge(p)) { 1390 if (PageHuge(p)) {
1391 pr_info("get_any_page: %#lx free huge page\n", pfn); 1391 pr_info("%s: %#lx free huge page\n", __func__, pfn);
1392 ret = dequeue_hwpoisoned_huge_page(compound_head(p)); 1392 ret = dequeue_hwpoisoned_huge_page(compound_head(p));
1393 } else if (is_free_buddy_page(p)) { 1393 } else if (is_free_buddy_page(p)) {
1394 pr_info("get_any_page: %#lx free buddy page\n", pfn); 1394 pr_info("%s: %#lx free buddy page\n", __func__, pfn);
1395 /* Set hwpoison bit while page is still isolated */ 1395 /* Set hwpoison bit while page is still isolated */
1396 SetPageHWPoison(p); 1396 SetPageHWPoison(p);
1397 ret = 0; 1397 ret = 0;
1398 } else { 1398 } else {
1399 pr_info("get_any_page: %#lx: unknown zero refcount page type %lx\n", 1399 pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
1400 pfn, p->flags); 1400 __func__, pfn, p->flags);
1401 ret = -EIO; 1401 ret = -EIO;
1402 } 1402 }
1403 } else { 1403 } else {
diff --git a/mm/memory.c b/mm/memory.c
index e40f6759ba98..1b7dc662bf9f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2908,7 +2908,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2908 delayacct_set_flag(DELAYACCT_PF_SWAPIN); 2908 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
2909 page = lookup_swap_cache(entry); 2909 page = lookup_swap_cache(entry);
2910 if (!page) { 2910 if (!page) {
2911 grab_swap_token(mm); /* Contend for token _before_ read-in */
2912 page = swapin_readahead(entry, 2911 page = swapin_readahead(entry,
2913 GFP_HIGHUSER_MOVABLE, vma, address); 2912 GFP_HIGHUSER_MOVABLE, vma, address);
2914 if (!page) { 2913 if (!page) {
@@ -2938,6 +2937,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2938 } 2937 }
2939 2938
2940 locked = lock_page_or_retry(page, mm, flags); 2939 locked = lock_page_or_retry(page, mm, flags);
2940
2941 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2941 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2942 if (!locked) { 2942 if (!locked) {
2943 ret |= VM_FAULT_RETRY; 2943 ret |= VM_FAULT_RETRY;
@@ -3486,6 +3486,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3486 if (unlikely(is_vm_hugetlb_page(vma))) 3486 if (unlikely(is_vm_hugetlb_page(vma)))
3487 return hugetlb_fault(mm, vma, address, flags); 3487 return hugetlb_fault(mm, vma, address, flags);
3488 3488
3489retry:
3489 pgd = pgd_offset(mm, address); 3490 pgd = pgd_offset(mm, address);
3490 pud = pud_alloc(mm, pgd, address); 3491 pud = pud_alloc(mm, pgd, address);
3491 if (!pud) 3492 if (!pud)
@@ -3499,13 +3500,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3499 pmd, flags); 3500 pmd, flags);
3500 } else { 3501 } else {
3501 pmd_t orig_pmd = *pmd; 3502 pmd_t orig_pmd = *pmd;
3503 int ret;
3504
3502 barrier(); 3505 barrier();
3503 if (pmd_trans_huge(orig_pmd)) { 3506 if (pmd_trans_huge(orig_pmd)) {
3504 if (flags & FAULT_FLAG_WRITE && 3507 if (flags & FAULT_FLAG_WRITE &&
3505 !pmd_write(orig_pmd) && 3508 !pmd_write(orig_pmd) &&
3506 !pmd_trans_splitting(orig_pmd)) 3509 !pmd_trans_splitting(orig_pmd)) {
3507 return do_huge_pmd_wp_page(mm, vma, address, 3510 ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
3508 pmd, orig_pmd); 3511 orig_pmd);
3512 /*
3513 * If COW results in an oom, the huge pmd will
3514 * have been split, so retry the fault on the
3515 * pte for a smaller charge.
3516 */
3517 if (unlikely(ret & VM_FAULT_OOM))
3518 goto retry;
3519 return ret;
3520 }
3509 return 0; 3521 return 0;
3510 } 3522 }
3511 } 3523 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index fc898cb4fe8f..0d7e3ec8e0f3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -74,8 +74,7 @@ static struct resource *register_memory_resource(u64 start, u64 size)
74 res->end = start + size - 1; 74 res->end = start + size - 1;
75 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 75 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
76 if (request_resource(&iomem_resource, res) < 0) { 76 if (request_resource(&iomem_resource, res) < 0) {
77 printk("System RAM resource %llx - %llx cannot be added\n", 77 printk("System RAM resource %pR cannot be added\n", res);
78 (unsigned long long)res->start, (unsigned long long)res->end);
79 kfree(res); 78 kfree(res);
80 res = NULL; 79 res = NULL;
81 } 80 }
@@ -502,8 +501,10 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
502 online_pages_range); 501 online_pages_range);
503 if (ret) { 502 if (ret) {
504 mutex_unlock(&zonelists_mutex); 503 mutex_unlock(&zonelists_mutex);
505 printk(KERN_DEBUG "online_pages %lx at %lx failed\n", 504 printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] failed\n",
506 nr_pages, pfn); 505 (unsigned long long) pfn << PAGE_SHIFT,
506 (((unsigned long long) pfn + nr_pages)
507 << PAGE_SHIFT) - 1);
507 memory_notify(MEM_CANCEL_ONLINE, &arg); 508 memory_notify(MEM_CANCEL_ONLINE, &arg);
508 unlock_memory_hotplug(); 509 unlock_memory_hotplug();
509 return ret; 510 return ret;
@@ -977,8 +978,9 @@ repeat:
977 return 0; 978 return 0;
978 979
979failed_removal: 980failed_removal:
980 printk(KERN_INFO "memory offlining %lx to %lx failed\n", 981 printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n",
981 start_pfn, end_pfn); 982 (unsigned long long) start_pfn << PAGE_SHIFT,
983 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
982 memory_notify(MEM_CANCEL_OFFLINE, &arg); 984 memory_notify(MEM_CANCEL_OFFLINE, &arg);
983 /* pushback to free area */ 985 /* pushback to free area */
984 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); 986 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 88f9422b92e7..f15c1b24ca18 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -390,7 +390,7 @@ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask,
390{ 390{
391 if (!pol) 391 if (!pol)
392 return; 392 return;
393 if (!mpol_store_user_nodemask(pol) && step == 0 && 393 if (!mpol_store_user_nodemask(pol) && step == MPOL_REBIND_ONCE &&
394 nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) 394 nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
395 return; 395 return;
396 396
@@ -950,8 +950,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
950 * 950 *
951 * Returns the number of page that could not be moved. 951 * Returns the number of page that could not be moved.
952 */ 952 */
953int do_migrate_pages(struct mm_struct *mm, 953int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
954 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) 954 const nodemask_t *to, int flags)
955{ 955{
956 int busy = 0; 956 int busy = 0;
957 int err; 957 int err;
@@ -963,7 +963,7 @@ int do_migrate_pages(struct mm_struct *mm,
963 963
964 down_read(&mm->mmap_sem); 964 down_read(&mm->mmap_sem);
965 965
966 err = migrate_vmas(mm, from_nodes, to_nodes, flags); 966 err = migrate_vmas(mm, from, to, flags);
967 if (err) 967 if (err)
968 goto out; 968 goto out;
969 969
@@ -998,14 +998,34 @@ int do_migrate_pages(struct mm_struct *mm,
998 * moved to an empty node, then there is nothing left worth migrating. 998 * moved to an empty node, then there is nothing left worth migrating.
999 */ 999 */
1000 1000
1001 tmp = *from_nodes; 1001 tmp = *from;
1002 while (!nodes_empty(tmp)) { 1002 while (!nodes_empty(tmp)) {
1003 int s,d; 1003 int s,d;
1004 int source = -1; 1004 int source = -1;
1005 int dest = 0; 1005 int dest = 0;
1006 1006
1007 for_each_node_mask(s, tmp) { 1007 for_each_node_mask(s, tmp) {
1008 d = node_remap(s, *from_nodes, *to_nodes); 1008
1009 /*
1010 * do_migrate_pages() tries to maintain the relative
1011 * node relationship of the pages established between
1012 * threads and memory areas.
1013 *
1014 * However if the number of source nodes is not equal to
1015 * the number of destination nodes we can not preserve
1016 * this node relative relationship. In that case, skip
1017 * copying memory from a node that is in the destination
1018 * mask.
1019 *
1020 * Example: [2,3,4] -> [3,4,5] moves everything.
1021 * [0-7] - > [3,4,5] moves only 0,1,2,6,7.
1022 */
1023
1024 if ((nodes_weight(*from) != nodes_weight(*to)) &&
1025 (node_isset(s, *to)))
1026 continue;
1027
1028 d = node_remap(s, *from, *to);
1009 if (s == d) 1029 if (s == d)
1010 continue; 1030 continue;
1011 1031
@@ -1065,8 +1085,8 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
1065{ 1085{
1066} 1086}
1067 1087
1068int do_migrate_pages(struct mm_struct *mm, 1088int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
1069 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) 1089 const nodemask_t *to, int flags)
1070{ 1090{
1071 return -ENOSYS; 1091 return -ENOSYS;
1072} 1092}
diff --git a/mm/mmap.c b/mm/mmap.c
index e8dcfc7de866..4a9c2a391e28 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1639,33 +1639,34 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1639{ 1639{
1640 struct vm_area_struct *vma = NULL; 1640 struct vm_area_struct *vma = NULL;
1641 1641
1642 if (mm) { 1642 if (WARN_ON_ONCE(!mm)) /* Remove this in linux-3.6 */
1643 /* Check the cache first. */ 1643 return NULL;
1644 /* (Cache hit rate is typically around 35%.) */ 1644
1645 vma = mm->mmap_cache; 1645 /* Check the cache first. */
1646 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { 1646 /* (Cache hit rate is typically around 35%.) */
1647 struct rb_node * rb_node; 1647 vma = mm->mmap_cache;
1648 1648 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1649 rb_node = mm->mm_rb.rb_node; 1649 struct rb_node *rb_node;
1650 vma = NULL; 1650
1651 1651 rb_node = mm->mm_rb.rb_node;
1652 while (rb_node) { 1652 vma = NULL;
1653 struct vm_area_struct * vma_tmp; 1653
1654 1654 while (rb_node) {
1655 vma_tmp = rb_entry(rb_node, 1655 struct vm_area_struct *vma_tmp;
1656 struct vm_area_struct, vm_rb); 1656
1657 1657 vma_tmp = rb_entry(rb_node,
1658 if (vma_tmp->vm_end > addr) { 1658 struct vm_area_struct, vm_rb);
1659 vma = vma_tmp; 1659
1660 if (vma_tmp->vm_start <= addr) 1660 if (vma_tmp->vm_end > addr) {
1661 break; 1661 vma = vma_tmp;
1662 rb_node = rb_node->rb_left; 1662 if (vma_tmp->vm_start <= addr)
1663 } else 1663 break;
1664 rb_node = rb_node->rb_right; 1664 rb_node = rb_node->rb_left;
1665 } 1665 } else
1666 if (vma) 1666 rb_node = rb_node->rb_right;
1667 mm->mmap_cache = vma;
1668 } 1667 }
1668 if (vma)
1669 mm->mmap_cache = vma;
1669 } 1670 }
1670 return vma; 1671 return vma;
1671} 1672}
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 1983fb1c7026..d23415c001bc 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -274,86 +274,85 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
274 return ___alloc_bootmem(size, align, goal, limit); 274 return ___alloc_bootmem(size, align, goal, limit);
275} 275}
276 276
277/** 277static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
278 * __alloc_bootmem_node - allocate boot memory from a specific node 278 unsigned long size,
279 * @pgdat: node to allocate from 279 unsigned long align,
280 * @size: size of the request in bytes 280 unsigned long goal,
281 * @align: alignment of the region 281 unsigned long limit)
282 * @goal: preferred starting address of the region
283 *
284 * The goal is dropped if it can not be satisfied and the allocation will
285 * fall back to memory below @goal.
286 *
287 * Allocation may fall back to any node in the system if the specified node
288 * can not hold the requested memory.
289 *
290 * The function panics if the request can not be satisfied.
291 */
292void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
293 unsigned long align, unsigned long goal)
294{ 282{
295 void *ptr; 283 void *ptr;
296 284
297 if (WARN_ON_ONCE(slab_is_available()))
298 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
299
300again: 285again:
301 ptr = __alloc_memory_core_early(pgdat->node_id, size, align, 286 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
302 goal, -1ULL); 287 goal, limit);
303 if (ptr) 288 if (ptr)
304 return ptr; 289 return ptr;
305 290
306 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, 291 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
307 goal, -1ULL); 292 goal, limit);
308 if (!ptr && goal) { 293 if (ptr)
294 return ptr;
295
296 if (goal) {
309 goal = 0; 297 goal = 0;
310 goto again; 298 goto again;
311 } 299 }
312 return ptr; 300
301 return NULL;
313} 302}
314 303
315void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, 304void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
316 unsigned long align, unsigned long goal) 305 unsigned long align, unsigned long goal)
317{ 306{
318 return __alloc_bootmem_node(pgdat, size, align, goal); 307 if (WARN_ON_ONCE(slab_is_available()))
308 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
309
310 return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
319} 311}
320 312
321#ifdef CONFIG_SPARSEMEM 313void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
322/** 314 unsigned long align, unsigned long goal,
323 * alloc_bootmem_section - allocate boot memory from a specific section 315 unsigned long limit)
324 * @size: size of the request in bytes
325 * @section_nr: sparse map section to allocate from
326 *
327 * Return NULL on failure.
328 */
329void * __init alloc_bootmem_section(unsigned long size,
330 unsigned long section_nr)
331{ 316{
332 unsigned long pfn, goal, limit; 317 void *ptr;
333 318
334 pfn = section_nr_to_pfn(section_nr); 319 ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, limit);
335 goal = pfn << PAGE_SHIFT; 320 if (ptr)
336 limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; 321 return ptr;
337 322
338 return __alloc_memory_core_early(early_pfn_to_nid(pfn), size, 323 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
339 SMP_CACHE_BYTES, goal, limit); 324 panic("Out of memory");
325 return NULL;
340} 326}
341#endif
342 327
343void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, 328/**
329 * __alloc_bootmem_node - allocate boot memory from a specific node
330 * @pgdat: node to allocate from
331 * @size: size of the request in bytes
332 * @align: alignment of the region
333 * @goal: preferred starting address of the region
334 *
335 * The goal is dropped if it can not be satisfied and the allocation will
336 * fall back to memory below @goal.
337 *
338 * Allocation may fall back to any node in the system if the specified node
339 * can not hold the requested memory.
340 *
341 * The function panics if the request can not be satisfied.
342 */
343void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
344 unsigned long align, unsigned long goal) 344 unsigned long align, unsigned long goal)
345{ 345{
346 void *ptr;
347
348 if (WARN_ON_ONCE(slab_is_available())) 346 if (WARN_ON_ONCE(slab_is_available()))
349 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 347 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
350 348
351 ptr = __alloc_memory_core_early(pgdat->node_id, size, align, 349 return ___alloc_bootmem_node(pgdat, size, align, goal, 0);
352 goal, -1ULL); 350}
353 if (ptr)
354 return ptr;
355 351
356 return __alloc_bootmem_nopanic(size, align, goal); 352void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
353 unsigned long align, unsigned long goal)
354{
355 return __alloc_bootmem_node(pgdat, size, align, goal);
357} 356}
358 357
359#ifndef ARCH_LOW_ADDRESS_LIMIT 358#ifndef ARCH_LOW_ADDRESS_LIMIT
@@ -397,16 +396,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
397void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 396void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
398 unsigned long align, unsigned long goal) 397 unsigned long align, unsigned long goal)
399{ 398{
400 void *ptr;
401
402 if (WARN_ON_ONCE(slab_is_available())) 399 if (WARN_ON_ONCE(slab_is_available()))
403 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 400 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
404 401
405 ptr = __alloc_memory_core_early(pgdat->node_id, size, align, 402 return ___alloc_bootmem_node(pgdat, size, align, goal,
406 goal, ARCH_LOW_ADDRESS_LIMIT); 403 ARCH_LOW_ADDRESS_LIMIT);
407 if (ptr)
408 return ptr;
409
410 return __alloc_memory_core_early(MAX_NUMNODES, size, align,
411 goal, ARCH_LOW_ADDRESS_LIMIT);
412} 404}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 9f09a1fde9f9..ed0e19677360 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -180,10 +180,10 @@ static bool oom_unkillable_task(struct task_struct *p,
180 * predictable as possible. The goal is to return the highest value for the 180 * predictable as possible. The goal is to return the highest value for the
181 * task consuming the most memory to avoid subsequent oom failures. 181 * task consuming the most memory to avoid subsequent oom failures.
182 */ 182 */
183unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, 183unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
184 const nodemask_t *nodemask, unsigned long totalpages) 184 const nodemask_t *nodemask, unsigned long totalpages)
185{ 185{
186 long points; 186 unsigned long points;
187 187
188 if (oom_unkillable_task(p, memcg, nodemask)) 188 if (oom_unkillable_task(p, memcg, nodemask))
189 return 0; 189 return 0;
@@ -198,21 +198,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
198 } 198 }
199 199
200 /* 200 /*
201 * The memory controller may have a limit of 0 bytes, so avoid a divide
202 * by zero, if necessary.
203 */
204 if (!totalpages)
205 totalpages = 1;
206
207 /*
208 * The baseline for the badness score is the proportion of RAM that each 201 * The baseline for the badness score is the proportion of RAM that each
209 * task's rss, pagetable and swap space use. 202 * task's rss, pagetable and swap space use.
210 */ 203 */
211 points = get_mm_rss(p->mm) + p->mm->nr_ptes; 204 points = get_mm_rss(p->mm) + p->mm->nr_ptes +
212 points += get_mm_counter(p->mm, MM_SWAPENTS); 205 get_mm_counter(p->mm, MM_SWAPENTS);
213
214 points *= 1000;
215 points /= totalpages;
216 task_unlock(p); 206 task_unlock(p);
217 207
218 /* 208 /*
@@ -220,23 +210,20 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
220 * implementation used by LSMs. 210 * implementation used by LSMs.
221 */ 211 */
222 if (has_capability_noaudit(p, CAP_SYS_ADMIN)) 212 if (has_capability_noaudit(p, CAP_SYS_ADMIN))
223 points -= 30; 213 points -= 30 * totalpages / 1000;
224 214
225 /* 215 /*
226 * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may 216 * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
227 * either completely disable oom killing or always prefer a certain 217 * either completely disable oom killing or always prefer a certain
228 * task. 218 * task.
229 */ 219 */
230 points += p->signal->oom_score_adj; 220 points += p->signal->oom_score_adj * totalpages / 1000;
231 221
232 /* 222 /*
233 * Never return 0 for an eligible task that may be killed since it's 223 * Never return 0 for an eligible task regardless of the root bonus and
234 * possible that no single user task uses more than 0.1% of memory and 224 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
235 * no single admin tasks uses more than 3.0%.
236 */ 225 */
237 if (points <= 0) 226 return points ? points : 1;
238 return 1;
239 return (points < 1000) ? points : 1000;
240} 227}
241 228
242/* 229/*
@@ -314,7 +301,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
314{ 301{
315 struct task_struct *g, *p; 302 struct task_struct *g, *p;
316 struct task_struct *chosen = NULL; 303 struct task_struct *chosen = NULL;
317 *ppoints = 0; 304 unsigned long chosen_points = 0;
318 305
319 do_each_thread(g, p) { 306 do_each_thread(g, p) {
320 unsigned int points; 307 unsigned int points;
@@ -354,7 +341,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
354 */ 341 */
355 if (p == current) { 342 if (p == current) {
356 chosen = p; 343 chosen = p;
357 *ppoints = 1000; 344 chosen_points = ULONG_MAX;
358 } else if (!force_kill) { 345 } else if (!force_kill) {
359 /* 346 /*
360 * If this task is not being ptraced on exit, 347 * If this task is not being ptraced on exit,
@@ -367,12 +354,13 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
367 } 354 }
368 355
369 points = oom_badness(p, memcg, nodemask, totalpages); 356 points = oom_badness(p, memcg, nodemask, totalpages);
370 if (points > *ppoints) { 357 if (points > chosen_points) {
371 chosen = p; 358 chosen = p;
372 *ppoints = points; 359 chosen_points = points;
373 } 360 }
374 } while_each_thread(g, p); 361 } while_each_thread(g, p);
375 362
363 *ppoints = chosen_points * 1000 / totalpages;
376 return chosen; 364 return chosen;
377} 365}
378 366
@@ -572,7 +560,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
572 } 560 }
573 561
574 check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); 562 check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
575 limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT; 563 limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
576 read_lock(&tasklist_lock); 564 read_lock(&tasklist_lock);
577 p = select_bad_process(&points, limit, memcg, NULL, false); 565 p = select_bad_process(&points, limit, memcg, NULL, false);
578 if (p && PTR_ERR(p) != -1UL) 566 if (p && PTR_ERR(p) != -1UL)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bab8e3bc4202..8cbfc38e68ac 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes);
219 219
220int page_group_by_mobility_disabled __read_mostly; 220int page_group_by_mobility_disabled __read_mostly;
221 221
222static void set_pageblock_migratetype(struct page *page, int migratetype) 222void set_pageblock_migratetype(struct page *page, int migratetype)
223{ 223{
224 224
225 if (unlikely(page_group_by_mobility_disabled)) 225 if (unlikely(page_group_by_mobility_disabled))
@@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone,
954 return pages_moved; 954 return pages_moved;
955} 955}
956 956
957static int move_freepages_block(struct zone *zone, struct page *page, 957int move_freepages_block(struct zone *zone, struct page *page,
958 int migratetype) 958 int migratetype)
959{ 959{
960 unsigned long start_pfn, end_pfn; 960 unsigned long start_pfn, end_pfn;
961 struct page *start_page, *end_page; 961 struct page *start_page, *end_page;
@@ -4300,25 +4300,24 @@ static inline void setup_usemap(struct pglist_data *pgdat,
4300 4300
4301#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 4301#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
4302 4302
4303/* Return a sensible default order for the pageblock size. */
4304static inline int pageblock_default_order(void)
4305{
4306 if (HPAGE_SHIFT > PAGE_SHIFT)
4307 return HUGETLB_PAGE_ORDER;
4308
4309 return MAX_ORDER-1;
4310}
4311
4312/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ 4303/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
4313static inline void __init set_pageblock_order(unsigned int order) 4304static inline void __init set_pageblock_order(void)
4314{ 4305{
4306 unsigned int order;
4307
4315 /* Check that pageblock_nr_pages has not already been setup */ 4308 /* Check that pageblock_nr_pages has not already been setup */
4316 if (pageblock_order) 4309 if (pageblock_order)
4317 return; 4310 return;
4318 4311
4312 if (HPAGE_SHIFT > PAGE_SHIFT)
4313 order = HUGETLB_PAGE_ORDER;
4314 else
4315 order = MAX_ORDER - 1;
4316
4319 /* 4317 /*
4320 * Assume the largest contiguous order of interest is a huge page. 4318 * Assume the largest contiguous order of interest is a huge page.
4321 * This value may be variable depending on boot parameters on IA64 4319 * This value may be variable depending on boot parameters on IA64 and
4320 * powerpc.
4322 */ 4321 */
4323 pageblock_order = order; 4322 pageblock_order = order;
4324} 4323}
@@ -4326,15 +4325,13 @@ static inline void __init set_pageblock_order(unsigned int order)
4326 4325
4327/* 4326/*
4328 * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order() 4327 * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order()
4329 * and pageblock_default_order() are unused as pageblock_order is set 4328 * is unused as pageblock_order is set at compile-time. See
4330 * at compile-time. See include/linux/pageblock-flags.h for the values of 4329 * include/linux/pageblock-flags.h for the values of pageblock_order based on
4331 * pageblock_order based on the kernel config 4330 * the kernel config
4332 */ 4331 */
4333static inline int pageblock_default_order(unsigned int order) 4332static inline void set_pageblock_order(void)
4334{ 4333{
4335 return MAX_ORDER-1;
4336} 4334}
4337#define set_pageblock_order(x) do {} while (0)
4338 4335
4339#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ 4336#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
4340 4337
@@ -4413,16 +4410,16 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
4413 zone_pcp_init(zone); 4410 zone_pcp_init(zone);
4414 for_each_lru(lru) 4411 for_each_lru(lru)
4415 INIT_LIST_HEAD(&zone->lruvec.lists[lru]); 4412 INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
4416 zone->reclaim_stat.recent_rotated[0] = 0; 4413 zone->lruvec.reclaim_stat.recent_rotated[0] = 0;
4417 zone->reclaim_stat.recent_rotated[1] = 0; 4414 zone->lruvec.reclaim_stat.recent_rotated[1] = 0;
4418 zone->reclaim_stat.recent_scanned[0] = 0; 4415 zone->lruvec.reclaim_stat.recent_scanned[0] = 0;
4419 zone->reclaim_stat.recent_scanned[1] = 0; 4416 zone->lruvec.reclaim_stat.recent_scanned[1] = 0;
4420 zap_zone_vm_stats(zone); 4417 zap_zone_vm_stats(zone);
4421 zone->flags = 0; 4418 zone->flags = 0;
4422 if (!size) 4419 if (!size)
4423 continue; 4420 continue;
4424 4421
4425 set_pageblock_order(pageblock_default_order()); 4422 set_pageblock_order();
4426 setup_usemap(pgdat, zone, size); 4423 setup_usemap(pgdat, zone, size);
4427 ret = init_currently_empty_zone(zone, zone_start_pfn, 4424 ret = init_currently_empty_zone(zone, zone_start_pfn,
4428 size, MEMMAP_EARLY); 4425 size, MEMMAP_EARLY);
@@ -4815,7 +4812,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4815 find_zone_movable_pfns_for_nodes(); 4812 find_zone_movable_pfns_for_nodes();
4816 4813
4817 /* Print out the zone ranges */ 4814 /* Print out the zone ranges */
4818 printk("Zone PFN ranges:\n"); 4815 printk("Zone ranges:\n");
4819 for (i = 0; i < MAX_NR_ZONES; i++) { 4816 for (i = 0; i < MAX_NR_ZONES; i++) {
4820 if (i == ZONE_MOVABLE) 4817 if (i == ZONE_MOVABLE)
4821 continue; 4818 continue;
@@ -4824,22 +4821,25 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4824 arch_zone_highest_possible_pfn[i]) 4821 arch_zone_highest_possible_pfn[i])
4825 printk(KERN_CONT "empty\n"); 4822 printk(KERN_CONT "empty\n");
4826 else 4823 else
4827 printk(KERN_CONT "%0#10lx -> %0#10lx\n", 4824 printk(KERN_CONT "[mem %0#10lx-%0#10lx]\n",
4828 arch_zone_lowest_possible_pfn[i], 4825 arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT,
4829 arch_zone_highest_possible_pfn[i]); 4826 (arch_zone_highest_possible_pfn[i]
4827 << PAGE_SHIFT) - 1);
4830 } 4828 }
4831 4829
4832 /* Print out the PFNs ZONE_MOVABLE begins at in each node */ 4830 /* Print out the PFNs ZONE_MOVABLE begins at in each node */
4833 printk("Movable zone start PFN for each node\n"); 4831 printk("Movable zone start for each node\n");
4834 for (i = 0; i < MAX_NUMNODES; i++) { 4832 for (i = 0; i < MAX_NUMNODES; i++) {
4835 if (zone_movable_pfn[i]) 4833 if (zone_movable_pfn[i])
4836 printk(" Node %d: %lu\n", i, zone_movable_pfn[i]); 4834 printk(" Node %d: %#010lx\n", i,
4835 zone_movable_pfn[i] << PAGE_SHIFT);
4837 } 4836 }
4838 4837
4839 /* Print out the early_node_map[] */ 4838 /* Print out the early_node_map[] */
4840 printk("Early memory PFN ranges\n"); 4839 printk("Early memory node ranges\n");
4841 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) 4840 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4842 printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn); 4841 printk(" node %3d: [mem %#010lx-%#010lx]\n", nid,
4842 start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
4843 4843
4844 /* Initialise every node */ 4844 /* Initialise every node */
4845 mminit_verify_pageflags_layout(); 4845 mminit_verify_pageflags_layout();
@@ -5657,7 +5657,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
5657 .nr_migratepages = 0, 5657 .nr_migratepages = 0,
5658 .order = -1, 5658 .order = -1,
5659 .zone = page_zone(pfn_to_page(start)), 5659 .zone = page_zone(pfn_to_page(start)),
5660 .sync = true, 5660 .mode = COMPACT_SYNC,
5661 }; 5661 };
5662 INIT_LIST_HEAD(&cc.migratepages); 5662 INIT_LIST_HEAD(&cc.migratepages);
5663 5663
@@ -5938,7 +5938,7 @@ bool is_free_buddy_page(struct page *page)
5938} 5938}
5939#endif 5939#endif
5940 5940
5941static struct trace_print_flags pageflag_names[] = { 5941static const struct trace_print_flags pageflag_names[] = {
5942 {1UL << PG_locked, "locked" }, 5942 {1UL << PG_locked, "locked" },
5943 {1UL << PG_error, "error" }, 5943 {1UL << PG_error, "error" },
5944 {1UL << PG_referenced, "referenced" }, 5944 {1UL << PG_referenced, "referenced" },
@@ -5973,7 +5973,9 @@ static struct trace_print_flags pageflag_names[] = {
5973#ifdef CONFIG_MEMORY_FAILURE 5973#ifdef CONFIG_MEMORY_FAILURE
5974 {1UL << PG_hwpoison, "hwpoison" }, 5974 {1UL << PG_hwpoison, "hwpoison" },
5975#endif 5975#endif
5976 {-1UL, NULL }, 5976#ifdef CONFIG_TRANSPARENT_HUGEPAGE
5977 {1UL << PG_compound_lock, "compound_lock" },
5978#endif
5977}; 5979};
5978 5980
5979static void dump_page_flags(unsigned long flags) 5981static void dump_page_flags(unsigned long flags)
@@ -5982,12 +5984,14 @@ static void dump_page_flags(unsigned long flags)
5982 unsigned long mask; 5984 unsigned long mask;
5983 int i; 5985 int i;
5984 5986
5987 BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS);
5988
5985 printk(KERN_ALERT "page flags: %#lx(", flags); 5989 printk(KERN_ALERT "page flags: %#lx(", flags);
5986 5990
5987 /* remove zone id */ 5991 /* remove zone id */
5988 flags &= (1UL << NR_PAGEFLAGS) - 1; 5992 flags &= (1UL << NR_PAGEFLAGS) - 1;
5989 5993
5990 for (i = 0; pageflag_names[i].name && flags; i++) { 5994 for (i = 0; i < ARRAY_SIZE(pageflag_names) && flags; i++) {
5991 5995
5992 mask = pageflag_names[i].mask; 5996 mask = pageflag_names[i].mask;
5993 if ((flags & mask) != mask) 5997 if ((flags & mask) != mask)
diff --git a/mm/readahead.c b/mm/readahead.c
index cbcbb02f3e28..ea8f8fa21649 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,6 +17,8 @@
17#include <linux/task_io_accounting_ops.h> 17#include <linux/task_io_accounting_ops.h>
18#include <linux/pagevec.h> 18#include <linux/pagevec.h>
19#include <linux/pagemap.h> 19#include <linux/pagemap.h>
20#include <linux/syscalls.h>
21#include <linux/file.h>
20 22
21/* 23/*
22 * Initialise a struct file's readahead state. Assumes that the caller has 24 * Initialise a struct file's readahead state. Assumes that the caller has
@@ -562,3 +564,41 @@ page_cache_async_readahead(struct address_space *mapping,
562 ondemand_readahead(mapping, ra, filp, true, offset, req_size); 564 ondemand_readahead(mapping, ra, filp, true, offset, req_size);
563} 565}
564EXPORT_SYMBOL_GPL(page_cache_async_readahead); 566EXPORT_SYMBOL_GPL(page_cache_async_readahead);
567
568static ssize_t
569do_readahead(struct address_space *mapping, struct file *filp,
570 pgoff_t index, unsigned long nr)
571{
572 if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
573 return -EINVAL;
574
575 force_page_cache_readahead(mapping, filp, index, nr);
576 return 0;
577}
578
579SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
580{
581 ssize_t ret;
582 struct file *file;
583
584 ret = -EBADF;
585 file = fget(fd);
586 if (file) {
587 if (file->f_mode & FMODE_READ) {
588 struct address_space *mapping = file->f_mapping;
589 pgoff_t start = offset >> PAGE_CACHE_SHIFT;
590 pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
591 unsigned long len = end - start + 1;
592 ret = do_readahead(mapping, file, start, len);
593 }
594 fput(file);
595 }
596 return ret;
597}
598#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
599asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
600{
601 return SYSC_readahead((int) fd, offset, (size_t) count);
602}
603SYSCALL_ALIAS(sys_readahead, SyS_readahead);
604#endif
diff --git a/mm/rmap.c b/mm/rmap.c
index 5b5ad584ffb7..0f3b7cda2a24 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -755,12 +755,6 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
755 pte_unmap_unlock(pte, ptl); 755 pte_unmap_unlock(pte, ptl);
756 } 756 }
757 757
758 /* Pretend the page is referenced if the task has the
759 swap token and is in the middle of a page fault. */
760 if (mm != current->mm && has_swap_token(mm) &&
761 rwsem_is_locked(&mm->mmap_sem))
762 referenced++;
763
764 (*mapcount)--; 758 (*mapcount)--;
765 759
766 if (referenced) 760 if (referenced)
diff --git a/mm/shmem.c b/mm/shmem.c
index be5af34a070d..d576b84d913c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -53,6 +53,7 @@ static struct vfsmount *shm_mnt;
53#include <linux/blkdev.h> 53#include <linux/blkdev.h>
54#include <linux/pagevec.h> 54#include <linux/pagevec.h>
55#include <linux/percpu_counter.h> 55#include <linux/percpu_counter.h>
56#include <linux/falloc.h>
56#include <linux/splice.h> 57#include <linux/splice.h>
57#include <linux/security.h> 58#include <linux/security.h>
58#include <linux/swapops.h> 59#include <linux/swapops.h>
@@ -83,12 +84,25 @@ struct shmem_xattr {
83 char value[0]; 84 char value[0];
84}; 85};
85 86
87/*
88 * shmem_fallocate and shmem_writepage communicate via inode->i_private
89 * (with i_mutex making sure that it has only one user at a time):
90 * we would prefer not to enlarge the shmem inode just for that.
91 */
92struct shmem_falloc {
93 pgoff_t start; /* start of range currently being fallocated */
94 pgoff_t next; /* the next page offset to be fallocated */
95 pgoff_t nr_falloced; /* how many new pages have been fallocated */
96 pgoff_t nr_unswapped; /* how often writepage refused to swap out */
97};
98
86/* Flag allocation requirements to shmem_getpage */ 99/* Flag allocation requirements to shmem_getpage */
87enum sgp_type { 100enum sgp_type {
88 SGP_READ, /* don't exceed i_size, don't allocate page */ 101 SGP_READ, /* don't exceed i_size, don't allocate page */
89 SGP_CACHE, /* don't exceed i_size, may allocate page */ 102 SGP_CACHE, /* don't exceed i_size, may allocate page */
90 SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */ 103 SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
91 SGP_WRITE, /* may exceed i_size, may allocate page */ 104 SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
105 SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
92}; 106};
93 107
94#ifdef CONFIG_TMPFS 108#ifdef CONFIG_TMPFS
@@ -103,6 +117,9 @@ static unsigned long shmem_default_max_inodes(void)
103} 117}
104#endif 118#endif
105 119
120static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
121static int shmem_replace_page(struct page **pagep, gfp_t gfp,
122 struct shmem_inode_info *info, pgoff_t index);
106static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, 123static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
107 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type); 124 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
108 125
@@ -423,27 +440,31 @@ void shmem_unlock_mapping(struct address_space *mapping)
423 440
424/* 441/*
425 * Remove range of pages and swap entries from radix tree, and free them. 442 * Remove range of pages and swap entries from radix tree, and free them.
443 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
426 */ 444 */
427void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 445static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
446 bool unfalloc)
428{ 447{
429 struct address_space *mapping = inode->i_mapping; 448 struct address_space *mapping = inode->i_mapping;
430 struct shmem_inode_info *info = SHMEM_I(inode); 449 struct shmem_inode_info *info = SHMEM_I(inode);
431 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 450 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
432 unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 451 pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
433 pgoff_t end = (lend >> PAGE_CACHE_SHIFT); 452 unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
453 unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
434 struct pagevec pvec; 454 struct pagevec pvec;
435 pgoff_t indices[PAGEVEC_SIZE]; 455 pgoff_t indices[PAGEVEC_SIZE];
436 long nr_swaps_freed = 0; 456 long nr_swaps_freed = 0;
437 pgoff_t index; 457 pgoff_t index;
438 int i; 458 int i;
439 459
440 BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); 460 if (lend == -1)
461 end = -1; /* unsigned, so actually very big */
441 462
442 pagevec_init(&pvec, 0); 463 pagevec_init(&pvec, 0);
443 index = start; 464 index = start;
444 while (index <= end) { 465 while (index < end) {
445 pvec.nr = shmem_find_get_pages_and_swap(mapping, index, 466 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
446 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 467 min(end - index, (pgoff_t)PAGEVEC_SIZE),
447 pvec.pages, indices); 468 pvec.pages, indices);
448 if (!pvec.nr) 469 if (!pvec.nr)
449 break; 470 break;
@@ -452,10 +473,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
452 struct page *page = pvec.pages[i]; 473 struct page *page = pvec.pages[i];
453 474
454 index = indices[i]; 475 index = indices[i];
455 if (index > end) 476 if (index >= end)
456 break; 477 break;
457 478
458 if (radix_tree_exceptional_entry(page)) { 479 if (radix_tree_exceptional_entry(page)) {
480 if (unfalloc)
481 continue;
459 nr_swaps_freed += !shmem_free_swap(mapping, 482 nr_swaps_freed += !shmem_free_swap(mapping,
460 index, page); 483 index, page);
461 continue; 484 continue;
@@ -463,9 +486,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
463 486
464 if (!trylock_page(page)) 487 if (!trylock_page(page))
465 continue; 488 continue;
466 if (page->mapping == mapping) { 489 if (!unfalloc || !PageUptodate(page)) {
467 VM_BUG_ON(PageWriteback(page)); 490 if (page->mapping == mapping) {
468 truncate_inode_page(mapping, page); 491 VM_BUG_ON(PageWriteback(page));
492 truncate_inode_page(mapping, page);
493 }
469 } 494 }
470 unlock_page(page); 495 unlock_page(page);
471 } 496 }
@@ -476,30 +501,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
476 index++; 501 index++;
477 } 502 }
478 503
479 if (partial) { 504 if (partial_start) {
480 struct page *page = NULL; 505 struct page *page = NULL;
481 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); 506 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
482 if (page) { 507 if (page) {
483 zero_user_segment(page, partial, PAGE_CACHE_SIZE); 508 unsigned int top = PAGE_CACHE_SIZE;
509 if (start > end) {
510 top = partial_end;
511 partial_end = 0;
512 }
513 zero_user_segment(page, partial_start, top);
514 set_page_dirty(page);
515 unlock_page(page);
516 page_cache_release(page);
517 }
518 }
519 if (partial_end) {
520 struct page *page = NULL;
521 shmem_getpage(inode, end, &page, SGP_READ, NULL);
522 if (page) {
523 zero_user_segment(page, 0, partial_end);
484 set_page_dirty(page); 524 set_page_dirty(page);
485 unlock_page(page); 525 unlock_page(page);
486 page_cache_release(page); 526 page_cache_release(page);
487 } 527 }
488 } 528 }
529 if (start >= end)
530 return;
489 531
490 index = start; 532 index = start;
491 for ( ; ; ) { 533 for ( ; ; ) {
492 cond_resched(); 534 cond_resched();
493 pvec.nr = shmem_find_get_pages_and_swap(mapping, index, 535 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
494 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 536 min(end - index, (pgoff_t)PAGEVEC_SIZE),
495 pvec.pages, indices); 537 pvec.pages, indices);
496 if (!pvec.nr) { 538 if (!pvec.nr) {
497 if (index == start) 539 if (index == start || unfalloc)
498 break; 540 break;
499 index = start; 541 index = start;
500 continue; 542 continue;
501 } 543 }
502 if (index == start && indices[0] > end) { 544 if ((index == start || unfalloc) && indices[0] >= end) {
503 shmem_deswap_pagevec(&pvec); 545 shmem_deswap_pagevec(&pvec);
504 pagevec_release(&pvec); 546 pagevec_release(&pvec);
505 break; 547 break;
@@ -509,19 +551,23 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
509 struct page *page = pvec.pages[i]; 551 struct page *page = pvec.pages[i];
510 552
511 index = indices[i]; 553 index = indices[i];
512 if (index > end) 554 if (index >= end)
513 break; 555 break;
514 556
515 if (radix_tree_exceptional_entry(page)) { 557 if (radix_tree_exceptional_entry(page)) {
558 if (unfalloc)
559 continue;
516 nr_swaps_freed += !shmem_free_swap(mapping, 560 nr_swaps_freed += !shmem_free_swap(mapping,
517 index, page); 561 index, page);
518 continue; 562 continue;
519 } 563 }
520 564
521 lock_page(page); 565 lock_page(page);
522 if (page->mapping == mapping) { 566 if (!unfalloc || !PageUptodate(page)) {
523 VM_BUG_ON(PageWriteback(page)); 567 if (page->mapping == mapping) {
524 truncate_inode_page(mapping, page); 568 VM_BUG_ON(PageWriteback(page));
569 truncate_inode_page(mapping, page);
570 }
525 } 571 }
526 unlock_page(page); 572 unlock_page(page);
527 } 573 }
@@ -535,7 +581,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
535 info->swapped -= nr_swaps_freed; 581 info->swapped -= nr_swaps_freed;
536 shmem_recalc_inode(inode); 582 shmem_recalc_inode(inode);
537 spin_unlock(&info->lock); 583 spin_unlock(&info->lock);
584}
538 585
586void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
587{
588 shmem_undo_range(inode, lstart, lend, false);
539 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 589 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
540} 590}
541EXPORT_SYMBOL_GPL(shmem_truncate_range); 591EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -604,12 +654,13 @@ static void shmem_evict_inode(struct inode *inode)
604 * If swap found in inode, free it and move page from swapcache to filecache. 654 * If swap found in inode, free it and move page from swapcache to filecache.
605 */ 655 */
606static int shmem_unuse_inode(struct shmem_inode_info *info, 656static int shmem_unuse_inode(struct shmem_inode_info *info,
607 swp_entry_t swap, struct page *page) 657 swp_entry_t swap, struct page **pagep)
608{ 658{
609 struct address_space *mapping = info->vfs_inode.i_mapping; 659 struct address_space *mapping = info->vfs_inode.i_mapping;
610 void *radswap; 660 void *radswap;
611 pgoff_t index; 661 pgoff_t index;
612 int error; 662 gfp_t gfp;
663 int error = 0;
613 664
614 radswap = swp_to_radix_entry(swap); 665 radswap = swp_to_radix_entry(swap);
615 index = radix_tree_locate_item(&mapping->page_tree, radswap); 666 index = radix_tree_locate_item(&mapping->page_tree, radswap);
@@ -625,22 +676,37 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
625 if (shmem_swaplist.next != &info->swaplist) 676 if (shmem_swaplist.next != &info->swaplist)
626 list_move_tail(&shmem_swaplist, &info->swaplist); 677 list_move_tail(&shmem_swaplist, &info->swaplist);
627 678
679 gfp = mapping_gfp_mask(mapping);
680 if (shmem_should_replace_page(*pagep, gfp)) {
681 mutex_unlock(&shmem_swaplist_mutex);
682 error = shmem_replace_page(pagep, gfp, info, index);
683 mutex_lock(&shmem_swaplist_mutex);
684 /*
685 * We needed to drop mutex to make that restrictive page
686 * allocation; but the inode might already be freed by now,
687 * and we cannot refer to inode or mapping or info to check.
688 * However, we do hold page lock on the PageSwapCache page,
689 * so can check if that still has our reference remaining.
690 */
691 if (!page_swapcount(*pagep))
692 error = -ENOENT;
693 }
694
628 /* 695 /*
629 * We rely on shmem_swaplist_mutex, not only to protect the swaplist, 696 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
630 * but also to hold up shmem_evict_inode(): so inode cannot be freed 697 * but also to hold up shmem_evict_inode(): so inode cannot be freed
631 * beneath us (pagelock doesn't help until the page is in pagecache). 698 * beneath us (pagelock doesn't help until the page is in pagecache).
632 */ 699 */
633 error = shmem_add_to_page_cache(page, mapping, index, 700 if (!error)
701 error = shmem_add_to_page_cache(*pagep, mapping, index,
634 GFP_NOWAIT, radswap); 702 GFP_NOWAIT, radswap);
635 /* which does mem_cgroup_uncharge_cache_page on error */
636
637 if (error != -ENOMEM) { 703 if (error != -ENOMEM) {
638 /* 704 /*
639 * Truncation and eviction use free_swap_and_cache(), which 705 * Truncation and eviction use free_swap_and_cache(), which
640 * only does trylock page: if we raced, best clean up here. 706 * only does trylock page: if we raced, best clean up here.
641 */ 707 */
642 delete_from_swap_cache(page); 708 delete_from_swap_cache(*pagep);
643 set_page_dirty(page); 709 set_page_dirty(*pagep);
644 if (!error) { 710 if (!error) {
645 spin_lock(&info->lock); 711 spin_lock(&info->lock);
646 info->swapped--; 712 info->swapped--;
@@ -660,7 +726,14 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
660 struct list_head *this, *next; 726 struct list_head *this, *next;
661 struct shmem_inode_info *info; 727 struct shmem_inode_info *info;
662 int found = 0; 728 int found = 0;
663 int error; 729 int error = 0;
730
731 /*
732 * There's a faint possibility that swap page was replaced before
733 * caller locked it: it will come back later with the right page.
734 */
735 if (unlikely(!PageSwapCache(page)))
736 goto out;
664 737
665 /* 738 /*
666 * Charge page using GFP_KERNEL while we can wait, before taking 739 * Charge page using GFP_KERNEL while we can wait, before taking
@@ -676,7 +749,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
676 list_for_each_safe(this, next, &shmem_swaplist) { 749 list_for_each_safe(this, next, &shmem_swaplist) {
677 info = list_entry(this, struct shmem_inode_info, swaplist); 750 info = list_entry(this, struct shmem_inode_info, swaplist);
678 if (info->swapped) 751 if (info->swapped)
679 found = shmem_unuse_inode(info, swap, page); 752 found = shmem_unuse_inode(info, swap, &page);
680 else 753 else
681 list_del_init(&info->swaplist); 754 list_del_init(&info->swaplist);
682 cond_resched(); 755 cond_resched();
@@ -685,8 +758,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
685 } 758 }
686 mutex_unlock(&shmem_swaplist_mutex); 759 mutex_unlock(&shmem_swaplist_mutex);
687 760
688 if (!found)
689 mem_cgroup_uncharge_cache_page(page);
690 if (found < 0) 761 if (found < 0)
691 error = found; 762 error = found;
692out: 763out:
@@ -727,6 +798,38 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
727 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ 798 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
728 goto redirty; 799 goto redirty;
729 } 800 }
801
802 /*
803 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
804 * value into swapfile.c, the only way we can correctly account for a
805 * fallocated page arriving here is now to initialize it and write it.
806 *
807 * That's okay for a page already fallocated earlier, but if we have
808 * not yet completed the fallocation, then (a) we want to keep track
809 * of this page in case we have to undo it, and (b) it may not be a
810 * good idea to continue anyway, once we're pushing into swap. So
811 * reactivate the page, and let shmem_fallocate() quit when too many.
812 */
813 if (!PageUptodate(page)) {
814 if (inode->i_private) {
815 struct shmem_falloc *shmem_falloc;
816 spin_lock(&inode->i_lock);
817 shmem_falloc = inode->i_private;
818 if (shmem_falloc &&
819 index >= shmem_falloc->start &&
820 index < shmem_falloc->next)
821 shmem_falloc->nr_unswapped++;
822 else
823 shmem_falloc = NULL;
824 spin_unlock(&inode->i_lock);
825 if (shmem_falloc)
826 goto redirty;
827 }
828 clear_highpage(page);
829 flush_dcache_page(page);
830 SetPageUptodate(page);
831 }
832
730 swap = get_swap_page(); 833 swap = get_swap_page();
731 if (!swap.val) 834 if (!swap.val)
732 goto redirty; 835 goto redirty;
@@ -856,6 +959,84 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
856#endif 959#endif
857 960
858/* 961/*
962 * When a page is moved from swapcache to shmem filecache (either by the
963 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
964 * shmem_unuse_inode()), it may have been read in earlier from swap, in
965 * ignorance of the mapping it belongs to. If that mapping has special
966 * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
967 * we may need to copy to a suitable page before moving to filecache.
968 *
969 * In a future release, this may well be extended to respect cpuset and
970 * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
971 * but for now it is a simple matter of zone.
972 */
973static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
974{
975 return page_zonenum(page) > gfp_zone(gfp);
976}
977
978static int shmem_replace_page(struct page **pagep, gfp_t gfp,
979 struct shmem_inode_info *info, pgoff_t index)
980{
981 struct page *oldpage, *newpage;
982 struct address_space *swap_mapping;
983 pgoff_t swap_index;
984 int error;
985
986 oldpage = *pagep;
987 swap_index = page_private(oldpage);
988 swap_mapping = page_mapping(oldpage);
989
990 /*
991 * We have arrived here because our zones are constrained, so don't
992 * limit chance of success by further cpuset and node constraints.
993 */
994 gfp &= ~GFP_CONSTRAINT_MASK;
995 newpage = shmem_alloc_page(gfp, info, index);
996 if (!newpage)
997 return -ENOMEM;
998 VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
999
1000 *pagep = newpage;
1001 page_cache_get(newpage);
1002 copy_highpage(newpage, oldpage);
1003
1004 VM_BUG_ON(!PageLocked(oldpage));
1005 __set_page_locked(newpage);
1006 VM_BUG_ON(!PageUptodate(oldpage));
1007 SetPageUptodate(newpage);
1008 VM_BUG_ON(!PageSwapBacked(oldpage));
1009 SetPageSwapBacked(newpage);
1010 VM_BUG_ON(!swap_index);
1011 set_page_private(newpage, swap_index);
1012 VM_BUG_ON(!PageSwapCache(oldpage));
1013 SetPageSwapCache(newpage);
1014
1015 /*
1016 * Our caller will very soon move newpage out of swapcache, but it's
1017 * a nice clean interface for us to replace oldpage by newpage there.
1018 */
1019 spin_lock_irq(&swap_mapping->tree_lock);
1020 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
1021 newpage);
1022 __inc_zone_page_state(newpage, NR_FILE_PAGES);
1023 __dec_zone_page_state(oldpage, NR_FILE_PAGES);
1024 spin_unlock_irq(&swap_mapping->tree_lock);
1025 BUG_ON(error);
1026
1027 mem_cgroup_replace_page_cache(oldpage, newpage);
1028 lru_cache_add_anon(newpage);
1029
1030 ClearPageSwapCache(oldpage);
1031 set_page_private(oldpage, 0);
1032
1033 unlock_page(oldpage);
1034 page_cache_release(oldpage);
1035 page_cache_release(oldpage);
1036 return 0;
1037}
1038
1039/*
859 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate 1040 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
860 * 1041 *
861 * If we allocate a new one we do not mark it dirty. That's up to the 1042 * If we allocate a new one we do not mark it dirty. That's up to the
@@ -872,6 +1053,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
872 swp_entry_t swap; 1053 swp_entry_t swap;
873 int error; 1054 int error;
874 int once = 0; 1055 int once = 0;
1056 int alloced = 0;
875 1057
876 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) 1058 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
877 return -EFBIG; 1059 return -EFBIG;
@@ -883,19 +1065,21 @@ repeat:
883 page = NULL; 1065 page = NULL;
884 } 1066 }
885 1067
886 if (sgp != SGP_WRITE && 1068 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
887 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { 1069 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
888 error = -EINVAL; 1070 error = -EINVAL;
889 goto failed; 1071 goto failed;
890 } 1072 }
891 1073
1074 /* fallocated page? */
1075 if (page && !PageUptodate(page)) {
1076 if (sgp != SGP_READ)
1077 goto clear;
1078 unlock_page(page);
1079 page_cache_release(page);
1080 page = NULL;
1081 }
892 if (page || (sgp == SGP_READ && !swap.val)) { 1082 if (page || (sgp == SGP_READ && !swap.val)) {
893 /*
894 * Once we can get the page lock, it must be uptodate:
895 * if there were an error in reading back from swap,
896 * the page would not be inserted into the filecache.
897 */
898 BUG_ON(page && !PageUptodate(page));
899 *pagep = page; 1083 *pagep = page;
900 return 0; 1084 return 0;
901 } 1085 }
@@ -923,19 +1107,20 @@ repeat:
923 1107
924 /* We have to do this with page locked to prevent races */ 1108 /* We have to do this with page locked to prevent races */
925 lock_page(page); 1109 lock_page(page);
1110 if (!PageSwapCache(page) || page->mapping) {
1111 error = -EEXIST; /* try again */
1112 goto failed;
1113 }
926 if (!PageUptodate(page)) { 1114 if (!PageUptodate(page)) {
927 error = -EIO; 1115 error = -EIO;
928 goto failed; 1116 goto failed;
929 } 1117 }
930 wait_on_page_writeback(page); 1118 wait_on_page_writeback(page);
931 1119
932 /* Someone may have already done it for us */ 1120 if (shmem_should_replace_page(page, gfp)) {
933 if (page->mapping) { 1121 error = shmem_replace_page(&page, gfp, info, index);
934 if (page->mapping == mapping && 1122 if (error)
935 page->index == index) 1123 goto failed;
936 goto done;
937 error = -EEXIST;
938 goto failed;
939 } 1124 }
940 1125
941 error = mem_cgroup_cache_charge(page, current->mm, 1126 error = mem_cgroup_cache_charge(page, current->mm,
@@ -991,19 +1176,36 @@ repeat:
991 inode->i_blocks += BLOCKS_PER_PAGE; 1176 inode->i_blocks += BLOCKS_PER_PAGE;
992 shmem_recalc_inode(inode); 1177 shmem_recalc_inode(inode);
993 spin_unlock(&info->lock); 1178 spin_unlock(&info->lock);
1179 alloced = true;
994 1180
995 clear_highpage(page); 1181 /*
996 flush_dcache_page(page); 1182 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
997 SetPageUptodate(page); 1183 */
1184 if (sgp == SGP_FALLOC)
1185 sgp = SGP_WRITE;
1186clear:
1187 /*
1188 * Let SGP_WRITE caller clear ends if write does not fill page;
1189 * but SGP_FALLOC on a page fallocated earlier must initialize
1190 * it now, lest undo on failure cancel our earlier guarantee.
1191 */
1192 if (sgp != SGP_WRITE) {
1193 clear_highpage(page);
1194 flush_dcache_page(page);
1195 SetPageUptodate(page);
1196 }
998 if (sgp == SGP_DIRTY) 1197 if (sgp == SGP_DIRTY)
999 set_page_dirty(page); 1198 set_page_dirty(page);
1000 } 1199 }
1001done: 1200
1002 /* Perhaps the file has been truncated since we checked */ 1201 /* Perhaps the file has been truncated since we checked */
1003 if (sgp != SGP_WRITE && 1202 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1004 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { 1203 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1005 error = -EINVAL; 1204 error = -EINVAL;
1006 goto trunc; 1205 if (alloced)
1206 goto trunc;
1207 else
1208 goto failed;
1007 } 1209 }
1008 *pagep = page; 1210 *pagep = page;
1009 return 0; 1211 return 0;
@@ -1012,6 +1214,7 @@ done:
1012 * Error recovery. 1214 * Error recovery.
1013 */ 1215 */
1014trunc: 1216trunc:
1217 info = SHMEM_I(inode);
1015 ClearPageDirty(page); 1218 ClearPageDirty(page);
1016 delete_from_page_cache(page); 1219 delete_from_page_cache(page);
1017 spin_lock(&info->lock); 1220 spin_lock(&info->lock);
@@ -1019,6 +1222,7 @@ trunc:
1019 inode->i_blocks -= BLOCKS_PER_PAGE; 1222 inode->i_blocks -= BLOCKS_PER_PAGE;
1020 spin_unlock(&info->lock); 1223 spin_unlock(&info->lock);
1021decused: 1224decused:
1225 sbinfo = SHMEM_SB(inode->i_sb);
1022 if (sbinfo->max_blocks) 1226 if (sbinfo->max_blocks)
1023 percpu_counter_add(&sbinfo->used_blocks, -1); 1227 percpu_counter_add(&sbinfo->used_blocks, -1);
1024unacct: 1228unacct:
@@ -1204,6 +1408,14 @@ shmem_write_end(struct file *file, struct address_space *mapping,
1204 if (pos + copied > inode->i_size) 1408 if (pos + copied > inode->i_size)
1205 i_size_write(inode, pos + copied); 1409 i_size_write(inode, pos + copied);
1206 1410
1411 if (!PageUptodate(page)) {
1412 if (copied < PAGE_CACHE_SIZE) {
1413 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1414 zero_user_segments(page, 0, from,
1415 from + copied, PAGE_CACHE_SIZE);
1416 }
1417 SetPageUptodate(page);
1418 }
1207 set_page_dirty(page); 1419 set_page_dirty(page);
1208 unlock_page(page); 1420 unlock_page(page);
1209 page_cache_release(page); 1421 page_cache_release(page);
@@ -1462,6 +1674,199 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1462 return error; 1674 return error;
1463} 1675}
1464 1676
1677/*
1678 * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
1679 */
1680static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
1681 pgoff_t index, pgoff_t end, int origin)
1682{
1683 struct page *page;
1684 struct pagevec pvec;
1685 pgoff_t indices[PAGEVEC_SIZE];
1686 bool done = false;
1687 int i;
1688
1689 pagevec_init(&pvec, 0);
1690 pvec.nr = 1; /* start small: we may be there already */
1691 while (!done) {
1692 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
1693 pvec.nr, pvec.pages, indices);
1694 if (!pvec.nr) {
1695 if (origin == SEEK_DATA)
1696 index = end;
1697 break;
1698 }
1699 for (i = 0; i < pvec.nr; i++, index++) {
1700 if (index < indices[i]) {
1701 if (origin == SEEK_HOLE) {
1702 done = true;
1703 break;
1704 }
1705 index = indices[i];
1706 }
1707 page = pvec.pages[i];
1708 if (page && !radix_tree_exceptional_entry(page)) {
1709 if (!PageUptodate(page))
1710 page = NULL;
1711 }
1712 if (index >= end ||
1713 (page && origin == SEEK_DATA) ||
1714 (!page && origin == SEEK_HOLE)) {
1715 done = true;
1716 break;
1717 }
1718 }
1719 shmem_deswap_pagevec(&pvec);
1720 pagevec_release(&pvec);
1721 pvec.nr = PAGEVEC_SIZE;
1722 cond_resched();
1723 }
1724 return index;
1725}
1726
1727static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
1728{
1729 struct address_space *mapping;
1730 struct inode *inode;
1731 pgoff_t start, end;
1732 loff_t new_offset;
1733
1734 if (origin != SEEK_DATA && origin != SEEK_HOLE)
1735 return generic_file_llseek_size(file, offset, origin,
1736 MAX_LFS_FILESIZE);
1737 mapping = file->f_mapping;
1738 inode = mapping->host;
1739 mutex_lock(&inode->i_mutex);
1740 /* We're holding i_mutex so we can access i_size directly */
1741
1742 if (offset < 0)
1743 offset = -EINVAL;
1744 else if (offset >= inode->i_size)
1745 offset = -ENXIO;
1746 else {
1747 start = offset >> PAGE_CACHE_SHIFT;
1748 end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1749 new_offset = shmem_seek_hole_data(mapping, start, end, origin);
1750 new_offset <<= PAGE_CACHE_SHIFT;
1751 if (new_offset > offset) {
1752 if (new_offset < inode->i_size)
1753 offset = new_offset;
1754 else if (origin == SEEK_DATA)
1755 offset = -ENXIO;
1756 else
1757 offset = inode->i_size;
1758 }
1759 }
1760
1761 if (offset >= 0 && offset != file->f_pos) {
1762 file->f_pos = offset;
1763 file->f_version = 0;
1764 }
1765 mutex_unlock(&inode->i_mutex);
1766 return offset;
1767}
1768
1769static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1770 loff_t len)
1771{
1772 struct inode *inode = file->f_path.dentry->d_inode;
1773 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1774 struct shmem_falloc shmem_falloc;
1775 pgoff_t start, index, end;
1776 int error;
1777
1778 mutex_lock(&inode->i_mutex);
1779
1780 if (mode & FALLOC_FL_PUNCH_HOLE) {
1781 struct address_space *mapping = file->f_mapping;
1782 loff_t unmap_start = round_up(offset, PAGE_SIZE);
1783 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
1784
1785 if ((u64)unmap_end > (u64)unmap_start)
1786 unmap_mapping_range(mapping, unmap_start,
1787 1 + unmap_end - unmap_start, 0);
1788 shmem_truncate_range(inode, offset, offset + len - 1);
1789 /* No need to unmap again: hole-punching leaves COWed pages */
1790 error = 0;
1791 goto out;
1792 }
1793
1794 /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
1795 error = inode_newsize_ok(inode, offset + len);
1796 if (error)
1797 goto out;
1798
1799 start = offset >> PAGE_CACHE_SHIFT;
1800 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1801 /* Try to avoid a swapstorm if len is impossible to satisfy */
1802 if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
1803 error = -ENOSPC;
1804 goto out;
1805 }
1806
1807 shmem_falloc.start = start;
1808 shmem_falloc.next = start;
1809 shmem_falloc.nr_falloced = 0;
1810 shmem_falloc.nr_unswapped = 0;
1811 spin_lock(&inode->i_lock);
1812 inode->i_private = &shmem_falloc;
1813 spin_unlock(&inode->i_lock);
1814
1815 for (index = start; index < end; index++) {
1816 struct page *page;
1817
1818 /*
1819 * Good, the fallocate(2) manpage permits EINTR: we may have
1820 * been interrupted because we are using up too much memory.
1821 */
1822 if (signal_pending(current))
1823 error = -EINTR;
1824 else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
1825 error = -ENOMEM;
1826 else
1827 error = shmem_getpage(inode, index, &page, SGP_FALLOC,
1828 NULL);
1829 if (error) {
1830 /* Remove the !PageUptodate pages we added */
1831 shmem_undo_range(inode,
1832 (loff_t)start << PAGE_CACHE_SHIFT,
1833 (loff_t)index << PAGE_CACHE_SHIFT, true);
1834 goto undone;
1835 }
1836
1837 /*
1838 * Inform shmem_writepage() how far we have reached.
1839 * No need for lock or barrier: we have the page lock.
1840 */
1841 shmem_falloc.next++;
1842 if (!PageUptodate(page))
1843 shmem_falloc.nr_falloced++;
1844
1845 /*
1846 * If !PageUptodate, leave it that way so that freeable pages
1847 * can be recognized if we need to rollback on error later.
1848 * But set_page_dirty so that memory pressure will swap rather
1849 * than free the pages we are allocating (and SGP_CACHE pages
1850 * might still be clean: we now need to mark those dirty too).
1851 */
1852 set_page_dirty(page);
1853 unlock_page(page);
1854 page_cache_release(page);
1855 cond_resched();
1856 }
1857
1858 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
1859 i_size_write(inode, offset + len);
1860 inode->i_ctime = CURRENT_TIME;
1861undone:
1862 spin_lock(&inode->i_lock);
1863 inode->i_private = NULL;
1864 spin_unlock(&inode->i_lock);
1865out:
1866 mutex_unlock(&inode->i_mutex);
1867 return error;
1868}
1869
1465static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) 1870static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1466{ 1871{
1467 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); 1872 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -1665,6 +2070,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
1665 kaddr = kmap_atomic(page); 2070 kaddr = kmap_atomic(page);
1666 memcpy(kaddr, symname, len); 2071 memcpy(kaddr, symname, len);
1667 kunmap_atomic(kaddr); 2072 kunmap_atomic(kaddr);
2073 SetPageUptodate(page);
1668 set_page_dirty(page); 2074 set_page_dirty(page);
1669 unlock_page(page); 2075 unlock_page(page);
1670 page_cache_release(page); 2076 page_cache_release(page);
@@ -2270,6 +2676,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
2270 } 2676 }
2271 } 2677 }
2272 sb->s_export_op = &shmem_export_ops; 2678 sb->s_export_op = &shmem_export_ops;
2679 sb->s_flags |= MS_NOSEC;
2273#else 2680#else
2274 sb->s_flags |= MS_NOUSER; 2681 sb->s_flags |= MS_NOUSER;
2275#endif 2682#endif
@@ -2364,7 +2771,7 @@ static const struct address_space_operations shmem_aops = {
2364static const struct file_operations shmem_file_operations = { 2771static const struct file_operations shmem_file_operations = {
2365 .mmap = shmem_mmap, 2772 .mmap = shmem_mmap,
2366#ifdef CONFIG_TMPFS 2773#ifdef CONFIG_TMPFS
2367 .llseek = generic_file_llseek, 2774 .llseek = shmem_file_llseek,
2368 .read = do_sync_read, 2775 .read = do_sync_read,
2369 .write = do_sync_write, 2776 .write = do_sync_write,
2370 .aio_read = shmem_file_aio_read, 2777 .aio_read = shmem_file_aio_read,
@@ -2372,12 +2779,12 @@ static const struct file_operations shmem_file_operations = {
2372 .fsync = noop_fsync, 2779 .fsync = noop_fsync,
2373 .splice_read = shmem_file_splice_read, 2780 .splice_read = shmem_file_splice_read,
2374 .splice_write = generic_file_splice_write, 2781 .splice_write = generic_file_splice_write,
2782 .fallocate = shmem_fallocate,
2375#endif 2783#endif
2376}; 2784};
2377 2785
2378static const struct inode_operations shmem_inode_operations = { 2786static const struct inode_operations shmem_inode_operations = {
2379 .setattr = shmem_setattr, 2787 .setattr = shmem_setattr,
2380 .truncate_range = shmem_truncate_range,
2381#ifdef CONFIG_TMPFS_XATTR 2788#ifdef CONFIG_TMPFS_XATTR
2382 .setxattr = shmem_setxattr, 2789 .setxattr = shmem_setxattr,
2383 .getxattr = shmem_getxattr, 2790 .getxattr = shmem_getxattr,
diff --git a/mm/sparse.c b/mm/sparse.c
index a8bc7d364deb..6a4bf9160e85 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -273,10 +273,10 @@ static unsigned long *__kmalloc_section_usemap(void)
273#ifdef CONFIG_MEMORY_HOTREMOVE 273#ifdef CONFIG_MEMORY_HOTREMOVE
274static unsigned long * __init 274static unsigned long * __init
275sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, 275sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
276 unsigned long count) 276 unsigned long size)
277{ 277{
278 unsigned long section_nr; 278 pg_data_t *host_pgdat;
279 279 unsigned long goal;
280 /* 280 /*
281 * A page may contain usemaps for other sections preventing the 281 * A page may contain usemaps for other sections preventing the
282 * page being freed and making a section unremovable while 282 * page being freed and making a section unremovable while
@@ -287,8 +287,10 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
287 * from the same section as the pgdat where possible to avoid 287 * from the same section as the pgdat where possible to avoid
288 * this problem. 288 * this problem.
289 */ 289 */
290 section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); 290 goal = __pa(pgdat) & PAGE_SECTION_MASK;
291 return alloc_bootmem_section(usemap_size() * count, section_nr); 291 host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
292 return __alloc_bootmem_node_nopanic(host_pgdat, size,
293 SMP_CACHE_BYTES, goal);
292} 294}
293 295
294static void __init check_usemap_section_nr(int nid, unsigned long *usemap) 296static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -332,9 +334,9 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
332#else 334#else
333static unsigned long * __init 335static unsigned long * __init
334sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, 336sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
335 unsigned long count) 337 unsigned long size)
336{ 338{
337 return NULL; 339 return alloc_bootmem_node_nopanic(pgdat, size);
338} 340}
339 341
340static void __init check_usemap_section_nr(int nid, unsigned long *usemap) 342static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -352,13 +354,10 @@ static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
352 int size = usemap_size(); 354 int size = usemap_size();
353 355
354 usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), 356 usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
355 usemap_count); 357 size * usemap_count);
356 if (!usemap) { 358 if (!usemap) {
357 usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); 359 printk(KERN_WARNING "%s: allocation failed\n", __func__);
358 if (!usemap) { 360 return;
359 printk(KERN_WARNING "%s: allocation failed\n", __func__);
360 return;
361 }
362 } 361 }
363 362
364 for (pnum = pnum_begin; pnum < pnum_end; pnum++) { 363 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
diff --git a/mm/swap.c b/mm/swap.c
index 5c13f1338972..0503ad705e7c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -82,6 +82,25 @@ static void put_compound_page(struct page *page)
82 if (likely(page != page_head && 82 if (likely(page != page_head &&
83 get_page_unless_zero(page_head))) { 83 get_page_unless_zero(page_head))) {
84 unsigned long flags; 84 unsigned long flags;
85
86 /*
87 * THP can not break up slab pages so avoid taking
88 * compound_lock(). Slab performs non-atomic bit ops
89 * on page->flags for better performance. In particular
90 * slab_unlock() in slub used to be a hot path. It is
91 * still hot on arches that do not support
92 * this_cpu_cmpxchg_double().
93 */
94 if (PageSlab(page_head)) {
95 if (PageTail(page)) {
96 if (put_page_testzero(page_head))
97 VM_BUG_ON(1);
98
99 atomic_dec(&page->_mapcount);
100 goto skip_lock_tail;
101 } else
102 goto skip_lock;
103 }
85 /* 104 /*
86 * page_head wasn't a dangling pointer but it 105 * page_head wasn't a dangling pointer but it
87 * may not be a head page anymore by the time 106 * may not be a head page anymore by the time
@@ -92,10 +111,10 @@ static void put_compound_page(struct page *page)
92 if (unlikely(!PageTail(page))) { 111 if (unlikely(!PageTail(page))) {
93 /* __split_huge_page_refcount run before us */ 112 /* __split_huge_page_refcount run before us */
94 compound_unlock_irqrestore(page_head, flags); 113 compound_unlock_irqrestore(page_head, flags);
95 VM_BUG_ON(PageHead(page_head)); 114skip_lock:
96 if (put_page_testzero(page_head)) 115 if (put_page_testzero(page_head))
97 __put_single_page(page_head); 116 __put_single_page(page_head);
98 out_put_single: 117out_put_single:
99 if (put_page_testzero(page)) 118 if (put_page_testzero(page))
100 __put_single_page(page); 119 __put_single_page(page);
101 return; 120 return;
@@ -115,6 +134,8 @@ static void put_compound_page(struct page *page)
115 VM_BUG_ON(atomic_read(&page_head->_count) <= 0); 134 VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
116 VM_BUG_ON(atomic_read(&page->_count) != 0); 135 VM_BUG_ON(atomic_read(&page->_count) != 0);
117 compound_unlock_irqrestore(page_head, flags); 136 compound_unlock_irqrestore(page_head, flags);
137
138skip_lock_tail:
118 if (put_page_testzero(page_head)) { 139 if (put_page_testzero(page_head)) {
119 if (PageHead(page_head)) 140 if (PageHead(page_head))
120 __put_compound_page(page_head); 141 __put_compound_page(page_head);
@@ -162,6 +183,18 @@ bool __get_page_tail(struct page *page)
162 struct page *page_head = compound_trans_head(page); 183 struct page *page_head = compound_trans_head(page);
163 184
164 if (likely(page != page_head && get_page_unless_zero(page_head))) { 185 if (likely(page != page_head && get_page_unless_zero(page_head))) {
186
187 /* Ref to put_compound_page() comment. */
188 if (PageSlab(page_head)) {
189 if (likely(PageTail(page))) {
190 __get_page_tail_foll(page, false);
191 return true;
192 } else {
193 put_page(page_head);
194 return false;
195 }
196 }
197
165 /* 198 /*
166 * page_head wasn't a dangling pointer but it 199 * page_head wasn't a dangling pointer but it
167 * may not be a head page anymore by the time 200 * may not be a head page anymore by the time
@@ -279,21 +312,15 @@ void rotate_reclaimable_page(struct page *page)
279static void update_page_reclaim_stat(struct zone *zone, struct page *page, 312static void update_page_reclaim_stat(struct zone *zone, struct page *page,
280 int file, int rotated) 313 int file, int rotated)
281{ 314{
282 struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat; 315 struct zone_reclaim_stat *reclaim_stat;
283 struct zone_reclaim_stat *memcg_reclaim_stat;
284 316
285 memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page); 317 reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
318 if (!reclaim_stat)
319 reclaim_stat = &zone->lruvec.reclaim_stat;
286 320
287 reclaim_stat->recent_scanned[file]++; 321 reclaim_stat->recent_scanned[file]++;
288 if (rotated) 322 if (rotated)
289 reclaim_stat->recent_rotated[file]++; 323 reclaim_stat->recent_rotated[file]++;
290
291 if (!memcg_reclaim_stat)
292 return;
293
294 memcg_reclaim_stat->recent_scanned[file]++;
295 if (rotated)
296 memcg_reclaim_stat->recent_rotated[file]++;
297} 324}
298 325
299static void __activate_page(struct page *page, void *arg) 326static void __activate_page(struct page *page, void *arg)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fafc26d1b1dc..457b10baef59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -601,7 +601,7 @@ void swapcache_free(swp_entry_t entry, struct page *page)
601 * This does not give an exact answer when swap count is continued, 601 * This does not give an exact answer when swap count is continued,
602 * but does include the high COUNT_CONTINUED flag to allow for that. 602 * but does include the high COUNT_CONTINUED flag to allow for that.
603 */ 603 */
604static inline int page_swapcount(struct page *page) 604int page_swapcount(struct page *page)
605{ 605{
606 int count = 0; 606 int count = 0;
607 struct swap_info_struct *p; 607 struct swap_info_struct *p;
@@ -717,37 +717,6 @@ int free_swap_and_cache(swp_entry_t entry)
717 return p != NULL; 717 return p != NULL;
718} 718}
719 719
720#ifdef CONFIG_CGROUP_MEM_RES_CTLR
721/**
722 * mem_cgroup_count_swap_user - count the user of a swap entry
723 * @ent: the swap entry to be checked
724 * @pagep: the pointer for the swap cache page of the entry to be stored
725 *
726 * Returns the number of the user of the swap entry. The number is valid only
727 * for swaps of anonymous pages.
728 * If the entry is found on swap cache, the page is stored to pagep with
729 * refcount of it being incremented.
730 */
731int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
732{
733 struct page *page;
734 struct swap_info_struct *p;
735 int count = 0;
736
737 page = find_get_page(&swapper_space, ent.val);
738 if (page)
739 count += page_mapcount(page);
740 p = swap_info_get(ent);
741 if (p) {
742 count += swap_count(p->swap_map[swp_offset(ent)]);
743 spin_unlock(&swap_lock);
744 }
745
746 *pagep = page;
747 return count;
748}
749#endif
750
751#ifdef CONFIG_HIBERNATION 720#ifdef CONFIG_HIBERNATION
752/* 721/*
753 * Find the swap type that corresponds to given device (if any). 722 * Find the swap type that corresponds to given device (if any).
diff --git a/mm/thrash.c b/mm/thrash.c
deleted file mode 100644
index 57ad495dbd54..000000000000
--- a/mm/thrash.c
+++ /dev/null
@@ -1,155 +0,0 @@
1/*
2 * mm/thrash.c
3 *
4 * Copyright (C) 2004, Red Hat, Inc.
5 * Copyright (C) 2004, Rik van Riel <riel@redhat.com>
6 * Released under the GPL, see the file COPYING for details.
7 *
8 * Simple token based thrashing protection, using the algorithm
9 * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html
10 *
11 * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com>
12 * Improved algorithm to pass token:
13 * Each task has a priority which is incremented if it contended
14 * for the token in an interval less than its previous attempt.
15 * If the token is acquired, that task's priority is boosted to prevent
16 * the token from bouncing around too often and to let the task make
17 * some progress in its execution.
18 */
19
20#include <linux/jiffies.h>
21#include <linux/mm.h>
22#include <linux/sched.h>
23#include <linux/swap.h>
24#include <linux/memcontrol.h>
25
26#include <trace/events/vmscan.h>
27
28#define TOKEN_AGING_INTERVAL (0xFF)
29
30static DEFINE_SPINLOCK(swap_token_lock);
31struct mm_struct *swap_token_mm;
32static struct mem_cgroup *swap_token_memcg;
33
34#ifdef CONFIG_CGROUP_MEM_RES_CTLR
35static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
36{
37 struct mem_cgroup *memcg;
38
39 memcg = try_get_mem_cgroup_from_mm(mm);
40 if (memcg)
41 css_put(mem_cgroup_css(memcg));
42
43 return memcg;
44}
45#else
46static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
47{
48 return NULL;
49}
50#endif
51
52void grab_swap_token(struct mm_struct *mm)
53{
54 int current_interval;
55 unsigned int old_prio = mm->token_priority;
56 static unsigned int global_faults;
57 static unsigned int last_aging;
58
59 global_faults++;
60
61 current_interval = global_faults - mm->faultstamp;
62
63 if (!spin_trylock(&swap_token_lock))
64 return;
65
66 /* First come first served */
67 if (!swap_token_mm)
68 goto replace_token;
69
70 /*
71 * Usually, we don't need priority aging because long interval faults
72 * makes priority decrease quickly. But there is one exception. If the
73 * token owner task is sleeping, it never make long interval faults.
74 * Thus, we need a priority aging mechanism instead. The requirements
75 * of priority aging are
76 * 1) An aging interval is reasonable enough long. Too short aging
77 * interval makes quick swap token lost and decrease performance.
78 * 2) The swap token owner task have to get priority aging even if
79 * it's under sleep.
80 */
81 if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
82 swap_token_mm->token_priority /= 2;
83 last_aging = global_faults;
84 }
85
86 if (mm == swap_token_mm) {
87 mm->token_priority += 2;
88 goto update_priority;
89 }
90
91 if (current_interval < mm->last_interval)
92 mm->token_priority++;
93 else {
94 if (likely(mm->token_priority > 0))
95 mm->token_priority--;
96 }
97
98 /* Check if we deserve the token */
99 if (mm->token_priority > swap_token_mm->token_priority)
100 goto replace_token;
101
102update_priority:
103 trace_update_swap_token_priority(mm, old_prio, swap_token_mm);
104
105out:
106 mm->faultstamp = global_faults;
107 mm->last_interval = current_interval;
108 spin_unlock(&swap_token_lock);
109 return;
110
111replace_token:
112 mm->token_priority += 2;
113 trace_replace_swap_token(swap_token_mm, mm);
114 swap_token_mm = mm;
115 swap_token_memcg = swap_token_memcg_from_mm(mm);
116 last_aging = global_faults;
117 goto out;
118}
119
120/* Called on process exit. */
121void __put_swap_token(struct mm_struct *mm)
122{
123 spin_lock(&swap_token_lock);
124 if (likely(mm == swap_token_mm)) {
125 trace_put_swap_token(swap_token_mm);
126 swap_token_mm = NULL;
127 swap_token_memcg = NULL;
128 }
129 spin_unlock(&swap_token_lock);
130}
131
132static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b)
133{
134 if (!a)
135 return true;
136 if (!b)
137 return true;
138 if (a == b)
139 return true;
140 return false;
141}
142
143void disable_swap_token(struct mem_cgroup *memcg)
144{
145 /* memcg reclaim don't disable unrelated mm token. */
146 if (match_memcg(memcg, swap_token_memcg)) {
147 spin_lock(&swap_token_lock);
148 if (match_memcg(memcg, swap_token_memcg)) {
149 trace_disable_swap_token(swap_token_mm);
150 swap_token_mm = NULL;
151 swap_token_memcg = NULL;
152 }
153 spin_unlock(&swap_token_lock);
154 }
155}
diff --git a/mm/truncate.c b/mm/truncate.c
index 61a183b89df6..75801acdaac7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -602,31 +602,6 @@ int vmtruncate(struct inode *inode, loff_t newsize)
602} 602}
603EXPORT_SYMBOL(vmtruncate); 603EXPORT_SYMBOL(vmtruncate);
604 604
605int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
606{
607 struct address_space *mapping = inode->i_mapping;
608 loff_t holebegin = round_up(lstart, PAGE_SIZE);
609 loff_t holelen = 1 + lend - holebegin;
610
611 /*
612 * If the underlying filesystem is not going to provide
613 * a way to truncate a range of blocks (punch a hole) -
614 * we should return failure right now.
615 */
616 if (!inode->i_op->truncate_range)
617 return -ENOSYS;
618
619 mutex_lock(&inode->i_mutex);
620 inode_dio_wait(inode);
621 unmap_mapping_range(mapping, holebegin, holelen, 1);
622 inode->i_op->truncate_range(inode, lstart, lend);
623 /* unmap again to remove racily COWed private pages */
624 unmap_mapping_range(mapping, holebegin, holelen, 1);
625 mutex_unlock(&inode->i_mutex);
626
627 return 0;
628}
629
630/** 605/**
631 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched 606 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
632 * @inode: inode 607 * @inode: inode
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 94dff883b449..2aad49981b57 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1185,9 +1185,10 @@ void __init vmalloc_init(void)
1185 /* Import existing vmlist entries. */ 1185 /* Import existing vmlist entries. */
1186 for (tmp = vmlist; tmp; tmp = tmp->next) { 1186 for (tmp = vmlist; tmp; tmp = tmp->next) {
1187 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); 1187 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1188 va->flags = tmp->flags | VM_VM_AREA; 1188 va->flags = VM_VM_AREA;
1189 va->va_start = (unsigned long)tmp->addr; 1189 va->va_start = (unsigned long)tmp->addr;
1190 va->va_end = va->va_start + tmp->size; 1190 va->va_end = va->va_start + tmp->size;
1191 va->vm = tmp;
1191 __insert_vmap_area(va); 1192 __insert_vmap_area(va);
1192 } 1193 }
1193 1194
@@ -2375,8 +2376,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2375 return NULL; 2376 return NULL;
2376 } 2377 }
2377 2378
2378 vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL); 2379 vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
2379 vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL); 2380 vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
2380 if (!vas || !vms) 2381 if (!vas || !vms)
2381 goto err_free2; 2382 goto err_free2;
2382 2383
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3d1365c17868..8deb5f4da4d9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -53,24 +53,6 @@
53#define CREATE_TRACE_POINTS 53#define CREATE_TRACE_POINTS
54#include <trace/events/vmscan.h> 54#include <trace/events/vmscan.h>
55 55
56/*
57 * reclaim_mode determines how the inactive list is shrunk
58 * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
59 * RECLAIM_MODE_ASYNC: Do not block
60 * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback
61 * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference
62 * page from the LRU and reclaim all pages within a
63 * naturally aligned range
64 * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
65 * order-0 pages and then compact the zone
66 */
67typedef unsigned __bitwise__ reclaim_mode_t;
68#define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u)
69#define RECLAIM_MODE_ASYNC ((__force reclaim_mode_t)0x02u)
70#define RECLAIM_MODE_SYNC ((__force reclaim_mode_t)0x04u)
71#define RECLAIM_MODE_LUMPYRECLAIM ((__force reclaim_mode_t)0x08u)
72#define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u)
73
74struct scan_control { 56struct scan_control {
75 /* Incremented by the number of inactive pages that were scanned */ 57 /* Incremented by the number of inactive pages that were scanned */
76 unsigned long nr_scanned; 58 unsigned long nr_scanned;
@@ -97,12 +79,6 @@ struct scan_control {
97 int order; 79 int order;
98 80
99 /* 81 /*
100 * Intend to reclaim enough continuous memory rather than reclaim
101 * enough amount of memory. i.e, mode for high order allocation.
102 */
103 reclaim_mode_t reclaim_mode;
104
105 /*
106 * The memory cgroup that hit its limit and as a result is the 82 * The memory cgroup that hit its limit and as a result is the
107 * primary target of this reclaim invocation. 83 * primary target of this reclaim invocation.
108 */ 84 */
@@ -164,35 +140,22 @@ static bool global_reclaim(struct scan_control *sc)
164{ 140{
165 return !sc->target_mem_cgroup; 141 return !sc->target_mem_cgroup;
166} 142}
167
168static bool scanning_global_lru(struct mem_cgroup_zone *mz)
169{
170 return !mz->mem_cgroup;
171}
172#else 143#else
173static bool global_reclaim(struct scan_control *sc) 144static bool global_reclaim(struct scan_control *sc)
174{ 145{
175 return true; 146 return true;
176} 147}
177
178static bool scanning_global_lru(struct mem_cgroup_zone *mz)
179{
180 return true;
181}
182#endif 148#endif
183 149
184static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) 150static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
185{ 151{
186 if (!scanning_global_lru(mz)) 152 return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat;
187 return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
188
189 return &mz->zone->reclaim_stat;
190} 153}
191 154
192static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, 155static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
193 enum lru_list lru) 156 enum lru_list lru)
194{ 157{
195 if (!scanning_global_lru(mz)) 158 if (!mem_cgroup_disabled())
196 return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, 159 return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
197 zone_to_nid(mz->zone), 160 zone_to_nid(mz->zone),
198 zone_idx(mz->zone), 161 zone_idx(mz->zone),
@@ -364,39 +327,6 @@ out:
364 return ret; 327 return ret;
365} 328}
366 329
367static void set_reclaim_mode(int priority, struct scan_control *sc,
368 bool sync)
369{
370 reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
371
372 /*
373 * Initially assume we are entering either lumpy reclaim or
374 * reclaim/compaction.Depending on the order, we will either set the
375 * sync mode or just reclaim order-0 pages later.
376 */
377 if (COMPACTION_BUILD)
378 sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
379 else
380 sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM;
381
382 /*
383 * Avoid using lumpy reclaim or reclaim/compaction if possible by
384 * restricting when its set to either costly allocations or when
385 * under memory pressure
386 */
387 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
388 sc->reclaim_mode |= syncmode;
389 else if (sc->order && priority < DEF_PRIORITY - 2)
390 sc->reclaim_mode |= syncmode;
391 else
392 sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
393}
394
395static void reset_reclaim_mode(struct scan_control *sc)
396{
397 sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
398}
399
400static inline int is_page_cache_freeable(struct page *page) 330static inline int is_page_cache_freeable(struct page *page)
401{ 331{
402 /* 332 /*
@@ -416,10 +346,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
416 return 1; 346 return 1;
417 if (bdi == current->backing_dev_info) 347 if (bdi == current->backing_dev_info)
418 return 1; 348 return 1;
419
420 /* lumpy reclaim for hugepage often need a lot of write */
421 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
422 return 1;
423 return 0; 349 return 0;
424} 350}
425 351
@@ -523,8 +449,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
523 /* synchronous write or broken a_ops? */ 449 /* synchronous write or broken a_ops? */
524 ClearPageReclaim(page); 450 ClearPageReclaim(page);
525 } 451 }
526 trace_mm_vmscan_writepage(page, 452 trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
527 trace_reclaim_flags(page, sc->reclaim_mode));
528 inc_zone_page_state(page, NR_VMSCAN_WRITE); 453 inc_zone_page_state(page, NR_VMSCAN_WRITE);
529 return PAGE_SUCCESS; 454 return PAGE_SUCCESS;
530 } 455 }
@@ -707,13 +632,10 @@ static enum page_references page_check_references(struct page *page,
707 int referenced_ptes, referenced_page; 632 int referenced_ptes, referenced_page;
708 unsigned long vm_flags; 633 unsigned long vm_flags;
709 634
710 referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags); 635 referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
636 &vm_flags);
711 referenced_page = TestClearPageReferenced(page); 637 referenced_page = TestClearPageReferenced(page);
712 638
713 /* Lumpy reclaim - ignore references */
714 if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
715 return PAGEREF_RECLAIM;
716
717 /* 639 /*
718 * Mlock lost the isolation race with us. Let try_to_unmap() 640 * Mlock lost the isolation race with us. Let try_to_unmap()
719 * move the page to the unevictable list. 641 * move the page to the unevictable list.
@@ -722,7 +644,7 @@ static enum page_references page_check_references(struct page *page,
722 return PAGEREF_RECLAIM; 644 return PAGEREF_RECLAIM;
723 645
724 if (referenced_ptes) { 646 if (referenced_ptes) {
725 if (PageAnon(page)) 647 if (PageSwapBacked(page))
726 return PAGEREF_ACTIVATE; 648 return PAGEREF_ACTIVATE;
727 /* 649 /*
728 * All mapped pages start out with page table 650 * All mapped pages start out with page table
@@ -813,19 +735,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
813 735
814 if (PageWriteback(page)) { 736 if (PageWriteback(page)) {
815 nr_writeback++; 737 nr_writeback++;
816 /* 738 unlock_page(page);
817 * Synchronous reclaim cannot queue pages for 739 goto keep;
818 * writeback due to the possibility of stack overflow
819 * but if it encounters a page under writeback, wait
820 * for the IO to complete.
821 */
822 if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) &&
823 may_enter_fs)
824 wait_on_page_writeback(page);
825 else {
826 unlock_page(page);
827 goto keep_lumpy;
828 }
829 } 740 }
830 741
831 references = page_check_references(page, mz, sc); 742 references = page_check_references(page, mz, sc);
@@ -908,7 +819,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
908 goto activate_locked; 819 goto activate_locked;
909 case PAGE_SUCCESS: 820 case PAGE_SUCCESS:
910 if (PageWriteback(page)) 821 if (PageWriteback(page))
911 goto keep_lumpy; 822 goto keep;
912 if (PageDirty(page)) 823 if (PageDirty(page))
913 goto keep; 824 goto keep;
914 825
@@ -994,7 +905,6 @@ cull_mlocked:
994 try_to_free_swap(page); 905 try_to_free_swap(page);
995 unlock_page(page); 906 unlock_page(page);
996 putback_lru_page(page); 907 putback_lru_page(page);
997 reset_reclaim_mode(sc);
998 continue; 908 continue;
999 909
1000activate_locked: 910activate_locked:
@@ -1007,8 +917,6 @@ activate_locked:
1007keep_locked: 917keep_locked:
1008 unlock_page(page); 918 unlock_page(page);
1009keep: 919keep:
1010 reset_reclaim_mode(sc);
1011keep_lumpy:
1012 list_add(&page->lru, &ret_pages); 920 list_add(&page->lru, &ret_pages);
1013 VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); 921 VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
1014 } 922 }
@@ -1064,11 +972,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
1064 if (!all_lru_mode && !!page_is_file_cache(page) != file) 972 if (!all_lru_mode && !!page_is_file_cache(page) != file)
1065 return ret; 973 return ret;
1066 974
1067 /* 975 /* Do not give back unevictable pages for compaction */
1068 * When this function is being called for lumpy reclaim, we
1069 * initially look into all LRU pages, active, inactive and
1070 * unevictable; only give shrink_page_list evictable pages.
1071 */
1072 if (PageUnevictable(page)) 976 if (PageUnevictable(page))
1073 return ret; 977 return ret;
1074 978
@@ -1153,9 +1057,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1153 struct lruvec *lruvec; 1057 struct lruvec *lruvec;
1154 struct list_head *src; 1058 struct list_head *src;
1155 unsigned long nr_taken = 0; 1059 unsigned long nr_taken = 0;
1156 unsigned long nr_lumpy_taken = 0;
1157 unsigned long nr_lumpy_dirty = 0;
1158 unsigned long nr_lumpy_failed = 0;
1159 unsigned long scan; 1060 unsigned long scan;
1160 int lru = LRU_BASE; 1061 int lru = LRU_BASE;
1161 1062
@@ -1168,10 +1069,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1168 1069
1169 for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { 1070 for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
1170 struct page *page; 1071 struct page *page;
1171 unsigned long pfn;
1172 unsigned long end_pfn;
1173 unsigned long page_pfn;
1174 int zone_id;
1175 1072
1176 page = lru_to_page(src); 1073 page = lru_to_page(src);
1177 prefetchw_prev_lru_page(page, src, flags); 1074 prefetchw_prev_lru_page(page, src, flags);
@@ -1193,84 +1090,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1193 default: 1090 default:
1194 BUG(); 1091 BUG();
1195 } 1092 }
1196
1197 if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
1198 continue;
1199
1200 /*
1201 * Attempt to take all pages in the order aligned region
1202 * surrounding the tag page. Only take those pages of
1203 * the same active state as that tag page. We may safely
1204 * round the target page pfn down to the requested order
1205 * as the mem_map is guaranteed valid out to MAX_ORDER,
1206 * where that page is in a different zone we will detect
1207 * it from its zone id and abort this block scan.
1208 */
1209 zone_id = page_zone_id(page);
1210 page_pfn = page_to_pfn(page);
1211 pfn = page_pfn & ~((1 << sc->order) - 1);
1212 end_pfn = pfn + (1 << sc->order);
1213 for (; pfn < end_pfn; pfn++) {
1214 struct page *cursor_page;
1215
1216 /* The target page is in the block, ignore it. */
1217 if (unlikely(pfn == page_pfn))
1218 continue;
1219
1220 /* Avoid holes within the zone. */
1221 if (unlikely(!pfn_valid_within(pfn)))
1222 break;
1223
1224 cursor_page = pfn_to_page(pfn);
1225
1226 /* Check that we have not crossed a zone boundary. */
1227 if (unlikely(page_zone_id(cursor_page) != zone_id))
1228 break;
1229
1230 /*
1231 * If we don't have enough swap space, reclaiming of
1232 * anon page which don't already have a swap slot is
1233 * pointless.
1234 */
1235 if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
1236 !PageSwapCache(cursor_page))
1237 break;
1238
1239 if (__isolate_lru_page(cursor_page, mode, file) == 0) {
1240 unsigned int isolated_pages;
1241
1242 mem_cgroup_lru_del(cursor_page);
1243 list_move(&cursor_page->lru, dst);
1244 isolated_pages = hpage_nr_pages(cursor_page);
1245 nr_taken += isolated_pages;
1246 nr_lumpy_taken += isolated_pages;
1247 if (PageDirty(cursor_page))
1248 nr_lumpy_dirty += isolated_pages;
1249 scan++;
1250 pfn += isolated_pages - 1;
1251 } else {
1252 /*
1253 * Check if the page is freed already.
1254 *
1255 * We can't use page_count() as that
1256 * requires compound_head and we don't
1257 * have a pin on the page here. If a
1258 * page is tail, we may or may not
1259 * have isolated the head, so assume
1260 * it's not free, it'd be tricky to
1261 * track the head status without a
1262 * page pin.
1263 */
1264 if (!PageTail(cursor_page) &&
1265 !atomic_read(&cursor_page->_count))
1266 continue;
1267 break;
1268 }
1269 }
1270
1271 /* If we break out of the loop above, lumpy reclaim failed */
1272 if (pfn < end_pfn)
1273 nr_lumpy_failed++;
1274 } 1093 }
1275 1094
1276 *nr_scanned = scan; 1095 *nr_scanned = scan;
@@ -1278,7 +1097,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1278 trace_mm_vmscan_lru_isolate(sc->order, 1097 trace_mm_vmscan_lru_isolate(sc->order,
1279 nr_to_scan, scan, 1098 nr_to_scan, scan,
1280 nr_taken, 1099 nr_taken,
1281 nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
1282 mode, file); 1100 mode, file);
1283 return nr_taken; 1101 return nr_taken;
1284} 1102}
@@ -1454,47 +1272,6 @@ update_isolated_counts(struct mem_cgroup_zone *mz,
1454} 1272}
1455 1273
1456/* 1274/*
1457 * Returns true if a direct reclaim should wait on pages under writeback.
1458 *
1459 * If we are direct reclaiming for contiguous pages and we do not reclaim
1460 * everything in the list, try again and wait for writeback IO to complete.
1461 * This will stall high-order allocations noticeably. Only do that when really
1462 * need to free the pages under high memory pressure.
1463 */
1464static inline bool should_reclaim_stall(unsigned long nr_taken,
1465 unsigned long nr_freed,
1466 int priority,
1467 struct scan_control *sc)
1468{
1469 int lumpy_stall_priority;
1470
1471 /* kswapd should not stall on sync IO */
1472 if (current_is_kswapd())
1473 return false;
1474
1475 /* Only stall on lumpy reclaim */
1476 if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
1477 return false;
1478
1479 /* If we have reclaimed everything on the isolated list, no stall */
1480 if (nr_freed == nr_taken)
1481 return false;
1482
1483 /*
1484 * For high-order allocations, there are two stall thresholds.
1485 * High-cost allocations stall immediately where as lower
1486 * order allocations such as stacks require the scanning
1487 * priority to be much higher before stalling.
1488 */
1489 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
1490 lumpy_stall_priority = DEF_PRIORITY;
1491 else
1492 lumpy_stall_priority = DEF_PRIORITY / 3;
1493
1494 return priority <= lumpy_stall_priority;
1495}
1496
1497/*
1498 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number 1275 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
1499 * of reclaimed pages 1276 * of reclaimed pages
1500 */ 1277 */
@@ -1522,10 +1299,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1522 return SWAP_CLUSTER_MAX; 1299 return SWAP_CLUSTER_MAX;
1523 } 1300 }
1524 1301
1525 set_reclaim_mode(priority, sc, false);
1526 if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
1527 isolate_mode |= ISOLATE_ACTIVE;
1528
1529 lru_add_drain(); 1302 lru_add_drain();
1530 1303
1531 if (!sc->may_unmap) 1304 if (!sc->may_unmap)
@@ -1556,13 +1329,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1556 nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, 1329 nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
1557 &nr_dirty, &nr_writeback); 1330 &nr_dirty, &nr_writeback);
1558 1331
1559 /* Check if we should syncronously wait for writeback */
1560 if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
1561 set_reclaim_mode(priority, sc, true);
1562 nr_reclaimed += shrink_page_list(&page_list, mz, sc,
1563 priority, &nr_dirty, &nr_writeback);
1564 }
1565
1566 spin_lock_irq(&zone->lru_lock); 1332 spin_lock_irq(&zone->lru_lock);
1567 1333
1568 reclaim_stat->recent_scanned[0] += nr_anon; 1334 reclaim_stat->recent_scanned[0] += nr_anon;
@@ -1616,7 +1382,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1616 zone_idx(zone), 1382 zone_idx(zone),
1617 nr_scanned, nr_reclaimed, 1383 nr_scanned, nr_reclaimed,
1618 priority, 1384 priority,
1619 trace_shrink_flags(file, sc->reclaim_mode)); 1385 trace_shrink_flags(file));
1620 return nr_reclaimed; 1386 return nr_reclaimed;
1621} 1387}
1622 1388
@@ -1695,8 +1461,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
1695 1461
1696 lru_add_drain(); 1462 lru_add_drain();
1697 1463
1698 reset_reclaim_mode(sc);
1699
1700 if (!sc->may_unmap) 1464 if (!sc->may_unmap)
1701 isolate_mode |= ISOLATE_UNMAPPED; 1465 isolate_mode |= ISOLATE_UNMAPPED;
1702 if (!sc->may_writepage) 1466 if (!sc->may_writepage)
@@ -1737,7 +1501,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
1737 } 1501 }
1738 } 1502 }
1739 1503
1740 if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) { 1504 if (page_referenced(page, 0, sc->target_mem_cgroup,
1505 &vm_flags)) {
1741 nr_rotated += hpage_nr_pages(page); 1506 nr_rotated += hpage_nr_pages(page);
1742 /* 1507 /*
1743 * Identify referenced, file-backed active pages and 1508 * Identify referenced, file-backed active pages and
@@ -1811,7 +1576,7 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
1811 if (!total_swap_pages) 1576 if (!total_swap_pages)
1812 return 0; 1577 return 0;
1813 1578
1814 if (!scanning_global_lru(mz)) 1579 if (!mem_cgroup_disabled())
1815 return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, 1580 return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
1816 mz->zone); 1581 mz->zone);
1817 1582
@@ -1850,7 +1615,7 @@ static int inactive_file_is_low_global(struct zone *zone)
1850 */ 1615 */
1851static int inactive_file_is_low(struct mem_cgroup_zone *mz) 1616static int inactive_file_is_low(struct mem_cgroup_zone *mz)
1852{ 1617{
1853 if (!scanning_global_lru(mz)) 1618 if (!mem_cgroup_disabled())
1854 return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, 1619 return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
1855 mz->zone); 1620 mz->zone);
1856 1621
@@ -1984,10 +1749,10 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
1984 * proportional to the fraction of recently scanned pages on 1749 * proportional to the fraction of recently scanned pages on
1985 * each list that were recently referenced and in active use. 1750 * each list that were recently referenced and in active use.
1986 */ 1751 */
1987 ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1); 1752 ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
1988 ap /= reclaim_stat->recent_rotated[0] + 1; 1753 ap /= reclaim_stat->recent_rotated[0] + 1;
1989 1754
1990 fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); 1755 fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
1991 fp /= reclaim_stat->recent_rotated[1] + 1; 1756 fp /= reclaim_stat->recent_rotated[1] + 1;
1992 spin_unlock_irq(&mz->zone->lru_lock); 1757 spin_unlock_irq(&mz->zone->lru_lock);
1993 1758
@@ -2000,7 +1765,7 @@ out:
2000 unsigned long scan; 1765 unsigned long scan;
2001 1766
2002 scan = zone_nr_lru_pages(mz, lru); 1767 scan = zone_nr_lru_pages(mz, lru);
2003 if (priority || noswap) { 1768 if (priority || noswap || !vmscan_swappiness(mz, sc)) {
2004 scan >>= priority; 1769 scan >>= priority;
2005 if (!scan && force_scan) 1770 if (!scan && force_scan)
2006 scan = SWAP_CLUSTER_MAX; 1771 scan = SWAP_CLUSTER_MAX;
@@ -2010,23 +1775,35 @@ out:
2010 } 1775 }
2011} 1776}
2012 1777
1778/* Use reclaim/compaction for costly allocs or under memory pressure */
1779static bool in_reclaim_compaction(int priority, struct scan_control *sc)
1780{
1781 if (COMPACTION_BUILD && sc->order &&
1782 (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
1783 priority < DEF_PRIORITY - 2))
1784 return true;
1785
1786 return false;
1787}
1788
2013/* 1789/*
2014 * Reclaim/compaction depends on a number of pages being freed. To avoid 1790 * Reclaim/compaction is used for high-order allocation requests. It reclaims
2015 * disruption to the system, a small number of order-0 pages continue to be 1791 * order-0 pages before compacting the zone. should_continue_reclaim() returns
2016 * rotated and reclaimed in the normal fashion. However, by the time we get 1792 * true if more pages should be reclaimed such that when the page allocator
2017 * back to the allocator and call try_to_compact_zone(), we ensure that 1793 * calls try_to_compact_zone() that it will have enough free pages to succeed.
2018 * there are enough free pages for it to be likely successful 1794 * It will give up earlier than that if there is difficulty reclaiming pages.
2019 */ 1795 */
2020static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, 1796static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
2021 unsigned long nr_reclaimed, 1797 unsigned long nr_reclaimed,
2022 unsigned long nr_scanned, 1798 unsigned long nr_scanned,
1799 int priority,
2023 struct scan_control *sc) 1800 struct scan_control *sc)
2024{ 1801{
2025 unsigned long pages_for_compaction; 1802 unsigned long pages_for_compaction;
2026 unsigned long inactive_lru_pages; 1803 unsigned long inactive_lru_pages;
2027 1804
2028 /* If not in reclaim/compaction mode, stop */ 1805 /* If not in reclaim/compaction mode, stop */
2029 if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) 1806 if (!in_reclaim_compaction(priority, sc))
2030 return false; 1807 return false;
2031 1808
2032 /* Consider stopping depending on scan and reclaim activity */ 1809 /* Consider stopping depending on scan and reclaim activity */
@@ -2128,7 +1905,8 @@ restart:
2128 1905
2129 /* reclaim/compaction might need reclaim to continue */ 1906 /* reclaim/compaction might need reclaim to continue */
2130 if (should_continue_reclaim(mz, nr_reclaimed, 1907 if (should_continue_reclaim(mz, nr_reclaimed,
2131 sc->nr_scanned - nr_scanned, sc)) 1908 sc->nr_scanned - nr_scanned,
1909 priority, sc))
2132 goto restart; 1910 goto restart;
2133 1911
2134 throttle_vm_writeout(sc->gfp_mask); 1912 throttle_vm_writeout(sc->gfp_mask);
@@ -2353,8 +2131,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2353 2131
2354 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 2132 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
2355 sc->nr_scanned = 0; 2133 sc->nr_scanned = 0;
2356 if (!priority)
2357 disable_swap_token(sc->target_mem_cgroup);
2358 aborted_reclaim = shrink_zones(priority, zonelist, sc); 2134 aborted_reclaim = shrink_zones(priority, zonelist, sc);
2359 2135
2360 /* 2136 /*
@@ -2705,10 +2481,6 @@ loop_again:
2705 unsigned long lru_pages = 0; 2481 unsigned long lru_pages = 0;
2706 int has_under_min_watermark_zone = 0; 2482 int has_under_min_watermark_zone = 0;
2707 2483
2708 /* The swap token gets in the way of swapout... */
2709 if (!priority)
2710 disable_swap_token(NULL);
2711
2712 all_zones_ok = 1; 2484 all_zones_ok = 1;
2713 balanced = 0; 2485 balanced = 0;
2714 2486
@@ -3537,7 +3309,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
3537 if (mapping_unevictable(page_mapping(page))) 3309 if (mapping_unevictable(page_mapping(page)))
3538 return 0; 3310 return 0;
3539 3311
3540 if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page))) 3312 if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page)))
3541 return 0; 3313 return 0;
3542 3314
3543 return 1; 3315 return 1;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 0dad31dc1618..1bbbbd9776ad 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1223,7 +1223,6 @@ module_init(setup_vmstat)
1223#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) 1223#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1224#include <linux/debugfs.h> 1224#include <linux/debugfs.h>
1225 1225
1226static struct dentry *extfrag_debug_root;
1227 1226
1228/* 1227/*
1229 * Return an index indicating how much of the available free memory is 1228 * Return an index indicating how much of the available free memory is
@@ -1361,19 +1360,24 @@ static const struct file_operations extfrag_file_ops = {
1361 1360
1362static int __init extfrag_debug_init(void) 1361static int __init extfrag_debug_init(void)
1363{ 1362{
1363 struct dentry *extfrag_debug_root;
1364
1364 extfrag_debug_root = debugfs_create_dir("extfrag", NULL); 1365 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
1365 if (!extfrag_debug_root) 1366 if (!extfrag_debug_root)
1366 return -ENOMEM; 1367 return -ENOMEM;
1367 1368
1368 if (!debugfs_create_file("unusable_index", 0444, 1369 if (!debugfs_create_file("unusable_index", 0444,
1369 extfrag_debug_root, NULL, &unusable_file_ops)) 1370 extfrag_debug_root, NULL, &unusable_file_ops))
1370 return -ENOMEM; 1371 goto fail;
1371 1372
1372 if (!debugfs_create_file("extfrag_index", 0444, 1373 if (!debugfs_create_file("extfrag_index", 0444,
1373 extfrag_debug_root, NULL, &extfrag_file_ops)) 1374 extfrag_debug_root, NULL, &extfrag_file_ops))
1374 return -ENOMEM; 1375 goto fail;
1375 1376
1376 return 0; 1377 return 0;
1378fail:
1379 debugfs_remove_recursive(extfrag_debug_root);
1380 return -ENOMEM;
1377} 1381}
1378 1382
1379module_init(extfrag_debug_init); 1383module_init(extfrag_debug_init);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7fee13b331d1..f56f045778ae 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1286,6 +1286,8 @@ call_reserveresult(struct rpc_task *task)
1286 } 1286 }
1287 1287
1288 switch (status) { 1288 switch (status) {
1289 case -ENOMEM:
1290 rpc_delay(task, HZ >> 2);
1289 case -EAGAIN: /* woken up; retry */ 1291 case -EAGAIN: /* woken up; retry */
1290 task->tk_action = call_reserve; 1292 task->tk_action = call_reserve;
1291 return; 1293 return;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index fd2423991c2d..04040476082e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -120,7 +120,7 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall);
120 120
121/** 121/**
122 * rpc_queue_upcall - queue an upcall message to userspace 122 * rpc_queue_upcall - queue an upcall message to userspace
123 * @inode: inode of upcall pipe on which to queue given message 123 * @pipe: upcall pipe on which to queue given message
124 * @msg: message to queue 124 * @msg: message to queue
125 * 125 *
126 * Call with an @inode created by rpc_mkpipe() to queue an upcall. 126 * Call with an @inode created by rpc_mkpipe() to queue an upcall.
@@ -819,9 +819,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
819 * @parent: dentry of directory to create new "pipe" in 819 * @parent: dentry of directory to create new "pipe" in
820 * @name: name of pipe 820 * @name: name of pipe
821 * @private: private data to associate with the pipe, for the caller's use 821 * @private: private data to associate with the pipe, for the caller's use
822 * @ops: operations defining the behavior of the pipe: upcall, downcall, 822 * @pipe: &rpc_pipe containing input parameters
823 * release_pipe, open_pipe, and destroy_msg.
824 * @flags: rpc_pipe flags
825 * 823 *
826 * Data is made available for userspace to read by calls to 824 * Data is made available for userspace to read by calls to
827 * rpc_queue_upcall(). The actual reads will result in calls to 825 * rpc_queue_upcall(). The actual reads will result in calls to
@@ -943,7 +941,7 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
943 941
944/** 942/**
945 * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() 943 * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
946 * @clnt: rpc client 944 * @dentry: dentry for the pipe
947 */ 945 */
948int rpc_remove_client_dir(struct dentry *dentry) 946int rpc_remove_client_dir(struct dentry *dentry)
949{ 947{
@@ -1115,7 +1113,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1115 sb->s_op = &s_ops; 1113 sb->s_op = &s_ops;
1116 sb->s_time_gran = 1; 1114 sb->s_time_gran = 1;
1117 1115
1118 inode = rpc_get_inode(sb, S_IFDIR | 0755); 1116 inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
1119 sb->s_root = root = d_make_root(inode); 1117 sb->s_root = root = d_make_root(inode);
1120 if (!root) 1118 if (!root)
1121 return -ENOMEM; 1119 return -ENOMEM;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 78ac39fd9fe7..3c0653439f3d 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -394,6 +394,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
394 394
395/** 395/**
396 * rpcb_register - set or unset a port registration with the local rpcbind svc 396 * rpcb_register - set or unset a port registration with the local rpcbind svc
397 * @net: target network namespace
397 * @prog: RPC program number to bind 398 * @prog: RPC program number to bind
398 * @vers: RPC version number to bind 399 * @vers: RPC version number to bind
399 * @prot: transport protocol to register 400 * @prot: transport protocol to register
@@ -521,6 +522,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn,
521 522
522/** 523/**
523 * rpcb_v4_register - set or unset a port registration with the local rpcbind 524 * rpcb_v4_register - set or unset a port registration with the local rpcbind
525 * @net: target network namespace
524 * @program: RPC program number of service to (un)register 526 * @program: RPC program number of service to (un)register
525 * @version: RPC version number of service to (un)register 527 * @version: RPC version number of service to (un)register
526 * @address: address family, IP address, and port to (un)register 528 * @address: address family, IP address, and port to (un)register
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6fe2dcead150..3c83035cdaa9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -979,20 +979,21 @@ static void xprt_alloc_slot(struct rpc_task *task)
979 list_del(&req->rq_list); 979 list_del(&req->rq_list);
980 goto out_init_req; 980 goto out_init_req;
981 } 981 }
982 req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT); 982 req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN);
983 if (!IS_ERR(req)) 983 if (!IS_ERR(req))
984 goto out_init_req; 984 goto out_init_req;
985 switch (PTR_ERR(req)) { 985 switch (PTR_ERR(req)) {
986 case -ENOMEM: 986 case -ENOMEM:
987 rpc_delay(task, HZ >> 2);
988 dprintk("RPC: dynamic allocation of request slot " 987 dprintk("RPC: dynamic allocation of request slot "
989 "failed! Retrying\n"); 988 "failed! Retrying\n");
989 task->tk_status = -ENOMEM;
990 break; 990 break;
991 case -EAGAIN: 991 case -EAGAIN:
992 rpc_sleep_on(&xprt->backlog, task, NULL); 992 rpc_sleep_on(&xprt->backlog, task, NULL);
993 dprintk("RPC: waiting for request slot\n"); 993 dprintk("RPC: waiting for request slot\n");
994 default:
995 task->tk_status = -EAGAIN;
994 } 996 }
995 task->tk_status = -EAGAIN;
996 return; 997 return;
997out_init_req: 998out_init_req:
998 task->tk_status = 0; 999 task->tk_status = 0;
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 7dab7b25b5c6..f576971f6556 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -35,6 +35,7 @@
35#include <sys/mount.h> 35#include <sys/mount.h>
36#include <sys/statfs.h> 36#include <sys/statfs.h>
37#include "../../include/linux/magic.h" 37#include "../../include/linux/magic.h"
38#include "../../include/linux/kernel-page-flags.h"
38 39
39 40
40#ifndef MAX_PATH 41#ifndef MAX_PATH
@@ -73,33 +74,6 @@
73#define KPF_BYTES 8 74#define KPF_BYTES 8
74#define PROC_KPAGEFLAGS "/proc/kpageflags" 75#define PROC_KPAGEFLAGS "/proc/kpageflags"
75 76
76/* copied from kpageflags_read() */
77#define KPF_LOCKED 0
78#define KPF_ERROR 1
79#define KPF_REFERENCED 2
80#define KPF_UPTODATE 3
81#define KPF_DIRTY 4
82#define KPF_LRU 5
83#define KPF_ACTIVE 6
84#define KPF_SLAB 7
85#define KPF_WRITEBACK 8
86#define KPF_RECLAIM 9
87#define KPF_BUDDY 10
88
89/* [11-20] new additions in 2.6.31 */
90#define KPF_MMAP 11
91#define KPF_ANON 12
92#define KPF_SWAPCACHE 13
93#define KPF_SWAPBACKED 14
94#define KPF_COMPOUND_HEAD 15
95#define KPF_COMPOUND_TAIL 16
96#define KPF_HUGE 17
97#define KPF_UNEVICTABLE 18
98#define KPF_HWPOISON 19
99#define KPF_NOPAGE 20
100#define KPF_KSM 21
101#define KPF_THP 22
102
103/* [32-] kernel hacking assistances */ 77/* [32-] kernel hacking assistances */
104#define KPF_RESERVED 32 78#define KPF_RESERVED 32
105#define KPF_MLOCKED 33 79#define KPF_MLOCKED 33
@@ -326,7 +300,7 @@ static char *page_flag_name(uint64_t flags)
326{ 300{
327 static char buf[65]; 301 static char buf[65];
328 int present; 302 int present;
329 int i, j; 303 size_t i, j;
330 304
331 for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { 305 for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
332 present = (flags >> i) & 1; 306 present = (flags >> i) & 1;
@@ -344,7 +318,7 @@ static char *page_flag_name(uint64_t flags)
344static char *page_flag_longname(uint64_t flags) 318static char *page_flag_longname(uint64_t flags)
345{ 319{
346 static char buf[1024]; 320 static char buf[1024];
347 int i, n; 321 size_t i, n;
348 322
349 for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) { 323 for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) {
350 if (!page_flag_names[i]) 324 if (!page_flag_names[i])
@@ -402,7 +376,7 @@ static void show_page(unsigned long voffset,
402 376
403static void show_summary(void) 377static void show_summary(void)
404{ 378{
405 int i; 379 size_t i;
406 380
407 printf(" flags\tpage-count MB" 381 printf(" flags\tpage-count MB"
408 " symbolic-flags\t\t\tlong-symbolic-flags\n"); 382 " symbolic-flags\t\t\tlong-symbolic-flags\n");
@@ -500,7 +474,7 @@ static int debugfs_valid_mountpoint(const char *debugfs)
500/* find the path to the mounted debugfs */ 474/* find the path to the mounted debugfs */
501static const char *debugfs_find_mountpoint(void) 475static const char *debugfs_find_mountpoint(void)
502{ 476{
503 const char **ptr; 477 const char *const *ptr;
504 char type[100]; 478 char type[100];
505 FILE *fp; 479 FILE *fp;
506 480
@@ -537,7 +511,7 @@ static const char *debugfs_find_mountpoint(void)
537 511
538static void debugfs_mount(void) 512static void debugfs_mount(void)
539{ 513{
540 const char **ptr; 514 const char *const *ptr;
541 515
542 /* see if it's already mounted */ 516 /* see if it's already mounted */
543 if (debugfs_find_mountpoint()) 517 if (debugfs_find_mountpoint())
@@ -614,10 +588,10 @@ static int unpoison_page(unsigned long offset)
614 * page frame walker 588 * page frame walker
615 */ 589 */
616 590
617static int hash_slot(uint64_t flags) 591static size_t hash_slot(uint64_t flags)
618{ 592{
619 int k = HASH_KEY(flags); 593 size_t k = HASH_KEY(flags);
620 int i; 594 size_t i;
621 595
622 /* Explicitly reserve slot 0 for flags 0: the following logic 596 /* Explicitly reserve slot 0 for flags 0: the following logic
623 * cannot distinguish an unoccupied slot from slot (flags==0). 597 * cannot distinguish an unoccupied slot from slot (flags==0).
@@ -670,7 +644,7 @@ static void walk_pfn(unsigned long voffset,
670{ 644{
671 uint64_t buf[KPAGEFLAGS_BATCH]; 645 uint64_t buf[KPAGEFLAGS_BATCH];
672 unsigned long batch; 646 unsigned long batch;
673 long pages; 647 unsigned long pages;
674 unsigned long i; 648 unsigned long i;
675 649
676 while (count) { 650 while (count) {
@@ -779,7 +753,7 @@ static const char *page_flag_type(uint64_t flag)
779 753
780static void usage(void) 754static void usage(void)
781{ 755{
782 int i, j; 756 size_t i, j;
783 757
784 printf( 758 printf(
785"page-types [options]\n" 759"page-types [options]\n"
@@ -938,7 +912,7 @@ static void add_bits_filter(uint64_t mask, uint64_t bits)
938 912
939static uint64_t parse_flag_name(const char *str, int len) 913static uint64_t parse_flag_name(const char *str, int len)
940{ 914{
941 int i; 915 size_t i;
942 916
943 if (!*str || !len) 917 if (!*str || !len)
944 return 0; 918 return 0;