-rw-r--r--  Documentation/ABI/testing/sysfs-bus-pci-devices-cciss | 12
-rw-r--r--  Documentation/DocBook/Makefile | 1
-rw-r--r--  Documentation/DocBook/rapidio.tmpl | 1
-rw-r--r--  Documentation/development-process/1.Intro | 18
-rw-r--r--  Documentation/development-process/2.Process | 177
-rw-r--r--  Documentation/development-process/3.Early-stage | 31
-rw-r--r--  Documentation/development-process/4.Coding | 21
-rw-r--r--  Documentation/development-process/5.Posting | 28
-rw-r--r--  Documentation/development-process/6.Followthrough | 16
-rw-r--r--  Documentation/development-process/7.AdvancedTopics | 4
-rw-r--r--  Documentation/dynamic-debug-howto.txt | 4
-rw-r--r--  Documentation/hwmon/f71882fg | 19
-rw-r--r--  MAINTAINERS | 5
-rw-r--r--  arch/arm/mach-omap2/board-omap4panda.c | 28
-rw-r--r--  arch/arm/mach-omap2/devices.c | 2
-rw-r--r--  arch/arm/mach-omap2/gpmc.c | 13
-rw-r--r--  arch/arm/mach-omap2/omap_l3_smx.c | 11
-rw-r--r--  arch/arm/plat-omap/include/plat/irqs.h | 2
-rw-r--r--  arch/arm/plat-omap/include/plat/onenand.h | 1
-rw-r--r--  arch/arm/plat-pxa/include/plat/pxa3xx_nand.h | 2
-rw-r--r--  arch/cris/Kconfig | 1
-rw-r--r--  arch/cris/arch-v10/drivers/axisflashmap.c | 6
-rw-r--r--  arch/cris/arch-v32/drivers/Kconfig | 1
-rw-r--r--  arch/cris/arch-v32/drivers/axisflashmap.c | 6
-rw-r--r--  arch/x86/include/asm/percpu.h | 10
-rw-r--r--  arch/x86/lib/cmpxchg16b_emu.S | 14
-rw-r--r--  arch/x86/platform/olpc/olpc-xo1.c | 23
-rw-r--r--  drivers/block/cciss.c | 86
-rw-r--r--  drivers/block/cciss.h | 1
-rw-r--r--  drivers/block/cciss_cmd.h | 1
-rw-r--r--  drivers/block/cciss_scsi.c | 13
-rw-r--r--  drivers/block/drbd/drbd_actlog.c | 335
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c | 752
-rw-r--r--  drivers/block/drbd/drbd_int.h | 270
-rw-r--r--  drivers/block/drbd/drbd_main.c | 673
-rw-r--r--  drivers/block/drbd/drbd_nl.c | 183
-rw-r--r--  drivers/block/drbd/drbd_proc.c | 114
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 608
-rw-r--r--  drivers/block/drbd/drbd_req.c | 169
-rw-r--r--  drivers/block/drbd/drbd_req.h | 36
-rw-r--r--  drivers/block/drbd/drbd_strings.c | 6
-rw-r--r--  drivers/block/drbd/drbd_worker.c | 360
-rw-r--r--  drivers/block/drbd/drbd_wrappers.h | 2
-rw-r--r--  drivers/hwmon/Kconfig | 17
-rw-r--r--  drivers/hwmon/f71882fg.c | 126
-rw-r--r--  drivers/hwmon/pmbus_core.c | 70
-rw-r--r--  drivers/hwspinlock/Kconfig | 1
-rw-r--r--  drivers/ide/ide-io.c | 12
-rw-r--r--  drivers/mfd/88pm860x-core.c | 11
-rw-r--r--  drivers/mfd/Kconfig | 1
-rw-r--r--  drivers/mfd/Makefile | 2
-rw-r--r--  drivers/mfd/ab3550-core.c | 12
-rw-r--r--  drivers/mfd/ab8500-core.c | 12
-rw-r--r--  drivers/mfd/asic3.c | 38
-rw-r--r--  drivers/mfd/cs5535-mfd.c | 16
-rw-r--r--  drivers/mfd/ezx-pcap.c | 34
-rw-r--r--  drivers/mfd/htc-egpio.c | 23
-rw-r--r--  drivers/mfd/htc-i2cpld.c | 33
-rw-r--r--  drivers/mfd/jz4740-adc.c | 16
-rw-r--r--  drivers/mfd/max8925-core.c | 10
-rw-r--r--  drivers/mfd/max8997-irq.c | 377
-rw-r--r--  drivers/mfd/max8998-irq.c | 8
-rw-r--r--  drivers/mfd/max8998.c | 4
-rw-r--r--  drivers/mfd/mfd-core.c | 53
-rw-r--r--  drivers/mfd/pcf50633-core.c | 2
-rw-r--r--  drivers/mfd/rdc321x-southbridge.c | 1
-rw-r--r--  drivers/mfd/stmpe.c | 12
-rw-r--r--  drivers/mfd/t7l66xb.c | 21
-rw-r--r--  drivers/mfd/tc3589x.c | 12
-rw-r--r--  drivers/mfd/tc6393xb.c | 21
-rw-r--r--  drivers/mfd/tps6586x.c | 6
-rw-r--r--  drivers/mfd/twl4030-irq.c | 66
-rw-r--r--  drivers/mfd/twl6030-irq.c | 25
-rw-r--r--  drivers/mfd/wl1273-core.c | 2
-rw-r--r--  drivers/mfd/wm831x-irq.c | 8
-rw-r--r--  drivers/mfd/wm8350-irq.c | 8
-rw-r--r--  drivers/mfd/wm8994-irq.c | 8
-rw-r--r--  drivers/mtd/Kconfig | 18
-rw-r--r--  drivers/mtd/Makefile | 4
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0001.c | 2
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0002.c | 3
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0020.c | 2
-rw-r--r--  drivers/mtd/devices/m25p80.c | 5
-rw-r--r--  drivers/mtd/devices/mtdram.c | 1
-rw-r--r--  drivers/mtd/devices/phram.c | 3
-rw-r--r--  drivers/mtd/maps/Kconfig | 13
-rw-r--r--  drivers/mtd/maps/Makefile | 1
-rw-r--r--  drivers/mtd/maps/ceiva.c | 6
-rw-r--r--  drivers/mtd/maps/integrator-flash.c | 10
-rw-r--r--  drivers/mtd/maps/latch-addr-flash.c | 272
-rw-r--r--  drivers/mtd/maps/physmap.c | 8
-rw-r--r--  drivers/mtd/maps/physmap_of.c | 8
-rw-r--r--  drivers/mtd/maps/sa1100-flash.c | 8
-rw-r--r--  drivers/mtd/maps/ts5500_flash.c | 1
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 42
-rw-r--r--  drivers/mtd/mtdconcat.c | 8
-rw-r--r--  drivers/mtd/mtdcore.c | 6
-rw-r--r--  drivers/mtd/mtdswap.c | 1587
-rw-r--r--  drivers/mtd/nand/Kconfig | 15
-rw-r--r--  drivers/mtd/nand/Makefile | 1
-rw-r--r--  drivers/mtd/nand/atmel_nand.c | 166
-rw-r--r--  drivers/mtd/nand/davinci_nand.c | 3
-rw-r--r--  drivers/mtd/nand/mpc5121_nfc.c | 5
-rw-r--r--  drivers/mtd/nand/mxc_nand.c | 31
-rw-r--r--  drivers/mtd/nand/nand_base.c | 42
-rw-r--r--  drivers/mtd/nand/nand_bbt.c | 8
-rw-r--r--  drivers/mtd/nand/nand_bch.c | 243
-rw-r--r--  drivers/mtd/nand/nandsim.c | 43
-rw-r--r--  drivers/mtd/nand/omap2.c | 16
-rw-r--r--  drivers/mtd/nand/pxa3xx_nand.c | 977
-rw-r--r--  drivers/mtd/onenand/omap2.c | 7
-rw-r--r--  drivers/mtd/onenand/onenand_base.c | 15
-rw-r--r--  drivers/mtd/sm_ftl.c | 18
-rw-r--r--  drivers/mtd/tests/mtd_speedtest.c | 80
-rw-r--r--  drivers/mtd/tests/mtd_subpagetest.c | 10
-rw-r--r--  drivers/staging/westbridge/astoria/block/cyasblkdev_block.c | 2
-rw-r--r--  fs/inode.c | 2
-rw-r--r--  fs/jffs2/xattr.c | 2
-rw-r--r--  fs/proc/task_mmu.c | 3
-rw-r--r--  include/linux/bch.h | 79
-rw-r--r--  include/linux/drbd.h | 23
-rw-r--r--  include/linux/drbd_limits.h | 12
-rw-r--r--  include/linux/drbd_nl.h | 13
-rw-r--r--  include/linux/drbd_tag_magic.h | 1
-rw-r--r--  include/linux/mfd/core.h | 27
-rw-r--r--  include/linux/mfd/max8997-private.h | 21
-rw-r--r--  include/linux/mfd/max8997.h | 7
-rw-r--r--  include/linux/mtd/blktrans.h | 3
-rw-r--r--  include/linux/mtd/cfi.h | 1
-rw-r--r--  include/linux/mtd/latch-addr-flash.h | 29
-rw-r--r--  include/linux/mtd/nand.h | 3
-rw-r--r--  include/linux/mtd/nand_bch.h | 72
-rw-r--r--  include/linux/mtd/onenand.h | 1
-rw-r--r--  include/sound/pcm.h | 4
-rw-r--r--  ipc/util.c | 4
-rw-r--r--  lib/Kconfig | 39
-rw-r--r--  lib/Makefile | 1
-rw-r--r--  lib/bch.c | 1368
-rw-r--r--  mm/memory.c | 2
-rw-r--r--  sound/core/init.c | 4
-rw-r--r--  sound/core/pcm_native.c | 9
-rw-r--r--  sound/oss/dev_table.h | 2
-rw-r--r--  sound/oss/midi_synth.c | 30
-rw-r--r--  sound/oss/midi_synth.h | 2
-rw-r--r--  sound/oss/opl3.c | 23
-rw-r--r--  sound/oss/sequencer.c | 2
-rw-r--r--  sound/pci/asihpi/asihpi.c | 137
-rw-r--r--  sound/pci/hda/patch_analog.c | 89
-rw-r--r--  sound/pci/hda/patch_realtek.c | 2
-rw-r--r--  sound/usb/quirks-table.h | 40
150 files changed, 8144 insertions, 2737 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index 4f29e5f1ebfa..f5bb0a3bb8c0 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -59,3 +59,15 @@ Kernel Version: 2.6.31
 Contact:	iss_storagedev@hp.com
 Description:	Displays the usage count (number of opens) of logical drive Y
 		of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/resettable
+Date:		February 2011
+Kernel Version:	2.6.38
+Contact:	iss_storagedev@hp.com
+Description:	Value of 1 indicates the controller can honor the reset_devices
+		kernel parameter.  Value of 0 indicates reset_devices cannot be
+		honored.  This is to allow, for example, kexec tools to be able
+		to warn the user if they designate an unresettable device as
+		a dump device, as kdump requires resetting the device in order
+		to work reliably.
+
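The new "resettable" attribute above is an ordinary sysfs text file, so a
tool such as kexec can check it before configuring a dump device.  A minimal
userspace sketch of such a check; the PCI address and controller number
below are placeholders, not part of the documented ABI:

	#include <stdio.h>

	int main(void)
	{
		/* Substitute a real PCI address and controller number for
		 * the <dev> and X placeholders in the ABI entry above. */
		const char *path =
			"/sys/bus/pci/devices/0000:03:00.0/cciss0/resettable";
		char buf[4];
		FILE *f = fopen(path, "r");

		if (!f)
			return 1;	/* no such controller */
		if (fgets(buf, sizeof(buf), f))
			printf("resettable: %s", buf);
		fclose(f);
		return 0;
	}

A value of "1" means reset_devices can be honored; "0" means it cannot.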
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 2deb069aedf1..8436b018c289 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -55,7 +55,6 @@ mandocs: $(MAN)
 build_images = mkdir -p $(objtree)/Documentation/DocBook/media/ && \
 	cp $(srctree)/Documentation/DocBook/dvb/*.png \
 	   $(srctree)/Documentation/DocBook/v4l/*.gif \
-	   $(srctree)/Documentation/DocBook/v4l/*.png \
 	   $(objtree)/Documentation/DocBook/media/

 xmldoclinks:
diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl
index 54eb26b57372..50479360d845 100644
--- a/Documentation/DocBook/rapidio.tmpl
+++ b/Documentation/DocBook/rapidio.tmpl
@@ -133,7 +133,6 @@
 !Idrivers/rapidio/rio-sysfs.c
      </sect1>
      <sect1 id="PPC32_support"><title>PPC32 support</title>
-!Earch/powerpc/sysdev/fsl_rio.c
 !Iarch/powerpc/sysdev/fsl_rio.c
      </sect1>
   </chapter>
diff --git a/Documentation/development-process/1.Intro b/Documentation/development-process/1.Intro
index 8cc2cba2b10d..9b614480aa84 100644
--- a/Documentation/development-process/1.Intro
+++ b/Documentation/development-process/1.Intro
@@ -56,13 +56,13 @@ information on kernel development.

 1.2: WHAT THIS DOCUMENT IS ABOUT

-The Linux kernel, at over 6 million lines of code and well over 1000 active
-contributors, is one of the largest and most active free software projects
-in existence.  Since its humble beginning in 1991, this kernel has evolved
-into a best-of-breed operating system component which runs on pocket-sized
-digital music players, desktop PCs, the largest supercomputers in
-existence, and all types of systems in between.  It is a robust, efficient,
-and scalable solution for almost any situation.
+The Linux kernel, at over 8 million lines of code and well over 1000
+contributors to each release, is one of the largest and most active free
+software projects in existence.  Since its humble beginning in 1991, this
+kernel has evolved into a best-of-breed operating system component which
+runs on pocket-sized digital music players, desktop PCs, the largest
+supercomputers in existence, and all types of systems in between.  It is a
+robust, efficient, and scalable solution for almost any situation.

 With the growth of Linux has come an increase in the number of developers
 (and companies) wishing to participate in its development.  Hardware
@@ -115,7 +115,7 @@ This document was written by Jonathan Corbet, corbet@lwn.net.  It has been
 improved by comments from Johannes Berg, James Berry, Alex Chiang, Roland
 Dreier, Randy Dunlap, Jake Edge, Jiri Kosina, Matt Mackall, Arthur Marsh,
 Amanda McPherson, Andrew Morton, Andrew Price, Tsugikazu Shibata, and
-Jochen Voß. 
+Jochen Voß.

 This work was supported by the Linux Foundation; thanks especially to
 Amanda McPherson, who saw the value of this effort and made it all happen.
@@ -221,7 +221,7 @@ include:
 - Everything that was said above about code review applies doubly to
   closed-source code.  Since this code is not available at all, it cannot
   have been reviewed by the community and will, beyond doubt, have serious
-  problems. 
+  problems.

 Makers of embedded systems, in particular, may be tempted to disregard much
 of what has been said in this section in the belief that they are shipping
diff --git a/Documentation/development-process/2.Process b/Documentation/development-process/2.Process
index 911a45186340..4823577c6509 100644
--- a/Documentation/development-process/2.Process
+++ b/Documentation/development-process/2.Process
@@ -14,16 +14,15 @@ The kernel developers use a loosely time-based release process, with a new
 major kernel release happening every two or three months.  The recent
 release history looks like this:

-	2.6.26	July 13, 2008
-	2.6.25	April 16, 2008
-	2.6.24	January 24, 2008
-	2.6.23	October 9, 2007
-	2.6.22	July 8, 2007
-	2.6.21	April 25, 2007
-	2.6.20	February 4, 2007
+	2.6.38	March 14, 2011
+	2.6.37	January 4, 2011
+	2.6.36	October 20, 2010
+	2.6.35	August 1, 2010
+	2.6.34	May 15, 2010
+	2.6.33	February 24, 2010

 Every 2.6.x release is a major kernel release with new features, internal
-API changes, and more.  A typical 2.6 release can contain over 10,000
+API changes, and more.  A typical 2.6 release can contain nearly 10,000
 changesets with changes to several hundred thousand lines of code.  2.6 is
 thus the leading edge of Linux kernel development; the kernel uses a
 rolling development model which is continually integrating major changes.
@@ -42,13 +41,13 @@ merge window do not come out of thin air; they have been collected, tested,
 and staged ahead of time.  How that process works will be described in
 detail later on).

-The merge window lasts for two weeks.  At the end of this time, Linus
-Torvalds will declare that the window is closed and release the first of
-the "rc" kernels.  For the kernel which is destined to be 2.6.26, for
-example, the release which happens at the end of the merge window will be
-called 2.6.26-rc1.  The -rc1 release is the signal that the time to merge
-new features has passed, and that the time to stabilize the next kernel has
-begun.
+The merge window lasts for approximately two weeks.  At the end of this
+time, Linus Torvalds will declare that the window is closed and release the
+first of the "rc" kernels.  For the kernel which is destined to be 2.6.40,
+for example, the release which happens at the end of the merge window will
+be called 2.6.40-rc1.  The -rc1 release is the signal that the time to
+merge new features has passed, and that the time to stabilize the next
+kernel has begun.

 Over the next six to ten weeks, only patches which fix problems should be
 submitted to the mainline.  On occasion a more significant change will be
@@ -66,20 +65,19 @@ will get up to somewhere between -rc6 and -rc9 before the kernel is
 considered to be sufficiently stable and the final 2.6.x release is made.
 At that point the whole process starts over again.

-As an example, here is how the 2.6.25 development cycle went (all dates in
-2008):
+As an example, here is how the 2.6.38 development cycle went (all dates in
+2011):

-	January 24	2.6.24 stable release
-	February 10	2.6.25-rc1, merge window closes
-	February 15	2.6.25-rc2
-	February 24	2.6.25-rc3
-	March 4		2.6.25-rc4
-	March 9		2.6.25-rc5
-	March 16	2.6.25-rc6
-	March 25	2.6.25-rc7
-	April 1		2.6.25-rc8
-	April 11	2.6.25-rc9
-	April 16	2.6.25 stable release
+	January 4	2.6.37 stable release
+	January 18	2.6.38-rc1, merge window closes
+	January 21	2.6.38-rc2
+	February 1	2.6.38-rc3
+	February 7	2.6.38-rc4
+	February 15	2.6.38-rc5
+	February 21	2.6.38-rc6
+	March 1		2.6.38-rc7
+	March 7		2.6.38-rc8
+	March 14	2.6.38 stable release

 How do the developers decide when to close the development cycle and create
 the stable release?  The most significant metric used is the list of
@@ -87,7 +85,7 @@ regressions from previous releases.  No bugs are welcome, but those which
 break systems which worked in the past are considered to be especially
 serious.  For this reason, patches which cause regressions are looked upon
 unfavorably and are quite likely to be reverted during the stabilization
-period. 
+period.

 The developers' goal is to fix all known regressions before the stable
 release is made.  In the real world, this kind of perfection is hard to
@@ -99,26 +97,34 @@ kernels go out with a handful of known regressions though, hopefully, none
 of them are serious.

 Once a stable release is made, its ongoing maintenance is passed off to the
-"stable team," currently comprised of Greg Kroah-Hartman and Chris Wright.
-The stable team will release occasional updates to the stable release using
-the 2.6.x.y numbering scheme.  To be considered for an update release, a
-patch must (1) fix a significant bug, and (2) already be merged into the
-mainline for the next development kernel.  Continuing our 2.6.25 example,
-the history (as of this writing) is:
-
-	May 1		2.6.25.1
-	May 6		2.6.25.2
-	May 9		2.6.25.3
-	May 15		2.6.25.4
-	June 7		2.6.25.5
-	June 9		2.6.25.6
-	June 16		2.6.25.7
-	June 21		2.6.25.8
-	June 24		2.6.25.9
-
-Stable updates for a given kernel are made for approximately six months;
-after that, the maintenance of stable releases is solely the responsibility
-of the distributors which have shipped that particular kernel.
+"stable team," currently consisting of Greg Kroah-Hartman.  The stable team
+will release occasional updates to the stable release using the 2.6.x.y
+numbering scheme.  To be considered for an update release, a patch must (1)
+fix a significant bug, and (2) already be merged into the mainline for the
+next development kernel.  Kernels will typically receive stable updates for
+a little more than one development cycle past their initial release.  So,
+for example, the 2.6.36 kernel's history looked like:
+
+	October 10	2.6.36 stable release
+	November 22	2.6.36.1
+	December 9	2.6.36.2
+	January 7	2.6.36.3
+	February 17	2.6.36.4
+
+2.6.36.4 was the final stable update for the 2.6.36 release.
+
+Some kernels are designated "long term" kernels; they will receive support
+for a longer period.  As of this writing, the current long term kernels
+and their maintainers are:
+
+	2.6.27	Willy Tarreau		(Deep-frozen stable kernel)
+	2.6.32	Greg Kroah-Hartman
+	2.6.35	Andi Kleen		(Embedded flag kernel)
+
+The selection of a kernel for long-term support is purely a matter of a
+maintainer having the need and the time to maintain that release.  There
+are no known plans for long-term support for any specific upcoming
+release.


 2.2: THE LIFECYCLE OF A PATCH
@@ -130,7 +136,7 @@ each patch implements a change which is desirable to have in the mainline.
 This process can happen quickly for minor fixes, or, in the case of large
 and controversial changes, go on for years.  Much developer frustration
 comes from a lack of understanding of this process or from attempts to
-circumvent it. 
+circumvent it.

 In the hopes of reducing that frustration, this document will describe how
 a patch gets into the kernel.  What follows below is an introduction which
@@ -193,8 +199,8 @@ involved.
 2.3: HOW PATCHES GET INTO THE KERNEL

 There is exactly one person who can merge patches into the mainline kernel
-repository: Linus Torvalds.  But, of the over 12,000 patches which went
-into the 2.6.25 kernel, only 250 (around 2%) were directly chosen by Linus
+repository: Linus Torvalds.  But, of the over 9,500 patches which went
+into the 2.6.38 kernel, only 112 (around 1.3%) were directly chosen by Linus
 himself.  The kernel project has long since grown to a size where no single
 developer could possibly inspect and select every patch unassisted.  The
 way the kernel developers have addressed this growth is through the use of
@@ -229,7 +235,7 @@ first in trees dedicated to network device drivers, wireless networking,
 etc.  This chain of repositories can be arbitrarily long, though it rarely
 exceeds two or three links.  Since each maintainer in the chain trusts
 those managing lower-level trees, this process is known as the "chain of
-trust." 
+trust."

 Clearly, in a system like this, getting patches into the kernel depends on
 finding the right maintainer.  Sending patches directly to Linus is not
@@ -254,7 +260,7 @@ The answer comes in the form of -next trees, where subsystem trees are
 collected for testing and review.  The older of these trees, maintained by
 Andrew Morton, is called "-mm" (for memory management, which is how it got
 started).  The -mm tree integrates patches from a long list of subsystem
-trees; it also has some patches aimed at helping with debugging. 
+trees; it also has some patches aimed at helping with debugging.

 Beyond that, -mm contains a significant collection of patches which have
 been selected by Andrew directly.  These patches may have been posted on a
@@ -264,8 +270,8 @@ subsystem tree of last resort; if there is no other obvious path for a
 patch into the mainline, it is likely to end up in -mm.  Miscellaneous
 patches which accumulate in -mm will eventually either be forwarded on to
 an appropriate subsystem tree or be sent directly to Linus.  In a typical
-development cycle, approximately 10% of the patches going into the mainline
-get there via -mm.
+development cycle, approximately 5-10% of the patches going into the
+mainline get there via -mm.

 The current -mm patch is available in the "mmotm" (-mm of the moment)
 directory at:
@@ -275,7 +281,7 @@ directory at:
 Use of the MMOTM tree is likely to be a frustrating experience, though;
 there is a definite chance that it will not even compile.

-The other -next tree, started more recently, is linux-next, maintained by
+The primary tree for next-cycle patch merging is linux-next, maintained by
 Stephen Rothwell.  The linux-next tree is, by design, a snapshot of what
 the mainline is expected to look like after the next merge window closes.
 Linux-next trees are announced on the linux-kernel and linux-next mailing
@@ -287,25 +293,14 @@ Some information about linux-next has been gathered at:

 	http://linux.f-seidel.de/linux-next/pmwiki/

-How the linux-next tree will fit into the development process is still
-changing.  As of this writing, the first full development cycle involving
-linux-next (2.6.26) is coming to an end; thus far, it has proved to be a
-valuable resource for finding and fixing integration problems before the
-beginning of the merge window.  See http://lwn.net/Articles/287155/ for
-more information on how linux-next has worked to set up the 2.6.27 merge
-window.
-
-Some developers have begun to suggest that linux-next should be used as the
-target for future development as well.  The linux-next tree does tend to be
-far ahead of the mainline and is more representative of the tree into which
-any new work will be merged.  The downside to this idea is that the
-volatility of linux-next tends to make it a difficult development target.
-See http://lwn.net/Articles/289013/ for more information on this topic, and
-stay tuned; much is still in flux where linux-next is involved.
+Linux-next has become an integral part of the kernel development process;
+all patches merged during a given merge window should really have found
+their way into linux-next some time before the merge window opens.
+

 2.4.1: STAGING TREES

-The kernel source tree now contains the drivers/staging/ directory, where
+The kernel source tree contains the drivers/staging/ directory, where
 many sub-directories for drivers or filesystems that are on their way to
 being added to the kernel tree live.  They remain in drivers/staging while
 they still need more work; once complete, they can be moved into the
@@ -313,15 +308,23 @@ kernel proper.  This is a way to keep track of drivers that aren't
 up to Linux kernel coding or quality standards, but people may want to use
 them and track development.

-Greg Kroah-Hartman currently (as of 2.6.36) maintains the staging tree.
-Drivers that still need work are sent to him, with each driver having
-its own subdirectory in drivers/staging/.  Along with the driver source
-files, a TODO file should be present in the directory as well.  The TODO
-file lists the pending work that the driver needs for acceptance into
-the kernel proper, as well as a list of people that should be Cc'd for any
-patches to the driver.  Staging drivers that don't currently build should
-have their config entries depend upon CONFIG_BROKEN.  Once they can
-be successfully built without outside patches, CONFIG_BROKEN can be removed.
+Greg Kroah-Hartman currently maintains the staging tree.  Drivers that
+still need work are sent to him, with each driver having its own
+subdirectory in drivers/staging/.  Along with the driver source files, a
+TODO file should be present in the directory as well.  The TODO file lists
+the pending work that the driver needs for acceptance into the kernel
+proper, as well as a list of people that should be Cc'd for any patches to
+the driver.  Current rules require that drivers contributed to staging
+must, at a minimum, compile properly.
+
+Staging can be a relatively easy way to get new drivers into the mainline
+where, with luck, they will come to the attention of other developers and
+improve quickly.  Entry into staging is not the end of the story, though;
+code in staging which is not seeing regular progress will eventually be
+removed.  Distributors also tend to be relatively reluctant to enable
+staging drivers.  So staging is, at best, a stop on the way toward becoming
+a proper mainline driver.
+

 2.5: TOOLS

@@ -347,11 +350,7 @@ page at:

 	http://git-scm.com/

-That page has pointers to documentation and tutorials.  One should be
-aware, in particular, of the Kernel Hacker's Guide to git, which has
-information specific to kernel development:
-
-	http://linux.yyz.us/git-howto.html
+That page has pointers to documentation and tutorials.

 Among the kernel developers who do not use git, the most popular choice is
 almost certainly Mercurial:
@@ -408,7 +407,7 @@ There are a few hints which can help with linux-kernel survival:
   important to filter on both the topic of interest (though note that
   long-running conversations can drift away from the original subject
   without changing the email subject line) and the people who are
-  participating. 
+  participating.

 - Do not feed the trolls.  If somebody is trying to stir up an angry
   response, ignore them.
diff --git a/Documentation/development-process/3.Early-stage b/Documentation/development-process/3.Early-stage
index 307a159a70ca..f87ba7b3fbac 100644
--- a/Documentation/development-process/3.Early-stage
+++ b/Documentation/development-process/3.Early-stage
@@ -110,8 +110,8 @@ the kernel community's standards. Some examples include:

 - The AppArmor security module made use of internal virtual filesystem
   data structures in ways which were considered to be unsafe and
-  unreliable.  This code has since been significantly reworked, but
-  remains outside of the mainline.
+  unreliable.  This concern (among others) kept AppArmor out of the
+  mainline for years.

 In each of these cases, a great deal of pain and extra work could have been
 avoided with some early discussion with the kernel developers.
@@ -138,6 +138,19 @@ patches, and who, if anybody, is attaching Signed-off-by lines to those
 patches.  Those are the people who will be best placed to help with a new
 development project.

+The task of finding the right maintainer is sometimes challenging enough
+that the kernel developers have added a script to ease the process:
+
+	.../scripts/get_maintainer.pl
+
+This script will return the current maintainer(s) for a given file or
+directory when given the "-f" option.  If passed a patch on the
+command line, it will list the maintainers who should probably receive
+copies of the patch.  There are a number of options regulating how hard
+get_maintainer.pl will search for maintainers; please be careful about
+using the more aggressive options as you may end up including developers
+who have no real interest in the code you are modifying.
+
 If all else fails, talking to Andrew Morton can be an effective way to
 track down a maintainer for a specific piece of code.

@@ -155,11 +168,15 @@ reaction, but, instead, little or no reaction at all.  The sad truth of the
 matter is (1) kernel developers tend to be busy, (2) there is no shortage
 of people with grand plans and little code (or even prospect of code) to
 back them up, and (3) nobody is obligated to review or comment on ideas
-posted by others.  If a request-for-comments posting yields little in the
-way of comments, do not assume that it means there is no interest in the
-project.  Unfortunately, you also cannot assume that there are no problems
-with your idea.  The best thing to do in this situation is to proceed,
-keeping the community informed as you go.
+posted by others.  Beyond that, high-level designs often hide problems
+which are only reviewed when somebody actually tries to implement those
+designs; for that reason, kernel developers would rather see the code.
+
+If a request-for-comments posting yields little in the way of comments, do
+not assume that it means there is no interest in the project.
+Unfortunately, you also cannot assume that there are no problems with your
+idea.  The best thing to do in this situation is to proceed, keeping the
+community informed as you go.


 3.5: GETTING OFFICIAL BUY-IN
diff --git a/Documentation/development-process/4.Coding b/Documentation/development-process/4.Coding
index 2278693c8ffa..f3f1a469443c 100644
--- a/Documentation/development-process/4.Coding
+++ b/Documentation/development-process/4.Coding
@@ -131,6 +131,11 @@ classic time/space tradeoff taught in beginning data structures classes
 often does not apply to contemporary hardware.  Space *is* time, in that a
 larger program will run slower than one which is more compact.

+More recent compilers take an increasingly active role in deciding whether
+a given function should actually be inlined or not.  So the liberal
+placement of "inline" keywords may not just be excessive; it could also be
+irrelevant.
+

 * Locking

@@ -285,6 +290,13 @@ be found at https://sparse.wiki.kernel.org/index.php/Main_Page if your
 distributor does not package it); it can then be run on the code by adding
 "C=1" to your make command.

+The "Coccinelle" tool (http://coccinelle.lip6.fr/) is able to find a wide
+variety of potential coding problems; it can also propose fixes for those
+problems.  Quite a few "semantic patches" for the kernel have been packaged
+under the scripts/coccinelle directory; running "make coccicheck" will run
+through those semantic patches and report on any problems found.  See
+Documentation/coccinelle.txt for more information.
+
 Other kinds of portability errors are best found by compiling your code for
 other architectures.  If you do not happen to have an S/390 system or a
 Blackfin development board handy, you can still perform the compilation
@@ -308,7 +320,9 @@ The first piece of documentation for any patch is its associated
 changelog.  Log entries should describe the problem being solved, the form
 of the solution, the people who worked on the patch, any relevant
 effects on performance, and anything else that might be needed to
-understand the patch.
+understand the patch.  Be sure that the changelog says *why* the patch is
+worth applying; a surprising number of developers fail to provide that
+information.

 Any code which adds a new user-space interface - including new sysfs or
 /proc files - should include documentation of that interface which enables
@@ -321,7 +335,7 @@ boot-time parameters.  Any patch which adds new parameters should add the
 appropriate entries to this file.

 Any new configuration options must be accompanied by help text which
-clearly explains the options and when the user might want to select them. 
+clearly explains the options and when the user might want to select them.

 Internal API information for many subsystems is documented by way of
 specially-formatted comments; these comments can be extracted and formatted
@@ -372,7 +386,8 @@ which is broken by the change.  For a widely-used function, this duty can
 lead to literally hundreds or thousands of changes - many of which are
 likely to conflict with work being done by other developers.  Needless to
 say, this can be a large job, so it is best to be sure that the
-justification is solid.
+justification is solid.  Note that the Coccinelle tool can help with
+wide-ranging API changes.

 When making an incompatible API change, one should, whenever possible,
 ensure that code which has not been updated is caught by the compiler.
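To make the new paragraph about "inline" concrete: a small static helper
can simply be left unmarked and the compiler trusted to inline it where
that is a win.  A hypothetical example, not taken from the kernel source:

	/* No "inline" keyword here: an optimizing compiler will usually
	 * inline this small static helper on its own, so adding the
	 * keyword would change little or nothing.  (Assumes n < 32.) */
	static unsigned int mask_low_bits(unsigned int value, unsigned int n)
	{
		return value & ((1U << n) - 1);
	}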
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting
index f622c1e9f0f9..903a2546f138 100644
--- a/Documentation/development-process/5.Posting
+++ b/Documentation/development-process/5.Posting
@@ -60,12 +60,15 @@ even in the short term.

 Patches must be prepared against a specific version of the kernel.  As a
 general rule, a patch should be based on the current mainline as found in
-Linus's git tree.  It may become necessary to make versions against -mm,
-linux-next, or a subsystem tree, though, to facilitate wider testing and
-review.  Depending on the area of your patch and what is going on
-elsewhere, basing a patch against these other trees can require a
-significant amount of work resolving conflicts and dealing with API
-changes.
+Linus's git tree.  When basing on mainline, start with a well-known release
+point - a stable or -rc release - rather than branching off the mainline at
+an arbitrary spot.
+
+It may become necessary to make versions against -mm, linux-next, or a
+subsystem tree, though, to facilitate wider testing and review.  Depending
+on the area of your patch and what is going on elsewhere, basing a patch
+against these other trees can require a significant amount of work
+resolving conflicts and dealing with API changes.

 Only the most simple changes should be formatted as a single patch;
 everything else should be made as a logical series of changes.  Splitting
@@ -100,11 +103,11 @@ rules of thumb, however, which can help considerably:
   result is a broken kernel, you will make life harder for developers and
   users who are engaging in the noble work of tracking down problems.

- - Do not overdo it, though.  One developer recently posted a set of edits
+ - Do not overdo it, though.  One developer once posted a set of edits
    to a single file as 500 separate patches - an act which did not make him
    the most popular person on the kernel mailing list.  A single patch can
    be reasonably large as long as it still contains a single *logical*
-   change. 
+   change.

 - It can be tempting to add a whole new infrastructure with a series of
   patches, but to leave that infrastructure unused until the final patch
@@ -162,7 +165,8 @@ To that end, the summary line should describe the effects of and motivation
 for the change as well as possible given the one-line constraint.  The
 detailed description can then amplify on those topics and provide any
 needed additional information.  If the patch fixes a bug, cite the commit
-which introduced the bug if possible.  If a problem is associated with
+which introduced the bug if possible (and please provide both the commit ID
+and the title when citing commits).  If a problem is associated with
 specific log or compiler output, include that output to help others
 searching for a solution to the same problem.  If the change is meant to
 support other changes coming in later patch, say so.  If internal APIs are
@@ -230,7 +234,7 @@ take care of:
   which have had gratuitous white-space changes or line wrapping performed
   by the mail client will not apply at the other end, and often will not
   be examined in any detail.  If there is any doubt at all, mail the patch
-  to yourself and convince yourself that it shows up intact. 
+  to yourself and convince yourself that it shows up intact.

   Documentation/email-clients.txt has some helpful hints on making
   specific mail clients work for sending patches.
@@ -287,7 +291,7 @@ something like:

 where "nn" is the ordinal number of the patch, "mm" is the total number of
 patches in the series, and "subsys" is the name of the affected subsystem.
-Clearly, nn/mm can be omitted for a single, standalone patch. 
+Clearly, nn/mm can be omitted for a single, standalone patch.

 If you have a significant series of patches, it is customary to send an
 introductory description as part zero.  This convention is not universally
@@ -299,5 +303,5 @@ In general, the second and following parts of a multi-part patch should be
 sent as a reply to the first part so that they all thread together at the
 receiving end.  Tools like git and quilt have commands to mail out a set of
 patches with the proper threading.  If you have a long series, though, and
-are using git, please provide the --no-chain-reply-to option to avoid
+are using git, please stay away from the --chain-reply-to option to avoid
 creating exceptionally deep nesting.
diff --git a/Documentation/development-process/6.Followthrough b/Documentation/development-process/6.Followthrough
index a8fba3d83a85..41d324a9420d 100644
--- a/Documentation/development-process/6.Followthrough
+++ b/Documentation/development-process/6.Followthrough
@@ -66,6 +66,11 @@ be easy to become blinded by your own solution to a problem to the point
 that you don't realize that something is fundamentally wrong or, perhaps,
 you're not even solving the right problem.

+Andrew Morton has suggested that every review comment which does not result
+in a code change should result in an additional code comment instead; that
+can help future reviewers avoid the questions which came up the first time
+around.
+
 One fatal mistake is to ignore review comments in the hope that they will
 go away.  They will not go away.  If you repost code without having
 responded to the comments you got the time before, you're likely to find
@@ -100,7 +105,7 @@ entry into a subsystem maintainer's tree.  How that works varies from one
 subsystem to the next; each maintainer has his or her own way of doing
 things.  In particular, there may be more than one tree - one, perhaps,
 dedicated to patches planned for the next merge window, and another for
-longer-term work. 
+longer-term work.

 For patches applying to areas for which there is no obvious subsystem tree
 (memory management patches, for example), the default tree often ends up
@@ -109,11 +114,10 @@ through the -mm tree.

 Inclusion into a subsystem tree can bring a higher level of visibility to a
 patch.  Now other developers working with that tree will get the patch by
-default.  Subsystem trees typically feed into -mm and linux-next as well,
-making their contents visible to the development community as a whole.  At
-this point, there's a good chance that you will get more comments from a
-new set of reviewers; these comments need to be answered as in the previous
-round.
+default.  Subsystem trees typically feed linux-next as well, making their
+contents visible to the development community as a whole.  At this point,
+there's a good chance that you will get more comments from a new set of
+reviewers; these comments need to be answered as in the previous round.

 What may also happen at this point, depending on the nature of your patch,
 is that conflicts with work being done by others turn up.  In the worst
diff --git a/Documentation/development-process/7.AdvancedTopics b/Documentation/development-process/7.AdvancedTopics
index 837179447e17..26dc3fa196e4 100644
--- a/Documentation/development-process/7.AdvancedTopics
+++ b/Documentation/development-process/7.AdvancedTopics
@@ -119,7 +119,7 @@ can affect your ability to get trees pulled in the future. Quoting Linus:
 	to trust things *without* then having to go and check every
 	individual change by hand.

-(http://lwn.net/Articles/224135/). 
+(http://lwn.net/Articles/224135/).

 To avoid this kind of situation, ensure that all patches within a given
 branch stick closely to the associated topic; a "driver fixes" branch
@@ -138,7 +138,7 @@ When requesting a pull, be sure to give all the relevant information: where
 your tree is, what branch to pull, and what changes will result from the
 pull.  The git request-pull command can be helpful in this regard; it will
 format the request as other developers expect, and will also check to be
-sure that you have remembered to push those changes to the public server. 
+sure that you have remembered to push those changes to the public server.


 7.2: REVIEWING PATCHES
diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt
index e6c4b757025b..f959909d7154 100644
--- a/Documentation/dynamic-debug-howto.txt
+++ b/Documentation/dynamic-debug-howto.txt
@@ -6,7 +6,7 @@ This document describes how to use the dynamic debug (ddebug) feature.

 Dynamic debug is designed to allow you to dynamically enable/disable kernel
 code to obtain additional kernel information.  Currently, if
-CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_debug() calls can be
+CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() calls can be
 dynamically enabled per-callsite.

 Dynamic debug has even more useful features:
@@ -26,7 +26,7 @@ Dynamic debug has even more useful features:
 Controlling dynamic debug Behaviour
 ===================================

-The behaviour of pr_debug()/dev_debug()s are controlled via writing to a
+The behaviour of pr_debug()/dev_dbg()s are controlled via writing to a
 control file in the 'debugfs' filesystem.  Thus, you must first mount the debugfs
 filesystem, in order to make use of this feature.  Subsequently, we refer to the
 control file as: <debugfs>/dynamic_debug/control.  For example, if you want to
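For reference, a pr_debug() callsite of the kind this document describes
looks like the sketch below; with CONFIG_DYNAMIC_DEBUG set, each such site
can then be enabled individually through <debugfs>/dynamic_debug/control.
The module and messages here are hypothetical examples:

	#include <linux/module.h>
	#include <linux/printk.h>

	static int __init ddebug_example_init(void)
	{
		/* Silent by default; dynamic debug can turn exactly
		 * this callsite on at run time. */
		pr_debug("ddebug example loaded\n");
		return 0;
	}

	static void __exit ddebug_example_exit(void)
	{
		pr_debug("ddebug example unloaded\n");
	}

	module_init(ddebug_example_init);
	module_exit(ddebug_example_exit);
	MODULE_LICENSE("GPL");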
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg
index 4d0bc70f1852..df02245d1419 100644
--- a/Documentation/hwmon/f71882fg
+++ b/Documentation/hwmon/f71882fg
@@ -2,6 +2,10 @@ Kernel driver f71882fg
 ======================

 Supported chips:
+  * Fintek F71808E
+    Prefix: 'f71808e'
+    Addresses scanned: none, address read from Super I/O config space
+    Datasheet: Not public
   * Fintek F71858FG
     Prefix: 'f71858fg'
     Addresses scanned: none, address read from Super I/O config space
@@ -26,10 +30,25 @@ Supported chips:
     Prefix: 'f71889ed'
     Addresses scanned: none, address read from Super I/O config space
     Datasheet: Should become available on the Fintek website soon
+  * Fintek F71889A
+    Prefix: 'f71889a'
+    Addresses scanned: none, address read from Super I/O config space
+    Datasheet: Should become available on the Fintek website soon
   * Fintek F8000
     Prefix: 'f8000'
     Addresses scanned: none, address read from Super I/O config space
     Datasheet: Not public
+  * Fintek F81801U
+    Prefix: 'f71889fg'
+    Addresses scanned: none, address read from Super I/O config space
+    Datasheet: Not public
+    Note: This is the 64-pin variant of the F71889FG, they have the
+          same device ID and are fully compatible as far as hardware
+          monitoring is concerned.
+  * Fintek F81865F
+    Prefix: 'f81865f'
+    Addresses scanned: none, address read from Super I/O config space
+    Datasheet: Available from the Fintek website

 Author: Hans de Goede <hdegoede@redhat.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 4fb9017b4413..8aa1cacddbcc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -548,10 +548,8 @@ S: Maintained
 F:	sound/aoa/

 APM DRIVER
-M:	Stephen Rothwell <sfr@canb.auug.org.au>
 L:	linux-laptop@vger.kernel.org
-W:	http://www.canb.auug.org.au/~sfr/
-S:	Supported
+S:	Orphan
 F:	arch/x86/kernel/apm_32.c
 F:	include/linux/apm_bios.h

@@ -6633,6 +6631,7 @@ F:	drivers/media/video/zr364xx.c

 USER-MODE LINUX (UML)
 M:	Jeff Dike <jdike@addtoit.com>
+M:	Richard Weinberger <richard@nod.at>
 L:	user-mode-linux-devel@lists.sourceforge.net
 L:	user-mode-linux-user@lists.sourceforge.net
 W:	http://user-mode-linux.sourceforge.net
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index c936c6d7ded0..f3a7b1011914 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -285,19 +285,6 @@ static int __init omap4_twl6030_hsmmc_init(struct omap2_hsmmc_info *controllers)
 	return 0;
 }
 
-static struct regulator_init_data omap4_panda_vaux1 = {
-	.constraints = {
-		.min_uV = 1000000,
-		.max_uV = 3000000,
-		.apply_uV = true,
-		.valid_modes_mask = REGULATOR_MODE_NORMAL
-			| REGULATOR_MODE_STANDBY,
-		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE
-			| REGULATOR_CHANGE_MODE
-			| REGULATOR_CHANGE_STATUS,
-	},
-};
-
 static struct regulator_init_data omap4_panda_vaux2 = {
 	.constraints = {
 		.min_uV = 1200000,
@@ -353,19 +340,6 @@ static struct regulator_init_data omap4_panda_vpp = {
 	},
 };
 
-static struct regulator_init_data omap4_panda_vusim = {
-	.constraints = {
-		.min_uV = 1200000,
-		.max_uV = 2900000,
-		.apply_uV = true,
-		.valid_modes_mask = REGULATOR_MODE_NORMAL
-			| REGULATOR_MODE_STANDBY,
-		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE
-			| REGULATOR_CHANGE_MODE
-			| REGULATOR_CHANGE_STATUS,
-	},
-};
-
 static struct regulator_init_data omap4_panda_vana = {
 	.constraints = {
 		.min_uV = 2100000,
@@ -424,12 +398,10 @@ static struct twl4030_platform_data omap4_panda_twldata = {
 	/* Regulators */
 	.vmmc = &omap4_panda_vmmc,
 	.vpp = &omap4_panda_vpp,
-	.vusim = &omap4_panda_vusim,
 	.vana = &omap4_panda_vana,
 	.vcxio = &omap4_panda_vcxio,
 	.vdac = &omap4_panda_vdac,
 	.vusb = &omap4_panda_vusb,
-	.vaux1 = &omap4_panda_vaux1,
 	.vaux2 = &omap4_panda_vaux2,
 	.vaux3 = &omap4_panda_vaux3,
 	.clk32kg = &omap4_panda_clk32kg,
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index e97851492847..84d1b735fe80 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -66,7 +66,7 @@ static int __init omap3_l3_init(void)
 
 	WARN(IS_ERR(od), "could not build omap_device for %s\n", oh_name);
 
-	return PTR_ERR(od);
+	return IS_ERR(od) ? PTR_ERR(od) : 0;
 }
 postcore_initcall(omap3_l3_init);
 
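(A side note on the one-line fix above: PTR_ERR() applied to a valid pointer just returns the pointer bits reinterpreted as a long, so the old "return PTR_ERR(od);" reported a bogus nonzero error even on success. A minimal user-space sketch of the IS_ERR()/PTR_ERR() idiom, with the helpers re-implemented here purely for illustration:)

#include <stdio.h>

/* User-space stand-ins for the kernel's IS_ERR()/PTR_ERR() helpers,
 * re-implemented here only to demonstrate the idiom. */
#define MAX_ERRNO	4095
#define IS_ERR(p)	((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)
#define PTR_ERR(p)	((long)(p))

int main(void)
{
	int obj = 42;
	void *ok = &obj;		/* a real pointer */
	void *bad = (void *)-12L;	/* encodes -ENOMEM */

	/* Returning PTR_ERR(ok) would yield the pointer value itself,
	 * a large bogus "error"; the ?: guard returns 0 on success. */
	printf("%ld\n", IS_ERR(ok) ? PTR_ERR(ok) : 0L);	/* 0 */
	printf("%ld\n", IS_ERR(bad) ? PTR_ERR(bad) : 0L);	/* -12 */
	return 0;
}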
diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 674174365f78..493505c3b2f5 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -693,6 +693,7 @@ static int __init gpmc_init(void)
 {
 	u32 l, irq;
 	int cs, ret = -EINVAL;
+	int gpmc_irq;
 	char *ck = NULL;
 
 	if (cpu_is_omap24xx()) {
@@ -701,12 +702,15 @@ static int __init gpmc_init(void)
 			l = OMAP2420_GPMC_BASE;
 		else
 			l = OMAP34XX_GPMC_BASE;
+		gpmc_irq = INT_34XX_GPMC_IRQ;
 	} else if (cpu_is_omap34xx()) {
 		ck = "gpmc_fck";
 		l = OMAP34XX_GPMC_BASE;
+		gpmc_irq = INT_34XX_GPMC_IRQ;
 	} else if (cpu_is_omap44xx()) {
 		ck = "gpmc_ck";
 		l = OMAP44XX_GPMC_BASE;
+		gpmc_irq = OMAP44XX_IRQ_GPMC;
 	}
 
 	if (WARN_ON(!ck))
@@ -739,16 +743,17 @@ static int __init gpmc_init(void)
 	/* initalize the irq_chained */
 	irq = OMAP_GPMC_IRQ_BASE;
 	for (cs = 0; cs < GPMC_CS_NUM; cs++) {
-		set_irq_handler(irq, handle_simple_irq);
+		set_irq_chip_and_handler(irq, &dummy_irq_chip,
+						handle_simple_irq);
 		set_irq_flags(irq, IRQF_VALID);
 		irq++;
 	}
 
-	ret = request_irq(INT_34XX_GPMC_IRQ,
+	ret = request_irq(gpmc_irq,
 			gpmc_handle_irq, IRQF_SHARED, "gpmc", gpmc_base);
 	if (ret)
 		pr_err("gpmc: irq-%d could not claim: err %d\n",
-				INT_34XX_GPMC_IRQ, ret);
+				gpmc_irq, ret);
 	return ret;
 }
 postcore_initcall(gpmc_init);
@@ -757,8 +762,6 @@ static irqreturn_t gpmc_handle_irq(int irq, void *dev)
 {
 	u8 cs;
 
-	if (irq != INT_34XX_GPMC_IRQ)
-		return IRQ_HANDLED;
 	/* check cs to invoke the irq */
 	cs = ((gpmc_read_reg(GPMC_PREFETCH_CONFIG1)) >> CS_NUM_SHIFT) & 0x7;
 	if (OMAP_GPMC_IRQ_BASE+cs <= OMAP_GPMC_IRQ_END)
diff --git a/arch/arm/mach-omap2/omap_l3_smx.c b/arch/arm/mach-omap2/omap_l3_smx.c
index 265bff3acb9e..5f2da7565b68 100644
--- a/arch/arm/mach-omap2/omap_l3_smx.c
+++ b/arch/arm/mach-omap2/omap_l3_smx.c
@@ -226,7 +226,6 @@ static int __init omap3_l3_probe(struct platform_device *pdev)
 	struct omap3_l3 *l3;
 	struct resource *res;
 	int ret;
-	int irq;
 
 	l3 = kzalloc(sizeof(*l3), GFP_KERNEL);
 	if (!l3) {
@@ -249,18 +248,17 @@ static int __init omap3_l3_probe(struct platform_device *pdev)
 		goto err2;
 	}
 
-	irq = platform_get_irq(pdev, 0);
-	ret = request_irq(irq, omap3_l3_app_irq,
+	l3->debug_irq = platform_get_irq(pdev, 0);
+	ret = request_irq(l3->debug_irq, omap3_l3_app_irq,
 		IRQF_DISABLED | IRQF_TRIGGER_RISING,
 		"l3-debug-irq", l3);
 	if (ret) {
 		dev_err(&pdev->dev, "couldn't request debug irq\n");
 		goto err3;
 	}
-	l3->debug_irq = irq;
 
-	irq = platform_get_irq(pdev, 1);
-	ret = request_irq(irq, omap3_l3_app_irq,
+	l3->app_irq = platform_get_irq(pdev, 1);
+	ret = request_irq(l3->app_irq, omap3_l3_app_irq,
 		IRQF_DISABLED | IRQF_TRIGGER_RISING,
 		"l3-app-irq", l3);
 
@@ -269,7 +267,6 @@ static int __init omap3_l3_probe(struct platform_device *pdev)
 		goto err4;
 	}
 
-	l3->app_irq = irq;
 	goto err0;
 
 err4:
diff --git a/arch/arm/plat-omap/include/plat/irqs.h b/arch/arm/plat-omap/include/plat/irqs.h
index d77928370463..5a25098ea7ea 100644
--- a/arch/arm/plat-omap/include/plat/irqs.h
+++ b/arch/arm/plat-omap/include/plat/irqs.h
@@ -416,7 +416,7 @@
 
 /* GPMC related */
 #define OMAP_GPMC_IRQ_BASE	(TWL_IRQ_END)
-#define OMAP_GPMC_NR_IRQS	7
+#define OMAP_GPMC_NR_IRQS	8
 #define OMAP_GPMC_IRQ_END	(OMAP_GPMC_IRQ_BASE + OMAP_GPMC_NR_IRQS)
 
 
diff --git a/arch/arm/plat-omap/include/plat/onenand.h b/arch/arm/plat-omap/include/plat/onenand.h
index cbe897ca7f9e..2858667d2e4f 100644
--- a/arch/arm/plat-omap/include/plat/onenand.h
+++ b/arch/arm/plat-omap/include/plat/onenand.h
@@ -32,6 +32,7 @@ struct omap_onenand_platform_data {
 	int dma_channel;
 	u8 flags;
 	u8 regulator_can_sleep;
+	u8 skip_initial_unlocking;
 };
 
 #define ONENAND_MAX_PARTITIONS 8
diff --git a/arch/arm/plat-pxa/include/plat/pxa3xx_nand.h b/arch/arm/plat-pxa/include/plat/pxa3xx_nand.h
index 01a8448e471c..442301fe48b4 100644
--- a/arch/arm/plat-pxa/include/plat/pxa3xx_nand.h
+++ b/arch/arm/plat-pxa/include/plat/pxa3xx_nand.h
@@ -30,6 +30,7 @@ struct pxa3xx_nand_cmdset {
 };
 
 struct pxa3xx_nand_flash {
+	char *name;
 	uint32_t chip_id;
 	unsigned int page_per_block; /* Pages per block (PG_PER_BLK) */
 	unsigned int page_size; /* Page size in bytes (PAGE_SZ) */
@@ -37,7 +38,6 @@ struct pxa3xx_nand_flash {
 	unsigned int dfc_width; /* Width of flash controller(DWIDTH_C) */
 	unsigned int num_blocks; /* Number of physical blocks in Flash */
 
-	struct pxa3xx_nand_cmdset *cmdset; /* NAND command set */
 	struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
 };
 
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 4db5b46e1eff..04a7fc5eaf46 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -276,7 +276,6 @@ config ETRAX_AXISFLASHMAP
 	select MTD_CHAR
 	select MTD_BLOCK
 	select MTD_PARTITIONS
-	select MTD_CONCAT
 	select MTD_COMPLEX_MAPPINGS
 	help
 	  This option enables MTD mapping of flash devices. Needed to use
diff --git a/arch/cris/arch-v10/drivers/axisflashmap.c b/arch/cris/arch-v10/drivers/axisflashmap.c
index b2079703af7e..ed708e19d09e 100644
--- a/arch/cris/arch-v10/drivers/axisflashmap.c
+++ b/arch/cris/arch-v10/drivers/axisflashmap.c
@@ -234,7 +234,6 @@ static struct mtd_info *flash_probe(void)
 	}
 
 	if (mtd_cse0 && mtd_cse1) {
-#ifdef CONFIG_MTD_CONCAT
 		struct mtd_info *mtds[] = { mtd_cse0, mtd_cse1 };
 
 		/* Since the concatenation layer adds a small overhead we
@@ -246,11 +245,6 @@ static struct mtd_info *flash_probe(void)
 		 */
 		mtd_cse = mtd_concat_create(mtds, ARRAY_SIZE(mtds),
 					    "cse0+cse1");
-#else
-		printk(KERN_ERR "%s and %s: Cannot concatenate due to kernel "
-		       "(mis)configuration!\n", map_cse0.name, map_cse1.name);
-		mtd_cse = NULL;
-#endif
 		if (!mtd_cse) {
 			printk(KERN_ERR "%s and %s: Concatenation failed!\n",
 			       map_cse0.name, map_cse1.name);
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index a2dd740c5907..1633b120aa81 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -406,7 +406,6 @@ config ETRAX_AXISFLASHMAP
 	select MTD_CHAR
 	select MTD_BLOCK
 	select MTD_PARTITIONS
-	select MTD_CONCAT
 	select MTD_COMPLEX_MAPPINGS
 	help
 	  This option enables MTD mapping of flash devices. Needed to use
diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index 51e1e85df96d..3d751250271b 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -275,7 +275,6 @@ static struct mtd_info *flash_probe(void)
 	}
 
 	if (count > 1) {
-#ifdef CONFIG_MTD_CONCAT
 		/* Since the concatenation layer adds a small overhead we
 		 * could try to figure out if the chips in cse0 and cse1 are
 		 * identical and reprobe the whole cse0+cse1 window. But since
@@ -284,11 +283,6 @@ static struct mtd_info *flash_probe(void)
 		 * complicating the probing procedure.
 		 */
 		mtd_total = mtd_concat_create(mtds, count, "cse0+cse1");
-#else
-		printk(KERN_ERR "%s and %s: Cannot concatenate due to kernel "
-		       "(mis)configuration!\n", map_cse0.name, map_cse1.name);
-		mtd_toal = NULL;
-#endif
 		if (!mtd_total) {
 			printk(KERN_ERR "%s and %s: Concatenation failed!\n",
 			       map_cse0.name, map_cse1.name);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a09e1f052d84..d475b4398d8b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -45,7 +45,7 @@
 #include <linux/stringify.h>
 
 #ifdef CONFIG_SMP
-#define __percpu_arg(x)		"%%"__stringify(__percpu_seg)":%P" #x
+#define __percpu_prefix		"%%"__stringify(__percpu_seg)":"
 #define __my_cpu_offset		percpu_read(this_cpu_off)
 
 /*
@@ -62,9 +62,11 @@
 	(typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
 })
 #else
-#define __percpu_arg(x)		"%P" #x
+#define __percpu_prefix		""
 #endif
 
+#define __percpu_arg(x)		__percpu_prefix "%P" #x
+
 /*
  * Initialized pointers to per-cpu variables needed for the boot
  * processor need to use these macros to get the proper address
@@ -516,11 +518,11 @@ do { \
 	typeof(o2) __n2 = n2;						\
 	typeof(o2) __dummy;						\
 	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
-		       "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t",	\
+		       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",	\
 		       X86_FEATURE_CX16,				\
 		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
 		       "S" (&pcp1), "b"(__n1), "c"(__n2),		\
-		       "a"(__o1), "d"(__o2));				\
+		       "a"(__o1), "d"(__o2) : "memory");		\
 	__ret;								\
 })
 
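(Taken together, the percpu.h hunks factor the segment override out into __percpu_prefix so the same string can be spliced into both __percpu_arg() and the cmpxchg16b alternative above; the added "memory" clobber tells the compiler the asm may touch memory, so cached values of the per-cpu pair are not reused across the exchange. A small user-space mock of the string pasting follows; the helpers here are stand-ins, not the kernel headers:)

#include <stdio.h>

/* Stand-ins for the kernel's stringify helpers; __percpu_seg is gs on
 * x86-64 SMP. This only demonstrates how the strings concatenate. */
#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

#define __percpu_seg		gs
#define __percpu_prefix		"%%" __stringify(__percpu_seg) ":"
#define __percpu_arg(x)		__percpu_prefix "%P" #x

int main(void)
{
	/* Adjacent string literals merge at compile time, so both asm
	 * templates below already carry the "%%gs:" prefix. */
	puts(__percpu_arg(1));
	puts("cmpxchg16b " __percpu_prefix "(%%rsi)");
	return 0;
}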
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 3e8b08a6de2b..1e572c507d06 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -10,6 +10,12 @@
 #include <asm/frame.h>
 #include <asm/dwarf2.h>
 
+#ifdef CONFIG_SMP
+#define SEG_PREFIX %gs:
+#else
+#define SEG_PREFIX
+#endif
+
 .text
 
 /*
@@ -37,13 +43,13 @@ this_cpu_cmpxchg16b_emu:
 	pushf
 	cli
 
-	cmpq %gs:(%rsi), %rax
+	cmpq SEG_PREFIX(%rsi), %rax
 	jne not_same
-	cmpq %gs:8(%rsi), %rdx
+	cmpq SEG_PREFIX 8(%rsi), %rdx
 	jne not_same
 
-	movq %rbx, %gs:(%rsi)
-	movq %rcx, %gs:8(%rsi)
+	movq %rbx, SEG_PREFIX(%rsi)
+	movq %rcx, SEG_PREFIX 8(%rsi)
 
 	popf
 	mov $1, %al
diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c
index 99513642a0e6..ab81fb271760 100644
--- a/arch/x86/platform/olpc/olpc-xo1.c
+++ b/arch/x86/platform/olpc/olpc-xo1.c
@@ -72,9 +72,9 @@ static int __devinit olpc_xo1_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "can't fetch device resource info\n");
 		return -EIO;
 	}
-	if (strcmp(pdev->name, "olpc-xo1-pms") == 0)
+	if (strcmp(pdev->name, "cs5535-pms") == 0)
 		pms_base = res->start;
-	else if (strcmp(pdev->name, "olpc-xo1-ac-acpi") == 0)
+	else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
 		acpi_base = res->start;
 
 	/* If we have both addresses, we can override the poweroff hook */
@@ -90,9 +90,9 @@ static int __devexit olpc_xo1_remove(struct platform_device *pdev)
 {
 	mfd_cell_disable(pdev);
 
-	if (strcmp(pdev->name, "olpc-xo1-pms") == 0)
+	if (strcmp(pdev->name, "cs5535-pms") == 0)
 		pms_base = 0;
-	else if (strcmp(pdev->name, "olpc-xo1-acpi") == 0)
+	else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
 		acpi_base = 0;
 
 	pm_power_off = NULL;
@@ -101,7 +101,7 @@ static int __devexit olpc_xo1_remove(struct platform_device *pdev)
 
 static struct platform_driver cs5535_pms_drv = {
 	.driver = {
-		.name = "olpc-xo1-pms",
+		.name = "cs5535-pms",
 		.owner = THIS_MODULE,
 	},
 	.probe = olpc_xo1_probe,
@@ -110,7 +110,7 @@ static struct platform_driver cs5535_pms_drv = {
 
 static struct platform_driver cs5535_acpi_drv = {
 	.driver = {
-		.name = "olpc-xo1-acpi",
+		.name = "olpc-xo1-pm-acpi",
 		.owner = THIS_MODULE,
 	},
 	.probe = olpc_xo1_probe,
@@ -121,22 +121,21 @@ static int __init olpc_xo1_init(void)
 {
 	int r;
 
-	r = mfd_shared_platform_driver_register(&cs5535_pms_drv, "cs5535-pms");
+	r = platform_driver_register(&cs5535_pms_drv);
 	if (r)
 		return r;
 
-	r = mfd_shared_platform_driver_register(&cs5535_acpi_drv,
-						"cs5535-acpi");
+	r = platform_driver_register(&cs5535_acpi_drv);
 	if (r)
-		mfd_shared_platform_driver_unregister(&cs5535_pms_drv);
+		platform_driver_unregister(&cs5535_pms_drv);
 
 	return r;
 }
 
 static void __exit olpc_xo1_exit(void)
 {
-	mfd_shared_platform_driver_unregister(&cs5535_acpi_drv);
-	mfd_shared_platform_driver_unregister(&cs5535_pms_drv);
+	platform_driver_unregister(&cs5535_acpi_drv);
+	platform_driver_unregister(&cs5535_pms_drv);
 }
 
 MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>");
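(The init/exit hunks above drop the mfd_shared_* wrappers in favor of plain platform_driver_register()/platform_driver_unregister(), keeping the usual unwind rule: if the second registration fails, the first must be undone before the error is returned. A hedged user-space sketch of that shape; the helpers and failure injection are invented for the demonstration:)

#include <stdio.h>

/* Mock register/unregister helpers standing in for
 * platform_driver_register()/platform_driver_unregister(). */
static int register_drv(const char *name, int fail)
{
	if (fail)
		return -19;	/* say, -ENODEV */
	printf("registered %s\n", name);
	return 0;
}

static void unregister_drv(const char *name)
{
	printf("unregistered %s\n", name);
}

/* Same shape as olpc_xo1_init() above: unwind the first driver
 * if registering the second one fails. */
static int init_both(int fail_second)
{
	int r = register_drv("cs5535-pms", 0);
	if (r)
		return r;
	r = register_drv("olpc-xo1-pm-acpi", fail_second);
	if (r)
		unregister_drv("cs5535-pms");
	return r;
}

int main(void)
{
	printf("init_both -> %d\n", init_both(1));
	return 0;
}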
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 35658f445fca..9bf13988f1a2 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -193,7 +193,7 @@ static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
 	u64 *cfg_offset);
 static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
 	unsigned long *memory_bar);
-
+static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
 
 /* performant mode helper functions */
 static void calc_bucket_map(int *bucket, int num_buckets, int nsgs,
@@ -231,7 +231,7 @@ static const struct block_device_operations cciss_fops = {
  */
 static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
 {
-	if (likely(h->transMethod == CFGTBL_Trans_Performant))
+	if (likely(h->transMethod & CFGTBL_Trans_Performant))
 		c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
 }
 
@@ -556,6 +556,44 @@ static void __devinit cciss_procinit(ctlr_info_t *h)
 #define to_hba(n) container_of(n, struct ctlr_info, dev)
 #define to_drv(n) container_of(n, drive_info_struct, dev)
 
+/* List of controllers which cannot be reset on kexec with reset_devices */
+static u32 unresettable_controller[] = {
+	0x324a103C, /* Smart Array P712m */
+	0x324b103C, /* SmartArray P711m */
+	0x3223103C, /* Smart Array P800 */
+	0x3234103C, /* Smart Array P400 */
+	0x3235103C, /* Smart Array P400i */
+	0x3211103C, /* Smart Array E200i */
+	0x3212103C, /* Smart Array E200 */
+	0x3213103C, /* Smart Array E200i */
+	0x3214103C, /* Smart Array E200i */
+	0x3215103C, /* Smart Array E200i */
+	0x3237103C, /* Smart Array E500 */
+	0x323D103C, /* Smart Array P700m */
+	0x409C0E11, /* Smart Array 6400 */
+	0x409D0E11, /* Smart Array 6400 EM */
+};
+
+static int ctlr_is_resettable(struct ctlr_info *h)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
+		if (unresettable_controller[i] == h->board_id)
+			return 0;
+	return 1;
+}
+
+static ssize_t host_show_resettable(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct ctlr_info *h = to_hba(dev);
+
+	return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h));
+}
+static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
+
 static ssize_t host_store_rescan(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf, size_t count)
@@ -741,6 +779,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
 
 static struct attribute *cciss_host_attrs[] = {
 	&dev_attr_rescan.attr,
+	&dev_attr_resettable.attr,
 	NULL
 };
 
@@ -973,8 +1012,8 @@ static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c)
 	temp64.val32.upper = c->ErrDesc.Addr.upper;
 	pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
 		c->err_info, (dma_addr_t) temp64.val);
-	pci_free_consistent(h->pdev, sizeof(CommandList_struct),
-		c, (dma_addr_t) c->busaddr);
+	pci_free_consistent(h->pdev, sizeof(CommandList_struct), c,
+		(dma_addr_t) cciss_tag_discard_error_bits(h, (u32) c->busaddr));
 }
 
 static inline ctlr_info_t *get_host(struct gendisk *disk)
@@ -1490,8 +1529,7 @@ static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp)
 		return -EINVAL;
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
-	ioc = (BIG_IOCTL_Command_struct *)
-	    kmalloc(sizeof(*ioc), GFP_KERNEL);
+	ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
 	if (!ioc) {
 		status = -ENOMEM;
 		goto cleanup1;
@@ -2653,6 +2691,10 @@ static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c)
 			c->Request.CDB[0]);
 		return_status = IO_NEEDS_RETRY;
 		break;
+	case CMD_UNABORTABLE:
+		dev_warn(&h->pdev->dev, "cmd unabortable\n");
+		return_status = IO_ERROR;
+		break;
 	default:
 		dev_warn(&h->pdev->dev, "cmd 0x%02x returned "
 			"unknown status %x\n", c->Request.CDB[0],
@@ -3103,6 +3145,13 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
 				DID_PASSTHROUGH : DID_ERROR);
 		break;
+	case CMD_UNABORTABLE:
+		dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
+		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+			cmd->err_info->CommandStatus, DRIVER_OK,
+			cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ?
+				DID_PASSTHROUGH : DID_ERROR);
+		break;
 	default:
 		dev_warn(&h->pdev->dev, "cmd %p returned "
 			"unknown status %x\n", cmd,
@@ -3136,10 +3185,13 @@ static inline u32 cciss_tag_to_index(u32 tag)
 	return tag >> DIRECT_LOOKUP_SHIFT;
 }
 
-static inline u32 cciss_tag_discard_error_bits(u32 tag)
+static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag)
 {
-#define CCISS_ERROR_BITS 0x03
-	return tag & ~CCISS_ERROR_BITS;
+#define CCISS_PERF_ERROR_BITS ((1 << DIRECT_LOOKUP_SHIFT) - 1)
+#define CCISS_SIMPLE_ERROR_BITS 0x03
+	if (likely(h->transMethod & CFGTBL_Trans_Performant))
+		return tag & ~CCISS_PERF_ERROR_BITS;
+	return tag & ~CCISS_SIMPLE_ERROR_BITS;
 }
 
 static inline void cciss_mark_tag_indexed(u32 *tag)
@@ -3359,7 +3411,7 @@ static inline u32 next_command(ctlr_info_t *h)
 {
 	u32 a;
 
-	if (unlikely(h->transMethod != CFGTBL_Trans_Performant))
+	if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
 		return h->access.command_completed(h);
 
 	if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
@@ -3394,14 +3446,12 @@ static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag)
 /* process completion of a non-indexed command */
 static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
 {
-	u32 tag;
 	CommandList_struct *c = NULL;
 	__u32 busaddr_masked, tag_masked;
 
-	tag = cciss_tag_discard_error_bits(raw_tag);
+	tag_masked = cciss_tag_discard_error_bits(h, raw_tag);
 	list_for_each_entry(c, &h->cmpQ, list) {
-		busaddr_masked = cciss_tag_discard_error_bits(c->busaddr);
-		tag_masked = cciss_tag_discard_error_bits(tag);
+		busaddr_masked = cciss_tag_discard_error_bits(h, c->busaddr);
 		if (busaddr_masked == tag_masked) {
 			finish_cmd(h, c, raw_tag);
 			return next_command(h);
@@ -3753,7 +3803,8 @@ static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h)
 	}
 }
 
-static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
+static __devinit void cciss_enter_performant_mode(ctlr_info_t *h,
+	u32 use_short_tags)
 {
 	/* This is a bit complicated. There are 8 registers on
 	 * the controller which we write to to tell it 8 different
@@ -3808,7 +3859,7 @@ static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
 	writel(0, &h->transtable->RepQCtrAddrHigh32);
 	writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
 	writel(0, &h->transtable->RepQAddr0High32);
-	writel(CFGTBL_Trans_Performant,
+	writel(CFGTBL_Trans_Performant | use_short_tags,
 		&(h->cfgtable->HostWrite.TransportRequest));
 
 	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
@@ -3855,7 +3906,8 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
 	if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
 		goto clean_up;
 
-	cciss_enter_performant_mode(h);
+	cciss_enter_performant_mode(h,
+		trans_support & CFGTBL_Trans_use_short_tags);
 
 	/* Change the access methods to the performant access methods */
 	h->access = SA5_performant_access;
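(The recurring cciss theme above is that transMethod is now a bit mask and that tag masking depends on the transport mode: in performant mode the controller hands back the command's bus address with the low DIRECT_LOOKUP_SHIFT bits carrying completion status, so all of them must be cleared before the address is reused, e.g. for pci_free_consistent(); simple mode only ever sets the low two bits. A stand-alone model of the reworked cciss_tag_discard_error_bits(); the shift value is assumed here for illustration:)

#include <stdint.h>
#include <stdio.h>

#define DIRECT_LOOKUP_SHIFT	4	/* value assumed for illustration */
#define PERF_ERROR_BITS		((1u << DIRECT_LOOKUP_SHIFT) - 1)
#define SIMPLE_ERROR_BITS	0x03u

/* Mirror of the reworked helper: the mask now depends on the
 * transport mode instead of being a single constant. */
static uint32_t discard_error_bits(int performant, uint32_t tag)
{
	return tag & ~(performant ? PERF_ERROR_BITS : SIMPLE_ERROR_BITS);
}

int main(void)
{
	uint32_t raw = 0x12345670u | 0xdu;	/* tag with status bits set */

	printf("performant: %#x\n", discard_error_bits(1, raw));	/* 0x12345670 */
	printf("simple:     %#x\n", discard_error_bits(0, raw));	/* 0x1234567c */
	return 0;
}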
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 579f74918493..554bbd907d14 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -222,6 +222,7 @@ static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c)
 			h->ctlr, c->busaddr);
 #endif /* CCISS_DEBUG */
 	writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
+	readl(h->vaddr + SA5_REQUEST_PORT_OFFSET);
 	h->commands_outstanding++;
 	if ( h->commands_outstanding > h->max_outstanding)
 		h->max_outstanding = h->commands_outstanding;
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 35463d2f0ee7..cd441bef031f 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -56,6 +56,7 @@
 
 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
+#define CFGTBL_Trans_use_short_tags 0x20000000l
 
 #define CFGTBL_BusType_Ultra2   0x00000001l
 #define CFGTBL_BusType_Ultra3   0x00000002l
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 727d0225b7d0..df793803f5ae 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -824,13 +824,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
 		break;
 	case CMD_UNSOLICITED_ABORT:
 		cmd->result = DID_ABORT << 16;
-		dev_warn(&h->pdev->dev, "%p aborted do to an "
+		dev_warn(&h->pdev->dev, "%p aborted due to an "
 			"unsolicited abort\n", c);
 		break;
 	case CMD_TIMEOUT:
 		cmd->result = DID_TIME_OUT << 16;
 		dev_warn(&h->pdev->dev, "%p timedout\n", c);
 		break;
+	case CMD_UNABORTABLE:
+		cmd->result = DID_ERROR << 16;
+		dev_warn(&h->pdev->dev, "c %p command "
+			"unabortable\n", c);
+		break;
 	default:
 		cmd->result = DID_ERROR << 16;
 		dev_warn(&h->pdev->dev,
@@ -1007,11 +1012,15 @@ cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
 		break;
 	case CMD_UNSOLICITED_ABORT:
 		dev_warn(&h->pdev->dev,
-			"%p aborted do to an unsolicited abort\n", c);
+			"%p aborted due to an unsolicited abort\n", c);
 		break;
 	case CMD_TIMEOUT:
 		dev_warn(&h->pdev->dev, "%p timedout\n", c);
 		break;
+	case CMD_UNABORTABLE:
+		dev_warn(&h->pdev->dev,
+			"%p unabortable\n", c);
+		break;
 	default:
 		dev_warn(&h->pdev->dev,
 			"%p returned unknown status %x\n",
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index aca302492ff2..2a1642bc451d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -92,7 +92,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 	bio->bi_end_io = drbd_md_io_complete;
 	bio->bi_rw = rw;
 
-	if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
+	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
 		bio_endio(bio, -EIO);
 	else
 		submit_bio(rw, bio);
@@ -176,13 +176,17 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 	struct lc_element *al_ext;
 	struct lc_element *tmp;
 	unsigned long al_flags = 0;
+	int wake;
 
 	spin_lock_irq(&mdev->al_lock);
 	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
 	if (unlikely(tmp != NULL)) {
 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
 		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
+			wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
 			spin_unlock_irq(&mdev->al_lock);
+			if (wake)
+				wake_up(&mdev->al_wait);
 			return NULL;
 		}
 	}
@@ -258,6 +262,33 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
 	spin_unlock_irqrestore(&mdev->al_lock, flags);
 }
 
+#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
+/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
+ * are still coupled, or assume too much about their relation.
+ * Code below will not work if this is violated.
+ * Will be cleaned up with some followup patch.
+ */
+# error FIXME
+#endif
+
+static unsigned int al_extent_to_bm_page(unsigned int al_enr)
+{
+	return al_enr >>
+		/* bit to page */
+		((PAGE_SHIFT + 3) -
+		/* al extent number to bit */
+		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
+}
+
+static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
+{
+	return rs_enr >>
+		/* bit to page */
+		((PAGE_SHIFT + 3) -
+		/* al extent number to bit */
+		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
+}
+
 int
 w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 {
@@ -285,7 +316,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 	 * For now, we must not write the transaction,
 	 * if we cannot write out the bitmap of the evicted extent. */
 	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
-		drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
+		drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));
 
 	/* The bitmap write may have failed, causing a state change. */
 	if (mdev->state.disk < D_INCONSISTENT) {
@@ -334,7 +365,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 		 + mdev->ldev->md.al_offset + mdev->al_tr_pos;
 
 	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
-		drbd_chk_io_error(mdev, 1, TRUE);
+		drbd_chk_io_error(mdev, 1, true);
 
 	if (++mdev->al_tr_pos >
 	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
@@ -511,225 +542,6 @@ cancel:
 	return 1;
 }
 
-static void atodb_endio(struct bio *bio, int error)
-{
-	struct drbd_atodb_wait *wc = bio->bi_private;
-	struct drbd_conf *mdev = wc->mdev;
-	struct page *page;
-	int uptodate = bio_flagged(bio, BIO_UPTODATE);
-
-	/* strange behavior of some lower level drivers...
-	 * fail the request by clearing the uptodate flag,
-	 * but do not return any error?! */
-	if (!error && !uptodate)
-		error = -EIO;
-
-	drbd_chk_io_error(mdev, error, TRUE);
-	if (error && wc->error == 0)
-		wc->error = error;
-
-	if (atomic_dec_and_test(&wc->count))
-		complete(&wc->io_done);
-
-	page = bio->bi_io_vec[0].bv_page;
-	put_page(page);
-	bio_put(bio);
-	mdev->bm_writ_cnt++;
-	put_ldev(mdev);
-}
-
-/* sector to word */
-#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
-
-/* activity log to on disk bitmap -- prepare bio unless that sector
- * is already covered by previously prepared bios */
-static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
-					struct bio **bios,
-					unsigned int enr,
-					struct drbd_atodb_wait *wc) __must_hold(local)
-{
-	struct bio *bio;
-	struct page *page;
-	sector_t on_disk_sector;
-	unsigned int page_offset = PAGE_SIZE;
-	int offset;
-	int i = 0;
-	int err = -ENOMEM;
-
-	/* We always write aligned, full 4k blocks,
-	 * so we can ignore the logical_block_size (for now) */
-	enr &= ~7U;
-	on_disk_sector = enr + mdev->ldev->md.md_offset
-			+ mdev->ldev->md.bm_offset;
-
-	D_ASSERT(!(on_disk_sector & 7U));
-
-	/* Check if that enr is already covered by an already created bio.
-	 * Caution, bios[] is not NULL terminated,
-	 * but only initialized to all NULL.
-	 * For completely scattered activity log,
-	 * the last invocation iterates over all bios,
-	 * and finds the last NULL entry.
-	 */
-	while ((bio = bios[i])) {
-		if (bio->bi_sector == on_disk_sector)
-			return 0;
-		i++;
-	}
-	/* bios[i] == NULL, the next not yet used slot */
-
-	/* GFP_KERNEL, we are not in the write-out path */
-	bio = bio_alloc(GFP_KERNEL, 1);
-	if (bio == NULL)
-		return -ENOMEM;
-
-	if (i > 0) {
-		const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec;
-		page_offset = prev_bv->bv_offset + prev_bv->bv_len;
-		page = prev_bv->bv_page;
-	}
-	if (page_offset == PAGE_SIZE) {
-		page = alloc_page(__GFP_HIGHMEM);
-		if (page == NULL)
-			goto out_bio_put;
-		page_offset = 0;
-	} else {
-		get_page(page);
-	}
-
-	offset = S2W(enr);
-	drbd_bm_get_lel(mdev, offset,
-			min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset),
-			kmap(page) + page_offset);
-	kunmap(page);
-
-	bio->bi_private = wc;
-	bio->bi_end_io = atodb_endio;
-	bio->bi_bdev = mdev->ldev->md_bdev;
-	bio->bi_sector = on_disk_sector;
-
-	if (bio_add_page(bio, page, 4096, page_offset) != 4096)
-		goto out_put_page;
-
-	atomic_inc(&wc->count);
-	/* we already know that we may do this...
-	 * get_ldev_if_state(mdev,D_ATTACHING);
-	 * just get the extra reference, so that the local_cnt reflects
-	 * the number of pending IO requests DRBD at its backing device.
-	 */
-	atomic_inc(&mdev->local_cnt);
-
-	bios[i] = bio;
-
-	return 0;
-
-out_put_page:
-	err = -EINVAL;
-	put_page(page);
-out_bio_put:
-	bio_put(bio);
-	return err;
-}
-
-/**
- * drbd_al_to_on_disk_bm() -  * Writes bitmap parts covered by active AL extents
- * @mdev:	DRBD device.
- *
- * Called when we detach (unconfigure) local storage,
- * or when we go from R_PRIMARY to R_SECONDARY role.
- */
-void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
-{
-	int i, nr_elements;
-	unsigned int enr;
-	struct bio **bios;
-	struct drbd_atodb_wait wc;
-
-	ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING))
-		return; /* sorry, I don't have any act_log etc... */
-
-	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-
-	nr_elements = mdev->act_log->nr_elements;
-
-	/* GFP_KERNEL, we are not in anyone's write-out path */
-	bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL);
-	if (!bios)
-		goto submit_one_by_one;
-
-	atomic_set(&wc.count, 0);
-	init_completion(&wc.io_done);
-	wc.mdev = mdev;
-	wc.error = 0;
-
-	for (i = 0; i < nr_elements; i++) {
-		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-		if (enr == LC_FREE)
-			continue;
-		/* next statement also does atomic_inc wc.count and local_cnt */
-		if (atodb_prepare_unless_covered(mdev, bios,
-						enr/AL_EXT_PER_BM_SECT,
-						&wc))
-			goto free_bios_submit_one_by_one;
-	}
-
-	/* unnecessary optimization? */
-	lc_unlock(mdev->act_log);
-	wake_up(&mdev->al_wait);
-
-	/* all prepared, submit them */
-	for (i = 0; i < nr_elements; i++) {
-		if (bios[i] == NULL)
-			break;
-		if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) {
-			bios[i]->bi_rw = WRITE;
-			bio_endio(bios[i], -EIO);
-		} else {
-			submit_bio(WRITE, bios[i]);
-		}
-	}
-
-	/* always (try to) flush bitmap to stable storage */
-	drbd_md_flush(mdev);
-
-	/* In case we did not submit a single IO do not wait for
-	 * them to complete. ( Because we would wait forever here. )
-	 *
-	 * In case we had IOs and they are already complete, there
-	 * is not point in waiting anyways.
-	 * Therefore this if () ... */
-	if (atomic_read(&wc.count))
-		wait_for_completion(&wc.io_done);
-
-	put_ldev(mdev);
-
-	kfree(bios);
-	return;
-
- free_bios_submit_one_by_one:
-	/* free everything by calling the endio callback directly. */
-	for (i = 0; i < nr_elements && bios[i]; i++)
-		bio_endio(bios[i], 0);
-
-	kfree(bios);
-
- submit_one_by_one:
-	dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");
-
-	for (i = 0; i < mdev->act_log->nr_elements; i++) {
-		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-		if (enr == LC_FREE)
-			continue;
-		/* Really slow: if we have al-extents 16..19 active,
-		 * sector 4 will be written four times! Synchronous! */
-		drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT);
-	}
-
-	lc_unlock(mdev->act_log);
-	wake_up(&mdev->al_wait);
-	put_ldev(mdev);
-}
-
 /**
  * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents
  * @mdev:	DRBD device.
@@ -809,7 +621,7 @@ static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused
 		return 1;
 	}
 
-	drbd_bm_write_sect(mdev, udw->enr);
+	drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
 	put_ldev(mdev);
 
 	kfree(udw);
@@ -889,7 +701,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
 			dev_warn(DEV, "Kicking resync_lru element enr=%u "
 			     "out with rs_failed=%d\n",
 			     ext->lce.lc_number, ext->rs_failed);
-			set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
 		}
 		ext->rs_left = rs_left;
 		ext->rs_failed = success ? 0 : count;
@@ -908,7 +719,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
 			drbd_queue_work_front(&mdev->data.work, &udw->w);
 		} else {
 			dev_warn(DEV, "Could not kmalloc an udw\n");
-			set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
 		}
 	}
 	} else {
@@ -919,6 +729,22 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
 	}
 }
 
+void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
+{
+	unsigned long now = jiffies;
+	unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
+	int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
+	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
+		if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
+		    mdev->state.conn != C_PAUSED_SYNC_T &&
+		    mdev->state.conn != C_PAUSED_SYNC_S) {
+			mdev->rs_mark_time[next] = now;
+			mdev->rs_mark_left[next] = still_to_go;
+			mdev->rs_last_mark = next;
+		}
+	}
+}
+
 /* clear the bit corresponding to the piece of storage in question:
  * size byte of data starting from sector.  Only clear a bits of the affected
  * one ore more _aligned_ BM_BLOCK_SIZE blocks.
@@ -936,7 +762,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
 	int wake_up = 0;
 	unsigned long flags;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
@@ -969,21 +795,9 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
 	 */
 	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
 	if (count && get_ldev(mdev)) {
-		unsigned long now = jiffies;
-		unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
-		int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
-		if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
-			unsigned long tw = drbd_bm_total_weight(mdev);
-			if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
-			    mdev->state.conn != C_PAUSED_SYNC_T &&
-			    mdev->state.conn != C_PAUSED_SYNC_S) {
-				mdev->rs_mark_time[next] = now;
-				mdev->rs_mark_left[next] = tw;
-				mdev->rs_last_mark = next;
-			}
-		}
+		drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
 		spin_lock_irqsave(&mdev->al_lock, flags);
-		drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
+		drbd_try_clear_on_disk_bm(mdev, sector, count, true);
 		spin_unlock_irqrestore(&mdev->al_lock, flags);
 
 		/* just wake_up unconditional now, various lc_chaged(),
@@ -998,27 +812,27 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
 /*
  * this is intended to set one request worth of data out of sync.
  * affects at least 1 bit,
- * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits.
+ * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
  *
  * called by tl_clear and drbd_send_dblock (==drbd_make_request).
  * so this can be _any_ process.
  */
-void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
+int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
 			    const char *file, const unsigned int line)
 {
 	unsigned long sbnr, ebnr, lbnr, flags;
 	sector_t esector, nr_sectors;
-	unsigned int enr, count;
+	unsigned int enr, count = 0;
 	struct lc_element *e;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "sector: %llus, size: %d\n",
 			(unsigned long long)sector, size);
-		return;
+		return 0;
 	}
 
 	if (!get_ldev(mdev))
-		return; /* no disk, no metadata, no bitmap to set bits in */
+		return 0; /* no disk, no metadata, no bitmap to set bits in */
 
 	nr_sectors = drbd_get_capacity(mdev->this_bdev);
 	esector = sector + (size >> 9) - 1;
@@ -1048,6 +862,8 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
 
 out:
 	put_ldev(mdev);
+
+	return count;
 }
 
 static
@@ -1128,7 +944,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 	unsigned int enr = BM_SECT_TO_EXT(sector);
 	struct bm_extent *bm_ext;
 	int i, sig;
+	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
+			 200 times -> 20 seconds. */
 
+retry:
 	sig = wait_event_interruptible(mdev->al_wait,
 			(bm_ext = _bme_get(mdev, enr)));
 	if (sig)
@@ -1139,16 +958,25 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
 		sig = wait_event_interruptible(mdev->al_wait,
-				!_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i));
-		if (sig) {
+				!_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
+				test_bit(BME_PRIORITY, &bm_ext->flags));
+
+		if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
 			spin_lock_irq(&mdev->al_lock);
 			if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
-				clear_bit(BME_NO_WRITES, &bm_ext->flags);
+				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
 				mdev->resync_locked--;
 				wake_up(&mdev->al_wait);
 			}
 			spin_unlock_irq(&mdev->al_lock);
-			return -EINTR;
+			if (sig)
+				return -EINTR;
+			if (schedule_timeout_interruptible(HZ/10))
+				return -EINTR;
+			if (sa && --sa == 0)
+				dev_warn(DEV,"drbd_rs_begin_io() stepped aside for 20sec."
+					 "Resync stalled?\n");
+			goto retry;
 		}
 	}
 	set_bit(BME_LOCKED, &bm_ext->flags);
@@ -1291,8 +1119,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
 	}
 
 	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
-		clear_bit(BME_LOCKED, &bm_ext->flags);
-		clear_bit(BME_NO_WRITES, &bm_ext->flags);
+		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
 		mdev->resync_locked--;
 		wake_up(&mdev->al_wait);
 	}
@@ -1383,7 +1210,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
 	sector_t esector, nr_sectors;
 	int wake_up = 0;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
@@ -1420,7 +1247,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
 	mdev->rs_failed += count;
 
 	if (get_ldev(mdev)) {
-		drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE);
+		drbd_try_clear_on_disk_bm(mdev, sector, count, false);
 		put_ldev(mdev);
 	}
 
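(The al_extent_to_bm_page()/rs_extent_to_bm_page() helpers introduced above replace per-sector bitmap writes with per-page writes; the conversion is pure shift arithmetic, since a 4 KiB page holds 1 << (PAGE_SHIFT + 3) bits and an AL extent covers 1 << (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) of them. A user-space check with DRBD's customary constants, assumed here for illustration:)

#include <stdio.h>

#define PAGE_SHIFT	12	/* 4 KiB pages */
#define BM_BLOCK_SHIFT	12	/* one bitmap bit covers 4 KiB of storage */
#define AL_EXTENT_SHIFT	22	/* one AL extent covers 4 MiB of storage */

/* 32768 bits per page / 1024 bits per extent = 32 extents per page,
 * so the conversion is a right shift by 5. */
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >> ((PAGE_SHIFT + 3) - (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

int main(void)
{
	printf("extent 0  -> page %u\n", al_extent_to_bm_page(0));	/* 0 */
	printf("extent 31 -> page %u\n", al_extent_to_bm_page(31));	/* 0 */
	printf("extent 32 -> page %u\n", al_extent_to_bm_page(32));	/* 1 */
	return 0;
}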
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 0645ca829a94..f0ae63d2df65 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -28,18 +28,58 @@
 #include <linux/drbd.h>
 #include <linux/slab.h>
 #include <asm/kmap_types.h>
+
+#include <asm-generic/bitops/le.h>
+
 #include "drbd_int.h"
 
+
 /* OPAQUE outside this file!
  * interface defined in drbd_int.h
 
  * convention:
  * function name drbd_bm_... => used elsewhere, "public".
  * function name    bm_... => internal to implementation, "private".
+ */
+
+
+/*
+ * LIMITATIONS:
+ * We want to support >= peta byte of backend storage, while for now still using
+ * a granularity of one bit per 4KiB of storage.
+ * 1 << 50		bytes backend storage (1 PiB)
+ * 1 << (50 - 12)	bits needed
+ *	38 --> we need u64 to index and count bits
+ * 1 << (38 - 3)	bitmap bytes needed
+ *	35 --> we still need u64 to index and count bytes
+ *			(that's 32 GiB of bitmap for 1 PiB storage)
+ * 1 << (35 - 2)	32bit longs needed
+ *	33 --> we'd even need u64 to index and count 32bit long words.
+ * 1 << (35 - 3)	64bit longs needed
+ *	32 --> we could get away with a 32bit unsigned int to index and count
+ *	64bit long words, but I rather stay with unsigned long for now.
+ * We probably should neither count nor point to bytes or long words
+ * directly, but either by bitnumber, or by page index and offset.
+ * 1 << (35 - 12)
+ *	22 --> we need that much 4KiB pages of bitmap.
+ *	 1 << (22 + 3) --> on a 64bit arch,
+ *	 we need 32 MiB to store the array of page pointers.
+ *
+ * Because I'm lazy, and because the resulting patch was too large, too ugly
+ * and still incomplete, on 32bit we still "only" support 16 TiB (minus some),
+ * (1 << 32) bits * 4k storage.
+ *
 
- * Note that since find_first_bit returns int, at the current granularity of
- * the bitmap (4KB per byte), this implementation "only" supports up to
- * 1<<(32+12) == 16 TB...
+ * bitmap storage and IO:
+ * Bitmap is stored little endian on disk, and is kept little endian in
+ * core memory. Currently we still hold the full bitmap in core as long
+ * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
+ * seems excessive.
+ *
+ * We plan to reduce the amount of in-core bitmap pages by pageing them in
+ * and out against their on-disk location as necessary, but need to make
+ * sure we don't cause too much meta data IO, and must not deadlock in
+ * tight memory situations. This needs some more work.
  */
 
 /*
@@ -55,13 +95,9 @@
 struct drbd_bitmap {
 	struct page **bm_pages;
 	spinlock_t bm_lock;
-	/* WARNING unsigned long bm_*:
-	 * 32bit number of bit offset is just enough for 512 MB bitmap.
-	 * it will blow up if we make the bitmap bigger...
-	 * not that it makes much sense to have a bitmap that large,
-	 * rather change the granularity to 16k or 64k or something.
-	 * (that implies other problems, however...)
-	 */
+
+	/* see LIMITATIONS: above */
+
 	unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */
 	unsigned long bm_bits;
 	size_t bm_words;
@@ -69,29 +105,18 @@ struct drbd_bitmap {
69 sector_t bm_dev_capacity; 105 sector_t bm_dev_capacity;
70 struct mutex bm_change; /* serializes resize operations */ 106 struct mutex bm_change; /* serializes resize operations */
71 107
72 atomic_t bm_async_io; 108 wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */
73 wait_queue_head_t bm_io_wait;
74 109
75 unsigned long bm_flags; 110 enum bm_flag bm_flags;
76 111
77 /* debugging aid, in case we are still racy somewhere */ 112 /* debugging aid, in case we are still racy somewhere */
78 char *bm_why; 113 char *bm_why;
79 struct task_struct *bm_task; 114 struct task_struct *bm_task;
80}; 115};
81 116
82/* definition of bits in bm_flags */
83#define BM_LOCKED 0
84#define BM_MD_IO_ERROR 1
85#define BM_P_VMALLOCED 2
86
87static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, 117static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
88 unsigned long e, int val, const enum km_type km); 118 unsigned long e, int val, const enum km_type km);
89 119
90static int bm_is_locked(struct drbd_bitmap *b)
91{
92 return test_bit(BM_LOCKED, &b->bm_flags);
93}
94
95#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) 120#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
96static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) 121static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
97{ 122{
@@ -108,7 +133,7 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
108 b->bm_task == mdev->worker.task ? "worker" : "?"); 133 b->bm_task == mdev->worker.task ? "worker" : "?");
109} 134}
110 135
111void drbd_bm_lock(struct drbd_conf *mdev, char *why) 136void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
112{ 137{
113 struct drbd_bitmap *b = mdev->bitmap; 138 struct drbd_bitmap *b = mdev->bitmap;
114 int trylock_failed; 139 int trylock_failed;
@@ -131,8 +156,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
131 b->bm_task == mdev->worker.task ? "worker" : "?"); 156 b->bm_task == mdev->worker.task ? "worker" : "?");
132 mutex_lock(&b->bm_change); 157 mutex_lock(&b->bm_change);
133 } 158 }
134 if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) 159 if (BM_LOCKED_MASK & b->bm_flags)
135 dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); 160 dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
161 b->bm_flags |= flags & BM_LOCKED_MASK;
136 162
137 b->bm_why = why; 163 b->bm_why = why;
138 b->bm_task = current; 164 b->bm_task = current;
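How the flag-based lock above is meant to be used: drbd_bm_lock() records which bitmap operations are forbidden while it is held, and the hot paths merely test the relevant bit. A plain C sketch; the values mirror the enum bm_flag added in the drbd_int.h hunk further down, and all demo names are illustrative:

#include <stdio.h>

enum bm_flag_demo {
	BM_DONT_CLEAR = 0x1,
	BM_DONT_SET   = 0x2,
	BM_DONT_TEST  = 0x4,
	BM_LOCKED_MASK = 0x7,
	BM_LOCKED_TEST_ALLOWED = 0x3,	/* test/count still allowed */
};

int main(void)
{
	unsigned bm_flags = 0;

	/* drbd_bm_lock(mdev, "why", BM_LOCKED_TEST_ALLOWED) */
	if (bm_flags & BM_LOCKED_MASK)
		printf("FIXME bitmap already locked\n");
	bm_flags |= BM_LOCKED_TEST_ALLOWED & BM_LOCKED_MASK;

	printf("set forbidden: %d, test forbidden: %d\n",
	       !!(bm_flags & BM_DONT_SET), !!(bm_flags & BM_DONT_TEST));

	/* drbd_bm_unlock(mdev) */
	bm_flags &= ~BM_LOCKED_MASK;
	return 0;
}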
@@ -146,31 +172,137 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
146 return; 172 return;
147 } 173 }
148 174
149 if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) 175 if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags))
150 dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); 176 dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
151 177
178 b->bm_flags &= ~BM_LOCKED_MASK;
152 b->bm_why = NULL; 179 b->bm_why = NULL;
153 b->bm_task = NULL; 180 b->bm_task = NULL;
154 mutex_unlock(&b->bm_change); 181 mutex_unlock(&b->bm_change);
155} 182}
156 183
157/* word offset to long pointer */ 184/* we store some "meta" info about our pages in page->private */
158static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) 185/* at a granularity of 4k storage per bitmap bit:
186 * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks
187 * 1<<38 bits,
188 * 1<<23 4k bitmap pages.
189 * Use 24 bits as page index, covers 2 peta byte storage
190 * at a granularity of 4k per bit.
191 * Used to report the failed page idx on io error from the endio handlers.
192 */
193#define BM_PAGE_IDX_MASK ((1UL<<24)-1)
194/* this page is currently read in, or written back */
195#define BM_PAGE_IO_LOCK 31
196/* if there has been an IO error for this page */
197#define BM_PAGE_IO_ERROR 30
198/* this is to be able to intelligently skip disk IO,
199 * set if bits have been set since last IO. */
200#define BM_PAGE_NEED_WRITEOUT 29
201/* to mark for lazy writeout once syncer cleared all clearable bits,
 202 * set if bits have been cleared since last IO. */
203#define BM_PAGE_LAZY_WRITEOUT 28
204
 205/* store_page_idx uses non-atomic assignment. It is only used directly after
206 * allocating the page. All other bm_set_page_* and bm_clear_page_* need to
207 * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
208 * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
209 * requires it all to be atomic as well. */
210static void bm_store_page_idx(struct page *page, unsigned long idx)
159{ 211{
160 struct page *page; 212 BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
161 unsigned long page_nr; 213 page_private(page) |= idx;
214}
215
216static unsigned long bm_page_to_idx(struct page *page)
217{
218 return page_private(page) & BM_PAGE_IDX_MASK;
219}
220
 221/* As it is very unlikely that the same page is under IO from more than one
222 * context, we can get away with a bit per page and one wait queue per bitmap.
223 */
224static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr)
225{
226 struct drbd_bitmap *b = mdev->bitmap;
227 void *addr = &page_private(b->bm_pages[page_nr]);
228 wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
229}
230
231static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
232{
233 struct drbd_bitmap *b = mdev->bitmap;
234 void *addr = &page_private(b->bm_pages[page_nr]);
235 clear_bit(BM_PAGE_IO_LOCK, addr);
236 smp_mb__after_clear_bit();
237 wake_up(&mdev->bitmap->bm_io_wait);
238}
239
240/* set _before_ submit_io, so it may be reset due to being changed
241 * while this page is in flight... will get submitted later again */
242static void bm_set_page_unchanged(struct page *page)
243{
244 /* use cmpxchg? */
245 clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
246 clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
247}
162 248
249static void bm_set_page_need_writeout(struct page *page)
250{
251 set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
252}
253
254static int bm_test_page_unchanged(struct page *page)
255{
256 volatile const unsigned long *addr = &page_private(page);
257 return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0;
258}
259
260static void bm_set_page_io_err(struct page *page)
261{
262 set_bit(BM_PAGE_IO_ERROR, &page_private(page));
263}
264
265static void bm_clear_page_io_err(struct page *page)
266{
267 clear_bit(BM_PAGE_IO_ERROR, &page_private(page));
268}
269
270static void bm_set_page_lazy_writeout(struct page *page)
271{
272 set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
273}
274
275static int bm_test_page_lazy_writeout(struct page *page)
276{
277 return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
278}
279
280/* on a 32bit box, this would allow for exactly (2<<38) bits. */
281static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr)
282{
163 /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ 283 /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
164 page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); 284 unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3);
165 BUG_ON(page_nr >= b->bm_number_of_pages); 285 BUG_ON(page_nr >= b->bm_number_of_pages);
166 page = b->bm_pages[page_nr]; 286 return page_nr;
287}
167 288
289static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
290{
291 /* page_nr = (bitnr/8) >> PAGE_SHIFT; */
292 unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3);
293 BUG_ON(page_nr >= b->bm_number_of_pages);
294 return page_nr;
295}
296
297static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
298{
299 struct page *page = b->bm_pages[idx];
168 return (unsigned long *) kmap_atomic(page, km); 300 return (unsigned long *) kmap_atomic(page, km);
169} 301}
170 302
171static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) 303static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
172{ 304{
173 return __bm_map_paddr(b, offset, KM_IRQ1); 305 return __bm_map_pidx(b, idx, KM_IRQ1);
174} 306}
175 307
176static void __bm_unmap(unsigned long *p_addr, const enum km_type km) 308static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
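The page->private packing the helpers above implement, modeled in userspace: the low 24 bits hold the page index, bits 28..31 carry per-page IO state. A minimal sketch with plain (non-atomic) bit operations standing in for the kernel's set_bit/test_bit; all names are illustrative:

#include <assert.h>
#include <stdio.h>

#define DEMO_PAGE_IDX_MASK	((1UL << 24) - 1)
#define DEMO_PAGE_NEED_WRITEOUT	29
#define DEMO_PAGE_LAZY_WRITEOUT	28

int main(void)
{
	unsigned long private = 0;
	unsigned long idx = 0x123456;

	/* bm_store_page_idx(): non-atomic, done once after allocation */
	assert(0 == (idx & ~DEMO_PAGE_IDX_MASK));
	private |= idx;

	/* bm_set_page_need_writeout() / bm_test_page_unchanged() */
	private |= 1UL << DEMO_PAGE_NEED_WRITEOUT;
	int unchanged = !(private & ((1UL << DEMO_PAGE_NEED_WRITEOUT) |
				     (1UL << DEMO_PAGE_LAZY_WRITEOUT)));

	printf("idx=%#lx unchanged=%d\n",
	       private & DEMO_PAGE_IDX_MASK, unchanged);
	return 0;
}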
@@ -202,6 +334,7 @@ static void bm_unmap(unsigned long *p_addr)
202 * to be able to report device specific. 334 * to be able to report device specific.
203 */ 335 */
204 336
337
205static void bm_free_pages(struct page **pages, unsigned long number) 338static void bm_free_pages(struct page **pages, unsigned long number)
206{ 339{
207 unsigned long i; 340 unsigned long i;
@@ -269,6 +402,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
269 bm_vk_free(new_pages, vmalloced); 402 bm_vk_free(new_pages, vmalloced);
270 return NULL; 403 return NULL;
271 } 404 }
405 /* we want to know which page it is
406 * from the endio handlers */
407 bm_store_page_idx(page, i);
272 new_pages[i] = page; 408 new_pages[i] = page;
273 } 409 }
274 } else { 410 } else {
@@ -280,9 +416,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
280 } 416 }
281 417
282 if (vmalloced) 418 if (vmalloced)
283 set_bit(BM_P_VMALLOCED, &b->bm_flags); 419 b->bm_flags |= BM_P_VMALLOCED;
284 else 420 else
285 clear_bit(BM_P_VMALLOCED, &b->bm_flags); 421 b->bm_flags &= ~BM_P_VMALLOCED;
286 422
287 return new_pages; 423 return new_pages;
288} 424}
@@ -319,7 +455,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev)
319{ 455{
320 ERR_IF (!mdev->bitmap) return; 456 ERR_IF (!mdev->bitmap) return;
321 bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); 457 bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
322 bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); 458 bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
323 kfree(mdev->bitmap); 459 kfree(mdev->bitmap);
324 mdev->bitmap = NULL; 460 mdev->bitmap = NULL;
325} 461}
@@ -329,22 +465,39 @@ void drbd_bm_cleanup(struct drbd_conf *mdev)
329 * this masks out the remaining bits. 465 * this masks out the remaining bits.
330 * Returns the number of bits cleared. 466 * Returns the number of bits cleared.
331 */ 467 */
468#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3))
469#define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1)
470#define BITS_PER_LONG_MASK (BITS_PER_LONG - 1)
332static int bm_clear_surplus(struct drbd_bitmap *b) 471static int bm_clear_surplus(struct drbd_bitmap *b)
333{ 472{
334 const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; 473 unsigned long mask;
335 size_t w = b->bm_bits >> LN2_BPL;
336 int cleared = 0;
337 unsigned long *p_addr, *bm; 474 unsigned long *p_addr, *bm;
475 int tmp;
476 int cleared = 0;
338 477
339 p_addr = bm_map_paddr(b, w); 478 /* number of bits modulo bits per page */
340 bm = p_addr + MLPP(w); 479 tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
341 if (w < b->bm_words) { 480 /* mask the used bits of the word containing the last bit */
481 mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1;
482 /* bitmap is always stored little endian,
483 * on disk and in core memory alike */
484 mask = cpu_to_lel(mask);
485
486 p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
487 bm = p_addr + (tmp/BITS_PER_LONG);
488 if (mask) {
489 /* If mask != 0, we are not exactly aligned, so bm now points
490 * to the long containing the last bit.
491 * If mask == 0, bm already points to the word immediately
492 * after the last (long word aligned) bit. */
342 cleared = hweight_long(*bm & ~mask); 493 cleared = hweight_long(*bm & ~mask);
343 *bm &= mask; 494 *bm &= mask;
344 w++; bm++; 495 bm++;
345 } 496 }
346 497
347 if (w < b->bm_words) { 498 if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
499 /* on a 32bit arch, we may need to zero out
500 * a padding long to align with a 64bit remote */
348 cleared += hweight_long(*bm); 501 cleared += hweight_long(*bm);
349 *bm = 0; 502 *bm = 0;
350 } 503 }
@@ -354,66 +507,75 @@ static int bm_clear_surplus(struct drbd_bitmap *b)
354 507
355static void bm_set_surplus(struct drbd_bitmap *b) 508static void bm_set_surplus(struct drbd_bitmap *b)
356{ 509{
357 const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; 510 unsigned long mask;
358 size_t w = b->bm_bits >> LN2_BPL;
359 unsigned long *p_addr, *bm; 511 unsigned long *p_addr, *bm;
360 512 int tmp;
361 p_addr = bm_map_paddr(b, w); 513
362 bm = p_addr + MLPP(w); 514 /* number of bits modulo bits per page */
363 if (w < b->bm_words) { 515 tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
516 /* mask the used bits of the word containing the last bit */
517 mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1;
518 /* bitmap is always stored little endian,
519 * on disk and in core memory alike */
520 mask = cpu_to_lel(mask);
521
522 p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
523 bm = p_addr + (tmp/BITS_PER_LONG);
524 if (mask) {
525 /* If mask != 0, we are not exactly aligned, so bm now points
526 * to the long containing the last bit.
527 * If mask == 0, bm already points to the word immediately
528 * after the last (long word aligned) bit. */
364 *bm |= ~mask; 529 *bm |= ~mask;
365 bm++; w++; 530 bm++;
366 } 531 }
367 532
368 if (w < b->bm_words) { 533 if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
 369 *bm = ~(0UL); 534 /* on a 32bit arch, we may need to set
 535 * a padding long to all ones, to align with a 64bit remote */
536 *bm = ~0UL;
370 } 537 }
371 bm_unmap(p_addr); 538 bm_unmap(p_addr);
372} 539}
373 540
374static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) 541/* you better not modify the bitmap while this is running,
542 * or its results will be stale */
543static unsigned long bm_count_bits(struct drbd_bitmap *b)
375{ 544{
376 unsigned long *p_addr, *bm, offset = 0; 545 unsigned long *p_addr;
377 unsigned long bits = 0; 546 unsigned long bits = 0;
378 unsigned long i, do_now; 547 unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
379 548 int idx, i, last_word;
380 while (offset < b->bm_words) { 549
381 i = do_now = min_t(size_t, b->bm_words-offset, LWPP); 550 /* all but last page */
382 p_addr = __bm_map_paddr(b, offset, KM_USER0); 551 for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
383 bm = p_addr + MLPP(offset); 552 p_addr = __bm_map_pidx(b, idx, KM_USER0);
384 while (i--) { 553 for (i = 0; i < LWPP; i++)
385#ifndef __LITTLE_ENDIAN 554 bits += hweight_long(p_addr[i]);
386 if (swap_endian)
387 *bm = lel_to_cpu(*bm);
388#endif
389 bits += hweight_long(*bm++);
390 }
391 __bm_unmap(p_addr, KM_USER0); 555 __bm_unmap(p_addr, KM_USER0);
392 offset += do_now;
393 cond_resched(); 556 cond_resched();
394 } 557 }
395 558 /* last (or only) page */
559 last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
560 p_addr = __bm_map_pidx(b, idx, KM_USER0);
561 for (i = 0; i < last_word; i++)
562 bits += hweight_long(p_addr[i]);
563 p_addr[last_word] &= cpu_to_lel(mask);
564 bits += hweight_long(p_addr[last_word]);
565 /* 32bit arch, may have an unused padding long */
566 if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
567 p_addr[last_word+1] = 0;
568 __bm_unmap(p_addr, KM_USER0);
396 return bits; 569 return bits;
397} 570}
398 571
399static unsigned long bm_count_bits(struct drbd_bitmap *b)
400{
401 return __bm_count_bits(b, 0);
402}
403
404static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b)
405{
406 return __bm_count_bits(b, 1);
407}
408
409/* offset and len in long words.*/ 572/* offset and len in long words.*/
410static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) 573static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
411{ 574{
412 unsigned long *p_addr, *bm; 575 unsigned long *p_addr, *bm;
576 unsigned int idx;
413 size_t do_now, end; 577 size_t do_now, end;
414 578
415#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)
416
417 end = offset + len; 579 end = offset + len;
418 580
419 if (end > b->bm_words) { 581 if (end > b->bm_words) {
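The recount strategy of bm_count_bits() above — sum whole words, mask the last word so surplus bits never count — as a standalone sketch. __builtin_popcountl stands in for the kernel's hweight_long; a 64bit host is assumed:

#include <stdio.h>

static unsigned long count_bits(const unsigned long *bm,
				unsigned long n_words, unsigned long n_bits)
{
	unsigned long last_mask = (1UL << (n_bits & 63)) - 1;
	unsigned long bits = 0, i;

	for (i = 0; i < n_words - 1; i++)
		bits += __builtin_popcountl(bm[i]);
	/* mask == 0 means the last word is fully used */
	bits += __builtin_popcountl(bm[n_words - 1] &
				    (last_mask ? last_mask : ~0UL));
	return bits;
}

int main(void)
{
	unsigned long bm[2] = { ~0UL, ~0UL };
	printf("%lu\n", count_bits(bm, 2, 100));	/* prints 100 */
	return 0;
}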
@@ -423,15 +585,16 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
423 585
424 while (offset < end) { 586 while (offset < end) {
425 do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; 587 do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
426 p_addr = bm_map_paddr(b, offset); 588 idx = bm_word_to_page_idx(b, offset);
589 p_addr = bm_map_pidx(b, idx);
427 bm = p_addr + MLPP(offset); 590 bm = p_addr + MLPP(offset);
428 if (bm+do_now > p_addr + LWPP) { 591 if (bm+do_now > p_addr + LWPP) {
429 printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", 592 printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
430 p_addr, bm, (int)do_now); 593 p_addr, bm, (int)do_now);
431 break; /* breaks to after catch_oob_access_end() only! */ 594 } else
432 } 595 memset(bm, c, do_now * sizeof(long));
433 memset(bm, c, do_now * sizeof(long));
434 bm_unmap(p_addr); 596 bm_unmap(p_addr);
597 bm_set_page_need_writeout(b->bm_pages[idx]);
435 offset += do_now; 598 offset += do_now;
436 } 599 }
437} 600}
@@ -447,7 +610,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
447int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) 610int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
448{ 611{
449 struct drbd_bitmap *b = mdev->bitmap; 612 struct drbd_bitmap *b = mdev->bitmap;
450 unsigned long bits, words, owords, obits, *p_addr, *bm; 613 unsigned long bits, words, owords, obits;
451 unsigned long want, have, onpages; /* number of pages */ 614 unsigned long want, have, onpages; /* number of pages */
452 struct page **npages, **opages = NULL; 615 struct page **npages, **opages = NULL;
453 int err = 0, growing; 616 int err = 0, growing;
@@ -455,7 +618,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
455 618
456 ERR_IF(!b) return -ENOMEM; 619 ERR_IF(!b) return -ENOMEM;
457 620
458 drbd_bm_lock(mdev, "resize"); 621 drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
459 622
460 dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", 623 dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
461 (unsigned long long)capacity); 624 (unsigned long long)capacity);
@@ -463,7 +626,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
463 if (capacity == b->bm_dev_capacity) 626 if (capacity == b->bm_dev_capacity)
464 goto out; 627 goto out;
465 628
466 opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); 629 opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
467 630
468 if (capacity == 0) { 631 if (capacity == 0) {
469 spin_lock_irq(&b->bm_lock); 632 spin_lock_irq(&b->bm_lock);
@@ -491,18 +654,23 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
491 words = ALIGN(bits, 64) >> LN2_BPL; 654 words = ALIGN(bits, 64) >> LN2_BPL;
492 655
493 if (get_ldev(mdev)) { 656 if (get_ldev(mdev)) {
494 D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); 657 u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12;
495 put_ldev(mdev); 658 put_ldev(mdev);
659 if (bits > bits_on_disk) {
660 dev_info(DEV, "bits = %lu\n", bits);
661 dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk);
662 err = -ENOSPC;
663 goto out;
664 }
496 } 665 }
497 666
498 /* one extra long to catch off by one errors */ 667 want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
499 want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
500 have = b->bm_number_of_pages; 668 have = b->bm_number_of_pages;
501 if (want == have) { 669 if (want == have) {
502 D_ASSERT(b->bm_pages != NULL); 670 D_ASSERT(b->bm_pages != NULL);
503 npages = b->bm_pages; 671 npages = b->bm_pages;
504 } else { 672 } else {
505 if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC)) 673 if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC))
506 npages = NULL; 674 npages = NULL;
507 else 675 else
508 npages = bm_realloc_pages(b, want); 676 npages = bm_realloc_pages(b, want);
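The sizing arithmetic drbd_bm_resize() performs, including the bound the new -ENOSPC check enforces, as a standalone sketch. The constants (LN2_BPL for 64bit longs, the meta data offset, the sample capacity and meta data size) are assumed values for illustration only:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12
#define LN2_BPL		6	/* log2(BITS_PER_LONG) on a 64bit host */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long long capacity = 1ULL << 31; /* sectors: a 1 TiB disk */
	unsigned long bits  = capacity >> 3;	  /* one bit per 4k = 8 sectors */
	unsigned long words = ALIGN(bits, 64) >> LN2_BPL;
	unsigned long want  = ALIGN(words * sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;

	unsigned long long md_size_sect = 1ULL << 17, md_bm_offset = 72;
	unsigned long long bits_on_disk = (md_size_sect - md_bm_offset) << 12;

	printf("bits=%lu words=%lu pages=%lu fits=%d\n", bits, words, want,
	       (unsigned long long)bits <= bits_on_disk);
	return 0;
}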
@@ -542,11 +710,6 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
542 bm_free_pages(opages + want, have - want); 710 bm_free_pages(opages + want, have - want);
543 } 711 }
544 712
545 p_addr = bm_map_paddr(b, words);
546 bm = p_addr + MLPP(words);
547 *bm = DRBD_MAGIC;
548 bm_unmap(p_addr);
549
550 (void)bm_clear_surplus(b); 713 (void)bm_clear_surplus(b);
551 714
552 spin_unlock_irq(&b->bm_lock); 715 spin_unlock_irq(&b->bm_lock);
@@ -554,7 +717,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
554 bm_vk_free(opages, opages_vmalloced); 717 bm_vk_free(opages, opages_vmalloced);
555 if (!growing) 718 if (!growing)
556 b->bm_set = bm_count_bits(b); 719 b->bm_set = bm_count_bits(b);
557 dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words); 720 dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
558 721
559 out: 722 out:
560 drbd_bm_unlock(mdev); 723 drbd_bm_unlock(mdev);
@@ -624,6 +787,7 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
624 struct drbd_bitmap *b = mdev->bitmap; 787 struct drbd_bitmap *b = mdev->bitmap;
625 unsigned long *p_addr, *bm; 788 unsigned long *p_addr, *bm;
626 unsigned long word, bits; 789 unsigned long word, bits;
790 unsigned int idx;
627 size_t end, do_now; 791 size_t end, do_now;
628 792
629 end = offset + number; 793 end = offset + number;
@@ -638,16 +802,18 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
638 spin_lock_irq(&b->bm_lock); 802 spin_lock_irq(&b->bm_lock);
639 while (offset < end) { 803 while (offset < end) {
640 do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; 804 do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
641 p_addr = bm_map_paddr(b, offset); 805 idx = bm_word_to_page_idx(b, offset);
806 p_addr = bm_map_pidx(b, idx);
642 bm = p_addr + MLPP(offset); 807 bm = p_addr + MLPP(offset);
643 offset += do_now; 808 offset += do_now;
644 while (do_now--) { 809 while (do_now--) {
645 bits = hweight_long(*bm); 810 bits = hweight_long(*bm);
646 word = *bm | lel_to_cpu(*buffer++); 811 word = *bm | *buffer++;
647 *bm++ = word; 812 *bm++ = word;
648 b->bm_set += hweight_long(word) - bits; 813 b->bm_set += hweight_long(word) - bits;
649 } 814 }
650 bm_unmap(p_addr); 815 bm_unmap(p_addr);
816 bm_set_page_need_writeout(b->bm_pages[idx]);
651 } 817 }
652 /* with 32bit <-> 64bit cross-platform connect 818 /* with 32bit <-> 64bit cross-platform connect
653 * this is only correct for current usage, 819 * this is only correct for current usage,
@@ -656,7 +822,6 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
656 */ 822 */
657 if (end == b->bm_words) 823 if (end == b->bm_words)
658 b->bm_set -= bm_clear_surplus(b); 824 b->bm_set -= bm_clear_surplus(b);
659
660 spin_unlock_irq(&b->bm_lock); 825 spin_unlock_irq(&b->bm_lock);
661} 826}
662 827
@@ -686,11 +851,11 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
686 else { 851 else {
687 while (offset < end) { 852 while (offset < end) {
688 do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; 853 do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
689 p_addr = bm_map_paddr(b, offset); 854 p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
690 bm = p_addr + MLPP(offset); 855 bm = p_addr + MLPP(offset);
691 offset += do_now; 856 offset += do_now;
692 while (do_now--) 857 while (do_now--)
693 *buffer++ = cpu_to_lel(*bm++); 858 *buffer++ = *bm++;
694 bm_unmap(p_addr); 859 bm_unmap(p_addr);
695 } 860 }
696 } 861 }
@@ -724,9 +889,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)
724 spin_unlock_irq(&b->bm_lock); 889 spin_unlock_irq(&b->bm_lock);
725} 890}
726 891
892struct bm_aio_ctx {
893 struct drbd_conf *mdev;
894 atomic_t in_flight;
895 struct completion done;
896 unsigned flags;
897#define BM_AIO_COPY_PAGES 1
898 int error;
899};
900
901/* bv_page may be a copy, or may be the original */
727static void bm_async_io_complete(struct bio *bio, int error) 902static void bm_async_io_complete(struct bio *bio, int error)
728{ 903{
729 struct drbd_bitmap *b = bio->bi_private; 904 struct bm_aio_ctx *ctx = bio->bi_private;
905 struct drbd_conf *mdev = ctx->mdev;
906 struct drbd_bitmap *b = mdev->bitmap;
907 unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
730 int uptodate = bio_flagged(bio, BIO_UPTODATE); 908 int uptodate = bio_flagged(bio, BIO_UPTODATE);
731 909
732 910
@@ -737,38 +915,83 @@ static void bm_async_io_complete(struct bio *bio, int error)
737 if (!error && !uptodate) 915 if (!error && !uptodate)
738 error = -EIO; 916 error = -EIO;
739 917
918 if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
919 !bm_test_page_unchanged(b->bm_pages[idx]))
920 dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx);
921
740 if (error) { 922 if (error) {
741 /* doh. what now? 923 /* ctx error will hold the completed-last non-zero error code,
742 * for now, set all bits, and flag MD_IO_ERROR */ 924 * in case error codes differ. */
743 __set_bit(BM_MD_IO_ERROR, &b->bm_flags); 925 ctx->error = error;
926 bm_set_page_io_err(b->bm_pages[idx]);
927 /* Not identical to on disk version of it.
928 * Is BM_PAGE_IO_ERROR enough? */
929 if (__ratelimit(&drbd_ratelimit_state))
930 dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n",
931 error, idx);
932 } else {
933 bm_clear_page_io_err(b->bm_pages[idx]);
934 dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx);
744 } 935 }
745 if (atomic_dec_and_test(&b->bm_async_io)) 936
746 wake_up(&b->bm_io_wait); 937 bm_page_unlock_io(mdev, idx);
938
939 /* FIXME give back to page pool */
940 if (ctx->flags & BM_AIO_COPY_PAGES)
941 put_page(bio->bi_io_vec[0].bv_page);
747 942
748 bio_put(bio); 943 bio_put(bio);
944
945 if (atomic_dec_and_test(&ctx->in_flight))
946 complete(&ctx->done);
749} 947}
750 948
751static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) 949static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
752{ 950{
753 /* we are process context. we always get a bio */ 951 /* we are process context. we always get a bio */
754 struct bio *bio = bio_alloc(GFP_KERNEL, 1); 952 struct bio *bio = bio_alloc(GFP_KERNEL, 1);
953 struct drbd_conf *mdev = ctx->mdev;
954 struct drbd_bitmap *b = mdev->bitmap;
955 struct page *page;
755 unsigned int len; 956 unsigned int len;
957
756 sector_t on_disk_sector = 958 sector_t on_disk_sector =
757 mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; 959 mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
758 on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); 960 on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
759 961
760 /* this might happen with very small 962 /* this might happen with very small
761 * flexible external meta data device */ 963 * flexible external meta data device,
964 * or with PAGE_SIZE > 4k */
762 len = min_t(unsigned int, PAGE_SIZE, 965 len = min_t(unsigned int, PAGE_SIZE,
763 (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); 966 (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
764 967
968 /* serialize IO on this page */
969 bm_page_lock_io(mdev, page_nr);
970 /* before memcpy and submit,
971 * so it can be redirtied any time */
972 bm_set_page_unchanged(b->bm_pages[page_nr]);
973
974 if (ctx->flags & BM_AIO_COPY_PAGES) {
975 /* FIXME alloc_page is good enough for now, but actually needs
976 * to use pre-allocated page pool */
977 void *src, *dest;
978 page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT);
979 dest = kmap_atomic(page, KM_USER0);
980 src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
981 memcpy(dest, src, PAGE_SIZE);
982 kunmap_atomic(src, KM_USER1);
983 kunmap_atomic(dest, KM_USER0);
984 bm_store_page_idx(page, page_nr);
985 } else
986 page = b->bm_pages[page_nr];
987
765 bio->bi_bdev = mdev->ldev->md_bdev; 988 bio->bi_bdev = mdev->ldev->md_bdev;
766 bio->bi_sector = on_disk_sector; 989 bio->bi_sector = on_disk_sector;
767 bio_add_page(bio, b->bm_pages[page_nr], len, 0); 990 bio_add_page(bio, page, len, 0);
768 bio->bi_private = b; 991 bio->bi_private = ctx;
769 bio->bi_end_io = bm_async_io_complete; 992 bio->bi_end_io = bm_async_io_complete;
770 993
771 if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { 994 if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
772 bio->bi_rw |= rw; 995 bio->bi_rw |= rw;
773 bio_endio(bio, -EIO); 996 bio_endio(bio, -EIO);
774 } else { 997 } else {
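Where bm_page_io_async() places a page on disk: each 4k bitmap page lands PAGE_SIZE/512 sectors after the previous one, and the length is clamped for small external meta data devices. A sketch with made-up metadata offsets:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	4096UL

int main(void)
{
	unsigned long long md_offset = 100000, bm_offset = 72; /* assumed */
	unsigned long long md_last_sector = 100600;            /* assumed */
	unsigned int page_nr = 3;

	unsigned long long on_disk_sector = md_offset + bm_offset
		+ ((unsigned long long)page_nr << (PAGE_SHIFT - 9));

	unsigned long long avail = (md_last_sector - on_disk_sector + 1) << 9;
	unsigned int len = avail < PAGE_SIZE ? (unsigned int)avail : PAGE_SIZE;

	printf("sector=%llu len=%u\n", on_disk_sector, len);
	return 0;
}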
@@ -776,87 +999,84 @@ static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int
776 } 999 }
777} 1000}
778 1001
779# if defined(__LITTLE_ENDIAN)
780 /* nothing to do, on disk == in memory */
781# define bm_cpu_to_lel(x) ((void)0)
782# else
783static void bm_cpu_to_lel(struct drbd_bitmap *b)
784{
785 /* need to cpu_to_lel all the pages ...
786 * this may be optimized by using
787 * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
788 * the following is still not optimal, but better than nothing */
789 unsigned int i;
790 unsigned long *p_addr, *bm;
791 if (b->bm_set == 0) {
792 /* no page at all; avoid swap if all is 0 */
793 i = b->bm_number_of_pages;
794 } else if (b->bm_set == b->bm_bits) {
795 /* only the last page */
796 i = b->bm_number_of_pages - 1;
797 } else {
798 /* all pages */
799 i = 0;
800 }
801 for (; i < b->bm_number_of_pages; i++) {
802 p_addr = kmap_atomic(b->bm_pages[i], KM_USER0);
803 for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
804 *bm = cpu_to_lel(*bm);
805 kunmap_atomic(p_addr, KM_USER0);
806 }
807}
808# endif
809/* lel_to_cpu == cpu_to_lel */
810# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
811
812/* 1002/*
813 * bm_rw: read/write the whole bitmap from/to its on disk location. 1003 * bm_rw: read/write the whole bitmap from/to its on disk location.
814 */ 1004 */
815static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) 1005static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local)
816{ 1006{
1007 struct bm_aio_ctx ctx = {
1008 .mdev = mdev,
1009 .in_flight = ATOMIC_INIT(1),
1010 .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
1011 .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
1012 };
817 struct drbd_bitmap *b = mdev->bitmap; 1013 struct drbd_bitmap *b = mdev->bitmap;
818 /* sector_t sector; */ 1014 int num_pages, i, count = 0;
819 int bm_words, num_pages, i;
820 unsigned long now; 1015 unsigned long now;
821 char ppb[10]; 1016 char ppb[10];
822 int err = 0; 1017 int err = 0;
823 1018
824 WARN_ON(!bm_is_locked(b)); 1019 /*
825 1020 * We are protected against bitmap disappearing/resizing by holding an
826 /* no spinlock here, the drbd_bm_lock should be enough! */ 1021 * ldev reference (caller must have called get_ldev()).
827 1022 * For read/write, we are protected against changes to the bitmap by
828 bm_words = drbd_bm_words(mdev); 1023 * the bitmap lock (see drbd_bitmap_io).
829 num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; 1024 * For lazy writeout, we don't care for ongoing changes to the bitmap,
1025 * as we submit copies of pages anyways.
1026 */
1027 if (!ctx.flags)
1028 WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
830 1029
831 /* on disk bitmap is little endian */ 1030 num_pages = b->bm_number_of_pages;
832 if (rw == WRITE)
833 bm_cpu_to_lel(b);
834 1031
835 now = jiffies; 1032 now = jiffies;
836 atomic_set(&b->bm_async_io, num_pages);
837 __clear_bit(BM_MD_IO_ERROR, &b->bm_flags);
838 1033
839 /* let the layers below us try to merge these bios... */ 1034 /* let the layers below us try to merge these bios... */
840 for (i = 0; i < num_pages; i++) 1035 for (i = 0; i < num_pages; i++) {
841 bm_page_io_async(mdev, b, i, rw); 1036 /* ignore completely unchanged pages */
1037 if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
1038 break;
1039 if (rw & WRITE) {
1040 if (bm_test_page_unchanged(b->bm_pages[i])) {
1041 dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
1042 continue;
1043 }
1044 /* during lazy writeout,
1045 * ignore those pages not marked for lazy writeout. */
1046 if (lazy_writeout_upper_idx &&
1047 !bm_test_page_lazy_writeout(b->bm_pages[i])) {
1048 dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i);
1049 continue;
1050 }
1051 }
1052 atomic_inc(&ctx.in_flight);
1053 bm_page_io_async(&ctx, i, rw);
1054 ++count;
1055 cond_resched();
1056 }
842 1057
843 wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); 1058 /*
1059 * We initialize ctx.in_flight to one to make sure bm_async_io_complete
1060 * will not complete() early, and decrement / test it here. If there
1061 * are still some bios in flight, we need to wait for them here.
1062 */
1063 if (!atomic_dec_and_test(&ctx.in_flight))
1064 wait_for_completion(&ctx.done);
1065 dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
1066 rw == WRITE ? "WRITE" : "READ",
1067 count, jiffies - now);
844 1068
845 if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { 1069 if (ctx.error) {
846 dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); 1070 dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
847 drbd_chk_io_error(mdev, 1, TRUE); 1071 drbd_chk_io_error(mdev, 1, true);
848 err = -EIO; 1072 err = -EIO; /* ctx.error ? */
849 } 1073 }
850 1074
851 now = jiffies; 1075 now = jiffies;
852 if (rw == WRITE) { 1076 if (rw == WRITE) {
853 /* swap back endianness */
854 bm_lel_to_cpu(b);
855 /* flush bitmap to stable storage */
856 drbd_md_flush(mdev); 1077 drbd_md_flush(mdev);
857 } else /* rw == READ */ { 1078 } else /* rw == READ */ {
858 /* just read, if necessary adjust endianness */ 1079 b->bm_set = bm_count_bits(b);
859 b->bm_set = bm_count_bits_swap_endian(b);
860 dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", 1080 dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
861 jiffies - now); 1081 jiffies - now);
862 } 1082 }
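The submit/complete accounting bm_rw() relies on, modeled sequentially: ctx.in_flight starts at 1 so completions racing with submission can never fire the final complete(); the submitter drops its own reference last. Plain ints here stand in for the kernel's atomic_t and struct completion:

#include <stdio.h>

static int in_flight = 1;	/* the submitter's own reference */
static int done;

static void endio(void)		/* tail of bm_async_io_complete() */
{
	if (--in_flight == 0)
		done = 1;
}

int main(void)
{
	int i;

	for (i = 0; i < 3; i++) {
		++in_flight;	/* submit one page ... */
		endio();	/* ... which may complete immediately */
	}
	/* all IO has completed, yet done is still 0 here */
	printf("after submit: in_flight=%d done=%d\n", in_flight, done);

	if (--in_flight == 0)	/* drop the submitter's reference; the */
		done = 1;	/* kernel would wait_for_completion() */
	printf("final: done=%d\n", done);
	return 0;
}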
@@ -874,112 +1094,128 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
874 */ 1094 */
875int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) 1095int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
876{ 1096{
877 return bm_rw(mdev, READ); 1097 return bm_rw(mdev, READ, 0);
878} 1098}
879 1099
880/** 1100/**
881 * drbd_bm_write() - Write the whole bitmap to its on disk location. 1101 * drbd_bm_write() - Write the whole bitmap to its on disk location.
882 * @mdev: DRBD device. 1102 * @mdev: DRBD device.
1103 *
1104 * Will only write pages that have changed since last IO.
883 */ 1105 */
884int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) 1106int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
885{ 1107{
886 return bm_rw(mdev, WRITE); 1108 return bm_rw(mdev, WRITE, 0);
887} 1109}
888 1110
889/** 1111/**
890 * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap 1112 * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
891 * @mdev: DRBD device. 1113 * @mdev: DRBD device.
892 * @enr: Extent number in the resync lru (happens to be sector offset) 1114 * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
893 *
894 * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered
895 * by a single sector write. Therefore enr == sector offset from the
896 * start of the bitmap.
897 */ 1115 */
898int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) 1116int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)
899{ 1117{
900 sector_t on_disk_sector = enr + mdev->ldev->md.md_offset 1118 return bm_rw(mdev, WRITE, upper_idx);
901 + mdev->ldev->md.bm_offset; 1119}
902 int bm_words, num_words, offset; 1120
903 int err = 0;
904 1121
905 mutex_lock(&mdev->md_io_mutex); 1122/**
906 bm_words = drbd_bm_words(mdev); 1123 * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
907 offset = S2W(enr); /* word offset into bitmap */ 1124 * @mdev: DRBD device.
908 num_words = min(S2W(1), bm_words - offset); 1125 * @idx: bitmap page index
909 if (num_words < S2W(1)) 1126 *
910 memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); 1127 * We don't want to special case on logical_block_size of the backend device,
911 drbd_bm_get_lel(mdev, offset, num_words, 1128 * so we submit PAGE_SIZE aligned pieces.
912 page_address(mdev->md_io_page)); 1129 * Note that on "most" systems, PAGE_SIZE is 4k.
913 if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) { 1130 *
914 int i; 1131 * In case this becomes an issue on systems with larger PAGE_SIZE,
915 err = -EIO; 1132 * we may want to change this again to write 4k aligned 4k pieces.
916 dev_err(DEV, "IO ERROR writing bitmap sector %lu " 1133 */
917 "(meta-disk sector %llus)\n", 1134int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
918 enr, (unsigned long long)on_disk_sector); 1135{
919 drbd_chk_io_error(mdev, 1, TRUE); 1136 struct bm_aio_ctx ctx = {
920 for (i = 0; i < AL_EXT_PER_BM_SECT; i++) 1137 .mdev = mdev,
921 drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); 1138 .in_flight = ATOMIC_INIT(1),
1139 .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
1140 .flags = BM_AIO_COPY_PAGES,
1141 };
1142
1143 if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
1144 dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
1145 return 0;
922 } 1146 }
1147
1148 bm_page_io_async(&ctx, idx, WRITE_SYNC);
1149 wait_for_completion(&ctx.done);
1150
1151 if (ctx.error)
1152 drbd_chk_io_error(mdev, 1, true);
1153 /* that should force detach, so the in memory bitmap will be
1154 * gone in a moment as well. */
1155
923 mdev->bm_writ_cnt++; 1156 mdev->bm_writ_cnt++;
924 mutex_unlock(&mdev->md_io_mutex); 1157 return ctx.error;
925 return err;
926} 1158}
927 1159
928/* NOTE 1160/* NOTE
929 * find_first_bit returns int, we return unsigned long. 1161 * find_first_bit returns int, we return unsigned long.
930 * should not make much difference anyways, but ... 1162 * For this to work on 32bit arch with bitnumbers > (1<<32),
1163 * we'd need to return u64, and get a whole lot of other places
1164 * fixed where we still use unsigned long.
931 * 1165 *
932 * this returns a bit number, NOT a sector! 1166 * this returns a bit number, NOT a sector!
933 */ 1167 */
934#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1)
935static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, 1168static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
936 const int find_zero_bit, const enum km_type km) 1169 const int find_zero_bit, const enum km_type km)
937{ 1170{
938 struct drbd_bitmap *b = mdev->bitmap; 1171 struct drbd_bitmap *b = mdev->bitmap;
939 unsigned long i = -1UL;
940 unsigned long *p_addr; 1172 unsigned long *p_addr;
941 unsigned long bit_offset; /* bit offset of the mapped page. */ 1173 unsigned long bit_offset;
1174 unsigned i;
1175
942 1176
943 if (bm_fo > b->bm_bits) { 1177 if (bm_fo > b->bm_bits) {
944 dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); 1178 dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
1179 bm_fo = DRBD_END_OF_BITMAP;
945 } else { 1180 } else {
946 while (bm_fo < b->bm_bits) { 1181 while (bm_fo < b->bm_bits) {
947 unsigned long offset; 1182 /* bit offset of the first bit in the page */
948 bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */ 1183 bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
949 offset = bit_offset >> LN2_BPL; /* word offset of the page */ 1184 p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
950 p_addr = __bm_map_paddr(b, offset, km);
951 1185
952 if (find_zero_bit) 1186 if (find_zero_bit)
953 i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); 1187 i = generic_find_next_zero_le_bit(p_addr,
1188 PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
954 else 1189 else
955 i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); 1190 i = generic_find_next_le_bit(p_addr,
1191 PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
956 1192
957 __bm_unmap(p_addr, km); 1193 __bm_unmap(p_addr, km);
958 if (i < PAGE_SIZE*8) { 1194 if (i < PAGE_SIZE*8) {
959 i = bit_offset + i; 1195 bm_fo = bit_offset + i;
960 if (i >= b->bm_bits) 1196 if (bm_fo >= b->bm_bits)
961 break; 1197 break;
962 goto found; 1198 goto found;
963 } 1199 }
964 bm_fo = bit_offset + PAGE_SIZE*8; 1200 bm_fo = bit_offset + PAGE_SIZE*8;
965 } 1201 }
966 i = -1UL; 1202 bm_fo = DRBD_END_OF_BITMAP;
967 } 1203 }
968 found: 1204 found:
969 return i; 1205 return bm_fo;
970} 1206}
971 1207
972static unsigned long bm_find_next(struct drbd_conf *mdev, 1208static unsigned long bm_find_next(struct drbd_conf *mdev,
973 unsigned long bm_fo, const int find_zero_bit) 1209 unsigned long bm_fo, const int find_zero_bit)
974{ 1210{
975 struct drbd_bitmap *b = mdev->bitmap; 1211 struct drbd_bitmap *b = mdev->bitmap;
976 unsigned long i = -1UL; 1212 unsigned long i = DRBD_END_OF_BITMAP;
977 1213
978 ERR_IF(!b) return i; 1214 ERR_IF(!b) return i;
979 ERR_IF(!b->bm_pages) return i; 1215 ERR_IF(!b->bm_pages) return i;
980 1216
981 spin_lock_irq(&b->bm_lock); 1217 spin_lock_irq(&b->bm_lock);
982 if (bm_is_locked(b)) 1218 if (BM_DONT_TEST & b->bm_flags)
983 bm_print_lock_info(mdev); 1219 bm_print_lock_info(mdev);
984 1220
985 i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); 1221 i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
@@ -1005,13 +1241,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo
1005 * you must take drbd_bm_lock() first */ 1241 * you must take drbd_bm_lock() first */
1006unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) 1242unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
1007{ 1243{
1008 /* WARN_ON(!bm_is_locked(mdev)); */ 1244 /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
1009 return __bm_find_next(mdev, bm_fo, 0, KM_USER1); 1245 return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
1010} 1246}
1011 1247
1012unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) 1248unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
1013{ 1249{
1014 /* WARN_ON(!bm_is_locked(mdev)); */ 1250 /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
1015 return __bm_find_next(mdev, bm_fo, 1, KM_USER1); 1251 return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
1016} 1252}
1017 1253
@@ -1027,8 +1263,9 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
1027 struct drbd_bitmap *b = mdev->bitmap; 1263 struct drbd_bitmap *b = mdev->bitmap;
1028 unsigned long *p_addr = NULL; 1264 unsigned long *p_addr = NULL;
1029 unsigned long bitnr; 1265 unsigned long bitnr;
1030 unsigned long last_page_nr = -1UL; 1266 unsigned int last_page_nr = -1U;
1031 int c = 0; 1267 int c = 0;
1268 int changed_total = 0;
1032 1269
1033 if (e >= b->bm_bits) { 1270 if (e >= b->bm_bits) {
1034 dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", 1271 dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
@@ -1036,23 +1273,33 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
1036 e = b->bm_bits ? b->bm_bits -1 : 0; 1273 e = b->bm_bits ? b->bm_bits -1 : 0;
1037 } 1274 }
1038 for (bitnr = s; bitnr <= e; bitnr++) { 1275 for (bitnr = s; bitnr <= e; bitnr++) {
1039 unsigned long offset = bitnr>>LN2_BPL; 1276 unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
1040 unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
1041 if (page_nr != last_page_nr) { 1277 if (page_nr != last_page_nr) {
1042 if (p_addr) 1278 if (p_addr)
1043 __bm_unmap(p_addr, km); 1279 __bm_unmap(p_addr, km);
1044 p_addr = __bm_map_paddr(b, offset, km); 1280 if (c < 0)
1281 bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
1282 else if (c > 0)
1283 bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
1284 changed_total += c;
1285 c = 0;
1286 p_addr = __bm_map_pidx(b, page_nr, km);
1045 last_page_nr = page_nr; 1287 last_page_nr = page_nr;
1046 } 1288 }
1047 if (val) 1289 if (val)
1048 c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr)); 1290 c += (0 == generic___test_and_set_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr));
1049 else 1291 else
1050 c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr)); 1292 c -= (0 != generic___test_and_clear_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr));
1051 } 1293 }
1052 if (p_addr) 1294 if (p_addr)
1053 __bm_unmap(p_addr, km); 1295 __bm_unmap(p_addr, km);
1054 b->bm_set += c; 1296 if (c < 0)
1055 return c; 1297 bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
1298 else if (c > 0)
1299 bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
1300 changed_total += c;
1301 b->bm_set += changed_total;
1302 return changed_total;
1056} 1303}
1057 1304
1058/* returns number of bits actually changed. 1305/* returns number of bits actually changed.
@@ -1070,7 +1317,7 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
1070 ERR_IF(!b->bm_pages) return 0; 1317 ERR_IF(!b->bm_pages) return 0;
1071 1318
1072 spin_lock_irqsave(&b->bm_lock, flags); 1319 spin_lock_irqsave(&b->bm_lock, flags);
1073 if (bm_is_locked(b)) 1320 if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
1074 bm_print_lock_info(mdev); 1321 bm_print_lock_info(mdev);
1075 1322
1076 c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); 1323 c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);
@@ -1187,12 +1434,11 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
1187 ERR_IF(!b->bm_pages) return 0; 1434 ERR_IF(!b->bm_pages) return 0;
1188 1435
1189 spin_lock_irqsave(&b->bm_lock, flags); 1436 spin_lock_irqsave(&b->bm_lock, flags);
1190 if (bm_is_locked(b)) 1437 if (BM_DONT_TEST & b->bm_flags)
1191 bm_print_lock_info(mdev); 1438 bm_print_lock_info(mdev);
1192 if (bitnr < b->bm_bits) { 1439 if (bitnr < b->bm_bits) {
1193 unsigned long offset = bitnr>>LN2_BPL; 1440 p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
1194 p_addr = bm_map_paddr(b, offset); 1441 i = generic_test_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
1195 i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0;
1196 bm_unmap(p_addr); 1442 bm_unmap(p_addr);
1197 } else if (bitnr == b->bm_bits) { 1443 } else if (bitnr == b->bm_bits) {
1198 i = -1; 1444 i = -1;
@@ -1210,10 +1456,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
1210{ 1456{
1211 unsigned long flags; 1457 unsigned long flags;
1212 struct drbd_bitmap *b = mdev->bitmap; 1458 struct drbd_bitmap *b = mdev->bitmap;
1213 unsigned long *p_addr = NULL, page_nr = -1; 1459 unsigned long *p_addr = NULL;
1214 unsigned long bitnr; 1460 unsigned long bitnr;
1461 unsigned int page_nr = -1U;
1215 int c = 0; 1462 int c = 0;
1216 size_t w;
1217 1463
1218 /* If this is called without a bitmap, that is a bug. But just to be 1464 /* If this is called without a bitmap, that is a bug. But just to be
1219 * robust in case we screwed up elsewhere, in that case pretend there 1465 * robust in case we screwed up elsewhere, in that case pretend there
@@ -1223,20 +1469,20 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
1223 ERR_IF(!b->bm_pages) return 1; 1469 ERR_IF(!b->bm_pages) return 1;
1224 1470
1225 spin_lock_irqsave(&b->bm_lock, flags); 1471 spin_lock_irqsave(&b->bm_lock, flags);
1226 if (bm_is_locked(b)) 1472 if (BM_DONT_TEST & b->bm_flags)
1227 bm_print_lock_info(mdev); 1473 bm_print_lock_info(mdev);
1228 for (bitnr = s; bitnr <= e; bitnr++) { 1474 for (bitnr = s; bitnr <= e; bitnr++) {
1229 w = bitnr >> LN2_BPL; 1475 unsigned int idx = bm_bit_to_page_idx(b, bitnr);
1230 if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) { 1476 if (page_nr != idx) {
1231 page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3); 1477 page_nr = idx;
1232 if (p_addr) 1478 if (p_addr)
1233 bm_unmap(p_addr); 1479 bm_unmap(p_addr);
1234 p_addr = bm_map_paddr(b, w); 1480 p_addr = bm_map_pidx(b, idx);
1235 } 1481 }
1236 ERR_IF (bitnr >= b->bm_bits) { 1482 ERR_IF (bitnr >= b->bm_bits) {
1237 dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); 1483 dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
1238 } else { 1484 } else {
1239 c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); 1485 c += (0 != generic_test_le_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
1240 } 1486 }
1241 } 1487 }
1242 if (p_addr) 1488 if (p_addr)
@@ -1271,7 +1517,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
1271 ERR_IF(!b->bm_pages) return 0; 1517 ERR_IF(!b->bm_pages) return 0;
1272 1518
1273 spin_lock_irqsave(&b->bm_lock, flags); 1519 spin_lock_irqsave(&b->bm_lock, flags);
1274 if (bm_is_locked(b)) 1520 if (BM_DONT_TEST & b->bm_flags)
1275 bm_print_lock_info(mdev); 1521 bm_print_lock_info(mdev);
1276 1522
1277 s = S2W(enr); 1523 s = S2W(enr);
@@ -1279,7 +1525,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
1279 count = 0; 1525 count = 0;
1280 if (s < b->bm_words) { 1526 if (s < b->bm_words) {
1281 int n = e-s; 1527 int n = e-s;
1282 p_addr = bm_map_paddr(b, s); 1528 p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
1283 bm = p_addr + MLPP(s); 1529 bm = p_addr + MLPP(s);
1284 while (n--) 1530 while (n--)
1285 count += hweight_long(*bm++); 1531 count += hweight_long(*bm++);
@@ -1291,18 +1537,20 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
1291 return count; 1537 return count;
1292} 1538}
1293 1539
1294/* set all bits covered by the AL-extent al_enr */ 1540/* Set all bits covered by the AL-extent al_enr.
1541 * Returns number of bits changed. */
1295unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) 1542unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
1296{ 1543{
1297 struct drbd_bitmap *b = mdev->bitmap; 1544 struct drbd_bitmap *b = mdev->bitmap;
1298 unsigned long *p_addr, *bm; 1545 unsigned long *p_addr, *bm;
1299 unsigned long weight; 1546 unsigned long weight;
1300 int count, s, e, i, do_now; 1547 unsigned long s, e;
1548 int count, i, do_now;
1301 ERR_IF(!b) return 0; 1549 ERR_IF(!b) return 0;
1302 ERR_IF(!b->bm_pages) return 0; 1550 ERR_IF(!b->bm_pages) return 0;
1303 1551
1304 spin_lock_irq(&b->bm_lock); 1552 spin_lock_irq(&b->bm_lock);
1305 if (bm_is_locked(b)) 1553 if (BM_DONT_SET & b->bm_flags)
1306 bm_print_lock_info(mdev); 1554 bm_print_lock_info(mdev);
1307 weight = b->bm_set; 1555 weight = b->bm_set;
1308 1556
@@ -1314,7 +1562,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
1314 count = 0; 1562 count = 0;
1315 if (s < b->bm_words) { 1563 if (s < b->bm_words) {
1316 i = do_now = e-s; 1564 i = do_now = e-s;
1317 p_addr = bm_map_paddr(b, s); 1565 p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
1318 bm = p_addr + MLPP(s); 1566 bm = p_addr + MLPP(s);
1319 while (i--) { 1567 while (i--) {
1320 count += hweight_long(*bm); 1568 count += hweight_long(*bm);
@@ -1326,7 +1574,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
1326 if (e == b->bm_words) 1574 if (e == b->bm_words)
1327 b->bm_set -= bm_clear_surplus(b); 1575 b->bm_set -= bm_clear_surplus(b);
1328 } else { 1576 } else {
1329 dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); 1577 dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
1330 } 1578 }
1331 weight = b->bm_set - weight; 1579 weight = b->bm_set - weight;
1332 spin_unlock_irq(&b->bm_lock); 1580 spin_unlock_irq(&b->bm_lock);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b0bd27dfc1e8..81030d8d654b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -72,13 +72,6 @@ extern int fault_devs;
72extern char usermode_helper[]; 72extern char usermode_helper[];
73 73
74 74
75#ifndef TRUE
76#define TRUE 1
77#endif
78#ifndef FALSE
79#define FALSE 0
80#endif
81
82/* I don't remember why XCPU ... 75/* I don't remember why XCPU ...
83 * This is used to wake the asender, 76 * This is used to wake the asender,
84 * and to interrupt sending the sending task 77 * and to interrupt sending the sending task
@@ -104,6 +97,7 @@ extern char usermode_helper[];
104#define ID_SYNCER (-1ULL) 97#define ID_SYNCER (-1ULL)
105#define ID_VACANT 0 98#define ID_VACANT 0
106#define is_syncer_block_id(id) ((id) == ID_SYNCER) 99#define is_syncer_block_id(id) ((id) == ID_SYNCER)
100#define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)
107 101
108struct drbd_conf; 102struct drbd_conf;
109 103
@@ -137,20 +131,19 @@ enum {
137 DRBD_FAULT_MAX, 131 DRBD_FAULT_MAX,
138}; 132};
139 133
140#ifdef CONFIG_DRBD_FAULT_INJECTION
141extern unsigned int 134extern unsigned int
142_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); 135_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type);
136
143static inline int 137static inline int
144drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { 138drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
139#ifdef CONFIG_DRBD_FAULT_INJECTION
145 return fault_rate && 140 return fault_rate &&
146 (enable_faults & (1<<type)) && 141 (enable_faults & (1<<type)) &&
147 _drbd_insert_fault(mdev, type); 142 _drbd_insert_fault(mdev, type);
148}
149#define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t)))
150
151#else 143#else
152#define FAULT_ACTIVE(_m, _t) (0) 144 return 0;
153#endif 145#endif
146}
154 147
155/* integer division, round _UP_ to the next integer */ 148/* integer division, round _UP_ to the next integer */
156#define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0)) 149#define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0))
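The drbd_insert_fault() change above moves the #ifdef inside one static inline instead of keeping two FAULT_ACTIVE() variants: with the option off, the body is a constant 0 and call sites compile away. A minimal illustration under an assumed demo config symbol:

#include <stdio.h>

/* #define CONFIG_FAULT_INJECTION_DEMO */

static int fault_rate = 10;

static inline int insert_fault_demo(unsigned int type)
{
#ifdef CONFIG_FAULT_INJECTION_DEMO
	return fault_rate && (type & 1);	/* stand-in for the real check */
#else
	(void)type;
	return 0;	/* constant-folds away when disabled */
#endif
}

int main(void)
{
	printf("fault? %d (fault_rate=%d)\n", insert_fault_demo(3), fault_rate);
	return 0;
}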
@@ -212,8 +205,10 @@ enum drbd_packets {
212 /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ 205 /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */
213 /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ 206 /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */
214 P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ 207 P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */
208 P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */
209 P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
215 210
216 P_MAX_CMD = 0x28, 211 P_MAX_CMD = 0x2A,
217 P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ 212 P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
218 P_MAX_OPT_CMD = 0x101, 213 P_MAX_OPT_CMD = 0x101,
219 214
@@ -269,6 +264,7 @@ static inline const char *cmdname(enum drbd_packets cmd)
269 [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", 264 [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
270 [P_COMPRESSED_BITMAP] = "CBitmap", 265 [P_COMPRESSED_BITMAP] = "CBitmap",
271 [P_DELAY_PROBE] = "DelayProbe", 266 [P_DELAY_PROBE] = "DelayProbe",
267 [P_OUT_OF_SYNC] = "OutOfSync",
272 [P_MAX_CMD] = NULL, 268 [P_MAX_CMD] = NULL,
273 }; 269 };
274 270
@@ -512,7 +508,7 @@ struct p_sizes {
512 u64 d_size; /* size of disk */ 508 u64 d_size; /* size of disk */
513 u64 u_size; /* user requested size */ 509 u64 u_size; /* user requested size */
514 u64 c_size; /* current exported size */ 510 u64 c_size; /* current exported size */
515 u32 max_segment_size; /* Maximal size of a BIO */ 511 u32 max_bio_size; /* Maximal size of a BIO */
516 u16 queue_order_type; /* not yet implemented in DRBD*/ 512 u16 queue_order_type; /* not yet implemented in DRBD*/
517 u16 dds_flags; /* use enum dds_flags here. */ 513 u16 dds_flags; /* use enum dds_flags here. */
518} __packed; 514} __packed;
@@ -550,6 +546,13 @@ struct p_discard {
550 u32 pad; 546 u32 pad;
551} __packed; 547} __packed;
552 548
549struct p_block_desc {
550 struct p_header80 head;
551 u64 sector;
552 u32 blksize;
553 u32 pad; /* to multiple of 8 Byte */
554} __packed;
555
553/* Valid values for the encoding field. 556/* Valid values for the encoding field.
554 * Bump proto version when changing this. */ 557 * Bump proto version when changing this. */
555enum drbd_bitmap_code { 558enum drbd_bitmap_code {
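A size check for the new p_block_desc wire format above: sector, blksize and pad keep the payload a multiple of 8 bytes, as the in-struct comment demands. The packed 8-byte demo header merely stands in for p_header80:

#include <stdint.h>
#include <stdio.h>

struct demo_header {
	uint32_t magic;
	uint16_t command;
	uint16_t length;
} __attribute__((packed));

struct demo_block_desc {
	struct demo_header head;
	uint64_t sector;
	uint32_t blksize;
	uint32_t pad;		/* to multiple of 8 byte */
} __attribute__((packed));

int main(void)
{
	size_t payload = sizeof(struct demo_block_desc) - sizeof(struct demo_header);

	printf("payload=%zu, multiple of 8: %d\n", payload, payload % 8 == 0);
	return 0;
}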
@@ -647,6 +650,7 @@ union p_polymorph {
 	struct p_block_req	block_req;
 	struct p_delay_probe93	delay_probe93;
 	struct p_rs_uuid	rs_uuid;
+	struct p_block_desc	block_desc;
 } __packed;
 
 /**********************************************************************/
@@ -677,13 +681,6 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
 	return thi->t_state;
 }
 
-
-/*
- * Having this as the first member of a struct provides sort of "inheritance".
- * "derived" structs can be "drbd_queue_work()"ed.
- * The callback should know and cast back to the descendant struct.
- * drbd_request and drbd_epoch_entry are descendants of drbd_work.
- */
 struct drbd_work;
 typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
 struct drbd_work {
@@ -712,9 +709,6 @@ struct drbd_request {
 	 * starting a new epoch...
 	 */
 
-	/* up to here, the struct layout is identical to drbd_epoch_entry;
-	 * we might be able to use that to our advantage... */
-
 	struct list_head tl_requests; /* ring list in the transfer log */
 	struct bio *master_bio;       /* master bio pointer */
 	unsigned long rq_state; /* see comments above _req_mod() */
@@ -831,7 +825,7 @@ enum {
 	CRASHED_PRIMARY,	/* This node was a crashed primary.
 				 * Gets cleared when the state.conn
 				 * goes into C_CONNECTED state. */
-	WRITE_BM_AFTER_RESYNC,	/* A kmalloc() during resync failed */
+	NO_BARRIER_SUPP,	/* underlying block device doesn't implement barriers */
 	CONSIDER_RESYNC,
 
 	MD_NO_FUA,		/* User wants us to not use FUA/FLUSH on meta data dev */
@@ -856,10 +850,37 @@ enum {
 	GOT_PING_ACK,		/* set when we receive a ping_ack packet, misc wait gets woken */
 	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
 	AL_SUSPENDED,		/* Activity logging is currently suspended. */
+	AHEAD_TO_SYNC_SOURCE,	/* Ahead -> SyncSource queued */
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
 
+/* definition of bits in bm_flags to be used in drbd_bm_lock
+ * and drbd_bitmap_io and friends. */
+enum bm_flag {
+	/* do we need to kfree, or vfree bm_pages? */
+	BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
+
+	/* currently locked for bulk operation */
+	BM_LOCKED_MASK = 0x7,
+
+	/* in detail, that is: */
+	BM_DONT_CLEAR = 0x1,
+	BM_DONT_SET = 0x2,
+	BM_DONT_TEST = 0x4,
+
+	/* (test bit, count bit) allowed (common case) */
+	BM_LOCKED_TEST_ALLOWED = 0x3,
+
+	/* testing bits, as well as setting new bits allowed, but clearing bits
+	 * would be unexpected. Used during bitmap receive. Setting new bits
+	 * requires sending of "out-of-sync" information, though. */
+	BM_LOCKED_SET_ALLOWED = 0x1,
+
+	/* clear is not expected while bitmap is locked for bulk operation */
+};
+
+
 /* TODO sort members for performance
  * MAYBE group them further */
 
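The new bm_flag bits above compose by OR: BM_LOCKED_TEST_ALLOWED (0x3) is BM_DONT_CLEAR | BM_DONT_SET, so a bitmap locked that way may still be tested and counted, while BM_LOCKED_SET_ALLOWED (0x1) additionally permits setting bits. A standalone sketch of how a guard would consult those bits (try_set_bit() is illustrative, not a DRBD function):

#include <stdio.h>

enum bm_flag {
	BM_DONT_CLEAR = 0x1,
	BM_DONT_SET   = 0x2,
	BM_DONT_TEST  = 0x4,
	BM_LOCKED_MASK = 0x7,
	BM_LOCKED_TEST_ALLOWED = 0x3,	/* DONT_CLEAR | DONT_SET */
	BM_LOCKED_SET_ALLOWED  = 0x1,	/* DONT_CLEAR only */
};

static void try_set_bit(unsigned int bm_flags)
{
	/* mirror of the "is this operation expected under the
	 * current bulk lock" check */
	if (bm_flags & BM_DONT_SET)
		printf("warning: setting bits while bitmap is locked\n");
	else
		printf("set ok\n");
}

int main(void)
{
	try_set_bit(BM_LOCKED_TEST_ALLOWED);	/* warns */
	try_set_bit(BM_LOCKED_SET_ALLOWED);	/* set ok */
	return 0;
}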
@@ -925,6 +946,7 @@ struct drbd_md_io {
 struct bm_io_work {
 	struct drbd_work w;
 	char *why;
+	enum bm_flag flags;
 	int (*io_fn)(struct drbd_conf *mdev);
 	void (*done)(struct drbd_conf *mdev, int rv);
 };
@@ -963,9 +985,12 @@ struct drbd_conf {
 	struct drbd_work  resync_work,
 			  unplug_work,
 			  go_diskless,
-			  md_sync_work;
+			  md_sync_work,
+			  start_resync_work;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
+	struct timer_list start_resync_timer;
+	struct timer_list request_timer;
 #ifdef DRBD_DEBUG_MD_SYNC
 	struct {
 		unsigned int line;
@@ -1000,9 +1025,9 @@ struct drbd_conf {
 	struct hlist_head *tl_hash;
 	unsigned int tl_hash_s;
 
-	/* blocks to sync in this run [unit BM_BLOCK_SIZE] */
+	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
 	unsigned long rs_total;
-	/* number of sync IOs that failed in this run */
+	/* number of resync blocks that failed in this run */
 	unsigned long rs_failed;
 	/* Syncer's start time [unit jiffies] */
 	unsigned long rs_start;
@@ -1102,6 +1127,7 @@ struct drbd_conf {
 	struct fifo_buffer rs_plan_s; /* correction values of resync planner */
 	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
 	int rs_planed;    /* resync sectors already planned */
+	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1163,14 +1189,19 @@ enum dds_flags {
 };
 
 extern void drbd_init_set_defaults(struct drbd_conf *mdev);
-extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-			     union drbd_state mask, union drbd_state val);
+extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
+					    enum chg_state_flags f,
+					    union drbd_state mask,
+					    union drbd_state val);
 extern void drbd_force_state(struct drbd_conf *, union drbd_state,
 			union drbd_state);
-extern int _drbd_request_state(struct drbd_conf *, union drbd_state,
-			union drbd_state, enum chg_state_flags);
-extern int __drbd_set_state(struct drbd_conf *, union drbd_state,
-			    enum chg_state_flags, struct completion *done);
+extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
+					      union drbd_state,
+					      union drbd_state,
+					      enum chg_state_flags);
+extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
+					   enum chg_state_flags,
+					   struct completion *done);
 extern void print_st_err(struct drbd_conf *, union drbd_state,
 			union drbd_state, int);
 extern int  drbd_thread_start(struct drbd_thread *thi);
@@ -1195,7 +1226,7 @@ extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
 extern int drbd_send_protocol(struct drbd_conf *mdev);
 extern int drbd_send_uuids(struct drbd_conf *mdev);
 extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
-extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val);
+extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
 extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
 extern int _drbd_send_state(struct drbd_conf *mdev);
 extern int drbd_send_state(struct drbd_conf *mdev);
@@ -1220,11 +1251,10 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
 			struct p_data *dp, int data_size);
 extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
 			    sector_t sector, int blksize, u64 block_id);
+extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req);
 extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 			   struct drbd_epoch_entry *e);
 extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
-extern int _drbd_send_barrier(struct drbd_conf *mdev,
-			struct drbd_tl_epoch *barrier);
 extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
 			      sector_t sector, int size, u64 block_id);
 extern int drbd_send_drequest_csum(struct drbd_conf *mdev,
@@ -1235,14 +1265,13 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size)
 
 extern int drbd_send_bitmap(struct drbd_conf *mdev);
 extern int _drbd_send_bitmap(struct drbd_conf *mdev);
-extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode);
+extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode);
 extern void drbd_free_bc(struct drbd_backing_dev *ldev);
 extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
-/* drbd_meta-data.c (still in drbd_main.c) */
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
-/* maybe define them below as inline? */
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
 extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
 extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
@@ -1261,10 +1290,12 @@ extern void drbd_md_mark_dirty_(struct drbd_conf *mdev,
 extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 				 int (*io_fn)(struct drbd_conf *),
 				 void (*done)(struct drbd_conf *, int),
-				 char *why);
+				 char *why, enum bm_flag flags);
+extern int drbd_bitmap_io(struct drbd_conf *mdev,
+		int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags);
 extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
 extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
-extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
 extern void drbd_go_diskless(struct drbd_conf *mdev);
 extern void drbd_ldev_destroy(struct drbd_conf *mdev);
 
@@ -1313,6 +1344,7 @@ struct bm_extent {
 
 #define BME_NO_WRITES  0  /* bm_extent.flags: no more requests on this one! */
 #define BME_LOCKED     1  /* bm_extent.flags: syncer active on this one. */
+#define BME_PRIORITY   2  /* finish resync IO on this extent ASAP! App IO waiting! */
 
 /* drbd_bitmap.c */
 /*
@@ -1390,7 +1422,9 @@ struct bm_extent {
  * you should use 64bit OS for that much storage, anyways. */
 #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
 #else
-#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32)
+/* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */
+#define DRBD_MAX_SECTORS_FLEX (1UL << 51)
+/* corresponds to (1UL << 38) bits right now. */
 #endif
 #endif
 
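The arithmetic behind the two new comments, assuming the usual 4 KiB (2^12-byte) bitmap granularity: (1UL << 38) bits x 2^12 bytes per bit = 2^50 bytes = 1 PiB, which is 2^41 sectors of 512 bytes. The (1UL << 51) sector constant is therefore a much roomier ceiling; the bitmap granularity appears to be what actually pins the supported size to the advertised 1 PiB.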
@@ -1398,7 +1432,7 @@ struct bm_extent {
  * With a value of 8 all IO in one 128K block makes it to the same slot of the
  * hash table. */
 #define HT_SHIFT 8
-#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
+#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
 
 #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32KiB of data */
 
@@ -1410,16 +1444,20 @@ extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new
 extern void drbd_bm_cleanup(struct drbd_conf *mdev);
 extern void drbd_bm_set_all(struct drbd_conf *mdev);
 extern void drbd_bm_clear_all(struct drbd_conf *mdev);
+/* set/clear/test only a few bits at a time */
 extern int  drbd_bm_set_bits(
 		struct drbd_conf *mdev, unsigned long s, unsigned long e);
 extern int  drbd_bm_clear_bits(
 		struct drbd_conf *mdev, unsigned long s, unsigned long e);
-/* bm_set_bits variant for use while holding drbd_bm_lock */
+extern int drbd_bm_count_bits(
+	struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
+/* bm_set_bits variant for use while holding drbd_bm_lock,
+ * may process the whole bitmap in one go */
 extern void _drbd_bm_set_bits(struct drbd_conf *mdev,
 		const unsigned long s, const unsigned long e);
 extern int  drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
 extern int  drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
-extern int  drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local);
+extern int  drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
 extern int  drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
 extern int  drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
 extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
@@ -1427,6 +1465,8 @@ extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
 extern size_t	     drbd_bm_words(struct drbd_conf *mdev);
 extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
 extern sector_t      drbd_bm_capacity(struct drbd_conf *mdev);
+
+#define DRBD_END_OF_BITMAP	(~(unsigned long)0)
 extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
 /* bm_find_next variants for use while you hold drbd_bm_lock() */
 extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
@@ -1437,14 +1477,12 @@ extern int drbd_bm_rs_done(struct drbd_conf *mdev);
 /* for receive_bitmap */
 extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset,
 		size_t number, unsigned long *buffer);
-/* for _drbd_send_bitmap and drbd_bm_write_sect */
+/* for _drbd_send_bitmap */
 extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
 		size_t number, unsigned long *buffer);
 
-extern void drbd_bm_lock(struct drbd_conf *mdev, char *why);
+extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags);
 extern void drbd_bm_unlock(struct drbd_conf *mdev);
-
-extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
 /* drbd_main.c */
 
 extern struct kmem_cache *drbd_request_cache;
@@ -1467,7 +1505,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev);
 extern int proc_details;
 
 /* drbd_req */
-extern int drbd_make_request_26(struct request_queue *q, struct bio *bio);
+extern int drbd_make_request(struct request_queue *q, struct bio *bio);
 extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
 extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
 extern int is_valid_ar_handle(struct drbd_request *, sector_t);
@@ -1482,8 +1520,9 @@ enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew =
 extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
-extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
-			 int force);
+extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
+					enum drbd_role new_role,
+					int force);
 extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
 extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
@@ -1499,6 +1538,7 @@ extern int drbd_resync_finished(struct drbd_conf *mdev);
 extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
 		struct drbd_backing_dev *bdev, sector_t sector, int rw);
 extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
+extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
 
 static inline void ov_oos_print(struct drbd_conf *mdev)
 {
@@ -1522,21 +1562,23 @@ extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int);
 extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
-extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
+extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int);
 extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
-extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
 extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
 extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
+extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int);
 
 extern void resync_timer_fn(unsigned long data);
+extern void start_resync_timer_fn(unsigned long data);
 
 /* drbd_receiver.c */
-extern int drbd_rs_should_slow_down(struct drbd_conf *mdev);
+extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
 extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
 		const unsigned rw, const int fault_type);
 extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
@@ -1619,16 +1661,16 @@ extern int drbd_rs_del_all(struct drbd_conf *mdev);
 extern void drbd_rs_failed_io(struct drbd_conf *mdev,
 		sector_t sector, int size);
 extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
+extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go);
 extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
 #define drbd_set_in_sync(mdev, sector, size) \
 	__drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
-extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
+extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
-extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev);
 extern void drbd_al_shrink(struct drbd_conf *mdev);
 
 
@@ -1747,11 +1789,11 @@ static inline void drbd_state_unlock(struct drbd_conf *mdev)
 	wake_up(&mdev->misc_wait);
 }
 
-static inline int _drbd_set_state(struct drbd_conf *mdev,
-	 union drbd_state ns, enum chg_state_flags flags,
-	 struct completion *done)
+static inline enum drbd_state_rv
+_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+		enum chg_state_flags flags, struct completion *done)
 {
-	int rv;
+	enum drbd_state_rv rv;
 
 	read_lock(&global_state_lock);
 	rv = __drbd_set_state(mdev, ns, flags, done);
@@ -1982,17 +2024,17 @@ static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
 
 static inline void drbd_thread_stop(struct drbd_thread *thi)
 {
-	_drbd_thread_stop(thi, FALSE, TRUE);
+	_drbd_thread_stop(thi, false, true);
 }
 
 static inline void drbd_thread_stop_nowait(struct drbd_thread *thi)
 {
-	_drbd_thread_stop(thi, FALSE, FALSE);
+	_drbd_thread_stop(thi, false, false);
 }
 
 static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
 {
-	_drbd_thread_stop(thi, TRUE, FALSE);
+	_drbd_thread_stop(thi, true, false);
 }
 
 /* counts how many answer packets we expect from our peer,
@@ -2146,17 +2188,18 @@ extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
 static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
 		unsigned long *bits_left, unsigned int *per_mil_done)
 {
-	/*
-	 * this is to break it at compile time when we change that
-	 * (we may feel 4TB maximum storage per drbd is not enough)
-	 */
+	/* this is to break it at compile time when we change that, in case we
+	 * want to support more than (1<<32) bits on a 32bit arch. */
 	typecheck(unsigned long, mdev->rs_total);
 
 	/* note: both rs_total and rs_left are in bits, i.e. in
 	 * units of BM_BLOCK_SIZE.
 	 * for the percentage, we don't care. */
 
-	*bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+		*bits_left = mdev->ov_left;
+	else
+		*bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
 	/* >> 10 to prevent overflow,
 	 * +1 to prevent division by zero */
 	if (*bits_left > mdev->rs_total) {
@@ -2171,10 +2214,19 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
 				*bits_left, mdev->rs_total, mdev->rs_failed);
 		*per_mil_done = 0;
 	} else {
-		/* make sure the calculation happens in long context */
-		unsigned long tmp = 1000UL -
-				(*bits_left >> 10)*1000UL
-				/ ((mdev->rs_total >> 10) + 1UL);
+		/* Make sure the division happens in long context.
+		 * We allow up to one petabyte storage right now,
+		 * at a granularity of 4k per bit that is 2**38 bits.
+		 * After shift right and multiplication by 1000,
+		 * this should still fit easily into a 32bit long,
+		 * so we don't need a 64bit division on 32bit arch.
+		 * Note: currently we don't support such large bitmaps on 32bit
+		 * arch anyways, but no harm done to be prepared for it here.
+		 */
+		unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10;
+		unsigned long left = *bits_left >> shift;
+		unsigned long total = 1UL + (mdev->rs_total >> shift);
+		unsigned long tmp = 1000UL - left * 1000UL/total;
 		*per_mil_done = tmp;
 	}
 }
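A self-contained rerun of the shifted integer math above, with made-up numbers: for rs_total = 2^20 bits and 2^18 bits still to go, the exact answer is 750 per-mil done, and the scaled computation lands within rounding of it.

#include <stdio.h>

int main(void)
{
	unsigned long rs_total  = 1UL << 20;	/* bits to resync, total */
	unsigned long bits_left = 1UL << 18;	/* bits still out of sync */

	/* same scaling as drbd_get_syncer_progress() */
	unsigned int shift = rs_total >= (1ULL << 32) ? 16 : 10;
	unsigned long left  = bits_left >> shift;
	unsigned long total = 1UL + (rs_total >> shift);
	unsigned long per_mil = 1000UL - left * 1000UL / total;

	printf("%lu per-mil done\n", per_mil);	/* prints 751 */
	return 0;
}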
@@ -2193,8 +2245,9 @@ static inline int drbd_get_max_buffers(struct drbd_conf *mdev)
 	return mxb;
 }
 
-static inline int drbd_state_is_stable(union drbd_state s)
+static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 {
+	union drbd_state s = mdev->state;
 
 	/* DO NOT add a default clause, we want the compiler to warn us
 	 * for any newly introduced state we may have forgotten to add here */
@@ -2211,11 +2264,9 @@ static inline int drbd_state_is_stable(union drbd_state s)
 	case C_VERIFY_T:
 	case C_PAUSED_SYNC_S:
 	case C_PAUSED_SYNC_T:
-		/* maybe stable, look at the disk state */
-		break;
-
-	/* no new io accepted during transitional states
-	 * like handshake or teardown */
+	case C_AHEAD:
+	case C_BEHIND:
+		/* transitional states, IO allowed */
 	case C_DISCONNECTING:
 	case C_UNCONNECTED:
 	case C_TIMEOUT:
@@ -2226,7 +2277,15 @@ static inline int drbd_state_is_stable(union drbd_state s)
 	case C_WF_REPORT_PARAMS:
 	case C_STARTING_SYNC_S:
 	case C_STARTING_SYNC_T:
+		break;
+
+		/* Allow IO in BM exchange states with new protocols */
 	case C_WF_BITMAP_S:
+		if (mdev->agreed_pro_version < 96)
+			return 0;
+		break;
+
+		/* no new io accepted in these states */
 	case C_WF_BITMAP_T:
 	case C_WF_SYNC_UUID:
 	case C_MASK:
@@ -2261,41 +2320,47 @@ static inline int is_susp(union drbd_state s)
 	return s.susp || s.susp_nod || s.susp_fen;
 }
 
-static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
+static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
 {
 	int mxb = drbd_get_max_buffers(mdev);
 
 	if (is_susp(mdev->state))
-		return 0;
+		return false;
 	if (test_bit(SUSPEND_IO, &mdev->flags))
-		return 0;
+		return false;
 
 	/* to avoid potential deadlock or bitmap corruption,
 	 * in various places, we only allow new application io
 	 * to start during "stable" states. */
 
 	/* no new io accepted when attaching or detaching the disk */
-	if (!drbd_state_is_stable(mdev->state))
-		return 0;
+	if (!drbd_state_is_stable(mdev))
+		return false;
 
 	/* since some older kernels don't have atomic_add_unless,
 	 * and we are within the spinlock anyways, we have this workaround. */
 	if (atomic_read(&mdev->ap_bio_cnt) > mxb)
-		return 0;
+		return false;
 	if (test_bit(BITMAP_IO, &mdev->flags))
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 
-/* I'd like to use wait_event_lock_irq,
- * but I'm not sure when it got introduced,
- * and not sure when it has 3 or 4 arguments */
-static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
+static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count)
 {
-	/* compare with after_state_ch,
-	 * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */
-	DEFINE_WAIT(wait);
+	bool rv = false;
+
+	spin_lock_irq(&mdev->req_lock);
+	rv = may_inc_ap_bio(mdev);
+	if (rv)
+		atomic_add(count, &mdev->ap_bio_cnt);
+	spin_unlock_irq(&mdev->req_lock);
+
+	return rv;
+}
 
+static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
+{
 	/* we wait here
 	 *    as long as the device is suspended
 	 *    until the bitmap is no longer on the fly during connection
@@ -2304,16 +2369,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
 	 *    to avoid races with the reconnect code,
 	 *    we need to atomic_inc within the spinlock. */
 
-	spin_lock_irq(&mdev->req_lock);
-	while (!__inc_ap_bio_cond(mdev)) {
-		prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock_irq(&mdev->req_lock);
-		schedule();
-		finish_wait(&mdev->misc_wait, &wait);
-		spin_lock_irq(&mdev->req_lock);
-	}
-	atomic_add(count, &mdev->ap_bio_cnt);
-	spin_unlock_irq(&mdev->req_lock);
+	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count));
 }
 
 static inline void dec_ap_bio(struct drbd_conf *mdev)
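The hunk above replaces the open-coded prepare_to_wait()/schedule() loop with wait_event() on a condition that claims the slots itself, so checking and taking the resource happen in one critical section. A userspace analogue of that shape using pthreads (illustrative only; the names mimic the DRBD ones but this is not DRBD code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t misc_wait = PTHREAD_COND_INITIALIZER;
static int ap_bio_cnt;
static const int mxb = 2;	/* stand-in for drbd_get_max_buffers() */

/* pure check; caller holds req_lock, like may_inc_ap_bio() */
static bool may_inc_ap_bio(int count)
{
	return ap_bio_cnt + count <= mxb;
}

/* wait until the check passes, then claim the slots under the same
 * lock, so no wakeup can be lost between test and claim */
static void inc_ap_bio(int count)
{
	pthread_mutex_lock(&req_lock);
	while (!may_inc_ap_bio(count))
		pthread_cond_wait(&misc_wait, &req_lock);
	ap_bio_cnt += count;
	pthread_mutex_unlock(&req_lock);
}

static void dec_ap_bio(void)
{
	pthread_mutex_lock(&req_lock);
	ap_bio_cnt--;
	pthread_cond_broadcast(&misc_wait);	/* like wake_up() */
	pthread_mutex_unlock(&req_lock);
}

int main(void)
{
	inc_ap_bio(2);	/* takes both slots */
	dec_ap_bio();	/* frees one */
	inc_ap_bio(1);	/* fits again without blocking */
	printf("ap_bio_cnt = %d\n", ap_bio_cnt);	/* prints 2 */
	return 0;
}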
@@ -2333,9 +2389,11 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
 	}
 }
 
-static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
+static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
 {
+	int changed = mdev->ed_uuid != val;
 	mdev->ed_uuid = val;
+	return changed;
 }
 
 static inline int seq_cmp(u32 a, u32 b)
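drbd_set_ed_uuid() now reports whether the stored exposed-data UUID actually changed, letting callers skip follow-up work on a no-op update. A tiny standalone sketch of that contract (set_ed_uuid() here is a stand-in, not the kernel function):

#include <stdint.h>
#include <stdio.h>

static uint64_t ed_uuid;

static int set_ed_uuid(uint64_t val)
{
	int changed = ed_uuid != val;	/* detect no-op updates */

	ed_uuid = val;
	return changed;
}

int main(void)
{
	printf("%d %d\n", set_ed_uuid(42), set_ed_uuid(42));	/* prints "1 0" */
	return 0;
}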
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 8a43ce0edeed..dfc85f32d317 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -85,7 +85,8 @@ MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
 MODULE_VERSION(REL_VERSION);
 MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
 MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
 
 #include <linux/moduleparam.h>
@@ -115,7 +116,7 @@ module_param(fault_devs, int, 0644);
 #endif
 
 /* module parameter, defined */
-unsigned int minor_count = 32;
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 int disable_sendpage;
 int allow_oos;
 unsigned int cn_idx = CN_IDX_DRBD;
@@ -335,6 +336,7 @@ bail:
 	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
 }
 
+
 /**
  * _tl_restart() - Walks the transfer log, and applies an action to all requests
  * @mdev:	DRBD device.
@@ -456,7 +458,7 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 }
 
 /**
- * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
  * @mdev:	DRBD device.
  * @os:		old (current) state.
  * @ns:		new (wanted) state.
@@ -473,12 +475,13 @@ static int cl_wide_st_chg(struct drbd_conf *mdev,
 		 (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
 }
 
-int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-		      union drbd_state mask, union drbd_state val)
+enum drbd_state_rv
+drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+		  union drbd_state mask, union drbd_state val)
 {
 	unsigned long flags;
 	union drbd_state os, ns;
-	int rv;
+	enum drbd_state_rv rv;
 
 	spin_lock_irqsave(&mdev->req_lock, flags);
 	os = mdev->state;
@@ -502,20 +505,22 @@ void drbd_force_state(struct drbd_conf *mdev,
 	drbd_change_state(mdev, CS_HARD, mask, val);
 }
 
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
-static int is_valid_state_transition(struct drbd_conf *,
-				     union drbd_state, union drbd_state);
+static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
+static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
+						    union drbd_state,
+						    union drbd_state);
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
 				       union drbd_state ns, const char **warn_sync_abort);
 int drbd_send_state_req(struct drbd_conf *,
 			union drbd_state, union drbd_state);
 
-static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
-					      union drbd_state mask, union drbd_state val)
+static enum drbd_state_rv
+_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
+	     union drbd_state val)
 {
 	union drbd_state os, ns;
 	unsigned long flags;
-	int rv;
+	enum drbd_state_rv rv;
 
 	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
 		return SS_CW_SUCCESS;
@@ -536,7 +541,7 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
 		if (rv == SS_SUCCESS) {
 			rv = is_valid_state_transition(mdev, ns, os);
 			if (rv == SS_SUCCESS)
-				rv = 0; /* cont waiting, otherwise fail. */
+				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
 		}
 	}
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
@@ -554,14 +559,14 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
  * Should not be called directly, use drbd_request_state() or
  * _drbd_request_state().
  */
-static int drbd_req_state(struct drbd_conf *mdev,
-			  union drbd_state mask, union drbd_state val,
-			  enum chg_state_flags f)
+static enum drbd_state_rv
+drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
+	       union drbd_state val, enum chg_state_flags f)
 {
 	struct completion done;
 	unsigned long flags;
 	union drbd_state os, ns;
-	int rv;
+	enum drbd_state_rv rv;
 
 	init_completion(&done);
 
@@ -636,10 +641,11 @@ abort:
  * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
  * flag, or when logging of failed state change requests is not desired.
  */
-int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
-			union drbd_state val, enum chg_state_flags f)
+enum drbd_state_rv
+_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+		    union drbd_state val, enum chg_state_flags f)
 {
-	int rv;
+	enum drbd_state_rv rv;
 
 	wait_event(mdev->state_wait,
 		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
@@ -663,8 +669,8 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
 	);
 }
 
-void print_st_err(struct drbd_conf *mdev,
-	union drbd_state os, union drbd_state ns, int err)
+void print_st_err(struct drbd_conf *mdev, union drbd_state os,
+		  union drbd_state ns, enum drbd_state_rv err)
 {
 	if (err == SS_IN_TRANSIENT_STATE)
 		return;
@@ -674,32 +680,18 @@ void print_st_err(struct drbd_conf *mdev,
 }
 
 
-#define drbd_peer_str drbd_role_str
-#define drbd_pdsk_str drbd_disk_str
-
-#define drbd_susp_str(A)     ((A) ? "1" : "0")
-#define drbd_aftr_isp_str(A) ((A) ? "1" : "0")
-#define drbd_peer_isp_str(A) ((A) ? "1" : "0")
-#define drbd_user_isp_str(A) ((A) ? "1" : "0")
-
-#define PSC(A) \
-	({ if (ns.A != os.A) { \
-		pbp += sprintf(pbp, #A "( %s -> %s ) ", \
-			      drbd_##A##_str(os.A), \
-			      drbd_##A##_str(ns.A)); \
-	} })
-
 /**
  * is_valid_state() - Returns an SS_ error code if ns is not valid
  * @mdev:	DRBD device.
  * @ns:		State to consider.
  */
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+static enum drbd_state_rv
+is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 {
 	/* See drbd_state_sw_errors in drbd_strings.c */
 
 	enum drbd_fencing_p fp;
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
@@ -762,10 +754,11 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
  * @ns:		new state.
  * @os:		old state.
  */
-static int is_valid_state_transition(struct drbd_conf *mdev,
-				     union drbd_state ns, union drbd_state os)
+static enum drbd_state_rv
+is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
+			  union drbd_state os)
 {
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 
 	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
 	    os.conn > C_CONNECTED)
@@ -800,6 +793,10 @@ static int is_valid_state_transition(struct drbd_conf *mdev,
 	    os.conn < C_CONNECTED)
 		rv = SS_NEED_CONNECTION;
 
+	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
+	    && os.conn < C_WF_REPORT_PARAMS)
+		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
+
 	return rv;
 }
 
@@ -817,6 +814,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 				       union drbd_state ns, const char **warn_sync_abort)
 {
 	enum drbd_fencing_p fp;
+	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
@@ -869,56 +867,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		ns.conn = C_CONNECTED;
 	}
 
-	if (ns.conn >= C_CONNECTED &&
-	    ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) ||
-	     (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) {
-		switch (ns.conn) {
-		case C_WF_BITMAP_T:
-		case C_PAUSED_SYNC_T:
-			ns.disk = D_OUTDATED;
-			break;
-		case C_CONNECTED:
-		case C_WF_BITMAP_S:
-		case C_SYNC_SOURCE:
-		case C_PAUSED_SYNC_S:
-			ns.disk = D_UP_TO_DATE;
-			break;
-		case C_SYNC_TARGET:
-			ns.disk = D_INCONSISTENT;
-			dev_warn(DEV, "Implicitly set disk state Inconsistent!\n");
-			break;
-		}
-		if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE)
-			dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n");
-	}
-
-	if (ns.conn >= C_CONNECTED &&
-	    (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) {
-		switch (ns.conn) {
-		case C_CONNECTED:
-		case C_WF_BITMAP_T:
-		case C_PAUSED_SYNC_T:
-		case C_SYNC_TARGET:
-			ns.pdsk = D_UP_TO_DATE;
-			break;
-		case C_WF_BITMAP_S:
-		case C_PAUSED_SYNC_S:
-			/* remap any consistent state to D_OUTDATED,
-			 * but disallow "upgrade" of not even consistent states.
-			 */
-			ns.pdsk =
-				(D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
-				? os.pdsk : D_OUTDATED;
-			break;
-		case C_SYNC_SOURCE:
-			ns.pdsk = D_INCONSISTENT;
-			dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n");
-			break;
-		}
-		if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE)
-			dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n");
-	}
-
 	/* Connection breaks down before we finished "Negotiating" */
 	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
 	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -933,6 +881,94 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		put_ldev(mdev);
 	}
 
+	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
+	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
+		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
+			ns.disk = D_UP_TO_DATE;
+		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
+			ns.pdsk = D_UP_TO_DATE;
+	}
+
+	/* Implications of the connection state on the disk states */
+	disk_min = D_DISKLESS;
+	disk_max = D_UP_TO_DATE;
+	pdsk_min = D_INCONSISTENT;
+	pdsk_max = D_UNKNOWN;
+	switch ((enum drbd_conns)ns.conn) {
+	case C_WF_BITMAP_T:
+	case C_PAUSED_SYNC_T:
+	case C_STARTING_SYNC_T:
+	case C_WF_SYNC_UUID:
+	case C_BEHIND:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_OUTDATED;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_VERIFY_S:
+	case C_VERIFY_T:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_CONNECTED:
+		disk_min = D_DISKLESS;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_DISKLESS;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_WF_BITMAP_S:
+	case C_PAUSED_SYNC_S:
+	case C_STARTING_SYNC_S:
+	case C_AHEAD:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
+		break;
+	case C_SYNC_TARGET:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_INCONSISTENT;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_SYNC_SOURCE:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_INCONSISTENT;
+		break;
+	case C_STANDALONE:
+	case C_DISCONNECTING:
+	case C_UNCONNECTED:
+	case C_TIMEOUT:
+	case C_BROKEN_PIPE:
+	case C_NETWORK_FAILURE:
+	case C_PROTOCOL_ERROR:
+	case C_TEAR_DOWN:
+	case C_WF_CONNECTION:
+	case C_WF_REPORT_PARAMS:
+	case C_MASK:
+		break;
+	}
+	if (ns.disk > disk_max)
+		ns.disk = disk_max;
+
+	if (ns.disk < disk_min) {
+		dev_warn(DEV, "Implicitly set disk from %s to %s\n",
+			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+		ns.disk = disk_min;
+	}
+	if (ns.pdsk > pdsk_max)
+		ns.pdsk = pdsk_max;
+
+	if (ns.pdsk < pdsk_min) {
+		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
+			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+		ns.pdsk = pdsk_min;
+	}
+
 	if (fp == FP_STONITH &&
 	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
 	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
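The switch above derives a [min, max] window for both disk states from the connection state alone, then forces the requested values into it; clamping down is silent, while raising to the minimum logs a warning. A reduced userspace sketch of that clamp (the four-state enum is a stand-in for DRBD's larger drbd_disk_state ordering):

#include <stdio.h>

enum disk_state { D_DISKLESS, D_INCONSISTENT, D_OUTDATED, D_UP_TO_DATE };

static enum disk_state clamp_disk(enum disk_state d,
				  enum disk_state lo, enum disk_state hi)
{
	if (d > hi)			/* silent, like ns.disk > disk_max */
		d = hi;
	if (d < lo) {			/* warns, like ns.disk < disk_min */
		printf("implicitly raising disk state\n");
		d = lo;
	}
	return d;
}

int main(void)
{
	/* SyncTarget pins the local disk to [D_INCONSISTENT, D_INCONSISTENT] */
	printf("%d\n", clamp_disk(D_UP_TO_DATE, D_INCONSISTENT, D_INCONSISTENT));
	/* a too-low state gets raised, with a warning */
	printf("%d\n", clamp_disk(D_DISKLESS, D_UP_TO_DATE, D_UP_TO_DATE));
	return 0;
}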
@@ -961,6 +997,10 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 /* helper for __drbd_set_state */
 static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
 {
+	if (mdev->agreed_pro_version < 90)
+		mdev->ov_start_sector = 0;
+	mdev->rs_total = drbd_bm_bits(mdev);
+	mdev->ov_position = 0;
 	if (cs == C_VERIFY_T) {
 		/* starting online verify from an arbitrary position
 		 * does not fit well into the existing protocol.
@@ -970,11 +1010,15 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
 		mdev->ov_start_sector = ~(sector_t)0;
 	} else {
 		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
-		if (bit >= mdev->rs_total)
+		if (bit >= mdev->rs_total) {
 			mdev->ov_start_sector =
 				BM_BIT_TO_SECT(mdev->rs_total - 1);
+			mdev->rs_total = 1;
+		} else
+			mdev->rs_total -= bit;
 		mdev->ov_position = mdev->ov_start_sector;
 	}
+	mdev->ov_left = mdev->rs_total;
 }
 
 static void drbd_resume_al(struct drbd_conf *mdev)
@@ -992,12 +1036,12 @@ static void drbd_resume_al(struct drbd_conf *mdev)
  *
  * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
  */
-int __drbd_set_state(struct drbd_conf *mdev,
-		     union drbd_state ns, enum chg_state_flags flags,
-		     struct completion *done)
+enum drbd_state_rv
+__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+		 enum chg_state_flags flags, struct completion *done)
 {
 	union drbd_state os;
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 	const char *warn_sync_abort = NULL;
 	struct after_state_chg_work *ascw;
 
@@ -1033,22 +1077,46 @@ int __drbd_set_state(struct drbd_conf *mdev,
1033 dev_warn(DEV, "%s aborted.\n", warn_sync_abort); 1077 dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
1034 1078
1035 { 1079 {
1036 char *pbp, pb[300]; 1080 char *pbp, pb[300];
1037 pbp = pb; 1081 pbp = pb;
1038 *pbp = 0; 1082 *pbp = 0;
1039 PSC(role); 1083 if (ns.role != os.role)
1040 PSC(peer); 1084 pbp += sprintf(pbp, "role( %s -> %s ) ",
1041 PSC(conn); 1085 drbd_role_str(os.role),
1042 PSC(disk); 1086 drbd_role_str(ns.role));
1043 PSC(pdsk); 1087 if (ns.peer != os.peer)
1044 if (is_susp(ns) != is_susp(os)) 1088 pbp += sprintf(pbp, "peer( %s -> %s ) ",
1045 pbp += sprintf(pbp, "susp( %s -> %s ) ", 1089 drbd_role_str(os.peer),
1046 drbd_susp_str(is_susp(os)), 1090 drbd_role_str(ns.peer));
1047 drbd_susp_str(is_susp(ns))); 1091 if (ns.conn != os.conn)
1048 PSC(aftr_isp); 1092 pbp += sprintf(pbp, "conn( %s -> %s ) ",
1049 PSC(peer_isp); 1093 drbd_conn_str(os.conn),
1050 PSC(user_isp); 1094 drbd_conn_str(ns.conn));
1051 dev_info(DEV, "%s\n", pb); 1095 if (ns.disk != os.disk)
1096 pbp += sprintf(pbp, "disk( %s -> %s ) ",
1097 drbd_disk_str(os.disk),
1098 drbd_disk_str(ns.disk));
1099 if (ns.pdsk != os.pdsk)
1100 pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
1101 drbd_disk_str(os.pdsk),
1102 drbd_disk_str(ns.pdsk));
1103 if (is_susp(ns) != is_susp(os))
1104 pbp += sprintf(pbp, "susp( %d -> %d ) ",
1105 is_susp(os),
1106 is_susp(ns));
1107 if (ns.aftr_isp != os.aftr_isp)
1108 pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
1109 os.aftr_isp,
1110 ns.aftr_isp);
1111 if (ns.peer_isp != os.peer_isp)
1112 pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
1113 os.peer_isp,
1114 ns.peer_isp);
1115 if (ns.user_isp != os.user_isp)
1116 pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
1117 os.user_isp,
1118 ns.user_isp);
1119 dev_info(DEV, "%s\n", pb);
1052 } 1120 }
1053 1121
1054 /* solve the race between becoming unconfigured, 1122 /* solve the race between becoming unconfigured,
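The PSC() macro is replaced above by explicit sprintf chains. The pattern relies on sprintf returning the number of characters written, so advancing the cursor concatenates the fragments without any strlen() calls; a standalone sketch:

#include <stdio.h>

int main(void)
{
        char pb[300], *pbp = pb;

        *pbp = 0;
        /* each call appends and advances the cursor by its return value */
        pbp += sprintf(pbp, "role( %s -> %s ) ", "Secondary", "Primary");
        pbp += sprintf(pbp, "disk( %s -> %s ) ", "Inconsistent", "UpToDate");
        printf("%s\n", pb);     /* role( ... ) disk( ... ) */
        return 0;
}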
@@ -1074,6 +1142,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
1074 atomic_inc(&mdev->local_cnt); 1142 atomic_inc(&mdev->local_cnt);
1075 1143
1076 mdev->state = ns; 1144 mdev->state = ns;
1145
1146 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
1147 drbd_print_uuids(mdev, "attached to UUIDs");
1148
1077 wake_up(&mdev->misc_wait); 1149 wake_up(&mdev->misc_wait);
1078 wake_up(&mdev->state_wait); 1150 wake_up(&mdev->state_wait);
1079 1151
@@ -1081,7 +1153,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
1081 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && 1153 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
1082 ns.conn < C_CONNECTED) { 1154 ns.conn < C_CONNECTED) {
1083 mdev->ov_start_sector = 1155 mdev->ov_start_sector =
1084 BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left); 1156 BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
1085 dev_info(DEV, "Online Verify reached sector %llu\n", 1157 dev_info(DEV, "Online Verify reached sector %llu\n",
1086 (unsigned long long)mdev->ov_start_sector); 1158 (unsigned long long)mdev->ov_start_sector);
1087 } 1159 }
@@ -1106,14 +1178,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
1106 unsigned long now = jiffies; 1178 unsigned long now = jiffies;
1107 int i; 1179 int i;
1108 1180
1109 mdev->ov_position = 0; 1181 set_ov_position(mdev, ns.conn);
1110 mdev->rs_total = drbd_bm_bits(mdev);
1111 if (mdev->agreed_pro_version >= 90)
1112 set_ov_position(mdev, ns.conn);
1113 else
1114 mdev->ov_start_sector = 0;
1115 mdev->ov_left = mdev->rs_total
1116 - BM_SECT_TO_BIT(mdev->ov_position);
1117 mdev->rs_start = now; 1182 mdev->rs_start = now;
1118 mdev->rs_last_events = 0; 1183 mdev->rs_last_events = 0;
1119 mdev->rs_last_sect_ev = 0; 1184 mdev->rs_last_sect_ev = 0;
@@ -1121,10 +1186,12 @@ int __drbd_set_state(struct drbd_conf *mdev,
1121 mdev->ov_last_oos_start = 0; 1186 mdev->ov_last_oos_start = 0;
1122 1187
1123 for (i = 0; i < DRBD_SYNC_MARKS; i++) { 1188 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1124 mdev->rs_mark_left[i] = mdev->rs_total; 1189 mdev->rs_mark_left[i] = mdev->ov_left;
1125 mdev->rs_mark_time[i] = now; 1190 mdev->rs_mark_time[i] = now;
1126 } 1191 }
1127 1192
1193 drbd_rs_controller_reset(mdev);
1194
1128 if (ns.conn == C_VERIFY_S) { 1195 if (ns.conn == C_VERIFY_S) {
1129 dev_info(DEV, "Starting Online Verify from sector %llu\n", 1196 dev_info(DEV, "Starting Online Verify from sector %llu\n",
1130 (unsigned long long)mdev->ov_position); 1197 (unsigned long long)mdev->ov_position);
@@ -1228,6 +1295,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv)
1228 } 1295 }
1229} 1296}
1230 1297
1298int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
1299 int (*io_fn)(struct drbd_conf *),
1300 char *why, enum bm_flag flags)
1301{
1302 int rv;
1303
1304 D_ASSERT(current == mdev->worker.task);
1305
1306 /* open coded non-blocking drbd_suspend_io(mdev); */
1307 set_bit(SUSPEND_IO, &mdev->flags);
1308
1309 drbd_bm_lock(mdev, why, flags);
1310 rv = io_fn(mdev);
1311 drbd_bm_unlock(mdev);
1312
1313 drbd_resume_io(mdev);
1314
1315 return rv;
1316}
1317
1231/** 1318/**
1232 * after_state_ch() - Perform after state change actions that may sleep 1319 * after_state_ch() - Perform after state change actions that may sleep
1233 * @mdev: DRBD device. 1320 * @mdev: DRBD device.
@@ -1266,16 +1353,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1266 1353
1267 nsm.i = -1; 1354 nsm.i = -1;
1268 if (ns.susp_nod) { 1355 if (ns.susp_nod) {
1269 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { 1356 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1270 if (ns.conn == C_CONNECTED) 1357 what = resend;
1271 what = resend, nsm.susp_nod = 0;
1272 else /* ns.conn > C_CONNECTED */
1273 dev_err(DEV, "Unexpected Resynd going on!\n");
1274 }
1275 1358
1276 if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) 1359 if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
1277 what = restart_frozen_disk_io, nsm.susp_nod = 0; 1360 what = restart_frozen_disk_io;
1278 1361
1362 if (what != nothing)
1363 nsm.susp_nod = 0;
1279 } 1364 }
1280 1365
1281 if (ns.susp_fen) { 1366 if (ns.susp_fen) {
@@ -1306,13 +1391,30 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1306 spin_unlock_irq(&mdev->req_lock); 1391 spin_unlock_irq(&mdev->req_lock);
1307 } 1392 }
1308 1393
1394 /* Became sync source. With protocol >= 96, we still need to send out
1395 * the sync uuid now. Need to do that before any drbd_send_state, or
1396 * the other side may go "paused sync" before receiving the sync uuids,
1397 * which is unexpected. */
1398 if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
1399 (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
1400 mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
1401 drbd_gen_and_send_sync_uuid(mdev);
1402 put_ldev(mdev);
1403 }
1404
1309 /* Do not change the order of the if above and the two below... */ 1405 /* Do not change the order of the if above and the two below... */
1310 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ 1406 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
1311 drbd_send_uuids(mdev); 1407 drbd_send_uuids(mdev);
1312 drbd_send_state(mdev); 1408 drbd_send_state(mdev);
1313 } 1409 }
1314 if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S) 1410 /* No point in queuing send_bitmap if we don't have a connection
1315 drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)"); 1411 * anymore, so check also the _current_ state, not only the new state
1412 * at the time this work was queued. */
1413 if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
1414 mdev->state.conn == C_WF_BITMAP_S)
1415 drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
1416 "send_bitmap (WFBitMapS)",
1417 BM_LOCKED_TEST_ALLOWED);
1316 1418
1317 /* Lost contact to peer's copy of the data */ 1419 /* Lost contact to peer's copy of the data */
1318 if ((os.pdsk >= D_INCONSISTENT && 1420 if ((os.pdsk >= D_INCONSISTENT &&
@@ -1343,7 +1445,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1343 1445
1344 /* D_DISKLESS Peer becomes secondary */ 1446 /* D_DISKLESS Peer becomes secondary */
1345 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) 1447 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
1346 drbd_al_to_on_disk_bm(mdev); 1448 /* We may still be Primary ourselves.
1449 * No harm done if the bitmap still changes,
1450 * redirtied pages will follow later. */
1451 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1452 "demote diskless peer", BM_LOCKED_SET_ALLOWED);
1453 put_ldev(mdev);
1454 }
1455
1456 /* Write out all changed bits on demote.
1457 * Though, no need to do that just yet
1458 * if a resync is still going on */
1459 if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1460 mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
1461 /* No changes to the bitmap expected this time, so assert that,
1462 * even though no harm was done if it did change. */
1463 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1464 "demote", BM_LOCKED_TEST_ALLOWED);
1347 put_ldev(mdev); 1465 put_ldev(mdev);
1348 } 1466 }
1349 1467
@@ -1371,15 +1489,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1371 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) 1489 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
1372 drbd_send_state(mdev); 1490 drbd_send_state(mdev);
1373 1491
1492 if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
1493 drbd_send_state(mdev);
1494
1374 /* We are in the progress to start a full sync... */ 1495 /* We are in the progress to start a full sync... */
1375 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || 1496 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
1376 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) 1497 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
1377 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync"); 1498 /* no other bitmap changes expected during this phase */
1499 drbd_queue_bitmap_io(mdev,
1500 &drbd_bmio_set_n_write, &abw_start_sync,
1501 "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
1378 1502
1379 /* We are invalidating our self... */ 1503 /* We are invalidating our self... */
1380 if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && 1504 if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
1381 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) 1505 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
1382 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); 1506 /* other bitmap operation expected during this phase */
1507 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
1508 "set_n_write from invalidate", BM_LOCKED_MASK);
1383 1509
1384 /* first half of local IO error, failure to attach, 1510 /* first half of local IO error, failure to attach,
1385 * or administrative detach */ 1511 * or administrative detach */
@@ -1434,8 +1560,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1434 1560
1435 if (drbd_send_state(mdev)) 1561 if (drbd_send_state(mdev))
1436 dev_warn(DEV, "Notified peer that I'm now diskless.\n"); 1562 dev_warn(DEV, "Notified peer that I'm now diskless.\n");
1437 else
1438 dev_err(DEV, "Sending state for being diskless failed\n");
1439 /* corresponding get_ldev in __drbd_set_state 1563 /* corresponding get_ldev in __drbd_set_state
1440 * this may finaly trigger drbd_ldev_destroy. */ 1564 * this may finaly trigger drbd_ldev_destroy. */
1441 put_ldev(mdev); 1565 put_ldev(mdev);
@@ -1459,6 +1583,19 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1459 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) 1583 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1460 drbd_send_state(mdev); 1584 drbd_send_state(mdev);
1461 1585
1586 /* This triggers bitmap writeout of potentially still unwritten pages
1587 * if the resync finished cleanly, or aborted because of peer disk
1588 * failure, or because of connection loss.
1589 * For resync aborted because of local disk failure, we cannot do
1590 * any bitmap writeout anymore.
1591 * No harm done if some bits change during this phase.
1592 */
1593 if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
1594 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
1595 "write from resync_finished", BM_LOCKED_SET_ALLOWED);
1596 put_ldev(mdev);
1597 }
1598
1462 /* free tl_hash if we Got thawed and are C_STANDALONE */ 1599 /* free tl_hash if we Got thawed and are C_STANDALONE */
1463 if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) 1600 if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
1464 drbd_free_tl_hash(mdev); 1601 drbd_free_tl_hash(mdev);
@@ -1559,7 +1696,7 @@ int drbd_thread_start(struct drbd_thread *thi)
1559 if (!try_module_get(THIS_MODULE)) { 1696 if (!try_module_get(THIS_MODULE)) {
1560 dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); 1697 dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
1561 spin_unlock_irqrestore(&thi->t_lock, flags); 1698 spin_unlock_irqrestore(&thi->t_lock, flags);
1562 return FALSE; 1699 return false;
1563 } 1700 }
1564 1701
1565 init_completion(&thi->stop); 1702 init_completion(&thi->stop);
@@ -1576,7 +1713,7 @@ int drbd_thread_start(struct drbd_thread *thi)
1576 dev_err(DEV, "Couldn't start thread\n"); 1713 dev_err(DEV, "Couldn't start thread\n");
1577 1714
1578 module_put(THIS_MODULE); 1715 module_put(THIS_MODULE);
1579 return FALSE; 1716 return false;
1580 } 1717 }
1581 spin_lock_irqsave(&thi->t_lock, flags); 1718 spin_lock_irqsave(&thi->t_lock, flags);
1582 thi->task = nt; 1719 thi->task = nt;
@@ -1596,7 +1733,7 @@ int drbd_thread_start(struct drbd_thread *thi)
1596 break; 1733 break;
1597 } 1734 }
1598 1735
1599 return TRUE; 1736 return true;
1600} 1737}
1601 1738
1602 1739
@@ -1694,8 +1831,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
1694{ 1831{
1695 int sent, ok; 1832 int sent, ok;
1696 1833
1697 ERR_IF(!h) return FALSE; 1834 ERR_IF(!h) return false;
1698 ERR_IF(!size) return FALSE; 1835 ERR_IF(!size) return false;
1699 1836
1700 h->magic = BE_DRBD_MAGIC; 1837 h->magic = BE_DRBD_MAGIC;
1701 h->command = cpu_to_be16(cmd); 1838 h->command = cpu_to_be16(cmd);
@@ -1704,8 +1841,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
1704 sent = drbd_send(mdev, sock, h, size, msg_flags); 1841 sent = drbd_send(mdev, sock, h, size, msg_flags);
1705 1842
1706 ok = (sent == size); 1843 ok = (sent == size);
1707 if (!ok) 1844 if (!ok && !signal_pending(current))
1708 dev_err(DEV, "short sent %s size=%d sent=%d\n", 1845 dev_warn(DEV, "short sent %s size=%d sent=%d\n",
1709 cmdname(cmd), (int)size, sent); 1846 cmdname(cmd), (int)size, sent);
1710 return ok; 1847 return ok;
1711} 1848}
@@ -1840,7 +1977,7 @@ int drbd_send_protocol(struct drbd_conf *mdev)
1840 else { 1977 else {
1841 dev_err(DEV, "--dry-run is not supported by peer"); 1978 dev_err(DEV, "--dry-run is not supported by peer");
1842 kfree(p); 1979 kfree(p);
1843 return 0; 1980 return -1;
1844 } 1981 }
1845 } 1982 }
1846 p->conn_flags = cpu_to_be32(cf); 1983 p->conn_flags = cpu_to_be32(cf);
@@ -1888,12 +2025,36 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
1888 return _drbd_send_uuids(mdev, 8); 2025 return _drbd_send_uuids(mdev, 8);
1889} 2026}
1890 2027
2028void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
2029{
2030 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2031 u64 *uuid = mdev->ldev->md.uuid;
2032 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
2033 text,
2034 (unsigned long long)uuid[UI_CURRENT],
2035 (unsigned long long)uuid[UI_BITMAP],
2036 (unsigned long long)uuid[UI_HISTORY_START],
2037 (unsigned long long)uuid[UI_HISTORY_END]);
2038 put_ldev(mdev);
2039 } else {
2040 dev_info(DEV, "%s effective data uuid: %016llX\n",
2041 text,
2042 (unsigned long long)mdev->ed_uuid);
2043 }
2044}
1891 2045
1892int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) 2046int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
1893{ 2047{
1894 struct p_rs_uuid p; 2048 struct p_rs_uuid p;
2049 u64 uuid;
1895 2050
1896 p.uuid = cpu_to_be64(val); 2051 D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
2052
2053 uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
2054 drbd_uuid_set(mdev, UI_BITMAP, uuid);
2055 drbd_print_uuids(mdev, "updated sync UUID");
2056 drbd_md_sync(mdev);
2057 p.uuid = cpu_to_be64(uuid);
1897 2058
1898 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, 2059 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
1899 (struct p_header80 *)&p, sizeof(p)); 2060 (struct p_header80 *)&p, sizeof(p));
@@ -1921,7 +2082,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
1921 p.d_size = cpu_to_be64(d_size); 2082 p.d_size = cpu_to_be64(d_size);
1922 p.u_size = cpu_to_be64(u_size); 2083 p.u_size = cpu_to_be64(u_size);
1923 p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); 2084 p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
1924 p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); 2085 p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
1925 p.queue_order_type = cpu_to_be16(q_order_type); 2086 p.queue_order_type = cpu_to_be16(q_order_type);
1926 p.dds_flags = cpu_to_be16(flags); 2087 p.dds_flags = cpu_to_be16(flags);
1927 2088
@@ -1972,7 +2133,7 @@ int drbd_send_state_req(struct drbd_conf *mdev,
1972 (struct p_header80 *)&p, sizeof(p)); 2133 (struct p_header80 *)&p, sizeof(p));
1973} 2134}
1974 2135
1975int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) 2136int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
1976{ 2137{
1977 struct p_req_state_reply p; 2138 struct p_req_state_reply p;
1978 2139
@@ -2076,9 +2237,15 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
2076 return len; 2237 return len;
2077} 2238}
2078 2239
2079enum { OK, FAILED, DONE } 2240/**
2241 * send_bitmap_rle_or_plain
2242 *
2243 * Return 0 when done, 1 when another iteration is needed, and a negative error
2244 * code upon failure.
2245 */
2246static int
2080send_bitmap_rle_or_plain(struct drbd_conf *mdev, 2247send_bitmap_rle_or_plain(struct drbd_conf *mdev,
2081 struct p_header80 *h, struct bm_xfer_ctx *c) 2248 struct p_header80 *h, struct bm_xfer_ctx *c)
2082{ 2249{
2083 struct p_compressed_bm *p = (void*)h; 2250 struct p_compressed_bm *p = (void*)h;
2084 unsigned long num_words; 2251 unsigned long num_words;
@@ -2088,7 +2255,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
2088 len = fill_bitmap_rle_bits(mdev, p, c); 2255 len = fill_bitmap_rle_bits(mdev, p, c);
2089 2256
2090 if (len < 0) 2257 if (len < 0)
2091 return FAILED; 2258 return -EIO;
2092 2259
2093 if (len) { 2260 if (len) {
2094 DCBP_set_code(p, RLE_VLI_Bits); 2261 DCBP_set_code(p, RLE_VLI_Bits);
@@ -2118,11 +2285,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
2118 if (c->bit_offset > c->bm_bits) 2285 if (c->bit_offset > c->bm_bits)
2119 c->bit_offset = c->bm_bits; 2286 c->bit_offset = c->bm_bits;
2120 } 2287 }
2121 ok = ok ? ((len == 0) ? DONE : OK) : FAILED; 2288 if (ok) {
2122 2289 if (len == 0) {
2123 if (ok == DONE) 2290 INFO_bm_xfer_stats(mdev, "send", c);
2124 INFO_bm_xfer_stats(mdev, "send", c); 2291 return 0;
2125 return ok; 2292 } else
2293 return 1;
2294 }
2295 return -EIO;
2126} 2296}
2127 2297
2128/* See the comment at receive_bitmap() */ 2298/* See the comment at receive_bitmap() */
@@ -2130,16 +2300,16 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
2130{ 2300{
2131 struct bm_xfer_ctx c; 2301 struct bm_xfer_ctx c;
2132 struct p_header80 *p; 2302 struct p_header80 *p;
2133 int ret; 2303 int err;
2134 2304
2135 ERR_IF(!mdev->bitmap) return FALSE; 2305 ERR_IF(!mdev->bitmap) return false;
2136 2306
2137 /* maybe we should use some per thread scratch page, 2307 /* maybe we should use some per thread scratch page,
2138 * and allocate that during initial device creation? */ 2308 * and allocate that during initial device creation? */
2139 p = (struct p_header80 *) __get_free_page(GFP_NOIO); 2309 p = (struct p_header80 *) __get_free_page(GFP_NOIO);
2140 if (!p) { 2310 if (!p) {
2141 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); 2311 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
2142 return FALSE; 2312 return false;
2143 } 2313 }
2144 2314
2145 if (get_ldev(mdev)) { 2315 if (get_ldev(mdev)) {
@@ -2165,11 +2335,11 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
2165 }; 2335 };
2166 2336
2167 do { 2337 do {
2168 ret = send_bitmap_rle_or_plain(mdev, p, &c); 2338 err = send_bitmap_rle_or_plain(mdev, p, &c);
2169 } while (ret == OK); 2339 } while (err > 0);
2170 2340
2171 free_page((unsigned long) p); 2341 free_page((unsigned long) p);
2172 return (ret == DONE); 2342 return err == 0;
2173} 2343}
2174 2344
2175int drbd_send_bitmap(struct drbd_conf *mdev) 2345int drbd_send_bitmap(struct drbd_conf *mdev)
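The anonymous enum { OK, FAILED, DONE } gives way to the usual kernel convention: 0 means done, a positive value means iterate again, a negative value is an error. A standalone sketch of the resulting calling loop:

#include <stdio.h>

/* sketch of the 0 = done / 1 = again / <0 = error convention adopted
 * by send_bitmap_rle_or_plain() above */
static int send_one_chunk(int *chunks_left)
{
        if (*chunks_left < 0)
                return -5;              /* stands in for -EIO */
        if (*chunks_left == 0)
                return 0;               /* done */
        --*chunks_left;
        return 1;                       /* more to send, call again */
}

int main(void)
{
        int chunks = 3, err;

        do {
                err = send_one_chunk(&chunks);
        } while (err > 0);              /* same loop shape as _drbd_send_bitmap() */

        printf("finished with %d\n", err);
        return err ? 1 : 0;
}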
@@ -2192,7 +2362,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
2192 p.set_size = cpu_to_be32(set_size); 2362 p.set_size = cpu_to_be32(set_size);
2193 2363
2194 if (mdev->state.conn < C_CONNECTED) 2364 if (mdev->state.conn < C_CONNECTED)
2195 return FALSE; 2365 return false;
2196 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, 2366 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
2197 (struct p_header80 *)&p, sizeof(p)); 2367 (struct p_header80 *)&p, sizeof(p));
2198 return ok; 2368 return ok;
@@ -2220,7 +2390,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
2220 p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); 2390 p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
2221 2391
2222 if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) 2392 if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
2223 return FALSE; 2393 return false;
2224 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, 2394 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
2225 (struct p_header80 *)&p, sizeof(p)); 2395 (struct p_header80 *)&p, sizeof(p));
2226 return ok; 2396 return ok;
@@ -2326,8 +2496,8 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
2326} 2496}
2327 2497
2328/* called on sndtimeo 2498/* called on sndtimeo
2329 * returns FALSE if we should retry, 2499 * returns false if we should retry,
2330 * TRUE if we think connection is dead 2500 * true if we think connection is dead
2331 */ 2501 */
2332static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) 2502static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
2333{ 2503{
@@ -2340,7 +2510,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
2340 || mdev->state.conn < C_CONNECTED; 2510 || mdev->state.conn < C_CONNECTED;
2341 2511
2342 if (drop_it) 2512 if (drop_it)
2343 return TRUE; 2513 return true;
2344 2514
2345 drop_it = !--mdev->ko_count; 2515 drop_it = !--mdev->ko_count;
2346 if (!drop_it) { 2516 if (!drop_it) {
@@ -2531,13 +2701,39 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2531 if (ok && dgs) { 2701 if (ok && dgs) {
2532 dgb = mdev->int_dig_out; 2702 dgb = mdev->int_dig_out;
2533 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); 2703 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
2534 ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); 2704 ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
2535 } 2705 }
2536 if (ok) { 2706 if (ok) {
2537 if (mdev->net_conf->wire_protocol == DRBD_PROT_A) 2707 /* For protocol A, we have to memcpy the payload into
2708 * socket buffers, as we may complete right away
2709 * as soon as we handed it over to tcp, at which point the data
2710 * pages may become invalid.
2711 *
2712 * When data integrity is enabled, we copy it as well, so we can be
2713 * sure that even if the bio pages may still be modified, it
2714 * won't change the data on the wire, thus if the digest checks
2715 * out ok after sending on this side, but does not fit on the
2716 * receiving side, we have surely detected corruption elsewhere.
2717 */
2718 if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
2538 ok = _drbd_send_bio(mdev, req->master_bio); 2719 ok = _drbd_send_bio(mdev, req->master_bio);
2539 else 2720 else
2540 ok = _drbd_send_zc_bio(mdev, req->master_bio); 2721 ok = _drbd_send_zc_bio(mdev, req->master_bio);
2722
2723 /* double check digest, sometimes buffers have been modified in flight. */
2724 if (dgs > 0 && dgs <= 64) {
2725 /* 64 bytes, 512 bits, is the largest digest size
2726 * currently supported in kernel crypto. */
2727 unsigned char digest[64];
2728 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
2729 if (memcmp(mdev->int_dig_out, digest, dgs)) {
2730 dev_warn(DEV,
2731 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
2732 (unsigned long long)req->sector, req->size);
2733 }
2734 } /* else if (dgs > 64) {
2735 ... Be noisy about digest too large ...
2736 } */
2541 } 2737 }
2542 2738
2543 drbd_put_data_sock(mdev); 2739 drbd_put_data_sock(mdev);
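The added double check recomputes the digest after the send and compares it with the one already on the wire; a mismatch proves the upper layer redirtied the pages in flight. A toy userspace sketch of that pattern (the XOR checksum is a stand-in for the real crypto hash):

#include <stdio.h>
#include <string.h>

static unsigned char csum(const unsigned char *buf, size_t len)
{
        unsigned char c = 0;

        while (len--)
                c ^= *buf++;
        return c;
}

int main(void)
{
        unsigned char page[16] = "stable payload";
        unsigned char before = csum(page, sizeof(page)); /* digest sent on the wire */

        page[0] ^= 0xff;        /* upper layer modifies the buffer in flight */

        if (csum(page, sizeof(page)) != before)
                fprintf(stderr, "digest mismatch: buffer modified during write\n");
        return 0;
}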
@@ -2587,7 +2783,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2587 if (ok && dgs) { 2783 if (ok && dgs) {
2588 dgb = mdev->int_dig_out; 2784 dgb = mdev->int_dig_out;
2589 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); 2785 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
2590 ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); 2786 ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
2591 } 2787 }
2592 if (ok) 2788 if (ok)
2593 ok = _drbd_send_zc_ee(mdev, e); 2789 ok = _drbd_send_zc_ee(mdev, e);
@@ -2597,6 +2793,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2597 return ok; 2793 return ok;
2598} 2794}
2599 2795
2796int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
2797{
2798 struct p_block_desc p;
2799
2800 p.sector = cpu_to_be64(req->sector);
2801 p.blksize = cpu_to_be32(req->size);
2802
2803 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
2804}
2805
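Like all DRBD on-the-wire structures, the new drbd_send_oos() packs a fixed-width, big-endian block descriptor. A userspace sketch of the packing (the real p_block_desc additionally carries a protocol header, and the swap helper below assumes a little-endian host):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

struct block_desc_sketch {
        uint64_t sector;
        uint32_t blksize;
} __attribute__((packed));

/* htonl-based 64-bit swap; correct on little-endian hosts only */
static uint64_t cpu_to_be64_sketch(uint64_t v)
{
        return ((uint64_t)htonl(v & 0xffffffffULL) << 32) | htonl(v >> 32);
}

int main(void)
{
        struct block_desc_sketch p;

        p.sector = cpu_to_be64_sketch(123456);  /* start sector, big endian */
        p.blksize = htonl(4096);                /* request size in bytes */
        printf("%zu payload bytes on the wire\n", sizeof(p));
        return 0;
}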
2600/* 2806/*
2601 drbd_send distinguishes two cases: 2807 drbd_send distinguishes two cases:
2602 2808
@@ -2770,6 +2976,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2770 atomic_set(&mdev->pp_in_use_by_net, 0); 2976 atomic_set(&mdev->pp_in_use_by_net, 0);
2771 atomic_set(&mdev->rs_sect_in, 0); 2977 atomic_set(&mdev->rs_sect_in, 0);
2772 atomic_set(&mdev->rs_sect_ev, 0); 2978 atomic_set(&mdev->rs_sect_ev, 0);
2979 atomic_set(&mdev->ap_in_flight, 0);
2773 2980
2774 mutex_init(&mdev->md_io_mutex); 2981 mutex_init(&mdev->md_io_mutex);
2775 mutex_init(&mdev->data.mutex); 2982 mutex_init(&mdev->data.mutex);
@@ -2798,19 +3005,27 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2798 INIT_LIST_HEAD(&mdev->unplug_work.list); 3005 INIT_LIST_HEAD(&mdev->unplug_work.list);
2799 INIT_LIST_HEAD(&mdev->go_diskless.list); 3006 INIT_LIST_HEAD(&mdev->go_diskless.list);
2800 INIT_LIST_HEAD(&mdev->md_sync_work.list); 3007 INIT_LIST_HEAD(&mdev->md_sync_work.list);
3008 INIT_LIST_HEAD(&mdev->start_resync_work.list);
2801 INIT_LIST_HEAD(&mdev->bm_io_work.w.list); 3009 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
2802 3010
2803 mdev->resync_work.cb = w_resync_inactive; 3011 mdev->resync_work.cb = w_resync_timer;
2804 mdev->unplug_work.cb = w_send_write_hint; 3012 mdev->unplug_work.cb = w_send_write_hint;
2805 mdev->go_diskless.cb = w_go_diskless; 3013 mdev->go_diskless.cb = w_go_diskless;
2806 mdev->md_sync_work.cb = w_md_sync; 3014 mdev->md_sync_work.cb = w_md_sync;
2807 mdev->bm_io_work.w.cb = w_bitmap_io; 3015 mdev->bm_io_work.w.cb = w_bitmap_io;
3016 mdev->start_resync_work.cb = w_start_resync;
2808 init_timer(&mdev->resync_timer); 3017 init_timer(&mdev->resync_timer);
2809 init_timer(&mdev->md_sync_timer); 3018 init_timer(&mdev->md_sync_timer);
3019 init_timer(&mdev->start_resync_timer);
3020 init_timer(&mdev->request_timer);
2810 mdev->resync_timer.function = resync_timer_fn; 3021 mdev->resync_timer.function = resync_timer_fn;
2811 mdev->resync_timer.data = (unsigned long) mdev; 3022 mdev->resync_timer.data = (unsigned long) mdev;
2812 mdev->md_sync_timer.function = md_sync_timer_fn; 3023 mdev->md_sync_timer.function = md_sync_timer_fn;
2813 mdev->md_sync_timer.data = (unsigned long) mdev; 3024 mdev->md_sync_timer.data = (unsigned long) mdev;
3025 mdev->start_resync_timer.function = start_resync_timer_fn;
3026 mdev->start_resync_timer.data = (unsigned long) mdev;
3027 mdev->request_timer.function = request_timer_fn;
3028 mdev->request_timer.data = (unsigned long) mdev;
2814 3029
2815 init_waitqueue_head(&mdev->misc_wait); 3030 init_waitqueue_head(&mdev->misc_wait);
2816 init_waitqueue_head(&mdev->state_wait); 3031 init_waitqueue_head(&mdev->state_wait);
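The two new timers follow the classic init_timer() idiom of this kernel generation (pre-timer_setup()): the callback receives an opaque unsigned long carrying the device pointer. A kernel-context sketch (the callback body and the one-second timeout are illustrative):

static void example_timer_fn(unsigned long data)
{
        struct drbd_conf *mdev = (struct drbd_conf *) data;

        wake_up(&mdev->misc_wait);      /* e.g. poke whoever is waiting */
}

static void arm_example_timer(struct drbd_conf *mdev, struct timer_list *t)
{
        init_timer(t);
        t->function = example_timer_fn;
        t->data = (unsigned long) mdev;
        mod_timer(t, jiffies + HZ);     /* fire in about one second */
}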
@@ -2881,6 +3096,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
2881 D_ASSERT(list_empty(&mdev->resync_work.list)); 3096 D_ASSERT(list_empty(&mdev->resync_work.list));
2882 D_ASSERT(list_empty(&mdev->unplug_work.list)); 3097 D_ASSERT(list_empty(&mdev->unplug_work.list));
2883 D_ASSERT(list_empty(&mdev->go_diskless.list)); 3098 D_ASSERT(list_empty(&mdev->go_diskless.list));
3099
3100 drbd_set_defaults(mdev);
2884} 3101}
2885 3102
2886 3103
@@ -2923,7 +3140,7 @@ static void drbd_destroy_mempools(void)
2923static int drbd_create_mempools(void) 3140static int drbd_create_mempools(void)
2924{ 3141{
2925 struct page *page; 3142 struct page *page;
2926 const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count; 3143 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
2927 int i; 3144 int i;
2928 3145
2929 /* prepare our caches and mempools */ 3146 /* prepare our caches and mempools */
@@ -3087,11 +3304,20 @@ static void drbd_cleanup(void)
3087 3304
3088 unregister_reboot_notifier(&drbd_notifier); 3305 unregister_reboot_notifier(&drbd_notifier);
3089 3306
3307 /* first remove proc,
3308 * drbdsetup uses its presence to detect
3309 * whether DRBD is loaded.
3310 * If we were to get stuck in proc removal,
3311 * but have netlink already deregistered,
3312 * some drbdsetup commands may wait forever
3313 * for an answer.
3314 */
3315 if (drbd_proc)
3316 remove_proc_entry("drbd", NULL);
3317
3090 drbd_nl_cleanup(); 3318 drbd_nl_cleanup();
3091 3319
3092 if (minor_table) { 3320 if (minor_table) {
3093 if (drbd_proc)
3094 remove_proc_entry("drbd", NULL);
3095 i = minor_count; 3321 i = minor_count;
3096 while (i--) 3322 while (i--)
3097 drbd_delete_device(i); 3323 drbd_delete_device(i);
@@ -3119,7 +3345,7 @@ static int drbd_congested(void *congested_data, int bdi_bits)
3119 char reason = '-'; 3345 char reason = '-';
3120 int r = 0; 3346 int r = 0;
3121 3347
3122 if (!__inc_ap_bio_cond(mdev)) { 3348 if (!may_inc_ap_bio(mdev)) {
3123 /* DRBD has frozen IO */ 3349 /* DRBD has frozen IO */
3124 r = bdi_bits; 3350 r = bdi_bits;
3125 reason = 'd'; 3351 reason = 'd';
@@ -3172,7 +3398,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
3172 goto out_no_disk; 3398 goto out_no_disk;
3173 mdev->vdisk = disk; 3399 mdev->vdisk = disk;
3174 3400
3175 set_disk_ro(disk, TRUE); 3401 set_disk_ro(disk, true);
3176 3402
3177 disk->queue = q; 3403 disk->queue = q;
3178 disk->major = DRBD_MAJOR; 3404 disk->major = DRBD_MAJOR;
@@ -3188,8 +3414,8 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
3188 q->backing_dev_info.congested_fn = drbd_congested; 3414 q->backing_dev_info.congested_fn = drbd_congested;
3189 q->backing_dev_info.congested_data = mdev; 3415 q->backing_dev_info.congested_data = mdev;
3190 3416
3191 blk_queue_make_request(q, drbd_make_request_26); 3417 blk_queue_make_request(q, drbd_make_request);
3192 blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); 3418 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
3193 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); 3419 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
3194 blk_queue_merge_bvec(q, drbd_merge_bvec); 3420 blk_queue_merge_bvec(q, drbd_merge_bvec);
3195 q->queue_lock = &mdev->req_lock; 3421 q->queue_lock = &mdev->req_lock;
@@ -3251,6 +3477,7 @@ void drbd_free_mdev(struct drbd_conf *mdev)
3251 put_disk(mdev->vdisk); 3477 put_disk(mdev->vdisk);
3252 blk_cleanup_queue(mdev->rq_queue); 3478 blk_cleanup_queue(mdev->rq_queue);
3253 free_cpumask_var(mdev->cpu_mask); 3479 free_cpumask_var(mdev->cpu_mask);
3480 drbd_free_tl_hash(mdev);
3254 kfree(mdev); 3481 kfree(mdev);
3255} 3482}
3256 3483
@@ -3266,7 +3493,7 @@ int __init drbd_init(void)
3266 return -EINVAL; 3493 return -EINVAL;
3267 } 3494 }
3268 3495
3269 if (1 > minor_count || minor_count > 255) { 3496 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
3270 printk(KERN_ERR 3497 printk(KERN_ERR
3271 "drbd: invalid minor_count (%d)\n", minor_count); 3498 "drbd: invalid minor_count (%d)\n", minor_count);
3272#ifdef MODULE 3499#ifdef MODULE
@@ -3448,7 +3675,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
3448 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { 3675 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
3449 /* this was a try anyways ... */ 3676 /* this was a try anyways ... */
3450 dev_err(DEV, "meta data update failed!\n"); 3677 dev_err(DEV, "meta data update failed!\n");
3451 drbd_chk_io_error(mdev, 1, TRUE); 3678 drbd_chk_io_error(mdev, 1, true);
3452 } 3679 }
3453 3680
3454 /* Update mdev->ldev->md.la_size_sect, 3681 /* Update mdev->ldev->md.la_size_sect,
@@ -3464,7 +3691,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
3464 * @mdev: DRBD device. 3691 * @mdev: DRBD device.
3465 * @bdev: Device from which the meta data should be read in. 3692 * @bdev: Device from which the meta data should be read in.
3466 * 3693 *
3467 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case 3694 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
3468 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. 3695 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
3469 */ 3696 */
3470int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) 3697int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
@@ -3534,28 +3761,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3534 return rv; 3761 return rv;
3535} 3762}
3536 3763
3537static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
3538{
3539 static char *uuid_str[UI_EXTENDED_SIZE] = {
3540 [UI_CURRENT] = "CURRENT",
3541 [UI_BITMAP] = "BITMAP",
3542 [UI_HISTORY_START] = "HISTORY_START",
3543 [UI_HISTORY_END] = "HISTORY_END",
3544 [UI_SIZE] = "SIZE",
3545 [UI_FLAGS] = "FLAGS",
3546 };
3547
3548 if (index >= UI_EXTENDED_SIZE) {
3549 dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
3550 return;
3551 }
3552
3553 dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
3554 uuid_str[index],
3555 (unsigned long long)mdev->ldev->md.uuid[index]);
3556}
3557
3558
3559/** 3764/**
3560 * drbd_md_mark_dirty() - Mark meta data super block as dirty 3765 * drbd_md_mark_dirty() - Mark meta data super block as dirty
3561 * @mdev: DRBD device. 3766 * @mdev: DRBD device.
@@ -3585,10 +3790,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
3585{ 3790{
3586 int i; 3791 int i;
3587 3792
3588 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { 3793 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
3589 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; 3794 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
3590 debug_drbd_uuid(mdev, i+1);
3591 }
3592} 3795}
3593 3796
3594void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) 3797void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
@@ -3603,7 +3806,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3603 } 3806 }
3604 3807
3605 mdev->ldev->md.uuid[idx] = val; 3808 mdev->ldev->md.uuid[idx] = val;
3606 debug_drbd_uuid(mdev, idx);
3607 drbd_md_mark_dirty(mdev); 3809 drbd_md_mark_dirty(mdev);
3608} 3810}
3609 3811
@@ -3613,7 +3815,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3613 if (mdev->ldev->md.uuid[idx]) { 3815 if (mdev->ldev->md.uuid[idx]) {
3614 drbd_uuid_move_history(mdev); 3816 drbd_uuid_move_history(mdev);
3615 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; 3817 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
3616 debug_drbd_uuid(mdev, UI_HISTORY_START);
3617 } 3818 }
3618 _drbd_uuid_set(mdev, idx, val); 3819 _drbd_uuid_set(mdev, idx, val);
3619} 3820}
@@ -3628,14 +3829,16 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3628void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) 3829void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
3629{ 3830{
3630 u64 val; 3831 u64 val;
3832 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
3833
3834 if (bm_uuid)
3835 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
3631 3836
3632 dev_info(DEV, "Creating new current UUID\n");
3633 D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
3634 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; 3837 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
3635 debug_drbd_uuid(mdev, UI_BITMAP);
3636 3838
3637 get_random_bytes(&val, sizeof(u64)); 3839 get_random_bytes(&val, sizeof(u64));
3638 _drbd_uuid_set(mdev, UI_CURRENT, val); 3840 _drbd_uuid_set(mdev, UI_CURRENT, val);
3841 drbd_print_uuids(mdev, "new current UUID");
3639 /* get it to stable storage _now_ */ 3842 /* get it to stable storage _now_ */
3640 drbd_md_sync(mdev); 3843 drbd_md_sync(mdev);
3641} 3844}
@@ -3649,16 +3852,12 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
3649 drbd_uuid_move_history(mdev); 3852 drbd_uuid_move_history(mdev);
3650 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; 3853 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
3651 mdev->ldev->md.uuid[UI_BITMAP] = 0; 3854 mdev->ldev->md.uuid[UI_BITMAP] = 0;
3652 debug_drbd_uuid(mdev, UI_HISTORY_START);
3653 debug_drbd_uuid(mdev, UI_BITMAP);
3654 } else { 3855 } else {
3655 if (mdev->ldev->md.uuid[UI_BITMAP]) 3856 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
3656 dev_warn(DEV, "bm UUID already set"); 3857 if (bm_uuid)
3657 3858 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
3658 mdev->ldev->md.uuid[UI_BITMAP] = val;
3659 mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
3660 3859
3661 debug_drbd_uuid(mdev, UI_BITMAP); 3860 mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
3662 } 3861 }
3663 drbd_md_mark_dirty(mdev); 3862 drbd_md_mark_dirty(mdev);
3664} 3863}
@@ -3714,15 +3913,19 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
3714static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) 3913static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3715{ 3914{
3716 struct bm_io_work *work = container_of(w, struct bm_io_work, w); 3915 struct bm_io_work *work = container_of(w, struct bm_io_work, w);
3717 int rv; 3916 int rv = -EIO;
3718 3917
3719 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); 3918 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
3720 3919
3721 drbd_bm_lock(mdev, work->why); 3920 if (get_ldev(mdev)) {
3722 rv = work->io_fn(mdev); 3921 drbd_bm_lock(mdev, work->why, work->flags);
3723 drbd_bm_unlock(mdev); 3922 rv = work->io_fn(mdev);
3923 drbd_bm_unlock(mdev);
3924 put_ldev(mdev);
3925 }
3724 3926
3725 clear_bit(BITMAP_IO, &mdev->flags); 3927 clear_bit(BITMAP_IO, &mdev->flags);
3928 smp_mb__after_clear_bit();
3726 wake_up(&mdev->misc_wait); 3929 wake_up(&mdev->misc_wait);
3727 3930
3728 if (work->done) 3931 if (work->done)
@@ -3730,6 +3933,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3730 3933
3731 clear_bit(BITMAP_IO_QUEUED, &mdev->flags); 3934 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
3732 work->why = NULL; 3935 work->why = NULL;
3936 work->flags = 0;
3733 3937
3734 return 1; 3938 return 1;
3735} 3939}
@@ -3784,7 +3988,7 @@ void drbd_go_diskless(struct drbd_conf *mdev)
3784void drbd_queue_bitmap_io(struct drbd_conf *mdev, 3988void drbd_queue_bitmap_io(struct drbd_conf *mdev,
3785 int (*io_fn)(struct drbd_conf *), 3989 int (*io_fn)(struct drbd_conf *),
3786 void (*done)(struct drbd_conf *, int), 3990 void (*done)(struct drbd_conf *, int),
3787 char *why) 3991 char *why, enum bm_flag flags)
3788{ 3992{
3789 D_ASSERT(current == mdev->worker.task); 3993 D_ASSERT(current == mdev->worker.task);
3790 3994
@@ -3798,15 +4002,15 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
3798 mdev->bm_io_work.io_fn = io_fn; 4002 mdev->bm_io_work.io_fn = io_fn;
3799 mdev->bm_io_work.done = done; 4003 mdev->bm_io_work.done = done;
3800 mdev->bm_io_work.why = why; 4004 mdev->bm_io_work.why = why;
4005 mdev->bm_io_work.flags = flags;
3801 4006
4007 spin_lock_irq(&mdev->req_lock);
3802 set_bit(BITMAP_IO, &mdev->flags); 4008 set_bit(BITMAP_IO, &mdev->flags);
3803 if (atomic_read(&mdev->ap_bio_cnt) == 0) { 4009 if (atomic_read(&mdev->ap_bio_cnt) == 0) {
3804 if (list_empty(&mdev->bm_io_work.w.list)) { 4010 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
3805 set_bit(BITMAP_IO_QUEUED, &mdev->flags);
3806 drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); 4011 drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
3807 } else
3808 dev_err(DEV, "FIXME avoided double queuing bm_io_work\n");
3809 } 4012 }
4013 spin_unlock_irq(&mdev->req_lock);
3810} 4014}
3811 4015
3812/** 4016/**
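Replacing the list_empty() check with test_and_set_bit() under req_lock makes "queue at most once" race-free: whoever wins the atomic test-and-set queues the work, every later caller backs off. The generic shape, as a standalone C11 sketch:

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag queued = ATOMIC_FLAG_INIT;

static void maybe_queue(void)
{
        /* test-and-set returns the previous value:
         * only the first caller sees 'false' and queues */
        if (!atomic_flag_test_and_set(&queued))
                printf("queued exactly once\n");
        else
                printf("already queued, skipping\n");
}

int main(void)
{
        maybe_queue();
        maybe_queue();          /* second call is a no-op */
        return 0;
}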
@@ -3818,19 +4022,22 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
3818 * freezes application IO while that the actual IO operations runs. This 4022 * freezes application IO while that the actual IO operations runs. This
3819 * functions MAY NOT be called from worker context. 4023 * functions MAY NOT be called from worker context.
3820 */ 4024 */
3821int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) 4025int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
4026 char *why, enum bm_flag flags)
3822{ 4027{
3823 int rv; 4028 int rv;
3824 4029
3825 D_ASSERT(current != mdev->worker.task); 4030 D_ASSERT(current != mdev->worker.task);
3826 4031
3827 drbd_suspend_io(mdev); 4032 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4033 drbd_suspend_io(mdev);
3828 4034
3829 drbd_bm_lock(mdev, why); 4035 drbd_bm_lock(mdev, why, flags);
3830 rv = io_fn(mdev); 4036 rv = io_fn(mdev);
3831 drbd_bm_unlock(mdev); 4037 drbd_bm_unlock(mdev);
3832 4038
3833 drbd_resume_io(mdev); 4039 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4040 drbd_resume_io(mdev);
3834 4041
3835 return rv; 4042 return rv;
3836} 4043}
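With the new enum bm_flag argument, callers state up front which concurrent bitmap modifications they tolerate, and BM_LOCKED_SET_ALLOWED writeouts additionally skip the application-IO freeze. A hypothetical non-worker call site, mirroring the drbd_nl.c usage below (drbd_bm_write and BM_LOCKED_MASK are the real identifiers; the error message is illustrative):

/* kernel-context sketch */
if (drbd_bitmap_io(mdev, &drbd_bm_write,
                   "example writeout", BM_LOCKED_MASK))
        dev_err(DEV, "bitmap writeout failed\n");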
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index fe81c851ca88..03b29f78a37d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -288,10 +288,11 @@ void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
288 dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); 288 dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
289} 289}
290 290
291int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) 291enum drbd_state_rv
292drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
292{ 293{
293 const int max_tries = 4; 294 const int max_tries = 4;
294 int r = 0; 295 enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
295 int try = 0; 296 int try = 0;
296 int forced = 0; 297 int forced = 0;
297 union drbd_state mask, val; 298 union drbd_state mask, val;
@@ -306,17 +307,17 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
306 val.i = 0; val.role = new_role; 307 val.i = 0; val.role = new_role;
307 308
308 while (try++ < max_tries) { 309 while (try++ < max_tries) {
309 r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); 310 rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
310 311
311 /* in case we first succeeded to outdate, 312 /* in case we first succeeded to outdate,
312 * but now suddenly could establish a connection */ 313 * but now suddenly could establish a connection */
313 if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { 314 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
314 val.pdsk = 0; 315 val.pdsk = 0;
315 mask.pdsk = 0; 316 mask.pdsk = 0;
316 continue; 317 continue;
317 } 318 }
318 319
319 if (r == SS_NO_UP_TO_DATE_DISK && force && 320 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
320 (mdev->state.disk < D_UP_TO_DATE && 321 (mdev->state.disk < D_UP_TO_DATE &&
321 mdev->state.disk >= D_INCONSISTENT)) { 322 mdev->state.disk >= D_INCONSISTENT)) {
322 mask.disk = D_MASK; 323 mask.disk = D_MASK;
@@ -325,7 +326,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
325 continue; 326 continue;
326 } 327 }
327 328
328 if (r == SS_NO_UP_TO_DATE_DISK && 329 if (rv == SS_NO_UP_TO_DATE_DISK &&
329 mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { 330 mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
330 D_ASSERT(mdev->state.pdsk == D_UNKNOWN); 331 D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
331 nps = drbd_try_outdate_peer(mdev); 332 nps = drbd_try_outdate_peer(mdev);
@@ -341,9 +342,9 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
341 continue; 342 continue;
342 } 343 }
343 344
344 if (r == SS_NOTHING_TO_DO) 345 if (rv == SS_NOTHING_TO_DO)
345 goto fail; 346 goto fail;
346 if (r == SS_PRIMARY_NOP && mask.pdsk == 0) { 347 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
347 nps = drbd_try_outdate_peer(mdev); 348 nps = drbd_try_outdate_peer(mdev);
348 349
349 if (force && nps > D_OUTDATED) { 350 if (force && nps > D_OUTDATED) {
@@ -356,25 +357,24 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
356 357
357 continue; 358 continue;
358 } 359 }
359 if (r == SS_TWO_PRIMARIES) { 360 if (rv == SS_TWO_PRIMARIES) {
360 /* Maybe the peer is detected as dead very soon... 361 /* Maybe the peer is detected as dead very soon...
361 retry at most once more in this case. */ 362 retry at most once more in this case. */
362 __set_current_state(TASK_INTERRUPTIBLE); 363 schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
363 schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10);
364 if (try < max_tries) 364 if (try < max_tries)
365 try = max_tries - 1; 365 try = max_tries - 1;
366 continue; 366 continue;
367 } 367 }
368 if (r < SS_SUCCESS) { 368 if (rv < SS_SUCCESS) {
369 r = _drbd_request_state(mdev, mask, val, 369 rv = _drbd_request_state(mdev, mask, val,
370 CS_VERBOSE + CS_WAIT_COMPLETE); 370 CS_VERBOSE + CS_WAIT_COMPLETE);
371 if (r < SS_SUCCESS) 371 if (rv < SS_SUCCESS)
372 goto fail; 372 goto fail;
373 } 373 }
374 break; 374 break;
375 } 375 }
376 376
377 if (r < SS_SUCCESS) 377 if (rv < SS_SUCCESS)
378 goto fail; 378 goto fail;
379 379
380 if (forced) 380 if (forced)
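The open-coded sleep collapses into schedule_timeout_interruptible(), which is exactly the set-state-then-sleep pair it replaces; in sketch form:

/* equivalent of the two lines removed above */
static signed long sleep_interruptible_sketch(signed long timeout)
{
        __set_current_state(TASK_INTERRUPTIBLE);
        return schedule_timeout(timeout);
}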
@@ -384,7 +384,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
384 wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); 384 wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
385 385
386 if (new_role == R_SECONDARY) { 386 if (new_role == R_SECONDARY) {
387 set_disk_ro(mdev->vdisk, TRUE); 387 set_disk_ro(mdev->vdisk, true);
388 if (get_ldev(mdev)) { 388 if (get_ldev(mdev)) {
389 mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; 389 mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
390 put_ldev(mdev); 390 put_ldev(mdev);
@@ -394,7 +394,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
394 mdev->net_conf->want_lose = 0; 394 mdev->net_conf->want_lose = 0;
395 put_net_conf(mdev); 395 put_net_conf(mdev);
396 } 396 }
397 set_disk_ro(mdev->vdisk, FALSE); 397 set_disk_ro(mdev->vdisk, false);
398 if (get_ldev(mdev)) { 398 if (get_ldev(mdev)) {
399 if (((mdev->state.conn < C_CONNECTED || 399 if (((mdev->state.conn < C_CONNECTED ||
400 mdev->state.pdsk <= D_FAILED) 400 mdev->state.pdsk <= D_FAILED)
@@ -406,10 +406,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
406 } 406 }
407 } 407 }
408 408
409 if ((new_role == R_SECONDARY) && get_ldev(mdev)) { 409 /* writeout of activity log covered areas of the bitmap
410 drbd_al_to_on_disk_bm(mdev); 410 * to stable storage done in after state change already */
411 put_ldev(mdev);
412 }
413 411
414 if (mdev->state.conn >= C_WF_REPORT_PARAMS) { 412 if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
415 /* if this was forced, we should consider sync */ 413 /* if this was forced, we should consider sync */
@@ -423,7 +421,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
423 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); 421 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
424 fail: 422 fail:
425 mutex_unlock(&mdev->state_mutex); 423 mutex_unlock(&mdev->state_mutex);
426 return r; 424 return rv;
427} 425}
428 426
429static struct drbd_conf *ensure_mdev(int minor, int create) 427static struct drbd_conf *ensure_mdev(int minor, int create)
@@ -528,17 +526,19 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
528 } 526 }
529} 527}
530 528
529/* input size is expected to be in KB */
531char *ppsize(char *buf, unsigned long long size) 530char *ppsize(char *buf, unsigned long long size)
532{ 531{
533 /* Needs 9 bytes at max. */ 532 /* Needs 9 bytes at max including trailing NUL:
533 * -1ULL ==> "16384 EB" */
534 static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; 534 static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
535 int base = 0; 535 int base = 0;
536 while (size >= 10000) { 536 while (size >= 10000 && base < sizeof(units)-1) {
537 /* shift + round */ 537 /* shift + round */
538 size = (size >> 10) + !!(size & (1<<9)); 538 size = (size >> 10) + !!(size & (1<<9));
539 base++; 539 base++;
540 } 540 }
541 sprintf(buf, "%lu %cB", (long)size, units[base]); 541 sprintf(buf, "%u %cB", (unsigned)size, units[base]);
542 542
543 return buf; 543 return buf;
544} 544}
@@ -642,11 +642,19 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_
642 || prev_size != mdev->ldev->md.md_size_sect; 642 || prev_size != mdev->ldev->md.md_size_sect;
643 643
644 if (la_size_changed || md_moved) { 644 if (la_size_changed || md_moved) {
645 int err;
646
645 drbd_al_shrink(mdev); /* All extents inactive. */ 647 drbd_al_shrink(mdev); /* All extents inactive. */
646 dev_info(DEV, "Writing the whole bitmap, %s\n", 648 dev_info(DEV, "Writing the whole bitmap, %s\n",
647 la_size_changed && md_moved ? "size changed and md moved" : 649 la_size_changed && md_moved ? "size changed and md moved" :
648 la_size_changed ? "size changed" : "md moved"); 650 la_size_changed ? "size changed" : "md moved");
649 rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ 651 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
652 err = drbd_bitmap_io(mdev, &drbd_bm_write,
653 "size changed", BM_LOCKED_MASK);
654 if (err) {
655 rv = dev_size_error;
656 goto out;
657 }
650 drbd_md_mark_dirty(mdev); 658 drbd_md_mark_dirty(mdev);
651 } 659 }
652 660
@@ -765,22 +773,21 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
765 return 0; 773 return 0;
766} 774}
767 775
768void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local) 776void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
769{ 777{
770 struct request_queue * const q = mdev->rq_queue; 778 struct request_queue * const q = mdev->rq_queue;
771 struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; 779 struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
772 int max_segments = mdev->ldev->dc.max_bio_bvecs; 780 int max_segments = mdev->ldev->dc.max_bio_bvecs;
781 int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
773 782
774 max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
775
776 blk_queue_max_hw_sectors(q, max_seg_s >> 9);
777 blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
778 blk_queue_max_segment_size(q, max_seg_s);
779 blk_queue_logical_block_size(q, 512); 783 blk_queue_logical_block_size(q, 512);
780 blk_queue_segment_boundary(q, PAGE_SIZE-1); 784 blk_queue_max_hw_sectors(q, max_hw_sectors);
781 blk_stack_limits(&q->limits, &b->limits, 0); 785 /* This is the workaround for "bio would need to, but cannot, be split" */
786 blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
787 blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
788 blk_queue_stack_limits(q, b);
782 789
783 dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q)); 790 dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);
784 791
785 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { 792 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
786 dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", 793 dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
@@ -850,7 +857,7 @@ static void drbd_suspend_al(struct drbd_conf *mdev)
850static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 857static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
851 struct drbd_nl_cfg_reply *reply) 858 struct drbd_nl_cfg_reply *reply)
852{ 859{
853 enum drbd_ret_codes retcode; 860 enum drbd_ret_code retcode;
854 enum determine_dev_size dd; 861 enum determine_dev_size dd;
855 sector_t max_possible_sectors; 862 sector_t max_possible_sectors;
856 sector_t min_md_device_sectors; 863 sector_t min_md_device_sectors;
@@ -858,8 +865,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
858 struct block_device *bdev; 865 struct block_device *bdev;
859 struct lru_cache *resync_lru = NULL; 866 struct lru_cache *resync_lru = NULL;
860 union drbd_state ns, os; 867 union drbd_state ns, os;
861 unsigned int max_seg_s; 868 unsigned int max_bio_size;
862 int rv; 869 enum drbd_state_rv rv;
863 int cp_discovered = 0; 870 int cp_discovered = 0;
864 int logical_block_size; 871 int logical_block_size;
865 872
@@ -1005,9 +1012,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1005 /* and for any other previously queued work */ 1012 /* and for any other previously queued work */
1006 drbd_flush_workqueue(mdev); 1013 drbd_flush_workqueue(mdev);
1007 1014
1008 retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); 1015 rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
1016 retcode = rv; /* FIXME: Type mismatch. */
1009 drbd_resume_io(mdev); 1017 drbd_resume_io(mdev);
1010 if (retcode < SS_SUCCESS) 1018 if (rv < SS_SUCCESS)
1011 goto fail; 1019 goto fail;
1012 1020
1013 if (!get_ldev_if_state(mdev, D_ATTACHING)) 1021 if (!get_ldev_if_state(mdev, D_ATTACHING))
@@ -1109,20 +1117,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1109 mdev->read_cnt = 0; 1117 mdev->read_cnt = 0;
1110 mdev->writ_cnt = 0; 1118 mdev->writ_cnt = 0;
1111 1119
1112 max_seg_s = DRBD_MAX_SEGMENT_SIZE; 1120 max_bio_size = DRBD_MAX_BIO_SIZE;
1113 if (mdev->state.conn == C_CONNECTED) { 1121 if (mdev->state.conn == C_CONNECTED) {
1114 /* We are Primary, Connected, and now attach a new local 1122 /* We are Primary, Connected, and now attach a new local
1115 * backing store. We must not increase the user visible maximum 1123 * backing store. We must not increase the user visible maximum
1116 * bio size on this device to something the peer may not be 1124 * bio size on this device to something the peer may not be
1117 * able to handle. */ 1125 * able to handle. */
1118 if (mdev->agreed_pro_version < 94) 1126 if (mdev->agreed_pro_version < 94)
1119 max_seg_s = queue_max_segment_size(mdev->rq_queue); 1127 max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
1120 else if (mdev->agreed_pro_version == 94) 1128 else if (mdev->agreed_pro_version == 94)
1121 max_seg_s = DRBD_MAX_SIZE_H80_PACKET; 1129 max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
1122 /* else: drbd 8.3.9 and later, stay with default */ 1130 /* else: drbd 8.3.9 and later, stay with default */
1123 } 1131 }
1124 1132
1125 drbd_setup_queue_param(mdev, max_seg_s); 1133 drbd_setup_queue_param(mdev, max_bio_size);
1126 1134
1127 /* If I am currently not R_PRIMARY, 1135 /* If I am currently not R_PRIMARY,
1128 * but meta data primary indicator is set, 1136 * but meta data primary indicator is set,
@@ -1154,12 +1162,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1154 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { 1162 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1155 dev_info(DEV, "Assuming that all blocks are out of sync " 1163 dev_info(DEV, "Assuming that all blocks are out of sync "
1156 "(aka FullSync)\n"); 1164 "(aka FullSync)\n");
1157 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { 1165 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
1166 "set_n_write from attaching", BM_LOCKED_MASK)) {
1158 retcode = ERR_IO_MD_DISK; 1167 retcode = ERR_IO_MD_DISK;
1159 goto force_diskless_dec; 1168 goto force_diskless_dec;
1160 } 1169 }
1161 } else { 1170 } else {
1162 if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) { 1171 if (drbd_bitmap_io(mdev, &drbd_bm_read,
1172 "read from attaching", BM_LOCKED_MASK) < 0) {
1163 retcode = ERR_IO_MD_DISK; 1173 retcode = ERR_IO_MD_DISK;
1164 goto force_diskless_dec; 1174 goto force_diskless_dec;
1165 } 1175 }
@@ -1167,7 +1177,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1167 1177
1168 if (cp_discovered) { 1178 if (cp_discovered) {
1169 drbd_al_apply_to_bm(mdev); 1179 drbd_al_apply_to_bm(mdev);
1170 drbd_al_to_on_disk_bm(mdev); 1180 if (drbd_bitmap_io(mdev, &drbd_bm_write,
1181 "crashed primary apply AL", BM_LOCKED_MASK)) {
1182 retcode = ERR_IO_MD_DISK;
1183 goto force_diskless_dec;
1184 }
1171 } 1185 }
1172 1186
1173 if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) 1187 if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
@@ -1279,7 +1293,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1279 struct drbd_nl_cfg_reply *reply) 1293 struct drbd_nl_cfg_reply *reply)
1280{ 1294{
1281 int i, ns; 1295 int i, ns;
1282 enum drbd_ret_codes retcode; 1296 enum drbd_ret_code retcode;
1283 struct net_conf *new_conf = NULL; 1297 struct net_conf *new_conf = NULL;
1284 struct crypto_hash *tfm = NULL; 1298 struct crypto_hash *tfm = NULL;
1285 struct crypto_hash *integrity_w_tfm = NULL; 1299 struct crypto_hash *integrity_w_tfm = NULL;
@@ -1324,6 +1338,8 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1324 new_conf->wire_protocol = DRBD_PROT_C; 1338 new_conf->wire_protocol = DRBD_PROT_C;
1325 new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; 1339 new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
1326 new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; 1340 new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
1341 new_conf->on_congestion = DRBD_ON_CONGESTION_DEF;
1342 new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF;
1327 1343
1328 if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { 1344 if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
1329 retcode = ERR_MANDATORY_TAG; 1345 retcode = ERR_MANDATORY_TAG;
@@ -1345,6 +1361,11 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1345 } 1361 }
1346 } 1362 }
1347 1363
1364 if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
1365 retcode = ERR_CONG_NOT_PROTO_A;
1366 goto fail;
1367 }
1368
1348 if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { 1369 if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
1349 retcode = ERR_DISCARD; 1370 retcode = ERR_DISCARD;
1350 goto fail; 1371 goto fail;
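The added on_congestion check rejects any non-blocking congestion policy unless the wire protocol is A: with the synchronous protocols B and C every write waits for the peer anyway, so there is nothing for pull-ahead or disconnect-on-congestion to act on. A hedged sketch of the validation, with enum names local to the example:

#include <stdio.h>

enum wire_protocol { PROT_A = 1, PROT_B, PROT_C };
enum on_congestion { OC_BLOCK, OC_PULL_AHEAD, OC_DISCONNECT };

/* Mirrors the ERR_CONG_NOT_PROTO_A check: any policy other than
 * "block" requires asynchronous protocol A. Returns 0 if OK. */
static int validate_congestion_policy(enum on_congestion oc,
				      enum wire_protocol proto)
{
	if (oc != OC_BLOCK && proto != PROT_A)
		return -1;
	return 0;
}

int main(void)
{
	printf("%d\n", validate_congestion_policy(OC_PULL_AHEAD, PROT_C)); /* -1 */
	printf("%d\n", validate_congestion_policy(OC_PULL_AHEAD, PROT_A)); /*  0 */
	return 0;
}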
@@ -1525,6 +1546,21 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
1525 struct drbd_nl_cfg_reply *reply) 1546 struct drbd_nl_cfg_reply *reply)
1526{ 1547{
1527 int retcode; 1548 int retcode;
1549 struct disconnect dc;
1550
1551 memset(&dc, 0, sizeof(struct disconnect));
1552 if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
1553 retcode = ERR_MANDATORY_TAG;
1554 goto fail;
1555 }
1556
1557 if (dc.force) {
1558 spin_lock_irq(&mdev->req_lock);
1559 if (mdev->state.conn >= C_WF_CONNECTION)
1560 _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
1561 spin_unlock_irq(&mdev->req_lock);
1562 goto done;
1563 }
1528 1564
1529 retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); 1565 retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
1530 1566
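The new force branch skips the ordered state transition entirely: under the request lock it hard-sets C_DISCONNECTING whenever the device is at least waiting for a connection, then jumps to done. A userspace model of the pattern, with a pthread mutex standing in for the req_lock spinlock:

#include <pthread.h>
#include <stdio.h>

/* State values ordered like DRBD's enum, so "at least trying to
 * connect" is a simple >= comparison. */
enum conn_state { C_STANDALONE, C_DISCONNECTING, C_WF_CONNECTION, C_CONNECTED };

struct dev_model {
	pthread_mutex_t req_lock;
	enum conn_state conn;
};

/* Forced disconnect: no ordered negotiation, just clamp the state
 * (the kernel uses spin_lock_irq() and _drbd_set_state(..., CS_HARD)). */
static void force_disconnect(struct dev_model *d)
{
	pthread_mutex_lock(&d->req_lock);
	if (d->conn >= C_WF_CONNECTION)
		d->conn = C_DISCONNECTING;
	pthread_mutex_unlock(&d->req_lock);
}

int main(void)
{
	struct dev_model d = { PTHREAD_MUTEX_INITIALIZER, C_CONNECTED };

	force_disconnect(&d);
	printf("conn=%d\n", d.conn);	/* 1 == C_DISCONNECTING */
	return 0;
}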
@@ -1842,6 +1878,10 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
1842{ 1878{
1843 int retcode; 1879 int retcode;
1844 1880
1881 /* If there is still bitmap IO pending, probably because of a previous
1882 * resync just being finished, wait for it before requesting a new resync. */
1883 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
1884
1845 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); 1885 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
1846 1886
1847 if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) 1887 if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
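This wait is added in three handlers in this patch (invalidate, invalidate_peer, start_ov): before a new resync or verify is requested, the thread blocks until bitmap IO still in flight from the previous run has drained. A userspace analogue of the wait_event() idiom, using a condition variable:

#include <pthread.h>
#include <stdbool.h>

struct bm_io_model {
	pthread_mutex_t lock;
	pthread_cond_t misc_wait;	/* stands in for mdev->misc_wait */
	bool bitmap_io_pending;		/* test_bit(BITMAP_IO, &mdev->flags) */
};

/* Analogue of wait_event(mdev->misc_wait,
 *			  !test_bit(BITMAP_IO, &mdev->flags));
 * sleep until the completion side clears the flag and signals. */
static void wait_for_bitmap_io(struct bm_io_model *m)
{
	pthread_mutex_lock(&m->lock);
	while (m->bitmap_io_pending)
		pthread_cond_wait(&m->misc_wait, &m->lock);
	pthread_mutex_unlock(&m->lock);
}

int main(void)
{
	struct bm_io_model m = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
	};

	wait_for_bitmap_io(&m);		/* returns at once: nothing pending */
	return 0;
}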
@@ -1877,6 +1917,10 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
1877{ 1917{
1878 int retcode; 1918 int retcode;
1879 1919
1920 /* If there is still bitmap IO pending, probably because of a previous
1921 * resync just being finished, wait for it before requesting a new resync. */
1922 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
1923
1880 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); 1924 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
1881 1925
1882 if (retcode < SS_SUCCESS) { 1926 if (retcode < SS_SUCCESS) {
@@ -1885,9 +1929,9 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
1885 into a full resync. */ 1929 into a full resync. */
1886 retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); 1930 retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
1887 if (retcode >= SS_SUCCESS) { 1931 if (retcode >= SS_SUCCESS) {
1888 /* open coded drbd_bitmap_io() */
1889 if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, 1932 if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
1890 "set_n_write from invalidate_peer")) 1933 "set_n_write from invalidate_peer",
1934 BM_LOCKED_SET_ALLOWED))
1891 retcode = ERR_IO_MD_DISK; 1935 retcode = ERR_IO_MD_DISK;
1892 } 1936 }
1893 } else 1937 } else
@@ -1914,9 +1958,17 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1914 struct drbd_nl_cfg_reply *reply) 1958 struct drbd_nl_cfg_reply *reply)
1915{ 1959{
1916 int retcode = NO_ERROR; 1960 int retcode = NO_ERROR;
1961 union drbd_state s;
1917 1962
1918 if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) 1963 if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
1919 retcode = ERR_PAUSE_IS_CLEAR; 1964 s = mdev->state;
1965 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
1966 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
1967 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
1968 } else {
1969 retcode = ERR_PAUSE_IS_CLEAR;
1970 }
1971 }
1920 1972
1921 reply->ret_code = retcode; 1973 reply->ret_code = retcode;
1922 return 0; 1974 return 0;
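Where a no-op resume used to report only ERR_PAUSE_IS_CLEAR, the rewrite inspects the state and tells the user whether the sync is still paused by an after-resync dependency (aftr_isp) or by the peer (peer_isp). A compact sketch of the retcode selection:

#include <stdio.h>

enum err { ERR_PAUSE_IS_CLEAR = 1, ERR_PIC_AFTER_DEP, ERR_PIC_PEER_DEP };

struct state_model {
	int paused_sync;	/* conn is C_PAUSED_SYNC_S or C_PAUSED_SYNC_T */
	int aftr_isp;		/* paused by an after-resync dependency */
	int peer_isp;		/* paused by the peer */
};

/* If clearing user_isp changed nothing, report the most specific
 * remaining reason for the pause. */
static enum err why_still_paused(const struct state_model *s)
{
	if (s->paused_sync) {
		if (s->aftr_isp)
			return ERR_PIC_AFTER_DEP;
		if (s->peer_isp)
			return ERR_PIC_PEER_DEP;
	}
	return ERR_PAUSE_IS_CLEAR;
}

int main(void)
{
	struct state_model s = { 1, 0, 1 };

	printf("%d\n", why_still_paused(&s));	/* ERR_PIC_PEER_DEP */
	return 0;
}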
@@ -2054,6 +2106,11 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2054 reply->ret_code = ERR_MANDATORY_TAG; 2106 reply->ret_code = ERR_MANDATORY_TAG;
2055 return 0; 2107 return 0;
2056 } 2108 }
2109
2110 /* If there is still bitmap IO pending, e.g. previous resync or verify
2111 * just being finished, wait for it before requesting a new resync. */
2112 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2113
2057 /* w_make_ov_request expects position to be aligned */ 2114 /* w_make_ov_request expects position to be aligned */
2058 mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; 2115 mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
2059 reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); 2116 reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
@@ -2097,7 +2154,8 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
2097 drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ 2154 drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
2098 2155
2099 if (args.clear_bm) { 2156 if (args.clear_bm) {
2100 err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid"); 2157 err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
2158 "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
2101 if (err) { 2159 if (err) {
2102 dev_err(DEV, "Writing bitmap failed with %d\n",err); 2160 dev_err(DEV, "Writing bitmap failed with %d\n",err);
2103 retcode = ERR_IO_MD_DISK; 2161 retcode = ERR_IO_MD_DISK;
@@ -2105,6 +2163,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
2105 if (skip_initial_sync) { 2163 if (skip_initial_sync) {
2106 drbd_send_uuids_skip_initial_sync(mdev); 2164 drbd_send_uuids_skip_initial_sync(mdev);
2107 _drbd_uuid_set(mdev, UI_BITMAP, 0); 2165 _drbd_uuid_set(mdev, UI_BITMAP, 0);
2166 drbd_print_uuids(mdev, "cleared bitmap UUID");
2108 spin_lock_irq(&mdev->req_lock); 2167 spin_lock_irq(&mdev->req_lock);
2109 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 2168 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
2110 CS_VERBOSE, NULL); 2169 CS_VERBOSE, NULL);
@@ -2189,7 +2248,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
2189 goto fail; 2248 goto fail;
2190 } 2249 }
2191 2250
2192 if (nlp->packet_type >= P_nl_after_last_packet) { 2251 if (nlp->packet_type >= P_nl_after_last_packet ||
2252 nlp->packet_type == P_return_code_only) {
2193 retcode = ERR_PACKET_NR; 2253 retcode = ERR_PACKET_NR;
2194 goto fail; 2254 goto fail;
2195 } 2255 }
@@ -2205,7 +2265,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
2205 reply_size += cm->reply_body_size; 2265 reply_size += cm->reply_body_size;
2206 2266
2207 /* allocation not in the IO path, cqueue thread context */ 2267 /* allocation not in the IO path, cqueue thread context */
2208 cn_reply = kmalloc(reply_size, GFP_KERNEL); 2268 cn_reply = kzalloc(reply_size, GFP_KERNEL);
2209 if (!cn_reply) { 2269 if (!cn_reply) {
2210 retcode = ERR_NOMEM; 2270 retcode = ERR_NOMEM;
2211 goto fail; 2271 goto fail;
@@ -2213,7 +2273,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
2213 reply = (struct drbd_nl_cfg_reply *) cn_reply->data; 2273 reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
2214 2274
2215 reply->packet_type = 2275 reply->packet_type =
2216 cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet; 2276 cm->reply_body_size ? nlp->packet_type : P_return_code_only;
2217 reply->minor = nlp->drbd_minor; 2277 reply->minor = nlp->drbd_minor;
2218 reply->ret_code = NO_ERROR; /* Might be modified by cm->function. */ 2278 reply->ret_code = NO_ERROR; /* Might be modified by cm->function. */
2219 /* reply->tag_list; might be modified by cm->function. */ 2279 /* reply->tag_list; might be modified by cm->function. */
@@ -2376,7 +2436,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
2376 /* receiver thread context, which is not in the writeout path (of this node), 2436 /* receiver thread context, which is not in the writeout path (of this node),
2377 * but may be in the writeout path of the _other_ node. 2437 * but may be in the writeout path of the _other_ node.
2378 * GFP_NOIO to avoid potential "distributed deadlock". */ 2438 * GFP_NOIO to avoid potential "distributed deadlock". */
2379 cn_reply = kmalloc( 2439 cn_reply = kzalloc(
2380 sizeof(struct cn_msg)+ 2440 sizeof(struct cn_msg)+
2381 sizeof(struct drbd_nl_cfg_reply)+ 2441 sizeof(struct drbd_nl_cfg_reply)+
2382 sizeof(struct dump_ee_tag_len_struct)+ 2442 sizeof(struct dump_ee_tag_len_struct)+
@@ -2398,10 +2458,11 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
2398 tl = tl_add_int(tl, T_ee_sector, &e->sector); 2458 tl = tl_add_int(tl, T_ee_sector, &e->sector);
2399 tl = tl_add_int(tl, T_ee_block_id, &e->block_id); 2459 tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
2400 2460
2461 /* dump the first 32k */
2462 len = min_t(unsigned, e->size, 32 << 10);
2401 put_unaligned(T_ee_data, tl++); 2463 put_unaligned(T_ee_data, tl++);
2402 put_unaligned(e->size, tl++); 2464 put_unaligned(len, tl++);
2403 2465
2404 len = e->size;
2405 page = e->pages; 2466 page = e->pages;
2406 page_chain_for_each(page) { 2467 page_chain_for_each(page) {
2407 void *d = kmap_atomic(page, KM_USER0); 2468 void *d = kmap_atomic(page, KM_USER0);
@@ -2410,6 +2471,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
2410 kunmap_atomic(d, KM_USER0); 2471 kunmap_atomic(d, KM_USER0);
2411 tl = (unsigned short*)((char*)tl + l); 2472 tl = (unsigned short*)((char*)tl + l);
2412 len -= l; 2473 len -= l;
2474 if (len == 0)
2475 break;
2413 } 2476 }
2414 put_unaligned(TT_END, tl++); /* Close the tag list */ 2477 put_unaligned(TT_END, tl++); /* Close the tag list */
2415 2478
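Instead of serializing the whole epoch entry into the broadcast, the hunk caps the dump at the first 32 KiB and stops the page-chain walk once that budget is spent. A self-contained model of the capped copy, with fixed 4 KiB pages and flat buffers in place of kmap'd pages:

#include <stdio.h>
#include <string.h>

#define DUMP_MAX (32u << 10)	/* dump at most the first 32 KiB */
#define PAGE_SZ  4096u

/* Walk fixed-size pages, copy min(remaining, PAGE_SZ) from each, and
 * stop once the budget is used up (the new 'if (len == 0) break;'). */
static size_t dump_ee_data(char *dst, const char *const *pages,
			   size_t npages, size_t ee_size)
{
	size_t len = ee_size < DUMP_MAX ? ee_size : DUMP_MAX;
	size_t copied = 0, i;

	for (i = 0; i < npages && len > 0; i++) {
		size_t l = len < PAGE_SZ ? len : PAGE_SZ;

		memcpy(dst + copied, pages[i], l);
		copied += l;
		len -= l;
	}
	return copied;
}

int main(void)
{
	char a[PAGE_SZ] = {0}, b[PAGE_SZ] = {0}, out[2 * PAGE_SZ];
	const char *pages[] = { a, b };

	/* 6000-byte entry: one full page plus part of the second */
	printf("%zu\n", dump_ee_data(out, pages, 2, 6000));
	return 0;
}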
@@ -2508,6 +2571,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
2508 (struct drbd_nl_cfg_reply *)cn_reply->data; 2571 (struct drbd_nl_cfg_reply *)cn_reply->data;
2509 int rr; 2572 int rr;
2510 2573
2574 memset(buffer, 0, sizeof(buffer));
2511 cn_reply->id = req->id; 2575 cn_reply->id = req->id;
2512 2576
2513 cn_reply->seq = req->seq; 2577 cn_reply->seq = req->seq;
@@ -2515,6 +2579,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
2515 cn_reply->len = sizeof(struct drbd_nl_cfg_reply); 2579 cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
2516 cn_reply->flags = 0; 2580 cn_reply->flags = 0;
2517 2581
2582 reply->packet_type = P_return_code_only;
2518 reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; 2583 reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
2519 reply->ret_code = ret_code; 2584 reply->ret_code = ret_code;
2520 2585
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 7e6ac307e2de..2959cdfb77f5 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -34,6 +34,7 @@
34#include "drbd_int.h" 34#include "drbd_int.h"
35 35
36static int drbd_proc_open(struct inode *inode, struct file *file); 36static int drbd_proc_open(struct inode *inode, struct file *file);
37static int drbd_proc_release(struct inode *inode, struct file *file);
37 38
38 39
39struct proc_dir_entry *drbd_proc; 40struct proc_dir_entry *drbd_proc;
@@ -42,9 +43,22 @@ const struct file_operations drbd_proc_fops = {
42 .open = drbd_proc_open, 43 .open = drbd_proc_open,
43 .read = seq_read, 44 .read = seq_read,
44 .llseek = seq_lseek, 45 .llseek = seq_lseek,
45 .release = single_release, 46 .release = drbd_proc_release,
46}; 47};
47 48
49void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
50{
51 /* v is in kB/sec. We don't expect TiByte/sec yet. */
52 if (unlikely(v >= 1000000)) {
53 /* cool: > GiByte/s */
54 seq_printf(seq, "%ld,", v / 1000000);
55 v %= 1000000;
56 seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000);
57 } else if (likely(v >= 1000))
58 seq_printf(seq, "%ld,%03ld", v/1000, v % 1000);
59 else
60 seq_printf(seq, "%ld", v);
61}
48 62
49/*lge 63/*lge
50 * progress bars shamelessly adapted from driver/md/md.c 64 * progress bars shamelessly adapted from driver/md/md.c
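A direct userspace port of seq_printf_with_thousands_grouping() above (printf instead of seq_printf) shows the grouping it produces:

#include <stdio.h>

/* v is in kB/s; groups digits with commas, good up to GiByte/s. */
static void print_with_thousands_grouping(long v)
{
	if (v >= 1000000) {
		printf("%ld,", v / 1000000);
		v %= 1000000;
		printf("%03ld,%03ld", v / 1000, v % 1000);
	} else if (v >= 1000)
		printf("%ld,%03ld", v / 1000, v % 1000);
	else
		printf("%ld", v);
}

int main(void)
{
	long samples[] = { 42, 51200, 1234567 };
	int i;

	for (i = 0; i < 3; i++) {
		print_with_thousands_grouping(samples[i]);
		putchar('\n');	/* 42, then 51,200, then 1,234,567 */
	}
	return 0;
}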
@@ -71,10 +85,15 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
71 seq_printf(seq, "."); 85 seq_printf(seq, ".");
72 seq_printf(seq, "] "); 86 seq_printf(seq, "] ");
73 87
74 seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10); 88 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
75 /* if more than 1 GB display in MB */ 89 seq_printf(seq, "verified:");
76 if (mdev->rs_total > 0x100000L) 90 else
77 seq_printf(seq, "(%lu/%lu)M\n\t", 91 seq_printf(seq, "sync'ed:");
92 seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
93
94 /* if more than a few GB, display in MB */
95 if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
96 seq_printf(seq, "(%lu/%lu)M",
78 (unsigned long) Bit2KB(rs_left >> 10), 97 (unsigned long) Bit2KB(rs_left >> 10),
79 (unsigned long) Bit2KB(mdev->rs_total >> 10)); 98 (unsigned long) Bit2KB(mdev->rs_total >> 10));
80 else 99 else
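The threshold above is expressed in bitmap bits: assuming DRBD's 4 KiB block per bit (BM_BLOCK_SHIFT == 12), 4UL << (30 - BM_BLOCK_SHIFT) is exactly the number of bits that cover 4 GiB. A quick check of the arithmetic:

#include <stdio.h>

#define BM_BLOCK_SHIFT 12	/* one bitmap bit covers a 4 KiB block */

int main(void)
{
	/* 1 GiB = 1 << 30 bytes = 1 << (30 - BM_BLOCK_SHIFT) bits, so
	 * the "few GB" threshold is the bit count of 4 GiB. */
	unsigned long bits = 4UL << (30 - BM_BLOCK_SHIFT);
	unsigned long kib  = bits << (BM_BLOCK_SHIFT - 10);	/* Bit2KB() */

	printf("threshold: %lu bits = %lu KiB = %lu GiB\n",
	       bits, kib, kib >> 20);
	return 0;
}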
@@ -94,6 +113,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
94 /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is 113 /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is
95 * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at 114 * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
96 * least DRBD_SYNC_MARK_STEP time before it will be modified. */ 115 * least DRBD_SYNC_MARK_STEP time before it will be modified. */
116 /* ------------------------ ~18s average ------------------------ */
97 i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS; 117 i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
98 dt = (jiffies - mdev->rs_mark_time[i]) / HZ; 118 dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
99 if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS)) 119 if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
@@ -107,14 +127,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
107 seq_printf(seq, "finish: %lu:%02lu:%02lu", 127 seq_printf(seq, "finish: %lu:%02lu:%02lu",
108 rt / 3600, (rt % 3600) / 60, rt % 60); 128 rt / 3600, (rt % 3600) / 60, rt % 60);
109 129
110 /* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */
111 dbdt = Bit2KB(db/dt); 130 dbdt = Bit2KB(db/dt);
112 if (dbdt > 1000) 131 seq_printf(seq, " speed: ");
113 seq_printf(seq, " speed: %ld,%03ld", 132 seq_printf_with_thousands_grouping(seq, dbdt);
114 dbdt/1000, dbdt % 1000); 133 seq_printf(seq, " (");
115 else 134 /* ------------------------- ~3s average ------------------------ */
116 seq_printf(seq, " speed: %ld", dbdt); 135 if (proc_details >= 1) {
136 /* this is what drbd_rs_should_slow_down() uses */
137 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
138 dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
139 if (!dt)
140 dt++;
141 db = mdev->rs_mark_left[i] - rs_left;
142 dbdt = Bit2KB(db/dt);
143 seq_printf_with_thousands_grouping(seq, dbdt);
144 seq_printf(seq, " -- ");
145 }
117 146
147 /* --------------------- long term average ---------------------- */
118 /* mean speed since syncer started 148 /* mean speed since syncer started
119 * we do account for PausedSync periods */ 149 * we do account for PausedSync periods */
120 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; 150 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
@@ -122,20 +152,34 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
122 dt = 1; 152 dt = 1;
123 db = mdev->rs_total - rs_left; 153 db = mdev->rs_total - rs_left;
124 dbdt = Bit2KB(db/dt); 154 dbdt = Bit2KB(db/dt);
125 if (dbdt > 1000) 155 seq_printf_with_thousands_grouping(seq, dbdt);
126 seq_printf(seq, " (%ld,%03ld)", 156 seq_printf(seq, ")");
127 dbdt/1000, dbdt % 1000);
128 else
129 seq_printf(seq, " (%ld)", dbdt);
130 157
131 if (mdev->state.conn == C_SYNC_TARGET) { 158 if (mdev->state.conn == C_SYNC_TARGET ||
132 if (mdev->c_sync_rate > 1000) 159 mdev->state.conn == C_VERIFY_S) {
133 seq_printf(seq, " want: %d,%03d", 160 seq_printf(seq, " want: ");
134 mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000); 161 seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate);
135 else
136 seq_printf(seq, " want: %d", mdev->c_sync_rate);
137 } 162 }
138 seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); 163 seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
164
165 if (proc_details >= 1) {
166 /* 64 bit:
167 * we convert to sectors in the display below. */
168 unsigned long bm_bits = drbd_bm_bits(mdev);
169 unsigned long bit_pos;
170 if (mdev->state.conn == C_VERIFY_S ||
171 mdev->state.conn == C_VERIFY_T)
172 bit_pos = bm_bits - mdev->ov_left;
173 else
174 bit_pos = mdev->bm_resync_fo;
175 /* Total sectors may be slightly off for oddly
176 * sized devices. So what. */
177 seq_printf(seq,
178 "\t%3d%% sector pos: %llu/%llu\n",
179 (int)(bit_pos / (bm_bits/100+1)),
180 (unsigned long long)bit_pos * BM_SECT_PER_BIT,
181 (unsigned long long)bm_bits * BM_SECT_PER_BIT);
182 }
139} 183}
140 184
141static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) 185static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
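The new proc_details block converts bitmap-bit progress into a sector position; the integer percentage divides by bm_bits/100+1, which avoids a division by zero on tiny devices and can never exceed 100, at the cost of rounding slightly low. A runnable check, assuming DRBD's 8 sectors per bitmap bit:

#include <stdio.h>

#define BM_SECT_PER_BIT 8UL	/* a 4 KiB bit covers 8 512-byte sectors */

/* The "+1" makes the divisor safe and keeps the result <= 100. */
static int progress_percent(unsigned long bit_pos, unsigned long bm_bits)
{
	return (int)(bit_pos / (bm_bits / 100 + 1));
}

int main(void)
{
	unsigned long bm_bits = 262144;		/* 1 GiB worth of 4 KiB bits */
	unsigned long bit_pos = 131072;		/* half way through */

	printf("%3d%% sector pos: %lu/%lu\n",
	       progress_percent(bit_pos, bm_bits),
	       bit_pos * BM_SECT_PER_BIT,
	       bm_bits * BM_SECT_PER_BIT);
	return 0;
}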
@@ -232,20 +276,16 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
232 mdev->epochs, 276 mdev->epochs,
233 write_ordering_chars[mdev->write_ordering] 277 write_ordering_chars[mdev->write_ordering]
234 ); 278 );
235 seq_printf(seq, " oos:%lu\n", 279 seq_printf(seq, " oos:%llu\n",
236 Bit2KB(drbd_bm_total_weight(mdev))); 280 Bit2KB((unsigned long long)
281 drbd_bm_total_weight(mdev)));
237 } 282 }
238 if (mdev->state.conn == C_SYNC_SOURCE || 283 if (mdev->state.conn == C_SYNC_SOURCE ||
239 mdev->state.conn == C_SYNC_TARGET) 284 mdev->state.conn == C_SYNC_TARGET ||
285 mdev->state.conn == C_VERIFY_S ||
286 mdev->state.conn == C_VERIFY_T)
240 drbd_syncer_progress(mdev, seq); 287 drbd_syncer_progress(mdev, seq);
241 288
242 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
243 seq_printf(seq, "\t%3d%% %lu/%lu\n",
244 (int)((mdev->rs_total-mdev->ov_left) /
245 (mdev->rs_total/100+1)),
246 mdev->rs_total - mdev->ov_left,
247 mdev->rs_total);
248
249 if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { 289 if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
250 lc_seq_printf_stats(seq, mdev->resync); 290 lc_seq_printf_stats(seq, mdev->resync);
251 lc_seq_printf_stats(seq, mdev->act_log); 291 lc_seq_printf_stats(seq, mdev->act_log);
@@ -265,7 +305,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
265 305
266static int drbd_proc_open(struct inode *inode, struct file *file) 306static int drbd_proc_open(struct inode *inode, struct file *file)
267{ 307{
268 return single_open(file, drbd_seq_show, PDE(inode)->data); 308 if (try_module_get(THIS_MODULE))
309 return single_open(file, drbd_seq_show, PDE(inode)->data);
310 return -ENODEV;
311}
312
313static int drbd_proc_release(struct inode *inode, struct file *file)
314{
315 module_put(THIS_MODULE);
316 return single_release(inode, file);
269} 317}
270 318
271/* PROC FS stuff end */ 319/* PROC FS stuff end */
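The open/release pair above closes an unload race: a reader holding /proc/drbd open could otherwise outlive the module. try_module_get() in open pins the module (and open fails with -ENODEV if the module is already going away); the matching module_put() in release drops the pin. A rough userspace model of the idea with an atomic refcount:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int  module_refs;		/* pins held by open files */
static atomic_bool module_live = true;	/* false once unload begins */

/* Rough analogue of try_module_get(): take a reference, but back out
 * and fail if the module is already on its way out. */
static int proc_open_model(void)
{
	atomic_fetch_add(&module_refs, 1);
	if (!atomic_load(&module_live)) {
		atomic_fetch_sub(&module_refs, 1);
		return -1;		/* the kernel returns -ENODEV */
	}
	return 0;
}

/* Analogue of module_put() in drbd_proc_release(). */
static void proc_release_model(void)
{
	atomic_fetch_sub(&module_refs, 1);
}

int main(void)
{
	if (proc_open_model() == 0) {
		/* ... seq_file reads happen here; module cannot vanish ... */
		proc_release_model();
	}
	printf("refs=%d\n", atomic_load(&module_refs));	/* 0 */
	return 0;
}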
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 8e68be939deb..fe1564c7d8b6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -277,7 +277,7 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
277 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; 277 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
278 int i; 278 int i;
279 279
280 if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) 280 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
281 i = page_chain_free(page); 281 i = page_chain_free(page);
282 else { 282 else {
283 struct page *tmp; 283 struct page *tmp;
@@ -319,7 +319,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
319 struct page *page; 319 struct page *page;
320 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; 320 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
321 321
322 if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE)) 322 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
323 return NULL; 323 return NULL;
324 324
325 e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); 325 e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
@@ -725,16 +725,16 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
725 char tb[4]; 725 char tb[4];
726 726
727 if (!*sock) 727 if (!*sock)
728 return FALSE; 728 return false;
729 729
730 rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); 730 rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
731 731
732 if (rr > 0 || rr == -EAGAIN) { 732 if (rr > 0 || rr == -EAGAIN) {
733 return TRUE; 733 return true;
734 } else { 734 } else {
735 sock_release(*sock); 735 sock_release(*sock);
736 *sock = NULL; 736 *sock = NULL;
737 return FALSE; 737 return false;
738 } 738 }
739} 739}
740 740
@@ -768,8 +768,7 @@ static int drbd_connect(struct drbd_conf *mdev)
768 if (s || ++try >= 3) 768 if (s || ++try >= 3)
769 break; 769 break;
770 /* give the other side time to call bind() & listen() */ 770 /* give the other side time to call bind() & listen() */
771 __set_current_state(TASK_INTERRUPTIBLE); 771 schedule_timeout_interruptible(HZ / 10);
772 schedule_timeout(HZ / 10);
773 } 772 }
774 773
775 if (s) { 774 if (s) {
@@ -788,8 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev)
788 } 787 }
789 788
790 if (sock && msock) { 789 if (sock && msock) {
791 __set_current_state(TASK_INTERRUPTIBLE); 790 schedule_timeout_interruptible(HZ / 10);
792 schedule_timeout(HZ / 10);
793 ok = drbd_socket_okay(mdev, &sock); 791 ok = drbd_socket_okay(mdev, &sock);
794 ok = drbd_socket_okay(mdev, &msock) && ok; 792 ok = drbd_socket_okay(mdev, &msock) && ok;
795 if (ok) 793 if (ok)
@@ -906,7 +904,7 @@ retry:
906 put_ldev(mdev); 904 put_ldev(mdev);
907 } 905 }
908 906
909 if (!drbd_send_protocol(mdev)) 907 if (drbd_send_protocol(mdev) == -1)
910 return -1; 908 return -1;
911 drbd_send_sync_param(mdev, &mdev->sync_conf); 909 drbd_send_sync_param(mdev, &mdev->sync_conf);
912 drbd_send_sizes(mdev, 0, 0); 910 drbd_send_sizes(mdev, 0, 0);
@@ -914,6 +912,7 @@ retry:
914 drbd_send_state(mdev); 912 drbd_send_state(mdev);
915 clear_bit(USE_DEGR_WFC_T, &mdev->flags); 913 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
916 clear_bit(RESIZE_PENDING, &mdev->flags); 914 clear_bit(RESIZE_PENDING, &mdev->flags);
915 mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
917 916
918 return 1; 917 return 1;
919 918
@@ -932,8 +931,9 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
932 931
933 r = drbd_recv(mdev, h, sizeof(*h)); 932 r = drbd_recv(mdev, h, sizeof(*h));
934 if (unlikely(r != sizeof(*h))) { 933 if (unlikely(r != sizeof(*h))) {
935 dev_err(DEV, "short read expecting header on sock: r=%d\n", r); 934 if (!signal_pending(current))
936 return FALSE; 935 dev_warn(DEV, "short read expecting header on sock: r=%d\n", r);
936 return false;
937 } 937 }
938 938
939 if (likely(h->h80.magic == BE_DRBD_MAGIC)) { 939 if (likely(h->h80.magic == BE_DRBD_MAGIC)) {
@@ -947,11 +947,11 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
947 be32_to_cpu(h->h80.magic), 947 be32_to_cpu(h->h80.magic),
948 be16_to_cpu(h->h80.command), 948 be16_to_cpu(h->h80.command),
949 be16_to_cpu(h->h80.length)); 949 be16_to_cpu(h->h80.length));
950 return FALSE; 950 return false;
951 } 951 }
952 mdev->last_received = jiffies; 952 mdev->last_received = jiffies;
953 953
954 return TRUE; 954 return true;
955} 955}
956 956
957static void drbd_flush(struct drbd_conf *mdev) 957static void drbd_flush(struct drbd_conf *mdev)
@@ -1074,6 +1074,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
1074 * @mdev: DRBD device. 1074 * @mdev: DRBD device.
1075 * @e: epoch entry 1075 * @e: epoch entry
1076 * @rw: flag field, see bio->bi_rw 1076 * @rw: flag field, see bio->bi_rw
1077 *
1078 * May spread the pages to multiple bios,
1079 * depending on bio_add_page restrictions.
1080 *
1081 * Returns 0 if all bios have been submitted,
1082 * -ENOMEM if we could not allocate enough bios,
1083 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1084 * single page to an empty bio (which should never happen and likely indicates
1085 * that the lower level IO stack is in some way broken). This has been observed
1086 * on certain Xen deployments.
1077 */ 1087 */
1078/* TODO allocate from our own bio_set. */ 1088/* TODO allocate from our own bio_set. */
1079int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, 1089int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
@@ -1086,6 +1096,7 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1086 unsigned ds = e->size; 1096 unsigned ds = e->size;
1087 unsigned n_bios = 0; 1097 unsigned n_bios = 0;
1088 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; 1098 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
1099 int err = -ENOMEM;
1089 1100
1090 /* In most cases, we will only need one bio. But in case the lower 1101 /* In most cases, we will only need one bio. But in case the lower
1091 * level restrictions happen to be different at this offset on this 1102 * level restrictions happen to be different at this offset on this
@@ -1111,8 +1122,17 @@ next_bio:
1111 page_chain_for_each(page) { 1122 page_chain_for_each(page) {
1112 unsigned len = min_t(unsigned, ds, PAGE_SIZE); 1123 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1113 if (!bio_add_page(bio, page, len, 0)) { 1124 if (!bio_add_page(bio, page, len, 0)) {
1114 /* a single page must always be possible! */ 1125 /* A single page must always be possible!
1115 BUG_ON(bio->bi_vcnt == 0); 1126 * But in case it fails anyways,
1127 * we deal with it, and complain (below). */
1128 if (bio->bi_vcnt == 0) {
1129 dev_err(DEV,
1130 "bio_add_page failed for len=%u, "
1131 "bi_vcnt=0 (bi_sector=%llu)\n",
1132 len, (unsigned long long)bio->bi_sector);
1133 err = -ENOSPC;
1134 goto fail;
1135 }
1116 goto next_bio; 1136 goto next_bio;
1117 } 1137 }
1118 ds -= len; 1138 ds -= len;
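Together these hunks change drbd_submit_ee()'s contract: the pages may be split across several bios, and the old BUG_ON for "an empty bio refused a page" becomes a logged -ENOSPC, so a broken lower-level stack (observed on some Xen deployments, per the comment) drops the connection instead of crashing. A toy model of the splitting loop and that error path:

#include <stdio.h>

/* Add pages to the current bio; when the queue refuses one, start a
 * fresh bio ("goto next_bio") -- but if even an empty bio cannot take
 * a single page, give up with -ENOSPC instead of the old BUG_ON. */
static int bio_can_take(int vcnt, int queue_limit)
{
	return vcnt < queue_limit;	/* stands in for bio_add_page() */
}

static int submit_pages(int npages, int queue_limit, int *bios_used)
{
	int vcnt = 0;

	*bios_used = 1;
	while (npages > 0) {
		if (!bio_can_take(vcnt, queue_limit)) {
			if (vcnt == 0)
				return -28;	/* -ENOSPC: broken stack */
			(*bios_used)++;		/* start a fresh bio */
			vcnt = 0;
			continue;
		}
		vcnt++;
		npages--;
	}
	return 0;
}

int main(void)
{
	int bios;

	printf("%d bios=%d\n", submit_pages(10, 4, &bios), bios); /* 0 bios=3 */
	printf("%d bios=%d\n", submit_pages(10, 0, &bios), bios); /* -28 */
	return 0;
}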
@@ -1138,7 +1158,7 @@ fail:
1138 bios = bios->bi_next; 1158 bios = bios->bi_next;
1139 bio_put(bio); 1159 bio_put(bio);
1140 } 1160 }
1141 return -ENOMEM; 1161 return err;
1142} 1162}
1143 1163
1144static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 1164static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -1160,7 +1180,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
1160 switch (mdev->write_ordering) { 1180 switch (mdev->write_ordering) {
1161 case WO_none: 1181 case WO_none:
1162 if (rv == FE_RECYCLED) 1182 if (rv == FE_RECYCLED)
1163 return TRUE; 1183 return true;
1164 1184
1165 /* receiver context, in the writeout path of the other node. 1185 /* receiver context, in the writeout path of the other node.
1166 * avoid potential distributed deadlock */ 1186 * avoid potential distributed deadlock */
@@ -1188,10 +1208,10 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
1188 D_ASSERT(atomic_read(&epoch->active) == 0); 1208 D_ASSERT(atomic_read(&epoch->active) == 0);
1189 D_ASSERT(epoch->flags == 0); 1209 D_ASSERT(epoch->flags == 0);
1190 1210
1191 return TRUE; 1211 return true;
1192 default: 1212 default:
1193 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); 1213 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
1194 return FALSE; 1214 return false;
1195 } 1215 }
1196 1216
1197 epoch->flags = 0; 1217 epoch->flags = 0;
@@ -1209,7 +1229,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
1209 } 1229 }
1210 spin_unlock(&mdev->epoch_lock); 1230 spin_unlock(&mdev->epoch_lock);
1211 1231
1212 return TRUE; 1232 return true;
1213} 1233}
1214 1234
1215/* used from receive_RSDataReply (recv_resync_read) 1235/* used from receive_RSDataReply (recv_resync_read)
@@ -1231,21 +1251,25 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
1231 if (dgs) { 1251 if (dgs) {
1232 rr = drbd_recv(mdev, dig_in, dgs); 1252 rr = drbd_recv(mdev, dig_in, dgs);
1233 if (rr != dgs) { 1253 if (rr != dgs) {
1234 dev_warn(DEV, "short read receiving data digest: read %d expected %d\n", 1254 if (!signal_pending(current))
1235 rr, dgs); 1255 dev_warn(DEV,
1256 "short read receiving data digest: read %d expected %d\n",
1257 rr, dgs);
1236 return NULL; 1258 return NULL;
1237 } 1259 }
1238 } 1260 }
1239 1261
1240 data_size -= dgs; 1262 data_size -= dgs;
1241 1263
1264 ERR_IF(data_size == 0) return NULL;
1242 ERR_IF(data_size & 0x1ff) return NULL; 1265 ERR_IF(data_size & 0x1ff) return NULL;
1243 ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; 1266 ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL;
1244 1267
1245 /* even though we trust our peer, 1268 /* even though we trust our peer,
1246 * we sometimes have to double check. */ 1269 * we sometimes have to double check. */
1247 if (sector + (data_size>>9) > capacity) { 1270 if (sector + (data_size>>9) > capacity) {
1248 dev_err(DEV, "capacity: %llus < sector: %llus + size: %u\n", 1271 dev_err(DEV, "request from peer beyond end of local disk: "
1272 "capacity: %llus < sector: %llus + size: %u\n",
1249 (unsigned long long)capacity, 1273 (unsigned long long)capacity,
1250 (unsigned long long)sector, data_size); 1274 (unsigned long long)sector, data_size);
1251 return NULL; 1275 return NULL;
@@ -1264,15 +1288,16 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
1264 unsigned len = min_t(int, ds, PAGE_SIZE); 1288 unsigned len = min_t(int, ds, PAGE_SIZE);
1265 data = kmap(page); 1289 data = kmap(page);
1266 rr = drbd_recv(mdev, data, len); 1290 rr = drbd_recv(mdev, data, len);
1267 if (FAULT_ACTIVE(mdev, DRBD_FAULT_RECEIVE)) { 1291 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
1268 dev_err(DEV, "Fault injection: Corrupting data on receive\n"); 1292 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1269 data[0] = data[0] ^ (unsigned long)-1; 1293 data[0] = data[0] ^ (unsigned long)-1;
1270 } 1294 }
1271 kunmap(page); 1295 kunmap(page);
1272 if (rr != len) { 1296 if (rr != len) {
1273 drbd_free_ee(mdev, e); 1297 drbd_free_ee(mdev, e);
1274 dev_warn(DEV, "short read receiving data: read %d expected %d\n", 1298 if (!signal_pending(current))
1275 rr, len); 1299 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1300 rr, len);
1276 return NULL; 1301 return NULL;
1277 } 1302 }
1278 ds -= rr; 1303 ds -= rr;
@@ -1281,7 +1306,8 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
1281 if (dgs) { 1306 if (dgs) {
1282 drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); 1307 drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv);
1283 if (memcmp(dig_in, dig_vv, dgs)) { 1308 if (memcmp(dig_in, dig_vv, dgs)) {
1284 dev_err(DEV, "Digest integrity check FAILED.\n"); 1309 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1310 (unsigned long long)sector, data_size);
1285 drbd_bcast_ee(mdev, "digest failed", 1311 drbd_bcast_ee(mdev, "digest failed",
1286 dgs, dig_in, dig_vv, e); 1312 dgs, dig_in, dig_vv, e);
1287 drbd_free_ee(mdev, e); 1313 drbd_free_ee(mdev, e);
@@ -1302,7 +1328,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1302 void *data; 1328 void *data;
1303 1329
1304 if (!data_size) 1330 if (!data_size)
1305 return TRUE; 1331 return true;
1306 1332
1307 page = drbd_pp_alloc(mdev, 1, 1); 1333 page = drbd_pp_alloc(mdev, 1, 1);
1308 1334
@@ -1311,8 +1337,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1311 rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); 1337 rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE));
1312 if (rr != min_t(int, data_size, PAGE_SIZE)) { 1338 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1313 rv = 0; 1339 rv = 0;
1314 dev_warn(DEV, "short read receiving data: read %d expected %d\n", 1340 if (!signal_pending(current))
1315 rr, min_t(int, data_size, PAGE_SIZE)); 1341 dev_warn(DEV,
1342 "short read receiving data: read %d expected %d\n",
1343 rr, min_t(int, data_size, PAGE_SIZE));
1316 break; 1344 break;
1317 } 1345 }
1318 data_size -= rr; 1346 data_size -= rr;
@@ -1337,8 +1365,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1337 if (dgs) { 1365 if (dgs) {
1338 rr = drbd_recv(mdev, dig_in, dgs); 1366 rr = drbd_recv(mdev, dig_in, dgs);
1339 if (rr != dgs) { 1367 if (rr != dgs) {
1340 dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n", 1368 if (!signal_pending(current))
1341 rr, dgs); 1369 dev_warn(DEV,
1370 "short read receiving data reply digest: read %d expected %d\n",
1371 rr, dgs);
1342 return 0; 1372 return 0;
1343 } 1373 }
1344 } 1374 }
@@ -1359,9 +1389,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1359 expect); 1389 expect);
1360 kunmap(bvec->bv_page); 1390 kunmap(bvec->bv_page);
1361 if (rr != expect) { 1391 if (rr != expect) {
1362 dev_warn(DEV, "short read receiving data reply: " 1392 if (!signal_pending(current))
1363 "read %d expected %d\n", 1393 dev_warn(DEV, "short read receiving data reply: "
1364 rr, expect); 1394 "read %d expected %d\n",
1395 rr, expect);
1365 return 0; 1396 return 0;
1366 } 1397 }
1367 data_size -= rr; 1398 data_size -= rr;
@@ -1425,11 +1456,10 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
1425 1456
1426 atomic_add(data_size >> 9, &mdev->rs_sect_ev); 1457 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
1427 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) 1458 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
1428 return TRUE; 1459 return true;
1429 1460
1430 /* drbd_submit_ee currently fails for one reason only: 1461 /* don't care for the reason here */
1431 * not being able to allocate enough bios. 1462 dev_err(DEV, "submit failed, triggering re-connect\n");
1432 * Is dropping the connection going to help? */
1433 spin_lock_irq(&mdev->req_lock); 1463 spin_lock_irq(&mdev->req_lock);
1434 list_del(&e->w.list); 1464 list_del(&e->w.list);
1435 spin_unlock_irq(&mdev->req_lock); 1465 spin_unlock_irq(&mdev->req_lock);
@@ -1437,7 +1467,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
1437 drbd_free_ee(mdev, e); 1467 drbd_free_ee(mdev, e);
1438fail: 1468fail:
1439 put_ldev(mdev); 1469 put_ldev(mdev);
1440 return FALSE; 1470 return false;
1441} 1471}
1442 1472
1443static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 1473static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -1454,7 +1484,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
1454 spin_unlock_irq(&mdev->req_lock); 1484 spin_unlock_irq(&mdev->req_lock);
1455 if (unlikely(!req)) { 1485 if (unlikely(!req)) {
1456 dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); 1486 dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n");
1457 return FALSE; 1487 return false;
1458 } 1488 }
1459 1489
1460 /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid 1490 /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
@@ -1611,15 +1641,15 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1611 return ret; 1641 return ret;
1612} 1642}
1613 1643
1614static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf) 1644/* see also bio_flags_to_wire()
1645 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1646 * flags and back. We may replicate to other kernel versions. */
1647static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
1615{ 1648{
1616 if (mdev->agreed_pro_version >= 95) 1649 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1617 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | 1650 (dpf & DP_FUA ? REQ_FUA : 0) |
1618 (dpf & DP_FUA ? REQ_FUA : 0) | 1651 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1619 (dpf & DP_FLUSH ? REQ_FUA : 0) | 1652 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
1620 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
1621 else
1622 return dpf & DP_RW_SYNC ? REQ_SYNC : 0;
1623} 1653}
1624 1654
1625/* mirrored write */ 1655/* mirrored write */
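Renamed to wire_flags_to_bio() and documented as the inverse of bio_flags_to_wire(), the helper now maps each data-packet flag to its own request flag; the old version mistakenly turned DP_FLUSH into REQ_FUA, and its pre-95 special case is gone. A userspace rendering with illustrative bit values (the kernel's DP_* and REQ_* constants differ):

#include <stdio.h>

#define DP_RW_SYNC  (1u << 0)
#define DP_FUA      (1u << 1)
#define DP_FLUSH    (1u << 2)
#define DP_DISCARD  (1u << 3)

#define REQ_SYNC    (1u << 0)
#define REQ_FUA     (1u << 1)
#define REQ_FLUSH   (1u << 2)
#define REQ_DISCARD (1u << 3)

/* Each wire flag maps to exactly one bio flag; in particular
 * DP_FLUSH now becomes REQ_FLUSH, not REQ_FUA. */
static unsigned long wire_flags_to_bio(unsigned int dpf)
{
	return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
	       (dpf & DP_FUA ? REQ_FUA : 0) |
	       (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
	       (dpf & DP_DISCARD ? REQ_DISCARD : 0);
}

int main(void)
{
	printf("%#lx\n", wire_flags_to_bio(DP_FUA | DP_FLUSH));	/* 0x6 */
	return 0;
}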
@@ -1632,9 +1662,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1632 u32 dp_flags; 1662 u32 dp_flags;
1633 1663
1634 if (!get_ldev(mdev)) { 1664 if (!get_ldev(mdev)) {
1635 if (__ratelimit(&drbd_ratelimit_state))
1636 dev_err(DEV, "Can not write mirrored data block "
1637 "to local disk.\n");
1638 spin_lock(&mdev->peer_seq_lock); 1665 spin_lock(&mdev->peer_seq_lock);
1639 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) 1666 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
1640 mdev->peer_seq++; 1667 mdev->peer_seq++;
@@ -1654,23 +1681,23 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1654 e = read_in_block(mdev, p->block_id, sector, data_size); 1681 e = read_in_block(mdev, p->block_id, sector, data_size);
1655 if (!e) { 1682 if (!e) {
1656 put_ldev(mdev); 1683 put_ldev(mdev);
1657 return FALSE; 1684 return false;
1658 } 1685 }
1659 1686
1660 e->w.cb = e_end_block; 1687 e->w.cb = e_end_block;
1661 1688
1689 dp_flags = be32_to_cpu(p->dp_flags);
1690 rw |= wire_flags_to_bio(mdev, dp_flags);
1691
1692 if (dp_flags & DP_MAY_SET_IN_SYNC)
1693 e->flags |= EE_MAY_SET_IN_SYNC;
1694
1662 spin_lock(&mdev->epoch_lock); 1695 spin_lock(&mdev->epoch_lock);
1663 e->epoch = mdev->current_epoch; 1696 e->epoch = mdev->current_epoch;
1664 atomic_inc(&e->epoch->epoch_size); 1697 atomic_inc(&e->epoch->epoch_size);
1665 atomic_inc(&e->epoch->active); 1698 atomic_inc(&e->epoch->active);
1666 spin_unlock(&mdev->epoch_lock); 1699 spin_unlock(&mdev->epoch_lock);
1667 1700
1668 dp_flags = be32_to_cpu(p->dp_flags);
1669 rw |= write_flags_to_bio(mdev, dp_flags);
1670
1671 if (dp_flags & DP_MAY_SET_IN_SYNC)
1672 e->flags |= EE_MAY_SET_IN_SYNC;
1673
1674 /* I'm the receiver, I do hold a net_cnt reference. */ 1701 /* I'm the receiver, I do hold a net_cnt reference. */
1675 if (!mdev->net_conf->two_primaries) { 1702 if (!mdev->net_conf->two_primaries) {
1676 spin_lock_irq(&mdev->req_lock); 1703 spin_lock_irq(&mdev->req_lock);
@@ -1773,7 +1800,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1773 put_ldev(mdev); 1800 put_ldev(mdev);
1774 wake_asender(mdev); 1801 wake_asender(mdev);
1775 finish_wait(&mdev->misc_wait, &wait); 1802 finish_wait(&mdev->misc_wait, &wait);
1776 return TRUE; 1803 return true;
1777 } 1804 }
1778 1805
1779 if (signal_pending(current)) { 1806 if (signal_pending(current)) {
@@ -1829,11 +1856,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1829 } 1856 }
1830 1857
1831 if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) 1858 if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
1832 return TRUE; 1859 return true;
1833 1860
1834 /* drbd_submit_ee currently fails for one reason only: 1861 /* don't care for the reason here */
1835 * not being able to allocate enough bios. 1862 dev_err(DEV, "submit failed, triggering re-connect\n");
1836 * Is dropping the connection going to help? */
1837 spin_lock_irq(&mdev->req_lock); 1863 spin_lock_irq(&mdev->req_lock);
1838 list_del(&e->w.list); 1864 list_del(&e->w.list);
1839 hlist_del_init(&e->colision); 1865 hlist_del_init(&e->colision);
@@ -1842,12 +1868,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1842 drbd_al_complete_io(mdev, e->sector); 1868 drbd_al_complete_io(mdev, e->sector);
1843 1869
1844out_interrupted: 1870out_interrupted:
1845 /* yes, the epoch_size now is imbalanced. 1871 drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP);
1846 * but we drop the connection anyways, so we don't have a chance to
1847 * receive a barrier... atomic_inc(&mdev->epoch_size); */
1848 put_ldev(mdev); 1872 put_ldev(mdev);
1849 drbd_free_ee(mdev, e); 1873 drbd_free_ee(mdev, e);
1850 return FALSE; 1874 return false;
1851} 1875}
1852 1876
1853/* We may throttle resync, if the lower device seems to be busy, 1877/* We may throttle resync, if the lower device seems to be busy,
@@ -1861,10 +1885,11 @@ out_interrupted:
1861 * The current sync rate used here uses only the most recent two step marks, 1885 * The current sync rate used here uses only the most recent two step marks,
1862 * to have a short time average so we can react faster. 1886 * to have a short time average so we can react faster.
1863 */ 1887 */
1864int drbd_rs_should_slow_down(struct drbd_conf *mdev) 1888int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
1865{ 1889{
1866 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; 1890 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1867 unsigned long db, dt, dbdt; 1891 unsigned long db, dt, dbdt;
1892 struct lc_element *tmp;
1868 int curr_events; 1893 int curr_events;
1869 int throttle = 0; 1894 int throttle = 0;
1870 1895
@@ -1872,9 +1897,22 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev)
1872 if (mdev->sync_conf.c_min_rate == 0) 1897 if (mdev->sync_conf.c_min_rate == 0)
1873 return 0; 1898 return 0;
1874 1899
1900 spin_lock_irq(&mdev->al_lock);
1901 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1902 if (tmp) {
1903 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1904 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1905 spin_unlock_irq(&mdev->al_lock);
1906 return 0;
1907 }
1908 /* Do not slow down if app IO is already waiting for this extent */
1909 }
1910 spin_unlock_irq(&mdev->al_lock);
1911
1875 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + 1912 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
1876 (int)part_stat_read(&disk->part0, sectors[1]) - 1913 (int)part_stat_read(&disk->part0, sectors[1]) -
1877 atomic_read(&mdev->rs_sect_ev); 1914 atomic_read(&mdev->rs_sect_ev);
1915
1878 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { 1916 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
1879 unsigned long rs_left; 1917 unsigned long rs_left;
1880 int i; 1918 int i;
@@ -1883,8 +1921,12 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev)
1883 1921
1884 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, 1922 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
1885 * approx. */ 1923 * approx. */
1886 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS; 1924 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
1887 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; 1925
1926 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
1927 rs_left = mdev->ov_left;
1928 else
1929 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
1888 1930
1889 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; 1931 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
1890 if (!dt) 1932 if (!dt)
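drbd_rs_should_slow_down() now takes the sector so it can look up the covering resync extent first: if application IO is already parked on that extent (BME_PRIORITY set), throttling is skipped, because finishing the extent quickly is exactly what unblocks the application. A small model of that early-out:

#include <stdbool.h>
#include <stdio.h>

struct bm_extent_model {
	bool app_io_waiting;	/* stands in for test_bit(BME_PRIORITY, ...) */
};

/* A priority extent is never throttled; otherwise fall through to the
 * usual rate-based decision (modelled here as a flag). */
static bool should_slow_down(const struct bm_extent_model *ext,
			     bool rate_says_slow)
{
	if (ext && ext->app_io_waiting)
		return false;
	return rate_says_slow;
}

int main(void)
{
	struct bm_extent_model busy = { true };

	printf("%d\n", should_slow_down(&busy, true));	/* 0: app IO waits */
	printf("%d\n", should_slow_down(NULL, true));	/* 1: throttle */
	return 0;
}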
@@ -1912,15 +1954,15 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
1912 sector = be64_to_cpu(p->sector); 1954 sector = be64_to_cpu(p->sector);
1913 size = be32_to_cpu(p->blksize); 1955 size = be32_to_cpu(p->blksize);
1914 1956
1915 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { 1957 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
1916 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 1958 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
1917 (unsigned long long)sector, size); 1959 (unsigned long long)sector, size);
1918 return FALSE; 1960 return false;
1919 } 1961 }
1920 if (sector + (size>>9) > capacity) { 1962 if (sector + (size>>9) > capacity) {
1921 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 1963 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
1922 (unsigned long long)sector, size); 1964 (unsigned long long)sector, size);
1923 return FALSE; 1965 return false;
1924 } 1966 }
1925 1967
1926 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { 1968 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
@@ -1957,7 +1999,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
1957 e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); 1999 e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
1958 if (!e) { 2000 if (!e) {
1959 put_ldev(mdev); 2001 put_ldev(mdev);
1960 return FALSE; 2002 return false;
1961 } 2003 }
1962 2004
1963 switch (cmd) { 2005 switch (cmd) {
@@ -1970,6 +2012,8 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
1970 case P_RS_DATA_REQUEST: 2012 case P_RS_DATA_REQUEST:
1971 e->w.cb = w_e_end_rsdata_req; 2013 e->w.cb = w_e_end_rsdata_req;
1972 fault_type = DRBD_FAULT_RS_RD; 2014 fault_type = DRBD_FAULT_RS_RD;
2015 /* used in the sector offset progress display */
2016 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
1973 break; 2017 break;
1974 2018
1975 case P_OV_REPLY: 2019 case P_OV_REPLY:
@@ -1991,7 +2035,11 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
1991 if (cmd == P_CSUM_RS_REQUEST) { 2035 if (cmd == P_CSUM_RS_REQUEST) {
1992 D_ASSERT(mdev->agreed_pro_version >= 89); 2036 D_ASSERT(mdev->agreed_pro_version >= 89);
1993 e->w.cb = w_e_end_csum_rs_req; 2037 e->w.cb = w_e_end_csum_rs_req;
2038 /* used in the sector offset progress display */
2039 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
1994 } else if (cmd == P_OV_REPLY) { 2040 } else if (cmd == P_OV_REPLY) {
2041 /* track progress, we may need to throttle */
2042 atomic_add(size >> 9, &mdev->rs_sect_in);
1995 e->w.cb = w_e_end_ov_reply; 2043 e->w.cb = w_e_end_ov_reply;
1996 dec_rs_pending(mdev); 2044 dec_rs_pending(mdev);
1997 /* drbd_rs_begin_io done when we sent this request, 2045 /* drbd_rs_begin_io done when we sent this request,
@@ -2003,9 +2051,16 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
2003 case P_OV_REQUEST: 2051 case P_OV_REQUEST:
2004 if (mdev->ov_start_sector == ~(sector_t)0 && 2052 if (mdev->ov_start_sector == ~(sector_t)0 &&
2005 mdev->agreed_pro_version >= 90) { 2053 mdev->agreed_pro_version >= 90) {
2054 unsigned long now = jiffies;
2055 int i;
2006 mdev->ov_start_sector = sector; 2056 mdev->ov_start_sector = sector;
2007 mdev->ov_position = sector; 2057 mdev->ov_position = sector;
2008 mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector); 2058 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2059 mdev->rs_total = mdev->ov_left;
2060 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2061 mdev->rs_mark_left[i] = mdev->ov_left;
2062 mdev->rs_mark_time[i] = now;
2063 }
2009 dev_info(DEV, "Online Verify start sector: %llu\n", 2064 dev_info(DEV, "Online Verify start sector: %llu\n",
2010 (unsigned long long)sector); 2065 (unsigned long long)sector);
2011 } 2066 }
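When an online verify starts at an arbitrary sector, the work left is now derived from the bitmap size rather than rs_total, and every rolling speed mark is seeded with that value and the current time so the first rate estimates are not skewed. A sketch of the initialization (DRBD_SYNC_MARKS value assumed):

#include <stdio.h>

#define DRBD_SYNC_MARKS 8	/* assumed size of the ring of rate marks */

struct ov_model {
	unsigned long ov_left, rs_total;
	unsigned long rs_mark_left[DRBD_SYNC_MARKS];
	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
};

/* Work left is the bit count from the start sector to the end of the
 * bitmap; seeding every mark keeps the first speed estimates sane. */
static void ov_start(struct ov_model *m, unsigned long bm_bits,
		     unsigned long start_bit, unsigned long now)
{
	int i;

	m->ov_left = bm_bits - start_bit;
	m->rs_total = m->ov_left;
	for (i = 0; i < DRBD_SYNC_MARKS; i++) {
		m->rs_mark_left[i] = m->ov_left;
		m->rs_mark_time[i] = now;
	}
}

int main(void)
{
	struct ov_model m;

	ov_start(&m, 1UL << 20, 1UL << 19, 1000);	/* start half way */
	printf("ov_left=%lu rs_total=%lu\n", m.ov_left, m.rs_total);
	return 0;
}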
@@ -2042,9 +2097,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
2042 * we would also throttle its application reads. 2097 * we would also throttle its application reads.
2043 * In that case, throttling is done on the SyncTarget only. 2098 * In that case, throttling is done on the SyncTarget only.
2044 */ 2099 */
2045 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev)) 2100 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2046 msleep(100); 2101 schedule_timeout_uninterruptible(HZ/10);
2047 if (drbd_rs_begin_io(mdev, e->sector)) 2102 if (drbd_rs_begin_io(mdev, sector))
2048 goto out_free_e; 2103 goto out_free_e;
2049 2104
2050submit_for_resync: 2105submit_for_resync:
@@ -2057,11 +2112,10 @@ submit:
2057 spin_unlock_irq(&mdev->req_lock); 2112 spin_unlock_irq(&mdev->req_lock);
2058 2113
2059 if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) 2114 if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
2060 return TRUE; 2115 return true;
2061 2116
2062 /* drbd_submit_ee currently fails for one reason only: 2117 /* don't care for the reason here */
2063 * not being able to allocate enough bios. 2118 dev_err(DEV, "submit failed, triggering re-connect\n");
2064 * Is dropping the connection going to help? */
2065 spin_lock_irq(&mdev->req_lock); 2119 spin_lock_irq(&mdev->req_lock);
2066 list_del(&e->w.list); 2120 list_del(&e->w.list);
2067 spin_unlock_irq(&mdev->req_lock); 2121 spin_unlock_irq(&mdev->req_lock);
@@ -2070,7 +2124,7 @@ submit:
2070out_free_e: 2124out_free_e:
2071 put_ldev(mdev); 2125 put_ldev(mdev);
2072 drbd_free_ee(mdev, e); 2126 drbd_free_ee(mdev, e);
2073 return FALSE; 2127 return false;
2074} 2128}
2075 2129
2076static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) 2130static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
@@ -2147,10 +2201,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2147 2201
2148static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) 2202static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2149{ 2203{
2150 int self, peer, hg, rv = -100; 2204 int hg, rv = -100;
2151
2152 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2153 peer = mdev->p_uuid[UI_BITMAP] & 1;
2154 2205
2155 switch (mdev->net_conf->after_sb_1p) { 2206 switch (mdev->net_conf->after_sb_1p) {
2156 case ASB_DISCARD_YOUNGER_PRI: 2207 case ASB_DISCARD_YOUNGER_PRI:
@@ -2177,12 +2228,14 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2177 case ASB_CALL_HELPER: 2228 case ASB_CALL_HELPER:
2178 hg = drbd_asb_recover_0p(mdev); 2229 hg = drbd_asb_recover_0p(mdev);
2179 if (hg == -1 && mdev->state.role == R_PRIMARY) { 2230 if (hg == -1 && mdev->state.role == R_PRIMARY) {
2180 self = drbd_set_role(mdev, R_SECONDARY, 0); 2231 enum drbd_state_rv rv2;
2232
2233 drbd_set_role(mdev, R_SECONDARY, 0);
2181 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, 2234 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2182 * we might be here in C_WF_REPORT_PARAMS which is transient. 2235 * we might be here in C_WF_REPORT_PARAMS which is transient.
2183 * we do not need to wait for the after state change work either. */ 2236 * we do not need to wait for the after state change work either. */
2184 self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); 2237 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2185 if (self != SS_SUCCESS) { 2238 if (rv2 != SS_SUCCESS) {
2186 drbd_khelper(mdev, "pri-lost-after-sb"); 2239 drbd_khelper(mdev, "pri-lost-after-sb");
2187 } else { 2240 } else {
2188 dev_warn(DEV, "Successfully gave up primary role.\n"); 2241 dev_warn(DEV, "Successfully gave up primary role.\n");
@@ -2197,10 +2250,7 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2197 2250
2198static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) 2251static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2199{ 2252{
2200 int self, peer, hg, rv = -100; 2253 int hg, rv = -100;
2201
2202 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2203 peer = mdev->p_uuid[UI_BITMAP] & 1;
2204 2254
2205 switch (mdev->net_conf->after_sb_2p) { 2255 switch (mdev->net_conf->after_sb_2p) {
2206 case ASB_DISCARD_YOUNGER_PRI: 2256 case ASB_DISCARD_YOUNGER_PRI:
@@ -2220,11 +2270,13 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2220 case ASB_CALL_HELPER: 2270 case ASB_CALL_HELPER:
2221 hg = drbd_asb_recover_0p(mdev); 2271 hg = drbd_asb_recover_0p(mdev);
2222 if (hg == -1) { 2272 if (hg == -1) {
2273 enum drbd_state_rv rv2;
2274
2223 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, 2275 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2224 * we might be here in C_WF_REPORT_PARAMS which is transient. 2276 * we might be here in C_WF_REPORT_PARAMS which is transient.
2225 * we do not need to wait for the after state change work either. */ 2277 * we do not need to wait for the after state change work either. */
2226 self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); 2278 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2227 if (self != SS_SUCCESS) { 2279 if (rv2 != SS_SUCCESS) {
2228 drbd_khelper(mdev, "pri-lost-after-sb"); 2280 drbd_khelper(mdev, "pri-lost-after-sb");
2229 } else { 2281 } else {
2230 dev_warn(DEV, "Successfully gave up primary role.\n"); 2282 dev_warn(DEV, "Successfully gave up primary role.\n");
@@ -2263,6 +2315,8 @@ static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2263 -2 C_SYNC_TARGET set BitMap 2315 -2 C_SYNC_TARGET set BitMap
2264 -100 after split brain, disconnect 2316 -100 after split brain, disconnect
2265-1000 unrelated data 2317-1000 unrelated data
2318-1091 requires proto 91
2319-1096 requires proto 96
2266 */ 2320 */
2267static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) 2321static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2268{ 2322{
@@ -2292,7 +2346,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2292 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { 2346 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2293 2347
2294 if (mdev->agreed_pro_version < 91) 2348 if (mdev->agreed_pro_version < 91)
2295 return -1001; 2349 return -1091;
2296 2350
2297 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && 2351 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2298 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { 2352 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
@@ -2313,7 +2367,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2313 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { 2367 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2314 2368
2315 if (mdev->agreed_pro_version < 91) 2369 if (mdev->agreed_pro_version < 91)
2316 return -1001; 2370 return -1091;
2317 2371
2318 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && 2372 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2319 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) { 2373 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
@@ -2358,17 +2412,22 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2358 *rule_nr = 51; 2412 *rule_nr = 51;
2359 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); 2413 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2360 if (self == peer) { 2414 if (self == peer) {
2361 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); 2415 if (mdev->agreed_pro_version < 96 ?
2362 peer = mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1); 2416 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2363 if (self == peer) { 2417 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2418 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
2364 /* The last P_SYNC_UUID did not get through. Undo the last start of 2419 /* The last P_SYNC_UUID did not get through. Undo the last start of
2365 resync as sync source modifications of the peer's UUIDs. */ 2420 resync as sync source modifications of the peer's UUIDs. */
2366 2421
2367 if (mdev->agreed_pro_version < 91) 2422 if (mdev->agreed_pro_version < 91)
2368 return -1001; 2423 return -1091;
2369 2424
2370 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; 2425 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2371 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; 2426 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
2427
2428 dev_info(DEV, "Did not get last syncUUID packet, corrected:\n");
2429 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2430
2372 return -1; 2431 return -1;
2373 } 2432 }
2374 } 2433 }
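
The hunk above detects a lost P_SYNC_UUID on the sync-target side: with proto >= 96 the peer's bitmap UUID is expected to equal the start-of-resync UUID plus UUID_NEW_BM_OFFSET, and when that matches, the peer's UUID history is rolled back one step. A minimal stand-alone sketch of just that rollback; the slot layout mirrors the UI_* indices used above and everything else is illustrative:

	#include <stdio.h>

	/* Sketch: undo one sync-source UUID rotation after a lost
	 * P_SYNC_UUID, mirroring the p_uuid manipulation in the hunk
	 * above; slot layout follows the UI_* indices used there. */
	enum { UI_CURRENT, UI_BITMAP, UI_HISTORY_START, UI_HISTORY_END };

	static void undo_peer_uuid_rotation(unsigned long long *p_uuid)
	{
		/* the start-of-resync UUID moves back into the bitmap slot,
		 * and the history shifts up by one */
		p_uuid[UI_BITMAP] = p_uuid[UI_HISTORY_START];
		p_uuid[UI_HISTORY_START] = p_uuid[UI_HISTORY_START + 1];
	}

	int main(void)
	{
		unsigned long long p_uuid[4] = { 0x40ULL, 0x30ULL, 0x20ULL, 0x10ULL };

		undo_peer_uuid_rotation(p_uuid);
		printf("bitmap=%llx history=%llx\n",
		       p_uuid[UI_BITMAP], p_uuid[UI_HISTORY_START]);
		return 0;
	}
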
@@ -2390,20 +2449,20 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2390 *rule_nr = 71; 2449 *rule_nr = 71;
2391 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); 2450 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2392 if (self == peer) { 2451 if (self == peer) {
2393 self = mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1); 2452 if (mdev->agreed_pro_version < 96 ?
2394 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); 2453 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2395 if (self == peer) { 2454 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2455 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
2396 /* The last P_SYNC_UUID did not get through. Undo the last start of 2456 /* The last P_SYNC_UUID did not get through. Undo the last start of
2397 resync as sync source modifications of our UUIDs. */ 2457 resync as sync source modifications of our UUIDs. */
2398 2458
2399 if (mdev->agreed_pro_version < 91) 2459 if (mdev->agreed_pro_version < 91)
2400 return -1001; 2460 return -1091;
2401 2461
2402 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); 2462 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2403 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); 2463 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2404 2464
2405 dev_info(DEV, "Undid last start of resync:\n"); 2465 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
2406
2407 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, 2466 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2408 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); 2467 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2409 2468
@@ -2466,8 +2525,8 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2466 dev_alert(DEV, "Unrelated data, aborting!\n"); 2525 dev_alert(DEV, "Unrelated data, aborting!\n");
2467 return C_MASK; 2526 return C_MASK;
2468 } 2527 }
2469 if (hg == -1001) { 2528 if (hg < -1000) {
2470 dev_alert(DEV, "To resolve this both sides have to support at least protocol\n"); 2529 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
2471 return C_MASK; 2530 return C_MASK;
2472 } 2531 }
2473 2532
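
Note the encoding at work here: "requires protocol N" comes back from drbd_uuid_compare() as -(1000 + N), which is how the message above recovers the version number with -hg - 1000. A tiny self-contained illustration of the convention:

	#include <stdio.h>

	/* "requires protocol N" encoded as -(1000 + N), matching the
	 * -1091/-1096 returns and the -hg - 1000 decode above */
	#define REQUIRES_PROTO(n)	(-1000 - (n))

	int main(void)
	{
		int hg = REQUIRES_PROTO(91);	/* i.e. -1091 */

		if (hg < -1000)
			printf("need at least protocol %d on both sides\n",
			       -hg - 1000);
		return 0;
	}
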
@@ -2566,7 +2625,8 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2566 2625
2567 if (abs(hg) >= 2) { 2626 if (abs(hg) >= 2) {
2568 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); 2627 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
2569 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) 2628 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2629 BM_LOCKED_SET_ALLOWED))
2570 return C_MASK; 2630 return C_MASK;
2571 } 2631 }
2572 2632
@@ -2660,7 +2720,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig
2660 unsigned char *my_alg = mdev->net_conf->integrity_alg; 2720 unsigned char *my_alg = mdev->net_conf->integrity_alg;
2661 2721
2662 if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) 2722 if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
2663 return FALSE; 2723 return false;
2664 2724
2665 p_integrity_alg[SHARED_SECRET_MAX-1] = 0; 2725 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2666 if (strcmp(p_integrity_alg, my_alg)) { 2726 if (strcmp(p_integrity_alg, my_alg)) {
@@ -2671,11 +2731,11 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig
2671 my_alg[0] ? my_alg : (unsigned char *)"<not-used>"); 2731 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2672 } 2732 }
2673 2733
2674 return TRUE; 2734 return true;
2675 2735
2676disconnect: 2736disconnect:
2677 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 2737 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
2678 return FALSE; 2738 return false;
2679} 2739}
2680 2740
2681/* helper function 2741/* helper function
@@ -2707,7 +2767,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2707 2767
2708static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) 2768static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size)
2709{ 2769{
2710 int ok = TRUE; 2770 int ok = true;
2711 struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; 2771 struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95;
2712 unsigned int header_size, data_size, exp_max_sz; 2772 unsigned int header_size, data_size, exp_max_sz;
2713 struct crypto_hash *verify_tfm = NULL; 2773 struct crypto_hash *verify_tfm = NULL;
@@ -2725,7 +2785,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2725 if (packet_size > exp_max_sz) { 2785 if (packet_size > exp_max_sz) {
2726 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", 2786 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
2727 packet_size, exp_max_sz); 2787 packet_size, exp_max_sz);
2728 return FALSE; 2788 return false;
2729 } 2789 }
2730 2790
2731 if (apv <= 88) { 2791 if (apv <= 88) {
@@ -2745,7 +2805,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2745 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); 2805 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2746 2806
2747 if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) 2807 if (drbd_recv(mdev, &p->head.payload, header_size) != header_size)
2748 return FALSE; 2808 return false;
2749 2809
2750 mdev->sync_conf.rate = be32_to_cpu(p->rate); 2810 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2751 2811
@@ -2755,11 +2815,11 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2755 dev_err(DEV, "verify-alg too long, " 2815 dev_err(DEV, "verify-alg too long, "
2756 "peer wants %u, accepting only %u byte\n", 2816 "peer wants %u, accepting only %u byte\n",
2757 data_size, SHARED_SECRET_MAX); 2817 data_size, SHARED_SECRET_MAX);
2758 return FALSE; 2818 return false;
2759 } 2819 }
2760 2820
2761 if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) 2821 if (drbd_recv(mdev, p->verify_alg, data_size) != data_size)
2762 return FALSE; 2822 return false;
2763 2823
2764 /* we expect NUL terminated string */ 2824 /* we expect NUL terminated string */
2765 /* but just in case someone tries to be evil */ 2825 /* but just in case someone tries to be evil */
@@ -2853,7 +2913,7 @@ disconnect:
2853 /* but free the verify_tfm again, if csums_tfm did not work out */ 2913 /* but free the verify_tfm again, if csums_tfm did not work out */
2854 crypto_free_hash(verify_tfm); 2914 crypto_free_hash(verify_tfm);
2855 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 2915 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
2856 return FALSE; 2916 return false;
2857} 2917}
2858 2918
2859static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) 2919static void drbd_setup_order_type(struct drbd_conf *mdev, int peer)
@@ -2879,7 +2939,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
2879{ 2939{
2880 struct p_sizes *p = &mdev->data.rbuf.sizes; 2940 struct p_sizes *p = &mdev->data.rbuf.sizes;
2881 enum determine_dev_size dd = unchanged; 2941 enum determine_dev_size dd = unchanged;
2882 unsigned int max_seg_s; 2942 unsigned int max_bio_size;
2883 sector_t p_size, p_usize, my_usize; 2943 sector_t p_size, p_usize, my_usize;
2884 int ldsc = 0; /* local disk size changed */ 2944 int ldsc = 0; /* local disk size changed */
2885 enum dds_flags ddsf; 2945 enum dds_flags ddsf;
@@ -2890,7 +2950,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
2890 if (p_size == 0 && mdev->state.disk == D_DISKLESS) { 2950 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
2891 dev_err(DEV, "some backing storage is needed\n"); 2951 dev_err(DEV, "some backing storage is needed\n");
2892 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 2952 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
2893 return FALSE; 2953 return false;
2894 } 2954 }
2895 2955
2896 /* just store the peer's disk size for now. 2956 /* just store the peer's disk size for now.
@@ -2927,18 +2987,17 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
2927 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 2987 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
2928 mdev->ldev->dc.disk_size = my_usize; 2988 mdev->ldev->dc.disk_size = my_usize;
2929 put_ldev(mdev); 2989 put_ldev(mdev);
2930 return FALSE; 2990 return false;
2931 } 2991 }
2932 put_ldev(mdev); 2992 put_ldev(mdev);
2933 } 2993 }
2934#undef min_not_zero
2935 2994
2936 ddsf = be16_to_cpu(p->dds_flags); 2995 ddsf = be16_to_cpu(p->dds_flags);
2937 if (get_ldev(mdev)) { 2996 if (get_ldev(mdev)) {
2938 dd = drbd_determin_dev_size(mdev, ddsf); 2997 dd = drbd_determin_dev_size(mdev, ddsf);
2939 put_ldev(mdev); 2998 put_ldev(mdev);
2940 if (dd == dev_size_error) 2999 if (dd == dev_size_error)
2941 return FALSE; 3000 return false;
2942 drbd_md_sync(mdev); 3001 drbd_md_sync(mdev);
2943 } else { 3002 } else {
2944 /* I am diskless, need to accept the peer's size. */ 3003 /* I am diskless, need to accept the peer's size. */
@@ -2952,14 +3011,14 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
2952 } 3011 }
2953 3012
2954 if (mdev->agreed_pro_version < 94) 3013 if (mdev->agreed_pro_version < 94)
2955 max_seg_s = be32_to_cpu(p->max_segment_size); 3014 max_bio_size = be32_to_cpu(p->max_bio_size);
2956 else if (mdev->agreed_pro_version == 94) 3015 else if (mdev->agreed_pro_version == 94)
2957 max_seg_s = DRBD_MAX_SIZE_H80_PACKET; 3016 max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
2958 else /* drbd 8.3.8 onwards */ 3017 else /* drbd 8.3.8 onwards */
2959 max_seg_s = DRBD_MAX_SEGMENT_SIZE; 3018 max_bio_size = DRBD_MAX_BIO_SIZE;
2960 3019
2961 if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) 3020 if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9)
2962 drbd_setup_queue_param(mdev, max_seg_s); 3021 drbd_setup_queue_param(mdev, max_bio_size);
2963 3022
2964 drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); 3023 drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type));
2965 put_ldev(mdev); 3024 put_ldev(mdev);
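
The comparison above is done in bytes: queue_max_hw_sectors() is kept in 512-byte sectors, so it is shifted left by 9 before being compared with max_bio_size. A one-line sanity sketch of the unit conversion:

	/* sectors are 512 bytes, so sectors -> bytes is << 9
	 * (and bytes -> sectors is >> 9, as used elsewhere in this diff) */
	static inline unsigned int sectors_to_bytes(unsigned int sectors)
	{
		return sectors << 9;
	}
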
@@ -2985,14 +3044,14 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
2985 } 3044 }
2986 } 3045 }
2987 3046
2988 return TRUE; 3047 return true;
2989} 3048}
2990 3049
2991static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3050static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
2992{ 3051{
2993 struct p_uuids *p = &mdev->data.rbuf.uuids; 3052 struct p_uuids *p = &mdev->data.rbuf.uuids;
2994 u64 *p_uuid; 3053 u64 *p_uuid;
2995 int i; 3054 int i, updated_uuids = 0;
2996 3055
2997 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); 3056 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
2998 3057
@@ -3009,7 +3068,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3009 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", 3068 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3010 (unsigned long long)mdev->ed_uuid); 3069 (unsigned long long)mdev->ed_uuid);
3011 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3070 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3012 return FALSE; 3071 return false;
3013 } 3072 }
3014 3073
3015 if (get_ldev(mdev)) { 3074 if (get_ldev(mdev)) {
@@ -3021,19 +3080,21 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3021 if (skip_initial_sync) { 3080 if (skip_initial_sync) {
3022 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); 3081 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3023 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, 3082 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
3024 "clear_n_write from receive_uuids"); 3083 "clear_n_write from receive_uuids",
3084 BM_LOCKED_TEST_ALLOWED);
3025 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); 3085 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3026 _drbd_uuid_set(mdev, UI_BITMAP, 0); 3086 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3027 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 3087 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3028 CS_VERBOSE, NULL); 3088 CS_VERBOSE, NULL);
3029 drbd_md_sync(mdev); 3089 drbd_md_sync(mdev);
3090 updated_uuids = 1;
3030 } 3091 }
3031 put_ldev(mdev); 3092 put_ldev(mdev);
3032 } else if (mdev->state.disk < D_INCONSISTENT && 3093 } else if (mdev->state.disk < D_INCONSISTENT &&
3033 mdev->state.role == R_PRIMARY) { 3094 mdev->state.role == R_PRIMARY) {
3034 /* I am a diskless primary, the peer just created a new current UUID 3095 /* I am a diskless primary, the peer just created a new current UUID
3035 for me. */ 3096 for me. */
3036 drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); 3097 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3037 } 3098 }
3038 3099
3039 /* Before we test for the disk state, we should wait until an eventually 3100 /* Before we test for the disk state, we should wait until an eventually
@@ -3042,9 +3103,12 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3042 new disk state... */ 3103 new disk state... */
3043 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); 3104 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
3044 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) 3105 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
3045 drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); 3106 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3046 3107
3047 return TRUE; 3108 if (updated_uuids)
3109 drbd_print_uuids(mdev, "receiver updated UUIDs to");
3110
3111 return true;
3048} 3112}
3049 3113
3050/** 3114/**
@@ -3081,7 +3145,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
3081{ 3145{
3082 struct p_req_state *p = &mdev->data.rbuf.req_state; 3146 struct p_req_state *p = &mdev->data.rbuf.req_state;
3083 union drbd_state mask, val; 3147 union drbd_state mask, val;
3084 int rv; 3148 enum drbd_state_rv rv;
3085 3149
3086 mask.i = be32_to_cpu(p->mask); 3150 mask.i = be32_to_cpu(p->mask);
3087 val.i = be32_to_cpu(p->val); 3151 val.i = be32_to_cpu(p->val);
@@ -3089,7 +3153,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
3089 if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && 3153 if (test_bit(DISCARD_CONCURRENT, &mdev->flags) &&
3090 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { 3154 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
3091 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); 3155 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
3092 return TRUE; 3156 return true;
3093 } 3157 }
3094 3158
3095 mask = convert_state(mask); 3159 mask = convert_state(mask);
@@ -3100,7 +3164,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
3100 drbd_send_sr_reply(mdev, rv); 3164 drbd_send_sr_reply(mdev, rv);
3101 drbd_md_sync(mdev); 3165 drbd_md_sync(mdev);
3102 3166
3103 return TRUE; 3167 return true;
3104} 3168}
3105 3169
3106static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3170static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -3145,7 +3209,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3145 peer_state.conn == C_CONNECTED) { 3209 peer_state.conn == C_CONNECTED) {
3146 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) 3210 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3147 drbd_resync_finished(mdev); 3211 drbd_resync_finished(mdev);
3148 return TRUE; 3212 return true;
3149 } 3213 }
3150 } 3214 }
3151 3215
@@ -3161,6 +3225,9 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3161 if (ns.conn == C_WF_REPORT_PARAMS) 3225 if (ns.conn == C_WF_REPORT_PARAMS)
3162 ns.conn = C_CONNECTED; 3226 ns.conn = C_CONNECTED;
3163 3227
3228 if (peer_state.conn == C_AHEAD)
3229 ns.conn = C_BEHIND;
3230
3164 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && 3231 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3165 get_ldev_if_state(mdev, D_NEGOTIATING)) { 3232 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3166 int cr; /* consider resync */ 3233 int cr; /* consider resync */
@@ -3195,10 +3262,10 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3195 real_peer_disk = D_DISKLESS; 3262 real_peer_disk = D_DISKLESS;
3196 } else { 3263 } else {
3197 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) 3264 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
3198 return FALSE; 3265 return false;
3199 D_ASSERT(os.conn == C_WF_REPORT_PARAMS); 3266 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
3200 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3267 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3201 return FALSE; 3268 return false;
3202 } 3269 }
3203 } 3270 }
3204 } 3271 }
@@ -3223,7 +3290,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3223 drbd_uuid_new_current(mdev); 3290 drbd_uuid_new_current(mdev);
3224 clear_bit(NEW_CUR_UUID, &mdev->flags); 3291 clear_bit(NEW_CUR_UUID, &mdev->flags);
3225 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); 3292 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
3226 return FALSE; 3293 return false;
3227 } 3294 }
3228 rv = _drbd_set_state(mdev, ns, cs_flags, NULL); 3295 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
3229 ns = mdev->state; 3296 ns = mdev->state;
@@ -3231,7 +3298,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3231 3298
3232 if (rv < SS_SUCCESS) { 3299 if (rv < SS_SUCCESS) {
3233 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3300 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3234 return FALSE; 3301 return false;
3235 } 3302 }
3236 3303
3237 if (os.conn > C_WF_REPORT_PARAMS) { 3304 if (os.conn > C_WF_REPORT_PARAMS) {
@@ -3249,7 +3316,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3249 3316
3250 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ 3317 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3251 3318
3252 return TRUE; 3319 return true;
3253} 3320}
3254 3321
3255static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3322static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -3258,6 +3325,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
3258 3325
3259 wait_event(mdev->misc_wait, 3326 wait_event(mdev->misc_wait,
3260 mdev->state.conn == C_WF_SYNC_UUID || 3327 mdev->state.conn == C_WF_SYNC_UUID ||
3328 mdev->state.conn == C_BEHIND ||
3261 mdev->state.conn < C_CONNECTED || 3329 mdev->state.conn < C_CONNECTED ||
3262 mdev->state.disk < D_NEGOTIATING); 3330 mdev->state.disk < D_NEGOTIATING);
3263 3331
@@ -3269,32 +3337,42 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
3269 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); 3337 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3270 _drbd_uuid_set(mdev, UI_BITMAP, 0UL); 3338 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3271 3339
3340 drbd_print_uuids(mdev, "updated sync uuid");
3272 drbd_start_resync(mdev, C_SYNC_TARGET); 3341 drbd_start_resync(mdev, C_SYNC_TARGET);
3273 3342
3274 put_ldev(mdev); 3343 put_ldev(mdev);
3275 } else 3344 } else
3276 dev_err(DEV, "Ignoring SyncUUID packet!\n"); 3345 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3277 3346
3278 return TRUE; 3347 return true;
3279} 3348}
3280 3349
3281enum receive_bitmap_ret { OK, DONE, FAILED }; 3350/**
3282 3351 * receive_bitmap_plain
3283static enum receive_bitmap_ret 3352 *
3353 * Return 0 when done, 1 when another iteration is needed, and a negative error
3354 * code upon failure.
3355 */
3356static int
3284receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, 3357receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3285 unsigned long *buffer, struct bm_xfer_ctx *c) 3358 unsigned long *buffer, struct bm_xfer_ctx *c)
3286{ 3359{
3287 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); 3360 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3288 unsigned want = num_words * sizeof(long); 3361 unsigned want = num_words * sizeof(long);
3362 int err;
3289 3363
3290 if (want != data_size) { 3364 if (want != data_size) {
3291 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size); 3365 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
3292 return FAILED; 3366 return -EIO;
3293 } 3367 }
3294 if (want == 0) 3368 if (want == 0)
3295 return DONE; 3369 return 0;
3296 if (drbd_recv(mdev, buffer, want) != want) 3370 err = drbd_recv(mdev, buffer, want);
3297 return FAILED; 3371 if (err != want) {
3372 if (err >= 0)
3373 err = -EIO;
3374 return err;
3375 }
3298 3376
3299 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); 3377 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3300 3378
@@ -3303,10 +3381,16 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3303 if (c->bit_offset > c->bm_bits) 3381 if (c->bit_offset > c->bm_bits)
3304 c->bit_offset = c->bm_bits; 3382 c->bit_offset = c->bm_bits;
3305 3383
3306 return OK; 3384 return 1;
3307} 3385}
3308 3386
3309static enum receive_bitmap_ret 3387/**
3388 * recv_bm_rle_bits
3389 *
3390 * Return 0 when done, 1 when another iteration is needed, and a negative error
3391 * code upon failure.
3392 */
3393static int
3310recv_bm_rle_bits(struct drbd_conf *mdev, 3394recv_bm_rle_bits(struct drbd_conf *mdev,
3311 struct p_compressed_bm *p, 3395 struct p_compressed_bm *p,
3312 struct bm_xfer_ctx *c) 3396 struct bm_xfer_ctx *c)
@@ -3326,18 +3410,18 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
3326 3410
3327 bits = bitstream_get_bits(&bs, &look_ahead, 64); 3411 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3328 if (bits < 0) 3412 if (bits < 0)
3329 return FAILED; 3413 return -EIO;
3330 3414
3331 for (have = bits; have > 0; s += rl, toggle = !toggle) { 3415 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3332 bits = vli_decode_bits(&rl, look_ahead); 3416 bits = vli_decode_bits(&rl, look_ahead);
3333 if (bits <= 0) 3417 if (bits <= 0)
3334 return FAILED; 3418 return -EIO;
3335 3419
3336 if (toggle) { 3420 if (toggle) {
3337 e = s + rl -1; 3421 e = s + rl -1;
3338 if (e >= c->bm_bits) { 3422 if (e >= c->bm_bits) {
3339 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); 3423 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
3340 return FAILED; 3424 return -EIO;
3341 } 3425 }
3342 _drbd_bm_set_bits(mdev, s, e); 3426 _drbd_bm_set_bits(mdev, s, e);
3343 } 3427 }
@@ -3347,14 +3431,14 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
3347 have, bits, look_ahead, 3431 have, bits, look_ahead,
3348 (unsigned int)(bs.cur.b - p->code), 3432 (unsigned int)(bs.cur.b - p->code),
3349 (unsigned int)bs.buf_len); 3433 (unsigned int)bs.buf_len);
3350 return FAILED; 3434 return -EIO;
3351 } 3435 }
3352 look_ahead >>= bits; 3436 look_ahead >>= bits;
3353 have -= bits; 3437 have -= bits;
3354 3438
3355 bits = bitstream_get_bits(&bs, &tmp, 64 - have); 3439 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3356 if (bits < 0) 3440 if (bits < 0)
3357 return FAILED; 3441 return -EIO;
3358 look_ahead |= tmp << have; 3442 look_ahead |= tmp << have;
3359 have += bits; 3443 have += bits;
3360 } 3444 }
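
recv_bm_rle_bits() above decodes runs of alternating clear/set bits; only the set runs touch the bitmap, and a run overflowing bm_bits is a protocol error. A self-contained sketch of the same toggle scheme, with a plain array of run lengths standing in for the VLI bitstream:

	#include <stdio.h>

	/* Sketch: run-length decode with alternating polarity, mirroring
	 * recv_bm_rle_bits() above. The first run describes clear bits. */
	static long rle_decode(const unsigned long *runs, int nruns,
			       unsigned char *bits, unsigned long bm_bits)
	{
		unsigned long s = 0;
		int i, toggle = 0;

		for (i = 0; i < nruns; i++, toggle = !toggle) {
			unsigned long rl = runs[i], e = s + rl - 1;

			if (rl == 0 || e >= bm_bits)
				return -1;	/* bogus run / bitmap overflow */
			if (toggle) {		/* only set runs touch the bitmap */
				unsigned long b;
				for (b = s; b <= e; b++)
					bits[b] = 1;
			}
			s += rl;
		}
		return s;	/* caller checks s == bm_bits for "done" */
	}

	int main(void)
	{
		unsigned char bm[16] = { 0 };
		unsigned long runs[] = { 3, 5, 6, 2 };	/* clear,set,clear,set */
		long s = rle_decode(runs, 4, bm, 16);
		unsigned long b;

		printf("decoded up to bit %ld:", s);
		for (b = 0; b < 16; b++)
			printf(" %d", bm[b]);
		printf("\n");
		return 0;
	}
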
@@ -3362,10 +3446,16 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
3362 c->bit_offset = s; 3446 c->bit_offset = s;
3363 bm_xfer_ctx_bit_to_word_offset(c); 3447 bm_xfer_ctx_bit_to_word_offset(c);
3364 3448
3365 return (s == c->bm_bits) ? DONE : OK; 3449 return (s != c->bm_bits);
3366} 3450}
3367 3451
3368static enum receive_bitmap_ret 3452/**
3453 * decode_bitmap_c
3454 *
3455 * Return 0 when done, 1 when another iteration is needed, and a negative error
3456 * code upon failure.
3457 */
3458static int
3369decode_bitmap_c(struct drbd_conf *mdev, 3459decode_bitmap_c(struct drbd_conf *mdev,
3370 struct p_compressed_bm *p, 3460 struct p_compressed_bm *p,
3371 struct bm_xfer_ctx *c) 3461 struct bm_xfer_ctx *c)
@@ -3379,7 +3469,7 @@ decode_bitmap_c(struct drbd_conf *mdev,
3379 3469
3380 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); 3470 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3381 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 3471 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
3382 return FAILED; 3472 return -EIO;
3383} 3473}
3384 3474
3385void INFO_bm_xfer_stats(struct drbd_conf *mdev, 3475void INFO_bm_xfer_stats(struct drbd_conf *mdev,
@@ -3428,13 +3518,13 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3428{ 3518{
3429 struct bm_xfer_ctx c; 3519 struct bm_xfer_ctx c;
3430 void *buffer; 3520 void *buffer;
3431 enum receive_bitmap_ret ret; 3521 int err;
3432 int ok = FALSE; 3522 int ok = false;
3433 struct p_header80 *h = &mdev->data.rbuf.header.h80; 3523 struct p_header80 *h = &mdev->data.rbuf.header.h80;
3434 3524
3435 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); 3525 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3436 3526 /* you are supposed to send additional out-of-sync information
3437 drbd_bm_lock(mdev, "receive bitmap"); 3527 * if you actually set bits during this phase */
3438 3528
3439 /* maybe we should use some per thread scratch page, 3529 /* maybe we should use some per thread scratch page,
3440 * and allocate that during initial device creation? */ 3530 * and allocate that during initial device creation? */
@@ -3449,9 +3539,9 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3449 .bm_words = drbd_bm_words(mdev), 3539 .bm_words = drbd_bm_words(mdev),
3450 }; 3540 };
3451 3541
3452 do { 3542 for(;;) {
3453 if (cmd == P_BITMAP) { 3543 if (cmd == P_BITMAP) {
3454 ret = receive_bitmap_plain(mdev, data_size, buffer, &c); 3544 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
3455 } else if (cmd == P_COMPRESSED_BITMAP) { 3545 } else if (cmd == P_COMPRESSED_BITMAP) {
3456 /* MAYBE: sanity check that we speak proto >= 90, 3546 /* MAYBE: sanity check that we speak proto >= 90,
3457 * and the feature is enabled! */ 3547 * and the feature is enabled! */
@@ -3468,9 +3558,9 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3468 goto out; 3558 goto out;
3469 if (data_size <= (sizeof(*p) - sizeof(p->head))) { 3559 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3470 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); 3560 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
3471 return FAILED; 3561 goto out;
3472 } 3562 }
3473 ret = decode_bitmap_c(mdev, p, &c); 3563 err = decode_bitmap_c(mdev, p, &c);
3474 } else { 3564 } else {
3475 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); 3565 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
3476 goto out; 3566 goto out;
@@ -3479,24 +3569,26 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3479 c.packets[cmd == P_BITMAP]++; 3569 c.packets[cmd == P_BITMAP]++;
3480 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; 3570 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size;
3481 3571
3482 if (ret != OK) 3572 if (err <= 0) {
3573 if (err < 0)
3574 goto out;
3483 break; 3575 break;
3484 3576 }
3485 if (!drbd_recv_header(mdev, &cmd, &data_size)) 3577 if (!drbd_recv_header(mdev, &cmd, &data_size))
3486 goto out; 3578 goto out;
3487 } while (ret == OK); 3579 }
3488 if (ret == FAILED)
3489 goto out;
3490 3580
3491 INFO_bm_xfer_stats(mdev, "receive", &c); 3581 INFO_bm_xfer_stats(mdev, "receive", &c);
3492 3582
3493 if (mdev->state.conn == C_WF_BITMAP_T) { 3583 if (mdev->state.conn == C_WF_BITMAP_T) {
3584 enum drbd_state_rv rv;
3585
3494 ok = !drbd_send_bitmap(mdev); 3586 ok = !drbd_send_bitmap(mdev);
3495 if (!ok) 3587 if (!ok)
3496 goto out; 3588 goto out;
3497 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ 3589 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
3498 ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); 3590 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3499 D_ASSERT(ok == SS_SUCCESS); 3591 D_ASSERT(rv == SS_SUCCESS);
3500 } else if (mdev->state.conn != C_WF_BITMAP_S) { 3592 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3501 /* admin may have requested C_DISCONNECTING, 3593 /* admin may have requested C_DISCONNECTING,
3502 * other threads may have noticed network errors */ 3594 * other threads may have noticed network errors */
@@ -3504,7 +3596,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3504 drbd_conn_str(mdev->state.conn)); 3596 drbd_conn_str(mdev->state.conn));
3505 } 3597 }
3506 3598
3507 ok = TRUE; 3599 ok = true;
3508 out: 3600 out:
3509 drbd_bm_unlock(mdev); 3601 drbd_bm_unlock(mdev);
3510 if (ok && mdev->state.conn == C_WF_BITMAP_S) 3602 if (ok && mdev->state.conn == C_WF_BITMAP_S)
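
All three step functions now share one int convention: negative aborts the transfer, 0 means done, positive means read another packet, with receive_bitmap()'s for(;;) loop as the consumer here. A toy model of a caller driving that convention:

	#include <stdio.h>

	/* Sketch of the "0 done, 1 again, <0 error" step convention the
	 * bitmap receive path adopts above; step() is a stand-in for
	 * receive_bitmap_plain()/decode_bitmap_c(). */
	static int step(int *state)
	{
		if (*state < 0)
			return -1;	/* error */
		if (*state == 0)
			return 0;	/* done */
		(*state)--;
		return 1;		/* more packets needed */
	}

	int main(void)
	{
		int state = 3, err;

		for (;;) {
			err = step(&state);
			if (err <= 0) {
				if (err < 0)
					fprintf(stderr, "failed\n");
				break;	/* done or failed, as in receive_bitmap() */
			}
			/* fetch the next packet here */
		}
		return err < 0;
	}
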
@@ -3538,7 +3630,26 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u
3538 * with the data requests being unplugged */ 3630 * with the data requests being unplugged */
3539 drbd_tcp_quickack(mdev->data.socket); 3631 drbd_tcp_quickack(mdev->data.socket);
3540 3632
3541 return TRUE; 3633 return true;
3634}
3635
3636static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3637{
3638 struct p_block_desc *p = &mdev->data.rbuf.block_desc;
3639
3640 switch (mdev->state.conn) {
3641 case C_WF_SYNC_UUID:
3642 case C_WF_BITMAP_T:
3643 case C_BEHIND:
3644 break;
3645 default:
3646 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3647 drbd_conn_str(mdev->state.conn));
3648 }
3649
3650 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3651
3652 return true;
3542} 3653}
3543 3654
3544typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); 3655typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
@@ -3571,6 +3682,7 @@ static struct data_cmd drbd_cmd_handler[] = {
3571 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 3682 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3572 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 3683 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3573 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, 3684 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
3685 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
3574 /* anything missing from this table is in 3686 /* anything missing from this table is in
3575 * the asender_tbl, see get_asender_cmd */ 3687 * the asender_tbl, see get_asender_cmd */
3576 [P_MAX_CMD] = { 0, 0, NULL }, 3688 [P_MAX_CMD] = { 0, 0, NULL },
@@ -3610,7 +3722,8 @@ static void drbdd(struct drbd_conf *mdev)
3610 if (shs) { 3722 if (shs) {
3611 rv = drbd_recv(mdev, &header->h80.payload, shs); 3723 rv = drbd_recv(mdev, &header->h80.payload, shs);
3612 if (unlikely(rv != shs)) { 3724 if (unlikely(rv != shs)) {
3613 dev_err(DEV, "short read while reading sub header: rv=%d\n", rv); 3725 if (!signal_pending(current))
3726 dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
3614 goto err_out; 3727 goto err_out;
3615 } 3728 }
3616 } 3729 }
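
The pattern introduced here repeats in drbd_do_handshake() and drbd_do_auth() below: a short read caused by a pending signal (the normal case during teardown) is not worth logging at all, and the remaining cases drop from dev_err to dev_warn. A user-space sketch of the idiom, with a plain flag standing in for signal_pending(current):

	#include <stdio.h>

	/* stay silent when the read was cut short by a deliberate
	 * interruption; warn, rather than error, otherwise */
	static void report_short_read(int got, int want, int interrupted)
	{
		if (got == want)
			return;
		if (!interrupted)
			fprintf(stderr, "warning: short read: got %d of %d\n",
				got, want);
		/* either way the caller bails out, as in drbdd() above */
	}

	int main(void)
	{
		report_short_read(3, 8, 0);	/* warns */
		report_short_read(3, 8, 1);	/* silent: we were interrupted */
		return 0;
	}
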
@@ -3682,9 +3795,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3682 3795
3683 if (mdev->state.conn == C_STANDALONE) 3796 if (mdev->state.conn == C_STANDALONE)
3684 return; 3797 return;
3685 if (mdev->state.conn >= C_WF_CONNECTION)
3686 dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n",
3687 drbd_conn_str(mdev->state.conn));
3688 3798
3689 /* asender does not clean up anything. it must not interfere, either */ 3799 /* asender does not clean up anything. it must not interfere, either */
3690 drbd_thread_stop(&mdev->asender); 3800 drbd_thread_stop(&mdev->asender);
@@ -3713,6 +3823,8 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3713 atomic_set(&mdev->rs_pending_cnt, 0); 3823 atomic_set(&mdev->rs_pending_cnt, 0);
3714 wake_up(&mdev->misc_wait); 3824 wake_up(&mdev->misc_wait);
3715 3825
3826 del_timer(&mdev->request_timer);
3827
3716 /* make sure syncer is stopped and w_resume_next_sg queued */ 3828 /* make sure syncer is stopped and w_resume_next_sg queued */
3717 del_timer_sync(&mdev->resync_timer); 3829 del_timer_sync(&mdev->resync_timer);
3718 resync_timer_fn((unsigned long)mdev); 3830 resync_timer_fn((unsigned long)mdev);
@@ -3758,13 +3870,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3758 if (os.conn == C_DISCONNECTING) { 3870 if (os.conn == C_DISCONNECTING) {
3759 wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); 3871 wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
3760 3872
3761 if (!is_susp(mdev->state)) {
3762 /* we must not free the tl_hash
3763 * while application io is still on the fly */
3764 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
3765 drbd_free_tl_hash(mdev);
3766 }
3767
3768 crypto_free_hash(mdev->cram_hmac_tfm); 3873 crypto_free_hash(mdev->cram_hmac_tfm);
3769 mdev->cram_hmac_tfm = NULL; 3874 mdev->cram_hmac_tfm = NULL;
3770 3875
@@ -3773,6 +3878,10 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3773 drbd_request_state(mdev, NS(conn, C_STANDALONE)); 3878 drbd_request_state(mdev, NS(conn, C_STANDALONE));
3774 } 3879 }
3775 3880
3881 /* serialize with bitmap writeout triggered by the state change,
3882 * if any. */
3883 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3884
3776 /* tcp_close and release of sendpage pages can be deferred. I don't 3885 /* tcp_close and release of sendpage pages can be deferred. I don't
3777 * want to use SO_LINGER, because apparently it can be deferred for 3886 * want to use SO_LINGER, because apparently it can be deferred for
3778 * more than 20 seconds (longest time I checked). 3887 * more than 20 seconds (longest time I checked).
@@ -3873,7 +3982,8 @@ static int drbd_do_handshake(struct drbd_conf *mdev)
3873 rv = drbd_recv(mdev, &p->head.payload, expect); 3982 rv = drbd_recv(mdev, &p->head.payload, expect);
3874 3983
3875 if (rv != expect) { 3984 if (rv != expect) {
3876 dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv); 3985 if (!signal_pending(current))
3986 dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv);
3877 return 0; 3987 return 0;
3878 } 3988 }
3879 3989
@@ -3975,7 +4085,8 @@ static int drbd_do_auth(struct drbd_conf *mdev)
3975 rv = drbd_recv(mdev, peers_ch, length); 4085 rv = drbd_recv(mdev, peers_ch, length);
3976 4086
3977 if (rv != length) { 4087 if (rv != length) {
3978 dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); 4088 if (!signal_pending(current))
4089 dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv);
3979 rv = 0; 4090 rv = 0;
3980 goto fail; 4091 goto fail;
3981 } 4092 }
@@ -4022,7 +4133,8 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4022 rv = drbd_recv(mdev, response , resp_size); 4133 rv = drbd_recv(mdev, response , resp_size);
4023 4134
4024 if (rv != resp_size) { 4135 if (rv != resp_size) {
4025 dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv); 4136 if (!signal_pending(current))
4137 dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv);
4026 rv = 0; 4138 rv = 0;
4027 goto fail; 4139 goto fail;
4028 } 4140 }
@@ -4074,8 +4186,7 @@ int drbdd_init(struct drbd_thread *thi)
4074 h = drbd_connect(mdev); 4186 h = drbd_connect(mdev);
4075 if (h == 0) { 4187 if (h == 0) {
4076 drbd_disconnect(mdev); 4188 drbd_disconnect(mdev);
4077 __set_current_state(TASK_INTERRUPTIBLE); 4189 schedule_timeout_interruptible(HZ);
4078 schedule_timeout(HZ);
4079 } 4190 }
4080 if (h == -1) { 4191 if (h == -1) {
4081 dev_warn(DEV, "Discarding network configuration.\n"); 4192 dev_warn(DEV, "Discarding network configuration.\n");
@@ -4113,7 +4224,7 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h)
4113 } 4224 }
4114 wake_up(&mdev->state_wait); 4225 wake_up(&mdev->state_wait);
4115 4226
4116 return TRUE; 4227 return true;
4117} 4228}
4118 4229
4119static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) 4230static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4129,7 +4240,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h)
4129 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) 4240 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
4130 wake_up(&mdev->misc_wait); 4241 wake_up(&mdev->misc_wait);
4131 4242
4132 return TRUE; 4243 return true;
4133} 4244}
4134 4245
4135static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) 4246static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4152,7 +4263,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
4152 dec_rs_pending(mdev); 4263 dec_rs_pending(mdev);
4153 atomic_add(blksize >> 9, &mdev->rs_sect_in); 4264 atomic_add(blksize >> 9, &mdev->rs_sect_in);
4154 4265
4155 return TRUE; 4266 return true;
4156} 4267}
4157 4268
4158/* when we receive the ACK for a write request, 4269/* when we receive the ACK for a write request,
@@ -4176,8 +4287,6 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
4176 return req; 4287 return req;
4177 } 4288 }
4178 } 4289 }
4179 dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n",
4180 (void *)(unsigned long)id, (unsigned long long)sector);
4181 return NULL; 4290 return NULL;
4182} 4291}
4183 4292
@@ -4195,15 +4304,17 @@ static int validate_req_change_req_state(struct drbd_conf *mdev,
4195 req = validator(mdev, id, sector); 4304 req = validator(mdev, id, sector);
4196 if (unlikely(!req)) { 4305 if (unlikely(!req)) {
4197 spin_unlock_irq(&mdev->req_lock); 4306 spin_unlock_irq(&mdev->req_lock);
4198 dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func); 4307
4199 return FALSE; 4308 dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func,
4309 (void *)(unsigned long)id, (unsigned long long)sector);
4310 return false;
4200 } 4311 }
4201 __req_mod(req, what, &m); 4312 __req_mod(req, what, &m);
4202 spin_unlock_irq(&mdev->req_lock); 4313 spin_unlock_irq(&mdev->req_lock);
4203 4314
4204 if (m.bio) 4315 if (m.bio)
4205 complete_master_bio(mdev, &m); 4316 complete_master_bio(mdev, &m);
4206 return TRUE; 4317 return true;
4207} 4318}
4208 4319
4209static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) 4320static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4218,7 +4329,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
4218 if (is_syncer_block_id(p->block_id)) { 4329 if (is_syncer_block_id(p->block_id)) {
4219 drbd_set_in_sync(mdev, sector, blksize); 4330 drbd_set_in_sync(mdev, sector, blksize);
4220 dec_rs_pending(mdev); 4331 dec_rs_pending(mdev);
4221 return TRUE; 4332 return true;
4222 } 4333 }
4223 switch (be16_to_cpu(h->command)) { 4334 switch (be16_to_cpu(h->command)) {
4224 case P_RS_WRITE_ACK: 4335 case P_RS_WRITE_ACK:
@@ -4239,7 +4350,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
4239 break; 4350 break;
4240 default: 4351 default:
4241 D_ASSERT(0); 4352 D_ASSERT(0);
4242 return FALSE; 4353 return false;
4243 } 4354 }
4244 4355
4245 return validate_req_change_req_state(mdev, p->block_id, sector, 4356 return validate_req_change_req_state(mdev, p->block_id, sector,
@@ -4250,20 +4361,44 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h)
4250{ 4361{
4251 struct p_block_ack *p = (struct p_block_ack *)h; 4362 struct p_block_ack *p = (struct p_block_ack *)h;
4252 sector_t sector = be64_to_cpu(p->sector); 4363 sector_t sector = be64_to_cpu(p->sector);
4253 4364 int size = be32_to_cpu(p->blksize);
4254 if (__ratelimit(&drbd_ratelimit_state)) 4365 struct drbd_request *req;
4255 dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n"); 4366 struct bio_and_error m;
4256 4367
4257 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 4368 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4258 4369
4259 if (is_syncer_block_id(p->block_id)) { 4370 if (is_syncer_block_id(p->block_id)) {
4260 int size = be32_to_cpu(p->blksize);
4261 dec_rs_pending(mdev); 4371 dec_rs_pending(mdev);
4262 drbd_rs_failed_io(mdev, sector, size); 4372 drbd_rs_failed_io(mdev, sector, size);
4263 return TRUE; 4373 return true;
4264 } 4374 }
4265 return validate_req_change_req_state(mdev, p->block_id, sector, 4375
4266 _ack_id_to_req, __func__ , neg_acked); 4376 spin_lock_irq(&mdev->req_lock);
4377 req = _ack_id_to_req(mdev, p->block_id, sector);
4378 if (!req) {
4379 spin_unlock_irq(&mdev->req_lock);
4380 if (mdev->net_conf->wire_protocol == DRBD_PROT_A ||
4381 mdev->net_conf->wire_protocol == DRBD_PROT_B) {
4382 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4383 The master bio might already be completed, therefore the
4384 request is no longer in the collision hash.
4385 => Do not try to validate block_id as request. */
4386 /* In Protocol B we might already have got a P_RECV_ACK
4387 but then get a P_NEG_ACK afterwards. */
4388 drbd_set_out_of_sync(mdev, sector, size);
4389 return true;
4390 } else {
4391 dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__,
4392 (void *)(unsigned long)p->block_id, (unsigned long long)sector);
4393 return false;
4394 }
4395 }
4396 __req_mod(req, neg_acked, &m);
4397 spin_unlock_irq(&mdev->req_lock);
4398
4399 if (m.bio)
4400 complete_master_bio(mdev, &m);
4401 return true;
4267} 4402}
4268 4403
4269static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) 4404static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4294,11 +4429,20 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h)
4294 4429
4295 if (get_ldev_if_state(mdev, D_FAILED)) { 4430 if (get_ldev_if_state(mdev, D_FAILED)) {
4296 drbd_rs_complete_io(mdev, sector); 4431 drbd_rs_complete_io(mdev, sector);
4297 drbd_rs_failed_io(mdev, sector, size); 4432 switch (be16_to_cpu(h->command)) {
4433 case P_NEG_RS_DREPLY:
4434 drbd_rs_failed_io(mdev, sector, size);
4435 case P_RS_CANCEL:
4436 break;
4437 default:
4438 D_ASSERT(0);
4439 put_ldev(mdev);
4440 return false;
4441 }
4298 put_ldev(mdev); 4442 put_ldev(mdev);
4299 } 4443 }
4300 4444
4301 return TRUE; 4445 return true;
4302} 4446}
4303 4447
4304static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) 4448static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4307,7 +4451,14 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
4307 4451
4308 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); 4452 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4309 4453
4310 return TRUE; 4454 if (mdev->state.conn == C_AHEAD &&
4455 atomic_read(&mdev->ap_in_flight) == 0 &&
4456 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4457 mdev->start_resync_timer.expires = jiffies + HZ;
4458 add_timer(&mdev->start_resync_timer);
4459 }
4460
4461 return true;
4311} 4462}
4312 4463
4313static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) 4464static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4328,12 +4479,18 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
4328 ov_oos_print(mdev); 4479 ov_oos_print(mdev);
4329 4480
4330 if (!get_ldev(mdev)) 4481 if (!get_ldev(mdev))
4331 return TRUE; 4482 return true;
4332 4483
4333 drbd_rs_complete_io(mdev, sector); 4484 drbd_rs_complete_io(mdev, sector);
4334 dec_rs_pending(mdev); 4485 dec_rs_pending(mdev);
4335 4486
4336 if (--mdev->ov_left == 0) { 4487 --mdev->ov_left;
4488
4489 /* let's advance progress step marks only for every other megabyte */
4490 if ((mdev->ov_left & 0x200) == 0x200)
4491 drbd_advance_rs_marks(mdev, mdev->ov_left);
4492
4493 if (mdev->ov_left == 0) {
4337 w = kmalloc(sizeof(*w), GFP_NOIO); 4494 w = kmalloc(sizeof(*w), GFP_NOIO);
4338 if (w) { 4495 if (w) {
4339 w->cb = w_ov_finished; 4496 w->cb = w_ov_finished;
@@ -4345,12 +4502,12 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
4345 } 4502 }
4346 } 4503 }
4347 put_ldev(mdev); 4504 put_ldev(mdev);
4348 return TRUE; 4505 return true;
4349} 4506}
4350 4507
4351static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) 4508static int got_skip(struct drbd_conf *mdev, struct p_header80 *h)
4352{ 4509{
4353 return TRUE; 4510 return true;
4354} 4511}
4355 4512
4356struct asender_cmd { 4513struct asender_cmd {
@@ -4378,6 +4535,7 @@ static struct asender_cmd *get_asender_cmd(int cmd)
4378 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 4535 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4379 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 4536 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
4380 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, 4537 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
4538 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
4381 [P_MAX_CMD] = { 0, NULL }, 4539 [P_MAX_CMD] = { 0, NULL },
4382 }; 4540 };
4383 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) 4541 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index ad3fc6228f27..5c0c8be1bb0a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -140,9 +140,14 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
140 struct hlist_node *n; 140 struct hlist_node *n;
141 struct hlist_head *slot; 141 struct hlist_head *slot;
142 142
143 /* before we can signal completion to the upper layers, 143 /* Before we can signal completion to the upper layers,
144 * we may need to close the current epoch */ 144 * we may need to close the current epoch.
145 * We can skip this if this request has not even been sent, because we
146 * did not have a fully established connection yet/anymore, during
147 * bitmap exchange, or while we are C_AHEAD due to congestion policy.
148 */
145 if (mdev->state.conn >= C_CONNECTED && 149 if (mdev->state.conn >= C_CONNECTED &&
150 (s & RQ_NET_SENT) != 0 &&
146 req->epoch == mdev->newest_tle->br_number) 151 req->epoch == mdev->newest_tle->br_number)
147 queue_barrier(mdev); 152 queue_barrier(mdev);
148 153
@@ -440,7 +445,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
440 req->rq_state |= RQ_LOCAL_COMPLETED; 445 req->rq_state |= RQ_LOCAL_COMPLETED;
441 req->rq_state &= ~RQ_LOCAL_PENDING; 446 req->rq_state &= ~RQ_LOCAL_PENDING;
442 447
443 __drbd_chk_io_error(mdev, FALSE); 448 __drbd_chk_io_error(mdev, false);
444 _req_may_be_done_not_susp(req, m); 449 _req_may_be_done_not_susp(req, m);
445 put_ldev(mdev); 450 put_ldev(mdev);
446 break; 451 break;
@@ -461,7 +466,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
461 466
462 D_ASSERT(!(req->rq_state & RQ_NET_MASK)); 467 D_ASSERT(!(req->rq_state & RQ_NET_MASK));
463 468
464 __drbd_chk_io_error(mdev, FALSE); 469 __drbd_chk_io_error(mdev, false);
465 put_ldev(mdev); 470 put_ldev(mdev);
466 471
467 /* no point in retrying if there is no good remote data, 472 /* no point in retrying if there is no good remote data,
@@ -545,6 +550,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
545 550
546 break; 551 break;
547 552
553 case queue_for_send_oos:
554 req->rq_state |= RQ_NET_QUEUED;
555 req->w.cb = w_send_oos;
556 drbd_queue_work(&mdev->data.work, &req->w);
557 break;
558
559 case oos_handed_to_network:
560 /* actually the same */
548 case send_canceled: 561 case send_canceled:
549 /* treat it the same */ 562 /* treat it the same */
550 case send_failed: 563 case send_failed:
@@ -558,6 +571,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
558 571
559 case handed_over_to_network: 572 case handed_over_to_network:
560 /* assert something? */ 573 /* assert something? */
574 if (bio_data_dir(req->master_bio) == WRITE)
575 atomic_add(req->size>>9, &mdev->ap_in_flight);
576
561 if (bio_data_dir(req->master_bio) == WRITE && 577 if (bio_data_dir(req->master_bio) == WRITE &&
562 mdev->net_conf->wire_protocol == DRBD_PROT_A) { 578 mdev->net_conf->wire_protocol == DRBD_PROT_A) {
563 /* this is what is dangerous about protocol A: 579 /* this is what is dangerous about protocol A:
@@ -591,6 +607,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
591 dec_ap_pending(mdev); 607 dec_ap_pending(mdev);
592 req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); 608 req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
593 req->rq_state |= RQ_NET_DONE; 609 req->rq_state |= RQ_NET_DONE;
610 if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
611 atomic_sub(req->size>>9, &mdev->ap_in_flight);
612
594 /* if it is still queued, we may not complete it here. 613 /* if it is still queued, we may not complete it here.
595 * it will be canceled soon. */ 614 * it will be canceled soon. */
596 if (!(req->rq_state & RQ_NET_QUEUED)) 615 if (!(req->rq_state & RQ_NET_QUEUED))
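
Between these two hunks and the neg_acked/barrier_acked cases below, ap_in_flight becomes a sector-granular count (req->size >> 9) of writes handed to the network but not yet resolved: added once in handed_over_to_network, subtracted exactly once on whichever of ack, neg-ack, connection loss, or (protocol A) barrier ack ends the request. The invariant in miniature:

	#include <stdio.h>

	/* every write adds its size in 512-byte sectors when handed to the
	 * network and subtracts it exactly once when it leaves the
	 * in-flight set, so the counter returns to zero */
	struct dev { long ap_in_flight; };

	static void handed_over(struct dev *d, unsigned int size)
	{
		d->ap_in_flight += size >> 9;
	}

	static void resolved(struct dev *d, unsigned int size)
	{
		d->ap_in_flight -= size >> 9;	/* ack, neg-ack, or barrier ack */
	}

	int main(void)
	{
		struct dev d = { 0 };

		handed_over(&d, 4096);
		resolved(&d, 4096);
		printf("in flight: %ld sectors\n", d.ap_in_flight);	/* 0 */
		return 0;
	}
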
@@ -628,14 +647,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
628 req->rq_state |= RQ_NET_OK; 647 req->rq_state |= RQ_NET_OK;
629 D_ASSERT(req->rq_state & RQ_NET_PENDING); 648 D_ASSERT(req->rq_state & RQ_NET_PENDING);
630 dec_ap_pending(mdev); 649 dec_ap_pending(mdev);
650 atomic_sub(req->size>>9, &mdev->ap_in_flight);
631 req->rq_state &= ~RQ_NET_PENDING; 651 req->rq_state &= ~RQ_NET_PENDING;
632 _req_may_be_done_not_susp(req, m); 652 _req_may_be_done_not_susp(req, m);
633 break; 653 break;
634 654
635 case neg_acked: 655 case neg_acked:
636 /* assert something? */ 656 /* assert something? */
637 if (req->rq_state & RQ_NET_PENDING) 657 if (req->rq_state & RQ_NET_PENDING) {
638 dec_ap_pending(mdev); 658 dec_ap_pending(mdev);
659 atomic_sub(req->size>>9, &mdev->ap_in_flight);
660 }
639 req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); 661 req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
640 662
641 req->rq_state |= RQ_NET_DONE; 663 req->rq_state |= RQ_NET_DONE;
@@ -690,8 +712,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
690 dev_err(DEV, "FIXME (barrier_acked but pending)\n"); 712 dev_err(DEV, "FIXME (barrier_acked but pending)\n");
691 list_move(&req->tl_requests, &mdev->out_of_sequence_requests); 713 list_move(&req->tl_requests, &mdev->out_of_sequence_requests);
692 } 714 }
693 D_ASSERT(req->rq_state & RQ_NET_SENT); 715 if ((req->rq_state & RQ_NET_MASK) != 0) {
694 req->rq_state |= RQ_NET_DONE; 716 req->rq_state |= RQ_NET_DONE;
717 if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
718 atomic_sub(req->size>>9, &mdev->ap_in_flight);
719 }
695 _req_may_be_done(req, m); /* Allowed while state.susp */ 720 _req_may_be_done(req, m); /* Allowed while state.susp */
696 break; 721 break;
697 722
@@ -738,14 +763,14 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s
738 return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); 763 return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
739} 764}
740 765
741static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) 766static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
742{ 767{
743 const int rw = bio_rw(bio); 768 const int rw = bio_rw(bio);
744 const int size = bio->bi_size; 769 const int size = bio->bi_size;
745 const sector_t sector = bio->bi_sector; 770 const sector_t sector = bio->bi_sector;
746 struct drbd_tl_epoch *b = NULL; 771 struct drbd_tl_epoch *b = NULL;
747 struct drbd_request *req; 772 struct drbd_request *req;
748 int local, remote; 773 int local, remote, send_oos = 0;
749 int err = -EIO; 774 int err = -EIO;
750 int ret = 0; 775 int ret = 0;
751 776
@@ -759,6 +784,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
759 bio_endio(bio, -ENOMEM); 784 bio_endio(bio, -ENOMEM);
760 return 0; 785 return 0;
761 } 786 }
787 req->start_time = start_time;
762 788
763 local = get_ldev(mdev); 789 local = get_ldev(mdev);
764 if (!local) { 790 if (!local) {
@@ -808,9 +834,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
808 drbd_al_begin_io(mdev, sector); 834 drbd_al_begin_io(mdev, sector);
809 } 835 }
810 836
811 remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || 837 remote = remote && drbd_should_do_remote(mdev->state);
812 (mdev->state.pdsk == D_INCONSISTENT && 838 send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
813 mdev->state.conn >= C_CONNECTED)); 839 D_ASSERT(!(remote && send_oos));
814 840
815 if (!(local || remote) && !is_susp(mdev->state)) { 841 if (!(local || remote) && !is_susp(mdev->state)) {
816 if (__ratelimit(&drbd_ratelimit_state)) 842 if (__ratelimit(&drbd_ratelimit_state))
@@ -824,7 +850,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
824 * but there is a race between testing the bit and pointer outside the 850 * but there is a race between testing the bit and pointer outside the
825 * spinlock, and grabbing the spinlock. 851 * spinlock, and grabbing the spinlock.
826 * if we lost that race, we retry. */ 852 * if we lost that race, we retry. */
827 if (rw == WRITE && remote && 853 if (rw == WRITE && (remote || send_oos) &&
828 mdev->unused_spare_tle == NULL && 854 mdev->unused_spare_tle == NULL &&
829 test_bit(CREATE_BARRIER, &mdev->flags)) { 855 test_bit(CREATE_BARRIER, &mdev->flags)) {
830allocate_barrier: 856allocate_barrier:
@@ -842,18 +868,19 @@ allocate_barrier:
842 if (is_susp(mdev->state)) { 868 if (is_susp(mdev->state)) {
843 /* If we got suspended, use the retry mechanism of 869 /* If we got suspended, use the retry mechanism of
844 generic_make_request() to restart processing of this 870 generic_make_request() to restart processing of this
845 bio. In the next call to drbd_make_request_26 871 bio. In the next call to drbd_make_request
846 we sleep in inc_ap_bio() */ 872 we sleep in inc_ap_bio() */
847 ret = 1; 873 ret = 1;
848 spin_unlock_irq(&mdev->req_lock); 874 spin_unlock_irq(&mdev->req_lock);
849 goto fail_free_complete; 875 goto fail_free_complete;
850 } 876 }
851 877
852 if (remote) { 878 if (remote || send_oos) {
853 remote = (mdev->state.pdsk == D_UP_TO_DATE || 879 remote = drbd_should_do_remote(mdev->state);
854 (mdev->state.pdsk == D_INCONSISTENT && 880 send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
855 mdev->state.conn >= C_CONNECTED)); 881 D_ASSERT(!(remote && send_oos));
856 if (!remote) 882
883 if (!(remote || send_oos))
857 dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); 884 dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
858 if (!(local || remote)) { 885 if (!(local || remote)) {
859 dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); 886 dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
@@ -866,7 +893,7 @@ allocate_barrier:
866 mdev->unused_spare_tle = b; 893 mdev->unused_spare_tle = b;
867 b = NULL; 894 b = NULL;
868 } 895 }
869 if (rw == WRITE && remote && 896 if (rw == WRITE && (remote || send_oos) &&
870 mdev->unused_spare_tle == NULL && 897 mdev->unused_spare_tle == NULL &&
871 test_bit(CREATE_BARRIER, &mdev->flags)) { 898 test_bit(CREATE_BARRIER, &mdev->flags)) {
872 /* someone closed the current epoch 899 /* someone closed the current epoch
@@ -889,7 +916,7 @@ allocate_barrier:
889 * barrier packet. To get the write ordering right, we only have to 916 * barrier packet. To get the write ordering right, we only have to
890 * make sure that, if this is a write request and it triggered a 917 * make sure that, if this is a write request and it triggered a
891 * barrier packet, this request is queued within the same spinlock. */ 918 * barrier packet, this request is queued within the same spinlock. */
892 if (remote && mdev->unused_spare_tle && 919 if ((remote || send_oos) && mdev->unused_spare_tle &&
893 test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { 920 test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
894 _tl_add_barrier(mdev, mdev->unused_spare_tle); 921 _tl_add_barrier(mdev, mdev->unused_spare_tle);
895 mdev->unused_spare_tle = NULL; 922 mdev->unused_spare_tle = NULL;
@@ -937,6 +964,34 @@ allocate_barrier:
937 ? queue_for_net_write 964 ? queue_for_net_write
938 : queue_for_net_read); 965 : queue_for_net_read);
939 } 966 }
967 if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
968 _req_mod(req, queue_for_send_oos);
969
970 if (remote &&
971 mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
972 int congested = 0;
973
974 if (mdev->net_conf->cong_fill &&
975 atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
976 dev_info(DEV, "Congestion-fill threshold reached\n");
977 congested = 1;
978 }
979
980 if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
981 dev_info(DEV, "Congestion-extents threshold reached\n");
982 congested = 1;
983 }
984
985 if (congested) {
986 queue_barrier(mdev); /* last barrier, after mirrored writes */
987
988 if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
989 _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
990 else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
991 _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
992 }
993 }
994
940 spin_unlock_irq(&mdev->req_lock); 995 spin_unlock_irq(&mdev->req_lock);
941 kfree(b); /* if someone else has beaten us to it... */ 996 kfree(b); /* if someone else has beaten us to it... */
942 997
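
The branch added above turns write congestion into a policy decision: when either the amount of mirrored data still in flight or the number of hot activity-log extents crosses its configured threshold, the connection is moved to Ahead mode or torn down instead of letting writes block. A minimal userspace sketch of that decision; the OC_* values and thresholds are modeled on the patch, everything else is an illustrative stand-in, not the kernel API:

    /* Sketch of the on-congestion policy decision. */
    #include <stdio.h>

    enum on_congestion { OC_BLOCK, OC_PULL_AHEAD, OC_DISCONNECT };

    struct net_conf {
        enum on_congestion on_congestion;
        int cong_fill;      /* sectors of mirrored data allowed in flight */
        int cong_extents;   /* activity-log extents allowed in use */
    };

    static int is_congested(const struct net_conf *nc,
                            int ap_in_flight, int al_used)
    {
        if (nc->cong_fill && ap_in_flight >= nc->cong_fill)
            return 1;   /* too much mirrored data still on the wire */
        if (al_used >= nc->cong_extents)
            return 1;   /* activity log saturated with hot extents */
        return 0;
    }

    int main(void)
    {
        struct net_conf nc = { OC_PULL_AHEAD, 1024, 127 };

        if (nc.on_congestion != OC_BLOCK && is_congested(&nc, 2048, 10))
            puts(nc.on_congestion == OC_PULL_AHEAD ? "-> C_AHEAD"
                                                   : "-> C_DISCONNECTING");
        return 0;
    }

Note that the kernel code only evaluates this with `remote` set and protocol >= 96, since older peers do not understand the Ahead/Behind states.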
@@ -949,9 +1004,9 @@ allocate_barrier:
949 * stable storage, and this is a WRITE, we may not even submit 1004 * stable storage, and this is a WRITE, we may not even submit
950 * this bio. */ 1005 * this bio. */
951 if (get_ldev(mdev)) { 1006 if (get_ldev(mdev)) {
952 if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR 1007 if (drbd_insert_fault(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
953 : rw == READ ? DRBD_FAULT_DT_RD 1008 : rw == READ ? DRBD_FAULT_DT_RD
954 : DRBD_FAULT_DT_RA)) 1009 : DRBD_FAULT_DT_RA))
955 bio_endio(req->private_bio, -EIO); 1010 bio_endio(req->private_bio, -EIO);
956 else 1011 else
957 generic_make_request(req->private_bio); 1012 generic_make_request(req->private_bio);
@@ -1018,16 +1073,19 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
1018 return 0; 1073 return 0;
1019} 1074}
1020 1075
1021int drbd_make_request_26(struct request_queue *q, struct bio *bio) 1076int drbd_make_request(struct request_queue *q, struct bio *bio)
1022{ 1077{
1023 unsigned int s_enr, e_enr; 1078 unsigned int s_enr, e_enr;
1024 struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; 1079 struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
1080 unsigned long start_time;
1025 1081
1026 if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { 1082 if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
1027 bio_endio(bio, -EPERM); 1083 bio_endio(bio, -EPERM);
1028 return 0; 1084 return 0;
1029 } 1085 }
1030 1086
1087 start_time = jiffies;
1088
1031 /* 1089 /*
1032 * what we "blindly" assume: 1090 * what we "blindly" assume:
1033 */ 1091 */
@@ -1042,12 +1100,12 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
1042 1100
1043 if (likely(s_enr == e_enr)) { 1101 if (likely(s_enr == e_enr)) {
1044 inc_ap_bio(mdev, 1); 1102 inc_ap_bio(mdev, 1);
1045 return drbd_make_request_common(mdev, bio); 1103 return drbd_make_request_common(mdev, bio, start_time);
1046 } 1104 }
1047 1105
1048 /* can this bio be split generically? 1106 /* can this bio be split generically?
1049 * Maybe add our own split-arbitrary-bios function. */ 1107 * Maybe add our own split-arbitrary-bios function. */
1050 if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { 1108 if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
1051 /* rather error out here than BUG in bio_split */ 1109 /* rather error out here than BUG in bio_split */
1052 dev_err(DEV, "bio would need to, but cannot, be split: " 1110 dev_err(DEV, "bio would need to, but cannot, be split: "
1053 "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", 1111 "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
@@ -1069,11 +1127,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
1069 const int sps = 1 << HT_SHIFT; /* sectors per slot */ 1127 const int sps = 1 << HT_SHIFT; /* sectors per slot */
1070 const int mask = sps - 1; 1128 const int mask = sps - 1;
1071 const sector_t first_sectors = sps - (sect & mask); 1129 const sector_t first_sectors = sps - (sect & mask);
1072 bp = bio_split(bio, 1130 bp = bio_split(bio, first_sectors);
1073#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
1074 bio_split_pool,
1075#endif
1076 first_sectors);
1077 1131
1078 /* we need to get a "reference count" (ap_bio_cnt) 1132 /* we need to get a "reference count" (ap_bio_cnt)
1079 * to avoid races with the disconnect/reconnect/suspend code. 1133 * to avoid races with the disconnect/reconnect/suspend code.
@@ -1084,10 +1138,10 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
1084 1138
1085 D_ASSERT(e_enr == s_enr + 1); 1139 D_ASSERT(e_enr == s_enr + 1);
1086 1140
1087 while (drbd_make_request_common(mdev, &bp->bio1)) 1141 while (drbd_make_request_common(mdev, &bp->bio1, start_time))
1088 inc_ap_bio(mdev, 1); 1142 inc_ap_bio(mdev, 1);
1089 1143
1090 while (drbd_make_request_common(mdev, &bp->bio2)) 1144 while (drbd_make_request_common(mdev, &bp->bio2, start_time))
1091 inc_ap_bio(mdev, 1); 1145 inc_ap_bio(mdev, 1);
1092 1146
1093 dec_ap_bio(mdev); 1147 dec_ap_bio(mdev);
@@ -1098,7 +1152,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
1098} 1152}
1099 1153
1100/* This is called by bio_add_page(). With this function we reduce 1154/* This is called by bio_add_page(). With this function we reduce
1101 * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs 1155 * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs
1102 * units (was AL_EXTENTs). 1156 * units (was AL_EXTENTs).
1103 * 1157 *
1104 * we do the calculation within the lower 32bit of the byte offsets, 1158 * we do the calculation within the lower 32bit of the byte offsets,
@@ -1108,7 +1162,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
1108 * As long as the BIO is empty we have to allow at least one bvec, 1162 * As long as the BIO is empty we have to allow at least one bvec,
1109 * regardless of size and offset. so the resulting bio may still 1163 * regardless of size and offset. so the resulting bio may still
1110 * cross extent boundaries. those are dealt with (bio_split) in 1164 * cross extent boundaries. those are dealt with (bio_split) in
1111 * drbd_make_request_26. 1165 * drbd_make_request.
1112 */ 1166 */
1113int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) 1167int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
1114{ 1168{
@@ -1118,8 +1172,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
1118 unsigned int bio_size = bvm->bi_size; 1172 unsigned int bio_size = bvm->bi_size;
1119 int limit, backing_limit; 1173 int limit, backing_limit;
1120 1174
1121 limit = DRBD_MAX_SEGMENT_SIZE 1175 limit = DRBD_MAX_BIO_SIZE
1122 - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size); 1176 - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size);
1123 if (limit < 0) 1177 if (limit < 0)
1124 limit = 0; 1178 limit = 0;
1125 if (bio_size == 0) { 1179 if (bio_size == 0) {
@@ -1136,3 +1190,42 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
1136 } 1190 }
1137 return limit; 1191 return limit;
1138} 1192}
1193
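
The limit computed in drbd_merge_bvec() above keeps a growing bio inside the DRBD_MAX_BIO_SIZE-aligned window it started in. A worked example of the same arithmetic; the window size here is illustrative:

    /* Sketch of the merge-limit arithmetic. */
    #include <stdio.h>

    #define MAX_BIO_SIZE (1024 * 1024)   /* window size, a power of two */

    static int merge_limit(unsigned int bio_offset, unsigned int bio_size)
    {
        int limit = MAX_BIO_SIZE
                    - ((bio_offset & (MAX_BIO_SIZE - 1)) + bio_size);
        return limit < 0 ? 0 : limit;
    }

    int main(void)
    {
        /* bio starts 16 KiB into a window and already holds 984 KiB:
         * only 24 KiB may still be added before the window is full */
        printf("%d bytes left\n", merge_limit(16 * 1024, 984 * 1024));
        return 0;
    }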
1194void request_timer_fn(unsigned long data)
1195{
1196 struct drbd_conf *mdev = (struct drbd_conf *) data;
1197 struct drbd_request *req; /* oldest request */
1198 struct list_head *le;
1199 unsigned long et = 0; /* effective timeout = ko_count * timeout */
1200
1201 if (get_net_conf(mdev)) {
1202 et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
1203 put_net_conf(mdev);
1204 }
1205 if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
1206 return; /* Recurring timer stopped */
1207
1208 spin_lock_irq(&mdev->req_lock);
1209 le = &mdev->oldest_tle->requests;
1210 if (list_empty(le)) {
1211 spin_unlock_irq(&mdev->req_lock);
1212 mod_timer(&mdev->request_timer, jiffies + et);
1213 return;
1214 }
1215
1216 le = le->prev;
1217 req = list_entry(le, struct drbd_request, tl_requests);
1218 if (time_is_before_eq_jiffies(req->start_time + et)) {
1219 if (req->rq_state & RQ_NET_PENDING) {
1220 dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
1221 _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
1222 } else {
1223 dev_warn(DEV, "Local backing block device frozen?\n");
1224 mod_timer(&mdev->request_timer, jiffies + et);
1225 }
1226 } else {
1227 mod_timer(&mdev->request_timer, req->start_time + et);
1228 }
1229
1230 spin_unlock_irq(&mdev->req_lock);
1231}
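
The new request_timer_fn() computes an effective timeout of ko_count network timeouts (net_conf->timeout is kept in tenths of a second) and inspects only the oldest request on the transfer log: if that request is still RQ_NET_PENDING past the deadline, the peer is declared dead via C_TIMEOUT; otherwise the local backing device is suspected frozen and the timer is simply re-armed. A small sketch of the deadline arithmetic, with an illustrative HZ and field names:

    /* Userspace model of the effective-timeout computation. */
    #include <stdio.h>

    #define HZ 100

    int main(void)
    {
        unsigned long timeout_dsec = 60; /* net timeout, tenths of a second */
        unsigned long ko_count = 7;      /* missed timeouts before giving up */
        unsigned long et = timeout_dsec * HZ / 10 * ko_count;

        printf("effective timeout: %lu jiffies (%lu s)\n", et, et / HZ);
        return 0;
    }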
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index ab2bd09d54b4..32e2c3e6a813 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -82,14 +82,16 @@ enum drbd_req_event {
82 to_be_submitted, 82 to_be_submitted,
83 83
84 /* XXX yes, now I am inconsistent... 84 /* XXX yes, now I am inconsistent...
85 * these two are not "events" but "actions" 85 * these are not "events" but "actions"
86 * oh, well... */ 86 * oh, well... */
87 queue_for_net_write, 87 queue_for_net_write,
88 queue_for_net_read, 88 queue_for_net_read,
89 queue_for_send_oos,
89 90
90 send_canceled, 91 send_canceled,
91 send_failed, 92 send_failed,
92 handed_over_to_network, 93 handed_over_to_network,
94 oos_handed_to_network,
93 connection_lost_while_pending, 95 connection_lost_while_pending,
94 read_retry_remote_canceled, 96 read_retry_remote_canceled,
95 recv_acked_by_peer, 97 recv_acked_by_peer,
@@ -289,7 +291,6 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
289 req->epoch = 0; 291 req->epoch = 0;
290 req->sector = bio_src->bi_sector; 292 req->sector = bio_src->bi_sector;
291 req->size = bio_src->bi_size; 293 req->size = bio_src->bi_size;
292 req->start_time = jiffies;
293 INIT_HLIST_NODE(&req->colision); 294 INIT_HLIST_NODE(&req->colision);
294 INIT_LIST_HEAD(&req->tl_requests); 295 INIT_LIST_HEAD(&req->tl_requests);
295 INIT_LIST_HEAD(&req->w.list); 296 INIT_LIST_HEAD(&req->w.list);
@@ -321,6 +322,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
321 struct bio_and_error *m); 322 struct bio_and_error *m);
322extern void complete_master_bio(struct drbd_conf *mdev, 323extern void complete_master_bio(struct drbd_conf *mdev,
323 struct bio_and_error *m); 324 struct bio_and_error *m);
325extern void request_timer_fn(unsigned long data);
324 326
325/* use this if you don't want to deal with calling complete_master_bio() 327/* use this if you don't want to deal with calling complete_master_bio()
326 * outside the spinlock, e.g. when walking some list on cleanup. */ 328 * outside the spinlock, e.g. when walking some list on cleanup. */
@@ -338,23 +340,43 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
338 return rv; 340 return rv;
339} 341}
340 342
341/* completion of master bio is outside of spinlock. 343/* completion of master bio is outside of our spinlock.
342 * If you need it irqsave, do it your self! 344 * We still may or may not be inside some irqs disabled section
343 * Which means: don't use from bio endio callback. */ 345 * of the lower level driver completion callback, so we need to
346 * spin_lock_irqsave here. */
344static inline int req_mod(struct drbd_request *req, 347static inline int req_mod(struct drbd_request *req,
345 enum drbd_req_event what) 348 enum drbd_req_event what)
346{ 349{
350 unsigned long flags;
347 struct drbd_conf *mdev = req->mdev; 351 struct drbd_conf *mdev = req->mdev;
348 struct bio_and_error m; 352 struct bio_and_error m;
349 int rv; 353 int rv;
350 354
351 spin_lock_irq(&mdev->req_lock); 355 spin_lock_irqsave(&mdev->req_lock, flags);
352 rv = __req_mod(req, what, &m); 356 rv = __req_mod(req, what, &m);
353 spin_unlock_irq(&mdev->req_lock); 357 spin_unlock_irqrestore(&mdev->req_lock, flags);
354 358
355 if (m.bio) 359 if (m.bio)
356 complete_master_bio(mdev, &m); 360 complete_master_bio(mdev, &m);
357 361
358 return rv; 362 return rv;
359} 363}
364
365static inline bool drbd_should_do_remote(union drbd_state s)
366{
367 return s.pdsk == D_UP_TO_DATE ||
368 (s.pdsk >= D_INCONSISTENT &&
369 s.conn >= C_WF_BITMAP_T &&
370 s.conn < C_AHEAD);
371 /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
372 That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
373 states. */
374}
375static inline bool drbd_should_send_oos(union drbd_state s)
376{
377 return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
378 /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
379 since we enter state C_AHEAD only if proto >= 96 */
380}
381
360#endif 382#endif
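
The two new predicates centralize the decision the request path previously open-coded: should this write be replicated to the peer, or merely recorded as out of sync. A standalone sketch; the enum values are illustrative stand-ins for DRBD's connection states, chosen so the ordering comparisons behave the same way:

    /* Sketch of the two state predicates. */
    #include <stdbool.h>
    #include <stdio.h>

    enum conns { C_CONNECTED = 10, C_WF_BITMAP_S, C_WF_BITMAP_T,
                 C_SYNC_SOURCE, C_SYNC_TARGET, C_AHEAD, C_BEHIND };
    enum disks { D_INCONSISTENT = 4, D_UP_TO_DATE = 8 };

    struct state { enum conns conn; enum disks pdsk; };

    static bool should_do_remote(struct state s)
    {
        return s.pdsk == D_UP_TO_DATE ||
               (s.pdsk >= D_INCONSISTENT &&
                s.conn >= C_WF_BITMAP_T && s.conn < C_AHEAD);
    }

    static bool should_send_oos(struct state s)
    {
        return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
    }

    int main(void)
    {
        struct state s = { C_AHEAD, D_INCONSISTENT };

        printf("remote=%d oos=%d\n", should_do_remote(s), should_send_oos(s));
        return 0;
    }

For a node in C_AHEAD the first predicate is false and the second true, which is why the request path asserts that the two never hold at once.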
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
index 85179e1fb50a..c44a2a602772 100644
--- a/drivers/block/drbd/drbd_strings.c
+++ b/drivers/block/drbd/drbd_strings.c
@@ -48,6 +48,8 @@ static const char *drbd_conn_s_names[] = {
48 [C_PAUSED_SYNC_T] = "PausedSyncT", 48 [C_PAUSED_SYNC_T] = "PausedSyncT",
49 [C_VERIFY_S] = "VerifyS", 49 [C_VERIFY_S] = "VerifyS",
50 [C_VERIFY_T] = "VerifyT", 50 [C_VERIFY_T] = "VerifyT",
51 [C_AHEAD] = "Ahead",
52 [C_BEHIND] = "Behind",
51}; 53};
52 54
53static const char *drbd_role_s_names[] = { 55static const char *drbd_role_s_names[] = {
@@ -92,7 +94,7 @@ static const char *drbd_state_sw_errors[] = {
92const char *drbd_conn_str(enum drbd_conns s) 94const char *drbd_conn_str(enum drbd_conns s)
93{ 95{
94 /* enums are unsigned... */ 96 /* enums are unsigned... */
95 return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s]; 97 return s > C_BEHIND ? "TOO_LARGE" : drbd_conn_s_names[s];
96} 98}
97 99
98const char *drbd_role_str(enum drbd_role s) 100const char *drbd_role_str(enum drbd_role s)
@@ -105,7 +107,7 @@ const char *drbd_disk_str(enum drbd_disk_state s)
105 return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; 107 return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s];
106} 108}
107 109
108const char *drbd_set_st_err_str(enum drbd_state_ret_codes err) 110const char *drbd_set_st_err_str(enum drbd_state_rv err)
109{ 111{
110 return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : 112 return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" :
111 err > SS_TWO_PRIMARIES ? "TOO_LARGE" 113 err > SS_TWO_PRIMARIES ? "TOO_LARGE"
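
Extending drbd_conn_s_names[] forces the bounds check in drbd_conn_str() to move from C_PAUSED_SYNC_T to C_BEHIND; the underlying pattern is a clamp before the table lookup. A condensed illustration with a shortened state list:

    /* Bounds-checked enum-to-string lookup. */
    #include <stdio.h>

    enum conns { C_STANDALONE, C_CONNECTED, C_AHEAD, C_BEHIND };

    static const char *conn_names[] = {
        [C_STANDALONE] = "StandAlone",
        [C_CONNECTED]  = "Connected",
        [C_AHEAD]      = "Ahead",
        [C_BEHIND]     = "Behind",
    };

    static const char *conn_str(enum conns s)
    {
        /* enums are unsigned here, so one upper-bound check suffices */
        return s > C_BEHIND ? "TOO_LARGE" : conn_names[s];
    }

    int main(void)
    {
        printf("%s\n", conn_str(C_BEHIND));
        printf("%s\n", conn_str((enum conns)99));
        return 0;
    }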
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index e027446590d3..f7e6c92f8d03 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -39,18 +39,17 @@
39#include "drbd_req.h" 39#include "drbd_req.h"
40 40
41static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); 41static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
42static int w_make_resync_request(struct drbd_conf *mdev,
43 struct drbd_work *w, int cancel);
42 44
43 45
44 46
45/* defined here: 47/* endio handlers:
46 drbd_md_io_complete 48 * drbd_md_io_complete (defined here)
47 drbd_endio_sec 49 * drbd_endio_pri (defined here)
48 drbd_endio_pri 50 * drbd_endio_sec (defined here)
49 51 * bm_async_io_complete (defined in drbd_bitmap.c)
50 * more endio handlers: 52 *
51 atodb_endio in drbd_actlog.c
52 drbd_bm_async_io_complete in drbd_bitmap.c
53
54 * For all these callbacks, note the following: 53 * For all these callbacks, note the following:
55 * The callbacks will be called in irq context by the IDE drivers, 54 * The callbacks will be called in irq context by the IDE drivers,
56 * and in Softirqs/Tasklets/BH context by the SCSI drivers. 55 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
@@ -94,7 +93,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
94 if (list_empty(&mdev->read_ee)) 93 if (list_empty(&mdev->read_ee))
95 wake_up(&mdev->ee_wait); 94 wake_up(&mdev->ee_wait);
96 if (test_bit(__EE_WAS_ERROR, &e->flags)) 95 if (test_bit(__EE_WAS_ERROR, &e->flags))
97 __drbd_chk_io_error(mdev, FALSE); 96 __drbd_chk_io_error(mdev, false);
98 spin_unlock_irqrestore(&mdev->req_lock, flags); 97 spin_unlock_irqrestore(&mdev->req_lock, flags);
99 98
100 drbd_queue_work(&mdev->data.work, &e->w); 99 drbd_queue_work(&mdev->data.work, &e->w);
@@ -137,7 +136,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
137 : list_empty(&mdev->active_ee); 136 : list_empty(&mdev->active_ee);
138 137
139 if (test_bit(__EE_WAS_ERROR, &e->flags)) 138 if (test_bit(__EE_WAS_ERROR, &e->flags))
140 __drbd_chk_io_error(mdev, FALSE); 139 __drbd_chk_io_error(mdev, false);
141 spin_unlock_irqrestore(&mdev->req_lock, flags); 140 spin_unlock_irqrestore(&mdev->req_lock, flags);
142 141
143 if (is_syncer_req) 142 if (is_syncer_req)
@@ -163,14 +162,15 @@ void drbd_endio_sec(struct bio *bio, int error)
163 int uptodate = bio_flagged(bio, BIO_UPTODATE); 162 int uptodate = bio_flagged(bio, BIO_UPTODATE);
164 int is_write = bio_data_dir(bio) == WRITE; 163 int is_write = bio_data_dir(bio) == WRITE;
165 164
166 if (error) 165 if (error && __ratelimit(&drbd_ratelimit_state))
167 dev_warn(DEV, "%s: error=%d s=%llus\n", 166 dev_warn(DEV, "%s: error=%d s=%llus\n",
168 is_write ? "write" : "read", error, 167 is_write ? "write" : "read", error,
169 (unsigned long long)e->sector); 168 (unsigned long long)e->sector);
170 if (!error && !uptodate) { 169 if (!error && !uptodate) {
171 dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", 170 if (__ratelimit(&drbd_ratelimit_state))
172 is_write ? "write" : "read", 171 dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
173 (unsigned long long)e->sector); 172 is_write ? "write" : "read",
173 (unsigned long long)e->sector);
174 /* strange behavior of some lower level drivers... 174 /* strange behavior of some lower level drivers...
175 * fail the request by clearing the uptodate flag, 175 * fail the request by clearing the uptodate flag,
176 * but do not return any error?! */ 176 * but do not return any error?! */
@@ -250,13 +250,6 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
250 return w_send_read_req(mdev, w, 0); 250 return w_send_read_req(mdev, w, 0);
251} 251}
252 252
253int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
254{
255 ERR_IF(cancel) return 1;
256 dev_err(DEV, "resync inactive, but callback triggered??\n");
257 return 1; /* Simply ignore this! */
258}
259
260void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) 253void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest)
261{ 254{
262 struct hash_desc desc; 255 struct hash_desc desc;
@@ -355,7 +348,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
355 if (!get_ldev(mdev)) 348 if (!get_ldev(mdev))
356 return -EIO; 349 return -EIO;
357 350
358 if (drbd_rs_should_slow_down(mdev)) 351 if (drbd_rs_should_slow_down(mdev, sector))
359 goto defer; 352 goto defer;
360 353
361 /* GFP_TRY, because if there is no memory available right now, this may 354 /* GFP_TRY, because if there is no memory available right now, this may
@@ -373,9 +366,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
373 if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) 366 if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
374 return 0; 367 return 0;
375 368
376 /* drbd_submit_ee currently fails for one reason only: 369 /* If it failed because of ENOMEM, retry should help. If it failed
377 * not being able to allocate enough bios. 370 * because bio_add_page failed (probably broken lower level driver),
378 * Is dropping the connection going to help? */ 371 * retry may or may not help.
372 * If it does not, you may need to force disconnect. */
379 spin_lock_irq(&mdev->req_lock); 373 spin_lock_irq(&mdev->req_lock);
380 list_del(&e->w.list); 374 list_del(&e->w.list);
381 spin_unlock_irq(&mdev->req_lock); 375 spin_unlock_irq(&mdev->req_lock);
@@ -386,26 +380,25 @@ defer:
386 return -EAGAIN; 380 return -EAGAIN;
387} 381}
388 382
389void resync_timer_fn(unsigned long data) 383int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
390{ 384{
391 struct drbd_conf *mdev = (struct drbd_conf *) data;
392 int queue;
393
394 queue = 1;
395 switch (mdev->state.conn) { 385 switch (mdev->state.conn) {
396 case C_VERIFY_S: 386 case C_VERIFY_S:
397 mdev->resync_work.cb = w_make_ov_request; 387 w_make_ov_request(mdev, w, cancel);
398 break; 388 break;
399 case C_SYNC_TARGET: 389 case C_SYNC_TARGET:
400 mdev->resync_work.cb = w_make_resync_request; 390 w_make_resync_request(mdev, w, cancel);
401 break; 391 break;
402 default:
403 queue = 0;
404 mdev->resync_work.cb = w_resync_inactive;
405 } 392 }
406 393
407 /* harmless race: list_empty outside data.work.q_lock */ 394 return 1;
408 if (list_empty(&mdev->resync_work.list) && queue) 395}
396
397void resync_timer_fn(unsigned long data)
398{
399 struct drbd_conf *mdev = (struct drbd_conf *) data;
400
401 if (list_empty(&mdev->resync_work.list))
409 drbd_queue_work(&mdev->data.work, &mdev->resync_work); 402 drbd_queue_work(&mdev->data.work, &mdev->resync_work);
410} 403}
411 404
@@ -438,7 +431,7 @@ static void fifo_add_val(struct fifo_buffer *fb, int value)
438 fb->values[i] += value; 431 fb->values[i] += value;
439} 432}
440 433
441int drbd_rs_controller(struct drbd_conf *mdev) 434static int drbd_rs_controller(struct drbd_conf *mdev)
442{ 435{
443 unsigned int sect_in; /* Number of sectors that came in since the last turn */ 436 unsigned int sect_in; /* Number of sectors that came in since the last turn */
444 unsigned int want; /* The number of sectors we want in the proxy */ 437 unsigned int want; /* The number of sectors we want in the proxy */
@@ -492,29 +485,36 @@ int drbd_rs_controller(struct drbd_conf *mdev)
492 return req_sect; 485 return req_sect;
493} 486}
494 487
495int w_make_resync_request(struct drbd_conf *mdev, 488static int drbd_rs_number_requests(struct drbd_conf *mdev)
496 struct drbd_work *w, int cancel) 489{
490 int number;
491 if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
492 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
493 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
494 } else {
495 mdev->c_sync_rate = mdev->sync_conf.rate;
496 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
497 }
498
499 /* ignore the amount of pending requests, the resync controller should
500 * throttle down to incoming reply rate soon enough anyways. */
501 return number;
502}
503
504static int w_make_resync_request(struct drbd_conf *mdev,
505 struct drbd_work *w, int cancel)
497{ 506{
498 unsigned long bit; 507 unsigned long bit;
499 sector_t sector; 508 sector_t sector;
500 const sector_t capacity = drbd_get_capacity(mdev->this_bdev); 509 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
501 int max_segment_size; 510 int max_bio_size;
502 int number, rollback_i, size, pe, mx; 511 int number, rollback_i, size;
503 int align, queued, sndbuf; 512 int align, queued, sndbuf;
504 int i = 0; 513 int i = 0;
505 514
506 if (unlikely(cancel)) 515 if (unlikely(cancel))
507 return 1; 516 return 1;
508 517
509 if (unlikely(mdev->state.conn < C_CONNECTED)) {
510 dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected");
511 return 0;
512 }
513
514 if (mdev->state.conn != C_SYNC_TARGET)
515 dev_err(DEV, "%s in w_make_resync_request\n",
516 drbd_conn_str(mdev->state.conn));
517
518 if (mdev->rs_total == 0) { 518 if (mdev->rs_total == 0) {
519 /* empty resync? */ 519 /* empty resync? */
520 drbd_resync_finished(mdev); 520 drbd_resync_finished(mdev);
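
drbd_rs_number_requests() folds the previously duplicated rate computation into one helper: with the dynamic controller active it asks drbd_rs_controller() for a sector budget, otherwise it converts the fixed sync_conf.rate into requests per timer tick. A worked example of the fixed-rate branch, assuming SLEEP_TIME is HZ/10 as in DRBD; HZ is illustrative:

    /* Convert a configured rate in KiB/s into 4 KiB requests per tick. */
    #include <stdio.h>

    #define HZ            250
    #define SLEEP_TIME    (HZ / 10)     /* one tick = 100 ms */
    #define BM_BLOCK_SIZE 4096

    int main(void)
    {
        int rate_kib_s = 10240; /* sync_conf.rate: 10 MiB/s */
        int number = SLEEP_TIME * rate_kib_s
                     / ((BM_BLOCK_SIZE / 1024) * HZ);

        /* 10240 KiB/s = 2560 blocks/s; per 100 ms tick that is 256 */
        printf("%d requests per tick\n", number);
        return 0;
    }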
@@ -527,49 +527,19 @@ int w_make_resync_request(struct drbd_conf *mdev,
527 to continue resync with a broken disk makes no sense at 527 to continue resync with a broken disk makes no sense at
528 all */ 528 all */
529 dev_err(DEV, "Disk broke down during resync!\n"); 529 dev_err(DEV, "Disk broke down during resync!\n");
530 mdev->resync_work.cb = w_resync_inactive;
531 return 1; 530 return 1;
532 } 531 }
533 532
534 /* starting with drbd 8.3.8, we can handle multi-bio EEs, 533 /* starting with drbd 8.3.8, we can handle multi-bio EEs,
535 * if it should be necessary */ 534 * if it should be necessary */
536 max_segment_size = 535 max_bio_size =
537 mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) : 536 mdev->agreed_pro_version < 94 ? queue_max_hw_sectors(mdev->rq_queue) << 9 :
538 mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE; 537 mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_BIO_SIZE;
539 538
540 if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ 539 number = drbd_rs_number_requests(mdev);
541 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); 540 if (number == 0)
542 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
543 } else {
544 mdev->c_sync_rate = mdev->sync_conf.rate;
545 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
546 }
547
548 /* Throttle resync on lower level disk activity, which may also be
549 * caused by application IO on Primary/SyncTarget.
550 * Keep this after the call to drbd_rs_controller, as that assumes
551 * to be called as precisely as possible every SLEEP_TIME,
552 * and would be confused otherwise. */
553 if (drbd_rs_should_slow_down(mdev))
554 goto requeue; 541 goto requeue;
555 542
556 mutex_lock(&mdev->data.mutex);
557 if (mdev->data.socket)
558 mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
559 else
560 mx = 1;
561 mutex_unlock(&mdev->data.mutex);
562
563 /* For resync rates >160MB/sec, allow more pending RS requests */
564 if (number > mx)
565 mx = number;
566
567 /* Limit the number of pending RS requests to no more than the peer's receive buffer */
568 pe = atomic_read(&mdev->rs_pending_cnt);
569 if ((pe + number) > mx) {
570 number = mx - pe;
571 }
572
573 for (i = 0; i < number; i++) { 543 for (i = 0; i < number; i++) {
574 /* Stop generating RS requests, when half of the send buffer is filled */ 544 /* Stop generating RS requests, when half of the send buffer is filled */
575 mutex_lock(&mdev->data.mutex); 545 mutex_lock(&mdev->data.mutex);
@@ -588,16 +558,16 @@ next_sector:
588 size = BM_BLOCK_SIZE; 558 size = BM_BLOCK_SIZE;
589 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); 559 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
590 560
591 if (bit == -1UL) { 561 if (bit == DRBD_END_OF_BITMAP) {
592 mdev->bm_resync_fo = drbd_bm_bits(mdev); 562 mdev->bm_resync_fo = drbd_bm_bits(mdev);
593 mdev->resync_work.cb = w_resync_inactive;
594 put_ldev(mdev); 563 put_ldev(mdev);
595 return 1; 564 return 1;
596 } 565 }
597 566
598 sector = BM_BIT_TO_SECT(bit); 567 sector = BM_BIT_TO_SECT(bit);
599 568
600 if (drbd_try_rs_begin_io(mdev, sector)) { 569 if (drbd_rs_should_slow_down(mdev, sector) ||
570 drbd_try_rs_begin_io(mdev, sector)) {
601 mdev->bm_resync_fo = bit; 571 mdev->bm_resync_fo = bit;
602 goto requeue; 572 goto requeue;
603 } 573 }
@@ -608,7 +578,7 @@ next_sector:
608 goto next_sector; 578 goto next_sector;
609 } 579 }
610 580
611#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE 581#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
612 /* try to find some adjacent bits. 582 /* try to find some adjacent bits.
613 * we stop if we have already the maximum req size. 583 * we stop if we have already the maximum req size.
614 * 584 *
@@ -618,7 +588,7 @@ next_sector:
618 align = 1; 588 align = 1;
619 rollback_i = i; 589 rollback_i = i;
620 for (;;) { 590 for (;;) {
621 if (size + BM_BLOCK_SIZE > max_segment_size) 591 if (size + BM_BLOCK_SIZE > max_bio_size)
622 break; 592 break;
623 593
624 /* Be always aligned */ 594 /* Be always aligned */
@@ -685,7 +655,6 @@ next_sector:
685 * resync data block, and the last bit is cleared. 655 * resync data block, and the last bit is cleared.
686 * until then resync "work" is "inactive" ... 656 * until then resync "work" is "inactive" ...
687 */ 657 */
688 mdev->resync_work.cb = w_resync_inactive;
689 put_ldev(mdev); 658 put_ldev(mdev);
690 return 1; 659 return 1;
691 } 660 }
@@ -706,27 +675,18 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
706 if (unlikely(cancel)) 675 if (unlikely(cancel))
707 return 1; 676 return 1;
708 677
709 if (unlikely(mdev->state.conn < C_CONNECTED)) { 678 number = drbd_rs_number_requests(mdev);
710 dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected");
711 return 0;
712 }
713
714 number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
715 if (atomic_read(&mdev->rs_pending_cnt) > number)
716 goto requeue;
717
718 number -= atomic_read(&mdev->rs_pending_cnt);
719 679
720 sector = mdev->ov_position; 680 sector = mdev->ov_position;
721 for (i = 0; i < number; i++) { 681 for (i = 0; i < number; i++) {
722 if (sector >= capacity) { 682 if (sector >= capacity) {
723 mdev->resync_work.cb = w_resync_inactive;
724 return 1; 683 return 1;
725 } 684 }
726 685
727 size = BM_BLOCK_SIZE; 686 size = BM_BLOCK_SIZE;
728 687
729 if (drbd_try_rs_begin_io(mdev, sector)) { 688 if (drbd_rs_should_slow_down(mdev, sector) ||
689 drbd_try_rs_begin_io(mdev, sector)) {
730 mdev->ov_position = sector; 690 mdev->ov_position = sector;
731 goto requeue; 691 goto requeue;
732 } 692 }
@@ -744,11 +704,33 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
744 mdev->ov_position = sector; 704 mdev->ov_position = sector;
745 705
746 requeue: 706 requeue:
707 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
747 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); 708 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
748 return 1; 709 return 1;
749} 710}
750 711
751 712
713void start_resync_timer_fn(unsigned long data)
714{
715 struct drbd_conf *mdev = (struct drbd_conf *) data;
716
717 drbd_queue_work(&mdev->data.work, &mdev->start_resync_work);
718}
719
720int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
721{
722 if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
723 dev_warn(DEV, "w_start_resync later...\n");
724 mdev->start_resync_timer.expires = jiffies + HZ/10;
725 add_timer(&mdev->start_resync_timer);
726 return 1;
727 }
728
729 drbd_start_resync(mdev, C_SYNC_SOURCE);
730 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
731 return 1;
732}
733
752int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 734int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
753{ 735{
754 kfree(w); 736 kfree(w);
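
The w_start_resync() added above defers the transition to SyncSource while acknowledgements or resync replies are still outstanding, retrying via a short timer rather than starting with stale counters. The shape of that deferral, with the counters reduced to plain ints for clarity:

    /* Sketch of the retry-until-quiescent pattern. */
    #include <stdio.h>

    static int unacked_cnt = 2, rs_pending_cnt = 0;

    static void arm_timer_100ms(void) { puts("retry in HZ/10"); }
    static void start_resync(void)    { puts("become SyncSource"); }

    static void w_start_resync(void)
    {
        if (unacked_cnt || rs_pending_cnt) {
            arm_timer_100ms();   /* try again once the queues drain */
            return;
        }
        start_resync();
    }

    int main(void)
    {
        w_start_resync();                   /* defers */
        unacked_cnt = 0;
        w_start_resync();                   /* starts */
        return 0;
    }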
@@ -782,6 +764,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
782 union drbd_state os, ns; 764 union drbd_state os, ns;
783 struct drbd_work *w; 765 struct drbd_work *w;
784 char *khelper_cmd = NULL; 766 char *khelper_cmd = NULL;
767 int verify_done = 0;
785 768
786 /* Remove all elements from the resync LRU. Since future actions 769 /* Remove all elements from the resync LRU. Since future actions
787 * might set bits in the (main) bitmap, then the entries in the 770 * might set bits in the (main) bitmap, then the entries in the
@@ -792,8 +775,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
792 * queue (or even the read operations for those packets 775 * queue (or even the read operations for those packets
793 * is not finished by now). Retry in 100ms. */ 776 * is not finished by now). Retry in 100ms. */
794 777
795 __set_current_state(TASK_INTERRUPTIBLE); 778 schedule_timeout_interruptible(HZ / 10);
796 schedule_timeout(HZ / 10);
797 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); 779 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
798 if (w) { 780 if (w) {
799 w->cb = w_resync_finished; 781 w->cb = w_resync_finished;
@@ -818,6 +800,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
818 spin_lock_irq(&mdev->req_lock); 800 spin_lock_irq(&mdev->req_lock);
819 os = mdev->state; 801 os = mdev->state;
820 802
803 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
804
821 /* This protects us against multiple calls (that can happen in the presence 805 /* This protects us against multiple calls (that can happen in the presence
822 of application IO), and against connectivity loss just before we arrive here. */ 806 of application IO), and against connectivity loss just before we arrive here. */
823 if (os.conn <= C_CONNECTED) 807 if (os.conn <= C_CONNECTED)
@@ -827,8 +811,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
827 ns.conn = C_CONNECTED; 811 ns.conn = C_CONNECTED;
828 812
829 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", 813 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
830 (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ? 814 verify_done ? "Online verify " : "Resync",
831 "Online verify " : "Resync",
832 dt + mdev->rs_paused, mdev->rs_paused, dbdt); 815 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
833 816
834 n_oos = drbd_bm_total_weight(mdev); 817 n_oos = drbd_bm_total_weight(mdev);
@@ -886,14 +869,18 @@ int drbd_resync_finished(struct drbd_conf *mdev)
886 } 869 }
887 } 870 }
888 871
889 drbd_uuid_set_bm(mdev, 0UL); 872 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
890 873 /* for verify runs, we don't update uuids here,
891 if (mdev->p_uuid) { 874 * so there would be nothing to report. */
892 /* Now the two UUID sets are equal, update what we 875 drbd_uuid_set_bm(mdev, 0UL);
893 * know of the peer. */ 876 drbd_print_uuids(mdev, "updated UUIDs");
894 int i; 877 if (mdev->p_uuid) {
895 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) 878 /* Now the two UUID sets are equal, update what we
896 mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; 879 * know of the peer. */
880 int i;
881 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
882 mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
883 }
897 } 884 }
898 } 885 }
899 886
@@ -905,15 +892,11 @@ out:
905 mdev->rs_total = 0; 892 mdev->rs_total = 0;
906 mdev->rs_failed = 0; 893 mdev->rs_failed = 0;
907 mdev->rs_paused = 0; 894 mdev->rs_paused = 0;
908 mdev->ov_start_sector = 0; 895 if (verify_done)
896 mdev->ov_start_sector = 0;
909 897
910 drbd_md_sync(mdev); 898 drbd_md_sync(mdev);
911 899
912 if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
913 dev_info(DEV, "Writing the whole bitmap\n");
914 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
915 }
916
917 if (khelper_cmd) 900 if (khelper_cmd)
918 drbd_khelper(mdev, khelper_cmd); 901 drbd_khelper(mdev, khelper_cmd);
919 902
@@ -994,7 +977,9 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
994 put_ldev(mdev); 977 put_ldev(mdev);
995 } 978 }
996 979
997 if (likely((e->flags & EE_WAS_ERROR) == 0)) { 980 if (mdev->state.conn == C_AHEAD) {
981 ok = drbd_send_ack(mdev, P_RS_CANCEL, e);
982 } else if (likely((e->flags & EE_WAS_ERROR) == 0)) {
998 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { 983 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
999 inc_rs_pending(mdev); 984 inc_rs_pending(mdev);
1000 ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); 985 ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
@@ -1096,25 +1081,27 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1096 if (unlikely(cancel)) 1081 if (unlikely(cancel))
1097 goto out; 1082 goto out;
1098 1083
1099 if (unlikely((e->flags & EE_WAS_ERROR) != 0))
1100 goto out;
1101
1102 digest_size = crypto_hash_digestsize(mdev->verify_tfm); 1084 digest_size = crypto_hash_digestsize(mdev->verify_tfm);
1103 /* FIXME if this allocation fails, online verify will not terminate! */
1104 digest = kmalloc(digest_size, GFP_NOIO); 1085 digest = kmalloc(digest_size, GFP_NOIO);
1105 if (digest) { 1086 if (!digest) {
1106 drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); 1087 ok = 0; /* terminate the connection in case the allocation failed */
1107 inc_rs_pending(mdev); 1088 goto out;
1108 ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
1109 digest, digest_size, P_OV_REPLY);
1110 if (!ok)
1111 dec_rs_pending(mdev);
1112 kfree(digest);
1113 } 1089 }
1114 1090
1091 if (likely(!(e->flags & EE_WAS_ERROR)))
1092 drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
1093 else
1094 memset(digest, 0, digest_size);
1095
1096 inc_rs_pending(mdev);
1097 ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
1098 digest, digest_size, P_OV_REPLY);
1099 if (!ok)
1100 dec_rs_pending(mdev);
1101 kfree(digest);
1102
1115out: 1103out:
1116 drbd_free_ee(mdev, e); 1104 drbd_free_ee(mdev, e);
1117
1118 dec_unacked(mdev); 1105 dec_unacked(mdev);
1119 1106
1120 return ok; 1107 return ok;
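
This rework of w_e_end_ov_req() resolves the FIXME visible in the removed lines: the verify reply is now sent unconditionally, with an all-zero digest standing in when the local read failed, and a failed digest allocation terminates the connection instead of silently wedging the verify run. A minimal model of that control flow; send_reply() and the 0xab fill are stand-ins for the real checksum and network calls:

    /* Always answer; zero the digest on read error; ENOMEM is fatal. */
    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>

    static int send_reply(const unsigned char *digest, size_t len)
    {
        printf("sending %zu-byte digest, first byte %02x\n",
               len, (unsigned)digest[0]);
        return 1;
    }

    static int end_ov_req(int read_failed, size_t digest_size)
    {
        unsigned char *digest = malloc(digest_size);
        int ok;

        if (!digest)
            return 0;               /* terminate the connection */
        if (read_failed)
            memset(digest, 0, digest_size);
        else
            memset(digest, 0xab, digest_size); /* stands in for drbd_csum_ee() */
        ok = send_reply(digest, digest_size);
        free(digest);
        return ok;
    }

    int main(void)
    {
        end_ov_req(0, 20);
        end_ov_req(1, 20);
        return 0;
    }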
@@ -1129,7 +1116,6 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
1129 mdev->ov_last_oos_size = size>>9; 1116 mdev->ov_last_oos_size = size>>9;
1130 } 1117 }
1131 drbd_set_out_of_sync(mdev, sector, size); 1118 drbd_set_out_of_sync(mdev, sector, size);
1132 set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
1133} 1119}
1134 1120
1135int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 1121int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@@ -1165,10 +1151,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1165 eq = !memcmp(digest, di->digest, digest_size); 1151 eq = !memcmp(digest, di->digest, digest_size);
1166 kfree(digest); 1152 kfree(digest);
1167 } 1153 }
1168 } else {
1169 ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
1170 if (__ratelimit(&drbd_ratelimit_state))
1171 dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1172 } 1154 }
1173 1155
1174 dec_unacked(mdev); 1156 dec_unacked(mdev);
@@ -1182,7 +1164,13 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1182 1164
1183 drbd_free_ee(mdev, e); 1165 drbd_free_ee(mdev, e);
1184 1166
1185 if (--mdev->ov_left == 0) { 1167 --mdev->ov_left;
1168
1169 /* let's advance progress step marks only for every other megabyte */
1170 if ((mdev->ov_left & 0x200) == 0x200)
1171 drbd_advance_rs_marks(mdev, mdev->ov_left);
1172
1173 if (mdev->ov_left == 0) {
1186 ov_oos_print(mdev); 1174 ov_oos_print(mdev);
1187 drbd_resync_finished(mdev); 1175 drbd_resync_finished(mdev);
1188 } 1176 }
@@ -1235,6 +1223,22 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1235 return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); 1223 return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
1236} 1224}
1237 1225
1226int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1227{
1228 struct drbd_request *req = container_of(w, struct drbd_request, w);
1229 int ok;
1230
1231 if (unlikely(cancel)) {
1232 req_mod(req, send_canceled);
1233 return 1;
1234 }
1235
1236 ok = drbd_send_oos(mdev, req);
1237 req_mod(req, oos_handed_to_network);
1238
1239 return ok;
1240}
1241
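
The new w_send_oos() follows the usual DRBD worker-callback contract: honour the cancel flag first, then report the request's fate through req_mod() so the request state machine stays consistent. Reduced to its shape, with simplified stand-in types:

    /* Skeleton of a DRBD worker callback. */
    #include <stdio.h>

    enum req_event { SEND_CANCELED, OOS_HANDED_TO_NETWORK };

    static void req_mod(enum req_event e)
    {
        puts(e == SEND_CANCELED ? "request canceled"
                                : "oos handed to network");
    }

    static int w_send_oos(int cancel)
    {
        if (cancel) {
            req_mod(SEND_CANCELED);
            return 1;            /* canceled work still counts as done */
        }
        /* drbd_send_oos() would go here */
        req_mod(OOS_HANDED_TO_NETWORK);
        return 1;
    }

    int main(void)
    {
        w_send_oos(0);
        w_send_oos(1);
        return 0;
    }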
1238/** 1242/**
1239 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request 1243 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1240 * @mdev: DRBD device. 1244 * @mdev: DRBD device.
@@ -1430,6 +1434,17 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
1430 return retcode; 1434 return retcode;
1431} 1435}
1432 1436
1437void drbd_rs_controller_reset(struct drbd_conf *mdev)
1438{
1439 atomic_set(&mdev->rs_sect_in, 0);
1440 atomic_set(&mdev->rs_sect_ev, 0);
1441 mdev->rs_in_flight = 0;
1442 mdev->rs_planed = 0;
1443 spin_lock(&mdev->peer_seq_lock);
1444 fifo_set(&mdev->rs_plan_s, 0);
1445 spin_unlock(&mdev->peer_seq_lock);
1446}
1447
1433/** 1448/**
1434 * drbd_start_resync() - Start the resync process 1449 * drbd_start_resync() - Start the resync process
1435 * @mdev: DRBD device. 1450 * @mdev: DRBD device.
@@ -1443,13 +1458,18 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1443 union drbd_state ns; 1458 union drbd_state ns;
1444 int r; 1459 int r;
1445 1460
1446 if (mdev->state.conn >= C_SYNC_SOURCE) { 1461 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
1447 dev_err(DEV, "Resync already running!\n"); 1462 dev_err(DEV, "Resync already running!\n");
1448 return; 1463 return;
1449 } 1464 }
1450 1465
1451 /* In case a previous resync run was aborted by an IO error/detach on the peer. */ 1466 if (mdev->state.conn < C_AHEAD) {
1452 drbd_rs_cancel_all(mdev); 1467 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1468 drbd_rs_cancel_all(mdev);
1469 /* This should be done when we abort the resync. We definitely do not
1470 want to have this for connections going back and forth between
1471 Ahead/Behind and SyncSource/SyncTarget */
1472 }
1453 1473
1454 if (side == C_SYNC_TARGET) { 1474 if (side == C_SYNC_TARGET) {
1455 /* Since application IO was locked out during C_WF_BITMAP_T and 1475 /* Since application IO was locked out during C_WF_BITMAP_T and
@@ -1463,6 +1483,20 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1463 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 1483 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
1464 return; 1484 return;
1465 } 1485 }
1486 } else /* C_SYNC_SOURCE */ {
1487 r = drbd_khelper(mdev, "before-resync-source");
1488 r = (r >> 8) & 0xff;
1489 if (r > 0) {
1490 if (r == 3) {
1491 dev_info(DEV, "before-resync-source handler returned %d, "
1492 "ignoring. Old userland tools?", r);
1493 } else {
1494 dev_info(DEV, "before-resync-source handler returned %d, "
1495 "dropping connection.\n", r);
1496 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
1497 return;
1498 }
1499 }
1466 } 1500 }
1467 1501
1468 drbd_state_lock(mdev); 1502 drbd_state_lock(mdev);
@@ -1472,18 +1506,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1472 return; 1506 return;
1473 } 1507 }
1474 1508
1475 if (side == C_SYNC_TARGET) {
1476 mdev->bm_resync_fo = 0;
1477 } else /* side == C_SYNC_SOURCE */ {
1478 u64 uuid;
1479
1480 get_random_bytes(&uuid, sizeof(u64));
1481 drbd_uuid_set(mdev, UI_BITMAP, uuid);
1482 drbd_send_sync_uuid(mdev, uuid);
1483
1484 D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
1485 }
1486
1487 write_lock_irq(&global_state_lock); 1509 write_lock_irq(&global_state_lock);
1488 ns = mdev->state; 1510 ns = mdev->state;
1489 1511
@@ -1521,13 +1543,24 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1521 _drbd_pause_after(mdev); 1543 _drbd_pause_after(mdev);
1522 } 1544 }
1523 write_unlock_irq(&global_state_lock); 1545 write_unlock_irq(&global_state_lock);
1524 put_ldev(mdev);
1525 1546
1526 if (r == SS_SUCCESS) { 1547 if (r == SS_SUCCESS) {
1527 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", 1548 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1528 drbd_conn_str(ns.conn), 1549 drbd_conn_str(ns.conn),
1529 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), 1550 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1530 (unsigned long) mdev->rs_total); 1551 (unsigned long) mdev->rs_total);
1552 if (side == C_SYNC_TARGET)
1553 mdev->bm_resync_fo = 0;
1554
1555 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1556 * with w_send_oos, or the sync target will get confused as to
1557 * how many bits to resync. We cannot always do that, because for an
1558 * empty resync and protocol < 95, we need to do it here, as we call
1559 * drbd_resync_finished from here in that case.
1560 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1561 * and from after_state_ch otherwise. */
1562 if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96)
1563 drbd_gen_and_send_sync_uuid(mdev);
1531 1564
1532 if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) { 1565 if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
1533 /* This still has a race (about when exactly the peers 1566 /* This still has a race (about when exactly the peers
@@ -1547,13 +1580,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1547 drbd_resync_finished(mdev); 1580 drbd_resync_finished(mdev);
1548 } 1581 }
1549 1582
1550 atomic_set(&mdev->rs_sect_in, 0); 1583 drbd_rs_controller_reset(mdev);
1551 atomic_set(&mdev->rs_sect_ev, 0);
1552 mdev->rs_in_flight = 0;
1553 mdev->rs_planed = 0;
1554 spin_lock(&mdev->peer_seq_lock);
1555 fifo_set(&mdev->rs_plan_s, 0);
1556 spin_unlock(&mdev->peer_seq_lock);
1557 /* ns.conn may already be != mdev->state.conn, 1584 /* ns.conn may already be != mdev->state.conn,
1558 * we may have been paused in between, or become paused until 1585 * we may have been paused in between, or become paused until
1559 * the timer triggers. 1586 * the timer triggers.
@@ -1563,6 +1590,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1563 1590
1564 drbd_md_sync(mdev); 1591 drbd_md_sync(mdev);
1565 } 1592 }
1593 put_ldev(mdev);
1566 drbd_state_unlock(mdev); 1594 drbd_state_unlock(mdev);
1567} 1595}
1568 1596
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
index 53586fa5ae1b..151f1a37478f 100644
--- a/drivers/block/drbd/drbd_wrappers.h
+++ b/drivers/block/drbd/drbd_wrappers.h
@@ -39,7 +39,7 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
39 return; 39 return;
40 } 40 }
41 41
42 if (FAULT_ACTIVE(mdev, fault_type)) 42 if (drbd_insert_fault(mdev, fault_type))
43 bio_endio(bio, -EIO); 43 bio_endio(bio, -EIO);
44 else 44 else
45 generic_make_request(bio); 45 generic_make_request(bio);
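
drbd_insert_fault(), the renamed FAULT_ACTIVE seen here, is DRBD's fault-injection hook: before a bio is handed to the lower device it may be failed on purpose to exercise the error paths. A toy version of the pattern; the percentage knob and names are illustrative:

    /* Sketch of probabilistic fault injection before submission. */
    #include <stdio.h>
    #include <stdlib.h>

    static int insert_fault(int fault_rate_percent)
    {
        return rand() % 100 < fault_rate_percent;
    }

    static void submit(int fault_rate_percent)
    {
        if (insert_fault(fault_rate_percent))
            puts("bio_endio(bio, -EIO)    /* simulated failure */");
        else
            puts("generic_make_request(bio)");
    }

    int main(void)
    {
        srand(42);
        for (int i = 0; i < 5; i++)
            submit(20);
        return 0;
    }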
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 81131eda5544..060ef6327876 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -315,11 +315,22 @@ config SENSORS_F71805F
315 will be called f71805f. 315 will be called f71805f.
316 316
317config SENSORS_F71882FG 317config SENSORS_F71882FG
318 tristate "Fintek F71858FG, F71862FG, F71882FG, F71889FG and F8000" 318 tristate "Fintek F71882FG and compatibles"
319 help 319 help
320 If you say yes here you get support for hardware monitoring 320 If you say yes here you get support for hardware monitoring
321 features of the Fintek F71858FG, F71862FG/71863FG, F71882FG/F71883FG, 321 features of many Fintek Super-I/O (LPC) chips. The currently
322 F71889FG and F8000 Super-I/O chips. 322 supported chips are:
323 F71808E
324 F71858FG
325 F71862FG
326 F71863FG
327 F71869F/E
328 F71882FG
329 F71883FG
330 F71889FG/ED/A
331 F8000
332 F81801U
333 F81865F
323 334
324 This driver can also be built as a module. If so, the module 335 This driver can also be built as a module. If so, the module
325 will be called f71882fg. 336 will be called f71882fg.
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index a4d430ee7e20..ca07a32447c2 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -54,7 +54,9 @@
54#define SIO_F71882_ID 0x0541 /* Chipset ID */ 54#define SIO_F71882_ID 0x0541 /* Chipset ID */
55#define SIO_F71889_ID 0x0723 /* Chipset ID */ 55#define SIO_F71889_ID 0x0723 /* Chipset ID */
56#define SIO_F71889E_ID 0x0909 /* Chipset ID */ 56#define SIO_F71889E_ID 0x0909 /* Chipset ID */
57#define SIO_F71889A_ID 0x1005 /* Chipset ID */
57#define SIO_F8000_ID 0x0581 /* Chipset ID */ 58#define SIO_F8000_ID 0x0581 /* Chipset ID */
59#define SIO_F81865_ID 0x0704 /* Chipset ID */
58 60
59#define REGION_LENGTH 8 61#define REGION_LENGTH 8
60#define ADDR_REG_OFFSET 5 62#define ADDR_REG_OFFSET 5
@@ -106,7 +108,7 @@ module_param(force_id, ushort, 0);
106MODULE_PARM_DESC(force_id, "Override the detected device ID"); 108MODULE_PARM_DESC(force_id, "Override the detected device ID");
107 109
108enum chips { f71808e, f71858fg, f71862fg, f71869, f71882fg, f71889fg, 110enum chips { f71808e, f71858fg, f71862fg, f71869, f71882fg, f71889fg,
109 f71889ed, f8000 }; 111 f71889ed, f71889a, f8000, f81865f };
110 112
111static const char *f71882fg_names[] = { 113static const char *f71882fg_names[] = {
112 "f71808e", 114 "f71808e",
@@ -114,42 +116,76 @@ static const char *f71882fg_names[] = {
114 "f71862fg", 116 "f71862fg",
115 "f71869", /* Both f71869f and f71869e, reg. compatible and same id */ 117 "f71869", /* Both f71869f and f71869e, reg. compatible and same id */
116 "f71882fg", 118 "f71882fg",
117 "f71889fg", 119 "f71889fg", /* f81801u too, same id */
118 "f71889ed", 120 "f71889ed",
121 "f71889a",
119 "f8000", 122 "f8000",
123 "f81865f",
120}; 124};
121 125
122static const char f71882fg_has_in[8][F71882FG_MAX_INS] = { 126static const char f71882fg_has_in[][F71882FG_MAX_INS] = {
123 { 1, 1, 1, 1, 1, 1, 0, 1, 1 }, /* f71808e */ 127 [f71808e] = { 1, 1, 1, 1, 1, 1, 0, 1, 1 },
124 { 1, 1, 1, 0, 0, 0, 0, 0, 0 }, /* f71858fg */ 128 [f71858fg] = { 1, 1, 1, 0, 0, 0, 0, 0, 0 },
125 { 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71862fg */ 129 [f71862fg] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
126 { 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71869 */ 130 [f71869] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
127 { 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71882fg */ 131 [f71882fg] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
128 { 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71889fg */ 132 [f71889fg] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
129 { 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71889ed */ 133 [f71889ed] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
130 { 1, 1, 1, 0, 0, 0, 0, 0, 0 }, /* f8000 */ 134 [f71889a] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
135 [f8000] = { 1, 1, 1, 0, 0, 0, 0, 0, 0 },
136 [f81865f] = { 1, 1, 1, 1, 1, 1, 1, 0, 0 },
131}; 137};
132 138
133static const char f71882fg_has_in1_alarm[8] = { 139static const char f71882fg_has_in1_alarm[] = {
134 0, /* f71808e */ 140 [f71808e] = 0,
135 0, /* f71858fg */ 141 [f71858fg] = 0,
136 0, /* f71862fg */ 142 [f71862fg] = 0,
137 0, /* f71869 */ 143 [f71869] = 0,
138 1, /* f71882fg */ 144 [f71882fg] = 1,
139 1, /* f71889fg */ 145 [f71889fg] = 1,
140 1, /* f71889ed */ 146 [f71889ed] = 1,
141 0, /* f8000 */ 147 [f71889a] = 1,
148 [f8000] = 0,
149 [f81865f] = 1,
142}; 150};
143 151
144static const char f71882fg_has_beep[8] = { 152static const char f71882fg_has_beep[] = {
145 0, /* f71808e */ 153 [f71808e] = 0,
146 0, /* f71858fg */ 154 [f71858fg] = 0,
147 1, /* f71862fg */ 155 [f71862fg] = 1,
148 1, /* f71869 */ 156 [f71869] = 1,
149 1, /* f71882fg */ 157 [f71882fg] = 1,
150 1, /* f71889fg */ 158 [f71889fg] = 1,
151 1, /* f71889ed */ 159 [f71889ed] = 1,
152 0, /* f8000 */ 160 [f71889a] = 1,
161 [f8000] = 0,
162 [f81865f] = 1,
163};
164
165static const char f71882fg_nr_fans[] = {
166 [f71808e] = 3,
167 [f71858fg] = 3,
168 [f71862fg] = 3,
169 [f71869] = 3,
170 [f71882fg] = 4,
171 [f71889fg] = 3,
172 [f71889ed] = 3,
173 [f71889a] = 3,
174 [f8000] = 3,
175 [f81865f] = 2,
176};
177
178static const char f71882fg_nr_temps[] = {
179 [f71808e] = 2,
180 [f71858fg] = 3,
181 [f71862fg] = 3,
182 [f71869] = 3,
183 [f71882fg] = 3,
184 [f71889fg] = 3,
185 [f71889ed] = 3,
186 [f71889a] = 3,
187 [f8000] = 3,
188 [f81865f] = 2,
153}; 189};
154 190
155static struct platform_device *f71882fg_pdev; 191static struct platform_device *f71882fg_pdev;
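
Converting the per-chip tables to designated initializers, as above, means a new enum entry such as f71889a can be added without re-counting positional rows: the compiler places each row at its enum index and zero-fills anything left out. A condensed example with a shortened chip list:

    /* Designated-initializer lookup table keyed by a device enum. */
    #include <stdio.h>

    enum chips { f71882fg, f71889a, f81865f, NR_CHIPS };

    static const char nr_fans[] = {
        [f71882fg] = 4,
        [f71889a]  = 3,
        [f81865f]  = 2,
    };

    int main(void)
    {
        enum chips type = f81865f;

        printf("fans: %d\n", nr_fans[type]);
        return 0;
    }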
@@ -1071,9 +1107,9 @@ static u16 f71882fg_read_temp(struct f71882fg_data *data, int nr)
1071static struct f71882fg_data *f71882fg_update_device(struct device *dev) 1107static struct f71882fg_data *f71882fg_update_device(struct device *dev)
1072{ 1108{
1073 struct f71882fg_data *data = dev_get_drvdata(dev); 1109 struct f71882fg_data *data = dev_get_drvdata(dev);
1110 int nr_fans = f71882fg_nr_fans[data->type];
1111 int nr_temps = f71882fg_nr_temps[data->type];
1074 int nr, reg, point; 1112 int nr, reg, point;
1075 int nr_fans = (data->type == f71882fg) ? 4 : 3;
1076 int nr_temps = (data->type == f71808e) ? 2 : 3;
1077 1113
1078 mutex_lock(&data->update_lock); 1114 mutex_lock(&data->update_lock);
1079 1115
@@ -2042,8 +2078,9 @@ static int __devinit f71882fg_probe(struct platform_device *pdev)
2042{ 2078{
2043 struct f71882fg_data *data; 2079 struct f71882fg_data *data;
2044 struct f71882fg_sio_data *sio_data = pdev->dev.platform_data; 2080 struct f71882fg_sio_data *sio_data = pdev->dev.platform_data;
2045 int err, i, nr_fans = (sio_data->type == f71882fg) ? 4 : 3; 2081 int nr_fans = f71882fg_nr_fans[sio_data->type];
2046 int nr_temps = (sio_data->type == f71808e) ? 2 : 3; 2082 int nr_temps = f71882fg_nr_temps[sio_data->type];
2083 int err, i;
2047 u8 start_reg, reg; 2084 u8 start_reg, reg;
2048 2085
2049 data = kzalloc(sizeof(struct f71882fg_data), GFP_KERNEL); 2086 data = kzalloc(sizeof(struct f71882fg_data), GFP_KERNEL);
@@ -2138,6 +2175,7 @@ static int __devinit f71882fg_probe(struct platform_device *pdev)
2138 /* Fall through to select correct fan/pwm reg bank! */ 2175 /* Fall through to select correct fan/pwm reg bank! */
2139 case f71889fg: 2176 case f71889fg:
2140 case f71889ed: 2177 case f71889ed:
2178 case f71889a:
2141 reg = f71882fg_read8(data, F71882FG_REG_FAN_FAULT_T); 2179 reg = f71882fg_read8(data, F71882FG_REG_FAN_FAULT_T);
2142 if (reg & F71882FG_FAN_NEG_TEMP_EN) 2180 if (reg & F71882FG_FAN_NEG_TEMP_EN)
2143 data->auto_point_temp_signed = 1; 2181 data->auto_point_temp_signed = 1;
@@ -2163,16 +2201,12 @@ static int __devinit f71882fg_probe(struct platform_device *pdev)
2163 case f71862fg: 2201 case f71862fg:
2164 err = (data->pwm_enable & 0x15) != 0x15; 2202 err = (data->pwm_enable & 0x15) != 0x15;
2165 break; 2203 break;
2166 case f71808e:
2167 case f71869:
2168 case f71882fg:
2169 case f71889fg:
2170 case f71889ed:
2171 err = 0;
2172 break;
2173 case f8000: 2204 case f8000:
2174 err = data->pwm_enable & 0x20; 2205 err = data->pwm_enable & 0x20;
2175 break; 2206 break;
2207 default:
2208 err = 0;
2209 break;
2176 } 2210 }
2177 if (err) { 2211 if (err) {
2178 dev_err(&pdev->dev, 2212 dev_err(&pdev->dev,
@@ -2199,6 +2233,7 @@ static int __devinit f71882fg_probe(struct platform_device *pdev)
2199 case f71869: 2233 case f71869:
2200 case f71889fg: 2234 case f71889fg:
2201 case f71889ed: 2235 case f71889ed:
2236 case f71889a:
2202 for (i = 0; i < nr_fans; i++) { 2237 for (i = 0; i < nr_fans; i++) {
2203 data->pwm_auto_point_mapping[i] = 2238 data->pwm_auto_point_mapping[i] =
2204 f71882fg_read8(data, 2239 f71882fg_read8(data,
@@ -2276,8 +2311,9 @@ exit_free:
2276static int f71882fg_remove(struct platform_device *pdev) 2311static int f71882fg_remove(struct platform_device *pdev)
2277{ 2312{
2278 struct f71882fg_data *data = platform_get_drvdata(pdev); 2313 struct f71882fg_data *data = platform_get_drvdata(pdev);
2279 int i, nr_fans = (data->type == f71882fg) ? 4 : 3; 2314 int nr_fans = f71882fg_nr_fans[data->type];
2280 int nr_temps = (data->type == f71808e) ? 2 : 3; 2315 int nr_temps = f71882fg_nr_temps[data->type];
2316 int i;
2281 u8 start_reg = f71882fg_read8(data, F71882FG_REG_START); 2317 u8 start_reg = f71882fg_read8(data, F71882FG_REG_START);
2282 2318
2283 if (data->hwmon_dev) 2319 if (data->hwmon_dev)
@@ -2406,9 +2442,15 @@ static int __init f71882fg_find(int sioaddr, unsigned short *address,
2406 case SIO_F71889E_ID: 2442 case SIO_F71889E_ID:
2407 sio_data->type = f71889ed; 2443 sio_data->type = f71889ed;
2408 break; 2444 break;
2445 case SIO_F71889A_ID:
2446 sio_data->type = f71889a;
2447 break;
2409 case SIO_F8000_ID: 2448 case SIO_F8000_ID:
2410 sio_data->type = f8000; 2449 sio_data->type = f8000;
2411 break; 2450 break;
2451 case SIO_F81865_ID:
2452 sio_data->type = f81865f;
2453 break;
2412 default: 2454 default:
2413 pr_info("Unsupported Fintek device: %04x\n", 2455 pr_info("Unsupported Fintek device: %04x\n",
2414 (unsigned int)devid); 2456 (unsigned int)devid);
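The f71882fg changes above replace per-model ternaries such as (data->type == f71882fg) ? 4 : 3 with lookups into the f71882fg_nr_fans[] and f71882fg_nr_temps[] tables indexed by the chip enum, so a newly supported ID (f71889a, f81865f) needs only new table entries plus any model-specific case labels. The new default: arm in the pwm_enable sanity switch serves the same goal: unlisted models take the permissive branch instead of forcing an edit to every switch. A minimal stand-alone C sketch of the table-lookup pattern; the enum and the counts below are illustrative, not the driver's actual tables:

	#include <stdio.h>

	/* Hypothetical chip enum mirroring the driver's style. */
	enum chips { f71808e, f71858fg, f71862fg, f71869, f71882fg,
		     f71889fg, f71889ed, f71889a, f8000, f81865f };

	/* One table per property; a new model adds one column here. */
	static const int nr_fans[]  = { 3, 3, 3, 3, 4, 3, 3, 3, 3, 2 };
	static const int nr_temps[] = { 2, 3, 3, 3, 3, 3, 3, 3, 3, 2 };

	int main(void)
	{
		enum chips type = f71882fg;

		/* A table lookup replaces chained ?: tests per model. */
		printf("fans=%d temps=%d\n", nr_fans[type], nr_temps[type]);
		return 0;
	}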
diff --git a/drivers/hwmon/pmbus_core.c b/drivers/hwmon/pmbus_core.c
index 6474512f49b0..edfb92e41735 100644
--- a/drivers/hwmon/pmbus_core.c
+++ b/drivers/hwmon/pmbus_core.c
@@ -752,7 +752,7 @@ static void pmbus_add_boolean_cmp(struct pmbus_data *data,
752static void pmbus_add_sensor(struct pmbus_data *data, 752static void pmbus_add_sensor(struct pmbus_data *data,
753 const char *name, const char *type, int seq, 753 const char *name, const char *type, int seq,
754 int page, int reg, enum pmbus_sensor_classes class, 754 int page, int reg, enum pmbus_sensor_classes class,
755 bool update) 755 bool update, bool readonly)
756{ 756{
757 struct pmbus_sensor *sensor; 757 struct pmbus_sensor *sensor;
758 758
@@ -765,7 +765,7 @@ static void pmbus_add_sensor(struct pmbus_data *data,
765 sensor->reg = reg; 765 sensor->reg = reg;
766 sensor->class = class; 766 sensor->class = class;
767 sensor->update = update; 767 sensor->update = update;
768 if (update) 768 if (readonly)
769 PMBUS_ADD_GET_ATTR(data, sensor->name, sensor, 769 PMBUS_ADD_GET_ATTR(data, sensor->name, sensor,
770 data->num_sensors); 770 data->num_sensors);
771 else 771 else
@@ -916,14 +916,14 @@ static void pmbus_find_attributes(struct i2c_client *client,
916 916
917 i0 = data->num_sensors; 917 i0 = data->num_sensors;
918 pmbus_add_label(data, "in", in_index, "vin", 0); 918 pmbus_add_label(data, "in", in_index, "vin", 0);
919 pmbus_add_sensor(data, "in", "input", in_index, 919 pmbus_add_sensor(data, "in", "input", in_index, 0,
920 0, PMBUS_READ_VIN, PSC_VOLTAGE_IN, true); 920 PMBUS_READ_VIN, PSC_VOLTAGE_IN, true, true);
921 if (pmbus_check_word_register(client, 0, 921 if (pmbus_check_word_register(client, 0,
922 PMBUS_VIN_UV_WARN_LIMIT)) { 922 PMBUS_VIN_UV_WARN_LIMIT)) {
923 i1 = data->num_sensors; 923 i1 = data->num_sensors;
924 pmbus_add_sensor(data, "in", "min", in_index, 924 pmbus_add_sensor(data, "in", "min", in_index,
925 0, PMBUS_VIN_UV_WARN_LIMIT, 925 0, PMBUS_VIN_UV_WARN_LIMIT,
926 PSC_VOLTAGE_IN, false); 926 PSC_VOLTAGE_IN, false, false);
927 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) { 927 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
928 pmbus_add_boolean_reg(data, "in", "min_alarm", 928 pmbus_add_boolean_reg(data, "in", "min_alarm",
929 in_index, 929 in_index,
@@ -937,7 +937,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
937 i1 = data->num_sensors; 937 i1 = data->num_sensors;
938 pmbus_add_sensor(data, "in", "lcrit", in_index, 938 pmbus_add_sensor(data, "in", "lcrit", in_index,
939 0, PMBUS_VIN_UV_FAULT_LIMIT, 939 0, PMBUS_VIN_UV_FAULT_LIMIT,
940 PSC_VOLTAGE_IN, false); 940 PSC_VOLTAGE_IN, false, false);
941 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) { 941 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
942 pmbus_add_boolean_reg(data, "in", "lcrit_alarm", 942 pmbus_add_boolean_reg(data, "in", "lcrit_alarm",
943 in_index, 943 in_index,
@@ -951,7 +951,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
951 i1 = data->num_sensors; 951 i1 = data->num_sensors;
952 pmbus_add_sensor(data, "in", "max", in_index, 952 pmbus_add_sensor(data, "in", "max", in_index,
953 0, PMBUS_VIN_OV_WARN_LIMIT, 953 0, PMBUS_VIN_OV_WARN_LIMIT,
954 PSC_VOLTAGE_IN, false); 954 PSC_VOLTAGE_IN, false, false);
955 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) { 955 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
956 pmbus_add_boolean_reg(data, "in", "max_alarm", 956 pmbus_add_boolean_reg(data, "in", "max_alarm",
957 in_index, 957 in_index,
@@ -965,7 +965,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
965 i1 = data->num_sensors; 965 i1 = data->num_sensors;
966 pmbus_add_sensor(data, "in", "crit", in_index, 966 pmbus_add_sensor(data, "in", "crit", in_index,
967 0, PMBUS_VIN_OV_FAULT_LIMIT, 967 0, PMBUS_VIN_OV_FAULT_LIMIT,
968 PSC_VOLTAGE_IN, false); 968 PSC_VOLTAGE_IN, false, false);
969 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) { 969 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
970 pmbus_add_boolean_reg(data, "in", "crit_alarm", 970 pmbus_add_boolean_reg(data, "in", "crit_alarm",
971 in_index, 971 in_index,
@@ -988,7 +988,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
988 if (info->func[0] & PMBUS_HAVE_VCAP) { 988 if (info->func[0] & PMBUS_HAVE_VCAP) {
989 pmbus_add_label(data, "in", in_index, "vcap", 0); 989 pmbus_add_label(data, "in", in_index, "vcap", 0);
990 pmbus_add_sensor(data, "in", "input", in_index, 0, 990 pmbus_add_sensor(data, "in", "input", in_index, 0,
991 PMBUS_READ_VCAP, PSC_VOLTAGE_IN, true); 991 PMBUS_READ_VCAP, PSC_VOLTAGE_IN, true, true);
992 in_index++; 992 in_index++;
993 } 993 }
994 994
@@ -1004,13 +1004,13 @@ static void pmbus_find_attributes(struct i2c_client *client,
1004 i0 = data->num_sensors; 1004 i0 = data->num_sensors;
1005 pmbus_add_label(data, "in", in_index, "vout", page + 1); 1005 pmbus_add_label(data, "in", in_index, "vout", page + 1);
1006 pmbus_add_sensor(data, "in", "input", in_index, page, 1006 pmbus_add_sensor(data, "in", "input", in_index, page,
1007 PMBUS_READ_VOUT, PSC_VOLTAGE_OUT, true); 1007 PMBUS_READ_VOUT, PSC_VOLTAGE_OUT, true, true);
1008 if (pmbus_check_word_register(client, page, 1008 if (pmbus_check_word_register(client, page,
1009 PMBUS_VOUT_UV_WARN_LIMIT)) { 1009 PMBUS_VOUT_UV_WARN_LIMIT)) {
1010 i1 = data->num_sensors; 1010 i1 = data->num_sensors;
1011 pmbus_add_sensor(data, "in", "min", in_index, page, 1011 pmbus_add_sensor(data, "in", "min", in_index, page,
1012 PMBUS_VOUT_UV_WARN_LIMIT, 1012 PMBUS_VOUT_UV_WARN_LIMIT,
1013 PSC_VOLTAGE_OUT, false); 1013 PSC_VOLTAGE_OUT, false, false);
1014 if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) { 1014 if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) {
1015 pmbus_add_boolean_reg(data, "in", "min_alarm", 1015 pmbus_add_boolean_reg(data, "in", "min_alarm",
1016 in_index, 1016 in_index,
@@ -1025,7 +1025,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1025 i1 = data->num_sensors; 1025 i1 = data->num_sensors;
1026 pmbus_add_sensor(data, "in", "lcrit", in_index, page, 1026 pmbus_add_sensor(data, "in", "lcrit", in_index, page,
1027 PMBUS_VOUT_UV_FAULT_LIMIT, 1027 PMBUS_VOUT_UV_FAULT_LIMIT,
1028 PSC_VOLTAGE_OUT, false); 1028 PSC_VOLTAGE_OUT, false, false);
1029 if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) { 1029 if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) {
1030 pmbus_add_boolean_reg(data, "in", "lcrit_alarm", 1030 pmbus_add_boolean_reg(data, "in", "lcrit_alarm",
1031 in_index, 1031 in_index,
@@ -1040,7 +1040,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1040 i1 = data->num_sensors; 1040 i1 = data->num_sensors;
1041 pmbus_add_sensor(data, "in", "max", in_index, page, 1041 pmbus_add_sensor(data, "in", "max", in_index, page,
1042 PMBUS_VOUT_OV_WARN_LIMIT, 1042 PMBUS_VOUT_OV_WARN_LIMIT,
1043 PSC_VOLTAGE_OUT, false); 1043 PSC_VOLTAGE_OUT, false, false);
1044 if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) { 1044 if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) {
1045 pmbus_add_boolean_reg(data, "in", "max_alarm", 1045 pmbus_add_boolean_reg(data, "in", "max_alarm",
1046 in_index, 1046 in_index,
@@ -1055,7 +1055,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1055 i1 = data->num_sensors; 1055 i1 = data->num_sensors;
1056 pmbus_add_sensor(data, "in", "crit", in_index, page, 1056 pmbus_add_sensor(data, "in", "crit", in_index, page,
1057 PMBUS_VOUT_OV_FAULT_LIMIT, 1057 PMBUS_VOUT_OV_FAULT_LIMIT,
1058 PSC_VOLTAGE_OUT, false); 1058 PSC_VOLTAGE_OUT, false, false);
1059 if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) { 1059 if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) {
1060 pmbus_add_boolean_reg(data, "in", "crit_alarm", 1060 pmbus_add_boolean_reg(data, "in", "crit_alarm",
1061 in_index, 1061 in_index,
@@ -1088,14 +1088,14 @@ static void pmbus_find_attributes(struct i2c_client *client,
1088 if (info->func[0] & PMBUS_HAVE_IIN) { 1088 if (info->func[0] & PMBUS_HAVE_IIN) {
1089 i0 = data->num_sensors; 1089 i0 = data->num_sensors;
1090 pmbus_add_label(data, "curr", in_index, "iin", 0); 1090 pmbus_add_label(data, "curr", in_index, "iin", 0);
1091 pmbus_add_sensor(data, "curr", "input", in_index, 1091 pmbus_add_sensor(data, "curr", "input", in_index, 0,
1092 0, PMBUS_READ_IIN, PSC_CURRENT_IN, true); 1092 PMBUS_READ_IIN, PSC_CURRENT_IN, true, true);
1093 if (pmbus_check_word_register(client, 0, 1093 if (pmbus_check_word_register(client, 0,
1094 PMBUS_IIN_OC_WARN_LIMIT)) { 1094 PMBUS_IIN_OC_WARN_LIMIT)) {
1095 i1 = data->num_sensors; 1095 i1 = data->num_sensors;
1096 pmbus_add_sensor(data, "curr", "max", in_index, 1096 pmbus_add_sensor(data, "curr", "max", in_index,
1097 0, PMBUS_IIN_OC_WARN_LIMIT, 1097 0, PMBUS_IIN_OC_WARN_LIMIT,
1098 PSC_CURRENT_IN, false); 1098 PSC_CURRENT_IN, false, false);
1099 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) { 1099 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
1100 pmbus_add_boolean_reg(data, "curr", "max_alarm", 1100 pmbus_add_boolean_reg(data, "curr", "max_alarm",
1101 in_index, 1101 in_index,
@@ -1108,7 +1108,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1108 i1 = data->num_sensors; 1108 i1 = data->num_sensors;
1109 pmbus_add_sensor(data, "curr", "crit", in_index, 1109 pmbus_add_sensor(data, "curr", "crit", in_index,
1110 0, PMBUS_IIN_OC_FAULT_LIMIT, 1110 0, PMBUS_IIN_OC_FAULT_LIMIT,
1111 PSC_CURRENT_IN, false); 1111 PSC_CURRENT_IN, false, false);
1112 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) 1112 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT)
1113 pmbus_add_boolean_reg(data, "curr", 1113 pmbus_add_boolean_reg(data, "curr",
1114 "crit_alarm", 1114 "crit_alarm",
@@ -1131,13 +1131,13 @@ static void pmbus_find_attributes(struct i2c_client *client,
1131 i0 = data->num_sensors; 1131 i0 = data->num_sensors;
1132 pmbus_add_label(data, "curr", in_index, "iout", page + 1); 1132 pmbus_add_label(data, "curr", in_index, "iout", page + 1);
1133 pmbus_add_sensor(data, "curr", "input", in_index, page, 1133 pmbus_add_sensor(data, "curr", "input", in_index, page,
1134 PMBUS_READ_IOUT, PSC_CURRENT_OUT, true); 1134 PMBUS_READ_IOUT, PSC_CURRENT_OUT, true, true);
1135 if (pmbus_check_word_register(client, page, 1135 if (pmbus_check_word_register(client, page,
1136 PMBUS_IOUT_OC_WARN_LIMIT)) { 1136 PMBUS_IOUT_OC_WARN_LIMIT)) {
1137 i1 = data->num_sensors; 1137 i1 = data->num_sensors;
1138 pmbus_add_sensor(data, "curr", "max", in_index, page, 1138 pmbus_add_sensor(data, "curr", "max", in_index, page,
1139 PMBUS_IOUT_OC_WARN_LIMIT, 1139 PMBUS_IOUT_OC_WARN_LIMIT,
1140 PSC_CURRENT_OUT, false); 1140 PSC_CURRENT_OUT, false, false);
1141 if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) { 1141 if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) {
1142 pmbus_add_boolean_reg(data, "curr", "max_alarm", 1142 pmbus_add_boolean_reg(data, "curr", "max_alarm",
1143 in_index, 1143 in_index,
@@ -1151,7 +1151,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1151 i1 = data->num_sensors; 1151 i1 = data->num_sensors;
1152 pmbus_add_sensor(data, "curr", "lcrit", in_index, page, 1152 pmbus_add_sensor(data, "curr", "lcrit", in_index, page,
1153 PMBUS_IOUT_UC_FAULT_LIMIT, 1153 PMBUS_IOUT_UC_FAULT_LIMIT,
1154 PSC_CURRENT_OUT, false); 1154 PSC_CURRENT_OUT, false, false);
1155 if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) { 1155 if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) {
1156 pmbus_add_boolean_reg(data, "curr", 1156 pmbus_add_boolean_reg(data, "curr",
1157 "lcrit_alarm", 1157 "lcrit_alarm",
@@ -1166,7 +1166,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1166 i1 = data->num_sensors; 1166 i1 = data->num_sensors;
1167 pmbus_add_sensor(data, "curr", "crit", in_index, page, 1167 pmbus_add_sensor(data, "curr", "crit", in_index, page,
1168 PMBUS_IOUT_OC_FAULT_LIMIT, 1168 PMBUS_IOUT_OC_FAULT_LIMIT,
1169 PSC_CURRENT_OUT, false); 1169 PSC_CURRENT_OUT, false, false);
1170 if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) { 1170 if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) {
1171 pmbus_add_boolean_reg(data, "curr", 1171 pmbus_add_boolean_reg(data, "curr",
1172 "crit_alarm", 1172 "crit_alarm",
@@ -1199,13 +1199,13 @@ static void pmbus_find_attributes(struct i2c_client *client,
1199 i0 = data->num_sensors; 1199 i0 = data->num_sensors;
1200 pmbus_add_label(data, "power", in_index, "pin", 0); 1200 pmbus_add_label(data, "power", in_index, "pin", 0);
1201 pmbus_add_sensor(data, "power", "input", in_index, 1201 pmbus_add_sensor(data, "power", "input", in_index,
1202 0, PMBUS_READ_PIN, PSC_POWER, true); 1202 0, PMBUS_READ_PIN, PSC_POWER, true, true);
1203 if (pmbus_check_word_register(client, 0, 1203 if (pmbus_check_word_register(client, 0,
1204 PMBUS_PIN_OP_WARN_LIMIT)) { 1204 PMBUS_PIN_OP_WARN_LIMIT)) {
1205 i1 = data->num_sensors; 1205 i1 = data->num_sensors;
1206 pmbus_add_sensor(data, "power", "max", in_index, 1206 pmbus_add_sensor(data, "power", "max", in_index,
1207 0, PMBUS_PIN_OP_WARN_LIMIT, PSC_POWER, 1207 0, PMBUS_PIN_OP_WARN_LIMIT, PSC_POWER,
1208 false); 1208 false, false);
1209 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) 1209 if (info->func[0] & PMBUS_HAVE_STATUS_INPUT)
1210 pmbus_add_boolean_reg(data, "power", 1210 pmbus_add_boolean_reg(data, "power",
1211 "alarm", 1211 "alarm",
@@ -1228,7 +1228,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1228 i0 = data->num_sensors; 1228 i0 = data->num_sensors;
1229 pmbus_add_label(data, "power", in_index, "pout", page + 1); 1229 pmbus_add_label(data, "power", in_index, "pout", page + 1);
1230 pmbus_add_sensor(data, "power", "input", in_index, page, 1230 pmbus_add_sensor(data, "power", "input", in_index, page,
1231 PMBUS_READ_POUT, PSC_POWER, true); 1231 PMBUS_READ_POUT, PSC_POWER, true, true);
1232 /* 1232 /*
1233 * Per hwmon sysfs API, power_cap is to be used to limit output 1233 * Per hwmon sysfs API, power_cap is to be used to limit output
1234 * power. 1234 * power.
@@ -1241,7 +1241,8 @@ static void pmbus_find_attributes(struct i2c_client *client,
1241 if (pmbus_check_word_register(client, page, PMBUS_POUT_MAX)) { 1241 if (pmbus_check_word_register(client, page, PMBUS_POUT_MAX)) {
1242 i1 = data->num_sensors; 1242 i1 = data->num_sensors;
1243 pmbus_add_sensor(data, "power", "cap", in_index, page, 1243 pmbus_add_sensor(data, "power", "cap", in_index, page,
1244 PMBUS_POUT_MAX, PSC_POWER, false); 1244 PMBUS_POUT_MAX, PSC_POWER,
1245 false, false);
1245 need_alarm = true; 1246 need_alarm = true;
1246 } 1247 }
1247 if (pmbus_check_word_register(client, page, 1248 if (pmbus_check_word_register(client, page,
@@ -1249,7 +1250,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1249 i1 = data->num_sensors; 1250 i1 = data->num_sensors;
1250 pmbus_add_sensor(data, "power", "max", in_index, page, 1251 pmbus_add_sensor(data, "power", "max", in_index, page,
1251 PMBUS_POUT_OP_WARN_LIMIT, PSC_POWER, 1252 PMBUS_POUT_OP_WARN_LIMIT, PSC_POWER,
1252 false); 1253 false, false);
1253 need_alarm = true; 1254 need_alarm = true;
1254 } 1255 }
1255 if (need_alarm && (info->func[page] & PMBUS_HAVE_STATUS_IOUT)) 1256 if (need_alarm && (info->func[page] & PMBUS_HAVE_STATUS_IOUT))
@@ -1264,7 +1265,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1264 i1 = data->num_sensors; 1265 i1 = data->num_sensors;
1265 pmbus_add_sensor(data, "power", "crit", in_index, page, 1266 pmbus_add_sensor(data, "power", "crit", in_index, page,
1266 PMBUS_POUT_OP_FAULT_LIMIT, PSC_POWER, 1267 PMBUS_POUT_OP_FAULT_LIMIT, PSC_POWER,
1267 false); 1268 false, false);
1268 if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) 1269 if (info->func[page] & PMBUS_HAVE_STATUS_IOUT)
1269 pmbus_add_boolean_reg(data, "power", 1270 pmbus_add_boolean_reg(data, "power",
1270 "crit_alarm", 1271 "crit_alarm",
@@ -1302,7 +1303,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1302 i0 = data->num_sensors; 1303 i0 = data->num_sensors;
1303 pmbus_add_sensor(data, "temp", "input", in_index, page, 1304 pmbus_add_sensor(data, "temp", "input", in_index, page,
1304 pmbus_temp_registers[t], 1305 pmbus_temp_registers[t],
1305 PSC_TEMPERATURE, true); 1306 PSC_TEMPERATURE, true, true);
1306 1307
1307 /* 1308 /*
1308 * PMBus provides only one status register for TEMP1-3. 1309 * PMBus provides only one status register for TEMP1-3.
@@ -1323,7 +1324,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1323 i1 = data->num_sensors; 1324 i1 = data->num_sensors;
1324 pmbus_add_sensor(data, "temp", "min", in_index, 1325 pmbus_add_sensor(data, "temp", "min", in_index,
1325 page, PMBUS_UT_WARN_LIMIT, 1326 page, PMBUS_UT_WARN_LIMIT,
1326 PSC_TEMPERATURE, true); 1327 PSC_TEMPERATURE, true, false);
1327 if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) { 1328 if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) {
1328 pmbus_add_boolean_cmp(data, "temp", 1329 pmbus_add_boolean_cmp(data, "temp",
1329 "min_alarm", in_index, i1, i0, 1330 "min_alarm", in_index, i1, i0,
@@ -1338,7 +1339,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1338 pmbus_add_sensor(data, "temp", "lcrit", 1339 pmbus_add_sensor(data, "temp", "lcrit",
1339 in_index, page, 1340 in_index, page,
1340 PMBUS_UT_FAULT_LIMIT, 1341 PMBUS_UT_FAULT_LIMIT,
1341 PSC_TEMPERATURE, true); 1342 PSC_TEMPERATURE, true, false);
1342 if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) { 1343 if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) {
1343 pmbus_add_boolean_cmp(data, "temp", 1344 pmbus_add_boolean_cmp(data, "temp",
1344 "lcrit_alarm", in_index, i1, i0, 1345 "lcrit_alarm", in_index, i1, i0,
@@ -1352,7 +1353,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1352 i1 = data->num_sensors; 1353 i1 = data->num_sensors;
1353 pmbus_add_sensor(data, "temp", "max", in_index, 1354 pmbus_add_sensor(data, "temp", "max", in_index,
1354 page, PMBUS_OT_WARN_LIMIT, 1355 page, PMBUS_OT_WARN_LIMIT,
1355 PSC_TEMPERATURE, true); 1356 PSC_TEMPERATURE, true, false);
1356 if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) { 1357 if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) {
1357 pmbus_add_boolean_cmp(data, "temp", 1358 pmbus_add_boolean_cmp(data, "temp",
1358 "max_alarm", in_index, i0, i1, 1359 "max_alarm", in_index, i0, i1,
@@ -1366,7 +1367,7 @@ static void pmbus_find_attributes(struct i2c_client *client,
1366 i1 = data->num_sensors; 1367 i1 = data->num_sensors;
1367 pmbus_add_sensor(data, "temp", "crit", in_index, 1368 pmbus_add_sensor(data, "temp", "crit", in_index,
1368 page, PMBUS_OT_FAULT_LIMIT, 1369 page, PMBUS_OT_FAULT_LIMIT,
1369 PSC_TEMPERATURE, true); 1370 PSC_TEMPERATURE, true, false);
1370 if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) { 1371 if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) {
1371 pmbus_add_boolean_cmp(data, "temp", 1372 pmbus_add_boolean_cmp(data, "temp",
1372 "crit_alarm", in_index, i0, i1, 1373 "crit_alarm", in_index, i0, i1,
@@ -1421,7 +1422,8 @@ static void pmbus_find_attributes(struct i2c_client *client,
1421 1422
1422 i0 = data->num_sensors; 1423 i0 = data->num_sensors;
1423 pmbus_add_sensor(data, "fan", "input", in_index, page, 1424 pmbus_add_sensor(data, "fan", "input", in_index, page,
1424 pmbus_fan_registers[f], PSC_FAN, true); 1425 pmbus_fan_registers[f], PSC_FAN, true,
1426 true);
1425 1427
1426 /* 1428 /*
 1427 * Each fan status register covers multiple fans, 1429 * Each fan status register covers multiple fans,
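The pmbus_core change above splits pmbus_add_sensor()'s single update flag into update (re-read the register on every access) and readonly (register the attribute without a store method). The old code derived writability from update, which ruled out attributes that are both volatile and writable; the converted temperature-limit call sites now pass true, false for exactly that combination. A small compile-able sketch of the decoupling, with invented names:

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical registration helper: the two booleans are
	 * independent, as in the pmbus_add_sensor() change above. */
	static void add_sensor(const char *name, bool update, bool readonly)
	{
		printf("%s: %s, %s\n", name,
		       update ? "volatile" : "cached",
		       readonly ? "RO" : "RW");
	}

	int main(void)
	{
		add_sensor("in1_input", true, true);   /* reading: volatile, RO */
		add_sensor("temp1_max", true, false);  /* limit: volatile yet RW */
		add_sensor("in1_min",   false, false); /* limit: cached, RW */
		return 0;
	}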
diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig
index eb4af28f8567..1f29bab6b3e5 100644
--- a/drivers/hwspinlock/Kconfig
+++ b/drivers/hwspinlock/Kconfig
@@ -4,6 +4,7 @@
4 4
5config HWSPINLOCK 5config HWSPINLOCK
6 tristate "Generic Hardware Spinlock framework" 6 tristate "Generic Hardware Spinlock framework"
7 depends on ARCH_OMAP4
7 help 8 help
8 Say y here to support the generic hardware spinlock framework. 9 Say y here to support the generic hardware spinlock framework.
9 You only need to enable this if you have hardware spinlock module 10 You only need to enable this if you have hardware spinlock module
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index f4077840d3ab..0e406d73b2c8 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -440,6 +440,7 @@ void do_ide_request(struct request_queue *q)
440 struct ide_host *host = hwif->host; 440 struct ide_host *host = hwif->host;
441 struct request *rq = NULL; 441 struct request *rq = NULL;
442 ide_startstop_t startstop; 442 ide_startstop_t startstop;
443 unsigned long queue_run_ms = 3; /* old plug delay */
443 444
444 spin_unlock_irq(q->queue_lock); 445 spin_unlock_irq(q->queue_lock);
445 446
@@ -459,6 +460,9 @@ repeat:
459 prev_port = hwif->host->cur_port; 460 prev_port = hwif->host->cur_port;
460 if (drive->dev_flags & IDE_DFLAG_SLEEPING && 461 if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
461 time_after(drive->sleep, jiffies)) { 462 time_after(drive->sleep, jiffies)) {
 463 unsigned long left = drive->sleep - jiffies;
464
465 queue_run_ms = jiffies_to_msecs(left + 1);
462 ide_unlock_port(hwif); 466 ide_unlock_port(hwif);
463 goto plug_device; 467 goto plug_device;
464 } 468 }
@@ -547,8 +551,10 @@ plug_device:
547plug_device_2: 551plug_device_2:
548 spin_lock_irq(q->queue_lock); 552 spin_lock_irq(q->queue_lock);
549 553
550 if (rq) 554 if (rq) {
551 blk_requeue_request(q, rq); 555 blk_requeue_request(q, rq);
556 blk_delay_queue(q, queue_run_ms);
557 }
552} 558}
553 559
554void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) 560void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
@@ -562,6 +568,10 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
562 blk_requeue_request(q, rq); 568 blk_requeue_request(q, rq);
563 569
564 spin_unlock_irqrestore(q->queue_lock, flags); 570 spin_unlock_irqrestore(q->queue_lock, flags);
571
572 /* Use 3ms as that was the old plug delay */
573 if (rq)
574 blk_delay_queue(q, 3);
565} 575}
566 576
567static int drive_is_ready(ide_drive_t *drive) 577static int drive_is_ready(ide_drive_t *drive)
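With block-layer plugging gone, ide-io.c now requeues the request and schedules an explicit queue re-run through blk_delay_queue(): 3 ms by default (the old plug delay), or the time remaining until a sleeping drive wakes. A stand-alone sketch of that delay computation, assuming a HZ-based tick counter; the names and values are illustrative:

	#include <stdio.h>

	#define HZ 1000	/* assumed tick rate for this sketch */

	static unsigned long jiffies_to_msecs(unsigned long j)
	{
		return j * 1000 / HZ;
	}

	int main(void)
	{
		unsigned long jiffies = 5000;	/* "now" in ticks */
		unsigned long sleep = 5040;	/* drive wakes 40 ticks later */
		unsigned long delay_ms = 3;	/* default: old plug delay */

		if (sleep > jiffies)		/* stands in for time_after() */
			delay_ms = jiffies_to_msecs(sleep - jiffies + 1);

		printf("re-run queue in %lu ms\n", delay_ms);
		return 0;
	}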
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 9c511c1604a5..011cb6ce861b 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -416,7 +416,6 @@ static int __devinit device_irq_init(struct pm860x_chip *chip,
416 : chip->companion; 416 : chip->companion;
417 unsigned char status_buf[INT_STATUS_NUM]; 417 unsigned char status_buf[INT_STATUS_NUM];
418 unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; 418 unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
419 struct irq_desc *desc;
420 int i, data, mask, ret = -EINVAL; 419 int i, data, mask, ret = -EINVAL;
421 int __irq; 420 int __irq;
422 421
@@ -468,19 +467,17 @@ static int __devinit device_irq_init(struct pm860x_chip *chip,
468 if (!chip->core_irq) 467 if (!chip->core_irq)
469 goto out; 468 goto out;
470 469
471 desc = irq_to_desc(chip->core_irq);
472
473 /* register IRQ by genirq */ 470 /* register IRQ by genirq */
474 for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) { 471 for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) {
475 __irq = i + chip->irq_base; 472 __irq = i + chip->irq_base;
476 set_irq_chip_data(__irq, chip); 473 irq_set_chip_data(__irq, chip);
477 set_irq_chip_and_handler(__irq, &pm860x_irq_chip, 474 irq_set_chip_and_handler(__irq, &pm860x_irq_chip,
478 handle_edge_irq); 475 handle_edge_irq);
479 set_irq_nested_thread(__irq, 1); 476 irq_set_nested_thread(__irq, 1);
480#ifdef CONFIG_ARM 477#ifdef CONFIG_ARM
481 set_irq_flags(__irq, IRQF_VALID); 478 set_irq_flags(__irq, IRQF_VALID);
482#else 479#else
483 set_irq_noprobe(__irq); 480 irq_set_noprobe(__irq);
484#endif 481#endif
485 } 482 }
486 483
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index a9a1af49281e..e986f91fff9c 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -133,6 +133,7 @@ config TPS6105X
133 tristate "TPS61050/61052 Boost Converters" 133 tristate "TPS61050/61052 Boost Converters"
134 depends on I2C 134 depends on I2C
135 select REGULATOR 135 select REGULATOR
136 select MFD_CORE
136 select REGULATOR_FIXED_VOLTAGE 137 select REGULATOR_FIXED_VOLTAGE
137 help 138 help
138 This option enables a driver for the TP61050/TPS61052 139 This option enables a driver for the TP61050/TPS61052
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 47f5709f3828..ef489f253402 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -63,7 +63,7 @@ obj-$(CONFIG_UCB1400_CORE) += ucb1400_core.o
63obj-$(CONFIG_PMIC_DA903X) += da903x.o 63obj-$(CONFIG_PMIC_DA903X) += da903x.o
64max8925-objs := max8925-core.o max8925-i2c.o 64max8925-objs := max8925-core.o max8925-i2c.o
65obj-$(CONFIG_MFD_MAX8925) += max8925.o 65obj-$(CONFIG_MFD_MAX8925) += max8925.o
66obj-$(CONFIG_MFD_MAX8997) += max8997.o 66obj-$(CONFIG_MFD_MAX8997) += max8997.o max8997-irq.o
67obj-$(CONFIG_MFD_MAX8998) += max8998.o max8998-irq.o 67obj-$(CONFIG_MFD_MAX8998) += max8998.o max8998-irq.o
68 68
69pcf50633-objs := pcf50633-core.o pcf50633-irq.o 69pcf50633-objs := pcf50633-core.o pcf50633-irq.o
diff --git a/drivers/mfd/ab3550-core.c b/drivers/mfd/ab3550-core.c
index c12d04285226..ff86acf3e6bd 100644
--- a/drivers/mfd/ab3550-core.c
+++ b/drivers/mfd/ab3550-core.c
@@ -668,7 +668,7 @@ static int ab3550_startup_irq_enabled(struct device *dev, unsigned int irq)
668 struct ab3550_platform_data *plf_data; 668 struct ab3550_platform_data *plf_data;
669 bool val; 669 bool val;
670 670
671 ab = get_irq_chip_data(irq); 671 ab = irq_get_chip_data(irq);
672 plf_data = ab->i2c_client[0]->dev.platform_data; 672 plf_data = ab->i2c_client[0]->dev.platform_data;
673 irq -= plf_data->irq.base; 673 irq -= plf_data->irq.base;
674 val = ((ab->startup_events[irq / 8] & BIT(irq % 8)) != 0); 674 val = ((ab->startup_events[irq / 8] & BIT(irq % 8)) != 0);
@@ -1296,14 +1296,14 @@ static int __init ab3550_probe(struct i2c_client *client,
1296 unsigned int irq; 1296 unsigned int irq;
1297 1297
1298 irq = ab3550_plf_data->irq.base + i; 1298 irq = ab3550_plf_data->irq.base + i;
1299 set_irq_chip_data(irq, ab); 1299 irq_set_chip_data(irq, ab);
1300 set_irq_chip_and_handler(irq, &ab3550_irq_chip, 1300 irq_set_chip_and_handler(irq, &ab3550_irq_chip,
1301 handle_simple_irq); 1301 handle_simple_irq);
1302 set_irq_nested_thread(irq, 1); 1302 irq_set_nested_thread(irq, 1);
1303#ifdef CONFIG_ARM 1303#ifdef CONFIG_ARM
1304 set_irq_flags(irq, IRQF_VALID); 1304 set_irq_flags(irq, IRQF_VALID);
1305#else 1305#else
1306 set_irq_noprobe(irq); 1306 irq_set_noprobe(irq);
1307#endif 1307#endif
1308 } 1308 }
1309 1309
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 6e185b272d00..62e33e2258d4 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -334,14 +334,14 @@ static int ab8500_irq_init(struct ab8500 *ab8500)
334 int irq; 334 int irq;
335 335
336 for (irq = base; irq < base + AB8500_NR_IRQS; irq++) { 336 for (irq = base; irq < base + AB8500_NR_IRQS; irq++) {
337 set_irq_chip_data(irq, ab8500); 337 irq_set_chip_data(irq, ab8500);
338 set_irq_chip_and_handler(irq, &ab8500_irq_chip, 338 irq_set_chip_and_handler(irq, &ab8500_irq_chip,
339 handle_simple_irq); 339 handle_simple_irq);
340 set_irq_nested_thread(irq, 1); 340 irq_set_nested_thread(irq, 1);
341#ifdef CONFIG_ARM 341#ifdef CONFIG_ARM
342 set_irq_flags(irq, IRQF_VALID); 342 set_irq_flags(irq, IRQF_VALID);
343#else 343#else
344 set_irq_noprobe(irq); 344 irq_set_noprobe(irq);
345#endif 345#endif
346 } 346 }
347 347
@@ -357,8 +357,8 @@ static void ab8500_irq_remove(struct ab8500 *ab8500)
357#ifdef CONFIG_ARM 357#ifdef CONFIG_ARM
358 set_irq_flags(irq, 0); 358 set_irq_flags(irq, 0);
359#endif 359#endif
360 set_irq_chip_and_handler(irq, NULL, NULL); 360 irq_set_chip_and_handler(irq, NULL, NULL);
361 set_irq_chip_data(irq, NULL); 361 irq_set_chip_data(irq, NULL);
362 } 362 }
363} 363}
364 364
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 0241f08fc00d..d4a851c6b5bf 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -139,13 +139,12 @@ static void asic3_irq_flip_edge(struct asic3 *asic,
139 139
140static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc) 140static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
141{ 141{
142 struct asic3 *asic = irq_desc_get_handler_data(desc);
143 struct irq_data *data = irq_desc_get_irq_data(desc);
142 int iter, i; 144 int iter, i;
143 unsigned long flags; 145 unsigned long flags;
144 struct asic3 *asic;
145
146 desc->irq_data.chip->irq_ack(&desc->irq_data);
147 146
 148 asic = get_irq_data(irq); 147 data->chip->irq_ack(data);
149 148
150 for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) { 149 for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) {
151 u32 status; 150 u32 status;
@@ -188,8 +187,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
188 irqnr = asic->irq_base + 187 irqnr = asic->irq_base +
189 (ASIC3_GPIOS_PER_BANK * bank) 188 (ASIC3_GPIOS_PER_BANK * bank)
190 + i; 189 + i;
191 desc = irq_to_desc(irqnr); 190 generic_handle_irq(irqnr);
192 desc->handle_irq(irqnr, desc);
193 if (asic->irq_bothedge[bank] & bit) 191 if (asic->irq_bothedge[bank] & bit)
194 asic3_irq_flip_edge(asic, base, 192 asic3_irq_flip_edge(asic, base,
195 bit); 193 bit);
@@ -200,11 +198,8 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
200 /* Handle remaining IRQs in the status register */ 198 /* Handle remaining IRQs in the status register */
201 for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) { 199 for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) {
202 /* They start at bit 4 and go up */ 200 /* They start at bit 4 and go up */
203 if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) { 201 if (status & (1 << (i - ASIC3_NUM_GPIOS + 4)))
204 desc = irq_to_desc(asic->irq_base + i); 202 generic_handle_irq(asic->irq_base + i);
205 desc->handle_irq(asic->irq_base + i,
206 desc);
207 }
208 } 203 }
209 } 204 }
210 205
@@ -393,21 +388,21 @@ static int __init asic3_irq_probe(struct platform_device *pdev)
393 388
394 for (irq = irq_base; irq < irq_base + ASIC3_NR_IRQS; irq++) { 389 for (irq = irq_base; irq < irq_base + ASIC3_NR_IRQS; irq++) {
395 if (irq < asic->irq_base + ASIC3_NUM_GPIOS) 390 if (irq < asic->irq_base + ASIC3_NUM_GPIOS)
396 set_irq_chip(irq, &asic3_gpio_irq_chip); 391 irq_set_chip(irq, &asic3_gpio_irq_chip);
397 else 392 else
398 set_irq_chip(irq, &asic3_irq_chip); 393 irq_set_chip(irq, &asic3_irq_chip);
399 394
400 set_irq_chip_data(irq, asic); 395 irq_set_chip_data(irq, asic);
401 set_irq_handler(irq, handle_level_irq); 396 irq_set_handler(irq, handle_level_irq);
402 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); 397 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
403 } 398 }
404 399
405 asic3_write_register(asic, ASIC3_OFFSET(INTR, INT_MASK), 400 asic3_write_register(asic, ASIC3_OFFSET(INTR, INT_MASK),
406 ASIC3_INTMASK_GINTMASK); 401 ASIC3_INTMASK_GINTMASK);
407 402
408 set_irq_chained_handler(asic->irq_nr, asic3_irq_demux); 403 irq_set_chained_handler(asic->irq_nr, asic3_irq_demux);
409 set_irq_type(asic->irq_nr, IRQ_TYPE_EDGE_RISING); 404 irq_set_irq_type(asic->irq_nr, IRQ_TYPE_EDGE_RISING);
410 set_irq_data(asic->irq_nr, asic); 405 irq_set_handler_data(asic->irq_nr, asic);
411 406
412 return 0; 407 return 0;
413} 408}
@@ -421,11 +416,10 @@ static void asic3_irq_remove(struct platform_device *pdev)
421 416
422 for (irq = irq_base; irq < irq_base + ASIC3_NR_IRQS; irq++) { 417 for (irq = irq_base; irq < irq_base + ASIC3_NR_IRQS; irq++) {
423 set_irq_flags(irq, 0); 418 set_irq_flags(irq, 0);
424 set_irq_handler(irq, NULL); 419 irq_set_chip_and_handler(irq, NULL, NULL);
425 set_irq_chip(irq, NULL); 420 irq_set_chip_data(irq, NULL);
426 set_irq_chip_data(irq, NULL);
427 } 421 }
428 set_irq_chained_handler(asic->irq_nr, NULL); 422 irq_set_chained_handler(asic->irq_nr, NULL);
429} 423}
430 424
431/* GPIOs */ 425/* GPIOs */
diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c
index 886a06871065..155fa0407882 100644
--- a/drivers/mfd/cs5535-mfd.c
+++ b/drivers/mfd/cs5535-mfd.c
@@ -27,6 +27,7 @@
27#include <linux/mfd/core.h> 27#include <linux/mfd/core.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/pci.h> 29#include <linux/pci.h>
30#include <asm/olpc.h>
30 31
31#define DRV_NAME "cs5535-mfd" 32#define DRV_NAME "cs5535-mfd"
32 33
@@ -111,6 +112,20 @@ static __devinitdata struct mfd_cell cs5535_mfd_cells[] = {
111 }, 112 },
112}; 113};
113 114
115#ifdef CONFIG_OLPC
116static void __devinit cs5535_clone_olpc_cells(void)
117{
118 const char *acpi_clones[] = { "olpc-xo1-pm-acpi", "olpc-xo1-sci-acpi" };
119
120 if (!machine_is_olpc())
121 return;
122
123 mfd_clone_cell("cs5535-acpi", acpi_clones, ARRAY_SIZE(acpi_clones));
124}
125#else
126static void cs5535_clone_olpc_cells(void) { }
127#endif
128
114static int __devinit cs5535_mfd_probe(struct pci_dev *pdev, 129static int __devinit cs5535_mfd_probe(struct pci_dev *pdev,
115 const struct pci_device_id *id) 130 const struct pci_device_id *id)
116{ 131{
@@ -139,6 +154,7 @@ static int __devinit cs5535_mfd_probe(struct pci_dev *pdev,
139 dev_err(&pdev->dev, "MFD add devices failed: %d\n", err); 154 dev_err(&pdev->dev, "MFD add devices failed: %d\n", err);
140 goto err_disable; 155 goto err_disable;
141 } 156 }
157 cs5535_clone_olpc_cells();
142 158
143 dev_info(&pdev->dev, "%zu devices registered.\n", 159 dev_info(&pdev->dev, "%zu devices registered.\n",
144 ARRAY_SIZE(cs5535_mfd_cells)); 160 ARRAY_SIZE(cs5535_mfd_cells));
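cs5535_clone_olpc_cells() runs only when machine_is_olpc() and uses mfd_clone_cell() to register extra platform devices (olpc-xo1-pm-acpi, olpc-xo1-sci-acpi) that share the resources of the already-registered cs5535-acpi cell. A simplified userspace sketch of the cloning idea; struct cell and the I/O range below are stand-ins, not the kernel's struct mfd_cell:

	#include <stddef.h>
	#include <stdio.h>

	/* Simplified stand-in for a registered MFD cell. */
	struct cell {
		const char *name;
		const char *resources;	/* shared region, abstracted */
	};

	/* Clones get new names but reuse the base cell's resources. */
	static void clone_cell(const struct cell *base,
			       const char **clones, size_t n)
	{
		for (size_t i = 0; i < n; i++)
			printf("register %s (resources: %s)\n",
			       clones[i], base->resources);
	}

	int main(void)
	{
		struct cell acpi = { "cs5535-acpi", "0x9c00-0x9fff" };
		const char *clones[] = { "olpc-xo1-pm-acpi",
					 "olpc-xo1-sci-acpi" };

		clone_cell(&acpi, clones, 2);
		return 0;
	}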
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index 9e2d8dd5f9e5..f2f4029e21a0 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -162,6 +162,7 @@ static void pcap_unmask_irq(struct irq_data *d)
162 162
163static struct irq_chip pcap_irq_chip = { 163static struct irq_chip pcap_irq_chip = {
164 .name = "pcap", 164 .name = "pcap",
165 .irq_disable = pcap_mask_irq,
165 .irq_mask = pcap_mask_irq, 166 .irq_mask = pcap_mask_irq,
166 .irq_unmask = pcap_unmask_irq, 167 .irq_unmask = pcap_unmask_irq,
167}; 168};
@@ -196,17 +197,8 @@ static void pcap_isr_work(struct work_struct *work)
196 local_irq_disable(); 197 local_irq_disable();
197 service = isr & ~msr; 198 service = isr & ~msr;
198 for (irq = pcap->irq_base; service; service >>= 1, irq++) { 199 for (irq = pcap->irq_base; service; service >>= 1, irq++) {
199 if (service & 1) { 200 if (service & 1)
200 struct irq_desc *desc = irq_to_desc(irq); 201 generic_handle_irq(irq);
201
202 if (WARN(!desc, "Invalid PCAP IRQ %d\n", irq))
203 break;
204
205 if (desc->status & IRQ_DISABLED)
206 note_interrupt(irq, desc, IRQ_NONE);
207 else
208 desc->handle_irq(irq, desc);
209 }
210 } 202 }
211 local_irq_enable(); 203 local_irq_enable();
212 ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr); 204 ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
@@ -215,7 +207,7 @@ static void pcap_isr_work(struct work_struct *work)
215 207
216static void pcap_irq_handler(unsigned int irq, struct irq_desc *desc) 208static void pcap_irq_handler(unsigned int irq, struct irq_desc *desc)
217{ 209{
218 struct pcap_chip *pcap = get_irq_data(irq); 210 struct pcap_chip *pcap = irq_get_handler_data(irq);
219 211
220 desc->irq_data.chip->irq_ack(&desc->irq_data); 212 desc->irq_data.chip->irq_ack(&desc->irq_data);
221 queue_work(pcap->workqueue, &pcap->isr_work); 213 queue_work(pcap->workqueue, &pcap->isr_work);
@@ -419,7 +411,7 @@ static int __devexit ezx_pcap_remove(struct spi_device *spi)
419 411
420 /* cleanup irqchip */ 412 /* cleanup irqchip */
421 for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++) 413 for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
422 set_irq_chip_and_handler(i, NULL, NULL); 414 irq_set_chip_and_handler(i, NULL, NULL);
423 415
424 destroy_workqueue(pcap->workqueue); 416 destroy_workqueue(pcap->workqueue);
425 417
@@ -476,12 +468,12 @@ static int __devinit ezx_pcap_probe(struct spi_device *spi)
476 468
477 /* setup irq chip */ 469 /* setup irq chip */
478 for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++) { 470 for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++) {
479 set_irq_chip_and_handler(i, &pcap_irq_chip, handle_simple_irq); 471 irq_set_chip_and_handler(i, &pcap_irq_chip, handle_simple_irq);
480 set_irq_chip_data(i, pcap); 472 irq_set_chip_data(i, pcap);
481#ifdef CONFIG_ARM 473#ifdef CONFIG_ARM
482 set_irq_flags(i, IRQF_VALID); 474 set_irq_flags(i, IRQF_VALID);
483#else 475#else
484 set_irq_noprobe(i); 476 irq_set_noprobe(i);
485#endif 477#endif
486 } 478 }
487 479
@@ -490,10 +482,10 @@ static int __devinit ezx_pcap_probe(struct spi_device *spi)
490 ezx_pcap_write(pcap, PCAP_REG_ISR, PCAP_CLEAR_INTERRUPT_REGISTER); 482 ezx_pcap_write(pcap, PCAP_REG_ISR, PCAP_CLEAR_INTERRUPT_REGISTER);
491 pcap->msr = PCAP_MASK_ALL_INTERRUPT; 483 pcap->msr = PCAP_MASK_ALL_INTERRUPT;
492 484
493 set_irq_type(spi->irq, IRQ_TYPE_EDGE_RISING); 485 irq_set_irq_type(spi->irq, IRQ_TYPE_EDGE_RISING);
494 set_irq_data(spi->irq, pcap); 486 irq_set_handler_data(spi->irq, pcap);
495 set_irq_chained_handler(spi->irq, pcap_irq_handler); 487 irq_set_chained_handler(spi->irq, pcap_irq_handler);
496 set_irq_wake(spi->irq, 1); 488 irq_set_irq_wake(spi->irq, 1);
497 489
498 /* ADC */ 490 /* ADC */
499 adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ? 491 adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ?
@@ -522,7 +514,7 @@ remove_subdevs:
522 free_irq(adc_irq, pcap); 514 free_irq(adc_irq, pcap);
523free_irqchip: 515free_irqchip:
524 for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++) 516 for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
525 set_irq_chip_and_handler(i, NULL, NULL); 517 irq_set_chip_and_handler(i, NULL, NULL);
526/* destroy_workqueue: */ 518/* destroy_workqueue: */
527 destroy_workqueue(pcap->workqueue); 519 destroy_workqueue(pcap->workqueue);
528free_pcap: 520free_pcap:
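pcap_isr_work() keeps its bit scan over isr & ~msr but now hands every pending source to generic_handle_irq() instead of walking irq_desc by hand. A self-contained sketch of that dispatch loop; the handler stub, IRQ base, and register values are invented for the example:

	#include <stdio.h>

	#define IRQ_BASE 96	/* hypothetical first virtual IRQ */

	/* Stands in for generic_handle_irq(). */
	static void dispatch(unsigned int irq)
	{
		printf("handle irq %u\n", irq);
	}

	int main(void)
	{
		unsigned int isr = 0x2c;	/* pending sources */
		unsigned int msr = 0x08;	/* masked sources */
		unsigned int service = isr & ~msr;
		unsigned int irq;

		/* Walk the set bits, mapping bit n to IRQ_BASE + n. */
		for (irq = IRQ_BASE; service; service >>= 1, irq++)
			if (service & 1)
				dispatch(irq);
		return 0;
	}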
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index d00b6d1a69e5..bbaec0ccba8f 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -100,7 +100,7 @@ static struct irq_chip egpio_muxed_chip = {
100 100
101static void egpio_handler(unsigned int irq, struct irq_desc *desc) 101static void egpio_handler(unsigned int irq, struct irq_desc *desc)
102{ 102{
103 struct egpio_info *ei = get_irq_data(irq); 103 struct egpio_info *ei = irq_desc_get_handler_data(desc);
104 int irqpin; 104 int irqpin;
105 105
106 /* Read current pins. */ 106 /* Read current pins. */
@@ -113,9 +113,7 @@ static void egpio_handler(unsigned int irq, struct irq_desc *desc)
113 for_each_set_bit(irqpin, &readval, ei->nirqs) { 113 for_each_set_bit(irqpin, &readval, ei->nirqs) {
114 /* Run irq handler */ 114 /* Run irq handler */
115 pr_debug("got IRQ %d\n", irqpin); 115 pr_debug("got IRQ %d\n", irqpin);
116 irq = ei->irq_start + irqpin; 116 generic_handle_irq(ei->irq_start + irqpin);
117 desc = irq_to_desc(irq);
118 desc->handle_irq(irq, desc);
119 } 117 }
120} 118}
121 119
@@ -346,14 +344,14 @@ static int __init egpio_probe(struct platform_device *pdev)
346 ei->ack_write = 0; 344 ei->ack_write = 0;
347 irq_end = ei->irq_start + ei->nirqs; 345 irq_end = ei->irq_start + ei->nirqs;
348 for (irq = ei->irq_start; irq < irq_end; irq++) { 346 for (irq = ei->irq_start; irq < irq_end; irq++) {
349 set_irq_chip(irq, &egpio_muxed_chip); 347 irq_set_chip_and_handler(irq, &egpio_muxed_chip,
350 set_irq_chip_data(irq, ei); 348 handle_simple_irq);
351 set_irq_handler(irq, handle_simple_irq); 349 irq_set_chip_data(irq, ei);
352 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); 350 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
353 } 351 }
354 set_irq_type(ei->chained_irq, IRQ_TYPE_EDGE_RISING); 352 irq_set_irq_type(ei->chained_irq, IRQ_TYPE_EDGE_RISING);
355 set_irq_data(ei->chained_irq, ei); 353 irq_set_handler_data(ei->chained_irq, ei);
356 set_irq_chained_handler(ei->chained_irq, egpio_handler); 354 irq_set_chained_handler(ei->chained_irq, egpio_handler);
357 ack_irqs(ei); 355 ack_irqs(ei);
358 356
359 device_init_wakeup(&pdev->dev, 1); 357 device_init_wakeup(&pdev->dev, 1);
@@ -375,11 +373,10 @@ static int __exit egpio_remove(struct platform_device *pdev)
375 if (ei->chained_irq) { 373 if (ei->chained_irq) {
376 irq_end = ei->irq_start + ei->nirqs; 374 irq_end = ei->irq_start + ei->nirqs;
377 for (irq = ei->irq_start; irq < irq_end; irq++) { 375 for (irq = ei->irq_start; irq < irq_end; irq++) {
378 set_irq_chip(irq, NULL); 376 irq_set_chip_and_handler(irq, NULL, NULL);
379 set_irq_handler(irq, NULL);
380 set_irq_flags(irq, 0); 377 set_irq_flags(irq, 0);
381 } 378 }
382 set_irq_chained_handler(ei->chained_irq, NULL); 379 irq_set_chained_handler(ei->chained_irq, NULL);
383 device_init_wakeup(&pdev->dev, 0); 380 device_init_wakeup(&pdev->dev, 0);
384 } 381 }
385 iounmap(ei->base_addr); 382 iounmap(ei->base_addr);
diff --git a/drivers/mfd/htc-i2cpld.c b/drivers/mfd/htc-i2cpld.c
index 296ad1562f69..d55065cc324c 100644
--- a/drivers/mfd/htc-i2cpld.c
+++ b/drivers/mfd/htc-i2cpld.c
@@ -58,6 +58,7 @@ struct htcpld_chip {
58 uint irq_start; 58 uint irq_start;
59 int nirqs; 59 int nirqs;
60 60
61 unsigned int flow_type;
61 /* 62 /*
62 * Work structure to allow for setting values outside of any 63 * Work structure to allow for setting values outside of any
63 * possible interrupt context 64 * possible interrupt context
@@ -97,12 +98,7 @@ static void htcpld_unmask(struct irq_data *data)
97 98
98static int htcpld_set_type(struct irq_data *data, unsigned int flags) 99static int htcpld_set_type(struct irq_data *data, unsigned int flags)
99{ 100{
100 struct irq_desc *d = irq_to_desc(data->irq); 101 struct htcpld_chip *chip = irq_data_get_irq_chip_data(data);
101
102 if (!d) {
103 pr_err("HTCPLD invalid IRQ: %d\n", data->irq);
104 return -EINVAL;
105 }
106 102
107 if (flags & ~IRQ_TYPE_SENSE_MASK) 103 if (flags & ~IRQ_TYPE_SENSE_MASK)
108 return -EINVAL; 104 return -EINVAL;
@@ -111,9 +107,7 @@ static int htcpld_set_type(struct irq_data *data, unsigned int flags)
111 if (flags & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH)) 107 if (flags & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH))
112 return -EINVAL; 108 return -EINVAL;
113 109
114 d->status &= ~IRQ_TYPE_SENSE_MASK; 110 chip->flow_type = flags;
115 d->status |= flags;
116
117 return 0; 111 return 0;
118} 112}
119 113
@@ -135,7 +129,6 @@ static irqreturn_t htcpld_handler(int irq, void *dev)
135 unsigned int i; 129 unsigned int i;
136 unsigned long flags; 130 unsigned long flags;
137 int irqpin; 131 int irqpin;
138 struct irq_desc *desc;
139 132
140 if (!htcpld) { 133 if (!htcpld) {
141 pr_debug("htcpld is null in ISR\n"); 134 pr_debug("htcpld is null in ISR\n");
@@ -195,23 +188,19 @@ static irqreturn_t htcpld_handler(int irq, void *dev)
195 * associated interrupts. 188 * associated interrupts.
196 */ 189 */
197 for (irqpin = 0; irqpin < chip->nirqs; irqpin++) { 190 for (irqpin = 0; irqpin < chip->nirqs; irqpin++) {
198 unsigned oldb, newb; 191 unsigned oldb, newb, type = chip->flow_type;
199 int flags;
200 192
201 irq = chip->irq_start + irqpin; 193 irq = chip->irq_start + irqpin;
202 desc = irq_to_desc(irq);
203 flags = desc->status;
204 194
205 /* Run the IRQ handler, but only if the bit value 195 /* Run the IRQ handler, but only if the bit value
206 * changed, and the proper flags are set */ 196 * changed, and the proper flags are set */
207 oldb = (old_val >> irqpin) & 1; 197 oldb = (old_val >> irqpin) & 1;
208 newb = (uval >> irqpin) & 1; 198 newb = (uval >> irqpin) & 1;
209 199
210 if ((!oldb && newb && (flags & IRQ_TYPE_EDGE_RISING)) || 200 if ((!oldb && newb && (type & IRQ_TYPE_EDGE_RISING)) ||
211 (oldb && !newb && 201 (oldb && !newb && (type & IRQ_TYPE_EDGE_FALLING))) {
212 (flags & IRQ_TYPE_EDGE_FALLING))) {
213 pr_debug("fire IRQ %d\n", irqpin); 202 pr_debug("fire IRQ %d\n", irqpin);
214 desc->handle_irq(irq, desc); 203 generic_handle_irq(irq);
215 } 204 }
216 } 205 }
217 } 206 }
@@ -359,13 +348,13 @@ static int __devinit htcpld_setup_chip_irq(
359 /* Setup irq handlers */ 348 /* Setup irq handlers */
360 irq_end = chip->irq_start + chip->nirqs; 349 irq_end = chip->irq_start + chip->nirqs;
361 for (irq = chip->irq_start; irq < irq_end; irq++) { 350 for (irq = chip->irq_start; irq < irq_end; irq++) {
362 set_irq_chip(irq, &htcpld_muxed_chip); 351 irq_set_chip_and_handler(irq, &htcpld_muxed_chip,
363 set_irq_chip_data(irq, chip); 352 handle_simple_irq);
364 set_irq_handler(irq, handle_simple_irq); 353 irq_set_chip_data(irq, chip);
365#ifdef CONFIG_ARM 354#ifdef CONFIG_ARM
366 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); 355 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
367#else 356#else
368 set_irq_probe(irq); 357 irq_set_probe(irq);
369#endif 358#endif
370 } 359 }
371 360
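Because the trigger type can no longer be read back from desc->status, htc-i2cpld caches it in its own chip->flow_type from htcpld_set_type() and does the edge matching itself: a 0-to-1 transition fires only when rising edges were requested, a 1-to-0 transition only for falling edges. A compile-able sketch of that test; the flag values are illustrative:

	#include <stdbool.h>
	#include <stdio.h>

	#define EDGE_RISING	0x1	/* illustrative flag values */
	#define EDGE_FALLING	0x2

	static bool should_fire(unsigned oldb, unsigned newb, unsigned type)
	{
		return (!oldb && newb && (type & EDGE_RISING)) ||
		       (oldb && !newb && (type & EDGE_FALLING));
	}

	int main(void)
	{
		printf("%d\n", should_fire(0, 1, EDGE_RISING));	 /* 1: fires */
		printf("%d\n", should_fire(1, 0, EDGE_RISING));	 /* 0: wrong edge */
		printf("%d\n", should_fire(1, 0, EDGE_FALLING)); /* 1: fires */
		return 0;
	}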
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index aa518b9beaf5..a0bd0cf05af3 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c
@@ -112,7 +112,7 @@ static struct irq_chip jz4740_adc_irq_chip = {
112 112
113static void jz4740_adc_irq_demux(unsigned int irq, struct irq_desc *desc) 113static void jz4740_adc_irq_demux(unsigned int irq, struct irq_desc *desc)
114{ 114{
115 struct jz4740_adc *adc = get_irq_desc_data(desc); 115 struct jz4740_adc *adc = irq_desc_get_handler_data(desc);
116 uint8_t status; 116 uint8_t status;
117 unsigned int i; 117 unsigned int i;
118 118
@@ -310,13 +310,13 @@ static int __devinit jz4740_adc_probe(struct platform_device *pdev)
310 platform_set_drvdata(pdev, adc); 310 platform_set_drvdata(pdev, adc);
311 311
312 for (irq = adc->irq_base; irq < adc->irq_base + 5; ++irq) { 312 for (irq = adc->irq_base; irq < adc->irq_base + 5; ++irq) {
313 set_irq_chip_data(irq, adc); 313 irq_set_chip_data(irq, adc);
314 set_irq_chip_and_handler(irq, &jz4740_adc_irq_chip, 314 irq_set_chip_and_handler(irq, &jz4740_adc_irq_chip,
315 handle_level_irq); 315 handle_level_irq);
316 } 316 }
317 317
318 set_irq_data(adc->irq, adc); 318 irq_set_handler_data(adc->irq, adc);
319 set_irq_chained_handler(adc->irq, jz4740_adc_irq_demux); 319 irq_set_chained_handler(adc->irq, jz4740_adc_irq_demux);
320 320
321 writeb(0x00, adc->base + JZ_REG_ADC_ENABLE); 321 writeb(0x00, adc->base + JZ_REG_ADC_ENABLE);
322 writeb(0xff, adc->base + JZ_REG_ADC_CTRL); 322 writeb(0xff, adc->base + JZ_REG_ADC_CTRL);
@@ -347,8 +347,8 @@ static int __devexit jz4740_adc_remove(struct platform_device *pdev)
347 347
348 mfd_remove_devices(&pdev->dev); 348 mfd_remove_devices(&pdev->dev);
349 349
350 set_irq_data(adc->irq, NULL); 350 irq_set_handler_data(adc->irq, NULL);
351 set_irq_chained_handler(adc->irq, NULL); 351 irq_set_chained_handler(adc->irq, NULL);
352 352
353 iounmap(adc->base); 353 iounmap(adc->base);
354 release_mem_region(adc->mem->start, resource_size(adc->mem)); 354 release_mem_region(adc->mem->start, resource_size(adc->mem));
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 0e998dc4e7d8..58cc5fdde016 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -517,7 +517,6 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq,
517 struct max8925_platform_data *pdata) 517 struct max8925_platform_data *pdata)
518{ 518{
519 unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; 519 unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
520 struct irq_desc *desc;
521 int i, ret; 520 int i, ret;
522 int __irq; 521 int __irq;
523 522
@@ -544,19 +543,18 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq,
544 mutex_init(&chip->irq_lock); 543 mutex_init(&chip->irq_lock);
545 chip->core_irq = irq; 544 chip->core_irq = irq;
546 chip->irq_base = pdata->irq_base; 545 chip->irq_base = pdata->irq_base;
547 desc = irq_to_desc(chip->core_irq);
548 546
549 /* register with genirq */ 547 /* register with genirq */
550 for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) { 548 for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) {
551 __irq = i + chip->irq_base; 549 __irq = i + chip->irq_base;
552 set_irq_chip_data(__irq, chip); 550 irq_set_chip_data(__irq, chip);
553 set_irq_chip_and_handler(__irq, &max8925_irq_chip, 551 irq_set_chip_and_handler(__irq, &max8925_irq_chip,
554 handle_edge_irq); 552 handle_edge_irq);
555 set_irq_nested_thread(__irq, 1); 553 irq_set_nested_thread(__irq, 1);
556#ifdef CONFIG_ARM 554#ifdef CONFIG_ARM
557 set_irq_flags(__irq, IRQF_VALID); 555 set_irq_flags(__irq, IRQF_VALID);
558#else 556#else
559 set_irq_noprobe(__irq); 557 irq_set_noprobe(__irq);
560#endif 558#endif
561 } 559 }
562 if (!irq) { 560 if (!irq) {
diff --git a/drivers/mfd/max8997-irq.c b/drivers/mfd/max8997-irq.c
new file mode 100644
index 000000000000..638bf7e4d3b3
--- /dev/null
+++ b/drivers/mfd/max8997-irq.c
@@ -0,0 +1,377 @@
1/*
2 * max8997-irq.c - Interrupt controller support for MAX8997
3 *
4 * Copyright (C) 2011 Samsung Electronics Co.Ltd
5 * MyungJoo Ham <myungjoo.ham@samsung.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * This driver is based on max8998-irq.c
22 */
23
24#include <linux/err.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
27#include <linux/mfd/max8997.h>
28#include <linux/mfd/max8997-private.h>
29
30static const u8 max8997_mask_reg[] = {
31 [PMIC_INT1] = MAX8997_REG_INT1MSK,
32 [PMIC_INT2] = MAX8997_REG_INT2MSK,
33 [PMIC_INT3] = MAX8997_REG_INT3MSK,
34 [PMIC_INT4] = MAX8997_REG_INT4MSK,
35 [FUEL_GAUGE] = MAX8997_REG_INVALID,
36 [MUIC_INT1] = MAX8997_MUIC_REG_INTMASK1,
37 [MUIC_INT2] = MAX8997_MUIC_REG_INTMASK2,
38 [MUIC_INT3] = MAX8997_MUIC_REG_INTMASK3,
39 [GPIO_LOW] = MAX8997_REG_INVALID,
40 [GPIO_HI] = MAX8997_REG_INVALID,
41 [FLASH_STATUS] = MAX8997_REG_INVALID,
42};
43
44static struct i2c_client *get_i2c(struct max8997_dev *max8997,
45 enum max8997_irq_source src)
46{
47 switch (src) {
48 case PMIC_INT1 ... PMIC_INT4:
49 return max8997->i2c;
50 case FUEL_GAUGE:
51 return NULL;
52 case MUIC_INT1 ... MUIC_INT3:
53 return max8997->muic;
54 case GPIO_LOW ... GPIO_HI:
55 return max8997->i2c;
56 case FLASH_STATUS:
57 return max8997->i2c;
58 default:
59 return ERR_PTR(-EINVAL);
60 }
61
62 return ERR_PTR(-EINVAL);
63}
64
65struct max8997_irq_data {
66 int mask;
67 enum max8997_irq_source group;
68};
69
70#define DECLARE_IRQ(idx, _group, _mask) \
71 [(idx)] = { .group = (_group), .mask = (_mask) }
72static const struct max8997_irq_data max8997_irqs[] = {
73 DECLARE_IRQ(MAX8997_PMICIRQ_PWRONR, PMIC_INT1, 1 << 0),
74 DECLARE_IRQ(MAX8997_PMICIRQ_PWRONF, PMIC_INT1, 1 << 1),
75 DECLARE_IRQ(MAX8997_PMICIRQ_PWRON1SEC, PMIC_INT1, 1 << 3),
76 DECLARE_IRQ(MAX8997_PMICIRQ_JIGONR, PMIC_INT1, 1 << 4),
77 DECLARE_IRQ(MAX8997_PMICIRQ_JIGONF, PMIC_INT1, 1 << 5),
78 DECLARE_IRQ(MAX8997_PMICIRQ_LOWBAT2, PMIC_INT1, 1 << 6),
79 DECLARE_IRQ(MAX8997_PMICIRQ_LOWBAT1, PMIC_INT1, 1 << 7),
80
81 DECLARE_IRQ(MAX8997_PMICIRQ_JIGR, PMIC_INT2, 1 << 0),
82 DECLARE_IRQ(MAX8997_PMICIRQ_JIGF, PMIC_INT2, 1 << 1),
83 DECLARE_IRQ(MAX8997_PMICIRQ_MR, PMIC_INT2, 1 << 2),
84 DECLARE_IRQ(MAX8997_PMICIRQ_DVS1OK, PMIC_INT2, 1 << 3),
85 DECLARE_IRQ(MAX8997_PMICIRQ_DVS2OK, PMIC_INT2, 1 << 4),
86 DECLARE_IRQ(MAX8997_PMICIRQ_DVS3OK, PMIC_INT2, 1 << 5),
87 DECLARE_IRQ(MAX8997_PMICIRQ_DVS4OK, PMIC_INT2, 1 << 6),
88
89 DECLARE_IRQ(MAX8997_PMICIRQ_CHGINS, PMIC_INT3, 1 << 0),
90 DECLARE_IRQ(MAX8997_PMICIRQ_CHGRM, PMIC_INT3, 1 << 1),
91 DECLARE_IRQ(MAX8997_PMICIRQ_DCINOVP, PMIC_INT3, 1 << 2),
92 DECLARE_IRQ(MAX8997_PMICIRQ_TOPOFFR, PMIC_INT3, 1 << 3),
93 DECLARE_IRQ(MAX8997_PMICIRQ_CHGRSTF, PMIC_INT3, 1 << 5),
94 DECLARE_IRQ(MAX8997_PMICIRQ_MBCHGTMEXPD, PMIC_INT3, 1 << 7),
95
96 DECLARE_IRQ(MAX8997_PMICIRQ_RTC60S, PMIC_INT4, 1 << 0),
97 DECLARE_IRQ(MAX8997_PMICIRQ_RTCA1, PMIC_INT4, 1 << 1),
98 DECLARE_IRQ(MAX8997_PMICIRQ_RTCA2, PMIC_INT4, 1 << 2),
99 DECLARE_IRQ(MAX8997_PMICIRQ_SMPL_INT, PMIC_INT4, 1 << 3),
100 DECLARE_IRQ(MAX8997_PMICIRQ_RTC1S, PMIC_INT4, 1 << 4),
101 DECLARE_IRQ(MAX8997_PMICIRQ_WTSR, PMIC_INT4, 1 << 5),
102
103 DECLARE_IRQ(MAX8997_MUICIRQ_ADCError, MUIC_INT1, 1 << 2),
104 DECLARE_IRQ(MAX8997_MUICIRQ_ADCLow, MUIC_INT1, 1 << 1),
105 DECLARE_IRQ(MAX8997_MUICIRQ_ADC, MUIC_INT1, 1 << 0),
106
107 DECLARE_IRQ(MAX8997_MUICIRQ_VBVolt, MUIC_INT2, 1 << 4),
108 DECLARE_IRQ(MAX8997_MUICIRQ_DBChg, MUIC_INT2, 1 << 3),
109 DECLARE_IRQ(MAX8997_MUICIRQ_DCDTmr, MUIC_INT2, 1 << 2),
110 DECLARE_IRQ(MAX8997_MUICIRQ_ChgDetRun, MUIC_INT2, 1 << 1),
111 DECLARE_IRQ(MAX8997_MUICIRQ_ChgTyp, MUIC_INT2, 1 << 0),
112
113 DECLARE_IRQ(MAX8997_MUICIRQ_OVP, MUIC_INT3, 1 << 2),
114};
115
116static void max8997_irq_lock(struct irq_data *data)
117{
118 struct max8997_dev *max8997 = irq_get_chip_data(data->irq);
119
120 mutex_lock(&max8997->irqlock);
121}
122
123static void max8997_irq_sync_unlock(struct irq_data *data)
124{
125 struct max8997_dev *max8997 = irq_get_chip_data(data->irq);
126 int i;
127
128 for (i = 0; i < MAX8997_IRQ_GROUP_NR; i++) {
129 u8 mask_reg = max8997_mask_reg[i];
130 struct i2c_client *i2c = get_i2c(max8997, i);
131
132 if (mask_reg == MAX8997_REG_INVALID ||
133 IS_ERR_OR_NULL(i2c))
134 continue;
135 max8997->irq_masks_cache[i] = max8997->irq_masks_cur[i];
136
137 max8997_write_reg(i2c, max8997_mask_reg[i],
138 max8997->irq_masks_cur[i]);
139 }
140
141 mutex_unlock(&max8997->irqlock);
142}
143
144static inline const struct max8997_irq_data *
145irq_to_max8997_irq(struct max8997_dev *max8997, int irq)
146{
147 return &max8997_irqs[irq - max8997->irq_base];
148}
149
150static void max8997_irq_mask(struct irq_data *data)
151{
152 struct max8997_dev *max8997 = irq_get_chip_data(data->irq);
153 const struct max8997_irq_data *irq_data = irq_to_max8997_irq(max8997,
154 data->irq);
155
156 max8997->irq_masks_cur[irq_data->group] |= irq_data->mask;
157}
158
159static void max8997_irq_unmask(struct irq_data *data)
160{
161 struct max8997_dev *max8997 = irq_get_chip_data(data->irq);
162 const struct max8997_irq_data *irq_data = irq_to_max8997_irq(max8997,
163 data->irq);
164
165 max8997->irq_masks_cur[irq_data->group] &= ~irq_data->mask;
166}
167
168static struct irq_chip max8997_irq_chip = {
169 .name = "max8997",
170 .irq_bus_lock = max8997_irq_lock,
171 .irq_bus_sync_unlock = max8997_irq_sync_unlock,
172 .irq_mask = max8997_irq_mask,
173 .irq_unmask = max8997_irq_unmask,
174};
175
176#define MAX8997_IRQSRC_PMIC (1 << 1)
177#define MAX8997_IRQSRC_FUELGAUGE (1 << 2)
178#define MAX8997_IRQSRC_MUIC (1 << 3)
179#define MAX8997_IRQSRC_GPIO (1 << 4)
180#define MAX8997_IRQSRC_FLASH (1 << 5)
181static irqreturn_t max8997_irq_thread(int irq, void *data)
182{
183 struct max8997_dev *max8997 = data;
184 u8 irq_reg[MAX8997_IRQ_GROUP_NR] = {};
185 u8 irq_src;
186 int ret;
187 int i;
188
189 ret = max8997_read_reg(max8997->i2c, MAX8997_REG_INTSRC, &irq_src);
190 if (ret < 0) {
191 dev_err(max8997->dev, "Failed to read interrupt source: %d\n",
192 ret);
193 return IRQ_NONE;
194 }
195
196 if (irq_src & MAX8997_IRQSRC_PMIC) {
197 /* PMIC INT1 ~ INT4 */
198 max8997_bulk_read(max8997->i2c, MAX8997_REG_INT1, 4,
199 &irq_reg[PMIC_INT1]);
200 }
201 if (irq_src & MAX8997_IRQSRC_FUELGAUGE) {
202 /*
203 * TODO: FUEL GAUGE
204 *
205 * This is to be supported by the Max17042 driver. When
206 * an interrupt occurs here, it should be relayed to the
207 * connected Max17042 device (probably via platform data).
208 * However, the Max17042 driver currently has no interrupt
209 * handling. The Max17042 IRQ driver should be usable both
210 * as a stand-alone device and as a Max8997-dependent
211 * device. Because it is not ready on the Max17042 side and
212 * is not critical to operating the Max8997, we do not
213 * implement this in the initial releases.
214 */
215 irq_reg[FUEL_GAUGE] = 0;
216 }
217 if (irq_src & MAX8997_IRQSRC_MUIC) {
218 /* MUIC INT1 ~ INT3 */
219 max8997_bulk_read(max8997->muic, MAX8997_MUIC_REG_INT1, 3,
220 &irq_reg[MUIC_INT1]);
221 }
222 if (irq_src & MAX8997_IRQSRC_GPIO) {
223 /* GPIO Interrupt */
224 u8 gpio_info[MAX8997_NUM_GPIO];
225
226 irq_reg[GPIO_LOW] = 0;
227 irq_reg[GPIO_HI] = 0;
228
229 max8997_bulk_read(max8997->i2c, MAX8997_REG_GPIOCNTL1,
230 MAX8997_NUM_GPIO, gpio_info);
231 for (i = 0; i < MAX8997_NUM_GPIO; i++) {
232 bool interrupt = false;
233
234 switch (gpio_info[i] & MAX8997_GPIO_INT_MASK) {
235 case MAX8997_GPIO_INT_BOTH:
236 if (max8997->gpio_status[i] != !!(gpio_info[i] & MAX8997_GPIO_DATA_MASK))
237 interrupt = true;
238 break;
239 case MAX8997_GPIO_INT_RISE:
240 if (!max8997->gpio_status[i] &&
241 (gpio_info[i] & MAX8997_GPIO_DATA_MASK))
242 interrupt = true;
243 break;
244 case MAX8997_GPIO_INT_FALL:
245 if (max8997->gpio_status[i] &&
246 !(gpio_info[i] & MAX8997_GPIO_DATA_MASK))
247 interrupt = true;
248 break;
249 default:
250 break;
251 }
252
253 if (interrupt) {
254 if (i < 8)
255 irq_reg[GPIO_LOW] |= (1 << i);
256 else
257 irq_reg[GPIO_HI] |= (1 << (i - 8));
258 }
259 max8997->gpio_status[i] = !!(gpio_info[i] & MAX8997_GPIO_DATA_MASK);
260 }
261 }
262 if (irq_src & MAX8997_IRQSRC_FLASH) {
263 /* Flash Status Interrupt */
264 ret = max8997_read_reg(max8997->i2c, MAX8997_REG_FLASHSTATUS,
265 &irq_reg[FLASH_STATUS]);
266 }
267
268 /* Apply masking */
269 for (i = 0; i < MAX8997_IRQ_GROUP_NR; i++)
270 irq_reg[i] &= ~max8997->irq_masks_cur[i];
271
272 /* Report */
273 for (i = 0; i < MAX8997_IRQ_NR; i++) {
274 if (irq_reg[max8997_irqs[i].group] & max8997_irqs[i].mask)
275 handle_nested_irq(max8997->irq_base + i);
276 }
277
278 return IRQ_HANDLED;
279}
280
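Note on the GPIO branch above: gpio_status[] records the last observed
level of each line, so the RISE/FALL cases compare the stored level
against the current MAX8997_GPIO_DATA_MASK bit rather than the raw
register byte, and the stored level is refreshed each pass.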
281int max8997_irq_resume(struct max8997_dev *max8997)
282{
283 if (max8997->irq && max8997->irq_base)
284 max8997_irq_thread(max8997->irq_base, max8997);
285 return 0;
286}
287
288int max8997_irq_init(struct max8997_dev *max8997)
289{
290 int i;
291 int cur_irq;
292 int ret;
293 u8 val;
294
295 if (!max8997->irq) {
296 dev_warn(max8997->dev, "No interrupt specified.\n");
297 max8997->irq_base = 0;
298 return 0;
299 }
300
301 if (!max8997->irq_base) {
302 dev_err(max8997->dev, "No interrupt base specified.\n");
303 return 0;
304 }
305
306 mutex_init(&max8997->irqlock);
307
308 /* Mask individual interrupt sources */
309 for (i = 0; i < MAX8997_IRQ_GROUP_NR; i++) {
310 struct i2c_client *i2c;
311
312 max8997->irq_masks_cur[i] = 0xff;
313 max8997->irq_masks_cache[i] = 0xff;
314 i2c = get_i2c(max8997, i);
315
316 if (IS_ERR_OR_NULL(i2c))
317 continue;
318 if (max8997_mask_reg[i] == MAX8997_REG_INVALID)
319 continue;
320
321 max8997_write_reg(i2c, max8997_mask_reg[i], 0xff);
322 }
323
324 for (i = 0; i < MAX8997_NUM_GPIO; i++) {
325 ret = max8997_read_reg(max8997->i2c,
326 MAX8997_REG_GPIOCNTL1 + i,
327 &val);
328 max8997->gpio_status[i] = (ret >= 0) &&
329 (val & MAX8997_GPIO_DATA_MASK);
330 }
331
332 /* Register with genirq */
333 for (i = 0; i < MAX8997_IRQ_NR; i++) {
334 cur_irq = i + max8997->irq_base;
335 irq_set_chip_data(cur_irq, max8997);
336 irq_set_chip_and_handler(cur_irq, &max8997_irq_chip,
337 handle_edge_irq);
338 irq_set_nested_thread(cur_irq, 1);
339#ifdef CONFIG_ARM
340 set_irq_flags(cur_irq, IRQF_VALID);
341#else
342 irq_set_noprobe(cur_irq);
343#endif
344 }
345
346 ret = request_threaded_irq(max8997->irq, NULL, max8997_irq_thread,
347 IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
348 "max8997-irq", max8997);
349
350 if (ret) {
351 dev_err(max8997->dev, "Failed to request IRQ %d: %d\n",
352 max8997->irq, ret);
353 return ret;
354 }
355
356 if (!max8997->ono)
357 return 0;
358
359 ret = request_threaded_irq(max8997->ono, NULL, max8997_irq_thread,
360 IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING |
361 IRQF_ONESHOT, "max8997-ono", max8997);
362
363 if (ret)
364 dev_err(max8997->dev, "Failed to request ono-IRQ %d: %d\n",
365 max8997->ono, ret);
366
367 return 0;
368}
369
370void max8997_irq_exit(struct max8997_dev *max8997)
371{
372 if (max8997->ono)
373 free_irq(max8997->ono, max8997);
374
375 if (max8997->irq)
376 free_irq(max8997->irq, max8997);
377}
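
The handlers above are registered as nested threaded interrupts, so a
MAX8997 sub-device driver hooks one of them with request_threaded_irq()
and no primary handler. A minimal sketch, assuming a hypothetical
MAX8997_IRQ_TOPOFF offset and a child platform device whose parent
holds the max8997_dev as driver data:

	static irqreturn_t charger_topoff_handler(int irq, void *data)
	{
		/* runs in the max8997-irq thread; I2C access is allowed */
		return IRQ_HANDLED;
	}

	static int charger_probe(struct platform_device *pdev)
	{
		struct max8997_dev *max8997 = dev_get_drvdata(pdev->dev.parent);
		int irq = max8997->irq_base + MAX8997_IRQ_TOPOFF; /* hypothetical */

		return request_threaded_irq(irq, NULL, charger_topoff_handler,
					    IRQF_ONESHOT, "max8997-topoff", pdev);
	}
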
diff --git a/drivers/mfd/max8998-irq.c b/drivers/mfd/max8998-irq.c
index 3903e1fbb334..5919710dc9ed 100644
--- a/drivers/mfd/max8998-irq.c
+++ b/drivers/mfd/max8998-irq.c
@@ -224,14 +224,14 @@ int max8998_irq_init(struct max8998_dev *max8998)
224 /* register with genirq */ 224 /* register with genirq */
225 for (i = 0; i < MAX8998_IRQ_NR; i++) { 225 for (i = 0; i < MAX8998_IRQ_NR; i++) {
226 cur_irq = i + max8998->irq_base; 226 cur_irq = i + max8998->irq_base;
227 set_irq_chip_data(cur_irq, max8998); 227 irq_set_chip_data(cur_irq, max8998);
228 set_irq_chip_and_handler(cur_irq, &max8998_irq_chip, 228 irq_set_chip_and_handler(cur_irq, &max8998_irq_chip,
229 handle_edge_irq); 229 handle_edge_irq);
230 set_irq_nested_thread(cur_irq, 1); 230 irq_set_nested_thread(cur_irq, 1);
231#ifdef CONFIG_ARM 231#ifdef CONFIG_ARM
232 set_irq_flags(cur_irq, IRQF_VALID); 232 set_irq_flags(cur_irq, IRQF_VALID);
233#else 233#else
234 set_irq_noprobe(cur_irq); 234 irq_set_noprobe(cur_irq);
235#endif 235#endif
236 } 236 }
237 237
diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c
index c00214257da2..9ec7570f5b81 100644
--- a/drivers/mfd/max8998.c
+++ b/drivers/mfd/max8998.c
@@ -209,7 +209,7 @@ static int max8998_suspend(struct device *dev)
209 struct max8998_dev *max8998 = i2c_get_clientdata(i2c); 209 struct max8998_dev *max8998 = i2c_get_clientdata(i2c);
210 210
211 if (max8998->wakeup) 211 if (max8998->wakeup)
212 set_irq_wake(max8998->irq, 1); 212 irq_set_irq_wake(max8998->irq, 1);
213 return 0; 213 return 0;
214} 214}
215 215
@@ -219,7 +219,7 @@ static int max8998_resume(struct device *dev)
219 struct max8998_dev *max8998 = i2c_get_clientdata(i2c); 219 struct max8998_dev *max8998 = i2c_get_clientdata(i2c);
220 220
221 if (max8998->wakeup) 221 if (max8998->wakeup)
222 set_irq_wake(max8998->irq, 0); 222 irq_set_irq_wake(max8998->irq, 0);
223 /* 223 /*
224 * In LP3974, if IRQ registers are not "read & clear" 224 * In LP3974, if IRQ registers are not "read & clear"
225 * when it's set during sleep, the interrupt becomes 225 * when it's set during sleep, the interrupt becomes
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 79eda0264fb2..d01574d98870 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -184,16 +184,12 @@ void mfd_remove_devices(struct device *parent)
184} 184}
185EXPORT_SYMBOL(mfd_remove_devices); 185EXPORT_SYMBOL(mfd_remove_devices);
186 186
187static int add_shared_platform_device(const char *cell, const char *name) 187int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones)
188{ 188{
189 struct mfd_cell cell_entry; 189 struct mfd_cell cell_entry;
190 struct device *dev; 190 struct device *dev;
191 struct platform_device *pdev; 191 struct platform_device *pdev;
192 int err; 192 int i;
193
194 /* check if we've already registered a device (don't fail if we have) */
195 if (bus_find_device_by_name(&platform_bus_type, NULL, name))
196 return 0;
197 193
198 /* fetch the parent cell's device (should already be registered!) */ 194 /* fetch the parent cell's device (should already be registered!) */
199 dev = bus_find_device_by_name(&platform_bus_type, NULL, cell); 195 dev = bus_find_device_by_name(&platform_bus_type, NULL, cell);
@@ -206,44 +202,17 @@ static int add_shared_platform_device(const char *cell, const char *name)
206 202
207 WARN_ON(!cell_entry.enable); 203 WARN_ON(!cell_entry.enable);
208 204
209 cell_entry.name = name; 205 for (i = 0; i < n_clones; i++) {
210 err = mfd_add_device(pdev->dev.parent, -1, &cell_entry, NULL, 0); 206 cell_entry.name = clones[i];
211 if (err) 207 /* don't give up if a single call fails; just report error */
212 dev_err(dev, "MFD add devices failed: %d\n", err); 208 if (mfd_add_device(pdev->dev.parent, -1, &cell_entry, NULL, 0))
213 return err; 209 dev_err(dev, "failed to create platform device '%s'\n",
214} 210 clones[i]);
215 211 }
216int mfd_shared_platform_driver_register(struct platform_driver *drv,
217 const char *cellname)
218{
219 int err;
220
221 err = add_shared_platform_device(cellname, drv->driver.name);
222 if (err)
223 printk(KERN_ERR "failed to add platform device %s\n",
224 drv->driver.name);
225
226 err = platform_driver_register(drv);
227 if (err)
228 printk(KERN_ERR "failed to add platform driver %s\n",
229 drv->driver.name);
230
231 return err;
232}
233EXPORT_SYMBOL(mfd_shared_platform_driver_register);
234
235void mfd_shared_platform_driver_unregister(struct platform_driver *drv)
236{
237 struct device *dev;
238
239 dev = bus_find_device_by_name(&platform_bus_type, NULL,
240 drv->driver.name);
241 if (dev)
242 platform_device_unregister(to_platform_device(dev));
243 212
244 platform_driver_unregister(drv); 213 return 0;
245} 214}
246EXPORT_SYMBOL(mfd_shared_platform_driver_unregister); 215EXPORT_SYMBOL(mfd_clone_cell);
247 216
248MODULE_LICENSE("GPL"); 217MODULE_LICENSE("GPL");
249MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov"); 218MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov");
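
mfd_clone_cell() replaces the add_shared_platform_device() and
mfd_shared_platform_driver_register() pair: the owner of the parent
cell clones it once per function driver that shares the hardware, and
each clone's driver is then registered with plain
platform_driver_register(). A minimal sketch with hypothetical names:

	/* "example-mfd" must already be registered as the parent cell */
	static const char *example_clones[] = { "example-gpio", "example-wdt" };

	static int example_setup(struct platform_device *pdev)
	{
		return mfd_clone_cell("example-mfd", example_clones,
				      ARRAY_SIZE(example_clones));
	}
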
diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c
index c1306ed43e3c..c7687f6a78a0 100644
--- a/drivers/mfd/pcf50633-core.c
+++ b/drivers/mfd/pcf50633-core.c
@@ -356,7 +356,7 @@ static int __devexit pcf50633_remove(struct i2c_client *client)
356 return 0; 356 return 0;
357} 357}
358 358
359static struct i2c_device_id pcf50633_id_table[] = { 359static const struct i2c_device_id pcf50633_id_table[] = {
360 {"pcf50633", 0x73}, 360 {"pcf50633", 0x73},
361 {/* end of list */} 361 {/* end of list */}
362}; 362};
diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c
index 193c940225b5..10dbe6374a89 100644
--- a/drivers/mfd/rdc321x-southbridge.c
+++ b/drivers/mfd/rdc321x-southbridge.c
@@ -97,6 +97,7 @@ static DEFINE_PCI_DEVICE_TABLE(rdc321x_sb_table) = {
97 { PCI_DEVICE(PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030) }, 97 { PCI_DEVICE(PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030) },
98 {} 98 {}
99}; 99};
100MODULE_DEVICE_TABLE(pci, rdc321x_sb_table);
100 101
101static struct pci_driver rdc321x_sb_driver = { 102static struct pci_driver rdc321x_sb_driver = {
102 .name = "RDC321x Southbridge", 103 .name = "RDC321x Southbridge",
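
The added MODULE_DEVICE_TABLE() exports the PCI ID list in the module's
alias information, so udev can load the driver automatically when a
matching southbridge is found instead of requiring a manual modprobe.
Illustratively (the exact string depends on the ID values):

	/* modinfo rdc321x-southbridge would now list an alias like
	 * pci:v000017F3d00006030sv*sd*bc*sc*i* (value illustrative), which
	 * udev matches against the device's uevent MODALIAS. */
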
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 3e5732b58c49..7ab7746631d4 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -762,14 +762,14 @@ static int __devinit stmpe_irq_init(struct stmpe *stmpe)
762 int irq; 762 int irq;
763 763
764 for (irq = base; irq < base + num_irqs; irq++) { 764 for (irq = base; irq < base + num_irqs; irq++) {
765 set_irq_chip_data(irq, stmpe); 765 irq_set_chip_data(irq, stmpe);
766 set_irq_chip_and_handler(irq, &stmpe_irq_chip, 766 irq_set_chip_and_handler(irq, &stmpe_irq_chip,
767 handle_edge_irq); 767 handle_edge_irq);
768 set_irq_nested_thread(irq, 1); 768 irq_set_nested_thread(irq, 1);
769#ifdef CONFIG_ARM 769#ifdef CONFIG_ARM
770 set_irq_flags(irq, IRQF_VALID); 770 set_irq_flags(irq, IRQF_VALID);
771#else 771#else
772 set_irq_noprobe(irq); 772 irq_set_noprobe(irq);
773#endif 773#endif
774 } 774 }
775 775
@@ -786,8 +786,8 @@ static void stmpe_irq_remove(struct stmpe *stmpe)
786#ifdef CONFIG_ARM 786#ifdef CONFIG_ARM
787 set_irq_flags(irq, 0); 787 set_irq_flags(irq, 0);
788#endif 788#endif
789 set_irq_chip_and_handler(irq, NULL, NULL); 789 irq_set_chip_and_handler(irq, NULL, NULL);
790 set_irq_chip_data(irq, NULL); 790 irq_set_chip_data(irq, NULL);
791 } 791 }
792} 792}
793 793
diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index af57fc706a4c..42830e692964 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -186,7 +186,7 @@ static struct mfd_cell t7l66xb_cells[] = {
186/* Handle the T7L66XB interrupt mux */ 186/* Handle the T7L66XB interrupt mux */
187static void t7l66xb_irq(unsigned int irq, struct irq_desc *desc) 187static void t7l66xb_irq(unsigned int irq, struct irq_desc *desc)
188{ 188{
189 struct t7l66xb *t7l66xb = get_irq_data(irq); 189 struct t7l66xb *t7l66xb = irq_get_handler_data(irq);
190 unsigned int isr; 190 unsigned int isr;
191 unsigned int i, irq_base; 191 unsigned int i, irq_base;
192 192
@@ -243,17 +243,16 @@ static void t7l66xb_attach_irq(struct platform_device *dev)
243 irq_base = t7l66xb->irq_base; 243 irq_base = t7l66xb->irq_base;
244 244
245 for (irq = irq_base; irq < irq_base + T7L66XB_NR_IRQS; irq++) { 245 for (irq = irq_base; irq < irq_base + T7L66XB_NR_IRQS; irq++) {
246 set_irq_chip(irq, &t7l66xb_chip); 246 irq_set_chip_and_handler(irq, &t7l66xb_chip, handle_level_irq);
247 set_irq_chip_data(irq, t7l66xb); 247 irq_set_chip_data(irq, t7l66xb);
248 set_irq_handler(irq, handle_level_irq);
249#ifdef CONFIG_ARM 248#ifdef CONFIG_ARM
250 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); 249 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
251#endif 250#endif
252 } 251 }
253 252
254 set_irq_type(t7l66xb->irq, IRQ_TYPE_EDGE_FALLING); 253 irq_set_irq_type(t7l66xb->irq, IRQ_TYPE_EDGE_FALLING);
255 set_irq_data(t7l66xb->irq, t7l66xb); 254 irq_set_handler_data(t7l66xb->irq, t7l66xb);
256 set_irq_chained_handler(t7l66xb->irq, t7l66xb_irq); 255 irq_set_chained_handler(t7l66xb->irq, t7l66xb_irq);
257} 256}
258 257
259static void t7l66xb_detach_irq(struct platform_device *dev) 258static void t7l66xb_detach_irq(struct platform_device *dev)
@@ -263,15 +262,15 @@ static void t7l66xb_detach_irq(struct platform_device *dev)
263 262
264 irq_base = t7l66xb->irq_base; 263 irq_base = t7l66xb->irq_base;
265 264
266 set_irq_chained_handler(t7l66xb->irq, NULL); 265 irq_set_chained_handler(t7l66xb->irq, NULL);
267 set_irq_data(t7l66xb->irq, NULL); 266 irq_set_handler_data(t7l66xb->irq, NULL);
268 267
269 for (irq = irq_base; irq < irq_base + T7L66XB_NR_IRQS; irq++) { 268 for (irq = irq_base; irq < irq_base + T7L66XB_NR_IRQS; irq++) {
270#ifdef CONFIG_ARM 269#ifdef CONFIG_ARM
271 set_irq_flags(irq, 0); 270 set_irq_flags(irq, 0);
272#endif 271#endif
273 set_irq_chip(irq, NULL); 272 irq_set_chip(irq, NULL);
274 set_irq_chip_data(irq, NULL); 273 irq_set_chip_data(irq, NULL);
275 } 274 }
276} 275}
277 276
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index 729dbeed2ce0..c27e515b0722 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -192,14 +192,14 @@ static int tc3589x_irq_init(struct tc3589x *tc3589x)
192 int irq; 192 int irq;
193 193
194 for (irq = base; irq < base + TC3589x_NR_INTERNAL_IRQS; irq++) { 194 for (irq = base; irq < base + TC3589x_NR_INTERNAL_IRQS; irq++) {
195 set_irq_chip_data(irq, tc3589x); 195 irq_set_chip_data(irq, tc3589x);
196 set_irq_chip_and_handler(irq, &dummy_irq_chip, 196 irq_set_chip_and_handler(irq, &dummy_irq_chip,
197 handle_edge_irq); 197 handle_edge_irq);
198 set_irq_nested_thread(irq, 1); 198 irq_set_nested_thread(irq, 1);
199#ifdef CONFIG_ARM 199#ifdef CONFIG_ARM
200 set_irq_flags(irq, IRQF_VALID); 200 set_irq_flags(irq, IRQF_VALID);
201#else 201#else
202 set_irq_noprobe(irq); 202 irq_set_noprobe(irq);
203#endif 203#endif
204 } 204 }
205 205
@@ -215,8 +215,8 @@ static void tc3589x_irq_remove(struct tc3589x *tc3589x)
215#ifdef CONFIG_ARM 215#ifdef CONFIG_ARM
216 set_irq_flags(irq, 0); 216 set_irq_flags(irq, 0);
217#endif 217#endif
218 set_irq_chip_and_handler(irq, NULL, NULL); 218 irq_set_chip_and_handler(irq, NULL, NULL);
219 set_irq_chip_data(irq, NULL); 219 irq_set_chip_data(irq, NULL);
220 } 220 }
221} 221}
222 222
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 3d62ded86a8f..fc53ce287601 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -513,7 +513,7 @@ static int tc6393xb_register_gpio(struct tc6393xb *tc6393xb, int gpio_base)
513static void 513static void
514tc6393xb_irq(unsigned int irq, struct irq_desc *desc) 514tc6393xb_irq(unsigned int irq, struct irq_desc *desc)
515{ 515{
516 struct tc6393xb *tc6393xb = get_irq_data(irq); 516 struct tc6393xb *tc6393xb = irq_get_handler_data(irq);
517 unsigned int isr; 517 unsigned int isr;
518 unsigned int i, irq_base; 518 unsigned int i, irq_base;
519 519
@@ -572,15 +572,14 @@ static void tc6393xb_attach_irq(struct platform_device *dev)
572 irq_base = tc6393xb->irq_base; 572 irq_base = tc6393xb->irq_base;
573 573
574 for (irq = irq_base; irq < irq_base + TC6393XB_NR_IRQS; irq++) { 574 for (irq = irq_base; irq < irq_base + TC6393XB_NR_IRQS; irq++) {
575 set_irq_chip(irq, &tc6393xb_chip); 575 irq_set_chip_and_handler(irq, &tc6393xb_chip, handle_edge_irq);
576 set_irq_chip_data(irq, tc6393xb); 576 irq_set_chip_data(irq, tc6393xb);
577 set_irq_handler(irq, handle_edge_irq);
578 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); 577 set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
579 } 578 }
580 579
581 set_irq_type(tc6393xb->irq, IRQ_TYPE_EDGE_FALLING); 580 irq_set_irq_type(tc6393xb->irq, IRQ_TYPE_EDGE_FALLING);
582 set_irq_data(tc6393xb->irq, tc6393xb); 581 irq_set_handler_data(tc6393xb->irq, tc6393xb);
583 set_irq_chained_handler(tc6393xb->irq, tc6393xb_irq); 582 irq_set_chained_handler(tc6393xb->irq, tc6393xb_irq);
584} 583}
585 584
586static void tc6393xb_detach_irq(struct platform_device *dev) 585static void tc6393xb_detach_irq(struct platform_device *dev)
@@ -588,15 +587,15 @@ static void tc6393xb_detach_irq(struct platform_device *dev)
588 struct tc6393xb *tc6393xb = platform_get_drvdata(dev); 587 struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
589 unsigned int irq, irq_base; 588 unsigned int irq, irq_base;
590 589
591 set_irq_chained_handler(tc6393xb->irq, NULL); 590 irq_set_chained_handler(tc6393xb->irq, NULL);
592 set_irq_data(tc6393xb->irq, NULL); 591 irq_set_handler_data(tc6393xb->irq, NULL);
593 592
594 irq_base = tc6393xb->irq_base; 593 irq_base = tc6393xb->irq_base;
595 594
596 for (irq = irq_base; irq < irq_base + TC6393XB_NR_IRQS; irq++) { 595 for (irq = irq_base; irq < irq_base + TC6393XB_NR_IRQS; irq++) {
597 set_irq_flags(irq, 0); 596 set_irq_flags(irq, 0);
598 set_irq_chip(irq, NULL); 597 irq_set_chip(irq, NULL);
599 set_irq_chip_data(irq, NULL); 598 irq_set_chip_data(irq, NULL);
600 } 599 }
601} 600}
602 601
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 0aa9186aec19..b600808690c1 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -422,10 +422,10 @@ static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
422 422
423 for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) { 423 for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) {
424 int __irq = i + tps6586x->irq_base; 424 int __irq = i + tps6586x->irq_base;
425 set_irq_chip_data(__irq, tps6586x); 425 irq_set_chip_data(__irq, tps6586x);
426 set_irq_chip_and_handler(__irq, &tps6586x->irq_chip, 426 irq_set_chip_and_handler(__irq, &tps6586x->irq_chip,
427 handle_simple_irq); 427 handle_simple_irq);
428 set_irq_nested_thread(__irq, 1); 428 irq_set_nested_thread(__irq, 1);
429#ifdef CONFIG_ARM 429#ifdef CONFIG_ARM
430 set_irq_flags(__irq, IRQF_VALID); 430 set_irq_flags(__irq, IRQF_VALID);
431#endif 431#endif
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 63a30e88908f..8a7ee3139b86 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -320,24 +320,8 @@ static int twl4030_irq_thread(void *data)
320 for (module_irq = twl4030_irq_base; 320 for (module_irq = twl4030_irq_base;
321 pih_isr; 321 pih_isr;
322 pih_isr >>= 1, module_irq++) { 322 pih_isr >>= 1, module_irq++) {
323 if (pih_isr & 0x1) { 323 if (pih_isr & 0x1)
324 struct irq_desc *d = irq_to_desc(module_irq); 324 generic_handle_irq(module_irq);
325
326 if (!d) {
327 pr_err("twl4030: Invalid SIH IRQ: %d\n",
328 module_irq);
329 return -EINVAL;
330 }
331
332 /* These can't be masked ... always warn
333 * if we get any surprises.
334 */
335 if (d->status & IRQ_DISABLED)
336 note_interrupt(module_irq, d,
337 IRQ_NONE);
338 else
339 d->handle_irq(module_irq, d);
340 }
341 } 325 }
342 local_irq_enable(); 326 local_irq_enable();
343 327
@@ -470,7 +454,7 @@ static inline void activate_irq(int irq)
470 set_irq_flags(irq, IRQF_VALID); 454 set_irq_flags(irq, IRQF_VALID);
471#else 455#else
472 /* same effect on other architectures */ 456 /* same effect on other architectures */
473 set_irq_noprobe(irq); 457 irq_set_noprobe(irq);
474#endif 458#endif
475} 459}
476 460
@@ -560,24 +544,18 @@ static void twl4030_sih_do_edge(struct work_struct *work)
560 /* Modify only the bits we know must change */ 544 /* Modify only the bits we know must change */
561 while (edge_change) { 545 while (edge_change) {
562 int i = fls(edge_change) - 1; 546 int i = fls(edge_change) - 1;
563 struct irq_desc *d = irq_to_desc(i + agent->irq_base); 547 struct irq_data *idata = irq_get_irq_data(i + agent->irq_base);
564 int byte = 1 + (i >> 2); 548 int byte = 1 + (i >> 2);
565 int off = (i & 0x3) * 2; 549 int off = (i & 0x3) * 2;
566 550 unsigned int type;
567 if (!d) {
568 pr_err("twl4030: Invalid IRQ: %d\n",
569 i + agent->irq_base);
570 return;
571 }
572 551
573 bytes[byte] &= ~(0x03 << off); 552 bytes[byte] &= ~(0x03 << off);
574 553
575 raw_spin_lock_irq(&d->lock); 554 type = irqd_get_trigger_type(idata);
576 if (d->status & IRQ_TYPE_EDGE_RISING) 555 if (type & IRQ_TYPE_EDGE_RISING)
577 bytes[byte] |= BIT(off + 1); 556 bytes[byte] |= BIT(off + 1);
578 if (d->status & IRQ_TYPE_EDGE_FALLING) 557 if (type & IRQ_TYPE_EDGE_FALLING)
579 bytes[byte] |= BIT(off + 0); 558 bytes[byte] |= BIT(off + 0);
580 raw_spin_unlock_irq(&d->lock);
581 559
582 edge_change &= ~BIT(i); 560 edge_change &= ~BIT(i);
583 } 561 }
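
The byte/off arithmetic packs four two-bit trigger fields per edge
register byte, and the rewrite reads the trigger through
irqd_get_trigger_type() instead of poking desc->status under
desc->lock. A worked example for SIH bit i = 5, using the expressions
in the loop above:

	/*
	 *	byte = 1 + (5 >> 2)  = 2	(the +1 matches how bytes[]
	 *	off  = (5 & 0x3) * 2 = 2	 is laid out in this function)
	 * IRQ_TYPE_EDGE_RISING sets BIT(off + 1) = bit 3 of bytes[2],
	 * IRQ_TYPE_EDGE_FALLING sets BIT(off + 0) = bit 2.
	 */
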
@@ -626,21 +604,13 @@ static void twl4030_sih_unmask(struct irq_data *data)
626static int twl4030_sih_set_type(struct irq_data *data, unsigned trigger) 604static int twl4030_sih_set_type(struct irq_data *data, unsigned trigger)
627{ 605{
628 struct sih_agent *sih = irq_data_get_irq_chip_data(data); 606 struct sih_agent *sih = irq_data_get_irq_chip_data(data);
629 struct irq_desc *desc = irq_to_desc(data->irq);
630 unsigned long flags; 607 unsigned long flags;
631 608
632 if (!desc) {
633 pr_err("twl4030: Invalid IRQ: %d\n", data->irq);
634 return -EINVAL;
635 }
636
637 if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) 609 if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
638 return -EINVAL; 610 return -EINVAL;
639 611
640 spin_lock_irqsave(&sih_agent_lock, flags); 612 spin_lock_irqsave(&sih_agent_lock, flags);
641 if ((desc->status & IRQ_TYPE_SENSE_MASK) != trigger) { 613 if (irqd_get_trigger_type(data) != trigger) {
642 desc->status &= ~IRQ_TYPE_SENSE_MASK;
643 desc->status |= trigger;
644 sih->edge_change |= BIT(data->irq - sih->irq_base); 614 sih->edge_change |= BIT(data->irq - sih->irq_base);
645 queue_work(wq, &sih->edge_work); 615 queue_work(wq, &sih->edge_work);
646 } 616 }
@@ -680,7 +650,7 @@ static inline int sih_read_isr(const struct sih *sih)
680 */ 650 */
681static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc) 651static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
682{ 652{
683 struct sih_agent *agent = get_irq_data(irq); 653 struct sih_agent *agent = irq_get_handler_data(irq);
684 const struct sih *sih = agent->sih; 654 const struct sih *sih = agent->sih;
685 int isr; 655 int isr;
686 656
@@ -754,9 +724,9 @@ int twl4030_sih_setup(int module)
754 for (i = 0; i < sih->bits; i++) { 724 for (i = 0; i < sih->bits; i++) {
755 irq = irq_base + i; 725 irq = irq_base + i;
756 726
757 set_irq_chip_and_handler(irq, &twl4030_sih_irq_chip, 727 irq_set_chip_and_handler(irq, &twl4030_sih_irq_chip,
758 handle_edge_irq); 728 handle_edge_irq);
759 set_irq_chip_data(irq, agent); 729 irq_set_chip_data(irq, agent);
760 activate_irq(irq); 730 activate_irq(irq);
761 } 731 }
762 732
@@ -765,8 +735,8 @@ int twl4030_sih_setup(int module)
765 735
766 /* replace generic PIH handler (handle_simple_irq) */ 736 /* replace generic PIH handler (handle_simple_irq) */
767 irq = sih_mod + twl4030_irq_base; 737 irq = sih_mod + twl4030_irq_base;
768 set_irq_data(irq, agent); 738 irq_set_handler_data(irq, agent);
769 set_irq_chained_handler(irq, handle_twl4030_sih); 739 irq_set_chained_handler(irq, handle_twl4030_sih);
770 740
771 pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", sih->name, 741 pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", sih->name,
772 irq, irq_base, twl4030_irq_next - 1); 742 irq, irq_base, twl4030_irq_next - 1);
@@ -815,8 +785,8 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
815 twl4030_sih_irq_chip.irq_ack = dummy_irq_chip.irq_ack; 785 twl4030_sih_irq_chip.irq_ack = dummy_irq_chip.irq_ack;
816 786
817 for (i = irq_base; i < irq_end; i++) { 787 for (i = irq_base; i < irq_end; i++) {
818 set_irq_chip_and_handler(i, &twl4030_irq_chip, 788 irq_set_chip_and_handler(i, &twl4030_irq_chip,
819 handle_simple_irq); 789 handle_simple_irq);
820 activate_irq(i); 790 activate_irq(i);
821 } 791 }
822 twl4030_irq_next = i; 792 twl4030_irq_next = i;
@@ -856,7 +826,7 @@ fail_rqirq:
856 /* clean up twl4030_sih_setup */ 826 /* clean up twl4030_sih_setup */
857fail: 827fail:
858 for (i = irq_base; i < irq_end; i++) 828 for (i = irq_base; i < irq_end; i++)
859 set_irq_chip_and_handler(i, NULL, NULL); 829 irq_set_chip_and_handler(i, NULL, NULL);
860 destroy_workqueue(wq); 830 destroy_workqueue(wq);
861 wq = NULL; 831 wq = NULL;
862 return status; 832 return status;
diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c
index 4082ed73613f..fa937052fbab 100644
--- a/drivers/mfd/twl6030-irq.c
+++ b/drivers/mfd/twl6030-irq.c
@@ -140,22 +140,7 @@ static int twl6030_irq_thread(void *data)
140 if (sts.int_sts & 0x1) { 140 if (sts.int_sts & 0x1) {
141 int module_irq = twl6030_irq_base + 141 int module_irq = twl6030_irq_base +
142 twl6030_interrupt_mapping[i]; 142 twl6030_interrupt_mapping[i];
143 struct irq_desc *d = irq_to_desc(module_irq); 143 generic_handle_irq(module_irq);
144
145 if (!d) {
146 pr_err("twl6030: Invalid SIH IRQ: %d\n",
147 module_irq);
148 return -EINVAL;
149 }
150
151 /* These can't be masked ... always warn
152 * if we get any surprises.
153 */
154 if (d->status & IRQ_DISABLED)
155 note_interrupt(module_irq, d,
156 IRQ_NONE);
157 else
158 d->handle_irq(module_irq, d);
159 144
160 } 145 }
161 local_irq_enable(); 146 local_irq_enable();
@@ -198,7 +183,7 @@ static inline void activate_irq(int irq)
198 set_irq_flags(irq, IRQF_VALID); 183 set_irq_flags(irq, IRQF_VALID);
199#else 184#else
200 /* same effect on other architectures */ 185 /* same effect on other architectures */
201 set_irq_noprobe(irq); 186 irq_set_noprobe(irq);
202#endif 187#endif
203} 188}
204 189
@@ -335,8 +320,8 @@ int twl6030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
335 twl6030_irq_chip.irq_set_type = NULL; 320 twl6030_irq_chip.irq_set_type = NULL;
336 321
337 for (i = irq_base; i < irq_end; i++) { 322 for (i = irq_base; i < irq_end; i++) {
338 set_irq_chip_and_handler(i, &twl6030_irq_chip, 323 irq_set_chip_and_handler(i, &twl6030_irq_chip,
339 handle_simple_irq); 324 handle_simple_irq);
340 activate_irq(i); 325 activate_irq(i);
341 } 326 }
342 327
@@ -365,7 +350,7 @@ fail_irq:
365 350
366fail_kthread: 351fail_kthread:
367 for (i = irq_base; i < irq_end; i++) 352 for (i = irq_base; i < irq_end; i++)
368 set_irq_chip_and_handler(i, NULL, NULL); 353 irq_set_chip_and_handler(i, NULL, NULL);
369 return status; 354 return status;
370} 355}
371 356
diff --git a/drivers/mfd/wl1273-core.c b/drivers/mfd/wl1273-core.c
index f76f6c798046..04914f2836c0 100644
--- a/drivers/mfd/wl1273-core.c
+++ b/drivers/mfd/wl1273-core.c
@@ -25,7 +25,7 @@
25 25
26#define DRIVER_DESC "WL1273 FM Radio Core" 26#define DRIVER_DESC "WL1273 FM Radio Core"
27 27
28static struct i2c_device_id wl1273_driver_id_table[] = { 28static const struct i2c_device_id wl1273_driver_id_table[] = {
29 { WL1273_FM_DRIVER_NAME, 0 }, 29 { WL1273_FM_DRIVER_NAME, 0 },
30 { } 30 { }
31}; 31};
diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c
index a5cd17e18d09..23e66af89dea 100644
--- a/drivers/mfd/wm831x-irq.c
+++ b/drivers/mfd/wm831x-irq.c
@@ -553,17 +553,17 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq)
553 for (cur_irq = wm831x->irq_base; 553 for (cur_irq = wm831x->irq_base;
554 cur_irq < ARRAY_SIZE(wm831x_irqs) + wm831x->irq_base; 554 cur_irq < ARRAY_SIZE(wm831x_irqs) + wm831x->irq_base;
555 cur_irq++) { 555 cur_irq++) {
556 set_irq_chip_data(cur_irq, wm831x); 556 irq_set_chip_data(cur_irq, wm831x);
557 set_irq_chip_and_handler(cur_irq, &wm831x_irq_chip, 557 irq_set_chip_and_handler(cur_irq, &wm831x_irq_chip,
558 handle_edge_irq); 558 handle_edge_irq);
559 set_irq_nested_thread(cur_irq, 1); 559 irq_set_nested_thread(cur_irq, 1);
560 560
561 /* ARM needs us to explicitly flag the IRQ as valid 561 /* ARM needs us to explicitly flag the IRQ as valid
562 * and will set them noprobe when we do so. */ 562 * and will set them noprobe when we do so. */
563#ifdef CONFIG_ARM 563#ifdef CONFIG_ARM
564 set_irq_flags(cur_irq, IRQF_VALID); 564 set_irq_flags(cur_irq, IRQF_VALID);
565#else 565#else
566 set_irq_noprobe(cur_irq); 566 irq_set_noprobe(cur_irq);
567#endif 567#endif
568 } 568 }
569 569
diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c
index 5839966ebd85..ed4b22a167b3 100644
--- a/drivers/mfd/wm8350-irq.c
+++ b/drivers/mfd/wm8350-irq.c
@@ -518,17 +518,17 @@ int wm8350_irq_init(struct wm8350 *wm8350, int irq,
518 for (cur_irq = wm8350->irq_base; 518 for (cur_irq = wm8350->irq_base;
519 cur_irq < ARRAY_SIZE(wm8350_irqs) + wm8350->irq_base; 519 cur_irq < ARRAY_SIZE(wm8350_irqs) + wm8350->irq_base;
520 cur_irq++) { 520 cur_irq++) {
521 set_irq_chip_data(cur_irq, wm8350); 521 irq_set_chip_data(cur_irq, wm8350);
522 set_irq_chip_and_handler(cur_irq, &wm8350_irq_chip, 522 irq_set_chip_and_handler(cur_irq, &wm8350_irq_chip,
523 handle_edge_irq); 523 handle_edge_irq);
524 set_irq_nested_thread(cur_irq, 1); 524 irq_set_nested_thread(cur_irq, 1);
525 525
526 /* ARM needs us to explicitly flag the IRQ as valid 526 /* ARM needs us to explicitly flag the IRQ as valid
527 * and will set them noprobe when we do so. */ 527 * and will set them noprobe when we do so. */
528#ifdef CONFIG_ARM 528#ifdef CONFIG_ARM
529 set_irq_flags(cur_irq, IRQF_VALID); 529 set_irq_flags(cur_irq, IRQF_VALID);
530#else 530#else
531 set_irq_noprobe(cur_irq); 531 irq_set_noprobe(cur_irq);
532#endif 532#endif
533 } 533 }
534 534
diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c
index 1e3bf4a2ff8e..71c6e8f9aedb 100644
--- a/drivers/mfd/wm8994-irq.c
+++ b/drivers/mfd/wm8994-irq.c
@@ -278,17 +278,17 @@ int wm8994_irq_init(struct wm8994 *wm8994)
278 for (cur_irq = wm8994->irq_base; 278 for (cur_irq = wm8994->irq_base;
279 cur_irq < ARRAY_SIZE(wm8994_irqs) + wm8994->irq_base; 279 cur_irq < ARRAY_SIZE(wm8994_irqs) + wm8994->irq_base;
280 cur_irq++) { 280 cur_irq++) {
281 set_irq_chip_data(cur_irq, wm8994); 281 irq_set_chip_data(cur_irq, wm8994);
282 set_irq_chip_and_handler(cur_irq, &wm8994_irq_chip, 282 irq_set_chip_and_handler(cur_irq, &wm8994_irq_chip,
283 handle_edge_irq); 283 handle_edge_irq);
284 set_irq_nested_thread(cur_irq, 1); 284 irq_set_nested_thread(cur_irq, 1);
285 285
286 /* ARM needs us to explicitly flag the IRQ as valid 286 /* ARM needs us to explicitly flag the IRQ as valid
287 * and will set them noprobe when we do so. */ 287 * and will set them noprobe when we do so. */
288#ifdef CONFIG_ARM 288#ifdef CONFIG_ARM
289 set_irq_flags(cur_irq, IRQF_VALID); 289 set_irq_flags(cur_irq, IRQF_VALID);
290#else 290#else
291 set_irq_noprobe(cur_irq); 291 irq_set_noprobe(cur_irq);
292#endif 292#endif
293 } 293 }
294 294
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 77414702cb00..b4567c35a322 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -33,14 +33,6 @@ config MTD_TESTS
33 should normally be compiled as kernel modules. The modules perform 33 should normally be compiled as kernel modules. The modules perform
34 various checks and verifications when loaded. 34 various checks and verifications when loaded.
35 35
36config MTD_CONCAT
37 tristate "MTD concatenating support"
38 help
39 Support for concatenating several MTD devices into a single
40 (virtual) one. This allows you to have -for example- a JFFS(2)
41 file system spanning multiple physical flash chips. If unsure,
42 say 'Y'.
43
44config MTD_PARTITIONS 36config MTD_PARTITIONS
45 bool "MTD partitioning support" 37 bool "MTD partitioning support"
46 help 38 help
@@ -333,6 +325,16 @@ config MTD_OOPS
333 To use, add console=ttyMTDx to the kernel command line, 325 To use, add console=ttyMTDx to the kernel command line,
334 where x is the MTD device number to use. 326 where x is the MTD device number to use.
335 327
328config MTD_SWAP
329 tristate "Swap on MTD device support"
330 depends on MTD && SWAP
331 select MTD_BLKDEVS
332 help
333 Provides a volatile block device driver on top of an MTD
334 partition, suitable for swapping. The mapping of written blocks
335 is not saved. The driver provides wear leveling by storing an
336 erase counter in the OOB area.
337
336source "drivers/mtd/chips/Kconfig" 338source "drivers/mtd/chips/Kconfig"
337 339
338source "drivers/mtd/maps/Kconfig" 340source "drivers/mtd/maps/Kconfig"
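
A hedged sketch of the configuration the new option implies;
MTD_BLKDEVS is selected automatically, the other symbols must already
be enabled:

	CONFIG_MTD=y
	CONFIG_SWAP=y
	CONFIG_MTD_SWAP=m	# builds the mtdswap block driver added above
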
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index d4e7f25b1ebb..d578095fb255 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -4,11 +4,10 @@
4 4
5# Core functionality. 5# Core functionality.
6obj-$(CONFIG_MTD) += mtd.o 6obj-$(CONFIG_MTD) += mtd.o
7mtd-y := mtdcore.o mtdsuper.o 7mtd-y := mtdcore.o mtdsuper.o mtdconcat.o
8mtd-$(CONFIG_MTD_PARTITIONS) += mtdpart.o 8mtd-$(CONFIG_MTD_PARTITIONS) += mtdpart.o
9mtd-$(CONFIG_MTD_OF_PARTS) += ofpart.o 9mtd-$(CONFIG_MTD_OF_PARTS) += ofpart.o
10 10
11obj-$(CONFIG_MTD_CONCAT) += mtdconcat.o
12obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o 11obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o
13obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o 12obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o
14obj-$(CONFIG_MTD_AFS_PARTS) += afs.o 13obj-$(CONFIG_MTD_AFS_PARTS) += afs.o
@@ -26,6 +25,7 @@ obj-$(CONFIG_RFD_FTL) += rfd_ftl.o
26obj-$(CONFIG_SSFDC) += ssfdc.o 25obj-$(CONFIG_SSFDC) += ssfdc.o
27obj-$(CONFIG_SM_FTL) += sm_ftl.o 26obj-$(CONFIG_SM_FTL) += sm_ftl.o
28obj-$(CONFIG_MTD_OOPS) += mtdoops.o 27obj-$(CONFIG_MTD_OOPS) += mtdoops.o
28obj-$(CONFIG_MTD_SWAP) += mtdswap.o
29 29
30nftl-objs := nftlcore.o nftlmount.o 30nftl-objs := nftlcore.o nftlmount.o
31inftl-objs := inftlcore.o inftlmount.o 31inftl-objs := inftlcore.o inftlmount.o
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 4aaa88f8ab5f..092aef11120c 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -455,7 +455,7 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary)
455 mtd->flags = MTD_CAP_NORFLASH; 455 mtd->flags = MTD_CAP_NORFLASH;
456 mtd->name = map->name; 456 mtd->name = map->name;
457 mtd->writesize = 1; 457 mtd->writesize = 1;
458 mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize; 458 mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
459 459
460 mtd->reboot_notifier.notifier_call = cfi_intelext_reboot; 460 mtd->reboot_notifier.notifier_call = cfi_intelext_reboot;
461 461
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index f072fcfde04e..f9a5331e9445 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -349,6 +349,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
349 { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri }, 349 { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri },
350#ifdef AMD_BOOTLOC_BUG 350#ifdef AMD_BOOTLOC_BUG
351 { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock }, 351 { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock },
352 { CFI_MFR_AMIC, CFI_ID_ANY, fixup_amd_bootblock },
352 { CFI_MFR_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock }, 353 { CFI_MFR_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock },
353#endif 354#endif
354 { CFI_MFR_AMD, 0x0050, fixup_use_secsi }, 355 { CFI_MFR_AMD, 0x0050, fixup_use_secsi },
@@ -440,7 +441,7 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
440 mtd->flags = MTD_CAP_NORFLASH; 441 mtd->flags = MTD_CAP_NORFLASH;
441 mtd->name = map->name; 442 mtd->name = map->name;
442 mtd->writesize = 1; 443 mtd->writesize = 1;
443 mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize; 444 mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
444 445
445 DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): write buffer size %d\n", 446 DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): write buffer size %d\n",
446 __func__, mtd->writebufsize); 447 __func__, mtd->writebufsize);
diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index c04b7658abe9..ed56ad3884fb 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -238,7 +238,7 @@ static struct mtd_info *cfi_staa_setup(struct map_info *map)
238 mtd->resume = cfi_staa_resume; 238 mtd->resume = cfi_staa_resume;
239 mtd->flags = MTD_CAP_NORFLASH & ~MTD_BIT_WRITEABLE; 239 mtd->flags = MTD_CAP_NORFLASH & ~MTD_BIT_WRITEABLE;
240 mtd->writesize = 8; /* FIXME: Should be 0 for STMicro flashes w/out ECC */ 240 mtd->writesize = 8; /* FIXME: Should be 0 for STMicro flashes w/out ECC */
241 mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize; 241 mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
242 map->fldrv = &cfi_staa_chipdrv; 242 map->fldrv = &cfi_staa_chipdrv;
243 __module_get(THIS_MODULE); 243 __module_get(THIS_MODULE);
244 mtd->name = map->name; 244 mtd->name = map->name;
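
All three writebufsize hunks correct the same off-by-interleave error:
MaxBufWriteSize is the per-chip log2 of the write buffer size, so the
map-wide buffer scales with the number of interleaved chips. A worked
example under assumed values:

	/* Two x16 chips interleaved, MaxBufWriteSize = 5 (32 bytes per chip):
	 *	old: writebufsize = 1 << 5                   = 32 bytes
	 *	new: writebufsize = cfi_interleave(cfi) << 5 = 2 << 5 = 64 bytes
	 * so callers sizing their write buffers see the true combined value.
	 */
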
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index e4eba6cc1b2e..3fb981d4bb51 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -655,7 +655,8 @@ static const struct spi_device_id m25p_ids[] = {
655 { "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) }, 655 { "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) },
656 { "at26df321", INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) }, 656 { "at26df321", INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) },
657 657
658 /* EON -- en25pxx */ 658 /* EON -- en25xxx */
659 { "en25f32", INFO(0x1c3116, 0, 64 * 1024, 64, SECT_4K) },
659 { "en25p32", INFO(0x1c2016, 0, 64 * 1024, 64, 0) }, 660 { "en25p32", INFO(0x1c2016, 0, 64 * 1024, 64, 0) },
660 { "en25p64", INFO(0x1c2017, 0, 64 * 1024, 128, 0) }, 661 { "en25p64", INFO(0x1c2017, 0, 64 * 1024, 128, 0) },
661 662
@@ -728,6 +729,8 @@ static const struct spi_device_id m25p_ids[] = {
728 { "m25pe80", INFO(0x208014, 0, 64 * 1024, 16, 0) }, 729 { "m25pe80", INFO(0x208014, 0, 64 * 1024, 16, 0) },
729 { "m25pe16", INFO(0x208015, 0, 64 * 1024, 32, SECT_4K) }, 730 { "m25pe16", INFO(0x208015, 0, 64 * 1024, 32, SECT_4K) },
730 731
732 { "m25px64", INFO(0x207117, 0, 64 * 1024, 128, 0) },
733
731 /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */ 734 /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
732 { "w25x10", INFO(0xef3011, 0, 64 * 1024, 2, SECT_4K) }, 735 { "w25x10", INFO(0xef3011, 0, 64 * 1024, 2, SECT_4K) },
733 { "w25x20", INFO(0xef3012, 0, 64 * 1024, 4, SECT_4K) }, 736 { "w25x20", INFO(0xef3012, 0, 64 * 1024, 4, SECT_4K) },
diff --git a/drivers/mtd/devices/mtdram.c b/drivers/mtd/devices/mtdram.c
index 26a6e809013d..1483e18971ce 100644
--- a/drivers/mtd/devices/mtdram.c
+++ b/drivers/mtd/devices/mtdram.c
@@ -121,6 +121,7 @@ int mtdram_init_device(struct mtd_info *mtd, void *mapped_address,
121 mtd->flags = MTD_CAP_RAM; 121 mtd->flags = MTD_CAP_RAM;
122 mtd->size = size; 122 mtd->size = size;
123 mtd->writesize = 1; 123 mtd->writesize = 1;
124 mtd->writebufsize = 64; /* Mimic CFI NOR flashes */
124 mtd->erasesize = MTDRAM_ERASE_SIZE; 125 mtd->erasesize = MTDRAM_ERASE_SIZE;
125 mtd->priv = mapped_address; 126 mtd->priv = mapped_address;
126 127
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
index 52393282eaf1..8d28fa02a5a2 100644
--- a/drivers/mtd/devices/phram.c
+++ b/drivers/mtd/devices/phram.c
@@ -117,6 +117,7 @@ static void unregister_devices(void)
117 list_for_each_entry_safe(this, safe, &phram_list, list) { 117 list_for_each_entry_safe(this, safe, &phram_list, list) {
118 del_mtd_device(&this->mtd); 118 del_mtd_device(&this->mtd);
119 iounmap(this->mtd.priv); 119 iounmap(this->mtd.priv);
120 kfree(this->mtd.name);
120 kfree(this); 121 kfree(this);
121 } 122 }
122} 123}
@@ -275,6 +276,8 @@ static int phram_setup(const char *val, struct kernel_param *kp)
275 ret = register_device(name, start, len); 276 ret = register_device(name, start, len);
276 if (!ret) 277 if (!ret)
277 pr_info("%s device: %#x at %#x\n", name, len, start); 278 pr_info("%s device: %#x at %#x\n", name, len, start);
279 else
280 kfree(name);
278 281
279 return ret; 282 return ret;
280} 283}
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 5d37d315fa98..44b1f46458ca 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -114,7 +114,7 @@ config MTD_SUN_UFLASH
114 114
115config MTD_SC520CDP 115config MTD_SC520CDP
116 tristate "CFI Flash device mapped on AMD SC520 CDP" 116 tristate "CFI Flash device mapped on AMD SC520 CDP"
117 depends on X86 && MTD_CFI && MTD_CONCAT 117 depends on X86 && MTD_CFI
118 help 118 help
119 The SC520 CDP board has two banks of CFI-compliant chips and one 119 The SC520 CDP board has two banks of CFI-compliant chips and one
120 Dual-in-line JEDEC chip. This 'mapping' driver supports that 120 Dual-in-line JEDEC chip. This 'mapping' driver supports that
@@ -262,7 +262,7 @@ config MTD_BCM963XX
262 262
263config MTD_DILNETPC 263config MTD_DILNETPC
264 tristate "CFI Flash device mapped on DIL/Net PC" 264 tristate "CFI Flash device mapped on DIL/Net PC"
265 depends on X86 && MTD_CONCAT && MTD_PARTITIONS && MTD_CFI_INTELEXT && BROKEN 265 depends on X86 && MTD_PARTITIONS && MTD_CFI_INTELEXT && BROKEN
266 help 266 help
267 MTD map driver for SSV DIL/Net PC Boards "DNP" and "ADNP". 267 MTD map driver for SSV DIL/Net PC Boards "DNP" and "ADNP".
268 For details, see <http://www.ssv-embedded.de/ssv/pc104/p169.htm> 268 For details, see <http://www.ssv-embedded.de/ssv/pc104/p169.htm>
@@ -552,4 +552,13 @@ config MTD_PISMO
552 552
553 When built as a module, it will be called pismo.ko 553 When built as a module, it will be called pismo.ko
554 554
555config MTD_LATCH_ADDR
556 tristate "Latch-assisted Flash Chip Support"
557 depends on MTD_COMPLEX_MAPPINGS
558 help
559 Map driver which allows flashes to be partially physically addressed,
560 with the upper address lines set by board-specific code.
561
562 If compiled as a module, it will be called latch-addr-flash.
563
555endmenu 564endmenu
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index c7869c7a6b18..08533bd5cba7 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -59,3 +59,4 @@ obj-$(CONFIG_MTD_RBTX4939) += rbtx4939-flash.o
59obj-$(CONFIG_MTD_VMU) += vmu-flash.o 59obj-$(CONFIG_MTD_VMU) += vmu-flash.o
60obj-$(CONFIG_MTD_GPIO_ADDR) += gpio-addr-flash.o 60obj-$(CONFIG_MTD_GPIO_ADDR) += gpio-addr-flash.o
61obj-$(CONFIG_MTD_BCM963XX) += bcm963xx-flash.o 61obj-$(CONFIG_MTD_BCM963XX) += bcm963xx-flash.o
62obj-$(CONFIG_MTD_LATCH_ADDR) += latch-addr-flash.o
diff --git a/drivers/mtd/maps/ceiva.c b/drivers/mtd/maps/ceiva.c
index c09f4f57093e..e5f645b775ad 100644
--- a/drivers/mtd/maps/ceiva.c
+++ b/drivers/mtd/maps/ceiva.c
@@ -194,16 +194,10 @@ static int __init clps_setup_mtd(struct clps_info *clps, int nr, struct mtd_info
194 * We detected multiple devices. Concatenate 194 * We detected multiple devices. Concatenate
195 * them together. 195 * them together.
196 */ 196 */
197#ifdef CONFIG_MTD_CONCAT
198 *rmtd = mtd_concat_create(subdev, found, 197 *rmtd = mtd_concat_create(subdev, found,
199 "clps flash"); 198 "clps flash");
200 if (*rmtd == NULL) 199 if (*rmtd == NULL)
201 ret = -ENXIO; 200 ret = -ENXIO;
202#else
203 printk(KERN_ERR "clps flash: multiple devices "
204 "found but MTD concat support disabled.\n");
205 ret = -ENXIO;
206#endif
207 } 201 }
208 } 202 }
209 203
diff --git a/drivers/mtd/maps/integrator-flash.c b/drivers/mtd/maps/integrator-flash.c
index 2aac41bde8b3..e22ff5adbbf4 100644
--- a/drivers/mtd/maps/integrator-flash.c
+++ b/drivers/mtd/maps/integrator-flash.c
@@ -202,7 +202,6 @@ static int armflash_probe(struct platform_device *dev)
202 if (info->nr_subdev == 1) 202 if (info->nr_subdev == 1)
203 info->mtd = info->subdev[0].mtd; 203 info->mtd = info->subdev[0].mtd;
204 else if (info->nr_subdev > 1) { 204 else if (info->nr_subdev > 1) {
205#ifdef CONFIG_MTD_CONCAT
206 struct mtd_info *cdev[info->nr_subdev]; 205 struct mtd_info *cdev[info->nr_subdev];
207 206
208 /* 207 /*
@@ -215,11 +214,6 @@ static int armflash_probe(struct platform_device *dev)
215 dev_name(&dev->dev)); 214 dev_name(&dev->dev));
216 if (info->mtd == NULL) 215 if (info->mtd == NULL)
217 err = -ENXIO; 216 err = -ENXIO;
218#else
219 printk(KERN_ERR "armflash: multiple devices found but "
220 "MTD concat support disabled.\n");
221 err = -ENXIO;
222#endif
223 } 217 }
224 218
225 if (err < 0) 219 if (err < 0)
@@ -244,10 +238,8 @@ static int armflash_probe(struct platform_device *dev)
244 cleanup: 238 cleanup:
245 if (info->mtd) { 239 if (info->mtd) {
246 del_mtd_partitions(info->mtd); 240 del_mtd_partitions(info->mtd);
247#ifdef CONFIG_MTD_CONCAT
248 if (info->mtd != info->subdev[0].mtd) 241 if (info->mtd != info->subdev[0].mtd)
249 mtd_concat_destroy(info->mtd); 242 mtd_concat_destroy(info->mtd);
250#endif
251 } 243 }
252 kfree(info->parts); 244 kfree(info->parts);
253 subdev_err: 245 subdev_err:
@@ -272,10 +264,8 @@ static int armflash_remove(struct platform_device *dev)
272 if (info) { 264 if (info) {
273 if (info->mtd) { 265 if (info->mtd) {
274 del_mtd_partitions(info->mtd); 266 del_mtd_partitions(info->mtd);
275#ifdef CONFIG_MTD_CONCAT
276 if (info->mtd != info->subdev[0].mtd) 267 if (info->mtd != info->subdev[0].mtd)
277 mtd_concat_destroy(info->mtd); 268 mtd_concat_destroy(info->mtd);
278#endif
279 } 269 }
280 kfree(info->parts); 270 kfree(info->parts);
281 271
diff --git a/drivers/mtd/maps/latch-addr-flash.c b/drivers/mtd/maps/latch-addr-flash.c
new file mode 100644
index 000000000000..ee2548085334
--- /dev/null
+++ b/drivers/mtd/maps/latch-addr-flash.c
@@ -0,0 +1,272 @@
1/*
2 * Interface for a NOR flash driver whose high address lines are latched
3 *
4 * Copyright © 2000 Nicolas Pitre <nico@cam.org>
5 * Copyright © 2005-2008 Analog Devices Inc.
6 * Copyright © 2008 MontaVista Software, Inc. <source@mvista.com>
7 *
8 * This file is licensed under the terms of the GNU General Public License
9 * version 2. This program is licensed "as is" without any warranty of any
10 * kind, whether express or implied.
11 */
12
13#include <linux/init.h>
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/mtd/mtd.h>
17#include <linux/mtd/map.h>
18#include <linux/mtd/partitions.h>
19#include <linux/platform_device.h>
20#include <linux/mtd/latch-addr-flash.h>
21#include <linux/slab.h>
22
23#define DRIVER_NAME "latch-addr-flash"
24
25struct latch_addr_flash_info {
26 struct mtd_info *mtd;
27 struct map_info map;
28 struct resource *res;
29
30 void (*set_window)(unsigned long offset, void *data);
31 void *data;
32
33 /* cached window mask; could also be derived from res */
34 unsigned long win_mask;
35
36 int nr_parts;
37 struct mtd_partition *parts;
38
39 spinlock_t lock;
40};
41
42static map_word lf_read(struct map_info *map, unsigned long ofs)
43{
44 struct latch_addr_flash_info *info;
45 map_word datum;
46
47 info = (struct latch_addr_flash_info *)map->map_priv_1;
48
49 spin_lock(&info->lock);
50
51 info->set_window(ofs, info->data);
52 datum = inline_map_read(map, info->win_mask & ofs);
53
54 spin_unlock(&info->lock);
55
56 return datum;
57}
58
59static void lf_write(struct map_info *map, map_word datum, unsigned long ofs)
60{
61 struct latch_addr_flash_info *info;
62
63 info = (struct latch_addr_flash_info *)map->map_priv_1;
64
65 spin_lock(&info->lock);
66
67 info->set_window(ofs, info->data);
68 inline_map_write(map, datum, info->win_mask & ofs);
69
70 spin_unlock(&info->lock);
71}
72
73static void lf_copy_from(struct map_info *map, void *to,
74 unsigned long from, ssize_t len)
75{
76 struct latch_addr_flash_info *info =
77 (struct latch_addr_flash_info *) map->map_priv_1;
78 unsigned n;
79
80 while (len > 0) {
81 n = info->win_mask + 1 - (from & info->win_mask);
82 if (n > len)
83 n = len;
84
85 spin_lock(&info->lock);
86
87 info->set_window(from, info->data);
88 memcpy_fromio(to, map->virt + (from & info->win_mask), n);
89
90 spin_unlock(&info->lock);
91
92 to += n;
93 from += n;
94 len -= n;
95 }
96}
97
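The three accessors above share one windowing idea: set_window()
latches the high address bits and win_mask selects the in-window
offset. A worked example, assuming a 64 KiB window (win_size = 0x10000,
so win_mask = 0xffff):

	/* lf_read(map, 0x12345):
	 *	set_window(0x12345, data);	board code latches A16 and up
	 *	inline_map_read(map, 0x2345);	0x12345 & 0xffff
	 *
	 * lf_copy_from() additionally splits requests at window boundaries:
	 *	n = win_mask + 1 - (from & win_mask)
	 * bytes remain before the latch must be reloaded; for example
	 * from = 0x1ff00 leaves n = 0x10000 - 0xff00 = 0x100 bytes.
	 */
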
98static char *rom_probe_types[] = { "cfi_probe", NULL };
99
100static char *part_probe_types[] = { "cmdlinepart", NULL };
101
102static int latch_addr_flash_remove(struct platform_device *dev)
103{
104 struct latch_addr_flash_info *info;
105 struct latch_addr_flash_data *latch_addr_data;
106
107 info = platform_get_drvdata(dev);
108 if (info == NULL)
109 return 0;
110 platform_set_drvdata(dev, NULL);
111
112 latch_addr_data = dev->dev.platform_data;
113
114 if (info->mtd != NULL) {
115 if (mtd_has_partitions()) {
116 if (info->nr_parts) {
117 del_mtd_partitions(info->mtd);
118 kfree(info->parts);
119 } else if (latch_addr_data->nr_parts) {
120 del_mtd_partitions(info->mtd);
121 } else {
122 del_mtd_device(info->mtd);
123 }
124 } else {
125 del_mtd_device(info->mtd);
126 }
127 map_destroy(info->mtd);
128 }
129
130 if (info->map.virt != NULL)
131 iounmap(info->map.virt);
132
133 if (info->res != NULL)
134 release_mem_region(info->res->start, resource_size(info->res));
135
136 kfree(info);
137
138 if (latch_addr_data->done)
139 latch_addr_data->done(latch_addr_data->data);
140
141 return 0;
142}
143
144static int __devinit latch_addr_flash_probe(struct platform_device *dev)
145{
146 struct latch_addr_flash_data *latch_addr_data;
147 struct latch_addr_flash_info *info;
148 resource_size_t win_base = dev->resource->start;
149 resource_size_t win_size = resource_size(dev->resource);
150 char **probe_type;
151 int chipsel;
152 int err;
153
154 latch_addr_data = dev->dev.platform_data;
155 if (latch_addr_data == NULL)
156 return -ENODEV;
157
158 pr_notice("latch-addr platform flash device: %#llx byte "
159 "window at %#.8llx\n",
160 (unsigned long long)win_size, (unsigned long long)win_base);
161
162 chipsel = dev->id;
163
164 if (latch_addr_data->init) {
165 err = latch_addr_data->init(latch_addr_data->data, chipsel);
166 if (err != 0)
167 return err;
168 }
169
170 info = kzalloc(sizeof(struct latch_addr_flash_info), GFP_KERNEL);
171 if (info == NULL) {
172 err = -ENOMEM;
173 goto done;
174 }
175
176 platform_set_drvdata(dev, info);
177
178 info->res = request_mem_region(win_base, win_size, DRIVER_NAME);
179 if (info->res == NULL) {
180 dev_err(&dev->dev, "Could not reserve memory region\n");
181 err = -EBUSY;
182 goto free_info;
183 }
184
185 info->map.name = DRIVER_NAME;
186 info->map.size = latch_addr_data->size;
187 info->map.bankwidth = latch_addr_data->width;
188
189 info->map.phys = NO_XIP;
190 info->map.virt = ioremap(win_base, win_size);
191 if (!info->map.virt) {
192 err = -ENOMEM;
193 goto free_res;
194 }
195
196 info->map.map_priv_1 = (unsigned long)info;
197
198 info->map.read = lf_read;
199 info->map.copy_from = lf_copy_from;
200 info->map.write = lf_write;
201 info->set_window = latch_addr_data->set_window;
202 info->data = latch_addr_data->data;
203 info->win_mask = win_size - 1;
204
205 spin_lock_init(&info->lock);
206
207 for (probe_type = rom_probe_types; !info->mtd && *probe_type;
208 probe_type++)
209 info->mtd = do_map_probe(*probe_type, &info->map);
210
211 if (info->mtd == NULL) {
212 dev_err(&dev->dev, "map_probe failed\n");
213 err = -ENODEV;
214 goto iounmap;
215 }
216 info->mtd->owner = THIS_MODULE;
217
218 if (mtd_has_partitions()) {
219
220 err = parse_mtd_partitions(info->mtd,
221 (const char **)part_probe_types,
222 &info->parts, 0);
223 if (err > 0) {
224 add_mtd_partitions(info->mtd, info->parts, err);
225 return 0;
226 }
227 if (latch_addr_data->nr_parts) {
228 pr_notice("Using latch-addr-flash partition information\n");
229 add_mtd_partitions(info->mtd, latch_addr_data->parts,
230 latch_addr_data->nr_parts);
231 return 0;
232 }
233 }
234 add_mtd_device(info->mtd);
235 return 0;
236
237iounmap:
238 iounmap(info->map.virt);
239free_res:
240 release_mem_region(info->res->start, resource_size(info->res));
241free_info:
242 kfree(info);
243done:
244 if (latch_addr_data->done)
245 latch_addr_data->done(latch_addr_data->data);
246 return err;
247}
248
249static struct platform_driver latch_addr_flash_driver = {
250 .probe = latch_addr_flash_probe,
251 .remove = __devexit_p(latch_addr_flash_remove),
252 .driver = {
253 .name = DRIVER_NAME,
254 },
255};
256
257static int __init latch_addr_flash_init(void)
258{
259 return platform_driver_register(&latch_addr_flash_driver);
260}
261module_init(latch_addr_flash_init);
262
263static void __exit latch_addr_flash_exit(void)
264{
265 platform_driver_unregister(&latch_addr_flash_driver);
266}
267module_exit(latch_addr_flash_exit);
268
269MODULE_AUTHOR("David Griego <dgriego@mvista.com>");
270MODULE_DESCRIPTION("MTD map driver for flashes whose upper address "
271 "lines are set by board-specific code");
272MODULE_LICENSE("GPL v2");
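
A board uses this driver by registering a platform device named
"latch-addr-flash" whose platform_data supplies the window callback. A
minimal sketch; the field names follow their use in the probe above
(they come from linux/mtd/latch-addr-flash.h), while the addresses,
sizes, and the GPIO helper are hypothetical:

	static void board_set_window(unsigned long offset, void *data)
	{
		/* drive the latched upper address lines; helper is hypothetical */
		board_latch_set(offset >> 16);
	}

	static struct latch_addr_flash_data board_flash_data = {
		.width		= 2,			/* 16-bit bus */
		.size		= 4 * 1024 * 1024,	/* 4 MiB of flash */
		.set_window	= board_set_window,
	};

	static struct resource board_flash_resource = {
		.start	= 0x20000000,
		.end	= 0x2000ffff,			/* 64 KiB window */
		.flags	= IORESOURCE_MEM,
	};

	static struct platform_device board_flash_device = {
		.name		= "latch-addr-flash",
		.id		= 0,	/* passed to init() as the chip select */
		.resource	= &board_flash_resource,
		.num_resources	= 1,
		.dev		= {
			.platform_data = &board_flash_data,
		},
	};
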
diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 4c18b98a3110..7522df4f71f1 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -59,10 +59,8 @@ static int physmap_flash_remove(struct platform_device *dev)
59#else 59#else
60 del_mtd_device(info->cmtd); 60 del_mtd_device(info->cmtd);
61#endif 61#endif
62#ifdef CONFIG_MTD_CONCAT
63 if (info->cmtd != info->mtd[0]) 62 if (info->cmtd != info->mtd[0])
64 mtd_concat_destroy(info->cmtd); 63 mtd_concat_destroy(info->cmtd);
65#endif
66 } 64 }
67 65
68 for (i = 0; i < MAX_RESOURCES; i++) { 66 for (i = 0; i < MAX_RESOURCES; i++) {
@@ -159,15 +157,9 @@ static int physmap_flash_probe(struct platform_device *dev)
159 /* 157 /*
160 * We detected multiple devices. Concatenate them together. 158 * We detected multiple devices. Concatenate them together.
161 */ 159 */
162#ifdef CONFIG_MTD_CONCAT
163 info->cmtd = mtd_concat_create(info->mtd, devices_found, dev_name(&dev->dev)); 160 info->cmtd = mtd_concat_create(info->mtd, devices_found, dev_name(&dev->dev));
164 if (info->cmtd == NULL) 161 if (info->cmtd == NULL)
165 err = -ENXIO; 162 err = -ENXIO;
166#else
167 printk(KERN_ERR "physmap-flash: multiple devices "
168 "found but MTD concat support disabled.\n");
169 err = -ENXIO;
170#endif
171 } 163 }
172 if (err) 164 if (err)
173 goto err_out; 165 goto err_out;
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 3db0cb083d31..bd483f0c57e1 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -104,12 +104,10 @@ static int of_flash_remove(struct platform_device *dev)
104 return 0; 104 return 0;
105 dev_set_drvdata(&dev->dev, NULL); 105 dev_set_drvdata(&dev->dev, NULL);
106 106
107#ifdef CONFIG_MTD_CONCAT
108 if (info->cmtd != info->list[0].mtd) { 107 if (info->cmtd != info->list[0].mtd) {
109 del_mtd_device(info->cmtd); 108 del_mtd_device(info->cmtd);
110 mtd_concat_destroy(info->cmtd); 109 mtd_concat_destroy(info->cmtd);
111 } 110 }
112#endif
113 111
114 if (info->cmtd) { 112 if (info->cmtd) {
115 if (OF_FLASH_PARTS(info)) { 113 if (OF_FLASH_PARTS(info)) {
@@ -337,16 +335,10 @@ static int __devinit of_flash_probe(struct platform_device *dev)
337 /* 335 /*
338 * We detected multiple devices. Concatenate them together. 336 * We detected multiple devices. Concatenate them together.
339 */ 337 */
340#ifdef CONFIG_MTD_CONCAT
341 info->cmtd = mtd_concat_create(mtd_list, info->list_size, 338 info->cmtd = mtd_concat_create(mtd_list, info->list_size,
342 dev_name(&dev->dev)); 339 dev_name(&dev->dev));
343 if (info->cmtd == NULL) 340 if (info->cmtd == NULL)
344 err = -ENXIO; 341 err = -ENXIO;
345#else
346 printk(KERN_ERR "physmap_of: multiple devices "
347 "found but MTD concat support disabled.\n");
348 err = -ENXIO;
349#endif
350 } 342 }
351 if (err) 343 if (err)
352 goto err_out; 344 goto err_out;
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index f3af87e08ecd..da875908ea8e 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -232,10 +232,8 @@ static void sa1100_destroy(struct sa_info *info, struct flash_platform_data *pla
232 else 232 else
233 del_mtd_partitions(info->mtd); 233 del_mtd_partitions(info->mtd);
234#endif 234#endif
235#ifdef CONFIG_MTD_CONCAT
236 if (info->mtd != info->subdev[0].mtd) 235 if (info->mtd != info->subdev[0].mtd)
237 mtd_concat_destroy(info->mtd); 236 mtd_concat_destroy(info->mtd);
238#endif
239 } 237 }
240 238
241 kfree(info->parts); 239 kfree(info->parts);
@@ -321,7 +319,6 @@ sa1100_setup_mtd(struct platform_device *pdev, struct flash_platform_data *plat)
321 info->mtd = info->subdev[0].mtd; 319 info->mtd = info->subdev[0].mtd;
322 ret = 0; 320 ret = 0;
323 } else if (info->num_subdev > 1) { 321 } else if (info->num_subdev > 1) {
324#ifdef CONFIG_MTD_CONCAT
325 struct mtd_info *cdev[nr]; 322 struct mtd_info *cdev[nr];
326 /* 323 /*
327 * We detected multiple devices. Concatenate them together. 324 * We detected multiple devices. Concatenate them together.
@@ -333,11 +330,6 @@ sa1100_setup_mtd(struct platform_device *pdev, struct flash_platform_data *plat)
 					    plat->name);
 		if (info->mtd == NULL)
 			ret = -ENXIO;
-#else
-		printk(KERN_ERR "SA1100 flash: multiple devices "
-		       "found but MTD concat support disabled.\n");
-		ret = -ENXIO;
-#endif
 	}
 
 	if (ret == 0)
diff --git a/drivers/mtd/maps/ts5500_flash.c b/drivers/mtd/maps/ts5500_flash.c
index e2147bf11c88..e02dfa9d4ddd 100644
--- a/drivers/mtd/maps/ts5500_flash.c
+++ b/drivers/mtd/maps/ts5500_flash.c
@@ -94,7 +94,6 @@ static int __init init_ts5500_map(void)
 	return 0;
 
 err1:
-	map_destroy(mymtd);
 	iounmap(ts5500_map.virt);
 err2:
 	return rc;
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index e0a2373bf0e2..a534e1f0c348 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -40,7 +40,7 @@
 static LIST_HEAD(blktrans_majors);
 static DEFINE_MUTEX(blktrans_ref_mutex);
 
-void blktrans_dev_release(struct kref *kref)
+static void blktrans_dev_release(struct kref *kref)
 {
 	struct mtd_blktrans_dev *dev =
 		container_of(kref, struct mtd_blktrans_dev, ref);
@@ -67,7 +67,7 @@ unlock:
 	return dev;
 }
 
-void blktrans_dev_put(struct mtd_blktrans_dev *dev)
+static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
 {
 	mutex_lock(&blktrans_ref_mutex);
 	kref_put(&dev->ref, blktrans_dev_release);
@@ -119,18 +119,43 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	}
 }
 
+int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev)
+{
+	if (kthread_should_stop())
+		return 1;
+
+	return dev->bg_stop;
+}
+EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background);
+
 static int mtd_blktrans_thread(void *arg)
 {
 	struct mtd_blktrans_dev *dev = arg;
+	struct mtd_blktrans_ops *tr = dev->tr;
 	struct request_queue *rq = dev->rq;
 	struct request *req = NULL;
+	int background_done = 0;
 
 	spin_lock_irq(rq->queue_lock);
 
 	while (!kthread_should_stop()) {
 		int res;
 
+		dev->bg_stop = false;
 		if (!req && !(req = blk_fetch_request(rq))) {
+			if (tr->background && !background_done) {
+				spin_unlock_irq(rq->queue_lock);
+				mutex_lock(&dev->lock);
+				tr->background(dev);
+				mutex_unlock(&dev->lock);
+				spin_lock_irq(rq->queue_lock);
+				/*
+				 * Do background processing just once per idle
+				 * period.
+				 */
+				background_done = !dev->bg_stop;
+				continue;
+			}
 			set_current_state(TASK_INTERRUPTIBLE);
 
 			if (kthread_should_stop())
@@ -152,6 +177,8 @@ static int mtd_blktrans_thread(void *arg)
 
 		if (!__blk_end_request_cur(req, res))
 			req = NULL;
+
+		background_done = 0;
 	}
 
 	if (req)
@@ -172,8 +199,10 @@ static void mtd_blktrans_request(struct request_queue *rq)
 	if (!dev)
 		while ((req = blk_fetch_request(rq)) != NULL)
 			__blk_end_request_all(req, -ENODEV);
-	else
+	else {
+		dev->bg_stop = true;
 		wake_up_process(dev->thread);
+	}
 }
 
 static int blktrans_open(struct block_device *bdev, fmode_t mode)
179static int blktrans_open(struct block_device *bdev, fmode_t mode) 208static int blktrans_open(struct block_device *bdev, fmode_t mode)
@@ -379,9 +408,10 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
379 new->rq->queuedata = new; 408 new->rq->queuedata = new;
380 blk_queue_logical_block_size(new->rq, tr->blksize); 409 blk_queue_logical_block_size(new->rq, tr->blksize);
381 410
382 if (tr->discard) 411 if (tr->discard) {
383 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, 412 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
384 new->rq); 413 new->rq->limits.max_discard_sectors = UINT_MAX;
414 }
385 415
386 gd->queue = new->rq; 416 gd->queue = new->rq;
387 417
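The hunks above add an idle-time hook for block-translation drivers: when the request queue drains, the queue thread calls tr->background() once per idle period, and mtd_blktrans_request() sets dev->bg_stop so in-progress background work can yield as soon as a new request arrives. A minimal driver-side callback might look like the sketch below (example_do_one_gc_pass is hypothetical; mtdswap, added later in this patch, is the real in-tree user):

static void example_background(struct mtd_blktrans_dev *dev)
{
	/* Work in small units so a queued request is honoured quickly. */
	while (example_do_one_gc_pass(dev) == 0) {
		/* Nonzero once a request is pending or the thread stops. */
		if (mtd_blktrans_cease_background(dev))
			return;
	}
}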
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 5f5777bd3f75..5060e608ea5d 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -750,6 +750,7 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c
 	struct mtd_concat *concat;
 	uint32_t max_erasesize, curr_erasesize;
 	int num_erase_region;
+	int max_writebufsize = 0;
 
 	printk(KERN_NOTICE "Concatenating MTD devices:\n");
 	for (i = 0; i < num_devs; i++)
@@ -776,7 +777,12 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c
 	concat->mtd.size = subdev[0]->size;
 	concat->mtd.erasesize = subdev[0]->erasesize;
 	concat->mtd.writesize = subdev[0]->writesize;
-	concat->mtd.writebufsize = subdev[0]->writebufsize;
+
+	for (i = 0; i < num_devs; i++)
+		if (max_writebufsize < subdev[i]->writebufsize)
+			max_writebufsize = subdev[i]->writebufsize;
+	concat->mtd.writebufsize = max_writebufsize;
+
 	concat->mtd.subpage_sft = subdev[0]->subpage_sft;
 	concat->mtd.oobsize = subdev[0]->oobsize;
 	concat->mtd.oobavail = subdev[0]->oobavail;
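The writebufsize hunk matters when the concatenated chips differ: upper layers (UBI in particular) size their write buffers from this field, so the logical device must advertise the largest value of any subdevice instead of inheriting subdev[0]'s. A small usage sketch, with chip_a and chip_b as placeholders:

	struct mtd_info *parts[2] = { chip_a, chip_b };
	struct mtd_info *combined = mtd_concat_create(parts, 2, "example-concat");
	/* If chip_a reports writebufsize 512 and chip_b reports 1024,
	 * combined->writebufsize is now 1024 rather than 512. */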
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 527cebf58da4..da69bc8a5a7d 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -43,7 +43,7 @@
  * backing device capabilities for non-mappable devices (such as NAND flash)
  * - permits private mappings, copies are taken of the data
  */
-struct backing_dev_info mtd_bdi_unmappable = {
+static struct backing_dev_info mtd_bdi_unmappable = {
 	.capabilities	= BDI_CAP_MAP_COPY,
 };
 
@@ -52,7 +52,7 @@ struct backing_dev_info mtd_bdi_unmappable = {
  * - permits private mappings, copies are taken of the data
  * - permits non-writable shared mappings
  */
-struct backing_dev_info mtd_bdi_ro_mappable = {
+static struct backing_dev_info mtd_bdi_ro_mappable = {
 	.capabilities	= (BDI_CAP_MAP_COPY | BDI_CAP_MAP_DIRECT |
 			   BDI_CAP_EXEC_MAP | BDI_CAP_READ_MAP),
 };
@@ -62,7 +62,7 @@ struct backing_dev_info mtd_bdi_ro_mappable = {
  * - permits private mappings, copies are taken of the data
  * - permits non-writable shared mappings
  */
-struct backing_dev_info mtd_bdi_rw_mappable = {
+static struct backing_dev_info mtd_bdi_rw_mappable = {
 	.capabilities	= (BDI_CAP_MAP_COPY | BDI_CAP_MAP_DIRECT |
 			   BDI_CAP_EXEC_MAP | BDI_CAP_READ_MAP |
 			   BDI_CAP_WRITE_MAP),
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
new file mode 100644
index 000000000000..237913c5c92c
--- /dev/null
+++ b/drivers/mtd/mtdswap.c
@@ -0,0 +1,1587 @@
1/*
2 * Swap block device support for MTDs
3 * Turns an MTD device into a swap device with block wear leveling
4 *
5 * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
6 *
7 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
8 *
9 * Based on Richard Purdie's earlier implementation in 2007. Background
10 * support and lock-less operation written by Adrian Hunter.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * version 2 as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
24 * 02110-1301 USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/mtd/mtd.h>
30#include <linux/mtd/blktrans.h>
31#include <linux/rbtree.h>
32#include <linux/sched.h>
33#include <linux/slab.h>
34#include <linux/vmalloc.h>
35#include <linux/genhd.h>
36#include <linux/swap.h>
37#include <linux/debugfs.h>
38#include <linux/seq_file.h>
39#include <linux/device.h>
40#include <linux/math64.h>
41
42#define MTDSWAP_PREFIX "mtdswap"
43
44/*
45 * The number of free eraseblocks at which GC should stop.
46 */
47#define CLEAN_BLOCK_THRESHOLD 20
48
49/*
50 * Number of free eraseblocks below which GC can also collect low frag
51 * blocks.
52 */
53#define LOW_FRAG_GC_TRESHOLD 5
54
55/*
56 * Wear level cost amortization. We want to do wear leveling in the background
57 * without disturbing GC too much. This is done by defining a maximum GC frequency.
58 * A frequency value of 6 means 1/6 of the GC passes will pick an erase block based
59 * on the biggest wear difference rather than the biggest dirtiness.
60 *
61 * The lower freq2 should be chosen so that the maximum erase difference keeps
62 * decreasing even if a malicious application is deliberately trying to make
63 * erase differences large.
64 */
65#define MAX_ERASE_DIFF 4000
66#define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF
67#define COLLECT_NONDIRTY_FREQ1 6
68#define COLLECT_NONDIRTY_FREQ2 4
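/*
 * Worked example (editor's illustration, not part of the submission):
 * with the defaults above, wear-driven collection only starts once the
 * largest erase-count spread exceeds MAX_ERASE_DIFF (4000). Per the
 * comment above, the pick rate then ramps from roughly one GC pass in
 * six (FREQ1) to one pass in four (FREQ2) as the spread approaches
 * MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE (8000); see mtdswap_wlfreq().
 */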
69
70#define PAGE_UNDEF UINT_MAX
71#define BLOCK_UNDEF UINT_MAX
72#define BLOCK_ERROR (UINT_MAX - 1)
73#define BLOCK_MAX (UINT_MAX - 2)
74
75#define EBLOCK_BAD (1 << 0)
76#define EBLOCK_NOMAGIC (1 << 1)
77#define EBLOCK_BITFLIP (1 << 2)
78#define EBLOCK_FAILED (1 << 3)
79#define EBLOCK_READERR (1 << 4)
80#define EBLOCK_IDX_SHIFT 5
81
82struct swap_eb {
83 struct rb_node rb;
84 struct rb_root *root;
85
86 unsigned int flags;
87 unsigned int active_count;
88 unsigned int erase_count;
89	unsigned int pad;		/* speeds up pointer decrement */
90};
91
92#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
93 rb)->erase_count)
94#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
95 rb)->erase_count)
96
97struct mtdswap_tree {
98 struct rb_root root;
99 unsigned int count;
100};
101
102enum {
103 MTDSWAP_CLEAN,
104 MTDSWAP_USED,
105 MTDSWAP_LOWFRAG,
106 MTDSWAP_HIFRAG,
107 MTDSWAP_DIRTY,
108 MTDSWAP_BITFLIP,
109 MTDSWAP_FAILING,
110 MTDSWAP_TREE_CNT,
111};
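/*
 * Summary of the tree taxonomy (editor's note, derived from
 * mtdswap_store_eb() and mtdswap_gc() below): a block whose pages are
 * all in use sits in USED, an empty one in DIRTY, a more-than-half
 * full one in LOWFRAG and the rest in HIFRAG; read problems route a
 * block to BITFLIP or FAILING, and it returns to CLEAN only after a
 * successful erase plus clean-marker write.
 */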
112
113struct mtdswap_dev {
114 struct mtd_blktrans_dev *mbd_dev;
115 struct mtd_info *mtd;
116 struct device *dev;
117
118 unsigned int *page_data;
119 unsigned int *revmap;
120
121 unsigned int eblks;
122 unsigned int spare_eblks;
123 unsigned int pages_per_eblk;
124 unsigned int max_erase_count;
125 struct swap_eb *eb_data;
126
127 struct mtdswap_tree trees[MTDSWAP_TREE_CNT];
128
129 unsigned long long sect_read_count;
130 unsigned long long sect_write_count;
131 unsigned long long mtd_write_count;
132 unsigned long long mtd_read_count;
133 unsigned long long discard_count;
134 unsigned long long discard_page_count;
135
136 unsigned int curr_write_pos;
137 struct swap_eb *curr_write;
138
139 char *page_buf;
140 char *oob_buf;
141
142 struct dentry *debugfs_root;
143};
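/*
 * Note on the two index arrays above (editor's addition): page_data[]
 * maps a swap page number to the PAGE_SIZE flash slot currently
 * holding it, or BLOCK_UNDEF/BLOCK_ERROR; revmap[] is the inverse,
 * mapping a flash slot back to its swap page, or PAGE_UNDEF when the
 * slot is free or stale. Keeping both directions lets the garbage
 * collector find the owning page of any slot without scanning.
 */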
144
145struct mtdswap_oobdata {
146 __le16 magic;
147 __le32 count;
148} __attribute__((packed));
149
150#define MTDSWAP_MAGIC_CLEAN 0x2095
151#define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1)
152#define MTDSWAP_TYPE_CLEAN 0
153#define MTDSWAP_TYPE_DIRTY 1
154#define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata)
155
156#define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */
157#define MTDSWAP_IO_RETRIES 3
158
159enum {
160 MTDSWAP_SCANNED_CLEAN,
161 MTDSWAP_SCANNED_DIRTY,
162 MTDSWAP_SCANNED_BITFLIP,
163 MTDSWAP_SCANNED_BAD,
164};
165
166/*
167 * In the worst case mtdswap_writesect() has allocated the last clean
168 * page from the current block and is then pre-empted by the GC
169 * thread. The thread can consume a full erase block when moving a
170 * block.
171 */
172#define MIN_SPARE_EBLOCKS 2
173#define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1)
174
175#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root)
176#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
177#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
178#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count)
179
180#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv)
181
182static char partitions[128] = "";
183module_param_string(partitions, partitions, sizeof(partitions), 0444);
184MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap, "
185		 "e.g. partitions=\"1,3,5\"");
186
187static unsigned int spare_eblocks = 10;
188module_param(spare_eblocks, uint, 0444);
189MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
190 "garbage collection (default 10%)");
191
192static bool header; /* false */
193module_param(header, bool, 0444);
194MODULE_PARM_DESC(header,
195 "Include builtin swap header (default 0, without header)");
196
197static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);
198
199static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
200{
201 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
202}
203
204static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
205{
206 unsigned int oldidx;
207 struct mtdswap_tree *tp;
208
209 if (eb->root) {
210 tp = container_of(eb->root, struct mtdswap_tree, root);
211 oldidx = tp - &d->trees[0];
212
213 d->trees[oldidx].count--;
214 rb_erase(&eb->rb, eb->root);
215 }
216}
217
218static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
219{
220 struct rb_node **p, *parent = NULL;
221 struct swap_eb *cur;
222
223 p = &root->rb_node;
224 while (*p) {
225 parent = *p;
226 cur = rb_entry(parent, struct swap_eb, rb);
227 if (eb->erase_count > cur->erase_count)
228 p = &(*p)->rb_right;
229 else
230 p = &(*p)->rb_left;
231 }
232
233 rb_link_node(&eb->rb, parent, p);
234 rb_insert_color(&eb->rb, root);
235}
236
237static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
238{
239 struct rb_root *root;
240
241 if (eb->root == &d->trees[idx].root)
242 return;
243
244 mtdswap_eb_detach(d, eb);
245 root = &d->trees[idx].root;
246 __mtdswap_rb_add(root, eb);
247 eb->root = root;
248 d->trees[idx].count++;
249}
250
251static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
252{
253 struct rb_node *p;
254 unsigned int i;
255
256 p = rb_first(root);
257 i = 0;
258 while (i < idx && p) {
259 p = rb_next(p);
260 i++;
261 }
262
263 return p;
264}
265
266static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
267{
268 int ret;
269 loff_t offset;
270
271 d->spare_eblks--;
272 eb->flags |= EBLOCK_BAD;
273 mtdswap_eb_detach(d, eb);
274 eb->root = NULL;
275
276 /* badblocks not supported */
277 if (!d->mtd->block_markbad)
278 return 1;
279
280 offset = mtdswap_eb_offset(d, eb);
281 dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
282 ret = d->mtd->block_markbad(d->mtd, offset);
283
284 if (ret) {
285 dev_warn(d->dev, "Mark block bad failed for block at %08llx "
286 "error %d\n", offset, ret);
287 return ret;
288 }
289
290 return 1;
291
292}
293
294static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
295{
296 unsigned int marked = eb->flags & EBLOCK_FAILED;
297 struct swap_eb *curr_write = d->curr_write;
298
299 eb->flags |= EBLOCK_FAILED;
300 if (curr_write == eb) {
301 d->curr_write = NULL;
302
303 if (!marked && d->curr_write_pos != 0) {
304 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
305 return 0;
306 }
307 }
308
309 return mtdswap_handle_badblock(d, eb);
310}
311
312static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
313 struct mtd_oob_ops *ops)
314{
315 int ret = d->mtd->read_oob(d->mtd, from, ops);
316
317 if (ret == -EUCLEAN)
318 return ret;
319
320 if (ret) {
321 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
322 ret, from);
323 return ret;
324 }
325
326 if (ops->oobretlen < ops->ooblen) {
327		dev_warn(d->dev, "Read OOB returned a short read (%zd bytes, not "
328 "%zd) for block at %08llx\n",
329 ops->oobretlen, ops->ooblen, from);
330 return -EIO;
331 }
332
333 return 0;
334}
335
336static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
337{
338 struct mtdswap_oobdata *data, *data2;
339 int ret;
340 loff_t offset;
341 struct mtd_oob_ops ops;
342
343 offset = mtdswap_eb_offset(d, eb);
344
345 /* Check first if the block is bad. */
346 if (d->mtd->block_isbad && d->mtd->block_isbad(d->mtd, offset))
347 return MTDSWAP_SCANNED_BAD;
348
349 ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
350 ops.oobbuf = d->oob_buf;
351 ops.ooboffs = 0;
352 ops.datbuf = NULL;
353 ops.mode = MTD_OOB_AUTO;
354
355 ret = mtdswap_read_oob(d, offset, &ops);
356
357 if (ret && ret != -EUCLEAN)
358 return ret;
359
360 data = (struct mtdswap_oobdata *)d->oob_buf;
361 data2 = (struct mtdswap_oobdata *)
362 (d->oob_buf + d->mtd->ecclayout->oobavail);
363
364 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
365 eb->erase_count = le32_to_cpu(data->count);
366 if (ret == -EUCLEAN)
367 ret = MTDSWAP_SCANNED_BITFLIP;
368 else {
369 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
370 ret = MTDSWAP_SCANNED_DIRTY;
371 else
372 ret = MTDSWAP_SCANNED_CLEAN;
373 }
374 } else {
375 eb->flags |= EBLOCK_NOMAGIC;
376 ret = MTDSWAP_SCANNED_DIRTY;
377 }
378
379 return ret;
380}
381
382static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
383 u16 marker)
384{
385 struct mtdswap_oobdata n;
386 int ret;
387 loff_t offset;
388 struct mtd_oob_ops ops;
389
390 ops.ooboffs = 0;
391 ops.oobbuf = (uint8_t *)&n;
392 ops.mode = MTD_OOB_AUTO;
393 ops.datbuf = NULL;
394
395 if (marker == MTDSWAP_TYPE_CLEAN) {
396 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
397 n.count = cpu_to_le32(eb->erase_count);
398 ops.ooblen = MTDSWAP_OOBSIZE;
399 offset = mtdswap_eb_offset(d, eb);
400 } else {
401 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
402 ops.ooblen = sizeof(n.magic);
403 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
404 }
405
406 ret = d->mtd->write_oob(d->mtd, offset , &ops);
407
408 if (ret) {
409 dev_warn(d->dev, "Write OOB failed for block at %08llx "
410 "error %d\n", offset, ret);
411 if (ret == -EIO || ret == -EBADMSG)
412 mtdswap_handle_write_error(d, eb);
413 return ret;
414 }
415
416 if (ops.oobretlen != ops.ooblen) {
417 dev_warn(d->dev, "Short OOB write for block at %08llx: "
418 "%zd not %zd\n",
419 offset, ops.oobretlen, ops.ooblen);
420 return ret;
421 }
422
423 return 0;
424}
425
426/*
427 * Are there any erase blocks without a MAGIC_CLEAN header, presumably
428 * because power was cut off after erase but before the header write? We
429 * need to guesstimate the erase count.
430 */
431static void mtdswap_check_counts(struct mtdswap_dev *d)
432{
433 struct rb_root hist_root = RB_ROOT;
434 struct rb_node *medrb;
435 struct swap_eb *eb;
436 unsigned int i, cnt, median;
437
438 cnt = 0;
439 for (i = 0; i < d->eblks; i++) {
440 eb = d->eb_data + i;
441
442 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
443 continue;
444
445 __mtdswap_rb_add(&hist_root, eb);
446 cnt++;
447 }
448
449 if (cnt == 0)
450 return;
451
452 medrb = mtdswap_rb_index(&hist_root, cnt / 2);
453 median = rb_entry(medrb, struct swap_eb, rb)->erase_count;
454
455 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);
456
457 for (i = 0; i < d->eblks; i++) {
458 eb = d->eb_data + i;
459
460 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
461 eb->erase_count = median;
462
463 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
464 continue;
465
466 rb_erase(&eb->rb, &hist_root);
467 }
468}
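/*
 * Editor's note: using the median rather than the mean above is
 * presumably deliberate, so that a handful of blocks with implausible
 * counters cannot skew the erase count guessed for headerless blocks.
 */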
469
470static void mtdswap_scan_eblks(struct mtdswap_dev *d)
471{
472 int status;
473 unsigned int i, idx;
474 struct swap_eb *eb;
475
476 for (i = 0; i < d->eblks; i++) {
477 eb = d->eb_data + i;
478
479 status = mtdswap_read_markers(d, eb);
480 if (status < 0)
481 eb->flags |= EBLOCK_READERR;
482 else if (status == MTDSWAP_SCANNED_BAD) {
483 eb->flags |= EBLOCK_BAD;
484 continue;
485 }
486
487 switch (status) {
488 case MTDSWAP_SCANNED_CLEAN:
489 idx = MTDSWAP_CLEAN;
490 break;
491 case MTDSWAP_SCANNED_DIRTY:
492 case MTDSWAP_SCANNED_BITFLIP:
493 idx = MTDSWAP_DIRTY;
494 break;
495 default:
496 idx = MTDSWAP_FAILING;
497 }
498
499 eb->flags |= (idx << EBLOCK_IDX_SHIFT);
500 }
501
502 mtdswap_check_counts(d);
503
504 for (i = 0; i < d->eblks; i++) {
505 eb = d->eb_data + i;
506
507 if (eb->flags & EBLOCK_BAD)
508 continue;
509
510 idx = eb->flags >> EBLOCK_IDX_SHIFT;
511 mtdswap_rb_add(d, eb, idx);
512 }
513}
514
515/*
516 * Place the eraseblock into the tree corresponding to the number of
517 * active pages it contains.
518 */
519static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
520{
521 unsigned int weight = eb->active_count;
522 unsigned int maxweight = d->pages_per_eblk;
523
524 if (eb == d->curr_write)
525 return;
526
527 if (eb->flags & EBLOCK_BITFLIP)
528 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
529 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
530 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
531 if (weight == maxweight)
532 mtdswap_rb_add(d, eb, MTDSWAP_USED);
533 else if (weight == 0)
534 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
535 else if (weight > (maxweight/2))
536 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
537 else
538 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
539}
540
541
542static void mtdswap_erase_callback(struct erase_info *done)
543{
544 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv;
545 wake_up(wait_q);
546}
547
548static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
549{
550 struct mtd_info *mtd = d->mtd;
551 struct erase_info erase;
552 wait_queue_head_t wq;
553 unsigned int retries = 0;
554 int ret;
555
556 eb->erase_count++;
557 if (eb->erase_count > d->max_erase_count)
558 d->max_erase_count = eb->erase_count;
559
560retry:
561 init_waitqueue_head(&wq);
562 memset(&erase, 0, sizeof(struct erase_info));
563
564 erase.mtd = mtd;
565 erase.callback = mtdswap_erase_callback;
566 erase.addr = mtdswap_eb_offset(d, eb);
567 erase.len = mtd->erasesize;
568 erase.priv = (u_long)&wq;
569
570 ret = mtd->erase(mtd, &erase);
571 if (ret) {
572 if (retries++ < MTDSWAP_ERASE_RETRIES) {
573 dev_warn(d->dev,
574 "erase of erase block %#llx on %s failed",
575 erase.addr, mtd->name);
576 yield();
577 goto retry;
578 }
579
580 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
581 erase.addr, mtd->name);
582
583 mtdswap_handle_badblock(d, eb);
584 return -EIO;
585 }
586
587 ret = wait_event_interruptible(wq, erase.state == MTD_ERASE_DONE ||
588 erase.state == MTD_ERASE_FAILED);
589 if (ret) {
590		dev_err(d->dev, "Interrupted erase block %#llx erasure on %s",
591 erase.addr, mtd->name);
592 return -EINTR;
593 }
594
595 if (erase.state == MTD_ERASE_FAILED) {
596 if (retries++ < MTDSWAP_ERASE_RETRIES) {
597 dev_warn(d->dev,
598 "erase of erase block %#llx on %s failed",
599 erase.addr, mtd->name);
600 yield();
601 goto retry;
602 }
603
604 mtdswap_handle_badblock(d, eb);
605 return -EIO;
606 }
607
608 return 0;
609}
610
611static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
612 unsigned int *block)
613{
614 int ret;
615 struct swap_eb *old_eb = d->curr_write;
616 struct rb_root *clean_root;
617 struct swap_eb *eb;
618
619 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
620 do {
621 if (TREE_EMPTY(d, CLEAN))
622 return -ENOSPC;
623
624 clean_root = TREE_ROOT(d, CLEAN);
625 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
626 rb_erase(&eb->rb, clean_root);
627 eb->root = NULL;
628 TREE_COUNT(d, CLEAN)--;
629
630 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
631 } while (ret == -EIO || ret == -EBADMSG);
632
633 if (ret)
634 return ret;
635
636 d->curr_write_pos = 0;
637 d->curr_write = eb;
638 if (old_eb)
639 mtdswap_store_eb(d, old_eb);
640 }
641
642 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
643 d->curr_write_pos;
644
645 d->curr_write->active_count++;
646 d->revmap[*block] = page;
647 d->curr_write_pos++;
648
649 return 0;
650}
651
652static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
653{
654 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
655 d->pages_per_eblk - d->curr_write_pos;
656}
657
658static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
659{
660 return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
661}
662
663static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
664 unsigned int page, unsigned int *bp, int gc_context)
665{
666 struct mtd_info *mtd = d->mtd;
667 struct swap_eb *eb;
668 size_t retlen;
669 loff_t writepos;
670 int ret;
671
672retry:
673 if (!gc_context)
674 while (!mtdswap_enough_free_pages(d))
675 if (mtdswap_gc(d, 0) > 0)
676 return -ENOSPC;
677
678 ret = mtdswap_map_free_block(d, page, bp);
679 eb = d->eb_data + (*bp / d->pages_per_eblk);
680
681 if (ret == -EIO || ret == -EBADMSG) {
682 d->curr_write = NULL;
683 eb->active_count--;
684 d->revmap[*bp] = PAGE_UNDEF;
685 goto retry;
686 }
687
688 if (ret < 0)
689 return ret;
690
691 writepos = (loff_t)*bp << PAGE_SHIFT;
692 ret = mtd->write(mtd, writepos, PAGE_SIZE, &retlen, buf);
693 if (ret == -EIO || ret == -EBADMSG) {
694 d->curr_write_pos--;
695 eb->active_count--;
696 d->revmap[*bp] = PAGE_UNDEF;
697 mtdswap_handle_write_error(d, eb);
698 goto retry;
699 }
700
701 if (ret < 0) {
702 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)",
703 ret, retlen);
704 goto err;
705 }
706
707 if (retlen != PAGE_SIZE) {
708 dev_err(d->dev, "Short write to MTD device: %zd written",
709 retlen);
710 ret = -EIO;
711 goto err;
712 }
713
714 return ret;
715
716err:
717 d->curr_write_pos--;
718 eb->active_count--;
719 d->revmap[*bp] = PAGE_UNDEF;
720
721 return ret;
722}
723
724static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
725 unsigned int *newblock)
726{
727 struct mtd_info *mtd = d->mtd;
728 struct swap_eb *eb, *oldeb;
729 int ret;
730 size_t retlen;
731 unsigned int page, retries;
732 loff_t readpos;
733
734 page = d->revmap[oldblock];
735 readpos = (loff_t) oldblock << PAGE_SHIFT;
736 retries = 0;
737
738retry:
739 ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
740
741 if (ret < 0 && ret != -EUCLEAN) {
742 oldeb = d->eb_data + oldblock / d->pages_per_eblk;
743 oldeb->flags |= EBLOCK_READERR;
744
745 dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
746 oldblock);
747 retries++;
748 if (retries < MTDSWAP_IO_RETRIES)
749 goto retry;
750
751 goto read_error;
752 }
753
754 if (retlen != PAGE_SIZE) {
755 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen,
756 oldblock);
757 ret = -EIO;
758 goto read_error;
759 }
760
761 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
762 if (ret < 0) {
763 d->page_data[page] = BLOCK_ERROR;
764 dev_err(d->dev, "Write error: %d\n", ret);
765 return ret;
766 }
767
768 eb = d->eb_data + *newblock / d->pages_per_eblk;
769 d->page_data[page] = *newblock;
770 d->revmap[oldblock] = PAGE_UNDEF;
771 eb = d->eb_data + oldblock / d->pages_per_eblk;
772 eb->active_count--;
773
774 return 0;
775
776read_error:
777 d->page_data[page] = BLOCK_ERROR;
778 d->revmap[oldblock] = PAGE_UNDEF;
779 return ret;
780}
781
782static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
783{
784 unsigned int i, block, eblk_base, newblock;
785 int ret, errcode;
786
787 errcode = 0;
788 eblk_base = (eb - d->eb_data) * d->pages_per_eblk;
789
790 for (i = 0; i < d->pages_per_eblk; i++) {
791 if (d->spare_eblks < MIN_SPARE_EBLOCKS)
792 return -ENOSPC;
793
794 block = eblk_base + i;
795 if (d->revmap[block] == PAGE_UNDEF)
796 continue;
797
798 ret = mtdswap_move_block(d, block, &newblock);
799 if (ret < 0 && !errcode)
800 errcode = ret;
801 }
802
803 return errcode;
804}
805
806static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
807{
808 int idx, stopat;
809
810 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_TRESHOLD)
811 stopat = MTDSWAP_LOWFRAG;
812 else
813 stopat = MTDSWAP_HIFRAG;
814
815 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
816 if (d->trees[idx].root.rb_node != NULL)
817 return idx;
818
819 return -1;
820}
821
822static int mtdswap_wlfreq(unsigned int maxdiff)
823{
824 unsigned int h, x, y, dist, base;
825
826 /*
827 * Calculate linear ramp down from f1 to f2 when maxdiff goes from
828 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar
829	 * to a triangle with height f1 - f2 and width COLLECT_NONDIRTY_BASE.
830 */
831
832 dist = maxdiff - MAX_ERASE_DIFF;
833 if (dist > COLLECT_NONDIRTY_BASE)
834 dist = COLLECT_NONDIRTY_BASE;
835
836 /*
837	 * Modelling the slope as a right-angled triangle with base
838 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
839 * equal to the ratio h/base.
840 */
841 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
842 base = COLLECT_NONDIRTY_BASE;
843
844 x = dist - base;
845 y = (x * h + base / 2) / base;
846
847 return COLLECT_NONDIRTY_FREQ2 + y;
848}
849
850static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
851{
852 static unsigned int pick_cnt;
853 unsigned int i, idx = -1, wear, max;
854 struct rb_root *root;
855
856 max = 0;
857 for (i = 0; i <= MTDSWAP_DIRTY; i++) {
858 root = &d->trees[i].root;
859 if (root->rb_node == NULL)
860 continue;
861
862 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
863 if (wear > max) {
864 max = wear;
865 idx = i;
866 }
867 }
868
869 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
870 pick_cnt = 0;
871 return idx;
872 }
873
874 pick_cnt++;
875 return -1;
876}
877
878static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
879 unsigned int background)
880{
881 int idx;
882
883 if (TREE_NONEMPTY(d, FAILING) &&
884 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
885 return MTDSWAP_FAILING;
886
887 idx = mtdswap_choose_wl_tree(d);
888 if (idx >= MTDSWAP_CLEAN)
889 return idx;
890
891 return __mtdswap_choose_gc_tree(d);
892}
893
894static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
895 unsigned int background)
896{
897 struct rb_root *rp = NULL;
898 struct swap_eb *eb = NULL;
899 int idx;
900
901 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
902 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
903 return NULL;
904
905 idx = mtdswap_choose_gc_tree(d, background);
906 if (idx < 0)
907 return NULL;
908
909 rp = &d->trees[idx].root;
910 eb = rb_entry(rb_first(rp), struct swap_eb, rb);
911
912 rb_erase(&eb->rb, rp);
913 eb->root = NULL;
914 d->trees[idx].count--;
915 return eb;
916}
917
918static unsigned int mtdswap_test_patt(unsigned int i)
919{
920 return i % 2 ? 0x55555555 : 0xAAAAAAAA;
921}
922
923static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
924 struct swap_eb *eb)
925{
926 struct mtd_info *mtd = d->mtd;
927 unsigned int test, i, j, patt, mtd_pages;
928 loff_t base, pos;
929 unsigned int *p1 = (unsigned int *)d->page_buf;
930 unsigned char *p2 = (unsigned char *)d->oob_buf;
931 struct mtd_oob_ops ops;
932 int ret;
933
934 ops.mode = MTD_OOB_AUTO;
935 ops.len = mtd->writesize;
936 ops.ooblen = mtd->ecclayout->oobavail;
937 ops.ooboffs = 0;
938 ops.datbuf = d->page_buf;
939 ops.oobbuf = d->oob_buf;
940 base = mtdswap_eb_offset(d, eb);
941 mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;
942
943 for (test = 0; test < 2; test++) {
944 pos = base;
945 for (i = 0; i < mtd_pages; i++) {
946 patt = mtdswap_test_patt(test + i);
947 memset(d->page_buf, patt, mtd->writesize);
948 memset(d->oob_buf, patt, mtd->ecclayout->oobavail);
949 ret = mtd->write_oob(mtd, pos, &ops);
950 if (ret)
951 goto error;
952
953 pos += mtd->writesize;
954 }
955
956 pos = base;
957 for (i = 0; i < mtd_pages; i++) {
958 ret = mtd->read_oob(mtd, pos, &ops);
959 if (ret)
960 goto error;
961
962 patt = mtdswap_test_patt(test + i);
963 for (j = 0; j < mtd->writesize/sizeof(int); j++)
964 if (p1[j] != patt)
965 goto error;
966
967 for (j = 0; j < mtd->ecclayout->oobavail; j++)
968 if (p2[j] != (unsigned char)patt)
969 goto error;
970
971 pos += mtd->writesize;
972 }
973
974 ret = mtdswap_erase_block(d, eb);
975 if (ret)
976 goto error;
977 }
978
979 eb->flags &= ~EBLOCK_READERR;
980 return 1;
981
982error:
983 mtdswap_handle_badblock(d, eb);
984 return 0;
985}
986
987static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
988{
989 struct swap_eb *eb;
990 int ret;
991
992 if (d->spare_eblks < MIN_SPARE_EBLOCKS)
993 return 1;
994
995 eb = mtdswap_pick_gc_eblk(d, background);
996 if (!eb)
997 return 1;
998
999 ret = mtdswap_gc_eblock(d, eb);
1000 if (ret == -ENOSPC)
1001 return 1;
1002
1003 if (eb->flags & EBLOCK_FAILED) {
1004 mtdswap_handle_badblock(d, eb);
1005 return 0;
1006 }
1007
1008 eb->flags &= ~EBLOCK_BITFLIP;
1009 ret = mtdswap_erase_block(d, eb);
1010 if ((eb->flags & EBLOCK_READERR) &&
1011 (ret || !mtdswap_eblk_passes(d, eb)))
1012 return 0;
1013
1014 if (ret == 0)
1015 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);
1016
1017 if (ret == 0)
1018 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
1019 else if (ret != -EIO && ret != -EBADMSG)
1020 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
1021
1022 return 0;
1023}
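/*
 * One GC pass, summarized (editor's note): pick a victim tree and its
 * least-worn block, copy every live page out with mtdswap_move_block(),
 * then erase the block and write a fresh clean marker. A non-zero
 * return tells the caller (the write path or the background hook) that
 * no further collection is possible or useful right now.
 */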
1024
1025static void mtdswap_background(struct mtd_blktrans_dev *dev)
1026{
1027 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1028 int ret;
1029
1030 while (1) {
1031 ret = mtdswap_gc(d, 1);
1032 if (ret || mtd_blktrans_cease_background(dev))
1033 return;
1034 }
1035}
1036
1037static void mtdswap_cleanup(struct mtdswap_dev *d)
1038{
1039 vfree(d->eb_data);
1040 vfree(d->revmap);
1041 vfree(d->page_data);
1042 kfree(d->oob_buf);
1043 kfree(d->page_buf);
1044}
1045
1046static int mtdswap_flush(struct mtd_blktrans_dev *dev)
1047{
1048 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1049
1050 if (d->mtd->sync)
1051 d->mtd->sync(d->mtd);
1052 return 0;
1053}
1054
1055static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
1056{
1057 loff_t offset;
1058 unsigned int badcnt;
1059
1060 badcnt = 0;
1061
1062 if (mtd->block_isbad)
1063 for (offset = 0; offset < size; offset += mtd->erasesize)
1064 if (mtd->block_isbad(mtd, offset))
1065 badcnt++;
1066
1067 return badcnt;
1068}
1069
1070static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
1071 unsigned long page, char *buf)
1072{
1073 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1074 unsigned int newblock, mapped;
1075 struct swap_eb *eb;
1076 int ret;
1077
1078 d->sect_write_count++;
1079
1080 if (d->spare_eblks < MIN_SPARE_EBLOCKS)
1081 return -ENOSPC;
1082
1083 if (header) {
1084 /* Ignore writes to the header page */
1085 if (unlikely(page == 0))
1086 return 0;
1087
1088 page--;
1089 }
1090
1091 mapped = d->page_data[page];
1092 if (mapped <= BLOCK_MAX) {
1093 eb = d->eb_data + (mapped / d->pages_per_eblk);
1094 eb->active_count--;
1095 mtdswap_store_eb(d, eb);
1096 d->page_data[page] = BLOCK_UNDEF;
1097 d->revmap[mapped] = PAGE_UNDEF;
1098 }
1099
1100 ret = mtdswap_write_block(d, buf, page, &newblock, 0);
1101 d->mtd_write_count++;
1102
1103 if (ret < 0)
1104 return ret;
1105
1106 eb = d->eb_data + (newblock / d->pages_per_eblk);
1107 d->page_data[page] = newblock;
1108
1109 return 0;
1110}
1111
1112/* Provide a dummy swap header for the kernel */
1113static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
1114{
1115 union swap_header *hd = (union swap_header *)(buf);
1116
1117 memset(buf, 0, PAGE_SIZE - 10);
1118
1119 hd->info.version = 1;
1120 hd->info.last_page = d->mbd_dev->size - 1;
1121 hd->info.nr_badpages = 0;
1122
1123 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
1124
1125 return 0;
1126}
1127
1128static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
1129 unsigned long page, char *buf)
1130{
1131 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1132 struct mtd_info *mtd = d->mtd;
1133 unsigned int realblock, retries;
1134 loff_t readpos;
1135 struct swap_eb *eb;
1136 size_t retlen;
1137 int ret;
1138
1139 d->sect_read_count++;
1140
1141 if (header) {
1142 if (unlikely(page == 0))
1143 return mtdswap_auto_header(d, buf);
1144
1145 page--;
1146 }
1147
1148 realblock = d->page_data[page];
1149 if (realblock > BLOCK_MAX) {
1150 memset(buf, 0x0, PAGE_SIZE);
1151 if (realblock == BLOCK_UNDEF)
1152 return 0;
1153 else
1154 return -EIO;
1155 }
1156
1157 eb = d->eb_data + (realblock / d->pages_per_eblk);
1158 BUG_ON(d->revmap[realblock] == PAGE_UNDEF);
1159
1160 readpos = (loff_t)realblock << PAGE_SHIFT;
1161 retries = 0;
1162
1163retry:
1164 ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, buf);
1165
1166 d->mtd_read_count++;
1167 if (ret == -EUCLEAN) {
1168 eb->flags |= EBLOCK_BITFLIP;
1169 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
1170 ret = 0;
1171 }
1172
1173 if (ret < 0) {
1174 dev_err(d->dev, "Read error %d\n", ret);
1175 eb->flags |= EBLOCK_READERR;
1176 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
1177 retries++;
1178 if (retries < MTDSWAP_IO_RETRIES)
1179 goto retry;
1180
1181 return ret;
1182 }
1183
1184 if (retlen != PAGE_SIZE) {
1185 dev_err(d->dev, "Short read %zd\n", retlen);
1186 return -EIO;
1187 }
1188
1189 return 0;
1190}
1191
1192static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
1193 unsigned nr_pages)
1194{
1195 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1196 unsigned long page;
1197 struct swap_eb *eb;
1198 unsigned int mapped;
1199
1200 d->discard_count++;
1201
1202 for (page = first; page < first + nr_pages; page++) {
1203 mapped = d->page_data[page];
1204 if (mapped <= BLOCK_MAX) {
1205 eb = d->eb_data + (mapped / d->pages_per_eblk);
1206 eb->active_count--;
1207 mtdswap_store_eb(d, eb);
1208 d->page_data[page] = BLOCK_UNDEF;
1209 d->revmap[mapped] = PAGE_UNDEF;
1210 d->discard_page_count++;
1211 } else if (mapped == BLOCK_ERROR) {
1212 d->page_data[page] = BLOCK_UNDEF;
1213 d->discard_page_count++;
1214 }
1215 }
1216
1217 return 0;
1218}
1219
1220static int mtdswap_show(struct seq_file *s, void *data)
1221{
1222 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
1223 unsigned long sum;
1224 unsigned int count[MTDSWAP_TREE_CNT];
1225 unsigned int min[MTDSWAP_TREE_CNT];
1226 unsigned int max[MTDSWAP_TREE_CNT];
1227 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
1228 uint64_t use_size;
1229 char *name[] = {"clean", "used", "low", "high", "dirty", "bitflip",
1230 "failing"};
1231
1232 mutex_lock(&d->mbd_dev->lock);
1233
1234 for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1235 struct rb_root *root = &d->trees[i].root;
1236
1237 if (root->rb_node) {
1238 count[i] = d->trees[i].count;
1239 min[i] = rb_entry(rb_first(root), struct swap_eb,
1240 rb)->erase_count;
1241 max[i] = rb_entry(rb_last(root), struct swap_eb,
1242 rb)->erase_count;
1243 } else
1244 count[i] = 0;
1245 }
1246
1247 if (d->curr_write) {
1248 cw = 1;
1249 cwp = d->curr_write_pos;
1250 cwecount = d->curr_write->erase_count;
1251 }
1252
1253 sum = 0;
1254 for (i = 0; i < d->eblks; i++)
1255 sum += d->eb_data[i].erase_count;
1256
1257 use_size = (uint64_t)d->eblks * d->mtd->erasesize;
1258 bb_cnt = mtdswap_badblocks(d->mtd, use_size);
1259
1260 mapped = 0;
1261 pages = d->mbd_dev->size;
1262 for (i = 0; i < pages; i++)
1263 if (d->page_data[i] != BLOCK_UNDEF)
1264 mapped++;
1265
1266 mutex_unlock(&d->mbd_dev->lock);
1267
1268 for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1269 if (!count[i])
1270 continue;
1271
1272 if (min[i] != max[i])
1273 seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
1274 "max %d times\n",
1275 name[i], count[i], min[i], max[i]);
1276 else
1277 seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
1278 "times\n", name[i], count[i], min[i]);
1279 }
1280
1281 if (bb_cnt)
1282 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);
1283
1284 if (cw)
1285 seq_printf(s, "current erase block: %u pages used, %u free, "
1286 "erased %u times\n",
1287 cwp, d->pages_per_eblk - cwp, cwecount);
1288
1289 seq_printf(s, "total erasures: %lu\n", sum);
1290
1291 seq_printf(s, "\n");
1292
1293 seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
1294 seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
1295 seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
1296 seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
1297 seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
1298 seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);
1299
1300 seq_printf(s, "\n");
1301 seq_printf(s, "total pages: %u\n", pages);
1302 seq_printf(s, "pages mapped: %u\n", mapped);
1303
1304 return 0;
1305}
1306
1307static int mtdswap_open(struct inode *inode, struct file *file)
1308{
1309 return single_open(file, mtdswap_show, inode->i_private);
1310}
1311
1312static const struct file_operations mtdswap_fops = {
1313 .open = mtdswap_open,
1314 .read = seq_read,
1315 .llseek = seq_lseek,
1316 .release = single_release,
1317};
1318
1319static int mtdswap_add_debugfs(struct mtdswap_dev *d)
1320{
1321 struct gendisk *gd = d->mbd_dev->disk;
1322 struct device *dev = disk_to_dev(gd);
1323
1324 struct dentry *root;
1325 struct dentry *dent;
1326
1327 root = debugfs_create_dir(gd->disk_name, NULL);
1328 if (IS_ERR(root))
1329 return 0;
1330
1331 if (!root) {
1332 dev_err(dev, "failed to initialize debugfs\n");
1333 return -1;
1334 }
1335
1336 d->debugfs_root = root;
1337
1338 dent = debugfs_create_file("stats", S_IRUSR, root, d,
1339 &mtdswap_fops);
1340 if (!dent) {
1341 dev_err(d->dev, "debugfs_create_file failed\n");
1342 debugfs_remove_recursive(root);
1343 d->debugfs_root = NULL;
1344 return -1;
1345 }
1346
1347 return 0;
1348}
1349
1350static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
1351 unsigned int spare_cnt)
1352{
1353 struct mtd_info *mtd = d->mbd_dev->mtd;
1354 unsigned int i, eblk_bytes, pages, blocks;
1355 int ret = -ENOMEM;
1356
1357 d->mtd = mtd;
1358 d->eblks = eblocks;
1359 d->spare_eblks = spare_cnt;
1360 d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;
1361
1362 pages = d->mbd_dev->size;
1363 blocks = eblocks * d->pages_per_eblk;
1364
1365 for (i = 0; i < MTDSWAP_TREE_CNT; i++)
1366 d->trees[i].root = RB_ROOT;
1367
1368 d->page_data = vmalloc(sizeof(int)*pages);
1369 if (!d->page_data)
1370 goto page_data_fail;
1371
1372 d->revmap = vmalloc(sizeof(int)*blocks);
1373 if (!d->revmap)
1374 goto revmap_fail;
1375
1376 eblk_bytes = sizeof(struct swap_eb)*d->eblks;
1377 d->eb_data = vmalloc(eblk_bytes);
1378 if (!d->eb_data)
1379 goto eb_data_fail;
1380
1381 memset(d->eb_data, 0, eblk_bytes);
1382 for (i = 0; i < pages; i++)
1383 d->page_data[i] = BLOCK_UNDEF;
1384
1385 for (i = 0; i < blocks; i++)
1386 d->revmap[i] = PAGE_UNDEF;
1387
1388 d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1389 if (!d->page_buf)
1390 goto page_buf_fail;
1391
1392 d->oob_buf = kmalloc(2 * mtd->ecclayout->oobavail, GFP_KERNEL);
1393 if (!d->oob_buf)
1394 goto oob_buf_fail;
1395
1396 mtdswap_scan_eblks(d);
1397
1398 return 0;
1399
1400oob_buf_fail:
1401 kfree(d->page_buf);
1402page_buf_fail:
1403 vfree(d->eb_data);
1404eb_data_fail:
1405 vfree(d->revmap);
1406revmap_fail:
1407 vfree(d->page_data);
1408page_data_fail:
1409 printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
1410 return ret;
1411}
1412
1413static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
1414{
1415 struct mtdswap_dev *d;
1416 struct mtd_blktrans_dev *mbd_dev;
1417 char *parts;
1418 char *this_opt;
1419 unsigned long part;
1420 unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
1421 uint64_t swap_size, use_size, size_limit;
1422 struct nand_ecclayout *oinfo;
1423 int ret;
1424
1425 parts = &partitions[0];
1426 if (!*parts)
1427 return;
1428
1429 while ((this_opt = strsep(&parts, ",")) != NULL) {
1430 if (strict_strtoul(this_opt, 0, &part) < 0)
1431 return;
1432
1433 if (mtd->index == part)
1434 break;
1435 }
1436
1437 if (mtd->index != part)
1438 return;
1439
1440 if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
1441 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
1442 "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
1443 return;
1444 }
1445
1446 if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
1447 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
1448 " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
1449 return;
1450 }
1451
1452 oinfo = mtd->ecclayout;
1453 if (!mtd->oobsize || !oinfo || oinfo->oobavail < MTDSWAP_OOBSIZE) {
1454 printk(KERN_ERR "%s: Not enough free bytes in OOB, "
1455 "%d available, %lu needed.\n",
1456 MTDSWAP_PREFIX, oinfo->oobavail, MTDSWAP_OOBSIZE);
1457 return;
1458 }
1459
1460 if (spare_eblocks > 100)
1461 spare_eblocks = 100;
1462
1463 use_size = mtd->size;
1464 size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;
1465
1466 if (mtd->size > size_limit) {
1467 printk(KERN_WARNING "%s: Device too large. Limiting size to "
1468 "%llu bytes\n", MTDSWAP_PREFIX, size_limit);
1469 use_size = size_limit;
1470 }
1471
1472 eblocks = mtd_div_by_eb(use_size, mtd);
1473 use_size = eblocks * mtd->erasesize;
1474 bad_blocks = mtdswap_badblocks(mtd, use_size);
1475 eavailable = eblocks - bad_blocks;
1476
1477 if (eavailable < MIN_ERASE_BLOCKS) {
1478 printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
1479 "%d needed\n", MTDSWAP_PREFIX, eavailable,
1480 MIN_ERASE_BLOCKS);
1481 return;
1482 }
1483
1484 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);
1485
1486 if (spare_cnt < MIN_SPARE_EBLOCKS)
1487 spare_cnt = MIN_SPARE_EBLOCKS;
1488
1489 if (spare_cnt > eavailable - 1)
1490 spare_cnt = eavailable - 1;
1491
1492 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
1493 (header ? PAGE_SIZE : 0);
1494
1495 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
1496 "%u spare, %u bad blocks\n",
1497 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);
1498
1499 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
1500 if (!d)
1501 return;
1502
1503 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
1504 if (!mbd_dev) {
1505 kfree(d);
1506 return;
1507 }
1508
1509 d->mbd_dev = mbd_dev;
1510 mbd_dev->priv = d;
1511
1512 mbd_dev->mtd = mtd;
1513 mbd_dev->devnum = mtd->index;
1514 mbd_dev->size = swap_size >> PAGE_SHIFT;
1515 mbd_dev->tr = tr;
1516
1517 if (!(mtd->flags & MTD_WRITEABLE))
1518 mbd_dev->readonly = 1;
1519
1520 if (mtdswap_init(d, eblocks, spare_cnt) < 0)
1521 goto init_failed;
1522
1523 if (add_mtd_blktrans_dev(mbd_dev) < 0)
1524 goto cleanup;
1525
1526 d->dev = disk_to_dev(mbd_dev->disk);
1527
1528 ret = mtdswap_add_debugfs(d);
1529 if (ret < 0)
1530 goto debugfs_failed;
1531
1532 return;
1533
1534debugfs_failed:
1535 del_mtd_blktrans_dev(mbd_dev);
1536
1537cleanup:
1538 mtdswap_cleanup(d);
1539
1540init_failed:
1541 kfree(mbd_dev);
1542 kfree(d);
1543}
1544
1545static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
1546{
1547 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1548
1549 debugfs_remove_recursive(d->debugfs_root);
1550 del_mtd_blktrans_dev(dev);
1551 mtdswap_cleanup(d);
1552 kfree(d);
1553}
1554
1555static struct mtd_blktrans_ops mtdswap_ops = {
1556 .name = "mtdswap",
1557 .major = 0,
1558 .part_bits = 0,
1559 .blksize = PAGE_SIZE,
1560 .flush = mtdswap_flush,
1561 .readsect = mtdswap_readsect,
1562 .writesect = mtdswap_writesect,
1563 .discard = mtdswap_discard,
1564 .background = mtdswap_background,
1565 .add_mtd = mtdswap_add_mtd,
1566 .remove_dev = mtdswap_remove_dev,
1567 .owner = THIS_MODULE,
1568};
1569
1570static int __init mtdswap_modinit(void)
1571{
1572 return register_mtd_blktrans(&mtdswap_ops);
1573}
1574
1575static void __exit mtdswap_modexit(void)
1576{
1577 deregister_mtd_blktrans(&mtdswap_ops);
1578}
1579
1580module_init(mtdswap_modinit);
1581module_exit(mtdswap_modexit);
1582
1583
1584MODULE_LICENSE("GPL");
1585MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
1586MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
1587 "swap space");
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 4f6c06f16328..a92054e945e1 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -31,6 +31,21 @@ config MTD_NAND_VERIFY_WRITE
 	  device thinks the write was successful, a bit could have been
 	  flipped accidentally due to device wear or something else.
 
+config MTD_NAND_BCH
+	tristate
+	select BCH
+	depends on MTD_NAND_ECC_BCH
+	default MTD_NAND
+
+config MTD_NAND_ECC_BCH
+	bool "Support software BCH ECC"
+	default n
+	help
+	  This enables support for software BCH error correction. Binary BCH
+	  codes are more powerful and cpu intensive than traditional Hamming
+	  ECC codes. They are used with NAND devices requiring more than 1 bit
+	  of error correction.
+
 config MTD_SM_COMMON
 	tristate
 	default n
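The two symbols above only build the BCH library glue (nand_bch.o, added to the Makefile below); a controller driver still has to opt in at probe time. A hedged sketch of that driver side, with field values that are illustrative rather than mandated by this hunk:

	/* Hypothetical driver fragment: request software BCH ECC,
	 * correcting 4 bits per 512-byte step (7 ECC bytes). */
	chip->ecc.mode  = NAND_ECC_SOFT_BCH;
	chip->ecc.size  = 512;
	chip->ecc.bytes = 7;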
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 8ad6faec72cb..5745d831168e 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_MTD_NAND)			+= nand.o
 obj-$(CONFIG_MTD_NAND_ECC)		+= nand_ecc.o
+obj-$(CONFIG_MTD_NAND_BCH)		+= nand_bch.o
 obj-$(CONFIG_MTD_NAND_IDS)		+= nand_ids.o
 obj-$(CONFIG_MTD_SM_COMMON)		+= sm_common.o
 
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index ccce0f03b5dc..6fae04b3fc6d 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -48,6 +48,9 @@
 #define no_ecc		0
 #endif
 
+static int use_dma = 1;
+module_param(use_dma, int, 0);
+
 static int on_flash_bbt = 0;
 module_param(on_flash_bbt, int, 0);
 
@@ -89,11 +92,20 @@ struct atmel_nand_host {
 	struct nand_chip	nand_chip;
 	struct mtd_info		mtd;
 	void __iomem		*io_base;
+	dma_addr_t		io_phys;
 	struct atmel_nand_data	*board;
 	struct device		*dev;
 	void __iomem		*ecc;
+
+	struct completion	comp;
+	struct dma_chan		*dma_chan;
 };
 
+static int cpu_has_dma(void)
+{
+	return cpu_is_at91sam9rl() || cpu_is_at91sam9g45();
+}
+
 /*
  * Enable NAND.
  */
@@ -150,7 +162,7 @@ static int atmel_nand_device_ready(struct mtd_info *mtd)
 /*
  * Minimal-overhead PIO for data access.
  */
-static void atmel_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void atmel_read_buf8(struct mtd_info *mtd, u8 *buf, int len)
 {
 	struct nand_chip	*nand_chip = mtd->priv;
 
@@ -164,7 +176,7 @@ static void atmel_read_buf16(struct mtd_info *mtd, u8 *buf, int len)
 	__raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2);
 }
 
-static void atmel_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void atmel_write_buf8(struct mtd_info *mtd, const u8 *buf, int len)
 {
 	struct nand_chip	*nand_chip = mtd->priv;
 
@@ -178,6 +190,121 @@ static void atmel_write_buf16(struct mtd_info *mtd, const u8 *buf, int len)
 	__raw_writesw(nand_chip->IO_ADDR_W, buf, len / 2);
 }
 
+static void dma_complete_func(void *completion)
+{
+	complete(completion);
+}
+
+static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
+			     int is_read)
+{
+	struct dma_device *dma_dev;
+	enum dma_ctrl_flags flags;
+	dma_addr_t dma_src_addr, dma_dst_addr, phys_addr;
+	struct dma_async_tx_descriptor *tx = NULL;
+	dma_cookie_t cookie;
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+	void *p = buf;
+	int err = -EIO;
+	enum dma_data_direction dir = is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	if (buf >= high_memory) {
+		struct page *pg;
+
+		if (((size_t)buf & PAGE_MASK) !=
+		    ((size_t)(buf + len - 1) & PAGE_MASK)) {
+			dev_warn(host->dev, "Buffer does not fit within one page\n");
+			goto err_buf;
+		}
+
+		pg = vmalloc_to_page(buf);
+		if (pg == 0) {
+			dev_err(host->dev, "Failed to vmalloc_to_page\n");
+			goto err_buf;
+		}
+		p = page_address(pg) + ((size_t)buf & ~PAGE_MASK);
+	}
+
+	dma_dev = host->dma_chan->device;
+
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
+		DMA_COMPL_SKIP_DEST_UNMAP;
+
+	phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
+	if (dma_mapping_error(dma_dev->dev, phys_addr)) {
+		dev_err(host->dev, "Failed to dma_map_single\n");
+		goto err_buf;
+	}
+
+	if (is_read) {
+		dma_src_addr = host->io_phys;
+		dma_dst_addr = phys_addr;
+	} else {
+		dma_src_addr = phys_addr;
+		dma_dst_addr = host->io_phys;
+	}
+
+	tx = dma_dev->device_prep_dma_memcpy(host->dma_chan, dma_dst_addr,
+					     dma_src_addr, len, flags);
+	if (!tx) {
+		dev_err(host->dev, "Failed to prepare DMA memcpy\n");
+		goto err_dma;
+	}
+
+	init_completion(&host->comp);
+	tx->callback = dma_complete_func;
+	tx->callback_param = &host->comp;
+
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(host->dev, "Failed to do DMA tx_submit\n");
+		goto err_dma;
+	}
+
+	dma_async_issue_pending(host->dma_chan);
+	wait_for_completion(&host->comp);
+
+	err = 0;
+
+err_dma:
+	dma_unmap_single(dma_dev->dev, phys_addr, len, dir);
+err_buf:
+	if (err != 0)
+		dev_warn(host->dev, "Fall back to CPU I/O\n");
+	return err;
+}
+
+static void atmel_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+
+	if (use_dma && len >= mtd->oobsize)
+		if (atmel_nand_dma_op(mtd, buf, len, 1) == 0)
+			return;
+
+	if (host->board->bus_width_16)
+		atmel_read_buf16(mtd, buf, len);
+	else
+		atmel_read_buf8(mtd, buf, len);
+}
+
+static void atmel_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+
+	if (use_dma && len >= mtd->oobsize)
+		if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) == 0)
+			return;
+
+	if (host->board->bus_width_16)
+		atmel_write_buf16(mtd, buf, len);
+	else
+		atmel_write_buf8(mtd, buf, len);
+}
+
 /*
  * Calculate HW ECC
  *
@@ -398,6 +525,8 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	host->io_phys = (dma_addr_t)mem->start;
+
 	host->io_base = ioremap(mem->start, mem->end - mem->start + 1);
 	if (host->io_base == NULL) {
 		printk(KERN_ERR "atmel_nand: ioremap failed\n");
@@ -448,14 +577,11 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 
 	nand_chip->chip_delay = 20;		/* 20us command delay time */
 
-	if (host->board->bus_width_16) {	/* 16-bit bus width */
+	if (host->board->bus_width_16)	/* 16-bit bus width */
 		nand_chip->options |= NAND_BUSWIDTH_16;
-		nand_chip->read_buf = atmel_read_buf16;
-		nand_chip->write_buf = atmel_write_buf16;
-	} else {
-		nand_chip->read_buf = atmel_read_buf;
-		nand_chip->write_buf = atmel_write_buf;
-	}
+
+	nand_chip->read_buf = atmel_read_buf;
+	nand_chip->write_buf = atmel_write_buf;
 
 	platform_set_drvdata(pdev, host);
 	atmel_nand_enable(host);
@@ -473,6 +599,22 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
473 nand_chip->options |= NAND_USE_FLASH_BBT; 599 nand_chip->options |= NAND_USE_FLASH_BBT;
474 } 600 }
475 601
602 if (cpu_has_dma() && use_dma) {
603 dma_cap_mask_t mask;
604
605 dma_cap_zero(mask);
606 dma_cap_set(DMA_MEMCPY, mask);
607 host->dma_chan = dma_request_channel(mask, 0, NULL);
608 if (!host->dma_chan) {
609 dev_err(host->dev, "Failed to request DMA channel\n");
610 use_dma = 0;
611 }
612 }
613 if (use_dma)
614 dev_info(host->dev, "Using DMA for NAND access.\n");
615 else
616 dev_info(host->dev, "No DMA support for NAND access.\n");
617
476 /* first scan to find the device and get the page size */ 618 /* first scan to find the device and get the page size */
477 if (nand_scan_ident(mtd, 1, NULL)) { 619 if (nand_scan_ident(mtd, 1, NULL)) {
478 res = -ENXIO; 620 res = -ENXIO;
@@ -555,6 +697,8 @@ err_scan_ident:
555err_no_card: 697err_no_card:
556 atmel_nand_disable(host); 698 atmel_nand_disable(host);
557 platform_set_drvdata(pdev, NULL); 699 platform_set_drvdata(pdev, NULL);
700 if (host->dma_chan)
701 dma_release_channel(host->dma_chan);
558 if (host->ecc) 702 if (host->ecc)
559 iounmap(host->ecc); 703 iounmap(host->ecc);
560err_ecc_ioremap: 704err_ecc_ioremap:
@@ -578,6 +722,10 @@ static int __exit atmel_nand_remove(struct platform_device *pdev)
578 722
579 if (host->ecc) 723 if (host->ecc)
580 iounmap(host->ecc); 724 iounmap(host->ecc);
725
726 if (host->dma_chan)
727 dma_release_channel(host->dma_chan);
728
581 iounmap(host->io_base); 729 iounmap(host->io_base);
582 kfree(host); 730 kfree(host);
583 731
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index a90fde3ede28..aff3468867ac 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -37,9 +37,6 @@
37#include <mach/nand.h> 37#include <mach/nand.h>
38#include <mach/aemif.h> 38#include <mach/aemif.h>
39 39
40#include <asm/mach-types.h>
41
42
43/* 40/*
44 * This is a device driver for the NAND flash controller found on the 41 * This is a device driver for the NAND flash controller found on the
45 * various DaVinci family chips. It handles up to four SoC chipselects, 42 * various DaVinci family chips. It handles up to four SoC chipselects,
diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index c2f95437e5e9..0b81b5b499d1 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c
@@ -29,6 +29,7 @@
29#include <linux/clk.h> 29#include <linux/clk.h>
30#include <linux/gfp.h> 30#include <linux/gfp.h>
31#include <linux/delay.h> 31#include <linux/delay.h>
32#include <linux/err.h>
32#include <linux/init.h> 33#include <linux/init.h>
33#include <linux/interrupt.h> 34#include <linux/interrupt.h>
34#include <linux/io.h> 35#include <linux/io.h>
@@ -757,9 +758,9 @@ static int __devinit mpc5121_nfc_probe(struct platform_device *op)
757 758
758 /* Enable NFC clock */ 759 /* Enable NFC clock */
759 prv->clk = clk_get(dev, "nfc_clk"); 760 prv->clk = clk_get(dev, "nfc_clk");
760 if (!prv->clk) { 761 if (IS_ERR(prv->clk)) {
761 dev_err(dev, "Unable to acquire NFC clock!\n"); 762 dev_err(dev, "Unable to acquire NFC clock!\n");
762 retval = -ENODEV; 763 retval = PTR_ERR(prv->clk);
763 goto error; 764 goto error;
764 } 765 }
765 766
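
This hunk fixes a classic error-handling bug: clk_get() reports failure with an ERR_PTR()-encoded pointer, never NULL, so the old !prv->clk test could never fire, and returning a hard-coded -ENODEV hid the real error. A minimal sketch of the correct idiom (clock name as in the driver, function name hypothetical):

    #include <linux/clk.h>
    #include <linux/device.h>
    #include <linux/err.h>

    static int example_enable_nfc_clock(struct device *dev)
    {
            struct clk *clk = clk_get(dev, "nfc_clk");

            /* IS_ERR() decodes the ERR_PTR encoding; a NULL test cannot */
            if (IS_ERR(clk)) {
                    dev_err(dev, "unable to acquire NFC clock\n");
                    return PTR_ERR(clk);    /* propagate the real errno */
            }

            clk_enable(clk);
            return 0;
    }
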
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 5ae1d9ee2cf1..42a95fb41504 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -211,6 +211,31 @@ static struct nand_ecclayout nandv2_hw_eccoob_largepage = {
211 } 211 }
212}; 212};
213 213
214/* OOB description for 4096 byte pages with 128 byte OOB */
215static struct nand_ecclayout nandv2_hw_eccoob_4k = {
216 .eccbytes = 8 * 9,
217 .eccpos = {
218 7, 8, 9, 10, 11, 12, 13, 14, 15,
219 23, 24, 25, 26, 27, 28, 29, 30, 31,
220 39, 40, 41, 42, 43, 44, 45, 46, 47,
221 55, 56, 57, 58, 59, 60, 61, 62, 63,
222 71, 72, 73, 74, 75, 76, 77, 78, 79,
223 87, 88, 89, 90, 91, 92, 93, 94, 95,
224 103, 104, 105, 106, 107, 108, 109, 110, 111,
225 119, 120, 121, 122, 123, 124, 125, 126, 127,
226 },
227 .oobfree = {
228 {.offset = 2, .length = 4},
229 {.offset = 16, .length = 7},
230 {.offset = 32, .length = 7},
231 {.offset = 48, .length = 7},
232 {.offset = 64, .length = 7},
233 {.offset = 80, .length = 7},
234 {.offset = 96, .length = 7},
235 {.offset = 112, .length = 7},
236 }
237};
238
214#ifdef CONFIG_MTD_PARTITIONS 239#ifdef CONFIG_MTD_PARTITIONS
215static const char *part_probes[] = { "RedBoot", "cmdlinepart", NULL }; 240static const char *part_probes[] = { "RedBoot", "cmdlinepart", NULL };
216#endif 241#endif
@@ -641,9 +666,9 @@ static void mxc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
641 666
642 n = min(n, len); 667 n = min(n, len);
643 668
644 memcpy(buf, host->data_buf + col, len); 669 memcpy(buf, host->data_buf + col, n);
645 670
646 host->buf_start += len; 671 host->buf_start += n;
647} 672}
648 673
649/* Used by the upper layer to verify the data in NAND Flash 674/* Used by the upper layer to verify the data in NAND Flash
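
The one-line fix above matters because n has already been clamped with min(): copying and advancing by the caller's len could run past the end of the controller's page buffer. A condensed sketch of the bounded-copy pattern (buffer names are hypothetical):

    /* hypothetical helper: copy at most what remains after 'col' */
    static size_t bounded_copy(u8 *dst, const u8 *data_buf, size_t buf_size,
                               size_t col, size_t len)
    {
            size_t n = min_t(size_t, buf_size - col, len);

            memcpy(dst, data_buf + col, n); /* never reads past data_buf */
            return n;                       /* caller advances buf_start by n */
    }
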
@@ -1185,6 +1210,8 @@ static int __init mxcnd_probe(struct platform_device *pdev)
1185 1210
1186 if (mtd->writesize == 2048) 1211 if (mtd->writesize == 2048)
1187 this->ecc.layout = oob_largepage; 1212 this->ecc.layout = oob_largepage;
1213 if (nfc_is_v21() && mtd->writesize == 4096)
1214 this->ecc.layout = &nandv2_hw_eccoob_4k;
1188 1215
1189 /* second phase scan */ 1216 /* second phase scan */
1190 if (nand_scan_tail(mtd)) { 1217 if (nand_scan_tail(mtd)) {
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index a9c6ce745767..85cfc061d41c 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -42,6 +42,7 @@
42#include <linux/mtd/mtd.h> 42#include <linux/mtd/mtd.h>
43#include <linux/mtd/nand.h> 43#include <linux/mtd/nand.h>
44#include <linux/mtd/nand_ecc.h> 44#include <linux/mtd/nand_ecc.h>
45#include <linux/mtd/nand_bch.h>
45#include <linux/interrupt.h> 46#include <linux/interrupt.h>
46#include <linux/bitops.h> 47#include <linux/bitops.h>
47#include <linux/leds.h> 48#include <linux/leds.h>
@@ -2377,7 +2378,7 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
2377 return -EINVAL; 2378 return -EINVAL;
2378 } 2379 }
2379 2380
2380 /* Do not allow reads past end of device */ 2381 /* Do not allow write past end of device */
2381 if (unlikely(to >= mtd->size || 2382 if (unlikely(to >= mtd->size ||
2382 ops->ooboffs + ops->ooblen > 2383 ops->ooboffs + ops->ooblen >
2383 ((mtd->size >> chip->page_shift) - 2384 ((mtd->size >> chip->page_shift) -
@@ -3248,7 +3249,7 @@ int nand_scan_tail(struct mtd_info *mtd)
3248 /* 3249 /*
3249 * If no default placement scheme is given, select an appropriate one 3250 * If no default placement scheme is given, select an appropriate one
3250 */ 3251 */
3251 if (!chip->ecc.layout) { 3252 if (!chip->ecc.layout && (chip->ecc.mode != NAND_ECC_SOFT_BCH)) {
3252 switch (mtd->oobsize) { 3253 switch (mtd->oobsize) {
3253 case 8: 3254 case 8:
3254 chip->ecc.layout = &nand_oob_8; 3255 chip->ecc.layout = &nand_oob_8;
@@ -3351,6 +3352,40 @@ int nand_scan_tail(struct mtd_info *mtd)
3351 chip->ecc.bytes = 3; 3352 chip->ecc.bytes = 3;
3352 break; 3353 break;
3353 3354
3355 case NAND_ECC_SOFT_BCH:
3356 if (!mtd_nand_has_bch()) {
3357 printk(KERN_WARNING "CONFIG_MTD_ECC_BCH not enabled\n");
3358 BUG();
3359 }
3360 chip->ecc.calculate = nand_bch_calculate_ecc;
3361 chip->ecc.correct = nand_bch_correct_data;
3362 chip->ecc.read_page = nand_read_page_swecc;
3363 chip->ecc.read_subpage = nand_read_subpage;
3364 chip->ecc.write_page = nand_write_page_swecc;
3365 chip->ecc.read_page_raw = nand_read_page_raw;
3366 chip->ecc.write_page_raw = nand_write_page_raw;
3367 chip->ecc.read_oob = nand_read_oob_std;
3368 chip->ecc.write_oob = nand_write_oob_std;
3369 /*
3370 * Board driver should supply ecc.size and ecc.bytes values to
3371 * select how many bits are correctable; see nand_bch_init()
3372 * for details.
3373 * Otherwise, default to 4 bits for large page devices
3374 */
3375 if (!chip->ecc.size && (mtd->oobsize >= 64)) {
3376 chip->ecc.size = 512;
3377 chip->ecc.bytes = 7;
3378 }
3379 chip->ecc.priv = nand_bch_init(mtd,
3380 chip->ecc.size,
3381 chip->ecc.bytes,
3382 &chip->ecc.layout);
3383 if (!chip->ecc.priv) {
3384 printk(KERN_WARNING "BCH ECC initialization failed!\n");
3385 BUG();
3386 }
3387 break;
3388
3354 case NAND_ECC_NONE: 3389 case NAND_ECC_NONE:
3355 printk(KERN_WARNING "NAND_ECC_NONE selected by board driver. " 3390 printk(KERN_WARNING "NAND_ECC_NONE selected by board driver. "
3356 "This is not recommended !!\n"); 3391 "This is not recommended !!\n");
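
For a board driver, opting in to the new mode only requires setting three fields before nand_scan_tail(); a sketch using the defaults documented in the comment above (4 correctable bits per 512-byte step):

    /* sketch: request 4-bit-per-512-byte software BCH, then scan */
    chip->ecc.mode  = NAND_ECC_SOFT_BCH;
    chip->ecc.size  = 512;  /* ECC step size in bytes */
    chip->ecc.bytes = 7;    /* ceil(m*t/8) = ceil(13*4/8) */
    /* nand_scan_tail() then calls nand_bch_init() with these values */
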
@@ -3501,6 +3536,9 @@ void nand_release(struct mtd_info *mtd)
3501{ 3536{
3502 struct nand_chip *chip = mtd->priv; 3537 struct nand_chip *chip = mtd->priv;
3503 3538
3539 if (chip->ecc.mode == NAND_ECC_SOFT_BCH)
3540 nand_bch_free((struct nand_bch_control *)chip->ecc.priv);
3541
3504#ifdef CONFIG_MTD_PARTITIONS 3542#ifdef CONFIG_MTD_PARTITIONS
3505 /* Deregister partitions */ 3543 /* Deregister partitions */
3506 del_mtd_partitions(mtd); 3544 del_mtd_partitions(mtd);
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 6ebd869993aa..a1e8b30078d9 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -1101,12 +1101,16 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td)
1101static void verify_bbt_descr(struct mtd_info *mtd, struct nand_bbt_descr *bd) 1101static void verify_bbt_descr(struct mtd_info *mtd, struct nand_bbt_descr *bd)
1102{ 1102{
1103 struct nand_chip *this = mtd->priv; 1103 struct nand_chip *this = mtd->priv;
1104 u32 pattern_len = bd->len; 1104 u32 pattern_len;
1105 u32 bits = bd->options & NAND_BBT_NRBITS_MSK; 1105 u32 bits;
1106 u32 table_size; 1106 u32 table_size;
1107 1107
1108 if (!bd) 1108 if (!bd)
1109 return; 1109 return;
1110
1111 pattern_len = bd->len;
1112 bits = bd->options & NAND_BBT_NRBITS_MSK;
1113
1110 BUG_ON((this->options & NAND_USE_FLASH_BBT_NO_OOB) && 1114 BUG_ON((this->options & NAND_USE_FLASH_BBT_NO_OOB) &&
1111 !(this->options & NAND_USE_FLASH_BBT)); 1115 !(this->options & NAND_USE_FLASH_BBT));
1112 BUG_ON(!bits); 1116 BUG_ON(!bits);
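
The reordering above is needed because C initializers run unconditionally: the old declarations dereferenced bd before the if (!bd) guard could execute. A minimal before/after sketch of the pattern:

    /* before (buggy): the initializer dereferences bd unconditionally */
    static void verify_before(struct nand_bbt_descr *bd)
    {
            u32 pattern_len = bd->len;      /* runs even when bd == NULL */

            if (!bd)                        /* too late to help */
                    return;
            /* ... use pattern_len ... */
    }

    /* after (fixed): declare, check, then dereference */
    static void verify_after(struct nand_bbt_descr *bd)
    {
            u32 pattern_len;

            if (!bd)
                    return;
            pattern_len = bd->len;          /* safe: bd is non-NULL here */
            /* ... use pattern_len ... */
    }
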
diff --git a/drivers/mtd/nand/nand_bch.c b/drivers/mtd/nand/nand_bch.c
new file mode 100644
index 000000000000..0f931e757116
--- /dev/null
+++ b/drivers/mtd/nand/nand_bch.c
@@ -0,0 +1,243 @@
1/*
2 * This file provides ECC correction for more than 1 bit per block of data,
3 * using binary BCH codes. It relies on the generic BCH library lib/bch.c.
4 *
5 * Copyright © 2011 Ivan Djelic <ivan.djelic@parrot.com>
6 *
7 * This file is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 or (at your option) any
10 * later version.
11 *
12 * This file is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this file; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
20 */
21
22#include <linux/types.h>
23#include <linux/kernel.h>
24#include <linux/module.h>
25#include <linux/slab.h>
26#include <linux/bitops.h>
27#include <linux/mtd/mtd.h>
28#include <linux/mtd/nand.h>
29#include <linux/mtd/nand_bch.h>
30#include <linux/bch.h>
31
32/**
33 * struct nand_bch_control - private NAND BCH control structure
34 * @bch: BCH control structure
35 * @ecclayout: private ecc layout for this BCH configuration
36 * @errloc: error location array
37 * @eccmask: XOR ecc mask, allows erased pages to be decoded as valid
38 */
39struct nand_bch_control {
40 struct bch_control *bch;
41 struct nand_ecclayout ecclayout;
42 unsigned int *errloc;
43 unsigned char *eccmask;
44};
45
46/**
47 * nand_bch_calculate_ecc - [NAND Interface] Calculate ECC for data block
48 * @mtd: MTD block structure
49 * @buf: input buffer with raw data
50 * @code: output buffer with ECC
51 */
52int nand_bch_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
53 unsigned char *code)
54{
55 const struct nand_chip *chip = mtd->priv;
56 struct nand_bch_control *nbc = chip->ecc.priv;
57 unsigned int i;
58
59 memset(code, 0, chip->ecc.bytes);
60 encode_bch(nbc->bch, buf, chip->ecc.size, code);
61
62 /* apply mask so that an erased page is a valid codeword */
63 for (i = 0; i < chip->ecc.bytes; i++)
64 code[i] ^= nbc->eccmask[i];
65
66 return 0;
67}
68EXPORT_SYMBOL(nand_bch_calculate_ecc);
69
70/**
71 * nand_bch_correct_data - [NAND Interface] Detect and correct bit error(s)
72 * @mtd: MTD block structure
73 * @buf: raw data read from the chip
74 * @read_ecc: ECC from the chip
75 * @calc_ecc: the ECC calculated from raw data
76 *
77 * Detect and correct bit errors for a data byte block
78 */
79int nand_bch_correct_data(struct mtd_info *mtd, unsigned char *buf,
80 unsigned char *read_ecc, unsigned char *calc_ecc)
81{
82 const struct nand_chip *chip = mtd->priv;
83 struct nand_bch_control *nbc = chip->ecc.priv;
84 unsigned int *errloc = nbc->errloc;
85 int i, count;
86
87 count = decode_bch(nbc->bch, NULL, chip->ecc.size, read_ecc, calc_ecc,
88 NULL, errloc);
89 if (count > 0) {
90 for (i = 0; i < count; i++) {
91 if (errloc[i] < (chip->ecc.size*8))
92 /* error is located in data, correct it */
93 buf[errloc[i] >> 3] ^= (1 << (errloc[i] & 7));
94 /* else error in ecc, no action needed */
95
96 DEBUG(MTD_DEBUG_LEVEL0, "%s: corrected bitflip %u\n",
97 __func__, errloc[i]);
98 }
99 } else if (count < 0) {
100 printk(KERN_ERR "ecc unrecoverable error\n");
101 count = -1;
102 }
103 return count;
104}
105EXPORT_SYMBOL(nand_bch_correct_data);
106
107/**
108 * nand_bch_init - [NAND Interface] Initialize NAND BCH error correction
109 * @mtd: MTD block structure
110 * @eccsize: ecc block size in bytes
111 * @eccbytes: ecc length in bytes
112 * @ecclayout: output default layout
113 *
114 * Returns:
115 * a pointer to a new NAND BCH control structure, or NULL upon failure
116 *
117 * Initialize NAND BCH error correction. Parameters @eccsize and @eccbytes
118 * are used to compute BCH parameters m (Galois field order) and t (error
119 * correction capability). @eccbytes should be equal to the number of bytes
120 * required to store m*t bits, where m is such that 2^m-1 > @eccsize*8.
121 *
122 * Example: to configure 4 bit correction per 512 bytes, you should pass
123 * @eccsize = 512 (thus, m=13 is the smallest integer such that 2^m-1 > 512*8)
124 * @eccbytes = 7 (7 bytes are required to store m*t = 13*4 = 52 bits)
125 */
126struct nand_bch_control *
127nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes,
128 struct nand_ecclayout **ecclayout)
129{
130 unsigned int m, t, eccsteps, i;
131 struct nand_ecclayout *layout;
132 struct nand_bch_control *nbc = NULL;
133 unsigned char *erased_page;
134
135 if (!eccsize || !eccbytes) {
136 printk(KERN_WARNING "ecc parameters not supplied\n");
137 goto fail;
138 }
139
140 m = fls(1+8*eccsize);
141 t = (eccbytes*8)/m;
142
143 nbc = kzalloc(sizeof(*nbc), GFP_KERNEL);
144 if (!nbc)
145 goto fail;
146
147 nbc->bch = init_bch(m, t, 0);
148 if (!nbc->bch)
149 goto fail;
150
151 /* verify that eccbytes has the expected value */
152 if (nbc->bch->ecc_bytes != eccbytes) {
153 printk(KERN_WARNING "invalid eccbytes %u, should be %u\n",
154 eccbytes, nbc->bch->ecc_bytes);
155 goto fail;
156 }
157
158 eccsteps = mtd->writesize/eccsize;
159
160 /* if no ecc placement scheme was provided, build one */
161 if (!*ecclayout) {
162
163 /* handle large page devices only */
164 if (mtd->oobsize < 64) {
165 printk(KERN_WARNING "must provide an oob scheme for "
166 "oobsize %d\n", mtd->oobsize);
167 goto fail;
168 }
169
170 layout = &nbc->ecclayout;
171 layout->eccbytes = eccsteps*eccbytes;
172
173 /* reserve 2 bytes for bad block marker */
174 if (layout->eccbytes+2 > mtd->oobsize) {
175 printk(KERN_WARNING "no suitable oob scheme available "
176 "for oobsize %d eccbytes %u\n", mtd->oobsize,
177 eccbytes);
178 goto fail;
179 }
180 /* put ecc bytes at oob tail */
181 for (i = 0; i < layout->eccbytes; i++)
182 layout->eccpos[i] = mtd->oobsize-layout->eccbytes+i;
183
184 layout->oobfree[0].offset = 2;
185 layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes;
186
187 *ecclayout = layout;
188 }
189
190 /* sanity checks */
191 if (8*(eccsize+eccbytes) >= (1 << m)) {
192 printk(KERN_WARNING "eccsize %u is too large\n", eccsize);
193 goto fail;
194 }
195 if ((*ecclayout)->eccbytes != (eccsteps*eccbytes)) {
196 printk(KERN_WARNING "invalid ecc layout\n");
197 goto fail;
198 }
199
200 nbc->eccmask = kmalloc(eccbytes, GFP_KERNEL);
201 nbc->errloc = kmalloc(t*sizeof(*nbc->errloc), GFP_KERNEL);
202 if (!nbc->eccmask || !nbc->errloc)
203 goto fail;
204 /*
205 * compute and store the inverted ecc of an erased ecc block
206 */
207 erased_page = kmalloc(eccsize, GFP_KERNEL);
208 if (!erased_page)
209 goto fail;
210
211 memset(erased_page, 0xff, eccsize);
212 memset(nbc->eccmask, 0, eccbytes);
213 encode_bch(nbc->bch, erased_page, eccsize, nbc->eccmask);
214 kfree(erased_page);
215
216 for (i = 0; i < eccbytes; i++)
217 nbc->eccmask[i] ^= 0xff;
218
219 return nbc;
220fail:
221 nand_bch_free(nbc);
222 return NULL;
223}
224EXPORT_SYMBOL(nand_bch_init);
225
226/**
227 * nand_bch_free - [NAND Interface] Release NAND BCH ECC resources
228 * @nbc: NAND BCH control structure
229 */
230void nand_bch_free(struct nand_bch_control *nbc)
231{
232 if (nbc) {
233 free_bch(nbc->bch);
234 kfree(nbc->errloc);
235 kfree(nbc->eccmask);
236 kfree(nbc);
237 }
238}
239EXPORT_SYMBOL(nand_bch_free);
240
241MODULE_LICENSE("GPL");
242MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
243MODULE_DESCRIPTION("NAND software BCH ECC support");
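
To make the kernel-doc example in nand_bch_init() concrete, here is the arithmetic the function performs for eccsize=512, eccbytes=7 (a sketch reusing its own fls()-based computation):

    unsigned int eccsize = 512, eccbytes = 7;

    /* smallest m with 2^m - 1 > 8*eccsize: fls(1 + 4096) = 13 */
    unsigned int m = fls(1 + 8 * eccsize);  /* GF(2^13) */

    /* error-correction capability that fits in the supplied bytes */
    unsigned int t = (eccbytes * 8) / m;    /* 56 / 13 = 4 bits */

    /* m*t = 52 parity bits round up to 7 bytes, matching eccbytes,
     * so init_bch(13, 4, 0) accepts this configuration */
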
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index a5aa99f014ba..213181be0d9a 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -34,6 +34,7 @@
34#include <linux/string.h> 34#include <linux/string.h>
35#include <linux/mtd/mtd.h> 35#include <linux/mtd/mtd.h>
36#include <linux/mtd/nand.h> 36#include <linux/mtd/nand.h>
37#include <linux/mtd/nand_bch.h>
37#include <linux/mtd/partitions.h> 38#include <linux/mtd/partitions.h>
38#include <linux/delay.h> 39#include <linux/delay.h>
39#include <linux/list.h> 40#include <linux/list.h>
@@ -108,6 +109,7 @@ static unsigned int rptwear = 0;
108static unsigned int overridesize = 0; 109static unsigned int overridesize = 0;
109static char *cache_file = NULL; 110static char *cache_file = NULL;
110static unsigned int bbt; 111static unsigned int bbt;
112static unsigned int bch;
111 113
112module_param(first_id_byte, uint, 0400); 114module_param(first_id_byte, uint, 0400);
113module_param(second_id_byte, uint, 0400); 115module_param(second_id_byte, uint, 0400);
@@ -132,6 +134,7 @@ module_param(rptwear, uint, 0400);
132module_param(overridesize, uint, 0400); 134module_param(overridesize, uint, 0400);
133module_param(cache_file, charp, 0400); 135module_param(cache_file, charp, 0400);
134module_param(bbt, uint, 0400); 136module_param(bbt, uint, 0400);
137module_param(bch, uint, 0400);
135 138
136MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)"); 139MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)");
137MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)"); 140MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)");
@@ -165,6 +168,8 @@ MODULE_PARM_DESC(overridesize, "Specifies the NAND Flash size overriding the I
165 " e.g. 5 means a size of 32 erase blocks"); 168 " e.g. 5 means a size of 32 erase blocks");
166MODULE_PARM_DESC(cache_file, "File to use to cache nand pages instead of memory"); 169MODULE_PARM_DESC(cache_file, "File to use to cache nand pages instead of memory");
167MODULE_PARM_DESC(bbt, "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in data area"); 170MODULE_PARM_DESC(bbt, "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in data area");
171MODULE_PARM_DESC(bch, "Enable BCH ecc and set how many bits should "
172 "be correctable in 512-byte blocks");
168 173
169/* The largest possible page size */ 174/* The largest possible page size */
170#define NS_LARGEST_PAGE_SIZE 4096 175#define NS_LARGEST_PAGE_SIZE 4096
@@ -2309,7 +2314,43 @@ static int __init ns_init_module(void)
2309 if ((retval = parse_gravepages()) != 0) 2314 if ((retval = parse_gravepages()) != 0)
2310 goto error; 2315 goto error;
2311 2316
2312 if ((retval = nand_scan(nsmtd, 1)) != 0) { 2317 retval = nand_scan_ident(nsmtd, 1, NULL);
2318 if (retval) {
2319 NS_ERR("cannot scan NAND Simulator device\n");
2320 if (retval > 0)
2321 retval = -ENXIO;
2322 goto error;
2323 }
2324
2325 if (bch) {
2326 unsigned int eccsteps, eccbytes;
2327 if (!mtd_nand_has_bch()) {
2328 NS_ERR("BCH ECC support is disabled\n");
2329 retval = -EINVAL;
2330 goto error;
2331 }
2332 /* use 512-byte ecc blocks */
2333 eccsteps = nsmtd->writesize/512;
2334 eccbytes = (bch*13+7)/8;
2335 /* do not bother supporting small page devices */
2336 if ((nsmtd->oobsize < 64) || !eccsteps) {
2337 NS_ERR("bch not available on small page devices\n");
2338 retval = -EINVAL;
2339 goto error;
2340 }
2341 if ((eccbytes*eccsteps+2) > nsmtd->oobsize) {
2342 NS_ERR("invalid bch value %u\n", bch);
2343 retval = -EINVAL;
2344 goto error;
2345 }
2346 chip->ecc.mode = NAND_ECC_SOFT_BCH;
2347 chip->ecc.size = 512;
2348 chip->ecc.bytes = eccbytes;
2349 NS_INFO("using %u-bit/%u bytes BCH ECC\n", bch, chip->ecc.size);
2350 }
2351
2352 retval = nand_scan_tail(nsmtd);
2353 if (retval) {
2313 NS_ERR("can't register NAND Simulator\n"); 2354 NS_ERR("can't register NAND Simulator\n");
2314 if (retval > 0) 2355 if (retval > 0)
2315 retval = -ENXIO; 2356 retval = -ENXIO;
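
The OOB sizing above can be checked by hand; for the common bch=4 case on a simulated 2048+64 device (a sketch of the same arithmetic):

    unsigned int bch = 4;                       /* bits per 512-byte block */
    unsigned int writesize = 2048, oobsize = 64;

    unsigned int eccsteps = writesize / 512;    /* 4 ECC steps per page */
    unsigned int eccbytes = (bch * 13 + 7) / 8; /* 52 bits -> 7 bytes */

    /* OOB demand: 4*7 + 2 marker bytes = 30, comfortably under 64 */
    BUG_ON(eccbytes * eccsteps + 2 > oobsize);

Loading the simulator with, for example, "modprobe nandsim bch=4" then exercises the NAND_ECC_SOFT_BCH path end to end.
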
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 7b8f1fffc528..da9a351c9d79 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -668,6 +668,8 @@ static void gen_true_ecc(u8 *ecc_buf)
668 * 668 *
669 * This function compares two ECC's and indicates if there is an error. 669 * This function compares two ECC's and indicates if there is an error.
670 * If the error can be corrected it will be corrected to the buffer. 670 * If the error can be corrected it will be corrected to the buffer.
671 * If there is no error, %0 is returned. If there is an error but it
672 * was corrected, %1 is returned. Otherwise, %-1 is returned.
671 */ 673 */
672static int omap_compare_ecc(u8 *ecc_data1, /* read from NAND memory */ 674static int omap_compare_ecc(u8 *ecc_data1, /* read from NAND memory */
673 u8 *ecc_data2, /* read from register */ 675 u8 *ecc_data2, /* read from register */
@@ -773,7 +775,7 @@ static int omap_compare_ecc(u8 *ecc_data1, /* read from NAND memory */
773 775
774 page_data[find_byte] ^= (1 << find_bit); 776 page_data[find_byte] ^= (1 << find_bit);
775 777
776 return 0; 778 return 1;
777 default: 779 default:
778 if (isEccFF) { 780 if (isEccFF) {
779 if (ecc_data2[0] == 0 && 781 if (ecc_data2[0] == 0 &&
@@ -794,8 +796,11 @@ static int omap_compare_ecc(u8 *ecc_data1, /* read from NAND memory */
794 * @calc_ecc: ecc read from HW ECC registers 796 * @calc_ecc: ecc read from HW ECC registers
795 * 797 *
796 * Compares the ecc read from nand spare area with ECC registers values 798 * Compares the ecc read from nand spare area with ECC registers values
797 * and if ECC's mismached, it will call 'omap_compare_ecc' for error 799 * and if the ECCs mismatch, it will call 'omap_compare_ecc' for error
798 * and correction. 800 * detection and correction. If there are no errors, %0 is returned. If
801 * there were errors and all of the errors were corrected, the number of
802 * corrected errors is returned. If uncorrectable errors exist, %-1 is
803 * returned.
799 */ 804 */
800static int omap_correct_data(struct mtd_info *mtd, u_char *dat, 805static int omap_correct_data(struct mtd_info *mtd, u_char *dat,
801 u_char *read_ecc, u_char *calc_ecc) 806 u_char *read_ecc, u_char *calc_ecc)
@@ -803,6 +808,7 @@ static int omap_correct_data(struct mtd_info *mtd, u_char *dat,
803 struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, 808 struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
804 mtd); 809 mtd);
805 int blockCnt = 0, i = 0, ret = 0; 810 int blockCnt = 0, i = 0, ret = 0;
811 int stat = 0;
806 812
807 /* Ex NAND_ECC_HW12_2048 */ 813 /* Ex NAND_ECC_HW12_2048 */
808 if ((info->nand.ecc.mode == NAND_ECC_HW) && 814 if ((info->nand.ecc.mode == NAND_ECC_HW) &&
@@ -816,12 +822,14 @@ static int omap_correct_data(struct mtd_info *mtd, u_char *dat,
816 ret = omap_compare_ecc(read_ecc, calc_ecc, dat); 822 ret = omap_compare_ecc(read_ecc, calc_ecc, dat);
817 if (ret < 0) 823 if (ret < 0)
818 return ret; 824 return ret;
825 /* keep track of the number of corrected errors */
826 stat += ret;
819 } 827 }
820 read_ecc += 3; 828 read_ecc += 3;
821 calc_ecc += 3; 829 calc_ecc += 3;
822 dat += 512; 830 dat += 512;
823 } 831 }
824 return 0; 832 return stat;
825} 833}
826 834
827/** 835/**
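
Returning the corrected-bit count instead of 0 matters because of how the NAND core consumes ecc.correct(); roughly (condensed from nand_read_page_hwecc() in nand_base.c):

    int stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);

    if (stat < 0)
            mtd->ecc_stats.failed++;          /* uncorrectable: read fails */
    else
            mtd->ecc_stats.corrected += stat; /* bitflips fixed, read is OK */
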
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index ea2c288df3f6..ab7f4c33ced6 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -27,6 +27,8 @@
27#include <plat/pxa3xx_nand.h> 27#include <plat/pxa3xx_nand.h>
28 28
29#define CHIP_DELAY_TIMEOUT (2 * HZ/10) 29#define CHIP_DELAY_TIMEOUT (2 * HZ/10)
30#define NAND_STOP_DELAY (2 * HZ/50)
31#define PAGE_CHUNK_SIZE (2048)
30 32
31/* registers and bit definitions */ 33/* registers and bit definitions */
32#define NDCR (0x00) /* Control register */ 34#define NDCR (0x00) /* Control register */
@@ -52,16 +54,18 @@
52#define NDCR_ND_MODE (0x3 << 21) 54#define NDCR_ND_MODE (0x3 << 21)
53#define NDCR_NAND_MODE (0x0) 55#define NDCR_NAND_MODE (0x0)
54#define NDCR_CLR_PG_CNT (0x1 << 20) 56#define NDCR_CLR_PG_CNT (0x1 << 20)
55#define NDCR_CLR_ECC (0x1 << 19) 57#define NDCR_STOP_ON_UNCOR (0x1 << 19)
56#define NDCR_RD_ID_CNT_MASK (0x7 << 16) 58#define NDCR_RD_ID_CNT_MASK (0x7 << 16)
57#define NDCR_RD_ID_CNT(x) (((x) << 16) & NDCR_RD_ID_CNT_MASK) 59#define NDCR_RD_ID_CNT(x) (((x) << 16) & NDCR_RD_ID_CNT_MASK)
58 60
59#define NDCR_RA_START (0x1 << 15) 61#define NDCR_RA_START (0x1 << 15)
60#define NDCR_PG_PER_BLK (0x1 << 14) 62#define NDCR_PG_PER_BLK (0x1 << 14)
61#define NDCR_ND_ARB_EN (0x1 << 12) 63#define NDCR_ND_ARB_EN (0x1 << 12)
64#define NDCR_INT_MASK (0xFFF)
62 65
63#define NDSR_MASK (0xfff) 66#define NDSR_MASK (0xfff)
64#define NDSR_RDY (0x1 << 11) 67#define NDSR_RDY (0x1 << 12)
68#define NDSR_FLASH_RDY (0x1 << 11)
65#define NDSR_CS0_PAGED (0x1 << 10) 69#define NDSR_CS0_PAGED (0x1 << 10)
66#define NDSR_CS1_PAGED (0x1 << 9) 70#define NDSR_CS1_PAGED (0x1 << 9)
67#define NDSR_CS0_CMDD (0x1 << 8) 71#define NDSR_CS0_CMDD (0x1 << 8)
@@ -74,6 +78,7 @@
74#define NDSR_RDDREQ (0x1 << 1) 78#define NDSR_RDDREQ (0x1 << 1)
75#define NDSR_WRCMDREQ (0x1) 79#define NDSR_WRCMDREQ (0x1)
76 80
81#define NDCB0_ST_ROW_EN (0x1 << 26)
77#define NDCB0_AUTO_RS (0x1 << 25) 82#define NDCB0_AUTO_RS (0x1 << 25)
78#define NDCB0_CSEL (0x1 << 24) 83#define NDCB0_CSEL (0x1 << 24)
79#define NDCB0_CMD_TYPE_MASK (0x7 << 21) 84#define NDCB0_CMD_TYPE_MASK (0x7 << 21)
@@ -104,18 +109,21 @@ enum {
104}; 109};
105 110
106enum { 111enum {
107 STATE_READY = 0, 112 STATE_IDLE = 0,
108 STATE_CMD_HANDLE, 113 STATE_CMD_HANDLE,
109 STATE_DMA_READING, 114 STATE_DMA_READING,
110 STATE_DMA_WRITING, 115 STATE_DMA_WRITING,
111 STATE_DMA_DONE, 116 STATE_DMA_DONE,
112 STATE_PIO_READING, 117 STATE_PIO_READING,
113 STATE_PIO_WRITING, 118 STATE_PIO_WRITING,
119 STATE_CMD_DONE,
120 STATE_READY,
114}; 121};
115 122
116struct pxa3xx_nand_info { 123struct pxa3xx_nand_info {
117 struct nand_chip nand_chip; 124 struct nand_chip nand_chip;
118 125
126 struct nand_hw_control controller;
119 struct platform_device *pdev; 127 struct platform_device *pdev;
120 struct pxa3xx_nand_cmdset *cmdset; 128 struct pxa3xx_nand_cmdset *cmdset;
121 129
@@ -126,6 +134,7 @@ struct pxa3xx_nand_info {
126 unsigned int buf_start; 134 unsigned int buf_start;
127 unsigned int buf_count; 135 unsigned int buf_count;
128 136
137 struct mtd_info *mtd;
129 /* DMA information */ 138 /* DMA information */
130 int drcmr_dat; 139 int drcmr_dat;
131 int drcmr_cmd; 140 int drcmr_cmd;
@@ -149,6 +158,7 @@ struct pxa3xx_nand_info {
149 158
150 int use_ecc; /* use HW ECC ? */ 159 int use_ecc; /* use HW ECC ? */
151 int use_dma; /* use DMA ? */ 160 int use_dma; /* use DMA ? */
161 int is_ready;
152 162
153 unsigned int page_size; /* page size of attached chip */ 163 unsigned int page_size; /* page size of attached chip */
154 unsigned int data_size; /* data size in FIFO */ 164 unsigned int data_size; /* data size in FIFO */
@@ -201,20 +211,22 @@ static struct pxa3xx_nand_timing timing[] = {
201}; 211};
202 212
203static struct pxa3xx_nand_flash builtin_flash_types[] = { 213static struct pxa3xx_nand_flash builtin_flash_types[] = {
204 { 0, 0, 2048, 8, 8, 0, &default_cmdset, &timing[0] }, 214{ "DEFAULT FLASH", 0, 0, 2048, 8, 8, 0, &timing[0] },
205 { 0x46ec, 32, 512, 16, 16, 4096, &default_cmdset, &timing[1] }, 215{ "64MiB 16-bit", 0x46ec, 32, 512, 16, 16, 4096, &timing[1] },
206 { 0xdaec, 64, 2048, 8, 8, 2048, &default_cmdset, &timing[1] }, 216{ "256MiB 8-bit", 0xdaec, 64, 2048, 8, 8, 2048, &timing[1] },
207 { 0xd7ec, 128, 4096, 8, 8, 8192, &default_cmdset, &timing[1] }, 217{ "4GiB 8-bit", 0xd7ec, 128, 4096, 8, 8, 8192, &timing[1] },
208 { 0xa12c, 64, 2048, 8, 8, 1024, &default_cmdset, &timing[2] }, 218{ "128MiB 8-bit", 0xa12c, 64, 2048, 8, 8, 1024, &timing[2] },
209 { 0xb12c, 64, 2048, 16, 16, 1024, &default_cmdset, &timing[2] }, 219{ "128MiB 16-bit", 0xb12c, 64, 2048, 16, 16, 1024, &timing[2] },
210 { 0xdc2c, 64, 2048, 8, 8, 4096, &default_cmdset, &timing[2] }, 220{ "512MiB 8-bit", 0xdc2c, 64, 2048, 8, 8, 4096, &timing[2] },
211 { 0xcc2c, 64, 2048, 16, 16, 4096, &default_cmdset, &timing[2] }, 221{ "512MiB 16-bit", 0xcc2c, 64, 2048, 16, 16, 4096, &timing[2] },
212 { 0xba20, 64, 2048, 16, 16, 2048, &default_cmdset, &timing[3] }, 222{ "256MiB 16-bit", 0xba20, 64, 2048, 16, 16, 2048, &timing[3] },
213}; 223};
214 224
215/* Define a default flash type setting serve as flash detecting only */ 225/* Define a default flash type setting serve as flash detecting only */
216#define DEFAULT_FLASH_TYPE (&builtin_flash_types[0]) 226#define DEFAULT_FLASH_TYPE (&builtin_flash_types[0])
217 227
228const char *mtd_names[] = {"pxa3xx_nand-0", NULL};
229
218#define NDTR0_tCH(c) (min((c), 7) << 19) 230#define NDTR0_tCH(c) (min((c), 7) << 19)
219#define NDTR0_tCS(c) (min((c), 7) << 16) 231#define NDTR0_tCS(c) (min((c), 7) << 16)
220#define NDTR0_tWH(c) (min((c), 7) << 11) 232#define NDTR0_tWH(c) (min((c), 7) << 11)
@@ -252,25 +264,6 @@ static void pxa3xx_nand_set_timing(struct pxa3xx_nand_info *info,
252 nand_writel(info, NDTR1CS0, ndtr1); 264 nand_writel(info, NDTR1CS0, ndtr1);
253} 265}
254 266
255#define WAIT_EVENT_TIMEOUT 10
256
257static int wait_for_event(struct pxa3xx_nand_info *info, uint32_t event)
258{
259 int timeout = WAIT_EVENT_TIMEOUT;
260 uint32_t ndsr;
261
262 while (timeout--) {
263 ndsr = nand_readl(info, NDSR) & NDSR_MASK;
264 if (ndsr & event) {
265 nand_writel(info, NDSR, ndsr);
266 return 0;
267 }
268 udelay(10);
269 }
270
271 return -ETIMEDOUT;
272}
273
274static void pxa3xx_set_datasize(struct pxa3xx_nand_info *info) 267static void pxa3xx_set_datasize(struct pxa3xx_nand_info *info)
275{ 268{
276 int oob_enable = info->reg_ndcr & NDCR_SPARE_EN; 269 int oob_enable = info->reg_ndcr & NDCR_SPARE_EN;
@@ -291,69 +284,45 @@ static void pxa3xx_set_datasize(struct pxa3xx_nand_info *info)
291 } 284 }
292} 285}
293 286
294static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info, 287/**
295 uint16_t cmd, int column, int page_addr) 288 * NOTE: ND_RUN must be set first, and only then may the
289 * command buffer be written; otherwise it does not work.
290 * We enable all the interrupts at the same time, and
291 * let pxa3xx_nand_irq handle all the logic.
292 */
293static void pxa3xx_nand_start(struct pxa3xx_nand_info *info)
296{ 294{
297 const struct pxa3xx_nand_cmdset *cmdset = info->cmdset; 295 uint32_t ndcr;
298 pxa3xx_set_datasize(info);
299
300 /* generate values for NDCBx registers */
301 info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
302 info->ndcb1 = 0;
303 info->ndcb2 = 0;
304 info->ndcb0 |= NDCB0_ADDR_CYC(info->row_addr_cycles + info->col_addr_cycles);
305
306 if (info->col_addr_cycles == 2) {
307 /* large block, 2 cycles for column address
308 * row address starts from 3rd cycle
309 */
310 info->ndcb1 |= page_addr << 16;
311 if (info->row_addr_cycles == 3)
312 info->ndcb2 = (page_addr >> 16) & 0xff;
313 } else
314 /* small block, 1 cycles for column address
315 * row address starts from 2nd cycle
316 */
317 info->ndcb1 = page_addr << 8;
318
319 if (cmd == cmdset->program)
320 info->ndcb0 |= NDCB0_CMD_TYPE(1) | NDCB0_AUTO_RS;
321 296
322 return 0; 297 ndcr = info->reg_ndcr;
323} 298 ndcr |= info->use_ecc ? NDCR_ECC_EN : 0;
299 ndcr |= info->use_dma ? NDCR_DMA_EN : 0;
300 ndcr |= NDCR_ND_RUN;
324 301
325static int prepare_erase_cmd(struct pxa3xx_nand_info *info, 302 /* clear status bits and run */
326 uint16_t cmd, int page_addr) 303 nand_writel(info, NDCR, 0);
327{ 304 nand_writel(info, NDSR, NDSR_MASK);
328 info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0); 305 nand_writel(info, NDCR, ndcr);
329 info->ndcb0 |= NDCB0_CMD_TYPE(2) | NDCB0_AUTO_RS | NDCB0_ADDR_CYC(3);
330 info->ndcb1 = page_addr;
331 info->ndcb2 = 0;
332 return 0;
333} 306}
334 307
335static int prepare_other_cmd(struct pxa3xx_nand_info *info, uint16_t cmd) 308static void pxa3xx_nand_stop(struct pxa3xx_nand_info *info)
336{ 309{
337 const struct pxa3xx_nand_cmdset *cmdset = info->cmdset; 310 uint32_t ndcr;
338 311 int timeout = NAND_STOP_DELAY;
339 info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
340 info->ndcb1 = 0;
341 info->ndcb2 = 0;
342 312
343 info->oob_size = 0; 313 /* wait RUN bit in NDCR become 0 */
344 if (cmd == cmdset->read_id) { 314 ndcr = nand_readl(info, NDCR);
345 info->ndcb0 |= NDCB0_CMD_TYPE(3); 315 while ((ndcr & NDCR_ND_RUN) && (timeout-- > 0)) {
346 info->data_size = 8; 316 ndcr = nand_readl(info, NDCR);
347 } else if (cmd == cmdset->read_status) { 317 udelay(1);
348 info->ndcb0 |= NDCB0_CMD_TYPE(4); 318 }
349 info->data_size = 8;
350 } else if (cmd == cmdset->reset || cmd == cmdset->lock ||
351 cmd == cmdset->unlock) {
352 info->ndcb0 |= NDCB0_CMD_TYPE(5);
353 } else
354 return -EINVAL;
355 319
356 return 0; 320 if (timeout <= 0) {
321 ndcr &= ~NDCR_ND_RUN;
322 nand_writel(info, NDCR, ndcr);
323 }
324 /* clear status bits */
325 nand_writel(info, NDSR, NDSR_MASK);
357} 326}
358 327
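
The ordering constraint from the NOTE above boils down to three register writes; a condensed sketch of pxa3xx_nand_start() (same registers, simplified flow):

    nand_writel(info, NDCR, 0);                  /* quiesce the controller */
    nand_writel(info, NDSR, NDSR_MASK);          /* ack any stale status bits */
    nand_writel(info, NDCR, ndcr | NDCR_ND_RUN); /* run; the NDCB0 writes
                                                    happen later, from the
                                                    WRCMDREQ interrupt */
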
359static void enable_int(struct pxa3xx_nand_info *info, uint32_t int_mask) 328static void enable_int(struct pxa3xx_nand_info *info, uint32_t int_mask)
@@ -372,39 +341,8 @@ static void disable_int(struct pxa3xx_nand_info *info, uint32_t int_mask)
372 nand_writel(info, NDCR, ndcr | int_mask); 341 nand_writel(info, NDCR, ndcr | int_mask);
373} 342}
374 343
375/* NOTE: it is a must to set ND_RUN firstly, then write command buffer 344static void handle_data_pio(struct pxa3xx_nand_info *info)
376 * otherwise, it does not work
377 */
378static int write_cmd(struct pxa3xx_nand_info *info)
379{ 345{
380 uint32_t ndcr;
381
382 /* clear status bits and run */
383 nand_writel(info, NDSR, NDSR_MASK);
384
385 ndcr = info->reg_ndcr;
386
387 ndcr |= info->use_ecc ? NDCR_ECC_EN : 0;
388 ndcr |= info->use_dma ? NDCR_DMA_EN : 0;
389 ndcr |= NDCR_ND_RUN;
390
391 nand_writel(info, NDCR, ndcr);
392
393 if (wait_for_event(info, NDSR_WRCMDREQ)) {
394 printk(KERN_ERR "timed out writing command\n");
395 return -ETIMEDOUT;
396 }
397
398 nand_writel(info, NDCB0, info->ndcb0);
399 nand_writel(info, NDCB0, info->ndcb1);
400 nand_writel(info, NDCB0, info->ndcb2);
401 return 0;
402}
403
404static int handle_data_pio(struct pxa3xx_nand_info *info)
405{
406 int ret, timeout = CHIP_DELAY_TIMEOUT;
407
408 switch (info->state) { 346 switch (info->state) {
409 case STATE_PIO_WRITING: 347 case STATE_PIO_WRITING:
410 __raw_writesl(info->mmio_base + NDDB, info->data_buff, 348 __raw_writesl(info->mmio_base + NDDB, info->data_buff,
@@ -412,14 +350,6 @@ static int handle_data_pio(struct pxa3xx_nand_info *info)
412 if (info->oob_size > 0) 350 if (info->oob_size > 0)
413 __raw_writesl(info->mmio_base + NDDB, info->oob_buff, 351 __raw_writesl(info->mmio_base + NDDB, info->oob_buff,
414 DIV_ROUND_UP(info->oob_size, 4)); 352 DIV_ROUND_UP(info->oob_size, 4));
415
416 enable_int(info, NDSR_CS0_BBD | NDSR_CS0_CMDD);
417
418 ret = wait_for_completion_timeout(&info->cmd_complete, timeout);
419 if (!ret) {
420 printk(KERN_ERR "program command time out\n");
421 return -1;
422 }
423 break; 353 break;
424 case STATE_PIO_READING: 354 case STATE_PIO_READING:
425 __raw_readsl(info->mmio_base + NDDB, info->data_buff, 355 __raw_readsl(info->mmio_base + NDDB, info->data_buff,
@@ -431,14 +361,11 @@ static int handle_data_pio(struct pxa3xx_nand_info *info)
431 default: 361 default:
432 printk(KERN_ERR "%s: invalid state %d\n", __func__, 362 printk(KERN_ERR "%s: invalid state %d\n", __func__,
433 info->state); 363 info->state);
434 return -EINVAL; 364 BUG();
435 } 365 }
436
437 info->state = STATE_READY;
438 return 0;
439} 366}
440 367
441static void start_data_dma(struct pxa3xx_nand_info *info, int dir_out) 368static void start_data_dma(struct pxa3xx_nand_info *info)
442{ 369{
443 struct pxa_dma_desc *desc = info->data_desc; 370 struct pxa_dma_desc *desc = info->data_desc;
444 int dma_len = ALIGN(info->data_size + info->oob_size, 32); 371 int dma_len = ALIGN(info->data_size + info->oob_size, 32);
@@ -446,14 +373,21 @@ static void start_data_dma(struct pxa3xx_nand_info *info, int dir_out)
446 desc->ddadr = DDADR_STOP; 373 desc->ddadr = DDADR_STOP;
447 desc->dcmd = DCMD_ENDIRQEN | DCMD_WIDTH4 | DCMD_BURST32 | dma_len; 374 desc->dcmd = DCMD_ENDIRQEN | DCMD_WIDTH4 | DCMD_BURST32 | dma_len;
448 375
449 if (dir_out) { 376 switch (info->state) {
377 case STATE_DMA_WRITING:
450 desc->dsadr = info->data_buff_phys; 378 desc->dsadr = info->data_buff_phys;
451 desc->dtadr = info->mmio_phys + NDDB; 379 desc->dtadr = info->mmio_phys + NDDB;
452 desc->dcmd |= DCMD_INCSRCADDR | DCMD_FLOWTRG; 380 desc->dcmd |= DCMD_INCSRCADDR | DCMD_FLOWTRG;
453 } else { 381 break;
382 case STATE_DMA_READING:
454 desc->dtadr = info->data_buff_phys; 383 desc->dtadr = info->data_buff_phys;
455 desc->dsadr = info->mmio_phys + NDDB; 384 desc->dsadr = info->mmio_phys + NDDB;
456 desc->dcmd |= DCMD_INCTRGADDR | DCMD_FLOWSRC; 385 desc->dcmd |= DCMD_INCTRGADDR | DCMD_FLOWSRC;
386 break;
387 default:
388 printk(KERN_ERR "%s: invalid state %d\n", __func__,
389 info->state);
390 BUG();
457 } 391 }
458 392
459 DRCMR(info->drcmr_dat) = DRCMR_MAPVLD | info->data_dma_ch; 393 DRCMR(info->drcmr_dat) = DRCMR_MAPVLD | info->data_dma_ch;
@@ -471,93 +405,62 @@ static void pxa3xx_nand_data_dma_irq(int channel, void *data)
471 405
472 if (dcsr & DCSR_BUSERR) { 406 if (dcsr & DCSR_BUSERR) {
473 info->retcode = ERR_DMABUSERR; 407 info->retcode = ERR_DMABUSERR;
474 complete(&info->cmd_complete);
475 } 408 }
476 409
477 if (info->state == STATE_DMA_WRITING) { 410 info->state = STATE_DMA_DONE;
478 info->state = STATE_DMA_DONE; 411 enable_int(info, NDCR_INT_MASK);
479 enable_int(info, NDSR_CS0_BBD | NDSR_CS0_CMDD); 412 nand_writel(info, NDSR, NDSR_WRDREQ | NDSR_RDDREQ);
480 } else {
481 info->state = STATE_READY;
482 complete(&info->cmd_complete);
483 }
484} 413}
485 414
486static irqreturn_t pxa3xx_nand_irq(int irq, void *devid) 415static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
487{ 416{
488 struct pxa3xx_nand_info *info = devid; 417 struct pxa3xx_nand_info *info = devid;
489 unsigned int status; 418 unsigned int status, is_completed = 0;
490 419
491 status = nand_readl(info, NDSR); 420 status = nand_readl(info, NDSR);
492 421
493 if (status & (NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR)) { 422 if (status & NDSR_DBERR)
494 if (status & NDSR_DBERR) 423 info->retcode = ERR_DBERR;
495 info->retcode = ERR_DBERR; 424 if (status & NDSR_SBERR)
496 else if (status & NDSR_SBERR) 425 info->retcode = ERR_SBERR;
497 info->retcode = ERR_SBERR; 426 if (status & (NDSR_RDDREQ | NDSR_WRDREQ)) {
498 427 /* whether use dma to transfer data */
499 disable_int(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR);
500
501 if (info->use_dma) {
502 info->state = STATE_DMA_READING;
503 start_data_dma(info, 0);
504 } else {
505 info->state = STATE_PIO_READING;
506 complete(&info->cmd_complete);
507 }
508 } else if (status & NDSR_WRDREQ) {
509 disable_int(info, NDSR_WRDREQ);
510 if (info->use_dma) { 428 if (info->use_dma) {
511 info->state = STATE_DMA_WRITING; 429 disable_int(info, NDCR_INT_MASK);
512 start_data_dma(info, 1); 430 info->state = (status & NDSR_RDDREQ) ?
431 STATE_DMA_READING : STATE_DMA_WRITING;
432 start_data_dma(info);
433 goto NORMAL_IRQ_EXIT;
513 } else { 434 } else {
514 info->state = STATE_PIO_WRITING; 435 info->state = (status & NDSR_RDDREQ) ?
515 complete(&info->cmd_complete); 436 STATE_PIO_READING : STATE_PIO_WRITING;
437 handle_data_pio(info);
516 } 438 }
517 } else if (status & (NDSR_CS0_BBD | NDSR_CS0_CMDD)) {
518 if (status & NDSR_CS0_BBD)
519 info->retcode = ERR_BBERR;
520
521 disable_int(info, NDSR_CS0_BBD | NDSR_CS0_CMDD);
522 info->state = STATE_READY;
523 complete(&info->cmd_complete);
524 } 439 }
525 nand_writel(info, NDSR, status); 440 if (status & NDSR_CS0_CMDD) {
526 return IRQ_HANDLED; 441 info->state = STATE_CMD_DONE;
527} 442 is_completed = 1;
528
529static int pxa3xx_nand_do_cmd(struct pxa3xx_nand_info *info, uint32_t event)
530{
531 uint32_t ndcr;
532 int ret, timeout = CHIP_DELAY_TIMEOUT;
533
534 if (write_cmd(info)) {
535 info->retcode = ERR_SENDCMD;
536 goto fail_stop;
537 } 443 }
538 444 if (status & NDSR_FLASH_RDY) {
539 info->state = STATE_CMD_HANDLE; 445 info->is_ready = 1;
540 446 info->state = STATE_READY;
541 enable_int(info, event);
542
543 ret = wait_for_completion_timeout(&info->cmd_complete, timeout);
544 if (!ret) {
545 printk(KERN_ERR "command execution timed out\n");
546 info->retcode = ERR_SENDCMD;
547 goto fail_stop;
548 } 447 }
549 448
550 if (info->use_dma == 0 && info->data_size > 0) 449 if (status & NDSR_WRCMDREQ) {
551 if (handle_data_pio(info)) 450 nand_writel(info, NDSR, NDSR_WRCMDREQ);
552 goto fail_stop; 451 status &= ~NDSR_WRCMDREQ;
553 452 info->state = STATE_CMD_HANDLE;
554 return 0; 453 nand_writel(info, NDCB0, info->ndcb0);
454 nand_writel(info, NDCB0, info->ndcb1);
455 nand_writel(info, NDCB0, info->ndcb2);
456 }
555 457
556fail_stop: 458 /* clear NDSR to let the controller exit the IRQ */
557 ndcr = nand_readl(info, NDCR); 459 nand_writel(info, NDSR, status);
558 nand_writel(info, NDCR, ndcr & ~NDCR_ND_RUN); 460 if (is_completed)
559 udelay(10); 461 complete(&info->cmd_complete);
560 return -ETIMEDOUT; 462NORMAL_IRQ_EXIT:
463 return IRQ_HANDLED;
561} 464}
562 465
563static int pxa3xx_nand_dev_ready(struct mtd_info *mtd) 466static int pxa3xx_nand_dev_ready(struct mtd_info *mtd)
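
One subtlety in the handler above: NDCB0 fronts a small command FIFO, so the three back-to-back writes land in NDCB0, NDCB1 and NDCB2 respectively, and they are only legal after the controller raises WRCMDREQ. A condensed sketch with that made explicit:

    if (status & NDSR_WRCMDREQ) {
            nand_writel(info, NDSR, NDSR_WRCMDREQ); /* ack the request first */
            /* three pushes into the command FIFO behind NDCB0 */
            nand_writel(info, NDCB0, info->ndcb0);  /* command + flags */
            nand_writel(info, NDCB0, info->ndcb1);  /* address cycles */
            nand_writel(info, NDCB0, info->ndcb2);  /* high address byte */
    }
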
@@ -574,125 +477,218 @@ static inline int is_buf_blank(uint8_t *buf, size_t len)
574 return 1; 477 return 1;
575} 478}
576 479
577static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command, 480static int prepare_command_pool(struct pxa3xx_nand_info *info, int command,
578 int column, int page_addr) 481 uint16_t column, int page_addr)
579{ 482{
580 struct pxa3xx_nand_info *info = mtd->priv; 483 uint16_t cmd;
581 const struct pxa3xx_nand_cmdset *cmdset = info->cmdset; 484 int addr_cycle, exec_cmd, ndcb0;
582 int ret; 485 struct mtd_info *mtd = info->mtd;
486
487 ndcb0 = 0;
488 addr_cycle = 0;
489 exec_cmd = 1;
490
491 /* reset data and oob column point to handle data */
492 info->buf_start = 0;
493 info->buf_count = 0;
494 info->oob_size = 0;
495 info->use_ecc = 0;
496 info->is_ready = 0;
497 info->retcode = ERR_NONE;
583 498
584 info->use_dma = (use_dma) ? 1 : 0; 499 switch (command) {
585 info->use_ecc = 0; 500 case NAND_CMD_READ0:
586 info->data_size = 0; 501 case NAND_CMD_PAGEPROG:
587 info->state = STATE_READY; 502 info->use_ecc = 1;
503 case NAND_CMD_READOOB:
504 pxa3xx_set_datasize(info);
505 break;
506 case NAND_CMD_SEQIN:
507 exec_cmd = 0;
508 break;
509 default:
510 info->ndcb1 = 0;
511 info->ndcb2 = 0;
512 break;
513 }
588 514
589 init_completion(&info->cmd_complete); 515 info->ndcb0 = ndcb0;
516 addr_cycle = NDCB0_ADDR_CYC(info->row_addr_cycles
517 + info->col_addr_cycles);
590 518
591 switch (command) { 519 switch (command) {
592 case NAND_CMD_READOOB: 520 case NAND_CMD_READOOB:
593 /* disable HW ECC to get all the OOB data */ 521 case NAND_CMD_READ0:
594 info->buf_count = mtd->writesize + mtd->oobsize; 522 cmd = info->cmdset->read1;
595 info->buf_start = mtd->writesize + column; 523 if (command == NAND_CMD_READOOB)
596 memset(info->data_buff, 0xFF, info->buf_count); 524 info->buf_start = mtd->writesize + column;
525 else
526 info->buf_start = column;
597 527
598 if (prepare_read_prog_cmd(info, cmdset->read1, column, page_addr)) 528 if (unlikely(info->page_size < PAGE_CHUNK_SIZE))
599 break; 529 info->ndcb0 |= NDCB0_CMD_TYPE(0)
530 | addr_cycle
531 | (cmd & NDCB0_CMD1_MASK);
532 else
533 info->ndcb0 |= NDCB0_CMD_TYPE(0)
534 | NDCB0_DBC
535 | addr_cycle
536 | cmd;
600 537
601 pxa3xx_nand_do_cmd(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR); 538 case NAND_CMD_SEQIN:
539 /* small page addr setting */
540 if (unlikely(info->page_size < PAGE_CHUNK_SIZE)) {
541 info->ndcb1 = ((page_addr & 0xFFFFFF) << 8)
542 | (column & 0xFF);
602 543
603 /* We only are OOB, so if the data has error, does not matter */ 544 info->ndcb2 = 0;
604 if (info->retcode == ERR_DBERR) 545 } else {
605 info->retcode = ERR_NONE; 546 info->ndcb1 = ((page_addr & 0xFFFF) << 16)
606 break; 547 | (column & 0xFFFF);
548
549 if (page_addr & 0xFF0000)
550 info->ndcb2 = (page_addr & 0xFF0000) >> 16;
551 else
552 info->ndcb2 = 0;
553 }
607 554
608 case NAND_CMD_READ0:
609 info->use_ecc = 1;
610 info->retcode = ERR_NONE;
611 info->buf_start = column;
612 info->buf_count = mtd->writesize + mtd->oobsize; 555 info->buf_count = mtd->writesize + mtd->oobsize;
613 memset(info->data_buff, 0xFF, info->buf_count); 556 memset(info->data_buff, 0xFF, info->buf_count);
614 557
615 if (prepare_read_prog_cmd(info, cmdset->read1, column, page_addr)) 558 break;
559
560 case NAND_CMD_PAGEPROG:
561 if (is_buf_blank(info->data_buff,
562 (mtd->writesize + mtd->oobsize))) {
563 exec_cmd = 0;
616 break; 564 break;
565 }
617 566
618 pxa3xx_nand_do_cmd(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR); 567 cmd = info->cmdset->program;
568 info->ndcb0 |= NDCB0_CMD_TYPE(0x1)
569 | NDCB0_AUTO_RS
570 | NDCB0_ST_ROW_EN
571 | NDCB0_DBC
572 | cmd
573 | addr_cycle;
574 break;
619 575
620 if (info->retcode == ERR_DBERR) { 576 case NAND_CMD_READID:
621 /* for blank page (all 0xff), HW will calculate its ECC as 577 cmd = info->cmdset->read_id;
622 * 0, which is different from the ECC information within 578 info->buf_count = info->read_id_bytes;
623 * OOB, ignore such double bit errors 579 info->ndcb0 |= NDCB0_CMD_TYPE(3)
624 */ 580 | NDCB0_ADDR_CYC(1)
625 if (is_buf_blank(info->data_buff, mtd->writesize)) 581 | cmd;
626 info->retcode = ERR_NONE; 582
627 } 583 info->data_size = 8;
628 break; 584 break;
629 case NAND_CMD_SEQIN: 585 case NAND_CMD_STATUS:
630 info->buf_start = column; 586 cmd = info->cmdset->read_status;
631 info->buf_count = mtd->writesize + mtd->oobsize; 587 info->buf_count = 1;
632 memset(info->data_buff, 0xff, info->buf_count); 588 info->ndcb0 |= NDCB0_CMD_TYPE(4)
589 | NDCB0_ADDR_CYC(1)
590 | cmd;
633 591
634 /* save column/page_addr for next CMD_PAGEPROG */ 592 info->data_size = 8;
635 info->seqin_column = column;
636 info->seqin_page_addr = page_addr;
637 break; 593 break;
638 case NAND_CMD_PAGEPROG:
639 info->use_ecc = (info->seqin_column >= mtd->writesize) ? 0 : 1;
640 594
641 if (prepare_read_prog_cmd(info, cmdset->program, 595 case NAND_CMD_ERASE1:
642 info->seqin_column, info->seqin_page_addr)) 596 cmd = info->cmdset->erase;
643 break; 597 info->ndcb0 |= NDCB0_CMD_TYPE(2)
598 | NDCB0_AUTO_RS
599 | NDCB0_ADDR_CYC(3)
600 | NDCB0_DBC
601 | cmd;
602 info->ndcb1 = page_addr;
603 info->ndcb2 = 0;
644 604
645 pxa3xx_nand_do_cmd(info, NDSR_WRDREQ);
646 break; 605 break;
647 case NAND_CMD_ERASE1: 606 case NAND_CMD_RESET:
648 if (prepare_erase_cmd(info, cmdset->erase, page_addr)) 607 cmd = info->cmdset->reset;
649 break; 608 info->ndcb0 |= NDCB0_CMD_TYPE(5)
609 | cmd;
650 610
651 pxa3xx_nand_do_cmd(info, NDSR_CS0_BBD | NDSR_CS0_CMDD);
652 break; 611 break;
612
653 case NAND_CMD_ERASE2: 613 case NAND_CMD_ERASE2:
614 exec_cmd = 0;
654 break; 615 break;
655 case NAND_CMD_READID:
656 case NAND_CMD_STATUS:
657 info->use_dma = 0; /* force PIO read */
658 info->buf_start = 0;
659 info->buf_count = (command == NAND_CMD_READID) ?
660 info->read_id_bytes : 1;
661
662 if (prepare_other_cmd(info, (command == NAND_CMD_READID) ?
663 cmdset->read_id : cmdset->read_status))
664 break;
665 616
666 pxa3xx_nand_do_cmd(info, NDSR_RDDREQ); 617 default:
618 exec_cmd = 0;
619 printk(KERN_ERR "pxa3xx-nand: unsupported"
620 " command %x\n", command);
667 break; 621 break;
668 case NAND_CMD_RESET: 622 }
669 if (prepare_other_cmd(info, cmdset->reset))
670 break;
671 623
672 ret = pxa3xx_nand_do_cmd(info, NDSR_CS0_CMDD); 624 return exec_cmd;
673 if (ret == 0) { 625}
674 int timeout = 2;
675 uint32_t ndcr;
676 626
677 while (timeout--) { 627static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
678 if (nand_readl(info, NDSR) & NDSR_RDY) 628 int column, int page_addr)
679 break; 629{
680 msleep(10); 630 struct pxa3xx_nand_info *info = mtd->priv;
681 } 631 int ret, exec_cmd;
682 632
683 ndcr = nand_readl(info, NDCR); 633 /*
684 nand_writel(info, NDCR, ndcr & ~NDCR_ND_RUN); 634 * if this is an x16 device, then convert the input
635 * "byte" address into a "word" address appropriate
636 * for indexing a word-oriented device
637 */
638 if (info->reg_ndcr & NDCR_DWIDTH_M)
639 column /= 2;
640
641 exec_cmd = prepare_command_pool(info, command, column, page_addr);
642 if (exec_cmd) {
643 init_completion(&info->cmd_complete);
644 pxa3xx_nand_start(info);
645
646 ret = wait_for_completion_timeout(&info->cmd_complete,
647 CHIP_DELAY_TIMEOUT);
648 if (!ret) {
649 printk(KERN_ERR "command completion timed out\n");
650 /* Stop State Machine for next command cycle */
651 pxa3xx_nand_stop(info);
685 } 652 }
686 break; 653 info->state = STATE_IDLE;
687 default:
688 printk(KERN_ERR "non-supported command.\n");
689 break;
690 } 654 }
655}
656
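
A quick worked example of the byte-to-word conversion above, with illustrative values: addressing the OOB area of a 2KiB page on a 16-bit-wide chip.

    int column = 2048;      /* byte offset where the OOB starts */

    if (info->reg_ndcr & NDCR_DWIDTH_M)     /* x16 device */
            column /= 2;    /* controller indexes words: 2048 -> 1024 */
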
657static void pxa3xx_nand_write_page_hwecc(struct mtd_info *mtd,
658 struct nand_chip *chip, const uint8_t *buf)
659{
660 chip->write_buf(mtd, buf, mtd->writesize);
661 chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
662}
691 663
692 if (info->retcode == ERR_DBERR) { 664static int pxa3xx_nand_read_page_hwecc(struct mtd_info *mtd,
693 printk(KERN_ERR "double bit error @ page %08x\n", page_addr); 665 struct nand_chip *chip, uint8_t *buf, int page)
694 info->retcode = ERR_NONE; 666{
667 struct pxa3xx_nand_info *info = mtd->priv;
668
669 chip->read_buf(mtd, buf, mtd->writesize);
670 chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
671
672 if (info->retcode == ERR_SBERR) {
673 switch (info->use_ecc) {
674 case 1:
675 mtd->ecc_stats.corrected++;
676 break;
677 case 0:
678 default:
679 break;
680 }
681 } else if (info->retcode == ERR_DBERR) {
682 /*
683 * for blank page (all 0xff), HW will calculate its ECC as
684 * 0, which is different from the ECC information within
685 * OOB, ignore such double bit errors
686 */
687 if (!is_buf_blank(buf, mtd->writesize))
688 mtd->ecc_stats.failed++;
695 } 689 }
690
691 return 0;
696} 692}
697 693
698static uint8_t pxa3xx_nand_read_byte(struct mtd_info *mtd) 694static uint8_t pxa3xx_nand_read_byte(struct mtd_info *mtd)
@@ -769,73 +765,12 @@ static int pxa3xx_nand_waitfunc(struct mtd_info *mtd, struct nand_chip *this)
769 return 0; 765 return 0;
770} 766}
771 767
772static void pxa3xx_nand_ecc_hwctl(struct mtd_info *mtd, int mode)
773{
774 return;
775}
776
777static int pxa3xx_nand_ecc_calculate(struct mtd_info *mtd,
778 const uint8_t *dat, uint8_t *ecc_code)
779{
780 return 0;
781}
782
783static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
784 uint8_t *dat, uint8_t *read_ecc, uint8_t *calc_ecc)
785{
786 struct pxa3xx_nand_info *info = mtd->priv;
787 /*
788 * Any error include ERR_SEND_CMD, ERR_DBERR, ERR_BUSERR, we
789 * consider it as a ecc error which will tell the caller the
790 * read fail We have distinguish all the errors, but the
791 * nand_read_ecc only check this function return value
792 *
793 * Corrected (single-bit) errors must also be noted.
794 */
795 if (info->retcode == ERR_SBERR)
796 return 1;
797 else if (info->retcode != ERR_NONE)
798 return -1;
799
800 return 0;
801}
802
803static int __readid(struct pxa3xx_nand_info *info, uint32_t *id)
804{
805 const struct pxa3xx_nand_cmdset *cmdset = info->cmdset;
806 uint32_t ndcr;
807 uint8_t id_buff[8];
808
809 if (prepare_other_cmd(info, cmdset->read_id)) {
810 printk(KERN_ERR "failed to prepare command\n");
811 return -EINVAL;
812 }
813
814 /* Send command */
815 if (write_cmd(info))
816 goto fail_timeout;
817
818 /* Wait for CMDDM(command done successfully) */
819 if (wait_for_event(info, NDSR_RDDREQ))
820 goto fail_timeout;
821
822 __raw_readsl(info->mmio_base + NDDB, id_buff, 2);
823 *id = id_buff[0] | (id_buff[1] << 8);
824 return 0;
825
826fail_timeout:
827 ndcr = nand_readl(info, NDCR);
828 nand_writel(info, NDCR, ndcr & ~NDCR_ND_RUN);
829 udelay(10);
830 return -ETIMEDOUT;
831}
832
833static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info, 768static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
834 const struct pxa3xx_nand_flash *f) 769 const struct pxa3xx_nand_flash *f)
835{ 770{
836 struct platform_device *pdev = info->pdev; 771 struct platform_device *pdev = info->pdev;
837 struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data; 772 struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
838 uint32_t ndcr = 0x00000FFF; /* disable all interrupts */ 773 uint32_t ndcr = 0x0; /* enable all interrupts */
839 774
840 if (f->page_size != 2048 && f->page_size != 512) 775 if (f->page_size != 2048 && f->page_size != 512)
841 return -EINVAL; 776 return -EINVAL;
@@ -844,9 +779,8 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
844 return -EINVAL; 779 return -EINVAL;
845 780
846 /* calculate flash information */ 781 /* calculate flash information */
847 info->cmdset = f->cmdset; 782 info->cmdset = &default_cmdset;
848 info->page_size = f->page_size; 783 info->page_size = f->page_size;
849 info->oob_buff = info->data_buff + f->page_size;
850 info->read_id_bytes = (f->page_size == 2048) ? 4 : 2; 784 info->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
851 785
852 /* calculate addressing information */ 786 /* calculate addressing information */
@@ -876,87 +810,18 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
876static int pxa3xx_nand_detect_config(struct pxa3xx_nand_info *info) 810static int pxa3xx_nand_detect_config(struct pxa3xx_nand_info *info)
877{ 811{
878 uint32_t ndcr = nand_readl(info, NDCR); 812 uint32_t ndcr = nand_readl(info, NDCR);
879 struct nand_flash_dev *type = NULL;
880 uint32_t id = -1, page_per_block, num_blocks;
881 int i;
882
883 page_per_block = ndcr & NDCR_PG_PER_BLK ? 64 : 32;
884 info->page_size = ndcr & NDCR_PAGE_SZ ? 2048 : 512; 813 info->page_size = ndcr & NDCR_PAGE_SZ ? 2048 : 512;
885 /* set info fields needed to __readid */ 814 /* set info fields needed to read id */
886 info->read_id_bytes = (info->page_size == 2048) ? 4 : 2; 815 info->read_id_bytes = (info->page_size == 2048) ? 4 : 2;
887 info->reg_ndcr = ndcr; 816 info->reg_ndcr = ndcr;
888 info->cmdset = &default_cmdset; 817 info->cmdset = &default_cmdset;
889 818
890 if (__readid(info, &id))
891 return -ENODEV;
892
893 /* Lookup the flash id */
894 id = (id >> 8) & 0xff; /* device id is byte 2 */
895 for (i = 0; nand_flash_ids[i].name != NULL; i++) {
896 if (id == nand_flash_ids[i].id) {
897 type = &nand_flash_ids[i];
898 break;
899 }
900 }
901
902 if (!type)
903 return -ENODEV;
904
905 /* fill the missing flash information */
906 i = __ffs(page_per_block * info->page_size);
907 num_blocks = type->chipsize << (20 - i);
908
909 /* calculate addressing information */
910 info->col_addr_cycles = (info->page_size == 2048) ? 2 : 1;
911
912 if (num_blocks * page_per_block > 65536)
913 info->row_addr_cycles = 3;
914 else
915 info->row_addr_cycles = 2;
916
917 info->ndtr0cs0 = nand_readl(info, NDTR0CS0); 819 info->ndtr0cs0 = nand_readl(info, NDTR0CS0);
918 info->ndtr1cs0 = nand_readl(info, NDTR1CS0); 820 info->ndtr1cs0 = nand_readl(info, NDTR1CS0);
919 821
920 return 0; 822 return 0;
921} 823}
922 824
923static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
924 const struct pxa3xx_nand_platform_data *pdata)
925{
926 const struct pxa3xx_nand_flash *f;
927 uint32_t id = -1;
928 int i;
929
930 if (pdata->keep_config)
931 if (pxa3xx_nand_detect_config(info) == 0)
932 return 0;
933
934 /* we use default timing to detect id */
935 f = DEFAULT_FLASH_TYPE;
936 pxa3xx_nand_config_flash(info, f);
937 if (__readid(info, &id))
938 goto fail_detect;
939
940 for (i=0; i<ARRAY_SIZE(builtin_flash_types) + pdata->num_flash - 1; i++) {
941 /* we first choose the flash definition from platform */
942 if (i < pdata->num_flash)
943 f = pdata->flash + i;
944 else
945 f = &builtin_flash_types[i - pdata->num_flash + 1];
946 if (f->chip_id == id) {
947 dev_info(&info->pdev->dev, "detect chip id: 0x%x\n", id);
948 pxa3xx_nand_config_flash(info, f);
949 return 0;
950 }
951 }
952
953 dev_warn(&info->pdev->dev,
954 "failed to detect configured nand flash; found %04x instead\n",
955 id);
956fail_detect:
957 return -ENODEV;
958}
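The lookup order in the loop removed above (and preserved in the new
pxa3xx_nand_scan() below) deserves a note: platform-supplied flash
definitions are tried first, then the builtin table, whose entry 0 holds
only the generic default timing used for probing and is therefore skipped
via the "+ 1" offset. Worked instance, assuming 2 platform entries and 5
builtin types: i runs 0..5, where i = 0..1 selects pdata->flash[0..1] and
i = 2..5 selects builtin_flash_types[1..4].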
959
960/* the maximum possible buffer size for large page with OOB data 825/* the maximum possible buffer size for large page with OOB data
961 * is: 2048 + 64 = 2112 bytes, allocate a page here for both the 826 * is: 2048 + 64 = 2112 bytes, allocate a page here for both the
962 * data buffer and the DMA descriptor 827 * data buffer and the DMA descriptor
@@ -998,82 +863,144 @@ static int pxa3xx_nand_init_buff(struct pxa3xx_nand_info *info)
998 return 0; 863 return 0;
999} 864}
1000 865
1001static struct nand_ecclayout hw_smallpage_ecclayout = { 866static int pxa3xx_nand_sensing(struct pxa3xx_nand_info *info)
1002 .eccbytes = 6, 867{
1003 .eccpos = {8, 9, 10, 11, 12, 13 }, 868 struct mtd_info *mtd = info->mtd;
1004 .oobfree = { {2, 6} } 869 struct nand_chip *chip = mtd->priv;
1005};
1006 870
1007static struct nand_ecclayout hw_largepage_ecclayout = { 871 /* use the common timing to make a try */
1008 .eccbytes = 24, 872 pxa3xx_nand_config_flash(info, &builtin_flash_types[0]);
1009 .eccpos = { 873 chip->cmdfunc(mtd, NAND_CMD_RESET, 0, 0);
1010 40, 41, 42, 43, 44, 45, 46, 47, 874 if (info->is_ready)
1011 48, 49, 50, 51, 52, 53, 54, 55, 875 return 1;
1012 56, 57, 58, 59, 60, 61, 62, 63}, 876 else
1013 .oobfree = { {2, 38} } 877 return 0;
1014}; 878}
1015 879
1016static void pxa3xx_nand_init_mtd(struct mtd_info *mtd, 880static int pxa3xx_nand_scan(struct mtd_info *mtd)
1017 struct pxa3xx_nand_info *info)
1018{ 881{
1019 struct nand_chip *this = &info->nand_chip; 882 struct pxa3xx_nand_info *info = mtd->priv;
1020 883 struct platform_device *pdev = info->pdev;
1021 this->options = (info->reg_ndcr & NDCR_DWIDTH_C) ? NAND_BUSWIDTH_16: 0; 884 struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
1022 885 struct nand_flash_dev pxa3xx_flash_ids[2] = { {NULL,}, {NULL,} };
1023 this->waitfunc = pxa3xx_nand_waitfunc; 886 const struct pxa3xx_nand_flash *f = NULL;
1024 this->select_chip = pxa3xx_nand_select_chip; 887 struct nand_chip *chip = mtd->priv;
1025 this->dev_ready = pxa3xx_nand_dev_ready; 888 uint32_t id = -1;
1026 this->cmdfunc = pxa3xx_nand_cmdfunc; 889 uint64_t chipsize;
1027 this->read_word = pxa3xx_nand_read_word; 890 int i, ret, num;
1028 this->read_byte = pxa3xx_nand_read_byte; 891
1029 this->read_buf = pxa3xx_nand_read_buf; 892 if (pdata->keep_config && !pxa3xx_nand_detect_config(info))
1030 this->write_buf = pxa3xx_nand_write_buf; 893 goto KEEP_CONFIG;
1031 this->verify_buf = pxa3xx_nand_verify_buf; 894
1032 895 ret = pxa3xx_nand_sensing(info);
1033 this->ecc.mode = NAND_ECC_HW; 896 if (!ret) {
1034 this->ecc.hwctl = pxa3xx_nand_ecc_hwctl; 897 kfree(mtd);
1035 this->ecc.calculate = pxa3xx_nand_ecc_calculate; 898 info->mtd = NULL;
1036 this->ecc.correct = pxa3xx_nand_ecc_correct; 899 printk(KERN_INFO "No NAND chip found on cs 0\n");
1037 this->ecc.size = info->page_size; 900
1038 901 return -EINVAL;
1039 if (info->page_size == 2048) 902 }
1040 this->ecc.layout = &hw_largepage_ecclayout; 903
904 chip->cmdfunc(mtd, NAND_CMD_READID, 0, 0);
905 id = *((uint16_t *)(info->data_buff));
906 if (id != 0)
907 printk(KERN_INFO "Detected flash id %x\n", id);
908 else {
909 kfree(mtd);
910 info->mtd = NULL;
911 printk(KERN_WARNING "Read ID of 0; timing settings may be wrong\n");
912
913 return -EINVAL;
914 }
915
916 num = ARRAY_SIZE(builtin_flash_types) + pdata->num_flash - 1;
917 for (i = 0; i < num; i++) {
918 if (i < pdata->num_flash)
919 f = pdata->flash + i;
920 else
921 f = &builtin_flash_types[i - pdata->num_flash + 1];
922
923 /* find the chip in default list */
924 if (f->chip_id == id)
925 break;
926 }
927
928 if (i >= (ARRAY_SIZE(builtin_flash_types) + pdata->num_flash - 1)) {
929 kfree(mtd);
930 info->mtd = NULL;
931 printk(KERN_ERR "ERROR: flash type not defined\n");
932
933 return -EINVAL;
934 }
935
936 pxa3xx_nand_config_flash(info, f);
937 pxa3xx_flash_ids[0].name = f->name;
938 pxa3xx_flash_ids[0].id = (f->chip_id >> 8) & 0xffff;
939 pxa3xx_flash_ids[0].pagesize = f->page_size;
940 chipsize = (uint64_t)f->num_blocks * f->page_per_block * f->page_size;
941 pxa3xx_flash_ids[0].chipsize = chipsize >> 20;
942 pxa3xx_flash_ids[0].erasesize = f->page_size * f->page_per_block;
943 if (f->flash_width == 16)
944 pxa3xx_flash_ids[0].options = NAND_BUSWIDTH_16;
945KEEP_CONFIG:
946 if (nand_scan_ident(mtd, 1, pxa3xx_flash_ids))
947 return -ENODEV;
948 /* calculate addressing information */
949 info->col_addr_cycles = (mtd->writesize >= 2048) ? 2 : 1;
950 info->oob_buff = info->data_buff + mtd->writesize;
951 if ((mtd->size >> chip->page_shift) > 65536)
952 info->row_addr_cycles = 3;
1041 else 953 else
1042 this->ecc.layout = &hw_smallpage_ecclayout; 954 info->row_addr_cycles = 2;
955 mtd->name = mtd_names[0];
956 chip->ecc.mode = NAND_ECC_HW;
957 chip->ecc.size = f->page_size;
958
959 chip->options = (f->flash_width == 16) ? NAND_BUSWIDTH_16 : 0;
960 chip->options |= NAND_NO_AUTOINCR;
961 chip->options |= NAND_NO_READRDY;
1043 962
1044 this->chip_delay = 25; 963 return nand_scan_tail(mtd);
1045} 964}
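pxa3xx_nand_scan() above replaces the old single nand_scan() call with the
split nand_scan_ident()/nand_scan_tail() pair so the driver can adjust ECC
and chip options in between. As a rough sketch (simplified; the generic core
performs more checks), nand_scan() is equivalent to:

	int nand_scan_sketch(struct mtd_info *mtd, int maxchips)
	{
		/* phase 1: read the ID and establish chip geometry */
		int ret = nand_scan_ident(mtd, maxchips, NULL);
		if (ret)
			return ret;
		/* ...a driver may patch chip->ecc and chip->options here... */
		/* phase 2: set up ECC, buffers and the bad block table */
		return nand_scan_tail(mtd);
	}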
1046 965
1047static int pxa3xx_nand_probe(struct platform_device *pdev) 966static
967struct pxa3xx_nand_info *alloc_nand_resource(struct platform_device *pdev)
1048{ 968{
1049 struct pxa3xx_nand_platform_data *pdata;
1050 struct pxa3xx_nand_info *info; 969 struct pxa3xx_nand_info *info;
1051 struct nand_chip *this; 970 struct nand_chip *chip;
1052 struct mtd_info *mtd; 971 struct mtd_info *mtd;
1053 struct resource *r; 972 struct resource *r;
1054 int ret = 0, irq; 973 int ret, irq;
1055
1056 pdata = pdev->dev.platform_data;
1057
1058 if (!pdata) {
1059 dev_err(&pdev->dev, "no platform data defined\n");
1060 return -ENODEV;
1061 }
1062 974
1063 mtd = kzalloc(sizeof(struct mtd_info) + sizeof(struct pxa3xx_nand_info), 975 mtd = kzalloc(sizeof(struct mtd_info) + sizeof(struct pxa3xx_nand_info),
1064 GFP_KERNEL); 976 GFP_KERNEL);
1065 if (!mtd) { 977 if (!mtd) {
1066 dev_err(&pdev->dev, "failed to allocate memory\n"); 978 dev_err(&pdev->dev, "failed to allocate memory\n");
1067 return -ENOMEM; 979 return NULL;
1068 } 980 }
1069 981
1070 info = (struct pxa3xx_nand_info *)(&mtd[1]); 982 info = (struct pxa3xx_nand_info *)(&mtd[1]);
983 chip = (struct nand_chip *)(&mtd[1]);
1071 info->pdev = pdev; 984 info->pdev = pdev;
1072 985 info->mtd = mtd;
1073 this = &info->nand_chip;
1074 mtd->priv = info; 986 mtd->priv = info;
1075 mtd->owner = THIS_MODULE; 987 mtd->owner = THIS_MODULE;
1076 988
989 chip->ecc.read_page = pxa3xx_nand_read_page_hwecc;
990 chip->ecc.write_page = pxa3xx_nand_write_page_hwecc;
991 chip->controller = &info->controller;
992 chip->waitfunc = pxa3xx_nand_waitfunc;
993 chip->select_chip = pxa3xx_nand_select_chip;
994 chip->dev_ready = pxa3xx_nand_dev_ready;
995 chip->cmdfunc = pxa3xx_nand_cmdfunc;
996 chip->read_word = pxa3xx_nand_read_word;
997 chip->read_byte = pxa3xx_nand_read_byte;
998 chip->read_buf = pxa3xx_nand_read_buf;
999 chip->write_buf = pxa3xx_nand_write_buf;
1000 chip->verify_buf = pxa3xx_nand_verify_buf;
1001
1002 spin_lock_init(&chip->controller->lock);
1003 init_waitqueue_head(&chip->controller->wq);
1077 info->clk = clk_get(&pdev->dev, NULL); 1004 info->clk = clk_get(&pdev->dev, NULL);
1078 if (IS_ERR(info->clk)) { 1005 if (IS_ERR(info->clk)) {
1079 dev_err(&pdev->dev, "failed to get nand clock\n"); 1006 dev_err(&pdev->dev, "failed to get nand clock\n");
@@ -1141,43 +1068,12 @@ static int pxa3xx_nand_probe(struct platform_device *pdev)
1141 goto fail_free_buf; 1068 goto fail_free_buf;
1142 } 1069 }
1143 1070
1144 ret = pxa3xx_nand_detect_flash(info, pdata); 1071 platform_set_drvdata(pdev, info);
1145 if (ret) {
1146 dev_err(&pdev->dev, "failed to detect flash\n");
1147 ret = -ENODEV;
1148 goto fail_free_irq;
1149 }
1150
1151 pxa3xx_nand_init_mtd(mtd, info);
1152
1153 platform_set_drvdata(pdev, mtd);
1154
1155 if (nand_scan(mtd, 1)) {
1156 dev_err(&pdev->dev, "failed to scan nand\n");
1157 ret = -ENXIO;
1158 goto fail_free_irq;
1159 }
1160
1161#ifdef CONFIG_MTD_PARTITIONS
1162 if (mtd_has_cmdlinepart()) {
1163 static const char *probes[] = { "cmdlinepart", NULL };
1164 struct mtd_partition *parts;
1165 int nr_parts;
1166
1167 nr_parts = parse_mtd_partitions(mtd, probes, &parts, 0);
1168
1169 if (nr_parts)
1170 return add_mtd_partitions(mtd, parts, nr_parts);
1171 }
1172 1072
1173 return add_mtd_partitions(mtd, pdata->parts, pdata->nr_parts); 1073 return info;
1174#else
1175 return 0;
1176#endif
1177 1074
1178fail_free_irq:
1179 free_irq(irq, info);
1180fail_free_buf: 1075fail_free_buf:
1076 free_irq(irq, info);
1181 if (use_dma) { 1077 if (use_dma) {
1182 pxa_free_dma(info->data_dma_ch); 1078 pxa_free_dma(info->data_dma_ch);
1183 dma_free_coherent(&pdev->dev, info->data_buff_size, 1079 dma_free_coherent(&pdev->dev, info->data_buff_size,
@@ -1193,22 +1089,18 @@ fail_put_clk:
1193 clk_put(info->clk); 1089 clk_put(info->clk);
1194fail_free_mtd: 1090fail_free_mtd:
1195 kfree(mtd); 1091 kfree(mtd);
1196 return ret; 1092 return NULL;
1197} 1093}
1198 1094
1199static int pxa3xx_nand_remove(struct platform_device *pdev) 1095static int pxa3xx_nand_remove(struct platform_device *pdev)
1200{ 1096{
1201 struct mtd_info *mtd = platform_get_drvdata(pdev); 1097 struct pxa3xx_nand_info *info = platform_get_drvdata(pdev);
1202 struct pxa3xx_nand_info *info = mtd->priv; 1098 struct mtd_info *mtd = info->mtd;
1203 struct resource *r; 1099 struct resource *r;
1204 int irq; 1100 int irq;
1205 1101
1206 platform_set_drvdata(pdev, NULL); 1102 platform_set_drvdata(pdev, NULL);
1207 1103
1208 del_mtd_device(mtd);
1209#ifdef CONFIG_MTD_PARTITIONS
1210 del_mtd_partitions(mtd);
1211#endif
1212 irq = platform_get_irq(pdev, 0); 1104 irq = platform_get_irq(pdev, 0);
1213 if (irq >= 0) 1105 if (irq >= 0)
1214 free_irq(irq, info); 1106 free_irq(irq, info);
@@ -1226,17 +1118,62 @@ static int pxa3xx_nand_remove(struct platform_device *pdev)
1226 clk_disable(info->clk); 1118 clk_disable(info->clk);
1227 clk_put(info->clk); 1119 clk_put(info->clk);
1228 1120
1229 kfree(mtd); 1121 if (mtd) {
1122 del_mtd_device(mtd);
1123#ifdef CONFIG_MTD_PARTITIONS
1124 del_mtd_partitions(mtd);
1125#endif
1126 kfree(mtd);
1127 }
1230 return 0; 1128 return 0;
1231} 1129}
1232 1130
1131static int pxa3xx_nand_probe(struct platform_device *pdev)
1132{
1133 struct pxa3xx_nand_platform_data *pdata;
1134 struct pxa3xx_nand_info *info;
1135
1136 pdata = pdev->dev.platform_data;
1137 if (!pdata) {
1138 dev_err(&pdev->dev, "no platform data defined\n");
1139 return -ENODEV;
1140 }
1141
1142 info = alloc_nand_resource(pdev);
1143 if (info == NULL)
1144 return -ENOMEM;
1145
1146 if (pxa3xx_nand_scan(info->mtd)) {
1147 dev_err(&pdev->dev, "failed to scan nand\n");
1148 pxa3xx_nand_remove(pdev);
1149 return -ENODEV;
1150 }
1151
1152#ifdef CONFIG_MTD_PARTITIONS
1153 if (mtd_has_cmdlinepart()) {
1154 const char *probes[] = { "cmdlinepart", NULL };
1155 struct mtd_partition *parts;
1156 int nr_parts;
1157
1158 nr_parts = parse_mtd_partitions(info->mtd, probes, &parts, 0);
1159
1160 if (nr_parts)
1161 return add_mtd_partitions(info->mtd, parts, nr_parts);
1162 }
1163
1164 return add_mtd_partitions(info->mtd, pdata->parts, pdata->nr_parts);
1165#else
1166 return 0;
1167#endif
1168}
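With cmdlinepart compiled in, the parse_mtd_partitions() call above lets the
partition layout come from the kernel command line instead of platform data.
A hypothetical example (device and partition names illustrative):

	mtdparts=pxa3xx_nand-0:128k(bootloader)ro,1m(kernel),-(rootfs)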
1169
1233#ifdef CONFIG_PM 1170#ifdef CONFIG_PM
1234static int pxa3xx_nand_suspend(struct platform_device *pdev, pm_message_t state) 1171static int pxa3xx_nand_suspend(struct platform_device *pdev, pm_message_t state)
1235{ 1172{
1236 struct mtd_info *mtd = (struct mtd_info *)platform_get_drvdata(pdev); 1173 struct pxa3xx_nand_info *info = platform_get_drvdata(pdev);
1237 struct pxa3xx_nand_info *info = mtd->priv; 1174 struct mtd_info *mtd = info->mtd;
1238 1175
1239 if (info->state != STATE_READY) { 1176 if (info->state) {
1240 dev_err(&pdev->dev, "driver busy, state = %d\n", info->state); 1177 dev_err(&pdev->dev, "driver busy, state = %d\n", info->state);
1241 return -EAGAIN; 1178 return -EAGAIN;
1242 } 1179 }
@@ -1246,8 +1183,8 @@ static int pxa3xx_nand_suspend(struct platform_device *pdev, pm_message_t state)
1246 1183
1247static int pxa3xx_nand_resume(struct platform_device *pdev) 1184static int pxa3xx_nand_resume(struct platform_device *pdev)
1248{ 1185{
1249 struct mtd_info *mtd = (struct mtd_info *)platform_get_drvdata(pdev); 1186 struct pxa3xx_nand_info *info = platform_get_drvdata(pdev);
1250 struct pxa3xx_nand_info *info = mtd->priv; 1187 struct mtd_info *mtd = info->mtd;
1251 1188
1252 nand_writel(info, NDTR0CS0, info->ndtr0cs0); 1189 nand_writel(info, NDTR0CS0, info->ndtr0cs0);
1253 nand_writel(info, NDTR1CS0, info->ndtr1cs0); 1190 nand_writel(info, NDTR1CS0, info->ndtr1cs0);
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 14a49abe057e..f591f615d3f6 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -629,6 +629,7 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev)
629{ 629{
630 struct omap_onenand_platform_data *pdata; 630 struct omap_onenand_platform_data *pdata;
631 struct omap2_onenand *c; 631 struct omap2_onenand *c;
632 struct onenand_chip *this;
632 int r; 633 int r;
633 634
634 pdata = pdev->dev.platform_data; 635 pdata = pdev->dev.platform_data;
@@ -726,9 +727,8 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev)
726 727
727 c->mtd.dev.parent = &pdev->dev; 728 c->mtd.dev.parent = &pdev->dev;
728 729
730 this = &c->onenand;
729 if (c->dma_channel >= 0) { 731 if (c->dma_channel >= 0) {
730 struct onenand_chip *this = &c->onenand;
731
732 this->wait = omap2_onenand_wait; 732 this->wait = omap2_onenand_wait;
733 if (cpu_is_omap34xx()) { 733 if (cpu_is_omap34xx()) {
734 this->read_bufferram = omap3_onenand_read_bufferram; 734 this->read_bufferram = omap3_onenand_read_bufferram;
@@ -749,6 +749,9 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev)
749 c->onenand.disable = omap2_onenand_disable; 749 c->onenand.disable = omap2_onenand_disable;
750 } 750 }
751 751
752 if (pdata->skip_initial_unlocking)
753 this->options |= ONENAND_SKIP_INITIAL_UNLOCKING;
754
752 if ((r = onenand_scan(&c->mtd, 1)) < 0) 755 if ((r = onenand_scan(&c->mtd, 1)) < 0)
753 goto err_release_regulator; 756 goto err_release_regulator;
754 757
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index bac41caa8df7..56a8b2005bda 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1132,6 +1132,8 @@ static int onenand_mlc_read_ops_nolock(struct mtd_info *mtd, loff_t from,
1132 onenand_update_bufferram(mtd, from, !ret); 1132 onenand_update_bufferram(mtd, from, !ret);
1133 if (ret == -EBADMSG) 1133 if (ret == -EBADMSG)
1134 ret = 0; 1134 ret = 0;
1135 if (ret)
1136 break;
1135 } 1137 }
1136 1138
1137 this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen); 1139 this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen);
@@ -1646,11 +1648,10 @@ static int onenand_verify(struct mtd_info *mtd, const u_char *buf, loff_t addr,
1646 int ret = 0; 1648 int ret = 0;
1647 int thislen, column; 1649 int thislen, column;
1648 1650
1651 column = addr & (this->writesize - 1);
1652
1649 while (len != 0) { 1653 while (len != 0) {
1650 thislen = min_t(int, this->writesize, len); 1654 thislen = min_t(int, this->writesize - column, len);
1651 column = addr & (this->writesize - 1);
1652 if (column + thislen > this->writesize)
1653 thislen = this->writesize - column;
1654 1655
1655 this->command(mtd, ONENAND_CMD_READ, addr, this->writesize); 1656 this->command(mtd, ONENAND_CMD_READ, addr, this->writesize);
1656 1657
@@ -1664,12 +1665,13 @@ static int onenand_verify(struct mtd_info *mtd, const u_char *buf, loff_t addr,
1664 1665
1665 this->read_bufferram(mtd, ONENAND_DATARAM, this->verify_buf, 0, mtd->writesize); 1666 this->read_bufferram(mtd, ONENAND_DATARAM, this->verify_buf, 0, mtd->writesize);
1666 1667
1667 if (memcmp(buf, this->verify_buf, thislen)) 1668 if (memcmp(buf, this->verify_buf + column, thislen))
1668 return -EBADMSG; 1669 return -EBADMSG;
1669 1670
1670 len -= thislen; 1671 len -= thislen;
1671 buf += thislen; 1672 buf += thislen;
1672 addr += thislen; 1673 addr += thislen;
1674 column = 0;
1673 } 1675 }
1674 1676
1675 return 0; 1677 return 0;
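The onenand_verify() change above fixes verification of writes that do not
start on a page boundary: column is now computed once before the loop, the
first comparison is offset by it (verify_buf + column), and it is reset to
zero for the following pages. Worked instance, assuming writesize = 2048 and
a write starting at page offset 512: the first pass compares thislen = 1536
bytes at verify_buf + 512, and every later page is compared from offset 0 as
before.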
@@ -4083,7 +4085,8 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
4083 mtd->writebufsize = mtd->writesize; 4085 mtd->writebufsize = mtd->writesize;
4084 4086
4085 /* Unlock whole block */ 4087 /* Unlock whole block */
4086 this->unlock_all(mtd); 4088 if (!(this->options & ONENAND_SKIP_INITIAL_UNLOCKING))
4089 this->unlock_all(mtd);
4087 4090
4088 ret = this->scan_bbt(mtd); 4091 ret = this->scan_bbt(mtd);
4089 if ((!FLEXONENAND(this)) || ret) 4092 if ((!FLEXONENAND(this)) || ret)
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index ac0d6a8613b5..2b0daae4018d 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -64,12 +64,16 @@ struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl)
64 SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET); 64 SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET);
65 65
66 char *vendor = kmalloc(vendor_len, GFP_KERNEL); 66 char *vendor = kmalloc(vendor_len, GFP_KERNEL);
67 if (!vendor)
68 goto error1;
67 memcpy(vendor, ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, vendor_len); 69 memcpy(vendor, ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, vendor_len);
68 vendor[vendor_len] = 0; 70 vendor[vendor_len] = 0;
69 71
70 /* Initialize sysfs attributes */ 72 /* Initialize sysfs attributes */
71 vendor_attribute = 73 vendor_attribute =
72 kzalloc(sizeof(struct sm_sysfs_attribute), GFP_KERNEL); 74 kzalloc(sizeof(struct sm_sysfs_attribute), GFP_KERNEL);
75 if (!vendor_attribute)
76 goto error2;
73 77
74 sysfs_attr_init(&vendor_attribute->dev_attr.attr); 78 sysfs_attr_init(&vendor_attribute->dev_attr.attr);
75 79
@@ -83,12 +87,24 @@ struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl)
83 /* Create array of pointers to the attributes */ 87 /* Create array of pointers to the attributes */
84 attributes = kzalloc(sizeof(struct attribute *) * (NUM_ATTRIBUTES + 1), 88 attributes = kzalloc(sizeof(struct attribute *) * (NUM_ATTRIBUTES + 1),
85 GFP_KERNEL); 89 GFP_KERNEL);
90 if (!attributes)
91 goto error3;
86 attributes[0] = &vendor_attribute->dev_attr.attr; 92 attributes[0] = &vendor_attribute->dev_attr.attr;
87 93
88 /* Finally create the attribute group */ 94 /* Finally create the attribute group */
89 attr_group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); 95 attr_group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
96 if (!attr_group)
97 goto error4;
90 attr_group->attrs = attributes; 98 attr_group->attrs = attributes;
91 return attr_group; 99 return attr_group;
100error4:
101 kfree(attributes);
102error3:
103 kfree(vendor_attribute);
104error2:
105 kfree(vendor);
106error1:
107 return NULL;
92} 108}
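The error paths added above follow the standard kernel goto-unwind idiom; in
miniature (names generic, a sketch rather than the FTL's exact code):

	a = kmalloc(sizeof(*a), GFP_KERNEL);
	if (!a)
		goto err_a;
	b = kmalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		goto err_b;
	return 0;
err_b:
	kfree(a);	/* each label undoes what succeeded before its jump */
err_a:
	return -ENOMEM;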
93 109
94void sm_delete_sysfs_attributes(struct sm_ftl *ftl) 110void sm_delete_sysfs_attributes(struct sm_ftl *ftl)
@@ -1178,6 +1194,8 @@ static void sm_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
1178 } 1194 }
1179 1195
1180 ftl->disk_attributes = sm_create_sysfs_attributes(ftl); 1196 ftl->disk_attributes = sm_create_sysfs_attributes(ftl);
1197 if (!ftl->disk_attributes)
1198 goto error6;
1181 trans->disk_attributes = ftl->disk_attributes; 1199 trans->disk_attributes = ftl->disk_attributes;
1182 1200
1183 sm_printk("Found %d MiB xD/SmartMedia FTL on mtd%d", 1201 sm_printk("Found %d MiB xD/SmartMedia FTL on mtd%d",
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 161feeb7b8b9..627d4e2466a3 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -16,7 +16,7 @@
16 * 16 *
17 * Test read and write speed of a MTD device. 17 * Test read and write speed of a MTD device.
18 * 18 *
19 * Author: Adrian Hunter <ext-adrian.hunter@nokia.com> 19 * Author: Adrian Hunter <adrian.hunter@nokia.com>
20 */ 20 */
21 21
22#include <linux/init.h> 22#include <linux/init.h>
@@ -33,6 +33,11 @@ static int dev;
33module_param(dev, int, S_IRUGO); 33module_param(dev, int, S_IRUGO);
34MODULE_PARM_DESC(dev, "MTD device number to use"); 34MODULE_PARM_DESC(dev, "MTD device number to use");
35 35
36static int count;
37module_param(count, int, S_IRUGO);
38MODULE_PARM_DESC(count, "Maximum number of eraseblocks to use "
39 "(0 means use all)");
40
36static struct mtd_info *mtd; 41static struct mtd_info *mtd;
37static unsigned char *iobuf; 42static unsigned char *iobuf;
38static unsigned char *bbt; 43static unsigned char *bbt;
@@ -89,6 +94,33 @@ static int erase_eraseblock(int ebnum)
89 return 0; 94 return 0;
90} 95}
91 96
97static int multiblock_erase(int ebnum, int blocks)
98{
99 int err;
100 struct erase_info ei;
101 loff_t addr = ebnum * mtd->erasesize;
102
103 memset(&ei, 0, sizeof(struct erase_info));
104 ei.mtd = mtd;
105 ei.addr = addr;
106 ei.len = mtd->erasesize * blocks;
107
108 err = mtd->erase(mtd, &ei);
109 if (err) {
110 printk(PRINT_PREF "error %d while erasing EB %d, blocks %d\n",
111 err, ebnum, blocks);
112 return err;
113 }
114
115 if (ei.state == MTD_ERASE_FAILED) {
116 printk(PRINT_PREF "some erase error occurred at EB %d, "
117 "blocks %d\n", ebnum, blocks);
118 return -EIO;
119 }
120
121 return 0;
122}
123
92static int erase_whole_device(void) 124static int erase_whole_device(void)
93{ 125{
94 int err; 126 int err;
@@ -282,13 +314,16 @@ static inline void stop_timing(void)
282 314
283static long calc_speed(void) 315static long calc_speed(void)
284{ 316{
285 long ms, k, speed; 317 uint64_t k;
318 long ms;
286 319
287 ms = (finish.tv_sec - start.tv_sec) * 1000 + 320 ms = (finish.tv_sec - start.tv_sec) * 1000 +
288 (finish.tv_usec - start.tv_usec) / 1000; 321 (finish.tv_usec - start.tv_usec) / 1000;
289 k = goodebcnt * mtd->erasesize / 1024; 322 if (ms == 0)
290 speed = (k * 1000) / ms; 323 return 0;
291 return speed; 324 k = goodebcnt * (mtd->erasesize / 1024) * 1000;
325 do_div(k, ms);
326 return k;
292} 327}
293 328
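The calc_speed() rework above also swaps the plain division for do_div(),
which matters on 32-bit kernels: a u64 / u32 expression would pull in a
libgcc helper (__udivdi3) that the kernel does not provide. do_div(n, base)
divides the 64-bit lvalue in place and returns the remainder, e.g.:

	uint64_t n = 3221225472ULL;	/* 3 GiB in bytes */
	uint32_t rem = do_div(n, 1000);	/* n becomes 3221225, rem is 472 */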
294static int scan_for_bad_eraseblocks(void) 329static int scan_for_bad_eraseblocks(void)
@@ -320,13 +355,16 @@ out:
320 355
321static int __init mtd_speedtest_init(void) 356static int __init mtd_speedtest_init(void)
322{ 357{
323 int err, i; 358 int err, i, blocks, j, k;
324 long speed; 359 long speed;
325 uint64_t tmp; 360 uint64_t tmp;
326 361
327 printk(KERN_INFO "\n"); 362 printk(KERN_INFO "\n");
328 printk(KERN_INFO "=================================================\n"); 363 printk(KERN_INFO "=================================================\n");
329 printk(PRINT_PREF "MTD device: %d\n", dev); 364 if (count)
365 printk(PRINT_PREF "MTD device: %d count: %d\n", dev, count);
366 else
367 printk(PRINT_PREF "MTD device: %d\n", dev);
330 368
331 mtd = get_mtd_device(NULL, dev); 369 mtd = get_mtd_device(NULL, dev);
332 if (IS_ERR(mtd)) { 370 if (IS_ERR(mtd)) {
@@ -353,6 +391,9 @@ static int __init mtd_speedtest_init(void)
353 (unsigned long long)mtd->size, mtd->erasesize, 391 (unsigned long long)mtd->size, mtd->erasesize,
354 pgsize, ebcnt, pgcnt, mtd->oobsize); 392 pgsize, ebcnt, pgcnt, mtd->oobsize);
355 393
394 if (count > 0 && count < ebcnt)
395 ebcnt = count;
396
356 err = -ENOMEM; 397 err = -ENOMEM;
357 iobuf = kmalloc(mtd->erasesize, GFP_KERNEL); 398 iobuf = kmalloc(mtd->erasesize, GFP_KERNEL);
358 if (!iobuf) { 399 if (!iobuf) {
@@ -484,6 +525,31 @@ static int __init mtd_speedtest_init(void)
484 speed = calc_speed(); 525 speed = calc_speed();
485 printk(PRINT_PREF "erase speed is %ld KiB/s\n", speed); 526 printk(PRINT_PREF "erase speed is %ld KiB/s\n", speed);
486 527
528 /* Multi-block erase all eraseblocks */
529 for (k = 1; k < 7; k++) {
530 blocks = 1 << k;
531 printk(PRINT_PREF "Testing %dx multi-block erase speed\n",
532 blocks);
533 start_timing();
534 for (i = 0; i < ebcnt; ) {
535 for (j = 0; j < blocks && (i + j) < ebcnt; j++)
536 if (bbt[i + j])
537 break;
538 if (j < 1) {
539 i++;
540 continue;
541 }
542 err = multiblock_erase(i, j);
543 if (err)
544 goto out;
545 cond_resched();
546 i += j;
547 }
548 stop_timing();
549 speed = calc_speed();
550 printk(PRINT_PREF "%dx multi-block erase speed is %ld KiB/s\n",
551 blocks, speed);
552 }
487 printk(PRINT_PREF "finished\n"); 553 printk(PRINT_PREF "finished\n");
488out: 554out:
489 kfree(iobuf); 555 kfree(iobuf);
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index 11204e8aab5f..334eae53a3db 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -394,6 +394,11 @@ static int __init mtd_subpagetest_init(void)
394 } 394 }
395 395
396 subpgsize = mtd->writesize >> mtd->subpage_sft; 396 subpgsize = mtd->writesize >> mtd->subpage_sft;
397 tmp = mtd->size;
398 do_div(tmp, mtd->erasesize);
399 ebcnt = tmp;
400 pgcnt = mtd->erasesize / mtd->writesize;
401
397 printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, " 402 printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
398 "page size %u, subpage size %u, count of eraseblocks %u, " 403 "page size %u, subpage size %u, count of eraseblocks %u, "
399 "pages per eraseblock %u, OOB size %u\n", 404 "pages per eraseblock %u, OOB size %u\n",
@@ -413,11 +418,6 @@ static int __init mtd_subpagetest_init(void)
413 goto out; 418 goto out;
414 } 419 }
415 420
416 tmp = mtd->size;
417 do_div(tmp, mtd->erasesize);
418 ebcnt = tmp;
419 pgcnt = mtd->erasesize / mtd->writesize;
420
421 err = scan_for_bad_eraseblocks(); 421 err = scan_for_bad_eraseblocks();
422 if (err) 422 if (err)
423 goto out; 423 goto out;
diff --git a/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c b/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
index 842cd9214a5e..289729daba80 100644
--- a/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
+++ b/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
@@ -1191,7 +1191,7 @@ static int cyasblkdev_add_disks(int bus_num,
1191 bd->user_disk_1->first_minor = (devidx + 1) << CYASBLKDEV_SHIFT; 1191 bd->user_disk_1->first_minor = (devidx + 1) << CYASBLKDEV_SHIFT;
1192 bd->user_disk_1->minors = 8; 1192 bd->user_disk_1->minors = 8;
1193 bd->user_disk_1->fops = &cyasblkdev_bdops; 1193 bd->user_disk_1->fops = &cyasblkdev_bdops;
1194 bd->user_disk_0->events = DISK_EVENT_MEDIA_CHANGE; 1194 bd->user_disk_1->events = DISK_EVENT_MEDIA_CHANGE;
1195 bd->user_disk_1->private_data = bd; 1195 bd->user_disk_1->private_data = bd;
1196 bd->user_disk_1->queue = bd->queue.queue; 1196 bd->user_disk_1->queue = bd->queue.queue;
1197 bd->dbgprn_flags = DBGPRN_RD_RQ; 1197 bd->dbgprn_flags = DBGPRN_RD_RQ;
diff --git a/fs/inode.c b/fs/inode.c
index 05a1f75ae791..5f4e11aaeb5c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1167,7 +1167,7 @@ EXPORT_SYMBOL(igrab);
1167 * Note: I_NEW is not waited upon so you have to be very careful what you do 1167 * Note: I_NEW is not waited upon so you have to be very careful what you do
1168 * with the returned inode. You probably should be using ilookup5() instead. 1168 * with the returned inode. You probably should be using ilookup5() instead.
1169 * 1169 *
1170 * Note: @test is called with the inode_hash_lock held, so can't sleep. 1170 * Note2: @test is called with the inode_hash_lock held, so can't sleep.
1171 */ 1171 */
1172struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 1172struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
1173 int (*test)(struct inode *, void *), void *data) 1173 int (*test)(struct inode *, void *), void *data)
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4f9cc0482949..3e93cdd19005 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -31,7 +31,7 @@
31 * is used to release xattr name/value pair and detach from c->xattrindex. 31 * is used to release xattr name/value pair and detach from c->xattrindex.
32 * reclaim_xattr_datum(c) 32 * reclaim_xattr_datum(c)
33 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when 33 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
34 * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold 34 * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold
35 * is hard coded as 32KiB. 35 * is hard coded as 32KiB.
36 * do_verify_xattr_datum(c, xd) 36 * do_verify_xattr_datum(c, xd)
37 * is used to load the xdatum informations without name/value pair from the medium. 37 * is used to load the xdatum informations without name/value pair from the medium.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7c708a418acc..2e7addfd9803 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -182,7 +182,8 @@ static void m_stop(struct seq_file *m, void *v)
182 struct proc_maps_private *priv = m->private; 182 struct proc_maps_private *priv = m->private;
183 struct vm_area_struct *vma = v; 183 struct vm_area_struct *vma = v;
184 184
185 vma_stop(priv, vma); 185 if (!IS_ERR(vma))
186 vma_stop(priv, vma);
186 if (priv->task) 187 if (priv->task)
187 put_task_struct(priv->task); 188 put_task_struct(priv->task);
188} 189}
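Context for the m_stop() fix above: the seq_file ->start/->next callbacks
may hand back an ERR_PTR-encoded error instead of a real vma, and ->stop
must not treat that as a pointer. The guard mirrors the usual idiom:

	struct vm_area_struct *vma = ERR_PTR(-ESRCH);	/* error-as-pointer */
	if (!IS_ERR(vma))
		vma_stop(priv, vma);	/* only dereference real pointers */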
diff --git a/include/linux/bch.h b/include/linux/bch.h
new file mode 100644
index 000000000000..295b4ef153bb
--- /dev/null
+++ b/include/linux/bch.h
@@ -0,0 +1,79 @@
1/*
2 * Generic binary BCH encoding/decoding library
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 * Copyright © 2011 Parrot S.A.
18 *
19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
20 *
21 * Description:
22 *
23 * This library provides runtime configurable encoding/decoding of binary
24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
25*/
26#ifndef _BCH_H
27#define _BCH_H
28
29#include <linux/types.h>
30
31/**
32 * struct bch_control - BCH control structure
33 * @m: Galois field order
34 * @n: maximum codeword size in bits (= 2^m-1)
35 * @t: error correction capability in bits
36 * @ecc_bits: ecc exact size in bits, i.e. generator polynomial degree (<=m*t)
37 * @ecc_bytes: ecc max size (m*t bits) in bytes
38 * @a_pow_tab: Galois field GF(2^m) exponentiation lookup table
39 * @a_log_tab: Galois field GF(2^m) log lookup table
40 * @mod8_tab: remainder generator polynomial lookup tables
41 * @ecc_buf: ecc parity words buffer
42 * @ecc_buf2: ecc parity words buffer
43 * @xi_tab: GF(2^m) base for solving degree 2 polynomial roots
44 * @syn: syndrome buffer
45 * @cache: log-based polynomial representation buffer
46 * @elp: error locator polynomial
47 * @poly_2t: temporary polynomials of degree 2t
48 */
49struct bch_control {
50 unsigned int m;
51 unsigned int n;
52 unsigned int t;
53 unsigned int ecc_bits;
54 unsigned int ecc_bytes;
55/* private: */
56 uint16_t *a_pow_tab;
57 uint16_t *a_log_tab;
58 uint32_t *mod8_tab;
59 uint32_t *ecc_buf;
60 uint32_t *ecc_buf2;
61 unsigned int *xi_tab;
62 unsigned int *syn;
63 int *cache;
64 struct gf_poly *elp;
65 struct gf_poly *poly_2t[4];
66};
67
68struct bch_control *init_bch(int m, int t, unsigned int prim_poly);
69
70void free_bch(struct bch_control *bch);
71
72void encode_bch(struct bch_control *bch, const uint8_t *data,
73 unsigned int len, uint8_t *ecc);
74
75int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len,
76 const uint8_t *recv_ecc, const uint8_t *calc_ecc,
77 const unsigned int *syn, unsigned int *errloc);
78
79#endif /* _BCH_H */
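A hedged usage sketch of the API declared above (parameters m = 13, t = 4
are illustrative; error handling trimmed). Note that decode_bch() only
locates error bits, so the caller flips them:

	struct bch_control *bch = init_bch(13, 4, 0);	/* 0: default poly */
	uint8_t data[512], ecc[7] = {0};	/* 7 = DIV_ROUND_UP(13 * 4, 8) */
	unsigned int errloc[4];
	int i, nerr;

	encode_bch(bch, data, sizeof(data), ecc);	/* parity into ecc[] */
	/* ...store data + ecc, read both back... */
	nerr = decode_bch(bch, data, sizeof(data), ecc, NULL, NULL, errloc);
	for (i = 0; i < nerr; i++)	/* nerr < 0 means uncorrectable */
		data[errloc[i] / 8] ^= 1 << (errloc[i] % 8);
	free_bch(bch);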
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index ef44c7a0638c..d18d673ebc78 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,10 +53,10 @@
53 53
54 54
55extern const char *drbd_buildtag(void); 55extern const char *drbd_buildtag(void);
56#define REL_VERSION "8.3.9" 56#define REL_VERSION "8.3.10"
57#define API_VERSION 88 57#define API_VERSION 88
58#define PRO_VERSION_MIN 86 58#define PRO_VERSION_MIN 86
59#define PRO_VERSION_MAX 95 59#define PRO_VERSION_MAX 96
60 60
61 61
62enum drbd_io_error_p { 62enum drbd_io_error_p {
@@ -96,8 +96,14 @@ enum drbd_on_no_data {
96 OND_SUSPEND_IO 96 OND_SUSPEND_IO
97}; 97};
98 98
99enum drbd_on_congestion {
100 OC_BLOCK,
101 OC_PULL_AHEAD,
102 OC_DISCONNECT,
103};
104
99/* KEEP the order, do not delete or insert. Only append. */ 105/* KEEP the order, do not delete or insert. Only append. */
100enum drbd_ret_codes { 106enum drbd_ret_code {
101 ERR_CODE_BASE = 100, 107 ERR_CODE_BASE = 100,
102 NO_ERROR = 101, 108 NO_ERROR = 101,
103 ERR_LOCAL_ADDR = 102, 109 ERR_LOCAL_ADDR = 102,
@@ -146,6 +152,9 @@ enum drbd_ret_codes {
146 ERR_PERM = 152, 152 ERR_PERM = 152,
147 ERR_NEED_APV_93 = 153, 153 ERR_NEED_APV_93 = 153,
148 ERR_STONITH_AND_PROT_A = 154, 154 ERR_STONITH_AND_PROT_A = 154,
155 ERR_CONG_NOT_PROTO_A = 155,
156 ERR_PIC_AFTER_DEP = 156,
157 ERR_PIC_PEER_DEP = 157,
149 158
150 /* insert new ones above this line */ 159 /* insert new ones above this line */
151 AFTER_LAST_ERR_CODE 160 AFTER_LAST_ERR_CODE
@@ -199,6 +208,10 @@ enum drbd_conns {
199 C_VERIFY_T, 208 C_VERIFY_T,
200 C_PAUSED_SYNC_S, 209 C_PAUSED_SYNC_S,
201 C_PAUSED_SYNC_T, 210 C_PAUSED_SYNC_T,
211
212 C_AHEAD,
213 C_BEHIND,
214
202 C_MASK = 31 215 C_MASK = 31
203}; 216};
204 217
@@ -259,7 +272,7 @@ union drbd_state {
259 unsigned int i; 272 unsigned int i;
260}; 273};
261 274
262enum drbd_state_ret_codes { 275enum drbd_state_rv {
263 SS_CW_NO_NEED = 4, 276 SS_CW_NO_NEED = 4,
264 SS_CW_SUCCESS = 3, 277 SS_CW_SUCCESS = 3,
265 SS_NOTHING_TO_DO = 2, 278 SS_NOTHING_TO_DO = 2,
@@ -290,7 +303,7 @@ enum drbd_state_ret_codes {
290extern const char *drbd_conn_str(enum drbd_conns); 303extern const char *drbd_conn_str(enum drbd_conns);
291extern const char *drbd_role_str(enum drbd_role); 304extern const char *drbd_role_str(enum drbd_role);
292extern const char *drbd_disk_str(enum drbd_disk_state); 305extern const char *drbd_disk_str(enum drbd_disk_state);
293extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes); 306extern const char *drbd_set_st_err_str(enum drbd_state_rv);
294 307
295#define SHARED_SECRET_MAX 64 308#define SHARED_SECRET_MAX 64
296 309
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 4ac33f34b77e..bb264a5732de 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -16,7 +16,8 @@
16#define DEBUG_RANGE_CHECK 0 16#define DEBUG_RANGE_CHECK 0
17 17
18#define DRBD_MINOR_COUNT_MIN 1 18#define DRBD_MINOR_COUNT_MIN 1
19#define DRBD_MINOR_COUNT_MAX 255 19#define DRBD_MINOR_COUNT_MAX 256
20#define DRBD_MINOR_COUNT_DEF 32
20 21
21#define DRBD_DIALOG_REFRESH_MIN 0 22#define DRBD_DIALOG_REFRESH_MIN 0
22#define DRBD_DIALOG_REFRESH_MAX 600 23#define DRBD_DIALOG_REFRESH_MAX 600
@@ -129,6 +130,7 @@
129#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT 130#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
130#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT 131#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
131#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR 132#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
133#define DRBD_ON_CONGESTION_DEF OC_BLOCK
132 134
133#define DRBD_MAX_BIO_BVECS_MIN 0 135#define DRBD_MAX_BIO_BVECS_MIN 0
134#define DRBD_MAX_BIO_BVECS_MAX 128 136#define DRBD_MAX_BIO_BVECS_MAX 128
@@ -154,5 +156,13 @@
154#define DRBD_C_MIN_RATE_MAX (4 << 20) 156#define DRBD_C_MIN_RATE_MAX (4 << 20)
155#define DRBD_C_MIN_RATE_DEF 4096 157#define DRBD_C_MIN_RATE_DEF 4096
156 158
159#define DRBD_CONG_FILL_MIN 0
160#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */
161#define DRBD_CONG_FILL_DEF 0
162
163#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN
164#define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX
165#define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF
166
157#undef RANGE 167#undef RANGE
158#endif 168#endif
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index ade91107c9a5..ab6159e4fcf0 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -56,6 +56,9 @@ NL_PACKET(net_conf, 5,
56 NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) 56 NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
57 NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) 57 NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
58 NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) 58 NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
59 NL_INTEGER( 81, T_MAY_IGNORE, on_congestion)
60 NL_INTEGER( 82, T_MAY_IGNORE, cong_fill)
61 NL_INTEGER( 83, T_MAY_IGNORE, cong_extents)
59 /* 59 addr_family was available in GIT, never released */ 62 /* 59 addr_family was available in GIT, never released */
60 NL_BIT( 60, T_MANDATORY, mind_af) 63 NL_BIT( 60, T_MANDATORY, mind_af)
61 NL_BIT( 27, T_MAY_IGNORE, want_lose) 64 NL_BIT( 27, T_MAY_IGNORE, want_lose)
@@ -66,7 +69,9 @@ NL_PACKET(net_conf, 5,
66 NL_BIT( 70, T_MANDATORY, dry_run) 69 NL_BIT( 70, T_MANDATORY, dry_run)
67) 70)
68 71
69NL_PACKET(disconnect, 6, ) 72NL_PACKET(disconnect, 6,
73 NL_BIT( 84, T_MAY_IGNORE, force)
74)
70 75
71NL_PACKET(resize, 7, 76NL_PACKET(resize, 7,
72 NL_INT64( 29, T_MAY_IGNORE, resize_size) 77 NL_INT64( 29, T_MAY_IGNORE, resize_size)
@@ -143,9 +148,13 @@ NL_PACKET(new_c_uuid, 26,
143 NL_BIT( 63, T_MANDATORY, clear_bm) 148 NL_BIT( 63, T_MANDATORY, clear_bm)
144) 149)
145 150
151#ifdef NL_RESPONSE
152NL_RESPONSE(return_code_only, 27)
153#endif
154
146#undef NL_PACKET 155#undef NL_PACKET
147#undef NL_INTEGER 156#undef NL_INTEGER
148#undef NL_INT64 157#undef NL_INT64
149#undef NL_BIT 158#undef NL_BIT
150#undef NL_STRING 159#undef NL_STRING
151 160#undef NL_RESPONSE
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
index fcdff8410e99..f14a165e82dc 100644
--- a/include/linux/drbd_tag_magic.h
+++ b/include/linux/drbd_tag_magic.h
@@ -7,6 +7,7 @@
7/* declare packet_type enums */ 7/* declare packet_type enums */
8enum packet_types { 8enum packet_types {
9#define NL_PACKET(name, number, fields) P_ ## name = number, 9#define NL_PACKET(name, number, fields) P_ ## name = number,
10#define NL_RESPONSE(name, number) P_ ## name = number,
10#define NL_INTEGER(pn, pr, member) 11#define NL_INTEGER(pn, pr, member)
11#define NL_INT64(pn, pr, member) 12#define NL_INT64(pn, pr, member)
12#define NL_BIT(pn, pr, member) 13#define NL_BIT(pn, pr, member)
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index 1408bf8eed5f..ad1b19aa6508 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -63,6 +63,24 @@ extern int mfd_cell_enable(struct platform_device *pdev);
63extern int mfd_cell_disable(struct platform_device *pdev); 63extern int mfd_cell_disable(struct platform_device *pdev);
64 64
65/* 65/*
66 * "Clone" multiple platform devices for a single cell. This is to be used
67 * for devices that have multiple users of a cell. For example, if an mfd
68 * driver wants the cell "foo" to be used by a GPIO driver, an MTD driver,
69 * and a platform driver, the following bit of code would be used after first
70 * calling mfd_add_devices():
71 *
72 * const char *fclones[] = { "foo-gpio", "foo-mtd" };
73 * err = mfd_clone_cells("foo", fclones, ARRAY_SIZE(fclones));
74 *
75 * Each driver (MTD, GPIO, and platform driver) would then register
76 * platform_drivers for "foo-mtd", "foo-gpio", and "foo", respectively.
77 * The cell's .enable/.disable hooks should be used to deal with hardware
78 * resource contention.
79 */
80extern int mfd_clone_cell(const char *cell, const char **clones,
81 size_t n_clones);
82
83/*
66 * Given a platform device that's been created by mfd_add_devices(), fetch 84 * Given a platform device that's been created by mfd_add_devices(), fetch
67 * the mfd_cell that created it. 85 * the mfd_cell that created it.
68 */ 86 */
@@ -87,13 +105,4 @@ extern int mfd_add_devices(struct device *parent, int id,
87 105
88extern void mfd_remove_devices(struct device *parent); 106extern void mfd_remove_devices(struct device *parent);
89 107
90/*
91 * For MFD drivers with clients sharing access to resources, these create
92 * multiple platform devices per cell. Contention handling must still be
93 * handled via drivers (ie, with enable/disable hooks).
94 */
95extern int mfd_shared_platform_driver_register(struct platform_driver *drv,
96 const char *cellname);
97extern void mfd_shared_platform_driver_unregister(struct platform_driver *drv);
98
99#endif 108#endif
diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index 93a9477e075f..69d1010e2e51 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h
@@ -24,6 +24,8 @@
24 24
25#include <linux/i2c.h> 25#include <linux/i2c.h>
26 26
27#define MAX8997_REG_INVALID (0xff)
28
27enum max8997_pmic_reg { 29enum max8997_pmic_reg {
28 MAX8997_REG_PMIC_ID0 = 0x00, 30 MAX8997_REG_PMIC_ID0 = 0x00,
29 MAX8997_REG_PMIC_ID1 = 0x01, 31 MAX8997_REG_PMIC_ID1 = 0x01,
@@ -313,6 +315,7 @@ enum max8997_irq {
313#define MAX8997_REG_BUCK2DVS(x) (MAX8997_REG_BUCK2DVS1 + (x) - 1) 315#define MAX8997_REG_BUCK2DVS(x) (MAX8997_REG_BUCK2DVS1 + (x) - 1)
314#define MAX8997_REG_BUCK5DVS(x) (MAX8997_REG_BUCK5DVS1 + (x) - 1) 316#define MAX8997_REG_BUCK5DVS(x) (MAX8997_REG_BUCK5DVS1 + (x) - 1)
315 317
318#define MAX8997_NUM_GPIO 12
316struct max8997_dev { 319struct max8997_dev {
317 struct device *dev; 320 struct device *dev;
318 struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */ 321 struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */
@@ -324,11 +327,19 @@ struct max8997_dev {
324 int type; 327 int type;
325 struct platform_device *battery; /* battery control (not fuel gauge) */ 328 struct platform_device *battery; /* battery control (not fuel gauge) */
326 329
330 int irq;
331 int ono;
332 int irq_base;
327 bool wakeup; 333 bool wakeup;
334 struct mutex irqlock;
335 int irq_masks_cur[MAX8997_IRQ_GROUP_NR];
336 int irq_masks_cache[MAX8997_IRQ_GROUP_NR];
328 337
329 /* For hibernation */ 338 /* For hibernation */
330 u8 reg_dump[MAX8997_REG_PMIC_END + MAX8997_MUIC_REG_END + 339 u8 reg_dump[MAX8997_REG_PMIC_END + MAX8997_MUIC_REG_END +
331 MAX8997_HAPTIC_REG_END]; 340 MAX8997_HAPTIC_REG_END];
341
342 bool gpio_status[MAX8997_NUM_GPIO];
332}; 343};
333 344
334enum max8997_types { 345enum max8997_types {
@@ -336,6 +347,10 @@ enum max8997_types {
336 TYPE_MAX8966, 347 TYPE_MAX8966,
337}; 348};
338 349
350extern int max8997_irq_init(struct max8997_dev *max8997);
351extern void max8997_irq_exit(struct max8997_dev *max8997);
352extern int max8997_irq_resume(struct max8997_dev *max8997);
353
339extern int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); 354extern int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest);
340extern int max8997_bulk_read(struct i2c_client *i2c, u8 reg, int count, 355extern int max8997_bulk_read(struct i2c_client *i2c, u8 reg, int count,
341 u8 *buf); 356 u8 *buf);
@@ -344,4 +359,10 @@ extern int max8997_bulk_write(struct i2c_client *i2c, u8 reg, int count,
344 u8 *buf); 359 u8 *buf);
345extern int max8997_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask); 360extern int max8997_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask);
346 361
362#define MAX8997_GPIO_INT_BOTH (0x3 << 4)
363#define MAX8997_GPIO_INT_RISE (0x2 << 4)
364#define MAX8997_GPIO_INT_FALL (0x1 << 4)
365
366#define MAX8997_GPIO_INT_MASK (0x3 << 4)
367#define MAX8997_GPIO_DATA_MASK (0x1 << 2)
347#endif /* __LINUX_MFD_MAX8997_PRIV_H */ 368#endif /* __LINUX_MFD_MAX8997_PRIV_H */
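A hedged illustration of pairing the new GPIO masks with the
max8997_update_reg() helper declared above (the register constant is
hypothetical): configure a pin for both-edge interrupts while leaving its
data bit untouched.

	u8 gpio_cntl = 0x00;	/* hypothetical GPIO control register address */
	max8997_update_reg(i2c, gpio_cntl, MAX8997_GPIO_INT_BOTH,
			   MAX8997_GPIO_INT_MASK);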
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index cb671b3451bf..60931d089422 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -78,8 +78,11 @@ struct max8997_regulator_data {
78}; 78};
79 79
80struct max8997_platform_data { 80struct max8997_platform_data {
81 bool wakeup; 81 /* IRQ */
82 /* IRQ: Not implemented */ 82 int irq_base;
83 int ono;
84 int wakeup;
85
83 /* ---- PMIC ---- */ 86 /* ---- PMIC ---- */
84 struct max8997_regulator_data *regulators; 87 struct max8997_regulator_data *regulators;
85 int num_regulators; 88 int num_regulators;
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index 26529ebd59cc..1bbd9f289245 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -36,6 +36,7 @@ struct mtd_blktrans_dev {
36 struct mtd_info *mtd; 36 struct mtd_info *mtd;
37 struct mutex lock; 37 struct mutex lock;
38 int devnum; 38 int devnum;
39 bool bg_stop;
39 unsigned long size; 40 unsigned long size;
40 int readonly; 41 int readonly;
41 int open; 42 int open;
@@ -62,6 +63,7 @@ struct mtd_blktrans_ops {
62 unsigned long block, char *buffer); 63 unsigned long block, char *buffer);
63 int (*discard)(struct mtd_blktrans_dev *dev, 64 int (*discard)(struct mtd_blktrans_dev *dev,
64 unsigned long block, unsigned nr_blocks); 65 unsigned long block, unsigned nr_blocks);
66 void (*background)(struct mtd_blktrans_dev *dev);
65 67
66 /* Block layer ioctls */ 68 /* Block layer ioctls */
67 int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo); 69 int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);
@@ -85,6 +87,7 @@ extern int register_mtd_blktrans(struct mtd_blktrans_ops *tr);
85extern int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr); 87extern int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr);
86extern int add_mtd_blktrans_dev(struct mtd_blktrans_dev *dev); 88extern int add_mtd_blktrans_dev(struct mtd_blktrans_dev *dev);
87extern int del_mtd_blktrans_dev(struct mtd_blktrans_dev *dev); 89extern int del_mtd_blktrans_dev(struct mtd_blktrans_dev *dev);
90extern int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev);
88 91
89 92
90#endif /* __MTD_TRANS_H__ */ 93#endif /* __MTD_TRANS_H__ */
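A hedged sketch of how an FTL might implement the new background() hook,
yielding as soon as mtd_blktrans_cease_background() reports queued I/O
(the helper functions are hypothetical):

	static void my_ftl_background(struct mtd_blktrans_dev *dev)
	{
		while (ftl_has_idle_work(dev)) {	/* hypothetical */
			ftl_do_one_gc_step(dev);	/* hypothetical */
			if (mtd_blktrans_cease_background(dev))
				break;	/* a request arrived; resume I/O */
		}
	}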
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index a9baee6864af..0d823f2dd667 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -535,6 +535,7 @@ struct cfi_fixup {
535#define CFI_MFR_CONTINUATION 0x007F 535#define CFI_MFR_CONTINUATION 0x007F
536 536
537#define CFI_MFR_AMD 0x0001 537#define CFI_MFR_AMD 0x0001
538#define CFI_MFR_AMIC 0x0037
538#define CFI_MFR_ATMEL 0x001F 539#define CFI_MFR_ATMEL 0x001F
539#define CFI_MFR_EON 0x001C 540#define CFI_MFR_EON 0x001C
540#define CFI_MFR_FUJITSU 0x0004 541#define CFI_MFR_FUJITSU 0x0004
diff --git a/include/linux/mtd/latch-addr-flash.h b/include/linux/mtd/latch-addr-flash.h
new file mode 100644
index 000000000000..e94b8e128074
--- /dev/null
+++ b/include/linux/mtd/latch-addr-flash.h
@@ -0,0 +1,29 @@
1/*
2 * Interface for NOR flash driver whose high address lines are latched
3 *
4 * Copyright © 2008 MontaVista Software, Inc. <source@mvista.com>
5 *
6 * This file is licensed under the terms of the GNU General Public License
7 * version 2. This program is licensed "as is" without any warranty of any
8 * kind, whether express or implied.
9 */
10#ifndef __LATCH_ADDR_FLASH__
11#define __LATCH_ADDR_FLASH__
12
13struct map_info;
14struct mtd_partition;
15
16struct latch_addr_flash_data {
17 unsigned int width;
18 unsigned int size;
19
20 int (*init)(void *data, int cs);
21 void (*done)(void *data);
22 void (*set_window)(unsigned long offset, void *data);
23 void *data;
24
25 unsigned int nr_parts;
26 struct mtd_partition *parts;
27};
28
29#endif
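A hedged board-file sketch using the platform data above (callback body and
sizes are illustrative, not taken from a real board):

	static void board_set_window(unsigned long offset, void *data)
	{
		/* latch the high address lines for this window (board-specific) */
	}

	static struct latch_addr_flash_data board_flash_data = {
		.width		= 2,		/* 16-bit NOR */
		.size		= 64 * 1024 * 1024,
		.set_window	= board_set_window,
	};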
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 1f489b247a29..ae67ef56a8f5 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -140,6 +140,7 @@ typedef enum {
140 NAND_ECC_HW, 140 NAND_ECC_HW,
141 NAND_ECC_HW_SYNDROME, 141 NAND_ECC_HW_SYNDROME,
142 NAND_ECC_HW_OOB_FIRST, 142 NAND_ECC_HW_OOB_FIRST,
143 NAND_ECC_SOFT_BCH,
143} nand_ecc_modes_t; 144} nand_ecc_modes_t;
144 145
145/* 146/*
@@ -339,6 +340,7 @@ struct nand_hw_control {
339 * @prepad: padding information for syndrome based ecc generators 340 * @prepad: padding information for syndrome based ecc generators
340 * @postpad: padding information for syndrome based ecc generators 341 * @postpad: padding information for syndrome based ecc generators
341 * @layout: ECC layout control struct pointer 342 * @layout: ECC layout control struct pointer
343 * @priv: pointer to private ecc control data
342 * @hwctl: function to control hardware ecc generator. Must only 344 * @hwctl: function to control hardware ecc generator. Must only
343 * be provided if an hardware ECC is available 345 * be provided if an hardware ECC is available
344 * @calculate: function for ecc calculation or readback from ecc hardware 346 * @calculate: function for ecc calculation or readback from ecc hardware
@@ -362,6 +364,7 @@ struct nand_ecc_ctrl {
362 int prepad; 364 int prepad;
363 int postpad; 365 int postpad;
364 struct nand_ecclayout *layout; 366 struct nand_ecclayout *layout;
367 void *priv;
365 void (*hwctl)(struct mtd_info *mtd, int mode); 368 void (*hwctl)(struct mtd_info *mtd, int mode);
366 int (*calculate)(struct mtd_info *mtd, const uint8_t *dat, 369 int (*calculate)(struct mtd_info *mtd, const uint8_t *dat,
367 uint8_t *ecc_code); 370 uint8_t *ecc_code);
diff --git a/include/linux/mtd/nand_bch.h b/include/linux/mtd/nand_bch.h
new file mode 100644
index 000000000000..74acf5367556
--- /dev/null
+++ b/include/linux/mtd/nand_bch.h
@@ -0,0 +1,72 @@
1/*
2 * Copyright © 2011 Ivan Djelic <ivan.djelic@parrot.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This file is the header for the NAND BCH ECC implementation.
9 */
10
11#ifndef __MTD_NAND_BCH_H__
12#define __MTD_NAND_BCH_H__
13
14struct mtd_info;
15struct nand_bch_control;
16
17#if defined(CONFIG_MTD_NAND_ECC_BCH)
18
19static inline int mtd_nand_has_bch(void) { return 1; }
20
21/*
22 * Calculate BCH ecc code
23 */
24int nand_bch_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
25 u_char *ecc_code);
26
27/*
28 * Detect and correct bit errors
29 */
30int nand_bch_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc,
31 u_char *calc_ecc);
32/*
33 * Initialize BCH encoder/decoder
34 */
35struct nand_bch_control *
36nand_bch_init(struct mtd_info *mtd, unsigned int eccsize,
37 unsigned int eccbytes, struct nand_ecclayout **ecclayout);
38/*
39 * Release BCH encoder/decoder resources
40 */
41void nand_bch_free(struct nand_bch_control *nbc);
42
43#else /* !CONFIG_MTD_NAND_ECC_BCH */
44
45static inline int mtd_nand_has_bch(void) { return 0; }
46
47static inline int
48nand_bch_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
49 u_char *ecc_code)
50{
51 return -1;
52}
53
54static inline int
55nand_bch_correct_data(struct mtd_info *mtd, unsigned char *buf,
56 unsigned char *read_ecc, unsigned char *calc_ecc)
57{
58 return -1;
59}
60
61static inline struct nand_bch_control *
62nand_bch_init(struct mtd_info *mtd, unsigned int eccsize,
63 unsigned int eccbytes, struct nand_ecclayout **ecclayout)
64{
65 return NULL;
66}
67
68static inline void nand_bch_free(struct nand_bch_control *nbc) {}
69
70#endif /* CONFIG_MTD_NAND_ECC_BCH */
71
72#endif /* __MTD_NAND_BCH_H__ */
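A hedged sketch of how a board driver might opt in to software BCH, using
the NAND_ECC_SOFT_BCH mode added to nand.h above (step sizes illustrative;
the generic scan code is assumed to call nand_bch_init() for this mode):

	chip->ecc.mode = NAND_ECC_SOFT_BCH;
	chip->ecc.size = 512;	/* data bytes covered by each ECC step */
	chip->ecc.bytes = 7;	/* parity bytes per step, e.g. m = 13, t = 4 */
	/* nand_scan_tail() then sets up the nand_bch_control internally */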
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index ae418e41d8f5..52b6f187bf49 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -198,6 +198,7 @@ struct onenand_chip {
198#define ONENAND_SKIP_UNLOCK_CHECK (0x0100) 198#define ONENAND_SKIP_UNLOCK_CHECK (0x0100)
199#define ONENAND_PAGEBUF_ALLOC (0x1000) 199#define ONENAND_PAGEBUF_ALLOC (0x1000)
200#define ONENAND_OOBBUF_ALLOC (0x2000) 200#define ONENAND_OOBBUF_ALLOC (0x2000)
201#define ONENAND_SKIP_INITIAL_UNLOCKING (0x4000)
201 202
202#define ONENAND_IS_4KB_PAGE(this) \ 203#define ONENAND_IS_4KB_PAGE(this) \
203 (this->options & ONENAND_HAS_4KB_PAGE) 204 (this->options & ONENAND_HAS_4KB_PAGE)
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 430a9cc045e2..e1bad1130616 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -1031,9 +1031,7 @@ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_s
1031#define snd_pcm_lib_mmap_iomem NULL 1031#define snd_pcm_lib_mmap_iomem NULL
1032#endif 1032#endif
1033 1033
1034int snd_pcm_lib_mmap_noncached(struct snd_pcm_substream *substream, 1034#define snd_pcm_lib_mmap_vmalloc NULL
1035 struct vm_area_struct *area);
1036#define snd_pcm_lib_mmap_vmalloc snd_pcm_lib_mmap_noncached
1037 1035
1038static inline void snd_pcm_limit_isa_dma_size(int dma, size_t *max) 1036static inline void snd_pcm_limit_isa_dma_size(int dma, size_t *max)
1039{ 1037{
diff --git a/ipc/util.c b/ipc/util.c
index 8fd1b891ec0c..5c0d28921ba8 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -317,6 +317,7 @@ retry:
317 317
318/** 318/**
319 * ipc_check_perms - check security and permissions for an IPC 319 * ipc_check_perms - check security and permissions for an IPC
320 * @ns: IPC namespace
320 * @ipcp: ipc permission set 321 * @ipcp: ipc permission set
321 * @ops: the actual security routine to call 322 * @ops: the actual security routine to call
322 * @params: its parameters 323 * @params: its parameters
@@ -607,6 +608,7 @@ void ipc_rcu_putref(void *ptr)
607 608
608/** 609/**
609 * ipcperms - check IPC permissions 610 * ipcperms - check IPC permissions
611 * @ns: IPC namespace
610 * @ipcp: IPC permission set 612 * @ipcp: IPC permission set
611 * @flag: desired permission set. 613 * @flag: desired permission set.
612 * 614 *
@@ -769,7 +771,7 @@ void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
769 771
770/** 772/**
771 * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd 773 * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd
772 * @ids: the ipc namespace 774 * @ns: the ipc namespace
773 * @ids: the table of ids where to look for the ipc 775 * @ids: the table of ids where to look for the ipc
774 * @id: the id of the ipc to retrieve 776 * @id: the id of the ipc to retrieve
775 * @cmd: the cmd to check 777 * @cmd: the cmd to check
diff --git a/lib/Kconfig b/lib/Kconfig
index 23fa7a359db7..9c10e38fc609 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -158,6 +158,45 @@ config REED_SOLOMON_DEC16
158 boolean 158 boolean
159 159
160# 160#
161# BCH support is selected if needed
162#
163config BCH
164 tristate
165
166config BCH_CONST_PARAMS
167 boolean
168 help
169 Drivers may select this option to force specific constant
170 values for parameters 'm' (Galois field order) and 't'
171 (error correction capability). Those specific values must
172 be set by declaring default values for symbols BCH_CONST_M
173 and BCH_CONST_T.
174 Doing so will enable extra compiler optimizations,
175 improving encoding and decoding performance by up to 2x for
176 usual (m,t) values (typically such that m*t < 200).
177 When this option is selected, the BCH library supports
178 only a single (m,t) configuration. This is mainly useful
179 for NAND flash board drivers requiring known, fixed BCH
180 parameters.
181
182config BCH_CONST_M
183 int
184 range 5 15
185 help
186 Constant value for Galois field order 'm'. If 'k' is the
187 number of data bits to protect, 'm' should be chosen such
188 that (k + m*t) <= 2**m - 1.
189 Drivers should declare a default value for this symbol if
190 they select option BCH_CONST_PARAMS.
191
192config BCH_CONST_T
193 int
194 help
195 Constant value for error correction capability in bits 't'.
196 Drivers should declare a default value for this symbol if
197 they select option BCH_CONST_PARAMS.
198
199#
161# Textsearch support is select'ed if needed 200# Textsearch support is select'ed if needed
162# 201#
163config TEXTSEARCH 202config TEXTSEARCH
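
As an illustration of the BCH_CONST_PARAMS help text above, a NAND board driver could pin the library to a single (m,t) configuration from its own Kconfig. The MTD_NAND_MYBOARD symbol below is hypothetical; only the BCH* symbols exist in this patch:

config MTD_NAND_MYBOARD
	tristate "NAND support for (hypothetical) MyBoard"
	select BCH
	select BCH_CONST_PARAMS

config BCH_CONST_M
	default 13 if MTD_NAND_MYBOARD

config BCH_CONST_T
	default 4 if MTD_NAND_MYBOARD
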
diff --git a/lib/Makefile b/lib/Makefile
index d7872b5c4c1c..ef0f28571156 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
+obj-$(CONFIG_BCH) += bch.o
 obj-$(CONFIG_LZO_COMPRESS) += lzo/
 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
 obj-$(CONFIG_XZ_DEC) += xz/
diff --git a/lib/bch.c b/lib/bch.c
new file mode 100644
index 000000000000..bc89dfe4d1b3
--- /dev/null
+++ b/lib/bch.c
@@ -0,0 +1,1368 @@
+/*
+ * Generic binary BCH encoding/decoding library
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Copyright © 2011 Parrot S.A.
+ *
+ * Author: Ivan Djelic <ivan.djelic@parrot.com>
+ *
+ * Description:
+ *
+ * This library provides runtime configurable encoding/decoding of binary
+ * Bose-Chaudhuri-Hocquenghem (BCH) codes.
+ *
+ * Call init_bch to get a pointer to a newly allocated bch_control structure for
+ * the given m (Galois field order), t (error correction capability) and
+ * (optional) primitive polynomial parameters.
+ *
+ * Call encode_bch to compute and store ecc parity bytes to a given buffer.
+ * Call decode_bch to detect and locate errors in received data.
+ *
+ * On systems supporting hw BCH features, intermediate results may be provided
+ * to decode_bch in order to skip certain steps. See decode_bch() documentation
+ * for details.
+ *
+ * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
+ * parameters m and t; thus allowing extra compiler optimizations and providing
+ * better (up to 2x) encoding performance. Using this option makes sense when
+ * (m,t) are fixed and known in advance, e.g. when using BCH error correction
+ * on a particular NAND flash device.
+ *
+ * Algorithmic details:
+ *
+ * Encoding is performed by processing 32 input bits in parallel, using 4
+ * remainder lookup tables.
+ *
+ * The final stage of decoding involves the following internal steps:
+ * a. Syndrome computation
+ * b. Error locator polynomial computation using Berlekamp-Massey algorithm
+ * c. Error locator root finding (by far the most expensive step)
+ *
+ * In this implementation, step c is not performed using the usual Chien search.
+ * Instead, an alternative approach described in [1] is used. It consists in
+ * factoring the error locator polynomial using the Berlekamp Trace algorithm
+ * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
+ * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
+ * much better performance than Chien search for usual (m,t) values (typically
+ * m >= 13, t < 32, see [1]).
+ *
+ * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
+ * of characteristic 2, in: Western European Workshop on Research in Cryptology
+ * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
+ * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
+ * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
+ */
+
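
The block comment above is the whole public contract of the library; the following sketch shows the init/encode half in isolation (decoding is illustrated further down, next to decode_bch()). The (13,4) parameters, the buffer size and the helper name are illustrative, not part of the patch:

#include <linux/bch.h>
#include <linux/string.h>

/*
 * Illustration only: protect a buffer with m=13, t=4. In GF(2^13),
 * n = 2^13-1 = 8191 codeword bits and ecc_bits <= m*t = 52, so up to
 * (8191-52)/8 = 1017 data bytes fit, with ecc_bytes = 7.
 */
static struct bch_control *example_bch_encode(const uint8_t *data,
					      unsigned int len,
					      uint8_t ecc[7])
{
	struct bch_control *bch;

	bch = init_bch(13, 4, 0);	/* 0 selects the default primitive polynomial */
	if (!bch)
		return NULL;

	memset(ecc, 0, bch->ecc_bytes);	/* parity must start zeroed */
	encode_bch(bch, data, len, ecc);

	return bch;	/* caller eventually releases it with free_bch() */
}
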
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <asm/byteorder.h>
+#include <linux/bch.h>
+
+#if defined(CONFIG_BCH_CONST_PARAMS)
+#define GF_M(_p)	(CONFIG_BCH_CONST_M)
+#define GF_T(_p)	(CONFIG_BCH_CONST_T)
+#define GF_N(_p)	((1 << (CONFIG_BCH_CONST_M))-1)
+#else
+#define GF_M(_p)	((_p)->m)
+#define GF_T(_p)	((_p)->t)
+#define GF_N(_p)	((_p)->n)
+#endif
+
+#define BCH_ECC_WORDS(_p)	DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
+#define BCH_ECC_BYTES(_p)	DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
+
+#ifndef dbg
+#define dbg(_fmt, args...)	do {} while (0)
+#endif
+
+/*
+ * represent a polynomial over GF(2^m)
+ */
+struct gf_poly {
+	unsigned int deg;	/* polynomial degree */
+	unsigned int c[0];	/* polynomial terms */
+};
+
+/* given its degree, compute a polynomial size in bytes */
+#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
+
+/* polynomial of degree 1 */
+struct gf_poly_deg1 {
+	struct gf_poly poly;
+	unsigned int   c[2];
+};
+
+/*
+ * same as encode_bch(), but process input data one byte at a time
+ */
+static void encode_bch_unaligned(struct bch_control *bch,
+				 const unsigned char *data, unsigned int len,
+				 uint32_t *ecc)
+{
+	int i;
+	const uint32_t *p;
+	const int l = BCH_ECC_WORDS(bch)-1;
+
+	while (len--) {
+		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(*data++)) & 0xff);
+
+		for (i = 0; i < l; i++)
+			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
+
+		ecc[l] = (ecc[l] << 8)^(*p);
+	}
+}
+
+/*
+ * convert ecc bytes to aligned, zero-padded 32-bit ecc words
+ */
+static void load_ecc8(struct bch_control *bch, uint32_t *dst,
+		      const uint8_t *src)
+{
+	uint8_t pad[4] = {0, 0, 0, 0};
+	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
+
+	for (i = 0; i < nwords; i++, src += 4)
+		dst[i] = (src[0] << 24)|(src[1] << 16)|(src[2] << 8)|src[3];
+
+	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
+	dst[nwords] = (pad[0] << 24)|(pad[1] << 16)|(pad[2] << 8)|pad[3];
+}
+
+/*
+ * convert 32-bit ecc words to ecc bytes
+ */
+static void store_ecc8(struct bch_control *bch, uint8_t *dst,
+		       const uint32_t *src)
+{
+	uint8_t pad[4];
+	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
+
+	for (i = 0; i < nwords; i++) {
+		*dst++ = (src[i] >> 24);
+		*dst++ = (src[i] >> 16) & 0xff;
+		*dst++ = (src[i] >> 8) & 0xff;
+		*dst++ = (src[i] >> 0) & 0xff;
+	}
+	pad[0] = (src[nwords] >> 24);
+	pad[1] = (src[nwords] >> 16) & 0xff;
+	pad[2] = (src[nwords] >> 8) & 0xff;
+	pad[3] = (src[nwords] >> 0) & 0xff;
+	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
+}
+
+/**
+ * encode_bch - calculate BCH ecc parity of data
+ * @bch:   BCH control structure
+ * @data:  data to encode
+ * @len:   data length in bytes
+ * @ecc:   ecc parity data, must be initialized by caller
+ *
+ * The @ecc parity array is used both as input and output parameter, in order to
+ * allow incremental computations. It should be of the size indicated by member
+ * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
+ *
+ * The exact number of computed ecc parity bits is given by member @ecc_bits of
+ * @bch; it may be less than m*t for large values of t.
+ */
+void encode_bch(struct bch_control *bch, const uint8_t *data,
+		unsigned int len, uint8_t *ecc)
+{
+	const unsigned int l = BCH_ECC_WORDS(bch)-1;
+	unsigned int i, mlen;
+	unsigned long m;
+	uint32_t w, r[l+1];
+	const uint32_t * const tab0 = bch->mod8_tab;
+	const uint32_t * const tab1 = tab0 + 256*(l+1);
+	const uint32_t * const tab2 = tab1 + 256*(l+1);
+	const uint32_t * const tab3 = tab2 + 256*(l+1);
+	const uint32_t *pdata, *p0, *p1, *p2, *p3;
+
+	if (ecc) {
+		/* load ecc parity bytes into internal 32-bit buffer */
+		load_ecc8(bch, bch->ecc_buf, ecc);
+	} else {
+		memset(bch->ecc_buf, 0, sizeof(r));
+	}
+
+	/* process first unaligned data bytes */
+	m = ((unsigned long)data) & 3;
+	if (m) {
+		mlen = (len < (4-m)) ? len : 4-m;
+		encode_bch_unaligned(bch, data, mlen, bch->ecc_buf);
+		data += mlen;
+		len  -= mlen;
+	}
+
+	/* process 32-bit aligned data words */
+	pdata = (uint32_t *)data;
+	mlen  = len/4;
+	data += 4*mlen;
+	len  -= 4*mlen;
+	memcpy(r, bch->ecc_buf, sizeof(r));
+
+	/*
+	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
+	 *
+	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
+	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
+	 *                               tttttttt  mod g = r0 (precomputed)
+	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
+	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
+	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
+	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
+	 */
+	while (mlen--) {
+		/* input data is read in big-endian format */
+		w = r[0]^cpu_to_be32(*pdata++);
+		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
+		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
+		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
+		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
+
+		for (i = 0; i < l; i++)
+			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
+
+		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
+	}
+	memcpy(bch->ecc_buf, r, sizeof(r));
+
+	/* process last unaligned bytes */
+	if (len)
+		encode_bch_unaligned(bch, data, len, bch->ecc_buf);
+
+	/* store ecc parity bytes into original parity buffer */
+	if (ecc)
+		store_ecc8(bch, ecc, bch->ecc_buf);
+}
+EXPORT_SYMBOL_GPL(encode_bch);
+
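
Since @ecc is read back on entry, parity can be accumulated across several encode_bch() calls; a minimal sketch (the fragment sizes and helper name are arbitrary):

#include <linux/bch.h>
#include <linux/string.h>

/*
 * Accumulate BCH parity over two halves of one buffer: because @ecc is
 * both input and output, this matches a single encode_bch() call over
 * the full 512 bytes.
 */
static void ecc_two_fragments(struct bch_control *bch,
			      const uint8_t buf[512], uint8_t *ecc)
{
	memset(ecc, 0, bch->ecc_bytes);		/* start from zeroed parity */
	encode_bch(bch, buf, 256, ecc);		/* first half */
	encode_bch(bch, buf + 256, 256, ecc);	/* second half continues it */
}
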
+static inline int modulo(struct bch_control *bch, unsigned int v)
+{
+	const unsigned int n = GF_N(bch);
+	while (v >= n) {
+		v -= n;
+		v = (v & n) + (v >> GF_M(bch));
+	}
+	return v;
+}
+
+/*
+ * shorter and faster modulo function, only works when v < 2N.
+ */
+static inline int mod_s(struct bch_control *bch, unsigned int v)
+{
+	const unsigned int n = GF_N(bch);
+	return (v < n) ? v : v-n;
+}
+
+static inline int deg(unsigned int poly)
+{
+	/* polynomial degree is the most-significant bit index */
+	return fls(poly)-1;
+}
+
+static inline int parity(unsigned int x)
+{
+	/*
+	 * public domain code snippet, lifted from
+	 * http://www-graphics.stanford.edu/~seander/bithacks.html
+	 */
+	x ^= x >> 1;
+	x ^= x >> 2;
+	x = (x & 0x11111111U) * 0x11111111U;
+	return (x >> 28) & 1;
+}
+
+/* Galois field basic operations: multiply, divide, inverse, etc. */
+
+static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
+				  unsigned int b)
+{
+	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
+					       bch->a_log_tab[b])] : 0;
+}
+
+static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
+{
+	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
+}
+
+static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
+				  unsigned int b)
+{
+	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
+					GF_N(bch)-bch->a_log_tab[b])] : 0;
+}
+
+static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
+{
+	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
+}
+
+static inline unsigned int a_pow(struct bch_control *bch, int i)
+{
+	return bch->a_pow_tab[modulo(bch, i)];
+}
+
+static inline int a_log(struct bch_control *bch, unsigned int x)
+{
+	return bch->a_log_tab[x];
+}
+
+static inline int a_ilog(struct bch_control *bch, unsigned int x)
+{
+	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
+}
+
+/*
+ * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
+ */
+static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
+			      unsigned int *syn)
+{
+	int i, j, s;
+	unsigned int m;
+	uint32_t poly;
+	const int t = GF_T(bch);
+
+	s = bch->ecc_bits;
+
+	/* make sure extra bits in last ecc word are cleared */
+	m = ((unsigned int)s) & 31;
+	if (m)
+		ecc[s/32] &= ~((1u << (32-m))-1);
+	memset(syn, 0, 2*t*sizeof(*syn));
+
+	/* compute v(a^j) for j=1 .. 2t-1 */
+	do {
+		poly = *ecc++;
+		s -= 32;
+		while (poly) {
+			i = deg(poly);
+			for (j = 0; j < 2*t; j += 2)
+				syn[j] ^= a_pow(bch, (j+1)*(i+s));
+
+			poly ^= (1 << i);
+		}
+	} while (s > 0);
+
+	/* v(a^(2j)) = v(a^j)^2 */
+	for (j = 0; j < t; j++)
+		syn[2*j+1] = gf_sqr(bch, syn[j]);
+}
+
+static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
+{
+	memcpy(dst, src, GF_POLY_SZ(src->deg));
+}
+
+static int compute_error_locator_polynomial(struct bch_control *bch,
+					    const unsigned int *syn)
+{
+	const unsigned int t = GF_T(bch);
+	const unsigned int n = GF_N(bch);
+	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
+	struct gf_poly *elp = bch->elp;
+	struct gf_poly *pelp = bch->poly_2t[0];
+	struct gf_poly *elp_copy = bch->poly_2t[1];
+	int k, pp = -1;
+
+	memset(pelp, 0, GF_POLY_SZ(2*t));
+	memset(elp, 0, GF_POLY_SZ(2*t));
+
+	pelp->deg = 0;
+	pelp->c[0] = 1;
+	elp->deg = 0;
+	elp->c[0] = 1;
+
+	/* use simplified binary Berlekamp-Massey algorithm */
+	for (i = 0; (i < t) && (elp->deg <= t); i++) {
+		if (d) {
+			k = 2*i-pp;
+			gf_poly_copy(elp_copy, elp);
+			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
+			tmp = a_log(bch, d)+n-a_log(bch, pd);
+			for (j = 0; j <= pelp->deg; j++) {
+				if (pelp->c[j]) {
+					l = a_log(bch, pelp->c[j]);
+					elp->c[j+k] ^= a_pow(bch, tmp+l);
+				}
+			}
+			/* compute l[i+1] = max(l[i], l[p]+2*(i-p)) */
+			tmp = pelp->deg+k;
+			if (tmp > elp->deg) {
+				elp->deg = tmp;
+				gf_poly_copy(pelp, elp_copy);
+				pd = d;
+				pp = 2*i;
+			}
+		}
+		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
+		if (i < t-1) {
+			d = syn[2*i+2];
+			for (j = 1; j <= elp->deg; j++)
+				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
+		}
+	}
+	dbg("elp=%s\n", gf_poly_str(elp));
+	return (elp->deg > t) ? -1 : (int)elp->deg;
+}
+
+/*
+ * solve a m x m linear system in GF(2) with an expected number of solutions,
+ * and return the number of found solutions
+ */
+static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
+			       unsigned int *sol, int nsol)
+{
+	const int m = GF_M(bch);
+	unsigned int tmp, mask;
+	int rem, c, r, p, k, param[m];
+
+	k = 0;
+	mask = 1 << m;
+
+	/* Gaussian elimination */
+	for (c = 0; c < m; c++) {
+		rem = 0;
+		p = c-k;
+		/* find suitable row for elimination */
+		for (r = p; r < m; r++) {
+			if (rows[r] & mask) {
+				if (r != p) {
+					tmp = rows[r];
+					rows[r] = rows[p];
+					rows[p] = tmp;
+				}
+				rem = r+1;
+				break;
+			}
+		}
+		if (rem) {
+			/* perform elimination on remaining rows */
+			tmp = rows[p];
+			for (r = rem; r < m; r++) {
+				if (rows[r] & mask)
+					rows[r] ^= tmp;
+			}
+		} else {
+			/* elimination not needed, store defective row index */
+			param[k++] = c;
+		}
+		mask >>= 1;
+	}
+	/* rewrite system, inserting fake parameter rows */
+	if (k > 0) {
+		p = k;
+		for (r = m-1; r >= 0; r--) {
+			if ((r > m-1-k) && rows[r])
+				/* system has no solution */
+				return 0;
+
+			rows[r] = (p && (r == param[p-1])) ?
+				p--, 1u << (m-r) : rows[r-p];
+		}
+	}
+
+	if (nsol != (1 << k))
+		/* unexpected number of solutions */
+		return 0;
+
+	for (p = 0; p < nsol; p++) {
+		/* set parameters for p-th solution */
+		for (c = 0; c < k; c++)
+			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
+
+		/* compute unique solution */
+		tmp = 0;
+		for (r = m-1; r >= 0; r--) {
+			mask = rows[r] & (tmp|1);
+			tmp |= parity(mask) << (m-r);
+		}
+		sol[p] = tmp >> 1;
+	}
+	return nsol;
+}
+
+/*
+ * this function builds and solves a linear system for finding roots of a degree
+ * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
+ */
+static int find_affine4_roots(struct bch_control *bch, unsigned int a,
+			      unsigned int b, unsigned int c,
+			      unsigned int *roots)
+{
+	int i, j, k;
+	const int m = GF_M(bch);
+	unsigned int mask = 0xff, t, rows[16] = {0,};
+
+	j = a_log(bch, b);
+	k = a_log(bch, a);
+	rows[0] = c;
+
+	/* build linear system to solve X^4+aX^2+bX+c = 0 */
+	for (i = 0; i < m; i++) {
+		rows[i+1] = bch->a_pow_tab[4*i]^
+			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
+			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
+		j++;
+		k += 2;
+	}
+	/*
+	 * transpose 16x16 matrix before passing it to linear solver
+	 * warning: this code assumes m < 16
+	 */
+	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
+		for (k = 0; k < 16; k = (k+j+1) & ~j) {
+			t = ((rows[k] >> j)^rows[k+j]) & mask;
+			rows[k] ^= (t << j);
+			rows[k+j] ^= t;
+		}
+	}
+	return solve_linear_system(bch, rows, roots, 4);
+}
+
+/*
+ * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
+ */
+static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
+				unsigned int *roots)
+{
+	int n = 0;
+
+	if (poly->c[0])
+		/* poly[X] = bX+c with c!=0, root=c/b */
+		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
+				   bch->a_log_tab[poly->c[1]]);
+	return n;
+}
+
+/*
+ * compute roots of a degree 2 polynomial over GF(2^m)
+ */
+static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
+				unsigned int *roots)
+{
+	int n = 0, i, l0, l1, l2;
+	unsigned int u, v, r;
+
+	if (poly->c[0] && poly->c[1]) {
+
+		l0 = bch->a_log_tab[poly->c[0]];
+		l1 = bch->a_log_tab[poly->c[1]];
+		l2 = bch->a_log_tab[poly->c[2]];
+
+		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
+		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
+		/*
+		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
+		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
+		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
+		 * i.e. r and r+1 are roots iff Tr(u)=0
+		 */
+		r = 0;
+		v = u;
+		while (v) {
+			i = deg(v);
+			r ^= bch->xi_tab[i];
+			v ^= (1 << i);
+		}
+		/* verify root */
+		if ((gf_sqr(bch, r)^r) == u) {
+			/* reverse z=a/bX transformation and compute log(1/r) */
+			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
+					    bch->a_log_tab[r]+l2);
+			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
+					    bch->a_log_tab[r^1]+l2);
+		}
+	}
+	return n;
+}
+
+/*
+ * compute roots of a degree 3 polynomial over GF(2^m)
+ */
+static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
+				unsigned int *roots)
+{
+	int i, n = 0;
+	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
+
+	if (poly->c[0]) {
+		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
+		e3 = poly->c[3];
+		c2 = gf_div(bch, poly->c[0], e3);
+		b2 = gf_div(bch, poly->c[1], e3);
+		a2 = gf_div(bch, poly->c[2], e3);
+
+		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
+		c = gf_mul(bch, a2, c2);	/* c = a2c2      */
+		b = gf_mul(bch, a2, b2)^c2;	/* b = a2b2 + c2 */
+		a = gf_sqr(bch, a2)^b2;		/* a = a2^2 + b2 */
+
+		/* find the 4 roots of this affine polynomial */
+		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
+			/* remove a2 from final list of roots */
+			for (i = 0; i < 4; i++) {
+				if (tmp[i] != a2)
+					roots[n++] = a_ilog(bch, tmp[i]);
+			}
+		}
+	}
+	return n;
+}
+
+/*
+ * compute roots of a degree 4 polynomial over GF(2^m)
+ */
+static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
+				unsigned int *roots)
+{
+	int i, l, n = 0;
+	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
+
+	if (poly->c[0] == 0)
+		return 0;
+
+	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
+	e4 = poly->c[4];
+	d = gf_div(bch, poly->c[0], e4);
+	c = gf_div(bch, poly->c[1], e4);
+	b = gf_div(bch, poly->c[2], e4);
+	a = gf_div(bch, poly->c[3], e4);
+
+	/* use Y=1/X transformation to get an affine polynomial */
+	if (a) {
+		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
+		if (c) {
+			/* compute e such that e^2 = c/a */
+			f = gf_div(bch, c, a);
+			l = a_log(bch, f);
+			l += (l & 1) ? GF_N(bch) : 0;
+			e = a_pow(bch, l/2);
+			/*
+			 * use transformation z=X+e:
+			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
+			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
+			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
+			 * z^4 + az^3 + b'z^2 + d'
+			 */
+			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
+			b = gf_mul(bch, a, e)^b;
+		}
+		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
+		if (d == 0)
+			/* assume all roots have multiplicity 1 */
+			return 0;
+
+		c2 = gf_inv(bch, d);
+		b2 = gf_div(bch, a, d);
+		a2 = gf_div(bch, b, d);
+	} else {
+		/* polynomial is already affine */
+		c2 = d;
+		b2 = c;
+		a2 = b;
+	}
+	/* find the 4 roots of this affine polynomial */
+	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
+		for (i = 0; i < 4; i++) {
+			/* post-process roots (reverse transformations) */
+			f = a ? gf_inv(bch, roots[i]) : roots[i];
+			roots[i] = a_ilog(bch, f^e);
+		}
+		n = 4;
+	}
+	return n;
+}
+
+/*
+ * build monic, log-based representation of a polynomial
+ */
+static void gf_poly_logrep(struct bch_control *bch,
+			   const struct gf_poly *a, int *rep)
+{
+	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
+
+	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
+	for (i = 0; i < d; i++)
+		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
+}
+
+/*
+ * compute polynomial Euclidean division remainder in GF(2^m)[X]
+ */
+static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
+			const struct gf_poly *b, int *rep)
+{
+	int la, p, m;
+	unsigned int i, j, *c = a->c;
+	const unsigned int d = b->deg;
+
+	if (a->deg < d)
+		return;
+
+	/* reuse or compute log representation of denominator */
+	if (!rep) {
+		rep = bch->cache;
+		gf_poly_logrep(bch, b, rep);
+	}
+
+	for (j = a->deg; j >= d; j--) {
+		if (c[j]) {
+			la = a_log(bch, c[j]);
+			p = j-d;
+			for (i = 0; i < d; i++, p++) {
+				m = rep[i];
+				if (m >= 0)
+					c[p] ^= bch->a_pow_tab[mod_s(bch,
+								     m+la)];
+			}
+		}
+	}
+	a->deg = d-1;
+	while (!c[a->deg] && a->deg)
+		a->deg--;
+}
+
+/*
+ * compute polynomial Euclidean division quotient in GF(2^m)[X]
+ */
+static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
+			const struct gf_poly *b, struct gf_poly *q)
+{
+	if (a->deg >= b->deg) {
+		q->deg = a->deg-b->deg;
+		/* compute a mod b (modifies a) */
+		gf_poly_mod(bch, a, b, NULL);
+		/* quotient is stored in upper part of polynomial a */
+		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
+	} else {
+		q->deg = 0;
+		q->c[0] = 0;
+	}
+}
+
+/*
+ * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
+ */
+static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
+				   struct gf_poly *b)
+{
+	struct gf_poly *tmp;
+
+	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
+
+	if (a->deg < b->deg) {
+		tmp = b;
+		b = a;
+		a = tmp;
+	}
+
+	while (b->deg > 0) {
+		gf_poly_mod(bch, a, b, NULL);
+		tmp = b;
+		b = a;
+		a = tmp;
+	}
+
+	dbg("%s\n", gf_poly_str(a));
+
+	return a;
+}
+
+/*
+ * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
+ * This is used in Berlekamp Trace algorithm for splitting polynomials
+ */
+static void compute_trace_bk_mod(struct bch_control *bch, int k,
+				 const struct gf_poly *f, struct gf_poly *z,
+				 struct gf_poly *out)
+{
+	const int m = GF_M(bch);
+	int i, j;
+
+	/* z contains z^2j mod f */
+	z->deg = 1;
+	z->c[0] = 0;
+	z->c[1] = bch->a_pow_tab[k];
+
+	out->deg = 0;
+	memset(out, 0, GF_POLY_SZ(f->deg));
+
+	/* compute f log representation only once */
+	gf_poly_logrep(bch, f, bch->cache);
+
+	for (i = 0; i < m; i++) {
+		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
+		for (j = z->deg; j >= 0; j--) {
+			out->c[j] ^= z->c[j];
+			z->c[2*j] = gf_sqr(bch, z->c[j]);
+			z->c[2*j+1] = 0;
+		}
+		if (z->deg > out->deg)
+			out->deg = z->deg;
+
+		if (i < m-1) {
+			z->deg *= 2;
+			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
+			gf_poly_mod(bch, z, f, bch->cache);
+		}
+	}
+	while (!out->c[out->deg] && out->deg)
+		out->deg--;
+
+	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
+}
+
+/*
+ * factor a polynomial using Berlekamp Trace algorithm (BTA)
+ */
+static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
+			      struct gf_poly **g, struct gf_poly **h)
+{
+	struct gf_poly *f2 = bch->poly_2t[0];
+	struct gf_poly *q  = bch->poly_2t[1];
+	struct gf_poly *tk = bch->poly_2t[2];
+	struct gf_poly *z  = bch->poly_2t[3];
+	struct gf_poly *gcd;
+
+	dbg("factoring %s...\n", gf_poly_str(f));
+
+	*g = f;
+	*h = NULL;
+
+	/* tk = Tr(a^k.X) mod f */
+	compute_trace_bk_mod(bch, k, f, z, tk);
+
+	if (tk->deg > 0) {
+		/* compute g = gcd(f, tk) (destructive operation) */
+		gf_poly_copy(f2, f);
+		gcd = gf_poly_gcd(bch, f2, tk);
+		if (gcd->deg < f->deg) {
+			/* compute h=f/gcd(f,tk); this will modify f and q */
+			gf_poly_div(bch, f, gcd, q);
+			/* store g and h in-place (clobbering f) */
+			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
+			gf_poly_copy(*g, gcd);
+			gf_poly_copy(*h, q);
+		}
+	}
+}
+
+/*
+ * find roots of a polynomial, using BTZ algorithm; see the beginning of this
+ * file for details
+ */
+static int find_poly_roots(struct bch_control *bch, unsigned int k,
+			   struct gf_poly *poly, unsigned int *roots)
+{
+	int cnt;
+	struct gf_poly *f1, *f2;
+
+	switch (poly->deg) {
+		/* handle low degree polynomials with ad hoc techniques */
+	case 1:
+		cnt = find_poly_deg1_roots(bch, poly, roots);
+		break;
+	case 2:
+		cnt = find_poly_deg2_roots(bch, poly, roots);
+		break;
+	case 3:
+		cnt = find_poly_deg3_roots(bch, poly, roots);
+		break;
+	case 4:
+		cnt = find_poly_deg4_roots(bch, poly, roots);
+		break;
+	default:
+		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
+		cnt = 0;
+		if (poly->deg && (k <= GF_M(bch))) {
+			factor_polynomial(bch, k, poly, &f1, &f2);
+			if (f1)
+				cnt += find_poly_roots(bch, k+1, f1, roots);
+			if (f2)
+				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
+		}
+		break;
+	}
+	return cnt;
+}
+
+#if defined(USE_CHIEN_SEARCH)
+/*
+ * exhaustive root search (Chien) implementation - not used, included only for
+ * reference/comparison tests
+ */
+static int chien_search(struct bch_control *bch, unsigned int len,
+			struct gf_poly *p, unsigned int *roots)
+{
+	int m;
+	unsigned int i, j, syn, syn0, count = 0;
+	const unsigned int k = 8*len+bch->ecc_bits;
+
+	/* use a log-based representation of polynomial */
+	gf_poly_logrep(bch, p, bch->cache);
+	bch->cache[p->deg] = 0;
+	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
+
+	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
+		/* compute elp(a^i) */
+		for (j = 1, syn = syn0; j <= p->deg; j++) {
+			m = bch->cache[j];
+			if (m >= 0)
+				syn ^= a_pow(bch, m+j*i);
+		}
+		if (syn == 0) {
+			roots[count++] = GF_N(bch)-i;
+			if (count == p->deg)
+				break;
+		}
+	}
+	return (count == p->deg) ? count : 0;
+}
+#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
+#endif /* USE_CHIEN_SEARCH */
+
+/**
+ * decode_bch - decode received codeword and find bit error locations
+ * @bch:      BCH control structure
+ * @data:     received data, ignored if @calc_ecc is provided
+ * @len:      data length in bytes, must always be provided
+ * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
+ * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
+ * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
+ * @errloc:   output array of error locations
+ *
+ * Returns:
+ *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
+ *  invalid parameters were provided
+ *
+ * Depending on the available hw BCH support and the need to compute @calc_ecc
+ * separately (using encode_bch()), this function should be called with one of
+ * the following parameter configurations -
+ *
+ * by providing @data and @recv_ecc only:
+ *   decode_bch(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
+ *
+ * by providing @recv_ecc and @calc_ecc:
+ *   decode_bch(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
+ *
+ * by providing ecc = recv_ecc XOR calc_ecc:
+ *   decode_bch(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
+ *
+ * by providing syndrome results @syn:
+ *   decode_bch(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
+ *
+ * Once decode_bch() has successfully returned with a positive value, error
+ * locations returned in array @errloc should be interpreted as follows -
+ *
+ * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
+ * data correction)
+ *
+ * if (errloc[n] < 8*len), then n-th error is located in data and can be
+ * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
+ *
+ * Note that this function does not perform any data correction by itself, it
+ * merely indicates error locations.
+ */
+int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len,
+	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
+	       const unsigned int *syn, unsigned int *errloc)
+{
+	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
+	unsigned int nbits;
+	int i, err, nroots;
+	uint32_t sum;
+
+	/* sanity check: make sure data length can be handled */
+	if (8*len > (bch->n-bch->ecc_bits))
+		return -EINVAL;
+
+	/* if caller does not provide syndromes, compute them */
+	if (!syn) {
+		if (!calc_ecc) {
+			/* compute received data ecc into an internal buffer */
+			if (!data || !recv_ecc)
+				return -EINVAL;
+			encode_bch(bch, data, len, NULL);
+		} else {
+			/* load provided calculated ecc */
+			load_ecc8(bch, bch->ecc_buf, calc_ecc);
+		}
+		/* load received ecc or assume it was XORed in calc_ecc */
+		if (recv_ecc) {
+			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
+			/* XOR received and calculated ecc */
+			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
+				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
+				sum |= bch->ecc_buf[i];
+			}
+			if (!sum)
+				/* no error found */
+				return 0;
+		}
+		compute_syndromes(bch, bch->ecc_buf, bch->syn);
+		syn = bch->syn;
+	}
+
+	err = compute_error_locator_polynomial(bch, syn);
+	if (err > 0) {
+		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
+		if (err != nroots)
+			err = -1;
+	}
+	if (err > 0) {
+		/* post-process raw error locations for easier correction */
+		nbits = (len*8)+bch->ecc_bits;
+		for (i = 0; i < err; i++) {
+			if (errloc[i] >= nbits) {
+				err = -1;
+				break;
+			}
+			errloc[i] = nbits-1-errloc[i];
+			errloc[i] = (errloc[i] & ~7)|(7-(errloc[i] & 7));
+		}
+	}
+	return (err >= 0) ? err : -EBADMSG;
+}
+EXPORT_SYMBOL_GPL(decode_bch);
+
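
A sketch of the second calling convention listed in the kernel-doc above, for controllers that have already computed the ecc of the received data; the helper name and surrounding buffers are illustrative, only the decode_bch() call and the errloc interpretation come from the documentation:

#include <linux/bch.h>

/*
 * calc_ecc was produced by hardware over the received data, recv_ecc
 * was read back from the storage medium; decode_bch() then never
 * touches the data buffer itself, so corrections are applied here.
 */
static int correct_with_hw_ecc(struct bch_control *bch, uint8_t *data,
			       unsigned int len, const uint8_t *recv_ecc,
			       const uint8_t *calc_ecc, unsigned int *errloc)
{
	int i, n;

	n = decode_bch(bch, NULL, len, recv_ecc, calc_ecc, NULL, errloc);
	for (i = 0; i < n; i++)
		if (errloc[i] < 8*len)	/* locations >= 8*len are in the ecc */
			data[errloc[i]/8] ^= 1 << (errloc[i] % 8);

	return n;	/* number of bit errors, or -EBADMSG/-EINVAL */
}
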
+/*
+ * generate Galois field lookup tables
+ */
+static int build_gf_tables(struct bch_control *bch, unsigned int poly)
+{
+	unsigned int i, x = 1;
+	const unsigned int k = 1 << deg(poly);
+
+	/* primitive polynomial must be of degree m */
+	if (k != (1u << GF_M(bch)))
+		return -1;
+
+	for (i = 0; i < GF_N(bch); i++) {
+		bch->a_pow_tab[i] = x;
+		bch->a_log_tab[x] = i;
+		if (i && (x == 1))
+			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
+			return -1;
+		x <<= 1;
+		if (x & k)
+			x ^= poly;
+	}
+	bch->a_pow_tab[GF_N(bch)] = 1;
+	bch->a_log_tab[0] = 0;
+
+	return 0;
+}
+
+/*
+ * compute generator polynomial remainder tables for fast encoding
+ */
+static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
+{
+	int i, j, b, d;
+	uint32_t data, hi, lo, *tab;
+	const int l = BCH_ECC_WORDS(bch);
+	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
+	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
+
+	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
+
+	for (i = 0; i < 256; i++) {
+		/* p(X)=i is a small polynomial of weight <= 8 */
+		for (b = 0; b < 4; b++) {
+			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
+			tab = bch->mod8_tab + (b*256+i)*l;
+			data = i << (8*b);
+			while (data) {
+				d = deg(data);
+				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
+				data ^= g[0] >> (31-d);
+				for (j = 0; j < ecclen; j++) {
+					hi = (d < 31) ? g[j] << (d+1) : 0;
+					lo = (j+1 < plen) ?
+						g[j+1] >> (31-d) : 0;
+					tab[j] ^= hi|lo;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * build a base for factoring degree 2 polynomials
+ */
+static int build_deg2_base(struct bch_control *bch)
+{
+	const int m = GF_M(bch);
+	int i, j, r;
+	unsigned int sum, x, y, remaining, ak = 0, xi[m];
+
+	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
+	for (i = 0; i < m; i++) {
+		for (j = 0, sum = 0; j < m; j++)
+			sum ^= a_pow(bch, i*(1 << j));
+
+		if (sum) {
+			ak = bch->a_pow_tab[i];
+			break;
+		}
+	}
+	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
+	remaining = m;
+	memset(xi, 0, sizeof(xi));
+
+	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
+		y = gf_sqr(bch, x)^x;
+		for (i = 0; i < 2; i++) {
+			r = a_log(bch, y);
+			if (y && (r < m) && !xi[r]) {
+				bch->xi_tab[r] = x;
+				xi[r] = 1;
+				remaining--;
+				dbg("x%d = %x\n", r, x);
+				break;
+			}
+			y ^= ak;
+		}
+	}
+	/* should not happen but check anyway */
+	return remaining ? -1 : 0;
+}
+
+static void *bch_alloc(size_t size, int *err)
+{
+	void *ptr;
+
+	ptr = kmalloc(size, GFP_KERNEL);
+	if (ptr == NULL)
+		*err = 1;
+	return ptr;
+}
+
+/*
+ * compute generator polynomial for given (m,t) parameters.
+ */
+static uint32_t *compute_generator_polynomial(struct bch_control *bch)
+{
+	const unsigned int m = GF_M(bch);
+	const unsigned int t = GF_T(bch);
+	int n, err = 0;
+	unsigned int i, j, nbits, r, word, *roots;
+	struct gf_poly *g;
+	uint32_t *genpoly;
+
+	g = bch_alloc(GF_POLY_SZ(m*t), &err);
+	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
+	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
+
+	if (err) {
+		kfree(genpoly);
+		genpoly = NULL;
+		goto finish;
+	}
+
+	/* enumerate all roots of g(X) */
+	memset(roots, 0, (bch->n+1)*sizeof(*roots));
+	for (i = 0; i < t; i++) {
+		for (j = 0, r = 2*i+1; j < m; j++) {
+			roots[r] = 1;
+			r = mod_s(bch, 2*r);
+		}
+	}
+	/* build generator polynomial g(X) */
+	g->deg = 0;
+	g->c[0] = 1;
+	for (i = 0; i < GF_N(bch); i++) {
+		if (roots[i]) {
+			/* multiply g(X) by (X+root) */
+			r = bch->a_pow_tab[i];
+			g->c[g->deg+1] = 1;
+			for (j = g->deg; j > 0; j--)
+				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
+
+			g->c[0] = gf_mul(bch, g->c[0], r);
+			g->deg++;
+		}
+	}
+	/* store left-justified binary representation of g(X) */
+	n = g->deg+1;
+	i = 0;
+
+	while (n > 0) {
+		nbits = (n > 32) ? 32 : n;
+		for (j = 0, word = 0; j < nbits; j++) {
+			if (g->c[n-1-j])
+				word |= 1u << (31-j);
+		}
+		genpoly[i++] = word;
+		n -= nbits;
+	}
+	bch->ecc_bits = g->deg;
+
+finish:
+	kfree(g);
+	kfree(roots);
+
+	return genpoly;
+}
+
+/**
+ * init_bch - initialize a BCH encoder/decoder
+ * @m:          Galois field order, should be in the range 5-15
+ * @t:          maximum error correction capability, in bits
+ * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
+ *
+ * Returns:
+ *  a newly allocated BCH control structure if successful, NULL otherwise
+ *
+ * This initialization can take some time, as lookup tables are built for fast
+ * encoding/decoding; make sure not to call this function from a time critical
+ * path. Usually, init_bch() should be called on module/driver init and
+ * free_bch() should be called to release memory on exit.
+ *
+ * You may provide your own primitive polynomial of degree @m in argument
+ * @prim_poly, or let init_bch() use its default polynomial.
+ *
+ * Once init_bch() has successfully returned a pointer to a newly allocated
+ * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
+ * the structure.
+ */
+struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
+{
+	int err = 0;
+	unsigned int i, words;
+	uint32_t *genpoly;
+	struct bch_control *bch = NULL;
+
+	const int min_m = 5;
+	const int max_m = 15;
+
+	/* default primitive polynomials */
+	static const unsigned int prim_poly_tab[] = {
+		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
+		0x402b, 0x8003,
+	};
+
+#if defined(CONFIG_BCH_CONST_PARAMS)
+	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
+		printk(KERN_ERR "bch encoder/decoder was configured to support "
+		       "parameters m=%d, t=%d only!\n",
+		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
+		goto fail;
+	}
+#endif
+	if ((m < min_m) || (m > max_m))
+		/*
+		 * values of m greater than 15 are not currently supported;
+		 * supporting m > 15 would require changing table base type
+		 * (uint16_t) and a small patch in matrix transposition
+		 */
+		goto fail;
+
+	/* sanity checks */
+	if ((t < 1) || (m*t >= ((1 << m)-1)))
+		/* invalid t value */
+		goto fail;
+
+	/* select a primitive polynomial for generating GF(2^m) */
+	if (prim_poly == 0)
+		prim_poly = prim_poly_tab[m-min_m];
+
+	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
+	if (bch == NULL)
+		goto fail;
+
+	bch->m = m;
+	bch->t = t;
+	bch->n = (1 << m)-1;
+	words  = DIV_ROUND_UP(m*t, 32);
+	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
+	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
+	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
+	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
+	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
+	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
+	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
+	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
+	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
+	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
+
+	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
+		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
+
+	if (err)
+		goto fail;
+
+	err = build_gf_tables(bch, prim_poly);
+	if (err)
+		goto fail;
+
+	/* use generator polynomial for computing encoding tables */
+	genpoly = compute_generator_polynomial(bch);
+	if (genpoly == NULL)
+		goto fail;
+
+	build_mod8_tables(bch, genpoly);
+	kfree(genpoly);
+
+	err = build_deg2_base(bch);
+	if (err)
+		goto fail;
+
+	return bch;
+
+fail:
+	free_bch(bch);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(init_bch);
+
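
The m/t sanity checks in init_bch() above boil down to a simple sizing rule; the helper below is an illustration of that rule only, not part of the library:

#include <linux/errno.h>

/*
 * Pick the smallest supported Galois field order m able to protect
 * len data bytes with t-bit correction, i.e. satisfying
 * 8*len + m*t <= 2^m - 1 (mirrors the init_bch()/decode_bch()
 * checks). For example, len=512 and t=8 yield m=13.
 */
static int bch_min_m(unsigned int len, unsigned int t)
{
	unsigned int m;

	for (m = 5; m <= 15; m++)
		if (8 * len + m * t <= (1u << m) - 1)
			return m;

	return -EINVAL;	/* no supported field is large enough */
}
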
+/**
+ * free_bch - free the BCH control structure
+ * @bch: BCH control structure to release
+ */
+void free_bch(struct bch_control *bch)
+{
+	unsigned int i;
+
+	if (bch) {
+		kfree(bch->a_pow_tab);
+		kfree(bch->a_log_tab);
+		kfree(bch->mod8_tab);
+		kfree(bch->ecc_buf);
+		kfree(bch->ecc_buf2);
+		kfree(bch->xi_tab);
+		kfree(bch->syn);
+		kfree(bch->cache);
+		kfree(bch->elp);
+
+		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
+			kfree(bch->poly_2t[i]);
+
+		kfree(bch);
+	}
+}
+EXPORT_SYMBOL_GPL(free_bch);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
+MODULE_DESCRIPTION("Binary BCH encoder/decoder");
diff --git a/mm/memory.c b/mm/memory.c
index 51a5c23704af..9da8cab1b1b0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3715,7 +3715,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 }
 
 /**
- * @access_remote_vm - access another process' address space
+ * access_remote_vm - access another process' address space
  * @mm:		the mm_struct of the target address space
  * @addr:	start address to access
  * @buf:	source or destination buffer
diff --git a/sound/core/init.c b/sound/core/init.c
index 3e65da21a08c..a0080aa45ae9 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -848,6 +848,7 @@ int snd_card_file_add(struct snd_card *card, struct file *file)
 		return -ENOMEM;
 	mfile->file = file;
 	mfile->disconnected_f_op = NULL;
+	INIT_LIST_HEAD(&mfile->shutdown_list);
 	spin_lock(&card->files_lock);
 	if (card->shutdown) {
 		spin_unlock(&card->files_lock);
@@ -883,6 +884,9 @@ int snd_card_file_remove(struct snd_card *card, struct file *file)
 	list_for_each_entry(mfile, &card->files_list, list) {
 		if (mfile->file == file) {
 			list_del(&mfile->list);
+			spin_lock(&shutdown_lock);
+			list_del(&mfile->shutdown_list);
+			spin_unlock(&shutdown_lock);
 			if (mfile->disconnected_f_op)
 				fops_put(mfile->disconnected_f_op);
 			found = mfile;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index ae42b6509ce4..fe5c8036beba 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3201,15 +3201,6 @@ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream,
 EXPORT_SYMBOL(snd_pcm_lib_mmap_iomem);
 #endif /* SNDRV_PCM_INFO_MMAP */
 
-/* mmap callback with pgprot_noncached */
-int snd_pcm_lib_mmap_noncached(struct snd_pcm_substream *substream,
-			       struct vm_area_struct *area)
-{
-	area->vm_page_prot = pgprot_noncached(area->vm_page_prot);
-	return snd_pcm_default_mmap(substream, area);
-}
-EXPORT_SYMBOL(snd_pcm_lib_mmap_noncached);
-
 /*
  * mmap DMA buffer
  */
diff --git a/sound/oss/dev_table.h b/sound/oss/dev_table.h
index b7617bee6388..0199a317c5a9 100644
--- a/sound/oss/dev_table.h
+++ b/sound/oss/dev_table.h
@@ -271,7 +271,7 @@ struct synth_operations
 	void (*reset) (int dev);
 	void (*hw_control) (int dev, unsigned char *event);
 	int (*load_patch) (int dev, int format, const char __user *addr,
-	     int offs, int count, int pmgr_flag);
+	     int count, int pmgr_flag);
 	void (*aftertouch) (int dev, int voice, int pressure);
 	void (*controller) (int dev, int voice, int ctrl_num, int value);
 	void (*panning) (int dev, int voice, int value);
diff --git a/sound/oss/midi_synth.c b/sound/oss/midi_synth.c
index 3c09374ea5bf..2292c230d7e6 100644
--- a/sound/oss/midi_synth.c
+++ b/sound/oss/midi_synth.c
@@ -476,7 +476,7 @@ EXPORT_SYMBOL(midi_synth_hw_control);
 
 int
 midi_synth_load_patch(int dev, int format, const char __user *addr,
-		      int offs, int count, int pmgr_flag)
+		      int count, int pmgr_flag)
 {
 	int orig_dev = synth_devs[dev]->midi_dev;
 
@@ -491,33 +491,29 @@ midi_synth_load_patch(int dev, int format, const char __user *addr,
 	if (!prefix_cmd(orig_dev, 0xf0))
 		return 0;
 
+	/* Invalid patch format */
 	if (format != SYSEX_PATCH)
-	{
-/*		  printk("MIDI Error: Invalid patch format (key) 0x%x\n", format);*/
 		return -EINVAL;
-	}
+
+	/* Patch header too short */
 	if (count < hdr_size)
-	{
-/*		  printk("MIDI Error: Patch header too short\n");*/
 		return -EINVAL;
-	}
+
 	count -= hdr_size;
 
 	/*
-	 * Copy the header from user space but ignore the first bytes which have
-	 * been transferred already.
+	 * Copy the header from user space
 	 */
 
-	if(copy_from_user(&((char *) &sysex)[offs], &(addr)[offs], hdr_size - offs))
+	if (copy_from_user(&sysex, addr, hdr_size))
 		return -EFAULT;
 
-	if (count < sysex.len)
-	{
-/*		  printk(KERN_WARNING "MIDI Warning: Sysex record too short (%d<%d)\n", count, (int) sysex.len);*/
+	/* Sysex record too short */
+	if ((unsigned)count < (unsigned)sysex.len)
 		sysex.len = count;
-	}
+
 	left = sysex.len;
 	src_offs = 0;
 
 	for (i = 0; i < left && !signal_pending(current); i++)
 	{
diff --git a/sound/oss/midi_synth.h b/sound/oss/midi_synth.h
index 6bc9d00bc77c..b64ddd6c4abc 100644
--- a/sound/oss/midi_synth.h
+++ b/sound/oss/midi_synth.h
@@ -8,7 +8,7 @@ int midi_synth_open (int dev, int mode);
 void midi_synth_close (int dev);
 void midi_synth_hw_control (int dev, unsigned char *event);
 int midi_synth_load_patch (int dev, int format, const char __user * addr,
-	int offs, int count, int pmgr_flag);
+	int count, int pmgr_flag);
 void midi_synth_panning (int dev, int channel, int pressure);
 void midi_synth_aftertouch (int dev, int channel, int pressure);
 void midi_synth_controller (int dev, int channel, int ctrl_num, int value);
diff --git a/sound/oss/opl3.c b/sound/oss/opl3.c
index 938c48c43585..407cd677950b 100644
--- a/sound/oss/opl3.c
+++ b/sound/oss/opl3.c
@@ -820,7 +820,7 @@ static void opl3_hw_control(int dev, unsigned char *event)
 }
 
 static int opl3_load_patch(int dev, int format, const char __user *addr,
-		int offs, int count, int pmgr_flag)
+		int count, int pmgr_flag)
 {
 	struct sbi_instrument ins;
 
@@ -830,11 +830,7 @@ static int opl3_load_patch(int dev, int format, const char __user *addr,
 		return -EINVAL;
 	}
 
-	/*
-	 * What the fuck is going on here?  We leave junk in the beginning
-	 * of ins and then check the field pretty close to that beginning?
-	 */
-	if(copy_from_user(&((char *) &ins)[offs], addr + offs, sizeof(ins) - offs))
+	if (copy_from_user(&ins, addr, sizeof(ins)))
 		return -EFAULT;
 
 	if (ins.channel < 0 || ins.channel >= SBFM_MAXINSTR)
@@ -849,6 +845,10 @@ static int opl3_load_patch(int dev, int format, const char __user *addr,
 
 static void opl3_panning(int dev, int voice, int value)
 {
+
+	if (voice < 0 || voice >= devc->nr_voice)
+		return;
+
 	devc->voc[voice].panning = value;
 }
 
@@ -1066,8 +1066,15 @@ static int opl3_alloc_voice(int dev, int chn, int note, struct voice_alloc_info
 
 static void opl3_setup_voice(int dev, int voice, int chn)
 {
-	struct channel_info *info =
-		&synth_devs[dev]->chn_info[chn];
+	struct channel_info *info;
+
+	if (voice < 0 || voice >= devc->nr_voice)
+		return;
+
+	if (chn < 0 || chn > 15)
+		return;
+
+	info = &synth_devs[dev]->chn_info[chn];
 
 	opl3_set_instr(dev, voice, info->pgm_num);
 
diff --git a/sound/oss/sequencer.c b/sound/oss/sequencer.c
index 5ea1098ac427..30bcfe470f83 100644
--- a/sound/oss/sequencer.c
+++ b/sound/oss/sequencer.c
@@ -241,7 +241,7 @@ int sequencer_write(int dev, struct file *file, const char __user *buf, int coun
 		return -ENXIO;
 
 	fmt = (*(short *) &event_rec[0]) & 0xffff;
-	err = synth_devs[dev]->load_patch(dev, fmt, buf, p + 4, c, 0);
+	err = synth_devs[dev]->load_patch(dev, fmt, buf + p, c, 0);
 	if (err < 0)
 		return err;
 
diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c
index 0ac1f98d91a1..f53a31e939c1 100644
--- a/sound/pci/asihpi/asihpi.c
+++ b/sound/pci/asihpi/asihpi.c
@@ -22,21 +22,6 @@
  * for any purpose including commercial applications.
  */
 
-/* >0: print Hw params, timer vars. >1: print stream write/copy sizes  */
-#define REALLY_VERBOSE_LOGGING 0
-
-#if REALLY_VERBOSE_LOGGING
-#define VPRINTK1 snd_printd
-#else
-#define VPRINTK1(...)
-#endif
-
-#if REALLY_VERBOSE_LOGGING > 1
-#define VPRINTK2 snd_printd
-#else
-#define VPRINTK2(...)
-#endif
-
 #include "hpi_internal.h"
 #include "hpimsginit.h"
 #include "hpioctl.h"
@@ -57,11 +42,25 @@
 #include <sound/tlv.h>
 #include <sound/hwdep.h>
 
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("AudioScience inc. <support@audioscience.com>");
 MODULE_DESCRIPTION("AudioScience ALSA ASI5000 ASI6000 ASI87xx ASI89xx");
 
+#if defined CONFIG_SND_DEBUG_VERBOSE
+/**
+ * snd_printddd - very verbose debug printk
+ * @format: format string
+ *
+ * Works like snd_printk() for debugging purposes.
+ * Ignored when CONFIG_SND_DEBUG_VERBOSE is not set.
+ * Must set snd module debug parameter to 3 to enable at runtime.
+ */
+#define snd_printddd(format, args...) \
+	__snd_printk(3, __FILE__, __LINE__, format, ##args)
+#else
+#define snd_printddd(format, args...) do { } while (0)
+#endif
+
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
 static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
@@ -289,7 +288,6 @@ static u16 handle_error(u16 err, int line, char *filename)
 #define hpi_handle_error(x)  handle_error(x, __LINE__, __FILE__)
 
 /***************************** GENERAL PCM ****************/
-#if REALLY_VERBOSE_LOGGING
 static void print_hwparams(struct snd_pcm_hw_params *p)
 {
 	snd_printd("HWPARAMS \n");
@@ -304,9 +302,6 @@ static void print_hwparams(struct snd_pcm_hw_params *p)
 	snd_printd("periods %d \n", params_periods(p));
 	snd_printd("buffer_size %d \n", params_buffer_size(p));
 }
-#else
-#define print_hwparams(x)
-#endif
 
 static snd_pcm_format_t hpi_to_alsa_formats[] = {
 	-1,			/* INVALID */
@@ -381,13 +376,13 @@ static void snd_card_asihpi_pcm_samplerates(struct snd_card_asihpi *asihpi,
 				"No local sampleclock, err %d\n", err);
 	}
 
-	for (idx = 0; idx < 100; idx++) {
-		if (hpi_sample_clock_query_local_rate(
-				h_control, idx, &sample_rate)) {
-			if (!idx)
-				snd_printk(KERN_ERR
-					"Local rate query failed\n");
-
+	for (idx = -1; idx < 100; idx++) {
+		if (idx == -1) {
+			if (hpi_sample_clock_get_sample_rate(h_control,
+				&sample_rate))
+				continue;
+		} else if (hpi_sample_clock_query_local_rate(h_control,
+					idx, &sample_rate)) {
 			break;
 		}
 
@@ -440,8 +435,6 @@ static void snd_card_asihpi_pcm_samplerates(struct snd_card_asihpi *asihpi,
 		}
 	}
 
-	/* printk(KERN_INFO "Supported rates %X %d %d\n",
-	   rates, rate_min, rate_max); */
 	pcmhw->rates = rates;
 	pcmhw->rate_min = rate_min;
 	pcmhw->rate_max = rate_max;
@@ -466,7 +459,7 @@ static int snd_card_asihpi_pcm_hw_params(struct snd_pcm_substream *substream,
 	if (err)
 		return err;
 
-	VPRINTK1(KERN_INFO "format %d, %d chans, %d_hz\n",
+	snd_printdd("format %d, %d chans, %d_hz\n",
 				format, params_channels(params),
 				params_rate(params));
 
@@ -489,13 +482,12 @@ static int snd_card_asihpi_pcm_hw_params(struct snd_pcm_substream *substream,
 		err = hpi_stream_host_buffer_attach(dpcm->h_stream,
 			params_buffer_bytes(params),  runtime->dma_addr);
 		if (err == 0) {
-			VPRINTK1(KERN_INFO
+			snd_printdd(
 				"stream_host_buffer_attach succeeded %u %lu\n",
 				params_buffer_bytes(params),
 				(unsigned long)runtime->dma_addr);
 		} else {
-			snd_printd(KERN_INFO
-					"stream_host_buffer_attach error %d\n",
+			snd_printd("stream_host_buffer_attach error %d\n",
 					err);
 			return -ENOMEM;
 		}
@@ -504,7 +496,7 @@ static int snd_card_asihpi_pcm_hw_params(struct snd_pcm_substream *substream,
 				&dpcm->hpi_buffer_attached,
 				NULL, NULL, NULL);
 
-		VPRINTK1(KERN_INFO "stream_host_buffer_attach status 0x%x\n",
+		snd_printdd("stream_host_buffer_attach status 0x%x\n",
 				dpcm->hpi_buffer_attached);
 	}
 	bytes_per_sec = params_rate(params) * params_channels(params);
@@ -517,7 +509,7 @@ static int snd_card_asihpi_pcm_hw_params(struct snd_pcm_substream *substream,
 	dpcm->bytes_per_sec = bytes_per_sec;
 	dpcm->buffer_bytes = params_buffer_bytes(params);
 	dpcm->period_bytes = params_period_bytes(params);
-	VPRINTK1(KERN_INFO "buffer_bytes=%d, period_bytes=%d, bps=%d\n",
+	snd_printdd("buffer_bytes=%d, period_bytes=%d, bps=%d\n",
 		dpcm->buffer_bytes, dpcm->period_bytes, bytes_per_sec);
 
 	return 0;
@@ -573,7 +565,7 @@ static int snd_card_asihpi_trigger(struct snd_pcm_substream *substream,
 	struct snd_pcm_substream *s;
 	u16 e;
 
-	VPRINTK1(KERN_INFO "%c%d trigger\n",
+	snd_printdd("%c%d trigger\n",
 			SCHR(substream->stream), substream->number);
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_START:
@@ -597,7 +589,7 @@ static int snd_card_asihpi_trigger(struct snd_pcm_substream *substream,
 				 * data??
 				 */
 				unsigned int preload = ds->period_bytes * 1;
-				VPRINTK2(KERN_INFO "%d preload x%x\n", s->number, preload);
+				snd_printddd("%d preload x%x\n", s->number, preload);
 				hpi_handle_error(hpi_outstream_write_buf(
 						ds->h_stream,
 						&runtime->dma_area[0],
@@ -607,7 +599,7 @@ static int snd_card_asihpi_trigger(struct snd_pcm_substream *substream,
 			}
 
 			if (card->support_grouping) {
-				VPRINTK1(KERN_INFO "\t%c%d group\n",
+				snd_printdd("\t%c%d group\n",
 					SCHR(s->stream),
 					s->number);
 				e = hpi_stream_group_add(
@@ -622,7 +614,7 @@ static int snd_card_asihpi_trigger(struct snd_pcm_substream *substream,
 			} else
 				break;
 		}
-		VPRINTK1(KERN_INFO "start\n");
+		snd_printdd("start\n");
 		/* start the master stream */
 		snd_card_asihpi_pcm_timer_start(substream);
 		if ((substream->stream == SNDRV_PCM_STREAM_CAPTURE) ||
@@ -644,14 +636,14 @@ static int snd_card_asihpi_trigger(struct snd_pcm_substream *substream,
 			s->runtime->status->state = SNDRV_PCM_STATE_SETUP;
 
 			if (card->support_grouping) {
-				VPRINTK1(KERN_INFO "\t%c%d group\n",
+				snd_printdd("\t%c%d group\n",
 					SCHR(s->stream),
 					s->number);
 				snd_pcm_trigger_done(s, substream);
 			} else
 				break;
 		}
-		VPRINTK1(KERN_INFO "stop\n");
+		snd_printdd("stop\n");
 
 		/* _prepare and _hwparams reset the stream */
 		hpi_handle_error(hpi_stream_stop(dpcm->h_stream));
@@ -664,12 +656,12 @@ static int snd_card_asihpi_trigger(struct snd_pcm_substream *substream,
 		break;
 
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
-		VPRINTK1(KERN_INFO "pause release\n");
+		snd_printdd("pause release\n");
 		hpi_handle_error(hpi_stream_start(dpcm->h_stream));
 		snd_card_asihpi_pcm_timer_start(substream);
 		break;
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-		VPRINTK1(KERN_INFO "pause\n");
+		snd_printdd("pause\n");
 		snd_card_asihpi_pcm_timer_stop(substream);
 		hpi_handle_error(hpi_stream_stop(dpcm->h_stream));
 		break;
@@ -741,7 +733,7 @@ static void snd_card_asihpi_timer_function(unsigned long data)
 	u16 state;
 	u32 buffer_size, bytes_avail, samples_played, on_card_bytes;
 
-	VPRINTK1(KERN_INFO "%c%d snd_card_asihpi_timer_function\n",
+	snd_printdd("%c%d snd_card_asihpi_timer_function\n",
 			SCHR(substream->stream), substream->number);
 
 	/* find minimum newdata and buffer pos in group */
@@ -770,10 +762,10 @@ static void snd_card_asihpi_timer_function(unsigned long data)
 			if ((bytes_avail == 0) &&
 			    (on_card_bytes < ds->pcm_buf_host_rw_ofs)) {
 				hpi_handle_error(hpi_stream_start(ds->h_stream));
-				VPRINTK1(KERN_INFO "P%d start\n", s->number);
+				snd_printdd("P%d start\n", s->number);
 			}
 		} else if (state == HPI_STATE_DRAINED) {
-			VPRINTK1(KERN_WARNING "P%d drained\n",
+			snd_printd(KERN_WARNING "P%d drained\n",
 					s->number);
 			/*snd_pcm_stop(s, SNDRV_PCM_STATE_XRUN);
 			continue; */
@@ -794,13 +786,13 @@ static void snd_card_asihpi_timer_function(unsigned long data)
 						newdata);
 		}
 
-		VPRINTK1(KERN_INFO "PB timer hw_ptr x%04lX, appl_ptr x%04lX\n",
+		snd_printdd("hw_ptr x%04lX, appl_ptr x%04lX\n",
 			(unsigned long)frames_to_bytes(runtime,
 					runtime->status->hw_ptr),
 			(unsigned long)frames_to_bytes(runtime,
 					runtime->control->appl_ptr));
 
-		VPRINTK1(KERN_INFO "%d %c%d S=%d, rw=%04X, dma=x%04X, left=x%04X,"
+		snd_printdd("%d %c%d S=%d, rw=%04X, dma=x%04X, left=x%04X,"
 			" aux=x%04X space=x%04X\n",
 			loops, SCHR(s->stream),	s->number,
 			state, ds->pcm_buf_host_rw_ofs, pcm_buf_dma_ofs, (int)bytes_avail,
@@ -822,7 +814,7 @@ static void snd_card_asihpi_timer_function(unsigned long data)
 
 	next_jiffies = max(next_jiffies, 1U);
 	dpcm->timer.expires = jiffies + next_jiffies;
-	VPRINTK1(KERN_INFO "jif %d buf pos x%04X newdata x%04X xfer x%04X\n",
+	snd_printdd("jif %d buf pos x%04X newdata x%04X xfer x%04X\n",
 			next_jiffies, pcm_buf_dma_ofs, newdata, xfercount);
 
 	snd_pcm_group_for_each_entry(s, substream) {
@@ -837,7 +829,7 @@ static void snd_card_asihpi_timer_function(unsigned long data)
 		if (xfercount && (on_card_bytes <= ds->period_bytes)) {
 			if (card->support_mmap) {
 				if (s->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-					VPRINTK2(KERN_INFO "P%d write x%04x\n",
+					snd_printddd("P%d write x%04x\n",
 						s->number,
 						ds->period_bytes);
 					hpi_handle_error(
@@ -848,7 +840,7 @@ static void snd_card_asihpi_timer_function(unsigned long data)
 							xfercount,
 							&ds->format));
 				} else {
-					VPRINTK2(KERN_INFO "C%d read x%04x\n",
+					snd_printddd("C%d read x%04x\n",
 						s->number,
 						xfercount);
 					hpi_handle_error(
@@ -871,7 +863,7 @@ static void snd_card_asihpi_timer_function(unsigned long data)
 static int snd_card_asihpi_playback_ioctl(struct snd_pcm_substream *substream,
 				    unsigned int cmd, void *arg)
 {
-	/* snd_printd(KERN_INFO "Playback ioctl %d\n", cmd); */
+	snd_printdd(KERN_INFO "Playback ioctl %d\n", cmd);
 	return snd_pcm_lib_ioctl(substream, cmd, arg);
 }
 
@@ -881,7 +873,7 @@ static int snd_card_asihpi_playback_prepare(struct snd_pcm_substream *
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_card_asihpi_pcm *dpcm = runtime->private_data;
 
-	VPRINTK1(KERN_INFO "playback prepare %d\n", substream->number);
+	snd_printdd("playback prepare %d\n", substream->number);
 
 	hpi_handle_error(hpi_outstream_reset(dpcm->h_stream));
 	dpcm->pcm_buf_host_rw_ofs = 0;
@@ -898,7 +890,7 @@ snd_card_asihpi_playback_pointer(struct snd_pcm_substream *substream)
 	snd_pcm_uframes_t ptr;
 
 	ptr = bytes_to_frames(runtime, dpcm->pcm_buf_dma_ofs % dpcm->buffer_bytes);
-	/* VPRINTK2(KERN_INFO "playback_pointer=x%04lx\n", (unsigned long)ptr); */
+	snd_printddd("playback_pointer=x%04lx\n", (unsigned long)ptr);
 	return ptr;
 }
 
@@ -1014,12 +1006,13 @@ static int snd_card_asihpi_playback_open(struct snd_pcm_substream *substream)
 
 	snd_pcm_hw_constraint_step(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
 		card->update_interval_frames);
+
 	snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
 		card->update_interval_frames * 2, UINT_MAX);
 
 	snd_pcm_set_sync(substream);
 
-	VPRINTK1(KERN_INFO "playback open\n");
+	snd_printdd("playback open\n");
 
 	return 0;
 }
@@ -1030,7 +1023,7 @@ static int snd_card_asihpi_playback_close(struct snd_pcm_substream *substream)
 	struct snd_card_asihpi_pcm *dpcm = runtime->private_data;
 
 	hpi_handle_error(hpi_outstream_close(dpcm->h_stream));
-	VPRINTK1(KERN_INFO "playback close\n");
+	snd_printdd("playback close\n");
 
 	return 0;
 }
@@ -1050,13 +1043,13 @@ static int snd_card_asihpi_playback_copy(struct snd_pcm_substream *substream,
 	if (copy_from_user(runtime->dma_area, src, len))
 		return -EFAULT;
 
-	VPRINTK2(KERN_DEBUG "playback copy%d %u bytes\n",
+	snd_printddd("playback copy%d %u bytes\n",
 			substream->number, len);
 
 	hpi_handle_error(hpi_outstream_write_buf(dpcm->h_stream,
 				runtime->dma_area, len, &dpcm->format));
 
-	dpcm->pcm_buf_host_rw_ofs = dpcm->pcm_buf_host_rw_ofs + len;
+	dpcm->pcm_buf_host_rw_ofs += len;
 
 	return 0;
 }
@@ -1066,16 +1059,11 @@ static int snd_card_asihpi_playback_silence(struct snd_pcm_substream *
 					    snd_pcm_uframes_t pos,
 					    snd_pcm_uframes_t count)
 {
-	unsigned int len;
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct snd_card_asihpi_pcm *dpcm = runtime->private_data;
-
-	len = frames_to_bytes(runtime, count);
-	VPRINTK1(KERN_INFO "playback silence %u bytes\n", len);
-
-	memset(runtime->dma_area, 0, len);
-	hpi_handle_error(hpi_outstream_write_buf(dpcm->h_stream,
-				runtime->dma_area, len, &dpcm->format));
+	/* Usually writes silence to DMA buffer, which should be overwritten
+	   by real audio later. Our fifos cannot be overwritten, and are not
+	   free-running DMAs. Silence is output on fifo underflow.
+	   This callback is still required to allow the copy callback to be used.
+	*/
 	return 0;
 }
 
@@ -1110,7 +1098,7 @@ snd_card_asihpi_capture_pointer(struct snd_pcm_substream *substream)
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_card_asihpi_pcm *dpcm = runtime->private_data;
 
-	VPRINTK2(KERN_INFO "capture pointer %d=%d\n",
+	snd_printddd("capture pointer %d=%d\n",
 			substream->number, dpcm->pcm_buf_dma_ofs);
 	/* NOTE Unlike playback can't use actual samples_played
 		for the capture position, because those samples aren't yet in
@@ -1135,7 +1123,7 @@ static int snd_card_asihpi_capture_prepare(struct snd_pcm_substream *substream)
 	dpcm->pcm_buf_dma_ofs = 0;
 	dpcm->pcm_buf_elapsed_dma_ofs = 0;
 
-	VPRINTK1("Capture Prepare %d\n", substream->number);
+	snd_printdd("Capture Prepare %d\n", substream->number);
 	return 0;
 }
 
@@ -1198,7 +1186,7 @@ static int snd_card_asihpi_capture_open(struct snd_pcm_substream *substream)
 	if (dpcm == NULL)
 		return -ENOMEM;
 
-	VPRINTK1("hpi_instream_open adapter %d stream %d\n",
+	snd_printdd("capture open adapter %d stream %d\n",
 		   card->adapter_index, substream->number);
 
 	err = hpi_handle_error(
@@ -1268,7 +1256,7 @@ static int snd_card_asihpi_capture_copy(struct snd_pcm_substream *substream,
 
 	len = frames_to_bytes(runtime, count);
 
-	VPRINTK2(KERN_INFO "capture copy%d %d bytes\n", substream->number, len);
+	snd_printddd("capture copy%d %d bytes\n", substream->number, len);
 	hpi_handle_error(hpi_instream_read_buf(dpcm->h_stream,
 				runtime->dma_area, len));
 
@@ -2887,6 +2875,9 @@ static int __devinit snd_asihpi_probe(struct pci_dev *pci_dev,
 	if (err)
 		asihpi->update_interval_frames = 512;
 
+	if (!asihpi->support_mmap)
+		asihpi->update_interval_frames *= 2;
+
 	hpi_handle_error(hpi_instream_open(asihpi->adapter_index,
 			     0, &h_stream));
 
@@ -2909,7 +2900,6 @@ static int __devinit snd_asihpi_probe(struct pci_dev *pci_dev,
 			asihpi->support_mrx
 	      );
 
-
 	err = snd_card_asihpi_pcm_new(asihpi, 0, pcm_substreams);
 	if (err < 0) {
 		snd_printk(KERN_ERR "pcm_new failed\n");
@@ -2944,6 +2934,7 @@ static int __devinit snd_asihpi_probe(struct pci_dev *pci_dev,
 	sprintf(card->longname, "%s %i",
 		card->shortname, asihpi->adapter_index);
 	err = snd_card_register(card);
+
 	if (!err) {
 		hpi_card->snd_card_asihpi = card;
 		dev++;
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 734c6ee55d8a..2942d2a9ea10 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -4256,6 +4256,84 @@ static int ad1984a_thinkpad_init(struct hda_codec *codec)
 }
 
 /*
+ * Precision R5500
+ * 0x12 - HP/line-out
+ * 0x13 - speaker (mono)
+ * 0x15 - mic-in
+ */
+
+static struct hda_verb ad1984a_precision_verbs[] = {
+	/* Unmute main output path */
+	{0x03, AC_VERB_SET_AMP_GAIN_MUTE, 0x27}, /* 0dB */
+	{0x21, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE + 0x1f}, /* 0dB */
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(5) + 0x17}, /* 0dB */
+	/* Analog mixer; mute as default */
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+	/* Select mic as input */
+	{0x0c, AC_VERB_SET_CONNECT_SEL, 0x1},
+	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE + 0x27}, /* 0dB */
+	/* Configure as mic */
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0x7002}, /* raise mic as default */
+	/* HP unmute */
+	{0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	/* turn on EAPD */
+	{0x13, AC_VERB_SET_EAPD_BTLENABLE, 0x02},
+	/* unsolicited event for pin-sense */
+	{0x12, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | AD1884A_HP_EVENT},
+	{ } /* end */
+};
+
+static struct snd_kcontrol_new ad1984a_precision_mixers[] = {
+	HDA_CODEC_VOLUME("Master Playback Volume", 0x21, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Master Playback Switch", 0x21, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("PCM Playback Volume", 0x20, 0x5, HDA_INPUT),
+	HDA_CODEC_MUTE("PCM Playback Switch", 0x20, 0x5, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x20, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x20, 0x01, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost Volume", 0x15, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Front Playback Switch", 0x12, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x13, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Capture Volume", 0x0c, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Capture Switch", 0x0c, 0x0, HDA_OUTPUT),
+	{ } /* end */
+};
+
+
+/* mute internal speaker if HP is plugged */
+static void ad1984a_precision_automute(struct hda_codec *codec)
+{
+	unsigned int present;
+
+	present = snd_hda_jack_detect(codec, 0x12);
+	snd_hda_codec_amp_stereo(codec, 0x13, HDA_OUTPUT, 0,
+				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+}
+
+
+/* unsolicited event for HP jack sensing */
+static void ad1984a_precision_unsol_event(struct hda_codec *codec,
+					 unsigned int res)
+{
+	if ((res >> 26) != AD1884A_HP_EVENT)
+		return;
+	ad1984a_precision_automute(codec);
+}
+
+/* initialize jack-sensing, too */
+static int ad1984a_precision_init(struct hda_codec *codec)
+{
+	ad198x_init(codec);
+	ad1984a_precision_automute(codec);
+	return 0;
+}
+
+
+/*
  * HP Touchsmart
  * port-A (0x11) - front hp-out
  * port-B (0x14) - unused
@@ -4384,6 +4462,7 @@ enum {
 	AD1884A_MOBILE,
 	AD1884A_THINKPAD,
 	AD1984A_TOUCHSMART,
+	AD1984A_PRECISION,
 	AD1884A_MODELS
 };
 
@@ -4393,9 +4472,11 @@ static const char * const ad1884a_models[AD1884A_MODELS] = {
 	[AD1884A_MOBILE]	= "mobile",
 	[AD1884A_THINKPAD]	= "thinkpad",
 	[AD1984A_TOUCHSMART]	= "touchsmart",
+	[AD1984A_PRECISION]	= "precision",
 };
 
 static struct snd_pci_quirk ad1884a_cfg_tbl[] = {
+	SND_PCI_QUIRK(0x1028, 0x04ac, "Precision R5500", AD1984A_PRECISION),
 	SND_PCI_QUIRK(0x103c, 0x3030, "HP", AD1884A_MOBILE),
 	SND_PCI_QUIRK(0x103c, 0x3037, "HP 2230s", AD1884A_LAPTOP),
 	SND_PCI_QUIRK(0x103c, 0x3056, "HP", AD1884A_MOBILE),
@@ -4489,6 +4570,14 @@ static int patch_ad1884a(struct hda_codec *codec)
 		codec->patch_ops.unsol_event = ad1984a_thinkpad_unsol_event;
 		codec->patch_ops.init = ad1984a_thinkpad_init;
 		break;
+	case AD1984A_PRECISION:
+		spec->mixers[0] = ad1984a_precision_mixers;
+		spec->init_verbs[spec->num_init_verbs++] =
+			ad1984a_precision_verbs;
+		spec->multiout.dig_out_nid = 0;
+		codec->patch_ops.unsol_event = ad1984a_precision_unsol_event;
+		codec->patch_ops.init = ad1984a_precision_init;
+		break;
 	case AD1984A_TOUCHSMART:
 		spec->mixers[0] = ad1984a_touchsmart_mixers;
 		spec->init_verbs[0] = ad1984a_touchsmart_verbs;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5d582de91c19..0ef0035fe99f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1290,7 +1290,7 @@ static void alc_auto_init_amp(struct hda_codec *codec, int type)
 	case 0x10ec0883:
 	case 0x10ec0885:
 	case 0x10ec0887:
-	case 0x10ec0889:
+	/*case 0x10ec0889:*/ /* this causes an SPDIF problem */
 		alc889_coef_init(codec);
 		break;
 	case 0x10ec0888:
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index c0dcfca9b5b5..c66d3f64dcf8 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -1568,6 +1568,46 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 	}
 },
 {
+	USB_DEVICE_VENDOR_SPEC(0x0582, 0x0104),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		/* .vendor_name = "Roland", */
+		/* .product_name = "UM-1G", */
+		.ifnum = 0,
+		.type = QUIRK_MIDI_FIXED_ENDPOINT,
+		.data = & (const struct snd_usb_midi_endpoint_info) {
+			.out_cables = 0x0001,
+			.in_cables  = 0x0001
+		}
+	}
+},
+{
+	/* Boss JS-8 Jam Station  */
+	USB_DEVICE(0x0582, 0x0109),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		/* .vendor_name = "BOSS", */
+		/* .product_name = "JS-8", */
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_COMPOSITE,
+		.data = (const struct snd_usb_audio_quirk[]) {
+			{
+				.ifnum = 0,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 1,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 2,
+				.type = QUIRK_MIDI_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = -1
+			}
+		}
+	}
+},
+{
 	/* has ID 0x0110 when not in Advanced Driver mode */
 	USB_DEVICE_VENDOR_SPEC(0x0582, 0x010f),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {