aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-08-10 18:38:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-08-10 18:38:19 -0400
commit 3d30701b58970425e1d45994d6cb82f828924fdd (patch)
tree 8b14cf462628bebf8548c1b8c205a674564052d1 /drivers/md
parent 8cbd84f2dd4e52a8771b191030c374ba3e56d291 (diff)
parent fd8aa2c1811bf60ccb2d5de0579c6f62aec1772d (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (24 commits)
  md: clean up do_md_stop
  md: fix another deadlock with removing sysfs attributes.
  md: move revalidate_disk() back outside open_mutex
  md/raid10: fix deadlock with unaligned read during resync
  md/bitmap: separate out loading a bitmap from initialising the structures.
  md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
  md/bitmap: optimise scanning of empty bitmaps.
  md/bitmap: clean up plugging calls.
  md/bitmap: reduce dependence on sysfs.
  md/bitmap: white space clean up and similar.
  md/raid5: export raid5 unplugging interface.
  md/plug: optionally use plugger to unplug an array during resync/recovery.
  md/raid5: add simple plugging infrastructure.
  md/raid5: export is_congested test
  raid5: Don't set read-ahead when there is no queue
  md: add support for raising dm events.
  md: export various start/stop interfaces
  md: split out md_rdev_init
  md: be more careful setting MD_CHANGE_CLEAN
  md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
  ...
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/Kconfig 18
-rw-r--r-- drivers/md/Makefile 77
-rw-r--r-- drivers/md/bitmap.c 508
-rw-r--r-- drivers/md/bitmap.h 6
-rw-r--r-- drivers/md/md.c 286
-rw-r--r-- drivers/md/md.h 55
-rw-r--r-- drivers/md/mktables.c 132
-rw-r--r-- drivers/md/raid10.c 18
-rw-r--r-- drivers/md/raid5.c 168
-rw-r--r-- drivers/md/raid5.h 9
-rw-r--r-- drivers/md/raid6algos.c 154
-rw-r--r-- drivers/md/raid6altivec.uc 130
-rw-r--r-- drivers/md/raid6int.uc 117
-rw-r--r-- drivers/md/raid6mmx.c 142
-rw-r--r-- drivers/md/raid6recov.c 132
-rw-r--r-- drivers/md/raid6sse1.c 162
-rw-r--r-- drivers/md/raid6sse2.c 262
-rw-r--r-- drivers/md/raid6test/Makefile 75
-rw-r--r-- drivers/md/raid6test/test.c 124
-rw-r--r-- drivers/md/raid6x86.h 61
-rw-r--r-- drivers/md/unroll.awk 20
21 files changed, 644 insertions, 2012 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 4a6feac8c94a..bf1a95e31559 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -121,7 +121,7 @@ config MD_RAID10
121config MD_RAID456 121config MD_RAID456
122 tristate "RAID-4/RAID-5/RAID-6 mode" 122 tristate "RAID-4/RAID-5/RAID-6 mode"
123 depends on BLK_DEV_MD 123 depends on BLK_DEV_MD
124 select MD_RAID6_PQ 124 select RAID6_PQ
125 select ASYNC_MEMCPY 125 select ASYNC_MEMCPY
126 select ASYNC_XOR 126 select ASYNC_XOR
127 select ASYNC_PQ 127 select ASYNC_PQ
@@ -165,22 +165,6 @@ config MULTICORE_RAID456
165 165
166 If unsure, say N. 166 If unsure, say N.
167 167
168config MD_RAID6_PQ
169 tristate
170
171config ASYNC_RAID6_TEST
172 tristate "Self test for hardware accelerated raid6 recovery"
173 depends on MD_RAID6_PQ
174 select ASYNC_RAID6_RECOV
175 ---help---
176 This is a one-shot self test that permutes through the
177 recovery of all the possible two disk failure scenarios for a
178 N-disk array. Recovery is performed with the asynchronous
179 raid6 recovery routines, and will optionally use an offload
180 engine if one is available.
181
182 If unsure, say N.
183
184config MD_MULTIPATH 168config MD_MULTIPATH
185 tristate "Multipath I/O support" 169 tristate "Multipath I/O support"
186 depends on BLK_DEV_MD 170 depends on BLK_DEV_MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index e355e7f6a536..5e3aac41919d 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -12,13 +12,6 @@ dm-log-userspace-y \
12 += dm-log-userspace-base.o dm-log-userspace-transfer.o 12 += dm-log-userspace-base.o dm-log-userspace-transfer.o
13md-mod-y += md.o bitmap.o 13md-mod-y += md.o bitmap.o
14raid456-y += raid5.o 14raid456-y += raid5.o
15raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
16 raid6int1.o raid6int2.o raid6int4.o \
17 raid6int8.o raid6int16.o raid6int32.o \
18 raid6altivec1.o raid6altivec2.o raid6altivec4.o \
19 raid6altivec8.o \
20 raid6mmx.o raid6sse1.o raid6sse2.o
21hostprogs-y += mktables
22 15
23# Note: link order is important. All raid personalities 16# Note: link order is important. All raid personalities
24# and must come before md.o, as they each initialise 17# and must come before md.o, as they each initialise
@@ -29,7 +22,6 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
29obj-$(CONFIG_MD_RAID0) += raid0.o 22obj-$(CONFIG_MD_RAID0) += raid0.o
30obj-$(CONFIG_MD_RAID1) += raid1.o 23obj-$(CONFIG_MD_RAID1) += raid1.o
31obj-$(CONFIG_MD_RAID10) += raid10.o 24obj-$(CONFIG_MD_RAID10) += raid10.o
32obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o
33obj-$(CONFIG_MD_RAID456) += raid456.o 25obj-$(CONFIG_MD_RAID456) += raid456.o
34obj-$(CONFIG_MD_MULTIPATH) += multipath.o 26obj-$(CONFIG_MD_MULTIPATH) += multipath.o
35obj-$(CONFIG_MD_FAULTY) += faulty.o 27obj-$(CONFIG_MD_FAULTY) += faulty.o
@@ -45,75 +37,6 @@ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
45obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o 37obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
46obj-$(CONFIG_DM_ZERO) += dm-zero.o 38obj-$(CONFIG_DM_ZERO) += dm-zero.o
47 39
48quiet_cmd_unroll = UNROLL $@
49 cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \
50 < $< > $@ || ( rm -f $@ && exit 1 )
51
52ifeq ($(CONFIG_ALTIVEC),y)
53altivec_flags := -maltivec -mabi=altivec
54endif
55
56ifeq ($(CONFIG_DM_UEVENT),y) 40ifeq ($(CONFIG_DM_UEVENT),y)
57dm-mod-objs += dm-uevent.o 41dm-mod-objs += dm-uevent.o
58endif 42endif
59
60targets += raid6int1.c
61$(obj)/raid6int1.c: UNROLL := 1
62$(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE
63 $(call if_changed,unroll)
64
65targets += raid6int2.c
66$(obj)/raid6int2.c: UNROLL := 2
67$(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE
68 $(call if_changed,unroll)
69
70targets += raid6int4.c
71$(obj)/raid6int4.c: UNROLL := 4
72$(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE
73 $(call if_changed,unroll)
74
75targets += raid6int8.c
76$(obj)/raid6int8.c: UNROLL := 8
77$(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE
78 $(call if_changed,unroll)
79
80targets += raid6int16.c
81$(obj)/raid6int16.c: UNROLL := 16
82$(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE
83 $(call if_changed,unroll)
84
85targets += raid6int32.c
86$(obj)/raid6int32.c: UNROLL := 32
87$(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE
88 $(call if_changed,unroll)
89
90CFLAGS_raid6altivec1.o += $(altivec_flags)
91targets += raid6altivec1.c
92$(obj)/raid6altivec1.c: UNROLL := 1
93$(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE
94 $(call if_changed,unroll)
95
96CFLAGS_raid6altivec2.o += $(altivec_flags)
97targets += raid6altivec2.c
98$(obj)/raid6altivec2.c: UNROLL := 2
99$(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE
100 $(call if_changed,unroll)
101
102CFLAGS_raid6altivec4.o += $(altivec_flags)
103targets += raid6altivec4.c
104$(obj)/raid6altivec4.c: UNROLL := 4
105$(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE
106 $(call if_changed,unroll)
107
108CFLAGS_raid6altivec8.o += $(altivec_flags)
109targets += raid6altivec8.c
110$(obj)/raid6altivec8.c: UNROLL := 8
111$(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE
112 $(call if_changed,unroll)
113
114quiet_cmd_mktable = TABLE $@
115 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
116
117targets += raid6tables.c
118$(obj)/raid6tables.c: $(obj)/mktables FORCE
119 $(call if_changed,mktable)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 1742435ce3ae..1ba1e122e948 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -13,7 +13,6 @@
13 * Still to do: 13 * Still to do:
14 * 14 *
15 * flush after percent set rather than just time based. (maybe both). 15 * flush after percent set rather than just time based. (maybe both).
16 * wait if count gets too high, wake when it drops to half.
17 */ 16 */
18 17
19#include <linux/blkdev.h> 18#include <linux/blkdev.h>
@@ -30,6 +29,7 @@
30#include "md.h" 29#include "md.h"
31#include "bitmap.h" 30#include "bitmap.h"
32 31
32#include <linux/dm-dirty-log.h>
33/* debug macros */ 33/* debug macros */
34 34
35#define DEBUG 0 35#define DEBUG 0
@@ -51,9 +51,6 @@
51#define INJECT_FATAL_FAULT_3 0 /* undef */ 51#define INJECT_FATAL_FAULT_3 0 /* undef */
52#endif 52#endif
53 53
54//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */
55#define DPRINTK(x...) do { } while(0)
56
57#ifndef PRINTK 54#ifndef PRINTK
58# if DEBUG > 0 55# if DEBUG > 0
59# define PRINTK(x...) printk(KERN_DEBUG x) 56# define PRINTK(x...) printk(KERN_DEBUG x)
@@ -62,12 +59,11 @@
62# endif 59# endif
63#endif 60#endif
64 61
65static inline char * bmname(struct bitmap *bitmap) 62static inline char *bmname(struct bitmap *bitmap)
66{ 63{
67 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 64 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
68} 65}
69 66
70
71/* 67/*
72 * just a placeholder - calls kmalloc for bitmap pages 68 * just a placeholder - calls kmalloc for bitmap pages
73 */ 69 */
@@ -78,7 +74,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
78#ifdef INJECT_FAULTS_1 74#ifdef INJECT_FAULTS_1
79 page = NULL; 75 page = NULL;
80#else 76#else
81 page = kmalloc(PAGE_SIZE, GFP_NOIO); 77 page = kzalloc(PAGE_SIZE, GFP_NOIO);
82#endif 78#endif
83 if (!page) 79 if (!page)
84 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); 80 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
@@ -107,7 +103,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
107 * if we find our page, we increment the page's refcount so that it stays 103 * if we find our page, we increment the page's refcount so that it stays
108 * allocated while we're using it 104 * allocated while we're using it
109 */ 105 */
110static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) 106static int bitmap_checkpage(struct bitmap *bitmap,
107 unsigned long page, int create)
111__releases(bitmap->lock) 108__releases(bitmap->lock)
112__acquires(bitmap->lock) 109__acquires(bitmap->lock)
113{ 110{
@@ -121,7 +118,6 @@ __acquires(bitmap->lock)
121 return -EINVAL; 118 return -EINVAL;
122 } 119 }
123 120
124
125 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 121 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
126 return 0; 122 return 0;
127 123
@@ -131,43 +127,34 @@ __acquires(bitmap->lock)
131 if (!create) 127 if (!create)
132 return -ENOENT; 128 return -ENOENT;
133 129
134 spin_unlock_irq(&bitmap->lock);
135
136 /* this page has not been allocated yet */ 130 /* this page has not been allocated yet */
137 131
138 if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { 132 spin_unlock_irq(&bitmap->lock);
133 mappage = bitmap_alloc_page(bitmap);
134 spin_lock_irq(&bitmap->lock);
135
136 if (mappage == NULL) {
139 PRINTK("%s: bitmap map page allocation failed, hijacking\n", 137 PRINTK("%s: bitmap map page allocation failed, hijacking\n",
140 bmname(bitmap)); 138 bmname(bitmap));
141 /* failed - set the hijacked flag so that we can use the 139 /* failed - set the hijacked flag so that we can use the
142 * pointer as a counter */ 140 * pointer as a counter */
143 spin_lock_irq(&bitmap->lock);
144 if (!bitmap->bp[page].map) 141 if (!bitmap->bp[page].map)
145 bitmap->bp[page].hijacked = 1; 142 bitmap->bp[page].hijacked = 1;
146 goto out; 143 } else if (bitmap->bp[page].map ||
147 } 144 bitmap->bp[page].hijacked) {
148
149 /* got a page */
150
151 spin_lock_irq(&bitmap->lock);
152
153 /* recheck the page */
154
155 if (bitmap->bp[page].map || bitmap->bp[page].hijacked) {
156 /* somebody beat us to getting the page */ 145 /* somebody beat us to getting the page */
157 bitmap_free_page(bitmap, mappage); 146 bitmap_free_page(bitmap, mappage);
158 return 0; 147 return 0;
159 } 148 } else {
160 149
161 /* no page was in place and we have one, so install it */ 150 /* no page was in place and we have one, so install it */
162 151
163 memset(mappage, 0, PAGE_SIZE); 152 bitmap->bp[page].map = mappage;
164 bitmap->bp[page].map = mappage; 153 bitmap->missing_pages--;
165 bitmap->missing_pages--; 154 }
166out:
167 return 0; 155 return 0;
168} 156}
169 157
170
171/* if page is completely empty, put it back on the free list, or dealloc it */ 158/* if page is completely empty, put it back on the free list, or dealloc it */
172/* if page was hijacked, unmark the flag so it might get alloced next time */ 159/* if page was hijacked, unmark the flag so it might get alloced next time */
173/* Note: lock should be held when calling this */ 160/* Note: lock should be held when calling this */
@@ -183,26 +170,15 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
183 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 170 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
184 bitmap->bp[page].hijacked = 0; 171 bitmap->bp[page].hijacked = 0;
185 bitmap->bp[page].map = NULL; 172 bitmap->bp[page].map = NULL;
186 return; 173 } else {
174 /* normal case, free the page */
175 ptr = bitmap->bp[page].map;
176 bitmap->bp[page].map = NULL;
177 bitmap->missing_pages++;
178 bitmap_free_page(bitmap, ptr);
187 } 179 }
188
189 /* normal case, free the page */
190
191#if 0
192/* actually ... let's not. We will probably need the page again exactly when
193 * memory is tight and we are flusing to disk
194 */
195 return;
196#else
197 ptr = bitmap->bp[page].map;
198 bitmap->bp[page].map = NULL;
199 bitmap->missing_pages++;
200 bitmap_free_page(bitmap, ptr);
201 return;
202#endif
203} 180}
204 181
205
206/* 182/*
207 * bitmap file handling - read and write the bitmap file and its superblock 183 * bitmap file handling - read and write the bitmap file and its superblock
208 */ 184 */
@@ -220,11 +196,14 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
220 196
221 mdk_rdev_t *rdev; 197 mdk_rdev_t *rdev;
222 sector_t target; 198 sector_t target;
199 int did_alloc = 0;
223 200
224 if (!page) 201 if (!page) {
225 page = alloc_page(GFP_KERNEL); 202 page = alloc_page(GFP_KERNEL);
226 if (!page) 203 if (!page)
227 return ERR_PTR(-ENOMEM); 204 return ERR_PTR(-ENOMEM);
205 did_alloc = 1;
206 }
228 207
229 list_for_each_entry(rdev, &mddev->disks, same_set) { 208 list_for_each_entry(rdev, &mddev->disks, same_set) {
230 if (! test_bit(In_sync, &rdev->flags) 209 if (! test_bit(In_sync, &rdev->flags)
@@ -242,6 +221,8 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
242 return page; 221 return page;
243 } 222 }
244 } 223 }
224 if (did_alloc)
225 put_page(page);
245 return ERR_PTR(-EIO); 226 return ERR_PTR(-EIO);
246 227
247} 228}
@@ -286,49 +267,51 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
286 mddev_t *mddev = bitmap->mddev; 267 mddev_t *mddev = bitmap->mddev;
287 268
288 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { 269 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
289 int size = PAGE_SIZE; 270 int size = PAGE_SIZE;
290 loff_t offset = mddev->bitmap_info.offset; 271 loff_t offset = mddev->bitmap_info.offset;
291 if (page->index == bitmap->file_pages-1) 272 if (page->index == bitmap->file_pages-1)
292 size = roundup(bitmap->last_page_size, 273 size = roundup(bitmap->last_page_size,
293 bdev_logical_block_size(rdev->bdev)); 274 bdev_logical_block_size(rdev->bdev));
294 /* Just make sure we aren't corrupting data or 275 /* Just make sure we aren't corrupting data or
295 * metadata 276 * metadata
296 */ 277 */
297 if (mddev->external) { 278 if (mddev->external) {
298 /* Bitmap could be anywhere. */ 279 /* Bitmap could be anywhere. */
299 if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) > 280 if (rdev->sb_start + offset + (page->index
300 rdev->data_offset && 281 * (PAGE_SIZE/512))
301 rdev->sb_start + offset < 282 > rdev->data_offset
302 rdev->data_offset + mddev->dev_sectors + 283 &&
303 (PAGE_SIZE/512)) 284 rdev->sb_start + offset
304 goto bad_alignment; 285 < (rdev->data_offset + mddev->dev_sectors
305 } else if (offset < 0) { 286 + (PAGE_SIZE/512)))
306 /* DATA BITMAP METADATA */ 287 goto bad_alignment;
307 if (offset 288 } else if (offset < 0) {
308 + (long)(page->index * (PAGE_SIZE/512)) 289 /* DATA BITMAP METADATA */
309 + size/512 > 0) 290 if (offset
310 /* bitmap runs in to metadata */ 291 + (long)(page->index * (PAGE_SIZE/512))
311 goto bad_alignment; 292 + size/512 > 0)
312 if (rdev->data_offset + mddev->dev_sectors 293 /* bitmap runs in to metadata */
313 > rdev->sb_start + offset) 294 goto bad_alignment;
314 /* data runs in to bitmap */ 295 if (rdev->data_offset + mddev->dev_sectors
315 goto bad_alignment; 296 > rdev->sb_start + offset)
316 } else if (rdev->sb_start < rdev->data_offset) { 297 /* data runs in to bitmap */
317 /* METADATA BITMAP DATA */ 298 goto bad_alignment;
318 if (rdev->sb_start 299 } else if (rdev->sb_start < rdev->data_offset) {
319 + offset 300 /* METADATA BITMAP DATA */
320 + page->index*(PAGE_SIZE/512) + size/512 301 if (rdev->sb_start
321 > rdev->data_offset) 302 + offset
322 /* bitmap runs in to data */ 303 + page->index*(PAGE_SIZE/512) + size/512
323 goto bad_alignment; 304 > rdev->data_offset)
324 } else { 305 /* bitmap runs in to data */
325 /* DATA METADATA BITMAP - no problems */ 306 goto bad_alignment;
326 } 307 } else {
327 md_super_write(mddev, rdev, 308 /* DATA METADATA BITMAP - no problems */
328 rdev->sb_start + offset 309 }
329 + page->index * (PAGE_SIZE/512), 310 md_super_write(mddev, rdev,
330 size, 311 rdev->sb_start + offset
331 page); 312 + page->index * (PAGE_SIZE/512),
313 size,
314 page);
332 } 315 }
333 316
334 if (wait) 317 if (wait)
@@ -364,10 +347,9 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
364 bh = bh->b_this_page; 347 bh = bh->b_this_page;
365 } 348 }
366 349
367 if (wait) { 350 if (wait)
368 wait_event(bitmap->write_wait, 351 wait_event(bitmap->write_wait,
369 atomic_read(&bitmap->pending_writes)==0); 352 atomic_read(&bitmap->pending_writes)==0);
370 }
371 } 353 }
372 if (bitmap->flags & BITMAP_WRITE_ERROR) 354 if (bitmap->flags & BITMAP_WRITE_ERROR)
373 bitmap_file_kick(bitmap); 355 bitmap_file_kick(bitmap);
@@ -424,7 +406,7 @@ static struct page *read_page(struct file *file, unsigned long index,
424 struct buffer_head *bh; 406 struct buffer_head *bh;
425 sector_t block; 407 sector_t block;
426 408
427 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, 409 PRINTK("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
428 (unsigned long long)index << PAGE_SHIFT); 410 (unsigned long long)index << PAGE_SHIFT);
429 411
430 page = alloc_page(GFP_KERNEL); 412 page = alloc_page(GFP_KERNEL);
@@ -478,7 +460,7 @@ static struct page *read_page(struct file *file, unsigned long index,
478 } 460 }
479out: 461out:
480 if (IS_ERR(page)) 462 if (IS_ERR(page))
481 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", 463 printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n",
482 (int)PAGE_SIZE, 464 (int)PAGE_SIZE,
483 (unsigned long long)index << PAGE_SHIFT, 465 (unsigned long long)index << PAGE_SHIFT,
484 PTR_ERR(page)); 466 PTR_ERR(page));
@@ -664,11 +646,14 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
664 sb = kmap_atomic(bitmap->sb_page, KM_USER0); 646 sb = kmap_atomic(bitmap->sb_page, KM_USER0);
665 old = le32_to_cpu(sb->state) & bits; 647 old = le32_to_cpu(sb->state) & bits;
666 switch (op) { 648 switch (op) {
667 case MASK_SET: sb->state |= cpu_to_le32(bits); 649 case MASK_SET:
668 break; 650 sb->state |= cpu_to_le32(bits);
669 case MASK_UNSET: sb->state &= cpu_to_le32(~bits); 651 break;
670 break; 652 case MASK_UNSET:
671 default: BUG(); 653 sb->state &= cpu_to_le32(~bits);
654 break;
655 default:
656 BUG();
672 } 657 }
673 kunmap_atomic(sb, KM_USER0); 658 kunmap_atomic(sb, KM_USER0);
674 return old; 659 return old;
@@ -710,12 +695,14 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon
710static inline struct page *filemap_get_page(struct bitmap *bitmap, 695static inline struct page *filemap_get_page(struct bitmap *bitmap,
711 unsigned long chunk) 696 unsigned long chunk)
712{ 697{
713 if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; 698 if (bitmap->filemap == NULL)
699 return NULL;
700 if (file_page_index(bitmap, chunk) >= bitmap->file_pages)
701 return NULL;
714 return bitmap->filemap[file_page_index(bitmap, chunk) 702 return bitmap->filemap[file_page_index(bitmap, chunk)
715 - file_page_index(bitmap, 0)]; 703 - file_page_index(bitmap, 0)];
716} 704}
717 705
718
719static void bitmap_file_unmap(struct bitmap *bitmap) 706static void bitmap_file_unmap(struct bitmap *bitmap)
720{ 707{
721 struct page **map, *sb_page; 708 struct page **map, *sb_page;
@@ -766,7 +753,6 @@ static void bitmap_file_put(struct bitmap *bitmap)
766 } 753 }
767} 754}
768 755
769
770/* 756/*
771 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 757 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
772 * then it is no longer reliable, so we stop using it and we mark the file 758 * then it is no longer reliable, so we stop using it and we mark the file
@@ -785,7 +771,6 @@ static void bitmap_file_kick(struct bitmap *bitmap)
785 ptr = d_path(&bitmap->file->f_path, path, 771 ptr = d_path(&bitmap->file->f_path, path,
786 PAGE_SIZE); 772 PAGE_SIZE);
787 773
788
789 printk(KERN_ALERT 774 printk(KERN_ALERT
790 "%s: kicking failed bitmap file %s from array!\n", 775 "%s: kicking failed bitmap file %s from array!\n",
791 bmname(bitmap), IS_ERR(ptr) ? "" : ptr); 776 bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
@@ -803,27 +788,36 @@ static void bitmap_file_kick(struct bitmap *bitmap)
803} 788}
804 789
805enum bitmap_page_attr { 790enum bitmap_page_attr {
806 BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced 791 BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */
807 BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared 792 BITMAP_PAGE_CLEAN = 1, /* there are bits that might need to be cleared */
808 BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced 793 BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
809}; 794};
810 795
811static inline void set_page_attr(struct bitmap *bitmap, struct page *page, 796static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
812 enum bitmap_page_attr attr) 797 enum bitmap_page_attr attr)
813{ 798{
814 __set_bit((page->index<<2) + attr, bitmap->filemap_attr); 799 if (page)
800 __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
801 else
802 __set_bit(attr, &bitmap->logattrs);
815} 803}
816 804
817static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, 805static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
818 enum bitmap_page_attr attr) 806 enum bitmap_page_attr attr)
819{ 807{
820 __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); 808 if (page)
809 __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
810 else
811 __clear_bit(attr, &bitmap->logattrs);
821} 812}
822 813
823static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, 814static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
824 enum bitmap_page_attr attr) 815 enum bitmap_page_attr attr)
825{ 816{
826 return test_bit((page->index<<2) + attr, bitmap->filemap_attr); 817 if (page)
818 return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
819 else
820 return test_bit(attr, &bitmap->logattrs);
827} 821}
828 822
829/* 823/*
@@ -836,30 +830,32 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p
836static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 830static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
837{ 831{
838 unsigned long bit; 832 unsigned long bit;
839 struct page *page; 833 struct page *page = NULL;
840 void *kaddr; 834 void *kaddr;
841 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); 835 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
842 836
843 if (!bitmap->filemap) { 837 if (!bitmap->filemap) {
844 return; 838 struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
845 } 839 if (log)
846 840 log->type->mark_region(log, chunk);
847 page = filemap_get_page(bitmap, chunk); 841 } else {
848 if (!page) return;
849 bit = file_page_offset(bitmap, chunk);
850 842
851 /* set the bit */ 843 page = filemap_get_page(bitmap, chunk);
852 kaddr = kmap_atomic(page, KM_USER0); 844 if (!page)
853 if (bitmap->flags & BITMAP_HOSTENDIAN) 845 return;
854 set_bit(bit, kaddr); 846 bit = file_page_offset(bitmap, chunk);
855 else
856 ext2_set_bit(bit, kaddr);
857 kunmap_atomic(kaddr, KM_USER0);
858 PRINTK("set file bit %lu page %lu\n", bit, page->index);
859 847
848 /* set the bit */
849 kaddr = kmap_atomic(page, KM_USER0);
850 if (bitmap->flags & BITMAP_HOSTENDIAN)
851 set_bit(bit, kaddr);
852 else
853 ext2_set_bit(bit, kaddr);
854 kunmap_atomic(kaddr, KM_USER0);
855 PRINTK("set file bit %lu page %lu\n", bit, page->index);
856 }
860 /* record page number so it gets flushed to disk when unplug occurs */ 857 /* record page number so it gets flushed to disk when unplug occurs */
861 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 858 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
862
863} 859}
864 860
865/* this gets called when the md device is ready to unplug its underlying 861/* this gets called when the md device is ready to unplug its underlying
@@ -874,6 +870,16 @@ void bitmap_unplug(struct bitmap *bitmap)
874 870
875 if (!bitmap) 871 if (!bitmap)
876 return; 872 return;
873 if (!bitmap->filemap) {
874 /* Must be using a dirty_log */
875 struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
876 dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs);
877 need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs);
878 if (dirty || need_write)
879 if (log->type->flush(log))
880 bitmap->flags |= BITMAP_WRITE_ERROR;
881 goto out;
882 }
877 883
878 /* look at each page to see if there are any set bits that need to be 884 /* look at each page to see if there are any set bits that need to be
879 * flushed out to disk */ 885 * flushed out to disk */
@@ -892,7 +898,7 @@ void bitmap_unplug(struct bitmap *bitmap)
892 wait = 1; 898 wait = 1;
893 spin_unlock_irqrestore(&bitmap->lock, flags); 899 spin_unlock_irqrestore(&bitmap->lock, flags);
894 900
895 if (dirty | need_write) 901 if (dirty || need_write)
896 write_page(bitmap, page, 0); 902 write_page(bitmap, page, 0);
897 } 903 }
898 if (wait) { /* if any writes were performed, we need to wait on them */ 904 if (wait) { /* if any writes were performed, we need to wait on them */
@@ -902,9 +908,11 @@ void bitmap_unplug(struct bitmap *bitmap)
902 else 908 else
903 md_super_wait(bitmap->mddev); 909 md_super_wait(bitmap->mddev);
904 } 910 }
911out:
905 if (bitmap->flags & BITMAP_WRITE_ERROR) 912 if (bitmap->flags & BITMAP_WRITE_ERROR)
906 bitmap_file_kick(bitmap); 913 bitmap_file_kick(bitmap);
907} 914}
915EXPORT_SYMBOL(bitmap_unplug);
908 916
909static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 917static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
910/* * bitmap_init_from_disk -- called at bitmap_create time to initialize 918/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
@@ -943,12 +951,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
943 printk(KERN_INFO "%s: bitmap file is out of date, doing full " 951 printk(KERN_INFO "%s: bitmap file is out of date, doing full "
944 "recovery\n", bmname(bitmap)); 952 "recovery\n", bmname(bitmap));
945 953
946 bytes = (chunks + 7) / 8; 954 bytes = DIV_ROUND_UP(bitmap->chunks, 8);
947 if (!bitmap->mddev->bitmap_info.external) 955 if (!bitmap->mddev->bitmap_info.external)
948 bytes += sizeof(bitmap_super_t); 956 bytes += sizeof(bitmap_super_t);
949 957
950 958 num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
951 num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
952 959
953 if (file && i_size_read(file->f_mapping->host) < bytes) { 960 if (file && i_size_read(file->f_mapping->host) < bytes) {
954 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", 961 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
@@ -966,7 +973,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
966 973
967 /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ 974 /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */
968 bitmap->filemap_attr = kzalloc( 975 bitmap->filemap_attr = kzalloc(
969 roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 976 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
970 GFP_KERNEL); 977 GFP_KERNEL);
971 if (!bitmap->filemap_attr) 978 if (!bitmap->filemap_attr)
972 goto err; 979 goto err;
@@ -1021,7 +1028,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1021 if (outofdate) { 1028 if (outofdate) {
1022 /* 1029 /*
1023 * if bitmap is out of date, dirty the 1030 * if bitmap is out of date, dirty the
1024 * whole page and write it out 1031 * whole page and write it out
1025 */ 1032 */
1026 paddr = kmap_atomic(page, KM_USER0); 1033 paddr = kmap_atomic(page, KM_USER0);
1027 memset(paddr + offset, 0xff, 1034 memset(paddr + offset, 0xff,
@@ -1052,7 +1059,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1052 } 1059 }
1053 } 1060 }
1054 1061
1055 /* everything went OK */ 1062 /* everything went OK */
1056 ret = 0; 1063 ret = 0;
1057 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); 1064 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);
1058 1065
@@ -1080,21 +1087,16 @@ void bitmap_write_all(struct bitmap *bitmap)
1080 */ 1087 */
1081 int i; 1088 int i;
1082 1089
1083 for (i=0; i < bitmap->file_pages; i++) 1090 for (i = 0; i < bitmap->file_pages; i++)
1084 set_page_attr(bitmap, bitmap->filemap[i], 1091 set_page_attr(bitmap, bitmap->filemap[i],
1085 BITMAP_PAGE_NEEDWRITE); 1092 BITMAP_PAGE_NEEDWRITE);
1086} 1093}
1087 1094
1088
1089static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) 1095static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
1090{ 1096{
1091 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); 1097 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
1092 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1098 unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1093 bitmap->bp[page].count += inc; 1099 bitmap->bp[page].count += inc;
1094/*
1095 if (page == 0) printk("count page 0, offset %llu: %d gives %d\n",
1096 (unsigned long long)offset, inc, bitmap->bp[page].count);
1097*/
1098 bitmap_checkfree(bitmap, page); 1100 bitmap_checkfree(bitmap, page);
1099} 1101}
1100static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, 1102static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
@@ -1114,6 +1116,7 @@ void bitmap_daemon_work(mddev_t *mddev)
1114 struct page *page = NULL, *lastpage = NULL; 1116 struct page *page = NULL, *lastpage = NULL;
1115 int blocks; 1117 int blocks;
1116 void *paddr; 1118 void *paddr;
1119 struct dm_dirty_log *log = mddev->bitmap_info.log;
1117 1120
1118 /* Use a mutex to guard daemon_work against 1121 /* Use a mutex to guard daemon_work against
1119 * bitmap_destroy. 1122 * bitmap_destroy.
@@ -1138,11 +1141,12 @@ void bitmap_daemon_work(mddev_t *mddev)
1138 spin_lock_irqsave(&bitmap->lock, flags); 1141 spin_lock_irqsave(&bitmap->lock, flags);
1139 for (j = 0; j < bitmap->chunks; j++) { 1142 for (j = 0; j < bitmap->chunks; j++) {
1140 bitmap_counter_t *bmc; 1143 bitmap_counter_t *bmc;
1141 if (!bitmap->filemap) 1144 if (!bitmap->filemap) {
1142 /* error or shutdown */ 1145 if (!log)
1143 break; 1146 /* error or shutdown */
1144 1147 break;
1145 page = filemap_get_page(bitmap, j); 1148 } else
1149 page = filemap_get_page(bitmap, j);
1146 1150
1147 if (page != lastpage) { 1151 if (page != lastpage) {
1148 /* skip this page unless it's marked as needing cleaning */ 1152 /* skip this page unless it's marked as needing cleaning */
@@ -1197,14 +1201,11 @@ void bitmap_daemon_work(mddev_t *mddev)
1197 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), 1201 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
1198 &blocks, 0); 1202 &blocks, 0);
1199 if (bmc) { 1203 if (bmc) {
1200/*
1201 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
1202*/
1203 if (*bmc) 1204 if (*bmc)
1204 bitmap->allclean = 0; 1205 bitmap->allclean = 0;
1205 1206
1206 if (*bmc == 2) { 1207 if (*bmc == 2) {
1207 *bmc=1; /* maybe clear the bit next time */ 1208 *bmc = 1; /* maybe clear the bit next time */
1208 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1209 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1209 } else if (*bmc == 1 && !bitmap->need_sync) { 1210 } else if (*bmc == 1 && !bitmap->need_sync) {
1210 /* we can clear the bit */ 1211 /* we can clear the bit */
@@ -1214,14 +1215,17 @@ void bitmap_daemon_work(mddev_t *mddev)
1214 -1); 1215 -1);
1215 1216
1216 /* clear the bit */ 1217 /* clear the bit */
1217 paddr = kmap_atomic(page, KM_USER0); 1218 if (page) {
1218 if (bitmap->flags & BITMAP_HOSTENDIAN) 1219 paddr = kmap_atomic(page, KM_USER0);
1219 clear_bit(file_page_offset(bitmap, j), 1220 if (bitmap->flags & BITMAP_HOSTENDIAN)
1220 paddr); 1221 clear_bit(file_page_offset(bitmap, j),
1221 else 1222 paddr);
1222 ext2_clear_bit(file_page_offset(bitmap, j), 1223 else
1223 paddr); 1224 ext2_clear_bit(file_page_offset(bitmap, j),
1224 kunmap_atomic(paddr, KM_USER0); 1225 paddr);
1226 kunmap_atomic(paddr, KM_USER0);
1227 } else
1228 log->type->clear_region(log, j);
1225 } 1229 }
1226 } else 1230 } else
1227 j |= PAGE_COUNTER_MASK; 1231 j |= PAGE_COUNTER_MASK;
@@ -1229,12 +1233,16 @@ void bitmap_daemon_work(mddev_t *mddev)
1229 spin_unlock_irqrestore(&bitmap->lock, flags); 1233 spin_unlock_irqrestore(&bitmap->lock, flags);
1230 1234
1231 /* now sync the final page */ 1235 /* now sync the final page */
1232 if (lastpage != NULL) { 1236 if (lastpage != NULL || log != NULL) {
1233 spin_lock_irqsave(&bitmap->lock, flags); 1237 spin_lock_irqsave(&bitmap->lock, flags);
1234 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { 1238 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
1235 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1239 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1236 spin_unlock_irqrestore(&bitmap->lock, flags); 1240 spin_unlock_irqrestore(&bitmap->lock, flags);
1237 write_page(bitmap, lastpage, 0); 1241 if (lastpage)
1242 write_page(bitmap, lastpage, 0);
1243 else
1244 if (log->type->flush(log))
1245 bitmap->flags |= BITMAP_WRITE_ERROR;
1238 } else { 1246 } else {
1239 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1247 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1240 spin_unlock_irqrestore(&bitmap->lock, flags); 1248 spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -1243,7 +1251,7 @@ void bitmap_daemon_work(mddev_t *mddev)
1243 1251
1244 done: 1252 done:
1245 if (bitmap->allclean == 0) 1253 if (bitmap->allclean == 0)
1246 bitmap->mddev->thread->timeout = 1254 bitmap->mddev->thread->timeout =
1247 bitmap->mddev->bitmap_info.daemon_sleep; 1255 bitmap->mddev->bitmap_info.daemon_sleep;
1248 mutex_unlock(&mddev->bitmap_info.mutex); 1256 mutex_unlock(&mddev->bitmap_info.mutex);
1249} 1257}
@@ -1262,34 +1270,38 @@ __acquires(bitmap->lock)
1262 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1270 unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1263 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1271 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1264 sector_t csize; 1272 sector_t csize;
1273 int err;
1265 1274
1266 if (bitmap_checkpage(bitmap, page, create) < 0) { 1275 err = bitmap_checkpage(bitmap, page, create);
1276
1277 if (bitmap->bp[page].hijacked ||
1278 bitmap->bp[page].map == NULL)
1279 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
1280 PAGE_COUNTER_SHIFT - 1);
1281 else
1267 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); 1282 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
1268 *blocks = csize - (offset & (csize- 1)); 1283 *blocks = csize - (offset & (csize - 1));
1284
1285 if (err < 0)
1269 return NULL; 1286 return NULL;
1270 } 1287
1271 /* now locked ... */ 1288 /* now locked ... */
1272 1289
1273 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1290 if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1274 /* should we use the first or second counter field 1291 /* should we use the first or second counter field
1275 * of the hijacked pointer? */ 1292 * of the hijacked pointer? */
1276 int hi = (pageoff > PAGE_COUNTER_MASK); 1293 int hi = (pageoff > PAGE_COUNTER_MASK);
1277 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
1278 PAGE_COUNTER_SHIFT - 1);
1279 *blocks = csize - (offset & (csize- 1));
1280 return &((bitmap_counter_t *) 1294 return &((bitmap_counter_t *)
1281 &bitmap->bp[page].map)[hi]; 1295 &bitmap->bp[page].map)[hi];
1282 } else { /* page is allocated */ 1296 } else /* page is allocated */
1283 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
1284 *blocks = csize - (offset & (csize- 1));
1285 return (bitmap_counter_t *) 1297 return (bitmap_counter_t *)
1286 &(bitmap->bp[page].map[pageoff]); 1298 &(bitmap->bp[page].map[pageoff]);
1287 }
1288} 1299}
1289 1300
1290int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) 1301int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
1291{ 1302{
1292 if (!bitmap) return 0; 1303 if (!bitmap)
1304 return 0;
1293 1305
1294 if (behind) { 1306 if (behind) {
1295 int bw; 1307 int bw;
@@ -1322,17 +1334,16 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
1322 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1334 prepare_to_wait(&bitmap->overflow_wait, &__wait,
1323 TASK_UNINTERRUPTIBLE); 1335 TASK_UNINTERRUPTIBLE);
1324 spin_unlock_irq(&bitmap->lock); 1336 spin_unlock_irq(&bitmap->lock);
1325 blk_unplug(bitmap->mddev->queue); 1337 md_unplug(bitmap->mddev);
1326 schedule(); 1338 schedule();
1327 finish_wait(&bitmap->overflow_wait, &__wait); 1339 finish_wait(&bitmap->overflow_wait, &__wait);
1328 continue; 1340 continue;
1329 } 1341 }
1330 1342
1331 switch(*bmc) { 1343 switch (*bmc) {
1332 case 0: 1344 case 0:
1333 bitmap_file_set_bit(bitmap, offset); 1345 bitmap_file_set_bit(bitmap, offset);
1334 bitmap_count_page(bitmap,offset, 1); 1346 bitmap_count_page(bitmap, offset, 1);
1335 blk_plug_device_unlocked(bitmap->mddev->queue);
1336 /* fall through */ 1347 /* fall through */
1337 case 1: 1348 case 1:
1338 *bmc = 2; 1349 *bmc = 2;
@@ -1345,16 +1356,19 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
1345 offset += blocks; 1356 offset += blocks;
1346 if (sectors > blocks) 1357 if (sectors > blocks)
1347 sectors -= blocks; 1358 sectors -= blocks;
1348 else sectors = 0; 1359 else
1360 sectors = 0;
1349 } 1361 }
1350 bitmap->allclean = 0; 1362 bitmap->allclean = 0;
1351 return 0; 1363 return 0;
1352} 1364}
1365EXPORT_SYMBOL(bitmap_startwrite);
1353 1366
1354void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, 1367void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1355 int success, int behind) 1368 int success, int behind)
1356{ 1369{
1357 if (!bitmap) return; 1370 if (!bitmap)
1371 return;
1358 if (behind) { 1372 if (behind) {
1359 if (atomic_dec_and_test(&bitmap->behind_writes)) 1373 if (atomic_dec_and_test(&bitmap->behind_writes))
1360 wake_up(&bitmap->behind_wait); 1374 wake_up(&bitmap->behind_wait);
@@ -1381,7 +1395,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1381 bitmap->events_cleared < bitmap->mddev->events) { 1395 bitmap->events_cleared < bitmap->mddev->events) {
1382 bitmap->events_cleared = bitmap->mddev->events; 1396 bitmap->events_cleared = bitmap->mddev->events;
1383 bitmap->need_sync = 1; 1397 bitmap->need_sync = 1;
1384 sysfs_notify_dirent(bitmap->sysfs_can_clear); 1398 sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1385 } 1399 }
1386 1400
1387 if (!success && ! (*bmc & NEEDED_MASK)) 1401 if (!success && ! (*bmc & NEEDED_MASK))
@@ -1391,18 +1405,22 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1391 wake_up(&bitmap->overflow_wait); 1405 wake_up(&bitmap->overflow_wait);
1392 1406
1393 (*bmc)--; 1407 (*bmc)--;
1394 if (*bmc <= 2) { 1408 if (*bmc <= 2)
1395 set_page_attr(bitmap, 1409 set_page_attr(bitmap,
1396 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1410 filemap_get_page(
1411 bitmap,
1412 offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1397 BITMAP_PAGE_CLEAN); 1413 BITMAP_PAGE_CLEAN);
1398 } 1414
1399 spin_unlock_irqrestore(&bitmap->lock, flags); 1415 spin_unlock_irqrestore(&bitmap->lock, flags);
1400 offset += blocks; 1416 offset += blocks;
1401 if (sectors > blocks) 1417 if (sectors > blocks)
1402 sectors -= blocks; 1418 sectors -= blocks;
1403 else sectors = 0; 1419 else
1420 sectors = 0;
1404 } 1421 }
1405} 1422}
1423EXPORT_SYMBOL(bitmap_endwrite);
1406 1424
1407static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, 1425static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
1408 int degraded) 1426 int degraded)
@@ -1455,14 +1473,14 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
1455 } 1473 }
1456 return rv; 1474 return rv;
1457} 1475}
1476EXPORT_SYMBOL(bitmap_start_sync);
1458 1477
1459void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) 1478void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
1460{ 1479{
1461 bitmap_counter_t *bmc; 1480 bitmap_counter_t *bmc;
1462 unsigned long flags; 1481 unsigned long flags;
1463/* 1482
1464 if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted); 1483 if (bitmap == NULL) {
1465*/ if (bitmap == NULL) {
1466 *blocks = 1024; 1484 *blocks = 1024;
1467 return; 1485 return;
1468 } 1486 }
@@ -1471,26 +1489,23 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab
1471 if (bmc == NULL) 1489 if (bmc == NULL)
1472 goto unlock; 1490 goto unlock;
1473 /* locked */ 1491 /* locked */
1474/*
1475 if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks);
1476*/
1477 if (RESYNC(*bmc)) { 1492 if (RESYNC(*bmc)) {
1478 *bmc &= ~RESYNC_MASK; 1493 *bmc &= ~RESYNC_MASK;
1479 1494
1480 if (!NEEDED(*bmc) && aborted) 1495 if (!NEEDED(*bmc) && aborted)
1481 *bmc |= NEEDED_MASK; 1496 *bmc |= NEEDED_MASK;
1482 else { 1497 else {
1483 if (*bmc <= 2) { 1498 if (*bmc <= 2)
1484 set_page_attr(bitmap, 1499 set_page_attr(bitmap,
1485 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1500 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1486 BITMAP_PAGE_CLEAN); 1501 BITMAP_PAGE_CLEAN);
1487 }
1488 } 1502 }
1489 } 1503 }
1490 unlock: 1504 unlock:
1491 spin_unlock_irqrestore(&bitmap->lock, flags); 1505 spin_unlock_irqrestore(&bitmap->lock, flags);
1492 bitmap->allclean = 0; 1506 bitmap->allclean = 0;
1493} 1507}
1508EXPORT_SYMBOL(bitmap_end_sync);
1494 1509
1495void bitmap_close_sync(struct bitmap *bitmap) 1510void bitmap_close_sync(struct bitmap *bitmap)
1496{ 1511{
@@ -1507,6 +1522,7 @@ void bitmap_close_sync(struct bitmap *bitmap)
1507 sector += blocks; 1522 sector += blocks;
1508 } 1523 }
1509} 1524}
1525EXPORT_SYMBOL(bitmap_close_sync);
1510 1526
1511void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) 1527void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1512{ 1528{
@@ -1526,7 +1542,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1526 atomic_read(&bitmap->mddev->recovery_active) == 0); 1542 atomic_read(&bitmap->mddev->recovery_active) == 0);
1527 1543
1528 bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; 1544 bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
1529 set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); 1545 if (bitmap->mddev->persistent)
1546 set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
1530 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); 1547 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
1531 s = 0; 1548 s = 0;
1532 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1549 while (s < sector && s < bitmap->mddev->resync_max_sectors) {
@@ -1536,6 +1553,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1536 bitmap->last_end_sync = jiffies; 1553 bitmap->last_end_sync = jiffies;
1537 sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); 1554 sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
1538} 1555}
1556EXPORT_SYMBOL(bitmap_cond_end_sync);
1539 1557
1540static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1558static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1541{ 1559{
@@ -1552,9 +1570,9 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
1552 spin_unlock_irq(&bitmap->lock); 1570 spin_unlock_irq(&bitmap->lock);
1553 return; 1571 return;
1554 } 1572 }
1555 if (! *bmc) { 1573 if (!*bmc) {
1556 struct page *page; 1574 struct page *page;
1557 *bmc = 1 | (needed?NEEDED_MASK:0); 1575 *bmc = 1 | (needed ? NEEDED_MASK : 0);
1558 bitmap_count_page(bitmap, offset, 1); 1576 bitmap_count_page(bitmap, offset, 1);
1559 page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); 1577 page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
1560 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1578 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
@@ -1663,15 +1681,17 @@ int bitmap_create(mddev_t *mddev)
1663 unsigned long pages; 1681 unsigned long pages;
1664 struct file *file = mddev->bitmap_info.file; 1682 struct file *file = mddev->bitmap_info.file;
1665 int err; 1683 int err;
1666 sector_t start; 1684 struct sysfs_dirent *bm = NULL;
1667 struct sysfs_dirent *bm;
1668 1685
1669 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1686 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1670 1687
1671 if (!file && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ 1688 if (!file
1689 && !mddev->bitmap_info.offset
1690 && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */
1672 return 0; 1691 return 0;
1673 1692
1674 BUG_ON(file && mddev->bitmap_info.offset); 1693 BUG_ON(file && mddev->bitmap_info.offset);
1694 BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log);
1675 1695
1676 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); 1696 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1677 if (!bitmap) 1697 if (!bitmap)
@@ -1685,7 +1705,8 @@ int bitmap_create(mddev_t *mddev)
1685 1705
1686 bitmap->mddev = mddev; 1706 bitmap->mddev = mddev;
1687 1707
1688 bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); 1708 if (mddev->kobj.sd)
1709 bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap");
1689 if (bm) { 1710 if (bm) {
1690 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); 1711 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear");
1691 sysfs_put(bm); 1712 sysfs_put(bm);
@@ -1719,9 +1740,9 @@ int bitmap_create(mddev_t *mddev)
1719 bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); 1740 bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize);
1720 1741
1721 /* now that chunksize and chunkshift are set, we can use these macros */ 1742 /* now that chunksize and chunkshift are set, we can use these macros */
1722 chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> 1743 chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >>
1723 CHUNK_BLOCK_SHIFT(bitmap); 1744 CHUNK_BLOCK_SHIFT(bitmap);
1724 pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; 1745 pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;
1725 1746
1726 BUG_ON(!pages); 1747 BUG_ON(!pages);
1727 1748
@@ -1741,27 +1762,11 @@ int bitmap_create(mddev_t *mddev)
1741 if (!bitmap->bp) 1762 if (!bitmap->bp)
1742 goto error; 1763 goto error;
1743 1764
1744 /* now that we have some pages available, initialize the in-memory
1745 * bitmap from the on-disk bitmap */
1746 start = 0;
1747 if (mddev->degraded == 0
1748 || bitmap->events_cleared == mddev->events)
1749 /* no need to keep dirty bits to optimise a re-add of a missing device */
1750 start = mddev->recovery_cp;
1751 err = bitmap_init_from_disk(bitmap, start);
1752
1753 if (err)
1754 goto error;
1755
1756 printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", 1765 printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1757 pages, bmname(bitmap)); 1766 pages, bmname(bitmap));
1758 1767
1759 mddev->bitmap = bitmap; 1768 mddev->bitmap = bitmap;
1760 1769
1761 mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1762 md_wakeup_thread(mddev->thread);
1763
1764 bitmap_update_sb(bitmap);
1765 1770
1766 return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; 1771 return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;
1767 1772
@@ -1770,15 +1775,69 @@ int bitmap_create(mddev_t *mddev)
1770 return err; 1775 return err;
1771} 1776}
1772 1777
1778int bitmap_load(mddev_t *mddev)
1779{
1780 int err = 0;
1781 sector_t sector = 0;
1782 struct bitmap *bitmap = mddev->bitmap;
1783
1784 if (!bitmap)
1785 goto out;
1786
1787 /* Clear out old bitmap info first: Either there is none, or we
1788 * are resuming after someone else has possibly changed things,
1789 * so we should forget old cached info.
1790 * All chunks should be clean, but some might need_sync.
1791 */
1792 while (sector < mddev->resync_max_sectors) {
1793 int blocks;
1794 bitmap_start_sync(bitmap, sector, &blocks, 0);
1795 sector += blocks;
1796 }
1797 bitmap_close_sync(bitmap);
1798
1799 if (mddev->bitmap_info.log) {
1800 unsigned long i;
1801 struct dm_dirty_log *log = mddev->bitmap_info.log;
1802 for (i = 0; i < bitmap->chunks; i++)
1803 if (!log->type->in_sync(log, i, 1))
1804 bitmap_set_memory_bits(bitmap,
1805 (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
1806 1);
1807 } else {
1808 sector_t start = 0;
1809 if (mddev->degraded == 0
1810 || bitmap->events_cleared == mddev->events)
1811 /* no need to keep dirty bits to optimise a
1812 * re-add of a missing device */
1813 start = mddev->recovery_cp;
1814
1815 err = bitmap_init_from_disk(bitmap, start);
1816 }
1817 if (err)
1818 goto out;
1819
1820 mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1821 md_wakeup_thread(mddev->thread);
1822
1823 bitmap_update_sb(bitmap);
1824
1825 if (bitmap->flags & BITMAP_WRITE_ERROR)
1826 err = -EIO;
1827out:
1828 return err;
1829}
1830EXPORT_SYMBOL_GPL(bitmap_load);
1831
1773static ssize_t 1832static ssize_t
1774location_show(mddev_t *mddev, char *page) 1833location_show(mddev_t *mddev, char *page)
1775{ 1834{
1776 ssize_t len; 1835 ssize_t len;
1777 if (mddev->bitmap_info.file) { 1836 if (mddev->bitmap_info.file)
1778 len = sprintf(page, "file"); 1837 len = sprintf(page, "file");
1779 } else if (mddev->bitmap_info.offset) { 1838 else if (mddev->bitmap_info.offset)
1780 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); 1839 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
1781 } else 1840 else
1782 len = sprintf(page, "none"); 1841 len = sprintf(page, "none");
1783 len += sprintf(page+len, "\n"); 1842 len += sprintf(page+len, "\n");
1784 return len; 1843 return len;
@@ -1867,7 +1926,7 @@ timeout_show(mddev_t *mddev, char *page)
1867 ssize_t len; 1926 ssize_t len;
1868 unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; 1927 unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
1869 unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; 1928 unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
1870 1929
1871 len = sprintf(page, "%lu", secs); 1930 len = sprintf(page, "%lu", secs);
1872 if (jifs) 1931 if (jifs)
1873 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); 1932 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
@@ -2049,12 +2108,3 @@ struct attribute_group md_bitmap_group = {
2049 .attrs = md_bitmap_attrs, 2108 .attrs = md_bitmap_attrs,
2050}; 2109};
2051 2110
2052
2053/* the bitmap API -- for raid personalities */
2054EXPORT_SYMBOL(bitmap_startwrite);
2055EXPORT_SYMBOL(bitmap_endwrite);
2056EXPORT_SYMBOL(bitmap_start_sync);
2057EXPORT_SYMBOL(bitmap_end_sync);
2058EXPORT_SYMBOL(bitmap_unplug);
2059EXPORT_SYMBOL(bitmap_close_sync);
2060EXPORT_SYMBOL(bitmap_cond_end_sync);
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 3797dea4723a..e872a7bad6b8 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -222,6 +222,10 @@ struct bitmap {
222 unsigned long file_pages; /* number of pages in the file */ 222 unsigned long file_pages; /* number of pages in the file */
223 int last_page_size; /* bytes in the last page */ 223 int last_page_size; /* bytes in the last page */
224 224
225 unsigned long logattrs; /* used when filemap_attr doesn't exist
226 * because we are working with a dirty_log
227 */
228
225 unsigned long flags; 229 unsigned long flags;
226 230
227 int allclean; 231 int allclean;
@@ -243,12 +247,14 @@ struct bitmap {
243 wait_queue_head_t behind_wait; 247 wait_queue_head_t behind_wait;
244 248
245 struct sysfs_dirent *sysfs_can_clear; 249 struct sysfs_dirent *sysfs_can_clear;
250
246}; 251};
247 252
248/* the bitmap API */ 253/* the bitmap API */
249 254
250/* these are used only by md/bitmap */ 255/* these are used only by md/bitmap */
251int bitmap_create(mddev_t *mddev); 256int bitmap_create(mddev_t *mddev);
257int bitmap_load(mddev_t *mddev);
252void bitmap_flush(mddev_t *mddev); 258void bitmap_flush(mddev_t *mddev);
253void bitmap_destroy(mddev_t *mddev); 259void bitmap_destroy(mddev_t *mddev);
254 260
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 700c96edf9b2..11567c7999a2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -262,7 +262,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
262 * Once ->stop is called and completes, the module will be completely 262 * Once ->stop is called and completes, the module will be completely
263 * unused. 263 * unused.
264 */ 264 */
265static void mddev_suspend(mddev_t *mddev) 265void mddev_suspend(mddev_t *mddev)
266{ 266{
267 BUG_ON(mddev->suspended); 267 BUG_ON(mddev->suspended);
268 mddev->suspended = 1; 268 mddev->suspended = 1;
@@ -270,13 +270,15 @@ static void mddev_suspend(mddev_t *mddev)
270 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); 270 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
271 mddev->pers->quiesce(mddev, 1); 271 mddev->pers->quiesce(mddev, 1);
272} 272}
273EXPORT_SYMBOL_GPL(mddev_suspend);
273 274
274static void mddev_resume(mddev_t *mddev) 275void mddev_resume(mddev_t *mddev)
275{ 276{
276 mddev->suspended = 0; 277 mddev->suspended = 0;
277 wake_up(&mddev->sb_wait); 278 wake_up(&mddev->sb_wait);
278 mddev->pers->quiesce(mddev, 0); 279 mddev->pers->quiesce(mddev, 0);
279} 280}
281EXPORT_SYMBOL_GPL(mddev_resume);
280 282
281int mddev_congested(mddev_t *mddev, int bits) 283int mddev_congested(mddev_t *mddev, int bits)
282{ 284{
@@ -385,6 +387,51 @@ void md_barrier_request(mddev_t *mddev, struct bio *bio)
385} 387}
386EXPORT_SYMBOL(md_barrier_request); 388EXPORT_SYMBOL(md_barrier_request);
387 389
390/* Support for plugging.
391 * This mirrors the plugging support in request_queue, but does not
392 * require having a whole queue
393 */
394static void plugger_work(struct work_struct *work)
395{
396 struct plug_handle *plug =
397 container_of(work, struct plug_handle, unplug_work);
398 plug->unplug_fn(plug);
399}
400static void plugger_timeout(unsigned long data)
401{
402 struct plug_handle *plug = (void *)data;
403 kblockd_schedule_work(NULL, &plug->unplug_work);
404}
405void plugger_init(struct plug_handle *plug,
406 void (*unplug_fn)(struct plug_handle *))
407{
408 plug->unplug_flag = 0;
409 plug->unplug_fn = unplug_fn;
410 init_timer(&plug->unplug_timer);
411 plug->unplug_timer.function = plugger_timeout;
412 plug->unplug_timer.data = (unsigned long)plug;
413 INIT_WORK(&plug->unplug_work, plugger_work);
414}
415EXPORT_SYMBOL_GPL(plugger_init);
416
417void plugger_set_plug(struct plug_handle *plug)
418{
419 if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag))
420 mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1);
421}
422EXPORT_SYMBOL_GPL(plugger_set_plug);
423
424int plugger_remove_plug(struct plug_handle *plug)
425{
426 if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) {
427 del_timer(&plug->unplug_timer);
428 return 1;
429 } else
430 return 0;
431}
432EXPORT_SYMBOL_GPL(plugger_remove_plug);
433
434
388static inline mddev_t *mddev_get(mddev_t *mddev) 435static inline mddev_t *mddev_get(mddev_t *mddev)
389{ 436{
390 atomic_inc(&mddev->active); 437 atomic_inc(&mddev->active);
@@ -417,7 +464,7 @@ static void mddev_put(mddev_t *mddev)
417 spin_unlock(&all_mddevs_lock); 464 spin_unlock(&all_mddevs_lock);
418} 465}
419 466
420static void mddev_init(mddev_t *mddev) 467void mddev_init(mddev_t *mddev)
421{ 468{
422 mutex_init(&mddev->open_mutex); 469 mutex_init(&mddev->open_mutex);
423 mutex_init(&mddev->reconfig_mutex); 470 mutex_init(&mddev->reconfig_mutex);
@@ -437,6 +484,7 @@ static void mddev_init(mddev_t *mddev)
437 mddev->resync_max = MaxSector; 484 mddev->resync_max = MaxSector;
438 mddev->level = LEVEL_NONE; 485 mddev->level = LEVEL_NONE;
439} 486}
487EXPORT_SYMBOL_GPL(mddev_init);
440 488
441static mddev_t * mddev_find(dev_t unit) 489static mddev_t * mddev_find(dev_t unit)
442{ 490{
@@ -533,25 +581,31 @@ static void mddev_unlock(mddev_t * mddev)
533 * an access to the files will try to take reconfig_mutex 581 * an access to the files will try to take reconfig_mutex
534 * while holding the file unremovable, which leads to 582 * while holding the file unremovable, which leads to
535 * a deadlock. 583 * a deadlock.
536 * So hold open_mutex instead - we are allowed to take 584 * So set sysfs_active while the remove is happening,
537 * it while holding reconfig_mutex, and md_run can 585 * and anything else which might set ->to_remove or may
538 * use it to wait for the remove to complete. 586 * otherwise change the sysfs namespace will fail with
587 * -EBUSY if sysfs_active is still set.
588 * We set sysfs_active under reconfig_mutex and elsewhere
589 * test it under the same mutex to ensure its correct value
590 * is seen.
539 */ 591 */
540 struct attribute_group *to_remove = mddev->to_remove; 592 struct attribute_group *to_remove = mddev->to_remove;
541 mddev->to_remove = NULL; 593 mddev->to_remove = NULL;
542 mutex_lock(&mddev->open_mutex); 594 mddev->sysfs_active = 1;
543 mutex_unlock(&mddev->reconfig_mutex); 595 mutex_unlock(&mddev->reconfig_mutex);
544 596
545 if (to_remove != &md_redundancy_group) 597 if (mddev->kobj.sd) {
546 sysfs_remove_group(&mddev->kobj, to_remove); 598 if (to_remove != &md_redundancy_group)
547 if (mddev->pers == NULL || 599 sysfs_remove_group(&mddev->kobj, to_remove);
548 mddev->pers->sync_request == NULL) { 600 if (mddev->pers == NULL ||
549 sysfs_remove_group(&mddev->kobj, &md_redundancy_group); 601 mddev->pers->sync_request == NULL) {
550 if (mddev->sysfs_action) 602 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
551 sysfs_put(mddev->sysfs_action); 603 if (mddev->sysfs_action)
552 mddev->sysfs_action = NULL; 604 sysfs_put(mddev->sysfs_action);
605 mddev->sysfs_action = NULL;
606 }
553 } 607 }
554 mutex_unlock(&mddev->open_mutex); 608 mddev->sysfs_active = 0;
555 } else 609 } else
556 mutex_unlock(&mddev->reconfig_mutex); 610 mutex_unlock(&mddev->reconfig_mutex);
557 611
@@ -1812,11 +1866,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1812 goto fail; 1866 goto fail;
1813 1867
1814 ko = &part_to_dev(rdev->bdev->bd_part)->kobj; 1868 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
1815 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { 1869 if (sysfs_create_link(&rdev->kobj, ko, "block"))
1816 kobject_del(&rdev->kobj); 1870 /* failure here is OK */;
1817 goto fail; 1871 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
1818 }
1819 rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state");
1820 1872
1821 list_add_rcu(&rdev->same_set, &mddev->disks); 1873 list_add_rcu(&rdev->same_set, &mddev->disks);
1822 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); 1874 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
@@ -2335,8 +2387,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2335 set_bit(In_sync, &rdev->flags); 2387 set_bit(In_sync, &rdev->flags);
2336 err = 0; 2388 err = 0;
2337 } 2389 }
2338 if (!err && rdev->sysfs_state) 2390 if (!err)
2339 sysfs_notify_dirent(rdev->sysfs_state); 2391 sysfs_notify_dirent_safe(rdev->sysfs_state);
2340 return err ? err : len; 2392 return err ? err : len;
2341} 2393}
2342static struct rdev_sysfs_entry rdev_state = 2394static struct rdev_sysfs_entry rdev_state =
@@ -2431,14 +2483,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2431 rdev->raid_disk = -1; 2483 rdev->raid_disk = -1;
2432 return err; 2484 return err;
2433 } else 2485 } else
2434 sysfs_notify_dirent(rdev->sysfs_state); 2486 sysfs_notify_dirent_safe(rdev->sysfs_state);
2435 sprintf(nm, "rd%d", rdev->raid_disk); 2487 sprintf(nm, "rd%d", rdev->raid_disk);
2436 if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) 2488 if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
2437 printk(KERN_WARNING 2489 /* failure here is OK */;
2438 "md: cannot register "
2439 "%s for %s\n",
2440 nm, mdname(rdev->mddev));
2441
2442 /* don't wakeup anyone, leave that to userspace. */ 2490 /* don't wakeup anyone, leave that to userspace. */
2443 } else { 2491 } else {
2444 if (slot >= rdev->mddev->raid_disks) 2492 if (slot >= rdev->mddev->raid_disks)
@@ -2448,7 +2496,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2448 clear_bit(Faulty, &rdev->flags); 2496 clear_bit(Faulty, &rdev->flags);
2449 clear_bit(WriteMostly, &rdev->flags); 2497 clear_bit(WriteMostly, &rdev->flags);
2450 set_bit(In_sync, &rdev->flags); 2498 set_bit(In_sync, &rdev->flags);
2451 sysfs_notify_dirent(rdev->sysfs_state); 2499 sysfs_notify_dirent_safe(rdev->sysfs_state);
2452 } 2500 }
2453 return len; 2501 return len;
2454} 2502}
@@ -2696,6 +2744,24 @@ static struct kobj_type rdev_ktype = {
2696 .default_attrs = rdev_default_attrs, 2744 .default_attrs = rdev_default_attrs,
2697}; 2745};
2698 2746
2747void md_rdev_init(mdk_rdev_t *rdev)
2748{
2749 rdev->desc_nr = -1;
2750 rdev->saved_raid_disk = -1;
2751 rdev->raid_disk = -1;
2752 rdev->flags = 0;
2753 rdev->data_offset = 0;
2754 rdev->sb_events = 0;
2755 rdev->last_read_error.tv_sec = 0;
2756 rdev->last_read_error.tv_nsec = 0;
2757 atomic_set(&rdev->nr_pending, 0);
2758 atomic_set(&rdev->read_errors, 0);
2759 atomic_set(&rdev->corrected_errors, 0);
2760
2761 INIT_LIST_HEAD(&rdev->same_set);
2762 init_waitqueue_head(&rdev->blocked_wait);
2763}
2764EXPORT_SYMBOL_GPL(md_rdev_init);
2699/* 2765/*
2700 * Import a device. If 'super_format' >= 0, then sanity check the superblock 2766 * Import a device. If 'super_format' >= 0, then sanity check the superblock
2701 * 2767 *
@@ -2719,6 +2785,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2719 return ERR_PTR(-ENOMEM); 2785 return ERR_PTR(-ENOMEM);
2720 } 2786 }
2721 2787
2788 md_rdev_init(rdev);
2722 if ((err = alloc_disk_sb(rdev))) 2789 if ((err = alloc_disk_sb(rdev)))
2723 goto abort_free; 2790 goto abort_free;
2724 2791
@@ -2728,18 +2795,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2728 2795
2729 kobject_init(&rdev->kobj, &rdev_ktype); 2796 kobject_init(&rdev->kobj, &rdev_ktype);
2730 2797
2731 rdev->desc_nr = -1;
2732 rdev->saved_raid_disk = -1;
2733 rdev->raid_disk = -1;
2734 rdev->flags = 0;
2735 rdev->data_offset = 0;
2736 rdev->sb_events = 0;
2737 rdev->last_read_error.tv_sec = 0;
2738 rdev->last_read_error.tv_nsec = 0;
2739 atomic_set(&rdev->nr_pending, 0);
2740 atomic_set(&rdev->read_errors, 0);
2741 atomic_set(&rdev->corrected_errors, 0);
2742
2743 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; 2798 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
2744 if (!size) { 2799 if (!size) {
2745 printk(KERN_WARNING 2800 printk(KERN_WARNING
@@ -2768,9 +2823,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2768 } 2823 }
2769 } 2824 }
2770 2825
2771 INIT_LIST_HEAD(&rdev->same_set);
2772 init_waitqueue_head(&rdev->blocked_wait);
2773
2774 return rdev; 2826 return rdev;
2775 2827
2776abort_free: 2828abort_free:
@@ -2961,7 +3013,9 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
2961 * - new personality will access other array. 3013 * - new personality will access other array.
2962 */ 3014 */
2963 3015
2964 if (mddev->sync_thread || mddev->reshape_position != MaxSector) 3016 if (mddev->sync_thread ||
3017 mddev->reshape_position != MaxSector ||
3018 mddev->sysfs_active)
2965 return -EBUSY; 3019 return -EBUSY;
2966 3020
2967 if (!mddev->pers->quiesce) { 3021 if (!mddev->pers->quiesce) {
@@ -3438,7 +3492,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
3438 if (err) 3492 if (err)
3439 return err; 3493 return err;
3440 else { 3494 else {
3441 sysfs_notify_dirent(mddev->sysfs_state); 3495 sysfs_notify_dirent_safe(mddev->sysfs_state);
3442 return len; 3496 return len;
3443 } 3497 }
3444} 3498}
@@ -3736,7 +3790,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
3736 } 3790 }
3737 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3791 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3738 md_wakeup_thread(mddev->thread); 3792 md_wakeup_thread(mddev->thread);
3739 sysfs_notify_dirent(mddev->sysfs_action); 3793 sysfs_notify_dirent_safe(mddev->sysfs_action);
3740 return len; 3794 return len;
3741} 3795}
3742 3796
@@ -4282,13 +4336,14 @@ static int md_alloc(dev_t dev, char *name)
4282 disk->disk_name); 4336 disk->disk_name);
4283 error = 0; 4337 error = 0;
4284 } 4338 }
4285 if (sysfs_create_group(&mddev->kobj, &md_bitmap_group)) 4339 if (mddev->kobj.sd &&
4340 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4286 printk(KERN_DEBUG "pointless warning\n"); 4341 printk(KERN_DEBUG "pointless warning\n");
4287 abort: 4342 abort:
4288 mutex_unlock(&disks_mutex); 4343 mutex_unlock(&disks_mutex);
4289 if (!error) { 4344 if (!error && mddev->kobj.sd) {
4290 kobject_uevent(&mddev->kobj, KOBJ_ADD); 4345 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4291 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state"); 4346 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4292 } 4347 }
4293 mddev_put(mddev); 4348 mddev_put(mddev);
4294 return error; 4349 return error;
@@ -4326,14 +4381,14 @@ static void md_safemode_timeout(unsigned long data)
4326 if (!atomic_read(&mddev->writes_pending)) { 4381 if (!atomic_read(&mddev->writes_pending)) {
4327 mddev->safemode = 1; 4382 mddev->safemode = 1;
4328 if (mddev->external) 4383 if (mddev->external)
4329 sysfs_notify_dirent(mddev->sysfs_state); 4384 sysfs_notify_dirent_safe(mddev->sysfs_state);
4330 } 4385 }
4331 md_wakeup_thread(mddev->thread); 4386 md_wakeup_thread(mddev->thread);
4332} 4387}
4333 4388
4334static int start_dirty_degraded; 4389static int start_dirty_degraded;
4335 4390
4336static int md_run(mddev_t *mddev) 4391int md_run(mddev_t *mddev)
4337{ 4392{
4338 int err; 4393 int err;
4339 mdk_rdev_t *rdev; 4394 mdk_rdev_t *rdev;
@@ -4345,13 +4400,9 @@ static int md_run(mddev_t *mddev)
4345 4400
4346 if (mddev->pers) 4401 if (mddev->pers)
4347 return -EBUSY; 4402 return -EBUSY;
4348 4403 /* Cannot run until previous stop completes properly */
4349 /* These two calls synchronise us with the 4404 if (mddev->sysfs_active)
4350 * sysfs_remove_group calls in mddev_unlock, 4405 return -EBUSY;
4351 * so they must have completed.
4352 */
4353 mutex_lock(&mddev->open_mutex);
4354 mutex_unlock(&mddev->open_mutex);
4355 4406
4356 /* 4407 /*
4357 * Analyze all RAID superblock(s) 4408 * Analyze all RAID superblock(s)
@@ -4398,7 +4449,7 @@ static int md_run(mddev_t *mddev)
4398 return -EINVAL; 4449 return -EINVAL;
4399 } 4450 }
4400 } 4451 }
4401 sysfs_notify_dirent(rdev->sysfs_state); 4452 sysfs_notify_dirent_safe(rdev->sysfs_state);
4402 } 4453 }
4403 4454
4404 spin_lock(&pers_lock); 4455 spin_lock(&pers_lock);
@@ -4497,11 +4548,12 @@ static int md_run(mddev_t *mddev)
4497 return err; 4548 return err;
4498 } 4549 }
4499 if (mddev->pers->sync_request) { 4550 if (mddev->pers->sync_request) {
4500 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) 4551 if (mddev->kobj.sd &&
4552 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4501 printk(KERN_WARNING 4553 printk(KERN_WARNING
4502 "md: cannot register extra attributes for %s\n", 4554 "md: cannot register extra attributes for %s\n",
4503 mdname(mddev)); 4555 mdname(mddev));
4504 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action"); 4556 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
4505 } else if (mddev->ro == 2) /* auto-readonly not meaningful */ 4557 } else if (mddev->ro == 2) /* auto-readonly not meaningful */
4506 mddev->ro = 0; 4558 mddev->ro = 0;
4507 4559
@@ -4519,8 +4571,7 @@ static int md_run(mddev_t *mddev)
4519 char nm[20]; 4571 char nm[20];
4520 sprintf(nm, "rd%d", rdev->raid_disk); 4572 sprintf(nm, "rd%d", rdev->raid_disk);
4521 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) 4573 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
4522 printk("md: cannot register %s for %s\n", 4574 /* failure here is OK */;
4523 nm, mdname(mddev));
4524 } 4575 }
4525 4576
4526 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 4577 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -4532,12 +4583,12 @@ static int md_run(mddev_t *mddev)
4532 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ 4583 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
4533 4584
4534 md_new_event(mddev); 4585 md_new_event(mddev);
4535 sysfs_notify_dirent(mddev->sysfs_state); 4586 sysfs_notify_dirent_safe(mddev->sysfs_state);
4536 if (mddev->sysfs_action) 4587 sysfs_notify_dirent_safe(mddev->sysfs_action);
4537 sysfs_notify_dirent(mddev->sysfs_action);
4538 sysfs_notify(&mddev->kobj, NULL, "degraded"); 4588 sysfs_notify(&mddev->kobj, NULL, "degraded");
4539 return 0; 4589 return 0;
4540} 4590}
4591EXPORT_SYMBOL_GPL(md_run);
4541 4592
4542static int do_md_run(mddev_t *mddev) 4593static int do_md_run(mddev_t *mddev)
4543{ 4594{
@@ -4546,7 +4597,11 @@ static int do_md_run(mddev_t *mddev)
4546 err = md_run(mddev); 4597 err = md_run(mddev);
4547 if (err) 4598 if (err)
4548 goto out; 4599 goto out;
4549 4600 err = bitmap_load(mddev);
4601 if (err) {
4602 bitmap_destroy(mddev);
4603 goto out;
4604 }
4550 set_capacity(mddev->gendisk, mddev->array_sectors); 4605 set_capacity(mddev->gendisk, mddev->array_sectors);
4551 revalidate_disk(mddev->gendisk); 4606 revalidate_disk(mddev->gendisk);
4552 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4607 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
@@ -4574,7 +4629,7 @@ static int restart_array(mddev_t *mddev)
4574 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 4629 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4575 md_wakeup_thread(mddev->thread); 4630 md_wakeup_thread(mddev->thread);
4576 md_wakeup_thread(mddev->sync_thread); 4631 md_wakeup_thread(mddev->sync_thread);
4577 sysfs_notify_dirent(mddev->sysfs_state); 4632 sysfs_notify_dirent_safe(mddev->sysfs_state);
4578 return 0; 4633 return 0;
4579} 4634}
4580 4635
@@ -4645,9 +4700,10 @@ static void md_clean(mddev_t *mddev)
4645 mddev->bitmap_info.chunksize = 0; 4700 mddev->bitmap_info.chunksize = 0;
4646 mddev->bitmap_info.daemon_sleep = 0; 4701 mddev->bitmap_info.daemon_sleep = 0;
4647 mddev->bitmap_info.max_write_behind = 0; 4702 mddev->bitmap_info.max_write_behind = 0;
4703 mddev->plug = NULL;
4648} 4704}
4649 4705
4650static void md_stop_writes(mddev_t *mddev) 4706void md_stop_writes(mddev_t *mddev)
4651{ 4707{
4652 if (mddev->sync_thread) { 4708 if (mddev->sync_thread) {
4653 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4709 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4667,11 +4723,10 @@ static void md_stop_writes(mddev_t *mddev)
4667 md_update_sb(mddev, 1); 4723 md_update_sb(mddev, 1);
4668 } 4724 }
4669} 4725}
4726EXPORT_SYMBOL_GPL(md_stop_writes);
4670 4727
4671static void md_stop(mddev_t *mddev) 4728void md_stop(mddev_t *mddev)
4672{ 4729{
4673 md_stop_writes(mddev);
4674
4675 mddev->pers->stop(mddev); 4730 mddev->pers->stop(mddev);
4676 if (mddev->pers->sync_request && mddev->to_remove == NULL) 4731 if (mddev->pers->sync_request && mddev->to_remove == NULL)
4677 mddev->to_remove = &md_redundancy_group; 4732 mddev->to_remove = &md_redundancy_group;
@@ -4679,6 +4734,7 @@ static void md_stop(mddev_t *mddev)
4679 mddev->pers = NULL; 4734 mddev->pers = NULL;
4680 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4735 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4681} 4736}
4737EXPORT_SYMBOL_GPL(md_stop);
4682 4738
4683static int md_set_readonly(mddev_t *mddev, int is_open) 4739static int md_set_readonly(mddev_t *mddev, int is_open)
4684{ 4740{
@@ -4698,7 +4754,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open)
4698 mddev->ro = 1; 4754 mddev->ro = 1;
4699 set_disk_ro(mddev->gendisk, 1); 4755 set_disk_ro(mddev->gendisk, 1);
4700 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4756 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4701 sysfs_notify_dirent(mddev->sysfs_state); 4757 sysfs_notify_dirent_safe(mddev->sysfs_state);
4702 err = 0; 4758 err = 0;
4703 } 4759 }
4704out: 4760out:
@@ -4712,26 +4768,29 @@ out:
4712 */ 4768 */
4713static int do_md_stop(mddev_t * mddev, int mode, int is_open) 4769static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4714{ 4770{
4715 int err = 0;
4716 struct gendisk *disk = mddev->gendisk; 4771 struct gendisk *disk = mddev->gendisk;
4717 mdk_rdev_t *rdev; 4772 mdk_rdev_t *rdev;
4718 4773
4719 mutex_lock(&mddev->open_mutex); 4774 mutex_lock(&mddev->open_mutex);
4720 if (atomic_read(&mddev->openers) > is_open) { 4775 if (atomic_read(&mddev->openers) > is_open ||
4776 mddev->sysfs_active) {
4721 printk("md: %s still in use.\n",mdname(mddev)); 4777 printk("md: %s still in use.\n",mdname(mddev));
4722 err = -EBUSY; 4778 mutex_unlock(&mddev->open_mutex);
4723 } else if (mddev->pers) { 4779 return -EBUSY;
4780 }
4724 4781
4782 if (mddev->pers) {
4725 if (mddev->ro) 4783 if (mddev->ro)
4726 set_disk_ro(disk, 0); 4784 set_disk_ro(disk, 0);
4727 4785
4786 md_stop_writes(mddev);
4728 md_stop(mddev); 4787 md_stop(mddev);
4729 mddev->queue->merge_bvec_fn = NULL; 4788 mddev->queue->merge_bvec_fn = NULL;
4730 mddev->queue->unplug_fn = NULL; 4789 mddev->queue->unplug_fn = NULL;
4731 mddev->queue->backing_dev_info.congested_fn = NULL; 4790 mddev->queue->backing_dev_info.congested_fn = NULL;
4732 4791
4733 /* tell userspace to handle 'inactive' */ 4792 /* tell userspace to handle 'inactive' */
4734 sysfs_notify_dirent(mddev->sysfs_state); 4793 sysfs_notify_dirent_safe(mddev->sysfs_state);
4735 4794
4736 list_for_each_entry(rdev, &mddev->disks, same_set) 4795 list_for_each_entry(rdev, &mddev->disks, same_set)
4737 if (rdev->raid_disk >= 0) { 4796 if (rdev->raid_disk >= 0) {
@@ -4741,21 +4800,17 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4741 } 4800 }
4742 4801
4743 set_capacity(disk, 0); 4802 set_capacity(disk, 0);
4803 mutex_unlock(&mddev->open_mutex);
4744 revalidate_disk(disk); 4804 revalidate_disk(disk);
4745 4805
4746 if (mddev->ro) 4806 if (mddev->ro)
4747 mddev->ro = 0; 4807 mddev->ro = 0;
4748 4808 } else
4749 err = 0; 4809 mutex_unlock(&mddev->open_mutex);
4750 }
4751 mutex_unlock(&mddev->open_mutex);
4752 if (err)
4753 return err;
4754 /* 4810 /*
4755 * Free resources if final stop 4811 * Free resources if final stop
4756 */ 4812 */
4757 if (mode == 0) { 4813 if (mode == 0) {
4758
4759 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); 4814 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
4760 4815
4761 bitmap_destroy(mddev); 4816 bitmap_destroy(mddev);
@@ -4772,13 +4827,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4772 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4827 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4773 if (mddev->hold_active == UNTIL_STOP) 4828 if (mddev->hold_active == UNTIL_STOP)
4774 mddev->hold_active = 0; 4829 mddev->hold_active = 0;
4775
4776 } 4830 }
4777 err = 0;
4778 blk_integrity_unregister(disk); 4831 blk_integrity_unregister(disk);
4779 md_new_event(mddev); 4832 md_new_event(mddev);
4780 sysfs_notify_dirent(mddev->sysfs_state); 4833 sysfs_notify_dirent_safe(mddev->sysfs_state);
4781 return err; 4834 return 0;
4782} 4835}
4783 4836
4784#ifndef MODULE 4837#ifndef MODULE
@@ -5139,7 +5192,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5139 if (err) 5192 if (err)
5140 export_rdev(rdev); 5193 export_rdev(rdev);
5141 else 5194 else
5142 sysfs_notify_dirent(rdev->sysfs_state); 5195 sysfs_notify_dirent_safe(rdev->sysfs_state);
5143 5196
5144 md_update_sb(mddev, 1); 5197 md_update_sb(mddev, 1);
5145 if (mddev->degraded) 5198 if (mddev->degraded)
@@ -5332,8 +5385,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
5332 err = 0; 5385 err = 0;
5333 if (mddev->pers) { 5386 if (mddev->pers) {
5334 mddev->pers->quiesce(mddev, 1); 5387 mddev->pers->quiesce(mddev, 1);
5335 if (fd >= 0) 5388 if (fd >= 0) {
5336 err = bitmap_create(mddev); 5389 err = bitmap_create(mddev);
5390 if (!err)
5391 err = bitmap_load(mddev);
5392 }
5337 if (fd < 0 || err) { 5393 if (fd < 0 || err) {
5338 bitmap_destroy(mddev); 5394 bitmap_destroy(mddev);
5339 fd = -1; /* make sure to put the file */ 5395 fd = -1; /* make sure to put the file */
@@ -5582,6 +5638,8 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5582 mddev->bitmap_info.default_offset; 5638 mddev->bitmap_info.default_offset;
5583 mddev->pers->quiesce(mddev, 1); 5639 mddev->pers->quiesce(mddev, 1);
5584 rv = bitmap_create(mddev); 5640 rv = bitmap_create(mddev);
5641 if (!rv)
5642 rv = bitmap_load(mddev);
5585 if (rv) 5643 if (rv)
5586 bitmap_destroy(mddev); 5644 bitmap_destroy(mddev);
5587 mddev->pers->quiesce(mddev, 0); 5645 mddev->pers->quiesce(mddev, 0);
@@ -5814,7 +5872,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
5814 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { 5872 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
5815 if (mddev->ro == 2) { 5873 if (mddev->ro == 2) {
5816 mddev->ro = 0; 5874 mddev->ro = 0;
5817 sysfs_notify_dirent(mddev->sysfs_state); 5875 sysfs_notify_dirent_safe(mddev->sysfs_state);
5818 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 5876 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5819 md_wakeup_thread(mddev->thread); 5877 md_wakeup_thread(mddev->thread);
5820 } else { 5878 } else {
@@ -6065,10 +6123,12 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
6065 mddev->pers->error_handler(mddev,rdev); 6123 mddev->pers->error_handler(mddev,rdev);
6066 if (mddev->degraded) 6124 if (mddev->degraded)
6067 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 6125 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6068 sysfs_notify_dirent(rdev->sysfs_state); 6126 sysfs_notify_dirent_safe(rdev->sysfs_state);
6069 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 6127 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6070 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 6128 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6071 md_wakeup_thread(mddev->thread); 6129 md_wakeup_thread(mddev->thread);
6130 if (mddev->event_work.func)
6131 schedule_work(&mddev->event_work);
6072 md_new_event_inintr(mddev); 6132 md_new_event_inintr(mddev);
6073} 6133}
6074 6134
@@ -6526,7 +6586,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
6526 spin_unlock_irq(&mddev->write_lock); 6586 spin_unlock_irq(&mddev->write_lock);
6527 } 6587 }
6528 if (did_change) 6588 if (did_change)
6529 sysfs_notify_dirent(mddev->sysfs_state); 6589 sysfs_notify_dirent_safe(mddev->sysfs_state);
6530 wait_event(mddev->sb_wait, 6590 wait_event(mddev->sb_wait,
6531 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && 6591 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
6532 !test_bit(MD_CHANGE_PENDING, &mddev->flags)); 6592 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -6569,7 +6629,7 @@ int md_allow_write(mddev_t *mddev)
6569 mddev->safemode = 1; 6629 mddev->safemode = 1;
6570 spin_unlock_irq(&mddev->write_lock); 6630 spin_unlock_irq(&mddev->write_lock);
6571 md_update_sb(mddev, 0); 6631 md_update_sb(mddev, 0);
6572 sysfs_notify_dirent(mddev->sysfs_state); 6632 sysfs_notify_dirent_safe(mddev->sysfs_state);
6573 } else 6633 } else
6574 spin_unlock_irq(&mddev->write_lock); 6634 spin_unlock_irq(&mddev->write_lock);
6575 6635
@@ -6580,6 +6640,14 @@ int md_allow_write(mddev_t *mddev)
6580} 6640}
6581EXPORT_SYMBOL_GPL(md_allow_write); 6641EXPORT_SYMBOL_GPL(md_allow_write);
6582 6642
6643void md_unplug(mddev_t *mddev)
6644{
6645 if (mddev->queue)
6646 blk_unplug(mddev->queue);
6647 if (mddev->plug)
6648 mddev->plug->unplug_fn(mddev->plug);
6649}
6650
6583#define SYNC_MARKS 10 6651#define SYNC_MARKS 10
6584#define SYNC_MARK_STEP (3*HZ) 6652#define SYNC_MARK_STEP (3*HZ)
6585void md_do_sync(mddev_t *mddev) 6653void md_do_sync(mddev_t *mddev)
@@ -6758,12 +6826,13 @@ void md_do_sync(mddev_t *mddev)
6758 >= mddev->resync_max - mddev->curr_resync_completed 6826 >= mddev->resync_max - mddev->curr_resync_completed
6759 )) { 6827 )) {
6760 /* time to update curr_resync_completed */ 6828 /* time to update curr_resync_completed */
6761 blk_unplug(mddev->queue); 6829 md_unplug(mddev);
6762 wait_event(mddev->recovery_wait, 6830 wait_event(mddev->recovery_wait,
6763 atomic_read(&mddev->recovery_active) == 0); 6831 atomic_read(&mddev->recovery_active) == 0);
6764 mddev->curr_resync_completed = 6832 mddev->curr_resync_completed =
6765 mddev->curr_resync; 6833 mddev->curr_resync;
6766 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 6834 if (mddev->persistent)
6835 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6767 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6836 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6768 } 6837 }
6769 6838
@@ -6835,7 +6904,7 @@ void md_do_sync(mddev_t *mddev)
6835 * about not overloading the IO subsystem. (things like an 6904 * about not overloading the IO subsystem. (things like an
6836 * e2fsck being done on the RAID array should execute fast) 6905 * e2fsck being done on the RAID array should execute fast)
6837 */ 6906 */
6838 blk_unplug(mddev->queue); 6907 md_unplug(mddev);
6839 cond_resched(); 6908 cond_resched();
6840 6909
6841 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 6910 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
@@ -6854,7 +6923,7 @@ void md_do_sync(mddev_t *mddev)
6854 * this also signals 'finished resyncing' to md_stop 6923 * this also signals 'finished resyncing' to md_stop
6855 */ 6924 */
6856 out: 6925 out:
6857 blk_unplug(mddev->queue); 6926 md_unplug(mddev);
6858 6927
6859 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); 6928 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
6860 6929
@@ -6956,10 +7025,7 @@ static int remove_and_add_spares(mddev_t *mddev)
6956 sprintf(nm, "rd%d", rdev->raid_disk); 7025 sprintf(nm, "rd%d", rdev->raid_disk);
6957 if (sysfs_create_link(&mddev->kobj, 7026 if (sysfs_create_link(&mddev->kobj,
6958 &rdev->kobj, nm)) 7027 &rdev->kobj, nm))
6959 printk(KERN_WARNING 7028 /* failure here is OK */;
6960 "md: cannot register "
6961 "%s for %s\n",
6962 nm, mdname(mddev));
6963 spares++; 7029 spares++;
6964 md_new_event(mddev); 7030 md_new_event(mddev);
6965 set_bit(MD_CHANGE_DEVS, &mddev->flags); 7031 set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -7052,7 +7118,7 @@ void md_check_recovery(mddev_t *mddev)
7052 mddev->safemode = 0; 7118 mddev->safemode = 0;
7053 spin_unlock_irq(&mddev->write_lock); 7119 spin_unlock_irq(&mddev->write_lock);
7054 if (did_change) 7120 if (did_change)
7055 sysfs_notify_dirent(mddev->sysfs_state); 7121 sysfs_notify_dirent_safe(mddev->sysfs_state);
7056 } 7122 }
7057 7123
7058 if (mddev->flags) 7124 if (mddev->flags)
@@ -7091,7 +7157,7 @@ void md_check_recovery(mddev_t *mddev)
7091 mddev->recovery = 0; 7157 mddev->recovery = 0;
7092 /* flag recovery needed just to double check */ 7158 /* flag recovery needed just to double check */
7093 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 7159 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7094 sysfs_notify_dirent(mddev->sysfs_action); 7160 sysfs_notify_dirent_safe(mddev->sysfs_action);
7095 md_new_event(mddev); 7161 md_new_event(mddev);
7096 goto unlock; 7162 goto unlock;
7097 } 7163 }
@@ -7153,7 +7219,7 @@ void md_check_recovery(mddev_t *mddev)
7153 mddev->recovery = 0; 7219 mddev->recovery = 0;
7154 } else 7220 } else
7155 md_wakeup_thread(mddev->sync_thread); 7221 md_wakeup_thread(mddev->sync_thread);
7156 sysfs_notify_dirent(mddev->sysfs_action); 7222 sysfs_notify_dirent_safe(mddev->sysfs_action);
7157 md_new_event(mddev); 7223 md_new_event(mddev);
7158 } 7224 }
7159 unlock: 7225 unlock:
@@ -7162,7 +7228,7 @@ void md_check_recovery(mddev_t *mddev)
7162 if (test_and_clear_bit(MD_RECOVERY_RECOVER, 7228 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7163 &mddev->recovery)) 7229 &mddev->recovery))
7164 if (mddev->sysfs_action) 7230 if (mddev->sysfs_action)
7165 sysfs_notify_dirent(mddev->sysfs_action); 7231 sysfs_notify_dirent_safe(mddev->sysfs_action);
7166 } 7232 }
7167 mddev_unlock(mddev); 7233 mddev_unlock(mddev);
7168 } 7234 }
@@ -7170,7 +7236,7 @@ void md_check_recovery(mddev_t *mddev)
7170 7236
7171void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) 7237void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
7172{ 7238{
7173 sysfs_notify_dirent(rdev->sysfs_state); 7239 sysfs_notify_dirent_safe(rdev->sysfs_state);
7174 wait_event_timeout(rdev->blocked_wait, 7240 wait_event_timeout(rdev->blocked_wait,
7175 !test_bit(Blocked, &rdev->flags), 7241 !test_bit(Blocked, &rdev->flags),
7176 msecs_to_jiffies(5000)); 7242 msecs_to_jiffies(5000));
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fc56e0f21c80..a953fe2808ae 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -29,6 +29,26 @@
29typedef struct mddev_s mddev_t; 29typedef struct mddev_s mddev_t;
30typedef struct mdk_rdev_s mdk_rdev_t; 30typedef struct mdk_rdev_s mdk_rdev_t;
31 31
32/* generic plugging support - like that provided with request_queue,
33 * but does not require a request_queue
34 */
35struct plug_handle {
36 void (*unplug_fn)(struct plug_handle *);
37 struct timer_list unplug_timer;
38 struct work_struct unplug_work;
39 unsigned long unplug_flag;
40};
41#define PLUGGED_FLAG 1
42void plugger_init(struct plug_handle *plug,
43 void (*unplug_fn)(struct plug_handle *));
44void plugger_set_plug(struct plug_handle *plug);
45int plugger_remove_plug(struct plug_handle *plug);
46static inline void plugger_flush(struct plug_handle *plug)
47{
48 del_timer_sync(&plug->unplug_timer);
49 cancel_work_sync(&plug->unplug_work);
50}
51
32/* 52/*
33 * MD's 'extended' device 53 * MD's 'extended' device
34 */ 54 */
@@ -125,6 +145,10 @@ struct mddev_s
125 int suspended; 145 int suspended;
126 atomic_t active_io; 146 atomic_t active_io;
127 int ro; 147 int ro;
148 int sysfs_active; /* set when sysfs deletes
149 * are happening, so run/
150 * takeover/stop are not safe
151 */
128 152
129 struct gendisk *gendisk; 153 struct gendisk *gendisk;
130 154
@@ -297,9 +321,14 @@ struct mddev_s
297 * hot-adding a bitmap. It should 321 * hot-adding a bitmap. It should
298 * eventually be settable by sysfs. 322 * eventually be settable by sysfs.
299 */ 323 */
324 /* When md is serving under dm, it might use a
325 * dirty_log to store the bits.
326 */
327 struct dm_dirty_log *log;
328
300 struct mutex mutex; 329 struct mutex mutex;
301 unsigned long chunksize; 330 unsigned long chunksize;
302 unsigned long daemon_sleep; /* how many seconds between updates? */ 331 unsigned long daemon_sleep; /* how many jiffies between updates? */
303 unsigned long max_write_behind; /* write-behind mode */ 332 unsigned long max_write_behind; /* write-behind mode */
304 int external; 333 int external;
305 } bitmap_info; 334 } bitmap_info;
@@ -308,6 +337,8 @@ struct mddev_s
308 struct list_head all_mddevs; 337 struct list_head all_mddevs;
309 338
310 struct attribute_group *to_remove; 339 struct attribute_group *to_remove;
340 struct plug_handle *plug; /* if used by personality */
341
311 /* Generic barrier handling. 342 /* Generic barrier handling.
312 * If there is a pending barrier request, all other 343 * If there is a pending barrier request, all other
313 * writes are blocked while the devices are flushed. 344 * writes are blocked while the devices are flushed.
@@ -318,6 +349,7 @@ struct mddev_s
318 struct bio *barrier; 349 struct bio *barrier;
319 atomic_t flush_pending; 350 atomic_t flush_pending;
320 struct work_struct barrier_work; 351 struct work_struct barrier_work;
352 struct work_struct event_work; /* used by dm to report failure event */
321}; 353};
322 354
323 355
@@ -382,6 +414,18 @@ struct md_sysfs_entry {
382}; 414};
383extern struct attribute_group md_bitmap_group; 415extern struct attribute_group md_bitmap_group;
384 416
417static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name)
418{
419 if (sd)
420 return sysfs_get_dirent(sd, NULL, name);
421 return sd;
422}
423static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd)
424{
425 if (sd)
426 sysfs_notify_dirent(sd);
427}
428
385static inline char * mdname (mddev_t * mddev) 429static inline char * mdname (mddev_t * mddev)
386{ 430{
387 return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; 431 return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
@@ -474,5 +518,14 @@ extern int md_integrity_register(mddev_t *mddev);
474extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); 518extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
475extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); 519extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
476extern void restore_bitmap_write_access(struct file *file); 520extern void restore_bitmap_write_access(struct file *file);
521extern void md_unplug(mddev_t *mddev);
522
523extern void mddev_init(mddev_t *mddev);
524extern int md_run(mddev_t *mddev);
525extern void md_stop(mddev_t *mddev);
526extern void md_stop_writes(mddev_t *mddev);
527extern void md_rdev_init(mdk_rdev_t *rdev);
477 528
529extern void mddev_suspend(mddev_t *mddev);
530extern void mddev_resume(mddev_t *mddev);
478#endif /* _MD_MD_H */ 531#endif /* _MD_MD_H */
diff --git a/drivers/md/mktables.c b/drivers/md/mktables.c
deleted file mode 100644
index 3b1500843bba..000000000000
--- a/drivers/md/mktables.c
+++ /dev/null
@@ -1,132 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * mktables.c
13 *
14 * Make RAID-6 tables. This is a host user space program to be run at
15 * compile time.
16 */
17
18#include <stdio.h>
19#include <string.h>
20#include <inttypes.h>
21#include <stdlib.h>
22#include <time.h>
23
24static uint8_t gfmul(uint8_t a, uint8_t b)
25{
26 uint8_t v = 0;
27
28 while (b) {
29 if (b & 1)
30 v ^= a;
31 a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
32 b >>= 1;
33 }
34
35 return v;
36}
37
38static uint8_t gfpow(uint8_t a, int b)
39{
40 uint8_t v = 1;
41
42 b %= 255;
43 if (b < 0)
44 b += 255;
45
46 while (b) {
47 if (b & 1)
48 v = gfmul(v, a);
49 a = gfmul(a, a);
50 b >>= 1;
51 }
52
53 return v;
54}
55
56int main(int argc, char *argv[])
57{
58 int i, j, k;
59 uint8_t v;
60 uint8_t exptbl[256], invtbl[256];
61
62 printf("#include <linux/raid/pq.h>\n");
63
64 /* Compute multiplication table */
65 printf("\nconst u8 __attribute__((aligned(256)))\n"
66 "raid6_gfmul[256][256] =\n"
67 "{\n");
68 for (i = 0; i < 256; i++) {
69 printf("\t{\n");
70 for (j = 0; j < 256; j += 8) {
71 printf("\t\t");
72 for (k = 0; k < 8; k++)
73 printf("0x%02x,%c", gfmul(i, j + k),
74 (k == 7) ? '\n' : ' ');
75 }
76 printf("\t},\n");
77 }
78 printf("};\n");
79 printf("#ifdef __KERNEL__\n");
80 printf("EXPORT_SYMBOL(raid6_gfmul);\n");
81 printf("#endif\n");
82
83 /* Compute power-of-2 table (exponent) */
84 v = 1;
85 printf("\nconst u8 __attribute__((aligned(256)))\n"
86 "raid6_gfexp[256] =\n" "{\n");
87 for (i = 0; i < 256; i += 8) {
88 printf("\t");
89 for (j = 0; j < 8; j++) {
90 exptbl[i + j] = v;
91 printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
92 v = gfmul(v, 2);
93 if (v == 1)
94 v = 0; /* For entry 255, not a real entry */
95 }
96 }
97 printf("};\n");
98 printf("#ifdef __KERNEL__\n");
99 printf("EXPORT_SYMBOL(raid6_gfexp);\n");
100 printf("#endif\n");
101
102 /* Compute inverse table x^-1 == x^254 */
103 printf("\nconst u8 __attribute__((aligned(256)))\n"
104 "raid6_gfinv[256] =\n" "{\n");
105 for (i = 0; i < 256; i += 8) {
106 printf("\t");
107 for (j = 0; j < 8; j++) {
108 invtbl[i + j] = v = gfpow(i + j, 254);
109 printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
110 }
111 }
112 printf("};\n");
113 printf("#ifdef __KERNEL__\n");
114 printf("EXPORT_SYMBOL(raid6_gfinv);\n");
115 printf("#endif\n");
116
117 /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
118 printf("\nconst u8 __attribute__((aligned(256)))\n"
119 "raid6_gfexi[256] =\n" "{\n");
120 for (i = 0; i < 256; i += 8) {
121 printf("\t");
122 for (j = 0; j < 8; j++)
123 printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
124 (j == 7) ? '\n' : ' ');
125 }
126 printf("};\n");
127 printf("#ifdef __KERNEL__\n");
128 printf("EXPORT_SYMBOL(raid6_gfexi);\n");
129 printf("#endif\n");
130
131 return 0;
132}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 62ecb6650fd0..a88aeb5198c7 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -825,11 +825,29 @@ static int make_request(mddev_t *mddev, struct bio * bio)
825 */ 825 */
826 bp = bio_split(bio, 826 bp = bio_split(bio,
827 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); 827 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
828
829 /* Each of these 'make_request' calls will call 'wait_barrier'.
830 * If the first succeeds but the second blocks due to the resync
831 * thread raising the barrier, we will deadlock because the
832 * IO to the underlying device will be queued in generic_make_request
833 * and will never complete, so will never reduce nr_pending.
834 * So increment nr_waiting here so no new raise_barriers will
835 * succeed, and so the second wait_barrier cannot block.
836 */
837 spin_lock_irq(&conf->resync_lock);
838 conf->nr_waiting++;
839 spin_unlock_irq(&conf->resync_lock);
840
828 if (make_request(mddev, &bp->bio1)) 841 if (make_request(mddev, &bp->bio1))
829 generic_make_request(&bp->bio1); 842 generic_make_request(&bp->bio1);
830 if (make_request(mddev, &bp->bio2)) 843 if (make_request(mddev, &bp->bio2))
831 generic_make_request(&bp->bio2); 844 generic_make_request(&bp->bio2);
832 845
846 spin_lock_irq(&conf->resync_lock);
847 conf->nr_waiting--;
848 wake_up(&conf->wait_barrier);
849 spin_unlock_irq(&conf->resync_lock);
850
833 bio_pair_release(bp); 851 bio_pair_release(bp);
834 return 0; 852 return 0;
835 bad_map: 853 bad_map:
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 20ac2f14376a..866d4b5a144c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
201 if (test_bit(STRIPE_HANDLE, &sh->state)) { 201 if (test_bit(STRIPE_HANDLE, &sh->state)) {
202 if (test_bit(STRIPE_DELAYED, &sh->state)) { 202 if (test_bit(STRIPE_DELAYED, &sh->state)) {
203 list_add_tail(&sh->lru, &conf->delayed_list); 203 list_add_tail(&sh->lru, &conf->delayed_list);
204 blk_plug_device(conf->mddev->queue); 204 plugger_set_plug(&conf->plug);
205 } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && 205 } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
206 sh->bm_seq - conf->seq_write > 0) { 206 sh->bm_seq - conf->seq_write > 0) {
207 list_add_tail(&sh->lru, &conf->bitmap_list); 207 list_add_tail(&sh->lru, &conf->bitmap_list);
208 blk_plug_device(conf->mddev->queue); 208 plugger_set_plug(&conf->plug);
209 } else { 209 } else {
210 clear_bit(STRIPE_BIT_DELAY, &sh->state); 210 clear_bit(STRIPE_BIT_DELAY, &sh->state);
211 list_add_tail(&sh->lru, &conf->handle_list); 211 list_add_tail(&sh->lru, &conf->handle_list);
@@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf)
434} 434}
435 435
436static void unplug_slaves(mddev_t *mddev); 436static void unplug_slaves(mddev_t *mddev);
437static void raid5_unplug_device(struct request_queue *q);
438 437
439static struct stripe_head * 438static struct stripe_head *
440get_active_stripe(raid5_conf_t *conf, sector_t sector, 439get_active_stripe(raid5_conf_t *conf, sector_t sector,
@@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
464 < (conf->max_nr_stripes *3/4) 463 < (conf->max_nr_stripes *3/4)
465 || !conf->inactive_blocked), 464 || !conf->inactive_blocked),
466 conf->device_lock, 465 conf->device_lock,
467 raid5_unplug_device(conf->mddev->queue) 466 md_raid5_unplug_device(conf)
468 ); 467 );
469 conf->inactive_blocked = 0; 468 conf->inactive_blocked = 0;
470 } else 469 } else
@@ -1337,10 +1336,14 @@ static int grow_stripes(raid5_conf_t *conf, int num)
1337 struct kmem_cache *sc; 1336 struct kmem_cache *sc;
1338 int devs = max(conf->raid_disks, conf->previous_raid_disks); 1337 int devs = max(conf->raid_disks, conf->previous_raid_disks);
1339 1338
1340 sprintf(conf->cache_name[0], 1339 if (conf->mddev->gendisk)
1341 "raid%d-%s", conf->level, mdname(conf->mddev)); 1340 sprintf(conf->cache_name[0],
1342 sprintf(conf->cache_name[1], 1341 "raid%d-%s", conf->level, mdname(conf->mddev));
1343 "raid%d-%s-alt", conf->level, mdname(conf->mddev)); 1342 else
1343 sprintf(conf->cache_name[0],
1344 "raid%d-%p", conf->level, conf->mddev);
1345 sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
1346
1344 conf->active_name = 0; 1347 conf->active_name = 0;
1345 sc = kmem_cache_create(conf->cache_name[conf->active_name], 1348 sc = kmem_cache_create(conf->cache_name[conf->active_name],
1346 sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), 1349 sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -3614,7 +3617,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
3614 list_add_tail(&sh->lru, &conf->hold_list); 3617 list_add_tail(&sh->lru, &conf->hold_list);
3615 } 3618 }
3616 } else 3619 } else
3617 blk_plug_device(conf->mddev->queue); 3620 plugger_set_plug(&conf->plug);
3618} 3621}
3619 3622
3620static void activate_bit_delay(raid5_conf_t *conf) 3623static void activate_bit_delay(raid5_conf_t *conf)
@@ -3655,36 +3658,44 @@ static void unplug_slaves(mddev_t *mddev)
3655 rcu_read_unlock(); 3658 rcu_read_unlock();
3656} 3659}
3657 3660
3658static void raid5_unplug_device(struct request_queue *q) 3661void md_raid5_unplug_device(raid5_conf_t *conf)
3659{ 3662{
3660 mddev_t *mddev = q->queuedata;
3661 raid5_conf_t *conf = mddev->private;
3662 unsigned long flags; 3663 unsigned long flags;
3663 3664
3664 spin_lock_irqsave(&conf->device_lock, flags); 3665 spin_lock_irqsave(&conf->device_lock, flags);
3665 3666
3666 if (blk_remove_plug(q)) { 3667 if (plugger_remove_plug(&conf->plug)) {
3667 conf->seq_flush++; 3668 conf->seq_flush++;
3668 raid5_activate_delayed(conf); 3669 raid5_activate_delayed(conf);
3669 } 3670 }
3670 md_wakeup_thread(mddev->thread); 3671 md_wakeup_thread(conf->mddev->thread);
3671 3672
3672 spin_unlock_irqrestore(&conf->device_lock, flags); 3673 spin_unlock_irqrestore(&conf->device_lock, flags);
3673 3674
3674 unplug_slaves(mddev); 3675 unplug_slaves(conf->mddev);
3675} 3676}
3677EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
3676 3678
3677static int raid5_congested(void *data, int bits) 3679static void raid5_unplug(struct plug_handle *plug)
3680{
3681 raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
3682 md_raid5_unplug_device(conf);
3683}
3684
3685static void raid5_unplug_queue(struct request_queue *q)
3686{
3687 mddev_t *mddev = q->queuedata;
3688 md_raid5_unplug_device(mddev->private);
3689}
3690
3691int md_raid5_congested(mddev_t *mddev, int bits)
3678{ 3692{
3679 mddev_t *mddev = data;
3680 raid5_conf_t *conf = mddev->private; 3693 raid5_conf_t *conf = mddev->private;
3681 3694
3682 /* No difference between reads and writes. Just check 3695 /* No difference between reads and writes. Just check
3683 * how busy the stripe_cache is 3696 * how busy the stripe_cache is
3684 */ 3697 */
3685 3698
3686 if (mddev_congested(mddev, bits))
3687 return 1;
3688 if (conf->inactive_blocked) 3699 if (conf->inactive_blocked)
3689 return 1; 3700 return 1;
3690 if (conf->quiesce) 3701 if (conf->quiesce)
@@ -3694,6 +3705,15 @@ static int raid5_congested(void *data, int bits)
3694 3705
3695 return 0; 3706 return 0;
3696} 3707}
3708EXPORT_SYMBOL_GPL(md_raid5_congested);
3709
3710static int raid5_congested(void *data, int bits)
3711{
3712 mddev_t *mddev = data;
3713
3714 return mddev_congested(mddev, bits) ||
3715 md_raid5_congested(mddev, bits);
3716}
3697 3717
3698/* We want read requests to align with chunks where possible, 3718/* We want read requests to align with chunks where possible,
3699 * but write requests don't need to. 3719 * but write requests don't need to.
@@ -4075,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
4075 * add failed due to overlap. Flush everything 4095 * add failed due to overlap. Flush everything
4076 * and wait a while 4096 * and wait a while
4077 */ 4097 */
4078 raid5_unplug_device(mddev->queue); 4098 md_raid5_unplug_device(conf);
4079 release_stripe(sh); 4099 release_stripe(sh);
4080 schedule(); 4100 schedule();
4081 goto retry; 4101 goto retry;
@@ -4566,23 +4586,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
4566 return 0; 4586 return 0;
4567} 4587}
4568 4588
4569static ssize_t 4589int
4570raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) 4590raid5_set_cache_size(mddev_t *mddev, int size)
4571{ 4591{
4572 raid5_conf_t *conf = mddev->private; 4592 raid5_conf_t *conf = mddev->private;
4573 unsigned long new;
4574 int err; 4593 int err;
4575 4594
4576 if (len >= PAGE_SIZE) 4595 if (size <= 16 || size > 32768)
4577 return -EINVAL; 4596 return -EINVAL;
4578 if (!conf) 4597 while (size < conf->max_nr_stripes) {
4579 return -ENODEV;
4580
4581 if (strict_strtoul(page, 10, &new))
4582 return -EINVAL;
4583 if (new <= 16 || new > 32768)
4584 return -EINVAL;
4585 while (new < conf->max_nr_stripes) {
4586 if (drop_one_stripe(conf)) 4598 if (drop_one_stripe(conf))
4587 conf->max_nr_stripes--; 4599 conf->max_nr_stripes--;
4588 else 4600 else
@@ -4591,11 +4603,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
4591 err = md_allow_write(mddev); 4603 err = md_allow_write(mddev);
4592 if (err) 4604 if (err)
4593 return err; 4605 return err;
4594 while (new > conf->max_nr_stripes) { 4606 while (size > conf->max_nr_stripes) {
4595 if (grow_one_stripe(conf)) 4607 if (grow_one_stripe(conf))
4596 conf->max_nr_stripes++; 4608 conf->max_nr_stripes++;
4597 else break; 4609 else break;
4598 } 4610 }
4611 return 0;
4612}
4613EXPORT_SYMBOL(raid5_set_cache_size);
4614
4615static ssize_t
4616raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
4617{
4618 raid5_conf_t *conf = mddev->private;
4619 unsigned long new;
4620 int err;
4621
4622 if (len >= PAGE_SIZE)
4623 return -EINVAL;
4624 if (!conf)
4625 return -ENODEV;
4626
4627 if (strict_strtoul(page, 10, &new))
4628 return -EINVAL;
4629 err = raid5_set_cache_size(mddev, new);
4630 if (err)
4631 return err;
4599 return len; 4632 return len;
4600} 4633}
4601 4634
@@ -4958,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
4958static int run(mddev_t *mddev) 4991static int run(mddev_t *mddev)
4959{ 4992{
4960 raid5_conf_t *conf; 4993 raid5_conf_t *conf;
4961 int working_disks = 0, chunk_size; 4994 int working_disks = 0;
4962 int dirty_parity_disks = 0; 4995 int dirty_parity_disks = 0;
4963 mdk_rdev_t *rdev; 4996 mdk_rdev_t *rdev;
4964 sector_t reshape_offset = 0; 4997 sector_t reshape_offset = 0;
@@ -5144,42 +5177,47 @@ static int run(mddev_t *mddev)
5144 "reshape"); 5177 "reshape");
5145 } 5178 }
5146 5179
5147 /* read-ahead size must cover two whole stripes, which is
5148 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
5149 */
5150 {
5151 int data_disks = conf->previous_raid_disks - conf->max_degraded;
5152 int stripe = data_disks *
5153 ((mddev->chunk_sectors << 9) / PAGE_SIZE);
5154 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
5155 mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
5156 }
5157 5180
5158 /* Ok, everything is just fine now */ 5181 /* Ok, everything is just fine now */
5159 if (mddev->to_remove == &raid5_attrs_group) 5182 if (mddev->to_remove == &raid5_attrs_group)
5160 mddev->to_remove = NULL; 5183 mddev->to_remove = NULL;
5161 else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) 5184 else if (mddev->kobj.sd &&
5185 sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
5162 printk(KERN_WARNING 5186 printk(KERN_WARNING
5163 "md/raid:%s: failed to create sysfs attributes.\n", 5187 "raid5: failed to create sysfs attributes for %s\n",
5164 mdname(mddev)); 5188 mdname(mddev));
5189 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
5165 5190
5166 mddev->queue->queue_lock = &conf->device_lock; 5191 plugger_init(&conf->plug, raid5_unplug);
5192 mddev->plug = &conf->plug;
5193 if (mddev->queue) {
5194 int chunk_size;
5195 /* read-ahead size must cover two whole stripes, which
5196 * is 2 * (datadisks) * chunksize where 'n' is the
5197 * number of raid devices
5198 */
5199 int data_disks = conf->previous_raid_disks - conf->max_degraded;
5200 int stripe = data_disks *
5201 ((mddev->chunk_sectors << 9) / PAGE_SIZE);
5202 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
5203 mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
5167 5204
5168 mddev->queue->unplug_fn = raid5_unplug_device; 5205 blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
5169 mddev->queue->backing_dev_info.congested_data = mddev;
5170 mddev->queue->backing_dev_info.congested_fn = raid5_congested;
5171 5206
5172 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); 5207 mddev->queue->backing_dev_info.congested_data = mddev;
5208 mddev->queue->backing_dev_info.congested_fn = raid5_congested;
5209 mddev->queue->queue_lock = &conf->device_lock;
5210 mddev->queue->unplug_fn = raid5_unplug_queue;
5173 5211
5174 blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); 5212 chunk_size = mddev->chunk_sectors << 9;
5175 chunk_size = mddev->chunk_sectors << 9; 5213 blk_queue_io_min(mddev->queue, chunk_size);
5176 blk_queue_io_min(mddev->queue, chunk_size); 5214 blk_queue_io_opt(mddev->queue, chunk_size *
5177 blk_queue_io_opt(mddev->queue, chunk_size * 5215 (conf->raid_disks - conf->max_degraded));
5178 (conf->raid_disks - conf->max_degraded));
5179 5216
5180 list_for_each_entry(rdev, &mddev->disks, same_set) 5217 list_for_each_entry(rdev, &mddev->disks, same_set)
5181 disk_stack_limits(mddev->gendisk, rdev->bdev, 5218 disk_stack_limits(mddev->gendisk, rdev->bdev,
5182 rdev->data_offset << 9); 5219 rdev->data_offset << 9);
5220 }
5183 5221
5184 return 0; 5222 return 0;
5185abort: 5223abort:
@@ -5200,8 +5238,9 @@ static int stop(mddev_t *mddev)
5200 5238
5201 md_unregister_thread(mddev->thread); 5239 md_unregister_thread(mddev->thread);
5202 mddev->thread = NULL; 5240 mddev->thread = NULL;
5203 mddev->queue->backing_dev_info.congested_fn = NULL; 5241 if (mddev->queue)
5204 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 5242 mddev->queue->backing_dev_info.congested_fn = NULL;
5243 plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
5205 free_conf(conf); 5244 free_conf(conf);
5206 mddev->private = NULL; 5245 mddev->private = NULL;
5207 mddev->to_remove = &raid5_attrs_group; 5246 mddev->to_remove = &raid5_attrs_group;
@@ -5545,10 +5584,7 @@ static int raid5_start_reshape(mddev_t *mddev)
5545 sprintf(nm, "rd%d", rdev->raid_disk); 5584 sprintf(nm, "rd%d", rdev->raid_disk);
5546 if (sysfs_create_link(&mddev->kobj, 5585 if (sysfs_create_link(&mddev->kobj,
5547 &rdev->kobj, nm)) 5586 &rdev->kobj, nm))
5548 printk(KERN_WARNING 5587 /* Failure here is OK */;
5549 "md/raid:%s: failed to create "
5550 " link %s\n",
5551 mdname(mddev), nm);
5552 } else 5588 } else
5553 break; 5589 break;
5554 } 5590 }
@@ -5603,7 +5639,7 @@ static void end_reshape(raid5_conf_t *conf)
5603 /* read-ahead size must cover two whole stripes, which is 5639 /* read-ahead size must cover two whole stripes, which is
5604 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices 5640 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
5605 */ 5641 */
5606 { 5642 if (conf->mddev->queue) {
5607 int data_disks = conf->raid_disks - conf->max_degraded; 5643 int data_disks = conf->raid_disks - conf->max_degraded;
5608 int stripe = data_disks * ((conf->chunk_sectors << 9) 5644 int stripe = data_disks * ((conf->chunk_sectors << 9)
5609 / PAGE_SIZE); 5645 / PAGE_SIZE);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 0f86f5e36724..36eaed5dfd6e 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -388,7 +388,7 @@ struct raid5_private_data {
388 * two caches. 388 * two caches.
389 */ 389 */
390 int active_name; 390 int active_name;
391 char cache_name[2][20]; 391 char cache_name[2][32];
392 struct kmem_cache *slab_cache; /* for allocating stripes */ 392 struct kmem_cache *slab_cache; /* for allocating stripes */
393 393
394 int seq_flush, seq_write; 394 int seq_flush, seq_write;
@@ -398,6 +398,9 @@ struct raid5_private_data {
398 * (fresh device added). 398 * (fresh device added).
399 * Cleared when a sync completes. 399 * Cleared when a sync completes.
400 */ 400 */
401
402 struct plug_handle plug;
403
401 /* per cpu variables */ 404 /* per cpu variables */
402 struct raid5_percpu { 405 struct raid5_percpu {
403 struct page *spare_page; /* Used when checking P/Q in raid6 */ 406 struct page *spare_page; /* Used when checking P/Q in raid6 */
@@ -497,4 +500,8 @@ static inline int algorithm_is_DDF(int layout)
497{ 500{
498 return layout >= 8 && layout <= 10; 501 return layout >= 8 && layout <= 10;
499} 502}
503
504extern int md_raid5_congested(mddev_t *mddev, int bits);
505extern void md_raid5_unplug_device(raid5_conf_t *conf);
506extern int raid5_set_cache_size(mddev_t *mddev, int size);
500#endif 507#endif
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c
deleted file mode 100644
index 1f8784bfd44d..000000000000
--- a/drivers/md/raid6algos.c
+++ /dev/null
@@ -1,154 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6algos.c
15 *
16 * Algorithm list and algorithm selection for RAID-6
17 */
18
19#include <linux/raid/pq.h>
20#include <linux/gfp.h>
21#ifndef __KERNEL__
22#include <sys/mman.h>
23#include <stdio.h>
24#else
25#if !RAID6_USE_EMPTY_ZERO_PAGE
26/* In .bss so it's zeroed */
27const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
28EXPORT_SYMBOL(raid6_empty_zero_page);
29#endif
30#endif
31
32struct raid6_calls raid6_call;
33EXPORT_SYMBOL_GPL(raid6_call);
34
/*
 * NULL-terminated table of candidate syndrome generators.  It is walked
 * front to back by raid6_select_algo(), which stops at the NULL entry.
 * The raid6_intx* entries are listed unconditionally; the preprocessor
 * guards add the architecture-specific implementations.
 */
35const struct raid6_calls * const raid6_algos[] = {
36 &raid6_intx1,
37 &raid6_intx2,
38 &raid6_intx4,
39 &raid6_intx8,
40#if defined(__ia64__)
41 &raid6_intx16,
42 &raid6_intx32,
43#endif
44#if defined(__i386__) && !defined(__arch_um__)
45 &raid6_mmxx1,
46 &raid6_mmxx2,
47 &raid6_sse1x1,
48 &raid6_sse1x2,
49 &raid6_sse2x1,
50 &raid6_sse2x2,
51#endif
52#if defined(__x86_64__) && !defined(__arch_um__)
53 &raid6_sse2x1,
54 &raid6_sse2x2,
55 &raid6_sse2x4,
56#endif
57#ifdef CONFIG_ALTIVEC
58 &raid6_altivec1,
59 &raid6_altivec2,
60 &raid6_altivec4,
61 &raid6_altivec8,
62#endif
/* Sentinel: terminates the scan in raid6_select_algo() */
63 NULL
64};
65
66#ifdef __KERNEL__
67#define RAID6_TIME_JIFFIES_LG2 4
68#else
69/* Need more time to be stable in userspace */
70#define RAID6_TIME_JIFFIES_LG2 9
71#define time_before(x, y) ((x) < (y))
72#endif
73
74/* Try to pick the best algorithm */
75/* This code uses the gfmul table as convenient data set to abuse */
76
77int __init raid6_select_algo(void)
78{
79 const struct raid6_calls * const * algo;
80 const struct raid6_calls * best;
81 char *syndromes;
82 void *dptrs[(65536/PAGE_SIZE)+2];
83 int i, disks;
84 unsigned long perf, bestperf;
85 int bestprefer;
86 unsigned long j0, j1;
87
88 disks = (65536/PAGE_SIZE)+2;
89 for ( i = 0 ; i < disks-2 ; i++ ) {
90 dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
91 }
92
93 /* Normal code - use a 2-page allocation to avoid D$ conflict */
94 syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
95
96 if ( !syndromes ) {
97 printk("raid6: Yikes! No memory available.\n");
98 return -ENOMEM;
99 }
100
101 dptrs[disks-2] = syndromes;
102 dptrs[disks-1] = syndromes + PAGE_SIZE;
103
104 bestperf = 0; bestprefer = 0; best = NULL;
105
106 for ( algo = raid6_algos ; *algo ; algo++ ) {
107 if ( !(*algo)->valid || (*algo)->valid() ) {
108 perf = 0;
109
110 preempt_disable();
111 j0 = jiffies;
112 while ( (j1 = jiffies) == j0 )
113 cpu_relax();
114 while (time_before(jiffies,
115 j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
116 (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
117 perf++;
118 }
119 preempt_enable();
120
121 if ( (*algo)->prefer > bestprefer ||
122 ((*algo)->prefer == bestprefer &&
123 perf > bestperf) ) {
124 best = *algo;
125 bestprefer = best->prefer;
126 bestperf = perf;
127 }
128 printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
129 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
130 }
131 }
132
133 if (best) {
134 printk("raid6: using algorithm %s (%ld MB/s)\n",
135 best->name,
136 (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
137 raid6_call = *best;
138 } else
139 printk("raid6: Yikes! No algorithm found!\n");
140
141 free_pages((unsigned long)syndromes, 1);
142
143 return best ? 0 : -EINVAL;
144}
145
/* Module exit hook: this module has nothing to tear down. */
static void raid6_exit(void)
{
	/* intentionally empty */
}
150
151subsys_initcall(raid6_select_algo);
152module_exit(raid6_exit);
153MODULE_LICENSE("GPL");
154MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
diff --git a/drivers/md/raid6altivec.uc b/drivers/md/raid6altivec.uc
deleted file mode 100644
index 2654d5c854be..000000000000
--- a/drivers/md/raid6altivec.uc
+++ /dev/null
@@ -1,130 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6altivec$#.c
15 *
16 * $#-way unrolled Altivec RAID-6 instruction set
17 *
18 * This file is postprocessed using unroll.awk
19 *
20 * <benh> hpa: in process,
21 * you can just "steal" the vec unit with enable_kernel_altivec() (but
22 * bracket this with preempt_disable/enable or in a lock)
23 */
24
25#include <linux/raid/pq.h>
26
27#ifdef CONFIG_ALTIVEC
28
29#include <altivec.h>
30#ifdef __KERNEL__
31# include <asm/system.h>
32# include <asm/cputable.h>
33#endif
34
35/*
36 * This is the C data type to use. We use a vector of
37 * signed char so vec_cmpgt() will generate the right
38 * instruction.
39 */
40
41typedef vector signed char unative_t;
42
43#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
44#define NSIZE sizeof(unative_t)
45
46/*
47 * The SHLBYTE() operation shifts each byte left by 1, *not*
48 * rolling over into the next byte
49 */
50static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
51{
/* Adding the vector to itself doubles every element, i.e. a one-bit left shift of each byte */
52 return vec_add(v,v);
53}
54
55/*
56 * The MASK() operation returns 0xFF in any byte for which the high
57 * bit is 1, 0x00 for any byte for which the high bit is 0.
58 */
59static inline __attribute_const__ unative_t MASK(unative_t v)
60{
/* All-zero vector to compare against */
61 unative_t zv = NBYTES(0);
62
/* Elements are signed chars, so "0 > element" holds exactly when the element's high bit is set */
63 /* vec_cmpgt returns a vector bool char; thus the need for the cast */
64 return (unative_t)vec_cmpgt(zv, v);
65}
66
67
68/* This is noinline to make damned sure that gcc doesn't move any of the
69 Altivec code around the enable/disable code */
/*
 * Compute the P (XOR parity) and Q (syndrome) buffers from the data buffers.
 *
 * disks: number of entries in ptrs, including the P and Q buffers
 * bytes: number of bytes to process in every buffer
 *        (NOTE(review): presumably a multiple of the per-iteration stride
 *        and suitably aligned for vector access - confirm against callers)
 * ptrs:  data buffers at indices 0 .. disks-3, then P at disks-2 and
 *        Q at disks-1
 */
70static void noinline
71raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
72{
73 u8 **dptr = (u8 **)ptrs;
74 u8 *p, *q;
75 int d, z, z0;
76
77 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
/* 0x1d in every byte: xor-ed into bytes whose high bit was shifted out */
78 unative_t x1d = NBYTES(0x1d);
79
80 z0 = disks - 3; /* Highest data disk */
81 p = dptr[z0+1]; /* XOR parity */
82 q = dptr[z0+2]; /* RS syndrome */
83
84 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
/* Seed both accumulators with the highest data disk */
85 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
/*
 * Walk the remaining data disks downwards.  Each step xors the
 * data into P, and replaces Q by (Q shifted left one bit per byte,
 * xor 0x1d where the high bit was set) xor data.
 */
86 for ( z = z0-1 ; z >= 0 ; z-- ) {
87 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
88 wp$$ = vec_xor(wp$$, wd$$);
89 w2$$ = MASK(wq$$);
90 w1$$ = SHLBYTE(wq$$);
91 w2$$ = vec_and(w2$$, x1d);
92 w1$$ = vec_xor(w1$$, w2$$);
93 wq$$ = vec_xor(w1$$, wd$$);
94 }
/* Store the finished P and Q vectors for this offset */
95 *(unative_t *)&p[d+NSIZE*$$] = wp$$;
96 *(unative_t *)&q[d+NSIZE*$$] = wq$$;
97 }
98}
99
/*
 * gen_syndrome entry point: runs the real worker with preemption disabled
 * and after enable_kernel_altivec() has been called, as described in the
 * note at the top of this file.  Arguments are passed through unchanged.
 */
100static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
101{
102 preempt_disable();
103 enable_kernel_altivec();
104
105 raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
106
107 preempt_enable();
108}
109
/*
 * Availability check used as the validity hook of the Altivec descriptors.
 * Every generated file gets the prototype, but the body is emitted only by
 * the 1-way instance of this template, so there is a single definition.
 * Returns non-zero when the implementation may be used.
 */
110int raid6_have_altivec(void);
111#if $# == 1
112int raid6_have_altivec(void)
113{
114 /* This assumes either all CPUs have Altivec or none does */
115# ifdef __KERNEL__
116 return cpu_has_feature(CPU_FTR_ALTIVEC);
117# else
/* Userspace build: no feature query is made, Altivec is reported as present */
118 return 1;
119# endif
120}
121#endif
122
/*
 * Descriptor referenced from raid6_algos[].  Positional initializers; the
 * field order comes from struct raid6_calls in <linux/raid/pq.h>
 * (presumably gen_syndrome, valid, name, prefer - verify against the header).
 */
123const struct raid6_calls raid6_altivec$# = {
124 raid6_altivec$#_gen_syndrome,
125 raid6_have_altivec,
126 "altivecx$#",
127 0
128};
129
130#endif /* CONFIG_ALTIVEC */
diff --git a/drivers/md/raid6int.uc b/drivers/md/raid6int.uc
deleted file mode 100644
index d1e276a14fab..000000000000
--- a/drivers/md/raid6int.uc
+++ /dev/null
@@ -1,117 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6int$#.c
15 *
16 * $#-way unrolled portable integer math RAID-6 instruction set
17 *
18 * This file is postprocessed using unroll.awk
19 */
20
21#include <linux/raid/pq.h>
22
23/*
24 * This is the C data type to use
25 */
26
27/* Change this from BITS_PER_LONG if there is something better... */
28#if BITS_PER_LONG == 64
29# define NBYTES(x) ((x) * 0x0101010101010101UL)
30# define NSIZE 8
31# define NSHIFT 3
32# define NSTRING "64"
33typedef u64 unative_t;
34#else
35# define NBYTES(x) ((x) * 0x01010101U)
36# define NSIZE 4
37# define NSHIFT 2
38# define NSTRING "32"
39typedef u32 unative_t;
40#endif
41
42
43
44/*
45 * IA-64 wants insane amounts of unrolling. On other architectures that
46 * is just a waste of space.
47 */
48#if ($# <= 8) || defined(__ia64__)
49
50
51/*
52 * These sub-operations are separate inlines since they can sometimes be
53 * specially optimized using architecture-specific hacks.
54 */
55
56/*
57 * The SHLBYTE() operation shifts each byte left by 1, *not*
58 * rolling over into the next byte
59 */
60static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
61{
62 unative_t vv;
63
64 vv = (v << 1) & NBYTES(0xfe);
65 return vv;
66}
67
68/*
69 * The MASK() operation returns 0xFF in any byte for which the high
70 * bit is 1, 0x00 for any byte for which the high bit is 0.
71 */
72static inline __attribute_const__ unative_t MASK(unative_t v)
73{
74 unative_t vv;
75
76 vv = v & NBYTES(0x80);
77 vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
78 return vv;
79}
80
81
/*
 * Compute the P (XOR parity) and Q (syndrome) buffers from the data
 * buffers, one native word per unrolled lane.
 *
 * disks: number of entries in ptrs, including the P and Q buffers
 * bytes: number of bytes to process in every buffer
 *        (NOTE(review): presumably a multiple of the per-iteration stride
 *        and word-aligned - confirm against callers)
 * ptrs:  data buffers at indices 0 .. disks-3, then P at disks-2 and
 *        Q at disks-1
 */
82static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
83{
84 u8 **dptr = (u8 **)ptrs;
85 u8 *p, *q;
86 int d, z, z0;
87
88 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
89
90 z0 = disks - 3; /* Highest data disk */
91 p = dptr[z0+1]; /* XOR parity */
92 q = dptr[z0+2]; /* RS syndrome */
93
94 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
/* Seed both accumulators with the highest data disk */
95 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
/*
 * Walk the remaining data disks downwards.  Each step xors the
 * data into P, and replaces Q by (Q shifted left one bit per byte,
 * xor 0x1d where the high bit was set) xor data.
 */
96 for ( z = z0-1 ; z >= 0 ; z-- ) {
97 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
98 wp$$ ^= wd$$;
99 w2$$ = MASK(wq$$);
100 w1$$ = SHLBYTE(wq$$);
101 w2$$ &= NBYTES(0x1d);
102 w1$$ ^= w2$$;
103 wq$$ = w1$$ ^ wd$$;
104 }
/* Store the finished P and Q words for this offset */
105 *(unative_t *)&p[d+NSIZE*$$] = wp$$;
106 *(unative_t *)&q[d+NSIZE*$$] = wq$$;
107 }
108}
109
/*
 * Descriptor referenced from raid6_algos[].  Positional initializers; the
 * field order comes from struct raid6_calls in <linux/raid/pq.h>
 * (presumably gen_syndrome, valid, name, prefer - verify against the header).
 * The name embeds the native word width via NSTRING.
 */
110const struct raid6_calls raid6_intx$# = {
111 raid6_int$#_gen_syndrome,
112 NULL, /* always valid */
113 "int" NSTRING "x$#",
114 0
115};
116
117#endif
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c
deleted file mode 100644
index e7f6c13132bf..000000000000
--- a/drivers/md/raid6mmx.c
+++ /dev/null
@@ -1,142 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6mmx.c
15 *
16 * MMX implementation of RAID-6 syndrome functions
17 */
18
19#if defined(__i386__) && !defined(__arch_um__)
20
21#include <linux/raid/pq.h>
22#include "raid6x86.h"
23
24/* Shared with raid6sse1.c */
/*
 * Constant operand for the MMX routines: 0x1d replicated into all eight
 * bytes of a 64-bit value.  The gen_syndrome functions in this file load
 * it into %mm0 before their main loop.
 */
25const struct raid6_mmx_constants {
26 u64 x1d;
27} raid6_mmx_constants = {
28 0x1d1d1d1d1d1d1d1dULL,
29};
30
/*
 * Availability check used as the validity hook of the MMX descriptors:
 * returns the result of boot_cpu_has(X86_FEATURE_MMX).
 */
31static int raid6_have_mmx(void)
32{
33 /* Not really "boot_cpu" but "all_cpus" */
34 return boot_cpu_has(X86_FEATURE_MMX);
35}
36
37/*
38 * Plain MMX implementation
39 */
/*
 * Compute the P (XOR parity) and Q (syndrome) buffers, 8 bytes at a time.
 *
 * disks: number of entries in ptrs, including the P and Q buffers
 * bytes: number of bytes to process in every buffer
 *        (NOTE(review): presumably a multiple of 8 - confirm against callers)
 * ptrs:  data buffers at indices 0 .. disks-3, then P at disks-2 and
 *        Q at disks-1
 *
 * Register use: mm0 = 0x1d constant, mm2 = P accumulator,
 * mm4 = Q accumulator, mm5 = zero/mask temporary, mm6 = current data.
 * The whole computation runs between kernel_fpu_begin() and
 * kernel_fpu_end().
 */
40static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
41{
42 u8 **dptr = (u8 **)ptrs;
43 u8 *p, *q;
44 int d, z, z0;
45
46 z0 = disks - 3; /* Highest data disk */
47 p = dptr[z0+1]; /* XOR parity */
48 q = dptr[z0+2]; /* RS syndrome */
49
50 kernel_fpu_begin();
51
52 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
53 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
54
55 for ( d = 0 ; d < bytes ; d += 8 ) {
56 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
57 asm volatile("movq %mm2,%mm4"); /* Q[0] */
58 for ( z = z0-1 ; z >= 0 ; z-- ) {
59 asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
/* mm5 is zero here: signed 0 > Q byte gives 0xff where Q's high bit is set */
60 asm volatile("pcmpgtb %mm4,%mm5");
/* Q + Q: shift every Q byte left by one bit */
61 asm volatile("paddb %mm4,%mm4");
/* Reduce the mask to 0x1d in the bytes that overflowed ... */
62 asm volatile("pand %mm0,%mm5");
/* ... and fold it into the shifted Q */
63 asm volatile("pxor %mm5,%mm4");
/* Re-zero the temporary for the next compare */
64 asm volatile("pxor %mm5,%mm5");
/* P ^= data */
65 asm volatile("pxor %mm6,%mm2");
/* Q ^= data */
66 asm volatile("pxor %mm6,%mm4");
67 }
/* Store P and Q for this offset, clearing each accumulator afterwards */
68 asm volatile("movq %%mm2,%0" : "=m" (p[d]));
69 asm volatile("pxor %mm2,%mm2");
70 asm volatile("movq %%mm4,%0" : "=m" (q[d]));
71 asm volatile("pxor %mm4,%mm4");
72 }
73
74 kernel_fpu_end();
75}
76
/*
 * Descriptor for the plain MMX routine, referenced from raid6_algos[].
 * Positional initializers; the field order comes from struct raid6_calls in
 * <linux/raid/pq.h> (presumably gen_syndrome, valid, name, prefer - verify
 * against the header).
 */
77const struct raid6_calls raid6_mmxx1 = {
78 raid6_mmx1_gen_syndrome,
79 raid6_have_mmx,
80 "mmxx1",
81 0
82};
83
84/*
85 * Unrolled-by-2 MMX implementation
86 */
/*
 * Compute the P (XOR parity) and Q (syndrome) buffers, 16 bytes per
 * iteration as two independent 8-byte lanes.
 *
 * disks: number of entries in ptrs, including the P and Q buffers
 * bytes: number of bytes to process in every buffer
 *        (NOTE(review): presumably a multiple of 16 - confirm against callers)
 * ptrs:  data buffers at indices 0 .. disks-3, then P at disks-2 and
 *        Q at disks-1
 *
 * Register use: mm0 = 0x1d constant, mm2/mm3 = P accumulators,
 * mm4/mm6 = Q accumulators, mm5/mm7 = temporaries that hold in turn the
 * overflow mask and the current data, and are zero at the top of each
 * inner-loop pass.  The whole computation runs between kernel_fpu_begin()
 * and kernel_fpu_end().
 */
87static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
88{
89 u8 **dptr = (u8 **)ptrs;
90 u8 *p, *q;
91 int d, z, z0;
92
93 z0 = disks - 3; /* Highest data disk */
94 p = dptr[z0+1]; /* XOR parity */
95 q = dptr[z0+2]; /* RS syndrome */
96
97 kernel_fpu_begin();
98
99 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
100 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
101 asm volatile("pxor %mm7,%mm7"); /* Zero temp */
102
103 for ( d = 0 ; d < bytes ; d += 16 ) {
104 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
105 asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
106 asm volatile("movq %mm2,%mm4"); /* Q[0] */
107 asm volatile("movq %mm3,%mm6"); /* Q[1] */
108 for ( z = z0-1 ; z >= 0 ; z-- ) {
/* Temps are zero here: signed 0 > Q byte gives 0xff where Q's high bit is set */
109 asm volatile("pcmpgtb %mm4,%mm5");
110 asm volatile("pcmpgtb %mm6,%mm7");
/* Q + Q: shift every Q byte left by one bit */
111 asm volatile("paddb %mm4,%mm4");
112 asm volatile("paddb %mm6,%mm6");
/* Reduce the masks to 0x1d in the bytes that overflowed ... */
113 asm volatile("pand %mm0,%mm5");
114 asm volatile("pand %mm0,%mm7");
/* ... and fold them into the shifted Q values */
115 asm volatile("pxor %mm5,%mm4");
116 asm volatile("pxor %mm7,%mm6");
/* Reuse the temporaries to hold this disk's data */
117 asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
118 asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
/* P ^= data */
119 asm volatile("pxor %mm5,%mm2");
120 asm volatile("pxor %mm7,%mm3");
/* Q ^= data */
121 asm volatile("pxor %mm5,%mm4");
122 asm volatile("pxor %mm7,%mm6");
/* Re-zero the temporaries for the next compare */
123 asm volatile("pxor %mm5,%mm5");
124 asm volatile("pxor %mm7,%mm7");
125 }
/* Store both lanes of P and Q for this offset */
126 asm volatile("movq %%mm2,%0" : "=m" (p[d]));
127 asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
128 asm volatile("movq %%mm4,%0" : "=m" (q[d]));
129 asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
130 }
131
132 kernel_fpu_end();
133}
134
/*
 * Descriptor for the unrolled-by-2 MMX routine, referenced from
 * raid6_algos[].  Positional initializers; the field order comes from
 * struct raid6_calls in <linux/raid/pq.h> (presumably gen_syndrome, valid,
 * name, prefer - verify against the header).
 */
135const struct raid6_calls raid6_mmxx2 = {
136 raid6_mmx2_gen_syndrome,
137 raid6_have_mmx,
138 "mmxx2",
139 0
140};
141
142#endif
diff --git a/drivers/md/raid6recov.c b/drivers/md/raid6recov.c
deleted file mode 100644
index 2609f00e0d61..000000000000
--- a/drivers/md/raid6recov.c
+++ /dev/null
@@ -1,132 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6recov.c
15 *
16 * RAID-6 data recovery in dual failure mode. In single failure mode,
17 * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
18 * the syndrome.)
19 */
20
21#include <linux/raid/pq.h>
22
23/* Recover two failed data blocks. */
24void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
25 void **ptrs)
26{
27 u8 *p, *q, *dp, *dq;
28 u8 px, qx, db;
29 const u8 *pbmul; /* P multiplier table for B data */
30 const u8 *qmul; /* Q multiplier table (for both) */
31
32 p = (u8 *)ptrs[disks-2];
33 q = (u8 *)ptrs[disks-1];
34
35 /* Compute syndrome with zero for the missing data pages
36 Use the dead data pages as temporary storage for
37 delta p and delta q */
38 dp = (u8 *)ptrs[faila];
39 ptrs[faila] = (void *)raid6_empty_zero_page;
40 ptrs[disks-2] = dp;
41 dq = (u8 *)ptrs[failb];
42 ptrs[failb] = (void *)raid6_empty_zero_page;
43 ptrs[disks-1] = dq;
44
45 raid6_call.gen_syndrome(disks, bytes, ptrs);
46
47 /* Restore pointer table */
48 ptrs[faila] = dp;
49 ptrs[failb] = dq;
50 ptrs[disks-2] = p;
51 ptrs[disks-1] = q;
52
53 /* Now, pick the proper data tables */
54 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
55 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
56
57 /* Now do it... */
58 while ( bytes-- ) {
59 px = *p ^ *dp;
60 qx = qmul[*q ^ *dq];
61 *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
62 *dp++ = db ^ px; /* Reconstructed A */
63 p++; q++;
64 }
65}
66EXPORT_SYMBOL_GPL(raid6_2data_recov);
67
68/* Recover failure of one data block plus the P block */
69void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
70{
71 u8 *p, *q, *dq;
72 const u8 *qmul; /* Q multiplier table */
73
74 p = (u8 *)ptrs[disks-2];
75 q = (u8 *)ptrs[disks-1];
76
77 /* Compute syndrome with zero for the missing data page
78 Use the dead data page as temporary storage for delta q */
79 dq = (u8 *)ptrs[faila];
80 ptrs[faila] = (void *)raid6_empty_zero_page;
81 ptrs[disks-1] = dq;
82
83 raid6_call.gen_syndrome(disks, bytes, ptrs);
84
85 /* Restore pointer table */
86 ptrs[faila] = dq;
87 ptrs[disks-1] = q;
88
89 /* Now, pick the proper data tables */
90 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
91
92 /* Now do it... */
93 while ( bytes-- ) {
94 *p++ ^= *dq = qmul[*q ^ *dq];
95 q++; dq++;
96 }
97}
98EXPORT_SYMBOL_GPL(raid6_datap_recov);
99
100#ifndef __KERNEL__
101/* Testing only */
102
103/* Recover two failed blocks. */
104void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
105{
106 if ( faila > failb ) {
107 int tmp = faila;
108 faila = failb;
109 failb = tmp;
110 }
111
112 if ( failb == disks-1 ) {
113 if ( faila == disks-2 ) {
114 /* P+Q failure. Just rebuild the syndrome. */
115 raid6_call.gen_syndrome(disks, bytes, ptrs);
116 } else {
117 /* data+Q failure. Reconstruct data from P,
118 then rebuild syndrome. */
119 /* NOT IMPLEMENTED - equivalent to RAID-5 */
120 }
121 } else {
122 if ( failb == disks-2 ) {
123 /* data+P failure. */
124 raid6_datap_recov(disks, bytes, faila, ptrs);
125 } else {
126 /* data+data failure. */
127 raid6_2data_recov(disks, bytes, faila, failb, ptrs);
128 }
129 }
130}
131
132#endif
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c
deleted file mode 100644
index b274dd5eab8f..000000000000
--- a/drivers/md/raid6sse1.c
+++ /dev/null
@@ -1,162 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6sse1.c
15 *
16 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
17 *
18 * This is really an MMX implementation, but it requires SSE-1 or
19 * AMD MMXEXT for prefetch support and a few other features. The
20 * support for nontemporal memory accesses is enough to make this
21 * worthwhile as a separate implementation.
22 */
23
24#if defined(__i386__) && !defined(__arch_um__)
25
26#include <linux/raid/pq.h>
27#include "raid6x86.h"
28
29/* Defined in raid6mmx.c */
30extern const struct raid6_mmx_constants {
31 u64 x1d;
32} raid6_mmx_constants;
33
34static int raid6_have_sse1_or_mmxext(void)
35{
36 /* Not really boot_cpu but "all_cpus" */
37 return boot_cpu_has(X86_FEATURE_MMX) &&
38 (boot_cpu_has(X86_FEATURE_XMM) ||
39 boot_cpu_has(X86_FEATURE_MMXEXT));
40}
41
42/*
43 * Plain SSE1 implementation
44 */
45static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
46{
47 u8 **dptr = (u8 **)ptrs;
48 u8 *p, *q;
49 int d, z, z0;
50
51 z0 = disks - 3; /* Highest data disk */
52 p = dptr[z0+1]; /* XOR parity */
53 q = dptr[z0+2]; /* RS syndrome */
54
55 kernel_fpu_begin();
56
57 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
58 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
59
60 for ( d = 0 ; d < bytes ; d += 8 ) {
61 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
62 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
63 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
64 asm volatile("movq %mm2,%mm4"); /* Q[0] */
65 asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
66 for ( z = z0-2 ; z >= 0 ; z-- ) {
67 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
68 asm volatile("pcmpgtb %mm4,%mm5");
69 asm volatile("paddb %mm4,%mm4");
70 asm volatile("pand %mm0,%mm5");
71 asm volatile("pxor %mm5,%mm4");
72 asm volatile("pxor %mm5,%mm5");
73 asm volatile("pxor %mm6,%mm2");
74 asm volatile("pxor %mm6,%mm4");
75 asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
76 }
77 asm volatile("pcmpgtb %mm4,%mm5");
78 asm volatile("paddb %mm4,%mm4");
79 asm volatile("pand %mm0,%mm5");
80 asm volatile("pxor %mm5,%mm4");
81 asm volatile("pxor %mm5,%mm5");
82 asm volatile("pxor %mm6,%mm2");
83 asm volatile("pxor %mm6,%mm4");
84
85 asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
86 asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
87 }
88
89 asm volatile("sfence" : : : "memory");
90 kernel_fpu_end();
91}
92
93const struct raid6_calls raid6_sse1x1 = {
94 raid6_sse11_gen_syndrome,
95 raid6_have_sse1_or_mmxext,
96 "sse1x1",
97 1 /* Has cache hints */
98};
99
100/*
101 * Unrolled-by-2 SSE1 implementation
102 */
103static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
104{
105 u8 **dptr = (u8 **)ptrs;
106 u8 *p, *q;
107 int d, z, z0;
108
109 z0 = disks - 3; /* Highest data disk */
110 p = dptr[z0+1]; /* XOR parity */
111 q = dptr[z0+2]; /* RS syndrome */
112
113 kernel_fpu_begin();
114
115 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
116 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
117 asm volatile("pxor %mm7,%mm7"); /* Zero temp */
118
119 /* We uniformly assume a single prefetch covers at least 16 bytes */
120 for ( d = 0 ; d < bytes ; d += 16 ) {
121 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
122 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
123 asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
124 asm volatile("movq %mm2,%mm4"); /* Q[0] */
125 asm volatile("movq %mm3,%mm6"); /* Q[1] */
126 for ( z = z0-1 ; z >= 0 ; z-- ) {
127 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
128 asm volatile("pcmpgtb %mm4,%mm5");
129 asm volatile("pcmpgtb %mm6,%mm7");
130 asm volatile("paddb %mm4,%mm4");
131 asm volatile("paddb %mm6,%mm6");
132 asm volatile("pand %mm0,%mm5");
133 asm volatile("pand %mm0,%mm7");
134 asm volatile("pxor %mm5,%mm4");
135 asm volatile("pxor %mm7,%mm6");
136 asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
137 asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
138 asm volatile("pxor %mm5,%mm2");
139 asm volatile("pxor %mm7,%mm3");
140 asm volatile("pxor %mm5,%mm4");
141 asm volatile("pxor %mm7,%mm6");
142 asm volatile("pxor %mm5,%mm5");
143 asm volatile("pxor %mm7,%mm7");
144 }
145 asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
146 asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
147 asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
148 asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
149 }
150
151 asm volatile("sfence" : :: "memory");
152 kernel_fpu_end();
153}
154
155const struct raid6_calls raid6_sse1x2 = {
156 raid6_sse12_gen_syndrome,
157 raid6_have_sse1_or_mmxext,
158 "sse1x2",
159 1 /* Has cache hints */
160};
161
162#endif
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c
deleted file mode 100644
index 6ed6c6c0389f..000000000000
--- a/drivers/md/raid6sse2.c
+++ /dev/null
@@ -1,262 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6sse2.c
15 *
16 * SSE-2 implementation of RAID-6 syndrome functions
17 *
18 */
19
20#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
21
22#include <linux/raid/pq.h>
23#include "raid6x86.h"
24
25static const struct raid6_sse_constants {
26 u64 x1d[2];
27} raid6_sse_constants __attribute__((aligned(16))) = {
28 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
29};
30
31static int raid6_have_sse2(void)
32{
33 /* Not really boot_cpu but "all_cpus" */
34 return boot_cpu_has(X86_FEATURE_MMX) &&
35 boot_cpu_has(X86_FEATURE_FXSR) &&
36 boot_cpu_has(X86_FEATURE_XMM) &&
37 boot_cpu_has(X86_FEATURE_XMM2);
38}
39
40/*
41 * Plain SSE2 implementation
42 */
43static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
44{
45 u8 **dptr = (u8 **)ptrs;
46 u8 *p, *q;
47 int d, z, z0;
48
49 z0 = disks - 3; /* Highest data disk */
50 p = dptr[z0+1]; /* XOR parity */
51 q = dptr[z0+2]; /* RS syndrome */
52
53 kernel_fpu_begin();
54
55 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
56 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
57
58 for ( d = 0 ; d < bytes ; d += 16 ) {
59 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
60 asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
61 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
62 asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
63 asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
64 for ( z = z0-2 ; z >= 0 ; z-- ) {
65 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
66 asm volatile("pcmpgtb %xmm4,%xmm5");
67 asm volatile("paddb %xmm4,%xmm4");
68 asm volatile("pand %xmm0,%xmm5");
69 asm volatile("pxor %xmm5,%xmm4");
70 asm volatile("pxor %xmm5,%xmm5");
71 asm volatile("pxor %xmm6,%xmm2");
72 asm volatile("pxor %xmm6,%xmm4");
73 asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
74 }
75 asm volatile("pcmpgtb %xmm4,%xmm5");
76 asm volatile("paddb %xmm4,%xmm4");
77 asm volatile("pand %xmm0,%xmm5");
78 asm volatile("pxor %xmm5,%xmm4");
79 asm volatile("pxor %xmm5,%xmm5");
80 asm volatile("pxor %xmm6,%xmm2");
81 asm volatile("pxor %xmm6,%xmm4");
82
83 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
84 asm volatile("pxor %xmm2,%xmm2");
85 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
86 asm volatile("pxor %xmm4,%xmm4");
87 }
88
89 asm volatile("sfence" : : : "memory");
90 kernel_fpu_end();
91}
92
93const struct raid6_calls raid6_sse2x1 = {
94 raid6_sse21_gen_syndrome,
95 raid6_have_sse2,
96 "sse2x1",
97 1 /* Has cache hints */
98};
99
100/*
101 * Unrolled-by-2 SSE2 implementation
102 */
103static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
104{
105 u8 **dptr = (u8 **)ptrs;
106 u8 *p, *q;
107 int d, z, z0;
108
109 z0 = disks - 3; /* Highest data disk */
110 p = dptr[z0+1]; /* XOR parity */
111 q = dptr[z0+2]; /* RS syndrome */
112
113 kernel_fpu_begin();
114
115 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
116 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
117 asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
118
119 /* We uniformly assume a single prefetch covers at least 32 bytes */
120 for ( d = 0 ; d < bytes ; d += 32 ) {
121 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
122 asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
123 asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
124 asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
125 asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
126 for ( z = z0-1 ; z >= 0 ; z-- ) {
127 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
128 asm volatile("pcmpgtb %xmm4,%xmm5");
129 asm volatile("pcmpgtb %xmm6,%xmm7");
130 asm volatile("paddb %xmm4,%xmm4");
131 asm volatile("paddb %xmm6,%xmm6");
132 asm volatile("pand %xmm0,%xmm5");
133 asm volatile("pand %xmm0,%xmm7");
134 asm volatile("pxor %xmm5,%xmm4");
135 asm volatile("pxor %xmm7,%xmm6");
136 asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
137 asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
138 asm volatile("pxor %xmm5,%xmm2");
139 asm volatile("pxor %xmm7,%xmm3");
140 asm volatile("pxor %xmm5,%xmm4");
141 asm volatile("pxor %xmm7,%xmm6");
142 asm volatile("pxor %xmm5,%xmm5");
143 asm volatile("pxor %xmm7,%xmm7");
144 }
145 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
146 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
147 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
148 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
149 }
150
151 asm volatile("sfence" : : : "memory");
152 kernel_fpu_end();
153}
154
155const struct raid6_calls raid6_sse2x2 = {
156 raid6_sse22_gen_syndrome,
157 raid6_have_sse2,
158 "sse2x2",
159 1 /* Has cache hints */
160};
161
162#endif
163
164#if defined(__x86_64__) && !defined(__arch_um__)
165
166/*
167 * Unrolled-by-4 SSE2 implementation
168 */
169static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
170{
171 u8 **dptr = (u8 **)ptrs;
172 u8 *p, *q;
173 int d, z, z0;
174
175 z0 = disks - 3; /* Highest data disk */
176 p = dptr[z0+1]; /* XOR parity */
177 q = dptr[z0+2]; /* RS syndrome */
178
179 kernel_fpu_begin();
180
181 asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
182 asm volatile("pxor %xmm2,%xmm2"); /* P[0] */
183 asm volatile("pxor %xmm3,%xmm3"); /* P[1] */
184 asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */
185 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
186 asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */
187 asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
188 asm volatile("pxor %xmm10,%xmm10"); /* P[2] */
189 asm volatile("pxor %xmm11,%xmm11"); /* P[3] */
190 asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */
191 asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */
192 asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */
193 asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */
194
195 for ( d = 0 ; d < bytes ; d += 64 ) {
196 for ( z = z0 ; z >= 0 ; z-- ) {
197 /* The second prefetch seems to improve performance... */
198 asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
199 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
200 asm volatile("pcmpgtb %xmm4,%xmm5");
201 asm volatile("pcmpgtb %xmm6,%xmm7");
202 asm volatile("pcmpgtb %xmm12,%xmm13");
203 asm volatile("pcmpgtb %xmm14,%xmm15");
204 asm volatile("paddb %xmm4,%xmm4");
205 asm volatile("paddb %xmm6,%xmm6");
206 asm volatile("paddb %xmm12,%xmm12");
207 asm volatile("paddb %xmm14,%xmm14");
208 asm volatile("pand %xmm0,%xmm5");
209 asm volatile("pand %xmm0,%xmm7");
210 asm volatile("pand %xmm0,%xmm13");
211 asm volatile("pand %xmm0,%xmm15");
212 asm volatile("pxor %xmm5,%xmm4");
213 asm volatile("pxor %xmm7,%xmm6");
214 asm volatile("pxor %xmm13,%xmm12");
215 asm volatile("pxor %xmm15,%xmm14");
216 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
217 asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
218 asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
219 asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
220 asm volatile("pxor %xmm5,%xmm2");
221 asm volatile("pxor %xmm7,%xmm3");
222 asm volatile("pxor %xmm13,%xmm10");
223 asm volatile("pxor %xmm15,%xmm11");
224 asm volatile("pxor %xmm5,%xmm4");
225 asm volatile("pxor %xmm7,%xmm6");
226 asm volatile("pxor %xmm13,%xmm12");
227 asm volatile("pxor %xmm15,%xmm14");
228 asm volatile("pxor %xmm5,%xmm5");
229 asm volatile("pxor %xmm7,%xmm7");
230 asm volatile("pxor %xmm13,%xmm13");
231 asm volatile("pxor %xmm15,%xmm15");
232 }
233 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
234 asm volatile("pxor %xmm2,%xmm2");
235 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
236 asm volatile("pxor %xmm3,%xmm3");
237 asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
238 asm volatile("pxor %xmm10,%xmm10");
239 asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
240 asm volatile("pxor %xmm11,%xmm11");
241 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
242 asm volatile("pxor %xmm4,%xmm4");
243 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
244 asm volatile("pxor %xmm6,%xmm6");
245 asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
246 asm volatile("pxor %xmm12,%xmm12");
247 asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
248 asm volatile("pxor %xmm14,%xmm14");
249 }
250
251 asm volatile("sfence" : : : "memory");
252 kernel_fpu_end();
253}
254
255const struct raid6_calls raid6_sse2x4 = {
256 raid6_sse24_gen_syndrome,
257 raid6_have_sse2,
258 "sse2x4",
259 1 /* Has cache hints */
260};
261
262#endif
diff --git a/drivers/md/raid6test/Makefile b/drivers/md/raid6test/Makefile
deleted file mode 100644
index 2874cbef529d..000000000000
--- a/drivers/md/raid6test/Makefile
+++ /dev/null
@@ -1,75 +0,0 @@
#
# This is a simple Makefile to test some of the RAID-6 code
# from userspace.
#
# Sources are copied in from the parent directory (%.c and %.uc rules),
# the unrolled variants are generated with ../unroll.awk, and everything
# is archived into raid6.a and linked with test.c into raid6test.
#

CC	 = gcc
OPTFLAGS = -O2			# Adjust as desired
CFLAGS	 = -I.. -I ../../../include -g $(OPTFLAGS)
LD	 = ld
AWK	 = awk
AR	 = ar
RANLIB	 = ranlib

# None of these targets name a file they create
.PHONY: all clean spotless

.c.o:
	$(CC) $(CFLAGS) -c -o $@ $<

%.c: ../%.c
	cp -f $< $@

%.uc: ../%.uc
	cp -f $< $@

all:	raid6.a raid6test

raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \
	 raid6int32.o \
	 raid6mmx.o raid6sse1.o raid6sse2.o \
	 raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \
	 raid6recov.o raid6algos.o \
	 raid6tables.o
	 rm -f $@
	 $(AR) cq $@ $^
	 $(RANLIB) $@

raid6test: test.c raid6.a
	$(CC) $(CFLAGS) -o raid6test $^

raid6altivec1.c: raid6altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=1 < raid6altivec.uc > $@

raid6altivec2.c: raid6altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=2 < raid6altivec.uc > $@

raid6altivec4.c: raid6altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=4 < raid6altivec.uc > $@

raid6altivec8.c: raid6altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=8 < raid6altivec.uc > $@

raid6int1.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=1 < raid6int.uc > $@

raid6int2.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=2 < raid6int.uc > $@

raid6int4.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=4 < raid6int.uc > $@

raid6int8.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=8 < raid6int.uc > $@

raid6int16.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=16 < raid6int.uc > $@

raid6int32.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=32 < raid6int.uc > $@

raid6tables.c: mktables
	./mktables > raid6tables.c

# Both .uc files are local copies made by the %.uc rule, so remove both
clean:
	rm -f *.o *.a mktables mktables.c raid6int.uc raid6altivec.uc raid6*.c raid6test

spotless: clean
	rm -f *~
diff --git a/drivers/md/raid6test/test.c b/drivers/md/raid6test/test.c
deleted file mode 100644
index 7a930318b17d..000000000000
--- a/drivers/md/raid6test/test.c
+++ /dev/null
@@ -1,124 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * raid6test.c
13 *
14 * Test RAID-6 recovery with various algorithms
15 */
16
17#include <stdlib.h>
18#include <stdio.h>
19#include <string.h>
20#include <linux/raid/pq.h>
21
22#define NDISKS 16 /* Including P and Q */
23
24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
25struct raid6_calls raid6_call;
26
27char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE];
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
30
31static void makedata(void)
32{
33 int i, j;
34
35 for (i = 0; i < NDISKS; i++) {
36 for (j = 0; j < PAGE_SIZE; j++)
37 data[i][j] = rand();
38
39 dataptrs[i] = data[i];
40 }
41}
42
43static char disk_type(int d)
44{
45 switch (d) {
46 case NDISKS-2:
47 return 'P';
48 case NDISKS-1:
49 return 'Q';
50 default:
51 return 'D';
52 }
53}
54
55static int test_disks(int i, int j)
56{
57 int erra, errb;
58
59 memset(recovi, 0xf0, PAGE_SIZE);
60 memset(recovj, 0xba, PAGE_SIZE);
61
62 dataptrs[i] = recovi;
63 dataptrs[j] = recovj;
64
65 raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);
66
67 erra = memcmp(data[i], recovi, PAGE_SIZE);
68 errb = memcmp(data[j], recovj, PAGE_SIZE);
69
70 if (i < NDISKS-2 && j == NDISKS-1) {
71 /* We don't implement the DQ failure scenario, since it's
72 equivalent to a RAID-5 failure (XOR, then recompute Q) */
73 erra = errb = 0;
74 } else {
75 printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n",
76 raid6_call.name,
77 i, disk_type(i),
78 j, disk_type(j),
79 (!erra && !errb) ? "OK" :
80 !erra ? "ERRB" :
81 !errb ? "ERRA" : "ERRAB");
82 }
83
84 dataptrs[i] = data[i];
85 dataptrs[j] = data[j];
86
87 return erra || errb;
88}
89
90int main(int argc, char *argv[])
91{
92 const struct raid6_calls *const *algo;
93 int i, j;
94 int err = 0;
95
96 makedata();
97
98 for (algo = raid6_algos; *algo; algo++) {
99 if (!(*algo)->valid || (*algo)->valid()) {
100 raid6_call = **algo;
101
102 /* Nuke syndromes */
103 memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
104
105 /* Generate assumed good syndrome */
106 raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
107 (void **)&dataptrs);
108
109 for (i = 0; i < NDISKS-1; i++)
110 for (j = i+1; j < NDISKS; j++)
111 err += test_disks(i, j);
112 }
113 printf("\n");
114 }
115
116 printf("\n");
117 /* Pick the best algorithm test */
118 raid6_select_algo();
119
120 if (err)
121 printf("\n*** ERRORS FOUND ***\n");
122
123 return err;
124}
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h
deleted file mode 100644
index 4c22c1568558..000000000000
--- a/drivers/md/raid6x86.h
+++ /dev/null
@@ -1,61 +0,0 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6x86.h
15 *
16 * Definitions common to x86 and x86-64 RAID-6 code only
17 */
18
19#ifndef LINUX_RAID_RAID6X86_H
20#define LINUX_RAID_RAID6X86_H
21
22#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
23
24#ifdef __KERNEL__ /* Real code */
25
26#include <asm/i387.h>
27
28#else /* Dummy code for user space testing */
29
/* Dummy for user-space testing: does nothing */
static inline void kernel_fpu_begin(void) { }
33
/* Dummy for user-space testing: does nothing */
static inline void kernel_fpu_end(void) { }
37
/*
 * Feature flags for the user-space boot_cpu_has() below, written as
 * word*32 + bit.
 */
#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore) */
#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
44
45/* Should work well enough on modern CPUs for testing */
46static inline int boot_cpu_has(int flag)
47{
48 u32 eax = (flag >> 5) ? 0x80000001 : 1;
49 u32 edx;
50
51 asm volatile("cpuid"
52 : "+a" (eax), "=d" (edx)
53 : : "ecx", "ebx");
54
55 return (edx >> (flag & 31)) & 1;
56}
57
58#endif /* ndef __KERNEL__ */
59
60#endif
61#endif
diff --git a/drivers/md/unroll.awk b/drivers/md/unroll.awk
deleted file mode 100644
index c6aa03631df8..000000000000
--- a/drivers/md/unroll.awk
+++ /dev/null
@@ -1,20 +0,0 @@
1
# This filter requires one command line option of form -vN=n
# where n must be a decimal number.
#
# Repeat each input line containing $$ n times, replacing $$ with 0...n-1.
# Replace each $# with n, and each $* with a single $.

BEGIN {
	count = N + 0		# force N to a number
}

{
	# Lines without $$ are emitted exactly once
	copies = ($0 ~ /\$\$/) ? count : 1

	for (k = 0; k < copies; k++) {
		line = $0
		gsub(/\$\$/, k, line)
		gsub(/\$\#/, count, line)
		gsub(/\$\*/, "$", line)
		print line
	}
}