diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-10 18:38:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-10 18:38:19 -0400 |
commit | 3d30701b58970425e1d45994d6cb82f828924fdd (patch) | |
tree | 8b14cf462628bebf8548c1b8c205a674564052d1 /drivers/md | |
parent | 8cbd84f2dd4e52a8771b191030c374ba3e56d291 (diff) | |
parent | fd8aa2c1811bf60ccb2d5de0579c6f62aec1772d (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (24 commits)
md: clean up do_md_stop
md: fix another deadlock with removing sysfs attributes.
md: move revalidate_disk() back outside open_mutex
md/raid10: fix deadlock with unaligned read during resync
md/bitmap: separate out loading a bitmap from initialising the structures.
md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
md/bitmap: optimise scanning of empty bitmaps.
md/bitmap: clean up plugging calls.
md/bitmap: reduce dependence on sysfs.
md/bitmap: white space clean up and similar.
md/raid5: export raid5 unplugging interface.
md/plug: optionally use plugger to unplug an array during resync/recovery.
md/raid5: add simple plugging infrastructure.
md/raid5: export is_congested test
raid5: Don't set read-ahead when there is no queue
md: add support for raising dm events.
md: export various start/stop interfaces
md: split out md_rdev_init
md: be more careful setting MD_CHANGE_CLEAN
md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
...
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 18 | ||||
-rw-r--r-- | drivers/md/Makefile | 77 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 508 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 6 | ||||
-rw-r--r-- | drivers/md/md.c | 286 | ||||
-rw-r--r-- | drivers/md/md.h | 55 | ||||
-rw-r--r-- | drivers/md/mktables.c | 132 | ||||
-rw-r--r-- | drivers/md/raid10.c | 18 | ||||
-rw-r--r-- | drivers/md/raid5.c | 168 | ||||
-rw-r--r-- | drivers/md/raid5.h | 9 | ||||
-rw-r--r-- | drivers/md/raid6algos.c | 154 | ||||
-rw-r--r-- | drivers/md/raid6altivec.uc | 130 | ||||
-rw-r--r-- | drivers/md/raid6int.uc | 117 | ||||
-rw-r--r-- | drivers/md/raid6mmx.c | 142 | ||||
-rw-r--r-- | drivers/md/raid6recov.c | 132 | ||||
-rw-r--r-- | drivers/md/raid6sse1.c | 162 | ||||
-rw-r--r-- | drivers/md/raid6sse2.c | 262 | ||||
-rw-r--r-- | drivers/md/raid6test/Makefile | 75 | ||||
-rw-r--r-- | drivers/md/raid6test/test.c | 124 | ||||
-rw-r--r-- | drivers/md/raid6x86.h | 61 | ||||
-rw-r--r-- | drivers/md/unroll.awk | 20 |
21 files changed, 644 insertions, 2012 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 4a6feac8c94a..bf1a95e31559 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -121,7 +121,7 @@ config MD_RAID10 | |||
121 | config MD_RAID456 | 121 | config MD_RAID456 |
122 | tristate "RAID-4/RAID-5/RAID-6 mode" | 122 | tristate "RAID-4/RAID-5/RAID-6 mode" |
123 | depends on BLK_DEV_MD | 123 | depends on BLK_DEV_MD |
124 | select MD_RAID6_PQ | 124 | select RAID6_PQ |
125 | select ASYNC_MEMCPY | 125 | select ASYNC_MEMCPY |
126 | select ASYNC_XOR | 126 | select ASYNC_XOR |
127 | select ASYNC_PQ | 127 | select ASYNC_PQ |
@@ -165,22 +165,6 @@ config MULTICORE_RAID456 | |||
165 | 165 | ||
166 | If unsure, say N. | 166 | If unsure, say N. |
167 | 167 | ||
168 | config MD_RAID6_PQ | ||
169 | tristate | ||
170 | |||
171 | config ASYNC_RAID6_TEST | ||
172 | tristate "Self test for hardware accelerated raid6 recovery" | ||
173 | depends on MD_RAID6_PQ | ||
174 | select ASYNC_RAID6_RECOV | ||
175 | ---help--- | ||
176 | This is a one-shot self test that permutes through the | ||
177 | recovery of all the possible two disk failure scenarios for a | ||
178 | N-disk array. Recovery is performed with the asynchronous | ||
179 | raid6 recovery routines, and will optionally use an offload | ||
180 | engine if one is available. | ||
181 | |||
182 | If unsure, say N. | ||
183 | |||
184 | config MD_MULTIPATH | 168 | config MD_MULTIPATH |
185 | tristate "Multipath I/O support" | 169 | tristate "Multipath I/O support" |
186 | depends on BLK_DEV_MD | 170 | depends on BLK_DEV_MD |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index e355e7f6a536..5e3aac41919d 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
@@ -12,13 +12,6 @@ dm-log-userspace-y \ | |||
12 | += dm-log-userspace-base.o dm-log-userspace-transfer.o | 12 | += dm-log-userspace-base.o dm-log-userspace-transfer.o |
13 | md-mod-y += md.o bitmap.o | 13 | md-mod-y += md.o bitmap.o |
14 | raid456-y += raid5.o | 14 | raid456-y += raid5.o |
15 | raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ | ||
16 | raid6int1.o raid6int2.o raid6int4.o \ | ||
17 | raid6int8.o raid6int16.o raid6int32.o \ | ||
18 | raid6altivec1.o raid6altivec2.o raid6altivec4.o \ | ||
19 | raid6altivec8.o \ | ||
20 | raid6mmx.o raid6sse1.o raid6sse2.o | ||
21 | hostprogs-y += mktables | ||
22 | 15 | ||
23 | # Note: link order is important. All raid personalities | 16 | # Note: link order is important. All raid personalities |
24 | # and must come before md.o, as they each initialise | 17 | # and must come before md.o, as they each initialise |
@@ -29,7 +22,6 @@ obj-$(CONFIG_MD_LINEAR) += linear.o | |||
29 | obj-$(CONFIG_MD_RAID0) += raid0.o | 22 | obj-$(CONFIG_MD_RAID0) += raid0.o |
30 | obj-$(CONFIG_MD_RAID1) += raid1.o | 23 | obj-$(CONFIG_MD_RAID1) += raid1.o |
31 | obj-$(CONFIG_MD_RAID10) += raid10.o | 24 | obj-$(CONFIG_MD_RAID10) += raid10.o |
32 | obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o | ||
33 | obj-$(CONFIG_MD_RAID456) += raid456.o | 25 | obj-$(CONFIG_MD_RAID456) += raid456.o |
34 | obj-$(CONFIG_MD_MULTIPATH) += multipath.o | 26 | obj-$(CONFIG_MD_MULTIPATH) += multipath.o |
35 | obj-$(CONFIG_MD_FAULTY) += faulty.o | 27 | obj-$(CONFIG_MD_FAULTY) += faulty.o |
@@ -45,75 +37,6 @@ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o | |||
45 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o | 37 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o |
46 | obj-$(CONFIG_DM_ZERO) += dm-zero.o | 38 | obj-$(CONFIG_DM_ZERO) += dm-zero.o |
47 | 39 | ||
48 | quiet_cmd_unroll = UNROLL $@ | ||
49 | cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ | ||
50 | < $< > $@ || ( rm -f $@ && exit 1 ) | ||
51 | |||
52 | ifeq ($(CONFIG_ALTIVEC),y) | ||
53 | altivec_flags := -maltivec -mabi=altivec | ||
54 | endif | ||
55 | |||
56 | ifeq ($(CONFIG_DM_UEVENT),y) | 40 | ifeq ($(CONFIG_DM_UEVENT),y) |
57 | dm-mod-objs += dm-uevent.o | 41 | dm-mod-objs += dm-uevent.o |
58 | endif | 42 | endif |
59 | |||
60 | targets += raid6int1.c | ||
61 | $(obj)/raid6int1.c: UNROLL := 1 | ||
62 | $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
63 | $(call if_changed,unroll) | ||
64 | |||
65 | targets += raid6int2.c | ||
66 | $(obj)/raid6int2.c: UNROLL := 2 | ||
67 | $(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
68 | $(call if_changed,unroll) | ||
69 | |||
70 | targets += raid6int4.c | ||
71 | $(obj)/raid6int4.c: UNROLL := 4 | ||
72 | $(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
73 | $(call if_changed,unroll) | ||
74 | |||
75 | targets += raid6int8.c | ||
76 | $(obj)/raid6int8.c: UNROLL := 8 | ||
77 | $(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
78 | $(call if_changed,unroll) | ||
79 | |||
80 | targets += raid6int16.c | ||
81 | $(obj)/raid6int16.c: UNROLL := 16 | ||
82 | $(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
83 | $(call if_changed,unroll) | ||
84 | |||
85 | targets += raid6int32.c | ||
86 | $(obj)/raid6int32.c: UNROLL := 32 | ||
87 | $(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
88 | $(call if_changed,unroll) | ||
89 | |||
90 | CFLAGS_raid6altivec1.o += $(altivec_flags) | ||
91 | targets += raid6altivec1.c | ||
92 | $(obj)/raid6altivec1.c: UNROLL := 1 | ||
93 | $(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
94 | $(call if_changed,unroll) | ||
95 | |||
96 | CFLAGS_raid6altivec2.o += $(altivec_flags) | ||
97 | targets += raid6altivec2.c | ||
98 | $(obj)/raid6altivec2.c: UNROLL := 2 | ||
99 | $(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
100 | $(call if_changed,unroll) | ||
101 | |||
102 | CFLAGS_raid6altivec4.o += $(altivec_flags) | ||
103 | targets += raid6altivec4.c | ||
104 | $(obj)/raid6altivec4.c: UNROLL := 4 | ||
105 | $(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
106 | $(call if_changed,unroll) | ||
107 | |||
108 | CFLAGS_raid6altivec8.o += $(altivec_flags) | ||
109 | targets += raid6altivec8.c | ||
110 | $(obj)/raid6altivec8.c: UNROLL := 8 | ||
111 | $(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
112 | $(call if_changed,unroll) | ||
113 | |||
114 | quiet_cmd_mktable = TABLE $@ | ||
115 | cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) | ||
116 | |||
117 | targets += raid6tables.c | ||
118 | $(obj)/raid6tables.c: $(obj)/mktables FORCE | ||
119 | $(call if_changed,mktable) | ||
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 1742435ce3ae..1ba1e122e948 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -13,7 +13,6 @@ | |||
13 | * Still to do: | 13 | * Still to do: |
14 | * | 14 | * |
15 | * flush after percent set rather than just time based. (maybe both). | 15 | * flush after percent set rather than just time based. (maybe both). |
16 | * wait if count gets too high, wake when it drops to half. | ||
17 | */ | 16 | */ |
18 | 17 | ||
19 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
@@ -30,6 +29,7 @@ | |||
30 | #include "md.h" | 29 | #include "md.h" |
31 | #include "bitmap.h" | 30 | #include "bitmap.h" |
32 | 31 | ||
32 | #include <linux/dm-dirty-log.h> | ||
33 | /* debug macros */ | 33 | /* debug macros */ |
34 | 34 | ||
35 | #define DEBUG 0 | 35 | #define DEBUG 0 |
@@ -51,9 +51,6 @@ | |||
51 | #define INJECT_FATAL_FAULT_3 0 /* undef */ | 51 | #define INJECT_FATAL_FAULT_3 0 /* undef */ |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | //#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */ | ||
55 | #define DPRINTK(x...) do { } while(0) | ||
56 | |||
57 | #ifndef PRINTK | 54 | #ifndef PRINTK |
58 | # if DEBUG > 0 | 55 | # if DEBUG > 0 |
59 | # define PRINTK(x...) printk(KERN_DEBUG x) | 56 | # define PRINTK(x...) printk(KERN_DEBUG x) |
@@ -62,12 +59,11 @@ | |||
62 | # endif | 59 | # endif |
63 | #endif | 60 | #endif |
64 | 61 | ||
65 | static inline char * bmname(struct bitmap *bitmap) | 62 | static inline char *bmname(struct bitmap *bitmap) |
66 | { | 63 | { |
67 | return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; | 64 | return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; |
68 | } | 65 | } |
69 | 66 | ||
70 | |||
71 | /* | 67 | /* |
72 | * just a placeholder - calls kmalloc for bitmap pages | 68 | * just a placeholder - calls kmalloc for bitmap pages |
73 | */ | 69 | */ |
@@ -78,7 +74,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) | |||
78 | #ifdef INJECT_FAULTS_1 | 74 | #ifdef INJECT_FAULTS_1 |
79 | page = NULL; | 75 | page = NULL; |
80 | #else | 76 | #else |
81 | page = kmalloc(PAGE_SIZE, GFP_NOIO); | 77 | page = kzalloc(PAGE_SIZE, GFP_NOIO); |
82 | #endif | 78 | #endif |
83 | if (!page) | 79 | if (!page) |
84 | printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); | 80 | printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); |
@@ -107,7 +103,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) | |||
107 | * if we find our page, we increment the page's refcount so that it stays | 103 | * if we find our page, we increment the page's refcount so that it stays |
108 | * allocated while we're using it | 104 | * allocated while we're using it |
109 | */ | 105 | */ |
110 | static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) | 106 | static int bitmap_checkpage(struct bitmap *bitmap, |
107 | unsigned long page, int create) | ||
111 | __releases(bitmap->lock) | 108 | __releases(bitmap->lock) |
112 | __acquires(bitmap->lock) | 109 | __acquires(bitmap->lock) |
113 | { | 110 | { |
@@ -121,7 +118,6 @@ __acquires(bitmap->lock) | |||
121 | return -EINVAL; | 118 | return -EINVAL; |
122 | } | 119 | } |
123 | 120 | ||
124 | |||
125 | if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ | 121 | if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ |
126 | return 0; | 122 | return 0; |
127 | 123 | ||
@@ -131,43 +127,34 @@ __acquires(bitmap->lock) | |||
131 | if (!create) | 127 | if (!create) |
132 | return -ENOENT; | 128 | return -ENOENT; |
133 | 129 | ||
134 | spin_unlock_irq(&bitmap->lock); | ||
135 | |||
136 | /* this page has not been allocated yet */ | 130 | /* this page has not been allocated yet */ |
137 | 131 | ||
138 | if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { | 132 | spin_unlock_irq(&bitmap->lock); |
133 | mappage = bitmap_alloc_page(bitmap); | ||
134 | spin_lock_irq(&bitmap->lock); | ||
135 | |||
136 | if (mappage == NULL) { | ||
139 | PRINTK("%s: bitmap map page allocation failed, hijacking\n", | 137 | PRINTK("%s: bitmap map page allocation failed, hijacking\n", |
140 | bmname(bitmap)); | 138 | bmname(bitmap)); |
141 | /* failed - set the hijacked flag so that we can use the | 139 | /* failed - set the hijacked flag so that we can use the |
142 | * pointer as a counter */ | 140 | * pointer as a counter */ |
143 | spin_lock_irq(&bitmap->lock); | ||
144 | if (!bitmap->bp[page].map) | 141 | if (!bitmap->bp[page].map) |
145 | bitmap->bp[page].hijacked = 1; | 142 | bitmap->bp[page].hijacked = 1; |
146 | goto out; | 143 | } else if (bitmap->bp[page].map || |
147 | } | 144 | bitmap->bp[page].hijacked) { |
148 | |||
149 | /* got a page */ | ||
150 | |||
151 | spin_lock_irq(&bitmap->lock); | ||
152 | |||
153 | /* recheck the page */ | ||
154 | |||
155 | if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { | ||
156 | /* somebody beat us to getting the page */ | 145 | /* somebody beat us to getting the page */ |
157 | bitmap_free_page(bitmap, mappage); | 146 | bitmap_free_page(bitmap, mappage); |
158 | return 0; | 147 | return 0; |
159 | } | 148 | } else { |
160 | 149 | ||
161 | /* no page was in place and we have one, so install it */ | 150 | /* no page was in place and we have one, so install it */ |
162 | 151 | ||
163 | memset(mappage, 0, PAGE_SIZE); | 152 | bitmap->bp[page].map = mappage; |
164 | bitmap->bp[page].map = mappage; | 153 | bitmap->missing_pages--; |
165 | bitmap->missing_pages--; | 154 | } |
166 | out: | ||
167 | return 0; | 155 | return 0; |
168 | } | 156 | } |
169 | 157 | ||
170 | |||
171 | /* if page is completely empty, put it back on the free list, or dealloc it */ | 158 | /* if page is completely empty, put it back on the free list, or dealloc it */ |
172 | /* if page was hijacked, unmark the flag so it might get alloced next time */ | 159 | /* if page was hijacked, unmark the flag so it might get alloced next time */ |
173 | /* Note: lock should be held when calling this */ | 160 | /* Note: lock should be held when calling this */ |
@@ -183,26 +170,15 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) | |||
183 | if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ | 170 | if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ |
184 | bitmap->bp[page].hijacked = 0; | 171 | bitmap->bp[page].hijacked = 0; |
185 | bitmap->bp[page].map = NULL; | 172 | bitmap->bp[page].map = NULL; |
186 | return; | 173 | } else { |
174 | /* normal case, free the page */ | ||
175 | ptr = bitmap->bp[page].map; | ||
176 | bitmap->bp[page].map = NULL; | ||
177 | bitmap->missing_pages++; | ||
178 | bitmap_free_page(bitmap, ptr); | ||
187 | } | 179 | } |
188 | |||
189 | /* normal case, free the page */ | ||
190 | |||
191 | #if 0 | ||
192 | /* actually ... let's not. We will probably need the page again exactly when | ||
193 | * memory is tight and we are flusing to disk | ||
194 | */ | ||
195 | return; | ||
196 | #else | ||
197 | ptr = bitmap->bp[page].map; | ||
198 | bitmap->bp[page].map = NULL; | ||
199 | bitmap->missing_pages++; | ||
200 | bitmap_free_page(bitmap, ptr); | ||
201 | return; | ||
202 | #endif | ||
203 | } | 180 | } |
204 | 181 | ||
205 | |||
206 | /* | 182 | /* |
207 | * bitmap file handling - read and write the bitmap file and its superblock | 183 | * bitmap file handling - read and write the bitmap file and its superblock |
208 | */ | 184 | */ |
@@ -220,11 +196,14 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, | |||
220 | 196 | ||
221 | mdk_rdev_t *rdev; | 197 | mdk_rdev_t *rdev; |
222 | sector_t target; | 198 | sector_t target; |
199 | int did_alloc = 0; | ||
223 | 200 | ||
224 | if (!page) | 201 | if (!page) { |
225 | page = alloc_page(GFP_KERNEL); | 202 | page = alloc_page(GFP_KERNEL); |
226 | if (!page) | 203 | if (!page) |
227 | return ERR_PTR(-ENOMEM); | 204 | return ERR_PTR(-ENOMEM); |
205 | did_alloc = 1; | ||
206 | } | ||
228 | 207 | ||
229 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 208 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
230 | if (! test_bit(In_sync, &rdev->flags) | 209 | if (! test_bit(In_sync, &rdev->flags) |
@@ -242,6 +221,8 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, | |||
242 | return page; | 221 | return page; |
243 | } | 222 | } |
244 | } | 223 | } |
224 | if (did_alloc) | ||
225 | put_page(page); | ||
245 | return ERR_PTR(-EIO); | 226 | return ERR_PTR(-EIO); |
246 | 227 | ||
247 | } | 228 | } |
@@ -286,49 +267,51 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
286 | mddev_t *mddev = bitmap->mddev; | 267 | mddev_t *mddev = bitmap->mddev; |
287 | 268 | ||
288 | while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { | 269 | while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { |
289 | int size = PAGE_SIZE; | 270 | int size = PAGE_SIZE; |
290 | loff_t offset = mddev->bitmap_info.offset; | 271 | loff_t offset = mddev->bitmap_info.offset; |
291 | if (page->index == bitmap->file_pages-1) | 272 | if (page->index == bitmap->file_pages-1) |
292 | size = roundup(bitmap->last_page_size, | 273 | size = roundup(bitmap->last_page_size, |
293 | bdev_logical_block_size(rdev->bdev)); | 274 | bdev_logical_block_size(rdev->bdev)); |
294 | /* Just make sure we aren't corrupting data or | 275 | /* Just make sure we aren't corrupting data or |
295 | * metadata | 276 | * metadata |
296 | */ | 277 | */ |
297 | if (mddev->external) { | 278 | if (mddev->external) { |
298 | /* Bitmap could be anywhere. */ | 279 | /* Bitmap could be anywhere. */ |
299 | if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) > | 280 | if (rdev->sb_start + offset + (page->index |
300 | rdev->data_offset && | 281 | * (PAGE_SIZE/512)) |
301 | rdev->sb_start + offset < | 282 | > rdev->data_offset |
302 | rdev->data_offset + mddev->dev_sectors + | 283 | && |
303 | (PAGE_SIZE/512)) | 284 | rdev->sb_start + offset |
304 | goto bad_alignment; | 285 | < (rdev->data_offset + mddev->dev_sectors |
305 | } else if (offset < 0) { | 286 | + (PAGE_SIZE/512))) |
306 | /* DATA BITMAP METADATA */ | 287 | goto bad_alignment; |
307 | if (offset | 288 | } else if (offset < 0) { |
308 | + (long)(page->index * (PAGE_SIZE/512)) | 289 | /* DATA BITMAP METADATA */ |
309 | + size/512 > 0) | 290 | if (offset |
310 | /* bitmap runs in to metadata */ | 291 | + (long)(page->index * (PAGE_SIZE/512)) |
311 | goto bad_alignment; | 292 | + size/512 > 0) |
312 | if (rdev->data_offset + mddev->dev_sectors | 293 | /* bitmap runs in to metadata */ |
313 | > rdev->sb_start + offset) | 294 | goto bad_alignment; |
314 | /* data runs in to bitmap */ | 295 | if (rdev->data_offset + mddev->dev_sectors |
315 | goto bad_alignment; | 296 | > rdev->sb_start + offset) |
316 | } else if (rdev->sb_start < rdev->data_offset) { | 297 | /* data runs in to bitmap */ |
317 | /* METADATA BITMAP DATA */ | 298 | goto bad_alignment; |
318 | if (rdev->sb_start | 299 | } else if (rdev->sb_start < rdev->data_offset) { |
319 | + offset | 300 | /* METADATA BITMAP DATA */ |
320 | + page->index*(PAGE_SIZE/512) + size/512 | 301 | if (rdev->sb_start |
321 | > rdev->data_offset) | 302 | + offset |
322 | /* bitmap runs in to data */ | 303 | + page->index*(PAGE_SIZE/512) + size/512 |
323 | goto bad_alignment; | 304 | > rdev->data_offset) |
324 | } else { | 305 | /* bitmap runs in to data */ |
325 | /* DATA METADATA BITMAP - no problems */ | 306 | goto bad_alignment; |
326 | } | 307 | } else { |
327 | md_super_write(mddev, rdev, | 308 | /* DATA METADATA BITMAP - no problems */ |
328 | rdev->sb_start + offset | 309 | } |
329 | + page->index * (PAGE_SIZE/512), | 310 | md_super_write(mddev, rdev, |
330 | size, | 311 | rdev->sb_start + offset |
331 | page); | 312 | + page->index * (PAGE_SIZE/512), |
313 | size, | ||
314 | page); | ||
332 | } | 315 | } |
333 | 316 | ||
334 | if (wait) | 317 | if (wait) |
@@ -364,10 +347,9 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) | |||
364 | bh = bh->b_this_page; | 347 | bh = bh->b_this_page; |
365 | } | 348 | } |
366 | 349 | ||
367 | if (wait) { | 350 | if (wait) |
368 | wait_event(bitmap->write_wait, | 351 | wait_event(bitmap->write_wait, |
369 | atomic_read(&bitmap->pending_writes)==0); | 352 | atomic_read(&bitmap->pending_writes)==0); |
370 | } | ||
371 | } | 353 | } |
372 | if (bitmap->flags & BITMAP_WRITE_ERROR) | 354 | if (bitmap->flags & BITMAP_WRITE_ERROR) |
373 | bitmap_file_kick(bitmap); | 355 | bitmap_file_kick(bitmap); |
@@ -424,7 +406,7 @@ static struct page *read_page(struct file *file, unsigned long index, | |||
424 | struct buffer_head *bh; | 406 | struct buffer_head *bh; |
425 | sector_t block; | 407 | sector_t block; |
426 | 408 | ||
427 | PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, | 409 | PRINTK("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, |
428 | (unsigned long long)index << PAGE_SHIFT); | 410 | (unsigned long long)index << PAGE_SHIFT); |
429 | 411 | ||
430 | page = alloc_page(GFP_KERNEL); | 412 | page = alloc_page(GFP_KERNEL); |
@@ -478,7 +460,7 @@ static struct page *read_page(struct file *file, unsigned long index, | |||
478 | } | 460 | } |
479 | out: | 461 | out: |
480 | if (IS_ERR(page)) | 462 | if (IS_ERR(page)) |
481 | printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", | 463 | printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n", |
482 | (int)PAGE_SIZE, | 464 | (int)PAGE_SIZE, |
483 | (unsigned long long)index << PAGE_SHIFT, | 465 | (unsigned long long)index << PAGE_SHIFT, |
484 | PTR_ERR(page)); | 466 | PTR_ERR(page)); |
@@ -664,11 +646,14 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, | |||
664 | sb = kmap_atomic(bitmap->sb_page, KM_USER0); | 646 | sb = kmap_atomic(bitmap->sb_page, KM_USER0); |
665 | old = le32_to_cpu(sb->state) & bits; | 647 | old = le32_to_cpu(sb->state) & bits; |
666 | switch (op) { | 648 | switch (op) { |
667 | case MASK_SET: sb->state |= cpu_to_le32(bits); | 649 | case MASK_SET: |
668 | break; | 650 | sb->state |= cpu_to_le32(bits); |
669 | case MASK_UNSET: sb->state &= cpu_to_le32(~bits); | 651 | break; |
670 | break; | 652 | case MASK_UNSET: |
671 | default: BUG(); | 653 | sb->state &= cpu_to_le32(~bits); |
654 | break; | ||
655 | default: | ||
656 | BUG(); | ||
672 | } | 657 | } |
673 | kunmap_atomic(sb, KM_USER0); | 658 | kunmap_atomic(sb, KM_USER0); |
674 | return old; | 659 | return old; |
@@ -710,12 +695,14 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon | |||
710 | static inline struct page *filemap_get_page(struct bitmap *bitmap, | 695 | static inline struct page *filemap_get_page(struct bitmap *bitmap, |
711 | unsigned long chunk) | 696 | unsigned long chunk) |
712 | { | 697 | { |
713 | if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; | 698 | if (bitmap->filemap == NULL) |
699 | return NULL; | ||
700 | if (file_page_index(bitmap, chunk) >= bitmap->file_pages) | ||
701 | return NULL; | ||
714 | return bitmap->filemap[file_page_index(bitmap, chunk) | 702 | return bitmap->filemap[file_page_index(bitmap, chunk) |
715 | - file_page_index(bitmap, 0)]; | 703 | - file_page_index(bitmap, 0)]; |
716 | } | 704 | } |
717 | 705 | ||
718 | |||
719 | static void bitmap_file_unmap(struct bitmap *bitmap) | 706 | static void bitmap_file_unmap(struct bitmap *bitmap) |
720 | { | 707 | { |
721 | struct page **map, *sb_page; | 708 | struct page **map, *sb_page; |
@@ -766,7 +753,6 @@ static void bitmap_file_put(struct bitmap *bitmap) | |||
766 | } | 753 | } |
767 | } | 754 | } |
768 | 755 | ||
769 | |||
770 | /* | 756 | /* |
771 | * bitmap_file_kick - if an error occurs while manipulating the bitmap file | 757 | * bitmap_file_kick - if an error occurs while manipulating the bitmap file |
772 | * then it is no longer reliable, so we stop using it and we mark the file | 758 | * then it is no longer reliable, so we stop using it and we mark the file |
@@ -785,7 +771,6 @@ static void bitmap_file_kick(struct bitmap *bitmap) | |||
785 | ptr = d_path(&bitmap->file->f_path, path, | 771 | ptr = d_path(&bitmap->file->f_path, path, |
786 | PAGE_SIZE); | 772 | PAGE_SIZE); |
787 | 773 | ||
788 | |||
789 | printk(KERN_ALERT | 774 | printk(KERN_ALERT |
790 | "%s: kicking failed bitmap file %s from array!\n", | 775 | "%s: kicking failed bitmap file %s from array!\n", |
791 | bmname(bitmap), IS_ERR(ptr) ? "" : ptr); | 776 | bmname(bitmap), IS_ERR(ptr) ? "" : ptr); |
@@ -803,27 +788,36 @@ static void bitmap_file_kick(struct bitmap *bitmap) | |||
803 | } | 788 | } |
804 | 789 | ||
805 | enum bitmap_page_attr { | 790 | enum bitmap_page_attr { |
806 | BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced | 791 | BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ |
807 | BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared | 792 | BITMAP_PAGE_CLEAN = 1, /* there are bits that might need to be cleared */ |
808 | BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced | 793 | BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ |
809 | }; | 794 | }; |
810 | 795 | ||
811 | static inline void set_page_attr(struct bitmap *bitmap, struct page *page, | 796 | static inline void set_page_attr(struct bitmap *bitmap, struct page *page, |
812 | enum bitmap_page_attr attr) | 797 | enum bitmap_page_attr attr) |
813 | { | 798 | { |
814 | __set_bit((page->index<<2) + attr, bitmap->filemap_attr); | 799 | if (page) |
800 | __set_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
801 | else | ||
802 | __set_bit(attr, &bitmap->logattrs); | ||
815 | } | 803 | } |
816 | 804 | ||
817 | static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, | 805 | static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, |
818 | enum bitmap_page_attr attr) | 806 | enum bitmap_page_attr attr) |
819 | { | 807 | { |
820 | __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); | 808 | if (page) |
809 | __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
810 | else | ||
811 | __clear_bit(attr, &bitmap->logattrs); | ||
821 | } | 812 | } |
822 | 813 | ||
823 | static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, | 814 | static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, |
824 | enum bitmap_page_attr attr) | 815 | enum bitmap_page_attr attr) |
825 | { | 816 | { |
826 | return test_bit((page->index<<2) + attr, bitmap->filemap_attr); | 817 | if (page) |
818 | return test_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
819 | else | ||
820 | return test_bit(attr, &bitmap->logattrs); | ||
827 | } | 821 | } |
828 | 822 | ||
829 | /* | 823 | /* |
@@ -836,30 +830,32 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p | |||
836 | static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) | 830 | static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) |
837 | { | 831 | { |
838 | unsigned long bit; | 832 | unsigned long bit; |
839 | struct page *page; | 833 | struct page *page = NULL; |
840 | void *kaddr; | 834 | void *kaddr; |
841 | unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); | 835 | unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); |
842 | 836 | ||
843 | if (!bitmap->filemap) { | 837 | if (!bitmap->filemap) { |
844 | return; | 838 | struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; |
845 | } | 839 | if (log) |
846 | 840 | log->type->mark_region(log, chunk); | |
847 | page = filemap_get_page(bitmap, chunk); | 841 | } else { |
848 | if (!page) return; | ||
849 | bit = file_page_offset(bitmap, chunk); | ||
850 | 842 | ||
851 | /* set the bit */ | 843 | page = filemap_get_page(bitmap, chunk); |
852 | kaddr = kmap_atomic(page, KM_USER0); | 844 | if (!page) |
853 | if (bitmap->flags & BITMAP_HOSTENDIAN) | 845 | return; |
854 | set_bit(bit, kaddr); | 846 | bit = file_page_offset(bitmap, chunk); |
855 | else | ||
856 | ext2_set_bit(bit, kaddr); | ||
857 | kunmap_atomic(kaddr, KM_USER0); | ||
858 | PRINTK("set file bit %lu page %lu\n", bit, page->index); | ||
859 | 847 | ||
848 | /* set the bit */ | ||
849 | kaddr = kmap_atomic(page, KM_USER0); | ||
850 | if (bitmap->flags & BITMAP_HOSTENDIAN) | ||
851 | set_bit(bit, kaddr); | ||
852 | else | ||
853 | ext2_set_bit(bit, kaddr); | ||
854 | kunmap_atomic(kaddr, KM_USER0); | ||
855 | PRINTK("set file bit %lu page %lu\n", bit, page->index); | ||
856 | } | ||
860 | /* record page number so it gets flushed to disk when unplug occurs */ | 857 | /* record page number so it gets flushed to disk when unplug occurs */ |
861 | set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); | 858 | set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); |
862 | |||
863 | } | 859 | } |
864 | 860 | ||
865 | /* this gets called when the md device is ready to unplug its underlying | 861 | /* this gets called when the md device is ready to unplug its underlying |
@@ -874,6 +870,16 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
874 | 870 | ||
875 | if (!bitmap) | 871 | if (!bitmap) |
876 | return; | 872 | return; |
873 | if (!bitmap->filemap) { | ||
874 | /* Must be using a dirty_log */ | ||
875 | struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; | ||
876 | dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs); | ||
877 | need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs); | ||
878 | if (dirty || need_write) | ||
879 | if (log->type->flush(log)) | ||
880 | bitmap->flags |= BITMAP_WRITE_ERROR; | ||
881 | goto out; | ||
882 | } | ||
877 | 883 | ||
878 | /* look at each page to see if there are any set bits that need to be | 884 | /* look at each page to see if there are any set bits that need to be |
879 | * flushed out to disk */ | 885 | * flushed out to disk */ |
@@ -892,7 +898,7 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
892 | wait = 1; | 898 | wait = 1; |
893 | spin_unlock_irqrestore(&bitmap->lock, flags); | 899 | spin_unlock_irqrestore(&bitmap->lock, flags); |
894 | 900 | ||
895 | if (dirty | need_write) | 901 | if (dirty || need_write) |
896 | write_page(bitmap, page, 0); | 902 | write_page(bitmap, page, 0); |
897 | } | 903 | } |
898 | if (wait) { /* if any writes were performed, we need to wait on them */ | 904 | if (wait) { /* if any writes were performed, we need to wait on them */ |
@@ -902,9 +908,11 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
902 | else | 908 | else |
903 | md_super_wait(bitmap->mddev); | 909 | md_super_wait(bitmap->mddev); |
904 | } | 910 | } |
911 | out: | ||
905 | if (bitmap->flags & BITMAP_WRITE_ERROR) | 912 | if (bitmap->flags & BITMAP_WRITE_ERROR) |
906 | bitmap_file_kick(bitmap); | 913 | bitmap_file_kick(bitmap); |
907 | } | 914 | } |
915 | EXPORT_SYMBOL(bitmap_unplug); | ||
908 | 916 | ||
909 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); | 917 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); |
910 | /* * bitmap_init_from_disk -- called at bitmap_create time to initialize | 918 | /* * bitmap_init_from_disk -- called at bitmap_create time to initialize |
@@ -943,12 +951,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
943 | printk(KERN_INFO "%s: bitmap file is out of date, doing full " | 951 | printk(KERN_INFO "%s: bitmap file is out of date, doing full " |
944 | "recovery\n", bmname(bitmap)); | 952 | "recovery\n", bmname(bitmap)); |
945 | 953 | ||
946 | bytes = (chunks + 7) / 8; | 954 | bytes = DIV_ROUND_UP(bitmap->chunks, 8); |
947 | if (!bitmap->mddev->bitmap_info.external) | 955 | if (!bitmap->mddev->bitmap_info.external) |
948 | bytes += sizeof(bitmap_super_t); | 956 | bytes += sizeof(bitmap_super_t); |
949 | 957 | ||
950 | 958 | num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); | |
951 | num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE; | ||
952 | 959 | ||
953 | if (file && i_size_read(file->f_mapping->host) < bytes) { | 960 | if (file && i_size_read(file->f_mapping->host) < bytes) { |
954 | printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", | 961 | printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", |
@@ -966,7 +973,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
966 | 973 | ||
967 | /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ | 974 | /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ |
968 | bitmap->filemap_attr = kzalloc( | 975 | bitmap->filemap_attr = kzalloc( |
969 | roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), | 976 | roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), |
970 | GFP_KERNEL); | 977 | GFP_KERNEL); |
971 | if (!bitmap->filemap_attr) | 978 | if (!bitmap->filemap_attr) |
972 | goto err; | 979 | goto err; |
@@ -1021,7 +1028,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
1021 | if (outofdate) { | 1028 | if (outofdate) { |
1022 | /* | 1029 | /* |
1023 | * if bitmap is out of date, dirty the | 1030 | * if bitmap is out of date, dirty the |
1024 | * whole page and write it out | 1031 | * whole page and write it out |
1025 | */ | 1032 | */ |
1026 | paddr = kmap_atomic(page, KM_USER0); | 1033 | paddr = kmap_atomic(page, KM_USER0); |
1027 | memset(paddr + offset, 0xff, | 1034 | memset(paddr + offset, 0xff, |
@@ -1052,7 +1059,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
1052 | } | 1059 | } |
1053 | } | 1060 | } |
1054 | 1061 | ||
1055 | /* everything went OK */ | 1062 | /* everything went OK */ |
1056 | ret = 0; | 1063 | ret = 0; |
1057 | bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); | 1064 | bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); |
1058 | 1065 | ||
@@ -1080,21 +1087,16 @@ void bitmap_write_all(struct bitmap *bitmap) | |||
1080 | */ | 1087 | */ |
1081 | int i; | 1088 | int i; |
1082 | 1089 | ||
1083 | for (i=0; i < bitmap->file_pages; i++) | 1090 | for (i = 0; i < bitmap->file_pages; i++) |
1084 | set_page_attr(bitmap, bitmap->filemap[i], | 1091 | set_page_attr(bitmap, bitmap->filemap[i], |
1085 | BITMAP_PAGE_NEEDWRITE); | 1092 | BITMAP_PAGE_NEEDWRITE); |
1086 | } | 1093 | } |
1087 | 1094 | ||
1088 | |||
1089 | static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) | 1095 | static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) |
1090 | { | 1096 | { |
1091 | sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); | 1097 | sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); |
1092 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; | 1098 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; |
1093 | bitmap->bp[page].count += inc; | 1099 | bitmap->bp[page].count += inc; |
1094 | /* | ||
1095 | if (page == 0) printk("count page 0, offset %llu: %d gives %d\n", | ||
1096 | (unsigned long long)offset, inc, bitmap->bp[page].count); | ||
1097 | */ | ||
1098 | bitmap_checkfree(bitmap, page); | 1100 | bitmap_checkfree(bitmap, page); |
1099 | } | 1101 | } |
1100 | static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, | 1102 | static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, |
@@ -1114,6 +1116,7 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1114 | struct page *page = NULL, *lastpage = NULL; | 1116 | struct page *page = NULL, *lastpage = NULL; |
1115 | int blocks; | 1117 | int blocks; |
1116 | void *paddr; | 1118 | void *paddr; |
1119 | struct dm_dirty_log *log = mddev->bitmap_info.log; | ||
1117 | 1120 | ||
1118 | /* Use a mutex to guard daemon_work against | 1121 | /* Use a mutex to guard daemon_work against |
1119 | * bitmap_destroy. | 1122 | * bitmap_destroy. |
@@ -1138,11 +1141,12 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1138 | spin_lock_irqsave(&bitmap->lock, flags); | 1141 | spin_lock_irqsave(&bitmap->lock, flags); |
1139 | for (j = 0; j < bitmap->chunks; j++) { | 1142 | for (j = 0; j < bitmap->chunks; j++) { |
1140 | bitmap_counter_t *bmc; | 1143 | bitmap_counter_t *bmc; |
1141 | if (!bitmap->filemap) | 1144 | if (!bitmap->filemap) { |
1142 | /* error or shutdown */ | 1145 | if (!log) |
1143 | break; | 1146 | /* error or shutdown */ |
1144 | 1147 | break; | |
1145 | page = filemap_get_page(bitmap, j); | 1148 | } else |
1149 | page = filemap_get_page(bitmap, j); | ||
1146 | 1150 | ||
1147 | if (page != lastpage) { | 1151 | if (page != lastpage) { |
1148 | /* skip this page unless it's marked as needing cleaning */ | 1152 | /* skip this page unless it's marked as needing cleaning */ |
@@ -1197,14 +1201,11 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1197 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | 1201 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), |
1198 | &blocks, 0); | 1202 | &blocks, 0); |
1199 | if (bmc) { | 1203 | if (bmc) { |
1200 | /* | ||
1201 | if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); | ||
1202 | */ | ||
1203 | if (*bmc) | 1204 | if (*bmc) |
1204 | bitmap->allclean = 0; | 1205 | bitmap->allclean = 0; |
1205 | 1206 | ||
1206 | if (*bmc == 2) { | 1207 | if (*bmc == 2) { |
1207 | *bmc=1; /* maybe clear the bit next time */ | 1208 | *bmc = 1; /* maybe clear the bit next time */ |
1208 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1209 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
1209 | } else if (*bmc == 1 && !bitmap->need_sync) { | 1210 | } else if (*bmc == 1 && !bitmap->need_sync) { |
1210 | /* we can clear the bit */ | 1211 | /* we can clear the bit */ |
@@ -1214,14 +1215,17 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1214 | -1); | 1215 | -1); |
1215 | 1216 | ||
1216 | /* clear the bit */ | 1217 | /* clear the bit */ |
1217 | paddr = kmap_atomic(page, KM_USER0); | 1218 | if (page) { |
1218 | if (bitmap->flags & BITMAP_HOSTENDIAN) | 1219 | paddr = kmap_atomic(page, KM_USER0); |
1219 | clear_bit(file_page_offset(bitmap, j), | 1220 | if (bitmap->flags & BITMAP_HOSTENDIAN) |
1220 | paddr); | 1221 | clear_bit(file_page_offset(bitmap, j), |
1221 | else | 1222 | paddr); |
1222 | ext2_clear_bit(file_page_offset(bitmap, j), | 1223 | else |
1223 | paddr); | 1224 | ext2_clear_bit(file_page_offset(bitmap, j), |
1224 | kunmap_atomic(paddr, KM_USER0); | 1225 | paddr); |
1226 | kunmap_atomic(paddr, KM_USER0); | ||
1227 | } else | ||
1228 | log->type->clear_region(log, j); | ||
1225 | } | 1229 | } |
1226 | } else | 1230 | } else |
1227 | j |= PAGE_COUNTER_MASK; | 1231 | j |= PAGE_COUNTER_MASK; |
@@ -1229,12 +1233,16 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1229 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1233 | spin_unlock_irqrestore(&bitmap->lock, flags); |
1230 | 1234 | ||
1231 | /* now sync the final page */ | 1235 | /* now sync the final page */ |
1232 | if (lastpage != NULL) { | 1236 | if (lastpage != NULL || log != NULL) { |
1233 | spin_lock_irqsave(&bitmap->lock, flags); | 1237 | spin_lock_irqsave(&bitmap->lock, flags); |
1234 | if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { | 1238 | if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { |
1235 | clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); | 1239 | clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); |
1236 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1240 | spin_unlock_irqrestore(&bitmap->lock, flags); |
1237 | write_page(bitmap, lastpage, 0); | 1241 | if (lastpage) |
1242 | write_page(bitmap, lastpage, 0); | ||
1243 | else | ||
1244 | if (log->type->flush(log)) | ||
1245 | bitmap->flags |= BITMAP_WRITE_ERROR; | ||
1238 | } else { | 1246 | } else { |
1239 | set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); | 1247 | set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); |
1240 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1248 | spin_unlock_irqrestore(&bitmap->lock, flags); |
@@ -1243,7 +1251,7 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1243 | 1251 | ||
1244 | done: | 1252 | done: |
1245 | if (bitmap->allclean == 0) | 1253 | if (bitmap->allclean == 0) |
1246 | bitmap->mddev->thread->timeout = | 1254 | bitmap->mddev->thread->timeout = |
1247 | bitmap->mddev->bitmap_info.daemon_sleep; | 1255 | bitmap->mddev->bitmap_info.daemon_sleep; |
1248 | mutex_unlock(&mddev->bitmap_info.mutex); | 1256 | mutex_unlock(&mddev->bitmap_info.mutex); |
1249 | } | 1257 | } |
@@ -1262,34 +1270,38 @@ __acquires(bitmap->lock) | |||
1262 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; | 1270 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; |
1263 | unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; | 1271 | unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; |
1264 | sector_t csize; | 1272 | sector_t csize; |
1273 | int err; | ||
1265 | 1274 | ||
1266 | if (bitmap_checkpage(bitmap, page, create) < 0) { | 1275 | err = bitmap_checkpage(bitmap, page, create); |
1276 | |||
1277 | if (bitmap->bp[page].hijacked || | ||
1278 | bitmap->bp[page].map == NULL) | ||
1279 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + | ||
1280 | PAGE_COUNTER_SHIFT - 1); | ||
1281 | else | ||
1267 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); | 1282 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); |
1268 | *blocks = csize - (offset & (csize- 1)); | 1283 | *blocks = csize - (offset & (csize - 1)); |
1284 | |||
1285 | if (err < 0) | ||
1269 | return NULL; | 1286 | return NULL; |
1270 | } | 1287 | |
1271 | /* now locked ... */ | 1288 | /* now locked ... */ |
1272 | 1289 | ||
1273 | if (bitmap->bp[page].hijacked) { /* hijacked pointer */ | 1290 | if (bitmap->bp[page].hijacked) { /* hijacked pointer */ |
1274 | /* should we use the first or second counter field | 1291 | /* should we use the first or second counter field |
1275 | * of the hijacked pointer? */ | 1292 | * of the hijacked pointer? */ |
1276 | int hi = (pageoff > PAGE_COUNTER_MASK); | 1293 | int hi = (pageoff > PAGE_COUNTER_MASK); |
1277 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + | ||
1278 | PAGE_COUNTER_SHIFT - 1); | ||
1279 | *blocks = csize - (offset & (csize- 1)); | ||
1280 | return &((bitmap_counter_t *) | 1294 | return &((bitmap_counter_t *) |
1281 | &bitmap->bp[page].map)[hi]; | 1295 | &bitmap->bp[page].map)[hi]; |
1282 | } else { /* page is allocated */ | 1296 | } else /* page is allocated */ |
1283 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); | ||
1284 | *blocks = csize - (offset & (csize- 1)); | ||
1285 | return (bitmap_counter_t *) | 1297 | return (bitmap_counter_t *) |
1286 | &(bitmap->bp[page].map[pageoff]); | 1298 | &(bitmap->bp[page].map[pageoff]); |
1287 | } | ||
1288 | } | 1299 | } |
1289 | 1300 | ||
1290 | int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) | 1301 | int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) |
1291 | { | 1302 | { |
1292 | if (!bitmap) return 0; | 1303 | if (!bitmap) |
1304 | return 0; | ||
1293 | 1305 | ||
1294 | if (behind) { | 1306 | if (behind) { |
1295 | int bw; | 1307 | int bw; |
@@ -1322,17 +1334,16 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect | |||
1322 | prepare_to_wait(&bitmap->overflow_wait, &__wait, | 1334 | prepare_to_wait(&bitmap->overflow_wait, &__wait, |
1323 | TASK_UNINTERRUPTIBLE); | 1335 | TASK_UNINTERRUPTIBLE); |
1324 | spin_unlock_irq(&bitmap->lock); | 1336 | spin_unlock_irq(&bitmap->lock); |
1325 | blk_unplug(bitmap->mddev->queue); | 1337 | md_unplug(bitmap->mddev); |
1326 | schedule(); | 1338 | schedule(); |
1327 | finish_wait(&bitmap->overflow_wait, &__wait); | 1339 | finish_wait(&bitmap->overflow_wait, &__wait); |
1328 | continue; | 1340 | continue; |
1329 | } | 1341 | } |
1330 | 1342 | ||
1331 | switch(*bmc) { | 1343 | switch (*bmc) { |
1332 | case 0: | 1344 | case 0: |
1333 | bitmap_file_set_bit(bitmap, offset); | 1345 | bitmap_file_set_bit(bitmap, offset); |
1334 | bitmap_count_page(bitmap,offset, 1); | 1346 | bitmap_count_page(bitmap, offset, 1); |
1335 | blk_plug_device_unlocked(bitmap->mddev->queue); | ||
1336 | /* fall through */ | 1347 | /* fall through */ |
1337 | case 1: | 1348 | case 1: |
1338 | *bmc = 2; | 1349 | *bmc = 2; |
@@ -1345,16 +1356,19 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect | |||
1345 | offset += blocks; | 1356 | offset += blocks; |
1346 | if (sectors > blocks) | 1357 | if (sectors > blocks) |
1347 | sectors -= blocks; | 1358 | sectors -= blocks; |
1348 | else sectors = 0; | 1359 | else |
1360 | sectors = 0; | ||
1349 | } | 1361 | } |
1350 | bitmap->allclean = 0; | 1362 | bitmap->allclean = 0; |
1351 | return 0; | 1363 | return 0; |
1352 | } | 1364 | } |
1365 | EXPORT_SYMBOL(bitmap_startwrite); | ||
1353 | 1366 | ||
1354 | void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, | 1367 | void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, |
1355 | int success, int behind) | 1368 | int success, int behind) |
1356 | { | 1369 | { |
1357 | if (!bitmap) return; | 1370 | if (!bitmap) |
1371 | return; | ||
1358 | if (behind) { | 1372 | if (behind) { |
1359 | if (atomic_dec_and_test(&bitmap->behind_writes)) | 1373 | if (atomic_dec_and_test(&bitmap->behind_writes)) |
1360 | wake_up(&bitmap->behind_wait); | 1374 | wake_up(&bitmap->behind_wait); |
@@ -1381,7 +1395,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
1381 | bitmap->events_cleared < bitmap->mddev->events) { | 1395 | bitmap->events_cleared < bitmap->mddev->events) { |
1382 | bitmap->events_cleared = bitmap->mddev->events; | 1396 | bitmap->events_cleared = bitmap->mddev->events; |
1383 | bitmap->need_sync = 1; | 1397 | bitmap->need_sync = 1; |
1384 | sysfs_notify_dirent(bitmap->sysfs_can_clear); | 1398 | sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); |
1385 | } | 1399 | } |
1386 | 1400 | ||
1387 | if (!success && ! (*bmc & NEEDED_MASK)) | 1401 | if (!success && ! (*bmc & NEEDED_MASK)) |
@@ -1391,18 +1405,22 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
1391 | wake_up(&bitmap->overflow_wait); | 1405 | wake_up(&bitmap->overflow_wait); |
1392 | 1406 | ||
1393 | (*bmc)--; | 1407 | (*bmc)--; |
1394 | if (*bmc <= 2) { | 1408 | if (*bmc <= 2) |
1395 | set_page_attr(bitmap, | 1409 | set_page_attr(bitmap, |
1396 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), | 1410 | filemap_get_page( |
1411 | bitmap, | ||
1412 | offset >> CHUNK_BLOCK_SHIFT(bitmap)), | ||
1397 | BITMAP_PAGE_CLEAN); | 1413 | BITMAP_PAGE_CLEAN); |
1398 | } | 1414 | |
1399 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1415 | spin_unlock_irqrestore(&bitmap->lock, flags); |
1400 | offset += blocks; | 1416 | offset += blocks; |
1401 | if (sectors > blocks) | 1417 | if (sectors > blocks) |
1402 | sectors -= blocks; | 1418 | sectors -= blocks; |
1403 | else sectors = 0; | 1419 | else |
1420 | sectors = 0; | ||
1404 | } | 1421 | } |
1405 | } | 1422 | } |
1423 | EXPORT_SYMBOL(bitmap_endwrite); | ||
1406 | 1424 | ||
1407 | static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, | 1425 | static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, |
1408 | int degraded) | 1426 | int degraded) |
@@ -1455,14 +1473,14 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, | |||
1455 | } | 1473 | } |
1456 | return rv; | 1474 | return rv; |
1457 | } | 1475 | } |
1476 | EXPORT_SYMBOL(bitmap_start_sync); | ||
1458 | 1477 | ||
1459 | void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) | 1478 | void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) |
1460 | { | 1479 | { |
1461 | bitmap_counter_t *bmc; | 1480 | bitmap_counter_t *bmc; |
1462 | unsigned long flags; | 1481 | unsigned long flags; |
1463 | /* | 1482 | |
1464 | if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted); | 1483 | if (bitmap == NULL) { |
1465 | */ if (bitmap == NULL) { | ||
1466 | *blocks = 1024; | 1484 | *blocks = 1024; |
1467 | return; | 1485 | return; |
1468 | } | 1486 | } |
@@ -1471,26 +1489,23 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab | |||
1471 | if (bmc == NULL) | 1489 | if (bmc == NULL) |
1472 | goto unlock; | 1490 | goto unlock; |
1473 | /* locked */ | 1491 | /* locked */ |
1474 | /* | ||
1475 | if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks); | ||
1476 | */ | ||
1477 | if (RESYNC(*bmc)) { | 1492 | if (RESYNC(*bmc)) { |
1478 | *bmc &= ~RESYNC_MASK; | 1493 | *bmc &= ~RESYNC_MASK; |
1479 | 1494 | ||
1480 | if (!NEEDED(*bmc) && aborted) | 1495 | if (!NEEDED(*bmc) && aborted) |
1481 | *bmc |= NEEDED_MASK; | 1496 | *bmc |= NEEDED_MASK; |
1482 | else { | 1497 | else { |
1483 | if (*bmc <= 2) { | 1498 | if (*bmc <= 2) |
1484 | set_page_attr(bitmap, | 1499 | set_page_attr(bitmap, |
1485 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), | 1500 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), |
1486 | BITMAP_PAGE_CLEAN); | 1501 | BITMAP_PAGE_CLEAN); |
1487 | } | ||
1488 | } | 1502 | } |
1489 | } | 1503 | } |
1490 | unlock: | 1504 | unlock: |
1491 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1505 | spin_unlock_irqrestore(&bitmap->lock, flags); |
1492 | bitmap->allclean = 0; | 1506 | bitmap->allclean = 0; |
1493 | } | 1507 | } |
1508 | EXPORT_SYMBOL(bitmap_end_sync); | ||
1494 | 1509 | ||
1495 | void bitmap_close_sync(struct bitmap *bitmap) | 1510 | void bitmap_close_sync(struct bitmap *bitmap) |
1496 | { | 1511 | { |
@@ -1507,6 +1522,7 @@ void bitmap_close_sync(struct bitmap *bitmap) | |||
1507 | sector += blocks; | 1522 | sector += blocks; |
1508 | } | 1523 | } |
1509 | } | 1524 | } |
1525 | EXPORT_SYMBOL(bitmap_close_sync); | ||
1510 | 1526 | ||
1511 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | 1527 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) |
1512 | { | 1528 | { |
@@ -1526,7 +1542,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | |||
1526 | atomic_read(&bitmap->mddev->recovery_active) == 0); | 1542 | atomic_read(&bitmap->mddev->recovery_active) == 0); |
1527 | 1543 | ||
1528 | bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; | 1544 | bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; |
1529 | set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); | 1545 | if (bitmap->mddev->persistent) |
1546 | set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); | ||
1530 | sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); | 1547 | sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); |
1531 | s = 0; | 1548 | s = 0; |
1532 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { | 1549 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { |
@@ -1536,6 +1553,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | |||
1536 | bitmap->last_end_sync = jiffies; | 1553 | bitmap->last_end_sync = jiffies; |
1537 | sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); | 1554 | sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); |
1538 | } | 1555 | } |
1556 | EXPORT_SYMBOL(bitmap_cond_end_sync); | ||
1539 | 1557 | ||
1540 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) | 1558 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) |
1541 | { | 1559 | { |
@@ -1552,9 +1570,9 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n | |||
1552 | spin_unlock_irq(&bitmap->lock); | 1570 | spin_unlock_irq(&bitmap->lock); |
1553 | return; | 1571 | return; |
1554 | } | 1572 | } |
1555 | if (! *bmc) { | 1573 | if (!*bmc) { |
1556 | struct page *page; | 1574 | struct page *page; |
1557 | *bmc = 1 | (needed?NEEDED_MASK:0); | 1575 | *bmc = 1 | (needed ? NEEDED_MASK : 0); |
1558 | bitmap_count_page(bitmap, offset, 1); | 1576 | bitmap_count_page(bitmap, offset, 1); |
1559 | page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); | 1577 | page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); |
1560 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1578 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
@@ -1663,15 +1681,17 @@ int bitmap_create(mddev_t *mddev) | |||
1663 | unsigned long pages; | 1681 | unsigned long pages; |
1664 | struct file *file = mddev->bitmap_info.file; | 1682 | struct file *file = mddev->bitmap_info.file; |
1665 | int err; | 1683 | int err; |
1666 | sector_t start; | 1684 | struct sysfs_dirent *bm = NULL; |
1667 | struct sysfs_dirent *bm; | ||
1668 | 1685 | ||
1669 | BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); | 1686 | BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); |
1670 | 1687 | ||
1671 | if (!file && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ | 1688 | if (!file |
1689 | && !mddev->bitmap_info.offset | ||
1690 | && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */ | ||
1672 | return 0; | 1691 | return 0; |
1673 | 1692 | ||
1674 | BUG_ON(file && mddev->bitmap_info.offset); | 1693 | BUG_ON(file && mddev->bitmap_info.offset); |
1694 | BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log); | ||
1675 | 1695 | ||
1676 | bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); | 1696 | bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); |
1677 | if (!bitmap) | 1697 | if (!bitmap) |
@@ -1685,7 +1705,8 @@ int bitmap_create(mddev_t *mddev) | |||
1685 | 1705 | ||
1686 | bitmap->mddev = mddev; | 1706 | bitmap->mddev = mddev; |
1687 | 1707 | ||
1688 | bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); | 1708 | if (mddev->kobj.sd) |
1709 | bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); | ||
1689 | if (bm) { | 1710 | if (bm) { |
1690 | bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); | 1711 | bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); |
1691 | sysfs_put(bm); | 1712 | sysfs_put(bm); |
@@ -1719,9 +1740,9 @@ int bitmap_create(mddev_t *mddev) | |||
1719 | bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); | 1740 | bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); |
1720 | 1741 | ||
1721 | /* now that chunksize and chunkshift are set, we can use these macros */ | 1742 | /* now that chunksize and chunkshift are set, we can use these macros */ |
1722 | chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> | 1743 | chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> |
1723 | CHUNK_BLOCK_SHIFT(bitmap); | 1744 | CHUNK_BLOCK_SHIFT(bitmap); |
1724 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; | 1745 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; |
1725 | 1746 | ||
1726 | BUG_ON(!pages); | 1747 | BUG_ON(!pages); |
1727 | 1748 | ||
@@ -1741,27 +1762,11 @@ int bitmap_create(mddev_t *mddev) | |||
1741 | if (!bitmap->bp) | 1762 | if (!bitmap->bp) |
1742 | goto error; | 1763 | goto error; |
1743 | 1764 | ||
1744 | /* now that we have some pages available, initialize the in-memory | ||
1745 | * bitmap from the on-disk bitmap */ | ||
1746 | start = 0; | ||
1747 | if (mddev->degraded == 0 | ||
1748 | || bitmap->events_cleared == mddev->events) | ||
1749 | /* no need to keep dirty bits to optimise a re-add of a missing device */ | ||
1750 | start = mddev->recovery_cp; | ||
1751 | err = bitmap_init_from_disk(bitmap, start); | ||
1752 | |||
1753 | if (err) | ||
1754 | goto error; | ||
1755 | |||
1756 | printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", | 1765 | printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", |
1757 | pages, bmname(bitmap)); | 1766 | pages, bmname(bitmap)); |
1758 | 1767 | ||
1759 | mddev->bitmap = bitmap; | 1768 | mddev->bitmap = bitmap; |
1760 | 1769 | ||
1761 | mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; | ||
1762 | md_wakeup_thread(mddev->thread); | ||
1763 | |||
1764 | bitmap_update_sb(bitmap); | ||
1765 | 1770 | ||
1766 | return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; | 1771 | return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; |
1767 | 1772 | ||
@@ -1770,15 +1775,69 @@ int bitmap_create(mddev_t *mddev) | |||
1770 | return err; | 1775 | return err; |
1771 | } | 1776 | } |
1772 | 1777 | ||
1778 | int bitmap_load(mddev_t *mddev) | ||
1779 | { | ||
1780 | int err = 0; | ||
1781 | sector_t sector = 0; | ||
1782 | struct bitmap *bitmap = mddev->bitmap; | ||
1783 | |||
1784 | if (!bitmap) | ||
1785 | goto out; | ||
1786 | |||
1787 | /* Clear out old bitmap info first: Either there is none, or we | ||
1788 | * are resuming after someone else has possibly changed things, | ||
1789 | * so we should forget old cached info. | ||
1790 | * All chunks should be clean, but some might need_sync. | ||
1791 | */ | ||
1792 | while (sector < mddev->resync_max_sectors) { | ||
1793 | int blocks; | ||
1794 | bitmap_start_sync(bitmap, sector, &blocks, 0); | ||
1795 | sector += blocks; | ||
1796 | } | ||
1797 | bitmap_close_sync(bitmap); | ||
1798 | |||
1799 | if (mddev->bitmap_info.log) { | ||
1800 | unsigned long i; | ||
1801 | struct dm_dirty_log *log = mddev->bitmap_info.log; | ||
1802 | for (i = 0; i < bitmap->chunks; i++) | ||
1803 | if (!log->type->in_sync(log, i, 1)) | ||
1804 | bitmap_set_memory_bits(bitmap, | ||
1805 | (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), | ||
1806 | 1); | ||
1807 | } else { | ||
1808 | sector_t start = 0; | ||
1809 | if (mddev->degraded == 0 | ||
1810 | || bitmap->events_cleared == mddev->events) | ||
1811 | /* no need to keep dirty bits to optimise a | ||
1812 | * re-add of a missing device */ | ||
1813 | start = mddev->recovery_cp; | ||
1814 | |||
1815 | err = bitmap_init_from_disk(bitmap, start); | ||
1816 | } | ||
1817 | if (err) | ||
1818 | goto out; | ||
1819 | |||
1820 | mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; | ||
1821 | md_wakeup_thread(mddev->thread); | ||
1822 | |||
1823 | bitmap_update_sb(bitmap); | ||
1824 | |||
1825 | if (bitmap->flags & BITMAP_WRITE_ERROR) | ||
1826 | err = -EIO; | ||
1827 | out: | ||
1828 | return err; | ||
1829 | } | ||
1830 | EXPORT_SYMBOL_GPL(bitmap_load); | ||
1831 | |||
1773 | static ssize_t | 1832 | static ssize_t |
1774 | location_show(mddev_t *mddev, char *page) | 1833 | location_show(mddev_t *mddev, char *page) |
1775 | { | 1834 | { |
1776 | ssize_t len; | 1835 | ssize_t len; |
1777 | if (mddev->bitmap_info.file) { | 1836 | if (mddev->bitmap_info.file) |
1778 | len = sprintf(page, "file"); | 1837 | len = sprintf(page, "file"); |
1779 | } else if (mddev->bitmap_info.offset) { | 1838 | else if (mddev->bitmap_info.offset) |
1780 | len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); | 1839 | len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); |
1781 | } else | 1840 | else |
1782 | len = sprintf(page, "none"); | 1841 | len = sprintf(page, "none"); |
1783 | len += sprintf(page+len, "\n"); | 1842 | len += sprintf(page+len, "\n"); |
1784 | return len; | 1843 | return len; |
@@ -1867,7 +1926,7 @@ timeout_show(mddev_t *mddev, char *page) | |||
1867 | ssize_t len; | 1926 | ssize_t len; |
1868 | unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; | 1927 | unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; |
1869 | unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; | 1928 | unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; |
1870 | 1929 | ||
1871 | len = sprintf(page, "%lu", secs); | 1930 | len = sprintf(page, "%lu", secs); |
1872 | if (jifs) | 1931 | if (jifs) |
1873 | len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); | 1932 | len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); |
@@ -2049,12 +2108,3 @@ struct attribute_group md_bitmap_group = { | |||
2049 | .attrs = md_bitmap_attrs, | 2108 | .attrs = md_bitmap_attrs, |
2050 | }; | 2109 | }; |
2051 | 2110 | ||
2052 | |||
2053 | /* the bitmap API -- for raid personalities */ | ||
2054 | EXPORT_SYMBOL(bitmap_startwrite); | ||
2055 | EXPORT_SYMBOL(bitmap_endwrite); | ||
2056 | EXPORT_SYMBOL(bitmap_start_sync); | ||
2057 | EXPORT_SYMBOL(bitmap_end_sync); | ||
2058 | EXPORT_SYMBOL(bitmap_unplug); | ||
2059 | EXPORT_SYMBOL(bitmap_close_sync); | ||
2060 | EXPORT_SYMBOL(bitmap_cond_end_sync); | ||
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 3797dea4723a..e872a7bad6b8 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h | |||
@@ -222,6 +222,10 @@ struct bitmap { | |||
222 | unsigned long file_pages; /* number of pages in the file */ | 222 | unsigned long file_pages; /* number of pages in the file */ |
223 | int last_page_size; /* bytes in the last page */ | 223 | int last_page_size; /* bytes in the last page */ |
224 | 224 | ||
225 | unsigned long logattrs; /* used when filemap_attr doesn't exist | ||
226 | * because we are working with a dirty_log | ||
227 | */ | ||
228 | |||
225 | unsigned long flags; | 229 | unsigned long flags; |
226 | 230 | ||
227 | int allclean; | 231 | int allclean; |
@@ -243,12 +247,14 @@ struct bitmap { | |||
243 | wait_queue_head_t behind_wait; | 247 | wait_queue_head_t behind_wait; |
244 | 248 | ||
245 | struct sysfs_dirent *sysfs_can_clear; | 249 | struct sysfs_dirent *sysfs_can_clear; |
250 | |||
246 | }; | 251 | }; |
247 | 252 | ||
248 | /* the bitmap API */ | 253 | /* the bitmap API */ |
249 | 254 | ||
250 | /* these are used only by md/bitmap */ | 255 | /* these are used only by md/bitmap */ |
251 | int bitmap_create(mddev_t *mddev); | 256 | int bitmap_create(mddev_t *mddev); |
257 | int bitmap_load(mddev_t *mddev); | ||
252 | void bitmap_flush(mddev_t *mddev); | 258 | void bitmap_flush(mddev_t *mddev); |
253 | void bitmap_destroy(mddev_t *mddev); | 259 | void bitmap_destroy(mddev_t *mddev); |
254 | 260 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index 700c96edf9b2..11567c7999a2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -262,7 +262,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio) | |||
262 | * Once ->stop is called and completes, the module will be completely | 262 | * Once ->stop is called and completes, the module will be completely |
263 | * unused. | 263 | * unused. |
264 | */ | 264 | */ |
265 | static void mddev_suspend(mddev_t *mddev) | 265 | void mddev_suspend(mddev_t *mddev) |
266 | { | 266 | { |
267 | BUG_ON(mddev->suspended); | 267 | BUG_ON(mddev->suspended); |
268 | mddev->suspended = 1; | 268 | mddev->suspended = 1; |
@@ -270,13 +270,15 @@ static void mddev_suspend(mddev_t *mddev) | |||
270 | wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); | 270 | wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); |
271 | mddev->pers->quiesce(mddev, 1); | 271 | mddev->pers->quiesce(mddev, 1); |
272 | } | 272 | } |
273 | EXPORT_SYMBOL_GPL(mddev_suspend); | ||
273 | 274 | ||
274 | static void mddev_resume(mddev_t *mddev) | 275 | void mddev_resume(mddev_t *mddev) |
275 | { | 276 | { |
276 | mddev->suspended = 0; | 277 | mddev->suspended = 0; |
277 | wake_up(&mddev->sb_wait); | 278 | wake_up(&mddev->sb_wait); |
278 | mddev->pers->quiesce(mddev, 0); | 279 | mddev->pers->quiesce(mddev, 0); |
279 | } | 280 | } |
281 | EXPORT_SYMBOL_GPL(mddev_resume); | ||
280 | 282 | ||
281 | int mddev_congested(mddev_t *mddev, int bits) | 283 | int mddev_congested(mddev_t *mddev, int bits) |
282 | { | 284 | { |
@@ -385,6 +387,51 @@ void md_barrier_request(mddev_t *mddev, struct bio *bio) | |||
385 | } | 387 | } |
386 | EXPORT_SYMBOL(md_barrier_request); | 388 | EXPORT_SYMBOL(md_barrier_request); |
387 | 389 | ||
390 | /* Support for plugging. | ||
391 | * This mirrors the plugging support in request_queue, but does not | ||
392 | * require having a whole queue | ||
393 | */ | ||
394 | static void plugger_work(struct work_struct *work) | ||
395 | { | ||
396 | struct plug_handle *plug = | ||
397 | container_of(work, struct plug_handle, unplug_work); | ||
398 | plug->unplug_fn(plug); | ||
399 | } | ||
400 | static void plugger_timeout(unsigned long data) | ||
401 | { | ||
402 | struct plug_handle *plug = (void *)data; | ||
403 | kblockd_schedule_work(NULL, &plug->unplug_work); | ||
404 | } | ||
405 | void plugger_init(struct plug_handle *plug, | ||
406 | void (*unplug_fn)(struct plug_handle *)) | ||
407 | { | ||
408 | plug->unplug_flag = 0; | ||
409 | plug->unplug_fn = unplug_fn; | ||
410 | init_timer(&plug->unplug_timer); | ||
411 | plug->unplug_timer.function = plugger_timeout; | ||
412 | plug->unplug_timer.data = (unsigned long)plug; | ||
413 | INIT_WORK(&plug->unplug_work, plugger_work); | ||
414 | } | ||
415 | EXPORT_SYMBOL_GPL(plugger_init); | ||
416 | |||
417 | void plugger_set_plug(struct plug_handle *plug) | ||
418 | { | ||
419 | if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag)) | ||
420 | mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1); | ||
421 | } | ||
422 | EXPORT_SYMBOL_GPL(plugger_set_plug); | ||
423 | |||
424 | int plugger_remove_plug(struct plug_handle *plug) | ||
425 | { | ||
426 | if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) { | ||
427 | del_timer(&plug->unplug_timer); | ||
428 | return 1; | ||
429 | } else | ||
430 | return 0; | ||
431 | } | ||
432 | EXPORT_SYMBOL_GPL(plugger_remove_plug); | ||
433 | |||
434 | |||
388 | static inline mddev_t *mddev_get(mddev_t *mddev) | 435 | static inline mddev_t *mddev_get(mddev_t *mddev) |
389 | { | 436 | { |
390 | atomic_inc(&mddev->active); | 437 | atomic_inc(&mddev->active); |
@@ -417,7 +464,7 @@ static void mddev_put(mddev_t *mddev) | |||
417 | spin_unlock(&all_mddevs_lock); | 464 | spin_unlock(&all_mddevs_lock); |
418 | } | 465 | } |
419 | 466 | ||
420 | static void mddev_init(mddev_t *mddev) | 467 | void mddev_init(mddev_t *mddev) |
421 | { | 468 | { |
422 | mutex_init(&mddev->open_mutex); | 469 | mutex_init(&mddev->open_mutex); |
423 | mutex_init(&mddev->reconfig_mutex); | 470 | mutex_init(&mddev->reconfig_mutex); |
@@ -437,6 +484,7 @@ static void mddev_init(mddev_t *mddev) | |||
437 | mddev->resync_max = MaxSector; | 484 | mddev->resync_max = MaxSector; |
438 | mddev->level = LEVEL_NONE; | 485 | mddev->level = LEVEL_NONE; |
439 | } | 486 | } |
487 | EXPORT_SYMBOL_GPL(mddev_init); | ||
440 | 488 | ||
441 | static mddev_t * mddev_find(dev_t unit) | 489 | static mddev_t * mddev_find(dev_t unit) |
442 | { | 490 | { |
@@ -533,25 +581,31 @@ static void mddev_unlock(mddev_t * mddev) | |||
533 | * an access to the files will try to take reconfig_mutex | 581 | * an access to the files will try to take reconfig_mutex |
534 | * while holding the file unremovable, which leads to | 582 | * while holding the file unremovable, which leads to |
535 | * a deadlock. | 583 | * a deadlock. |
536 | * So hold open_mutex instead - we are allowed to take | 584 | * So set sysfs_active while the remove is happening, |
537 | * it while holding reconfig_mutex, and md_run can | 585 | * and anything else which might set ->to_remove or may |
538 | * use it to wait for the remove to complete. | 586 | * otherwise change the sysfs namespace will fail with |
587 | * -EBUSY if sysfs_active is still set. | ||
588 | * We set sysfs_active under reconfig_mutex and elsewhere | ||
589 | * test it under the same mutex to ensure its correct value | ||
590 | * is seen. | ||
539 | */ | 591 | */ |
540 | struct attribute_group *to_remove = mddev->to_remove; | 592 | struct attribute_group *to_remove = mddev->to_remove; |
541 | mddev->to_remove = NULL; | 593 | mddev->to_remove = NULL; |
542 | mutex_lock(&mddev->open_mutex); | 594 | mddev->sysfs_active = 1; |
543 | mutex_unlock(&mddev->reconfig_mutex); | 595 | mutex_unlock(&mddev->reconfig_mutex); |
544 | 596 | ||
545 | if (to_remove != &md_redundancy_group) | 597 | if (mddev->kobj.sd) { |
546 | sysfs_remove_group(&mddev->kobj, to_remove); | 598 | if (to_remove != &md_redundancy_group) |
547 | if (mddev->pers == NULL || | 599 | sysfs_remove_group(&mddev->kobj, to_remove); |
548 | mddev->pers->sync_request == NULL) { | 600 | if (mddev->pers == NULL || |
549 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); | 601 | mddev->pers->sync_request == NULL) { |
550 | if (mddev->sysfs_action) | 602 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); |
551 | sysfs_put(mddev->sysfs_action); | 603 | if (mddev->sysfs_action) |
552 | mddev->sysfs_action = NULL; | 604 | sysfs_put(mddev->sysfs_action); |
605 | mddev->sysfs_action = NULL; | ||
606 | } | ||
553 | } | 607 | } |
554 | mutex_unlock(&mddev->open_mutex); | 608 | mddev->sysfs_active = 0; |
555 | } else | 609 | } else |
556 | mutex_unlock(&mddev->reconfig_mutex); | 610 | mutex_unlock(&mddev->reconfig_mutex); |
557 | 611 | ||
@@ -1812,11 +1866,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1812 | goto fail; | 1866 | goto fail; |
1813 | 1867 | ||
1814 | ko = &part_to_dev(rdev->bdev->bd_part)->kobj; | 1868 | ko = &part_to_dev(rdev->bdev->bd_part)->kobj; |
1815 | if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { | 1869 | if (sysfs_create_link(&rdev->kobj, ko, "block")) |
1816 | kobject_del(&rdev->kobj); | 1870 | /* failure here is OK */; |
1817 | goto fail; | 1871 | rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); |
1818 | } | ||
1819 | rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state"); | ||
1820 | 1872 | ||
1821 | list_add_rcu(&rdev->same_set, &mddev->disks); | 1873 | list_add_rcu(&rdev->same_set, &mddev->disks); |
1822 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); | 1874 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); |
@@ -2335,8 +2387,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2335 | set_bit(In_sync, &rdev->flags); | 2387 | set_bit(In_sync, &rdev->flags); |
2336 | err = 0; | 2388 | err = 0; |
2337 | } | 2389 | } |
2338 | if (!err && rdev->sysfs_state) | 2390 | if (!err) |
2339 | sysfs_notify_dirent(rdev->sysfs_state); | 2391 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
2340 | return err ? err : len; | 2392 | return err ? err : len; |
2341 | } | 2393 | } |
2342 | static struct rdev_sysfs_entry rdev_state = | 2394 | static struct rdev_sysfs_entry rdev_state = |
@@ -2431,14 +2483,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2431 | rdev->raid_disk = -1; | 2483 | rdev->raid_disk = -1; |
2432 | return err; | 2484 | return err; |
2433 | } else | 2485 | } else |
2434 | sysfs_notify_dirent(rdev->sysfs_state); | 2486 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
2435 | sprintf(nm, "rd%d", rdev->raid_disk); | 2487 | sprintf(nm, "rd%d", rdev->raid_disk); |
2436 | if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) | 2488 | if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) |
2437 | printk(KERN_WARNING | 2489 | /* failure here is OK */; |
2438 | "md: cannot register " | ||
2439 | "%s for %s\n", | ||
2440 | nm, mdname(rdev->mddev)); | ||
2441 | |||
2442 | /* don't wakeup anyone, leave that to userspace. */ | 2490 | /* don't wakeup anyone, leave that to userspace. */ |
2443 | } else { | 2491 | } else { |
2444 | if (slot >= rdev->mddev->raid_disks) | 2492 | if (slot >= rdev->mddev->raid_disks) |
@@ -2448,7 +2496,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2448 | clear_bit(Faulty, &rdev->flags); | 2496 | clear_bit(Faulty, &rdev->flags); |
2449 | clear_bit(WriteMostly, &rdev->flags); | 2497 | clear_bit(WriteMostly, &rdev->flags); |
2450 | set_bit(In_sync, &rdev->flags); | 2498 | set_bit(In_sync, &rdev->flags); |
2451 | sysfs_notify_dirent(rdev->sysfs_state); | 2499 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
2452 | } | 2500 | } |
2453 | return len; | 2501 | return len; |
2454 | } | 2502 | } |
@@ -2696,6 +2744,24 @@ static struct kobj_type rdev_ktype = { | |||
2696 | .default_attrs = rdev_default_attrs, | 2744 | .default_attrs = rdev_default_attrs, |
2697 | }; | 2745 | }; |
2698 | 2746 | ||
2747 | void md_rdev_init(mdk_rdev_t *rdev) | ||
2748 | { | ||
2749 | rdev->desc_nr = -1; | ||
2750 | rdev->saved_raid_disk = -1; | ||
2751 | rdev->raid_disk = -1; | ||
2752 | rdev->flags = 0; | ||
2753 | rdev->data_offset = 0; | ||
2754 | rdev->sb_events = 0; | ||
2755 | rdev->last_read_error.tv_sec = 0; | ||
2756 | rdev->last_read_error.tv_nsec = 0; | ||
2757 | atomic_set(&rdev->nr_pending, 0); | ||
2758 | atomic_set(&rdev->read_errors, 0); | ||
2759 | atomic_set(&rdev->corrected_errors, 0); | ||
2760 | |||
2761 | INIT_LIST_HEAD(&rdev->same_set); | ||
2762 | init_waitqueue_head(&rdev->blocked_wait); | ||
2763 | } | ||
2764 | EXPORT_SYMBOL_GPL(md_rdev_init); | ||
2699 | /* | 2765 | /* |
2700 | * Import a device. If 'super_format' >= 0, then sanity check the superblock | 2766 | * Import a device. If 'super_format' >= 0, then sanity check the superblock |
2701 | * | 2767 | * |
@@ -2719,6 +2785,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2719 | return ERR_PTR(-ENOMEM); | 2785 | return ERR_PTR(-ENOMEM); |
2720 | } | 2786 | } |
2721 | 2787 | ||
2788 | md_rdev_init(rdev); | ||
2722 | if ((err = alloc_disk_sb(rdev))) | 2789 | if ((err = alloc_disk_sb(rdev))) |
2723 | goto abort_free; | 2790 | goto abort_free; |
2724 | 2791 | ||
@@ -2728,18 +2795,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2728 | 2795 | ||
2729 | kobject_init(&rdev->kobj, &rdev_ktype); | 2796 | kobject_init(&rdev->kobj, &rdev_ktype); |
2730 | 2797 | ||
2731 | rdev->desc_nr = -1; | ||
2732 | rdev->saved_raid_disk = -1; | ||
2733 | rdev->raid_disk = -1; | ||
2734 | rdev->flags = 0; | ||
2735 | rdev->data_offset = 0; | ||
2736 | rdev->sb_events = 0; | ||
2737 | rdev->last_read_error.tv_sec = 0; | ||
2738 | rdev->last_read_error.tv_nsec = 0; | ||
2739 | atomic_set(&rdev->nr_pending, 0); | ||
2740 | atomic_set(&rdev->read_errors, 0); | ||
2741 | atomic_set(&rdev->corrected_errors, 0); | ||
2742 | |||
2743 | size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; | 2798 | size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; |
2744 | if (!size) { | 2799 | if (!size) { |
2745 | printk(KERN_WARNING | 2800 | printk(KERN_WARNING |
@@ -2768,9 +2823,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2768 | } | 2823 | } |
2769 | } | 2824 | } |
2770 | 2825 | ||
2771 | INIT_LIST_HEAD(&rdev->same_set); | ||
2772 | init_waitqueue_head(&rdev->blocked_wait); | ||
2773 | |||
2774 | return rdev; | 2826 | return rdev; |
2775 | 2827 | ||
2776 | abort_free: | 2828 | abort_free: |
@@ -2961,7 +3013,9 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2961 | * - new personality will access other array. | 3013 | * - new personality will access other array. |
2962 | */ | 3014 | */ |
2963 | 3015 | ||
2964 | if (mddev->sync_thread || mddev->reshape_position != MaxSector) | 3016 | if (mddev->sync_thread || |
3017 | mddev->reshape_position != MaxSector || | ||
3018 | mddev->sysfs_active) | ||
2965 | return -EBUSY; | 3019 | return -EBUSY; |
2966 | 3020 | ||
2967 | if (!mddev->pers->quiesce) { | 3021 | if (!mddev->pers->quiesce) { |
@@ -3438,7 +3492,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
3438 | if (err) | 3492 | if (err) |
3439 | return err; | 3493 | return err; |
3440 | else { | 3494 | else { |
3441 | sysfs_notify_dirent(mddev->sysfs_state); | 3495 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
3442 | return len; | 3496 | return len; |
3443 | } | 3497 | } |
3444 | } | 3498 | } |
@@ -3736,7 +3790,7 @@ action_store(mddev_t *mddev, const char *page, size_t len) | |||
3736 | } | 3790 | } |
3737 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3791 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
3738 | md_wakeup_thread(mddev->thread); | 3792 | md_wakeup_thread(mddev->thread); |
3739 | sysfs_notify_dirent(mddev->sysfs_action); | 3793 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
3740 | return len; | 3794 | return len; |
3741 | } | 3795 | } |
3742 | 3796 | ||
@@ -4282,13 +4336,14 @@ static int md_alloc(dev_t dev, char *name) | |||
4282 | disk->disk_name); | 4336 | disk->disk_name); |
4283 | error = 0; | 4337 | error = 0; |
4284 | } | 4338 | } |
4285 | if (sysfs_create_group(&mddev->kobj, &md_bitmap_group)) | 4339 | if (mddev->kobj.sd && |
4340 | sysfs_create_group(&mddev->kobj, &md_bitmap_group)) | ||
4286 | printk(KERN_DEBUG "pointless warning\n"); | 4341 | printk(KERN_DEBUG "pointless warning\n"); |
4287 | abort: | 4342 | abort: |
4288 | mutex_unlock(&disks_mutex); | 4343 | mutex_unlock(&disks_mutex); |
4289 | if (!error) { | 4344 | if (!error && mddev->kobj.sd) { |
4290 | kobject_uevent(&mddev->kobj, KOBJ_ADD); | 4345 | kobject_uevent(&mddev->kobj, KOBJ_ADD); |
4291 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state"); | 4346 | mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); |
4292 | } | 4347 | } |
4293 | mddev_put(mddev); | 4348 | mddev_put(mddev); |
4294 | return error; | 4349 | return error; |
@@ -4326,14 +4381,14 @@ static void md_safemode_timeout(unsigned long data) | |||
4326 | if (!atomic_read(&mddev->writes_pending)) { | 4381 | if (!atomic_read(&mddev->writes_pending)) { |
4327 | mddev->safemode = 1; | 4382 | mddev->safemode = 1; |
4328 | if (mddev->external) | 4383 | if (mddev->external) |
4329 | sysfs_notify_dirent(mddev->sysfs_state); | 4384 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4330 | } | 4385 | } |
4331 | md_wakeup_thread(mddev->thread); | 4386 | md_wakeup_thread(mddev->thread); |
4332 | } | 4387 | } |
4333 | 4388 | ||
4334 | static int start_dirty_degraded; | 4389 | static int start_dirty_degraded; |
4335 | 4390 | ||
4336 | static int md_run(mddev_t *mddev) | 4391 | int md_run(mddev_t *mddev) |
4337 | { | 4392 | { |
4338 | int err; | 4393 | int err; |
4339 | mdk_rdev_t *rdev; | 4394 | mdk_rdev_t *rdev; |
@@ -4345,13 +4400,9 @@ static int md_run(mddev_t *mddev) | |||
4345 | 4400 | ||
4346 | if (mddev->pers) | 4401 | if (mddev->pers) |
4347 | return -EBUSY; | 4402 | return -EBUSY; |
4348 | 4403 | /* Cannot run until previous stop completes properly */ | |
4349 | /* These two calls synchronise us with the | 4404 | if (mddev->sysfs_active) |
4350 | * sysfs_remove_group calls in mddev_unlock, | 4405 | return -EBUSY; |
4351 | * so they must have completed. | ||
4352 | */ | ||
4353 | mutex_lock(&mddev->open_mutex); | ||
4354 | mutex_unlock(&mddev->open_mutex); | ||
4355 | 4406 | ||
4356 | /* | 4407 | /* |
4357 | * Analyze all RAID superblock(s) | 4408 | * Analyze all RAID superblock(s) |
@@ -4398,7 +4449,7 @@ static int md_run(mddev_t *mddev) | |||
4398 | return -EINVAL; | 4449 | return -EINVAL; |
4399 | } | 4450 | } |
4400 | } | 4451 | } |
4401 | sysfs_notify_dirent(rdev->sysfs_state); | 4452 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
4402 | } | 4453 | } |
4403 | 4454 | ||
4404 | spin_lock(&pers_lock); | 4455 | spin_lock(&pers_lock); |
@@ -4497,11 +4548,12 @@ static int md_run(mddev_t *mddev) | |||
4497 | return err; | 4548 | return err; |
4498 | } | 4549 | } |
4499 | if (mddev->pers->sync_request) { | 4550 | if (mddev->pers->sync_request) { |
4500 | if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) | 4551 | if (mddev->kobj.sd && |
4552 | sysfs_create_group(&mddev->kobj, &md_redundancy_group)) | ||
4501 | printk(KERN_WARNING | 4553 | printk(KERN_WARNING |
4502 | "md: cannot register extra attributes for %s\n", | 4554 | "md: cannot register extra attributes for %s\n", |
4503 | mdname(mddev)); | 4555 | mdname(mddev)); |
4504 | mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action"); | 4556 | mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); |
4505 | } else if (mddev->ro == 2) /* auto-readonly not meaningful */ | 4557 | } else if (mddev->ro == 2) /* auto-readonly not meaningful */ |
4506 | mddev->ro = 0; | 4558 | mddev->ro = 0; |
4507 | 4559 | ||
@@ -4519,8 +4571,7 @@ static int md_run(mddev_t *mddev) | |||
4519 | char nm[20]; | 4571 | char nm[20]; |
4520 | sprintf(nm, "rd%d", rdev->raid_disk); | 4572 | sprintf(nm, "rd%d", rdev->raid_disk); |
4521 | if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) | 4573 | if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) |
4522 | printk("md: cannot register %s for %s\n", | 4574 | /* failure here is OK */; |
4523 | nm, mdname(mddev)); | ||
4524 | } | 4575 | } |
4525 | 4576 | ||
4526 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4577 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -4532,12 +4583,12 @@ static int md_run(mddev_t *mddev) | |||
4532 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4583 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
4533 | 4584 | ||
4534 | md_new_event(mddev); | 4585 | md_new_event(mddev); |
4535 | sysfs_notify_dirent(mddev->sysfs_state); | 4586 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4536 | if (mddev->sysfs_action) | 4587 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
4537 | sysfs_notify_dirent(mddev->sysfs_action); | ||
4538 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | 4588 | sysfs_notify(&mddev->kobj, NULL, "degraded"); |
4539 | return 0; | 4589 | return 0; |
4540 | } | 4590 | } |
4591 | EXPORT_SYMBOL_GPL(md_run); | ||
4541 | 4592 | ||
4542 | static int do_md_run(mddev_t *mddev) | 4593 | static int do_md_run(mddev_t *mddev) |
4543 | { | 4594 | { |
@@ -4546,7 +4597,11 @@ static int do_md_run(mddev_t *mddev) | |||
4546 | err = md_run(mddev); | 4597 | err = md_run(mddev); |
4547 | if (err) | 4598 | if (err) |
4548 | goto out; | 4599 | goto out; |
4549 | 4600 | err = bitmap_load(mddev); | |
4601 | if (err) { | ||
4602 | bitmap_destroy(mddev); | ||
4603 | goto out; | ||
4604 | } | ||
4550 | set_capacity(mddev->gendisk, mddev->array_sectors); | 4605 | set_capacity(mddev->gendisk, mddev->array_sectors); |
4551 | revalidate_disk(mddev->gendisk); | 4606 | revalidate_disk(mddev->gendisk); |
4552 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 4607 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
@@ -4574,7 +4629,7 @@ static int restart_array(mddev_t *mddev) | |||
4574 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4629 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
4575 | md_wakeup_thread(mddev->thread); | 4630 | md_wakeup_thread(mddev->thread); |
4576 | md_wakeup_thread(mddev->sync_thread); | 4631 | md_wakeup_thread(mddev->sync_thread); |
4577 | sysfs_notify_dirent(mddev->sysfs_state); | 4632 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4578 | return 0; | 4633 | return 0; |
4579 | } | 4634 | } |
4580 | 4635 | ||
@@ -4645,9 +4700,10 @@ static void md_clean(mddev_t *mddev) | |||
4645 | mddev->bitmap_info.chunksize = 0; | 4700 | mddev->bitmap_info.chunksize = 0; |
4646 | mddev->bitmap_info.daemon_sleep = 0; | 4701 | mddev->bitmap_info.daemon_sleep = 0; |
4647 | mddev->bitmap_info.max_write_behind = 0; | 4702 | mddev->bitmap_info.max_write_behind = 0; |
4703 | mddev->plug = NULL; | ||
4648 | } | 4704 | } |
4649 | 4705 | ||
4650 | static void md_stop_writes(mddev_t *mddev) | 4706 | void md_stop_writes(mddev_t *mddev) |
4651 | { | 4707 | { |
4652 | if (mddev->sync_thread) { | 4708 | if (mddev->sync_thread) { |
4653 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4709 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
@@ -4667,11 +4723,10 @@ static void md_stop_writes(mddev_t *mddev) | |||
4667 | md_update_sb(mddev, 1); | 4723 | md_update_sb(mddev, 1); |
4668 | } | 4724 | } |
4669 | } | 4725 | } |
4726 | EXPORT_SYMBOL_GPL(md_stop_writes); | ||
4670 | 4727 | ||
4671 | static void md_stop(mddev_t *mddev) | 4728 | void md_stop(mddev_t *mddev) |
4672 | { | 4729 | { |
4673 | md_stop_writes(mddev); | ||
4674 | |||
4675 | mddev->pers->stop(mddev); | 4730 | mddev->pers->stop(mddev); |
4676 | if (mddev->pers->sync_request && mddev->to_remove == NULL) | 4731 | if (mddev->pers->sync_request && mddev->to_remove == NULL) |
4677 | mddev->to_remove = &md_redundancy_group; | 4732 | mddev->to_remove = &md_redundancy_group; |
@@ -4679,6 +4734,7 @@ static void md_stop(mddev_t *mddev) | |||
4679 | mddev->pers = NULL; | 4734 | mddev->pers = NULL; |
4680 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4735 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
4681 | } | 4736 | } |
4737 | EXPORT_SYMBOL_GPL(md_stop); | ||
4682 | 4738 | ||
4683 | static int md_set_readonly(mddev_t *mddev, int is_open) | 4739 | static int md_set_readonly(mddev_t *mddev, int is_open) |
4684 | { | 4740 | { |
@@ -4698,7 +4754,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open) | |||
4698 | mddev->ro = 1; | 4754 | mddev->ro = 1; |
4699 | set_disk_ro(mddev->gendisk, 1); | 4755 | set_disk_ro(mddev->gendisk, 1); |
4700 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4756 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
4701 | sysfs_notify_dirent(mddev->sysfs_state); | 4757 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4702 | err = 0; | 4758 | err = 0; |
4703 | } | 4759 | } |
4704 | out: | 4760 | out: |
@@ -4712,26 +4768,29 @@ out: | |||
4712 | */ | 4768 | */ |
4713 | static int do_md_stop(mddev_t * mddev, int mode, int is_open) | 4769 | static int do_md_stop(mddev_t * mddev, int mode, int is_open) |
4714 | { | 4770 | { |
4715 | int err = 0; | ||
4716 | struct gendisk *disk = mddev->gendisk; | 4771 | struct gendisk *disk = mddev->gendisk; |
4717 | mdk_rdev_t *rdev; | 4772 | mdk_rdev_t *rdev; |
4718 | 4773 | ||
4719 | mutex_lock(&mddev->open_mutex); | 4774 | mutex_lock(&mddev->open_mutex); |
4720 | if (atomic_read(&mddev->openers) > is_open) { | 4775 | if (atomic_read(&mddev->openers) > is_open || |
4776 | mddev->sysfs_active) { | ||
4721 | printk("md: %s still in use.\n",mdname(mddev)); | 4777 | printk("md: %s still in use.\n",mdname(mddev)); |
4722 | err = -EBUSY; | 4778 | mutex_unlock(&mddev->open_mutex); |
4723 | } else if (mddev->pers) { | 4779 | return -EBUSY; |
4780 | } | ||
4724 | 4781 | ||
4782 | if (mddev->pers) { | ||
4725 | if (mddev->ro) | 4783 | if (mddev->ro) |
4726 | set_disk_ro(disk, 0); | 4784 | set_disk_ro(disk, 0); |
4727 | 4785 | ||
4786 | md_stop_writes(mddev); | ||
4728 | md_stop(mddev); | 4787 | md_stop(mddev); |
4729 | mddev->queue->merge_bvec_fn = NULL; | 4788 | mddev->queue->merge_bvec_fn = NULL; |
4730 | mddev->queue->unplug_fn = NULL; | 4789 | mddev->queue->unplug_fn = NULL; |
4731 | mddev->queue->backing_dev_info.congested_fn = NULL; | 4790 | mddev->queue->backing_dev_info.congested_fn = NULL; |
4732 | 4791 | ||
4733 | /* tell userspace to handle 'inactive' */ | 4792 | /* tell userspace to handle 'inactive' */ |
4734 | sysfs_notify_dirent(mddev->sysfs_state); | 4793 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4735 | 4794 | ||
4736 | list_for_each_entry(rdev, &mddev->disks, same_set) | 4795 | list_for_each_entry(rdev, &mddev->disks, same_set) |
4737 | if (rdev->raid_disk >= 0) { | 4796 | if (rdev->raid_disk >= 0) { |
@@ -4741,21 +4800,17 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4741 | } | 4800 | } |
4742 | 4801 | ||
4743 | set_capacity(disk, 0); | 4802 | set_capacity(disk, 0); |
4803 | mutex_unlock(&mddev->open_mutex); | ||
4744 | revalidate_disk(disk); | 4804 | revalidate_disk(disk); |
4745 | 4805 | ||
4746 | if (mddev->ro) | 4806 | if (mddev->ro) |
4747 | mddev->ro = 0; | 4807 | mddev->ro = 0; |
4748 | 4808 | } else | |
4749 | err = 0; | 4809 | mutex_unlock(&mddev->open_mutex); |
4750 | } | ||
4751 | mutex_unlock(&mddev->open_mutex); | ||
4752 | if (err) | ||
4753 | return err; | ||
4754 | /* | 4810 | /* |
4755 | * Free resources if final stop | 4811 | * Free resources if final stop |
4756 | */ | 4812 | */ |
4757 | if (mode == 0) { | 4813 | if (mode == 0) { |
4758 | |||
4759 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); | 4814 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); |
4760 | 4815 | ||
4761 | bitmap_destroy(mddev); | 4816 | bitmap_destroy(mddev); |
@@ -4772,13 +4827,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4772 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 4827 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
4773 | if (mddev->hold_active == UNTIL_STOP) | 4828 | if (mddev->hold_active == UNTIL_STOP) |
4774 | mddev->hold_active = 0; | 4829 | mddev->hold_active = 0; |
4775 | |||
4776 | } | 4830 | } |
4777 | err = 0; | ||
4778 | blk_integrity_unregister(disk); | 4831 | blk_integrity_unregister(disk); |
4779 | md_new_event(mddev); | 4832 | md_new_event(mddev); |
4780 | sysfs_notify_dirent(mddev->sysfs_state); | 4833 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4781 | return err; | 4834 | return 0; |
4782 | } | 4835 | } |
4783 | 4836 | ||
4784 | #ifndef MODULE | 4837 | #ifndef MODULE |
@@ -5139,7 +5192,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
5139 | if (err) | 5192 | if (err) |
5140 | export_rdev(rdev); | 5193 | export_rdev(rdev); |
5141 | else | 5194 | else |
5142 | sysfs_notify_dirent(rdev->sysfs_state); | 5195 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
5143 | 5196 | ||
5144 | md_update_sb(mddev, 1); | 5197 | md_update_sb(mddev, 1); |
5145 | if (mddev->degraded) | 5198 | if (mddev->degraded) |
@@ -5332,8 +5385,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
5332 | err = 0; | 5385 | err = 0; |
5333 | if (mddev->pers) { | 5386 | if (mddev->pers) { |
5334 | mddev->pers->quiesce(mddev, 1); | 5387 | mddev->pers->quiesce(mddev, 1); |
5335 | if (fd >= 0) | 5388 | if (fd >= 0) { |
5336 | err = bitmap_create(mddev); | 5389 | err = bitmap_create(mddev); |
5390 | if (!err) | ||
5391 | err = bitmap_load(mddev); | ||
5392 | } | ||
5337 | if (fd < 0 || err) { | 5393 | if (fd < 0 || err) { |
5338 | bitmap_destroy(mddev); | 5394 | bitmap_destroy(mddev); |
5339 | fd = -1; /* make sure to put the file */ | 5395 | fd = -1; /* make sure to put the file */ |
@@ -5582,6 +5638,8 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
5582 | mddev->bitmap_info.default_offset; | 5638 | mddev->bitmap_info.default_offset; |
5583 | mddev->pers->quiesce(mddev, 1); | 5639 | mddev->pers->quiesce(mddev, 1); |
5584 | rv = bitmap_create(mddev); | 5640 | rv = bitmap_create(mddev); |
5641 | if (!rv) | ||
5642 | rv = bitmap_load(mddev); | ||
5585 | if (rv) | 5643 | if (rv) |
5586 | bitmap_destroy(mddev); | 5644 | bitmap_destroy(mddev); |
5587 | mddev->pers->quiesce(mddev, 0); | 5645 | mddev->pers->quiesce(mddev, 0); |
@@ -5814,7 +5872,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
5814 | if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { | 5872 | if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { |
5815 | if (mddev->ro == 2) { | 5873 | if (mddev->ro == 2) { |
5816 | mddev->ro = 0; | 5874 | mddev->ro = 0; |
5817 | sysfs_notify_dirent(mddev->sysfs_state); | 5875 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
5818 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5876 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
5819 | md_wakeup_thread(mddev->thread); | 5877 | md_wakeup_thread(mddev->thread); |
5820 | } else { | 5878 | } else { |
@@ -6065,10 +6123,12 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
6065 | mddev->pers->error_handler(mddev,rdev); | 6123 | mddev->pers->error_handler(mddev,rdev); |
6066 | if (mddev->degraded) | 6124 | if (mddev->degraded) |
6067 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | 6125 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); |
6068 | sysfs_notify_dirent(rdev->sysfs_state); | 6126 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
6069 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 6127 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
6070 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 6128 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
6071 | md_wakeup_thread(mddev->thread); | 6129 | md_wakeup_thread(mddev->thread); |
6130 | if (mddev->event_work.func) | ||
6131 | schedule_work(&mddev->event_work); | ||
6072 | md_new_event_inintr(mddev); | 6132 | md_new_event_inintr(mddev); |
6073 | } | 6133 | } |
6074 | 6134 | ||
@@ -6526,7 +6586,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
6526 | spin_unlock_irq(&mddev->write_lock); | 6586 | spin_unlock_irq(&mddev->write_lock); |
6527 | } | 6587 | } |
6528 | if (did_change) | 6588 | if (did_change) |
6529 | sysfs_notify_dirent(mddev->sysfs_state); | 6589 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
6530 | wait_event(mddev->sb_wait, | 6590 | wait_event(mddev->sb_wait, |
6531 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && | 6591 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && |
6532 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); | 6592 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); |
@@ -6569,7 +6629,7 @@ int md_allow_write(mddev_t *mddev) | |||
6569 | mddev->safemode = 1; | 6629 | mddev->safemode = 1; |
6570 | spin_unlock_irq(&mddev->write_lock); | 6630 | spin_unlock_irq(&mddev->write_lock); |
6571 | md_update_sb(mddev, 0); | 6631 | md_update_sb(mddev, 0); |
6572 | sysfs_notify_dirent(mddev->sysfs_state); | 6632 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
6573 | } else | 6633 | } else |
6574 | spin_unlock_irq(&mddev->write_lock); | 6634 | spin_unlock_irq(&mddev->write_lock); |
6575 | 6635 | ||
@@ -6580,6 +6640,14 @@ int md_allow_write(mddev_t *mddev) | |||
6580 | } | 6640 | } |
6581 | EXPORT_SYMBOL_GPL(md_allow_write); | 6641 | EXPORT_SYMBOL_GPL(md_allow_write); |
6582 | 6642 | ||
6643 | void md_unplug(mddev_t *mddev) | ||
6644 | { | ||
6645 | if (mddev->queue) | ||
6646 | blk_unplug(mddev->queue); | ||
6647 | if (mddev->plug) | ||
6648 | mddev->plug->unplug_fn(mddev->plug); | ||
6649 | } | ||
6650 | |||
6583 | #define SYNC_MARKS 10 | 6651 | #define SYNC_MARKS 10 |
6584 | #define SYNC_MARK_STEP (3*HZ) | 6652 | #define SYNC_MARK_STEP (3*HZ) |
6585 | void md_do_sync(mddev_t *mddev) | 6653 | void md_do_sync(mddev_t *mddev) |
@@ -6758,12 +6826,13 @@ void md_do_sync(mddev_t *mddev) | |||
6758 | >= mddev->resync_max - mddev->curr_resync_completed | 6826 | >= mddev->resync_max - mddev->curr_resync_completed |
6759 | )) { | 6827 | )) { |
6760 | /* time to update curr_resync_completed */ | 6828 | /* time to update curr_resync_completed */ |
6761 | blk_unplug(mddev->queue); | 6829 | md_unplug(mddev); |
6762 | wait_event(mddev->recovery_wait, | 6830 | wait_event(mddev->recovery_wait, |
6763 | atomic_read(&mddev->recovery_active) == 0); | 6831 | atomic_read(&mddev->recovery_active) == 0); |
6764 | mddev->curr_resync_completed = | 6832 | mddev->curr_resync_completed = |
6765 | mddev->curr_resync; | 6833 | mddev->curr_resync; |
6766 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 6834 | if (mddev->persistent) |
6835 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
6767 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 6836 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
6768 | } | 6837 | } |
6769 | 6838 | ||
@@ -6835,7 +6904,7 @@ void md_do_sync(mddev_t *mddev) | |||
6835 | * about not overloading the IO subsystem. (things like an | 6904 | * about not overloading the IO subsystem. (things like an |
6836 | * e2fsck being done on the RAID array should execute fast) | 6905 | * e2fsck being done on the RAID array should execute fast) |
6837 | */ | 6906 | */ |
6838 | blk_unplug(mddev->queue); | 6907 | md_unplug(mddev); |
6839 | cond_resched(); | 6908 | cond_resched(); |
6840 | 6909 | ||
6841 | currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 | 6910 | currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 |
@@ -6854,7 +6923,7 @@ void md_do_sync(mddev_t *mddev) | |||
6854 | * this also signals 'finished resyncing' to md_stop | 6923 | * this also signals 'finished resyncing' to md_stop |
6855 | */ | 6924 | */ |
6856 | out: | 6925 | out: |
6857 | blk_unplug(mddev->queue); | 6926 | md_unplug(mddev); |
6858 | 6927 | ||
6859 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); | 6928 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); |
6860 | 6929 | ||
@@ -6956,10 +7025,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
6956 | sprintf(nm, "rd%d", rdev->raid_disk); | 7025 | sprintf(nm, "rd%d", rdev->raid_disk); |
6957 | if (sysfs_create_link(&mddev->kobj, | 7026 | if (sysfs_create_link(&mddev->kobj, |
6958 | &rdev->kobj, nm)) | 7027 | &rdev->kobj, nm)) |
6959 | printk(KERN_WARNING | 7028 | /* failure here is OK */; |
6960 | "md: cannot register " | ||
6961 | "%s for %s\n", | ||
6962 | nm, mdname(mddev)); | ||
6963 | spares++; | 7029 | spares++; |
6964 | md_new_event(mddev); | 7030 | md_new_event(mddev); |
6965 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 7031 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
@@ -7052,7 +7118,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7052 | mddev->safemode = 0; | 7118 | mddev->safemode = 0; |
7053 | spin_unlock_irq(&mddev->write_lock); | 7119 | spin_unlock_irq(&mddev->write_lock); |
7054 | if (did_change) | 7120 | if (did_change) |
7055 | sysfs_notify_dirent(mddev->sysfs_state); | 7121 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
7056 | } | 7122 | } |
7057 | 7123 | ||
7058 | if (mddev->flags) | 7124 | if (mddev->flags) |
@@ -7091,7 +7157,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7091 | mddev->recovery = 0; | 7157 | mddev->recovery = 0; |
7092 | /* flag recovery needed just to double check */ | 7158 | /* flag recovery needed just to double check */ |
7093 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 7159 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
7094 | sysfs_notify_dirent(mddev->sysfs_action); | 7160 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
7095 | md_new_event(mddev); | 7161 | md_new_event(mddev); |
7096 | goto unlock; | 7162 | goto unlock; |
7097 | } | 7163 | } |
@@ -7153,7 +7219,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7153 | mddev->recovery = 0; | 7219 | mddev->recovery = 0; |
7154 | } else | 7220 | } else |
7155 | md_wakeup_thread(mddev->sync_thread); | 7221 | md_wakeup_thread(mddev->sync_thread); |
7156 | sysfs_notify_dirent(mddev->sysfs_action); | 7222 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
7157 | md_new_event(mddev); | 7223 | md_new_event(mddev); |
7158 | } | 7224 | } |
7159 | unlock: | 7225 | unlock: |
@@ -7162,7 +7228,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7162 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, | 7228 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, |
7163 | &mddev->recovery)) | 7229 | &mddev->recovery)) |
7164 | if (mddev->sysfs_action) | 7230 | if (mddev->sysfs_action) |
7165 | sysfs_notify_dirent(mddev->sysfs_action); | 7231 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
7166 | } | 7232 | } |
7167 | mddev_unlock(mddev); | 7233 | mddev_unlock(mddev); |
7168 | } | 7234 | } |
@@ -7170,7 +7236,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7170 | 7236 | ||
7171 | void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | 7237 | void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) |
7172 | { | 7238 | { |
7173 | sysfs_notify_dirent(rdev->sysfs_state); | 7239 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
7174 | wait_event_timeout(rdev->blocked_wait, | 7240 | wait_event_timeout(rdev->blocked_wait, |
7175 | !test_bit(Blocked, &rdev->flags), | 7241 | !test_bit(Blocked, &rdev->flags), |
7176 | msecs_to_jiffies(5000)); | 7242 | msecs_to_jiffies(5000)); |
diff --git a/drivers/md/md.h b/drivers/md/md.h index fc56e0f21c80..a953fe2808ae 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -29,6 +29,26 @@ | |||
29 | typedef struct mddev_s mddev_t; | 29 | typedef struct mddev_s mddev_t; |
30 | typedef struct mdk_rdev_s mdk_rdev_t; | 30 | typedef struct mdk_rdev_s mdk_rdev_t; |
31 | 31 | ||
32 | /* generic plugging support - like that provided with request_queue, | ||
33 | * but does not require a request_queue | ||
34 | */ | ||
35 | struct plug_handle { | ||
36 | void (*unplug_fn)(struct plug_handle *); | ||
37 | struct timer_list unplug_timer; | ||
38 | struct work_struct unplug_work; | ||
39 | unsigned long unplug_flag; | ||
40 | }; | ||
41 | #define PLUGGED_FLAG 1 | ||
42 | void plugger_init(struct plug_handle *plug, | ||
43 | void (*unplug_fn)(struct plug_handle *)); | ||
44 | void plugger_set_plug(struct plug_handle *plug); | ||
45 | int plugger_remove_plug(struct plug_handle *plug); | ||
46 | static inline void plugger_flush(struct plug_handle *plug) | ||
47 | { | ||
48 | del_timer_sync(&plug->unplug_timer); | ||
49 | cancel_work_sync(&plug->unplug_work); | ||
50 | } | ||
51 | |||
32 | /* | 52 | /* |
33 | * MD's 'extended' device | 53 | * MD's 'extended' device |
34 | */ | 54 | */ |
@@ -125,6 +145,10 @@ struct mddev_s | |||
125 | int suspended; | 145 | int suspended; |
126 | atomic_t active_io; | 146 | atomic_t active_io; |
127 | int ro; | 147 | int ro; |
148 | int sysfs_active; /* set when sysfs deletes | ||
149 | * are happening, so run/ | ||
150 | * takeover/stop are not safe | ||
151 | */ | ||
128 | 152 | ||
129 | struct gendisk *gendisk; | 153 | struct gendisk *gendisk; |
130 | 154 | ||
@@ -297,9 +321,14 @@ struct mddev_s | |||
297 | * hot-adding a bitmap. It should | 321 | * hot-adding a bitmap. It should |
298 | * eventually be settable by sysfs. | 322 | * eventually be settable by sysfs. |
299 | */ | 323 | */ |
324 | /* When md is serving under dm, it might use a | ||
325 | * dirty_log to store the bits. | ||
326 | */ | ||
327 | struct dm_dirty_log *log; | ||
328 | |||
300 | struct mutex mutex; | 329 | struct mutex mutex; |
301 | unsigned long chunksize; | 330 | unsigned long chunksize; |
302 | unsigned long daemon_sleep; /* how many seconds between updates? */ | 331 | unsigned long daemon_sleep; /* how many jiffies between updates? */ |
303 | unsigned long max_write_behind; /* write-behind mode */ | 332 | unsigned long max_write_behind; /* write-behind mode */ |
304 | int external; | 333 | int external; |
305 | } bitmap_info; | 334 | } bitmap_info; |
@@ -308,6 +337,8 @@ struct mddev_s | |||
308 | struct list_head all_mddevs; | 337 | struct list_head all_mddevs; |
309 | 338 | ||
310 | struct attribute_group *to_remove; | 339 | struct attribute_group *to_remove; |
340 | struct plug_handle *plug; /* if used by personality */ | ||
341 | |||
311 | /* Generic barrier handling. | 342 | /* Generic barrier handling. |
312 | * If there is a pending barrier request, all other | 343 | * If there is a pending barrier request, all other |
313 | * writes are blocked while the devices are flushed. | 344 | * writes are blocked while the devices are flushed. |
@@ -318,6 +349,7 @@ struct mddev_s | |||
318 | struct bio *barrier; | 349 | struct bio *barrier; |
319 | atomic_t flush_pending; | 350 | atomic_t flush_pending; |
320 | struct work_struct barrier_work; | 351 | struct work_struct barrier_work; |
352 | struct work_struct event_work; /* used by dm to report failure event */ | ||
321 | }; | 353 | }; |
322 | 354 | ||
323 | 355 | ||
@@ -382,6 +414,18 @@ struct md_sysfs_entry { | |||
382 | }; | 414 | }; |
383 | extern struct attribute_group md_bitmap_group; | 415 | extern struct attribute_group md_bitmap_group; |
384 | 416 | ||
417 | static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name) | ||
418 | { | ||
419 | if (sd) | ||
420 | return sysfs_get_dirent(sd, NULL, name); | ||
421 | return sd; | ||
422 | } | ||
423 | static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd) | ||
424 | { | ||
425 | if (sd) | ||
426 | sysfs_notify_dirent(sd); | ||
427 | } | ||
428 | |||
385 | static inline char * mdname (mddev_t * mddev) | 429 | static inline char * mdname (mddev_t * mddev) |
386 | { | 430 | { |
387 | return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; | 431 | return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; |
@@ -474,5 +518,14 @@ extern int md_integrity_register(mddev_t *mddev); | |||
474 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 518 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
475 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); | 519 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); |
476 | extern void restore_bitmap_write_access(struct file *file); | 520 | extern void restore_bitmap_write_access(struct file *file); |
521 | extern void md_unplug(mddev_t *mddev); | ||
522 | |||
523 | extern void mddev_init(mddev_t *mddev); | ||
524 | extern int md_run(mddev_t *mddev); | ||
525 | extern void md_stop(mddev_t *mddev); | ||
526 | extern void md_stop_writes(mddev_t *mddev); | ||
527 | extern void md_rdev_init(mdk_rdev_t *rdev); | ||
477 | 528 | ||
529 | extern void mddev_suspend(mddev_t *mddev); | ||
530 | extern void mddev_resume(mddev_t *mddev); | ||
478 | #endif /* _MD_MD_H */ | 531 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/mktables.c b/drivers/md/mktables.c deleted file mode 100644 index 3b1500843bba..000000000000 --- a/drivers/md/mktables.c +++ /dev/null | |||
@@ -1,132 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This file is part of the Linux kernel, and is made available under | ||
6 | * the terms of the GNU General Public License version 2 or (at your | ||
7 | * option) any later version; incorporated herein by reference. | ||
8 | * | ||
9 | * ----------------------------------------------------------------------- */ | ||
10 | |||
11 | /* | ||
12 | * mktables.c | ||
13 | * | ||
14 | * Make RAID-6 tables. This is a host user space program to be run at | ||
15 | * compile time. | ||
16 | */ | ||
17 | |||
18 | #include <stdio.h> | ||
19 | #include <string.h> | ||
20 | #include <inttypes.h> | ||
21 | #include <stdlib.h> | ||
22 | #include <time.h> | ||
23 | |||
24 | static uint8_t gfmul(uint8_t a, uint8_t b) | ||
25 | { | ||
26 | uint8_t v = 0; | ||
27 | |||
28 | while (b) { | ||
29 | if (b & 1) | ||
30 | v ^= a; | ||
31 | a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); | ||
32 | b >>= 1; | ||
33 | } | ||
34 | |||
35 | return v; | ||
36 | } | ||
37 | |||
38 | static uint8_t gfpow(uint8_t a, int b) | ||
39 | { | ||
40 | uint8_t v = 1; | ||
41 | |||
42 | b %= 255; | ||
43 | if (b < 0) | ||
44 | b += 255; | ||
45 | |||
46 | while (b) { | ||
47 | if (b & 1) | ||
48 | v = gfmul(v, a); | ||
49 | a = gfmul(a, a); | ||
50 | b >>= 1; | ||
51 | } | ||
52 | |||
53 | return v; | ||
54 | } | ||
55 | |||
56 | int main(int argc, char *argv[]) | ||
57 | { | ||
58 | int i, j, k; | ||
59 | uint8_t v; | ||
60 | uint8_t exptbl[256], invtbl[256]; | ||
61 | |||
62 | printf("#include <linux/raid/pq.h>\n"); | ||
63 | |||
64 | /* Compute multiplication table */ | ||
65 | printf("\nconst u8 __attribute__((aligned(256)))\n" | ||
66 | "raid6_gfmul[256][256] =\n" | ||
67 | "{\n"); | ||
68 | for (i = 0; i < 256; i++) { | ||
69 | printf("\t{\n"); | ||
70 | for (j = 0; j < 256; j += 8) { | ||
71 | printf("\t\t"); | ||
72 | for (k = 0; k < 8; k++) | ||
73 | printf("0x%02x,%c", gfmul(i, j + k), | ||
74 | (k == 7) ? '\n' : ' '); | ||
75 | } | ||
76 | printf("\t},\n"); | ||
77 | } | ||
78 | printf("};\n"); | ||
79 | printf("#ifdef __KERNEL__\n"); | ||
80 | printf("EXPORT_SYMBOL(raid6_gfmul);\n"); | ||
81 | printf("#endif\n"); | ||
82 | |||
83 | /* Compute power-of-2 table (exponent) */ | ||
84 | v = 1; | ||
85 | printf("\nconst u8 __attribute__((aligned(256)))\n" | ||
86 | "raid6_gfexp[256] =\n" "{\n"); | ||
87 | for (i = 0; i < 256; i += 8) { | ||
88 | printf("\t"); | ||
89 | for (j = 0; j < 8; j++) { | ||
90 | exptbl[i + j] = v; | ||
91 | printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); | ||
92 | v = gfmul(v, 2); | ||
93 | if (v == 1) | ||
94 | v = 0; /* For entry 255, not a real entry */ | ||
95 | } | ||
96 | } | ||
97 | printf("};\n"); | ||
98 | printf("#ifdef __KERNEL__\n"); | ||
99 | printf("EXPORT_SYMBOL(raid6_gfexp);\n"); | ||
100 | printf("#endif\n"); | ||
101 | |||
102 | /* Compute inverse table x^-1 == x^254 */ | ||
103 | printf("\nconst u8 __attribute__((aligned(256)))\n" | ||
104 | "raid6_gfinv[256] =\n" "{\n"); | ||
105 | for (i = 0; i < 256; i += 8) { | ||
106 | printf("\t"); | ||
107 | for (j = 0; j < 8; j++) { | ||
108 | invtbl[i + j] = v = gfpow(i + j, 254); | ||
109 | printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); | ||
110 | } | ||
111 | } | ||
112 | printf("};\n"); | ||
113 | printf("#ifdef __KERNEL__\n"); | ||
114 | printf("EXPORT_SYMBOL(raid6_gfinv);\n"); | ||
115 | printf("#endif\n"); | ||
116 | |||
117 | /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ | ||
118 | printf("\nconst u8 __attribute__((aligned(256)))\n" | ||
119 | "raid6_gfexi[256] =\n" "{\n"); | ||
120 | for (i = 0; i < 256; i += 8) { | ||
121 | printf("\t"); | ||
122 | for (j = 0; j < 8; j++) | ||
123 | printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], | ||
124 | (j == 7) ? '\n' : ' '); | ||
125 | } | ||
126 | printf("};\n"); | ||
127 | printf("#ifdef __KERNEL__\n"); | ||
128 | printf("EXPORT_SYMBOL(raid6_gfexi);\n"); | ||
129 | printf("#endif\n"); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 62ecb6650fd0..a88aeb5198c7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -825,11 +825,29 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
825 | */ | 825 | */ |
826 | bp = bio_split(bio, | 826 | bp = bio_split(bio, |
827 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); | 827 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); |
828 | |||
829 | /* Each of these 'make_request' calls will call 'wait_barrier'. | ||
830 | * If the first succeeds but the second blocks due to the resync | ||
831 | * thread raising the barrier, we will deadlock because the | ||
832 | * IO to the underlying device will be queued in generic_make_request | ||
833 | * and will never complete, so will never reduce nr_pending. | ||
834 | * So increment nr_waiting here so no new raise_barriers will | ||
835 | * succeed, and so the second wait_barrier cannot block. | ||
836 | */ | ||
837 | spin_lock_irq(&conf->resync_lock); | ||
838 | conf->nr_waiting++; | ||
839 | spin_unlock_irq(&conf->resync_lock); | ||
840 | |||
828 | if (make_request(mddev, &bp->bio1)) | 841 | if (make_request(mddev, &bp->bio1)) |
829 | generic_make_request(&bp->bio1); | 842 | generic_make_request(&bp->bio1); |
830 | if (make_request(mddev, &bp->bio2)) | 843 | if (make_request(mddev, &bp->bio2)) |
831 | generic_make_request(&bp->bio2); | 844 | generic_make_request(&bp->bio2); |
832 | 845 | ||
846 | spin_lock_irq(&conf->resync_lock); | ||
847 | conf->nr_waiting--; | ||
848 | wake_up(&conf->wait_barrier); | ||
849 | spin_unlock_irq(&conf->resync_lock); | ||
850 | |||
833 | bio_pair_release(bp); | 851 | bio_pair_release(bp); |
834 | return 0; | 852 | return 0; |
835 | bad_map: | 853 | bad_map: |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 20ac2f14376a..866d4b5a144c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { | 202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { |
203 | list_add_tail(&sh->lru, &conf->delayed_list); | 203 | list_add_tail(&sh->lru, &conf->delayed_list); |
204 | blk_plug_device(conf->mddev->queue); | 204 | plugger_set_plug(&conf->plug); |
205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
206 | sh->bm_seq - conf->seq_write > 0) { | 206 | sh->bm_seq - conf->seq_write > 0) { |
207 | list_add_tail(&sh->lru, &conf->bitmap_list); | 207 | list_add_tail(&sh->lru, &conf->bitmap_list); |
208 | blk_plug_device(conf->mddev->queue); | 208 | plugger_set_plug(&conf->plug); |
209 | } else { | 209 | } else { |
210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
211 | list_add_tail(&sh->lru, &conf->handle_list); | 211 | list_add_tail(&sh->lru, &conf->handle_list); |
@@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf) | |||
434 | } | 434 | } |
435 | 435 | ||
436 | static void unplug_slaves(mddev_t *mddev); | 436 | static void unplug_slaves(mddev_t *mddev); |
437 | static void raid5_unplug_device(struct request_queue *q); | ||
438 | 437 | ||
439 | static struct stripe_head * | 438 | static struct stripe_head * |
440 | get_active_stripe(raid5_conf_t *conf, sector_t sector, | 439 | get_active_stripe(raid5_conf_t *conf, sector_t sector, |
@@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, | |||
464 | < (conf->max_nr_stripes *3/4) | 463 | < (conf->max_nr_stripes *3/4) |
465 | || !conf->inactive_blocked), | 464 | || !conf->inactive_blocked), |
466 | conf->device_lock, | 465 | conf->device_lock, |
467 | raid5_unplug_device(conf->mddev->queue) | 466 | md_raid5_unplug_device(conf) |
468 | ); | 467 | ); |
469 | conf->inactive_blocked = 0; | 468 | conf->inactive_blocked = 0; |
470 | } else | 469 | } else |
@@ -1337,10 +1336,14 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
1337 | struct kmem_cache *sc; | 1336 | struct kmem_cache *sc; |
1338 | int devs = max(conf->raid_disks, conf->previous_raid_disks); | 1337 | int devs = max(conf->raid_disks, conf->previous_raid_disks); |
1339 | 1338 | ||
1340 | sprintf(conf->cache_name[0], | 1339 | if (conf->mddev->gendisk) |
1341 | "raid%d-%s", conf->level, mdname(conf->mddev)); | 1340 | sprintf(conf->cache_name[0], |
1342 | sprintf(conf->cache_name[1], | 1341 | "raid%d-%s", conf->level, mdname(conf->mddev)); |
1343 | "raid%d-%s-alt", conf->level, mdname(conf->mddev)); | 1342 | else |
1343 | sprintf(conf->cache_name[0], | ||
1344 | "raid%d-%p", conf->level, conf->mddev); | ||
1345 | sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); | ||
1346 | |||
1344 | conf->active_name = 0; | 1347 | conf->active_name = 0; |
1345 | sc = kmem_cache_create(conf->cache_name[conf->active_name], | 1348 | sc = kmem_cache_create(conf->cache_name[conf->active_name], |
1346 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), | 1349 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), |
@@ -3614,7 +3617,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) | |||
3614 | list_add_tail(&sh->lru, &conf->hold_list); | 3617 | list_add_tail(&sh->lru, &conf->hold_list); |
3615 | } | 3618 | } |
3616 | } else | 3619 | } else |
3617 | blk_plug_device(conf->mddev->queue); | 3620 | plugger_set_plug(&conf->plug); |
3618 | } | 3621 | } |
3619 | 3622 | ||
3620 | static void activate_bit_delay(raid5_conf_t *conf) | 3623 | static void activate_bit_delay(raid5_conf_t *conf) |
@@ -3655,36 +3658,44 @@ static void unplug_slaves(mddev_t *mddev) | |||
3655 | rcu_read_unlock(); | 3658 | rcu_read_unlock(); |
3656 | } | 3659 | } |
3657 | 3660 | ||
3658 | static void raid5_unplug_device(struct request_queue *q) | 3661 | void md_raid5_unplug_device(raid5_conf_t *conf) |
3659 | { | 3662 | { |
3660 | mddev_t *mddev = q->queuedata; | ||
3661 | raid5_conf_t *conf = mddev->private; | ||
3662 | unsigned long flags; | 3663 | unsigned long flags; |
3663 | 3664 | ||
3664 | spin_lock_irqsave(&conf->device_lock, flags); | 3665 | spin_lock_irqsave(&conf->device_lock, flags); |
3665 | 3666 | ||
3666 | if (blk_remove_plug(q)) { | 3667 | if (plugger_remove_plug(&conf->plug)) { |
3667 | conf->seq_flush++; | 3668 | conf->seq_flush++; |
3668 | raid5_activate_delayed(conf); | 3669 | raid5_activate_delayed(conf); |
3669 | } | 3670 | } |
3670 | md_wakeup_thread(mddev->thread); | 3671 | md_wakeup_thread(conf->mddev->thread); |
3671 | 3672 | ||
3672 | spin_unlock_irqrestore(&conf->device_lock, flags); | 3673 | spin_unlock_irqrestore(&conf->device_lock, flags); |
3673 | 3674 | ||
3674 | unplug_slaves(mddev); | 3675 | unplug_slaves(conf->mddev); |
3675 | } | 3676 | } |
3677 | EXPORT_SYMBOL_GPL(md_raid5_unplug_device); | ||
3676 | 3678 | ||
3677 | static int raid5_congested(void *data, int bits) | 3679 | static void raid5_unplug(struct plug_handle *plug) |
3680 | { | ||
3681 | raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); | ||
3682 | md_raid5_unplug_device(conf); | ||
3683 | } | ||
3684 | |||
3685 | static void raid5_unplug_queue(struct request_queue *q) | ||
3686 | { | ||
3687 | mddev_t *mddev = q->queuedata; | ||
3688 | md_raid5_unplug_device(mddev->private); | ||
3689 | } | ||
3690 | |||
3691 | int md_raid5_congested(mddev_t *mddev, int bits) | ||
3678 | { | 3692 | { |
3679 | mddev_t *mddev = data; | ||
3680 | raid5_conf_t *conf = mddev->private; | 3693 | raid5_conf_t *conf = mddev->private; |
3681 | 3694 | ||
3682 | /* No difference between reads and writes. Just check | 3695 | /* No difference between reads and writes. Just check |
3683 | * how busy the stripe_cache is | 3696 | * how busy the stripe_cache is |
3684 | */ | 3697 | */ |
3685 | 3698 | ||
3686 | if (mddev_congested(mddev, bits)) | ||
3687 | return 1; | ||
3688 | if (conf->inactive_blocked) | 3699 | if (conf->inactive_blocked) |
3689 | return 1; | 3700 | return 1; |
3690 | if (conf->quiesce) | 3701 | if (conf->quiesce) |
@@ -3694,6 +3705,15 @@ static int raid5_congested(void *data, int bits) | |||
3694 | 3705 | ||
3695 | return 0; | 3706 | return 0; |
3696 | } | 3707 | } |
3708 | EXPORT_SYMBOL_GPL(md_raid5_congested); | ||
3709 | |||
3710 | static int raid5_congested(void *data, int bits) | ||
3711 | { | ||
3712 | mddev_t *mddev = data; | ||
3713 | |||
3714 | return mddev_congested(mddev, bits) || | ||
3715 | md_raid5_congested(mddev, bits); | ||
3716 | } | ||
3697 | 3717 | ||
3698 | /* We want read requests to align with chunks where possible, | 3718 | /* We want read requests to align with chunks where possible, |
3699 | * but write requests don't need to. | 3719 | * but write requests don't need to. |
@@ -4075,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4075 | * add failed due to overlap. Flush everything | 4095 | * add failed due to overlap. Flush everything |
4076 | * and wait a while | 4096 | * and wait a while |
4077 | */ | 4097 | */ |
4078 | raid5_unplug_device(mddev->queue); | 4098 | md_raid5_unplug_device(conf); |
4079 | release_stripe(sh); | 4099 | release_stripe(sh); |
4080 | schedule(); | 4100 | schedule(); |
4081 | goto retry; | 4101 | goto retry; |
@@ -4566,23 +4586,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) | |||
4566 | return 0; | 4586 | return 0; |
4567 | } | 4587 | } |
4568 | 4588 | ||
4569 | static ssize_t | 4589 | int |
4570 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | 4590 | raid5_set_cache_size(mddev_t *mddev, int size) |
4571 | { | 4591 | { |
4572 | raid5_conf_t *conf = mddev->private; | 4592 | raid5_conf_t *conf = mddev->private; |
4573 | unsigned long new; | ||
4574 | int err; | 4593 | int err; |
4575 | 4594 | ||
4576 | if (len >= PAGE_SIZE) | 4595 | if (size <= 16 || size > 32768) |
4577 | return -EINVAL; | 4596 | return -EINVAL; |
4578 | if (!conf) | 4597 | while (size < conf->max_nr_stripes) { |
4579 | return -ENODEV; | ||
4580 | |||
4581 | if (strict_strtoul(page, 10, &new)) | ||
4582 | return -EINVAL; | ||
4583 | if (new <= 16 || new > 32768) | ||
4584 | return -EINVAL; | ||
4585 | while (new < conf->max_nr_stripes) { | ||
4586 | if (drop_one_stripe(conf)) | 4598 | if (drop_one_stripe(conf)) |
4587 | conf->max_nr_stripes--; | 4599 | conf->max_nr_stripes--; |
4588 | else | 4600 | else |
@@ -4591,11 +4603,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | |||
4591 | err = md_allow_write(mddev); | 4603 | err = md_allow_write(mddev); |
4592 | if (err) | 4604 | if (err) |
4593 | return err; | 4605 | return err; |
4594 | while (new > conf->max_nr_stripes) { | 4606 | while (size > conf->max_nr_stripes) { |
4595 | if (grow_one_stripe(conf)) | 4607 | if (grow_one_stripe(conf)) |
4596 | conf->max_nr_stripes++; | 4608 | conf->max_nr_stripes++; |
4597 | else break; | 4609 | else break; |
4598 | } | 4610 | } |
4611 | return 0; | ||
4612 | } | ||
4613 | EXPORT_SYMBOL(raid5_set_cache_size); | ||
4614 | |||
4615 | static ssize_t | ||
4616 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | ||
4617 | { | ||
4618 | raid5_conf_t *conf = mddev->private; | ||
4619 | unsigned long new; | ||
4620 | int err; | ||
4621 | |||
4622 | if (len >= PAGE_SIZE) | ||
4623 | return -EINVAL; | ||
4624 | if (!conf) | ||
4625 | return -ENODEV; | ||
4626 | |||
4627 | if (strict_strtoul(page, 10, &new)) | ||
4628 | return -EINVAL; | ||
4629 | err = raid5_set_cache_size(mddev, new); | ||
4630 | if (err) | ||
4631 | return err; | ||
4599 | return len; | 4632 | return len; |
4600 | } | 4633 | } |
4601 | 4634 | ||
@@ -4958,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded | |||
4958 | static int run(mddev_t *mddev) | 4991 | static int run(mddev_t *mddev) |
4959 | { | 4992 | { |
4960 | raid5_conf_t *conf; | 4993 | raid5_conf_t *conf; |
4961 | int working_disks = 0, chunk_size; | 4994 | int working_disks = 0; |
4962 | int dirty_parity_disks = 0; | 4995 | int dirty_parity_disks = 0; |
4963 | mdk_rdev_t *rdev; | 4996 | mdk_rdev_t *rdev; |
4964 | sector_t reshape_offset = 0; | 4997 | sector_t reshape_offset = 0; |
@@ -5144,42 +5177,47 @@ static int run(mddev_t *mddev) | |||
5144 | "reshape"); | 5177 | "reshape"); |
5145 | } | 5178 | } |
5146 | 5179 | ||
5147 | /* read-ahead size must cover two whole stripes, which is | ||
5148 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices | ||
5149 | */ | ||
5150 | { | ||
5151 | int data_disks = conf->previous_raid_disks - conf->max_degraded; | ||
5152 | int stripe = data_disks * | ||
5153 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | ||
5154 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | ||
5155 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | ||
5156 | } | ||
5157 | 5180 | ||
5158 | /* Ok, everything is just fine now */ | 5181 | /* Ok, everything is just fine now */ |
5159 | if (mddev->to_remove == &raid5_attrs_group) | 5182 | if (mddev->to_remove == &raid5_attrs_group) |
5160 | mddev->to_remove = NULL; | 5183 | mddev->to_remove = NULL; |
5161 | else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) | 5184 | else if (mddev->kobj.sd && |
5185 | sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) | ||
5162 | printk(KERN_WARNING | 5186 | printk(KERN_WARNING |
5163 | "md/raid:%s: failed to create sysfs attributes.\n", | 5187 | "raid5: failed to create sysfs attributes for %s\n", |
5164 | mdname(mddev)); | 5188 | mdname(mddev)); |
5189 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | ||
5165 | 5190 | ||
5166 | mddev->queue->queue_lock = &conf->device_lock; | 5191 | plugger_init(&conf->plug, raid5_unplug); |
5192 | mddev->plug = &conf->plug; | ||
5193 | if (mddev->queue) { | ||
5194 | int chunk_size; | ||
5195 | /* read-ahead size must cover two whole stripes, which | ||
5196 | * is 2 * (datadisks) * chunksize where 'n' is the | ||
5197 | * number of raid devices | ||
5198 | */ | ||
5199 | int data_disks = conf->previous_raid_disks - conf->max_degraded; | ||
5200 | int stripe = data_disks * | ||
5201 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | ||
5202 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | ||
5203 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | ||
5167 | 5204 | ||
5168 | mddev->queue->unplug_fn = raid5_unplug_device; | 5205 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
5169 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
5170 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | ||
5171 | 5206 | ||
5172 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5207 | mddev->queue->backing_dev_info.congested_data = mddev; |
5208 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | ||
5209 | mddev->queue->queue_lock = &conf->device_lock; | ||
5210 | mddev->queue->unplug_fn = raid5_unplug_queue; | ||
5173 | 5211 | ||
5174 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 5212 | chunk_size = mddev->chunk_sectors << 9; |
5175 | chunk_size = mddev->chunk_sectors << 9; | 5213 | blk_queue_io_min(mddev->queue, chunk_size); |
5176 | blk_queue_io_min(mddev->queue, chunk_size); | 5214 | blk_queue_io_opt(mddev->queue, chunk_size * |
5177 | blk_queue_io_opt(mddev->queue, chunk_size * | 5215 | (conf->raid_disks - conf->max_degraded)); |
5178 | (conf->raid_disks - conf->max_degraded)); | ||
5179 | 5216 | ||
5180 | list_for_each_entry(rdev, &mddev->disks, same_set) | 5217 | list_for_each_entry(rdev, &mddev->disks, same_set) |
5181 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 5218 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
5182 | rdev->data_offset << 9); | 5219 | rdev->data_offset << 9); |
5220 | } | ||
5183 | 5221 | ||
5184 | return 0; | 5222 | return 0; |
5185 | abort: | 5223 | abort: |
@@ -5200,8 +5238,9 @@ static int stop(mddev_t *mddev) | |||
5200 | 5238 | ||
5201 | md_unregister_thread(mddev->thread); | 5239 | md_unregister_thread(mddev->thread); |
5202 | mddev->thread = NULL; | 5240 | mddev->thread = NULL; |
5203 | mddev->queue->backing_dev_info.congested_fn = NULL; | 5241 | if (mddev->queue) |
5204 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 5242 | mddev->queue->backing_dev_info.congested_fn = NULL; |
5243 | plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ | ||
5205 | free_conf(conf); | 5244 | free_conf(conf); |
5206 | mddev->private = NULL; | 5245 | mddev->private = NULL; |
5207 | mddev->to_remove = &raid5_attrs_group; | 5246 | mddev->to_remove = &raid5_attrs_group; |
@@ -5545,10 +5584,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
5545 | sprintf(nm, "rd%d", rdev->raid_disk); | 5584 | sprintf(nm, "rd%d", rdev->raid_disk); |
5546 | if (sysfs_create_link(&mddev->kobj, | 5585 | if (sysfs_create_link(&mddev->kobj, |
5547 | &rdev->kobj, nm)) | 5586 | &rdev->kobj, nm)) |
5548 | printk(KERN_WARNING | 5587 | /* Failure here is OK */; |
5549 | "md/raid:%s: failed to create " | ||
5550 | " link %s\n", | ||
5551 | mdname(mddev), nm); | ||
5552 | } else | 5588 | } else |
5553 | break; | 5589 | break; |
5554 | } | 5590 | } |
@@ -5603,7 +5639,7 @@ static void end_reshape(raid5_conf_t *conf) | |||
5603 | /* read-ahead size must cover two whole stripes, which is | 5639 | /* read-ahead size must cover two whole stripes, which is |
5604 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices | 5640 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices |
5605 | */ | 5641 | */ |
5606 | { | 5642 | if (conf->mddev->queue) { |
5607 | int data_disks = conf->raid_disks - conf->max_degraded; | 5643 | int data_disks = conf->raid_disks - conf->max_degraded; |
5608 | int stripe = data_disks * ((conf->chunk_sectors << 9) | 5644 | int stripe = data_disks * ((conf->chunk_sectors << 9) |
5609 | / PAGE_SIZE); | 5645 | / PAGE_SIZE); |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 0f86f5e36724..36eaed5dfd6e 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -388,7 +388,7 @@ struct raid5_private_data { | |||
388 | * two caches. | 388 | * two caches. |
389 | */ | 389 | */ |
390 | int active_name; | 390 | int active_name; |
391 | char cache_name[2][20]; | 391 | char cache_name[2][32]; |
392 | struct kmem_cache *slab_cache; /* for allocating stripes */ | 392 | struct kmem_cache *slab_cache; /* for allocating stripes */ |
393 | 393 | ||
394 | int seq_flush, seq_write; | 394 | int seq_flush, seq_write; |
@@ -398,6 +398,9 @@ struct raid5_private_data { | |||
398 | * (fresh device added). | 398 | * (fresh device added). |
399 | * Cleared when a sync completes. | 399 | * Cleared when a sync completes. |
400 | */ | 400 | */ |
401 | |||
402 | struct plug_handle plug; | ||
403 | |||
401 | /* per cpu variables */ | 404 | /* per cpu variables */ |
402 | struct raid5_percpu { | 405 | struct raid5_percpu { |
403 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 406 | struct page *spare_page; /* Used when checking P/Q in raid6 */ |
@@ -497,4 +500,8 @@ static inline int algorithm_is_DDF(int layout) | |||
497 | { | 500 | { |
498 | return layout >= 8 && layout <= 10; | 501 | return layout >= 8 && layout <= 10; |
499 | } | 502 | } |
503 | |||
504 | extern int md_raid5_congested(mddev_t *mddev, int bits); | ||
505 | extern void md_raid5_unplug_device(raid5_conf_t *conf); | ||
506 | extern int raid5_set_cache_size(mddev_t *mddev, int size); | ||
500 | #endif | 507 | #endif |
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c deleted file mode 100644 index 1f8784bfd44d..000000000000 --- a/drivers/md/raid6algos.c +++ /dev/null | |||
@@ -1,154 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6algos.c | ||
15 | * | ||
16 | * Algorithm list and algorithm selection for RAID-6 | ||
17 | */ | ||
18 | |||
19 | #include <linux/raid/pq.h> | ||
20 | #include <linux/gfp.h> | ||
21 | #ifndef __KERNEL__ | ||
22 | #include <sys/mman.h> | ||
23 | #include <stdio.h> | ||
24 | #else | ||
25 | #if !RAID6_USE_EMPTY_ZERO_PAGE | ||
26 | /* In .bss so it's zeroed */ | ||
27 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | ||
28 | EXPORT_SYMBOL(raid6_empty_zero_page); | ||
29 | #endif | ||
30 | #endif | ||
31 | |||
32 | struct raid6_calls raid6_call; | ||
33 | EXPORT_SYMBOL_GPL(raid6_call); | ||
34 | |||
35 | const struct raid6_calls * const raid6_algos[] = { | ||
36 | &raid6_intx1, | ||
37 | &raid6_intx2, | ||
38 | &raid6_intx4, | ||
39 | &raid6_intx8, | ||
40 | #if defined(__ia64__) | ||
41 | &raid6_intx16, | ||
42 | &raid6_intx32, | ||
43 | #endif | ||
44 | #if defined(__i386__) && !defined(__arch_um__) | ||
45 | &raid6_mmxx1, | ||
46 | &raid6_mmxx2, | ||
47 | &raid6_sse1x1, | ||
48 | &raid6_sse1x2, | ||
49 | &raid6_sse2x1, | ||
50 | &raid6_sse2x2, | ||
51 | #endif | ||
52 | #if defined(__x86_64__) && !defined(__arch_um__) | ||
53 | &raid6_sse2x1, | ||
54 | &raid6_sse2x2, | ||
55 | &raid6_sse2x4, | ||
56 | #endif | ||
57 | #ifdef CONFIG_ALTIVEC | ||
58 | &raid6_altivec1, | ||
59 | &raid6_altivec2, | ||
60 | &raid6_altivec4, | ||
61 | &raid6_altivec8, | ||
62 | #endif | ||
63 | NULL | ||
64 | }; | ||
65 | |||
66 | #ifdef __KERNEL__ | ||
67 | #define RAID6_TIME_JIFFIES_LG2 4 | ||
68 | #else | ||
69 | /* Need more time to be stable in userspace */ | ||
70 | #define RAID6_TIME_JIFFIES_LG2 9 | ||
71 | #define time_before(x, y) ((x) < (y)) | ||
72 | #endif | ||
73 | |||
74 | /* Try to pick the best algorithm */ | ||
75 | /* This code uses the gfmul table as convenient data set to abuse */ | ||
76 | |||
77 | int __init raid6_select_algo(void) | ||
78 | { | ||
79 | const struct raid6_calls * const * algo; | ||
80 | const struct raid6_calls * best; | ||
81 | char *syndromes; | ||
82 | void *dptrs[(65536/PAGE_SIZE)+2]; | ||
83 | int i, disks; | ||
84 | unsigned long perf, bestperf; | ||
85 | int bestprefer; | ||
86 | unsigned long j0, j1; | ||
87 | |||
88 | disks = (65536/PAGE_SIZE)+2; | ||
89 | for ( i = 0 ; i < disks-2 ; i++ ) { | ||
90 | dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; | ||
91 | } | ||
92 | |||
93 | /* Normal code - use a 2-page allocation to avoid D$ conflict */ | ||
94 | syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); | ||
95 | |||
96 | if ( !syndromes ) { | ||
97 | printk("raid6: Yikes! No memory available.\n"); | ||
98 | return -ENOMEM; | ||
99 | } | ||
100 | |||
101 | dptrs[disks-2] = syndromes; | ||
102 | dptrs[disks-1] = syndromes + PAGE_SIZE; | ||
103 | |||
104 | bestperf = 0; bestprefer = 0; best = NULL; | ||
105 | |||
106 | for ( algo = raid6_algos ; *algo ; algo++ ) { | ||
107 | if ( !(*algo)->valid || (*algo)->valid() ) { | ||
108 | perf = 0; | ||
109 | |||
110 | preempt_disable(); | ||
111 | j0 = jiffies; | ||
112 | while ( (j1 = jiffies) == j0 ) | ||
113 | cpu_relax(); | ||
114 | while (time_before(jiffies, | ||
115 | j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { | ||
116 | (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); | ||
117 | perf++; | ||
118 | } | ||
119 | preempt_enable(); | ||
120 | |||
121 | if ( (*algo)->prefer > bestprefer || | ||
122 | ((*algo)->prefer == bestprefer && | ||
123 | perf > bestperf) ) { | ||
124 | best = *algo; | ||
125 | bestprefer = best->prefer; | ||
126 | bestperf = perf; | ||
127 | } | ||
128 | printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, | ||
129 | (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | if (best) { | ||
134 | printk("raid6: using algorithm %s (%ld MB/s)\n", | ||
135 | best->name, | ||
136 | (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | ||
137 | raid6_call = *best; | ||
138 | } else | ||
139 | printk("raid6: Yikes! No algorithm found!\n"); | ||
140 | |||
141 | free_pages((unsigned long)syndromes, 1); | ||
142 | |||
143 | return best ? 0 : -EINVAL; | ||
144 | } | ||
145 | |||
146 | static void raid6_exit(void) | ||
147 | { | ||
148 | do { } while (0); | ||
149 | } | ||
150 | |||
151 | subsys_initcall(raid6_select_algo); | ||
152 | module_exit(raid6_exit); | ||
153 | MODULE_LICENSE("GPL"); | ||
154 | MODULE_DESCRIPTION("RAID6 Q-syndrome calculations"); | ||
diff --git a/drivers/md/raid6altivec.uc b/drivers/md/raid6altivec.uc deleted file mode 100644 index 2654d5c854be..000000000000 --- a/drivers/md/raid6altivec.uc +++ /dev/null | |||
@@ -1,130 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6altivec$#.c | ||
15 | * | ||
16 | * $#-way unrolled Altivec RAID-6 instruction set | ||
17 | * | ||
18 | * This file is postprocessed using unroll.awk | ||
19 | * | ||
20 | * <benh> hpa: in process, | ||
21 | * you can just "steal" the vec unit with enable_kernel_altivec() (but | ||
22 | * bracket this with preempt_disable/enable or in a lock) | ||
23 | */ | ||
24 | |||
25 | #include <linux/raid/pq.h> | ||
26 | |||
27 | #ifdef CONFIG_ALTIVEC | ||
28 | |||
29 | #include <altivec.h> | ||
30 | #ifdef __KERNEL__ | ||
31 | # include <asm/system.h> | ||
32 | # include <asm/cputable.h> | ||
33 | #endif | ||
34 | |||
35 | /* | ||
36 | * This is the C data type to use. We use a vector of | ||
37 | * signed char so vec_cmpgt() will generate the right | ||
38 | * instruction. | ||
39 | */ | ||
40 | |||
41 | typedef vector signed char unative_t; | ||
42 | |||
43 | #define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) | ||
44 | #define NSIZE sizeof(unative_t) | ||
45 | |||
46 | /* | ||
47 | * The SHLBYTE() operation shifts each byte left by 1, *not* | ||
48 | * rolling over into the next byte | ||
49 | */ | ||
50 | static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | ||
51 | { | ||
52 | return vec_add(v,v); | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * The MASK() operation returns 0xFF in any byte for which the high | ||
57 | * bit is 1, 0x00 for any byte for which the high bit is 0. | ||
58 | */ | ||
59 | static inline __attribute_const__ unative_t MASK(unative_t v) | ||
60 | { | ||
61 | unative_t zv = NBYTES(0); | ||
62 | |||
63 | /* vec_cmpgt returns a vector bool char; thus the need for the cast */ | ||
64 | return (unative_t)vec_cmpgt(zv, v); | ||
65 | } | ||
66 | |||
67 | |||
68 | /* This is noinline to make damned sure that gcc doesn't move any of the | ||
69 | Altivec code around the enable/disable code */ | ||
70 | static void noinline | ||
71 | raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) | ||
72 | { | ||
73 | u8 **dptr = (u8 **)ptrs; | ||
74 | u8 *p, *q; | ||
75 | int d, z, z0; | ||
76 | |||
77 | unative_t wd$$, wq$$, wp$$, w1$$, w2$$; | ||
78 | unative_t x1d = NBYTES(0x1d); | ||
79 | |||
80 | z0 = disks - 3; /* Highest data disk */ | ||
81 | p = dptr[z0+1]; /* XOR parity */ | ||
82 | q = dptr[z0+2]; /* RS syndrome */ | ||
83 | |||
84 | for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { | ||
85 | wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; | ||
86 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
87 | wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | ||
88 | wp$$ = vec_xor(wp$$, wd$$); | ||
89 | w2$$ = MASK(wq$$); | ||
90 | w1$$ = SHLBYTE(wq$$); | ||
91 | w2$$ = vec_and(w2$$, x1d); | ||
92 | w1$$ = vec_xor(w1$$, w2$$); | ||
93 | wq$$ = vec_xor(w1$$, wd$$); | ||
94 | } | ||
95 | *(unative_t *)&p[d+NSIZE*$$] = wp$$; | ||
96 | *(unative_t *)&q[d+NSIZE*$$] = wq$$; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
101 | { | ||
102 | preempt_disable(); | ||
103 | enable_kernel_altivec(); | ||
104 | |||
105 | raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); | ||
106 | |||
107 | preempt_enable(); | ||
108 | } | ||
109 | |||
110 | int raid6_have_altivec(void); | ||
111 | #if $# == 1 | ||
112 | int raid6_have_altivec(void) | ||
113 | { | ||
114 | /* This assumes either all CPUs have Altivec or none does */ | ||
115 | # ifdef __KERNEL__ | ||
116 | return cpu_has_feature(CPU_FTR_ALTIVEC); | ||
117 | # else | ||
118 | return 1; | ||
119 | # endif | ||
120 | } | ||
121 | #endif | ||
122 | |||
123 | const struct raid6_calls raid6_altivec$# = { | ||
124 | raid6_altivec$#_gen_syndrome, | ||
125 | raid6_have_altivec, | ||
126 | "altivecx$#", | ||
127 | 0 | ||
128 | }; | ||
129 | |||
130 | #endif /* CONFIG_ALTIVEC */ | ||
diff --git a/drivers/md/raid6int.uc b/drivers/md/raid6int.uc deleted file mode 100644 index d1e276a14fab..000000000000 --- a/drivers/md/raid6int.uc +++ /dev/null | |||
@@ -1,117 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6int$#.c | ||
15 | * | ||
16 | * $#-way unrolled portable integer math RAID-6 instruction set | ||
17 | * | ||
18 | * This file is postprocessed using unroll.awk | ||
19 | */ | ||
20 | |||
21 | #include <linux/raid/pq.h> | ||
22 | |||
23 | /* | ||
24 | * This is the C data type to use | ||
25 | */ | ||
26 | |||
27 | /* Change this from BITS_PER_LONG if there is something better... */ | ||
28 | #if BITS_PER_LONG == 64 | ||
29 | # define NBYTES(x) ((x) * 0x0101010101010101UL) | ||
30 | # define NSIZE 8 | ||
31 | # define NSHIFT 3 | ||
32 | # define NSTRING "64" | ||
33 | typedef u64 unative_t; | ||
34 | #else | ||
35 | # define NBYTES(x) ((x) * 0x01010101U) | ||
36 | # define NSIZE 4 | ||
37 | # define NSHIFT 2 | ||
38 | # define NSTRING "32" | ||
39 | typedef u32 unative_t; | ||
40 | #endif | ||
41 | |||
42 | |||
43 | |||
44 | /* | ||
45 | * IA-64 wants insane amounts of unrolling. On other architectures that | ||
46 | * is just a waste of space. | ||
47 | */ | ||
48 | #if ($# <= 8) || defined(__ia64__) | ||
49 | |||
50 | |||
51 | /* | ||
52 | * These sub-operations are separate inlines since they can sometimes be | ||
53 | * specially optimized using architecture-specific hacks. | ||
54 | */ | ||
55 | |||
56 | /* | ||
57 | * The SHLBYTE() operation shifts each byte left by 1, *not* | ||
58 | * rolling over into the next byte | ||
59 | */ | ||
60 | static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | ||
61 | { | ||
62 | unative_t vv; | ||
63 | |||
64 | vv = (v << 1) & NBYTES(0xfe); | ||
65 | return vv; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * The MASK() operation returns 0xFF in any byte for which the high | ||
70 | * bit is 1, 0x00 for any byte for which the high bit is 0. | ||
71 | */ | ||
72 | static inline __attribute_const__ unative_t MASK(unative_t v) | ||
73 | { | ||
74 | unative_t vv; | ||
75 | |||
76 | vv = v & NBYTES(0x80); | ||
77 | vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ | ||
78 | return vv; | ||
79 | } | ||
80 | |||
81 | |||
82 | static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
83 | { | ||
84 | u8 **dptr = (u8 **)ptrs; | ||
85 | u8 *p, *q; | ||
86 | int d, z, z0; | ||
87 | |||
88 | unative_t wd$$, wq$$, wp$$, w1$$, w2$$; | ||
89 | |||
90 | z0 = disks - 3; /* Highest data disk */ | ||
91 | p = dptr[z0+1]; /* XOR parity */ | ||
92 | q = dptr[z0+2]; /* RS syndrome */ | ||
93 | |||
94 | for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { | ||
95 | wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; | ||
96 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
97 | wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | ||
98 | wp$$ ^= wd$$; | ||
99 | w2$$ = MASK(wq$$); | ||
100 | w1$$ = SHLBYTE(wq$$); | ||
101 | w2$$ &= NBYTES(0x1d); | ||
102 | w1$$ ^= w2$$; | ||
103 | wq$$ = w1$$ ^ wd$$; | ||
104 | } | ||
105 | *(unative_t *)&p[d+NSIZE*$$] = wp$$; | ||
106 | *(unative_t *)&q[d+NSIZE*$$] = wq$$; | ||
107 | } | ||
108 | } | ||
109 | |||
110 | const struct raid6_calls raid6_intx$# = { | ||
111 | raid6_int$#_gen_syndrome, | ||
112 | NULL, /* always valid */ | ||
113 | "int" NSTRING "x$#", | ||
114 | 0 | ||
115 | }; | ||
116 | |||
117 | #endif | ||
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c deleted file mode 100644 index e7f6c13132bf..000000000000 --- a/drivers/md/raid6mmx.c +++ /dev/null | |||
@@ -1,142 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6mmx.c | ||
15 | * | ||
16 | * MMX implementation of RAID-6 syndrome functions | ||
17 | */ | ||
18 | |||
19 | #if defined(__i386__) && !defined(__arch_um__) | ||
20 | |||
21 | #include <linux/raid/pq.h> | ||
22 | #include "raid6x86.h" | ||
23 | |||
24 | /* Shared with raid6sse1.c */ | ||
25 | const struct raid6_mmx_constants { | ||
26 | u64 x1d; | ||
27 | } raid6_mmx_constants = { | ||
28 | 0x1d1d1d1d1d1d1d1dULL, | ||
29 | }; | ||
30 | |||
31 | static int raid6_have_mmx(void) | ||
32 | { | ||
33 | /* Not really "boot_cpu" but "all_cpus" */ | ||
34 | return boot_cpu_has(X86_FEATURE_MMX); | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * Plain MMX implementation | ||
39 | */ | ||
40 | static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
41 | { | ||
42 | u8 **dptr = (u8 **)ptrs; | ||
43 | u8 *p, *q; | ||
44 | int d, z, z0; | ||
45 | |||
46 | z0 = disks - 3; /* Highest data disk */ | ||
47 | p = dptr[z0+1]; /* XOR parity */ | ||
48 | q = dptr[z0+2]; /* RS syndrome */ | ||
49 | |||
50 | kernel_fpu_begin(); | ||
51 | |||
52 | asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | ||
53 | asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | ||
54 | |||
55 | for ( d = 0 ; d < bytes ; d += 8 ) { | ||
56 | asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
57 | asm volatile("movq %mm2,%mm4"); /* Q[0] */ | ||
58 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
59 | asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); | ||
60 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
61 | asm volatile("paddb %mm4,%mm4"); | ||
62 | asm volatile("pand %mm0,%mm5"); | ||
63 | asm volatile("pxor %mm5,%mm4"); | ||
64 | asm volatile("pxor %mm5,%mm5"); | ||
65 | asm volatile("pxor %mm6,%mm2"); | ||
66 | asm volatile("pxor %mm6,%mm4"); | ||
67 | } | ||
68 | asm volatile("movq %%mm2,%0" : "=m" (p[d])); | ||
69 | asm volatile("pxor %mm2,%mm2"); | ||
70 | asm volatile("movq %%mm4,%0" : "=m" (q[d])); | ||
71 | asm volatile("pxor %mm4,%mm4"); | ||
72 | } | ||
73 | |||
74 | kernel_fpu_end(); | ||
75 | } | ||
76 | |||
77 | const struct raid6_calls raid6_mmxx1 = { | ||
78 | raid6_mmx1_gen_syndrome, | ||
79 | raid6_have_mmx, | ||
80 | "mmxx1", | ||
81 | 0 | ||
82 | }; | ||
83 | |||
84 | /* | ||
85 | * Unrolled-by-2 MMX implementation | ||
86 | */ | ||
87 | static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
88 | { | ||
89 | u8 **dptr = (u8 **)ptrs; | ||
90 | u8 *p, *q; | ||
91 | int d, z, z0; | ||
92 | |||
93 | z0 = disks - 3; /* Highest data disk */ | ||
94 | p = dptr[z0+1]; /* XOR parity */ | ||
95 | q = dptr[z0+2]; /* RS syndrome */ | ||
96 | |||
97 | kernel_fpu_begin(); | ||
98 | |||
99 | asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | ||
100 | asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | ||
101 | asm volatile("pxor %mm7,%mm7"); /* Zero temp */ | ||
102 | |||
103 | for ( d = 0 ; d < bytes ; d += 16 ) { | ||
104 | asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
105 | asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); | ||
106 | asm volatile("movq %mm2,%mm4"); /* Q[0] */ | ||
107 | asm volatile("movq %mm3,%mm6"); /* Q[1] */ | ||
108 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
109 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
110 | asm volatile("pcmpgtb %mm6,%mm7"); | ||
111 | asm volatile("paddb %mm4,%mm4"); | ||
112 | asm volatile("paddb %mm6,%mm6"); | ||
113 | asm volatile("pand %mm0,%mm5"); | ||
114 | asm volatile("pand %mm0,%mm7"); | ||
115 | asm volatile("pxor %mm5,%mm4"); | ||
116 | asm volatile("pxor %mm7,%mm6"); | ||
117 | asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); | ||
118 | asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); | ||
119 | asm volatile("pxor %mm5,%mm2"); | ||
120 | asm volatile("pxor %mm7,%mm3"); | ||
121 | asm volatile("pxor %mm5,%mm4"); | ||
122 | asm volatile("pxor %mm7,%mm6"); | ||
123 | asm volatile("pxor %mm5,%mm5"); | ||
124 | asm volatile("pxor %mm7,%mm7"); | ||
125 | } | ||
126 | asm volatile("movq %%mm2,%0" : "=m" (p[d])); | ||
127 | asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); | ||
128 | asm volatile("movq %%mm4,%0" : "=m" (q[d])); | ||
129 | asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); | ||
130 | } | ||
131 | |||
132 | kernel_fpu_end(); | ||
133 | } | ||
134 | |||
135 | const struct raid6_calls raid6_mmxx2 = { | ||
136 | raid6_mmx2_gen_syndrome, | ||
137 | raid6_have_mmx, | ||
138 | "mmxx2", | ||
139 | 0 | ||
140 | }; | ||
141 | |||
142 | #endif | ||
diff --git a/drivers/md/raid6recov.c b/drivers/md/raid6recov.c deleted file mode 100644 index 2609f00e0d61..000000000000 --- a/drivers/md/raid6recov.c +++ /dev/null | |||
@@ -1,132 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6recov.c | ||
15 | * | ||
16 | * RAID-6 data recovery in dual failure mode. In single failure mode, | ||
17 | * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct | ||
18 | * the syndrome.) | ||
19 | */ | ||
20 | |||
21 | #include <linux/raid/pq.h> | ||
22 | |||
23 | /* Recover two failed data blocks. */ | ||
24 | void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | ||
25 | void **ptrs) | ||
26 | { | ||
27 | u8 *p, *q, *dp, *dq; | ||
28 | u8 px, qx, db; | ||
29 | const u8 *pbmul; /* P multiplier table for B data */ | ||
30 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
31 | |||
32 | p = (u8 *)ptrs[disks-2]; | ||
33 | q = (u8 *)ptrs[disks-1]; | ||
34 | |||
35 | /* Compute syndrome with zero for the missing data pages | ||
36 | Use the dead data pages as temporary storage for | ||
37 | delta p and delta q */ | ||
38 | dp = (u8 *)ptrs[faila]; | ||
39 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
40 | ptrs[disks-2] = dp; | ||
41 | dq = (u8 *)ptrs[failb]; | ||
42 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
43 | ptrs[disks-1] = dq; | ||
44 | |||
45 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
46 | |||
47 | /* Restore pointer table */ | ||
48 | ptrs[faila] = dp; | ||
49 | ptrs[failb] = dq; | ||
50 | ptrs[disks-2] = p; | ||
51 | ptrs[disks-1] = q; | ||
52 | |||
53 | /* Now, pick the proper data tables */ | ||
54 | pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; | ||
55 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; | ||
56 | |||
57 | /* Now do it... */ | ||
58 | while ( bytes-- ) { | ||
59 | px = *p ^ *dp; | ||
60 | qx = qmul[*q ^ *dq]; | ||
61 | *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ | ||
62 | *dp++ = db ^ px; /* Reconstructed A */ | ||
63 | p++; q++; | ||
64 | } | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(raid6_2data_recov); | ||
67 | |||
68 | /* Recover failure of one data block plus the P block */ | ||
69 | void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) | ||
70 | { | ||
71 | u8 *p, *q, *dq; | ||
72 | const u8 *qmul; /* Q multiplier table */ | ||
73 | |||
74 | p = (u8 *)ptrs[disks-2]; | ||
75 | q = (u8 *)ptrs[disks-1]; | ||
76 | |||
77 | /* Compute syndrome with zero for the missing data page | ||
78 | Use the dead data page as temporary storage for delta q */ | ||
79 | dq = (u8 *)ptrs[faila]; | ||
80 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
81 | ptrs[disks-1] = dq; | ||
82 | |||
83 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
84 | |||
85 | /* Restore pointer table */ | ||
86 | ptrs[faila] = dq; | ||
87 | ptrs[disks-1] = q; | ||
88 | |||
89 | /* Now, pick the proper data tables */ | ||
90 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
91 | |||
92 | /* Now do it... */ | ||
93 | while ( bytes-- ) { | ||
94 | *p++ ^= *dq = qmul[*q ^ *dq]; | ||
95 | q++; dq++; | ||
96 | } | ||
97 | } | ||
98 | EXPORT_SYMBOL_GPL(raid6_datap_recov); | ||
99 | |||
100 | #ifndef __KERNEL__ | ||
101 | /* Testing only */ | ||
102 | |||
103 | /* Recover two failed blocks. */ | ||
104 | void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) | ||
105 | { | ||
106 | if ( faila > failb ) { | ||
107 | int tmp = faila; | ||
108 | faila = failb; | ||
109 | failb = tmp; | ||
110 | } | ||
111 | |||
112 | if ( failb == disks-1 ) { | ||
113 | if ( faila == disks-2 ) { | ||
114 | /* P+Q failure. Just rebuild the syndrome. */ | ||
115 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
116 | } else { | ||
117 | /* data+Q failure. Reconstruct data from P, | ||
118 | then rebuild syndrome. */ | ||
119 | /* NOT IMPLEMENTED - equivalent to RAID-5 */ | ||
120 | } | ||
121 | } else { | ||
122 | if ( failb == disks-2 ) { | ||
123 | /* data+P failure. */ | ||
124 | raid6_datap_recov(disks, bytes, faila, ptrs); | ||
125 | } else { | ||
126 | /* data+data failure. */ | ||
127 | raid6_2data_recov(disks, bytes, faila, failb, ptrs); | ||
128 | } | ||
129 | } | ||
130 | } | ||
131 | |||
132 | #endif | ||
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c deleted file mode 100644 index b274dd5eab8f..000000000000 --- a/drivers/md/raid6sse1.c +++ /dev/null | |||
@@ -1,162 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6sse1.c | ||
15 | * | ||
16 | * SSE-1/MMXEXT implementation of RAID-6 syndrome functions | ||
17 | * | ||
18 | * This is really an MMX implementation, but it requires SSE-1 or | ||
19 | * AMD MMXEXT for prefetch support and a few other features. The | ||
20 | * support for nontemporal memory accesses is enough to make this | ||
21 | * worthwhile as a separate implementation. | ||
22 | */ | ||
23 | |||
24 | #if defined(__i386__) && !defined(__arch_um__) | ||
25 | |||
26 | #include <linux/raid/pq.h> | ||
27 | #include "raid6x86.h" | ||
28 | |||
29 | /* Defined in raid6mmx.c */ | ||
30 | extern const struct raid6_mmx_constants { | ||
31 | u64 x1d; | ||
32 | } raid6_mmx_constants; | ||
33 | |||
34 | static int raid6_have_sse1_or_mmxext(void) | ||
35 | { | ||
36 | /* Not really boot_cpu but "all_cpus" */ | ||
37 | return boot_cpu_has(X86_FEATURE_MMX) && | ||
38 | (boot_cpu_has(X86_FEATURE_XMM) || | ||
39 | boot_cpu_has(X86_FEATURE_MMXEXT)); | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * Plain SSE1 implementation | ||
44 | */ | ||
45 | static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
46 | { | ||
47 | u8 **dptr = (u8 **)ptrs; | ||
48 | u8 *p, *q; | ||
49 | int d, z, z0; | ||
50 | |||
51 | z0 = disks - 3; /* Highest data disk */ | ||
52 | p = dptr[z0+1]; /* XOR parity */ | ||
53 | q = dptr[z0+2]; /* RS syndrome */ | ||
54 | |||
55 | kernel_fpu_begin(); | ||
56 | |||
57 | asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | ||
58 | asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | ||
59 | |||
60 | for ( d = 0 ; d < bytes ; d += 8 ) { | ||
61 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
62 | asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
63 | asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | ||
64 | asm volatile("movq %mm2,%mm4"); /* Q[0] */ | ||
65 | asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); | ||
66 | for ( z = z0-2 ; z >= 0 ; z-- ) { | ||
67 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
68 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
69 | asm volatile("paddb %mm4,%mm4"); | ||
70 | asm volatile("pand %mm0,%mm5"); | ||
71 | asm volatile("pxor %mm5,%mm4"); | ||
72 | asm volatile("pxor %mm5,%mm5"); | ||
73 | asm volatile("pxor %mm6,%mm2"); | ||
74 | asm volatile("pxor %mm6,%mm4"); | ||
75 | asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); | ||
76 | } | ||
77 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
78 | asm volatile("paddb %mm4,%mm4"); | ||
79 | asm volatile("pand %mm0,%mm5"); | ||
80 | asm volatile("pxor %mm5,%mm4"); | ||
81 | asm volatile("pxor %mm5,%mm5"); | ||
82 | asm volatile("pxor %mm6,%mm2"); | ||
83 | asm volatile("pxor %mm6,%mm4"); | ||
84 | |||
85 | asm volatile("movntq %%mm2,%0" : "=m" (p[d])); | ||
86 | asm volatile("movntq %%mm4,%0" : "=m" (q[d])); | ||
87 | } | ||
88 | |||
89 | asm volatile("sfence" : : : "memory"); | ||
90 | kernel_fpu_end(); | ||
91 | } | ||
92 | |||
93 | const struct raid6_calls raid6_sse1x1 = { | ||
94 | raid6_sse11_gen_syndrome, | ||
95 | raid6_have_sse1_or_mmxext, | ||
96 | "sse1x1", | ||
97 | 1 /* Has cache hints */ | ||
98 | }; | ||
99 | |||
100 | /* | ||
101 | * Unrolled-by-2 SSE1 implementation | ||
102 | */ | ||
103 | static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
104 | { | ||
105 | u8 **dptr = (u8 **)ptrs; | ||
106 | u8 *p, *q; | ||
107 | int d, z, z0; | ||
108 | |||
109 | z0 = disks - 3; /* Highest data disk */ | ||
110 | p = dptr[z0+1]; /* XOR parity */ | ||
111 | q = dptr[z0+2]; /* RS syndrome */ | ||
112 | |||
113 | kernel_fpu_begin(); | ||
114 | |||
115 | asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | ||
116 | asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | ||
117 | asm volatile("pxor %mm7,%mm7"); /* Zero temp */ | ||
118 | |||
119 | /* We uniformly assume a single prefetch covers at least 16 bytes */ | ||
120 | for ( d = 0 ; d < bytes ; d += 16 ) { | ||
121 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
122 | asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
123 | asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ | ||
124 | asm volatile("movq %mm2,%mm4"); /* Q[0] */ | ||
125 | asm volatile("movq %mm3,%mm6"); /* Q[1] */ | ||
126 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
127 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
128 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
129 | asm volatile("pcmpgtb %mm6,%mm7"); | ||
130 | asm volatile("paddb %mm4,%mm4"); | ||
131 | asm volatile("paddb %mm6,%mm6"); | ||
132 | asm volatile("pand %mm0,%mm5"); | ||
133 | asm volatile("pand %mm0,%mm7"); | ||
134 | asm volatile("pxor %mm5,%mm4"); | ||
135 | asm volatile("pxor %mm7,%mm6"); | ||
136 | asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); | ||
137 | asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); | ||
138 | asm volatile("pxor %mm5,%mm2"); | ||
139 | asm volatile("pxor %mm7,%mm3"); | ||
140 | asm volatile("pxor %mm5,%mm4"); | ||
141 | asm volatile("pxor %mm7,%mm6"); | ||
142 | asm volatile("pxor %mm5,%mm5"); | ||
143 | asm volatile("pxor %mm7,%mm7"); | ||
144 | } | ||
145 | asm volatile("movntq %%mm2,%0" : "=m" (p[d])); | ||
146 | asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); | ||
147 | asm volatile("movntq %%mm4,%0" : "=m" (q[d])); | ||
148 | asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); | ||
149 | } | ||
150 | |||
151 | asm volatile("sfence" : :: "memory"); | ||
152 | kernel_fpu_end(); | ||
153 | } | ||
154 | |||
155 | const struct raid6_calls raid6_sse1x2 = { | ||
156 | raid6_sse12_gen_syndrome, | ||
157 | raid6_have_sse1_or_mmxext, | ||
158 | "sse1x2", | ||
159 | 1 /* Has cache hints */ | ||
160 | }; | ||
161 | |||
162 | #endif | ||
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c deleted file mode 100644 index 6ed6c6c0389f..000000000000 --- a/drivers/md/raid6sse2.c +++ /dev/null | |||
@@ -1,262 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6sse2.c | ||
15 | * | ||
16 | * SSE-2 implementation of RAID-6 syndrome functions | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | ||
21 | |||
22 | #include <linux/raid/pq.h> | ||
23 | #include "raid6x86.h" | ||
24 | |||
25 | static const struct raid6_sse_constants { | ||
26 | u64 x1d[2]; | ||
27 | } raid6_sse_constants __attribute__((aligned(16))) = { | ||
28 | { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, | ||
29 | }; | ||
30 | |||
31 | static int raid6_have_sse2(void) | ||
32 | { | ||
33 | /* Not really boot_cpu but "all_cpus" */ | ||
34 | return boot_cpu_has(X86_FEATURE_MMX) && | ||
35 | boot_cpu_has(X86_FEATURE_FXSR) && | ||
36 | boot_cpu_has(X86_FEATURE_XMM) && | ||
37 | boot_cpu_has(X86_FEATURE_XMM2); | ||
38 | } | ||
39 | |||
40 | /* | ||
41 | * Plain SSE2 implementation | ||
42 | */ | ||
43 | static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
44 | { | ||
45 | u8 **dptr = (u8 **)ptrs; | ||
46 | u8 *p, *q; | ||
47 | int d, z, z0; | ||
48 | |||
49 | z0 = disks - 3; /* Highest data disk */ | ||
50 | p = dptr[z0+1]; /* XOR parity */ | ||
51 | q = dptr[z0+2]; /* RS syndrome */ | ||
52 | |||
53 | kernel_fpu_begin(); | ||
54 | |||
55 | asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); | ||
56 | asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | ||
57 | |||
58 | for ( d = 0 ; d < bytes ; d += 16 ) { | ||
59 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
60 | asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
61 | asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | ||
62 | asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ | ||
63 | asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); | ||
64 | for ( z = z0-2 ; z >= 0 ; z-- ) { | ||
65 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
66 | asm volatile("pcmpgtb %xmm4,%xmm5"); | ||
67 | asm volatile("paddb %xmm4,%xmm4"); | ||
68 | asm volatile("pand %xmm0,%xmm5"); | ||
69 | asm volatile("pxor %xmm5,%xmm4"); | ||
70 | asm volatile("pxor %xmm5,%xmm5"); | ||
71 | asm volatile("pxor %xmm6,%xmm2"); | ||
72 | asm volatile("pxor %xmm6,%xmm4"); | ||
73 | asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); | ||
74 | } | ||
75 | asm volatile("pcmpgtb %xmm4,%xmm5"); | ||
76 | asm volatile("paddb %xmm4,%xmm4"); | ||
77 | asm volatile("pand %xmm0,%xmm5"); | ||
78 | asm volatile("pxor %xmm5,%xmm4"); | ||
79 | asm volatile("pxor %xmm5,%xmm5"); | ||
80 | asm volatile("pxor %xmm6,%xmm2"); | ||
81 | asm volatile("pxor %xmm6,%xmm4"); | ||
82 | |||
83 | asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | ||
84 | asm volatile("pxor %xmm2,%xmm2"); | ||
85 | asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | ||
86 | asm volatile("pxor %xmm4,%xmm4"); | ||
87 | } | ||
88 | |||
89 | asm volatile("sfence" : : : "memory"); | ||
90 | kernel_fpu_end(); | ||
91 | } | ||
92 | |||
93 | const struct raid6_calls raid6_sse2x1 = { | ||
94 | raid6_sse21_gen_syndrome, | ||
95 | raid6_have_sse2, | ||
96 | "sse2x1", | ||
97 | 1 /* Has cache hints */ | ||
98 | }; | ||
99 | |||
100 | /* | ||
101 | * Unrolled-by-2 SSE2 implementation | ||
102 | */ | ||
103 | static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
104 | { | ||
105 | u8 **dptr = (u8 **)ptrs; | ||
106 | u8 *p, *q; | ||
107 | int d, z, z0; | ||
108 | |||
109 | z0 = disks - 3; /* Highest data disk */ | ||
110 | p = dptr[z0+1]; /* XOR parity */ | ||
111 | q = dptr[z0+2]; /* RS syndrome */ | ||
112 | |||
113 | kernel_fpu_begin(); | ||
114 | |||
115 | asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); | ||
116 | asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | ||
117 | asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ | ||
118 | |||
119 | /* We uniformly assume a single prefetch covers at least 32 bytes */ | ||
120 | for ( d = 0 ; d < bytes ; d += 32 ) { | ||
121 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
122 | asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
123 | asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ | ||
124 | asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ | ||
125 | asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ | ||
126 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
127 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
128 | asm volatile("pcmpgtb %xmm4,%xmm5"); | ||
129 | asm volatile("pcmpgtb %xmm6,%xmm7"); | ||
130 | asm volatile("paddb %xmm4,%xmm4"); | ||
131 | asm volatile("paddb %xmm6,%xmm6"); | ||
132 | asm volatile("pand %xmm0,%xmm5"); | ||
133 | asm volatile("pand %xmm0,%xmm7"); | ||
134 | asm volatile("pxor %xmm5,%xmm4"); | ||
135 | asm volatile("pxor %xmm7,%xmm6"); | ||
136 | asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); | ||
137 | asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); | ||
138 | asm volatile("pxor %xmm5,%xmm2"); | ||
139 | asm volatile("pxor %xmm7,%xmm3"); | ||
140 | asm volatile("pxor %xmm5,%xmm4"); | ||
141 | asm volatile("pxor %xmm7,%xmm6"); | ||
142 | asm volatile("pxor %xmm5,%xmm5"); | ||
143 | asm volatile("pxor %xmm7,%xmm7"); | ||
144 | } | ||
145 | asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | ||
146 | asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); | ||
147 | asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | ||
148 | asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); | ||
149 | } | ||
150 | |||
151 | asm volatile("sfence" : : : "memory"); | ||
152 | kernel_fpu_end(); | ||
153 | } | ||
154 | |||
155 | const struct raid6_calls raid6_sse2x2 = { | ||
156 | raid6_sse22_gen_syndrome, | ||
157 | raid6_have_sse2, | ||
158 | "sse2x2", | ||
159 | 1 /* Has cache hints */ | ||
160 | }; | ||
161 | |||
162 | #endif | ||
163 | |||
164 | #if defined(__x86_64__) && !defined(__arch_um__) | ||
165 | |||
166 | /* | ||
167 | * Unrolled-by-4 SSE2 implementation | ||
168 | */ | ||
169 | static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
170 | { | ||
171 | u8 **dptr = (u8 **)ptrs; | ||
172 | u8 *p, *q; | ||
173 | int d, z, z0; | ||
174 | |||
175 | z0 = disks - 3; /* Highest data disk */ | ||
176 | p = dptr[z0+1]; /* XOR parity */ | ||
177 | q = dptr[z0+2]; /* RS syndrome */ | ||
178 | |||
179 | kernel_fpu_begin(); | ||
180 | |||
181 | asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); | ||
182 | asm volatile("pxor %xmm2,%xmm2"); /* P[0] */ | ||
183 | asm volatile("pxor %xmm3,%xmm3"); /* P[1] */ | ||
184 | asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */ | ||
185 | asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | ||
186 | asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */ | ||
187 | asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ | ||
188 | asm volatile("pxor %xmm10,%xmm10"); /* P[2] */ | ||
189 | asm volatile("pxor %xmm11,%xmm11"); /* P[3] */ | ||
190 | asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */ | ||
191 | asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */ | ||
192 | asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */ | ||
193 | asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */ | ||
194 | |||
195 | for ( d = 0 ; d < bytes ; d += 64 ) { | ||
196 | for ( z = z0 ; z >= 0 ; z-- ) { | ||
197 | /* The second prefetch seems to improve performance... */ | ||
198 | asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); | ||
199 | asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); | ||
200 | asm volatile("pcmpgtb %xmm4,%xmm5"); | ||
201 | asm volatile("pcmpgtb %xmm6,%xmm7"); | ||
202 | asm volatile("pcmpgtb %xmm12,%xmm13"); | ||
203 | asm volatile("pcmpgtb %xmm14,%xmm15"); | ||
204 | asm volatile("paddb %xmm4,%xmm4"); | ||
205 | asm volatile("paddb %xmm6,%xmm6"); | ||
206 | asm volatile("paddb %xmm12,%xmm12"); | ||
207 | asm volatile("paddb %xmm14,%xmm14"); | ||
208 | asm volatile("pand %xmm0,%xmm5"); | ||
209 | asm volatile("pand %xmm0,%xmm7"); | ||
210 | asm volatile("pand %xmm0,%xmm13"); | ||
211 | asm volatile("pand %xmm0,%xmm15"); | ||
212 | asm volatile("pxor %xmm5,%xmm4"); | ||
213 | asm volatile("pxor %xmm7,%xmm6"); | ||
214 | asm volatile("pxor %xmm13,%xmm12"); | ||
215 | asm volatile("pxor %xmm15,%xmm14"); | ||
216 | asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); | ||
217 | asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); | ||
218 | asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); | ||
219 | asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); | ||
220 | asm volatile("pxor %xmm5,%xmm2"); | ||
221 | asm volatile("pxor %xmm7,%xmm3"); | ||
222 | asm volatile("pxor %xmm13,%xmm10"); | ||
223 | asm volatile("pxor %xmm15,%xmm11"); | ||
224 | asm volatile("pxor %xmm5,%xmm4"); | ||
225 | asm volatile("pxor %xmm7,%xmm6"); | ||
226 | asm volatile("pxor %xmm13,%xmm12"); | ||
227 | asm volatile("pxor %xmm15,%xmm14"); | ||
228 | asm volatile("pxor %xmm5,%xmm5"); | ||
229 | asm volatile("pxor %xmm7,%xmm7"); | ||
230 | asm volatile("pxor %xmm13,%xmm13"); | ||
231 | asm volatile("pxor %xmm15,%xmm15"); | ||
232 | } | ||
233 | asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | ||
234 | asm volatile("pxor %xmm2,%xmm2"); | ||
235 | asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); | ||
236 | asm volatile("pxor %xmm3,%xmm3"); | ||
237 | asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); | ||
238 | asm volatile("pxor %xmm10,%xmm10"); | ||
239 | asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); | ||
240 | asm volatile("pxor %xmm11,%xmm11"); | ||
241 | asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | ||
242 | asm volatile("pxor %xmm4,%xmm4"); | ||
243 | asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); | ||
244 | asm volatile("pxor %xmm6,%xmm6"); | ||
245 | asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); | ||
246 | asm volatile("pxor %xmm12,%xmm12"); | ||
247 | asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); | ||
248 | asm volatile("pxor %xmm14,%xmm14"); | ||
249 | } | ||
250 | |||
251 | asm volatile("sfence" : : : "memory"); | ||
252 | kernel_fpu_end(); | ||
253 | } | ||
254 | |||
255 | const struct raid6_calls raid6_sse2x4 = { | ||
256 | raid6_sse24_gen_syndrome, | ||
257 | raid6_have_sse2, | ||
258 | "sse2x4", | ||
259 | 1 /* Has cache hints */ | ||
260 | }; | ||
261 | |||
262 | #endif | ||
diff --git a/drivers/md/raid6test/Makefile b/drivers/md/raid6test/Makefile deleted file mode 100644 index 2874cbef529d..000000000000 --- a/drivers/md/raid6test/Makefile +++ /dev/null | |||
@@ -1,75 +0,0 @@ | |||
#
# This is a simple Makefile to test some of the RAID-6 code
# from userspace.
#
# Sources are copied from the parent directory (%.c / %.uc rules below),
# the unrolled variants are generated with ../unroll.awk, and everything
# is archived into raid6.a, which raid6test links against.
#

CC	 = gcc
OPTFLAGS = -O2 # Adjust as desired
CFLAGS	 = -I.. -I ../../../include -g $(OPTFLAGS)
LD	 = ld
AWK	 = awk
AR	 = ar
RANLIB	 = ranlib

.c.o:
	$(CC) $(CFLAGS) -c -o $@ $<

%.c: ../%.c
	cp -f $< $@

%.uc: ../%.uc
	cp -f $< $@

# These name actions, not files; keep them working even if a file
# called "all", "clean" or "spotless" appears in this directory.
.PHONY: all clean spotless

all:	raid6.a raid6test

raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \
	 raid6int32.o \
	 raid6mmx.o raid6sse1.o raid6sse2.o \
	 raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \
	 raid6recov.o raid6algos.o \
	 raid6tables.o
	rm -f $@
	$(AR) cq $@ $^
	$(RANLIB) $@

raid6test: test.c raid6.a
	$(CC) $(CFLAGS) -o raid6test $^

raid6altivec1.c: raid6altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=1 < raid6altivec.uc > $@

raid6altivec2.c: raid6altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=2 < raid6altivec.uc > $@

raid6altivec4.c: raid6altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=4 < raid6altivec.uc > $@

raid6altivec8.c: raid6altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=8 < raid6altivec.uc > $@

raid6int1.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=1 < raid6int.uc > $@

raid6int2.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=2 < raid6int.uc > $@

raid6int4.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=4 < raid6int.uc > $@

raid6int8.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=8 < raid6int.uc > $@

raid6int16.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=16 < raid6int.uc > $@

raid6int32.c: raid6int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=32 < raid6int.uc > $@

raid6tables.c: mktables
	./mktables > raid6tables.c

# Remove everything that was built or copied in, including both .uc
# templates (raid6altivec.uc used to be left behind).
clean:
	rm -f *.o *.a mktables mktables.c raid6int.uc raid6altivec.uc raid6*.c raid6test

spotless: clean
	rm -f *~
diff --git a/drivers/md/raid6test/test.c b/drivers/md/raid6test/test.c deleted file mode 100644 index 7a930318b17d..000000000000 --- a/drivers/md/raid6test/test.c +++ /dev/null | |||
@@ -1,124 +0,0 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This file is part of the Linux kernel, and is made available under | ||
6 | * the terms of the GNU General Public License version 2 or (at your | ||
7 | * option) any later version; incorporated herein by reference. | ||
8 | * | ||
9 | * ----------------------------------------------------------------------- */ | ||
10 | |||
11 | /* | ||
12 | * raid6test.c | ||
13 | * | ||
14 | * Test RAID-6 recovery with various algorithms | ||
15 | */ | ||
16 | |||
17 | #include <stdlib.h> | ||
18 | #include <stdio.h> | ||
19 | #include <string.h> | ||
20 | #include <linux/raid/pq.h> | ||
21 | |||
22 | #define NDISKS 16 /* Including P and Q */ | ||
23 | |||
24 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | ||
25 | struct raid6_calls raid6_call; | ||
26 | |||
27 | char *dataptrs[NDISKS]; | ||
28 | char data[NDISKS][PAGE_SIZE]; | ||
29 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; | ||
30 | |||
31 | static void makedata(void) | ||
32 | { | ||
33 | int i, j; | ||
34 | |||
35 | for (i = 0; i < NDISKS; i++) { | ||
36 | for (j = 0; j < PAGE_SIZE; j++) | ||
37 | data[i][j] = rand(); | ||
38 | |||
39 | dataptrs[i] = data[i]; | ||
40 | } | ||
41 | } | ||
42 | |||
43 | static char disk_type(int d) | ||
44 | { | ||
45 | switch (d) { | ||
46 | case NDISKS-2: | ||
47 | return 'P'; | ||
48 | case NDISKS-1: | ||
49 | return 'Q'; | ||
50 | default: | ||
51 | return 'D'; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | static int test_disks(int i, int j) | ||
56 | { | ||
57 | int erra, errb; | ||
58 | |||
59 | memset(recovi, 0xf0, PAGE_SIZE); | ||
60 | memset(recovj, 0xba, PAGE_SIZE); | ||
61 | |||
62 | dataptrs[i] = recovi; | ||
63 | dataptrs[j] = recovj; | ||
64 | |||
65 | raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); | ||
66 | |||
67 | erra = memcmp(data[i], recovi, PAGE_SIZE); | ||
68 | errb = memcmp(data[j], recovj, PAGE_SIZE); | ||
69 | |||
70 | if (i < NDISKS-2 && j == NDISKS-1) { | ||
71 | /* We don't implement the DQ failure scenario, since it's | ||
72 | equivalent to a RAID-5 failure (XOR, then recompute Q) */ | ||
73 | erra = errb = 0; | ||
74 | } else { | ||
75 | printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", | ||
76 | raid6_call.name, | ||
77 | i, disk_type(i), | ||
78 | j, disk_type(j), | ||
79 | (!erra && !errb) ? "OK" : | ||
80 | !erra ? "ERRB" : | ||
81 | !errb ? "ERRA" : "ERRAB"); | ||
82 | } | ||
83 | |||
84 | dataptrs[i] = data[i]; | ||
85 | dataptrs[j] = data[j]; | ||
86 | |||
87 | return erra || errb; | ||
88 | } | ||
89 | |||
90 | int main(int argc, char *argv[]) | ||
91 | { | ||
92 | const struct raid6_calls *const *algo; | ||
93 | int i, j; | ||
94 | int err = 0; | ||
95 | |||
96 | makedata(); | ||
97 | |||
98 | for (algo = raid6_algos; *algo; algo++) { | ||
99 | if (!(*algo)->valid || (*algo)->valid()) { | ||
100 | raid6_call = **algo; | ||
101 | |||
102 | /* Nuke syndromes */ | ||
103 | memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); | ||
104 | |||
105 | /* Generate assumed good syndrome */ | ||
106 | raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, | ||
107 | (void **)&dataptrs); | ||
108 | |||
109 | for (i = 0; i < NDISKS-1; i++) | ||
110 | for (j = i+1; j < NDISKS; j++) | ||
111 | err += test_disks(i, j); | ||
112 | } | ||
113 | printf("\n"); | ||
114 | } | ||
115 | |||
116 | printf("\n"); | ||
117 | /* Pick the best algorithm test */ | ||
118 | raid6_select_algo(); | ||
119 | |||
120 | if (err) | ||
121 | printf("\n*** ERRORS FOUND ***\n"); | ||
122 | |||
123 | return err; | ||
124 | } | ||
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h deleted file mode 100644 index 4c22c1568558..000000000000 --- a/drivers/md/raid6x86.h +++ /dev/null | |||
@@ -1,61 +0,0 @@ | |||
1 | /* ----------------------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6x86.h | ||
15 | * | ||
16 | * Definitions common to x86 and x86-64 RAID-6 code only | ||
17 | */ | ||
18 | |||
19 | #ifndef LINUX_RAID_RAID6X86_H | ||
20 | #define LINUX_RAID_RAID6X86_H | ||
21 | |||
22 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | ||
23 | |||
24 | #ifdef __KERNEL__ /* Real code */ | ||
25 | |||
26 | #include <asm/i387.h> | ||
27 | |||
28 | #else /* Dummy code for user space testing */ | ||
29 | |||
/* User-space stand-in for kernel_fpu_begin(): deliberately does nothing. */
static inline void kernel_fpu_begin(void)
{
	/* empty */
}

/* User-space stand-in for kernel_fpu_end(): deliberately does nothing. */
static inline void kernel_fpu_end(void)
{
	/* empty */
}
37 | |||
/*
 * Feature numbers are encoded as word*32 + bit; boot_cpu_has() uses the
 * word to pick the CPUID leaf and the bit to index EDX.
 */
/* Multimedia Extensions */
#define X86_FEATURE_MMX		(0*32+23)
/* FXSAVE and FXRSTOR instructions (fast save and restore) */
#define X86_FEATURE_FXSR	(0*32+24)
/* Streaming SIMD Extensions */
#define X86_FEATURE_XMM		(0*32+25)
/* Streaming SIMD Extensions-2 */
#define X86_FEATURE_XMM2	(0*32+26)
/* AMD MMX extensions */
#define X86_FEATURE_MMXEXT	(1*32+22)
44 | |||
45 | /* Should work well enough on modern CPUs for testing */ | ||
46 | static inline int boot_cpu_has(int flag) | ||
47 | { | ||
48 | u32 eax = (flag >> 5) ? 0x80000001 : 1; | ||
49 | u32 edx; | ||
50 | |||
51 | asm volatile("cpuid" | ||
52 | : "+a" (eax), "=d" (edx) | ||
53 | : : "ecx", "ebx"); | ||
54 | |||
55 | return (edx >> (flag & 31)) & 1; | ||
56 | } | ||
57 | |||
58 | #endif /* ndef __KERNEL__ */ | ||
59 | |||
60 | #endif | ||
61 | #endif | ||
diff --git a/drivers/md/unroll.awk b/drivers/md/unroll.awk deleted file mode 100644 index c6aa03631df8..000000000000 --- a/drivers/md/unroll.awk +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | |||
# This filter requires one command line option of form -vN=n
# where n must be a decimal number.
#
# Repeat each input line containing $$ n times, replacing $$ with 0...n-1.
# Replace each $# with n, and each $* with a single $.

BEGIN {
	n = N + 0
}
{
	# Lines without $$ are emitted exactly once
	if (/\$\$/) { rep = n } else { rep = 1 }
	for (i = 0; i < rep; ++i) {
		tmp = $0
		gsub(/\$\$/, i, tmp)
		# '#' is not a regexp metacharacter; escaping it makes
		# newer gawk versions print a warning
		gsub(/\$#/, n, tmp)
		gsub(/\$\*/, "$", tmp)
		print tmp
	}
}