Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Makefile    |    3
-rw-r--r--  drivers/md/bitmap.c    | 1586
-rw-r--r--  drivers/md/dm-crypt.c  |    3
-rw-r--r--  drivers/md/dm-ioctl.c  |   14
-rw-r--r--  drivers/md/dm-raid1.c  |    1
-rw-r--r--  drivers/md/linear.c    |    3
-rw-r--r--  drivers/md/md.c        |  529
-rw-r--r--  drivers/md/multipath.c |    3
-rw-r--r--  drivers/md/raid0.c     |   12
-rw-r--r--  drivers/md/raid1.c     |  242
-rw-r--r--  drivers/md/raid10.c    |   30
-rw-r--r--  drivers/md/raid5.c     |   12
-rw-r--r--  drivers/md/raid6main.c |   12
13 files changed, 2218 insertions, 232 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 90de9c146a5f..d3efedf6a6ad 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -7,6 +7,7 @@ dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
 dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o
 dm-snapshot-objs := dm-snap.o dm-exception-store.o
 dm-mirror-objs := dm-log.o dm-raid1.o
+md-mod-objs := md.o bitmap.o
 raid6-objs := raid6main.o raid6algos.o raid6recov.o raid6tables.o \
 		raid6int1.o raid6int2.o raid6int4.o \
 		raid6int8.o raid6int16.o raid6int32.o \
@@ -28,7 +29,7 @@ obj-$(CONFIG_MD_RAID5) += raid5.o xor.o
 obj-$(CONFIG_MD_RAID6) += raid6.o xor.o
 obj-$(CONFIG_MD_MULTIPATH) += multipath.o
 obj-$(CONFIG_MD_FAULTY) += faulty.o
-obj-$(CONFIG_BLK_DEV_MD) += md.o
+obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
 obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
 obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
 obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
new file mode 100644
index 000000000000..95980ad6b27b
--- /dev/null
+++ b/drivers/md/bitmap.c
@@ -0,0 +1,1586 @@
/*
 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
 *
 * bitmap_create  - sets up the bitmap structure
 * bitmap_destroy - destroys the bitmap structure
 *
 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
 * - added disk storage for bitmap
 * - changes to allow various bitmap chunk sizes
 * - added bitmap daemon (to asynchronously clear bitmap bits from disk)
 */

/*
 * Still to do:
 *
 * flush after percent set rather than just time based. (maybe both).
 * wait if count gets too high, wake when it drops to half.
 * allow bitmap to be mirrored with superblock (before or after...)
 * allow hot-add to re-instate a current device.
 * allow hot-add of bitmap after quiescing device
 */

#include <linux/module.h>
#include <linux/version.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/config.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/raid/md.h>
#include <linux/raid/bitmap.h>

/* debug macros */

#define DEBUG 0

#if DEBUG
/* these are for debugging purposes only! */

/* define one and only one of these */
#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */
#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/
#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */
#define INJECT_FAULTS_4 0 /* undef */
#define INJECT_FAULTS_5 0 /* undef */
#define INJECT_FAULTS_6 0

/* if these are defined, the driver will fail! debug only */
#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */
#define INJECT_FATAL_FAULT_2 0 /* undef */
#define INJECT_FATAL_FAULT_3 0 /* undef */
#endif

//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */
#define DPRINTK(x...) do { } while(0)

#ifndef PRINTK
#  if DEBUG > 0
#    define PRINTK(x...) printk(KERN_DEBUG x)
#  else
#    define PRINTK(x...)
#  endif
#endif

static inline char *bmname(struct bitmap *bitmap)
{
	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
}


/*
 * test if the bitmap is active
 */
int bitmap_active(struct bitmap *bitmap)
{
	unsigned long flags;
	int res = 0;

	if (!bitmap)
		return res;
	spin_lock_irqsave(&bitmap->lock, flags);
	res = bitmap->flags & BITMAP_ACTIVE;
	spin_unlock_irqrestore(&bitmap->lock, flags);
	return res;
}

#define WRITE_POOL_SIZE 256
/* mempool for queueing pending writes on the bitmap file */
static void *write_pool_alloc(unsigned int gfp_flags, void *data)
{
	return kmalloc(sizeof(struct page_list), gfp_flags);
}

static void write_pool_free(void *ptr, void *data)
{
	kfree(ptr);
}

/*
 * just a placeholder - calls kmalloc for bitmap pages
 */
static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
{
	unsigned char *page;

#if INJECT_FAULTS_1
	page = NULL;
#else
	page = kmalloc(PAGE_SIZE, GFP_NOIO);
#endif
	if (!page)
		printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
	else
		PRINTK("%s: bitmap_alloc_page: allocated page at %p\n",
			bmname(bitmap), page);
	return page;
}

/*
 * for now just a placeholder -- just calls kfree for bitmap pages
 */
static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
{
	PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page);
	kfree(page);
}

/*
 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
 *
 * 1) check to see if this page is allocated, if it's not then try to alloc
 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
 *    page pointer directly as a counter
 *
 * if we find our page, we increment the page's refcount so that it stays
 * allocated while we're using it
 */
static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
{
	unsigned char *mappage;

	if (page >= bitmap->pages) {
		printk(KERN_ALERT
			"%s: invalid bitmap page request: %lu (> %lu)\n",
			bmname(bitmap), page, bitmap->pages-1);
		return -EINVAL;
	}


	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
		return 0;

	if (bitmap->bp[page].map) /* page is already allocated, just return */
		return 0;

	if (!create)
		return -ENOENT;

	spin_unlock_irq(&bitmap->lock);

	/* this page has not been allocated yet */

	if ((mappage = bitmap_alloc_page(bitmap)) == NULL) {
		PRINTK("%s: bitmap map page allocation failed, hijacking\n",
			bmname(bitmap));
		/* failed - set the hijacked flag so that we can use the
		 * pointer as a counter */
		spin_lock_irq(&bitmap->lock);
		if (!bitmap->bp[page].map)
			bitmap->bp[page].hijacked = 1;
		goto out;
	}

	/* got a page */

	spin_lock_irq(&bitmap->lock);

	/* recheck the page */

	if (bitmap->bp[page].map || bitmap->bp[page].hijacked) {
		/* somebody beat us to getting the page */
		bitmap_free_page(bitmap, mappage);
		return 0;
	}

	/* no page was in place and we have one, so install it */

	memset(mappage, 0, PAGE_SIZE);
	bitmap->bp[page].map = mappage;
	bitmap->missing_pages--;
out:
	return 0;
}
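
/*
 * [Editorial sketch -- not part of the original commit.]  What "hijacking"
 * buys us: when the page allocation above fails, the pointer word in
 * struct bitmap_page is itself reused as storage for two counters, so the
 * region can still be tracked, only at much coarser granularity.  Assuming
 * a 16-bit bitmap_counter_t and a pointer of at least 32 bits, the lookup
 * for the hijacked case (see bitmap_get_counter() below) amounts to:
 *
 *	int hi = (pageoff > PAGE_COUNTER_MASK);
 *	bitmap_counter_t *bmc =
 *		&((bitmap_counter_t *)&bitmap->bp[page].map)[hi];
 *
 * i.e. the low half of the pointer word holds one counter and the high
 * half the other.
 */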


/* if page is completely empty, put it back on the free list, or dealloc it */
/* if page was hijacked, unmark the flag so it might get alloced next time */
/* Note: lock should be held when calling this */
static inline void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
{
	char *ptr;

	if (bitmap->bp[page].count) /* page is still busy */
		return;

	/* page is no longer in use, it can be released */

	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
		bitmap->bp[page].hijacked = 0;
		bitmap->bp[page].map = NULL;
		return;
	}

	/* normal case, free the page */

#if 0
/* actually ... let's not.  We will probably need the page again exactly when
 * memory is tight and we are flushing to disk
 */
	return;
#else
	ptr = bitmap->bp[page].map;
	bitmap->bp[page].map = NULL;
	bitmap->missing_pages++;
	bitmap_free_page(bitmap, ptr);
	return;
#endif
}


/*
 * bitmap file handling - read and write the bitmap file and its superblock
 */

/* copy the pathname of a file to a buffer */
char *file_path(struct file *file, char *buf, int count)
{
	struct dentry *d;
	struct vfsmount *v;

	if (!buf)
		return NULL;

	d = file->f_dentry;
	v = file->f_vfsmnt;

	buf = d_path(d, v, buf, count);

	return IS_ERR(buf) ? NULL : buf;
}

/*
 * basic page I/O operations
 */

/* IO operations when bitmap is stored near all superblocks */
static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index)
{
	/* choose a good rdev and read the page from there */

	mdk_rdev_t *rdev;
	struct list_head *tmp;
	struct page *page = alloc_page(GFP_KERNEL);
	sector_t target;

	if (!page)
		return ERR_PTR(-ENOMEM);
	do {
		ITERATE_RDEV(mddev, rdev, tmp)
			if (rdev->in_sync && !rdev->faulty)
				goto found;
		return ERR_PTR(-EIO);

	found:
		target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);

	} while (!sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ));

	page->index = index;
	return page;
}

static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait)
{
	mdk_rdev_t *rdev;
	struct list_head *tmp;

	ITERATE_RDEV(mddev, rdev, tmp)
		if (rdev->in_sync && !rdev->faulty)
			md_super_write(mddev, rdev,
				       (rdev->sb_offset<<1) + offset
				       + page->index * (PAGE_SIZE/512),
				       PAGE_SIZE,
				       page);

	if (wait)
		wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
	return 0;
}

/*
 * write out a page to a file
 */
static int write_page(struct bitmap *bitmap, struct page *page, int wait)
{
	int ret = -ENOMEM;

	if (bitmap->file == NULL)
		return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);

	if (wait)
		lock_page(page);
	else {
		if (TestSetPageLocked(page))
			return -EAGAIN; /* already locked */
		if (PageWriteback(page)) {
			unlock_page(page);
			return -EAGAIN;
		}
	}

	ret = page->mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
	if (!ret)
		ret = page->mapping->a_ops->commit_write(NULL, page, 0,
							 PAGE_SIZE);
	if (ret) {
		unlock_page(page);
		return ret;
	}

	set_page_dirty(page); /* force it to be written out */

	if (!wait) {
		/* add to list to be waited for by daemon */
		struct page_list *item = mempool_alloc(bitmap->write_pool, GFP_NOIO);
		item->page = page;
		page_cache_get(page);
		spin_lock(&bitmap->write_lock);
		list_add(&item->list, &bitmap->complete_pages);
		spin_unlock(&bitmap->write_lock);
		md_wakeup_thread(bitmap->writeback_daemon);
	}
	return write_one_page(page, wait);
}

/* read a page from a file, pinning it into cache, and return bytes_read */
static struct page *read_page(struct file *file, unsigned long index,
			      unsigned long *bytes_read)
{
	struct inode *inode = file->f_mapping->host;
	struct page *page = NULL;
	loff_t isize = i_size_read(inode);
	unsigned long end_index = isize >> PAGE_CACHE_SHIFT;

	PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_CACHE_SIZE,
	       (unsigned long long)index << PAGE_CACHE_SHIFT);

	page = read_cache_page(inode->i_mapping, index,
			       (filler_t *)inode->i_mapping->a_ops->readpage, file);
	if (IS_ERR(page))
		goto out;
	wait_on_page_locked(page);
	if (!PageUptodate(page) || PageError(page)) {
		page_cache_release(page);
		page = ERR_PTR(-EIO);
		goto out;
	}

	if (index > end_index) /* we have read beyond EOF */
		*bytes_read = 0;
	else if (index == end_index) /* possible short read */
		*bytes_read = isize & ~PAGE_CACHE_MASK;
	else
		*bytes_read = PAGE_CACHE_SIZE; /* got a full page */
out:
	if (IS_ERR(page))
		printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n",
		       (int)PAGE_CACHE_SIZE,
		       (unsigned long long)index << PAGE_CACHE_SHIFT,
		       PTR_ERR(page));
	return page;
}

/*
 * bitmap file superblock operations
 */

/* update the event counter and sync the superblock to disk */
int bitmap_update_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;
	unsigned long flags;

	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
		return 0;
	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* no superblock */
		spin_unlock_irqrestore(&bitmap->lock, flags);
		return 0;
	}
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = (bitmap_super_t *)kmap(bitmap->sb_page);
	sb->events = cpu_to_le64(bitmap->mddev->events);
	if (!bitmap->mddev->degraded)
		sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
	kunmap(bitmap->sb_page);
	return write_page(bitmap, bitmap->sb_page, 1);
}

/* print out the bitmap file superblock */
void bitmap_print_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->sb_page)
		return;
	sb = (bitmap_super_t *)kmap(bitmap->sb_page);
	printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
	printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
	printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
	printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
	       *(__u32 *)(sb->uuid+0),
	       *(__u32 *)(sb->uuid+4),
	       *(__u32 *)(sb->uuid+8),
	       *(__u32 *)(sb->uuid+12));
	printk(KERN_DEBUG "        events: %llu\n",
	       (unsigned long long) le64_to_cpu(sb->events));
	printk(KERN_DEBUG "events cleared: %llu\n",
	       (unsigned long long) le64_to_cpu(sb->events_cleared));
	printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
	printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
	printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
	printk(KERN_DEBUG "     sync size: %llu KB\n",
	       (unsigned long long)le64_to_cpu(sb->sync_size)/2);
	kunmap(bitmap->sb_page);
}

/* read the superblock from the bitmap file and initialize some bitmap fields */
static int bitmap_read_sb(struct bitmap *bitmap)
{
	char *reason = NULL;
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep;
	unsigned long bytes_read;
	unsigned long long events;
	int err = -EINVAL;

	/* page 0 is the superblock, read it... */
	if (bitmap->file)
		bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read);
	else {
		bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
		bytes_read = PAGE_SIZE;
	}
	if (IS_ERR(bitmap->sb_page)) {
		err = PTR_ERR(bitmap->sb_page);
		bitmap->sb_page = NULL;
		return err;
	}

	sb = (bitmap_super_t *)kmap(bitmap->sb_page);

	if (bytes_read < sizeof(*sb)) { /* short read */
		printk(KERN_INFO "%s: bitmap file superblock truncated\n",
		       bmname(bitmap));
		err = -ENOSPC;
		goto out;
	}

	chunksize = le32_to_cpu(sb->chunksize);
	daemon_sleep = le32_to_cpu(sb->daemon_sleep);

	/* verify that the bitmap-specific fields are valid */
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
		reason = "bad magic";
	else if (sb->version != cpu_to_le32(BITMAP_MAJOR))
		reason = "unrecognized superblock version";
	else if (chunksize < 512 || chunksize > (1024 * 1024 * 4))
		reason = "bitmap chunksize out of range (512B - 4MB)";
	else if ((1 << ffz(~chunksize)) != chunksize)
		reason = "bitmap chunksize not a power of 2";
	else if (daemon_sleep < 1 || daemon_sleep > 15)
		reason = "daemon sleep period out of range";
	if (reason) {
		printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
		       bmname(bitmap), reason);
		goto out;
	}

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	if (!bitmap->mddev->persistent)
		goto success;

	/*
	 * if we have a persistent array superblock, compare the
	 * bitmap's UUID and event counter to the mddev's
	 */
	if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
		printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n",
		       bmname(bitmap));
		goto out;
	}
	events = le64_to_cpu(sb->events);
	if (events < bitmap->mddev->events) {
		printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) "
			"-- forcing full recovery\n", bmname(bitmap), events,
			(unsigned long long) bitmap->mddev->events);
		sb->state |= BITMAP_STALE;
	}
success:
	/* assign fields using values from superblock */
	bitmap->chunksize = chunksize;
	bitmap->daemon_sleep = daemon_sleep;
	bitmap->flags |= sb->state;
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
	err = 0;
out:
	kunmap(bitmap->sb_page);
	if (err)
		bitmap_print_sb(bitmap);
	return err;
}

enum bitmap_mask_op {
	MASK_SET,
	MASK_UNSET
};

/* record the state of the bitmap in the superblock */
static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
			      enum bitmap_mask_op op)
{
	bitmap_super_t *sb;
	unsigned long flags;

	if (!bitmap)
		return;
	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* can't set the state */
		spin_unlock_irqrestore(&bitmap->lock, flags);
		return;
	}
	page_cache_get(bitmap->sb_page);
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = (bitmap_super_t *)kmap(bitmap->sb_page);
	switch (op) {
	case MASK_SET:
		sb->state |= bits;
		break;
	case MASK_UNSET:
		sb->state &= ~bits;
		break;
	default:
		BUG();
	}
	kunmap(bitmap->sb_page);
	page_cache_release(bitmap->sb_page);
}

/*
 * general bitmap file operations
 */

/* calculate the index of the page that contains this bit */
static inline unsigned long file_page_index(unsigned long chunk)
{
	return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
}

/* calculate the (bit) offset of this bit within a page */
static inline unsigned long file_page_offset(unsigned long chunk)
{
	return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
}

/*
 * return a pointer to the page in the filemap that contains the given bit
 *
 * this lookup is complicated by the fact that the bitmap sb might be exactly
 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
 * 0 or page 1
 */
static inline struct page *filemap_get_page(struct bitmap *bitmap,
					    unsigned long chunk)
{
	return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
}
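
/*
 * [Editorial worked example -- not part of the original commit.]  Assuming
 * bitmap.h (not shown in this diff) defines
 * CHUNK_BIT_OFFSET(chunk) = chunk + (sizeof(bitmap_super_t) << 3), i.e. the
 * superblock's 256 bytes == 2048 bits precede the data bits, then with
 * 4 KiB pages (PAGE_BITS == 32768, PAGE_BIT_SHIFT == 15):
 *
 *	chunk 0      -> file bit   2048 -> page index 0, bit offset 2048
 *	chunk 100000 -> file bit 102048 -> page index 3, bit offset 3744
 *
 * (102048 >> 15 == 3, 102048 & 32767 == 3744).  The "- file_page_index(0)"
 * above compensates for configurations where the superblock fills the whole
 * first page, so filemap[] never wastes a slot on a page that holds no
 * bitmap data.
 */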


static void bitmap_file_unmap(struct bitmap *bitmap)
{
	struct page **map, *sb_page;
	unsigned long *attr;
	int pages;
	unsigned long flags;

	spin_lock_irqsave(&bitmap->lock, flags);
	map = bitmap->filemap;
	bitmap->filemap = NULL;
	attr = bitmap->filemap_attr;
	bitmap->filemap_attr = NULL;
	pages = bitmap->file_pages;
	bitmap->file_pages = 0;
	sb_page = bitmap->sb_page;
	bitmap->sb_page = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);

	while (pages--)
		if (map[pages]->index != 0) /* 0 is sb_page, release it below */
			page_cache_release(map[pages]);
	kfree(map);
	kfree(attr);

	if (sb_page)
		page_cache_release(sb_page);
}

static void bitmap_stop_daemons(struct bitmap *bitmap);

/* dequeue the next item in a page list -- don't call from irq context */
static struct page_list *dequeue_page(struct bitmap *bitmap)
{
	struct page_list *item = NULL;
	struct list_head *head = &bitmap->complete_pages;

	spin_lock(&bitmap->write_lock);
	if (list_empty(head))
		goto out;
	item = list_entry(head->prev, struct page_list, list);
	list_del(head->prev);
out:
	spin_unlock(&bitmap->write_lock);
	return item;
}

static void drain_write_queues(struct bitmap *bitmap)
{
	struct page_list *item;

	while ((item = dequeue_page(bitmap))) {
		/* don't bother to wait */
		page_cache_release(item->page);
		mempool_free(item, bitmap->write_pool);
	}

	wake_up(&bitmap->write_wait);
}

static void bitmap_file_put(struct bitmap *bitmap)
{
	struct file *file;
	struct inode *inode;
	unsigned long flags;

	spin_lock_irqsave(&bitmap->lock, flags);
	file = bitmap->file;
	bitmap->file = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);

	bitmap_stop_daemons(bitmap);

	drain_write_queues(bitmap);

	bitmap_file_unmap(bitmap);

	if (file) {
		inode = file->f_mapping->host;
		spin_lock(&inode->i_lock);
		atomic_set(&inode->i_writecount, 1); /* allow writes again */
		spin_unlock(&inode->i_lock);
		fput(file);
	}
}


/*
 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
 * then it is no longer reliable, so we stop using it and we mark the file
 * as failed in the superblock
 */
static void bitmap_file_kick(struct bitmap *bitmap)
{
	char *path, *ptr = NULL;

	bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET);
	bitmap_update_sb(bitmap);

	if (bitmap->file) {
		path = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (path)
			ptr = file_path(bitmap->file, path, PAGE_SIZE);

		printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
		       bmname(bitmap), ptr ? ptr : "");

		kfree(path);
	}

	bitmap_file_put(bitmap);

	return;
}

enum bitmap_page_attr {
	BITMAP_PAGE_DIRTY = 1,     // there are set bits that need to be synced
	BITMAP_PAGE_CLEAN = 2,     // there are bits that might need to be cleared
	BITMAP_PAGE_NEEDWRITE = 4, // there are cleared bits that need to be synced
};
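
/*
 * [Editorial note -- not part of the original commit.]  The attribute life
 * cycle, as implemented below, is roughly:
 *
 *	bitmap_file_set_bit()  marks the page BITMAP_PAGE_DIRTY
 *	bitmap_unplug()        clears DIRTY/NEEDWRITE and writes the page out
 *	                       before the data writes it covers may proceed
 *	bitmap_endwrite()      marks the page BITMAP_PAGE_CLEAN once a chunk
 *	                       counter drops low enough that its bit may be
 *	                       clearable
 *	bitmap_daemon_work()   clears bits on CLEAN pages and marks the page
 *	                       NEEDWRITE, to be flushed lazily
 *
 * DIRTY is thus the synchronous path (data integrity); NEEDWRITE is the
 * lazy path, which only re-shrinks the amount of future resync work.
 */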

static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
				 enum bitmap_page_attr attr)
{
	bitmap->filemap_attr[page->index] |= attr;
}

static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
				   enum bitmap_page_attr attr)
{
	bitmap->filemap_attr[page->index] &= ~attr;
}

static inline unsigned long get_page_attr(struct bitmap *bitmap, struct page *page)
{
	return bitmap->filemap_attr[page->index];
}

/*
 * bitmap_file_set_bit -- called before performing a write to the md device
 * to set (and eventually sync) a particular bit in the bitmap file
 *
 * we set the bit immediately, then we record the page number so that
 * when an unplug occurs, we can flush the dirty pages out to disk
 */
static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long bit;
	struct page *page;
	void *kaddr;
	unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);

	if (!bitmap->filemap) {
		return;
	}

	page = filemap_get_page(bitmap, chunk);
	bit = file_page_offset(chunk);


	/* make sure the page stays cached until it gets written out */
	if (!(get_page_attr(bitmap, page) & BITMAP_PAGE_DIRTY))
		page_cache_get(page);

	/* set the bit */
	kaddr = kmap_atomic(page, KM_USER0);
	set_bit(bit, kaddr);
	kunmap_atomic(kaddr, KM_USER0);
	PRINTK("set file bit %lu page %lu\n", bit, page->index);

	/* record page number so it gets flushed to disk when unplug occurs */
	set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);

}

/* this gets called when the md device is ready to unplug its underlying
 * (slave) device queues -- before we let any writes go down, we need to
 * sync the dirty pages of the bitmap file to disk */
int bitmap_unplug(struct bitmap *bitmap)
{
	unsigned long i, attr, flags;
	struct page *page;
	int wait = 0;
	int err;

	if (!bitmap)
		return 0;

	/* look at each page to see if there are any set bits that need to be
	 * flushed out to disk */
	for (i = 0; i < bitmap->file_pages; i++) {
		spin_lock_irqsave(&bitmap->lock, flags);
		if (!bitmap->filemap) {
			spin_unlock_irqrestore(&bitmap->lock, flags);
			return 0;
		}
		page = bitmap->filemap[i];
		attr = get_page_attr(bitmap, page);
		clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
		clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
		if ((attr & BITMAP_PAGE_DIRTY))
			wait = 1;
		spin_unlock_irqrestore(&bitmap->lock, flags);

		if (attr & (BITMAP_PAGE_DIRTY | BITMAP_PAGE_NEEDWRITE)) {
			err = write_page(bitmap, page, 0);
			if (err == -EAGAIN) {
				if (attr & BITMAP_PAGE_DIRTY)
					err = write_page(bitmap, page, 1);
				else
					err = 0;
			}
			if (err)
				return 1;
		}
	}
	if (wait) { /* if any writes were performed, we need to wait on them */
		if (bitmap->file) {
			spin_lock_irq(&bitmap->write_lock);
			wait_event_lock_irq(bitmap->write_wait,
				list_empty(&bitmap->complete_pages), bitmap->write_lock,
				wake_up_process(bitmap->writeback_daemon->tsk));
			spin_unlock_irq(&bitmap->write_lock);
		} else
			wait_event(bitmap->mddev->sb_wait,
				atomic_read(&bitmap->mddev->pending_writes)==0);
	}
	return 0;
}

static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
				   unsigned long sectors, int in_sync);
/*
 * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 * memory mapping of the bitmap file
 * Special cases:
 *   if there's no bitmap file, or if the bitmap file had been
 *   previously kicked from the array, we mark all the bits as
 *   1's in order to cause a full resync.
 */
static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
{
	unsigned long i, chunks, index, oldindex, bit;
	struct page *page = NULL, *oldpage = NULL;
	unsigned long num_pages, bit_cnt = 0;
	struct file *file;
	unsigned long bytes, offset, dummy;
	int outofdate;
	int ret = -ENOSPC;

	chunks = bitmap->chunks;
	file = bitmap->file;

	BUG_ON(!file && !bitmap->offset);

#if INJECT_FAULTS_3
	outofdate = 1;
#else
	outofdate = bitmap->flags & BITMAP_STALE;
#endif
	if (outofdate)
		printk(KERN_INFO "%s: bitmap file is out of date, doing full "
			"recovery\n", bmname(bitmap));

	bytes = (chunks + 7) / 8;

	num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;

	if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
			bytes + sizeof(bitmap_super_t));
		goto out;
	}

	ret = -ENOMEM;

	bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
	if (!bitmap->filemap)
		goto out;

	bitmap->filemap_attr = kmalloc(sizeof(long) * num_pages, GFP_KERNEL);
	if (!bitmap->filemap_attr)
		goto out;

	memset(bitmap->filemap_attr, 0, sizeof(long) * num_pages);

	oldindex = ~0L;

	for (i = 0; i < chunks; i++) {
		index = file_page_index(i);
		bit = file_page_offset(i);
		if (index != oldindex) { /* this is a new page, read it in */
			/* unmap the old page, we're done with it */
			if (oldpage != NULL)
				kunmap(oldpage);
			if (index == 0) {
				/*
				 * if we're here then the superblock page
				 * contains some bits (PAGE_SIZE != sizeof sb)
				 * we've already read it in, so just use it
				 */
				page = bitmap->sb_page;
				offset = sizeof(bitmap_super_t);
			} else if (file) {
				page = read_page(file, index, &dummy);
				offset = 0;
			} else {
				page = read_sb_page(bitmap->mddev, bitmap->offset, index);
				offset = 0;
			}
			if (IS_ERR(page)) { /* read error */
				ret = PTR_ERR(page);
				goto out;
			}

			oldindex = index;
			oldpage = page;
			kmap(page);

			if (outofdate) {
				/*
				 * if bitmap is out of date, dirty the
				 * whole page and write it out
				 */
				memset(page_address(page) + offset, 0xff,
				       PAGE_SIZE - offset);
				ret = write_page(bitmap, page, 1);
				if (ret) {
					kunmap(page);
					/* release, page not in filemap yet */
					page_cache_release(page);
					goto out;
				}
			}

			bitmap->filemap[bitmap->file_pages++] = page;
		}
		if (test_bit(bit, page_address(page))) {
			/* if the disk bit is set, set the memory bit */
			bitmap_set_memory_bits(bitmap,
					i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync);
			bit_cnt++;
		}
	}

	/* everything went OK */
	ret = 0;
	bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);

	if (page) /* unmap the last page */
		kunmap(page);

	if (bit_cnt) { /* Kick recovery if any bits were set */
		set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
		md_wakeup_thread(bitmap->mddev->thread);
	}

out:
	printk(KERN_INFO "%s: bitmap initialized from disk: "
		"read %lu/%lu pages, set %lu bits, status: %d\n",
		bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, ret);

	return ret;
}

void bitmap_write_all(struct bitmap *bitmap)
{
	/* We don't actually write all bitmap blocks here,
	 * just flag them as needing to be written
	 */

	unsigned long chunks = bitmap->chunks;
	unsigned long bytes = (chunks+7)/8 + sizeof(bitmap_super_t);
	unsigned long num_pages = (bytes + PAGE_SIZE-1) / PAGE_SIZE;
	while (num_pages--)
		bitmap->filemap_attr[num_pages] |= BITMAP_PAGE_NEEDWRITE;
}


static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
{
	sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	bitmap->bp[page].count += inc;
/*
	if (page == 0) printk("count page 0, offset %llu: %d gives %d\n",
		(unsigned long long)offset, inc, bitmap->bp[page].count);
*/
	bitmap_checkfree(bitmap, page);
}
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
					    sector_t offset, int *blocks,
					    int create);

/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 * out to disk
 */

int bitmap_daemon_work(struct bitmap *bitmap)
{
	unsigned long j;
	unsigned long flags;
	struct page *page = NULL, *lastpage = NULL;
	int err = 0;
	int blocks;
	int attr;

	if (bitmap == NULL)
		return 0;
	if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
		return 0;
	bitmap->daemon_lastrun = jiffies;

	for (j = 0; j < bitmap->chunks; j++) {
		bitmap_counter_t *bmc;
		spin_lock_irqsave(&bitmap->lock, flags);
		if (!bitmap->filemap) {
			/* error or shutdown */
			spin_unlock_irqrestore(&bitmap->lock, flags);
			break;
		}

		page = filemap_get_page(bitmap, j);

		if (page != lastpage) {
			/* skip this page unless it's marked as needing cleaning */
			if (!((attr=get_page_attr(bitmap, page)) & BITMAP_PAGE_CLEAN)) {
				if (attr & BITMAP_PAGE_NEEDWRITE) {
					page_cache_get(page);
					clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
				}
				spin_unlock_irqrestore(&bitmap->lock, flags);
				if (attr & BITMAP_PAGE_NEEDWRITE) {
					switch (write_page(bitmap, page, 0)) {
					case -EAGAIN:
						set_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
						break;
					case 0:
						break;
					default:
						bitmap_file_kick(bitmap);
					}
					page_cache_release(page);
				}
				continue;
			}

			/* grab the new page, sync and release the old */
			page_cache_get(page);
			if (lastpage != NULL) {
				if (get_page_attr(bitmap, lastpage) & BITMAP_PAGE_NEEDWRITE) {
					clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
					spin_unlock_irqrestore(&bitmap->lock, flags);
					err = write_page(bitmap, lastpage, 0);
					if (err == -EAGAIN) {
						err = 0;
						set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
					}
				} else {
					set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
					spin_unlock_irqrestore(&bitmap->lock, flags);
				}
				kunmap(lastpage);
				page_cache_release(lastpage);
				if (err)
					bitmap_file_kick(bitmap);
			} else
				spin_unlock_irqrestore(&bitmap->lock, flags);
			lastpage = page;
			kmap(page);
/*
			printk("bitmap clean at page %lu\n", j);
*/
			spin_lock_irqsave(&bitmap->lock, flags);
			clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
		}
		bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
					 &blocks, 0);
		if (bmc) {
/*
			if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
*/
			if (*bmc == 2) {
				*bmc = 1; /* maybe clear the bit next time */
				set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
			} else if (*bmc == 1) {
				/* we can clear the bit */
				*bmc = 0;
				bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
						  -1);

				/* clear the bit */
				clear_bit(file_page_offset(j), page_address(page));
			}
		}
		spin_unlock_irqrestore(&bitmap->lock, flags);
	}

	/* now sync the final page */
	if (lastpage != NULL) {
		kunmap(lastpage);
		spin_lock_irqsave(&bitmap->lock, flags);
		if (get_page_attr(bitmap, lastpage) & BITMAP_PAGE_NEEDWRITE) {
			clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
			spin_unlock_irqrestore(&bitmap->lock, flags);
			err = write_page(bitmap, lastpage, 0);
			if (err == -EAGAIN) {
				set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
				err = 0;
			}
		} else {
			set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
			spin_unlock_irqrestore(&bitmap->lock, flags);
		}

		page_cache_release(lastpage);
	}

	return err;
}

static void daemon_exit(struct bitmap *bitmap, mdk_thread_t **daemon)
{
	mdk_thread_t *dmn;
	unsigned long flags;

	/* if no one is waiting on us, we'll free the md thread struct
	 * and exit, otherwise we let the waiter clean things up */
	spin_lock_irqsave(&bitmap->lock, flags);
	if ((dmn = *daemon)) { /* no one is waiting, cleanup and exit */
		*daemon = NULL;
		spin_unlock_irqrestore(&bitmap->lock, flags);
		kfree(dmn);
		complete_and_exit(NULL, 0); /* do_exit not exported */
	}
	spin_unlock_irqrestore(&bitmap->lock, flags);
}

static void bitmap_writeback_daemon(mddev_t *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;
	struct page *page;
	struct page_list *item;
	int err = 0;

	if (signal_pending(current)) {
		printk(KERN_INFO
		       "%s: bitmap writeback daemon got signal, exiting...\n",
		       bmname(bitmap));
		err = -EINTR;
		goto out;
	}

	PRINTK("%s: bitmap writeback daemon woke up...\n", bmname(bitmap));
	/* wait on bitmap page writebacks */
	while ((item = dequeue_page(bitmap))) {
		page = item->page;
		mempool_free(item, bitmap->write_pool);
		PRINTK("wait on page writeback: %p\n", page);
		wait_on_page_writeback(page);
		PRINTK("finished page writeback: %p\n", page);

		err = PageError(page);
		page_cache_release(page);
		if (err) {
			printk(KERN_WARNING "%s: bitmap file writeback "
			       "failed (page %lu): %d\n",
			       bmname(bitmap), page->index, err);
			bitmap_file_kick(bitmap);
			goto out;
		}
	}
out:
	wake_up(&bitmap->write_wait);
	if (err) {
		printk(KERN_INFO "%s: bitmap writeback daemon exiting (%d)\n",
		       bmname(bitmap), err);
		daemon_exit(bitmap, &bitmap->writeback_daemon);
	}
}

static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,
			       void (*func)(mddev_t *), char *name)
{
	mdk_thread_t *daemon;
	unsigned long flags;
	char namebuf[32];

	spin_lock_irqsave(&bitmap->lock, flags);
	*ptr = NULL;

	if (!bitmap->file) /* no need for daemon if there's no backing file */
		goto out_unlock;

	spin_unlock_irqrestore(&bitmap->lock, flags);

#if INJECT_FATAL_FAULT_2
	daemon = NULL;
#else
	sprintf(namebuf, "%%s_%s", name);
	daemon = md_register_thread(func, bitmap->mddev, namebuf);
#endif
	if (!daemon) {
		printk(KERN_ERR "%s: failed to start bitmap daemon\n",
		       bmname(bitmap));
		return -ECHILD;
	}

	spin_lock_irqsave(&bitmap->lock, flags);
	*ptr = daemon;

	md_wakeup_thread(daemon); /* start it running */

	PRINTK("%s: %s daemon (pid %d) started...\n",
	       bmname(bitmap), name, daemon->tsk->pid);
out_unlock:
	spin_unlock_irqrestore(&bitmap->lock, flags);
	return 0;
}

static int bitmap_start_daemons(struct bitmap *bitmap)
{
	int err = bitmap_start_daemon(bitmap, &bitmap->writeback_daemon,
				      bitmap_writeback_daemon, "bitmap_wb");
	return err;
}

static void bitmap_stop_daemon(struct bitmap *bitmap, mdk_thread_t **ptr)
{
	mdk_thread_t *daemon;
	unsigned long flags;

	spin_lock_irqsave(&bitmap->lock, flags);
	daemon = *ptr;
	*ptr = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);
	if (daemon)
		md_unregister_thread(daemon); /* destroy the thread */
}

static void bitmap_stop_daemons(struct bitmap *bitmap)
{
	/* the daemons can't stop themselves... they'll just exit instead... */
	if (bitmap->writeback_daemon &&
	    current->pid != bitmap->writeback_daemon->tsk->pid)
		bitmap_stop_daemon(bitmap, &bitmap->writeback_daemon);
}

static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
					    sector_t offset, int *blocks,
					    int create)
{
	/* If 'create', we might release the lock and reclaim it.
	 * The lock must have been taken with interrupts enabled.
	 * If !create, we don't release the lock.
	 */
	sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
	sector_t csize;

	if (bitmap_checkpage(bitmap, page, create) < 0) {
		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
		*blocks = csize - (offset & (csize - 1));
		return NULL;
	}
	/* now locked ... */

	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
		/* should we use the first or second counter field
		 * of the hijacked pointer? */
		int hi = (pageoff > PAGE_COUNTER_MASK);
		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
					  PAGE_COUNTER_SHIFT - 1);
		*blocks = csize - (offset & (csize - 1));
		return &((bitmap_counter_t *)
			 &bitmap->bp[page].map)[hi];
	} else { /* page is allocated */
		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
		*blocks = csize - (offset & (csize - 1));
		return (bitmap_counter_t *)
			&(bitmap->bp[page].map[pageoff]);
	}
}
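
/*
 * [Editorial sketch -- not part of the original commit.]  The counter word
 * returned above, assuming the usual definitions in bitmap.h (not shown in
 * this diff): COUNTER_BITS == 16, NEEDED_MASK == 1 << 15, RESYNC_MASK ==
 * 1 << 14, COUNTER_MAX == RESYNC_MASK - 1:
 *
 *	bit 15      NEEDED -- chunk must be resynced
 *	bit 14      RESYNC -- resync of this chunk is in progress
 *	bits 13..0  write count: 0 means the on-disk bit is clear; 1 and 2
 *	            mean the bit is set but idle (the daemon steps it
 *	            2 -> 1 -> 0 across passes before clearing the bit);
 *	            n > 2 means n - 2 writes are in flight
 */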

int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors)
{
	if (!bitmap) return 0;
	while (sectors) {
		int blocks;
		bitmap_counter_t *bmc;

		spin_lock_irq(&bitmap->lock);
		bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&bitmap->lock);
			return 0;
		}

		switch (*bmc) {
		case 0:
			bitmap_file_set_bit(bitmap, offset);
			bitmap_count_page(bitmap, offset, 1);
			blk_plug_device(bitmap->mddev->queue);
			/* fall through */
		case 1:
			*bmc = 2;
		}
		if ((*bmc & COUNTER_MAX) == COUNTER_MAX) BUG();
		(*bmc)++;

		spin_unlock_irq(&bitmap->lock);

		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
	return 0;
}
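
/*
 * [Editorial usage sketch -- not part of the original commit.]  How a raid
 * personality is expected to drive the write path; a simplified,
 * hypothetical rendering of what the raid1.c changes in this commit do:
 *
 *	bitmap_startwrite(mddev->bitmap, bio->bi_sector, bio_sectors(bio));
 *	...queue the write bio(s)...
 *	bitmap_unplug(mddev->bitmap);	// on unplug, before the queued
 *					// writes reach the member disks
 *	...
 *	bitmap_endwrite(mddev->bitmap, bio->bi_sector, bio_sectors(bio),
 *			uptodate);	// in the completion path
 *
 * The dirty bit is therefore on stable storage before the data write
 * starts, and only becomes clearable after the write has ended.
 */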

void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
		     int success)
{
	if (!bitmap) return;
	while (sectors) {
		int blocks;
		unsigned long flags;
		bitmap_counter_t *bmc;

		spin_lock_irqsave(&bitmap->lock, flags);
		bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&bitmap->lock, flags);
			return;
		}

		if (!success && !(*bmc & NEEDED_MASK))
			*bmc |= NEEDED_MASK;

		(*bmc)--;
		if (*bmc <= 2) {
			set_page_attr(bitmap,
				filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
				BITMAP_PAGE_CLEAN);
		}
		spin_unlock_irqrestore(&bitmap->lock, flags);
		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
}

int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
{
	bitmap_counter_t *bmc;
	int rv;
	if (bitmap == NULL) { /* FIXME or bitmap set as 'failed' */
		*blocks = 1024;
		return 1; /* always resync if no bitmap */
	}
	spin_lock_irq(&bitmap->lock);
	bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
	rv = 0;
	if (bmc) {
		/* locked */
		if (RESYNC(*bmc))
			rv = 1;
		else if (NEEDED(*bmc)) {
			rv = 1;
			*bmc |= RESYNC_MASK;
			*bmc &= ~NEEDED_MASK;
		}
	}
	spin_unlock_irq(&bitmap->lock);
	return rv;
}

void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
{
	bitmap_counter_t *bmc;
	unsigned long flags;
/*
	if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted);
*/
	if (bitmap == NULL) {
		*blocks = 1024;
		return;
	}
	spin_lock_irqsave(&bitmap->lock, flags);
	bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
	if (bmc == NULL)
		goto unlock;
	/* locked */
/*
	if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks);
*/
	if (RESYNC(*bmc)) {
		*bmc &= ~RESYNC_MASK;

		if (!NEEDED(*bmc) && aborted)
			*bmc |= NEEDED_MASK;
		else {
			if (*bmc <= 2) {
				set_page_attr(bitmap,
					filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
					BITMAP_PAGE_CLEAN);
			}
		}
	}
unlock:
	spin_unlock_irqrestore(&bitmap->lock, flags);
}

void bitmap_close_sync(struct bitmap *bitmap)
{
	/* Sync has finished, and any bitmap chunks that weren't synced
	 * properly have been aborted.  It remains to us to clear the
	 * RESYNC bit wherever it is still on
	 */
	sector_t sector = 0;
	int blocks;
	if (!bitmap) return;
	while (sector < bitmap->mddev->resync_max_sectors) {
		bitmap_end_sync(bitmap, sector, &blocks, 0);
/*
		if (sector < 500) printk("bitmap_close_sync: sec %llu blks %d\n",
			(unsigned long long)sector, blocks);
*/
		sector += blocks;
	}
}
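
/*
 * [Editorial usage sketch -- not part of the original commit.]  The resync
 * side of the API, as an md sync thread might drive it (simplified,
 * hypothetical):
 *
 *	while (sector < max_sector) {
 *		int blocks;
 *		if (bitmap_start_sync(mddev->bitmap, sector, &blocks)) {
 *			...resync these 'blocks' sectors...
 *		}
 *		// aborted != 0 re-arms NEEDED so the chunk is retried
 *		bitmap_end_sync(mddev->bitmap, sector, &blocks, aborted);
 *		sector += blocks;
 *	}
 *	bitmap_close_sync(mddev->bitmap); // drop any leftover RESYNC bits
 *
 * Chunks whose counters carry no NEEDED bit are skipped outright, which
 * is what makes a bitmap-driven partial resync fast.
 */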
1425 | |||
1426 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, | ||
1427 | unsigned long sectors, int in_sync) | ||
1428 | { | ||
1429 | /* For each chunk covered by any of these sectors, set the | ||
1430 | * counter to 1 and set resync_needed unless in_sync. They should all | ||
1431 | * be 0 at this point | ||
1432 | */ | ||
1433 | while (sectors) { | ||
1434 | int secs; | ||
1435 | bitmap_counter_t *bmc; | ||
1436 | spin_lock_irq(&bitmap->lock); | ||
1437 | bmc = bitmap_get_counter(bitmap, offset, &secs, 1); | ||
1438 | if (!bmc) { | ||
1439 | spin_unlock_irq(&bitmap->lock); | ||
1440 | return; | ||
1441 | } | ||
1442 | if (! *bmc) { | ||
1443 | struct page *page; | ||
1444 | *bmc = 1 | (in_sync? 0 : NEEDED_MASK); | ||
1445 | bitmap_count_page(bitmap, offset, 1); | ||
1446 | page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); | ||
1447 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | ||
1448 | } | ||
1449 | spin_unlock_irq(&bitmap->lock); | ||
1450 | if (sectors > secs) | ||
1451 | sectors -= secs; | ||
1452 | else | ||
1453 | sectors = 0; | ||
1454 | } | ||
1455 | } | ||
1456 | |||
1457 | /* | ||
1458 | * free memory that was allocated | ||
1459 | */ | ||
1460 | void bitmap_destroy(mddev_t *mddev) | ||
1461 | { | ||
1462 | unsigned long k, pages; | ||
1463 | struct bitmap_page *bp; | ||
1464 | struct bitmap *bitmap = mddev->bitmap; | ||
1465 | |||
1466 | if (!bitmap) /* there was no bitmap */ | ||
1467 | return; | ||
1468 | |||
1469 | mddev->bitmap = NULL; /* disconnect from the md device */ | ||
1470 | |||
1471 | /* release the bitmap file and kill the daemon */ | ||
1472 | bitmap_file_put(bitmap); | ||
1473 | |||
1474 | bp = bitmap->bp; | ||
1475 | pages = bitmap->pages; | ||
1476 | |||
1477 | /* free all allocated memory */ | ||
1478 | |||
1479 | mempool_destroy(bitmap->write_pool); | ||
1480 | |||
1481 | if (bp) /* deallocate the page memory */ | ||
1482 | for (k = 0; k < pages; k++) | ||
1483 | if (bp[k].map && !bp[k].hijacked) | ||
1484 | kfree(bp[k].map); | ||
1485 | kfree(bp); | ||
1486 | kfree(bitmap); | ||
1487 | } | ||
1488 | |||
1489 | /* | ||
1490 | * initialize the bitmap structure | ||
1491 | * if this returns an error, bitmap_destroy must be called to do clean up | ||
1492 | */ | ||
1493 | int bitmap_create(mddev_t *mddev) | ||
1494 | { | ||
1495 | struct bitmap *bitmap; | ||
1496 | unsigned long blocks = mddev->resync_max_sectors; | ||
1497 | unsigned long chunks; | ||
1498 | unsigned long pages; | ||
1499 | struct file *file = mddev->bitmap_file; | ||
1500 | int err; | ||
1501 | |||
1502 | BUG_ON(sizeof(bitmap_super_t) != 256); | ||
1503 | |||
1504 | if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */ | ||
1505 | return 0; | ||
1506 | |||
1507 | BUG_ON(file && mddev->bitmap_offset); | ||
1508 | |||
1509 | bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL); | ||
1510 | if (!bitmap) | ||
1511 | return -ENOMEM; | ||
1512 | |||
1513 | memset(bitmap, 0, sizeof(*bitmap)); | ||
1514 | |||
1515 | spin_lock_init(&bitmap->lock); | ||
1516 | bitmap->mddev = mddev; | ||
1517 | mddev->bitmap = bitmap; | ||
1518 | |||
1519 | spin_lock_init(&bitmap->write_lock); | ||
1520 | INIT_LIST_HEAD(&bitmap->complete_pages); | ||
1521 | init_waitqueue_head(&bitmap->write_wait); | ||
1522 | bitmap->write_pool = mempool_create(WRITE_POOL_SIZE, write_pool_alloc, | ||
1523 | write_pool_free, NULL); | ||
1524 | if (!bitmap->write_pool) | ||
1525 | return -ENOMEM; | ||
1526 | |||
1527 | bitmap->file = file; | ||
1528 | bitmap->offset = mddev->bitmap_offset; | ||
1529 | if (file) get_file(file); | ||
1530 | /* read superblock from bitmap file (this sets bitmap->chunksize) */ | ||
1531 | err = bitmap_read_sb(bitmap); | ||
1532 | if (err) | ||
1533 | return err; | ||
1534 | |||
1535 | bitmap->chunkshift = find_first_bit(&bitmap->chunksize, | ||
1536 | BITS_PER_LONG); /* the search width is in bits, not sizeof() bytes */ | ||
1537 | |||
1538 | /* now that chunksize and chunkshift are set, we can use these macros */ | ||
1539 | chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) / | ||
1540 | CHUNK_BLOCK_RATIO(bitmap); | ||
1541 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; | ||
1542 | |||
1543 | BUG_ON(!pages); | ||
1544 | |||
1545 | bitmap->chunks = chunks; | ||
1546 | bitmap->pages = pages; | ||
1547 | bitmap->missing_pages = pages; | ||
1548 | bitmap->counter_bits = COUNTER_BITS; | ||
1549 | |||
1550 | bitmap->syncchunk = ~0UL; | ||
1551 | |||
1552 | #if INJECT_FATAL_FAULT_1 | ||
1553 | bitmap->bp = NULL; | ||
1554 | #else | ||
1555 | bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); | ||
1556 | #endif | ||
1557 | if (!bitmap->bp) | ||
1558 | return -ENOMEM; | ||
1559 | memset(bitmap->bp, 0, pages * sizeof(*bitmap->bp)); | ||
1560 | |||
1561 | bitmap->flags |= BITMAP_ACTIVE; | ||
1562 | |||
1563 | /* now that we have some pages available, initialize the in-memory | ||
1564 | * bitmap from the on-disk bitmap */ | ||
1565 | err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector); | ||
1566 | if (err) | ||
1567 | return err; | ||
1568 | |||
1569 | printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", | ||
1570 | pages, bmname(bitmap)); | ||
1571 | |||
1572 | /* kick off the bitmap daemons */ | ||
1573 | err = bitmap_start_daemons(bitmap); | ||
1574 | if (err) | ||
1575 | return err; | ||
1576 | return bitmap_update_sb(bitmap); | ||
1577 | } | ||
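The comment above bitmap_create() pins down its error contract: even on failure, the caller still owns the cleanup. A minimal sketch of the intended call sequence (do_md_run() in the md.c hunks below follows this shape exactly):

	int err = bitmap_create(mddev);
	if (!err)
		err = mddev->pers->run(mddev);
	if (err) {
		/* bitmap_destroy() is required even when bitmap_create()
		 * fails: it releases whatever was partially set up */
		bitmap_destroy(mddev);
		return err;
	}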
1578 | |||
1579 | /* the bitmap API -- for raid personalities */ | ||
1580 | EXPORT_SYMBOL(bitmap_startwrite); | ||
1581 | EXPORT_SYMBOL(bitmap_endwrite); | ||
1582 | EXPORT_SYMBOL(bitmap_start_sync); | ||
1583 | EXPORT_SYMBOL(bitmap_end_sync); | ||
1584 | EXPORT_SYMBOL(bitmap_unplug); | ||
1585 | EXPORT_SYMBOL(bitmap_close_sync); | ||
1586 | EXPORT_SYMBOL(bitmap_daemon_work); | ||
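These exports are the whole interface a RAID personality needs. A rough sketch of how the write path is expected to pair them; the three-argument bitmap_startwrite(bitmap, sector, nsectors) form is an assumption here (only the bitmap_endwrite() call appears verbatim in this patch, in the raid1.c hunks below):

	/* before issuing the mirrored writes: mark the chunks dirty */
	bitmap_startwrite(mddev->bitmap, bio->bi_sector, r1_bio->sectors);

	/* push dirty bitmap pages to stable storage ahead of the data */
	bitmap_unplug(mddev->bitmap);

	/* when the last mirror write completes: clear the bits only if
	 * every device succeeded */
	bitmap_endwrite(mddev->bitmap, r1_bio->sector, r1_bio->sectors,
			!test_bit(R1BIO_Degraded, &r1_bio->state));

bitmap_daemon_work() is not called by the personality at all; md_check_recovery() invokes it on each pass (see the md.c hunks below) so that bits dirtied by old writes are lazily cleared on disk.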
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0dd6c2b5391b..d0a4bab220e5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
@@ -704,8 +704,7 @@ static void crypt_dtr(struct dm_target *ti) | |||
704 | mempool_destroy(cc->page_pool); | 704 | mempool_destroy(cc->page_pool); |
705 | mempool_destroy(cc->io_pool); | 705 | mempool_destroy(cc->io_pool); |
706 | 706 | ||
707 | if (cc->iv_mode) | 707 | kfree(cc->iv_mode); |
708 | kfree(cc->iv_mode); | ||
709 | if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) | 708 | if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) |
710 | cc->iv_gen_ops->dtr(cc); | 709 | cc->iv_gen_ops->dtr(cc); |
711 | crypto_free_tfm(cc->tfm); | 710 | crypto_free_tfm(cc->tfm); |
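This hunk, and the matching ones in dm-ioctl.c, linear.c, md.c, multipath.c and raid0.c below, drops a redundant guard: kfree() is defined to be a no-op on a NULL pointer, so

	kfree(cc->iv_mode);	/* safe even when cc->iv_mode is NULL */

is exactly equivalent to the two-line guarded form it replaces.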
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index ee3c869d9701..200a0688f717 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c | |||
@@ -122,14 +122,6 @@ static struct hash_cell *__get_uuid_cell(const char *str) | |||
122 | /*----------------------------------------------------------------- | 122 | /*----------------------------------------------------------------- |
123 | * Inserting, removing and renaming a device. | 123 | * Inserting, removing and renaming a device. |
124 | *---------------------------------------------------------------*/ | 124 | *---------------------------------------------------------------*/ |
125 | static inline char *kstrdup(const char *str) | ||
126 | { | ||
127 | char *r = kmalloc(strlen(str) + 1, GFP_KERNEL); | ||
128 | if (r) | ||
129 | strcpy(r, str); | ||
130 | return r; | ||
131 | } | ||
132 | |||
133 | static struct hash_cell *alloc_cell(const char *name, const char *uuid, | 125 | static struct hash_cell *alloc_cell(const char *name, const char *uuid, |
134 | struct mapped_device *md) | 126 | struct mapped_device *md) |
135 | { | 127 | { |
@@ -139,7 +131,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid, | |||
139 | if (!hc) | 131 | if (!hc) |
140 | return NULL; | 132 | return NULL; |
141 | 133 | ||
142 | hc->name = kstrdup(name); | 134 | hc->name = kstrdup(name, GFP_KERNEL); |
143 | if (!hc->name) { | 135 | if (!hc->name) { |
144 | kfree(hc); | 136 | kfree(hc); |
145 | return NULL; | 137 | return NULL; |
@@ -149,7 +141,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid, | |||
149 | hc->uuid = NULL; | 141 | hc->uuid = NULL; |
150 | 142 | ||
151 | else { | 143 | else { |
152 | hc->uuid = kstrdup(uuid); | 144 | hc->uuid = kstrdup(uuid, GFP_KERNEL); |
153 | if (!hc->uuid) { | 145 | if (!hc->uuid) { |
154 | kfree(hc->name); | 146 | kfree(hc->name); |
155 | kfree(hc); | 147 | kfree(hc); |
@@ -273,7 +265,7 @@ static int dm_hash_rename(const char *old, const char *new) | |||
273 | /* | 265 | /* |
274 | * duplicate new. | 266 | * duplicate new. |
275 | */ | 267 | */ |
276 | new_name = kstrdup(new); | 268 | new_name = kstrdup(new, GFP_KERNEL); |
277 | if (!new_name) | 269 | if (!new_name) |
278 | return -ENOMEM; | 270 | return -ENOMEM; |
279 | 271 | ||
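The private kstrdup() is deleted in favour of the kernel-wide helper, which behaves identically but takes the allocation flags explicitly. For reference, a sketch of the generic version (modulo the exact spelling of the gfp parameter type in this era):

	char *kstrdup(const char *s, gfp_t gfp)
	{
		size_t len;
		char *buf;

		if (!s)
			return NULL;
		len = strlen(s) + 1;
		buf = kmalloc(len, gfp);
		if (buf)
			memcpy(buf, s, len);
		return buf;
	}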
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6e3cf7e13451..12031c9d3f1e 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
@@ -1060,6 +1060,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1060 | } | 1060 | } |
1061 | 1061 | ||
1062 | ti->private = ms; | 1062 | ti->private = ms; |
1063 | ti->split_io = ms->rh.region_size; | ||
1063 | 1064 | ||
1064 | r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); | 1065 | r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); |
1065 | if (r) { | 1066 | if (r) { |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b1941b887f46..8d740013d74d 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -217,8 +217,7 @@ static int linear_run (mddev_t *mddev) | |||
217 | return 0; | 217 | return 0; |
218 | 218 | ||
219 | out: | 219 | out: |
220 | if (conf) | 220 | kfree(conf); |
221 | kfree(conf); | ||
222 | return 1; | 221 | return 1; |
223 | } | 222 | } |
224 | 223 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index d899204d3743..4a0c57db2b67 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -19,6 +19,9 @@ | |||
19 | 19 | ||
20 | Neil Brown <neilb@cse.unsw.edu.au>. | 20 | Neil Brown <neilb@cse.unsw.edu.au>. |
21 | 21 | ||
22 | - persistent bitmap code | ||
23 | Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. | ||
24 | |||
22 | This program is free software; you can redistribute it and/or modify | 25 | This program is free software; you can redistribute it and/or modify |
23 | it under the terms of the GNU General Public License as published by | 26 | it under the terms of the GNU General Public License as published by |
24 | the Free Software Foundation; either version 2, or (at your option) | 27 | the Free Software Foundation; either version 2, or (at your option) |
@@ -33,6 +36,7 @@ | |||
33 | #include <linux/config.h> | 36 | #include <linux/config.h> |
34 | #include <linux/linkage.h> | 37 | #include <linux/linkage.h> |
35 | #include <linux/raid/md.h> | 38 | #include <linux/raid/md.h> |
39 | #include <linux/raid/bitmap.h> | ||
36 | #include <linux/sysctl.h> | 40 | #include <linux/sysctl.h> |
37 | #include <linux/devfs_fs_kernel.h> | 41 | #include <linux/devfs_fs_kernel.h> |
38 | #include <linux/buffer_head.h> /* for invalidate_bdev */ | 42 | #include <linux/buffer_head.h> /* for invalidate_bdev */ |
@@ -40,6 +44,8 @@ | |||
40 | 44 | ||
41 | #include <linux/init.h> | 45 | #include <linux/init.h> |
42 | 46 | ||
47 | #include <linux/file.h> | ||
48 | |||
43 | #ifdef CONFIG_KMOD | 49 | #ifdef CONFIG_KMOD |
44 | #include <linux/kmod.h> | 50 | #include <linux/kmod.h> |
45 | #endif | 51 | #endif |
@@ -189,8 +195,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
189 | if (mddev->unit == unit) { | 195 | if (mddev->unit == unit) { |
190 | mddev_get(mddev); | 196 | mddev_get(mddev); |
191 | spin_unlock(&all_mddevs_lock); | 197 | spin_unlock(&all_mddevs_lock); |
192 | if (new) | 198 | kfree(new); |
193 | kfree(new); | ||
194 | return mddev; | 199 | return mddev; |
195 | } | 200 | } |
196 | 201 | ||
@@ -218,6 +223,8 @@ static mddev_t * mddev_find(dev_t unit) | |||
218 | INIT_LIST_HEAD(&new->all_mddevs); | 223 | INIT_LIST_HEAD(&new->all_mddevs); |
219 | init_timer(&new->safemode_timer); | 224 | init_timer(&new->safemode_timer); |
220 | atomic_set(&new->active, 1); | 225 | atomic_set(&new->active, 1); |
226 | spin_lock_init(&new->write_lock); | ||
227 | init_waitqueue_head(&new->sb_wait); | ||
221 | 228 | ||
222 | new->queue = blk_alloc_queue(GFP_KERNEL); | 229 | new->queue = blk_alloc_queue(GFP_KERNEL); |
223 | if (!new->queue) { | 230 | if (!new->queue) { |
@@ -320,6 +327,41 @@ static void free_disk_sb(mdk_rdev_t * rdev) | |||
320 | } | 327 | } |
321 | 328 | ||
322 | 329 | ||
330 | static int super_written(struct bio *bio, unsigned int bytes_done, int error) | ||
331 | { | ||
332 | mdk_rdev_t *rdev = bio->bi_private; | ||
333 | if (bio->bi_size) | ||
334 | return 1; | ||
335 | |||
336 | if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
337 | md_error(rdev->mddev, rdev); | ||
338 | |||
339 | if (atomic_dec_and_test(&rdev->mddev->pending_writes)) | ||
340 | wake_up(&rdev->mddev->sb_wait); | ||
341 | bio_put(bio); | ||
342 | return 0; | ||
343 | } | ||
344 | |||
345 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | ||
346 | sector_t sector, int size, struct page *page) | ||
347 | { | ||
348 | /* write first size bytes of page to sector of rdev | ||
349 | * Increment mddev->pending_writes before returning | ||
350 | * and decrement it on completion, waking up sb_wait | ||
351 | * if zero is reached. | ||
352 | * If an error occurred, call md_error | ||
353 | */ | ||
354 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | ||
355 | |||
356 | bio->bi_bdev = rdev->bdev; | ||
357 | bio->bi_sector = sector; | ||
358 | bio_add_page(bio, page, size, 0); | ||
359 | bio->bi_private = rdev; | ||
360 | bio->bi_end_io = super_written; | ||
361 | atomic_inc(&mddev->pending_writes); | ||
362 | submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio); | ||
363 | } | ||
364 | |||
323 | static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) | 365 | static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) |
324 | { | 366 | { |
325 | if (bio->bi_size) | 367 | if (bio->bi_size) |
@@ -329,7 +371,7 @@ static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) | |||
329 | return 0; | 371 | return 0; |
330 | } | 372 | } |
331 | 373 | ||
332 | static int sync_page_io(struct block_device *bdev, sector_t sector, int size, | 374 | int sync_page_io(struct block_device *bdev, sector_t sector, int size, |
333 | struct page *page, int rw) | 375 | struct page *page, int rw) |
334 | { | 376 | { |
335 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | 377 | struct bio *bio = bio_alloc(GFP_NOIO, 1); |
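md_super_write() replaces the synchronous write_disk_sb() (deleted further down) with fire-and-forget bios accounted in mddev->pending_writes. The caller's side of the contract is batch then wait, which md_update_sb() below implements; condensed:

	ITERATE_RDEV(mddev, rdev, tmp)
		if (!rdev->faulty)
			md_super_write(mddev, rdev,
				       rdev->sb_offset << 1, MD_SB_BYTES,
				       rdev->sb_page);
	/* super_written() drops pending_writes and wakes sb_wait; any
	 * I/O error has already been routed through md_error() */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes) == 0);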
@@ -416,11 +458,8 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) | |||
416 | ret = 1; | 458 | ret = 1; |
417 | 459 | ||
418 | abort: | 460 | abort: |
419 | if (tmp1) | 461 | kfree(tmp1); |
420 | kfree(tmp1); | 462 | kfree(tmp2); |
421 | if (tmp2) | ||
422 | kfree(tmp2); | ||
423 | |||
424 | return ret; | 463 | return ret; |
425 | } | 464 | } |
426 | 465 | ||
@@ -569,6 +608,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
569 | mdp_disk_t *desc; | 608 | mdp_disk_t *desc; |
570 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); | 609 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); |
571 | 610 | ||
611 | rdev->raid_disk = -1; | ||
612 | rdev->in_sync = 0; | ||
572 | if (mddev->raid_disks == 0) { | 613 | if (mddev->raid_disks == 0) { |
573 | mddev->major_version = 0; | 614 | mddev->major_version = 0; |
574 | mddev->minor_version = sb->minor_version; | 615 | mddev->minor_version = sb->minor_version; |
@@ -599,16 +640,35 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
599 | memcpy(mddev->uuid+12,&sb->set_uuid3, 4); | 640 | memcpy(mddev->uuid+12,&sb->set_uuid3, 4); |
600 | 641 | ||
601 | mddev->max_disks = MD_SB_DISKS; | 642 | mddev->max_disks = MD_SB_DISKS; |
602 | } else { | 643 | |
603 | __u64 ev1; | 644 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
604 | ev1 = md_event(sb); | 645 | mddev->bitmap_file == NULL) { |
646 | if (mddev->level != 1) { | ||
647 | /* FIXME use a better test */ | ||
648 | printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); | ||
649 | return -EINVAL; | ||
650 | } | ||
651 | mddev->bitmap_offset = (MD_SB_BYTES >> 9); | ||
652 | } | ||
653 | |||
654 | } else if (mddev->pers == NULL) { | ||
655 | /* Insist on good event counter while assembling */ | ||
656 | __u64 ev1 = md_event(sb); | ||
605 | ++ev1; | 657 | ++ev1; |
606 | if (ev1 < mddev->events) | 658 | if (ev1 < mddev->events) |
607 | return -EINVAL; | 659 | return -EINVAL; |
608 | } | 660 | } else if (mddev->bitmap) { |
661 | /* if adding to array with a bitmap, then we can accept an | ||
662 | * older device ... but not too old. | ||
663 | */ | ||
664 | __u64 ev1 = md_event(sb); | ||
665 | if (ev1 < mddev->bitmap->events_cleared) | ||
666 | return 0; | ||
667 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | ||
668 | return 0; | ||
669 | |||
609 | if (mddev->level != LEVEL_MULTIPATH) { | 670 | if (mddev->level != LEVEL_MULTIPATH) { |
610 | rdev->raid_disk = -1; | 671 | rdev->faulty = 0; |
611 | rdev->in_sync = rdev->faulty = 0; | ||
612 | desc = sb->disks + rdev->desc_nr; | 672 | desc = sb->disks + rdev->desc_nr; |
613 | 673 | ||
614 | if (desc->state & (1<<MD_DISK_FAULTY)) | 674 | if (desc->state & (1<<MD_DISK_FAULTY)) |
@@ -618,7 +678,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
618 | rdev->in_sync = 1; | 678 | rdev->in_sync = 1; |
619 | rdev->raid_disk = desc->raid_disk; | 679 | rdev->raid_disk = desc->raid_disk; |
620 | } | 680 | } |
621 | } | 681 | } else /* MULTIPATH are always insync */ |
682 | rdev->in_sync = 1; | ||
622 | return 0; | 683 | return 0; |
623 | } | 684 | } |
624 | 685 | ||
@@ -683,6 +744,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
683 | sb->layout = mddev->layout; | 744 | sb->layout = mddev->layout; |
684 | sb->chunk_size = mddev->chunk_size; | 745 | sb->chunk_size = mddev->chunk_size; |
685 | 746 | ||
747 | if (mddev->bitmap && mddev->bitmap_file == NULL) | ||
748 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); | ||
749 | |||
686 | sb->disks[0].state = (1<<MD_DISK_REMOVED); | 750 | sb->disks[0].state = (1<<MD_DISK_REMOVED); |
687 | ITERATE_RDEV(mddev,rdev2,tmp) { | 751 | ITERATE_RDEV(mddev,rdev2,tmp) { |
688 | mdp_disk_t *d; | 752 | mdp_disk_t *d; |
@@ -780,7 +844,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
780 | case 0: | 844 | case 0: |
781 | sb_offset = rdev->bdev->bd_inode->i_size >> 9; | 845 | sb_offset = rdev->bdev->bd_inode->i_size >> 9; |
782 | sb_offset -= 8*2; | 846 | sb_offset -= 8*2; |
783 | sb_offset &= ~(4*2-1); | 847 | sb_offset &= ~(sector_t)(4*2-1); |
784 | /* convert from sectors to K */ | 848 | /* convert from sectors to K */ |
785 | sb_offset /= 2; | 849 | sb_offset /= 2; |
786 | break; | 850 | break; |
@@ -860,6 +924,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
860 | { | 924 | { |
861 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); | 925 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); |
862 | 926 | ||
927 | rdev->raid_disk = -1; | ||
928 | rdev->in_sync = 0; | ||
863 | if (mddev->raid_disks == 0) { | 929 | if (mddev->raid_disks == 0) { |
864 | mddev->major_version = 1; | 930 | mddev->major_version = 1; |
865 | mddev->patch_version = 0; | 931 | mddev->patch_version = 0; |
@@ -877,13 +943,30 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
877 | memcpy(mddev->uuid, sb->set_uuid, 16); | 943 | memcpy(mddev->uuid, sb->set_uuid, 16); |
878 | 944 | ||
879 | mddev->max_disks = (4096-256)/2; | 945 | mddev->max_disks = (4096-256)/2; |
880 | } else { | 946 | |
881 | __u64 ev1; | 947 | if ((le32_to_cpu(sb->feature_map) & 1) && |
882 | ev1 = le64_to_cpu(sb->events); | 948 | mddev->bitmap_file == NULL ) { |
949 | if (mddev->level != 1) { | ||
950 | printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); | ||
951 | return -EINVAL; | ||
952 | } | ||
953 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); | ||
954 | } | ||
955 | } else if (mddev->pers == NULL) { | ||
956 | /* Insist on good event counter while assembling */ | ||
957 | __u64 ev1 = le64_to_cpu(sb->events); | ||
883 | ++ev1; | 958 | ++ev1; |
884 | if (ev1 < mddev->events) | 959 | if (ev1 < mddev->events) |
885 | return -EINVAL; | 960 | return -EINVAL; |
886 | } | 961 | } else if (mddev->bitmap) { |
962 | /* If adding to array with a bitmap, then we can accept an | ||
963 | * older device, but not too old. | ||
964 | */ | ||
965 | __u64 ev1 = le64_to_cpu(sb->events); | ||
966 | if (ev1 < mddev->bitmap->events_cleared) | ||
967 | return 0; | ||
968 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | ||
969 | return 0; | ||
887 | 970 | ||
888 | if (mddev->level != LEVEL_MULTIPATH) { | 971 | if (mddev->level != LEVEL_MULTIPATH) { |
889 | int role; | 972 | int role; |
@@ -891,14 +974,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
891 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 974 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); |
892 | switch(role) { | 975 | switch(role) { |
893 | case 0xffff: /* spare */ | 976 | case 0xffff: /* spare */ |
894 | rdev->in_sync = 0; | ||
895 | rdev->faulty = 0; | 977 | rdev->faulty = 0; |
896 | rdev->raid_disk = -1; | ||
897 | break; | 978 | break; |
898 | case 0xfffe: /* faulty */ | 979 | case 0xfffe: /* faulty */ |
899 | rdev->in_sync = 0; | ||
900 | rdev->faulty = 1; | 980 | rdev->faulty = 1; |
901 | rdev->raid_disk = -1; | ||
902 | break; | 981 | break; |
903 | default: | 982 | default: |
904 | rdev->in_sync = 1; | 983 | rdev->in_sync = 1; |
@@ -906,7 +985,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
906 | rdev->raid_disk = role; | 985 | rdev->raid_disk = role; |
907 | break; | 986 | break; |
908 | } | 987 | } |
909 | } | 988 | } else /* MULTIPATH are always insync */ |
989 | rdev->in_sync = 1; | ||
990 | |||
910 | return 0; | 991 | return 0; |
911 | } | 992 | } |
912 | 993 | ||
@@ -933,6 +1014,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
933 | else | 1014 | else |
934 | sb->resync_offset = cpu_to_le64(0); | 1015 | sb->resync_offset = cpu_to_le64(0); |
935 | 1016 | ||
1017 | if (mddev->bitmap && mddev->bitmap_file == NULL) { | ||
1018 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | ||
1019 | sb->feature_map = cpu_to_le32(1); | ||
1020 | } | ||
1021 | |||
936 | max_dev = 0; | 1022 | max_dev = 0; |
937 | ITERATE_RDEV(mddev,rdev2,tmp) | 1023 | ITERATE_RDEV(mddev,rdev2,tmp) |
938 | if (rdev2->desc_nr+1 > max_dev) | 1024 | if (rdev2->desc_nr+1 > max_dev) |
@@ -1196,8 +1282,11 @@ void md_print_devices(void) | |||
1196 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); | 1282 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); |
1197 | printk("md: **********************************\n"); | 1283 | printk("md: **********************************\n"); |
1198 | ITERATE_MDDEV(mddev,tmp) { | 1284 | ITERATE_MDDEV(mddev,tmp) { |
1199 | printk("%s: ", mdname(mddev)); | ||
1200 | 1285 | ||
1286 | if (mddev->bitmap) | ||
1287 | bitmap_print_sb(mddev->bitmap); | ||
1288 | else | ||
1289 | printk("%s: ", mdname(mddev)); | ||
1201 | ITERATE_RDEV(mddev,rdev,tmp2) | 1290 | ITERATE_RDEV(mddev,rdev,tmp2) |
1202 | printk("<%s>", bdevname(rdev->bdev,b)); | 1291 | printk("<%s>", bdevname(rdev->bdev,b)); |
1203 | printk("\n"); | 1292 | printk("\n"); |
@@ -1210,30 +1299,6 @@ void md_print_devices(void) | |||
1210 | } | 1299 | } |
1211 | 1300 | ||
1212 | 1301 | ||
1213 | static int write_disk_sb(mdk_rdev_t * rdev) | ||
1214 | { | ||
1215 | char b[BDEVNAME_SIZE]; | ||
1216 | if (!rdev->sb_loaded) { | ||
1217 | MD_BUG(); | ||
1218 | return 1; | ||
1219 | } | ||
1220 | if (rdev->faulty) { | ||
1221 | MD_BUG(); | ||
1222 | return 1; | ||
1223 | } | ||
1224 | |||
1225 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | ||
1226 | bdevname(rdev->bdev,b), | ||
1227 | (unsigned long long)rdev->sb_offset); | ||
1228 | |||
1229 | if (sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE)) | ||
1230 | return 0; | ||
1231 | |||
1232 | printk("md: write_disk_sb failed for device %s\n", | ||
1233 | bdevname(rdev->bdev,b)); | ||
1234 | return 1; | ||
1235 | } | ||
1236 | |||
1237 | static void sync_sbs(mddev_t * mddev) | 1302 | static void sync_sbs(mddev_t * mddev) |
1238 | { | 1303 | { |
1239 | mdk_rdev_t *rdev; | 1304 | mdk_rdev_t *rdev; |
@@ -1248,12 +1313,14 @@ static void sync_sbs(mddev_t * mddev) | |||
1248 | 1313 | ||
1249 | static void md_update_sb(mddev_t * mddev) | 1314 | static void md_update_sb(mddev_t * mddev) |
1250 | { | 1315 | { |
1251 | int err, count = 100; | 1316 | int err; |
1252 | struct list_head *tmp; | 1317 | struct list_head *tmp; |
1253 | mdk_rdev_t *rdev; | 1318 | mdk_rdev_t *rdev; |
1319 | int sync_req; | ||
1254 | 1320 | ||
1255 | mddev->sb_dirty = 0; | ||
1256 | repeat: | 1321 | repeat: |
1322 | spin_lock(&mddev->write_lock); | ||
1323 | sync_req = mddev->in_sync; | ||
1257 | mddev->utime = get_seconds(); | 1324 | mddev->utime = get_seconds(); |
1258 | mddev->events ++; | 1325 | mddev->events ++; |
1259 | 1326 | ||
@@ -1266,20 +1333,26 @@ repeat: | |||
1266 | MD_BUG(); | 1333 | MD_BUG(); |
1267 | mddev->events --; | 1334 | mddev->events --; |
1268 | } | 1335 | } |
1336 | mddev->sb_dirty = 2; | ||
1269 | sync_sbs(mddev); | 1337 | sync_sbs(mddev); |
1270 | 1338 | ||
1271 | /* | 1339 | /* |
1272 | * do not write anything to disk if using | 1340 | * do not write anything to disk if using |
1273 | * nonpersistent superblocks | 1341 | * nonpersistent superblocks |
1274 | */ | 1342 | */ |
1275 | if (!mddev->persistent) | 1343 | if (!mddev->persistent) { |
1344 | mddev->sb_dirty = 0; | ||
1345 | spin_unlock(&mddev->write_lock); | ||
1346 | wake_up(&mddev->sb_wait); | ||
1276 | return; | 1347 | return; |
1348 | } | ||
1349 | spin_unlock(&mddev->write_lock); | ||
1277 | 1350 | ||
1278 | dprintk(KERN_INFO | 1351 | dprintk(KERN_INFO |
1279 | "md: updating %s RAID superblock on device (in sync %d)\n", | 1352 | "md: updating %s RAID superblock on device (in sync %d)\n", |
1280 | mdname(mddev),mddev->in_sync); | 1353 | mdname(mddev),mddev->in_sync); |
1281 | 1354 | ||
1282 | err = 0; | 1355 | err = bitmap_update_sb(mddev->bitmap); |
1283 | ITERATE_RDEV(mddev,rdev,tmp) { | 1356 | ITERATE_RDEV(mddev,rdev,tmp) { |
1284 | char b[BDEVNAME_SIZE]; | 1357 | char b[BDEVNAME_SIZE]; |
1285 | dprintk(KERN_INFO "md: "); | 1358 | dprintk(KERN_INFO "md: "); |
@@ -1288,22 +1361,32 @@ repeat: | |||
1288 | 1361 | ||
1289 | dprintk("%s ", bdevname(rdev->bdev,b)); | 1362 | dprintk("%s ", bdevname(rdev->bdev,b)); |
1290 | if (!rdev->faulty) { | 1363 | if (!rdev->faulty) { |
1291 | err += write_disk_sb(rdev); | 1364 | md_super_write(mddev,rdev, |
1365 | rdev->sb_offset<<1, MD_SB_BYTES, | ||
1366 | rdev->sb_page); | ||
1367 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | ||
1368 | bdevname(rdev->bdev,b), | ||
1369 | (unsigned long long)rdev->sb_offset); | ||
1370 | |||
1292 | } else | 1371 | } else |
1293 | dprintk(")\n"); | 1372 | dprintk(")\n"); |
1294 | if (!err && mddev->level == LEVEL_MULTIPATH) | 1373 | if (mddev->level == LEVEL_MULTIPATH) |
1295 | /* only need to write one superblock... */ | 1374 | /* only need to write one superblock... */ |
1296 | break; | 1375 | break; |
1297 | } | 1376 | } |
1298 | if (err) { | 1377 | wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); |
1299 | if (--count) { | 1378 | /* if there was a failure, sb_dirty was set to 1, and we re-write super */ |
1300 | printk(KERN_ERR "md: errors occurred during superblock" | 1379 | |
1301 | " update, repeating\n"); | 1380 | spin_lock(&mddev->write_lock); |
1302 | goto repeat; | 1381 | if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) { |
1303 | } | 1382 | /* have to write it out again */ |
1304 | printk(KERN_ERR \ | 1383 | spin_unlock(&mddev->write_lock); |
1305 | "md: excessive errors occurred during superblock update, exiting\n"); | 1384 | goto repeat; |
1306 | } | 1385 | } |
1386 | mddev->sb_dirty = 0; | ||
1387 | spin_unlock(&mddev->write_lock); | ||
1388 | wake_up(&mddev->sb_wait); | ||
1389 | |||
1307 | } | 1390 | } |
1308 | 1391 | ||
1309 | /* | 1392 | /* |
@@ -1607,12 +1690,19 @@ static int do_md_run(mddev_t * mddev) | |||
1607 | 1690 | ||
1608 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ | 1691 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ |
1609 | 1692 | ||
1610 | err = mddev->pers->run(mddev); | 1693 | /* before we start the array running, initialise the bitmap */ |
1694 | err = bitmap_create(mddev); | ||
1695 | if (err) | ||
1696 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", | ||
1697 | mdname(mddev), err); | ||
1698 | else | ||
1699 | err = mddev->pers->run(mddev); | ||
1611 | if (err) { | 1700 | if (err) { |
1612 | printk(KERN_ERR "md: pers->run() failed ...\n"); | 1701 | printk(KERN_ERR "md: pers->run() failed ...\n"); |
1613 | module_put(mddev->pers->owner); | 1702 | module_put(mddev->pers->owner); |
1614 | mddev->pers = NULL; | 1703 | mddev->pers = NULL; |
1615 | return -EINVAL; | 1704 | bitmap_destroy(mddev); |
1705 | return err; | ||
1616 | } | 1706 | } |
1617 | atomic_set(&mddev->writes_pending,0); | 1707 | atomic_set(&mddev->writes_pending,0); |
1618 | mddev->safemode = 0; | 1708 | mddev->safemode = 0; |
@@ -1725,6 +1815,14 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
1725 | if (ro) | 1815 | if (ro) |
1726 | set_disk_ro(disk, 1); | 1816 | set_disk_ro(disk, 1); |
1727 | } | 1817 | } |
1818 | |||
1819 | bitmap_destroy(mddev); | ||
1820 | if (mddev->bitmap_file) { | ||
1821 | atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1); | ||
1822 | fput(mddev->bitmap_file); | ||
1823 | mddev->bitmap_file = NULL; | ||
1824 | } | ||
1825 | |||
1728 | /* | 1826 | /* |
1729 | * Free resources if final stop | 1827 | * Free resources if final stop |
1730 | */ | 1828 | */ |
@@ -1983,6 +2081,42 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
1983 | return 0; | 2081 | return 0; |
1984 | } | 2082 | } |
1985 | 2083 | ||
2084 | static int get_bitmap_file(mddev_t * mddev, void * arg) | ||
2085 | { | ||
2086 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ | ||
2087 | char *ptr, *buf = NULL; | ||
2088 | int err = -ENOMEM; | ||
2089 | |||
2090 | file = kmalloc(sizeof(*file), GFP_KERNEL); | ||
2091 | if (!file) | ||
2092 | goto out; | ||
2093 | |||
2094 | /* bitmap disabled, zero the first byte and copy out */ | ||
2095 | if (!mddev->bitmap || !mddev->bitmap->file) { | ||
2096 | file->pathname[0] = '\0'; | ||
2097 | goto copy_out; | ||
2098 | } | ||
2099 | |||
2100 | buf = kmalloc(sizeof(file->pathname), GFP_KERNEL); | ||
2101 | if (!buf) | ||
2102 | goto out; | ||
2103 | |||
2104 | ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname)); | ||
2105 | if (!ptr) | ||
2106 | goto out; | ||
2107 | |||
2108 | strcpy(file->pathname, ptr); | ||
2109 | |||
2110 | copy_out: | ||
2111 | err = 0; | ||
2112 | if (copy_to_user(arg, file, sizeof(*file))) | ||
2113 | err = -EFAULT; | ||
2114 | out: | ||
2115 | kfree(buf); | ||
2116 | kfree(file); | ||
2117 | return err; | ||
2118 | } | ||
2119 | |||
1986 | static int get_disk_info(mddev_t * mddev, void __user * arg) | 2120 | static int get_disk_info(mddev_t * mddev, void __user * arg) |
1987 | { | 2121 | { |
1988 | mdu_disk_info_t info; | 2122 | mdu_disk_info_t info; |
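GET_BITMAP_FILE lets userspace (mdadm, for instance) ask for the path of a file-backed bitmap. A self-contained caller, sketched on the assumption that mdu_bitmap_file_t from <linux/raid/md_u.h> carries a fixed-size pathname[] array, as the copy_to_user() above implies:

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/raid/md_u.h>	/* mdu_bitmap_file_t, GET_BITMAP_FILE */

	int main(void)
	{
		mdu_bitmap_file_t bmf;
		int fd = open("/dev/md0", O_RDONLY);

		if (fd < 0 || ioctl(fd, GET_BITMAP_FILE, &bmf) < 0) {
			perror("GET_BITMAP_FILE");
			return 1;
		}
		/* the kernel zeroes the first byte when no file bitmap is set */
		printf("bitmap file: %s\n",
		       bmf.pathname[0] ? bmf.pathname : "(none)");
		close(fd);
		return 0;
	}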
@@ -2078,11 +2212,25 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
2078 | PTR_ERR(rdev)); | 2212 | PTR_ERR(rdev)); |
2079 | return PTR_ERR(rdev); | 2213 | return PTR_ERR(rdev); |
2080 | } | 2214 | } |
2215 | /* set save_raid_disk if appropriate */ | ||
2216 | if (!mddev->persistent) { | ||
2217 | if (info->state & (1<<MD_DISK_SYNC) && | ||
2218 | info->raid_disk < mddev->raid_disks) | ||
2219 | rdev->raid_disk = info->raid_disk; | ||
2220 | else | ||
2221 | rdev->raid_disk = -1; | ||
2222 | } else | ||
2223 | super_types[mddev->major_version]. | ||
2224 | validate_super(mddev, rdev); | ||
2225 | rdev->saved_raid_disk = rdev->raid_disk; | ||
2226 | |||
2081 | rdev->in_sync = 0; /* just to be sure */ | 2227 | rdev->in_sync = 0; /* just to be sure */ |
2082 | rdev->raid_disk = -1; | 2228 | rdev->raid_disk = -1; |
2083 | err = bind_rdev_to_array(rdev, mddev); | 2229 | err = bind_rdev_to_array(rdev, mddev); |
2084 | if (err) | 2230 | if (err) |
2085 | export_rdev(rdev); | 2231 | export_rdev(rdev); |
2232 | |||
2233 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
2086 | if (mddev->thread) | 2234 | if (mddev->thread) |
2087 | md_wakeup_thread(mddev->thread); | 2235 | md_wakeup_thread(mddev->thread); |
2088 | return err; | 2236 | return err; |
@@ -2256,6 +2404,49 @@ abort_export: | |||
2256 | return err; | 2404 | return err; |
2257 | } | 2405 | } |
2258 | 2406 | ||
2407 | /* similar to deny_write_access, but accounts for our holding a reference | ||
2408 | * to the file ourselves */ | ||
2409 | static int deny_bitmap_write_access(struct file * file) | ||
2410 | { | ||
2411 | struct inode *inode = file->f_mapping->host; | ||
2412 | |||
2413 | spin_lock(&inode->i_lock); | ||
2414 | if (atomic_read(&inode->i_writecount) > 1) { | ||
2415 | spin_unlock(&inode->i_lock); | ||
2416 | return -ETXTBSY; | ||
2417 | } | ||
2418 | atomic_set(&inode->i_writecount, -1); | ||
2419 | spin_unlock(&inode->i_lock); | ||
2420 | |||
2421 | return 0; | ||
2422 | } | ||
2423 | |||
2424 | static int set_bitmap_file(mddev_t *mddev, int fd) | ||
2425 | { | ||
2426 | int err; | ||
2427 | |||
2428 | if (mddev->pers) | ||
2429 | return -EBUSY; | ||
2430 | |||
2431 | mddev->bitmap_file = fget(fd); | ||
2432 | |||
2433 | if (mddev->bitmap_file == NULL) { | ||
2434 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", | ||
2435 | mdname(mddev)); | ||
2436 | return -EBADF; | ||
2437 | } | ||
2438 | |||
2439 | err = deny_bitmap_write_access(mddev->bitmap_file); | ||
2440 | if (err) { | ||
2441 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | ||
2442 | mdname(mddev)); | ||
2443 | fput(mddev->bitmap_file); | ||
2444 | mddev->bitmap_file = NULL; | ||
2445 | } else | ||
2446 | mddev->bitmap_offset = 0; /* file overrides offset */ | ||
2447 | return err; | ||
2448 | } | ||
2449 | |||
2259 | /* | 2450 | /* |
2260 | * set_array_info is used two different ways | 2451 | * set_array_info is used two different ways |
2261 | * The original usage is when creating a new array. | 2452 | * The original usage is when creating a new array. |
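set_bitmap_file() is only legal while the array is stopped (it returns -EBUSY once mddev->pers is set), takes its own reference with fget(), and then locks other writers out through deny_bitmap_write_access(). Seen from userspace, the integer ioctl argument is just an open file descriptor; a sketch, with an illustrative bitmap path:

	int md = open("/dev/md0", O_RDWR);
	int bmp = open("/var/lib/md0-bitmap", O_RDWR);

	/* must precede RUN_ARRAY; the file must not be open for write
	 * anywhere else, or the ioctl fails with -ETXTBSY */
	if (ioctl(md, SET_BITMAP_FILE, bmp) < 0)
		perror("SET_BITMAP_FILE");
	close(bmp);	/* the kernel keeps its own fget() reference */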
@@ -2567,8 +2758,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2567 | /* | 2758 | /* |
2568 | * Commands querying/configuring an existing array: | 2759 | * Commands querying/configuring an existing array: |
2569 | */ | 2760 | */ |
2570 | /* if we are initialised yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */ | 2761 | /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY, |
2571 | if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) { | 2762 | * RUN_ARRAY, and SET_BITMAP_FILE are allowed */ |
2763 | if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY | ||
2764 | && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE) { | ||
2572 | err = -ENODEV; | 2765 | err = -ENODEV; |
2573 | goto abort_unlock; | 2766 | goto abort_unlock; |
2574 | } | 2767 | } |
@@ -2582,6 +2775,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2582 | err = get_array_info(mddev, argp); | 2775 | err = get_array_info(mddev, argp); |
2583 | goto done_unlock; | 2776 | goto done_unlock; |
2584 | 2777 | ||
2778 | case GET_BITMAP_FILE: | ||
2779 | err = get_bitmap_file(mddev, (void *)arg); | ||
2780 | goto done_unlock; | ||
2781 | |||
2585 | case GET_DISK_INFO: | 2782 | case GET_DISK_INFO: |
2586 | err = get_disk_info(mddev, argp); | 2783 | err = get_disk_info(mddev, argp); |
2587 | goto done_unlock; | 2784 | goto done_unlock; |
@@ -2662,6 +2859,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2662 | err = do_md_run (mddev); | 2859 | err = do_md_run (mddev); |
2663 | goto done_unlock; | 2860 | goto done_unlock; |
2664 | 2861 | ||
2862 | case SET_BITMAP_FILE: | ||
2863 | err = set_bitmap_file(mddev, (int)arg); | ||
2864 | goto done_unlock; | ||
2865 | |||
2665 | default: | 2866 | default: |
2666 | if (_IOC_TYPE(cmd) == MD_MAJOR) | 2867 | if (_IOC_TYPE(cmd) == MD_MAJOR) |
2667 | printk(KERN_WARNING "md: %s(pid %d) used" | 2868 | printk(KERN_WARNING "md: %s(pid %d) used" |
@@ -2773,10 +2974,10 @@ static int md_thread(void * arg) | |||
2773 | while (thread->run) { | 2974 | while (thread->run) { |
2774 | void (*run)(mddev_t *); | 2975 | void (*run)(mddev_t *); |
2775 | 2976 | ||
2776 | wait_event_interruptible(thread->wqueue, | 2977 | wait_event_interruptible_timeout(thread->wqueue, |
2777 | test_bit(THREAD_WAKEUP, &thread->flags)); | 2978 | test_bit(THREAD_WAKEUP, &thread->flags), |
2778 | if (current->flags & PF_FREEZE) | 2979 | thread->timeout); |
2779 | refrigerator(PF_FREEZE); | 2980 | try_to_freeze(); |
2780 | 2981 | ||
2781 | clear_bit(THREAD_WAKEUP, &thread->flags); | 2982 | clear_bit(THREAD_WAKEUP, &thread->flags); |
2782 | 2983 | ||
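Switching to wait_event_interruptible_timeout() lets a per-array thread double as a periodic daemon: threads that need no timer keep the MAX_SCHEDULE_TIMEOUT default installed in md_register_thread() just below, while the bitmap code can shorten it for regular wakeups. A sketch, with daemon_sleep as a hypothetical period in seconds:

	/* wake the thread every daemon_sleep seconds even when nobody
	 * sets THREAD_WAKEUP explicitly */
	mddev->thread->timeout = daemon_sleep * HZ;
	md_wakeup_thread(mddev->thread);	/* apply the new period now */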
@@ -2820,6 +3021,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
2820 | thread->run = run; | 3021 | thread->run = run; |
2821 | thread->mddev = mddev; | 3022 | thread->mddev = mddev; |
2822 | thread->name = name; | 3023 | thread->name = name; |
3024 | thread->timeout = MAX_SCHEDULE_TIMEOUT; | ||
2823 | ret = kernel_thread(md_thread, thread, 0); | 3025 | ret = kernel_thread(md_thread, thread, 0); |
2824 | if (ret < 0) { | 3026 | if (ret < 0) { |
2825 | kfree(thread); | 3027 | kfree(thread); |
@@ -2858,13 +3060,13 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
2858 | 3060 | ||
2859 | if (!rdev || rdev->faulty) | 3061 | if (!rdev || rdev->faulty) |
2860 | return; | 3062 | return; |
2861 | 3063 | /* | |
2862 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", | 3064 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", |
2863 | mdname(mddev), | 3065 | mdname(mddev), |
2864 | MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), | 3066 | MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), |
2865 | __builtin_return_address(0),__builtin_return_address(1), | 3067 | __builtin_return_address(0),__builtin_return_address(1), |
2866 | __builtin_return_address(2),__builtin_return_address(3)); | 3068 | __builtin_return_address(2),__builtin_return_address(3)); |
2867 | 3069 | */ | |
2868 | if (!mddev->pers->error_handler) | 3070 | if (!mddev->pers->error_handler) |
2869 | return; | 3071 | return; |
2870 | mddev->pers->error_handler(mddev,rdev); | 3072 | mddev->pers->error_handler(mddev,rdev); |
@@ -3018,6 +3220,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3018 | struct list_head *tmp2; | 3220 | struct list_head *tmp2; |
3019 | mdk_rdev_t *rdev; | 3221 | mdk_rdev_t *rdev; |
3020 | int i; | 3222 | int i; |
3223 | struct bitmap *bitmap; | ||
3021 | 3224 | ||
3022 | if (v == (void*)1) { | 3225 | if (v == (void*)1) { |
3023 | seq_printf(seq, "Personalities : "); | 3226 | seq_printf(seq, "Personalities : "); |
@@ -3070,10 +3273,35 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3070 | if (mddev->pers) { | 3273 | if (mddev->pers) { |
3071 | mddev->pers->status (seq, mddev); | 3274 | mddev->pers->status (seq, mddev); |
3072 | seq_printf(seq, "\n "); | 3275 | seq_printf(seq, "\n "); |
3073 | if (mddev->curr_resync > 2) | 3276 | if (mddev->curr_resync > 2) { |
3074 | status_resync (seq, mddev); | 3277 | status_resync (seq, mddev); |
3075 | else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) | 3278 | seq_printf(seq, "\n "); |
3076 | seq_printf(seq, " resync=DELAYED"); | 3279 | } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) |
3280 | seq_printf(seq, " resync=DELAYED\n "); | ||
3281 | } else | ||
3282 | seq_printf(seq, "\n "); | ||
3283 | |||
3284 | if ((bitmap = mddev->bitmap)) { | ||
3285 | unsigned long chunk_kb; | ||
3286 | unsigned long flags; | ||
3287 | spin_lock_irqsave(&bitmap->lock, flags); | ||
3288 | chunk_kb = bitmap->chunksize >> 10; | ||
3289 | seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " | ||
3290 | "%lu%s chunk", | ||
3291 | bitmap->pages - bitmap->missing_pages, | ||
3292 | bitmap->pages, | ||
3293 | (bitmap->pages - bitmap->missing_pages) | ||
3294 | << (PAGE_SHIFT - 10), | ||
3295 | chunk_kb ? chunk_kb : bitmap->chunksize, | ||
3296 | chunk_kb ? "KB" : "B"); | ||
3297 | if (bitmap->file) { | ||
3298 | seq_printf(seq, ", file: "); | ||
3299 | seq_path(seq, bitmap->file->f_vfsmnt, | ||
3300 | bitmap->file->f_dentry," \t\n"); | ||
3301 | } | ||
3302 | |||
3303 | seq_printf(seq, "\n"); | ||
3304 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
3077 | } | 3305 | } |
3078 | 3306 | ||
3079 | seq_printf(seq, "\n"); | 3307 | seq_printf(seq, "\n"); |
@@ -3176,19 +3404,28 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) | |||
3176 | } | 3404 | } |
3177 | 3405 | ||
3178 | 3406 | ||
3179 | void md_write_start(mddev_t *mddev) | 3407 | /* md_write_start(mddev, bi) |
3408 | * If we need to update some array metadata (e.g. 'active' flag | ||
3409 | * in superblock) before writing, schedule a superblock update | ||
3410 | * and wait for it to complete. | ||
3411 | */ | ||
3412 | void md_write_start(mddev_t *mddev, struct bio *bi) | ||
3180 | { | 3413 | { |
3181 | if (!atomic_read(&mddev->writes_pending)) { | 3414 | DEFINE_WAIT(w); |
3182 | mddev_lock_uninterruptible(mddev); | 3415 | if (bio_data_dir(bi) != WRITE) |
3416 | return; | ||
3417 | |||
3418 | atomic_inc(&mddev->writes_pending); | ||
3419 | if (mddev->in_sync) { | ||
3420 | spin_lock(&mddev->write_lock); | ||
3183 | if (mddev->in_sync) { | 3421 | if (mddev->in_sync) { |
3184 | mddev->in_sync = 0; | 3422 | mddev->in_sync = 0; |
3185 | del_timer(&mddev->safemode_timer); | 3423 | mddev->sb_dirty = 1; |
3186 | md_update_sb(mddev); | 3424 | md_wakeup_thread(mddev->thread); |
3187 | } | 3425 | } |
3188 | atomic_inc(&mddev->writes_pending); | 3426 | spin_unlock(&mddev->write_lock); |
3189 | mddev_unlock(mddev); | 3427 | } |
3190 | } else | 3428 | wait_event(mddev->sb_wait, mddev->sb_dirty==0); |
3191 | atomic_inc(&mddev->writes_pending); | ||
3192 | } | 3429 | } |
3193 | 3430 | ||
3194 | void md_write_end(mddev_t *mddev) | 3431 | void md_write_end(mddev_t *mddev) |
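md_write_start() now takes the bio so it can return immediately for reads; for writes it marks the superblock dirty, wakes the per-array thread, and sleeps on sb_wait until md_update_sb() has recorded the array as active. A sketch of the pairing a personality must follow (the raid1.c hunks below do exactly this):

	/* on entry to make_request(), before any mirror bio is issued */
	md_write_start(mddev, bio);	/* no-op for reads; may block */

	/* in the completion handler, after the last mirrored write */
	md_write_end(mddev);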
@@ -3201,37 +3438,6 @@ void md_write_end(mddev_t *mddev) | |||
3201 | } | 3438 | } |
3202 | } | 3439 | } |
3203 | 3440 | ||
3204 | static inline void md_enter_safemode(mddev_t *mddev) | ||
3205 | { | ||
3206 | if (!mddev->safemode) return; | ||
3207 | if (mddev->safemode == 2 && | ||
3208 | (atomic_read(&mddev->writes_pending) || mddev->in_sync || | ||
3209 | mddev->recovery_cp != MaxSector)) | ||
3210 | return; /* avoid the lock */ | ||
3211 | mddev_lock_uninterruptible(mddev); | ||
3212 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | ||
3213 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | ||
3214 | mddev->in_sync = 1; | ||
3215 | md_update_sb(mddev); | ||
3216 | } | ||
3217 | mddev_unlock(mddev); | ||
3218 | |||
3219 | if (mddev->safemode == 1) | ||
3220 | mddev->safemode = 0; | ||
3221 | } | ||
3222 | |||
3223 | void md_handle_safemode(mddev_t *mddev) | ||
3224 | { | ||
3225 | if (signal_pending(current)) { | ||
3226 | printk(KERN_INFO "md: %s in immediate safe mode\n", | ||
3227 | mdname(mddev)); | ||
3228 | mddev->safemode = 2; | ||
3229 | flush_signals(current); | ||
3230 | } | ||
3231 | md_enter_safemode(mddev); | ||
3232 | } | ||
3233 | |||
3234 | |||
3235 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | 3441 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); |
3236 | 3442 | ||
3237 | #define SYNC_MARKS 10 | 3443 | #define SYNC_MARKS 10 |
@@ -3241,12 +3447,13 @@ static void md_do_sync(mddev_t *mddev) | |||
3241 | mddev_t *mddev2; | 3447 | mddev_t *mddev2; |
3242 | unsigned int currspeed = 0, | 3448 | unsigned int currspeed = 0, |
3243 | window; | 3449 | window; |
3244 | sector_t max_sectors,j; | 3450 | sector_t max_sectors,j, io_sectors; |
3245 | unsigned long mark[SYNC_MARKS]; | 3451 | unsigned long mark[SYNC_MARKS]; |
3246 | sector_t mark_cnt[SYNC_MARKS]; | 3452 | sector_t mark_cnt[SYNC_MARKS]; |
3247 | int last_mark,m; | 3453 | int last_mark,m; |
3248 | struct list_head *tmp; | 3454 | struct list_head *tmp; |
3249 | sector_t last_check; | 3455 | sector_t last_check; |
3456 | int skipped = 0; | ||
3250 | 3457 | ||
3251 | /* just in case thread restarts... */ | 3458 |
3252 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) | 3459 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) |
@@ -3312,7 +3519,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3312 | 3519 | ||
3313 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 3520 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
3314 | /* resync follows the size requested by the personality, | 3521 | /* resync follows the size requested by the personality, |
3315 | * which default to physical size, but can be virtual size | 3522 | * which defaults to physical size, but can be virtual size |
3316 | */ | 3523 | */ |
3317 | max_sectors = mddev->resync_max_sectors; | 3524 | max_sectors = mddev->resync_max_sectors; |
3318 | else | 3525 | else |
@@ -3327,13 +3534,15 @@ static void md_do_sync(mddev_t *mddev) | |||
3327 | sysctl_speed_limit_max); | 3534 | sysctl_speed_limit_max); |
3328 | 3535 | ||
3329 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 3536 | is_mddev_idle(mddev); /* this also initializes IO event counters */ |
3330 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 3537 | /* we don't use the checkpoint if there's a bitmap */ |
3538 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap) | ||
3331 | j = mddev->recovery_cp; | 3539 | j = mddev->recovery_cp; |
3332 | else | 3540 | else |
3333 | j = 0; | 3541 | j = 0; |
3542 | io_sectors = 0; | ||
3334 | for (m = 0; m < SYNC_MARKS; m++) { | 3543 | for (m = 0; m < SYNC_MARKS; m++) { |
3335 | mark[m] = jiffies; | 3544 | mark[m] = jiffies; |
3336 | mark_cnt[m] = j; | 3545 | mark_cnt[m] = io_sectors; |
3337 | } | 3546 | } |
3338 | last_mark = 0; | 3547 | last_mark = 0; |
3339 | mddev->resync_mark = mark[last_mark]; | 3548 | mddev->resync_mark = mark[last_mark]; |
@@ -3358,21 +3567,29 @@ static void md_do_sync(mddev_t *mddev) | |||
3358 | } | 3567 | } |
3359 | 3568 | ||
3360 | while (j < max_sectors) { | 3569 | while (j < max_sectors) { |
3361 | int sectors; | 3570 | sector_t sectors; |
3362 | 3571 | ||
3363 | sectors = mddev->pers->sync_request(mddev, j, currspeed < sysctl_speed_limit_min); | 3572 | skipped = 0; |
3364 | if (sectors < 0) { | 3573 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
3574 | currspeed < sysctl_speed_limit_min); | ||
3575 | if (sectors == 0) { | ||
3365 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 3576 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
3366 | goto out; | 3577 | goto out; |
3367 | } | 3578 | } |
3368 | atomic_add(sectors, &mddev->recovery_active); | 3579 | |
3580 | if (!skipped) { /* actual IO requested */ | ||
3581 | io_sectors += sectors; | ||
3582 | atomic_add(sectors, &mddev->recovery_active); | ||
3583 | } | ||
3584 | |||
3369 | j += sectors; | 3585 | j += sectors; |
3370 | if (j>1) mddev->curr_resync = j; | 3586 | if (j>1) mddev->curr_resync = j; |
3371 | 3587 | ||
3372 | if (last_check + window > j || j == max_sectors) | 3588 | |
3589 | if (last_check + window > io_sectors || j == max_sectors) | ||
3373 | continue; | 3590 | continue; |
3374 | 3591 | ||
3375 | last_check = j; | 3592 | last_check = io_sectors; |
3376 | 3593 | ||
3377 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || | 3594 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || |
3378 | test_bit(MD_RECOVERY_ERR, &mddev->recovery)) | 3595 | test_bit(MD_RECOVERY_ERR, &mddev->recovery)) |
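The changed sync_request() signature is the heart of the bitmap optimisation: the personality reports through *skipped any range it satisfied without touching the disks, and those sectors advance j but not io_sectors, so the speed throttle and ETA are computed from real I/O only. A sketch of the contract, assuming bitmap_start_sync() returns nonzero when the range genuinely needs syncing (only its export and the matching bitmap_end_sync() call appear in this patch):

	static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
				     int *skipped, int go_faster)
	{
		int blocks;

		if (!bitmap_start_sync(mddev->bitmap, sector_nr, &blocks)) {
			*skipped = 1;	/* clean range: no I/O issued */
			return blocks;
		}
		/* otherwise submit resync I/O covering 'blocks' sectors
		 * and return that count; returning 0 now means error */
		return blocks;
	}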
@@ -3386,7 +3603,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3386 | mddev->resync_mark = mark[next]; | 3603 | mddev->resync_mark = mark[next]; |
3387 | mddev->resync_mark_cnt = mark_cnt[next]; | 3604 | mddev->resync_mark_cnt = mark_cnt[next]; |
3388 | mark[next] = jiffies; | 3605 | mark[next] = jiffies; |
3389 | mark_cnt[next] = j - atomic_read(&mddev->recovery_active); | 3606 | mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active); |
3390 | last_mark = next; | 3607 | last_mark = next; |
3391 | } | 3608 | } |
3392 | 3609 | ||
@@ -3413,7 +3630,8 @@ static void md_do_sync(mddev_t *mddev) | |||
3413 | mddev->queue->unplug_fn(mddev->queue); | 3630 | mddev->queue->unplug_fn(mddev->queue); |
3414 | cond_resched(); | 3631 | cond_resched(); |
3415 | 3632 | ||
3416 | currspeed = ((unsigned long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1) +1; | 3633 | currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 |
3634 | /((jiffies-mddev->resync_mark)/HZ +1) +1; | ||
3417 | 3635 | ||
3418 | if (currspeed > sysctl_speed_limit_min) { | 3636 | if (currspeed > sysctl_speed_limit_min) { |
3419 | if ((currspeed > sysctl_speed_limit_max) || | 3637 | if ((currspeed > sysctl_speed_limit_max) || |
@@ -3433,7 +3651,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3433 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); | 3651 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); |
3434 | 3652 | ||
3435 | /* tell personality that we are finished */ | 3653 | /* tell personality that we are finished */ |
3436 | mddev->pers->sync_request(mddev, max_sectors, 1); | 3654 | mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); |
3437 | 3655 | ||
3438 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 3656 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && |
3439 | mddev->curr_resync > 2 && | 3657 | mddev->curr_resync > 2 && |
@@ -3447,7 +3665,6 @@ static void md_do_sync(mddev_t *mddev) | |||
3447 | mddev->recovery_cp = MaxSector; | 3665 | mddev->recovery_cp = MaxSector; |
3448 | } | 3666 | } |
3449 | 3667 | ||
3450 | md_enter_safemode(mddev); | ||
3451 | skip: | 3668 | skip: |
3452 | mddev->curr_resync = 0; | 3669 | mddev->curr_resync = 0; |
3453 | wake_up(&resync_wait); | 3670 | wake_up(&resync_wait); |
@@ -3484,20 +3701,48 @@ void md_check_recovery(mddev_t *mddev) | |||
3484 | struct list_head *rtmp; | 3701 | struct list_head *rtmp; |
3485 | 3702 | ||
3486 | 3703 | ||
3487 | dprintk(KERN_INFO "md: recovery thread got woken up ...\n"); | 3704 | if (mddev->bitmap) |
3705 | bitmap_daemon_work(mddev->bitmap); | ||
3488 | 3706 | ||
3489 | if (mddev->ro) | 3707 | if (mddev->ro) |
3490 | return; | 3708 | return; |
3709 | |||
3710 | if (signal_pending(current)) { | ||
3711 | if (mddev->pers->sync_request) { | ||
3712 | printk(KERN_INFO "md: %s in immediate safe mode\n", | ||
3713 | mdname(mddev)); | ||
3714 | mddev->safemode = 2; | ||
3715 | } | ||
3716 | flush_signals(current); | ||
3717 | } | ||
3718 | |||
3491 | if ( ! ( | 3719 | if ( ! ( |
3492 | mddev->sb_dirty || | 3720 | mddev->sb_dirty || |
3493 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || | 3721 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || |
3494 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) | 3722 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || |
3723 | (mddev->safemode == 1) || | ||
3724 | (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) | ||
3725 | && !mddev->in_sync && mddev->recovery_cp == MaxSector) | ||
3495 | )) | 3726 | )) |
3496 | return; | 3727 | return; |
3728 | |||
3497 | if (mddev_trylock(mddev)==0) { | 3729 | if (mddev_trylock(mddev)==0) { |
3498 | int spares =0; | 3730 | int spares =0; |
3731 | |||
3732 | spin_lock(&mddev->write_lock); | ||
3733 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | ||
3734 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | ||
3735 | mddev->in_sync = 1; | ||
3736 | mddev->sb_dirty = 1; | ||
3737 | } | ||
3738 | if (mddev->safemode == 1) | ||
3739 | mddev->safemode = 0; | ||
3740 | spin_unlock(&mddev->write_lock); | ||
3741 | |||
3499 | if (mddev->sb_dirty) | 3742 | if (mddev->sb_dirty) |
3500 | md_update_sb(mddev); | 3743 | md_update_sb(mddev); |
3744 | |||
3745 | |||
3501 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && | 3746 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && |
3502 | !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { | 3747 | !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { |
3503 | /* resync/recovery still happening */ | 3748 | /* resync/recovery still happening */ |
@@ -3515,6 +3760,14 @@ void md_check_recovery(mddev_t *mddev) | |||
3515 | mddev->pers->spare_active(mddev); | 3760 | mddev->pers->spare_active(mddev); |
3516 | } | 3761 | } |
3517 | md_update_sb(mddev); | 3762 | md_update_sb(mddev); |
3763 | |||
3764 | /* if array is no longer degraded, then any saved_raid_disk | ||
3765 | * information must be scrapped | ||
3766 | */ | ||
3767 | if (!mddev->degraded) | ||
3768 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
3769 | rdev->saved_raid_disk = -1; | ||
3770 | |||
3518 | mddev->recovery = 0; | 3771 | mddev->recovery = 0; |
3519 | /* flag recovery needed just to double check */ | 3772 | /* flag recovery needed just to double check */ |
3520 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3773 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -3557,6 +3810,13 @@ void md_check_recovery(mddev_t *mddev) | |||
3557 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 3810 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
3558 | if (!spares) | 3811 | if (!spares) |
3559 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 3812 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
3813 | if (spares && mddev->bitmap && ! mddev->bitmap->file) { | ||
3814 | /* We are adding a device or devices to an array | ||
3815 | * which has the bitmap stored on all devices. | ||
3816 | * So make sure all bitmap pages get written | ||
3817 | */ | ||
3818 | bitmap_write_all(mddev->bitmap); | ||
3819 | } | ||
3560 | mddev->sync_thread = md_register_thread(md_do_sync, | 3820 | mddev->sync_thread = md_register_thread(md_do_sync, |
3561 | mddev, | 3821 | mddev, |
3562 | "%s_resync"); | 3822 | "%s_resync"); |
@@ -3624,6 +3884,8 @@ static int __init md_init(void) | |||
3624 | " MD_SB_DISKS=%d\n", | 3884 | " MD_SB_DISKS=%d\n", |
3625 | MD_MAJOR_VERSION, MD_MINOR_VERSION, | 3885 | MD_MAJOR_VERSION, MD_MINOR_VERSION, |
3626 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); | 3886 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); |
3887 | printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR, | ||
3888 | BITMAP_MINOR); | ||
3627 | 3889 | ||
3628 | if (register_blkdev(MAJOR_NR, "md")) | 3890 | if (register_blkdev(MAJOR_NR, "md")) |
3629 | return -1; | 3891 | return -1; |
@@ -3739,7 +4001,6 @@ EXPORT_SYMBOL(md_error); | |||
3739 | EXPORT_SYMBOL(md_done_sync); | 4001 | EXPORT_SYMBOL(md_done_sync); |
3740 | EXPORT_SYMBOL(md_write_start); | 4002 | EXPORT_SYMBOL(md_write_start); |
3741 | EXPORT_SYMBOL(md_write_end); | 4003 | EXPORT_SYMBOL(md_write_end); |
3742 | EXPORT_SYMBOL(md_handle_safemode); | ||
3743 | EXPORT_SYMBOL(md_register_thread); | 4004 | EXPORT_SYMBOL(md_register_thread); |
3744 | EXPORT_SYMBOL(md_unregister_thread); | 4005 | EXPORT_SYMBOL(md_unregister_thread); |
3745 | EXPORT_SYMBOL(md_wakeup_thread); | 4006 | EXPORT_SYMBOL(md_wakeup_thread); |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 2ae2d709cb15..2d2ca7fa0265 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -533,8 +533,7 @@ static int multipath_run (mddev_t *mddev) | |||
533 | out_free_conf: | 533 | out_free_conf: |
534 | if (conf->pool) | 534 | if (conf->pool) |
535 | mempool_destroy(conf->pool); | 535 | mempool_destroy(conf->pool); |
536 | if (conf->multipaths) | 536 | kfree(conf->multipaths); |
537 | kfree(conf->multipaths); | ||
538 | kfree(conf); | 537 | kfree(conf); |
539 | mddev->private = NULL; | 538 | mddev->private = NULL; |
540 | out: | 539 | out: |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e7d934eca06f..e11dd14d0b43 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -371,10 +371,8 @@ static int raid0_run (mddev_t *mddev) | |||
371 | return 0; | 371 | return 0; |
372 | 372 | ||
373 | out_free_conf: | 373 | out_free_conf: |
374 | if (conf->strip_zone) | 374 | kfree(conf->strip_zone); |
375 | kfree(conf->strip_zone); | 375 | kfree(conf->devlist); |
376 | if (conf->devlist) | ||
377 | kfree (conf->devlist); | ||
378 | kfree(conf); | 376 | kfree(conf); |
379 | mddev->private = NULL; | 377 | mddev->private = NULL; |
380 | out: | 378 | out: |
@@ -386,11 +384,11 @@ static int raid0_stop (mddev_t *mddev) | |||
386 | raid0_conf_t *conf = mddev_to_conf(mddev); | 384 | raid0_conf_t *conf = mddev_to_conf(mddev); |
387 | 385 | ||
388 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 386 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
389 | kfree (conf->hash_table); | 387 | kfree(conf->hash_table); |
390 | conf->hash_table = NULL; | 388 | conf->hash_table = NULL; |
391 | kfree (conf->strip_zone); | 389 | kfree(conf->strip_zone); |
392 | conf->strip_zone = NULL; | 390 | conf->strip_zone = NULL; |
393 | kfree (conf); | 391 | kfree(conf); |
394 | mddev->private = NULL; | 392 | mddev->private = NULL; |
395 | 393 | ||
396 | return 0; | 394 | return 0; |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1db5de52d376..ff1dbec864af 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -12,6 +12,15 @@ | |||
12 | * Fixes to reconstruction by Jakob Østergaard" <jakob@ostenfeld.dk> | 12 | * Fixes to reconstruction by Jakob Østergaard" <jakob@ostenfeld.dk> |
13 | * Various fixes by Neil Brown <neilb@cse.unsw.edu.au> | 13 | * Various fixes by Neil Brown <neilb@cse.unsw.edu.au> |
14 | * | 14 | * |
15 | * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support | ||
16 | * bitmapped intelligence in resync: | ||
17 | * | ||
18 | * - bitmap marked during normal i/o | ||
19 | * - bitmap used to skip nondirty blocks during sync | ||
20 | * | ||
21 | * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology: | ||
22 | * - persistent bitmap code | ||
23 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | 24 | * This program is free software; you can redistribute it and/or modify |
16 | * it under the terms of the GNU General Public License as published by | 25 | * it under the terms of the GNU General Public License as published by |
17 | * the Free Software Foundation; either version 2, or (at your option) | 26 | * the Free Software Foundation; either version 2, or (at your option) |
@@ -22,7 +31,16 @@ | |||
22 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 31 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
23 | */ | 32 | */ |
24 | 33 | ||
34 | #include "dm-bio-list.h" | ||
25 | #include <linux/raid/raid1.h> | 35 | #include <linux/raid/raid1.h> |
36 | #include <linux/raid/bitmap.h> | ||
37 | |||
38 | #define DEBUG 0 | ||
39 | #if DEBUG | ||
40 | #define PRINTK(x...) printk(x) | ||
41 | #else | ||
42 | #define PRINTK(x...) | ||
43 | #endif | ||
26 | 44 | ||
27 | /* | 45 | /* |
28 | * Number of guaranteed r1bios in case of extreme VM load: | 46 | * Number of guaranteed r1bios in case of extreme VM load: |
@@ -287,9 +305,11 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int | |||
287 | /* | 305 | /* |
288 | * this branch is our 'one mirror IO has finished' event handler: | 306 | * this branch is our 'one mirror IO has finished' event handler: |
289 | */ | 307 | */ |
290 | if (!uptodate) | 308 | if (!uptodate) { |
291 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | 309 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); |
292 | else | 310 | /* an I/O failed, we can't clear the bitmap */ |
311 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
312 | } else | ||
293 | /* | 313 | /* |
294 | * Set R1BIO_Uptodate in our master bio, so that | 314 | * Set R1BIO_Uptodate in our master bio, so that |
295 | * we will return a good error code to the higher | 315 | * we will return a good error code to the higher |
@@ -309,6 +329,10 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int | |||
309 | * already. | 329 | * already. |
310 | */ | 330 | */ |
311 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 331 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
332 | /* clear the bitmap if all writes complete successfully */ | ||
333 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | ||
334 | r1_bio->sectors, | ||
335 | !test_bit(R1BIO_Degraded, &r1_bio->state)); | ||
312 | md_write_end(r1_bio->mddev); | 336 | md_write_end(r1_bio->mddev); |
313 | raid_end_bio_io(r1_bio); | 337 | raid_end_bio_io(r1_bio); |
314 | } | 338 | } |
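
raid1_end_write_request now latches any mirror failure into R1BIO_Degraded, and only when the last outstanding write drops `remaining` to zero does bitmap_endwrite() learn whether the region may be cleaned. The shape of that last-completion pattern, modeled with C11 atomics; write_group and end_write are stand-ins, not md types:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct write_group {
        atomic_int  remaining;  /* outstanding per-mirror writes */
        atomic_bool degraded;   /* latched by any failed write */
    };

    /* called once per mirror write completion; 'success' mirrors
     * the bio's uptodate flag */
    static void end_write(struct write_group *g, bool success)
    {
        if (!success)
            atomic_store(&g->degraded, true);
        /* last completion decides: only a fully successful group
         * lets the intent bit be cleared (bitmap_endwrite's last arg) */
        if (atomic_fetch_sub(&g->remaining, 1) == 1)
            printf("all writes done: %s bitmap bit\n",
                   atomic_load(&g->degraded) ? "keep" : "clear");
    }

    int main(void)
    {
        struct write_group g = { 2, false };
        end_write(&g, true);
        end_write(&g, false);   /* one mirror failed: bit stays set */
        return 0;
    }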
@@ -458,7 +482,10 @@ static void unplug_slaves(mddev_t *mddev) | |||
458 | 482 | ||
459 | static void raid1_unplug(request_queue_t *q) | 483 | static void raid1_unplug(request_queue_t *q) |
460 | { | 484 | { |
461 | unplug_slaves(q->queuedata); | 485 | mddev_t *mddev = q->queuedata; |
486 | |||
487 | unplug_slaves(mddev); | ||
488 | md_wakeup_thread(mddev->thread); | ||
462 | } | 489 | } |
463 | 490 | ||
464 | static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, | 491 | static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, |
@@ -501,16 +528,16 @@ static void device_barrier(conf_t *conf, sector_t sect) | |||
501 | { | 528 | { |
502 | spin_lock_irq(&conf->resync_lock); | 529 | spin_lock_irq(&conf->resync_lock); |
503 | wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), | 530 | wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), |
504 | conf->resync_lock, unplug_slaves(conf->mddev)); | 531 | conf->resync_lock, raid1_unplug(conf->mddev->queue)); |
505 | 532 | ||
506 | if (!conf->barrier++) { | 533 | if (!conf->barrier++) { |
507 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, | 534 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, |
508 | conf->resync_lock, unplug_slaves(conf->mddev)); | 535 | conf->resync_lock, raid1_unplug(conf->mddev->queue)); |
509 | if (conf->nr_pending) | 536 | if (conf->nr_pending) |
510 | BUG(); | 537 | BUG(); |
511 | } | 538 | } |
512 | wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, | 539 | wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, |
513 | conf->resync_lock, unplug_slaves(conf->mddev)); | 540 | conf->resync_lock, raid1_unplug(conf->mddev->queue)); |
514 | conf->next_resync = sect; | 541 | conf->next_resync = sect; |
515 | spin_unlock_irq(&conf->resync_lock); | 542 | spin_unlock_irq(&conf->resync_lock); |
516 | } | 543 | } |
@@ -522,14 +549,20 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
522 | mirror_info_t *mirror; | 549 | mirror_info_t *mirror; |
523 | r1bio_t *r1_bio; | 550 | r1bio_t *r1_bio; |
524 | struct bio *read_bio; | 551 | struct bio *read_bio; |
525 | int i, disks; | 552 | int i, targets = 0, disks; |
526 | mdk_rdev_t *rdev; | 553 | mdk_rdev_t *rdev; |
554 | struct bitmap *bitmap = mddev->bitmap; | ||
555 | unsigned long flags; | ||
556 | struct bio_list bl; | ||
557 | |||
527 | 558 | ||
528 | /* | 559 | /* |
529 | * Register the new request and wait if the reconstruction | 560 | * Register the new request and wait if the reconstruction |
530 | * thread has put up a bar for new requests. | 561 | * thread has put up a bar for new requests. |
531 | * Continue immediately if no resync is active currently. | 562 | * Continue immediately if no resync is active currently. |
532 | */ | 563 | */ |
564 | md_write_start(mddev, bio); /* wait on superblock update early */ | ||
565 | |||
533 | spin_lock_irq(&conf->resync_lock); | 566 | spin_lock_irq(&conf->resync_lock); |
534 | wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); | 567 | wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); |
535 | conf->nr_pending++; | 568 | conf->nr_pending++; |
@@ -552,7 +585,7 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
552 | 585 | ||
553 | r1_bio->master_bio = bio; | 586 | r1_bio->master_bio = bio; |
554 | r1_bio->sectors = bio->bi_size >> 9; | 587 | r1_bio->sectors = bio->bi_size >> 9; |
555 | 588 | r1_bio->state = 0; | |
556 | r1_bio->mddev = mddev; | 589 | r1_bio->mddev = mddev; |
557 | r1_bio->sector = bio->bi_sector; | 590 | r1_bio->sector = bio->bi_sector; |
558 | 591 | ||
@@ -595,6 +628,13 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
595 | * bios[x] to bio | 628 | * bios[x] to bio |
596 | */ | 629 | */ |
597 | disks = conf->raid_disks; | 630 | disks = conf->raid_disks; |
631 | #if 0 | ||
632 | { static int first=1; | ||
633 | if (first) printk("First Write sector %llu disks %d\n", | ||
634 | (unsigned long long)r1_bio->sector, disks); | ||
635 | first = 0; | ||
636 | } | ||
637 | #endif | ||
598 | rcu_read_lock(); | 638 | rcu_read_lock(); |
599 | for (i = 0; i < disks; i++) { | 639 | for (i = 0; i < disks; i++) { |
600 | if ((rdev=conf->mirrors[i].rdev) != NULL && | 640 | if ((rdev=conf->mirrors[i].rdev) != NULL && |
@@ -605,13 +645,21 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
605 | r1_bio->bios[i] = NULL; | 645 | r1_bio->bios[i] = NULL; |
606 | } else | 646 | } else |
607 | r1_bio->bios[i] = bio; | 647 | r1_bio->bios[i] = bio; |
648 | targets++; | ||
608 | } else | 649 | } else |
609 | r1_bio->bios[i] = NULL; | 650 | r1_bio->bios[i] = NULL; |
610 | } | 651 | } |
611 | rcu_read_unlock(); | 652 | rcu_read_unlock(); |
612 | 653 | ||
613 | atomic_set(&r1_bio->remaining, 1); | 654 | if (targets < conf->raid_disks) { |
614 | md_write_start(mddev); | 655 | /* array is degraded, we will not clear the bitmap |
656 | * on I/O completion (see raid1_end_write_request) */ | ||
657 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
658 | } | ||
659 | |||
660 | atomic_set(&r1_bio->remaining, 0); | ||
661 | |||
662 | bio_list_init(&bl); | ||
615 | for (i = 0; i < disks; i++) { | 663 | for (i = 0; i < disks; i++) { |
616 | struct bio *mbio; | 664 | struct bio *mbio; |
617 | if (!r1_bio->bios[i]) | 665 | if (!r1_bio->bios[i]) |
@@ -627,14 +675,23 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
627 | mbio->bi_private = r1_bio; | 675 | mbio->bi_private = r1_bio; |
628 | 676 | ||
629 | atomic_inc(&r1_bio->remaining); | 677 | atomic_inc(&r1_bio->remaining); |
630 | generic_make_request(mbio); | ||
631 | } | ||
632 | 678 | ||
633 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 679 | bio_list_add(&bl, mbio); |
634 | md_write_end(mddev); | ||
635 | raid_end_bio_io(r1_bio); | ||
636 | } | 680 | } |
637 | 681 | ||
682 | bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors); | ||
683 | spin_lock_irqsave(&conf->device_lock, flags); | ||
684 | bio_list_merge(&conf->pending_bio_list, &bl); | ||
685 | bio_list_init(&bl); | ||
686 | |||
687 | blk_plug_device(mddev->queue); | ||
688 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
689 | |||
690 | #if 0 | ||
691 | while ((bio = bio_list_pop(&bl)) != NULL) | ||
692 | generic_make_request(bio); | ||
693 | #endif | ||
694 | |||
638 | return 0; | 695 | return 0; |
639 | } | 696 | } |
640 | 697 | ||
@@ -714,7 +771,7 @@ static void close_sync(conf_t *conf) | |||
714 | { | 771 | { |
715 | spin_lock_irq(&conf->resync_lock); | 772 | spin_lock_irq(&conf->resync_lock); |
716 | wait_event_lock_irq(conf->wait_resume, !conf->barrier, | 773 | wait_event_lock_irq(conf->wait_resume, !conf->barrier, |
717 | conf->resync_lock, unplug_slaves(conf->mddev)); | 774 | conf->resync_lock, raid1_unplug(conf->mddev->queue)); |
718 | spin_unlock_irq(&conf->resync_lock); | 775 | spin_unlock_irq(&conf->resync_lock); |
719 | 776 | ||
720 | if (conf->barrier) BUG(); | 777 | if (conf->barrier) BUG(); |
@@ -754,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
754 | { | 811 | { |
755 | conf_t *conf = mddev->private; | 812 | conf_t *conf = mddev->private; |
756 | int found = 0; | 813 | int found = 0; |
757 | int mirror; | 814 | int mirror = 0; |
758 | mirror_info_t *p; | 815 | mirror_info_t *p; |
759 | 816 | ||
817 | if (rdev->saved_raid_disk >= 0 && | ||
818 | conf->mirrors[rdev->saved_raid_disk].rdev == NULL) | ||
819 | mirror = rdev->saved_raid_disk; | ||
760 | for (mirror=0; mirror < mddev->raid_disks; mirror++) | 820 | for (; mirror < mddev->raid_disks; mirror++) |
761 | if ( !(p=conf->mirrors+mirror)->rdev) { | 821 | if ( !(p=conf->mirrors+mirror)->rdev) { |
762 | 822 | ||
@@ -773,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
773 | p->head_position = 0; | 833 | p->head_position = 0; |
774 | rdev->raid_disk = mirror; | 834 | rdev->raid_disk = mirror; |
775 | found = 1; | 835 | found = 1; |
836 | if (rdev->saved_raid_disk != mirror) | ||
837 | conf->fullsync = 1; | ||
776 | p->rdev = rdev; | 838 | p->rdev = rdev; |
777 | break; | 839 | break; |
778 | } | 840 | } |
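
raid1_add_disk now prefers to put a re-added device back into the slot recorded in rdev->saved_raid_disk; only if it lands in a different slot is conf->fullsync forced, since the bitmap's recorded intent is tied to the old position. A compact sketch of that slot-selection policy (choose_slot and its types are illustrative):

    #include <stdio.h>

    #define RAID_DISKS 4

    /* pick a slot for a (re-)added device; prefer its previous slot so
     * the bitmap can drive a partial resync, else force a full sync */
    static int choose_slot(const char *slots[], int saved, int *fullsync)
    {
        int mirror = 0;

        if (saved >= 0 && saved < RAID_DISKS && slots[saved] == NULL)
            mirror = saved;                 /* old position still free */

        for (; mirror < RAID_DISKS; mirror++)
            if (slots[mirror] == NULL) {
                *fullsync = (mirror != saved);  /* moved: bitmap unusable */
                return mirror;
            }
        return -1;                          /* no free slot */
    }

    int main(void)
    {
        const char *slots[RAID_DISKS] = { "sda", NULL, "sdc", NULL };
        int fullsync;
        int m = choose_slot(slots, 1, &fullsync);
        printf("slot %d, fullsync=%d\n", m, fullsync);  /* slot 1, fullsync=0 */
        return 0;
    }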
@@ -828,10 +890,11 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) | |||
828 | * or re-read if the read failed. | 890 | * or re-read if the read failed. |
829 | * We don't do much here, just schedule handling by raid1d | 891 | * We don't do much here, just schedule handling by raid1d |
830 | */ | 892 | */ |
831 | if (!uptodate) | 893 | if (!uptodate) { |
832 | md_error(r1_bio->mddev, | 894 | md_error(r1_bio->mddev, |
833 | conf->mirrors[r1_bio->read_disk].rdev); | 895 | conf->mirrors[r1_bio->read_disk].rdev); |
834 | else | 896 | set_bit(R1BIO_Degraded, &r1_bio->state); |
897 | } else | ||
835 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 898 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
836 | rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); | 899 | rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); |
837 | reschedule_retry(r1_bio); | 900 | reschedule_retry(r1_bio); |
@@ -855,8 +918,10 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) | |||
855 | mirror = i; | 918 | mirror = i; |
856 | break; | 919 | break; |
857 | } | 920 | } |
858 | if (!uptodate) | 921 | if (!uptodate) { |
859 | md_error(mddev, conf->mirrors[mirror].rdev); | 922 | md_error(mddev, conf->mirrors[mirror].rdev); |
923 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
924 | } | ||
860 | update_head_pos(mirror, r1_bio); | 925 | update_head_pos(mirror, r1_bio); |
861 | 926 | ||
862 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 927 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
@@ -876,6 +941,9 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
876 | 941 | ||
877 | bio = r1_bio->bios[r1_bio->read_disk]; | 942 | bio = r1_bio->bios[r1_bio->read_disk]; |
878 | 943 | ||
944 | /* | ||
945 | if (r1_bio->sector == 0) printk("First sync write starts\n"); | ||
946 | */ | ||
879 | /* | 947 | /* |
880 | * schedule writes | 948 | * schedule writes |
881 | */ | 949 | */ |
@@ -903,10 +971,12 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
903 | atomic_inc(&conf->mirrors[i].rdev->nr_pending); | 971 | atomic_inc(&conf->mirrors[i].rdev->nr_pending); |
904 | atomic_inc(&r1_bio->remaining); | 972 | atomic_inc(&r1_bio->remaining); |
905 | md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); | 973 | md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); |
974 | |||
906 | generic_make_request(wbio); | 975 | generic_make_request(wbio); |
907 | } | 976 | } |
908 | 977 | ||
909 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 978 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
979 | /* if we're here, all write(s) have completed, so clean up */ | ||
910 | md_done_sync(mddev, r1_bio->sectors, 1); | 980 | md_done_sync(mddev, r1_bio->sectors, 1); |
911 | put_buf(r1_bio); | 981 | put_buf(r1_bio); |
912 | } | 982 | } |
@@ -931,11 +1001,30 @@ static void raid1d(mddev_t *mddev) | |||
931 | mdk_rdev_t *rdev; | 1001 | mdk_rdev_t *rdev; |
932 | 1002 | ||
933 | md_check_recovery(mddev); | 1003 | md_check_recovery(mddev); |
934 | md_handle_safemode(mddev); | ||
935 | 1004 | ||
936 | for (;;) { | 1005 | for (;;) { |
937 | char b[BDEVNAME_SIZE]; | 1006 | char b[BDEVNAME_SIZE]; |
938 | spin_lock_irqsave(&conf->device_lock, flags); | 1007 | spin_lock_irqsave(&conf->device_lock, flags); |
1008 | |||
1009 | if (conf->pending_bio_list.head) { | ||
1010 | bio = bio_list_get(&conf->pending_bio_list); | ||
1011 | blk_remove_plug(mddev->queue); | ||
1012 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
1013 | /* flush any pending bitmap writes to disk before proceeding w/ I/O */ | ||
1014 | if (bitmap_unplug(mddev->bitmap) != 0) | ||
1015 | printk("%s: bitmap file write failed!\n", mdname(mddev)); | ||
1016 | |||
1017 | while (bio) { /* submit pending writes */ | ||
1018 | struct bio *next = bio->bi_next; | ||
1019 | bio->bi_next = NULL; | ||
1020 | generic_make_request(bio); | ||
1021 | bio = next; | ||
1022 | } | ||
1023 | unplug = 1; | ||
1024 | |||
1025 | continue; | ||
1026 | } | ||
1027 | |||
939 | if (list_empty(head)) | 1028 | if (list_empty(head)) |
940 | break; | 1029 | break; |
941 | r1_bio = list_entry(head->prev, r1bio_t, retry_list); | 1030 | r1_bio = list_entry(head->prev, r1bio_t, retry_list); |
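
The new branch at the top of raid1d's loop is the consumer side of the deferred-write scheme: it detaches the pending list, calls bitmap_unplug() so every set intent bit is durable on disk, and only then issues the data writes. That ordering is what makes the bitmap trustworthy after a crash, since no data block can be newer than the bit that covers it. A sketch of the drain step (flush_pending and the printf stand-ins are illustrative):

    #include <stdio.h>

    struct bio { struct bio *next; long sector; };

    /* consumer: flush the intent bitmap BEFORE submitting queued data
     * writes, so a crash can never leave data newer than its intent bit */
    static void flush_pending(struct bio *list)
    {
        puts("bitmap_unplug: intent bits written and acknowledged");
        while (list) {
            struct bio *next = list->next;
            list->next = NULL;
            printf("generic_make_request: data write at sector %ld\n",
                   list->sector);
            list = next;
        }
    }

    int main(void)
    {
        struct bio b2 = { 0, 128 };
        struct bio b1 = { &b2, 64 };
        flush_pending(&b1);
        return 0;
    }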
@@ -1009,7 +1098,7 @@ static int init_resync(conf_t *conf) | |||
1009 | * that can be installed to exclude normal IO requests. | 1098 | * that can be installed to exclude normal IO requests. |
1010 | */ | 1099 | */ |
1011 | 1100 | ||
1012 | static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | 1101 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) |
1013 | { | 1102 | { |
1014 | conf_t *conf = mddev_to_conf(mddev); | 1103 | conf_t *conf = mddev_to_conf(mddev); |
1015 | mirror_info_t *mirror; | 1104 | mirror_info_t *mirror; |
@@ -1019,17 +1108,43 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1019 | int disk; | 1108 | int disk; |
1020 | int i; | 1109 | int i; |
1021 | int write_targets = 0; | 1110 | int write_targets = 0; |
1111 | int sync_blocks; | ||
1022 | 1112 | ||
1023 | if (!conf->r1buf_pool) | 1113 | if (!conf->r1buf_pool) |
1114 | { | ||
1115 | /* | ||
1116 | printk("sync start - bitmap %p\n", mddev->bitmap); | ||
1117 | */ | ||
1024 | if (init_resync(conf)) | 1118 | if (init_resync(conf)) |
1025 | return -ENOMEM; | 1119 | return 0; |
1120 | } | ||
1026 | 1121 | ||
1027 | max_sector = mddev->size << 1; | 1122 | max_sector = mddev->size << 1; |
1028 | if (sector_nr >= max_sector) { | 1123 | if (sector_nr >= max_sector) { |
1124 | /* If we aborted, we need to abort the | ||
1125 | * sync on the 'current' bitmap chunk (there will | ||
1126 | * only be one in raid1 resync). | ||
1127 | * We can find the current address in mddev->curr_resync | ||
1128 | */ | ||
1129 | if (!conf->fullsync) { | ||
1130 | if (mddev->curr_resync < max_sector) | ||
1131 | bitmap_end_sync(mddev->bitmap, | ||
1132 | mddev->curr_resync, | ||
1133 | &sync_blocks, 1); | ||
1134 | bitmap_close_sync(mddev->bitmap); | ||
1135 | } | ||
1136 | if (mddev->curr_resync >= max_sector) | ||
1137 | conf->fullsync = 0; | ||
1029 | close_sync(conf); | 1138 | close_sync(conf); |
1030 | return 0; | 1139 | return 0; |
1031 | } | 1140 | } |
1032 | 1141 | ||
1142 | if (!conf->fullsync && | ||
1143 | !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) { | ||
1144 | /* We can skip this block, and probably several more */ | ||
1145 | *skipped = 1; | ||
1146 | return sync_blocks; | ||
1147 | } | ||
1033 | /* | 1148 | /* |
1034 | * If there is non-resync activity waiting for us then | 1149 | * If there is non-resync activity waiting for us then |
1035 | * put in a delay to throttle resync. | 1150 | * put in a delay to throttle resync. |
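
sync_request's contract changes here: instead of calling md_done_sync() itself for regions it will not touch, it consults bitmap_start_sync(), sets *skipped, and returns the sector count so md's sync loop can advance without issuing I/O. A sketch of the skip convention, with chunk_is_dirty() as a hypothetical stand-in for bitmap_start_sync():

    #include <stdbool.h>
    #include <stdio.h>

    typedef unsigned long long sector_t;

    #define CHUNK_SECTORS 128ULL

    /* stand-in for bitmap_start_sync(): reports whether the chunk
     * holding sector_nr is dirty and how many sectors that covers */
    static bool chunk_is_dirty(sector_t sector_nr, sector_t *blocks)
    {
        *blocks = CHUNK_SECTORS - (sector_nr % CHUNK_SECTORS);
        return (sector_nr / CHUNK_SECTORS) % 4 == 0;   /* toy pattern */
    }

    static sector_t sync_step(sector_t sector_nr, int *skipped)
    {
        sector_t blocks;

        if (!chunk_is_dirty(sector_nr, &blocks)) {
            *skipped = 1;       /* caller advances without any I/O */
            return blocks;
        }
        *skipped = 0;
        return blocks;          /* caller actually copies these sectors */
    }

    int main(void)
    {
        sector_t done = 0;
        while (done < 1024) {
            int skipped;
            sector_t n = sync_step(done, &skipped);
            printf("%s %llu sectors at %llu\n",
                   skipped ? "skip" : "sync", n, done);
            done += n;
        }
        return 0;
    }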
@@ -1068,6 +1183,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1068 | 1183 | ||
1069 | r1_bio->mddev = mddev; | 1184 | r1_bio->mddev = mddev; |
1070 | r1_bio->sector = sector_nr; | 1185 | r1_bio->sector = sector_nr; |
1186 | r1_bio->state = 0; | ||
1071 | set_bit(R1BIO_IsSync, &r1_bio->state); | 1187 | set_bit(R1BIO_IsSync, &r1_bio->state); |
1072 | r1_bio->read_disk = disk; | 1188 | r1_bio->read_disk = disk; |
1073 | 1189 | ||
@@ -1102,18 +1218,24 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1102 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | 1218 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; |
1103 | bio->bi_private = r1_bio; | 1219 | bio->bi_private = r1_bio; |
1104 | } | 1220 | } |
1221 | |||
1222 | if (write_targets + 1 < conf->raid_disks) | ||
1223 | /* array degraded, can't clear bitmap */ | ||
1224 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
1225 | |||
1105 | if (write_targets == 0) { | 1226 | if (write_targets == 0) { |
1106 | /* There is nowhere to write, so all non-sync | 1227 | /* There is nowhere to write, so all non-sync |
1107 | * drives must be failed - so we are finished | 1228 | * drives must be failed - so we are finished |
1108 | */ | 1229 | */ |
1109 | int rv = max_sector - sector_nr; | 1230 | sector_t rv = max_sector - sector_nr; |
1110 | md_done_sync(mddev, rv, 1); | 1231 | *skipped = 1; |
1111 | put_buf(r1_bio); | 1232 | put_buf(r1_bio); |
1112 | rdev_dec_pending(conf->mirrors[disk].rdev, mddev); | 1233 | rdev_dec_pending(conf->mirrors[disk].rdev, mddev); |
1113 | return rv; | 1234 | return rv; |
1114 | } | 1235 | } |
1115 | 1236 | ||
1116 | nr_sectors = 0; | 1237 | nr_sectors = 0; |
1238 | sync_blocks = 0; | ||
1117 | do { | 1239 | do { |
1118 | struct page *page; | 1240 | struct page *page; |
1119 | int len = PAGE_SIZE; | 1241 | int len = PAGE_SIZE; |
@@ -1121,6 +1243,17 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1121 | len = (max_sector - sector_nr) << 9; | 1243 | len = (max_sector - sector_nr) << 9; |
1122 | if (len == 0) | 1244 | if (len == 0) |
1123 | break; | 1245 | break; |
1246 | if (!conf->fullsync) { | ||
1247 | if (sync_blocks == 0) { | ||
1248 | if (!bitmap_start_sync(mddev->bitmap, | ||
1249 | sector_nr, &sync_blocks)) | ||
1250 | break; | ||
1251 | if (sync_blocks < (PAGE_SIZE>>9)) | ||
1252 | BUG(); | ||
1253 | if (len > (sync_blocks<<9)) len = sync_blocks<<9; | ||
1254 | } | ||
1255 | } | ||
1256 | |||
1124 | for (i=0 ; i < conf->raid_disks; i++) { | 1257 | for (i=0 ; i < conf->raid_disks; i++) { |
1125 | bio = r1_bio->bios[i]; | 1258 | bio = r1_bio->bios[i]; |
1126 | if (bio->bi_end_io) { | 1259 | if (bio->bi_end_io) { |
@@ -1143,6 +1276,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1143 | } | 1276 | } |
1144 | nr_sectors += len>>9; | 1277 | nr_sectors += len>>9; |
1145 | sector_nr += len>>9; | 1278 | sector_nr += len>>9; |
1279 | sync_blocks -= (len>>9); | ||
1146 | } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); | 1280 | } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); |
1147 | bio_full: | 1281 | bio_full: |
1148 | bio = r1_bio->bios[disk]; | 1282 | bio = r1_bio->bios[disk]; |
@@ -1231,6 +1365,9 @@ static int run(mddev_t *mddev) | |||
1231 | init_waitqueue_head(&conf->wait_idle); | 1365 | init_waitqueue_head(&conf->wait_idle); |
1232 | init_waitqueue_head(&conf->wait_resume); | 1366 | init_waitqueue_head(&conf->wait_resume); |
1233 | 1367 | ||
1368 | bio_list_init(&conf->pending_bio_list); | ||
1369 | bio_list_init(&conf->flushing_bio_list); | ||
1370 | |||
1234 | if (!conf->working_disks) { | 1371 | if (!conf->working_disks) { |
1235 | printk(KERN_ERR "raid1: no operational mirrors for %s\n", | 1372 | printk(KERN_ERR "raid1: no operational mirrors for %s\n", |
1236 | mdname(mddev)); | 1373 | mdname(mddev)); |
@@ -1259,16 +1396,15 @@ static int run(mddev_t *mddev) | |||
1259 | conf->last_used = j; | 1396 | conf->last_used = j; |
1260 | 1397 | ||
1261 | 1398 | ||
1262 | 1399 | mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); | |
1263 | { | 1400 | if (!mddev->thread) { |
1264 | mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); | 1401 | printk(KERN_ERR |
1265 | if (!mddev->thread) { | 1402 | "raid1: couldn't allocate thread for %s\n", |
1266 | printk(KERN_ERR | 1403 | mdname(mddev)); |
1267 | "raid1: couldn't allocate thread for %s\n", | 1404 | goto out_free_conf; |
1268 | mdname(mddev)); | ||
1269 | goto out_free_conf; | ||
1270 | } | ||
1271 | } | 1405 | } |
1406 | if (mddev->bitmap) mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; | ||
1407 | |||
1272 | printk(KERN_INFO | 1408 | printk(KERN_INFO |
1273 | "raid1: raid set %s active with %d out of %d mirrors\n", | 1409 | "raid1: raid set %s active with %d out of %d mirrors\n", |
1274 | mdname(mddev), mddev->raid_disks - mddev->degraded, | 1410 | mdname(mddev), mddev->raid_disks - mddev->degraded, |
@@ -1291,10 +1427,8 @@ out_free_conf: | |||
1291 | if (conf) { | 1427 | if (conf) { |
1292 | if (conf->r1bio_pool) | 1428 | if (conf->r1bio_pool) |
1293 | mempool_destroy(conf->r1bio_pool); | 1429 | mempool_destroy(conf->r1bio_pool); |
1294 | if (conf->mirrors) | 1430 | kfree(conf->mirrors); |
1295 | kfree(conf->mirrors); | 1431 | kfree(conf->poolinfo); |
1296 | if (conf->poolinfo) | ||
1297 | kfree(conf->poolinfo); | ||
1298 | kfree(conf); | 1432 | kfree(conf); |
1299 | mddev->private = NULL; | 1433 | mddev->private = NULL; |
1300 | } | 1434 | } |
@@ -1311,10 +1445,8 @@ static int stop(mddev_t *mddev) | |||
1311 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 1445 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
1312 | if (conf->r1bio_pool) | 1446 | if (conf->r1bio_pool) |
1313 | mempool_destroy(conf->r1bio_pool); | 1447 | mempool_destroy(conf->r1bio_pool); |
1314 | if (conf->mirrors) | 1448 | kfree(conf->mirrors); |
1315 | kfree(conf->mirrors); | 1449 | kfree(conf->poolinfo); |
1316 | if (conf->poolinfo) | ||
1317 | kfree(conf->poolinfo); | ||
1318 | kfree(conf); | 1450 | kfree(conf); |
1319 | mddev->private = NULL; | 1451 | mddev->private = NULL; |
1320 | return 0; | 1452 | return 0; |
@@ -1349,17 +1481,26 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) | |||
1349 | * We allocate a new r1bio_pool if we can. | 1481 | * We allocate a new r1bio_pool if we can. |
1350 | * Then raise a device barrier and wait until all IO stops. | 1482 | * Then raise a device barrier and wait until all IO stops. |
1351 | * Then resize conf->mirrors and swap in the new r1bio pool. | 1483 | * Then resize conf->mirrors and swap in the new r1bio pool. |
1484 | * | ||
1485 | * At the same time, we "pack" the devices so that all the missing | ||
1486 | * devices have the higher raid_disk numbers. | ||
1352 | */ | 1487 | */ |
1353 | mempool_t *newpool, *oldpool; | 1488 | mempool_t *newpool, *oldpool; |
1354 | struct pool_info *newpoolinfo; | 1489 | struct pool_info *newpoolinfo; |
1355 | mirror_info_t *newmirrors; | 1490 | mirror_info_t *newmirrors; |
1356 | conf_t *conf = mddev_to_conf(mddev); | 1491 | conf_t *conf = mddev_to_conf(mddev); |
1492 | int cnt; | ||
1357 | 1493 | ||
1358 | int d; | 1494 | int d, d2; |
1359 | 1495 | ||
1360 | for (d= raid_disks; d < conf->raid_disks; d++) | 1496 | if (raid_disks < conf->raid_disks) { |
1361 | if (conf->mirrors[d].rdev) | 1497 | cnt=0; |
1498 | for (d= 0; d < conf->raid_disks; d++) | ||
1499 | if (conf->mirrors[d].rdev) | ||
1500 | cnt++; | ||
1501 | if (cnt > raid_disks) | ||
1362 | return -EBUSY; | 1502 | return -EBUSY; |
1503 | } | ||
1363 | 1504 | ||
1364 | newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); | 1505 | newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); |
1365 | if (!newpoolinfo) | 1506 | if (!newpoolinfo) |
@@ -1384,14 +1525,18 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) | |||
1384 | spin_lock_irq(&conf->resync_lock); | 1525 | spin_lock_irq(&conf->resync_lock); |
1385 | conf->barrier++; | 1526 | conf->barrier++; |
1386 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, | 1527 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, |
1387 | conf->resync_lock, unplug_slaves(mddev)); | 1528 | conf->resync_lock, raid1_unplug(mddev->queue)); |
1388 | spin_unlock_irq(&conf->resync_lock); | 1529 | spin_unlock_irq(&conf->resync_lock); |
1389 | 1530 | ||
1390 | /* ok, everything is stopped */ | 1531 | /* ok, everything is stopped */ |
1391 | oldpool = conf->r1bio_pool; | 1532 | oldpool = conf->r1bio_pool; |
1392 | conf->r1bio_pool = newpool; | 1533 | conf->r1bio_pool = newpool; |
1393 | for (d=0; d < raid_disks && d < conf->raid_disks; d++) | 1534 | |
1394 | newmirrors[d] = conf->mirrors[d]; | 1535 | for (d=d2=0; d < conf->raid_disks; d++) |
1536 | if (conf->mirrors[d].rdev) { | ||
1537 | conf->mirrors[d].rdev->raid_disk = d2; | ||
1538 | newmirrors[d2++].rdev = conf->mirrors[d].rdev; | ||
1539 | } | ||
1395 | kfree(conf->mirrors); | 1540 | kfree(conf->mirrors); |
1396 | conf->mirrors = newmirrors; | 1541 | conf->mirrors = newmirrors; |
1397 | kfree(conf->poolinfo); | 1542 | kfree(conf->poolinfo); |
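
The reshape path now packs surviving devices toward the low slot numbers while copying into newmirrors, which is why the earlier -EBUSY check counts live devices instead of inspecting the trailing slots. The packing loop in isolation:

    #include <stdio.h>

    /* pack non-NULL entries toward slot 0, renumbering as we go;
     * mirrors the d/d2 loop in raid1_reshape() above */
    static int pack(const char *old[], const char *packed[], int n)
    {
        int d2 = 0;
        for (int d = 0; d < n; d++)
            if (old[d])
                packed[d2++] = old[d];  /* kernel also renumbers rdev->raid_disk */
        return d2;                      /* count of live devices */
    }

    int main(void)
    {
        const char *old[4] = { NULL, "sdb", NULL, "sdd" };
        const char *packed[4] = { 0 };
        int live = pack(old, packed, 4);
        printf("%d live: %s %s\n", live, packed[0], packed[1]);
        return 0;
    }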
@@ -1400,6 +1545,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) | |||
1400 | mddev->degraded += (raid_disks - conf->raid_disks); | 1545 | mddev->degraded += (raid_disks - conf->raid_disks); |
1401 | conf->raid_disks = mddev->raid_disks = raid_disks; | 1546 | conf->raid_disks = mddev->raid_disks = raid_disks; |
1402 | 1547 | ||
1548 | conf->last_used = 0; /* just make sure it is in-range */ | ||
1403 | spin_lock_irq(&conf->resync_lock); | 1549 | spin_lock_irq(&conf->resync_lock); |
1404 | conf->barrier--; | 1550 | conf->barrier--; |
1405 | spin_unlock_irq(&conf->resync_lock); | 1551 | spin_unlock_irq(&conf->resync_lock); |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3c37be6423d7..62ebb1bc72be 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -700,6 +700,8 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
700 | return 0; | 700 | return 0; |
701 | } | 701 | } |
702 | 702 | ||
703 | md_write_start(mddev, bio); | ||
704 | |||
703 | /* | 705 | /* |
704 | * Register the new request and wait if the reconstruction | 706 | * Register the new request and wait if the reconstruction |
705 | * thread has put up a bar for new requests. | 707 | * thread has put up a bar for new requests. |
@@ -774,7 +776,7 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
774 | rcu_read_unlock(); | 776 | rcu_read_unlock(); |
775 | 777 | ||
776 | atomic_set(&r10_bio->remaining, 1); | 778 | atomic_set(&r10_bio->remaining, 1); |
777 | md_write_start(mddev); | 779 | |
778 | for (i = 0; i < conf->copies; i++) { | 780 | for (i = 0; i < conf->copies; i++) { |
779 | struct bio *mbio; | 781 | struct bio *mbio; |
780 | int d = r10_bio->devs[i].devnum; | 782 | int d = r10_bio->devs[i].devnum; |
@@ -1216,7 +1218,6 @@ static void raid10d(mddev_t *mddev) | |||
1216 | mdk_rdev_t *rdev; | 1218 | mdk_rdev_t *rdev; |
1217 | 1219 | ||
1218 | md_check_recovery(mddev); | 1220 | md_check_recovery(mddev); |
1219 | md_handle_safemode(mddev); | ||
1220 | 1221 | ||
1221 | for (;;) { | 1222 | for (;;) { |
1222 | char b[BDEVNAME_SIZE]; | 1223 | char b[BDEVNAME_SIZE]; |
@@ -1319,7 +1320,7 @@ static int init_resync(conf_t *conf) | |||
1319 | * | 1320 | * |
1320 | */ | 1321 | */ |
1321 | 1322 | ||
1322 | static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | 1323 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) |
1323 | { | 1324 | { |
1324 | conf_t *conf = mddev_to_conf(mddev); | 1325 | conf_t *conf = mddev_to_conf(mddev); |
1325 | r10bio_t *r10_bio; | 1326 | r10bio_t *r10_bio; |
@@ -1333,7 +1334,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1333 | 1334 | ||
1334 | if (!conf->r10buf_pool) | 1335 | if (!conf->r10buf_pool) |
1335 | if (init_resync(conf)) | 1336 | if (init_resync(conf)) |
1336 | return -ENOMEM; | 1337 | return 0; |
1337 | 1338 | ||
1338 | skipped: | 1339 | skipped: |
1339 | max_sector = mddev->size << 1; | 1340 | max_sector = mddev->size << 1; |
@@ -1341,15 +1342,15 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1341 | max_sector = mddev->resync_max_sectors; | 1342 | max_sector = mddev->resync_max_sectors; |
1342 | if (sector_nr >= max_sector) { | 1343 | if (sector_nr >= max_sector) { |
1343 | close_sync(conf); | 1344 | close_sync(conf); |
1345 | *skipped = 1; | ||
1344 | return sectors_skipped; | 1346 | return sectors_skipped; |
1345 | } | 1347 | } |
1346 | if (chunks_skipped >= conf->raid_disks) { | 1348 | if (chunks_skipped >= conf->raid_disks) { |
1347 | /* if there has been nothing to do on any drive, | 1349 | /* if there has been nothing to do on any drive, |
1348 | * then there is nothing to do at all.. | 1350 | * then there is nothing to do at all.. |
1349 | */ | 1351 | */ |
1350 | sector_t sec = max_sector - sector_nr; | 1352 | *skipped = 1; |
1351 | md_done_sync(mddev, sec, 1); | 1353 | return (max_sector - sector_nr) + sectors_skipped; |
1352 | return sec + sectors_skipped; | ||
1353 | } | 1354 | } |
1354 | 1355 | ||
1355 | /* make sure whole request will fit in a chunk - if chunks | 1356 | /* make sure whole request will fit in a chunk - if chunks |
@@ -1563,17 +1564,22 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1563 | } | 1564 | } |
1564 | } | 1565 | } |
1565 | 1566 | ||
1567 | if (sectors_skipped) | ||
1568 | /* pretend they weren't skipped, it makes | ||
1569 | * no important difference in this case | ||
1570 | */ | ||
1571 | md_done_sync(mddev, sectors_skipped, 1); | ||
1572 | |||
1566 | return sectors_skipped + nr_sectors; | 1573 | return sectors_skipped + nr_sectors; |
1567 | giveup: | 1574 | giveup: |
1568 | /* There is nowhere to write, so all non-sync | 1575 | /* There is nowhere to write, so all non-sync |
1569 | * drives must be failed, so try the next chunk... | 1576 | * drives must be failed, so try the next chunk... |
1570 | */ | 1577 | */ |
1571 | { | 1578 | { |
1572 | int sec = max_sector - sector_nr; | 1579 | sector_t sec = max_sector - sector_nr; |
1573 | sectors_skipped += sec; | 1580 | sectors_skipped += sec; |
1574 | chunks_skipped ++; | 1581 | chunks_skipped ++; |
1575 | sector_nr = max_sector; | 1582 | sector_nr = max_sector; |
1576 | md_done_sync(mddev, sec, 1); | ||
1577 | goto skipped; | 1583 | goto skipped; |
1578 | } | 1584 | } |
1579 | } | 1585 | } |
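
raid10's sync_request ends up with two skip-accounting paths: chunks skipped internally during a pass are still credited through md_done_sync() ("pretend they weren't skipped"), while skipping the whole request is reported to the caller via *skipped. A condensed sketch of the two paths (sync_pass and the credit helper are illustrative):

    #include <stdio.h>

    typedef unsigned long long sector_t;

    static void credit(sector_t n) { printf("md_done_sync-style credit: %llu\n", n); }

    static sector_t sync_pass(sector_t internal_skips, sector_t synced,
                              int whole_request_clean, int *skipped)
    {
        if (whole_request_clean) {
            *skipped = 1;             /* caller accounts for the return value */
            return internal_skips + synced;
        }
        if (internal_skips)
            credit(internal_skips);   /* credited here as if really synced */
        return internal_skips + synced;
    }

    int main(void)
    {
        int skipped = 0;
        sync_pass(64, 192, 0, &skipped);
        return 0;
    }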
@@ -1731,8 +1737,7 @@ static int run(mddev_t *mddev) | |||
1731 | out_free_conf: | 1737 | out_free_conf: |
1732 | if (conf->r10bio_pool) | 1738 | if (conf->r10bio_pool) |
1733 | mempool_destroy(conf->r10bio_pool); | 1739 | mempool_destroy(conf->r10bio_pool); |
1734 | if (conf->mirrors) | 1740 | kfree(conf->mirrors); |
1735 | kfree(conf->mirrors); | ||
1736 | kfree(conf); | 1741 | kfree(conf); |
1737 | mddev->private = NULL; | 1742 | mddev->private = NULL; |
1738 | out: | 1743 | out: |
@@ -1748,8 +1753,7 @@ static int stop(mddev_t *mddev) | |||
1748 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 1753 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
1749 | if (conf->r10bio_pool) | 1754 | if (conf->r10bio_pool) |
1750 | mempool_destroy(conf->r10bio_pool); | 1755 | mempool_destroy(conf->r10bio_pool); |
1751 | if (conf->mirrors) | 1756 | kfree(conf->mirrors); |
1752 | kfree(conf->mirrors); | ||
1753 | kfree(conf); | 1757 | kfree(conf); |
1754 | mddev->private = NULL; | 1758 | mddev->private = NULL; |
1755 | return 0; | 1759 | return 0; |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3cb11ac232fa..93a9726cc2d6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -1411,6 +1411,8 @@ static int make_request (request_queue_t *q, struct bio * bi) | |||
1411 | sector_t logical_sector, last_sector; | 1411 | sector_t logical_sector, last_sector; |
1412 | struct stripe_head *sh; | 1412 | struct stripe_head *sh; |
1413 | 1413 | ||
1414 | md_write_start(mddev, bi); | ||
1415 | |||
1414 | if (bio_data_dir(bi)==WRITE) { | 1416 | if (bio_data_dir(bi)==WRITE) { |
1415 | disk_stat_inc(mddev->gendisk, writes); | 1417 | disk_stat_inc(mddev->gendisk, writes); |
1416 | disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi)); | 1418 | disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi)); |
@@ -1423,8 +1425,7 @@ static int make_request (request_queue_t *q, struct bio * bi) | |||
1423 | last_sector = bi->bi_sector + (bi->bi_size>>9); | 1425 | last_sector = bi->bi_sector + (bi->bi_size>>9); |
1424 | bi->bi_next = NULL; | 1426 | bi->bi_next = NULL; |
1425 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 1427 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
1426 | if ( bio_data_dir(bi) == WRITE ) | 1428 | |
1427 | md_write_start(mddev); | ||
1428 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 1429 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
1429 | DEFINE_WAIT(w); | 1430 | DEFINE_WAIT(w); |
1430 | 1431 | ||
@@ -1475,7 +1476,7 @@ static int make_request (request_queue_t *q, struct bio * bi) | |||
1475 | } | 1476 | } |
1476 | 1477 | ||
1477 | /* FIXME go_faster isn't used */ | 1478 | /* FIXME go_faster isn't used */ |
1478 | static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster) | 1479 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) |
1479 | { | 1480 | { |
1480 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | 1481 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; |
1481 | struct stripe_head *sh; | 1482 | struct stripe_head *sh; |
@@ -1498,8 +1499,8 @@ static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1498 | * nothing we can do. | 1499 | * nothing we can do. |
1499 | */ | 1500 | */ |
1500 | if (mddev->degraded >= 1 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 1501 | if (mddev->degraded >= 1 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
1501 | int rv = (mddev->size << 1) - sector_nr; | 1502 | sector_t rv = (mddev->size << 1) - sector_nr; |
1502 | md_done_sync(mddev, rv, 1); | 1503 | *skipped = 1; |
1503 | return rv; | 1504 | return rv; |
1504 | } | 1505 | } |
1505 | 1506 | ||
@@ -1546,7 +1547,6 @@ static void raid5d (mddev_t *mddev) | |||
1546 | PRINTK("+++ raid5d active\n"); | 1547 | PRINTK("+++ raid5d active\n"); |
1547 | 1548 | ||
1548 | md_check_recovery(mddev); | 1549 | md_check_recovery(mddev); |
1549 | md_handle_safemode(mddev); | ||
1550 | 1550 | ||
1551 | handled = 0; | 1551 | handled = 0; |
1552 | spin_lock_irq(&conf->device_lock); | 1552 | spin_lock_irq(&conf->device_lock); |
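
Both raid5 above and raid6main below move md_write_start(mddev, bi) to the very top of make_request and drop the open-coded WRITE check; with the bio passed in, md_write_start can presumably ignore reads itself and block the writer until the superblock has been marked dirty (the md.c side is not shown in this section). A sketch of that guard-at-the-boundary contract (write_start and bio_stub are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    struct bio_stub { bool is_write; };

    /* sketch of the new contract: callable for every bio; reads return
     * immediately, writes may block until the array is marked dirty */
    static void write_start(struct bio_stub *bio)
    {
        if (!bio->is_write)
            return;             /* reads need no superblock update */
        puts("mark array dirty, wait for superblock write");
    }

    int main(void)
    {
        struct bio_stub r = { false }, w = { true };
        write_start(&r);        /* no-op */
        write_start(&w);
        return 0;
    }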
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 908edd78a792..f62ea1a73d0d 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c | |||
@@ -1570,6 +1570,8 @@ static int make_request (request_queue_t *q, struct bio * bi) | |||
1570 | sector_t logical_sector, last_sector; | 1570 | sector_t logical_sector, last_sector; |
1571 | struct stripe_head *sh; | 1571 | struct stripe_head *sh; |
1572 | 1572 | ||
1573 | md_write_start(mddev, bi); | ||
1574 | |||
1573 | if (bio_data_dir(bi)==WRITE) { | 1575 | if (bio_data_dir(bi)==WRITE) { |
1574 | disk_stat_inc(mddev->gendisk, writes); | 1576 | disk_stat_inc(mddev->gendisk, writes); |
1575 | disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi)); | 1577 | disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi)); |
@@ -1583,8 +1585,7 @@ static int make_request (request_queue_t *q, struct bio * bi) | |||
1583 | 1585 | ||
1584 | bi->bi_next = NULL; | 1586 | bi->bi_next = NULL; |
1585 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 1587 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
1586 | if ( bio_data_dir(bi) == WRITE ) | 1588 | |
1587 | md_write_start(mddev); | ||
1588 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 1589 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
1589 | DEFINE_WAIT(w); | 1590 | DEFINE_WAIT(w); |
1590 | 1591 | ||
@@ -1634,7 +1635,7 @@ static int make_request (request_queue_t *q, struct bio * bi) | |||
1634 | } | 1635 | } |
1635 | 1636 | ||
1636 | /* FIXME go_faster isn't used */ | 1637 | /* FIXME go_faster isn't used */ |
1637 | static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster) | 1638 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) |
1638 | { | 1639 | { |
1639 | raid6_conf_t *conf = (raid6_conf_t *) mddev->private; | 1640 | raid6_conf_t *conf = (raid6_conf_t *) mddev->private; |
1640 | struct stripe_head *sh; | 1641 | struct stripe_head *sh; |
@@ -1657,8 +1658,8 @@ static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1657 | * nothing we can do. | 1658 | * nothing we can do. |
1658 | */ | 1659 | */ |
1659 | if (mddev->degraded >= 2 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 1660 | if (mddev->degraded >= 2 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
1660 | int rv = (mddev->size << 1) - sector_nr; | 1661 | sector_t rv = (mddev->size << 1) - sector_nr; |
1661 | md_done_sync(mddev, rv, 1); | 1662 | *skipped = 1; |
1662 | return rv; | 1663 | return rv; |
1663 | } | 1664 | } |
1664 | 1665 | ||
@@ -1705,7 +1706,6 @@ static void raid6d (mddev_t *mddev) | |||
1705 | PRINTK("+++ raid6d active\n"); | 1706 | PRINTK("+++ raid6d active\n"); |
1706 | 1707 | ||
1707 | md_check_recovery(mddev); | 1708 | md_check_recovery(mddev); |
1708 | md_handle_safemode(mddev); | ||
1709 | 1709 | ||
1710 | handled = 0; | 1710 | handled = 0; |
1711 | spin_lock_irq(&conf->device_lock); | 1711 | spin_lock_irq(&conf->device_lock); |