diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 508 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 6 | ||||
-rw-r--r-- | drivers/md/md.c | 286 | ||||
-rw-r--r-- | drivers/md/md.h | 55 | ||||
-rw-r--r-- | drivers/md/raid10.c | 18 | ||||
-rw-r--r-- | drivers/md/raid5.c | 168 | ||||
-rw-r--r-- | drivers/md/raid5.h | 9 |
7 files changed, 643 insertions, 407 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 1742435ce3ae..1ba1e122e948 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -13,7 +13,6 @@ | |||
13 | * Still to do: | 13 | * Still to do: |
14 | * | 14 | * |
15 | * flush after percent set rather than just time based. (maybe both). | 15 | * flush after percent set rather than just time based. (maybe both). |
16 | * wait if count gets too high, wake when it drops to half. | ||
17 | */ | 16 | */ |
18 | 17 | ||
19 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
@@ -30,6 +29,7 @@ | |||
30 | #include "md.h" | 29 | #include "md.h" |
31 | #include "bitmap.h" | 30 | #include "bitmap.h" |
32 | 31 | ||
32 | #include <linux/dm-dirty-log.h> | ||
33 | /* debug macros */ | 33 | /* debug macros */ |
34 | 34 | ||
35 | #define DEBUG 0 | 35 | #define DEBUG 0 |
@@ -51,9 +51,6 @@ | |||
51 | #define INJECT_FATAL_FAULT_3 0 /* undef */ | 51 | #define INJECT_FATAL_FAULT_3 0 /* undef */ |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | //#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */ | ||
55 | #define DPRINTK(x...) do { } while(0) | ||
56 | |||
57 | #ifndef PRINTK | 54 | #ifndef PRINTK |
58 | # if DEBUG > 0 | 55 | # if DEBUG > 0 |
59 | # define PRINTK(x...) printk(KERN_DEBUG x) | 56 | # define PRINTK(x...) printk(KERN_DEBUG x) |
@@ -62,12 +59,11 @@ | |||
62 | # endif | 59 | # endif |
63 | #endif | 60 | #endif |
64 | 61 | ||
65 | static inline char * bmname(struct bitmap *bitmap) | 62 | static inline char *bmname(struct bitmap *bitmap) |
66 | { | 63 | { |
67 | return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; | 64 | return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; |
68 | } | 65 | } |
69 | 66 | ||
70 | |||
71 | /* | 67 | /* |
72 | * just a placeholder - calls kmalloc for bitmap pages | 68 | * just a placeholder - calls kmalloc for bitmap pages |
73 | */ | 69 | */ |
@@ -78,7 +74,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) | |||
78 | #ifdef INJECT_FAULTS_1 | 74 | #ifdef INJECT_FAULTS_1 |
79 | page = NULL; | 75 | page = NULL; |
80 | #else | 76 | #else |
81 | page = kmalloc(PAGE_SIZE, GFP_NOIO); | 77 | page = kzalloc(PAGE_SIZE, GFP_NOIO); |
82 | #endif | 78 | #endif |
83 | if (!page) | 79 | if (!page) |
84 | printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); | 80 | printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); |
@@ -107,7 +103,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) | |||
107 | * if we find our page, we increment the page's refcount so that it stays | 103 | * if we find our page, we increment the page's refcount so that it stays |
108 | * allocated while we're using it | 104 | * allocated while we're using it |
109 | */ | 105 | */ |
110 | static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) | 106 | static int bitmap_checkpage(struct bitmap *bitmap, |
107 | unsigned long page, int create) | ||
111 | __releases(bitmap->lock) | 108 | __releases(bitmap->lock) |
112 | __acquires(bitmap->lock) | 109 | __acquires(bitmap->lock) |
113 | { | 110 | { |
@@ -121,7 +118,6 @@ __acquires(bitmap->lock) | |||
121 | return -EINVAL; | 118 | return -EINVAL; |
122 | } | 119 | } |
123 | 120 | ||
124 | |||
125 | if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ | 121 | if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ |
126 | return 0; | 122 | return 0; |
127 | 123 | ||
@@ -131,43 +127,34 @@ __acquires(bitmap->lock) | |||
131 | if (!create) | 127 | if (!create) |
132 | return -ENOENT; | 128 | return -ENOENT; |
133 | 129 | ||
134 | spin_unlock_irq(&bitmap->lock); | ||
135 | |||
136 | /* this page has not been allocated yet */ | 130 | /* this page has not been allocated yet */ |
137 | 131 | ||
138 | if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { | 132 | spin_unlock_irq(&bitmap->lock); |
133 | mappage = bitmap_alloc_page(bitmap); | ||
134 | spin_lock_irq(&bitmap->lock); | ||
135 | |||
136 | if (mappage == NULL) { | ||
139 | PRINTK("%s: bitmap map page allocation failed, hijacking\n", | 137 | PRINTK("%s: bitmap map page allocation failed, hijacking\n", |
140 | bmname(bitmap)); | 138 | bmname(bitmap)); |
141 | /* failed - set the hijacked flag so that we can use the | 139 | /* failed - set the hijacked flag so that we can use the |
142 | * pointer as a counter */ | 140 | * pointer as a counter */ |
143 | spin_lock_irq(&bitmap->lock); | ||
144 | if (!bitmap->bp[page].map) | 141 | if (!bitmap->bp[page].map) |
145 | bitmap->bp[page].hijacked = 1; | 142 | bitmap->bp[page].hijacked = 1; |
146 | goto out; | 143 | } else if (bitmap->bp[page].map || |
147 | } | 144 | bitmap->bp[page].hijacked) { |
148 | |||
149 | /* got a page */ | ||
150 | |||
151 | spin_lock_irq(&bitmap->lock); | ||
152 | |||
153 | /* recheck the page */ | ||
154 | |||
155 | if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { | ||
156 | /* somebody beat us to getting the page */ | 145 | /* somebody beat us to getting the page */ |
157 | bitmap_free_page(bitmap, mappage); | 146 | bitmap_free_page(bitmap, mappage); |
158 | return 0; | 147 | return 0; |
159 | } | 148 | } else { |
160 | 149 | ||
161 | /* no page was in place and we have one, so install it */ | 150 | /* no page was in place and we have one, so install it */ |
162 | 151 | ||
163 | memset(mappage, 0, PAGE_SIZE); | 152 | bitmap->bp[page].map = mappage; |
164 | bitmap->bp[page].map = mappage; | 153 | bitmap->missing_pages--; |
165 | bitmap->missing_pages--; | 154 | } |
166 | out: | ||
167 | return 0; | 155 | return 0; |
168 | } | 156 | } |
169 | 157 | ||
170 | |||
171 | /* if page is completely empty, put it back on the free list, or dealloc it */ | 158 | /* if page is completely empty, put it back on the free list, or dealloc it */ |
172 | /* if page was hijacked, unmark the flag so it might get alloced next time */ | 159 | /* if page was hijacked, unmark the flag so it might get alloced next time */ |
173 | /* Note: lock should be held when calling this */ | 160 | /* Note: lock should be held when calling this */ |
@@ -183,26 +170,15 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) | |||
183 | if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ | 170 | if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ |
184 | bitmap->bp[page].hijacked = 0; | 171 | bitmap->bp[page].hijacked = 0; |
185 | bitmap->bp[page].map = NULL; | 172 | bitmap->bp[page].map = NULL; |
186 | return; | 173 | } else { |
174 | /* normal case, free the page */ | ||
175 | ptr = bitmap->bp[page].map; | ||
176 | bitmap->bp[page].map = NULL; | ||
177 | bitmap->missing_pages++; | ||
178 | bitmap_free_page(bitmap, ptr); | ||
187 | } | 179 | } |
188 | |||
189 | /* normal case, free the page */ | ||
190 | |||
191 | #if 0 | ||
192 | /* actually ... let's not. We will probably need the page again exactly when | ||
193 | * memory is tight and we are flusing to disk | ||
194 | */ | ||
195 | return; | ||
196 | #else | ||
197 | ptr = bitmap->bp[page].map; | ||
198 | bitmap->bp[page].map = NULL; | ||
199 | bitmap->missing_pages++; | ||
200 | bitmap_free_page(bitmap, ptr); | ||
201 | return; | ||
202 | #endif | ||
203 | } | 180 | } |
204 | 181 | ||
205 | |||
206 | /* | 182 | /* |
207 | * bitmap file handling - read and write the bitmap file and its superblock | 183 | * bitmap file handling - read and write the bitmap file and its superblock |
208 | */ | 184 | */ |
@@ -220,11 +196,14 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, | |||
220 | 196 | ||
221 | mdk_rdev_t *rdev; | 197 | mdk_rdev_t *rdev; |
222 | sector_t target; | 198 | sector_t target; |
199 | int did_alloc = 0; | ||
223 | 200 | ||
224 | if (!page) | 201 | if (!page) { |
225 | page = alloc_page(GFP_KERNEL); | 202 | page = alloc_page(GFP_KERNEL); |
226 | if (!page) | 203 | if (!page) |
227 | return ERR_PTR(-ENOMEM); | 204 | return ERR_PTR(-ENOMEM); |
205 | did_alloc = 1; | ||
206 | } | ||
228 | 207 | ||
229 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 208 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
230 | if (! test_bit(In_sync, &rdev->flags) | 209 | if (! test_bit(In_sync, &rdev->flags) |
@@ -242,6 +221,8 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, | |||
242 | return page; | 221 | return page; |
243 | } | 222 | } |
244 | } | 223 | } |
224 | if (did_alloc) | ||
225 | put_page(page); | ||
245 | return ERR_PTR(-EIO); | 226 | return ERR_PTR(-EIO); |
246 | 227 | ||
247 | } | 228 | } |
@@ -286,49 +267,51 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
286 | mddev_t *mddev = bitmap->mddev; | 267 | mddev_t *mddev = bitmap->mddev; |
287 | 268 | ||
288 | while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { | 269 | while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { |
289 | int size = PAGE_SIZE; | 270 | int size = PAGE_SIZE; |
290 | loff_t offset = mddev->bitmap_info.offset; | 271 | loff_t offset = mddev->bitmap_info.offset; |
291 | if (page->index == bitmap->file_pages-1) | 272 | if (page->index == bitmap->file_pages-1) |
292 | size = roundup(bitmap->last_page_size, | 273 | size = roundup(bitmap->last_page_size, |
293 | bdev_logical_block_size(rdev->bdev)); | 274 | bdev_logical_block_size(rdev->bdev)); |
294 | /* Just make sure we aren't corrupting data or | 275 | /* Just make sure we aren't corrupting data or |
295 | * metadata | 276 | * metadata |
296 | */ | 277 | */ |
297 | if (mddev->external) { | 278 | if (mddev->external) { |
298 | /* Bitmap could be anywhere. */ | 279 | /* Bitmap could be anywhere. */ |
299 | if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) > | 280 | if (rdev->sb_start + offset + (page->index |
300 | rdev->data_offset && | 281 | * (PAGE_SIZE/512)) |
301 | rdev->sb_start + offset < | 282 | > rdev->data_offset |
302 | rdev->data_offset + mddev->dev_sectors + | 283 | && |
303 | (PAGE_SIZE/512)) | 284 | rdev->sb_start + offset |
304 | goto bad_alignment; | 285 | < (rdev->data_offset + mddev->dev_sectors |
305 | } else if (offset < 0) { | 286 | + (PAGE_SIZE/512))) |
306 | /* DATA BITMAP METADATA */ | 287 | goto bad_alignment; |
307 | if (offset | 288 | } else if (offset < 0) { |
308 | + (long)(page->index * (PAGE_SIZE/512)) | 289 | /* DATA BITMAP METADATA */ |
309 | + size/512 > 0) | 290 | if (offset |
310 | /* bitmap runs in to metadata */ | 291 | + (long)(page->index * (PAGE_SIZE/512)) |
311 | goto bad_alignment; | 292 | + size/512 > 0) |
312 | if (rdev->data_offset + mddev->dev_sectors | 293 | /* bitmap runs in to metadata */ |
313 | > rdev->sb_start + offset) | 294 | goto bad_alignment; |
314 | /* data runs in to bitmap */ | 295 | if (rdev->data_offset + mddev->dev_sectors |
315 | goto bad_alignment; | 296 | > rdev->sb_start + offset) |
316 | } else if (rdev->sb_start < rdev->data_offset) { | 297 | /* data runs in to bitmap */ |
317 | /* METADATA BITMAP DATA */ | 298 | goto bad_alignment; |
318 | if (rdev->sb_start | 299 | } else if (rdev->sb_start < rdev->data_offset) { |
319 | + offset | 300 | /* METADATA BITMAP DATA */ |
320 | + page->index*(PAGE_SIZE/512) + size/512 | 301 | if (rdev->sb_start |
321 | > rdev->data_offset) | 302 | + offset |
322 | /* bitmap runs in to data */ | 303 | + page->index*(PAGE_SIZE/512) + size/512 |
323 | goto bad_alignment; | 304 | > rdev->data_offset) |
324 | } else { | 305 | /* bitmap runs in to data */ |
325 | /* DATA METADATA BITMAP - no problems */ | 306 | goto bad_alignment; |
326 | } | 307 | } else { |
327 | md_super_write(mddev, rdev, | 308 | /* DATA METADATA BITMAP - no problems */ |
328 | rdev->sb_start + offset | 309 | } |
329 | + page->index * (PAGE_SIZE/512), | 310 | md_super_write(mddev, rdev, |
330 | size, | 311 | rdev->sb_start + offset |
331 | page); | 312 | + page->index * (PAGE_SIZE/512), |
313 | size, | ||
314 | page); | ||
332 | } | 315 | } |
333 | 316 | ||
334 | if (wait) | 317 | if (wait) |
@@ -364,10 +347,9 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) | |||
364 | bh = bh->b_this_page; | 347 | bh = bh->b_this_page; |
365 | } | 348 | } |
366 | 349 | ||
367 | if (wait) { | 350 | if (wait) |
368 | wait_event(bitmap->write_wait, | 351 | wait_event(bitmap->write_wait, |
369 | atomic_read(&bitmap->pending_writes)==0); | 352 | atomic_read(&bitmap->pending_writes)==0); |
370 | } | ||
371 | } | 353 | } |
372 | if (bitmap->flags & BITMAP_WRITE_ERROR) | 354 | if (bitmap->flags & BITMAP_WRITE_ERROR) |
373 | bitmap_file_kick(bitmap); | 355 | bitmap_file_kick(bitmap); |
@@ -424,7 +406,7 @@ static struct page *read_page(struct file *file, unsigned long index, | |||
424 | struct buffer_head *bh; | 406 | struct buffer_head *bh; |
425 | sector_t block; | 407 | sector_t block; |
426 | 408 | ||
427 | PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, | 409 | PRINTK("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, |
428 | (unsigned long long)index << PAGE_SHIFT); | 410 | (unsigned long long)index << PAGE_SHIFT); |
429 | 411 | ||
430 | page = alloc_page(GFP_KERNEL); | 412 | page = alloc_page(GFP_KERNEL); |
@@ -478,7 +460,7 @@ static struct page *read_page(struct file *file, unsigned long index, | |||
478 | } | 460 | } |
479 | out: | 461 | out: |
480 | if (IS_ERR(page)) | 462 | if (IS_ERR(page)) |
481 | printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", | 463 | printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n", |
482 | (int)PAGE_SIZE, | 464 | (int)PAGE_SIZE, |
483 | (unsigned long long)index << PAGE_SHIFT, | 465 | (unsigned long long)index << PAGE_SHIFT, |
484 | PTR_ERR(page)); | 466 | PTR_ERR(page)); |
@@ -664,11 +646,14 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, | |||
664 | sb = kmap_atomic(bitmap->sb_page, KM_USER0); | 646 | sb = kmap_atomic(bitmap->sb_page, KM_USER0); |
665 | old = le32_to_cpu(sb->state) & bits; | 647 | old = le32_to_cpu(sb->state) & bits; |
666 | switch (op) { | 648 | switch (op) { |
667 | case MASK_SET: sb->state |= cpu_to_le32(bits); | 649 | case MASK_SET: |
668 | break; | 650 | sb->state |= cpu_to_le32(bits); |
669 | case MASK_UNSET: sb->state &= cpu_to_le32(~bits); | 651 | break; |
670 | break; | 652 | case MASK_UNSET: |
671 | default: BUG(); | 653 | sb->state &= cpu_to_le32(~bits); |
654 | break; | ||
655 | default: | ||
656 | BUG(); | ||
672 | } | 657 | } |
673 | kunmap_atomic(sb, KM_USER0); | 658 | kunmap_atomic(sb, KM_USER0); |
674 | return old; | 659 | return old; |
@@ -710,12 +695,14 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon | |||
710 | static inline struct page *filemap_get_page(struct bitmap *bitmap, | 695 | static inline struct page *filemap_get_page(struct bitmap *bitmap, |
711 | unsigned long chunk) | 696 | unsigned long chunk) |
712 | { | 697 | { |
713 | if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; | 698 | if (bitmap->filemap == NULL) |
699 | return NULL; | ||
700 | if (file_page_index(bitmap, chunk) >= bitmap->file_pages) | ||
701 | return NULL; | ||
714 | return bitmap->filemap[file_page_index(bitmap, chunk) | 702 | return bitmap->filemap[file_page_index(bitmap, chunk) |
715 | - file_page_index(bitmap, 0)]; | 703 | - file_page_index(bitmap, 0)]; |
716 | } | 704 | } |
717 | 705 | ||
718 | |||
719 | static void bitmap_file_unmap(struct bitmap *bitmap) | 706 | static void bitmap_file_unmap(struct bitmap *bitmap) |
720 | { | 707 | { |
721 | struct page **map, *sb_page; | 708 | struct page **map, *sb_page; |
@@ -766,7 +753,6 @@ static void bitmap_file_put(struct bitmap *bitmap) | |||
766 | } | 753 | } |
767 | } | 754 | } |
768 | 755 | ||
769 | |||
770 | /* | 756 | /* |
771 | * bitmap_file_kick - if an error occurs while manipulating the bitmap file | 757 | * bitmap_file_kick - if an error occurs while manipulating the bitmap file |
772 | * then it is no longer reliable, so we stop using it and we mark the file | 758 | * then it is no longer reliable, so we stop using it and we mark the file |
@@ -785,7 +771,6 @@ static void bitmap_file_kick(struct bitmap *bitmap) | |||
785 | ptr = d_path(&bitmap->file->f_path, path, | 771 | ptr = d_path(&bitmap->file->f_path, path, |
786 | PAGE_SIZE); | 772 | PAGE_SIZE); |
787 | 773 | ||
788 | |||
789 | printk(KERN_ALERT | 774 | printk(KERN_ALERT |
790 | "%s: kicking failed bitmap file %s from array!\n", | 775 | "%s: kicking failed bitmap file %s from array!\n", |
791 | bmname(bitmap), IS_ERR(ptr) ? "" : ptr); | 776 | bmname(bitmap), IS_ERR(ptr) ? "" : ptr); |
@@ -803,27 +788,36 @@ static void bitmap_file_kick(struct bitmap *bitmap) | |||
803 | } | 788 | } |
804 | 789 | ||
805 | enum bitmap_page_attr { | 790 | enum bitmap_page_attr { |
806 | BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced | 791 | BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ |
807 | BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared | 792 | BITMAP_PAGE_CLEAN = 1, /* there are bits that might need to be cleared */ |
808 | BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced | 793 | BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ |
809 | }; | 794 | }; |
810 | 795 | ||
811 | static inline void set_page_attr(struct bitmap *bitmap, struct page *page, | 796 | static inline void set_page_attr(struct bitmap *bitmap, struct page *page, |
812 | enum bitmap_page_attr attr) | 797 | enum bitmap_page_attr attr) |
813 | { | 798 | { |
814 | __set_bit((page->index<<2) + attr, bitmap->filemap_attr); | 799 | if (page) |
800 | __set_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
801 | else | ||
802 | __set_bit(attr, &bitmap->logattrs); | ||
815 | } | 803 | } |
816 | 804 | ||
817 | static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, | 805 | static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, |
818 | enum bitmap_page_attr attr) | 806 | enum bitmap_page_attr attr) |
819 | { | 807 | { |
820 | __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); | 808 | if (page) |
809 | __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
810 | else | ||
811 | __clear_bit(attr, &bitmap->logattrs); | ||
821 | } | 812 | } |
822 | 813 | ||
823 | static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, | 814 | static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, |
824 | enum bitmap_page_attr attr) | 815 | enum bitmap_page_attr attr) |
825 | { | 816 | { |
826 | return test_bit((page->index<<2) + attr, bitmap->filemap_attr); | 817 | if (page) |
818 | return test_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
819 | else | ||
820 | return test_bit(attr, &bitmap->logattrs); | ||
827 | } | 821 | } |
828 | 822 | ||
829 | /* | 823 | /* |
@@ -836,30 +830,32 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p | |||
836 | static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) | 830 | static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) |
837 | { | 831 | { |
838 | unsigned long bit; | 832 | unsigned long bit; |
839 | struct page *page; | 833 | struct page *page = NULL; |
840 | void *kaddr; | 834 | void *kaddr; |
841 | unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); | 835 | unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); |
842 | 836 | ||
843 | if (!bitmap->filemap) { | 837 | if (!bitmap->filemap) { |
844 | return; | 838 | struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; |
845 | } | 839 | if (log) |
846 | 840 | log->type->mark_region(log, chunk); | |
847 | page = filemap_get_page(bitmap, chunk); | 841 | } else { |
848 | if (!page) return; | ||
849 | bit = file_page_offset(bitmap, chunk); | ||
850 | 842 | ||
851 | /* set the bit */ | 843 | page = filemap_get_page(bitmap, chunk); |
852 | kaddr = kmap_atomic(page, KM_USER0); | 844 | if (!page) |
853 | if (bitmap->flags & BITMAP_HOSTENDIAN) | 845 | return; |
854 | set_bit(bit, kaddr); | 846 | bit = file_page_offset(bitmap, chunk); |
855 | else | ||
856 | ext2_set_bit(bit, kaddr); | ||
857 | kunmap_atomic(kaddr, KM_USER0); | ||
858 | PRINTK("set file bit %lu page %lu\n", bit, page->index); | ||
859 | 847 | ||
848 | /* set the bit */ | ||
849 | kaddr = kmap_atomic(page, KM_USER0); | ||
850 | if (bitmap->flags & BITMAP_HOSTENDIAN) | ||
851 | set_bit(bit, kaddr); | ||
852 | else | ||
853 | ext2_set_bit(bit, kaddr); | ||
854 | kunmap_atomic(kaddr, KM_USER0); | ||
855 | PRINTK("set file bit %lu page %lu\n", bit, page->index); | ||
856 | } | ||
860 | /* record page number so it gets flushed to disk when unplug occurs */ | 857 | /* record page number so it gets flushed to disk when unplug occurs */ |
861 | set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); | 858 | set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); |
862 | |||
863 | } | 859 | } |
864 | 860 | ||
865 | /* this gets called when the md device is ready to unplug its underlying | 861 | /* this gets called when the md device is ready to unplug its underlying |
@@ -874,6 +870,16 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
874 | 870 | ||
875 | if (!bitmap) | 871 | if (!bitmap) |
876 | return; | 872 | return; |
873 | if (!bitmap->filemap) { | ||
874 | /* Must be using a dirty_log */ | ||
875 | struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; | ||
876 | dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs); | ||
877 | need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs); | ||
878 | if (dirty || need_write) | ||
879 | if (log->type->flush(log)) | ||
880 | bitmap->flags |= BITMAP_WRITE_ERROR; | ||
881 | goto out; | ||
882 | } | ||
877 | 883 | ||
878 | /* look at each page to see if there are any set bits that need to be | 884 | /* look at each page to see if there are any set bits that need to be |
879 | * flushed out to disk */ | 885 | * flushed out to disk */ |
@@ -892,7 +898,7 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
892 | wait = 1; | 898 | wait = 1; |
893 | spin_unlock_irqrestore(&bitmap->lock, flags); | 899 | spin_unlock_irqrestore(&bitmap->lock, flags); |
894 | 900 | ||
895 | if (dirty | need_write) | 901 | if (dirty || need_write) |
896 | write_page(bitmap, page, 0); | 902 | write_page(bitmap, page, 0); |
897 | } | 903 | } |
898 | if (wait) { /* if any writes were performed, we need to wait on them */ | 904 | if (wait) { /* if any writes were performed, we need to wait on them */ |
@@ -902,9 +908,11 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
902 | else | 908 | else |
903 | md_super_wait(bitmap->mddev); | 909 | md_super_wait(bitmap->mddev); |
904 | } | 910 | } |
911 | out: | ||
905 | if (bitmap->flags & BITMAP_WRITE_ERROR) | 912 | if (bitmap->flags & BITMAP_WRITE_ERROR) |
906 | bitmap_file_kick(bitmap); | 913 | bitmap_file_kick(bitmap); |
907 | } | 914 | } |
915 | EXPORT_SYMBOL(bitmap_unplug); | ||
908 | 916 | ||
909 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); | 917 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); |
910 | /* * bitmap_init_from_disk -- called at bitmap_create time to initialize | 918 | /* * bitmap_init_from_disk -- called at bitmap_create time to initialize |
@@ -943,12 +951,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
943 | printk(KERN_INFO "%s: bitmap file is out of date, doing full " | 951 | printk(KERN_INFO "%s: bitmap file is out of date, doing full " |
944 | "recovery\n", bmname(bitmap)); | 952 | "recovery\n", bmname(bitmap)); |
945 | 953 | ||
946 | bytes = (chunks + 7) / 8; | 954 | bytes = DIV_ROUND_UP(bitmap->chunks, 8); |
947 | if (!bitmap->mddev->bitmap_info.external) | 955 | if (!bitmap->mddev->bitmap_info.external) |
948 | bytes += sizeof(bitmap_super_t); | 956 | bytes += sizeof(bitmap_super_t); |
949 | 957 | ||
950 | 958 | num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); | |
951 | num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE; | ||
952 | 959 | ||
953 | if (file && i_size_read(file->f_mapping->host) < bytes) { | 960 | if (file && i_size_read(file->f_mapping->host) < bytes) { |
954 | printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", | 961 | printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", |
@@ -966,7 +973,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
966 | 973 | ||
967 | /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ | 974 | /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ |
968 | bitmap->filemap_attr = kzalloc( | 975 | bitmap->filemap_attr = kzalloc( |
969 | roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), | 976 | roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), |
970 | GFP_KERNEL); | 977 | GFP_KERNEL); |
971 | if (!bitmap->filemap_attr) | 978 | if (!bitmap->filemap_attr) |
972 | goto err; | 979 | goto err; |
@@ -1021,7 +1028,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
1021 | if (outofdate) { | 1028 | if (outofdate) { |
1022 | /* | 1029 | /* |
1023 | * if bitmap is out of date, dirty the | 1030 | * if bitmap is out of date, dirty the |
1024 | * whole page and write it out | 1031 | * whole page and write it out |
1025 | */ | 1032 | */ |
1026 | paddr = kmap_atomic(page, KM_USER0); | 1033 | paddr = kmap_atomic(page, KM_USER0); |
1027 | memset(paddr + offset, 0xff, | 1034 | memset(paddr + offset, 0xff, |
@@ -1052,7 +1059,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
1052 | } | 1059 | } |
1053 | } | 1060 | } |
1054 | 1061 | ||
1055 | /* everything went OK */ | 1062 | /* everything went OK */ |
1056 | ret = 0; | 1063 | ret = 0; |
1057 | bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); | 1064 | bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); |
1058 | 1065 | ||
@@ -1080,21 +1087,16 @@ void bitmap_write_all(struct bitmap *bitmap) | |||
1080 | */ | 1087 | */ |
1081 | int i; | 1088 | int i; |
1082 | 1089 | ||
1083 | for (i=0; i < bitmap->file_pages; i++) | 1090 | for (i = 0; i < bitmap->file_pages; i++) |
1084 | set_page_attr(bitmap, bitmap->filemap[i], | 1091 | set_page_attr(bitmap, bitmap->filemap[i], |
1085 | BITMAP_PAGE_NEEDWRITE); | 1092 | BITMAP_PAGE_NEEDWRITE); |
1086 | } | 1093 | } |
1087 | 1094 | ||
1088 | |||
1089 | static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) | 1095 | static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) |
1090 | { | 1096 | { |
1091 | sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); | 1097 | sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); |
1092 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; | 1098 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; |
1093 | bitmap->bp[page].count += inc; | 1099 | bitmap->bp[page].count += inc; |
1094 | /* | ||
1095 | if (page == 0) printk("count page 0, offset %llu: %d gives %d\n", | ||
1096 | (unsigned long long)offset, inc, bitmap->bp[page].count); | ||
1097 | */ | ||
1098 | bitmap_checkfree(bitmap, page); | 1100 | bitmap_checkfree(bitmap, page); |
1099 | } | 1101 | } |
1100 | static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, | 1102 | static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, |
@@ -1114,6 +1116,7 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1114 | struct page *page = NULL, *lastpage = NULL; | 1116 | struct page *page = NULL, *lastpage = NULL; |
1115 | int blocks; | 1117 | int blocks; |
1116 | void *paddr; | 1118 | void *paddr; |
1119 | struct dm_dirty_log *log = mddev->bitmap_info.log; | ||
1117 | 1120 | ||
1118 | /* Use a mutex to guard daemon_work against | 1121 | /* Use a mutex to guard daemon_work against |
1119 | * bitmap_destroy. | 1122 | * bitmap_destroy. |
@@ -1138,11 +1141,12 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1138 | spin_lock_irqsave(&bitmap->lock, flags); | 1141 | spin_lock_irqsave(&bitmap->lock, flags); |
1139 | for (j = 0; j < bitmap->chunks; j++) { | 1142 | for (j = 0; j < bitmap->chunks; j++) { |
1140 | bitmap_counter_t *bmc; | 1143 | bitmap_counter_t *bmc; |
1141 | if (!bitmap->filemap) | 1144 | if (!bitmap->filemap) { |
1142 | /* error or shutdown */ | 1145 | if (!log) |
1143 | break; | 1146 | /* error or shutdown */ |
1144 | 1147 | break; | |
1145 | page = filemap_get_page(bitmap, j); | 1148 | } else |
1149 | page = filemap_get_page(bitmap, j); | ||
1146 | 1150 | ||
1147 | if (page != lastpage) { | 1151 | if (page != lastpage) { |
1148 | /* skip this page unless it's marked as needing cleaning */ | 1152 | /* skip this page unless it's marked as needing cleaning */ |
@@ -1197,14 +1201,11 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1197 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | 1201 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), |
1198 | &blocks, 0); | 1202 | &blocks, 0); |
1199 | if (bmc) { | 1203 | if (bmc) { |
1200 | /* | ||
1201 | if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); | ||
1202 | */ | ||
1203 | if (*bmc) | 1204 | if (*bmc) |
1204 | bitmap->allclean = 0; | 1205 | bitmap->allclean = 0; |
1205 | 1206 | ||
1206 | if (*bmc == 2) { | 1207 | if (*bmc == 2) { |
1207 | *bmc=1; /* maybe clear the bit next time */ | 1208 | *bmc = 1; /* maybe clear the bit next time */ |
1208 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1209 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
1209 | } else if (*bmc == 1 && !bitmap->need_sync) { | 1210 | } else if (*bmc == 1 && !bitmap->need_sync) { |
1210 | /* we can clear the bit */ | 1211 | /* we can clear the bit */ |
@@ -1214,14 +1215,17 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1214 | -1); | 1215 | -1); |
1215 | 1216 | ||
1216 | /* clear the bit */ | 1217 | /* clear the bit */ |
1217 | paddr = kmap_atomic(page, KM_USER0); | 1218 | if (page) { |
1218 | if (bitmap->flags & BITMAP_HOSTENDIAN) | 1219 | paddr = kmap_atomic(page, KM_USER0); |
1219 | clear_bit(file_page_offset(bitmap, j), | 1220 | if (bitmap->flags & BITMAP_HOSTENDIAN) |
1220 | paddr); | 1221 | clear_bit(file_page_offset(bitmap, j), |
1221 | else | 1222 | paddr); |
1222 | ext2_clear_bit(file_page_offset(bitmap, j), | 1223 | else |
1223 | paddr); | 1224 | ext2_clear_bit(file_page_offset(bitmap, j), |
1224 | kunmap_atomic(paddr, KM_USER0); | 1225 | paddr); |
1226 | kunmap_atomic(paddr, KM_USER0); | ||
1227 | } else | ||
1228 | log->type->clear_region(log, j); | ||
1225 | } | 1229 | } |
1226 | } else | 1230 | } else |
1227 | j |= PAGE_COUNTER_MASK; | 1231 | j |= PAGE_COUNTER_MASK; |
@@ -1229,12 +1233,16 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1229 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1233 | spin_unlock_irqrestore(&bitmap->lock, flags); |
1230 | 1234 | ||
1231 | /* now sync the final page */ | 1235 | /* now sync the final page */ |
1232 | if (lastpage != NULL) { | 1236 | if (lastpage != NULL || log != NULL) { |
1233 | spin_lock_irqsave(&bitmap->lock, flags); | 1237 | spin_lock_irqsave(&bitmap->lock, flags); |
1234 | if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { | 1238 | if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { |
1235 | clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); | 1239 | clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); |
1236 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1240 | spin_unlock_irqrestore(&bitmap->lock, flags); |
1237 | write_page(bitmap, lastpage, 0); | 1241 | if (lastpage) |
1242 | write_page(bitmap, lastpage, 0); | ||
1243 | else | ||
1244 | if (log->type->flush(log)) | ||
1245 | bitmap->flags |= BITMAP_WRITE_ERROR; | ||
1238 | } else { | 1246 | } else { |
1239 | set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); | 1247 | set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); |
1240 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1248 | spin_unlock_irqrestore(&bitmap->lock, flags); |
@@ -1243,7 +1251,7 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1243 | 1251 | ||
1244 | done: | 1252 | done: |
1245 | if (bitmap->allclean == 0) | 1253 | if (bitmap->allclean == 0) |
1246 | bitmap->mddev->thread->timeout = | 1254 | bitmap->mddev->thread->timeout = |
1247 | bitmap->mddev->bitmap_info.daemon_sleep; | 1255 | bitmap->mddev->bitmap_info.daemon_sleep; |
1248 | mutex_unlock(&mddev->bitmap_info.mutex); | 1256 | mutex_unlock(&mddev->bitmap_info.mutex); |
1249 | } | 1257 | } |
@@ -1262,34 +1270,38 @@ __acquires(bitmap->lock) | |||
1262 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; | 1270 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; |
1263 | unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; | 1271 | unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; |
1264 | sector_t csize; | 1272 | sector_t csize; |
1273 | int err; | ||
1265 | 1274 | ||
1266 | if (bitmap_checkpage(bitmap, page, create) < 0) { | 1275 | err = bitmap_checkpage(bitmap, page, create); |
1276 | |||
1277 | if (bitmap->bp[page].hijacked || | ||
1278 | bitmap->bp[page].map == NULL) | ||
1279 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + | ||
1280 | PAGE_COUNTER_SHIFT - 1); | ||
1281 | else | ||
1267 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); | 1282 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); |
1268 | *blocks = csize - (offset & (csize- 1)); | 1283 | *blocks = csize - (offset & (csize - 1)); |
1284 | |||
1285 | if (err < 0) | ||
1269 | return NULL; | 1286 | return NULL; |
1270 | } | 1287 | |
1271 | /* now locked ... */ | 1288 | /* now locked ... */ |
1272 | 1289 | ||
1273 | if (bitmap->bp[page].hijacked) { /* hijacked pointer */ | 1290 | if (bitmap->bp[page].hijacked) { /* hijacked pointer */ |
1274 | /* should we use the first or second counter field | 1291 | /* should we use the first or second counter field |
1275 | * of the hijacked pointer? */ | 1292 | * of the hijacked pointer? */ |
1276 | int hi = (pageoff > PAGE_COUNTER_MASK); | 1293 | int hi = (pageoff > PAGE_COUNTER_MASK); |
1277 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + | ||
1278 | PAGE_COUNTER_SHIFT - 1); | ||
1279 | *blocks = csize - (offset & (csize- 1)); | ||
1280 | return &((bitmap_counter_t *) | 1294 | return &((bitmap_counter_t *) |
1281 | &bitmap->bp[page].map)[hi]; | 1295 | &bitmap->bp[page].map)[hi]; |
1282 | } else { /* page is allocated */ | 1296 | } else /* page is allocated */ |
1283 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); | ||
1284 | *blocks = csize - (offset & (csize- 1)); | ||
1285 | return (bitmap_counter_t *) | 1297 | return (bitmap_counter_t *) |
1286 | &(bitmap->bp[page].map[pageoff]); | 1298 | &(bitmap->bp[page].map[pageoff]); |
1287 | } | ||
1288 | } | 1299 | } |
1289 | 1300 | ||
1290 | int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) | 1301 | int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) |
1291 | { | 1302 | { |
1292 | if (!bitmap) return 0; | 1303 | if (!bitmap) |
1304 | return 0; | ||
1293 | 1305 | ||
1294 | if (behind) { | 1306 | if (behind) { |
1295 | int bw; | 1307 | int bw; |
@@ -1322,17 +1334,16 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect | |||
1322 | prepare_to_wait(&bitmap->overflow_wait, &__wait, | 1334 | prepare_to_wait(&bitmap->overflow_wait, &__wait, |
1323 | TASK_UNINTERRUPTIBLE); | 1335 | TASK_UNINTERRUPTIBLE); |
1324 | spin_unlock_irq(&bitmap->lock); | 1336 | spin_unlock_irq(&bitmap->lock); |
1325 | blk_unplug(bitmap->mddev->queue); | 1337 | md_unplug(bitmap->mddev); |
1326 | schedule(); | 1338 | schedule(); |
1327 | finish_wait(&bitmap->overflow_wait, &__wait); | 1339 | finish_wait(&bitmap->overflow_wait, &__wait); |
1328 | continue; | 1340 | continue; |
1329 | } | 1341 | } |
1330 | 1342 | ||
1331 | switch(*bmc) { | 1343 | switch (*bmc) { |
1332 | case 0: | 1344 | case 0: |
1333 | bitmap_file_set_bit(bitmap, offset); | 1345 | bitmap_file_set_bit(bitmap, offset); |
1334 | bitmap_count_page(bitmap,offset, 1); | 1346 | bitmap_count_page(bitmap, offset, 1); |
1335 | blk_plug_device_unlocked(bitmap->mddev->queue); | ||
1336 | /* fall through */ | 1347 | /* fall through */ |
1337 | case 1: | 1348 | case 1: |
1338 | *bmc = 2; | 1349 | *bmc = 2; |
@@ -1345,16 +1356,19 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect | |||
1345 | offset += blocks; | 1356 | offset += blocks; |
1346 | if (sectors > blocks) | 1357 | if (sectors > blocks) |
1347 | sectors -= blocks; | 1358 | sectors -= blocks; |
1348 | else sectors = 0; | 1359 | else |
1360 | sectors = 0; | ||
1349 | } | 1361 | } |
1350 | bitmap->allclean = 0; | 1362 | bitmap->allclean = 0; |
1351 | return 0; | 1363 | return 0; |
1352 | } | 1364 | } |
1365 | EXPORT_SYMBOL(bitmap_startwrite); | ||
1353 | 1366 | ||
1354 | void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, | 1367 | void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, |
1355 | int success, int behind) | 1368 | int success, int behind) |
1356 | { | 1369 | { |
1357 | if (!bitmap) return; | 1370 | if (!bitmap) |
1371 | return; | ||
1358 | if (behind) { | 1372 | if (behind) { |
1359 | if (atomic_dec_and_test(&bitmap->behind_writes)) | 1373 | if (atomic_dec_and_test(&bitmap->behind_writes)) |
1360 | wake_up(&bitmap->behind_wait); | 1374 | wake_up(&bitmap->behind_wait); |
@@ -1381,7 +1395,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
1381 | bitmap->events_cleared < bitmap->mddev->events) { | 1395 | bitmap->events_cleared < bitmap->mddev->events) { |
1382 | bitmap->events_cleared = bitmap->mddev->events; | 1396 | bitmap->events_cleared = bitmap->mddev->events; |
1383 | bitmap->need_sync = 1; | 1397 | bitmap->need_sync = 1; |
1384 | sysfs_notify_dirent(bitmap->sysfs_can_clear); | 1398 | sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); |
1385 | } | 1399 | } |
1386 | 1400 | ||
1387 | if (!success && ! (*bmc & NEEDED_MASK)) | 1401 | if (!success && ! (*bmc & NEEDED_MASK)) |
@@ -1391,18 +1405,22 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
1391 | wake_up(&bitmap->overflow_wait); | 1405 | wake_up(&bitmap->overflow_wait); |
1392 | 1406 | ||
1393 | (*bmc)--; | 1407 | (*bmc)--; |
1394 | if (*bmc <= 2) { | 1408 | if (*bmc <= 2) |
1395 | set_page_attr(bitmap, | 1409 | set_page_attr(bitmap, |
1396 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), | 1410 | filemap_get_page( |
1411 | bitmap, | ||
1412 | offset >> CHUNK_BLOCK_SHIFT(bitmap)), | ||
1397 | BITMAP_PAGE_CLEAN); | 1413 | BITMAP_PAGE_CLEAN); |
1398 | } | 1414 | |
1399 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1415 | spin_unlock_irqrestore(&bitmap->lock, flags); |
1400 | offset += blocks; | 1416 | offset += blocks; |
1401 | if (sectors > blocks) | 1417 | if (sectors > blocks) |
1402 | sectors -= blocks; | 1418 | sectors -= blocks; |
1403 | else sectors = 0; | 1419 | else |
1420 | sectors = 0; | ||
1404 | } | 1421 | } |
1405 | } | 1422 | } |
1423 | EXPORT_SYMBOL(bitmap_endwrite); | ||
1406 | 1424 | ||
1407 | static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, | 1425 | static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, |
1408 | int degraded) | 1426 | int degraded) |
@@ -1455,14 +1473,14 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, | |||
1455 | } | 1473 | } |
1456 | return rv; | 1474 | return rv; |
1457 | } | 1475 | } |
1476 | EXPORT_SYMBOL(bitmap_start_sync); | ||
1458 | 1477 | ||
1459 | void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) | 1478 | void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) |
1460 | { | 1479 | { |
1461 | bitmap_counter_t *bmc; | 1480 | bitmap_counter_t *bmc; |
1462 | unsigned long flags; | 1481 | unsigned long flags; |
1463 | /* | 1482 | |
1464 | if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted); | 1483 | if (bitmap == NULL) { |
1465 | */ if (bitmap == NULL) { | ||
1466 | *blocks = 1024; | 1484 | *blocks = 1024; |
1467 | return; | 1485 | return; |
1468 | } | 1486 | } |
@@ -1471,26 +1489,23 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab | |||
1471 | if (bmc == NULL) | 1489 | if (bmc == NULL) |
1472 | goto unlock; | 1490 | goto unlock; |
1473 | /* locked */ | 1491 | /* locked */ |
1474 | /* | ||
1475 | if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks); | ||
1476 | */ | ||
1477 | if (RESYNC(*bmc)) { | 1492 | if (RESYNC(*bmc)) { |
1478 | *bmc &= ~RESYNC_MASK; | 1493 | *bmc &= ~RESYNC_MASK; |
1479 | 1494 | ||
1480 | if (!NEEDED(*bmc) && aborted) | 1495 | if (!NEEDED(*bmc) && aborted) |
1481 | *bmc |= NEEDED_MASK; | 1496 | *bmc |= NEEDED_MASK; |
1482 | else { | 1497 | else { |
1483 | if (*bmc <= 2) { | 1498 | if (*bmc <= 2) |
1484 | set_page_attr(bitmap, | 1499 | set_page_attr(bitmap, |
1485 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), | 1500 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), |
1486 | BITMAP_PAGE_CLEAN); | 1501 | BITMAP_PAGE_CLEAN); |
1487 | } | ||
1488 | } | 1502 | } |
1489 | } | 1503 | } |
1490 | unlock: | 1504 | unlock: |
1491 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1505 | spin_unlock_irqrestore(&bitmap->lock, flags); |
1492 | bitmap->allclean = 0; | 1506 | bitmap->allclean = 0; |
1493 | } | 1507 | } |
1508 | EXPORT_SYMBOL(bitmap_end_sync); | ||
1494 | 1509 | ||
1495 | void bitmap_close_sync(struct bitmap *bitmap) | 1510 | void bitmap_close_sync(struct bitmap *bitmap) |
1496 | { | 1511 | { |
@@ -1507,6 +1522,7 @@ void bitmap_close_sync(struct bitmap *bitmap) | |||
1507 | sector += blocks; | 1522 | sector += blocks; |
1508 | } | 1523 | } |
1509 | } | 1524 | } |
1525 | EXPORT_SYMBOL(bitmap_close_sync); | ||
1510 | 1526 | ||
1511 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | 1527 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) |
1512 | { | 1528 | { |
@@ -1526,7 +1542,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | |||
1526 | atomic_read(&bitmap->mddev->recovery_active) == 0); | 1542 | atomic_read(&bitmap->mddev->recovery_active) == 0); |
1527 | 1543 | ||
1528 | bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; | 1544 | bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; |
1529 | set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); | 1545 | if (bitmap->mddev->persistent) |
1546 | set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); | ||
1530 | sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); | 1547 | sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); |
1531 | s = 0; | 1548 | s = 0; |
1532 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { | 1549 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { |
@@ -1536,6 +1553,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | |||
1536 | bitmap->last_end_sync = jiffies; | 1553 | bitmap->last_end_sync = jiffies; |
1537 | sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); | 1554 | sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); |
1538 | } | 1555 | } |
1556 | EXPORT_SYMBOL(bitmap_cond_end_sync); | ||
1539 | 1557 | ||
1540 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) | 1558 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) |
1541 | { | 1559 | { |
@@ -1552,9 +1570,9 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n | |||
1552 | spin_unlock_irq(&bitmap->lock); | 1570 | spin_unlock_irq(&bitmap->lock); |
1553 | return; | 1571 | return; |
1554 | } | 1572 | } |
1555 | if (! *bmc) { | 1573 | if (!*bmc) { |
1556 | struct page *page; | 1574 | struct page *page; |
1557 | *bmc = 1 | (needed?NEEDED_MASK:0); | 1575 | *bmc = 1 | (needed ? NEEDED_MASK : 0); |
1558 | bitmap_count_page(bitmap, offset, 1); | 1576 | bitmap_count_page(bitmap, offset, 1); |
1559 | page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); | 1577 | page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); |
1560 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1578 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
@@ -1663,15 +1681,17 @@ int bitmap_create(mddev_t *mddev) | |||
1663 | unsigned long pages; | 1681 | unsigned long pages; |
1664 | struct file *file = mddev->bitmap_info.file; | 1682 | struct file *file = mddev->bitmap_info.file; |
1665 | int err; | 1683 | int err; |
1666 | sector_t start; | 1684 | struct sysfs_dirent *bm = NULL; |
1667 | struct sysfs_dirent *bm; | ||
1668 | 1685 | ||
1669 | BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); | 1686 | BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); |
1670 | 1687 | ||
1671 | if (!file && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ | 1688 | if (!file |
1689 | && !mddev->bitmap_info.offset | ||
1690 | && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */ | ||
1672 | return 0; | 1691 | return 0; |
1673 | 1692 | ||
1674 | BUG_ON(file && mddev->bitmap_info.offset); | 1693 | BUG_ON(file && mddev->bitmap_info.offset); |
1694 | BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log); | ||
1675 | 1695 | ||
1676 | bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); | 1696 | bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); |
1677 | if (!bitmap) | 1697 | if (!bitmap) |
@@ -1685,7 +1705,8 @@ int bitmap_create(mddev_t *mddev) | |||
1685 | 1705 | ||
1686 | bitmap->mddev = mddev; | 1706 | bitmap->mddev = mddev; |
1687 | 1707 | ||
1688 | bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); | 1708 | if (mddev->kobj.sd) |
1709 | bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); | ||
1689 | if (bm) { | 1710 | if (bm) { |
1690 | bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); | 1711 | bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); |
1691 | sysfs_put(bm); | 1712 | sysfs_put(bm); |
@@ -1719,9 +1740,9 @@ int bitmap_create(mddev_t *mddev) | |||
1719 | bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); | 1740 | bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); |
1720 | 1741 | ||
1721 | /* now that chunksize and chunkshift are set, we can use these macros */ | 1742 | /* now that chunksize and chunkshift are set, we can use these macros */ |
1722 | chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> | 1743 | chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> |
1723 | CHUNK_BLOCK_SHIFT(bitmap); | 1744 | CHUNK_BLOCK_SHIFT(bitmap); |
1724 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; | 1745 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; |
1725 | 1746 | ||
1726 | BUG_ON(!pages); | 1747 | BUG_ON(!pages); |
1727 | 1748 | ||
@@ -1741,27 +1762,11 @@ int bitmap_create(mddev_t *mddev) | |||
1741 | if (!bitmap->bp) | 1762 | if (!bitmap->bp) |
1742 | goto error; | 1763 | goto error; |
1743 | 1764 | ||
1744 | /* now that we have some pages available, initialize the in-memory | ||
1745 | * bitmap from the on-disk bitmap */ | ||
1746 | start = 0; | ||
1747 | if (mddev->degraded == 0 | ||
1748 | || bitmap->events_cleared == mddev->events) | ||
1749 | /* no need to keep dirty bits to optimise a re-add of a missing device */ | ||
1750 | start = mddev->recovery_cp; | ||
1751 | err = bitmap_init_from_disk(bitmap, start); | ||
1752 | |||
1753 | if (err) | ||
1754 | goto error; | ||
1755 | |||
1756 | printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", | 1765 | printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", |
1757 | pages, bmname(bitmap)); | 1766 | pages, bmname(bitmap)); |
1758 | 1767 | ||
1759 | mddev->bitmap = bitmap; | 1768 | mddev->bitmap = bitmap; |
1760 | 1769 | ||
1761 | mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; | ||
1762 | md_wakeup_thread(mddev->thread); | ||
1763 | |||
1764 | bitmap_update_sb(bitmap); | ||
1765 | 1770 | ||
1766 | return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; | 1771 | return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; |
1767 | 1772 | ||
@@ -1770,15 +1775,69 @@ int bitmap_create(mddev_t *mddev) | |||
1770 | return err; | 1775 | return err; |
1771 | } | 1776 | } |
1772 | 1777 | ||
1778 | int bitmap_load(mddev_t *mddev) | ||
1779 | { | ||
1780 | int err = 0; | ||
1781 | sector_t sector = 0; | ||
1782 | struct bitmap *bitmap = mddev->bitmap; | ||
1783 | |||
1784 | if (!bitmap) | ||
1785 | goto out; | ||
1786 | |||
1787 | /* Clear out old bitmap info first: Either there is none, or we | ||
1788 | * are resuming after someone else has possibly changed things, | ||
1789 | * so we should forget old cached info. | ||
1790 | * All chunks should be clean, but some might need_sync. | ||
1791 | */ | ||
1792 | while (sector < mddev->resync_max_sectors) { | ||
1793 | int blocks; | ||
1794 | bitmap_start_sync(bitmap, sector, &blocks, 0); | ||
1795 | sector += blocks; | ||
1796 | } | ||
1797 | bitmap_close_sync(bitmap); | ||
1798 | |||
1799 | if (mddev->bitmap_info.log) { | ||
1800 | unsigned long i; | ||
1801 | struct dm_dirty_log *log = mddev->bitmap_info.log; | ||
1802 | for (i = 0; i < bitmap->chunks; i++) | ||
1803 | if (!log->type->in_sync(log, i, 1)) | ||
1804 | bitmap_set_memory_bits(bitmap, | ||
1805 | (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), | ||
1806 | 1); | ||
1807 | } else { | ||
1808 | sector_t start = 0; | ||
1809 | if (mddev->degraded == 0 | ||
1810 | || bitmap->events_cleared == mddev->events) | ||
1811 | /* no need to keep dirty bits to optimise a | ||
1812 | * re-add of a missing device */ | ||
1813 | start = mddev->recovery_cp; | ||
1814 | |||
1815 | err = bitmap_init_from_disk(bitmap, start); | ||
1816 | } | ||
1817 | if (err) | ||
1818 | goto out; | ||
1819 | |||
1820 | mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; | ||
1821 | md_wakeup_thread(mddev->thread); | ||
1822 | |||
1823 | bitmap_update_sb(bitmap); | ||
1824 | |||
1825 | if (bitmap->flags & BITMAP_WRITE_ERROR) | ||
1826 | err = -EIO; | ||
1827 | out: | ||
1828 | return err; | ||
1829 | } | ||
1830 | EXPORT_SYMBOL_GPL(bitmap_load); | ||
1831 | |||
1773 | static ssize_t | 1832 | static ssize_t |
1774 | location_show(mddev_t *mddev, char *page) | 1833 | location_show(mddev_t *mddev, char *page) |
1775 | { | 1834 | { |
1776 | ssize_t len; | 1835 | ssize_t len; |
1777 | if (mddev->bitmap_info.file) { | 1836 | if (mddev->bitmap_info.file) |
1778 | len = sprintf(page, "file"); | 1837 | len = sprintf(page, "file"); |
1779 | } else if (mddev->bitmap_info.offset) { | 1838 | else if (mddev->bitmap_info.offset) |
1780 | len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); | 1839 | len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); |
1781 | } else | 1840 | else |
1782 | len = sprintf(page, "none"); | 1841 | len = sprintf(page, "none"); |
1783 | len += sprintf(page+len, "\n"); | 1842 | len += sprintf(page+len, "\n"); |
1784 | return len; | 1843 | return len; |
@@ -1867,7 +1926,7 @@ timeout_show(mddev_t *mddev, char *page) | |||
1867 | ssize_t len; | 1926 | ssize_t len; |
1868 | unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; | 1927 | unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; |
1869 | unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; | 1928 | unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; |
1870 | 1929 | ||
1871 | len = sprintf(page, "%lu", secs); | 1930 | len = sprintf(page, "%lu", secs); |
1872 | if (jifs) | 1931 | if (jifs) |
1873 | len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); | 1932 | len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); |
@@ -2049,12 +2108,3 @@ struct attribute_group md_bitmap_group = { | |||
2049 | .attrs = md_bitmap_attrs, | 2108 | .attrs = md_bitmap_attrs, |
2050 | }; | 2109 | }; |
2051 | 2110 | ||
2052 | |||
2053 | /* the bitmap API -- for raid personalities */ | ||
2054 | EXPORT_SYMBOL(bitmap_startwrite); | ||
2055 | EXPORT_SYMBOL(bitmap_endwrite); | ||
2056 | EXPORT_SYMBOL(bitmap_start_sync); | ||
2057 | EXPORT_SYMBOL(bitmap_end_sync); | ||
2058 | EXPORT_SYMBOL(bitmap_unplug); | ||
2059 | EXPORT_SYMBOL(bitmap_close_sync); | ||
2060 | EXPORT_SYMBOL(bitmap_cond_end_sync); | ||
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 3797dea4723a..e872a7bad6b8 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h | |||
@@ -222,6 +222,10 @@ struct bitmap { | |||
222 | unsigned long file_pages; /* number of pages in the file */ | 222 | unsigned long file_pages; /* number of pages in the file */ |
223 | int last_page_size; /* bytes in the last page */ | 223 | int last_page_size; /* bytes in the last page */ |
224 | 224 | ||
225 | unsigned long logattrs; /* used when filemap_attr doesn't exist | ||
226 | * because we are working with a dirty_log | ||
227 | */ | ||
228 | |||
225 | unsigned long flags; | 229 | unsigned long flags; |
226 | 230 | ||
227 | int allclean; | 231 | int allclean; |
@@ -243,12 +247,14 @@ struct bitmap { | |||
243 | wait_queue_head_t behind_wait; | 247 | wait_queue_head_t behind_wait; |
244 | 248 | ||
245 | struct sysfs_dirent *sysfs_can_clear; | 249 | struct sysfs_dirent *sysfs_can_clear; |
250 | |||
246 | }; | 251 | }; |
247 | 252 | ||
248 | /* the bitmap API */ | 253 | /* the bitmap API */ |
249 | 254 | ||
250 | /* these are used only by md/bitmap */ | 255 | /* these are used only by md/bitmap */ |
251 | int bitmap_create(mddev_t *mddev); | 256 | int bitmap_create(mddev_t *mddev); |
257 | int bitmap_load(mddev_t *mddev); | ||
252 | void bitmap_flush(mddev_t *mddev); | 258 | void bitmap_flush(mddev_t *mddev); |
253 | void bitmap_destroy(mddev_t *mddev); | 259 | void bitmap_destroy(mddev_t *mddev); |
254 | 260 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index cb20d0b0555a..d44efb267a69 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -261,7 +261,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio) | |||
261 | * Once ->stop is called and completes, the module will be completely | 261 | * Once ->stop is called and completes, the module will be completely |
262 | * unused. | 262 | * unused. |
263 | */ | 263 | */ |
264 | static void mddev_suspend(mddev_t *mddev) | 264 | void mddev_suspend(mddev_t *mddev) |
265 | { | 265 | { |
266 | BUG_ON(mddev->suspended); | 266 | BUG_ON(mddev->suspended); |
267 | mddev->suspended = 1; | 267 | mddev->suspended = 1; |
@@ -269,13 +269,15 @@ static void mddev_suspend(mddev_t *mddev) | |||
269 | wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); | 269 | wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); |
270 | mddev->pers->quiesce(mddev, 1); | 270 | mddev->pers->quiesce(mddev, 1); |
271 | } | 271 | } |
272 | EXPORT_SYMBOL_GPL(mddev_suspend); | ||
272 | 273 | ||
273 | static void mddev_resume(mddev_t *mddev) | 274 | void mddev_resume(mddev_t *mddev) |
274 | { | 275 | { |
275 | mddev->suspended = 0; | 276 | mddev->suspended = 0; |
276 | wake_up(&mddev->sb_wait); | 277 | wake_up(&mddev->sb_wait); |
277 | mddev->pers->quiesce(mddev, 0); | 278 | mddev->pers->quiesce(mddev, 0); |
278 | } | 279 | } |
280 | EXPORT_SYMBOL_GPL(mddev_resume); | ||
279 | 281 | ||
280 | int mddev_congested(mddev_t *mddev, int bits) | 282 | int mddev_congested(mddev_t *mddev, int bits) |
281 | { | 283 | { |
@@ -384,6 +386,51 @@ void md_barrier_request(mddev_t *mddev, struct bio *bio) | |||
384 | } | 386 | } |
385 | EXPORT_SYMBOL(md_barrier_request); | 387 | EXPORT_SYMBOL(md_barrier_request); |
386 | 388 | ||
389 | /* Support for plugging. | ||
390 | * This mirrors the plugging support in request_queue, but does not | ||
391 | * require having a whole queue | ||
392 | */ | ||
393 | static void plugger_work(struct work_struct *work) | ||
394 | { | ||
395 | struct plug_handle *plug = | ||
396 | container_of(work, struct plug_handle, unplug_work); | ||
397 | plug->unplug_fn(plug); | ||
398 | } | ||
399 | static void plugger_timeout(unsigned long data) | ||
400 | { | ||
401 | struct plug_handle *plug = (void *)data; | ||
402 | kblockd_schedule_work(NULL, &plug->unplug_work); | ||
403 | } | ||
404 | void plugger_init(struct plug_handle *plug, | ||
405 | void (*unplug_fn)(struct plug_handle *)) | ||
406 | { | ||
407 | plug->unplug_flag = 0; | ||
408 | plug->unplug_fn = unplug_fn; | ||
409 | init_timer(&plug->unplug_timer); | ||
410 | plug->unplug_timer.function = plugger_timeout; | ||
411 | plug->unplug_timer.data = (unsigned long)plug; | ||
412 | INIT_WORK(&plug->unplug_work, plugger_work); | ||
413 | } | ||
414 | EXPORT_SYMBOL_GPL(plugger_init); | ||
415 | |||
416 | void plugger_set_plug(struct plug_handle *plug) | ||
417 | { | ||
418 | if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag)) | ||
419 | mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1); | ||
420 | } | ||
421 | EXPORT_SYMBOL_GPL(plugger_set_plug); | ||
422 | |||
423 | int plugger_remove_plug(struct plug_handle *plug) | ||
424 | { | ||
425 | if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) { | ||
426 | del_timer(&plug->unplug_timer); | ||
427 | return 1; | ||
428 | } else | ||
429 | return 0; | ||
430 | } | ||
431 | EXPORT_SYMBOL_GPL(plugger_remove_plug); | ||
432 | |||
433 | |||
387 | static inline mddev_t *mddev_get(mddev_t *mddev) | 434 | static inline mddev_t *mddev_get(mddev_t *mddev) |
388 | { | 435 | { |
389 | atomic_inc(&mddev->active); | 436 | atomic_inc(&mddev->active); |
@@ -416,7 +463,7 @@ static void mddev_put(mddev_t *mddev) | |||
416 | spin_unlock(&all_mddevs_lock); | 463 | spin_unlock(&all_mddevs_lock); |
417 | } | 464 | } |
418 | 465 | ||
419 | static void mddev_init(mddev_t *mddev) | 466 | void mddev_init(mddev_t *mddev) |
420 | { | 467 | { |
421 | mutex_init(&mddev->open_mutex); | 468 | mutex_init(&mddev->open_mutex); |
422 | mutex_init(&mddev->reconfig_mutex); | 469 | mutex_init(&mddev->reconfig_mutex); |
@@ -436,6 +483,7 @@ static void mddev_init(mddev_t *mddev) | |||
436 | mddev->resync_max = MaxSector; | 483 | mddev->resync_max = MaxSector; |
437 | mddev->level = LEVEL_NONE; | 484 | mddev->level = LEVEL_NONE; |
438 | } | 485 | } |
486 | EXPORT_SYMBOL_GPL(mddev_init); | ||
439 | 487 | ||
440 | static mddev_t * mddev_find(dev_t unit) | 488 | static mddev_t * mddev_find(dev_t unit) |
441 | { | 489 | { |
@@ -532,25 +580,31 @@ static void mddev_unlock(mddev_t * mddev) | |||
532 | * an access to the files will try to take reconfig_mutex | 580 | * an access to the files will try to take reconfig_mutex |
533 | * while holding the file unremovable, which leads to | 581 | * while holding the file unremovable, which leads to |
534 | * a deadlock. | 582 | * a deadlock. |
535 | * So hold open_mutex instead - we are allowed to take | 583 | * So set sysfs_active while the remove is happening, |
536 | * it while holding reconfig_mutex, and md_run can | 584 | * and anything else which might set ->to_remove or may |
537 | * use it to wait for the remove to complete. | 585 | * otherwise change the sysfs namespace will fail with |
586 | * -EBUSY if sysfs_active is still set. | ||
587 | * We set sysfs_active under reconfig_mutex and elsewhere | ||
588 | * test it under the same mutex to ensure its correct value | ||
589 | * is seen. | ||
538 | */ | 590 | */ |
539 | struct attribute_group *to_remove = mddev->to_remove; | 591 | struct attribute_group *to_remove = mddev->to_remove; |
540 | mddev->to_remove = NULL; | 592 | mddev->to_remove = NULL; |
541 | mutex_lock(&mddev->open_mutex); | 593 | mddev->sysfs_active = 1; |
542 | mutex_unlock(&mddev->reconfig_mutex); | 594 | mutex_unlock(&mddev->reconfig_mutex); |
543 | 595 | ||
544 | if (to_remove != &md_redundancy_group) | 596 | if (mddev->kobj.sd) { |
545 | sysfs_remove_group(&mddev->kobj, to_remove); | 597 | if (to_remove != &md_redundancy_group) |
546 | if (mddev->pers == NULL || | 598 | sysfs_remove_group(&mddev->kobj, to_remove); |
547 | mddev->pers->sync_request == NULL) { | 599 | if (mddev->pers == NULL || |
548 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); | 600 | mddev->pers->sync_request == NULL) { |
549 | if (mddev->sysfs_action) | 601 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); |
550 | sysfs_put(mddev->sysfs_action); | 602 | if (mddev->sysfs_action) |
551 | mddev->sysfs_action = NULL; | 603 | sysfs_put(mddev->sysfs_action); |
604 | mddev->sysfs_action = NULL; | ||
605 | } | ||
552 | } | 606 | } |
553 | mutex_unlock(&mddev->open_mutex); | 607 | mddev->sysfs_active = 0; |
554 | } else | 608 | } else |
555 | mutex_unlock(&mddev->reconfig_mutex); | 609 | mutex_unlock(&mddev->reconfig_mutex); |
556 | 610 | ||
@@ -1811,11 +1865,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1811 | goto fail; | 1865 | goto fail; |
1812 | 1866 | ||
1813 | ko = &part_to_dev(rdev->bdev->bd_part)->kobj; | 1867 | ko = &part_to_dev(rdev->bdev->bd_part)->kobj; |
1814 | if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { | 1868 | if (sysfs_create_link(&rdev->kobj, ko, "block")) |
1815 | kobject_del(&rdev->kobj); | 1869 | /* failure here is OK */; |
1816 | goto fail; | 1870 | rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); |
1817 | } | ||
1818 | rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state"); | ||
1819 | 1871 | ||
1820 | list_add_rcu(&rdev->same_set, &mddev->disks); | 1872 | list_add_rcu(&rdev->same_set, &mddev->disks); |
1821 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); | 1873 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); |
@@ -2334,8 +2386,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2334 | set_bit(In_sync, &rdev->flags); | 2386 | set_bit(In_sync, &rdev->flags); |
2335 | err = 0; | 2387 | err = 0; |
2336 | } | 2388 | } |
2337 | if (!err && rdev->sysfs_state) | 2389 | if (!err) |
2338 | sysfs_notify_dirent(rdev->sysfs_state); | 2390 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
2339 | return err ? err : len; | 2391 | return err ? err : len; |
2340 | } | 2392 | } |
2341 | static struct rdev_sysfs_entry rdev_state = | 2393 | static struct rdev_sysfs_entry rdev_state = |
@@ -2430,14 +2482,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2430 | rdev->raid_disk = -1; | 2482 | rdev->raid_disk = -1; |
2431 | return err; | 2483 | return err; |
2432 | } else | 2484 | } else |
2433 | sysfs_notify_dirent(rdev->sysfs_state); | 2485 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
2434 | sprintf(nm, "rd%d", rdev->raid_disk); | 2486 | sprintf(nm, "rd%d", rdev->raid_disk); |
2435 | if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) | 2487 | if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) |
2436 | printk(KERN_WARNING | 2488 | /* failure here is OK */; |
2437 | "md: cannot register " | ||
2438 | "%s for %s\n", | ||
2439 | nm, mdname(rdev->mddev)); | ||
2440 | |||
2441 | /* don't wakeup anyone, leave that to userspace. */ | 2489 | /* don't wakeup anyone, leave that to userspace. */ |
2442 | } else { | 2490 | } else { |
2443 | if (slot >= rdev->mddev->raid_disks) | 2491 | if (slot >= rdev->mddev->raid_disks) |
@@ -2447,7 +2495,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2447 | clear_bit(Faulty, &rdev->flags); | 2495 | clear_bit(Faulty, &rdev->flags); |
2448 | clear_bit(WriteMostly, &rdev->flags); | 2496 | clear_bit(WriteMostly, &rdev->flags); |
2449 | set_bit(In_sync, &rdev->flags); | 2497 | set_bit(In_sync, &rdev->flags); |
2450 | sysfs_notify_dirent(rdev->sysfs_state); | 2498 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
2451 | } | 2499 | } |
2452 | return len; | 2500 | return len; |
2453 | } | 2501 | } |
@@ -2695,6 +2743,24 @@ static struct kobj_type rdev_ktype = { | |||
2695 | .default_attrs = rdev_default_attrs, | 2743 | .default_attrs = rdev_default_attrs, |
2696 | }; | 2744 | }; |
2697 | 2745 | ||
2746 | void md_rdev_init(mdk_rdev_t *rdev) | ||
2747 | { | ||
2748 | rdev->desc_nr = -1; | ||
2749 | rdev->saved_raid_disk = -1; | ||
2750 | rdev->raid_disk = -1; | ||
2751 | rdev->flags = 0; | ||
2752 | rdev->data_offset = 0; | ||
2753 | rdev->sb_events = 0; | ||
2754 | rdev->last_read_error.tv_sec = 0; | ||
2755 | rdev->last_read_error.tv_nsec = 0; | ||
2756 | atomic_set(&rdev->nr_pending, 0); | ||
2757 | atomic_set(&rdev->read_errors, 0); | ||
2758 | atomic_set(&rdev->corrected_errors, 0); | ||
2759 | |||
2760 | INIT_LIST_HEAD(&rdev->same_set); | ||
2761 | init_waitqueue_head(&rdev->blocked_wait); | ||
2762 | } | ||
2763 | EXPORT_SYMBOL_GPL(md_rdev_init); | ||
2698 | /* | 2764 | /* |
2699 | * Import a device. If 'super_format' >= 0, then sanity check the superblock | 2765 | * Import a device. If 'super_format' >= 0, then sanity check the superblock |
2700 | * | 2766 | * |
@@ -2718,6 +2784,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2718 | return ERR_PTR(-ENOMEM); | 2784 | return ERR_PTR(-ENOMEM); |
2719 | } | 2785 | } |
2720 | 2786 | ||
2787 | md_rdev_init(rdev); | ||
2721 | if ((err = alloc_disk_sb(rdev))) | 2788 | if ((err = alloc_disk_sb(rdev))) |
2722 | goto abort_free; | 2789 | goto abort_free; |
2723 | 2790 | ||
@@ -2727,18 +2794,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2727 | 2794 | ||
2728 | kobject_init(&rdev->kobj, &rdev_ktype); | 2795 | kobject_init(&rdev->kobj, &rdev_ktype); |
2729 | 2796 | ||
2730 | rdev->desc_nr = -1; | ||
2731 | rdev->saved_raid_disk = -1; | ||
2732 | rdev->raid_disk = -1; | ||
2733 | rdev->flags = 0; | ||
2734 | rdev->data_offset = 0; | ||
2735 | rdev->sb_events = 0; | ||
2736 | rdev->last_read_error.tv_sec = 0; | ||
2737 | rdev->last_read_error.tv_nsec = 0; | ||
2738 | atomic_set(&rdev->nr_pending, 0); | ||
2739 | atomic_set(&rdev->read_errors, 0); | ||
2740 | atomic_set(&rdev->corrected_errors, 0); | ||
2741 | |||
2742 | size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; | 2797 | size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; |
2743 | if (!size) { | 2798 | if (!size) { |
2744 | printk(KERN_WARNING | 2799 | printk(KERN_WARNING |
@@ -2767,9 +2822,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2767 | } | 2822 | } |
2768 | } | 2823 | } |
2769 | 2824 | ||
2770 | INIT_LIST_HEAD(&rdev->same_set); | ||
2771 | init_waitqueue_head(&rdev->blocked_wait); | ||
2772 | |||
2773 | return rdev; | 2825 | return rdev; |
2774 | 2826 | ||
2775 | abort_free: | 2827 | abort_free: |
@@ -2960,7 +3012,9 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2960 | * - new personality will access other array. | 3012 | * - new personality will access other array. |
2961 | */ | 3013 | */ |
2962 | 3014 | ||
2963 | if (mddev->sync_thread || mddev->reshape_position != MaxSector) | 3015 | if (mddev->sync_thread || |
3016 | mddev->reshape_position != MaxSector || | ||
3017 | mddev->sysfs_active) | ||
2964 | return -EBUSY; | 3018 | return -EBUSY; |
2965 | 3019 | ||
2966 | if (!mddev->pers->quiesce) { | 3020 | if (!mddev->pers->quiesce) { |
@@ -3437,7 +3491,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
3437 | if (err) | 3491 | if (err) |
3438 | return err; | 3492 | return err; |
3439 | else { | 3493 | else { |
3440 | sysfs_notify_dirent(mddev->sysfs_state); | 3494 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
3441 | return len; | 3495 | return len; |
3442 | } | 3496 | } |
3443 | } | 3497 | } |
@@ -3735,7 +3789,7 @@ action_store(mddev_t *mddev, const char *page, size_t len) | |||
3735 | } | 3789 | } |
3736 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3790 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
3737 | md_wakeup_thread(mddev->thread); | 3791 | md_wakeup_thread(mddev->thread); |
3738 | sysfs_notify_dirent(mddev->sysfs_action); | 3792 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
3739 | return len; | 3793 | return len; |
3740 | } | 3794 | } |
3741 | 3795 | ||
@@ -4281,13 +4335,14 @@ static int md_alloc(dev_t dev, char *name) | |||
4281 | disk->disk_name); | 4335 | disk->disk_name); |
4282 | error = 0; | 4336 | error = 0; |
4283 | } | 4337 | } |
4284 | if (sysfs_create_group(&mddev->kobj, &md_bitmap_group)) | 4338 | if (mddev->kobj.sd && |
4339 | sysfs_create_group(&mddev->kobj, &md_bitmap_group)) | ||
4285 | printk(KERN_DEBUG "pointless warning\n"); | 4340 | printk(KERN_DEBUG "pointless warning\n"); |
4286 | abort: | 4341 | abort: |
4287 | mutex_unlock(&disks_mutex); | 4342 | mutex_unlock(&disks_mutex); |
4288 | if (!error) { | 4343 | if (!error && mddev->kobj.sd) { |
4289 | kobject_uevent(&mddev->kobj, KOBJ_ADD); | 4344 | kobject_uevent(&mddev->kobj, KOBJ_ADD); |
4290 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state"); | 4345 | mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); |
4291 | } | 4346 | } |
4292 | mddev_put(mddev); | 4347 | mddev_put(mddev); |
4293 | return error; | 4348 | return error; |
@@ -4325,14 +4380,14 @@ static void md_safemode_timeout(unsigned long data) | |||
4325 | if (!atomic_read(&mddev->writes_pending)) { | 4380 | if (!atomic_read(&mddev->writes_pending)) { |
4326 | mddev->safemode = 1; | 4381 | mddev->safemode = 1; |
4327 | if (mddev->external) | 4382 | if (mddev->external) |
4328 | sysfs_notify_dirent(mddev->sysfs_state); | 4383 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4329 | } | 4384 | } |
4330 | md_wakeup_thread(mddev->thread); | 4385 | md_wakeup_thread(mddev->thread); |
4331 | } | 4386 | } |
4332 | 4387 | ||
4333 | static int start_dirty_degraded; | 4388 | static int start_dirty_degraded; |
4334 | 4389 | ||
4335 | static int md_run(mddev_t *mddev) | 4390 | int md_run(mddev_t *mddev) |
4336 | { | 4391 | { |
4337 | int err; | 4392 | int err; |
4338 | mdk_rdev_t *rdev; | 4393 | mdk_rdev_t *rdev; |
@@ -4344,13 +4399,9 @@ static int md_run(mddev_t *mddev) | |||
4344 | 4399 | ||
4345 | if (mddev->pers) | 4400 | if (mddev->pers) |
4346 | return -EBUSY; | 4401 | return -EBUSY; |
4347 | 4402 | /* Cannot run until previous stop completes properly */ | |
4348 | /* These two calls synchronise us with the | 4403 | if (mddev->sysfs_active) |
4349 | * sysfs_remove_group calls in mddev_unlock, | 4404 | return -EBUSY; |
4350 | * so they must have completed. | ||
4351 | */ | ||
4352 | mutex_lock(&mddev->open_mutex); | ||
4353 | mutex_unlock(&mddev->open_mutex); | ||
4354 | 4405 | ||
4355 | /* | 4406 | /* |
4356 | * Analyze all RAID superblock(s) | 4407 | * Analyze all RAID superblock(s) |
@@ -4397,7 +4448,7 @@ static int md_run(mddev_t *mddev) | |||
4397 | return -EINVAL; | 4448 | return -EINVAL; |
4398 | } | 4449 | } |
4399 | } | 4450 | } |
4400 | sysfs_notify_dirent(rdev->sysfs_state); | 4451 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
4401 | } | 4452 | } |
4402 | 4453 | ||
4403 | spin_lock(&pers_lock); | 4454 | spin_lock(&pers_lock); |
@@ -4496,11 +4547,12 @@ static int md_run(mddev_t *mddev) | |||
4496 | return err; | 4547 | return err; |
4497 | } | 4548 | } |
4498 | if (mddev->pers->sync_request) { | 4549 | if (mddev->pers->sync_request) { |
4499 | if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) | 4550 | if (mddev->kobj.sd && |
4551 | sysfs_create_group(&mddev->kobj, &md_redundancy_group)) | ||
4500 | printk(KERN_WARNING | 4552 | printk(KERN_WARNING |
4501 | "md: cannot register extra attributes for %s\n", | 4553 | "md: cannot register extra attributes for %s\n", |
4502 | mdname(mddev)); | 4554 | mdname(mddev)); |
4503 | mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action"); | 4555 | mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); |
4504 | } else if (mddev->ro == 2) /* auto-readonly not meaningful */ | 4556 | } else if (mddev->ro == 2) /* auto-readonly not meaningful */ |
4505 | mddev->ro = 0; | 4557 | mddev->ro = 0; |
4506 | 4558 | ||
@@ -4518,8 +4570,7 @@ static int md_run(mddev_t *mddev) | |||
4518 | char nm[20]; | 4570 | char nm[20]; |
4519 | sprintf(nm, "rd%d", rdev->raid_disk); | 4571 | sprintf(nm, "rd%d", rdev->raid_disk); |
4520 | if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) | 4572 | if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) |
4521 | printk("md: cannot register %s for %s\n", | 4573 | /* failure here is OK */; |
4522 | nm, mdname(mddev)); | ||
4523 | } | 4574 | } |
4524 | 4575 | ||
4525 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4576 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -4531,12 +4582,12 @@ static int md_run(mddev_t *mddev) | |||
4531 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4582 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
4532 | 4583 | ||
4533 | md_new_event(mddev); | 4584 | md_new_event(mddev); |
4534 | sysfs_notify_dirent(mddev->sysfs_state); | 4585 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4535 | if (mddev->sysfs_action) | 4586 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
4536 | sysfs_notify_dirent(mddev->sysfs_action); | ||
4537 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | 4587 | sysfs_notify(&mddev->kobj, NULL, "degraded"); |
4538 | return 0; | 4588 | return 0; |
4539 | } | 4589 | } |
4590 | EXPORT_SYMBOL_GPL(md_run); | ||
4540 | 4591 | ||
4541 | static int do_md_run(mddev_t *mddev) | 4592 | static int do_md_run(mddev_t *mddev) |
4542 | { | 4593 | { |
@@ -4545,7 +4596,11 @@ static int do_md_run(mddev_t *mddev) | |||
4545 | err = md_run(mddev); | 4596 | err = md_run(mddev); |
4546 | if (err) | 4597 | if (err) |
4547 | goto out; | 4598 | goto out; |
4548 | 4599 | err = bitmap_load(mddev); | |
4600 | if (err) { | ||
4601 | bitmap_destroy(mddev); | ||
4602 | goto out; | ||
4603 | } | ||
4549 | set_capacity(mddev->gendisk, mddev->array_sectors); | 4604 | set_capacity(mddev->gendisk, mddev->array_sectors); |
4550 | revalidate_disk(mddev->gendisk); | 4605 | revalidate_disk(mddev->gendisk); |
4551 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 4606 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
@@ -4573,7 +4628,7 @@ static int restart_array(mddev_t *mddev) | |||
4573 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4628 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
4574 | md_wakeup_thread(mddev->thread); | 4629 | md_wakeup_thread(mddev->thread); |
4575 | md_wakeup_thread(mddev->sync_thread); | 4630 | md_wakeup_thread(mddev->sync_thread); |
4576 | sysfs_notify_dirent(mddev->sysfs_state); | 4631 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4577 | return 0; | 4632 | return 0; |
4578 | } | 4633 | } |
4579 | 4634 | ||
@@ -4644,9 +4699,10 @@ static void md_clean(mddev_t *mddev) | |||
4644 | mddev->bitmap_info.chunksize = 0; | 4699 | mddev->bitmap_info.chunksize = 0; |
4645 | mddev->bitmap_info.daemon_sleep = 0; | 4700 | mddev->bitmap_info.daemon_sleep = 0; |
4646 | mddev->bitmap_info.max_write_behind = 0; | 4701 | mddev->bitmap_info.max_write_behind = 0; |
4702 | mddev->plug = NULL; | ||
4647 | } | 4703 | } |
4648 | 4704 | ||
4649 | static void md_stop_writes(mddev_t *mddev) | 4705 | void md_stop_writes(mddev_t *mddev) |
4650 | { | 4706 | { |
4651 | if (mddev->sync_thread) { | 4707 | if (mddev->sync_thread) { |
4652 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4708 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
@@ -4666,11 +4722,10 @@ static void md_stop_writes(mddev_t *mddev) | |||
4666 | md_update_sb(mddev, 1); | 4722 | md_update_sb(mddev, 1); |
4667 | } | 4723 | } |
4668 | } | 4724 | } |
4725 | EXPORT_SYMBOL_GPL(md_stop_writes); | ||
4669 | 4726 | ||
4670 | static void md_stop(mddev_t *mddev) | 4727 | void md_stop(mddev_t *mddev) |
4671 | { | 4728 | { |
4672 | md_stop_writes(mddev); | ||
4673 | |||
4674 | mddev->pers->stop(mddev); | 4729 | mddev->pers->stop(mddev); |
4675 | if (mddev->pers->sync_request && mddev->to_remove == NULL) | 4730 | if (mddev->pers->sync_request && mddev->to_remove == NULL) |
4676 | mddev->to_remove = &md_redundancy_group; | 4731 | mddev->to_remove = &md_redundancy_group; |
@@ -4678,6 +4733,7 @@ static void md_stop(mddev_t *mddev) | |||
4678 | mddev->pers = NULL; | 4733 | mddev->pers = NULL; |
4679 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4734 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
4680 | } | 4735 | } |
4736 | EXPORT_SYMBOL_GPL(md_stop); | ||
4681 | 4737 | ||
4682 | static int md_set_readonly(mddev_t *mddev, int is_open) | 4738 | static int md_set_readonly(mddev_t *mddev, int is_open) |
4683 | { | 4739 | { |
@@ -4697,7 +4753,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open) | |||
4697 | mddev->ro = 1; | 4753 | mddev->ro = 1; |
4698 | set_disk_ro(mddev->gendisk, 1); | 4754 | set_disk_ro(mddev->gendisk, 1); |
4699 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4755 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
4700 | sysfs_notify_dirent(mddev->sysfs_state); | 4756 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4701 | err = 0; | 4757 | err = 0; |
4702 | } | 4758 | } |
4703 | out: | 4759 | out: |
@@ -4711,26 +4767,29 @@ out: | |||
4711 | */ | 4767 | */ |
4712 | static int do_md_stop(mddev_t * mddev, int mode, int is_open) | 4768 | static int do_md_stop(mddev_t * mddev, int mode, int is_open) |
4713 | { | 4769 | { |
4714 | int err = 0; | ||
4715 | struct gendisk *disk = mddev->gendisk; | 4770 | struct gendisk *disk = mddev->gendisk; |
4716 | mdk_rdev_t *rdev; | 4771 | mdk_rdev_t *rdev; |
4717 | 4772 | ||
4718 | mutex_lock(&mddev->open_mutex); | 4773 | mutex_lock(&mddev->open_mutex); |
4719 | if (atomic_read(&mddev->openers) > is_open) { | 4774 | if (atomic_read(&mddev->openers) > is_open || |
4775 | mddev->sysfs_active) { | ||
4720 | printk("md: %s still in use.\n",mdname(mddev)); | 4776 | printk("md: %s still in use.\n",mdname(mddev)); |
4721 | err = -EBUSY; | 4777 | mutex_unlock(&mddev->open_mutex); |
4722 | } else if (mddev->pers) { | 4778 | return -EBUSY; |
4779 | } | ||
4723 | 4780 | ||
4781 | if (mddev->pers) { | ||
4724 | if (mddev->ro) | 4782 | if (mddev->ro) |
4725 | set_disk_ro(disk, 0); | 4783 | set_disk_ro(disk, 0); |
4726 | 4784 | ||
4785 | md_stop_writes(mddev); | ||
4727 | md_stop(mddev); | 4786 | md_stop(mddev); |
4728 | mddev->queue->merge_bvec_fn = NULL; | 4787 | mddev->queue->merge_bvec_fn = NULL; |
4729 | mddev->queue->unplug_fn = NULL; | 4788 | mddev->queue->unplug_fn = NULL; |
4730 | mddev->queue->backing_dev_info.congested_fn = NULL; | 4789 | mddev->queue->backing_dev_info.congested_fn = NULL; |
4731 | 4790 | ||
4732 | /* tell userspace to handle 'inactive' */ | 4791 | /* tell userspace to handle 'inactive' */ |
4733 | sysfs_notify_dirent(mddev->sysfs_state); | 4792 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4734 | 4793 | ||
4735 | list_for_each_entry(rdev, &mddev->disks, same_set) | 4794 | list_for_each_entry(rdev, &mddev->disks, same_set) |
4736 | if (rdev->raid_disk >= 0) { | 4795 | if (rdev->raid_disk >= 0) { |
@@ -4740,21 +4799,17 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4740 | } | 4799 | } |
4741 | 4800 | ||
4742 | set_capacity(disk, 0); | 4801 | set_capacity(disk, 0); |
4802 | mutex_unlock(&mddev->open_mutex); | ||
4743 | revalidate_disk(disk); | 4803 | revalidate_disk(disk); |
4744 | 4804 | ||
4745 | if (mddev->ro) | 4805 | if (mddev->ro) |
4746 | mddev->ro = 0; | 4806 | mddev->ro = 0; |
4747 | 4807 | } else | |
4748 | err = 0; | 4808 | mutex_unlock(&mddev->open_mutex); |
4749 | } | ||
4750 | mutex_unlock(&mddev->open_mutex); | ||
4751 | if (err) | ||
4752 | return err; | ||
4753 | /* | 4809 | /* |
4754 | * Free resources if final stop | 4810 | * Free resources if final stop |
4755 | */ | 4811 | */ |
4756 | if (mode == 0) { | 4812 | if (mode == 0) { |
4757 | |||
4758 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); | 4813 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); |
4759 | 4814 | ||
4760 | bitmap_destroy(mddev); | 4815 | bitmap_destroy(mddev); |
@@ -4771,13 +4826,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4771 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 4826 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
4772 | if (mddev->hold_active == UNTIL_STOP) | 4827 | if (mddev->hold_active == UNTIL_STOP) |
4773 | mddev->hold_active = 0; | 4828 | mddev->hold_active = 0; |
4774 | |||
4775 | } | 4829 | } |
4776 | err = 0; | ||
4777 | blk_integrity_unregister(disk); | 4830 | blk_integrity_unregister(disk); |
4778 | md_new_event(mddev); | 4831 | md_new_event(mddev); |
4779 | sysfs_notify_dirent(mddev->sysfs_state); | 4832 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
4780 | return err; | 4833 | return 0; |
4781 | } | 4834 | } |
4782 | 4835 | ||
4783 | #ifndef MODULE | 4836 | #ifndef MODULE |
@@ -5138,7 +5191,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
5138 | if (err) | 5191 | if (err) |
5139 | export_rdev(rdev); | 5192 | export_rdev(rdev); |
5140 | else | 5193 | else |
5141 | sysfs_notify_dirent(rdev->sysfs_state); | 5194 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
5142 | 5195 | ||
5143 | md_update_sb(mddev, 1); | 5196 | md_update_sb(mddev, 1); |
5144 | if (mddev->degraded) | 5197 | if (mddev->degraded) |
@@ -5331,8 +5384,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
5331 | err = 0; | 5384 | err = 0; |
5332 | if (mddev->pers) { | 5385 | if (mddev->pers) { |
5333 | mddev->pers->quiesce(mddev, 1); | 5386 | mddev->pers->quiesce(mddev, 1); |
5334 | if (fd >= 0) | 5387 | if (fd >= 0) { |
5335 | err = bitmap_create(mddev); | 5388 | err = bitmap_create(mddev); |
5389 | if (!err) | ||
5390 | err = bitmap_load(mddev); | ||
5391 | } | ||
5336 | if (fd < 0 || err) { | 5392 | if (fd < 0 || err) { |
5337 | bitmap_destroy(mddev); | 5393 | bitmap_destroy(mddev); |
5338 | fd = -1; /* make sure to put the file */ | 5394 | fd = -1; /* make sure to put the file */ |
@@ -5581,6 +5637,8 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
5581 | mddev->bitmap_info.default_offset; | 5637 | mddev->bitmap_info.default_offset; |
5582 | mddev->pers->quiesce(mddev, 1); | 5638 | mddev->pers->quiesce(mddev, 1); |
5583 | rv = bitmap_create(mddev); | 5639 | rv = bitmap_create(mddev); |
5640 | if (!rv) | ||
5641 | rv = bitmap_load(mddev); | ||
5584 | if (rv) | 5642 | if (rv) |
5585 | bitmap_destroy(mddev); | 5643 | bitmap_destroy(mddev); |
5586 | mddev->pers->quiesce(mddev, 0); | 5644 | mddev->pers->quiesce(mddev, 0); |
@@ -5813,7 +5871,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
5813 | if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { | 5871 | if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { |
5814 | if (mddev->ro == 2) { | 5872 | if (mddev->ro == 2) { |
5815 | mddev->ro = 0; | 5873 | mddev->ro = 0; |
5816 | sysfs_notify_dirent(mddev->sysfs_state); | 5874 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
5817 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5875 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
5818 | md_wakeup_thread(mddev->thread); | 5876 | md_wakeup_thread(mddev->thread); |
5819 | } else { | 5877 | } else { |
@@ -6059,10 +6117,12 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
6059 | mddev->pers->error_handler(mddev,rdev); | 6117 | mddev->pers->error_handler(mddev,rdev); |
6060 | if (mddev->degraded) | 6118 | if (mddev->degraded) |
6061 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | 6119 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); |
6062 | sysfs_notify_dirent(rdev->sysfs_state); | 6120 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
6063 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 6121 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
6064 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 6122 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
6065 | md_wakeup_thread(mddev->thread); | 6123 | md_wakeup_thread(mddev->thread); |
6124 | if (mddev->event_work.func) | ||
6125 | schedule_work(&mddev->event_work); | ||
6066 | md_new_event_inintr(mddev); | 6126 | md_new_event_inintr(mddev); |
6067 | } | 6127 | } |
6068 | 6128 | ||
@@ -6520,7 +6580,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
6520 | spin_unlock_irq(&mddev->write_lock); | 6580 | spin_unlock_irq(&mddev->write_lock); |
6521 | } | 6581 | } |
6522 | if (did_change) | 6582 | if (did_change) |
6523 | sysfs_notify_dirent(mddev->sysfs_state); | 6583 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
6524 | wait_event(mddev->sb_wait, | 6584 | wait_event(mddev->sb_wait, |
6525 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && | 6585 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && |
6526 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); | 6586 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); |
@@ -6563,7 +6623,7 @@ int md_allow_write(mddev_t *mddev) | |||
6563 | mddev->safemode = 1; | 6623 | mddev->safemode = 1; |
6564 | spin_unlock_irq(&mddev->write_lock); | 6624 | spin_unlock_irq(&mddev->write_lock); |
6565 | md_update_sb(mddev, 0); | 6625 | md_update_sb(mddev, 0); |
6566 | sysfs_notify_dirent(mddev->sysfs_state); | 6626 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
6567 | } else | 6627 | } else |
6568 | spin_unlock_irq(&mddev->write_lock); | 6628 | spin_unlock_irq(&mddev->write_lock); |
6569 | 6629 | ||
@@ -6574,6 +6634,14 @@ int md_allow_write(mddev_t *mddev) | |||
6574 | } | 6634 | } |
6575 | EXPORT_SYMBOL_GPL(md_allow_write); | 6635 | EXPORT_SYMBOL_GPL(md_allow_write); |
6576 | 6636 | ||
6637 | void md_unplug(mddev_t *mddev) | ||
6638 | { | ||
6639 | if (mddev->queue) | ||
6640 | blk_unplug(mddev->queue); | ||
6641 | if (mddev->plug) | ||
6642 | mddev->plug->unplug_fn(mddev->plug); | ||
6643 | } | ||
6644 | |||
6577 | #define SYNC_MARKS 10 | 6645 | #define SYNC_MARKS 10 |
6578 | #define SYNC_MARK_STEP (3*HZ) | 6646 | #define SYNC_MARK_STEP (3*HZ) |
6579 | void md_do_sync(mddev_t *mddev) | 6647 | void md_do_sync(mddev_t *mddev) |
@@ -6752,12 +6820,13 @@ void md_do_sync(mddev_t *mddev) | |||
6752 | >= mddev->resync_max - mddev->curr_resync_completed | 6820 | >= mddev->resync_max - mddev->curr_resync_completed |
6753 | )) { | 6821 | )) { |
6754 | /* time to update curr_resync_completed */ | 6822 | /* time to update curr_resync_completed */ |
6755 | blk_unplug(mddev->queue); | 6823 | md_unplug(mddev); |
6756 | wait_event(mddev->recovery_wait, | 6824 | wait_event(mddev->recovery_wait, |
6757 | atomic_read(&mddev->recovery_active) == 0); | 6825 | atomic_read(&mddev->recovery_active) == 0); |
6758 | mddev->curr_resync_completed = | 6826 | mddev->curr_resync_completed = |
6759 | mddev->curr_resync; | 6827 | mddev->curr_resync; |
6760 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 6828 | if (mddev->persistent) |
6829 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
6761 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 6830 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
6762 | } | 6831 | } |
6763 | 6832 | ||
@@ -6829,7 +6898,7 @@ void md_do_sync(mddev_t *mddev) | |||
6829 | * about not overloading the IO subsystem. (things like an | 6898 | * about not overloading the IO subsystem. (things like an |
6830 | * e2fsck being done on the RAID array should execute fast) | 6899 | * e2fsck being done on the RAID array should execute fast) |
6831 | */ | 6900 | */ |
6832 | blk_unplug(mddev->queue); | 6901 | md_unplug(mddev); |
6833 | cond_resched(); | 6902 | cond_resched(); |
6834 | 6903 | ||
6835 | currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 | 6904 | currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 |
@@ -6848,7 +6917,7 @@ void md_do_sync(mddev_t *mddev) | |||
6848 | * this also signals 'finished resyncing' to md_stop | 6917 | * this also signals 'finished resyncing' to md_stop |
6849 | */ | 6918 | */ |
6850 | out: | 6919 | out: |
6851 | blk_unplug(mddev->queue); | 6920 | md_unplug(mddev); |
6852 | 6921 | ||
6853 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); | 6922 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); |
6854 | 6923 | ||
@@ -6950,10 +7019,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
6950 | sprintf(nm, "rd%d", rdev->raid_disk); | 7019 | sprintf(nm, "rd%d", rdev->raid_disk); |
6951 | if (sysfs_create_link(&mddev->kobj, | 7020 | if (sysfs_create_link(&mddev->kobj, |
6952 | &rdev->kobj, nm)) | 7021 | &rdev->kobj, nm)) |
6953 | printk(KERN_WARNING | 7022 | /* failure here is OK */; |
6954 | "md: cannot register " | ||
6955 | "%s for %s\n", | ||
6956 | nm, mdname(mddev)); | ||
6957 | spares++; | 7023 | spares++; |
6958 | md_new_event(mddev); | 7024 | md_new_event(mddev); |
6959 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 7025 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
@@ -7046,7 +7112,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7046 | mddev->safemode = 0; | 7112 | mddev->safemode = 0; |
7047 | spin_unlock_irq(&mddev->write_lock); | 7113 | spin_unlock_irq(&mddev->write_lock); |
7048 | if (did_change) | 7114 | if (did_change) |
7049 | sysfs_notify_dirent(mddev->sysfs_state); | 7115 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
7050 | } | 7116 | } |
7051 | 7117 | ||
7052 | if (mddev->flags) | 7118 | if (mddev->flags) |
@@ -7085,7 +7151,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7085 | mddev->recovery = 0; | 7151 | mddev->recovery = 0; |
7086 | /* flag recovery needed just to double check */ | 7152 | /* flag recovery needed just to double check */ |
7087 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 7153 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
7088 | sysfs_notify_dirent(mddev->sysfs_action); | 7154 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
7089 | md_new_event(mddev); | 7155 | md_new_event(mddev); |
7090 | goto unlock; | 7156 | goto unlock; |
7091 | } | 7157 | } |
@@ -7147,7 +7213,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7147 | mddev->recovery = 0; | 7213 | mddev->recovery = 0; |
7148 | } else | 7214 | } else |
7149 | md_wakeup_thread(mddev->sync_thread); | 7215 | md_wakeup_thread(mddev->sync_thread); |
7150 | sysfs_notify_dirent(mddev->sysfs_action); | 7216 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
7151 | md_new_event(mddev); | 7217 | md_new_event(mddev); |
7152 | } | 7218 | } |
7153 | unlock: | 7219 | unlock: |
@@ -7156,7 +7222,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7156 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, | 7222 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, |
7157 | &mddev->recovery)) | 7223 | &mddev->recovery)) |
7158 | if (mddev->sysfs_action) | 7224 | if (mddev->sysfs_action) |
7159 | sysfs_notify_dirent(mddev->sysfs_action); | 7225 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
7160 | } | 7226 | } |
7161 | mddev_unlock(mddev); | 7227 | mddev_unlock(mddev); |
7162 | } | 7228 | } |
@@ -7164,7 +7230,7 @@ void md_check_recovery(mddev_t *mddev) | |||
7164 | 7230 | ||
7165 | void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | 7231 | void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) |
7166 | { | 7232 | { |
7167 | sysfs_notify_dirent(rdev->sysfs_state); | 7233 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
7168 | wait_event_timeout(rdev->blocked_wait, | 7234 | wait_event_timeout(rdev->blocked_wait, |
7169 | !test_bit(Blocked, &rdev->flags), | 7235 | !test_bit(Blocked, &rdev->flags), |
7170 | msecs_to_jiffies(5000)); | 7236 | msecs_to_jiffies(5000)); |
diff --git a/drivers/md/md.h b/drivers/md/md.h index 10597bfec000..6f797eceae31 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -29,6 +29,26 @@ | |||
29 | typedef struct mddev_s mddev_t; | 29 | typedef struct mddev_s mddev_t; |
30 | typedef struct mdk_rdev_s mdk_rdev_t; | 30 | typedef struct mdk_rdev_s mdk_rdev_t; |
31 | 31 | ||
32 | /* generic plugging support - like that provided with request_queue, | ||
33 | * but does not require a request_queue | ||
34 | */ | ||
35 | struct plug_handle { | ||
36 | void (*unplug_fn)(struct plug_handle *); | ||
37 | struct timer_list unplug_timer; | ||
38 | struct work_struct unplug_work; | ||
39 | unsigned long unplug_flag; | ||
40 | }; | ||
41 | #define PLUGGED_FLAG 1 | ||
42 | void plugger_init(struct plug_handle *plug, | ||
43 | void (*unplug_fn)(struct plug_handle *)); | ||
44 | void plugger_set_plug(struct plug_handle *plug); | ||
45 | int plugger_remove_plug(struct plug_handle *plug); | ||
46 | static inline void plugger_flush(struct plug_handle *plug) | ||
47 | { | ||
48 | del_timer_sync(&plug->unplug_timer); | ||
49 | cancel_work_sync(&plug->unplug_work); | ||
50 | } | ||
51 | |||
32 | /* | 52 | /* |
33 | * MD's 'extended' device | 53 | * MD's 'extended' device |
34 | */ | 54 | */ |
@@ -125,6 +145,10 @@ struct mddev_s | |||
125 | int suspended; | 145 | int suspended; |
126 | atomic_t active_io; | 146 | atomic_t active_io; |
127 | int ro; | 147 | int ro; |
148 | int sysfs_active; /* set when sysfs deletes | ||
149 | * are happening, so run/ | ||
150 | * takeover/stop are not safe | ||
151 | */ | ||
128 | 152 | ||
129 | struct gendisk *gendisk; | 153 | struct gendisk *gendisk; |
130 | 154 | ||
@@ -297,9 +321,14 @@ struct mddev_s | |||
297 | * hot-adding a bitmap. It should | 321 | * hot-adding a bitmap. It should |
298 | * eventually be settable by sysfs. | 322 | * eventually be settable by sysfs. |
299 | */ | 323 | */ |
324 | /* When md is serving under dm, it might use a | ||
325 | * dirty_log to store the bits. | ||
326 | */ | ||
327 | struct dm_dirty_log *log; | ||
328 | |||
300 | struct mutex mutex; | 329 | struct mutex mutex; |
301 | unsigned long chunksize; | 330 | unsigned long chunksize; |
302 | unsigned long daemon_sleep; /* how many seconds between updates? */ | 331 | unsigned long daemon_sleep; /* how many jiffies between updates? */ |
303 | unsigned long max_write_behind; /* write-behind mode */ | 332 | unsigned long max_write_behind; /* write-behind mode */ |
304 | int external; | 333 | int external; |
305 | } bitmap_info; | 334 | } bitmap_info; |
@@ -308,6 +337,8 @@ struct mddev_s | |||
308 | struct list_head all_mddevs; | 337 | struct list_head all_mddevs; |
309 | 338 | ||
310 | struct attribute_group *to_remove; | 339 | struct attribute_group *to_remove; |
340 | struct plug_handle *plug; /* if used by personality */ | ||
341 | |||
311 | /* Generic barrier handling. | 342 | /* Generic barrier handling. |
312 | * If there is a pending barrier request, all other | 343 | * If there is a pending barrier request, all other |
313 | * writes are blocked while the devices are flushed. | 344 | * writes are blocked while the devices are flushed. |
@@ -318,6 +349,7 @@ struct mddev_s | |||
318 | struct bio *barrier; | 349 | struct bio *barrier; |
319 | atomic_t flush_pending; | 350 | atomic_t flush_pending; |
320 | struct work_struct barrier_work; | 351 | struct work_struct barrier_work; |
352 | struct work_struct event_work; /* used by dm to report failure event */ | ||
321 | }; | 353 | }; |
322 | 354 | ||
323 | 355 | ||
@@ -382,6 +414,18 @@ struct md_sysfs_entry { | |||
382 | }; | 414 | }; |
383 | extern struct attribute_group md_bitmap_group; | 415 | extern struct attribute_group md_bitmap_group; |
384 | 416 | ||
417 | static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name) | ||
418 | { | ||
419 | if (sd) | ||
420 | return sysfs_get_dirent(sd, NULL, name); | ||
421 | return sd; | ||
422 | } | ||
423 | static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd) | ||
424 | { | ||
425 | if (sd) | ||
426 | sysfs_notify_dirent(sd); | ||
427 | } | ||
428 | |||
385 | static inline char * mdname (mddev_t * mddev) | 429 | static inline char * mdname (mddev_t * mddev) |
386 | { | 430 | { |
387 | return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; | 431 | return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; |
@@ -474,5 +518,14 @@ extern int md_integrity_register(mddev_t *mddev); | |||
474 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 518 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
475 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); | 519 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); |
476 | extern void restore_bitmap_write_access(struct file *file); | 520 | extern void restore_bitmap_write_access(struct file *file); |
521 | extern void md_unplug(mddev_t *mddev); | ||
522 | |||
523 | extern void mddev_init(mddev_t *mddev); | ||
524 | extern int md_run(mddev_t *mddev); | ||
525 | extern void md_stop(mddev_t *mddev); | ||
526 | extern void md_stop_writes(mddev_t *mddev); | ||
527 | extern void md_rdev_init(mdk_rdev_t *rdev); | ||
477 | 528 | ||
529 | extern void mddev_suspend(mddev_t *mddev); | ||
530 | extern void mddev_resume(mddev_t *mddev); | ||
478 | #endif /* _MD_MD_H */ | 531 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 42e64e4e5e25..d1d689126346 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -825,11 +825,29 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
825 | */ | 825 | */ |
826 | bp = bio_split(bio, | 826 | bp = bio_split(bio, |
827 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); | 827 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); |
828 | |||
829 | /* Each of these 'make_request' calls will call 'wait_barrier'. | ||
830 | * If the first succeeds but the second blocks due to the resync | ||
831 | * thread raising the barrier, we will deadlock because the | ||
832 | * IO to the underlying device will be queued in generic_make_request | ||
833 | * and will never complete, so will never reduce nr_pending. | ||
834 | * So increment nr_waiting here so no new raise_barriers will | ||
835 | * succeed, and so the second wait_barrier cannot block. | ||
836 | */ | ||
837 | spin_lock_irq(&conf->resync_lock); | ||
838 | conf->nr_waiting++; | ||
839 | spin_unlock_irq(&conf->resync_lock); | ||
840 | |||
828 | if (make_request(mddev, &bp->bio1)) | 841 | if (make_request(mddev, &bp->bio1)) |
829 | generic_make_request(&bp->bio1); | 842 | generic_make_request(&bp->bio1); |
830 | if (make_request(mddev, &bp->bio2)) | 843 | if (make_request(mddev, &bp->bio2)) |
831 | generic_make_request(&bp->bio2); | 844 | generic_make_request(&bp->bio2); |
832 | 845 | ||
846 | spin_lock_irq(&conf->resync_lock); | ||
847 | conf->nr_waiting--; | ||
848 | wake_up(&conf->wait_barrier); | ||
849 | spin_unlock_irq(&conf->resync_lock); | ||
850 | |||
833 | bio_pair_release(bp); | 851 | bio_pair_release(bp); |
834 | return 0; | 852 | return 0; |
835 | bad_map: | 853 | bad_map: |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 96c690279fc6..e30a809cbea0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { | 202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { |
203 | list_add_tail(&sh->lru, &conf->delayed_list); | 203 | list_add_tail(&sh->lru, &conf->delayed_list); |
204 | blk_plug_device(conf->mddev->queue); | 204 | plugger_set_plug(&conf->plug); |
205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
206 | sh->bm_seq - conf->seq_write > 0) { | 206 | sh->bm_seq - conf->seq_write > 0) { |
207 | list_add_tail(&sh->lru, &conf->bitmap_list); | 207 | list_add_tail(&sh->lru, &conf->bitmap_list); |
208 | blk_plug_device(conf->mddev->queue); | 208 | plugger_set_plug(&conf->plug); |
209 | } else { | 209 | } else { |
210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
211 | list_add_tail(&sh->lru, &conf->handle_list); | 211 | list_add_tail(&sh->lru, &conf->handle_list); |
@@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf) | |||
434 | } | 434 | } |
435 | 435 | ||
436 | static void unplug_slaves(mddev_t *mddev); | 436 | static void unplug_slaves(mddev_t *mddev); |
437 | static void raid5_unplug_device(struct request_queue *q); | ||
438 | 437 | ||
439 | static struct stripe_head * | 438 | static struct stripe_head * |
440 | get_active_stripe(raid5_conf_t *conf, sector_t sector, | 439 | get_active_stripe(raid5_conf_t *conf, sector_t sector, |
@@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, | |||
464 | < (conf->max_nr_stripes *3/4) | 463 | < (conf->max_nr_stripes *3/4) |
465 | || !conf->inactive_blocked), | 464 | || !conf->inactive_blocked), |
466 | conf->device_lock, | 465 | conf->device_lock, |
467 | raid5_unplug_device(conf->mddev->queue) | 466 | md_raid5_unplug_device(conf) |
468 | ); | 467 | ); |
469 | conf->inactive_blocked = 0; | 468 | conf->inactive_blocked = 0; |
470 | } else | 469 | } else |
@@ -1337,10 +1336,14 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
1337 | struct kmem_cache *sc; | 1336 | struct kmem_cache *sc; |
1338 | int devs = max(conf->raid_disks, conf->previous_raid_disks); | 1337 | int devs = max(conf->raid_disks, conf->previous_raid_disks); |
1339 | 1338 | ||
1340 | sprintf(conf->cache_name[0], | 1339 | if (conf->mddev->gendisk) |
1341 | "raid%d-%s", conf->level, mdname(conf->mddev)); | 1340 | sprintf(conf->cache_name[0], |
1342 | sprintf(conf->cache_name[1], | 1341 | "raid%d-%s", conf->level, mdname(conf->mddev)); |
1343 | "raid%d-%s-alt", conf->level, mdname(conf->mddev)); | 1342 | else |
1343 | sprintf(conf->cache_name[0], | ||
1344 | "raid%d-%p", conf->level, conf->mddev); | ||
1345 | sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); | ||
1346 | |||
1344 | conf->active_name = 0; | 1347 | conf->active_name = 0; |
1345 | sc = kmem_cache_create(conf->cache_name[conf->active_name], | 1348 | sc = kmem_cache_create(conf->cache_name[conf->active_name], |
1346 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), | 1349 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), |
@@ -3614,7 +3617,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) | |||
3614 | list_add_tail(&sh->lru, &conf->hold_list); | 3617 | list_add_tail(&sh->lru, &conf->hold_list); |
3615 | } | 3618 | } |
3616 | } else | 3619 | } else |
3617 | blk_plug_device(conf->mddev->queue); | 3620 | plugger_set_plug(&conf->plug); |
3618 | } | 3621 | } |
3619 | 3622 | ||
3620 | static void activate_bit_delay(raid5_conf_t *conf) | 3623 | static void activate_bit_delay(raid5_conf_t *conf) |
@@ -3655,36 +3658,44 @@ static void unplug_slaves(mddev_t *mddev) | |||
3655 | rcu_read_unlock(); | 3658 | rcu_read_unlock(); |
3656 | } | 3659 | } |
3657 | 3660 | ||
3658 | static void raid5_unplug_device(struct request_queue *q) | 3661 | void md_raid5_unplug_device(raid5_conf_t *conf) |
3659 | { | 3662 | { |
3660 | mddev_t *mddev = q->queuedata; | ||
3661 | raid5_conf_t *conf = mddev->private; | ||
3662 | unsigned long flags; | 3663 | unsigned long flags; |
3663 | 3664 | ||
3664 | spin_lock_irqsave(&conf->device_lock, flags); | 3665 | spin_lock_irqsave(&conf->device_lock, flags); |
3665 | 3666 | ||
3666 | if (blk_remove_plug(q)) { | 3667 | if (plugger_remove_plug(&conf->plug)) { |
3667 | conf->seq_flush++; | 3668 | conf->seq_flush++; |
3668 | raid5_activate_delayed(conf); | 3669 | raid5_activate_delayed(conf); |
3669 | } | 3670 | } |
3670 | md_wakeup_thread(mddev->thread); | 3671 | md_wakeup_thread(conf->mddev->thread); |
3671 | 3672 | ||
3672 | spin_unlock_irqrestore(&conf->device_lock, flags); | 3673 | spin_unlock_irqrestore(&conf->device_lock, flags); |
3673 | 3674 | ||
3674 | unplug_slaves(mddev); | 3675 | unplug_slaves(conf->mddev); |
3675 | } | 3676 | } |
3677 | EXPORT_SYMBOL_GPL(md_raid5_unplug_device); | ||
3676 | 3678 | ||
3677 | static int raid5_congested(void *data, int bits) | 3679 | static void raid5_unplug(struct plug_handle *plug) |
3680 | { | ||
3681 | raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); | ||
3682 | md_raid5_unplug_device(conf); | ||
3683 | } | ||
3684 | |||
3685 | static void raid5_unplug_queue(struct request_queue *q) | ||
3686 | { | ||
3687 | mddev_t *mddev = q->queuedata; | ||
3688 | md_raid5_unplug_device(mddev->private); | ||
3689 | } | ||
3690 | |||
3691 | int md_raid5_congested(mddev_t *mddev, int bits) | ||
3678 | { | 3692 | { |
3679 | mddev_t *mddev = data; | ||
3680 | raid5_conf_t *conf = mddev->private; | 3693 | raid5_conf_t *conf = mddev->private; |
3681 | 3694 | ||
3682 | /* No difference between reads and writes. Just check | 3695 | /* No difference between reads and writes. Just check |
3683 | * how busy the stripe_cache is | 3696 | * how busy the stripe_cache is |
3684 | */ | 3697 | */ |
3685 | 3698 | ||
3686 | if (mddev_congested(mddev, bits)) | ||
3687 | return 1; | ||
3688 | if (conf->inactive_blocked) | 3699 | if (conf->inactive_blocked) |
3689 | return 1; | 3700 | return 1; |
3690 | if (conf->quiesce) | 3701 | if (conf->quiesce) |
@@ -3694,6 +3705,15 @@ static int raid5_congested(void *data, int bits) | |||
3694 | 3705 | ||
3695 | return 0; | 3706 | return 0; |
3696 | } | 3707 | } |
3708 | EXPORT_SYMBOL_GPL(md_raid5_congested); | ||
3709 | |||
3710 | static int raid5_congested(void *data, int bits) | ||
3711 | { | ||
3712 | mddev_t *mddev = data; | ||
3713 | |||
3714 | return mddev_congested(mddev, bits) || | ||
3715 | md_raid5_congested(mddev, bits); | ||
3716 | } | ||
3697 | 3717 | ||
3698 | /* We want read requests to align with chunks where possible, | 3718 | /* We want read requests to align with chunks where possible, |
3699 | * but write requests don't need to. | 3719 | * but write requests don't need to. |
@@ -4075,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4075 | * add failed due to overlap. Flush everything | 4095 | * add failed due to overlap. Flush everything |
4076 | * and wait a while | 4096 | * and wait a while |
4077 | */ | 4097 | */ |
4078 | raid5_unplug_device(mddev->queue); | 4098 | md_raid5_unplug_device(conf); |
4079 | release_stripe(sh); | 4099 | release_stripe(sh); |
4080 | schedule(); | 4100 | schedule(); |
4081 | goto retry; | 4101 | goto retry; |
@@ -4566,23 +4586,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) | |||
4566 | return 0; | 4586 | return 0; |
4567 | } | 4587 | } |
4568 | 4588 | ||
4569 | static ssize_t | 4589 | int |
4570 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | 4590 | raid5_set_cache_size(mddev_t *mddev, int size) |
4571 | { | 4591 | { |
4572 | raid5_conf_t *conf = mddev->private; | 4592 | raid5_conf_t *conf = mddev->private; |
4573 | unsigned long new; | ||
4574 | int err; | 4593 | int err; |
4575 | 4594 | ||
4576 | if (len >= PAGE_SIZE) | 4595 | if (size <= 16 || size > 32768) |
4577 | return -EINVAL; | 4596 | return -EINVAL; |
4578 | if (!conf) | 4597 | while (size < conf->max_nr_stripes) { |
4579 | return -ENODEV; | ||
4580 | |||
4581 | if (strict_strtoul(page, 10, &new)) | ||
4582 | return -EINVAL; | ||
4583 | if (new <= 16 || new > 32768) | ||
4584 | return -EINVAL; | ||
4585 | while (new < conf->max_nr_stripes) { | ||
4586 | if (drop_one_stripe(conf)) | 4598 | if (drop_one_stripe(conf)) |
4587 | conf->max_nr_stripes--; | 4599 | conf->max_nr_stripes--; |
4588 | else | 4600 | else |
@@ -4591,11 +4603,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | |||
4591 | err = md_allow_write(mddev); | 4603 | err = md_allow_write(mddev); |
4592 | if (err) | 4604 | if (err) |
4593 | return err; | 4605 | return err; |
4594 | while (new > conf->max_nr_stripes) { | 4606 | while (size > conf->max_nr_stripes) { |
4595 | if (grow_one_stripe(conf)) | 4607 | if (grow_one_stripe(conf)) |
4596 | conf->max_nr_stripes++; | 4608 | conf->max_nr_stripes++; |
4597 | else break; | 4609 | else break; |
4598 | } | 4610 | } |
4611 | return 0; | ||
4612 | } | ||
4613 | EXPORT_SYMBOL(raid5_set_cache_size); | ||
4614 | |||
4615 | static ssize_t | ||
4616 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | ||
4617 | { | ||
4618 | raid5_conf_t *conf = mddev->private; | ||
4619 | unsigned long new; | ||
4620 | int err; | ||
4621 | |||
4622 | if (len >= PAGE_SIZE) | ||
4623 | return -EINVAL; | ||
4624 | if (!conf) | ||
4625 | return -ENODEV; | ||
4626 | |||
4627 | if (strict_strtoul(page, 10, &new)) | ||
4628 | return -EINVAL; | ||
4629 | err = raid5_set_cache_size(mddev, new); | ||
4630 | if (err) | ||
4631 | return err; | ||
4599 | return len; | 4632 | return len; |
4600 | } | 4633 | } |
4601 | 4634 | ||
@@ -4958,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded | |||
4958 | static int run(mddev_t *mddev) | 4991 | static int run(mddev_t *mddev) |
4959 | { | 4992 | { |
4960 | raid5_conf_t *conf; | 4993 | raid5_conf_t *conf; |
4961 | int working_disks = 0, chunk_size; | 4994 | int working_disks = 0; |
4962 | int dirty_parity_disks = 0; | 4995 | int dirty_parity_disks = 0; |
4963 | mdk_rdev_t *rdev; | 4996 | mdk_rdev_t *rdev; |
4964 | sector_t reshape_offset = 0; | 4997 | sector_t reshape_offset = 0; |
@@ -5144,42 +5177,47 @@ static int run(mddev_t *mddev) | |||
5144 | "reshape"); | 5177 | "reshape"); |
5145 | } | 5178 | } |
5146 | 5179 | ||
5147 | /* read-ahead size must cover two whole stripes, which is | ||
5148 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices | ||
5149 | */ | ||
5150 | { | ||
5151 | int data_disks = conf->previous_raid_disks - conf->max_degraded; | ||
5152 | int stripe = data_disks * | ||
5153 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | ||
5154 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | ||
5155 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | ||
5156 | } | ||
5157 | 5180 | ||
5158 | /* Ok, everything is just fine now */ | 5181 | /* Ok, everything is just fine now */ |
5159 | if (mddev->to_remove == &raid5_attrs_group) | 5182 | if (mddev->to_remove == &raid5_attrs_group) |
5160 | mddev->to_remove = NULL; | 5183 | mddev->to_remove = NULL; |
5161 | else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) | 5184 | else if (mddev->kobj.sd && |
5185 | sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) | ||
5162 | printk(KERN_WARNING | 5186 | printk(KERN_WARNING |
5163 | "md/raid:%s: failed to create sysfs attributes.\n", | 5187 | "raid5: failed to create sysfs attributes for %s\n", |
5164 | mdname(mddev)); | 5188 | mdname(mddev)); |
5189 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | ||
5165 | 5190 | ||
5166 | mddev->queue->queue_lock = &conf->device_lock; | 5191 | plugger_init(&conf->plug, raid5_unplug); |
5192 | mddev->plug = &conf->plug; | ||
5193 | if (mddev->queue) { | ||
5194 | int chunk_size; | ||
5195 | /* read-ahead size must cover two whole stripes, which | ||
5196 | * is 2 * (datadisks) * chunksize where 'n' is the | ||
5197 | * number of raid devices | ||
5198 | */ | ||
5199 | int data_disks = conf->previous_raid_disks - conf->max_degraded; | ||
5200 | int stripe = data_disks * | ||
5201 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | ||
5202 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | ||
5203 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | ||
5167 | 5204 | ||
5168 | mddev->queue->unplug_fn = raid5_unplug_device; | 5205 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
5169 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
5170 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | ||
5171 | 5206 | ||
5172 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5207 | mddev->queue->backing_dev_info.congested_data = mddev; |
5208 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | ||
5209 | mddev->queue->queue_lock = &conf->device_lock; | ||
5210 | mddev->queue->unplug_fn = raid5_unplug_queue; | ||
5173 | 5211 | ||
5174 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 5212 | chunk_size = mddev->chunk_sectors << 9; |
5175 | chunk_size = mddev->chunk_sectors << 9; | 5213 | blk_queue_io_min(mddev->queue, chunk_size); |
5176 | blk_queue_io_min(mddev->queue, chunk_size); | 5214 | blk_queue_io_opt(mddev->queue, chunk_size * |
5177 | blk_queue_io_opt(mddev->queue, chunk_size * | 5215 | (conf->raid_disks - conf->max_degraded)); |
5178 | (conf->raid_disks - conf->max_degraded)); | ||
5179 | 5216 | ||
5180 | list_for_each_entry(rdev, &mddev->disks, same_set) | 5217 | list_for_each_entry(rdev, &mddev->disks, same_set) |
5181 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 5218 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
5182 | rdev->data_offset << 9); | 5219 | rdev->data_offset << 9); |
5220 | } | ||
5183 | 5221 | ||
5184 | return 0; | 5222 | return 0; |
5185 | abort: | 5223 | abort: |
@@ -5200,8 +5238,9 @@ static int stop(mddev_t *mddev) | |||
5200 | 5238 | ||
5201 | md_unregister_thread(mddev->thread); | 5239 | md_unregister_thread(mddev->thread); |
5202 | mddev->thread = NULL; | 5240 | mddev->thread = NULL; |
5203 | mddev->queue->backing_dev_info.congested_fn = NULL; | 5241 | if (mddev->queue) |
5204 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 5242 | mddev->queue->backing_dev_info.congested_fn = NULL; |
5243 | plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ | ||
5205 | free_conf(conf); | 5244 | free_conf(conf); |
5206 | mddev->private = NULL; | 5245 | mddev->private = NULL; |
5207 | mddev->to_remove = &raid5_attrs_group; | 5246 | mddev->to_remove = &raid5_attrs_group; |
@@ -5545,10 +5584,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
5545 | sprintf(nm, "rd%d", rdev->raid_disk); | 5584 | sprintf(nm, "rd%d", rdev->raid_disk); |
5546 | if (sysfs_create_link(&mddev->kobj, | 5585 | if (sysfs_create_link(&mddev->kobj, |
5547 | &rdev->kobj, nm)) | 5586 | &rdev->kobj, nm)) |
5548 | printk(KERN_WARNING | 5587 | /* Failure here is OK */; |
5549 | "md/raid:%s: failed to create " | ||
5550 | " link %s\n", | ||
5551 | mdname(mddev), nm); | ||
5552 | } else | 5588 | } else |
5553 | break; | 5589 | break; |
5554 | } | 5590 | } |
@@ -5603,7 +5639,7 @@ static void end_reshape(raid5_conf_t *conf) | |||
5603 | /* read-ahead size must cover two whole stripes, which is | 5639 | /* read-ahead size must cover two whole stripes, which is |
5604 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices | 5640 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices |
5605 | */ | 5641 | */ |
5606 | { | 5642 | if (conf->mddev->queue) { |
5607 | int data_disks = conf->raid_disks - conf->max_degraded; | 5643 | int data_disks = conf->raid_disks - conf->max_degraded; |
5608 | int stripe = data_disks * ((conf->chunk_sectors << 9) | 5644 | int stripe = data_disks * ((conf->chunk_sectors << 9) |
5609 | / PAGE_SIZE); | 5645 | / PAGE_SIZE); |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 0f86f5e36724..36eaed5dfd6e 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -388,7 +388,7 @@ struct raid5_private_data { | |||
388 | * two caches. | 388 | * two caches. |
389 | */ | 389 | */ |
390 | int active_name; | 390 | int active_name; |
391 | char cache_name[2][20]; | 391 | char cache_name[2][32]; |
392 | struct kmem_cache *slab_cache; /* for allocating stripes */ | 392 | struct kmem_cache *slab_cache; /* for allocating stripes */ |
393 | 393 | ||
394 | int seq_flush, seq_write; | 394 | int seq_flush, seq_write; |
@@ -398,6 +398,9 @@ struct raid5_private_data { | |||
398 | * (fresh device added). | 398 | * (fresh device added). |
399 | * Cleared when a sync completes. | 399 | * Cleared when a sync completes. |
400 | */ | 400 | */ |
401 | |||
402 | struct plug_handle plug; | ||
403 | |||
401 | /* per cpu variables */ | 404 | /* per cpu variables */ |
402 | struct raid5_percpu { | 405 | struct raid5_percpu { |
403 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 406 | struct page *spare_page; /* Used when checking P/Q in raid6 */ |
@@ -497,4 +500,8 @@ static inline int algorithm_is_DDF(int layout) | |||
497 | { | 500 | { |
498 | return layout >= 8 && layout <= 10; | 501 | return layout >= 8 && layout <= 10; |
499 | } | 502 | } |
503 | |||
504 | extern int md_raid5_congested(mddev_t *mddev, int bits); | ||
505 | extern void md_raid5_unplug_device(raid5_conf_t *conf); | ||
506 | extern int raid5_set_cache_size(mddev_t *mddev, int size); | ||
500 | #endif | 507 | #endif |