author     Linus Torvalds <torvalds@linux-foundation.org>  2010-10-29 11:47:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2010-10-29 11:47:36 -0400
commit     d8d048f69a618c531575cb1f398a7186f0532ef2 (patch)
tree       cb8d6ee7a3cb2aea53e5e95de0ca689cc69411a2
parent     53113b06e48c6c38f7612c1f8043b8a0d2adf72b (diff)
parent     f3ac8bf7ce1c5abd763ea762e95d1cdcf7799372 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
md: tidy up device searches in read_balance.
md/raid1: fix some typos in comments.
md/raid1: discard unused variable.
md: unplug writes to external bitmaps.
md: use separate bio pool for each md device.
md: change type of first arg to sync_page_io.
md/raid1: perform mem allocation before disabling writes during resync.
md: use bio_kmalloc rather than bio_alloc when failure is acceptable.
md: Fix possible deadlock with multiple mempool allocations.
md: fix and update workqueue usage
md: use sector_t in bitmap_get_counter
md: remove md_mutex locking.
md: Fix regression with raid1 arrays without persistent metadata.
-rw-r--r--  drivers/md/bitmap.c |  30
-rw-r--r--  drivers/md/bitmap.h |   4
-rw-r--r--  drivers/md/faulty.c |   2
-rw-r--r--  drivers/md/md.c     | 162
-rw-r--r--  drivers/md/md.h     |   8
-rw-r--r--  drivers/md/raid1.c  | 224
-rw-r--r--  drivers/md/raid1.h  |   2
-rw-r--r--  drivers/md/raid10.c |  42
-rw-r--r--  drivers/md/raid5.c  |   6
9 files changed, 275 insertions(+), 205 deletions(-)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index e4fb58db5454..5a1ffe3527aa 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -212,7 +212,7 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
 
 		target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
 
-		if (sync_page_io(rdev->bdev, target,
+		if (sync_page_io(rdev, target,
 				 roundup(size, bdev_logical_block_size(rdev->bdev)),
 				 page, READ)) {
 			page->index = index;
@@ -343,7 +343,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
 			atomic_inc(&bitmap->pending_writes);
 			set_buffer_locked(bh);
 			set_buffer_mapped(bh);
-			submit_bh(WRITE, bh);
+			submit_bh(WRITE | REQ_UNPLUG | REQ_SYNC, bh);
 			bh = bh->b_this_page;
 		}
 
@@ -1101,7 +1101,7 @@ static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
 	bitmap_checkfree(bitmap, page);
 }
 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
-					    sector_t offset, int *blocks,
+					    sector_t offset, sector_t *blocks,
 					    int create);
 
 /*
@@ -1115,7 +1115,7 @@ void bitmap_daemon_work(mddev_t *mddev)
 	unsigned long j;
 	unsigned long flags;
 	struct page *page = NULL, *lastpage = NULL;
-	int blocks;
+	sector_t blocks;
 	void *paddr;
 	struct dm_dirty_log *log = mddev->bitmap_info.log;
 
@@ -1258,7 +1258,7 @@ void bitmap_daemon_work(mddev_t *mddev)
 }
 
 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
-					    sector_t offset, int *blocks,
+					    sector_t offset, sector_t *blocks,
 					    int create)
 __releases(bitmap->lock)
 __acquires(bitmap->lock)
@@ -1316,7 +1316,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 	}
 
 	while (sectors) {
-		int blocks;
+		sector_t blocks;
 		bitmap_counter_t *bmc;
 
 		spin_lock_irq(&bitmap->lock);
@@ -1381,7 +1381,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 		success = 0;
 
 	while (sectors) {
-		int blocks;
+		sector_t blocks;
 		unsigned long flags;
 		bitmap_counter_t *bmc;
 
@@ -1423,7 +1423,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 }
 EXPORT_SYMBOL(bitmap_endwrite);
 
-static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
 			       int degraded)
 {
 	bitmap_counter_t *bmc;
@@ -1452,7 +1452,7 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *bloc
 	return rv;
 }
 
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
 		      int degraded)
 {
 	/* bitmap_start_sync must always report on multiples of whole
@@ -1463,7 +1463,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
 	 * Return the 'or' of the result.
 	 */
 	int rv = 0;
-	int blocks1;
+	sector_t blocks1;
 
 	*blocks = 0;
 	while (*blocks < (PAGE_SIZE>>9)) {
@@ -1476,7 +1476,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
 }
 EXPORT_SYMBOL(bitmap_start_sync);
 
-void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
+void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
 {
 	bitmap_counter_t *bmc;
 	unsigned long flags;
@@ -1515,7 +1515,7 @@ void bitmap_close_sync(struct bitmap *bitmap)
 	 * RESYNC bit wherever it is still on
 	 */
 	sector_t sector = 0;
-	int blocks;
+	sector_t blocks;
 	if (!bitmap)
 		return;
 	while (sector < bitmap->mddev->resync_max_sectors) {
@@ -1528,7 +1528,7 @@ EXPORT_SYMBOL(bitmap_close_sync);
 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
 {
 	sector_t s = 0;
-	int blocks;
+	sector_t blocks;
 
 	if (!bitmap)
 		return;
@@ -1562,7 +1562,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
 	 * be 0 at this point
 	 */
 
-	int secs;
+	sector_t secs;
 	bitmap_counter_t *bmc;
 	spin_lock_irq(&bitmap->lock);
 	bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
@@ -1790,7 +1790,7 @@ int bitmap_load(mddev_t *mddev)
 	 * All chunks should be clean, but some might need_sync.
 	 */
 	while (sector < mddev->resync_max_sectors) {
-		int blocks;
+		sector_t blocks;
 		bitmap_start_sync(bitmap, sector, &blocks, 0);
 		sector += blocks;
 	}
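The `int` to `sector_t` conversions above are not cosmetic. `bitmap_get_counter()` and the sync helpers report back how many sectors a bitmap chunk covers, and on a multi-terabyte array that span can exceed `INT_MAX`, so returning it through an `int *` silently truncates (this is what "md: use sector_t in bitmap_get_counter" fixes). A minimal user-space sketch of the hazard, assuming a 64-bit `sector_t` and 512-byte sectors (not kernel code):

```c
#include <stdio.h>

typedef unsigned long long sector_t;    /* 64-bit, as with CONFIG_LBDAF */

int main(void)
{
	/* A 3 TiB span expressed in 512-byte sectors: about 6.4 billion,
	 * well past INT_MAX (about 2.1 billion). */
	sector_t span = 3ULL * 1024 * 1024 * 1024 * 1024 / 512;

	int as_int = (int)span;         /* what 'int *blocks' used to keep */

	printf("span as sector_t = %llu sectors\n", span);
	printf("span as int      = %d (truncated)\n", as_int);
	return 0;
}
```

With the old signature, a caller doing `sector += blocks;` could skip regions or loop once the truncated value went negative; widening every `blocks` variable along the call chain keeps the arithmetic in 64 bits.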
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index e872a7bad6b8..931a7a7c3796 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -271,8 +271,8 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
 			unsigned long sectors, int behind);
 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
 		     unsigned long sectors, int success, int behind);
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int degraded);
-void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
+void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
 void bitmap_close_sync(struct bitmap *bitmap);
 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
 
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 1a8987884614..339fdc670751 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -210,7 +210,7 @@ static int make_request(mddev_t *mddev, struct bio *bio)
 		}
 	}
 	if (failit) {
-		struct bio *b = bio_clone(bio, GFP_NOIO);
+		struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev);
 		b->bi_bdev = conf->rdev->bdev;
 		b->bi_private = bio;
 		b->bi_end_io = faulty_fail;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 225815197a3d..4e957f3140a8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -57,8 +57,6 @@
 #define DEBUG 0
 #define dprintk(x...) ((void)(DEBUG && printk(x)))
 
-static DEFINE_MUTEX(md_mutex);
-
 #ifndef MODULE
 static void autostart_arrays(int part);
 #endif
@@ -69,6 +67,8 @@ static DEFINE_SPINLOCK(pers_lock);
 static void md_print_devices(void);
 
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+static struct workqueue_struct *md_wq;
+static struct workqueue_struct *md_misc_wq;
 
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops;
 
 static int start_readonly;
 
+/* bio_clone_mddev
+ * like bio_clone, but with a local bio set
+ */
+
+static void mddev_bio_destructor(struct bio *bio)
+{
+	mddev_t *mddev, **mddevp;
+
+	mddevp = (void*)bio;
+	mddev = mddevp[-1];
+
+	bio_free(bio, mddev->bio_set);
+}
+
+struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
+			    mddev_t *mddev)
+{
+	struct bio *b;
+	mddev_t **mddevp;
+
+	if (!mddev || !mddev->bio_set)
+		return bio_alloc(gfp_mask, nr_iovecs);
+
+	b = bio_alloc_bioset(gfp_mask, nr_iovecs,
+			     mddev->bio_set);
+	if (!b)
+		return NULL;
+	mddevp = (void*)b;
+	mddevp[-1] = mddev;
+	b->bi_destructor = mddev_bio_destructor;
+	return b;
+}
+EXPORT_SYMBOL_GPL(bio_alloc_mddev);
+
+struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
+			    mddev_t *mddev)
+{
+	struct bio *b;
+	mddev_t **mddevp;
+
+	if (!mddev || !mddev->bio_set)
+		return bio_clone(bio, gfp_mask);
+
+	b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
+			     mddev->bio_set);
+	if (!b)
+		return NULL;
+	mddevp = (void*)b;
+	mddevp[-1] = mddev;
+	b->bi_destructor = mddev_bio_destructor;
+	__bio_clone(b, bio);
+	if (bio_integrity(bio)) {
+		int ret;
+
+		ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
+
+		if (ret < 0) {
+			bio_put(b);
+			return NULL;
+		}
+	}
+
+	return b;
+}
+EXPORT_SYMBOL_GPL(bio_clone_mddev);
+
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
@@ -300,7 +366,7 @@ static void md_end_flush(struct bio *bio, int err)
 
 	if (atomic_dec_and_test(&mddev->flush_pending)) {
 		/* The pre-request flush has finished */
-		schedule_work(&mddev->flush_work);
+		queue_work(md_wq, &mddev->flush_work);
 	}
 	bio_put(bio);
 }
@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev)
 			atomic_inc(&rdev->nr_pending);
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
-			bi = bio_alloc(GFP_KERNEL, 0);
+			bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
 			bi->bi_end_io = md_end_flush;
 			bi->bi_private = rdev;
 			bi->bi_bdev = rdev->bdev;
@@ -369,7 +435,7 @@ void md_flush_request(mddev_t *mddev, struct bio *bio)
 	submit_flushes(mddev);
 
 	if (atomic_dec_and_test(&mddev->flush_pending))
-		schedule_work(&mddev->flush_work);
+		queue_work(md_wq, &mddev->flush_work);
 }
 EXPORT_SYMBOL(md_flush_request);
 
@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws);
 
 static void mddev_put(mddev_t *mddev)
 {
+	struct bio_set *bs = NULL;
+
 	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
 		return;
 	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
@@ -435,19 +503,22 @@ static void mddev_put(mddev_t *mddev)
 		/* Array is not configured at all, and not held active,
 		 * so destroy it */
 		list_del(&mddev->all_mddevs);
+		bs = mddev->bio_set;
+		mddev->bio_set = NULL;
 		if (mddev->gendisk) {
-			/* we did a probe so need to clean up.
-			 * Call schedule_work inside the spinlock
-			 * so that flush_scheduled_work() after
-			 * mddev_find will succeed in waiting for the
-			 * work to be done.
+			/* We did a probe so need to clean up.  Call
+			 * queue_work inside the spinlock so that
+			 * flush_workqueue() after mddev_find will
+			 * succeed in waiting for the work to be done.
 			 */
 			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
-			schedule_work(&mddev->del_work);
+			queue_work(md_misc_wq, &mddev->del_work);
 		} else
 			kfree(mddev);
 	}
 	spin_unlock(&all_mddevs_lock);
+	if (bs)
+		bioset_free(bs);
 }
 
 void mddev_init(mddev_t *mddev)
@@ -691,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 	 * if zero is reached.
 	 * If an error occurred, call md_error
 	 */
-	struct bio *bio = bio_alloc(GFP_NOIO, 1);
+	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
 
 	bio->bi_bdev = rdev->bdev;
 	bio->bi_sector = sector;
@@ -722,16 +793,16 @@ static void bi_complete(struct bio *bio, int error)
 	complete((struct completion*)bio->bi_private);
 }
 
-int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
 		 struct page *page, int rw)
 {
-	struct bio *bio = bio_alloc(GFP_NOIO, 1);
+	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
 	struct completion event;
 	int ret;
 
 	rw |= REQ_SYNC | REQ_UNPLUG;
 
-	bio->bi_bdev = bdev;
+	bio->bi_bdev = rdev->bdev;
 	bio->bi_sector = sector;
 	bio_add_page(bio, page, size, 0);
 	init_completion(&event);
@@ -757,7 +828,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size)
 		return 0;
 
 
-	if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ))
+	if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ))
 		goto fail;
 	rdev->sb_loaded = 1;
 	return 0;
@@ -1850,7 +1921,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 	synchronize_rcu();
 	INIT_WORK(&rdev->del_work, md_delayed_delete);
 	kobject_get(&rdev->kobj);
-	schedule_work(&rdev->del_work);
+	queue_work(md_misc_wq, &rdev->del_work);
 }
 
 /*
@@ -2108,6 +2179,8 @@ repeat:
 	if (!mddev->persistent) {
 		clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
 		clear_bit(MD_CHANGE_DEVS, &mddev->flags);
+		if (!mddev->external)
+			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
 		wake_up(&mddev->sb_wait);
 		return;
 	}
@@ -4192,10 +4265,10 @@ static int md_alloc(dev_t dev, char *name)
 	shift = partitioned ? MdpMinorShift : 0;
 	unit = MINOR(mddev->unit) >> shift;
 
-	/* wait for any previous instance if this device
-	 * to be completed removed (mddev_delayed_delete).
+	/* wait for any previous instance of this device to be
+	 * completely removed (mddev_delayed_delete).
 	 */
-	flush_scheduled_work();
+	flush_workqueue(md_misc_wq);
 
 	mutex_lock(&disks_mutex);
 	error = -EEXIST;
@@ -4378,6 +4451,9 @@ int md_run(mddev_t *mddev)
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
 	}
 
+	if (mddev->bio_set == NULL)
+		mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
+
 	spin_lock(&pers_lock);
 	pers = find_pers(mddev->level, mddev->clevel);
 	if (!pers || !try_module_get(pers->owner)) {
@@ -5885,16 +5961,14 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 	mddev_t *mddev = mddev_find(bdev->bd_dev);
 	int err;
 
-	mutex_lock(&md_mutex);
 	if (mddev->gendisk != bdev->bd_disk) {
 		/* we are racing with mddev_put which is discarding this
 		 * bd_disk.
 		 */
 		mddev_put(mddev);
 		/* Wait until bdev->bd_disk is definitely gone */
-		flush_scheduled_work();
+		flush_workqueue(md_misc_wq);
 		/* Then retry the open from the top */
-		mutex_unlock(&md_mutex);
 		return -ERESTARTSYS;
 	}
 	BUG_ON(mddev != bdev->bd_disk->private_data);
@@ -5908,7 +5982,6 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 
 	check_disk_size_change(mddev->gendisk, bdev);
 out:
-	mutex_unlock(&md_mutex);
 	return err;
 }
 
@@ -5917,10 +5990,8 @@ static int md_release(struct gendisk *disk, fmode_t mode)
 	mddev_t *mddev = disk->private_data;
 
 	BUG_ON(!mddev);
-	mutex_lock(&md_mutex);
 	atomic_dec(&mddev->openers);
 	mddev_put(mddev);
-	mutex_unlock(&md_mutex);
 
 	return 0;
 }
@@ -6052,7 +6123,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	if (mddev->event_work.func)
-		schedule_work(&mddev->event_work);
+		queue_work(md_misc_wq, &mddev->event_work);
 	md_new_event_inintr(mddev);
 }
 
@@ -7212,12 +7283,23 @@ static void md_geninit(void)
 
 static int __init md_init(void)
 {
-	if (register_blkdev(MD_MAJOR, "md"))
-		return -1;
-	if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
-		unregister_blkdev(MD_MAJOR, "md");
-		return -1;
-	}
+	int ret = -ENOMEM;
+
+	md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
+	if (!md_wq)
+		goto err_wq;
+
+	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
+	if (!md_misc_wq)
+		goto err_misc_wq;
+
+	if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
+		goto err_md;
+
+	if ((ret = register_blkdev(0, "mdp")) < 0)
+		goto err_mdp;
+	mdp_major = ret;
+
 	blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
 			    md_probe, NULL, NULL);
 	blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
@@ -7228,8 +7310,16 @@ static int __init md_init(void)
 
 	md_geninit();
 	return 0;
-}
 
+err_mdp:
+	unregister_blkdev(MD_MAJOR, "md");
+err_md:
+	destroy_workqueue(md_misc_wq);
+err_misc_wq:
+	destroy_workqueue(md_wq);
+err_wq:
+	return ret;
+}
 
 #ifndef MODULE
 
@@ -7316,6 +7406,8 @@ static __exit void md_exit(void)
 		export_array(mddev);
 		mddev->hold_active = 0;
 	}
+	destroy_workqueue(md_misc_wq);
+	destroy_workqueue(md_wq);
 }
 
 subsys_initcall(md_init);
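The per-device pool introduced above ("md: use separate bio pool for each md device") leans on a small trick. `bioset_create(BIO_POOL_SIZE, sizeof(mddev))` passes `sizeof(mddev)` — the size of a *pointer*, since `mddev` is a local `mddev_t *` — as the bioset's front padding, so every bio allocated from the pool carries one pointer-sized slot directly in front of it. `bio_alloc_mddev()` stores the owning mddev there (`mddevp[-1] = mddev`), and `mddev_bio_destructor()` reads it back to find the right `bio_set` to free into. A user-space sketch of the same front-padding pattern, with hypothetical names (not kernel code):

```c
#include <stdio.h>
#include <stdlib.h>

struct owner { const char *name; };
struct obj   { int payload; };

/* Allocate an obj with one back-pointer slot in front of it,
 * mirroring how bio_alloc_mddev() uses the bioset front_pad. */
static struct obj *obj_alloc(struct owner *own)
{
	void *raw = malloc(sizeof(struct owner *) + sizeof(struct obj));
	struct owner **ownp;
	struct obj *o;

	if (!raw)
		return NULL;
	o = (struct obj *)((struct owner **)raw + 1);
	ownp = (void *)o;
	ownp[-1] = own;                 /* same trick as mddevp[-1] = mddev */
	return o;
}

/* The destructor recovers the owner without any field in struct obj. */
static void obj_free(struct obj *o)
{
	struct owner **ownp = (void *)o;

	printf("freeing object owned by %s\n", ownp[-1]->name);
	free(&ownp[-1]);                /* frees the original allocation */
}

int main(void)
{
	struct owner md0 = { "md0" };
	struct obj *o = obj_alloc(&md0);

	if (o) {
		o->payload = 42;
		obj_free(o);
	}
	return 0;
}
```

The payoff is that `struct bio` itself needs no extra field, and bios cloned for one array can never exhaust a pool shared with another array or with the rest of the block layer.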
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 112a2c32db0c..d05bab55df4e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -331,6 +331,8 @@ struct mddev_s
 	struct attribute_group		*to_remove;
 	struct plug_handle		*plug; /* if used by personality */
 
+	struct bio_set			*bio_set;
+
 	/* Generic flush handling.
 	 * The last to finish preflush schedules a worker to submit
 	 * the rest of the request (without the REQ_FLUSH flag).
@@ -495,7 +497,7 @@ extern void md_flush_request(mddev_t *mddev, struct bio *bio);
 extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 			   sector_t sector, int size, struct page *page);
 extern void md_super_wait(mddev_t *mddev);
-extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+extern int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
 			struct page *page, int rw);
 extern void md_do_sync(mddev_t *mddev);
 extern void md_new_event(mddev_t *mddev);
@@ -517,4 +519,8 @@ extern void md_rdev_init(mdk_rdev_t *rdev);
 
 extern void mddev_suspend(mddev_t *mddev);
 extern void mddev_resume(mddev_t *mddev);
+extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
+				   mddev_t *mddev);
+extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
+				   mddev_t *mddev);
 #endif /* _MD_MD_H */
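A sketch of how a personality might use the two helpers declared above. Both fall back to the global allocators when `mddev->bio_set` is NULL, so the call is safe even before `md_run()` has created the pool. `forward_to_mirror()` is a hypothetical call site for illustration, not code from this merge:

```c
/* Hypothetical caller: clone a request through the array's
 * private pool and redirect it at one member device. */
static int forward_to_mirror(mddev_t *mddev, mdk_rdev_t *rdev, struct bio *bio)
{
	struct bio *clone = bio_clone_mddev(bio, GFP_NOIO, mddev);

	if (!clone)
		return -ENOMEM;
	clone->bi_bdev = rdev->bdev;
	clone->bi_sector = bio->bi_sector + rdev->data_offset;
	generic_make_request(clone);
	return 0;
}
```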
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 378a25894c57..45f8324196ec 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -100,7 +100,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 	 * Allocate bios : 1 for reading, n-1 for writing
 	 */
 	for (j = pi->raid_disks ; j-- ; ) {
-		bio = bio_alloc(gfp_flags, RESYNC_PAGES);
+		bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
 		if (!bio)
 			goto out_free_bio;
 		r1_bio->bios[j] = bio;
@@ -306,6 +306,28 @@ static void raid1_end_read_request(struct bio *bio, int error)
 	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 }
 
+static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv,
+			      int behind)
+{
+	if (atomic_dec_and_test(&r1_bio->remaining))
+	{
+		/* it really is the end of this request */
+		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+			/* free extra copy of the data pages */
+			int i = vcnt;
+			while (i--)
+				safe_put_page(bv[i].bv_page);
+		}
+		/* clear the bitmap if all writes complete successfully */
+		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+				r1_bio->sectors,
+				!test_bit(R1BIO_Degraded, &r1_bio->state),
+				behind);
+		md_write_end(r1_bio->mddev);
+		raid_end_bio_io(r1_bio);
+	}
+}
+
 static void raid1_end_write_request(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -373,21 +395,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
 	 * Let's see if all mirrored write operations have finished
 	 * already.
 	 */
-	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-			/* free extra copy of the data pages */
-			int i = bio->bi_vcnt;
-			while (i--)
-				safe_put_page(bio->bi_io_vec[i].bv_page);
-		}
-		/* clear the bitmap if all writes complete successfully */
-		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
-				r1_bio->sectors,
-				!test_bit(R1BIO_Degraded, &r1_bio->state),
-				behind);
-		md_write_end(r1_bio->mddev);
-		raid_end_bio_io(r1_bio);
-	}
+	r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind);
 
 	if (to_put)
 		bio_put(to_put);
@@ -411,11 +419,13 @@ static void raid1_end_write_request(struct bio *bio, int error)
 static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 {
 	const sector_t this_sector = r1_bio->sector;
-	int new_disk = conf->last_used, disk = new_disk;
-	int wonly_disk = -1;
 	const int sectors = r1_bio->sectors;
+	int new_disk = -1;
+	int start_disk;
+	int i;
 	sector_t new_distance, current_distance;
 	mdk_rdev_t *rdev;
+	int choose_first;
 
 	rcu_read_lock();
 	/*
@@ -426,54 +436,33 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
  retry:
 	if (conf->mddev->recovery_cp < MaxSector &&
 	    (this_sector + sectors >= conf->next_resync)) {
-		/* Choose the first operational device, for consistancy */
-		new_disk = 0;
-
-		for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
-		     r1_bio->bios[new_disk] == IO_BLOCKED ||
-		     !rdev || !test_bit(In_sync, &rdev->flags)
-			     || test_bit(WriteMostly, &rdev->flags);
-		     rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
-
-			if (rdev && test_bit(In_sync, &rdev->flags) &&
-			    r1_bio->bios[new_disk] != IO_BLOCKED)
-				wonly_disk = new_disk;
-
-			if (new_disk == conf->raid_disks - 1) {
-				new_disk = wonly_disk;
-				break;
-			}
-		}
-		goto rb_out;
+		choose_first = 1;
+		start_disk = 0;
+	} else {
+		choose_first = 0;
+		start_disk = conf->last_used;
 	}
 
-
 	/* make sure the disk is operational */
-	for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
-	     r1_bio->bios[new_disk] == IO_BLOCKED ||
-	     !rdev || !test_bit(In_sync, &rdev->flags) ||
-		     test_bit(WriteMostly, &rdev->flags);
-	     rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {
-
-		if (rdev && test_bit(In_sync, &rdev->flags) &&
-		    r1_bio->bios[new_disk] != IO_BLOCKED)
-			wonly_disk = new_disk;
-
-		if (new_disk <= 0)
-			new_disk = conf->raid_disks;
-		new_disk--;
-		if (new_disk == disk) {
-			new_disk = wonly_disk;
+	for (i = 0 ; i < conf->raid_disks ; i++) {
+		int disk = start_disk + i;
+		if (disk >= conf->raid_disks)
+			disk -= conf->raid_disks;
+
+		rdev = rcu_dereference(conf->mirrors[disk].rdev);
+		if (r1_bio->bios[disk] == IO_BLOCKED
+		    || rdev == NULL
+		    || !test_bit(In_sync, &rdev->flags))
+			continue;
+
+		new_disk = disk;
+		if (!test_bit(WriteMostly, &rdev->flags))
 			break;
-		}
 	}
 
-	if (new_disk < 0)
+	if (new_disk < 0 || choose_first)
 		goto rb_out;
 
-	disk = new_disk;
-	/* now disk == new_disk == starting point for search */
-
 	/*
 	 * Don't change to another disk for sequential reads:
 	 */
@@ -482,20 +471,21 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 	if (this_sector == conf->mirrors[new_disk].head_position)
 		goto rb_out;
 
-	current_distance = abs(this_sector - conf->mirrors[disk].head_position);
-
-	/* Find the disk whose head is closest */
+	current_distance = abs(this_sector
+			       - conf->mirrors[new_disk].head_position);
 
-	do {
-		if (disk <= 0)
-			disk = conf->raid_disks;
-		disk--;
+	/* look for a better disk - i.e. head is closer */
+	start_disk = new_disk;
+	for (i = 1; i < conf->raid_disks; i++) {
+		int disk = start_disk + 1;
+		if (disk >= conf->raid_disks)
+			disk -= conf->raid_disks;
 
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
-
-		if (!rdev || r1_bio->bios[disk] == IO_BLOCKED ||
-		    !test_bit(In_sync, &rdev->flags) ||
-		    test_bit(WriteMostly, &rdev->flags))
+		if (r1_bio->bios[disk] == IO_BLOCKED
+		    || rdev == NULL
+		    || !test_bit(In_sync, &rdev->flags)
+		    || test_bit(WriteMostly, &rdev->flags))
 			continue;
 
 		if (!atomic_read(&rdev->nr_pending)) {
@@ -507,11 +497,9 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 			current_distance = new_distance;
 			new_disk = disk;
 		}
-	} while (disk != conf->last_used);
+	}
 
  rb_out:
-
-
 	if (new_disk >= 0) {
 		rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
 		if (!rdev)
@@ -658,7 +646,7 @@ static void raise_barrier(conf_t *conf)
 	/* block any new IO from starting */
 	conf->barrier++;
 
-	/* No wait for all pending IO to complete */
+	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
 			    conf->resync_lock,
@@ -735,23 +723,26 @@ static void unfreeze_array(conf_t *conf)
 }
 
 
-/* duplicate the data pages for behind I/O */
-static struct page **alloc_behind_pages(struct bio *bio)
+/* duplicate the data pages for behind I/O
+ * We return a list of bio_vec rather than just page pointers
+ * as it makes freeing easier
+ */
+static struct bio_vec *alloc_behind_pages(struct bio *bio)
 {
 	int i;
 	struct bio_vec *bvec;
-	struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page *),
+	struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
 					GFP_NOIO);
 	if (unlikely(!pages))
 		goto do_sync_io;
 
 	bio_for_each_segment(bvec, bio, i) {
-		pages[i] = alloc_page(GFP_NOIO);
-		if (unlikely(!pages[i]))
+		pages[i].bv_page = alloc_page(GFP_NOIO);
+		if (unlikely(!pages[i].bv_page))
 			goto do_sync_io;
-		memcpy(kmap(pages[i]) + bvec->bv_offset,
+		memcpy(kmap(pages[i].bv_page) + bvec->bv_offset,
 		       kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
-		kunmap(pages[i]);
+		kunmap(pages[i].bv_page);
 		kunmap(bvec->bv_page);
 	}
 
@@ -759,8 +750,8 @@ static struct page **alloc_behind_pages(struct bio *bio)
 
 do_sync_io:
 	if (pages)
-		for (i = 0; i < bio->bi_vcnt && pages[i]; i++)
-			put_page(pages[i]);
+		for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++)
+			put_page(pages[i].bv_page);
 	kfree(pages);
 	PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 	return NULL;
@@ -775,8 +766,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	int i, targets = 0, disks;
 	struct bitmap *bitmap;
 	unsigned long flags;
-	struct bio_list bl;
-	struct page **behind_pages = NULL;
+	struct bio_vec *behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
@@ -851,7 +841,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	}
 	r1_bio->read_disk = rdisk;
 
-	read_bio = bio_clone(bio, GFP_NOIO);
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
 
 	r1_bio->bios[rdisk] = read_bio;
 
@@ -873,13 +863,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	 * bios[x] to bio
 	 */
 	disks = conf->raid_disks;
-#if 0
-	{ static int first=1;
-	if (first) printk("First Write sector %llu disks %d\n",
-			  (unsigned long long)r1_bio->sector, disks);
-	first = 0;
-	}
-#endif
  retry_write:
 	blocked_rdev = NULL;
 	rcu_read_lock();
@@ -937,16 +920,17 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	    (behind_pages = alloc_behind_pages(bio)) != NULL)
 		set_bit(R1BIO_BehindIO, &r1_bio->state);
 
-	atomic_set(&r1_bio->remaining, 0);
+	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
-	bio_list_init(&bl);
+	bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors,
+			  test_bit(R1BIO_BehindIO, &r1_bio->state));
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
 		if (!r1_bio->bios[i])
 			continue;
 
-		mbio = bio_clone(bio, GFP_NOIO);
+		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
 		r1_bio->bios[i] = mbio;
 
 		mbio->bi_sector	= r1_bio->sector + conf->mirrors[i].rdev->data_offset;
@@ -963,39 +947,29 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 			 * we clear any unused pointer in the io_vec, rather
 			 * than leave them unchanged.  This is important
 			 * because when we come to free the pages, we won't
-			 * know the originial bi_idx, so we just free
+			 * know the original bi_idx, so we just free
 			 * them all
 			 */
 			__bio_for_each_segment(bvec, mbio, j, 0)
-				bvec->bv_page = behind_pages[j];
+				bvec->bv_page = behind_pages[j].bv_page;
 			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
 				atomic_inc(&r1_bio->behind_remaining);
 		}
 
 		atomic_inc(&r1_bio->remaining);
-
-		bio_list_add(&bl, mbio);
+		spin_lock_irqsave(&conf->device_lock, flags);
+		bio_list_add(&conf->pending_bio_list, mbio);
+		blk_plug_device(mddev->queue);
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
+	r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
 	kfree(behind_pages); /* the behind pages are attached to the bios now */
 
-	bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors,
-				test_bit(R1BIO_BehindIO, &r1_bio->state));
-	spin_lock_irqsave(&conf->device_lock, flags);
-	bio_list_merge(&conf->pending_bio_list, &bl);
-	bio_list_init(&bl);
-
-	blk_plug_device(mddev->queue);
-	spin_unlock_irqrestore(&conf->device_lock, flags);
-
-	/* In case raid1d snuck into freeze_array */
+	/* In case raid1d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
 	if (do_sync)
 		md_wakeup_thread(mddev->thread);
-#if 0
-	while ((bio = bio_list_pop(&bl)) != NULL)
-		generic_make_request(bio);
-#endif
 
 	return 0;
 }
@@ -1183,7 +1157,7 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
 			err = -EBUSY;
 			goto abort;
 		}
-		/* Only remove non-faulty devices is recovery
+		/* Only remove non-faulty devices if recovery
 		 * is not possible.
 		 */
 		if (!test_bit(Faulty, &rdev->flags) &&
@@ -1245,7 +1219,7 @@ static void end_sync_write(struct bio *bio, int error)
 			break;
 		}
 	if (!uptodate) {
-		int sync_blocks = 0;
+		sector_t sync_blocks = 0;
 		sector_t s = r1_bio->sector;
 		long sectors_to_go = r1_bio->sectors;
 		/* make sure these bits doesn't get cleared. */
@@ -1388,7 +1362,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 				 * active, and resync is currently active
 				 */
 				rdev = conf->mirrors[d].rdev;
-				if (sync_page_io(rdev->bdev,
+				if (sync_page_io(rdev,
 						 sect + rdev->data_offset,
 						 s<<9,
 						 bio->bi_io_vec[idx].bv_page,
@@ -1414,7 +1388,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 					continue;
 				rdev = conf->mirrors[d].rdev;
 				atomic_add(s, &rdev->corrected_errors);
-				if (sync_page_io(rdev->bdev,
+				if (sync_page_io(rdev,
 						 sect + rdev->data_offset,
 						 s<<9,
 						 bio->bi_io_vec[idx].bv_page,
@@ -1429,7 +1403,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 				if (r1_bio->bios[d]->bi_end_io != end_sync_read)
 					continue;
 				rdev = conf->mirrors[d].rdev;
-				if (sync_page_io(rdev->bdev,
+				if (sync_page_io(rdev,
 						 sect + rdev->data_offset,
 						 s<<9,
 						 bio->bi_io_vec[idx].bv_page,
@@ -1513,7 +1487,7 @@ static void fix_read_error(conf_t *conf, int read_disk,
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags) &&
-			    sync_page_io(rdev->bdev,
+			    sync_page_io(rdev,
 					 sect + rdev->data_offset,
 					 s<<9,
 					 conf->tmppage, READ))
@@ -1539,7 +1513,7 @@ static void fix_read_error(conf_t *conf, int read_disk,
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags)) {
-				if (sync_page_io(rdev->bdev,
+				if (sync_page_io(rdev,
 						 sect + rdev->data_offset,
 						 s<<9, conf->tmppage, WRITE)
 				    == 0)
@@ -1556,7 +1530,7 @@ static void fix_read_error(conf_t *conf, int read_disk,
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags)) {
-				if (sync_page_io(rdev->bdev,
+				if (sync_page_io(rdev,
 						 sect + rdev->data_offset,
 						 s<<9, conf->tmppage, READ)
 				    == 0)
@@ -1646,7 +1620,8 @@ static void raid1d(mddev_t *mddev)
 					mddev->ro ? IO_BLOCKED : NULL;
 				r1_bio->read_disk = disk;
 				bio_put(bio);
-				bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
+				bio = bio_clone_mddev(r1_bio->master_bio,
+						      GFP_NOIO, mddev);
 				r1_bio->bios[r1_bio->read_disk] = bio;
 				rdev = conf->mirrors[disk].rdev;
 				if (printk_ratelimit())
@@ -1705,7 +1680,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	int i;
 	int wonly = -1;
 	int write_targets = 0, read_targets = 0;
-	int sync_blocks;
+	sector_t sync_blocks;
 	int still_degraded = 0;
 
 	if (!conf->r1buf_pool)
@@ -1755,11 +1730,11 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		msleep_interruptible(1000);
 
 	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 	raise_barrier(conf);
 
 	conf->next_resync = sector_nr;
 
-	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 	rcu_read_lock();
 	/*
 	 * If we get a correctably read error during resync or recovery,
@@ -1971,7 +1946,6 @@ static conf_t *setup_conf(mddev_t *mddev)
 	init_waitqueue_head(&conf->wait_barrier);
 
 	bio_list_init(&conf->pending_bio_list);
-	bio_list_init(&conf->flushing_bio_list);
 
 	conf->last_used = -1;
 	for (i = 0; i < conf->raid_disks; i++) {
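One subtlety in the make_request() rewrite above: `r1_bio->remaining` now starts at 1 rather than 0. The submitting thread holds that extra reference while it queues the per-mirror clones and drops it with the final `r1_bio_write_done()` call, so an early-completing clone can never see the count hit zero while later clones are still being set up, and a write that ends up with zero targets still completes through the same path. A user-space sketch of this "bias reference" pattern using C11 atomics (illustrative names, not kernel code):

```c
#include <stdatomic.h>
#include <stdio.h>

struct request { atomic_int remaining; };

static void request_done(struct request *r)
{
	/* Fires exactly once, after the submitter's bias reference
	 * and every sub-I/O reference have been dropped. */
	if (atomic_fetch_sub(&r->remaining, 1) == 1)
		printf("request complete\n");
}

static void submit(struct request *r, int nr_subios)
{
	int i;

	atomic_init(&r->remaining, 1);          /* submitter's bias */
	for (i = 0; i < nr_subios; i++) {
		atomic_fetch_add(&r->remaining, 1);
		/* hand sub-I/O i to a completion context, which will
		 * eventually call request_done(r); done inline here */
		request_done(r);
	}
	request_done(r);        /* drop the bias; may complete right here */
}

int main(void)
{
	struct request r;

	submit(&r, 0);          /* zero targets: still completes exactly once */
	submit(&r, 3);
	return 0;
}
```

raid10.c below adopts the same pattern, which is why its old `if (unlikely(!atomic_read(&r10_bio->remaining)))` special case for a dead array disappears. Note also that in sync_request() the `mempool_alloc()` now happens *before* `raise_barrier()`, so memory allocation can no longer sleep while all pending writes are blocked ("md/raid1: perform mem allocation before disabling writes during resync").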
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index adf8cfd73313..cbfdf1a6acd9 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -35,8 +35,6 @@ struct r1_private_data_s {
 	struct list_head	retry_list;
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;
-	/* queue of writes that have been unplugged */
-	struct bio_list		flushing_bio_list;
 
 	/* for use when syncing mirrors: */
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f0d082f749be..c67aa54694ae 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -120,7 +120,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 	 * Allocate bios.
 	 */
 	for (j = nalloc ; j-- ; ) {
-		bio = bio_alloc(gfp_flags, RESYNC_PAGES);
+		bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
 		if (!bio)
 			goto out_free_bio;
 		r10_bio->devs[j].bio = bio;
@@ -801,7 +801,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
-	struct bio_list bl;
 	unsigned long flags;
 	mdk_rdev_t *blocked_rdev;
 
@@ -890,7 +889,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	}
 	mirror = conf->mirrors + disk;
 
-	read_bio = bio_clone(bio, GFP_NOIO);
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
 
 	r10_bio->devs[slot].bio = read_bio;
 
@@ -950,16 +949,16 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		goto retry_write;
 	}
 
-	atomic_set(&r10_bio->remaining, 0);
+	atomic_set(&r10_bio->remaining, 1);
+	bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
 
-	bio_list_init(&bl);
 	for (i = 0; i < conf->copies; i++) {
 		struct bio *mbio;
 		int d = r10_bio->devs[i].devnum;
 		if (!r10_bio->devs[i].bio)
 			continue;
 
-		mbio = bio_clone(bio, GFP_NOIO);
+		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
 		r10_bio->devs[i].bio = mbio;
 
 		mbio->bi_sector	= r10_bio->devs[i].addr+
@@ -970,22 +969,22 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
-		bio_list_add(&bl, mbio);
+		spin_lock_irqsave(&conf->device_lock, flags);
+		bio_list_add(&conf->pending_bio_list, mbio);
+		blk_plug_device(mddev->queue);
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
-	if (unlikely(!atomic_read(&r10_bio->remaining))) {
-		/* the array is dead */
+	if (atomic_dec_and_test(&r10_bio->remaining)) {
+		/* This matches the end of raid10_end_write_request() */
+		bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+				r10_bio->sectors,
+				!test_bit(R10BIO_Degraded, &r10_bio->state),
+				0);
 		md_write_end(mddev);
 		raid_end_bio_io(r10_bio);
-		return 0;
 	}
 
-	bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
-	spin_lock_irqsave(&conf->device_lock, flags);
-	bio_list_merge(&conf->pending_bio_list, &bl);
-	blk_plug_device(mddev->queue);
-	spin_unlock_irqrestore(&conf->device_lock, flags);
-
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
@@ -1558,7 +1557,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 			    test_bit(In_sync, &rdev->flags)) {
 				atomic_inc(&rdev->nr_pending);
 				rcu_read_unlock();
-				success = sync_page_io(rdev->bdev,
+				success = sync_page_io(rdev,
 						       r10_bio->devs[sl].addr +
 						       sect + rdev->data_offset,
 						       s<<9,
@@ -1597,7 +1596,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 				atomic_inc(&rdev->nr_pending);
 				rcu_read_unlock();
 				atomic_add(s, &rdev->corrected_errors);
-				if (sync_page_io(rdev->bdev,
+				if (sync_page_io(rdev,
 						 r10_bio->devs[sl].addr +
 						 sect + rdev->data_offset,
 						 s<<9, conf->tmppage, WRITE)
@@ -1634,7 +1633,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 				char b[BDEVNAME_SIZE];
 				atomic_inc(&rdev->nr_pending);
 				rcu_read_unlock();
-				if (sync_page_io(rdev->bdev,
+				if (sync_page_io(rdev,
 						 r10_bio->devs[sl].addr +
 						 sect + rdev->data_offset,
 						 s<<9, conf->tmppage,
@@ -1747,7 +1746,8 @@ static void raid10d(mddev_t *mddev)
 					mdname(mddev),
 					bdevname(rdev->bdev,b),
 					(unsigned long long)r10_bio->sector);
-				bio = bio_clone(r10_bio->master_bio, GFP_NOIO);
+				bio = bio_clone_mddev(r10_bio->master_bio,
+						      GFP_NOIO, mddev);
 				r10_bio->devs[r10_bio->read_slot].bio = bio;
 				bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
 					+ rdev->data_offset;
@@ -1820,7 +1820,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	int disk;
 	int i;
 	int max_sync;
-	int sync_blocks;
+	sector_t sync_blocks;
 
 	sector_t sectors_skipped = 0;
 	int chunks_skipped = 0;
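The `bio_alloc()` to `bio_kmalloc()` switch in `r10buf_pool_alloc()` (and in raid1's `r1buf_pool_alloc()` above) addresses "md: Fix possible deadlock with multiple mempool allocations": these functions are themselves mempool constructors, and `bio_alloc()` draws from the shared `fs_bio_set` mempool, where allocation never fails but may sleep until someone returns a bio. Two threads each holding a partially built resync buffer could then wait on each other forever. `bio_kmalloc()` goes straight to the slab allocator, where failure is simply returned and handled by the existing `out_free_bio` unwind. A condensed, simplified sketch of the rule — the names are illustrative, not the actual kernel code:

```c
#define NR_BIOS      4
#define RESYNC_PAGES 16

struct r1bio_like {
	struct bio *bios[NR_BIOS];
};

/* Mempool constructor: everything allocated inside it must be able
 * to fail fast rather than sleep on another shared pool. */
static void *resync_buf_alloc(gfp_t gfp_flags, void *data)
{
	struct r1bio_like *rb = kzalloc(sizeof(*rb), gfp_flags);
	int j;

	if (!rb)
		return NULL;
	for (j = 0; j < NR_BIOS; j++) {
		/* bio_kmalloc() may return NULL; bio_alloc() would
		 * instead block on fs_bio_set, which can deadlock when
		 * nested inside a mempool callback like this one. */
		rb->bios[j] = bio_kmalloc(gfp_flags, RESYNC_PAGES);
		if (!rb->bios[j])
			goto out_free;
	}
	return rb;

out_free:
	while (j--)
		bio_put(rb->bios[j]);
	kfree(rb);
	return NULL;
}
```

Returning NULL is safe here because `mempool_alloc()` treats a failed constructor as "pool empty" and falls back to waiting for a reserved element to be freed, which is exactly the behavior the fixed-size resync pools want.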
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 31140d1259dc..dc574f303f8b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3876,9 +3876,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio)
 		return 0;
 	}
 	/*
-	 * use bio_clone to make a copy of the bio
+	 * use bio_clone_mddev to make a copy of the bio
 	 */
-	align_bi = bio_clone(raid_bio, GFP_NOIO);
+	align_bi = bio_clone_mddev(raid_bio, GFP_NOIO, mddev);
 	if (!align_bi)
 		return 0;
 	/*
@@ -4360,7 +4360,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 	raid5_conf_t *conf = mddev->private;
 	struct stripe_head *sh;
 	sector_t max_sector = mddev->dev_sectors;
-	int sync_blocks;
+	sector_t sync_blocks;
 	int still_degraded = 0;
 	int i;
 