Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c  84
1 file changed, 58 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7433871f4b3a..450066007160 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -18,6 +18,30 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+/*
+ * BITMAP UNPLUGGING:
+ *
+ * The sequencing for updating the bitmap reliably is a little
+ * subtle (and I got it wrong the first time) so it deserves some
+ * explanation.
+ *
+ * We group bitmap updates into batches. Each batch has a number.
+ * We may write out several batches at once, but that isn't very important.
+ * conf->seq_write is the number of the last batch successfully written.
+ * conf->seq_flush is the number of the last batch that was closed to
+ *    new additions.
+ * When we discover that we will need to write to any block in a stripe
+ * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
+ * the number of the batch it will be in. This is seq_flush+1.
+ * When we are ready to do a write, if that batch hasn't been written yet,
+ *   we plug the array and queue the stripe for later.
+ * When an unplug happens, we increment seq_flush, thus closing the current
+ *   batch.
+ * When we notice that seq_flush > seq_write, we write out all pending updates
+ * to the bitmap, and advance seq_write to where seq_flush was.
+ * This may occasionally write a bit out twice, but is sure never to
+ *    miss any bits.
+ */
+
 #include <linux/module.h>
 #include <linux/slab.h>
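
A minimal sketch of the protocol the comment above describes, with all locking
stripped out. The struct and function names below are invented for
illustration; they stand in for the roles played by add_stripe_bio(),
raid5_unplug_device(), raid5d() and __release_stripe() respectively:

	/* Free-running batch counters, mirroring conf->seq_flush,
	 * conf->seq_write and sh->bm_seq. */
	struct seq_state { int seq_flush, seq_write; };

	/* add_stripe_bio: the stripe's bitmap update joins the batch
	 * that is still open for additions. */
	static void record_batch(struct seq_state *c, int *bm_seq)
	{
		*bm_seq = c->seq_flush + 1;
	}

	/* unplug: close the current batch to new additions. */
	static void close_batch(struct seq_state *c)
	{
		c->seq_flush++;
	}

	/* raid5d: once a batch has been closed, write the bitmap out and
	 * record that everything up to that batch is on disk. */
	static void flush_batches(struct seq_state *c)
	{
		if (c->seq_flush != c->seq_write) {
			int seq = c->seq_flush;
			/* bitmap_unplug() writes the pending bits here */
			c->seq_write = seq;
		}
	}

	/* __release_stripe: a stripe must keep waiting while the batch
	 * recorded in its bm_seq has not been written yet. */
	static int must_wait(const struct seq_state *c, int bm_seq)
	{
		return bm_seq - c->seq_write > 0;
	}
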
@@ -88,12 +112,14 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 	BUG_ON(!list_empty(&sh->lru));
 	BUG_ON(atomic_read(&conf->active_stripes)==0);
 	if (test_bit(STRIPE_HANDLE, &sh->state)) {
-		if (test_bit(STRIPE_DELAYED, &sh->state))
+		if (test_bit(STRIPE_DELAYED, &sh->state)) {
 			list_add_tail(&sh->lru, &conf->delayed_list);
-		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-			 conf->seq_write == sh->bm_seq)
+			blk_plug_device(conf->mddev->queue);
+		} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+			   sh->bm_seq - conf->seq_write > 0) {
 			list_add_tail(&sh->lru, &conf->bitmap_list);
-		else {
+			blk_plug_device(conf->mddev->queue);
+		} else {
 			clear_bit(STRIPE_BIT_DELAY, &sh->state);
 			list_add_tail(&sh->lru, &conf->handle_list);
 		}
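
Two things change in __release_stripe() above. First, a stripe that is being
queued for later now plugs the device queue at the moment it is queued, which
is what lets the patch drop the separate raid5_plug_device() helper further
down. Second, the bitmap test changes from conf->seq_write == sh->bm_seq to
sh->bm_seq - conf->seq_write > 0: a wrap-safe "this batch has not been written
yet" ordering comparison on free-running counters, the same idiom as the
kernel's time_after(). A standalone demonstration of the idiom (seq_after() is
a hypothetical helper, not part of this patch):

	#include <stdio.h>

	/* "a is after b" for free-running sequence counters: take the
	 * difference in unsigned arithmetic (well defined on wraparound)
	 * and interpret the result as signed. */
	static int seq_after(unsigned int a, unsigned int b)
	{
		return (int)(a - b) > 0;
	}

	int main(void)
	{
		printf("%d\n", seq_after(5, 3));           /* 1: batch 5 is newer */
		printf("%d\n", seq_after(3, 5));           /* 0 */
		printf("%d\n", seq_after(2, 0xfffffffeu)); /* 1: newer even after wrap */
		return 0;
	}
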
@@ -270,7 +296,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 				     < (conf->max_nr_stripes *3/4)
 				     || !conf->inactive_blocked),
 				    conf->device_lock,
-				    unplug_slaves(conf->mddev)
+				    raid5_unplug_device(conf->mddev->queue)
 					);
 			conf->inactive_blocked = 0;
 		} else
@@ -281,7 +307,8 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 	} else {
 		if (!test_bit(STRIPE_HANDLE, &sh->state))
 			atomic_inc(&conf->active_stripes);
-		if (list_empty(&sh->lru))
+		if (list_empty(&sh->lru) &&
+		    !test_bit(STRIPE_EXPANDING, &sh->state))
 			BUG();
 		list_del_init(&sh->lru);
 	}
@@ -496,6 +523,8 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = sh->disks, i;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+	char b[BDEVNAME_SIZE];
+	mdk_rdev_t *rdev;
 
 	if (bi->bi_size)
 		return 1;
@@ -543,25 +572,39 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 #endif
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
-			printk(KERN_INFO "raid5: read error corrected!!\n");
+			rdev = conf->disks[i].rdev;
+			printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
+			       mdname(conf->mddev), STRIPE_SECTORS,
+			       (unsigned long long)sh->sector + rdev->data_offset,
+			       bdevname(rdev->bdev, b));
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
 		}
 		if (atomic_read(&conf->disks[i].rdev->read_errors))
 			atomic_set(&conf->disks[i].rdev->read_errors, 0);
 	} else {
+		const char *bdn = bdevname(conf->disks[i].rdev->bdev, b);
 		int retry = 0;
+		rdev = conf->disks[i].rdev;
+
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
-		atomic_inc(&conf->disks[i].rdev->read_errors);
+		atomic_inc(&rdev->read_errors);
 		if (conf->mddev->degraded)
-			printk(KERN_WARNING "raid5: read error not correctable.\n");
+			printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n",
+			       mdname(conf->mddev),
+			       (unsigned long long)sh->sector + rdev->data_offset,
+			       bdn);
 		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
 			/* Oh, no!!! */
-			printk(KERN_WARNING "raid5: read error NOT corrected!!\n");
-		else if (atomic_read(&conf->disks[i].rdev->read_errors)
+			printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
+			       mdname(conf->mddev),
+			       (unsigned long long)sh->sector + rdev->data_offset,
+			       bdn);
+		else if (atomic_read(&rdev->read_errors)
 			 > conf->max_nr_stripes)
 			printk(KERN_WARNING
-			       "raid5: Too many read errors, failing device.\n");
+			       "raid5:%s: Too many read errors, failing device %s.\n",
+			       mdname(conf->mddev), bdn);
 		else
 			retry = 1;
 		if (retry)
@@ -569,7 +612,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
 		else {
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
-			md_error(conf->mddev, conf->disks[i].rdev);
+			md_error(conf->mddev, rdev);
 		}
 	}
 	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
@@ -1270,9 +1313,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 		(unsigned long long)sh->sector, dd_idx);
 
 	if (conf->mddev->bitmap && firstwrite) {
-		sh->bm_seq = conf->seq_write;
 		bitmap_startwrite(conf->mddev->bitmap, sh->sector,
 				  STRIPE_SECTORS, 0);
+		sh->bm_seq = conf->seq_flush+1;
 		set_bit(STRIPE_BIT_DELAY, &sh->state);
 	}
 
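
Two related changes here: sh->bm_seq is now derived from conf->seq_flush
rather than conf->seq_write, and it is assigned only after bitmap_startwrite()
has set the bit in the in-memory bitmap, matching the new header comment
("record in sh->bm_seq the number of the batch it will be in; this is
seq_flush+1"). One plausible interleaving the old placement allowed (an
editor's reconstruction, not taken from the patch itself):

	/* Hypothetical race with the old ordering:
	 *
	 *   CPU0 (add_stripe_bio)           CPU1 (raid5d)
	 *   sh->bm_seq = conf->seq_write;
	 *                                   bitmap flush completes,
	 *                                   conf->seq_write advances
	 *   bitmap_startwrite(...);         (bit set in memory only)
	 *
	 * __release_stripe() then sees that seq_write has moved past
	 * bm_seq, skips the bitmap_list wait, and the data write can be
	 * issued before the bit just set by bitmap_startwrite() reaches
	 * disk. Tagging the stripe with seq_flush + 1 (the batch the new
	 * bit will actually be written in), and doing so only after the
	 * bit is set, closes this window. */
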
@@ -2554,13 +2597,6 @@ static int raid5_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }
 
-static inline void raid5_plug_device(raid5_conf_t *conf)
-{
-	spin_lock_irq(&conf->device_lock);
-	blk_plug_device(conf->mddev->queue);
-	spin_unlock_irq(&conf->device_lock);
-}
-
 static int make_request(request_queue_t *q, struct bio * bi)
 {
 	mddev_t *mddev = q->queuedata;
@@ -2670,7 +2706,6 @@ static int make_request(request_queue_t *q, struct bio * bi)
 				goto retry;
 			}
 			finish_wait(&conf->wait_for_overlap, &w);
-			raid5_plug_device(conf);
 			handle_stripe(sh, NULL);
 			release_stripe(sh);
 		} else {
@@ -2923,7 +2958,7 @@ static void raid5d (mddev_t *mddev)
 	while (1) {
 		struct list_head *first;
 
-		if (conf->seq_flush - conf->seq_write > 0) {
+		if (conf->seq_flush != conf->seq_write) {
 			int seq = conf->seq_flush;
 			spin_unlock_irq(&conf->device_lock);
 			bitmap_unplug(mddev->bitmap);
@@ -3246,9 +3281,6 @@ static int run(mddev_t *mddev)
 		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 		mddev->sync_thread = md_register_thread(md_do_sync, mddev,
 							"%s_reshape");
-		/* FIXME if md_register_thread fails?? */
-		md_wakeup_thread(mddev->sync_thread);
-
 	}
 
 	/* read-ahead size must cover two whole stripes, which is