diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 84 |
1 files changed, 58 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7433871f4b3a..450066007160 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -18,6 +18,30 @@ | |||
18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | /* | ||
22 | * BITMAP UNPLUGGING: | ||
23 | * | ||
24 | * The sequencing for updating the bitmap reliably is a little | ||
25 | * subtle (and I got it wrong the first time) so it deserves some | ||
26 | * explanation. | ||
27 | * | ||
28 | * We group bitmap updates into batches. Each batch has a number. | ||
29 | * We may write out several batches at once, but that isn't very important. | ||
30 | * conf->seq_write is the number of the last batch successfully written. | ||
31 | * conf->seq_flush is the number of the last batch that was closed to | ||
32 | * new additions. | ||
33 | * When we discover that we will need to write to any block in a stripe | ||
34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq | ||
35 | * the number of the batch it will be in. This is seq_flush+1. | ||
36 | * When we are ready to do a write, if that batch hasn't been written yet, | ||
37 | * we plug the array and queue the stripe for later. | ||
38 | * When an unplug happens, we increment seq_flush, thus closing the current | ||
39 | * batch. | ||
40 | * When we notice that seq_flush > seq_write, we write out all pending updates | ||
41 | * to the bitmap, and advance seq_write to where seq_flush was. | ||
42 | * This may occasionally write a bit out twice, but is sure never to | ||
43 | * miss any bits. | ||
44 | */ | ||
21 | 45 | ||
22 | #include <linux/module.h> | 46 | #include <linux/module.h> |
23 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
@@ -88,12 +112,14 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
88 | BUG_ON(!list_empty(&sh->lru)); | 112 | BUG_ON(!list_empty(&sh->lru)); |
89 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 113 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
90 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 114 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
91 | if (test_bit(STRIPE_DELAYED, &sh->state)) | 115 | if (test_bit(STRIPE_DELAYED, &sh->state)) { |
92 | list_add_tail(&sh->lru, &conf->delayed_list); | 116 | list_add_tail(&sh->lru, &conf->delayed_list); |
93 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 117 | blk_plug_device(conf->mddev->queue); |
94 | conf->seq_write == sh->bm_seq) | 118 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
119 | sh->bm_seq - conf->seq_write > 0) { | ||
95 | list_add_tail(&sh->lru, &conf->bitmap_list); | 120 | list_add_tail(&sh->lru, &conf->bitmap_list); |
96 | else { | 121 | blk_plug_device(conf->mddev->queue); |
122 | } else { | ||
97 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 123 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
98 | list_add_tail(&sh->lru, &conf->handle_list); | 124 | list_add_tail(&sh->lru, &conf->handle_list); |
99 | } | 125 | } |
@@ -270,7 +296,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
270 | < (conf->max_nr_stripes *3/4) | 296 | < (conf->max_nr_stripes *3/4) |
271 | || !conf->inactive_blocked), | 297 | || !conf->inactive_blocked), |
272 | conf->device_lock, | 298 | conf->device_lock, |
273 | unplug_slaves(conf->mddev) | 299 | raid5_unplug_device(conf->mddev->queue) |
274 | ); | 300 | ); |
275 | conf->inactive_blocked = 0; | 301 | conf->inactive_blocked = 0; |
276 | } else | 302 | } else |
@@ -281,7 +307,8 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
281 | } else { | 307 | } else { |
282 | if (!test_bit(STRIPE_HANDLE, &sh->state)) | 308 | if (!test_bit(STRIPE_HANDLE, &sh->state)) |
283 | atomic_inc(&conf->active_stripes); | 309 | atomic_inc(&conf->active_stripes); |
284 | if (list_empty(&sh->lru)) | 310 | if (list_empty(&sh->lru) && |
311 | !test_bit(STRIPE_EXPANDING, &sh->state)) | ||
285 | BUG(); | 312 | BUG(); |
286 | list_del_init(&sh->lru); | 313 | list_del_init(&sh->lru); |
287 | } | 314 | } |
@@ -496,6 +523,8 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, | |||
496 | raid5_conf_t *conf = sh->raid_conf; | 523 | raid5_conf_t *conf = sh->raid_conf; |
497 | int disks = sh->disks, i; | 524 | int disks = sh->disks, i; |
498 | int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); | 525 | int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); |
526 | char b[BDEVNAME_SIZE]; | ||
527 | mdk_rdev_t *rdev; | ||
499 | 528 | ||
500 | if (bi->bi_size) | 529 | if (bi->bi_size) |
501 | return 1; | 530 | return 1; |
@@ -543,25 +572,39 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, | |||
543 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 572 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
544 | #endif | 573 | #endif |
545 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | 574 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { |
546 | printk(KERN_INFO "raid5: read error corrected!!\n"); | 575 | rdev = conf->disks[i].rdev; |
576 | printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", | ||
577 | mdname(conf->mddev), STRIPE_SECTORS, | ||
578 | (unsigned long long)sh->sector + rdev->data_offset, | ||
579 | bdevname(rdev->bdev, b)); | ||
547 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 580 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
548 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 581 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
549 | } | 582 | } |
550 | if (atomic_read(&conf->disks[i].rdev->read_errors)) | 583 | if (atomic_read(&conf->disks[i].rdev->read_errors)) |
551 | atomic_set(&conf->disks[i].rdev->read_errors, 0); | 584 | atomic_set(&conf->disks[i].rdev->read_errors, 0); |
552 | } else { | 585 | } else { |
586 | const char *bdn = bdevname(conf->disks[i].rdev->bdev, b); | ||
553 | int retry = 0; | 587 | int retry = 0; |
588 | rdev = conf->disks[i].rdev; | ||
589 | |||
554 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 590 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
555 | atomic_inc(&conf->disks[i].rdev->read_errors); | 591 | atomic_inc(&rdev->read_errors); |
556 | if (conf->mddev->degraded) | 592 | if (conf->mddev->degraded) |
557 | printk(KERN_WARNING "raid5: read error not correctable.\n"); | 593 | printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", |
594 | mdname(conf->mddev), | ||
595 | (unsigned long long)sh->sector + rdev->data_offset, | ||
596 | bdn); | ||
558 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | 597 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) |
559 | /* Oh, no!!! */ | 598 | /* Oh, no!!! */ |
560 | printk(KERN_WARNING "raid5: read error NOT corrected!!\n"); | 599 | printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", |
561 | else if (atomic_read(&conf->disks[i].rdev->read_errors) | 600 | mdname(conf->mddev), |
601 | (unsigned long long)sh->sector + rdev->data_offset, | ||
602 | bdn); | ||
603 | else if (atomic_read(&rdev->read_errors) | ||
562 | > conf->max_nr_stripes) | 604 | > conf->max_nr_stripes) |
563 | printk(KERN_WARNING | 605 | printk(KERN_WARNING |
564 | "raid5: Too many read errors, failing device.\n"); | 606 | "raid5:%s: Too many read errors, failing device %s.\n", |
607 | mdname(conf->mddev), bdn); | ||
565 | else | 608 | else |
566 | retry = 1; | 609 | retry = 1; |
567 | if (retry) | 610 | if (retry) |
@@ -569,7 +612,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, | |||
569 | else { | 612 | else { |
570 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 613 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
571 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 614 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
572 | md_error(conf->mddev, conf->disks[i].rdev); | 615 | md_error(conf->mddev, rdev); |
573 | } | 616 | } |
574 | } | 617 | } |
575 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); | 618 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); |
@@ -1270,9 +1313,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
1270 | (unsigned long long)sh->sector, dd_idx); | 1313 | (unsigned long long)sh->sector, dd_idx); |
1271 | 1314 | ||
1272 | if (conf->mddev->bitmap && firstwrite) { | 1315 | if (conf->mddev->bitmap && firstwrite) { |
1273 | sh->bm_seq = conf->seq_write; | ||
1274 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, | 1316 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, |
1275 | STRIPE_SECTORS, 0); | 1317 | STRIPE_SECTORS, 0); |
1318 | sh->bm_seq = conf->seq_flush+1; | ||
1276 | set_bit(STRIPE_BIT_DELAY, &sh->state); | 1319 | set_bit(STRIPE_BIT_DELAY, &sh->state); |
1277 | } | 1320 | } |
1278 | 1321 | ||
@@ -2554,13 +2597,6 @@ static int raid5_issue_flush(request_queue_t *q, struct gendisk *disk, | |||
2554 | return ret; | 2597 | return ret; |
2555 | } | 2598 | } |
2556 | 2599 | ||
2557 | static inline void raid5_plug_device(raid5_conf_t *conf) | ||
2558 | { | ||
2559 | spin_lock_irq(&conf->device_lock); | ||
2560 | blk_plug_device(conf->mddev->queue); | ||
2561 | spin_unlock_irq(&conf->device_lock); | ||
2562 | } | ||
2563 | |||
2564 | static int make_request(request_queue_t *q, struct bio * bi) | 2600 | static int make_request(request_queue_t *q, struct bio * bi) |
2565 | { | 2601 | { |
2566 | mddev_t *mddev = q->queuedata; | 2602 | mddev_t *mddev = q->queuedata; |
@@ -2670,7 +2706,6 @@ static int make_request(request_queue_t *q, struct bio * bi) | |||
2670 | goto retry; | 2706 | goto retry; |
2671 | } | 2707 | } |
2672 | finish_wait(&conf->wait_for_overlap, &w); | 2708 | finish_wait(&conf->wait_for_overlap, &w); |
2673 | raid5_plug_device(conf); | ||
2674 | handle_stripe(sh, NULL); | 2709 | handle_stripe(sh, NULL); |
2675 | release_stripe(sh); | 2710 | release_stripe(sh); |
2676 | } else { | 2711 | } else { |
@@ -2923,7 +2958,7 @@ static void raid5d (mddev_t *mddev) | |||
2923 | while (1) { | 2958 | while (1) { |
2924 | struct list_head *first; | 2959 | struct list_head *first; |
2925 | 2960 | ||
2926 | if (conf->seq_flush - conf->seq_write > 0) { | 2961 | if (conf->seq_flush != conf->seq_write) { |
2927 | int seq = conf->seq_flush; | 2962 | int seq = conf->seq_flush; |
2928 | spin_unlock_irq(&conf->device_lock); | 2963 | spin_unlock_irq(&conf->device_lock); |
2929 | bitmap_unplug(mddev->bitmap); | 2964 | bitmap_unplug(mddev->bitmap); |
@@ -3246,9 +3281,6 @@ static int run(mddev_t *mddev) | |||
3246 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 3281 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
3247 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, | 3282 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, |
3248 | "%s_reshape"); | 3283 | "%s_reshape"); |
3249 | /* FIXME if md_register_thread fails?? */ | ||
3250 | md_wakeup_thread(mddev->sync_thread); | ||
3251 | |||
3252 | } | 3284 | } |
3253 | 3285 | ||
3254 | /* read-ahead size must cover two whole stripes, which is | 3286 | /* read-ahead size must cover two whole stripes, which is |