author		Linus Torvalds <torvalds@linux-foundation.org>		2012-10-13 16:22:01 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>		2012-10-13 16:22:01 -0400
commit		9db908806b85c1430150fbafe269a7b21b07d15d
tree		3911759c93e0be26b6771e1a92b75612b206ffa5 /drivers/md/raid10.c
parent		4d7127dace8cf4b05eb7c8c8531fc204fbb195f4
parent		72f36d5972a166197036c1281963f6863c429bf2
Merge tag 'md-3.7' of git://neil.brown.name/md
Pull md updates from NeilBrown:
- "discard" support, some dm-raid improvements and other assorted bits
and pieces.
* tag 'md-3.7' of git://neil.brown.name/md: (29 commits)
md: refine reporting of resync/reshape delays.
md/raid5: be careful not to resize_stripes too big.
md: make sure manual changes to recovery checkpoint are saved.
md/raid10: use correct limit variable
md: writing to sync_action should clear the read-auto state.
md: change resync_mismatches to atomic64_t to avoid races
md/raid5: make sure to_read and to_write never go negative.
md: When RAID5 is dirty, force reconstruct-write instead of read-modify-write.
md/raid5: protect debug message against NULL dereference.
md/raid5: add some missing locking in handle_failed_stripe.
MD: raid5 avoid unnecessary zero page for trim
MD: raid5 trim support
md/bitmap: Don't use IS_ERR to judge alloc_page().
md/raid1: Don't release reference to device while handling read error.
raid: replace list_for_each_continue_rcu with new interface
add further __init annotations to crypto/xor.c
DM RAID: Fix for "sync" directive ineffectiveness
DM RAID: Fix comparison of index and quantity for "rebuild" parameter
DM RAID: Add rebuild capability for RAID10
DM RAID: Move 'rebuild' checking code to its own function
...
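The "discard" support called out in the pull summary follows one pattern throughout the raid10 changes below: the array only forwards REQ_DISCARD bios to member devices whose queues advertise discard, and it sets QUEUE_FLAG_DISCARD on its own queue only when at least one member supports it. A minimal sketch of that plumbing (illustrative only, not the md code; my_array_setup_discard() and my_forward_bio() are made-up helpers):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Advertise discard on the array queue iff at least one member supports it. */
static void my_array_setup_discard(struct request_queue *q,
				   struct block_device **members, int nr)
{
	bool discard_supported = false;
	int i;

	for (i = 0; i < nr; i++)
		if (blk_queue_discard(bdev_get_queue(members[i])))
			discard_supported = true;

	if (discard_supported)
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
	else
		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
}

/* When forwarding, quietly drop a discard aimed at a member that cannot take it. */
static void my_forward_bio(struct bio *bio)
{
	if ((bio->bi_rw & REQ_DISCARD) &&
	    !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))
		bio_endio(bio, 0);	/* complete it as a successful no-op */
	else
		generic_make_request(bio);
}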
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--	drivers/md/raid10.c	95
1 file changed, 84 insertions, 11 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0138a727c1f3..906ccbd0f7dc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -911,7 +911,12 @@ static void flush_pending_writes(struct r10conf *conf)
 		while (bio) { /* submit pending writes */
 			struct bio *next = bio->bi_next;
 			bio->bi_next = NULL;
-			generic_make_request(bio);
+			if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+			    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+				/* Just ignore it */
+				bio_endio(bio, 0);
+			else
+				generic_make_request(bio);
 			bio = next;
 		}
 	} else
@@ -1050,6 +1055,44 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
 		return rdev->new_data_offset;
 }
 
+struct raid10_plug_cb {
+	struct blk_plug_cb	cb;
+	struct bio_list		pending;
+	int			pending_cnt;
+};
+
+static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+	struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
+						   cb);
+	struct mddev *mddev = plug->cb.data;
+	struct r10conf *conf = mddev->private;
+	struct bio *bio;
+
+	if (from_schedule) {
+		spin_lock_irq(&conf->device_lock);
+		bio_list_merge(&conf->pending_bio_list, &plug->pending);
+		conf->pending_count += plug->pending_cnt;
+		spin_unlock_irq(&conf->device_lock);
+		md_wakeup_thread(mddev->thread);
+		kfree(plug);
+		return;
+	}
+
+	/* we aren't scheduling, so we can do the write-out directly. */
+	bio = bio_list_get(&plug->pending);
+	bitmap_unplug(mddev->bitmap);
+	wake_up(&conf->wait_barrier);
+
+	while (bio) { /* submit pending writes */
+		struct bio *next = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next;
+	}
+	kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r10conf *conf = mddev->private;
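The raid10_unplug() callback added above hooks into the block layer's per-task plugging: blk_check_plugged() looks up (or allocates) a driver-owned structure embedding a struct blk_plug_cb on the current task's plug, and the callback runs when the plug is flushed, with from_schedule set when the flush comes from the scheduler rather than an explicit unplug. A rough caller-side sketch of the same pattern (my_plug_cb, my_unplug() and my_queue_bio() are placeholders, not md code):

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

struct my_plug_cb {
	struct blk_plug_cb cb;		/* embedded block-layer callback */
	struct bio_list pending;	/* bios batched while the task is plugged */
};

static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct my_plug_cb *plug = container_of(cb, struct my_plug_cb, cb);
	struct bio *bio = bio_list_get(&plug->pending);

	/*
	 * A real driver (as raid10_unplug() does above) hands the list to its
	 * own thread when from_schedule is true instead of submitting here.
	 */
	while (bio) {
		struct bio *next = bio->bi_next;

		bio->bi_next = NULL;
		generic_make_request(bio);
		bio = next;
	}
	kfree(plug);			/* the callback owns the allocation */
}

static void my_queue_bio(void *owner, struct bio *bio)
{
	/* Find or allocate this task's callback; NULL when no plug is active. */
	struct blk_plug_cb *cb = blk_check_plugged(my_unplug, owner,
						   sizeof(struct my_plug_cb));

	if (cb) {
		struct my_plug_cb *plug = container_of(cb, struct my_plug_cb, cb);

		/* blk_check_plugged() zero-allocates, so the bio_list starts empty. */
		bio_list_add(&plug->pending, bio);
	} else {
		generic_make_request(bio);	/* no plug active: submit now */
	}
}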
@@ -1061,8 +1104,12 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
+	const unsigned long do_discard = (bio->bi_rw
+					  & (REQ_DISCARD | REQ_SECURE));
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid10_plug_cb *plug = NULL;
 	int sectors_handled;
 	int max_sectors;
 	int sectors;
@@ -1081,7 +1128,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 		     || conf->prev.near_copies < conf->prev.raid_disks))) {
 		struct bio_pair *bp;
 		/* Sanity check -- queue functions should prevent this happening */
-		if (bio->bi_vcnt != 1 ||
+		if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
 		    bio->bi_idx != 0)
 			goto bad_map;
 		/* This is a one page bio that upper layers
@@ -1410,15 +1457,26 @@ retry_write:
 					       conf->mirrors[d].rdev));
 		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | do_sync | do_fua;
+		mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
+
+		cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
+		if (cb)
+			plug = container_of(cb, struct raid10_plug_cb, cb);
+		else
+			plug = NULL;
 		spin_lock_irqsave(&conf->device_lock, flags);
-		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
+		if (plug) {
+			bio_list_add(&plug->pending, mbio);
+			plug->pending_cnt++;
+		} else {
+			bio_list_add(&conf->pending_bio_list, mbio);
+			conf->pending_count++;
+		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!mddev_check_plugged(mddev))
+		if (!plug)
 			md_wakeup_thread(mddev->thread);
 
 		if (!r10_bio->devs[i].repl_bio)
@@ -1439,7 +1497,7 @@ retry_write:
 					       conf->mirrors[d].replacement));
 		mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
 		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | do_sync | do_fua;
+		mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
@@ -1638,7 +1696,7 @@ static int raid10_spare_active(struct mddev *mddev)
 			   && !test_bit(Faulty, &tmp->rdev->flags)
 			   && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
 			count++;
-			sysfs_notify_dirent(tmp->rdev->sysfs_state);
+			sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
 		}
 	}
 	spin_lock_irqsave(&conf->device_lock, flags);
@@ -1725,6 +1783,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		clear_bit(Unmerged, &rdev->flags);
 	}
 	md_integrity_add_rdev(rdev, mddev);
+	if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	print_conf(conf);
 	return err;
 }
@@ -1952,7 +2013,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 				break;
 		if (j == vcnt)
 			continue;
-		mddev->resync_mismatches += r10_bio->sectors;
+		atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
 		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
 			/* Don't fix anything. */
 			continue;
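The atomic64_add() above comes from the "change resync_mismatches to atomic64_t" commit in the list: with a plain counter, concurrent += updates from several threads can lose increments, while an atomic64_t makes each add atomic and gives readers a consistent 64-bit value even on 32-bit machines. A minimal sketch of the idea (placeholder counter, not the md code):

#include <linux/atomic.h>
#include <linux/types.h>

static atomic64_t mismatches = ATOMIC64_INIT(0);

/* Called from any number of threads without extra locking. */
static void note_mismatches(sector_t sectors)
{
	atomic64_add(sectors, &mismatches);
}

/* Reported (e.g. via sysfs) as a consistent 64-bit snapshot. */
static u64 read_mismatches(void)
{
	return atomic64_read(&mismatches);
}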
@@ -2673,8 +2734,9 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 	}
 }
 
-static void raid10d(struct mddev *mddev)
+static void raid10d(struct md_thread *thread)
 {
+	struct mddev *mddev = thread->mddev;
 	struct r10bio *r10_bio;
 	unsigned long flags;
 	struct r10conf *conf = mddev->private;
@@ -3158,7 +3220,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 				else {
 					bad_sectors -= (sector - first_bad);
 					if (max_sync > bad_sectors)
-						max_sync = max_sync;
+						max_sync = bad_sectors;
 					continue;
 				}
 			}
@@ -3482,6 +3544,7 @@ static int run(struct mddev *mddev)
 	sector_t size;
 	sector_t min_offset_diff = 0;
 	int first = 1;
+	bool discard_supported = false;
 
 	if (mddev->private == NULL) {
 		conf = setup_conf(mddev);
@@ -3498,6 +3561,8 @@ static int run(struct mddev *mddev)
 
 	chunk_size = mddev->chunk_sectors << 9;
 	if (mddev->queue) {
+		blk_queue_max_discard_sectors(mddev->queue,
+					      mddev->chunk_sectors);
 		blk_queue_io_min(mddev->queue, chunk_size);
 		if (conf->geo.raid_disks % conf->geo.near_copies)
 			blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3543,8 +3608,16 @@ static int run(struct mddev *mddev)
 				  rdev->data_offset << 9);
 
 		disk->head_position = 0;
+
+		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+			discard_supported = true;
 	}
 
+	if (discard_supported)
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	/* need to check that every block has at least one working mirror */
 	if (!enough(conf, -1)) {
 		printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",