path: root/drivers/md/raid10.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-13 16:22:01 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-13 16:22:01 -0400
commit	9db908806b85c1430150fbafe269a7b21b07d15d (patch)
tree	3911759c93e0be26b6771e1a92b75612b206ffa5 /drivers/md/raid10.c
parent	4d7127dace8cf4b05eb7c8c8531fc204fbb195f4 (diff)
parent	72f36d5972a166197036c1281963f6863c429bf2 (diff)
Merge tag 'md-3.7' of git://neil.brown.name/md
Pull md updates from NeilBrown:
 - "discard" support, some dm-raid improvements and other assorted bits
   and pieces.

* tag 'md-3.7' of git://neil.brown.name/md: (29 commits)
  md: refine reporting of resync/reshape delays.
  md/raid5: be careful not to resize_stripes too big.
  md: make sure manual changes to recovery checkpoint are saved.
  md/raid10: use correct limit variable
  md: writing to sync_action should clear the read-auto state.
  md: change resync_mismatches to atomic64_t to avoid races
  md/raid5: make sure to_read and to_write never go negative.
  md: When RAID5 is dirty, force reconstruct-write instead of read-modify-write.
  md/raid5: protect debug message against NULL dereference.
  md/raid5: add some missing locking in handle_failed_stripe.
  MD: raid5 avoid unnecessary zero page for trim
  MD: raid5 trim support
  md/bitmap: Don't use IS_ERR to judge alloc_page().
  md/raid1: Don't release reference to device while handling read error.
  raid: replace list_for_each_continue_rcu with new interface
  add further __init annotations to crypto/xor.c
  DM RAID: Fix for "sync" directive ineffectiveness
  DM RAID: Fix comparison of index and quantity for "rebuild" parameter
  DM RAID: Add rebuild capability for RAID10
  DM RAID: Move 'rebuild' checking code to its own function
  ...
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--	drivers/md/raid10.c	95
1 file changed, 84 insertions(+), 11 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0138a727c1f3..906ccbd0f7dc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -911,7 +911,12 @@ static void flush_pending_writes(struct r10conf *conf)
 		while (bio) { /* submit pending writes */
 			struct bio *next = bio->bi_next;
 			bio->bi_next = NULL;
-			generic_make_request(bio);
+			if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+			    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+				/* Just ignore it */
+				bio_endio(bio, 0);
+			else
+				generic_make_request(bio);
 			bio = next;
 		}
 	} else
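
The hunk above makes flush_pending_writes() complete, rather than forward, a discard bio whose target device does not advertise discard support; this matters because the run() hunk at the end of this diff sets QUEUE_FLAG_DISCARD on the array if any member supports discard, so discards can reach members that cannot service them. A minimal sketch of that decision, as plain user-space C with stand-in types (not the kernel block-layer API):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for struct bio and the queue's discard capability; the real
 * kernel check is (bio->bi_rw & REQ_DISCARD) and blk_queue_discard(). */
struct toy_bio { bool is_discard; };

static void end_ok(struct toy_bio *b)  { (void)b; puts("completed as a no-op"); }
static void forward(struct toy_bio *b) { (void)b; puts("passed to the device"); }

static void dispatch(struct toy_bio *bio, bool queue_supports_discard)
{
	/* Same shape as the new code in flush_pending_writes(): a discard
	 * aimed at a device without discard support is "just ignored" by
	 * completing it successfully instead of submitting it. */
	if (bio->is_discard && !queue_supports_discard)
		end_ok(bio);
	else
		forward(bio);
}

int main(void)
{
	struct toy_bio discard = { .is_discard = true };

	dispatch(&discard, false);	/* completed as a no-op */
	dispatch(&discard, true);	/* passed to the device */
	return 0;
}
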
@@ -1050,6 +1055,44 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
 	return rdev->new_data_offset;
 }
 
+struct raid10_plug_cb {
+	struct blk_plug_cb	cb;
+	struct bio_list		pending;
+	int			pending_cnt;
+};
+
+static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+	struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
+						   cb);
+	struct mddev *mddev = plug->cb.data;
+	struct r10conf *conf = mddev->private;
+	struct bio *bio;
+
+	if (from_schedule) {
+		spin_lock_irq(&conf->device_lock);
+		bio_list_merge(&conf->pending_bio_list, &plug->pending);
+		conf->pending_count += plug->pending_cnt;
+		spin_unlock_irq(&conf->device_lock);
+		md_wakeup_thread(mddev->thread);
+		kfree(plug);
+		return;
+	}
+
+	/* we aren't scheduling, so we can do the write-out directly. */
+	bio = bio_list_get(&plug->pending);
+	bitmap_unplug(mddev->bitmap);
+	wake_up(&conf->wait_barrier);
+
+	while (bio) { /* submit pending writes */
+		struct bio *next = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next;
+	}
+	kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r10conf *conf = mddev->private;
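
raid10_unplug() is the callback behind the new per-task plugging: writes queued while a blk_plug is active collect in the plug's private bio_list, and at unplug time they are either handed back to the md thread (when the unplug is driven from the scheduler and direct submission is not safe) or written out directly. A rough user-space model of that two-path hand-off, using a plain singly-linked list in place of struct bio_list (names here are illustrative, not the kernel API):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins: a singly-linked "bio" chain and a per-task plug. */
struct toy_bio  { int id; struct toy_bio *next; };
struct toy_plug { struct toy_bio *head, *tail; int count; };

static struct toy_bio *shared_pending;	/* models conf->pending_bio_list */

static void plug_add(struct toy_plug *p, struct toy_bio *b)
{
	b->next = NULL;
	if (p->tail)
		p->tail->next = b;
	else
		p->head = b;
	p->tail = b;
	p->count++;
}

/* Models raid10_unplug(): defer the batch to a worker when called from
 * the scheduler, otherwise submit it directly. */
static void toy_unplug(struct toy_plug *p, bool from_schedule)
{
	if (from_schedule) {
		/* splice onto the shared pending list; a worker thread
		 * (the md thread in the kernel) submits it later */
		if (p->tail) {
			p->tail->next = shared_pending;
			shared_pending = p->head;
		}
		printf("deferred %d bios to the worker\n", p->count);
		return;
	}
	for (struct toy_bio *b = p->head; b; ) {
		struct toy_bio *next = b->next;
		printf("submitting bio %d\n", b->id);
		free(b);
		b = next;
	}
}

int main(void)
{
	struct toy_plug plug = {0};

	for (int i = 0; i < 3; i++) {
		struct toy_bio *b = malloc(sizeof(*b));
		b->id = i;
		plug_add(&plug, b);
	}
	toy_unplug(&plug, false);	/* direct write-out path */
	return 0;
}

In the caller (see the retry_write hunk below), blk_check_plugged() decides which path is used: when it returns a callback structure the write goes on the plug's list, and only when no plug is active does the bio fall back to the shared pending list and an immediate wakeup of the md thread.
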
@@ -1061,8 +1104,12 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
+	const unsigned long do_discard = (bio->bi_rw
+					  & (REQ_DISCARD | REQ_SECURE));
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid10_plug_cb *plug = NULL;
 	int sectors_handled;
 	int max_sectors;
 	int sectors;
@@ -1081,7 +1128,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	    || conf->prev.near_copies < conf->prev.raid_disks))) {
 		struct bio_pair *bp;
 		/* Sanity check -- queue functions should prevent this happening */
-		if (bio->bi_vcnt != 1 ||
+		if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
 		    bio->bi_idx != 0)
 			goto bad_map;
 		/* This is a one page bio that upper layers
@@ -1410,15 +1457,26 @@ retry_write:
 							      conf->mirrors[d].rdev));
 			mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
-			mbio->bi_rw = WRITE | do_sync | do_fua;
+			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
 			mbio->bi_private = r10_bio;
 
 			atomic_inc(&r10_bio->remaining);
+
+			cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
+			if (cb)
+				plug = container_of(cb, struct raid10_plug_cb, cb);
+			else
+				plug = NULL;
 			spin_lock_irqsave(&conf->device_lock, flags);
-			bio_list_add(&conf->pending_bio_list, mbio);
-			conf->pending_count++;
+			if (plug) {
+				bio_list_add(&plug->pending, mbio);
+				plug->pending_cnt++;
+			} else {
+				bio_list_add(&conf->pending_bio_list, mbio);
+				conf->pending_count++;
+			}
 			spin_unlock_irqrestore(&conf->device_lock, flags);
-			if (!mddev_check_plugged(mddev))
+			if (!plug)
 				md_wakeup_thread(mddev->thread);
 
 			if (!r10_bio->devs[i].repl_bio)
@@ -1439,7 +1497,7 @@ retry_write:
 							     conf->mirrors[d].replacement));
 			mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
-			mbio->bi_rw = WRITE | do_sync | do_fua;
+			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
 			mbio->bi_private = r10_bio;
 
 			atomic_inc(&r10_bio->remaining);
@@ -1638,7 +1696,7 @@ static int raid10_spare_active(struct mddev *mddev)
 		    && !test_bit(Faulty, &tmp->rdev->flags)
 		    && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
 			count++;
-			sysfs_notify_dirent(tmp->rdev->sysfs_state);
+			sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
 		}
 	}
 	spin_lock_irqsave(&conf->device_lock, flags);
@@ -1725,6 +1783,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		clear_bit(Unmerged, &rdev->flags);
 	}
 	md_integrity_add_rdev(rdev, mddev);
+	if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	print_conf(conf);
 	return err;
 }
@@ -1952,7 +2013,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 				break;
 		if (j == vcnt)
 			continue;
-		mddev->resync_mismatches += r10_bio->sectors;
+		atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
 		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
 			/* Don't fix anything. */
 			continue;
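
This hunk replaces a plain "+=" on resync_mismatches with atomic64_add() because, per the commit in the shortlog above, the counter can be updated from more than one context and a 64-bit read-modify-write is not atomic. A small user-space illustration of the difference using C11 atomics (my own example, not kernel code; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t plain_counter;          /* like the old "mismatches += sectors" */
static _Atomic uint64_t atomic_counter; /* like atomic64_add() in the new code */

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		plain_counter += 8;                   /* racy read-modify-write */
		atomic_fetch_add(&atomic_counter, 8); /* never loses updates */
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	/* Expected total is 4 * 1000000 * 8 = 32000000; the plain counter
	 * usually comes up short, the atomic one is always exact. */
	printf("plain:  %llu\n", (unsigned long long)plain_counter);
	printf("atomic: %llu\n", (unsigned long long)atomic_counter);
	return 0;
}
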
@@ -2673,8 +2734,9 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 	}
 }
 
-static void raid10d(struct mddev *mddev)
+static void raid10d(struct md_thread *thread)
 {
+	struct mddev *mddev = thread->mddev;
 	struct r10bio *r10_bio;
 	unsigned long flags;
 	struct r10conf *conf = mddev->private;
@@ -3158,7 +3220,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			else {
 				bad_sectors -= (sector - first_bad);
 				if (max_sync > bad_sectors)
-					max_sync = max_sync;
+					max_sync = bad_sectors;
 				continue;
 			}
 		}
@@ -3482,6 +3544,7 @@ static int run(struct mddev *mddev)
 	sector_t size;
 	sector_t min_offset_diff = 0;
 	int first = 1;
+	bool discard_supported = false;
 
 	if (mddev->private == NULL) {
 		conf = setup_conf(mddev);
@@ -3498,6 +3561,8 @@ static int run(struct mddev *mddev)
 
 	chunk_size = mddev->chunk_sectors << 9;
 	if (mddev->queue) {
+		blk_queue_max_discard_sectors(mddev->queue,
+					      mddev->chunk_sectors);
 		blk_queue_io_min(mddev->queue, chunk_size);
 		if (conf->geo.raid_disks % conf->geo.near_copies)
 			blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3543,8 +3608,16 @@ static int run(struct mddev *mddev)
 					  rdev->data_offset << 9);
 
 		disk->head_position = 0;
+
+		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+			discard_supported = true;
 	}
 
+	if (discard_supported)
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	/* need to check that every block has at least one working mirror */
 	if (!enough(conf, -1)) {
 		printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",