path: root/drivers/md/raid10.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-13 16:22:01 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-13 16:22:01 -0400
commit	9db908806b85c1430150fbafe269a7b21b07d15d (patch)
tree	3911759c93e0be26b6771e1a92b75612b206ffa5 /drivers/md/raid10.c
parent	4d7127dace8cf4b05eb7c8c8531fc204fbb195f4 (diff)
parent	72f36d5972a166197036c1281963f6863c429bf2 (diff)
Merge tag 'md-3.7' of git://neil.brown.name/md
Pull md updates from NeilBrown:
 - "discard" support, some dm-raid improvements and other assorted bits
   and pieces.

* tag 'md-3.7' of git://neil.brown.name/md: (29 commits)
  md: refine reporting of resync/reshape delays.
  md/raid5: be careful not to resize_stripes too big.
  md: make sure manual changes to recovery checkpoint are saved.
  md/raid10: use correct limit variable
  md: writing to sync_action should clear the read-auto state.
  md: change resync_mismatches to atomic64_t to avoid races
  md/raid5: make sure to_read and to_write never go negative.
  md: When RAID5 is dirty, force reconstruct-write instead of read-modify-write.
  md/raid5: protect debug message against NULL dereference.
  md/raid5: add some missing locking in handle_failed_stripe.
  MD: raid5 avoid unnecessary zero page for trim
  MD: raid5 trim support
  md/bitmap: Don't use IS_ERR to judge alloc_page().
  md/raid1: Don't release reference to device while handling read error.
  raid: replace list_for_each_continue_rcu with new interface
  add further __init annotations to crypto/xor.c
  DM RAID: Fix for "sync" directive ineffectiveness
  DM RAID: Fix comparison of index and quantity for "rebuild" parameter
  DM RAID: Add rebuild capability for RAID10
  DM RAID: Move 'rebuild' checking code to its own function
  ...
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--	drivers/md/raid10.c	95
1 file changed, 84 insertions(+), 11 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0138a727c1f3..906ccbd0f7dc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -911,7 +911,12 @@ static void flush_pending_writes(struct r10conf *conf)
 		while (bio) { /* submit pending writes */
 			struct bio *next = bio->bi_next;
 			bio->bi_next = NULL;
-			generic_make_request(bio);
+			if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+			    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+				/* Just ignore it */
+				bio_endio(bio, 0);
+			else
+				generic_make_request(bio);
 			bio = next;
 		}
 	} else
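
The hunk above makes flush_pending_writes() complete, rather than forward, a discard bio whose target device does not advertise discard support; this matters because the run() hunk at the end of this diff sets QUEUE_FLAG_DISCARD on the array if any member supports discard, so discards can reach members that cannot service them. A minimal sketch of that decision, as plain user-space C with stand-in types (not the kernel block-layer API):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for struct bio and the queue's discard capability; the real
 * kernel check is (bio->bi_rw & REQ_DISCARD) and blk_queue_discard(). */
struct toy_bio { bool is_discard; };

static void end_ok(struct toy_bio *b)  { (void)b; puts("completed as a no-op"); }
static void forward(struct toy_bio *b) { (void)b; puts("passed to the device"); }

static void dispatch(struct toy_bio *bio, bool queue_supports_discard)
{
	/* Same shape as the new code in flush_pending_writes(): a discard
	 * aimed at a device without discard support is "just ignored" by
	 * completing it successfully instead of submitting it. */
	if (bio->is_discard && !queue_supports_discard)
		end_ok(bio);
	else
		forward(bio);
}

int main(void)
{
	struct toy_bio discard = { .is_discard = true };

	dispatch(&discard, false);	/* completed as a no-op */
	dispatch(&discard, true);	/* passed to the device */
	return 0;
}
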
@@ -1050,6 +1055,44 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
 	return rdev->new_data_offset;
 }
 
+struct raid10_plug_cb {
+	struct blk_plug_cb	cb;
+	struct bio_list		pending;
+	int			pending_cnt;
+};
+
+static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+	struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
+						   cb);
+	struct mddev *mddev = plug->cb.data;
+	struct r10conf *conf = mddev->private;
+	struct bio *bio;
+
+	if (from_schedule) {
+		spin_lock_irq(&conf->device_lock);
+		bio_list_merge(&conf->pending_bio_list, &plug->pending);
+		conf->pending_count += plug->pending_cnt;
+		spin_unlock_irq(&conf->device_lock);
+		md_wakeup_thread(mddev->thread);
+		kfree(plug);
+		return;
+	}
+
+	/* we aren't scheduling, so we can do the write-out directly. */
+	bio = bio_list_get(&plug->pending);
+	bitmap_unplug(mddev->bitmap);
+	wake_up(&conf->wait_barrier);
+
+	while (bio) { /* submit pending writes */
+		struct bio *next = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next;
+	}
+	kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r10conf *conf = mddev->private;
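
raid10_unplug() is the callback behind the new per-task plugging: writes queued while a blk_plug is active collect in the plug's private bio_list, and at unplug time they are either handed back to the md thread (when the unplug is driven from the scheduler and direct submission is not safe) or written out directly. A rough user-space model of that two-path hand-off, using a plain singly-linked list in place of struct bio_list (names here are illustrative, not the kernel API):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins: a singly-linked "bio" chain and a per-task plug. */
struct toy_bio  { int id; struct toy_bio *next; };
struct toy_plug { struct toy_bio *head, *tail; int count; };

static struct toy_bio *shared_pending;	/* models conf->pending_bio_list */

static void plug_add(struct toy_plug *p, struct toy_bio *b)
{
	b->next = NULL;
	if (p->tail)
		p->tail->next = b;
	else
		p->head = b;
	p->tail = b;
	p->count++;
}

/* Models raid10_unplug(): defer the batch to a worker when called from
 * the scheduler, otherwise submit it directly. */
static void toy_unplug(struct toy_plug *p, bool from_schedule)
{
	if (from_schedule) {
		/* splice onto the shared pending list; a worker thread
		 * (the md thread in the kernel) submits it later */
		if (p->tail) {
			p->tail->next = shared_pending;
			shared_pending = p->head;
		}
		printf("deferred %d bios to the worker\n", p->count);
		return;
	}
	for (struct toy_bio *b = p->head; b; ) {
		struct toy_bio *next = b->next;
		printf("submitting bio %d\n", b->id);
		free(b);
		b = next;
	}
}

int main(void)
{
	struct toy_plug plug = {0};

	for (int i = 0; i < 3; i++) {
		struct toy_bio *b = malloc(sizeof(*b));
		b->id = i;
		plug_add(&plug, b);
	}
	toy_unplug(&plug, false);	/* direct write-out path */
	return 0;
}

In the caller (see the retry_write hunk below), blk_check_plugged() decides which path is used: when it returns a callback structure the write goes on the plug's list, and only when no plug is active does the bio fall back to the shared pending list and an immediate wakeup of the md thread.
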
@@ -1061,8 +1104,12 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
+	const unsigned long do_discard = (bio->bi_rw
+					  & (REQ_DISCARD | REQ_SECURE));
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid10_plug_cb *plug = NULL;
 	int sectors_handled;
 	int max_sectors;
 	int sectors;
@@ -1081,7 +1128,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	    || conf->prev.near_copies < conf->prev.raid_disks))) {
 		struct bio_pair *bp;
 		/* Sanity check -- queue functions should prevent this happening */
-		if (bio->bi_vcnt != 1 ||
+		if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
 		    bio->bi_idx != 0)
 			goto bad_map;
 		/* This is a one page bio that upper layers
@@ -1410,15 +1457,26 @@ retry_write:
 							      conf->mirrors[d].rdev));
 			mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
-			mbio->bi_rw = WRITE | do_sync | do_fua;
+			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
 			mbio->bi_private = r10_bio;
 
 			atomic_inc(&r10_bio->remaining);
+
+			cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
+			if (cb)
+				plug = container_of(cb, struct raid10_plug_cb, cb);
+			else
+				plug = NULL;
 			spin_lock_irqsave(&conf->device_lock, flags);
-			bio_list_add(&conf->pending_bio_list, mbio);
-			conf->pending_count++;
+			if (plug) {
+				bio_list_add(&plug->pending, mbio);
+				plug->pending_cnt++;
+			} else {
+				bio_list_add(&conf->pending_bio_list, mbio);
+				conf->pending_count++;
+			}
 			spin_unlock_irqrestore(&conf->device_lock, flags);
-			if (!mddev_check_plugged(mddev))
+			if (!plug)
 				md_wakeup_thread(mddev->thread);
 
 			if (!r10_bio->devs[i].repl_bio)
@@ -1439,7 +1497,7 @@ retry_write:
 							     conf->mirrors[d].replacement));
 			mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
-			mbio->bi_rw = WRITE | do_sync | do_fua;
+			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
 			mbio->bi_private = r10_bio;
 
 			atomic_inc(&r10_bio->remaining);
@@ -1638,7 +1696,7 @@ static int raid10_spare_active(struct mddev *mddev)
 		    && !test_bit(Faulty, &tmp->rdev->flags)
 		    && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
 			count++;
-			sysfs_notify_dirent(tmp->rdev->sysfs_state);
+			sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
 		}
 	}
 	spin_lock_irqsave(&conf->device_lock, flags);
@@ -1725,6 +1783,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		clear_bit(Unmerged, &rdev->flags);
 	}
 	md_integrity_add_rdev(rdev, mddev);
+	if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	print_conf(conf);
 	return err;
 }
@@ -1952,7 +2013,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 				break;
 		if (j == vcnt)
 			continue;
-		mddev->resync_mismatches += r10_bio->sectors;
+		atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
 		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
 			/* Don't fix anything. */
 			continue;
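
This hunk replaces a plain "+=" on resync_mismatches with atomic64_add() because, per the commit in the shortlog above, the counter can be updated from more than one context and a 64-bit read-modify-write is not atomic. A small user-space illustration of the difference using C11 atomics (my own example, not kernel code; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t plain_counter;          /* like the old "mismatches += sectors" */
static _Atomic uint64_t atomic_counter; /* like atomic64_add() in the new code */

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		plain_counter += 8;                   /* racy read-modify-write */
		atomic_fetch_add(&atomic_counter, 8); /* never loses updates */
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	/* Expected total is 4 * 1000000 * 8 = 32000000; the plain counter
	 * usually comes up short, the atomic one is always exact. */
	printf("plain:  %llu\n", (unsigned long long)plain_counter);
	printf("atomic: %llu\n", (unsigned long long)atomic_counter);
	return 0;
}
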
@@ -2673,8 +2734,9 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 	}
 }
 
-static void raid10d(struct mddev *mddev)
+static void raid10d(struct md_thread *thread)
 {
+	struct mddev *mddev = thread->mddev;
 	struct r10bio *r10_bio;
 	unsigned long flags;
 	struct r10conf *conf = mddev->private;
@@ -3158,7 +3220,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			else {
 				bad_sectors -= (sector - first_bad);
 				if (max_sync > bad_sectors)
-					max_sync = max_sync;
+					max_sync = bad_sectors;
 				continue;
 			}
 		}
@@ -3482,6 +3544,7 @@ static int run(struct mddev *mddev)
 	sector_t size;
 	sector_t min_offset_diff = 0;
 	int first = 1;
+	bool discard_supported = false;
 
 	if (mddev->private == NULL) {
 		conf = setup_conf(mddev);
@@ -3498,6 +3561,8 @@ static int run(struct mddev *mddev)
 
 	chunk_size = mddev->chunk_sectors << 9;
 	if (mddev->queue) {
+		blk_queue_max_discard_sectors(mddev->queue,
+					      mddev->chunk_sectors);
 		blk_queue_io_min(mddev->queue, chunk_size);
 		if (conf->geo.raid_disks % conf->geo.near_copies)
 			blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3543,8 +3608,16 @@ static int run(struct mddev *mddev)
 					  rdev->data_offset << 9);
 
 		disk->head_position = 0;
+
+		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+			discard_supported = true;
 	}
 
+	if (discard_supported)
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	/* need to check that every block has at least one working mirror */
 	if (!enough(conf, -1)) {
 		printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",