author    | James Morris <jmorris@namei.org> | 2011-04-19 07:32:41 -0400
committer | James Morris <jmorris@namei.org> | 2011-04-19 07:32:41 -0400
commit    | d4ab4e6a23f805abb8fc3cc34525eec3788aeca1 (patch)
tree      | eefd82c155bc27469a85667d759cd90facf4a6e3 /drivers/md
parent    | c0fa797ae6cd02ff87c0bfe0d509368a3b45640e (diff)
parent    | 96fd2d57b8252e16dfacf8941f7a74a6119197f5 (diff)
Merge branch 'master'; commit 'v2.6.39-rc3' into next
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.h         |   2
-rw-r--r-- | drivers/md/dm-raid.c        |   8
-rw-r--r-- | drivers/md/dm-region-hash.c |   2
-rw-r--r-- | drivers/md/dm-table.c       | 114
-rw-r--r-- | drivers/md/faulty.c         |   2
-rw-r--r-- | drivers/md/md.c             |  89
-rw-r--r-- | drivers/md/md.h             |  28
-rw-r--r-- | drivers/md/raid1.c          |  29
-rw-r--r-- | drivers/md/raid10.c         |  33
-rw-r--r-- | drivers/md/raid10.h         |   4
-rw-r--r-- | drivers/md/raid5.c          |  61
-rw-r--r-- | drivers/md/raid5.h          |   2
12 files changed, 193 insertions, 181 deletions
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 931a7a7c3796..d0aeaf46d932 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -45,7 +45,7 @@ | |||
45 | * | 45 | * |
46 | * The counter counts pending write requests, plus the on-disk bit. | 46 | * The counter counts pending write requests, plus the on-disk bit. |
47 | * When the counter is '1' and the resync bits are clear, the on-disk | 47 | * When the counter is '1' and the resync bits are clear, the on-disk |
48 | * bit can be cleared aswell, thus setting the counter to 0. | 48 | * bit can be cleared as well, thus setting the counter to 0. |
49 | * When we set a bit, or in the counter (to start a write), if the fields is | 49 | * When we set a bit, or in the counter (to start a write), if the fields is |
50 | * 0, we first set the disk bit and set the counter to 1. | 50 | * 0, we first set the disk bit and set the counter to 1. |
51 | * | 51 | * |
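The bitmap.h comment above describes the write-intent counters: each counter holds the number of pending writes plus one for the on-disk bit, and the disk bit may only be dropped when nothing but that bit remains and no resync is pending. A minimal user-space sketch of that state machine follows; the field widths and names are assumptions for illustration, not taken from bitmap.h, and the real clearing is done lazily by the bitmap daemon rather than inline.

#include <assert.h>
#include <stdbool.h>

/* Illustrative model: one per-chunk counter plus its on-disk bit. */
struct bitmap_counter {
	unsigned int count;   /* pending writes + the on-disk bit */
	bool disk_bit;        /* bit as recorded in the on-disk bitmap */
	bool resync;          /* resync still needed for this chunk */
};

/* Starting a write: if the field is 0, first set the disk bit (count = 1),
 * then account for the new pending write. */
static void start_write(struct bitmap_counter *c)
{
	if (c->count == 0) {
		c->disk_bit = true;
		c->count = 1;
	}
	c->count++;
}

/* Completing a write: once only the on-disk bit remains and the resync
 * bits are clear, the disk bit can be cleared as well, dropping to 0. */
static void end_write(struct bitmap_counter *c)
{
	assert(c->count > 1);
	c->count--;
	if (c->count == 1 && !c->resync) {
		c->disk_bit = false;
		c->count = 0;
	}
}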
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5ef136cdba91..e5d8904fc8f6 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -390,13 +390,6 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | |||
390 | return md_raid5_congested(&rs->md, bits); | 390 | return md_raid5_congested(&rs->md, bits); |
391 | } | 391 | } |
392 | 392 | ||
393 | static void raid_unplug(struct dm_target_callbacks *cb) | ||
394 | { | ||
395 | struct raid_set *rs = container_of(cb, struct raid_set, callbacks); | ||
396 | |||
397 | md_raid5_kick_device(rs->md.private); | ||
398 | } | ||
399 | |||
400 | /* | 393 | /* |
401 | * Construct a RAID4/5/6 mapping: | 394 | * Construct a RAID4/5/6 mapping: |
402 | * Args: | 395 | * Args: |
@@ -487,7 +480,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
487 | } | 480 | } |
488 | 481 | ||
489 | rs->callbacks.congested_fn = raid_is_congested; | 482 | rs->callbacks.congested_fn = raid_is_congested; |
490 | rs->callbacks.unplug_fn = raid_unplug; | ||
491 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 483 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); |
492 | 484 | ||
493 | return 0; | 485 | return 0; |
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index dad011aed0c9..7771ed212182 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -419,7 +419,7 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) | |||
419 | /* | 419 | /* |
420 | * Possible cases: | 420 | * Possible cases: |
421 | * 1) DM_RH_DIRTY | 421 | * 1) DM_RH_DIRTY |
422 | * 2) DM_RH_NOSYNC: was dirty, other preceeding writes failed | 422 | * 2) DM_RH_NOSYNC: was dirty, other preceding writes failed |
423 | * 3) DM_RH_RECOVERING: flushing pending writes | 423 | * 3) DM_RH_RECOVERING: flushing pending writes |
424 | * Either case, the region should have not been connected to list. | 424 | * Either case, the region should have not been connected to list. |
425 | */ | 425 | */ |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 416d4e258df6..cb8380c9767f 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -927,20 +927,80 @@ static int dm_table_build_index(struct dm_table *t) | |||
927 | } | 927 | } |
928 | 928 | ||
929 | /* | 929 | /* |
930 | * Get a disk whose integrity profile reflects the table's profile. | ||
931 | * If %match_all is true, all devices' profiles must match. | ||
932 | * If %match_all is false, all devices must at least have an | ||
933 | * allocated integrity profile; but uninitialized is ok. | ||
934 | * Returns NULL if integrity support was inconsistent or unavailable. | ||
935 | */ | ||
936 | static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, | ||
937 | bool match_all) | ||
938 | { | ||
939 | struct list_head *devices = dm_table_get_devices(t); | ||
940 | struct dm_dev_internal *dd = NULL; | ||
941 | struct gendisk *prev_disk = NULL, *template_disk = NULL; | ||
942 | |||
943 | list_for_each_entry(dd, devices, list) { | ||
944 | template_disk = dd->dm_dev.bdev->bd_disk; | ||
945 | if (!blk_get_integrity(template_disk)) | ||
946 | goto no_integrity; | ||
947 | if (!match_all && !blk_integrity_is_initialized(template_disk)) | ||
948 | continue; /* skip uninitialized profiles */ | ||
949 | else if (prev_disk && | ||
950 | blk_integrity_compare(prev_disk, template_disk) < 0) | ||
951 | goto no_integrity; | ||
952 | prev_disk = template_disk; | ||
953 | } | ||
954 | |||
955 | return template_disk; | ||
956 | |||
957 | no_integrity: | ||
958 | if (prev_disk) | ||
959 | DMWARN("%s: integrity not set: %s and %s profile mismatch", | ||
960 | dm_device_name(t->md), | ||
961 | prev_disk->disk_name, | ||
962 | template_disk->disk_name); | ||
963 | return NULL; | ||
964 | } | ||
965 | |||
966 | /* | ||
930 | * Register the mapped device for blk_integrity support if | 967 | * Register the mapped device for blk_integrity support if |
931 | * the underlying devices support it. | 968 | * the underlying devices have an integrity profile. But all devices |
969 | * may not have matching profiles (checking all devices isn't reliable | ||
970 | * during table load because this table may use other DM device(s) which | ||
971 | * must be resumed before they will have an initialized integity profile). | ||
972 | * Stacked DM devices force a 2 stage integrity profile validation: | ||
973 | * 1 - during load, validate all initialized integrity profiles match | ||
974 | * 2 - during resume, validate all integrity profiles match | ||
932 | */ | 975 | */ |
933 | static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) | 976 | static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) |
934 | { | 977 | { |
935 | struct list_head *devices = dm_table_get_devices(t); | 978 | struct gendisk *template_disk = NULL; |
936 | struct dm_dev_internal *dd; | ||
937 | 979 | ||
938 | list_for_each_entry(dd, devices, list) | 980 | template_disk = dm_table_get_integrity_disk(t, false); |
939 | if (bdev_get_integrity(dd->dm_dev.bdev)) { | 981 | if (!template_disk) |
940 | t->integrity_supported = 1; | 982 | return 0; |
941 | return blk_integrity_register(dm_disk(md), NULL); | ||
942 | } | ||
943 | 983 | ||
984 | if (!blk_integrity_is_initialized(dm_disk(md))) { | ||
985 | t->integrity_supported = 1; | ||
986 | return blk_integrity_register(dm_disk(md), NULL); | ||
987 | } | ||
988 | |||
989 | /* | ||
990 | * If DM device already has an initalized integrity | ||
991 | * profile the new profile should not conflict. | ||
992 | */ | ||
993 | if (blk_integrity_is_initialized(template_disk) && | ||
994 | blk_integrity_compare(dm_disk(md), template_disk) < 0) { | ||
995 | DMWARN("%s: conflict with existing integrity profile: " | ||
996 | "%s profile mismatch", | ||
997 | dm_device_name(t->md), | ||
998 | template_disk->disk_name); | ||
999 | return 1; | ||
1000 | } | ||
1001 | |||
1002 | /* Preserve existing initialized integrity profile */ | ||
1003 | t->integrity_supported = 1; | ||
944 | return 0; | 1004 | return 0; |
945 | } | 1005 | } |
946 | 1006 | ||
@@ -1094,41 +1154,27 @@ combine_limits: | |||
1094 | 1154 | ||
1095 | /* | 1155 | /* |
1096 | * Set the integrity profile for this device if all devices used have | 1156 | * Set the integrity profile for this device if all devices used have |
1097 | * matching profiles. | 1157 | * matching profiles. We're quite deep in the resume path but still |
1158 | * don't know if all devices (particularly DM devices this device | ||
1159 | * may be stacked on) have matching profiles. Even if the profiles | ||
1160 | * don't match we have no way to fail (to resume) at this point. | ||
1098 | */ | 1161 | */ |
1099 | static void dm_table_set_integrity(struct dm_table *t) | 1162 | static void dm_table_set_integrity(struct dm_table *t) |
1100 | { | 1163 | { |
1101 | struct list_head *devices = dm_table_get_devices(t); | 1164 | struct gendisk *template_disk = NULL; |
1102 | struct dm_dev_internal *prev = NULL, *dd = NULL; | ||
1103 | 1165 | ||
1104 | if (!blk_get_integrity(dm_disk(t->md))) | 1166 | if (!blk_get_integrity(dm_disk(t->md))) |
1105 | return; | 1167 | return; |
1106 | 1168 | ||
1107 | list_for_each_entry(dd, devices, list) { | 1169 | template_disk = dm_table_get_integrity_disk(t, true); |
1108 | if (prev && | 1170 | if (!template_disk && |
1109 | blk_integrity_compare(prev->dm_dev.bdev->bd_disk, | 1171 | blk_integrity_is_initialized(dm_disk(t->md))) { |
1110 | dd->dm_dev.bdev->bd_disk) < 0) { | 1172 | DMWARN("%s: device no longer has a valid integrity profile", |
1111 | DMWARN("%s: integrity not set: %s and %s mismatch", | 1173 | dm_device_name(t->md)); |
1112 | dm_device_name(t->md), | 1174 | return; |
1113 | prev->dm_dev.bdev->bd_disk->disk_name, | ||
1114 | dd->dm_dev.bdev->bd_disk->disk_name); | ||
1115 | goto no_integrity; | ||
1116 | } | ||
1117 | prev = dd; | ||
1118 | } | 1175 | } |
1119 | |||
1120 | if (!prev || !bdev_get_integrity(prev->dm_dev.bdev)) | ||
1121 | goto no_integrity; | ||
1122 | |||
1123 | blk_integrity_register(dm_disk(t->md), | 1176 | blk_integrity_register(dm_disk(t->md), |
1124 | bdev_get_integrity(prev->dm_dev.bdev)); | 1177 | blk_get_integrity(template_disk)); |
1125 | |||
1126 | return; | ||
1127 | |||
1128 | no_integrity: | ||
1129 | blk_integrity_register(dm_disk(t->md), NULL); | ||
1130 | |||
1131 | return; | ||
1132 | } | 1178 | } |
1133 | 1179 | ||
1134 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | 1180 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, |
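The comments added in this dm-table.c hunk describe a two-stage integrity-profile check for stacked DM devices: at table load, members with an uninitialized profile are tolerated as long as every initialized profile matches; at resume, all profiles must match, and at that point a mismatch can only be warned about. Below is a simplified user-space model of that decision logic. The struct and its fields are invented for illustration; the kernel works on struct gendisk via blk_get_integrity(), blk_integrity_is_initialized() and blk_integrity_compare() instead.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for a member device's blk_integrity state. */
struct dev_profile {
	bool allocated;    /* device has an integrity profile at all   */
	bool initialized;  /* profile has been filled in (post-resume) */
	int  type;         /* compare key; real code uses blk_integrity_compare() */
};

/* Mirrors the shape of dm_table_get_integrity_disk(): returns a template
 * profile, or NULL if support is inconsistent or missing.  With
 * match_all == false (stage 1, table load) uninitialized profiles are
 * skipped; with match_all == true (stage 2, resume) everything must match. */
static struct dev_profile *get_integrity_template(struct dev_profile *devs,
						  size_t n, bool match_all)
{
	struct dev_profile *prev = NULL, *tmpl = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		tmpl = &devs[i];
		if (!tmpl->allocated)
			return NULL;                 /* no integrity support */
		if (!match_all && !tmpl->initialized)
			continue;                    /* validate later, at resume */
		if (prev && prev->type != tmpl->type)
			return NULL;                 /* profile mismatch */
		prev = tmpl;
	}
	return tmpl;
}

int main(void)
{
	struct dev_profile devs[] = {
		{ .allocated = true, .initialized = true,  .type = 1 },
		/* e.g. a stacked DM member that has not been resumed yet */
		{ .allocated = true, .initialized = false, .type = 1 },
	};

	/* Stage 1 (load): the uninitialized member is skipped. */
	printf("load ok:   %d\n", get_integrity_template(devs, 2, false) != NULL);

	/* Stage 2 (resume): by now every profile must agree. */
	devs[1].initialized = true;
	printf("resume ok: %d\n", get_integrity_template(devs, 2, true) != NULL);
	return 0;
}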
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 339fdc670751..23078dabb6df 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -30,7 +30,7 @@ | |||
30 | * | 30 | * |
31 | * Different modes can be active at a time, but only | 31 | * Different modes can be active at a time, but only |
32 | * one can be set at array creation. Others can be added later. | 32 | * one can be set at array creation. Others can be added later. |
33 | * A mode can be one-shot or recurrent with the recurrance being | 33 | * A mode can be one-shot or recurrent with the recurrence being |
34 | * once in every N requests. | 34 | * once in every N requests. |
35 | * The bottom 5 bits of the "layout" indicate the mode. The | 35 | * The bottom 5 bits of the "layout" indicate the mode. The |
36 | * remainder indicate a period, or 0 for one-shot. | 36 | * remainder indicate a period, or 0 for one-shot. |
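The faulty.c comment above says the fault-injection mode sits in the bottom 5 bits of the "layout" value, with the remaining bits giving the recurrence period (0 meaning one-shot). A small sketch of how such a layout word can be packed and decoded; the macro names here are assumptions, not copied from faulty.c.

#include <stdio.h>

#define FAULTY_MODE_BITS  5
#define FAULTY_MODE_MASK  ((1 << FAULTY_MODE_BITS) - 1)   /* 0x1f */

/* Pack a mode and a recurrence period into one layout word. */
static inline int faulty_layout(int mode, int period)
{
	return (period << FAULTY_MODE_BITS) | (mode & FAULTY_MODE_MASK);
}

int main(void)
{
	int layout = faulty_layout(3 /* mode */, 100 /* once every 100 requests */);

	printf("mode=%d period=%d\n",
	       layout & FAULTY_MODE_MASK,     /* bottom 5 bits: the mode  */
	       layout >> FAULTY_MODE_BITS);   /* remainder: the period    */
	return 0;
}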
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8b66e04c2ea6..6e853c61d87e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -447,48 +447,59 @@ EXPORT_SYMBOL(md_flush_request); | |||
447 | 447 | ||
448 | /* Support for plugging. | 448 | /* Support for plugging. |
449 | * This mirrors the plugging support in request_queue, but does not | 449 | * This mirrors the plugging support in request_queue, but does not |
450 | * require having a whole queue | 450 | * require having a whole queue or request structures. |
451 | * We allocate an md_plug_cb for each md device and each thread it gets | ||
452 | * plugged on. This links tot the private plug_handle structure in the | ||
453 | * personality data where we keep a count of the number of outstanding | ||
454 | * plugs so other code can see if a plug is active. | ||
451 | */ | 455 | */ |
452 | static void plugger_work(struct work_struct *work) | 456 | struct md_plug_cb { |
453 | { | 457 | struct blk_plug_cb cb; |
454 | struct plug_handle *plug = | 458 | mddev_t *mddev; |
455 | container_of(work, struct plug_handle, unplug_work); | 459 | }; |
456 | plug->unplug_fn(plug); | ||
457 | } | ||
458 | static void plugger_timeout(unsigned long data) | ||
459 | { | ||
460 | struct plug_handle *plug = (void *)data; | ||
461 | kblockd_schedule_work(NULL, &plug->unplug_work); | ||
462 | } | ||
463 | void plugger_init(struct plug_handle *plug, | ||
464 | void (*unplug_fn)(struct plug_handle *)) | ||
465 | { | ||
466 | plug->unplug_flag = 0; | ||
467 | plug->unplug_fn = unplug_fn; | ||
468 | init_timer(&plug->unplug_timer); | ||
469 | plug->unplug_timer.function = plugger_timeout; | ||
470 | plug->unplug_timer.data = (unsigned long)plug; | ||
471 | INIT_WORK(&plug->unplug_work, plugger_work); | ||
472 | } | ||
473 | EXPORT_SYMBOL_GPL(plugger_init); | ||
474 | 460 | ||
475 | void plugger_set_plug(struct plug_handle *plug) | 461 | static void plugger_unplug(struct blk_plug_cb *cb) |
476 | { | 462 | { |
477 | if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag)) | 463 | struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb); |
478 | mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1); | 464 | if (atomic_dec_and_test(&mdcb->mddev->plug_cnt)) |
465 | md_wakeup_thread(mdcb->mddev->thread); | ||
466 | kfree(mdcb); | ||
479 | } | 467 | } |
480 | EXPORT_SYMBOL_GPL(plugger_set_plug); | ||
481 | 468 | ||
482 | int plugger_remove_plug(struct plug_handle *plug) | 469 | /* Check that an unplug wakeup will come shortly. |
470 | * If not, wakeup the md thread immediately | ||
471 | */ | ||
472 | int mddev_check_plugged(mddev_t *mddev) | ||
483 | { | 473 | { |
484 | if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) { | 474 | struct blk_plug *plug = current->plug; |
485 | del_timer(&plug->unplug_timer); | 475 | struct md_plug_cb *mdcb; |
486 | return 1; | 476 | |
487 | } else | 477 | if (!plug) |
478 | return 0; | ||
479 | |||
480 | list_for_each_entry(mdcb, &plug->cb_list, cb.list) { | ||
481 | if (mdcb->cb.callback == plugger_unplug && | ||
482 | mdcb->mddev == mddev) { | ||
483 | /* Already on the list, move to top */ | ||
484 | if (mdcb != list_first_entry(&plug->cb_list, | ||
485 | struct md_plug_cb, | ||
486 | cb.list)) | ||
487 | list_move(&mdcb->cb.list, &plug->cb_list); | ||
488 | return 1; | ||
489 | } | ||
490 | } | ||
491 | /* Not currently on the callback list */ | ||
492 | mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC); | ||
493 | if (!mdcb) | ||
488 | return 0; | 494 | return 0; |
489 | } | ||
490 | EXPORT_SYMBOL_GPL(plugger_remove_plug); | ||
491 | 495 | ||
496 | mdcb->mddev = mddev; | ||
497 | mdcb->cb.callback = plugger_unplug; | ||
498 | atomic_inc(&mddev->plug_cnt); | ||
499 | list_add(&mdcb->cb.list, &plug->cb_list); | ||
500 | return 1; | ||
501 | } | ||
502 | EXPORT_SYMBOL_GPL(mddev_check_plugged); | ||
492 | 503 | ||
493 | static inline mddev_t *mddev_get(mddev_t *mddev) | 504 | static inline mddev_t *mddev_get(mddev_t *mddev) |
494 | { | 505 | { |
@@ -538,6 +549,7 @@ void mddev_init(mddev_t *mddev) | |||
538 | atomic_set(&mddev->active, 1); | 549 | atomic_set(&mddev->active, 1); |
539 | atomic_set(&mddev->openers, 0); | 550 | atomic_set(&mddev->openers, 0); |
540 | atomic_set(&mddev->active_io, 0); | 551 | atomic_set(&mddev->active_io, 0); |
552 | atomic_set(&mddev->plug_cnt, 0); | ||
541 | spin_lock_init(&mddev->write_lock); | 553 | spin_lock_init(&mddev->write_lock); |
542 | atomic_set(&mddev->flush_pending, 0); | 554 | atomic_set(&mddev->flush_pending, 0); |
543 | init_waitqueue_head(&mddev->sb_wait); | 555 | init_waitqueue_head(&mddev->sb_wait); |
@@ -4723,7 +4735,6 @@ static void md_clean(mddev_t *mddev) | |||
4723 | mddev->bitmap_info.chunksize = 0; | 4735 | mddev->bitmap_info.chunksize = 0; |
4724 | mddev->bitmap_info.daemon_sleep = 0; | 4736 | mddev->bitmap_info.daemon_sleep = 0; |
4725 | mddev->bitmap_info.max_write_behind = 0; | 4737 | mddev->bitmap_info.max_write_behind = 0; |
4726 | mddev->plug = NULL; | ||
4727 | } | 4738 | } |
4728 | 4739 | ||
4729 | static void __md_stop_writes(mddev_t *mddev) | 4740 | static void __md_stop_writes(mddev_t *mddev) |
@@ -6266,7 +6277,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
6266 | * rt is a sector_t, so could be 32bit or 64bit. | 6277 | * rt is a sector_t, so could be 32bit or 64bit. |
6267 | * So we divide before multiply in case it is 32bit and close | 6278 | * So we divide before multiply in case it is 32bit and close |
6268 | * to the limit. | 6279 | * to the limit. |
6269 | * We scale the divisor (db) by 32 to avoid loosing precision | 6280 | * We scale the divisor (db) by 32 to avoid losing precision |
6270 | * near the end of resync when the number of remaining sectors | 6281 | * near the end of resync when the number of remaining sectors |
6271 | * is close to 'db'. | 6282 | * is close to 'db'. |
6272 | * We then divide rt by 32 after multiplying by db to compensate. | 6283 | * We then divide rt by 32 after multiplying by db to compensate. |
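The status_resync() comment in the hunk above explains the arithmetic trick: rt may be a 32-bit sector_t, so multiplying first could overflow, while dividing first by the full db would throw away precision once the remaining sector count gets close to db. Scaling the divisor down by 32 and shifting the result back keeps roughly five extra bits. A stand-alone illustration with made-up numbers (the kernel uses sector_t, div64_u64() and jiffies-based dt):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up figures: 10000 sectors left, 64000 sectors done in 30 s. */
	uint32_t rt = 10000, db = 64000, dt = 30;

	/* Naive divide-first: rt/db underflows to 0, estimate collapses. */
	uint32_t naive = rt / db * dt;

	/* md's approach: shrink the divisor by 32, multiply, then shift the
	 * extra factor of 32 back out -- more precision, still no 32-bit
	 * overflow from an rt * dt product. */
	uint32_t scaled = (rt / (db / 32 + 1) * dt) >> 5;

	printf("naive=%u s  scaled=%u s  exact~%u s\n",
	       (unsigned)naive, (unsigned)scaled,
	       (unsigned)((uint64_t)rt * dt / db));
	return 0;
}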
@@ -6688,12 +6699,6 @@ int md_allow_write(mddev_t *mddev) | |||
6688 | } | 6699 | } |
6689 | EXPORT_SYMBOL_GPL(md_allow_write); | 6700 | EXPORT_SYMBOL_GPL(md_allow_write); |
6690 | 6701 | ||
6691 | void md_unplug(mddev_t *mddev) | ||
6692 | { | ||
6693 | if (mddev->plug) | ||
6694 | mddev->plug->unplug_fn(mddev->plug); | ||
6695 | } | ||
6696 | |||
6697 | #define SYNC_MARKS 10 | 6702 | #define SYNC_MARKS 10 |
6698 | #define SYNC_MARK_STEP (3*HZ) | 6703 | #define SYNC_MARK_STEP (3*HZ) |
6699 | void md_do_sync(mddev_t *mddev) | 6704 | void md_do_sync(mddev_t *mddev) |
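The md.c changes above replace the old timer-based plug_handle with per-thread blk_plug callbacks: a personality's make_request calls mddev_check_plugged() to register an md_plug_cb on the current thread's plug (bumping mddev->plug_cnt), and the callback run at unplug time drops the count and wakes the md thread. A toy user-space model of that flow, with a plain linked list standing in for current->plug->cb_list; every name here is a stand-in, not the block-layer API.

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for mddev_t, blk_plug and md_plug_cb. */
struct toy_mddev { int plug_cnt; const char *name; };

struct toy_cb {
	struct toy_mddev *mddev;
	struct toy_cb *next;
};

struct toy_plug { struct toy_cb *cb_list; };   /* models current->plug */

static void wake_md_thread(struct toy_mddev *m)
{
	printf("wake %s thread\n", m->name);
}

/* Models mddev_check_plugged(): register once per (plug, mddev) pair and
 * count the outstanding plug; return 0 when no plug is active. */
static int check_plugged(struct toy_plug *plug, struct toy_mddev *m)
{
	struct toy_cb *cb;

	if (!plug)
		return 0;
	for (cb = plug->cb_list; cb; cb = cb->next)
		if (cb->mddev == m)
			return 1;               /* already on the list */
	cb = malloc(sizeof(*cb));
	if (!cb)
		return 0;
	cb->mddev = m;
	cb->next = plug->cb_list;
	plug->cb_list = cb;
	m->plug_cnt++;                          /* atomic_inc() in the kernel */
	return 1;
}

/* Models the unplug path: run each callback, drop plug_cnt and wake the
 * md thread once the last plug for that device goes away. */
static void unplug(struct toy_plug *plug)
{
	while (plug->cb_list) {
		struct toy_cb *cb = plug->cb_list;
		plug->cb_list = cb->next;
		if (--cb->mddev->plug_cnt == 0)
			wake_md_thread(cb->mddev);
		free(cb);
	}
}

int main(void)
{
	struct toy_mddev md = { .plug_cnt = 0, .name = "md0" };
	struct toy_plug plug = { .cb_list = NULL };

	/* make_request() path: register while queueing several bios ... */
	int plugged = check_plugged(&plug, &md);
	check_plugged(&plug, &md);              /* second call is a no-op */
	if (!plugged)
		wake_md_thread(&md);            /* no plug: wake immediately */

	/* ... the daemon defers flushing while plug_cnt != 0 ... */
	printf("plug_cnt=%d\n", md.plug_cnt);

	unplug(&plug);                          /* blk_finish_plug()/schedule() */
	return 0;
}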
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 12215d437fcc..0b1fd3f1d85b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -29,26 +29,6 @@ | |||
29 | typedef struct mddev_s mddev_t; | 29 | typedef struct mddev_s mddev_t; |
30 | typedef struct mdk_rdev_s mdk_rdev_t; | 30 | typedef struct mdk_rdev_s mdk_rdev_t; |
31 | 31 | ||
32 | /* generic plugging support - like that provided with request_queue, | ||
33 | * but does not require a request_queue | ||
34 | */ | ||
35 | struct plug_handle { | ||
36 | void (*unplug_fn)(struct plug_handle *); | ||
37 | struct timer_list unplug_timer; | ||
38 | struct work_struct unplug_work; | ||
39 | unsigned long unplug_flag; | ||
40 | }; | ||
41 | #define PLUGGED_FLAG 1 | ||
42 | void plugger_init(struct plug_handle *plug, | ||
43 | void (*unplug_fn)(struct plug_handle *)); | ||
44 | void plugger_set_plug(struct plug_handle *plug); | ||
45 | int plugger_remove_plug(struct plug_handle *plug); | ||
46 | static inline void plugger_flush(struct plug_handle *plug) | ||
47 | { | ||
48 | del_timer_sync(&plug->unplug_timer); | ||
49 | cancel_work_sync(&plug->unplug_work); | ||
50 | } | ||
51 | |||
52 | /* | 32 | /* |
53 | * MD's 'extended' device | 33 | * MD's 'extended' device |
54 | */ | 34 | */ |
@@ -94,7 +74,7 @@ struct mdk_rdev_s | |||
94 | #define In_sync 2 /* device is in_sync with rest of array */ | 74 | #define In_sync 2 /* device is in_sync with rest of array */ |
95 | #define WriteMostly 4 /* Avoid reading if at all possible */ | 75 | #define WriteMostly 4 /* Avoid reading if at all possible */ |
96 | #define AutoDetected 7 /* added by auto-detect */ | 76 | #define AutoDetected 7 /* added by auto-detect */ |
97 | #define Blocked 8 /* An error occured on an externally | 77 | #define Blocked 8 /* An error occurred on an externally |
98 | * managed array, don't allow writes | 78 | * managed array, don't allow writes |
99 | * until it is cleared */ | 79 | * until it is cleared */ |
100 | wait_queue_head_t blocked_wait; | 80 | wait_queue_head_t blocked_wait; |
@@ -199,6 +179,9 @@ struct mddev_s | |||
199 | int delta_disks, new_level, new_layout; | 179 | int delta_disks, new_level, new_layout; |
200 | int new_chunk_sectors; | 180 | int new_chunk_sectors; |
201 | 181 | ||
182 | atomic_t plug_cnt; /* If device is expecting | ||
183 | * more bios soon. | ||
184 | */ | ||
202 | struct mdk_thread_s *thread; /* management thread */ | 185 | struct mdk_thread_s *thread; /* management thread */ |
203 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ | 186 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ |
204 | sector_t curr_resync; /* last block scheduled */ | 187 | sector_t curr_resync; /* last block scheduled */ |
@@ -336,7 +319,6 @@ struct mddev_s | |||
336 | struct list_head all_mddevs; | 319 | struct list_head all_mddevs; |
337 | 320 | ||
338 | struct attribute_group *to_remove; | 321 | struct attribute_group *to_remove; |
339 | struct plug_handle *plug; /* if used by personality */ | ||
340 | 322 | ||
341 | struct bio_set *bio_set; | 323 | struct bio_set *bio_set; |
342 | 324 | ||
@@ -516,7 +498,6 @@ extern int md_integrity_register(mddev_t *mddev); | |||
516 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 498 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
517 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); | 499 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); |
518 | extern void restore_bitmap_write_access(struct file *file); | 500 | extern void restore_bitmap_write_access(struct file *file); |
519 | extern void md_unplug(mddev_t *mddev); | ||
520 | 501 | ||
521 | extern void mddev_init(mddev_t *mddev); | 502 | extern void mddev_init(mddev_t *mddev); |
522 | extern int md_run(mddev_t *mddev); | 503 | extern int md_run(mddev_t *mddev); |
@@ -530,4 +511,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, | |||
530 | mddev_t *mddev); | 511 | mddev_t *mddev); |
531 | extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | 512 | extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, |
532 | mddev_t *mddev); | 513 | mddev_t *mddev); |
514 | extern int mddev_check_plugged(mddev_t *mddev); | ||
533 | #endif /* _MD_MD_H */ | 515 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c2a21ae56d97..2b7a7ff401dc 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -565,12 +565,6 @@ static void flush_pending_writes(conf_t *conf) | |||
565 | spin_unlock_irq(&conf->device_lock); | 565 | spin_unlock_irq(&conf->device_lock); |
566 | } | 566 | } |
567 | 567 | ||
568 | static void md_kick_device(mddev_t *mddev) | ||
569 | { | ||
570 | blk_flush_plug(current); | ||
571 | md_wakeup_thread(mddev->thread); | ||
572 | } | ||
573 | |||
574 | /* Barriers.... | 568 | /* Barriers.... |
575 | * Sometimes we need to suspend IO while we do something else, | 569 | * Sometimes we need to suspend IO while we do something else, |
576 | * either some resync/recovery, or reconfigure the array. | 570 | * either some resync/recovery, or reconfigure the array. |
@@ -600,7 +594,7 @@ static void raise_barrier(conf_t *conf) | |||
600 | 594 | ||
601 | /* Wait until no block IO is waiting */ | 595 | /* Wait until no block IO is waiting */ |
602 | wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, | 596 | wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, |
603 | conf->resync_lock, md_kick_device(conf->mddev)); | 597 | conf->resync_lock, ); |
604 | 598 | ||
605 | /* block any new IO from starting */ | 599 | /* block any new IO from starting */ |
606 | conf->barrier++; | 600 | conf->barrier++; |
@@ -608,7 +602,7 @@ static void raise_barrier(conf_t *conf) | |||
608 | /* Now wait for all pending IO to complete */ | 602 | /* Now wait for all pending IO to complete */ |
609 | wait_event_lock_irq(conf->wait_barrier, | 603 | wait_event_lock_irq(conf->wait_barrier, |
610 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, | 604 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, |
611 | conf->resync_lock, md_kick_device(conf->mddev)); | 605 | conf->resync_lock, ); |
612 | 606 | ||
613 | spin_unlock_irq(&conf->resync_lock); | 607 | spin_unlock_irq(&conf->resync_lock); |
614 | } | 608 | } |
@@ -630,7 +624,7 @@ static void wait_barrier(conf_t *conf) | |||
630 | conf->nr_waiting++; | 624 | conf->nr_waiting++; |
631 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 625 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, |
632 | conf->resync_lock, | 626 | conf->resync_lock, |
633 | md_kick_device(conf->mddev)); | 627 | ); |
634 | conf->nr_waiting--; | 628 | conf->nr_waiting--; |
635 | } | 629 | } |
636 | conf->nr_pending++; | 630 | conf->nr_pending++; |
@@ -666,8 +660,7 @@ static void freeze_array(conf_t *conf) | |||
666 | wait_event_lock_irq(conf->wait_barrier, | 660 | wait_event_lock_irq(conf->wait_barrier, |
667 | conf->nr_pending == conf->nr_queued+1, | 661 | conf->nr_pending == conf->nr_queued+1, |
668 | conf->resync_lock, | 662 | conf->resync_lock, |
669 | ({ flush_pending_writes(conf); | 663 | flush_pending_writes(conf)); |
670 | md_kick_device(conf->mddev); })); | ||
671 | spin_unlock_irq(&conf->resync_lock); | 664 | spin_unlock_irq(&conf->resync_lock); |
672 | } | 665 | } |
673 | static void unfreeze_array(conf_t *conf) | 666 | static void unfreeze_array(conf_t *conf) |
@@ -729,6 +722,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
729 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 722 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
730 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); | 723 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
731 | mdk_rdev_t *blocked_rdev; | 724 | mdk_rdev_t *blocked_rdev; |
725 | int plugged; | ||
732 | 726 | ||
733 | /* | 727 | /* |
734 | * Register the new request and wait if the reconstruction | 728 | * Register the new request and wait if the reconstruction |
@@ -820,6 +814,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
820 | * inc refcount on their rdev. Record them by setting | 814 | * inc refcount on their rdev. Record them by setting |
821 | * bios[x] to bio | 815 | * bios[x] to bio |
822 | */ | 816 | */ |
817 | plugged = mddev_check_plugged(mddev); | ||
818 | |||
823 | disks = conf->raid_disks; | 819 | disks = conf->raid_disks; |
824 | retry_write: | 820 | retry_write: |
825 | blocked_rdev = NULL; | 821 | blocked_rdev = NULL; |
@@ -925,7 +921,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
925 | /* In case raid1d snuck in to freeze_array */ | 921 | /* In case raid1d snuck in to freeze_array */ |
926 | wake_up(&conf->wait_barrier); | 922 | wake_up(&conf->wait_barrier); |
927 | 923 | ||
928 | if (do_sync || !bitmap) | 924 | if (do_sync || !bitmap || !plugged) |
929 | md_wakeup_thread(mddev->thread); | 925 | md_wakeup_thread(mddev->thread); |
930 | 926 | ||
931 | return 0; | 927 | return 0; |
@@ -1516,13 +1512,16 @@ static void raid1d(mddev_t *mddev) | |||
1516 | conf_t *conf = mddev->private; | 1512 | conf_t *conf = mddev->private; |
1517 | struct list_head *head = &conf->retry_list; | 1513 | struct list_head *head = &conf->retry_list; |
1518 | mdk_rdev_t *rdev; | 1514 | mdk_rdev_t *rdev; |
1515 | struct blk_plug plug; | ||
1519 | 1516 | ||
1520 | md_check_recovery(mddev); | 1517 | md_check_recovery(mddev); |
1521 | 1518 | ||
1519 | blk_start_plug(&plug); | ||
1522 | for (;;) { | 1520 | for (;;) { |
1523 | char b[BDEVNAME_SIZE]; | 1521 | char b[BDEVNAME_SIZE]; |
1524 | 1522 | ||
1525 | flush_pending_writes(conf); | 1523 | if (atomic_read(&mddev->plug_cnt) == 0) |
1524 | flush_pending_writes(conf); | ||
1526 | 1525 | ||
1527 | spin_lock_irqsave(&conf->device_lock, flags); | 1526 | spin_lock_irqsave(&conf->device_lock, flags); |
1528 | if (list_empty(head)) { | 1527 | if (list_empty(head)) { |
@@ -1593,6 +1592,7 @@ static void raid1d(mddev_t *mddev) | |||
1593 | } | 1592 | } |
1594 | cond_resched(); | 1593 | cond_resched(); |
1595 | } | 1594 | } |
1595 | blk_finish_plug(&plug); | ||
1596 | } | 1596 | } |
1597 | 1597 | ||
1598 | 1598 | ||
@@ -2039,7 +2039,6 @@ static int stop(mddev_t *mddev) | |||
2039 | 2039 | ||
2040 | md_unregister_thread(mddev->thread); | 2040 | md_unregister_thread(mddev->thread); |
2041 | mddev->thread = NULL; | 2041 | mddev->thread = NULL; |
2042 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | ||
2043 | if (conf->r1bio_pool) | 2042 | if (conf->r1bio_pool) |
2044 | mempool_destroy(conf->r1bio_pool); | 2043 | mempool_destroy(conf->r1bio_pool); |
2045 | kfree(conf->mirrors); | 2044 | kfree(conf->mirrors); |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f7b62370b374..8e9462626ec5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * RAID-10 support for md. | 6 | * RAID-10 support for md. |
7 | * | 7 | * |
8 | * Base on code in raid1.c. See raid1.c for futher copyright information. | 8 | * Base on code in raid1.c. See raid1.c for further copyright information. |
9 | * | 9 | * |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or modify | 11 | * This program is free software; you can redistribute it and/or modify |
@@ -340,14 +340,14 @@ static void raid10_end_write_request(struct bio *bio, int error) | |||
340 | 340 | ||
341 | /* | 341 | /* |
342 | * RAID10 layout manager | 342 | * RAID10 layout manager |
343 | * Aswell as the chunksize and raid_disks count, there are two | 343 | * As well as the chunksize and raid_disks count, there are two |
344 | * parameters: near_copies and far_copies. | 344 | * parameters: near_copies and far_copies. |
345 | * near_copies * far_copies must be <= raid_disks. | 345 | * near_copies * far_copies must be <= raid_disks. |
346 | * Normally one of these will be 1. | 346 | * Normally one of these will be 1. |
347 | * If both are 1, we get raid0. | 347 | * If both are 1, we get raid0. |
348 | * If near_copies == raid_disks, we get raid1. | 348 | * If near_copies == raid_disks, we get raid1. |
349 | * | 349 | * |
350 | * Chunks are layed out in raid0 style with near_copies copies of the | 350 | * Chunks are laid out in raid0 style with near_copies copies of the |
351 | * first chunk, followed by near_copies copies of the next chunk and | 351 | * first chunk, followed by near_copies copies of the next chunk and |
352 | * so on. | 352 | * so on. |
353 | * If far_copies > 1, then after 1/far_copies of the array has been assigned | 353 | * If far_copies > 1, then after 1/far_copies of the array has been assigned |
@@ -634,12 +634,6 @@ static void flush_pending_writes(conf_t *conf) | |||
634 | spin_unlock_irq(&conf->device_lock); | 634 | spin_unlock_irq(&conf->device_lock); |
635 | } | 635 | } |
636 | 636 | ||
637 | static void md_kick_device(mddev_t *mddev) | ||
638 | { | ||
639 | blk_flush_plug(current); | ||
640 | md_wakeup_thread(mddev->thread); | ||
641 | } | ||
642 | |||
643 | /* Barriers.... | 637 | /* Barriers.... |
644 | * Sometimes we need to suspend IO while we do something else, | 638 | * Sometimes we need to suspend IO while we do something else, |
645 | * either some resync/recovery, or reconfigure the array. | 639 | * either some resync/recovery, or reconfigure the array. |
@@ -669,15 +663,15 @@ static void raise_barrier(conf_t *conf, int force) | |||
669 | 663 | ||
670 | /* Wait until no block IO is waiting (unless 'force') */ | 664 | /* Wait until no block IO is waiting (unless 'force') */ |
671 | wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, | 665 | wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, |
672 | conf->resync_lock, md_kick_device(conf->mddev)); | 666 | conf->resync_lock, ); |
673 | 667 | ||
674 | /* block any new IO from starting */ | 668 | /* block any new IO from starting */ |
675 | conf->barrier++; | 669 | conf->barrier++; |
676 | 670 | ||
677 | /* No wait for all pending IO to complete */ | 671 | /* Now wait for all pending IO to complete */ |
678 | wait_event_lock_irq(conf->wait_barrier, | 672 | wait_event_lock_irq(conf->wait_barrier, |
679 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, | 673 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, |
680 | conf->resync_lock, md_kick_device(conf->mddev)); | 674 | conf->resync_lock, ); |
681 | 675 | ||
682 | spin_unlock_irq(&conf->resync_lock); | 676 | spin_unlock_irq(&conf->resync_lock); |
683 | } | 677 | } |
@@ -698,7 +692,7 @@ static void wait_barrier(conf_t *conf) | |||
698 | conf->nr_waiting++; | 692 | conf->nr_waiting++; |
699 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 693 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, |
700 | conf->resync_lock, | 694 | conf->resync_lock, |
701 | md_kick_device(conf->mddev)); | 695 | ); |
702 | conf->nr_waiting--; | 696 | conf->nr_waiting--; |
703 | } | 697 | } |
704 | conf->nr_pending++; | 698 | conf->nr_pending++; |
@@ -734,8 +728,8 @@ static void freeze_array(conf_t *conf) | |||
734 | wait_event_lock_irq(conf->wait_barrier, | 728 | wait_event_lock_irq(conf->wait_barrier, |
735 | conf->nr_pending == conf->nr_queued+1, | 729 | conf->nr_pending == conf->nr_queued+1, |
736 | conf->resync_lock, | 730 | conf->resync_lock, |
737 | ({ flush_pending_writes(conf); | 731 | flush_pending_writes(conf)); |
738 | md_kick_device(conf->mddev); })); | 732 | |
739 | spin_unlock_irq(&conf->resync_lock); | 733 | spin_unlock_irq(&conf->resync_lock); |
740 | } | 734 | } |
741 | 735 | ||
@@ -762,6 +756,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
762 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | 756 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); |
763 | unsigned long flags; | 757 | unsigned long flags; |
764 | mdk_rdev_t *blocked_rdev; | 758 | mdk_rdev_t *blocked_rdev; |
759 | int plugged; | ||
765 | 760 | ||
766 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { | 761 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
767 | md_flush_request(mddev, bio); | 762 | md_flush_request(mddev, bio); |
@@ -870,6 +865,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
870 | * inc refcount on their rdev. Record them by setting | 865 | * inc refcount on their rdev. Record them by setting |
871 | * bios[x] to bio | 866 | * bios[x] to bio |
872 | */ | 867 | */ |
868 | plugged = mddev_check_plugged(mddev); | ||
869 | |||
873 | raid10_find_phys(conf, r10_bio); | 870 | raid10_find_phys(conf, r10_bio); |
874 | retry_write: | 871 | retry_write: |
875 | blocked_rdev = NULL; | 872 | blocked_rdev = NULL; |
@@ -946,9 +943,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
946 | /* In case raid10d snuck in to freeze_array */ | 943 | /* In case raid10d snuck in to freeze_array */ |
947 | wake_up(&conf->wait_barrier); | 944 | wake_up(&conf->wait_barrier); |
948 | 945 | ||
949 | if (do_sync || !mddev->bitmap) | 946 | if (do_sync || !mddev->bitmap || !plugged) |
950 | md_wakeup_thread(mddev->thread); | 947 | md_wakeup_thread(mddev->thread); |
951 | |||
952 | return 0; | 948 | return 0; |
953 | } | 949 | } |
954 | 950 | ||
@@ -1640,9 +1636,11 @@ static void raid10d(mddev_t *mddev) | |||
1640 | conf_t *conf = mddev->private; | 1636 | conf_t *conf = mddev->private; |
1641 | struct list_head *head = &conf->retry_list; | 1637 | struct list_head *head = &conf->retry_list; |
1642 | mdk_rdev_t *rdev; | 1638 | mdk_rdev_t *rdev; |
1639 | struct blk_plug plug; | ||
1643 | 1640 | ||
1644 | md_check_recovery(mddev); | 1641 | md_check_recovery(mddev); |
1645 | 1642 | ||
1643 | blk_start_plug(&plug); | ||
1646 | for (;;) { | 1644 | for (;;) { |
1647 | char b[BDEVNAME_SIZE]; | 1645 | char b[BDEVNAME_SIZE]; |
1648 | 1646 | ||
@@ -1716,6 +1714,7 @@ static void raid10d(mddev_t *mddev) | |||
1716 | } | 1714 | } |
1717 | cond_resched(); | 1715 | cond_resched(); |
1718 | } | 1716 | } |
1717 | blk_finish_plug(&plug); | ||
1719 | } | 1718 | } |
1720 | 1719 | ||
1721 | 1720 | ||
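The layout-manager comment earlier in this raid10.c diff describes chunks laid out raid0-style, with near_copies copies of each chunk placed before moving on to the next. A simplified mapping for the near-copies-only case (far_copies == 1); this illustrates the comment, it is not the actual raid10_find_phys() arithmetic and ignores far/offset layouts entirely.

#include <stdio.h>

/* For far_copies == 1: copy c of logical chunk n lands on device
 * (n * near_copies + c) % raid_disks, (n * near_copies + c) / raid_disks
 * stripes into that device. */
static void map_chunk(int chunk, int near_copies, int raid_disks)
{
	int c;

	for (c = 0; c < near_copies; c++) {
		int slot = chunk * near_copies + c;
		printf("chunk %d copy %d -> dev %d, stripe %d\n",
		       chunk, c, slot % raid_disks, slot / raid_disks);
	}
}

int main(void)
{
	int chunk;

	/* 4 disks, near_copies = 2: chunk 0 on devs 0/1, chunk 1 on devs 2/3,
	 * chunk 2 back on devs 0/1 one stripe further, and so on. */
	for (chunk = 0; chunk < 4; chunk++)
		map_chunk(chunk, 2, 4);
	return 0;
}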
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 2316ac2e8e21..944b1104d3b4 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -17,8 +17,8 @@ struct r10_private_data_s { | |||
17 | spinlock_t device_lock; | 17 | spinlock_t device_lock; |
18 | 18 | ||
19 | /* geometry */ | 19 | /* geometry */ |
20 | int near_copies; /* number of copies layed out raid0 style */ | 20 | int near_copies; /* number of copies laid out raid0 style */ |
21 | int far_copies; /* number of copies layed out | 21 | int far_copies; /* number of copies laid out |
22 | * at large strides across drives | 22 | * at large strides across drives |
23 | */ | 23 | */ |
24 | int far_offset; /* far_copies are offset by 1 stripe | 24 | int far_offset; /* far_copies are offset by 1 stripe |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e867ee42b152..f301e6ae220c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -27,12 +27,12 @@ | |||
27 | * | 27 | * |
28 | * We group bitmap updates into batches. Each batch has a number. | 28 | * We group bitmap updates into batches. Each batch has a number. |
29 | * We may write out several batches at once, but that isn't very important. | 29 | * We may write out several batches at once, but that isn't very important. |
30 | * conf->bm_write is the number of the last batch successfully written. | 30 | * conf->seq_write is the number of the last batch successfully written. |
31 | * conf->bm_flush is the number of the last batch that was closed to | 31 | * conf->seq_flush is the number of the last batch that was closed to |
32 | * new additions. | 32 | * new additions. |
33 | * When we discover that we will need to write to any block in a stripe | 33 | * When we discover that we will need to write to any block in a stripe |
34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq | 34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq |
35 | * the number of the batch it will be in. This is bm_flush+1. | 35 | * the number of the batch it will be in. This is seq_flush+1. |
36 | * When we are ready to do a write, if that batch hasn't been written yet, | 36 | * When we are ready to do a write, if that batch hasn't been written yet, |
37 | * we plug the array and queue the stripe for later. | 37 | * we plug the array and queue the stripe for later. |
38 | * When an unplug happens, we increment bm_flush, thus closing the current | 38 | * When an unplug happens, we increment bm_flush, thus closing the current |
@@ -199,14 +199,12 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
199 | BUG_ON(!list_empty(&sh->lru)); | 199 | BUG_ON(!list_empty(&sh->lru)); |
200 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 200 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { | 202 | if (test_bit(STRIPE_DELAYED, &sh->state)) |
203 | list_add_tail(&sh->lru, &conf->delayed_list); | 203 | list_add_tail(&sh->lru, &conf->delayed_list); |
204 | plugger_set_plug(&conf->plug); | 204 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 205 | sh->bm_seq - conf->seq_write > 0) |
206 | sh->bm_seq - conf->seq_write > 0) { | ||
207 | list_add_tail(&sh->lru, &conf->bitmap_list); | 206 | list_add_tail(&sh->lru, &conf->bitmap_list); |
208 | plugger_set_plug(&conf->plug); | 207 | else { |
209 | } else { | ||
210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 208 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
211 | list_add_tail(&sh->lru, &conf->handle_list); | 209 | list_add_tail(&sh->lru, &conf->handle_list); |
212 | } | 210 | } |
@@ -461,7 +459,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, | |||
461 | < (conf->max_nr_stripes *3/4) | 459 | < (conf->max_nr_stripes *3/4) |
462 | || !conf->inactive_blocked), | 460 | || !conf->inactive_blocked), |
463 | conf->device_lock, | 461 | conf->device_lock, |
464 | md_raid5_kick_device(conf)); | 462 | ); |
465 | conf->inactive_blocked = 0; | 463 | conf->inactive_blocked = 0; |
466 | } else | 464 | } else |
467 | init_stripe(sh, sector, previous); | 465 | init_stripe(sh, sector, previous); |
@@ -1470,7 +1468,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
1470 | wait_event_lock_irq(conf->wait_for_stripe, | 1468 | wait_event_lock_irq(conf->wait_for_stripe, |
1471 | !list_empty(&conf->inactive_list), | 1469 | !list_empty(&conf->inactive_list), |
1472 | conf->device_lock, | 1470 | conf->device_lock, |
1473 | blk_flush_plug(current)); | 1471 | ); |
1474 | osh = get_free_stripe(conf); | 1472 | osh = get_free_stripe(conf); |
1475 | spin_unlock_irq(&conf->device_lock); | 1473 | spin_unlock_irq(&conf->device_lock); |
1476 | atomic_set(&nsh->count, 1); | 1474 | atomic_set(&nsh->count, 1); |
@@ -3623,8 +3621,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) | |||
3623 | atomic_inc(&conf->preread_active_stripes); | 3621 | atomic_inc(&conf->preread_active_stripes); |
3624 | list_add_tail(&sh->lru, &conf->hold_list); | 3622 | list_add_tail(&sh->lru, &conf->hold_list); |
3625 | } | 3623 | } |
3626 | } else | 3624 | } |
3627 | plugger_set_plug(&conf->plug); | ||
3628 | } | 3625 | } |
3629 | 3626 | ||
3630 | static void activate_bit_delay(raid5_conf_t *conf) | 3627 | static void activate_bit_delay(raid5_conf_t *conf) |
@@ -3641,21 +3638,6 @@ static void activate_bit_delay(raid5_conf_t *conf) | |||
3641 | } | 3638 | } |
3642 | } | 3639 | } |
3643 | 3640 | ||
3644 | void md_raid5_kick_device(raid5_conf_t *conf) | ||
3645 | { | ||
3646 | blk_flush_plug(current); | ||
3647 | raid5_activate_delayed(conf); | ||
3648 | md_wakeup_thread(conf->mddev->thread); | ||
3649 | } | ||
3650 | EXPORT_SYMBOL_GPL(md_raid5_kick_device); | ||
3651 | |||
3652 | static void raid5_unplug(struct plug_handle *plug) | ||
3653 | { | ||
3654 | raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); | ||
3655 | |||
3656 | md_raid5_kick_device(conf); | ||
3657 | } | ||
3658 | |||
3659 | int md_raid5_congested(mddev_t *mddev, int bits) | 3641 | int md_raid5_congested(mddev_t *mddev, int bits) |
3660 | { | 3642 | { |
3661 | raid5_conf_t *conf = mddev->private; | 3643 | raid5_conf_t *conf = mddev->private; |
@@ -3945,6 +3927,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
3945 | struct stripe_head *sh; | 3927 | struct stripe_head *sh; |
3946 | const int rw = bio_data_dir(bi); | 3928 | const int rw = bio_data_dir(bi); |
3947 | int remaining; | 3929 | int remaining; |
3930 | int plugged; | ||
3948 | 3931 | ||
3949 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { | 3932 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
3950 | md_flush_request(mddev, bi); | 3933 | md_flush_request(mddev, bi); |
@@ -3963,6 +3946,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
3963 | bi->bi_next = NULL; | 3946 | bi->bi_next = NULL; |
3964 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 3947 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
3965 | 3948 | ||
3949 | plugged = mddev_check_plugged(mddev); | ||
3966 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 3950 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
3967 | DEFINE_WAIT(w); | 3951 | DEFINE_WAIT(w); |
3968 | int disks, data_disks; | 3952 | int disks, data_disks; |
@@ -4057,7 +4041,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4057 | * add failed due to overlap. Flush everything | 4041 | * add failed due to overlap. Flush everything |
4058 | * and wait a while | 4042 | * and wait a while |
4059 | */ | 4043 | */ |
4060 | md_raid5_kick_device(conf); | 4044 | md_wakeup_thread(mddev->thread); |
4061 | release_stripe(sh); | 4045 | release_stripe(sh); |
4062 | schedule(); | 4046 | schedule(); |
4063 | goto retry; | 4047 | goto retry; |
@@ -4077,6 +4061,9 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4077 | } | 4061 | } |
4078 | 4062 | ||
4079 | } | 4063 | } |
4064 | if (!plugged) | ||
4065 | md_wakeup_thread(mddev->thread); | ||
4066 | |||
4080 | spin_lock_irq(&conf->device_lock); | 4067 | spin_lock_irq(&conf->device_lock); |
4081 | remaining = raid5_dec_bi_phys_segments(bi); | 4068 | remaining = raid5_dec_bi_phys_segments(bi); |
4082 | spin_unlock_irq(&conf->device_lock); | 4069 | spin_unlock_irq(&conf->device_lock); |
@@ -4478,24 +4465,30 @@ static void raid5d(mddev_t *mddev) | |||
4478 | struct stripe_head *sh; | 4465 | struct stripe_head *sh; |
4479 | raid5_conf_t *conf = mddev->private; | 4466 | raid5_conf_t *conf = mddev->private; |
4480 | int handled; | 4467 | int handled; |
4468 | struct blk_plug plug; | ||
4481 | 4469 | ||
4482 | pr_debug("+++ raid5d active\n"); | 4470 | pr_debug("+++ raid5d active\n"); |
4483 | 4471 | ||
4484 | md_check_recovery(mddev); | 4472 | md_check_recovery(mddev); |
4485 | 4473 | ||
4474 | blk_start_plug(&plug); | ||
4486 | handled = 0; | 4475 | handled = 0; |
4487 | spin_lock_irq(&conf->device_lock); | 4476 | spin_lock_irq(&conf->device_lock); |
4488 | while (1) { | 4477 | while (1) { |
4489 | struct bio *bio; | 4478 | struct bio *bio; |
4490 | 4479 | ||
4491 | if (conf->seq_flush != conf->seq_write) { | 4480 | if (atomic_read(&mddev->plug_cnt) == 0 && |
4492 | int seq = conf->seq_flush; | 4481 | !list_empty(&conf->bitmap_list)) { |
4482 | /* Now is a good time to flush some bitmap updates */ | ||
4483 | conf->seq_flush++; | ||
4493 | spin_unlock_irq(&conf->device_lock); | 4484 | spin_unlock_irq(&conf->device_lock); |
4494 | bitmap_unplug(mddev->bitmap); | 4485 | bitmap_unplug(mddev->bitmap); |
4495 | spin_lock_irq(&conf->device_lock); | 4486 | spin_lock_irq(&conf->device_lock); |
4496 | conf->seq_write = seq; | 4487 | conf->seq_write = conf->seq_flush; |
4497 | activate_bit_delay(conf); | 4488 | activate_bit_delay(conf); |
4498 | } | 4489 | } |
4490 | if (atomic_read(&mddev->plug_cnt) == 0) | ||
4491 | raid5_activate_delayed(conf); | ||
4499 | 4492 | ||
4500 | while ((bio = remove_bio_from_retry(conf))) { | 4493 | while ((bio = remove_bio_from_retry(conf))) { |
4501 | int ok; | 4494 | int ok; |
@@ -4525,6 +4518,7 @@ static void raid5d(mddev_t *mddev) | |||
4525 | spin_unlock_irq(&conf->device_lock); | 4518 | spin_unlock_irq(&conf->device_lock); |
4526 | 4519 | ||
4527 | async_tx_issue_pending_all(); | 4520 | async_tx_issue_pending_all(); |
4521 | blk_finish_plug(&plug); | ||
4528 | 4522 | ||
4529 | pr_debug("--- raid5d inactive\n"); | 4523 | pr_debug("--- raid5d inactive\n"); |
4530 | } | 4524 | } |
@@ -5141,8 +5135,6 @@ static int run(mddev_t *mddev) | |||
5141 | mdname(mddev)); | 5135 | mdname(mddev)); |
5142 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5136 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
5143 | 5137 | ||
5144 | plugger_init(&conf->plug, raid5_unplug); | ||
5145 | mddev->plug = &conf->plug; | ||
5146 | if (mddev->queue) { | 5138 | if (mddev->queue) { |
5147 | int chunk_size; | 5139 | int chunk_size; |
5148 | /* read-ahead size must cover two whole stripes, which | 5140 | /* read-ahead size must cover two whole stripes, which |
@@ -5192,7 +5184,6 @@ static int stop(mddev_t *mddev) | |||
5192 | mddev->thread = NULL; | 5184 | mddev->thread = NULL; |
5193 | if (mddev->queue) | 5185 | if (mddev->queue) |
5194 | mddev->queue->backing_dev_info.congested_fn = NULL; | 5186 | mddev->queue->backing_dev_info.congested_fn = NULL; |
5195 | plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ | ||
5196 | free_conf(conf); | 5187 | free_conf(conf); |
5197 | mddev->private = NULL; | 5188 | mddev->private = NULL; |
5198 | mddev->to_remove = &raid5_attrs_group; | 5189 | mddev->to_remove = &raid5_attrs_group; |
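The batching comment at the top of this raid5.c diff explains seq_flush/seq_write: a stripe needing a bitmap update records bm_seq = seq_flush + 1 and may not be written until seq_write has caught up; the daemon closes a batch by bumping seq_flush, flushing the bitmap, and then setting seq_write = seq_flush (exactly what the new raid5d() hunk above does before activate_bit_delay()). A compact model of that ordering; the structure and names are illustrative only.

#include <stdbool.h>
#include <stdio.h>

struct toy_conf   { int seq_flush, seq_write; };
struct toy_stripe { int bm_seq; };

/* add_stripe_bio(): the stripe joins the batch currently open for additions. */
static void record_bitmap_update(struct toy_conf *conf, struct toy_stripe *sh)
{
	sh->bm_seq = conf->seq_flush + 1;
}

/* __release_stripe() test: delay the stripe while its batch's bitmap has
 * not reached disk yet (sh->bm_seq - conf->seq_write > 0). */
static bool must_delay(struct toy_conf *conf, struct toy_stripe *sh)
{
	return sh->bm_seq - conf->seq_write > 0;
}

/* raid5d(): close the current batch, write the bitmap, then release the
 * delayed stripes by advancing seq_write. */
static void flush_bitmap_batch(struct toy_conf *conf)
{
	conf->seq_flush++;
	/* bitmap_unplug(mddev->bitmap) happens here in the kernel */
	conf->seq_write = conf->seq_flush;
}

int main(void)
{
	struct toy_conf conf = { 0, 0 };
	struct toy_stripe sh;

	record_bitmap_update(&conf, &sh);
	printf("before flush: delay=%d\n", must_delay(&conf, &sh)); /* 1 */
	flush_bitmap_batch(&conf);
	printf("after flush:  delay=%d\n", must_delay(&conf, &sh)); /* 0 */
	return 0;
}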
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 8d563a4f022a..3ca77a2613ba 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -400,8 +400,6 @@ struct raid5_private_data { | |||
400 | * Cleared when a sync completes. | 400 | * Cleared when a sync completes. |
401 | */ | 401 | */ |
402 | 402 | ||
403 | struct plug_handle plug; | ||
404 | |||
405 | /* per cpu variables */ | 403 | /* per cpu variables */ |
406 | struct raid5_percpu { | 404 | struct raid5_percpu { |
407 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 405 | struct page *spare_page; /* Used when checking P/Q in raid6 */ |