Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bitmap.c |  23
-rw-r--r--  drivers/md/md.c     |  50
-rw-r--r--  drivers/md/raid1.c  |  73
-rw-r--r--  drivers/md/raid10.c |  87
4 files changed, 167 insertions, 66 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 7aeceedcf7d4..c14dacdacfac 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1045,8 +1045,14 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 	if (bitmap == NULL)
 		return;
 	if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
-		return;
+		goto done;
+
 	bitmap->daemon_lastrun = jiffies;
+	if (bitmap->allclean) {
+		bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+		return;
+	}
+	bitmap->allclean = 1;
 
 	for (j = 0; j < bitmap->chunks; j++) {
 		bitmap_counter_t *bmc;
@@ -1068,8 +1074,10 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 			clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
 
 			spin_unlock_irqrestore(&bitmap->lock, flags);
-			if (need_write)
+			if (need_write) {
 				write_page(bitmap, page, 0);
+				bitmap->allclean = 0;
+			}
 			continue;
 		}
 
@@ -1098,6 +1106,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 /*
   if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
 */
+		if (*bmc)
+			bitmap->allclean = 0;
+
 		if (*bmc == 2) {
 			*bmc=1; /* maybe clear the bit next time */
 			set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
@@ -1132,6 +1143,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 		}
 	}
 
+ done:
+	if (bitmap->allclean == 0)
+		bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ;
 }
 
 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
@@ -1226,6 +1240,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 			sectors -= blocks;
 		else sectors = 0;
 	}
+	bitmap->allclean = 0;
 	return 0;
 }
1231 1246
@@ -1296,6 +1311,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
 		}
 	}
 	spin_unlock_irq(&bitmap->lock);
+	bitmap->allclean = 0;
 	return rv;
 }
1301 1317
@@ -1332,6 +1348,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab
 	}
  unlock:
 	spin_unlock_irqrestore(&bitmap->lock, flags);
+	bitmap->allclean = 0;
 }
1336 1353
1337void bitmap_close_sync(struct bitmap *bitmap) 1354void bitmap_close_sync(struct bitmap *bitmap)
@@ -1399,7 +1416,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
 		set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
 	}
 	spin_unlock_irq(&bitmap->lock);
-
+	bitmap->allclean = 0;
 }
 
 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
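Taken together, the bitmap.c hunks implement one optimization: bitmap_daemon_work() normally wakes every daemon_sleep seconds to age counters and flush pages, even on an idle array. The new allclean flag lets a pass that finds nothing to do park the md thread (MAX_SCHEDULE_TIMEOUT), every path that dirties state (startwrite, start/end_sync, set_memory_bits) clears the flag, and the " done:" exit restores the normal timeout whenever work remains. Below is a minimal userspace sketch of the same park-until-kicked pattern, with pthreads standing in for the md thread machinery; all names are illustrative, not the kernel API.

/*
 * Sketch of the "allclean" idea: each daemon pass optimistically assumes
 * the state is clean, any remaining work clears the flag, and a pass that
 * stays clean parks the daemon until a writer kicks it awake again.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t kick = PTHREAD_COND_INITIALIZER;
static bool allclean;		/* analogous to bitmap->allclean */
static int dirty_chunks;	/* stands in for the counter table */

static void *daemon_work(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (allclean)
			/* nothing changed since the last pass: sleep with
			 * no timeout, like setting MAX_SCHEDULE_TIMEOUT */
			pthread_cond_wait(&kick, &lock);
		allclean = true;	/* assume this pass finds nothing */
		if (dirty_chunks > 0) {
			dirty_chunks--;	/* found work, so not clean yet */
			allclean = false;
			printf("daemon: aged one chunk\n");
		}
		pthread_mutex_unlock(&lock);
		sleep(1);		/* daemon_sleep between passes */
	}
	return NULL;
}

static void write_path(void)	/* mirrors bitmap->allclean = 0 on I/O */
{
	pthread_mutex_lock(&lock);
	dirty_chunks += 2;
	allclean = false;
	pthread_cond_signal(&kick);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, daemon_work, NULL);
	write_path();
	sleep(5);	/* daemon drains the work, then parks itself */
	return 0;
}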
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7da6ec244e15..ccbbf63727cc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1105,7 +1105,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
 	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
 	if (rdev->sb_size & bmask)
-		rdev-> sb_size = (rdev->sb_size | bmask)+1;
+		rdev->sb_size = (rdev->sb_size | bmask) + 1;
+
+	if (minor_version
+	    && rdev->data_offset < sb_offset + (rdev->sb_size/512))
+		return -EINVAL;
 
 	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
 		rdev->desc_nr = -1;
@@ -1137,7 +1141,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 		else
 			ret = 0;
 	}
-	if (minor_version)
+	if (minor_version)
 		rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
 	else
 		rdev->size = rdev->sb_offset;
@@ -1499,7 +1503,8 @@ static void export_rdev(mdk_rdev_t * rdev)
 	free_disk_sb(rdev);
 	list_del_init(&rdev->same_set);
 #ifndef MODULE
-	md_autodetect_dev(rdev->bdev->bd_dev);
+	if (test_bit(AutoDetected, &rdev->flags))
+		md_autodetect_dev(rdev->bdev->bd_dev);
 #endif
 	unlock_rdev(rdev);
 	kobject_put(&rdev->kobj);
@@ -1996,9 +2001,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 	char *e;
 	unsigned long long size = simple_strtoull(buf, &e, 10);
 	unsigned long long oldsize = rdev->size;
+	mddev_t *my_mddev = rdev->mddev;
+
 	if (e==buf || (*e && *e != '\n'))
 		return -EINVAL;
-	if (rdev->mddev->pers)
+	if (my_mddev->pers)
 		return -EBUSY;
 	rdev->size = size;
 	if (size > oldsize && rdev->mddev->external) {
@@ -2011,7 +2018,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		int overlap = 0;
 		struct list_head *tmp, *tmp2;
 
-		mddev_unlock(rdev->mddev);
+		mddev_unlock(my_mddev);
 		for_each_mddev(mddev, tmp) {
 			mdk_rdev_t *rdev2;
 
@@ -2031,7 +2038,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 				break;
 			}
 		}
-		mddev_lock(rdev->mddev);
+		mddev_lock(my_mddev);
 		if (overlap) {
 			/* Someone else could have slipped in a size
 			 * change here, but doing so is just silly.
@@ -2043,8 +2050,8 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			return -EBUSY;
 		}
 	}
-	if (size < rdev->mddev->size || rdev->mddev->size == 0)
-		rdev->mddev->size = size;
+	if (size < my_mddev->size || my_mddev->size == 0)
+		my_mddev->size = size;
 	return len;
 }
 
@@ -2065,10 +2072,21 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 {
 	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
 	mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+	mddev_t *mddev = rdev->mddev;
+	ssize_t rv;
 
 	if (!entry->show)
 		return -EIO;
-	return entry->show(rdev, page);
+
+	rv = mddev ? mddev_lock(mddev) : -EBUSY;
+	if (!rv) {
+		if (rdev->mddev == NULL)
+			rv = -EBUSY;
+		else
+			rv = entry->show(rdev, page);
+		mddev_unlock(mddev);
+	}
+	return rv;
 }
 
 static ssize_t
@@ -2077,15 +2095,19 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
 {
 	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
 	mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
-	int rv;
+	ssize_t rv;
+	mddev_t *mddev = rdev->mddev;
 
 	if (!entry->store)
 		return -EIO;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	rv = mddev_lock(rdev->mddev);
+	rv = mddev ? mddev_lock(mddev): -EBUSY;
 	if (!rv) {
-		rv = entry->store(rdev, page, length);
+		if (rdev->mddev == NULL)
+			rv = -EBUSY;
+		else
+			rv = entry->store(rdev, page, length);
 		mddev_unlock(rdev->mddev);
 	}
 	return rv;
@@ -5127,7 +5149,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		if (mddev->ro==1)
 			seq_printf(seq, " (read-only)");
 		if (mddev->ro==2)
-			seq_printf(seq, "(auto-read-only)");
+			seq_printf(seq, " (auto-read-only)");
 		seq_printf(seq, " %s", mddev->pers->name);
 	}
 
@@ -5351,6 +5373,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
 		mddev->ro = 0;
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
+		md_wakeup_thread(mddev->sync_thread);
 	}
 	atomic_inc(&mddev->writes_pending);
 	if (mddev->in_sync) {
@@ -6021,6 +6044,7 @@ static void autostart_arrays(int part)
 			MD_BUG();
 			continue;
 		}
+		set_bit(AutoDetected, &rdev->flags);
 		list_add(&rdev->same_set, &pending_raid_disks);
 		i_passed++;
 	}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 5c7fef091cec..ff61b309129a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -592,6 +592,37 @@ static int raid1_congested(void *data, int bits)
 }
 
 
+static int flush_pending_writes(conf_t *conf)
+{
+	/* Any writes that have been queued but are awaiting
+	 * bitmap updates get flushed here.
+	 * We return 1 if any requests were actually submitted.
+	 */
+	int rv = 0;
+
+	spin_lock_irq(&conf->device_lock);
+
+	if (conf->pending_bio_list.head) {
+		struct bio *bio;
+		bio = bio_list_get(&conf->pending_bio_list);
+		blk_remove_plug(conf->mddev->queue);
+		spin_unlock_irq(&conf->device_lock);
+		/* flush any pending bitmap writes to
+		 * disk before proceeding w/ I/O */
+		bitmap_unplug(conf->mddev->bitmap);
+
+		while (bio) { /* submit pending writes */
+			struct bio *next = bio->bi_next;
+			bio->bi_next = NULL;
+			generic_make_request(bio);
+			bio = next;
+		}
+		rv = 1;
+	} else
+		spin_unlock_irq(&conf->device_lock);
+	return rv;
+}
+
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -673,15 +704,23 @@ static void freeze_array(conf_t *conf)
 	/* stop syncio and normal IO and wait for everything to
 	 * go quite.
 	 * We increment barrier and nr_waiting, and then
-	 * wait until barrier+nr_pending match nr_queued+2
+	 * wait until nr_pending match nr_queued+1
+	 * This is called in the context of one normal IO request
+	 * that has failed. Thus any sync request that might be pending
+	 * will be blocked by nr_pending, and we need to wait for
+	 * pending IO requests to complete or be queued for re-try.
+	 * Thus the number queued (nr_queued) plus this request (1)
+	 * must match the number of pending IOs (nr_pending) before
+	 * we continue.
 	 */
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
 	wait_event_lock_irq(conf->wait_barrier,
-			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->nr_pending == conf->nr_queued+1,
 			    conf->resync_lock,
-			    raid1_unplug(conf->mddev->queue));
+			    ({ flush_pending_writes(conf);
+			       raid1_unplug(conf->mddev->queue); }));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(conf_t *conf)
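Two things happen in this freeze_array() hunk. First, the wait condition drops conf->barrier from the equation: as the new comment explains, after one normal request has failed the array is quiescent once nr_pending equals nr_queued+1. Second, the third argument to wait_event_lock_irq() now both flushes queued writes and unplugs the queue; without the flush, freeze_array() could wait forever on writes that only raid1d would have submitted, while raid1d itself is the caller that is frozen. The `({ ... })` construct that packs both actions into the macro's single command slot is a GNU C statement expression: several statements grouped into one expression whose value is that of the last statement. A tiny standalone demo of the extension (hypothetical macro, compiles with GCC or Clang):

#include <stdio.h>

/* A statement expression: the block evaluates to its last expression,
 * so several side effects can ride along inside one macro argument. */
#define RETRY_STEP(counter) ({					\
	(counter)++;						\
	printf("retry %d\n", (counter));			\
	(counter);	/* value of the whole ({ ... }) */	\
})

int main(void)
{
	int tries = 0;

	while (RETRY_STEP(tries) < 3)
		;	/* all the work happens in the condition */
	return 0;
}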
@@ -907,6 +946,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
 		blk_plug_device(mddev->queue);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
+		/* In case raid1d snuck into freeze_array */
+		wake_up(&conf->wait_barrier);
+
 		if (do_sync)
 			md_wakeup_thread(mddev->thread);
 #if 0
@@ -1473,28 +1515,14 @@ static void raid1d(mddev_t *mddev)
 
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-		spin_lock_irqsave(&conf->device_lock, flags);
-
-		if (conf->pending_bio_list.head) {
-			bio = bio_list_get(&conf->pending_bio_list);
-			blk_remove_plug(mddev->queue);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
-			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
-			bitmap_unplug(mddev->bitmap);
 
-			while (bio) { /* submit pending writes */
-				struct bio *next = bio->bi_next;
-				bio->bi_next = NULL;
-				generic_make_request(bio);
-				bio = next;
-			}
-			unplug = 1;
+		unplug += flush_pending_writes(conf);
 
-			continue;
-		}
-
-		if (list_empty(head))
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (list_empty(head)) {
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 			break;
+		}
 		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
 		list_del(head->prev);
 		conf->nr_queued--;
@@ -1590,7 +1618,6 @@ static void raid1d(mddev_t *mddev)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
 	if (unplug)
 		unplug_slaves(mddev);
 }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 017f58113c33..32389d2f18fc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -537,7 +537,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 	current_distance = abs(r10_bio->devs[slot].addr -
 			       conf->mirrors[disk].head_position);
 
-	/* Find the disk whose head is closest */
+	/* Find the disk whose head is closest,
+	 * or - for far > 1 - find the closest to partition beginning */
 
 	for (nslot = slot; nslot < conf->copies; nslot++) {
 		int ndisk = r10_bio->devs[nslot].devnum;
@@ -557,8 +558,13 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 			slot = nslot;
 			break;
 		}
-		new_distance = abs(r10_bio->devs[nslot].addr -
-				   conf->mirrors[ndisk].head_position);
+
+		/* for far > 1 always use the lowest address */
+		if (conf->far_copies > 1)
+			new_distance = r10_bio->devs[nslot].addr;
+		else
+			new_distance = abs(r10_bio->devs[nslot].addr -
+					   conf->mirrors[ndisk].head_position);
 		if (new_distance < current_distance) {
 			current_distance = new_distance;
 			disk = ndisk;
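Context for this read_balance() change: in the raid10 'far' layout (far_copies > 1), every disk holds both a near copy and a far copy laid out in separate zones, so steering reads by head distance makes the head seek back and forth between zones. Preferring the copy with the lowest sector address keeps reads in the zone near the start of each device. A self-contained sketch of the two selection policies follows; the types are illustrative, not the kernel structures.

#include <stdio.h>
#include <stdlib.h>

struct copy { long addr; long head_pos; };

static int pick_copy(const struct copy *c, int ncopies, int far_copies)
{
	int best = 0;
	long best_dist = far_copies > 1
		? c[0].addr				/* lowest address wins */
		: labs(c[0].addr - c[0].head_pos);	/* shortest seek wins */

	for (int i = 1; i < ncopies; i++) {
		long d = far_copies > 1
			? c[i].addr
			: labs(c[i].addr - c[i].head_pos);
		if (d < best_dist) {
			best_dist = d;
			best = i;
		}
	}
	return best;
}

int main(void)
{
	struct copy copies[2] = { { 100, 90 }, { 10, 500 } };

	/* near layout: copy 0 (seek of 10 beats 490) */
	printf("near layout picks copy %d\n", pick_copy(copies, 2, 1));
	/* far layout: copy 1 (address 10 beats 100) */
	printf("far layout picks copy %d\n", pick_copy(copies, 2, 2));
	return 0;
}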
@@ -629,7 +635,36 @@ static int raid10_congested(void *data, int bits)
 	return ret;
 }
 
-
+static int flush_pending_writes(conf_t *conf)
+{
+	/* Any writes that have been queued but are awaiting
+	 * bitmap updates get flushed here.
+	 * We return 1 if any requests were actually submitted.
+	 */
+	int rv = 0;
+
+	spin_lock_irq(&conf->device_lock);
+
+	if (conf->pending_bio_list.head) {
+		struct bio *bio;
+		bio = bio_list_get(&conf->pending_bio_list);
+		blk_remove_plug(conf->mddev->queue);
+		spin_unlock_irq(&conf->device_lock);
+		/* flush any pending bitmap writes to disk
+		 * before proceeding w/ I/O */
+		bitmap_unplug(conf->mddev->bitmap);
+
+		while (bio) { /* submit pending writes */
+			struct bio *next = bio->bi_next;
+			bio->bi_next = NULL;
+			generic_make_request(bio);
+			bio = next;
+		}
+		rv = 1;
+	} else
+		spin_unlock_irq(&conf->device_lock);
+	return rv;
+}
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -712,15 +747,23 @@ static void freeze_array(conf_t *conf)
 	/* stop syncio and normal IO and wait for everything to
 	 * go quiet.
 	 * We increment barrier and nr_waiting, and then
-	 * wait until barrier+nr_pending match nr_queued+2
+	 * wait until nr_pending match nr_queued+1
+	 * This is called in the context of one normal IO request
+	 * that has failed. Thus any sync request that might be pending
+	 * will be blocked by nr_pending, and we need to wait for
+	 * pending IO requests to complete or be queued for re-try.
+	 * Thus the number queued (nr_queued) plus this request (1)
+	 * must match the number of pending IOs (nr_pending) before
+	 * we continue.
 	 */
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
 	wait_event_lock_irq(conf->wait_barrier,
-			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->nr_pending == conf->nr_queued+1,
 			    conf->resync_lock,
-			    raid10_unplug(conf->mddev->queue));
+			    ({ flush_pending_writes(conf);
+			       raid10_unplug(conf->mddev->queue); }));
 	spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -892,6 +935,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
 		blk_plug_device(mddev->queue);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
+		/* In case raid10d snuck in to freeze_array */
+		wake_up(&conf->wait_barrier);
+
 		if (do_sync)
 			md_wakeup_thread(mddev->thread);
 
@@ -1464,28 +1510,14 @@ static void raid10d(mddev_t *mddev)
 
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-		spin_lock_irqsave(&conf->device_lock, flags);
 
-		if (conf->pending_bio_list.head) {
-			bio = bio_list_get(&conf->pending_bio_list);
-			blk_remove_plug(mddev->queue);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
-			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
-			bitmap_unplug(mddev->bitmap);
-
-			while (bio) { /* submit pending writes */
-				struct bio *next = bio->bi_next;
-				bio->bi_next = NULL;
-				generic_make_request(bio);
-				bio = next;
-			}
-			unplug = 1;
-
-			continue;
-		}
+		unplug += flush_pending_writes(conf);
 
-		if (list_empty(head))
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (list_empty(head)) {
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 			break;
+		}
 		r10_bio = list_entry(head->prev, r10bio_t, retry_list);
 		list_del(head->prev);
 		conf->nr_queued--;
@@ -1548,7 +1580,6 @@ static void raid10d(mddev_t *mddev)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
 	if (unplug)
 		unplug_slaves(mddev);
 }
@@ -1787,6 +1818,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		if (j == conf->copies) {
 			/* Cannot recover, so abort the recovery */
 			put_buf(r10_bio);
+			if (rb2)
+				atomic_dec(&rb2->remaining);
 			r10_bio = rb2;
 			if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
 				printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
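The final hunk balances reference accounting on the abort path: recovery of each copy raises the parent r10_bio's 'remaining' count, so when no working source disk is found (j == conf->copies) and the recovery is abandoned, the reference taken for rb2 must be dropped, or the parent request never completes. A minimal C11-atomics illustration of keeping an abort path symmetric with its start path (hypothetical names, not the kernel types):

#include <assert.h>
#include <stdatomic.h>

struct request {
	atomic_int remaining;	/* outstanding sub-requests */
};

static void start_subrequest(struct request *r)
{
	atomic_fetch_add(&r->remaining, 1);
}

static void abort_subrequest(struct request *r)
{
	/* mirrors: if (rb2) atomic_dec(&rb2->remaining); */
	atomic_fetch_sub(&r->remaining, 1);
}

int main(void)
{
	struct request r;

	atomic_init(&r.remaining, 0);
	start_subrequest(&r);
	abort_subrequest(&r);	/* the abort must undo the accounting */
	assert(atomic_load(&r.remaining) == 0);
	return 0;
}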