Diffstat (limited to 'drivers/md')

 -rw-r--r--  drivers/md/bitmap.c | 23
 -rw-r--r--  drivers/md/md.c     | 50
 -rw-r--r--  drivers/md/raid1.c  | 73
 -rw-r--r--  drivers/md/raid10.c | 87

 4 files changed, 167 insertions, 66 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 7aeceedcf7d4..c14dacdacfac 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1045,8 +1045,14 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 	if (bitmap == NULL)
 		return;
 	if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
-		return;
+		goto done;
+
 	bitmap->daemon_lastrun = jiffies;
+	if (bitmap->allclean) {
+		bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+		return;
+	}
+	bitmap->allclean = 1;
 
 	for (j = 0; j < bitmap->chunks; j++) {
 		bitmap_counter_t *bmc;
@@ -1068,8 +1074,10 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 			clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
 
 			spin_unlock_irqrestore(&bitmap->lock, flags);
-			if (need_write)
+			if (need_write) {
 				write_page(bitmap, page, 0);
+				bitmap->allclean = 0;
+			}
 			continue;
 		}
 
@@ -1098,6 +1106,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 /*
   if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
 */
+		if (*bmc)
+			bitmap->allclean = 0;
+
 		if (*bmc == 2) {
 			*bmc=1; /* maybe clear the bit next time */
 			set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
@@ -1132,6 +1143,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 		}
 	}
 
+ done:
+	if (bitmap->allclean == 0)
+		bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ;
 }
 
 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
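
The bitmap_daemon_work() hunks above add a simple idle-detection protocol: each pass starts by optimistically setting allclean, any live counter or NEEDWRITE page clears it again, and once a pass ends with allclean still set the daemon pushes its wakeup timeout out to MAX_SCHEDULE_TIMEOUT, so an idle array stops waking up every few seconds. The remaining bitmap.c hunks below clear allclean on every path that can dirty the bitmap (startwrite, start/end of a sync window, setting memory bits), which re-arms the normal daemon_sleep polling. A minimal userspace sketch of that handshake (illustrative stand-ins only, not kernel code):

    /* model of the allclean handshake in bitmap_daemon_work() */
    #include <stdio.h>

    #define SLEEP_FOREVER (-1L)     /* stands in for MAX_SCHEDULE_TIMEOUT */

    struct bmp { int allclean; long timeout; };

    static void daemon_pass(struct bmp *b, int saw_dirty, long daemon_sleep)
    {
            if (b->allclean) {          /* previous pass saw no activity */
                    b->timeout = SLEEP_FOREVER;
                    return;
            }
            b->allclean = 1;            /* assume clean until disproved */
            if (saw_dirty)
                    b->allclean = 0;    /* a counter or NEEDWRITE page was live */
            if (!b->allclean)
                    b->timeout = daemon_sleep;
    }

    int main(void)
    {
            struct bmp b = { 0, 5 };
            daemon_pass(&b, 1, 5);      /* busy: keep polling every 5s */
            daemon_pass(&b, 0, 5);      /* first quiet pass: allclean set */
            daemon_pass(&b, 0, 5);      /* second quiet pass: sleep forever */
            printf("timeout=%ld\n", b.timeout);     /* prints -1 */
            return 0;
    }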
@@ -1226,6 +1240,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 			sectors -= blocks;
 		else sectors = 0;
 	}
+	bitmap->allclean = 0;
 	return 0;
 }
 
@@ -1296,6 +1311,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
 		}
 	}
 	spin_unlock_irq(&bitmap->lock);
+	bitmap->allclean = 0;
 	return rv;
 }
 
@@ -1332,6 +1348,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab
 	}
  unlock:
 	spin_unlock_irqrestore(&bitmap->lock, flags);
+	bitmap->allclean = 0;
 }
 
 void bitmap_close_sync(struct bitmap *bitmap)
@@ -1399,7 +1416,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
 		set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
 	}
 	spin_unlock_irq(&bitmap->lock);
-
+	bitmap->allclean = 0;
 }
 
 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7da6ec244e15..ccbbf63727cc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1105,7 +1105,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
 	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
 	if (rdev->sb_size & bmask)
-		rdev-> sb_size = (rdev->sb_size | bmask)+1;
+		rdev->sb_size = (rdev->sb_size | bmask) + 1;
+
+	if (minor_version
+	    && rdev->data_offset < sb_offset + (rdev->sb_size/512))
+		return -EINVAL;
 
 	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
 		rdev->desc_nr = -1;
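
Two things happen in this hunk: the existing statement rounds sb_size up to the device's hardware sector size (with a stray space fixed), and the new check rejects v1.x superblocks whose data area would overlap the superblock itself. The round-up relies on bmask being one less than a power of two. A small standalone illustration (the numeric values are made up for the example, not taken from the patch):

    #include <assert.h>

    /* (x | (size-1)) + 1 rounds x up to the next multiple of a
     * power-of-two size, exactly as done for rdev->sb_size above */
    static unsigned round_up_pow2(unsigned x, unsigned size)
    {
            unsigned bmask = size - 1;
            if (x & bmask)
                    x = (x | bmask) + 1;
            return x;
    }

    int main(void)
    {
            /* e.g. max_dev = 384 gives sb_size = 384*2 + 256 = 1024 bytes */
            assert(round_up_pow2(1024, 4096) == 4096);
            assert(round_up_pow2(4096, 4096) == 4096);  /* already aligned */
            return 0;
    }

With a 4096-byte superblock at sb_offset, the new guard then requires data_offset to lie at least sb_size/512 = 8 sectors further on; a layout that violates this would let normal writes corrupt the superblock, so the load now fails with -EINVAL instead.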
@@ -1137,7 +1141,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 		else
 			ret = 0;
 	}
-	if (minor_version) 
+	if (minor_version)
 		rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
 	else
 		rdev->size = rdev->sb_offset;
@@ -1499,7 +1503,8 @@ static void export_rdev(mdk_rdev_t * rdev)
 	free_disk_sb(rdev);
 	list_del_init(&rdev->same_set);
 #ifndef MODULE
-	md_autodetect_dev(rdev->bdev->bd_dev);
+	if (test_bit(AutoDetected, &rdev->flags))
+		md_autodetect_dev(rdev->bdev->bd_dev);
 #endif
 	unlock_rdev(rdev);
 	kobject_put(&rdev->kobj);
@@ -1996,9 +2001,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 	char *e;
 	unsigned long long size = simple_strtoull(buf, &e, 10);
 	unsigned long long oldsize = rdev->size;
+	mddev_t *my_mddev = rdev->mddev;
+
 	if (e==buf || (*e && *e != '\n'))
 		return -EINVAL;
-	if (rdev->mddev->pers)
+	if (my_mddev->pers)
 		return -EBUSY;
 	rdev->size = size;
 	if (size > oldsize && rdev->mddev->external) {
@@ -2011,7 +2018,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		int overlap = 0;
 		struct list_head *tmp, *tmp2;
 
-		mddev_unlock(rdev->mddev);
+		mddev_unlock(my_mddev);
 		for_each_mddev(mddev, tmp) {
 			mdk_rdev_t *rdev2;
 
@@ -2031,7 +2038,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 					break;
 				}
 		}
-		mddev_lock(rdev->mddev);
+		mddev_lock(my_mddev);
 		if (overlap) {
 			/* Someone else could have slipped in a size
 			 * change here, but doing so is just silly.
@@ -2043,8 +2050,8 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			return -EBUSY;
 		}
 	}
-	if (size < rdev->mddev->size || rdev->mddev->size == 0)
-		rdev->mddev->size = size;
+	if (size < my_mddev->size || my_mddev->size == 0)
+		my_mddev->size = size;
 	return len;
 }
 
@@ -2065,10 +2072,21 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 {
 	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
 	mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+	mddev_t *mddev = rdev->mddev;
+	ssize_t rv;
 
 	if (!entry->show)
 		return -EIO;
-	return entry->show(rdev, page);
+
+	rv = mddev ? mddev_lock(mddev) : -EBUSY;
+	if (!rv) {
+		if (rdev->mddev == NULL)
+			rv = -EBUSY;
+		else
+			rv = entry->show(rdev, page);
+		mddev_unlock(mddev);
+	}
+	return rv;
 }
 
 static ssize_t
@@ -2077,15 +2095,19 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
 {
 	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
 	mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
-	int rv;
+	ssize_t rv;
+	mddev_t *mddev = rdev->mddev;
 
 	if (!entry->store)
 		return -EIO;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	rv = mddev_lock(rdev->mddev);
+	rv = mddev ? mddev_lock(mddev): -EBUSY;
 	if (!rv) {
-		rv = entry->store(rdev, page, length);
+		if (rdev->mddev == NULL)
+			rv = -EBUSY;
+		else
+			rv = entry->store(rdev, page, length);
 		mddev_unlock(rdev->mddev);
 	}
 	return rv;
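
Both sysfs accessors now follow the same discipline: snapshot rdev->mddev, take the mddev lock (or fail with -EBUSY if the rdev is already detached), then re-check rdev->mddev under the lock before calling into the entry, since the rdev can be disconnected from its array while the caller sleeps on the lock. A compact userspace sketch of that revalidate-after-lock pattern (stand-in types, not the kernel API):

    #include <errno.h>
    #include <pthread.h>
    #include <stddef.h>
    #include <sys/types.h>

    struct array { pthread_mutex_t lock; };      /* ~ mddev_t */
    struct member { struct array *owner; };      /* ~ mdk_rdev_t */

    static ssize_t member_attr_access(struct member *m)
    {
            struct array *a = m->owner;          /* snapshot before locking */
            ssize_t rv = a ? pthread_mutex_lock(&a->lock) : -EBUSY;

            if (!rv) {
                    if (m->owner == NULL)        /* detached while we waited */
                            rv = -EBUSY;
                    else
                            rv = 0;              /* ... entry->show()/store() ... */
                    pthread_mutex_unlock(&a->lock);  /* unlock what we locked */
            }
            return rv;
    }

Note that the show path unlocks through the mddev snapshot rather than re-reading rdev->mddev; since rdev->mddev may have become NULL by then, the snapshot form is the more defensive of the two.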
@@ -5127,7 +5149,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 			if (mddev->ro==1)
 				seq_printf(seq, " (read-only)");
 			if (mddev->ro==2)
-				seq_printf(seq, "(auto-read-only)");
+				seq_printf(seq, " (auto-read-only)");
 			seq_printf(seq, " %s", mddev->pers->name);
 		}
 
@@ -5351,6 +5373,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
 		mddev->ro = 0;
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
+		md_wakeup_thread(mddev->sync_thread);
 	}
 	atomic_inc(&mddev->writes_pending);
 	if (mddev->in_sync) {
@@ -6021,6 +6044,7 @@ static void autostart_arrays(int part)
 			MD_BUG();
 			continue;
 		}
+		set_bit(AutoDetected, &rdev->flags);
 		list_add(&rdev->same_set, &pending_raid_disks);
 		i_passed++;
 	}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 5c7fef091cec..ff61b309129a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -592,6 +592,37 @@ static int raid1_congested(void *data, int bits)
 }
 
 
+static int flush_pending_writes(conf_t *conf)
+{
+	/* Any writes that have been queued but are awaiting
+	 * bitmap updates get flushed here.
+	 * We return 1 if any requests were actually submitted.
+	 */
+	int rv = 0;
+
+	spin_lock_irq(&conf->device_lock);
+
+	if (conf->pending_bio_list.head) {
+		struct bio *bio;
+		bio = bio_list_get(&conf->pending_bio_list);
+		blk_remove_plug(conf->mddev->queue);
+		spin_unlock_irq(&conf->device_lock);
+		/* flush any pending bitmap writes to
+		 * disk before proceeding w/ I/O */
+		bitmap_unplug(conf->mddev->bitmap);
+
+		while (bio) { /* submit pending writes */
+			struct bio *next = bio->bi_next;
+			bio->bi_next = NULL;
+			generic_make_request(bio);
+			bio = next;
+		}
+		rv = 1;
+	} else
+		spin_unlock_irq(&conf->device_lock);
+	return rv;
+}
+
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -673,15 +704,23 @@ static void freeze_array(conf_t *conf)
 	/* stop syncio and normal IO and wait for everything to
 	 * go quite.
 	 * We increment barrier and nr_waiting, and then
-	 * wait until barrier+nr_pending match nr_queued+2
+	 * wait until nr_pending match nr_queued+1
+	 * This is called in the context of one normal IO request
+	 * that has failed. Thus any sync request that might be pending
+	 * will be blocked by nr_pending, and we need to wait for
+	 * pending IO requests to complete or be queued for re-try.
+	 * Thus the number queued (nr_queued) plus this request (1)
+	 * must match the number of pending IOs (nr_pending) before
+	 * we continue.
 	 */
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
 	wait_event_lock_irq(conf->wait_barrier,
-			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->nr_pending == conf->nr_queued+1,
 			    conf->resync_lock,
-			    raid1_unplug(conf->mddev->queue));
+			    ({ flush_pending_writes(conf);
+			       raid1_unplug(conf->mddev->queue); }));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(conf_t *conf)
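
A worked example of the new accounting: suppose five writes are in flight (nr_pending == 5) when one fails and raid1d starts handling it. If the other four also fail and land on the retry list, nr_queued reaches 4 and the condition nr_pending == nr_queued + 1 (5 == 4 + 1) becomes true: everything except the request currently being handled is parked. The old condition could wait forever, because writes parked on pending_bio_list awaiting bitmap updates still count in nr_pending, and once raid1d itself was stuck in freeze_array() nothing would ever submit them; calling flush_pending_writes() from inside the wait loop breaks exactly that deadlock. The same reasoning applies to the raid10 version of this change below.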
@@ -907,6 +946,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
 		blk_plug_device(mddev->queue);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
+		/* In case raid1d snuck into freeze_array */
+		wake_up(&conf->wait_barrier);
+
 		if (do_sync)
 			md_wakeup_thread(mddev->thread);
 #if 0
@@ -1473,28 +1515,14 @@ static void raid1d(mddev_t *mddev)
 
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-		spin_lock_irqsave(&conf->device_lock, flags);
-
-		if (conf->pending_bio_list.head) {
-			bio = bio_list_get(&conf->pending_bio_list);
-			blk_remove_plug(mddev->queue);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
-			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
-			bitmap_unplug(mddev->bitmap);
 
-			while (bio) { /* submit pending writes */
-				struct bio *next = bio->bi_next;
-				bio->bi_next = NULL;
-				generic_make_request(bio);
-				bio = next;
-			}
-			unplug = 1;
+		unplug += flush_pending_writes(conf);
 
-			continue;
-		}
-
-		if (list_empty(head))
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (list_empty(head)) {
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 			break;
+		}
 		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
 		list_del(head->prev);
 		conf->nr_queued--;
@@ -1590,7 +1618,6 @@ static void raid1d(mddev_t *mddev)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
 	if (unplug)
 		unplug_slaves(mddev);
 }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 017f58113c33..32389d2f18fc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -537,7 +537,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 	current_distance = abs(r10_bio->devs[slot].addr -
 			       conf->mirrors[disk].head_position);
 
-	/* Find the disk whose head is closest */
+	/* Find the disk whose head is closest,
+	 * or - for far > 1 - find the closest to partition beginning */
 
 	for (nslot = slot; nslot < conf->copies; nslot++) {
 		int ndisk = r10_bio->devs[nslot].devnum;
@@ -557,8 +558,13 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 			slot = nslot;
 			break;
 		}
-		new_distance = abs(r10_bio->devs[nslot].addr -
-				   conf->mirrors[ndisk].head_position);
+
+		/* for far > 1 always use the lowest address */
+		if (conf->far_copies > 1)
+			new_distance = r10_bio->devs[nslot].addr;
+		else
+			new_distance = abs(r10_bio->devs[nslot].addr -
+					   conf->mirrors[ndisk].head_position);
 		if (new_distance < current_distance) {
 			current_distance = new_distance;
 			disk = ndisk;
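
The rationale: with far_copies > 1, each block has one copy near the start of one disk and another far into a different disk, and the head-distance metric can bounce sequential reads between the near and far halves. Always preferring the copy with the lowest device address keeps reads in the first (typically outer, faster) section of each disk and keeps sequential streams sequential. For example, if a block's two copies sit at device address 16 on one disk and 1048592 on another, the new code reads the copy at 16 regardless of current head positions (addresses are illustrative).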
@@ -629,7 +635,36 @@ static int raid10_congested(void *data, int bits)
 	return ret;
 }
 
-
+static int flush_pending_writes(conf_t *conf)
+{
+	/* Any writes that have been queued but are awaiting
+	 * bitmap updates get flushed here.
+	 * We return 1 if any requests were actually submitted.
+	 */
+	int rv = 0;
+
+	spin_lock_irq(&conf->device_lock);
+
+	if (conf->pending_bio_list.head) {
+		struct bio *bio;
+		bio = bio_list_get(&conf->pending_bio_list);
+		blk_remove_plug(conf->mddev->queue);
+		spin_unlock_irq(&conf->device_lock);
+		/* flush any pending bitmap writes to disk
+		 * before proceeding w/ I/O */
+		bitmap_unplug(conf->mddev->bitmap);
+
+		while (bio) { /* submit pending writes */
+			struct bio *next = bio->bi_next;
+			bio->bi_next = NULL;
+			generic_make_request(bio);
+			bio = next;
+		}
+		rv = 1;
+	} else
+		spin_unlock_irq(&conf->device_lock);
+	return rv;
+}
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -712,15 +747,23 @@ static void freeze_array(conf_t *conf)
 	/* stop syncio and normal IO and wait for everything to
 	 * go quiet.
 	 * We increment barrier and nr_waiting, and then
-	 * wait until barrier+nr_pending match nr_queued+2
+	 * wait until nr_pending match nr_queued+1
+	 * This is called in the context of one normal IO request
+	 * that has failed. Thus any sync request that might be pending
+	 * will be blocked by nr_pending, and we need to wait for
+	 * pending IO requests to complete or be queued for re-try.
+	 * Thus the number queued (nr_queued) plus this request (1)
+	 * must match the number of pending IOs (nr_pending) before
+	 * we continue.
 	 */
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
 	wait_event_lock_irq(conf->wait_barrier,
-			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->nr_pending == conf->nr_queued+1,
 			    conf->resync_lock,
-			    raid10_unplug(conf->mddev->queue));
+			    ({ flush_pending_writes(conf);
+			       raid10_unplug(conf->mddev->queue); }));
 	spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -892,6 +935,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
 		blk_plug_device(mddev->queue);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
+		/* In case raid10d snuck in to freeze_array */
+		wake_up(&conf->wait_barrier);
+
 		if (do_sync)
 			md_wakeup_thread(mddev->thread);
 
@@ -1464,28 +1510,14 @@ static void raid10d(mddev_t *mddev)
 
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-		spin_lock_irqsave(&conf->device_lock, flags);
 
-		if (conf->pending_bio_list.head) {
-			bio = bio_list_get(&conf->pending_bio_list);
-			blk_remove_plug(mddev->queue);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
-			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
-			bitmap_unplug(mddev->bitmap);
-
-			while (bio) { /* submit pending writes */
-				struct bio *next = bio->bi_next;
-				bio->bi_next = NULL;
-				generic_make_request(bio);
-				bio = next;
-			}
-			unplug = 1;
-
-			continue;
-		}
+		unplug += flush_pending_writes(conf);
 
-		if (list_empty(head))
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (list_empty(head)) {
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 			break;
+		}
 		r10_bio = list_entry(head->prev, r10bio_t, retry_list);
 		list_del(head->prev);
 		conf->nr_queued--;
@@ -1548,7 +1580,6 @@ static void raid10d(mddev_t *mddev)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
 	if (unplug)
 		unplug_slaves(mddev);
 }
@@ -1787,6 +1818,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		if (j == conf->copies) {
 			/* Cannot recover, so abort the recovery */
 			put_buf(r10_bio);
+			if (rb2)
+				atomic_dec(&rb2->remaining);
 			r10_bio = rb2;
 			if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
 				printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
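
The added atomic_dec balances the reference taken earlier in sync_request() (not visible in this hunk), where each newly allocated r10_bio is chained onto rb2 via master_bio and rb2->remaining is incremented. When recovery of a device has to be abandoned and the new r10_bio is dropped with put_buf(), that count must be undone, otherwise the chain's remaining count never reaches zero and the recovery chain can never complete.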