author    Stefan Richter <stefanr@s5r6.in-berlin.de>   2006-09-17 12:17:19 -0400
committer Stefan Richter <stefanr@s5r6.in-berlin.de>   2006-09-17 12:19:31 -0400
commit    9b4f2e9576658c4e52d95dc8d309f51b2e2db096 (patch)
tree      7b1902b0f931783fccc6fee45c6f9c16b4fde5ce /drivers/md
parent    3c6c65f5ed5a6d307bd607aecd06d658c0934d88 (diff)
parent    803db244b9f71102e366fd689000c1417b9a7508 (diff)
ieee1394: merge from Linus
Conflicts:

	drivers/ieee1394/hosts.c

Patch "lockdep: annotate ieee1394 skb-queue-head locking" was meddling with
patch "ieee1394: fix kerneldoc of hpsb_alloc_host".

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-mpath.c    3
-rw-r--r--  drivers/md/dm-raid1.c    4
-rw-r--r--  drivers/md/linear.c      6
-rw-r--r--  drivers/md/md.c        105
-rw-r--r--  drivers/md/raid1.c      67
-rw-r--r--  drivers/md/raid10.c      4
-rw-r--r--  drivers/md/raid5.c      84
7 files changed, 176 insertions, 97 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 217615b33223..93f701ea87bc 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -710,6 +710,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 		return -EINVAL;
 	}
 
+	m->ti = ti;
+
 	r = parse_features(&as, m, ti);
 	if (r)
 		goto bad;
@@ -751,7 +753,6 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 	}
 
 	ti->private = m;
-	m->ti = ti;
 
 	return 0;
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index be48cedf986b..c54de989eb00 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -255,7 +255,9 @@ static struct region *__rh_alloc(struct region_hash *rh, region_t region)
 	struct region *reg, *nreg;
 
 	read_unlock(&rh->hash_lock);
-	nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
+	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
+	if (unlikely(!nreg))
+		nreg = kmalloc(sizeof(struct region), GFP_NOIO);
 	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
 		      RH_CLEAN : RH_NOSYNC;
 	nreg->rh = rh;
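
The dm-raid1 change above swaps the pool allocation to a non-sleeping
GFP_ATOMIC attempt, falling back to a sleeping GFP_NOIO kmalloc() only when
the pool is empty. A minimal stand-alone sketch of that idiom (mempool_alloc()
and kmalloc() are the real kernel calls; the surrounding function is
illustrative):

	static struct region *alloc_region(mempool_t *pool)
	{
		/* fast path: never sleeps, may return NULL under pressure */
		struct region *nreg = mempool_alloc(pool, GFP_ATOMIC);

		if (unlikely(!nreg))
			/* slow path: may sleep, but GFP_NOIO keeps memory
			 * reclaim from re-entering the block layer under us */
			nreg = kmalloc(sizeof(*nreg), GFP_NOIO);
		return nreg;
	}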
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index ff83c9b5979e..b99c19c7eb22 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -162,7 +162,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		goto out;
 	}
 
-	min_spacing = mddev->array_size;
+	min_spacing = conf->array_size;
 	sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *));
 
 	/* min_spacing is the minimum spacing that will fit the hash
@@ -171,7 +171,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	 * that is larger than min_spacing as use the size of that as
 	 * the actual spacing
 	 */
-	conf->hash_spacing = mddev->array_size;
+	conf->hash_spacing = conf->array_size;
 	for (i=0; i < cnt-1 ; i++) {
 		sector_t sz = 0;
 		int j;
@@ -228,7 +228,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	curr_offset = 0;
 	i = 0;
 	for (curr_offset = 0;
-	     curr_offset < mddev->array_size;
+	     curr_offset < conf->array_size;
 	     curr_offset += conf->hash_spacing) {
 
 		while (i < mddev->raid_disks-1 &&
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2fe32c261922..8dbab2ef3885 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -110,7 +110,7 @@ static ctl_table raid_table[] = {
 		.procname	= "speed_limit_min",
 		.data		= &sysctl_speed_limit_min,
 		.maxlen		= sizeof(int),
-		.mode		= 0644,
+		.mode		= S_IRUGO|S_IWUSR,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
@@ -118,7 +118,7 @@ static ctl_table raid_table[] = {
 		.procname	= "speed_limit_max",
 		.data		= &sysctl_speed_limit_max,
 		.maxlen		= sizeof(int),
-		.mode		= 0644,
+		.mode		= S_IRUGO|S_IWUSR,
 		.proc_handler	= &proc_dointvec,
 	},
 	{ .ctl_name = 0 }
@@ -129,7 +129,7 @@ static ctl_table raid_dir_table[] = {
 		.ctl_name	= DEV_RAID,
 		.procname	= "raid",
 		.maxlen		= 0,
-		.mode		= 0555,
+		.mode		= S_IRUGO|S_IXUGO,
 		.child		= raid_table,
 	},
 	{ .ctl_name = 0 }
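
The mode changes in this file are mechanical: each octal constant is replaced
by the symbolic macros that expand to the same value. For reference (standard
<linux/stat.h> definitions):

	/* S_IRUGO = S_IRUSR|S_IRGRP|S_IROTH = 0444
	 * S_IWUSR = 0200,  S_IXUGO = 0111,  S_IRUSR = 0400
	 *
	 * S_IRUGO|S_IWUSR == 0644   (rw-r--r--)
	 * S_IRUGO|S_IXUGO == 0555   (r-xr-xr-x)
	 * S_IRUSR|S_IWUSR == 0600   (rw-------)
	 * S_IWUSR         == 0200   (-w-------)
	 */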
@@ -1062,6 +1062,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	if (rdev->sb_size & bmask)
 		rdev-> sb_size = (rdev->sb_size | bmask)+1;
 
+	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
+		rdev->desc_nr = -1;
+	else
+		rdev->desc_nr = le32_to_cpu(sb->dev_number);
+
 	if (refdev == 0)
 		ret = 1;
 	else {
@@ -1171,7 +1176,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 	}
 	if (mddev->level != LEVEL_MULTIPATH) {
 		int role;
-		rdev->desc_nr = le32_to_cpu(sb->dev_number);
 		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
 		switch(role) {
 		case 0xffff: /* spare */
@@ -1404,7 +1408,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
 
-	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+	bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE);
 	if (IS_ERR(bdev)) {
 		printk(KERN_ERR "md: could not open %s.\n",
 			__bdevname(dev, b));
@@ -1414,7 +1418,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 	if (err) {
 		printk(KERN_ERR "md: could not bd_claim %s.\n",
 			bdevname(bdev, b));
-		blkdev_put(bdev);
+		blkdev_put_partition(bdev);
 		return err;
 	}
 	rdev->bdev = bdev;
@@ -1428,7 +1432,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
 	if (!bdev)
 		MD_BUG();
 	bd_release(bdev);
-	blkdev_put(bdev);
+	blkdev_put_partition(bdev);
 }
 
 void md_autodetect_dev(dev_t dev);
@@ -1593,6 +1597,19 @@ void md_update_sb(mddev_t * mddev)
 
 repeat:
 	spin_lock_irq(&mddev->write_lock);
+
+	if (mddev->degraded && mddev->sb_dirty == 3)
+		/* If the array is degraded, then skipping spares is both
+		 * dangerous and fairly pointless.
+		 * Dangerous because a device that was removed from the array
+		 * might have an event_count that still looks up-to-date,
+		 * so it can be re-added without a resync.
+		 * Pointless because if there are any spares to skip,
+		 * then a recovery will happen and soon that array won't
+		 * be degraded any more and the spare can go back to sleep then.
+		 */
+		mddev->sb_dirty = 1;
+
 	sync_req = mddev->in_sync;
 	mddev->utime = get_seconds();
 	if (mddev->sb_dirty == 3)
@@ -1779,8 +1796,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 	}
 	return err ? err : len;
 }
-static struct rdev_sysfs_entry
-rdev_state = __ATTR(state, 0644, state_show, state_store);
+static struct rdev_sysfs_entry rdev_state =
+__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
 
 static ssize_t
 super_show(mdk_rdev_t *rdev, char *page)
@@ -1811,7 +1828,7 @@ errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 	return -EINVAL;
 }
 static struct rdev_sysfs_entry rdev_errors =
-__ATTR(errors, 0644, errors_show, errors_store);
+__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
 
 static ssize_t
 slot_show(mdk_rdev_t *rdev, char *page)
@@ -1845,7 +1862,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 
 
 static struct rdev_sysfs_entry rdev_slot =
-__ATTR(slot, 0644, slot_show, slot_store);
+__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
 
 static ssize_t
 offset_show(mdk_rdev_t *rdev, char *page)
@@ -1867,7 +1884,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 }
 
 static struct rdev_sysfs_entry rdev_offset =
-__ATTR(offset, 0644, offset_show, offset_store);
+__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
 
 static ssize_t
 rdev_size_show(mdk_rdev_t *rdev, char *page)
@@ -1891,7 +1908,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 }
 
 static struct rdev_sysfs_entry rdev_size =
-__ATTR(size, 0644, rdev_size_show, rdev_size_store);
+__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
 
 static struct attribute *rdev_default_attrs[] = {
 	&rdev_state.attr,
@@ -1922,6 +1939,8 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	if (!entry->store)
 		return -EIO;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
 	return entry->store(rdev, page, length);
 }
 
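rdev_attr_store() (above) and md_attr_store() (further down) now refuse writes
from callers lacking CAP_SYS_ADMIN, rather than relying on the file mode
alone. Condensed, the pattern is (the entry type is a placeholder for the real
sysfs plumbing; capable() is the real kernel call):

	static ssize_t guarded_store(struct sysfs_entry *entry, void *obj,
				     const char *page, size_t length)
	{
		if (!entry->store)
			return -EIO;	/* attribute has no store method */
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;	/* writer must be privileged */
		return entry->store(obj, page, length);
	}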
@@ -2128,7 +2147,7 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
 	return len;
 }
 static struct md_sysfs_entry md_safe_delay =
-__ATTR(safe_mode_delay, 0644, safe_delay_show, safe_delay_store);
+__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR, safe_delay_show, safe_delay_store);
 
 static ssize_t
 level_show(mddev_t *mddev, char *page)
@@ -2163,7 +2182,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 }
 
 static struct md_sysfs_entry md_level =
-__ATTR(level, 0644, level_show, level_store);
+__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
 
 
 static ssize_t
@@ -2188,7 +2207,7 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
 	return len;
 }
 static struct md_sysfs_entry md_layout =
-__ATTR(layout, 0655, layout_show, layout_store);
+__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
 
 
 static ssize_t
@@ -2219,7 +2238,7 @@ raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
 	return rv ? rv : len;
 }
 static struct md_sysfs_entry md_raid_disks =
-__ATTR(raid_disks, 0644, raid_disks_show, raid_disks_store);
+__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
 
 static ssize_t
 chunk_size_show(mddev_t *mddev, char *page)
@@ -2243,7 +2262,7 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
 	return len;
 }
 static struct md_sysfs_entry md_chunk_size =
-__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store);
+__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
 
 static ssize_t
 resync_start_show(mddev_t *mddev, char *page)
@@ -2267,7 +2286,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
 	return len;
 }
 static struct md_sysfs_entry md_resync_start =
-__ATTR(resync_start, 0644, resync_start_show, resync_start_store);
+__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
 
 /*
  * The array state can be:
@@ -2437,7 +2456,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 	else
 		return len;
 }
-static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store);
+static struct md_sysfs_entry md_array_state =
+__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
 
 static ssize_t
 null_show(mddev_t *mddev, char *page)
@@ -2497,7 +2517,7 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
 }
 
 static struct md_sysfs_entry md_new_device =
-__ATTR(new_dev, 0200, null_show, new_dev_store);
+__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
 
 static ssize_t
 size_show(mddev_t *mddev, char *page)
@@ -2535,7 +2555,7 @@ size_store(mddev_t *mddev, const char *buf, size_t len)
 }
 
 static struct md_sysfs_entry md_size =
-__ATTR(component_size, 0644, size_show, size_store);
+__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
 
 
 /* Metdata version.
@@ -2583,7 +2603,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
 }
 
 static struct md_sysfs_entry md_metadata =
-__ATTR(metadata_version, 0644, metadata_show, metadata_store);
+__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
 
 static ssize_t
 action_show(mddev_t *mddev, char *page)
@@ -2651,12 +2671,11 @@ mismatch_cnt_show(mddev_t *mddev, char *page)
 		(unsigned long long) mddev->resync_mismatches);
 }
 
-static struct md_sysfs_entry
-md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
+static struct md_sysfs_entry md_scan_mode =
+__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
 
 
-static struct md_sysfs_entry
-md_mismatches = __ATTR_RO(mismatch_cnt);
+static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
 
 static ssize_t
 sync_min_show(mddev_t *mddev, char *page)
@@ -2715,15 +2734,14 @@ static ssize_t
 sync_speed_show(mddev_t *mddev, char *page)
 {
 	unsigned long resync, dt, db;
-	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+	resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active));
 	dt = ((jiffies - mddev->resync_mark) / HZ);
 	if (!dt) dt++;
 	db = resync - (mddev->resync_mark_cnt);
 	return sprintf(page, "%ld\n", db/dt/2); /* K/sec */
 }
 
-static struct md_sysfs_entry
-md_sync_speed = __ATTR_RO(sync_speed);
+static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
 
 static ssize_t
 sync_completed_show(mddev_t *mddev, char *page)
@@ -2739,8 +2757,7 @@ sync_completed_show(mddev_t *mddev, char *page)
 	return sprintf(page, "%lu / %lu\n", resync, max_blocks);
 }
 
-static struct md_sysfs_entry
-md_sync_completed = __ATTR_RO(sync_completed);
+static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
 
 static ssize_t
 suspend_lo_show(mddev_t *mddev, char *page)
@@ -2857,6 +2874,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	if (!entry->store)
 		return -EIO;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
 	rv = mddev_lock(mddev);
 	if (!rv) {
 		rv = entry->store(mddev, page, length);
@@ -3091,7 +3110,6 @@ static int do_md_run(mddev_t * mddev)
 	}
 
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	md_wakeup_thread(mddev->thread);
 
 	if (mddev->sb_dirty)
 		md_update_sb(mddev);
@@ -3112,7 +3130,7 @@ static int do_md_run(mddev_t * mddev)
 	 * start recovery here. If we leave it to md_check_recovery,
 	 * it will remove the drives and not do the right thing
 	 */
-	if (mddev->degraded) {
+	if (mddev->degraded && !mddev->sync_thread) {
 		struct list_head *rtmp;
 		int spares = 0;
 		ITERATE_RDEV(mddev,rdev,rtmp)
@@ -3133,10 +3151,11 @@ static int do_md_run(mddev_t * mddev)
 					mdname(mddev));
 				/* leave the spares where they are, it shouldn't hurt */
 				mddev->recovery = 0;
-			} else
-				md_wakeup_thread(mddev->sync_thread);
+			}
 		}
 	}
+	md_wakeup_thread(mddev->thread);
+	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
 
 	mddev->changed = 1;
 	md_new_event(mddev);
@@ -4586,6 +4605,8 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 		__builtin_return_address(0),__builtin_return_address(1),
 		__builtin_return_address(2),__builtin_return_address(3));
 */
+	if (!mddev->pers)
+		return;
 	if (!mddev->pers->error_handler)
 		return;
 	mddev->pers->error_handler(mddev,rdev);
@@ -4683,12 +4704,13 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 	 */
 	dt = ((jiffies - mddev->resync_mark) / HZ);
 	if (!dt) dt++;
-	db = resync - (mddev->resync_mark_cnt/2);
-	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100;
+	db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
+		- mddev->resync_mark_cnt;
+	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
 
 	seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
 
-	seq_printf(seq, " speed=%ldK/sec", db/dt);
+	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
 }
 
 static void *md_seq_start(struct seq_file *seq, loff_t *pos)
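
A units check on the rewritten expressions above, with illustrative numbers:
db is now a delta in 512-byte sectors, so db/2 converts it to KiB, and dt is
in seconds.

	dt = 10 s, db = 409600 sectors moved since the last mark (200 MiB)

	speed = db/2/dt = 409600/2/10 = 20480 K/sec

	remaining = max_blocks - resync = 10000000 (1K blocks)
	rt = (dt * (remaining / (db/2/100 + 1))) / 100
	   = (10 * (10000000 / 2049)) / 100 = 488 s, printed as finish=8.1min

The staged /100 divisions help keep the intermediate products within an
unsigned long on 32-bit machines.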
@@ -5199,6 +5221,7 @@ void md_do_sync(mddev_t *mddev)
 
 		j += sectors;
 		if (j>1) mddev->curr_resync = j;
+		mddev->curr_mark_cnt = io_sectors;
 		if (last_check == 0)
 			/* this is the earliers that rebuilt will be
 			 * visible in /proc/mdstat
@@ -5645,8 +5668,8 @@ static int set_ro(const char *val, struct kernel_param *kp)
 	return -EINVAL;
 }
 
-module_param_call(start_ro, set_ro, get_ro, NULL, 0600);
-module_param(start_dirty_degraded, int, 0644);
+module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
+module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
 
 
 EXPORT_SYMBOL(register_md_personality);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cead918578a7..3b4d69c05623 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -930,10 +930,13 @@ static void status(struct seq_file *seq, mddev_t *mddev)
 
 	seq_printf(seq, " [%d/%d] [", conf->raid_disks,
 		   conf->working_disks);
-	for (i = 0; i < conf->raid_disks; i++)
+	rcu_read_lock();
+	for (i = 0; i < conf->raid_disks; i++) {
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
 		seq_printf(seq, "%s",
-			   conf->mirrors[i].rdev &&
-			   test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
+			   rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
+	}
+	rcu_read_unlock();
 	seq_printf(seq, "]");
 }
 
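The raid1 hunks above and below all move to the same RCU read-side idiom for
the per-mirror rdev pointer: take one snapshot through rcu_dereference(),
NULL-check it, and finish with it before rcu_read_unlock(). As a minimal
sketch (slot_in_sync() is illustrative; the RCU calls and the types are the
real kernel API):

	static int slot_in_sync(mirror_info_t *slot)
	{
		mdk_rdev_t *rdev;
		int ret;

		rcu_read_lock();
		rdev = rcu_dereference(slot->rdev);	/* one snapshot */
		ret = rdev && test_bit(In_sync, &rdev->flags);
		rcu_read_unlock();	/* rdev must not be used past here */
		return ret;
	}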
@@ -975,7 +978,6 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 static void print_conf(conf_t *conf)
 {
 	int i;
-	mirror_info_t *tmp;
 
 	printk("RAID1 conf printout:\n");
 	if (!conf) {
@@ -985,14 +987,17 @@ static void print_conf(conf_t *conf)
 	printk(" --- wd:%d rd:%d\n", conf->working_disks,
 	       conf->raid_disks);
 
+	rcu_read_lock();
 	for (i = 0; i < conf->raid_disks; i++) {
 		char b[BDEVNAME_SIZE];
-		tmp = conf->mirrors + i;
-		if (tmp->rdev)
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		if (rdev)
 			printk(" disk %d, wo:%d, o:%d, dev:%s\n",
-			       i, !test_bit(In_sync, &tmp->rdev->flags), !test_bit(Faulty, &tmp->rdev->flags),
-			       bdevname(tmp->rdev->bdev,b));
+			       i, !test_bit(In_sync, &rdev->flags),
+			       !test_bit(Faulty, &rdev->flags),
+			       bdevname(rdev->bdev,b));
 	}
+	rcu_read_unlock();
 }
 
 static void close_sync(conf_t *conf)
@@ -1008,20 +1013,20 @@ static int raid1_spare_active(mddev_t *mddev)
 {
 	int i;
 	conf_t *conf = mddev->private;
-	mirror_info_t *tmp;
 
 	/*
 	 * Find all failed disks within the RAID1 configuration
-	 * and mark them readable
+	 * and mark them readable.
+	 * Called under mddev lock, so rcu protection not needed.
 	 */
 	for (i = 0; i < conf->raid_disks; i++) {
-		tmp = conf->mirrors + i;
-		if (tmp->rdev
-		    && !test_bit(Faulty, &tmp->rdev->flags)
-		    && !test_bit(In_sync, &tmp->rdev->flags)) {
+		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+		if (rdev
+		    && !test_bit(Faulty, &rdev->flags)
+		    && !test_bit(In_sync, &rdev->flags)) {
 			conf->working_disks++;
 			mddev->degraded--;
-			set_bit(In_sync, &tmp->rdev->flags);
+			set_bit(In_sync, &rdev->flags);
 		}
 	}
 
@@ -1145,7 +1150,7 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
 	long sectors_to_go = r1_bio->sectors;
 	/* make sure these bits doesn't get cleared. */
 	do {
-		bitmap_end_sync(mddev->bitmap, r1_bio->sector,
+		bitmap_end_sync(mddev->bitmap, s,
 				&sync_blocks, 1);
 		s += sync_blocks;
 		sectors_to_go -= sync_blocks;
@@ -1237,7 +1242,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 	/* ouch - failed to read all of that.
 	 * Try some synchronous reads of other devices to get
 	 * good data, much like with normal read errors. Only
-	 * read into the pages we already have so they we don't
+	 * read into the pages we already have so we don't
 	 * need to re-issue the read request.
 	 * We don't need to freeze the array, because being in an
 	 * active sync request, there is no normal IO, and
@@ -1257,6 +1262,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 		s = PAGE_SIZE >> 9;
 		do {
 			if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
+				/* No rcu protection needed here; devices
+				 * can only be removed when no resync is
+				 * active, and resync is currently active
+				 */
 				rdev = conf->mirrors[d].rdev;
 				if (sync_page_io(rdev->bdev,
 						 sect + rdev->data_offset,
@@ -1463,6 +1472,11 @@ static void raid1d(mddev_t *mddev)
 				s = PAGE_SIZE >> 9;
 
 				do {
+					/* Note: no rcu protection needed here
+					 * as this is synchronous in the raid1d thread
+					 * which is the thread that might remove
+					 * a device. If raid1d ever becomes multi-threaded....
+					 */
 					rdev = conf->mirrors[d].rdev;
 					if (rdev &&
 					    test_bit(In_sync, &rdev->flags) &&
@@ -1486,7 +1500,6 @@ static void raid1d(mddev_t *mddev)
 						d = conf->raid_disks;
 					d--;
 					rdev = conf->mirrors[d].rdev;
-					atomic_add(s, &rdev->corrected_errors);
 					if (rdev &&
 					    test_bit(In_sync, &rdev->flags)) {
 						if (sync_page_io(rdev->bdev,
@@ -1509,6 +1522,11 @@ static void raid1d(mddev_t *mddev)
 							 s<<9, conf->tmppage, READ) == 0)
 						/* Well, this device is dead */
 						md_error(mddev, rdev);
+					else {
+						atomic_add(s, &rdev->corrected_errors);
+						printk(KERN_INFO
+						       "raid1:%s: read error corrected (%d sectors at %llu on %s)\n",
+						       mdname(mddev), s,
+						       (unsigned long long)(sect + rdev->data_offset),
+						       bdevname(rdev->bdev, b));
+					}
 				}
 			}
 		} else {
@@ -1622,15 +1640,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return 0;
 	}
 
-	/* before building a request, check if we can skip these blocks..
-	 * This call the bitmap_start_sync doesn't actually record anything
-	 */
 	if (mddev->bitmap == NULL &&
 	    mddev->recovery_cp == MaxSector &&
+	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
 	    conf->fullsync == 0) {
 		*skipped = 1;
 		return max_sector - sector_nr;
 	}
+	/* before building a request, check if we can skip these blocks..
+	 * This call to bitmap_start_sync doesn't actually record anything
+	 */
 	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
 	    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		/* We can skip this block, and probably several more */
@@ -1783,19 +1802,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		for (i=0; i<conf->raid_disks; i++) {
 			bio = r1_bio->bios[i];
 			if (bio->bi_end_io == end_sync_read) {
-				md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors);
+				md_sync_acct(bio->bi_bdev, nr_sectors);
 				generic_make_request(bio);
 			}
 		}
 	} else {
 		atomic_set(&r1_bio->remaining, 1);
 		bio = r1_bio->bios[r1_bio->read_disk];
-		md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev,
-			     nr_sectors);
+		md_sync_acct(bio->bi_bdev, nr_sectors);
 		generic_make_request(bio);
 
 	}
-
 	return nr_sectors;
 }
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7f636283a1ba..016ddb831c9b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1492,6 +1492,10 @@ static void raid10d(mddev_t *mddev)
 							 s<<9, conf->tmppage, READ) == 0)
 						/* Well, this device is dead */
 						md_error(mddev, rdev);
+					else
+						printk(KERN_INFO
+						       "raid10:%s: read error corrected (%d sectors at %llu on %s)\n",
+						       mdname(mddev), s,
+						       (unsigned long long)(sect+rdev->data_offset),
+						       bdevname(rdev->bdev, b));
+
 					rdev_dec_pending(rdev, mddev);
 					rcu_read_lock();
 				}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7433871f4b3a..450066007160 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -18,6 +18,30 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+/*
+ * BITMAP UNPLUGGING:
+ *
+ * The sequencing for updating the bitmap reliably is a little
+ * subtle (and I got it wrong the first time) so it deserves some
+ * explanation.
+ *
+ * We group bitmap updates into batches. Each batch has a number.
+ * We may write out several batches at once, but that isn't very important.
+ * conf->bm_write is the number of the last batch successfully written.
+ * conf->bm_flush is the number of the last batch that was closed to
+ *    new additions.
+ * When we discover that we will need to write to any block in a stripe
+ * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
+ * the number of the batch it will be in. This is bm_flush+1.
+ * When we are ready to do a write, if that batch hasn't been written yet,
+ *    we plug the array and queue the stripe for later.
+ * When an unplug happens, we increment bm_flush, thus closing the current
+ *    batch.
+ * When we notice that bm_flush > bm_write, we write out all pending updates
+ * to the bitmap, and advance bm_write to where bm_flush was.
+ * This may occasionally write a bit out twice, but is sure never to
+ *    miss any bits.
+ */
 
 #include <linux/module.h>
 #include <linux/slab.h>
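
A toy model of the batch protocol described in the comment above (sequence
bookkeeping only, with hypothetical helper names; the driver itself keeps
these counters as conf->seq_flush and conf->seq_write in the hunks below):

	struct bm_seqs {
		unsigned int seq_write;	/* last batch fully written out */
		unsigned int seq_flush;	/* last batch closed to additions */
	};

	/* add_stripe_bio: the stripe joins the still-open batch */
	static unsigned int record_bitmap_update(struct bm_seqs *s)
	{
		return s->seq_flush + 1;	/* stored as sh->bm_seq */
	}

	/* unplug: close the current batch so it can be written */
	static void close_batch(struct bm_seqs *s)
	{
		s->seq_flush++;
	}

	/* raid5d: write any closed-but-unwritten batches, then catch up */
	static void flush_closed_batches(struct bm_seqs *s)
	{
		if (s->seq_flush != s->seq_write) {
			/* ... bitmap_unplug() writes the pending bits ... */
			s->seq_write = s->seq_flush;
		}
	}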
@@ -88,12 +112,14 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 	BUG_ON(!list_empty(&sh->lru));
 	BUG_ON(atomic_read(&conf->active_stripes)==0);
 	if (test_bit(STRIPE_HANDLE, &sh->state)) {
-		if (test_bit(STRIPE_DELAYED, &sh->state))
+		if (test_bit(STRIPE_DELAYED, &sh->state)) {
 			list_add_tail(&sh->lru, &conf->delayed_list);
-		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-			 conf->seq_write == sh->bm_seq)
+			blk_plug_device(conf->mddev->queue);
+		} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+			   sh->bm_seq - conf->seq_write > 0) {
 			list_add_tail(&sh->lru, &conf->bitmap_list);
-		else {
+			blk_plug_device(conf->mddev->queue);
+		} else {
 			clear_bit(STRIPE_BIT_DELAY, &sh->state);
 			list_add_tail(&sh->lru, &conf->handle_list);
 		}
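
Note the delayed-stripe test also changes from an equality check to
"sh->bm_seq - conf->seq_write > 0", the usual wrap-safe way to ask whether one
free-running sequence number is still ahead of another (the same trick as the
kernel's time_after()). In isolation:

	static inline int seq_after(unsigned int a, unsigned int b)
	{
		return (int)(a - b) > 0;	/* correct across wraparound */
	}

	/* seq_after(5, 3) == 1; seq_after(2, 0xfffffffeU) == 1, because
	 * 2 - 0xfffffffe wraps to 4, still a small positive int. */

A stripe queued on bitmap_list thus waits until its batch has been written,
i.e. until seq_after(sh->bm_seq, conf->seq_write) becomes false.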
@@ -270,7 +296,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 					     < (conf->max_nr_stripes *3/4)
 					    || !conf->inactive_blocked),
 					conf->device_lock,
-					unplug_slaves(conf->mddev)
+					raid5_unplug_device(conf->mddev->queue)
 				);
 			conf->inactive_blocked = 0;
 		} else
@@ -281,7 +307,8 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 	} else {
 		if (!test_bit(STRIPE_HANDLE, &sh->state))
 			atomic_inc(&conf->active_stripes);
-		if (list_empty(&sh->lru))
+		if (list_empty(&sh->lru) &&
+		    !test_bit(STRIPE_EXPANDING, &sh->state))
 			BUG();
 		list_del_init(&sh->lru);
 	}
@@ -496,6 +523,8 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = sh->disks, i;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+	char b[BDEVNAME_SIZE];
+	mdk_rdev_t *rdev;
 
 	if (bi->bi_size)
 		return 1;
@@ -543,25 +572,39 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 #endif
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
-			printk(KERN_INFO "raid5: read error corrected!!\n");
+			rdev = conf->disks[i].rdev;
+			printk(KERN_INFO
+			       "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
+			       mdname(conf->mddev), STRIPE_SECTORS,
+			       (unsigned long long)sh->sector + rdev->data_offset,
+			       bdevname(rdev->bdev, b));
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
 		}
 		if (atomic_read(&conf->disks[i].rdev->read_errors))
 			atomic_set(&conf->disks[i].rdev->read_errors, 0);
 	} else {
+		const char *bdn = bdevname(conf->disks[i].rdev->bdev, b);
 		int retry = 0;
+		rdev = conf->disks[i].rdev;
+
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
-		atomic_inc(&conf->disks[i].rdev->read_errors);
+		atomic_inc(&rdev->read_errors);
 		if (conf->mddev->degraded)
-			printk(KERN_WARNING "raid5: read error not correctable.\n");
+			printk(KERN_WARNING
+			       "raid5:%s: read error not correctable (sector %llu on %s).\n",
+			       mdname(conf->mddev),
+			       (unsigned long long)sh->sector + rdev->data_offset,
+			       bdn);
 		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
 			/* Oh, no!!! */
-			printk(KERN_WARNING "raid5: read error NOT corrected!!\n");
-		else if (atomic_read(&conf->disks[i].rdev->read_errors)
+			printk(KERN_WARNING
+			       "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
+			       mdname(conf->mddev),
+			       (unsigned long long)sh->sector + rdev->data_offset,
+			       bdn);
+		else if (atomic_read(&rdev->read_errors)
 			 > conf->max_nr_stripes)
 			printk(KERN_WARNING
-			       "raid5: Too many read errors, failing device.\n");
+			       "raid5:%s: Too many read errors, failing device %s.\n",
+			       mdname(conf->mddev), bdn);
 		else
 			retry = 1;
 		if (retry)
@@ -569,7 +612,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
 		else {
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
-			md_error(conf->mddev, conf->disks[i].rdev);
+			md_error(conf->mddev, rdev);
 		}
 	}
 	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
@@ -1270,9 +1313,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 		(unsigned long long)sh->sector, dd_idx);
 
 	if (conf->mddev->bitmap && firstwrite) {
-		sh->bm_seq = conf->seq_write;
 		bitmap_startwrite(conf->mddev->bitmap, sh->sector,
 				  STRIPE_SECTORS, 0);
+		sh->bm_seq = conf->seq_flush+1;
 		set_bit(STRIPE_BIT_DELAY, &sh->state);
 	}
 
@@ -2554,13 +2597,6 @@ static int raid5_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }
 
-static inline void raid5_plug_device(raid5_conf_t *conf)
-{
-	spin_lock_irq(&conf->device_lock);
-	blk_plug_device(conf->mddev->queue);
-	spin_unlock_irq(&conf->device_lock);
-}
-
 static int make_request(request_queue_t *q, struct bio * bi)
 {
 	mddev_t *mddev = q->queuedata;
@@ -2670,7 +2706,6 @@ static int make_request(request_queue_t *q, struct bio * bi)
 			goto retry;
 		}
 		finish_wait(&conf->wait_for_overlap, &w);
-		raid5_plug_device(conf);
 		handle_stripe(sh, NULL);
 		release_stripe(sh);
 	} else {
@@ -2923,7 +2958,7 @@ static void raid5d (mddev_t *mddev)
 	while (1) {
 		struct list_head *first;
 
-		if (conf->seq_flush - conf->seq_write > 0) {
+		if (conf->seq_flush != conf->seq_write) {
 			int seq = conf->seq_flush;
 			spin_unlock_irq(&conf->device_lock);
 			bitmap_unplug(mddev->bitmap);
@@ -3246,9 +3281,6 @@ static int run(mddev_t *mddev)
 		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 		mddev->sync_thread = md_register_thread(md_do_sync, mddev,
 							"%s_reshape");
-		/* FIXME if md_register_thread fails?? */
-		md_wakeup_thread(mddev->sync_thread);
-
 	}
 
 	/* read-ahead size must cover two whole stripes, which is