diff options
author | Stefan Richter <stefanr@s5r6.in-berlin.de> | 2006-09-17 12:17:19 -0400 |
---|---|---|
committer | Stefan Richter <stefanr@s5r6.in-berlin.de> | 2006-09-17 12:19:31 -0400 |
commit | 9b4f2e9576658c4e52d95dc8d309f51b2e2db096 (patch) | |
tree | 7b1902b0f931783fccc6fee45c6f9c16b4fde5ce /drivers/md | |
parent | 3c6c65f5ed5a6d307bd607aecd06d658c0934d88 (diff) | |
parent | 803db244b9f71102e366fd689000c1417b9a7508 (diff) |
ieee1394: merge from Linus
Conflicts: drivers/ieee1394/hosts.c
Patch "lockdep: annotate ieee1394 skb-queue-head locking" was meddling
with patch "ieee1394: fix kerneldoc of hpsb_alloc_host".
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-mpath.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 4 | ||||
-rw-r--r-- | drivers/md/linear.c | 6 | ||||
-rw-r--r-- | drivers/md/md.c | 105 | ||||
-rw-r--r-- | drivers/md/raid1.c | 67 | ||||
-rw-r--r-- | drivers/md/raid10.c | 4 | ||||
-rw-r--r-- | drivers/md/raid5.c | 84 |
7 files changed, 176 insertions, 97 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 217615b33223..93f701ea87bc 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
@@ -710,6 +710,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, | |||
710 | return -EINVAL; | 710 | return -EINVAL; |
711 | } | 711 | } |
712 | 712 | ||
713 | m->ti = ti; | ||
714 | |||
713 | r = parse_features(&as, m, ti); | 715 | r = parse_features(&as, m, ti); |
714 | if (r) | 716 | if (r) |
715 | goto bad; | 717 | goto bad; |
@@ -751,7 +753,6 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, | |||
751 | } | 753 | } |
752 | 754 | ||
753 | ti->private = m; | 755 | ti->private = m; |
754 | m->ti = ti; | ||
755 | 756 | ||
756 | return 0; | 757 | return 0; |
757 | 758 | ||
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index be48cedf986b..c54de989eb00 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
@@ -255,7 +255,9 @@ static struct region *__rh_alloc(struct region_hash *rh, region_t region) | |||
255 | struct region *reg, *nreg; | 255 | struct region *reg, *nreg; |
256 | 256 | ||
257 | read_unlock(&rh->hash_lock); | 257 | read_unlock(&rh->hash_lock); |
258 | nreg = mempool_alloc(rh->region_pool, GFP_NOIO); | 258 | nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); |
259 | if (unlikely(!nreg)) | ||
260 | nreg = kmalloc(sizeof(struct region), GFP_NOIO); | ||
259 | nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? | 261 | nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? |
260 | RH_CLEAN : RH_NOSYNC; | 262 | RH_CLEAN : RH_NOSYNC; |
261 | nreg->rh = rh; | 263 | nreg->rh = rh; |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index ff83c9b5979e..b99c19c7eb22 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -162,7 +162,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | |||
162 | goto out; | 162 | goto out; |
163 | } | 163 | } |
164 | 164 | ||
165 | min_spacing = mddev->array_size; | 165 | min_spacing = conf->array_size; |
166 | sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *)); | 166 | sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *)); |
167 | 167 | ||
168 | /* min_spacing is the minimum spacing that will fit the hash | 168 | /* min_spacing is the minimum spacing that will fit the hash |
@@ -171,7 +171,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | |||
171 | * that is larger than min_spacing as use the size of that as | 171 | * that is larger than min_spacing as use the size of that as |
172 | * the actual spacing | 172 | * the actual spacing |
173 | */ | 173 | */ |
174 | conf->hash_spacing = mddev->array_size; | 174 | conf->hash_spacing = conf->array_size; |
175 | for (i=0; i < cnt-1 ; i++) { | 175 | for (i=0; i < cnt-1 ; i++) { |
176 | sector_t sz = 0; | 176 | sector_t sz = 0; |
177 | int j; | 177 | int j; |
@@ -228,7 +228,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | |||
228 | curr_offset = 0; | 228 | curr_offset = 0; |
229 | i = 0; | 229 | i = 0; |
230 | for (curr_offset = 0; | 230 | for (curr_offset = 0; |
231 | curr_offset < mddev->array_size; | 231 | curr_offset < conf->array_size; |
232 | curr_offset += conf->hash_spacing) { | 232 | curr_offset += conf->hash_spacing) { |
233 | 233 | ||
234 | while (i < mddev->raid_disks-1 && | 234 | while (i < mddev->raid_disks-1 && |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 2fe32c261922..8dbab2ef3885 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -110,7 +110,7 @@ static ctl_table raid_table[] = { | |||
110 | .procname = "speed_limit_min", | 110 | .procname = "speed_limit_min", |
111 | .data = &sysctl_speed_limit_min, | 111 | .data = &sysctl_speed_limit_min, |
112 | .maxlen = sizeof(int), | 112 | .maxlen = sizeof(int), |
113 | .mode = 0644, | 113 | .mode = S_IRUGO|S_IWUSR, |
114 | .proc_handler = &proc_dointvec, | 114 | .proc_handler = &proc_dointvec, |
115 | }, | 115 | }, |
116 | { | 116 | { |
@@ -118,7 +118,7 @@ static ctl_table raid_table[] = { | |||
118 | .procname = "speed_limit_max", | 118 | .procname = "speed_limit_max", |
119 | .data = &sysctl_speed_limit_max, | 119 | .data = &sysctl_speed_limit_max, |
120 | .maxlen = sizeof(int), | 120 | .maxlen = sizeof(int), |
121 | .mode = 0644, | 121 | .mode = S_IRUGO|S_IWUSR, |
122 | .proc_handler = &proc_dointvec, | 122 | .proc_handler = &proc_dointvec, |
123 | }, | 123 | }, |
124 | { .ctl_name = 0 } | 124 | { .ctl_name = 0 } |
@@ -129,7 +129,7 @@ static ctl_table raid_dir_table[] = { | |||
129 | .ctl_name = DEV_RAID, | 129 | .ctl_name = DEV_RAID, |
130 | .procname = "raid", | 130 | .procname = "raid", |
131 | .maxlen = 0, | 131 | .maxlen = 0, |
132 | .mode = 0555, | 132 | .mode = S_IRUGO|S_IXUGO, |
133 | .child = raid_table, | 133 | .child = raid_table, |
134 | }, | 134 | }, |
135 | { .ctl_name = 0 } | 135 | { .ctl_name = 0 } |
@@ -1062,6 +1062,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1062 | if (rdev->sb_size & bmask) | 1062 | if (rdev->sb_size & bmask) |
1063 | rdev-> sb_size = (rdev->sb_size | bmask)+1; | 1063 | rdev-> sb_size = (rdev->sb_size | bmask)+1; |
1064 | 1064 | ||
1065 | if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) | ||
1066 | rdev->desc_nr = -1; | ||
1067 | else | ||
1068 | rdev->desc_nr = le32_to_cpu(sb->dev_number); | ||
1069 | |||
1065 | if (refdev == 0) | 1070 | if (refdev == 0) |
1066 | ret = 1; | 1071 | ret = 1; |
1067 | else { | 1072 | else { |
@@ -1171,7 +1176,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1171 | } | 1176 | } |
1172 | if (mddev->level != LEVEL_MULTIPATH) { | 1177 | if (mddev->level != LEVEL_MULTIPATH) { |
1173 | int role; | 1178 | int role; |
1174 | rdev->desc_nr = le32_to_cpu(sb->dev_number); | ||
1175 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 1179 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); |
1176 | switch(role) { | 1180 | switch(role) { |
1177 | case 0xffff: /* spare */ | 1181 | case 0xffff: /* spare */ |
@@ -1404,7 +1408,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) | |||
1404 | struct block_device *bdev; | 1408 | struct block_device *bdev; |
1405 | char b[BDEVNAME_SIZE]; | 1409 | char b[BDEVNAME_SIZE]; |
1406 | 1410 | ||
1407 | bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); | 1411 | bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE); |
1408 | if (IS_ERR(bdev)) { | 1412 | if (IS_ERR(bdev)) { |
1409 | printk(KERN_ERR "md: could not open %s.\n", | 1413 | printk(KERN_ERR "md: could not open %s.\n", |
1410 | __bdevname(dev, b)); | 1414 | __bdevname(dev, b)); |
@@ -1414,7 +1418,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) | |||
1414 | if (err) { | 1418 | if (err) { |
1415 | printk(KERN_ERR "md: could not bd_claim %s.\n", | 1419 | printk(KERN_ERR "md: could not bd_claim %s.\n", |
1416 | bdevname(bdev, b)); | 1420 | bdevname(bdev, b)); |
1417 | blkdev_put(bdev); | 1421 | blkdev_put_partition(bdev); |
1418 | return err; | 1422 | return err; |
1419 | } | 1423 | } |
1420 | rdev->bdev = bdev; | 1424 | rdev->bdev = bdev; |
@@ -1428,7 +1432,7 @@ static void unlock_rdev(mdk_rdev_t *rdev) | |||
1428 | if (!bdev) | 1432 | if (!bdev) |
1429 | MD_BUG(); | 1433 | MD_BUG(); |
1430 | bd_release(bdev); | 1434 | bd_release(bdev); |
1431 | blkdev_put(bdev); | 1435 | blkdev_put_partition(bdev); |
1432 | } | 1436 | } |
1433 | 1437 | ||
1434 | void md_autodetect_dev(dev_t dev); | 1438 | void md_autodetect_dev(dev_t dev); |
@@ -1593,6 +1597,19 @@ void md_update_sb(mddev_t * mddev) | |||
1593 | 1597 | ||
1594 | repeat: | 1598 | repeat: |
1595 | spin_lock_irq(&mddev->write_lock); | 1599 | spin_lock_irq(&mddev->write_lock); |
1600 | |||
1601 | if (mddev->degraded && mddev->sb_dirty == 3) | ||
1602 | /* If the array is degraded, then skipping spares is both | ||
1603 | * dangerous and fairly pointless. | ||
1604 | * Dangerous because a device that was removed from the array | ||
1605 | * might have a event_count that still looks up-to-date, | ||
1606 | * so it can be re-added without a resync. | ||
1607 | * Pointless because if there are any spares to skip, | ||
1608 | * then a recovery will happen and soon that array won't | ||
1609 | * be degraded any more and the spare can go back to sleep then. | ||
1610 | */ | ||
1611 | mddev->sb_dirty = 1; | ||
1612 | |||
1596 | sync_req = mddev->in_sync; | 1613 | sync_req = mddev->in_sync; |
1597 | mddev->utime = get_seconds(); | 1614 | mddev->utime = get_seconds(); |
1598 | if (mddev->sb_dirty == 3) | 1615 | if (mddev->sb_dirty == 3) |
@@ -1779,8 +1796,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1779 | } | 1796 | } |
1780 | return err ? err : len; | 1797 | return err ? err : len; |
1781 | } | 1798 | } |
1782 | static struct rdev_sysfs_entry | 1799 | static struct rdev_sysfs_entry rdev_state = |
1783 | rdev_state = __ATTR(state, 0644, state_show, state_store); | 1800 | __ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store); |
1784 | 1801 | ||
1785 | static ssize_t | 1802 | static ssize_t |
1786 | super_show(mdk_rdev_t *rdev, char *page) | 1803 | super_show(mdk_rdev_t *rdev, char *page) |
@@ -1811,7 +1828,7 @@ errors_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1811 | return -EINVAL; | 1828 | return -EINVAL; |
1812 | } | 1829 | } |
1813 | static struct rdev_sysfs_entry rdev_errors = | 1830 | static struct rdev_sysfs_entry rdev_errors = |
1814 | __ATTR(errors, 0644, errors_show, errors_store); | 1831 | __ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store); |
1815 | 1832 | ||
1816 | static ssize_t | 1833 | static ssize_t |
1817 | slot_show(mdk_rdev_t *rdev, char *page) | 1834 | slot_show(mdk_rdev_t *rdev, char *page) |
@@ -1845,7 +1862,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1845 | 1862 | ||
1846 | 1863 | ||
1847 | static struct rdev_sysfs_entry rdev_slot = | 1864 | static struct rdev_sysfs_entry rdev_slot = |
1848 | __ATTR(slot, 0644, slot_show, slot_store); | 1865 | __ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store); |
1849 | 1866 | ||
1850 | static ssize_t | 1867 | static ssize_t |
1851 | offset_show(mdk_rdev_t *rdev, char *page) | 1868 | offset_show(mdk_rdev_t *rdev, char *page) |
@@ -1867,7 +1884,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1867 | } | 1884 | } |
1868 | 1885 | ||
1869 | static struct rdev_sysfs_entry rdev_offset = | 1886 | static struct rdev_sysfs_entry rdev_offset = |
1870 | __ATTR(offset, 0644, offset_show, offset_store); | 1887 | __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); |
1871 | 1888 | ||
1872 | static ssize_t | 1889 | static ssize_t |
1873 | rdev_size_show(mdk_rdev_t *rdev, char *page) | 1890 | rdev_size_show(mdk_rdev_t *rdev, char *page) |
@@ -1891,7 +1908,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1891 | } | 1908 | } |
1892 | 1909 | ||
1893 | static struct rdev_sysfs_entry rdev_size = | 1910 | static struct rdev_sysfs_entry rdev_size = |
1894 | __ATTR(size, 0644, rdev_size_show, rdev_size_store); | 1911 | __ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); |
1895 | 1912 | ||
1896 | static struct attribute *rdev_default_attrs[] = { | 1913 | static struct attribute *rdev_default_attrs[] = { |
1897 | &rdev_state.attr, | 1914 | &rdev_state.attr, |
@@ -1922,6 +1939,8 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, | |||
1922 | 1939 | ||
1923 | if (!entry->store) | 1940 | if (!entry->store) |
1924 | return -EIO; | 1941 | return -EIO; |
1942 | if (!capable(CAP_SYS_ADMIN)) | ||
1943 | return -EACCES; | ||
1925 | return entry->store(rdev, page, length); | 1944 | return entry->store(rdev, page, length); |
1926 | } | 1945 | } |
1927 | 1946 | ||
@@ -2128,7 +2147,7 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) | |||
2128 | return len; | 2147 | return len; |
2129 | } | 2148 | } |
2130 | static struct md_sysfs_entry md_safe_delay = | 2149 | static struct md_sysfs_entry md_safe_delay = |
2131 | __ATTR(safe_mode_delay, 0644,safe_delay_show, safe_delay_store); | 2150 | __ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store); |
2132 | 2151 | ||
2133 | static ssize_t | 2152 | static ssize_t |
2134 | level_show(mddev_t *mddev, char *page) | 2153 | level_show(mddev_t *mddev, char *page) |
@@ -2163,7 +2182,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2163 | } | 2182 | } |
2164 | 2183 | ||
2165 | static struct md_sysfs_entry md_level = | 2184 | static struct md_sysfs_entry md_level = |
2166 | __ATTR(level, 0644, level_show, level_store); | 2185 | __ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store); |
2167 | 2186 | ||
2168 | 2187 | ||
2169 | static ssize_t | 2188 | static ssize_t |
@@ -2188,7 +2207,7 @@ layout_store(mddev_t *mddev, const char *buf, size_t len) | |||
2188 | return len; | 2207 | return len; |
2189 | } | 2208 | } |
2190 | static struct md_sysfs_entry md_layout = | 2209 | static struct md_sysfs_entry md_layout = |
2191 | __ATTR(layout, 0655, layout_show, layout_store); | 2210 | __ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store); |
2192 | 2211 | ||
2193 | 2212 | ||
2194 | static ssize_t | 2213 | static ssize_t |
@@ -2219,7 +2238,7 @@ raid_disks_store(mddev_t *mddev, const char *buf, size_t len) | |||
2219 | return rv ? rv : len; | 2238 | return rv ? rv : len; |
2220 | } | 2239 | } |
2221 | static struct md_sysfs_entry md_raid_disks = | 2240 | static struct md_sysfs_entry md_raid_disks = |
2222 | __ATTR(raid_disks, 0644, raid_disks_show, raid_disks_store); | 2241 | __ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store); |
2223 | 2242 | ||
2224 | static ssize_t | 2243 | static ssize_t |
2225 | chunk_size_show(mddev_t *mddev, char *page) | 2244 | chunk_size_show(mddev_t *mddev, char *page) |
@@ -2243,7 +2262,7 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2243 | return len; | 2262 | return len; |
2244 | } | 2263 | } |
2245 | static struct md_sysfs_entry md_chunk_size = | 2264 | static struct md_sysfs_entry md_chunk_size = |
2246 | __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); | 2265 | __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store); |
2247 | 2266 | ||
2248 | static ssize_t | 2267 | static ssize_t |
2249 | resync_start_show(mddev_t *mddev, char *page) | 2268 | resync_start_show(mddev_t *mddev, char *page) |
@@ -2267,7 +2286,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len) | |||
2267 | return len; | 2286 | return len; |
2268 | } | 2287 | } |
2269 | static struct md_sysfs_entry md_resync_start = | 2288 | static struct md_sysfs_entry md_resync_start = |
2270 | __ATTR(resync_start, 0644, resync_start_show, resync_start_store); | 2289 | __ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store); |
2271 | 2290 | ||
2272 | /* | 2291 | /* |
2273 | * The array state can be: | 2292 | * The array state can be: |
@@ -2437,7 +2456,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2437 | else | 2456 | else |
2438 | return len; | 2457 | return len; |
2439 | } | 2458 | } |
2440 | static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store); | 2459 | static struct md_sysfs_entry md_array_state = |
2460 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); | ||
2441 | 2461 | ||
2442 | static ssize_t | 2462 | static ssize_t |
2443 | null_show(mddev_t *mddev, char *page) | 2463 | null_show(mddev_t *mddev, char *page) |
@@ -2497,7 +2517,7 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len) | |||
2497 | } | 2517 | } |
2498 | 2518 | ||
2499 | static struct md_sysfs_entry md_new_device = | 2519 | static struct md_sysfs_entry md_new_device = |
2500 | __ATTR(new_dev, 0200, null_show, new_dev_store); | 2520 | __ATTR(new_dev, S_IWUSR, null_show, new_dev_store); |
2501 | 2521 | ||
2502 | static ssize_t | 2522 | static ssize_t |
2503 | size_show(mddev_t *mddev, char *page) | 2523 | size_show(mddev_t *mddev, char *page) |
@@ -2535,7 +2555,7 @@ size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2535 | } | 2555 | } |
2536 | 2556 | ||
2537 | static struct md_sysfs_entry md_size = | 2557 | static struct md_sysfs_entry md_size = |
2538 | __ATTR(component_size, 0644, size_show, size_store); | 2558 | __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store); |
2539 | 2559 | ||
2540 | 2560 | ||
2541 | /* Metdata version. | 2561 | /* Metdata version. |
@@ -2583,7 +2603,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2583 | } | 2603 | } |
2584 | 2604 | ||
2585 | static struct md_sysfs_entry md_metadata = | 2605 | static struct md_sysfs_entry md_metadata = |
2586 | __ATTR(metadata_version, 0644, metadata_show, metadata_store); | 2606 | __ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); |
2587 | 2607 | ||
2588 | static ssize_t | 2608 | static ssize_t |
2589 | action_show(mddev_t *mddev, char *page) | 2609 | action_show(mddev_t *mddev, char *page) |
@@ -2651,12 +2671,11 @@ mismatch_cnt_show(mddev_t *mddev, char *page) | |||
2651 | (unsigned long long) mddev->resync_mismatches); | 2671 | (unsigned long long) mddev->resync_mismatches); |
2652 | } | 2672 | } |
2653 | 2673 | ||
2654 | static struct md_sysfs_entry | 2674 | static struct md_sysfs_entry md_scan_mode = |
2655 | md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); | 2675 | __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); |
2656 | 2676 | ||
2657 | 2677 | ||
2658 | static struct md_sysfs_entry | 2678 | static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt); |
2659 | md_mismatches = __ATTR_RO(mismatch_cnt); | ||
2660 | 2679 | ||
2661 | static ssize_t | 2680 | static ssize_t |
2662 | sync_min_show(mddev_t *mddev, char *page) | 2681 | sync_min_show(mddev_t *mddev, char *page) |
@@ -2715,15 +2734,14 @@ static ssize_t | |||
2715 | sync_speed_show(mddev_t *mddev, char *page) | 2734 | sync_speed_show(mddev_t *mddev, char *page) |
2716 | { | 2735 | { |
2717 | unsigned long resync, dt, db; | 2736 | unsigned long resync, dt, db; |
2718 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); | 2737 | resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)); |
2719 | dt = ((jiffies - mddev->resync_mark) / HZ); | 2738 | dt = ((jiffies - mddev->resync_mark) / HZ); |
2720 | if (!dt) dt++; | 2739 | if (!dt) dt++; |
2721 | db = resync - (mddev->resync_mark_cnt); | 2740 | db = resync - (mddev->resync_mark_cnt); |
2722 | return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ | 2741 | return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ |
2723 | } | 2742 | } |
2724 | 2743 | ||
2725 | static struct md_sysfs_entry | 2744 | static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed); |
2726 | md_sync_speed = __ATTR_RO(sync_speed); | ||
2727 | 2745 | ||
2728 | static ssize_t | 2746 | static ssize_t |
2729 | sync_completed_show(mddev_t *mddev, char *page) | 2747 | sync_completed_show(mddev_t *mddev, char *page) |
@@ -2739,8 +2757,7 @@ sync_completed_show(mddev_t *mddev, char *page) | |||
2739 | return sprintf(page, "%lu / %lu\n", resync, max_blocks); | 2757 | return sprintf(page, "%lu / %lu\n", resync, max_blocks); |
2740 | } | 2758 | } |
2741 | 2759 | ||
2742 | static struct md_sysfs_entry | 2760 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); |
2743 | md_sync_completed = __ATTR_RO(sync_completed); | ||
2744 | 2761 | ||
2745 | static ssize_t | 2762 | static ssize_t |
2746 | suspend_lo_show(mddev_t *mddev, char *page) | 2763 | suspend_lo_show(mddev_t *mddev, char *page) |
@@ -2857,6 +2874,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, | |||
2857 | 2874 | ||
2858 | if (!entry->store) | 2875 | if (!entry->store) |
2859 | return -EIO; | 2876 | return -EIO; |
2877 | if (!capable(CAP_SYS_ADMIN)) | ||
2878 | return -EACCES; | ||
2860 | rv = mddev_lock(mddev); | 2879 | rv = mddev_lock(mddev); |
2861 | if (!rv) { | 2880 | if (!rv) { |
2862 | rv = entry->store(mddev, page, length); | 2881 | rv = entry->store(mddev, page, length); |
@@ -3091,7 +3110,6 @@ static int do_md_run(mddev_t * mddev) | |||
3091 | } | 3110 | } |
3092 | 3111 | ||
3093 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3112 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
3094 | md_wakeup_thread(mddev->thread); | ||
3095 | 3113 | ||
3096 | if (mddev->sb_dirty) | 3114 | if (mddev->sb_dirty) |
3097 | md_update_sb(mddev); | 3115 | md_update_sb(mddev); |
@@ -3112,7 +3130,7 @@ static int do_md_run(mddev_t * mddev) | |||
3112 | * start recovery here. If we leave it to md_check_recovery, | 3130 | * start recovery here. If we leave it to md_check_recovery, |
3113 | * it will remove the drives and not do the right thing | 3131 | * it will remove the drives and not do the right thing |
3114 | */ | 3132 | */ |
3115 | if (mddev->degraded) { | 3133 | if (mddev->degraded && !mddev->sync_thread) { |
3116 | struct list_head *rtmp; | 3134 | struct list_head *rtmp; |
3117 | int spares = 0; | 3135 | int spares = 0; |
3118 | ITERATE_RDEV(mddev,rdev,rtmp) | 3136 | ITERATE_RDEV(mddev,rdev,rtmp) |
@@ -3133,10 +3151,11 @@ static int do_md_run(mddev_t * mddev) | |||
3133 | mdname(mddev)); | 3151 | mdname(mddev)); |
3134 | /* leave the spares where they are, it shouldn't hurt */ | 3152 | /* leave the spares where they are, it shouldn't hurt */ |
3135 | mddev->recovery = 0; | 3153 | mddev->recovery = 0; |
3136 | } else | 3154 | } |
3137 | md_wakeup_thread(mddev->sync_thread); | ||
3138 | } | 3155 | } |
3139 | } | 3156 | } |
3157 | md_wakeup_thread(mddev->thread); | ||
3158 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | ||
3140 | 3159 | ||
3141 | mddev->changed = 1; | 3160 | mddev->changed = 1; |
3142 | md_new_event(mddev); | 3161 | md_new_event(mddev); |
@@ -4586,6 +4605,8 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
4586 | __builtin_return_address(0),__builtin_return_address(1), | 4605 | __builtin_return_address(0),__builtin_return_address(1), |
4587 | __builtin_return_address(2),__builtin_return_address(3)); | 4606 | __builtin_return_address(2),__builtin_return_address(3)); |
4588 | */ | 4607 | */ |
4608 | if (!mddev->pers) | ||
4609 | return; | ||
4589 | if (!mddev->pers->error_handler) | 4610 | if (!mddev->pers->error_handler) |
4590 | return; | 4611 | return; |
4591 | mddev->pers->error_handler(mddev,rdev); | 4612 | mddev->pers->error_handler(mddev,rdev); |
@@ -4683,12 +4704,13 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
4683 | */ | 4704 | */ |
4684 | dt = ((jiffies - mddev->resync_mark) / HZ); | 4705 | dt = ((jiffies - mddev->resync_mark) / HZ); |
4685 | if (!dt) dt++; | 4706 | if (!dt) dt++; |
4686 | db = resync - (mddev->resync_mark_cnt/2); | 4707 | db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) |
4687 | rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100; | 4708 | - mddev->resync_mark_cnt; |
4709 | rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100; | ||
4688 | 4710 | ||
4689 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); | 4711 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); |
4690 | 4712 | ||
4691 | seq_printf(seq, " speed=%ldK/sec", db/dt); | 4713 | seq_printf(seq, " speed=%ldK/sec", db/2/dt); |
4692 | } | 4714 | } |
4693 | 4715 | ||
4694 | static void *md_seq_start(struct seq_file *seq, loff_t *pos) | 4716 | static void *md_seq_start(struct seq_file *seq, loff_t *pos) |
@@ -5199,6 +5221,7 @@ void md_do_sync(mddev_t *mddev) | |||
5199 | 5221 | ||
5200 | j += sectors; | 5222 | j += sectors; |
5201 | if (j>1) mddev->curr_resync = j; | 5223 | if (j>1) mddev->curr_resync = j; |
5224 | mddev->curr_mark_cnt = io_sectors; | ||
5202 | if (last_check == 0) | 5225 | if (last_check == 0) |
5203 | /* this is the earliers that rebuilt will be | 5226 | /* this is the earliers that rebuilt will be |
5204 | * visible in /proc/mdstat | 5227 | * visible in /proc/mdstat |
@@ -5645,8 +5668,8 @@ static int set_ro(const char *val, struct kernel_param *kp) | |||
5645 | return -EINVAL; | 5668 | return -EINVAL; |
5646 | } | 5669 | } |
5647 | 5670 | ||
5648 | module_param_call(start_ro, set_ro, get_ro, NULL, 0600); | 5671 | module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); |
5649 | module_param(start_dirty_degraded, int, 0644); | 5672 | module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); |
5650 | 5673 | ||
5651 | 5674 | ||
5652 | EXPORT_SYMBOL(register_md_personality); | 5675 | EXPORT_SYMBOL(register_md_personality); |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index cead918578a7..3b4d69c05623 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -930,10 +930,13 @@ static void status(struct seq_file *seq, mddev_t *mddev) | |||
930 | 930 | ||
931 | seq_printf(seq, " [%d/%d] [", conf->raid_disks, | 931 | seq_printf(seq, " [%d/%d] [", conf->raid_disks, |
932 | conf->working_disks); | 932 | conf->working_disks); |
933 | for (i = 0; i < conf->raid_disks; i++) | 933 | rcu_read_lock(); |
934 | for (i = 0; i < conf->raid_disks; i++) { | ||
935 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); | ||
934 | seq_printf(seq, "%s", | 936 | seq_printf(seq, "%s", |
935 | conf->mirrors[i].rdev && | 937 | rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); |
936 | test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_"); | 938 | } |
939 | rcu_read_unlock(); | ||
937 | seq_printf(seq, "]"); | 940 | seq_printf(seq, "]"); |
938 | } | 941 | } |
939 | 942 | ||
@@ -975,7 +978,6 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
975 | static void print_conf(conf_t *conf) | 978 | static void print_conf(conf_t *conf) |
976 | { | 979 | { |
977 | int i; | 980 | int i; |
978 | mirror_info_t *tmp; | ||
979 | 981 | ||
980 | printk("RAID1 conf printout:\n"); | 982 | printk("RAID1 conf printout:\n"); |
981 | if (!conf) { | 983 | if (!conf) { |
@@ -985,14 +987,17 @@ static void print_conf(conf_t *conf) | |||
985 | printk(" --- wd:%d rd:%d\n", conf->working_disks, | 987 | printk(" --- wd:%d rd:%d\n", conf->working_disks, |
986 | conf->raid_disks); | 988 | conf->raid_disks); |
987 | 989 | ||
990 | rcu_read_lock(); | ||
988 | for (i = 0; i < conf->raid_disks; i++) { | 991 | for (i = 0; i < conf->raid_disks; i++) { |
989 | char b[BDEVNAME_SIZE]; | 992 | char b[BDEVNAME_SIZE]; |
990 | tmp = conf->mirrors + i; | 993 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
991 | if (tmp->rdev) | 994 | if (rdev) |
992 | printk(" disk %d, wo:%d, o:%d, dev:%s\n", | 995 | printk(" disk %d, wo:%d, o:%d, dev:%s\n", |
993 | i, !test_bit(In_sync, &tmp->rdev->flags), !test_bit(Faulty, &tmp->rdev->flags), | 996 | i, !test_bit(In_sync, &rdev->flags), |
994 | bdevname(tmp->rdev->bdev,b)); | 997 | !test_bit(Faulty, &rdev->flags), |
998 | bdevname(rdev->bdev,b)); | ||
995 | } | 999 | } |
1000 | rcu_read_unlock(); | ||
996 | } | 1001 | } |
997 | 1002 | ||
998 | static void close_sync(conf_t *conf) | 1003 | static void close_sync(conf_t *conf) |
@@ -1008,20 +1013,20 @@ static int raid1_spare_active(mddev_t *mddev) | |||
1008 | { | 1013 | { |
1009 | int i; | 1014 | int i; |
1010 | conf_t *conf = mddev->private; | 1015 | conf_t *conf = mddev->private; |
1011 | mirror_info_t *tmp; | ||
1012 | 1016 | ||
1013 | /* | 1017 | /* |
1014 | * Find all failed disks within the RAID1 configuration | 1018 | * Find all failed disks within the RAID1 configuration |
1015 | * and mark them readable | 1019 | * and mark them readable. |
1020 | * Called under mddev lock, so rcu protection not needed. | ||
1016 | */ | 1021 | */ |
1017 | for (i = 0; i < conf->raid_disks; i++) { | 1022 | for (i = 0; i < conf->raid_disks; i++) { |
1018 | tmp = conf->mirrors + i; | 1023 | mdk_rdev_t *rdev = conf->mirrors[i].rdev; |
1019 | if (tmp->rdev | 1024 | if (rdev |
1020 | && !test_bit(Faulty, &tmp->rdev->flags) | 1025 | && !test_bit(Faulty, &rdev->flags) |
1021 | && !test_bit(In_sync, &tmp->rdev->flags)) { | 1026 | && !test_bit(In_sync, &rdev->flags)) { |
1022 | conf->working_disks++; | 1027 | conf->working_disks++; |
1023 | mddev->degraded--; | 1028 | mddev->degraded--; |
1024 | set_bit(In_sync, &tmp->rdev->flags); | 1029 | set_bit(In_sync, &rdev->flags); |
1025 | } | 1030 | } |
1026 | } | 1031 | } |
1027 | 1032 | ||
@@ -1145,7 +1150,7 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) | |||
1145 | long sectors_to_go = r1_bio->sectors; | 1150 | long sectors_to_go = r1_bio->sectors; |
1146 | /* make sure these bits doesn't get cleared. */ | 1151 | /* make sure these bits doesn't get cleared. */ |
1147 | do { | 1152 | do { |
1148 | bitmap_end_sync(mddev->bitmap, r1_bio->sector, | 1153 | bitmap_end_sync(mddev->bitmap, s, |
1149 | &sync_blocks, 1); | 1154 | &sync_blocks, 1); |
1150 | s += sync_blocks; | 1155 | s += sync_blocks; |
1151 | sectors_to_go -= sync_blocks; | 1156 | sectors_to_go -= sync_blocks; |
@@ -1237,7 +1242,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
1237 | /* ouch - failed to read all of that. | 1242 | /* ouch - failed to read all of that. |
1238 | * Try some synchronous reads of other devices to get | 1243 | * Try some synchronous reads of other devices to get |
1239 | * good data, much like with normal read errors. Only | 1244 | * good data, much like with normal read errors. Only |
1240 | * read into the pages we already have so they we don't | 1245 | * read into the pages we already have so we don't |
1241 | * need to re-issue the read request. | 1246 | * need to re-issue the read request. |
1242 | * We don't need to freeze the array, because being in an | 1247 | * We don't need to freeze the array, because being in an |
1243 | * active sync request, there is no normal IO, and | 1248 | * active sync request, there is no normal IO, and |
@@ -1257,6 +1262,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
1257 | s = PAGE_SIZE >> 9; | 1262 | s = PAGE_SIZE >> 9; |
1258 | do { | 1263 | do { |
1259 | if (r1_bio->bios[d]->bi_end_io == end_sync_read) { | 1264 | if (r1_bio->bios[d]->bi_end_io == end_sync_read) { |
1265 | /* No rcu protection needed here devices | ||
1266 | * can only be removed when no resync is | ||
1267 | * active, and resync is currently active | ||
1268 | */ | ||
1260 | rdev = conf->mirrors[d].rdev; | 1269 | rdev = conf->mirrors[d].rdev; |
1261 | if (sync_page_io(rdev->bdev, | 1270 | if (sync_page_io(rdev->bdev, |
1262 | sect + rdev->data_offset, | 1271 | sect + rdev->data_offset, |
@@ -1463,6 +1472,11 @@ static void raid1d(mddev_t *mddev) | |||
1463 | s = PAGE_SIZE >> 9; | 1472 | s = PAGE_SIZE >> 9; |
1464 | 1473 | ||
1465 | do { | 1474 | do { |
1475 | /* Note: no rcu protection needed here | ||
1476 | * as this is synchronous in the raid1d thread | ||
1477 | * which is the thread that might remove | ||
1478 | * a device. If raid1d ever becomes multi-threaded.... | ||
1479 | */ | ||
1466 | rdev = conf->mirrors[d].rdev; | 1480 | rdev = conf->mirrors[d].rdev; |
1467 | if (rdev && | 1481 | if (rdev && |
1468 | test_bit(In_sync, &rdev->flags) && | 1482 | test_bit(In_sync, &rdev->flags) && |
@@ -1486,7 +1500,6 @@ static void raid1d(mddev_t *mddev) | |||
1486 | d = conf->raid_disks; | 1500 | d = conf->raid_disks; |
1487 | d--; | 1501 | d--; |
1488 | rdev = conf->mirrors[d].rdev; | 1502 | rdev = conf->mirrors[d].rdev; |
1489 | atomic_add(s, &rdev->corrected_errors); | ||
1490 | if (rdev && | 1503 | if (rdev && |
1491 | test_bit(In_sync, &rdev->flags)) { | 1504 | test_bit(In_sync, &rdev->flags)) { |
1492 | if (sync_page_io(rdev->bdev, | 1505 | if (sync_page_io(rdev->bdev, |
@@ -1509,6 +1522,11 @@ static void raid1d(mddev_t *mddev) | |||
1509 | s<<9, conf->tmppage, READ) == 0) | 1522 | s<<9, conf->tmppage, READ) == 0) |
1510 | /* Well, this device is dead */ | 1523 | /* Well, this device is dead */ |
1511 | md_error(mddev, rdev); | 1524 | md_error(mddev, rdev); |
1525 | else { | ||
1526 | atomic_add(s, &rdev->corrected_errors); | ||
1527 | printk(KERN_INFO "raid1:%s: read error corrected (%d sectors at %llu on %s)\n", | ||
1528 | mdname(mddev), s, (unsigned long long)(sect + rdev->data_offset), bdevname(rdev->bdev, b)); | ||
1529 | } | ||
1512 | } | 1530 | } |
1513 | } | 1531 | } |
1514 | } else { | 1532 | } else { |
@@ -1622,15 +1640,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1622 | return 0; | 1640 | return 0; |
1623 | } | 1641 | } |
1624 | 1642 | ||
1625 | /* before building a request, check if we can skip these blocks.. | ||
1626 | * This call the bitmap_start_sync doesn't actually record anything | ||
1627 | */ | ||
1628 | if (mddev->bitmap == NULL && | 1643 | if (mddev->bitmap == NULL && |
1629 | mddev->recovery_cp == MaxSector && | 1644 | mddev->recovery_cp == MaxSector && |
1645 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && | ||
1630 | conf->fullsync == 0) { | 1646 | conf->fullsync == 0) { |
1631 | *skipped = 1; | 1647 | *skipped = 1; |
1632 | return max_sector - sector_nr; | 1648 | return max_sector - sector_nr; |
1633 | } | 1649 | } |
1650 | /* before building a request, check if we can skip these blocks.. | ||
1651 | * This call the bitmap_start_sync doesn't actually record anything | ||
1652 | */ | ||
1634 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && | 1653 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && |
1635 | !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | 1654 | !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { |
1636 | /* We can skip this block, and probably several more */ | 1655 | /* We can skip this block, and probably several more */ |
@@ -1783,19 +1802,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1783 | for (i=0; i<conf->raid_disks; i++) { | 1802 | for (i=0; i<conf->raid_disks; i++) { |
1784 | bio = r1_bio->bios[i]; | 1803 | bio = r1_bio->bios[i]; |
1785 | if (bio->bi_end_io == end_sync_read) { | 1804 | if (bio->bi_end_io == end_sync_read) { |
1786 | md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors); | 1805 | md_sync_acct(bio->bi_bdev, nr_sectors); |
1787 | generic_make_request(bio); | 1806 | generic_make_request(bio); |
1788 | } | 1807 | } |
1789 | } | 1808 | } |
1790 | } else { | 1809 | } else { |
1791 | atomic_set(&r1_bio->remaining, 1); | 1810 | atomic_set(&r1_bio->remaining, 1); |
1792 | bio = r1_bio->bios[r1_bio->read_disk]; | 1811 | bio = r1_bio->bios[r1_bio->read_disk]; |
1793 | md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, | 1812 | md_sync_acct(bio->bi_bdev, nr_sectors); |
1794 | nr_sectors); | ||
1795 | generic_make_request(bio); | 1813 | generic_make_request(bio); |
1796 | 1814 | ||
1797 | } | 1815 | } |
1798 | |||
1799 | return nr_sectors; | 1816 | return nr_sectors; |
1800 | } | 1817 | } |
1801 | 1818 | ||
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7f636283a1ba..016ddb831c9b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1492,6 +1492,10 @@ static void raid10d(mddev_t *mddev) | |||
1492 | s<<9, conf->tmppage, READ) == 0) | 1492 | s<<9, conf->tmppage, READ) == 0) |
1493 | /* Well, this device is dead */ | 1493 | /* Well, this device is dead */ |
1494 | md_error(mddev, rdev); | 1494 | md_error(mddev, rdev); |
1495 | else | ||
1496 | printk(KERN_INFO "raid10:%s: read error corrected (%d sectors at %llu on %s)\n", | ||
1497 | mdname(mddev), s, (unsigned long long)(sect+rdev->data_offset), bdevname(rdev->bdev, b)); | ||
1498 | |||
1495 | rdev_dec_pending(rdev, mddev); | 1499 | rdev_dec_pending(rdev, mddev); |
1496 | rcu_read_lock(); | 1500 | rcu_read_lock(); |
1497 | } | 1501 | } |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7433871f4b3a..450066007160 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -18,6 +18,30 @@ | |||
18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | /* | ||
22 | * BITMAP UNPLUGGING: | ||
23 | * | ||
24 | * The sequencing for updating the bitmap reliably is a little | ||
25 | * subtle (and I got it wrong the first time) so it deserves some | ||
26 | * explanation. | ||
27 | * | ||
28 | * We group bitmap updates into batches. Each batch has a number. | ||
29 | * We may write out several batches at once, but that isn't very important. | ||
30 | * conf->seq_write is the number of the last batch successfully written. | ||
31 | * conf->seq_flush is the number of the last batch that was closed to | ||
32 | * new additions. | ||
33 | * When we discover that we will need to write to any block in a stripe | ||
34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq | ||
35 | * the number of the batch it will be in. This is seq_flush+1. | ||
36 | * When we are ready to do a write, if that batch hasn't been written yet, | ||
37 | * we plug the array and queue the stripe for later. | ||
38 | * When an unplug happens, we increment seq_flush, thus closing the current | ||
39 | * batch. | ||
40 | * When we notice that seq_flush > seq_write, we write out all pending updates | ||
41 | * to the bitmap, and advance seq_write to where seq_flush was. | ||
42 | * This may occasionally write a bit out twice, but is sure never to | ||
43 | * miss any bits. | ||
44 | */ | ||
21 | 45 | ||
22 | #include <linux/module.h> | 46 | #include <linux/module.h> |
23 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
@@ -88,12 +112,14 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
88 | BUG_ON(!list_empty(&sh->lru)); | 112 | BUG_ON(!list_empty(&sh->lru)); |
89 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 113 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
90 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 114 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
91 | if (test_bit(STRIPE_DELAYED, &sh->state)) | 115 | if (test_bit(STRIPE_DELAYED, &sh->state)) { |
92 | list_add_tail(&sh->lru, &conf->delayed_list); | 116 | list_add_tail(&sh->lru, &conf->delayed_list); |
93 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 117 | blk_plug_device(conf->mddev->queue); |
94 | conf->seq_write == sh->bm_seq) | 118 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
119 | sh->bm_seq - conf->seq_write > 0) { | ||
95 | list_add_tail(&sh->lru, &conf->bitmap_list); | 120 | list_add_tail(&sh->lru, &conf->bitmap_list); |
96 | else { | 121 | blk_plug_device(conf->mddev->queue); |
122 | } else { | ||
97 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 123 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
98 | list_add_tail(&sh->lru, &conf->handle_list); | 124 | list_add_tail(&sh->lru, &conf->handle_list); |
99 | } | 125 | } |
@@ -270,7 +296,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
270 | < (conf->max_nr_stripes *3/4) | 296 | < (conf->max_nr_stripes *3/4) |
271 | || !conf->inactive_blocked), | 297 | || !conf->inactive_blocked), |
272 | conf->device_lock, | 298 | conf->device_lock, |
273 | unplug_slaves(conf->mddev) | 299 | raid5_unplug_device(conf->mddev->queue) |
274 | ); | 300 | ); |
275 | conf->inactive_blocked = 0; | 301 | conf->inactive_blocked = 0; |
276 | } else | 302 | } else |
@@ -281,7 +307,8 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
281 | } else { | 307 | } else { |
282 | if (!test_bit(STRIPE_HANDLE, &sh->state)) | 308 | if (!test_bit(STRIPE_HANDLE, &sh->state)) |
283 | atomic_inc(&conf->active_stripes); | 309 | atomic_inc(&conf->active_stripes); |
284 | if (list_empty(&sh->lru)) | 310 | if (list_empty(&sh->lru) && |
311 | !test_bit(STRIPE_EXPANDING, &sh->state)) | ||
285 | BUG(); | 312 | BUG(); |
286 | list_del_init(&sh->lru); | 313 | list_del_init(&sh->lru); |
287 | } | 314 | } |
@@ -496,6 +523,8 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, | |||
496 | raid5_conf_t *conf = sh->raid_conf; | 523 | raid5_conf_t *conf = sh->raid_conf; |
497 | int disks = sh->disks, i; | 524 | int disks = sh->disks, i; |
498 | int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); | 525 | int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); |
526 | char b[BDEVNAME_SIZE]; | ||
527 | mdk_rdev_t *rdev; | ||
499 | 528 | ||
500 | if (bi->bi_size) | 529 | if (bi->bi_size) |
501 | return 1; | 530 | return 1; |
@@ -543,25 +572,39 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, | |||
543 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 572 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
544 | #endif | 573 | #endif |
545 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | 574 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { |
546 | printk(KERN_INFO "raid5: read error corrected!!\n"); | 575 | rdev = conf->disks[i].rdev; |
576 | printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", | ||
577 | mdname(conf->mddev), STRIPE_SECTORS, | ||
578 | (unsigned long long)sh->sector + rdev->data_offset, | ||
579 | bdevname(rdev->bdev, b)); | ||
547 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 580 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
548 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 581 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
549 | } | 582 | } |
550 | if (atomic_read(&conf->disks[i].rdev->read_errors)) | 583 | if (atomic_read(&conf->disks[i].rdev->read_errors)) |
551 | atomic_set(&conf->disks[i].rdev->read_errors, 0); | 584 | atomic_set(&conf->disks[i].rdev->read_errors, 0); |
552 | } else { | 585 | } else { |
586 | const char *bdn = bdevname(conf->disks[i].rdev->bdev, b); | ||
553 | int retry = 0; | 587 | int retry = 0; |
588 | rdev = conf->disks[i].rdev; | ||
589 | |||
554 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 590 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
555 | atomic_inc(&conf->disks[i].rdev->read_errors); | 591 | atomic_inc(&rdev->read_errors); |
556 | if (conf->mddev->degraded) | 592 | if (conf->mddev->degraded) |
557 | printk(KERN_WARNING "raid5: read error not correctable.\n"); | 593 | printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", |
594 | mdname(conf->mddev), | ||
595 | (unsigned long long)sh->sector + rdev->data_offset, | ||
596 | bdn); | ||
558 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | 597 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) |
559 | /* Oh, no!!! */ | 598 | /* Oh, no!!! */ |
560 | printk(KERN_WARNING "raid5: read error NOT corrected!!\n"); | 599 | printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", |
561 | else if (atomic_read(&conf->disks[i].rdev->read_errors) | 600 | mdname(conf->mddev), |
601 | (unsigned long long)sh->sector + rdev->data_offset, | ||
602 | bdn); | ||
603 | else if (atomic_read(&rdev->read_errors) | ||
562 | > conf->max_nr_stripes) | 604 | > conf->max_nr_stripes) |
563 | printk(KERN_WARNING | 605 | printk(KERN_WARNING |
564 | "raid5: Too many read errors, failing device.\n"); | 606 | "raid5:%s: Too many read errors, failing device %s.\n", |
607 | mdname(conf->mddev), bdn); | ||
565 | else | 608 | else |
566 | retry = 1; | 609 | retry = 1; |
567 | if (retry) | 610 | if (retry) |
@@ -569,7 +612,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, | |||
569 | else { | 612 | else { |
570 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 613 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
571 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 614 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
572 | md_error(conf->mddev, conf->disks[i].rdev); | 615 | md_error(conf->mddev, rdev); |
573 | } | 616 | } |
574 | } | 617 | } |
575 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); | 618 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); |
@@ -1270,9 +1313,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
1270 | (unsigned long long)sh->sector, dd_idx); | 1313 | (unsigned long long)sh->sector, dd_idx); |
1271 | 1314 | ||
1272 | if (conf->mddev->bitmap && firstwrite) { | 1315 | if (conf->mddev->bitmap && firstwrite) { |
1273 | sh->bm_seq = conf->seq_write; | ||
1274 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, | 1316 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, |
1275 | STRIPE_SECTORS, 0); | 1317 | STRIPE_SECTORS, 0); |
1318 | sh->bm_seq = conf->seq_flush+1; | ||
1276 | set_bit(STRIPE_BIT_DELAY, &sh->state); | 1319 | set_bit(STRIPE_BIT_DELAY, &sh->state); |
1277 | } | 1320 | } |
1278 | 1321 | ||
@@ -2554,13 +2597,6 @@ static int raid5_issue_flush(request_queue_t *q, struct gendisk *disk, | |||
2554 | return ret; | 2597 | return ret; |
2555 | } | 2598 | } |
2556 | 2599 | ||
2557 | static inline void raid5_plug_device(raid5_conf_t *conf) | ||
2558 | { | ||
2559 | spin_lock_irq(&conf->device_lock); | ||
2560 | blk_plug_device(conf->mddev->queue); | ||
2561 | spin_unlock_irq(&conf->device_lock); | ||
2562 | } | ||
2563 | |||
2564 | static int make_request(request_queue_t *q, struct bio * bi) | 2600 | static int make_request(request_queue_t *q, struct bio * bi) |
2565 | { | 2601 | { |
2566 | mddev_t *mddev = q->queuedata; | 2602 | mddev_t *mddev = q->queuedata; |
@@ -2670,7 +2706,6 @@ static int make_request(request_queue_t *q, struct bio * bi) | |||
2670 | goto retry; | 2706 | goto retry; |
2671 | } | 2707 | } |
2672 | finish_wait(&conf->wait_for_overlap, &w); | 2708 | finish_wait(&conf->wait_for_overlap, &w); |
2673 | raid5_plug_device(conf); | ||
2674 | handle_stripe(sh, NULL); | 2709 | handle_stripe(sh, NULL); |
2675 | release_stripe(sh); | 2710 | release_stripe(sh); |
2676 | } else { | 2711 | } else { |
@@ -2923,7 +2958,7 @@ static void raid5d (mddev_t *mddev) | |||
2923 | while (1) { | 2958 | while (1) { |
2924 | struct list_head *first; | 2959 | struct list_head *first; |
2925 | 2960 | ||
2926 | if (conf->seq_flush - conf->seq_write > 0) { | 2961 | if (conf->seq_flush != conf->seq_write) { |
2927 | int seq = conf->seq_flush; | 2962 | int seq = conf->seq_flush; |
2928 | spin_unlock_irq(&conf->device_lock); | 2963 | spin_unlock_irq(&conf->device_lock); |
2929 | bitmap_unplug(mddev->bitmap); | 2964 | bitmap_unplug(mddev->bitmap); |
@@ -3246,9 +3281,6 @@ static int run(mddev_t *mddev) | |||
3246 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 3281 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
3247 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, | 3282 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, |
3248 | "%s_reshape"); | 3283 | "%s_reshape"); |
3249 | /* FIXME if md_register_thread fails?? */ | ||
3250 | md_wakeup_thread(mddev->sync_thread); | ||
3251 | |||
3252 | } | 3284 | } |
3253 | 3285 | ||
3254 | /* read-ahead size must cover two whole stripes, which is | 3286 | /* read-ahead size must cover two whole stripes, which is |