aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2010-01-23 01:45:46 -0500
committer David S. Miller <davem@davemloft.net> 2010-01-23 01:45:46 -0500
commit 6be325719b3e54624397e413efd4b33a997e55a3 (patch)
tree 57f321a56794cab2222e179b16731e0d76a4a68a /drivers/md/md.c
parent 26d92f9276a56d55511a427fb70bd70886af647a (diff)
parent 92dcffb916d309aa01778bf8963a6932e4014d07 (diff)
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 455
1 files changed, 333 insertions, 122 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5f154ef1e4be..dd3dfe42d5a9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -39,11 +39,13 @@
39#include <linux/buffer_head.h> /* for invalidate_bdev */ 39#include <linux/buffer_head.h> /* for invalidate_bdev */
40#include <linux/poll.h> 40#include <linux/poll.h>
41#include <linux/ctype.h> 41#include <linux/ctype.h>
42#include <linux/string.h>
42#include <linux/hdreg.h> 43#include <linux/hdreg.h>
43#include <linux/proc_fs.h> 44#include <linux/proc_fs.h>
44#include <linux/random.h> 45#include <linux/random.h>
45#include <linux/reboot.h> 46#include <linux/reboot.h>
46#include <linux/file.h> 47#include <linux/file.h>
48#include <linux/compat.h>
47#include <linux/delay.h> 49#include <linux/delay.h>
48#include <linux/raid/md_p.h> 50#include <linux/raid/md_p.h>
49#include <linux/raid/md_u.h> 51#include <linux/raid/md_u.h>
@@ -68,6 +70,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
68#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } 70#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
69 71
70/* 72/*
73 * Default number of read corrections we'll attempt on an rdev
74 * before ejecting it from the array. We divide the read error
75 * count by 2 for every hour elapsed between read errors.
76 */
77#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
78/*
71 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' 79 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
72 * is 1000 KB/sec, so the extra system load does not show up that much. 80 * is 1000 KB/sec, so the extra system load does not show up that much.
73 * Increase it if you want to have more _guaranteed_ speed. Note that 81 * Increase it if you want to have more _guaranteed_ speed. Note that
@@ -213,12 +221,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
213 return 0; 221 return 0;
214 } 222 }
215 rcu_read_lock(); 223 rcu_read_lock();
216 if (mddev->suspended) { 224 if (mddev->suspended || mddev->barrier) {
217 DEFINE_WAIT(__wait); 225 DEFINE_WAIT(__wait);
218 for (;;) { 226 for (;;) {
219 prepare_to_wait(&mddev->sb_wait, &__wait, 227 prepare_to_wait(&mddev->sb_wait, &__wait,
220 TASK_UNINTERRUPTIBLE); 228 TASK_UNINTERRUPTIBLE);
221 if (!mddev->suspended) 229 if (!mddev->suspended && !mddev->barrier)
222 break; 230 break;
223 rcu_read_unlock(); 231 rcu_read_unlock();
224 schedule(); 232 schedule();
@@ -260,10 +268,110 @@ static void mddev_resume(mddev_t *mddev)
260 268
261int mddev_congested(mddev_t *mddev, int bits) 269int mddev_congested(mddev_t *mddev, int bits)
262{ 270{
271 if (mddev->barrier)
272 return 1;
263 return mddev->suspended; 273 return mddev->suspended;
264} 274}
265EXPORT_SYMBOL(mddev_congested); 275EXPORT_SYMBOL(mddev_congested);
266 276
277/*
278 * Generic barrier handling for md
279 */
280
281#define POST_REQUEST_BARRIER ((void*)1)
282
283static void md_end_barrier(struct bio *bio, int err)
284{
285 mdk_rdev_t *rdev = bio->bi_private;
286 mddev_t *mddev = rdev->mddev;
287 if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER)
288 set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
289
290 rdev_dec_pending(rdev, mddev);
291
292 if (atomic_dec_and_test(&mddev->flush_pending)) {
293 if (mddev->barrier == POST_REQUEST_BARRIER) {
294 /* This was a post-request barrier */
295 mddev->barrier = NULL;
296 wake_up(&mddev->sb_wait);
297 } else
298 /* The pre-request barrier has finished */
299 schedule_work(&mddev->barrier_work);
300 }
301 bio_put(bio);
302}
303
304static void submit_barriers(mddev_t *mddev)
305{
306 mdk_rdev_t *rdev;
307
308 rcu_read_lock();
309 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
310 if (rdev->raid_disk >= 0 &&
311 !test_bit(Faulty, &rdev->flags)) {
312 /* Take two references, one is dropped
313 * when request finishes, one after
314 * we reclaim rcu_read_lock
315 */
316 struct bio *bi;
317 atomic_inc(&rdev->nr_pending);
318 atomic_inc(&rdev->nr_pending);
319 rcu_read_unlock();
320 bi = bio_alloc(GFP_KERNEL, 0);
321 bi->bi_end_io = md_end_barrier;
322 bi->bi_private = rdev;
323 bi->bi_bdev = rdev->bdev;
324 atomic_inc(&mddev->flush_pending);
325 submit_bio(WRITE_BARRIER, bi);
326 rcu_read_lock();
327 rdev_dec_pending(rdev, mddev);
328 }
329 rcu_read_unlock();
330}
331
332static void md_submit_barrier(struct work_struct *ws)
333{
334 mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
335 struct bio *bio = mddev->barrier;
336
337 atomic_set(&mddev->flush_pending, 1);
338
339 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
340 bio_endio(bio, -EOPNOTSUPP);
341 else if (bio->bi_size == 0)
342 /* an empty barrier - all done */
343 bio_endio(bio, 0);
344 else {
345 bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
346 if (mddev->pers->make_request(mddev->queue, bio))
347 generic_make_request(bio);
348 mddev->barrier = POST_REQUEST_BARRIER;
349 submit_barriers(mddev);
350 }
351 if (atomic_dec_and_test(&mddev->flush_pending)) {
352 mddev->barrier = NULL;
353 wake_up(&mddev->sb_wait);
354 }
355}
356
357void md_barrier_request(mddev_t *mddev, struct bio *bio)
358{
359 spin_lock_irq(&mddev->write_lock);
360 wait_event_lock_irq(mddev->sb_wait,
361 !mddev->barrier,
362 mddev->write_lock, /*nothing*/);
363 mddev->barrier = bio;
364 spin_unlock_irq(&mddev->write_lock);
365
366 atomic_set(&mddev->flush_pending, 1);
367 INIT_WORK(&mddev->barrier_work, md_submit_barrier);
368
369 submit_barriers(mddev);
370
371 if (atomic_dec_and_test(&mddev->flush_pending))
372 schedule_work(&mddev->barrier_work);
373}
374EXPORT_SYMBOL(md_barrier_request);
267 375
268static inline mddev_t *mddev_get(mddev_t *mddev) 376static inline mddev_t *mddev_get(mddev_t *mddev)
269{ 377{
@@ -278,7 +386,9 @@ static void mddev_put(mddev_t *mddev)
278 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) 386 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
279 return; 387 return;
280 if (!mddev->raid_disks && list_empty(&mddev->disks) && 388 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
281 !mddev->hold_active) { 389 mddev->ctime == 0 && !mddev->hold_active) {
390 /* Array is not configured at all, and not held active,
391 * so destroy it */
282 list_del(&mddev->all_mddevs); 392 list_del(&mddev->all_mddevs);
283 if (mddev->gendisk) { 393 if (mddev->gendisk) {
284 /* we did a probe so need to clean up. 394 /* we did a probe so need to clean up.
@@ -363,6 +473,7 @@ static mddev_t * mddev_find(dev_t unit)
363 473
364 mutex_init(&new->open_mutex); 474 mutex_init(&new->open_mutex);
365 mutex_init(&new->reconfig_mutex); 475 mutex_init(&new->reconfig_mutex);
476 mutex_init(&new->bitmap_info.mutex);
366 INIT_LIST_HEAD(&new->disks); 477 INIT_LIST_HEAD(&new->disks);
367 INIT_LIST_HEAD(&new->all_mddevs); 478 INIT_LIST_HEAD(&new->all_mddevs);
368 init_timer(&new->safemode_timer); 479 init_timer(&new->safemode_timer);
@@ -370,6 +481,7 @@ static mddev_t * mddev_find(dev_t unit)
370 atomic_set(&new->openers, 0); 481 atomic_set(&new->openers, 0);
371 atomic_set(&new->active_io, 0); 482 atomic_set(&new->active_io, 0);
372 spin_lock_init(&new->write_lock); 483 spin_lock_init(&new->write_lock);
484 atomic_set(&new->flush_pending, 0);
373 init_waitqueue_head(&new->sb_wait); 485 init_waitqueue_head(&new->sb_wait);
374 init_waitqueue_head(&new->recovery_wait); 486 init_waitqueue_head(&new->recovery_wait);
375 new->reshape_position = MaxSector; 487 new->reshape_position = MaxSector;
@@ -748,7 +860,7 @@ struct super_type {
748 */ 860 */
749int md_check_no_bitmap(mddev_t *mddev) 861int md_check_no_bitmap(mddev_t *mddev)
750{ 862{
751 if (!mddev->bitmap_file && !mddev->bitmap_offset) 863 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
752 return 0; 864 return 0;
753 printk(KERN_ERR "%s: bitmaps are not supported for %s\n", 865 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
754 mdname(mddev), mddev->pers->name); 866 mdname(mddev), mddev->pers->name);
@@ -876,8 +988,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
876 mddev->raid_disks = sb->raid_disks; 988 mddev->raid_disks = sb->raid_disks;
877 mddev->dev_sectors = sb->size * 2; 989 mddev->dev_sectors = sb->size * 2;
878 mddev->events = ev1; 990 mddev->events = ev1;
879 mddev->bitmap_offset = 0; 991 mddev->bitmap_info.offset = 0;
880 mddev->default_bitmap_offset = MD_SB_BYTES >> 9; 992 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
881 993
882 if (mddev->minor_version >= 91) { 994 if (mddev->minor_version >= 91) {
883 mddev->reshape_position = sb->reshape_position; 995 mddev->reshape_position = sb->reshape_position;
@@ -911,8 +1023,9 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
911 mddev->max_disks = MD_SB_DISKS; 1023 mddev->max_disks = MD_SB_DISKS;
912 1024
913 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && 1025 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
914 mddev->bitmap_file == NULL) 1026 mddev->bitmap_info.file == NULL)
915 mddev->bitmap_offset = mddev->default_bitmap_offset; 1027 mddev->bitmap_info.offset =
1028 mddev->bitmap_info.default_offset;
916 1029
917 } else if (mddev->pers == NULL) { 1030 } else if (mddev->pers == NULL) {
918 /* Insist on good event counter while assembling */ 1031 /* Insist on good event counter while assembling */
@@ -1029,7 +1142,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1029 sb->layout = mddev->layout; 1142 sb->layout = mddev->layout;
1030 sb->chunk_size = mddev->chunk_sectors << 9; 1143 sb->chunk_size = mddev->chunk_sectors << 9;
1031 1144
1032 if (mddev->bitmap && mddev->bitmap_file == NULL) 1145 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1033 sb->state |= (1<<MD_SB_BITMAP_PRESENT); 1146 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1034 1147
1035 sb->disks[0].state = (1<<MD_DISK_REMOVED); 1148 sb->disks[0].state = (1<<MD_DISK_REMOVED);
@@ -1107,7 +1220,7 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1107{ 1220{
1108 if (num_sectors && num_sectors < rdev->mddev->dev_sectors) 1221 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1109 return 0; /* component must fit device */ 1222 return 0; /* component must fit device */
1110 if (rdev->mddev->bitmap_offset) 1223 if (rdev->mddev->bitmap_info.offset)
1111 return 0; /* can't move bitmap */ 1224 return 0; /* can't move bitmap */
1112 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 1225 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
1113 if (!num_sectors || num_sectors > rdev->sb_start) 1226 if (!num_sectors || num_sectors > rdev->sb_start)
@@ -1286,8 +1399,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1286 mddev->raid_disks = le32_to_cpu(sb->raid_disks); 1399 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1287 mddev->dev_sectors = le64_to_cpu(sb->size); 1400 mddev->dev_sectors = le64_to_cpu(sb->size);
1288 mddev->events = ev1; 1401 mddev->events = ev1;
1289 mddev->bitmap_offset = 0; 1402 mddev->bitmap_info.offset = 0;
1290 mddev->default_bitmap_offset = 1024 >> 9; 1403 mddev->bitmap_info.default_offset = 1024 >> 9;
1291 1404
1292 mddev->recovery_cp = le64_to_cpu(sb->resync_offset); 1405 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1293 memcpy(mddev->uuid, sb->set_uuid, 16); 1406 memcpy(mddev->uuid, sb->set_uuid, 16);
@@ -1295,8 +1408,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1295 mddev->max_disks = (4096-256)/2; 1408 mddev->max_disks = (4096-256)/2;
1296 1409
1297 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && 1410 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1298 mddev->bitmap_file == NULL ) 1411 mddev->bitmap_info.file == NULL )
1299 mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); 1412 mddev->bitmap_info.offset =
1413 (__s32)le32_to_cpu(sb->bitmap_offset);
1300 1414
1301 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { 1415 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1302 mddev->reshape_position = le64_to_cpu(sb->reshape_position); 1416 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
@@ -1390,19 +1504,17 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1390 sb->level = cpu_to_le32(mddev->level); 1504 sb->level = cpu_to_le32(mddev->level);
1391 sb->layout = cpu_to_le32(mddev->layout); 1505 sb->layout = cpu_to_le32(mddev->layout);
1392 1506
1393 if (mddev->bitmap && mddev->bitmap_file == NULL) { 1507 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1394 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); 1508 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1395 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); 1509 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1396 } 1510 }
1397 1511
1398 if (rdev->raid_disk >= 0 && 1512 if (rdev->raid_disk >= 0 &&
1399 !test_bit(In_sync, &rdev->flags)) { 1513 !test_bit(In_sync, &rdev->flags)) {
1400 if (rdev->recovery_offset > 0) { 1514 sb->feature_map |=
1401 sb->feature_map |= 1515 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1402 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); 1516 sb->recovery_offset =
1403 sb->recovery_offset = 1517 cpu_to_le64(rdev->recovery_offset);
1404 cpu_to_le64(rdev->recovery_offset);
1405 }
1406 } 1518 }
1407 1519
1408 if (mddev->reshape_position != MaxSector) { 1520 if (mddev->reshape_position != MaxSector) {
@@ -1436,7 +1548,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1436 sb->dev_roles[i] = cpu_to_le16(0xfffe); 1548 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1437 else if (test_bit(In_sync, &rdev2->flags)) 1549 else if (test_bit(In_sync, &rdev2->flags))
1438 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); 1550 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1439 else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0) 1551 else if (rdev2->raid_disk >= 0)
1440 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); 1552 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1441 else 1553 else
1442 sb->dev_roles[i] = cpu_to_le16(0xffff); 1554 sb->dev_roles[i] = cpu_to_le16(0xffff);
@@ -1458,7 +1570,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1458 max_sectors -= rdev->data_offset; 1570 max_sectors -= rdev->data_offset;
1459 if (!num_sectors || num_sectors > max_sectors) 1571 if (!num_sectors || num_sectors > max_sectors)
1460 num_sectors = max_sectors; 1572 num_sectors = max_sectors;
1461 } else if (rdev->mddev->bitmap_offset) { 1573 } else if (rdev->mddev->bitmap_info.offset) {
1462 /* minor version 0 with bitmap we can't move */ 1574 /* minor version 0 with bitmap we can't move */
1463 return 0; 1575 return 0;
1464 } else { 1576 } else {
@@ -1826,15 +1938,11 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
1826 1938
1827 uuid = sb->set_uuid; 1939 uuid = sb->set_uuid;
1828 printk(KERN_INFO 1940 printk(KERN_INFO
1829 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" 1941 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
1830 ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
1831 "md: Name: \"%s\" CT:%llu\n", 1942 "md: Name: \"%s\" CT:%llu\n",
1832 le32_to_cpu(sb->major_version), 1943 le32_to_cpu(sb->major_version),
1833 le32_to_cpu(sb->feature_map), 1944 le32_to_cpu(sb->feature_map),
1834 uuid[0], uuid[1], uuid[2], uuid[3], 1945 uuid,
1835 uuid[4], uuid[5], uuid[6], uuid[7],
1836 uuid[8], uuid[9], uuid[10], uuid[11],
1837 uuid[12], uuid[13], uuid[14], uuid[15],
1838 sb->set_name, 1946 sb->set_name,
1839 (unsigned long long)le64_to_cpu(sb->ctime) 1947 (unsigned long long)le64_to_cpu(sb->ctime)
1840 & MD_SUPERBLOCK_1_TIME_SEC_MASK); 1948 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
@@ -1843,8 +1951,7 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
1843 printk(KERN_INFO 1951 printk(KERN_INFO
1844 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" 1952 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
1845 " RO:%llu\n" 1953 " RO:%llu\n"
1846 "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" 1954 "md: Dev:%08x UUID: %pU\n"
1847 ":%02x%02x%02x%02x%02x%02x\n"
1848 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" 1955 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
1849 "md: (MaxDev:%u) \n", 1956 "md: (MaxDev:%u) \n",
1850 le32_to_cpu(sb->level), 1957 le32_to_cpu(sb->level),
@@ -1857,10 +1964,7 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
1857 (unsigned long long)le64_to_cpu(sb->super_offset), 1964 (unsigned long long)le64_to_cpu(sb->super_offset),
1858 (unsigned long long)le64_to_cpu(sb->recovery_offset), 1965 (unsigned long long)le64_to_cpu(sb->recovery_offset),
1859 le32_to_cpu(sb->dev_number), 1966 le32_to_cpu(sb->dev_number),
1860 uuid[0], uuid[1], uuid[2], uuid[3], 1967 uuid,
1861 uuid[4], uuid[5], uuid[6], uuid[7],
1862 uuid[8], uuid[9], uuid[10], uuid[11],
1863 uuid[12], uuid[13], uuid[14], uuid[15],
1864 sb->devflags, 1968 sb->devflags,
1865 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK, 1969 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
1866 (unsigned long long)le64_to_cpu(sb->events), 1970 (unsigned long long)le64_to_cpu(sb->events),
@@ -2442,12 +2546,49 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2442static struct rdev_sysfs_entry rdev_size = 2546static struct rdev_sysfs_entry rdev_size =
2443__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); 2547__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2444 2548
2549
2550static ssize_t recovery_start_show(mdk_rdev_t *rdev, char *page)
2551{
2552 unsigned long long recovery_start = rdev->recovery_offset;
2553
2554 if (test_bit(In_sync, &rdev->flags) ||
2555 recovery_start == MaxSector)
2556 return sprintf(page, "none\n");
2557
2558 return sprintf(page, "%llu\n", recovery_start);
2559}
2560
2561static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2562{
2563 unsigned long long recovery_start;
2564
2565 if (cmd_match(buf, "none"))
2566 recovery_start = MaxSector;
2567 else if (strict_strtoull(buf, 10, &recovery_start))
2568 return -EINVAL;
2569
2570 if (rdev->mddev->pers &&
2571 rdev->raid_disk >= 0)
2572 return -EBUSY;
2573
2574 rdev->recovery_offset = recovery_start;
2575 if (recovery_start == MaxSector)
2576 set_bit(In_sync, &rdev->flags);
2577 else
2578 clear_bit(In_sync, &rdev->flags);
2579 return len;
2580}
2581
2582static struct rdev_sysfs_entry rdev_recovery_start =
2583__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
2584
2445static struct attribute *rdev_default_attrs[] = { 2585static struct attribute *rdev_default_attrs[] = {
2446 &rdev_state.attr, 2586 &rdev_state.attr,
2447 &rdev_errors.attr, 2587 &rdev_errors.attr,
2448 &rdev_slot.attr, 2588 &rdev_slot.attr,
2449 &rdev_offset.attr, 2589 &rdev_offset.attr,
2450 &rdev_size.attr, 2590 &rdev_size.attr,
2591 &rdev_recovery_start.attr,
2451 NULL, 2592 NULL,
2452}; 2593};
2453static ssize_t 2594static ssize_t
@@ -2549,6 +2690,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2549 rdev->flags = 0; 2690 rdev->flags = 0;
2550 rdev->data_offset = 0; 2691 rdev->data_offset = 0;
2551 rdev->sb_events = 0; 2692 rdev->sb_events = 0;
2693 rdev->last_read_error.tv_sec = 0;
2694 rdev->last_read_error.tv_nsec = 0;
2552 atomic_set(&rdev->nr_pending, 0); 2695 atomic_set(&rdev->nr_pending, 0);
2553 atomic_set(&rdev->read_errors, 0); 2696 atomic_set(&rdev->read_errors, 0);
2554 atomic_set(&rdev->corrected_errors, 0); 2697 atomic_set(&rdev->corrected_errors, 0);
@@ -2659,6 +2802,47 @@ static void analyze_sbs(mddev_t * mddev)
2659 } 2802 }
2660} 2803}
2661 2804
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale',
 * all without any floating-point arithmetic.
 *
 * @cp:    NUL-terminated text: digits, at most one '.', and an
 *         optional single trailing newline.
 * @res:   where the scaled value is stored; written only on success.
 * @scale: number of decimal places to keep.  Fractional digits beyond
 *         'scale' are dropped (truncated, not rounded).
 *
 * Returns 0 on success, -EINVAL if the text contains no digit at all
 * or has unexpected characters, and -ERANGE if the scaled value does
 * not fit in an unsigned long.
 */
int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
{
	unsigned long result = 0;
	long decimals = -1;	/* -1 until '.' is seen, then digits kept after it */
	int seen_digit = 0;

	while (isdigit((unsigned char)*cp) || (*cp == '.' && decimals < 0)) {
		if (*cp == '.') {
			decimals = 0;
		} else {
			seen_digit = 1;
			if (decimals < scale) {
				unsigned int value = *cp - '0';

				/* would result * 10 + value wrap? */
				if (result > (ULONG_MAX - value) / 10)
					return -ERANGE;
				result = result * 10 + value;
				if (decimals >= 0)
					decimals++;
			}
		}
		cp++;
	}
	if (*cp == '\n')
		cp++;
	/* reject trailing garbage and inputs such as "", "." or "\n" */
	if (*cp || !seen_digit)
		return -EINVAL;
	if (decimals < 0)
		decimals = 0;
	/* pad with zeros for the fractional digits that were not supplied */
	while (decimals < scale) {
		if (result > ULONG_MAX / 10)
			return -ERANGE;
		result *= 10;
		decimals++;
	}
	*res = result;
	return 0;
}
2844
2845
2662static void md_safemode_timeout(unsigned long data); 2846static void md_safemode_timeout(unsigned long data);
2663 2847
2664static ssize_t 2848static ssize_t
@@ -2670,31 +2854,10 @@ safe_delay_show(mddev_t *mddev, char *page)
2670static ssize_t 2854static ssize_t
2671safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) 2855safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2672{ 2856{
2673 int scale=1;
2674 int dot=0;
2675 int i;
2676 unsigned long msec; 2857 unsigned long msec;
2677 char buf[30];
2678 2858
2679 /* remove a period, and count digits after it */ 2859 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
2680 if (len >= sizeof(buf))
2681 return -EINVAL; 2860 return -EINVAL;
2682 strlcpy(buf, cbuf, sizeof(buf));
2683 for (i=0; i<len; i++) {
2684 if (dot) {
2685 if (isdigit(buf[i])) {
2686 buf[i-1] = buf[i];
2687 scale *= 10;
2688 }
2689 buf[i] = 0;
2690 } else if (buf[i] == '.') {
2691 dot=1;
2692 buf[i] = 0;
2693 }
2694 }
2695 if (strict_strtoul(buf, 10, &msec) < 0)
2696 return -EINVAL;
2697 msec = (msec * 1000) / scale;
2698 if (msec == 0) 2861 if (msec == 0)
2699 mddev->safemode_delay = 0; 2862 mddev->safemode_delay = 0;
2700 else { 2863 else {
@@ -2970,7 +3133,9 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
2970 3133
2971 if (mddev->pers) 3134 if (mddev->pers)
2972 return -EBUSY; 3135 return -EBUSY;
2973 if (!*buf || (*e && *e != '\n')) 3136 if (cmd_match(buf, "none"))
3137 n = MaxSector;
3138 else if (!*buf || (*e && *e != '\n'))
2974 return -EINVAL; 3139 return -EINVAL;
2975 3140
2976 mddev->recovery_cp = n; 3141 mddev->recovery_cp = n;
@@ -3166,6 +3331,29 @@ static struct md_sysfs_entry md_array_state =
3166__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); 3331__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3167 3332
3168static ssize_t 3333static ssize_t
3334max_corrected_read_errors_show(mddev_t *mddev, char *page) {
3335 return sprintf(page, "%d\n",
3336 atomic_read(&mddev->max_corr_read_errors));
3337}
3338
3339static ssize_t
3340max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
3341{
3342 char *e;
3343 unsigned long n = simple_strtoul(buf, &e, 10);
3344
3345 if (*buf && (*e == 0 || *e == '\n')) {
3346 atomic_set(&mddev->max_corr_read_errors, n);
3347 return len;
3348 }
3349 return -EINVAL;
3350}
3351
3352static struct md_sysfs_entry max_corr_read_errors =
3353__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3354 max_corrected_read_errors_store);
3355
3356static ssize_t
3169null_show(mddev_t *mddev, char *page) 3357null_show(mddev_t *mddev, char *page)
3170{ 3358{
3171 return -EINVAL; 3359 return -EINVAL;
@@ -3246,8 +3434,7 @@ bitmap_store(mddev_t *mddev, const char *buf, size_t len)
3246 } 3434 }
3247 if (*end && !isspace(*end)) break; 3435 if (*end && !isspace(*end)) break;
3248 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); 3436 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
3249 buf = end; 3437 buf = skip_spaces(end);
3250 while (isspace(*buf)) buf++;
3251 } 3438 }
3252 bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ 3439 bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
3253out: 3440out:
@@ -3790,6 +3977,7 @@ static struct attribute *md_default_attrs[] = {
3790 &md_array_state.attr, 3977 &md_array_state.attr,
3791 &md_reshape_position.attr, 3978 &md_reshape_position.attr,
3792 &md_array_size.attr, 3979 &md_array_size.attr,
3980 &max_corr_read_errors.attr,
3793 NULL, 3981 NULL,
3794}; 3982};
3795 3983
@@ -3894,6 +4082,7 @@ static void mddev_delayed_delete(struct work_struct *ws)
3894 mddev->sysfs_action = NULL; 4082 mddev->sysfs_action = NULL;
3895 mddev->private = NULL; 4083 mddev->private = NULL;
3896 } 4084 }
4085 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
3897 kobject_del(&mddev->kobj); 4086 kobject_del(&mddev->kobj);
3898 kobject_put(&mddev->kobj); 4087 kobject_put(&mddev->kobj);
3899} 4088}
@@ -3985,6 +4174,8 @@ static int md_alloc(dev_t dev, char *name)
3985 disk->disk_name); 4174 disk->disk_name);
3986 error = 0; 4175 error = 0;
3987 } 4176 }
4177 if (sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4178 printk(KERN_DEBUG "pointless warning\n");
3988 abort: 4179 abort:
3989 mutex_unlock(&disks_mutex); 4180 mutex_unlock(&disks_mutex);
3990 if (!error) { 4181 if (!error) {
@@ -4166,7 +4357,7 @@ static int do_md_run(mddev_t * mddev)
4166 mddev->barriers_work = 1; 4357 mddev->barriers_work = 1;
4167 mddev->ok_start_degraded = start_dirty_degraded; 4358 mddev->ok_start_degraded = start_dirty_degraded;
4168 4359
4169 if (start_readonly) 4360 if (start_readonly && mddev->ro == 0)
4170 mddev->ro = 2; /* read-only, but switch on first write */ 4361 mddev->ro = 2; /* read-only, but switch on first write */
4171 4362
4172 err = mddev->pers->run(mddev); 4363 err = mddev->pers->run(mddev);
@@ -4206,6 +4397,8 @@ static int do_md_run(mddev_t * mddev)
4206 mddev->ro = 0; 4397 mddev->ro = 0;
4207 4398
4208 atomic_set(&mddev->writes_pending,0); 4399 atomic_set(&mddev->writes_pending,0);
4400 atomic_set(&mddev->max_corr_read_errors,
4401 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
4209 mddev->safemode = 0; 4402 mddev->safemode = 0;
4210 mddev->safemode_timer.function = md_safemode_timeout; 4403 mddev->safemode_timer.function = md_safemode_timeout;
4211 mddev->safemode_timer.data = (unsigned long) mddev; 4404 mddev->safemode_timer.data = (unsigned long) mddev;
@@ -4228,33 +4421,6 @@ static int do_md_run(mddev_t * mddev)
4228 4421
4229 set_capacity(disk, mddev->array_sectors); 4422 set_capacity(disk, mddev->array_sectors);
4230 4423
4231 /* If there is a partially-recovered drive we need to
4232 * start recovery here. If we leave it to md_check_recovery,
4233 * it will remove the drives and not do the right thing
4234 */
4235 if (mddev->degraded && !mddev->sync_thread) {
4236 int spares = 0;
4237 list_for_each_entry(rdev, &mddev->disks, same_set)
4238 if (rdev->raid_disk >= 0 &&
4239 !test_bit(In_sync, &rdev->flags) &&
4240 !test_bit(Faulty, &rdev->flags))
4241 /* complete an interrupted recovery */
4242 spares++;
4243 if (spares && mddev->pers->sync_request) {
4244 mddev->recovery = 0;
4245 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
4246 mddev->sync_thread = md_register_thread(md_do_sync,
4247 mddev,
4248 "resync");
4249 if (!mddev->sync_thread) {
4250 printk(KERN_ERR "%s: could not start resync"
4251 " thread...\n",
4252 mdname(mddev));
4253 /* leave the spares where they are, it shouldn't hurt */
4254 mddev->recovery = 0;
4255 }
4256 }
4257 }
4258 md_wakeup_thread(mddev->thread); 4424 md_wakeup_thread(mddev->thread);
4259 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ 4425 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
4260 4426
@@ -4310,7 +4476,7 @@ static int deny_bitmap_write_access(struct file * file)
4310 return 0; 4476 return 0;
4311} 4477}
4312 4478
4313static void restore_bitmap_write_access(struct file *file) 4479void restore_bitmap_write_access(struct file *file)
4314{ 4480{
4315 struct inode *inode = file->f_mapping->host; 4481 struct inode *inode = file->f_mapping->host;
4316 4482
@@ -4405,12 +4571,12 @@ out:
4405 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); 4571 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
4406 4572
4407 bitmap_destroy(mddev); 4573 bitmap_destroy(mddev);
4408 if (mddev->bitmap_file) { 4574 if (mddev->bitmap_info.file) {
4409 restore_bitmap_write_access(mddev->bitmap_file); 4575 restore_bitmap_write_access(mddev->bitmap_info.file);
4410 fput(mddev->bitmap_file); 4576 fput(mddev->bitmap_info.file);
4411 mddev->bitmap_file = NULL; 4577 mddev->bitmap_info.file = NULL;
4412 } 4578 }
4413 mddev->bitmap_offset = 0; 4579 mddev->bitmap_info.offset = 0;
4414 4580
4415 /* make sure all md_delayed_delete calls have finished */ 4581 /* make sure all md_delayed_delete calls have finished */
4416 flush_scheduled_work(); 4582 flush_scheduled_work();
@@ -4451,6 +4617,11 @@ out:
4451 mddev->degraded = 0; 4617 mddev->degraded = 0;
4452 mddev->barriers_work = 0; 4618 mddev->barriers_work = 0;
4453 mddev->safemode = 0; 4619 mddev->safemode = 0;
4620 mddev->bitmap_info.offset = 0;
4621 mddev->bitmap_info.default_offset = 0;
4622 mddev->bitmap_info.chunksize = 0;
4623 mddev->bitmap_info.daemon_sleep = 0;
4624 mddev->bitmap_info.max_write_behind = 0;
4454 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4625 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4455 if (mddev->hold_active == UNTIL_STOP) 4626 if (mddev->hold_active == UNTIL_STOP)
4456 mddev->hold_active = 0; 4627 mddev->hold_active = 0;
@@ -4636,7 +4807,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
4636 info.state = 0; 4807 info.state = 0;
4637 if (mddev->in_sync) 4808 if (mddev->in_sync)
4638 info.state = (1<<MD_SB_CLEAN); 4809 info.state = (1<<MD_SB_CLEAN);
4639 if (mddev->bitmap && mddev->bitmap_offset) 4810 if (mddev->bitmap && mddev->bitmap_info.offset)
4640 info.state = (1<<MD_SB_BITMAP_PRESENT); 4811 info.state = (1<<MD_SB_BITMAP_PRESENT);
4641 info.active_disks = insync; 4812 info.active_disks = insync;
4642 info.working_disks = working; 4813 info.working_disks = working;
@@ -4994,23 +5165,23 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
4994 if (fd >= 0) { 5165 if (fd >= 0) {
4995 if (mddev->bitmap) 5166 if (mddev->bitmap)
4996 return -EEXIST; /* cannot add when bitmap is present */ 5167 return -EEXIST; /* cannot add when bitmap is present */
4997 mddev->bitmap_file = fget(fd); 5168 mddev->bitmap_info.file = fget(fd);
4998 5169
4999 if (mddev->bitmap_file == NULL) { 5170 if (mddev->bitmap_info.file == NULL) {
5000 printk(KERN_ERR "%s: error: failed to get bitmap file\n", 5171 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5001 mdname(mddev)); 5172 mdname(mddev));
5002 return -EBADF; 5173 return -EBADF;
5003 } 5174 }
5004 5175
5005 err = deny_bitmap_write_access(mddev->bitmap_file); 5176 err = deny_bitmap_write_access(mddev->bitmap_info.file);
5006 if (err) { 5177 if (err) {
5007 printk(KERN_ERR "%s: error: bitmap file is already in use\n", 5178 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5008 mdname(mddev)); 5179 mdname(mddev));
5009 fput(mddev->bitmap_file); 5180 fput(mddev->bitmap_info.file);
5010 mddev->bitmap_file = NULL; 5181 mddev->bitmap_info.file = NULL;
5011 return err; 5182 return err;
5012 } 5183 }
5013 mddev->bitmap_offset = 0; /* file overrides offset */ 5184 mddev->bitmap_info.offset = 0; /* file overrides offset */
5014 } else if (mddev->bitmap == NULL) 5185 } else if (mddev->bitmap == NULL)
5015 return -ENOENT; /* cannot remove what isn't there */ 5186 return -ENOENT; /* cannot remove what isn't there */
5016 err = 0; 5187 err = 0;
@@ -5025,11 +5196,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
5025 mddev->pers->quiesce(mddev, 0); 5196 mddev->pers->quiesce(mddev, 0);
5026 } 5197 }
5027 if (fd < 0) { 5198 if (fd < 0) {
5028 if (mddev->bitmap_file) { 5199 if (mddev->bitmap_info.file) {
5029 restore_bitmap_write_access(mddev->bitmap_file); 5200 restore_bitmap_write_access(mddev->bitmap_info.file);
5030 fput(mddev->bitmap_file); 5201 fput(mddev->bitmap_info.file);
5031 } 5202 }
5032 mddev->bitmap_file = NULL; 5203 mddev->bitmap_info.file = NULL;
5033 } 5204 }
5034 5205
5035 return err; 5206 return err;
@@ -5066,6 +5237,10 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
5066 mddev->minor_version = info->minor_version; 5237 mddev->minor_version = info->minor_version;
5067 mddev->patch_version = info->patch_version; 5238 mddev->patch_version = info->patch_version;
5068 mddev->persistent = !info->not_persistent; 5239 mddev->persistent = !info->not_persistent;
5240 /* ensure mddev_put doesn't delete this now that there
5241 * is some minimal configuration.
5242 */
5243 mddev->ctime = get_seconds();
5069 return 0; 5244 return 0;
5070 } 5245 }
5071 mddev->major_version = MD_MAJOR_VERSION; 5246 mddev->major_version = MD_MAJOR_VERSION;
@@ -5096,8 +5271,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
5096 mddev->flags = 0; 5271 mddev->flags = 0;
5097 set_bit(MD_CHANGE_DEVS, &mddev->flags); 5272 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5098 5273
5099 mddev->default_bitmap_offset = MD_SB_BYTES >> 9; 5274 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
5100 mddev->bitmap_offset = 0; 5275 mddev->bitmap_info.offset = 0;
5101 5276
5102 mddev->reshape_position = MaxSector; 5277 mddev->reshape_position = MaxSector;
5103 5278
@@ -5197,7 +5372,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5197 int state = 0; 5372 int state = 0;
5198 5373
5199 /* calculate expected state,ignoring low bits */ 5374 /* calculate expected state,ignoring low bits */
5200 if (mddev->bitmap && mddev->bitmap_offset) 5375 if (mddev->bitmap && mddev->bitmap_info.offset)
5201 state |= (1 << MD_SB_BITMAP_PRESENT); 5376 state |= (1 << MD_SB_BITMAP_PRESENT);
5202 5377
5203 if (mddev->major_version != info->major_version || 5378 if (mddev->major_version != info->major_version ||
@@ -5256,9 +5431,10 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5256 /* add the bitmap */ 5431 /* add the bitmap */
5257 if (mddev->bitmap) 5432 if (mddev->bitmap)
5258 return -EEXIST; 5433 return -EEXIST;
5259 if (mddev->default_bitmap_offset == 0) 5434 if (mddev->bitmap_info.default_offset == 0)
5260 return -EINVAL; 5435 return -EINVAL;
5261 mddev->bitmap_offset = mddev->default_bitmap_offset; 5436 mddev->bitmap_info.offset =
5437 mddev->bitmap_info.default_offset;
5262 mddev->pers->quiesce(mddev, 1); 5438 mddev->pers->quiesce(mddev, 1);
5263 rv = bitmap_create(mddev); 5439 rv = bitmap_create(mddev);
5264 if (rv) 5440 if (rv)
@@ -5273,7 +5449,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5273 mddev->pers->quiesce(mddev, 1); 5449 mddev->pers->quiesce(mddev, 1);
5274 bitmap_destroy(mddev); 5450 bitmap_destroy(mddev);
5275 mddev->pers->quiesce(mddev, 0); 5451 mddev->pers->quiesce(mddev, 0);
5276 mddev->bitmap_offset = 0; 5452 mddev->bitmap_info.offset = 0;
5277 } 5453 }
5278 } 5454 }
5279 md_update_sb(mddev, 1); 5455 md_update_sb(mddev, 1);
@@ -5524,6 +5700,25 @@ done:
5524abort: 5700abort:
5525 return err; 5701 return err;
5526} 5702}
5703#ifdef CONFIG_COMPAT
5704static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
5705 unsigned int cmd, unsigned long arg)
5706{
5707 switch (cmd) {
5708 case HOT_REMOVE_DISK:
5709 case HOT_ADD_DISK:
5710 case SET_DISK_FAULTY:
5711 case SET_BITMAP_FILE:
5712 /* These take in integer arg, do not convert */
5713 break;
5714 default:
5715 arg = (unsigned long)compat_ptr(arg);
5716 break;
5717 }
5718
5719 return md_ioctl(bdev, mode, cmd, arg);
5720}
5721#endif /* CONFIG_COMPAT */
5527 5722
5528static int md_open(struct block_device *bdev, fmode_t mode) 5723static int md_open(struct block_device *bdev, fmode_t mode)
5529{ 5724{
@@ -5589,6 +5784,9 @@ static const struct block_device_operations md_fops =
5589 .open = md_open, 5784 .open = md_open,
5590 .release = md_release, 5785 .release = md_release,
5591 .ioctl = md_ioctl, 5786 .ioctl = md_ioctl,
5787#ifdef CONFIG_COMPAT
5788 .compat_ioctl = md_compat_ioctl,
5789#endif
5592 .getgeo = md_getgeo, 5790 .getgeo = md_getgeo,
5593 .media_changed = md_media_changed, 5791 .media_changed = md_media_changed,
5594 .revalidate_disk= md_revalidate, 5792 .revalidate_disk= md_revalidate,
@@ -5982,14 +6180,14 @@ static int md_seq_show(struct seq_file *seq, void *v)
5982 unsigned long chunk_kb; 6180 unsigned long chunk_kb;
5983 unsigned long flags; 6181 unsigned long flags;
5984 spin_lock_irqsave(&bitmap->lock, flags); 6182 spin_lock_irqsave(&bitmap->lock, flags);
5985 chunk_kb = bitmap->chunksize >> 10; 6183 chunk_kb = mddev->bitmap_info.chunksize >> 10;
5986 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " 6184 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
5987 "%lu%s chunk", 6185 "%lu%s chunk",
5988 bitmap->pages - bitmap->missing_pages, 6186 bitmap->pages - bitmap->missing_pages,
5989 bitmap->pages, 6187 bitmap->pages,
5990 (bitmap->pages - bitmap->missing_pages) 6188 (bitmap->pages - bitmap->missing_pages)
5991 << (PAGE_SHIFT - 10), 6189 << (PAGE_SHIFT - 10),
5992 chunk_kb ? chunk_kb : bitmap->chunksize, 6190 chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
5993 chunk_kb ? "KB" : "B"); 6191 chunk_kb ? "KB" : "B");
5994 if (bitmap->file) { 6192 if (bitmap->file) {
5995 seq_printf(seq, ", file: "); 6193 seq_printf(seq, ", file: ");
@@ -6275,10 +6473,11 @@ void md_do_sync(mddev_t *mddev)
6275 mddev->curr_resync = 2; 6473 mddev->curr_resync = 2;
6276 6474
6277 try_again: 6475 try_again:
6278 if (kthread_should_stop()) { 6476 if (kthread_should_stop())
6279 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 6477 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6478
6479 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6280 goto skip; 6480 goto skip;
6281 }
6282 for_each_mddev(mddev2, tmp) { 6481 for_each_mddev(mddev2, tmp) {
6283 if (mddev2 == mddev) 6482 if (mddev2 == mddev)
6284 continue; 6483 continue;
@@ -6338,12 +6537,14 @@ void md_do_sync(mddev_t *mddev)
6338 /* recovery follows the physical size of devices */ 6537 /* recovery follows the physical size of devices */
6339 max_sectors = mddev->dev_sectors; 6538 max_sectors = mddev->dev_sectors;
6340 j = MaxSector; 6539 j = MaxSector;
6341 list_for_each_entry(rdev, &mddev->disks, same_set) 6540 rcu_read_lock();
6541 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
6342 if (rdev->raid_disk >= 0 && 6542 if (rdev->raid_disk >= 0 &&
6343 !test_bit(Faulty, &rdev->flags) && 6543 !test_bit(Faulty, &rdev->flags) &&
6344 !test_bit(In_sync, &rdev->flags) && 6544 !test_bit(In_sync, &rdev->flags) &&
6345 rdev->recovery_offset < j) 6545 rdev->recovery_offset < j)
6346 j = rdev->recovery_offset; 6546 j = rdev->recovery_offset;
6547 rcu_read_unlock();
6347 } 6548 }
6348 6549
6349 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev)); 6550 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
@@ -6380,6 +6581,7 @@ void md_do_sync(mddev_t *mddev)
6380 desc, mdname(mddev)); 6581 desc, mdname(mddev));
6381 mddev->curr_resync = j; 6582 mddev->curr_resync = j;
6382 } 6583 }
6584 mddev->curr_resync_completed = mddev->curr_resync;
6383 6585
6384 while (j < max_sectors) { 6586 while (j < max_sectors) {
6385 sector_t sectors; 6587 sector_t sectors;
@@ -6512,22 +6714,29 @@ void md_do_sync(mddev_t *mddev)
6512 } else { 6714 } else {
6513 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 6715 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6514 mddev->curr_resync = MaxSector; 6716 mddev->curr_resync = MaxSector;
6515 list_for_each_entry(rdev, &mddev->disks, same_set) 6717 rcu_read_lock();
6718 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
6516 if (rdev->raid_disk >= 0 && 6719 if (rdev->raid_disk >= 0 &&
6517 !test_bit(Faulty, &rdev->flags) && 6720 !test_bit(Faulty, &rdev->flags) &&
6518 !test_bit(In_sync, &rdev->flags) && 6721 !test_bit(In_sync, &rdev->flags) &&
6519 rdev->recovery_offset < mddev->curr_resync) 6722 rdev->recovery_offset < mddev->curr_resync)
6520 rdev->recovery_offset = mddev->curr_resync; 6723 rdev->recovery_offset = mddev->curr_resync;
6724 rcu_read_unlock();
6521 } 6725 }
6522 } 6726 }
6523 set_bit(MD_CHANGE_DEVS, &mddev->flags); 6727 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6524 6728
6525 skip: 6729 skip:
6730 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
6731 /* We completed so min/max setting can be forgotten if used. */
6732 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
6733 mddev->resync_min = 0;
6734 mddev->resync_max = MaxSector;
6735 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
6736 mddev->resync_min = mddev->curr_resync_completed;
6526 mddev->curr_resync = 0; 6737 mddev->curr_resync = 0;
6527 mddev->curr_resync_completed = 0;
6528 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 6738 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6529 /* We completed so max setting can be forgotten. */ 6739 mddev->curr_resync_completed = 0;
6530 mddev->resync_max = MaxSector;
6531 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6740 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6532 wake_up(&resync_wait); 6741 wake_up(&resync_wait);
6533 set_bit(MD_RECOVERY_DONE, &mddev->recovery); 6742 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
@@ -6590,6 +6799,7 @@ static int remove_and_add_spares(mddev_t *mddev)
6590 nm, mdname(mddev)); 6799 nm, mdname(mddev));
6591 spares++; 6800 spares++;
6592 md_new_event(mddev); 6801 md_new_event(mddev);
6802 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6593 } else 6803 } else
6594 break; 6804 break;
6595 } 6805 }
@@ -6625,7 +6835,7 @@ void md_check_recovery(mddev_t *mddev)
6625 6835
6626 6836
6627 if (mddev->bitmap) 6837 if (mddev->bitmap)
6628 bitmap_daemon_work(mddev->bitmap); 6838 bitmap_daemon_work(mddev);
6629 6839
6630 if (mddev->ro) 6840 if (mddev->ro)
6631 return; 6841 return;
@@ -6995,5 +7205,6 @@ EXPORT_SYMBOL(md_unregister_thread);
6995EXPORT_SYMBOL(md_wakeup_thread); 7205EXPORT_SYMBOL(md_wakeup_thread);
6996EXPORT_SYMBOL(md_check_recovery); 7206EXPORT_SYMBOL(md_check_recovery);
6997MODULE_LICENSE("GPL"); 7207MODULE_LICENSE("GPL");
7208MODULE_DESCRIPTION("MD RAID framework");
6998MODULE_ALIAS("md"); 7209MODULE_ALIAS("md");
6999MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR); 7210MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);