diff options
author | David S. Miller <davem@davemloft.net> | 2010-01-23 01:45:46 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-01-23 01:45:46 -0500 |
commit | 6be325719b3e54624397e413efd4b33a997e55a3 (patch) | |
tree | 57f321a56794cab2222e179b16731e0d76a4a68a /drivers/md/md.c | |
parent | 26d92f9276a56d55511a427fb70bd70886af647a (diff) | |
parent | 92dcffb916d309aa01778bf8963a6932e4014d07 (diff) |
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 455 |
1 files changed, 333 insertions, 122 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 5f154ef1e4be..dd3dfe42d5a9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -39,11 +39,13 @@ | |||
39 | #include <linux/buffer_head.h> /* for invalidate_bdev */ | 39 | #include <linux/buffer_head.h> /* for invalidate_bdev */ |
40 | #include <linux/poll.h> | 40 | #include <linux/poll.h> |
41 | #include <linux/ctype.h> | 41 | #include <linux/ctype.h> |
42 | #include <linux/string.h> | ||
42 | #include <linux/hdreg.h> | 43 | #include <linux/hdreg.h> |
43 | #include <linux/proc_fs.h> | 44 | #include <linux/proc_fs.h> |
44 | #include <linux/random.h> | 45 | #include <linux/random.h> |
45 | #include <linux/reboot.h> | 46 | #include <linux/reboot.h> |
46 | #include <linux/file.h> | 47 | #include <linux/file.h> |
48 | #include <linux/compat.h> | ||
47 | #include <linux/delay.h> | 49 | #include <linux/delay.h> |
48 | #include <linux/raid/md_p.h> | 50 | #include <linux/raid/md_p.h> |
49 | #include <linux/raid/md_u.h> | 51 | #include <linux/raid/md_u.h> |
@@ -68,6 +70,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | |||
68 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | 70 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } |
69 | 71 | ||
70 | /* | 72 | /* |
73 | * Default number of read corrections we'll attempt on an rdev | ||
74 | * before ejecting it from the array. We divide the read error | ||
75 | * count by 2 for every hour elapsed between read errors. | ||
76 | */ | ||
77 | #define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20 | ||
78 | /* | ||
71 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | 79 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' |
72 | * is 1000 KB/sec, so the extra system load does not show up that much. | 80 | * is 1000 KB/sec, so the extra system load does not show up that much. |
73 | * Increase it if you want to have more _guaranteed_ speed. Note that | 81 | * Increase it if you want to have more _guaranteed_ speed. Note that |
@@ -213,12 +221,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio) | |||
213 | return 0; | 221 | return 0; |
214 | } | 222 | } |
215 | rcu_read_lock(); | 223 | rcu_read_lock(); |
216 | if (mddev->suspended) { | 224 | if (mddev->suspended || mddev->barrier) { |
217 | DEFINE_WAIT(__wait); | 225 | DEFINE_WAIT(__wait); |
218 | for (;;) { | 226 | for (;;) { |
219 | prepare_to_wait(&mddev->sb_wait, &__wait, | 227 | prepare_to_wait(&mddev->sb_wait, &__wait, |
220 | TASK_UNINTERRUPTIBLE); | 228 | TASK_UNINTERRUPTIBLE); |
221 | if (!mddev->suspended) | 229 | if (!mddev->suspended && !mddev->barrier) |
222 | break; | 230 | break; |
223 | rcu_read_unlock(); | 231 | rcu_read_unlock(); |
224 | schedule(); | 232 | schedule(); |
@@ -260,10 +268,110 @@ static void mddev_resume(mddev_t *mddev) | |||
260 | 268 | ||
261 | int mddev_congested(mddev_t *mddev, int bits) | 269 | int mddev_congested(mddev_t *mddev, int bits) |
262 | { | 270 | { |
271 | if (mddev->barrier) | ||
272 | return 1; | ||
263 | return mddev->suspended; | 273 | return mddev->suspended; |
264 | } | 274 | } |
265 | EXPORT_SYMBOL(mddev_congested); | 275 | EXPORT_SYMBOL(mddev_congested); |
266 | 276 | ||
277 | /* | ||
278 | * Generic barrier handling for md | ||
279 | */ | ||
280 | |||
281 | #define POST_REQUEST_BARRIER ((void*)1) | ||
282 | |||
283 | static void md_end_barrier(struct bio *bio, int err) | ||
284 | { | ||
285 | mdk_rdev_t *rdev = bio->bi_private; | ||
286 | mddev_t *mddev = rdev->mddev; | ||
287 | if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER) | ||
288 | set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags); | ||
289 | |||
290 | rdev_dec_pending(rdev, mddev); | ||
291 | |||
292 | if (atomic_dec_and_test(&mddev->flush_pending)) { | ||
293 | if (mddev->barrier == POST_REQUEST_BARRIER) { | ||
294 | /* This was a post-request barrier */ | ||
295 | mddev->barrier = NULL; | ||
296 | wake_up(&mddev->sb_wait); | ||
297 | } else | ||
298 | /* The pre-request barrier has finished */ | ||
299 | schedule_work(&mddev->barrier_work); | ||
300 | } | ||
301 | bio_put(bio); | ||
302 | } | ||
303 | |||
304 | static void submit_barriers(mddev_t *mddev) | ||
305 | { | ||
306 | mdk_rdev_t *rdev; | ||
307 | |||
308 | rcu_read_lock(); | ||
309 | list_for_each_entry_rcu(rdev, &mddev->disks, same_set) | ||
310 | if (rdev->raid_disk >= 0 && | ||
311 | !test_bit(Faulty, &rdev->flags)) { | ||
312 | /* Take two references, one is dropped | ||
313 | * when request finishes, one after | ||
314 | * we reclaim rcu_read_lock | ||
315 | */ | ||
316 | struct bio *bi; | ||
317 | atomic_inc(&rdev->nr_pending); | ||
318 | atomic_inc(&rdev->nr_pending); | ||
319 | rcu_read_unlock(); | ||
320 | bi = bio_alloc(GFP_KERNEL, 0); | ||
321 | bi->bi_end_io = md_end_barrier; | ||
322 | bi->bi_private = rdev; | ||
323 | bi->bi_bdev = rdev->bdev; | ||
324 | atomic_inc(&mddev->flush_pending); | ||
325 | submit_bio(WRITE_BARRIER, bi); | ||
326 | rcu_read_lock(); | ||
327 | rdev_dec_pending(rdev, mddev); | ||
328 | } | ||
329 | rcu_read_unlock(); | ||
330 | } | ||
331 | |||
332 | static void md_submit_barrier(struct work_struct *ws) | ||
333 | { | ||
334 | mddev_t *mddev = container_of(ws, mddev_t, barrier_work); | ||
335 | struct bio *bio = mddev->barrier; | ||
336 | |||
337 | atomic_set(&mddev->flush_pending, 1); | ||
338 | |||
339 | if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) | ||
340 | bio_endio(bio, -EOPNOTSUPP); | ||
341 | else if (bio->bi_size == 0) | ||
342 | /* an empty barrier - all done */ | ||
343 | bio_endio(bio, 0); | ||
344 | else { | ||
345 | bio->bi_rw &= ~(1<<BIO_RW_BARRIER); | ||
346 | if (mddev->pers->make_request(mddev->queue, bio)) | ||
347 | generic_make_request(bio); | ||
348 | mddev->barrier = POST_REQUEST_BARRIER; | ||
349 | submit_barriers(mddev); | ||
350 | } | ||
351 | if (atomic_dec_and_test(&mddev->flush_pending)) { | ||
352 | mddev->barrier = NULL; | ||
353 | wake_up(&mddev->sb_wait); | ||
354 | } | ||
355 | } | ||
356 | |||
357 | void md_barrier_request(mddev_t *mddev, struct bio *bio) | ||
358 | { | ||
359 | spin_lock_irq(&mddev->write_lock); | ||
360 | wait_event_lock_irq(mddev->sb_wait, | ||
361 | !mddev->barrier, | ||
362 | mddev->write_lock, /*nothing*/); | ||
363 | mddev->barrier = bio; | ||
364 | spin_unlock_irq(&mddev->write_lock); | ||
365 | |||
366 | atomic_set(&mddev->flush_pending, 1); | ||
367 | INIT_WORK(&mddev->barrier_work, md_submit_barrier); | ||
368 | |||
369 | submit_barriers(mddev); | ||
370 | |||
371 | if (atomic_dec_and_test(&mddev->flush_pending)) | ||
372 | schedule_work(&mddev->barrier_work); | ||
373 | } | ||
374 | EXPORT_SYMBOL(md_barrier_request); | ||
267 | 375 | ||
268 | static inline mddev_t *mddev_get(mddev_t *mddev) | 376 | static inline mddev_t *mddev_get(mddev_t *mddev) |
269 | { | 377 | { |
@@ -278,7 +386,9 @@ static void mddev_put(mddev_t *mddev) | |||
278 | if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) | 386 | if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) |
279 | return; | 387 | return; |
280 | if (!mddev->raid_disks && list_empty(&mddev->disks) && | 388 | if (!mddev->raid_disks && list_empty(&mddev->disks) && |
281 | !mddev->hold_active) { | 389 | mddev->ctime == 0 && !mddev->hold_active) { |
390 | /* Array is not configured at all, and not held active, | ||
391 | * so destroy it */ | ||
282 | list_del(&mddev->all_mddevs); | 392 | list_del(&mddev->all_mddevs); |
283 | if (mddev->gendisk) { | 393 | if (mddev->gendisk) { |
284 | /* we did a probe so need to clean up. | 394 | /* we did a probe so need to clean up. |
@@ -363,6 +473,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
363 | 473 | ||
364 | mutex_init(&new->open_mutex); | 474 | mutex_init(&new->open_mutex); |
365 | mutex_init(&new->reconfig_mutex); | 475 | mutex_init(&new->reconfig_mutex); |
476 | mutex_init(&new->bitmap_info.mutex); | ||
366 | INIT_LIST_HEAD(&new->disks); | 477 | INIT_LIST_HEAD(&new->disks); |
367 | INIT_LIST_HEAD(&new->all_mddevs); | 478 | INIT_LIST_HEAD(&new->all_mddevs); |
368 | init_timer(&new->safemode_timer); | 479 | init_timer(&new->safemode_timer); |
@@ -370,6 +481,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
370 | atomic_set(&new->openers, 0); | 481 | atomic_set(&new->openers, 0); |
371 | atomic_set(&new->active_io, 0); | 482 | atomic_set(&new->active_io, 0); |
372 | spin_lock_init(&new->write_lock); | 483 | spin_lock_init(&new->write_lock); |
484 | atomic_set(&new->flush_pending, 0); | ||
373 | init_waitqueue_head(&new->sb_wait); | 485 | init_waitqueue_head(&new->sb_wait); |
374 | init_waitqueue_head(&new->recovery_wait); | 486 | init_waitqueue_head(&new->recovery_wait); |
375 | new->reshape_position = MaxSector; | 487 | new->reshape_position = MaxSector; |
@@ -748,7 +860,7 @@ struct super_type { | |||
748 | */ | 860 | */ |
749 | int md_check_no_bitmap(mddev_t *mddev) | 861 | int md_check_no_bitmap(mddev_t *mddev) |
750 | { | 862 | { |
751 | if (!mddev->bitmap_file && !mddev->bitmap_offset) | 863 | if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset) |
752 | return 0; | 864 | return 0; |
753 | printk(KERN_ERR "%s: bitmaps are not supported for %s\n", | 865 | printk(KERN_ERR "%s: bitmaps are not supported for %s\n", |
754 | mdname(mddev), mddev->pers->name); | 866 | mdname(mddev), mddev->pers->name); |
@@ -876,8 +988,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
876 | mddev->raid_disks = sb->raid_disks; | 988 | mddev->raid_disks = sb->raid_disks; |
877 | mddev->dev_sectors = sb->size * 2; | 989 | mddev->dev_sectors = sb->size * 2; |
878 | mddev->events = ev1; | 990 | mddev->events = ev1; |
879 | mddev->bitmap_offset = 0; | 991 | mddev->bitmap_info.offset = 0; |
880 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 992 | mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; |
881 | 993 | ||
882 | if (mddev->minor_version >= 91) { | 994 | if (mddev->minor_version >= 91) { |
883 | mddev->reshape_position = sb->reshape_position; | 995 | mddev->reshape_position = sb->reshape_position; |
@@ -911,8 +1023,9 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
911 | mddev->max_disks = MD_SB_DISKS; | 1023 | mddev->max_disks = MD_SB_DISKS; |
912 | 1024 | ||
913 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && | 1025 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
914 | mddev->bitmap_file == NULL) | 1026 | mddev->bitmap_info.file == NULL) |
915 | mddev->bitmap_offset = mddev->default_bitmap_offset; | 1027 | mddev->bitmap_info.offset = |
1028 | mddev->bitmap_info.default_offset; | ||
916 | 1029 | ||
917 | } else if (mddev->pers == NULL) { | 1030 | } else if (mddev->pers == NULL) { |
918 | /* Insist on good event counter while assembling */ | 1031 | /* Insist on good event counter while assembling */ |
@@ -1029,7 +1142,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1029 | sb->layout = mddev->layout; | 1142 | sb->layout = mddev->layout; |
1030 | sb->chunk_size = mddev->chunk_sectors << 9; | 1143 | sb->chunk_size = mddev->chunk_sectors << 9; |
1031 | 1144 | ||
1032 | if (mddev->bitmap && mddev->bitmap_file == NULL) | 1145 | if (mddev->bitmap && mddev->bitmap_info.file == NULL) |
1033 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); | 1146 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); |
1034 | 1147 | ||
1035 | sb->disks[0].state = (1<<MD_DISK_REMOVED); | 1148 | sb->disks[0].state = (1<<MD_DISK_REMOVED); |
@@ -1107,7 +1220,7 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) | |||
1107 | { | 1220 | { |
1108 | if (num_sectors && num_sectors < rdev->mddev->dev_sectors) | 1221 | if (num_sectors && num_sectors < rdev->mddev->dev_sectors) |
1109 | return 0; /* component must fit device */ | 1222 | return 0; /* component must fit device */ |
1110 | if (rdev->mddev->bitmap_offset) | 1223 | if (rdev->mddev->bitmap_info.offset) |
1111 | return 0; /* can't move bitmap */ | 1224 | return 0; /* can't move bitmap */ |
1112 | rdev->sb_start = calc_dev_sboffset(rdev->bdev); | 1225 | rdev->sb_start = calc_dev_sboffset(rdev->bdev); |
1113 | if (!num_sectors || num_sectors > rdev->sb_start) | 1226 | if (!num_sectors || num_sectors > rdev->sb_start) |
@@ -1286,8 +1399,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1286 | mddev->raid_disks = le32_to_cpu(sb->raid_disks); | 1399 | mddev->raid_disks = le32_to_cpu(sb->raid_disks); |
1287 | mddev->dev_sectors = le64_to_cpu(sb->size); | 1400 | mddev->dev_sectors = le64_to_cpu(sb->size); |
1288 | mddev->events = ev1; | 1401 | mddev->events = ev1; |
1289 | mddev->bitmap_offset = 0; | 1402 | mddev->bitmap_info.offset = 0; |
1290 | mddev->default_bitmap_offset = 1024 >> 9; | 1403 | mddev->bitmap_info.default_offset = 1024 >> 9; |
1291 | 1404 | ||
1292 | mddev->recovery_cp = le64_to_cpu(sb->resync_offset); | 1405 | mddev->recovery_cp = le64_to_cpu(sb->resync_offset); |
1293 | memcpy(mddev->uuid, sb->set_uuid, 16); | 1406 | memcpy(mddev->uuid, sb->set_uuid, 16); |
@@ -1295,8 +1408,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1295 | mddev->max_disks = (4096-256)/2; | 1408 | mddev->max_disks = (4096-256)/2; |
1296 | 1409 | ||
1297 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && | 1410 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && |
1298 | mddev->bitmap_file == NULL ) | 1411 | mddev->bitmap_info.file == NULL ) |
1299 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); | 1412 | mddev->bitmap_info.offset = |
1413 | (__s32)le32_to_cpu(sb->bitmap_offset); | ||
1300 | 1414 | ||
1301 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { | 1415 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { |
1302 | mddev->reshape_position = le64_to_cpu(sb->reshape_position); | 1416 | mddev->reshape_position = le64_to_cpu(sb->reshape_position); |
@@ -1390,19 +1504,17 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1390 | sb->level = cpu_to_le32(mddev->level); | 1504 | sb->level = cpu_to_le32(mddev->level); |
1391 | sb->layout = cpu_to_le32(mddev->layout); | 1505 | sb->layout = cpu_to_le32(mddev->layout); |
1392 | 1506 | ||
1393 | if (mddev->bitmap && mddev->bitmap_file == NULL) { | 1507 | if (mddev->bitmap && mddev->bitmap_info.file == NULL) { |
1394 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1508 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); |
1395 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); | 1509 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
1396 | } | 1510 | } |
1397 | 1511 | ||
1398 | if (rdev->raid_disk >= 0 && | 1512 | if (rdev->raid_disk >= 0 && |
1399 | !test_bit(In_sync, &rdev->flags)) { | 1513 | !test_bit(In_sync, &rdev->flags)) { |
1400 | if (rdev->recovery_offset > 0) { | 1514 | sb->feature_map |= |
1401 | sb->feature_map |= | 1515 | cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); |
1402 | cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); | 1516 | sb->recovery_offset = |
1403 | sb->recovery_offset = | 1517 | cpu_to_le64(rdev->recovery_offset); |
1404 | cpu_to_le64(rdev->recovery_offset); | ||
1405 | } | ||
1406 | } | 1518 | } |
1407 | 1519 | ||
1408 | if (mddev->reshape_position != MaxSector) { | 1520 | if (mddev->reshape_position != MaxSector) { |
@@ -1436,7 +1548,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1436 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1548 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1437 | else if (test_bit(In_sync, &rdev2->flags)) | 1549 | else if (test_bit(In_sync, &rdev2->flags)) |
1438 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | 1550 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); |
1439 | else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0) | 1551 | else if (rdev2->raid_disk >= 0) |
1440 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | 1552 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); |
1441 | else | 1553 | else |
1442 | sb->dev_roles[i] = cpu_to_le16(0xffff); | 1554 | sb->dev_roles[i] = cpu_to_le16(0xffff); |
@@ -1458,7 +1570,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) | |||
1458 | max_sectors -= rdev->data_offset; | 1570 | max_sectors -= rdev->data_offset; |
1459 | if (!num_sectors || num_sectors > max_sectors) | 1571 | if (!num_sectors || num_sectors > max_sectors) |
1460 | num_sectors = max_sectors; | 1572 | num_sectors = max_sectors; |
1461 | } else if (rdev->mddev->bitmap_offset) { | 1573 | } else if (rdev->mddev->bitmap_info.offset) { |
1462 | /* minor version 0 with bitmap we can't move */ | 1574 | /* minor version 0 with bitmap we can't move */ |
1463 | return 0; | 1575 | return 0; |
1464 | } else { | 1576 | } else { |
@@ -1826,15 +1938,11 @@ static void print_sb_1(struct mdp_superblock_1 *sb) | |||
1826 | 1938 | ||
1827 | uuid = sb->set_uuid; | 1939 | uuid = sb->set_uuid; |
1828 | printk(KERN_INFO | 1940 | printk(KERN_INFO |
1829 | "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" | 1941 | "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n" |
1830 | ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" | ||
1831 | "md: Name: \"%s\" CT:%llu\n", | 1942 | "md: Name: \"%s\" CT:%llu\n", |
1832 | le32_to_cpu(sb->major_version), | 1943 | le32_to_cpu(sb->major_version), |
1833 | le32_to_cpu(sb->feature_map), | 1944 | le32_to_cpu(sb->feature_map), |
1834 | uuid[0], uuid[1], uuid[2], uuid[3], | 1945 | uuid, |
1835 | uuid[4], uuid[5], uuid[6], uuid[7], | ||
1836 | uuid[8], uuid[9], uuid[10], uuid[11], | ||
1837 | uuid[12], uuid[13], uuid[14], uuid[15], | ||
1838 | sb->set_name, | 1946 | sb->set_name, |
1839 | (unsigned long long)le64_to_cpu(sb->ctime) | 1947 | (unsigned long long)le64_to_cpu(sb->ctime) |
1840 | & MD_SUPERBLOCK_1_TIME_SEC_MASK); | 1948 | & MD_SUPERBLOCK_1_TIME_SEC_MASK); |
@@ -1843,8 +1951,7 @@ static void print_sb_1(struct mdp_superblock_1 *sb) | |||
1843 | printk(KERN_INFO | 1951 | printk(KERN_INFO |
1844 | "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" | 1952 | "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" |
1845 | " RO:%llu\n" | 1953 | " RO:%llu\n" |
1846 | "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" | 1954 | "md: Dev:%08x UUID: %pU\n" |
1847 | ":%02x%02x%02x%02x%02x%02x\n" | ||
1848 | "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" | 1955 | "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" |
1849 | "md: (MaxDev:%u) \n", | 1956 | "md: (MaxDev:%u) \n", |
1850 | le32_to_cpu(sb->level), | 1957 | le32_to_cpu(sb->level), |
@@ -1857,10 +1964,7 @@ static void print_sb_1(struct mdp_superblock_1 *sb) | |||
1857 | (unsigned long long)le64_to_cpu(sb->super_offset), | 1964 | (unsigned long long)le64_to_cpu(sb->super_offset), |
1858 | (unsigned long long)le64_to_cpu(sb->recovery_offset), | 1965 | (unsigned long long)le64_to_cpu(sb->recovery_offset), |
1859 | le32_to_cpu(sb->dev_number), | 1966 | le32_to_cpu(sb->dev_number), |
1860 | uuid[0], uuid[1], uuid[2], uuid[3], | 1967 | uuid, |
1861 | uuid[4], uuid[5], uuid[6], uuid[7], | ||
1862 | uuid[8], uuid[9], uuid[10], uuid[11], | ||
1863 | uuid[12], uuid[13], uuid[14], uuid[15], | ||
1864 | sb->devflags, | 1968 | sb->devflags, |
1865 | (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK, | 1969 | (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK, |
1866 | (unsigned long long)le64_to_cpu(sb->events), | 1970 | (unsigned long long)le64_to_cpu(sb->events), |
@@ -2442,12 +2546,49 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2442 | static struct rdev_sysfs_entry rdev_size = | 2546 | static struct rdev_sysfs_entry rdev_size = |
2443 | __ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); | 2547 | __ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); |
2444 | 2548 | ||
2549 | |||
2550 | static ssize_t recovery_start_show(mdk_rdev_t *rdev, char *page) | ||
2551 | { | ||
2552 | unsigned long long recovery_start = rdev->recovery_offset; | ||
2553 | |||
2554 | if (test_bit(In_sync, &rdev->flags) || | ||
2555 | recovery_start == MaxSector) | ||
2556 | return sprintf(page, "none\n"); | ||
2557 | |||
2558 | return sprintf(page, "%llu\n", recovery_start); | ||
2559 | } | ||
2560 | |||
2561 | static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t len) | ||
2562 | { | ||
2563 | unsigned long long recovery_start; | ||
2564 | |||
2565 | if (cmd_match(buf, "none")) | ||
2566 | recovery_start = MaxSector; | ||
2567 | else if (strict_strtoull(buf, 10, &recovery_start)) | ||
2568 | return -EINVAL; | ||
2569 | |||
2570 | if (rdev->mddev->pers && | ||
2571 | rdev->raid_disk >= 0) | ||
2572 | return -EBUSY; | ||
2573 | |||
2574 | rdev->recovery_offset = recovery_start; | ||
2575 | if (recovery_start == MaxSector) | ||
2576 | set_bit(In_sync, &rdev->flags); | ||
2577 | else | ||
2578 | clear_bit(In_sync, &rdev->flags); | ||
2579 | return len; | ||
2580 | } | ||
2581 | |||
2582 | static struct rdev_sysfs_entry rdev_recovery_start = | ||
2583 | __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store); | ||
2584 | |||
2445 | static struct attribute *rdev_default_attrs[] = { | 2585 | static struct attribute *rdev_default_attrs[] = { |
2446 | &rdev_state.attr, | 2586 | &rdev_state.attr, |
2447 | &rdev_errors.attr, | 2587 | &rdev_errors.attr, |
2448 | &rdev_slot.attr, | 2588 | &rdev_slot.attr, |
2449 | &rdev_offset.attr, | 2589 | &rdev_offset.attr, |
2450 | &rdev_size.attr, | 2590 | &rdev_size.attr, |
2591 | &rdev_recovery_start.attr, | ||
2451 | NULL, | 2592 | NULL, |
2452 | }; | 2593 | }; |
2453 | static ssize_t | 2594 | static ssize_t |
@@ -2549,6 +2690,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2549 | rdev->flags = 0; | 2690 | rdev->flags = 0; |
2550 | rdev->data_offset = 0; | 2691 | rdev->data_offset = 0; |
2551 | rdev->sb_events = 0; | 2692 | rdev->sb_events = 0; |
2693 | rdev->last_read_error.tv_sec = 0; | ||
2694 | rdev->last_read_error.tv_nsec = 0; | ||
2552 | atomic_set(&rdev->nr_pending, 0); | 2695 | atomic_set(&rdev->nr_pending, 0); |
2553 | atomic_set(&rdev->read_errors, 0); | 2696 | atomic_set(&rdev->read_errors, 0); |
2554 | atomic_set(&rdev->corrected_errors, 0); | 2697 | atomic_set(&rdev->corrected_errors, 0); |
@@ -2659,6 +2802,47 @@ static void analyze_sbs(mddev_t * mddev) | |||
2659 | } | 2802 | } |
2660 | } | 2803 | } |
2661 | 2804 | ||
2805 | /* Read a fixed-point number. | ||
2806 | * Numbers in sysfs attributes should be in "standard" units where | ||
2807 | * possible, so time should be in seconds. | ||
2808 | * However we internally use a much smaller unit such as | ||
2809 | * milliseconds or jiffies. | ||
2810 | * This function takes a decimal number with a possible fractional | ||
2811 | * component, and produces an integer which is the result of | ||
2812 | * multiplying that number by 10^'scale', | ||
2813 | * all without any floating-point arithmetic. | ||
2814 | */ | ||
2815 | int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale) | ||
2816 | { | ||
2817 | unsigned long result = 0; | ||
2818 | long decimals = -1; | ||
2819 | while (isdigit(*cp) || (*cp == '.' && decimals < 0)) { | ||
2820 | if (*cp == '.') | ||
2821 | decimals = 0; | ||
2822 | else if (decimals < scale) { | ||
2823 | unsigned int value; | ||
2824 | value = *cp - '0'; | ||
2825 | result = result * 10 + value; | ||
2826 | if (decimals >= 0) | ||
2827 | decimals++; | ||
2828 | } | ||
2829 | cp++; | ||
2830 | } | ||
2831 | if (*cp == '\n') | ||
2832 | cp++; | ||
2833 | if (*cp) | ||
2834 | return -EINVAL; | ||
2835 | if (decimals < 0) | ||
2836 | decimals = 0; | ||
2837 | while (decimals < scale) { | ||
2838 | result *= 10; | ||
2839 | decimals ++; | ||
2840 | } | ||
2841 | *res = result; | ||
2842 | return 0; | ||
2843 | } | ||
2844 | |||
2845 | |||
2662 | static void md_safemode_timeout(unsigned long data); | 2846 | static void md_safemode_timeout(unsigned long data); |
2663 | 2847 | ||
2664 | static ssize_t | 2848 | static ssize_t |
@@ -2670,31 +2854,10 @@ safe_delay_show(mddev_t *mddev, char *page) | |||
2670 | static ssize_t | 2854 | static ssize_t |
2671 | safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) | 2855 | safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) |
2672 | { | 2856 | { |
2673 | int scale=1; | ||
2674 | int dot=0; | ||
2675 | int i; | ||
2676 | unsigned long msec; | 2857 | unsigned long msec; |
2677 | char buf[30]; | ||
2678 | 2858 | ||
2679 | /* remove a period, and count digits after it */ | 2859 | if (strict_strtoul_scaled(cbuf, &msec, 3) < 0) |
2680 | if (len >= sizeof(buf)) | ||
2681 | return -EINVAL; | 2860 | return -EINVAL; |
2682 | strlcpy(buf, cbuf, sizeof(buf)); | ||
2683 | for (i=0; i<len; i++) { | ||
2684 | if (dot) { | ||
2685 | if (isdigit(buf[i])) { | ||
2686 | buf[i-1] = buf[i]; | ||
2687 | scale *= 10; | ||
2688 | } | ||
2689 | buf[i] = 0; | ||
2690 | } else if (buf[i] == '.') { | ||
2691 | dot=1; | ||
2692 | buf[i] = 0; | ||
2693 | } | ||
2694 | } | ||
2695 | if (strict_strtoul(buf, 10, &msec) < 0) | ||
2696 | return -EINVAL; | ||
2697 | msec = (msec * 1000) / scale; | ||
2698 | if (msec == 0) | 2861 | if (msec == 0) |
2699 | mddev->safemode_delay = 0; | 2862 | mddev->safemode_delay = 0; |
2700 | else { | 2863 | else { |
@@ -2970,7 +3133,9 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len) | |||
2970 | 3133 | ||
2971 | if (mddev->pers) | 3134 | if (mddev->pers) |
2972 | return -EBUSY; | 3135 | return -EBUSY; |
2973 | if (!*buf || (*e && *e != '\n')) | 3136 | if (cmd_match(buf, "none")) |
3137 | n = MaxSector; | ||
3138 | else if (!*buf || (*e && *e != '\n')) | ||
2974 | return -EINVAL; | 3139 | return -EINVAL; |
2975 | 3140 | ||
2976 | mddev->recovery_cp = n; | 3141 | mddev->recovery_cp = n; |
@@ -3166,6 +3331,29 @@ static struct md_sysfs_entry md_array_state = | |||
3166 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); | 3331 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); |
3167 | 3332 | ||
3168 | static ssize_t | 3333 | static ssize_t |
3334 | max_corrected_read_errors_show(mddev_t *mddev, char *page) { | ||
3335 | return sprintf(page, "%d\n", | ||
3336 | atomic_read(&mddev->max_corr_read_errors)); | ||
3337 | } | ||
3338 | |||
3339 | static ssize_t | ||
3340 | max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len) | ||
3341 | { | ||
3342 | char *e; | ||
3343 | unsigned long n = simple_strtoul(buf, &e, 10); | ||
3344 | |||
3345 | if (*buf && (*e == 0 || *e == '\n')) { | ||
3346 | atomic_set(&mddev->max_corr_read_errors, n); | ||
3347 | return len; | ||
3348 | } | ||
3349 | return -EINVAL; | ||
3350 | } | ||
3351 | |||
3352 | static struct md_sysfs_entry max_corr_read_errors = | ||
3353 | __ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show, | ||
3354 | max_corrected_read_errors_store); | ||
3355 | |||
3356 | static ssize_t | ||
3169 | null_show(mddev_t *mddev, char *page) | 3357 | null_show(mddev_t *mddev, char *page) |
3170 | { | 3358 | { |
3171 | return -EINVAL; | 3359 | return -EINVAL; |
@@ -3246,8 +3434,7 @@ bitmap_store(mddev_t *mddev, const char *buf, size_t len) | |||
3246 | } | 3434 | } |
3247 | if (*end && !isspace(*end)) break; | 3435 | if (*end && !isspace(*end)) break; |
3248 | bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); | 3436 | bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); |
3249 | buf = end; | 3437 | buf = skip_spaces(end); |
3250 | while (isspace(*buf)) buf++; | ||
3251 | } | 3438 | } |
3252 | bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ | 3439 | bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ |
3253 | out: | 3440 | out: |
@@ -3790,6 +3977,7 @@ static struct attribute *md_default_attrs[] = { | |||
3790 | &md_array_state.attr, | 3977 | &md_array_state.attr, |
3791 | &md_reshape_position.attr, | 3978 | &md_reshape_position.attr, |
3792 | &md_array_size.attr, | 3979 | &md_array_size.attr, |
3980 | &max_corr_read_errors.attr, | ||
3793 | NULL, | 3981 | NULL, |
3794 | }; | 3982 | }; |
3795 | 3983 | ||
@@ -3894,6 +4082,7 @@ static void mddev_delayed_delete(struct work_struct *ws) | |||
3894 | mddev->sysfs_action = NULL; | 4082 | mddev->sysfs_action = NULL; |
3895 | mddev->private = NULL; | 4083 | mddev->private = NULL; |
3896 | } | 4084 | } |
4085 | sysfs_remove_group(&mddev->kobj, &md_bitmap_group); | ||
3897 | kobject_del(&mddev->kobj); | 4086 | kobject_del(&mddev->kobj); |
3898 | kobject_put(&mddev->kobj); | 4087 | kobject_put(&mddev->kobj); |
3899 | } | 4088 | } |
@@ -3985,6 +4174,8 @@ static int md_alloc(dev_t dev, char *name) | |||
3985 | disk->disk_name); | 4174 | disk->disk_name); |
3986 | error = 0; | 4175 | error = 0; |
3987 | } | 4176 | } |
4177 | if (sysfs_create_group(&mddev->kobj, &md_bitmap_group)) | ||
4178 | printk(KERN_DEBUG "pointless warning\n"); | ||
3988 | abort: | 4179 | abort: |
3989 | mutex_unlock(&disks_mutex); | 4180 | mutex_unlock(&disks_mutex); |
3990 | if (!error) { | 4181 | if (!error) { |
@@ -4166,7 +4357,7 @@ static int do_md_run(mddev_t * mddev) | |||
4166 | mddev->barriers_work = 1; | 4357 | mddev->barriers_work = 1; |
4167 | mddev->ok_start_degraded = start_dirty_degraded; | 4358 | mddev->ok_start_degraded = start_dirty_degraded; |
4168 | 4359 | ||
4169 | if (start_readonly) | 4360 | if (start_readonly && mddev->ro == 0) |
4170 | mddev->ro = 2; /* read-only, but switch on first write */ | 4361 | mddev->ro = 2; /* read-only, but switch on first write */ |
4171 | 4362 | ||
4172 | err = mddev->pers->run(mddev); | 4363 | err = mddev->pers->run(mddev); |
@@ -4206,6 +4397,8 @@ static int do_md_run(mddev_t * mddev) | |||
4206 | mddev->ro = 0; | 4397 | mddev->ro = 0; |
4207 | 4398 | ||
4208 | atomic_set(&mddev->writes_pending,0); | 4399 | atomic_set(&mddev->writes_pending,0); |
4400 | atomic_set(&mddev->max_corr_read_errors, | ||
4401 | MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); | ||
4209 | mddev->safemode = 0; | 4402 | mddev->safemode = 0; |
4210 | mddev->safemode_timer.function = md_safemode_timeout; | 4403 | mddev->safemode_timer.function = md_safemode_timeout; |
4211 | mddev->safemode_timer.data = (unsigned long) mddev; | 4404 | mddev->safemode_timer.data = (unsigned long) mddev; |
@@ -4228,33 +4421,6 @@ static int do_md_run(mddev_t * mddev) | |||
4228 | 4421 | ||
4229 | set_capacity(disk, mddev->array_sectors); | 4422 | set_capacity(disk, mddev->array_sectors); |
4230 | 4423 | ||
4231 | /* If there is a partially-recovered drive we need to | ||
4232 | * start recovery here. If we leave it to md_check_recovery, | ||
4233 | * it will remove the drives and not do the right thing | ||
4234 | */ | ||
4235 | if (mddev->degraded && !mddev->sync_thread) { | ||
4236 | int spares = 0; | ||
4237 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
4238 | if (rdev->raid_disk >= 0 && | ||
4239 | !test_bit(In_sync, &rdev->flags) && | ||
4240 | !test_bit(Faulty, &rdev->flags)) | ||
4241 | /* complete an interrupted recovery */ | ||
4242 | spares++; | ||
4243 | if (spares && mddev->pers->sync_request) { | ||
4244 | mddev->recovery = 0; | ||
4245 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
4246 | mddev->sync_thread = md_register_thread(md_do_sync, | ||
4247 | mddev, | ||
4248 | "resync"); | ||
4249 | if (!mddev->sync_thread) { | ||
4250 | printk(KERN_ERR "%s: could not start resync" | ||
4251 | " thread...\n", | ||
4252 | mdname(mddev)); | ||
4253 | /* leave the spares where they are, it shouldn't hurt */ | ||
4254 | mddev->recovery = 0; | ||
4255 | } | ||
4256 | } | ||
4257 | } | ||
4258 | md_wakeup_thread(mddev->thread); | 4424 | md_wakeup_thread(mddev->thread); |
4259 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4425 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
4260 | 4426 | ||
@@ -4310,7 +4476,7 @@ static int deny_bitmap_write_access(struct file * file) | |||
4310 | return 0; | 4476 | return 0; |
4311 | } | 4477 | } |
4312 | 4478 | ||
4313 | static void restore_bitmap_write_access(struct file *file) | 4479 | void restore_bitmap_write_access(struct file *file) |
4314 | { | 4480 | { |
4315 | struct inode *inode = file->f_mapping->host; | 4481 | struct inode *inode = file->f_mapping->host; |
4316 | 4482 | ||
@@ -4405,12 +4571,12 @@ out: | |||
4405 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); | 4571 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); |
4406 | 4572 | ||
4407 | bitmap_destroy(mddev); | 4573 | bitmap_destroy(mddev); |
4408 | if (mddev->bitmap_file) { | 4574 | if (mddev->bitmap_info.file) { |
4409 | restore_bitmap_write_access(mddev->bitmap_file); | 4575 | restore_bitmap_write_access(mddev->bitmap_info.file); |
4410 | fput(mddev->bitmap_file); | 4576 | fput(mddev->bitmap_info.file); |
4411 | mddev->bitmap_file = NULL; | 4577 | mddev->bitmap_info.file = NULL; |
4412 | } | 4578 | } |
4413 | mddev->bitmap_offset = 0; | 4579 | mddev->bitmap_info.offset = 0; |
4414 | 4580 | ||
4415 | /* make sure all md_delayed_delete calls have finished */ | 4581 | /* make sure all md_delayed_delete calls have finished */ |
4416 | flush_scheduled_work(); | 4582 | flush_scheduled_work(); |
@@ -4451,6 +4617,11 @@ out: | |||
4451 | mddev->degraded = 0; | 4617 | mddev->degraded = 0; |
4452 | mddev->barriers_work = 0; | 4618 | mddev->barriers_work = 0; |
4453 | mddev->safemode = 0; | 4619 | mddev->safemode = 0; |
4620 | mddev->bitmap_info.offset = 0; | ||
4621 | mddev->bitmap_info.default_offset = 0; | ||
4622 | mddev->bitmap_info.chunksize = 0; | ||
4623 | mddev->bitmap_info.daemon_sleep = 0; | ||
4624 | mddev->bitmap_info.max_write_behind = 0; | ||
4454 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 4625 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
4455 | if (mddev->hold_active == UNTIL_STOP) | 4626 | if (mddev->hold_active == UNTIL_STOP) |
4456 | mddev->hold_active = 0; | 4627 | mddev->hold_active = 0; |
@@ -4636,7 +4807,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
4636 | info.state = 0; | 4807 | info.state = 0; |
4637 | if (mddev->in_sync) | 4808 | if (mddev->in_sync) |
4638 | info.state = (1<<MD_SB_CLEAN); | 4809 | info.state = (1<<MD_SB_CLEAN); |
4639 | if (mddev->bitmap && mddev->bitmap_offset) | 4810 | if (mddev->bitmap && mddev->bitmap_info.offset) |
4640 | info.state = (1<<MD_SB_BITMAP_PRESENT); | 4811 | info.state = (1<<MD_SB_BITMAP_PRESENT); |
4641 | info.active_disks = insync; | 4812 | info.active_disks = insync; |
4642 | info.working_disks = working; | 4813 | info.working_disks = working; |
@@ -4994,23 +5165,23 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
4994 | if (fd >= 0) { | 5165 | if (fd >= 0) { |
4995 | if (mddev->bitmap) | 5166 | if (mddev->bitmap) |
4996 | return -EEXIST; /* cannot add when bitmap is present */ | 5167 | return -EEXIST; /* cannot add when bitmap is present */ |
4997 | mddev->bitmap_file = fget(fd); | 5168 | mddev->bitmap_info.file = fget(fd); |
4998 | 5169 | ||
4999 | if (mddev->bitmap_file == NULL) { | 5170 | if (mddev->bitmap_info.file == NULL) { |
5000 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", | 5171 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", |
5001 | mdname(mddev)); | 5172 | mdname(mddev)); |
5002 | return -EBADF; | 5173 | return -EBADF; |
5003 | } | 5174 | } |
5004 | 5175 | ||
5005 | err = deny_bitmap_write_access(mddev->bitmap_file); | 5176 | err = deny_bitmap_write_access(mddev->bitmap_info.file); |
5006 | if (err) { | 5177 | if (err) { |
5007 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | 5178 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", |
5008 | mdname(mddev)); | 5179 | mdname(mddev)); |
5009 | fput(mddev->bitmap_file); | 5180 | fput(mddev->bitmap_info.file); |
5010 | mddev->bitmap_file = NULL; | 5181 | mddev->bitmap_info.file = NULL; |
5011 | return err; | 5182 | return err; |
5012 | } | 5183 | } |
5013 | mddev->bitmap_offset = 0; /* file overrides offset */ | 5184 | mddev->bitmap_info.offset = 0; /* file overrides offset */ |
5014 | } else if (mddev->bitmap == NULL) | 5185 | } else if (mddev->bitmap == NULL) |
5015 | return -ENOENT; /* cannot remove what isn't there */ | 5186 | return -ENOENT; /* cannot remove what isn't there */ |
5016 | err = 0; | 5187 | err = 0; |
@@ -5025,11 +5196,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
5025 | mddev->pers->quiesce(mddev, 0); | 5196 | mddev->pers->quiesce(mddev, 0); |
5026 | } | 5197 | } |
5027 | if (fd < 0) { | 5198 | if (fd < 0) { |
5028 | if (mddev->bitmap_file) { | 5199 | if (mddev->bitmap_info.file) { |
5029 | restore_bitmap_write_access(mddev->bitmap_file); | 5200 | restore_bitmap_write_access(mddev->bitmap_info.file); |
5030 | fput(mddev->bitmap_file); | 5201 | fput(mddev->bitmap_info.file); |
5031 | } | 5202 | } |
5032 | mddev->bitmap_file = NULL; | 5203 | mddev->bitmap_info.file = NULL; |
5033 | } | 5204 | } |
5034 | 5205 | ||
5035 | return err; | 5206 | return err; |
@@ -5066,6 +5237,10 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
5066 | mddev->minor_version = info->minor_version; | 5237 | mddev->minor_version = info->minor_version; |
5067 | mddev->patch_version = info->patch_version; | 5238 | mddev->patch_version = info->patch_version; |
5068 | mddev->persistent = !info->not_persistent; | 5239 | mddev->persistent = !info->not_persistent; |
5240 | /* ensure mddev_put doesn't delete this now that there | ||
5241 | * is some minimal configuration. | ||
5242 | */ | ||
5243 | mddev->ctime = get_seconds(); | ||
5069 | return 0; | 5244 | return 0; |
5070 | } | 5245 | } |
5071 | mddev->major_version = MD_MAJOR_VERSION; | 5246 | mddev->major_version = MD_MAJOR_VERSION; |
@@ -5096,8 +5271,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
5096 | mddev->flags = 0; | 5271 | mddev->flags = 0; |
5097 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 5272 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
5098 | 5273 | ||
5099 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 5274 | mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; |
5100 | mddev->bitmap_offset = 0; | 5275 | mddev->bitmap_info.offset = 0; |
5101 | 5276 | ||
5102 | mddev->reshape_position = MaxSector; | 5277 | mddev->reshape_position = MaxSector; |
5103 | 5278 | ||
@@ -5197,7 +5372,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
5197 | int state = 0; | 5372 | int state = 0; |
5198 | 5373 | ||
5199 | /* calculate expected state,ignoring low bits */ | 5374 | /* calculate expected state,ignoring low bits */ |
5200 | if (mddev->bitmap && mddev->bitmap_offset) | 5375 | if (mddev->bitmap && mddev->bitmap_info.offset) |
5201 | state |= (1 << MD_SB_BITMAP_PRESENT); | 5376 | state |= (1 << MD_SB_BITMAP_PRESENT); |
5202 | 5377 | ||
5203 | if (mddev->major_version != info->major_version || | 5378 | if (mddev->major_version != info->major_version || |
@@ -5256,9 +5431,10 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
5256 | /* add the bitmap */ | 5431 | /* add the bitmap */ |
5257 | if (mddev->bitmap) | 5432 | if (mddev->bitmap) |
5258 | return -EEXIST; | 5433 | return -EEXIST; |
5259 | if (mddev->default_bitmap_offset == 0) | 5434 | if (mddev->bitmap_info.default_offset == 0) |
5260 | return -EINVAL; | 5435 | return -EINVAL; |
5261 | mddev->bitmap_offset = mddev->default_bitmap_offset; | 5436 | mddev->bitmap_info.offset = |
5437 | mddev->bitmap_info.default_offset; | ||
5262 | mddev->pers->quiesce(mddev, 1); | 5438 | mddev->pers->quiesce(mddev, 1); |
5263 | rv = bitmap_create(mddev); | 5439 | rv = bitmap_create(mddev); |
5264 | if (rv) | 5440 | if (rv) |
@@ -5273,7 +5449,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
5273 | mddev->pers->quiesce(mddev, 1); | 5449 | mddev->pers->quiesce(mddev, 1); |
5274 | bitmap_destroy(mddev); | 5450 | bitmap_destroy(mddev); |
5275 | mddev->pers->quiesce(mddev, 0); | 5451 | mddev->pers->quiesce(mddev, 0); |
5276 | mddev->bitmap_offset = 0; | 5452 | mddev->bitmap_info.offset = 0; |
5277 | } | 5453 | } |
5278 | } | 5454 | } |
5279 | md_update_sb(mddev, 1); | 5455 | md_update_sb(mddev, 1); |
@@ -5524,6 +5700,25 @@ done: | |||
5524 | abort: | 5700 | abort: |
5525 | return err; | 5701 | return err; |
5526 | } | 5702 | } |
5703 | #ifdef CONFIG_COMPAT | ||
5704 | static int md_compat_ioctl(struct block_device *bdev, fmode_t mode, | ||
5705 | unsigned int cmd, unsigned long arg) | ||
5706 | { | ||
5707 | switch (cmd) { | ||
5708 | case HOT_REMOVE_DISK: | ||
5709 | case HOT_ADD_DISK: | ||
5710 | case SET_DISK_FAULTY: | ||
5711 | case SET_BITMAP_FILE: | ||
5712 | /* These take in integer arg, do not convert */ | ||
5713 | break; | ||
5714 | default: | ||
5715 | arg = (unsigned long)compat_ptr(arg); | ||
5716 | break; | ||
5717 | } | ||
5718 | |||
5719 | return md_ioctl(bdev, mode, cmd, arg); | ||
5720 | } | ||
5721 | #endif /* CONFIG_COMPAT */ | ||
5527 | 5722 | ||
5528 | static int md_open(struct block_device *bdev, fmode_t mode) | 5723 | static int md_open(struct block_device *bdev, fmode_t mode) |
5529 | { | 5724 | { |
@@ -5589,6 +5784,9 @@ static const struct block_device_operations md_fops = | |||
5589 | .open = md_open, | 5784 | .open = md_open, |
5590 | .release = md_release, | 5785 | .release = md_release, |
5591 | .ioctl = md_ioctl, | 5786 | .ioctl = md_ioctl, |
5787 | #ifdef CONFIG_COMPAT | ||
5788 | .compat_ioctl = md_compat_ioctl, | ||
5789 | #endif | ||
5592 | .getgeo = md_getgeo, | 5790 | .getgeo = md_getgeo, |
5593 | .media_changed = md_media_changed, | 5791 | .media_changed = md_media_changed, |
5594 | .revalidate_disk= md_revalidate, | 5792 | .revalidate_disk= md_revalidate, |
@@ -5982,14 +6180,14 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5982 | unsigned long chunk_kb; | 6180 | unsigned long chunk_kb; |
5983 | unsigned long flags; | 6181 | unsigned long flags; |
5984 | spin_lock_irqsave(&bitmap->lock, flags); | 6182 | spin_lock_irqsave(&bitmap->lock, flags); |
5985 | chunk_kb = bitmap->chunksize >> 10; | 6183 | chunk_kb = mddev->bitmap_info.chunksize >> 10; |
5986 | seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " | 6184 | seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " |
5987 | "%lu%s chunk", | 6185 | "%lu%s chunk", |
5988 | bitmap->pages - bitmap->missing_pages, | 6186 | bitmap->pages - bitmap->missing_pages, |
5989 | bitmap->pages, | 6187 | bitmap->pages, |
5990 | (bitmap->pages - bitmap->missing_pages) | 6188 | (bitmap->pages - bitmap->missing_pages) |
5991 | << (PAGE_SHIFT - 10), | 6189 | << (PAGE_SHIFT - 10), |
5992 | chunk_kb ? chunk_kb : bitmap->chunksize, | 6190 | chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize, |
5993 | chunk_kb ? "KB" : "B"); | 6191 | chunk_kb ? "KB" : "B"); |
5994 | if (bitmap->file) { | 6192 | if (bitmap->file) { |
5995 | seq_printf(seq, ", file: "); | 6193 | seq_printf(seq, ", file: "); |
@@ -6275,10 +6473,11 @@ void md_do_sync(mddev_t *mddev) | |||
6275 | mddev->curr_resync = 2; | 6473 | mddev->curr_resync = 2; |
6276 | 6474 | ||
6277 | try_again: | 6475 | try_again: |
6278 | if (kthread_should_stop()) { | 6476 | if (kthread_should_stop()) |
6279 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 6477 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
6478 | |||
6479 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | ||
6280 | goto skip; | 6480 | goto skip; |
6281 | } | ||
6282 | for_each_mddev(mddev2, tmp) { | 6481 | for_each_mddev(mddev2, tmp) { |
6283 | if (mddev2 == mddev) | 6482 | if (mddev2 == mddev) |
6284 | continue; | 6483 | continue; |
@@ -6338,12 +6537,14 @@ void md_do_sync(mddev_t *mddev) | |||
6338 | /* recovery follows the physical size of devices */ | 6537 | /* recovery follows the physical size of devices */ |
6339 | max_sectors = mddev->dev_sectors; | 6538 | max_sectors = mddev->dev_sectors; |
6340 | j = MaxSector; | 6539 | j = MaxSector; |
6341 | list_for_each_entry(rdev, &mddev->disks, same_set) | 6540 | rcu_read_lock(); |
6541 | list_for_each_entry_rcu(rdev, &mddev->disks, same_set) | ||
6342 | if (rdev->raid_disk >= 0 && | 6542 | if (rdev->raid_disk >= 0 && |
6343 | !test_bit(Faulty, &rdev->flags) && | 6543 | !test_bit(Faulty, &rdev->flags) && |
6344 | !test_bit(In_sync, &rdev->flags) && | 6544 | !test_bit(In_sync, &rdev->flags) && |
6345 | rdev->recovery_offset < j) | 6545 | rdev->recovery_offset < j) |
6346 | j = rdev->recovery_offset; | 6546 | j = rdev->recovery_offset; |
6547 | rcu_read_unlock(); | ||
6347 | } | 6548 | } |
6348 | 6549 | ||
6349 | printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev)); | 6550 | printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev)); |
@@ -6380,6 +6581,7 @@ void md_do_sync(mddev_t *mddev) | |||
6380 | desc, mdname(mddev)); | 6581 | desc, mdname(mddev)); |
6381 | mddev->curr_resync = j; | 6582 | mddev->curr_resync = j; |
6382 | } | 6583 | } |
6584 | mddev->curr_resync_completed = mddev->curr_resync; | ||
6383 | 6585 | ||
6384 | while (j < max_sectors) { | 6586 | while (j < max_sectors) { |
6385 | sector_t sectors; | 6587 | sector_t sectors; |
@@ -6512,22 +6714,29 @@ void md_do_sync(mddev_t *mddev) | |||
6512 | } else { | 6714 | } else { |
6513 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | 6715 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
6514 | mddev->curr_resync = MaxSector; | 6716 | mddev->curr_resync = MaxSector; |
6515 | list_for_each_entry(rdev, &mddev->disks, same_set) | 6717 | rcu_read_lock(); |
6718 | list_for_each_entry_rcu(rdev, &mddev->disks, same_set) | ||
6516 | if (rdev->raid_disk >= 0 && | 6719 | if (rdev->raid_disk >= 0 && |
6517 | !test_bit(Faulty, &rdev->flags) && | 6720 | !test_bit(Faulty, &rdev->flags) && |
6518 | !test_bit(In_sync, &rdev->flags) && | 6721 | !test_bit(In_sync, &rdev->flags) && |
6519 | rdev->recovery_offset < mddev->curr_resync) | 6722 | rdev->recovery_offset < mddev->curr_resync) |
6520 | rdev->recovery_offset = mddev->curr_resync; | 6723 | rdev->recovery_offset = mddev->curr_resync; |
6724 | rcu_read_unlock(); | ||
6521 | } | 6725 | } |
6522 | } | 6726 | } |
6523 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 6727 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
6524 | 6728 | ||
6525 | skip: | 6729 | skip: |
6730 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | ||
6731 | /* We completed so min/max setting can be forgotten if used. */ | ||
6732 | if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
6733 | mddev->resync_min = 0; | ||
6734 | mddev->resync_max = MaxSector; | ||
6735 | } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
6736 | mddev->resync_min = mddev->curr_resync_completed; | ||
6526 | mddev->curr_resync = 0; | 6737 | mddev->curr_resync = 0; |
6527 | mddev->curr_resync_completed = 0; | ||
6528 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | 6738 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
6529 | /* We completed so max setting can be forgotten. */ | 6739 | mddev->curr_resync_completed = 0; |
6530 | mddev->resync_max = MaxSector; | ||
6531 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 6740 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
6532 | wake_up(&resync_wait); | 6741 | wake_up(&resync_wait); |
6533 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 6742 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
@@ -6590,6 +6799,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
6590 | nm, mdname(mddev)); | 6799 | nm, mdname(mddev)); |
6591 | spares++; | 6800 | spares++; |
6592 | md_new_event(mddev); | 6801 | md_new_event(mddev); |
6802 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
6593 | } else | 6803 | } else |
6594 | break; | 6804 | break; |
6595 | } | 6805 | } |
@@ -6625,7 +6835,7 @@ void md_check_recovery(mddev_t *mddev) | |||
6625 | 6835 | ||
6626 | 6836 | ||
6627 | if (mddev->bitmap) | 6837 | if (mddev->bitmap) |
6628 | bitmap_daemon_work(mddev->bitmap); | 6838 | bitmap_daemon_work(mddev); |
6629 | 6839 | ||
6630 | if (mddev->ro) | 6840 | if (mddev->ro) |
6631 | return; | 6841 | return; |
@@ -6995,5 +7205,6 @@ EXPORT_SYMBOL(md_unregister_thread); | |||
6995 | EXPORT_SYMBOL(md_wakeup_thread); | 7205 | EXPORT_SYMBOL(md_wakeup_thread); |
6996 | EXPORT_SYMBOL(md_check_recovery); | 7206 | EXPORT_SYMBOL(md_check_recovery); |
6997 | MODULE_LICENSE("GPL"); | 7207 | MODULE_LICENSE("GPL"); |
7208 | MODULE_DESCRIPTION("MD RAID framework"); | ||
6998 | MODULE_ALIAS("md"); | 7209 | MODULE_ALIAS("md"); |
6999 | MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR); | 7210 | MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR); |