diff options
| author | NeilBrown <neilb@cse.unsw.edu.au> | 2005-09-09 19:23:45 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-09 19:39:10 -0400 |
| commit | 36fa30636fb84b209210299684e1be66d9e58217 (patch) | |
| tree | 28a7473238932f1e7ade1a5a5ec67a113b6d6205 | |
| parent | 6a07997fc34ac15a1c5dc650285d79b7604a2276 (diff) | |
[PATCH] md: allow hot-add and hot-remove of md intent logging bitmaps
Both file-bitmaps and superblock bitmaps are supported.
If you add a bitmap file on the array device, you lose.
This introduces a 'default_bitmap_offset' field in mddev, as the ioctl used
for adding a superblock bitmap doesn't have room for giving an offset. Later,
this value will be settable via sysfs.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | drivers/md/md.c | 104 | ||||
| -rw-r--r-- | drivers/md/raid1.c | 30 | ||||
| -rw-r--r-- | include/linux/raid/md_k.h | 10 |
3 files changed, 127 insertions, 17 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 63c566165189..ae654466dc23 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -623,6 +623,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 623 | mddev->size = sb->size; | 623 | mddev->size = sb->size; |
| 624 | mddev->events = md_event(sb); | 624 | mddev->events = md_event(sb); |
| 625 | mddev->bitmap_offset = 0; | 625 | mddev->bitmap_offset = 0; |
| 626 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | ||
| 626 | 627 | ||
| 627 | if (sb->state & (1<<MD_SB_CLEAN)) | 628 | if (sb->state & (1<<MD_SB_CLEAN)) |
| 628 | mddev->recovery_cp = MaxSector; | 629 | mddev->recovery_cp = MaxSector; |
| @@ -648,7 +649,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 648 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); | 649 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); |
| 649 | return -EINVAL; | 650 | return -EINVAL; |
| 650 | } | 651 | } |
| 651 | mddev->bitmap_offset = (MD_SB_BYTES >> 9); | 652 | mddev->bitmap_offset = mddev->default_bitmap_offset; |
| 652 | } | 653 | } |
| 653 | 654 | ||
| 654 | } else if (mddev->pers == NULL) { | 655 | } else if (mddev->pers == NULL) { |
| @@ -939,6 +940,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 939 | mddev->size = le64_to_cpu(sb->size)/2; | 940 | mddev->size = le64_to_cpu(sb->size)/2; |
| 940 | mddev->events = le64_to_cpu(sb->events); | 941 | mddev->events = le64_to_cpu(sb->events); |
| 941 | mddev->bitmap_offset = 0; | 942 | mddev->bitmap_offset = 0; |
| 943 | mddev->default_bitmap_offset = 0; | ||
| 944 | if (mddev->minor_version == 0) | ||
| 945 | mddev->default_bitmap_offset = -(64*1024)/512; | ||
| 942 | 946 | ||
| 943 | mddev->recovery_cp = le64_to_cpu(sb->resync_offset); | 947 | mddev->recovery_cp = le64_to_cpu(sb->resync_offset); |
| 944 | memcpy(mddev->uuid, sb->set_uuid, 16); | 948 | memcpy(mddev->uuid, sb->set_uuid, 16); |
| @@ -2073,6 +2077,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
| 2073 | info.state = 0; | 2077 | info.state = 0; |
| 2074 | if (mddev->in_sync) | 2078 | if (mddev->in_sync) |
| 2075 | info.state = (1<<MD_SB_CLEAN); | 2079 | info.state = (1<<MD_SB_CLEAN); |
| 2080 | if (mddev->bitmap && mddev->bitmap_offset) | ||
| 2081 | info.state = (1<<MD_SB_BITMAP_PRESENT); | ||
| 2076 | info.active_disks = active; | 2082 | info.active_disks = active; |
| 2077 | info.working_disks = working; | 2083 | info.working_disks = working; |
| 2078 | info.failed_disks = failed; | 2084 | info.failed_disks = failed; |
| @@ -2430,25 +2436,51 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
| 2430 | { | 2436 | { |
| 2431 | int err; | 2437 | int err; |
| 2432 | 2438 | ||
| 2433 | if (mddev->pers || mddev->bitmap_file) | 2439 | if (mddev->pers) { |
| 2434 | return -EBUSY; | 2440 | if (!mddev->pers->quiesce) |
| 2441 | return -EBUSY; | ||
| 2442 | if (mddev->recovery || mddev->sync_thread) | ||
| 2443 | return -EBUSY; | ||
| 2444 | /* we should be able to change the bitmap.. */ | ||
| 2445 | } | ||
| 2435 | 2446 | ||
| 2436 | mddev->bitmap_file = fget(fd); | ||
| 2437 | 2447 | ||
| 2438 | if (mddev->bitmap_file == NULL) { | 2448 | if (fd >= 0) { |
| 2439 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", | 2449 | if (mddev->bitmap) |
| 2440 | mdname(mddev)); | 2450 | return -EEXIST; /* cannot add when bitmap is present */ |
| 2441 | return -EBADF; | 2451 | mddev->bitmap_file = fget(fd); |
| 2442 | } | ||
| 2443 | 2452 | ||
| 2444 | err = deny_bitmap_write_access(mddev->bitmap_file); | 2453 | if (mddev->bitmap_file == NULL) { |
| 2445 | if (err) { | 2454 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", |
| 2446 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | 2455 | mdname(mddev)); |
| 2447 | mdname(mddev)); | 2456 | return -EBADF; |
| 2448 | fput(mddev->bitmap_file); | 2457 | } |
| 2449 | mddev->bitmap_file = NULL; | 2458 | |
| 2450 | } else | 2459 | err = deny_bitmap_write_access(mddev->bitmap_file); |
| 2460 | if (err) { | ||
| 2461 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | ||
| 2462 | mdname(mddev)); | ||
| 2463 | fput(mddev->bitmap_file); | ||
| 2464 | mddev->bitmap_file = NULL; | ||
| 2465 | return err; | ||
| 2466 | } | ||
| 2451 | mddev->bitmap_offset = 0; /* file overrides offset */ | 2467 | mddev->bitmap_offset = 0; /* file overrides offset */ |
| 2468 | } else if (mddev->bitmap == NULL) | ||
| 2469 | return -ENOENT; /* cannot remove what isn't there */ | ||
| 2470 | err = 0; | ||
| 2471 | if (mddev->pers) { | ||
| 2472 | mddev->pers->quiesce(mddev, 1); | ||
| 2473 | if (fd >= 0) | ||
| 2474 | err = bitmap_create(mddev); | ||
| 2475 | if (fd < 0 || err) | ||
| 2476 | bitmap_destroy(mddev); | ||
| 2477 | mddev->pers->quiesce(mddev, 0); | ||
| 2478 | } else if (fd < 0) { | ||
| 2479 | if (mddev->bitmap_file) | ||
| 2480 | fput(mddev->bitmap_file); | ||
| 2481 | mddev->bitmap_file = NULL; | ||
| 2482 | } | ||
| 2483 | |||
| 2452 | return err; | 2484 | return err; |
| 2453 | } | 2485 | } |
| 2454 | 2486 | ||
| @@ -2528,6 +2560,11 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
| 2528 | { | 2560 | { |
| 2529 | int rv = 0; | 2561 | int rv = 0; |
| 2530 | int cnt = 0; | 2562 | int cnt = 0; |
| 2563 | int state = 0; | ||
| 2564 | |||
| 2565 | /* calculate expected state,ignoring low bits */ | ||
| 2566 | if (mddev->bitmap && mddev->bitmap_offset) | ||
| 2567 | state |= (1 << MD_SB_BITMAP_PRESENT); | ||
| 2531 | 2568 | ||
| 2532 | if (mddev->major_version != info->major_version || | 2569 | if (mddev->major_version != info->major_version || |
| 2533 | mddev->minor_version != info->minor_version || | 2570 | mddev->minor_version != info->minor_version || |
| @@ -2536,12 +2573,16 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
| 2536 | mddev->level != info->level || | 2573 | mddev->level != info->level || |
| 2537 | /* mddev->layout != info->layout || */ | 2574 | /* mddev->layout != info->layout || */ |
| 2538 | !mddev->persistent != info->not_persistent|| | 2575 | !mddev->persistent != info->not_persistent|| |
| 2539 | mddev->chunk_size != info->chunk_size ) | 2576 | mddev->chunk_size != info->chunk_size || |
| 2577 | /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ | ||
| 2578 | ((state^info->state) & 0xfffffe00) | ||
| 2579 | ) | ||
| 2540 | return -EINVAL; | 2580 | return -EINVAL; |
| 2541 | /* Check there is only one change */ | 2581 | /* Check there is only one change */ |
| 2542 | if (mddev->size != info->size) cnt++; | 2582 | if (mddev->size != info->size) cnt++; |
| 2543 | if (mddev->raid_disks != info->raid_disks) cnt++; | 2583 | if (mddev->raid_disks != info->raid_disks) cnt++; |
| 2544 | if (mddev->layout != info->layout) cnt++; | 2584 | if (mddev->layout != info->layout) cnt++; |
| 2585 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++; | ||
| 2545 | if (cnt == 0) return 0; | 2586 | if (cnt == 0) return 0; |
| 2546 | if (cnt > 1) return -EINVAL; | 2587 | if (cnt > 1) return -EINVAL; |
| 2547 | 2588 | ||
| @@ -2620,6 +2661,35 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
| 2620 | } | 2661 | } |
| 2621 | } | 2662 | } |
| 2622 | } | 2663 | } |
| 2664 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) { | ||
| 2665 | if (mddev->pers->quiesce == NULL) | ||
| 2666 | return -EINVAL; | ||
| 2667 | if (mddev->recovery || mddev->sync_thread) | ||
| 2668 | return -EBUSY; | ||
| 2669 | if (info->state & (1<<MD_SB_BITMAP_PRESENT)) { | ||
| 2670 | /* add the bitmap */ | ||
| 2671 | if (mddev->bitmap) | ||
| 2672 | return -EEXIST; | ||
| 2673 | if (mddev->default_bitmap_offset == 0) | ||
| 2674 | return -EINVAL; | ||
| 2675 | mddev->bitmap_offset = mddev->default_bitmap_offset; | ||
| 2676 | mddev->pers->quiesce(mddev, 1); | ||
| 2677 | rv = bitmap_create(mddev); | ||
| 2678 | if (rv) | ||
| 2679 | bitmap_destroy(mddev); | ||
| 2680 | mddev->pers->quiesce(mddev, 0); | ||
| 2681 | } else { | ||
| 2682 | /* remove the bitmap */ | ||
| 2683 | if (!mddev->bitmap) | ||
| 2684 | return -ENOENT; | ||
| 2685 | if (mddev->bitmap->file) | ||
| 2686 | return -EINVAL; | ||
| 2687 | mddev->pers->quiesce(mddev, 1); | ||
| 2688 | bitmap_destroy(mddev); | ||
| 2689 | mddev->pers->quiesce(mddev, 0); | ||
| 2690 | mddev->bitmap_offset = 0; | ||
| 2691 | } | ||
| 2692 | } | ||
| 2623 | md_update_sb(mddev); | 2693 | md_update_sb(mddev); |
| 2624 | return rv; | 2694 | return rv; |
| 2625 | } | 2695 | } |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ace41c571aeb..ba643e4bfac9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -1565,6 +1565,35 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) | |||
| 1565 | return 0; | 1565 | return 0; |
| 1566 | } | 1566 | } |
| 1567 | 1567 | ||
| 1568 | void raid1_quiesce(mddev_t *mddev, int state) | ||
| 1569 | { | ||
| 1570 | conf_t *conf = mddev_to_conf(mddev); | ||
| 1571 | /* state follows mdk_personality_s.quiesce: 1 = no new requests allowed, 0 = fully active */ | ||
| 1572 | switch(state) { | ||
| 1573 | case 1: /* quiesce: raise the barrier, then wait until nr_pending drops to zero */ | ||
| 1574 | spin_lock_irq(&conf->resync_lock); | ||
| 1575 | conf->barrier++; | ||
| 1576 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, | ||
| 1577 | conf->resync_lock, raid1_unplug(mddev->queue)); | ||
| 1578 | spin_unlock_irq(&conf->resync_lock); | ||
| 1579 | break; | ||
| 1580 | case 0: /* resume: drop the barrier and wake the wait_resume and wait_idle queues */ | ||
| 1581 | spin_lock_irq(&conf->resync_lock); | ||
| 1582 | conf->barrier--; | ||
| 1583 | spin_unlock_irq(&conf->resync_lock); | ||
| 1584 | wake_up(&conf->wait_resume); | ||
| 1585 | wake_up(&conf->wait_idle); | ||
| 1586 | break; | ||
| 1587 | } | ||
| 1588 | if (mddev->thread) { /* callers add or remove the bitmap between quiesce calls: retune the thread timeout */ | ||
| 1589 | if (mddev->bitmap) | ||
| 1590 | mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; | ||
| 1591 | else | ||
| 1592 | mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; | ||
| 1593 | md_wakeup_thread(mddev->thread); | ||
| 1594 | } | ||
| 1595 | } | ||
| 1596 | |||
| 1568 | 1597 | ||
| 1569 | static mdk_personality_t raid1_personality = | 1598 | static mdk_personality_t raid1_personality = |
| 1570 | { | 1599 | { |
| @@ -1581,6 +1610,7 @@ static mdk_personality_t raid1_personality = | |||
| 1581 | .sync_request = sync_request, | 1610 | .sync_request = sync_request, |
| 1582 | .resize = raid1_resize, | 1611 | .resize = raid1_resize, |
| 1583 | .reshape = raid1_reshape, | 1612 | .reshape = raid1_reshape, |
| 1613 | .quiesce = raid1_quiesce, | ||
| 1584 | }; | 1614 | }; |
| 1585 | 1615 | ||
| 1586 | static int __init raid_init(void) | 1616 | static int __init raid_init(void) |
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8c14ba565a45..817062bf7352 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h | |||
| @@ -278,6 +278,10 @@ struct mddev_s | |||
| 278 | * start of bitmap. May be | 278 | * start of bitmap. May be |
| 279 | * negative, but not '0' | 279 | * negative, but not '0' |
| 280 | */ | 280 | */ |
| 281 | long default_bitmap_offset; /* this is the offset to use when | ||
| 282 | * hot-adding a bitmap. It should | ||
| 283 | * eventually be settable by sysfs. | ||
| 284 | */ | ||
| 281 | 285 | ||
| 282 | struct list_head all_mddevs; | 286 | struct list_head all_mddevs; |
| 283 | }; | 287 | }; |
| @@ -314,6 +318,12 @@ struct mdk_personality_s | |||
| 314 | int (*resize) (mddev_t *mddev, sector_t sectors); | 318 | int (*resize) (mddev_t *mddev, sector_t sectors); |
| 315 | int (*reshape) (mddev_t *mddev, int raid_disks); | 319 | int (*reshape) (mddev_t *mddev, int raid_disks); |
| 316 | int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); | 320 | int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); |
| 321 | /* quiesce moves between quiescence states | ||
| 322 | * 0 - fully active | ||
| 323 | * 1 - no new requests allowed | ||
| 324 | * others - reserved | ||
| 325 | */ | ||
| 326 | void (*quiesce) (mddev_t *mddev, int state); | ||
| 317 | }; | 327 | }; |
| 318 | 328 | ||
| 319 | 329 | ||
