diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 172 |
1 files changed, 158 insertions, 14 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index fa608a1a5c20..c402f6cc7047 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -19,6 +19,9 @@ | |||
19 | 19 | ||
20 | Neil Brown <neilb@cse.unsw.edu.au>. | 20 | Neil Brown <neilb@cse.unsw.edu.au>. |
21 | 21 | ||
22 | - persistent bitmap code | ||
23 | Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. | ||
24 | |||
22 | This program is free software; you can redistribute it and/or modify | 25 | This program is free software; you can redistribute it and/or modify |
23 | it under the terms of the GNU General Public License as published by | 26 | it under the terms of the GNU General Public License as published by |
24 | the Free Software Foundation; either version 2, or (at your option) | 27 | the Free Software Foundation; either version 2, or (at your option) |
@@ -33,6 +36,7 @@ | |||
33 | #include <linux/config.h> | 36 | #include <linux/config.h> |
34 | #include <linux/linkage.h> | 37 | #include <linux/linkage.h> |
35 | #include <linux/raid/md.h> | 38 | #include <linux/raid/md.h> |
39 | #include <linux/raid/bitmap.h> | ||
36 | #include <linux/sysctl.h> | 40 | #include <linux/sysctl.h> |
37 | #include <linux/devfs_fs_kernel.h> | 41 | #include <linux/devfs_fs_kernel.h> |
38 | #include <linux/buffer_head.h> /* for invalidate_bdev */ | 42 | #include <linux/buffer_head.h> /* for invalidate_bdev */ |
@@ -40,6 +44,8 @@ | |||
40 | 44 | ||
41 | #include <linux/init.h> | 45 | #include <linux/init.h> |
42 | 46 | ||
47 | #include <linux/file.h> | ||
48 | |||
43 | #ifdef CONFIG_KMOD | 49 | #ifdef CONFIG_KMOD |
44 | #include <linux/kmod.h> | 50 | #include <linux/kmod.h> |
45 | #endif | 51 | #endif |
@@ -1198,8 +1204,11 @@ void md_print_devices(void) | |||
1198 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); | 1204 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); |
1199 | printk("md: **********************************\n"); | 1205 | printk("md: **********************************\n"); |
1200 | ITERATE_MDDEV(mddev,tmp) { | 1206 | ITERATE_MDDEV(mddev,tmp) { |
1201 | printk("%s: ", mdname(mddev)); | ||
1202 | 1207 | ||
1208 | if (mddev->bitmap) | ||
1209 | bitmap_print_sb(mddev->bitmap); | ||
1210 | else | ||
1211 | printk("%s: ", mdname(mddev)); | ||
1203 | ITERATE_RDEV(mddev,rdev,tmp2) | 1212 | ITERATE_RDEV(mddev,rdev,tmp2) |
1204 | printk("<%s>", bdevname(rdev->bdev,b)); | 1213 | printk("<%s>", bdevname(rdev->bdev,b)); |
1205 | printk("\n"); | 1214 | printk("\n"); |
@@ -1287,7 +1296,7 @@ repeat: | |||
1287 | "md: updating %s RAID superblock on device (in sync %d)\n", | 1296 | "md: updating %s RAID superblock on device (in sync %d)\n", |
1288 | mdname(mddev),mddev->in_sync); | 1297 | mdname(mddev),mddev->in_sync); |
1289 | 1298 | ||
1290 | err = 0; | 1299 | err = bitmap_update_sb(mddev->bitmap); |
1291 | ITERATE_RDEV(mddev,rdev,tmp) { | 1300 | ITERATE_RDEV(mddev,rdev,tmp) { |
1292 | char b[BDEVNAME_SIZE]; | 1301 | char b[BDEVNAME_SIZE]; |
1293 | dprintk(KERN_INFO "md: "); | 1302 | dprintk(KERN_INFO "md: "); |
@@ -1624,12 +1633,19 @@ static int do_md_run(mddev_t * mddev) | |||
1624 | 1633 | ||
1625 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ | 1634 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ |
1626 | 1635 | ||
1627 | err = mddev->pers->run(mddev); | 1636 | /* before we start the array running, initialise the bitmap */ |
1637 | err = bitmap_create(mddev); | ||
1638 | if (err) | ||
1639 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", | ||
1640 | mdname(mddev), err); | ||
1641 | else | ||
1642 | err = mddev->pers->run(mddev); | ||
1628 | if (err) { | 1643 | if (err) { |
1629 | printk(KERN_ERR "md: pers->run() failed ...\n"); | 1644 | printk(KERN_ERR "md: pers->run() failed ...\n"); |
1630 | module_put(mddev->pers->owner); | 1645 | module_put(mddev->pers->owner); |
1631 | mddev->pers = NULL; | 1646 | mddev->pers = NULL; |
1632 | return -EINVAL; | 1647 | bitmap_destroy(mddev); |
1648 | return err; | ||
1633 | } | 1649 | } |
1634 | atomic_set(&mddev->writes_pending,0); | 1650 | atomic_set(&mddev->writes_pending,0); |
1635 | mddev->safemode = 0; | 1651 | mddev->safemode = 0; |
@@ -1742,6 +1758,14 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
1742 | if (ro) | 1758 | if (ro) |
1743 | set_disk_ro(disk, 1); | 1759 | set_disk_ro(disk, 1); |
1744 | } | 1760 | } |
1761 | |||
1762 | bitmap_destroy(mddev); | ||
1763 | if (mddev->bitmap_file) { | ||
1764 | atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1); | ||
1765 | fput(mddev->bitmap_file); | ||
1766 | mddev->bitmap_file = NULL; | ||
1767 | } | ||
1768 | |||
1745 | /* | 1769 | /* |
1746 | * Free resources if final stop | 1770 | * Free resources if final stop |
1747 | */ | 1771 | */ |
@@ -2000,6 +2024,42 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
2000 | return 0; | 2024 | return 0; |
2001 | } | 2025 | } |
2002 | 2026 | ||
2027 | static int get_bitmap_file(mddev_t * mddev, void * arg) | ||
2028 | { | ||
2029 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ | ||
2030 | char *ptr, *buf = NULL; | ||
2031 | int err = -ENOMEM; | ||
2032 | |||
2033 | file = kmalloc(sizeof(*file), GFP_KERNEL); | ||
2034 | if (!file) | ||
2035 | goto out; | ||
2036 | |||
2037 | /* bitmap disabled, zero the first byte and copy out */ | ||
2038 | if (!mddev->bitmap || !mddev->bitmap->file) { | ||
2039 | file->pathname[0] = '\0'; | ||
2040 | goto copy_out; | ||
2041 | } | ||
2042 | |||
2043 | buf = kmalloc(sizeof(file->pathname), GFP_KERNEL); | ||
2044 | if (!buf) | ||
2045 | goto out; | ||
2046 | |||
2047 | ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname)); | ||
2048 | if (!ptr) | ||
2049 | goto out; | ||
2050 | |||
2051 | strcpy(file->pathname, ptr); | ||
2052 | |||
2053 | copy_out: | ||
2054 | err = 0; | ||
2055 | if (copy_to_user(arg, file, sizeof(*file))) | ||
2056 | err = -EFAULT; | ||
2057 | out: | ||
2058 | kfree(buf); | ||
2059 | kfree(file); | ||
2060 | return err; | ||
2061 | } | ||
2062 | |||
2003 | static int get_disk_info(mddev_t * mddev, void __user * arg) | 2063 | static int get_disk_info(mddev_t * mddev, void __user * arg) |
2004 | { | 2064 | { |
2005 | mdu_disk_info_t info; | 2065 | mdu_disk_info_t info; |
@@ -2275,6 +2335,48 @@ abort_export: | |||
2275 | return err; | 2335 | return err; |
2276 | } | 2336 | } |
2277 | 2337 | ||
2338 | /* similar to deny_write_access, but accounts for our holding a reference | ||
2339 | * to the file ourselves */ | ||
2340 | static int deny_bitmap_write_access(struct file * file) | ||
2341 | { | ||
2342 | struct inode *inode = file->f_mapping->host; | ||
2343 | |||
2344 | spin_lock(&inode->i_lock); | ||
2345 | if (atomic_read(&inode->i_writecount) > 1) { | ||
2346 | spin_unlock(&inode->i_lock); | ||
2347 | return -ETXTBSY; | ||
2348 | } | ||
2349 | atomic_set(&inode->i_writecount, -1); | ||
2350 | spin_unlock(&inode->i_lock); | ||
2351 | |||
2352 | return 0; | ||
2353 | } | ||
2354 | |||
2355 | static int set_bitmap_file(mddev_t *mddev, int fd) | ||
2356 | { | ||
2357 | int err; | ||
2358 | |||
2359 | if (mddev->pers) | ||
2360 | return -EBUSY; | ||
2361 | |||
2362 | mddev->bitmap_file = fget(fd); | ||
2363 | |||
2364 | if (mddev->bitmap_file == NULL) { | ||
2365 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", | ||
2366 | mdname(mddev)); | ||
2367 | return -EBADF; | ||
2368 | } | ||
2369 | |||
2370 | err = deny_bitmap_write_access(mddev->bitmap_file); | ||
2371 | if (err) { | ||
2372 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | ||
2373 | mdname(mddev)); | ||
2374 | fput(mddev->bitmap_file); | ||
2375 | mddev->bitmap_file = NULL; | ||
2376 | } | ||
2377 | return err; | ||
2378 | } | ||
2379 | |||
2278 | /* | 2380 | /* |
2279 | * set_array_info is used two different ways | 2381 | * set_array_info is used two different ways |
2280 | * The original usage is when creating a new array. | 2382 | * The original usage is when creating a new array. |
@@ -2586,8 +2688,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2586 | /* | 2688 | /* |
2587 | * Commands querying/configuring an existing array: | 2689 | * Commands querying/configuring an existing array: |
2588 | */ | 2690 | */ |
2589 | /* if we are initialised yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */ | 2691 | /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY, |
2590 | if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) { | 2692 | * RUN_ARRAY, and SET_BITMAP_FILE are allowed */ |
2693 | if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY | ||
2694 | && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE) { | ||
2591 | err = -ENODEV; | 2695 | err = -ENODEV; |
2592 | goto abort_unlock; | 2696 | goto abort_unlock; |
2593 | } | 2697 | } |
@@ -2601,6 +2705,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2601 | err = get_array_info(mddev, argp); | 2705 | err = get_array_info(mddev, argp); |
2602 | goto done_unlock; | 2706 | goto done_unlock; |
2603 | 2707 | ||
2708 | case GET_BITMAP_FILE: | ||
2709 | err = get_bitmap_file(mddev, (void *)arg); | ||
2710 | goto done_unlock; | ||
2711 | |||
2604 | case GET_DISK_INFO: | 2712 | case GET_DISK_INFO: |
2605 | err = get_disk_info(mddev, argp); | 2713 | err = get_disk_info(mddev, argp); |
2606 | goto done_unlock; | 2714 | goto done_unlock; |
@@ -2681,6 +2789,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2681 | err = do_md_run (mddev); | 2789 | err = do_md_run (mddev); |
2682 | goto done_unlock; | 2790 | goto done_unlock; |
2683 | 2791 | ||
2792 | case SET_BITMAP_FILE: | ||
2793 | err = set_bitmap_file(mddev, (int)arg); | ||
2794 | goto done_unlock; | ||
2795 | |||
2684 | default: | 2796 | default: |
2685 | if (_IOC_TYPE(cmd) == MD_MAJOR) | 2797 | if (_IOC_TYPE(cmd) == MD_MAJOR) |
2686 | printk(KERN_WARNING "md: %s(pid %d) used" | 2798 | printk(KERN_WARNING "md: %s(pid %d) used" |
@@ -2792,8 +2904,9 @@ static int md_thread(void * arg) | |||
2792 | while (thread->run) { | 2904 | while (thread->run) { |
2793 | void (*run)(mddev_t *); | 2905 | void (*run)(mddev_t *); |
2794 | 2906 | ||
2795 | wait_event_interruptible(thread->wqueue, | 2907 | wait_event_interruptible_timeout(thread->wqueue, |
2796 | test_bit(THREAD_WAKEUP, &thread->flags)); | 2908 | test_bit(THREAD_WAKEUP, &thread->flags), |
2909 | thread->timeout); | ||
2797 | if (current->flags & PF_FREEZE) | 2910 | if (current->flags & PF_FREEZE) |
2798 | refrigerator(PF_FREEZE); | 2911 | refrigerator(PF_FREEZE); |
2799 | 2912 | ||
@@ -2839,6 +2952,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
2839 | thread->run = run; | 2952 | thread->run = run; |
2840 | thread->mddev = mddev; | 2953 | thread->mddev = mddev; |
2841 | thread->name = name; | 2954 | thread->name = name; |
2955 | thread->timeout = MAX_SCHEDULE_TIMEOUT; | ||
2842 | ret = kernel_thread(md_thread, thread, 0); | 2956 | ret = kernel_thread(md_thread, thread, 0); |
2843 | if (ret < 0) { | 2957 | if (ret < 0) { |
2844 | kfree(thread); | 2958 | kfree(thread); |
@@ -2877,13 +2991,13 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
2877 | 2991 | ||
2878 | if (!rdev || rdev->faulty) | 2992 | if (!rdev || rdev->faulty) |
2879 | return; | 2993 | return; |
2880 | 2994 | /* | |
2881 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", | 2995 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", |
2882 | mdname(mddev), | 2996 | mdname(mddev), |
2883 | MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), | 2997 | MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), |
2884 | __builtin_return_address(0),__builtin_return_address(1), | 2998 | __builtin_return_address(0),__builtin_return_address(1), |
2885 | __builtin_return_address(2),__builtin_return_address(3)); | 2999 | __builtin_return_address(2),__builtin_return_address(3)); |
2886 | 3000 | */ | |
2887 | if (!mddev->pers->error_handler) | 3001 | if (!mddev->pers->error_handler) |
2888 | return; | 3002 | return; |
2889 | mddev->pers->error_handler(mddev,rdev); | 3003 | mddev->pers->error_handler(mddev,rdev); |
@@ -3037,6 +3151,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3037 | struct list_head *tmp2; | 3151 | struct list_head *tmp2; |
3038 | mdk_rdev_t *rdev; | 3152 | mdk_rdev_t *rdev; |
3039 | int i; | 3153 | int i; |
3154 | struct bitmap *bitmap; | ||
3040 | 3155 | ||
3041 | if (v == (void*)1) { | 3156 | if (v == (void*)1) { |
3042 | seq_printf(seq, "Personalities : "); | 3157 | seq_printf(seq, "Personalities : "); |
@@ -3089,10 +3204,36 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3089 | if (mddev->pers) { | 3204 | if (mddev->pers) { |
3090 | mddev->pers->status (seq, mddev); | 3205 | mddev->pers->status (seq, mddev); |
3091 | seq_printf(seq, "\n "); | 3206 | seq_printf(seq, "\n "); |
3092 | if (mddev->curr_resync > 2) | 3207 | if (mddev->curr_resync > 2) { |
3093 | status_resync (seq, mddev); | 3208 | status_resync (seq, mddev); |
3094 | else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) | 3209 | seq_printf(seq, "\n "); |
3095 | seq_printf(seq, " resync=DELAYED"); | 3210 | } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) |
3211 | seq_printf(seq, " resync=DELAYED\n "); | ||
3212 | } else | ||
3213 | seq_printf(seq, "\n "); | ||
3214 | |||
3215 | if ((bitmap = mddev->bitmap)) { | ||
3216 | char *buf, *path; | ||
3217 | unsigned long chunk_kb; | ||
3218 | unsigned long flags; | ||
3219 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
3220 | spin_lock_irqsave(&bitmap->lock, flags); | ||
3221 | chunk_kb = bitmap->chunksize >> 10; | ||
3222 | seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " | ||
3223 | "%lu%s chunk", | ||
3224 | bitmap->pages - bitmap->missing_pages, | ||
3225 | bitmap->pages, | ||
3226 | (bitmap->pages - bitmap->missing_pages) | ||
3227 | << (PAGE_SHIFT - 10), | ||
3228 | chunk_kb ? chunk_kb : bitmap->chunksize, | ||
3229 | chunk_kb ? "KB" : "B"); | ||
3230 | if (bitmap->file && buf) { | ||
3231 | path = file_path(bitmap->file, buf, PAGE_SIZE); | ||
3232 | seq_printf(seq, ", file: %s", path ? path : ""); | ||
3233 | } | ||
3234 | seq_printf(seq, "\n"); | ||
3235 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
3236 | kfree(buf); | ||
3096 | } | 3237 | } |
3097 | 3238 | ||
3098 | seq_printf(seq, "\n"); | 3239 | seq_printf(seq, "\n"); |
@@ -3328,7 +3469,8 @@ static void md_do_sync(mddev_t *mddev) | |||
3328 | sysctl_speed_limit_max); | 3469 | sysctl_speed_limit_max); |
3329 | 3470 | ||
3330 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 3471 | is_mddev_idle(mddev); /* this also initializes IO event counters */ |
3331 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 3472 | /* we don't use the checkpoint if there's a bitmap */ |
3473 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap) | ||
3332 | j = mddev->recovery_cp; | 3474 | j = mddev->recovery_cp; |
3333 | else | 3475 | else |
3334 | j = 0; | 3476 | j = 0; |
@@ -3673,6 +3815,8 @@ static int __init md_init(void) | |||
3673 | " MD_SB_DISKS=%d\n", | 3815 | " MD_SB_DISKS=%d\n", |
3674 | MD_MAJOR_VERSION, MD_MINOR_VERSION, | 3816 | MD_MAJOR_VERSION, MD_MINOR_VERSION, |
3675 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); | 3817 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); |
3818 | printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR, | ||
3819 | BITMAP_MINOR); | ||
3676 | 3820 | ||
3677 | if (register_blkdev(MAJOR_NR, "md")) | 3821 | if (register_blkdev(MAJOR_NR, "md")) |
3678 | return -1; | 3822 | return -1; |