aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2006-01-06 03:20:30 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-06 11:34:05 -0500
commit d7603b7e3a7f802c67f9190b2387d4d5d111ec14 (patch)
tree 7fc644f33bd6d77156387f8acddd65030b1a817d
parent 0eb3ff12aa8a12538ef681dc83f4361636a0699f (diff)
[PATCH] md: make /proc/mdstat pollable
With this patch it is possible to poll /proc/mdstat to detect arrays appearing or disappearing, to detect failures, recovery starting, recovery completing, and devices being added and removed.

It is similar to the poll-ability of /proc/mounts, though it differs in the following ways:

- We always report that the file is readable (because, face it, it is, even if only for EOF).

- We report POLLPRI when there is a change so that select() can detect it as an exceptional event. Not only are these exceptional events, but that is the mechanism that the current 'mdadm' uses to watch for events (it also polls after a timeout). (We also report POLLERR, like /proc/mounts.)

- Finally, we only reset the per-file event counter when the start of the file is read, rather than when poll() returns an event. This is more robust, as it means that an fd will continue to report activity to poll/select until the program clearly responds to that activity.

md_new_event takes an 'mddev' which isn't currently used, but it will be soon.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- drivers/md/md.c 81
1 files changed, 76 insertions, 5 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1364a1c97e6f..6101879a730f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -42,6 +42,7 @@
42#include <linux/devfs_fs_kernel.h> 42#include <linux/devfs_fs_kernel.h>
43#include <linux/buffer_head.h> /* for invalidate_bdev */ 43#include <linux/buffer_head.h> /* for invalidate_bdev */
44#include <linux/suspend.h> 44#include <linux/suspend.h>
45#include <linux/poll.h>
45 46
46#include <linux/init.h> 47#include <linux/init.h>
47 48
@@ -134,6 +135,24 @@ static struct block_device_operations md_fops;
134static int start_readonly; 135static int start_readonly;
135 136
136/* 137/*
138 * We have a system wide 'event count' that is incremented
139 * on any 'interesting' event, and readers of /proc/mdstat
140 * can use 'poll' or 'select' to find out when the event
141 * count increases.
142 *
143 * Events are:
144 * start array, stop array, error, add device, remove device,
145 * start build, activate spare
146 */
147DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
148static atomic_t md_event_count;
149void md_new_event(mddev_t *mddev)
150{
151 atomic_inc(&md_event_count);
152 wake_up(&md_event_waiters);
153}
154
155/*
137 * Enables to iterate over all existing md arrays 156 * Enables to iterate over all existing md arrays
138 * all_mddevs_lock protects this list. 157 * all_mddevs_lock protects this list.
139 */ 158 */
@@ -2111,6 +2130,7 @@ static int do_md_run(mddev_t * mddev)
2111 mddev->queue->make_request_fn = mddev->pers->make_request; 2130 mddev->queue->make_request_fn = mddev->pers->make_request;
2112 2131
2113 mddev->changed = 1; 2132 mddev->changed = 1;
2133 md_new_event(mddev);
2114 return 0; 2134 return 0;
2115} 2135}
2116 2136
@@ -2238,6 +2258,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
2238 printk(KERN_INFO "md: %s switched to read-only mode.\n", 2258 printk(KERN_INFO "md: %s switched to read-only mode.\n",
2239 mdname(mddev)); 2259 mdname(mddev));
2240 err = 0; 2260 err = 0;
2261 md_new_event(mddev);
2241out: 2262out:
2242 return err; 2263 return err;
2243} 2264}
@@ -2712,6 +2733,7 @@ static int hot_remove_disk(mddev_t * mddev, dev_t dev)
2712 2733
2713 kick_rdev_from_array(rdev); 2734 kick_rdev_from_array(rdev);
2714 md_update_sb(mddev); 2735 md_update_sb(mddev);
2736 md_new_event(mddev);
2715 2737
2716 return 0; 2738 return 0;
2717busy: 2739busy:
@@ -2802,7 +2824,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
2802 */ 2824 */
2803 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 2825 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2804 md_wakeup_thread(mddev->thread); 2826 md_wakeup_thread(mddev->thread);
2805 2827 md_new_event(mddev);
2806 return 0; 2828 return 0;
2807 2829
2808abort_unbind_export: 2830abort_unbind_export:
@@ -3531,6 +3553,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
3531 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 3553 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3532 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3554 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3533 md_wakeup_thread(mddev->thread); 3555 md_wakeup_thread(mddev->thread);
3556 md_new_event(mddev);
3534} 3557}
3535 3558
3536/* seq_file implementation /proc/mdstat */ 3559/* seq_file implementation /proc/mdstat */
@@ -3671,12 +3694,17 @@ static void md_seq_stop(struct seq_file *seq, void *v)
3671 mddev_put(mddev); 3694 mddev_put(mddev);
3672} 3695}
3673 3696
3697struct mdstat_info {
3698 int event;
3699};
3700
3674static int md_seq_show(struct seq_file *seq, void *v) 3701static int md_seq_show(struct seq_file *seq, void *v)
3675{ 3702{
3676 mddev_t *mddev = v; 3703 mddev_t *mddev = v;
3677 sector_t size; 3704 sector_t size;
3678 struct list_head *tmp2; 3705 struct list_head *tmp2;
3679 mdk_rdev_t *rdev; 3706 mdk_rdev_t *rdev;
3707 struct mdstat_info *mi = seq->private;
3680 int i; 3708 int i;
3681 struct bitmap *bitmap; 3709 struct bitmap *bitmap;
3682 3710
@@ -3689,6 +3717,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
3689 3717
3690 spin_unlock(&pers_lock); 3718 spin_unlock(&pers_lock);
3691 seq_printf(seq, "\n"); 3719 seq_printf(seq, "\n");
3720 mi->event = atomic_read(&md_event_count);
3692 return 0; 3721 return 0;
3693 } 3722 }
3694 if (v == (void*)2) { 3723 if (v == (void*)2) {
@@ -3797,16 +3826,52 @@ static struct seq_operations md_seq_ops = {
3797static int md_seq_open(struct inode *inode, struct file *file) 3826static int md_seq_open(struct inode *inode, struct file *file)
3798{ 3827{
3799 int error; 3828 int error;
3829 struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
3830 if (mi == NULL)
3831 return -ENOMEM;
3800 3832
3801 error = seq_open(file, &md_seq_ops); 3833 error = seq_open(file, &md_seq_ops);
3834 if (error)
3835 kfree(mi);
3836 else {
3837 struct seq_file *p = file->private_data;
3838 p->private = mi;
3839 mi->event = atomic_read(&md_event_count);
3840 }
3802 return error; 3841 return error;
3803} 3842}
3804 3843
3844static int md_seq_release(struct inode *inode, struct file *file)
3845{
3846 struct seq_file *m = file->private_data;
3847 struct mdstat_info *mi = m->private;
3848 m->private = NULL;
3849 kfree(mi);
3850 return seq_release(inode, file);
3851}
3852
3853static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
3854{
3855 struct seq_file *m = filp->private_data;
3856 struct mdstat_info *mi = m->private;
3857 int mask;
3858
3859 poll_wait(filp, &md_event_waiters, wait);
3860
3861 /* always allow read */
3862 mask = POLLIN | POLLRDNORM;
3863
3864 if (mi->event != atomic_read(&md_event_count))
3865 mask |= POLLERR | POLLPRI;
3866 return mask;
3867}
3868
3805static struct file_operations md_seq_fops = { 3869static struct file_operations md_seq_fops = {
3806 .open = md_seq_open, 3870 .open = md_seq_open,
3807 .read = seq_read, 3871 .read = seq_read,
3808 .llseek = seq_lseek, 3872 .llseek = seq_lseek,
3809 .release = seq_release, 3873 .release = md_seq_release,
3874 .poll = mdstat_poll,
3810}; 3875};
3811 3876
3812int register_md_personality(int pnum, mdk_personality_t *p) 3877int register_md_personality(int pnum, mdk_personality_t *p)
@@ -4076,7 +4141,11 @@ static void md_do_sync(mddev_t *mddev)
4076 4141
4077 j += sectors; 4142 j += sectors;
4078 if (j>1) mddev->curr_resync = j; 4143 if (j>1) mddev->curr_resync = j;
4079 4144 if (last_check == 0)
4145 /* this is the earliest that a rebuild will be
4146 * visible in /proc/mdstat
4147 */
4148 md_new_event(mddev);
4080 4149
4081 if (last_check + window > io_sectors || j == max_sectors) 4150 if (last_check + window > io_sectors || j == max_sectors)
4082 continue; 4151 continue;
@@ -4262,6 +4331,7 @@ void md_check_recovery(mddev_t *mddev)
4262 mddev->recovery = 0; 4331 mddev->recovery = 0;
4263 /* flag recovery needed just to double check */ 4332 /* flag recovery needed just to double check */
4264 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 4333 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4334 md_new_event(mddev);
4265 goto unlock; 4335 goto unlock;
4266 } 4336 }
4267 /* Clear some bits that don't mean anything, but 4337 /* Clear some bits that don't mean anything, but
@@ -4299,6 +4369,7 @@ void md_check_recovery(mddev_t *mddev)
4299 sprintf(nm, "rd%d", rdev->raid_disk); 4369 sprintf(nm, "rd%d", rdev->raid_disk);
4300 sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); 4370 sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
4301 spares++; 4371 spares++;
4372 md_new_event(mddev);
4302 } else 4373 } else
4303 break; 4374 break;
4304 } 4375 }
@@ -4331,9 +4402,9 @@ void md_check_recovery(mddev_t *mddev)
4331 mdname(mddev)); 4402 mdname(mddev));
4332 /* leave the spares where they are, it shouldn't hurt */ 4403 /* leave the spares where they are, it shouldn't hurt */
4333 mddev->recovery = 0; 4404 mddev->recovery = 0;
4334 } else { 4405 } else
4335 md_wakeup_thread(mddev->sync_thread); 4406 md_wakeup_thread(mddev->sync_thread);
4336 } 4407 md_new_event(mddev);
4337 } 4408 }
4338 unlock: 4409 unlock:
4339 mddev_unlock(mddev); 4410 mddev_unlock(mddev);