author     Linus Torvalds <torvalds@linux-foundation.org>  2015-02-12 14:05:49 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-02-12 14:05:49 -0500
commit     5d8e7fb6916556e9b476de33404e8c9e2c9aee61 (patch)
tree       2f2e1c0f0df579a221e3bc99e5ccf5ddacfcc27a /drivers/md
parent     87c9172f71e3f729729aad27fa6592bb795137fd (diff)
parent     53a6ab4d3f6d6dc87ec8f14998b4b5536ee2968c (diff)
Merge tag 'md/3.20' of git://neil.brown.name/md
Pull md updates from Neil Brown:
 - assorted locking changes so that access to /proc/mdstat
   and much of /sys/block/mdXX/md/* is protected by a spinlock
   rather than a mutex and will never block indefinitely
   (sketched in the example below the commit list).
 - make an 'if' condition in RAID5, which has been implicated
   in recent bugs, more readable.
 - misc minor fixes
* tag 'md/3.20' of git://neil.brown.name/md: (28 commits)
md/raid10: fix conversion from RAID0 to RAID10
md: wakeup thread upon rdev_dec_pending()
md: make reconfig_mutex optional for writes to md sysfs files.
md: move mddev_lock and related to md.h
md: use mddev->lock to protect updates to resync_{min,max}.
md: minor cleanup in safe_delay_store.
md: move GET_BITMAP_FILE ioctl out from mddev_lock.
md: tidy up set_bitmap_file
md: remove unnecessary 'buf' from get_bitmap_file.
md: remove mddev_lock from rdev_attr_show()
md: remove mddev_lock() from md_attr_show()
md/raid5: use ->lock to protect accessing raid5 sysfs attributes.
md: remove need for mddev_lock() in md_seq_show()
md/bitmap: protect clearing of ->bitmap by mddev->lock
md: protect ->pers changes with mddev->lock
md: level_store: group all important changes into one place.
md: rename ->stop to ->free
md: split detach operation out from ->stop.
md/linear: remove rcu protections in favour of suspend/resume
md: make merge_bvec_fn more robust in face of personality changes.
...
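The first bullet in the pull message is the headline change: reads of md state from /proc/mdstat and sysfs used to take mddev->reconfig_mutex, so a stalled reconfiguration could block a simple `cat /proc/mdstat` forever. Guarding the short, non-sleeping read paths with a spinlock removes that failure mode. A minimal userspace sketch of the idea, using POSIX spinlocks (illustrative names, not the kernel API):

```c
/* Sketch: a short, non-sleeping read protected by a spinlock, so a
 * reader never blocks behind a long reconfiguration.  Illustrative
 * userspace code, not the kernel API. */
#include <pthread.h>
#include <stdio.h>

struct array_state {
	pthread_spinlock_t lock;	/* cheap, never sleeps */
	int level;			/* e.g. RAID level */
};

static int level_show(struct array_state *s, char *page, size_t len)
{
	int n;

	pthread_spin_lock(&s->lock);
	/* keep the critical section tiny: format a snapshot, nothing else */
	n = snprintf(page, len, "raid%d\n", s->level);
	pthread_spin_unlock(&s->lock);
	return n;
}

int main(void)
{
	struct array_state s;
	char buf[32];

	pthread_spin_init(&s.lock, PTHREAD_PROCESS_PRIVATE);
	s.level = 5;
	level_show(&s, buf, sizeof(buf));
	fputs(buf, stdout);		/* prints "raid5" */
	pthread_spin_destroy(&s.lock);
	return 0;
}
```

The trade-off is that nothing done under the spinlock may sleep or allocate, which is why many of the show/store functions below are restructured to do no more than format or record a snapshot.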
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bitmap.c    |  15
-rw-r--r--  drivers/md/dm-raid.c   |   8
-rw-r--r--  drivers/md/faulty.c    |   8
-rw-r--r--  drivers/md/linear.c    |  67
-rw-r--r--  drivers/md/md.c        | 816
-rw-r--r--  drivers/md/md.h        |  57
-rw-r--r--  drivers/md/multipath.c |  22
-rw-r--r--  drivers/md/raid0.c     |  29
-rw-r--r--  drivers/md/raid1.c     |  52
-rw-r--r--  drivers/md/raid1.h     |   3
-rw-r--r--  drivers/md/raid10.c    |  49
-rw-r--r--  drivers/md/raid10.h    |   3
-rw-r--r--  drivers/md/raid5.c     | 334
-rw-r--r--  drivers/md/raid5.h     |   1
14 files changed, 858 insertions(+), 606 deletions(-)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 1695ee5f3ffc..3a5767968ba0 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1619,7 +1619,9 @@ void bitmap_destroy(struct mddev *mddev)
 		return;
 
 	mutex_lock(&mddev->bitmap_info.mutex);
+	spin_lock(&mddev->lock);
 	mddev->bitmap = NULL; /* disconnect from the md device */
+	spin_unlock(&mddev->lock);
 	mutex_unlock(&mddev->bitmap_info.mutex);
 	if (mddev->thread)
 		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
@@ -2209,11 +2211,13 @@ __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
 static ssize_t can_clear_show(struct mddev *mddev, char *page)
 {
 	int len;
+	spin_lock(&mddev->lock);
 	if (mddev->bitmap)
 		len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
 					     "false" : "true"));
 	else
 		len = sprintf(page, "\n");
+	spin_unlock(&mddev->lock);
 	return len;
 }
 
@@ -2238,10 +2242,15 @@ __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
 static ssize_t
 behind_writes_used_show(struct mddev *mddev, char *page)
 {
+	ssize_t ret;
+	spin_lock(&mddev->lock);
 	if (mddev->bitmap == NULL)
-		return sprintf(page, "0\n");
-	return sprintf(page, "%lu\n",
-		       mddev->bitmap->behind_writes_used);
+		ret = sprintf(page, "0\n");
+	else
+		ret = sprintf(page, "%lu\n",
+			      mddev->bitmap->behind_writes_used);
+	spin_unlock(&mddev->lock);
+	return ret;
 }
 
 static ssize_t
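The shape of the behind_writes_used_show() change above recurs throughout this series: early `return sprintf(...)` statements become assignments to a local, so the function reaches `spin_unlock()` on every path. A hedged userspace illustration of that single-exit discipline (not kernel code):

```c
/* Never 'return' while holding the lock: compute into a local and
 * release on the single exit path.  Illustrative userspace sketch of
 * the behind_writes_used_show() fix. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long *behind_writes;	/* may be NULL, like mddev->bitmap */

static int behind_writes_show(char *page, size_t len)
{
	int ret;

	pthread_mutex_lock(&lock);
	if (behind_writes == NULL)
		ret = snprintf(page, len, "0\n");	/* was: return sprintf(...) */
	else
		ret = snprintf(page, len, "%lu\n", *behind_writes);
	pthread_mutex_unlock(&lock);	/* runs on every path */
	return ret;
}

int main(void)
{
	char buf[32];

	behind_writes_show(buf, sizeof(buf));
	fputs(buf, stdout);		/* prints "0" while no bitmap exists */
	return 0;
}
```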
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 07c0fa0fa284..777d9ba2acad 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -746,13 +746,7 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
 {
 	struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
 
-	if (rs->raid_type->level == 1)
-		return md_raid1_congested(&rs->md, bits);
-
-	if (rs->raid_type->level == 10)
-		return md_raid10_congested(&rs->md, bits);
-
-	return md_raid5_congested(&rs->md, bits);
+	return mddev_congested(&rs->md, bits);
 }
 
 /*
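With a `congested` callback added to struct md_personality elsewhere in this series, dm-raid no longer needs to switch on the RAID level itself; it calls one entry point and md dispatches through the personality's method table. A sketch of that dispatch shape (simplified, illustrative types, not the kernel headers):

```c
/* Replace caller-side type checks with a method table: the caller asks
 * one question, the object answers via its ops.  Illustrative names. */
#include <stdio.h>

struct mddev;

struct md_personality {
	const char *name;
	int (*congested)(struct mddev *mddev, int bits);
};

struct mddev {
	const struct md_personality *pers;
	int suspended;
};

static int mddev_congested(struct mddev *mddev, int bits)
{
	if (mddev->suspended)
		return 1;
	if (mddev->pers && mddev->pers->congested)
		return mddev->pers->congested(mddev, bits);
	return 0;
}

static int raid1_congested(struct mddev *mddev, int bits)
{
	(void)mddev; (void)bits;
	return 0;	/* a real personality would poll its member devices */
}

static const struct md_personality raid1 = {
	.name = "raid1",
	.congested = raid1_congested,
};

int main(void)
{
	struct mddev md = { .pers = &raid1 };

	printf("congested=%d\n", mddev_congested(&md, 0));
	return 0;
}
```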
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index e8b4574956c7..1277eb26b58a 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -332,13 +332,11 @@ static int run(struct mddev *mddev)
 	return 0;
 }
 
-static int stop(struct mddev *mddev)
+static void faulty_free(struct mddev *mddev, void *priv)
 {
-	struct faulty_conf *conf = mddev->private;
+	struct faulty_conf *conf = priv;
 
 	kfree(conf);
-	mddev->private = NULL;
-	return 0;
 }
 
 static struct md_personality faulty_personality =
@@ -348,7 +346,7 @@ static struct md_personality faulty_personality =
 	.owner = THIS_MODULE,
 	.make_request = make_request,
 	.run = run,
-	.stop = stop,
+	.free = faulty_free,
 	.status = status,
 	.check_reshape = reshape,
 	.size = faulty_size,
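The ->stop to ->free conversion seen here narrows the personality's job: the md core takes over the pointer bookkeeping and hands the old private data to ->free(), so the personality no longer clears mddev->private or returns a status. A hedged userspace sketch of that ownership handoff (names follow the patch, but this is illustrative, not the kernel implementation):

```c
/* Sketch of the ->free(mddev, priv) contract: the core owns the
 * pointer swap; the personality only releases its own allocation. */
#include <stdlib.h>

struct mddev;

struct md_personality {
	void (*free)(struct mddev *mddev, void *priv);
};

struct mddev {
	void *private;
	const struct md_personality *pers;
};

static void faulty_free(struct mddev *mddev, void *priv)
{
	(void)mddev;
	free(priv);	/* note: no mddev->private = NULL here */
}

static void md_free_personality(struct mddev *mddev)
{
	void *oldpriv = mddev->private;

	mddev->private = NULL;			/* core clears the pointer... */
	mddev->pers->free(mddev, oldpriv);	/* ...personality frees the data */
}

int main(void)
{
	static const struct md_personality faulty = { .free = faulty_free };
	struct mddev md = { .private = malloc(16), .pers = &faulty };

	md_free_personality(&md);
	return 0;
}
```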
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 64713b77df1c..fa7d577f3d12 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -34,7 +34,7 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
 
 	lo = 0;
 	hi = mddev->raid_disks - 1;
-	conf = rcu_dereference(mddev->private);
+	conf = mddev->private;
 
 	/*
 	 * Binary Search
@@ -60,18 +60,16 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
  *
  *	Return amount of bytes we can take at this offset
  */
-static int linear_mergeable_bvec(struct request_queue *q,
+static int linear_mergeable_bvec(struct mddev *mddev,
 				 struct bvec_merge_data *bvm,
 				 struct bio_vec *biovec)
 {
-	struct mddev *mddev = q->queuedata;
 	struct dev_info *dev0;
 	unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
 	int maxbytes = biovec->bv_len;
 	struct request_queue *subq;
 
-	rcu_read_lock();
 	dev0 = which_dev(mddev, sector);
 	maxsectors = dev0->end_sector - sector;
 	subq = bdev_get_queue(dev0->rdev->bdev);
@@ -81,7 +79,6 @@ static int linear_mergeable_bvec(struct request_queue *q,
 		maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm,
 							     biovec));
 	}
-	rcu_read_unlock();
 
 	if (maxsectors < bio_sectors)
 		maxsectors = 0;
@@ -97,24 +94,18 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	return maxsectors << 9;
 }
 
-static int linear_congested(void *data, int bits)
+static int linear_congested(struct mddev *mddev, int bits)
 {
-	struct mddev *mddev = data;
 	struct linear_conf *conf;
 	int i, ret = 0;
 
-	if (mddev_congested(mddev, bits))
-		return 1;
-
-	rcu_read_lock();
-	conf = rcu_dereference(mddev->private);
+	conf = mddev->private;
 
 	for (i = 0; i < mddev->raid_disks && !ret ; i++) {
 		struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
 		ret |= bdi_congested(&q->backing_dev_info, bits);
 	}
 
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -123,12 +114,10 @@ static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 	struct linear_conf *conf;
 	sector_t array_sectors;
 
-	rcu_read_lock();
-	conf = rcu_dereference(mddev->private);
+	conf = mddev->private;
 	WARN_ONCE(sectors || raid_disks,
 		  "%s does not support generic reshape\n", __func__);
 	array_sectors = conf->array_sectors;
-	rcu_read_unlock();
 
 	return array_sectors;
 }
@@ -217,10 +206,6 @@ static int linear_run (struct mddev *mddev)
 	mddev->private = conf;
 	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 
-	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
-	mddev->queue->backing_dev_info.congested_fn = linear_congested;
-	mddev->queue->backing_dev_info.congested_data = mddev;
-
 	ret = md_integrity_register(mddev);
 	if (ret) {
 		kfree(conf);
@@ -252,38 +237,23 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
 	if (!newconf)
 		return -ENOMEM;
 
-	oldconf = rcu_dereference_protected(mddev->private,
-					    lockdep_is_held(
-						    &mddev->reconfig_mutex));
+	mddev_suspend(mddev);
+	oldconf = mddev->private;
 	mddev->raid_disks++;
-	rcu_assign_pointer(mddev->private, newconf);
+	mddev->private = newconf;
 	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 	set_capacity(mddev->gendisk, mddev->array_sectors);
+	mddev_resume(mddev);
 	revalidate_disk(mddev->gendisk);
-	kfree_rcu(oldconf, rcu);
+	kfree(oldconf);
 	return 0;
 }
 
-static int linear_stop (struct mddev *mddev)
+static void linear_free(struct mddev *mddev, void *priv)
 {
-	struct linear_conf *conf =
-		rcu_dereference_protected(mddev->private,
-					  lockdep_is_held(
-						  &mddev->reconfig_mutex));
+	struct linear_conf *conf = priv;
 
-	/*
-	 * We do not require rcu protection here since
-	 * we hold reconfig_mutex for both linear_add and
-	 * linear_stop, so they cannot race.
-	 * We should make sure any old 'conf's are properly
-	 * freed though.
-	 */
-	rcu_barrier();
-	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	kfree(conf);
-	mddev->private = NULL;
-
-	return 0;
 }
 
 static void linear_make_request(struct mddev *mddev, struct bio *bio)
@@ -299,16 +269,12 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
 	}
 
 	do {
-		rcu_read_lock();
-
 		tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
 		start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
 		end_sector = tmp_dev->end_sector;
 		data_offset = tmp_dev->rdev->data_offset;
 		bio->bi_bdev = tmp_dev->rdev->bdev;
 
-		rcu_read_unlock();
-
 		if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
 			     bio->bi_iter.bi_sector < start_sector))
 			goto out_of_bounds;
@@ -355,6 +321,10 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
 	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
 }
 
+static void linear_quiesce(struct mddev *mddev, int state)
+{
+}
+
 static struct md_personality linear_personality =
 {
 	.name = "linear",
@@ -362,10 +332,13 @@ static struct md_personality linear_personality =
 	.owner = THIS_MODULE,
 	.make_request = linear_make_request,
 	.run = linear_run,
-	.stop = linear_stop,
+	.free = linear_free,
 	.status = linear_status,
 	.hot_add_disk = linear_add,
 	.size = linear_size,
+	.quiesce = linear_quiesce,
+	.congested = linear_congested,
+	.mergeable_bvec = linear_mergeable_bvec,
 };
 
 static int __init linear_init (void)
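linear.c drops rcu_read_lock()/rcu_dereference() around mddev->private and instead brackets the pointer swap in linear_add() with mddev_suspend()/mddev_resume(): once in-flight requests drain, the old conf can be freed with a plain kfree(). A simplified userspace model of that quiesce-then-swap pattern (counter plus condition variable; illustrative, not the kernel implementation):

```c
/* Quiesce-then-swap: block new I/O, wait for active I/O to drain, swap
 * the config pointer, resume.  Simplified model of mddev_suspend() and
 * mddev_resume(); illustrative only. */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int suspended, active_io;
static void *conf;			/* the private configuration */

static void io_enter(void)
{
	pthread_mutex_lock(&m);
	while (suspended)
		pthread_cond_wait(&cv, &m);
	active_io++;
	pthread_mutex_unlock(&m);
}

static void io_exit(void)
{
	pthread_mutex_lock(&m);
	if (--active_io == 0)
		pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&m);
}

static void swap_conf(void *newconf)
{
	void *old;

	pthread_mutex_lock(&m);
	suspended = 1;			/* mddev_suspend() */
	while (active_io > 0)
		pthread_cond_wait(&cv, &m);
	old = conf;
	conf = newconf;			/* safe: no reader can hold 'old' */
	suspended = 0;			/* mddev_resume() */
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&m);
	free(old);			/* plain free, no RCU grace period */
}

int main(void)
{
	io_enter();			/* a reader in flight... */
	io_exit();			/* ...must drain before a swap proceeds */
	swap_conf(malloc(32));
	free(conf);
	return 0;
}
```

The cost is that writers (here, hot-adding a disk) briefly stall all I/O; the gain is that every read path sheds its RCU annotations and the freeing rules become trivial.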
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 709755fb6d7b..c8d2bac4e28b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -72,6 +72,7 @@ static struct workqueue_struct *md_misc_wq;
 
 static int remove_and_add_spares(struct mddev *mddev,
 				 struct md_rdev *this);
+static void mddev_detach(struct mddev *mddev);
 
 /*
  * Default number of read corrections we'll attempt on an rdev
@@ -292,8 +293,8 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
 /* mddev_suspend makes sure no new requests are submitted
  * to the device, and that any requests that have been submitted
  * are completely handled.
- * Once ->stop is called and completes, the module will be completely
- * unused.
+ * Once mddev_detach() is called and completes, the module will be
+ * completely unused.
  */
 void mddev_suspend(struct mddev *mddev)
 {
@@ -321,10 +322,47 @@ EXPORT_SYMBOL_GPL(mddev_resume);
 
 int mddev_congested(struct mddev *mddev, int bits)
 {
-	return mddev->suspended;
+	struct md_personality *pers = mddev->pers;
+	int ret = 0;
+
+	rcu_read_lock();
+	if (mddev->suspended)
+		ret = 1;
+	else if (pers && pers->congested)
+		ret = pers->congested(mddev, bits);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mddev_congested);
+static int md_congested(void *data, int bits)
+{
+	struct mddev *mddev = data;
+	return mddev_congested(mddev, bits);
 }
-EXPORT_SYMBOL(mddev_congested);
 
+static int md_mergeable_bvec(struct request_queue *q,
+			     struct bvec_merge_data *bvm,
+			     struct bio_vec *biovec)
+{
+	struct mddev *mddev = q->queuedata;
+	int ret;
+	rcu_read_lock();
+	if (mddev->suspended) {
+		/* Must always allow one vec */
+		if (bvm->bi_size == 0)
+			ret = biovec->bv_len;
+		else
+			ret = 0;
+	} else {
+		struct md_personality *pers = mddev->pers;
+		if (pers && pers->mergeable_bvec)
+			ret = pers->mergeable_bvec(mddev, bvm, biovec);
+		else
+			ret = biovec->bv_len;
+	}
+	rcu_read_unlock();
+	return ret;
+}
 /*
  * Generic flush handling for md
  */
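mddev_congested() now consults the personality under rcu_read_lock(), and md_mergeable_bvec() answers with a safe default ("one vec is always allowed") while the device is suspended or has no personality. A userspace analogy of that read-side dispatch, using C11 atomics in place of RCU (illustrative only; the kernel's grace-period semantics are stronger than a bare atomic load):

```c
/* Read-side dispatch: take one snapshot of the published ops pointer,
 * then either call through it or fall back to a conservative default. */
#include <stdatomic.h>
#include <stdio.h>

struct md_ops {
	int (*congested)(int bits);
};

static _Atomic(struct md_ops *) current_ops;	/* published by writers */
static atomic_bool suspended;

static int congested(int bits)
{
	struct md_ops *ops = atomic_load(&current_ops);	/* one snapshot */

	if (atomic_load(&suspended))
		return 1;		/* safe answer while quiesced */
	if (ops && ops->congested)
		return ops->congested(bits);
	return 0;			/* no personality: never congested */
}

static int linear_congested(int bits)
{
	(void)bits;
	return 0;	/* a real personality would poll member devices */
}

int main(void)
{
	static struct md_ops linear = { .congested = linear_congested };

	printf("before: %d\n", congested(0));	/* 0: no ops installed */
	atomic_store(&current_ops, &linear);
	printf("after:  %d\n", congested(0));	/* 0, via linear_congested */
	return 0;
}
```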
@@ -397,12 +435,12 @@ static void md_submit_flush_data(struct work_struct *ws)
 
 void md_flush_request(struct mddev *mddev, struct bio *bio)
 {
-	spin_lock_irq(&mddev->write_lock);
+	spin_lock_irq(&mddev->lock);
 	wait_event_lock_irq(mddev->sb_wait,
 			    !mddev->flush_bio,
-			    mddev->write_lock);
+			    mddev->lock);
 	mddev->flush_bio = bio;
-	spin_unlock_irq(&mddev->write_lock);
+	spin_unlock_irq(&mddev->lock);
 
 	INIT_WORK(&mddev->flush_work, submit_flushes);
 	queue_work(md_wq, &mddev->flush_work);
@@ -465,7 +503,7 @@ void mddev_init(struct mddev *mddev)
 	atomic_set(&mddev->active, 1);
 	atomic_set(&mddev->openers, 0);
 	atomic_set(&mddev->active_io, 0);
-	spin_lock_init(&mddev->write_lock);
+	spin_lock_init(&mddev->lock);
 	atomic_set(&mddev->flush_pending, 0);
 	init_waitqueue_head(&mddev->sb_wait);
 	init_waitqueue_head(&mddev->recovery_wait);
@@ -552,32 +590,9 @@ static struct mddev *mddev_find(dev_t unit)
 		goto retry;
 }
 
-static inline int __must_check mddev_lock(struct mddev *mddev)
-{
-	return mutex_lock_interruptible(&mddev->reconfig_mutex);
-}
-
-/* Sometimes we need to take the lock in a situation where
- * failure due to interrupts is not acceptable.
- */
-static inline void mddev_lock_nointr(struct mddev *mddev)
-{
-	mutex_lock(&mddev->reconfig_mutex);
-}
-
-static inline int mddev_is_locked(struct mddev *mddev)
-{
-	return mutex_is_locked(&mddev->reconfig_mutex);
-}
-
-static inline int mddev_trylock(struct mddev *mddev)
-{
-	return mutex_trylock(&mddev->reconfig_mutex);
-}
-
 static struct attribute_group md_redundancy_group;
 
-static void mddev_unlock(struct mddev *mddev)
+void mddev_unlock(struct mddev *mddev)
 {
 	if (mddev->to_remove) {
 		/* These cannot be removed under reconfig_mutex as
@@ -619,6 +634,7 @@ static void mddev_unlock(struct mddev *mddev)
 	md_wakeup_thread(mddev->thread);
 	spin_unlock(&pers_lock);
 }
+EXPORT_SYMBOL_GPL(mddev_unlock);
 
 static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
 {
@@ -2230,7 +2246,7 @@ repeat:
 		return;
 	}
 
-	spin_lock_irq(&mddev->write_lock);
+	spin_lock(&mddev->lock);
 
 	mddev->utime = get_seconds();
 
@@ -2287,7 +2303,7 @@
 	}
 
 	sync_sbs(mddev, nospares);
-	spin_unlock_irq(&mddev->write_lock);
+	spin_unlock(&mddev->lock);
 
 	pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
 		 mdname(mddev), mddev->in_sync);
@@ -2326,15 +2342,15 @@
 	md_super_wait(mddev);
 	/* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
 
-	spin_lock_irq(&mddev->write_lock);
+	spin_lock(&mddev->lock);
 	if (mddev->in_sync != sync_req ||
 	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
 		/* have to write it out again */
-		spin_unlock_irq(&mddev->write_lock);
+		spin_unlock(&mddev->lock);
 		goto repeat;
 	}
 	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-	spin_unlock_irq(&mddev->write_lock);
+	spin_unlock(&mddev->lock);
 	wake_up(&mddev->sb_wait);
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -2381,40 +2397,41 @@ state_show(struct md_rdev *rdev, char *page)
 {
 	char *sep = "";
 	size_t len = 0;
+	unsigned long flags = ACCESS_ONCE(rdev->flags);
 
-	if (test_bit(Faulty, &rdev->flags) ||
+	if (test_bit(Faulty, &flags) ||
 	    rdev->badblocks.unacked_exist) {
 		len+= sprintf(page+len, "%sfaulty",sep);
 		sep = ",";
 	}
-	if (test_bit(In_sync, &rdev->flags)) {
+	if (test_bit(In_sync, &flags)) {
 		len += sprintf(page+len, "%sin_sync",sep);
 		sep = ",";
 	}
-	if (test_bit(WriteMostly, &rdev->flags)) {
+	if (test_bit(WriteMostly, &flags)) {
 		len += sprintf(page+len, "%swrite_mostly",sep);
 		sep = ",";
 	}
-	if (test_bit(Blocked, &rdev->flags) ||
+	if (test_bit(Blocked, &flags) ||
 	    (rdev->badblocks.unacked_exist
-	     && !test_bit(Faulty, &rdev->flags))) {
+	     && !test_bit(Faulty, &flags))) {
 		len += sprintf(page+len, "%sblocked", sep);
 		sep = ",";
 	}
-	if (!test_bit(Faulty, &rdev->flags) &&
-	    !test_bit(In_sync, &rdev->flags)) {
+	if (!test_bit(Faulty, &flags) &&
+	    !test_bit(In_sync, &flags)) {
 		len += sprintf(page+len, "%sspare", sep);
 		sep = ",";
 	}
-	if (test_bit(WriteErrorSeen, &rdev->flags)) {
+	if (test_bit(WriteErrorSeen, &flags)) {
 		len += sprintf(page+len, "%swrite_error", sep);
 		sep = ",";
 	}
-	if (test_bit(WantReplacement, &rdev->flags)) {
+	if (test_bit(WantReplacement, &flags)) {
 		len += sprintf(page+len, "%swant_replacement", sep);
 		sep = ",";
 	}
-	if (test_bit(Replacement, &rdev->flags)) {
+	if (test_bit(Replacement, &flags)) {
 		len += sprintf(page+len, "%sreplacement", sep);
 		sep = ",";
 	}
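state_show() now reads rdev->flags once through ACCESS_ONCE() and tests the local copy, so a concurrent flag update cannot make the printed list self-contradictory (for example, claiming both "faulty" and "in_sync" from different instants). The same snapshot idiom in C11 (illustrative, simplified flag set):

```c
/* Snapshot a concurrently-updated flags word once, then derive every
 * piece of output from the local copy so the result is self-consistent.
 * C11 analogue of ACCESS_ONCE(rdev->flags). */
#include <stdatomic.h>
#include <stdio.h>

enum { FAULTY = 1 << 0, IN_SYNC = 1 << 1, WRITE_MOSTLY = 1 << 2 };

static atomic_ulong rdev_flags;

static int state_show(char *page, size_t len)
{
	unsigned long flags = atomic_load(&rdev_flags);	/* one read */
	const char *sep = "";
	int n = 0;

	/* every test below sees the same instant in time */
	if (flags & FAULTY) {
		n += snprintf(page + n, len - n, "%sfaulty", sep);
		sep = ",";
	}
	if (flags & IN_SYNC) {
		n += snprintf(page + n, len - n, "%sin_sync", sep);
		sep = ",";
	}
	if (flags & WRITE_MOSTLY)
		n += snprintf(page + n, len - n, "%swrite_mostly", sep);
	return n;
}

int main(void)
{
	char buf[64];

	atomic_store(&rdev_flags, IN_SYNC | WRITE_MOSTLY);
	state_show(buf, sizeof(buf));
	puts(buf);	/* "in_sync,write_mostly" */
	return 0;
}
```

This is what lets the next hunk drop mddev_lock() from rdev_attr_show() entirely: the show functions no longer need the mutex to see a coherent view.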
@@ -2927,21 +2944,12 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 {
 	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
 	struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
-	struct mddev *mddev = rdev->mddev;
-	ssize_t rv;
 
 	if (!entry->show)
 		return -EIO;
-
-	rv = mddev ? mddev_lock(mddev) : -EBUSY;
-	if (!rv) {
-		if (rdev->mddev == NULL)
-			rv = -EBUSY;
-		else
-			rv = entry->show(rdev, page);
-		mddev_unlock(mddev);
-	}
-	return rv;
+	if (!rdev->mddev)
+		return -EBUSY;
+	return entry->show(rdev, page);
 }
 
 static ssize_t
@@ -3212,11 +3220,13 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
 		mddev->safemode_delay = 0;
 	else {
 		unsigned long old_delay = mddev->safemode_delay;
-		mddev->safemode_delay = (msec*HZ)/1000;
-		if (mddev->safemode_delay == 0)
-			mddev->safemode_delay = 1;
-		if (mddev->safemode_delay < old_delay || old_delay == 0)
-			md_safemode_timeout((unsigned long)mddev);
+		unsigned long new_delay = (msec*HZ)/1000;
+
+		if (new_delay == 0)
+			new_delay = 1;
+		mddev->safemode_delay = new_delay;
+		if (new_delay < old_delay || old_delay == 0)
+			mod_timer(&mddev->safemode_timer, jiffies+1);
 	}
 	return len;
 }
@@ -3226,41 +3236,52 @@ __ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
 static ssize_t
 level_show(struct mddev *mddev, char *page)
 {
-	struct md_personality *p = mddev->pers;
+	struct md_personality *p;
+	int ret;
+	spin_lock(&mddev->lock);
+	p = mddev->pers;
 	if (p)
-		return sprintf(page, "%s\n", p->name);
+		ret = sprintf(page, "%s\n", p->name);
 	else if (mddev->clevel[0])
-		return sprintf(page, "%s\n", mddev->clevel);
+		ret = sprintf(page, "%s\n", mddev->clevel);
 	else if (mddev->level != LEVEL_NONE)
-		return sprintf(page, "%d\n", mddev->level);
+		ret = sprintf(page, "%d\n", mddev->level);
 	else
-		return 0;
+		ret = 0;
+	spin_unlock(&mddev->lock);
+	return ret;
 }
 
 static ssize_t
 level_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	char clevel[16];
-	ssize_t rv = len;
-	struct md_personality *pers;
+	ssize_t rv;
+	size_t slen = len;
+	struct md_personality *pers, *oldpers;
 	long level;
-	void *priv;
+	void *priv, *oldpriv;
 	struct md_rdev *rdev;
 
+	if (slen == 0 || slen >= sizeof(clevel))
+		return -EINVAL;
+
+	rv = mddev_lock(mddev);
+	if (rv)
+		return rv;
+
 	if (mddev->pers == NULL) {
-		if (len == 0)
-			return 0;
-		if (len >= sizeof(mddev->clevel))
-			return -ENOSPC;
-		strncpy(mddev->clevel, buf, len);
-		if (mddev->clevel[len-1] == '\n')
-			len--;
-		mddev->clevel[len] = 0;
+		strncpy(mddev->clevel, buf, slen);
+		if (mddev->clevel[slen-1] == '\n')
+			slen--;
+		mddev->clevel[slen] = 0;
 		mddev->level = LEVEL_NONE;
-		return rv;
+		rv = len;
+		goto out_unlock;
 	}
+	rv = -EROFS;
 	if (mddev->ro)
-		return -EROFS;
+		goto out_unlock;
 
 	/* request to change the personality. Need to ensure:
 	 * - array is not engaged in resync/recovery/reshape
@@ -3268,25 +3289,25 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	 * - new personality will access other array.
 	 */
 
+	rv = -EBUSY;
 	if (mddev->sync_thread ||
 	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 	    mddev->reshape_position != MaxSector ||
 	    mddev->sysfs_active)
-		return -EBUSY;
+		goto out_unlock;
 
+	rv = -EINVAL;
 	if (!mddev->pers->quiesce) {
 		printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
 		       mdname(mddev), mddev->pers->name);
-		return -EINVAL;
+		goto out_unlock;
 	}
 
 	/* Now find the new personality */
-	if (len == 0 || len >= sizeof(clevel))
-		return -EINVAL;
-	strncpy(clevel, buf, len);
-	if (clevel[len-1] == '\n')
-		len--;
-	clevel[len] = 0;
+	strncpy(clevel, buf, slen);
+	if (clevel[slen-1] == '\n')
+		slen--;
+	clevel[slen] = 0;
 	if (kstrtol(clevel, 10, &level))
 		level = LEVEL_NONE;
 
@@ -3297,20 +3318,23 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	if (!pers || !try_module_get(pers->owner)) {
 		spin_unlock(&pers_lock);
 		printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
-		return -EINVAL;
+		rv = -EINVAL;
+		goto out_unlock;
 	}
 	spin_unlock(&pers_lock);
 
 	if (pers == mddev->pers) {
 		/* Nothing to do! */
 		module_put(pers->owner);
-		return rv;
+		rv = len;
+		goto out_unlock;
 	}
 	if (!pers->takeover) {
 		module_put(pers->owner);
 		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
 		       mdname(mddev), clevel);
-		return -EINVAL;
+		rv = -EINVAL;
+		goto out_unlock;
 	}
 
 	rdev_for_each(rdev, mddev)
@@ -3330,30 +3354,29 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		module_put(pers->owner);
 		printk(KERN_WARNING "md: %s: %s would not accept array\n",
 		       mdname(mddev), clevel);
-		return PTR_ERR(priv);
+		rv = PTR_ERR(priv);
+		goto out_unlock;
 	}
 
 	/* Looks like we have a winner */
 	mddev_suspend(mddev);
-	mddev->pers->stop(mddev);
+	mddev_detach(mddev);
 
-	if (mddev->pers->sync_request == NULL &&
-	    pers->sync_request != NULL) {
-		/* need to add the md_redundancy_group */
-		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
-			printk(KERN_WARNING
-			       "md: cannot register extra attributes for %s\n",
-			       mdname(mddev));
-		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
-	}
-	if (mddev->pers->sync_request != NULL &&
-	    pers->sync_request == NULL) {
-		/* need to remove the md_redundancy_group */
-		if (mddev->to_remove == NULL)
-			mddev->to_remove = &md_redundancy_group;
-	}
+	spin_lock(&mddev->lock);
+	oldpers = mddev->pers;
+	oldpriv = mddev->private;
+	mddev->pers = pers;
+	mddev->private = priv;
+	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+	mddev->level = mddev->new_level;
+	mddev->layout = mddev->new_layout;
+	mddev->chunk_sectors = mddev->new_chunk_sectors;
+	mddev->delta_disks = 0;
+	mddev->reshape_backwards = 0;
+	mddev->degraded = 0;
+	spin_unlock(&mddev->lock);
 
-	if (mddev->pers->sync_request == NULL &&
+	if (oldpers->sync_request == NULL &&
 	    mddev->external) {
 		/* We are converting from a no-redundancy array
 		 * to a redundancy array and metadata is managed
@@ -3367,6 +3390,24 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->safemode = 0;
 	}
 
+	oldpers->free(mddev, oldpriv);
+
+	if (oldpers->sync_request == NULL &&
+	    pers->sync_request != NULL) {
+		/* need to add the md_redundancy_group */
+		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
+			printk(KERN_WARNING
+			       "md: cannot register extra attributes for %s\n",
+			       mdname(mddev));
+		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
+	}
+	if (oldpers->sync_request != NULL &&
+	    pers->sync_request == NULL) {
+		/* need to remove the md_redundancy_group */
+		if (mddev->to_remove == NULL)
+			mddev->to_remove = &md_redundancy_group;
+	}
+
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk < 0)
 			continue;
@@ -3392,17 +3433,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		}
 	}
 
-	module_put(mddev->pers->owner);
-	mddev->pers = pers;
-	mddev->private = priv;
-	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
-	mddev->level = mddev->new_level;
-	mddev->layout = mddev->new_layout;
-	mddev->chunk_sectors = mddev->new_chunk_sectors;
-	mddev->delta_disks = 0;
-	mddev->reshape_backwards = 0;
-	mddev->degraded = 0;
-	if (mddev->pers->sync_request == NULL) {
+	if (pers->sync_request == NULL) {
 		/* this is now an array without redundancy, so
 		 * it must always be in_sync
 		 */
@@ -3417,6 +3448,9 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	md_update_sb(mddev, 1);
 	sysfs_notify(&mddev->kobj, NULL, "level");
 	md_new_event(mddev);
+	rv = len;
+out_unlock:
+	mddev_unlock(mddev);
 	return rv;
 }
 
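With the reconfig mutex now taken inside level_store() itself, every early `return` above becomes `rv = ...; goto out_unlock;` so the lock is released on exactly one path. The `err ?: len` seen in the neighbouring store functions is the GNU C conditional with the middle operand omitted: it yields err when err is non-zero and len otherwise. A hedged sketch of the single-exit shape (userspace, illustrative):

```c
/* Single-exit locking: set the error code, then jump to the one place
 * that drops the lock.  Sketch of the goto out_unlock pattern adopted
 * by level_store() and friends. */
#include <errno.h>
#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t reconfig = PTHREAD_MUTEX_INITIALIZER;
static int busy, readonly;

static long example_store(const char *buf, size_t len)
{
	long rv;

	(void)buf;
	pthread_mutex_lock(&reconfig);
	rv = -EBUSY;
	if (busy)
		goto out_unlock;	/* the lock is still dropped below */
	rv = -EROFS;
	if (readonly)
		goto out_unlock;
	/* ... apply the change ... */
	rv = len;			/* success: bytes consumed */
out_unlock:
	pthread_mutex_unlock(&reconfig);
	return rv;
}

int main(void)
{
	return example_store("5\n", 2) == 2 ? 0 : 1;
}
```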
@@ -3439,28 +3473,32 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	char *e;
 	unsigned long n = simple_strtoul(buf, &e, 10);
+	int err;
 
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
 
 	if (mddev->pers) {
-		int err;
 		if (mddev->pers->check_reshape == NULL)
-			return -EBUSY;
-		if (mddev->ro)
-			return -EROFS;
-		mddev->new_layout = n;
-		err = mddev->pers->check_reshape(mddev);
-		if (err) {
-			mddev->new_layout = mddev->layout;
-			return err;
-		}
+			err = -EBUSY;
+		else if (mddev->ro)
+			err = -EROFS;
+		else {
+			mddev->new_layout = n;
+			err = mddev->pers->check_reshape(mddev);
+			if (err)
+				mddev->new_layout = mddev->layout;
+		}
 	} else {
 		mddev->new_layout = n;
 		if (mddev->reshape_position == MaxSector)
 			mddev->layout = n;
 	}
-	return len;
+	mddev_unlock(mddev);
+	return err ?: len;
 }
 static struct md_sysfs_entry md_layout =
 __ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
@@ -3483,32 +3521,39 @@ static ssize_t
 raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	char *e;
-	int rv = 0;
+	int err;
 	unsigned long n = simple_strtoul(buf, &e, 10);
 
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
 
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
 	if (mddev->pers)
-		rv = update_raid_disks(mddev, n);
+		err = update_raid_disks(mddev, n);
 	else if (mddev->reshape_position != MaxSector) {
 		struct md_rdev *rdev;
 		int olddisks = mddev->raid_disks - mddev->delta_disks;
 
+		err = -EINVAL;
 		rdev_for_each(rdev, mddev) {
 			if (olddisks < n &&
 			    rdev->data_offset < rdev->new_data_offset)
-				return -EINVAL;
+				goto out_unlock;
 			if (olddisks > n &&
 			    rdev->data_offset > rdev->new_data_offset)
-				return -EINVAL;
+				goto out_unlock;
 		}
+		err = 0;
 		mddev->delta_disks = n - olddisks;
 		mddev->raid_disks = n;
 		mddev->reshape_backwards = (mddev->delta_disks < 0);
 	} else
 		mddev->raid_disks = n;
-	return rv ? rv : len;
+out_unlock:
+	mddev_unlock(mddev);
+	return err ? err : len;
 }
 static struct md_sysfs_entry md_raid_disks =
 __ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
@@ -3527,30 +3572,34 @@ chunk_size_show(struct mddev *mddev, char *page)
 static ssize_t
 chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
 {
+	int err;
 	char *e;
 	unsigned long n = simple_strtoul(buf, &e, 10);
 
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
 
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
 	if (mddev->pers) {
-		int err;
 		if (mddev->pers->check_reshape == NULL)
-			return -EBUSY;
-		if (mddev->ro)
-			return -EROFS;
-		mddev->new_chunk_sectors = n >> 9;
-		err = mddev->pers->check_reshape(mddev);
-		if (err) {
-			mddev->new_chunk_sectors = mddev->chunk_sectors;
-			return err;
-		}
+			err = -EBUSY;
+		else if (mddev->ro)
+			err = -EROFS;
+		else {
+			mddev->new_chunk_sectors = n >> 9;
+			err = mddev->pers->check_reshape(mddev);
+			if (err)
+				mddev->new_chunk_sectors = mddev->chunk_sectors;
+		}
 	} else {
 		mddev->new_chunk_sectors = n >> 9;
 		if (mddev->reshape_position == MaxSector)
 			mddev->chunk_sectors = n >> 9;
 	}
-	return len;
+	mddev_unlock(mddev);
+	return err ?: len;
 }
 static struct md_sysfs_entry md_chunk_size =
 __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
@@ -3566,20 +3615,27 @@ resync_start_show(struct mddev *mddev, char *page)
 static ssize_t
 resync_start_store(struct mddev *mddev, const char *buf, size_t len)
 {
+	int err;
 	char *e;
 	unsigned long long n = simple_strtoull(buf, &e, 10);
 
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
 	if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
-		return -EBUSY;
-	if (cmd_match(buf, "none"))
+		err = -EBUSY;
+	else if (cmd_match(buf, "none"))
 		n = MaxSector;
 	else if (!*buf || (*e && *e != '\n'))
-		return -EINVAL;
+		err = -EINVAL;
 
-	mddev->recovery_cp = n;
-	if (mddev->pers)
-		set_bit(MD_CHANGE_CLEAN, &mddev->flags);
-	return len;
+	if (!err) {
+		mddev->recovery_cp = n;
+		if (mddev->pers)
+			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+	}
+	mddev_unlock(mddev);
+	return err ?: len;
 }
 static struct md_sysfs_entry md_resync_start =
 __ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
@@ -3677,8 +3733,39 @@ static int restart_array(struct mddev *mddev);
 static ssize_t
 array_state_store(struct mddev *mddev, const char *buf, size_t len)
 {
-	int err = -EINVAL;
+	int err;
 	enum array_state st = match_word(buf, array_states);
+
+	if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
+		/* don't take reconfig_mutex when toggling between
+		 * clean and active
+		 */
+		spin_lock(&mddev->lock);
+		if (st == active) {
+			restart_array(mddev);
+			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+			wake_up(&mddev->sb_wait);
+			err = 0;
+		} else /* st == clean */ {
+			restart_array(mddev);
+			if (atomic_read(&mddev->writes_pending) == 0) {
+				if (mddev->in_sync == 0) {
+					mddev->in_sync = 1;
+					if (mddev->safemode == 1)
+						mddev->safemode = 0;
+					set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+				}
+				err = 0;
+			} else
+				err = -EBUSY;
+		}
+		spin_unlock(&mddev->lock);
+		return err;
+	}
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
+	err = -EINVAL;
 	switch(st) {
 	case bad_word:
 		break;
@@ -3722,7 +3809,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 	case clean:
 		if (mddev->pers) {
 			restart_array(mddev);
-			spin_lock_irq(&mddev->write_lock);
+			spin_lock(&mddev->lock);
 			if (atomic_read(&mddev->writes_pending) == 0) {
 				if (mddev->in_sync == 0) {
 					mddev->in_sync = 1;
@@ -3733,7 +3820,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 				err = 0;
 			} else
 				err = -EBUSY;
-			spin_unlock_irq(&mddev->write_lock);
+			spin_unlock(&mddev->lock);
 		} else
 			err = -EINVAL;
 		break;
@@ -3754,14 +3841,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 		/* these cannot be set */
 		break;
 	}
-	if (err)
-		return err;
-	else {
+
+	if (!err) {
 		if (mddev->hold_active == UNTIL_IOCTL)
 			mddev->hold_active = 0;
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
-		return len;
 	}
+	mddev_unlock(mddev);
+	return err ?: len;
 }
 static struct md_sysfs_entry md_array_state =
 __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
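array_state_store() now special-cases the frequent active/clean toggle; the comment in the hunk gives the rationale ("don't take reconfig_mutex when toggling between clean and active"), so that hot path takes only mddev->lock while every other transition still goes through the full mddev_lock() path. A sketch of that fast-path/slow-path split (userspace, illustrative; the real function also checks writes_pending and safemode):

```c
/* Fast path / slow path: the frequent clean<->active toggle takes only
 * a spinlock; anything rarer takes the heavyweight reconfig mutex. */
#include <pthread.h>

enum state { CLEAN, ACTIVE, READONLY, STOPPED };

static pthread_mutex_t reconfig = PTHREAD_MUTEX_INITIALIZER;
static pthread_spinlock_t state_lock;
static enum state cur = CLEAN;

static int set_state(enum state st)
{
	if (st == CLEAN || st == ACTIVE) {
		/* hot path: no waiting behind a long reconfiguration */
		pthread_spin_lock(&state_lock);
		cur = st;
		pthread_spin_unlock(&state_lock);
		return 0;
	}
	/* cold path: full mutual exclusion with reconfiguration */
	pthread_mutex_lock(&reconfig);
	cur = st;
	pthread_mutex_unlock(&reconfig);
	return 0;
}

int main(void)
{
	pthread_spin_init(&state_lock, PTHREAD_PROCESS_PRIVATE);
	set_state(ACTIVE);	/* spinlock only */
	set_state(STOPPED);	/* mutex */
	pthread_spin_destroy(&state_lock);
	return 0;
}
```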
@@ -3822,6 +3909,11 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
 	    minor != MINOR(dev))
 		return -EOVERFLOW;
 
+	flush_workqueue(md_misc_wq);
+
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
 	if (mddev->persistent) {
 		rdev = md_import_device(dev, mddev->major_version,
 					mddev->minor_version);
@@ -3845,6 +3937,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
 out:
 	if (err)
 		export_rdev(rdev);
+	mddev_unlock(mddev);
 	return err ? err : len;
 }
 
@@ -3856,7 +3949,11 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	char *end;
 	unsigned long chunk, end_chunk;
+	int err;
 
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
 	if (!mddev->bitmap)
 		goto out;
 	/* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
@@ -3874,6 +3971,7 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len)
 	}
 	bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
 out:
+	mddev_unlock(mddev);
 	return len;
 }
 
@@ -3901,6 +3999,9 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
 
 	if (err < 0)
 		return err;
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
 	if (mddev->pers) {
 		err = update_size(mddev, sectors);
 		md_update_sb(mddev, 1);
@@ -3911,6 +4012,7 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
 	else
 		err = -ENOSPC;
 	}
+	mddev_unlock(mddev);
 	return err ? err : len;
 }
 
@@ -3940,21 +4042,28 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	int major, minor;
 	char *e;
+	int err;
 	/* Changing the details of 'external' metadata is
 	 * always permitted. Otherwise there must be
 	 * no devices attached to the array.
 	 */
+
+	err = mddev_lock(mddev);
+	if (err)
+		return err;
+	err = -EBUSY;
 	if (mddev->external && strncmp(buf, "external:", 9) == 0)
 		;
 	else if (!list_empty(&mddev->disks))
-		return -EBUSY;
+		goto out_unlock;
 
+	err = 0;
 	if (cmd_match(buf, "none")) {
 		mddev->persistent = 0;
 		mddev->external = 0;
 		mddev->major_version = 0;
 		mddev->minor_version = 90;
-		return len;
+		goto out_unlock;
 	}
 	if (strncmp(buf, "external:", 9) == 0) {
 		size_t namelen = len-9;
@@ -3968,22 +4077,27 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->external = 1;
 		mddev->major_version = 0;
 		mddev->minor_version = 90;
-		return len;
+		goto out_unlock;
 	}
 	major = simple_strtoul(buf, &e, 10);
+	err = -EINVAL;
 	if (e==buf || *e != '.')
-		return -EINVAL;
+		goto out_unlock;
 	buf = e+1;
 	minor = simple_strtoul(buf, &e, 10);
 	if (e==buf || (*e && *e != '\n') )
-		return -EINVAL;
+		goto out_unlock;
+	err = -ENOENT;
 	if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
-		return -ENOENT;
+		goto out_unlock;
 	mddev->major_version = major;
 	mddev->minor_version = minor;
 	mddev->persistent = 1;
 	mddev->external = 0;
-	return len;
+	err = 0;
+out_unlock:
+	mddev_unlock(mddev);
+	return err ?: len;
 }
 
 static struct md_sysfs_entry md_metadata =
@@ -3993,20 +4107,21 @@ static ssize_t | |||
3993 | action_show(struct mddev *mddev, char *page) | 4107 | action_show(struct mddev *mddev, char *page) |
3994 | { | 4108 | { |
3995 | char *type = "idle"; | 4109 | char *type = "idle"; |
3996 | if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) | 4110 | unsigned long recovery = mddev->recovery; |
4111 | if (test_bit(MD_RECOVERY_FROZEN, &recovery)) | ||
3997 | type = "frozen"; | 4112 | type = "frozen"; |
3998 | else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | 4113 | else if (test_bit(MD_RECOVERY_RUNNING, &recovery) || |
3999 | (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) { | 4114 | (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) { |
4000 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 4115 | if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) |
4001 | type = "reshape"; | 4116 | type = "reshape"; |
4002 | else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 4117 | else if (test_bit(MD_RECOVERY_SYNC, &recovery)) { |
4003 | if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | 4118 | if (!test_bit(MD_RECOVERY_REQUESTED, &recovery)) |
4004 | type = "resync"; | 4119 | type = "resync"; |
4005 | else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) | 4120 | else if (test_bit(MD_RECOVERY_CHECK, &recovery)) |
4006 | type = "check"; | 4121 | type = "check"; |
4007 | else | 4122 | else |
4008 | type = "repair"; | 4123 | type = "repair"; |
4009 | } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) | 4124 | } else if (test_bit(MD_RECOVERY_RECOVER, &recovery)) |
4010 | type = "recover"; | 4125 | type = "recover"; |
4011 | } | 4126 | } |
4012 | return sprintf(page, "%s\n", type); | 4127 | return sprintf(page, "%s\n", type); |
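Since the show path now runs without any lock, action_show() samples mddev->recovery once into a local and runs every test_bit() against that copy, so all the answers come from the same sampled word even if recovery state changes mid-call. The idiom in isolation (a sketch):

	unsigned long recovery = mddev->recovery;	/* one plain load */

	/* consistent: both bits are tested against the same snapshot */
	if (test_bit(MD_RECOVERY_RUNNING, &recovery) &&
	    test_bit(MD_RECOVERY_SYNC, &recovery))
		type = "resync";
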
@@ -4027,7 +4142,10 @@ action_store(struct mddev *mddev, const char *page, size_t len) | |||
4027 | flush_workqueue(md_misc_wq); | 4142 | flush_workqueue(md_misc_wq); |
4028 | if (mddev->sync_thread) { | 4143 | if (mddev->sync_thread) { |
4029 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 4144 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
4030 | md_reap_sync_thread(mddev); | 4145 | if (mddev_lock(mddev) == 0) { |
4146 | md_reap_sync_thread(mddev); | ||
4147 | mddev_unlock(mddev); | ||
4148 | } | ||
4031 | } | 4149 | } |
4032 | } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | 4150 | } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || |
4033 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) | 4151 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) |
@@ -4041,7 +4159,11 @@ action_store(struct mddev *mddev, const char *page, size_t len) | |||
4041 | int err; | 4159 | int err; |
4042 | if (mddev->pers->start_reshape == NULL) | 4160 | if (mddev->pers->start_reshape == NULL) |
4043 | return -EINVAL; | 4161 | return -EINVAL; |
4044 | err = mddev->pers->start_reshape(mddev); | 4162 | err = mddev_lock(mddev); |
4163 | if (!err) { | ||
4164 | err = mddev->pers->start_reshape(mddev); | ||
4165 | mddev_unlock(mddev); | ||
4166 | } | ||
4045 | if (err) | 4167 | if (err) |
4046 | return err; | 4168 | return err; |
4047 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | 4169 | sysfs_notify(&mddev->kobj, NULL, "degraded"); |
@@ -4225,22 +4347,36 @@ static ssize_t | |||
4225 | min_sync_store(struct mddev *mddev, const char *buf, size_t len) | 4347 | min_sync_store(struct mddev *mddev, const char *buf, size_t len) |
4226 | { | 4348 | { |
4227 | unsigned long long min; | 4349 | unsigned long long min; |
4350 | int err; | ||
4351 | int chunk; | ||
4352 | |||
4228 | if (kstrtoull(buf, 10, &min)) | 4353 | if (kstrtoull(buf, 10, &min)) |
4229 | return -EINVAL; | 4354 | return -EINVAL; |
4355 | |||
4356 | spin_lock(&mddev->lock); | ||
4357 | err = -EINVAL; | ||
4230 | if (min > mddev->resync_max) | 4358 | if (min > mddev->resync_max) |
4231 | return -EINVAL; | 4359 | goto out_unlock; |
4360 | |||
4361 | err = -EBUSY; | ||
4232 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 4362 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
4233 | return -EBUSY; | 4363 | goto out_unlock; |
4234 | 4364 | ||
4235 | /* Must be a multiple of chunk_size */ | 4365 | /* Must be a multiple of chunk_size */ |
4236 | if (mddev->chunk_sectors) { | 4366 | chunk = mddev->chunk_sectors; |
4367 | if (chunk) { | ||
4237 | sector_t temp = min; | 4368 | sector_t temp = min; |
4238 | if (sector_div(temp, mddev->chunk_sectors)) | 4369 | |
4239 | return -EINVAL; | 4370 | err = -EINVAL; |
4371 | if (sector_div(temp, chunk)) | ||
4372 | goto out_unlock; | ||
4240 | } | 4373 | } |
4241 | mddev->resync_min = min; | 4374 | mddev->resync_min = min; |
4375 | err = 0; | ||
4242 | 4376 | ||
4243 | return len; | 4377 | out_unlock: |
4378 | spin_unlock(&mddev->lock); | ||
4379 | return err ?: len; | ||
4244 | } | 4380 | } |
4245 | 4381 | ||
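resync_min is a single sector count consulted by the resync thread, so mddev->lock (a spinlock) is enough here and the handler never sleeps while holding it. Two idioms worth spelling out: sector_div(n, base) divides n in place and returns the remainder, which is what the chunk-alignment test relies on, and "err ?: len" is GCC's conditional expression with the middle operand omitted, equivalent to "err ? err : len". The alignment check on its own (a sketch):

	sector_t temp = min;

	/* sector_div() rewrites temp = min / chunk and returns min % chunk */
	if (sector_div(temp, mddev->chunk_sectors))
		return -EINVAL;		/* min is not chunk-aligned */
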
4246 | static struct md_sysfs_entry md_min_sync = | 4382 | static struct md_sysfs_entry md_min_sync = |
@@ -4258,29 +4394,42 @@ max_sync_show(struct mddev *mddev, char *page) | |||
4258 | static ssize_t | 4394 | static ssize_t |
4259 | max_sync_store(struct mddev *mddev, const char *buf, size_t len) | 4395 | max_sync_store(struct mddev *mddev, const char *buf, size_t len) |
4260 | { | 4396 | { |
4397 | int err; | ||
4398 | spin_lock(&mddev->lock); | ||
4261 | if (strncmp(buf, "max", 3) == 0) | 4399 | if (strncmp(buf, "max", 3) == 0) |
4262 | mddev->resync_max = MaxSector; | 4400 | mddev->resync_max = MaxSector; |
4263 | else { | 4401 | else { |
4264 | unsigned long long max; | 4402 | unsigned long long max; |
4403 | int chunk; | ||
4404 | |||
4405 | err = -EINVAL; | ||
4265 | if (kstrtoull(buf, 10, &max)) | 4406 | if (kstrtoull(buf, 10, &max)) |
4266 | return -EINVAL; | 4407 | goto out_unlock; |
4267 | if (max < mddev->resync_min) | 4408 | if (max < mddev->resync_min) |
4268 | return -EINVAL; | 4409 | goto out_unlock; |
4410 | |||
4411 | err = -EBUSY; | ||
4269 | if (max < mddev->resync_max && | 4412 | if (max < mddev->resync_max && |
4270 | mddev->ro == 0 && | 4413 | mddev->ro == 0 && |
4271 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 4414 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
4272 | return -EBUSY; | 4415 | goto out_unlock; |
4273 | 4416 | ||
4274 | /* Must be a multiple of chunk_size */ | 4417 | /* Must be a multiple of chunk_size */ |
4275 | if (mddev->chunk_sectors) { | 4418 | chunk = mddev->chunk_sectors; |
4419 | if (chunk) { | ||
4276 | sector_t temp = max; | 4420 | sector_t temp = max; |
4277 | if (sector_div(temp, mddev->chunk_sectors)) | 4421 | |
4278 | return -EINVAL; | 4422 | err = -EINVAL; |
4423 | if (sector_div(temp, chunk)) | ||
4424 | goto out_unlock; | ||
4279 | } | 4425 | } |
4280 | mddev->resync_max = max; | 4426 | mddev->resync_max = max; |
4281 | } | 4427 | } |
4282 | wake_up(&mddev->recovery_wait); | 4428 | wake_up(&mddev->recovery_wait); |
4283 | return len; | 4429 | err = 0; |
4430 | out_unlock: | ||
4431 | spin_unlock(&mddev->lock); | ||
4432 | return err ?: len; | ||
4284 | } | 4433 | } |
4285 | 4434 | ||
4286 | static struct md_sysfs_entry md_max_sync = | 4435 | static struct md_sysfs_entry md_max_sync = |
@@ -4297,14 +4446,20 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) | |||
4297 | { | 4446 | { |
4298 | char *e; | 4447 | char *e; |
4299 | unsigned long long new = simple_strtoull(buf, &e, 10); | 4448 | unsigned long long new = simple_strtoull(buf, &e, 10); |
4300 | unsigned long long old = mddev->suspend_lo; | 4449 | unsigned long long old; |
4450 | int err; | ||
4301 | 4451 | ||
4302 | if (mddev->pers == NULL || | ||
4303 | mddev->pers->quiesce == NULL) | ||
4304 | return -EINVAL; | ||
4305 | if (buf == e || (*e && *e != '\n')) | 4452 | if (buf == e || (*e && *e != '\n')) |
4306 | return -EINVAL; | 4453 | return -EINVAL; |
4307 | 4454 | ||
4455 | err = mddev_lock(mddev); | ||
4456 | if (err) | ||
4457 | return err; | ||
4458 | err = -EINVAL; | ||
4459 | if (mddev->pers == NULL || | ||
4460 | mddev->pers->quiesce == NULL) | ||
4461 | goto unlock; | ||
4462 | old = mddev->suspend_lo; | ||
4308 | mddev->suspend_lo = new; | 4463 | mddev->suspend_lo = new; |
4309 | if (new >= old) | 4464 | if (new >= old) |
4310 | /* Shrinking suspended region */ | 4465 | /* Shrinking suspended region */ |
@@ -4314,7 +4469,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) | |||
4314 | mddev->pers->quiesce(mddev, 1); | 4469 | mddev->pers->quiesce(mddev, 1); |
4315 | mddev->pers->quiesce(mddev, 0); | 4470 | mddev->pers->quiesce(mddev, 0); |
4316 | } | 4471 | } |
4317 | return len; | 4472 | err = 0; |
4473 | unlock: | ||
4474 | mddev_unlock(mddev); | ||
4475 | return err ?: len; | ||
4318 | } | 4476 | } |
4319 | static struct md_sysfs_entry md_suspend_lo = | 4477 | static struct md_sysfs_entry md_suspend_lo = |
4320 | __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); | 4478 | __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); |
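Note the reordering: the ->pers NULL check moves below mddev_lock(), because ->pers can now change while a store runs and must only be tested with the lock held. The quiesce dance itself is unchanged: when the suspended region grows, writes already in flight may overlap the newly covered range, so the personality is bounced through a full quiesce cycle to drain them (a sketch, using the ->quiesce() method from md_personality):

	mddev->suspend_lo = new;
	if (new < old) {			/* region grew downwards */
		mddev->pers->quiesce(mddev, 1);	/* block new I/O, drain old */
		mddev->pers->quiesce(mddev, 0);	/* resume */
	}
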
@@ -4330,14 +4488,20 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) | |||
4330 | { | 4488 | { |
4331 | char *e; | 4489 | char *e; |
4332 | unsigned long long new = simple_strtoull(buf, &e, 10); | 4490 | unsigned long long new = simple_strtoull(buf, &e, 10); |
4333 | unsigned long long old = mddev->suspend_hi; | 4491 | unsigned long long old; |
4492 | int err; | ||
4334 | 4493 | ||
4335 | if (mddev->pers == NULL || | ||
4336 | mddev->pers->quiesce == NULL) | ||
4337 | return -EINVAL; | ||
4338 | if (buf == e || (*e && *e != '\n')) | 4494 | if (buf == e || (*e && *e != '\n')) |
4339 | return -EINVAL; | 4495 | return -EINVAL; |
4340 | 4496 | ||
4497 | err = mddev_lock(mddev); | ||
4498 | if (err) | ||
4499 | return err; | ||
4500 | err = -EINVAL; | ||
4501 | if (mddev->pers == NULL || | ||
4502 | mddev->pers->quiesce == NULL) | ||
4503 | goto unlock; | ||
4504 | old = mddev->suspend_hi; | ||
4341 | mddev->suspend_hi = new; | 4505 | mddev->suspend_hi = new; |
4342 | if (new <= old) | 4506 | if (new <= old) |
4343 | /* Shrinking suspended region */ | 4507 | /* Shrinking suspended region */ |
@@ -4347,7 +4511,10 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) | |||
4347 | mddev->pers->quiesce(mddev, 1); | 4511 | mddev->pers->quiesce(mddev, 1); |
4348 | mddev->pers->quiesce(mddev, 0); | 4512 | mddev->pers->quiesce(mddev, 0); |
4349 | } | 4513 | } |
4350 | return len; | 4514 | err = 0; |
4515 | unlock: | ||
4516 | mddev_unlock(mddev); | ||
4517 | return err ?: len; | ||
4351 | } | 4518 | } |
4352 | static struct md_sysfs_entry md_suspend_hi = | 4519 | static struct md_sysfs_entry md_suspend_hi = |
4353 | __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); | 4520 | __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); |
@@ -4367,11 +4534,17 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len) | |||
4367 | { | 4534 | { |
4368 | struct md_rdev *rdev; | 4535 | struct md_rdev *rdev; |
4369 | char *e; | 4536 | char *e; |
4537 | int err; | ||
4370 | unsigned long long new = simple_strtoull(buf, &e, 10); | 4538 | unsigned long long new = simple_strtoull(buf, &e, 10); |
4371 | if (mddev->pers) | 4539 | |
4372 | return -EBUSY; | ||
4373 | if (buf == e || (*e && *e != '\n')) | 4540 | if (buf == e || (*e && *e != '\n')) |
4374 | return -EINVAL; | 4541 | return -EINVAL; |
4542 | err = mddev_lock(mddev); | ||
4543 | if (err) | ||
4544 | return err; | ||
4545 | err = -EBUSY; | ||
4546 | if (mddev->pers) | ||
4547 | goto unlock; | ||
4375 | mddev->reshape_position = new; | 4548 | mddev->reshape_position = new; |
4376 | mddev->delta_disks = 0; | 4549 | mddev->delta_disks = 0; |
4377 | mddev->reshape_backwards = 0; | 4550 | mddev->reshape_backwards = 0; |
@@ -4380,7 +4553,10 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len) | |||
4380 | mddev->new_chunk_sectors = mddev->chunk_sectors; | 4553 | mddev->new_chunk_sectors = mddev->chunk_sectors; |
4381 | rdev_for_each(rdev, mddev) | 4554 | rdev_for_each(rdev, mddev) |
4382 | rdev->new_data_offset = rdev->data_offset; | 4555 | rdev->new_data_offset = rdev->data_offset; |
4383 | return len; | 4556 | err = 0; |
4557 | unlock: | ||
4558 | mddev_unlock(mddev); | ||
4559 | return err ?: len; | ||
4384 | } | 4560 | } |
4385 | 4561 | ||
4386 | static struct md_sysfs_entry md_reshape_position = | 4562 | static struct md_sysfs_entry md_reshape_position = |
@@ -4398,6 +4574,8 @@ static ssize_t | |||
4398 | reshape_direction_store(struct mddev *mddev, const char *buf, size_t len) | 4574 | reshape_direction_store(struct mddev *mddev, const char *buf, size_t len) |
4399 | { | 4575 | { |
4400 | int backwards = 0; | 4576 | int backwards = 0; |
4577 | int err; | ||
4578 | |||
4401 | if (cmd_match(buf, "forwards")) | 4579 | if (cmd_match(buf, "forwards")) |
4402 | backwards = 0; | 4580 | backwards = 0; |
4403 | else if (cmd_match(buf, "backwards")) | 4581 | else if (cmd_match(buf, "backwards")) |
@@ -4407,16 +4585,19 @@ reshape_direction_store(struct mddev *mddev, const char *buf, size_t len) | |||
4407 | if (mddev->reshape_backwards == backwards) | 4585 | if (mddev->reshape_backwards == backwards) |
4408 | return len; | 4586 | return len; |
4409 | 4587 | ||
4588 | err = mddev_lock(mddev); | ||
4589 | if (err) | ||
4590 | return err; | ||
4410 | /* check if we are allowed to change */ | 4591 | /* check if we are allowed to change */ |
4411 | if (mddev->delta_disks) | 4592 | if (mddev->delta_disks) |
4412 | return -EBUSY; | 4593 | err = -EBUSY; |
4413 | 4594 | else if (mddev->persistent && | |
4414 | if (mddev->persistent && | ||
4415 | mddev->major_version == 0) | 4595 | mddev->major_version == 0) |
4416 | return -EINVAL; | 4596 | err = -EINVAL; |
4417 | 4597 | else | |
4418 | mddev->reshape_backwards = backwards; | 4598 | mddev->reshape_backwards = backwards; |
4419 | return len; | 4599 | mddev_unlock(mddev); |
4600 | return err ?: len; | ||
4420 | } | 4601 | } |
4421 | 4602 | ||
4422 | static struct md_sysfs_entry md_reshape_direction = | 4603 | static struct md_sysfs_entry md_reshape_direction = |
@@ -4437,6 +4618,11 @@ static ssize_t | |||
4437 | array_size_store(struct mddev *mddev, const char *buf, size_t len) | 4618 | array_size_store(struct mddev *mddev, const char *buf, size_t len) |
4438 | { | 4619 | { |
4439 | sector_t sectors; | 4620 | sector_t sectors; |
4621 | int err; | ||
4622 | |||
4623 | err = mddev_lock(mddev); | ||
4624 | if (err) | ||
4625 | return err; | ||
4440 | 4626 | ||
4441 | if (strncmp(buf, "default", 7) == 0) { | 4627 | if (strncmp(buf, "default", 7) == 0) { |
4442 | if (mddev->pers) | 4628 | if (mddev->pers) |
@@ -4447,19 +4633,22 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len) | |||
4447 | mddev->external_size = 0; | 4633 | mddev->external_size = 0; |
4448 | } else { | 4634 | } else { |
4449 | if (strict_blocks_to_sectors(buf, &sectors) < 0) | 4636 | err = -EINVAL; |
4450 | return -EINVAL; | 4636 | err = -EINVAL; |
4451 | if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) | 4637 | else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) |
4452 | return -E2BIG; | 4638 | err = -E2BIG; |
4453 | 4639 | else | |
4454 | mddev->external_size = 1; | 4640 | mddev->external_size = 1; |
4455 | } | 4641 | } |
4456 | 4642 | ||
4457 | mddev->array_sectors = sectors; | 4643 | if (!err) { |
4458 | if (mddev->pers) { | 4644 | mddev->array_sectors = sectors; |
4459 | set_capacity(mddev->gendisk, mddev->array_sectors); | 4645 | if (mddev->pers) { |
4460 | revalidate_disk(mddev->gendisk); | 4646 | set_capacity(mddev->gendisk, mddev->array_sectors); |
4647 | revalidate_disk(mddev->gendisk); | ||
4648 | } | ||
4461 | } | 4649 | } |
4462 | return len; | 4650 | mddev_unlock(mddev); |
4651 | return err ?: len; | ||
4463 | } | 4652 | } |
4464 | 4653 | ||
4465 | static struct md_sysfs_entry md_array_size = | 4654 | static struct md_sysfs_entry md_array_size = |
@@ -4523,11 +4712,7 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | |||
4523 | mddev_get(mddev); | 4712 | mddev_get(mddev); |
4524 | spin_unlock(&all_mddevs_lock); | 4713 | spin_unlock(&all_mddevs_lock); |
4525 | 4714 | ||
4526 | rv = mddev_lock(mddev); | 4715 | rv = entry->show(mddev, page); |
4527 | if (!rv) { | ||
4528 | rv = entry->show(mddev, page); | ||
4529 | mddev_unlock(mddev); | ||
4530 | } | ||
4531 | mddev_put(mddev); | 4716 | mddev_put(mddev); |
4532 | return rv; | 4717 | return rv; |
4533 | } | 4718 | } |
@@ -4551,13 +4736,7 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, | |||
4551 | } | 4736 | } |
4552 | mddev_get(mddev); | 4737 | mddev_get(mddev); |
4553 | spin_unlock(&all_mddevs_lock); | 4738 | spin_unlock(&all_mddevs_lock); |
4554 | if (entry->store == new_dev_store) | 4739 | rv = entry->store(mddev, page, length); |
4555 | flush_workqueue(md_misc_wq); | ||
4556 | rv = mddev_lock(mddev); | ||
4557 | if (!rv) { | ||
4558 | rv = entry->store(mddev, page, length); | ||
4559 | mddev_unlock(mddev); | ||
4560 | } | ||
4561 | mddev_put(mddev); | 4740 | mddev_put(mddev); |
4562 | return rv; | 4741 | return rv; |
4563 | } | 4742 | } |
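With both wrappers reduced to a straight call into the handler, each attribute decides its own protection: structural changes take the reconfig mutex via mddev_lock(), word-sized fields take spin_lock(&mddev->lock), and a single-word read can often go without any lock at all. A hypothetical read-side handler showing the cheapest case (not part of the patch):

	static ssize_t
	example_show(struct mddev *mddev, char *page)
	{
		/* one word, one load: no lock required */
		return sprintf(page, "%d\n", mddev->ro);
	}
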
@@ -4825,7 +5004,6 @@ int md_run(struct mddev *mddev) | |||
4825 | mddev->clevel); | 5004 | mddev->clevel); |
4826 | return -EINVAL; | 5005 | return -EINVAL; |
4827 | } | 5006 | } |
4828 | mddev->pers = pers; | ||
4829 | spin_unlock(&pers_lock); | 5007 | spin_unlock(&pers_lock); |
4830 | if (mddev->level != pers->level) { | 5008 | if (mddev->level != pers->level) { |
4831 | mddev->level = pers->level; | 5009 | mddev->level = pers->level; |
@@ -4836,7 +5014,6 @@ int md_run(struct mddev *mddev) | |||
4836 | if (mddev->reshape_position != MaxSector && | 5014 | if (mddev->reshape_position != MaxSector && |
4837 | pers->start_reshape == NULL) { | 5015 | pers->start_reshape == NULL) { |
4838 | /* This personality cannot handle reshaping... */ | 5016 | /* This personality cannot handle reshaping... */ |
4839 | mddev->pers = NULL; | ||
4840 | module_put(pers->owner); | 5017 | module_put(pers->owner); |
4841 | return -EINVAL; | 5018 | return -EINVAL; |
4842 | } | 5019 | } |
@@ -4880,35 +5057,38 @@ int md_run(struct mddev *mddev) | |||
4880 | if (start_readonly && mddev->ro == 0) | 5057 | if (start_readonly && mddev->ro == 0) |
4881 | mddev->ro = 2; /* read-only, but switch on first write */ | 5058 | mddev->ro = 2; /* read-only, but switch on first write */ |
4882 | 5059 | ||
4883 | err = mddev->pers->run(mddev); | 5060 | err = pers->run(mddev); |
4884 | if (err) | 5061 | if (err) |
4885 | printk(KERN_ERR "md: pers->run() failed ...\n"); | 5062 | printk(KERN_ERR "md: pers->run() failed ...\n"); |
4886 | else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) { | 5063 | else if (pers->size(mddev, 0, 0) < mddev->array_sectors) { |
4887 | WARN_ONCE(!mddev->external_size, "%s: default size too small," | 5064 | WARN_ONCE(!mddev->external_size, "%s: default size too small," |
4888 | " but 'external_size' not in effect?\n", __func__); | 5065 | " but 'external_size' not in effect?\n", __func__); |
4889 | printk(KERN_ERR | 5066 | printk(KERN_ERR |
4890 | "md: invalid array_size %llu > default size %llu\n", | 5067 | "md: invalid array_size %llu > default size %llu\n", |
4891 | (unsigned long long)mddev->array_sectors / 2, | 5068 | (unsigned long long)mddev->array_sectors / 2, |
4892 | (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2); | 5069 | (unsigned long long)pers->size(mddev, 0, 0) / 2); |
4893 | err = -EINVAL; | 5070 | err = -EINVAL; |
4894 | mddev->pers->stop(mddev); | ||
4895 | } | 5071 | } |
4896 | if (err == 0 && mddev->pers->sync_request && | 5072 | if (err == 0 && pers->sync_request && |
4897 | (mddev->bitmap_info.file || mddev->bitmap_info.offset)) { | 5073 | (mddev->bitmap_info.file || mddev->bitmap_info.offset)) { |
4898 | err = bitmap_create(mddev); | 5074 | err = bitmap_create(mddev); |
4899 | if (err) { | 5075 | if (err) |
4900 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", | 5076 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", |
4901 | mdname(mddev), err); | 5077 | mdname(mddev), err); |
4902 | mddev->pers->stop(mddev); | ||
4903 | } | ||
4904 | } | 5078 | } |
4905 | if (err) { | 5079 | if (err) { |
4906 | module_put(mddev->pers->owner); | 5080 | mddev_detach(mddev); |
4907 | mddev->pers = NULL; | 5081 | pers->free(mddev, mddev->private); |
5082 | module_put(pers->owner); | ||
4908 | bitmap_destroy(mddev); | 5083 | bitmap_destroy(mddev); |
4909 | return err; | 5084 | return err; |
4910 | } | 5085 | } |
4911 | if (mddev->pers->sync_request) { | 5086 | if (mddev->queue) { |
5087 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
5088 | mddev->queue->backing_dev_info.congested_fn = md_congested; | ||
5089 | blk_queue_merge_bvec(mddev->queue, md_mergeable_bvec); | ||
5090 | } | ||
5091 | if (pers->sync_request) { | ||
4912 | if (mddev->kobj.sd && | 5092 | if (mddev->kobj.sd && |
4913 | sysfs_create_group(&mddev->kobj, &md_redundancy_group)) | 5093 | sysfs_create_group(&mddev->kobj, &md_redundancy_group)) |
4914 | printk(KERN_WARNING | 5094 | printk(KERN_WARNING |
@@ -4927,7 +5107,10 @@ int md_run(struct mddev *mddev) | |||
4927 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ | 5107 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ |
4928 | mddev->in_sync = 1; | 5108 | mddev->in_sync = 1; |
4929 | smp_wmb(); | 5109 | smp_wmb(); |
5110 | spin_lock(&mddev->lock); | ||
5111 | mddev->pers = pers; | ||
4930 | mddev->ready = 1; | 5112 | mddev->ready = 1; |
5113 | spin_unlock(&mddev->lock); | ||
4931 | rdev_for_each(rdev, mddev) | 5114 | rdev_for_each(rdev, mddev) |
4932 | if (rdev->raid_disk >= 0) | 5115 | if (rdev->raid_disk >= 0) |
4933 | if (sysfs_link_rdev(mddev, rdev)) | 5116 | if (sysfs_link_rdev(mddev, rdev)) |
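Two things happen in this md_run() rework: the error path unwinds through mddev_detach() plus pers->free() instead of the removed ->stop() method, and ->pers is published last, under mddev->lock, only once the array is fully set up. The payoff is on the reader side: anyone holding mddev->lock who finds ->pers non-NULL may use it without the reconfig mutex, because __md_stop() (below) clears the pointer under the same lock before freeing anything. A reader sketch:

	spin_lock(&mddev->lock);
	if (mddev->pers && mddev->ready)
		mddev->pers->status(seq, mddev);	/* pers cannot go away here */
	spin_unlock(&mddev->lock);
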
@@ -5070,14 +5253,38 @@ void md_stop_writes(struct mddev *mddev) | |||
5070 | } | 5253 | } |
5071 | EXPORT_SYMBOL_GPL(md_stop_writes); | 5254 | EXPORT_SYMBOL_GPL(md_stop_writes); |
5072 | 5255 | ||
5256 | static void mddev_detach(struct mddev *mddev) | ||
5257 | { | ||
5258 | struct bitmap *bitmap = mddev->bitmap; | ||
5259 | /* wait for behind writes to complete */ | ||
5260 | if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { | ||
5261 | printk(KERN_INFO "md:%s: behind writes in progress - waiting to stop.\n", | ||
5262 | mdname(mddev)); | ||
5263 | /* need to kick something here to make sure I/O goes? */ | ||
5264 | wait_event(bitmap->behind_wait, | ||
5265 | atomic_read(&bitmap->behind_writes) == 0); | ||
5266 | } | ||
5267 | if (mddev->pers && mddev->pers->quiesce) { | ||
5268 | mddev->pers->quiesce(mddev, 1); | ||
5269 | mddev->pers->quiesce(mddev, 0); | ||
5270 | } | ||
5271 | md_unregister_thread(&mddev->thread); | ||
5272 | if (mddev->queue) | ||
5273 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | ||
5274 | } | ||
5275 | |||
5073 | static void __md_stop(struct mddev *mddev) | 5276 | static void __md_stop(struct mddev *mddev) |
5074 | { | 5277 | { |
5278 | struct md_personality *pers = mddev->pers; | ||
5279 | mddev_detach(mddev); | ||
5280 | spin_lock(&mddev->lock); | ||
5075 | mddev->ready = 0; | 5281 | mddev->ready = 0; |
5076 | mddev->pers->stop(mddev); | ||
5077 | if (mddev->pers->sync_request && mddev->to_remove == NULL) | ||
5078 | mddev->to_remove = &md_redundancy_group; | ||
5079 | module_put(mddev->pers->owner); | ||
5080 | mddev->pers = NULL; | 5282 | mddev->pers = NULL; |
5283 | spin_unlock(&mddev->lock); | ||
5284 | pers->free(mddev, mddev->private); | ||
5285 | if (pers->sync_request && mddev->to_remove == NULL) | ||
5286 | mddev->to_remove = &md_redundancy_group; | ||
5287 | module_put(pers->owner); | ||
5081 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 5288 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
5082 | } | 5289 | } |
5083 | 5290 | ||
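__md_stop() now encodes a strict teardown order: detach first (mddev_detach() waits out behind-writes, stops the md thread and syncs the request queue, whose unplug function still references the personality's conf), then hide ->pers from readers holding mddev->lock, and only then let the personality free its private data. Compressed to its essentials (a sketch):

	mddev_detach(mddev);			/* 1: no more thread or queue activity */
	spin_lock(&mddev->lock);
	mddev->pers = NULL;			/* 2: readers under ->lock stop seeing it */
	spin_unlock(&mddev->lock);
	pers->free(mddev, mddev->private);	/* 3: conf can now be released */
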
@@ -5226,8 +5433,11 @@ static int do_md_stop(struct mddev *mddev, int mode, | |||
5226 | 5433 | ||
5227 | bitmap_destroy(mddev); | 5434 | bitmap_destroy(mddev); |
5228 | if (mddev->bitmap_info.file) { | 5435 | if (mddev->bitmap_info.file) { |
5229 | fput(mddev->bitmap_info.file); | 5436 | struct file *f = mddev->bitmap_info.file; |
5437 | spin_lock(&mddev->lock); | ||
5230 | mddev->bitmap_info.file = NULL; | 5438 | mddev->bitmap_info.file = NULL; |
5439 | spin_unlock(&mddev->lock); | ||
5440 | fput(f); | ||
5231 | } | 5441 | } |
5232 | mddev->bitmap_info.offset = 0; | 5442 | mddev->bitmap_info.offset = 0; |
5233 | 5443 | ||
@@ -5436,37 +5646,31 @@ static int get_array_info(struct mddev *mddev, void __user *arg) | |||
5436 | static int get_bitmap_file(struct mddev *mddev, void __user * arg) | 5646 | static int get_bitmap_file(struct mddev *mddev, void __user * arg) |
5437 | { | 5647 | { |
5438 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ | 5648 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ |
5439 | char *ptr, *buf = NULL; | 5649 | char *ptr; |
5440 | int err = -ENOMEM; | 5650 | int err; |
5441 | 5651 | ||
5442 | file = kmalloc(sizeof(*file), GFP_NOIO); | 5652 | file = kmalloc(sizeof(*file), GFP_NOIO); |
5443 | |||
5444 | if (!file) | 5653 | if (!file) |
5445 | goto out; | 5654 | return -ENOMEM; |
5446 | 5655 | ||
5656 | err = 0; | ||
5657 | spin_lock(&mddev->lock); | ||
5447 | /* bitmap disabled, zero the first byte and copy out */ | 5658 | /* bitmap disabled, zero the first byte and copy out */ |
5448 | if (!mddev->bitmap || !mddev->bitmap->storage.file) { | 5659 | if (!mddev->bitmap_info.file) |
5449 | file->pathname[0] = '\0'; | 5660 | file->pathname[0] = '\0'; |
5450 | goto copy_out; | 5661 | else if ((ptr = d_path(&mddev->bitmap_info.file->f_path, |
5451 | } | 5662 | file->pathname, sizeof(file->pathname))), |
5452 | 5663 | IS_ERR(ptr)) | |
5453 | buf = kmalloc(sizeof(file->pathname), GFP_KERNEL); | 5664 | err = PTR_ERR(ptr); |
5454 | if (!buf) | 5665 | else |
5455 | goto out; | 5666 | memmove(file->pathname, ptr, |
5456 | 5667 | sizeof(file->pathname)-(ptr-file->pathname)); | |
5457 | ptr = d_path(&mddev->bitmap->storage.file->f_path, | 5668 | spin_unlock(&mddev->lock); |
5458 | buf, sizeof(file->pathname)); | ||
5459 | if (IS_ERR(ptr)) | ||
5460 | goto out; | ||
5461 | |||
5462 | strcpy(file->pathname, ptr); | ||
5463 | 5669 | ||
5464 | copy_out: | 5670 | if (err == 0 && |
5465 | err = 0; | 5671 | copy_to_user(arg, file, sizeof(*file))) |
5466 | if (copy_to_user(arg, file, sizeof(*file))) | ||
5467 | err = -EFAULT; | 5672 | err = -EFAULT; |
5468 | out: | 5673 | |
5469 | kfree(buf); | ||
5470 | kfree(file); | 5674 | kfree(file); |
5471 | return err; | 5675 | return err; |
5472 | } | 5676 | } |
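The rewrite leans on two points: the pathname now comes from ->bitmap_info.file, which is stable under mddev->lock, instead of ->bitmap->storage.file, and d_path() builds the path from the tail of the supplied buffer, returning a pointer somewhere inside it (or an ERR_PTR() on overflow). That is why the result is memmove()d to the front rather than copied with strcpy(): source and destination can overlap. The d_path() idiom on its own (a sketch):

	char *p = d_path(&f->f_path, buf, buflen);

	if (IS_ERR(p))
		return PTR_ERR(p);
	/* p points into buf; shift the string, including its NUL, to the start */
	memmove(buf, p, buflen - (p - buf));
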
@@ -5789,22 +5993,24 @@ static int set_bitmap_file(struct mddev *mddev, int fd) | |||
5789 | 5993 | ||
5790 | if (fd >= 0) { | 5994 | if (fd >= 0) { |
5791 | struct inode *inode; | 5995 | struct inode *inode; |
5792 | if (mddev->bitmap) | 5996 | struct file *f; |
5997 | |||
5998 | if (mddev->bitmap || mddev->bitmap_info.file) | ||
5793 | return -EEXIST; /* cannot add when bitmap is present */ | 5999 | return -EEXIST; /* cannot add when bitmap is present */ |
5794 | mddev->bitmap_info.file = fget(fd); | 6000 | f = fget(fd); |
5795 | 6001 | ||
5796 | if (mddev->bitmap_info.file == NULL) { | 6002 | if (f == NULL) { |
5797 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", | 6003 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", |
5798 | mdname(mddev)); | 6004 | mdname(mddev)); |
5799 | return -EBADF; | 6005 | return -EBADF; |
5800 | } | 6006 | } |
5801 | 6007 | ||
5802 | inode = mddev->bitmap_info.file->f_mapping->host; | 6008 | inode = f->f_mapping->host; |
5803 | if (!S_ISREG(inode->i_mode)) { | 6009 | if (!S_ISREG(inode->i_mode)) { |
5804 | printk(KERN_ERR "%s: error: bitmap file must be a regular file\n", | 6010 | printk(KERN_ERR "%s: error: bitmap file must be a regular file\n", |
5805 | mdname(mddev)); | 6011 | mdname(mddev)); |
5806 | err = -EBADF; | 6012 | err = -EBADF; |
5807 | } else if (!(mddev->bitmap_info.file->f_mode & FMODE_WRITE)) { | 6013 | } else if (!(f->f_mode & FMODE_WRITE)) { |
5808 | printk(KERN_ERR "%s: error: bitmap file must be opened for write\n", | 6014 |
5809 | mdname(mddev)); | 6015 | mdname(mddev)); |
5810 | err = -EBADF; | 6016 | err = -EBADF; |
@@ -5814,10 +6020,10 @@ static int set_bitmap_file(struct mddev *mddev, int fd) | |||
5814 | err = -EBUSY; | 6020 | err = -EBUSY; |
5815 | } | 6021 | } |
5816 | if (err) { | 6022 | if (err) { |
5817 | fput(mddev->bitmap_info.file); | 6023 | fput(f); |
5818 | mddev->bitmap_info.file = NULL; | ||
5819 | return err; | 6024 | return err; |
5820 | } | 6025 | } |
6026 | mddev->bitmap_info.file = f; | ||
5821 | mddev->bitmap_info.offset = 0; /* file overrides offset */ | 6027 | mddev->bitmap_info.offset = 0; /* file overrides offset */ |
5822 | } else if (mddev->bitmap == NULL) | 6028 | } else if (mddev->bitmap == NULL) |
5823 | return -ENOENT; /* cannot remove what isn't there */ | 6029 | return -ENOENT; /* cannot remove what isn't there */ |
@@ -5836,9 +6042,13 @@ static int set_bitmap_file(struct mddev *mddev, int fd) | |||
5836 | mddev->pers->quiesce(mddev, 0); | 6042 | mddev->pers->quiesce(mddev, 0); |
5837 | } | 6043 | } |
5838 | if (fd < 0) { | 6044 | if (fd < 0) { |
5839 | if (mddev->bitmap_info.file) | 6045 | struct file *f = mddev->bitmap_info.file; |
5840 | fput(mddev->bitmap_info.file); | 6046 | if (f) { |
5841 | mddev->bitmap_info.file = NULL; | 6047 | spin_lock(&mddev->lock); |
6048 | mddev->bitmap_info.file = NULL; | ||
6049 | spin_unlock(&mddev->lock); | ||
6050 | fput(f); | ||
6051 | } | ||
5842 | } | 6052 | } |
5843 | 6053 | ||
5844 | return err; | 6054 | return err; |
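Dropping the bitmap file follows the same publish-then-release discipline as the do_md_stop() hunk above: the pointer is cleared under mddev->lock so a concurrent get_bitmap_file() can never pick up a file whose last reference is about to be dropped, and fput() runs afterwards, outside the lock. The core of it (a sketch):

	struct file *f = mddev->bitmap_info.file;

	spin_lock(&mddev->lock);
	mddev->bitmap_info.file = NULL;	/* readers under ->lock now see NULL */
	spin_unlock(&mddev->lock);
	fput(f);			/* safe: nothing can find f any more */
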
@@ -6251,6 +6461,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
6251 | case SET_DISK_FAULTY: | 6461 | case SET_DISK_FAULTY: |
6252 | err = set_disk_faulty(mddev, new_decode_dev(arg)); | 6462 | err = set_disk_faulty(mddev, new_decode_dev(arg)); |
6253 | goto out; | 6463 | goto out; |
6464 | |||
6465 | case GET_BITMAP_FILE: | ||
6466 | err = get_bitmap_file(mddev, argp); | ||
6467 | goto out; | ||
6468 | |||
6254 | } | 6469 | } |
6255 | 6470 | ||
6256 | if (cmd == ADD_NEW_DISK) | 6471 | if (cmd == ADD_NEW_DISK) |
@@ -6342,10 +6557,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
6342 | * Commands even a read-only array can execute: | 6557 | * Commands even a read-only array can execute: |
6343 | */ | 6558 | */ |
6344 | switch (cmd) { | 6559 | switch (cmd) { |
6345 | case GET_BITMAP_FILE: | ||
6346 | err = get_bitmap_file(mddev, argp); | ||
6347 | goto unlock; | ||
6348 | |||
6349 | case RESTART_ARRAY_RW: | 6560 | case RESTART_ARRAY_RW: |
6350 | err = restart_array(mddev); | 6561 | err = restart_array(mddev); |
6351 | goto unlock; | 6562 | goto unlock; |
@@ -6873,9 +7084,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
6873 | return 0; | 7084 | return 0; |
6874 | } | 7085 | } |
6875 | 7086 | ||
6876 | if (mddev_lock(mddev) < 0) | 7087 | spin_lock(&mddev->lock); |
6877 | return -EINTR; | ||
6878 | |||
6879 | if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { | 7088 | if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { |
6880 | seq_printf(seq, "%s : %sactive", mdname(mddev), | 7089 | seq_printf(seq, "%s : %sactive", mdname(mddev), |
6881 | mddev->pers ? "" : "in"); | 7090 | mddev->pers ? "" : "in"); |
@@ -6888,7 +7097,8 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
6888 | } | 7097 | } |
6889 | 7098 | ||
6890 | sectors = 0; | 7099 | sectors = 0; |
6891 | rdev_for_each(rdev, mddev) { | 7100 | rcu_read_lock(); |
7101 | rdev_for_each_rcu(rdev, mddev) { | ||
6892 | char b[BDEVNAME_SIZE]; | 7102 | char b[BDEVNAME_SIZE]; |
6893 | seq_printf(seq, " %s[%d]", | 7103 | seq_printf(seq, " %s[%d]", |
6894 | bdevname(rdev->bdev,b), rdev->desc_nr); | 7104 | bdevname(rdev->bdev,b), rdev->desc_nr); |
@@ -6904,6 +7114,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
6904 | seq_printf(seq, "(R)"); | 7114 | seq_printf(seq, "(R)"); |
6905 | sectors += rdev->sectors; | 7115 | sectors += rdev->sectors; |
6906 | } | 7116 | } |
7117 | rcu_read_unlock(); | ||
6907 | 7118 | ||
6908 | if (!list_empty(&mddev->disks)) { | 7119 | if (!list_empty(&mddev->disks)) { |
6909 | if (mddev->pers) | 7120 | if (mddev->pers) |
@@ -6946,7 +7157,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
6946 | 7157 | ||
6947 | seq_printf(seq, "\n"); | 7158 | seq_printf(seq, "\n"); |
6948 | } | 7159 | } |
6949 | mddev_unlock(mddev); | 7160 | spin_unlock(&mddev->lock); |
6950 | 7161 | ||
6951 | return 0; | 7162 | return 0; |
6952 | } | 7163 | } |
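md_seq_show() sheds the mutex entirely: mddev->lock keeps ->pers stable for the duration, and the rdev list is walked under RCU. That is safe as long as rdev removal is RCU-deferred (unlink with list_del_rcu(), then wait a grace period before freeing, as md's unbind path arranges), so an rdev seen inside the read-side section stays valid until the section ends. The traversal on its own (a sketch):

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		sectors += rdev->sectors;	/* rdev cannot be freed here */
	rcu_read_unlock();
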
@@ -7102,7 +7313,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi) | |||
7102 | if (mddev->safemode == 1) | 7313 | if (mddev->safemode == 1) |
7103 | mddev->safemode = 0; | 7314 | mddev->safemode = 0; |
7104 | if (mddev->in_sync) { | 7315 | if (mddev->in_sync) { |
7105 | spin_lock_irq(&mddev->write_lock); | 7316 | spin_lock(&mddev->lock); |
7106 | if (mddev->in_sync) { | 7317 | if (mddev->in_sync) { |
7107 | mddev->in_sync = 0; | 7318 | mddev->in_sync = 0; |
7108 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 7319 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); |
@@ -7110,7 +7321,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi) | |||
7110 | md_wakeup_thread(mddev->thread); | 7321 | md_wakeup_thread(mddev->thread); |
7111 | did_change = 1; | 7322 | did_change = 1; |
7112 | } | 7323 | } |
7113 | spin_unlock_irq(&mddev->write_lock); | 7324 | spin_unlock(&mddev->lock); |
7114 | } | 7325 | } |
7115 | if (did_change) | 7326 | if (did_change) |
7116 | sysfs_notify_dirent_safe(mddev->sysfs_state); | 7327 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
@@ -7148,7 +7359,7 @@ int md_allow_write(struct mddev *mddev) | |||
7148 | if (!mddev->pers->sync_request) | 7359 | if (!mddev->pers->sync_request) |
7149 | return 0; | 7360 | return 0; |
7150 | 7361 | ||
7151 | spin_lock_irq(&mddev->write_lock); | 7362 | spin_lock(&mddev->lock); |
7152 | if (mddev->in_sync) { | 7363 | if (mddev->in_sync) { |
7153 | mddev->in_sync = 0; | 7364 | mddev->in_sync = 0; |
7154 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 7365 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); |
@@ -7156,11 +7367,11 @@ int md_allow_write(struct mddev *mddev) | |||
7156 | if (mddev->safemode_delay && | 7367 | if (mddev->safemode_delay && |
7157 | mddev->safemode == 0) | 7368 | mddev->safemode == 0) |
7158 | mddev->safemode = 1; | 7369 | mddev->safemode = 1; |
7159 | spin_unlock_irq(&mddev->write_lock); | 7370 | spin_unlock(&mddev->lock); |
7160 | md_update_sb(mddev, 0); | 7371 | md_update_sb(mddev, 0); |
7161 | sysfs_notify_dirent_safe(mddev->sysfs_state); | 7372 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
7162 | } else | 7373 | } else |
7163 | spin_unlock_irq(&mddev->write_lock); | 7374 | spin_unlock(&mddev->lock); |
7164 | 7375 | ||
7165 | if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) | 7376 | if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) |
7166 | return -EAGAIN; | 7377 | return -EAGAIN; |
@@ -7513,6 +7724,7 @@ void md_do_sync(struct md_thread *thread) | |||
7513 | skip: | 7724 | skip: |
7514 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 7725 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
7515 | 7726 | ||
7727 | spin_lock(&mddev->lock); | ||
7516 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 7728 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
7517 | /* We completed so min/max setting can be forgotten if used. */ | 7729 | /* We completed so min/max setting can be forgotten if used. */ |
7518 | if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | 7730 | if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) |
@@ -7521,6 +7733,8 @@ void md_do_sync(struct md_thread *thread) | |||
7521 | } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | 7733 | } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) |
7522 | mddev->resync_min = mddev->curr_resync_completed; | 7734 | mddev->resync_min = mddev->curr_resync_completed; |
7523 | mddev->curr_resync = 0; | 7735 | mddev->curr_resync = 0; |
7736 | spin_unlock(&mddev->lock); | ||
7737 | |||
7524 | wake_up(&resync_wait); | 7738 | wake_up(&resync_wait); |
7525 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 7739 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
7526 | md_wakeup_thread(mddev->thread); | 7740 | md_wakeup_thread(mddev->thread); |
@@ -7688,7 +7902,7 @@ void md_check_recovery(struct mddev *mddev) | |||
7688 | 7902 | ||
7689 | if (!mddev->external) { | 7903 | if (!mddev->external) { |
7690 | int did_change = 0; | 7904 | int did_change = 0; |
7691 | spin_lock_irq(&mddev->write_lock); | 7905 | spin_lock(&mddev->lock); |
7692 | if (mddev->safemode && | 7906 | if (mddev->safemode && |
7693 | !atomic_read(&mddev->writes_pending) && | 7907 | !atomic_read(&mddev->writes_pending) && |
7694 | !mddev->in_sync && | 7908 | !mddev->in_sync && |
@@ -7699,7 +7913,7 @@ void md_check_recovery(struct mddev *mddev) | |||
7699 | } | 7913 | } |
7700 | if (mddev->safemode == 1) | 7914 | if (mddev->safemode == 1) |
7701 | mddev->safemode = 0; | 7915 | mddev->safemode = 0; |
7702 | spin_unlock_irq(&mddev->write_lock); | 7916 | spin_unlock(&mddev->lock); |
7703 | if (did_change) | 7917 | if (did_change) |
7704 | sysfs_notify_dirent_safe(mddev->sysfs_state); | 7918 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
7705 | } | 7919 | } |
@@ -7721,7 +7935,9 @@ void md_check_recovery(struct mddev *mddev) | |||
7721 | * any transients in the value of "sync_action". | 7935 | * any transients in the value of "sync_action". |
7722 | */ | 7936 | */ |
7723 | mddev->curr_resync_completed = 0; | 7937 | mddev->curr_resync_completed = 0; |
7938 | spin_lock(&mddev->lock); | ||
7724 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 7939 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
7940 | spin_unlock(&mddev->lock); | ||
7725 | /* Clear some bits that don't mean anything, but | 7941 | /* Clear some bits that don't mean anything, but |
7726 | * might be left set | 7942 | * might be left set |
7727 | */ | 7943 | */ |
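Setting MD_RECOVERY_RUNNING under mddev->lock pairs with the {min,max}_sync_store() hunks earlier in this file: those handlers test the bit and update resync_{min,max} under the same spinlock, so a store can no longer slip in between the decision to start a resync and the bit becoming visible. The store-side half of the handshake, reduced (a sketch):

	spin_lock(&mddev->lock);
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		err = -EBUSY;		/* resync already in flight */
	else
		mddev->resync_min = min;
	spin_unlock(&mddev->lock);
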
diff --git a/drivers/md/md.h b/drivers/md/md.h index 03cec5bdcaae..318ca8fd430f 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -386,7 +386,18 @@ struct mddev { | |||
386 | 386 | ||
387 | struct work_struct del_work; /* used for delayed sysfs removal */ | 387 | struct work_struct del_work; /* used for delayed sysfs removal */ |
388 | 388 | ||
389 | spinlock_t write_lock; | 389 | /* "lock" protects: |
390 | * flush_bio transition from NULL to !NULL | ||
391 | * rdev superblocks, events | ||
392 | * clearing MD_CHANGE_* | ||
393 | * in_sync - and related safemode and MD_CHANGE changes | ||
394 | * pers (also protected by reconfig_mutex and pending IO). | ||
395 | * clearing ->bitmap | ||
396 | * clearing ->bitmap_info.file | ||
397 | * changing ->resync_{min,max} | ||
398 | * setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max}) | ||
399 | */ | ||
400 | spinlock_t lock; | ||
390 | wait_queue_head_t sb_wait; /* for waiting on superblock updates */ | 401 | wait_queue_head_t sb_wait; /* for waiting on superblock updates */ |
391 | atomic_t pending_writes; /* number of active superblock writes */ | 402 | atomic_t pending_writes; /* number of active superblock writes */ |
392 | 403 | ||
@@ -439,13 +450,30 @@ struct mddev { | |||
439 | void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); | 450 | void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); |
440 | }; | 451 | }; |
441 | 452 | ||
442 | static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) | 453 | static inline int __must_check mddev_lock(struct mddev *mddev) |
443 | { | 454 | { |
444 | int faulty = test_bit(Faulty, &rdev->flags); | 455 | return mutex_lock_interruptible(&mddev->reconfig_mutex); |
445 | if (atomic_dec_and_test(&rdev->nr_pending) && faulty) | 456 | } |
446 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 457 | |
458 | /* Sometimes we need to take the lock in a situation where | ||
459 | * failure due to interrupts is not acceptable. | ||
460 | */ | ||
461 | static inline void mddev_lock_nointr(struct mddev *mddev) | ||
462 | { | ||
463 | mutex_lock(&mddev->reconfig_mutex); | ||
464 | } | ||
465 | |||
466 | static inline int mddev_is_locked(struct mddev *mddev) | ||
467 | { | ||
468 | return mutex_is_locked(&mddev->reconfig_mutex); | ||
447 | } | 469 | } |
448 | 470 | ||
471 | static inline int mddev_trylock(struct mddev *mddev) | ||
472 | { | ||
473 | return mutex_trylock(&mddev->reconfig_mutex); | ||
474 | } | ||
475 | extern void mddev_unlock(struct mddev *mddev); | ||
476 | |||
449 | static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) | 477 | static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) |
450 | { | 478 | { |
451 | atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); | 479 | atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); |
@@ -459,7 +487,7 @@ struct md_personality | |||
459 | struct module *owner; | 487 | struct module *owner; |
460 | void (*make_request)(struct mddev *mddev, struct bio *bio); | 488 | void (*make_request)(struct mddev *mddev, struct bio *bio); |
461 | int (*run)(struct mddev *mddev); | 489 | int (*run)(struct mddev *mddev); |
462 | int (*stop)(struct mddev *mddev); | 490 | void (*free)(struct mddev *mddev, void *priv); |
463 | void (*status)(struct seq_file *seq, struct mddev *mddev); | 491 | void (*status)(struct seq_file *seq, struct mddev *mddev); |
464 | /* error_handler must set ->faulty and clear ->in_sync | 492 | /* error_handler must set ->faulty and clear ->in_sync |
465 | * if appropriate, and should abort recovery if needed | 493 | * if appropriate, and should abort recovery if needed |
@@ -490,6 +518,13 @@ struct md_personality | |||
490 | * array. | 518 | * array. |
491 | */ | 519 | */ |
492 | void *(*takeover) (struct mddev *mddev); | 520 | void *(*takeover) (struct mddev *mddev); |
521 | /* congested implements bdi.congested_fn(). | ||
522 | * Will not be called while array is 'suspended' */ | ||
523 | int (*congested)(struct mddev *mddev, int bits); | ||
524 | /* mergeable_bvec is used to implement ->merge_bvec_fn */ | ||
525 | int (*mergeable_bvec)(struct mddev *mddev, | ||
526 | struct bvec_merge_data *bvm, | ||
527 | struct bio_vec *biovec); | ||
493 | }; | 528 | }; |
494 | 529 | ||
495 | struct md_sysfs_entry { | 530 | struct md_sysfs_entry { |
@@ -624,4 +659,14 @@ static inline int mddev_check_plugged(struct mddev *mddev) | |||
624 | return !!blk_check_plugged(md_unplug, mddev, | 659 | return !!blk_check_plugged(md_unplug, mddev, |
625 | sizeof(struct blk_plug_cb)); | 660 | sizeof(struct blk_plug_cb)); |
626 | } | 661 | } |
662 | |||
663 | static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) | ||
664 | { | ||
665 | int faulty = test_bit(Faulty, &rdev->flags); | ||
666 | if (atomic_dec_and_test(&rdev->nr_pending) && faulty) { | ||
667 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
668 | md_wakeup_thread(mddev->thread); | ||
669 | } | ||
670 | } | ||
671 | |||
627 | #endif /* _MD_MD_H */ | 672 | #endif /* _MD_MD_H */ |
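With ->congested and ->mergeable_bvec promoted to md_personality methods, personalities stop wiring themselves into the request queue; md core installs a single md_congested()/md_mergeable_bvec() pair at run time (see the md_run() hunk above) and forwards to whichever personality is current. A plausible shape for the congested shim (a sketch; the real md_congested() presumably also accounts for a suspended array, which is why the per-personality functions below lose their mddev_congested() calls):

	static int md_congested(void *data, int bits)
	{
		struct mddev *mddev = data;
		struct md_personality *pers = mddev->pers;

		return pers && pers->congested ?
			pers->congested(mddev, bits) : 0;
	}
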
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 399272f9c042..ac3ede2bd00e 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -153,15 +153,11 @@ static void multipath_status (struct seq_file *seq, struct mddev *mddev) | |||
153 | seq_printf (seq, "]"); | 153 | seq_printf (seq, "]"); |
154 | } | 154 | } |
155 | 155 | ||
156 | static int multipath_congested(void *data, int bits) | 156 | static int multipath_congested(struct mddev *mddev, int bits) |
157 | { | 157 | { |
158 | struct mddev *mddev = data; | ||
159 | struct mpconf *conf = mddev->private; | 158 | struct mpconf *conf = mddev->private; |
160 | int i, ret = 0; | 159 | int i, ret = 0; |
161 | 160 | ||
162 | if (mddev_congested(mddev, bits)) | ||
163 | return 1; | ||
164 | |||
165 | rcu_read_lock(); | 161 | rcu_read_lock(); |
166 | for (i = 0; i < mddev->raid_disks ; i++) { | 162 | for (i = 0; i < mddev->raid_disks ; i++) { |
167 | struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); | 163 | struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); |
@@ -403,7 +399,7 @@ static int multipath_run (struct mddev *mddev) | |||
403 | /* | 399 | /* |
404 | * copy the already verified devices into our private MULTIPATH | 400 | * copy the already verified devices into our private MULTIPATH |
405 | * bookkeeping area. [whatever we allocate in multipath_run(), | 401 | * bookkeeping area. [whatever we allocate in multipath_run(), |
406 | * should be freed in multipath_stop()] | 402 | * should be freed in multipath_free()] |
407 | */ | 403 | */ |
408 | 404 | ||
409 | conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL); | 405 | conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL); |
@@ -489,9 +485,6 @@ static int multipath_run (struct mddev *mddev) | |||
489 | */ | 485 | */ |
490 | md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); | 486 | md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); |
491 | 487 | ||
492 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; | ||
493 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
494 | |||
495 | if (md_integrity_register(mddev)) | 488 | if (md_integrity_register(mddev)) |
496 | goto out_free_conf; | 489 | goto out_free_conf; |
497 | 490 | ||
@@ -507,17 +500,13 @@ out: | |||
507 | return -EIO; | 500 | return -EIO; |
508 | } | 501 | } |
509 | 502 | ||
510 | static int multipath_stop (struct mddev *mddev) | 503 | static void multipath_free(struct mddev *mddev, void *priv) |
511 | { | 504 | { |
512 | struct mpconf *conf = mddev->private; | 505 | struct mpconf *conf = priv; |
513 | 506 | ||
514 | md_unregister_thread(&mddev->thread); | ||
515 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | ||
516 | mempool_destroy(conf->pool); | 507 | mempool_destroy(conf->pool); |
517 | kfree(conf->multipaths); | 508 | kfree(conf->multipaths); |
518 | kfree(conf); | 509 | kfree(conf); |
519 | mddev->private = NULL; | ||
520 | return 0; | ||
521 | } | 510 | } |
522 | 511 | ||
523 | static struct md_personality multipath_personality = | 512 | static struct md_personality multipath_personality = |
@@ -527,12 +516,13 @@ static struct md_personality multipath_personality = | |||
527 | .owner = THIS_MODULE, | 516 | .owner = THIS_MODULE, |
528 | .make_request = multipath_make_request, | 517 | .make_request = multipath_make_request, |
529 | .run = multipath_run, | 518 | .run = multipath_run, |
530 | .stop = multipath_stop, | 519 | .free = multipath_free, |
531 | .status = multipath_status, | 520 | .status = multipath_status, |
532 | .error_handler = multipath_error, | 521 | .error_handler = multipath_error, |
533 | .hot_add_disk = multipath_add_disk, | 522 | .hot_add_disk = multipath_add_disk, |
534 | .hot_remove_disk= multipath_remove_disk, | 523 | .hot_remove_disk= multipath_remove_disk, |
535 | .size = multipath_size, | 524 | .size = multipath_size, |
525 | .congested = multipath_congested, | ||
536 | }; | 526 | }; |
537 | 527 | ||
538 | static int __init multipath_init (void) | 528 | static int __init multipath_init (void) |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ba6b85de96d2..a13f738a7b39 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -25,17 +25,13 @@ | |||
25 | #include "raid0.h" | 25 | #include "raid0.h" |
26 | #include "raid5.h" | 26 | #include "raid5.h" |
27 | 27 | ||
28 | static int raid0_congested(void *data, int bits) | 28 | static int raid0_congested(struct mddev *mddev, int bits) |
29 | { | 29 | { |
30 | struct mddev *mddev = data; | ||
31 | struct r0conf *conf = mddev->private; | 30 | struct r0conf *conf = mddev->private; |
32 | struct md_rdev **devlist = conf->devlist; | 31 | struct md_rdev **devlist = conf->devlist; |
33 | int raid_disks = conf->strip_zone[0].nb_dev; | 32 | int raid_disks = conf->strip_zone[0].nb_dev; |
34 | int i, ret = 0; | 33 | int i, ret = 0; |
35 | 34 | ||
36 | if (mddev_congested(mddev, bits)) | ||
37 | return 1; | ||
38 | |||
39 | for (i = 0; i < raid_disks && !ret ; i++) { | 35 | for (i = 0; i < raid_disks && !ret ; i++) { |
40 | struct request_queue *q = bdev_get_queue(devlist[i]->bdev); | 36 | struct request_queue *q = bdev_get_queue(devlist[i]->bdev); |
41 | 37 | ||
@@ -263,8 +259,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
263 | mdname(mddev), | 259 | mdname(mddev), |
264 | (unsigned long long)smallest->sectors); | 260 | (unsigned long long)smallest->sectors); |
265 | } | 261 | } |
266 | mddev->queue->backing_dev_info.congested_fn = raid0_congested; | ||
267 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
268 | 262 | ||
269 | /* | 263 | /* |
270 | * now since we have the hard sector sizes, we can make sure | 264 | * now since we have the hard sector sizes, we can make sure |
@@ -356,17 +350,16 @@ static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, | |||
356 | 350 | ||
357 | /** | 351 | /** |
358 | * raid0_mergeable_bvec -- tell bio layer if two requests can be merged | 352 | * raid0_mergeable_bvec -- tell bio layer if two requests can be merged |
359 | * @q: request queue | 353 | * @mddev: the md device |
360 | * @bvm: properties of new bio | 354 | * @bvm: properties of new bio |
361 | * @biovec: the request that could be merged to it. | 355 | * @biovec: the request that could be merged to it. |
362 | * | 356 | * |
363 | * Return amount of bytes we can accept at this offset | 357 | * Return amount of bytes we can accept at this offset |
364 | */ | 358 | */ |
365 | static int raid0_mergeable_bvec(struct request_queue *q, | 359 | static int raid0_mergeable_bvec(struct mddev *mddev, |
366 | struct bvec_merge_data *bvm, | 360 | struct bvec_merge_data *bvm, |
367 | struct bio_vec *biovec) | 361 | struct bio_vec *biovec) |
368 | { | 362 | { |
369 | struct mddev *mddev = q->queuedata; | ||
370 | struct r0conf *conf = mddev->private; | 363 | struct r0conf *conf = mddev->private; |
371 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 364 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
372 | sector_t sector_offset = sector; | 365 | sector_t sector_offset = sector; |
@@ -422,7 +415,7 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks | |||
422 | return array_sectors; | 415 | return array_sectors; |
423 | } | 416 | } |
424 | 417 | ||
425 | static int raid0_stop(struct mddev *mddev); | 418 | static void raid0_free(struct mddev *mddev, void *priv); |
426 | 419 | ||
427 | static int raid0_run(struct mddev *mddev) | 420 | static int raid0_run(struct mddev *mddev) |
428 | { | 421 | { |
@@ -471,26 +464,22 @@ static int raid0_run(struct mddev *mddev) | |||
471 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; | 464 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; |
472 | } | 465 | } |
473 | 466 | ||
474 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); | ||
475 | dump_zones(mddev); | 467 | dump_zones(mddev); |
476 | 468 | ||
477 | ret = md_integrity_register(mddev); | 469 | ret = md_integrity_register(mddev); |
478 | if (ret) | 470 | if (ret) |
479 | raid0_stop(mddev); | 471 | raid0_free(mddev, conf); |
480 | 472 | ||
481 | return ret; | 473 | return ret; |
482 | } | 474 | } |
483 | 475 | ||
484 | static int raid0_stop(struct mddev *mddev) | 476 | static void raid0_free(struct mddev *mddev, void *priv) |
485 | { | 477 | { |
486 | struct r0conf *conf = mddev->private; | 478 | struct r0conf *conf = priv; |
487 | 479 | ||
488 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | ||
489 | kfree(conf->strip_zone); | 480 | kfree(conf->strip_zone); |
490 | kfree(conf->devlist); | 481 | kfree(conf->devlist); |
491 | kfree(conf); | 482 | kfree(conf); |
492 | mddev->private = NULL; | ||
493 | return 0; | ||
494 | } | 483 | } |
495 | 484 | ||
496 | /* | 485 | /* |
@@ -724,11 +713,13 @@ static struct md_personality raid0_personality= | |||
724 | .owner = THIS_MODULE, | 713 | .owner = THIS_MODULE, |
725 | .make_request = raid0_make_request, | 714 | .make_request = raid0_make_request, |
726 | .run = raid0_run, | 715 | .run = raid0_run, |
727 | .stop = raid0_stop, | 716 | .free = raid0_free, |
728 | .status = raid0_status, | 717 | .status = raid0_status, |
729 | .size = raid0_size, | 718 | .size = raid0_size, |
730 | .takeover = raid0_takeover, | 719 | .takeover = raid0_takeover, |
731 | .quiesce = raid0_quiesce, | 720 | .quiesce = raid0_quiesce, |
721 | .congested = raid0_congested, | ||
722 | .mergeable_bvec = raid0_mergeable_bvec, | ||
732 | }; | 723 | }; |
733 | 724 | ||
734 | static int __init raid0_init (void) | 725 | static int __init raid0_init (void) |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 40b35be34f8d..5dd0c2e59ab9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -701,11 +701,10 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
701 | return best_disk; | 701 | return best_disk; |
702 | } | 702 | } |
703 | 703 | ||
704 | static int raid1_mergeable_bvec(struct request_queue *q, | 704 | static int raid1_mergeable_bvec(struct mddev *mddev, |
705 | struct bvec_merge_data *bvm, | 705 | struct bvec_merge_data *bvm, |
706 | struct bio_vec *biovec) | 706 | struct bio_vec *biovec) |
707 | { | 707 | { |
708 | struct mddev *mddev = q->queuedata; | ||
709 | struct r1conf *conf = mddev->private; | 708 | struct r1conf *conf = mddev->private; |
710 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 709 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
711 | int max = biovec->bv_len; | 710 | int max = biovec->bv_len; |
@@ -734,7 +733,7 @@ static int raid1_mergeable_bvec(struct request_queue *q, | |||
734 | 733 | ||
735 | } | 734 | } |
736 | 735 | ||
737 | int md_raid1_congested(struct mddev *mddev, int bits) | 736 | static int raid1_congested(struct mddev *mddev, int bits) |
738 | { | 737 | { |
739 | struct r1conf *conf = mddev->private; | 738 | struct r1conf *conf = mddev->private; |
740 | int i, ret = 0; | 739 | int i, ret = 0; |
@@ -763,15 +762,6 @@ int md_raid1_congested(struct mddev *mddev, int bits) | |||
763 | rcu_read_unlock(); | 762 | rcu_read_unlock(); |
764 | return ret; | 763 | return ret; |
765 | } | 764 | } |
766 | EXPORT_SYMBOL_GPL(md_raid1_congested); | ||
767 | |||
768 | static int raid1_congested(void *data, int bits) | ||
769 | { | ||
770 | struct mddev *mddev = data; | ||
771 | |||
772 | return mddev_congested(mddev, bits) || | ||
773 | md_raid1_congested(mddev, bits); | ||
774 | } | ||
775 | 765 | ||
776 | static void flush_pending_writes(struct r1conf *conf) | 766 | static void flush_pending_writes(struct r1conf *conf) |
777 | { | 767 | { |
@@ -2882,7 +2872,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2882 | return ERR_PTR(err); | 2872 | return ERR_PTR(err); |
2883 | } | 2873 | } |
2884 | 2874 | ||
2885 | static int stop(struct mddev *mddev); | 2875 | static void raid1_free(struct mddev *mddev, void *priv); |
2886 | static int run(struct mddev *mddev) | 2876 | static int run(struct mddev *mddev) |
2887 | { | 2877 | { |
2888 | struct r1conf *conf; | 2878 | struct r1conf *conf; |
@@ -2904,7 +2894,7 @@ static int run(struct mddev *mddev) | |||
2904 | /* | 2894 | /* |
2905 | * copy the already verified devices into our private RAID1 | 2895 | * copy the already verified devices into our private RAID1 |
2906 | * bookkeeping area. [whatever we allocate in run(), | 2896 | * bookkeeping area. [whatever we allocate in run(), |
2907 | * should be freed in stop()] | 2897 | * should be freed in raid1_free()] |
2908 | */ | 2898 | */ |
2909 | if (mddev->private == NULL) | 2899 | if (mddev->private == NULL) |
2910 | conf = setup_conf(mddev); | 2900 | conf = setup_conf(mddev); |
@@ -2955,10 +2945,6 @@ static int run(struct mddev *mddev) | |||
2955 | md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); | 2945 | md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); |
2956 | 2946 | ||
2957 | if (mddev->queue) { | 2947 | if (mddev->queue) { |
2958 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | ||
2959 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
2960 | blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec); | ||
2961 | |||
2962 | if (discard_supported) | 2948 | if (discard_supported) |
2963 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, | 2949 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, |
2964 | mddev->queue); | 2950 | mddev->queue); |
@@ -2968,37 +2954,23 @@ static int run(struct mddev *mddev) | |||
2968 | } | 2954 | } |
2969 | 2955 | ||
2970 | ret = md_integrity_register(mddev); | 2956 | ret = md_integrity_register(mddev); |
2971 | if (ret) | 2957 | if (ret) { |
2972 | stop(mddev); | 2958 | md_unregister_thread(&mddev->thread); |
2959 | raid1_free(mddev, conf); | ||
2960 | } | ||
2973 | return ret; | 2961 | return ret; |
2974 | } | 2962 | } |
2975 | 2963 | ||
2976 | static int stop(struct mddev *mddev) | 2964 | static void raid1_free(struct mddev *mddev, void *priv) |
2977 | { | 2965 | { |
2978 | struct r1conf *conf = mddev->private; | 2966 | struct r1conf *conf = priv; |
2979 | struct bitmap *bitmap = mddev->bitmap; | ||
2980 | 2967 | ||
2981 | /* wait for behind writes to complete */ | ||
2982 | if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { | ||
2983 | printk(KERN_INFO "md/raid1:%s: behind writes in progress - waiting to stop.\n", | ||
2984 | mdname(mddev)); | ||
2985 | /* need to kick something here to make sure I/O goes? */ | ||
2986 | wait_event(bitmap->behind_wait, | ||
2987 | atomic_read(&bitmap->behind_writes) == 0); | ||
2988 | } | ||
2989 | |||
2990 | freeze_array(conf, 0); | ||
2991 | unfreeze_array(conf); | ||
2992 | |||
2993 | md_unregister_thread(&mddev->thread); | ||
2994 | if (conf->r1bio_pool) | 2968 | if (conf->r1bio_pool) |
2995 | mempool_destroy(conf->r1bio_pool); | 2969 | mempool_destroy(conf->r1bio_pool); |
2996 | kfree(conf->mirrors); | 2970 | kfree(conf->mirrors); |
2997 | safe_put_page(conf->tmppage); | 2971 | safe_put_page(conf->tmppage); |
2998 | kfree(conf->poolinfo); | 2972 | kfree(conf->poolinfo); |
2999 | kfree(conf); | 2973 | kfree(conf); |
3000 | mddev->private = NULL; | ||
3001 | return 0; | ||
3002 | } | 2974 | } |
3003 | 2975 | ||
3004 | static int raid1_resize(struct mddev *mddev, sector_t sectors) | 2976 | static int raid1_resize(struct mddev *mddev, sector_t sectors) |
@@ -3181,7 +3153,7 @@ static struct md_personality raid1_personality = | |||
3181 | .owner = THIS_MODULE, | 3153 | .owner = THIS_MODULE, |
3182 | .make_request = make_request, | 3154 | .make_request = make_request, |
3183 | .run = run, | 3155 | .run = run, |
3184 | .stop = stop, | 3156 | .free = raid1_free, |
3185 | .status = status, | 3157 | .status = status, |
3186 | .error_handler = error, | 3158 | .error_handler = error, |
3187 | .hot_add_disk = raid1_add_disk, | 3159 | .hot_add_disk = raid1_add_disk, |
@@ -3193,6 +3165,8 @@ static struct md_personality raid1_personality = | |||
3193 | .check_reshape = raid1_reshape, | 3165 | .check_reshape = raid1_reshape, |
3194 | .quiesce = raid1_quiesce, | 3166 | .quiesce = raid1_quiesce, |
3195 | .takeover = raid1_takeover, | 3167 | .takeover = raid1_takeover, |
3168 | .congested = raid1_congested, | ||
3169 | .mergeable_bvec = raid1_mergeable_bvec, | ||
3196 | }; | 3170 | }; |
3197 | 3171 | ||
3198 | static int __init raid_init(void) | 3172 | static int __init raid_init(void) |
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 33bda55ef9f7..14ebb288c1ef 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
@@ -170,7 +170,4 @@ struct r1bio { | |||
170 | */ | 170 | */ |
171 | #define R1BIO_MadeGood 7 | 171 | #define R1BIO_MadeGood 7 |
172 | #define R1BIO_WriteError 8 | 172 | #define R1BIO_WriteError 8 |
173 | |||
174 | extern int md_raid1_congested(struct mddev *mddev, int bits); | ||
175 | |||
176 | #endif | 173 | #endif |
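With md_raid1_congested() unexported and its raid1_congested() wrapper gone, the queue-level congestion callback has to live in md core and forward to the new .congested personality method. A hedged sketch of that dispatch, mirroring the structure of the deleted wrapper (the function name, and the assumption that mddev_congested() survives the refactor, are mine, not this diff's):

static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	struct md_personality *pers = mddev->pers;

	if (mddev_congested(mddev, bits))	/* array-level state */
		return 1;
	/* personality-level state, formerly md_raid1_congested() etc. */
	return pers && pers->congested ? pers->congested(mddev, bits) : 0;
}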
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 32e282f4c83c..b8d76b1fba64 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -674,7 +674,7 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | |||
674 | 674 | ||
675 | /** | 675 | /** |
676 | * raid10_mergeable_bvec -- tell bio layer if two requests can be merged | 676 |
677 | * @q: request queue | 677 | * @mddev: the md device |
678 | * @bvm: properties of new bio | 678 | * @bvm: properties of new bio |
679 | * @biovec: the request that could be merged to it. | 679 | * @biovec: the request that could be merged to it. |
680 | * | 680 | * |
@@ -682,11 +682,10 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | |||
682 | * This requires checking for end-of-chunk if near_copies != raid_disks, | 682 | * This requires checking for end-of-chunk if near_copies != raid_disks, |
683 | * and for subordinate merge_bvec_fns if merge_check_needed. | 683 | * and for subordinate merge_bvec_fns if merge_check_needed. |
684 | */ | 684 | */ |
685 | static int raid10_mergeable_bvec(struct request_queue *q, | 685 | static int raid10_mergeable_bvec(struct mddev *mddev, |
686 | struct bvec_merge_data *bvm, | 686 | struct bvec_merge_data *bvm, |
687 | struct bio_vec *biovec) | 687 | struct bio_vec *biovec) |
688 | { | 688 | { |
689 | struct mddev *mddev = q->queuedata; | ||
690 | struct r10conf *conf = mddev->private; | 689 | struct r10conf *conf = mddev->private; |
691 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 690 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
692 | int max; | 691 | int max; |
@@ -910,7 +909,7 @@ retry: | |||
910 | return rdev; | 909 | return rdev; |
911 | } | 910 | } |
912 | 911 | ||
913 | int md_raid10_congested(struct mddev *mddev, int bits) | 912 | static int raid10_congested(struct mddev *mddev, int bits) |
914 | { | 913 | { |
915 | struct r10conf *conf = mddev->private; | 914 | struct r10conf *conf = mddev->private; |
916 | int i, ret = 0; | 915 | int i, ret = 0; |
@@ -934,15 +933,6 @@ int md_raid10_congested(struct mddev *mddev, int bits) | |||
934 | rcu_read_unlock(); | 933 | rcu_read_unlock(); |
935 | return ret; | 934 | return ret; |
936 | } | 935 | } |
937 | EXPORT_SYMBOL_GPL(md_raid10_congested); | ||
938 | |||
939 | static int raid10_congested(void *data, int bits) | ||
940 | { | ||
941 | struct mddev *mddev = data; | ||
942 | |||
943 | return mddev_congested(mddev, bits) || | ||
944 | md_raid10_congested(mddev, bits); | ||
945 | } | ||
946 | 936 | ||
947 | static void flush_pending_writes(struct r10conf *conf) | 937 | static void flush_pending_writes(struct r10conf *conf) |
948 | { | 938 | { |
@@ -3757,8 +3747,6 @@ static int run(struct mddev *mddev) | |||
3757 | if (mddev->queue) { | 3747 | if (mddev->queue) { |
3758 | int stripe = conf->geo.raid_disks * | 3748 | int stripe = conf->geo.raid_disks * |
3759 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | 3749 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); |
3760 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | ||
3761 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
3762 | 3750 | ||
3763 | /* Calculate max read-ahead size. | 3751 | /* Calculate max read-ahead size. |
3764 | * We need to readahead at least twice a whole stripe.... | 3752 | * We need to readahead at least twice a whole stripe.... |
@@ -3767,7 +3755,6 @@ static int run(struct mddev *mddev) | |||
3767 | stripe /= conf->geo.near_copies; | 3755 | stripe /= conf->geo.near_copies; |
3768 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | 3756 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) |
3769 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | 3757 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; |
3770 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | ||
3771 | } | 3758 | } |
3772 | 3759 | ||
3773 | if (md_integrity_register(mddev)) | 3760 | if (md_integrity_register(mddev)) |
@@ -3811,17 +3798,9 @@ out: | |||
3811 | return -EIO; | 3798 | return -EIO; |
3812 | } | 3799 | } |
3813 | 3800 | ||
3814 | static int stop(struct mddev *mddev) | 3801 | static void raid10_free(struct mddev *mddev, void *priv) |
3815 | { | 3802 | { |
3816 | struct r10conf *conf = mddev->private; | 3803 | struct r10conf *conf = priv; |
3817 | |||
3818 | raise_barrier(conf, 0); | ||
3819 | lower_barrier(conf); | ||
3820 | |||
3821 | md_unregister_thread(&mddev->thread); | ||
3822 | if (mddev->queue) | ||
3823 | /* the unplug fn references 'conf'*/ | ||
3824 | blk_sync_queue(mddev->queue); | ||
3825 | 3804 | ||
3826 | if (conf->r10bio_pool) | 3805 | if (conf->r10bio_pool) |
3827 | mempool_destroy(conf->r10bio_pool); | 3806 | mempool_destroy(conf->r10bio_pool); |
@@ -3830,8 +3809,6 @@ static int stop(struct mddev *mddev) | |||
3830 | kfree(conf->mirrors_old); | 3809 | kfree(conf->mirrors_old); |
3831 | kfree(conf->mirrors_new); | 3810 | kfree(conf->mirrors_new); |
3832 | kfree(conf); | 3811 | kfree(conf); |
3833 | mddev->private = NULL; | ||
3834 | return 0; | ||
3835 | } | 3812 | } |
3836 | 3813 | ||
3837 | static void raid10_quiesce(struct mddev *mddev, int state) | 3814 | static void raid10_quiesce(struct mddev *mddev, int state) |
@@ -3895,7 +3872,7 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) | |||
3895 | return 0; | 3872 | return 0; |
3896 | } | 3873 | } |
3897 | 3874 | ||
3898 | static void *raid10_takeover_raid0(struct mddev *mddev) | 3875 | static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs) |
3899 | { | 3876 | { |
3900 | struct md_rdev *rdev; | 3877 | struct md_rdev *rdev; |
3901 | struct r10conf *conf; | 3878 | struct r10conf *conf; |
@@ -3905,6 +3882,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev) | |||
3905 | mdname(mddev)); | 3882 | mdname(mddev)); |
3906 | return ERR_PTR(-EINVAL); | 3883 | return ERR_PTR(-EINVAL); |
3907 | } | 3884 | } |
3885 | sector_div(size, devs); | ||
3908 | 3886 | ||
3909 | /* Set new parameters */ | 3887 | /* Set new parameters */ |
3910 | mddev->new_level = 10; | 3888 | mddev->new_level = 10; |
@@ -3915,12 +3893,15 @@ static void *raid10_takeover_raid0(struct mddev *mddev) | |||
3915 | mddev->raid_disks *= 2; | 3893 | mddev->raid_disks *= 2; |
3916 | /* make sure it will be not marked as dirty */ | 3894 | /* make sure it will be not marked as dirty */ |
3917 | mddev->recovery_cp = MaxSector; | 3895 | mddev->recovery_cp = MaxSector; |
3896 | mddev->dev_sectors = size; | ||
3918 | 3897 | ||
3919 | conf = setup_conf(mddev); | 3898 | conf = setup_conf(mddev); |
3920 | if (!IS_ERR(conf)) { | 3899 | if (!IS_ERR(conf)) { |
3921 | rdev_for_each(rdev, mddev) | 3900 | rdev_for_each(rdev, mddev) |
3922 | if (rdev->raid_disk >= 0) | 3901 | if (rdev->raid_disk >= 0) { |
3923 | rdev->new_raid_disk = rdev->raid_disk * 2; | 3902 | rdev->new_raid_disk = rdev->raid_disk * 2; |
3903 | rdev->sectors = size; | ||
3904 | } | ||
3924 | conf->barrier = 1; | 3905 | conf->barrier = 1; |
3925 | } | 3906 | } |
3926 | 3907 | ||
@@ -3943,7 +3924,9 @@ static void *raid10_takeover(struct mddev *mddev) | |||
3943 | mdname(mddev)); | 3924 | mdname(mddev)); |
3944 | return ERR_PTR(-EINVAL); | 3925 | return ERR_PTR(-EINVAL); |
3945 | } | 3926 | } |
3946 | return raid10_takeover_raid0(mddev); | 3927 | return raid10_takeover_raid0(mddev, |
3928 | raid0_conf->strip_zone->zone_end, | ||
3929 | raid0_conf->strip_zone->nb_dev); | ||
3947 | } | 3930 | } |
3948 | return ERR_PTR(-EINVAL); | 3931 | return ERR_PTR(-EINVAL); |
3949 | } | 3932 | } |
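The conversion fix is easiest to see with numbers: for a single-zone RAID0, strip_zone->zone_end is the zone's total size summed across all members, so it must be divided by nb_dev before RAID10 doubles raid_disks, otherwise every rdev claims the whole array. A sketch with assumed values (the helper name is hypothetical):

static sector_t raid0_member_sectors(struct r0conf *raid0_conf)
{
	/* assumed example: 4-device RAID0, zone_end = 4000 sectors */
	sector_t size = raid0_conf->strip_zone->zone_end;
	unsigned int devs = raid0_conf->strip_zone->nb_dev;	/* 4 */

	sector_div(size, devs);	/* size = 1000: usable sectors per member */
	return size;	/* stored in mddev->dev_sectors and each rdev->sectors */
}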
@@ -4713,7 +4696,7 @@ static struct md_personality raid10_personality = | |||
4713 | .owner = THIS_MODULE, | 4696 | .owner = THIS_MODULE, |
4714 | .make_request = make_request, | 4697 | .make_request = make_request, |
4715 | .run = run, | 4698 | .run = run, |
4716 | .stop = stop, | 4699 | .free = raid10_free, |
4717 | .status = status, | 4700 | .status = status, |
4718 | .error_handler = error, | 4701 | .error_handler = error, |
4719 | .hot_add_disk = raid10_add_disk, | 4702 | .hot_add_disk = raid10_add_disk, |
@@ -4727,6 +4710,8 @@ static struct md_personality raid10_personality = | |||
4727 | .check_reshape = raid10_check_reshape, | 4710 | .check_reshape = raid10_check_reshape, |
4728 | .start_reshape = raid10_start_reshape, | 4711 | .start_reshape = raid10_start_reshape, |
4729 | .finish_reshape = raid10_finish_reshape, | 4712 | .finish_reshape = raid10_finish_reshape, |
4713 | .congested = raid10_congested, | ||
4714 | .mergeable_bvec = raid10_mergeable_bvec, | ||
4730 | }; | 4715 | }; |
4731 | 4716 | ||
4732 | static int __init raid_init(void) | 4717 | static int __init raid_init(void) |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 157d69e83ff4..5ee6473ddc2c 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -150,7 +150,4 @@ enum r10bio_state { | |||
150 | */ | 150 | */ |
151 | R10BIO_Previous, | 151 | R10BIO_Previous, |
152 | }; | 152 | }; |
153 | |||
154 | extern int md_raid10_congested(struct mddev *mddev, int bits); | ||
155 | |||
156 | #endif | 153 | #endif |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b98765f6f77f..aa76865b804b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -296,12 +296,9 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
296 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 296 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
297 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 297 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
298 | if (test_bit(STRIPE_DELAYED, &sh->state) && | 298 | if (test_bit(STRIPE_DELAYED, &sh->state) && |
299 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 299 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
300 | list_add_tail(&sh->lru, &conf->delayed_list); | 300 | list_add_tail(&sh->lru, &conf->delayed_list); |
301 | if (atomic_read(&conf->preread_active_stripes) | 301 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
302 | < IO_THRESHOLD) | ||
303 | md_wakeup_thread(conf->mddev->thread); | ||
304 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | ||
305 | sh->bm_seq - conf->seq_write > 0) | 302 | sh->bm_seq - conf->seq_write > 0) |
306 | list_add_tail(&sh->lru, &conf->bitmap_list); | 303 | list_add_tail(&sh->lru, &conf->bitmap_list); |
307 | else { | 304 | else { |
@@ -2898,31 +2895,102 @@ static int want_replace(struct stripe_head *sh, int disk_idx) | |||
2898 | * Returns 1 when no more member devices need to be checked, otherwise returns | 2895 | * Returns 1 when no more member devices need to be checked, otherwise returns |
2899 | * 0 to tell the loop in handle_stripe_fill to continue | 2896 | * 0 to tell the loop in handle_stripe_fill to continue |
2900 | */ | 2897 | */ |
2901 | static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, | 2898 | |
2902 | int disk_idx, int disks) | 2899 | static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, |
2900 | int disk_idx, int disks) | ||
2903 | { | 2901 | { |
2904 | struct r5dev *dev = &sh->dev[disk_idx]; | 2902 | struct r5dev *dev = &sh->dev[disk_idx]; |
2905 | struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], | 2903 | struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], |
2906 | &sh->dev[s->failed_num[1]] }; | 2904 | &sh->dev[s->failed_num[1]] }; |
2905 | int i; | ||
2906 | |||
2907 | |||
2908 | if (test_bit(R5_LOCKED, &dev->flags) || | ||
2909 | test_bit(R5_UPTODATE, &dev->flags)) | ||
2910 | /* No point reading this as we already have it or have | ||
2911 | * decided to get it. | ||
2912 | */ | ||
2913 | return 0; | ||
2914 | |||
2915 | if (dev->toread || | ||
2916 | (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags))) | ||
2917 | /* We need this block to directly satisfy a request */ | ||
2918 | return 1; | ||
2919 | |||
2920 | if (s->syncing || s->expanding || | ||
2921 | (s->replacing && want_replace(sh, disk_idx))) | ||
2922 | /* When syncing, or expanding we read everything. | ||
2923 | * When replacing, we need the replaced block. | ||
2924 | */ | ||
2925 | return 1; | ||
2926 | |||
2927 | if ((s->failed >= 1 && fdev[0]->toread) || | ||
2928 | (s->failed >= 2 && fdev[1]->toread)) | ||
2929 | /* If we want to read from a failed device, then | ||
2930 | * we need to actually read every other device. | ||
2931 | */ | ||
2932 | return 1; | ||
2933 | |||
2934 | /* Sometimes neither read-modify-write nor reconstruct-write | ||
2935 | * cycles can work. In those cases we read every block we | ||
2936 | * can. Then the parity-update is certain to have enough to | ||
2937 | * work with. | ||
2938 | * This can only be a problem when we need to write something, | ||
2939 | * and some device has failed. If either of those tests | ||
2940 | * fails we need look no further. | ||
2941 | */ | ||
2942 | if (!s->failed || !s->to_write) | ||
2943 | return 0; | ||
2944 | |||
2945 | if (test_bit(R5_Insync, &dev->flags) && | ||
2946 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | ||
2947 | /* Pre-reads are not permitted until after a short delay | ||
2948 | * to gather multiple requests. However if this | ||
2949 | * device is not Insync, the block could only be computed | ||
2950 | * and there is no need to delay that. | ||
2951 | */ | ||
2952 | return 0; | ||
2953 | |||
2954 | for (i = 0; i < s->failed; i++) { | ||
2955 | if (fdev[i]->towrite && | ||
2956 | !test_bit(R5_UPTODATE, &fdev[i]->flags) && | ||
2957 | !test_bit(R5_OVERWRITE, &fdev[i]->flags)) | ||
2958 | /* If we have a partial write to a failed | ||
2959 | * device, then we will need to reconstruct | ||
2960 | * the content of that device, so all other | ||
2961 | * devices must be read. | ||
2962 | */ | ||
2963 | return 1; | ||
2964 | } | ||
2965 | |||
2966 | /* If we are forced to do a reconstruct-write, either because | ||
2967 | * the current RAID6 implementation only supports that, or | ||
2968 | * because parity cannot be trusted and we are currently | ||
2969 | * recovering it, there is extra need to be careful. | ||
2970 | * If one of the devices that we would need to read, because | ||
2971 | * it is not being overwritten (and maybe not written at all) | ||
2972 | * is missing/faulty, then we need to read everything we can. | ||
2973 | */ | ||
2974 | if (sh->raid_conf->level != 6 && | ||
2975 | sh->sector < sh->raid_conf->mddev->recovery_cp) | ||
2976 | /* reconstruct-write isn't being forced */ | ||
2977 | return 0; | ||
2978 | for (i = 0; i < s->failed; i++) { | ||
2979 | if (!test_bit(R5_UPTODATE, &fdev[i]->flags) && | ||
2980 | !test_bit(R5_OVERWRITE, &fdev[i]->flags)) | ||
2981 | return 1; | ||
2982 | } | ||
2983 | |||
2984 | return 0; | ||
2985 | } | ||
2986 | |||
2987 | static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, | ||
2988 | int disk_idx, int disks) | ||
2989 | { | ||
2990 | struct r5dev *dev = &sh->dev[disk_idx]; | ||
2907 | 2991 | ||
2908 | /* is the data in this block needed, and can we get it? */ | 2992 | /* is the data in this block needed, and can we get it? */ |
2909 | if (!test_bit(R5_LOCKED, &dev->flags) && | 2993 | if (need_this_block(sh, s, disk_idx, disks)) { |
2910 | !test_bit(R5_UPTODATE, &dev->flags) && | ||
2911 | (dev->toread || | ||
2912 | (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || | ||
2913 | s->syncing || s->expanding || | ||
2914 | (s->replacing && want_replace(sh, disk_idx)) || | ||
2915 | (s->failed >= 1 && fdev[0]->toread) || | ||
2916 | (s->failed >= 2 && fdev[1]->toread) || | ||
2917 | (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && | ||
2918 | (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) && | ||
2919 | !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || | ||
2920 | ((sh->raid_conf->level == 6 || | ||
2921 | sh->sector >= sh->raid_conf->mddev->recovery_cp) | ||
2922 | && s->failed && s->to_write && | ||
2923 | (s->to_write - s->non_overwrite < | ||
2924 | sh->raid_conf->raid_disks - sh->raid_conf->max_degraded) && | ||
2925 | (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) { | ||
2926 | /* we would like to get this block, possibly by computing it, | 2994 | /* we would like to get this block, possibly by computing it, |
2927 | * otherwise read it if the backing disk is insync | 2995 | * otherwise read it if the backing disk is insync |
2928 | */ | 2996 | */ |
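One concrete pass makes the intent of the old compound condition visible. Tracing a hypothetical stripe through need_this_block() as rewritten above:

/* Assumed state: RAID5, one failed member, a partial write queued
 * against that member (fdev[0]->towrite set), preread already active.
 * For a surviving in-sync 'dev' that nothing reads directly:
 *   R5_LOCKED/R5_UPTODATE both clear     -> keep checking
 *   no ->toread, no partial ->towrite    -> keep checking
 *   not syncing/expanding/replacing      -> keep checking
 *   failed device has nothing to read    -> keep checking
 *   s->failed && s->to_write             -> the interesting case
 *   STRIPE_PREREAD_ACTIVE set            -> not deferred
 *   fdev[0]->towrite, !R5_UPTODATE,
 *   !R5_OVERWRITE                        -> return 1: fetch this block
 * Every surviving block is read, so the failed member's data can be
 * reconstructed before the parity update. */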
@@ -4081,7 +4149,7 @@ static void activate_bit_delay(struct r5conf *conf, | |||
4081 | } | 4149 | } |
4082 | } | 4150 | } |
4083 | 4151 | ||
4084 | int md_raid5_congested(struct mddev *mddev, int bits) | 4152 | static int raid5_congested(struct mddev *mddev, int bits) |
4085 | { | 4153 | { |
4086 | struct r5conf *conf = mddev->private; | 4154 | struct r5conf *conf = mddev->private; |
4087 | 4155 | ||
@@ -4098,24 +4166,14 @@ int md_raid5_congested(struct mddev *mddev, int bits) | |||
4098 | 4166 | ||
4099 | return 0; | 4167 | return 0; |
4100 | } | 4168 | } |
4101 | EXPORT_SYMBOL_GPL(md_raid5_congested); | ||
4102 | |||
4103 | static int raid5_congested(void *data, int bits) | ||
4104 | { | ||
4105 | struct mddev *mddev = data; | ||
4106 | |||
4107 | return mddev_congested(mddev, bits) || | ||
4108 | md_raid5_congested(mddev, bits); | ||
4109 | } | ||
4110 | 4169 | ||
4111 | /* We want read requests to align with chunks where possible, | 4170 | /* We want read requests to align with chunks where possible, |
4112 | * but write requests don't need to. | 4171 | * but write requests don't need to. |
4113 | */ | 4172 | */ |
4114 | static int raid5_mergeable_bvec(struct request_queue *q, | 4173 | static int raid5_mergeable_bvec(struct mddev *mddev, |
4115 | struct bvec_merge_data *bvm, | 4174 | struct bvec_merge_data *bvm, |
4116 | struct bio_vec *biovec) | 4175 | struct bio_vec *biovec) |
4117 | { | 4176 | { |
4118 | struct mddev *mddev = q->queuedata; | ||
4119 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 4177 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
4120 | int max; | 4178 | int max; |
4121 | unsigned int chunk_sectors = mddev->chunk_sectors; | 4179 | unsigned int chunk_sectors = mddev->chunk_sectors; |
@@ -5296,11 +5354,14 @@ static void raid5d(struct md_thread *thread) | |||
5296 | static ssize_t | 5354 | static ssize_t |
5297 | raid5_show_stripe_cache_size(struct mddev *mddev, char *page) | 5355 | raid5_show_stripe_cache_size(struct mddev *mddev, char *page) |
5298 | { | 5356 | { |
5299 | struct r5conf *conf = mddev->private; | 5357 | struct r5conf *conf; |
5358 | int ret = 0; | ||
5359 | spin_lock(&mddev->lock); | ||
5360 | conf = mddev->private; | ||
5300 | if (conf) | 5361 | if (conf) |
5301 | return sprintf(page, "%d\n", conf->max_nr_stripes); | 5362 | ret = sprintf(page, "%d\n", conf->max_nr_stripes); |
5302 | else | 5363 | spin_unlock(&mddev->lock); |
5303 | return 0; | 5364 | return ret; |
5304 | } | 5365 | } |
5305 | 5366 | ||
5306 | int | 5367 | int |
@@ -5339,21 +5400,25 @@ EXPORT_SYMBOL(raid5_set_cache_size); | |||
5339 | static ssize_t | 5400 | static ssize_t |
5340 | raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len) | 5401 | raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len) |
5341 | { | 5402 | { |
5342 | struct r5conf *conf = mddev->private; | 5403 | struct r5conf *conf; |
5343 | unsigned long new; | 5404 | unsigned long new; |
5344 | int err; | 5405 | int err; |
5345 | 5406 | ||
5346 | if (len >= PAGE_SIZE) | 5407 | if (len >= PAGE_SIZE) |
5347 | return -EINVAL; | 5408 | return -EINVAL; |
5348 | if (!conf) | ||
5349 | return -ENODEV; | ||
5350 | |||
5351 | if (kstrtoul(page, 10, &new)) | 5409 | if (kstrtoul(page, 10, &new)) |
5352 | return -EINVAL; | 5410 | return -EINVAL; |
5353 | err = raid5_set_cache_size(mddev, new); | 5411 | err = mddev_lock(mddev); |
5354 | if (err) | 5412 | if (err) |
5355 | return err; | 5413 | return err; |
5356 | return len; | 5414 | conf = mddev->private; |
5415 | if (!conf) | ||
5416 | err = -ENODEV; | ||
5417 | else | ||
5418 | err = raid5_set_cache_size(mddev, new); | ||
5419 | mddev_unlock(mddev); | ||
5420 | |||
5421 | return err ?: len; | ||
5357 | } | 5422 | } |
5358 | 5423 | ||
5359 | static struct md_sysfs_entry | 5424 | static struct md_sysfs_entry |
@@ -5364,29 +5429,40 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, | |||
5364 | static ssize_t | 5429 | static ssize_t |
5365 | raid5_show_preread_threshold(struct mddev *mddev, char *page) | 5430 | raid5_show_preread_threshold(struct mddev *mddev, char *page) |
5366 | { | 5431 | { |
5367 | struct r5conf *conf = mddev->private; | 5432 | struct r5conf *conf; |
5433 | int ret = 0; | ||
5434 | spin_lock(&mddev->lock); | ||
5435 | conf = mddev->private; | ||
5368 | if (conf) | 5436 | if (conf) |
5369 | return sprintf(page, "%d\n", conf->bypass_threshold); | 5437 | ret = sprintf(page, "%d\n", conf->bypass_threshold); |
5370 | else | 5438 | spin_unlock(&mddev->lock); |
5371 | return 0; | 5439 | return ret; |
5372 | } | 5440 | } |
5373 | 5441 | ||
5374 | static ssize_t | 5442 | static ssize_t |
5375 | raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len) | 5443 | raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len) |
5376 | { | 5444 | { |
5377 | struct r5conf *conf = mddev->private; | 5445 | struct r5conf *conf; |
5378 | unsigned long new; | 5446 | unsigned long new; |
5447 | int err; | ||
5448 | |||
5379 | if (len >= PAGE_SIZE) | 5449 | if (len >= PAGE_SIZE) |
5380 | return -EINVAL; | 5450 | return -EINVAL; |
5381 | if (!conf) | ||
5382 | return -ENODEV; | ||
5383 | |||
5384 | if (kstrtoul(page, 10, &new)) | 5451 | if (kstrtoul(page, 10, &new)) |
5385 | return -EINVAL; | 5452 | return -EINVAL; |
5386 | if (new > conf->max_nr_stripes) | 5453 | |
5387 | return -EINVAL; | 5454 | err = mddev_lock(mddev); |
5388 | conf->bypass_threshold = new; | 5455 | if (err) |
5389 | return len; | 5456 | return err; |
5457 | conf = mddev->private; | ||
5458 | if (!conf) | ||
5459 | err = -ENODEV; | ||
5460 | else if (new > conf->max_nr_stripes) | ||
5461 | err = -EINVAL; | ||
5462 | else | ||
5463 | conf->bypass_threshold = new; | ||
5464 | mddev_unlock(mddev); | ||
5465 | return err ?: len; | ||
5390 | } | 5466 | } |
5391 | 5467 | ||
5392 | static struct md_sysfs_entry | 5468 | static struct md_sysfs_entry |
@@ -5398,39 +5474,48 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, | |||
5398 | static ssize_t | 5474 | static ssize_t |
5399 | raid5_show_skip_copy(struct mddev *mddev, char *page) | 5475 | raid5_show_skip_copy(struct mddev *mddev, char *page) |
5400 | { | 5476 | { |
5401 | struct r5conf *conf = mddev->private; | 5477 | struct r5conf *conf; |
5478 | int ret = 0; | ||
5479 | spin_lock(&mddev->lock); | ||
5480 | conf = mddev->private; | ||
5402 | if (conf) | 5481 | if (conf) |
5403 | return sprintf(page, "%d\n", conf->skip_copy); | 5482 | ret = sprintf(page, "%d\n", conf->skip_copy); |
5404 | else | 5483 | spin_unlock(&mddev->lock); |
5405 | return 0; | 5484 | return ret; |
5406 | } | 5485 | } |
5407 | 5486 | ||
5408 | static ssize_t | 5487 | static ssize_t |
5409 | raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) | 5488 | raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) |
5410 | { | 5489 | { |
5411 | struct r5conf *conf = mddev->private; | 5490 | struct r5conf *conf; |
5412 | unsigned long new; | 5491 | unsigned long new; |
5492 | int err; | ||
5493 | |||
5413 | if (len >= PAGE_SIZE) | 5494 | if (len >= PAGE_SIZE) |
5414 | return -EINVAL; | 5495 | return -EINVAL; |
5415 | if (!conf) | ||
5416 | return -ENODEV; | ||
5417 | |||
5418 | if (kstrtoul(page, 10, &new)) | 5496 | if (kstrtoul(page, 10, &new)) |
5419 | return -EINVAL; | 5497 | return -EINVAL; |
5420 | new = !!new; | 5498 | new = !!new; |
5421 | if (new == conf->skip_copy) | ||
5422 | return len; | ||
5423 | 5499 | ||
5424 | mddev_suspend(mddev); | 5500 | err = mddev_lock(mddev); |
5425 | conf->skip_copy = new; | 5501 | if (err) |
5426 | if (new) | 5502 | return err; |
5427 | mddev->queue->backing_dev_info.capabilities |= | 5503 | conf = mddev->private; |
5428 | BDI_CAP_STABLE_WRITES; | 5504 | if (!conf) |
5429 | else | 5505 | err = -ENODEV; |
5430 | mddev->queue->backing_dev_info.capabilities &= | 5506 | else if (new != conf->skip_copy) { |
5431 | ~BDI_CAP_STABLE_WRITES; | 5507 | mddev_suspend(mddev); |
5432 | mddev_resume(mddev); | 5508 | conf->skip_copy = new; |
5433 | return len; | 5509 | if (new) |
5510 | mddev->queue->backing_dev_info.capabilities |= | ||
5511 | BDI_CAP_STABLE_WRITES; | ||
5512 | else | ||
5513 | mddev->queue->backing_dev_info.capabilities &= | ||
5514 | ~BDI_CAP_STABLE_WRITES; | ||
5515 | mddev_resume(mddev); | ||
5516 | } | ||
5517 | mddev_unlock(mddev); | ||
5518 | return err ?: len; | ||
5434 | } | 5519 | } |
5435 | 5520 | ||
5436 | static struct md_sysfs_entry | 5521 | static struct md_sysfs_entry |
@@ -5454,11 +5539,14 @@ raid5_stripecache_active = __ATTR_RO(stripe_cache_active); | |||
5454 | static ssize_t | 5539 | static ssize_t |
5455 | raid5_show_group_thread_cnt(struct mddev *mddev, char *page) | 5540 | raid5_show_group_thread_cnt(struct mddev *mddev, char *page) |
5456 | { | 5541 | { |
5457 | struct r5conf *conf = mddev->private; | 5542 | struct r5conf *conf; |
5543 | int ret = 0; | ||
5544 | spin_lock(&mddev->lock); | ||
5545 | conf = mddev->private; | ||
5458 | if (conf) | 5546 | if (conf) |
5459 | return sprintf(page, "%d\n", conf->worker_cnt_per_group); | 5547 | ret = sprintf(page, "%d\n", conf->worker_cnt_per_group); |
5460 | else | 5548 | spin_unlock(&mddev->lock); |
5461 | return 0; | 5549 | return ret; |
5462 | } | 5550 | } |
5463 | 5551 | ||
5464 | static int alloc_thread_groups(struct r5conf *conf, int cnt, | 5552 | static int alloc_thread_groups(struct r5conf *conf, int cnt, |
@@ -5468,7 +5556,7 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt, | |||
5468 | static ssize_t | 5556 | static ssize_t |
5469 | raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) | 5557 | raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) |
5470 | { | 5558 | { |
5471 | struct r5conf *conf = mddev->private; | 5559 | struct r5conf *conf; |
5472 | unsigned long new; | 5560 | unsigned long new; |
5473 | int err; | 5561 | int err; |
5474 | struct r5worker_group *new_groups, *old_groups; | 5562 | struct r5worker_group *new_groups, *old_groups; |
@@ -5476,41 +5564,41 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) | |||
5476 | 5564 | ||
5477 | if (len >= PAGE_SIZE) | 5565 | if (len >= PAGE_SIZE) |
5478 | return -EINVAL; | 5566 | return -EINVAL; |
5479 | if (!conf) | ||
5480 | return -ENODEV; | ||
5481 | |||
5482 | if (kstrtoul(page, 10, &new)) | 5567 | if (kstrtoul(page, 10, &new)) |
5483 | return -EINVAL; | 5568 | return -EINVAL; |
5484 | 5569 | ||
5485 | if (new == conf->worker_cnt_per_group) | 5570 | err = mddev_lock(mddev); |
5486 | return len; | 5571 | if (err) |
5487 | 5572 | return err; | |
5488 | mddev_suspend(mddev); | 5573 | conf = mddev->private; |
5574 | if (!conf) | ||
5575 | err = -ENODEV; | ||
5576 | else if (new != conf->worker_cnt_per_group) { | ||
5577 | mddev_suspend(mddev); | ||
5489 | 5578 | ||
5490 | old_groups = conf->worker_groups; | 5579 | old_groups = conf->worker_groups; |
5491 | if (old_groups) | 5580 | if (old_groups) |
5492 | flush_workqueue(raid5_wq); | 5581 | flush_workqueue(raid5_wq); |
5493 | 5582 | ||
5494 | err = alloc_thread_groups(conf, new, | 5583 | err = alloc_thread_groups(conf, new, |
5495 | &group_cnt, &worker_cnt_per_group, | 5584 | &group_cnt, &worker_cnt_per_group, |
5496 | &new_groups); | 5585 | &new_groups); |
5497 | if (!err) { | 5586 | if (!err) { |
5498 | spin_lock_irq(&conf->device_lock); | 5587 | spin_lock_irq(&conf->device_lock); |
5499 | conf->group_cnt = group_cnt; | 5588 | conf->group_cnt = group_cnt; |
5500 | conf->worker_cnt_per_group = worker_cnt_per_group; | 5589 | conf->worker_cnt_per_group = worker_cnt_per_group; |
5501 | conf->worker_groups = new_groups; | 5590 | conf->worker_groups = new_groups; |
5502 | spin_unlock_irq(&conf->device_lock); | 5591 | spin_unlock_irq(&conf->device_lock); |
5503 | 5592 | ||
5504 | if (old_groups) | 5593 | if (old_groups) |
5505 | kfree(old_groups[0].workers); | 5594 | kfree(old_groups[0].workers); |
5506 | kfree(old_groups); | 5595 | kfree(old_groups); |
5596 | } | ||
5597 | mddev_resume(mddev); | ||
5507 | } | 5598 | } |
5599 | mddev_unlock(mddev); | ||
5508 | 5600 | ||
5509 | mddev_resume(mddev); | 5601 | return err ?: len; |
5510 | |||
5511 | if (err) | ||
5512 | return err; | ||
5513 | return len; | ||
5514 | } | 5602 | } |
5515 | 5603 | ||
5516 | static struct md_sysfs_entry | 5604 | static struct md_sysfs_entry |
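Every raid5 *_store above now follows one recipe: parse the input with no lock held, take the interruptible mddev_lock(), re-read ->private (it may have become NULL), do the guarded update, and merge the error with the length on the way out. The skeleton, using bypass_threshold as the example knob:

static ssize_t example_store(struct mddev *mddev, const char *page, size_t len)
{
	struct r5conf *conf;
	unsigned long new;
	int err;

	if (len >= PAGE_SIZE)
		return -EINVAL;
	if (kstrtoul(page, 10, &new))
		return -EINVAL;		/* validate before locking */

	err = mddev_lock(mddev);	/* interruptible; may return an error */
	if (err)
		return err;
	conf = mddev->private;		/* re-read under the lock */
	if (!conf)
		err = -ENODEV;
	else
		conf->bypass_threshold = new;	/* the guarded update */
	mddev_unlock(mddev);
	return err ?: len;
}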
@@ -6178,11 +6266,6 @@ static int run(struct mddev *mddev) | |||
6178 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | 6266 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) |
6179 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | 6267 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; |
6180 | 6268 | ||
6181 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | ||
6182 | |||
6183 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
6184 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | ||
6185 | |||
6186 | chunk_size = mddev->chunk_sectors << 9; | 6269 | chunk_size = mddev->chunk_sectors << 9; |
6187 | blk_queue_io_min(mddev->queue, chunk_size); | 6270 | blk_queue_io_min(mddev->queue, chunk_size); |
6188 | blk_queue_io_opt(mddev->queue, chunk_size * | 6271 | blk_queue_io_opt(mddev->queue, chunk_size * |
@@ -6260,17 +6343,12 @@ abort: | |||
6260 | return -EIO; | 6343 | return -EIO; |
6261 | } | 6344 | } |
6262 | 6345 | ||
6263 | static int stop(struct mddev *mddev) | 6346 | static void raid5_free(struct mddev *mddev, void *priv) |
6264 | { | 6347 | { |
6265 | struct r5conf *conf = mddev->private; | 6348 | struct r5conf *conf = priv; |
6266 | 6349 | ||
6267 | md_unregister_thread(&mddev->thread); | ||
6268 | if (mddev->queue) | ||
6269 | mddev->queue->backing_dev_info.congested_fn = NULL; | ||
6270 | free_conf(conf); | 6350 | free_conf(conf); |
6271 | mddev->private = NULL; | ||
6272 | mddev->to_remove = &raid5_attrs_group; | 6351 | mddev->to_remove = &raid5_attrs_group; |
6273 | return 0; | ||
6274 | } | 6352 | } |
6275 | 6353 | ||
6276 | static void status(struct seq_file *seq, struct mddev *mddev) | 6354 | static void status(struct seq_file *seq, struct mddev *mddev) |
@@ -7044,7 +7122,7 @@ static struct md_personality raid6_personality = | |||
7044 | .owner = THIS_MODULE, | 7122 | .owner = THIS_MODULE, |
7045 | .make_request = make_request, | 7123 | .make_request = make_request, |
7046 | .run = run, | 7124 | .run = run, |
7047 | .stop = stop, | 7125 | .free = raid5_free, |
7048 | .status = status, | 7126 | .status = status, |
7049 | .error_handler = error, | 7127 | .error_handler = error, |
7050 | .hot_add_disk = raid5_add_disk, | 7128 | .hot_add_disk = raid5_add_disk, |
@@ -7058,6 +7136,8 @@ static struct md_personality raid6_personality = | |||
7058 | .finish_reshape = raid5_finish_reshape, | 7136 | .finish_reshape = raid5_finish_reshape, |
7059 | .quiesce = raid5_quiesce, | 7137 | .quiesce = raid5_quiesce, |
7060 | .takeover = raid6_takeover, | 7138 | .takeover = raid6_takeover, |
7139 | .congested = raid5_congested, | ||
7140 | .mergeable_bvec = raid5_mergeable_bvec, | ||
7061 | }; | 7141 | }; |
7062 | static struct md_personality raid5_personality = | 7142 | static struct md_personality raid5_personality = |
7063 | { | 7143 | { |
@@ -7066,7 +7146,7 @@ static struct md_personality raid5_personality = | |||
7066 | .owner = THIS_MODULE, | 7146 | .owner = THIS_MODULE, |
7067 | .make_request = make_request, | 7147 | .make_request = make_request, |
7068 | .run = run, | 7148 | .run = run, |
7069 | .stop = stop, | 7149 | .free = raid5_free, |
7070 | .status = status, | 7150 | .status = status, |
7071 | .error_handler = error, | 7151 | .error_handler = error, |
7072 | .hot_add_disk = raid5_add_disk, | 7152 | .hot_add_disk = raid5_add_disk, |
@@ -7080,6 +7160,8 @@ static struct md_personality raid5_personality = | |||
7080 | .finish_reshape = raid5_finish_reshape, | 7160 | .finish_reshape = raid5_finish_reshape, |
7081 | .quiesce = raid5_quiesce, | 7161 | .quiesce = raid5_quiesce, |
7082 | .takeover = raid5_takeover, | 7162 | .takeover = raid5_takeover, |
7163 | .congested = raid5_congested, | ||
7164 | .mergeable_bvec = raid5_mergeable_bvec, | ||
7083 | }; | 7165 | }; |
7084 | 7166 | ||
7085 | static struct md_personality raid4_personality = | 7167 | static struct md_personality raid4_personality = |
@@ -7089,7 +7171,7 @@ static struct md_personality raid4_personality = | |||
7089 | .owner = THIS_MODULE, | 7171 | .owner = THIS_MODULE, |
7090 | .make_request = make_request, | 7172 | .make_request = make_request, |
7091 | .run = run, | 7173 | .run = run, |
7092 | .stop = stop, | 7174 | .free = raid5_free, |
7093 | .status = status, | 7175 | .status = status, |
7094 | .error_handler = error, | 7176 | .error_handler = error, |
7095 | .hot_add_disk = raid5_add_disk, | 7177 | .hot_add_disk = raid5_add_disk, |
@@ -7103,6 +7185,8 @@ static struct md_personality raid4_personality = | |||
7103 | .finish_reshape = raid5_finish_reshape, | 7185 | .finish_reshape = raid5_finish_reshape, |
7104 | .quiesce = raid5_quiesce, | 7186 | .quiesce = raid5_quiesce, |
7105 | .takeover = raid4_takeover, | 7187 | .takeover = raid4_takeover, |
7188 | .congested = raid5_congested, | ||
7189 | .mergeable_bvec = raid5_mergeable_bvec, | ||
7106 | }; | 7190 | }; |
7107 | 7191 | ||
7108 | static int __init raid5_init(void) | 7192 | static int __init raid5_init(void) |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index d59f5ca743cd..983e18a83db1 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -558,7 +558,6 @@ static inline int algorithm_is_DDF(int layout) | |||
558 | return layout >= 8 && layout <= 10; | 558 | return layout >= 8 && layout <= 10; |
559 | } | 559 | } |
560 | 560 | ||
561 | extern int md_raid5_congested(struct mddev *mddev, int bits); | ||
562 | extern void md_raid5_kick_device(struct r5conf *conf); | 561 | extern void md_raid5_kick_device(struct r5conf *conf); |
563 | extern int raid5_set_cache_size(struct mddev *mddev, int size); | 562 | extern int raid5_set_cache_size(struct mddev *mddev, int size); |
564 | #endif | 563 | #endif |
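For completeness, the matching read side used throughout the raid5 hunks: show handlers take only the mddev->lock spinlock, because a reader merely dereferences ->private and formats one value, so it never needs to sleep. A sketch of that pattern:

static ssize_t example_show(struct mddev *mddev, char *page)
{
	struct r5conf *conf;
	int ret = 0;

	spin_lock(&mddev->lock);	/* pins ->private against teardown */
	conf = mddev->private;
	if (conf)
		ret = sprintf(page, "%d\n", conf->max_nr_stripes);
	spin_unlock(&mddev->lock);
	return ret;
}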