aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2014-12-14 20:56:56 -0500
committer: NeilBrown <neilb@suse.de> 2015-02-03 16:35:52 -0500
commit: 5c675f83c68fbdf9c0e103c1090b06be747fa62c (patch)
tree: 9a03f84c7a3bcef7d5e757dc28ce7bd5d205b26a
parent: 85572d7c75fd5b9fa3fc911e1c99c68ec74903a0 (diff)
md: make ->congested robust against personality changes.
There is currently no locking around calls to the 'congested' bdi function. If called at an awkward time while an array is being converted from one level (or personality) to another, there is a tiny chance of running code in an unreferenced module etc.

So add a 'congested' function to the md_personality operations structure, and call it with appropriate locking from a central 'mddev_congested'.

When the array personality is changing, the array will be 'suspended' so no IO is processed. If mddev_congested detects this, it simply reports that the array is congested, which is a safe guess. As mddev_suspend calls synchronize_rcu(), mddev_congested can avoid races by including the whole call inside an rcu_read_lock() region. This requires that the congested functions for all subordinate devices can be run under rcu_read_lock(). Fortunately this is the case.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/dm-raid.c8
-rw-r--r--drivers/md/linear.c9
-rw-r--r--drivers/md/md.c22
-rw-r--r--drivers/md/md.h3
-rw-r--r--drivers/md/multipath.c10
-rw-r--r--drivers/md/raid0.c9
-rw-r--r--drivers/md/raid1.c14
-rw-r--r--drivers/md/raid1.h3
-rw-r--r--drivers/md/raid10.c14
-rw-r--r--drivers/md/raid10.h3
-rw-r--r--drivers/md/raid5.c19
-rw-r--r--drivers/md/raid5.h1
12 files changed, 38 insertions, 77 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 07c0fa0fa284..777d9ba2acad 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -746,13 +746,7 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
746{ 746{
747 struct raid_set *rs = container_of(cb, struct raid_set, callbacks); 747 struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
748 748
749 if (rs->raid_type->level == 1) 749 return mddev_congested(&rs->md, bits);
750 return md_raid1_congested(&rs->md, bits);
751
752 if (rs->raid_type->level == 10)
753 return md_raid10_congested(&rs->md, bits);
754
755 return md_raid5_congested(&rs->md, bits);
756} 750}
757 751
758/* 752/*
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 64713b77df1c..05108510d9cd 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -97,15 +97,11 @@ static int linear_mergeable_bvec(struct request_queue *q,
97 return maxsectors << 9; 97 return maxsectors << 9;
98} 98}
99 99
100static int linear_congested(void *data, int bits) 100static int linear_congested(struct mddev *mddev, int bits)
101{ 101{
102 struct mddev *mddev = data;
103 struct linear_conf *conf; 102 struct linear_conf *conf;
104 int i, ret = 0; 103 int i, ret = 0;
105 104
106 if (mddev_congested(mddev, bits))
107 return 1;
108
109 rcu_read_lock(); 105 rcu_read_lock();
110 conf = rcu_dereference(mddev->private); 106 conf = rcu_dereference(mddev->private);
111 107
@@ -218,8 +214,6 @@ static int linear_run (struct mddev *mddev)
218 md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); 214 md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
219 215
220 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); 216 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
221 mddev->queue->backing_dev_info.congested_fn = linear_congested;
222 mddev->queue->backing_dev_info.congested_data = mddev;
223 217
224 ret = md_integrity_register(mddev); 218 ret = md_integrity_register(mddev);
225 if (ret) { 219 if (ret) {
@@ -366,6 +360,7 @@ static struct md_personality linear_personality =
366 .status = linear_status, 360 .status = linear_status,
367 .hot_add_disk = linear_add, 361 .hot_add_disk = linear_add,
368 .size = linear_size, 362 .size = linear_size,
363 .congested = linear_congested,
369}; 364};
370 365
371static int __init linear_init (void) 366static int __init linear_init (void)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 17e7fd776034..d45f52edb314 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -321,9 +321,23 @@ EXPORT_SYMBOL_GPL(mddev_resume);
321 321
322int mddev_congested(struct mddev *mddev, int bits) 322int mddev_congested(struct mddev *mddev, int bits)
323{ 323{
324 return mddev->suspended; 324 struct md_personality *pers = mddev->pers;
325 int ret = 0;
326
327 rcu_read_lock();
328 if (mddev->suspended)
329 ret = 1;
330 else if (pers && pers->congested)
331 ret = pers->congested(mddev, bits);
332 rcu_read_unlock();
333 return ret;
334}
335EXPORT_SYMBOL_GPL(mddev_congested);
336static int md_congested(void *data, int bits)
337{
338 struct mddev *mddev = data;
339 return mddev_congested(mddev, bits);
325} 340}
326EXPORT_SYMBOL(mddev_congested);
327 341
328/* 342/*
329 * Generic flush handling for md 343 * Generic flush handling for md
@@ -4908,6 +4922,10 @@ int md_run(struct mddev *mddev)
4908 bitmap_destroy(mddev); 4922 bitmap_destroy(mddev);
4909 return err; 4923 return err;
4910 } 4924 }
4925 if (mddev->queue) {
4926 mddev->queue->backing_dev_info.congested_data = mddev;
4927 mddev->queue->backing_dev_info.congested_fn = md_congested;
4928 }
4911 if (mddev->pers->sync_request) { 4929 if (mddev->pers->sync_request) {
4912 if (mddev->kobj.sd && 4930 if (mddev->kobj.sd &&
4913 sysfs_create_group(&mddev->kobj, &md_redundancy_group)) 4931 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f0d15bdd96d4..f2602280fac1 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -496,6 +496,9 @@ struct md_personality
496 * array. 496 * array.
497 */ 497 */
498 void *(*takeover) (struct mddev *mddev); 498 void *(*takeover) (struct mddev *mddev);
499 /* congested implements bdi.congested_fn().
500 * Will not be called while array is 'suspended' */
501 int (*congested)(struct mddev *mddev, int bits);
499}; 502};
500 503
501struct md_sysfs_entry { 504struct md_sysfs_entry {
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 399272f9c042..fedb1b31877d 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -153,15 +153,11 @@ static void multipath_status (struct seq_file *seq, struct mddev *mddev)
153 seq_printf (seq, "]"); 153 seq_printf (seq, "]");
154} 154}
155 155
156static int multipath_congested(void *data, int bits) 156static int multipath_congested(struct mddev *mddev, int bits)
157{ 157{
158 struct mddev *mddev = data;
159 struct mpconf *conf = mddev->private; 158 struct mpconf *conf = mddev->private;
160 int i, ret = 0; 159 int i, ret = 0;
161 160
162 if (mddev_congested(mddev, bits))
163 return 1;
164
165 rcu_read_lock(); 161 rcu_read_lock();
166 for (i = 0; i < mddev->raid_disks ; i++) { 162 for (i = 0; i < mddev->raid_disks ; i++) {
167 struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); 163 struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
@@ -489,9 +485,6 @@ static int multipath_run (struct mddev *mddev)
489 */ 485 */
490 md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); 486 md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
491 487
492 mddev->queue->backing_dev_info.congested_fn = multipath_congested;
493 mddev->queue->backing_dev_info.congested_data = mddev;
494
495 if (md_integrity_register(mddev)) 488 if (md_integrity_register(mddev))
496 goto out_free_conf; 489 goto out_free_conf;
497 490
@@ -533,6 +526,7 @@ static struct md_personality multipath_personality =
533 .hot_add_disk = multipath_add_disk, 526 .hot_add_disk = multipath_add_disk,
534 .hot_remove_disk= multipath_remove_disk, 527 .hot_remove_disk= multipath_remove_disk,
535 .size = multipath_size, 528 .size = multipath_size,
529 .congested = multipath_congested,
536}; 530};
537 531
538static int __init multipath_init (void) 532static int __init multipath_init (void)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ba6b85de96d2..4b521eac5b69 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -25,17 +25,13 @@
25#include "raid0.h" 25#include "raid0.h"
26#include "raid5.h" 26#include "raid5.h"
27 27
28static int raid0_congested(void *data, int bits) 28static int raid0_congested(struct mddev *mddev, int bits)
29{ 29{
30 struct mddev *mddev = data;
31 struct r0conf *conf = mddev->private; 30 struct r0conf *conf = mddev->private;
32 struct md_rdev **devlist = conf->devlist; 31 struct md_rdev **devlist = conf->devlist;
33 int raid_disks = conf->strip_zone[0].nb_dev; 32 int raid_disks = conf->strip_zone[0].nb_dev;
34 int i, ret = 0; 33 int i, ret = 0;
35 34
36 if (mddev_congested(mddev, bits))
37 return 1;
38
39 for (i = 0; i < raid_disks && !ret ; i++) { 35 for (i = 0; i < raid_disks && !ret ; i++) {
40 struct request_queue *q = bdev_get_queue(devlist[i]->bdev); 36 struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
41 37
@@ -263,8 +259,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
263 mdname(mddev), 259 mdname(mddev),
264 (unsigned long long)smallest->sectors); 260 (unsigned long long)smallest->sectors);
265 } 261 }
266 mddev->queue->backing_dev_info.congested_fn = raid0_congested;
267 mddev->queue->backing_dev_info.congested_data = mddev;
268 262
269 /* 263 /*
270 * now since we have the hard sector sizes, we can make sure 264 * now since we have the hard sector sizes, we can make sure
@@ -729,6 +723,7 @@ static struct md_personality raid0_personality=
729 .size = raid0_size, 723 .size = raid0_size,
730 .takeover = raid0_takeover, 724 .takeover = raid0_takeover,
731 .quiesce = raid0_quiesce, 725 .quiesce = raid0_quiesce,
726 .congested = raid0_congested,
732}; 727};
733 728
734static int __init raid0_init (void) 729static int __init raid0_init (void)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 40b35be34f8d..9ad7ce7091be 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -734,7 +734,7 @@ static int raid1_mergeable_bvec(struct request_queue *q,
734 734
735} 735}
736 736
737int md_raid1_congested(struct mddev *mddev, int bits) 737static int raid1_congested(struct mddev *mddev, int bits)
738{ 738{
739 struct r1conf *conf = mddev->private; 739 struct r1conf *conf = mddev->private;
740 int i, ret = 0; 740 int i, ret = 0;
@@ -763,15 +763,6 @@ int md_raid1_congested(struct mddev *mddev, int bits)
763 rcu_read_unlock(); 763 rcu_read_unlock();
764 return ret; 764 return ret;
765} 765}
766EXPORT_SYMBOL_GPL(md_raid1_congested);
767
768static int raid1_congested(void *data, int bits)
769{
770 struct mddev *mddev = data;
771
772 return mddev_congested(mddev, bits) ||
773 md_raid1_congested(mddev, bits);
774}
775 766
776static void flush_pending_writes(struct r1conf *conf) 767static void flush_pending_writes(struct r1conf *conf)
777{ 768{
@@ -2955,8 +2946,6 @@ static int run(struct mddev *mddev)
2955 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); 2946 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
2956 2947
2957 if (mddev->queue) { 2948 if (mddev->queue) {
2958 mddev->queue->backing_dev_info.congested_fn = raid1_congested;
2959 mddev->queue->backing_dev_info.congested_data = mddev;
2960 blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec); 2949 blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
2961 2950
2962 if (discard_supported) 2951 if (discard_supported)
@@ -3193,6 +3182,7 @@ static struct md_personality raid1_personality =
3193 .check_reshape = raid1_reshape, 3182 .check_reshape = raid1_reshape,
3194 .quiesce = raid1_quiesce, 3183 .quiesce = raid1_quiesce,
3195 .takeover = raid1_takeover, 3184 .takeover = raid1_takeover,
3185 .congested = raid1_congested,
3196}; 3186};
3197 3187
3198static int __init raid_init(void) 3188static int __init raid_init(void)
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 33bda55ef9f7..14ebb288c1ef 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -170,7 +170,4 @@ struct r1bio {
170 */ 170 */
171#define R1BIO_MadeGood 7 171#define R1BIO_MadeGood 7
172#define R1BIO_WriteError 8 172#define R1BIO_WriteError 8
173
174extern int md_raid1_congested(struct mddev *mddev, int bits);
175
176#endif 173#endif
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 32e282f4c83c..fb6b88674e87 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -910,7 +910,7 @@ retry:
910 return rdev; 910 return rdev;
911} 911}
912 912
913int md_raid10_congested(struct mddev *mddev, int bits) 913static int raid10_congested(struct mddev *mddev, int bits)
914{ 914{
915 struct r10conf *conf = mddev->private; 915 struct r10conf *conf = mddev->private;
916 int i, ret = 0; 916 int i, ret = 0;
@@ -934,15 +934,6 @@ int md_raid10_congested(struct mddev *mddev, int bits)
934 rcu_read_unlock(); 934 rcu_read_unlock();
935 return ret; 935 return ret;
936} 936}
937EXPORT_SYMBOL_GPL(md_raid10_congested);
938
939static int raid10_congested(void *data, int bits)
940{
941 struct mddev *mddev = data;
942
943 return mddev_congested(mddev, bits) ||
944 md_raid10_congested(mddev, bits);
945}
946 937
947static void flush_pending_writes(struct r10conf *conf) 938static void flush_pending_writes(struct r10conf *conf)
948{ 939{
@@ -3757,8 +3748,6 @@ static int run(struct mddev *mddev)
3757 if (mddev->queue) { 3748 if (mddev->queue) {
3758 int stripe = conf->geo.raid_disks * 3749 int stripe = conf->geo.raid_disks *
3759 ((mddev->chunk_sectors << 9) / PAGE_SIZE); 3750 ((mddev->chunk_sectors << 9) / PAGE_SIZE);
3760 mddev->queue->backing_dev_info.congested_fn = raid10_congested;
3761 mddev->queue->backing_dev_info.congested_data = mddev;
3762 3751
3763 /* Calculate max read-ahead size. 3752 /* Calculate max read-ahead size.
3764 * We need to readahead at least twice a whole stripe.... 3753 * We need to readahead at least twice a whole stripe....
@@ -4727,6 +4716,7 @@ static struct md_personality raid10_personality =
4727 .check_reshape = raid10_check_reshape, 4716 .check_reshape = raid10_check_reshape,
4728 .start_reshape = raid10_start_reshape, 4717 .start_reshape = raid10_start_reshape,
4729 .finish_reshape = raid10_finish_reshape, 4718 .finish_reshape = raid10_finish_reshape,
4719 .congested = raid10_congested,
4730}; 4720};
4731 4721
4732static int __init raid_init(void) 4722static int __init raid_init(void)
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 157d69e83ff4..5ee6473ddc2c 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -150,7 +150,4 @@ enum r10bio_state {
150 */ 150 */
151 R10BIO_Previous, 151 R10BIO_Previous,
152}; 152};
153
154extern int md_raid10_congested(struct mddev *mddev, int bits);
155
156#endif 153#endif
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a03cf2d889bf..502a908149c6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4149,7 +4149,7 @@ static void activate_bit_delay(struct r5conf *conf,
4149 } 4149 }
4150} 4150}
4151 4151
4152int md_raid5_congested(struct mddev *mddev, int bits) 4152static int raid5_congested(struct mddev *mddev, int bits)
4153{ 4153{
4154 struct r5conf *conf = mddev->private; 4154 struct r5conf *conf = mddev->private;
4155 4155
@@ -4166,15 +4166,6 @@ int md_raid5_congested(struct mddev *mddev, int bits)
4166 4166
4167 return 0; 4167 return 0;
4168} 4168}
4169EXPORT_SYMBOL_GPL(md_raid5_congested);
4170
4171static int raid5_congested(void *data, int bits)
4172{
4173 struct mddev *mddev = data;
4174
4175 return mddev_congested(mddev, bits) ||
4176 md_raid5_congested(mddev, bits);
4177}
4178 4169
4179/* We want read requests to align with chunks where possible, 4170/* We want read requests to align with chunks where possible,
4180 * but write requests don't need to. 4171 * but write requests don't need to.
@@ -6248,9 +6239,6 @@ static int run(struct mddev *mddev)
6248 6239
6249 blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); 6240 blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
6250 6241
6251 mddev->queue->backing_dev_info.congested_data = mddev;
6252 mddev->queue->backing_dev_info.congested_fn = raid5_congested;
6253
6254 chunk_size = mddev->chunk_sectors << 9; 6242 chunk_size = mddev->chunk_sectors << 9;
6255 blk_queue_io_min(mddev->queue, chunk_size); 6243 blk_queue_io_min(mddev->queue, chunk_size);
6256 blk_queue_io_opt(mddev->queue, chunk_size * 6244 blk_queue_io_opt(mddev->queue, chunk_size *
@@ -6333,8 +6321,6 @@ static int stop(struct mddev *mddev)
6333 struct r5conf *conf = mddev->private; 6321 struct r5conf *conf = mddev->private;
6334 6322
6335 md_unregister_thread(&mddev->thread); 6323 md_unregister_thread(&mddev->thread);
6336 if (mddev->queue)
6337 mddev->queue->backing_dev_info.congested_fn = NULL;
6338 free_conf(conf); 6324 free_conf(conf);
6339 mddev->private = NULL; 6325 mddev->private = NULL;
6340 mddev->to_remove = &raid5_attrs_group; 6326 mddev->to_remove = &raid5_attrs_group;
@@ -7126,6 +7112,7 @@ static struct md_personality raid6_personality =
7126 .finish_reshape = raid5_finish_reshape, 7112 .finish_reshape = raid5_finish_reshape,
7127 .quiesce = raid5_quiesce, 7113 .quiesce = raid5_quiesce,
7128 .takeover = raid6_takeover, 7114 .takeover = raid6_takeover,
7115 .congested = raid5_congested,
7129}; 7116};
7130static struct md_personality raid5_personality = 7117static struct md_personality raid5_personality =
7131{ 7118{
@@ -7148,6 +7135,7 @@ static struct md_personality raid5_personality =
7148 .finish_reshape = raid5_finish_reshape, 7135 .finish_reshape = raid5_finish_reshape,
7149 .quiesce = raid5_quiesce, 7136 .quiesce = raid5_quiesce,
7150 .takeover = raid5_takeover, 7137 .takeover = raid5_takeover,
7138 .congested = raid5_congested,
7151}; 7139};
7152 7140
7153static struct md_personality raid4_personality = 7141static struct md_personality raid4_personality =
@@ -7171,6 +7159,7 @@ static struct md_personality raid4_personality =
7171 .finish_reshape = raid5_finish_reshape, 7159 .finish_reshape = raid5_finish_reshape,
7172 .quiesce = raid5_quiesce, 7160 .quiesce = raid5_quiesce,
7173 .takeover = raid4_takeover, 7161 .takeover = raid4_takeover,
7162 .congested = raid5_congested,
7174}; 7163};
7175 7164
7176static int __init raid5_init(void) 7165static int __init raid5_init(void)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index d59f5ca743cd..983e18a83db1 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -558,7 +558,6 @@ static inline int algorithm_is_DDF(int layout)
558 return layout >= 8 && layout <= 10; 558 return layout >= 8 && layout <= 10;
559} 559}
560 560
561extern int md_raid5_congested(struct mddev *mddev, int bits);
562extern void md_raid5_kick_device(struct r5conf *conf); 561extern void md_raid5_kick_device(struct r5conf *conf);
563extern int raid5_set_cache_size(struct mddev *mddev, int size); 562extern int raid5_set_cache_size(struct mddev *mddev, int size);
564#endif 563#endif