Diffstat (limited to 'drivers/md/linear.c')
 drivers/md/linear.c | 220 +++++++++++++++-----------------------
 1 files changed, 93 insertions(+), 127 deletions(-)
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 7a36e38393a1..15c8b7b25a9b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -27,19 +27,27 @@
  */
 static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
 {
-	dev_info_t *hash;
-	linear_conf_t *conf = mddev_to_conf(mddev);
-	sector_t idx = sector >> conf->sector_shift;
+	int lo, mid, hi;
+	linear_conf_t *conf;
+
+	lo = 0;
+	hi = mddev->raid_disks - 1;
+	conf = rcu_dereference(mddev->private);
 
 	/*
-	 * sector_div(a,b) returns the remainer and sets a to a/b
+	 * Binary Search
 	 */
-	(void)sector_div(idx, conf->spacing);
-	hash = conf->hash_table[idx];
 
-	while (sector >= hash->num_sectors + hash->start_sector)
-		hash++;
-	return hash;
+	while (hi > lo) {
+
+		mid = (hi + lo) / 2;
+		if (sector < conf->disks[mid].end_sector)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	return conf->disks + lo;
 }
 
 /**
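The rewritten which_dev() is a lower-bound binary search over the cumulative
end_sector values: it returns the first disk whose end_sector is strictly
greater than the requested sector. A minimal userspace sketch of the same
invariant (find_dev() and the sizes are illustrative, not kernel API):

	#include <assert.h>

	/* end[] holds cumulative device sizes: disk i covers [end[i-1], end[i]) */
	static int find_dev(const unsigned long long *end, int nr,
			    unsigned long long sector)
	{
		int lo = 0, hi = nr - 1;

		while (hi > lo) {
			int mid = (hi + lo) / 2;

			if (sector < end[mid])
				hi = mid;	/* target at or before mid */
			else
				lo = mid + 1;	/* target after mid */
		}
		return lo;	/* smallest index with sector < end[lo] */
	}

	int main(void)
	{
		unsigned long long end[] = { 100, 250, 400 };

		assert(find_dev(end, 3, 0) == 0);
		assert(find_dev(end, 3, 99) == 0);
		assert(find_dev(end, 3, 100) == 1);	/* first sector of disk 1 */
		assert(find_dev(end, 3, 399) == 2);
		return 0;
	}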
@@ -59,8 +67,10 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
 
+	rcu_read_lock();
 	dev0 = which_dev(mddev, sector);
-	maxsectors = dev0->num_sectors - (sector - dev0->start_sector);
+	maxsectors = dev0->end_sector - sector;
+	rcu_read_unlock();
 
 	if (maxsectors < bio_sectors)
 		maxsectors = 0;
@@ -79,46 +89,57 @@ static int linear_mergeable_bvec(struct request_queue *q,
 static void linear_unplug(struct request_queue *q)
 {
 	mddev_t *mddev = q->queuedata;
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf;
 	int i;
 
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
+
 	for (i=0; i < mddev->raid_disks; i++) {
 		struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
 		blk_unplug(r_queue);
 	}
+	rcu_read_unlock();
 }
 
 static int linear_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf;
 	int i, ret = 0;
 
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
+
 	for (i = 0; i < mddev->raid_disks && !ret ; i++) {
 		struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
 		ret |= bdi_congested(&q->backing_dev_info, bits);
 	}
+
+	rcu_read_unlock();
 	return ret;
 }
 
 static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 {
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf;
+	sector_t array_sectors;
 
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
 	WARN_ONCE(sectors || raid_disks,
 		  "%s does not support generic reshape\n", __func__);
+	array_sectors = conf->array_sectors;
+	rcu_read_unlock();
 
-	return conf->array_sectors;
+	return array_sectors;
 }
 
 static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 {
 	linear_conf_t *conf;
-	dev_info_t **table;
 	mdk_rdev_t *rdev;
-	int i, nb_zone, cnt;
-	sector_t min_sectors;
-	sector_t curr_sector;
+	int i, cnt;
 
 	conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
 			GFP_KERNEL);
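Every reader converted above follows the same RCU read-side discipline: enter
the critical section, snapshot the current conf with rcu_dereference(), finish
all use of the pointer, then leave. A condensed sketch of the pattern (the
middle comment stands in for the loop bodies above):

	rcu_read_lock();
	conf = rcu_dereference(mddev->private);	/* snapshot the live conf */
	/* ... conf->disks[] and conf->array_sectors are safe to read here ... */
	rcu_read_unlock();	/* conf may be freed any time after this */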
@@ -131,6 +152,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
 		int j = rdev->raid_disk;
 		dev_info_t *disk = conf->disks + j;
+		sector_t sectors;
 
 		if (j < 0 || j >= raid_disks || disk->rdev) {
 			printk("linear: disk numbering problem. Aborting!\n");
@@ -138,6 +160,11 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		}
 
 		disk->rdev = rdev;
+		if (mddev->chunk_sectors) {
+			sectors = rdev->sectors;
+			sector_div(sectors, mddev->chunk_sectors);
+			rdev->sectors = sectors * mddev->chunk_sectors;
+		}
 
 		blk_queue_stack_limits(mddev->queue,
 				       rdev->bdev->bd_disk->queue);
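The new block rounds each member device down to a whole number of chunks;
sector_div(a, b) divides a in place and returns the remainder. A userspace
sketch with illustrative numbers (plain 64-bit division stands in for
sector_div()):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long sectors = 1000;	/* rdev->sectors */
		unsigned int chunk = 64;		/* mddev->chunk_sectors */

		sectors /= chunk;	/* sector_div(): quotient 15, remainder 40 */
		printf("%llu\n", sectors * chunk);	/* 960 usable sectors */
		return 0;
	}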
@@ -146,105 +173,27 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
-		disk->num_sectors = rdev->sectors;
 		conf->array_sectors += rdev->sectors;
-
 		cnt++;
+
 	}
 	if (cnt != raid_disks) {
 		printk("linear: not enough drives present. Aborting!\n");
 		goto out;
 	}
 
-	min_sectors = conf->array_sectors;
-	sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *));
-	if (min_sectors == 0)
-		min_sectors = 1;
-
-	/* min_sectors is the minimum spacing that will fit the hash
-	 * table in one PAGE. This may be much smaller than needed.
-	 * We find the smallest non-terminal set of consecutive devices
-	 * that is larger than min_sectors and use the size of that as
-	 * the actual spacing
-	 */
-	conf->spacing = conf->array_sectors;
-	for (i=0; i < cnt-1 ; i++) {
-		sector_t tmp = 0;
-		int j;
-		for (j = i; j < cnt - 1 && tmp < min_sectors; j++)
-			tmp += conf->disks[j].num_sectors;
-		if (tmp >= min_sectors && tmp < conf->spacing)
-			conf->spacing = tmp;
-	}
-
-	/* spacing may be too large for sector_div to work with,
-	 * so we might need to pre-shift
-	 */
-	conf->sector_shift = 0;
-	if (sizeof(sector_t) > sizeof(u32)) {
-		sector_t space = conf->spacing;
-		while (space > (sector_t)(~(u32)0)) {
-			space >>= 1;
-			conf->sector_shift++;
-		}
-	}
 	/*
-	 * This code was restructured to work around a gcc-2.95.3 internal
-	 * compiler error. Alter it with care.
+	 * Here we calculate the device offsets.
 	 */
-	{
-		sector_t sz;
-		unsigned round;
-		unsigned long base;
-
-		sz = conf->array_sectors >> conf->sector_shift;
-		sz += 1; /* force round-up */
-		base = conf->spacing >> conf->sector_shift;
-		round = sector_div(sz, base);
-		nb_zone = sz + (round ? 1 : 0);
-	}
-	BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *));
-
-	conf->hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone,
-					GFP_KERNEL);
-	if (!conf->hash_table)
-		goto out;
+	conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
 
-	/*
-	 * Here we generate the linear hash table
-	 * First calculate the device offsets.
-	 */
-	conf->disks[0].start_sector = 0;
 	for (i = 1; i < raid_disks; i++)
-		conf->disks[i].start_sector =
-			conf->disks[i-1].start_sector +
-			conf->disks[i-1].num_sectors;
-
-	table = conf->hash_table;
-	i = 0;
-	for (curr_sector = 0;
-	     curr_sector < conf->array_sectors;
-	     curr_sector += conf->spacing) {
-
-		while (i < raid_disks-1 &&
-		       curr_sector >= conf->disks[i+1].start_sector)
-			i++;
-
-		*table ++ = conf->disks + i;
-	}
-
-	if (conf->sector_shift) {
-		conf->spacing >>= conf->sector_shift;
-		/* round spacing up so that when we divide by it,
-		 * we err on the side of "too-low", which is safest.
-		 */
-		conf->spacing++;
-	}
-
-	BUG_ON(table - conf->hash_table > nb_zone);
+		conf->disks[i].end_sector =
+			conf->disks[i-1].end_sector +
+			conf->disks[i].rdev->sectors;
 
 	return conf;
 
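With the hash table gone, linear_conf() keeps only a running sum:
disks[i].end_sector is the array offset one past the end of disk i, and a
disk's start stays implicit as end_sector - rdev->sectors. A sketch of the
layout for three illustrative device sizes:

	/* sizes 100, 150, 50  =>  end_sector 100, 250, 300;
	 * disk 0 covers [0, 100), disk 1 [100, 250), disk 2 [250, 300)
	 */
	unsigned long long sizes[] = { 100, 150, 50 }, end[3];
	int i;

	end[0] = sizes[0];
	for (i = 1; i < 3; i++)
		end[i] = end[i-1] + sizes[i];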
@@ -257,6 +206,8 @@ static int linear_run (mddev_t *mddev)
 {
 	linear_conf_t *conf;
 
+	if (md_check_no_bitmap(mddev))
+		return -EINVAL;
 	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 	conf = linear_conf(mddev, mddev->raid_disks);
 
@@ -272,6 +223,12 @@ static int linear_run (mddev_t *mddev)
 	return 0;
 }
 
+static void free_conf(struct rcu_head *head)
+{
+	linear_conf_t *conf = container_of(head, linear_conf_t, rcu);
+	kfree(conf);
+}
+
 static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	/* Adding a drive to a linear array allows the array to grow.
@@ -282,7 +239,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 	 * The current one is never freed until the array is stopped.
 	 * This avoids races.
 	 */
-	linear_conf_t *newconf;
+	linear_conf_t *newconf, *oldconf;
 
 	if (rdev->saved_raid_disk != mddev->raid_disks)
 		return -EINVAL;
@@ -294,25 +251,29 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 	if (!newconf)
 		return -ENOMEM;
 
-	newconf->prev = mddev_to_conf(mddev);
-	mddev->private = newconf;
+	oldconf = rcu_dereference(mddev->private);
 	mddev->raid_disks++;
+	rcu_assign_pointer(mddev->private, newconf);
 	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 	set_capacity(mddev->gendisk, mddev->array_sectors);
+	call_rcu(&oldconf->rcu, free_conf);
 	return 0;
 }
 
 static int linear_stop (mddev_t *mddev)
 {
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf = mddev->private;
 
+	/*
+	 * We do not require rcu protection here since
+	 * we hold reconfig_mutex for both linear_add and
+	 * linear_stop, so they cannot race.
+	 * We should make sure any old 'conf's are properly
+	 * freed though.
+	 */
+	rcu_barrier();
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
-	do {
-		linear_conf_t *t = conf->prev;
-		kfree(conf->hash_table);
-		kfree(conf);
-		conf = t;
-	} while (conf);
+	kfree(conf);
 
 	return 0;
 }
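linear_add() and linear_stop() form the update side of the scheme: the fully
initialised newconf is published with rcu_assign_pointer(), the old conf is
handed to call_rcu() so it is freed only after every in-flight reader has left
its critical section, and rcu_barrier() in linear_stop() drains any
free_conf() callbacks still pending. A condensed sketch of that ordering
(names as in the patch):

	oldconf = rcu_dereference(mddev->private);	/* safe: reconfig_mutex held */
	rcu_assign_pointer(mddev->private, newconf);	/* publish initialised conf */
	call_rcu(&oldconf->rcu, free_conf);		/* kfree() after grace period */

	/* later, in linear_stop(): */
	rcu_barrier();		/* wait for pending free_conf() callbacks */
	kfree(conf);		/* the one remaining conf */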
@@ -322,6 +283,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 	const int rw = bio_data_dir(bio);
 	mddev_t *mddev = q->queuedata;
 	dev_info_t *tmp_dev;
+	sector_t start_sector;
 	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
@@ -335,33 +297,36 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 		      bio_sectors(bio));
 	part_stat_unlock();
 
+	rcu_read_lock();
 	tmp_dev = which_dev(mddev, bio->bi_sector);
-
-	if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors +
-					tmp_dev->start_sector)
-		     || (bio->bi_sector <
-			 tmp_dev->start_sector))) {
+	start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
+
+
+	if (unlikely(bio->bi_sector >= (tmp_dev->end_sector)
+		     || (bio->bi_sector < start_sector))) {
 		char b[BDEVNAME_SIZE];
 
 		printk("linear_make_request: Sector %llu out of bounds on "
 			"dev %s: %llu sectors, offset %llu\n",
 			(unsigned long long)bio->bi_sector,
 			bdevname(tmp_dev->rdev->bdev, b),
-			(unsigned long long)tmp_dev->num_sectors,
-			(unsigned long long)tmp_dev->start_sector);
+			(unsigned long long)tmp_dev->rdev->sectors,
+			(unsigned long long)start_sector);
+		rcu_read_unlock();
 		bio_io_error(bio);
 		return 0;
 	}
 	if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
-		     tmp_dev->start_sector + tmp_dev->num_sectors)) {
+		     tmp_dev->end_sector)) {
 		/* This bio crosses a device boundary, so we have to
 		 * split it.
 		 */
 		struct bio_pair *bp;
+		sector_t end_sector = tmp_dev->end_sector;
+
+		rcu_read_unlock();
 
-		bp = bio_split(bio,
-			       tmp_dev->start_sector + tmp_dev->num_sectors
-			       - bio->bi_sector);
+		bp = bio_split(bio, end_sector - bio->bi_sector);
 
 		if (linear_make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);
@@ -372,8 +337,9 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 	}
 
 	bio->bi_bdev = tmp_dev->rdev->bdev;
-	bio->bi_sector = bio->bi_sector - tmp_dev->start_sector
+	bio->bi_sector = bio->bi_sector - start_sector
 			 + tmp_dev->rdev->data_offset;
+	rcu_read_unlock();
 
 	return 1;
 }
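The final remap subtracts the device's implicit start
(end_sector - rdev->sectors) and adds the member's data_offset. A worked
example with illustrative numbers:

	/* bio at array sector 260; device covers [250, 300), data_offset 8:
	 *
	 *	start_sector = 300 - 50 = 250
	 *	bi_sector    = 260 - 250 + 8 = 18	(sector sent to the member)
	 */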
@@ -381,7 +347,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 static void linear_status (struct seq_file *seq, mddev_t *mddev)
 {
 
-	seq_printf(seq, " %dk rounding", mddev->chunk_size/1024);
+	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
 }
 
 