Diffstat (limited to 'drivers/md/raid0.c')
-rw-r--r--   drivers/md/raid0.c   403
1 file changed, 211 insertions(+), 192 deletions(-)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 925507e7d673..ab4a489d8695 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -26,8 +26,8 @@
26 | static void raid0_unplug(struct request_queue *q) | 26 | static void raid0_unplug(struct request_queue *q) |
27 | { | 27 | { |
28 | mddev_t *mddev = q->queuedata; | 28 | mddev_t *mddev = q->queuedata; |
29 | raid0_conf_t *conf = mddev_to_conf(mddev); | 29 | raid0_conf_t *conf = mddev->private; |
30 | mdk_rdev_t **devlist = conf->strip_zone[0].dev; | 30 | mdk_rdev_t **devlist = conf->devlist; |
31 | int i; | 31 | int i; |
32 | 32 | ||
33 | for (i=0; i<mddev->raid_disks; i++) { | 33 | for (i=0; i<mddev->raid_disks; i++) { |
@@ -40,8 +40,8 @@ static void raid0_unplug(struct request_queue *q)
40 | static int raid0_congested(void *data, int bits) | 40 | static int raid0_congested(void *data, int bits) |
41 | { | 41 | { |
42 | mddev_t *mddev = data; | 42 | mddev_t *mddev = data; |
43 | raid0_conf_t *conf = mddev_to_conf(mddev); | 43 | raid0_conf_t *conf = mddev->private; |
44 | mdk_rdev_t **devlist = conf->strip_zone[0].dev; | 44 | mdk_rdev_t **devlist = conf->devlist; |
45 | int i, ret = 0; | 45 | int i, ret = 0; |
46 | 46 | ||
47 | for (i = 0; i < mddev->raid_disks && !ret ; i++) { | 47 | for (i = 0; i < mddev->raid_disks && !ret ; i++) { |
@@ -52,27 +52,60 @@ static int raid0_congested(void *data, int bits)
52 | return ret; | 52 | return ret; |
53 | } | 53 | } |
54 | 54 | ||
55 | /* | ||
56 | * inform the user of the raid configuration | ||
57 | */ | ||
58 | static void dump_zones(mddev_t *mddev) | ||
59 | { | ||
60 | int j, k, h; | ||
61 | sector_t zone_size = 0; | ||
62 | sector_t zone_start = 0; | ||
63 | char b[BDEVNAME_SIZE]; | ||
64 | raid0_conf_t *conf = mddev->private; | ||
65 | printk(KERN_INFO "******* %s configuration *********\n", | ||
66 | mdname(mddev)); | ||
67 | h = 0; | ||
68 | for (j = 0; j < conf->nr_strip_zones; j++) { | ||
69 | printk(KERN_INFO "zone%d=[", j); | ||
70 | for (k = 0; k < conf->strip_zone[j].nb_dev; k++) | ||
71 | printk("%s/", | ||
72 | bdevname(conf->devlist[j*mddev->raid_disks | ||
73 | + k]->bdev, b)); | ||
74 | printk("]\n"); | ||
75 | |||
76 | zone_size = conf->strip_zone[j].zone_end - zone_start; | ||
77 | printk(KERN_INFO " zone offset=%llukb " | ||
78 | "device offset=%llukb size=%llukb\n", | ||
79 | (unsigned long long)zone_start>>1, | ||
80 | (unsigned long long)conf->strip_zone[j].dev_start>>1, | ||
81 | (unsigned long long)zone_size>>1); | ||
82 | zone_start = conf->strip_zone[j].zone_end; | ||
83 | } | ||
84 | printk(KERN_INFO "**********************************\n\n"); | ||
85 | } | ||
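The new dump_zones() reports the layout at assembly time: for every zone it lists the member devices and then the zone's start, device offset (dev_start) and size, shifting sector counts right by one to print KiB (one sector is 512 bytes). A minimal user-space sketch of that bookkeeping, using a hypothetical zone_end[] table in place of conf->strip_zone[]:

#include <stdio.h>

/* hypothetical cumulative zone ends, in 512-byte sectors */
static unsigned long long zone_end[] = { 1000000ULL, 1600000ULL, 1900000ULL };

int main(void)
{
	unsigned long long zone_start = 0;

	for (int j = 0; j < 3; j++) {
		unsigned long long zone_size = zone_end[j] - zone_start;
		/* same conversion as the patch: sectors >> 1 == KiB */
		printf("zone%d offset=%llukb size=%llukb\n",
		       j, zone_start >> 1, zone_size >> 1);
		zone_start = zone_end[j];
	}
	return 0;
}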
55 | 86 | ||
56 | static int create_strip_zones (mddev_t *mddev) | 87 | static int create_strip_zones(mddev_t *mddev) |
57 | { | 88 | { |
58 | int i, c, j; | 89 | int i, c, j, err; |
59 | sector_t current_start, curr_zone_start; | 90 | sector_t curr_zone_end, sectors; |
60 | sector_t min_spacing; | 91 | mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; |
61 | raid0_conf_t *conf = mddev_to_conf(mddev); | ||
62 | mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; | ||
63 | struct strip_zone *zone; | 92 | struct strip_zone *zone; |
64 | int cnt; | 93 | int cnt; |
65 | char b[BDEVNAME_SIZE]; | 94 | char b[BDEVNAME_SIZE]; |
66 | 95 | raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL); | |
67 | /* | 96 | |
68 | * The number of 'same size groups' | 97 | if (!conf) |
69 | */ | 98 | return -ENOMEM; |
70 | conf->nr_strip_zones = 0; | ||
71 | |||
72 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 99 | list_for_each_entry(rdev1, &mddev->disks, same_set) { |
73 | printk(KERN_INFO "raid0: looking at %s\n", | 100 | printk(KERN_INFO "raid0: looking at %s\n", |
74 | bdevname(rdev1->bdev,b)); | 101 | bdevname(rdev1->bdev,b)); |
75 | c = 0; | 102 | c = 0; |
103 | |||
104 | /* round size to chunk_size */ | ||
105 | sectors = rdev1->sectors; | ||
106 | sector_div(sectors, mddev->chunk_sectors); | ||
107 | rdev1->sectors = sectors * mddev->chunk_sectors; | ||
108 | |||
76 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 109 | list_for_each_entry(rdev2, &mddev->disks, same_set) { |
77 | printk(KERN_INFO "raid0: comparing %s(%llu)", | 110 | printk(KERN_INFO "raid0: comparing %s(%llu)", |
78 | bdevname(rdev1->bdev,b), | 111 | bdevname(rdev1->bdev,b), |
@@ -103,16 +136,16 @@ static int create_strip_zones (mddev_t *mddev)
103 | } | 136 | } |
104 | } | 137 | } |
105 | printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); | 138 | printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); |
106 | 139 | err = -ENOMEM; | |
107 | conf->strip_zone = kzalloc(sizeof(struct strip_zone)* | 140 | conf->strip_zone = kzalloc(sizeof(struct strip_zone)* |
108 | conf->nr_strip_zones, GFP_KERNEL); | 141 | conf->nr_strip_zones, GFP_KERNEL); |
109 | if (!conf->strip_zone) | 142 | if (!conf->strip_zone) |
110 | return 1; | 143 | goto abort; |
111 | conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* | 144 | conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* |
112 | conf->nr_strip_zones*mddev->raid_disks, | 145 | conf->nr_strip_zones*mddev->raid_disks, |
113 | GFP_KERNEL); | 146 | GFP_KERNEL); |
114 | if (!conf->devlist) | 147 | if (!conf->devlist) |
115 | return 1; | 148 | goto abort; |
116 | 149 | ||
117 | /* The first zone must contain all devices, so here we check that | 150 | /* The first zone must contain all devices, so here we check that |
118 | * there is a proper alignment of slots to devices and find them all | 151 | * there is a proper alignment of slots to devices and find them all |
@@ -120,7 +153,8 @@ static int create_strip_zones (mddev_t *mddev)
120 | zone = &conf->strip_zone[0]; | 153 | zone = &conf->strip_zone[0]; |
121 | cnt = 0; | 154 | cnt = 0; |
122 | smallest = NULL; | 155 | smallest = NULL; |
123 | zone->dev = conf->devlist; | 156 | dev = conf->devlist; |
157 | err = -EINVAL; | ||
124 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 158 | list_for_each_entry(rdev1, &mddev->disks, same_set) { |
125 | int j = rdev1->raid_disk; | 159 | int j = rdev1->raid_disk; |
126 | 160 | ||
@@ -129,12 +163,12 @@ static int create_strip_zones (mddev_t *mddev)
129 | "aborting!\n", j); | 163 | "aborting!\n", j); |
130 | goto abort; | 164 | goto abort; |
131 | } | 165 | } |
132 | if (zone->dev[j]) { | 166 | if (dev[j]) { |
133 | printk(KERN_ERR "raid0: multiple devices for %d - " | 167 | printk(KERN_ERR "raid0: multiple devices for %d - " |
134 | "aborting!\n", j); | 168 | "aborting!\n", j); |
135 | goto abort; | 169 | goto abort; |
136 | } | 170 | } |
137 | zone->dev[j] = rdev1; | 171 | dev[j] = rdev1; |
138 | 172 | ||
139 | blk_queue_stack_limits(mddev->queue, | 173 | blk_queue_stack_limits(mddev->queue, |
140 | rdev1->bdev->bd_disk->queue); | 174 | rdev1->bdev->bd_disk->queue); |
@@ -157,34 +191,32 @@ static int create_strip_zones (mddev_t *mddev)
157 | goto abort; | 191 | goto abort; |
158 | } | 192 | } |
159 | zone->nb_dev = cnt; | 193 | zone->nb_dev = cnt; |
160 | zone->sectors = smallest->sectors * cnt; | 194 | zone->zone_end = smallest->sectors * cnt; |
161 | zone->zone_start = 0; | ||
162 | 195 | ||
163 | current_start = smallest->sectors; | 196 | curr_zone_end = zone->zone_end; |
164 | curr_zone_start = zone->sectors; | ||
165 | 197 | ||
166 | /* now do the other zones */ | 198 | /* now do the other zones */ |
167 | for (i = 1; i < conf->nr_strip_zones; i++) | 199 | for (i = 1; i < conf->nr_strip_zones; i++) |
168 | { | 200 | { |
169 | zone = conf->strip_zone + i; | 201 | zone = conf->strip_zone + i; |
170 | zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; | 202 | dev = conf->devlist + i * mddev->raid_disks; |
171 | 203 | ||
172 | printk(KERN_INFO "raid0: zone %d\n", i); | 204 | printk(KERN_INFO "raid0: zone %d\n", i); |
173 | zone->dev_start = current_start; | 205 | zone->dev_start = smallest->sectors; |
174 | smallest = NULL; | 206 | smallest = NULL; |
175 | c = 0; | 207 | c = 0; |
176 | 208 | ||
177 | for (j=0; j<cnt; j++) { | 209 | for (j=0; j<cnt; j++) { |
178 | char b[BDEVNAME_SIZE]; | 210 | char b[BDEVNAME_SIZE]; |
179 | rdev = conf->strip_zone[0].dev[j]; | 211 | rdev = conf->devlist[j]; |
180 | printk(KERN_INFO "raid0: checking %s ...", | 212 | printk(KERN_INFO "raid0: checking %s ...", |
181 | bdevname(rdev->bdev, b)); | 213 | bdevname(rdev->bdev, b)); |
182 | if (rdev->sectors <= current_start) { | 214 | if (rdev->sectors <= zone->dev_start) { |
183 | printk(KERN_INFO " nope.\n"); | 215 | printk(KERN_INFO " nope.\n"); |
184 | continue; | 216 | continue; |
185 | } | 217 | } |
186 | printk(KERN_INFO " contained as device %d\n", c); | 218 | printk(KERN_INFO " contained as device %d\n", c); |
187 | zone->dev[c] = rdev; | 219 | dev[c] = rdev; |
188 | c++; | 220 | c++; |
189 | if (!smallest || rdev->sectors < smallest->sectors) { | 221 | if (!smallest || rdev->sectors < smallest->sectors) { |
190 | smallest = rdev; | 222 | smallest = rdev; |
@@ -194,47 +226,39 @@ static int create_strip_zones (mddev_t *mddev)
194 | } | 226 | } |
195 | 227 | ||
196 | zone->nb_dev = c; | 228 | zone->nb_dev = c; |
197 | zone->sectors = (smallest->sectors - current_start) * c; | 229 | sectors = (smallest->sectors - zone->dev_start) * c; |
198 | printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", | 230 | printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", |
199 | zone->nb_dev, (unsigned long long)zone->sectors); | 231 | zone->nb_dev, (unsigned long long)sectors); |
200 | 232 | ||
201 | zone->zone_start = curr_zone_start; | 233 | curr_zone_end += sectors; |
202 | curr_zone_start += zone->sectors; | 234 | zone->zone_end = curr_zone_end; |
203 | 235 | ||
204 | current_start = smallest->sectors; | ||
205 | printk(KERN_INFO "raid0: current zone start: %llu\n", | 236 | printk(KERN_INFO "raid0: current zone start: %llu\n", |
206 | (unsigned long long)current_start); | 237 | (unsigned long long)smallest->sectors); |
207 | } | ||
208 | |||
209 | /* Now find appropriate hash spacing. | ||
210 | * We want a number which causes most hash entries to cover | ||
211 | * at most two strips, but the hash table must be at most | ||
212 | * 1 PAGE. We choose the smallest strip, or contiguous collection | ||
213 | * of strips, that has big enough size. We never consider the last | ||
214 | * strip though as it's size has no bearing on the efficacy of the hash | ||
215 | * table. | ||
216 | */ | ||
217 | conf->spacing = curr_zone_start; | ||
218 | min_spacing = curr_zone_start; | ||
219 | sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); | ||
220 | for (i=0; i < conf->nr_strip_zones-1; i++) { | ||
221 | sector_t s = 0; | ||
222 | for (j = i; j < conf->nr_strip_zones - 1 && | ||
223 | s < min_spacing; j++) | ||
224 | s += conf->strip_zone[j].sectors; | ||
225 | if (s >= min_spacing && s < conf->spacing) | ||
226 | conf->spacing = s; | ||
227 | } | 238 | } |
228 | |||
229 | mddev->queue->unplug_fn = raid0_unplug; | 239 | mddev->queue->unplug_fn = raid0_unplug; |
230 | |||
231 | mddev->queue->backing_dev_info.congested_fn = raid0_congested; | 240 | mddev->queue->backing_dev_info.congested_fn = raid0_congested; |
232 | mddev->queue->backing_dev_info.congested_data = mddev; | 241 | mddev->queue->backing_dev_info.congested_data = mddev; |
233 | 242 | ||
243 | /* | ||
244 | * now since we have the hard sector sizes, we can make sure | ||
245 | * chunk size is a multiple of that sector size | ||
246 | */ | ||
247 | if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { | ||
248 | printk(KERN_ERR "%s chunk_size of %d not valid\n", | ||
249 | mdname(mddev), | ||
250 | mddev->chunk_sectors << 9); | ||
251 | goto abort; | ||
252 | } | ||
234 | printk(KERN_INFO "raid0: done.\n"); | 253 | printk(KERN_INFO "raid0: done.\n"); |
254 | mddev->private = conf; | ||
235 | return 0; | 255 | return 0; |
236 | abort: | 256 | abort: |
237 | return 1; | 257 | kfree(conf->strip_zone); |
258 | kfree(conf->devlist); | ||
259 | kfree(conf); | ||
260 | mddev->private = NULL; | ||
261 | return err; | ||
238 | } | 262 | } |
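The rewritten create_strip_zones() allocates conf itself, returns a negative errno instead of 1 on failure, and keeps only a cumulative zone_end per zone (plus dev_start) where the old code stored a zone_start/sectors pair. The two representations carry the same information; a small sketch of the conversion, assuming a strip_zone-like struct and a made-up two-zone layout:

#include <stdio.h>

struct zone_old { unsigned long long zone_start, sectors; };
struct zone_new { unsigned long long zone_end; };	/* cumulative end, in array sectors */

int main(void)
{
	/* hypothetical layout: zone 0 is 1000000 sectors, zone 1 is 600000 sectors */
	struct zone_old old[2] = { { 0, 1000000 }, { 1000000, 600000 } };
	struct zone_new znew[2];
	unsigned long long curr_zone_end = 0;

	for (int i = 0; i < 2; i++) {
		curr_zone_end += old[i].sectors;	/* curr_zone_end in the patch */
		znew[i].zone_end = curr_zone_end;	/* zone->zone_end = curr_zone_end */
	}
	/* recovering the old fields: zone i starts where zone i-1 ends (0 for i == 0) */
	for (int i = 0; i < 2; i++) {
		unsigned long long start = i ? znew[i - 1].zone_end : 0;
		printf("zone%d start=%llu size=%llu\n",
		       i, start, znew[i].zone_end - start);
	}
	return 0;
}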
239 | 263 | ||
240 | /** | 264 | /** |
@@ -252,10 +276,15 @@ static int raid0_mergeable_bvec(struct request_queue *q,
252 | mddev_t *mddev = q->queuedata; | 276 | mddev_t *mddev = q->queuedata; |
253 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 277 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
254 | int max; | 278 | int max; |
255 | unsigned int chunk_sectors = mddev->chunk_size >> 9; | 279 | unsigned int chunk_sectors = mddev->chunk_sectors; |
256 | unsigned int bio_sectors = bvm->bi_size >> 9; | 280 | unsigned int bio_sectors = bvm->bi_size >> 9; |
257 | 281 | ||
258 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; | 282 | if (is_power_of_2(chunk_sectors)) |
283 | max = (chunk_sectors - ((sector & (chunk_sectors-1)) | ||
284 | + bio_sectors)) << 9; | ||
285 | else | ||
286 | max = (chunk_sectors - (sector_div(sector, chunk_sectors) | ||
287 | + bio_sectors)) << 9; | ||
259 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ | 288 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ |
260 | if (max <= biovec->bv_len && bio_sectors == 0) | 289 | if (max <= biovec->bv_len && bio_sectors == 0) |
261 | return biovec->bv_len; | 290 | return biovec->bv_len; |
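raid0_mergeable_bvec() now also handles chunk sizes that are not a power of two: the offset of the request inside its chunk is taken with a mask in the power-of-2 case and with sector_div() (a modulo) otherwise, and the remaining room in the chunk bounds how much may be merged. A sketch of the two equivalent calculations, with sector_div() approximated by C's % operator and made-up request values:

#include <stdio.h>

/* offset of 'sector' within its chunk, for power-of-2 and general chunk sizes */
static unsigned int chunk_offset(unsigned long long sector, unsigned int chunk_sects)
{
	if ((chunk_sects & (chunk_sects - 1)) == 0)	/* is_power_of_2(chunk_sects) */
		return sector & (chunk_sects - 1);
	return sector % chunk_sects;			/* sector_div() in the kernel */
}

int main(void)
{
	unsigned int chunk_sects = 96;		/* not a power of two */
	unsigned int bio_sectors = 8;		/* sectors already in the bio */
	unsigned long long sector = 1000;	/* start sector of the request */

	/* bytes that can still be added without crossing the chunk boundary */
	int max = (chunk_sects - (chunk_offset(sector, chunk_sects) + bio_sectors)) << 9;
	if (max < 0)
		max = 0;			/* bio_add cannot handle a negative return */
	printf("max mergeable bytes: %d\n", max);	/* (96 - (40 + 8)) * 512 = 24576 */
	return 0;
}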
@@ -277,84 +306,28 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
277 | return array_sectors; | 306 | return array_sectors; |
278 | } | 307 | } |
279 | 308 | ||
280 | static int raid0_run (mddev_t *mddev) | 309 | static int raid0_run(mddev_t *mddev) |
281 | { | 310 | { |
282 | unsigned cur=0, i=0, nb_zone; | 311 | int ret; |
283 | s64 sectors; | ||
284 | raid0_conf_t *conf; | ||
285 | 312 | ||
286 | if (mddev->chunk_size == 0) { | 313 | if (mddev->chunk_sectors == 0) { |
287 | printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); | 314 | printk(KERN_ERR "md/raid0: chunk size must be set.\n"); |
288 | return -EINVAL; | 315 | return -EINVAL; |
289 | } | 316 | } |
290 | printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n", | 317 | if (md_check_no_bitmap(mddev)) |
291 | mdname(mddev), | 318 | return -EINVAL; |
292 | mddev->chunk_size >> 9, | 319 | blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); |
293 | (mddev->chunk_size>>1)-1); | ||
294 | blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); | ||
295 | blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); | ||
296 | mddev->queue->queue_lock = &mddev->queue->__queue_lock; | 320 | mddev->queue->queue_lock = &mddev->queue->__queue_lock; |
297 | 321 | ||
298 | conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); | 322 | ret = create_strip_zones(mddev); |
299 | if (!conf) | 323 | if (ret < 0) |
300 | goto out; | 324 | return ret; |
301 | mddev->private = (void *)conf; | ||
302 | |||
303 | conf->strip_zone = NULL; | ||
304 | conf->devlist = NULL; | ||
305 | if (create_strip_zones (mddev)) | ||
306 | goto out_free_conf; | ||
307 | 325 | ||
308 | /* calculate array device size */ | 326 | /* calculate array device size */ |
309 | md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); | 327 | md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); |
310 | 328 | ||
311 | printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", | 329 | printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", |
312 | (unsigned long long)mddev->array_sectors); | 330 | (unsigned long long)mddev->array_sectors); |
313 | printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", | ||
314 | (unsigned long long)conf->spacing); | ||
315 | { | ||
316 | sector_t s = raid0_size(mddev, 0, 0); | ||
317 | sector_t space = conf->spacing; | ||
318 | int round; | ||
319 | conf->sector_shift = 0; | ||
320 | if (sizeof(sector_t) > sizeof(u32)) { | ||
321 | /*shift down space and s so that sector_div will work */ | ||
322 | while (space > (sector_t) (~(u32)0)) { | ||
323 | s >>= 1; | ||
324 | space >>= 1; | ||
325 | s += 1; /* force round-up */ | ||
326 | conf->sector_shift++; | ||
327 | } | ||
328 | } | ||
329 | round = sector_div(s, (u32)space) ? 1 : 0; | ||
330 | nb_zone = s + round; | ||
331 | } | ||
332 | printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); | ||
333 | |||
334 | printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n", | ||
335 | nb_zone*sizeof(struct strip_zone*)); | ||
336 | conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); | ||
337 | if (!conf->hash_table) | ||
338 | goto out_free_conf; | ||
339 | sectors = conf->strip_zone[cur].sectors; | ||
340 | |||
341 | conf->hash_table[0] = conf->strip_zone + cur; | ||
342 | for (i=1; i< nb_zone; i++) { | ||
343 | while (sectors <= conf->spacing) { | ||
344 | cur++; | ||
345 | sectors += conf->strip_zone[cur].sectors; | ||
346 | } | ||
347 | sectors -= conf->spacing; | ||
348 | conf->hash_table[i] = conf->strip_zone + cur; | ||
349 | } | ||
350 | if (conf->sector_shift) { | ||
351 | conf->spacing >>= conf->sector_shift; | ||
352 | /* round spacing up so when we divide by it, we | ||
353 | * err on the side of too-low, which is safest | ||
354 | */ | ||
355 | conf->spacing++; | ||
356 | } | ||
357 | |||
358 | /* calculate the max read-ahead size. | 331 | /* calculate the max read-ahead size. |
359 | * For read-ahead of large files to be effective, we need to | 332 | * For read-ahead of large files to be effective, we need to |
360 | * readahead at least twice a whole stripe. i.e. number of devices | 333 | * readahead at least twice a whole stripe. i.e. number of devices |
@@ -365,48 +338,107 @@ static int raid0_run (mddev_t *mddev)
365 | * chunksize should be used in that case. | 338 | * chunksize should be used in that case. |
366 | */ | 339 | */ |
367 | { | 340 | { |
368 | int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE; | 341 | int stripe = mddev->raid_disks * |
342 | (mddev->chunk_sectors << 9) / PAGE_SIZE; | ||
369 | if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) | 343 | if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) |
370 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; | 344 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; |
371 | } | 345 | } |
372 | 346 | ||
373 | |||
374 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); | 347 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); |
348 | dump_zones(mddev); | ||
375 | return 0; | 349 | return 0; |
350 | } | ||
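The read-ahead sizing in raid0_run() now derives the stripe width from chunk_sectors: a full stripe is raid_disks * (chunk_sectors << 9) bytes, and the queue's ra_pages is raised to two stripes so large sequential reads touch every member twice. Worked numbers under assumed values (4 disks, 512 KiB chunks, 4 KiB pages):

#include <stdio.h>

int main(void)
{
	unsigned int raid_disks = 4;
	unsigned int chunk_sectors = 1024;	/* 1024 sectors == 512 KiB */
	unsigned int page_size = 4096;
	unsigned int ra_pages = 256;		/* hypothetical current readahead */

	/* stripe = raid_disks * (chunk_sectors << 9) / PAGE_SIZE */
	unsigned int stripe = raid_disks * (chunk_sectors << 9) / page_size;	/* 512 pages */

	if (ra_pages < 2 * stripe)
		ra_pages = 2 * stripe;		/* 1024 pages == 4 MiB of readahead */
	printf("stripe=%u pages, ra_pages=%u\n", stripe, ra_pages);
	return 0;
}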
376 | 351 | ||
377 | out_free_conf: | 352 | static int raid0_stop(mddev_t *mddev) |
353 | { | ||
354 | raid0_conf_t *conf = mddev->private; | ||
355 | |||
356 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | ||
378 | kfree(conf->strip_zone); | 357 | kfree(conf->strip_zone); |
379 | kfree(conf->devlist); | 358 | kfree(conf->devlist); |
380 | kfree(conf); | 359 | kfree(conf); |
381 | mddev->private = NULL; | 360 | mddev->private = NULL; |
382 | out: | 361 | return 0; |
383 | return -ENOMEM; | ||
384 | } | 362 | } |
385 | 363 | ||
386 | static int raid0_stop (mddev_t *mddev) | 364 | /* Find the zone which holds a particular offset |
365 | * Update *sectorp to be an offset in that zone | ||
366 | */ | ||
367 | static struct strip_zone *find_zone(struct raid0_private_data *conf, | ||
368 | sector_t *sectorp) | ||
387 | { | 369 | { |
388 | raid0_conf_t *conf = mddev_to_conf(mddev); | 370 | int i; |
371 | struct strip_zone *z = conf->strip_zone; | ||
372 | sector_t sector = *sectorp; | ||
373 | |||
374 | for (i = 0; i < conf->nr_strip_zones; i++) | ||
375 | if (sector < z[i].zone_end) { | ||
376 | if (i) | ||
377 | *sectorp = sector - z[i-1].zone_end; | ||
378 | return z + i; | ||
379 | } | ||
380 | BUG(); | ||
381 | } | ||
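find_zone() is what replaces the old hash table: because zone_end values are cumulative, a linear walk finds the first zone whose end lies past the requested array sector, and *sectorp is rebased to an offset inside that zone by subtracting the previous zone's end. A user-space sketch of the same walk over a hypothetical zone table:

#include <stdio.h>

static unsigned long long zone_end[] = { 1000000ULL, 1600000ULL, 1900000ULL };
#define NR_ZONES 3

/* return the index of the zone holding *sectorp and rebase *sectorp into it */
static int find_zone(unsigned long long *sectorp)
{
	unsigned long long sector = *sectorp;

	for (int i = 0; i < NR_ZONES; i++)
		if (sector < zone_end[i]) {
			if (i)
				*sectorp = sector - zone_end[i - 1];
			return i;
		}
	return -1;	/* the kernel BUG()s instead: the sector must be in range */
}

int main(void)
{
	unsigned long long offset = 1200000;
	int z = find_zone(&offset);

	printf("sector 1200000 -> zone %d, offset %llu\n", z, offset);	/* zone 1, offset 200000 */
	return 0;
}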
389 | 382 | ||
390 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 383 | /* |
391 | kfree(conf->hash_table); | 384 | * remaps the bio to the target device. we separate two flows. |
392 | conf->hash_table = NULL; | 385 | * power 2 flow and a general flow for the sake of performance |
393 | kfree(conf->strip_zone); | 386 | */ |
394 | conf->strip_zone = NULL; | 387 | static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, |
395 | kfree(conf); | 388 | sector_t sector, sector_t *sector_offset) |
396 | mddev->private = NULL; | 389 | { |
390 | unsigned int sect_in_chunk; | ||
391 | sector_t chunk; | ||
392 | raid0_conf_t *conf = mddev->private; | ||
393 | unsigned int chunk_sects = mddev->chunk_sectors; | ||
394 | |||
395 | if (is_power_of_2(chunk_sects)) { | ||
396 | int chunksect_bits = ffz(~chunk_sects); | ||
397 | /* find the sector offset inside the chunk */ | ||
398 | sect_in_chunk = sector & (chunk_sects - 1); | ||
399 | sector >>= chunksect_bits; | ||
400 | /* chunk in zone */ | ||
401 | chunk = *sector_offset; | ||
402 | /* quotient is the chunk in real device*/ | ||
403 | sector_div(chunk, zone->nb_dev << chunksect_bits); | ||
404 | } else{ | ||
405 | sect_in_chunk = sector_div(sector, chunk_sects); | ||
406 | chunk = *sector_offset; | ||
407 | sector_div(chunk, chunk_sects * zone->nb_dev); | ||
408 | } | ||
409 | /* | ||
410 | * position the bio over the real device | ||
411 | * real sector = chunk in device + starting of zone | ||
412 | * + the position in the chunk | ||
413 | */ | ||
414 | *sector_offset = (chunk * chunk_sects) + sect_in_chunk; | ||
415 | return conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks | ||
416 | + sector_div(sector, zone->nb_dev)]; | ||
417 | } | ||
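map_sector() turns the zone-relative offset produced by find_zone() into a member device and an offset on that device: the zone offset divided by chunk_sects * nb_dev gives the chunk number on each member, the offset within the chunk is preserved, and the device index is the chunk-granular array sector modulo nb_dev. Worked numbers under assumed values (chunk_sects = 64, nb_dev = 3, a request in the first zone so the array sector equals the zone offset); raid0_make_request() then adds zone->dev_start and the rdev's data_offset to the result:

#include <stdio.h>

int main(void)
{
	unsigned int chunk_sects = 64, nb_dev = 3;
	unsigned long long sector = 5000;	/* array sector (bio->bi_sector) */
	unsigned long long zone_offset = 5000;	/* what find_zone() left in *sector_offset */

	unsigned int sect_in_chunk = sector % chunk_sects;		     /* 5000 % 64 = 8 */
	unsigned long long chunk = zone_offset / (chunk_sects * nb_dev);     /* 5000 / 192 = 26 */
	unsigned int dev = (sector / chunk_sects) % nb_dev;		     /* 78 % 3 = 0 */

	/* offset on the chosen device, before zone->dev_start and data_offset are added */
	unsigned long long dev_sector = chunk * chunk_sects + sect_in_chunk;	/* 1672 */

	printf("device %u, device sector %llu (chunk %llu, offset in chunk %u)\n",
	       dev, dev_sector, chunk, sect_in_chunk);
	return 0;
}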
397 | 418 | ||
398 | return 0; | 419 | /* |
420 | * Is io distributed over 1 or more chunks ? | ||
421 | */ | ||
422 | static inline int is_io_in_chunk_boundary(mddev_t *mddev, | ||
423 | unsigned int chunk_sects, struct bio *bio) | ||
424 | { | ||
425 | if (likely(is_power_of_2(chunk_sects))) { | ||
426 | return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) | ||
427 | + (bio->bi_size >> 9)); | ||
428 | } else{ | ||
429 | sector_t sector = bio->bi_sector; | ||
430 | return chunk_sects >= (sector_div(sector, chunk_sects) | ||
431 | + (bio->bi_size >> 9)); | ||
432 | } | ||
399 | } | 433 | } |
400 | 434 | ||
401 | static int raid0_make_request (struct request_queue *q, struct bio *bio) | 435 | static int raid0_make_request(struct request_queue *q, struct bio *bio) |
402 | { | 436 | { |
403 | mddev_t *mddev = q->queuedata; | 437 | mddev_t *mddev = q->queuedata; |
404 | unsigned int sect_in_chunk, chunksect_bits, chunk_sects; | 438 | unsigned int chunk_sects; |
405 | raid0_conf_t *conf = mddev_to_conf(mddev); | 439 | sector_t sector_offset; |
406 | struct strip_zone *zone; | 440 | struct strip_zone *zone; |
407 | mdk_rdev_t *tmp_dev; | 441 | mdk_rdev_t *tmp_dev; |
408 | sector_t chunk; | ||
409 | sector_t sector, rsect; | ||
410 | const int rw = bio_data_dir(bio); | 442 | const int rw = bio_data_dir(bio); |
411 | int cpu; | 443 | int cpu; |
412 | 444 | ||
@@ -421,11 +453,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
421 | bio_sectors(bio)); | 453 | bio_sectors(bio)); |
422 | part_stat_unlock(); | 454 | part_stat_unlock(); |
423 | 455 | ||
424 | chunk_sects = mddev->chunk_size >> 9; | 456 | chunk_sects = mddev->chunk_sectors; |
425 | chunksect_bits = ffz(~chunk_sects); | 457 | if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { |
426 | sector = bio->bi_sector; | 458 | sector_t sector = bio->bi_sector; |
427 | |||
428 | if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { | ||
429 | struct bio_pair *bp; | 459 | struct bio_pair *bp; |
430 | /* Sanity check -- queue functions should prevent this happening */ | 460 | /* Sanity check -- queue functions should prevent this happening */ |
431 | if (bio->bi_vcnt != 1 || | 461 | if (bio->bi_vcnt != 1 || |
@@ -434,7 +464,12 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
434 | /* This is a one page bio that upper layers | 464 | /* This is a one page bio that upper layers |
435 | * refuse to split for us, so we need to split it. | 465 | * refuse to split for us, so we need to split it. |
436 | */ | 466 | */ |
437 | bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); | 467 | if (likely(is_power_of_2(chunk_sects))) |
468 | bp = bio_split(bio, chunk_sects - (sector & | ||
469 | (chunk_sects-1))); | ||
470 | else | ||
471 | bp = bio_split(bio, chunk_sects - | ||
472 | sector_div(sector, chunk_sects)); | ||
438 | if (raid0_make_request(q, &bp->bio1)) | 473 | if (raid0_make_request(q, &bp->bio1)) |
439 | generic_make_request(&bp->bio1); | 474 | generic_make_request(&bp->bio1); |
440 | if (raid0_make_request(q, &bp->bio2)) | 475 | if (raid0_make_request(q, &bp->bio2)) |
@@ -443,34 +478,14 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
443 | bio_pair_release(bp); | 478 | bio_pair_release(bp); |
444 | return 0; | 479 | return 0; |
445 | } | 480 | } |
446 | |||
447 | |||
448 | { | ||
449 | sector_t x = sector >> conf->sector_shift; | ||
450 | sector_div(x, (u32)conf->spacing); | ||
451 | zone = conf->hash_table[x]; | ||
452 | } | ||
453 | 481 | ||
454 | while (sector >= zone->zone_start + zone->sectors) | 482 | sector_offset = bio->bi_sector; |
455 | zone++; | 483 | zone = find_zone(mddev->private, §or_offset); |
456 | 484 | tmp_dev = map_sector(mddev, zone, bio->bi_sector, | |
457 | sect_in_chunk = bio->bi_sector & (chunk_sects - 1); | 485 | §or_offset); |
458 | |||
459 | |||
460 | { | ||
461 | sector_t x = (sector - zone->zone_start) >> chunksect_bits; | ||
462 | |||
463 | sector_div(x, zone->nb_dev); | ||
464 | chunk = x; | ||
465 | |||
466 | x = sector >> chunksect_bits; | ||
467 | tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; | ||
468 | } | ||
469 | rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; | ||
470 | |||
471 | bio->bi_bdev = tmp_dev->bdev; | 486 | bio->bi_bdev = tmp_dev->bdev; |
472 | bio->bi_sector = rsect + tmp_dev->data_offset; | 487 | bio->bi_sector = sector_offset + zone->dev_start + |
473 | 488 | tmp_dev->data_offset; | |
474 | /* | 489 | /* |
475 | * Let the main block layer submit the IO and resolve recursion: | 490 | * Let the main block layer submit the IO and resolve recursion: |
476 | */ | 491 | */ |
@@ -485,31 +500,35 @@ bad_map:
485 | return 0; | 500 | return 0; |
486 | } | 501 | } |
487 | 502 | ||
488 | static void raid0_status (struct seq_file *seq, mddev_t *mddev) | 503 | static void raid0_status(struct seq_file *seq, mddev_t *mddev) |
489 | { | 504 | { |
490 | #undef MD_DEBUG | 505 | #undef MD_DEBUG |
491 | #ifdef MD_DEBUG | 506 | #ifdef MD_DEBUG |
492 | int j, k, h; | 507 | int j, k, h; |
493 | char b[BDEVNAME_SIZE]; | 508 | char b[BDEVNAME_SIZE]; |
494 | raid0_conf_t *conf = mddev_to_conf(mddev); | 509 | raid0_conf_t *conf = mddev->private; |
495 | 510 | ||
511 | sector_t zone_size; | ||
512 | sector_t zone_start = 0; | ||
496 | h = 0; | 513 | h = 0; |
514 | |||
497 | for (j = 0; j < conf->nr_strip_zones; j++) { | 515 | for (j = 0; j < conf->nr_strip_zones; j++) { |
498 | seq_printf(seq, " z%d", j); | 516 | seq_printf(seq, " z%d", j); |
499 | if (conf->hash_table[h] == conf->strip_zone+j) | ||
500 | seq_printf(seq, "(h%d)", h++); | ||
501 | seq_printf(seq, "=["); | 517 | seq_printf(seq, "=["); |
502 | for (k = 0; k < conf->strip_zone[j].nb_dev; k++) | 518 | for (k = 0; k < conf->strip_zone[j].nb_dev; k++) |
503 | seq_printf(seq, "%s/", bdevname( | 519 | seq_printf(seq, "%s/", bdevname( |
504 | conf->strip_zone[j].dev[k]->bdev,b)); | 520 | conf->devlist[j*mddev->raid_disks + k] |
505 | 521 | ->bdev, b)); | |
506 | seq_printf(seq, "] zs=%d ds=%d s=%d\n", | 522 | |
507 | conf->strip_zone[j].zone_start, | 523 | zone_size = conf->strip_zone[j].zone_end - zone_start; |
508 | conf->strip_zone[j].dev_start, | 524 | seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n", |
509 | conf->strip_zone[j].sectors); | 525 | (unsigned long long)zone_start>>1, |
526 | (unsigned long long)conf->strip_zone[j].dev_start>>1, | ||
527 | (unsigned long long)zone_size>>1); | ||
528 | zone_start = conf->strip_zone[j].zone_end; | ||
510 | } | 529 | } |
511 | #endif | 530 | #endif |
512 | seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); | 531 | seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2); |
513 | return; | 532 | return; |
514 | } | 533 | } |
515 | 534 | ||