diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2010-02-11 06:01:39 -0500 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2010-02-28 06:55:53 -0500 |
commit | 50a76fd3c352ed2740eba01512efcfceee0703be (patch) | |
tree | 425416e068648e225b41327a120d00bbddd16d0e /fs/exofs | |
parent | b367e78bd1c7af4c018ce98b1f6d3e001aba895a (diff) |
exofs: groups support
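* _calc_stripe_info() changes to accommodate grouping
calculations. It now also returns additional information: the remaining length in the current group, the total group length, and the major stripe number.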
* The old _prepare_pages() becomes _prepare_one_group(),
which stores pages belonging to one device group.
* The new _prepare_for_striping() iterates over all groups, calling
_prepare_one_group() for each.
* Enable mounting of grouped data_maps (group_width != 0)
[QUESTION]
Which is faster, A or B?
A. x += stride;
x = x % width + first_x;
B. x += stride;
if (x < last_x)
x = first_x;
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs')
-rw-r--r-- | fs/exofs/exofs.h | 3 | ||||
-rw-r--r-- | fs/exofs/ios.c | 129 | ||||
-rw-r--r-- | fs/exofs/super.c | 46 |
3 files changed, 141 insertions, 37 deletions
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index acfebd36de83..59b8bf2825c7 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -63,6 +63,8 @@ struct exofs_layout { | |||
63 | unsigned mirrors_p1; | 63 | unsigned mirrors_p1; |
64 | 64 | ||
65 | unsigned group_width; | 65 | unsigned group_width; |
66 | u64 group_depth; | ||
67 | unsigned group_count; | ||
66 | 68 | ||
67 | enum exofs_inode_layout_gen_functions lay_func; | 69 | enum exofs_inode_layout_gen_functions lay_func; |
68 | 70 | ||
@@ -132,6 +134,7 @@ struct exofs_io_state { | |||
132 | struct page **pages; | 134 | struct page **pages; |
133 | unsigned nr_pages; | 135 | unsigned nr_pages; |
134 | unsigned pgbase; | 136 | unsigned pgbase; |
137 | unsigned pages_consumed; | ||
135 | 138 | ||
136 | /* Attributes */ | 139 | /* Attributes */ |
137 | unsigned in_attr_len; | 140 | unsigned in_attr_len; |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index d28febdf54ab..5293bc411d17 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c | |||
@@ -262,25 +262,50 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) | |||
262 | /* | 262 | /* |
263 | * L - logical offset into the file | 263 | * L - logical offset into the file |
264 | * | 264 | * |
265 | * U - The number of bytes in a full stripe | 265 | * U - The number of bytes in a stripe within a group |
266 | * | 266 | * |
267 | * U = stripe_unit * group_width | 267 | * U = stripe_unit * group_width |
268 | * | 268 | * |
269 | * N - The stripe number | 269 | * T - The number of bytes striped within a group of component objects |
270 | * (before advancing to the next group) | ||
270 | * | 271 | * |
271 | * N = L / U | 272 | * T = stripe_unit * group_width * group_depth |
273 | * | ||
274 | * S - The number of bytes striped across all component objects | ||
275 | * before the pattern repeats | ||
276 | * | ||
277 | * S = stripe_unit * group_width * group_depth * group_count | ||
278 | * | ||
279 | * M - The "major" (i.e., across all components) stripe number | ||
280 | * | ||
281 | * M = L / S | ||
282 | * | ||
283 | * G - Counts the groups from the beginning of the major stripe | ||
284 | * | ||
285 | * G = (L - (M * S)) / T [or (L % S) / T] | ||
286 | * | ||
287 | * H - The byte offset within the group | ||
288 | * | ||
289 | * H = (L - (M * S)) % T [or (L % S) % T] | ||
290 | * | ||
291 | * N - The "minor" (i.e., across the group) stripe number | ||
292 | * | ||
293 | * N = H / U | ||
272 | * | 294 | * |
273 | * C - The component index coresponding to L | 295 | * C - The component index coresponding to L |
274 | * | 296 | * |
275 | * C = (L - (N*U)) / stripe_unit | 297 | * C = (H - (N * U)) / stripe_unit + G * group_width |
298 | * [or (L % U) / stripe_unit + G * group_width] | ||
276 | * | 299 | * |
277 | * O - The component offset coresponding to L | 300 | * O - The component offset coresponding to L |
278 | * | 301 | * |
279 | * (N*stripe_unit)+(L%stripe_unit) | 302 | * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit |
280 | */ | 303 | */ |
281 | |||
282 | struct _striping_info { | 304 | struct _striping_info { |
283 | u64 obj_offset; | 305 | u64 obj_offset; |
306 | u64 group_length; | ||
307 | u64 total_group_length; | ||
308 | u64 Major; | ||
284 | unsigned dev; | 309 | unsigned dev; |
285 | unsigned unit_off; | 310 | unsigned unit_off; |
286 | }; | 311 | }; |
@@ -290,15 +315,35 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, | |||
290 | { | 315 | { |
291 | u32 stripe_unit = ios->layout->stripe_unit; | 316 | u32 stripe_unit = ios->layout->stripe_unit; |
292 | u32 group_width = ios->layout->group_width; | 317 | u32 group_width = ios->layout->group_width; |
318 | u64 group_depth = ios->layout->group_depth; | ||
319 | |||
293 | u32 U = stripe_unit * group_width; | 320 | u32 U = stripe_unit * group_width; |
321 | u64 T = U * group_depth; | ||
322 | u64 S = T * ios->layout->group_count; | ||
323 | u64 M = div64_u64(file_offset, S); | ||
324 | |||
325 | /* | ||
326 | G = (L - (M * S)) / T | ||
327 | H = (L - (M * S)) % T | ||
328 | */ | ||
329 | u64 LmodS = file_offset - M * S; | ||
330 | u32 G = div64_u64(LmodS, T); | ||
331 | u64 H = LmodS - G * T; | ||
332 | |||
333 | u32 N = div_u64(H, U); | ||
334 | |||
335 | /* "H - (N * U)" is just "H % U" so it's bound to u32 */ | ||
336 | si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | ||
337 | si->dev *= ios->layout->mirrors_p1; | ||
294 | 338 | ||
295 | u32 LmodU; | 339 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); |
296 | u64 N = div_u64_rem(file_offset, U, &LmodU); | ||
297 | 340 | ||
298 | si->unit_off = LmodU % stripe_unit; | 341 | si->obj_offset = si->unit_off + (N * stripe_unit) + |
299 | si->obj_offset = N * stripe_unit + si->unit_off; | 342 | (M * group_depth * stripe_unit); |
300 | si->dev = LmodU / stripe_unit; | 343 | |
301 | si->dev *= ios->layout->mirrors_p1; | 344 | si->group_length = T - H; |
345 | si->total_group_length = T; | ||
346 | si->Major = M; | ||
302 | } | 347 | } |
303 | 348 | ||
304 | static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, | 349 | static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, |
@@ -345,16 +390,17 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, | |||
345 | return 0; | 390 | return 0; |
346 | } | 391 | } |
347 | 392 | ||
348 | static int _prepare_pages(struct exofs_io_state *ios, | 393 | static int _prepare_one_group(struct exofs_io_state *ios, u64 length, |
349 | struct _striping_info *si) | 394 | struct _striping_info *si, unsigned first_comp) |
350 | { | 395 | { |
351 | u64 length = ios->length; | ||
352 | unsigned stripe_unit = ios->layout->stripe_unit; | 396 | unsigned stripe_unit = ios->layout->stripe_unit; |
353 | unsigned mirrors_p1 = ios->layout->mirrors_p1; | 397 | unsigned mirrors_p1 = ios->layout->mirrors_p1; |
398 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | ||
354 | unsigned dev = si->dev; | 399 | unsigned dev = si->dev; |
355 | unsigned comp = 0; | 400 | unsigned first_dev = dev - (dev % devs_in_group); |
356 | unsigned stripes = 0; | 401 | unsigned comp = first_comp + (dev - first_dev); |
357 | unsigned cur_pg = 0; | 402 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; |
403 | unsigned cur_pg = ios->pages_consumed; | ||
358 | int ret = 0; | 404 | int ret = 0; |
359 | 405 | ||
360 | while (length) { | 406 | while (length) { |
@@ -377,10 +423,11 @@ static int _prepare_pages(struct exofs_io_state *ios, | |||
377 | cur_len = stripe_unit; | 423 | cur_len = stripe_unit; |
378 | } | 424 | } |
379 | 425 | ||
380 | stripes++; | 426 | if (max_comp < comp) |
427 | max_comp = comp; | ||
381 | 428 | ||
382 | dev += mirrors_p1; | 429 | dev += mirrors_p1; |
383 | dev %= ios->layout->s_numdevs; | 430 | dev = (dev % devs_in_group) + first_dev; |
384 | } else { | 431 | } else { |
385 | cur_len = stripe_unit; | 432 | cur_len = stripe_unit; |
386 | } | 433 | } |
@@ -393,18 +440,24 @@ static int _prepare_pages(struct exofs_io_state *ios, | |||
393 | goto out; | 440 | goto out; |
394 | 441 | ||
395 | comp += mirrors_p1; | 442 | comp += mirrors_p1; |
396 | comp %= ios->layout->s_numdevs; | 443 | comp = (comp % devs_in_group) + first_comp; |
397 | 444 | ||
398 | length -= cur_len; | 445 | length -= cur_len; |
399 | } | 446 | } |
400 | out: | 447 | out: |
401 | ios->numdevs = stripes * mirrors_p1; | 448 | ios->numdevs = max_comp + mirrors_p1; |
449 | ios->pages_consumed = cur_pg; | ||
402 | return ret; | 450 | return ret; |
403 | } | 451 | } |
404 | 452 | ||
405 | static int _prepare_for_striping(struct exofs_io_state *ios) | 453 | static int _prepare_for_striping(struct exofs_io_state *ios) |
406 | { | 454 | { |
455 | u64 length = ios->length; | ||
407 | struct _striping_info si; | 456 | struct _striping_info si; |
457 | unsigned devs_in_group = ios->layout->group_width * | ||
458 | ios->layout->mirrors_p1; | ||
459 | unsigned first_comp = 0; | ||
460 | int ret = 0; | ||
408 | 461 | ||
409 | _calc_stripe_info(ios, ios->offset, &si); | 462 | _calc_stripe_info(ios, ios->offset, &si); |
410 | 463 | ||
@@ -424,7 +477,31 @@ static int _prepare_for_striping(struct exofs_io_state *ios) | |||
424 | return 0; | 477 | return 0; |
425 | } | 478 | } |
426 | 479 | ||
427 | return _prepare_pages(ios, &si); | 480 | while (length) { |
481 | if (length < si.group_length) | ||
482 | si.group_length = length; | ||
483 | |||
484 | ret = _prepare_one_group(ios, si.group_length, &si, first_comp); | ||
485 | if (unlikely(ret)) | ||
486 | goto out; | ||
487 | |||
488 | length -= si.group_length; | ||
489 | |||
490 | si.group_length = si.total_group_length; | ||
491 | si.unit_off = 0; | ||
492 | ++si.Major; | ||
493 | si.obj_offset = si.Major * ios->layout->stripe_unit * | ||
494 | ios->layout->group_depth; | ||
495 | |||
496 | si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group; | ||
497 | si.dev %= ios->layout->s_numdevs; | ||
498 | |||
499 | first_comp += devs_in_group; | ||
500 | first_comp %= ios->layout->s_numdevs; | ||
501 | } | ||
502 | |||
503 | out: | ||
504 | return ret; | ||
428 | } | 505 | } |
429 | 506 | ||
430 | int exofs_sbi_create(struct exofs_io_state *ios) | 507 | int exofs_sbi_create(struct exofs_io_state *ios) |
@@ -482,6 +559,9 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) | |||
482 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; | 559 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; |
483 | int ret = 0; | 560 | int ret = 0; |
484 | 561 | ||
562 | if (ios->pages && !master_dev->length) | ||
563 | return 0; /* Just an empty slot */ | ||
564 | |||
485 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | 565 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
486 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; | 566 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
487 | struct osd_request *or; | 567 | struct osd_request *or; |
@@ -580,6 +660,9 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) | |||
580 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; | 660 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
581 | unsigned first_dev = (unsigned)ios->obj.id; | 661 | unsigned first_dev = (unsigned)ios->obj.id; |
582 | 662 | ||
663 | if (ios->pages && !per_dev->length) | ||
664 | return 0; /* Just an empty slot */ | ||
665 | |||
583 | first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; | 666 | first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; |
584 | or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); | 667 | or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); |
585 | if (unlikely(!or)) { | 668 | if (unlikely(!or)) { |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8f4e4b37a578..6cf5e4e84d61 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -323,11 +323,7 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | |||
323 | sbi->data_map.odm_raid_algorithm = | 323 | sbi->data_map.odm_raid_algorithm = |
324 | le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); | 324 | le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); |
325 | 325 | ||
326 | /* FIXME: Only raid0 !group_width/depth for now. if not so, do not mount */ | 326 | /* FIXME: Only raid0 for now. if not so, do not mount */ |
327 | if (sbi->data_map.odm_group_width || sbi->data_map.odm_group_depth) { | ||
328 | EXOFS_ERR("Group width/depth not supported\n"); | ||
329 | return -EINVAL; | ||
330 | } | ||
331 | if (sbi->data_map.odm_num_comps != numdevs) { | 327 | if (sbi->data_map.odm_num_comps != numdevs) { |
332 | EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n", | 328 | EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n", |
333 | sbi->data_map.odm_num_comps, numdevs); | 329 | sbi->data_map.odm_num_comps, numdevs); |
@@ -343,14 +339,6 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | |||
343 | return -EINVAL; | 339 | return -EINVAL; |
344 | } | 340 | } |
345 | 341 | ||
346 | stripe_length = sbi->data_map.odm_stripe_unit * | ||
347 | (numdevs / (sbi->data_map.odm_mirror_cnt + 1)); | ||
348 | if (stripe_length >= (1ULL << 32)) { | ||
349 | EXOFS_ERR("Total Stripe length(0x%llx)" | ||
350 | " >= 32bit is not supported\n", _LLU(stripe_length)); | ||
351 | return -EINVAL; | ||
352 | } | ||
353 | |||
354 | if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) { | 342 | if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) { |
355 | EXOFS_ERR("Stripe Unit(0x%llx)" | 343 | EXOFS_ERR("Stripe Unit(0x%llx)" |
356 | " must be Multples of PAGE_SIZE(0x%lx)\n", | 344 | " must be Multples of PAGE_SIZE(0x%lx)\n", |
@@ -360,8 +348,36 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | |||
360 | 348 | ||
361 | sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit; | 349 | sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit; |
362 | sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1; | 350 | sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1; |
363 | sbi->layout.group_width = sbi->data_map.odm_num_comps / | 351 | |
352 | if (sbi->data_map.odm_group_width) { | ||
353 | sbi->layout.group_width = sbi->data_map.odm_group_width; | ||
354 | sbi->layout.group_depth = sbi->data_map.odm_group_depth; | ||
355 | if (!sbi->layout.group_depth) { | ||
356 | EXOFS_ERR("group_depth == 0 && group_width != 0\n"); | ||
357 | return -EINVAL; | ||
358 | } | ||
359 | sbi->layout.group_count = sbi->data_map.odm_num_comps / | ||
360 | sbi->layout.mirrors_p1 / | ||
361 | sbi->data_map.odm_group_width; | ||
362 | } else { | ||
363 | if (sbi->data_map.odm_group_depth) { | ||
364 | printk(KERN_NOTICE "Warning: group_depth ignored " | ||
365 | "group_width == 0 && group_depth == %d\n", | ||
366 | sbi->data_map.odm_group_depth); | ||
367 | sbi->data_map.odm_group_depth = 0; | ||
368 | } | ||
369 | sbi->layout.group_width = sbi->data_map.odm_num_comps / | ||
364 | sbi->layout.mirrors_p1; | 370 | sbi->layout.mirrors_p1; |
371 | sbi->layout.group_depth = -1; | ||
372 | sbi->layout.group_count = 1; | ||
373 | } | ||
374 | |||
375 | stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit; | ||
376 | if (stripe_length >= (1ULL << 32)) { | ||
377 | EXOFS_ERR("Total Stripe length(0x%llx)" | ||
378 | " >= 32bit is not supported\n", _LLU(stripe_length)); | ||
379 | return -EINVAL; | ||
380 | } | ||
365 | 381 | ||
366 | return 0; | 382 | return 0; |
367 | } | 383 | } |
@@ -540,6 +556,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
540 | sbi->layout.stripe_unit = PAGE_SIZE; | 556 | sbi->layout.stripe_unit = PAGE_SIZE; |
541 | sbi->layout.mirrors_p1 = 1; | 557 | sbi->layout.mirrors_p1 = 1; |
542 | sbi->layout.group_width = 1; | 558 | sbi->layout.group_width = 1; |
559 | sbi->layout.group_depth = -1; | ||
560 | sbi->layout.group_count = 1; | ||
543 | sbi->layout.s_ods[0] = od; | 561 | sbi->layout.s_ods[0] = od; |
544 | sbi->layout.s_numdevs = 1; | 562 | sbi->layout.s_numdevs = 1; |
545 | sbi->layout.s_pid = opts->pid; | 563 | sbi->layout.s_pid = opts->pid; |