author     Martin K. Petersen <martin.petersen@oracle.com>    2009-05-22 17:17:53 -0400
committer  Jens Axboe <jens.axboe@oracle.com>                 2009-05-22 17:22:55 -0400
commit     c72758f33784e5e2a1a4bb9421ef3e6de8f9fcf3 (patch)
tree       a83f7540cc894caafe74db911cba3998d6a9a164 /block/blk-settings.c
parent     cd43e26f071524647e660706b784ebcbefbd2e44 (diff)
block: Export I/O topology for block devices and partitions
To support devices with physical block sizes bigger than 512 bytes, we
need to ensure proper alignment. This patch adds support for exposing
I/O topology characteristics as devices are stacked.
logical_block_size is the smallest unit the device can address.
physical_block_size indicates the smallest I/O the device can write
without incurring a read-modify-write penalty.
The io_min parameter is the smallest preferred I/O size reported by
the device. In many cases this is the same as the physical block
size. However, the io_min parameter can be scaled up when stacking
(RAID5 chunk size > physical block size).
The io_opt characteristic indicates the optimal I/O size reported by
the device. This is usually the stripe width for arrays.
The alignment_offset parameter indicates the number of bytes the start
of the device/partition is offset from the device's natural alignment.
Partition tools and MD/DM utilities can use this to pad their offsets
so filesystems start on proper boundaries.
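As a purely illustrative sketch (the function name and all values below are
made up, not part of this patch), a low-level driver for a hypothetical disk
with 512-byte logical and 4096-byte physical sectors could report its
topology with the helpers introduced here:

	#include <linux/blkdev.h>

	static void example_report_topology(struct request_queue *q)
	{
		/* 512-byte addressing on top of 4096-byte physical sectors */
		blk_queue_logical_block_size(q, 512);
		blk_queue_physical_block_size(q, 4096);

		/* Hypothetical: the device reports that its first LBA is
		 * offset 3584 bytes from its natural physical alignment */
		blk_queue_alignment_offset(q, 3584);

		/* Smallest write that avoids a read-modify-write cycle */
		blk_queue_io_min(q, 4096);

		/* This device reports no larger preferred I/O size, so
		 * io_opt is simply left at its default of zero */
	}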
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block/blk-settings.c')
 -rw-r--r--  block/blk-settings.c | 186
 1 files changed, 186 insertions, 0 deletions
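The patch below also adds blk_stack_limits()/disk_stack_limits() for stacking
drivers. As a rough, hypothetical sketch of how such a driver might consume
the new helper (the function name and surrounding MD/DM plumbing are assumed,
not part of this patch):

	#include <linux/blkdev.h>
	#include <linux/genhd.h>

	/* Hypothetical helper: fold one component device into a stacked
	 * (MD/DM style) gendisk's queue limits. data_start is the first
	 * data sector used on the component device. */
	static void example_add_component(struct gendisk *stacked,
					  struct block_device *component,
					  sector_t data_start)
	{
		/*
		 * In this version of the code disk_stack_limits() takes the
		 * data offset in bytes (it adds get_start_sect(bdev) << 9
		 * itself), merges the component's queue_limits into the
		 * stacked queue, and warns if the result is misaligned.
		 */
		disk_stack_limits(stacked, component, data_start << 9);
	}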
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b0f547cecfb8..5649f34adb40 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -309,9 +309,94 @@ EXPORT_SYMBOL(blk_queue_max_segment_size);
 void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
 	q->limits.logical_block_size = size;
+
+	if (q->limits.physical_block_size < size)
+		q->limits.physical_block_size = size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
 }
 EXPORT_SYMBOL(blk_queue_logical_block_size);
 
+/**
+ * blk_queue_physical_block_size - set physical block size for the queue
+ * @q:  the request queue for the device
+ * @size:  the physical block size, in bytes
+ *
+ * Description:
+ *   This should be set to the lowest possible sector size that the
+ *   hardware can operate on without reverting to read-modify-write
+ *   operations.
+ */
+void blk_queue_physical_block_size(struct request_queue *q, unsigned short size)
+{
+	q->limits.physical_block_size = size;
+
+	if (q->limits.physical_block_size < q->limits.logical_block_size)
+		q->limits.physical_block_size = q->limits.logical_block_size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
+}
+EXPORT_SYMBOL(blk_queue_physical_block_size);
+
+/**
+ * blk_queue_alignment_offset - set physical block alignment offset
+ * @q:  the request queue for the device
+ * @offset:  alignment offset in bytes
+ *
+ * Description:
+ *   Some devices are naturally misaligned to compensate for things like
+ *   the legacy DOS partition table 63-sector offset.  Low-level drivers
+ *   should call this function for devices whose first sector is not
+ *   naturally aligned.
+ */
+void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
+{
+	q->limits.alignment_offset =
+		offset & (q->limits.physical_block_size - 1);
+	q->limits.misaligned = 0;
+}
+EXPORT_SYMBOL(blk_queue_alignment_offset);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q:  the request queue for the device
+ * @min:  smallest I/O size in bytes
+ *
+ * Description:
+ *   Some devices have an internal block size bigger than the reported
+ *   hardware sector size.  This function can be used to signal the
+ *   smallest I/O the device can perform without incurring a performance
+ *   penalty.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+	q->limits.io_min = min;
+
+	if (q->limits.io_min < q->limits.logical_block_size)
+		q->limits.io_min = q->limits.logical_block_size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
+}
+EXPORT_SYMBOL(blk_queue_io_min);
+
+/**
+ * blk_queue_io_opt - set optimal request size for the queue
+ * @q:  the request queue for the device
+ * @opt:  optimal request size in bytes
+ *
+ * Description:
+ *   Drivers can call this function to set the preferred I/O request
+ *   size for devices that report such a value.
+ */
+void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
+{
+	q->limits.io_opt = opt;
+}
+EXPORT_SYMBOL(blk_queue_io_opt);
+
 /*
  * Returns the minimum that is _not_ zero, unless both are zero.
  */
@@ -358,6 +443,107 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
 /**
+ * blk_stack_limits - adjust queue_limits for stacked devices
+ * @t:  the stacking driver limits (top)
+ * @b:  the underlying queue limits (bottom)
+ * @offset:  offset to beginning of data within component device
+ *
+ * Description:
+ *   Merges two queue_limit structs.  Returns 0 if alignment didn't
+ *   change.  Returns -1 if adding the bottom device caused
+ *   misalignment.
+ */
+int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
+		     sector_t offset)
+{
+	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
+	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+
+	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
+					    b->seg_boundary_mask);
+
+	t->max_phys_segments = min_not_zero(t->max_phys_segments,
+					    b->max_phys_segments);
+
+	t->max_hw_segments = min_not_zero(t->max_hw_segments,
+					  b->max_hw_segments);
+
+	t->max_segment_size = min_not_zero(t->max_segment_size,
+					   b->max_segment_size);
+
+	t->logical_block_size = max(t->logical_block_size,
+				    b->logical_block_size);
+
+	t->physical_block_size = max(t->physical_block_size,
+				     b->physical_block_size);
+
+	t->io_min = max(t->io_min, b->io_min);
+	t->no_cluster |= b->no_cluster;
+
+	/* Bottom device offset aligned? */
+	if (offset &&
+	    (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+		t->misaligned = 1;
+		return -1;
+	}
+
+	/* If top has no alignment offset, inherit from bottom */
+	if (!t->alignment_offset)
+		t->alignment_offset =
+			b->alignment_offset & (b->physical_block_size - 1);
+
+	/* Top device aligned on logical block boundary? */
+	if (t->alignment_offset & (t->logical_block_size - 1)) {
+		t->misaligned = 1;
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * disk_stack_limits - adjust queue limits for stacked drivers
+ * @disk:  MD/DM gendisk (top)
+ * @bdev:  the underlying block device (bottom)
+ * @offset:  offset to beginning of data within component device
+ *
+ * Description:
+ *   Merges the limits for two queues and prints a warning if stacking
+ *   the bottom device caused misalignment.
+ */
+void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
+		       sector_t offset)
+{
+	struct request_queue *t = disk->queue;
+	struct request_queue *b = bdev_get_queue(bdev);
+
+	offset += get_start_sect(bdev) << 9;
+
+	if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
+		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
+
+		disk_name(disk, 0, top);
+		bdevname(bdev, bottom);
+
+		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
+		       top, bottom);
+	}
+
+	if (!t->queue_lock)
+		WARN_ON_ONCE(1);
+	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(t->queue_lock, flags);
+		if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+			queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
+		spin_unlock_irqrestore(t->queue_lock, flags);
+	}
+}
+EXPORT_SYMBOL(disk_stack_limits);
+
+/**
  * blk_queue_dma_pad - set pad mask
  * @q:  the request queue for the device
  * @mask:  pad mask