diff options
author | Martin K. Petersen <martin.petersen@oracle.com> | 2009-05-22 17:17:53 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-05-22 17:22:55 -0400 |
commit | c72758f33784e5e2a1a4bb9421ef3e6de8f9fcf3 (patch) | |
tree | a83f7540cc894caafe74db911cba3998d6a9a164 /block | |
parent | cd43e26f071524647e660706b784ebcbefbd2e44 (diff) |
block: Export I/O topology for block devices and partitions
To support devices with physical block sizes bigger than 512 bytes we
need to ensure proper alignment. This patch adds support for exposing
I/O topology characteristics as devices are stacked.
logical_block_size is the smallest unit the device can address.
physical_block_size indicates the smallest I/O the device can write
without incurring a read-modify-write penalty.
The io_min parameter is the smallest preferred I/O size reported by
the device. In many cases this is the same as the physical block
size. However, the io_min parameter can be scaled up when stacking
(RAID5 chunk size > physical block size).
The io_opt characteristic indicates the optimal I/O size reported by
the device. This is usually the stripe width for arrays.
The alignment_offset parameter indicates the number of bytes the start
of the device/partition is offset from the device's natural alignment.
Partition tools and MD/DM utilities can use this to pad their offsets
so filesystems start on proper boundaries.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-settings.c | 186 | ||||
-rw-r--r-- | block/blk-sysfs.c | 33 | ||||
-rw-r--r-- | block/genhd.c | 11 |
3 files changed, 230 insertions, 0 deletions
diff --git a/block/blk-settings.c b/block/blk-settings.c index b0f547cecfb8..5649f34adb40 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c | |||
@@ -309,9 +309,94 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); | |||
309 | void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) | 309 | void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) |
310 | { | 310 | { |
311 | q->limits.logical_block_size = size; | 311 | q->limits.logical_block_size = size; |
312 | |||
313 | if (q->limits.physical_block_size < size) | ||
314 | q->limits.physical_block_size = size; | ||
315 | |||
316 | if (q->limits.io_min < q->limits.physical_block_size) | ||
317 | q->limits.io_min = q->limits.physical_block_size; | ||
312 | } | 318 | } |
313 | EXPORT_SYMBOL(blk_queue_logical_block_size); | 319 | EXPORT_SYMBOL(blk_queue_logical_block_size); |
314 | 320 | ||
321 | /** | ||
322 | * blk_queue_physical_block_size - set physical block size for the queue | ||
323 | * @q: the request queue for the device | ||
324 | * @size: the physical block size, in bytes | ||
325 | * | ||
326 | * Description: | ||
327 | * This should be set to the lowest possible sector size that the | ||
328 | * hardware can operate on without reverting to read-modify-write | ||
329 | * operations. | ||
330 | */ | ||
331 | void blk_queue_physical_block_size(struct request_queue *q, unsigned short size) | ||
332 | { | ||
333 | q->limits.physical_block_size = size; | ||
334 | |||
335 | if (q->limits.physical_block_size < q->limits.logical_block_size) | ||
336 | q->limits.physical_block_size = q->limits.logical_block_size; | ||
337 | |||
338 | if (q->limits.io_min < q->limits.physical_block_size) | ||
339 | q->limits.io_min = q->limits.physical_block_size; | ||
340 | } | ||
341 | EXPORT_SYMBOL(blk_queue_physical_block_size); | ||
342 | |||
343 | /** | ||
344 | * blk_queue_alignment_offset - set physical block alignment offset | ||
345 | * @q: the request queue for the device | ||
346 | * @offset: alignment offset in bytes | ||
347 | * | ||
348 | * Description: | ||
349 | * Some devices are naturally misaligned to compensate for things like | ||
350 | * the legacy DOS partition table 63-sector offset. Low-level drivers | ||
351 | * should call this function for devices whose first sector is not | ||
352 | * naturally aligned. | ||
353 | */ | ||
354 | void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) | ||
355 | { | ||
356 | q->limits.alignment_offset = | ||
357 | offset & (q->limits.physical_block_size - 1); | ||
358 | q->limits.misaligned = 0; | ||
359 | } | ||
360 | EXPORT_SYMBOL(blk_queue_alignment_offset); | ||
361 | |||
362 | /** | ||
363 | * blk_queue_io_min - set minimum request size for the queue | ||
364 | * @q: the request queue for the device | ||
365 | * @min: smallest I/O size in bytes | ||
366 | * | ||
367 | * Description: | ||
368 | * Some devices have an internal block size bigger than the reported | ||
369 | * hardware sector size. This function can be used to signal the | ||
370 | * smallest I/O the device can perform without incurring a performance | ||
371 | * penalty. | ||
372 | */ | ||
373 | void blk_queue_io_min(struct request_queue *q, unsigned int min) | ||
374 | { | ||
375 | q->limits.io_min = min; | ||
376 | |||
377 | if (q->limits.io_min < q->limits.logical_block_size) | ||
378 | q->limits.io_min = q->limits.logical_block_size; | ||
379 | |||
380 | if (q->limits.io_min < q->limits.physical_block_size) | ||
381 | q->limits.io_min = q->limits.physical_block_size; | ||
382 | } | ||
383 | EXPORT_SYMBOL(blk_queue_io_min); | ||
384 | |||
385 | /** | ||
386 | * blk_queue_io_opt - set optimal request size for the queue | ||
387 | * @q: the request queue for the device | ||
388 | * @opt: optimal request size in bytes | ||
389 | * | ||
390 | * Description: | ||
391 | * Drivers can call this function to set the preferred I/O request | ||
392 | * size for devices that report such a value. | ||
393 | */ | ||
394 | void blk_queue_io_opt(struct request_queue *q, unsigned int opt) | ||
395 | { | ||
396 | q->limits.io_opt = opt; | ||
397 | } | ||
398 | EXPORT_SYMBOL(blk_queue_io_opt); | ||
399 | |||
315 | /* | 400 | /* |
316 | * Returns the minimum that is _not_ zero, unless both are zero. | 401 | * Returns the minimum that is _not_ zero, unless both are zero. |
317 | */ | 402 | */ |
@@ -358,6 +443,107 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) | |||
358 | EXPORT_SYMBOL(blk_queue_stack_limits); | 443 | EXPORT_SYMBOL(blk_queue_stack_limits); |
359 | 444 | ||
360 | /** | 445 | /** |
446 | * blk_stack_limits - adjust queue_limits for stacked devices | ||
447 | * @t: the stacking driver limits (top) | ||
448 | * @b: the underlying queue limits (bottom) | ||
449 | * @offset: offset to beginning of data within component device | ||
450 | * | ||
451 | * Description: | ||
452 | * Merges two queue_limit structs. Returns 0 if alignment didn't | ||
453 | * change. Returns -1 if adding the bottom device caused | ||
454 | * misalignment. | ||
455 | */ | ||
456 | int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | ||
457 | sector_t offset) | ||
458 | { | ||
459 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); | ||
460 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); | ||
461 | |||
462 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, | ||
463 | b->seg_boundary_mask); | ||
464 | |||
465 | t->max_phys_segments = min_not_zero(t->max_phys_segments, | ||
466 | b->max_phys_segments); | ||
467 | |||
468 | t->max_hw_segments = min_not_zero(t->max_hw_segments, | ||
469 | b->max_hw_segments); | ||
470 | |||
471 | t->max_segment_size = min_not_zero(t->max_segment_size, | ||
472 | b->max_segment_size); | ||
473 | |||
474 | t->logical_block_size = max(t->logical_block_size, | ||
475 | b->logical_block_size); | ||
476 | |||
477 | t->physical_block_size = max(t->physical_block_size, | ||
478 | b->physical_block_size); | ||
479 | |||
480 | t->io_min = max(t->io_min, b->io_min); | ||
481 | t->no_cluster |= b->no_cluster; | ||
482 | |||
483 | /* Bottom device offset aligned? */ | ||
484 | if (offset && | ||
485 | (offset & (b->physical_block_size - 1)) != b->alignment_offset) { | ||
486 | t->misaligned = 1; | ||
487 | return -1; | ||
488 | } | ||
489 | |||
490 | /* If top has no alignment offset, inherit from bottom */ | ||
491 | if (!t->alignment_offset) | ||
492 | t->alignment_offset = | ||
493 | b->alignment_offset & (b->physical_block_size - 1); | ||
494 | |||
495 | /* Top device aligned on logical block boundary? */ | ||
496 | if (t->alignment_offset & (t->logical_block_size - 1)) { | ||
497 | t->misaligned = 1; | ||
498 | return -1; | ||
499 | } | ||
500 | |||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | /** | ||
505 | * disk_stack_limits - adjust queue limits for stacked drivers | ||
506 | * @disk: MD/DM gendisk (top) | ||
507 | * @bdev: the underlying block device (bottom) | ||
508 | * @offset: offset to beginning of data within component device | ||
509 | * | ||
510 | * Description: | ||
511 | * Merges the limits for two queues. Does not return a value; | ||
512 | * if adding the bottom device caused misalignment, a warning | ||
513 | * is logged instead. | ||
514 | */ | ||
515 | void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, | ||
516 | sector_t offset) | ||
517 | { | ||
518 | struct request_queue *t = disk->queue; | ||
519 | struct request_queue *b = bdev_get_queue(bdev); | ||
520 | |||
521 | offset += get_start_sect(bdev) << 9; | ||
522 | |||
523 | if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) { | ||
524 | char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; | ||
525 | |||
526 | disk_name(disk, 0, top); | ||
527 | bdevname(bdev, bottom); | ||
528 | |||
529 | printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", | ||
530 | top, bottom); | ||
531 | } | ||
532 | |||
533 | if (!t->queue_lock) | ||
534 | WARN_ON_ONCE(1); | ||
535 | else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { | ||
536 | unsigned long flags; | ||
537 | |||
538 | spin_lock_irqsave(t->queue_lock, flags); | ||
539 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) | ||
540 | queue_flag_clear(QUEUE_FLAG_CLUSTER, t); | ||
541 | spin_unlock_irqrestore(t->queue_lock, flags); | ||
542 | } | ||
543 | } | ||
544 | EXPORT_SYMBOL(disk_stack_limits); | ||
545 | |||
546 | /** | ||
361 | * blk_queue_dma_pad - set pad mask | 547 | * blk_queue_dma_pad - set pad mask |
362 | * @q: the request queue for the device | 548 | * @q: the request queue for the device |
363 | * @mask: pad mask | 549 | * @mask: pad mask |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3ccdadb8e204..9337e17f9110 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -105,6 +105,21 @@ static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page | |||
105 | return queue_var_show(queue_logical_block_size(q), page); | 105 | return queue_var_show(queue_logical_block_size(q), page); |
106 | } | 106 | } |
107 | 107 | ||
108 | static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page) | ||
109 | { | ||
110 | return queue_var_show(queue_physical_block_size(q), page); | ||
111 | } | ||
112 | |||
113 | static ssize_t queue_io_min_show(struct request_queue *q, char *page) | ||
114 | { | ||
115 | return queue_var_show(queue_io_min(q), page); | ||
116 | } | ||
117 | |||
118 | static ssize_t queue_io_opt_show(struct request_queue *q, char *page) | ||
119 | { | ||
120 | return queue_var_show(queue_io_opt(q), page); | ||
121 | } | ||
122 | |||
108 | static ssize_t | 123 | static ssize_t |
109 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | 124 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) |
110 | { | 125 | { |
@@ -257,6 +272,21 @@ static struct queue_sysfs_entry queue_logical_block_size_entry = { | |||
257 | .show = queue_logical_block_size_show, | 272 | .show = queue_logical_block_size_show, |
258 | }; | 273 | }; |
259 | 274 | ||
275 | static struct queue_sysfs_entry queue_physical_block_size_entry = { | ||
276 | .attr = {.name = "physical_block_size", .mode = S_IRUGO }, | ||
277 | .show = queue_physical_block_size_show, | ||
278 | }; | ||
279 | |||
280 | static struct queue_sysfs_entry queue_io_min_entry = { | ||
281 | .attr = {.name = "minimum_io_size", .mode = S_IRUGO }, | ||
282 | .show = queue_io_min_show, | ||
283 | }; | ||
284 | |||
285 | static struct queue_sysfs_entry queue_io_opt_entry = { | ||
286 | .attr = {.name = "optimal_io_size", .mode = S_IRUGO }, | ||
287 | .show = queue_io_opt_show, | ||
288 | }; | ||
289 | |||
260 | static struct queue_sysfs_entry queue_nonrot_entry = { | 290 | static struct queue_sysfs_entry queue_nonrot_entry = { |
261 | .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, | 291 | .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, |
262 | .show = queue_nonrot_show, | 292 | .show = queue_nonrot_show, |
@@ -289,6 +319,9 @@ static struct attribute *default_attrs[] = { | |||
289 | &queue_iosched_entry.attr, | 319 | &queue_iosched_entry.attr, |
290 | &queue_hw_sector_size_entry.attr, | 320 | &queue_hw_sector_size_entry.attr, |
291 | &queue_logical_block_size_entry.attr, | 321 | &queue_logical_block_size_entry.attr, |
322 | &queue_physical_block_size_entry.attr, | ||
323 | &queue_io_min_entry.attr, | ||
324 | &queue_io_opt_entry.attr, | ||
292 | &queue_nonrot_entry.attr, | 325 | &queue_nonrot_entry.attr, |
293 | &queue_nomerges_entry.attr, | 326 | &queue_nomerges_entry.attr, |
294 | &queue_rq_affinity_entry.attr, | 327 | &queue_rq_affinity_entry.attr, |
diff --git a/block/genhd.c b/block/genhd.c index 1a4916e01732..fe7ccc0a618f 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -852,11 +852,21 @@ static ssize_t disk_capability_show(struct device *dev, | |||
852 | return sprintf(buf, "%x\n", disk->flags); | 852 | return sprintf(buf, "%x\n", disk->flags); |
853 | } | 853 | } |
854 | 854 | ||
855 | static ssize_t disk_alignment_offset_show(struct device *dev, | ||
856 | struct device_attribute *attr, | ||
857 | char *buf) | ||
858 | { | ||
859 | struct gendisk *disk = dev_to_disk(dev); | ||
860 | |||
861 | return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue)); | ||
862 | } | ||
863 | |||
855 | static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); | 864 | static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); |
856 | static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); | 865 | static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); |
857 | static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); | 866 | static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); |
858 | static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); | 867 | static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); |
859 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); | 868 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); |
869 | static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); | ||
860 | static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); | 870 | static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); |
861 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); | 871 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); |
862 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 872 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
@@ -875,6 +885,7 @@ static struct attribute *disk_attrs[] = { | |||
875 | &dev_attr_removable.attr, | 885 | &dev_attr_removable.attr, |
876 | &dev_attr_ro.attr, | 886 | &dev_attr_ro.attr, |
877 | &dev_attr_size.attr, | 887 | &dev_attr_size.attr, |
888 | &dev_attr_alignment_offset.attr, | ||
878 | &dev_attr_capability.attr, | 889 | &dev_attr_capability.attr, |
879 | &dev_attr_stat.attr, | 890 | &dev_attr_stat.attr, |
880 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 891 | #ifdef CONFIG_FAIL_MAKE_REQUEST |