aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorMartin K. Petersen <martin.petersen@oracle.com>2009-05-22 17:17:53 -0400
committerJens Axboe <jens.axboe@oracle.com>2009-05-22 17:22:55 -0400
commitc72758f33784e5e2a1a4bb9421ef3e6de8f9fcf3 (patch)
treea83f7540cc894caafe74db911cba3998d6a9a164 /block
parentcd43e26f071524647e660706b784ebcbefbd2e44 (diff)
block: Export I/O topology for block devices and partitions
To support devices with physical block sizes bigger than 512 bytes we need to ensure proper alignment. This patch adds support for exposing I/O topology characteristics as devices are stacked. logical_block_size is the smallest unit the device can address. physical_block_size indicates the smallest I/O the device can write without incurring a read-modify-write penalty. The io_min parameter is the smallest preferred I/O size reported by the device. In many cases this is the same as the physical block size. However, the io_min parameter can be scaled up when stacking (RAID5 chunk size > physical block size). The io_opt characteristic indicates the optimal I/O size reported by the device. This is usually the stripe width for arrays. The alignment_offset parameter indicates the number of bytes the start of the device/partition is offset from the device's natural alignment. Partition tools and MD/DM utilities can use this to pad their offsets so filesystems start on proper boundaries. Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r--block/blk-settings.c186
-rw-r--r--block/blk-sysfs.c33
-rw-r--r--block/genhd.c11
3 files changed, 230 insertions, 0 deletions
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b0f547cecfb8..5649f34adb40 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -309,9 +309,94 @@ EXPORT_SYMBOL(blk_queue_max_segment_size);
309void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) 309void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
310{ 310{
311 q->limits.logical_block_size = size; 311 q->limits.logical_block_size = size;
312
313 if (q->limits.physical_block_size < size)
314 q->limits.physical_block_size = size;
315
316 if (q->limits.io_min < q->limits.physical_block_size)
317 q->limits.io_min = q->limits.physical_block_size;
312} 318}
313EXPORT_SYMBOL(blk_queue_logical_block_size); 319EXPORT_SYMBOL(blk_queue_logical_block_size);
314 320
321/**
322 * blk_queue_physical_block_size - set physical block size for the queue
323 * @q: the request queue for the device
324 * @size: the physical block size, in bytes
325 *
326 * Description:
327 * This should be set to the lowest possible sector size that the
328 * hardware can operate on without reverting to read-modify-write
329 * operations.
330 */
331void blk_queue_physical_block_size(struct request_queue *q, unsigned short size)
332{
333 q->limits.physical_block_size = size;
334
335 if (q->limits.physical_block_size < q->limits.logical_block_size)
336 q->limits.physical_block_size = q->limits.logical_block_size;
337
338 if (q->limits.io_min < q->limits.physical_block_size)
339 q->limits.io_min = q->limits.physical_block_size;
340}
341EXPORT_SYMBOL(blk_queue_physical_block_size);
342
343/**
344 * blk_queue_alignment_offset - set physical block alignment offset
345 * @q: the request queue for the device
346 * @alignment: alignment offset in bytes
347 *
348 * Description:
349 * Some devices are naturally misaligned to compensate for things like
350 * the legacy DOS partition table 63-sector offset. Low-level drivers
351 * should call this function for devices whose first sector is not
352 * naturally aligned.
353 */
354void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
355{
356 q->limits.alignment_offset =
357 offset & (q->limits.physical_block_size - 1);
358 q->limits.misaligned = 0;
359}
360EXPORT_SYMBOL(blk_queue_alignment_offset);
361
362/**
363 * blk_queue_io_min - set minimum request size for the queue
364 * @q: the request queue for the device
365 * @io_min: smallest I/O size in bytes
366 *
367 * Description:
368 * Some devices have an internal block size bigger than the reported
369 * hardware sector size. This function can be used to signal the
370 * smallest I/O the device can perform without incurring a performance
371 * penalty.
372 */
373void blk_queue_io_min(struct request_queue *q, unsigned int min)
374{
375 q->limits.io_min = min;
376
377 if (q->limits.io_min < q->limits.logical_block_size)
378 q->limits.io_min = q->limits.logical_block_size;
379
380 if (q->limits.io_min < q->limits.physical_block_size)
381 q->limits.io_min = q->limits.physical_block_size;
382}
383EXPORT_SYMBOL(blk_queue_io_min);
384
385/**
386 * blk_queue_io_opt - set optimal request size for the queue
387 * @q: the request queue for the device
388 * @io_opt: optimal request size in bytes
389 *
390 * Description:
391 * Drivers can call this function to set the preferred I/O request
392 * size for devices that report such a value.
393 */
394void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
395{
396 q->limits.io_opt = opt;
397}
398EXPORT_SYMBOL(blk_queue_io_opt);
399
315/* 400/*
316 * Returns the minimum that is _not_ zero, unless both are zero. 401 * Returns the minimum that is _not_ zero, unless both are zero.
317 */ 402 */
@@ -358,6 +443,107 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
358EXPORT_SYMBOL(blk_queue_stack_limits); 443EXPORT_SYMBOL(blk_queue_stack_limits);
359 444
360/** 445/**
446 * blk_stack_limits - adjust queue_limits for stacked devices
447 * @t: the stacking driver limits (top)
448 * @bdev: the underlying queue limits (bottom)
449 * @offset: offset to beginning of data within component device
450 *
451 * Description:
452 * Merges two queue_limit structs. Returns 0 if alignment didn't
453 * change. Returns -1 if adding the bottom device caused
454 * misalignment.
455 */
456int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
457 sector_t offset)
458{
459 t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
460 t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
461
462 t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
463 b->seg_boundary_mask);
464
465 t->max_phys_segments = min_not_zero(t->max_phys_segments,
466 b->max_phys_segments);
467
468 t->max_hw_segments = min_not_zero(t->max_hw_segments,
469 b->max_hw_segments);
470
471 t->max_segment_size = min_not_zero(t->max_segment_size,
472 b->max_segment_size);
473
474 t->logical_block_size = max(t->logical_block_size,
475 b->logical_block_size);
476
477 t->physical_block_size = max(t->physical_block_size,
478 b->physical_block_size);
479
480 t->io_min = max(t->io_min, b->io_min);
481 t->no_cluster |= b->no_cluster;
482
483 /* Bottom device offset aligned? */
484 if (offset &&
485 (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
486 t->misaligned = 1;
487 return -1;
488 }
489
490 /* If top has no alignment offset, inherit from bottom */
491 if (!t->alignment_offset)
492 t->alignment_offset =
493 b->alignment_offset & (b->physical_block_size - 1);
494
495 /* Top device aligned on logical block boundary? */
496 if (t->alignment_offset & (t->logical_block_size - 1)) {
497 t->misaligned = 1;
498 return -1;
499 }
500
501 return 0;
502}
503
504/**
505 * disk_stack_limits - adjust queue limits for stacked drivers
506 * @t: MD/DM gendisk (top)
507 * @bdev: the underlying block device (bottom)
508 * @offset: offset to beginning of data within component device
509 *
510 * Description:
511 * Merges the limits for two queues. Returns 0 if alignment
512 * didn't change. Returns -1 if adding the bottom device caused
513 * misalignment.
514 */
515void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
516 sector_t offset)
517{
518 struct request_queue *t = disk->queue;
519 struct request_queue *b = bdev_get_queue(bdev);
520
521 offset += get_start_sect(bdev) << 9;
522
523 if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
524 char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
525
526 disk_name(disk, 0, top);
527 bdevname(bdev, bottom);
528
529 printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
530 top, bottom);
531 }
532
533 if (!t->queue_lock)
534 WARN_ON_ONCE(1);
535 else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
536 unsigned long flags;
537
538 spin_lock_irqsave(t->queue_lock, flags);
539 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
540 queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
541 spin_unlock_irqrestore(t->queue_lock, flags);
542 }
543}
544EXPORT_SYMBOL(disk_stack_limits);
545
546/**
361 * blk_queue_dma_pad - set pad mask 547 * blk_queue_dma_pad - set pad mask
362 * @q: the request queue for the device 548 * @q: the request queue for the device
363 * @mask: pad mask 549 * @mask: pad mask
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ccdadb8e204..9337e17f9110 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -105,6 +105,21 @@ static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page
105 return queue_var_show(queue_logical_block_size(q), page); 105 return queue_var_show(queue_logical_block_size(q), page);
106} 106}
107 107
108static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
109{
110 return queue_var_show(queue_physical_block_size(q), page);
111}
112
113static ssize_t queue_io_min_show(struct request_queue *q, char *page)
114{
115 return queue_var_show(queue_io_min(q), page);
116}
117
118static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
119{
120 return queue_var_show(queue_io_opt(q), page);
121}
122
108static ssize_t 123static ssize_t
109queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 124queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
110{ 125{
@@ -257,6 +272,21 @@ static struct queue_sysfs_entry queue_logical_block_size_entry = {
257 .show = queue_logical_block_size_show, 272 .show = queue_logical_block_size_show,
258}; 273};
259 274
275static struct queue_sysfs_entry queue_physical_block_size_entry = {
276 .attr = {.name = "physical_block_size", .mode = S_IRUGO },
277 .show = queue_physical_block_size_show,
278};
279
280static struct queue_sysfs_entry queue_io_min_entry = {
281 .attr = {.name = "minimum_io_size", .mode = S_IRUGO },
282 .show = queue_io_min_show,
283};
284
285static struct queue_sysfs_entry queue_io_opt_entry = {
286 .attr = {.name = "optimal_io_size", .mode = S_IRUGO },
287 .show = queue_io_opt_show,
288};
289
260static struct queue_sysfs_entry queue_nonrot_entry = { 290static struct queue_sysfs_entry queue_nonrot_entry = {
261 .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, 291 .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
262 .show = queue_nonrot_show, 292 .show = queue_nonrot_show,
@@ -289,6 +319,9 @@ static struct attribute *default_attrs[] = {
289 &queue_iosched_entry.attr, 319 &queue_iosched_entry.attr,
290 &queue_hw_sector_size_entry.attr, 320 &queue_hw_sector_size_entry.attr,
291 &queue_logical_block_size_entry.attr, 321 &queue_logical_block_size_entry.attr,
322 &queue_physical_block_size_entry.attr,
323 &queue_io_min_entry.attr,
324 &queue_io_opt_entry.attr,
292 &queue_nonrot_entry.attr, 325 &queue_nonrot_entry.attr,
293 &queue_nomerges_entry.attr, 326 &queue_nomerges_entry.attr,
294 &queue_rq_affinity_entry.attr, 327 &queue_rq_affinity_entry.attr,
diff --git a/block/genhd.c b/block/genhd.c
index 1a4916e01732..fe7ccc0a618f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -852,11 +852,21 @@ static ssize_t disk_capability_show(struct device *dev,
852 return sprintf(buf, "%x\n", disk->flags); 852 return sprintf(buf, "%x\n", disk->flags);
853} 853}
854 854
855static ssize_t disk_alignment_offset_show(struct device *dev,
856 struct device_attribute *attr,
857 char *buf)
858{
859 struct gendisk *disk = dev_to_disk(dev);
860
861 return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
862}
863
855static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 864static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
856static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); 865static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
857static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 866static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
858static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 867static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
859static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 868static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
869static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
860static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 870static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
861static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 871static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
862#ifdef CONFIG_FAIL_MAKE_REQUEST 872#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -875,6 +885,7 @@ static struct attribute *disk_attrs[] = {
875 &dev_attr_removable.attr, 885 &dev_attr_removable.attr,
876 &dev_attr_ro.attr, 886 &dev_attr_ro.attr,
877 &dev_attr_size.attr, 887 &dev_attr_size.attr,
888 &dev_attr_alignment_offset.attr,
878 &dev_attr_capability.attr, 889 &dev_attr_capability.attr,
879 &dev_attr_stat.attr, 890 &dev_attr_stat.attr,
880#ifdef CONFIG_FAIL_MAKE_REQUEST 891#ifdef CONFIG_FAIL_MAKE_REQUEST