| author | Mike Snitzer <snitzer@redhat.com> | 2009-06-22 05:12:34 -0400 |
|---|---|---|
| committer | Alasdair G Kergon <agk@redhat.com> | 2009-06-22 05:12:34 -0400 |
| commit | 754c5fc7ebb417b23601a6222a6005cc2e7f2913 (patch) | |
| tree | 6c31b055fc26ec541d67fc1123ebaa4b7a8eae7a /drivers/md/dm-table.c | |
| parent | 18d8594dd93a1ae2fafd591ec026e87d743292bf (diff) | |
dm: calculate queue limits during resume not load
Currently, device-mapper maintains a separate instance of 'struct
queue_limits' for each table of each device. When the configuration of
a device is to be changed, first its table is loaded and this structure
is populated, then the device is 'resumed' and the calculated
queue_limits are applied.
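For illustration, the pre-patch arrangement looks roughly like this (a condensed sketch drawn from the hunks removed below; the struct member and the calls are from the old code, the wrapper names are only illustrative):

```c
/* Old flow: limits live in the table and are stacked at 'load' time. */
struct dm_table {
        /* ... */
        struct queue_limits limits;     /* one pre-calculated copy per table */
};

/* At 'load' (old dm_table_add_target()): fold each target's limits in. */
static void old_load_side(struct dm_table *t, struct dm_target *tgt)
{
        if (blk_stack_limits(&t->limits, &tgt->limits, 0) < 0)
                DMWARN("%s: target device is misaligned", dm_device_name(t->md));
}

/* At 'resume' (old dm_table_set_restrictions()): just copy them over. */
static void old_resume_side(struct dm_table *t, struct request_queue *q)
{
        q->limits = t->limits;
}
```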
This places restrictions on how userspace may process related devices,
where it is often advantageous to 'load' tables for several devices
at once before 'resuming' them together. As the new queue_limits
only take effect after the 'resume', if they are changing and one
device uses another, the latter must be 'resumed' before the former
may be 'loaded'.
This patch moves the calculation of these queue_limits out of
the 'load' operation into 'resume'. Since we are no longer
pre-calculating this struct, we no longer need to maintain copies
within our dm structs.
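In outline, the resume path is then expected to do something like the following; this is only a sketch (the actual caller lives in drivers/md/dm.c, outside this diffstat, and the wrapper name is illustrative), but the two functions it calls are the ones this patch adds or re-prototypes in dm-table.c:

```c
/* Sketch of the intended resume-time sequence. */
static int resume_limits_sketch(struct dm_table *t, struct request_queue *q)
{
        struct queue_limits limits;
        int r;

        /* Walk every target's devices, stack their limits and validate them. */
        r = dm_calculate_queue_limits(t, &limits);
        if (r)
                return r;

        /* Only now are the freshly calculated limits applied to the queue. */
        dm_table_set_restrictions(t, q, &limits);

        return 0;
}
```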
dm_set_device_limits() now passes the 'start' of the device's
data area (aka pe_start) as the 'offset' to blk_stack_limits().
init_valid_queue_limits() is replaced by blk_set_default_limits().
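Each target's working queue_limits is first seeded with blk_set_default_limits() (the replacement for the open-coded init_valid_queue_limits()), after which iterate_devices() folds in every underlying device via a callback along these lines (a condensed sketch of the new dm_set_device_limits() shown in full in the diff below; the merge_bvec_fn/max_sectors clamp is omitted here):

```c
int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
                         sector_t start, void *data)
{
        struct queue_limits *limits = data;
        struct request_queue *q = bdev_get_queue(dev->bdev);
        char b[BDEVNAME_SIZE];

        if (unlikely(!q))
                return 0;       /* nothing to stack for a nonexistent device */

        /*
         * 'start' is the offset of this device's data area (pe_start), so
         * blk_stack_limits() judges alignment relative to where the target's
         * data actually begins rather than to sector 0 of the whole device.
         */
        if (blk_stack_limits(limits, &q->limits, start) < 0)
                DMWARN("%s: target device %s is misaligned",
                       dm_device_name(ti->table->md), bdevname(dev->bdev, b));

        return 0;
}
```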
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: martin.petersen@oracle.com
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md/dm-table.c')
-rw-r--r--  drivers/md/dm-table.c  185
1 file changed, 102 insertions, 83 deletions
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 267817edc844..09a57113955e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -62,12 +62,6 @@ struct dm_table {
         /* a list of devices used by this table */
         struct list_head devices;
 
-        /*
-         * These are optimistic limits taken from all the
-         * targets, some targets will need smaller limits.
-         */
-        struct queue_limits limits;
-
         /* events get handed up using this callback */
         void (*event_fn)(void *);
         void *event_context;
@@ -346,18 +340,21 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 /*
  * If possible, this checks an area of a destination device is valid.
  */
-static int device_area_is_valid(struct dm_target *ti, struct block_device *bdev,
-                                sector_t start, sector_t len)
+static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
+                                sector_t start, void *data)
 {
-        sector_t dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+        struct queue_limits *limits = data;
+        struct block_device *bdev = dev->bdev;
+        sector_t dev_size =
+                i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
         unsigned short logical_block_size_sectors =
-                ti->limits.logical_block_size >> SECTOR_SHIFT;
+                limits->logical_block_size >> SECTOR_SHIFT;
         char b[BDEVNAME_SIZE];
 
         if (!dev_size)
                 return 1;
 
-        if ((start >= dev_size) || (start + len > dev_size)) {
+        if ((start >= dev_size) || (start + ti->len > dev_size)) {
                 DMWARN("%s: %s too small for target",
                        dm_device_name(ti->table->md), bdevname(bdev, b));
                 return 0;
@@ -371,16 +368,16 @@ static int device_area_is_valid(struct dm_target *ti, struct block_device *bdev,
                        "logical block size %hu of %s",
                        dm_device_name(ti->table->md),
                        (unsigned long long)start,
-                       ti->limits.logical_block_size, bdevname(bdev, b));
+                       limits->logical_block_size, bdevname(bdev, b));
                 return 0;
         }
 
-        if (len & (logical_block_size_sectors - 1)) {
+        if (ti->len & (logical_block_size_sectors - 1)) {
                 DMWARN("%s: len=%llu not aligned to h/w "
                        "logical block size %hu of %s",
                        dm_device_name(ti->table->md),
-                       (unsigned long long)len,
-                       ti->limits.logical_block_size, bdevname(bdev, b));
+                       (unsigned long long)ti->len,
+                       limits->logical_block_size, bdevname(bdev, b));
                 return 0;
         }
 
@@ -479,18 +476,21 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
  */
 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
 
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+                         sector_t start, void *data)
 {
+        struct queue_limits *limits = data;
+        struct block_device *bdev = dev->bdev;
         struct request_queue *q = bdev_get_queue(bdev);
         char b[BDEVNAME_SIZE];
 
         if (unlikely(!q)) {
                 DMWARN("%s: Cannot set limits for nonexistent device %s",
                        dm_device_name(ti->table->md), bdevname(bdev, b));
-                return;
+                return 0;
         }
 
-        if (blk_stack_limits(&ti->limits, &q->limits, 0) < 0)
+        if (blk_stack_limits(limits, &q->limits, start) < 0)
                 DMWARN("%s: target device %s is misaligned",
                        dm_device_name(ti->table->md), bdevname(bdev, b));
 
@@ -501,32 +501,21 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
          */
 
         if (q->merge_bvec_fn && !ti->type->merge)
-                ti->limits.max_sectors =
-                        min_not_zero(ti->limits.max_sectors,
+                limits->max_sectors =
+                        min_not_zero(limits->max_sectors,
                                      (unsigned int) (PAGE_SIZE >> 9));
+        return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
 int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
                   sector_t len, fmode_t mode, struct dm_dev **result)
 {
-        int r = __table_get_device(ti->table, ti, path,
-                                   start, len, mode, result);
-
-        if (r)
-                return r;
-
-        dm_set_device_limits(ti, (*result)->bdev);
-
-        if (!device_area_is_valid(ti, (*result)->bdev, start, len)) {
-                dm_put_device(ti, *result);
-                *result = NULL;
-                return -EINVAL;
-        }
-
-        return r;
+        return __table_get_device(ti->table, ti, path,
+                                  start, len, mode, result);
 }
 
+
 /*
  * Decrement a devices use count and remove it if necessary.
  */
@@ -641,34 +630,6 @@ int dm_split_args(int *argc, char ***argvp, char *input)
         return 0;
 }
 
-static void init_valid_queue_limits(struct queue_limits *limits)
-{
-        if (!limits->max_sectors)
-                limits->max_sectors = SAFE_MAX_SECTORS;
-        if (!limits->max_hw_sectors)
-                limits->max_hw_sectors = SAFE_MAX_SECTORS;
-        if (!limits->max_phys_segments)
-                limits->max_phys_segments = MAX_PHYS_SEGMENTS;
-        if (!limits->max_hw_segments)
-                limits->max_hw_segments = MAX_HW_SEGMENTS;
-        if (!limits->logical_block_size)
-                limits->logical_block_size = 1 << SECTOR_SHIFT;
-        if (!limits->physical_block_size)
-                limits->physical_block_size = 1 << SECTOR_SHIFT;
-        if (!limits->io_min)
-                limits->io_min = 1 << SECTOR_SHIFT;
-        if (!limits->max_segment_size)
-                limits->max_segment_size = MAX_SEGMENT_SIZE;
-        if (!limits->seg_boundary_mask)
-                limits->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-        if (!limits->bounce_pfn)
-                limits->bounce_pfn = -1;
-        /*
-         * The other fields (alignment_offset, io_opt, misaligned)
-         * hold 0 from the kzalloc().
-         */
-}
-
 /*
  * Impose necessary and sufficient conditions on a devices's table such
  * that any incoming bio which respects its logical_block_size can be
@@ -676,14 +637,15 @@ static void init_valid_queue_limits(struct queue_limits *limits)
  * two or more targets, the size of each piece it gets split into must
  * be compatible with the logical_block_size of the target processing it.
  */
-static int validate_hardware_logical_block_alignment(struct dm_table *table)
+static int validate_hardware_logical_block_alignment(struct dm_table *table,
+                                                     struct queue_limits *limits)
 {
         /*
          * This function uses arithmetic modulo the logical_block_size
          * (in units of 512-byte sectors).
          */
         unsigned short device_logical_block_size_sects =
-                table->limits.logical_block_size >> SECTOR_SHIFT;
+                limits->logical_block_size >> SECTOR_SHIFT;
 
         /*
          * Offset of the start of the next table entry, mod logical_block_size.
@@ -697,6 +659,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
         unsigned short remaining = 0;
 
         struct dm_target *uninitialized_var(ti);
+        struct queue_limits ti_limits;
         unsigned i = 0;
 
         /*
@@ -705,12 +668,19 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
         while (i < dm_table_get_num_targets(table)) {
                 ti = dm_table_get_target(table, i++);
 
+                blk_set_default_limits(&ti_limits);
+
+                /* combine all target devices' limits */
+                if (ti->type->iterate_devices)
+                        ti->type->iterate_devices(ti, dm_set_device_limits,
+                                                  &ti_limits);
+
                 /*
                  * If the remaining sectors fall entirely within this
                  * table entry are they compatible with its logical_block_size?
                  */
                 if (remaining < ti->len &&
-                    remaining & ((ti->limits.logical_block_size >>
+                    remaining & ((ti_limits.logical_block_size >>
                                   SECTOR_SHIFT) - 1))
                         break;  /* Error */
 
@@ -723,11 +693,11 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
 
         if (remaining) {
                 DMWARN("%s: table line %u (start sect %llu len %llu) "
-                       "not aligned to hardware logical block size %hu",
+                       "not aligned to h/w logical block size %hu",
                        dm_device_name(table->md), i,
                        (unsigned long long) ti->begin,
                        (unsigned long long) ti->len,
-                       table->limits.logical_block_size);
+                       limits->logical_block_size);
                 return -EINVAL;
         }
 
@@ -786,12 +756,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
         t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-        if (blk_stack_limits(&t->limits, &tgt->limits, 0) < 0)
-                DMWARN("%s: target device (start sect %llu len %llu) "
-                       "is misaligned",
-                       dm_device_name(t->md),
-                       (unsigned long long) tgt->begin,
-                       (unsigned long long) tgt->len);
         return 0;
 
  bad:
@@ -834,12 +798,6 @@ int dm_table_complete(struct dm_table *t)
         int r = 0;
         unsigned int leaf_nodes;
 
-        init_valid_queue_limits(&t->limits);
-
-        r = validate_hardware_logical_block_alignment(t);
-        if (r)
-                return r;
-
         /* how many indexes will the btree have ? */
         leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
         t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -915,6 +873,57 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
 }
 
 /*
+ * Establish the new table's queue_limits and validate them.
+ */
+int dm_calculate_queue_limits(struct dm_table *table,
+                              struct queue_limits *limits)
+{
+        struct dm_target *uninitialized_var(ti);
+        struct queue_limits ti_limits;
+        unsigned i = 0;
+
+        blk_set_default_limits(limits);
+
+        while (i < dm_table_get_num_targets(table)) {
+                blk_set_default_limits(&ti_limits);
+
+                ti = dm_table_get_target(table, i++);
+
+                if (!ti->type->iterate_devices)
+                        goto combine_limits;
+
+                /*
+                 * Combine queue limits of all the devices this target uses.
+                 */
+                ti->type->iterate_devices(ti, dm_set_device_limits,
+                                          &ti_limits);
+
+                /*
+                 * Check each device area is consistent with the target's
+                 * overall queue limits.
+                 */
+                if (!ti->type->iterate_devices(ti, device_area_is_valid,
+                                               &ti_limits))
+                        return -EINVAL;
+
+combine_limits:
+                /*
+                 * Merge this target's queue limits into the overall limits
+                 * for the table.
+                 */
+                if (blk_stack_limits(limits, &ti_limits, 0) < 0)
+                        DMWARN("%s: target device "
+                               "(start sect %llu len %llu) "
+                               "is misaligned",
+                               dm_device_name(table->md),
+                               (unsigned long long) ti->begin,
+                               (unsigned long long) ti->len);
+        }
+
+        return validate_hardware_logical_block_alignment(table, limits);
+}
+
+/*
  * Set the integrity profile for this device if all devices used have
  * matching profiles.
  */
@@ -953,14 +962,24 @@ no_integrity:
         return;
 }
 
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+                               struct queue_limits *limits)
 {
         /*
+         * Each target device in the table has a data area that should normally
+         * be aligned such that the DM device's alignment_offset is 0.
+         * FIXME: Propagate alignment_offsets up the stack and warn of
+         * sub-optimal or inconsistent settings.
+         */
+        limits->alignment_offset = 0;
+        limits->misaligned = 0;
+
+        /*
          * Copy table's limits to the DM device's request_queue
          */
-        q->limits = t->limits;
+        q->limits = *limits;
 
-        if (t->limits.no_cluster)
+        if (limits->no_cluster)
                 queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
         else
                 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);