author     Mike Snitzer <snitzer@redhat.com>       2009-06-22 05:12:34 -0400
committer  Alasdair G Kergon <agk@redhat.com>      2009-06-22 05:12:34 -0400
commit     754c5fc7ebb417b23601a6222a6005cc2e7f2913
tree       6c31b055fc26ec541d67fc1123ebaa4b7a8eae7a
parent     18d8594dd93a1ae2fafd591ec026e87d743292bf
dm: calculate queue limits during resume not load
Currently, device-mapper maintains a separate instance of 'struct queue_limits' for each table of each device.  When the configuration of a device is to be changed, first its table is loaded and this structure is populated, then the device is 'resumed' and the calculated queue_limits are applied.

This places restrictions on how userspace may process related devices, where it is often advantageous to 'load' tables for several devices at once before 'resuming' them together.  As the new queue_limits only take effect after the 'resume', if they are changing and one device uses another, the latter must be 'resumed' before the former may be 'loaded'.

This patch moves the calculation of these queue_limits out of the 'load' operation into 'resume'.  Since we are no longer pre-calculating this struct, we no longer need to maintain copies within our dm structs.

dm_set_device_limits() now passes the 'start' of the device's data area (aka pe_start) as the 'offset' to blk_stack_limits().

init_valid_queue_limits() is replaced by blk_set_default_limits().

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: martin.petersen@oracle.com
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r--  drivers/md/dm-table.c          185
-rw-r--r--  drivers/md/dm.c                 12
-rw-r--r--  drivers/md/dm.h                  5
-rw-r--r--  include/linux/device-mapper.h   10
4 files changed, 117 insertions(+), 95 deletions(-)
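The reworked limit stacking below depends on each target type providing an iterate_devices method that invokes a callout (dm_set_device_limits() or device_area_is_valid()) once for every underlying device the target uses. As a rough illustration only, not part of this patch, a single-device target could wire this up roughly as follows, assuming the iterate_devices_callout_fn type introduced alongside this series; example_iterate_devices, struct example_ctx and its dev/start fields are hypothetical names:

/*
 * Hypothetical sketch: a target backed by one device forwards the callout,
 * passing the start of its data area so that blk_stack_limits() receives the
 * correct offset when the callout is dm_set_device_limits().
 */
static int example_iterate_devices(struct dm_target *ti,
				   iterate_devices_callout_fn fn, void *data)
{
	struct example_ctx *ec = ti->private;	/* target's private context */

	return fn(ti, ec->dev, ec->start, data);
}

A target would register such a helper in its struct target_type; dm_calculate_queue_limits() can then stack every underlying device's queue_limits without the per-target copies removed by this patch.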
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 267817edc844..09a57113955e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -62,12 +62,6 @@ struct dm_table {
 	/* a list of devices used by this table */
 	struct list_head devices;
 
-	/*
-	 * These are optimistic limits taken from all the
-	 * targets, some targets will need smaller limits.
-	 */
-	struct queue_limits limits;
-
 	/* events get handed up using this callback */
 	void (*event_fn)(void *);
 	void *event_context;
@@ -346,18 +340,21 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 /*
  * If possible, this checks an area of a destination device is valid.
  */
-static int device_area_is_valid(struct dm_target *ti, struct block_device *bdev,
-				sector_t start, sector_t len)
+static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, void *data)
 {
-	sector_t dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+	struct queue_limits *limits = data;
+	struct block_device *bdev = dev->bdev;
+	sector_t dev_size =
+		i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
 	unsigned short logical_block_size_sectors =
-		ti->limits.logical_block_size >> SECTOR_SHIFT;
+		limits->logical_block_size >> SECTOR_SHIFT;
 	char b[BDEVNAME_SIZE];
 
 	if (!dev_size)
 		return 1;
 
-	if ((start >= dev_size) || (start + len > dev_size)) {
+	if ((start >= dev_size) || (start + ti->len > dev_size)) {
 		DMWARN("%s: %s too small for target",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
 		return 0;
@@ -371,16 +368,16 @@ static int device_area_is_valid(struct dm_target *ti, struct block_device *bdev,
 		       "logical block size %hu of %s",
 		       dm_device_name(ti->table->md),
 		       (unsigned long long)start,
-		       ti->limits.logical_block_size, bdevname(bdev, b));
+		       limits->logical_block_size, bdevname(bdev, b));
 		return 0;
 	}
 
-	if (len & (logical_block_size_sectors - 1)) {
+	if (ti->len & (logical_block_size_sectors - 1)) {
 		DMWARN("%s: len=%llu not aligned to h/w "
 		       "logical block size %hu of %s",
 		       dm_device_name(ti->table->md),
-		       (unsigned long long)len,
-		       ti->limits.logical_block_size, bdevname(bdev, b));
+		       (unsigned long long)ti->len,
+		       limits->logical_block_size, bdevname(bdev, b));
 		return 0;
 	}
 
@@ -479,18 +476,21 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
  */
 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
 
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+			 sector_t start, void *data)
 {
+	struct queue_limits *limits = data;
+	struct block_device *bdev = dev->bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
 	char b[BDEVNAME_SIZE];
 
 	if (unlikely(!q)) {
 		DMWARN("%s: Cannot set limits for nonexistent device %s",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
-		return;
+		return 0;
 	}
 
-	if (blk_stack_limits(&ti->limits, &q->limits, 0) < 0)
+	if (blk_stack_limits(limits, &q->limits, start) < 0)
 		DMWARN("%s: target device %s is misaligned",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
 
@@ -501,32 +501,21 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		ti->limits.max_sectors =
-			min_not_zero(ti->limits.max_sectors,
+		limits->max_sectors =
+			min_not_zero(limits->max_sectors,
 				     (unsigned int) (PAGE_SIZE >> 9));
+	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
 int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 		  sector_t len, fmode_t mode, struct dm_dev **result)
 {
-	int r = __table_get_device(ti->table, ti, path,
-				   start, len, mode, result);
-
-	if (r)
-		return r;
-
-	dm_set_device_limits(ti, (*result)->bdev);
-
-	if (!device_area_is_valid(ti, (*result)->bdev, start, len)) {
-		dm_put_device(ti, *result);
-		*result = NULL;
-		return -EINVAL;
-	}
-
-	return r;
+	return __table_get_device(ti->table, ti, path,
+				  start, len, mode, result);
 }
 
+
 /*
  * Decrement a devices use count and remove it if necessary.
  */
@@ -641,34 +630,6 @@ int dm_split_args(int *argc, char ***argvp, char *input)
 	return 0;
 }
 
-static void init_valid_queue_limits(struct queue_limits *limits)
-{
-	if (!limits->max_sectors)
-		limits->max_sectors = SAFE_MAX_SECTORS;
-	if (!limits->max_hw_sectors)
-		limits->max_hw_sectors = SAFE_MAX_SECTORS;
-	if (!limits->max_phys_segments)
-		limits->max_phys_segments = MAX_PHYS_SEGMENTS;
-	if (!limits->max_hw_segments)
-		limits->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!limits->logical_block_size)
-		limits->logical_block_size = 1 << SECTOR_SHIFT;
-	if (!limits->physical_block_size)
-		limits->physical_block_size = 1 << SECTOR_SHIFT;
-	if (!limits->io_min)
-		limits->io_min = 1 << SECTOR_SHIFT;
-	if (!limits->max_segment_size)
-		limits->max_segment_size = MAX_SEGMENT_SIZE;
-	if (!limits->seg_boundary_mask)
-		limits->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	if (!limits->bounce_pfn)
-		limits->bounce_pfn = -1;
-	/*
-	 * The other fields (alignment_offset, io_opt, misaligned)
-	 * hold 0 from the kzalloc().
-	 */
-}
-
 /*
  * Impose necessary and sufficient conditions on a devices's table such
  * that any incoming bio which respects its logical_block_size can be
@@ -676,14 +637,15 @@ static void init_valid_queue_limits(struct queue_limits *limits)
  * two or more targets, the size of each piece it gets split into must
  * be compatible with the logical_block_size of the target processing it.
  */
-static int validate_hardware_logical_block_alignment(struct dm_table *table)
+static int validate_hardware_logical_block_alignment(struct dm_table *table,
+						 struct queue_limits *limits)
 {
 	/*
 	 * This function uses arithmetic modulo the logical_block_size
 	 * (in units of 512-byte sectors).
 	 */
 	unsigned short device_logical_block_size_sects =
-		table->limits.logical_block_size >> SECTOR_SHIFT;
+		limits->logical_block_size >> SECTOR_SHIFT;
 
 	/*
 	 * Offset of the start of the next table entry, mod logical_block_size.
@@ -697,6 +659,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
 	unsigned short remaining = 0;
 
 	struct dm_target *uninitialized_var(ti);
+	struct queue_limits ti_limits;
 	unsigned i = 0;
 
 	/*
@@ -705,12 +668,19 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
 	while (i < dm_table_get_num_targets(table)) {
 		ti = dm_table_get_target(table, i++);
 
+		blk_set_default_limits(&ti_limits);
+
+		/* combine all target devices' limits */
+		if (ti->type->iterate_devices)
+			ti->type->iterate_devices(ti, dm_set_device_limits,
+						  &ti_limits);
+
 		/*
 		 * If the remaining sectors fall entirely within this
 		 * table entry are they compatible with its logical_block_size?
 		 */
 		if (remaining < ti->len &&
-		    remaining & ((ti->limits.logical_block_size >>
+		    remaining & ((ti_limits.logical_block_size >>
 				  SECTOR_SHIFT) - 1))
 			break;	/* Error */
 
@@ -723,11 +693,11 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
 
 	if (remaining) {
 		DMWARN("%s: table line %u (start sect %llu len %llu) "
-		       "not aligned to hardware logical block size %hu",
+		       "not aligned to h/w logical block size %hu",
 		       dm_device_name(table->md), i,
 		       (unsigned long long) ti->begin,
 		       (unsigned long long) ti->len,
-		       table->limits.logical_block_size);
+		       limits->logical_block_size);
 		return -EINVAL;
 	}
 
@@ -786,12 +756,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	if (blk_stack_limits(&t->limits, &tgt->limits, 0) < 0)
-		DMWARN("%s: target device (start sect %llu len %llu) "
-		       "is misaligned",
-		       dm_device_name(t->md),
-		       (unsigned long long) tgt->begin,
-		       (unsigned long long) tgt->len);
 	return 0;
 
  bad:
@@ -834,12 +798,6 @@ int dm_table_complete(struct dm_table *t)
 	int r = 0;
 	unsigned int leaf_nodes;
 
-	init_valid_queue_limits(&t->limits);
-
-	r = validate_hardware_logical_block_alignment(t);
-	if (r)
-		return r;
-
 	/* how many indexes will the btree have ? */
 	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
 	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -915,6 +873,57 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
 }
 
 /*
+ * Establish the new table's queue_limits and validate them.
+ */
+int dm_calculate_queue_limits(struct dm_table *table,
+			      struct queue_limits *limits)
+{
+	struct dm_target *uninitialized_var(ti);
+	struct queue_limits ti_limits;
+	unsigned i = 0;
+
+	blk_set_default_limits(limits);
+
+	while (i < dm_table_get_num_targets(table)) {
+		blk_set_default_limits(&ti_limits);
+
+		ti = dm_table_get_target(table, i++);
+
+		if (!ti->type->iterate_devices)
+			goto combine_limits;
+
+		/*
+		 * Combine queue limits of all the devices this target uses.
+		 */
+		ti->type->iterate_devices(ti, dm_set_device_limits,
+					  &ti_limits);
+
+		/*
+		 * Check each device area is consistent with the target's
+		 * overall queue limits.
+		 */
+		if (!ti->type->iterate_devices(ti, device_area_is_valid,
+					       &ti_limits))
+			return -EINVAL;
+
+combine_limits:
+		/*
+		 * Merge this target's queue limits into the overall limits
+		 * for the table.
+		 */
+		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
+			DMWARN("%s: target device "
+			       "(start sect %llu len %llu) "
+			       "is misaligned",
+			       dm_device_name(table->md),
+			       (unsigned long long) ti->begin,
+			       (unsigned long long) ti->len);
+	}
+
+	return validate_hardware_logical_block_alignment(table, limits);
+}
+
+/*
  * Set the integrity profile for this device if all devices used have
  * matching profiles.
  */
@@ -953,14 +962,24 @@ no_integrity:
 	return;
 }
 
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			       struct queue_limits *limits)
 {
 	/*
+	 * Each target device in the table has a data area that should normally
+	 * be aligned such that the DM device's alignment_offset is 0.
+	 * FIXME: Propagate alignment_offsets up the stack and warn of
+	 * sub-optimal or inconsistent settings.
+	 */
+	limits->alignment_offset = 0;
+	limits->misaligned = 0;
+
+	/*
 	 * Copy table's limits to the DM device's request_queue
 	 */
-	q->limits = t->limits;
+	q->limits = *limits;
 
-	if (t->limits.no_cluster)
+	if (limits->no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 	else
 		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a9210bb594e7..f609793a92d0 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1313,7 +1313,8 @@ static void __set_size(struct mapped_device *md, sector_t size)
 	mutex_unlock(&md->bdev->bd_inode->i_mutex);
 }
 
-static int __bind(struct mapped_device *md, struct dm_table *t)
+static int __bind(struct mapped_device *md, struct dm_table *t,
+		  struct queue_limits *limits)
 {
 	struct request_queue *q = md->queue;
 	sector_t size;
@@ -1337,7 +1338,7 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
 
 	write_lock(&md->map_lock);
 	md->map = t;
-	dm_table_set_restrictions(t, q);
+	dm_table_set_restrictions(t, q, limits);
 	write_unlock(&md->map_lock);
 
 	return 0;
@@ -1562,6 +1563,7 @@ static void dm_queue_flush(struct mapped_device *md)
  */
 int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
+	struct queue_limits limits;
 	int r = -EINVAL;
 
 	mutex_lock(&md->suspend_lock);
@@ -1570,8 +1572,12 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	if (!dm_suspended(md))
 		goto out;
 
+	r = dm_calculate_queue_limits(table, &limits);
+	if (r)
+		goto out;
+
 	__unbind(md);
-	r = __bind(md, table);
+	r = __bind(md, table, &limits);
 
 out:
 	mutex_unlock(&md->suspend_lock);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index b5935c610c44..604e85caadf6 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -41,7 +41,10 @@ void dm_table_event_callback(struct dm_table *t,
 			      void (*fn)(void *), void *context);
 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q);
+int dm_calculate_queue_limits(struct dm_table *table,
+			      struct queue_limits *limits);
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			       struct queue_limits *limits);
 struct list_head *dm_table_get_devices(struct dm_table *t);
 void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index deac3b4e5e18..e6bf3b8c7bf2 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -103,7 +103,8 @@ void dm_error(const char *message);
 /*
  * Combine device limits.
  */
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev);
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+			 sector_t start, void *data);
 
 struct dm_dev {
 	struct block_device *bdev;
@@ -163,7 +164,6 @@ struct dm_target {
 	sector_t begin;
 	sector_t len;
 
-	/* FIXME: turn this into a mask, and merge with queue_limits */
 	/* Always a power of 2 */
 	sector_t split_io;
 
@@ -177,12 +177,6 @@ struct dm_target {
 	 */
 	unsigned num_flush_requests;
 
-	/*
-	 * These are automatically filled in by
-	 * dm_table_get_device.
-	 */
-	struct queue_limits limits;
-
 	/* target specific data */
 	void *private;
 