aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-table.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-table.c')
-rw-r--r--drivers/md/dm-table.c465
1 files changed, 326 insertions, 139 deletions
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e9a73bb242b0..4899ebe767c8 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -41,6 +41,7 @@
41struct dm_table { 41struct dm_table {
42 struct mapped_device *md; 42 struct mapped_device *md;
43 atomic_t holders; 43 atomic_t holders;
44 unsigned type;
44 45
45 /* btree table */ 46 /* btree table */
46 unsigned int depth; 47 unsigned int depth;
@@ -62,15 +63,11 @@ struct dm_table {
62 /* a list of devices used by this table */ 63 /* a list of devices used by this table */
63 struct list_head devices; 64 struct list_head devices;
64 65
65 /*
66 * These are optimistic limits taken from all the
67 * targets, some targets will need smaller limits.
68 */
69 struct io_restrictions limits;
70
71 /* events get handed up using this callback */ 66 /* events get handed up using this callback */
72 void (*event_fn)(void *); 67 void (*event_fn)(void *);
73 void *event_context; 68 void *event_context;
69
70 struct dm_md_mempools *mempools;
74}; 71};
75 72
76/* 73/*
@@ -89,43 +86,6 @@ static unsigned int int_log(unsigned int n, unsigned int base)
89} 86}
90 87
91/* 88/*
92 * Returns the minimum that is _not_ zero, unless both are zero.
93 */
94#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
95
96/*
97 * Combine two io_restrictions, always taking the lower value.
98 */
99static void combine_restrictions_low(struct io_restrictions *lhs,
100 struct io_restrictions *rhs)
101{
102 lhs->max_sectors =
103 min_not_zero(lhs->max_sectors, rhs->max_sectors);
104
105 lhs->max_phys_segments =
106 min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
107
108 lhs->max_hw_segments =
109 min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
110
111 lhs->logical_block_size = max(lhs->logical_block_size,
112 rhs->logical_block_size);
113
114 lhs->max_segment_size =
115 min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
116
117 lhs->max_hw_sectors =
118 min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors);
119
120 lhs->seg_boundary_mask =
121 min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
122
123 lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
124
125 lhs->no_cluster |= rhs->no_cluster;
126}
127
128/*
129 * Calculate the index of the child node of the n'th node k'th key. 89 * Calculate the index of the child node of the n'th node k'th key.
130 */ 90 */
131static inline unsigned int get_child(unsigned int n, unsigned int k) 91static inline unsigned int get_child(unsigned int n, unsigned int k)
@@ -267,6 +227,8 @@ static void free_devices(struct list_head *devices)
267 list_for_each_safe(tmp, next, devices) { 227 list_for_each_safe(tmp, next, devices) {
268 struct dm_dev_internal *dd = 228 struct dm_dev_internal *dd =
269 list_entry(tmp, struct dm_dev_internal, list); 229 list_entry(tmp, struct dm_dev_internal, list);
230 DMWARN("dm_table_destroy: dm_put_device call missing for %s",
231 dd->dm_dev.name);
270 kfree(dd); 232 kfree(dd);
271 } 233 }
272} 234}
@@ -296,12 +258,10 @@ void dm_table_destroy(struct dm_table *t)
296 vfree(t->highs); 258 vfree(t->highs);
297 259
298 /* free the device list */ 260 /* free the device list */
299 if (t->devices.next != &t->devices) { 261 if (t->devices.next != &t->devices)
300 DMWARN("devices still present during destroy: "
301 "dm_table_remove_device calls missing");
302
303 free_devices(&t->devices); 262 free_devices(&t->devices);
304 } 263
264 dm_free_md_mempools(t->mempools);
305 265
306 kfree(t); 266 kfree(t);
307} 267}
@@ -385,15 +345,48 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
385/* 345/*
386 * If possible, this checks an area of a destination device is valid. 346 * If possible, this checks an area of a destination device is valid.
387 */ 347 */
388static int check_device_area(struct dm_dev_internal *dd, sector_t start, 348static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
389 sector_t len) 349 sector_t start, void *data)
390{ 350{
391 sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT; 351 struct queue_limits *limits = data;
352 struct block_device *bdev = dev->bdev;
353 sector_t dev_size =
354 i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
355 unsigned short logical_block_size_sectors =
356 limits->logical_block_size >> SECTOR_SHIFT;
357 char b[BDEVNAME_SIZE];
392 358
393 if (!dev_size) 359 if (!dev_size)
394 return 1; 360 return 1;
395 361
396 return ((start < dev_size) && (len <= (dev_size - start))); 362 if ((start >= dev_size) || (start + ti->len > dev_size)) {
363 DMWARN("%s: %s too small for target",
364 dm_device_name(ti->table->md), bdevname(bdev, b));
365 return 0;
366 }
367
368 if (logical_block_size_sectors <= 1)
369 return 1;
370
371 if (start & (logical_block_size_sectors - 1)) {
372 DMWARN("%s: start=%llu not aligned to h/w "
373 "logical block size %hu of %s",
374 dm_device_name(ti->table->md),
375 (unsigned long long)start,
376 limits->logical_block_size, bdevname(bdev, b));
377 return 0;
378 }
379
380 if (ti->len & (logical_block_size_sectors - 1)) {
381 DMWARN("%s: len=%llu not aligned to h/w "
382 "logical block size %hu of %s",
383 dm_device_name(ti->table->md),
384 (unsigned long long)ti->len,
385 limits->logical_block_size, bdevname(bdev, b));
386 return 0;
387 }
388
389 return 1;
397} 390}
398 391
399/* 392/*
@@ -479,38 +472,32 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
479 } 472 }
480 atomic_inc(&dd->count); 473 atomic_inc(&dd->count);
481 474
482 if (!check_device_area(dd, start, len)) {
483 DMWARN("device %s too small for target", path);
484 dm_put_device(ti, &dd->dm_dev);
485 return -EINVAL;
486 }
487
488 *result = &dd->dm_dev; 475 *result = &dd->dm_dev;
489
490 return 0; 476 return 0;
491} 477}
492 478
493void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) 479/*
480 * Returns the minimum that is _not_ zero, unless both are zero.
481 */
482#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
483
484int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
485 sector_t start, void *data)
494{ 486{
487 struct queue_limits *limits = data;
488 struct block_device *bdev = dev->bdev;
495 struct request_queue *q = bdev_get_queue(bdev); 489 struct request_queue *q = bdev_get_queue(bdev);
496 struct io_restrictions *rs = &ti->limits;
497 char b[BDEVNAME_SIZE]; 490 char b[BDEVNAME_SIZE];
498 491
499 if (unlikely(!q)) { 492 if (unlikely(!q)) {
500 DMWARN("%s: Cannot set limits for nonexistent device %s", 493 DMWARN("%s: Cannot set limits for nonexistent device %s",
501 dm_device_name(ti->table->md), bdevname(bdev, b)); 494 dm_device_name(ti->table->md), bdevname(bdev, b));
502 return; 495 return 0;
503 } 496 }
504 497
505 /* 498 if (blk_stack_limits(limits, &q->limits, start) < 0)
506 * Combine the device limits low. 499 DMWARN("%s: target device %s is misaligned",
507 * 500 dm_device_name(ti->table->md), bdevname(bdev, b));
508 * FIXME: if we move an io_restriction struct
509 * into q this would just be a call to
510 * combine_restrictions_low()
511 */
512 rs->max_sectors =
513 min_not_zero(rs->max_sectors, queue_max_sectors(q));
514 501
515 /* 502 /*
516 * Check if merge fn is supported. 503 * Check if merge fn is supported.
@@ -519,48 +506,21 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
519 */ 506 */
520 507
521 if (q->merge_bvec_fn && !ti->type->merge) 508 if (q->merge_bvec_fn && !ti->type->merge)
522 rs->max_sectors = 509 limits->max_sectors =
523 min_not_zero(rs->max_sectors, 510 min_not_zero(limits->max_sectors,
524 (unsigned int) (PAGE_SIZE >> 9)); 511 (unsigned int) (PAGE_SIZE >> 9));
525 512 return 0;
526 rs->max_phys_segments =
527 min_not_zero(rs->max_phys_segments,
528 queue_max_phys_segments(q));
529
530 rs->max_hw_segments =
531 min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
532
533 rs->logical_block_size = max(rs->logical_block_size,
534 queue_logical_block_size(q));
535
536 rs->max_segment_size =
537 min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
538
539 rs->max_hw_sectors =
540 min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
541
542 rs->seg_boundary_mask =
543 min_not_zero(rs->seg_boundary_mask,
544 queue_segment_boundary(q));
545
546 rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
547
548 rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
549} 513}
550EXPORT_SYMBOL_GPL(dm_set_device_limits); 514EXPORT_SYMBOL_GPL(dm_set_device_limits);
551 515
552int dm_get_device(struct dm_target *ti, const char *path, sector_t start, 516int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
553 sector_t len, fmode_t mode, struct dm_dev **result) 517 sector_t len, fmode_t mode, struct dm_dev **result)
554{ 518{
555 int r = __table_get_device(ti->table, ti, path, 519 return __table_get_device(ti->table, ti, path,
556 start, len, mode, result); 520 start, len, mode, result);
557
558 if (!r)
559 dm_set_device_limits(ti, (*result)->bdev);
560
561 return r;
562} 521}
563 522
523
564/* 524/*
565 * Decrement a devices use count and remove it if necessary. 525 * Decrement a devices use count and remove it if necessary.
566 */ 526 */
@@ -675,24 +635,78 @@ int dm_split_args(int *argc, char ***argvp, char *input)
675 return 0; 635 return 0;
676} 636}
677 637
678static void check_for_valid_limits(struct io_restrictions *rs) 638/*
639 * Impose necessary and sufficient conditions on a devices's table such
640 * that any incoming bio which respects its logical_block_size can be
641 * processed successfully. If it falls across the boundary between
642 * two or more targets, the size of each piece it gets split into must
643 * be compatible with the logical_block_size of the target processing it.
644 */
645static int validate_hardware_logical_block_alignment(struct dm_table *table,
646 struct queue_limits *limits)
679{ 647{
680 if (!rs->max_sectors) 648 /*
681 rs->max_sectors = SAFE_MAX_SECTORS; 649 * This function uses arithmetic modulo the logical_block_size
682 if (!rs->max_hw_sectors) 650 * (in units of 512-byte sectors).
683 rs->max_hw_sectors = SAFE_MAX_SECTORS; 651 */
684 if (!rs->max_phys_segments) 652 unsigned short device_logical_block_size_sects =
685 rs->max_phys_segments = MAX_PHYS_SEGMENTS; 653 limits->logical_block_size >> SECTOR_SHIFT;
686 if (!rs->max_hw_segments) 654
687 rs->max_hw_segments = MAX_HW_SEGMENTS; 655 /*
688 if (!rs->logical_block_size) 656 * Offset of the start of the next table entry, mod logical_block_size.
689 rs->logical_block_size = 1 << SECTOR_SHIFT; 657 */
690 if (!rs->max_segment_size) 658 unsigned short next_target_start = 0;
691 rs->max_segment_size = MAX_SEGMENT_SIZE; 659
692 if (!rs->seg_boundary_mask) 660 /*
693 rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; 661 * Given an aligned bio that extends beyond the end of a
694 if (!rs->bounce_pfn) 662 * target, how many sectors must the next target handle?
695 rs->bounce_pfn = -1; 663 */
664 unsigned short remaining = 0;
665
666 struct dm_target *uninitialized_var(ti);
667 struct queue_limits ti_limits;
668 unsigned i = 0;
669
670 /*
671 * Check each entry in the table in turn.
672 */
673 while (i < dm_table_get_num_targets(table)) {
674 ti = dm_table_get_target(table, i++);
675
676 blk_set_default_limits(&ti_limits);
677
678 /* combine all target devices' limits */
679 if (ti->type->iterate_devices)
680 ti->type->iterate_devices(ti, dm_set_device_limits,
681 &ti_limits);
682
683 /*
684 * If the remaining sectors fall entirely within this
685 * table entry are they compatible with its logical_block_size?
686 */
687 if (remaining < ti->len &&
688 remaining & ((ti_limits.logical_block_size >>
689 SECTOR_SHIFT) - 1))
690 break; /* Error */
691
692 next_target_start =
693 (unsigned short) ((next_target_start + ti->len) &
694 (device_logical_block_size_sects - 1));
695 remaining = next_target_start ?
696 device_logical_block_size_sects - next_target_start : 0;
697 }
698
699 if (remaining) {
700 DMWARN("%s: table line %u (start sect %llu len %llu) "
701 "not aligned to h/w logical block size %hu",
702 dm_device_name(table->md), i,
703 (unsigned long long) ti->begin,
704 (unsigned long long) ti->len,
705 limits->logical_block_size);
706 return -EINVAL;
707 }
708
709 return 0;
696} 710}
697 711
698int dm_table_add_target(struct dm_table *t, const char *type, 712int dm_table_add_target(struct dm_table *t, const char *type,
@@ -747,9 +761,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
747 761
748 t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; 762 t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
749 763
750 /* FIXME: the plan is to combine high here and then have
751 * the merge fn apply the target level restrictions. */
752 combine_restrictions_low(&t->limits, &tgt->limits);
753 return 0; 764 return 0;
754 765
755 bad: 766 bad:
@@ -758,6 +769,104 @@ int dm_table_add_target(struct dm_table *t, const char *type,
758 return r; 769 return r;
759} 770}
760 771
772int dm_table_set_type(struct dm_table *t)
773{
774 unsigned i;
775 unsigned bio_based = 0, request_based = 0;
776 struct dm_target *tgt;
777 struct dm_dev_internal *dd;
778 struct list_head *devices;
779
780 for (i = 0; i < t->num_targets; i++) {
781 tgt = t->targets + i;
782 if (dm_target_request_based(tgt))
783 request_based = 1;
784 else
785 bio_based = 1;
786
787 if (bio_based && request_based) {
788 DMWARN("Inconsistent table: different target types"
789 " can't be mixed up");
790 return -EINVAL;
791 }
792 }
793
794 if (bio_based) {
795 /* We must use this table as bio-based */
796 t->type = DM_TYPE_BIO_BASED;
797 return 0;
798 }
799
800 BUG_ON(!request_based); /* No targets in this table */
801
802 /* Non-request-stackable devices can't be used for request-based dm */
803 devices = dm_table_get_devices(t);
804 list_for_each_entry(dd, devices, list) {
805 if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
806 DMWARN("table load rejected: including"
807 " non-request-stackable devices");
808 return -EINVAL;
809 }
810 }
811
812 /*
813 * Request-based dm supports only tables that have a single target now.
814 * To support multiple targets, request splitting support is needed,
815 * and that needs lots of changes in the block-layer.
816 * (e.g. request completion process for partial completion.)
817 */
818 if (t->num_targets > 1) {
819 DMWARN("Request-based dm doesn't support multiple targets yet");
820 return -EINVAL;
821 }
822
823 t->type = DM_TYPE_REQUEST_BASED;
824
825 return 0;
826}
827
828unsigned dm_table_get_type(struct dm_table *t)
829{
830 return t->type;
831}
832
833bool dm_table_bio_based(struct dm_table *t)
834{
835 return dm_table_get_type(t) == DM_TYPE_BIO_BASED;
836}
837
838bool dm_table_request_based(struct dm_table *t)
839{
840 return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
841}
842
843int dm_table_alloc_md_mempools(struct dm_table *t)
844{
845 unsigned type = dm_table_get_type(t);
846
847 if (unlikely(type == DM_TYPE_NONE)) {
848 DMWARN("no table type is set, can't allocate mempools");
849 return -EINVAL;
850 }
851
852 t->mempools = dm_alloc_md_mempools(type);
853 if (!t->mempools)
854 return -ENOMEM;
855
856 return 0;
857}
858
859void dm_table_free_md_mempools(struct dm_table *t)
860{
861 dm_free_md_mempools(t->mempools);
862 t->mempools = NULL;
863}
864
865struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
866{
867 return t->mempools;
868}
869
761static int setup_indexes(struct dm_table *t) 870static int setup_indexes(struct dm_table *t)
762{ 871{
763 int i; 872 int i;
@@ -792,8 +901,6 @@ int dm_table_complete(struct dm_table *t)
792 int r = 0; 901 int r = 0;
793 unsigned int leaf_nodes; 902 unsigned int leaf_nodes;
794 903
795 check_for_valid_limits(&t->limits);
796
797 /* how many indexes will the btree have ? */ 904 /* how many indexes will the btree have ? */
798 leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); 905 leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
799 t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); 906 t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -869,6 +976,57 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
869} 976}
870 977
871/* 978/*
979 * Establish the new table's queue_limits and validate them.
980 */
981int dm_calculate_queue_limits(struct dm_table *table,
982 struct queue_limits *limits)
983{
984 struct dm_target *uninitialized_var(ti);
985 struct queue_limits ti_limits;
986 unsigned i = 0;
987
988 blk_set_default_limits(limits);
989
990 while (i < dm_table_get_num_targets(table)) {
991 blk_set_default_limits(&ti_limits);
992
993 ti = dm_table_get_target(table, i++);
994
995 if (!ti->type->iterate_devices)
996 goto combine_limits;
997
998 /*
999 * Combine queue limits of all the devices this target uses.
1000 */
1001 ti->type->iterate_devices(ti, dm_set_device_limits,
1002 &ti_limits);
1003
1004 /*
1005 * Check each device area is consistent with the target's
1006 * overall queue limits.
1007 */
1008 if (!ti->type->iterate_devices(ti, device_area_is_valid,
1009 &ti_limits))
1010 return -EINVAL;
1011
1012combine_limits:
1013 /*
1014 * Merge this target's queue limits into the overall limits
1015 * for the table.
1016 */
1017 if (blk_stack_limits(limits, &ti_limits, 0) < 0)
1018 DMWARN("%s: target device "
1019 "(start sect %llu len %llu) "
1020 "is misaligned",
1021 dm_device_name(table->md),
1022 (unsigned long long) ti->begin,
1023 (unsigned long long) ti->len);
1024 }
1025
1026 return validate_hardware_logical_block_alignment(table, limits);
1027}
1028
1029/*
872 * Set the integrity profile for this device if all devices used have 1030 * Set the integrity profile for this device if all devices used have
873 * matching profiles. 1031 * matching profiles.
874 */ 1032 */
@@ -907,27 +1065,42 @@ no_integrity:
907 return; 1065 return;
908} 1066}
909 1067
910void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) 1068void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1069 struct queue_limits *limits)
911{ 1070{
912 /* 1071 /*
913 * Make sure we obey the optimistic sub devices 1072 * Each target device in the table has a data area that should normally
914 * restrictions. 1073 * be aligned such that the DM device's alignment_offset is 0.
1074 * FIXME: Propagate alignment_offsets up the stack and warn of
1075 * sub-optimal or inconsistent settings.
1076 */
1077 limits->alignment_offset = 0;
1078 limits->misaligned = 0;
1079
1080 /*
1081 * Copy table's limits to the DM device's request_queue
915 */ 1082 */
916 blk_queue_max_sectors(q, t->limits.max_sectors); 1083 q->limits = *limits;
917 blk_queue_max_phys_segments(q, t->limits.max_phys_segments); 1084
918 blk_queue_max_hw_segments(q, t->limits.max_hw_segments); 1085 if (limits->no_cluster)
919 blk_queue_logical_block_size(q, t->limits.logical_block_size);
920 blk_queue_max_segment_size(q, t->limits.max_segment_size);
921 blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
922 blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
923 blk_queue_bounce_limit(q, t->limits.bounce_pfn);
924
925 if (t->limits.no_cluster)
926 queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); 1086 queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
927 else 1087 else
928 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); 1088 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
929 1089
930 dm_table_set_integrity(t); 1090 dm_table_set_integrity(t);
1091
1092 /*
1093 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
1094 * visible to other CPUs because, once the flag is set, incoming bios
1095 * are processed by request-based dm, which refers to the queue
1096 * settings.
1097 * Until the flag set, bios are passed to bio-based dm and queued to
1098 * md->deferred where queue settings are not needed yet.
1099 * Those bios are passed to request-based dm at the resume time.
1100 */
1101 smp_mb();
1102 if (dm_table_request_based(t))
1103 queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
931} 1104}
932 1105
933unsigned int dm_table_get_num_targets(struct dm_table *t) 1106unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -1023,6 +1196,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
1023 return r; 1196 return r;
1024} 1197}
1025 1198
1199int dm_table_any_busy_target(struct dm_table *t)
1200{
1201 unsigned i;
1202 struct dm_target *ti;
1203
1204 for (i = 0; i < t->num_targets; i++) {
1205 ti = t->targets + i;
1206 if (ti->type->busy && ti->type->busy(ti))
1207 return 1;
1208 }
1209
1210 return 0;
1211}
1212
1026void dm_table_unplug_all(struct dm_table *t) 1213void dm_table_unplug_all(struct dm_table *t)
1027{ 1214{
1028 struct dm_dev_internal *dd; 1215 struct dm_dev_internal *dd;