Diffstat (limited to 'drivers/md/dm-table.c')
-rw-r--r--  drivers/md/dm-table.c | 463
1 file changed, 326 insertions, 137 deletions
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 429b50b975d5..4899ebe767c8 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -41,6 +41,7 @@
 struct dm_table {
 	struct mapped_device *md;
 	atomic_t holders;
+	unsigned type;
 
 	/* btree table */
 	unsigned int depth;
@@ -62,15 +63,11 @@ struct dm_table {
 	/* a list of devices used by this table */
 	struct list_head devices;
 
-	/*
-	 * These are optimistic limits taken from all the
-	 * targets, some targets will need smaller limits.
-	 */
-	struct io_restrictions limits;
-
 	/* events get handed up using this callback */
 	void (*event_fn)(void *);
 	void *event_context;
+
+	struct dm_md_mempools *mempools;
 };
 
 /*
@@ -89,42 +86,6 @@ static unsigned int int_log(unsigned int n, unsigned int base)
 }
 
 /*
- * Returns the minimum that is _not_ zero, unless both are zero.
- */
-#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
-
-/*
- * Combine two io_restrictions, always taking the lower value.
- */
-static void combine_restrictions_low(struct io_restrictions *lhs,
-				     struct io_restrictions *rhs)
-{
-	lhs->max_sectors =
-		min_not_zero(lhs->max_sectors, rhs->max_sectors);
-
-	lhs->max_phys_segments =
-		min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
-
-	lhs->max_hw_segments =
-		min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
-
-	lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
-
-	lhs->max_segment_size =
-		min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
-
-	lhs->max_hw_sectors =
-		min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors);
-
-	lhs->seg_boundary_mask =
-		min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
-
-	lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
-
-	lhs->no_cluster |= rhs->no_cluster;
-}
-
-/*
  * Calculate the index of the child node of the n'th node k'th key.
  */
 static inline unsigned int get_child(unsigned int n, unsigned int k)
@@ -266,6 +227,8 @@ static void free_devices(struct list_head *devices)
 	list_for_each_safe(tmp, next, devices) {
 		struct dm_dev_internal *dd =
 		    list_entry(tmp, struct dm_dev_internal, list);
+		DMWARN("dm_table_destroy: dm_put_device call missing for %s",
+		       dd->dm_dev.name);
 		kfree(dd);
 	}
 }
@@ -295,12 +258,10 @@ void dm_table_destroy(struct dm_table *t)
 	vfree(t->highs);
 
 	/* free the device list */
-	if (t->devices.next != &t->devices) {
-		DMWARN("devices still present during destroy: "
-		       "dm_table_remove_device calls missing");
-
+	if (t->devices.next != &t->devices)
 		free_devices(&t->devices);
-	}
+
+	dm_free_md_mempools(t->mempools);
 
 	kfree(t);
 }
@@ -384,15 +345,48 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 /*
  * If possible, this checks an area of a destination device is valid.
  */
-static int check_device_area(struct dm_dev_internal *dd, sector_t start,
-			     sector_t len)
+static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, void *data)
 {
-	sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT;
+	struct queue_limits *limits = data;
+	struct block_device *bdev = dev->bdev;
+	sector_t dev_size =
+		i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+	unsigned short logical_block_size_sectors =
+		limits->logical_block_size >> SECTOR_SHIFT;
+	char b[BDEVNAME_SIZE];
 
 	if (!dev_size)
 		return 1;
 
-	return ((start < dev_size) && (len <= (dev_size - start)));
+	if ((start >= dev_size) || (start + ti->len > dev_size)) {
+		DMWARN("%s: %s too small for target",
+		       dm_device_name(ti->table->md), bdevname(bdev, b));
+		return 0;
+	}
+
+	if (logical_block_size_sectors <= 1)
+		return 1;
+
+	if (start & (logical_block_size_sectors - 1)) {
+		DMWARN("%s: start=%llu not aligned to h/w "
+		       "logical block size %hu of %s",
+		       dm_device_name(ti->table->md),
+		       (unsigned long long)start,
+		       limits->logical_block_size, bdevname(bdev, b));
+		return 0;
+	}
+
+	if (ti->len & (logical_block_size_sectors - 1)) {
+		DMWARN("%s: len=%llu not aligned to h/w "
+		       "logical block size %hu of %s",
+		       dm_device_name(ti->table->md),
+		       (unsigned long long)ti->len,
+		       limits->logical_block_size, bdevname(bdev, b));
+		return 0;
+	}
+
+	return 1;
 }
 
 /*
@@ -478,38 +472,32 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 	}
 	atomic_inc(&dd->count);
 
-	if (!check_device_area(dd, start, len)) {
-		DMWARN("device %s too small for target", path);
-		dm_put_device(ti, &dd->dm_dev);
-		return -EINVAL;
-	}
-
 	*result = &dd->dm_dev;
-
 	return 0;
 }
 
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
+/*
+ * Returns the minimum that is _not_ zero, unless both are zero.
+ */
+#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
+
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+			 sector_t start, void *data)
 {
+	struct queue_limits *limits = data;
+	struct block_device *bdev = dev->bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
-	struct io_restrictions *rs = &ti->limits;
 	char b[BDEVNAME_SIZE];
 
 	if (unlikely(!q)) {
 		DMWARN("%s: Cannot set limits for nonexistent device %s",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
-		return;
+		return 0;
 	}
 
-	/*
-	 * Combine the device limits low.
-	 *
-	 * FIXME: if we move an io_restriction struct
-	 * into q this would just be a call to
-	 * combine_restrictions_low()
-	 */
-	rs->max_sectors =
-		min_not_zero(rs->max_sectors, q->max_sectors);
+	if (blk_stack_limits(limits, &q->limits, start) < 0)
+		DMWARN("%s: target device %s is misaligned",
+		       dm_device_name(ti->table->md), bdevname(bdev, b));
 
 	/*
 	 * Check if merge fn is supported.
@@ -518,47 +506,21 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		rs->max_sectors =
-			min_not_zero(rs->max_sectors,
+		limits->max_sectors =
+			min_not_zero(limits->max_sectors,
 				     (unsigned int) (PAGE_SIZE >> 9));
-
-	rs->max_phys_segments =
-		min_not_zero(rs->max_phys_segments,
-			     q->max_phys_segments);
-
-	rs->max_hw_segments =
-		min_not_zero(rs->max_hw_segments, q->max_hw_segments);
-
-	rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
-
-	rs->max_segment_size =
-		min_not_zero(rs->max_segment_size, q->max_segment_size);
-
-	rs->max_hw_sectors =
-		min_not_zero(rs->max_hw_sectors, q->max_hw_sectors);
-
-	rs->seg_boundary_mask =
-		min_not_zero(rs->seg_boundary_mask,
-			     q->seg_boundary_mask);
-
-	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
-
-	rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
 int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 		  sector_t len, fmode_t mode, struct dm_dev **result)
 {
-	int r = __table_get_device(ti->table, ti, path,
-				   start, len, mode, result);
-
-	if (!r)
-		dm_set_device_limits(ti, (*result)->bdev);
-
-	return r;
+	return __table_get_device(ti->table, ti, path,
+				  start, len, mode, result);
 }
 
+
 /*
  * Decrement a devices use count and remove it if necessary.
  */
@@ -673,24 +635,78 @@ int dm_split_args(int *argc, char ***argvp, char *input)
 	return 0;
 }
 
-static void check_for_valid_limits(struct io_restrictions *rs)
+/*
+ * Impose necessary and sufficient conditions on a devices's table such
+ * that any incoming bio which respects its logical_block_size can be
+ * processed successfully. If it falls across the boundary between
+ * two or more targets, the size of each piece it gets split into must
+ * be compatible with the logical_block_size of the target processing it.
+ */
+static int validate_hardware_logical_block_alignment(struct dm_table *table,
+						 struct queue_limits *limits)
 {
-	if (!rs->max_sectors)
-		rs->max_sectors = SAFE_MAX_SECTORS;
-	if (!rs->max_hw_sectors)
-		rs->max_hw_sectors = SAFE_MAX_SECTORS;
-	if (!rs->max_phys_segments)
-		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
-	if (!rs->max_hw_segments)
-		rs->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!rs->hardsect_size)
-		rs->hardsect_size = 1 << SECTOR_SHIFT;
-	if (!rs->max_segment_size)
-		rs->max_segment_size = MAX_SEGMENT_SIZE;
-	if (!rs->seg_boundary_mask)
-		rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	if (!rs->bounce_pfn)
-		rs->bounce_pfn = -1;
+	/*
+	 * This function uses arithmetic modulo the logical_block_size
+	 * (in units of 512-byte sectors).
+	 */
+	unsigned short device_logical_block_size_sects =
+		limits->logical_block_size >> SECTOR_SHIFT;
+
+	/*
+	 * Offset of the start of the next table entry, mod logical_block_size.
+	 */
+	unsigned short next_target_start = 0;
+
+	/*
+	 * Given an aligned bio that extends beyond the end of a
+	 * target, how many sectors must the next target handle?
+	 */
+	unsigned short remaining = 0;
+
+	struct dm_target *uninitialized_var(ti);
+	struct queue_limits ti_limits;
+	unsigned i = 0;
+
+	/*
+	 * Check each entry in the table in turn.
+	 */
+	while (i < dm_table_get_num_targets(table)) {
+		ti = dm_table_get_target(table, i++);
+
+		blk_set_default_limits(&ti_limits);
+
+		/* combine all target devices' limits */
+		if (ti->type->iterate_devices)
+			ti->type->iterate_devices(ti, dm_set_device_limits,
+						  &ti_limits);
+
+		/*
+		 * If the remaining sectors fall entirely within this
+		 * table entry are they compatible with its logical_block_size?
+		 */
+		if (remaining < ti->len &&
+		    remaining & ((ti_limits.logical_block_size >>
+				  SECTOR_SHIFT) - 1))
+			break;	/* Error */
+
+		next_target_start =
+		    (unsigned short) ((next_target_start + ti->len) &
+				      (device_logical_block_size_sects - 1));
+		remaining = next_target_start ?
+		    device_logical_block_size_sects - next_target_start : 0;
+	}
+
+	if (remaining) {
+		DMWARN("%s: table line %u (start sect %llu len %llu) "
+		       "not aligned to h/w logical block size %hu",
+		       dm_device_name(table->md), i,
+		       (unsigned long long) ti->begin,
+		       (unsigned long long) ti->len,
+		       limits->logical_block_size);
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 int dm_table_add_target(struct dm_table *t, const char *type,
@@ -745,9 +761,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	/* FIXME: the plan is to combine high here and then have
-	 * the merge fn apply the target level restrictions. */
-	combine_restrictions_low(&t->limits, &tgt->limits);
 	return 0;
 
  bad:
@@ -756,6 +769,104 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 	return r;
 }
 
+int dm_table_set_type(struct dm_table *t)
+{
+	unsigned i;
+	unsigned bio_based = 0, request_based = 0;
+	struct dm_target *tgt;
+	struct dm_dev_internal *dd;
+	struct list_head *devices;
+
+	for (i = 0; i < t->num_targets; i++) {
+		tgt = t->targets + i;
+		if (dm_target_request_based(tgt))
+			request_based = 1;
+		else
+			bio_based = 1;
+
+		if (bio_based && request_based) {
+			DMWARN("Inconsistent table: different target types"
+			       " can't be mixed up");
+			return -EINVAL;
+		}
+	}
+
+	if (bio_based) {
+		/* We must use this table as bio-based */
+		t->type = DM_TYPE_BIO_BASED;
+		return 0;
+	}
+
+	BUG_ON(!request_based); /* No targets in this table */
+
+	/* Non-request-stackable devices can't be used for request-based dm */
+	devices = dm_table_get_devices(t);
+	list_for_each_entry(dd, devices, list) {
+		if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
+			DMWARN("table load rejected: including"
+			       " non-request-stackable devices");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * Request-based dm supports only tables that have a single target now.
+	 * To support multiple targets, request splitting support is needed,
+	 * and that needs lots of changes in the block-layer.
+	 * (e.g. request completion process for partial completion.)
+	 */
+	if (t->num_targets > 1) {
+		DMWARN("Request-based dm doesn't support multiple targets yet");
+		return -EINVAL;
+	}
+
+	t->type = DM_TYPE_REQUEST_BASED;
+
+	return 0;
+}
+
+unsigned dm_table_get_type(struct dm_table *t)
+{
+	return t->type;
+}
+
+bool dm_table_bio_based(struct dm_table *t)
+{
+	return dm_table_get_type(t) == DM_TYPE_BIO_BASED;
+}
+
+bool dm_table_request_based(struct dm_table *t)
+{
+	return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
+}
+
+int dm_table_alloc_md_mempools(struct dm_table *t)
+{
+	unsigned type = dm_table_get_type(t);
+
+	if (unlikely(type == DM_TYPE_NONE)) {
+		DMWARN("no table type is set, can't allocate mempools");
+		return -EINVAL;
+	}
+
+	t->mempools = dm_alloc_md_mempools(type);
+	if (!t->mempools)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void dm_table_free_md_mempools(struct dm_table *t)
+{
+	dm_free_md_mempools(t->mempools);
+	t->mempools = NULL;
+}
+
+struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
+{
+	return t->mempools;
+}
+
 static int setup_indexes(struct dm_table *t)
 {
 	int i;
@@ -790,8 +901,6 @@ int dm_table_complete(struct dm_table *t)
 	int r = 0;
 	unsigned int leaf_nodes;
 
-	check_for_valid_limits(&t->limits);
-
 	/* how many indexes will the btree have ? */
 	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
 	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -867,6 +976,57 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
 }
 
 /*
+ * Establish the new table's queue_limits and validate them.
+ */
+int dm_calculate_queue_limits(struct dm_table *table,
+			      struct queue_limits *limits)
+{
+	struct dm_target *uninitialized_var(ti);
+	struct queue_limits ti_limits;
+	unsigned i = 0;
+
+	blk_set_default_limits(limits);
+
+	while (i < dm_table_get_num_targets(table)) {
+		blk_set_default_limits(&ti_limits);
+
+		ti = dm_table_get_target(table, i++);
+
+		if (!ti->type->iterate_devices)
+			goto combine_limits;
+
+		/*
+		 * Combine queue limits of all the devices this target uses.
+		 */
+		ti->type->iterate_devices(ti, dm_set_device_limits,
+					  &ti_limits);
+
+		/*
+		 * Check each device area is consistent with the target's
+		 * overall queue limits.
+		 */
+		if (!ti->type->iterate_devices(ti, device_area_is_valid,
+					       &ti_limits))
+			return -EINVAL;
+
+combine_limits:
+		/*
+		 * Merge this target's queue limits into the overall limits
+		 * for the table.
+		 */
+		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
+			DMWARN("%s: target device "
+			       "(start sect %llu len %llu) "
+			       "is misaligned",
+			       dm_device_name(table->md),
+			       (unsigned long long) ti->begin,
+			       (unsigned long long) ti->len);
+	}
+
+	return validate_hardware_logical_block_alignment(table, limits);
+}
+
+/*
  * Set the integrity profile for this device if all devices used have
  * matching profiles.
  */
@@ -905,27 +1065,42 @@ no_integrity:
 	return;
 }
 
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			       struct queue_limits *limits)
 {
 	/*
-	 * Make sure we obey the optimistic sub devices
-	 * restrictions.
+	 * Each target device in the table has a data area that should normally
+	 * be aligned such that the DM device's alignment_offset is 0.
+	 * FIXME: Propagate alignment_offsets up the stack and warn of
+	 * sub-optimal or inconsistent settings.
+	 */
+	limits->alignment_offset = 0;
+	limits->misaligned = 0;
+
+	/*
+	 * Copy table's limits to the DM device's request_queue
 	 */
-	blk_queue_max_sectors(q, t->limits.max_sectors);
-	q->max_phys_segments = t->limits.max_phys_segments;
-	q->max_hw_segments = t->limits.max_hw_segments;
-	q->hardsect_size = t->limits.hardsect_size;
-	q->max_segment_size = t->limits.max_segment_size;
-	q->max_hw_sectors = t->limits.max_hw_sectors;
-	q->seg_boundary_mask = t->limits.seg_boundary_mask;
-	q->bounce_pfn = t->limits.bounce_pfn;
-
-	if (t->limits.no_cluster)
+	q->limits = *limits;
+
+	if (limits->no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 	else
 		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 
 	dm_table_set_integrity(t);
+
+	/*
+	 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
+	 * visible to other CPUs because, once the flag is set, incoming bios
+	 * are processed by request-based dm, which refers to the queue
+	 * settings.
+	 * Until the flag set, bios are passed to bio-based dm and queued to
+	 * md->deferred where queue settings are not needed yet.
+	 * Those bios are passed to request-based dm at the resume time.
+	 */
+	smp_mb();
+	if (dm_table_request_based(t))
+		queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -1021,6 +1196,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
 	return r;
 }
 
+int dm_table_any_busy_target(struct dm_table *t)
+{
+	unsigned i;
+	struct dm_target *ti;
+
+	for (i = 0; i < t->num_targets; i++) {
+		ti = t->targets + i;
+		if (ti->type->busy && ti->type->busy(ti))
+			return 1;
+	}
+
+	return 0;
+}
+
 void dm_table_unplug_all(struct dm_table *t)
 {
 	struct dm_dev_internal *dd;