author     Lars Ellenberg <lars.ellenberg@linbit.com>    2011-03-31 10:36:43 -0400
committer  Philipp Reisner <philipp.reisner@linbit.com>  2012-11-08 10:45:02 -0500
commit     23361cf32b58efdf09945a64e1d8d41fa6117157 (patch)
tree       4a0736ea59dfefca4f8bb3fc784d310ac18ff769 /drivers/block/drbd
parent     7726547e67a1fda0d12e1de5ec917a2e5d4b8186 (diff)
drbd: get rid of bio_split, allow bios of "arbitrary" size
Where "arbitrary" size currently means 1 MiB, which is BIO_MAX_SIZE
for architectures with a 4k PAGE_CACHE_SIZE (i.e. most of them).
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--  drivers/block/drbd/drbd_int.h | 20
-rw-r--r--  drivers/block/drbd/drbd_req.c | 92
2 files changed, 28 insertions, 84 deletions
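
For reference, the 1 MiB bound quoted above is simply 256 pages of 4 KiB each: BIO_MAX_SIZE = 256 * PAGE_CACHE_SIZE = 1 << 20 bytes, which is exactly what the new compile-time guard added to drbd_int.h below checks against. A minimal stand-alone sketch of that arithmetic and of the guard; the constant values are assumed from the patch comment, not taken from kernel headers:

#include <stdio.h>

/* Values assumed from the patch comment, not taken from kernel headers:
 * BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE, i.e. 256 pages of 4 KiB each. */
#define PAGE_CACHE_SIZE   4096u
#define BIO_MAX_PAGES     256u
#define BIO_MAX_SIZE      (BIO_MAX_PAGES * PAGE_CACHE_SIZE)

/* The platform-agnostic limit this patch introduces. */
#define DRBD_MAX_BIO_SIZE (1u << 20)

/* Same compile-time guard as in the patched drbd_int.h: refuse to build if
 * the chosen DRBD limit could ever exceed what a single bio can carry. */
#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
#endif

int main(void)
{
	printf("BIO_MAX_SIZE      = %u bytes (%u MiB)\n",
	       BIO_MAX_SIZE, BIO_MAX_SIZE >> 20);
	printf("DRBD_MAX_BIO_SIZE = %u bytes\n", DRBD_MAX_BIO_SIZE);
	return 0;
}

On a hypothetical architecture where a single bio could carry less than 1 MiB, the #error would fire at build time, which is the point of the check.
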
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8f43a366b82c..2dbffb3b5485 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1326,8 +1326,16 @@ struct bm_extent {
 #endif
 #endif
 
-#define HT_SHIFT 8
-#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
+/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE,
+ * so for typical PAGE_CACHE_SIZE of 4k, that is (1<<20) Byte.
+ * Since we may live in a mixed-platform cluster,
+ * we limit us to a platform agnostic constant here for now.
+ * A followup commit may allow even bigger BIO sizes,
+ * once we thought that through. */
+#define DRBD_MAX_BIO_SIZE (1 << 20)
+#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
+#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
+#endif
 #define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
 
 #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
@@ -2231,20 +2239,20 @@ static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
 	return true;
 }
 
-static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count)
+static inline bool inc_ap_bio_cond(struct drbd_conf *mdev)
 {
 	bool rv = false;
 
 	spin_lock_irq(&mdev->tconn->req_lock);
 	rv = may_inc_ap_bio(mdev);
 	if (rv)
-		atomic_add(count, &mdev->ap_bio_cnt);
+		atomic_inc(&mdev->ap_bio_cnt);
 	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	return rv;
 }
 
-static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
+static inline void inc_ap_bio(struct drbd_conf *mdev)
 {
 	/* we wait here
 	 * as long as the device is suspended
@@ -2254,7 +2262,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
 
-	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count));
+	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev));
 }
 
 static inline void dec_ap_bio(struct drbd_conf *mdev)
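
With the count parameter gone, each submitted bio now takes exactly one ap_bio_cnt reference, and the may_inc_ap_bio() check and the increment still happen together under req_lock, so the reconnect/suspend paths cannot slip in between them. Below is a rough user-space model of that check-and-increment-under-one-lock pattern, for illustration only: dev_model, the suspended flag and the pthread primitives are stand-ins for drbd_conf, may_inc_ap_bio() and the kernel's wait_event()/spinlock machinery, not the driver code itself.

#include <pthread.h>
#include <stdbool.h>

/* Illustrative stand-ins only: dev_model for drbd_conf, "suspended" for
 * whatever may_inc_ap_bio() checks, mutex/condvar for req_lock/misc_wait. */
struct dev_model {
	pthread_mutex_t lock;
	pthread_cond_t  wake;
	bool            suspended;
	int             ap_bio_cnt;
};

/* Like the new inc_ap_bio(): sleep while submission is not allowed, then
 * take exactly one reference.  The check and the increment happen under one
 * lock, so a concurrent "reconnect/suspend" cannot slip in between them. */
static void inc_ap_bio(struct dev_model *d)
{
	pthread_mutex_lock(&d->lock);
	while (d->suspended)
		pthread_cond_wait(&d->wake, &d->lock);
	d->ap_bio_cnt++;
	pthread_mutex_unlock(&d->lock);
}

/* Rough counterpart of dec_ap_bio(): drop the reference and wake waiters. */
static void dec_ap_bio(struct dev_model *d)
{
	pthread_mutex_lock(&d->lock);
	d->ap_bio_cnt--;
	pthread_cond_broadcast(&d->wake);
	pthread_mutex_unlock(&d->lock);
}

int main(void)
{
	struct dev_model d = { .suspended = false, .ap_bio_cnt = 0 };

	pthread_mutex_init(&d.lock, NULL);
	pthread_cond_init(&d.wake, NULL);

	inc_ap_bio(&d);	/* would precede __drbd_make_request() */
	dec_ap_bio(&d);	/* the completion path drops the reference again */

	pthread_cond_destroy(&d.wake);
	pthread_mutex_destroy(&d.lock);
	return 0;
}
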
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 4406d829800f..6e0e3bb33167 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -998,7 +998,6 @@ fail_and_free_req:
 
 int drbd_make_request(struct request_queue *q, struct bio *bio)
 {
-	unsigned int s_enr, e_enr;
 	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
 	unsigned long start_time;
 
@@ -1010,93 +1009,30 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
 	D_ASSERT(bio->bi_size > 0);
 	D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
 
-	/* to make some things easier, force alignment of requests within the
-	 * granularity of our hash tables */
-	s_enr = bio->bi_sector >> HT_SHIFT;
-	e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
-
-	if (likely(s_enr == e_enr)) {
-		inc_ap_bio(mdev, 1);
-		return __drbd_make_request(mdev, bio, start_time);
-	}
-
-	/* can this bio be split generically?
-	 * Maybe add our own split-arbitrary-bios function. */
-	if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
-		/* rather error out here than BUG in bio_split */
-		dev_err(DEV, "bio would need to, but cannot, be split: "
-		    "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
-		    bio->bi_vcnt, bio->bi_idx, bio->bi_size,
-		    (unsigned long long)bio->bi_sector);
-		bio_endio(bio, -EINVAL);
-	} else {
-		/* This bio crosses some boundary, so we have to split it. */
-		struct bio_pair *bp;
-		/* works for the "do not cross hash slot boundaries" case
-		 * e.g. sector 262269, size 4096
-		 * s_enr = 262269 >> 6 = 4097
-		 * e_enr = (262269+8-1) >> 6 = 4098
-		 * HT_SHIFT = 6
-		 * sps = 64, mask = 63
-		 * first_sectors = 64 - (262269 & 63) = 3
-		 */
-		const sector_t sect = bio->bi_sector;
-		const int sps = 1 << HT_SHIFT; /* sectors per slot */
-		const int mask = sps - 1;
-		const sector_t first_sectors = sps - (sect & mask);
-		bp = bio_split(bio, first_sectors);
-
-		/* we need to get a "reference count" (ap_bio_cnt)
-		 * to avoid races with the disconnect/reconnect/suspend code.
-		 * In case we need to split the bio here, we need to get three references
-		 * atomically, otherwise we might deadlock when trying to submit the
-		 * second one! */
-		inc_ap_bio(mdev, 3);
-
-		D_ASSERT(e_enr == s_enr + 1);
-
-		while (__drbd_make_request(mdev, &bp->bio1, start_time))
-			inc_ap_bio(mdev, 1);
-
-		while (__drbd_make_request(mdev, &bp->bio2, start_time))
-			inc_ap_bio(mdev, 1);
-
-		dec_ap_bio(mdev);
-
-		bio_pair_release(bp);
-	}
-	return 0;
+	inc_ap_bio(mdev);
+	return __drbd_make_request(mdev, bio, start_time);
 }
 
-/* This is called by bio_add_page(). With this function we reduce
- * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs
- * units (was AL_EXTENTs).
+/* This is called by bio_add_page().
+ *
+ * q->max_hw_sectors and other global limits are already enforced there.
  *
- * we do the calculation within the lower 32bit of the byte offsets,
- * since we don't care for actual offset, but only check whether it
- * would cross "activity log extent" boundaries.
+ * We need to call down to our lower level device,
+ * in case it has special restrictions.
+ *
+ * We also may need to enforce configured max-bio-bvecs limits.
  *
 * As long as the BIO is empty we have to allow at least one bvec,
- * regardless of size and offset. so the resulting bio may still
- * cross extent boundaries. those are dealt with (bio_split) in
- * drbd_make_request.
+ * regardless of size and offset, so no need to ask lower levels.
  */
 int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
 {
 	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
-	unsigned int bio_offset =
-		(unsigned int)bvm->bi_sector << 9; /* 32 bit */
 	unsigned int bio_size = bvm->bi_size;
-	int limit, backing_limit;
-
-	limit = DRBD_MAX_BIO_SIZE
-	      - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size);
-	if (limit < 0)
-		limit = 0;
-	if (bio_size == 0) {
-		if (limit <= bvec->bv_len)
-			limit = bvec->bv_len;
-	} else if (limit && get_ldev(mdev)) {
+	int limit = DRBD_MAX_BIO_SIZE;
+	int backing_limit;
+
+	if (bio_size && get_ldev(mdev)) {
 		struct request_queue * const b =
 			mdev->ldev->backing_bdev->bd_disk->queue;
 		if (b->merge_bvec_fn) {
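
Put side by side, the old drbd_merge_bvec() limited a growing bio to the room left before the next DRBD_MAX_BIO_SIZE-aligned boundary (128 KiB before this patch, from 1U << (9 + HT_SHIFT) with HT_SHIFT 8), while the new code applies a flat 1 MiB cap and only asks the backing device for further restrictions. A small stand-alone comparison of the two formulas, simplified for illustration (the empty-bio special case and the merge_bvec_fn callback are left out, and the constants are hard-coded here rather than taken from the driver):

#include <stdio.h>

/* Constants as defined before and after this patch (see drbd_int.h above):
 * old: HT_SHIFT 8, DRBD_MAX_BIO_SIZE = 1U << (9 + HT_SHIFT) = 128 KiB
 * new: DRBD_MAX_BIO_SIZE = 1 << 20 = 1 MiB */
#define OLD_DRBD_MAX_BIO_SIZE (1 << 17)
#define NEW_DRBD_MAX_BIO_SIZE (1 << 20)

/* Old merge_bvec rule, simplified: only allow the bio to grow up to the next
 * OLD_DRBD_MAX_BIO_SIZE-aligned boundary. */
static int old_limit(int bio_offset, int bio_size)
{
	int limit = OLD_DRBD_MAX_BIO_SIZE
	      - ((bio_offset & (OLD_DRBD_MAX_BIO_SIZE - 1)) + bio_size);
	return limit < 0 ? 0 : limit;
}

/* New rule: a flat cap, independent of where the bio starts. */
static int new_limit(void)
{
	return NEW_DRBD_MAX_BIO_SIZE;
}

int main(void)
{
	/* A 4 KiB bio whose data ends 4 KiB before a 128 KiB boundary. */
	int bio_offset = OLD_DRBD_MAX_BIO_SIZE - 8192;
	int bio_size = 4096;

	printf("old limit: %d bytes\n", old_limit(bio_offset, bio_size)); /* 4096    */
	printf("new limit: %d bytes\n", new_limit());                     /* 1048576 */
	return 0;
}

With these example numbers, a 4 KiB bio sitting just below a 128 KiB boundary could previously grow by only another 4 KiB, whereas the new code always reports the full 1 MiB and leaves the overall size limits to q->max_hw_sectors and the backing device, as the updated comment above notes.
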