about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2011-03-31 10:36:43 -0400
committer: Philipp Reisner <philipp.reisner@linbit.com> 2012-11-08 10:45:02 -0500
commit: 23361cf32b58efdf09945a64e1d8d41fa6117157 (patch)
tree: 4a0736ea59dfefca4f8bb3fc784d310ac18ff769 /drivers/block/drbd
parent: 7726547e67a1fda0d12e1de5ec917a2e5d4b8186 (diff)
drbd: get rid of bio_split, allow bios of "arbitrary" size
Where "arbitrary" size is currently 1 MiB, which is the BIO_MAX_SIZE for architectures with 4k PAGE_CACHE_SIZE (most).

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- drivers/block/drbd/drbd_int.h 20
-rw-r--r-- drivers/block/drbd/drbd_req.c 92
2 files changed, 28 insertions, 84 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8f43a366b82c..2dbffb3b5485 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1326,8 +1326,16 @@ struct bm_extent {
1326#endif 1326#endif
1327#endif 1327#endif
1328 1328
1329#define HT_SHIFT 8 1329/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE,
1330#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) 1330 * so for typical PAGE_CACHE_SIZE of 4k, that is (1<<20) Byte.
1331 * Since we may live in a mixed-platform cluster,
1332 * we limit us to a platform agnostic constant here for now.
1333 * A followup commit may allow even bigger BIO sizes,
1334 * once we thought that through. */
1335#define DRBD_MAX_BIO_SIZE (1 << 20)
1336#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
1337#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
1338#endif
1331#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ 1339#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
1332 1340
1333#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ 1341#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
@@ -2231,20 +2239,20 @@ static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
2231 return true; 2239 return true;
2232} 2240}
2233 2241
2234static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count) 2242static inline bool inc_ap_bio_cond(struct drbd_conf *mdev)
2235{ 2243{
2236 bool rv = false; 2244 bool rv = false;
2237 2245
2238 spin_lock_irq(&mdev->tconn->req_lock); 2246 spin_lock_irq(&mdev->tconn->req_lock);
2239 rv = may_inc_ap_bio(mdev); 2247 rv = may_inc_ap_bio(mdev);
2240 if (rv) 2248 if (rv)
2241 atomic_add(count, &mdev->ap_bio_cnt); 2249 atomic_inc(&mdev->ap_bio_cnt);
2242 spin_unlock_irq(&mdev->tconn->req_lock); 2250 spin_unlock_irq(&mdev->tconn->req_lock);
2243 2251
2244 return rv; 2252 return rv;
2245} 2253}
2246 2254
2247static inline void inc_ap_bio(struct drbd_conf *mdev, int count) 2255static inline void inc_ap_bio(struct drbd_conf *mdev)
2248{ 2256{
2249 /* we wait here 2257 /* we wait here
2250 * as long as the device is suspended 2258 * as long as the device is suspended
@@ -2254,7 +2262,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
2254 * to avoid races with the reconnect code, 2262 * to avoid races with the reconnect code,
2255 * we need to atomic_inc within the spinlock. */ 2263 * we need to atomic_inc within the spinlock. */
2256 2264
2257 wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count)); 2265 wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev));
2258} 2266}
2259 2267
2260static inline void dec_ap_bio(struct drbd_conf *mdev) 2268static inline void dec_ap_bio(struct drbd_conf *mdev)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 4406d829800f..6e0e3bb33167 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -998,7 +998,6 @@ fail_and_free_req:
998 998
999int drbd_make_request(struct request_queue *q, struct bio *bio) 999int drbd_make_request(struct request_queue *q, struct bio *bio)
1000{ 1000{
1001 unsigned int s_enr, e_enr;
1002 struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; 1001 struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
1003 unsigned long start_time; 1002 unsigned long start_time;
1004 1003
@@ -1010,93 +1009,30 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
1010 D_ASSERT(bio->bi_size > 0); 1009 D_ASSERT(bio->bi_size > 0);
1011 D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); 1010 D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
1012 1011
1013 /* to make some things easier, force alignment of requests within the 1012 inc_ap_bio(mdev);
1014 * granularity of our hash tables */ 1013 return __drbd_make_request(mdev, bio, start_time);
1015 s_enr = bio->bi_sector >> HT_SHIFT;
1016 e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
1017
1018 if (likely(s_enr == e_enr)) {
1019 inc_ap_bio(mdev, 1);
1020 return __drbd_make_request(mdev, bio, start_time);
1021 }
1022
1023 /* can this bio be split generically?
1024 * Maybe add our own split-arbitrary-bios function. */
1025 if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
1026 /* rather error out here than BUG in bio_split */
1027 dev_err(DEV, "bio would need to, but cannot, be split: "
1028 "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
1029 bio->bi_vcnt, bio->bi_idx, bio->bi_size,
1030 (unsigned long long)bio->bi_sector);
1031 bio_endio(bio, -EINVAL);
1032 } else {
1033 /* This bio crosses some boundary, so we have to split it. */
1034 struct bio_pair *bp;
1035 /* works for the "do not cross hash slot boundaries" case
1036 * e.g. sector 262269, size 4096
1037 * s_enr = 262269 >> 6 = 4097
1038 * e_enr = (262269+8-1) >> 6 = 4098
1039 * HT_SHIFT = 6
1040 * sps = 64, mask = 63
1041 * first_sectors = 64 - (262269 & 63) = 3
1042 */
1043 const sector_t sect = bio->bi_sector;
1044 const int sps = 1 << HT_SHIFT; /* sectors per slot */
1045 const int mask = sps - 1;
1046 const sector_t first_sectors = sps - (sect & mask);
1047 bp = bio_split(bio, first_sectors);
1048
1049 /* we need to get a "reference count" (ap_bio_cnt)
1050 * to avoid races with the disconnect/reconnect/suspend code.
1051 * In case we need to split the bio here, we need to get three references
1052 * atomically, otherwise we might deadlock when trying to submit the
1053 * second one! */
1054 inc_ap_bio(mdev, 3);
1055
1056 D_ASSERT(e_enr == s_enr + 1);
1057
1058 while (__drbd_make_request(mdev, &bp->bio1, start_time))
1059 inc_ap_bio(mdev, 1);
1060
1061 while (__drbd_make_request(mdev, &bp->bio2, start_time))
1062 inc_ap_bio(mdev, 1);
1063
1064 dec_ap_bio(mdev);
1065
1066 bio_pair_release(bp);
1067 }
1068 return 0;
1069} 1014}
1070 1015
1071/* This is called by bio_add_page(). With this function we reduce 1016/* This is called by bio_add_page().
1072 * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs 1017 *
1073 * units (was AL_EXTENTs). 1018 * q->max_hw_sectors and other global limits are already enforced there.
1074 * 1019 *
1075 * we do the calculation within the lower 32bit of the byte offsets, 1020 * We need to call down to our lower level device,
1076 * since we don't care for actual offset, but only check whether it 1021 * in case it has special restrictions.
1077 * would cross "activity log extent" boundaries. 1022 *
1023 * We also may need to enforce configured max-bio-bvecs limits.
1078 * 1024 *
1079 * As long as the BIO is empty we have to allow at least one bvec, 1025 * As long as the BIO is empty we have to allow at least one bvec,
1080 * regardless of size and offset. so the resulting bio may still 1026 * regardless of size and offset, so no need to ask lower levels.
1081 * cross extent boundaries. those are dealt with (bio_split) in
1082 * drbd_make_request.
1083 */ 1027 */
1084int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) 1028int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
1085{ 1029{
1086 struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; 1030 struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
1087 unsigned int bio_offset =
1088 (unsigned int)bvm->bi_sector << 9; /* 32 bit */
1089 unsigned int bio_size = bvm->bi_size; 1031 unsigned int bio_size = bvm->bi_size;
1090 int limit, backing_limit; 1032 int limit = DRBD_MAX_BIO_SIZE;
1091 1033 int backing_limit;
1092 limit = DRBD_MAX_BIO_SIZE 1034
1093 - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size); 1035 if (bio_size && get_ldev(mdev)) {
1094 if (limit < 0)
1095 limit = 0;
1096 if (bio_size == 0) {
1097 if (limit <= bvec->bv_len)
1098 limit = bvec->bv_len;
1099 } else if (limit && get_ldev(mdev)) {
1100 struct request_queue * const b = 1036 struct request_queue * const b =
1101 mdev->ldev->backing_bdev->bd_disk->queue; 1037 mdev->ldev->backing_bdev->bd_disk->queue;
1102 if (b->merge_bvec_fn) { 1038 if (b->merge_bvec_fn) {