diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2018-01-20 04:30:10 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2018-04-02 04:12:38 -0400 |
commit | 5359a17d2706b86da2af83027343d5eb256f7670 (patch) | |
tree | 31053fe22a0d91b40911882b2aaa961e77504d4b /net | |
parent | a1fbb5e7bbb56fccdf54bf4ab5086c6080ee5bfa (diff) |
libceph, rbd: new bio handling code (aka don't clone bios)
The reason we clone bios is to be able to give each object request
(and consequently each ceph_osd_data/ceph_msg_data item) its own
pointer to a (list of) bio(s). The messenger then initializes its
cursor with the cloned bio's ->bi_iter, so it knows where to start reading
from or writing to. That's all the cloned bios are used for: to determine
each object request's starting position in the provided data buffer.
Introduce ceph_bio_iter to do exactly that -- it stores the position within the
bio list (i.e. a pointer to the bio) plus the position within that bio (i.e. a bvec_iter).
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net')
-rw-r--r-- | net/ceph/messenger.c | 101 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 13 |
2 files changed, 42 insertions, 72 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 8a4d3758030b..b9fa8b869c08 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -839,90 +839,57 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, | |||
839 | size_t length) | 839 | size_t length) |
840 | { | 840 | { |
841 | struct ceph_msg_data *data = cursor->data; | 841 | struct ceph_msg_data *data = cursor->data; |
842 | struct bio *bio; | 842 | struct ceph_bio_iter *it = &cursor->bio_iter; |
843 | 843 | ||
844 | BUG_ON(data->type != CEPH_MSG_DATA_BIO); | 844 | cursor->resid = min_t(size_t, length, data->bio_length); |
845 | *it = data->bio_pos; | ||
846 | if (cursor->resid < it->iter.bi_size) | ||
847 | it->iter.bi_size = cursor->resid; | ||
845 | 848 | ||
846 | bio = data->bio; | 849 | BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); |
847 | BUG_ON(!bio); | 850 | cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); |
848 | |||
849 | cursor->resid = min(length, data->bio_length); | ||
850 | cursor->bio = bio; | ||
851 | cursor->bvec_iter = bio->bi_iter; | ||
852 | cursor->last_piece = | ||
853 | cursor->resid <= bio_iter_len(bio, cursor->bvec_iter); | ||
854 | } | 851 | } |
855 | 852 | ||
856 | static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, | 853 | static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, |
857 | size_t *page_offset, | 854 | size_t *page_offset, |
858 | size_t *length) | 855 | size_t *length) |
859 | { | 856 | { |
860 | struct ceph_msg_data *data = cursor->data; | 857 | struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio, |
861 | struct bio *bio; | 858 | cursor->bio_iter.iter); |
862 | struct bio_vec bio_vec; | ||
863 | |||
864 | BUG_ON(data->type != CEPH_MSG_DATA_BIO); | ||
865 | |||
866 | bio = cursor->bio; | ||
867 | BUG_ON(!bio); | ||
868 | 859 | ||
869 | bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); | 860 | *page_offset = bv.bv_offset; |
870 | 861 | *length = bv.bv_len; | |
871 | *page_offset = (size_t) bio_vec.bv_offset; | 862 | return bv.bv_page; |
872 | BUG_ON(*page_offset >= PAGE_SIZE); | ||
873 | if (cursor->last_piece) /* pagelist offset is always 0 */ | ||
874 | *length = cursor->resid; | ||
875 | else | ||
876 | *length = (size_t) bio_vec.bv_len; | ||
877 | BUG_ON(*length > cursor->resid); | ||
878 | BUG_ON(*page_offset + *length > PAGE_SIZE); | ||
879 | |||
880 | return bio_vec.bv_page; | ||
881 | } | 863 | } |
882 | 864 | ||
883 | static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, | 865 | static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, |
884 | size_t bytes) | 866 | size_t bytes) |
885 | { | 867 | { |
886 | struct bio *bio; | 868 | struct ceph_bio_iter *it = &cursor->bio_iter; |
887 | struct bio_vec bio_vec; | ||
888 | |||
889 | BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO); | ||
890 | |||
891 | bio = cursor->bio; | ||
892 | BUG_ON(!bio); | ||
893 | |||
894 | bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); | ||
895 | 869 | ||
896 | /* Advance the cursor offset */ | 870 | BUG_ON(bytes > cursor->resid); |
897 | 871 | BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); | |
898 | BUG_ON(cursor->resid < bytes); | ||
899 | cursor->resid -= bytes; | 872 | cursor->resid -= bytes; |
873 | bio_advance_iter(it->bio, &it->iter, bytes); | ||
900 | 874 | ||
901 | bio_advance_iter(bio, &cursor->bvec_iter, bytes); | 875 | if (!cursor->resid) { |
876 | BUG_ON(!cursor->last_piece); | ||
877 | return false; /* no more data */ | ||
878 | } | ||
902 | 879 | ||
903 | if (bytes < bio_vec.bv_len) | 880 | if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done)) |
904 | return false; /* more bytes to process in this segment */ | 881 | return false; /* more bytes to process in this segment */ |
905 | 882 | ||
906 | /* Move on to the next segment, and possibly the next bio */ | 883 | if (!it->iter.bi_size) { |
907 | 884 | it->bio = it->bio->bi_next; | |
908 | if (!cursor->bvec_iter.bi_size) { | 885 | it->iter = it->bio->bi_iter; |
909 | bio = bio->bi_next; | 886 | if (cursor->resid < it->iter.bi_size) |
910 | cursor->bio = bio; | 887 | it->iter.bi_size = cursor->resid; |
911 | if (bio) | ||
912 | cursor->bvec_iter = bio->bi_iter; | ||
913 | else | ||
914 | memset(&cursor->bvec_iter, 0, | ||
915 | sizeof(cursor->bvec_iter)); | ||
916 | } | ||
917 | |||
918 | if (!cursor->last_piece) { | ||
919 | BUG_ON(!cursor->resid); | ||
920 | BUG_ON(!bio); | ||
921 | /* A short read is OK, so use <= rather than == */ | ||
922 | if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter)) | ||
923 | cursor->last_piece = true; | ||
924 | } | 888 | } |
925 | 889 | ||
890 | BUG_ON(cursor->last_piece); | ||
891 | BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); | ||
892 | cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); | ||
926 | return true; | 893 | return true; |
927 | } | 894 | } |
928 | #endif /* CONFIG_BLOCK */ | 895 | #endif /* CONFIG_BLOCK */ |
@@ -1163,9 +1130,11 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, | |||
1163 | page = NULL; | 1130 | page = NULL; |
1164 | break; | 1131 | break; |
1165 | } | 1132 | } |
1133 | |||
1166 | BUG_ON(!page); | 1134 | BUG_ON(!page); |
1167 | BUG_ON(*page_offset + *length > PAGE_SIZE); | 1135 | BUG_ON(*page_offset + *length > PAGE_SIZE); |
1168 | BUG_ON(!*length); | 1136 | BUG_ON(!*length); |
1137 | BUG_ON(*length > cursor->resid); | ||
1169 | if (last_piece) | 1138 | if (last_piece) |
1170 | *last_piece = cursor->last_piece; | 1139 | *last_piece = cursor->last_piece; |
1171 | 1140 | ||
@@ -3262,16 +3231,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg, | |||
3262 | EXPORT_SYMBOL(ceph_msg_data_add_pagelist); | 3231 | EXPORT_SYMBOL(ceph_msg_data_add_pagelist); |
3263 | 3232 | ||
3264 | #ifdef CONFIG_BLOCK | 3233 | #ifdef CONFIG_BLOCK |
3265 | void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, | 3234 | void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, |
3266 | size_t length) | 3235 | u32 length) |
3267 | { | 3236 | { |
3268 | struct ceph_msg_data *data; | 3237 | struct ceph_msg_data *data; |
3269 | 3238 | ||
3270 | BUG_ON(!bio); | ||
3271 | |||
3272 | data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); | 3239 | data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); |
3273 | BUG_ON(!data); | 3240 | BUG_ON(!data); |
3274 | data->bio = bio; | 3241 | data->bio_pos = *bio_pos; |
3275 | data->bio_length = length; | 3242 | data->bio_length = length; |
3276 | 3243 | ||
3277 | list_add_tail(&data->links, &msg->data); | 3244 | list_add_tail(&data->links, &msg->data); |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4b0485458d26..339d8773ebe8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -146,10 +146,11 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data, | |||
146 | 146 | ||
147 | #ifdef CONFIG_BLOCK | 147 | #ifdef CONFIG_BLOCK |
148 | static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, | 148 | static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, |
149 | struct bio *bio, size_t bio_length) | 149 | struct ceph_bio_iter *bio_pos, |
150 | u32 bio_length) | ||
150 | { | 151 | { |
151 | osd_data->type = CEPH_OSD_DATA_TYPE_BIO; | 152 | osd_data->type = CEPH_OSD_DATA_TYPE_BIO; |
152 | osd_data->bio = bio; | 153 | osd_data->bio_pos = *bio_pos; |
153 | osd_data->bio_length = bio_length; | 154 | osd_data->bio_length = bio_length; |
154 | } | 155 | } |
155 | #endif /* CONFIG_BLOCK */ | 156 | #endif /* CONFIG_BLOCK */ |
@@ -216,12 +217,14 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); | |||
216 | 217 | ||
217 | #ifdef CONFIG_BLOCK | 218 | #ifdef CONFIG_BLOCK |
218 | void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, | 219 | void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, |
219 | unsigned int which, struct bio *bio, size_t bio_length) | 220 | unsigned int which, |
221 | struct ceph_bio_iter *bio_pos, | ||
222 | u32 bio_length) | ||
220 | { | 223 | { |
221 | struct ceph_osd_data *osd_data; | 224 | struct ceph_osd_data *osd_data; |
222 | 225 | ||
223 | osd_data = osd_req_op_data(osd_req, which, extent, osd_data); | 226 | osd_data = osd_req_op_data(osd_req, which, extent, osd_data); |
224 | ceph_osd_data_bio_init(osd_data, bio, bio_length); | 227 | ceph_osd_data_bio_init(osd_data, bio_pos, bio_length); |
225 | } | 228 | } |
226 | EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); | 229 | EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); |
227 | #endif /* CONFIG_BLOCK */ | 230 | #endif /* CONFIG_BLOCK */ |
@@ -826,7 +829,7 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg, | |||
826 | ceph_msg_data_add_pagelist(msg, osd_data->pagelist); | 829 | ceph_msg_data_add_pagelist(msg, osd_data->pagelist); |
827 | #ifdef CONFIG_BLOCK | 830 | #ifdef CONFIG_BLOCK |
828 | } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { | 831 | } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { |
829 | ceph_msg_data_add_bio(msg, osd_data->bio, length); | 832 | ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length); |
830 | #endif | 833 | #endif |
831 | } else { | 834 | } else { |
832 | BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); | 835 | BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); |