summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: Ilya Dryomov <idryomov@gmail.com> 2018-01-20 04:30:10 -0500
committer: Ilya Dryomov <idryomov@gmail.com> 2018-04-02 04:12:38 -0400
commit: 5359a17d2706b86da2af83027343d5eb256f7670 (patch)
tree: 31053fe22a0d91b40911882b2aaa961e77504d4b /net
parent: a1fbb5e7bbb56fccdf54bf4ab5086c6080ee5bfa (diff)
libceph, rbd: new bio handling code (aka don't clone bios)
The reason we clone bios is to be able to give each object request (and consequently each ceph_osd_data/ceph_msg_data item) its own pointer to a (list of) bio(s). The messenger then initializes its cursor with cloned bio's ->bi_iter, so it knows where to start reading from/writing to. That's all the cloned bios are used for: to determine each object request's starting position in the provided data buffer.

Introduce ceph_bio_iter to do exactly that -- store position within bio list (i.e. pointer to bio) + position within that bio (i.e. bvec_iter).

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net')
-rw-r--r-- net/ceph/messenger.c 101
-rw-r--r-- net/ceph/osd_client.c 13
2 files changed, 42 insertions, 72 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 8a4d3758030b..b9fa8b869c08 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -839,90 +839,57 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
839 size_t length) 839 size_t length)
840{ 840{
841 struct ceph_msg_data *data = cursor->data; 841 struct ceph_msg_data *data = cursor->data;
842 struct bio *bio; 842 struct ceph_bio_iter *it = &cursor->bio_iter;
843 843
844 BUG_ON(data->type != CEPH_MSG_DATA_BIO); 844 cursor->resid = min_t(size_t, length, data->bio_length);
845 *it = data->bio_pos;
846 if (cursor->resid < it->iter.bi_size)
847 it->iter.bi_size = cursor->resid;
845 848
846 bio = data->bio; 849 BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
847 BUG_ON(!bio); 850 cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
848
849 cursor->resid = min(length, data->bio_length);
850 cursor->bio = bio;
851 cursor->bvec_iter = bio->bi_iter;
852 cursor->last_piece =
853 cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
854} 851}
855 852
856static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, 853static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
857 size_t *page_offset, 854 size_t *page_offset,
858 size_t *length) 855 size_t *length)
859{ 856{
860 struct ceph_msg_data *data = cursor->data; 857 struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
861 struct bio *bio; 858 cursor->bio_iter.iter);
862 struct bio_vec bio_vec;
863
864 BUG_ON(data->type != CEPH_MSG_DATA_BIO);
865
866 bio = cursor->bio;
867 BUG_ON(!bio);
868 859
869 bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); 860 *page_offset = bv.bv_offset;
870 861 *length = bv.bv_len;
871 *page_offset = (size_t) bio_vec.bv_offset; 862 return bv.bv_page;
872 BUG_ON(*page_offset >= PAGE_SIZE);
873 if (cursor->last_piece) /* pagelist offset is always 0 */
874 *length = cursor->resid;
875 else
876 *length = (size_t) bio_vec.bv_len;
877 BUG_ON(*length > cursor->resid);
878 BUG_ON(*page_offset + *length > PAGE_SIZE);
879
880 return bio_vec.bv_page;
881} 863}
882 864
883static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, 865static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
884 size_t bytes) 866 size_t bytes)
885{ 867{
886 struct bio *bio; 868 struct ceph_bio_iter *it = &cursor->bio_iter;
887 struct bio_vec bio_vec;
888
889 BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
890
891 bio = cursor->bio;
892 BUG_ON(!bio);
893
894 bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
895 869
896 /* Advance the cursor offset */ 870 BUG_ON(bytes > cursor->resid);
897 871 BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
898 BUG_ON(cursor->resid < bytes);
899 cursor->resid -= bytes; 872 cursor->resid -= bytes;
873 bio_advance_iter(it->bio, &it->iter, bytes);
900 874
901 bio_advance_iter(bio, &cursor->bvec_iter, bytes); 875 if (!cursor->resid) {
876 BUG_ON(!cursor->last_piece);
877 return false; /* no more data */
878 }
902 879
903 if (bytes < bio_vec.bv_len) 880 if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
904 return false; /* more bytes to process in this segment */ 881 return false; /* more bytes to process in this segment */
905 882
906 /* Move on to the next segment, and possibly the next bio */ 883 if (!it->iter.bi_size) {
907 884 it->bio = it->bio->bi_next;
908 if (!cursor->bvec_iter.bi_size) { 885 it->iter = it->bio->bi_iter;
909 bio = bio->bi_next; 886 if (cursor->resid < it->iter.bi_size)
910 cursor->bio = bio; 887 it->iter.bi_size = cursor->resid;
911 if (bio)
912 cursor->bvec_iter = bio->bi_iter;
913 else
914 memset(&cursor->bvec_iter, 0,
915 sizeof(cursor->bvec_iter));
916 }
917
918 if (!cursor->last_piece) {
919 BUG_ON(!cursor->resid);
920 BUG_ON(!bio);
921 /* A short read is OK, so use <= rather than == */
922 if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
923 cursor->last_piece = true;
924 } 888 }
925 889
890 BUG_ON(cursor->last_piece);
891 BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
892 cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
926 return true; 893 return true;
927} 894}
928#endif /* CONFIG_BLOCK */ 895#endif /* CONFIG_BLOCK */
@@ -1163,9 +1130,11 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
1163 page = NULL; 1130 page = NULL;
1164 break; 1131 break;
1165 } 1132 }
1133
1166 BUG_ON(!page); 1134 BUG_ON(!page);
1167 BUG_ON(*page_offset + *length > PAGE_SIZE); 1135 BUG_ON(*page_offset + *length > PAGE_SIZE);
1168 BUG_ON(!*length); 1136 BUG_ON(!*length);
1137 BUG_ON(*length > cursor->resid);
1169 if (last_piece) 1138 if (last_piece)
1170 *last_piece = cursor->last_piece; 1139 *last_piece = cursor->last_piece;
1171 1140
@@ -3262,16 +3231,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
3262EXPORT_SYMBOL(ceph_msg_data_add_pagelist); 3231EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
3263 3232
3264#ifdef CONFIG_BLOCK 3233#ifdef CONFIG_BLOCK
3265void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, 3234void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
3266 size_t length) 3235 u32 length)
3267{ 3236{
3268 struct ceph_msg_data *data; 3237 struct ceph_msg_data *data;
3269 3238
3270 BUG_ON(!bio);
3271
3272 data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); 3239 data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
3273 BUG_ON(!data); 3240 BUG_ON(!data);
3274 data->bio = bio; 3241 data->bio_pos = *bio_pos;
3275 data->bio_length = length; 3242 data->bio_length = length;
3276 3243
3277 list_add_tail(&data->links, &msg->data); 3244 list_add_tail(&data->links, &msg->data);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4b0485458d26..339d8773ebe8 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -146,10 +146,11 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
146 146
147#ifdef CONFIG_BLOCK 147#ifdef CONFIG_BLOCK
148static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, 148static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
149 struct bio *bio, size_t bio_length) 149 struct ceph_bio_iter *bio_pos,
150 u32 bio_length)
150{ 151{
151 osd_data->type = CEPH_OSD_DATA_TYPE_BIO; 152 osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
152 osd_data->bio = bio; 153 osd_data->bio_pos = *bio_pos;
153 osd_data->bio_length = bio_length; 154 osd_data->bio_length = bio_length;
154} 155}
155#endif /* CONFIG_BLOCK */ 156#endif /* CONFIG_BLOCK */
@@ -216,12 +217,14 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
216 217
217#ifdef CONFIG_BLOCK 218#ifdef CONFIG_BLOCK
218void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, 219void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
219 unsigned int which, struct bio *bio, size_t bio_length) 220 unsigned int which,
221 struct ceph_bio_iter *bio_pos,
222 u32 bio_length)
220{ 223{
221 struct ceph_osd_data *osd_data; 224 struct ceph_osd_data *osd_data;
222 225
223 osd_data = osd_req_op_data(osd_req, which, extent, osd_data); 226 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
224 ceph_osd_data_bio_init(osd_data, bio, bio_length); 227 ceph_osd_data_bio_init(osd_data, bio_pos, bio_length);
225} 228}
226EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); 229EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
227#endif /* CONFIG_BLOCK */ 230#endif /* CONFIG_BLOCK */
@@ -826,7 +829,7 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
826 ceph_msg_data_add_pagelist(msg, osd_data->pagelist); 829 ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
827#ifdef CONFIG_BLOCK 830#ifdef CONFIG_BLOCK
828 } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { 831 } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
829 ceph_msg_data_add_bio(msg, osd_data->bio, length); 832 ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length);
830#endif 833#endif
831 } else { 834 } else {
832 BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); 835 BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);