diff options
author | Alex Elder <elder@inktank.com> | 2012-11-13 22:11:15 -0500 |
---|---|---|
committer | Alex Elder <elder@inktank.com> | 2013-01-17 16:52:03 -0500 |
commit | c885837f7d4f8c4f5cb2a744cc6929bc078e9dc0 (patch) | |
tree | 2b4e60e8e7aee98e8f4e8c56119978160ba85cfd | |
parent | 7c3d22cf16f1bbcb37a73e88338c042bb49ff112 (diff) |
libceph: always allow trail in osd request
An osd request structure contains an optional trail portion, which
if present will contain data to be passed in the payload portion of
the message containing the request. The trail field is a
ceph_pagelist pointer, and if null it indicates there is no trail.
A ceph_pagelist structure contains a length field, and it can
legitimately hold the value 0. Make use of this to change the
interpretation of the "trail" of an osd request so that every osd
request has trailing data; it just might have length 0.
This means we change the r_trail field in a ceph_osd_request
structure from a pointer to a structure that is always initialized.
Note that in ceph_osdc_start_request(), the trail pointer (or now
the address of that structure) is assigned to a ceph message's trail
field. Here's why that's still OK (looking at net/ceph/messenger.c):
- What would have resulted in a null pointer previously will now
refer to a 0-length page list. That message trail pointer
is used in two functions, write_partial_msg_pages() and
out_msg_pos_next().
- In write_partial_msg_pages(), a null page list pointer is
handled the same as a message with a 0-length trail, and both
result in the "in_trail" variable being set to false. The trail
pointer is only used if in_trail is true.
- The only other place the message trail pointer is used is
out_msg_pos_next(). That function is only called by
write_partial_msg_pages() and only touches the trail pointer
if the in_trail value it is passed is true.
Therefore a null ceph_msg->trail pointer is equivalent to a non-null
pointer referring to a 0-length page list structure.
Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r-- | include/linux/ceph/osd_client.h | 4 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 43 |
2 files changed, 14 insertions, 33 deletions
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f2e5d2cdca06..61562c792855 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/ceph/osdmap.h> | 10 | #include <linux/ceph/osdmap.h> |
11 | #include <linux/ceph/messenger.h> | 11 | #include <linux/ceph/messenger.h> |
12 | #include <linux/ceph/auth.h> | 12 | #include <linux/ceph/auth.h> |
13 | #include <linux/ceph/pagelist.h> | ||
13 | 14 | ||
14 | /* | 15 | /* |
15 | * Maximum object name size | 16 | * Maximum object name size |
@@ -22,7 +23,6 @@ struct ceph_snap_context; | |||
22 | struct ceph_osd_request; | 23 | struct ceph_osd_request; |
23 | struct ceph_osd_client; | 24 | struct ceph_osd_client; |
24 | struct ceph_authorizer; | 25 | struct ceph_authorizer; |
25 | struct ceph_pagelist; | ||
26 | 26 | ||
27 | /* | 27 | /* |
28 | * completion callback for async writepages | 28 | * completion callback for async writepages |
@@ -95,7 +95,7 @@ struct ceph_osd_request { | |||
95 | struct bio *r_bio; /* instead of pages */ | 95 | struct bio *r_bio; /* instead of pages */ |
96 | #endif | 96 | #endif |
97 | 97 | ||
98 | struct ceph_pagelist *r_trail; /* trailing part of the data */ | 98 | struct ceph_pagelist r_trail; /* trailing part of the data */ |
99 | }; | 99 | }; |
100 | 100 | ||
101 | struct ceph_osd_event { | 101 | struct ceph_osd_event { |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7d38327a8e89..2be50d82ccbc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -171,10 +171,7 @@ void ceph_osdc_release_request(struct kref *kref) | |||
171 | bio_put(req->r_bio); | 171 | bio_put(req->r_bio); |
172 | #endif | 172 | #endif |
173 | ceph_put_snap_context(req->r_snapc); | 173 | ceph_put_snap_context(req->r_snapc); |
174 | if (req->r_trail) { | 174 | ceph_pagelist_release(&req->r_trail); |
175 | ceph_pagelist_release(req->r_trail); | ||
176 | kfree(req->r_trail); | ||
177 | } | ||
178 | if (req->r_mempool) | 175 | if (req->r_mempool) |
179 | mempool_free(req, req->r_osdc->req_mempool); | 176 | mempool_free(req, req->r_osdc->req_mempool); |
180 | else | 177 | else |
@@ -208,8 +205,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
208 | { | 205 | { |
209 | struct ceph_osd_request *req; | 206 | struct ceph_osd_request *req; |
210 | struct ceph_msg *msg; | 207 | struct ceph_msg *msg; |
211 | int needs_trail; | 208 | int num_op = get_num_ops(ops, NULL); |
212 | int num_op = get_num_ops(ops, &needs_trail); | ||
213 | size_t msg_size = sizeof(struct ceph_osd_request_head); | 209 | size_t msg_size = sizeof(struct ceph_osd_request_head); |
214 | 210 | ||
215 | msg_size += num_op*sizeof(struct ceph_osd_op); | 211 | msg_size += num_op*sizeof(struct ceph_osd_op); |
@@ -252,15 +248,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
252 | } | 248 | } |
253 | req->r_reply = msg; | 249 | req->r_reply = msg; |
254 | 250 | ||
255 | /* allocate space for the trailing data */ | 251 | ceph_pagelist_init(&req->r_trail); |
256 | if (needs_trail) { | ||
257 | req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); | ||
258 | if (!req->r_trail) { | ||
259 | ceph_osdc_put_request(req); | ||
260 | return NULL; | ||
261 | } | ||
262 | ceph_pagelist_init(req->r_trail); | ||
263 | } | ||
264 | 252 | ||
265 | /* create request message; allow space for oid */ | 253 | /* create request message; allow space for oid */ |
266 | msg_size += MAX_OBJ_NAME_SIZE; | 254 | msg_size += MAX_OBJ_NAME_SIZE; |
@@ -312,29 +300,25 @@ static void osd_req_encode_op(struct ceph_osd_request *req, | |||
312 | case CEPH_OSD_OP_GETXATTR: | 300 | case CEPH_OSD_OP_GETXATTR: |
313 | case CEPH_OSD_OP_SETXATTR: | 301 | case CEPH_OSD_OP_SETXATTR: |
314 | case CEPH_OSD_OP_CMPXATTR: | 302 | case CEPH_OSD_OP_CMPXATTR: |
315 | BUG_ON(!req->r_trail); | ||
316 | |||
317 | dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); | 303 | dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); |
318 | dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); | 304 | dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); |
319 | dst->xattr.cmp_op = src->xattr.cmp_op; | 305 | dst->xattr.cmp_op = src->xattr.cmp_op; |
320 | dst->xattr.cmp_mode = src->xattr.cmp_mode; | 306 | dst->xattr.cmp_mode = src->xattr.cmp_mode; |
321 | ceph_pagelist_append(req->r_trail, src->xattr.name, | 307 | ceph_pagelist_append(&req->r_trail, src->xattr.name, |
322 | src->xattr.name_len); | 308 | src->xattr.name_len); |
323 | ceph_pagelist_append(req->r_trail, src->xattr.val, | 309 | ceph_pagelist_append(&req->r_trail, src->xattr.val, |
324 | src->xattr.value_len); | 310 | src->xattr.value_len); |
325 | break; | 311 | break; |
326 | case CEPH_OSD_OP_CALL: | 312 | case CEPH_OSD_OP_CALL: |
327 | BUG_ON(!req->r_trail); | ||
328 | |||
329 | dst->cls.class_len = src->cls.class_len; | 313 | dst->cls.class_len = src->cls.class_len; |
330 | dst->cls.method_len = src->cls.method_len; | 314 | dst->cls.method_len = src->cls.method_len; |
331 | dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); | 315 | dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); |
332 | 316 | ||
333 | ceph_pagelist_append(req->r_trail, src->cls.class_name, | 317 | ceph_pagelist_append(&req->r_trail, src->cls.class_name, |
334 | src->cls.class_len); | 318 | src->cls.class_len); |
335 | ceph_pagelist_append(req->r_trail, src->cls.method_name, | 319 | ceph_pagelist_append(&req->r_trail, src->cls.method_name, |
336 | src->cls.method_len); | 320 | src->cls.method_len); |
337 | ceph_pagelist_append(req->r_trail, src->cls.indata, | 321 | ceph_pagelist_append(&req->r_trail, src->cls.indata, |
338 | src->cls.indata_len); | 322 | src->cls.indata_len); |
339 | break; | 323 | break; |
340 | case CEPH_OSD_OP_ROLLBACK: | 324 | case CEPH_OSD_OP_ROLLBACK: |
@@ -347,11 +331,9 @@ static void osd_req_encode_op(struct ceph_osd_request *req, | |||
347 | __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); | 331 | __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); |
348 | __le32 timeout = cpu_to_le32(src->watch.timeout); | 332 | __le32 timeout = cpu_to_le32(src->watch.timeout); |
349 | 333 | ||
350 | BUG_ON(!req->r_trail); | 334 | ceph_pagelist_append(&req->r_trail, |
351 | |||
352 | ceph_pagelist_append(req->r_trail, | ||
353 | &prot_ver, sizeof(prot_ver)); | 335 | &prot_ver, sizeof(prot_ver)); |
354 | ceph_pagelist_append(req->r_trail, | 336 | ceph_pagelist_append(&req->r_trail, |
355 | &timeout, sizeof(timeout)); | 337 | &timeout, sizeof(timeout)); |
356 | } | 338 | } |
357 | case CEPH_OSD_OP_NOTIFY_ACK: | 339 | case CEPH_OSD_OP_NOTIFY_ACK: |
@@ -414,8 +396,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, | |||
414 | op++; | 396 | op++; |
415 | } | 397 | } |
416 | 398 | ||
417 | if (req->r_trail) | 399 | data_len += req->r_trail.length; |
418 | data_len += req->r_trail->length; | ||
419 | 400 | ||
420 | if (snapc) { | 401 | if (snapc) { |
421 | head->snap_seq = cpu_to_le64(snapc->seq); | 402 | head->snap_seq = cpu_to_le64(snapc->seq); |
@@ -1715,7 +1696,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
1715 | #ifdef CONFIG_BLOCK | 1696 | #ifdef CONFIG_BLOCK |
1716 | req->r_request->bio = req->r_bio; | 1697 | req->r_request->bio = req->r_bio; |
1717 | #endif | 1698 | #endif |
1718 | req->r_request->trail = req->r_trail; | 1699 | req->r_request->trail = &req->r_trail; |
1719 | 1700 | ||
1720 | register_request(osdc, req); | 1701 | register_request(osdc, req); |
1721 | 1702 | ||