diff options
author | Alex Elder <elder@inktank.com> | 2013-04-03 22:32:51 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-05-02 00:18:12 -0400 |
commit | 79528734f3ae4699a2886f62f55e18fb34fb3651 (patch) | |
tree | 51905378486b592fc2d4037d67ef3b577fe4eaa7 /net/ceph | |
parent | 430c28c3cb7f3dbd87de266ed52d65928957ff78 (diff) |
libceph: keep source rather than message osd op array
An osd request keeps a pointer to the osd operations (ops) array
that it builds in its request message.
In order to allow each op in the array to have its own distinct
data, we will need to keep track of each op's data, and that
information does not go over the wire.
As long as we're tracking the data we might as well just track the
entire (source) op definition for each of the ops. And if we're
doing that, we'll have no more need to keep a pointer to the
wire-encoded version.
This patch makes the array of source ops be kept with the osd
request structure, and uses that instead of the version encoded in
the message in places where that was previously used. The array
will be embedded in the request structure, and the maximum number of
ops we ever actually use is currently 2. So reduce CEPH_OSD_MAX_OP
to 2 to reduce the size of the structure.
Doing this ripples back up through the call chain, and as a result
various function parameters and local variables become unnecessary.
Make r_num_ops be unsigned, and move the definition of struct
ceph_osd_req_op earlier to ensure it's defined where needed.
It does not yet add per-op data, that's coming soon.
This resolves:
http://tracker.ceph.com/issues/4656
Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
Diffstat (limited to 'net/ceph')
-rw-r--r-- | net/ceph/debugfs.c | 4 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 53 |
2 files changed, 29 insertions, 28 deletions
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 00d051f4894e..83661cdc0766 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c | |||
@@ -123,8 +123,8 @@ static int osdc_show(struct seq_file *s, void *pp) | |||
123 | mutex_lock(&osdc->request_mutex); | 123 | mutex_lock(&osdc->request_mutex); |
124 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | 124 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { |
125 | struct ceph_osd_request *req; | 125 | struct ceph_osd_request *req; |
126 | unsigned int i; | ||
126 | int opcode; | 127 | int opcode; |
127 | int i; | ||
128 | 128 | ||
129 | req = rb_entry(p, struct ceph_osd_request, r_node); | 129 | req = rb_entry(p, struct ceph_osd_request, r_node); |
130 | 130 | ||
@@ -142,7 +142,7 @@ static int osdc_show(struct seq_file *s, void *pp) | |||
142 | seq_printf(s, "\t"); | 142 | seq_printf(s, "\t"); |
143 | 143 | ||
144 | for (i = 0; i < req->r_num_ops; i++) { | 144 | for (i = 0; i < req->r_num_ops; i++) { |
145 | opcode = le16_to_cpu(req->r_request_ops[i].op); | 145 | opcode = req->r_ops[i].op; |
146 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | 146 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); |
147 | } | 147 | } |
148 | 148 | ||
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e197c5c0b3a2..a498d2de17a4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -186,6 +186,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
186 | struct ceph_msg *msg; | 186 | struct ceph_msg *msg; |
187 | size_t msg_size; | 187 | size_t msg_size; |
188 | 188 | ||
189 | BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX); | ||
190 | BUG_ON(num_ops > CEPH_OSD_MAX_OP); | ||
191 | |||
189 | msg_size = 4 + 4 + 8 + 8 + 4+8; | 192 | msg_size = 4 + 4 + 8 + 8 + 4+8; |
190 | msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ | 193 | msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ |
191 | msg_size += 1 + 8 + 4 + 4; /* pg_t */ | 194 | msg_size += 1 + 8 + 4 + 4; /* pg_t */ |
@@ -207,6 +210,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
207 | 210 | ||
208 | req->r_osdc = osdc; | 211 | req->r_osdc = osdc; |
209 | req->r_mempool = use_mempool; | 212 | req->r_mempool = use_mempool; |
213 | req->r_num_ops = num_ops; | ||
210 | 214 | ||
211 | kref_init(&req->r_kref); | 215 | kref_init(&req->r_kref); |
212 | init_completion(&req->r_completion); | 216 | init_completion(&req->r_completion); |
@@ -418,12 +422,14 @@ void osd_req_op_watch_init(struct ceph_osd_req_op *op, u16 opcode, | |||
418 | EXPORT_SYMBOL(osd_req_op_watch_init); | 422 | EXPORT_SYMBOL(osd_req_op_watch_init); |
419 | 423 | ||
420 | static u64 osd_req_encode_op(struct ceph_osd_request *req, | 424 | static u64 osd_req_encode_op(struct ceph_osd_request *req, |
421 | struct ceph_osd_op *dst, | 425 | struct ceph_osd_op *dst, unsigned int which) |
422 | struct ceph_osd_req_op *src) | ||
423 | { | 426 | { |
427 | struct ceph_osd_req_op *src; | ||
424 | u64 out_data_len = 0; | 428 | u64 out_data_len = 0; |
425 | struct ceph_pagelist *pagelist; | 429 | struct ceph_pagelist *pagelist; |
426 | 430 | ||
431 | BUG_ON(which >= req->r_num_ops); | ||
432 | src = &req->r_ops[which]; | ||
427 | if (WARN_ON(!osd_req_opcode_valid(src->op))) { | 433 | if (WARN_ON(!osd_req_opcode_valid(src->op))) { |
428 | pr_err("unrecognized osd opcode %d\n", src->op); | 434 | pr_err("unrecognized osd opcode %d\n", src->op); |
429 | 435 | ||
@@ -487,21 +493,17 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, | |||
487 | * build new request AND message | 493 | * build new request AND message |
488 | * | 494 | * |
489 | */ | 495 | */ |
490 | void ceph_osdc_build_request(struct ceph_osd_request *req, | 496 | void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, |
491 | u64 off, unsigned int num_ops, | 497 | struct ceph_snap_context *snapc, u64 snap_id, |
492 | struct ceph_osd_req_op *src_ops, | 498 | struct timespec *mtime) |
493 | struct ceph_snap_context *snapc, u64 snap_id, | ||
494 | struct timespec *mtime) | ||
495 | { | 499 | { |
496 | struct ceph_msg *msg = req->r_request; | 500 | struct ceph_msg *msg = req->r_request; |
497 | struct ceph_osd_req_op *src_op; | ||
498 | void *p; | 501 | void *p; |
499 | size_t msg_size; | 502 | size_t msg_size; |
500 | int flags = req->r_flags; | 503 | int flags = req->r_flags; |
501 | u64 data_len; | 504 | u64 data_len; |
502 | int i; | 505 | unsigned int i; |
503 | 506 | ||
504 | req->r_num_ops = num_ops; | ||
505 | req->r_snapid = snap_id; | 507 | req->r_snapid = snap_id; |
506 | req->r_snapc = ceph_get_snap_context(snapc); | 508 | req->r_snapc = ceph_get_snap_context(snapc); |
507 | 509 | ||
@@ -541,12 +543,10 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, | |||
541 | p += req->r_oid_len; | 543 | p += req->r_oid_len; |
542 | 544 | ||
543 | /* ops--can imply data */ | 545 | /* ops--can imply data */ |
544 | ceph_encode_16(&p, num_ops); | 546 | ceph_encode_16(&p, (u16)req->r_num_ops); |
545 | src_op = src_ops; | ||
546 | req->r_request_ops = p; | ||
547 | data_len = 0; | 547 | data_len = 0; |
548 | for (i = 0; i < num_ops; i++, src_op++) { | 548 | for (i = 0; i < req->r_num_ops; i++) { |
549 | data_len += osd_req_encode_op(req, p, src_op); | 549 | data_len += osd_req_encode_op(req, p, i); |
550 | p += sizeof(struct ceph_osd_op); | 550 | p += sizeof(struct ceph_osd_op); |
551 | } | 551 | } |
552 | 552 | ||
@@ -602,7 +602,6 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
602 | struct ceph_file_layout *layout, | 602 | struct ceph_file_layout *layout, |
603 | struct ceph_vino vino, | 603 | struct ceph_vino vino, |
604 | u64 off, u64 *plen, int num_ops, | 604 | u64 off, u64 *plen, int num_ops, |
605 | struct ceph_osd_req_op *ops, | ||
606 | int opcode, int flags, | 605 | int opcode, int flags, |
607 | struct ceph_snap_context *snapc, | 606 | struct ceph_snap_context *snapc, |
608 | u32 truncate_seq, | 607 | u32 truncate_seq, |
@@ -610,6 +609,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
610 | bool use_mempool) | 609 | bool use_mempool) |
611 | { | 610 | { |
612 | struct ceph_osd_request *req; | 611 | struct ceph_osd_request *req; |
612 | struct ceph_osd_req_op *op; | ||
613 | u64 objnum = 0; | 613 | u64 objnum = 0; |
614 | u64 objoff = 0; | 614 | u64 objoff = 0; |
615 | u64 objlen = 0; | 615 | u64 objlen = 0; |
@@ -623,6 +623,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
623 | GFP_NOFS); | 623 | GFP_NOFS); |
624 | if (!req) | 624 | if (!req) |
625 | return ERR_PTR(-ENOMEM); | 625 | return ERR_PTR(-ENOMEM); |
626 | |||
626 | req->r_flags = flags; | 627 | req->r_flags = flags; |
627 | 628 | ||
628 | /* calculate max write size */ | 629 | /* calculate max write size */ |
@@ -642,7 +643,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
642 | truncate_size = object_size; | 643 | truncate_size = object_size; |
643 | } | 644 | } |
644 | 645 | ||
645 | osd_req_op_extent_init(&ops[0], opcode, objoff, objlen, | 646 | op = &req->r_ops[0]; |
647 | osd_req_op_extent_init(op, opcode, objoff, objlen, | ||
646 | truncate_size, truncate_seq); | 648 | truncate_size, truncate_seq); |
647 | /* | 649 | /* |
648 | * A second op in the ops array means the caller wants to | 650 | * A second op in the ops array means the caller wants to |
@@ -650,7 +652,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
650 | * osd will flush data quickly. | 652 | * osd will flush data quickly. |
651 | */ | 653 | */ |
652 | if (num_ops > 1) | 654 | if (num_ops > 1) |
653 | osd_req_op_init(&ops[1], CEPH_OSD_OP_STARTSYNC); | 655 | osd_req_op_init(++op, CEPH_OSD_OP_STARTSYNC); |
654 | 656 | ||
655 | req->r_file_layout = *layout; /* keep a copy */ | 657 | req->r_file_layout = *layout; /* keep a copy */ |
656 | 658 | ||
@@ -1342,7 +1344,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
1342 | struct ceph_osd_request *req; | 1344 | struct ceph_osd_request *req; |
1343 | u64 tid; | 1345 | u64 tid; |
1344 | int object_len; | 1346 | int object_len; |
1345 | int numops, payload_len, flags; | 1347 | unsigned int numops; |
1348 | int payload_len, flags; | ||
1346 | s32 result; | 1349 | s32 result; |
1347 | s32 retry_attempt; | 1350 | s32 retry_attempt; |
1348 | struct ceph_pg pg; | 1351 | struct ceph_pg pg; |
@@ -1352,7 +1355,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
1352 | u32 osdmap_epoch; | 1355 | u32 osdmap_epoch; |
1353 | int already_completed; | 1356 | int already_completed; |
1354 | u32 bytes; | 1357 | u32 bytes; |
1355 | int i; | 1358 | unsigned int i; |
1356 | 1359 | ||
1357 | tid = le64_to_cpu(msg->hdr.tid); | 1360 | tid = le64_to_cpu(msg->hdr.tid); |
1358 | dout("handle_reply %p tid %llu\n", msg, tid); | 1361 | dout("handle_reply %p tid %llu\n", msg, tid); |
@@ -2116,12 +2119,11 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
2116 | struct page **pages, int num_pages, int page_align) | 2119 | struct page **pages, int num_pages, int page_align) |
2117 | { | 2120 | { |
2118 | struct ceph_osd_request *req; | 2121 | struct ceph_osd_request *req; |
2119 | struct ceph_osd_req_op op; | ||
2120 | int rc = 0; | 2122 | int rc = 0; |
2121 | 2123 | ||
2122 | dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, | 2124 | dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, |
2123 | vino.snap, off, *plen); | 2125 | vino.snap, off, *plen); |
2124 | req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1, &op, | 2126 | req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1, |
2125 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, | 2127 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, |
2126 | NULL, truncate_seq, truncate_size, | 2128 | NULL, truncate_seq, truncate_size, |
2127 | false); | 2129 | false); |
@@ -2136,7 +2138,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
2136 | dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n", | 2138 | dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n", |
2137 | off, *plen, *plen, page_align); | 2139 | off, *plen, *plen, page_align); |
2138 | 2140 | ||
2139 | ceph_osdc_build_request(req, off, 1, &op, NULL, vino.snap, NULL); | 2141 | ceph_osdc_build_request(req, off, NULL, vino.snap, NULL); |
2140 | 2142 | ||
2141 | rc = ceph_osdc_start_request(osdc, req, false); | 2143 | rc = ceph_osdc_start_request(osdc, req, false); |
2142 | if (!rc) | 2144 | if (!rc) |
@@ -2160,12 +2162,11 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
2160 | struct page **pages, int num_pages) | 2162 | struct page **pages, int num_pages) |
2161 | { | 2163 | { |
2162 | struct ceph_osd_request *req; | 2164 | struct ceph_osd_request *req; |
2163 | struct ceph_osd_req_op op; | ||
2164 | int rc = 0; | 2165 | int rc = 0; |
2165 | int page_align = off & ~PAGE_MASK; | 2166 | int page_align = off & ~PAGE_MASK; |
2166 | 2167 | ||
2167 | BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */ | 2168 | BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */ |
2168 | req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1, &op, | 2169 | req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1, |
2169 | CEPH_OSD_OP_WRITE, | 2170 | CEPH_OSD_OP_WRITE, |
2170 | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, | 2171 | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, |
2171 | snapc, truncate_seq, truncate_size, | 2172 | snapc, truncate_seq, truncate_size, |
@@ -2178,7 +2179,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
2178 | false, false); | 2179 | false, false); |
2179 | dout("writepages %llu~%llu (%llu bytes)\n", off, len, len); | 2180 | dout("writepages %llu~%llu (%llu bytes)\n", off, len, len); |
2180 | 2181 | ||
2181 | ceph_osdc_build_request(req, off, 1, &op, snapc, CEPH_NOSNAP, mtime); | 2182 | ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime); |
2182 | 2183 | ||
2183 | rc = ceph_osdc_start_request(osdc, req, true); | 2184 | rc = ceph_osdc_start_request(osdc, req, true); |
2184 | if (!rc) | 2185 | if (!rc) |