aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
author Alex Elder <elder@inktank.com> 2013-04-03 22:32:51 -0400
committer Sage Weil <sage@inktank.com> 2013-05-02 00:18:12 -0400
commit 79528734f3ae4699a2886f62f55e18fb34fb3651 (patch)
tree 51905378486b592fc2d4037d67ef3b577fe4eaa7 /net/ceph
parent 430c28c3cb7f3dbd87de266ed52d65928957ff78 (diff)
libceph: keep source rather than message osd op array
An osd request keeps a pointer to the osd operations (ops) array that it builds in its request message. In order to allow each op in the array to have its own distinct data, we will need to keep track of each op's data, and that information does not go over the wire. As long as we're tracking the data, we might as well track the entire (source) op definition for each of the ops. And if we're doing that, we have no more need to keep a pointer to the wire-encoded version. This patch keeps the array of source ops with the osd request structure, and uses it instead of the version encoded in the message in the places where the encoded version was previously used. The array will be embedded in the request structure, and the maximum number of ops we ever actually use is currently 2, so reduce CEPH_OSD_MAX_OP to 2 to reduce the size of the structure. This change ripples back up through the callers, and as a result various function parameters and local variables become unnecessary. Make r_num_ops be unsigned, and move the definition of struct ceph_osd_req_op earlier to ensure it's defined where needed. This patch does not yet add per-op data; that is coming soon. This resolves: http://tracker.ceph.com/issues/4656. Signed-off-by: Alex Elder <elder@inktank.com>. Reviewed-by: Josh Durgin <josh.durgin@inktank.com>.
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/debugfs.c 4
-rw-r--r-- net/ceph/osd_client.c 53
2 files changed, 29 insertions, 28 deletions
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 00d051f4894e..83661cdc0766 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -123,8 +123,8 @@ static int osdc_show(struct seq_file *s, void *pp)
123 mutex_lock(&osdc->request_mutex); 123 mutex_lock(&osdc->request_mutex);
124 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { 124 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
125 struct ceph_osd_request *req; 125 struct ceph_osd_request *req;
126 unsigned int i;
126 int opcode; 127 int opcode;
127 int i;
128 128
129 req = rb_entry(p, struct ceph_osd_request, r_node); 129 req = rb_entry(p, struct ceph_osd_request, r_node);
130 130
@@ -142,7 +142,7 @@ static int osdc_show(struct seq_file *s, void *pp)
142 seq_printf(s, "\t"); 142 seq_printf(s, "\t");
143 143
144 for (i = 0; i < req->r_num_ops; i++) { 144 for (i = 0; i < req->r_num_ops; i++) {
145 opcode = le16_to_cpu(req->r_request_ops[i].op); 145 opcode = req->r_ops[i].op;
146 seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); 146 seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
147 } 147 }
148 148
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e197c5c0b3a2..a498d2de17a4 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -186,6 +186,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
186 struct ceph_msg *msg; 186 struct ceph_msg *msg;
187 size_t msg_size; 187 size_t msg_size;
188 188
189 BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
190 BUG_ON(num_ops > CEPH_OSD_MAX_OP);
191
189 msg_size = 4 + 4 + 8 + 8 + 4+8; 192 msg_size = 4 + 4 + 8 + 8 + 4+8;
190 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ 193 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
191 msg_size += 1 + 8 + 4 + 4; /* pg_t */ 194 msg_size += 1 + 8 + 4 + 4; /* pg_t */
@@ -207,6 +210,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
207 210
208 req->r_osdc = osdc; 211 req->r_osdc = osdc;
209 req->r_mempool = use_mempool; 212 req->r_mempool = use_mempool;
213 req->r_num_ops = num_ops;
210 214
211 kref_init(&req->r_kref); 215 kref_init(&req->r_kref);
212 init_completion(&req->r_completion); 216 init_completion(&req->r_completion);
@@ -418,12 +422,14 @@ void osd_req_op_watch_init(struct ceph_osd_req_op *op, u16 opcode,
418EXPORT_SYMBOL(osd_req_op_watch_init); 422EXPORT_SYMBOL(osd_req_op_watch_init);
419 423
420static u64 osd_req_encode_op(struct ceph_osd_request *req, 424static u64 osd_req_encode_op(struct ceph_osd_request *req,
421 struct ceph_osd_op *dst, 425 struct ceph_osd_op *dst, unsigned int which)
422 struct ceph_osd_req_op *src)
423{ 426{
427 struct ceph_osd_req_op *src;
424 u64 out_data_len = 0; 428 u64 out_data_len = 0;
425 struct ceph_pagelist *pagelist; 429 struct ceph_pagelist *pagelist;
426 430
431 BUG_ON(which >= req->r_num_ops);
432 src = &req->r_ops[which];
427 if (WARN_ON(!osd_req_opcode_valid(src->op))) { 433 if (WARN_ON(!osd_req_opcode_valid(src->op))) {
428 pr_err("unrecognized osd opcode %d\n", src->op); 434 pr_err("unrecognized osd opcode %d\n", src->op);
429 435
@@ -487,21 +493,17 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
487 * build new request AND message 493 * build new request AND message
488 * 494 *
489 */ 495 */
490void ceph_osdc_build_request(struct ceph_osd_request *req, 496void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
491 u64 off, unsigned int num_ops, 497 struct ceph_snap_context *snapc, u64 snap_id,
492 struct ceph_osd_req_op *src_ops, 498 struct timespec *mtime)
493 struct ceph_snap_context *snapc, u64 snap_id,
494 struct timespec *mtime)
495{ 499{
496 struct ceph_msg *msg = req->r_request; 500 struct ceph_msg *msg = req->r_request;
497 struct ceph_osd_req_op *src_op;
498 void *p; 501 void *p;
499 size_t msg_size; 502 size_t msg_size;
500 int flags = req->r_flags; 503 int flags = req->r_flags;
501 u64 data_len; 504 u64 data_len;
502 int i; 505 unsigned int i;
503 506
504 req->r_num_ops = num_ops;
505 req->r_snapid = snap_id; 507 req->r_snapid = snap_id;
506 req->r_snapc = ceph_get_snap_context(snapc); 508 req->r_snapc = ceph_get_snap_context(snapc);
507 509
@@ -541,12 +543,10 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
541 p += req->r_oid_len; 543 p += req->r_oid_len;
542 544
543 /* ops--can imply data */ 545 /* ops--can imply data */
544 ceph_encode_16(&p, num_ops); 546 ceph_encode_16(&p, (u16)req->r_num_ops);
545 src_op = src_ops;
546 req->r_request_ops = p;
547 data_len = 0; 547 data_len = 0;
548 for (i = 0; i < num_ops; i++, src_op++) { 548 for (i = 0; i < req->r_num_ops; i++) {
549 data_len += osd_req_encode_op(req, p, src_op); 549 data_len += osd_req_encode_op(req, p, i);
550 p += sizeof(struct ceph_osd_op); 550 p += sizeof(struct ceph_osd_op);
551 } 551 }
552 552
@@ -602,7 +602,6 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
602 struct ceph_file_layout *layout, 602 struct ceph_file_layout *layout,
603 struct ceph_vino vino, 603 struct ceph_vino vino,
604 u64 off, u64 *plen, int num_ops, 604 u64 off, u64 *plen, int num_ops,
605 struct ceph_osd_req_op *ops,
606 int opcode, int flags, 605 int opcode, int flags,
607 struct ceph_snap_context *snapc, 606 struct ceph_snap_context *snapc,
608 u32 truncate_seq, 607 u32 truncate_seq,
@@ -610,6 +609,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
610 bool use_mempool) 609 bool use_mempool)
611{ 610{
612 struct ceph_osd_request *req; 611 struct ceph_osd_request *req;
612 struct ceph_osd_req_op *op;
613 u64 objnum = 0; 613 u64 objnum = 0;
614 u64 objoff = 0; 614 u64 objoff = 0;
615 u64 objlen = 0; 615 u64 objlen = 0;
@@ -623,6 +623,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
623 GFP_NOFS); 623 GFP_NOFS);
624 if (!req) 624 if (!req)
625 return ERR_PTR(-ENOMEM); 625 return ERR_PTR(-ENOMEM);
626
626 req->r_flags = flags; 627 req->r_flags = flags;
627 628
628 /* calculate max write size */ 629 /* calculate max write size */
@@ -642,7 +643,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
642 truncate_size = object_size; 643 truncate_size = object_size;
643 } 644 }
644 645
645 osd_req_op_extent_init(&ops[0], opcode, objoff, objlen, 646 op = &req->r_ops[0];
647 osd_req_op_extent_init(op, opcode, objoff, objlen,
646 truncate_size, truncate_seq); 648 truncate_size, truncate_seq);
647 /* 649 /*
648 * A second op in the ops array means the caller wants to 650 * A second op in the ops array means the caller wants to
@@ -650,7 +652,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
650 * osd will flush data quickly. 652 * osd will flush data quickly.
651 */ 653 */
652 if (num_ops > 1) 654 if (num_ops > 1)
653 osd_req_op_init(&ops[1], CEPH_OSD_OP_STARTSYNC); 655 osd_req_op_init(++op, CEPH_OSD_OP_STARTSYNC);
654 656
655 req->r_file_layout = *layout; /* keep a copy */ 657 req->r_file_layout = *layout; /* keep a copy */
656 658
@@ -1342,7 +1344,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1342 struct ceph_osd_request *req; 1344 struct ceph_osd_request *req;
1343 u64 tid; 1345 u64 tid;
1344 int object_len; 1346 int object_len;
1345 int numops, payload_len, flags; 1347 unsigned int numops;
1348 int payload_len, flags;
1346 s32 result; 1349 s32 result;
1347 s32 retry_attempt; 1350 s32 retry_attempt;
1348 struct ceph_pg pg; 1351 struct ceph_pg pg;
@@ -1352,7 +1355,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1352 u32 osdmap_epoch; 1355 u32 osdmap_epoch;
1353 int already_completed; 1356 int already_completed;
1354 u32 bytes; 1357 u32 bytes;
1355 int i; 1358 unsigned int i;
1356 1359
1357 tid = le64_to_cpu(msg->hdr.tid); 1360 tid = le64_to_cpu(msg->hdr.tid);
1358 dout("handle_reply %p tid %llu\n", msg, tid); 1361 dout("handle_reply %p tid %llu\n", msg, tid);
@@ -2116,12 +2119,11 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
2116 struct page **pages, int num_pages, int page_align) 2119 struct page **pages, int num_pages, int page_align)
2117{ 2120{
2118 struct ceph_osd_request *req; 2121 struct ceph_osd_request *req;
2119 struct ceph_osd_req_op op;
2120 int rc = 0; 2122 int rc = 0;
2121 2123
2122 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, 2124 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
2123 vino.snap, off, *plen); 2125 vino.snap, off, *plen);
2124 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1, &op, 2126 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
2125 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2127 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
2126 NULL, truncate_seq, truncate_size, 2128 NULL, truncate_seq, truncate_size,
2127 false); 2129 false);
@@ -2136,7 +2138,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
2136 dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n", 2138 dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
2137 off, *plen, *plen, page_align); 2139 off, *plen, *plen, page_align);
2138 2140
2139 ceph_osdc_build_request(req, off, 1, &op, NULL, vino.snap, NULL); 2141 ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
2140 2142
2141 rc = ceph_osdc_start_request(osdc, req, false); 2143 rc = ceph_osdc_start_request(osdc, req, false);
2142 if (!rc) 2144 if (!rc)
@@ -2160,12 +2162,11 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
2160 struct page **pages, int num_pages) 2162 struct page **pages, int num_pages)
2161{ 2163{
2162 struct ceph_osd_request *req; 2164 struct ceph_osd_request *req;
2163 struct ceph_osd_req_op op;
2164 int rc = 0; 2165 int rc = 0;
2165 int page_align = off & ~PAGE_MASK; 2166 int page_align = off & ~PAGE_MASK;
2166 2167
2167 BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */ 2168 BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */
2168 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1, &op, 2169 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
2169 CEPH_OSD_OP_WRITE, 2170 CEPH_OSD_OP_WRITE,
2170 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, 2171 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
2171 snapc, truncate_seq, truncate_size, 2172 snapc, truncate_seq, truncate_size,
@@ -2178,7 +2179,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
2178 false, false); 2179 false, false);
2179 dout("writepages %llu~%llu (%llu bytes)\n", off, len, len); 2180 dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);
2180 2181
2181 ceph_osdc_build_request(req, off, 1, &op, snapc, CEPH_NOSNAP, mtime); 2182 ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime);
2182 2183
2183 rc = ceph_osdc_start_request(osdc, req, true); 2184 rc = ceph_osdc_start_request(osdc, req, true);
2184 if (!rc) 2185 if (!rc)