aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Elder <elder@inktank.com>2013-03-14 15:09:05 -0400
committerSage Weil <sage@inktank.com>2013-05-02 00:17:58 -0400
commitacead002b200569273bed331c93c4a91d25e10b8 (patch)
tree779bf689da149acf73bd75f51641f3700469f6b7
parenta19308048182d5f9e16b03b1d1c038d9346c7589 (diff)
libceph: don't build request in ceph_osdc_new_request()
This patch moves the call to ceph_osdc_build_request() out of ceph_osdc_new_request() and into its callers. This is in order to defer formatting osd operation information into the request message until just before the request is started. The only unusual (ab)user of ceph_osdc_build_request() is ceph_writepages_start(), where the final length of the write request may change (downward) based on the current inode size or the oldest snapshot context with dirty data for the inode. The remaining callers don't change anything in the request after it has been built. This means the ops array is now supplied by the caller. It also means there is no need to pass the mtime to ceph_osdc_new_request() (it gets provided to ceph_osdc_build_request()). And rather than passing a do_sync flag, the number of ops supplied in the ops array now implies whether a second STARTSYNC operation is added after the READ or WRITE requested. This and some of the patches that follow are related to having the messenger (only) be responsible for filling the content of the message header, as described here: http://tracker.ceph.com/issues/4589 Signed-off-by: Alex Elder <elder@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r--fs/ceph/addr.c36
-rw-r--r--fs/ceph/file.c20
-rw-r--r--include/linux/ceph/osd_client.h12
-rw-r--r--net/ceph/osd_client.c40
4 files changed, 63 insertions, 45 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ae438d02a422..681463d5459b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -284,7 +284,9 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
284 &ceph_inode_to_client(inode)->client->osdc; 284 &ceph_inode_to_client(inode)->client->osdc;
285 struct ceph_inode_info *ci = ceph_inode(inode); 285 struct ceph_inode_info *ci = ceph_inode(inode);
286 struct page *page = list_entry(page_list->prev, struct page, lru); 286 struct page *page = list_entry(page_list->prev, struct page, lru);
287 struct ceph_vino vino;
287 struct ceph_osd_request *req; 288 struct ceph_osd_request *req;
289 struct ceph_osd_req_op op;
288 u64 off; 290 u64 off;
289 u64 len; 291 u64 len;
290 int i; 292 int i;
@@ -308,16 +310,17 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
308 len = nr_pages << PAGE_CACHE_SHIFT; 310 len = nr_pages << PAGE_CACHE_SHIFT;
309 dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages, 311 dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
310 off, len); 312 off, len);
311 313 vino = ceph_vino(inode);
312 req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), 314 req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
313 off, &len, 315 1, &op, CEPH_OSD_OP_READ,
314 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 316 CEPH_OSD_FLAG_READ, NULL,
315 NULL, 0,
316 ci->i_truncate_seq, ci->i_truncate_size, 317 ci->i_truncate_seq, ci->i_truncate_size,
317 NULL, false); 318 false);
318 if (IS_ERR(req)) 319 if (IS_ERR(req))
319 return PTR_ERR(req); 320 return PTR_ERR(req);
320 321
322 ceph_osdc_build_request(req, off, 1, &op, NULL, vino.snap, NULL);
323
321 /* build page vector */ 324 /* build page vector */
322 nr_pages = calc_pages_for(0, len); 325 nr_pages = calc_pages_for(0, len);
323 pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS); 326 pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS);
@@ -736,6 +739,7 @@ retry:
736 last_snapc = snapc; 739 last_snapc = snapc;
737 740
738 while (!done && index <= end) { 741 while (!done && index <= end) {
742 struct ceph_osd_req_op ops[2];
739 unsigned i; 743 unsigned i;
740 int first; 744 int first;
741 pgoff_t next; 745 pgoff_t next;
@@ -825,20 +829,22 @@ get_more_pages:
825 829
826 /* ok */ 830 /* ok */
827 if (locked_pages == 0) { 831 if (locked_pages == 0) {
832 struct ceph_vino vino;
833 int num_ops = do_sync ? 2 : 1;
834
828 /* prepare async write request */ 835 /* prepare async write request */
829 offset = (u64) page_offset(page); 836 offset = (u64) page_offset(page);
830 len = wsize; 837 len = wsize;
838 vino = ceph_vino(inode);
839 /* BUG_ON(vino.snap != CEPH_NOSNAP); */
831 req = ceph_osdc_new_request(&fsc->client->osdc, 840 req = ceph_osdc_new_request(&fsc->client->osdc,
832 &ci->i_layout, 841 &ci->i_layout, vino, offset, &len,
833 ceph_vino(inode), 842 num_ops, ops,
834 offset, &len,
835 CEPH_OSD_OP_WRITE, 843 CEPH_OSD_OP_WRITE,
836 CEPH_OSD_FLAG_WRITE | 844 CEPH_OSD_FLAG_WRITE |
837 CEPH_OSD_FLAG_ONDISK, 845 CEPH_OSD_FLAG_ONDISK,
838 snapc, do_sync, 846 snapc, ci->i_truncate_seq,
839 ci->i_truncate_seq, 847 ci->i_truncate_size, true);
840 ci->i_truncate_size,
841 &inode->i_mtime, true);
842 848
843 if (IS_ERR(req)) { 849 if (IS_ERR(req)) {
844 rc = PTR_ERR(req); 850 rc = PTR_ERR(req);
@@ -846,6 +852,10 @@ get_more_pages:
846 break; 852 break;
847 } 853 }
848 854
855 ceph_osdc_build_request(req, offset,
856 num_ops, ops, snapc, vino.snap,
857 &inode->i_mtime);
858
849 req->r_data_out.type = CEPH_OSD_DATA_TYPE_PAGES; 859 req->r_data_out.type = CEPH_OSD_DATA_TYPE_PAGES;
850 req->r_data_out.length = len; 860 req->r_data_out.length = len;
851 req->r_data_out.alignment = 0; 861 req->r_data_out.alignment = 0;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index aeafa67bfe99..3d6dcf23b4ad 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -475,14 +475,17 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
475 struct inode *inode = file_inode(file); 475 struct inode *inode = file_inode(file);
476 struct ceph_inode_info *ci = ceph_inode(inode); 476 struct ceph_inode_info *ci = ceph_inode(inode);
477 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 477 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
478 struct ceph_snap_context *snapc;
479 struct ceph_vino vino;
478 struct ceph_osd_request *req; 480 struct ceph_osd_request *req;
481 struct ceph_osd_req_op ops[2];
482 int num_ops = 1;
479 struct page **pages; 483 struct page **pages;
480 int num_pages; 484 int num_pages;
481 long long unsigned pos; 485 long long unsigned pos;
482 u64 len; 486 u64 len;
483 int written = 0; 487 int written = 0;
484 int flags; 488 int flags;
485 int do_sync = 0;
486 int check_caps = 0; 489 int check_caps = 0;
487 int page_align, io_align; 490 int page_align, io_align;
488 unsigned long buf_align; 491 unsigned long buf_align;
@@ -516,7 +519,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
516 if ((file->f_flags & (O_SYNC|O_DIRECT)) == 0) 519 if ((file->f_flags & (O_SYNC|O_DIRECT)) == 0)
517 flags |= CEPH_OSD_FLAG_ACK; 520 flags |= CEPH_OSD_FLAG_ACK;
518 else 521 else
519 do_sync = 1; 522 num_ops++; /* Also include a 'startsync' command. */
520 523
521 /* 524 /*
522 * we may need to do multiple writes here if we span an object 525 * we may need to do multiple writes here if we span an object
@@ -527,16 +530,19 @@ more:
527 buf_align = (unsigned long)data & ~PAGE_MASK; 530 buf_align = (unsigned long)data & ~PAGE_MASK;
528 len = left; 531 len = left;
529 532
533 snapc = ci->i_snap_realm->cached_context;
534 vino = ceph_vino(inode);
530 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 535 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
531 ceph_vino(inode), pos, &len, 536 vino, pos, &len, num_ops, ops,
532 CEPH_OSD_OP_WRITE, flags, 537 CEPH_OSD_OP_WRITE, flags, snapc,
533 ci->i_snap_realm->cached_context,
534 do_sync,
535 ci->i_truncate_seq, ci->i_truncate_size, 538 ci->i_truncate_seq, ci->i_truncate_size,
536 &mtime, false); 539 false);
537 if (IS_ERR(req)) 540 if (IS_ERR(req))
538 return PTR_ERR(req); 541 return PTR_ERR(req);
539 542
543 ceph_osdc_build_request(req, pos, num_ops, ops,
544 snapc, vino.snap, &mtime);
545
540 /* write from beginning of first page, regardless of io alignment */ 546 /* write from beginning of first page, regardless of io alignment */
541 page_align = file->f_flags & O_DIRECT ? buf_align : io_align; 547 page_align = file->f_flags & O_DIRECT ? buf_align : io_align;
542 num_pages = calc_pages_for(page_align, len); 548 num_pages = calc_pages_for(page_align, len);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index fdda93ebbb4c..ffaf9076fdc4 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -243,12 +243,12 @@ extern void osd_req_op_watch_init(struct ceph_osd_req_op *op, u16 opcode,
243 243
244extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 244extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
245 struct ceph_snap_context *snapc, 245 struct ceph_snap_context *snapc,
246 unsigned int num_op, 246 unsigned int num_ops,
247 bool use_mempool, 247 bool use_mempool,
248 gfp_t gfp_flags); 248 gfp_t gfp_flags);
249 249
250extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, 250extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
251 unsigned int num_op, 251 unsigned int num_ops,
252 struct ceph_osd_req_op *src_ops, 252 struct ceph_osd_req_op *src_ops,
253 struct ceph_snap_context *snapc, 253 struct ceph_snap_context *snapc,
254 u64 snap_id, 254 u64 snap_id,
@@ -257,11 +257,11 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
257extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, 257extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
258 struct ceph_file_layout *layout, 258 struct ceph_file_layout *layout,
259 struct ceph_vino vino, 259 struct ceph_vino vino,
260 u64 offset, u64 *len, int op, int flags, 260 u64 offset, u64 *len,
261 int num_ops, struct ceph_osd_req_op *ops,
262 int opcode, int flags,
261 struct ceph_snap_context *snapc, 263 struct ceph_snap_context *snapc,
262 int do_sync, u32 truncate_seq, 264 u32 truncate_seq, u64 truncate_size,
263 u64 truncate_size,
264 struct timespec *mtime,
265 bool use_mempool); 265 bool use_mempool);
266 266
267extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, 267extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 0b4951e27532..115790aac30a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -512,9 +512,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
512 msg->front.iov_len = msg_size; 512 msg->front.iov_len = msg_size;
513 msg->hdr.front_len = cpu_to_le32(msg_size); 513 msg->hdr.front_len = cpu_to_le32(msg_size);
514 514
515 dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, 515 dout("build_request msg_size was %d\n", (int)msg_size);
516 num_ops);
517 return;
518} 516}
519EXPORT_SYMBOL(ceph_osdc_build_request); 517EXPORT_SYMBOL(ceph_osdc_build_request);
520 518
@@ -532,18 +530,15 @@ EXPORT_SYMBOL(ceph_osdc_build_request);
532struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, 530struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
533 struct ceph_file_layout *layout, 531 struct ceph_file_layout *layout,
534 struct ceph_vino vino, 532 struct ceph_vino vino,
535 u64 off, u64 *plen, 533 u64 off, u64 *plen, int num_ops,
534 struct ceph_osd_req_op *ops,
536 int opcode, int flags, 535 int opcode, int flags,
537 struct ceph_snap_context *snapc, 536 struct ceph_snap_context *snapc,
538 int do_sync,
539 u32 truncate_seq, 537 u32 truncate_seq,
540 u64 truncate_size, 538 u64 truncate_size,
541 struct timespec *mtime,
542 bool use_mempool) 539 bool use_mempool)
543{ 540{
544 struct ceph_osd_req_op ops[2];
545 struct ceph_osd_request *req; 541 struct ceph_osd_request *req;
546 unsigned int num_op = do_sync ? 2 : 1;
547 u64 objnum = 0; 542 u64 objnum = 0;
548 u64 objoff = 0; 543 u64 objoff = 0;
549 u64 objlen = 0; 544 u64 objlen = 0;
@@ -553,7 +548,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
553 548
554 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); 549 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
555 550
556 req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, 551 req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
557 GFP_NOFS); 552 GFP_NOFS);
558 if (!req) 553 if (!req)
559 return ERR_PTR(-ENOMEM); 554 return ERR_PTR(-ENOMEM);
@@ -578,7 +573,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
578 573
579 osd_req_op_extent_init(&ops[0], opcode, objoff, objlen, 574 osd_req_op_extent_init(&ops[0], opcode, objoff, objlen,
580 truncate_size, truncate_seq); 575 truncate_size, truncate_seq);
581 if (do_sync) 576 /*
577 * A second op in the ops array means the caller wants to
578 * also issue a 'startsync' command so that the
579 * osd will flush data quickly.
580 */
581 if (num_ops > 1)
582 osd_req_op_init(&ops[1], CEPH_OSD_OP_STARTSYNC); 582 osd_req_op_init(&ops[1], CEPH_OSD_OP_STARTSYNC);
583 583
584 req->r_file_layout = *layout; /* keep a copy */ 584 req->r_file_layout = *layout; /* keep a copy */
@@ -587,9 +587,6 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
587 vino.ino, objnum); 587 vino.ino, objnum);
588 req->r_oid_len = strlen(req->r_oid); 588 req->r_oid_len = strlen(req->r_oid);
589 589
590 ceph_osdc_build_request(req, off, num_op, ops,
591 snapc, vino.snap, mtime);
592
593 return req; 590 return req;
594} 591}
595EXPORT_SYMBOL(ceph_osdc_new_request); 592EXPORT_SYMBOL(ceph_osdc_new_request);
@@ -2047,17 +2044,20 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
2047{ 2044{
2048 struct ceph_osd_request *req; 2045 struct ceph_osd_request *req;
2049 struct ceph_osd_data *osd_data; 2046 struct ceph_osd_data *osd_data;
2047 struct ceph_osd_req_op op;
2050 int rc = 0; 2048 int rc = 0;
2051 2049
2052 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, 2050 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
2053 vino.snap, off, *plen); 2051 vino.snap, off, *plen);
2054 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 2052 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1, &op,
2055 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2053 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
2056 NULL, 0, truncate_seq, truncate_size, NULL, 2054 NULL, truncate_seq, truncate_size,
2057 false); 2055 false);
2058 if (IS_ERR(req)) 2056 if (IS_ERR(req))
2059 return PTR_ERR(req); 2057 return PTR_ERR(req);
2060 2058
2059 ceph_osdc_build_request(req, off, 1, &op, NULL, vino.snap, NULL);
2060
2061 /* it may be a short read due to an object boundary */ 2061 /* it may be a short read due to an object boundary */
2062 2062
2063 osd_data = &req->r_data_in; 2063 osd_data = &req->r_data_in;
@@ -2092,19 +2092,21 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
2092{ 2092{
2093 struct ceph_osd_request *req; 2093 struct ceph_osd_request *req;
2094 struct ceph_osd_data *osd_data; 2094 struct ceph_osd_data *osd_data;
2095 struct ceph_osd_req_op op;
2095 int rc = 0; 2096 int rc = 0;
2096 int page_align = off & ~PAGE_MASK; 2097 int page_align = off & ~PAGE_MASK;
2097 2098
2098 BUG_ON(vino.snap != CEPH_NOSNAP); 2099 BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */
2099 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 2100 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1, &op,
2100 CEPH_OSD_OP_WRITE, 2101 CEPH_OSD_OP_WRITE,
2101 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, 2102 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
2102 snapc, 0, 2103 snapc, truncate_seq, truncate_size,
2103 truncate_seq, truncate_size, mtime,
2104 true); 2104 true);
2105 if (IS_ERR(req)) 2105 if (IS_ERR(req))
2106 return PTR_ERR(req); 2106 return PTR_ERR(req);
2107 2107
2108 ceph_osdc_build_request(req, off, 1, &op, snapc, CEPH_NOSNAP, mtime);
2109
2108 /* it may be a short write due to an object boundary */ 2110 /* it may be a short write due to an object boundary */
2109 osd_data = &req->r_data_out; 2111 osd_data = &req->r_data_out;
2110 osd_data->type = CEPH_OSD_DATA_TYPE_PAGES; 2112 osd_data->type = CEPH_OSD_DATA_TYPE_PAGES;