about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2010-11-09 15:43:12 -0500
committer: Sage Weil <sage@newdream.net> 2010-11-09 15:43:12 -0500
commit: b7495fc2ff941db6a118a93ab8d61149e3f4cef8 (patch)
tree: 231c339d74760e2fa13e5e6f41c10bc28cea51b3 /net
parent: e98b6fed84d0f0155d7b398e0dfeac74c792f2d0 (diff)
ceph: make page alignment explicit in osd interface
We used to infer alignment of IOs within a page based on the file offset, which assumed they matched. This broke with direct IO that was not aligned to pages (e.g., 512-byte aligned IO). We were also trusting the alignment specified in the OSD reply, which could have been adjusted by the server.

Explicitly specify the page alignment when setting up OSD IO requests.

Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ceph/osd_client.c 22
1 file changed, 14 insertions, 8 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 79391994b3e..6c096239660 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -71,6 +71,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
71 op->extent.length = objlen; 71 op->extent.length = objlen;
72 } 72 }
73 req->r_num_pages = calc_pages_for(off, *plen); 73 req->r_num_pages = calc_pages_for(off, *plen);
74 req->r_page_alignment = off & ~PAGE_MASK;
74 if (op->op == CEPH_OSD_OP_WRITE) 75 if (op->op == CEPH_OSD_OP_WRITE)
75 op->payload_len = *plen; 76 op->payload_len = *plen;
76 77
@@ -419,7 +420,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
419 u32 truncate_seq, 420 u32 truncate_seq,
420 u64 truncate_size, 421 u64 truncate_size,
421 struct timespec *mtime, 422 struct timespec *mtime,
422 bool use_mempool, int num_reply) 423 bool use_mempool, int num_reply,
424 int page_align)
423{ 425{
424 struct ceph_osd_req_op ops[3]; 426 struct ceph_osd_req_op ops[3];
425 struct ceph_osd_request *req; 427 struct ceph_osd_request *req;
@@ -447,6 +449,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
447 calc_layout(osdc, vino, layout, off, plen, req, ops); 449 calc_layout(osdc, vino, layout, off, plen, req, ops);
448 req->r_file_layout = *layout; /* keep a copy */ 450 req->r_file_layout = *layout; /* keep a copy */
449 451
452 /* in case it differs from natural alignment that calc_layout
453 filled in for us */
454 req->r_page_alignment = page_align;
455
450 ceph_osdc_build_request(req, off, plen, ops, 456 ceph_osdc_build_request(req, off, plen, ops,
451 snapc, 457 snapc,
452 mtime, 458 mtime,
@@ -1489,7 +1495,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
1489 struct ceph_vino vino, struct ceph_file_layout *layout, 1495 struct ceph_vino vino, struct ceph_file_layout *layout,
1490 u64 off, u64 *plen, 1496 u64 off, u64 *plen,
1491 u32 truncate_seq, u64 truncate_size, 1497 u32 truncate_seq, u64 truncate_size,
1492 struct page **pages, int num_pages) 1498 struct page **pages, int num_pages, int page_align)
1493{ 1499{
1494 struct ceph_osd_request *req; 1500 struct ceph_osd_request *req;
1495 int rc = 0; 1501 int rc = 0;
@@ -1499,15 +1505,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
1499 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1505 req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
1500 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 1506 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
1501 NULL, 0, truncate_seq, truncate_size, NULL, 1507 NULL, 0, truncate_seq, truncate_size, NULL,
1502 false, 1); 1508 false, 1, page_align);
1503 if (!req) 1509 if (!req)
1504 return -ENOMEM; 1510 return -ENOMEM;
1505 1511
1506 /* it may be a short read due to an object boundary */ 1512 /* it may be a short read due to an object boundary */
1507 req->r_pages = pages; 1513 req->r_pages = pages;
1508 1514
1509 dout("readpages final extent is %llu~%llu (%d pages)\n", 1515 dout("readpages final extent is %llu~%llu (%d pages align %d)\n",
1510 off, *plen, req->r_num_pages); 1516 off, *plen, req->r_num_pages, page_align);
1511 1517
1512 rc = ceph_osdc_start_request(osdc, req, false); 1518 rc = ceph_osdc_start_request(osdc, req, false);
1513 if (!rc) 1519 if (!rc)
@@ -1533,6 +1539,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
1533{ 1539{
1534 struct ceph_osd_request *req; 1540 struct ceph_osd_request *req;
1535 int rc = 0; 1541 int rc = 0;
1542 int page_align = off & ~PAGE_MASK;
1536 1543
1537 BUG_ON(vino.snap != CEPH_NOSNAP); 1544 BUG_ON(vino.snap != CEPH_NOSNAP);
1538 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1545 req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
@@ -1541,7 +1548,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
1541 CEPH_OSD_FLAG_WRITE, 1548 CEPH_OSD_FLAG_WRITE,
1542 snapc, do_sync, 1549 snapc, do_sync,
1543 truncate_seq, truncate_size, mtime, 1550 truncate_seq, truncate_size, mtime,
1544 nofail, 1); 1551 nofail, 1, page_align);
1545 if (!req) 1552 if (!req)
1546 return -ENOMEM; 1553 return -ENOMEM;
1547 1554
@@ -1638,8 +1645,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
1638 m = ceph_msg_get(req->r_reply); 1645 m = ceph_msg_get(req->r_reply);
1639 1646
1640 if (data_len > 0) { 1647 if (data_len > 0) {
1641 unsigned data_off = le16_to_cpu(hdr->data_off); 1648 int want = calc_pages_for(req->r_page_alignment, data_len);
1642 int want = calc_pages_for(data_off & ~PAGE_MASK, data_len);
1643 1649
1644 if (unlikely(req->r_num_pages < want)) { 1650 if (unlikely(req->r_num_pages < want)) {
1645 pr_warning("tid %lld reply %d > expected %d pages\n", 1651 pr_warning("tid %lld reply %d > expected %d pages\n",