aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2010-11-09 15:40:00 -0500
committer: Sage Weil <sage@newdream.net> 2010-11-09 15:43:17 -0500
commit: c5c6b19d4b8f5431fca05f28ae9e141045022149 (patch)
tree: 0961d9aeee49f481134089ac5b93b7118cf3a34b
parent: b7495fc2ff941db6a118a93ab8d61149e3f4cef8 (diff)
ceph: explicitly specify page alignment in network messages
The alignment used for reading data into or out of pages used to be taken from the data_off field in the message header. This only worked as long as the page alignment matched the object offset, breaking direct I/O to non-page-aligned offsets.

Instead, explicitly specify the page alignment next to the page vector in the ceph_msg struct, and use that instead of the message header (which probably shouldn't be trusted). The alloc_msg callback is responsible for filling in this field properly when it sets up the page vector.

Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- include/linux/ceph/messenger.h 1
-rw-r--r-- net/ceph/messenger.c 10
-rw-r--r-- net/ceph/osd_client.c 3
3 files changed, 9 insertions, 5 deletions
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 5956d62c3057..a108b425fee2 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -82,6 +82,7 @@ struct ceph_msg {
82 struct ceph_buffer *middle; 82 struct ceph_buffer *middle;
83 struct page **pages; /* data payload. NOT OWNER. */ 83 struct page **pages; /* data payload. NOT OWNER. */
84 unsigned nr_pages; /* size of page array */ 84 unsigned nr_pages; /* size of page array */
85 unsigned page_alignment; /* io offset in first page */
85 struct ceph_pagelist *pagelist; /* instead of pages */ 86 struct ceph_pagelist *pagelist; /* instead of pages */
86 struct list_head list_head; 87 struct list_head list_head;
87 struct kref kref; 88 struct kref kref;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index d379abf873bc..1c7a2ec4f3cc 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -540,8 +540,7 @@ static void prepare_write_message(struct ceph_connection *con)
540 /* initialize page iterator */ 540 /* initialize page iterator */
541 con->out_msg_pos.page = 0; 541 con->out_msg_pos.page = 0;
542 if (m->pages) 542 if (m->pages)
543 con->out_msg_pos.page_pos = 543 con->out_msg_pos.page_pos = m->page_alignment;
544 le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
545 else 544 else
546 con->out_msg_pos.page_pos = 0; 545 con->out_msg_pos.page_pos = 0;
547 con->out_msg_pos.data_pos = 0; 546 con->out_msg_pos.data_pos = 0;
@@ -1491,7 +1490,7 @@ static int read_partial_message(struct ceph_connection *con)
1491 struct ceph_msg *m = con->in_msg; 1490 struct ceph_msg *m = con->in_msg;
1492 int ret; 1491 int ret;
1493 int to, left; 1492 int to, left;
1494 unsigned front_len, middle_len, data_len, data_off; 1493 unsigned front_len, middle_len, data_len;
1495 int datacrc = con->msgr->nocrc; 1494 int datacrc = con->msgr->nocrc;
1496 int skip; 1495 int skip;
1497 u64 seq; 1496 u64 seq;
@@ -1527,7 +1526,6 @@ static int read_partial_message(struct ceph_connection *con)
1527 data_len = le32_to_cpu(con->in_hdr.data_len); 1526 data_len = le32_to_cpu(con->in_hdr.data_len);
1528 if (data_len > CEPH_MSG_MAX_DATA_LEN) 1527 if (data_len > CEPH_MSG_MAX_DATA_LEN)
1529 return -EIO; 1528 return -EIO;
1530 data_off = le16_to_cpu(con->in_hdr.data_off);
1531 1529
1532 /* verify seq# */ 1530 /* verify seq# */
1533 seq = le64_to_cpu(con->in_hdr.seq); 1531 seq = le64_to_cpu(con->in_hdr.seq);
@@ -1575,7 +1573,7 @@ static int read_partial_message(struct ceph_connection *con)
1575 1573
1576 con->in_msg_pos.page = 0; 1574 con->in_msg_pos.page = 0;
1577 if (m->pages) 1575 if (m->pages)
1578 con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; 1576 con->in_msg_pos.page_pos = m->page_alignment;
1579 else 1577 else
1580 con->in_msg_pos.page_pos = 0; 1578 con->in_msg_pos.page_pos = 0;
1581 con->in_msg_pos.data_pos = 0; 1579 con->in_msg_pos.data_pos = 0;
@@ -2300,6 +2298,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
2300 2298
2301 /* data */ 2299 /* data */
2302 m->nr_pages = 0; 2300 m->nr_pages = 0;
2301 m->page_alignment = 0;
2303 m->pages = NULL; 2302 m->pages = NULL;
2304 m->pagelist = NULL; 2303 m->pagelist = NULL;
2305 m->bio = NULL; 2304 m->bio = NULL;
@@ -2369,6 +2368,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
2369 type, front_len); 2368 type, front_len);
2370 return NULL; 2369 return NULL;
2371 } 2370 }
2371 msg->page_alignment = le16_to_cpu(hdr->data_off);
2372 } 2372 }
2373 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 2373 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
2374 2374
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6c096239660c..3e20a122ffa2 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -391,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
391 req->r_request->hdr.data_len = cpu_to_le32(data_len); 391 req->r_request->hdr.data_len = cpu_to_le32(data_len);
392 } 392 }
393 393
394 req->r_request->page_alignment = req->r_page_alignment;
395
394 BUG_ON(p > msg->front.iov_base + msg->front.iov_len); 396 BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
395 msg_size = p - msg->front.iov_base; 397 msg_size = p - msg->front.iov_base;
396 msg->front.iov_len = msg_size; 398 msg->front.iov_len = msg_size;
@@ -1657,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
1657 } 1659 }
1658 m->pages = req->r_pages; 1660 m->pages = req->r_pages;
1659 m->nr_pages = req->r_num_pages; 1661 m->nr_pages = req->r_num_pages;
1662 m->page_alignment = req->r_page_alignment;
1660#ifdef CONFIG_BLOCK 1663#ifdef CONFIG_BLOCK
1661 m->bio = req->r_bio; 1664 m->bio = req->r_bio;
1662#endif 1665#endif