diff options
author | Sage Weil <sage@newdream.net> | 2010-11-09 15:40:00 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-11-09 15:43:17 -0500 |
commit | c5c6b19d4b8f5431fca05f28ae9e141045022149 (patch) | |
tree | 0961d9aeee49f481134089ac5b93b7118cf3a34b | |
parent | b7495fc2ff941db6a118a93ab8d61149e3f4cef8 (diff) |
ceph: explicitly specify page alignment in network messages
The alignment used for reading data into or out of pages used to be taken
from the data_off field in the message header. This only worked as long
as the page alignment matched the object offset, breaking direct I/O to
non-page-aligned offsets.
Instead, explicitly specify the page alignment next to the page vector
in the ceph_msg struct, and use that instead of the message header (which
probably shouldn't be trusted). The alloc_msg callback is responsible for
filling in this field properly when it sets up the page vector.
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | include/linux/ceph/messenger.h | 1 | ||||
-rw-r--r-- | net/ceph/messenger.c | 10 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 3 |
3 files changed, 9 insertions, 5 deletions
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 5956d62c3057..a108b425fee2 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -82,6 +82,7 @@ struct ceph_msg { | |||
82 | struct ceph_buffer *middle; | 82 | struct ceph_buffer *middle; |
83 | struct page **pages; /* data payload. NOT OWNER. */ | 83 | struct page **pages; /* data payload. NOT OWNER. */ |
84 | unsigned nr_pages; /* size of page array */ | 84 | unsigned nr_pages; /* size of page array */ |
85 | unsigned page_alignment; /* io offset in first page */ | ||
85 | struct ceph_pagelist *pagelist; /* instead of pages */ | 86 | struct ceph_pagelist *pagelist; /* instead of pages */ |
86 | struct list_head list_head; | 87 | struct list_head list_head; |
87 | struct kref kref; | 88 | struct kref kref; |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d379abf873bc..1c7a2ec4f3cc 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -540,8 +540,7 @@ static void prepare_write_message(struct ceph_connection *con) | |||
540 | /* initialize page iterator */ | 540 | /* initialize page iterator */ |
541 | con->out_msg_pos.page = 0; | 541 | con->out_msg_pos.page = 0; |
542 | if (m->pages) | 542 | if (m->pages) |
543 | con->out_msg_pos.page_pos = | 543 | con->out_msg_pos.page_pos = m->page_alignment; |
544 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | ||
545 | else | 544 | else |
546 | con->out_msg_pos.page_pos = 0; | 545 | con->out_msg_pos.page_pos = 0; |
547 | con->out_msg_pos.data_pos = 0; | 546 | con->out_msg_pos.data_pos = 0; |
@@ -1491,7 +1490,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1491 | struct ceph_msg *m = con->in_msg; | 1490 | struct ceph_msg *m = con->in_msg; |
1492 | int ret; | 1491 | int ret; |
1493 | int to, left; | 1492 | int to, left; |
1494 | unsigned front_len, middle_len, data_len, data_off; | 1493 | unsigned front_len, middle_len, data_len; |
1495 | int datacrc = con->msgr->nocrc; | 1494 | int datacrc = con->msgr->nocrc; |
1496 | int skip; | 1495 | int skip; |
1497 | u64 seq; | 1496 | u64 seq; |
@@ -1527,7 +1526,6 @@ static int read_partial_message(struct ceph_connection *con) | |||
1527 | data_len = le32_to_cpu(con->in_hdr.data_len); | 1526 | data_len = le32_to_cpu(con->in_hdr.data_len); |
1528 | if (data_len > CEPH_MSG_MAX_DATA_LEN) | 1527 | if (data_len > CEPH_MSG_MAX_DATA_LEN) |
1529 | return -EIO; | 1528 | return -EIO; |
1530 | data_off = le16_to_cpu(con->in_hdr.data_off); | ||
1531 | 1529 | ||
1532 | /* verify seq# */ | 1530 | /* verify seq# */ |
1533 | seq = le64_to_cpu(con->in_hdr.seq); | 1531 | seq = le64_to_cpu(con->in_hdr.seq); |
@@ -1575,7 +1573,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1575 | 1573 | ||
1576 | con->in_msg_pos.page = 0; | 1574 | con->in_msg_pos.page = 0; |
1577 | if (m->pages) | 1575 | if (m->pages) |
1578 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | 1576 | con->in_msg_pos.page_pos = m->page_alignment; |
1579 | else | 1577 | else |
1580 | con->in_msg_pos.page_pos = 0; | 1578 | con->in_msg_pos.page_pos = 0; |
1581 | con->in_msg_pos.data_pos = 0; | 1579 | con->in_msg_pos.data_pos = 0; |
@@ -2300,6 +2298,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) | |||
2300 | 2298 | ||
2301 | /* data */ | 2299 | /* data */ |
2302 | m->nr_pages = 0; | 2300 | m->nr_pages = 0; |
2301 | m->page_alignment = 0; | ||
2303 | m->pages = NULL; | 2302 | m->pages = NULL; |
2304 | m->pagelist = NULL; | 2303 | m->pagelist = NULL; |
2305 | m->bio = NULL; | 2304 | m->bio = NULL; |
@@ -2369,6 +2368,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | |||
2369 | type, front_len); | 2368 | type, front_len); |
2370 | return NULL; | 2369 | return NULL; |
2371 | } | 2370 | } |
2371 | msg->page_alignment = le16_to_cpu(hdr->data_off); | ||
2372 | } | 2372 | } |
2373 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | 2373 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); |
2374 | 2374 | ||
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6c096239660c..3e20a122ffa2 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -391,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, | |||
391 | req->r_request->hdr.data_len = cpu_to_le32(data_len); | 391 | req->r_request->hdr.data_len = cpu_to_le32(data_len); |
392 | } | 392 | } |
393 | 393 | ||
394 | req->r_request->page_alignment = req->r_page_alignment; | ||
395 | |||
394 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); | 396 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); |
395 | msg_size = p - msg->front.iov_base; | 397 | msg_size = p - msg->front.iov_base; |
396 | msg->front.iov_len = msg_size; | 398 | msg->front.iov_len = msg_size; |
@@ -1657,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1657 | } | 1659 | } |
1658 | m->pages = req->r_pages; | 1660 | m->pages = req->r_pages; |
1659 | m->nr_pages = req->r_num_pages; | 1661 | m->nr_pages = req->r_num_pages; |
1662 | m->page_alignment = req->r_page_alignment; | ||
1660 | #ifdef CONFIG_BLOCK | 1663 | #ifdef CONFIG_BLOCK |
1661 | m->bio = req->r_bio; | 1664 | m->bio = req->r_bio; |
1662 | #endif | 1665 | #endif |