aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Ilya Dryomov <idryomov@gmail.com> 2018-11-20 09:44:00 -0500
committer: Ilya Dryomov <idryomov@gmail.com> 2018-12-26 09:56:04 -0500
commit: 433b0a12953bc1dfcb52febb186136395a65aad0 (patch)
tree: ae43992cb2721ae51f4b91be78570af8d14b6b95 /net
parent: 3239eb5215ebdef593a79316c9dbbdf8849166ec (diff)
libceph: use MSG_SENDPAGE_NOTLAST with ceph_tcp_sendpage()
Prevent do_tcp_sendpages() from calling tcp_push() (at least) once per
page. Instead, arrange for tcp_push() to be called (at least) once per
data payload. This results in more MSS-sized packets and fewer packets
overall (5-10% reduction in my tests with typical OSD request sizes).
See commits 2f5338442425 ("tcp: allow splice() to build full TSO
packets"), 35f9c09fe9c7 ("tcp: tcp_sendpages() should call tcp_push()
once") and ae62ca7b0321 ("tcp: fix MSG_SENDPAGE_NOTLAST logic") for
details.

Here is an example of a packet size histogram for 128K OSD requests
(MSS = 1448, top 5):

Before:

    SIZE    COUNT
    1448   777700
     952   127915
    1200    39238
    1219     9806
      21     5675

After:

    SIZE    COUNT
    1448   897280
      21     6201
    1019     2797
     643     2739
     376     2479

We could do slightly better by explicitly corking the socket but it's
not clear it's worth it.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net')
-rw-r--r-- net/ceph/messenger.c | 17
1 files changed, 13 insertions, 4 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 21a743a3bd29..649faa626b35 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -560,12 +560,15 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
560 return r; 560 return r;
561} 561}
562 562
563/*
564 * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST
565 */
563static int ceph_tcp_sendpage(struct socket *sock, struct page *page, 566static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
564 int offset, size_t size, bool more) 567 int offset, size_t size, int more)
565{ 568{
566 ssize_t (*sendpage)(struct socket *sock, struct page *page, 569 ssize_t (*sendpage)(struct socket *sock, struct page *page,
567 int offset, size_t size, int flags); 570 int offset, size_t size, int flags);
568 int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : 0); 571 int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
569 int ret; 572 int ret;
570 573
571 /* 574 /*
@@ -1552,6 +1555,7 @@ static int write_partial_message_data(struct ceph_connection *con)
1552 struct ceph_msg *msg = con->out_msg; 1555 struct ceph_msg *msg = con->out_msg;
1553 struct ceph_msg_data_cursor *cursor = &msg->cursor; 1556 struct ceph_msg_data_cursor *cursor = &msg->cursor;
1554 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 1557 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
1558 int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
1555 u32 crc; 1559 u32 crc;
1556 1560
1557 dout("%s %p msg %p\n", __func__, con, msg); 1561 dout("%s %p msg %p\n", __func__, con, msg);
@@ -1580,8 +1584,10 @@ static int write_partial_message_data(struct ceph_connection *con)
1580 } 1584 }
1581 1585
1582 page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); 1586 page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
1587 if (length == cursor->total_resid)
1588 more = MSG_MORE;
1583 ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, 1589 ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
1584 true); 1590 more);
1585 if (ret <= 0) { 1591 if (ret <= 0) {
1586 if (do_datacrc) 1592 if (do_datacrc)
1587 msg->footer.data_crc = cpu_to_le32(crc); 1593 msg->footer.data_crc = cpu_to_le32(crc);
@@ -1611,13 +1617,16 @@ static int write_partial_message_data(struct ceph_connection *con)
1611 */ 1617 */
1612static int write_partial_skip(struct ceph_connection *con) 1618static int write_partial_skip(struct ceph_connection *con)
1613{ 1619{
1620 int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
1614 int ret; 1621 int ret;
1615 1622
1616 dout("%s %p %d left\n", __func__, con, con->out_skip); 1623 dout("%s %p %d left\n", __func__, con, con->out_skip);
1617 while (con->out_skip > 0) { 1624 while (con->out_skip > 0) {
1618 size_t size = min(con->out_skip, (int) PAGE_SIZE); 1625 size_t size = min(con->out_skip, (int) PAGE_SIZE);
1619 1626
1620 ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); 1627 if (size == con->out_skip)
1628 more = MSG_MORE;
1629 ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more);
1621 if (ret <= 0) 1630 if (ret <= 0)
1622 goto out; 1631 goto out;
1623 con->out_skip -= ret; 1632 con->out_skip -= ret;