aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAllan Stephens <allan.stephens@windriver.com>2007-06-10 20:25:24 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-07-11 01:06:12 -0400
commit05646c91109bfd129361d57dc5d98464ab6f6578 (patch)
tree647639854d1ffb86820d2304419cbcb1bec21451
parent5eee6a6dc945acc5bf4da12956b2f698bbb102b9 (diff)
[TIPC]: Optimize stream send routine to avoid fragmentation
This patch enhances TIPC's stream socket send routine so that it avoids transmitting data in chunks that require fragmentation and reassembly, thereby improving performance at both the sending and receiving ends of the connection. The "maximum packet size" hint that records MTU info allows the socket to decide how big a chunk it should send; in the event that the hint has become stale, fragmentation may still occur, but the data will be passed correctly and the hint will be updated in time for the following send. Note: The 66060 byte pseudo-MTU used for intra-node connections requires the send routine to perform an additional check to ensure it does not exceed TIPC"s limit of 66000 bytes of user data per chunk. Signed-off-by: Allan Stephens <allan.stephens@windriver.com> Signed-off-by: Jon Paul Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/tipc/tipc_port.h6
-rw-r--r--net/tipc/link.c16
-rw-r--r--net/tipc/port.c10
-rw-r--r--net/tipc/port.h6
-rw-r--r--net/tipc/socket.c25
5 files changed, 36 insertions, 27 deletions
diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h
index 333bba6dc522..cfc4ba46de8f 100644
--- a/include/net/tipc/tipc_port.h
+++ b/include/net/tipc/tipc_port.h
@@ -1,8 +1,8 @@
1/* 1/*
2 * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports 2 * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports
3 * 3 *
4 * Copyright (c) 1994-2006, Ericsson AB 4 * Copyright (c) 1994-2007, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005-2007, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,7 @@
55 * @conn_unacked: number of unacknowledged messages received from peer port 55 * @conn_unacked: number of unacknowledged messages received from peer port
56 * @published: non-zero if port has one or more associated names 56 * @published: non-zero if port has one or more associated names
57 * @congested: non-zero if cannot send because of link or port congestion 57 * @congested: non-zero if cannot send because of link or port congestion
58 * @max_pkt: maximum packet size "hint" used when building messages sent by port
58 * @ref: unique reference to port in TIPC object registry 59 * @ref: unique reference to port in TIPC object registry
59 * @phdr: preformatted message header used when sending messages 60 * @phdr: preformatted message header used when sending messages
60 */ 61 */
@@ -68,6 +69,7 @@ struct tipc_port {
68 u32 conn_unacked; 69 u32 conn_unacked;
69 int published; 70 int published;
70 u32 congested; 71 u32 congested;
72 u32 max_pkt;
71 u32 ref; 73 u32 ref;
72 struct tipc_msg phdr; 74 struct tipc_msg phdr;
73}; 75};
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2124f32ef29f..5adfdfd49d61 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * net/tipc/link.c: TIPC link code 2 * net/tipc/link.c: TIPC link code
3 * 3 *
4 * Copyright (c) 1996-2006, Ericsson AB 4 * Copyright (c) 1996-2007, Ericsson AB
5 * Copyright (c) 2004-2006, Wind River Systems 5 * Copyright (c) 2004-2007, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -1260,7 +1260,7 @@ again:
1260 * (Must not hold any locks while building message.) 1260 * (Must not hold any locks while building message.)
1261 */ 1261 */
1262 1262
1263 res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt, 1263 res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
1264 !sender->user_port, &buf); 1264 !sender->user_port, &buf);
1265 1265
1266 read_lock_bh(&tipc_net_lock); 1266 read_lock_bh(&tipc_net_lock);
@@ -1271,7 +1271,7 @@ again:
1271 if (likely(l_ptr)) { 1271 if (likely(l_ptr)) {
1272 if (likely(buf)) { 1272 if (likely(buf)) {
1273 res = link_send_buf_fast(l_ptr, buf, 1273 res = link_send_buf_fast(l_ptr, buf,
1274 &sender->max_pkt); 1274 &sender->publ.max_pkt);
1275 if (unlikely(res < 0)) 1275 if (unlikely(res < 0))
1276 buf_discard(buf); 1276 buf_discard(buf);
1277exit: 1277exit:
@@ -1299,12 +1299,12 @@ exit:
1299 * then re-try fast path or fragment the message 1299 * then re-try fast path or fragment the message
1300 */ 1300 */
1301 1301
1302 sender->max_pkt = link_max_pkt(l_ptr); 1302 sender->publ.max_pkt = link_max_pkt(l_ptr);
1303 tipc_node_unlock(node); 1303 tipc_node_unlock(node);
1304 read_unlock_bh(&tipc_net_lock); 1304 read_unlock_bh(&tipc_net_lock);
1305 1305
1306 1306
1307 if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) 1307 if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt)
1308 goto again; 1308 goto again;
1309 1309
1310 return link_send_sections_long(sender, msg_sect, 1310 return link_send_sections_long(sender, msg_sect,
@@ -1357,7 +1357,7 @@ static int link_send_sections_long(struct port *sender,
1357 1357
1358again: 1358again:
1359 fragm_no = 1; 1359 fragm_no = 1;
1360 max_pkt = sender->max_pkt - INT_H_SIZE; 1360 max_pkt = sender->publ.max_pkt - INT_H_SIZE;
1361 /* leave room for tunnel header in case of link changeover */ 1361 /* leave room for tunnel header in case of link changeover */
1362 fragm_sz = max_pkt - INT_H_SIZE; 1362 fragm_sz = max_pkt - INT_H_SIZE;
1363 /* leave room for fragmentation header in each fragment */ 1363 /* leave room for fragmentation header in each fragment */
@@ -1463,7 +1463,7 @@ error:
1463 goto reject; 1463 goto reject;
1464 } 1464 }
1465 if (link_max_pkt(l_ptr) < max_pkt) { 1465 if (link_max_pkt(l_ptr) < max_pkt) {
1466 sender->max_pkt = link_max_pkt(l_ptr); 1466 sender->publ.max_pkt = link_max_pkt(l_ptr);
1467 tipc_node_unlock(node); 1467 tipc_node_unlock(node);
1468 for (; buf_chain; buf_chain = buf) { 1468 for (; buf_chain; buf_chain = buf) {
1469 buf = buf_chain->next; 1469 buf = buf_chain->next;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index bcd5da00737b..5d2b9ce84d0a 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * net/tipc/port.c: TIPC port code 2 * net/tipc/port.c: TIPC port code
3 * 3 *
4 * Copyright (c) 1992-2006, Ericsson AB 4 * Copyright (c) 1992-2007, Ericsson AB
5 * Copyright (c) 2004-2005, Wind River Systems 5 * Copyright (c) 2004-2007, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -239,6 +239,8 @@ u32 tipc_createport_raw(void *usr_handle,
239 } 239 }
240 240
241 tipc_port_lock(ref); 241 tipc_port_lock(ref);
242 p_ptr->publ.usr_handle = usr_handle;
243 p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
242 p_ptr->publ.ref = ref; 244 p_ptr->publ.ref = ref;
243 msg = &p_ptr->publ.phdr; 245 msg = &p_ptr->publ.phdr;
244 msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0); 246 msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0);
@@ -248,11 +250,9 @@ u32 tipc_createport_raw(void *usr_handle,
248 msg_set_importance(msg,importance); 250 msg_set_importance(msg,importance);
249 p_ptr->last_in_seqno = 41; 251 p_ptr->last_in_seqno = 41;
250 p_ptr->sent = 1; 252 p_ptr->sent = 1;
251 p_ptr->publ.usr_handle = usr_handle;
252 INIT_LIST_HEAD(&p_ptr->wait_list); 253 INIT_LIST_HEAD(&p_ptr->wait_list);
253 INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); 254 INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
254 p_ptr->congested_link = NULL; 255 p_ptr->congested_link = NULL;
255 p_ptr->max_pkt = MAX_PKT_DEFAULT;
256 p_ptr->dispatcher = dispatcher; 256 p_ptr->dispatcher = dispatcher;
257 p_ptr->wakeup = wakeup; 257 p_ptr->wakeup = wakeup;
258 p_ptr->user_port = NULL; 258 p_ptr->user_port = NULL;
@@ -1243,7 +1243,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
1243 res = TIPC_OK; 1243 res = TIPC_OK;
1244exit: 1244exit:
1245 tipc_port_unlock(p_ptr); 1245 tipc_port_unlock(p_ptr);
1246 p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); 1246 p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref);
1247 return res; 1247 return res;
1248} 1248}
1249 1249
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 7ef4d64b32f7..e5f8c16429bd 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -1,8 +1,8 @@
1/* 1/*
2 * net/tipc/port.h: Include file for TIPC port code 2 * net/tipc/port.h: Include file for TIPC port code
3 * 3 *
4 * Copyright (c) 1994-2006, Ericsson AB 4 * Copyright (c) 1994-2007, Ericsson AB
5 * Copyright (c) 2004-2005, Wind River Systems 5 * Copyright (c) 2004-2007, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -81,7 +81,6 @@ struct user_port {
81 * @acked: 81 * @acked:
82 * @publications: list of publications for port 82 * @publications: list of publications for port
83 * @pub_count: total # of publications port has made during its lifetime 83 * @pub_count: total # of publications port has made during its lifetime
84 * @max_pkt: maximum packet size "hint" used when building messages sent by port
85 * @probing_state: 84 * @probing_state:
86 * @probing_interval: 85 * @probing_interval:
87 * @last_in_seqno: 86 * @last_in_seqno:
@@ -102,7 +101,6 @@ struct port {
102 u32 acked; 101 u32 acked;
103 struct list_head publications; 102 struct list_head publications;
104 u32 pub_count; 103 u32 pub_count;
105 u32 max_pkt;
106 u32 probing_state; 104 u32 probing_state;
107 u32 probing_interval; 105 u32 probing_interval;
108 u32 last_in_seqno; 106 u32 last_in_seqno;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ac7f2aacc77b..4a8f37f48764 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -607,23 +607,24 @@ exit:
607static int send_stream(struct kiocb *iocb, struct socket *sock, 607static int send_stream(struct kiocb *iocb, struct socket *sock,
608 struct msghdr *m, size_t total_len) 608 struct msghdr *m, size_t total_len)
609{ 609{
610 struct tipc_port *tport;
610 struct msghdr my_msg; 611 struct msghdr my_msg;
611 struct iovec my_iov; 612 struct iovec my_iov;
612 struct iovec *curr_iov; 613 struct iovec *curr_iov;
613 int curr_iovlen; 614 int curr_iovlen;
614 char __user *curr_start; 615 char __user *curr_start;
616 u32 hdr_size;
615 int curr_left; 617 int curr_left;
616 int bytes_to_send; 618 int bytes_to_send;
617 int bytes_sent; 619 int bytes_sent;
618 int res; 620 int res;
619 621
620 if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE)) 622 /* Handle special cases where there is no connection */
621 return send_packet(iocb, sock, m, total_len);
622
623 /* Can only send large data streams if already connected */
624 623
625 if (unlikely(sock->state != SS_CONNECTED)) { 624 if (unlikely(sock->state != SS_CONNECTED)) {
626 if (sock->state == SS_DISCONNECTING) 625 if (sock->state == SS_UNCONNECTED)
626 return send_packet(iocb, sock, m, total_len);
627 else if (sock->state == SS_DISCONNECTING)
627 return -EPIPE; 628 return -EPIPE;
628 else 629 else
629 return -ENOTCONN; 630 return -ENOTCONN;
@@ -648,17 +649,25 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
648 my_msg.msg_name = NULL; 649 my_msg.msg_name = NULL;
649 bytes_sent = 0; 650 bytes_sent = 0;
650 651
652 tport = tipc_sk(sock->sk)->p;
653 hdr_size = msg_hdr_sz(&tport->phdr);
654
651 while (curr_iovlen--) { 655 while (curr_iovlen--) {
652 curr_start = curr_iov->iov_base; 656 curr_start = curr_iov->iov_base;
653 curr_left = curr_iov->iov_len; 657 curr_left = curr_iov->iov_len;
654 658
655 while (curr_left) { 659 while (curr_left) {
656 bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE) 660 bytes_to_send = tport->max_pkt - hdr_size;
657 ? curr_left : TIPC_MAX_USER_MSG_SIZE; 661 if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
662 bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
663 if (curr_left < bytes_to_send)
664 bytes_to_send = curr_left;
658 my_iov.iov_base = curr_start; 665 my_iov.iov_base = curr_start;
659 my_iov.iov_len = bytes_to_send; 666 my_iov.iov_len = bytes_to_send;
660 if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) { 667 if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) {
661 return bytes_sent ? bytes_sent : res; 668 if (bytes_sent != 0)
669 res = bytes_sent;
670 return res;
662 } 671 }
663 curr_left -= bytes_to_send; 672 curr_left -= bytes_to_send;
664 curr_start += bytes_to_send; 673 curr_start += bytes_to_send;