about | summary | refs | log | tree | commit | diff | stats
path: root/net/tipc/node.c
diff options
context:
space:
mode:
author Jon Paul Maloy <jon.maloy@ericsson.com> 2017-01-03 10:55:11 -0500
committer David S. Miller <davem@davemloft.net> 2017-01-03 11:13:05 -0500
commit 365ad353c2564bba8835290061308ba825166b3a (patch)
tree 06439f0724f3df34e29e8d3fb32432894e6d8ff0 /net/tipc/node.c
parent 4d8642d896c53966d32d5e343c3620813dd0e7c8 (diff)
tipc: reduce risk of user starvation during link congestion
The socket code currently handles link congestion by either blocking and trying to send again when the congestion has abated, or just returning to the user with -EAGAIN and letting the user retry later.

This mechanism is prone to starvation, because the wakeup algorithm is non-atomic. During the time the link issues a wakeup signal, until the socket wakes up and re-attempts sending, other senders may have come in between and occupied the free buffer space in the link. This in turn may lead to a socket having to make many send attempts before it is successful. In extremely loaded systems we have observed latency times of several seconds before a low-priority socket is able to send out a message.

In this commit, we simplify this mechanism and reduce the risk of the described scenario happening. When an attempt is made to send a message via a congested link, we now let it be added to the link's backlog queue anyway, thus permitting an oversubscription of one message per source socket. We still create a wakeup item and return an error code, hence instructing the sender to block or stop sending. Only when enough space has been freed up in the link's backlog queue do we issue a wakeup event that allows the sender to continue with the next message, if any.

The fact that a socket now can consider a message sent even when the link returns a congestion code means that the sending socket code can be simplified. Also, since this is a good opportunity to get rid of the obsolete 'mtu change' condition in the three socket send functions, we now choose to refactor those functions completely.

Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/tipc/node.c')
-rw-r--r-- net/tipc/node.c 15
1 files changed, 5 insertions, 10 deletions
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9d2f4c2b08ab..2883f6a0ed98 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1167,7 +1167,7 @@ msg_full:
1167 * @list: chain of buffers containing message 1167 * @list: chain of buffers containing message
1168 * @dnode: address of destination node 1168 * @dnode: address of destination node
1169 * @selector: a number used for deterministic link selection 1169 * @selector: a number used for deterministic link selection
1170 * Consumes the buffer chain, except when returning -ELINKCONG 1170 * Consumes the buffer chain.
1171 * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF 1171 * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF
1172 */ 1172 */
1173int tipc_node_xmit(struct net *net, struct sk_buff_head *list, 1173int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
@@ -1206,10 +1206,10 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
1206 spin_unlock_bh(&le->lock); 1206 spin_unlock_bh(&le->lock);
1207 tipc_node_read_unlock(n); 1207 tipc_node_read_unlock(n);
1208 1208
1209 if (likely(rc == 0)) 1209 if (unlikely(rc == -ENOBUFS))
1210 tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
1211 else if (rc == -ENOBUFS)
1212 tipc_node_link_down(n, bearer_id, false); 1210 tipc_node_link_down(n, bearer_id, false);
1211 else
1212 tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
1213 1213
1214 tipc_node_put(n); 1214 tipc_node_put(n);
1215 1215
@@ -1221,20 +1221,15 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
1221 * messages, which will not be rejected 1221 * messages, which will not be rejected
1222 * The only exception is datagram messages rerouted after secondary 1222 * The only exception is datagram messages rerouted after secondary
1223 * lookup, which are rare and safe to dispose of anyway. 1223 * lookup, which are rare and safe to dispose of anyway.
1224 * TODO: Return real return value, and let callers use
1225 * tipc_wait_for_sendpkt() where applicable
1226 */ 1224 */
1227int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, 1225int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
1228 u32 selector) 1226 u32 selector)
1229{ 1227{
1230 struct sk_buff_head head; 1228 struct sk_buff_head head;
1231 int rc;
1232 1229
1233 skb_queue_head_init(&head); 1230 skb_queue_head_init(&head);
1234 __skb_queue_tail(&head, skb); 1231 __skb_queue_tail(&head, skb);
1235 rc = tipc_node_xmit(net, &head, dnode, selector); 1232 tipc_node_xmit(net, &head, dnode, selector);
1236 if (rc == -ELINKCONG)
1237 kfree_skb(skb);
1238 return 0; 1233 return 0;
1239} 1234}
1240 1235