aboutsummaryrefslogtreecommitdiffstats
path: root/net/tipc/socket.c
diff options
context:
space:
mode:
authorJon Maloy <jon.maloy@ericsson.com>2017-10-13 05:04:31 -0400
committerDavid S. Miller <davem@davemloft.net>2017-10-13 11:46:01 -0400
commit2f487712b89376fce267223bbb0db93d393d4b09 (patch)
tree2f40d5c08f966de843218ff96d797fdc8ca77985 /net/tipc/socket.c
parentb87a5ea31c935a7f7e11ca85df2ec7917921e96d (diff)
tipc: guarantee that group broadcast doesn't bypass group unicast
We need a mechanism guaranteeing that group unicasts sent out from a socket are not bypassed by later-sent broadcasts from the same socket. We do this as follows: - Each time a unicast is sent, we set the broadcast method for the socket to "replicast" and "mandatory". This forces the first subsequent broadcast message to follow the same network and data path as the preceding unicast to a destination, hence preventing it from overtaking the latter. - In order to make the 'same data path' statement above true, we let group unicasts pass through the multicast link input queue, instead of, as previously, through the unicast link input queue. - In the first broadcast following a unicast, we set a new header flag, requiring all recipients to immediately acknowledge its reception. - During the period before all the expected acknowledgements are received, the socket refuses to accept any more broadcast attempts, i.e., by blocking or returning EAGAIN. This period should typically not be longer than a few microseconds. - When all acknowledgements have been received, the sending socket will open up for subsequent broadcasts, this time giving the link layer the freedom to select the best transmission method itself. - The forced and/or abrupt transmission method changes described above may lead to broadcasts arriving out of order at the recipients. We remedy this by introducing code that checks and, if necessary, re-orders such messages at the receiving end. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Acked-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/tipc/socket.c')
-rw-r--r--net/tipc/socket.c34
1 files changed, 29 insertions, 5 deletions
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3276b7a0d445..b1f1c3c2b1e2 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -831,6 +831,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
831 u32 dnode, u32 dport, int dlen) 831 u32 dnode, u32 dport, int dlen)
832{ 832{
833 u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group); 833 u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
834 struct tipc_mc_method *method = &tsk->mc_method;
834 int blks = tsk_blocks(GROUP_H_SIZE + dlen); 835 int blks = tsk_blocks(GROUP_H_SIZE + dlen);
835 struct tipc_msg *hdr = &tsk->phdr; 836 struct tipc_msg *hdr = &tsk->phdr;
836 struct sk_buff_head pkts; 837 struct sk_buff_head pkts;
@@ -857,9 +858,12 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
857 tsk->cong_link_cnt++; 858 tsk->cong_link_cnt++;
858 } 859 }
859 860
860 /* Update send window and sequence number */ 861 /* Update send window */
861 tipc_group_update_member(mb, blks); 862 tipc_group_update_member(mb, blks);
862 863
864 /* A broadcast sent within next EXPIRE period must follow same path */
865 method->rcast = true;
866 method->mandatory = true;
863 return dlen; 867 return dlen;
864} 868}
865 869
@@ -1008,6 +1012,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
1008 struct tipc_group *grp = tsk->group; 1012 struct tipc_group *grp = tsk->group;
1009 struct tipc_nlist *dsts = tipc_group_dests(grp); 1013 struct tipc_nlist *dsts = tipc_group_dests(grp);
1010 struct tipc_mc_method *method = &tsk->mc_method; 1014 struct tipc_mc_method *method = &tsk->mc_method;
1015 bool ack = method->mandatory && method->rcast;
1011 int blks = tsk_blocks(MCAST_H_SIZE + dlen); 1016 int blks = tsk_blocks(MCAST_H_SIZE + dlen);
1012 struct tipc_msg *hdr = &tsk->phdr; 1017 struct tipc_msg *hdr = &tsk->phdr;
1013 int mtu = tipc_bcast_get_mtu(net); 1018 int mtu = tipc_bcast_get_mtu(net);
@@ -1036,6 +1041,9 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
1036 msg_set_destnode(hdr, 0); 1041 msg_set_destnode(hdr, 0);
1037 msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp)); 1042 msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
1038 1043
1044 /* Avoid getting stuck with repeated forced replicasts */
1045 msg_set_grp_bc_ack_req(hdr, ack);
1046
1039 /* Build message as chain of buffers */ 1047 /* Build message as chain of buffers */
1040 skb_queue_head_init(&pkts); 1048 skb_queue_head_init(&pkts);
1041 rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); 1049 rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
@@ -1043,13 +1051,17 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
1043 return rc; 1051 return rc;
1044 1052
1045 /* Send message */ 1053 /* Send message */
1046 rc = tipc_mcast_xmit(net, &pkts, method, dsts, 1054 rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
1047 &tsk->cong_link_cnt);
1048 if (unlikely(rc)) 1055 if (unlikely(rc))
1049 return rc; 1056 return rc;
1050 1057
1051 /* Update broadcast sequence number and send windows */ 1058 /* Update broadcast sequence number and send windows */
1052 tipc_group_update_bc_members(tsk->group, blks); 1059 tipc_group_update_bc_members(tsk->group, blks, ack);
1060
1061 /* Broadcast link is now free to choose method for next broadcast */
1062 method->mandatory = false;
1063 method->expires = jiffies;
1064
1053 return dlen; 1065 return dlen;
1054} 1066}
1055 1067
@@ -1113,7 +1125,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
1113 u32 portid, oport, onode; 1125 u32 portid, oport, onode;
1114 struct list_head dports; 1126 struct list_head dports;
1115 struct tipc_msg *msg; 1127 struct tipc_msg *msg;
1116 int hsz; 1128 int user, mtyp, hsz;
1117 1129
1118 __skb_queue_head_init(&tmpq); 1130 __skb_queue_head_init(&tmpq);
1119 INIT_LIST_HEAD(&dports); 1131 INIT_LIST_HEAD(&dports);
@@ -1121,6 +1133,18 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
1121 skb = tipc_skb_peek(arrvq, &inputq->lock); 1133 skb = tipc_skb_peek(arrvq, &inputq->lock);
1122 for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { 1134 for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
1123 msg = buf_msg(skb); 1135 msg = buf_msg(skb);
1136 user = msg_user(msg);
1137 mtyp = msg_type(msg);
1138 if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
1139 spin_lock_bh(&inputq->lock);
1140 if (skb_peek(arrvq) == skb) {
1141 __skb_dequeue(arrvq);
1142 __skb_queue_tail(inputq, skb);
1143 }
1144 refcount_dec(&skb->users);
1145 spin_unlock_bh(&inputq->lock);
1146 continue;
1147 }
1124 hsz = skb_headroom(skb) + msg_hdr_sz(msg); 1148 hsz = skb_headroom(skb) + msg_hdr_sz(msg);
1125 oport = msg_origport(msg); 1149 oport = msg_origport(msg);
1126 onode = msg_orignode(msg); 1150 onode = msg_orignode(msg);