From 4f1688b2c63cd86f0d7bcf95a9b3040e38bd3c1a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:32 -0500 Subject: tipc: introduce send functions for chained buffers in link The current link implementation provides several different transmit functions, depending on the characteristics of the message to be sent: if it is an iovec or an sk_buff, if it needs fragmentation or not, if the caller holds the node_lock or not. The permutation of these options gives us an unwanted amount of unnecessarily complex code. As a first step towards simplifying the send path for all messages, we introduce two new send functions at link level, tipc_link_xmit2() and __tipc_link_xmit2(). The former looks up a link to the message destination, and if one is found, it grabs the node lock and calls the second function, which works exclusively inside the node lock protection. If no link is found, and the destination is on the same node, it delivers the message directly to the local destination socket. The new functions take a buffer chain where all packet headers are already prepared, and the correct MTU has been used. These two functions will later replace all other link-level transmit functions. The functions are not backwards compatible, so we have added them as new functions with temporary names. They are tested, but have no users yet. Those will be added later in this series. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 11 deletions(-) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8be6e94a1ca9..e02afc96edd7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -37,20 +37,11 @@ #include "core.h" #include "msg.h" -u32 tipc_msg_tot_importance(struct tipc_msg *m) +static unsigned int align(unsigned int i) { - if (likely(msg_isdata(m))) { - if (likely(msg_orignode(m) == tipc_own_addr)) - return msg_importance(m); - return msg_importance(m) + 4; - } - if ((msg_user(m) == MSG_FRAGMENTER) && - (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); + return (i + 3) & ~3u; } - void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode) { @@ -152,3 +143,86 @@ out_free: kfree_skb(*buf); return 0; } + +/** + * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one + * @bbuf: the existing buffer ("bundle") + * @buf: buffer to be appended + * @mtu: max allowable size for the bundle buffer + * Consumes buffer if successful + * Returns true if bundling could be performed, otherwise false + */ +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu) +{ + struct tipc_msg *bmsg = buf_msg(bbuf); + struct tipc_msg *msg = buf_msg(buf); + unsigned int bsz = msg_size(bmsg); + unsigned int msz = msg_size(msg); + u32 start = align(bsz); + u32 max = mtu - INT_H_SIZE; + u32 pad = start - bsz; + + if (likely(msg_user(msg) == MSG_FRAGMENTER)) + return false; + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) + return false; + if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) + return false; + if (likely(msg_user(bmsg) != MSG_BUNDLER)) + return false; + if (likely(msg_type(bmsg) != BUNDLE_OPEN)) + return false; + if (unlikely(skb_tailroom(bbuf) < (pad + msz))) + return false; + if (unlikely(max < (start + msz))) + return false; + + skb_put(bbuf, pad + msz); + skb_copy_to_linear_data_offset(bbuf, start, buf->data, msz); + msg_set_size(bmsg, start + msz); + msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + bbuf->next = buf->next; + kfree_skb(buf); + return true; +} + +/** + * tipc_msg_make_bundle(): Create bundle buf and append message to its tail + * @buf: buffer to be appended and replaced + * @mtu: max allowable size for the bundle buffer, inclusive header + * @dnode: destination node for message. (Not always present in header) + * Replaces buffer if successful + * Returns true if sucess, otherwise false + */ +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode) +{ + struct sk_buff *bbuf; + struct tipc_msg *bmsg; + struct tipc_msg *msg = buf_msg(*buf); + u32 msz = msg_size(msg); + u32 max = mtu - INT_H_SIZE; + + if (msg_user(msg) == MSG_FRAGMENTER) + return false; + if (msg_user(msg) == CHANGEOVER_PROTOCOL) + return false; + if (msg_user(msg) == BCAST_PROTOCOL) + return false; + if (msz > (max / 2)) + return false; + + bbuf = tipc_buf_acquire(max); + if (!bbuf) + return false; + + skb_trim(bbuf, INT_H_SIZE); + bmsg = buf_msg(bbuf); + tipc_msg_init(bmsg, MSG_BUNDLER, BUNDLE_OPEN, INT_H_SIZE, dnode); + msg_set_seqno(bmsg, msg_seqno(msg)); + msg_set_ack(bmsg, msg_ack(msg)); + msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); + bbuf->next = (*buf)->next; + tipc_msg_bundle(bbuf, *buf, mtu); + *buf = bbuf; + return true; +} -- cgit v1.2.2 From 067608e9d019d6477fd45dd948e81af0e5bf599f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:34 -0500 Subject: tipc: introduce direct iovec to buffer chain fragmentation function Fragmentation at message sending is currently performed in two places in link.c, depending on whether data to be transmitted is delivered in the form of an iovec or as a big sk_buff. Those functions are also tightly entangled with the send functions that are using them. We now introduce a re-entrant, standalone function, tipc_msg_build2(), that builds a packet chain directly from an iovec. Each fragment is sized according to the MTU value given by the caller, and is prepended with a correctly built fragment header, when needed. The function is independent from who is calling and where the chain will be delivered, as long as the caller is able to indicate a correct MTU. The function is tested, but not called by anybody yet. Since it is incompatible with the existing tipc_msg_build(), and we cannot yet remove that function, we have given it a temporary name. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e02afc96edd7..4093b4c94d26 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -144,6 +144,108 @@ out_free: return 0; } + +/** + * tipc_msg_build2 - create buffer chain containing specified header and data + * @mhdr: Message header, to be prepended to data + * @iov: User data + * @offset: Posision in iov to start copying from + * @dsz: Total length of user data + * @pktmax: Max packet size that can be used + * @chain: Buffer or chain of buffers to be returned to caller + * Returns message data size or errno: -ENOMEM, -EFAULT + */ +int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int pktmax , struct sk_buff **chain) +{ + int mhsz = msg_hdr_sz(mhdr); + int msz = mhsz + dsz; + int pktno = 1; + int pktsz; + int pktrem = pktmax; + int drem = dsz; + struct tipc_msg pkthdr; + struct sk_buff *buf, *prev; + char *pktpos; + int rc; + + msg_set_size(mhdr, msz); + + /* No fragmentation needed? */ + if (likely(msz <= pktmax)) { + buf = tipc_buf_acquire(msz); + *chain = buf; + if (unlikely(!buf)) + return -ENOMEM; + skb_copy_to_linear_data(buf, mhdr, mhsz); + pktpos = buf->data + mhsz; + if (!dsz || !memcpy_fromiovecend(pktpos, iov, offset, dsz)) + return dsz; + rc = -EFAULT; + goto error; + } + + /* Prepare reusable fragment header */ + tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, + INT_H_SIZE, msg_destnode(mhdr)); + msg_set_size(&pkthdr, pktmax); + msg_set_fragm_no(&pkthdr, pktno); + + /* Prepare first fragment */ + *chain = buf = tipc_buf_acquire(pktmax); + if (!buf) + return -ENOMEM; + pktpos = buf->data; + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos += INT_H_SIZE; + pktrem -= INT_H_SIZE; + skb_copy_to_linear_data_offset(buf, INT_H_SIZE, mhdr, mhsz); + pktpos += mhsz; + pktrem -= mhsz; + + do { + if (drem < pktrem) + pktrem = drem; + + if (memcpy_fromiovecend(pktpos, iov, offset, pktrem)) { + rc = -EFAULT; + goto error; + } + drem -= pktrem; + offset += pktrem; + + if (!drem) + break; + + /* Prepare new fragment: */ + if (drem < (pktmax - INT_H_SIZE)) + pktsz = drem + INT_H_SIZE; + else + pktsz = pktmax; + prev = buf; + buf = tipc_buf_acquire(pktsz); + if (!buf) { + rc = -ENOMEM; + goto error; + } + prev->next = buf; + msg_set_type(&pkthdr, FRAGMENT); + msg_set_size(&pkthdr, pktsz); + msg_set_fragm_no(&pkthdr, ++pktno); + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos = buf->data + INT_H_SIZE; + pktrem = pktsz - INT_H_SIZE; + + } while (1); + + msg_set_type(buf_msg(buf), LAST_FRAGMENT); + return dsz; +error: + kfree_skb_list(*chain); + *chain = NULL; + return rc; +} + /** * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one * @bbuf: the existing buffer ("bundle") -- cgit v1.2.2 From 8db1bae30b7cd3c3abc05f467d0f7c69b33b80e9 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:35 -0500 Subject: tipc: separate building and sending of rejected messages The way we build and send rejected message is currenty perceived as hard to follow, partly because we let the transmission go via deep call chains through functions such as tipc_reject_msg() and net_route_msg(). We want to remove those functions, and make the call sequences shallower and simpler. For this purpose, we separate building and sending of rejected messages. We build the reject message using the new function tipc_msg_reverse(), and let the transmission go via the newly introduced tipc_link_xmit2() function, as all transmission eventually will do. We also ensure that all calls to tipc_link_xmit2() are made outside port_lock/bh_lock_sock. Finally, we replace all calls to tipc_reject_msg() with the two new calls at all locations in the code that we want to keep. The remaining calls are made from code that we are planning to remove, along with tipc_reject_msg() itself. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 4093b4c94d26..6070dd0ec634 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -37,6 +37,8 @@ #include "core.h" #include "msg.h" +#define MAX_FORWARD_SIZE 1024 + static unsigned int align(unsigned int i) { return (i + 3) & ~3u; @@ -328,3 +330,43 @@ bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode) *buf = bbuf; return true; } + +/** + * tipc_msg_reverse(): swap source and destination addresses and add error code + * @buf: buffer containing message to be reversed + * @dnode: return value: node where to send message after reversal + * @err: error code to be set in message + * Consumes buffer if failure + * Returns true if success, otherwise false + */ +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +{ + struct tipc_msg *msg = buf_msg(buf); + uint imp = msg_importance(msg); + struct tipc_msg ohdr; + uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); + + if (skb_linearize(buf) || !msg_isdata(msg)) + goto exit; + if (msg_dest_droppable(msg) || msg_errcode(msg)) + goto exit; + + memcpy(&ohdr, msg, msg_hdr_sz(msg)); + msg_set_importance(msg, min_t(uint, ++imp, TIPC_CRITICAL_IMPORTANCE)); + msg_set_errcode(msg, err); + msg_set_origport(msg, msg_destport(&ohdr)); + msg_set_destport(msg, msg_origport(&ohdr)); + msg_set_prevnode(msg, tipc_own_addr); + if (!msg_short(msg)) { + msg_set_orignode(msg, msg_destnode(&ohdr)); + msg_set_destnode(msg, msg_orignode(&ohdr)); + } + msg_set_size(msg, msg_hdr_sz(msg) + rdsz); + skb_trim(buf, msg_size(msg)); + skb_orphan(buf); + *dnode = msg_orignode(&ohdr); + return true; +exit: + kfree_skb(buf); + return false; +} -- cgit v1.2.2 From 5a379074a7dd6d288ec9e6472769ba0e0c54dd85 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:36 -0500 Subject: tipc: introduce message evaluation function When a message arrives in a node and finds no destination socket, we may need to drop it, reject it, or forward it after a secondary destination lookup. The latter two cases currently results in a code path that is perceived as complex, because it follows a deep call chain via obscure functions such as net_route_named_msg() and net_route_msg(). We now introduce a function, tipc_msg_eval(), that takes the decision about whether such a message should be rejected or forwarded, but leaves it to the caller to actually perform the indicated action. If the decision is 'reject', it is still the task of the recently introduced function tipc_msg_reverse() to take the final decision about whether the message is rejectable or not. In the latter case it drops the message. As a result of this change, we can finally eliminate the function net_route_named_msg(), and hence become independent of net_route_msg(). Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 6070dd0ec634..7bfc4422bf2c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -36,6 +36,8 @@ #include "core.h" #include "msg.h" +#include "addr.h" +#include "name_table.h" #define MAX_FORWARD_SIZE 1024 @@ -370,3 +372,39 @@ exit: kfree_skb(buf); return false; } + +/** + * tipc_msg_eval: determine fate of message that found no destination + * @buf: the buffer containing the message. + * @dnode: return value: next-hop node, if message to be forwarded + * @err: error code to use, if message to be rejected + * + * Does not consume buffer + * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error + * code if message to be rejected + */ +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +{ + struct tipc_msg *msg = buf_msg(buf); + u32 dport; + + if (msg_type(msg) != TIPC_NAMED_MSG) + return -TIPC_ERR_NO_PORT; + if (skb_linearize(buf)) + return -TIPC_ERR_NO_NAME; + if (msg_data_sz(msg) > MAX_FORWARD_SIZE) + return -TIPC_ERR_NO_NAME; + if (msg_reroute_cnt(msg) > 0) + return -TIPC_ERR_NO_NAME; + + *dnode = addr_domain(msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(msg_nametype(msg), + msg_nameinst(msg), + dnode); + if (!dport) + return -TIPC_ERR_NO_NAME; + msg_incr_reroute_cnt(msg); + msg_set_destnode(msg, *dnode); + msg_set_destport(msg, dport); + return TIPC_OK; +} -- cgit v1.2.2 From ac0074ee70ddb32f62d918b31cb20e3c947c75a1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:41 -0500 Subject: tipc: clean up connection protocol reception function We simplify the code for receiving connection probes, leveraging the recently introduced tipc_msg_reverse() function. We also stick to the principle of sending a possible response message directly from the calling (tipc_sk_rcv or backlog_rcv) functions, hence making the call chain shallower and easier to follow. We make one small protocol change here, allowed according to the spec. If a protocol message arrives from a remote socket that is not the one we are connected to, we are currently generating a connection abort message and send it to the source. This behavior is unnecessary, and might even be a security risk, so instead we now choose to only ignore the message. The consequnce for the sender is that he will need longer time to discover his mistake (until the next timeout), but this is an extreme corner case, and may happen anyway under other circumstances, so we deem this change acceptable. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 7bfc4422bf2c..6ec958401f78 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -348,13 +348,17 @@ bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) struct tipc_msg ohdr; uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); - if (skb_linearize(buf) || !msg_isdata(msg)) + if (skb_linearize(buf)) + goto exit; + if (msg_dest_droppable(msg)) goto exit; - if (msg_dest_droppable(msg) || msg_errcode(msg)) + if (msg_errcode(msg)) goto exit; memcpy(&ohdr, msg, msg_hdr_sz(msg)); - msg_set_importance(msg, min_t(uint, ++imp, TIPC_CRITICAL_IMPORTANCE)); + imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); + if (msg_isdata(msg)) + msg_set_importance(msg, imp); msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); -- cgit v1.2.2 From 078bec826f7b73cf2a2397680537bcb7e075b492 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:41:00 -0400 Subject: tipc: add new functions for multicast and broadcast distribution We add a new broadcast link transmit function in bclink.c and a new receive function in socket.c. The purpose is to move the branching between external and internal destination down to the link layer, just as we have done with unicast in earlier commits. We also make use of the new link-independent fragmentation support that was introduced in an earlier commit series. This gives a shorter and simpler code path, and makes it possible to obtain copy-free buffer delivery to all node local destination sockets. The new transmission code is added in parallel with the existing one, and will be used by the socket multicast send function in the next commit in this series. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ce6d929d66d2..9682296f5e7c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -417,3 +417,38 @@ int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) msg_set_destport(msg, dport); return TIPC_OK; } + +/* tipc_msg_reassemble() - clone a buffer chain of fragments and + * reassemble the clones into one message + */ +struct sk_buff *tipc_msg_reassemble(struct sk_buff *chain) +{ + struct sk_buff *buf = chain; + struct sk_buff *frag = buf; + struct sk_buff *head = NULL; + int hdr_sz; + + /* Copy header if single buffer */ + if (!buf->next) { + hdr_sz = skb_headroom(buf) + msg_hdr_sz(buf_msg(buf)); + return __pskb_copy(buf, hdr_sz, GFP_ATOMIC); + } + + /* Clone all fragments and reassemble */ + while (buf) { + frag = skb_clone(buf, GFP_ATOMIC); + if (!frag) + goto error; + frag->next = NULL; + if (tipc_buf_append(&head, &frag)) + break; + if (!head) + goto error; + buf = buf->next; + } + return frag; +error: + pr_warn("Failed do clone local mcast rcv buffer\n"); + kfree_skb(head); + return NULL; +} -- cgit v1.2.2 From c4116e10579c5bbbfc3cd2ad0324ee0d8691e531 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:41:02 -0400 Subject: tipc: remove unreferenced functions We can now remove a number of functions which have become obsolete and unreferenced through this commit series. There are no functional changes in this commit. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 9682296f5e7c..e3102ea494d4 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -60,41 +60,6 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, msg_set_destnode(m, destnode); } -/** - * tipc_msg_build - create message using specified header and data - * - * Note: Caller must not hold any locks in case copy_from_user() is interrupted! - * - * Returns message data size or errno - */ -int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - unsigned int len, int max_size, struct sk_buff **buf) -{ - int dsz, sz, hsz; - unsigned char *to; - - dsz = len; - hsz = msg_hdr_sz(hdr); - sz = hsz + dsz; - msg_set_size(hdr, sz); - if (unlikely(sz > max_size)) { - *buf = NULL; - return dsz; - } - - *buf = tipc_buf_acquire(sz); - if (!(*buf)) - return -ENOMEM; - skb_copy_to_linear_data(*buf, hdr, hsz); - to = (*buf)->data + hsz; - if (len && memcpy_fromiovecend(to, msg_sect, 0, dsz)) { - kfree_skb(*buf); - *buf = NULL; - return -EFAULT; - } - return dsz; -} - /* tipc_buf_append(): Append a buffer to the fragment list of another buffer * @*headbuf: in: NULL for first frag, otherwise value returned from prev call * out: set when successful non-complete reassembly, otherwise NULL -- cgit v1.2.2 From 9fbfb8b120bd4fe89cd70d6c8841e6e1cfab2609 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:41:03 -0400 Subject: tipc: rename temporarily named functions After the previous commit, we can now give the functions with temporary names, such as tipc_link_xmit2(), tipc_msg_build2() etc., their proper names. There are no functional changes in this commit. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e3102ea494d4..b6f45d029933 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -120,7 +120,7 @@ out_free: /** - * tipc_msg_build2 - create buffer chain containing specified header and data + * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @iov: User data * @offset: Posision in iov to start copying from @@ -129,8 +129,8 @@ out_free: * @chain: Buffer or chain of buffers to be returned to caller * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, - int offset, int dsz, int pktmax , struct sk_buff **chain) +int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int pktmax , struct sk_buff **chain) { int mhsz = msg_hdr_sz(mhdr); int msz = mhsz + dsz; -- cgit v1.2.2 From 13e9b9972fa0f34059e737ae215a26e43966b46f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 25 Jul 2014 14:48:09 -0400 Subject: tipc: make tipc_buf_append() more robust As per comment from David Miller, we try to make the buffer reassembly function more resilient to user errors than it is today. - We check that the "*buf" parameter always is set, since this is mandatory input. - We ensure that *buf->next always is set to NULL before linking in the buffer, instead of relying of the caller to have done this. - We ensure that the "tail" pointer in the head buffer's control block is initialized to NULL when the first fragment arrives. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'net/tipc/msg.c') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b6f45d029933..9680be6d388a 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -72,27 +72,38 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) struct sk_buff *head = *headbuf; struct sk_buff *frag = *buf; struct sk_buff *tail; - struct tipc_msg *msg = buf_msg(frag); - u32 fragid = msg_type(msg); - bool headstolen; + struct tipc_msg *msg; + u32 fragid; int delta; + bool headstolen; + if (!frag) + goto err; + + msg = buf_msg(frag); + fragid = msg_type(msg); + frag->next = NULL; skb_pull(frag, msg_hdr_sz(msg)); if (fragid == FIRST_FRAGMENT) { - if (head || skb_unclone(frag, GFP_ATOMIC)) - goto out_free; + if (unlikely(head)) + goto err; + if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + goto err; head = *headbuf = frag; skb_frag_list_init(head); + TIPC_SKB_CB(head)->tail = NULL; *buf = NULL; return 0; } + if (!head) - goto out_free; - tail = TIPC_SKB_CB(head)->tail; + goto err; + if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { + tail = TIPC_SKB_CB(head)->tail; if (!skb_has_frag_list(head)) skb_shinfo(head)->frag_list = frag; else @@ -102,6 +113,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) head->len += frag->len; TIPC_SKB_CB(head)->tail = frag; } + if (fragid == LAST_FRAGMENT) { *buf = head; TIPC_SKB_CB(head)->tail = NULL; @@ -110,7 +122,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } *buf = NULL; return 0; -out_free: + +err: pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); kfree_skb(*headbuf); -- cgit v1.2.2