From 2f55c43788df7358be8c6e78ae2a3d3268e7afb6 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:00 +0800 Subject: tipc: remove unnecessary wrapper functions of kernel timer APIs Not only some wrapper function like k_term_timer() is empty, but also some others including k_start_timer() and k_cancel_timer() don't return back any value to its caller, what's more, there is no any component in the kernel world to do such thing. Therefore, these timer interfaces defined in tipc module should be purged. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d5c83d7ecb47..1a52f7cf3cd3 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -77,12 +77,10 @@ #define TIPC_MEDIA_ADDR_OFFSET 5 - struct tipc_msg { __be32 hdr[15]; }; - static inline u32 msg_word(struct tipc_msg *m, u32 pos) { return ntohl(m->hdr[pos]); -- cgit v1.2.2 From 859fc7c0cedca0f84dac471fa31e9512259e1ecd Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:01 +0800 Subject: tipc: cleanup core.c and core.h files Only the works of initializing and shutting down tipc module are done in core.h and core.c files, so all stuffs which are not closely associated with the two tasks should be moved to appropriate places. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 1a52f7cf3cd3..0065a2e8ad9b 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -77,10 +77,38 @@ #define TIPC_MEDIA_ADDR_OFFSET 5 +/** + * TIPC message buffer code + * + * TIPC message buffer headroom reserves space for the worst-case + * link-level device header (in case the message is sent off-node). + * + * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields + * are word aligned for quicker access + */ +#define BUF_HEADROOM LL_MAX_HEADER + +struct tipc_skb_cb { + void *handle; + struct sk_buff *tail; + bool deferred; + bool wakeup_pending; + bool bundling; + u16 chain_sz; + u16 chain_imp; +}; + +#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) + struct tipc_msg { __be32 hdr[15]; }; +static inline struct tipc_msg *buf_msg(struct sk_buff *skb) +{ + return (struct tipc_msg *)skb->data; +} + static inline u32 msg_word(struct tipc_msg *m, u32 pos) { return ntohl(m->hdr[pos]); @@ -719,27 +747,20 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) return msg_origport(m); } +struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); - int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); - void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); - struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); - int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); - bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); - bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode); - int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); - struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); #endif -- cgit v1.2.2 From 1da465683a93142488a54a9038155f23d6349441 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:07 +0800 Subject: tipc: make tipc broadcast link support net namespace TIPC broadcast link is statically established and its relevant states are maintained with the global variables: "bcbearer", "bclink" and "bcl". Allowing different namespace to own different broadcast link instances, these variables must be moved to tipc_net structure and broadcast link instances would be allocated and initialized when namespace is created. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 0065a2e8ad9b..75f324287244 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -37,7 +37,7 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "bearer.h" +#include /* * Constants and routines used to read and write TIPC payload message headers -- cgit v1.2.2 From 4ac1c8d0ee9faf3a4be185cc4db1381fa0d81280 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:09 +0800 Subject: tipc: name tipc name table support net namespace TIPC name table is used to store the mapping relationship between TIPC service name and socket port ID. When tipc supports namespace, it allows users to publish service names only owned by a certain namespace. Therefore, every namespace must have its private name table to prevent service names published to one namespace from being contaminated by other service names in another namespace. Therefore, The name table global variable (ie, nametbl) and its lock must be moved to tipc_net structure, and a parameter of namespace must be added for necessary functions so that they can obtain name table variable defined in tipc_net structure. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 75f324287244..69f37e652a8e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -749,7 +749,7 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); +int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, -- cgit v1.2.2 From 347475395434abb2b61bf59c2952470f37072567 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:10 +0800 Subject: tipc: make tipc node address support net namespace If net namespace is supported in tipc, each namespace will be treated as a separate tipc node. Therefore, every namespace must own its private tipc node address. This means the "tipc_own_addr" global variable of node address must be moved to tipc_net structure to satisfy the requirement. It's turned out that users also can assign node address for every namespace. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 69f37e652a8e..526ef345b70e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -748,19 +748,20 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) } struct sk_buff *tipc_buf_acquire(u32 size); -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); +bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, + int err); int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode); -struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, - uint data_sz, u32 dnode, u32 onode, - u32 dport, u32 oport, int errcode); +void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); +struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode); -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int mtu, struct sk_buff_head *list); +bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode); +int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int mtu, struct sk_buff_head *list); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); #endif -- cgit v1.2.2 From c5898636c440da91d58f10beac00f073e68378df Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:36 -0500 Subject: tipc: reduce usage of context info in socket and link The most common usage of namespace information is when we fetch the own node addess from the net structure. This leads to a lot of passing around of a parameter of type 'struct net *' between functions just to make them able to obtain this address. However, in many cases this is unnecessary. The own node address is readily available as a member of both struct tipc_sock and tipc_link, and can be fetched from there instead. The fact that the vast majority of functions in socket.c and link.c anyway are maintaining a pointer to their respective base structures makes this option even more compelling. In this commit, we introduce the inline functions tsk_own_node() and link_own_node() to make it easy for functions to fetch the node address from those structs instead of having to pass along and dereference the namespace struct. In particular, we make calls to the msg_xx() functions in msg.{h,c} context independent by directly passing them the own node address as parameter when needed. Those functions should be regarded as leaves in the code dependency tree, and it is hence desirable to keep them namspace unaware. Apart from a potential positive effect on cache behavior, these changes make it easier to introduce the changes that will follow later in this series. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 526ef345b70e..f7ea95458c6f 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -748,19 +748,19 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) } struct sk_buff *tipc_buf_acquire(u32 size); -bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err); int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); -void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type, +void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); -struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); -bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, +bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode); -int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); -- cgit v1.2.2 From e3a77561e7d326e18881ef3cb84807892b353459 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:39 -0500 Subject: tipc: split up function tipc_msg_eval() The function tipc_msg_eval() is in reality doing two related, but different tasks. First it tries to find a new destination for named messages, in case there was no first lookup, or if the first lookup failed. Second, it does what its name suggests, evaluating the validity of the message and its destination, and returning an appropriate error code depending on the result. This is confusing, and in this commit we choose to break it up into two functions. A new function, tipc_msg_lookup_dest(), first attempts to find a new destination, if the message is of the right type. If this lookup fails, or if the message should not be subject to a second lookup, the already existing tipc_msg_reverse() is called. This function performs prepares the message for rejection, if applicable. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index f7ea95458c6f..60702992933d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -750,18 +750,19 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err); -int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); -struct sk_buff *tipc_msg_create(uint user, uint type, - uint hdr_sz, uint data_sz, u32 dnode, - u32 onode, u32 dport, u32 oport, int errcode); +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, + uint data_sz, u32 dnode, u32 onode, + u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, + int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); #endif -- cgit v1.2.2 From c637c1035534867b85b78b453c38c495b58e2c5a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:41 -0500 Subject: tipc: resolve race problem at unicast message reception TIPC handles message cardinality and sequencing at the link layer, before passing messages upwards to the destination sockets. During the upcall from link to socket no locks are held. It is therefore possible, and we see it happen occasionally, that messages arriving in different threads and delivered in sequence still bypass each other before they reach the destination socket. This must not happen, since it violates the sequentiality guarantee. We solve this by adding a new input buffer queue to the link structure. Arriving messages are added safely to the tail of that queue by the link, while the head of the queue is consumed, also safely, by the receiving socket. Sequentiality is secured per socket by only allowing buffers to be dequeued inside the socket lock. Since there may be multiple simultaneous readers of the queue, we use a 'filter' parameter to reduce the risk that they peek the same buffer from the queue, hence also reducing the risk of contention on the receiving socket locks. This solves the sequentiality problem, and seems to cause no measurable performance degradation. A nice side effect of this change is that lock handling in the functions tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that will enable future simplifications of those functions. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 60702992933d..ab467261bd9d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -45,6 +45,7 @@ * Note: Some items are also used with TIPC internal message headers */ #define TIPC_VERSION 2 +struct plist; /* * Payload message users are defined in TIPC's public API: @@ -759,10 +760,82 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode); +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +/* tipc_skb_peek_port(): find a destination port, ignoring all destinations + * up to and including 'filter'. + * Note: ignoring previously tried destinations minimizes the risk of + * contention on the socket lock + * @list: list to be peeked in + * @filter: last destination to be ignored from search + * Returns a destination port number, of applicable. + */ +static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter) +{ + struct sk_buff *skb; + u32 dport = 0; + bool ignore = true; + + spin_lock_bh(&list->lock); + skb_queue_walk(list, skb) { + dport = msg_destport(buf_msg(skb)); + if (!filter || skb_queue_is_last(list, skb)) + break; + if (dport == filter) + ignore = false; + else if (!ignore) + break; + } + spin_unlock_bh(&list->lock); + return dport; +} + +/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list + * @list: list to be unlinked from + * @dport: selection criteria for buffer to unlink + */ +static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, + u32 dport) +{ + struct sk_buff *_skb, *tmp, *skb = NULL; + + spin_lock_bh(&list->lock); + skb_queue_walk_safe(list, _skb, tmp) { + if (msg_destport(buf_msg(_skb)) == dport) { + __skb_unlink(_skb, list); + skb = _skb; + break; + } + } + spin_unlock_bh(&list->lock); + return skb; +} + +/* tipc_skb_queue_tail(): add buffer to tail of list; + * @list: list to be appended to + * @skb: buffer to append. Always appended + * @dport: the destination port of the buffer + * returns true if dport differs from previous destination + */ +static inline bool tipc_skb_queue_tail(struct sk_buff_head *list, + struct sk_buff *skb, u32 dport) +{ + struct sk_buff *_skb = NULL; + bool rv = false; + + spin_lock_bh(&list->lock); + _skb = skb_peek_tail(list); + if (!_skb || (msg_destport(buf_msg(_skb)) != dport) || + (skb_queue_len(list) > 32)) + rv = true; + __skb_queue_tail(list, skb); + spin_unlock_bh(&list->lock); + return rv; +} + #endif -- cgit v1.2.2 From cb1b728096f54e7408d60fb571944bed00c5b771 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:44 -0500 Subject: tipc: eliminate race condition at multicast reception In a previous commit in this series we resolved a race problem during unicast message reception. Here, we resolve the same problem at multicast reception. We apply the same technique: an input queue serializing the delivery of arriving buffers. The main difference is that here we do it in two steps. First, the broadcast link feeds arriving buffers into the tail of an arrival queue, which head is consumed at the socket level, and where destination lookup is performed. Second, if the lookup is successful, the resulting buffer clones are fed into a second queue, the input queue. This queue is consumed at reception in the socket just like in the unicast case. Both queues are protected by the same lock, -the one of the input queue. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net/tipc/msg.h') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ab467261bd9d..9ace47f44a69 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -767,6 +767,23 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +/* tipc_skb_peek(): peek and reserve first buffer in list + * @list: list to be peeked in + * Returns pointer to first buffer in list, if any + */ +static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list, + spinlock_t *lock) +{ + struct sk_buff *skb; + + spin_lock_bh(lock); + skb = skb_peek(list); + if (skb) + skb_get(skb); + spin_unlock_bh(lock); + return skb; +} + /* tipc_skb_peek_port(): find a destination port, ignoring all destinations * up to and including 'filter'. * Note: ignoring previously tried destinations minimizes the risk of -- cgit v1.2.2