aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-27 17:05:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-27 17:05:19 -0400
commit2decb2682f80759f631c8332f9a2a34a02150a03 (patch)
tree6c30015e4118ff6a56c67043f2ba842ed4a6e011 /net
parentb787f68c36d49bb1d9236f403813641efa74a031 (diff)
parent22a8f237c0551bae95ffcd2a7ff17d6f5fcce7e7 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) mlx4 doesn't check fully for supported valid RSS hash function, fix from Amir Vadai 2) Off by one in ibmveth_change_mtu(), from David Gibson 3) Prevent altera chip from reporting false error interrupts in some circumstances, from Chee Nouk Phoon 4) Get rid of that stupid endless loop trying to allocate a FIN packet in TCP, and in the process kill deadlocks. From Eric Dumazet 5) Fix get_rps_cpus() crash due to wrong invalid-cpu value, also from Eric Dumazet 6) Fix two bugs in async rhashtable resizing, from Thomas Graf 7) Fix topology server listener socket namespace bug in TIPC, from Ying Xue 8) Add some missing HAS_DMA kconfig dependencies, from Geert Uytterhoeven 9) bgmac driver intends to force re-polling but does so by returning the wrong value from its ->poll() handler. Fix from Rafał Miłecki 10) When the creator of an rhashtable configures a max size for it, don't bark in the logs and drop insertions when that is exceeded. Fix from Johannes Berg 11) Recover from out of order packets in ppp mppe properly, from Sylvain Rochet * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (41 commits) bnx2x: really disable TPA if 'disable_tpa' option is set net:treewide: Fix typo in drivers/net net/mlx4_en: Prevent setting invalid RSS hash function mdio-mux-gpio: use new gpiod_get_array and gpiod_put_array functions netfilter; Add some missing default cases to switch statements in nft_reject. ppp: mppe: discard late packet in stateless mode ppp: mppe: sanity error path rework net/bonding: Make DRV macros private net: rfs: fix crash in get_rps_cpus() altera tse: add support for fixed-links. 
pxa168: fix double deallocation of managed resources net: fix crash in build_skb() net: eth: altera: Resolve false errors from MSGDMA to TSE ehea: Fix memory hook reference counting crashes net/tg3: Release IRQs on permanent error net: mdio-gpio: support access that may sleep inet: fix possible panic in reqsk_queue_unlink() rhashtable: don't attempt to grow when at max_size bgmac: fix requests for extra polling calls from NAPI tcp: avoid looping in tcp_send_fin() ...
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c12
-rw-r--r--net/core/skbuff.c30
-rw-r--r--net/dccp/ipv4.c3
-rw-r--r--net/dccp/ipv6.c3
-rw-r--r--net/dccp/minisocks.c3
-rw-r--r--net/ipv4/inet_connection_sock.c34
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_output.c64
-rw-r--r--net/ipv6/ip6_gre.c9
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/mpls/af_mpls.c125
-rw-r--r--net/mpls/internal.h6
-rw-r--r--net/netfilter/nft_reject.c2
-rw-r--r--net/netfilter/nft_reject_inet.c2
-rw-r--r--net/netlink/af_netlink.c6
-rw-r--r--net/tipc/link.c1
-rw-r--r--net/tipc/server.c9
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/unix/garbage.c70
20 files changed, 290 insertions, 105 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 1796cef55ab5..c7ba0388f1be 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3079,7 +3079,7 @@ static struct rps_dev_flow *
3079set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 3079set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3080 struct rps_dev_flow *rflow, u16 next_cpu) 3080 struct rps_dev_flow *rflow, u16 next_cpu)
3081{ 3081{
3082 if (next_cpu != RPS_NO_CPU) { 3082 if (next_cpu < nr_cpu_ids) {
3083#ifdef CONFIG_RFS_ACCEL 3083#ifdef CONFIG_RFS_ACCEL
3084 struct netdev_rx_queue *rxqueue; 3084 struct netdev_rx_queue *rxqueue;
3085 struct rps_dev_flow_table *flow_table; 3085 struct rps_dev_flow_table *flow_table;
@@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3184 * If the desired CPU (where last recvmsg was done) is 3184 * If the desired CPU (where last recvmsg was done) is
3185 * different from current CPU (one in the rx-queue flow 3185 * different from current CPU (one in the rx-queue flow
3186 * table entry), switch if one of the following holds: 3186 * table entry), switch if one of the following holds:
3187 * - Current CPU is unset (equal to RPS_NO_CPU). 3187 * - Current CPU is unset (>= nr_cpu_ids).
3188 * - Current CPU is offline. 3188 * - Current CPU is offline.
3189 * - The current CPU's queue tail has advanced beyond the 3189 * - The current CPU's queue tail has advanced beyond the
3190 * last packet that was enqueued using this table entry. 3190 * last packet that was enqueued using this table entry.
@@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3192 * have been dequeued, thus preserving in order delivery. 3192 * have been dequeued, thus preserving in order delivery.
3193 */ 3193 */
3194 if (unlikely(tcpu != next_cpu) && 3194 if (unlikely(tcpu != next_cpu) &&
3195 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || 3195 (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
3196 ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 3196 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3197 rflow->last_qtail)) >= 0)) { 3197 rflow->last_qtail)) >= 0)) {
3198 tcpu = next_cpu; 3198 tcpu = next_cpu;
3199 rflow = set_rps_cpu(dev, skb, rflow, next_cpu); 3199 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3200 } 3200 }
3201 3201
3202 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { 3202 if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
3203 *rflowp = rflow; 3203 *rflowp = rflow;
3204 cpu = tcpu; 3204 cpu = tcpu;
3205 goto done; 3205 goto done;
@@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3240 struct rps_dev_flow_table *flow_table; 3240 struct rps_dev_flow_table *flow_table;
3241 struct rps_dev_flow *rflow; 3241 struct rps_dev_flow *rflow;
3242 bool expire = true; 3242 bool expire = true;
3243 int cpu; 3243 unsigned int cpu;
3244 3244
3245 rcu_read_lock(); 3245 rcu_read_lock();
3246 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3246 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3247 if (flow_table && flow_id <= flow_table->mask) { 3247 if (flow_table && flow_id <= flow_table->mask) {
3248 rflow = &flow_table->flows[flow_id]; 3248 rflow = &flow_table->flows[flow_id];
3249 cpu = ACCESS_ONCE(rflow->cpu); 3249 cpu = ACCESS_ONCE(rflow->cpu);
3250 if (rflow->filter == filter_id && cpu != RPS_NO_CPU && 3250 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
3251 ((int)(per_cpu(softnet_data, cpu).input_queue_head - 3251 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3252 rflow->last_qtail) < 3252 rflow->last_qtail) <
3253 (int)(10 * flow_table->mask))) 3253 (int)(10 * flow_table->mask)))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d1967dab9cc6..3cfff2a3d651 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -280,13 +280,14 @@ nodata:
280EXPORT_SYMBOL(__alloc_skb); 280EXPORT_SYMBOL(__alloc_skb);
281 281
282/** 282/**
283 * build_skb - build a network buffer 283 * __build_skb - build a network buffer
284 * @data: data buffer provided by caller 284 * @data: data buffer provided by caller
285 * @frag_size: size of fragment, or 0 if head was kmalloced 285 * @frag_size: size of data, or 0 if head was kmalloced
286 * 286 *
287 * Allocate a new &sk_buff. Caller provides space holding head and 287 * Allocate a new &sk_buff. Caller provides space holding head and
288 * skb_shared_info. @data must have been allocated by kmalloc() only if 288 * skb_shared_info. @data must have been allocated by kmalloc() only if
289 * @frag_size is 0, otherwise data should come from the page allocator. 289 * @frag_size is 0, otherwise data should come from the page allocator
290 * or vmalloc()
290 * The return is the new skb buffer. 291 * The return is the new skb buffer.
291 * On a failure the return is %NULL, and @data is not freed. 292 * On a failure the return is %NULL, and @data is not freed.
292 * Notes : 293 * Notes :
@@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb);
297 * before giving packet to stack. 298 * before giving packet to stack.
298 * RX rings only contains data buffers, not full skbs. 299 * RX rings only contains data buffers, not full skbs.
299 */ 300 */
300struct sk_buff *build_skb(void *data, unsigned int frag_size) 301struct sk_buff *__build_skb(void *data, unsigned int frag_size)
301{ 302{
302 struct skb_shared_info *shinfo; 303 struct skb_shared_info *shinfo;
303 struct sk_buff *skb; 304 struct sk_buff *skb;
@@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
311 312
312 memset(skb, 0, offsetof(struct sk_buff, tail)); 313 memset(skb, 0, offsetof(struct sk_buff, tail));
313 skb->truesize = SKB_TRUESIZE(size); 314 skb->truesize = SKB_TRUESIZE(size);
314 skb->head_frag = frag_size != 0;
315 atomic_set(&skb->users, 1); 315 atomic_set(&skb->users, 1);
316 skb->head = data; 316 skb->head = data;
317 skb->data = data; 317 skb->data = data;
@@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
328 328
329 return skb; 329 return skb;
330} 330}
331
332/* build_skb() is wrapper over __build_skb(), that specifically
333 * takes care of skb->head and skb->pfmemalloc
334 * This means that if @frag_size is not zero, then @data must be backed
335 * by a page fragment, not kmalloc() or vmalloc()
336 */
337struct sk_buff *build_skb(void *data, unsigned int frag_size)
338{
339 struct sk_buff *skb = __build_skb(data, frag_size);
340
341 if (skb && frag_size) {
342 skb->head_frag = 1;
343 if (virt_to_head_page(data)->pfmemalloc)
344 skb->pfmemalloc = 1;
345 }
346 return skb;
347}
331EXPORT_SYMBOL(build_skb); 348EXPORT_SYMBOL(build_skb);
332 349
333struct netdev_alloc_cache { 350struct netdev_alloc_cache {
@@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
348 gfp_t gfp = gfp_mask; 365 gfp_t gfp = gfp_mask;
349 366
350 if (order) { 367 if (order) {
351 gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; 368 gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
369 __GFP_NOMEMALLOC;
352 page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); 370 page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
353 nc->frag.size = PAGE_SIZE << (page ? order : 0); 371 nc->frag.size = PAGE_SIZE << (page ? order : 0);
354 } 372 }
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2b4f21d34df6..ccf4c5629b3c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -453,7 +453,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
453 iph->saddr, iph->daddr); 453 iph->saddr, iph->daddr);
454 if (req) { 454 if (req) {
455 nsk = dccp_check_req(sk, skb, req); 455 nsk = dccp_check_req(sk, skb, req);
456 reqsk_put(req); 456 if (!nsk)
457 reqsk_put(req);
457 return nsk; 458 return nsk;
458 } 459 }
459 nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, 460 nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9d0551092c6c..5165571f397a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -301,7 +301,8 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
301 &iph->daddr, inet6_iif(skb)); 301 &iph->daddr, inet6_iif(skb));
302 if (req) { 302 if (req) {
303 nsk = dccp_check_req(sk, skb, req); 303 nsk = dccp_check_req(sk, skb, req);
304 reqsk_put(req); 304 if (!nsk)
305 reqsk_put(req);
305 return nsk; 306 return nsk;
306 } 307 }
307 nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, 308 nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 5f566663e47f..30addee2dd03 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -186,8 +186,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
186 if (child == NULL) 186 if (child == NULL)
187 goto listen_overflow; 187 goto listen_overflow;
188 188
189 inet_csk_reqsk_queue_unlink(sk, req); 189 inet_csk_reqsk_queue_drop(sk, req);
190 inet_csk_reqsk_queue_removed(sk, req);
191 inet_csk_reqsk_queue_add(sk, req, child); 190 inet_csk_reqsk_queue_add(sk, req, child);
192out: 191out:
193 return child; 192 return child;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5c3dd6267ed3..8976ca423a07 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
564} 564}
565EXPORT_SYMBOL(inet_rtx_syn_ack); 565EXPORT_SYMBOL(inet_rtx_syn_ack);
566 566
567/* return true if req was found in the syn_table[] */
568static bool reqsk_queue_unlink(struct request_sock_queue *queue,
569 struct request_sock *req)
570{
571 struct listen_sock *lopt = queue->listen_opt;
572 struct request_sock **prev;
573 bool found = false;
574
575 spin_lock(&queue->syn_wait_lock);
576
577 for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
578 prev = &(*prev)->dl_next) {
579 if (*prev == req) {
580 *prev = req->dl_next;
581 found = true;
582 break;
583 }
584 }
585
586 spin_unlock(&queue->syn_wait_lock);
587 if (del_timer(&req->rsk_timer))
588 reqsk_put(req);
589 return found;
590}
591
592void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
593{
594 if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
595 reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
596 reqsk_put(req);
597 }
598}
599EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
600
567static void reqsk_timer_handler(unsigned long data) 601static void reqsk_timer_handler(unsigned long data)
568{ 602{
569 struct request_sock *req = (struct request_sock *)data; 603 struct request_sock *req = (struct request_sock *)data;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3571f2be4470..fc1c658ec6c1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1348 req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); 1348 req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
1349 if (req) { 1349 if (req) {
1350 nsk = tcp_check_req(sk, skb, req, false); 1350 nsk = tcp_check_req(sk, skb, req, false);
1351 reqsk_put(req); 1351 if (!nsk)
1352 reqsk_put(req);
1352 return nsk; 1353 return nsk;
1353 } 1354 }
1354 1355
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 63d6311b5365..e5d7649136fc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -755,10 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
755 if (!child) 755 if (!child)
756 goto listen_overflow; 756 goto listen_overflow;
757 757
758 inet_csk_reqsk_queue_unlink(sk, req); 758 inet_csk_reqsk_queue_drop(sk, req);
759 inet_csk_reqsk_queue_removed(sk, req);
760
761 inet_csk_reqsk_queue_add(sk, req, child); 759 inet_csk_reqsk_queue_add(sk, req, child);
760 /* Warning: caller must not call reqsk_put(req);
761 * child stole last reference on it.
762 */
762 return child; 763 return child;
763 764
764listen_overflow: 765listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c8d7e06b72f..a369e8a70b2c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2812,39 +2812,65 @@ begin_fwd:
2812 } 2812 }
2813} 2813}
2814 2814
2815/* Send a fin. The caller locks the socket for us. This cannot be 2815/* We allow to exceed memory limits for FIN packets to expedite
2816 * allowed to fail queueing a FIN frame under any circumstances. 2816 * connection tear down and (memory) recovery.
2817 * Otherwise tcp_send_fin() could be tempted to either delay FIN
2818 * or even be forced to close flow without any FIN.
2819 */
2820static void sk_forced_wmem_schedule(struct sock *sk, int size)
2821{
2822 int amt, status;
2823
2824 if (size <= sk->sk_forward_alloc)
2825 return;
2826 amt = sk_mem_pages(size);
2827 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2828 sk_memory_allocated_add(sk, amt, &status);
2829}
2830
2831/* Send a FIN. The caller locks the socket for us.
2832 * We should try to send a FIN packet really hard, but eventually give up.
2817 */ 2833 */
2818void tcp_send_fin(struct sock *sk) 2834void tcp_send_fin(struct sock *sk)
2819{ 2835{
2836 struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
2820 struct tcp_sock *tp = tcp_sk(sk); 2837 struct tcp_sock *tp = tcp_sk(sk);
2821 struct sk_buff *skb = tcp_write_queue_tail(sk);
2822 int mss_now;
2823 2838
2824 /* Optimization, tack on the FIN if we have a queue of 2839 /* Optimization, tack on the FIN if we have one skb in write queue and
2825 * unsent frames. But be careful about outgoing SACKS 2840 * this skb was not yet sent, or we are under memory pressure.
2826 * and IP options. 2841 * Note: in the latter case, FIN packet will be sent after a timeout,
2842 * as TCP stack thinks it has already been transmitted.
2827 */ 2843 */
2828 mss_now = tcp_current_mss(sk); 2844 if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
2829 2845coalesce:
2830 if (tcp_send_head(sk)) { 2846 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
2831 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; 2847 TCP_SKB_CB(tskb)->end_seq++;
2832 TCP_SKB_CB(skb)->end_seq++;
2833 tp->write_seq++; 2848 tp->write_seq++;
2849 if (!tcp_send_head(sk)) {
2850 /* This means tskb was already sent.
2851 * Pretend we included the FIN on previous transmit.
2852 * We need to set tp->snd_nxt to the value it would have
2853 * if FIN had been sent. This is because retransmit path
2854 * does not change tp->snd_nxt.
2855 */
2856 tp->snd_nxt++;
2857 return;
2858 }
2834 } else { 2859 } else {
2835 /* Socket is locked, keep trying until memory is available. */ 2860 skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
2836 for (;;) { 2861 if (unlikely(!skb)) {
2837 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); 2862 if (tskb)
2838 if (skb) 2863 goto coalesce;
2839 break; 2864 return;
2840 yield();
2841 } 2865 }
2866 skb_reserve(skb, MAX_TCP_HEADER);
2867 sk_forced_wmem_schedule(sk, skb->truesize);
2842 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 2868 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
2843 tcp_init_nondata_skb(skb, tp->write_seq, 2869 tcp_init_nondata_skb(skb, tp->write_seq,
2844 TCPHDR_ACK | TCPHDR_FIN); 2870 TCPHDR_ACK | TCPHDR_FIN);
2845 tcp_queue_skb(sk, skb); 2871 tcp_queue_skb(sk, skb);
2846 } 2872 }
2847 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); 2873 __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
2848} 2874}
2849 2875
2850/* We get here when a process closes a file descriptor (either due to 2876/* We get here when a process closes a file descriptor (either due to
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b5e6cc1d4a73..a38d3ac0f18f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1246,7 +1246,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
1246static int ip6gre_tunnel_init(struct net_device *dev) 1246static int ip6gre_tunnel_init(struct net_device *dev)
1247{ 1247{
1248 struct ip6_tnl *tunnel; 1248 struct ip6_tnl *tunnel;
1249 int i;
1250 1249
1251 tunnel = netdev_priv(dev); 1250 tunnel = netdev_priv(dev);
1252 1251
@@ -1260,16 +1259,10 @@ static int ip6gre_tunnel_init(struct net_device *dev)
1260 if (ipv6_addr_any(&tunnel->parms.raddr)) 1259 if (ipv6_addr_any(&tunnel->parms.raddr))
1261 dev->header_ops = &ip6gre_header_ops; 1260 dev->header_ops = &ip6gre_header_ops;
1262 1261
1263 dev->tstats = alloc_percpu(struct pcpu_sw_netstats); 1262 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1264 if (!dev->tstats) 1263 if (!dev->tstats)
1265 return -ENOMEM; 1264 return -ENOMEM;
1266 1265
1267 for_each_possible_cpu(i) {
1268 struct pcpu_sw_netstats *ip6gre_tunnel_stats;
1269 ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i);
1270 u64_stats_init(&ip6gre_tunnel_stats->syncp);
1271 }
1272
1273 return 0; 1266 return 0;
1274} 1267}
1275 1268
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ad51df85aa00..b6575d665568 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -946,7 +946,8 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
946 &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); 946 &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
947 if (req) { 947 if (req) {
948 nsk = tcp_check_req(sk, skb, req, false); 948 nsk = tcp_check_req(sk, skb, req, false);
949 reqsk_put(req); 949 if (!nsk)
950 reqsk_put(req);
950 return nsk; 951 return nsk;
951 } 952 }
952 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, 953 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index db8a2ea6d4de..954810c76a86 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -53,6 +53,11 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
53 return rt; 53 return rt;
54} 54}
55 55
56static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
57{
58 return rcu_dereference_rtnl(dev->mpls_ptr);
59}
60
56static bool mpls_output_possible(const struct net_device *dev) 61static bool mpls_output_possible(const struct net_device *dev)
57{ 62{
58 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); 63 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
@@ -136,6 +141,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
136 struct mpls_route *rt; 141 struct mpls_route *rt;
137 struct mpls_entry_decoded dec; 142 struct mpls_entry_decoded dec;
138 struct net_device *out_dev; 143 struct net_device *out_dev;
144 struct mpls_dev *mdev;
139 unsigned int hh_len; 145 unsigned int hh_len;
140 unsigned int new_header_size; 146 unsigned int new_header_size;
141 unsigned int mtu; 147 unsigned int mtu;
@@ -143,6 +149,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
143 149
144 /* Careful this entire function runs inside of an rcu critical section */ 150 /* Careful this entire function runs inside of an rcu critical section */
145 151
152 mdev = mpls_dev_get(dev);
153 if (!mdev || !mdev->input_enabled)
154 goto drop;
155
146 if (skb->pkt_type != PACKET_HOST) 156 if (skb->pkt_type != PACKET_HOST)
147 goto drop; 157 goto drop;
148 158
@@ -352,9 +362,9 @@ static int mpls_route_add(struct mpls_route_config *cfg)
352 if (!dev) 362 if (!dev)
353 goto errout; 363 goto errout;
354 364
355 /* For now just support ethernet devices */ 365 /* Ensure this is a supported device */
356 err = -EINVAL; 366 err = -EINVAL;
357 if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) 367 if (!mpls_dev_get(dev))
358 goto errout; 368 goto errout;
359 369
360 err = -EINVAL; 370 err = -EINVAL;
@@ -428,10 +438,89 @@ errout:
428 return err; 438 return err;
429} 439}
430 440
441#define MPLS_PERDEV_SYSCTL_OFFSET(field) \
442 (&((struct mpls_dev *)0)->field)
443
444static const struct ctl_table mpls_dev_table[] = {
445 {
446 .procname = "input",
447 .maxlen = sizeof(int),
448 .mode = 0644,
449 .proc_handler = proc_dointvec,
450 .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
451 },
452 { }
453};
454
455static int mpls_dev_sysctl_register(struct net_device *dev,
456 struct mpls_dev *mdev)
457{
458 char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
459 struct ctl_table *table;
460 int i;
461
462 table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
463 if (!table)
464 goto out;
465
466 /* Table data contains only offsets relative to the base of
467 * the mdev at this point, so make them absolute.
468 */
469 for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++)
470 table[i].data = (char *)mdev + (uintptr_t)table[i].data;
471
472 snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
473
474 mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
475 if (!mdev->sysctl)
476 goto free;
477
478 return 0;
479
480free:
481 kfree(table);
482out:
483 return -ENOBUFS;
484}
485
486static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
487{
488 struct ctl_table *table;
489
490 table = mdev->sysctl->ctl_table_arg;
491 unregister_net_sysctl_table(mdev->sysctl);
492 kfree(table);
493}
494
495static struct mpls_dev *mpls_add_dev(struct net_device *dev)
496{
497 struct mpls_dev *mdev;
498 int err = -ENOMEM;
499
500 ASSERT_RTNL();
501
502 mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
503 if (!mdev)
504 return ERR_PTR(err);
505
506 err = mpls_dev_sysctl_register(dev, mdev);
507 if (err)
508 goto free;
509
510 rcu_assign_pointer(dev->mpls_ptr, mdev);
511
512 return mdev;
513
514free:
515 kfree(mdev);
516 return ERR_PTR(err);
517}
518
431static void mpls_ifdown(struct net_device *dev) 519static void mpls_ifdown(struct net_device *dev)
432{ 520{
433 struct mpls_route __rcu **platform_label; 521 struct mpls_route __rcu **platform_label;
434 struct net *net = dev_net(dev); 522 struct net *net = dev_net(dev);
523 struct mpls_dev *mdev;
435 unsigned index; 524 unsigned index;
436 525
437 platform_label = rtnl_dereference(net->mpls.platform_label); 526 platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -443,14 +532,35 @@ static void mpls_ifdown(struct net_device *dev)
443 continue; 532 continue;
444 rt->rt_dev = NULL; 533 rt->rt_dev = NULL;
445 } 534 }
535
536 mdev = mpls_dev_get(dev);
537 if (!mdev)
538 return;
539
540 mpls_dev_sysctl_unregister(mdev);
541
542 RCU_INIT_POINTER(dev->mpls_ptr, NULL);
543
544 kfree(mdev);
446} 545}
447 546
448static int mpls_dev_notify(struct notifier_block *this, unsigned long event, 547static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
449 void *ptr) 548 void *ptr)
450{ 549{
451 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 550 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
551 struct mpls_dev *mdev;
452 552
453 switch(event) { 553 switch(event) {
554 case NETDEV_REGISTER:
555 /* For now just support ethernet devices */
556 if ((dev->type == ARPHRD_ETHER) ||
557 (dev->type == ARPHRD_LOOPBACK)) {
558 mdev = mpls_add_dev(dev);
559 if (IS_ERR(mdev))
560 return notifier_from_errno(PTR_ERR(mdev));
561 }
562 break;
563
454 case NETDEV_UNREGISTER: 564 case NETDEV_UNREGISTER:
455 mpls_ifdown(dev); 565 mpls_ifdown(dev);
456 break; 566 break;
@@ -536,6 +646,15 @@ int nla_get_labels(const struct nlattr *nla,
536 if ((dec.bos != bos) || dec.ttl || dec.tc) 646 if ((dec.bos != bos) || dec.ttl || dec.tc)
537 return -EINVAL; 647 return -EINVAL;
538 648
649 switch (dec.label) {
650 case LABEL_IMPLICIT_NULL:
651 /* RFC3032: This is a label that an LSR may
652 * assign and distribute, but which never
653 * actually appears in the encapsulation.
654 */
655 return -EINVAL;
656 }
657
539 label[i] = dec.label; 658 label[i] = dec.label;
540 } 659 }
541 *labels = nla_labels; 660 *labels = nla_labels;
@@ -912,7 +1031,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
912 return ret; 1031 return ret;
913} 1032}
914 1033
915static struct ctl_table mpls_table[] = { 1034static const struct ctl_table mpls_table[] = {
916 { 1035 {
917 .procname = "platform_labels", 1036 .procname = "platform_labels",
918 .data = NULL, 1037 .data = NULL,
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index fb6de92052c4..693877d69606 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -22,6 +22,12 @@ struct mpls_entry_decoded {
22 u8 bos; 22 u8 bos;
23}; 23};
24 24
25struct mpls_dev {
26 int input_enabled;
27
28 struct ctl_table_header *sysctl;
29};
30
25struct sk_buff; 31struct sk_buff;
26 32
27static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) 33static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 57d3e1af5630..0522fc9bfb0a 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -63,6 +63,8 @@ int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
63 if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) 63 if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
64 goto nla_put_failure; 64 goto nla_put_failure;
65 break; 65 break;
66 default:
67 break;
66 } 68 }
67 69
68 return 0; 70 return 0;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 62cabee42fbe..635dbba93d01 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -108,6 +108,8 @@ static int nft_reject_inet_dump(struct sk_buff *skb,
108 if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) 108 if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
109 goto nla_put_failure; 109 goto nla_put_failure;
110 break; 110 break;
111 default:
112 break;
111 } 113 }
112 114
113 return 0; 115 return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 19909d0786a2..ec4adbdcb9b4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1629,13 +1629,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
1629 if (data == NULL) 1629 if (data == NULL)
1630 return NULL; 1630 return NULL;
1631 1631
1632 skb = build_skb(data, size); 1632 skb = __build_skb(data, size);
1633 if (skb == NULL) 1633 if (skb == NULL)
1634 vfree(data); 1634 vfree(data);
1635 else { 1635 else
1636 skb->head_frag = 0;
1637 skb->destructor = netlink_skb_destructor; 1636 skb->destructor = netlink_skb_destructor;
1638 }
1639 1637
1640 return skb; 1638 return skb;
1641} 1639}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a6b30df6ec02..57be6e6aff99 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2143,7 +2143,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb)
2143 err = __tipc_nl_add_node_links(net, &msg, node, 2143 err = __tipc_nl_add_node_links(net, &msg, node,
2144 &prev_link); 2144 &prev_link);
2145 tipc_node_unlock(node); 2145 tipc_node_unlock(node);
2146 tipc_node_put(node);
2147 if (err) 2146 if (err)
2148 goto out; 2147 goto out;
2149 2148
diff --git a/net/tipc/server.c b/net/tipc/server.c
index ab6183cdb121..77ff03ed1e18 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -102,7 +102,7 @@ static void tipc_conn_kref_release(struct kref *kref)
102 } 102 }
103 saddr->scope = -TIPC_NODE_SCOPE; 103 saddr->scope = -TIPC_NODE_SCOPE;
104 kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); 104 kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
105 sk_release_kernel(sk); 105 sock_release(sock);
106 con->sock = NULL; 106 con->sock = NULL;
107 } 107 }
108 108
@@ -321,12 +321,9 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
321 struct socket *sock = NULL; 321 struct socket *sock = NULL;
322 int ret; 322 int ret;
323 323
324 ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock); 324 ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1);
325 if (ret < 0) 325 if (ret < 0)
326 return NULL; 326 return NULL;
327
328 sk_change_net(sock->sk, s->net);
329
330 ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, 327 ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
331 (char *)&s->imp, sizeof(s->imp)); 328 (char *)&s->imp, sizeof(s->imp));
332 if (ret < 0) 329 if (ret < 0)
@@ -376,7 +373,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
376 373
377create_err: 374create_err:
378 kernel_sock_shutdown(sock, SHUT_RDWR); 375 kernel_sock_shutdown(sock, SHUT_RDWR);
379 sk_release_kernel(sock->sk); 376 sock_release(sock);
380 return NULL; 377 return NULL;
381} 378}
382 379
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ee90d74d7516..9074b5cede38 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1764,13 +1764,14 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
1764int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) 1764int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
1765{ 1765{
1766 u32 dnode, dport = 0; 1766 u32 dnode, dport = 0;
1767 int err = -TIPC_ERR_NO_PORT; 1767 int err;
1768 struct sk_buff *skb; 1768 struct sk_buff *skb;
1769 struct tipc_sock *tsk; 1769 struct tipc_sock *tsk;
1770 struct tipc_net *tn; 1770 struct tipc_net *tn;
1771 struct sock *sk; 1771 struct sock *sk;
1772 1772
1773 while (skb_queue_len(inputq)) { 1773 while (skb_queue_len(inputq)) {
1774 err = -TIPC_ERR_NO_PORT;
1774 skb = NULL; 1775 skb = NULL;
1775 dport = tipc_skb_peek_port(inputq, dport); 1776 dport = tipc_skb_peek_port(inputq, dport);
1776 tsk = tipc_sk_lookup(net, dport); 1777 tsk = tipc_sk_lookup(net, dport);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 99f7012b23b9..a73a226f2d33 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -95,39 +95,36 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
95 95
96unsigned int unix_tot_inflight; 96unsigned int unix_tot_inflight;
97 97
98
99struct sock *unix_get_socket(struct file *filp) 98struct sock *unix_get_socket(struct file *filp)
100{ 99{
101 struct sock *u_sock = NULL; 100 struct sock *u_sock = NULL;
102 struct inode *inode = file_inode(filp); 101 struct inode *inode = file_inode(filp);
103 102
104 /* 103 /* Socket ? */
105 * Socket ?
106 */
107 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { 104 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
108 struct socket *sock = SOCKET_I(inode); 105 struct socket *sock = SOCKET_I(inode);
109 struct sock *s = sock->sk; 106 struct sock *s = sock->sk;
110 107
111 /* 108 /* PF_UNIX ? */
112 * PF_UNIX ?
113 */
114 if (s && sock->ops && sock->ops->family == PF_UNIX) 109 if (s && sock->ops && sock->ops->family == PF_UNIX)
115 u_sock = s; 110 u_sock = s;
116 } 111 }
117 return u_sock; 112 return u_sock;
118} 113}
119 114
120/* 115/* Keep the number of times in flight count for the file
121 * Keep the number of times in flight count for the file 116 * descriptor if it is for an AF_UNIX socket.
122 * descriptor if it is for an AF_UNIX socket.
123 */ 117 */
124 118
125void unix_inflight(struct file *fp) 119void unix_inflight(struct file *fp)
126{ 120{
127 struct sock *s = unix_get_socket(fp); 121 struct sock *s = unix_get_socket(fp);
122
128 if (s) { 123 if (s) {
129 struct unix_sock *u = unix_sk(s); 124 struct unix_sock *u = unix_sk(s);
125
130 spin_lock(&unix_gc_lock); 126 spin_lock(&unix_gc_lock);
127
131 if (atomic_long_inc_return(&u->inflight) == 1) { 128 if (atomic_long_inc_return(&u->inflight) == 1) {
132 BUG_ON(!list_empty(&u->link)); 129 BUG_ON(!list_empty(&u->link));
133 list_add_tail(&u->link, &gc_inflight_list); 130 list_add_tail(&u->link, &gc_inflight_list);
@@ -142,10 +139,13 @@ void unix_inflight(struct file *fp)
142void unix_notinflight(struct file *fp) 139void unix_notinflight(struct file *fp)
143{ 140{
144 struct sock *s = unix_get_socket(fp); 141 struct sock *s = unix_get_socket(fp);
142
145 if (s) { 143 if (s) {
146 struct unix_sock *u = unix_sk(s); 144 struct unix_sock *u = unix_sk(s);
145
147 spin_lock(&unix_gc_lock); 146 spin_lock(&unix_gc_lock);
148 BUG_ON(list_empty(&u->link)); 147 BUG_ON(list_empty(&u->link));
148
149 if (atomic_long_dec_and_test(&u->inflight)) 149 if (atomic_long_dec_and_test(&u->inflight))
150 list_del_init(&u->link); 150 list_del_init(&u->link);
151 unix_tot_inflight--; 151 unix_tot_inflight--;
@@ -161,32 +161,27 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
161 161
162 spin_lock(&x->sk_receive_queue.lock); 162 spin_lock(&x->sk_receive_queue.lock);
163 skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { 163 skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
164 /* 164 /* Do we have file descriptors ? */
165 * Do we have file descriptors ?
166 */
167 if (UNIXCB(skb).fp) { 165 if (UNIXCB(skb).fp) {
168 bool hit = false; 166 bool hit = false;
169 /* 167 /* Process the descriptors of this socket */
170 * Process the descriptors of this socket
171 */
172 int nfd = UNIXCB(skb).fp->count; 168 int nfd = UNIXCB(skb).fp->count;
173 struct file **fp = UNIXCB(skb).fp->fp; 169 struct file **fp = UNIXCB(skb).fp->fp;
170
174 while (nfd--) { 171 while (nfd--) {
175 /* 172 /* Get the socket the fd matches if it indeed does so */
176 * Get the socket the fd matches
177 * if it indeed does so
178 */
179 struct sock *sk = unix_get_socket(*fp++); 173 struct sock *sk = unix_get_socket(*fp++);
174
180 if (sk) { 175 if (sk) {
181 struct unix_sock *u = unix_sk(sk); 176 struct unix_sock *u = unix_sk(sk);
182 177
183 /* 178 /* Ignore non-candidates, they could
184 * Ignore non-candidates, they could
185 * have been added to the queues after 179 * have been added to the queues after
186 * starting the garbage collection 180 * starting the garbage collection
187 */ 181 */
188 if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { 182 if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
189 hit = true; 183 hit = true;
184
190 func(u); 185 func(u);
191 } 186 }
192 } 187 }
@@ -203,24 +198,22 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
203static void scan_children(struct sock *x, void (*func)(struct unix_sock *), 198static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
204 struct sk_buff_head *hitlist) 199 struct sk_buff_head *hitlist)
205{ 200{
206 if (x->sk_state != TCP_LISTEN) 201 if (x->sk_state != TCP_LISTEN) {
207 scan_inflight(x, func, hitlist); 202 scan_inflight(x, func, hitlist);
208 else { 203 } else {
209 struct sk_buff *skb; 204 struct sk_buff *skb;
210 struct sk_buff *next; 205 struct sk_buff *next;
211 struct unix_sock *u; 206 struct unix_sock *u;
212 LIST_HEAD(embryos); 207 LIST_HEAD(embryos);
213 208
214 /* 209 /* For a listening socket collect the queued embryos
215 * For a listening socket collect the queued embryos
216 * and perform a scan on them as well. 210 * and perform a scan on them as well.
217 */ 211 */
218 spin_lock(&x->sk_receive_queue.lock); 212 spin_lock(&x->sk_receive_queue.lock);
219 skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { 213 skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
220 u = unix_sk(skb->sk); 214 u = unix_sk(skb->sk);
221 215
222 /* 216 /* An embryo cannot be in-flight, so it's safe
223 * An embryo cannot be in-flight, so it's safe
224 * to use the list link. 217 * to use the list link.
225 */ 218 */
226 BUG_ON(!list_empty(&u->link)); 219 BUG_ON(!list_empty(&u->link));
@@ -249,8 +242,7 @@ static void inc_inflight(struct unix_sock *usk)
249static void inc_inflight_move_tail(struct unix_sock *u) 242static void inc_inflight_move_tail(struct unix_sock *u)
250{ 243{
251 atomic_long_inc(&u->inflight); 244 atomic_long_inc(&u->inflight);
252 /* 245 /* If this still might be part of a cycle, move it to the end
253 * If this still might be part of a cycle, move it to the end
254 * of the list, so that it's checked even if it was already 246 * of the list, so that it's checked even if it was already
255 * passed over 247 * passed over
256 */ 248 */
@@ -263,8 +255,7 @@ static bool gc_in_progress;
263 255
264void wait_for_unix_gc(void) 256void wait_for_unix_gc(void)
265{ 257{
266 /* 258 /* If number of inflight sockets is insane,
267 * If number of inflight sockets is insane,
268 * force a garbage collect right now. 259 * force a garbage collect right now.
269 */ 260 */
270 if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) 261 if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
@@ -288,8 +279,7 @@ void unix_gc(void)
288 goto out; 279 goto out;
289 280
290 gc_in_progress = true; 281 gc_in_progress = true;
291 /* 282 /* First, select candidates for garbage collection. Only
292 * First, select candidates for garbage collection. Only
293 * in-flight sockets are considered, and from those only ones 283 * in-flight sockets are considered, and from those only ones
294 * which don't have any external reference. 284 * which don't have any external reference.
295 * 285 *
@@ -320,15 +310,13 @@ void unix_gc(void)
320 } 310 }
321 } 311 }
322 312
323 /* 313 /* Now remove all internal in-flight reference to children of
324 * Now remove all internal in-flight reference to children of
325 * the candidates. 314 * the candidates.
326 */ 315 */
327 list_for_each_entry(u, &gc_candidates, link) 316 list_for_each_entry(u, &gc_candidates, link)
328 scan_children(&u->sk, dec_inflight, NULL); 317 scan_children(&u->sk, dec_inflight, NULL);
329 318
330 /* 319 /* Restore the references for children of all candidates,
331 * Restore the references for children of all candidates,
332 * which have remaining references. Do this recursively, so 320 * which have remaining references. Do this recursively, so
333 * only those remain, which form cyclic references. 321 * only those remain, which form cyclic references.
334 * 322 *
@@ -350,8 +338,7 @@ void unix_gc(void)
350 } 338 }
351 list_del(&cursor); 339 list_del(&cursor);
352 340
353 /* 341 /* not_cycle_list contains those sockets which do not make up a
354 * not_cycle_list contains those sockets which do not make up a
355 * cycle. Restore these to the inflight list. 342 * cycle. Restore these to the inflight list.
356 */ 343 */
357 while (!list_empty(&not_cycle_list)) { 344 while (!list_empty(&not_cycle_list)) {
@@ -360,8 +347,7 @@ void unix_gc(void)
360 list_move_tail(&u->link, &gc_inflight_list); 347 list_move_tail(&u->link, &gc_inflight_list);
361 } 348 }
362 349
363 /* 350 /* Now gc_candidates contains only garbage. Restore original
364 * Now gc_candidates contains only garbage. Restore original
365 * inflight counters for these as well, and remove the skbuffs 351 * inflight counters for these as well, and remove the skbuffs
366 * which are creating the cycle(s). 352 * which are creating the cycle(s).
367 */ 353 */