Diffstat (limited to 'net')

 net/core/dev.c                  |  12
 net/core/skbuff.c               |  30
 net/dccp/ipv4.c                 |   3
 net/dccp/ipv6.c                 |   3
 net/dccp/minisocks.c            |   3
 net/ipv4/inet_connection_sock.c |  34
 net/ipv4/tcp_ipv4.c             |   3
 net/ipv4/tcp_minisocks.c        |   7
 net/ipv4/tcp_output.c           |  64
 net/ipv6/ip6_gre.c              |   9
 net/ipv6/tcp_ipv6.c             |   3
 net/mpls/af_mpls.c              | 125
 net/mpls/internal.h             |   6
 net/netfilter/nft_reject.c      |   2
 net/netfilter/nft_reject_inet.c |   2
 net/netlink/af_netlink.c        |   6
 net/tipc/link.c                 |   1
 net/tipc/server.c               |   9
 net/tipc/socket.c               |   3
 net/unix/garbage.c              |  70
 20 files changed, 290 insertions(+), 105 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1796cef55ab5..c7ba0388f1be 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3079,7 +3079,7 @@ static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
 {
-	if (next_cpu != RPS_NO_CPU) {
+	if (next_cpu < nr_cpu_ids) {
 #ifdef CONFIG_RFS_ACCEL
 		struct netdev_rx_queue *rxqueue;
 		struct rps_dev_flow_table *flow_table;
@@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		 * If the desired CPU (where last recvmsg was done) is
 		 * different from current CPU (one in the rx-queue flow
 		 * table entry), switch if one of the following holds:
-		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is unset (>= nr_cpu_ids).
 		 *   - Current CPU is offline.
 		 *   - The current CPU's queue tail has advanced beyond the
 		 *     last packet that was enqueued using this table entry.
@@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		 *     have been dequeued, thus preserving in order delivery.
 		 */
 		if (unlikely(tcpu != next_cpu) &&
-		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		    (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
 		      rflow->last_qtail)) >= 0)) {
 			tcpu = next_cpu;
 			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
 		}

-		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+		if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
 			goto done;
@@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 	struct rps_dev_flow_table *flow_table;
 	struct rps_dev_flow *rflow;
 	bool expire = true;
-	int cpu;
+	unsigned int cpu;

 	rcu_read_lock();
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	if (flow_table && flow_id <= flow_table->mask) {
 		rflow = &flow_table->flows[flow_id];
 		cpu = ACCESS_ONCE(rflow->cpu);
-		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
 		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
 			   rflow->last_qtail) <
 		     (int)(10 * flow_table->mask)))
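
For context, a minimal sketch (not part of the patch; rps_cpu_valid() is a hypothetical helper) of the invariant the dev.c hunks above establish: "no CPU" (RPS_NO_CPU, 0xffff) and any stale CPU index beyond nr_cpu_ids now fail the same bound check, so per_cpu(softnet_data, cpu) is never indexed out of range.

/* Illustrative sketch only -- rps_cpu_valid() does not exist in the tree. */
static inline bool rps_cpu_valid(u32 cpu)
{
	/* Covers both "unset" (RPS_NO_CPU == 0xffff) and stale values. */
	return cpu < nr_cpu_ids && cpu_online(cpu);
}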
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d1967dab9cc6..3cfff2a3d651 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -280,13 +280,14 @@ nodata:
 EXPORT_SYMBOL(__alloc_skb);

 /**
- * build_skb - build a network buffer
+ * __build_skb - build a network buffer
  * @data: data buffer provided by caller
- * @frag_size: size of fragment, or 0 if head was kmalloced
+ * @frag_size: size of data, or 0 if head was kmalloced
  *
  * Allocate a new &sk_buff. Caller provides space holding head and
  * skb_shared_info. @data must have been allocated by kmalloc() only if
- * @frag_size is 0, otherwise data should come from the page allocator.
+ * @frag_size is 0, otherwise data should come from the page allocator
+ * or vmalloc()
  * The return is the new skb buffer.
  * On a failure the return is %NULL, and @data is not freed.
  * Notes :
@@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb);
  * before giving packet to stack.
  * RX rings only contains data buffers, not full skbs.
  */
-struct sk_buff *build_skb(void *data, unsigned int frag_size)
+struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 {
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
@@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)

 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->truesize = SKB_TRUESIZE(size);
-	skb->head_frag = frag_size != 0;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)

 	return skb;
 }
+
+/* build_skb() is wrapper over __build_skb(), that specifically
+ * takes care of skb->head and skb->pfmemalloc
+ * This means that if @frag_size is not zero, then @data must be backed
+ * by a page fragment, not kmalloc() or vmalloc()
+ */
+struct sk_buff *build_skb(void *data, unsigned int frag_size)
+{
+	struct sk_buff *skb = __build_skb(data, frag_size);
+
+	if (skb && frag_size) {
+		skb->head_frag = 1;
+		if (virt_to_head_page(data)->pfmemalloc)
+			skb->pfmemalloc = 1;
+	}
+	return skb;
+}
 EXPORT_SYMBOL(build_skb);

 struct netdev_alloc_cache {
@@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
 	gfp_t gfp = gfp_mask;

 	if (order) {
-		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
+			    __GFP_NOMEMALLOC;
 		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
 		nc->frag.size = PAGE_SIZE << (page ? order : 0);
 	}
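
For context, a minimal usage sketch (not part of the patch; the wrap_*() callers are hypothetical) of how the two entry points split responsibilities after this change:

/* Illustrative sketch only. */
static struct sk_buff *wrap_rx_frag(void *frag, unsigned int truesize)
{
	/* Page-fragment backed data: build_skb() sets skb->head_frag and
	 * propagates pfmemalloc from the underlying page.
	 */
	return build_skb(frag, truesize);
}

static struct sk_buff *wrap_vmalloc_buf(void *data, unsigned int size)
{
	/* kmalloc()ed or vmalloc()ed data: __build_skb() skips the
	 * page-fragment handling, as netlink_alloc_large_skb() now does.
	 */
	return __build_skb(data, size);
}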
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2b4f21d34df6..ccf4c5629b3c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -453,7 +453,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 				       iph->saddr, iph->daddr);
 	if (req) {
 		nsk = dccp_check_req(sk, skb, req);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}
 	nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9d0551092c6c..5165571f397a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -301,7 +301,8 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 				   &iph->daddr, inet6_iif(skb));
 	if (req) {
 		nsk = dccp_check_req(sk, skb, req);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}
 	nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 5f566663e47f..30addee2dd03 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -186,8 +186,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (child == NULL)
 		goto listen_overflow;

-	inet_csk_reqsk_queue_unlink(sk, req);
-	inet_csk_reqsk_queue_removed(sk, req);
+	inet_csk_reqsk_queue_drop(sk, req);
 	inet_csk_reqsk_queue_add(sk, req, child);
 out:
 	return child;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5c3dd6267ed3..8976ca423a07 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_rtx_syn_ack);

+/* return true if req was found in the syn_table[] */
+static bool reqsk_queue_unlink(struct request_sock_queue *queue,
+			       struct request_sock *req)
+{
+	struct listen_sock *lopt = queue->listen_opt;
+	struct request_sock **prev;
+	bool found = false;
+
+	spin_lock(&queue->syn_wait_lock);
+
+	for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
+	     prev = &(*prev)->dl_next) {
+		if (*prev == req) {
+			*prev = req->dl_next;
+			found = true;
+			break;
+		}
+	}
+
+	spin_unlock(&queue->syn_wait_lock);
+	if (del_timer(&req->rsk_timer))
+		reqsk_put(req);
+	return found;
+}
+
+void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+{
+	if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
+		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+		reqsk_put(req);
+	}
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
+
 static void reqsk_timer_handler(unsigned long data)
 {
 	struct request_sock *req = (struct request_sock *)data;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3571f2be4470..fc1c658ec6c1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
 	if (req) {
 		nsk = tcp_check_req(sk, skb, req, false);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 63d6311b5365..e5d7649136fc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -755,10 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!child)
 		goto listen_overflow;

-	inet_csk_reqsk_queue_unlink(sk, req);
-	inet_csk_reqsk_queue_removed(sk, req);
-
+	inet_csk_reqsk_queue_drop(sk, req);
 	inet_csk_reqsk_queue_add(sk, req, child);
+	/* Warning: caller must not call reqsk_put(req);
+	 * child stole last reference on it.
+	 */
 	return child;

 listen_overflow:
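
Taken together with the *_hnd_req() hunks above, the request-socket reference convention after this series can be sketched as follows (illustrative only, not part of the patch):

	/* Illustrative sketch of the new convention:
	 * tcp_check_req()/dccp_check_req() hand the last reference to the
	 * child on success, so the caller drops its reference only when no
	 * child is returned.
	 */
	req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
	if (req) {
		nsk = tcp_check_req(sk, skb, req, false);
		if (!nsk)		/* no child: reference is still ours */
			reqsk_put(req);
		return nsk;		/* child stole the last reference */
	}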
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c8d7e06b72f..a369e8a70b2c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2812,39 +2812,65 @@ begin_fwd:
 	}
 }

-/* Send a fin. The caller locks the socket for us. This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
+/* We allow to exceed memory limits for FIN packets to expedite
+ * connection tear down and (memory) recovery.
+ * Otherwise tcp_send_fin() could be tempted to either delay FIN
+ * or even be forced to close flow without any FIN.
+ */
+static void sk_forced_wmem_schedule(struct sock *sk, int size)
+{
+	int amt, status;
+
+	if (size <= sk->sk_forward_alloc)
+		return;
+	amt = sk_mem_pages(size);
+	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+	sk_memory_allocated_add(sk, amt, &status);
+}
+
+/* Send a FIN. The caller locks the socket for us.
+ * We should try to send a FIN packet really hard, but eventually give up.
  */
 void tcp_send_fin(struct sock *sk)
 {
+	struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_write_queue_tail(sk);
-	int mss_now;

-	/* Optimization, tack on the FIN if we have a queue of
-	 * unsent frames. But be careful about outgoing SACKS
-	 * and IP options.
+	/* Optimization, tack on the FIN if we have one skb in write queue and
+	 * this skb was not yet sent, or we are under memory pressure.
+	 * Note: in the latter case, FIN packet will be sent after a timeout,
+	 * as TCP stack thinks it has already been transmitted.
 	 */
-	mss_now = tcp_current_mss(sk);
-
-	if (tcp_send_head(sk)) {
-		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
-		TCP_SKB_CB(skb)->end_seq++;
+	if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+coalesce:
+		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
+		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
+		if (!tcp_send_head(sk)) {
+			/* This means tskb was already sent.
+			 * Pretend we included the FIN on previous transmit.
+			 * We need to set tp->snd_nxt to the value it would have
+			 * if FIN had been sent. This is because retransmit path
+			 * does not change tp->snd_nxt.
+			 */
+			tp->snd_nxt++;
+			return;
+		}
 	} else {
-		/* Socket is locked, keep trying until memory is available. */
-		for (;;) {
-			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
-			if (skb)
-				break;
-			yield();
+		skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+		if (unlikely(!skb)) {
+			if (tskb)
+				goto coalesce;
+			return;
 		}
+		skb_reserve(skb, MAX_TCP_HEADER);
+		sk_forced_wmem_schedule(sk, skb->truesize);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
 				     TCPHDR_ACK | TCPHDR_FIN);
 		tcp_queue_skb(sk, skb);
 	}
-	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
 }

 /* We get here when a process closes a file descriptor (either due to
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b5e6cc1d4a73..a38d3ac0f18f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1246,7 +1246,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
 static int ip6gre_tunnel_init(struct net_device *dev)
 {
 	struct ip6_tnl *tunnel;
-	int i;

 	tunnel = netdev_priv(dev);

@@ -1260,16 +1259,10 @@ static int ip6gre_tunnel_init(struct net_device *dev)
 	if (ipv6_addr_any(&tunnel->parms.raddr))
 		dev->header_ops = &ip6gre_header_ops;

-	dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;

-	for_each_possible_cpu(i) {
-		struct pcpu_sw_netstats *ip6gre_tunnel_stats;
-		ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i);
-		u64_stats_init(&ip6gre_tunnel_stats->syncp);
-	}
-
 	return 0;
 }

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ad51df85aa00..b6575d665568 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -946,7 +946,8 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
 				   &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
 	if (req) {
 		nsk = tcp_check_req(sk, skb, req, false);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}
 	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index db8a2ea6d4de..954810c76a86 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -53,6 +53,11 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
 	return rt;
 }

+static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
+{
+	return rcu_dereference_rtnl(dev->mpls_ptr);
+}
+
 static bool mpls_output_possible(const struct net_device *dev)
 {
 	return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
@@ -136,6 +141,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 	struct mpls_route *rt;
 	struct mpls_entry_decoded dec;
 	struct net_device *out_dev;
+	struct mpls_dev *mdev;
 	unsigned int hh_len;
 	unsigned int new_header_size;
 	unsigned int mtu;
@@ -143,6 +149,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,

 	/* Careful this entire function runs inside of an rcu critical section */

+	mdev = mpls_dev_get(dev);
+	if (!mdev || !mdev->input_enabled)
+		goto drop;
+
 	if (skb->pkt_type != PACKET_HOST)
 		goto drop;

@@ -352,9 +362,9 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 	if (!dev)
 		goto errout;

-	/* For now just support ethernet devices */
+	/* Ensure this is a supported device */
 	err = -EINVAL;
-	if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
+	if (!mpls_dev_get(dev))
 		goto errout;

 	err = -EINVAL;
@@ -428,10 +438,89 @@ errout:
 	return err;
 }

+#define MPLS_PERDEV_SYSCTL_OFFSET(field)	\
+	(&((struct mpls_dev *)0)->field)
+
+static const struct ctl_table mpls_dev_table[] = {
+	{
+		.procname	= "input",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.data		= MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
+	},
+	{ }
+};
+
+static int mpls_dev_sysctl_register(struct net_device *dev,
+				    struct mpls_dev *mdev)
+{
+	char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
+	struct ctl_table *table;
+	int i;
+
+	table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	/* Table data contains only offsets relative to the base of
+	 * the mdev at this point, so make them absolute.
+	 */
+	for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++)
+		table[i].data = (char *)mdev + (uintptr_t)table[i].data;
+
+	snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
+
+	mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
+	if (!mdev->sysctl)
+		goto free;
+
+	return 0;
+
+free:
+	kfree(table);
+out:
+	return -ENOBUFS;
+}
+
+static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
+{
+	struct ctl_table *table;
+
+	table = mdev->sysctl->ctl_table_arg;
+	unregister_net_sysctl_table(mdev->sysctl);
+	kfree(table);
+}
+
+static struct mpls_dev *mpls_add_dev(struct net_device *dev)
+{
+	struct mpls_dev *mdev;
+	int err = -ENOMEM;
+
+	ASSERT_RTNL();
+
+	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+	if (!mdev)
+		return ERR_PTR(err);
+
+	err = mpls_dev_sysctl_register(dev, mdev);
+	if (err)
+		goto free;
+
+	rcu_assign_pointer(dev->mpls_ptr, mdev);
+
+	return mdev;
+
+free:
+	kfree(mdev);
+	return ERR_PTR(err);
+}
+
 static void mpls_ifdown(struct net_device *dev)
 {
 	struct mpls_route __rcu **platform_label;
 	struct net *net = dev_net(dev);
+	struct mpls_dev *mdev;
 	unsigned index;

 	platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -443,14 +532,35 @@ static void mpls_ifdown(struct net_device *dev)
 			continue;
 		rt->rt_dev = NULL;
 	}
+
+	mdev = mpls_dev_get(dev);
+	if (!mdev)
+		return;
+
+	mpls_dev_sysctl_unregister(mdev);
+
+	RCU_INIT_POINTER(dev->mpls_ptr, NULL);
+
+	kfree(mdev);
 }

 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
 			   void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct mpls_dev *mdev;

 	switch(event) {
+	case NETDEV_REGISTER:
+		/* For now just support ethernet devices */
+		if ((dev->type == ARPHRD_ETHER) ||
+		    (dev->type == ARPHRD_LOOPBACK)) {
+			mdev = mpls_add_dev(dev);
+			if (IS_ERR(mdev))
+				return notifier_from_errno(PTR_ERR(mdev));
+		}
+		break;
+
 	case NETDEV_UNREGISTER:
 		mpls_ifdown(dev);
 		break;
@@ -536,6 +646,15 @@ int nla_get_labels(const struct nlattr *nla,
 		if ((dec.bos != bos) || dec.ttl || dec.tc)
 			return -EINVAL;

+		switch (dec.label) {
+		case LABEL_IMPLICIT_NULL:
+			/* RFC3032: This is a label that an LSR may
+			 * assign and distribute, but which never
+			 * actually appears in the encapsulation.
+			 */
+			return -EINVAL;
+		}
+
 		label[i] = dec.label;
 	}
 	*labels = nla_labels;
@@ -912,7 +1031,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
 	return ret;
 }

-static struct ctl_table mpls_table[] = {
+static const struct ctl_table mpls_table[] = {
 	{
 		.procname	= "platform_labels",
 		.data		= NULL,
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index fb6de92052c4..693877d69606 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -22,6 +22,12 @@ struct mpls_entry_decoded {
 	u8 bos;
 };

+struct mpls_dev {
+	int			input_enabled;
+
+	struct ctl_table_header *sysctl;
+};
+
 struct sk_buff;

 static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 57d3e1af5630..0522fc9bfb0a 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -63,6 +63,8 @@ int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
 		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
 			goto nla_put_failure;
 		break;
+	default:
+		break;
 	}

 	return 0;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 62cabee42fbe..635dbba93d01 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -108,6 +108,8 @@ static int nft_reject_inet_dump(struct sk_buff *skb,
 		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
 			goto nla_put_failure;
 		break;
+	default:
+		break;
 	}

 	return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 19909d0786a2..ec4adbdcb9b4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1629,13 +1629,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
 	if (data == NULL)
 		return NULL;

-	skb = build_skb(data, size);
+	skb = __build_skb(data, size);
 	if (skb == NULL)
 		vfree(data);
-	else {
-		skb->head_frag = 0;
+	else
 		skb->destructor = netlink_skb_destructor;
-	}

 	return skb;
 }
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a6b30df6ec02..57be6e6aff99 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2143,7 +2143,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			err = __tipc_nl_add_node_links(net, &msg, node,
 						       &prev_link);
 			tipc_node_unlock(node);
-			tipc_node_put(node);
 			if (err)
 				goto out;

diff --git a/net/tipc/server.c b/net/tipc/server.c
index ab6183cdb121..77ff03ed1e18 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -102,7 +102,7 @@ static void tipc_conn_kref_release(struct kref *kref)
 		}
 		saddr->scope = -TIPC_NODE_SCOPE;
 		kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
-		sk_release_kernel(sk);
+		sock_release(sock);
 		con->sock = NULL;
 	}

@@ -321,12 +321,9 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
 	struct socket *sock = NULL;
 	int ret;

-	ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock);
+	ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1);
 	if (ret < 0)
 		return NULL;
-
-	sk_change_net(sock->sk, s->net);
-
 	ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
 				(char *)&s->imp, sizeof(s->imp));
 	if (ret < 0)
@@ -376,7 +373,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)

 create_err:
 	kernel_sock_shutdown(sock, SHUT_RDWR);
-	sk_release_kernel(sock->sk);
+	sock_release(sock);
 	return NULL;
 }

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ee90d74d7516..9074b5cede38 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1764,13 +1764,14 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
 {
 	u32 dnode, dport = 0;
-	int err = -TIPC_ERR_NO_PORT;
+	int err;
 	struct sk_buff *skb;
 	struct tipc_sock *tsk;
 	struct tipc_net *tn;
 	struct sock *sk;

 	while (skb_queue_len(inputq)) {
+		err = -TIPC_ERR_NO_PORT;
 		skb = NULL;
 		dport = tipc_skb_peek_port(inputq, dport);
 		tsk = tipc_sk_lookup(net, dport);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 99f7012b23b9..a73a226f2d33 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -95,39 +95,36 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);

 unsigned int unix_tot_inflight;

-
 struct sock *unix_get_socket(struct file *filp)
 {
 	struct sock *u_sock = NULL;
 	struct inode *inode = file_inode(filp);

-	/*
-	 * Socket ?
-	 */
+	/* Socket ? */
 	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
 		struct socket *sock = SOCKET_I(inode);
 		struct sock *s = sock->sk;

-		/*
-		 * PF_UNIX ?
-		 */
+		/* PF_UNIX ? */
 		if (s && sock->ops && sock->ops->family == PF_UNIX)
 			u_sock = s;
 	}
 	return u_sock;
 }

-/*
- * Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
  */

 void unix_inflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
+
 		spin_lock(&unix_gc_lock);
+
 		if (atomic_long_inc_return(&u->inflight) == 1) {
 			BUG_ON(!list_empty(&u->link));
 			list_add_tail(&u->link, &gc_inflight_list);
@@ -142,10 +139,13 @@ void unix_inflight(struct file *fp)
 void unix_notinflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
+
 		spin_lock(&unix_gc_lock);
 		BUG_ON(list_empty(&u->link));
+
 		if (atomic_long_dec_and_test(&u->inflight))
 			list_del_init(&u->link);
 		unix_tot_inflight--;
@@ -161,32 +161,27 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),

 	spin_lock(&x->sk_receive_queue.lock);
 	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
-		/*
-		 * Do we have file descriptors ?
-		 */
+		/* Do we have file descriptors ? */
 		if (UNIXCB(skb).fp) {
 			bool hit = false;
-			/*
-			 * Process the descriptors of this socket
-			 */
+			/* Process the descriptors of this socket */
 			int nfd = UNIXCB(skb).fp->count;
 			struct file **fp = UNIXCB(skb).fp->fp;
+
 			while (nfd--) {
-				/*
-				 * Get the socket the fd matches
-				 * if it indeed does so
-				 */
+				/* Get the socket the fd matches if it indeed does so */
 				struct sock *sk = unix_get_socket(*fp++);
+
 				if (sk) {
 					struct unix_sock *u = unix_sk(sk);

-					/*
-					 * Ignore non-candidates, they could
+					/* Ignore non-candidates, they could
 					 * have been added to the queues after
 					 * starting the garbage collection
 					 */
 					if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
 						hit = true;
+
 						func(u);
 					}
 				}
@@ -203,24 +198,22 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
 			  struct sk_buff_head *hitlist)
 {
-	if (x->sk_state != TCP_LISTEN)
+	if (x->sk_state != TCP_LISTEN) {
 		scan_inflight(x, func, hitlist);
-	else {
+	} else {
 		struct sk_buff *skb;
 		struct sk_buff *next;
 		struct unix_sock *u;
 		LIST_HEAD(embryos);

-		/*
-		 * For a listening socket collect the queued embryos
+		/* For a listening socket collect the queued embryos
 		 * and perform a scan on them as well.
 		 */
 		spin_lock(&x->sk_receive_queue.lock);
 		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
 			u = unix_sk(skb->sk);

-			/*
-			 * An embryo cannot be in-flight, so it's safe
+			/* An embryo cannot be in-flight, so it's safe
 			 * to use the list link.
 			 */
 			BUG_ON(!list_empty(&u->link));
@@ -249,8 +242,7 @@ static void inc_inflight(struct unix_sock *usk)
 static void inc_inflight_move_tail(struct unix_sock *u)
 {
 	atomic_long_inc(&u->inflight);
-	/*
-	 * If this still might be part of a cycle, move it to the end
+	/* If this still might be part of a cycle, move it to the end
 	 * of the list, so that it's checked even if it was already
 	 * passed over
 	 */
@@ -263,8 +255,7 @@ static bool gc_in_progress;

 void wait_for_unix_gc(void)
 {
-	/*
-	 * If number of inflight sockets is insane,
+	/* If number of inflight sockets is insane,
 	 * force a garbage collect right now.
 	 */
 	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
@@ -288,8 +279,7 @@ void unix_gc(void)
 		goto out;

 	gc_in_progress = true;
-	/*
-	 * First, select candidates for garbage collection. Only
+	/* First, select candidates for garbage collection. Only
 	 * in-flight sockets are considered, and from those only ones
 	 * which don't have any external reference.
 	 *
@@ -320,15 +310,13 @@ void unix_gc(void)
 		}
 	}

-	/*
-	 * Now remove all internal in-flight reference to children of
+	/* Now remove all internal in-flight reference to children of
 	 * the candidates.
 	 */
 	list_for_each_entry(u, &gc_candidates, link)
 		scan_children(&u->sk, dec_inflight, NULL);

-	/*
-	 * Restore the references for children of all candidates,
+	/* Restore the references for children of all candidates,
 	 * which have remaining references. Do this recursively, so
 	 * only those remain, which form cyclic references.
 	 *
@@ -350,8 +338,7 @@ void unix_gc(void)
 	}
 	list_del(&cursor);

-	/*
-	 * not_cycle_list contains those sockets which do not make up a
+	/* not_cycle_list contains those sockets which do not make up a
 	 * cycle. Restore these to the inflight list.
 	 */
 	while (!list_empty(&not_cycle_list)) {
@@ -360,8 +347,7 @@ void unix_gc(void)
 		list_move_tail(&u->link, &gc_inflight_list);
 	}

-	/*
-	 * Now gc_candidates contains only garbage. Restore original
+	/* Now gc_candidates contains only garbage. Restore original
 	 * inflight counters for these as well, and remove the skbuffs
 	 * which are creating the cycle(s).
 	 */