Diffstat (limited to 'net'):

 net/core/dev.c                  |  12
 net/core/skbuff.c               |  30
 net/dccp/ipv4.c                 |   3
 net/dccp/ipv6.c                 |   3
 net/dccp/minisocks.c            |   3
 net/ipv4/inet_connection_sock.c |  34
 net/ipv4/tcp_ipv4.c             |   3
 net/ipv4/tcp_minisocks.c        |   7
 net/ipv4/tcp_output.c           |  64
 net/ipv6/ip6_gre.c              |   9
 net/ipv6/tcp_ipv6.c             |   3
 net/mpls/af_mpls.c              | 125
 net/mpls/internal.h             |   6
 net/netfilter/nft_reject.c      |   2
 net/netfilter/nft_reject_inet.c |   2
 net/netlink/af_netlink.c        |   6
 net/tipc/link.c                 |   1
 net/tipc/server.c               |   9
 net/tipc/socket.c               |   3
 net/unix/garbage.c              |  70

 20 files changed, 290 insertions(+), 105 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1796cef55ab5..c7ba0388f1be 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3079,7 +3079,7 @@ static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
 {
-	if (next_cpu != RPS_NO_CPU) {
+	if (next_cpu < nr_cpu_ids) {
 #ifdef CONFIG_RFS_ACCEL
 		struct netdev_rx_queue *rxqueue;
 		struct rps_dev_flow_table *flow_table;
@@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		 * If the desired CPU (where last recvmsg was done) is
 		 * different from current CPU (one in the rx-queue flow
 		 * table entry), switch if one of the following holds:
-		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is unset (>= nr_cpu_ids).
 		 *   - Current CPU is offline.
 		 *   - The current CPU's queue tail has advanced beyond the
 		 *     last packet that was enqueued using this table entry.
@@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		 *   have been dequeued, thus preserving in order delivery.
 		 */
 		if (unlikely(tcpu != next_cpu) &&
-		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		    (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
 		      rflow->last_qtail)) >= 0)) {
 			tcpu = next_cpu;
 			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
 		}

-		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+		if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
 			goto done;
@@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 	struct rps_dev_flow_table *flow_table;
 	struct rps_dev_flow *rflow;
 	bool expire = true;
-	int cpu;
+	unsigned int cpu;

 	rcu_read_lock();
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	if (flow_table && flow_id <= flow_table->mask) {
 		rflow = &flow_table->flows[flow_id];
 		cpu = ACCESS_ONCE(rflow->cpu);
-		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
 		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
 			   rflow->last_qtail) <
 		     (int)(10 * flow_table->mask)))
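Note on the pattern above: with the RPS_NO_CPU sentinel gone, a CPU value read from a flow table entry is treated as unset whenever it is not a valid CPU index, and is only acted on while that CPU is online. A minimal sketch of the check (the helper name is illustrative, not part of the patch):

static bool rps_flow_cpu_usable(unsigned int cpu)
{
	/* Any value >= nr_cpu_ids (for example the all-ones pattern a
	 * u16 field starts out with) means "no CPU recorded"; a
	 * recorded CPU is only usable while it is online.
	 */
	return cpu < nr_cpu_ids && cpu_online(cpu);
}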
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d1967dab9cc6..3cfff2a3d651 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -280,13 +280,14 @@ nodata:
 EXPORT_SYMBOL(__alloc_skb);

 /**
- * build_skb - build a network buffer
+ * __build_skb - build a network buffer
  * @data: data buffer provided by caller
- * @frag_size: size of fragment, or 0 if head was kmalloced
+ * @frag_size: size of data, or 0 if head was kmalloced
  *
  * Allocate a new &sk_buff. Caller provides space holding head and
  * skb_shared_info. @data must have been allocated by kmalloc() only if
- * @frag_size is 0, otherwise data should come from the page allocator.
+ * @frag_size is 0, otherwise data should come from the page allocator
+ * or vmalloc()
  * The return is the new skb buffer.
  * On a failure the return is %NULL, and @data is not freed.
  * Notes :
@@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb);
  * before giving packet to stack.
  * RX rings only contains data buffers, not full skbs.
  */
-struct sk_buff *build_skb(void *data, unsigned int frag_size)
+struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 {
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
@@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)

 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->truesize = SKB_TRUESIZE(size);
-	skb->head_frag = frag_size != 0;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)

 	return skb;
 }
+
+/* build_skb() is wrapper over __build_skb(), that specifically
+ * takes care of skb->head and skb->pfmemalloc
+ * This means that if @frag_size is not zero, then @data must be backed
+ * by a page fragment, not kmalloc() or vmalloc()
+ */
+struct sk_buff *build_skb(void *data, unsigned int frag_size)
+{
+	struct sk_buff *skb = __build_skb(data, frag_size);
+
+	if (skb && frag_size) {
+		skb->head_frag = 1;
+		if (virt_to_head_page(data)->pfmemalloc)
+			skb->pfmemalloc = 1;
+	}
+	return skb;
+}
 EXPORT_SYMBOL(build_skb);

 struct netdev_alloc_cache {
@@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
 	gfp_t gfp = gfp_mask;

 	if (order) {
-		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
+			    __GFP_NOMEMALLOC;
 		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
 		nc->frag.size = PAGE_SIZE << (page ? order : 0);
 	}
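The __build_skb()/build_skb() split matters to callers whose head is not a page fragment. A hedged sketch of the intended usage, assuming a caller that owns a vmalloc()ed buffer (as the netlink change further down does):

	void *data = vmalloc(size);		/* not page-allocator backed */
	struct sk_buff *skb;

	skb = __build_skb(data, size);		/* no head_frag/pfmemalloc handling */
	if (!skb)
		vfree(data);			/* on failure @data is not freed */

build_skb() remains the right call for page fragments: it additionally sets skb->head_frag and propagates the page's pfmemalloc flag via virt_to_head_page(), which is only valid when @data really comes from the page allocator.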
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2b4f21d34df6..ccf4c5629b3c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -453,7 +453,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 				       iph->saddr, iph->daddr);
 	if (req) {
 		nsk = dccp_check_req(sk, skb, req);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}
 	nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9d0551092c6c..5165571f397a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -301,7 +301,8 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 				   &iph->daddr, inet6_iif(skb));
 	if (req) {
 		nsk = dccp_check_req(sk, skb, req);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}
 	nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 5f566663e47f..30addee2dd03 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -186,8 +186,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (child == NULL)
 		goto listen_overflow;

-	inet_csk_reqsk_queue_unlink(sk, req);
-	inet_csk_reqsk_queue_removed(sk, req);
+	inet_csk_reqsk_queue_drop(sk, req);
 	inet_csk_reqsk_queue_add(sk, req, child);
 out:
 	return child;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5c3dd6267ed3..8976ca423a07 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_rtx_syn_ack);

+/* return true if req was found in the syn_table[] */
+static bool reqsk_queue_unlink(struct request_sock_queue *queue,
+			       struct request_sock *req)
+{
+	struct listen_sock *lopt = queue->listen_opt;
+	struct request_sock **prev;
+	bool found = false;
+
+	spin_lock(&queue->syn_wait_lock);
+
+	for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
+	     prev = &(*prev)->dl_next) {
+		if (*prev == req) {
+			*prev = req->dl_next;
+			found = true;
+			break;
+		}
+	}
+
+	spin_unlock(&queue->syn_wait_lock);
+	if (del_timer(&req->rsk_timer))
+		reqsk_put(req);
+	return found;
+}
+
+void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+{
+	if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
+		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+		reqsk_put(req);
+	}
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
+
 static void reqsk_timer_handler(unsigned long data)
 {
 	struct request_sock *req = (struct request_sock *)data;
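For readers tracking the request-socket reference counts: inet_csk_reqsk_queue_drop() only releases the listener queue's reference when the request was actually found and unlinked, and it also drops the reference held by the SYN-ACK timer when del_timer() succeeds. At the lookup call sites changed in this series, the ownership rule becomes (a simplified sketch, names as in the hunks):

	req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);	/* takes a ref */
	if (req) {
		nsk = tcp_check_req(sk, skb, req, false);
		/* On success the child has taken over the last reference
		 * (tcp_check_req() already called inet_csk_reqsk_queue_drop()),
		 * so only the failure path may drop the lookup reference.
		 */
		if (!nsk)
			reqsk_put(req);
		return nsk;
	}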
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3571f2be4470..fc1c658ec6c1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
 	if (req) {
 		nsk = tcp_check_req(sk, skb, req, false);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 63d6311b5365..e5d7649136fc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -755,10 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!child)
 		goto listen_overflow;

-	inet_csk_reqsk_queue_unlink(sk, req);
-	inet_csk_reqsk_queue_removed(sk, req);
-
+	inet_csk_reqsk_queue_drop(sk, req);
 	inet_csk_reqsk_queue_add(sk, req, child);
+	/* Warning: caller must not call reqsk_put(req);
+	 * child stole last reference on it.
+	 */
 	return child;

 listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c8d7e06b72f..a369e8a70b2c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2812,39 +2812,65 @@ begin_fwd:
 	}
 }

-/* Send a fin. The caller locks the socket for us. This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
+/* We allow to exceed memory limits for FIN packets to expedite
+ * connection tear down and (memory) recovery.
+ * Otherwise tcp_send_fin() could be tempted to either delay FIN
+ * or even be forced to close flow without any FIN.
+ */
+static void sk_forced_wmem_schedule(struct sock *sk, int size)
+{
+	int amt, status;
+
+	if (size <= sk->sk_forward_alloc)
+		return;
+	amt = sk_mem_pages(size);
+	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+	sk_memory_allocated_add(sk, amt, &status);
+}
+
+/* Send a FIN. The caller locks the socket for us.
+ * We should try to send a FIN packet really hard, but eventually give up.
  */
 void tcp_send_fin(struct sock *sk)
 {
+	struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_write_queue_tail(sk);
-	int mss_now;

-	/* Optimization, tack on the FIN if we have a queue of
-	 * unsent frames. But be careful about outgoing SACKS
-	 * and IP options.
+	/* Optimization, tack on the FIN if we have one skb in write queue and
+	 * this skb was not yet sent, or we are under memory pressure.
+	 * Note: in the latter case, FIN packet will be sent after a timeout,
+	 * as TCP stack thinks it has already been transmitted.
 	 */
-	mss_now = tcp_current_mss(sk);
-
-	if (tcp_send_head(sk)) {
-		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
-		TCP_SKB_CB(skb)->end_seq++;
+	if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+coalesce:
+		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
+		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
+		if (!tcp_send_head(sk)) {
+			/* This means tskb was already sent.
+			 * Pretend we included the FIN on previous transmit.
+			 * We need to set tp->snd_nxt to the value it would have
+			 * if FIN had been sent. This is because retransmit path
+			 * does not change tp->snd_nxt.
+			 */
+			tp->snd_nxt++;
+			return;
+		}
 	} else {
-		/* Socket is locked, keep trying until memory is available. */
-		for (;;) {
-			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
-			if (skb)
-				break;
-			yield();
+		skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+		if (unlikely(!skb)) {
+			if (tskb)
+				goto coalesce;
+			return;
 		}
+		skb_reserve(skb, MAX_TCP_HEADER);
+		sk_forced_wmem_schedule(sk, skb->truesize);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
 				     TCPHDR_ACK | TCPHDR_FIN);
 		tcp_queue_skb(sk, skb);
 	}
-	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
 }

 /* We get here when a process closes a file descriptor (either due to
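A note on sk_forced_wmem_schedule() above: sk_mem_pages() rounds the request up to whole SK_MEM_QUANTUM units, so even a small FIN skb is granted at least one quantum and the charge is never refused by the normal memory-accounting limits. Roughly (the truesize value is illustrative):

	int amt = sk_mem_pages(skb->truesize);		/* e.g. truesize 704 -> 1 quantum */

	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;	/* charge unconditionally */
	sk_memory_allocated_add(sk, amt, &status);	/* keep the protocol counters honest */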
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b5e6cc1d4a73..a38d3ac0f18f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1246,7 +1246,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
 static int ip6gre_tunnel_init(struct net_device *dev)
 {
 	struct ip6_tnl *tunnel;
-	int i;

 	tunnel = netdev_priv(dev);

@@ -1260,16 +1259,10 @@ static int ip6gre_tunnel_init(struct net_device *dev)
 	if (ipv6_addr_any(&tunnel->parms.raddr))
 		dev->header_ops = &ip6gre_header_ops;

-	dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;

-	for_each_possible_cpu(i) {
-		struct pcpu_sw_netstats *ip6gre_tunnel_stats;
-		ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i);
-		u64_stats_init(&ip6gre_tunnel_stats->syncp);
-	}
-
 	return 0;
 }

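netdev_alloc_pcpu_stats() wraps the alloc_percpu() plus per-CPU u64_stats_init() sequence the old code spelled out by hand; it is roughly equivalent to the following sketch (not the exact in-tree macro body):

	struct pcpu_sw_netstats __percpu *stats;
	int cpu;

	stats = alloc_percpu(struct pcpu_sw_netstats);
	if (stats) {
		for_each_possible_cpu(cpu) {
			struct pcpu_sw_netstats *s = per_cpu_ptr(stats, cpu);

			u64_stats_init(&s->syncp);	/* seqcount init, needed on 32-bit */
		}
	}
	dev->tstats = stats;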
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ad51df85aa00..b6575d665568 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -946,7 +946,8 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
 				   &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
 	if (req) {
 		nsk = tcp_check_req(sk, skb, req, false);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}
 	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index db8a2ea6d4de..954810c76a86 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -53,6 +53,11 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
 	return rt;
 }

+static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
+{
+	return rcu_dereference_rtnl(dev->mpls_ptr);
+}
+
 static bool mpls_output_possible(const struct net_device *dev)
 {
 	return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
@@ -136,6 +141,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 	struct mpls_route *rt;
 	struct mpls_entry_decoded dec;
 	struct net_device *out_dev;
+	struct mpls_dev *mdev;
 	unsigned int hh_len;
 	unsigned int new_header_size;
 	unsigned int mtu;
@@ -143,6 +149,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,

 	/* Careful this entire function runs inside of an rcu critical section */

+	mdev = mpls_dev_get(dev);
+	if (!mdev || !mdev->input_enabled)
+		goto drop;
+
 	if (skb->pkt_type != PACKET_HOST)
 		goto drop;

@@ -352,9 +362,9 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 	if (!dev)
 		goto errout;

-	/* For now just support ethernet devices */
+	/* Ensure this is a supported device */
 	err = -EINVAL;
-	if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
+	if (!mpls_dev_get(dev))
 		goto errout;

 	err = -EINVAL;
@@ -428,10 +438,89 @@ errout:
 	return err;
 }

+#define MPLS_PERDEV_SYSCTL_OFFSET(field)	\
+	(&((struct mpls_dev *)0)->field)
+
+static const struct ctl_table mpls_dev_table[] = {
+	{
+		.procname	= "input",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.data		= MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
+	},
+	{ }
+};
+
+static int mpls_dev_sysctl_register(struct net_device *dev,
+				    struct mpls_dev *mdev)
+{
+	char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
+	struct ctl_table *table;
+	int i;
+
+	table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	/* Table data contains only offsets relative to the base of
+	 * the mdev at this point, so make them absolute.
+	 */
+	for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++)
+		table[i].data = (char *)mdev + (uintptr_t)table[i].data;
+
+	snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
+
+	mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
+	if (!mdev->sysctl)
+		goto free;
+
+	return 0;
+
+free:
+	kfree(table);
+out:
+	return -ENOBUFS;
+}
+
+static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
+{
+	struct ctl_table *table;
+
+	table = mdev->sysctl->ctl_table_arg;
+	unregister_net_sysctl_table(mdev->sysctl);
+	kfree(table);
+}
+
+static struct mpls_dev *mpls_add_dev(struct net_device *dev)
+{
+	struct mpls_dev *mdev;
+	int err = -ENOMEM;
+
+	ASSERT_RTNL();
+
+	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+	if (!mdev)
+		return ERR_PTR(err);
+
+	err = mpls_dev_sysctl_register(dev, mdev);
+	if (err)
+		goto free;
+
+	rcu_assign_pointer(dev->mpls_ptr, mdev);
+
+	return mdev;
+
+free:
+	kfree(mdev);
+	return ERR_PTR(err);
+}
+
 static void mpls_ifdown(struct net_device *dev)
 {
 	struct mpls_route __rcu **platform_label;
 	struct net *net = dev_net(dev);
+	struct mpls_dev *mdev;
 	unsigned index;

 	platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -443,14 +532,35 @@ static void mpls_ifdown(struct net_device *dev)
 			continue;
 		rt->rt_dev = NULL;
 	}
+
+	mdev = mpls_dev_get(dev);
+	if (!mdev)
+		return;
+
+	mpls_dev_sysctl_unregister(mdev);
+
+	RCU_INIT_POINTER(dev->mpls_ptr, NULL);
+
+	kfree(mdev);
 }

 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
 			   void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct mpls_dev *mdev;

 	switch(event) {
+	case NETDEV_REGISTER:
+		/* For now just support ethernet devices */
+		if ((dev->type == ARPHRD_ETHER) ||
+		    (dev->type == ARPHRD_LOOPBACK)) {
+			mdev = mpls_add_dev(dev);
+			if (IS_ERR(mdev))
+				return notifier_from_errno(PTR_ERR(mdev));
+		}
+		break;
+
 	case NETDEV_UNREGISTER:
 		mpls_ifdown(dev);
 		break;
@@ -536,6 +646,15 @@ int nla_get_labels(const struct nlattr *nla,
 		if ((dec.bos != bos) || dec.ttl || dec.tc)
 			return -EINVAL;

+		switch (dec.label) {
+		case LABEL_IMPLICIT_NULL:
+			/* RFC3032: This is a label that an LSR may
+			 * assign and distribute, but which never
+			 * actually appears in the encapsulation.
+			 */
+			return -EINVAL;
+		}
+
 		label[i] = dec.label;
 	}
 	*labels = nla_labels;
@@ -912,7 +1031,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
 	return ret;
 }

-static struct ctl_table mpls_table[] = {
+static const struct ctl_table mpls_table[] = {
 	{
 		.procname	= "platform_labels",
 		.data		= NULL,
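The MPLS_PERDEV_SYSCTL_OFFSET() trick above stores field offsets (computed from a NULL struct mpls_dev pointer) in the template table's .data members, and mpls_dev_sysctl_register() rebases them onto each device's private structure before registration. A stripped-down sketch of the idiom, using a hypothetical structure and helper:

	struct foo_dev {
		int input_enabled;
	};

	#define FOO_SYSCTL_OFFSET(field)  (&((struct foo_dev *)0)->field)

	static void foo_rebase(struct ctl_table *table, struct foo_dev *fd)
	{
		/* .data currently holds an offset; turn it into a real pointer */
		table->data = (char *)fd + (uintptr_t)table->data;
	}

The flag this exposes is the same one the mpls_forward() hunk consults, so forwarding on an interface stays disabled until its net/mpls/conf/<dev>/input sysctl is switched on.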
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index fb6de92052c4..693877d69606 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -22,6 +22,12 @@ struct mpls_entry_decoded {
 	u8 bos;
 };

+struct mpls_dev {
+	int			input_enabled;
+
+	struct ctl_table_header *sysctl;
+};
+
 struct sk_buff;

 static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 57d3e1af5630..0522fc9bfb0a 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -63,6 +63,8 @@ int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
 		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
 			goto nla_put_failure;
 		break;
+	default:
+		break;
 	}

 	return 0;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 62cabee42fbe..635dbba93d01 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -108,6 +108,8 @@ static int nft_reject_inet_dump(struct sk_buff *skb,
 		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
 			goto nla_put_failure;
 		break;
+	default:
+		break;
 	}

 	return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 19909d0786a2..ec4adbdcb9b4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1629,13 +1629,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
 	if (data == NULL)
 		return NULL;

-	skb = build_skb(data, size);
+	skb = __build_skb(data, size);
 	if (skb == NULL)
 		vfree(data);
-	else {
-		skb->head_frag = 0;
+	else
 		skb->destructor = netlink_skb_destructor;
-	}

 	return skb;
 }
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a6b30df6ec02..57be6e6aff99 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2143,7 +2143,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			err = __tipc_nl_add_node_links(net, &msg, node,
 						       &prev_link);
 			tipc_node_unlock(node);
-			tipc_node_put(node);
 			if (err)
 				goto out;

diff --git a/net/tipc/server.c b/net/tipc/server.c
index ab6183cdb121..77ff03ed1e18 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -102,7 +102,7 @@ static void tipc_conn_kref_release(struct kref *kref)
 	}
 	saddr->scope = -TIPC_NODE_SCOPE;
 	kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
-	sk_release_kernel(sk);
+	sock_release(sock);
 	con->sock = NULL;
 }

@@ -321,12 +321,9 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
 	struct socket *sock = NULL;
 	int ret;

-	ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock);
+	ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1);
 	if (ret < 0)
 		return NULL;
-
-	sk_change_net(sock->sk, s->net);
-
 	ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
 			       (char *)&s->imp, sizeof(s->imp));
 	if (ret < 0)
@@ -376,7 +373,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)

 create_err:
 	kernel_sock_shutdown(sock, SHUT_RDWR);
-	sk_release_kernel(sock->sk);
+	sock_release(sock);
 	return NULL;
 }

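The TIPC server changes stop creating the kernel listener in init_net and then re-homing it; the target namespace is passed straight to __sock_create() (last argument kern = 1) and teardown becomes a plain sock_release(). Side by side, taken from the hunks above with error handling elided:

	/* before: create in init_net, then move the socket */
	ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock);
	sk_change_net(sock->sk, s->net);
	/* ... torn down with sk_release_kernel(sock->sk); */

	/* after: create directly in the right namespace */
	ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1);
	/* ... torn down with sock_release(sock); */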
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ee90d74d7516..9074b5cede38 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1764,13 +1764,14 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
 {
 	u32 dnode, dport = 0;
-	int err = -TIPC_ERR_NO_PORT;
+	int err;
 	struct sk_buff *skb;
 	struct tipc_sock *tsk;
 	struct tipc_net *tn;
 	struct sock *sk;

 	while (skb_queue_len(inputq)) {
+		err = -TIPC_ERR_NO_PORT;
 		skb = NULL;
 		dport = tipc_skb_peek_port(inputq, dport);
 		tsk = tipc_sk_lookup(net, dport);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 99f7012b23b9..a73a226f2d33 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -95,39 +95,36 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);

 unsigned int unix_tot_inflight;

-
 struct sock *unix_get_socket(struct file *filp)
 {
 	struct sock *u_sock = NULL;
 	struct inode *inode = file_inode(filp);

-	/*
-	 * Socket ?
-	 */
+	/* Socket ? */
 	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
 		struct socket *sock = SOCKET_I(inode);
 		struct sock *s = sock->sk;

-		/*
-		 * PF_UNIX ?
-		 */
+		/* PF_UNIX ? */
 		if (s && sock->ops && sock->ops->family == PF_UNIX)
 			u_sock = s;
 	}
 	return u_sock;
 }

-/*
- * Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
  */

 void unix_inflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
+
 		spin_lock(&unix_gc_lock);
+
 		if (atomic_long_inc_return(&u->inflight) == 1) {
 			BUG_ON(!list_empty(&u->link));
 			list_add_tail(&u->link, &gc_inflight_list);
@@ -142,10 +139,13 @@ void unix_inflight(struct file *fp)
 void unix_notinflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
+
 		spin_lock(&unix_gc_lock);
 		BUG_ON(list_empty(&u->link));
+
 		if (atomic_long_dec_and_test(&u->inflight))
 			list_del_init(&u->link);
 		unix_tot_inflight--;
@@ -161,32 +161,27 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),

 	spin_lock(&x->sk_receive_queue.lock);
 	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
-		/*
-		 * Do we have file descriptors ?
-		 */
+		/* Do we have file descriptors ? */
 		if (UNIXCB(skb).fp) {
 			bool hit = false;
-			/*
-			 * Process the descriptors of this socket
-			 */
+			/* Process the descriptors of this socket */
 			int nfd = UNIXCB(skb).fp->count;
 			struct file **fp = UNIXCB(skb).fp->fp;
+
 			while (nfd--) {
-				/*
-				 * Get the socket the fd matches
-				 * if it indeed does so
-				 */
+				/* Get the socket the fd matches if it indeed does so */
 				struct sock *sk = unix_get_socket(*fp++);
+
 				if (sk) {
 					struct unix_sock *u = unix_sk(sk);

-					/*
-					 * Ignore non-candidates, they could
+					/* Ignore non-candidates, they could
 					 * have been added to the queues after
 					 * starting the garbage collection
 					 */
 					if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
 						hit = true;
+
 						func(u);
 					}
 				}
@@ -203,24 +198,22 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
 			  struct sk_buff_head *hitlist)
 {
-	if (x->sk_state != TCP_LISTEN)
+	if (x->sk_state != TCP_LISTEN) {
 		scan_inflight(x, func, hitlist);
-	else {
+	} else {
 		struct sk_buff *skb;
 		struct sk_buff *next;
 		struct unix_sock *u;
 		LIST_HEAD(embryos);

-		/*
-		 * For a listening socket collect the queued embryos
+		/* For a listening socket collect the queued embryos
 		 * and perform a scan on them as well.
 		 */
 		spin_lock(&x->sk_receive_queue.lock);
 		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
 			u = unix_sk(skb->sk);

-			/*
-			 * An embryo cannot be in-flight, so it's safe
+			/* An embryo cannot be in-flight, so it's safe
 			 * to use the list link.
 			 */
 			BUG_ON(!list_empty(&u->link));
@@ -249,8 +242,7 @@ static void inc_inflight(struct unix_sock *usk)
 static void inc_inflight_move_tail(struct unix_sock *u)
 {
 	atomic_long_inc(&u->inflight);
-	/*
-	 * If this still might be part of a cycle, move it to the end
+	/* If this still might be part of a cycle, move it to the end
 	 * of the list, so that it's checked even if it was already
 	 * passed over
 	 */
@@ -263,8 +255,7 @@ static bool gc_in_progress;

 void wait_for_unix_gc(void)
 {
-	/*
-	 * If number of inflight sockets is insane,
+	/* If number of inflight sockets is insane,
 	 * force a garbage collect right now.
 	 */
 	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
@@ -288,8 +279,7 @@ void unix_gc(void)
 		goto out;

 	gc_in_progress = true;
-	/*
-	 * First, select candidates for garbage collection. Only
+	/* First, select candidates for garbage collection. Only
 	 * in-flight sockets are considered, and from those only ones
 	 * which don't have any external reference.
 	 *
@@ -320,15 +310,13 @@ void unix_gc(void)
 		}
 	}

-	/*
-	 * Now remove all internal in-flight reference to children of
+	/* Now remove all internal in-flight reference to children of
 	 * the candidates.
 	 */
 	list_for_each_entry(u, &gc_candidates, link)
 		scan_children(&u->sk, dec_inflight, NULL);

-	/*
-	 * Restore the references for children of all candidates,
+	/* Restore the references for children of all candidates,
 	 * which have remaining references. Do this recursively, so
 	 * only those remain, which form cyclic references.
 	 *
@@ -350,8 +338,7 @@ void unix_gc(void)
 	}
 	list_del(&cursor);

-	/*
-	 * not_cycle_list contains those sockets which do not make up a
+	/* not_cycle_list contains those sockets which do not make up a
 	 * cycle. Restore these to the inflight list.
 	 */
 	while (!list_empty(&not_cycle_list)) {
@@ -360,8 +347,7 @@ void unix_gc(void)
 		list_move_tail(&u->link, &gc_inflight_list);
 	}

-	/*
-	 * Now gc_candidates contains only garbage. Restore original
+	/* Now gc_candidates contains only garbage. Restore original
 	 * inflight counters for these as well, and remove the skbuffs
 	 * which are creating the cycle(s).
 	 */
