Diffstat (limited to 'net/ipv4')
 net/ipv4/esp4.c                 |  3
 net/ipv4/fib_trie.c             |  3
 net/ipv4/inet_connection_sock.c | 34
 net/ipv4/inet_diag.c            |  8
 net/ipv4/ip_vti.c               | 14
 net/ipv4/netfilter/arp_tables.c |  6
 net/ipv4/netfilter/ip_tables.c  |  6
 net/ipv4/ping.c                 |  1
 net/ipv4/route.c                |  9
 net/ipv4/tcp.c                  | 31
 net/ipv4/tcp_cong.c             |  5
 net/ipv4/tcp_dctcp.c            | 20
 net/ipv4/tcp_fastopen.c         |  5
 net/ipv4/tcp_illinois.c         | 21
 net/ipv4/tcp_input.c            | 55
 net/ipv4/tcp_ipv4.c             |  3
 net/ipv4/tcp_minisocks.c        | 14
 net/ipv4/tcp_output.c           | 64
 net/ipv4/tcp_vegas.c            | 19
 net/ipv4/tcp_vegas.h            |  3
 net/ipv4/tcp_westwood.c         | 15
 net/ipv4/udp.c                  | 24
 22 files changed, 263 insertions(+), 100 deletions(-)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 421a80b09b62..30b544f025ac 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -256,7 +256,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
 	aead_givcrypt_set_assoc(req, asg, assoclen);
 	aead_givcrypt_set_giv(req, esph->enc_data,
-			      XFRM_SKB_CB(skb)->seq.output.low);
+			      XFRM_SKB_CB(skb)->seq.output.low +
+			      ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
 
 	ESP_SKB_CB(skb)->tmp = tmp;
 	err = crypto_aead_givencrypt(req);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e13fcc602da2..09b62e17dd8c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1164,6 +1164,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			state = fa->fa_state;
 			new_fa->fa_state = state & ~FA_S_ACCESSED;
 			new_fa->fa_slen = fa->fa_slen;
+			new_fa->tb_id = tb->tb_id;
 
 			err = netdev_switch_fib_ipv4_add(key, plen, fi,
 							 new_fa->fa_tos,
@@ -1764,7 +1765,7 @@ void fib_table_flush_external(struct fib_table *tb)
 			/* record local slen */
 			slen = fa->fa_slen;
 
-			if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL))
+			if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
 				continue;
 
 			netdev_switch_fib_ipv4_del(n->key,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5c3dd6267ed3..8976ca423a07 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_rtx_syn_ack);
 
+/* return true if req was found in the syn_table[] */
+static bool reqsk_queue_unlink(struct request_sock_queue *queue,
+			       struct request_sock *req)
+{
+	struct listen_sock *lopt = queue->listen_opt;
+	struct request_sock **prev;
+	bool found = false;
+
+	spin_lock(&queue->syn_wait_lock);
+
+	for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
+	     prev = &(*prev)->dl_next) {
+		if (*prev == req) {
+			*prev = req->dl_next;
+			found = true;
+			break;
+		}
+	}
+
+	spin_unlock(&queue->syn_wait_lock);
+	if (del_timer(&req->rsk_timer))
+		reqsk_put(req);
+	return found;
+}
+
+void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+{
+	if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
+		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+		reqsk_put(req);
+	}
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
+
 static void reqsk_timer_handler(unsigned long data)
 {
 	struct request_sock *req = (struct request_sock *)data;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index bb77ebdae3b3..4d32262c7502 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -224,14 +224,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	handler->idiag_get_info(sk, r, info);
 
 	if (sk->sk_state < TCP_TIME_WAIT) {
-		int err = 0;
+		union tcp_cc_info info;
+		size_t sz = 0;
+		int attr;
 
 		rcu_read_lock();
 		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
 		if (ca_ops && ca_ops->get_info)
-			err = ca_ops->get_info(sk, ext, skb);
+			sz = ca_ops->get_info(sk, ext, &attr, &info);
 		rcu_read_unlock();
-		if (err < 0)
+		if (sz && nla_put(skb, attr, sz, &info) < 0)
 			goto errout;
 	}
 
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 9f7269f3c54a..0c152087ca15 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -65,7 +65,6 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
 		goto drop;
 
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
-	skb->mark = be32_to_cpu(tunnel->parms.i_key);
 
 	return xfrm_input(skb, nexthdr, spi, encap_type);
 }
@@ -91,6 +90,8 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
 	struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
+	u32 orig_mark = skb->mark;
+	int ret;
 
 	if (!tunnel)
 		return 1;
@@ -107,7 +108,11 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
 	x = xfrm_input_state(skb);
 	family = x->inner_mode->afinfo->family;
 
-	if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+	skb->mark = be32_to_cpu(tunnel->parms.i_key);
+	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
+	skb->mark = orig_mark;
+
+	if (!ret)
 		return -EPERM;
 
 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
@@ -216,8 +221,6 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	memset(&fl, 0, sizeof(fl));
 
-	skb->mark = be32_to_cpu(tunnel->parms.o_key);
-
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		xfrm_decode_session(skb, &fl, AF_INET);
@@ -233,6 +236,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
+	/* override mark with tunnel output key */
+	fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
+
 	return vti_xmit(skb, dev, &fl);
 }
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 13bfe84bf3ca..a61200754f4b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1075,6 +1075,9 @@ static int do_replace(struct net *net, const void __user *user,
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
@@ -1499,6 +1502,9 @@ static int compat_do_replace(struct net *net, void __user *user,
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c69db7fa25ee..2d0e265fef6e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1262,6 +1262,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
@@ -1809,6 +1812,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a93f260cf24c..05ff44b758df 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -158,6 +158,7 @@ void ping_unhash(struct sock *sk)
 	if (sk_hashed(sk)) {
 		write_lock_bh(&ping_table.lock);
 		hlist_nulls_del(&sk->sk_nulls_node);
+		sk_nulls_node_init(&sk->sk_nulls_node);
 		sock_put(sk);
 		isk->inet_num = 0;
 		isk->inet_sport = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a78540f28276..f45f2a12f37b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -902,6 +902,10 @@ static int ip_error(struct sk_buff *skb)
 	bool send;
 	int code;
 
+	/* IP on this device is disabled. */
+	if (!in_dev)
+		goto out;
+
 	net = dev_net(rt->dst.dev);
 	if (!IN_DEV_FORWARD(in_dev)) {
 		switch (rt->dst.error) {
@@ -962,10 +966,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	if (dst_metric_locked(dst, RTAX_MTU))
 		return;
 
-	if (dst->dev->mtu < mtu)
-		return;
-
-	if (rt->rt_pmtu && rt->rt_pmtu < mtu)
+	if (ipv4_mtu(dst) < mtu)
 		return;
 
 	if (mtu < ip_rt_min_pmtu)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8c5cd9efebbc..f1377f2a0472 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -252,6 +252,7 @@
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/poll.h>
+#include <linux/inet_diag.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/skbuff.h>
@@ -401,6 +402,7 @@ void tcp_init_sock(struct sock *sk)
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_clamp = ~0;
 	tp->mss_cache = TCP_MSS_DEFAULT;
+	u64_stats_init(&tp->syncp);
 
 	tp->reordering = sysctl_tcp_reordering;
 	tcp_enable_early_retrans(tp);
@@ -2592,11 +2594,12 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
 /* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(const struct sock *sk, struct tcp_info *info)
+void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp;
+	unsigned int start;
 	u32 rate;
 
 	memset(info, 0, sizeof(*info));
@@ -2663,6 +2666,12 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 
 	rate = READ_ONCE(sk->sk_max_pacing_rate);
 	info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+
+	do {
+		start = u64_stats_fetch_begin_irq(&tp->syncp);
+		info->tcpi_bytes_acked = tp->bytes_acked;
+		info->tcpi_bytes_received = tp->bytes_received;
+	} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
@@ -2734,6 +2743,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			return -EFAULT;
 		return 0;
 	}
+	case TCP_CC_INFO: {
+		const struct tcp_congestion_ops *ca_ops;
+		union tcp_cc_info info;
+		size_t sz = 0;
+		int attr;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		ca_ops = icsk->icsk_ca_ops;
+		if (ca_ops && ca_ops->get_info)
+			sz = ca_ops->get_info(sk, ~0U, &attr, &info);
+
+		len = min_t(unsigned int, len, sz);
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &info, len))
+			return -EFAULT;
+		return 0;
+	}
 	case TCP_QUICKACK:
 		val = !icsk->icsk_ack.pingpong;
 		break;
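The new TCP_CC_INFO option above can be queried directly from userspace. Below is a minimal sketch, not part of the patch, assuming installed uapi headers new enough to provide union tcp_cc_info in linux/inet_diag.h; the TCP_CC_INFO fallback value matches this kernel series.

/* Userspace sketch (illustrative only): read the congestion-control state
 * of a connected TCP socket through the TCP_CC_INFO getsockopt added above.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/inet_diag.h>	/* union tcp_cc_info, assuming recent headers */

#ifndef TCP_CC_INFO
#define TCP_CC_INFO 26		/* value used by this kernel series */
#endif

static void dump_cc_info(int fd)
{
	union tcp_cc_info cc;
	socklen_t len = sizeof(cc);

	memset(&cc, 0, sizeof(cc));
	if (getsockopt(fd, IPPROTO_TCP, TCP_CC_INFO, &cc, &len) < 0) {
		perror("getsockopt(TCP_CC_INFO)");
		return;
	}
	/* The kernel clamps len to whatever the congestion module's
	 * get_info() returned; len == 0 means no per-CA state is exposed.
	 */
	printf("TCP_CC_INFO: %u bytes of congestion-control state\n",
	       (unsigned int)len);
}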
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 7a5ae50c80c8..84be008c945c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -187,6 +187,7 @@ static void tcp_reinit_congestion_control(struct sock *sk,
 
 	tcp_cleanup_congestion_control(sk);
 	icsk->icsk_ca_ops = ca;
+	icsk->icsk_ca_setsockopt = 1;
 
 	if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
 		icsk->icsk_ca_ops->init(sk);
@@ -335,8 +336,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	rcu_read_lock();
 	ca = __tcp_ca_find_autoload(name);
 	/* No change asking for existing value */
-	if (ca == icsk->icsk_ca_ops)
+	if (ca == icsk->icsk_ca_ops) {
+		icsk->icsk_ca_setsockopt = 1;
 		goto out;
+	}
 	if (!ca)
 		err = -ENOENT;
 	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 4376016f7fa5..4c41c1287197 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -277,7 +277,8 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 	}
 }
 
-static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
+			     union tcp_cc_info *info)
 {
 	const struct dctcp *ca = inet_csk_ca(sk);
 
@@ -286,18 +287,17 @@ static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 	 */
 	if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) ||
 	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcp_dctcp_info info;
-
-		memset(&info, 0, sizeof(info));
+		memset(info, 0, sizeof(struct tcp_dctcp_info));
 		if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) {
-			info.dctcp_enabled = 1;
-			info.dctcp_ce_state = (u16) ca->ce_state;
-			info.dctcp_alpha = ca->dctcp_alpha;
-			info.dctcp_ab_ecn = ca->acked_bytes_ecn;
-			info.dctcp_ab_tot = ca->acked_bytes_total;
+			info->dctcp.dctcp_enabled = 1;
+			info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
+			info->dctcp.dctcp_alpha = ca->dctcp_alpha;
+			info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
+			info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
 		}
 
-		return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
+		*attr = INET_DIAG_DCTCPINFO;
+		return sizeof(*info);
 	}
 	return 0;
 }
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3d87aca6be8..46b087a27503 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -206,6 +206,11 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 		skb_set_owner_r(skb2, child);
 		__skb_queue_tail(&child->sk_receive_queue, skb2);
 		tp->syn_data_acked = 1;
+
+		/* u64_stats_update_begin(&tp->syncp) not needed here,
+		 * as we certainly are not changing upper 32bit value (0)
+		 */
+		tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
 	} else {
 		end_seq = TCP_SKB_CB(skb)->seq + 1;
 	}
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 67476f085e48..f71002e4db0b 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -300,24 +300,25 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
+				union tcp_cc_info *info)
 {
 	const struct illinois *ca = inet_csk_ca(sk);
 
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info info = {
-			.tcpv_enabled = 1,
-			.tcpv_rttcnt = ca->cnt_rtt,
-			.tcpv_minrtt = ca->base_rtt,
-		};
+		info->vegas.tcpv_enabled = 1;
+		info->vegas.tcpv_rttcnt = ca->cnt_rtt;
+		info->vegas.tcpv_minrtt = ca->base_rtt;
+		info->vegas.tcpv_rtt = 0;
 
-		if (info.tcpv_rttcnt > 0) {
+		if (info->vegas.tcpv_rttcnt > 0) {
 			u64 t = ca->sum_rtt;
 
-			do_div(t, info.tcpv_rttcnt);
-			info.tcpv_rtt = t;
+			do_div(t, info->vegas.tcpv_rttcnt);
+			info->vegas.tcpv_rtt = t;
 		}
-		return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+		*attr = INET_DIAG_VEGASINFO;
+		return sizeof(struct tcpvegas_info);
 	}
 	return 0;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3a4d9b34bed4..c9ab964189a0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1820,14 +1820,12 @@ advance_sp:
 	for (j = 0; j < used_sacks; j++)
 		tp->recv_sack_cache[i++] = sp[j];
 
-	tcp_mark_lost_retrans(sk);
-
-	tcp_verify_left_out(tp);
-
 	if ((state.reord < tp->fackets_out) &&
 	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
 		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
 
+	tcp_mark_lost_retrans(sk);
+	tcp_verify_left_out(tp);
 out:
 
 #if FASTRETRANS_DEBUG > 0
@@ -2700,16 +2698,21 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool recovered = !before(tp->snd_una, tp->high_seq);
 
+	if ((flag & FLAG_SND_UNA_ADVANCED) &&
+	    tcp_try_undo_loss(sk, false))
+		return;
+
 	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
 		/* Step 3.b. A timeout is spurious if not all data are
 		 * lost, i.e., never-retransmitted data are (s)acked.
 		 */
-		if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
+		if ((flag & FLAG_ORIG_SACK_ACKED) &&
+		    tcp_try_undo_loss(sk, true))
 			return;
 
-		if (after(tp->snd_nxt, tp->high_seq) &&
-		    (flag & FLAG_DATA_SACKED || is_dupack)) {
-			tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+		if (after(tp->snd_nxt, tp->high_seq)) {
+			if (flag & FLAG_DATA_SACKED || is_dupack)
+				tp->frto = 0; /* Step 3.a. loss was real */
 		} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
 			tp->high_seq = tp->snd_nxt;
 			__tcp_push_pending_frames(sk, tcp_current_mss(sk),
@@ -2734,8 +2737,6 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 		else if (flag & FLAG_SND_UNA_ADVANCED)
 			tcp_reset_reno_sack(tp);
 	}
-	if (tcp_try_undo_loss(sk, false))
-		return;
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3280,6 +3281,28 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
 		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+	u32 delta = ack - tp->snd_una;
+
+	u64_stats_update_begin(&tp->syncp);
+	tp->bytes_acked += delta;
+	u64_stats_update_end(&tp->syncp);
+	tp->snd_una = ack;
+}
+
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+	u32 delta = seq - tp->rcv_nxt;
+
+	u64_stats_update_begin(&tp->syncp);
+	tp->bytes_received += delta;
+	u64_stats_update_end(&tp->syncp);
+	tp->rcv_nxt = seq;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -3315,7 +3338,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		}
 	}
 
-	tp->snd_una = ack;
+	tcp_snd_una_update(tp, ack);
 
 	return flag;
 }
@@ -3497,7 +3520,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
 		 */
 		tcp_update_wl(tp, ack_seq);
-		tp->snd_una = ack;
+		tcp_snd_una_update(tp, ack);
 		flag |= FLAG_WIN_UPDATE;
 
 		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
@@ -4236,7 +4259,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (!eaten)
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4404,7 +4427,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
 		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
-	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		skb_set_owner_r(skb, sk);
@@ -4497,7 +4520,7 @@ queue_and_out:
 
 		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 	}
-	tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 	if (skb->len)
 		tcp_event_data_recv(sk, skb);
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -5245,7 +5268,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			tcp_rcv_rtt_measure_ts(sk, skb);
 
 			__skb_pull(skb, tcp_header_len);
-			tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+			tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
 			eaten = 1;
 		}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3571f2be4470..fc1c658ec6c1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
 	if (req) {
 		nsk = tcp_check_req(sk, skb, req, false);
-		reqsk_put(req);
+		if (!nsk)
+			reqsk_put(req);
 		return nsk;
 	}
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 63d6311b5365..17e7339ee5ca 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -300,7 +300,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tw->tw_v6_daddr = sk->sk_v6_daddr;
 		tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 		tw->tw_tclass = np->tclass;
-		tw->tw_flowlabel = np->flow_label >> 12;
+		tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
 		tw->tw_ipv6only = sk->sk_ipv6only;
 	}
 #endif
@@ -420,7 +420,10 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
 		rcu_read_unlock();
 	}
 
-	if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner))
+	/* If no valid choice made yet, assign current system default ca. */
+	if (!ca_got_dst &&
+	    (!icsk->icsk_ca_setsockopt ||
+	     !try_module_get(icsk->icsk_ca_ops->owner)))
 		tcp_assign_congestion_control(sk);
 
 	tcp_set_ca_state(sk, TCP_CA_Open);
@@ -755,10 +758,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!child)
 		goto listen_overflow;
 
-	inet_csk_reqsk_queue_unlink(sk, req);
-	inet_csk_reqsk_queue_removed(sk, req);
-
+	inet_csk_reqsk_queue_drop(sk, req);
 	inet_csk_reqsk_queue_add(sk, req, child);
+	/* Warning: caller must not call reqsk_put(req);
+	 * child stole last reference on it.
+	 */
 	return child;
 
 listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c8d7e06b72f..a369e8a70b2c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2812,39 +2812,65 @@ begin_fwd:
 	}
 }
 
-/* Send a fin. The caller locks the socket for us. This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
+/* We allow to exceed memory limits for FIN packets to expedite
+ * connection tear down and (memory) recovery.
+ * Otherwise tcp_send_fin() could be tempted to either delay FIN
+ * or even be forced to close flow without any FIN.
+ */
+static void sk_forced_wmem_schedule(struct sock *sk, int size)
+{
+	int amt, status;
+
+	if (size <= sk->sk_forward_alloc)
+		return;
+	amt = sk_mem_pages(size);
+	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+	sk_memory_allocated_add(sk, amt, &status);
+}
+
+/* Send a FIN. The caller locks the socket for us.
+ * We should try to send a FIN packet really hard, but eventually give up.
  */
 void tcp_send_fin(struct sock *sk)
 {
+	struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_write_queue_tail(sk);
-	int mss_now;
 
-	/* Optimization, tack on the FIN if we have a queue of
-	 * unsent frames. But be careful about outgoing SACKS
-	 * and IP options.
+	/* Optimization, tack on the FIN if we have one skb in write queue and
+	 * this skb was not yet sent, or we are under memory pressure.
+	 * Note: in the latter case, FIN packet will be sent after a timeout,
+	 * as TCP stack thinks it has already been transmitted.
 	 */
-	mss_now = tcp_current_mss(sk);
-
-	if (tcp_send_head(sk)) {
-		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
-		TCP_SKB_CB(skb)->end_seq++;
+	if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+coalesce:
+		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
+		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
+		if (!tcp_send_head(sk)) {
+			/* This means tskb was already sent.
+			 * Pretend we included the FIN on previous transmit.
+			 * We need to set tp->snd_nxt to the value it would have
+			 * if FIN had been sent. This is because retransmit path
+			 * does not change tp->snd_nxt.
+			 */
+			tp->snd_nxt++;
+			return;
+		}
 	} else {
-		/* Socket is locked, keep trying until memory is available. */
-		for (;;) {
-			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
-			if (skb)
-				break;
-			yield();
+		skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+		if (unlikely(!skb)) {
+			if (tskb)
+				goto coalesce;
+			return;
 		}
+		skb_reserve(skb, MAX_TCP_HEADER);
+		sk_forced_wmem_schedule(sk, skb->truesize);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
 				     TCPHDR_ACK | TCPHDR_FIN);
 		tcp_queue_skb(sk, skb);
 	}
-	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
 }
 
 /* We get here when a process closes a file descriptor (either due to
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c71a1b8f7bde..a6cea1d5e20d 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -286,18 +286,19 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
+			  union tcp_cc_info *info)
 {
 	const struct vegas *ca = inet_csk_ca(sk);
+
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info info = {
-			.tcpv_enabled = ca->doing_vegas_now,
-			.tcpv_rttcnt = ca->cntRTT,
-			.tcpv_rtt = ca->baseRTT,
-			.tcpv_minrtt = ca->minRTT,
-		};
+		info->vegas.tcpv_enabled = ca->doing_vegas_now,
+		info->vegas.tcpv_rttcnt = ca->cntRTT,
+		info->vegas.tcpv_rtt = ca->baseRTT,
+		info->vegas.tcpv_minrtt = ca->minRTT,
 
-		return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+		*attr = INET_DIAG_VEGASINFO;
+		return sizeof(struct tcpvegas_info);
 	}
 	return 0;
 }
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index e8a6b33cc61d..ef9da5306c68 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -19,6 +19,7 @@ void tcp_vegas_init(struct sock *sk);
 void tcp_vegas_state(struct sock *sk, u8 ca_state);
 void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
-int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
+			  union tcp_cc_info *info);
 
 #endif	/* __TCP_VEGAS_H */
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index b3c57cceb990..c10732e39837 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -256,18 +256,19 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static int tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr,
+				union tcp_cc_info *info)
 {
 	const struct westwood *ca = inet_csk_ca(sk);
 
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info info = {
-			.tcpv_enabled = 1,
-			.tcpv_rtt = jiffies_to_usecs(ca->rtt),
-			.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
-		};
+		info->vegas.tcpv_enabled = 1;
+		info->vegas.tcpv_rttcnt = 0;
+		info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt),
+		info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
 
-		return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+		*attr = INET_DIAG_VEGASINFO;
+		return sizeof(struct tcpvegas_info);
 	}
 	return 0;
 }
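For comparison, an out-of-tree congestion-control module would adapt to the new get_info() signature the same way as the vegas/westwood conversions above. A hypothetical sketch follows; the module name and its private fields are illustrative, not taken from the patch.

/* Illustrative only: hypothetical module state and callback. */
struct mycc {
	u32 last_rtt_us;	/* hypothetical private fields */
	u32 min_rtt_us;
};

static size_t mycc_get_info(struct sock *sk, u32 ext, int *attr,
			    union tcp_cc_info *info)
{
	const struct mycc *ca = inet_csk_ca(sk);

	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		memset(&info->vegas, 0, sizeof(info->vegas));
		info->vegas.tcpv_enabled = 1;
		info->vegas.tcpv_rtt = ca->last_rtt_us;
		info->vegas.tcpv_minrtt = ca->min_rtt_us;

		/* The caller (inet_diag or the TCP_CC_INFO getsockopt) copies
		 * the returned number of bytes and tags them with *attr.
		 */
		*attr = INET_DIAG_VEGASINFO;
		return sizeof(struct tcpvegas_info);
	}
	return 0;
}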
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d10b7e0112eb..83aa604f9273 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -90,6 +90,7 @@
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
@@ -1345,10 +1346,8 @@ csum_copy_err:
 	}
 	unlock_sock_fast(sk, slow);
 
-	if (noblock)
-		return -EAGAIN;
-
-	/* starting over for a new packet */
+	/* starting over for a new packet, but check if we need to yield */
+	cond_resched();
 	msg->msg_flags &= ~MSG_TRUNC;
 	goto try_again;
 }
@@ -1962,6 +1961,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	struct sock *sk;
 	struct dst_entry *dst;
 	int dif = skb->dev->ifindex;
+	int ours;
 
 	/* validate the packet */
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
@@ -1971,14 +1971,24 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	uh = udp_hdr(skb);
 
 	if (skb->pkt_type == PACKET_BROADCAST ||
-	    skb->pkt_type == PACKET_MULTICAST)
+	    skb->pkt_type == PACKET_MULTICAST) {
+		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+
+		if (!in_dev)
+			return;
+
+		ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+				       iph->protocol);
+		if (!ours)
+			return;
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
 						   uh->source, iph->saddr, dif);
-	else if (skb->pkt_type == PACKET_HOST)
+	} else if (skb->pkt_type == PACKET_HOST) {
 		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
 					     uh->source, iph->saddr, dif);
-	else
+	} else {
 		return;
+	}
 
 	if (!sk)
 		return;
