diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/fib_frontend.c | 9 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 8 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_tables_arp.c | 3 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_bbr.c | 49 | ||||
-rw-r--r-- | net/ipv4/udp.c | 13 |
6 files changed, 60 insertions, 23 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e678fa892dd..044d2a159a3c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -1334,13 +1334,14 @@ static struct pernet_operations fib_net_ops = { | |||
1334 | 1334 | ||
1335 | void __init ip_fib_init(void) | 1335 | void __init ip_fib_init(void) |
1336 | { | 1336 | { |
1337 | rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); | 1337 | fib_trie_init(); |
1338 | rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); | ||
1339 | rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); | ||
1340 | 1338 | ||
1341 | register_pernet_subsys(&fib_net_ops); | 1339 | register_pernet_subsys(&fib_net_ops); |
1340 | |||
1342 | register_netdevice_notifier(&fib_netdev_notifier); | 1341 | register_netdevice_notifier(&fib_netdev_notifier); |
1343 | register_inetaddr_notifier(&fib_inetaddr_notifier); | 1342 | register_inetaddr_notifier(&fib_inetaddr_notifier); |
1344 | 1343 | ||
1345 | fib_trie_init(); | 1344 | rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); |
1345 | rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); | ||
1346 | rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); | ||
1346 | } | 1347 | } |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7eb252dcecee..50c74cd890bc 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -599,6 +599,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, | |||
599 | hlen = iph->ihl * 4; | 599 | hlen = iph->ihl * 4; |
600 | mtu = mtu - hlen; /* Size of data space */ | 600 | mtu = mtu - hlen; /* Size of data space */ |
601 | IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; | 601 | IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; |
602 | ll_rs = LL_RESERVED_SPACE(rt->dst.dev); | ||
602 | 603 | ||
603 | /* When frag_list is given, use it. First, check its validity: | 604 | /* When frag_list is given, use it. First, check its validity: |
604 | * some transformers could create wrong frag_list or break existing | 605 | * some transformers could create wrong frag_list or break existing |
@@ -614,14 +615,15 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, | |||
614 | if (first_len - hlen > mtu || | 615 | if (first_len - hlen > mtu || |
615 | ((first_len - hlen) & 7) || | 616 | ((first_len - hlen) & 7) || |
616 | ip_is_fragment(iph) || | 617 | ip_is_fragment(iph) || |
617 | skb_cloned(skb)) | 618 | skb_cloned(skb) || |
619 | skb_headroom(skb) < ll_rs) | ||
618 | goto slow_path; | 620 | goto slow_path; |
619 | 621 | ||
620 | skb_walk_frags(skb, frag) { | 622 | skb_walk_frags(skb, frag) { |
621 | /* Correct geometry. */ | 623 | /* Correct geometry. */ |
622 | if (frag->len > mtu || | 624 | if (frag->len > mtu || |
623 | ((frag->len & 7) && frag->next) || | 625 | ((frag->len & 7) && frag->next) || |
624 | skb_headroom(frag) < hlen) | 626 | skb_headroom(frag) < hlen + ll_rs) |
625 | goto slow_path_clean; | 627 | goto slow_path_clean; |
626 | 628 | ||
627 | /* Partially cloned skb? */ | 629 | /* Partially cloned skb? */ |
@@ -711,8 +713,6 @@ slow_path: | |||
711 | left = skb->len - hlen; /* Space per frame */ | 713 | left = skb->len - hlen; /* Space per frame */ |
712 | ptr = hlen; /* Where to start from */ | 714 | ptr = hlen; /* Where to start from */ |
713 | 715 | ||
714 | ll_rs = LL_RESERVED_SPACE(rt->dst.dev); | ||
715 | |||
716 | /* | 716 | /* |
717 | * Fragment the datagram. | 717 | * Fragment the datagram. |
718 | */ | 718 | */ |
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 805c8ddfe860..4bbc273b45e8 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c | |||
@@ -72,8 +72,7 @@ static const struct nf_chain_type filter_arp = { | |||
72 | .family = NFPROTO_ARP, | 72 | .family = NFPROTO_ARP, |
73 | .owner = THIS_MODULE, | 73 | .owner = THIS_MODULE, |
74 | .hook_mask = (1 << NF_ARP_IN) | | 74 | .hook_mask = (1 << NF_ARP_IN) | |
75 | (1 << NF_ARP_OUT) | | 75 | (1 << NF_ARP_OUT), |
76 | (1 << NF_ARP_FORWARD), | ||
77 | }; | 76 | }; |
78 | 77 | ||
79 | static int __init nf_tables_arp_init(void) | 78 | static int __init nf_tables_arp_init(void) |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0905cf04c2a4..03ad8778c395 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -335,6 +335,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) | |||
335 | treq->rcv_isn = ntohl(th->seq) - 1; | 335 | treq->rcv_isn = ntohl(th->seq) - 1; |
336 | treq->snt_isn = cookie; | 336 | treq->snt_isn = cookie; |
337 | treq->ts_off = 0; | 337 | treq->ts_off = 0; |
338 | treq->txhash = net_tx_rndhash(); | ||
338 | req->mss = mss; | 339 | req->mss = mss; |
339 | ireq->ir_num = ntohs(th->dest); | 340 | ireq->ir_num = ntohs(th->dest); |
340 | ireq->ir_rmt_port = th->source; | 341 | ireq->ir_rmt_port = th->source; |
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index dbcc9352a48f..69ee877574d0 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c | |||
@@ -112,7 +112,8 @@ struct bbr { | |||
112 | cwnd_gain:10, /* current gain for setting cwnd */ | 112 | cwnd_gain:10, /* current gain for setting cwnd */ |
113 | full_bw_cnt:3, /* number of rounds without large bw gains */ | 113 | full_bw_cnt:3, /* number of rounds without large bw gains */ |
114 | cycle_idx:3, /* current index in pacing_gain cycle array */ | 114 | cycle_idx:3, /* current index in pacing_gain cycle array */ |
115 | unused_b:6; | 115 | has_seen_rtt:1, /* have we seen an RTT sample yet? */ |
116 | unused_b:5; | ||
116 | u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ | 117 | u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ |
117 | u32 full_bw; /* recent bw, to estimate if pipe is full */ | 118 | u32 full_bw; /* recent bw, to estimate if pipe is full */ |
118 | }; | 119 | }; |
@@ -211,6 +212,35 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) | |||
211 | return rate >> BW_SCALE; | 212 | return rate >> BW_SCALE; |
212 | } | 213 | } |
213 | 214 | ||
215 | /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ | ||
216 | static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) | ||
217 | { | ||
218 | u64 rate = bw; | ||
219 | |||
220 | rate = bbr_rate_bytes_per_sec(sk, rate, gain); | ||
221 | rate = min_t(u64, rate, sk->sk_max_pacing_rate); | ||
222 | return rate; | ||
223 | } | ||
224 | |||
225 | /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ | ||
226 | static void bbr_init_pacing_rate_from_rtt(struct sock *sk) | ||
227 | { | ||
228 | struct tcp_sock *tp = tcp_sk(sk); | ||
229 | struct bbr *bbr = inet_csk_ca(sk); | ||
230 | u64 bw; | ||
231 | u32 rtt_us; | ||
232 | |||
233 | if (tp->srtt_us) { /* any RTT sample yet? */ | ||
234 | rtt_us = max(tp->srtt_us >> 3, 1U); | ||
235 | bbr->has_seen_rtt = 1; | ||
236 | } else { /* no RTT sample yet */ | ||
237 | rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ | ||
238 | } | ||
239 | bw = (u64)tp->snd_cwnd * BW_UNIT; | ||
240 | do_div(bw, rtt_us); | ||
241 | sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); | ||
242 | } | ||
243 | |||
214 | /* Pace using current bw estimate and a gain factor. In order to help drive the | 244 | /* Pace using current bw estimate and a gain factor. In order to help drive the |
215 | * network toward lower queues while maintaining high utilization and low | 245 | * network toward lower queues while maintaining high utilization and low |
216 | * latency, the average pacing rate aims to be slightly (~1%) lower than the | 246 | * latency, the average pacing rate aims to be slightly (~1%) lower than the |
@@ -220,12 +250,13 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) | |||
220 | */ | 250 | */ |
221 | static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) | 251 | static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) |
222 | { | 252 | { |
253 | struct tcp_sock *tp = tcp_sk(sk); | ||
223 | struct bbr *bbr = inet_csk_ca(sk); | 254 | struct bbr *bbr = inet_csk_ca(sk); |
224 | u64 rate = bw; | 255 | u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain); |
225 | 256 | ||
226 | rate = bbr_rate_bytes_per_sec(sk, rate, gain); | 257 | if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) |
227 | rate = min_t(u64, rate, sk->sk_max_pacing_rate); | 258 | bbr_init_pacing_rate_from_rtt(sk); |
228 | if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate) | 259 | if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) |
229 | sk->sk_pacing_rate = rate; | 260 | sk->sk_pacing_rate = rate; |
230 | } | 261 | } |
231 | 262 | ||
@@ -798,7 +829,6 @@ static void bbr_init(struct sock *sk) | |||
798 | { | 829 | { |
799 | struct tcp_sock *tp = tcp_sk(sk); | 830 | struct tcp_sock *tp = tcp_sk(sk); |
800 | struct bbr *bbr = inet_csk_ca(sk); | 831 | struct bbr *bbr = inet_csk_ca(sk); |
801 | u64 bw; | ||
802 | 832 | ||
803 | bbr->prior_cwnd = 0; | 833 | bbr->prior_cwnd = 0; |
804 | bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ | 834 | bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ |
@@ -814,11 +844,8 @@ static void bbr_init(struct sock *sk) | |||
814 | 844 | ||
815 | minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ | 845 | minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ |
816 | 846 | ||
817 | /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ | 847 | bbr->has_seen_rtt = 0; |
818 | bw = (u64)tp->snd_cwnd * BW_UNIT; | 848 | bbr_init_pacing_rate_from_rtt(sk); |
819 | do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC); | ||
820 | sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ | ||
821 | bbr_set_pacing_rate(sk, bw, bbr_high_gain); | ||
822 | 849 | ||
823 | bbr->restore_cwnd = 0; | 850 | bbr->restore_cwnd = 0; |
824 | bbr->round_start = 0; | 851 | bbr->round_start = 0; |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 25294d43e147..b057653ceca9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -1388,6 +1388,11 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) | |||
1388 | unlock_sock_fast(sk, slow); | 1388 | unlock_sock_fast(sk, slow); |
1389 | } | 1389 | } |
1390 | 1390 | ||
1391 | /* we cleared the head states previously only if the skb lacks any IP | ||
1392 | * options, see __udp_queue_rcv_skb(). | ||
1393 | */ | ||
1394 | if (unlikely(IPCB(skb)->opt.optlen > 0)) | ||
1395 | skb_release_head_state(skb); | ||
1391 | consume_stateless_skb(skb); | 1396 | consume_stateless_skb(skb); |
1392 | } | 1397 | } |
1393 | EXPORT_SYMBOL_GPL(skb_consume_udp); | 1398 | EXPORT_SYMBOL_GPL(skb_consume_udp); |
@@ -1779,8 +1784,12 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1779 | sk_mark_napi_id_once(sk, skb); | 1784 | sk_mark_napi_id_once(sk, skb); |
1780 | } | 1785 | } |
1781 | 1786 | ||
1782 | /* clear all pending head states while they are hot in the cache */ | 1787 | /* At recvmsg() time we need skb->dst to process IP options-related |
1783 | skb_release_head_state(skb); | 1788 | * cmsg, elsewhere can we clear all pending head states while they are |
1789 | * hot in the cache | ||
1790 | */ | ||
1791 | if (likely(IPCB(skb)->opt.optlen == 0)) | ||
1792 | skb_release_head_state(skb); | ||
1784 | 1793 | ||
1785 | rc = __udp_enqueue_schedule_skb(sk, skb); | 1794 | rc = __udp_enqueue_schedule_skb(sk, skb); |
1786 | if (rc < 0) { | 1795 | if (rc < 0) { |