Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/fib_frontend.c             9
-rw-r--r--  net/ipv4/ip_output.c                8
-rw-r--r--  net/ipv4/netfilter/nf_tables_arp.c  3
-rw-r--r--  net/ipv4/syncookies.c               1
-rw-r--r--  net/ipv4/tcp_bbr.c                 49
-rw-r--r--  net/ipv4/udp.c                     13
6 files changed, 60 insertions(+), 23 deletions(-)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e678fa892dd..044d2a159a3c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1334,13 +1334,14 @@ static struct pernet_operations fib_net_ops = {
 
 void __init ip_fib_init(void)
 {
-	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
-	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
-	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+	fib_trie_init();
 
 	register_pernet_subsys(&fib_net_ops);
+
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
 
-	fib_trie_init();
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
 }
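
The reordering above matters because register_netdevice_notifier() replays
NETDEV_REGISTER events for already-registered devices before it returns, and
the FIB code reached from that notifier touches trie state, so fib_trie_init()
must run first. A minimal user-space sketch of the hazard (hypothetical
stand-ins, not kernel code):

#include <stdio.h>

static int table_ready;	/* stands in for the fib_trie caches */

/* Stand-in for fib_netdev_notifier's callback. */
static void notifier_cb(void)
{
	if (!table_ready)
		fprintf(stderr, "BUG: callback ran before init\n");
	else
		printf("callback saw an initialized table\n");
}

/* Like register_netdevice_notifier(), the callback can fire here,
 * during registration, for pre-existing devices.
 */
static void register_notifier(void (*cb)(void))
{
	cb();
}

int main(void)
{
	table_ready = 1;		/* fib_trie_init() analogue */
	register_notifier(notifier_cb);	/* safe only after init */
	return 0;
}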
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7eb252dcecee..50c74cd890bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -599,6 +599,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	hlen = iph->ihl * 4;
 	mtu = mtu - hlen;	/* Size of data space */
 	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
+	ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
 
 	/* When frag_list is given, use it. First, check its validity:
 	 * some transformers could create wrong frag_list or break existing
@@ -614,14 +615,15 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		if (first_len - hlen > mtu ||
 		    ((first_len - hlen) & 7) ||
 		    ip_is_fragment(iph) ||
-		    skb_cloned(skb))
+		    skb_cloned(skb) ||
+		    skb_headroom(skb) < ll_rs)
 			goto slow_path;
 
 		skb_walk_frags(skb, frag) {
 			/* Correct geometry. */
 			if (frag->len > mtu ||
 			    ((frag->len & 7) && frag->next) ||
-			    skb_headroom(frag) < hlen)
+			    skb_headroom(frag) < hlen + ll_rs)
 				goto slow_path_clean;
 
 			/* Partially cloned skb? */
@@ -711,8 +713,6 @@ slow_path:
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;		/* Where to start from */
 
-	ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
-
 	/*
 	 *	Fragment the datagram.
 	 */
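
The change moves the LL_RESERVED_SPACE() lookup ahead of the fast path and adds
headroom checks there: the head skb needs ll_rs bytes free for the link-layer
header, and each frag_list member needs room for the IP header that fast-path
fragmentation pushes plus the link-layer header; without enough headroom the
packet must take the slow path instead. A sketch of the arithmetic with
illustrative numbers (a 20-byte IPv4 header; 16 bytes as LL_RESERVED_SPACE()
typically yields for Ethernet, 14 rounded up by HH_DATA_MOD):

#include <stdio.h>

int main(void)
{
	unsigned int hlen = 20;		/* IPv4 header, no options */
	unsigned int ll_rs = 16;	/* link-layer reserve (illustrative) */

	/* Head skb: IP header already present, only the link-layer
	 * header still needs to be pushed in front of it.
	 */
	printf("head skb needs headroom >= %u\n", ll_rs);

	/* frag_list members: fast-path fragmentation pushes an IP header
	 * onto each one, then the device pushes its link-layer header.
	 */
	printf("frag skbs need headroom >= %u\n", hlen + ll_rs);
	return 0;
}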
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 805c8ddfe860..4bbc273b45e8 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -72,8 +72,7 @@ static const struct nf_chain_type filter_arp = {
 	.family		= NFPROTO_ARP,
 	.owner		= THIS_MODULE,
 	.hook_mask	= (1 << NF_ARP_IN) |
-			  (1 << NF_ARP_OUT) |
-			  (1 << NF_ARP_FORWARD),
+			  (1 << NF_ARP_OUT),
 };
 
 static int __init nf_tables_arp_init(void)
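
ARP traffic is never forwarded, so an NF_ARP_FORWARD hook can never fire and
advertising it in hook_mask is misleading. The mask arithmetic, with the hook
numbers from include/uapi/linux/netfilter_arp.h:

#include <stdio.h>

#define NF_ARP_IN	0
#define NF_ARP_OUT	1
#define NF_ARP_FORWARD	2

int main(void)
{
	unsigned int old_mask = (1 << NF_ARP_IN) | (1 << NF_ARP_OUT) |
				(1 << NF_ARP_FORWARD);
	unsigned int new_mask = (1 << NF_ARP_IN) | (1 << NF_ARP_OUT);

	printf("hook_mask: 0x%x -> 0x%x\n", old_mask, new_mask); /* 0x7 -> 0x3 */
	return 0;
}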
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 0905cf04c2a4..03ad8778c395 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -335,6 +335,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	treq->ts_off		= 0;
+	treq->txhash		= net_tx_rndhash();
 	req->mss		= mss;
 	ireq->ir_num		= ntohs(th->dest);
 	ireq->ir_rmt_port	= th->source;
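
Sockets created from syncookies skipped the txhash initialization that the
normal request-socket path performs in tcp_conn_request(), leaving the child
socket's sk_txhash at zero. net_tx_rndhash() fills in a pseudorandom,
never-zero hash, since zero doubles as the "no hash computed yet" sentinel.
A user-space sketch of that contract (the stand-in RNG is an assumption; the
in-tree helper uses prandom_u32()):

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Sketch of net_tx_rndhash(): pseudorandom and never zero. */
static uint32_t tx_rndhash_sketch(void)
{
	uint32_t v = (uint32_t)random();

	return v ? v : 1;	/* avoid the "no hash yet" sentinel */
}

int main(void)
{
	printf("txhash = %u\n", tx_rndhash_sketch());
	return 0;
}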
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index dbcc9352a48f..69ee877574d0 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -112,7 +112,8 @@ struct bbr {
 		cwnd_gain:10,	/* current gain for setting cwnd */
 		full_bw_cnt:3,	/* number of rounds without large bw gains */
 		cycle_idx:3,	/* current index in pacing_gain cycle array */
-		unused_b:6;
+		has_seen_rtt:1, /* have we seen an RTT sample yet? */
+		unused_b:5;
 	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
 	u32	full_bw;	/* recent bw, to estimate if pipe is full */
 };
@@ -211,6 +212,35 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
 	return rate >> BW_SCALE;
 }
 
+/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
+static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+	u64 rate = bw;
+
+	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+	return rate;
+}
+
+/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 bw;
+	u32 rtt_us;
+
+	if (tp->srtt_us) {		/* any RTT sample yet? */
+		rtt_us = max(tp->srtt_us >> 3, 1U);
+		bbr->has_seen_rtt = 1;
+	} else {			/* no RTT sample yet */
+		rtt_us = USEC_PER_MSEC;	/* use nominal default RTT */
+	}
+	bw = (u64)tp->snd_cwnd * BW_UNIT;
+	do_div(bw, rtt_us);
+	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+}
+
 /* Pace using current bw estimate and a gain factor. In order to help drive the
  * network toward lower queues while maintaining high utilization and low
  * latency, the average pacing rate aims to be slightly (~1%) lower than the
@@ -220,12 +250,13 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
  */
 static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u64 rate = bw;
+	u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
 
-	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
-	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
-	if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+	if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+		bbr_init_pacing_rate_from_rtt(sk);
+	if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
 		sk->sk_pacing_rate = rate;
 }
 
@@ -798,7 +829,6 @@ static void bbr_init(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u64 bw;
 
 	bbr->prior_cwnd = 0;
 	bbr->tso_segs_goal = 0;	 /* default segs per skb until first ACK */
@@ -814,11 +844,8 @@ static void bbr_init(struct sock *sk)
 
 	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
 
-	/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
-	bw = (u64)tp->snd_cwnd * BW_UNIT;
-	do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
-	sk->sk_pacing_rate = 0;		/* force an update of sk_pacing_rate */
-	bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+	bbr->has_seen_rtt = 0;
+	bbr_init_pacing_rate_from_rtt(sk);
 
 	bbr->restore_cwnd = 0;
 	bbr->round_start = 0;
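
Together these hunks let BBR recompute the pacing rate as soon as the first
genuine RTT sample arrives (tracked by has_seen_rtt), instead of pacing at a
rate derived from the 1 ms nominal fallback RTT. Worked numbers for the
initial rate, high_gain * init_cwnd / RTT, using illustrative values
(TCP_INIT_CWND of 10 packets, a 1460-byte MSS, the 1 ms fallback RTT;
bbr_high_gain is 2885/1000 in-tree, i.e. roughly 2/ln(2)):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cwnd = 10;		/* packets, TCP_INIT_CWND */
	uint64_t mss = 1460;		/* bytes, illustrative */
	uint64_t rtt_us = 1000;		/* 1 ms nominal fallback */
	uint64_t gain_num = 2885, gain_den = 1000;	/* ~2.885 */

	/* rate = gain * cwnd * mss / rtt, in bytes per second */
	uint64_t rate = cwnd * mss * 1000000 / rtt_us * gain_num / gain_den;

	printf("initial pacing rate ~= %llu bytes/sec (~42 MB/s)\n",
	       (unsigned long long)rate);
	return 0;
}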
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 25294d43e147..b057653ceca9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1388,6 +1388,11 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 		unlock_sock_fast(sk, slow);
 	}
 
+	/* we cleared the head states previously only if the skb lacks any IP
+	 * options, see __udp_queue_rcv_skb().
+	 */
+	if (unlikely(IPCB(skb)->opt.optlen > 0))
+		skb_release_head_state(skb);
 	consume_stateless_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_consume_udp);
@@ -1779,8 +1784,12 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		sk_mark_napi_id_once(sk, skb);
 	}
 
-	/* clear all pending head states while they are hot in the cache */
-	skb_release_head_state(skb);
+	/* At recvmsg() time we need skb->dst to process IP options-related
+	 * cmsg, elsewhere can we clear all pending head states while they are
+	 * hot in the cache
+	 */
+	if (likely(IPCB(skb)->opt.optlen == 0))
+		skb_release_head_state(skb);
 
 	rc = __udp_enqueue_schedule_skb(sk, skb);
 	if (rc < 0) {
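
The two udp.c hunks split one unconditional release into an either/or: when the
packet carries no IP options, head states (dst and friends) are still dropped
early in __udp_queue_rcv_skb() while cache-hot; when options are present,
skb->dst must survive until recvmsg() builds the options-related cmsg, so the
release is deferred to skb_consume_udp(). A compact sketch of that pattern
(hypothetical stand-in types, not kernel structures):

#include <stdbool.h>
#include <stdio.h>

struct pkt {
	unsigned int optlen;	/* IPCB(skb)->opt.optlen analogue */
	bool released;		/* head states dropped? */
};

static void release_head_state(struct pkt *p)
{
	p->released = true;
}

/* __udp_queue_rcv_skb() analogue: release early only if nothing later
 * needs the dst.
 */
static void enqueue(struct pkt *p)
{
	if (p->optlen == 0)
		release_head_state(p);
}

/* skb_consume_udp() analogue: the deferred release for the options case. */
static void consume(struct pkt *p)
{
	if (p->optlen > 0)
		release_head_state(p);
	printf("optlen=%u released=%d\n", p->optlen, p->released);
}

int main(void)
{
	struct pkt plain = { .optlen = 0 };
	struct pkt with_opts = { .optlen = 4 };

	enqueue(&plain);	consume(&plain);	/* released early */
	enqueue(&with_opts);	consume(&with_opts);	/* released late */
	return 0;
}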