author:    David S. Miller <davem@davemloft.net>  2018-01-28 21:22:46 -0500
committer: David S. Miller <davem@davemloft.net>  2018-01-28 21:22:46 -0500
commit:    457740a903db30b14d53060f4e10d8cdecf464ac
tree:      18dd5d2200031fba1d4784e1376ca2b81ee88523 /net/ipv4
parent:    6b2e2829c11ea677aa97ecfe95d9544aa0208b8c
parent:    8223967fe0b8eb2448cca5cfe3c64a0838e6f60d
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-01-26

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) A number of extensions to tcp-bpf, from Lawrence:
   - direct R or R/W access to many tcp_sock fields via bpf_sock_ops
   - passing up to 3 arguments to bpf_sock_ops functions
   - tcp_sock field bpf_sock_ops_cb_flags for controlling callbacks
   - optionally calling bpf_sock_ops program when RTO fires
   - optionally calling bpf_sock_ops program when packet is retransmitted
   - optionally calling bpf_sock_ops program when TCP state changes
   - access to tclass and sk_txhash
   - new selftest

2) div/mod exception handling, from Daniel. One of the ugly leftovers
   from the early eBPF days is that div/mod operations based on registers
   have a hard-coded src_reg == 0 test in the interpreter as well as in
   JIT code generators that would return from the BPF program with exit
   code 0. This was basically adopted from the cBPF interpreter for
   historical reasons. There are multiple reasons why this is very
   suboptimal and prone to bugs. To name one: the return code mapping for
   such an abnormal program exit of 0 does not always match a suitable
   program type's exit code mapping. For example, '0' in tc means action
   'ok', where the packet gets passed further up the stack, which is
   undesirable for such cases (e.g. when implementing policy) and also
   does not match other program types. After considering _four_ different
   ways to address the problem, we adopt the same behavior as some major
   archs like ARMv8: X div 0 results in 0, and X mod 0 results in X. The
   aarch64 and aarch32 ISAs do not generate traps or otherwise abort
   program execution for unsigned divides. Given the options, this seems
   the most suitable of them all, also since major archs have similar
   schemes in place. Given this is all in the realm of undefined behavior,
   we still have the option to adapt if deemed necessary. (A plain-C
   sketch of the resulting semantics follows this message.)

3) sockmap sample refactoring, from John.

4) lpm map get_next_key fixes, from Yonghong.

5) test cleanups, from Alexei and Prashant.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
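As a concrete illustration of point 2), the defined results reduce to the following plain-C sketch; this models only the documented semantics, not the actual interpreter or JIT changes:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the new eBPF unsigned div/mod semantics: a zero divisor no
 * longer terminates the program with exit code 0; instead the result is
 * defined as below.
 */
static uint64_t bpf_udiv(uint64_t x, uint64_t y)
{
	return y ? x / y : 0;	/* X div 0 -> 0 */
}

static uint64_t bpf_umod(uint64_t x, uint64_t y)
{
	return y ? x % y : x;	/* X mod 0 -> X */
}

int main(void)
{
	printf("%llu %llu\n",
	       (unsigned long long)bpf_udiv(42, 0),	/* prints 0 */
	       (unsigned long long)bpf_umod(42, 0));	/* prints 42 */
	return 0;
}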
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/tcp.c        | 26
-rw-r--r--  net/ipv4/tcp_nv.c     |  2
-rw-r--r--  net/ipv4/tcp_output.c |  6
-rw-r--r--  net/ipv4/tcp_timer.c  |  7
4 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d7cf861bf699..f013ddc191e0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -463,7 +463,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
 	tcp_mtup_init(sk);
 	icsk->icsk_af_ops->rebuild_header(sk);
 	tcp_init_metrics(sk);
-	tcp_call_bpf(sk, bpf_op);
+	tcp_call_bpf(sk, bpf_op, 0, NULL);
 	tcp_init_congestion_control(sk);
 	tcp_init_buffer_space(sk);
 }
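The extended tcp_call_bpf() above now takes an argument count and array (here 0 and NULL). The tcp_call_bpf_2arg()/tcp_call_bpf_3arg() call sites in the hunks below pack their arguments through thin wrappers along these lines (a minimal sketch of the pattern; the exact include/net/tcp.h definitions may differ):

/* Kernel-context sketch: pack scalar arguments into the array form that
 * the extended tcp_call_bpf(sk, op, nargs, args) copies into the
 * bpf_sock_ops context, where the program reads them via args[].
 */
static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
{
	u32 args[2] = {arg1, arg2};

	return tcp_call_bpf(sk, op, 2, args);
}

static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1,
				    u32 arg2, u32 arg3)
{
	u32 args[3] = {arg1, arg2, arg3};

	return tcp_call_bpf(sk, op, 3, args);
}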
@@ -2042,6 +2042,30 @@ void tcp_set_state(struct sock *sk, int state)
 {
 	int oldstate = sk->sk_state;
 
+	/* We defined a new enum for TCP states that are exported in BPF
+	 * so as not to force the internal TCP states to be frozen. The
+	 * following checks will detect if an internal state value ever
+	 * differs from the BPF value. If this ever happens, then we will
+	 * need to remap the internal value to the BPF value before calling
+	 * tcp_call_bpf_2arg.
+	 */
+	BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
+	BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
+	BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
+	BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
+	BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
+	BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
+	BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
+	BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
+	BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
+	BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
+	BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
+	BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+	BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
+
+	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
+		tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
+
 	switch (state) {
 	case TCP_ESTABLISHED:
 		if (oldstate != TCP_ESTABLISHED)
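On the BPF side, a sock_ops program opts in to this callback with the series' bpf_sock_ops_cb_flags_set() helper and reads the old/new states from the new args[] fields. A minimal sketch, assuming libbpf-style headers and a cgroup sockops attach; the program name and bpf_printk tracing are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int watch_states(struct bpf_sock_ops *skops)
{
	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		/* State callbacks are off by default; request them for
		 * this socket (here at active connection start).
		 */
		bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
		break;
	case BPF_SOCK_OPS_STATE_CB:
		/* args[0] = old state, args[1] = new state (BPF_TCP_*) */
		if (skops->args[1] == BPF_TCP_CLOSE)
			bpf_printk("close, old state %d\n", skops->args[0]);
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";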
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 0b5a05bd82e3..ddbce73edae8 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -146,7 +146,7 @@ static void tcpnv_init(struct sock *sk)
 	 * within a datacenter, where we have reasonable estimates of
 	 * RTTs
 	 */
-	base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+	base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL);
 	if (base_rtt > 0) {
 		ca->nv_base_rtt = base_rtt;
 		ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 95461f02ac9a..e9f985e42405 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2905,6 +2905,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
 
+	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
+		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
+				  TCP_SKB_CB(skb)->seq, segs, err);
+
 	if (likely(!err)) {
 		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
 		trace_tcp_retransmit_skb(sk, skb);
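The retransmit callback hands its three arguments to the program the same way; a sketch of an observer, assuming BPF_SOCK_OPS_RETRANS_CB_FLAG was set at connection setup as in the state-callback sketch above:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int watch_retrans(struct bpf_sock_ops *skops)
{
	if (skops->op == BPF_SOCK_OPS_RETRANS_CB) {
		/* args[0] = starting seq, args[1] = segment count,
		 * args[2] = err from tcp_transmit_skb(), per the hunk above
		 */
		if (skops->args[2])
			bpf_printk("retrans failed: seq=%u err=%d\n",
				   skops->args[0], (int)skops->args[2]);
	}
	return 1;
}

char _license[] SEC("license") = "GPL";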
@@ -3469,7 +3473,7 @@ int tcp_connect(struct sock *sk)
 	struct sk_buff *buff;
 	int err;
 
-	tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+	tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
 
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
 		return -EHOSTUNREACH; /* Routing failure or similar. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6db3124cdbda..257abdde23b0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -213,11 +213,18 @@ static int tcp_write_timeout(struct sock *sk)
 					  icsk->icsk_user_timeout);
 	}
 	tcp_fastopen_active_detect_blackhole(sk, expired);
+
+	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
+		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
+				  icsk->icsk_retransmits,
+				  icsk->icsk_rto, (int)expired);
+
 	if (expired) {
 		/* Has it gone just too far? */
 		tcp_write_err(sk);
 		return 1;
 	}
+
 	return 0;
 }
 
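The RTO callback rounds out the set; a sketch along the same lines, assuming BPF_SOCK_OPS_RTO_CB_FLAG was enabled earlier via bpf_sock_ops_cb_flags_set():

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int watch_rto(struct bpf_sock_ops *skops)
{
	if (skops->op == BPF_SOCK_OPS_RTO_CB) {
		/* args[0] = icsk_retransmits, args[1] = icsk_rto,
		 * args[2] = nonzero if the connection is being aborted
		 */
		if (skops->args[2])
			bpf_printk("RTO gave up after %u retries\n",
				   skops->args[0]);
	}
	return 1;
}

char _license[] SEC("license") = "GPL";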