about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2015-03-17 15:18:12 -0400
committer: David S. Miller <davem@davemloft.net> 2015-03-17 15:18:12 -0400
commit: 9f2dbdd9b11d40f5fe0749eb91cd1cfc86fde575 (patch)
tree: 9c5c365b44d7e6f4e7701d8235402b914ce25b79 /net
parent: c24973957975403521ca76a776c2dfd12fbe9add (diff)
parent: 7970ddc8f9ffe149b392975da60739ccd1796dea (diff)
Merge branch 'listener_refactor_part_11'
Eric Dumazet says:

====================
inet: tcp listener refactoring, part 11

Before inserting request sockets into general (ehash) table,
we need to prepare netfilter to cope with them, as they are
not full sockets.

I'll later change xt_socket to get full support, including for
request sockets (NEW_SYN_RECV).

Save 8 bytes in inet_request_sock on 64bit arches. We'll soon add
a pointer to the listener socket.

I included two TCP changes in this patch series.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--  net/ipv4/tcp_input.c                  55
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c        2
-rw-r--r--  net/netfilter/nf_log_common.c          2
-rw-r--r--  net/netfilter/nfnetlink_log.c          2
-rw-r--r--  net/netfilter/nfnetlink_queue_core.c   2
-rw-r--r--  net/netfilter/nft_meta.c               4
-rw-r--r--  net/netfilter/xt_TPROXY.c             18
-rw-r--r--  net/netfilter/xt_socket.c             34
8 files changed, 95 insertions, 24 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 717d437b6ce1..7257eb206c07 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3321,6 +3321,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
3321 return flag; 3321 return flag;
3322} 3322}
3323 3323
3324/* Return true if we're currently rate-limiting out-of-window ACKs and
3325 * thus shouldn't send a dupack right now. We rate-limit dupacks in
3326 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
3327 * attacks that send repeated SYNs or ACKs for the same connection. To
3328 * do this, we do not send a duplicate SYNACK or ACK if the remote
3329 * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
3330 */
3331bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3332 int mib_idx, u32 *last_oow_ack_time)
3333{
3334 /* Data packets without SYNs are not likely part of an ACK loop. */
3335 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3336 !tcp_hdr(skb)->syn)
3337 goto not_rate_limited;
3338
3339 if (*last_oow_ack_time) {
3340 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
3341
3342 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
3343 NET_INC_STATS_BH(net, mib_idx);
3344 return true; /* rate-limited: don't send yet! */
3345 }
3346 }
3347
3348 *last_oow_ack_time = tcp_time_stamp;
3349
3350not_rate_limited:
3351 return false; /* not rate-limited: go ahead, send dupack now! */
3352}
3353
3324/* RFC 5961 7 [ACK Throttling] */ 3354/* RFC 5961 7 [ACK Throttling] */
3325static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) 3355static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3326{ 3356{
@@ -5912,6 +5942,31 @@ static void tcp_ecn_create_request(struct request_sock *req,
5912 inet_rsk(req)->ecn_ok = 1; 5942 inet_rsk(req)->ecn_ok = 1;
5913} 5943}
5914 5944
5945static void tcp_openreq_init(struct request_sock *req,
5946 const struct tcp_options_received *rx_opt,
5947 struct sk_buff *skb, const struct sock *sk)
5948{
5949 struct inet_request_sock *ireq = inet_rsk(req);
5950
5951 req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
5952 req->cookie_ts = 0;
5953 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
5954 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5955 tcp_rsk(req)->snt_synack = tcp_time_stamp;
5956 tcp_rsk(req)->last_oow_ack_time = 0;
5957 req->mss = rx_opt->mss_clamp;
5958 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
5959 ireq->tstamp_ok = rx_opt->tstamp_ok;
5960 ireq->sack_ok = rx_opt->sack_ok;
5961 ireq->snd_wscale = rx_opt->snd_wscale;
5962 ireq->wscale_ok = rx_opt->wscale_ok;
5963 ireq->acked = 0;
5964 ireq->ecn_ok = 0;
5965 ireq->ir_rmt_port = tcp_hdr(skb)->source;
5966 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
5967 ireq->ir_mark = inet_request_mark(sk, skb);
5968}
5969
5915int tcp_conn_request(struct request_sock_ops *rsk_ops, 5970int tcp_conn_request(struct request_sock_ops *rsk_ops,
5916 const struct tcp_request_sock_ops *af_ops, 5971 const struct tcp_request_sock_ops *af_ops,
5917 struct sock *sk, struct sk_buff *skb) 5972 struct sock *sk, struct sk_buff *skb)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3aedbda7658a..f35c15b0de6b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
209 struct sock *sk = skb->sk; 209 struct sock *sk = skb->sk;
210 struct rtable *ort = skb_rtable(skb); 210 struct rtable *ort = skb_rtable(skb);
211 211
212 if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) 212 if (!skb->dev && sk && sk_fullsock(sk))
213 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); 213 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
214} 214}
215 215
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a2233e77cf39..2631876ac55b 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
133 133
134void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk) 134void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
135{ 135{
136 if (!sk || sk->sk_state == TCP_TIME_WAIT) 136 if (!sk || !sk_fullsock(sk))
137 return; 137 return;
138 138
139 read_lock_bh(&sk->sk_callback_lock); 139 read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11d85b3813f2..61d04bf9be2b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -539,7 +539,7 @@ __build_packet_message(struct nfnl_log_net *log,
539 539
540 /* UID */ 540 /* UID */
541 sk = skb->sk; 541 sk = skb->sk;
542 if (sk && sk->sk_state != TCP_TIME_WAIT) { 542 if (sk && sk_fullsock(sk)) {
543 read_lock_bh(&sk->sk_callback_lock); 543 read_lock_bh(&sk->sk_callback_lock);
544 if (sk->sk_socket && sk->sk_socket->file) { 544 if (sk->sk_socket && sk->sk_socket->file) {
545 struct file *file = sk->sk_socket->file; 545 struct file *file = sk->sk_socket->file;
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 0db8515e76da..86ee8b05adae 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -257,7 +257,7 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
257{ 257{
258 const struct cred *cred; 258 const struct cred *cred;
259 259
260 if (sk->sk_state == TCP_TIME_WAIT) 260 if (!sk_fullsock(sk))
261 return 0; 261 return 0;
262 262
263 read_lock_bh(&sk->sk_callback_lock); 263 read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e99911eda915..abe68119a76c 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -83,7 +83,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
83 *(u16 *)dest->data = out->type; 83 *(u16 *)dest->data = out->type;
84 break; 84 break;
85 case NFT_META_SKUID: 85 case NFT_META_SKUID:
86 if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) 86 if (skb->sk == NULL || !sk_fullsock(skb->sk))
87 goto err; 87 goto err;
88 88
89 read_lock_bh(&skb->sk->sk_callback_lock); 89 read_lock_bh(&skb->sk->sk_callback_lock);
@@ -99,7 +99,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
99 read_unlock_bh(&skb->sk->sk_callback_lock); 99 read_unlock_bh(&skb->sk->sk_callback_lock);
100 break; 100 break;
101 case NFT_META_SKGID: 101 case NFT_META_SKGID:
102 if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) 102 if (skb->sk == NULL || !sk_fullsock(skb->sk))
103 goto err; 103 goto err;
104 104
105 read_lock_bh(&skb->sk->sk_callback_lock); 105 read_lock_bh(&skb->sk->sk_callback_lock);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index ef8a926752a9..165b77ce9aa9 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -42,15 +42,21 @@ enum nf_tproxy_lookup_t {
42 42
43static bool tproxy_sk_is_transparent(struct sock *sk) 43static bool tproxy_sk_is_transparent(struct sock *sk)
44{ 44{
45 if (sk->sk_state != TCP_TIME_WAIT) { 45 switch (sk->sk_state) {
46 if (inet_sk(sk)->transparent) 46 case TCP_TIME_WAIT:
47 return true;
48 sock_put(sk);
49 } else {
50 if (inet_twsk(sk)->tw_transparent) 47 if (inet_twsk(sk)->tw_transparent)
51 return true; 48 return true;
52 inet_twsk_put(inet_twsk(sk)); 49 break;
50 case TCP_NEW_SYN_RECV:
51 if (inet_rsk(inet_reqsk(sk))->no_srccheck)
52 return true;
53 break;
54 default:
55 if (inet_sk(sk)->transparent)
56 return true;
53 } 57 }
58
59 sock_gen_put(sk);
54 return false; 60 return false;
55} 61}
56 62
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 13332dbf291d..895534e87a47 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -129,6 +129,20 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol,
129 return NULL; 129 return NULL;
130} 130}
131 131
132static bool xt_socket_sk_is_transparent(struct sock *sk)
133{
134 switch (sk->sk_state) {
135 case TCP_TIME_WAIT:
136 return inet_twsk(sk)->tw_transparent;
137
138 case TCP_NEW_SYN_RECV:
139 return inet_rsk(inet_reqsk(sk))->no_srccheck;
140
141 default:
142 return inet_sk(sk)->transparent;
143 }
144}
145
132static bool 146static bool
133socket_match(const struct sk_buff *skb, struct xt_action_param *par, 147socket_match(const struct sk_buff *skb, struct xt_action_param *par,
134 const struct xt_socket_mtinfo1 *info) 148 const struct xt_socket_mtinfo1 *info)
@@ -195,16 +209,14 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
195 * unless XT_SOCKET_NOWILDCARD is set 209 * unless XT_SOCKET_NOWILDCARD is set
196 */ 210 */
197 wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && 211 wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
198 sk->sk_state != TCP_TIME_WAIT && 212 sk_fullsock(sk) &&
199 inet_sk(sk)->inet_rcv_saddr == 0); 213 inet_sk(sk)->inet_rcv_saddr == 0);
200 214
201 /* Ignore non-transparent sockets, 215 /* Ignore non-transparent sockets,
202 if XT_SOCKET_TRANSPARENT is used */ 216 * if XT_SOCKET_TRANSPARENT is used
217 */
203 if (info->flags & XT_SOCKET_TRANSPARENT) 218 if (info->flags & XT_SOCKET_TRANSPARENT)
204 transparent = ((sk->sk_state != TCP_TIME_WAIT && 219 transparent = xt_socket_sk_is_transparent(sk);
205 inet_sk(sk)->transparent) ||
206 (sk->sk_state == TCP_TIME_WAIT &&
207 inet_twsk(sk)->tw_transparent));
208 220
209 if (sk != skb->sk) 221 if (sk != skb->sk)
210 sock_gen_put(sk); 222 sock_gen_put(sk);
@@ -363,16 +375,14 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
363 * unless XT_SOCKET_NOWILDCARD is set 375 * unless XT_SOCKET_NOWILDCARD is set
364 */ 376 */
365 wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && 377 wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
366 sk->sk_state != TCP_TIME_WAIT && 378 sk_fullsock(sk) &&
367 ipv6_addr_any(&sk->sk_v6_rcv_saddr)); 379 ipv6_addr_any(&sk->sk_v6_rcv_saddr));
368 380
369 /* Ignore non-transparent sockets, 381 /* Ignore non-transparent sockets,
370 if XT_SOCKET_TRANSPARENT is used */ 382 * if XT_SOCKET_TRANSPARENT is used
383 */
371 if (info->flags & XT_SOCKET_TRANSPARENT) 384 if (info->flags & XT_SOCKET_TRANSPARENT)
372 transparent = ((sk->sk_state != TCP_TIME_WAIT && 385 transparent = xt_socket_sk_is_transparent(sk);
373 inet_sk(sk)->transparent) ||
374 (sk->sk_state == TCP_TIME_WAIT &&
375 inet_twsk(sk)->tw_transparent));
376 386
377 if (sk != skb->sk) 387 if (sk != skb->sk)
378 sock_gen_put(sk); 388 sock_gen_put(sk);