about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-03-17 15:18:12 -0400
committerDavid S. Miller <davem@davemloft.net>2015-03-17 15:18:12 -0400
commit9f2dbdd9b11d40f5fe0749eb91cd1cfc86fde575 (patch)
tree9c5c365b44d7e6f4e7701d8235402b914ce25b79
parentc24973957975403521ca76a776c2dfd12fbe9add (diff)
parent7970ddc8f9ffe149b392975da60739ccd1796dea (diff)
Merge branch 'listener_refactor_part_11'
Eric Dumazet says: ==================== inet: tcp listener refactoring, part 11 Before inserting request sockets into general (ehash) table, we need to prepare netfilter to cope with them, as they are not full sockets. I'll later change xt_socket to get full support, including for request sockets (NEW_SYN_RECV) Save 8 bytes in inet_request_sock on 64bit arches. We'll soon add a pointer to the listener socket. I included two TCP changes in this patch series. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inet_sock.h11
-rw-r--r--include/net/tcp.h57
-rw-r--r--net/ipv4/tcp_input.c55
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_log_common.c2
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue_core.c2
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/xt_TPROXY.c18
-rw-r--r--net/netfilter/xt_socket.c34
10 files changed, 102 insertions, 85 deletions
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 3d8c09abb097..c9ed91891887 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -94,11 +94,11 @@ struct inet_request_sock {
94 acked : 1, 94 acked : 1,
95 no_srccheck: 1; 95 no_srccheck: 1;
96 kmemcheck_bitfield_end(flags); 96 kmemcheck_bitfield_end(flags);
97 u32 ir_mark;
97 union { 98 union {
98 struct ip_options_rcu *opt; 99 struct ip_options_rcu *opt;
99 struct sk_buff *pktopts; 100 struct sk_buff *pktopts;
100 }; 101 };
101 u32 ir_mark;
102}; 102};
103 103
104static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) 104static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -106,13 +106,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
106 return (struct inet_request_sock *)sk; 106 return (struct inet_request_sock *)sk;
107} 107}
108 108
109static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb) 109static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
110{ 110{
111 if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) { 111 if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
112 return skb->mark; 112 return skb->mark;
113 } else { 113
114 return sk->sk_mark; 114 return sk->sk_mark;
115 }
116} 115}
117 116
118struct inet_cork { 117struct inet_cork {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2e11e38205c2..5b29835b81d8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1137,31 +1137,6 @@ static inline int tcp_full_space(const struct sock *sk)
1137 return tcp_win_from_space(sk->sk_rcvbuf); 1137 return tcp_win_from_space(sk->sk_rcvbuf);
1138} 1138}
1139 1139
1140static inline void tcp_openreq_init(struct request_sock *req,
1141 struct tcp_options_received *rx_opt,
1142 struct sk_buff *skb, struct sock *sk)
1143{
1144 struct inet_request_sock *ireq = inet_rsk(req);
1145
1146 req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
1147 req->cookie_ts = 0;
1148 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
1149 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
1150 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1151 tcp_rsk(req)->last_oow_ack_time = 0;
1152 req->mss = rx_opt->mss_clamp;
1153 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
1154 ireq->tstamp_ok = rx_opt->tstamp_ok;
1155 ireq->sack_ok = rx_opt->sack_ok;
1156 ireq->snd_wscale = rx_opt->snd_wscale;
1157 ireq->wscale_ok = rx_opt->wscale_ok;
1158 ireq->acked = 0;
1159 ireq->ecn_ok = 0;
1160 ireq->ir_rmt_port = tcp_hdr(skb)->source;
1161 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
1162 ireq->ir_mark = inet_request_mark(sk, skb);
1163}
1164
1165extern void tcp_openreq_init_rwin(struct request_sock *req, 1140extern void tcp_openreq_init_rwin(struct request_sock *req,
1166 struct sock *sk, struct dst_entry *dst); 1141 struct sock *sk, struct dst_entry *dst);
1167 1142
@@ -1241,36 +1216,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
1241 return true; 1216 return true;
1242} 1217}
1243 1218
1244/* Return true if we're currently rate-limiting out-of-window ACKs and 1219bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
1245 * thus shouldn't send a dupack right now. We rate-limit dupacks in 1220 int mib_idx, u32 *last_oow_ack_time);
1246 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
1247 * attacks that send repeated SYNs or ACKs for the same connection. To
1248 * do this, we do not send a duplicate SYNACK or ACK if the remote
1249 * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
1250 */
1251static inline bool tcp_oow_rate_limited(struct net *net,
1252 const struct sk_buff *skb,
1253 int mib_idx, u32 *last_oow_ack_time)
1254{
1255 /* Data packets without SYNs are not likely part of an ACK loop. */
1256 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
1257 !tcp_hdr(skb)->syn)
1258 goto not_rate_limited;
1259
1260 if (*last_oow_ack_time) {
1261 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
1262
1263 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
1264 NET_INC_STATS_BH(net, mib_idx);
1265 return true; /* rate-limited: don't send yet! */
1266 }
1267 }
1268
1269 *last_oow_ack_time = tcp_time_stamp;
1270
1271not_rate_limited:
1272 return false; /* not rate-limited: go ahead, send dupack now! */
1273}
1274 1221
1275static inline void tcp_mib_init(struct net *net) 1222static inline void tcp_mib_init(struct net *net)
1276{ 1223{
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 717d437b6ce1..7257eb206c07 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3321,6 +3321,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
3321 return flag; 3321 return flag;
3322} 3322}
3323 3323
3324/* Return true if we're currently rate-limiting out-of-window ACKs and
3325 * thus shouldn't send a dupack right now. We rate-limit dupacks in
3326 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
3327 * attacks that send repeated SYNs or ACKs for the same connection. To
3328 * do this, we do not send a duplicate SYNACK or ACK if the remote
3329 * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
3330 */
3331bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3332 int mib_idx, u32 *last_oow_ack_time)
3333{
3334 /* Data packets without SYNs are not likely part of an ACK loop. */
3335 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3336 !tcp_hdr(skb)->syn)
3337 goto not_rate_limited;
3338
3339 if (*last_oow_ack_time) {
3340 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
3341
3342 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
3343 NET_INC_STATS_BH(net, mib_idx);
3344 return true; /* rate-limited: don't send yet! */
3345 }
3346 }
3347
3348 *last_oow_ack_time = tcp_time_stamp;
3349
3350not_rate_limited:
3351 return false; /* not rate-limited: go ahead, send dupack now! */
3352}
3353
3324/* RFC 5961 7 [ACK Throttling] */ 3354/* RFC 5961 7 [ACK Throttling] */
3325static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) 3355static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3326{ 3356{
@@ -5912,6 +5942,31 @@ static void tcp_ecn_create_request(struct request_sock *req,
5912 inet_rsk(req)->ecn_ok = 1; 5942 inet_rsk(req)->ecn_ok = 1;
5913} 5943}
5914 5944
5945static void tcp_openreq_init(struct request_sock *req,
5946 const struct tcp_options_received *rx_opt,
5947 struct sk_buff *skb, const struct sock *sk)
5948{
5949 struct inet_request_sock *ireq = inet_rsk(req);
5950
5951 req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
5952 req->cookie_ts = 0;
5953 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
5954 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5955 tcp_rsk(req)->snt_synack = tcp_time_stamp;
5956 tcp_rsk(req)->last_oow_ack_time = 0;
5957 req->mss = rx_opt->mss_clamp;
5958 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
5959 ireq->tstamp_ok = rx_opt->tstamp_ok;
5960 ireq->sack_ok = rx_opt->sack_ok;
5961 ireq->snd_wscale = rx_opt->snd_wscale;
5962 ireq->wscale_ok = rx_opt->wscale_ok;
5963 ireq->acked = 0;
5964 ireq->ecn_ok = 0;
5965 ireq->ir_rmt_port = tcp_hdr(skb)->source;
5966 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
5967 ireq->ir_mark = inet_request_mark(sk, skb);
5968}
5969
5915int tcp_conn_request(struct request_sock_ops *rsk_ops, 5970int tcp_conn_request(struct request_sock_ops *rsk_ops,
5916 const struct tcp_request_sock_ops *af_ops, 5971 const struct tcp_request_sock_ops *af_ops,
5917 struct sock *sk, struct sk_buff *skb) 5972 struct sock *sk, struct sk_buff *skb)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3aedbda7658a..f35c15b0de6b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
209 struct sock *sk = skb->sk; 209 struct sock *sk = skb->sk;
210 struct rtable *ort = skb_rtable(skb); 210 struct rtable *ort = skb_rtable(skb);
211 211
212 if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) 212 if (!skb->dev && sk && sk_fullsock(sk))
213 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); 213 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
214} 214}
215 215
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a2233e77cf39..2631876ac55b 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
133 133
134void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk) 134void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
135{ 135{
136 if (!sk || sk->sk_state == TCP_TIME_WAIT) 136 if (!sk || !sk_fullsock(sk))
137 return; 137 return;
138 138
139 read_lock_bh(&sk->sk_callback_lock); 139 read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11d85b3813f2..61d04bf9be2b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -539,7 +539,7 @@ __build_packet_message(struct nfnl_log_net *log,
539 539
540 /* UID */ 540 /* UID */
541 sk = skb->sk; 541 sk = skb->sk;
542 if (sk && sk->sk_state != TCP_TIME_WAIT) { 542 if (sk && sk_fullsock(sk)) {
543 read_lock_bh(&sk->sk_callback_lock); 543 read_lock_bh(&sk->sk_callback_lock);
544 if (sk->sk_socket && sk->sk_socket->file) { 544 if (sk->sk_socket && sk->sk_socket->file) {
545 struct file *file = sk->sk_socket->file; 545 struct file *file = sk->sk_socket->file;
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 0db8515e76da..86ee8b05adae 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -257,7 +257,7 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
257{ 257{
258 const struct cred *cred; 258 const struct cred *cred;
259 259
260 if (sk->sk_state == TCP_TIME_WAIT) 260 if (!sk_fullsock(sk))
261 return 0; 261 return 0;
262 262
263 read_lock_bh(&sk->sk_callback_lock); 263 read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e99911eda915..abe68119a76c 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -83,7 +83,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
83 *(u16 *)dest->data = out->type; 83 *(u16 *)dest->data = out->type;
84 break; 84 break;
85 case NFT_META_SKUID: 85 case NFT_META_SKUID:
86 if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) 86 if (skb->sk == NULL || !sk_fullsock(skb->sk))
87 goto err; 87 goto err;
88 88
89 read_lock_bh(&skb->sk->sk_callback_lock); 89 read_lock_bh(&skb->sk->sk_callback_lock);
@@ -99,7 +99,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
99 read_unlock_bh(&skb->sk->sk_callback_lock); 99 read_unlock_bh(&skb->sk->sk_callback_lock);
100 break; 100 break;
101 case NFT_META_SKGID: 101 case NFT_META_SKGID:
102 if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) 102 if (skb->sk == NULL || !sk_fullsock(skb->sk))
103 goto err; 103 goto err;
104 104
105 read_lock_bh(&skb->sk->sk_callback_lock); 105 read_lock_bh(&skb->sk->sk_callback_lock);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index ef8a926752a9..165b77ce9aa9 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -42,15 +42,21 @@ enum nf_tproxy_lookup_t {
42 42
43static bool tproxy_sk_is_transparent(struct sock *sk) 43static bool tproxy_sk_is_transparent(struct sock *sk)
44{ 44{
45 if (sk->sk_state != TCP_TIME_WAIT) { 45 switch (sk->sk_state) {
46 if (inet_sk(sk)->transparent) 46 case TCP_TIME_WAIT:
47 return true;
48 sock_put(sk);
49 } else {
50 if (inet_twsk(sk)->tw_transparent) 47 if (inet_twsk(sk)->tw_transparent)
51 return true; 48 return true;
52 inet_twsk_put(inet_twsk(sk)); 49 break;
50 case TCP_NEW_SYN_RECV:
51 if (inet_rsk(inet_reqsk(sk))->no_srccheck)
52 return true;
53 break;
54 default:
55 if (inet_sk(sk)->transparent)
56 return true;
53 } 57 }
58
59 sock_gen_put(sk);
54 return false; 60 return false;
55} 61}
56 62
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 13332dbf291d..895534e87a47 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -129,6 +129,20 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol,
129 return NULL; 129 return NULL;
130} 130}
131 131
132static bool xt_socket_sk_is_transparent(struct sock *sk)
133{
134 switch (sk->sk_state) {
135 case TCP_TIME_WAIT:
136 return inet_twsk(sk)->tw_transparent;
137
138 case TCP_NEW_SYN_RECV:
139 return inet_rsk(inet_reqsk(sk))->no_srccheck;
140
141 default:
142 return inet_sk(sk)->transparent;
143 }
144}
145
132static bool 146static bool
133socket_match(const struct sk_buff *skb, struct xt_action_param *par, 147socket_match(const struct sk_buff *skb, struct xt_action_param *par,
134 const struct xt_socket_mtinfo1 *info) 148 const struct xt_socket_mtinfo1 *info)
@@ -195,16 +209,14 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
195 * unless XT_SOCKET_NOWILDCARD is set 209 * unless XT_SOCKET_NOWILDCARD is set
196 */ 210 */
197 wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && 211 wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
198 sk->sk_state != TCP_TIME_WAIT && 212 sk_fullsock(sk) &&
199 inet_sk(sk)->inet_rcv_saddr == 0); 213 inet_sk(sk)->inet_rcv_saddr == 0);
200 214
201 /* Ignore non-transparent sockets, 215 /* Ignore non-transparent sockets,
202 if XT_SOCKET_TRANSPARENT is used */ 216 * if XT_SOCKET_TRANSPARENT is used
217 */
203 if (info->flags & XT_SOCKET_TRANSPARENT) 218 if (info->flags & XT_SOCKET_TRANSPARENT)
204 transparent = ((sk->sk_state != TCP_TIME_WAIT && 219 transparent = xt_socket_sk_is_transparent(sk);
205 inet_sk(sk)->transparent) ||
206 (sk->sk_state == TCP_TIME_WAIT &&
207 inet_twsk(sk)->tw_transparent));
208 220
209 if (sk != skb->sk) 221 if (sk != skb->sk)
210 sock_gen_put(sk); 222 sock_gen_put(sk);
@@ -363,16 +375,14 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
363 * unless XT_SOCKET_NOWILDCARD is set 375 * unless XT_SOCKET_NOWILDCARD is set
364 */ 376 */
365 wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && 377 wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
366 sk->sk_state != TCP_TIME_WAIT && 378 sk_fullsock(sk) &&
367 ipv6_addr_any(&sk->sk_v6_rcv_saddr)); 379 ipv6_addr_any(&sk->sk_v6_rcv_saddr));
368 380
369 /* Ignore non-transparent sockets, 381 /* Ignore non-transparent sockets,
370 if XT_SOCKET_TRANSPARENT is used */ 382 * if XT_SOCKET_TRANSPARENT is used
383 */
371 if (info->flags & XT_SOCKET_TRANSPARENT) 384 if (info->flags & XT_SOCKET_TRANSPARENT)
372 transparent = ((sk->sk_state != TCP_TIME_WAIT && 385 transparent = xt_socket_sk_is_transparent(sk);
373 inet_sk(sk)->transparent) ||
374 (sk->sk_state == TCP_TIME_WAIT &&
375 inet_twsk(sk)->tw_transparent));
376 386
377 if (sk != skb->sk) 387 if (sk != skb->sk)
378 sock_gen_put(sk); 388 sock_gen_put(sk);