summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-06-17 13:39:56 -0400
committerDavid S. Miller <davem@davemloft.net>2019-06-17 13:39:56 -0400
commit4fddbf8a99ee5a65bdd31b3ebbf5a84b9395d496 (patch)
tree671cdce9ff92bdd0a99619f995bbcea9124735ad
parent6be8e297f9bcea666ea85ac7a6cd9d52d6deaf92 (diff)
parent967c05aee439e6e5d7d805e195b3a20ef5c433d6 (diff)
Merge branch 'tcp-fixes'
Eric Dumazet says: ==================== tcp: make sack processing more robust Jonathan Looney brought to our attention multiple problems in TCP stack at the sender side. SACK processing can be abused by malicious peers to either cause overflows, or increase of memory usage. First two patches fix the immediate problems. Since the malicious peers abuse senders by advertising a very small MSS in their SYN or SYNACK packet, the last two patches add a new sysctl so that admins can choose a higher limit for MSS clamping. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt8
-rw-r--r--include/linux/tcp.h4
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/uapi/linux/snmp.h1
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_input.c26
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_output.c10
-rw-r--r--net/ipv4/tcp_timer.c1
12 files changed, 58 insertions, 9 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 288aa264ac26..22f6b8b1110a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -255,6 +255,14 @@ tcp_base_mss - INTEGER
255 Path MTU discovery (MTU probing). If MTU probing is enabled, 255 Path MTU discovery (MTU probing). If MTU probing is enabled,
256 this is the initial MSS used by the connection. 256 this is the initial MSS used by the connection.
257 257
258tcp_min_snd_mss - INTEGER
259 TCP SYN and SYNACK messages usually advertise an ADVMSS option,
260 as described in RFC 1122 and RFC 6691.
261 If this ADVMSS option is smaller than tcp_min_snd_mss,
262 it is silently capped to tcp_min_snd_mss.
263
264 Default : 48 (at least 8 bytes of payload per segment)
265
258tcp_congestion_control - STRING 266tcp_congestion_control - STRING
259 Set the congestion control algorithm to be used for new 267 Set the congestion control algorithm to be used for new
260 connections. The algorithm "reno" is always available, but 268 connections. The algorithm "reno" is always available, but
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 711361af9ce0..9a478a0cd3a2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -484,4 +484,8 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
484 484
485 return (user_mss && user_mss < mss) ? user_mss : mss; 485 return (user_mss && user_mss < mss) ? user_mss : mss;
486} 486}
487
488int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
489 int shiftlen);
490
487#endif /* _LINUX_TCP_H */ 491#endif /* _LINUX_TCP_H */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7698460a3dd1..623cfbb7b8dc 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -117,6 +117,7 @@ struct netns_ipv4 {
117#endif 117#endif
118 int sysctl_tcp_mtu_probing; 118 int sysctl_tcp_mtu_probing;
119 int sysctl_tcp_base_mss; 119 int sysctl_tcp_base_mss;
120 int sysctl_tcp_min_snd_mss;
120 int sysctl_tcp_probe_threshold; 121 int sysctl_tcp_probe_threshold;
121 u32 sysctl_tcp_probe_interval; 122 u32 sysctl_tcp_probe_interval;
122 123
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ac2f53fbfa6b..582c0caa9811 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -51,6 +51,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
51 51
52#define MAX_TCP_HEADER (128 + MAX_HEADER) 52#define MAX_TCP_HEADER (128 + MAX_HEADER)
53#define MAX_TCP_OPTION_SPACE 40 53#define MAX_TCP_OPTION_SPACE 40
54#define TCP_MIN_SND_MSS 48
55#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
54 56
55/* 57/*
56 * Never offer a window over 32767 without using window scaling. Some 58 * Never offer a window over 32767 without using window scaling. Some
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 86dc24a96c90..fd42c1316d3d 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -283,6 +283,7 @@ enum
283 LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */ 283 LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
284 LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */ 284 LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */
285 LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */ 285 LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */
286 LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
286 __LINUX_MIB_MAX 287 __LINUX_MIB_MAX
287}; 288};
288 289
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4370f4246e86..073273b751f8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -287,6 +287,7 @@ static const struct snmp_mib snmp4_net_list[] = {
287 SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), 287 SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
288 SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), 288 SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
289 SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), 289 SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
290 SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
290 SNMP_MIB_SENTINEL 291 SNMP_MIB_SENTINEL
291}; 292};
292 293
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index fa213bd8e233..b6f14af926fa 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = { 1, 1 };
39static int ip_local_port_range_max[] = { 65535, 65535 }; 39static int ip_local_port_range_max[] = { 65535, 65535 };
40static int tcp_adv_win_scale_min = -31; 40static int tcp_adv_win_scale_min = -31;
41static int tcp_adv_win_scale_max = 31; 41static int tcp_adv_win_scale_max = 31;
42static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
43static int tcp_min_snd_mss_max = 65535;
42static int ip_privileged_port_min; 44static int ip_privileged_port_min;
43static int ip_privileged_port_max = 65535; 45static int ip_privileged_port_max = 65535;
44static int ip_ttl_min = 1; 46static int ip_ttl_min = 1;
@@ -770,6 +772,15 @@ static struct ctl_table ipv4_net_table[] = {
770 .proc_handler = proc_dointvec, 772 .proc_handler = proc_dointvec,
771 }, 773 },
772 { 774 {
775 .procname = "tcp_min_snd_mss",
776 .data = &init_net.ipv4.sysctl_tcp_min_snd_mss,
777 .maxlen = sizeof(int),
778 .mode = 0644,
779 .proc_handler = proc_dointvec_minmax,
780 .extra1 = &tcp_min_snd_mss_min,
781 .extra2 = &tcp_min_snd_mss_max,
782 },
783 {
773 .procname = "tcp_probe_threshold", 784 .procname = "tcp_probe_threshold",
774 .data = &init_net.ipv4.sysctl_tcp_probe_threshold, 785 .data = &init_net.ipv4.sysctl_tcp_probe_threshold,
775 .maxlen = sizeof(int), 786 .maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f448a288d158..7dc9ab84bb69 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3873,6 +3873,7 @@ void __init tcp_init(void)
3873 unsigned long limit; 3873 unsigned long limit;
3874 unsigned int i; 3874 unsigned int i;
3875 3875
3876 BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
3876 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > 3877 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
3877 FIELD_SIZEOF(struct sk_buff, cb)); 3878 FIELD_SIZEOF(struct sk_buff, cb));
3878 3879
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38dfc308c0fb..d95ee40df6c2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1302,7 +1302,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
1302 TCP_SKB_CB(skb)->seq += shifted; 1302 TCP_SKB_CB(skb)->seq += shifted;
1303 1303
1304 tcp_skb_pcount_add(prev, pcount); 1304 tcp_skb_pcount_add(prev, pcount);
1305 BUG_ON(tcp_skb_pcount(skb) < pcount); 1305 WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
1306 tcp_skb_pcount_add(skb, -pcount); 1306 tcp_skb_pcount_add(skb, -pcount);
1307 1307
1308 /* When we're adding to gso_segs == 1, gso_size will be zero, 1308 /* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1368,6 +1368,21 @@ static int skb_can_shift(const struct sk_buff *skb)
1368 return !skb_headlen(skb) && skb_is_nonlinear(skb); 1368 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1369} 1369}
1370 1370
1371int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
1372 int pcount, int shiftlen)
1373{
1374 /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
1375 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
1376 * to make sure not storing more than 65535 * 8 bytes per skb,
1377 * even if current MSS is bigger.
1378 */
1379 if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
1380 return 0;
1381 if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
1382 return 0;
1383 return skb_shift(to, from, shiftlen);
1384}
1385
1371/* Try collapsing SACK blocks spanning across multiple skbs to a single 1386/* Try collapsing SACK blocks spanning across multiple skbs to a single
1372 * skb. 1387 * skb.
1373 */ 1388 */
@@ -1473,7 +1488,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1473 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) 1488 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1474 goto fallback; 1489 goto fallback;
1475 1490
1476 if (!skb_shift(prev, skb, len)) 1491 if (!tcp_skb_shift(prev, skb, pcount, len))
1477 goto fallback; 1492 goto fallback;
1478 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack)) 1493 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
1479 goto out; 1494 goto out;
@@ -1491,11 +1506,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1491 goto out; 1506 goto out;
1492 1507
1493 len = skb->len; 1508 len = skb->len;
1494 if (skb_shift(prev, skb, len)) { 1509 pcount = tcp_skb_pcount(skb);
1495 pcount += tcp_skb_pcount(skb); 1510 if (tcp_skb_shift(prev, skb, pcount, len))
1496 tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb), 1511 tcp_shifted_skb(sk, prev, skb, state, pcount,
1497 len, mss, 0); 1512 len, mss, 0);
1498 }
1499 1513
1500out: 1514out:
1501 return prev; 1515 return prev;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bc86f9735f45..cfa81190a1b1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2628,6 +2628,7 @@ static int __net_init tcp_sk_init(struct net *net)
2628 net->ipv4.sysctl_tcp_ecn_fallback = 1; 2628 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2629 2629
2630 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; 2630 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2631 net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
2631 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; 2632 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2632 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; 2633 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2633 2634
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f429e856e263..00c01a01b547 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1296,6 +1296,11 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1296 if (nsize < 0) 1296 if (nsize < 0)
1297 nsize = 0; 1297 nsize = 0;
1298 1298
1299 if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
1300 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
1301 return -ENOMEM;
1302 }
1303
1299 if (skb_unclone(skb, gfp)) 1304 if (skb_unclone(skb, gfp))
1300 return -ENOMEM; 1305 return -ENOMEM;
1301 1306
@@ -1454,8 +1459,7 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1454 mss_now -= icsk->icsk_ext_hdr_len; 1459 mss_now -= icsk->icsk_ext_hdr_len;
1455 1460
1456 /* Then reserve room for full set of TCP options and 8 bytes of data */ 1461 /* Then reserve room for full set of TCP options and 8 bytes of data */
1457 if (mss_now < 48) 1462 mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
1458 mss_now = 48;
1459 return mss_now; 1463 return mss_now;
1460} 1464}
1461 1465
@@ -2747,7 +2751,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2747 if (next_skb_size <= skb_availroom(skb)) 2751 if (next_skb_size <= skb_availroom(skb))
2748 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), 2752 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2749 next_skb_size); 2753 next_skb_size);
2750 else if (!skb_shift(skb, next_skb, next_skb_size)) 2754 else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
2751 return false; 2755 return false;
2752 } 2756 }
2753 tcp_highest_sack_replace(sk, next_skb, skb); 2757 tcp_highest_sack_replace(sk, next_skb, skb);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5bad937ce779..c801cd37cc2a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -155,6 +155,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
155 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; 155 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
156 mss = min(net->ipv4.sysctl_tcp_base_mss, mss); 156 mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
157 mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); 157 mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
158 mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
158 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); 159 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
159 } 160 }
160 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 161 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);