From 63c6f81cdde58c41da62a8d8a209592e42a0203e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Jun 2014 16:13:06 -0700 Subject: udp: ipv4: do not waste time in __udp4_lib_mcast_demux_lookup Its too easy to add thousand of UDP sockets on a particular bucket, and slow down an innocent multicast receiver. Early demux is supposed to be an optimization, we should avoid spending too much time in it. It is interesting to note __udp4_lib_demux_lookup() only tries to match first socket in the chain. 10 is the threshold we already have in __udp4_lib_lookup() to switch to secondary hash. Fixes: 421b3885bf6d5 ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Reported-by: David Held Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 185ed3e59802..d92f94b7e402 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1861,6 +1861,10 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); struct udp_hslot *hslot = &udp_table.hash[slot]; + /* Do not bother scanning a too big list */ + if (hslot->count > 10) + return NULL; + rcu_read_lock(); begin: count = 0; -- cgit v1.2.2 From 46fb51eb96cafb2c148b7b5119adb5e31a2bf3c4 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sat, 14 Jun 2014 23:24:03 -0700 Subject: net: Fix save software checksum complete Geert reported issues regarding checksum complete and UDP. The logic introduced in commit 7e3cead5172927732f51fde ("net: Save software checksum complete") is not correct. This patch: 1) Restores code in __skb_checksum_complete_header except for setting CHECKSUM_UNNECESSARY. This function may be calculating checksum on something less than skb->len. 2) Adds saving checksum to __skb_checksum_complete. The full packet checksum 0..skb->len is calculated without adding in pseudo header. This value is saved in skb->csum and then the pseudo header is added to that to derive the checksum for validation. 3) In both __skb_checksum_complete_header and __skb_checksum_complete, set skb->csum_valid to whether checksum of zero was computed. This allows skb_csum_unnecessary to return true without changing to CHECKSUM_UNNECESSARY which was done previously. 4) Copy new csum related bits in __copy_skb_header. Reported-by: Geert Uytterhoeven Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/datagram.c | 36 ++++++++++++++++++++++++++---------- net/core/skbuff.c | 3 +++ 2 files changed, 29 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 6b1c04ca1d50..488dd1a825c0 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -739,22 +739,38 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) __sum16 sum; sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum && - !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); - - /* Save checksum complete for later use */ - skb->csum = sum; - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); __sum16 __skb_checksum_complete(struct sk_buff *skb) { - return __skb_checksum_complete_head(skb, skb->len); + __wsum csum; + __sum16 sum; + + csum = skb_checksum(skb, 0, skb->len, 0); + + /* skb->csum holds pseudo checksum */ + sum = csum_fold(csum_add(skb->csum, csum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + + /* Save full packet checksum */ + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + skb->csum_valid = !sum; + + return sum; } EXPORT_SYMBOL(__skb_checksum_complete); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bf92824af3f7..9cd5344fad73 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -689,6 +689,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ooo_okay = old->ooo_okay; new->no_fcs = old->no_fcs; new->encapsulation = old->encapsulation; + new->encap_hdr_csum = old->encap_hdr_csum; + new->csum_valid = old->csum_valid; + new->csum_complete_sw = old->csum_complete_sw; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif -- cgit v1.2.2 From b58537a1f5629bdc98a8b9dc2051ce0e952f6b4b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 15 Jun 2014 00:59:14 +0200 Subject: net: sctp: fix permissions for rto_alpha and rto_beta knobs Commit 3fd091e73b81 ("[SCTP]: Remove multiple levels of msecs to jiffies conversions.") has silently changed permissions for rto_alpha and rto_beta knobs from 0644 to 0444. The purpose of this was to discourage users from tweaking rto_alpha and rto_beta knobs in production environments since they are key to correctly compute rtt/srtt. RFC4960 under section 6.3.1. RTO Calculation says regarding rto_alpha and rto_beta under rule C3 and C4: [...] C3) When a new RTT measurement R' is made, set RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| and SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' Note: The value of SRTT used in the update to RTTVAR is its value before updating SRTT itself using the second assignment. After the computation, update RTO <- SRTT + 4 * RTTVAR. C4) When data is in flight and when allowed by rule C5 below, a new RTT measurement MUST be made each round trip. Furthermore, new RTT measurements SHOULD be made no more than once per round trip for a given destination transport address. There are two reasons for this recommendation: First, it appears that measuring more frequently often does not in practice yield any significant benefit [ALLMAN99]; second, if measurements are made more often, then the values of RTO.Alpha and RTO.Beta in rule C3 above should be adjusted so that SRTT and RTTVAR still adjust to changes at roughly the same rate (in terms of how many round trips it takes them to reflect new values) as they would if making only one measurement per round-trip and using RTO.Alpha and RTO.Beta as given in rule C3. However, the exact nature of these adjustments remains a research issue. [...] While it is discouraged to adjust rto_alpha and rto_beta and not further specified how to adjust them, the RFC also doesn't explicitly forbid it, but rather gives a RECOMMENDED default value (rto_alpha=3, rto_beta=2). We have a couple of users relying on the old permissions before they got changed. That said, if someone really has the urge to adjust them, we could allow it with a warning in the log. Fixes: 3fd091e73b81 ("[SCTP]: Remove multiple levels of msecs to jiffies conversions.") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 7e5eb7554990..dcb19592761e 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -34,6 +34,8 @@ * Sridhar Samudrala */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -46,6 +48,11 @@ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ static int rwnd_scale_max = 16; +static int rto_alpha_min = 0; +static int rto_beta_min = 0; +static int rto_alpha_max = 1000; +static int rto_beta_max = 1000; + static unsigned long max_autoclose_min = 0; static unsigned long max_autoclose_max = (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) @@ -64,6 +71,9 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); static int proc_sctp_do_auth(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -126,15 +136,19 @@ static struct ctl_table sctp_net_table[] = { .procname = "rto_alpha_exp_divisor", .data = &init_net.sctp.rto_alpha, .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, + .mode = 0644, + .proc_handler = proc_sctp_do_alpha_beta, + .extra1 = &rto_alpha_min, + .extra2 = &rto_alpha_max, }, { .procname = "rto_beta_exp_divisor", .data = &init_net.sctp.rto_beta, .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, + .mode = 0644, + .proc_handler = proc_sctp_do_alpha_beta, + .extra1 = &rto_beta_min, + .extra2 = &rto_beta_max, }, { .procname = "max_burst", @@ -403,6 +417,16 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, return ret; } +static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + pr_warn_once("Changing rto_alpha or rto_beta may lead to " + "suboptimal rtt/srtt estimations!\n"); + + return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); +} + static int proc_sctp_do_auth(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) -- cgit v1.2.2