From 63c6f81cdde58c41da62a8d8a209592e42a0203e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Jun 2014 16:13:06 -0700
Subject: udp: ipv4: do not waste time in __udp4_lib_mcast_demux_lookup

Its too easy to add thousand of UDP sockets on a particular bucket,
and slow down an innocent multicast receiver.

Early demux is supposed to be an optimization, we should avoid spending
too much time in it.

It is interesting to note __udp4_lib_demux_lookup() only tries to
match first socket in the chain.

10 is the threshold we already have in __udp4_lib_lookup() to switch
to secondary hash.

Fixes: 421b3885bf6d5 ("udp: ipv4: Add udp early demux")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: David Held <drheld@google.com>
Cc: Shawn Bohrer <sbohrer@rgmadvisors.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 185ed3e59802..d92f94b7e402 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1861,6 +1861,10 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
 	unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask);
 	struct udp_hslot *hslot = &udp_table.hash[slot];
 
+	/* Do not bother scanning a too big list */
+	if (hslot->count > 10)
+		return NULL;
+
 	rcu_read_lock();
 begin:
 	count = 0;
-- 
cgit v1.2.2


From 46fb51eb96cafb2c148b7b5119adb5e31a2bf3c4 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sat, 14 Jun 2014 23:24:03 -0700
Subject: net: Fix save software checksum complete

Geert reported issues regarding checksum complete and UDP.
The logic introduced in commit 7e3cead5172927732f51fde
("net: Save software checksum complete") is not correct.

This patch:
1) Restores code in __skb_checksum_complete_header except for setting
   CHECKSUM_UNNECESSARY. This function may be calculating checksum on
   something less than skb->len.
2) Adds saving checksum to __skb_checksum_complete. The full packet
   checksum 0..skb->len is calculated without adding in pseudo header.
   This value is saved in skb->csum and then the pseudo header is added
   to that to derive the checksum for validation.
3) In both __skb_checksum_complete_header and __skb_checksum_complete,
   set skb->csum_valid to whether checksum of zero was computed. This
   allows skb_csum_unnecessary to return true without changing to
   CHECKSUM_UNNECESSARY which was done previously.
4) Copy new csum related bits in __copy_skb_header.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 36 ++++++++++++++++++++++++++----------
 net/core/skbuff.c   |  3 +++
 2 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 6b1c04ca1d50..488dd1a825c0 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -739,22 +739,38 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 	__sum16 sum;
 
 	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
-	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum &&
-	    !skb->csum_complete_sw)
-		netdev_rx_csum_fault(skb->dev);
-
-	/* Save checksum complete for later use */
-	skb->csum = sum;
-	skb->ip_summed = CHECKSUM_COMPLETE;
-	skb->csum_complete_sw = 1;
-
+	if (likely(!sum)) {
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    !skb->csum_complete_sw)
+			netdev_rx_csum_fault(skb->dev);
+	}
+	skb->csum_valid = !sum;
 	return sum;
 }
 EXPORT_SYMBOL(__skb_checksum_complete_head);
 
 __sum16 __skb_checksum_complete(struct sk_buff *skb)
 {
-	return __skb_checksum_complete_head(skb, skb->len);
+	__wsum csum;
+	__sum16 sum;
+
+	csum = skb_checksum(skb, 0, skb->len, 0);
+
+	/* skb->csum holds pseudo checksum */
+	sum = csum_fold(csum_add(skb->csum, csum));
+	if (likely(!sum)) {
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    !skb->csum_complete_sw)
+			netdev_rx_csum_fault(skb->dev);
+	}
+
+	/* Save full packet checksum */
+	skb->csum = csum;
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb->csum_complete_sw = 1;
+	skb->csum_valid = !sum;
+
+	return sum;
 }
 EXPORT_SYMBOL(__skb_checksum_complete);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bf92824af3f7..9cd5344fad73 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -689,6 +689,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->ooo_okay		= old->ooo_okay;
 	new->no_fcs		= old->no_fcs;
 	new->encapsulation	= old->encapsulation;
+	new->encap_hdr_csum	= old->encap_hdr_csum;
+	new->csum_valid		= old->csum_valid;
+	new->csum_complete_sw	= old->csum_complete_sw;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
-- 
cgit v1.2.2


From b58537a1f5629bdc98a8b9dc2051ce0e952f6b4b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sun, 15 Jun 2014 00:59:14 +0200
Subject: net: sctp: fix permissions for rto_alpha and rto_beta knobs

Commit 3fd091e73b81 ("[SCTP]: Remove multiple levels of msecs
to jiffies conversions.") has silently changed permissions for
rto_alpha and rto_beta knobs from 0644 to 0444. The purpose of
this was to discourage users from tweaking rto_alpha and
rto_beta knobs in production environments since they are key
to correctly compute rtt/srtt.

RFC4960 under section 6.3.1. RTO Calculation says regarding
rto_alpha and rto_beta under rule C3 and C4:

  [...]
  C3)  When a new RTT measurement R' is made, set

       RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|

       and

       SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'

       Note: The value of SRTT used in the update to RTTVAR
       is its value before updating SRTT itself using the
       second assignment. After the computation, update
       RTO <- SRTT + 4 * RTTVAR.

  C4)  When data is in flight and when allowed by rule C5
       below, a new RTT measurement MUST be made each round
       trip. Furthermore, new RTT measurements SHOULD be
       made no more than once per round trip for a given
       destination transport address. There are two reasons
       for this recommendation: First, it appears that
       measuring more frequently often does not in practice
       yield any significant benefit [ALLMAN99]; second,
       if measurements are made more often, then the values
       of RTO.Alpha and RTO.Beta in rule C3 above should be
       adjusted so that SRTT and RTTVAR still adjust to
       changes at roughly the same rate (in terms of how many
       round trips it takes them to reflect new values) as
       they would if making only one measurement per
       round-trip and using RTO.Alpha and RTO.Beta as given
       in rule C3. However, the exact nature of these
       adjustments remains a research issue.
  [...]

While it is discouraged to adjust rto_alpha and rto_beta
and not further specified how to adjust them, the RFC also
doesn't explicitly forbid it, but rather gives a RECOMMENDED
default value (rto_alpha=3, rto_beta=2). We have a couple
of users relying on the old permissions before they got
changed. That said, if someone really has the urge to adjust
them, we could allow it with a warning in the log.

Fixes: 3fd091e73b81 ("[SCTP]: Remove multiple levels of msecs to jiffies conversions.")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sysctl.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 7e5eb7554990..dcb19592761e 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -34,6 +34,8 @@
  *    Sridhar Samudrala     <sri@us.ibm.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <net/sctp/structs.h>
 #include <net/sctp/sctp.h>
 #include <linux/sysctl.h>
@@ -46,6 +48,11 @@ static int sack_timer_min = 1;
 static int sack_timer_max = 500;
 static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
 static int rwnd_scale_max = 16;
+static int rto_alpha_min = 0;
+static int rto_beta_min = 0;
+static int rto_alpha_max = 1000;
+static int rto_beta_max = 1000;
+
 static unsigned long max_autoclose_min = 0;
 static unsigned long max_autoclose_max =
 	(MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX)
@@ -64,6 +71,9 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
 static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
 				void __user *buffer, size_t *lenp,
 				loff_t *ppos);
+static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
+				   void __user *buffer, size_t *lenp,
+				   loff_t *ppos);
 static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos);
@@ -126,15 +136,19 @@ static struct ctl_table sctp_net_table[] = {
 		.procname	= "rto_alpha_exp_divisor",
 		.data		= &init_net.sctp.rto_alpha,
 		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.mode		= 0644,
+		.proc_handler	= proc_sctp_do_alpha_beta,
+		.extra1		= &rto_alpha_min,
+		.extra2		= &rto_alpha_max,
 	},
 	{
 		.procname	= "rto_beta_exp_divisor",
 		.data		= &init_net.sctp.rto_beta,
 		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.mode		= 0644,
+		.proc_handler	= proc_sctp_do_alpha_beta,
+		.extra1		= &rto_beta_min,
+		.extra2		= &rto_beta_max,
 	},
 	{
 		.procname	= "max_burst",
@@ -403,6 +417,16 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
 	return ret;
 }
 
+static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
+				   void __user *buffer, size_t *lenp,
+				   loff_t *ppos)
+{
+	pr_warn_once("Changing rto_alpha or rto_beta may lead to "
+		     "suboptimal rtt/srtt estimations!\n");
+
+	return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
+
 static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos)
-- 
cgit v1.2.2