From f291196979ca80cdef199ca2b55e2758e8c23a0d Mon Sep 17 00:00:00 2001 From: Herbert Xu ~{PmVHI~} Date: Mon, 5 Jun 2006 15:03:37 -0700 Subject: [TCP]: Avoid skb_pull if possible when trimming head Trimming the head of an skb by calling skb_pull can cause the packet to become unaligned if the length pulled is odd. Since the length is entirely arbitrary for a FIN packet carrying data, this is actually quite common. Unaligned data is not the end of the world, but we should avoid it if it's easily done. In this case it is trivial. Since we're discarding all of the head data it doesn't matter whether we move skb->data forward or back. However, it is still possible to have unaligned skb->data in general. So network drivers should be prepared to handle it instead of crashing. This patch also adds an unlikely marking on len < headlen since partial ACKs on head data are extremely rare in the wild. As the return value of __pskb_trim_head is no longer ever NULL that has been removed. Signed-off-by: Herbert Xu ~{PmV>HI~} Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 743016baa048..f33c9dddaa12 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -642,7 +642,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss * eventually). The difference is that pulled data not copied, but * immediately discarded. */ -static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len) +static void __pskb_trim_head(struct sk_buff *skb, int len) { int i, k, eat; @@ -667,7 +667,6 @@ static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len) skb->tail = skb->data; skb->data_len -= len; skb->len = skb->data_len; - return skb->tail; } int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) @@ -676,12 +675,11 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; - if (len <= skb_headlen(skb)) { + /* If len == headlen, we avoid __skb_pull to preserve alignment. */ + if (unlikely(len < skb_headlen(skb))) __skb_pull(skb, len); - } else { - if (__pskb_trim_head(skb, len-skb_headlen(skb)) == NULL) - return -ENOMEM; - } + else + __pskb_trim_head(skb, len - skb_headlen(skb)); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_HW; -- cgit v1.2.2 From 6569a351da7e58d6f0fbc92fcf0bef5d4a4bc0a4 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 5 Jun 2006 15:34:11 -0700 Subject: [NET]: Eliminate unused /proc/sys/net/ethernet The /proc/sys/net/ethernet directory has been sitting empty for more than 10 years! Time to eliminate it! Signed-off-by: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ethernet/Makefile | 1 - net/ethernet/sysctl_net_ether.c | 14 -------------- net/sysctl_net.c | 8 -------- 3 files changed, 23 deletions(-) delete mode 100644 net/ethernet/sysctl_net_ether.c (limited to 'net') diff --git a/net/ethernet/Makefile b/net/ethernet/Makefile index 69b74a9a0fc3..7cef1d8ace27 100644 --- a/net/ethernet/Makefile +++ b/net/ethernet/Makefile @@ -3,6 +3,5 @@ # obj-y += eth.o -obj-$(CONFIG_SYSCTL) += sysctl_net_ether.o obj-$(subst m,y,$(CONFIG_IPX)) += pe2.o obj-$(subst m,y,$(CONFIG_ATALK)) += pe2.o diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c deleted file mode 100644 index 66b39fc342d2..000000000000 --- a/net/ethernet/sysctl_net_ether.c +++ /dev/null @@ -1,14 +0,0 @@ -/* -*- linux-c -*- - * sysctl_net_ether.c: sysctl interface to net Ethernet subsystem. - * - * Begun April 1, 1996, Mike Shaver. - * Added /proc/sys/net/ether directory entry (empty =) ). [MS] - */ - -#include -#include -#include - -ctl_table ether_table[] = { - {0} -}; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 55538f6b60ff..58a1b6b42ddd 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -37,14 +37,6 @@ struct ctl_table net_table[] = { .mode = 0555, .child = core_table, }, -#ifdef CONFIG_NET - { - .ctl_name = NET_ETHER, - .procname = "ethernet", - .mode = 0555, - .child = ether_table, - }, -#endif #ifdef CONFIG_INET { .ctl_name = NET_IPV4, -- cgit v1.2.2 From 8c893ff6abbac0c7c05b1cb9bfb6e2dfc4538c75 Mon Sep 17 00:00:00 2001 From: Florin Malita Date: Mon, 5 Jun 2006 15:34:52 -0700 Subject: [IRDA]: Missing allocation result check in irlap_change_speed(). The skb allocation may fail, which can result in a NULL pointer dereference in irlap_queue_xmit(). Coverity CID: 434. Signed-off-by: Florin Malita Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/irda/irlap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 7029618f5719..a16528657b4c 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -884,7 +884,8 @@ static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now) if (now) { /* Send down empty frame to trigger speed change */ skb = dev_alloc_skb(0); - irlap_queue_xmit(self, skb); + if (skb) + irlap_queue_xmit(self, skb); } } -- cgit v1.2.2 From 36485707bbd9729e0c52315b173aeed9bc2303dd Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 5 Jun 2006 16:39:34 -0700 Subject: [BRIDGE]: fix locking and memory leak in br_add_bridge There are several bugs in error handling in br_add_bridge: - when dev_alloc_name fails, allocated net_device is not freed - unregister_netdev is called when rtnl lock is held - free_netdev is called before netdev_run_todo has a chance to be run after unregistering net_device Signed-off-by: Jiri Benc Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ad1c7af65ec8..f5d47bf4f967 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -300,25 +300,20 @@ int br_add_bridge(const char *name) rtnl_lock(); if (strchr(dev->name, '%')) { ret = dev_alloc_name(dev, dev->name); - if (ret < 0) - goto err1; + if (ret < 0) { + free_netdev(dev); + goto out; + } } ret = register_netdevice(dev); if (ret) - goto err2; + goto out; ret = br_sysfs_addbr(dev); if (ret) - goto err3; - rtnl_unlock(); - return 0; - - err3: - unregister_netdev(dev); - err2: - free_netdev(dev); - err1: + unregister_netdevice(dev); + out: rtnl_unlock(); return ret; } -- cgit v1.2.2 From afec35e3fee900b3016519d0b5512064e4625b2c Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Sun, 11 Jun 2006 20:58:33 -0700 Subject: [DCCP] Ackvec: fix soft lockup in ackvec handling code A soft lockup existed in the handling of ack vector records. Specifically, when a tail of the list of ack vector records was removed, it was possible to end up iterating infinitely on an element of the tail. Signed-off-by: Andrea Bittau Signed-off-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ackvec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index b5981e5f6b00..8c211c58893b 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -452,6 +452,7 @@ found: (unsigned long long) avr->dccpavr_ack_ackno); dccp_ackvec_throw_record(av, avr); + break; } /* * If it wasn't received, continue scanning... we might -- cgit v1.2.2 From 79320d7e14900c549c3520791a297328f53ff71e Mon Sep 17 00:00:00 2001 From: Aki M Nyrhinen Date: Sun, 11 Jun 2006 21:18:56 -0700 Subject: [TCP]: continued: reno sacked_out count fix From: Aki M Nyrhinen IMHO the current fix to the problem (in_flight underflow in reno) is incorrect. it treats the symptons but ignores the problem. the problem is timing out packets other than the head packet when we don't have sack. i try to explain (sorry if explaining the obvious). with sack, scanning the retransmit queue for timed out packets is fine because we know which packets in our retransmit queue have been acked by the receiver. without sack, we know only how many packets in our retransmit queue the receiver has acknowledged, but no idea which packets. think of a "typical" slow-start overshoot case, where for example every third packet in a window get lost because a router buffer gets full. with sack, we check for timeouts on those every third packet (as the rest have been sacked). the packet counting works out and if there is no reordering, we'll retransmit exactly the packets that were lost. without sack, however, we check for timeout on every packet and end up retransmitting consecutive packets in the retransmit queue. in our slow-start example, 2/3 of those retransmissions are unnecessary. these unnecessary retransmissions eat the congestion window and evetually prevent fast recovery from continuing, if enough packets were lost. Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4a538bc1683d..b5521a9d3dc1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1649,7 +1649,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) * Hence, we can detect timed out packets during fast * retransmit without falling to slow start. */ - if (tcp_head_timedout(sk, tp)) { + if (!IsReno(tp) && tcp_head_timedout(sk, tp)) { struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint @@ -1662,8 +1662,6 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - if (IsReno(tp)) - tcp_remove_reno_sacks(sk, tp, tcp_skb_pcount(skb) + 1); /* clear xmit_retrans hint */ if (tp->retransmit_skb_hint && -- cgit v1.2.2 From 42d1d52e695d87475846e9a09964cae1209eeecb Mon Sep 17 00:00:00 2001 From: Weidong Date: Mon, 12 Jun 2006 13:09:59 -0700 Subject: [IPV4]: Increment ipInHdrErrors when TTL expires. Signed-off-by: Weidong Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 0923add122b4..9f0bb529ab70 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -116,6 +116,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ + IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); -- cgit v1.2.2