From a70ea994a0d83fd0151a070be72b87d014ef0a7e Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Thu, 9 Feb 2006 16:40:11 -0800 Subject: [NETLINK]: Fix a severe bug netlink overrun was broken while improvement of netlink. Destination socket is used in the place where it was meant to be source socket, so that now overrun is never sent to user netlink sockets, when it should be, and it even can be set on kernel socket, which results in complete deadlock of rtnetlink. Suggested fix is to restore status quo passing source socket as additional argument to netlink_attachskb(). A little explanation: overrun is set on a socket, when it failed to receive some message and sender of this messages does not or even have no way to handle this error. This happens in two cases: 1. when kernel sends something. Kernel never retransmits and cannot wait for buffer space. 2. when user sends a broadcast and the message was not delivered to some recipients. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2101b45d2ec6..6b9772d95872 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -702,7 +702,8 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 0: continue * 1: repeat lookup - reference dropped while waiting for socket memory. 
*/ -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo) +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + long timeo, struct sock *ssk) { struct netlink_sock *nlk; @@ -712,7 +713,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t test_bit(0, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); if (!timeo) { - if (!nlk->pid) + if (!ssk || nlk_sk(ssk)->pid == 0) netlink_overrun(sk); sock_put(sk); kfree_skb(skb); @@ -797,7 +798,7 @@ retry: kfree_skb(skb); return PTR_ERR(sk); } - err = netlink_attachskb(sk, skb, nonblock, timeo); + err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); if (err == 1) goto retry; if (err) -- cgit v1.2.2 From 28633514afd68afa77ed2fa34fa53626837bf2d5 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Thu, 9 Feb 2006 16:40:58 -0800 Subject: [NETLINK]: illegal use of pid in rtnetlink When a netlink message is not related to a netlink socket, it is issued by kernel socket with pid 0. Netlink "pid" has nothing to do with current->pid. I called it incorrectly, if it was named "port", the confusion would be avoided. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 2 +- net/ipv4/devinet.c | 2 +- net/ipv4/fib_semantics.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8700379685e0..eca2976abb25 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -455,7 +455,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) if (!skb) return; - if (rtnetlink_fill_ifinfo(skb, dev, type, current->pid, 0, change, 0) < 0) { + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) { kfree_skb(skb); return; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 95b9d81ac488..3ffa60dadc0c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1135,7 +1135,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa) if (!skb) netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); - else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { + else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { kfree_skb(skb); netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); } else { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ef4724de7350..0f4145babb14 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1045,7 +1045,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, } nl->nlmsg_flags = NLM_F_REQUEST; - nl->nlmsg_pid = current->pid; + nl->nlmsg_pid = 0; nl->nlmsg_seq = 0; nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); if (cmd == SIOCDELRT) { -- cgit v1.2.2 From d93077fb0e7cb9d4f4094a649501d840c55fdc8b Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 9 Feb 2006 16:58:46 -0800 Subject: [IRDA]: Set proper IrLAP device address length This patch set IrDA's addr_len properly, i.e to 4 bytes, the size of the IrLAP device address. Signed-off-by: Samuel Ortiz Signed-off-by: David S. 
Miller --- net/irda/irda_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 890bac0d4a56..e3debbdb67f5 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -343,12 +343,12 @@ static void irda_task_timer_expired(void *data) static void irda_device_setup(struct net_device *dev) { dev->hard_header_len = 0; - dev->addr_len = 0; + dev->addr_len = LAP_ALEN; dev->type = ARPHRD_IRDA; dev->tx_queue_len = 8; /* Window size + 1 s-frame */ - memset(dev->broadcast, 0xff, 4); + memset(dev->broadcast, 0xff, LAP_ALEN); dev->mtu = 2048; dev->flags = IFF_NOARP; -- cgit v1.2.2 From 80ba250e59ced808a8c9b79560938bbe4509c0a7 Mon Sep 17 00:00:00 2001 From: David Binderman Date: Thu, 9 Feb 2006 16:59:48 -0800 Subject: [IRDA]: out of range array access This patch fixes an out of range array access in irnet_irda.c. Author: David Binderman Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- net/irda/irnet/irnet_irda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index 07ec326c71f5..f65c7a83bc5c 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -696,7 +696,7 @@ irnet_daddr_to_dname(irnet_socket * self) { /* Yes !!! Get it.. */ strlcpy(self->rname, discoveries[i].info, sizeof(self->rname)); - self->rname[NICKNAME_MAX_LEN + 1] = '\0'; + self->rname[sizeof(self->rname) - 1] = '\0'; DEBUG(IRDA_SERV_INFO, "Device 0x%08x is in fact ``%s''.\n", self->daddr, self->rname); kfree(discoveries); -- cgit v1.2.2 From 6fcf9412de64056238a6295f21db7aa9c37a532e Mon Sep 17 00:00:00 2001 From: John Heffner Date: Thu, 9 Feb 2006 17:06:57 -0800 Subject: [TCP]: rcvbuf lock when tcp_moderate_rcvbuf enabled The rcvbuf lock should probably be honored here. Signed-off-by: John Heffner Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a97ed5416c28..e9a54ae7d690 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -456,7 +456,8 @@ void tcp_rcv_space_adjust(struct sock *sk) tp->rcvq_space.space = space; - if (sysctl_tcp_moderate_rcvbuf) { + if (sysctl_tcp_moderate_rcvbuf && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int new_clamp = space; /* Receive space grows, normalize in order to -- cgit v1.2.2 From b3f1be4b5412e34647764457bec901e06b03e624 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 9 Feb 2006 17:08:52 -0800 Subject: [BRIDGE]: fix for RCU and deadlock on device removal Change Bridge receive path to correctly handle RCU removal of device from bridge. Also fixes deadlock between carrier_check and del_nbp. This replaces the previous deleted flag fix. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 21 +++++++++++---------- net/bridge/br_input.c | 19 ++++++++++++------- net/bridge/br_private.h | 1 - net/bridge/br_stp_bpdu.c | 30 ++++++++++++++++++------------ 4 files changed, 41 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index da687c8dc6ff..70b7ef917234 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -79,9 +79,14 @@ static int port_cost(struct net_device *dev) */ static void port_carrier_check(void *arg) { - struct net_bridge_port *p = arg; + struct net_device *dev = arg; + struct net_bridge_port *p; rtnl_lock(); + p = dev->br_port; + if (!p) + goto done; + if (netif_carrier_ok(p->dev)) { u32 cost = port_cost(p->dev); @@ -97,6 +102,7 @@ static void port_carrier_check(void *arg) br_stp_disable_port(p); spin_unlock_bh(&p->br->lock); } +done: rtnl_unlock(); } @@ -104,7 +110,6 @@ static void destroy_nbp(struct net_bridge_port *p) { struct net_device *dev = p->dev; - dev->br_port = NULL; p->br = 
NULL; p->dev = NULL; dev_put(dev); @@ -133,24 +138,20 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; - /* Race between RTNL notify and RCU callback */ - if (p->deleted) - return; - dev_set_promiscuity(dev, -1); cancel_delayed_work(&p->carrier_check); - flush_scheduled_work(); spin_lock_bh(&br->lock); br_stp_disable_port(p); - p->deleted = 1; spin_unlock_bh(&br->lock); br_fdb_delete_by_port(br, p); list_del_rcu(&p->list); + rcu_assign_pointer(dev->br_port, NULL); + call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -254,11 +255,10 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->dev = dev; p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; - dev->br_port = p; p->port_no = index; br_init_port(p); p->state = BR_STATE_DISABLED; - INIT_WORK(&p->carrier_check, port_carrier_check, p); + INIT_WORK(&p->carrier_check, port_carrier_check, dev); kobject_init(&p->kobj); return p; @@ -397,6 +397,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) else if ((err = br_sysfs_addif(p))) del_nbp(p); else { + rcu_assign_pointer(dev->br_port, p); dev_set_promiscuity(dev, 1); list_add_rcu(&p->list, &br->port_list); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e3a73cead6b6..4eef83755315 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -45,18 +45,20 @@ static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) int br_handle_frame_finish(struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; - struct net_bridge_port *p = skb->dev->br_port; - struct net_bridge *br = p->br; + struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); + struct net_bridge *br; struct net_bridge_fdb_entry *dst; int passedup = 0; + if (!p || p->state == BR_STATE_DISABLED) + goto drop; + /* insert into forwarding database after filtering to avoid spoofing */ - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br = p->br; + 
br_fdb_update(br, p, eth_hdr(skb)->h_source); - if (p->state == BR_STATE_LEARNING) { - kfree_skb(skb); - goto out; - } + if (p->state == BR_STATE_LEARNING) + goto drop; if (br->dev->flags & IFF_PROMISC) { struct sk_buff *skb2; @@ -93,6 +95,9 @@ int br_handle_frame_finish(struct sk_buff *skb) out: return 0; +drop: + kfree_skb(skb); + goto out; } /* diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e330b17b6d81..c5bd631ffcd5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -68,7 +68,6 @@ struct net_bridge_port /* STP */ u8 priority; u8 state; - u8 deleted; u16 port_no; unsigned char topology_change_ack; unsigned char config_pending; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index d071f1c9ad0b..296f6a487c52 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -133,29 +133,35 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) static const unsigned char header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; -/* NO locks */ +/* NO locks, but rcu_read_lock (preempt_disabled) */ int br_stp_handle_bpdu(struct sk_buff *skb) { - struct net_bridge_port *p = skb->dev->br_port; - struct net_bridge *br = p->br; + struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); + struct net_bridge *br; unsigned char *buf; + if (!p) + goto err; + + br = p->br; + spin_lock(&br->lock); + + if (p->state == BR_STATE_DISABLED || !(br->dev->flags & IFF_UP)) + goto out; + /* insert into forwarding database after filtering to avoid spoofing */ - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_fdb_update(br, p, eth_hdr(skb)->h_source); + + if (!br->stp_enabled) + goto out; /* need at least the 802 and STP headers */ if (!pskb_may_pull(skb, sizeof(header)+1) || memcmp(skb->data, header, sizeof(header))) - goto err; + goto out; buf = skb_pull(skb, sizeof(header)); - spin_lock_bh(&br->lock); - if (p->state == BR_STATE_DISABLED - || !(br->dev->flags & IFF_UP) - || !br->stp_enabled) - goto out; - if (buf[0] == 
BPDU_TYPE_CONFIG) { struct br_config_bpdu bpdu; @@ -201,7 +207,7 @@ int br_stp_handle_bpdu(struct sk_buff *skb) br_received_tcn_bpdu(p); } out: - spin_unlock_bh(&br->lock); + spin_unlock(&br->lock); err: kfree_skb(skb); return 0; -- cgit v1.2.2 From 5dce971acf2ae20c80d5e9d1f6bbf17376870911 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 9 Feb 2006 17:09:38 -0800 Subject: [BRIDGE]: netfilter handle RCU during removal Bridge netfilter code needs to handle the case where device is removed from bridge while packet in process. In these cases the bridge_parent can become null while processing. This should fix: http://bugzilla.kernel.org/show_bug.cgi?id=5803 Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 53 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 7cac3fb9f809..b5018166b0e5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -51,9 +51,6 @@ #define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr) #define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr) -#define has_bridge_parent(device) ((device)->br_port != NULL) -#define bridge_parent(device) ((device)->br_port->br->dev) - #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables = 1; @@ -98,6 +95,12 @@ static struct rtable __fake_rtable = { .rt_flags = 0, }; +static inline struct net_device *bridge_parent(const struct net_device *dev) +{ + struct net_bridge_port *port = rcu_dereference(dev->br_port); + + return port ? 
port->br->dev : NULL; +} /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the @@ -189,11 +192,15 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; skb->dev = bridge_parent(skb->dev); - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - skb->nh.raw += VLAN_HLEN; + if (!skb->dev) + kfree_skb(skb); + else { + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; + } + skb->dst->output(skb); } - skb->dst->output(skb); return 0; } @@ -270,7 +277,7 @@ bridged_dnat: } /* Some common code for IPv4/IPv6 */ -static void setup_pre_routing(struct sk_buff *skb) +static struct net_device *setup_pre_routing(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; @@ -282,6 +289,8 @@ static void setup_pre_routing(struct sk_buff *skb) nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; skb->dev = bridge_parent(skb->dev); + + return skb->dev; } /* We only check the length. 
A bridge shouldn't do any hop-by-hop stuff anyway */ @@ -376,7 +385,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, nf_bridge_put(skb->nf_bridge); if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) return NF_DROP; - setup_pre_routing(skb); + if (!setup_pre_routing(skb)) + return NF_DROP; NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); @@ -465,7 +475,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, nf_bridge_put(skb->nf_bridge); if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) return NF_DROP; - setup_pre_routing(skb); + if (!setup_pre_routing(skb)) + return NF_DROP; store_orig_dstaddr(skb); NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, @@ -539,11 +550,16 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + struct net_device *parent; int pf; if (!skb->nf_bridge) return NF_ACCEPT; + parent = bridge_parent(out); + if (!parent) + return NF_DROP; + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) pf = PF_INET; else @@ -564,8 +580,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; - NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), - bridge_parent(out), br_nf_forward_finish); + NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent, + br_nf_forward_finish); return NF_STOLEN; } @@ -688,6 +704,8 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, goto out; } realoutdev = bridge_parent(skb->dev); + if (!realoutdev) + return NF_DROP; #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) /* iptables should match -o br0.x */ @@ -701,9 +719,11 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, /* IP forwarded traffic has a physindev, locally * 
generated traffic hasn't. */ if (realindev != NULL) { - if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT) && - has_bridge_parent(realindev)) - realindev = bridge_parent(realindev); + if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT) ) { + struct net_device *parent = bridge_parent(realindev); + if (parent) + realindev = parent; + } NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, realoutdev, br_nf_local_out_finish, @@ -743,6 +763,9 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, if (!nf_bridge) return NF_ACCEPT; + if (!realoutdev) + return NF_DROP; + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) pf = PF_INET; else -- cgit v1.2.2 From bab1deea308afcf9200837d6ac20aefe92972efb Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 9 Feb 2006 17:10:12 -0800 Subject: [BRIDGE]: fix error handling for add interface to bridge Refactor how the bridge code interacts with kobject system. It should still use kobjects even if not using sysfs. Fix the error unwind handling in br_add_if. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_if.c | 77 ++++++++++++++++++++++++++++++++++-------------- net/bridge/br_private.h | 5 +--- net/bridge/br_sysfs_if.c | 50 ++----------------------------- 3 files changed, 59 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 70b7ef917234..7fa3a5a9971f 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -106,6 +106,20 @@ done: rtnl_unlock(); } +static void release_nbp(struct kobject *kobj) +{ + struct net_bridge_port *p + = container_of(kobj, struct net_bridge_port, kobj); + kfree(p); +} + +static struct kobj_type brport_ktype = { +#ifdef CONFIG_SYSFS + .sysfs_ops = &brport_sysfs_ops, +#endif + .release = release_nbp, +}; + static void destroy_nbp(struct net_bridge_port *p) { struct net_device *dev = p->dev; @@ -114,7 +128,7 @@ static void destroy_nbp(struct net_bridge_port *p) p->dev = NULL; dev_put(dev); - br_sysfs_freeif(p); + kobject_put(&p->kobj); } static void destroy_nbp_rcu(struct rcu_head *head) @@ -138,6 +152,8 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; + sysfs_remove_link(&br->ifobj, dev->name); + dev_set_promiscuity(dev, -1); cancel_delayed_work(&p->carrier_check); @@ -152,6 +168,8 @@ static void del_nbp(struct net_bridge_port *p) rcu_assign_pointer(dev->br_port, NULL); + kobject_del(&p->kobj); + call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -161,7 +179,6 @@ static void del_br(struct net_bridge *br) struct net_bridge_port *p, *n; list_for_each_entry_safe(p, n, &br->port_list, list) { - br_sysfs_removeif(p); del_nbp(p); } @@ -261,6 +278,11 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, INIT_WORK(&p->carrier_check, port_carrier_check, dev); kobject_init(&p->kobj); + kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); + p->kobj.ktype = &brport_ktype; + p->kobj.parent = &(dev->class_dev.kobj); + p->kobj.kset = NULL; + return p; } @@ -388,31 +410,43 @@ int br_add_if(struct net_bridge 
*br, struct net_device *dev) if (dev->br_port != NULL) return -EBUSY; - if (IS_ERR(p = new_nbp(br, dev))) + p = new_nbp(br, dev); + if (IS_ERR(p)) return PTR_ERR(p); - if ((err = br_fdb_insert(br, p, dev->dev_addr))) - destroy_nbp(p); - - else if ((err = br_sysfs_addif(p))) - del_nbp(p); - else { - rcu_assign_pointer(dev->br_port, p); - dev_set_promiscuity(dev, 1); + err = kobject_add(&p->kobj); + if (err) + goto err0; - list_add_rcu(&p->list, &br->port_list); + err = br_fdb_insert(br, p, dev->dev_addr); + if (err) + goto err1; - spin_lock_bh(&br->lock); - br_stp_recalculate_bridge_id(br); - br_features_recompute(br); - if ((br->dev->flags & IFF_UP) - && (dev->flags & IFF_UP) && netif_carrier_ok(dev)) - br_stp_enable_port(p); - spin_unlock_bh(&br->lock); + err = br_sysfs_addif(p); + if (err) + goto err2; - dev_set_mtu(br->dev, br_min_mtu(br)); - } + rcu_assign_pointer(dev->br_port, p); + dev_set_promiscuity(dev, 1); + + list_add_rcu(&p->list, &br->port_list); + + spin_lock_bh(&br->lock); + br_stp_recalculate_bridge_id(br); + br_features_recompute(br); + schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE); + spin_unlock_bh(&br->lock); + + dev_set_mtu(br->dev, br_min_mtu(br)); + kobject_uevent(&p->kobj, KOBJ_ADD); + return 0; +err2: + br_fdb_delete_by_port(br, p); +err1: + kobject_del(&p->kobj); +err0: + kobject_put(&p->kobj); return err; } @@ -424,7 +458,6 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) if (!p || p->br != br) return -EINVAL; - br_sysfs_removeif(p); del_nbp(p); spin_lock_bh(&br->lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c5bd631ffcd5..8f10e09f251b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -232,9 +232,8 @@ extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ +extern struct sysfs_ops brport_sysfs_ops; extern int br_sysfs_addif(struct net_bridge_port *p); -extern void br_sysfs_removeif(struct net_bridge_port *p); 
-extern void br_sysfs_freeif(struct net_bridge_port *p); /* br_sysfs_br.c */ extern int br_sysfs_addbr(struct net_device *dev); @@ -243,8 +242,6 @@ extern void br_sysfs_delbr(struct net_device *dev); #else #define br_sysfs_addif(p) (0) -#define br_sysfs_removeif(p) do { } while(0) -#define br_sysfs_freeif(p) kfree(p) #define br_sysfs_addbr(dev) (0) #define br_sysfs_delbr(dev) do { } while(0) #endif /* CONFIG_SYSFS */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 0ac0355d16dd..c51c9e42aeb3 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -195,23 +195,11 @@ static ssize_t brport_store(struct kobject * kobj, return ret; } -/* called from kobject_put when port ref count goes to zero. */ -static void brport_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct net_bridge_port, kobj)); -} - -static struct sysfs_ops brport_sysfs_ops = { +struct sysfs_ops brport_sysfs_ops = { .show = brport_show, .store = brport_store, }; -static struct kobj_type brport_ktype = { - .sysfs_ops = &brport_sysfs_ops, - .release = brport_release, -}; - - /* * Add sysfs entries to ethernet device added to a bridge. * Creates a brport subdirectory with bridge attributes. 
@@ -223,17 +211,6 @@ int br_sysfs_addif(struct net_bridge_port *p) struct brport_attribute **a; int err; - ASSERT_RTNL(); - - kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); - p->kobj.ktype = &brport_ktype; - p->kobj.parent = &(p->dev->class_dev.kobj); - p->kobj.kset = NULL; - - err = kobject_add(&p->kobj); - if(err) - goto out1; - err = sysfs_create_link(&p->kobj, &br->dev->class_dev.kobj, SYSFS_BRIDGE_PORT_LINK); if (err) @@ -245,28 +222,7 @@ int br_sysfs_addif(struct net_bridge_port *p) goto out2; } - err = sysfs_create_link(&br->ifobj, &p->kobj, p->dev->name); - if (err) - goto out2; - - kobject_uevent(&p->kobj, KOBJ_ADD); - return 0; - out2: - kobject_del(&p->kobj); - out1: + err= sysfs_create_link(&br->ifobj, &p->kobj, p->dev->name); +out2: return err; } - -void br_sysfs_removeif(struct net_bridge_port *p) -{ - pr_debug("br_sysfs_removeif\n"); - sysfs_remove_link(&p->br->ifobj, p->dev->name); - kobject_uevent(&p->kobj, KOBJ_REMOVE); - kobject_del(&p->kobj); -} - -void br_sysfs_freeif(struct net_bridge_port *p) -{ - pr_debug("br_sysfs_freeif\n"); - kobject_put(&p->kobj); -} -- cgit v1.2.2 From 3c791925da0e6108cda15e3c2c7bfaebcd9ab9cf Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 12 Feb 2006 14:34:53 -0800 Subject: [PATCH] netfilter: fix build error due to missing has_bridge_parent macro net/bridge/br_netfilter.c: In function `br_nf_post_routing': net/bridge/br_netfilter.c:808: warning: implicit declaration of function `has_bridge_parent' Signed-off-by: Jesper Juhl Cc: Harald Welte Cc: "David S. 
Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/bridge/br_netfilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index b5018166b0e5..c06cb0983530 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -805,7 +805,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, print_error: if (skb->dev != NULL) { printk("[%s]", skb->dev->name); - if (has_bridge_parent(skb->dev)) + if (bridge_parent(skb->dev)) printk("[%s]", bridge_parent(skb->dev)->name); } printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, -- cgit v1.2.2 From 56f3a40a5e7586043260669cc794e56fa58339e1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 13 Feb 2006 11:39:57 +0100 Subject: [Bluetooth] Reduce L2CAP MTU for RFCOMM connections This patch reduces the default L2CAP MTU for all RFCOMM connections from 1024 to 1013 to improve the interoperability with some broken RFCOMM implementations. To make this more flexible the L2CAP MTU becomes also a module parameter and so it can changed at runtime. Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 0d89d6434136..5b4253c61f62 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -46,13 +46,15 @@ #include #include -#define VERSION "1.6" - #ifndef CONFIG_BT_RFCOMM_DEBUG #undef BT_DBG #define BT_DBG(D...) 
#endif +#define VERSION "1.7" + +static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; + static struct task_struct *rfcomm_thread; static DECLARE_MUTEX(rfcomm_sem); @@ -623,7 +625,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU; + l2cap_pi(sk)->imtu = l2cap_mtu; release_sock(sk); s = rfcomm_session_add(sock, BT_BOUND); @@ -1868,7 +1870,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU; + l2cap_pi(sk)->imtu = l2cap_mtu; release_sock(sk); /* Start listening on the socket */ @@ -2070,6 +2072,9 @@ static void __exit rfcomm_exit(void) module_init(rfcomm_init); module_exit(rfcomm_exit); +module_param(l2cap_mtu, uint, 0644); +MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); + MODULE_AUTHOR("Maxim Krasnyansky , Marcel Holtmann "); MODULE_DESCRIPTION("Bluetooth RFCOMM ver " VERSION); MODULE_VERSION(VERSION); -- cgit v1.2.2 From 7b005bd34c895ebeefd1c62f90a329730b88946b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 13 Feb 2006 11:40:03 +0100 Subject: [Bluetooth] Fix NULL pointer dereferences of the HCI socket This patch fixes the two NULL pointer dereferences found by the sfuzz tool from Ilja van Sprundel. The first one was a call of getsockname() for an unbound socket and the second was calling accept() while this operation isn't implemented for the HCI socket interface. 
Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index bdb6458c6bd5..97bdec73d17e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -143,13 +143,15 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; - struct hci_dev *hdev = hci_pi(sk)->hdev; + struct hci_dev *hdev; BT_DBG("sock %p sk %p", sock, sk); if (!sk) return 0; + hdev = hci_pi(sk)->hdev; + bt_sock_unlink(&hci_sk_list, sk); if (hdev) { @@ -311,14 +313,18 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int *add { struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr; struct sock *sk = sock->sk; + struct hci_dev *hdev = hci_pi(sk)->hdev; BT_DBG("sock %p sk %p", sock, sk); + if (!hdev) + return -EBADFD; + lock_sock(sk); *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; - haddr->hci_dev = hci_pi(sk)->hdev->id; + haddr->hci_dev = hdev->id; release_sock(sk); return 0; -- cgit v1.2.2 From bf3883c12fece9189ab4f7bb6e2690451db1366e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 13 Feb 2006 15:34:58 -0800 Subject: [ATM]: Ratelimit atmsvc failure messages This seems to be trivial to trigger. Signed-off-by: Dave Jones Signed-off-by: David S. 
Miller --- net/atm/signaling.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/signaling.c b/net/atm/signaling.c index e7211a7f382c..93ad59a28ef5 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -56,7 +56,8 @@ static void sigd_put_skb(struct sk_buff *skb) remove_wait_queue(&sigd_sleep,&wait); #else if (!sigd) { - printk(KERN_WARNING "atmsvc: no signaling demon\n"); + if (net_ratelimit()) + printk(KERN_WARNING "atmsvc: no signaling demon\n"); kfree_skb(skb); return; } -- cgit v1.2.2 From 77decfc716d460b3f7037bb19bd4eb12cd0dc996 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 13 Feb 2006 15:36:21 -0800 Subject: [IPV4] ICMP: Invert default for invalid icmp msgs sysctl isic can trigger these msgs to be spewed at a very high rate. There's already a sysctl to turn them off. Given these messages aren't useful for most people, this patch disables them by default. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4d1c40972a4b..e7bbff4340bb 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -192,7 +192,7 @@ int sysctl_icmp_echo_ignore_all; int sysctl_icmp_echo_ignore_broadcasts = 1; /* Control parameter - ignore bogus broadcast responses? */ -int sysctl_icmp_ignore_bogus_error_responses; +int sysctl_icmp_ignore_bogus_error_responses = 1; /* * Configurable global rate limit. -- cgit v1.2.2 From 99e382afd297d91ab150ae46c28c4585f925818c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 13 Feb 2006 15:38:42 -0800 Subject: [P8023]: Fix tainting of kernel. Missing license tag. I've assumed this is GPL. (It could also use a MODULE_AUTHOR) Signed-off-by: Dave Jones Signed-off-by: David S. 
Miller --- net/802/p8023.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/802/p8023.c b/net/802/p8023.c index d23e906456eb..53cf05709283 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c @@ -59,3 +59,5 @@ void destroy_8023_client(struct datalink_proto *dl) EXPORT_SYMBOL(destroy_8023_client); EXPORT_SYMBOL(make_8023_client); + +MODULE_LICENSE("GPL"); -- cgit v1.2.2 From a6c1cd572642478528165ac44db4d2daae125a21 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 13 Feb 2006 15:42:48 -0800 Subject: [NETFILTER] Fix Kconfig menu level for x_tables The new x_tables related Kconfig options appear at the wrong menu level without this patch. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 99c0a0fa4a97..0e550127fa7e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -102,8 +102,6 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface -endmenu - config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" help @@ -361,3 +359,5 @@ config NETFILTER_XT_MATCH_TCPMSS To compile it as a module, choose M here. If unsure, say N. +endmenu + -- cgit v1.2.2 From 178a3259f2508e786fb1bd6538365a167cee35c1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 13 Feb 2006 15:43:58 -0800 Subject: [BRIDGE]: Better fix for netfilter missing symbol has_bridge_parent Horms patch was the best of the three fixes. Dave, already applied Harald's version, so this patch converts that to the better one. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_netfilter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index c06cb0983530..6bb0c7eb1ef0 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -805,8 +805,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, print_error: if (skb->dev != NULL) { printk("[%s]", skb->dev->name); - if (bridge_parent(skb->dev)) - printk("[%s]", bridge_parent(skb->dev)->name); + if (realoutdev) + printk("[%s]", realoutdev->name); } printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, skb->data); -- cgit v1.2.2 From e200bd8065e4db6297cd8db071a9188cf9aa6b56 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 13 Feb 2006 15:51:24 -0800 Subject: [NETLINK] genetlink: Fix bugs spotted by Andrew Morton. - panic() doesn't return. - Don't forget to unlock on genl_register_family() error path - genl_rcv_msg() is called via pointer so there's no point in declaring it `inline'. Notes: genl_ctrl_event() ignores the genlmsg_multicast() return value. lots of things ignore the genl_ctrl_event() return value. Signed-off-by: Jamal Hadi Salim Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- net/netlink/genetlink.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 4ae1538c54a9..43e72419c868 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -238,7 +238,7 @@ int genl_register_family(struct genl_family *family) sizeof(struct nlattr *), GFP_KERNEL); if (family->attrbuf == NULL) { err = -ENOMEM; - goto errout; + goto errout_locked; } } else family->attrbuf = NULL; @@ -288,7 +288,7 @@ int genl_unregister_family(struct genl_family *family) return -ENOENT; } -static inline int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) { struct genl_ops *ops; @@ -375,7 +375,7 @@ static void genl_rcv(struct sock *sk, int len) do { if (genl_trylock()) return; - netlink_run_queue(sk, &qlen, &genl_rcv_msg); + netlink_run_queue(sk, &qlen, genl_rcv_msg); genl_unlock(); } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen); } @@ -549,10 +549,8 @@ static int __init genl_init(void) netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, genl_rcv, THIS_MODULE); - if (genl_sock == NULL) { + if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); - return -ENOMEM; - } return 0; @@ -560,7 +558,6 @@ errout_register: genl_unregister_family(&genl_ctrl); errout: panic("GENL: Cannot register controller: %d\n", err); - return err; } subsys_initcall(genl_init); -- cgit v1.2.2 From 6d3e85ecf22a5e3610df47b9c3fb2fc32cfd35bf Mon Sep 17 00:00:00 2001 From: Nicolas DICHTEL Date: Mon, 13 Feb 2006 15:56:13 -0800 Subject: [IPV6] Don't store dst_entry for RAW socket Signed-off-by: Nicolas DICHTEL Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 738376cf0c51..ae20a0ec9bd8 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -803,10 +803,7 @@ back_from_confirm: err = rawv6_push_pending_frames(sk, &fl, rp); } done: - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); - + dst_release(dst); release_sock(sk); out: fl6_sock_release(flowlabel); -- cgit v1.2.2 From 00de651d14baabc5c1d2f32c49d9a984d8891c8e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Feb 2006 16:01:27 -0800 Subject: [IPSEC]: Fix strange IPsec freeze. Problem discovered and initial patch by Olaf Kirch: there's a problem with IPsec that has been bugging some of our users for the last couple of kernel revs. Every now and then, IPsec will freeze the machine completely. This is with openswan user land, and with kernels up to and including 2.6.16-rc2. I managed to debug this a little, and what happens is that we end up looping in xfrm_lookup, and never get out. With a bit of debug printks added, I can see this happening: ip_route_output_flow calls xfrm_lookup xfrm_find_bundle returns NULL (apparently we're in the middle of negotiating a new SA or something) We therefore call xfrm_tmpl_resolve. This returns EAGAIN We go to sleep, waiting for a policy update. Then we loop back to the top Apparently, the dst_orig that was passed into xfrm_lookup has been dropped from the routing table (obsolete=2) This leads to the endless loop, because we now create a new bundle, check the new bundle and find it's stale (stale_bundle -> xfrm_bundle_ok -> dst_check() return 0) People have been testing with the patch below, which seems to fix the problem partially. They still see connection hangs however (things only clear up when they start a new ping or new ssh). So the patch is obviously not sufficient, and something else seems to go wrong.
I'm grateful for any hints you may have... I suggest that we simply bail out always. If the dst decides to die on us later on, the packet will be dropped anyway. So there is no great urgency to retry here. Once we have the proper resolution queueing, we can then do the retry again. Signed-off-by: Herbert Xu Acked-by: Olaf Kirch Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dbf4620768d6..98ec53bd3ac7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -889,7 +889,9 @@ restart: xfrm_pol_put(policy); if (dst) dst_free(dst); - goto restart; + + err = -EHOSTUNREACH; + goto error; } dst->next = policy->bundles; policy->bundles = dst; -- cgit v1.2.2 From b4d9eda028e8becbb5057b554e63eea12e496a88 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 13 Feb 2006 16:06:10 -0800 Subject: [NET]: Revert skb_copy_datagram_iovec() recursion elimination. Revert the following changeset: bc8dfcb93970ad7139c976356bfc99d7e251deaf Recursive SKB frag lists are really possible and disallowing them breaks things. Noticed by: Jesse Brandeburg Signed-off-by: David S. Miller --- net/core/datagram.c | 81 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index f8d322e1ea92..b8ce6bf81188 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -247,49 +247,74 @@ EXPORT_SYMBOL(skb_kill_datagram); int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, struct iovec *to, int len) { - int i, err, fraglen, end = 0; - struct sk_buff *next = skb_shinfo(skb)->frag_list; + int start = skb_headlen(skb); + int i, copy = start - offset; - if (!len) - return 0; + /* Copy header. 
*/ + if (copy > 0) { + if (copy > len) + copy = len; + if (memcpy_toiovec(to, skb->data + offset, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + } -next_skb: - fraglen = skb_headlen(skb); - i = -1; + /* Copy paged appendix. Hmm... why does this look so complicated? */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; - while (1) { - int start = end; + BUG_TRAP(start <= offset + len); - if ((end += fraglen) > offset) { - int copy = end - offset, o = offset - start; + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + int err; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; if (copy > len) copy = len; - if (i == -1) - err = memcpy_toiovec(to, skb->data + o, copy); - else { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; - void *p = kmap(page) + frag->page_offset + o; - err = memcpy_toiovec(to, p, copy); - kunmap(page); - } + vaddr = kmap(page); + err = memcpy_toiovec(to, vaddr + frag->page_offset + + offset - start, copy); + kunmap(page); if (err) goto fault; if (!(len -= copy)) return 0; offset += copy; } - if (++i >= skb_shinfo(skb)->nr_frags) - break; - fraglen = skb_shinfo(skb)->frags[i].size; + start = end; } - if (next) { - skb = next; - BUG_ON(skb_shinfo(skb)->frag_list); - next = skb->next; - goto next_skb; + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_datagram_iovec(list, + offset - start, + to, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + } + start = end; + } } + if (!len) + return 0; + fault: return -EFAULT; } -- cgit v1.2.2 From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 
15 Feb 2006 01:34:23 -0800 Subject: [NETFILTER]: Fix xfrm lookup after SNAT To find out if a packet needs to be handled by IPsec after SNAT, packets are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This breaks SNAT of non-unicast packets to non-local addresses because the packet is routed as incoming packet and no neighbour entry is bound to the dst_entry. In general, it seems to be a bad idea to replace the dst_entry after the packet was already sent to the output routine because its state might not match what's expected. This patch changes the xfrm lookup in POST_ROUTING to re-use the original dst_entry without routing the packet again. This means no policy routing can be used for transport mode transforms (which keep the original route) when packets are SNATed to match the policy, but it looks like the best we can do for now. Signed-off-by: Patrick McHardy Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/netfilter.c | 41 ++++++++++++++++++++++++++++++++++ net/ipv4/netfilter/ip_nat_standalone.c | 6 ++--- 2 files changed, 44 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 52a3d7c57907..ed42cdc57cd9 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -78,6 +78,47 @@ int ip_route_me_harder(struct sk_buff **pskb) } EXPORT_SYMBOL(ip_route_me_harder); +#ifdef CONFIG_XFRM +int ip_xfrm_me_harder(struct sk_buff **pskb) +{ + struct flowi fl; + unsigned int hh_len; + struct dst_entry *dst; + + if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) + return 0; + if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) + return -1; + + dst = (*pskb)->dst; + if (dst->xfrm) + dst = ((struct xfrm_dst *)dst)->route; + dst_hold(dst); + + if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0) + return -1; + + dst_release((*pskb)->dst); + (*pskb)->dst = dst; + + /* Change in oif may mean change in hh_len. 
*/ + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + return 0; +} +EXPORT_SYMBOL(ip_xfrm_me_harder); +#endif + void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(ip_nat_decode_session); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 92c54999a19d..7c3f7d380240 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -235,19 +235,19 @@ ip_nat_out(unsigned int hooknum, return NF_ACCEPT; ret = ip_nat_fn(hooknum, pskb, in, out, okfn); +#ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.ip != ct->tuplehash[!dir].tuple.dst.ip -#ifdef CONFIG_XFRM || ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all -#endif ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; } +#endif return ret; } -- cgit v1.2.2 From 78872ccb68335b14f0d1ac7338ecfcbf1cba1df4 Mon Sep 17 00:00:00 2001 From: Adrian Drzewiecki Date: Wed, 15 Feb 2006 01:47:48 -0800 Subject: [BRIDGE]: Fix deadlock in br_stp_disable_bridge Looks like somebody forgot to use the _bh spin_lock variant. We ran into a deadlock where br->hello_timer expired while br_stp_disable_br() walked br->port_list. Signed-off-by: Adrian Drzewiecki Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_stp_if.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index cc047f7fb6ef..35cf3a074087 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -67,7 +67,7 @@ void br_stp_disable_bridge(struct net_bridge *br) { struct net_bridge_port *p; - spin_lock(&br->lock); + spin_lock_bh(&br->lock); list_for_each_entry(p, &br->port_list, list) { if (p->state != BR_STATE_DISABLED) br_stp_disable_port(p); @@ -76,7 +76,7 @@ void br_stp_disable_bridge(struct net_bridge *br) br->topology_change = 0; br->topology_change_detected = 0; - spin_unlock(&br->lock); + spin_unlock_bh(&br->lock); del_timer_sync(&br->hello_timer); del_timer_sync(&br->topology_change_timer); -- cgit v1.2.2 From 48d5cad87c3a4998d0bda16ccfb5c60dfe4de5fb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:10:22 -0800 Subject: [XFRM]: Fix SNAT-related crash in xfrm4_output_finish When a packet matching an IPsec policy is SNATed so it doesn't match any policy anymore it loses its xfrm bundle, which makes xfrm4_output_finish crash because of a NULL pointer dereference. This patch directs these packets to the original output path instead. Since the packets have already passed the POST_ROUTING hook, but need to start at the beginning of the original output path which includes another POST_ROUTING invocation, a flag is added to the IPCB to indicate that the packet was rerouted and doesn't need to pass the POST_ROUTING hook again. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/ipv4/ip_gre.c | 3 ++- net/ipv4/ip_output.c | 16 ++++++++++------ net/ipv4/ipip.c | 3 ++- net/ipv4/xfrm4_output.c | 13 ++++++++++--- 4 files changed, 24 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index abe23923e4e7..9981dcd68f11 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -830,7 +830,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, gre_hlen); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3324fbfe528a..57d290d89ec2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -207,8 +207,10 @@ static inline int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ - if (skb->dst->xfrm != NULL) - return xfrm4_output_finish(skb); + if (skb->dst->xfrm != NULL) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } #endif if (skb->len > dst_mtu(skb->dst) && !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) @@ -271,8 +273,9 @@ int ip_mc_output(struct sk_buff *skb) newskb->dev, ip_dev_loopback_xmit); } - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, - ip_finish_output); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_output(struct sk_buff *skb) @@ -284,8 +287,9 @@ int ip_output(struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, - ip_finish_output); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, + 
ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_queue_xmit(struct sk_buff *skb, int ipfragok) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e5cbe72c6b80..03d13742a4b8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -622,7 +622,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d4df0ddd424b..32ad229b4fed 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -152,10 +152,16 @@ error_nolock: goto out_exit; } -int xfrm4_output_finish(struct sk_buff *skb) +static int xfrm4_output_finish(struct sk_buff *skb) { int err; +#ifdef CONFIG_NETFILTER + if (!skb->dst->xfrm) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } +#endif while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); @@ -178,6 +184,7 @@ int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, - xfrm4_output_finish); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm4_output_finish, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } -- cgit v1.2.2 From deac0ccdb4da16b68539d75edecf26162de05150 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Wed, 15 Feb 2006 15:21:31 -0800 Subject: [NETFILTER]: x_tables: fix dependencies of conntrack related modules NF_CONNTRACK_MARK is bool and depends on NF_CONNTRACK which is tristate. If a variable depends on NF_CONNTRACK_MARK and doesn't take care about NF_CONNTRACK, it can be y even if NF_CONNTRACK isn't y. 
NF_CT_ACCT has the same issue, too. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0e550127fa7e..a8e5544da93e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -126,7 +126,7 @@ config NETFILTER_XT_TARGET_CONNMARK tristate '"CONNMARK" target support' depends on NETFILTER_XTABLES depends on IP_NF_MANGLE || IP6_NF_MANGLE - depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) + depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK) help This option adds a `CONNMARK' target, which allows one to manipulate the connection mark value. Similar to the MARK target, but @@ -187,7 +187,7 @@ config NETFILTER_XT_MATCH_COMMENT config NETFILTER_XT_MATCH_CONNBYTES tristate '"connbytes" per-connection counter match support' depends on NETFILTER_XTABLES - depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || NF_CT_ACCT + depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK) help This option adds a `connbytes' match, which allows you to match the number of bytes and/or packets for each direction within a connection. @@ -198,7 +198,7 @@ config NETFILTER_XT_MATCH_CONNBYTES config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' depends on NETFILTER_XTABLES - depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || NF_CONNTRACK_MARK + depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK) help This option adds a `connmark' match, which allows you to match the connection mark value previously set for the session by `CONNMARK'.
-- cgit v1.2.2 From 7d3cdc6b554137a7a0534ce38b155a63a3117f27 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Wed, 15 Feb 2006 15:22:21 -0800 Subject: [NETFILTER]: nf_conntrack: move registration of __nf_ct_attach Move registration of __nf_ct_attach to nf_conntrack_core to make it usable for IPv6 connection tracking as well. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 5 ----- net/netfilter/nf_conntrack_core.c | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 167619f638c6..6c8624a54933 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -529,15 +529,10 @@ static int init_or_cleanup(int init) goto cleanup_localinops; } #endif - - /* For use by REJECT target */ - ip_ct_attach = __nf_conntrack_attach; - return ret; cleanup: synchronize_net(); - ip_ct_attach = NULL; #ifdef CONFIG_SYSCTL unregister_sysctl_table(nf_ct_ipv4_sysctl_header); cleanup_localinops: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0ce337a1d974..d622ddf08bb0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1556,6 +1556,8 @@ void nf_conntrack_cleanup(void) { int i; + ip_ct_attach = NULL; + /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module delete... 
*/ @@ -1715,6 +1717,9 @@ int __init nf_conntrack_init(void) nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto; write_unlock_bh(&nf_conntrack_lock); + /* For use by REJECT target */ + ip_ct_attach = __nf_conntrack_attach; + /* Set up fake conntrack: - to never be deleted, not in any hashes */ atomic_set(&nf_conntrack_untracked.ct_general.use, 1); -- cgit v1.2.2 From 08857fa745ab6ce46601960d2774490e1cef2cff Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Wed, 15 Feb 2006 15:23:28 -0800 Subject: [NETFILTER]: nf_conntrack: attach conntrack to TCP RST generated by ip6t_REJECT TCP RSTs generated by the REJECT target should be associated with the conntrack of the original TCP packet. Since the conntrack entry is usually not in the hash tables, it must be manually attached. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6t_REJECT.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index c745717b4ce2..0e6d1d4bbd5c 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -160,6 +160,8 @@ static void send_reset(struct sk_buff *oldskb) csum_partial((char *)tcph, sizeof(struct tcphdr), 0)); + nf_ct_attach(nskb, oldskb); + NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, dst_output); } -- cgit v1.2.2 From 763ecff1879b3877f57f20fc9e79599aef59359f Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Wed, 15 Feb 2006 15:24:15 -0800 Subject: [NETFILTER]: nf_conntrack: attach conntrack to locally generated ICMPv6 error Locally generated ICMPv6 errors should be associated with the conntrack of the original packet. Since the conntrack entry may not be in the hash tables (for the first packet), it must be manually attached. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/ipv6/icmp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fcf883183cef..21eb725e885f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -42,6 +42,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include @@ -255,6 +256,7 @@ out: struct icmpv6_msg { struct sk_buff *skb; int offset; + uint8_t type; }; static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) @@ -266,6 +268,8 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset, to, len, csum); skb->csum = csum_block_add(skb->csum, csum, odd); + if (!(msg->type & ICMPV6_INFOMSG_MASK)) + nf_ct_attach(skb, org_skb); return 0; } @@ -403,6 +407,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, msg.skb = skb; msg.offset = skb->nh.raw - skb->data; + msg.type = type; len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); @@ -500,6 +505,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) msg.skb = skb; msg.offset = 0; + msg.type = ICMPV6_ECHO_REPLY; err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl, -- cgit v1.2.2 From 7c6de05884b9fcc7ef621e2ab198ba93d85f46aa Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Wed, 15 Feb 2006 15:25:18 -0800 Subject: [NETFILTER]: nf_conntrack: Fix TCP/UDP HW checksum handling for IPv6 packet If skb->ip_summed is CHECKSUM_HW here, skb->csum includes checksum of actual IPv6 header and extension headers. Then such excess checksum must be subtracted when nf_conntrack calculates TCP/UDP checksum with pseudo IPv6 header. Spotted by Ben Skeggs. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/netfilter/nf_conntrack_proto_tcp.c | 4 +++- net/netfilter/nf_conntrack_proto_udp.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index df99138c3b3b..6492ed66fb3c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -864,7 +864,9 @@ static int csum6(const struct sk_buff *skb, unsigned int dataoff) { return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, skb->len - dataoff, IPPROTO_TCP, - skb->ip_summed == CHECKSUM_HW ? skb->csum + skb->ip_summed == CHECKSUM_HW + ? csum_sub(skb->csum, + skb_checksum(skb, 0, dataoff, 0)) : skb_checksum(skb, dataoff, skb->len - dataoff, 0)); } diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 4264dd079a16..831d206344e0 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -161,7 +161,9 @@ static int csum6(const struct sk_buff *skb, unsigned int dataoff) { return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, skb->len - dataoff, IPPROTO_UDP, - skb->ip_summed == CHECKSUM_HW ? skb->csum + skb->ip_summed == CHECKSUM_HW + ? csum_sub(skb->csum, + skb_checksum(skb, 0, dataoff, 0)) : skb_checksum(skb, dataoff, skb->len - dataoff, 0)); } -- cgit v1.2.2