Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Conflicts:
	drivers/net/ethernet/sfc/rx.c

Overlapping changes in drivers/net/ethernet/sfc/rx.c, one to change
the rx_buf->is_page boolean into a set of u16 flags, and another to
adjust how ->ip_summed is initialized.

Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2012-02-26 21:55:51 -0500
committer: David S. Miller <davem@davemloft.net> 2012-02-26 21:55:51 -0500
commit: ff4783ce78c08d2990126ce1874250ae8e72bbd2 (patch)
tree: 5c95885a4ab768101dd72942b57c238d452a7565 /net
parent: 622121719934f60378279eb440d3cec2fc3176d2 (diff)
parent: 203738e548cefc3fc3c2f73a9063176c9f3583d5 (diff)
18 files changed, 174 insertions, 101 deletions
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ef95a30306fa..5de42ea309bc 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -46,8 +46,8 @@
 static struct net_device *clip_devs;
 static struct atm_vcc *atmarpd;
-static struct neigh_table clip_tbl;
 static struct timer_list idle_timer;
+static const struct neigh_ops clip_neigh_ops;
 static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
 {
@@ -123,6 +123,8 @@ static int neigh_check_cb(struct neighbour *n)
        struct atmarp_entry *entry = neighbour_priv(n);
        struct clip_vcc *cv;
+        if (n->ops != &clip_neigh_ops)
+                return 0;
        for (cv = entry->vccs; cv; cv = cv->next) {
                unsigned long exp = cv->last_use + cv->idle_timeout;
@@ -154,10 +156,10 @@ static int neigh_check_cb(struct neighbour *n)
 static void idle_timer_check(unsigned long dummy)
 {
-        write_lock(&clip_tbl.lock);
+        write_lock(&arp_tbl.lock);
-        __neigh_for_each_release(&clip_tbl, neigh_check_cb);
+        __neigh_for_each_release(&arp_tbl, neigh_check_cb);
        mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ);
-        write_unlock(&clip_tbl.lock);
+        write_unlock(&arp_tbl.lock);
 }
 static int clip_arp_rcv(struct sk_buff *skb)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f98ec444133a..0a68045782d1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -826,6 +826,8 @@ next_elt:
                write_unlock_bh(&tbl->lock);
                cond_resched();
                write_lock_bh(&tbl->lock);
+                nht = rcu_dereference_protected(tbl->nht,
+                                                lockdep_is_held(&tbl->lock));
        }
        /* Cycle through all hash buckets every base_reachable_time/2 ticks.
         * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7aef62e53113..5cf39cd7da85 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -60,7 +60,6 @@ struct rtnl_link {
 };
 static DEFINE_MUTEX(rtnl_mutex);
-static u16 min_ifinfo_dump_size;
 void rtnl_lock(void)
 {
@@ -724,10 +723,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
 }
 /* All VF info */
-static inline int rtnl_vfinfo_size(const struct net_device *dev)
+static inline int rtnl_vfinfo_size(const struct net_device *dev,
+                                   u32 ext_filter_mask)
 {
-        if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
+        if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
+            (ext_filter_mask & RTEXT_FILTER_VF)) {
                int num_vfs = dev_num_vf(dev->dev.parent);
                size_t size = nla_total_size(sizeof(struct nlattr));
                size += nla_total_size(num_vfs * sizeof(struct nlattr));
@@ -766,7 +766,8 @@ static size_t rtnl_port_size(const struct net_device *dev)
                return port_self_size;
 }
-static noinline size_t if_nlmsg_size(const struct net_device *dev)
+static noinline size_t if_nlmsg_size(const struct net_device *dev,
+                                     u32 ext_filter_mask)
 {
        return NLMSG_ALIGN(sizeof(struct ifinfomsg))
               + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -784,8 +785,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
               + nla_total_size(4) /* IFLA_MASTER */
               + nla_total_size(1) /* IFLA_OPERSTATE */
               + nla_total_size(1) /* IFLA_LINKMODE */
-               + nla_total_size(4) /* IFLA_NUM_VF */
+               + nla_total_size(ext_filter_mask
-               + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+                                & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+               + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
               + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
               + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
               + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
@@ -868,7 +870,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
                            int type, u32 pid, u32 seq, u32 change,
-                            unsigned int flags)
+                            unsigned int flags, u32 ext_filter_mask)
 {
        struct ifinfomsg *ifm;
        struct nlmsghdr *nlh;
@@ -941,10 +943,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
                goto nla_put_failure;
        copy_rtnl_link_stats64(nla_data(attr), stats);
-        if (dev->dev.parent)
+        if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF))
                NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
-        if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
+        if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
+            && (ext_filter_mask & RTEXT_FILTER_VF)) {
                int i;
                struct nlattr *vfinfo, *vf;
@@ -1048,6 +1051,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
        struct net_device *dev;
        struct hlist_head *head;
        struct hlist_node *node;
+        struct nlattr *tb[IFLA_MAX+1];
+        u32 ext_filter_mask = 0;
        s_h = cb->args[0];
        s_idx = cb->args[1];
@@ -1055,6 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
        rcu_read_lock();
        cb->seq = net->dev_base_seq;
+        nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+                    ifla_policy);
+        if (tb[IFLA_EXT_MASK])
+                ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
@@ -1064,7 +1075,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
                        if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
                                             NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq, 0,
-                                             NLM_F_MULTI) <= 0)
+                                             NLM_F_MULTI,
+                                             ext_filter_mask) <= 0)
                                goto out;
                        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1100,6 +1112,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
        [IFLA_VF_PORTS]         = { .type = NLA_NESTED },
        [IFLA_PORT_SELF]        = { .type = NLA_NESTED },
        [IFLA_AF_SPEC]          = { .type = NLA_NESTED },
+        [IFLA_EXT_MASK]         = { .type = NLA_U32 },
 };
 EXPORT_SYMBOL(ifla_policy);
@@ -1509,8 +1522,6 @@ errout:
        if (send_addr_notify)
                call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-        min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev),
-                                     min_ifinfo_dump_size);
        return err;
 }
@@ -1842,6 +1853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
        struct net_device *dev = NULL;
        struct sk_buff *nskb;
        int err;
+        u32 ext_filter_mask = 0;
        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
        if (err < 0)
@@ -1850,6 +1862,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
        if (tb[IFLA_IFNAME])
                nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+        if (tb[IFLA_EXT_MASK])
+                ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
        ifm = nlmsg_data(nlh);
        if (ifm->ifi_index > 0)
                dev = __dev_get_by_index(net, ifm->ifi_index);
@@ -1861,12 +1876,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
        if (dev == NULL)
                return -ENODEV;
-        nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+        nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
        if (nskb == NULL)
                return -ENOBUFS;
        err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
-                               nlh->nlmsg_seq, 0, 0);
+                               nlh->nlmsg_seq, 0, 0, ext_filter_mask);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in if_nlmsg_size */
                WARN_ON(err == -EMSGSIZE);
@@ -1877,8 +1892,31 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
        return err;
 }
-static u16 rtnl_calcit(struct sk_buff *skb)
+static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
+        struct net *net = sock_net(skb->sk);
+        struct net_device *dev;
+        struct nlattr *tb[IFLA_MAX+1];
+        u32 ext_filter_mask = 0;
+        u16 min_ifinfo_dump_size = 0;
+        nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy);
+        if (tb[IFLA_EXT_MASK])
+                ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+        if (!ext_filter_mask)
+                return NLMSG_GOODSIZE;
+        /*
+         * traverse the list of net devices and compute the minimum
+         * buffer size based upon the filter mask.
+         */
+        list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+                min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
+                                             if_nlmsg_size(dev,
+                                                           ext_filter_mask));
+        }
        return min_ifinfo_dump_size;
 }
@@ -1913,13 +1951,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
        int err = -ENOBUFS;
        size_t if_info_size;
-        skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL);
+        skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
        if (skb == NULL)
                goto errout;
-        min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size);
+        err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
-        err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in if_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
@@ -1977,7 +2013,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                        return -EOPNOTSUPP;
                calcit = rtnl_get_calcit(family, type);
                if (calcit)
-                        min_dump_alloc = calcit(skb);
+                        min_dump_alloc = calcit(skb, nlh);
                __rtnl_unlock();
                rtnl = net->rtnl;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b59414a0c1ee..6ef66af12291 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -65,7 +65,7 @@
   it is infeasible task. The most general solutions would be
   to keep skb->encapsulation counter (sort of local ttl),
   and silently drop packet when it expires. It is a good
-   solution, but it supposes maintaing new variable in ALL
+   solution, but it supposes maintaining new variable in ALL
   skb, even if no tunneling is used.
   Current solution: xmit_recursion breaks dead loops. This is a percpu
@@ -91,14 +91,14 @@
   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
-   taking into account fragmentation. TO be short, tt is not solution at all.
+   taking into account fragmentation. TO be short, ttl is not solution at all.
   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
-   fastly degrades to value <68, where looping stops.
+   rapidly degrades to value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
@@ -457,8 +457,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
   GRE tunnels with enabled checksum. Tell them "thank you".
   Well, I wonder, rfc1812 was written by Cisco employee,
-   what the hell these idiots break standrads established
+   what the hell these idiots break standards established
-   by themself???
+   by themselves???
 */
        const struct iphdr *iph = (const struct iphdr *)skb->data;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index cfc82cf339f6..4398a45a9600 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -631,6 +631,7 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
+        err = -EOPNOTSUPP;
        if (flags & MSG_OOB)
                goto out;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 37755ccc0e96..22ef5f9fd2ff 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3240,7 +3240,8 @@ void __init tcp_init(void)
 {
        struct sk_buff *skb = NULL;
        unsigned long limit;
-        int i, max_share, cnt;
+        int max_share, cnt;
+        unsigned int i;
        unsigned long jiffy = jiffies;
        BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3283,7 +3284,7 @@ void __init tcp_init(void)
                                        &tcp_hashinfo.bhash_size,
                                        NULL,
                                        64 * 1024);
-        tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
+        tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
        for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
                spin_lock_init(&tcp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 63418185f524..e3db3f915114 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
        skb_push(skb, sizeof(*iph));
        skb_reset_network_header(skb);
+        skb_mac_header_rebuild(skb);
-        memmove(skb->data - skb->mac_len, skb_mac_header(skb),
-                skb->mac_len);
-        skb_set_mac_header(skb, -skb->mac_len);
        xfrm4_beet_make_header(skb);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 534972e114ac..ed4bf11ef9f4 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-        const unsigned char *old_mac;
        int err = -EINVAL;
        if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
@@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
        if (!(x->props.flags & XFRM_STATE_NOECN))
                ipip_ecn_decapsulate(skb);
-        old_mac = skb_mac_header(skb);
-        skb_set_mac_header(skb, -skb->mac_len);
-        memmove(skb_mac_header(skb), old_mac, skb->mac_len);
        skb_reset_network_header(skb);
+        skb_mac_header_rebuild(skb);
        err = 0;
 out:
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index c7e95c8c579f..5aa3981a3922 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1926,8 +1926,10 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
        };
        dst = ip6_route_output(net, NULL, &fl6);
-        if (!dst)
+        if (dst->error) {
+                dst_release(dst);
                goto out_free;
+        }
        skb_dst_drop(skb);
        skb_dst_set(skb, dst);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 8d817018c188..3dcdb81ec3e8 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1550,9 +1550,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
                         &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
        dst = ip6_route_output(net, NULL, &fl6);
-        if (dst == NULL)
+        if (dst->error) {
+                dst_release(dst);
                return;
+        }
        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
        if (IS_ERR(dst))
                return;
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index a81ce9450750..9949a356d62c 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
        struct ipv6hdr *ip6h;
-        const unsigned char *old_mac;
        int size = sizeof(struct ipv6hdr);
        int err;
@@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
        __skb_push(skb, size);
        skb_reset_network_header(skb);
+        skb_mac_header_rebuild(skb);
-        old_mac = skb_mac_header(skb);
-        skb_set_mac_header(skb, -skb->mac_len);
-        memmove(skb_mac_header(skb), old_mac, skb->mac_len);
        xfrm6_beet_make_header(skb);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 261e6e6f487e..9f2095b19ad0 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
        int err = -EINVAL;
-        const unsigned char *old_mac;
        if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
                goto out;
@@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
        if (!(x->props.flags & XFRM_STATE_NOECN))
                ipip6_ecn_decapsulate(skb);
-        old_mac = skb_mac_header(skb);
-        skb_set_mac_header(skb, -skb->mac_len);
-        memmove(skb_mac_header(skb), old_mac, skb->mac_len);
        skb_reset_network_header(skb);
+        skb_mac_header_rebuild(skb);
        err = 0;
 out:
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 611c3359b94d..2555816e7788 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -232,6 +232,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        __be16 dport = 0;               /* destination port to forward */
        unsigned int flags;
        struct ip_vs_conn_param param;
+        const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
        union nf_inet_addr snet;        /* source network of the client,
                                           after masking */
@@ -267,7 +268,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        {
                int protocol = iph.protocol;
                const union nf_inet_addr *vaddr = &iph.daddr;
-                const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
                __be16 vport = 0;
                if (dst_port == svc->port) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 76613f5a55c0..ed86a3be678e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -404,19 +404,49 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
                           &net->ct.hash[repl_hash]);
 }
-void nf_conntrack_hash_insert(struct nf_conn *ct)
+int
+nf_conntrack_hash_check_insert(struct nf_conn *ct)
 {
        struct net *net = nf_ct_net(ct);
        unsigned int hash, repl_hash;
+        struct nf_conntrack_tuple_hash *h;
+        struct hlist_nulls_node *n;
        u16 zone;
        zone = nf_ct_zone(ct);
-        hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+        hash = hash_conntrack(net, zone,
-        repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+                              &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+        repl_hash = hash_conntrack(net, zone,
+                                   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+        spin_lock_bh(&nf_conntrack_lock);
+        /* See if there's one in the list already, including reverse */
+        hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
+                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                      &h->tuple) &&
+                    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+                        goto out;
+        hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                      &h->tuple) &&
+                    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+                        goto out;
+        add_timer(&ct->timeout);
+        nf_conntrack_get(&ct->ct_general);
        __nf_conntrack_hash_insert(ct, hash, repl_hash);
+        NF_CT_STAT_INC(net, insert);
+        spin_unlock_bh(&nf_conntrack_lock);
+        return 0;
+out:
+        NF_CT_STAT_INC(net, insert_failed);
+        spin_unlock_bh(&nf_conntrack_lock);
+        return -EEXIST;
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 /* Confirm a connection given skb; places it in hash table */
 int
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 28d0312d890a..04fb409623d2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1404,15 +1404,12 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
                                                    nf_ct_protonum(ct));
                if (helper == NULL) {
                        rcu_read_unlock();
-                        spin_unlock_bh(&nf_conntrack_lock);
 #ifdef CONFIG_MODULES
                        if (request_module("nfct-helper-%s", helpname) < 0) {
-                                spin_lock_bh(&nf_conntrack_lock);
                                err = -EOPNOTSUPP;
                                goto err1;
                        }
-                        spin_lock_bh(&nf_conntrack_lock);
                        rcu_read_lock();
                        helper = __nf_conntrack_helper_find(helpname,
                                                            nf_ct_l3num(ct),
@@ -1505,8 +1502,10 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
        if (tstamp)
                tstamp->start = ktime_to_ns(ktime_get_real());
-        add_timer(&ct->timeout);
+        err = nf_conntrack_hash_check_insert(ct);
-        nf_conntrack_hash_insert(ct);
+        if (err < 0)
+                goto err2;
        rcu_read_unlock();
        return ct;
@@ -1527,6 +1526,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
        struct nf_conntrack_tuple otuple, rtuple;
        struct nf_conntrack_tuple_hash *h = NULL;
        struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+        struct nf_conn *ct;
        u_int8_t u3 = nfmsg->nfgen_family;
        u16 zone;
        int err;
@@ -1547,27 +1547,22 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
                        return err;
        }
-        spin_lock_bh(&nf_conntrack_lock);
        if (cda[CTA_TUPLE_ORIG])
-                h = __nf_conntrack_find(net, zone, &otuple);
+                h = nf_conntrack_find_get(net, zone, &otuple);
        else if (cda[CTA_TUPLE_REPLY])
-                h = __nf_conntrack_find(net, zone, &rtuple);
+                h = nf_conntrack_find_get(net, zone, &rtuple);
        if (h == NULL) {
                err = -ENOENT;
                if (nlh->nlmsg_flags & NLM_F_CREATE) {
-                        struct nf_conn *ct;
                        enum ip_conntrack_events events;
                        ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
                                                        &rtuple, u3);
-                        if (IS_ERR(ct)) {
+                        if (IS_ERR(ct))
-                                err = PTR_ERR(ct);
+                                return PTR_ERR(ct);
-                                goto out_unlock;
-                        }
                        err = 0;
-                        nf_conntrack_get(&ct->ct_general);
-                        spin_unlock_bh(&nf_conntrack_lock);
                        if (test_bit(IPS_EXPECTED_BIT, &ct->status))
                                events = IPCT_RELATED;
                        else
@@ -1582,23 +1577,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
                                                      ct, NETLINK_CB(skb).pid,
                                                      nlmsg_report(nlh));
                        nf_ct_put(ct);
-                } else
+                }
-                        spin_unlock_bh(&nf_conntrack_lock);
                return err;
        }
        /* implicit 'else' */
-        /* We manipulate the conntrack inside the global conntrack table lock,
-         * so there's no need to increase the refcount */
        err = -EEXIST;
+        ct = nf_ct_tuplehash_to_ctrack(h);
        if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
-                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+                spin_lock_bh(&nf_conntrack_lock);
                err = ctnetlink_change_conntrack(ct, cda);
+                spin_unlock_bh(&nf_conntrack_lock);
                if (err == 0) {
-                        nf_conntrack_get(&ct->ct_general);
-                        spin_unlock_bh(&nf_conntrack_lock);
                        nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
                                                      (1 << IPCT_ASSURED) |
                                                      (1 << IPCT_HELPER) |
@@ -1607,15 +1598,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
                                                      (1 << IPCT_MARK),
                                                      ct, NETLINK_CB(skb).pid,
                                                      nlmsg_report(nlh));
-                        nf_ct_put(ct);
+                }
-                } else
-                        spin_unlock_bh(&nf_conntrack_lock);
-                return err;
        }
-out_unlock:
+        nf_ct_put(ct);
-        spin_unlock_bh(&nf_conntrack_lock);
        return err;
 }
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index b3a7db678b8d..ce60cf0f6c11 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -203,6 +203,27 @@ err:
        return status;
 }
+#ifdef CONFIG_BRIDGE_NETFILTER
+/* When called from bridge netfilter, skb->data must point to MAC header
+ * before calling skb_gso_segment(). Else, original MAC header is lost
+ * and segmented skbs will be sent to wrong destination.
+ */
+static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
+{
+        if (skb->nf_bridge)
+                __skb_push(skb, skb->network_header - skb->mac_header);
+}
+static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
+{
+        if (skb->nf_bridge)
+                __skb_pull(skb, skb->network_header - skb->mac_header);
+}
+#else
+#define nf_bridge_adjust_skb_data(s) do {} while (0)
+#define nf_bridge_adjust_segmented_data(s) do {} while (0)
+#endif
 int nf_queue(struct sk_buff *skb,
             struct list_head *elem,
             u_int8_t pf, unsigned int hook,
@@ -212,7 +233,7 @@ int nf_queue(struct sk_buff *skb,
             unsigned int queuenum)
 {
        struct sk_buff *segs;
-        int err;
+        int err = -EINVAL;
        unsigned int queued;
        if (!skb_is_gso(skb))
@@ -228,23 +249,25 @@ int nf_queue(struct sk_buff *skb,
                break;
        }
+        nf_bridge_adjust_skb_data(skb);
        segs = skb_gso_segment(skb, 0);
        /* Does not use PTR_ERR to limit the number of error codes that can be
         * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
         * 'ignore this hook'.
         */
        if (IS_ERR(segs))
-                return -EINVAL;
+                goto out_err;
        queued = 0;
        err = 0;
        do {
                struct sk_buff *nskb = segs->next;
                segs->next = NULL;
-                if (err == 0)
+                if (err == 0) {
+                        nf_bridge_adjust_segmented_data(segs);
                        err = __nf_queue(segs, elem, pf, hook, indev,
                                           outdev, okfn, queuenum);
+                }
                if (err == 0)
                        queued++;
                else
@@ -252,11 +275,12 @@ int nf_queue(struct sk_buff *skb,
                segs = nskb;
        } while (segs);
-        /* also free orig skb if only some segments were queued */
+        if (queued) {
-        if (unlikely(err && queued))
-                err = 0;
-        if (err == 0)
                kfree_skb(skb);
+                return 0;
+        }
+  out_err:
+        nf_bridge_adjust_segmented_data(skb);
        return err;
 }
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 3aae66facf9f..4d5057902839 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -152,9 +152,10 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
        fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
                           (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
        dst = ip6_route_output(net, NULL, &fl6);
-        if (dst == NULL)
+        if (dst->error) {
+                dst_release(dst);
                return false;
+        }
        skb_dst_drop(skb);
        skb_dst_set(skb, dst);
        skb->dev      = dst->dev;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e83d61ca78ca..5da548fa7ae9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -501,9 +501,8 @@ tfifo_dequeue:
                /* if more time remaining? */
                if (cb->time_to_send <= psched_get_time()) {
-                        skb = qdisc_dequeue_tail(sch);
+                        __skb_unlink(skb, &sch->q);
-                        if (unlikely(!skb))
+                        sch->qstats.backlog -= qdisc_pkt_len(skb);
-                                goto qdisc_dequeue;
 #ifdef CONFIG_NET_CLS_ACT
                        /*
@@ -539,7 +538,6 @@ deliver:
                qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
        }
-qdisc_dequeue:
        if (q->qdisc) {
                skb = q->qdisc->ops->dequeue(q->qdisc);
                if (skb)
author	David S. Miller <davem@davemloft.net>	2012-02-26 21:55:51 -0500
committer	David S. Miller <davem@davemloft.net>	2012-02-26 21:55:51 -0500
commit	ff4783ce78c08d2990126ce1874250ae8e72bbd2 (patch)
tree	5c95885a4ab768101dd72942b57c238d452a7565 /net
parent	622121719934f60378279eb440d3cec2fc3176d2 (diff)
parent	203738e548cefc3fc3c2f73a9063176c9f3583d5 (diff)