author    Linus Torvalds <torvalds@linux-foundation.org>    2014-10-11 21:19:00 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-10-11 21:19:00 -0400
commit    ca321885b0511a85e2d1cd40caafedbeb18f4af6 (patch)
tree      0042e8674aff7ae5785db467836d8d4101906f70 /net
parent    052db7ec86dff26f734031c3ef5c2c03a94af0af (diff)
parent    01d2d484e49e9bc0ed9b5fdaf345a0e2bf35ffed (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
 "This set fixes a bunch of fallout from the changes that went in during
  this merge window, particularly:

   - Fix fsl_pq_mdio (Claudiu Manoil) and fm10k (Pranith Kumar) build
     failures.

   - Several networking drivers do atomic_set() on page counts where
     that's not exactly legal.  From Eric Dumazet.

   - Make __skb_flow_get_ports() work cleanly with unaligned data, from
     Alexander Duyck.

   - Fix some kernel-doc buglets in rfkill and netlabel, from Fabian
     Frederick.

   - Unbalanced enable_irq_wake usage in bcmgenet and systemport
     drivers, from Florian Fainelli.

   - pxa168_eth needs to depend on HAS_DMA, from Geert Uytterhoeven.

   - Multi-dequeue in the qdisc layer severely bypasses the fairness
     limits the previous code used to enforce, reintroduce in a way that
     at the same time doesn't compromise bulk dequeue opportunities.
     From Jesper Dangaard Brouer.

   - macvlan receive path unnecessarily hops through a softirq by using
     netif_rx() instead of netif_receive_skb().  From Jason Baron"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (51 commits)
  net: systemport: avoid unbalanced enable_irq_wake calls
  net: bcmgenet: avoid unbalanced enable_irq_wake calls
  net: bcmgenet: fix off-by-one in incrementing read pointer
  net: fix races in page->_count manipulation
  mlx4: fix race accessing page->_count
  ixgbe: fix race accessing page->_count
  igb: fix race accessing page->_count
  fm10k: fix race accessing page->_count
  net/phy: micrel: Add clock support for KSZ8021/KSZ8031
  flow-dissector: Fix alignment issue in __skb_flow_get_ports
  net: filter: fix the comments
  Documentation: replace __sk_run_filter with __bpf_prog_run
  macvlan: optimize the receive path
  macvlan: pass 'bool' type to macvlan_count_rx()
  drivers: net: xgene: Add 10GbE ethtool support
  drivers: net: xgene: Add 10GbE support
  drivers: net: xgene: Preparing for adding 10GbE support
  dtb: Add 10GbE node to APM X-Gene SoC device tree
  Documentation: dts: Update section header for APM X-Gene
  MAINTAINERS: Update APM X-Gene section
  ...
Diffstat (limited to 'net')
-rw-r--r--  net/Kconfig                  |  1
-rw-r--r--  net/core/filter.c            |  9
-rw-r--r--  net/core/flow_dissector.c    | 36
-rw-r--r--  net/core/skbuff.c            | 35
-rw-r--r--  net/netfilter/nft_reject.c   | 10
-rw-r--r--  net/netlabel/netlabel_kapi.c |  1
-rw-r--r--  net/rfkill/core.c            |  4
-rw-r--r--  net/sched/sch_generic.c      | 20
8 files changed, 69 insertions(+), 47 deletions(-)
diff --git a/net/Kconfig b/net/Kconfig
index d6b138e2c263..6272420a721b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -6,6 +6,7 @@ menuconfig NET
         bool "Networking support"
         select NLATTR
         select GENERIC_NET_UTILS
+        select ANON_INODES
         ---help---
           Unless you really know what you are doing, you should say Y here.
           The reason is that some programs need kernel networking support even
diff --git a/net/core/filter.c b/net/core/filter.c
index fcd3f6742a6a..647b12265e18 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,9 +51,9 @@
  * @skb: buffer to filter
  *
  * Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -566,11 +566,8 @@ err:
 
 /* Security:
  *
- * A BPF program is able to use 16 cells of memory to store intermediate
- * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
- *
  * As we dont want to clear mem[] array for each packet going through
- * sk_run_filter(), we check that filter loaded by user never try to read
+ * __bpf_prog_run(), we check that filter loaded by user never try to read
  * a cell if not previously written, and we check all branches to be sure
  * a malicious user doesn't try to abuse us.
  */
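The comment above refers to the rule that a filter's 16 scratch cells may never be read before they are written. Below is a deliberately simplified, linear-scan userspace sketch of that rule; the instruction encoding is invented and the kernel's actual check (check_load_and_stores()) additionally follows branches:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BPF_MEMWORDS 16

struct insn {
        enum { OP_ST, OP_LD_MEM, OP_OTHER } op; /* hypothetical opcode class */
        uint32_t k;                             /* scratch cell index */
};

static bool scratch_ok(const struct insn *prog, int len)
{
        uint32_t written = 0;                   /* bit i set => mem[i] was written */

        for (int i = 0; i < len; i++) {
                if (prog[i].k >= BPF_MEMWORDS)
                        return false;           /* out-of-range cell */
                if (prog[i].op == OP_ST)
                        written |= 1u << prog[i].k;
                else if (prog[i].op == OP_LD_MEM &&
                         !(written & (1u << prog[i].k)))
                        return false;           /* read of an uninitialized cell */
        }
        return true;
}

int main(void)
{
        struct insn good[] = { { OP_ST, 3 }, { OP_LD_MEM, 3 } };
        struct insn bad[]  = { { OP_LD_MEM, 3 } };

        printf("good: %d, bad: %d\n", scratch_ok(good, 2), scratch_ok(bad, 1));
        return 0;
}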
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8560dea58803..45084938c403 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -100,6 +100,13 @@ ip:
                 if (ip_is_fragment(iph))
                         ip_proto = 0;
 
+                /* skip the address processing if skb is NULL. The assumption
+                 * here is that if there is no skb we are not looking for flow
+                 * info but lengths and protocols.
+                 */
+                if (!skb)
+                        break;
+
                 iph_to_flow_copy_addrs(flow, iph);
                 break;
         }
@@ -114,17 +121,15 @@ ipv6:
                         return false;
 
                 ip_proto = iph->nexthdr;
-                flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
-                flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
                 nhoff += sizeof(struct ipv6hdr);
 
-                /* skip the flow label processing if skb is NULL. The
-                 * assumption here is that if there is no skb we are not
-                 * looking for flow info as much as we are length.
-                 */
+                /* see comment above in IPv4 section */
                 if (!skb)
                         break;
 
+                flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+                flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+
                 flow_label = ip6_flowlabel(iph);
                 if (flow_label) {
                         /* Awesome, IPv6 packet has a flow label so we can
@@ -231,9 +236,13 @@ ipv6:
 
         flow->n_proto = proto;
         flow->ip_proto = ip_proto;
-        flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
         flow->thoff = (u16) nhoff;
 
+        /* unless skb is set we don't need to record port info */
+        if (skb)
+                flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+                                                   data, hlen);
+
         return true;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
@@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 
         switch (keys->ip_proto) {
         case IPPROTO_TCP: {
-                const struct tcphdr *tcph;
-                struct tcphdr _tcph;
+                /* access doff as u8 to avoid unaligned access */
+                const u8 *doff;
+                u8 _doff;
 
-                tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
-                                            data, hlen, &_tcph);
-                if (!tcph)
+                doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
+                                            data, hlen, &_doff);
+                if (!doff)
                         return poff;
 
-                poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+                poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
                 break;
         }
         case IPPROTO_UDP:
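The __skb_get_poff() hunk above fetches only the single byte at offset 12 of the TCP header, where the data-offset nibble lives, instead of mapping a whole struct tcphdr that may sit at an unaligned offset. A small standalone sketch of the arithmetic; the test byte values are made up:

#include <stdint.h>
#include <stdio.h>

/* high nibble of the byte at TCP header offset 12 is the header length
 * in 32-bit words, so (byte & 0xF0) >> 2 == ((byte >> 4) * 4) bytes */
static unsigned int tcp_hdrlen_from_doff_byte(uint8_t doff_byte)
{
        return (doff_byte & 0xF0) >> 2;
}

int main(void)
{
        printf("%u\n", tcp_hdrlen_from_doff_byte(0x50));   /* 20: minimal header */
        printf("%u\n", tcp_hdrlen_from_doff_byte(0xA0));   /* 40: header with options */
        return 0;
}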
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7b3df0d518ab..829d013745ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -360,18 +360,29 @@ refill:
                         goto end;
                 }
                 nc->frag.size = PAGE_SIZE << order;
-recycle:
-                atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+                /* Even if we own the page, we do not use atomic_set().
+                 * This would break get_page_unless_zero() users.
+                 */
+                atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+                           &nc->frag.page->_count);
                 nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
                 nc->frag.offset = 0;
         }
 
         if (nc->frag.offset + fragsz > nc->frag.size) {
-                /* avoid unnecessary locked operations if possible */
-                if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
-                    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
-                        goto recycle;
-                goto refill;
+                if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+                        if (!atomic_sub_and_test(nc->pagecnt_bias,
+                                                 &nc->frag.page->_count))
+                                goto refill;
+                        /* OK, page count is 0, we can safely set it */
+                        atomic_set(&nc->frag.page->_count,
+                                   NETDEV_PAGECNT_MAX_BIAS);
+                } else {
+                        atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+                                   &nc->frag.page->_count);
+                }
+                nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+                nc->frag.offset = 0;
         }
 
         data = page_address(nc->frag.page) + nc->frag.offset;
@@ -4126,11 +4137,11 @@ EXPORT_SYMBOL(skb_vlan_untag);
 /**
  * alloc_skb_with_frags - allocate skb with page frags
  *
- * header_len: size of linear part
- * data_len: needed length in frags
- * max_page_order: max page order desired.
- * errcode: pointer to error code if any
- * gfp_mask: allocation mask
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
  *
  * This can be used to allocate a paged skb, given a maximal order for frags.
  */
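The page-recycling hunk above exists because a concurrent get_page_unless_zero() may legitimately raise page->_count at any moment: the owner may only atomic_set() after proving the count dropped to zero, and must otherwise rebias with atomic_add(). A rough userspace analogue of that idea using C11 atomics; the names and the BIAS value are illustrative, not the kernel's:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BIAS 32768

/* reader side: take a reference only if the object is still live */
static bool get_unless_zero(atomic_int *cnt)
{
        int v = atomic_load(cnt);

        while (v != 0)
                if (atomic_compare_exchange_weak(cnt, &v, v + 1))
                        return true;
        return false;
}

/* owner side: try to recycle; returns false if others still hold refs */
static bool owner_recycle(atomic_int *cnt, int bias)
{
        if (atomic_load(cnt) != bias) {
                /* outside references exist(ed): drop our bias and see
                 * whether that brought the count to zero */
                if (atomic_fetch_sub(cnt, bias) != bias)
                        return false;   /* still referenced, get a new page */
                /* count is 0 now, so no new reference can appear and a
                 * plain store is safe */
                atomic_store(cnt, BIAS);
        } else {
                /* we look like the only user, but a racing
                 * get_unless_zero() could increment right now, so top
                 * the count up with an add, never a store */
                atomic_fetch_add(cnt, BIAS - bias);
        }
        return true;
}

int main(void)
{
        atomic_int cnt = BIAS;

        (void)get_unless_zero(&cnt);            /* a reader takes a reference */
        printf("recycled: %d\n", owner_recycle(&cnt, BIAS));
        return 0;
}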
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ec8a456092a7..57d3e1af5630 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -72,7 +72,7 @@ nla_put_failure:
 }
 EXPORT_SYMBOL_GPL(nft_reject_dump);
 
-static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
         [NFT_REJECT_ICMPX_NO_ROUTE]        = ICMP_NET_UNREACH,
         [NFT_REJECT_ICMPX_PORT_UNREACH]    = ICMP_PORT_UNREACH,
         [NFT_REJECT_ICMPX_HOST_UNREACH]    = ICMP_HOST_UNREACH,
@@ -81,8 +81,7 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmp_code(u8 code)
 {
-        if (code > NFT_REJECT_ICMPX_MAX)
-                return -EINVAL;
+        BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
         return icmp_code_v4[code];
 }
@@ -90,7 +89,7 @@ int nft_reject_icmp_code(u8 code)
 EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
 
 
-static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
         [NFT_REJECT_ICMPX_NO_ROUTE]        = ICMPV6_NOROUTE,
         [NFT_REJECT_ICMPX_PORT_UNREACH]    = ICMPV6_PORT_UNREACH,
         [NFT_REJECT_ICMPX_HOST_UNREACH]    = ICMPV6_ADDR_UNREACH,
@@ -99,8 +98,7 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmpv6_code(u8 code)
 {
-        if (code > NFT_REJECT_ICMPX_MAX)
-                return -EINVAL;
+        BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
         return icmp_code_v6[code];
 }
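The two lookup tables above are indexed by codes running from 0 up to and including NFT_REJECT_ICMPX_MAX, so they need NFT_REJECT_ICMPX_MAX + 1 slots; the old size left the highest code without a slot. A tiny standalone example of the sizing rule; the enum and values are illustrative, not the kernel's:

#include <stdio.h>

enum reject_code {
        CODE_NO_ROUTE,
        CODE_PORT_UNREACH,
        CODE_HOST_UNREACH,
        CODE_ADMIN_PROHIBITED,
        __CODE_MAX
};
#define CODE_MAX (__CODE_MAX - 1)

/* sizing the array [CODE_MAX] would be one slot short: the valid
 * indices are 0..CODE_MAX, which is CODE_MAX + 1 entries */
static const unsigned char code_map[CODE_MAX + 1] = {
        [CODE_NO_ROUTE]          = 0,
        [CODE_PORT_UNREACH]      = 3,
        [CODE_HOST_UNREACH]      = 1,
        [CODE_ADMIN_PROHIBITED]  = 13,
};

int main(void)
{
        printf("%d\n", code_map[CODE_ADMIN_PROHIBITED]);   /* last valid slot */
        return 0;
}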
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 0b4692dd1c5e..a845cd4cf21e 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -246,7 +246,6 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
  * @addr: IP address in network byte order (struct in[6]_addr)
  * @mask: address mask in network byte order (struct in[6]_addr)
  * @family: address family
- * @secid: LSM secid value for the entry
  * @audit_info: NetLabel audit information
  *
  * Description:
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b3b16c070a7f..fa7cd792791c 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
 /**
  * __rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
  *
  * This function sets the state of all switches of given type,
  * unless a specific switch is claimed by userspace (in which case,
@@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
 /**
  * rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
  *
  * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
  * Please refer to __rfkill_switch_all() for details.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 38d58e6cef07..6efca30894aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -57,7 +57,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
                                  struct sk_buff *skb,
-                                 const struct netdev_queue *txq)
+                                 const struct netdev_queue *txq,
+                                 int *packets)
 {
         int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
 
@@ -70,6 +71,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
                 bytelimit -= nskb->len; /* covers GSO len */
                 skb->next = nskb;
                 skb = nskb;
+                (*packets)++; /* GSO counts as one pkt */
         }
         skb->next = NULL;
 }
@@ -77,11 +79,13 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
-static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
+                                   int *packets)
 {
         struct sk_buff *skb = q->gso_skb;
         const struct netdev_queue *txq = q->dev_queue;
 
+        *packets = 1;
         *validate = true;
         if (unlikely(skb)) {
                 /* check the reason of requeuing without tx lock first */
@@ -98,7 +102,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
                     !netif_xmit_frozen_or_stopped(txq)) {
                         skb = q->dequeue(q);
                         if (skb && qdisc_may_bulk(q))
-                                try_bulk_dequeue_skb(q, skb, txq);
+                                try_bulk_dequeue_skb(q, skb, txq, packets);
                 }
         }
         return skb;
@@ -204,7 +208,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
  *                              >0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+static inline int qdisc_restart(struct Qdisc *q, int *packets)
 {
         struct netdev_queue *txq;
         struct net_device *dev;
@@ -213,7 +217,7 @@ static inline int qdisc_restart(struct Qdisc *q)
         bool validate;
 
         /* Dequeue packet */
-        skb = dequeue_skb(q, &validate);
+        skb = dequeue_skb(q, &validate, packets);
         if (unlikely(!skb))
                 return 0;
 
@@ -227,14 +231,16 @@ static inline int qdisc_restart(struct Qdisc *q)
 void __qdisc_run(struct Qdisc *q)
 {
         int quota = weight_p;
+        int packets;
 
-        while (qdisc_restart(q)) {
+        while (qdisc_restart(q, &packets)) {
                 /*
                  * Ordered by possible occurrence: Postpone processing if
                  * 1. we've exceeded packet quota
                  * 2. another process needs the CPU;
                  */
-                if (--quota <= 0 || need_resched()) {
+                quota -= packets;
+                if (quota <= 0 || need_resched()) {
                         __netif_schedule(q);
                         break;
                 }
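With bulk dequeue, one qdisc_restart() can now hand the driver a whole list of packets, so charging the quota by one per restart would let a busy queue exceed its fair share; the change above charges the real packet count instead. A toy standalone model of that accounting; the burst sizes and the helper are invented for illustration:

#include <stdio.h>

/* pretend each restart dequeues a small burst and reports its size */
static int fake_qdisc_restart(int *packets)
{
        static const int bursts[] = { 1, 4, 2, 8, 1, 0 };
        static int i;

        *packets = bursts[i++];
        return *packets;                /* 0 => queue empty */
}

int main(void)
{
        int quota = 8;                  /* stand-in for weight_p */
        int packets;

        while (fake_qdisc_restart(&packets)) {
                quota -= packets;       /* charge the whole burst, not just 1 */
                if (quota <= 0) {
                        puts("quota exhausted: reschedule");
                        break;
                }
        }
        return 0;
}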