author    Linus Torvalds <torvalds@linux-foundation.org>    2014-10-11 21:19:00 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-10-11 21:19:00 -0400
commit    ca321885b0511a85e2d1cd40caafedbeb18f4af6 (patch)
tree      0042e8674aff7ae5785db467836d8d4101906f70 /net
parent    052db7ec86dff26f734031c3ef5c2c03a94af0af (diff)
parent    01d2d484e49e9bc0ed9b5fdaf345a0e2bf35ffed (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
 "This set fixes a bunch of fallout from the changes that went in during
  this merge window, particularly:

   - Fix fsl_pq_mdio (Claudiu Manoil) and fm10k (Pranith Kumar) build
     failures.

   - Several networking drivers do atomic_set() on page counts where
     that's not exactly legal.  From Eric Dumazet.

   - Make __skb_flow_get_ports() work cleanly with unaligned data, from
     Alexander Duyck.

   - Fix some kernel-doc buglets in rfkill and netlabel, from Fabian
     Frederick.

   - Unbalanced enable_irq_wake usage in bcmgenet and systemport
     drivers, from Florian Fainelli.

   - pxa168_eth needs to depend on HAS_DMA, from Geert Uytterhoeven.

   - Multi-dequeue in the qdisc layer severely bypasses the fairness
     limits the previous code used to enforce, reintroduce in a way that
     at the same time doesn't compromise bulk dequeue opportunities.
     From Jesper Dangaard Brouer.

   - macvlan receive path unnecessarily hops through a softirq by using
     netif_rx() instead of netif_receive_skb().  From Jason Baron"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (51 commits)
  net: systemport: avoid unbalanced enable_irq_wake calls
  net: bcmgenet: avoid unbalanced enable_irq_wake calls
  net: bcmgenet: fix off-by-one in incrementing read pointer
  net: fix races in page->_count manipulation
  mlx4: fix race accessing page->_count
  ixgbe: fix race accessing page->_count
  igb: fix race accessing page->_count
  fm10k: fix race accessing page->_count
  net/phy: micrel: Add clock support for KSZ8021/KSZ8031
  flow-dissector: Fix alignment issue in __skb_flow_get_ports
  net: filter: fix the comments
  Documentation: replace __sk_run_filter with __bpf_prog_run
  macvlan: optimize the receive path
  macvlan: pass 'bool' type to macvlan_count_rx()
  drivers: net: xgene: Add 10GbE ethtool support
  drivers: net: xgene: Add 10GbE support
  drivers: net: xgene: Preparing for adding 10GbE support
  dtb: Add 10GbE node to APM X-Gene SoC device tree
  Documentation: dts: Update section header for APM X-Gene
  MAINTAINERS: Update APM X-Gene section
  ...
Diffstat (limited to 'net')
-rw-r--r--  net/Kconfig                  |  1
-rw-r--r--  net/core/filter.c            |  9
-rw-r--r--  net/core/flow_dissector.c    | 36
-rw-r--r--  net/core/skbuff.c            | 35
-rw-r--r--  net/netfilter/nft_reject.c   | 10
-rw-r--r--  net/netlabel/netlabel_kapi.c |  1
-rw-r--r--  net/rfkill/core.c            |  4
-rw-r--r--  net/sched/sch_generic.c      | 20
8 files changed, 69 insertions(+), 47 deletions(-)
diff --git a/net/Kconfig b/net/Kconfig
index d6b138e2c263..6272420a721b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -6,6 +6,7 @@ menuconfig NET
         bool "Networking support"
         select NLATTR
         select GENERIC_NET_UTILS
+        select ANON_INODES
         ---help---
           Unless you really know what you are doing, you should say Y here.
           The reason is that some programs need kernel networking support even
diff --git a/net/core/filter.c b/net/core/filter.c
index fcd3f6742a6a..647b12265e18 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,9 +51,9 @@
  * @skb: buffer to filter
  *
  * Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -566,11 +566,8 @@ err:
 
 /* Security:
  *
- * A BPF program is able to use 16 cells of memory to store intermediate
- * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
- *
  * As we dont want to clear mem[] array for each packet going through
- * sk_run_filter(), we check that filter loaded by user never try to read
+ * __bpf_prog_run(), we check that filter loaded by user never try to read
  * a cell if not previously written, and we check all branches to be sure
  * a malicious user doesn't try to abuse us.
  */
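The comment above refers to the rule that a filter's 16 scratch cells may never be read before they are written. Below is a deliberately simplified, linear-scan userspace sketch of that rule; the instruction encoding is invented and the kernel's actual check (check_load_and_stores()) additionally follows branches:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BPF_MEMWORDS 16

struct insn {
        enum { OP_ST, OP_LD_MEM, OP_OTHER } op; /* hypothetical opcode class */
        uint32_t k;                             /* scratch cell index */
};

static bool scratch_ok(const struct insn *prog, int len)
{
        uint32_t written = 0;                   /* bit i set => mem[i] was written */

        for (int i = 0; i < len; i++) {
                if (prog[i].k >= BPF_MEMWORDS)
                        return false;           /* out-of-range cell */
                if (prog[i].op == OP_ST)
                        written |= 1u << prog[i].k;
                else if (prog[i].op == OP_LD_MEM &&
                         !(written & (1u << prog[i].k)))
                        return false;           /* read of an uninitialized cell */
        }
        return true;
}

int main(void)
{
        struct insn good[] = { { OP_ST, 3 }, { OP_LD_MEM, 3 } };
        struct insn bad[]  = { { OP_LD_MEM, 3 } };

        printf("good: %d, bad: %d\n", scratch_ok(good, 2), scratch_ok(bad, 1));
        return 0;
}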
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8560dea58803..45084938c403 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -100,6 +100,13 @@ ip:
                 if (ip_is_fragment(iph))
                         ip_proto = 0;
 
+                /* skip the address processing if skb is NULL. The assumption
+                 * here is that if there is no skb we are not looking for flow
+                 * info but lengths and protocols.
+                 */
+                if (!skb)
+                        break;
+
                 iph_to_flow_copy_addrs(flow, iph);
                 break;
         }
@@ -114,17 +121,15 @@ ipv6:
                         return false;
 
                 ip_proto = iph->nexthdr;
-                flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
-                flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
                 nhoff += sizeof(struct ipv6hdr);
 
-                /* skip the flow label processing if skb is NULL. The
-                 * assumption here is that if there is no skb we are not
-                 * looking for flow info as much as we are length.
-                 */
+                /* see comment above in IPv4 section */
                 if (!skb)
                         break;
 
+                flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+                flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+
                 flow_label = ip6_flowlabel(iph);
                 if (flow_label) {
                         /* Awesome, IPv6 packet has a flow label so we can
@@ -231,9 +236,13 @@ ipv6:
 
         flow->n_proto = proto;
         flow->ip_proto = ip_proto;
-        flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
         flow->thoff = (u16) nhoff;
 
+        /* unless skb is set we don't need to record port info */
+        if (skb)
+                flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+                                                   data, hlen);
+
         return true;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
@@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 
         switch (keys->ip_proto) {
         case IPPROTO_TCP: {
-                const struct tcphdr *tcph;
-                struct tcphdr _tcph;
+                /* access doff as u8 to avoid unaligned access */
+                const u8 *doff;
+                u8 _doff;
 
-                tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
-                                            data, hlen, &_tcph);
-                if (!tcph)
+                doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
+                                            data, hlen, &_doff);
+                if (!doff)
                         return poff;
 
-                poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+                poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
                 break;
         }
         case IPPROTO_UDP:
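The __skb_get_poff() hunk above fetches only the single byte at offset 12 of the TCP header, where the data-offset nibble lives, instead of mapping a whole struct tcphdr that may sit at an unaligned offset. A small standalone sketch of the arithmetic; the test byte values are made up:

#include <stdint.h>
#include <stdio.h>

/* high nibble of the byte at TCP header offset 12 is the header length
 * in 32-bit words, so (byte & 0xF0) >> 2 == ((byte >> 4) * 4) bytes */
static unsigned int tcp_hdrlen_from_doff_byte(uint8_t doff_byte)
{
        return (doff_byte & 0xF0) >> 2;
}

int main(void)
{
        printf("%u\n", tcp_hdrlen_from_doff_byte(0x50));   /* 20: minimal header */
        printf("%u\n", tcp_hdrlen_from_doff_byte(0xA0));   /* 40: header with options */
        return 0;
}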
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7b3df0d518ab..829d013745ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -360,18 +360,29 @@ refill:
                         goto end;
                 }
                 nc->frag.size = PAGE_SIZE << order;
-recycle:
-                atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+                /* Even if we own the page, we do not use atomic_set().
+                 * This would break get_page_unless_zero() users.
+                 */
+                atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+                           &nc->frag.page->_count);
                 nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
                 nc->frag.offset = 0;
         }
 
         if (nc->frag.offset + fragsz > nc->frag.size) {
-                /* avoid unnecessary locked operations if possible */
-                if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
-                    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
-                        goto recycle;
-                goto refill;
+                if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+                        if (!atomic_sub_and_test(nc->pagecnt_bias,
+                                                 &nc->frag.page->_count))
+                                goto refill;
+                        /* OK, page count is 0, we can safely set it */
+                        atomic_set(&nc->frag.page->_count,
+                                   NETDEV_PAGECNT_MAX_BIAS);
+                } else {
+                        atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+                                   &nc->frag.page->_count);
+                }
+                nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+                nc->frag.offset = 0;
         }
 
         data = page_address(nc->frag.page) + nc->frag.offset;
@@ -4126,11 +4137,11 @@ EXPORT_SYMBOL(skb_vlan_untag);
 /**
  * alloc_skb_with_frags - allocate skb with page frags
  *
- * header_len: size of linear part
- * data_len: needed length in frags
- * max_page_order: max page order desired.
- * errcode: pointer to error code if any
- * gfp_mask: allocation mask
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
  *
  * This can be used to allocate a paged skb, given a maximal order for frags.
  */
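The page-recycling hunk above exists because a concurrent get_page_unless_zero() may legitimately raise page->_count at any moment: the owner may only atomic_set() after proving the count dropped to zero, and must otherwise rebias with atomic_add(). A rough userspace analogue of that idea using C11 atomics; the names and the BIAS value are illustrative, not the kernel's:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BIAS 32768

/* reader side: take a reference only if the object is still live */
static bool get_unless_zero(atomic_int *cnt)
{
        int v = atomic_load(cnt);

        while (v != 0)
                if (atomic_compare_exchange_weak(cnt, &v, v + 1))
                        return true;
        return false;
}

/* owner side: try to recycle; returns false if others still hold refs */
static bool owner_recycle(atomic_int *cnt, int bias)
{
        if (atomic_load(cnt) != bias) {
                /* outside references exist(ed): drop our bias and see
                 * whether that brought the count to zero */
                if (atomic_fetch_sub(cnt, bias) != bias)
                        return false;   /* still referenced, get a new page */
                /* count is 0 now, so no new reference can appear and a
                 * plain store is safe */
                atomic_store(cnt, BIAS);
        } else {
                /* we look like the only user, but a racing
                 * get_unless_zero() could increment right now, so top
                 * the count up with an add, never a store */
                atomic_fetch_add(cnt, BIAS - bias);
        }
        return true;
}

int main(void)
{
        atomic_int cnt = BIAS;

        (void)get_unless_zero(&cnt);            /* a reader takes a reference */
        printf("recycled: %d\n", owner_recycle(&cnt, BIAS));
        return 0;
}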
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ec8a456092a7..57d3e1af5630 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -72,7 +72,7 @@ nla_put_failure:
 }
 EXPORT_SYMBOL_GPL(nft_reject_dump);
 
-static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
         [NFT_REJECT_ICMPX_NO_ROUTE]        = ICMP_NET_UNREACH,
         [NFT_REJECT_ICMPX_PORT_UNREACH]    = ICMP_PORT_UNREACH,
         [NFT_REJECT_ICMPX_HOST_UNREACH]    = ICMP_HOST_UNREACH,
@@ -81,8 +81,7 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmp_code(u8 code)
 {
-        if (code > NFT_REJECT_ICMPX_MAX)
-                return -EINVAL;
+        BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
         return icmp_code_v4[code];
 }
@@ -90,7 +89,7 @@ int nft_reject_icmp_code(u8 code)
 EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
 
 
-static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
         [NFT_REJECT_ICMPX_NO_ROUTE]        = ICMPV6_NOROUTE,
         [NFT_REJECT_ICMPX_PORT_UNREACH]    = ICMPV6_PORT_UNREACH,
         [NFT_REJECT_ICMPX_HOST_UNREACH]    = ICMPV6_ADDR_UNREACH,
@@ -99,8 +98,7 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmpv6_code(u8 code)
 {
-        if (code > NFT_REJECT_ICMPX_MAX)
-                return -EINVAL;
+        BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
         return icmp_code_v6[code];
 }
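The two lookup tables above are indexed by codes running from 0 up to and including NFT_REJECT_ICMPX_MAX, so they need NFT_REJECT_ICMPX_MAX + 1 slots; the old size left the highest code without a slot. A tiny standalone example of the sizing rule; the enum and values are illustrative, not the kernel's:

#include <stdio.h>

enum reject_code {
        CODE_NO_ROUTE,
        CODE_PORT_UNREACH,
        CODE_HOST_UNREACH,
        CODE_ADMIN_PROHIBITED,
        __CODE_MAX
};
#define CODE_MAX (__CODE_MAX - 1)

/* sizing the array [CODE_MAX] would be one slot short: the valid
 * indices are 0..CODE_MAX, which is CODE_MAX + 1 entries */
static const unsigned char code_map[CODE_MAX + 1] = {
        [CODE_NO_ROUTE]          = 0,
        [CODE_PORT_UNREACH]      = 3,
        [CODE_HOST_UNREACH]      = 1,
        [CODE_ADMIN_PROHIBITED]  = 13,
};

int main(void)
{
        printf("%d\n", code_map[CODE_ADMIN_PROHIBITED]);   /* last valid slot */
        return 0;
}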
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 0b4692dd1c5e..a845cd4cf21e 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -246,7 +246,6 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
  * @addr: IP address in network byte order (struct in[6]_addr)
  * @mask: address mask in network byte order (struct in[6]_addr)
  * @family: address family
- * @secid: LSM secid value for the entry
  * @audit_info: NetLabel audit information
  *
  * Description:
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b3b16c070a7f..fa7cd792791c 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
 /**
  * __rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
  *
  * This function sets the state of all switches of given type,
  * unless a specific switch is claimed by userspace (in which case,
@@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
 /**
  * rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
  *
  * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
  * Please refer to __rfkill_switch_all() for details.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 38d58e6cef07..6efca30894aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -57,7 +57,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
                                  struct sk_buff *skb,
-                                 const struct netdev_queue *txq)
+                                 const struct netdev_queue *txq,
+                                 int *packets)
 {
         int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
 
@@ -70,6 +71,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
                 bytelimit -= nskb->len; /* covers GSO len */
                 skb->next = nskb;
                 skb = nskb;
+                (*packets)++; /* GSO counts as one pkt */
         }
         skb->next = NULL;
 }
@@ -77,11 +79,13 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
-static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
+                                   int *packets)
 {
         struct sk_buff *skb = q->gso_skb;
         const struct netdev_queue *txq = q->dev_queue;
 
+        *packets = 1;
         *validate = true;
         if (unlikely(skb)) {
                 /* check the reason of requeuing without tx lock first */
@@ -98,7 +102,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
                     !netif_xmit_frozen_or_stopped(txq)) {
                         skb = q->dequeue(q);
                         if (skb && qdisc_may_bulk(q))
-                                try_bulk_dequeue_skb(q, skb, txq);
+                                try_bulk_dequeue_skb(q, skb, txq, packets);
                 }
         }
         return skb;
@@ -204,7 +208,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
  *                              >0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+static inline int qdisc_restart(struct Qdisc *q, int *packets)
 {
         struct netdev_queue *txq;
         struct net_device *dev;
@@ -213,7 +217,7 @@ static inline int qdisc_restart(struct Qdisc *q)
         bool validate;
 
         /* Dequeue packet */
-        skb = dequeue_skb(q, &validate);
+        skb = dequeue_skb(q, &validate, packets);
         if (unlikely(!skb))
                 return 0;
 
@@ -227,14 +231,16 @@ static inline int qdisc_restart(struct Qdisc *q)
 void __qdisc_run(struct Qdisc *q)
 {
         int quota = weight_p;
+        int packets;
 
-        while (qdisc_restart(q)) {
+        while (qdisc_restart(q, &packets)) {
                 /*
                  * Ordered by possible occurrence: Postpone processing if
                  * 1. we've exceeded packet quota
                  * 2. another process needs the CPU;
                  */
-                if (--quota <= 0 || need_resched()) {
+                quota -= packets;
+                if (quota <= 0 || need_resched()) {
                         __netif_schedule(q);
                         break;
                 }
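With bulk dequeue, one qdisc_restart() can now hand the driver a whole list of packets, so charging the quota by one per restart would let a busy queue exceed its fair share; the change above charges the real packet count instead. A toy standalone model of that accounting; the burst sizes and the helper are invented for illustration:

#include <stdio.h>

/* pretend each restart dequeues a small burst and reports its size */
static int fake_qdisc_restart(int *packets)
{
        static const int bursts[] = { 1, 4, 2, 8, 1, 0 };
        static int i;

        *packets = bursts[i++];
        return *packets;                /* 0 => queue empty */
}

int main(void)
{
        int quota = 8;                  /* stand-in for weight_p */
        int packets;

        while (fake_qdisc_restart(&packets)) {
                quota -= packets;       /* charge the whole burst, not just 1 */
                if (quota <= 0) {
                        puts("quota exhausted: reschedule");
                        break;
                }
        }
        return 0;
}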