Diffstat (limited to 'net/core')
-rw-r--r--   net/core/dev.c             25
-rw-r--r--   net/core/net_namespace.c    2
-rw-r--r--   net/core/rtnetlink.c       15
-rw-r--r--   net/core/skbuff.c          32
-rw-r--r--   net/core/sock.c            17
5 files changed, 52 insertions, 39 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 1796cef55ab5..aa82f9ab6a36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1718,15 +1718,8 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-			atomic_long_inc(&dev->rx_dropped);
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-	}
-
-	if (unlikely(!is_skb_forwardable(dev, skb))) {
+	if (skb_orphan_frags(skb, GFP_ATOMIC) ||
+	    unlikely(!is_skb_forwardable(dev, skb))) {
 		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
 		return NET_RX_DROP;
@@ -3079,7 +3072,7 @@ static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
 {
-	if (next_cpu != RPS_NO_CPU) {
+	if (next_cpu < nr_cpu_ids) {
 #ifdef CONFIG_RFS_ACCEL
 		struct netdev_rx_queue *rxqueue;
 		struct rps_dev_flow_table *flow_table;
@@ -3184,7 +3177,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		 * If the desired CPU (where last recvmsg was done) is
 		 * different from current CPU (one in the rx-queue flow
 		 * table entry), switch if one of the following holds:
-		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is unset (>= nr_cpu_ids).
 		 *   - Current CPU is offline.
 		 *   - The current CPU's queue tail has advanced beyond the
 		 *     last packet that was enqueued using this table entry.
@@ -3192,14 +3185,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		 *     have been dequeued, thus preserving in order delivery.
 		 */
 		if (unlikely(tcpu != next_cpu) &&
-		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		    (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
 		      rflow->last_qtail)) >= 0)) {
 			tcpu = next_cpu;
 			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
 		}
 
-		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+		if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
 			goto done;
@@ -3240,14 +3233,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 	struct rps_dev_flow_table *flow_table;
 	struct rps_dev_flow *rflow;
 	bool expire = true;
-	int cpu;
+	unsigned int cpu;
 
 	rcu_read_lock();
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	if (flow_table && flow_id <= flow_table->mask) {
 		rflow = &flow_table->flows[flow_id];
 		cpu = ACCESS_ONCE(rflow->cpu);
-		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
 		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
 			   rflow->last_qtail) <
 		     (int)(10 * flow_table->mask)))
@@ -5209,7 +5202,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
 		return -EBUSY;
 
-	if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
+	if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
 		return -EEXIST;
 
 	if (master && netdev_master_upper_dev_get(dev))
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 78fc04ad36fc..572af0011997 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -601,7 +601,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
 	}
 
 	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
-			    RTM_GETNSID, net, peer, -1);
+			    RTM_NEWNSID, net, peer, -1);
 	if (err < 0)
 		goto err_out;
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 358d52a38533..8de36824018d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2416,6 +2416,9 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
 {
 	struct sk_buff *skb;
 
+	if (dev->reg_state != NETREG_REGISTERED)
+		return;
+
 	skb = rtmsg_ifinfo_build_skb(type, dev, change, flags);
 	if (skb)
 		rtmsg_ifinfo_send(skb, dev, flags);
@@ -2854,7 +2857,7 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
 
 int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 			    struct net_device *dev, u16 mode,
-			    u32 flags, u32 mask)
+			    u32 flags, u32 mask, int nlflags)
 {
 	struct nlmsghdr *nlh;
 	struct ifinfomsg *ifm;
@@ -2863,7 +2866,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
 
-	nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -2969,7 +2972,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 		if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
 			if (idx >= cb->args[0] &&
 			    br_dev->netdev_ops->ndo_bridge_getlink(
-				    skb, portid, seq, dev, filter_mask) < 0)
+				    skb, portid, seq, dev, filter_mask,
+				    NLM_F_MULTI) < 0)
 				break;
 			idx++;
 		}
@@ -2977,7 +2981,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 		if (ops->ndo_bridge_getlink) {
 			if (idx >= cb->args[0] &&
 			    ops->ndo_bridge_getlink(skb, portid, seq, dev,
-						    filter_mask) < 0)
+						    filter_mask,
+						    NLM_F_MULTI) < 0)
 				break;
 			idx++;
 		}
@@ -3018,7 +3023,7 @@ static int rtnl_bridge_notify(struct net_device *dev)
 		goto errout;
 	}
 
-	err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
+	err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0);
 	if (err < 0)
 		goto errout;
 
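For context on the rtnetlink hunks above: ndo_dflt_bridge_getlink() and the ndo_bridge_getlink callback gain an nlflags argument, which is forwarded into nlmsg_put() so dump replies carry NLM_F_MULTI while the one-shot message built by rtnl_bridge_notify() passes 0. A minimal sketch of a driver callback under the new signature (the function name, the ignored filter_mask and the VEB mode are illustrative assumptions, not taken from this diff):

	static int example_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
					  struct net_device *dev, u32 filter_mask,
					  int nlflags)
	{
		/* filter_mask is ignored here for brevity; nlflags is passed
		 * through unchanged so the core controls NLM_F_MULTI.
		 */
		return ndo_dflt_bridge_getlink(skb, pid, seq, dev, BRIDGE_MODE_VEB,
					       0, 0, nlflags);
	}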
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d1967dab9cc6..41ec02242ea7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -280,13 +280,14 @@ nodata:
 EXPORT_SYMBOL(__alloc_skb);
 
 /**
- * build_skb - build a network buffer
+ * __build_skb - build a network buffer
  * @data: data buffer provided by caller
- * @frag_size: size of fragment, or 0 if head was kmalloced
+ * @frag_size: size of data, or 0 if head was kmalloced
  *
  * Allocate a new &sk_buff. Caller provides space holding head and
  * skb_shared_info. @data must have been allocated by kmalloc() only if
- * @frag_size is 0, otherwise data should come from the page allocator.
+ * @frag_size is 0, otherwise data should come from the page allocator
+ * or vmalloc()
  * The return is the new skb buffer.
  * On a failure the return is %NULL, and @data is not freed.
  * Notes :
@@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb);
  *  before giving packet to stack.
  *  RX rings only contains data buffers, not full skbs.
  */
-struct sk_buff *build_skb(void *data, unsigned int frag_size)
+struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 {
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
@@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->truesize = SKB_TRUESIZE(size);
-	skb->head_frag = frag_size != 0;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 
 	return skb;
 }
+
+/* build_skb() is wrapper over __build_skb(), that specifically
+ * takes care of skb->head and skb->pfmemalloc
+ * This means that if @frag_size is not zero, then @data must be backed
+ * by a page fragment, not kmalloc() or vmalloc()
+ */
+struct sk_buff *build_skb(void *data, unsigned int frag_size)
+{
+	struct sk_buff *skb = __build_skb(data, frag_size);
+
+	if (skb && frag_size) {
+		skb->head_frag = 1;
+		if (virt_to_head_page(data)->pfmemalloc)
+			skb->pfmemalloc = 1;
+	}
+	return skb;
+}
 EXPORT_SYMBOL(build_skb);
 
 struct netdev_alloc_cache {
@@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
 	gfp_t gfp = gfp_mask;
 
 	if (order) {
-		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
+			    __GFP_NOMEMALLOC;
 		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
 		nc->frag.size = PAGE_SIZE << (page ? order : 0);
 	}
@@ -4380,7 +4398,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
 	while (order) {
 		if (npages >= 1 << order) {
-			page = alloc_pages(gfp_mask |
+			page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
 					   __GFP_COMP |
 					   __GFP_NOWARN |
 					   __GFP_NORETRY,
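The skbuff.c hunks split skb construction in two: __build_skb() no longer sets skb->head_frag, and the new build_skb() wrapper marks the head as a page fragment and propagates pfmemalloc from the backing page. A rough sketch of the resulting calling convention, assuming kernel context (the function name, sizes and allocation strategy are illustrative, not from this diff):

	static struct sk_buff *example_build(unsigned int len, bool use_frag)
	{
		unsigned int sz = SKB_DATA_ALIGN(len) +
				  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		struct sk_buff *skb;
		void *data;

		if (use_frag) {
			/* Page-fragment head: build_skb() sets skb->head_frag
			 * and inherits pfmemalloc from the backing page.
			 */
			data = netdev_alloc_frag(sz);
			if (!data)
				return NULL;
			skb = build_skb(data, sz);
			if (!skb)
				put_page(virt_to_head_page(data));
			return skb;
		}

		/* kmalloc()ed head: __build_skb() with frag_size == 0 leaves
		 * head_frag clear, so freeing the skb will kfree() the head.
		 */
		data = kmalloc(sz, GFP_ATOMIC);
		if (!data)
			return NULL;
		skb = __build_skb(data, 0);
		if (!skb)
			kfree(data);
		return skb;
	}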
diff --git a/net/core/sock.c b/net/core/sock.c
index e891bcf325ca..dc30dc5bb1b8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -354,15 +354,12 @@ void sk_clear_memalloc(struct sock *sk)
 
 	/*
 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
-	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
-	 * it has rmem allocations there is a risk that the user of the
-	 * socket cannot make forward progress due to exceeding the rmem
-	 * limits. By rights, sk_clear_memalloc() should only be called
-	 * on sockets being torn down but warn and reset the accounting if
-	 * that assumption breaks.
+	 * progress of swapping. SOCK_MEMALLOC may be cleared while
+	 * it has rmem allocations due to the last swapfile being deactivated
+	 * but there is a risk that the socket is unusable due to exceeding
+	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
 	 */
-	if (WARN_ON(sk->sk_forward_alloc))
-		sk_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 }
 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
 
@@ -1474,8 +1471,8 @@ void sk_release_kernel(struct sock *sk)
 		return;
 
 	sock_hold(sk);
-	sock_net_set(sk, get_net(&init_net));
 	sock_release(sk->sk_socket);
+	sock_net_set(sk, get_net(&init_net));
 	sock_put(sk);
 }
 EXPORT_SYMBOL(sk_release_kernel);
@@ -1883,7 +1880,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
 
 	pfrag->offset = 0;
 	if (SKB_FRAG_PAGE_ORDER) {
-		pfrag->page = alloc_pages(gfp | __GFP_COMP |
+		pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
 					  __GFP_NOWARN | __GFP_NORETRY,
 					  SKB_FRAG_PAGE_ORDER);
 		if (likely(pfrag->page)) {