Diffstat (limited to 'net/core')
-rw-r--r--   net/core/dev.c               236
-rw-r--r--   net/core/ethtool.c             2
-rw-r--r--   net/core/filter.c            139
-rw-r--r--   net/core/flow.c                4
-rw-r--r--   net/core/neighbour.c          20
-rw-r--r--   net/core/net-sysfs.c          17
-rw-r--r--   net/core/net_namespace.c      55
-rw-r--r--   net/core/netpoll.c             6
-rw-r--r--   net/core/netprio_cgroup.c    262
-rw-r--r--   net/core/pktgen.c             47
-rw-r--r--   net/core/rtnetlink.c         230
-rw-r--r--   net/core/scm.c                 6
-rw-r--r--   net/core/skbuff.c             34
-rw-r--r--   net/core/sock.c               84
-rw-r--r--   net/core/sysctl_net_core.c     5
15 files changed, 854 insertions(+), 293 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index e5942bf45a6d..d0cbc93fcf32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,8 +176,10 @@
 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
 
 static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;	/* Taps */
+static struct list_head offload_base __read_mostly;
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -201,6 +203,8 @@ static struct list_head ptype_all __read_mostly; /* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
+DEFINE_SEQLOCK(devnet_rename_seq);
+
 static inline void dev_base_seq_inc(struct net *net)
 {
 	while (++net->dev_base_seq == 0);
@@ -470,6 +474,82 @@ void dev_remove_pack(struct packet_type *pt)
 }
 EXPORT_SYMBOL(dev_remove_pack);
 
+
+/**
+ *	dev_add_offload - register offload handlers
+ *	@po: protocol offload declaration
+ *
+ *	Add protocol offload handlers to the networking stack. The passed
+ *	&proto_offload is linked into kernel lists and may not be freed until
+ *	it has been removed from the kernel lists.
+ *
+ *	This call does not sleep therefore it can not
+ *	guarantee all CPU's that are in middle of receiving packets
+ *	will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+
+	spin_lock(&offload_lock);
+	list_add_rcu(&po->list, head);
+	spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ *	__dev_remove_offload - remove offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a protocol offload handler that was previously added to the
+ *	kernel offload handlers by dev_add_offload(). The passed &offload_type
+ *	is removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *	The packet type might still be in use by receivers
+ *	and must not be freed until after all the CPU's have gone
+ *	through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+	struct packet_offload *po1;
+
+	spin_lock(&offload_lock);
+
+	list_for_each_entry(po1, head, list) {
+		if (po == po1) {
+			list_del_rcu(&po->list);
+			goto out;
+		}
+	}
+
+	pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+	spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ *	dev_remove_offload - remove packet offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a packet offload handler that was previously added to the kernel
+ *	offload handlers by dev_add_offload(). The passed &offload_type is
+ *	removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *	This call sleeps to guarantee that no CPU is looking at the packet
+ *	type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+	__dev_remove_offload(po);
+
+	synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
 /******************************************************************************
 
 		      Device Boot-time Settings Routines
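The two paths above differ deliberately: dev_add_offload() only takes a spinlock and never sleeps, while dev_remove_offload() sleeps in synchronize_net() so no RCU reader can still hold the entry after return. A minimal sketch of how a protocol might register against this API — the myproto_* handlers are hypothetical, only the struct packet_offload layout comes from this patch:

    /* Hypothetical caller of the new offload API (illustrative only). */
    static struct packet_offload myproto_offload __read_mostly = {
        .type = cpu_to_be16(ETH_P_IP),  /* protocol this handler serves */
        .callbacks = {
            .gso_send_check = myproto_gso_send_check,
            .gso_segment    = myproto_gso_segment,
            .gro_receive    = myproto_gro_receive,
            .gro_complete   = myproto_gro_complete,
        },
    };

    static int __init myproto_offload_init(void)
    {
        dev_add_offload(&myproto_offload);    /* never sleeps */
        return 0;
    }

    static void __exit myproto_offload_exit(void)
    {
        dev_remove_offload(&myproto_offload); /* sleeps in synchronize_net() */
    }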
@@ -1013,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+	write_seqlock(&devnet_rename_seq);
+
+	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+		write_sequnlock(&devnet_rename_seq);
 		return 0;
+	}
 
 	memcpy(oldname, dev->name, IFNAMSIZ);
 
 	err = dev_get_valid_name(net, dev, newname);
-	if (err < 0)
+	if (err < 0) {
+		write_sequnlock(&devnet_rename_seq);
 		return err;
+	}
 
 rollback:
 	ret = device_rename(&dev->dev, dev->name);
 	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
+		write_sequnlock(&devnet_rename_seq);
 		return ret;
 	}
 
+	write_sequnlock(&devnet_rename_seq);
+
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
 	write_unlock_bh(&dev_base_lock);
@@ -1046,6 +1135,7 @@ rollback:
 	/* err >= 0 after dev_alloc_name() or stores the first errno */
 	if (err >= 0) {
 		err = ret;
+		write_seqlock(&devnet_rename_seq);
 		memcpy(dev->name, oldname, IFNAMSIZ);
 		goto rollback;
 	} else {
@@ -1075,10 +1165,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 		return -EINVAL;
 
 	if (!len) {
-		if (dev->ifalias) {
-			kfree(dev->ifalias);
-			dev->ifalias = NULL;
-		}
+		kfree(dev->ifalias);
+		dev->ifalias = NULL;
 		return 0;
 	}
 
@@ -1994,7 +2082,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 				    netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
 	int vlan_depth = ETH_HLEN;
 	int err;
@@ -2023,18 +2111,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype,
-			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+	list_for_each_entry_rcu(ptype, &offload_base, list) {
+		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-				err = ptype->gso_send_check(skb);
+				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
 					break;
 				__skb_push(skb, (skb->data -
 						 skb_network_header(skb)));
 			}
-			segs = ptype->gso_segment(skb, features);
+			segs = ptype->callbacks.gso_segment(skb, features);
 			break;
 		}
 	}
@@ -2237,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		skb->vlan_tci = 0;
 	}
 
+	/* If encapsulation offload request, verify we are testing
+	 * hardware encapsulation features instead of standard
+	 * features for the netdev
+	 */
+	if (skb->encapsulation)
+		features &= dev->hw_enc_features;
+
 	if (netif_needs_gso(skb, features)) {
 		if (unlikely(dev_gso_segment(skb, features)))
 			goto out_kfree_skb;
@@ -2252,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		 * checksumming here.
 		 */
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			skb_set_transport_header(skb,
-				skb_checksum_start_offset(skb));
+			if (skb->encapsulation)
+				skb_set_inner_transport_header(skb,
+					skb_checksum_start_offset(skb));
+			else
+				skb_set_transport_header(skb,
+					skb_checksum_start_offset(skb));
 			if (!(features & NETIF_F_ALL_CSUM) &&
 			     skb_checksum_help(skb))
 				goto out_kfree_skb;
@@ -3446,9 +3544,9 @@ static void flush_backlog(void *arg)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int err = -ENOENT;
 
 	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
@@ -3460,10 +3558,10 @@ static int napi_gro_complete(struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+		if (ptype->type != type || !ptype->callbacks.gro_complete)
 			continue;
 
-		err = ptype->gro_complete(skb);
+		err = ptype->callbacks.gro_complete(skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -3507,12 +3605,34 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
+{
+	struct sk_buff *p;
+	unsigned int maclen = skb->dev->hard_header_len;
+
+	for (p = napi->gro_list; p; p = p->next) {
+		unsigned long diffs;
+
+		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+		diffs |= p->vlan_tci ^ skb->vlan_tci;
+		if (maclen == ETH_HLEN)
+			diffs |= compare_ether_header(skb_mac_header(p),
+						      skb_gro_mac_header(skb));
+		else if (!diffs)
+			diffs = memcmp(skb_mac_header(p),
+				       skb_gro_mac_header(skb),
+				       maclen);
+		NAPI_GRO_CB(p)->same_flow = !diffs;
+		NAPI_GRO_CB(p)->flush = 0;
+	}
+}
+
+static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int same_flow;
 	int mac_len;
 	enum gro_result ret;
@@ -3523,9 +3643,11 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
+	gro_list_prepare(napi, skb);
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+		if (ptype->type != type || !ptype->callbacks.gro_receive)
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3535,7 +3657,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 
-		pp = ptype->gro_receive(&napi->gro_list, skb);
+		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -3598,34 +3720,9 @@ normal:
 	ret = GRO_NORMAL;
 	goto pull;
 }
-EXPORT_SYMBOL(dev_gro_receive);
-
-static inline gro_result_t
-__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
-{
-	struct sk_buff *p;
-	unsigned int maclen = skb->dev->hard_header_len;
-
-	for (p = napi->gro_list; p; p = p->next) {
-		unsigned long diffs;
-
-		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
-		diffs |= p->vlan_tci ^ skb->vlan_tci;
-		if (maclen == ETH_HLEN)
-			diffs |= compare_ether_header(skb_mac_header(p),
-						      skb_gro_mac_header(skb));
-		else if (!diffs)
-			diffs = memcmp(skb_mac_header(p),
-				       skb_gro_mac_header(skb),
-				       maclen);
-		NAPI_GRO_CB(p)->same_flow = !diffs;
-		NAPI_GRO_CB(p)->flush = 0;
-	}
 
-	return dev_gro_receive(napi, skb);
-}
 
-gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
 	case GRO_NORMAL:
@@ -3651,7 +3748,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
 	return ret;
 }
-EXPORT_SYMBOL(napi_skb_finish);
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
 {
@@ -3674,7 +3770,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	skb_gro_reset_offset(skb);
 
-	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
+	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
@@ -3703,7 +3799,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 			       gro_result_t ret)
 {
 	switch (ret) {
@@ -3728,7 +3824,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 
 	return ret;
 }
-EXPORT_SYMBOL(napi_frags_finish);
 
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
@@ -3773,7 +3868,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 	if (!skb)
 		return GRO_DROP;
 
-	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
@@ -4075,6 +4170,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 {
 	struct net_device *dev;
 	struct ifreq ifr;
+	unsigned seq;
 
 	/*
 	 *	Fetch the caller's info block.
@@ -4083,6 +4179,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
+retry:
+	seq = read_seqbegin(&devnet_rename_seq);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
 	if (!dev) {
@@ -4092,6 +4190,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 
 	strcpy(ifr.ifr_name, dev->name);
 	rcu_read_unlock();
+	if (read_seqretry(&devnet_rename_seq, seq))
+		goto retry;
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
 		return -EFAULT;
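dev_ifname() thus becomes a lockless seqlock reader paired with the writer in dev_change_name(): it snapshots devnet_rename_seq, copies the name under RCU, and retries if a rename raced with the copy. The idiom in general form, as a sketch:

    unsigned seq;
    do {
        seq = read_seqbegin(&devnet_rename_seq);  /* snapshot sequence */
        /* ... copy dev->name ... */
    } while (read_seqretry(&devnet_rename_seq, seq)); /* writer ran: retry */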
@@ -4884,7 +4984,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	else
 		dev->mtu = new_mtu;
 
-	if (!err && dev->flags & IFF_UP)
+	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
 	return err;
 }
@@ -5204,7 +5304,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSIFNAME:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		dev_load(net, ifr.ifr_name);
 		rtnl_lock();
@@ -5225,16 +5325,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	 *	- require strict serialization.
 	 *	- do not return a value
 	 */
+	case SIOCSIFMAP:
+	case SIOCSIFTXQLEN:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		/* fall through */
+	/*
+	 *	These ioctl calls:
+	 *	- require local superuser power.
+	 *	- require strict serialization.
+	 *	- do not return a value
+	 */
 	case SIOCSIFFLAGS:
 	case SIOCSIFMETRIC:
 	case SIOCSIFMTU:
-	case SIOCSIFMAP:
 	case SIOCSIFHWADDR:
 	case SIOCSIFSLAVE:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 	case SIOCSIFHWBROADCAST:
-	case SIOCSIFTXQLEN:
 	case SIOCSMIIREG:
 	case SIOCBONDENSLAVE:
 	case SIOCBONDRELEASE:
@@ -5243,7 +5352,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCBRADDIF:
 	case SIOCBRDELIF:
 	case SIOCSHWTSTAMP:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		/* fall through */
 	case SIOCBONDSLAVEINFOQUERY:
@@ -6268,7 +6377,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		goto out;
 
 	/* Ensure the device has been registrered */
-	err = -EINVAL;
 	if (dev->reg_state != NETREG_REGISTERED)
 		goto out;
 
@@ -6323,6 +6431,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	dev_uc_flush(dev);
 	dev_mc_flush(dev);
 
+	/* Send a netdev-removed uevent to the old namespace */
+	kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
+
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
 
@@ -6334,6 +6445,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		dev->iflink = dev->ifindex;
 	}
 
+	/* Send a netdev-add uevent to the new namespace */
+	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+
 	/* Fixup kobjects */
 	err = device_rename(&dev->dev, dev->name);
 	WARN_ON(err);
@@ -6666,6 +6780,8 @@ static int __init net_dev_init(void)
 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
 
+	INIT_LIST_HEAD(&offload_base);
+
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4d64cc2e3fa9..a8705432e4b1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1460,7 +1460,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GEEE:
 		break;
 	default:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 	}
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d92ebb7fbcf..c23543cba132 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,6 +39,7 @@
 #include <linux/reciprocal_div.h>
 #include <linux/ratelimit.h>
 #include <linux/seccomp.h>
+#include <linux/if_vlan.h>
 
 /* No hurry in this branch
  *
@@ -341,6 +342,12 @@ load_b:
 		case BPF_S_ANC_CPU:
 			A = raw_smp_processor_id();
 			continue;
+		case BPF_S_ANC_VLAN_TAG:
+			A = vlan_tx_tag_get(skb);
+			continue;
+		case BPF_S_ANC_VLAN_TAG_PRESENT:
+			A = !!vlan_tx_tag_present(skb);
+			continue;
 		case BPF_S_ANC_NLATTR: {
 			struct nlattr *nla;
 
@@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 			ANCILLARY(RXHASH);
 			ANCILLARY(CPU);
 			ANCILLARY(ALU_XOR_X);
+			ANCILLARY(VLAN_TAG);
+			ANCILLARY(VLAN_TAG_PRESENT);
 			}
 		}
 		ftest->code = code;
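These two ancillary loads let classic BPF match on VLAN metadata that lives in skb->vlan_tci rather than in the packet bytes. A hedged userspace sketch of a filter using them — the SKF_AD_VLAN_TAG* offsets come from the matching uapi change in this series, and the program itself is purely illustrative:

    #include <linux/filter.h>   /* struct sock_filter, BPF_*, SKF_AD_* */

    /* Accept only packets carrying VLAN tag 100 (illustrative). */
    static struct sock_filter vlan_prog[] = {
        /* A = vlan_tx_tag_present(skb) */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
        /* no tag -> jump to drop */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 3, 0),
        /* A = vlan_tx_tag_get(skb) */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG),
        /* tag != 100 -> drop */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 100, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, 0xffff),  /* accept */
        BPF_STMT(BPF_RET | BPF_K, 0),       /* drop */
    };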
@@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
+
+static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+{
+	static const u16 decodes[] = {
+		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
+		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
+		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
+		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
+		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
+		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
+		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
+		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
+		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
+		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
+		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
+		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
+		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
+		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
+		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
+		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
+		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
+		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
+		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
+		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
+		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
+		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
+		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
+		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
+		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
+		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
+		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
+		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
+		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
+		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
+		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
+		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
+		[BPF_S_RET_K]		= BPF_RET|BPF_K,
+		[BPF_S_RET_A]		= BPF_RET|BPF_A,
+		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
+		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
+		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
+		[BPF_S_ST]		= BPF_ST,
+		[BPF_S_STX]		= BPF_STX,
+		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
+		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
+		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
+		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
+		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
+		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
+		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
+		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
+		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
+	};
+	u16 code;
+
+	code = filt->code;
+
+	to->code = decodes[code];
+	to->jt = filt->jt;
+	to->jf = filt->jf;
+
+	if (code == BPF_S_ALU_DIV_K) {
+		/*
+		 * When loaded this rule user gave us X, which was
+		 * translated into R = r(X). Now we calculate the
+		 * RR = r(R) and report it back. If next time this
+		 * value is loaded and RRR = r(RR) is calculated
+		 * then the R == RRR will be true.
+		 *
+		 * One exception. X == 1 translates into R == 0 and
+		 * we can't calculate RR out of it with r().
+		 */
+
+		if (filt->k == 0)
+			to->k = 1;
+		else
+			to->k = reciprocal_value(filt->k);
+
+		BUG_ON(reciprocal_value(to->k) != filt->k);
+	} else
+		to->k = filt->k;
+}
+
+int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
+{
+	struct sk_filter *filter;
+	int i, ret;
+
+	lock_sock(sk);
+	filter = rcu_dereference_protected(sk->sk_filter,
+			sock_owned_by_user(sk));
+	ret = 0;
+	if (!filter)
+		goto out;
+	ret = filter->len;
+	if (!len)
+		goto out;
+	ret = -EINVAL;
+	if (len < filter->len)
+		goto out;
+
+	ret = -EFAULT;
+	for (i = 0; i < filter->len; i++) {
+		struct sock_filter fb;
+
+		sk_decode_filter(&filter->insns[i], &fb);
+		if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
+			goto out;
+	}
+
+	ret = filter->len;
+out:
+	release_sock(sk);
+	return ret;
+}
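sk_get_filter() decodes the kernel's internal opcodes back to the user-visible ones (hiding JIT-oriented internals such as the reciprocal-divide trick described in the comment above). In this series it backs getsockopt(SOL_SOCKET, SO_GET_FILTER), an alias of SO_ATTACH_FILTER; the sock.c wiring is not shown here, so the usual two-call pattern below is a hedged userspace sketch:

    #include <stdlib.h>
    #include <sys/socket.h>
    #include <linux/filter.h>

    #ifndef SO_GET_FILTER
    #define SO_GET_FILTER SO_ATTACH_FILTER  /* same option number */
    #endif

    static struct sock_filter *dump_filter(int fd, int *len_out)
    {
        struct sock_filter *insns;
        socklen_t len = 0;

        /* First call with len == 0 yields the instruction count. */
        if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &len) < 0)
            return NULL;

        insns = calloc(len, sizeof(*insns));
        if (!insns)
            return NULL;

        /* Second call copies out the decoded program. */
        if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &len) < 0) {
            free(insns);
            return NULL;
        }
        *len_out = len;
        return insns;
    }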
diff --git a/net/core/flow.c b/net/core/flow.c
index e318c7e98042..b0901ee5a002 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data)
 static void flow_cache_flush_per_cpu(void *data)
 {
 	struct flow_flush_info *info = data;
-	int cpu;
 	struct tasklet_struct *tasklet;
 
-	cpu = smp_processor_id();
-	tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
+	tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet);
 	tasklet->data = (unsigned long)info;
 	tasklet_schedule(tasklet);
 }
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 22571488730a..c815f285e5ab 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1787,8 +1787,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
 	    /* approximative value for deprecated QUEUE_LEN (in packets) */
 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
-			DIV_ROUND_UP(parms->queue_len_bytes,
-				     SKB_TRUESIZE(ETH_FRAME_LEN))) ||
+			parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
 	    nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
 	    nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
@@ -2770,6 +2769,8 @@ EXPORT_SYMBOL(neigh_app_ns);
 #endif /* CONFIG_ARPD */
 
 #ifdef CONFIG_SYSCTL
+static int zero;
+static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
 
 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
 			   size_t *lenp, loff_t *ppos)
@@ -2777,9 +2778,13 @@ static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
 	int size, ret;
 	ctl_table tmp = *ctl;
 
+	tmp.extra1 = &zero;
+	tmp.extra2 = &unres_qlen_max;
 	tmp.data = &size;
-	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
-	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+
+	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
 	if (write && !ret)
 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
 	return ret;
@@ -2865,7 +2870,8 @@ static struct neigh_sysctl_table {
 			.procname	= "unres_qlen_bytes",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1		= &zero,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_PROXY_QLEN] = {
 			.procname	= "proxy_qlen",
@@ -2987,6 +2993,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
 	}
 
+	/* Don't export sysctls to unprivileged users */
+	if (neigh_parms_net(p)->user_ns != &init_user_ns)
+		t->neigh_vars[0].procname = NULL;
+
 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
 		 p_name, dev_name_source);
 	t->sysctl_header =
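Two effects of the unres_qlen changes are worth spelling out. With T = SKB_TRUESIZE(ETH_FRAME_LEN), the old DIV_ROUND_UP read reported ceil(bytes/T), so reading the deprecated packet-count value and writing the same number back stored ceil(bytes/T) * T >= bytes, silently growing the byte budget by up to T - 1 on every round trip; truncating division makes the round trip non-increasing. And clamping writes to [0, INT_MAX / T] via extra1/extra2 guarantees the size * T multiplication on the write path cannot overflow an int.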
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 017a8bacfb27..334efd5d67a9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -18,11 +18,9 @@
 #include <net/sock.h>
 #include <net/net_namespace.h>
 #include <linux/rtnetlink.h>
-#include <linux/wireless.h>
 #include <linux/vmalloc.h>
 #include <linux/export.h>
 #include <linux/jiffies.h>
-#include <net/wext.h>
 
 #include "net-sysfs.h"
 
@@ -73,11 +71,12 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t len,
 			    int (*set)(struct net_device *, unsigned long))
 {
-	struct net_device *net = to_net_dev(dev);
+	struct net_device *netdev = to_net_dev(dev);
+	struct net *net = dev_net(netdev);
 	unsigned long new;
 	int ret = -EINVAL;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	ret = kstrtoul(buf, 0, &new);
@@ -87,8 +86,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (dev_isalive(net)) {
-		if ((ret = (*set)(net, new)) == 0)
+	if (dev_isalive(netdev)) {
+		if ((ret = (*set)(netdev, new)) == 0)
 			ret = len;
 	}
 	rtnl_unlock();
@@ -264,6 +263,9 @@ static ssize_t store_tx_queue_len(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t len)
 {
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
 }
 
@@ -271,10 +273,11 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t len)
 {
 	struct net_device *netdev = to_net_dev(dev);
+	struct net *net = dev_net(netdev);
 	size_t count = len;
 	ssize_t ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	/* ignore trailing newline */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 42f1e1c7514f..8acce01b6dab 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -13,6 +13,7 @@
 #include <linux/proc_fs.h>
 #include <linux/file.h>
 #include <linux/export.h>
+#include <linux/user_namespace.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -145,7 +146,7 @@ static void ops_free_list(const struct pernet_operations *ops,
 /*
  * setup_net runs the initializers for the network namespace object.
  */
-static __net_init int setup_net(struct net *net)
+static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 {
 	/* Must be called with net_mutex held */
 	const struct pernet_operations *ops, *saved_ops;
@@ -155,6 +156,7 @@ static __net_init int setup_net(struct net *net)
 	atomic_set(&net->count, 1);
 	atomic_set(&net->passive, 1);
 	net->dev_base_seq = 1;
+	net->user_ns = user_ns;
 
 #ifdef NETNS_REFCNT_DEBUG
 	atomic_set(&net->use_count, 0);
@@ -232,7 +234,8 @@ void net_drop_ns(void *p)
 	net_free(ns);
 }
 
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+struct net *copy_net_ns(unsigned long flags,
+			struct user_namespace *user_ns, struct net *old_net)
 {
 	struct net *net;
 	int rv;
@@ -243,8 +246,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 	net = net_alloc();
 	if (!net)
 		return ERR_PTR(-ENOMEM);
+
+	get_user_ns(user_ns);
+
 	mutex_lock(&net_mutex);
-	rv = setup_net(net);
+	rv = setup_net(net, user_ns);
 	if (rv == 0) {
 		rtnl_lock();
 		list_add_tail_rcu(&net->list, &net_namespace_list);
@@ -252,6 +258,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 	}
 	mutex_unlock(&net_mutex);
 	if (rv < 0) {
+		put_user_ns(user_ns);
 		net_drop_ns(net);
 		return ERR_PTR(rv);
 	}
@@ -308,6 +315,7 @@ static void cleanup_net(struct work_struct *work)
 	/* Finally it is safe to free my network namespace structure */
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 		list_del_init(&net->exit_list);
+		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
 }
@@ -347,13 +355,6 @@ struct net *get_net_ns_by_fd(int fd)
 }
 
 #else
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
-{
-	if (flags & CLONE_NEWNET)
-		return ERR_PTR(-EINVAL);
-	return old_net;
-}
-
 struct net *get_net_ns_by_fd(int fd)
 {
 	return ERR_PTR(-EINVAL);
@@ -380,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
 }
 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 
+static __net_init int net_ns_net_init(struct net *net)
+{
+	return proc_alloc_inum(&net->proc_inum);
+}
+
+static __net_exit void net_ns_net_exit(struct net *net)
+{
+	proc_free_inum(net->proc_inum);
+}
+
+static struct pernet_operations __net_initdata net_ns_ops = {
+	.init = net_ns_net_init,
+	.exit = net_ns_net_exit,
+};
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -402,7 +418,7 @@ static int __init net_ns_init(void)
 	rcu_assign_pointer(init_net.gen, ng);
 
 	mutex_lock(&net_mutex);
-	if (setup_net(&init_net))
+	if (setup_net(&init_net, &init_user_ns))
 		panic("Could not setup the initial network namespace");
 
 	rtnl_lock();
@@ -411,6 +427,8 @@ static int __init net_ns_init(void)
 
 	mutex_unlock(&net_mutex);
 
+	register_pernet_subsys(&net_ns_ops);
+
 	return 0;
 }
 
@@ -629,16 +647,29 @@ static void netns_put(void *ns)
 
 static int netns_install(struct nsproxy *nsproxy, void *ns)
 {
+	struct net *net = ns;
+
+	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
+	    !nsown_capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	put_net(nsproxy->net_ns);
-	nsproxy->net_ns = get_net(ns);
+	nsproxy->net_ns = get_net(net);
 	return 0;
 }
 
+static unsigned int netns_inum(void *ns)
+{
+	struct net *net = ns;
+	return net->proc_inum;
+}
+
 const struct proc_ns_operations netns_operations = {
 	.name		= "net",
 	.type		= CLONE_NEWNET,
 	.get		= netns_get,
 	.put		= netns_put,
 	.install	= netns_install,
+	.inum		= netns_inum,
 };
 #endif
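With this install hook, joining a network namespace via setns(2) succeeds only for callers holding CAP_SYS_ADMIN both in their own user namespace and in the user namespace owning the target netns; netns_inum additionally gives each namespace a stable /proc inode number so userspace can tell two namespaces apart. A hypothetical caller of the path this gates (pid and path are illustrative):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sched.h>
    #include <unistd.h>

    /* Join the net namespace of pid 1234 (illustrative). */
    static int join_netns(void)
    {
        int fd = open("/proc/1234/ns/net", O_RDONLY);
        int ret;

        if (fd < 0)
            return -1;
        /* EPERM unless the caller passes both ns_capable() checks above */
        ret = setns(fd, CLONE_NEWNET);
        close(fd);
        return ret;
    }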
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 77a0388fc3be..3151acf5ec13 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -674,7 +674,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		if ((delim = strchr(cur, '@')) == NULL)
 			goto parse_failed;
 		*delim = 0;
-		np->local_port = simple_strtol(cur, NULL, 10);
+		if (kstrtou16(cur, 10, &np->local_port))
+			goto parse_failed;
 		cur = delim;
 	}
 	cur++;
@@ -705,7 +706,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		*delim = 0;
 		if (*cur == ' ' || *cur == '\t')
 			np_info(np, "warning: whitespace is not allowed\n");
-		np->remote_port = simple_strtol(cur, NULL, 10);
+		if (kstrtou16(cur, 10, &np->remote_port))
+			goto parse_failed;
 		cur = delim;
 	}
 	cur++;
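kstrtou16() is stricter than the simple_strtol() it replaces: it rejects trailing garbage and range-checks the result against the 16-bit port fields, where simple_strtol() silently truncated. For example, an input of "70000" previously ended up as port 4464 (70000 mod 65536); now the parse fails and the whole option string is rejected.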
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 79285a36035f..5e67defe2cb0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -27,11 +27,7 @@
 
 #include <linux/fdtable.h>
 
-#define PRIOIDX_SZ 128
-
-static unsigned long prioidx_map[PRIOIDX_SZ];
-static DEFINE_SPINLOCK(prioidx_map_lock);
-static atomic_t max_prioidx = ATOMIC_INIT(0);
+#define PRIOMAP_MIN_SZ		128
 
 static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
 {
@@ -39,136 +35,157 @@ static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgr
 			    struct cgroup_netprio_state, css);
 }
 
-static int get_prioidx(u32 *prio)
-{
-	unsigned long flags;
-	u32 prioidx;
-
-	spin_lock_irqsave(&prioidx_map_lock, flags);
-	prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
-	if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
-		spin_unlock_irqrestore(&prioidx_map_lock, flags);
-		return -ENOSPC;
-	}
-	set_bit(prioidx, prioidx_map);
-	if (atomic_read(&max_prioidx) < prioidx)
-		atomic_set(&max_prioidx, prioidx);
-	spin_unlock_irqrestore(&prioidx_map_lock, flags);
-	*prio = prioidx;
-	return 0;
-}
-
-static void put_prioidx(u32 idx)
+/*
+ * Extend @dev->priomap so that it's large enough to accomodate
+ * @target_idx.  @dev->priomap.priomap_len > @target_idx after successful
+ * return.  Must be called under rtnl lock.
+ */
+static int extend_netdev_table(struct net_device *dev, u32 target_idx)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&prioidx_map_lock, flags);
-	clear_bit(idx, prioidx_map);
-	spin_unlock_irqrestore(&prioidx_map_lock, flags);
-}
+	struct netprio_map *old, *new;
+	size_t new_sz, new_len;
 
-static int extend_netdev_table(struct net_device *dev, u32 new_len)
-{
-	size_t new_size = sizeof(struct netprio_map) +
-			   ((sizeof(u32) * new_len));
-	struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
-	struct netprio_map *old_priomap;
+	/* is the existing priomap large enough? */
+	old = rtnl_dereference(dev->priomap);
+	if (old && old->priomap_len > target_idx)
+		return 0;
 
-	old_priomap = rtnl_dereference(dev->priomap);
+	/*
+	 * Determine the new size.  Let's keep it power-of-two.  We start
+	 * from PRIOMAP_MIN_SZ and double it until it's large enough to
+	 * accommodate @target_idx.
+	 */
+	new_sz = PRIOMAP_MIN_SZ;
+	while (true) {
+		new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
+			sizeof(new->priomap[0]);
+		if (new_len > target_idx)
+			break;
+		new_sz *= 2;
+		/* overflowed? */
+		if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
+			return -ENOSPC;
+	}
 
-	if (!new_priomap) {
+	/* allocate & copy */
+	new = kzalloc(new_sz, GFP_KERNEL);
+	if (!new) {
 		pr_warn("Unable to alloc new priomap!\n");
 		return -ENOMEM;
 	}
 
-	if (old_priomap)
-		memcpy(new_priomap->priomap, old_priomap->priomap,
-		       old_priomap->priomap_len *
-		       sizeof(old_priomap->priomap[0]));
+	if (old)
+		memcpy(new->priomap, old->priomap,
+		       old->priomap_len * sizeof(old->priomap[0]));
 
-	new_priomap->priomap_len = new_len;
+	new->priomap_len = new_len;
 
-	rcu_assign_pointer(dev->priomap, new_priomap);
-	if (old_priomap)
-		kfree_rcu(old_priomap, rcu);
+	/* install the new priomap */
+	rcu_assign_pointer(dev->priomap, new);
+	if (old)
+		kfree_rcu(old, rcu);
 	return 0;
 }
 
-static int write_update_netdev_table(struct net_device *dev)
+/**
+ * netprio_prio - return the effective netprio of a cgroup-net_device pair
+ * @cgrp: cgroup part of the target pair
+ * @dev: net_device part of the target pair
+ *
+ * Should be called under RCU read or rtnl lock.
+ */
+static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
+{
+	struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
+
+	if (map && cgrp->id < map->priomap_len)
+		return map->priomap[cgrp->id];
+	return 0;
+}
+
+/**
+ * netprio_set_prio - set netprio on a cgroup-net_device pair
+ * @cgrp: cgroup part of the target pair
+ * @dev: net_device part of the target pair
+ * @prio: prio to set
+ *
+ * Set netprio to @prio on @cgrp-@dev pair.  Should be called under rtnl
+ * lock and may fail under memory pressure for non-zero @prio.
+ */
+static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
+			    u32 prio)
 {
-	int ret = 0;
-	u32 max_len;
 	struct netprio_map *map;
+	int ret;
 
-	max_len = atomic_read(&max_prioidx) + 1;
+	/* avoid extending priomap for zero writes */
 	map = rtnl_dereference(dev->priomap);
-	if (!map || map->priomap_len < max_len)
-		ret = extend_netdev_table(dev, max_len);
+	if (!prio && (!map || map->priomap_len <= cgrp->id))
+		return 0;
 
-	return ret;
+	ret = extend_netdev_table(dev, cgrp->id);
+	if (ret)
+		return ret;
+
+	map = rtnl_dereference(dev->priomap);
+	map->priomap[cgrp->id] = prio;
+	return 0;
 }
 
-static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 {
 	struct cgroup_netprio_state *cs;
-	int ret = -EINVAL;
 
 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
 		return ERR_PTR(-ENOMEM);
 
-	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
-		goto out;
-
-	ret = get_prioidx(&cs->prioidx);
-	if (ret < 0) {
-		pr_warn("No space in priority index array\n");
-		goto out;
-	}
-
 	return &cs->css;
-out:
-	kfree(cs);
-	return ERR_PTR(ret);
 }
 
-static void cgrp_destroy(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup *cgrp)
 {
-	struct cgroup_netprio_state *cs;
+	struct cgroup *parent = cgrp->parent;
 	struct net_device *dev;
-	struct netprio_map *map;
+	int ret = 0;
+
+	if (!parent)
+		return 0;
 
-	cs = cgrp_netprio_state(cgrp);
 	rtnl_lock();
+	/*
+	 * Inherit prios from the parent.  As all prios are set during
+	 * onlining, there is no need to clear them on offline.
+	 */
 	for_each_netdev(&init_net, dev) {
-		map = rtnl_dereference(dev->priomap);
-		if (map && cs->prioidx < map->priomap_len)
-			map->priomap[cs->prioidx] = 0;
+		u32 prio = netprio_prio(parent, dev);
+
+		ret = netprio_set_prio(cgrp, dev, prio);
+		if (ret)
+			break;
 	}
 	rtnl_unlock();
-	put_prioidx(cs->prioidx);
-	kfree(cs);
+	return ret;
+}
+
+static void cgrp_css_free(struct cgroup *cgrp)
+{
+	kfree(cgrp_netprio_state(cgrp));
 }
 
 static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
 {
-	return (u64)cgrp_netprio_state(cgrp)->prioidx;
+	return cgrp->id;
 }
 
 static int read_priomap(struct cgroup *cont, struct cftype *cft,
 			struct cgroup_map_cb *cb)
 {
 	struct net_device *dev;
-	u32 prioidx = cgrp_netprio_state(cont)->prioidx;
-	u32 priority;
-	struct netprio_map *map;
 
 	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, dev) {
-		map = rcu_dereference(dev->priomap);
-		priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
-		cb->fill(cb, dev->name, priority);
-	}
+	for_each_netdev_rcu(&init_net, dev)
+		cb->fill(cb, dev->name, netprio_prio(cont, dev));
 	rcu_read_unlock();
 	return 0;
 }
@@ -176,66 +193,24 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
 static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 			 const char *buffer)
 {
-	char *devname = kstrdup(buffer, GFP_KERNEL);
-	int ret = -EINVAL;
-	u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
-	unsigned long priority;
-	char *priostr;
+	char devname[IFNAMSIZ + 1];
 	struct net_device *dev;
-	struct netprio_map *map;
-
-	if (!devname)
-		return -ENOMEM;
-
-	/*
-	 * Minimally sized valid priomap string
-	 */
-	if (strlen(devname) < 3)
-		goto out_free_devname;
-
-	priostr = strstr(devname, " ");
-	if (!priostr)
-		goto out_free_devname;
-
-	/*
-	 *Separate the devname from the associated priority
-	 *and advance the priostr pointer to the priority value
-	 */
-	*priostr = '\0';
-	priostr++;
-
-	/*
-	 * If the priostr points to NULL, we're at the end of the passed
-	 * in string, and its not a valid write
-	 */
-	if (*priostr == '\0')
-		goto out_free_devname;
-
-	ret = kstrtoul(priostr, 10, &priority);
-	if (ret < 0)
-		goto out_free_devname;
+	u32 prio;
+	int ret;
 
-	ret = -ENODEV;
+	if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
+		return -EINVAL;
 
 	dev = dev_get_by_name(&init_net, devname);
 	if (!dev)
-		goto out_free_devname;
+		return -ENODEV;
 
 	rtnl_lock();
-	ret = write_update_netdev_table(dev);
-	if (ret < 0)
-		goto out_put_dev;
 
-	map = rtnl_dereference(dev->priomap);
-	if (map)
-		map->priomap[prioidx] = priority;
+	ret = netprio_set_prio(cgrp, dev, prio);
 
-out_put_dev:
 	rtnl_unlock();
 	dev_put(dev);
-
-out_free_devname:
-	kfree(devname);
 	return ret;
 }
 
@@ -248,7 +223,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
 	void *v;
@@ -276,22 +251,13 @@ static struct cftype ss_files[] = {
 
 struct cgroup_subsys net_prio_subsys = {
 	.name		= "net_prio",
-	.create		= cgrp_create,
-	.destroy	= cgrp_destroy,
+	.css_alloc	= cgrp_css_alloc,
+	.css_online	= cgrp_css_online,
+	.css_free	= cgrp_css_free,
 	.attach		= net_prio_attach,
 	.subsys_id	= net_prio_subsys_id,
 	.base_cftypes	= ss_files,
 	.module		= THIS_MODULE,
-
-	/*
-	 * net_prio has artificial limit on the number of cgroups and
-	 * disallows nesting making it impossible to co-mount it with other
-	 * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ
-	 * limit and properly nest configuration such that children follow
-	 * their parents' configurations by default and are allowed to
-	 * override and remove the following.
-	 */
-	.broken_hierarchy = true,
 };
 
 static int netprio_device_event(struct notifier_block *unused,
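After the sscanf() rewrite, a valid write to the cgroup's ifpriomap file is exactly an interface name followed by a decimal priority. A hypothetical userspace sketch of such a write — the cgroup mount path is an assumption, not part of this patch:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    /* Set priority 5 for eth0 in one cgroup (path is illustrative). */
    static int set_prio(void)
    {
        const char buf[] = "eth0 5";
        int fd = open("/sys/fs/cgroup/net_prio/mygrp/net_prio.ifpriomap",
                      O_WRONLY);

        if (fd < 0)
            return -1;
        if (write(fd, buf, strlen(buf)) < 0) {
            close(fd);
            return -1;
        }
        return close(fd);
    }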
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d1dc14c2aac4..b29dacf900f9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -419,20 +419,6 @@ struct pktgen_thread {
 #define REMOVE 1
 #define FIND   0
 
-static inline ktime_t ktime_now(void)
-{
-	struct timespec ts;
-	ktime_get_ts(&ts);
-
-	return timespec_to_ktime(ts);
-}
-
-/* This works even if 32 bit because of careful byte order choice */
-static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
-{
-	return cmp1.tv64 < cmp2.tv64;
-}
-
 static const char version[] =
 	"Packet Generator for packet performance testing. "
 	"Version: " VERSION "\n";
@@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\n");
 
 	/* not really stopped, more like last-running-at */
-	stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at;
+	stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at;
 	idle = pkt_dev->idle_acc;
 	do_div(idle, NSEC_PER_USEC);
 
@@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 		return;
 	}
 
-	start_time = ktime_now();
+	start_time = ktime_get();
 	if (remaining < 100000) {
 		/* for small delays (<100us), just loop until limit is reached */
 		do {
-			end_time = ktime_now();
-		} while (ktime_lt(end_time, spin_until));
+			end_time = ktime_get();
+		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
 		/* see do_nanosleep */
 		hrtimer_init_sleeper(&t, current);
@@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 			hrtimer_cancel(&t.timer);
 		} while (t.task && pkt_dev->running && !signal_pending(current));
 		__set_current_state(TASK_RUNNING);
-		end_time = ktime_now();
+		end_time = ktime_get();
 	}
 
 	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
@@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2427 } 2413 }
2428 } else { /* IPV6 * */ 2414 } else { /* IPV6 * */
2429 2415
2430 if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && 2416 if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) {
2431 pkt_dev->min_in6_daddr.s6_addr32[1] == 0 &&
2432 pkt_dev->min_in6_daddr.s6_addr32[2] == 0 &&
2433 pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ;
2434 else {
2435 int i; 2417 int i;
2436 2418
2437 /* Only random destinations yet */ 2419 /* Only random destinations yet */
@@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t)
2916 pktgen_clear_counters(pkt_dev); 2898 pktgen_clear_counters(pkt_dev);
2917 pkt_dev->running = 1; /* Cranke yeself! */ 2899 pkt_dev->running = 1; /* Cranke yeself! */
2918 pkt_dev->skb = NULL; 2900 pkt_dev->skb = NULL;
2919 pkt_dev->started_at = 2901 pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
2920 pkt_dev->next_tx = ktime_now();
2921 2902
2922 set_pkt_overhead(pkt_dev); 2903 set_pkt_overhead(pkt_dev);
2923 2904
@@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
3076 3057
3077 kfree_skb(pkt_dev->skb); 3058 kfree_skb(pkt_dev->skb);
3078 pkt_dev->skb = NULL; 3059 pkt_dev->skb = NULL;
3079 pkt_dev->stopped_at = ktime_now(); 3060 pkt_dev->stopped_at = ktime_get();
3080 pkt_dev->running = 0; 3061 pkt_dev->running = 0;
3081 3062
3082 show_results(pkt_dev, nr_frags); 3063 show_results(pkt_dev, nr_frags);
@@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
3095 continue; 3076 continue;
3096 if (best == NULL) 3077 if (best == NULL)
3097 best = pkt_dev; 3078 best = pkt_dev;
3098 else if (ktime_lt(pkt_dev->next_tx, best->next_tx)) 3079 else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
3099 best = pkt_dev; 3080 best = pkt_dev;
3100 } 3081 }
3101 if_unlock(t); 3082 if_unlock(t);
@@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
3180 3161
3181static void pktgen_resched(struct pktgen_dev *pkt_dev) 3162static void pktgen_resched(struct pktgen_dev *pkt_dev)
3182{ 3163{
3183 ktime_t idle_start = ktime_now(); 3164 ktime_t idle_start = ktime_get();
3184 schedule(); 3165 schedule();
3185 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); 3166 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
3186} 3167}
3187 3168
3188static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) 3169static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
3189{ 3170{
3190 ktime_t idle_start = ktime_now(); 3171 ktime_t idle_start = ktime_get();
3191 3172
3192 while (atomic_read(&(pkt_dev->skb->users)) != 1) { 3173 while (atomic_read(&(pkt_dev->skb->users)) != 1) {
3193 if (signal_pending(current)) 3174 if (signal_pending(current))
@@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
3198 else 3179 else
3199 cpu_relax(); 3180 cpu_relax();
3200 } 3181 }
3201 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); 3182 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
3202} 3183}
3203 3184
3204static void pktgen_xmit(struct pktgen_dev *pkt_dev) 3185static void pktgen_xmit(struct pktgen_dev *pkt_dev)
@@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3220 * "never transmit" 3201 * "never transmit"
3221 */ 3202 */
3222 if (unlikely(pkt_dev->delay == ULLONG_MAX)) { 3203 if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
3223 pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); 3204 pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX);
3224 return; 3205 return;
3225 } 3206 }
3226 3207
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fad649ae4dec..1868625af25e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
128 if (tab == NULL || tab[msgindex].doit == NULL) 128 if (tab == NULL || tab[msgindex].doit == NULL)
129 tab = rtnl_msg_handlers[PF_UNSPEC]; 129 tab = rtnl_msg_handlers[PF_UNSPEC];
130 130
131 return tab ? tab[msgindex].doit : NULL; 131 return tab[msgindex].doit;
132} 132}
133 133
134static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) 134static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
@@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
143 if (tab == NULL || tab[msgindex].dumpit == NULL) 143 if (tab == NULL || tab[msgindex].dumpit == NULL)
144 tab = rtnl_msg_handlers[PF_UNSPEC]; 144 tab = rtnl_msg_handlers[PF_UNSPEC];
145 145
146 return tab ? tab[msgindex].dumpit : NULL; 146 return tab[msgindex].dumpit;
147} 147}
148 148
149static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) 149static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
@@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
158 if (tab == NULL || tab[msgindex].calcit == NULL) 158 if (tab == NULL || tab[msgindex].calcit == NULL)
159 tab = rtnl_msg_handlers[PF_UNSPEC]; 159 tab = rtnl_msg_handlers[PF_UNSPEC];
160 160
161 return tab ? tab[msgindex].calcit : NULL; 161 return tab[msgindex].calcit;
162} 162}
163 163
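Dropping the "tab ? ... : NULL" fallback here and in the two helpers below presumably relies on the PF_UNSPEC table being populated at boot, so the reassignment of tab above can never leave it NULL. A sketch of the assumed invariant:

	/* rtnetlink_init() registers PF_UNSPEC handlers during boot, e.g.: */
	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo,
		      rtnl_calcit);
	/* => rtnl_msg_handlers[PF_UNSPEC] != NULL by the time messages arrive */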
164/** 164/**
@@ -1316,6 +1316,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1316 err = PTR_ERR(net); 1316 err = PTR_ERR(net);
1317 goto errout; 1317 goto errout;
1318 } 1318 }
1319 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
1320 err = -EPERM;
1321 goto errout;
1322 }
1319 err = dev_change_net_namespace(dev, net, ifname); 1323 err = dev_change_net_namespace(dev, net, ifname);
1320 put_net(net); 1324 put_net(net);
1321 if (err) 1325 if (err)
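The new check makes moving a device into another network namespace conditional on CAP_NET_ADMIN as judged by the user namespace that owns the destination netns, rather than only the capability tested at message entry. Illustrative contrast, not new code:

	capable(CAP_NET_ADMIN);			/* privileged in the initial user ns */
	ns_capable(net->user_ns, CAP_NET_ADMIN);	/* privileged in the ns owning 'net' */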
@@ -1638,7 +1642,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
1638} 1642}
1639EXPORT_SYMBOL(rtnl_configure_link); 1643EXPORT_SYMBOL(rtnl_configure_link);
1640 1644
1641struct net_device *rtnl_create_link(struct net *src_net, struct net *net, 1645struct net_device *rtnl_create_link(struct net *net,
1642 char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) 1646 char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
1643{ 1647{
1644 int err; 1648 int err;
@@ -1836,7 +1840,7 @@ replay:
1836 if (IS_ERR(dest_net)) 1840 if (IS_ERR(dest_net))
1837 return PTR_ERR(dest_net); 1841 return PTR_ERR(dest_net);
1838 1842
1839 dev = rtnl_create_link(net, dest_net, ifname, ops, tb); 1843 dev = rtnl_create_link(dest_net, ifname, ops, tb);
1840 if (IS_ERR(dev)) { 1844 if (IS_ERR(dev)) {
1841 err = PTR_ERR(dev); 1845 err = PTR_ERR(dev);
1842 goto out; 1846 goto out;
@@ -2057,6 +2061,9 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2057 u8 *addr; 2061 u8 *addr;
2058 int err; 2062 int err;
2059 2063
2064 if (!capable(CAP_NET_ADMIN))
2065 return -EPERM;
2066
2060 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); 2067 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
2061 if (err < 0) 2068 if (err < 0)
2062 return err; 2069 return err;
@@ -2123,6 +2130,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2123 int err = -EINVAL; 2130 int err = -EINVAL;
2124 __u8 *addr; 2131 __u8 *addr;
2125 2132
2133 if (!capable(CAP_NET_ADMIN))
2134 return -EPERM;
2135
2126 if (nlmsg_len(nlh) < sizeof(*ndm)) 2136 if (nlmsg_len(nlh) < sizeof(*ndm))
2127 return -EINVAL; 2137 return -EINVAL;
2128 2138
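RTM_NEWNEIGH and RTM_DELNEIGH are NEW/DEL kinds and so used to be covered by the blanket capable(CAP_NET_ADMIN) test in rtnetlink_rcv_msg(); with that test relaxed to ns_capable() (see the rtnetlink_rcv_msg hunk below), handlers presumably not yet audited for user-namespace callers gain an explicit init-namespace check. The generic shape of such a guarded doit handler:

	static int some_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
	{
		if (!capable(CAP_NET_ADMIN))	/* global privilege, not per-netns */
			return -EPERM;
		/* ... parse and apply ... */
		return 0;
	}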
@@ -2253,6 +2263,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2253 return skb->len; 2263 return skb->len;
2254} 2264}
2255 2265
2266int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
2267 struct net_device *dev, u16 mode)
2268{
2269 struct nlmsghdr *nlh;
2270 struct ifinfomsg *ifm;
2271 struct nlattr *br_afspec;
2272 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
2273
2274 nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
2275 if (nlh == NULL)
2276 return -EMSGSIZE;
2277
2278 ifm = nlmsg_data(nlh);
2279 ifm->ifi_family = AF_BRIDGE;
2280 ifm->__ifi_pad = 0;
2281 ifm->ifi_type = dev->type;
2282 ifm->ifi_index = dev->ifindex;
2283 ifm->ifi_flags = dev_get_flags(dev);
2284 ifm->ifi_change = 0;
2285
2286
2287 if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
2288 nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
2289 nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
2290 (dev->master &&
2291 nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) ||
2292 (dev->addr_len &&
2293 nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
2294 (dev->ifindex != dev->iflink &&
2295 nla_put_u32(skb, IFLA_LINK, dev->iflink)))
2296 goto nla_put_failure;
2297
2298 br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
2299 if (!br_afspec)
2300 goto nla_put_failure;
2301
2302 if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) ||
2303 nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) {
2304 nla_nest_cancel(skb, br_afspec);
2305 goto nla_put_failure;
2306 }
2307 nla_nest_end(skb, br_afspec);
2308
2309 return nlmsg_end(skb, nlh);
2310nla_put_failure:
2311 nlmsg_cancel(skb, nlh);
2312 return -EMSGSIZE;
2313}
2314EXPORT_SYMBOL(ndo_dflt_bridge_getlink);
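A driver with no port-specific bridge state can implement its ndo_bridge_getlink with this default helper. A minimal sketch assuming a hypothetical "foo" driver whose embedded switch runs in VEB mode:

	static int foo_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
					  struct net_device *dev)
	{
		/* report this port with IFLA_BRIDGE_MODE = VEB */
		return ndo_dflt_bridge_getlink(skb, pid, seq, dev, BRIDGE_MODE_VEB);
	}

	static const struct net_device_ops foo_netdev_ops = {
		/* ... */
		.ndo_bridge_getlink	= foo_ndo_bridge_getlink,
	};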
2315
2316static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
2317{
2318 struct net *net = sock_net(skb->sk);
2319 struct net_device *dev;
2320 int idx = 0;
2321 u32 portid = NETLINK_CB(cb->skb).portid;
2322 u32 seq = cb->nlh->nlmsg_seq;
2323
2324 rcu_read_lock();
2325 for_each_netdev_rcu(net, dev) {
2326 const struct net_device_ops *ops = dev->netdev_ops;
2327 struct net_device *master = dev->master;
2328
2329 if (master && master->netdev_ops->ndo_bridge_getlink) {
2330 if (idx >= cb->args[0] &&
2331 master->netdev_ops->ndo_bridge_getlink(
2332 skb, portid, seq, dev) < 0)
2333 break;
2334 idx++;
2335 }
2336
2337 if (ops->ndo_bridge_getlink) {
2338 if (idx >= cb->args[0] &&
2339 ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0)
2340 break;
2341 idx++;
2342 }
2343 }
2344 rcu_read_unlock();
2345 cb->args[0] = idx;
2346
2347 return skb->len;
2348}
2349
2350static inline size_t bridge_nlmsg_size(void)
2351{
2352 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
2353 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
2354 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
2355 + nla_total_size(sizeof(u32)) /* IFLA_MASTER */
2356 + nla_total_size(sizeof(u32)) /* IFLA_MTU */
2357 + nla_total_size(sizeof(u32)) /* IFLA_LINK */
2358 + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */
2359 + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */
2360 + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */
2361 + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */
2362 + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */
2363}
2364
2365static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
2366{
2367 struct net *net = dev_net(dev);
2368 struct net_device *master = dev->master;
2369 struct sk_buff *skb;
2370 int err = -EOPNOTSUPP;
2371
2372 skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
2373 if (!skb) {
2374 err = -ENOMEM;
2375 goto errout;
2376 }
2377
2378 if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
2379 master && master->netdev_ops->ndo_bridge_getlink) {
2380 err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
2381 if (err < 0)
2382 goto errout;
2383 }
2384
2385 if ((flags & BRIDGE_FLAGS_SELF) &&
2386 dev->netdev_ops->ndo_bridge_getlink) {
2387 err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
2388 if (err < 0)
2389 goto errout;
2390 }
2391
2392 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
2393 return 0;
2394errout:
2395 WARN_ON(err == -EMSGSIZE);
2396 kfree_skb(skb);
2397 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
2398 return err;
2399}
2400
2401static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2402 void *arg)
2403{
2404 struct net *net = sock_net(skb->sk);
2405 struct ifinfomsg *ifm;
2406 struct net_device *dev;
2407 struct nlattr *br_spec, *attr = NULL;
2408 int rem, err = -EOPNOTSUPP;
2409 u16 oflags, flags = 0;
2410 bool have_flags = false;
2411
2412 if (nlmsg_len(nlh) < sizeof(*ifm))
2413 return -EINVAL;
2414
2415 ifm = nlmsg_data(nlh);
2416 if (ifm->ifi_family != AF_BRIDGE)
2417 return -EPFNOSUPPORT;
2418
2419 dev = __dev_get_by_index(net, ifm->ifi_index);
2420 if (!dev) {
2421 pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
2422 return -ENODEV;
2423 }
2424
2425 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
2426 if (br_spec) {
2427 nla_for_each_nested(attr, br_spec, rem) {
2428 if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
2429 have_flags = true;
2430 flags = nla_get_u16(attr);
2431 break;
2432 }
2433 }
2434 }
2435
2436 oflags = flags;
2437
2438 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
2439 if (!dev->master ||
2440 !dev->master->netdev_ops->ndo_bridge_setlink) {
2441 err = -EOPNOTSUPP;
2442 goto out;
2443 }
2444
2445 err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh);
2446 if (err)
2447 goto out;
2448
2449 flags &= ~BRIDGE_FLAGS_MASTER;
2450 }
2451
2452 if ((flags & BRIDGE_FLAGS_SELF)) {
2453 if (!dev->netdev_ops->ndo_bridge_setlink)
2454 err = -EOPNOTSUPP;
2455 else
2456 err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
2457
2458 if (!err)
2459 flags &= ~BRIDGE_FLAGS_SELF;
2460 }
2461
2462 if (have_flags)
2463 memcpy(nla_data(attr), &flags, sizeof(flags));
2464 /* Generate event to notify upper layer of bridge change */
2465 if (!err)
2466 err = rtnl_bridge_notify(dev, oflags);
2467out:
2468 return err;
2469}
2470
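rtnl_bridge_setlink() applies the request to the bridge master (the default, or when BRIDGE_FLAGS_MASTER is set) and/or to the port device itself (BRIDGE_FLAGS_SELF, e.g. an embedded switch on the NIC), clearing each flag on success and writing the remainder back into the attribute before notifying. The request layout it expects, roughly (attribute nesting sketch, not code):

	RTM_SETLINK, ifinfomsg.ifi_family = AF_BRIDGE, ifi_index = <port ifindex>
	  IFLA_AF_SPEC (nested)
	    IFLA_BRIDGE_FLAGS	u16	BRIDGE_FLAGS_MASTER and/or BRIDGE_FLAGS_SELF
	    IFLA_BRIDGE_MODE	u16	e.g. BRIDGE_MODE_VEB or BRIDGE_MODE_VEPA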
2256/* Protected by RTNL semaphore. */ 2471/* Protected by RTNL semaphore. */
2257static struct rtattr **rta_buf; 2472static struct rtattr **rta_buf;
2258static int rtattr_max; 2473static int rtattr_max;
@@ -2283,7 +2498,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2283 sz_idx = type>>2; 2498 sz_idx = type>>2;
2284 kind = type&3; 2499 kind = type&3;
2285 2500
2286 if (kind != 2 && !capable(CAP_NET_ADMIN)) 2501 if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2287 return -EPERM; 2502 return -EPERM;
2288 2503
2289 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { 2504 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
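rtnetlink message types are allocated in groups of four, which is what the shift and mask above exploit; only GET messages remain unprivileged, and the privilege test is now evaluated against the netns's owning user namespace:

	sz_idx = type >> 2;	/* message family: LINK, ADDR, ROUTE, ... */
	kind   = type & 3;	/* 0 = NEW, 1 = DEL, 2 = GET, 3 = SET */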
@@ -2434,5 +2649,8 @@ void __init rtnetlink_init(void)
2434 rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); 2649 rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
2435 rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); 2650 rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
2436 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); 2651 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
2652
2653 rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
2654 rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
2437} 2655}
2438 2656
diff --git a/net/core/scm.c b/net/core/scm.c
index ab570841a532..57fb1ee6649f 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -51,11 +51,11 @@ static __inline__ int scm_check_creds(struct ucred *creds)
51 if (!uid_valid(uid) || !gid_valid(gid)) 51 if (!uid_valid(uid) || !gid_valid(gid))
52 return -EINVAL; 52 return -EINVAL;
53 53
54 if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && 54 if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) &&
55 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || 55 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
56 uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) && 56 uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
57 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || 57 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
58 gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) { 58 gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) {
59 return 0; 59 return 0;
60 } 60 }
61 return -EPERM; 61 return -EPERM;
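nsown_capable() checks the capability in the caller's own user namespace, so a process that is privileged inside its namespace may pass SCM_CREDENTIALS it could not pass globally, provided the uids/gids map validly. Its assumed definition (from <linux/capability.h> of this era):

	static inline bool nsown_capable(int cap)
	{
		return ns_capable(current_user_ns(), cap);
	}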
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3f0636cd76cd..3ab989b0de42 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb)
519 519
520 uarg = skb_shinfo(skb)->destructor_arg; 520 uarg = skb_shinfo(skb)->destructor_arg;
521 if (uarg->callback) 521 if (uarg->callback)
522 uarg->callback(uarg); 522 uarg->callback(uarg, true);
523 } 523 }
524 524
525 if (skb_has_frag_list(skb)) 525 if (skb_has_frag_list(skb))
@@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb)
635EXPORT_SYMBOL(kfree_skb); 635EXPORT_SYMBOL(kfree_skb);
636 636
637/** 637/**
638 * skb_tx_error - report an sk_buff xmit error
639 * @skb: buffer that triggered an error
640 *
641 * Report xmit error if a device callback is tracking this skb.
642 * skb must be freed afterwards.
643 */
644void skb_tx_error(struct sk_buff *skb)
645{
646 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
647 struct ubuf_info *uarg;
648
649 uarg = skb_shinfo(skb)->destructor_arg;
650 if (uarg->callback)
651 uarg->callback(uarg, false);
652 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
653 }
654}
655EXPORT_SYMBOL(skb_tx_error);
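A sketch of the intended use in a driver's xmit error path; foo_map_skb() is hypothetical. If setup fails after the stack handed over a zerocopy skb, the driver reports the error before freeing so the ubuf owner's callback completes with success == false:

	static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		if (foo_map_skb(dev, skb) < 0) {
			skb_tx_error(skb);	/* complete zerocopy ubuf as failed */
			kfree_skb(skb);		/* skb must still be freed by us */
			return NETDEV_TX_OK;
		}
		/* ... queue for DMA ... */
		return NETDEV_TX_OK;
	}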
656
657/**
638 * consume_skb - free an skbuff 658 * consume_skb - free an skbuff
639 * @skb: buffer to free 659 * @skb: buffer to free
640 * 660 *
@@ -662,11 +682,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
662 new->transport_header = old->transport_header; 682 new->transport_header = old->transport_header;
663 new->network_header = old->network_header; 683 new->network_header = old->network_header;
664 new->mac_header = old->mac_header; 684 new->mac_header = old->mac_header;
685 new->inner_transport_header = old->inner_transport_header;
 686 new->inner_network_header = old->inner_network_header;
665 skb_dst_copy(new, old); 687 skb_dst_copy(new, old);
666 new->rxhash = old->rxhash; 688 new->rxhash = old->rxhash;
667 new->ooo_okay = old->ooo_okay; 689 new->ooo_okay = old->ooo_okay;
668 new->l4_rxhash = old->l4_rxhash; 690 new->l4_rxhash = old->l4_rxhash;
669 new->no_fcs = old->no_fcs; 691 new->no_fcs = old->no_fcs;
692 new->encapsulation = old->encapsulation;
670#ifdef CONFIG_XFRM 693#ifdef CONFIG_XFRM
671 new->sp = secpath_get(old->sp); 694 new->sp = secpath_get(old->sp);
672#endif 695#endif
@@ -797,7 +820,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
797 for (i = 0; i < num_frags; i++) 820 for (i = 0; i < num_frags; i++)
798 skb_frag_unref(skb, i); 821 skb_frag_unref(skb, i);
799 822
800 uarg->callback(uarg); 823 uarg->callback(uarg, false);
801 824
802 /* skb frags point to kernel buffers */ 825 /* skb frags point to kernel buffers */
803 for (i = num_frags - 1; i >= 0; i--) { 826 for (i = num_frags - 1; i >= 0; i--) {
@@ -872,6 +895,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
872 new->network_header += offset; 895 new->network_header += offset;
873 if (skb_mac_header_was_set(new)) 896 if (skb_mac_header_was_set(new))
874 new->mac_header += offset; 897 new->mac_header += offset;
898 new->inner_transport_header += offset;
899 new->inner_network_header += offset;
875#endif 900#endif
876 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; 901 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
877 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; 902 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
@@ -1069,6 +1094,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
1069 skb->network_header += off; 1094 skb->network_header += off;
1070 if (skb_mac_header_was_set(skb)) 1095 if (skb_mac_header_was_set(skb))
1071 skb->mac_header += off; 1096 skb->mac_header += off;
1097 skb->inner_transport_header += off;
1098 skb->inner_network_header += off;
1072 /* Only adjust this if it actually is csum_start rather than csum */ 1099 /* Only adjust this if it actually is csum_start rather than csum */
1073 if (skb->ip_summed == CHECKSUM_PARTIAL) 1100 if (skb->ip_summed == CHECKSUM_PARTIAL)
1074 skb->csum_start += nhead; 1101 skb->csum_start += nhead;
@@ -1168,6 +1195,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1168 n->network_header += off; 1195 n->network_header += off;
1169 if (skb_mac_header_was_set(skb)) 1196 if (skb_mac_header_was_set(skb))
1170 n->mac_header += off; 1197 n->mac_header += off;
1198 n->inner_transport_header += off;
1199 n->inner_network_header += off;
1171#endif 1200#endif
1172 1201
1173 return n; 1202 return n;
@@ -2999,7 +3028,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2999 memcpy(skb_mac_header(nskb), skb_mac_header(p), 3028 memcpy(skb_mac_header(nskb), skb_mac_header(p),
3000 p->data - skb_mac_header(p)); 3029 p->data - skb_mac_header(p));
3001 3030
3002 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
3003 skb_shinfo(nskb)->frag_list = p; 3031 skb_shinfo(nskb)->frag_list = p;
3004 skb_shinfo(nskb)->gso_size = pinfo->gso_size; 3032 skb_shinfo(nskb)->gso_size = pinfo->gso_size;
3005 pinfo->gso_size = 0; 3033 pinfo->gso_size = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 8a146cfcc366..a692ef49c9bb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -505,7 +505,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
505} 505}
506EXPORT_SYMBOL(sk_dst_check); 506EXPORT_SYMBOL(sk_dst_check);
507 507
508static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) 508static int sock_setbindtodevice(struct sock *sk, char __user *optval,
509 int optlen)
509{ 510{
510 int ret = -ENOPROTOOPT; 511 int ret = -ENOPROTOOPT;
511#ifdef CONFIG_NETDEVICES 512#ifdef CONFIG_NETDEVICES
@@ -515,7 +516,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
515 516
516 /* Sorry... */ 517 /* Sorry... */
517 ret = -EPERM; 518 ret = -EPERM;
518 if (!capable(CAP_NET_RAW)) 519 if (!ns_capable(net->user_ns, CAP_NET_RAW))
519 goto out; 520 goto out;
520 521
521 ret = -EINVAL; 522 ret = -EINVAL;
@@ -562,6 +563,59 @@ out:
562 return ret; 563 return ret;
563} 564}
564 565
566static int sock_getbindtodevice(struct sock *sk, char __user *optval,
567 int __user *optlen, int len)
568{
569 int ret = -ENOPROTOOPT;
570#ifdef CONFIG_NETDEVICES
571 struct net *net = sock_net(sk);
572 struct net_device *dev;
573 char devname[IFNAMSIZ];
574 unsigned seq;
575
576 if (sk->sk_bound_dev_if == 0) {
577 len = 0;
578 goto zero;
579 }
580
581 ret = -EINVAL;
582 if (len < IFNAMSIZ)
583 goto out;
584
585retry:
586 seq = read_seqbegin(&devnet_rename_seq);
587 rcu_read_lock();
588 dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
589 ret = -ENODEV;
590 if (!dev) {
591 rcu_read_unlock();
592 goto out;
593 }
594
595 strcpy(devname, dev->name);
596 rcu_read_unlock();
597 if (read_seqretry(&devnet_rename_seq, seq))
598 goto retry;
599
600 len = strlen(devname) + 1;
601
602 ret = -EFAULT;
603 if (copy_to_user(optval, devname, len))
604 goto out;
605
606zero:
607 ret = -EFAULT;
608 if (put_user(len, optlen))
609 goto out;
610
611 ret = 0;
612
613out:
614#endif
615
616 return ret;
617}
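The kernel side above snapshots the device name under devnet_rename_seq and retries, so a concurrent rename cannot return a torn string. A userspace sketch of the new getsockopt() direction; the returned length is 0 when the socket is not bound to a device:

	#include <stdio.h>
	#include <sys/socket.h>
	#include <net/if.h>

	static void print_bound_dev(int fd)
	{
		char name[IFNAMSIZ];
		socklen_t len = sizeof(name);

		if (getsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, name, &len) == 0)
			printf("bound to: %s\n", len ? name : "(none)");
	}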
618
565static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) 619static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
566{ 620{
567 if (valbool) 621 if (valbool)
@@ -589,7 +643,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
589 */ 643 */
590 644
591 if (optname == SO_BINDTODEVICE) 645 if (optname == SO_BINDTODEVICE)
592 return sock_bindtodevice(sk, optval, optlen); 646 return sock_setbindtodevice(sk, optval, optlen);
593 647
594 if (optlen < sizeof(int)) 648 if (optlen < sizeof(int))
595 return -EINVAL; 649 return -EINVAL;
@@ -696,7 +750,8 @@ set_rcvbuf:
696 break; 750 break;
697 751
698 case SO_PRIORITY: 752 case SO_PRIORITY:
699 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) 753 if ((val >= 0 && val <= 6) ||
754 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
700 sk->sk_priority = val; 755 sk->sk_priority = val;
701 else 756 else
702 ret = -EPERM; 757 ret = -EPERM;
@@ -813,7 +868,7 @@ set_rcvbuf:
813 clear_bit(SOCK_PASSSEC, &sock->flags); 868 clear_bit(SOCK_PASSSEC, &sock->flags);
814 break; 869 break;
815 case SO_MARK: 870 case SO_MARK:
816 if (!capable(CAP_NET_ADMIN)) 871 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
817 ret = -EPERM; 872 ret = -EPERM;
818 else 873 else
819 sk->sk_mark = val; 874 sk->sk_mark = val;
@@ -1074,6 +1129,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1074 case SO_NOFCS: 1129 case SO_NOFCS:
1075 v.val = sock_flag(sk, SOCK_NOFCS); 1130 v.val = sock_flag(sk, SOCK_NOFCS);
1076 break; 1131 break;
1132
1133 case SO_BINDTODEVICE:
1134 return sock_getbindtodevice(sk, optval, optlen, len);
1135
1136 case SO_GET_FILTER:
1137 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1138 if (len < 0)
1139 return len;
1140
1141 goto lenout;
1142
1077 default: 1143 default:
1078 return -ENOPROTOOPT; 1144 return -ENOPROTOOPT;
1079 } 1145 }
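SO_GET_FILTER aliases SO_ATTACH_FILTER in the uapi socket headers, and for this option optlen appears to be counted in sock_filter instructions rather than bytes; a zero length just reports the attached program's size. A hedged userspace sketch under those assumptions:

	#include <stdlib.h>
	#include <sys/socket.h>
	#include <linux/filter.h>

	#ifndef SO_GET_FILTER
	#define SO_GET_FILTER SO_ATTACH_FILTER	/* uapi alias, value 26 */
	#endif

	static struct sock_filter *get_attached_filter(int fd, socklen_t *cnt)
	{
		struct sock_filter *insns;
		socklen_t len = 0;

		/* first call with len 0 returns the instruction count */
		if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &len) < 0 || !len)
			return NULL;
		insns = calloc(len, sizeof(*insns));	/* len is in instructions */
		if (insns && getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &len) < 0) {
			free(insns);
			return NULL;
		}
		*cnt = len;
		return insns;
	}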
@@ -1214,13 +1280,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1214 1280
1215#ifdef CONFIG_CGROUPS 1281#ifdef CONFIG_CGROUPS
1216#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) 1282#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1217void sock_update_classid(struct sock *sk) 1283void sock_update_classid(struct sock *sk, struct task_struct *task)
1218{ 1284{
1219 u32 classid; 1285 u32 classid;
1220 1286
1221 rcu_read_lock(); /* doing current task, which cannot vanish. */ 1287 classid = task_cls_classid(task);
1222 classid = task_cls_classid(current);
1223 rcu_read_unlock();
1224 if (classid != sk->sk_classid) 1288 if (classid != sk->sk_classid)
1225 sk->sk_classid = classid; 1289 sk->sk_classid = classid;
1226} 1290}
@@ -1263,7 +1327,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1263 sock_net_set(sk, get_net(net)); 1327 sock_net_set(sk, get_net(net));
1264 atomic_set(&sk->sk_wmem_alloc, 1); 1328 atomic_set(&sk->sk_wmem_alloc, 1);
1265 1329
1266 sock_update_classid(sk); 1330 sock_update_classid(sk, current);
1267 sock_update_netprioidx(sk, current); 1331 sock_update_netprioidx(sk, current);
1268 } 1332 }
1269 1333
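Passing the task explicitly lets callers tag sockets for a task other than current, presumably so the net_cls cgroup's attach path can retag already-open sockets of a task being moved. An assumed caller, for illustration only:

	/* inside a cgroup ->attach() handler, per open socket of 'task': */
	sock_update_classid(sock->sk, task);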
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a7c36845b123..d1b08045a9df 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -216,6 +216,11 @@ static __net_init int sysctl_core_net_init(struct net *net)
216 goto err_dup; 216 goto err_dup;
217 217
218 tbl[0].data = &net->core.sysctl_somaxconn; 218 tbl[0].data = &net->core.sysctl_somaxconn;
219
220 /* Don't export any sysctls to unprivileged users */
221 if (net->user_ns != &init_user_ns) {
222 tbl[0].procname = NULL;
223 }
219 } 224 }
220 225
221 net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); 226 net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
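A ctl_table array is terminated by an entry whose procname is NULL, so clearing the first (and only) entry registers an empty net.core table for network namespaces owned by a non-initial user namespace, hiding somaxconn from users who are privileged only inside their own namespace. Illustrative sketch of the sentinel convention (field values assumed, matching the usual net/core table):

	static struct ctl_table netns_core_table[] = {
		{
			.procname	= "somaxconn",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec
		},
		{ }	/* procname == NULL: end of table */
	};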