author    Linus Torvalds <torvalds@linux-foundation.org>  2012-12-12 21:07:07 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-12-12 21:07:07 -0500
commit    6be35c700f742e911ecedd07fcc43d4439922334
tree      ca9f37214d204465fcc2d79c82efd291e357c53c  /net/core/dev.c
parent    e37aa63e87bd581f9be5555ed0ba83f5295c92fc
parent    520dfe3a3645257bf83660f672c47f8558f3d4c4
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller:

 1) Allow to dump, monitor, and change the bridge multicast database
    using netlink.  From Cong Wang.

 2) RFC 5961 TCP blind data injection attack mitigation, from Eric
    Dumazet.

 3) Networking user namespace support from Eric W. Biederman.

 4) tuntap/virtio-net multiqueue support by Jason Wang.

 5) Support for checksum offload of encapsulated packets (basically,
    tunneled traffic can still be checksummed by HW).  From Joseph
    Gasparakis.

 6) Allow BPF filter access to VLAN tags, from Eric Dumazet and
    Daniel Borkmann.

 7) Bridge port parameters over netlink and BPDU blocking support
    from Stephen Hemminger.

 8) Improve data access patterns during inet socket demux by
    rearranging socket layout, from Eric Dumazet.

 9) TIPC protocol updates and cleanups from Ying Xue, Paul Gortmaker,
    and Jon Maloy.

10) Update TCP socket hash sizing to be more in line with current day
    realities.  The existing heuristics were chosen a decade ago.
    From Eric Dumazet.

11) Fix races, queue bloat, and excessive wakeups in ATM and
    associated drivers, from Krzysztof Mazur and David Woodhouse.

12) Support DOVE (Distributed Overlay Virtual Ethernet) extensions in
    VXLAN driver, from David Stevens.

13) Add "oops_only" mode to netconsole, from Amerigo Wang.

14) Support set and query of VEB/VEPA bridge mode via PF_BRIDGE, also
    allow DCB netlink to work on namespaces other than the initial
    namespace.  From John Fastabend.

15) Support PTP in the Tigon3 driver, from Matt Carlson.

16) tun/vhost zero copy fixes and improvements, plus turn it on by
    default, from Michael S. Tsirkin.

17) Support per-association statistics in SCTP, from Michele
    Baldessari.

And many, many driver updates, cleanups, and improvements.  Too
numerous to mention individually.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1722 commits)
  net/mlx4_en: Add support for destination MAC in steering rules
  net/mlx4_en: Use generic etherdevice.h functions.
  net: ethtool: Add destination MAC address to flow steering API
  bridge: add support of adding and deleting mdb entries
  bridge: notify mdb changes via netlink
  ndisc: Unexport ndisc_{build,send}_skb().
  uapi: add missing netconf.h to export list
  pkt_sched: avoid requeues if possible
  solos-pci: fix double-free of TX skb in DMA mode
  bnx2: Fix accidental reversions.
  bna: Driver Version Updated to 3.1.2.1
  bna: Firmware update
  bna: Add RX State
  bna: Rx Page Based Allocation
  bna: TX Intr Coalescing Fix
  bna: Tx and Rx Optimizations
  bna: Code Cleanup and Enhancements
  ath9k: check pdata variable before dereferencing it
  ath5k: RX timestamp is reported at end of frame
  ath9k_htc: RX timestamp is reported at end of frame
  ...
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  236
1 file changed, 176 insertions(+), 60 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index e5942bf45a6d..d0cbc93fcf32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,8 +176,10 @@
 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
 
 static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;	/* Taps */
+static struct list_head offload_base __read_mostly;
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -201,6 +203,8 @@ static struct list_head ptype_all __read_mostly; /* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
+DEFINE_SEQLOCK(devnet_rename_seq);
+
 static inline void dev_base_seq_inc(struct net *net)
 {
 	while (++net->dev_base_seq == 0);
@@ -470,6 +474,82 @@ void dev_remove_pack(struct packet_type *pt)
 }
 EXPORT_SYMBOL(dev_remove_pack);
 
+
+/**
+ *	dev_add_offload - register offload handlers
+ *	@po: protocol offload declaration
+ *
+ *	Add protocol offload handlers to the networking stack. The passed
+ *	&proto_offload is linked into kernel lists and may not be freed until
+ *	it has been removed from the kernel lists.
+ *
+ *	This call does not sleep therefore it can not
+ *	guarantee all CPU's that are in middle of receiving packets
+ *	will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+
+	spin_lock(&offload_lock);
+	list_add_rcu(&po->list, head);
+	spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ *	__dev_remove_offload - remove offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a protocol offload handler that was previously added to the
+ *	kernel offload handlers by dev_add_offload(). The passed &offload_type
+ *	is removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *	The packet type might still be in use by receivers
+ *	and must not be freed until after all the CPU's have gone
+ *	through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+	struct packet_offload *po1;
+
+	spin_lock(&offload_lock);
+
+	list_for_each_entry(po1, head, list) {
+		if (po == po1) {
+			list_del_rcu(&po->list);
+			goto out;
+		}
+	}
+
+	pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+	spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ *	dev_remove_offload - remove packet offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a packet offload handler that was previously added to the kernel
+ *	offload handlers by dev_add_offload(). The passed &offload_type is
+ *	removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *	This call sleeps to guarantee that no CPU is looking at the packet
+ *	type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+	__dev_remove_offload(po);
+
+	synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
 /*******************************************************************************
 
 		      Device Boot-time Settings Routines
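
The hunk above introduces the new offload registration API. For orientation, a minimal sketch of how a protocol could hook into it, assuming only what this diff itself shows of struct packet_offload (a type field, a callbacks struct with gso_send_check/gso_segment/gro_receive/gro_complete members, and a list head); every my_* identifier below is a placeholder for illustration, not code from this commit:

#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/err.h>

/* Placeholder callbacks; real ones would validate headers and
 * segment/coalesce skbs for the protocol in question. */
static int my_gso_send_check(struct sk_buff *skb)
{
	return 0;			/* pretend headers are already valid */
}

static struct sk_buff *my_gso_segment(struct sk_buff *skb,
				      netdev_features_t features)
{
	return ERR_PTR(-EOPNOTSUPP);	/* no software fallback in this sketch */
}

static struct packet_offload my_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),	/* illustrative EtherType */
	.callbacks = {
		.gso_send_check = my_gso_send_check,
		.gso_segment	= my_gso_segment,
	},
};

static int __init my_offload_init(void)
{
	dev_add_offload(&my_offload);	/* links onto offload_base */
	return 0;
}

static void __exit my_offload_exit(void)
{
	dev_remove_offload(&my_offload);	/* sleeps for a grace period */
}
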
@@ -1013,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+	write_seqlock(&devnet_rename_seq);
+
+	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+		write_sequnlock(&devnet_rename_seq);
 		return 0;
+	}
 
 	memcpy(oldname, dev->name, IFNAMSIZ);
 
 	err = dev_get_valid_name(net, dev, newname);
-	if (err < 0)
+	if (err < 0) {
+		write_sequnlock(&devnet_rename_seq);
 		return err;
+	}
 
 rollback:
 	ret = device_rename(&dev->dev, dev->name);
 	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
+		write_sequnlock(&devnet_rename_seq);
 		return ret;
 	}
 
+	write_sequnlock(&devnet_rename_seq);
+
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
 	write_unlock_bh(&dev_base_lock);
@@ -1046,6 +1135,7 @@ rollback:
 	/* err >= 0 after dev_alloc_name() or stores the first errno */
 	if (err >= 0) {
 		err = ret;
+		write_seqlock(&devnet_rename_seq);
 		memcpy(dev->name, oldname, IFNAMSIZ);
 		goto rollback;
 	} else {
@@ -1075,10 +1165,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 		return -EINVAL;
 
 	if (!len) {
-		if (dev->ifalias) {
-			kfree(dev->ifalias);
-			dev->ifalias = NULL;
-		}
+		kfree(dev->ifalias);
+		dev->ifalias = NULL;
 		return 0;
 	}
 
@@ -1994,7 +2082,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 					netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
 	int vlan_depth = ETH_HLEN;
 	int err;
@@ -2023,18 +2111,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype,
-			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+	list_for_each_entry_rcu(ptype, &offload_base, list) {
+		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-				err = ptype->gso_send_check(skb);
+				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
 					break;
 				__skb_push(skb, (skb->data -
 						 skb_network_header(skb)));
 			}
-			segs = ptype->gso_segment(skb, features);
+			segs = ptype->callbacks.gso_segment(skb, features);
 			break;
 		}
 	}
@@ -2237,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			skb->vlan_tci = 0;
 		}
 
+		/* If encapsulation offload request, verify we are testing
+		 * hardware encapsulation features instead of standard
+		 * features for the netdev
+		 */
+		if (skb->encapsulation)
+			features &= dev->hw_enc_features;
+
 		if (netif_needs_gso(skb, features)) {
 			if (unlikely(dev_gso_segment(skb, features)))
 				goto out_kfree_skb;
@@ -2252,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		 * checksumming here.
 		 */
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			skb_set_transport_header(skb,
-				skb_checksum_start_offset(skb));
+			if (skb->encapsulation)
+				skb_set_inner_transport_header(skb,
+					skb_checksum_start_offset(skb));
+			else
+				skb_set_transport_header(skb,
+					skb_checksum_start_offset(skb));
 			if (!(features & NETIF_F_ALL_CSUM) &&
 			    skb_checksum_help(skb))
 				goto out_kfree_skb;
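
These two hunks are the transmit side of item 5 in the pull message: when skb->encapsulation is set, feature tests and the checksum start are resolved against the inner (tunneled) headers. For that to have any effect, a driver must advertise which offloads survive encapsulation. A hedged sketch of what that might look like in a driver's setup path; the flag combination is illustrative, not taken from this commit:

/* Claim that checksum, scatter-gather and TSO still work when the
 * packet carries an encapsulation header, so the
 * "features &= dev->hw_enc_features" test above keeps them enabled
 * for tunneled skbs instead of forcing a software fallback. */
dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
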
@@ -3446,9 +3544,9 @@ static void flush_backlog(void *arg)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int err = -ENOENT;
 
 	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
@@ -3460,10 +3558,10 @@ static int napi_gro_complete(struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+		if (ptype->type != type || !ptype->callbacks.gro_complete)
 			continue;
 
-		err = ptype->gro_complete(skb);
+		err = ptype->callbacks.gro_complete(skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -3507,12 +3605,34 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
+{
+	struct sk_buff *p;
+	unsigned int maclen = skb->dev->hard_header_len;
+
+	for (p = napi->gro_list; p; p = p->next) {
+		unsigned long diffs;
+
+		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+		diffs |= p->vlan_tci ^ skb->vlan_tci;
+		if (maclen == ETH_HLEN)
+			diffs |= compare_ether_header(skb_mac_header(p),
+						      skb_gro_mac_header(skb));
+		else if (!diffs)
+			diffs = memcmp(skb_mac_header(p),
+				       skb_gro_mac_header(skb),
+				       maclen);
+		NAPI_GRO_CB(p)->same_flow = !diffs;
+		NAPI_GRO_CB(p)->flush = 0;
+	}
+}
+
+static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int same_flow;
 	int mac_len;
 	enum gro_result ret;
@@ -3523,9 +3643,11 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
+	gro_list_prepare(napi, skb);
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+		if (ptype->type != type || !ptype->callbacks.gro_receive)
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3535,7 +3657,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 
-		pp = ptype->gro_receive(&napi->gro_list, skb);
+		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -3598,34 +3720,9 @@ normal:
 	ret = GRO_NORMAL;
 	goto pull;
 }
-EXPORT_SYMBOL(dev_gro_receive);
-
-static inline gro_result_t
-__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
-{
-	struct sk_buff *p;
-	unsigned int maclen = skb->dev->hard_header_len;
-
-	for (p = napi->gro_list; p; p = p->next) {
-		unsigned long diffs;
-
-		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
-		diffs |= p->vlan_tci ^ skb->vlan_tci;
-		if (maclen == ETH_HLEN)
-			diffs |= compare_ether_header(skb_mac_header(p),
-						      skb_gro_mac_header(skb));
-		else if (!diffs)
-			diffs = memcmp(skb_mac_header(p),
-				       skb_gro_mac_header(skb),
-				       maclen);
-		NAPI_GRO_CB(p)->same_flow = !diffs;
-		NAPI_GRO_CB(p)->flush = 0;
-	}
 
-	return dev_gro_receive(napi, skb);
-}
 
-gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
 	case GRO_NORMAL:
@@ -3651,7 +3748,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
 	return ret;
 }
-EXPORT_SYMBOL(napi_skb_finish);
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
 {
@@ -3674,7 +3770,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	skb_gro_reset_offset(skb);
 
-	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
+	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
@@ -3703,7 +3799,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 			       gro_result_t ret)
 {
 	switch (ret) {
@@ -3728,7 +3824,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 
 	return ret;
 }
-EXPORT_SYMBOL(napi_frags_finish);
 
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
@@ -3773,7 +3868,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 	if (!skb)
 		return GRO_DROP;
 
-	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
@@ -4075,6 +4170,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 {
 	struct net_device *dev;
 	struct ifreq ifr;
+	unsigned seq;
 
 	/*
 	 *	Fetch the caller's info block.
@@ -4083,6 +4179,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
+retry:
+	seq = read_seqbegin(&devnet_rename_seq);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
 	if (!dev) {
@@ -4092,6 +4190,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 
 	strcpy(ifr.ifr_name, dev->name);
 	rcu_read_unlock();
+	if (read_seqretry(&devnet_rename_seq, seq))
+		goto retry;
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
 		return -EFAULT;
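
dev_ifname() is the read side of the new devnet_rename_seq seqlock: dev_change_name() above takes the write lock around the rename, and this ioctl simply redoes its lookup if a writer ran while it was copying the name. The pattern in isolation, as a sketch rather than a further change in this diff:

/* Seqlock read loop: the copy may race with a rename, but
 * read_seqretry() detects that and we redo the copy, so the caller
 * never observes a half-updated name. */
unsigned seq;
char name[IFNAMSIZ];

do {
	seq = read_seqbegin(&devnet_rename_seq);
	strcpy(name, dev->name);	/* possibly racy copy, validated below */
} while (read_seqretry(&devnet_rename_seq, seq));
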
@@ -4884,7 +4984,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	else
 		dev->mtu = new_mtu;
 
-	if (!err && dev->flags & IFF_UP)
+	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
 	return err;
 }
@@ -5204,7 +5304,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSIFNAME:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		dev_load(net, ifr.ifr_name);
 		rtnl_lock();
@@ -5225,16 +5325,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	 *	- require strict serialization.
 	 *	- do not return a value
 	 */
+	case SIOCSIFMAP:
+	case SIOCSIFTXQLEN:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		/* fall through */
+	/*
+	 *	These ioctl calls:
+	 *	- require local superuser power.
+	 *	- require strict serialization.
+	 *	- do not return a value
+	 */
 	case SIOCSIFFLAGS:
 	case SIOCSIFMETRIC:
 	case SIOCSIFMTU:
-	case SIOCSIFMAP:
 	case SIOCSIFHWADDR:
 	case SIOCSIFSLAVE:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 	case SIOCSIFHWBROADCAST:
-	case SIOCSIFTXQLEN:
 	case SIOCSMIIREG:
 	case SIOCBONDENSLAVE:
 	case SIOCBONDRELEASE:
@@ -5243,7 +5352,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCBRADDIF:
 	case SIOCBRDELIF:
 	case SIOCSHWTSTAMP:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		/* fall through */
 	case SIOCBONDSLAVEINFOQUERY:
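
Both capability hunks belong to the user-namespace support in item 3 of the pull message: privileged device ioctls now accept CAP_NET_ADMIN held in the user namespace that owns the target struct net, rather than requiring it in the initial namespace. Schematically, with a hypothetical helper name that is not kernel API:

/* capable(CAP_NET_ADMIN): caller must be privileged in the initial
 * user namespace. ns_capable(net->user_ns, CAP_NET_ADMIN): the
 * capability in the namespace owning this net suffices, so a
 * container's root can manage its own network devices. */
static bool may_admin_netdev(struct net *net)	/* hypothetical helper */
{
	return ns_capable(net->user_ns, CAP_NET_ADMIN);
}
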
@@ -6268,7 +6377,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		goto out;
 
 	/* Ensure the device has been registrered */
-	err = -EINVAL;
 	if (dev->reg_state != NETREG_REGISTERED)
 		goto out;
 
@@ -6323,6 +6431,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	dev_uc_flush(dev);
 	dev_mc_flush(dev);
 
+	/* Send a netdev-removed uevent to the old namespace */
+	kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
+
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
 
@@ -6334,6 +6445,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		dev->iflink = dev->ifindex;
 	}
 
+	/* Send a netdev-add uevent to the new namespace */
+	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+
 	/* Fixup kobjects */
 	err = device_rename(&dev->dev, dev->name);
 	WARN_ON(err);
@@ -6666,6 +6780,8 @@ static int __init net_dev_init(void)
 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
 
+	INIT_LIST_HEAD(&offload_base);
+
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
 