Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c              131
-rw-r--r--  net/core/ethtool.c            2
-rw-r--r--  net/core/filter.c           139
-rw-r--r--  net/core/flow.c               4
-rw-r--r--  net/core/neighbour.c          4
-rw-r--r--  net/core/net-sysfs.c         17
-rw-r--r--  net/core/net_namespace.c     23
-rw-r--r--  net/core/netprio_cgroup.c     2
-rw-r--r--  net/core/pktgen.c            47
-rw-r--r--  net/core/rtnetlink.c        226
-rw-r--r--  net/core/scm.c                6
-rw-r--r--  net/core/skbuff.c            24
-rw-r--r--  net/core/sock.c              24
-rw-r--r--  net/core/sysctl_net_core.c    5
14 files changed, 558 insertions(+), 96 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index c0946cb2b354..7304ea8a1f13 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,8 +176,10 @@
 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
 
 static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;	/* Taps */
+static struct list_head offload_base __read_mostly;
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -470,6 +472,82 @@ void dev_remove_pack(struct packet_type *pt)
 }
 EXPORT_SYMBOL(dev_remove_pack);
 
+
+/**
+ *	dev_add_offload - register offload handlers
+ *	@po: protocol offload declaration
+ *
+ *	Add protocol offload handlers to the networking stack. The passed
+ *	&proto_offload is linked into kernel lists and may not be freed until
+ *	it has been removed from the kernel lists.
+ *
+ *	This call does not sleep therefore it can not
+ *	guarantee all CPU's that are in middle of receiving packets
+ *	will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+
+	spin_lock(&offload_lock);
+	list_add_rcu(&po->list, head);
+	spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ *	__dev_remove_offload - remove offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a protocol offload handler that was previously added to the
+ *	kernel offload handlers by dev_add_offload(). The passed &offload_type
+ *	is removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *	The packet type might still be in use by receivers
+ *	and must not be freed until after all the CPU's have gone
+ *	through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+	struct packet_offload *po1;
+
+	spin_lock(&offload_lock);
+
+	list_for_each_entry(po1, head, list) {
+		if (po == po1) {
+			list_del_rcu(&po->list);
+			goto out;
+		}
+	}
+
+	pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+	spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ *	dev_remove_offload - remove packet offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a packet offload handler that was previously added to the kernel
+ *	offload handlers by dev_add_offload(). The passed &offload_type is
+ *	removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *	This call sleeps to guarantee that no CPU is looking at the packet
+ *	type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+	__dev_remove_offload(po);
+
+	synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
 /******************************************************************************
 
 		      Device Boot-time Settings Routines
@@ -1075,10 +1153,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 		return -EINVAL;
 
 	if (!len) {
-		if (dev->ifalias) {
-			kfree(dev->ifalias);
-			dev->ifalias = NULL;
-		}
+		kfree(dev->ifalias);
+		dev->ifalias = NULL;
 		return 0;
 	}
 
@@ -1994,7 +2070,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
 	int vlan_depth = ETH_HLEN;
 	int err;
@@ -2023,18 +2099,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype,
-			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+	list_for_each_entry_rcu(ptype, &offload_base, list) {
+		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-				err = ptype->gso_send_check(skb);
+				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
 					break;
 				__skb_push(skb, (skb->data -
 						 skb_network_header(skb)));
 			}
-			segs = ptype->gso_segment(skb, features);
+			segs = ptype->callbacks.gso_segment(skb, features);
 			break;
 		}
 	}
@@ -3446,9 +3521,9 @@ static void flush_backlog(void *arg)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int err = -ENOENT;
 
 	if (NAPI_GRO_CB(skb)->count == 1) {
@@ -3458,10 +3533,10 @@ static int napi_gro_complete(struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+		if (ptype->type != type || !ptype->callbacks.gro_complete)
 			continue;
 
-		err = ptype->gro_complete(skb);
+		err = ptype->callbacks.gro_complete(skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -3508,9 +3583,9 @@ EXPORT_SYMBOL(napi_gro_flush);
 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int same_flow;
 	int mac_len;
 	enum gro_result ret;
@@ -3523,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+		if (ptype->type != type || !ptype->callbacks.gro_receive)
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3533,7 +3608,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 
-		pp = ptype->gro_receive(&napi->gro_list, skb);
+		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -5202,7 +5277,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSIFNAME:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		dev_load(net, ifr.ifr_name);
 		rtnl_lock();
@@ -5223,16 +5298,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	 *	- require strict serialization.
 	 *	- do not return a value
 	 */
+	case SIOCSIFMAP:
+	case SIOCSIFTXQLEN:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		/* fall through */
+	/*
+	 *	These ioctl calls:
+	 *	- require local superuser power.
+	 *	- require strict serialization.
+	 *	- do not return a value
+	 */
 	case SIOCSIFFLAGS:
 	case SIOCSIFMETRIC:
 	case SIOCSIFMTU:
-	case SIOCSIFMAP:
 	case SIOCSIFHWADDR:
 	case SIOCSIFSLAVE:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 	case SIOCSIFHWBROADCAST:
-	case SIOCSIFTXQLEN:
 	case SIOCSMIIREG:
 	case SIOCBONDENSLAVE:
 	case SIOCBONDRELEASE:
@@ -5241,7 +5325,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCBRADDIF:
 	case SIOCBRDELIF:
 	case SIOCSHWTSTAMP:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		/* fall through */
 	case SIOCBONDSLAVEINFOQUERY:
@@ -6266,7 +6350,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		goto out;
 
 	/* Ensure the device has been registrered */
-	err = -EINVAL;
 	if (dev->reg_state != NETREG_REGISTERED)
 		goto out;
 
@@ -6664,6 +6747,8 @@ static int __init net_dev_init(void)
 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
 
+	INIT_LIST_HEAD(&offload_base);
+
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
 
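
For context, GRO/GSO handlers now live in a dedicated struct packet_offload instead of being overloaded onto struct packet_type. A minimal registration sketch, assuming a hypothetical module for a made-up protocol on a local experimental ethertype (none of this is part of the diff above):

/* Hypothetical sketch: registering GSO callbacks with the new API.
 * The protocol, ethertype and stub callback are invented for illustration. */
#include <linux/module.h>
#include <linux/netdevice.h>

static struct sk_buff *foo_gso_segment(struct sk_buff *skb,
				       netdev_features_t features)
{
	return ERR_PTR(-EPROTONOSUPPORT);	/* stub */
}

static struct packet_offload foo_offload __read_mostly = {
	.type = cpu_to_be16(0x88b5),	/* local experimental ethertype */
	.callbacks = {
		.gso_segment = foo_gso_segment,
	},
};

static int __init foo_init(void)
{
	dev_add_offload(&foo_offload);
	return 0;
}

static void __exit foo_exit(void)
{
	/* dev_remove_offload() waits out a grace period via synchronize_net() */
	dev_remove_offload(&foo_offload);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");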
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4d64cc2e3fa9..a8705432e4b1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1460,7 +1460,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GEEE:
 		break;
 	default:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 	}
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d92ebb7fbcf..c23543cba132 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,6 +39,7 @@
 #include <linux/reciprocal_div.h>
 #include <linux/ratelimit.h>
 #include <linux/seccomp.h>
+#include <linux/if_vlan.h>
 
 /* No hurry in this branch
  *
@@ -341,6 +342,12 @@ load_b:
 		case BPF_S_ANC_CPU:
 			A = raw_smp_processor_id();
 			continue;
+		case BPF_S_ANC_VLAN_TAG:
+			A = vlan_tx_tag_get(skb);
+			continue;
+		case BPF_S_ANC_VLAN_TAG_PRESENT:
+			A = !!vlan_tx_tag_present(skb);
+			continue;
 		case BPF_S_ANC_NLATTR: {
 			struct nlattr *nla;
 
@@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 			ANCILLARY(RXHASH);
 			ANCILLARY(CPU);
 			ANCILLARY(ALU_XOR_X);
+			ANCILLARY(VLAN_TAG);
+			ANCILLARY(VLAN_TAG_PRESENT);
 			}
 		}
 		ftest->code = code;
@@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
+
+static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+{
+	static const u16 decodes[] = {
+		[BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
+		[BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
+		[BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
+		[BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
+		[BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
+		[BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
+		[BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
+		[BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
+		[BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
+		[BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
+		[BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
+		[BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
+		[BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
+		[BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
+		[BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
+		[BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
+		[BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
+		[BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
+		[BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
+		[BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
+		[BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
+		[BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
+		[BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
+		[BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
+		[BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
+		[BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
+		[BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
+		[BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
+		[BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
+		[BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
+		[BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
+		[BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
+		[BPF_S_RET_K] = BPF_RET|BPF_K,
+		[BPF_S_RET_A] = BPF_RET|BPF_A,
+		[BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
+		[BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
+		[BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
+		[BPF_S_ST] = BPF_ST,
+		[BPF_S_STX] = BPF_STX,
+		[BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
+		[BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
+		[BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
+		[BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
+		[BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
+		[BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
+		[BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
+		[BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
+		[BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
+	};
+	u16 code;
+
+	code = filt->code;
+
+	to->code = decodes[code];
+	to->jt = filt->jt;
+	to->jf = filt->jf;
+
+	if (code == BPF_S_ALU_DIV_K) {
+		/*
+		 * When loaded this rule user gave us X, which was
+		 * translated into R = r(X). Now we calculate the
+		 * RR = r(R) and report it back. If next time this
+		 * value is loaded and RRR = r(RR) is calculated
+		 * then the R == RRR will be true.
+		 *
+		 * One exception. X == 1 translates into R == 0 and
+		 * we can't calculate RR out of it with r().
+		 */
+
+		if (filt->k == 0)
+			to->k = 1;
+		else
+			to->k = reciprocal_value(filt->k);
+
+		BUG_ON(reciprocal_value(to->k) != filt->k);
+	} else
+		to->k = filt->k;
+}
+
+int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
+{
+	struct sk_filter *filter;
+	int i, ret;
+
+	lock_sock(sk);
+	filter = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
+	ret = 0;
+	if (!filter)
+		goto out;
+	ret = filter->len;
+	if (!len)
+		goto out;
+	ret = -EINVAL;
+	if (len < filter->len)
+		goto out;
+
+	ret = -EFAULT;
+	for (i = 0; i < filter->len; i++) {
+		struct sock_filter fb;
+
+		sk_decode_filter(&filter->insns[i], &fb);
+		if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
+			goto out;
+	}
+
+	ret = filter->len;
+out:
+	release_sock(sk);
+	return ret;
+}
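
From userspace, the new ancillary loads are reachable through SKF_AD_VLAN_TAG and SKF_AD_VLAN_TAG_PRESENT in <linux/filter.h>. A hedged sketch of a classic BPF program that accepts only frames tagged with VLAN ID 100 (it assumes the priority/DEI bits are zero, since vlan_tx_tag_get() returns the whole TCI):

/* Hypothetical filter, for illustration only. */
#include <linux/filter.h>
#include <sys/socket.h>

static struct sock_filter vlan100_insns[] = {
	/* A = vlan_tx_tag_present(skb) */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
		 SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 3, 0),	/* untagged -> drop */
	/* A = vlan_tx_tag_get(skb) */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 100, 0, 1),	/* TCI != 100 -> drop */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),		/* accept */
	BPF_STMT(BPF_RET | BPF_K, 0),			/* drop */
};

static struct sock_fprog vlan100_prog = {
	.len = sizeof(vlan100_insns) / sizeof(vlan100_insns[0]),
	.filter = vlan100_insns,
};

The program would be attached with setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &vlan100_prog, sizeof(vlan100_prog)).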
diff --git a/net/core/flow.c b/net/core/flow.c
index e318c7e98042..b0901ee5a002 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data)
 static void flow_cache_flush_per_cpu(void *data)
 {
 	struct flow_flush_info *info = data;
-	int cpu;
 	struct tasklet_struct *tasklet;
 
-	cpu = smp_processor_id();
-	tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
+	tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet);
 	tasklet->data = (unsigned long)info;
 	tasklet_schedule(tasklet);
 }
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 22571488730a..f1c0c2e9cad5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2987,6 +2987,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
 	}
 
+	/* Don't export sysctls to unprivileged users */
+	if (neigh_parms_net(p)->user_ns != &init_user_ns)
+		t->neigh_vars[0].procname = NULL;
+
 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
 		 p_name, dev_name_source);
 	t->sysctl_header =
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 017a8bacfb27..334efd5d67a9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -18,11 +18,9 @@
 #include <net/sock.h>
 #include <net/net_namespace.h>
 #include <linux/rtnetlink.h>
-#include <linux/wireless.h>
 #include <linux/vmalloc.h>
 #include <linux/export.h>
 #include <linux/jiffies.h>
-#include <net/wext.h>
 
 #include "net-sysfs.h"
 
@@ -73,11 +71,12 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t len,
 			    int (*set)(struct net_device *, unsigned long))
 {
-	struct net_device *net = to_net_dev(dev);
+	struct net_device *netdev = to_net_dev(dev);
+	struct net *net = dev_net(netdev);
 	unsigned long new;
 	int ret = -EINVAL;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	ret = kstrtoul(buf, 0, &new);
@@ -87,8 +86,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (dev_isalive(net)) {
-		if ((ret = (*set)(net, new)) == 0)
+	if (dev_isalive(netdev)) {
+		if ((ret = (*set)(netdev, new)) == 0)
 			ret = len;
 	}
 	rtnl_unlock();
@@ -264,6 +263,9 @@ static ssize_t store_tx_queue_len(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t len)
 {
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
 }
 
@@ -271,10 +273,11 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
 				    const char *buf, size_t len)
 {
 	struct net_device *netdev = to_net_dev(dev);
+	struct net *net = dev_net(netdev);
 	size_t count = len;
 	ssize_t ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	/* ignore trailing newline */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 42f1e1c7514f..6456439cbbd9 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -13,6 +13,7 @@
 #include <linux/proc_fs.h>
 #include <linux/file.h>
 #include <linux/export.h>
+#include <linux/user_namespace.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -145,7 +146,7 @@ static void ops_free_list(const struct pernet_operations *ops,
 /*
  * setup_net runs the initializers for the network namespace object.
  */
-static __net_init int setup_net(struct net *net)
+static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 {
 	/* Must be called with net_mutex held */
 	const struct pernet_operations *ops, *saved_ops;
@@ -155,6 +156,7 @@ static __net_init int setup_net(struct net *net)
 	atomic_set(&net->count, 1);
 	atomic_set(&net->passive, 1);
 	net->dev_base_seq = 1;
+	net->user_ns = user_ns;
 
 #ifdef NETNS_REFCNT_DEBUG
 	atomic_set(&net->use_count, 0);
@@ -232,7 +234,8 @@ void net_drop_ns(void *p)
 	net_free(ns);
 }
 
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+struct net *copy_net_ns(unsigned long flags,
+			struct user_namespace *user_ns, struct net *old_net)
 {
 	struct net *net;
 	int rv;
@@ -243,8 +246,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 	net = net_alloc();
 	if (!net)
 		return ERR_PTR(-ENOMEM);
+
+	get_user_ns(user_ns);
+
 	mutex_lock(&net_mutex);
-	rv = setup_net(net);
+	rv = setup_net(net, user_ns);
 	if (rv == 0) {
 		rtnl_lock();
 		list_add_tail_rcu(&net->list, &net_namespace_list);
@@ -252,6 +258,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 	}
 	mutex_unlock(&net_mutex);
 	if (rv < 0) {
+		put_user_ns(user_ns);
 		net_drop_ns(net);
 		return ERR_PTR(rv);
 	}
@@ -308,6 +315,7 @@ static void cleanup_net(struct work_struct *work)
 	/* Finally it is safe to free my network namespace structure */
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 		list_del_init(&net->exit_list);
+		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
 }
@@ -347,13 +355,6 @@ struct net *get_net_ns_by_fd(int fd)
 }
 
 #else
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
-{
-	if (flags & CLONE_NEWNET)
-		return ERR_PTR(-EINVAL);
-	return old_net;
-}
-
 struct net *get_net_ns_by_fd(int fd)
 {
 	return ERR_PTR(-EINVAL);
@@ -402,7 +403,7 @@ static int __init net_ns_init(void)
 	rcu_assign_pointer(init_net.gen, ng);
 
 	mutex_lock(&net_mutex);
-	if (setup_net(&init_net))
+	if (setup_net(&init_net, &init_user_ns))
 		panic("Could not setup the initial network namespace");
 
 	rtnl_lock();
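
One practical consequence, as a sketch: since every struct net now records the user namespace that created it, an otherwise unprivileged process can pair CLONE_NEWUSER with CLONE_NEWNET and hold CAP_NET_ADMIN over the resulting network namespace. A minimal userspace illustration (assumes a kernel with user namespaces enabled):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* The new user namespace makes us privileged within it; the new
	 * network namespace is then owned by it (net->user_ns above). */
	if (unshare(CLONE_NEWUSER | CLONE_NEWNET) != 0) {
		perror("unshare");
		return 1;
	}
	puts("created an owned user+net namespace pair");
	return 0;
}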
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 79285a36035f..847c02b197b0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -248,7 +248,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
 	void *v;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d1dc14c2aac4..b29dacf900f9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -419,20 +419,6 @@ struct pktgen_thread {
 #define REMOVE 1
 #define FIND   0
 
-static inline ktime_t ktime_now(void)
-{
-	struct timespec ts;
-	ktime_get_ts(&ts);
-
-	return timespec_to_ktime(ts);
-}
-
-/* This works even if 32 bit because of careful byte order choice */
-static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
-{
-	return cmp1.tv64 < cmp2.tv64;
-}
-
 static const char version[] =
 	"Packet Generator for packet performance testing. "
 	"Version: " VERSION "\n";
@@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\n");
 
 	/* not really stopped, more like last-running-at */
-	stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at;
+	stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at;
 	idle = pkt_dev->idle_acc;
 	do_div(idle, NSEC_PER_USEC);
 
@@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 		return;
 	}
 
-	start_time = ktime_now();
+	start_time = ktime_get();
 	if (remaining < 100000) {
 		/* for small delays (<100us), just loop until limit is reached */
 		do {
-			end_time = ktime_now();
-		} while (ktime_lt(end_time, spin_until));
+			end_time = ktime_get();
+		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
 		/* see do_nanosleep */
 		hrtimer_init_sleeper(&t, current);
@@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 			hrtimer_cancel(&t.timer);
 		} while (t.task && pkt_dev->running && !signal_pending(current));
 		__set_current_state(TASK_RUNNING);
-		end_time = ktime_now();
+		end_time = ktime_get();
 	}
 
 	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
@@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		}
 	} else {	/* IPV6 * */
 
-		if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 &&
-		    pkt_dev->min_in6_daddr.s6_addr32[1] == 0 &&
-		    pkt_dev->min_in6_daddr.s6_addr32[2] == 0 &&
-		    pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ;
-		else {
+		if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) {
 			int i;
 
 			/* Only random destinations yet */
@@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t)
 		pktgen_clear_counters(pkt_dev);
 		pkt_dev->running = 1;	/* Cranke yeself! */
 		pkt_dev->skb = NULL;
-		pkt_dev->started_at =
-			pkt_dev->next_tx = ktime_now();
+		pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
 
 		set_pkt_overhead(pkt_dev);
 
@@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 
 	kfree_skb(pkt_dev->skb);
 	pkt_dev->skb = NULL;
-	pkt_dev->stopped_at = ktime_now();
+	pkt_dev->stopped_at = ktime_get();
 	pkt_dev->running = 0;
 
 	show_results(pkt_dev, nr_frags);
@@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
 			continue;
 		if (best == NULL)
 			best = pkt_dev;
-		else if (ktime_lt(pkt_dev->next_tx, best->next_tx))
+		else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
 			best = pkt_dev;
 	}
 	if_unlock(t);
@@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
 
 static void pktgen_resched(struct pktgen_dev *pkt_dev)
 {
-	ktime_t idle_start = ktime_now();
+	ktime_t idle_start = ktime_get();
 	schedule();
-	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
 }
 
 static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 {
-	ktime_t idle_start = ktime_now();
+	ktime_t idle_start = ktime_get();
 
 	while (atomic_read(&(pkt_dev->skb->users)) != 1) {
 		if (signal_pending(current))
@@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 		else
 			cpu_relax();
 	}
-	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
 }
 
 static void pktgen_xmit(struct pktgen_dev *pkt_dev)
@@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3220 * "never transmit" 3201 * "never transmit"
3221 */ 3202 */
3222 if (unlikely(pkt_dev->delay == ULLONG_MAX)) { 3203 if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
3223 pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); 3204 pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX);
3224 return; 3205 return;
3225 } 3206 }
3226 3207
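
The removed pktgen-private helpers map one-for-one onto <linux/ktime.h> primitives; the substitution pattern applied throughout this file is, as a sketch (kernel context assumed):

/* ktime_now()    -> ktime_get()
 * ktime_lt(a, b) -> ktime_compare(a, b) < 0
 */
#include <linux/ktime.h>

static bool deadline_passed(ktime_t deadline)
{
	/* ktime_compare() orders like memcmp(): negative, zero or positive */
	return ktime_compare(ktime_get(), deadline) >= 0;
}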
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fad649ae4dec..575a6ee89944 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
 	if (tab == NULL || tab[msgindex].doit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
-	return tab ? tab[msgindex].doit : NULL;
+	return tab[msgindex].doit;
 }
 
 static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
@@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 	if (tab == NULL || tab[msgindex].dumpit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
-	return tab ? tab[msgindex].dumpit : NULL;
+	return tab[msgindex].dumpit;
 }
 
 static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
@@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
 	if (tab == NULL || tab[msgindex].calcit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
-	return tab ? tab[msgindex].calcit : NULL;
+	return tab[msgindex].calcit;
 }
 
 /**
@@ -1316,6 +1316,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 			err = PTR_ERR(net);
 			goto errout;
 		}
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+			err = -EPERM;
+			goto errout;
+		}
 		err = dev_change_net_namespace(dev, net, ifname);
 		put_net(net);
 		if (err)
@@ -2057,6 +2061,9 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	u8 *addr;
 	int err;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
 	if (err < 0)
 		return err;
@@ -2123,6 +2130,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	int err = -EINVAL;
 	__u8 *addr;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (nlmsg_len(nlh) < sizeof(*ndm))
 		return -EINVAL;
 
@@ -2253,6 +2263,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+			    struct net_device *dev, u16 mode)
+{
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+	struct nlattr *br_afspec;
+	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+
+	nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifi_family = AF_BRIDGE;
+	ifm->__ifi_pad = 0;
+	ifm->ifi_type = dev->type;
+	ifm->ifi_index = dev->ifindex;
+	ifm->ifi_flags = dev_get_flags(dev);
+	ifm->ifi_change = 0;
+
+
+	if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
+	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
+	    nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
+	    (dev->master &&
+	     nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) ||
+	    (dev->addr_len &&
+	     nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
+	    (dev->ifindex != dev->iflink &&
+	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+		goto nla_put_failure;
+
+	br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
+	if (!br_afspec)
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) ||
+	    nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) {
+		nla_nest_cancel(skb, br_afspec);
+		goto nla_put_failure;
+	}
+	nla_nest_end(skb, br_afspec);
+
+	return nlmsg_end(skb, nlh);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL(ndo_dflt_bridge_getlink);
+
+static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	int idx = 0;
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	u32 seq = cb->nlh->nlmsg_seq;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+		struct net_device *master = dev->master;
+
+		if (master && master->netdev_ops->ndo_bridge_getlink) {
+			if (idx >= cb->args[0] &&
+			    master->netdev_ops->ndo_bridge_getlink(
+				skb, portid, seq, dev) < 0)
+				break;
+			idx++;
+		}
+
+		if (ops->ndo_bridge_getlink) {
+			if (idx >= cb->args[0] &&
+			    ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0)
+				break;
+			idx++;
+		}
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static inline size_t bridge_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+		+ nla_total_size(IFNAMSIZ)	/* IFLA_IFNAME */
+		+ nla_total_size(MAX_ADDR_LEN)	/* IFLA_ADDRESS */
+		+ nla_total_size(sizeof(u32))	/* IFLA_MASTER */
+		+ nla_total_size(sizeof(u32))	/* IFLA_MTU */
+		+ nla_total_size(sizeof(u32))	/* IFLA_LINK */
+		+ nla_total_size(sizeof(u32))	/* IFLA_OPERSTATE */
+		+ nla_total_size(sizeof(u8))	/* IFLA_PROTINFO */
+		+ nla_total_size(sizeof(struct nlattr))	/* IFLA_AF_SPEC */
+		+ nla_total_size(sizeof(u16))	/* IFLA_BRIDGE_FLAGS */
+		+ nla_total_size(sizeof(u16));	/* IFLA_BRIDGE_MODE */
+}
+
+static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
+{
+	struct net *net = dev_net(dev);
+	struct net_device *master = dev->master;
+	struct sk_buff *skb;
+	int err = -EOPNOTSUPP;
+
+	skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
+	if (!skb) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
+	    master && master->netdev_ops->ndo_bridge_getlink) {
+		err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+		if (err < 0)
+			goto errout;
+	}
+
+	if ((flags & BRIDGE_FLAGS_SELF) &&
+	    dev->netdev_ops->ndo_bridge_getlink) {
+		err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+		if (err < 0)
+			goto errout;
+	}
+
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+	return 0;
+errout:
+	WARN_ON(err == -EMSGSIZE);
+	kfree_skb(skb);
+	rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+	return err;
+}
+
+static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifinfomsg *ifm;
+	struct net_device *dev;
+	struct nlattr *br_spec, *attr = NULL;
+	int rem, err = -EOPNOTSUPP;
+	u16 oflags, flags = 0;
+	bool have_flags = false;
+
+	if (nlmsg_len(nlh) < sizeof(*ifm))
+		return -EINVAL;
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_family != AF_BRIDGE)
+		return -EPFNOSUPPORT;
+
+	dev = __dev_get_by_index(net, ifm->ifi_index);
+	if (!dev) {
+		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		return -ENODEV;
+	}
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (br_spec) {
+		nla_for_each_nested(attr, br_spec, rem) {
+			if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+				have_flags = true;
+				flags = nla_get_u16(attr);
+				break;
+			}
+		}
+	}
+
+	oflags = flags;
+
+	if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
+		if (!dev->master ||
+		    !dev->master->netdev_ops->ndo_bridge_setlink) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh);
+		if (err)
+			goto out;
+
+		flags &= ~BRIDGE_FLAGS_MASTER;
+	}
+
+	if ((flags & BRIDGE_FLAGS_SELF)) {
+		if (!dev->netdev_ops->ndo_bridge_setlink)
+			err = -EOPNOTSUPP;
+		else
+			err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
+
+		if (!err)
+			flags &= ~BRIDGE_FLAGS_SELF;
+	}
+
+	if (have_flags)
+		memcpy(nla_data(attr), &flags, sizeof(flags));
+	/* Generate event to notify upper layer of bridge change */
+	if (!err)
+		err = rtnl_bridge_notify(dev, oflags);
+out:
+	return err;
+}
+
 /* Protected by RTNL sempahore. */
 static struct rtattr **rta_buf;
 static int rtattr_max;
@@ -2283,7 +2498,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	sz_idx = type>>2;
 	kind = type&3;
 
-	if (kind != 2 && !capable(CAP_NET_ADMIN))
+	if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
@@ -2434,5 +2649,8 @@ void __init rtnetlink_init(void)
 	rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
+
+	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
+	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
 }
 
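
A hedged userspace sketch of the new PF_BRIDGE dump path: an RTM_GETLINK request whose ifinfomsg carries ifi_family = AF_BRIDGE is now routed to rtnl_bridge_getlink(). Reply parsing is elided; only the request shape is shown:

#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
	} req;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifm));
	req.nlh.nlmsg_type = RTM_GETLINK;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ifm.ifi_family = AF_BRIDGE;	/* selects rtnl_bridge_getlink() */

	sendto(fd, &req, req.nlh.nlmsg_len, 0,
	       (struct sockaddr *)&kernel, sizeof(kernel));
	/* ... recv() and walk the RTM_NEWLINK replies here ... */
	close(fd);
	return 0;
}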
diff --git a/net/core/scm.c b/net/core/scm.c
index ab570841a532..57fb1ee6649f 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -51,11 +51,11 @@ static __inline__ int scm_check_creds(struct ucred *creds)
 	if (!uid_valid(uid) || !gid_valid(gid))
 		return -EINVAL;
 
-	if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
+	if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) &&
 	    ((uid_eq(uid, cred->uid)  || uid_eq(uid, cred->euid) ||
-	      uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) &&
+	      uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
 	    ((gid_eq(gid, cred->gid)  || gid_eq(gid, cred->egid) ||
-	      gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) {
+	      gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) {
 		return 0;
 	}
 	return -EPERM;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4007c1437fda..880722e22cc5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb)
 
 		uarg = skb_shinfo(skb)->destructor_arg;
 		if (uarg->callback)
-			uarg->callback(uarg);
+			uarg->callback(uarg, true);
 	}
 
 	if (skb_has_frag_list(skb))
@@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb)
 EXPORT_SYMBOL(kfree_skb);
 
 /**
+ *	skb_tx_error - report an sk_buff xmit error
+ *	@skb: buffer that triggered an error
+ *
+ *	Report xmit error if a device callback is tracking this skb.
+ *	skb must be freed afterwards.
+ */
+void skb_tx_error(struct sk_buff *skb)
+{
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		struct ubuf_info *uarg;
+
+		uarg = skb_shinfo(skb)->destructor_arg;
+		if (uarg->callback)
+			uarg->callback(uarg, false);
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+	}
+}
+EXPORT_SYMBOL(skb_tx_error);
+
+/**
  *	consume_skb - free an skbuff
  *	@skb: buffer to free
  *
@@ -797,7 +817,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 	for (i = 0; i < num_frags; i++)
 		skb_frag_unref(skb, i);
 
-	uarg->callback(uarg);
+	uarg->callback(uarg, false);
 
 	/* skb frags point to kernel buffers */
 	for (i = num_frags - 1; i >= 0; i--) {
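
For illustration, a transmit path that fails on a zerocopy skb would now report the error before freeing, per the skb_tx_error() docstring above. A hypothetical driver fragment:

/* Hypothetical fragment: complete the zerocopy ubuf_info callback with
 * success == false, then free the buffer as usual. */
static void example_xmit_failed(struct sk_buff *skb)
{
	skb_tx_error(skb);	/* tell the zerocopy owner the data never left */
	kfree_skb(skb);
}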
diff --git a/net/core/sock.c b/net/core/sock.c
index 8a146cfcc366..d4f7b58b3866 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -515,7 +515,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
 
 	/* Sorry... */
 	ret = -EPERM;
-	if (!capable(CAP_NET_RAW))
+	if (!ns_capable(net->user_ns, CAP_NET_RAW))
 		goto out;
 
 	ret = -EINVAL;
@@ -696,7 +696,8 @@ set_rcvbuf:
 		break;
 
 	case SO_PRIORITY:
-		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+		if ((val >= 0 && val <= 6) ||
+		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
 			sk->sk_priority = val;
 		else
 			ret = -EPERM;
@@ -813,7 +814,7 @@ set_rcvbuf:
 		clear_bit(SOCK_PASSSEC, &sock->flags);
 		break;
 	case SO_MARK:
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
 			ret = -EPERM;
 		else
 			sk->sk_mark = val;
@@ -1074,6 +1075,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	case SO_NOFCS:
 		v.val = sock_flag(sk, SOCK_NOFCS);
 		break;
+	case SO_BINDTODEVICE:
+		v.val = sk->sk_bound_dev_if;
+		break;
+	case SO_GET_FILTER:
+		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
+		if (len < 0)
+			return len;
+
+		goto lenout;
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1214,13 +1224,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 
 #ifdef CONFIG_CGROUPS
 #if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-void sock_update_classid(struct sock *sk)
+void sock_update_classid(struct sock *sk, struct task_struct *task)
 {
 	u32 classid;
 
-	rcu_read_lock();  /* doing current task, which cannot vanish. */
-	classid = task_cls_classid(current);
-	rcu_read_unlock();
+	classid = task_cls_classid(task);
 	if (classid != sk->sk_classid)
 		sk->sk_classid = classid;
 }
@@ -1263,7 +1271,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_net_set(sk, get_net(net));
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
-		sock_update_classid(sk);
+		sock_update_classid(sk, current);
 		sock_update_netprioidx(sk, current);
 	}
 
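
A userspace sketch of the new read-back path, assuming the SO_GET_FILTER option name from the matching uapi change (it aliases SO_ATTACH_FILTER). Note that the optlen for this option counts instructions, not bytes, mirroring sk_get_filter() above:

#include <linux/filter.h>
#include <stdlib.h>
#include <sys/socket.h>

static struct sock_filter *dump_filter(int fd, socklen_t *count)
{
	struct sock_filter *insns;
	socklen_t len = 0;

	/* with optlen == 0 the kernel just reports the filter length */
	if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &len) < 0)
		return NULL;

	insns = calloc(len, sizeof(*insns));
	if (!insns)
		return NULL;
	if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &len) < 0) {
		free(insns);
		return NULL;
	}
	*count = len;
	return insns;
}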
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a7c36845b123..d1b08045a9df 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -216,6 +216,11 @@ static __net_init int sysctl_core_net_init(struct net *net)
 			goto err_dup;
 
 		tbl[0].data = &net->core.sysctl_somaxconn;
+
+		/* Don't export any sysctls to unprivileged users */
+		if (net->user_ns != &init_user_ns) {
+			tbl[0].procname = NULL;
+		}
 	}
 
 	net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);