diff options
Diffstat (limited to 'net/core')
| -rw-r--r-- | net/core/dev.c | 236 | ||||
| -rw-r--r-- | net/core/ethtool.c | 2 | ||||
| -rw-r--r-- | net/core/filter.c | 139 | ||||
| -rw-r--r-- | net/core/flow.c | 4 | ||||
| -rw-r--r-- | net/core/neighbour.c | 20 | ||||
| -rw-r--r-- | net/core/net-sysfs.c | 17 | ||||
| -rw-r--r-- | net/core/net_namespace.c | 55 | ||||
| -rw-r--r-- | net/core/netpoll.c | 6 | ||||
| -rw-r--r-- | net/core/netprio_cgroup.c | 262 | ||||
| -rw-r--r-- | net/core/pktgen.c | 47 | ||||
| -rw-r--r-- | net/core/rtnetlink.c | 230 | ||||
| -rw-r--r-- | net/core/scm.c | 6 | ||||
| -rw-r--r-- | net/core/skbuff.c | 34 | ||||
| -rw-r--r-- | net/core/sock.c | 84 | ||||
| -rw-r--r-- | net/core/sysctl_net_core.c | 5 |
15 files changed, 854 insertions, 293 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index e5942bf45a6d..d0cbc93fcf32 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
| @@ -176,8 +176,10 @@ | |||
| 176 | #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) | 176 | #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) |
| 177 | 177 | ||
| 178 | static DEFINE_SPINLOCK(ptype_lock); | 178 | static DEFINE_SPINLOCK(ptype_lock); |
| 179 | static DEFINE_SPINLOCK(offload_lock); | ||
| 179 | static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; | 180 | static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; |
| 180 | static struct list_head ptype_all __read_mostly; /* Taps */ | 181 | static struct list_head ptype_all __read_mostly; /* Taps */ |
| 182 | static struct list_head offload_base __read_mostly; | ||
| 181 | 183 | ||
| 182 | /* | 184 | /* |
| 183 | * The @dev_base_head list is protected by @dev_base_lock and the rtnl | 185 | * The @dev_base_head list is protected by @dev_base_lock and the rtnl |
| @@ -201,6 +203,8 @@ static struct list_head ptype_all __read_mostly; /* Taps */ | |||
| 201 | DEFINE_RWLOCK(dev_base_lock); | 203 | DEFINE_RWLOCK(dev_base_lock); |
| 202 | EXPORT_SYMBOL(dev_base_lock); | 204 | EXPORT_SYMBOL(dev_base_lock); |
| 203 | 205 | ||
| 206 | DEFINE_SEQLOCK(devnet_rename_seq); | ||
| 207 | |||
| 204 | static inline void dev_base_seq_inc(struct net *net) | 208 | static inline void dev_base_seq_inc(struct net *net) |
| 205 | { | 209 | { |
| 206 | while (++net->dev_base_seq == 0); | 210 | while (++net->dev_base_seq == 0); |
| @@ -470,6 +474,82 @@ void dev_remove_pack(struct packet_type *pt) | |||
| 470 | } | 474 | } |
| 471 | EXPORT_SYMBOL(dev_remove_pack); | 475 | EXPORT_SYMBOL(dev_remove_pack); |
| 472 | 476 | ||
| 477 | |||
| 478 | /** | ||
| 479 | * dev_add_offload - register offload handlers | ||
| 480 | * @po: protocol offload declaration | ||
| 481 | * | ||
| 482 | * Add protocol offload handlers to the networking stack. The passed | ||
| 483 | * &packet_offload is linked into kernel lists and may not be freed until | ||
| 484 | * it has been removed from the kernel lists. | ||
| 485 | * | ||
| 486 | * This call does not sleep, therefore it cannot | ||
| 487 | * guarantee that all CPUs that are in the middle of receiving packets | ||
| 488 | * will see the new offload handlers (until the next received packet). | ||
| 489 | */ | ||
| 490 | void dev_add_offload(struct packet_offload *po) | ||
| 491 | { | ||
| 492 | struct list_head *head = &offload_base; | ||
| 493 | |||
| 494 | spin_lock(&offload_lock); | ||
| 495 | list_add_rcu(&po->list, head); | ||
| 496 | spin_unlock(&offload_lock); | ||
| 497 | } | ||
| 498 | EXPORT_SYMBOL(dev_add_offload); | ||
| 499 | |||
| 500 | /** | ||
| 501 | * __dev_remove_offload - remove offload handler | ||
| 502 | * @po: packet offload declaration | ||
| 503 | * | ||
| 504 | * Remove a protocol offload handler that was previously added to the | ||
| 505 | * kernel offload handlers by dev_add_offload(). The passed &packet_offload | ||
| 506 | * is removed from the kernel lists, but this function does not wait for | ||
| 507 | * readers that may still be traversing those lists. | ||
| 508 | * | ||
| 509 | * The packet offload might still be in use by receivers | ||
| 510 | * and must not be freed until after all the CPUs have gone | ||
| 511 | * through a quiescent state. | ||
| 512 | */ | ||
| 513 | void __dev_remove_offload(struct packet_offload *po) | ||
| 514 | { | ||
| 515 | struct list_head *head = &offload_base; | ||
| 516 | struct packet_offload *po1; | ||
| 517 | |||
| 518 | spin_lock(&offload_lock); | ||
| 519 | |||
| 520 | list_for_each_entry(po1, head, list) { | ||
| 521 | if (po == po1) { | ||
| 522 | list_del_rcu(&po->list); | ||
| 523 | goto out; | ||
| 524 | } | ||
| 525 | } | ||
| 526 | |||
| 527 | pr_warn("dev_remove_offload: %p not found\n", po); | ||
| 528 | out: | ||
| 529 | spin_unlock(&offload_lock); | ||
| 530 | } | ||
| 531 | EXPORT_SYMBOL(__dev_remove_offload); | ||
| 532 | |||
| 533 | /** | ||
| 534 | * dev_remove_offload - remove packet offload handler | ||
| 535 | * @po: packet offload declaration | ||
| 536 | * | ||
| 537 | * Remove a packet offload handler that was previously added to the kernel | ||
| 538 | * offload handlers by dev_add_offload(). The passed &packet_offload is | ||
| 539 | * removed from the kernel lists and can be freed or reused once this | ||
| 540 | * function returns. | ||
| 541 | * | ||
| 542 | * This call sleeps to guarantee that no CPU is looking at the packet | ||
| 543 | * offload after return. | ||
| 544 | */ | ||
| 545 | void dev_remove_offload(struct packet_offload *po) | ||
| 546 | { | ||
| 547 | __dev_remove_offload(po); | ||
| 548 | |||
| 549 | synchronize_net(); | ||
| 550 | } | ||
| 551 | EXPORT_SYMBOL(dev_remove_offload); | ||
| 552 | |||
| 473 | /****************************************************************************** | 553 | /****************************************************************************** |
| 474 | 554 | ||
| 475 | Device Boot-time Settings Routines | 555 | Device Boot-time Settings Routines |
| @@ -1013,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname) | |||
| 1013 | if (dev->flags & IFF_UP) | 1093 | if (dev->flags & IFF_UP) |
| 1014 | return -EBUSY; | 1094 | return -EBUSY; |
| 1015 | 1095 | ||
| 1016 | if (strncmp(newname, dev->name, IFNAMSIZ) == 0) | 1096 | write_seqlock(&devnet_rename_seq); |
| 1097 | |||
| 1098 | if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { | ||
| 1099 | write_sequnlock(&devnet_rename_seq); | ||
| 1017 | return 0; | 1100 | return 0; |
| 1101 | } | ||
| 1018 | 1102 | ||
| 1019 | memcpy(oldname, dev->name, IFNAMSIZ); | 1103 | memcpy(oldname, dev->name, IFNAMSIZ); |
| 1020 | 1104 | ||
| 1021 | err = dev_get_valid_name(net, dev, newname); | 1105 | err = dev_get_valid_name(net, dev, newname); |
| 1022 | if (err < 0) | 1106 | if (err < 0) { |
| 1107 | write_sequnlock(&devnet_rename_seq); | ||
| 1023 | return err; | 1108 | return err; |
| 1109 | } | ||
| 1024 | 1110 | ||
| 1025 | rollback: | 1111 | rollback: |
| 1026 | ret = device_rename(&dev->dev, dev->name); | 1112 | ret = device_rename(&dev->dev, dev->name); |
| 1027 | if (ret) { | 1113 | if (ret) { |
| 1028 | memcpy(dev->name, oldname, IFNAMSIZ); | 1114 | memcpy(dev->name, oldname, IFNAMSIZ); |
| 1115 | write_sequnlock(&devnet_rename_seq); | ||
| 1029 | return ret; | 1116 | return ret; |
| 1030 | } | 1117 | } |
| 1031 | 1118 | ||
| 1119 | write_sequnlock(&devnet_rename_seq); | ||
| 1120 | |||
| 1032 | write_lock_bh(&dev_base_lock); | 1121 | write_lock_bh(&dev_base_lock); |
| 1033 | hlist_del_rcu(&dev->name_hlist); | 1122 | hlist_del_rcu(&dev->name_hlist); |
| 1034 | write_unlock_bh(&dev_base_lock); | 1123 | write_unlock_bh(&dev_base_lock); |
| @@ -1046,6 +1135,7 @@ rollback: | |||
| 1046 | /* err >= 0 after dev_alloc_name() or stores the first errno */ | 1135 | /* err >= 0 after dev_alloc_name() or stores the first errno */ |
| 1047 | if (err >= 0) { | 1136 | if (err >= 0) { |
| 1048 | err = ret; | 1137 | err = ret; |
| 1138 | write_seqlock(&devnet_rename_seq); | ||
| 1049 | memcpy(dev->name, oldname, IFNAMSIZ); | 1139 | memcpy(dev->name, oldname, IFNAMSIZ); |
| 1050 | goto rollback; | 1140 | goto rollback; |
| 1051 | } else { | 1141 | } else { |
| @@ -1075,10 +1165,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) | |||
| 1075 | return -EINVAL; | 1165 | return -EINVAL; |
| 1076 | 1166 | ||
| 1077 | if (!len) { | 1167 | if (!len) { |
| 1078 | if (dev->ifalias) { | 1168 | kfree(dev->ifalias); |
| 1079 | kfree(dev->ifalias); | 1169 | dev->ifalias = NULL; |
| 1080 | dev->ifalias = NULL; | ||
| 1081 | } | ||
| 1082 | return 0; | 1170 | return 0; |
| 1083 | } | 1171 | } |
| 1084 | 1172 | ||
| @@ -1994,7 +2082,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, | |||
| 1994 | netdev_features_t features) | 2082 | netdev_features_t features) |
| 1995 | { | 2083 | { |
| 1996 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); | 2084 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); |
| 1997 | struct packet_type *ptype; | 2085 | struct packet_offload *ptype; |
| 1998 | __be16 type = skb->protocol; | 2086 | __be16 type = skb->protocol; |
| 1999 | int vlan_depth = ETH_HLEN; | 2087 | int vlan_depth = ETH_HLEN; |
| 2000 | int err; | 2088 | int err; |
| @@ -2023,18 +2111,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, | |||
| 2023 | } | 2111 | } |
| 2024 | 2112 | ||
| 2025 | rcu_read_lock(); | 2113 | rcu_read_lock(); |
| 2026 | list_for_each_entry_rcu(ptype, | 2114 | list_for_each_entry_rcu(ptype, &offload_base, list) { |
| 2027 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { | 2115 | if (ptype->type == type && ptype->callbacks.gso_segment) { |
| 2028 | if (ptype->type == type && !ptype->dev && ptype->gso_segment) { | ||
| 2029 | if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { | 2116 | if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { |
| 2030 | err = ptype->gso_send_check(skb); | 2117 | err = ptype->callbacks.gso_send_check(skb); |
| 2031 | segs = ERR_PTR(err); | 2118 | segs = ERR_PTR(err); |
| 2032 | if (err || skb_gso_ok(skb, features)) | 2119 | if (err || skb_gso_ok(skb, features)) |
| 2033 | break; | 2120 | break; |
| 2034 | __skb_push(skb, (skb->data - | 2121 | __skb_push(skb, (skb->data - |
| 2035 | skb_network_header(skb))); | 2122 | skb_network_header(skb))); |
| 2036 | } | 2123 | } |
| 2037 | segs = ptype->gso_segment(skb, features); | 2124 | segs = ptype->callbacks.gso_segment(skb, features); |
| 2038 | break; | 2125 | break; |
| 2039 | } | 2126 | } |
| 2040 | } | 2127 | } |
| @@ -2237,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
| 2237 | skb->vlan_tci = 0; | 2324 | skb->vlan_tci = 0; |
| 2238 | } | 2325 | } |
| 2239 | 2326 | ||
| 2327 | /* For an encapsulation offload request, make sure we test the | ||
| 2328 | * hardware encapsulation features instead of the standard | ||
| 2329 | * features of the netdev | ||
| 2330 | */ | ||
| 2331 | if (skb->encapsulation) | ||
| 2332 | features &= dev->hw_enc_features; | ||
| 2333 | |||
| 2240 | if (netif_needs_gso(skb, features)) { | 2334 | if (netif_needs_gso(skb, features)) { |
| 2241 | if (unlikely(dev_gso_segment(skb, features))) | 2335 | if (unlikely(dev_gso_segment(skb, features))) |
| 2242 | goto out_kfree_skb; | 2336 | goto out_kfree_skb; |
| @@ -2252,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
| 2252 | * checksumming here. | 2346 | * checksumming here. |
| 2253 | */ | 2347 | */ |
| 2254 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 2348 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
| 2255 | skb_set_transport_header(skb, | 2349 | if (skb->encapsulation) |
| 2256 | skb_checksum_start_offset(skb)); | 2350 | skb_set_inner_transport_header(skb, |
| 2351 | skb_checksum_start_offset(skb)); | ||
| 2352 | else | ||
| 2353 | skb_set_transport_header(skb, | ||
| 2354 | skb_checksum_start_offset(skb)); | ||
| 2257 | if (!(features & NETIF_F_ALL_CSUM) && | 2355 | if (!(features & NETIF_F_ALL_CSUM) && |
| 2258 | skb_checksum_help(skb)) | 2356 | skb_checksum_help(skb)) |
| 2259 | goto out_kfree_skb; | 2357 | goto out_kfree_skb; |
| @@ -3446,9 +3544,9 @@ static void flush_backlog(void *arg) | |||
| 3446 | 3544 | ||
| 3447 | static int napi_gro_complete(struct sk_buff *skb) | 3545 | static int napi_gro_complete(struct sk_buff *skb) |
| 3448 | { | 3546 | { |
| 3449 | struct packet_type *ptype; | 3547 | struct packet_offload *ptype; |
| 3450 | __be16 type = skb->protocol; | 3548 | __be16 type = skb->protocol; |
| 3451 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; | 3549 | struct list_head *head = &offload_base; |
| 3452 | int err = -ENOENT; | 3550 | int err = -ENOENT; |
| 3453 | 3551 | ||
| 3454 | BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); | 3552 | BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); |
| @@ -3460,10 +3558,10 @@ static int napi_gro_complete(struct sk_buff *skb) | |||
| 3460 | 3558 | ||
| 3461 | rcu_read_lock(); | 3559 | rcu_read_lock(); |
| 3462 | list_for_each_entry_rcu(ptype, head, list) { | 3560 | list_for_each_entry_rcu(ptype, head, list) { |
| 3463 | if (ptype->type != type || ptype->dev || !ptype->gro_complete) | 3561 | if (ptype->type != type || !ptype->callbacks.gro_complete) |
| 3464 | continue; | 3562 | continue; |
| 3465 | 3563 | ||
| 3466 | err = ptype->gro_complete(skb); | 3564 | err = ptype->callbacks.gro_complete(skb); |
| 3467 | break; | 3565 | break; |
| 3468 | } | 3566 | } |
| 3469 | rcu_read_unlock(); | 3567 | rcu_read_unlock(); |
| @@ -3507,12 +3605,34 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) | |||
| 3507 | } | 3605 | } |
| 3508 | EXPORT_SYMBOL(napi_gro_flush); | 3606 | EXPORT_SYMBOL(napi_gro_flush); |
| 3509 | 3607 | ||
| 3510 | enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3608 | static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) |
| 3609 | { | ||
| 3610 | struct sk_buff *p; | ||
| 3611 | unsigned int maclen = skb->dev->hard_header_len; | ||
| 3612 | |||
| 3613 | for (p = napi->gro_list; p; p = p->next) { | ||
| 3614 | unsigned long diffs; | ||
| 3615 | |||
| 3616 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; | ||
| 3617 | diffs |= p->vlan_tci ^ skb->vlan_tci; | ||
| 3618 | if (maclen == ETH_HLEN) | ||
| 3619 | diffs |= compare_ether_header(skb_mac_header(p), | ||
| 3620 | skb_gro_mac_header(skb)); | ||
| 3621 | else if (!diffs) | ||
| 3622 | diffs = memcmp(skb_mac_header(p), | ||
| 3623 | skb_gro_mac_header(skb), | ||
| 3624 | maclen); | ||
| 3625 | NAPI_GRO_CB(p)->same_flow = !diffs; | ||
| 3626 | NAPI_GRO_CB(p)->flush = 0; | ||
| 3627 | } | ||
| 3628 | } | ||
| 3629 | |||
| 3630 | static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | ||
| 3511 | { | 3631 | { |
| 3512 | struct sk_buff **pp = NULL; | 3632 | struct sk_buff **pp = NULL; |
| 3513 | struct packet_type *ptype; | 3633 | struct packet_offload *ptype; |
| 3514 | __be16 type = skb->protocol; | 3634 | __be16 type = skb->protocol; |
| 3515 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; | 3635 | struct list_head *head = &offload_base; |
| 3516 | int same_flow; | 3636 | int same_flow; |
| 3517 | int mac_len; | 3637 | int mac_len; |
| 3518 | enum gro_result ret; | 3638 | enum gro_result ret; |
| @@ -3523,9 +3643,11 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
| 3523 | if (skb_is_gso(skb) || skb_has_frag_list(skb)) | 3643 | if (skb_is_gso(skb) || skb_has_frag_list(skb)) |
| 3524 | goto normal; | 3644 | goto normal; |
| 3525 | 3645 | ||
| 3646 | gro_list_prepare(napi, skb); | ||
| 3647 | |||
| 3526 | rcu_read_lock(); | 3648 | rcu_read_lock(); |
| 3527 | list_for_each_entry_rcu(ptype, head, list) { | 3649 | list_for_each_entry_rcu(ptype, head, list) { |
| 3528 | if (ptype->type != type || ptype->dev || !ptype->gro_receive) | 3650 | if (ptype->type != type || !ptype->callbacks.gro_receive) |
| 3529 | continue; | 3651 | continue; |
| 3530 | 3652 | ||
| 3531 | skb_set_network_header(skb, skb_gro_offset(skb)); | 3653 | skb_set_network_header(skb, skb_gro_offset(skb)); |
| @@ -3535,7 +3657,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
| 3535 | NAPI_GRO_CB(skb)->flush = 0; | 3657 | NAPI_GRO_CB(skb)->flush = 0; |
| 3536 | NAPI_GRO_CB(skb)->free = 0; | 3658 | NAPI_GRO_CB(skb)->free = 0; |
| 3537 | 3659 | ||
| 3538 | pp = ptype->gro_receive(&napi->gro_list, skb); | 3660 | pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); |
| 3539 | break; | 3661 | break; |
| 3540 | } | 3662 | } |
| 3541 | rcu_read_unlock(); | 3663 | rcu_read_unlock(); |
| @@ -3598,34 +3720,9 @@ normal: | |||
| 3598 | ret = GRO_NORMAL; | 3720 | ret = GRO_NORMAL; |
| 3599 | goto pull; | 3721 | goto pull; |
| 3600 | } | 3722 | } |
| 3601 | EXPORT_SYMBOL(dev_gro_receive); | ||
| 3602 | |||
| 3603 | static inline gro_result_t | ||
| 3604 | __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | ||
| 3605 | { | ||
| 3606 | struct sk_buff *p; | ||
| 3607 | unsigned int maclen = skb->dev->hard_header_len; | ||
| 3608 | |||
| 3609 | for (p = napi->gro_list; p; p = p->next) { | ||
| 3610 | unsigned long diffs; | ||
| 3611 | |||
| 3612 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; | ||
| 3613 | diffs |= p->vlan_tci ^ skb->vlan_tci; | ||
| 3614 | if (maclen == ETH_HLEN) | ||
| 3615 | diffs |= compare_ether_header(skb_mac_header(p), | ||
| 3616 | skb_gro_mac_header(skb)); | ||
| 3617 | else if (!diffs) | ||
| 3618 | diffs = memcmp(skb_mac_header(p), | ||
| 3619 | skb_gro_mac_header(skb), | ||
| 3620 | maclen); | ||
| 3621 | NAPI_GRO_CB(p)->same_flow = !diffs; | ||
| 3622 | NAPI_GRO_CB(p)->flush = 0; | ||
| 3623 | } | ||
| 3624 | 3723 | ||
| 3625 | return dev_gro_receive(napi, skb); | ||
| 3626 | } | ||
| 3627 | 3724 | ||
| 3628 | gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) | 3725 | static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) |
| 3629 | { | 3726 | { |
| 3630 | switch (ret) { | 3727 | switch (ret) { |
| 3631 | case GRO_NORMAL: | 3728 | case GRO_NORMAL: |
| @@ -3651,7 +3748,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) | |||
| 3651 | 3748 | ||
| 3652 | return ret; | 3749 | return ret; |
| 3653 | } | 3750 | } |
| 3654 | EXPORT_SYMBOL(napi_skb_finish); | ||
| 3655 | 3751 | ||
| 3656 | static void skb_gro_reset_offset(struct sk_buff *skb) | 3752 | static void skb_gro_reset_offset(struct sk_buff *skb) |
| 3657 | { | 3753 | { |
| @@ -3674,7 +3770,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
| 3674 | { | 3770 | { |
| 3675 | skb_gro_reset_offset(skb); | 3771 | skb_gro_reset_offset(skb); |
| 3676 | 3772 | ||
| 3677 | return napi_skb_finish(__napi_gro_receive(napi, skb), skb); | 3773 | return napi_skb_finish(dev_gro_receive(napi, skb), skb); |
| 3678 | } | 3774 | } |
| 3679 | EXPORT_SYMBOL(napi_gro_receive); | 3775 | EXPORT_SYMBOL(napi_gro_receive); |
| 3680 | 3776 | ||
| @@ -3703,7 +3799,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) | |||
| 3703 | } | 3799 | } |
| 3704 | EXPORT_SYMBOL(napi_get_frags); | 3800 | EXPORT_SYMBOL(napi_get_frags); |
| 3705 | 3801 | ||
| 3706 | gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, | 3802 | static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, |
| 3707 | gro_result_t ret) | 3803 | gro_result_t ret) |
| 3708 | { | 3804 | { |
| 3709 | switch (ret) { | 3805 | switch (ret) { |
| @@ -3728,7 +3824,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, | |||
| 3728 | 3824 | ||
| 3729 | return ret; | 3825 | return ret; |
| 3730 | } | 3826 | } |
| 3731 | EXPORT_SYMBOL(napi_frags_finish); | ||
| 3732 | 3827 | ||
| 3733 | static struct sk_buff *napi_frags_skb(struct napi_struct *napi) | 3828 | static struct sk_buff *napi_frags_skb(struct napi_struct *napi) |
| 3734 | { | 3829 | { |
| @@ -3773,7 +3868,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) | |||
| 3773 | if (!skb) | 3868 | if (!skb) |
| 3774 | return GRO_DROP; | 3869 | return GRO_DROP; |
| 3775 | 3870 | ||
| 3776 | return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); | 3871 | return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); |
| 3777 | } | 3872 | } |
| 3778 | EXPORT_SYMBOL(napi_gro_frags); | 3873 | EXPORT_SYMBOL(napi_gro_frags); |
| 3779 | 3874 | ||
| @@ -4075,6 +4170,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) | |||
| 4075 | { | 4170 | { |
| 4076 | struct net_device *dev; | 4171 | struct net_device *dev; |
| 4077 | struct ifreq ifr; | 4172 | struct ifreq ifr; |
| 4173 | unsigned seq; | ||
| 4078 | 4174 | ||
| 4079 | /* | 4175 | /* |
| 4080 | * Fetch the caller's info block. | 4176 | * Fetch the caller's info block. |
| @@ -4083,6 +4179,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) | |||
| 4083 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | 4179 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
| 4084 | return -EFAULT; | 4180 | return -EFAULT; |
| 4085 | 4181 | ||
| 4182 | retry: | ||
| 4183 | seq = read_seqbegin(&devnet_rename_seq); | ||
| 4086 | rcu_read_lock(); | 4184 | rcu_read_lock(); |
| 4087 | dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); | 4185 | dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); |
| 4088 | if (!dev) { | 4186 | if (!dev) { |
| @@ -4092,6 +4190,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) | |||
| 4092 | 4190 | ||
| 4093 | strcpy(ifr.ifr_name, dev->name); | 4191 | strcpy(ifr.ifr_name, dev->name); |
| 4094 | rcu_read_unlock(); | 4192 | rcu_read_unlock(); |
| 4193 | if (read_seqretry(&devnet_rename_seq, seq)) | ||
| 4194 | goto retry; | ||
| 4095 | 4195 | ||
| 4096 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) | 4196 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) |
| 4097 | return -EFAULT; | 4197 | return -EFAULT; |
| @@ -4884,7 +4984,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) | |||
| 4884 | else | 4984 | else |
| 4885 | dev->mtu = new_mtu; | 4985 | dev->mtu = new_mtu; |
| 4886 | 4986 | ||
| 4887 | if (!err && dev->flags & IFF_UP) | 4987 | if (!err) |
| 4888 | call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); | 4988 | call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); |
| 4889 | return err; | 4989 | return err; |
| 4890 | } | 4990 | } |
| @@ -5204,7 +5304,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
| 5204 | case SIOCGMIIPHY: | 5304 | case SIOCGMIIPHY: |
| 5205 | case SIOCGMIIREG: | 5305 | case SIOCGMIIREG: |
| 5206 | case SIOCSIFNAME: | 5306 | case SIOCSIFNAME: |
| 5207 | if (!capable(CAP_NET_ADMIN)) | 5307 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
| 5208 | return -EPERM; | 5308 | return -EPERM; |
| 5209 | dev_load(net, ifr.ifr_name); | 5309 | dev_load(net, ifr.ifr_name); |
| 5210 | rtnl_lock(); | 5310 | rtnl_lock(); |
| @@ -5225,16 +5325,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
| 5225 | * - require strict serialization. | 5325 | * - require strict serialization. |
| 5226 | * - do not return a value | 5326 | * - do not return a value |
| 5227 | */ | 5327 | */ |
| 5328 | case SIOCSIFMAP: | ||
| 5329 | case SIOCSIFTXQLEN: | ||
| 5330 | if (!capable(CAP_NET_ADMIN)) | ||
| 5331 | return -EPERM; | ||
| 5332 | /* fall through */ | ||
| 5333 | /* | ||
| 5334 | * These ioctl calls: | ||
| 5335 | * - require local superuser power. | ||
| 5336 | * - require strict serialization. | ||
| 5337 | * - do not return a value | ||
| 5338 | */ | ||
| 5228 | case SIOCSIFFLAGS: | 5339 | case SIOCSIFFLAGS: |
| 5229 | case SIOCSIFMETRIC: | 5340 | case SIOCSIFMETRIC: |
| 5230 | case SIOCSIFMTU: | 5341 | case SIOCSIFMTU: |
| 5231 | case SIOCSIFMAP: | ||
| 5232 | case SIOCSIFHWADDR: | 5342 | case SIOCSIFHWADDR: |
| 5233 | case SIOCSIFSLAVE: | 5343 | case SIOCSIFSLAVE: |
| 5234 | case SIOCADDMULTI: | 5344 | case SIOCADDMULTI: |
| 5235 | case SIOCDELMULTI: | 5345 | case SIOCDELMULTI: |
| 5236 | case SIOCSIFHWBROADCAST: | 5346 | case SIOCSIFHWBROADCAST: |
| 5237 | case SIOCSIFTXQLEN: | ||
| 5238 | case SIOCSMIIREG: | 5347 | case SIOCSMIIREG: |
| 5239 | case SIOCBONDENSLAVE: | 5348 | case SIOCBONDENSLAVE: |
| 5240 | case SIOCBONDRELEASE: | 5349 | case SIOCBONDRELEASE: |
| @@ -5243,7 +5352,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
| 5243 | case SIOCBRADDIF: | 5352 | case SIOCBRADDIF: |
| 5244 | case SIOCBRDELIF: | 5353 | case SIOCBRDELIF: |
| 5245 | case SIOCSHWTSTAMP: | 5354 | case SIOCSHWTSTAMP: |
| 5246 | if (!capable(CAP_NET_ADMIN)) | 5355 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
| 5247 | return -EPERM; | 5356 | return -EPERM; |
| 5248 | /* fall through */ | 5357 | /* fall through */ |
| 5249 | case SIOCBONDSLAVEINFOQUERY: | 5358 | case SIOCBONDSLAVEINFOQUERY: |
| @@ -6268,7 +6377,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
| 6268 | goto out; | 6377 | goto out; |
| 6269 | 6378 | ||
| 6270 | /* Ensure the device has been registered */ | 6379 | /* Ensure the device has been registered */ |
| 6271 | err = -EINVAL; | ||
| 6272 | if (dev->reg_state != NETREG_REGISTERED) | 6380 | if (dev->reg_state != NETREG_REGISTERED) |
| 6273 | goto out; | 6381 | goto out; |
| 6274 | 6382 | ||
| @@ -6323,6 +6431,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
| 6323 | dev_uc_flush(dev); | 6431 | dev_uc_flush(dev); |
| 6324 | dev_mc_flush(dev); | 6432 | dev_mc_flush(dev); |
| 6325 | 6433 | ||
| 6434 | /* Send a netdev-removed uevent to the old namespace */ | ||
| 6435 | kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); | ||
| 6436 | |||
| 6326 | /* Actually switch the network namespace */ | 6437 | /* Actually switch the network namespace */ |
| 6327 | dev_net_set(dev, net); | 6438 | dev_net_set(dev, net); |
| 6328 | 6439 | ||
| @@ -6334,6 +6445,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
| 6334 | dev->iflink = dev->ifindex; | 6445 | dev->iflink = dev->ifindex; |
| 6335 | } | 6446 | } |
| 6336 | 6447 | ||
| 6448 | /* Send a netdev-add uevent to the new namespace */ | ||
| 6449 | kobject_uevent(&dev->dev.kobj, KOBJ_ADD); | ||
| 6450 | |||
| 6337 | /* Fixup kobjects */ | 6451 | /* Fixup kobjects */ |
| 6338 | err = device_rename(&dev->dev, dev->name); | 6452 | err = device_rename(&dev->dev, dev->name); |
| 6339 | WARN_ON(err); | 6453 | WARN_ON(err); |
| @@ -6666,6 +6780,8 @@ static int __init net_dev_init(void) | |||
| 6666 | for (i = 0; i < PTYPE_HASH_SIZE; i++) | 6780 | for (i = 0; i < PTYPE_HASH_SIZE; i++) |
| 6667 | INIT_LIST_HEAD(&ptype_base[i]); | 6781 | INIT_LIST_HEAD(&ptype_base[i]); |
| 6668 | 6782 | ||
| 6783 | INIT_LIST_HEAD(&offload_base); | ||
| 6784 | |||
| 6669 | if (register_pernet_subsys(&netdev_net_ops)) | 6785 | if (register_pernet_subsys(&netdev_net_ops)) |
| 6670 | goto out; | 6786 | goto out; |
| 6671 | 6787 | ||
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4d64cc2e3fa9..a8705432e4b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
| @@ -1460,7 +1460,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
| 1460 | case ETHTOOL_GEEE: | 1460 | case ETHTOOL_GEEE: |
| 1461 | break; | 1461 | break; |
| 1462 | default: | 1462 | default: |
| 1463 | if (!capable(CAP_NET_ADMIN)) | 1463 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
| 1464 | return -EPERM; | 1464 | return -EPERM; |
| 1465 | } | 1465 | } |
| 1466 | 1466 | ||
diff --git a/net/core/filter.c b/net/core/filter.c index 3d92ebb7fbcf..c23543cba132 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/reciprocal_div.h> | 39 | #include <linux/reciprocal_div.h> |
| 40 | #include <linux/ratelimit.h> | 40 | #include <linux/ratelimit.h> |
| 41 | #include <linux/seccomp.h> | 41 | #include <linux/seccomp.h> |
| 42 | #include <linux/if_vlan.h> | ||
| 42 | 43 | ||
| 43 | /* No hurry in this branch | 44 | /* No hurry in this branch |
| 44 | * | 45 | * |
| @@ -341,6 +342,12 @@ load_b: | |||
| 341 | case BPF_S_ANC_CPU: | 342 | case BPF_S_ANC_CPU: |
| 342 | A = raw_smp_processor_id(); | 343 | A = raw_smp_processor_id(); |
| 343 | continue; | 344 | continue; |
| 345 | case BPF_S_ANC_VLAN_TAG: | ||
| 346 | A = vlan_tx_tag_get(skb); | ||
| 347 | continue; | ||
| 348 | case BPF_S_ANC_VLAN_TAG_PRESENT: | ||
| 349 | A = !!vlan_tx_tag_present(skb); | ||
| 350 | continue; | ||
| 344 | case BPF_S_ANC_NLATTR: { | 351 | case BPF_S_ANC_NLATTR: { |
| 345 | struct nlattr *nla; | 352 | struct nlattr *nla; |
| 346 | 353 | ||
| @@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) | |||
| 600 | ANCILLARY(RXHASH); | 607 | ANCILLARY(RXHASH); |
| 601 | ANCILLARY(CPU); | 608 | ANCILLARY(CPU); |
| 602 | ANCILLARY(ALU_XOR_X); | 609 | ANCILLARY(ALU_XOR_X); |
| 610 | ANCILLARY(VLAN_TAG); | ||
| 611 | ANCILLARY(VLAN_TAG_PRESENT); | ||
| 603 | } | 612 | } |
| 604 | } | 613 | } |
| 605 | ftest->code = code; | 614 | ftest->code = code; |
| @@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk) | |||
| 751 | return ret; | 760 | return ret; |
| 752 | } | 761 | } |
| 753 | EXPORT_SYMBOL_GPL(sk_detach_filter); | 762 | EXPORT_SYMBOL_GPL(sk_detach_filter); |
| 763 | |||
| 764 | static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) | ||
| 765 | { | ||
| 766 | static const u16 decodes[] = { | ||
| 767 | [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, | ||
| 768 | [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, | ||
| 769 | [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, | ||
| 770 | [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, | ||
| 771 | [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, | ||
| 772 | [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, | ||
| 773 | [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, | ||
| 774 | [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, | ||
| 775 | [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, | ||
| 776 | [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, | ||
| 777 | [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, | ||
| 778 | [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, | ||
| 779 | [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, | ||
| 780 | [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, | ||
| 781 | [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, | ||
| 782 | [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, | ||
| 783 | [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, | ||
| 784 | [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, | ||
| 785 | [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, | ||
| 786 | [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, | ||
| 787 | [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, | ||
| 788 | [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, | ||
| 789 | [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, | ||
| 790 | [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, | ||
| 791 | [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, | ||
| 792 | [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, | ||
| 793 | [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, | ||
| 794 | [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, | ||
| 795 | [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, | ||
| 796 | [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, | ||
| 797 | [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, | ||
| 798 | [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, | ||
| 799 | [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, | ||
| 800 | [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, | ||
| 801 | [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, | ||
| 802 | [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, | ||
| 803 | [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, | ||
| 804 | [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, | ||
| 805 | [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, | ||
| 806 | [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, | ||
| 807 | [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, | ||
| 808 | [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, | ||
| 809 | [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, | ||
| 810 | [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, | ||
| 811 | [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, | ||
| 812 | [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, | ||
| 813 | [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, | ||
| 814 | [BPF_S_RET_K] = BPF_RET|BPF_K, | ||
| 815 | [BPF_S_RET_A] = BPF_RET|BPF_A, | ||
| 816 | [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, | ||
| 817 | [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, | ||
| 818 | [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, | ||
| 819 | [BPF_S_ST] = BPF_ST, | ||
| 820 | [BPF_S_STX] = BPF_STX, | ||
| 821 | [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, | ||
| 822 | [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, | ||
| 823 | [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, | ||
| 824 | [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, | ||
| 825 | [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, | ||
| 826 | [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, | ||
| 827 | [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, | ||
| 828 | [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, | ||
| 829 | [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, | ||
| 830 | }; | ||
| 831 | u16 code; | ||
| 832 | |||
| 833 | code = filt->code; | ||
| 834 | |||
| 835 | to->code = decodes[code]; | ||
| 836 | to->jt = filt->jt; | ||
| 837 | to->jf = filt->jf; | ||
| 838 | |||
| 839 | if (code == BPF_S_ALU_DIV_K) { | ||
| 840 | /* | ||
| 841 | * When loaded this rule user gave us X, which was | ||
| 842 | * translated into R = r(X). Now we calculate the | ||
| 843 | * RR = r(R) and report it back. If next time this | ||
| 844 | * value is loaded and RRR = r(RR) is calculated | ||
| 845 | * then the R == RRR will be true. | ||
| 846 | * | ||
| 847 | * One exception. X == 1 translates into R == 0 and | ||
| 848 | * we can't calculate RR out of it with r(). | ||
| 849 | */ | ||
| 850 | |||
| 851 | if (filt->k == 0) | ||
| 852 | to->k = 1; | ||
| 853 | else | ||
| 854 | to->k = reciprocal_value(filt->k); | ||
| 855 | |||
| 856 | BUG_ON(reciprocal_value(to->k) != filt->k); | ||
| 857 | } else | ||
| 858 | to->k = filt->k; | ||
| 859 | } | ||
| 860 | |||
| 861 | int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) | ||
| 862 | { | ||
| 863 | struct sk_filter *filter; | ||
| 864 | int i, ret; | ||
| 865 | |||
| 866 | lock_sock(sk); | ||
| 867 | filter = rcu_dereference_protected(sk->sk_filter, | ||
| 868 | sock_owned_by_user(sk)); | ||
| 869 | ret = 0; | ||
| 870 | if (!filter) | ||
| 871 | goto out; | ||
| 872 | ret = filter->len; | ||
| 873 | if (!len) | ||
| 874 | goto out; | ||
| 875 | ret = -EINVAL; | ||
| 876 | if (len < filter->len) | ||
| 877 | goto out; | ||
| 878 | |||
| 879 | ret = -EFAULT; | ||
| 880 | for (i = 0; i < filter->len; i++) { | ||
| 881 | struct sock_filter fb; | ||
| 882 | |||
| 883 | sk_decode_filter(&filter->insns[i], &fb); | ||
| 884 | if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) | ||
| 885 | goto out; | ||
| 886 | } | ||
| 887 | |||
| 888 | ret = filter->len; | ||
| 889 | out: | ||
| 890 | release_sock(sk); | ||
| 891 | return ret; | ||
| 892 | } | ||
diff --git a/net/core/flow.c b/net/core/flow.c index e318c7e98042..b0901ee5a002 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
| @@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data) | |||
| 327 | static void flow_cache_flush_per_cpu(void *data) | 327 | static void flow_cache_flush_per_cpu(void *data) |
| 328 | { | 328 | { |
| 329 | struct flow_flush_info *info = data; | 329 | struct flow_flush_info *info = data; |
| 330 | int cpu; | ||
| 331 | struct tasklet_struct *tasklet; | 330 | struct tasklet_struct *tasklet; |
| 332 | 331 | ||
| 333 | cpu = smp_processor_id(); | 332 | tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); |
| 334 | tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; | ||
| 335 | tasklet->data = (unsigned long)info; | 333 | tasklet->data = (unsigned long)info; |
| 336 | tasklet_schedule(tasklet); | 334 | tasklet_schedule(tasklet); |
| 337 | } | 335 | } |
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 22571488730a..c815f285e5ab 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
| @@ -1787,8 +1787,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) | |||
| 1787 | nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || | 1787 | nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || |
| 1788 | /* approximative value for deprecated QUEUE_LEN (in packets) */ | 1788 | /* approximative value for deprecated QUEUE_LEN (in packets) */ |
| 1789 | nla_put_u32(skb, NDTPA_QUEUE_LEN, | 1789 | nla_put_u32(skb, NDTPA_QUEUE_LEN, |
| 1790 | DIV_ROUND_UP(parms->queue_len_bytes, | 1790 | parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) || |
| 1791 | SKB_TRUESIZE(ETH_FRAME_LEN))) || | ||
| 1792 | nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || | 1791 | nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || |
| 1793 | nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || | 1792 | nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || |
| 1794 | nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || | 1793 | nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || |
| @@ -2770,6 +2769,8 @@ EXPORT_SYMBOL(neigh_app_ns); | |||
| 2770 | #endif /* CONFIG_ARPD */ | 2769 | #endif /* CONFIG_ARPD */ |
| 2771 | 2770 | ||
| 2772 | #ifdef CONFIG_SYSCTL | 2771 | #ifdef CONFIG_SYSCTL |
| 2772 | static int zero; | ||
| 2773 | static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); | ||
| 2773 | 2774 | ||
| 2774 | static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, | 2775 | static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, |
| 2775 | size_t *lenp, loff_t *ppos) | 2776 | size_t *lenp, loff_t *ppos) |
| @@ -2777,9 +2778,13 @@ static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, | |||
| 2777 | int size, ret; | 2778 | int size, ret; |
| 2778 | ctl_table tmp = *ctl; | 2779 | ctl_table tmp = *ctl; |
| 2779 | 2780 | ||
| 2781 | tmp.extra1 = &zero; | ||
| 2782 | tmp.extra2 = &unres_qlen_max; | ||
| 2780 | tmp.data = &size; | 2783 | tmp.data = &size; |
| 2781 | size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN)); | 2784 | |
| 2782 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); | 2785 | size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); |
| 2786 | ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); | ||
| 2787 | |||
| 2783 | if (write && !ret) | 2788 | if (write && !ret) |
| 2784 | *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); | 2789 | *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); |
| 2785 | return ret; | 2790 | return ret; |
| @@ -2865,7 +2870,8 @@ static struct neigh_sysctl_table { | |||
| 2865 | .procname = "unres_qlen_bytes", | 2870 | .procname = "unres_qlen_bytes", |
| 2866 | .maxlen = sizeof(int), | 2871 | .maxlen = sizeof(int), |
| 2867 | .mode = 0644, | 2872 | .mode = 0644, |
| 2868 | .proc_handler = proc_dointvec, | 2873 | .extra1 = &zero, |
| 2874 | .proc_handler = proc_dointvec_minmax, | ||
| 2869 | }, | 2875 | }, |
| 2870 | [NEIGH_VAR_PROXY_QLEN] = { | 2876 | [NEIGH_VAR_PROXY_QLEN] = { |
| 2871 | .procname = "proxy_qlen", | 2877 | .procname = "proxy_qlen", |
| @@ -2987,6 +2993,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, | |||
| 2987 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; | 2993 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; |
| 2988 | } | 2994 | } |
| 2989 | 2995 | ||
| 2996 | /* Don't export sysctls to unprivileged users */ | ||
| 2997 | if (neigh_parms_net(p)->user_ns != &init_user_ns) | ||
| 2998 | t->neigh_vars[0].procname = NULL; | ||
| 2999 | |||
| 2990 | snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", | 3000 | snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", |
| 2991 | p_name, dev_name_source); | 3001 | p_name, dev_name_source); |
| 2992 | t->sysctl_header = | 3002 | t->sysctl_header = |
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 017a8bacfb27..334efd5d67a9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
| @@ -18,11 +18,9 @@ | |||
| 18 | #include <net/sock.h> | 18 | #include <net/sock.h> |
| 19 | #include <net/net_namespace.h> | 19 | #include <net/net_namespace.h> |
| 20 | #include <linux/rtnetlink.h> | 20 | #include <linux/rtnetlink.h> |
| 21 | #include <linux/wireless.h> | ||
| 22 | #include <linux/vmalloc.h> | 21 | #include <linux/vmalloc.h> |
| 23 | #include <linux/export.h> | 22 | #include <linux/export.h> |
| 24 | #include <linux/jiffies.h> | 23 | #include <linux/jiffies.h> |
| 25 | #include <net/wext.h> | ||
| 26 | 24 | ||
| 27 | #include "net-sysfs.h" | 25 | #include "net-sysfs.h" |
| 28 | 26 | ||
| @@ -73,11 +71,12 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, | |||
| 73 | const char *buf, size_t len, | 71 | const char *buf, size_t len, |
| 74 | int (*set)(struct net_device *, unsigned long)) | 72 | int (*set)(struct net_device *, unsigned long)) |
| 75 | { | 73 | { |
| 76 | struct net_device *net = to_net_dev(dev); | 74 | struct net_device *netdev = to_net_dev(dev); |
| 75 | struct net *net = dev_net(netdev); | ||
| 77 | unsigned long new; | 76 | unsigned long new; |
| 78 | int ret = -EINVAL; | 77 | int ret = -EINVAL; |
| 79 | 78 | ||
| 80 | if (!capable(CAP_NET_ADMIN)) | 79 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
| 81 | return -EPERM; | 80 | return -EPERM; |
| 82 | 81 | ||
| 83 | ret = kstrtoul(buf, 0, &new); | 82 | ret = kstrtoul(buf, 0, &new); |
| @@ -87,8 +86,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, | |||
| 87 | if (!rtnl_trylock()) | 86 | if (!rtnl_trylock()) |
| 88 | return restart_syscall(); | 87 | return restart_syscall(); |
| 89 | 88 | ||
| 90 | if (dev_isalive(net)) { | 89 | if (dev_isalive(netdev)) { |
| 91 | if ((ret = (*set)(net, new)) == 0) | 90 | if ((ret = (*set)(netdev, new)) == 0) |
| 92 | ret = len; | 91 | ret = len; |
| 93 | } | 92 | } |
| 94 | rtnl_unlock(); | 93 | rtnl_unlock(); |
| @@ -264,6 +263,9 @@ static ssize_t store_tx_queue_len(struct device *dev, | |||
| 264 | struct device_attribute *attr, | 263 | struct device_attribute *attr, |
| 265 | const char *buf, size_t len) | 264 | const char *buf, size_t len) |
| 266 | { | 265 | { |
| 266 | if (!capable(CAP_NET_ADMIN)) | ||
| 267 | return -EPERM; | ||
| 268 | |||
| 267 | return netdev_store(dev, attr, buf, len, change_tx_queue_len); | 269 | return netdev_store(dev, attr, buf, len, change_tx_queue_len); |
| 268 | } | 270 | } |
| 269 | 271 | ||
| @@ -271,10 +273,11 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, | |||
| 271 | const char *buf, size_t len) | 273 | const char *buf, size_t len) |
| 272 | { | 274 | { |
| 273 | struct net_device *netdev = to_net_dev(dev); | 275 | struct net_device *netdev = to_net_dev(dev); |
| 276 | struct net *net = dev_net(netdev); | ||
| 274 | size_t count = len; | 277 | size_t count = len; |
| 275 | ssize_t ret; | 278 | ssize_t ret; |
| 276 | 279 | ||
| 277 | if (!capable(CAP_NET_ADMIN)) | 280 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
| 278 | return -EPERM; | 281 | return -EPERM; |
| 279 | 282 | ||
| 280 | /* ignore trailing newline */ | 283 | /* ignore trailing newline */ |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 42f1e1c7514f..8acce01b6dab 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/proc_fs.h> | 13 | #include <linux/proc_fs.h> |
| 14 | #include <linux/file.h> | 14 | #include <linux/file.h> |
| 15 | #include <linux/export.h> | 15 | #include <linux/export.h> |
| 16 | #include <linux/user_namespace.h> | ||
| 16 | #include <net/net_namespace.h> | 17 | #include <net/net_namespace.h> |
| 17 | #include <net/netns/generic.h> | 18 | #include <net/netns/generic.h> |
| 18 | 19 | ||
| @@ -145,7 +146,7 @@ static void ops_free_list(const struct pernet_operations *ops, | |||
| 145 | /* | 146 | /* |
| 146 | * setup_net runs the initializers for the network namespace object. | 147 | * setup_net runs the initializers for the network namespace object. |
| 147 | */ | 148 | */ |
| 148 | static __net_init int setup_net(struct net *net) | 149 | static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) |
| 149 | { | 150 | { |
| 150 | /* Must be called with net_mutex held */ | 151 | /* Must be called with net_mutex held */ |
| 151 | const struct pernet_operations *ops, *saved_ops; | 152 | const struct pernet_operations *ops, *saved_ops; |
| @@ -155,6 +156,7 @@ static __net_init int setup_net(struct net *net) | |||
| 155 | atomic_set(&net->count, 1); | 156 | atomic_set(&net->count, 1); |
| 156 | atomic_set(&net->passive, 1); | 157 | atomic_set(&net->passive, 1); |
| 157 | net->dev_base_seq = 1; | 158 | net->dev_base_seq = 1; |
| 159 | net->user_ns = user_ns; | ||
| 158 | 160 | ||
| 159 | #ifdef NETNS_REFCNT_DEBUG | 161 | #ifdef NETNS_REFCNT_DEBUG |
| 160 | atomic_set(&net->use_count, 0); | 162 | atomic_set(&net->use_count, 0); |
| @@ -232,7 +234,8 @@ void net_drop_ns(void *p) | |||
| 232 | net_free(ns); | 234 | net_free(ns); |
| 233 | } | 235 | } |
| 234 | 236 | ||
| 235 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) | 237 | struct net *copy_net_ns(unsigned long flags, |
| 238 | struct user_namespace *user_ns, struct net *old_net) | ||
| 236 | { | 239 | { |
| 237 | struct net *net; | 240 | struct net *net; |
| 238 | int rv; | 241 | int rv; |
| @@ -243,8 +246,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) | |||
| 243 | net = net_alloc(); | 246 | net = net_alloc(); |
| 244 | if (!net) | 247 | if (!net) |
| 245 | return ERR_PTR(-ENOMEM); | 248 | return ERR_PTR(-ENOMEM); |
| 249 | |||
| 250 | get_user_ns(user_ns); | ||
| 251 | |||
| 246 | mutex_lock(&net_mutex); | 252 | mutex_lock(&net_mutex); |
| 247 | rv = setup_net(net); | 253 | rv = setup_net(net, user_ns); |
| 248 | if (rv == 0) { | 254 | if (rv == 0) { |
| 249 | rtnl_lock(); | 255 | rtnl_lock(); |
| 250 | list_add_tail_rcu(&net->list, &net_namespace_list); | 256 | list_add_tail_rcu(&net->list, &net_namespace_list); |
| @@ -252,6 +258,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) | |||
| 252 | } | 258 | } |
| 253 | mutex_unlock(&net_mutex); | 259 | mutex_unlock(&net_mutex); |
| 254 | if (rv < 0) { | 260 | if (rv < 0) { |
| 261 | put_user_ns(user_ns); | ||
| 255 | net_drop_ns(net); | 262 | net_drop_ns(net); |
| 256 | return ERR_PTR(rv); | 263 | return ERR_PTR(rv); |
| 257 | } | 264 | } |
| @@ -308,6 +315,7 @@ static void cleanup_net(struct work_struct *work) | |||
| 308 | /* Finally it is safe to free my network namespace structure */ | 315 | /* Finally it is safe to free my network namespace structure */ |
| 309 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { | 316 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { |
| 310 | list_del_init(&net->exit_list); | 317 | list_del_init(&net->exit_list); |
| 318 | put_user_ns(net->user_ns); | ||
| 311 | net_drop_ns(net); | 319 | net_drop_ns(net); |
| 312 | } | 320 | } |
| 313 | } | 321 | } |
| @@ -347,13 +355,6 @@ struct net *get_net_ns_by_fd(int fd) | |||
| 347 | } | 355 | } |
| 348 | 356 | ||
| 349 | #else | 357 | #else |
| 350 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) | ||
| 351 | { | ||
| 352 | if (flags & CLONE_NEWNET) | ||
| 353 | return ERR_PTR(-EINVAL); | ||
| 354 | return old_net; | ||
| 355 | } | ||
| 356 | |||
| 357 | struct net *get_net_ns_by_fd(int fd) | 358 | struct net *get_net_ns_by_fd(int fd) |
| 358 | { | 359 | { |
| 359 | return ERR_PTR(-EINVAL); | 360 | return ERR_PTR(-EINVAL); |
| @@ -380,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid) | |||
| 380 | } | 381 | } |
| 381 | EXPORT_SYMBOL_GPL(get_net_ns_by_pid); | 382 | EXPORT_SYMBOL_GPL(get_net_ns_by_pid); |
| 382 | 383 | ||
| 384 | static __net_init int net_ns_net_init(struct net *net) | ||
| 385 | { | ||
| 386 | return proc_alloc_inum(&net->proc_inum); | ||
| 387 | } | ||
| 388 | |||
| 389 | static __net_exit void net_ns_net_exit(struct net *net) | ||
| 390 | { | ||
| 391 | proc_free_inum(net->proc_inum); | ||
| 392 | } | ||
| 393 | |||
| 394 | static struct pernet_operations __net_initdata net_ns_ops = { | ||
| 395 | .init = net_ns_net_init, | ||
| 396 | .exit = net_ns_net_exit, | ||
| 397 | }; | ||
| 398 | |||
| 383 | static int __init net_ns_init(void) | 399 | static int __init net_ns_init(void) |
| 384 | { | 400 | { |
| 385 | struct net_generic *ng; | 401 | struct net_generic *ng; |
| @@ -402,7 +418,7 @@ static int __init net_ns_init(void) | |||
| 402 | rcu_assign_pointer(init_net.gen, ng); | 418 | rcu_assign_pointer(init_net.gen, ng); |
| 403 | 419 | ||
| 404 | mutex_lock(&net_mutex); | 420 | mutex_lock(&net_mutex); |
| 405 | if (setup_net(&init_net)) | 421 | if (setup_net(&init_net, &init_user_ns)) |
| 406 | panic("Could not setup the initial network namespace"); | 422 | panic("Could not setup the initial network namespace"); |
| 407 | 423 | ||
| 408 | rtnl_lock(); | 424 | rtnl_lock(); |
| @@ -411,6 +427,8 @@ static int __init net_ns_init(void) | |||
| 411 | 427 | ||
| 412 | mutex_unlock(&net_mutex); | 428 | mutex_unlock(&net_mutex); |
| 413 | 429 | ||
| 430 | register_pernet_subsys(&net_ns_ops); | ||
| 431 | |||
| 414 | return 0; | 432 | return 0; |
| 415 | } | 433 | } |
| 416 | 434 | ||
| @@ -629,16 +647,29 @@ static void netns_put(void *ns) | |||
| 629 | 647 | ||
| 630 | static int netns_install(struct nsproxy *nsproxy, void *ns) | 648 | static int netns_install(struct nsproxy *nsproxy, void *ns) |
| 631 | { | 649 | { |
| 650 | struct net *net = ns; | ||
| 651 | |||
| 652 | if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || | ||
| 653 | !nsown_capable(CAP_SYS_ADMIN)) | ||
| 654 | return -EPERM; | ||
| 655 | |||
| 632 | put_net(nsproxy->net_ns); | 656 | put_net(nsproxy->net_ns); |
| 633 | nsproxy->net_ns = get_net(ns); | 657 | nsproxy->net_ns = get_net(net); |
| 634 | return 0; | 658 | return 0; |
| 635 | } | 659 | } |
| 636 | 660 | ||
| 661 | static unsigned int netns_inum(void *ns) | ||
| 662 | { | ||
| 663 | struct net *net = ns; | ||
| 664 | return net->proc_inum; | ||
| 665 | } | ||
| 666 | |||
| 637 | const struct proc_ns_operations netns_operations = { | 667 | const struct proc_ns_operations netns_operations = { |
| 638 | .name = "net", | 668 | .name = "net", |
| 639 | .type = CLONE_NEWNET, | 669 | .type = CLONE_NEWNET, |
| 640 | .get = netns_get, | 670 | .get = netns_get, |
| 641 | .put = netns_put, | 671 | .put = netns_put, |
| 642 | .install = netns_install, | 672 | .install = netns_install, |
| 673 | .inum = netns_inum, | ||
| 643 | }; | 674 | }; |
| 644 | #endif | 675 | #endif |
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 77a0388fc3be..3151acf5ec13 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
| @@ -674,7 +674,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) | |||
| 674 | if ((delim = strchr(cur, '@')) == NULL) | 674 | if ((delim = strchr(cur, '@')) == NULL) |
| 675 | goto parse_failed; | 675 | goto parse_failed; |
| 676 | *delim = 0; | 676 | *delim = 0; |
| 677 | np->local_port = simple_strtol(cur, NULL, 10); | 677 | if (kstrtou16(cur, 10, &np->local_port)) |
| 678 | goto parse_failed; | ||
| 678 | cur = delim; | 679 | cur = delim; |
| 679 | } | 680 | } |
| 680 | cur++; | 681 | cur++; |
| @@ -705,7 +706,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) | |||
| 705 | *delim = 0; | 706 | *delim = 0; |
| 706 | if (*cur == ' ' || *cur == '\t') | 707 | if (*cur == ' ' || *cur == '\t') |
| 707 | np_info(np, "warning: whitespace is not allowed\n"); | 708 | np_info(np, "warning: whitespace is not allowed\n"); |
| 708 | np->remote_port = simple_strtol(cur, NULL, 10); | 709 | if (kstrtou16(cur, 10, &np->remote_port)) |
| 710 | goto parse_failed; | ||
| 709 | cur = delim; | 711 | cur = delim; |
| 710 | } | 712 | } |
| 711 | cur++; | 713 | cur++; |
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 79285a36035f..5e67defe2cb0 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c | |||
| @@ -27,11 +27,7 @@ | |||
| 27 | 27 | ||
| 28 | #include <linux/fdtable.h> | 28 | #include <linux/fdtable.h> |
| 29 | 29 | ||
| 30 | #define PRIOIDX_SZ 128 | 30 | #define PRIOMAP_MIN_SZ 128 |
| 31 | |||
| 32 | static unsigned long prioidx_map[PRIOIDX_SZ]; | ||
| 33 | static DEFINE_SPINLOCK(prioidx_map_lock); | ||
| 34 | static atomic_t max_prioidx = ATOMIC_INIT(0); | ||
| 35 | 31 | ||
| 36 | static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) | 32 | static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) |
| 37 | { | 33 | { |
| @@ -39,136 +35,157 @@ static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgr | |||
| 39 | struct cgroup_netprio_state, css); | 35 | struct cgroup_netprio_state, css); |
| 40 | } | 36 | } |
| 41 | 37 | ||
| 42 | static int get_prioidx(u32 *prio) | 38 | /* |
| 43 | { | 39 | * Extend @dev->priomap so that it's large enough to accommodate |
| 44 | unsigned long flags; | 40 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful |
| 45 | u32 prioidx; | 41 | * return. Must be called under rtnl lock. |
| 46 | 42 | */ | |
| 47 | spin_lock_irqsave(&prioidx_map_lock, flags); | 43 | static int extend_netdev_table(struct net_device *dev, u32 target_idx) |
| 48 | prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); | ||
| 49 | if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) { | ||
| 50 | spin_unlock_irqrestore(&prioidx_map_lock, flags); | ||
| 51 | return -ENOSPC; | ||
| 52 | } | ||
| 53 | set_bit(prioidx, prioidx_map); | ||
| 54 | if (atomic_read(&max_prioidx) < prioidx) | ||
| 55 | atomic_set(&max_prioidx, prioidx); | ||
| 56 | spin_unlock_irqrestore(&prioidx_map_lock, flags); | ||
| 57 | *prio = prioidx; | ||
| 58 | return 0; | ||
| 59 | } | ||
| 60 | |||
| 61 | static void put_prioidx(u32 idx) | ||
| 62 | { | 44 | { |
| 63 | unsigned long flags; | 45 | struct netprio_map *old, *new; |
| 64 | 46 | size_t new_sz, new_len; | |
| 65 | spin_lock_irqsave(&prioidx_map_lock, flags); | ||
| 66 | clear_bit(idx, prioidx_map); | ||
| 67 | spin_unlock_irqrestore(&prioidx_map_lock, flags); | ||
| 68 | } | ||
| 69 | 47 | ||
| 70 | static int extend_netdev_table(struct net_device *dev, u32 new_len) | 48 | /* is the existing priomap large enough? */ |
| 71 | { | 49 | old = rtnl_dereference(dev->priomap); |
| 72 | size_t new_size = sizeof(struct netprio_map) + | 50 | if (old && old->priomap_len > target_idx) |
| 73 | ((sizeof(u32) * new_len)); | 51 | return 0; |
| 74 | struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); | ||
| 75 | struct netprio_map *old_priomap; | ||
| 76 | 52 | ||
| 77 | old_priomap = rtnl_dereference(dev->priomap); | 53 | /* |
| 54 | * Determine the new size. Let's keep it power-of-two. We start | ||
| 55 | * from PRIOMAP_MIN_SZ and double it until it's large enough to | ||
| 56 | * accommodate @target_idx. | ||
| 57 | */ | ||
| 58 | new_sz = PRIOMAP_MIN_SZ; | ||
| 59 | while (true) { | ||
| 60 | new_len = (new_sz - offsetof(struct netprio_map, priomap)) / | ||
| 61 | sizeof(new->priomap[0]); | ||
| 62 | if (new_len > target_idx) | ||
| 63 | break; | ||
| 64 | new_sz *= 2; | ||
| 65 | /* overflowed? */ | ||
| 66 | if (WARN_ON(new_sz < PRIOMAP_MIN_SZ)) | ||
| 67 | return -ENOSPC; | ||
| 68 | } | ||
| 78 | 69 | ||
| 79 | if (!new_priomap) { | 70 | /* allocate & copy */ |
| 71 | new = kzalloc(new_sz, GFP_KERNEL); | ||
| 72 | if (!new) { | ||
| 80 | pr_warn("Unable to alloc new priomap!\n"); | 73 | pr_warn("Unable to alloc new priomap!\n"); |
| 81 | return -ENOMEM; | 74 | return -ENOMEM; |
| 82 | } | 75 | } |
| 83 | 76 | ||
| 84 | if (old_priomap) | 77 | if (old) |
| 85 | memcpy(new_priomap->priomap, old_priomap->priomap, | 78 | memcpy(new->priomap, old->priomap, |
| 86 | old_priomap->priomap_len * | 79 | old->priomap_len * sizeof(old->priomap[0])); |
| 87 | sizeof(old_priomap->priomap[0])); | ||
| 88 | 80 | ||
| 89 | new_priomap->priomap_len = new_len; | 81 | new->priomap_len = new_len; |
| 90 | 82 | ||
| 91 | rcu_assign_pointer(dev->priomap, new_priomap); | 83 | /* install the new priomap */ |
| 92 | if (old_priomap) | 84 | rcu_assign_pointer(dev->priomap, new); |
| 93 | kfree_rcu(old_priomap, rcu); | 85 | if (old) |
| 86 | kfree_rcu(old, rcu); | ||
| 94 | return 0; | 87 | return 0; |
| 95 | } | 88 | } |
| 96 | 89 | ||
| 97 | static int write_update_netdev_table(struct net_device *dev) | 90 | /** |
| 91 | * netprio_prio - return the effective netprio of a cgroup-net_device pair | ||
| 92 | * @cgrp: cgroup part of the target pair | ||
| 93 | * @dev: net_device part of the target pair | ||
| 94 | * | ||
| 95 | * Should be called under RCU read or rtnl lock. | ||
| 96 | */ | ||
| 97 | static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev) | ||
| 98 | { | ||
| 99 | struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); | ||
| 100 | |||
| 101 | if (map && cgrp->id < map->priomap_len) | ||
| 102 | return map->priomap[cgrp->id]; | ||
| 103 | return 0; | ||
| 104 | } | ||
| 105 | |||
| 106 | /** | ||
| 107 | * netprio_set_prio - set netprio on a cgroup-net_device pair | ||
| 108 | * @cgrp: cgroup part of the target pair | ||
| 109 | * @dev: net_device part of the target pair | ||
| 110 | * @prio: prio to set | ||
| 111 | * | ||
| 112 | * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl | ||
| 113 | * lock and may fail under memory pressure for non-zero @prio. | ||
| 114 | */ | ||
| 115 | static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev, | ||
| 116 | u32 prio) | ||
| 98 | { | 117 | { |
| 99 | int ret = 0; | ||
| 100 | u32 max_len; | ||
| 101 | struct netprio_map *map; | 118 | struct netprio_map *map; |
| 119 | int ret; | ||
| 102 | 120 | ||
| 103 | max_len = atomic_read(&max_prioidx) + 1; | 121 | /* avoid extending priomap for zero writes */ |
| 104 | map = rtnl_dereference(dev->priomap); | 122 | map = rtnl_dereference(dev->priomap); |
| 105 | if (!map || map->priomap_len < max_len) | 123 | if (!prio && (!map || map->priomap_len <= cgrp->id)) |
| 106 | ret = extend_netdev_table(dev, max_len); | 124 | return 0; |
| 107 | 125 | ||
| 108 | return ret; | 126 | ret = extend_netdev_table(dev, cgrp->id); |
| 127 | if (ret) | ||
| 128 | return ret; | ||
| 129 | |||
| 130 | map = rtnl_dereference(dev->priomap); | ||
| 131 | map->priomap[cgrp->id] = prio; | ||
| 132 | return 0; | ||
| 109 | } | 133 | } |
| 110 | 134 | ||
| 111 | static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) | 135 | static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) |
| 112 | { | 136 | { |
| 113 | struct cgroup_netprio_state *cs; | 137 | struct cgroup_netprio_state *cs; |
| 114 | int ret = -EINVAL; | ||
| 115 | 138 | ||
| 116 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); | 139 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); |
| 117 | if (!cs) | 140 | if (!cs) |
| 118 | return ERR_PTR(-ENOMEM); | 141 | return ERR_PTR(-ENOMEM); |
| 119 | 142 | ||
| 120 | if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) | ||
| 121 | goto out; | ||
| 122 | |||
| 123 | ret = get_prioidx(&cs->prioidx); | ||
| 124 | if (ret < 0) { | ||
| 125 | pr_warn("No space in priority index array\n"); | ||
| 126 | goto out; | ||
| 127 | } | ||
| 128 | |||
| 129 | return &cs->css; | 143 | return &cs->css; |
| 130 | out: | ||
| 131 | kfree(cs); | ||
| 132 | return ERR_PTR(ret); | ||
| 133 | } | 144 | } |
| 134 | 145 | ||
| 135 | static void cgrp_destroy(struct cgroup *cgrp) | 146 | static int cgrp_css_online(struct cgroup *cgrp) |
| 136 | { | 147 | { |
| 137 | struct cgroup_netprio_state *cs; | 148 | struct cgroup *parent = cgrp->parent; |
| 138 | struct net_device *dev; | 149 | struct net_device *dev; |
| 139 | struct netprio_map *map; | 150 | int ret = 0; |
| 151 | |||
| 152 | if (!parent) | ||
| 153 | return 0; | ||
| 140 | 154 | ||
| 141 | cs = cgrp_netprio_state(cgrp); | ||
| 142 | rtnl_lock(); | 155 | rtnl_lock(); |
| 156 | /* | ||
| 157 | * Inherit prios from the parent. As all prios are set during | ||
| 158 | * onlining, there is no need to clear them on offline. | ||
| 159 | */ | ||
| 143 | for_each_netdev(&init_net, dev) { | 160 | for_each_netdev(&init_net, dev) { |
| 144 | map = rtnl_dereference(dev->priomap); | 161 | u32 prio = netprio_prio(parent, dev); |
| 145 | if (map && cs->prioidx < map->priomap_len) | 162 | |
| 146 | map->priomap[cs->prioidx] = 0; | 163 | ret = netprio_set_prio(cgrp, dev, prio); |
| 164 | if (ret) | ||
| 165 | break; | ||
| 147 | } | 166 | } |
| 148 | rtnl_unlock(); | 167 | rtnl_unlock(); |
| 149 | put_prioidx(cs->prioidx); | 168 | return ret; |
| 150 | kfree(cs); | 169 | } |
| 170 | |||
| 171 | static void cgrp_css_free(struct cgroup *cgrp) | ||
| 172 | { | ||
| 173 | kfree(cgrp_netprio_state(cgrp)); | ||
| 151 | } | 174 | } |
| 152 | 175 | ||
| 153 | static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) | 176 | static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) |
| 154 | { | 177 | { |
| 155 | return (u64)cgrp_netprio_state(cgrp)->prioidx; | 178 | return cgrp->id; |
| 156 | } | 179 | } |
| 157 | 180 | ||
| 158 | static int read_priomap(struct cgroup *cont, struct cftype *cft, | 181 | static int read_priomap(struct cgroup *cont, struct cftype *cft, |
| 159 | struct cgroup_map_cb *cb) | 182 | struct cgroup_map_cb *cb) |
| 160 | { | 183 | { |
| 161 | struct net_device *dev; | 184 | struct net_device *dev; |
| 162 | u32 prioidx = cgrp_netprio_state(cont)->prioidx; | ||
| 163 | u32 priority; | ||
| 164 | struct netprio_map *map; | ||
| 165 | 185 | ||
| 166 | rcu_read_lock(); | 186 | rcu_read_lock(); |
| 167 | for_each_netdev_rcu(&init_net, dev) { | 187 | for_each_netdev_rcu(&init_net, dev) |
| 168 | map = rcu_dereference(dev->priomap); | 188 | cb->fill(cb, dev->name, netprio_prio(cont, dev)); |
| 169 | priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0; | ||
| 170 | cb->fill(cb, dev->name, priority); | ||
| 171 | } | ||
| 172 | rcu_read_unlock(); | 189 | rcu_read_unlock(); |
| 173 | return 0; | 190 | return 0; |
| 174 | } | 191 | } |
| @@ -176,66 +193,24 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft, | |||
| 176 | static int write_priomap(struct cgroup *cgrp, struct cftype *cft, | 193 | static int write_priomap(struct cgroup *cgrp, struct cftype *cft, |
| 177 | const char *buffer) | 194 | const char *buffer) |
| 178 | { | 195 | { |
| 179 | char *devname = kstrdup(buffer, GFP_KERNEL); | 196 | char devname[IFNAMSIZ + 1]; |
| 180 | int ret = -EINVAL; | ||
| 181 | u32 prioidx = cgrp_netprio_state(cgrp)->prioidx; | ||
| 182 | unsigned long priority; | ||
| 183 | char *priostr; | ||
| 184 | struct net_device *dev; | 197 | struct net_device *dev; |
| 185 | struct netprio_map *map; | 198 | u32 prio; |
| 186 | 199 | int ret; | |
| 187 | if (!devname) | ||
| 188 | return -ENOMEM; | ||
| 189 | |||
| 190 | /* | ||
| 191 | * Minimally sized valid priomap string | ||
| 192 | */ | ||
| 193 | if (strlen(devname) < 3) | ||
| 194 | goto out_free_devname; | ||
| 195 | |||
| 196 | priostr = strstr(devname, " "); | ||
| 197 | if (!priostr) | ||
| 198 | goto out_free_devname; | ||
| 199 | |||
| 200 | /* | ||
| 201 | *Separate the devname from the associated priority | ||
| 202 | *and advance the priostr pointer to the priority value | ||
| 203 | */ | ||
| 204 | *priostr = '\0'; | ||
| 205 | priostr++; | ||
| 206 | |||
| 207 | /* | ||
| 208 | * If the priostr points to NULL, we're at the end of the passed | ||
| 209 | * in string, and it's not a valid write | ||
| 210 | */ | ||
| 211 | if (*priostr == '\0') | ||
| 212 | goto out_free_devname; | ||
| 213 | |||
| 214 | ret = kstrtoul(priostr, 10, &priority); | ||
| 215 | if (ret < 0) | ||
| 216 | goto out_free_devname; | ||
| 217 | 200 | ||
| 218 | ret = -ENODEV; | 201 | if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) |
| 202 | return -EINVAL; | ||
| 219 | 203 | ||
| 220 | dev = dev_get_by_name(&init_net, devname); | 204 | dev = dev_get_by_name(&init_net, devname); |
| 221 | if (!dev) | 205 | if (!dev) |
| 222 | goto out_free_devname; | 206 | return -ENODEV; |
| 223 | 207 | ||
| 224 | rtnl_lock(); | 208 | rtnl_lock(); |
| 225 | ret = write_update_netdev_table(dev); | ||
| 226 | if (ret < 0) | ||
| 227 | goto out_put_dev; | ||
| 228 | 209 | ||
| 229 | map = rtnl_dereference(dev->priomap); | 210 | ret = netprio_set_prio(cgrp, dev, prio); |
| 230 | if (map) | ||
| 231 | map->priomap[prioidx] = priority; | ||
| 232 | 211 | ||
| 233 | out_put_dev: | ||
| 234 | rtnl_unlock(); | 212 | rtnl_unlock(); |
| 235 | dev_put(dev); | 213 | dev_put(dev); |
| 236 | |||
| 237 | out_free_devname: | ||
| 238 | kfree(devname); | ||
| 239 | return ret; | 214 | return ret; |
| 240 | } | 215 | } |
| 241 | 216 | ||
| @@ -248,7 +223,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n) | |||
| 248 | return 0; | 223 | return 0; |
| 249 | } | 224 | } |
| 250 | 225 | ||
| 251 | void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 226 | static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
| 252 | { | 227 | { |
| 253 | struct task_struct *p; | 228 | struct task_struct *p; |
| 254 | void *v; | 229 | void *v; |
| @@ -276,22 +251,13 @@ static struct cftype ss_files[] = { | |||
| 276 | 251 | ||
| 277 | struct cgroup_subsys net_prio_subsys = { | 252 | struct cgroup_subsys net_prio_subsys = { |
| 278 | .name = "net_prio", | 253 | .name = "net_prio", |
| 279 | .create = cgrp_create, | 254 | .css_alloc = cgrp_css_alloc, |
| 280 | .destroy = cgrp_destroy, | 255 | .css_online = cgrp_css_online, |
| 256 | .css_free = cgrp_css_free, | ||
| 281 | .attach = net_prio_attach, | 257 | .attach = net_prio_attach, |
| 282 | .subsys_id = net_prio_subsys_id, | 258 | .subsys_id = net_prio_subsys_id, |
| 283 | .base_cftypes = ss_files, | 259 | .base_cftypes = ss_files, |
| 284 | .module = THIS_MODULE, | 260 | .module = THIS_MODULE, |
| 285 | |||
| 286 | /* | ||
| 287 | * net_prio has artificial limit on the number of cgroups and | ||
| 288 | * disallows nesting making it impossible to co-mount it with other | ||
| 289 | * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ | ||
| 290 | * limit and properly nest configuration such that children follow | ||
| 291 | * their parents' configurations by default and are allowed to | ||
| 292 | * override and remove the following. | ||
| 293 | */ | ||
| 294 | .broken_hierarchy = true, | ||
| 295 | }; | 261 | }; |
| 296 | 262 | ||
| 297 | static int netprio_device_event(struct notifier_block *unused, | 263 | static int netprio_device_event(struct notifier_block *unused, |
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d1dc14c2aac4..b29dacf900f9 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
| @@ -419,20 +419,6 @@ struct pktgen_thread { | |||
| 419 | #define REMOVE 1 | 419 | #define REMOVE 1 |
| 420 | #define FIND 0 | 420 | #define FIND 0 |
| 421 | 421 | ||
| 422 | static inline ktime_t ktime_now(void) | ||
| 423 | { | ||
| 424 | struct timespec ts; | ||
| 425 | ktime_get_ts(&ts); | ||
| 426 | |||
| 427 | return timespec_to_ktime(ts); | ||
| 428 | } | ||
| 429 | |||
| 430 | /* This works even if 32 bit because of careful byte order choice */ | ||
| 431 | static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2) | ||
| 432 | { | ||
| 433 | return cmp1.tv64 < cmp2.tv64; | ||
| 434 | } | ||
| 435 | |||
| 436 | static const char version[] = | 422 | static const char version[] = |
| 437 | "Packet Generator for packet performance testing. " | 423 | "Packet Generator for packet performance testing. " |
| 438 | "Version: " VERSION "\n"; | 424 | "Version: " VERSION "\n"; |
| @@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
| 675 | seq_puts(seq, "\n"); | 661 | seq_puts(seq, "\n"); |
| 676 | 662 | ||
| 677 | /* not really stopped, more like last-running-at */ | 663 | /* not really stopped, more like last-running-at */ |
| 678 | stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at; | 664 | stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at; |
| 679 | idle = pkt_dev->idle_acc; | 665 | idle = pkt_dev->idle_acc; |
| 680 | do_div(idle, NSEC_PER_USEC); | 666 | do_div(idle, NSEC_PER_USEC); |
| 681 | 667 | ||
| @@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) | |||
| 2141 | return; | 2127 | return; |
| 2142 | } | 2128 | } |
| 2143 | 2129 | ||
| 2144 | start_time = ktime_now(); | 2130 | start_time = ktime_get(); |
| 2145 | if (remaining < 100000) { | 2131 | if (remaining < 100000) { |
| 2146 | /* for small delays (<100us), just loop until limit is reached */ | 2132 | /* for small delays (<100us), just loop until limit is reached */ |
| 2147 | do { | 2133 | do { |
| 2148 | end_time = ktime_now(); | 2134 | end_time = ktime_get(); |
| 2149 | } while (ktime_lt(end_time, spin_until)); | 2135 | } while (ktime_compare(end_time, spin_until) < 0); |
| 2150 | } else { | 2136 | } else { |
| 2151 | /* see do_nanosleep */ | 2137 | /* see do_nanosleep */ |
| 2152 | hrtimer_init_sleeper(&t, current); | 2138 | hrtimer_init_sleeper(&t, current); |
| @@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) | |||
| 2162 | hrtimer_cancel(&t.timer); | 2148 | hrtimer_cancel(&t.timer); |
| 2163 | } while (t.task && pkt_dev->running && !signal_pending(current)); | 2149 | } while (t.task && pkt_dev->running && !signal_pending(current)); |
| 2164 | __set_current_state(TASK_RUNNING); | 2150 | __set_current_state(TASK_RUNNING); |
| 2165 | end_time = ktime_now(); | 2151 | end_time = ktime_get(); |
| 2166 | } | 2152 | } |
| 2167 | 2153 | ||
| 2168 | pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); | 2154 | pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); |
| @@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) | |||
| 2427 | } | 2413 | } |
| 2428 | } else { /* IPV6 * */ | 2414 | } else { /* IPV6 * */ |
| 2429 | 2415 | ||
| 2430 | if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && | 2416 | if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) { |
| 2431 | pkt_dev->min_in6_daddr.s6_addr32[1] == 0 && | ||
| 2432 | pkt_dev->min_in6_daddr.s6_addr32[2] == 0 && | ||
| 2433 | pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ; | ||
| 2434 | else { | ||
| 2435 | int i; | 2417 | int i; |
| 2436 | 2418 | ||
| 2437 | /* Only random destinations yet */ | 2419 | /* Only random destinations yet */ |
| @@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t) | |||
| 2916 | pktgen_clear_counters(pkt_dev); | 2898 | pktgen_clear_counters(pkt_dev); |
| 2917 | pkt_dev->running = 1; /* Cranke yeself! */ | 2899 | pkt_dev->running = 1; /* Cranke yeself! */ |
| 2918 | pkt_dev->skb = NULL; | 2900 | pkt_dev->skb = NULL; |
| 2919 | pkt_dev->started_at = | 2901 | pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); |
| 2920 | pkt_dev->next_tx = ktime_now(); | ||
| 2921 | 2902 | ||
| 2922 | set_pkt_overhead(pkt_dev); | 2903 | set_pkt_overhead(pkt_dev); |
| 2923 | 2904 | ||
| @@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) | |||
| 3076 | 3057 | ||
| 3077 | kfree_skb(pkt_dev->skb); | 3058 | kfree_skb(pkt_dev->skb); |
| 3078 | pkt_dev->skb = NULL; | 3059 | pkt_dev->skb = NULL; |
| 3079 | pkt_dev->stopped_at = ktime_now(); | 3060 | pkt_dev->stopped_at = ktime_get(); |
| 3080 | pkt_dev->running = 0; | 3061 | pkt_dev->running = 0; |
| 3081 | 3062 | ||
| 3082 | show_results(pkt_dev, nr_frags); | 3063 | show_results(pkt_dev, nr_frags); |
| @@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) | |||
| 3095 | continue; | 3076 | continue; |
| 3096 | if (best == NULL) | 3077 | if (best == NULL) |
| 3097 | best = pkt_dev; | 3078 | best = pkt_dev; |
| 3098 | else if (ktime_lt(pkt_dev->next_tx, best->next_tx)) | 3079 | else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) |
| 3099 | best = pkt_dev; | 3080 | best = pkt_dev; |
| 3100 | } | 3081 | } |
| 3101 | if_unlock(t); | 3082 | if_unlock(t); |
| @@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) | |||
| 3180 | 3161 | ||
| 3181 | static void pktgen_resched(struct pktgen_dev *pkt_dev) | 3162 | static void pktgen_resched(struct pktgen_dev *pkt_dev) |
| 3182 | { | 3163 | { |
| 3183 | ktime_t idle_start = ktime_now(); | 3164 | ktime_t idle_start = ktime_get(); |
| 3184 | schedule(); | 3165 | schedule(); |
| 3185 | pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); | 3166 | pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); |
| 3186 | } | 3167 | } |
| 3187 | 3168 | ||
| 3188 | static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) | 3169 | static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) |
| 3189 | { | 3170 | { |
| 3190 | ktime_t idle_start = ktime_now(); | 3171 | ktime_t idle_start = ktime_get(); |
| 3191 | 3172 | ||
| 3192 | while (atomic_read(&(pkt_dev->skb->users)) != 1) { | 3173 | while (atomic_read(&(pkt_dev->skb->users)) != 1) { |
| 3193 | if (signal_pending(current)) | 3174 | if (signal_pending(current)) |
| @@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) | |||
| 3198 | else | 3179 | else |
| 3199 | cpu_relax(); | 3180 | cpu_relax(); |
| 3200 | } | 3181 | } |
| 3201 | pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); | 3182 | pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); |
| 3202 | } | 3183 | } |
| 3203 | 3184 | ||
| 3204 | static void pktgen_xmit(struct pktgen_dev *pkt_dev) | 3185 | static void pktgen_xmit(struct pktgen_dev *pkt_dev) |
| @@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
| 3220 | * "never transmit" | 3201 | * "never transmit" |
| 3221 | */ | 3202 | */ |
| 3222 | if (unlikely(pkt_dev->delay == ULLONG_MAX)) { | 3203 | if (unlikely(pkt_dev->delay == ULLONG_MAX)) { |
| 3223 | pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); | 3204 | pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX); |
| 3224 | return; | 3205 | return; |
| 3225 | } | 3206 | } |
| 3226 | 3207 | ||
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fad649ae4dec..1868625af25e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
| @@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) | |||
| 128 | if (tab == NULL || tab[msgindex].doit == NULL) | 128 | if (tab == NULL || tab[msgindex].doit == NULL) |
| 129 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 129 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
| 130 | 130 | ||
| 131 | return tab ? tab[msgindex].doit : NULL; | 131 | return tab[msgindex].doit; |
| 132 | } | 132 | } |
| 133 | 133 | ||
| 134 | static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) | 134 | static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) |
| @@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) | |||
| 143 | if (tab == NULL || tab[msgindex].dumpit == NULL) | 143 | if (tab == NULL || tab[msgindex].dumpit == NULL) |
| 144 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 144 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
| 145 | 145 | ||
| 146 | return tab ? tab[msgindex].dumpit : NULL; | 146 | return tab[msgindex].dumpit; |
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) | 149 | static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) |
| @@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) | |||
| 158 | if (tab == NULL || tab[msgindex].calcit == NULL) | 158 | if (tab == NULL || tab[msgindex].calcit == NULL) |
| 159 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 159 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
| 160 | 160 | ||
| 161 | return tab ? tab[msgindex].calcit : NULL; | 161 | return tab[msgindex].calcit; |
| 162 | } | 162 | } |
| 163 | 163 | ||
| 164 | /** | 164 | /** |
| @@ -1316,6 +1316,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
| 1316 | err = PTR_ERR(net); | 1316 | err = PTR_ERR(net); |
| 1317 | goto errout; | 1317 | goto errout; |
| 1318 | } | 1318 | } |
| 1319 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { | ||
| 1320 | err = -EPERM; | ||
| 1321 | goto errout; | ||
| 1322 | } | ||
| 1319 | err = dev_change_net_namespace(dev, net, ifname); | 1323 | err = dev_change_net_namespace(dev, net, ifname); |
| 1320 | put_net(net); | 1324 | put_net(net); |
| 1321 | if (err) | 1325 | if (err) |
| @@ -1638,7 +1642,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) | |||
| 1638 | } | 1642 | } |
| 1639 | EXPORT_SYMBOL(rtnl_configure_link); | 1643 | EXPORT_SYMBOL(rtnl_configure_link); |
| 1640 | 1644 | ||
| 1641 | struct net_device *rtnl_create_link(struct net *src_net, struct net *net, | 1645 | struct net_device *rtnl_create_link(struct net *net, |
| 1642 | char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) | 1646 | char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) |
| 1643 | { | 1647 | { |
| 1644 | int err; | 1648 | int err; |
| @@ -1836,7 +1840,7 @@ replay: | |||
| 1836 | if (IS_ERR(dest_net)) | 1840 | if (IS_ERR(dest_net)) |
| 1837 | return PTR_ERR(dest_net); | 1841 | return PTR_ERR(dest_net); |
| 1838 | 1842 | ||
| 1839 | dev = rtnl_create_link(net, dest_net, ifname, ops, tb); | 1843 | dev = rtnl_create_link(dest_net, ifname, ops, tb); |
| 1840 | if (IS_ERR(dev)) { | 1844 | if (IS_ERR(dev)) { |
| 1841 | err = PTR_ERR(dev); | 1845 | err = PTR_ERR(dev); |
| 1842 | goto out; | 1846 | goto out; |
| @@ -2057,6 +2061,9 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
| 2057 | u8 *addr; | 2061 | u8 *addr; |
| 2058 | int err; | 2062 | int err; |
| 2059 | 2063 | ||
| 2064 | if (!capable(CAP_NET_ADMIN)) | ||
| 2065 | return -EPERM; | ||
| 2066 | |||
| 2060 | err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); | 2067 | err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); |
| 2061 | if (err < 0) | 2068 | if (err < 0) |
| 2062 | return err; | 2069 | return err; |
| @@ -2123,6 +2130,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
| 2123 | int err = -EINVAL; | 2130 | int err = -EINVAL; |
| 2124 | __u8 *addr; | 2131 | __u8 *addr; |
| 2125 | 2132 | ||
| 2133 | if (!capable(CAP_NET_ADMIN)) | ||
| 2134 | return -EPERM; | ||
| 2135 | |||
| 2126 | if (nlmsg_len(nlh) < sizeof(*ndm)) | 2136 | if (nlmsg_len(nlh) < sizeof(*ndm)) |
| 2127 | return -EINVAL; | 2137 | return -EINVAL; |
| 2128 | 2138 | ||
| @@ -2253,6 +2263,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
| 2253 | return skb->len; | 2263 | return skb->len; |
| 2254 | } | 2264 | } |
| 2255 | 2265 | ||
| 2266 | int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, | ||
| 2267 | struct net_device *dev, u16 mode) | ||
| 2268 | { | ||
| 2269 | struct nlmsghdr *nlh; | ||
| 2270 | struct ifinfomsg *ifm; | ||
| 2271 | struct nlattr *br_afspec; | ||
| 2272 | u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; | ||
| 2273 | |||
| 2274 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); | ||
| 2275 | if (nlh == NULL) | ||
| 2276 | return -EMSGSIZE; | ||
| 2277 | |||
| 2278 | ifm = nlmsg_data(nlh); | ||
| 2279 | ifm->ifi_family = AF_BRIDGE; | ||
| 2280 | ifm->__ifi_pad = 0; | ||
| 2281 | ifm->ifi_type = dev->type; | ||
| 2282 | ifm->ifi_index = dev->ifindex; | ||
| 2283 | ifm->ifi_flags = dev_get_flags(dev); | ||
| 2284 | ifm->ifi_change = 0; | ||
| 2285 | |||
| 2286 | |||
| 2287 | if (nla_put_string(skb, IFLA_IFNAME, dev->name) || | ||
| 2288 | nla_put_u32(skb, IFLA_MTU, dev->mtu) || | ||
| 2289 | nla_put_u8(skb, IFLA_OPERSTATE, operstate) || | ||
| 2290 | (dev->master && | ||
| 2291 | nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || | ||
| 2292 | (dev->addr_len && | ||
| 2293 | nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || | ||
| 2294 | (dev->ifindex != dev->iflink && | ||
| 2295 | nla_put_u32(skb, IFLA_LINK, dev->iflink))) | ||
| 2296 | goto nla_put_failure; | ||
| 2297 | |||
| 2298 | br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); | ||
| 2299 | if (!br_afspec) | ||
| 2300 | goto nla_put_failure; | ||
| 2301 | |||
| 2302 | if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) || | ||
| 2303 | nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) { | ||
| 2304 | nla_nest_cancel(skb, br_afspec); | ||
| 2305 | goto nla_put_failure; | ||
| 2306 | } | ||
| 2307 | nla_nest_end(skb, br_afspec); | ||
| 2308 | |||
| 2309 | return nlmsg_end(skb, nlh); | ||
| 2310 | nla_put_failure: | ||
| 2311 | nlmsg_cancel(skb, nlh); | ||
| 2312 | return -EMSGSIZE; | ||
| 2313 | } | ||
| 2314 | EXPORT_SYMBOL(ndo_dflt_bridge_getlink); | ||
| 2315 | |||
| 2316 | static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) | ||
| 2317 | { | ||
| 2318 | struct net *net = sock_net(skb->sk); | ||
| 2319 | struct net_device *dev; | ||
| 2320 | int idx = 0; | ||
| 2321 | u32 portid = NETLINK_CB(cb->skb).portid; | ||
| 2322 | u32 seq = cb->nlh->nlmsg_seq; | ||
| 2323 | |||
| 2324 | rcu_read_lock(); | ||
| 2325 | for_each_netdev_rcu(net, dev) { | ||
| 2326 | const struct net_device_ops *ops = dev->netdev_ops; | ||
| 2327 | struct net_device *master = dev->master; | ||
| 2328 | |||
| 2329 | if (master && master->netdev_ops->ndo_bridge_getlink) { | ||
| 2330 | if (idx >= cb->args[0] && | ||
| 2331 | master->netdev_ops->ndo_bridge_getlink( | ||
| 2332 | skb, portid, seq, dev) < 0) | ||
| 2333 | break; | ||
| 2334 | idx++; | ||
| 2335 | } | ||
| 2336 | |||
| 2337 | if (ops->ndo_bridge_getlink) { | ||
| 2338 | if (idx >= cb->args[0] && | ||
| 2339 | ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) | ||
| 2340 | break; | ||
| 2341 | idx++; | ||
| 2342 | } | ||
| 2343 | } | ||
| 2344 | rcu_read_unlock(); | ||
| 2345 | cb->args[0] = idx; | ||
| 2346 | |||
| 2347 | return skb->len; | ||
| 2348 | } | ||
| 2349 | |||
| 2350 | static inline size_t bridge_nlmsg_size(void) | ||
| 2351 | { | ||
| 2352 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) | ||
| 2353 | + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ | ||
| 2354 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ | ||
| 2355 | + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ | ||
| 2356 | + nla_total_size(sizeof(u32)) /* IFLA_MTU */ | ||
| 2357 | + nla_total_size(sizeof(u32)) /* IFLA_LINK */ | ||
| 2358 | + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ | ||
| 2359 | + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ | ||
| 2360 | + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ | ||
| 2361 | + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ | ||
| 2362 | + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ | ||
| 2363 | } | ||
| 2364 | |||
| 2365 | static int rtnl_bridge_notify(struct net_device *dev, u16 flags) | ||
| 2366 | { | ||
| 2367 | struct net *net = dev_net(dev); | ||
| 2368 | struct net_device *master = dev->master; | ||
| 2369 | struct sk_buff *skb; | ||
| 2370 | int err = -EOPNOTSUPP; | ||
| 2371 | |||
| 2372 | skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); | ||
| 2373 | if (!skb) { | ||
| 2374 | err = -ENOMEM; | ||
| 2375 | goto errout; | ||
| 2376 | } | ||
| 2377 | |||
| 2378 | if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && | ||
| 2379 | master && master->netdev_ops->ndo_bridge_getlink) { | ||
| 2380 | err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); | ||
| 2381 | if (err < 0) | ||
| 2382 | goto errout; | ||
| 2383 | } | ||
| 2384 | |||
| 2385 | if ((flags & BRIDGE_FLAGS_SELF) && | ||
| 2386 | dev->netdev_ops->ndo_bridge_getlink) { | ||
| 2387 | err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); | ||
| 2388 | if (err < 0) | ||
| 2389 | goto errout; | ||
| 2390 | } | ||
| 2391 | |||
| 2392 | rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); | ||
| 2393 | return 0; | ||
| 2394 | errout: | ||
| 2395 | WARN_ON(err == -EMSGSIZE); | ||
| 2396 | kfree_skb(skb); | ||
| 2397 | rtnl_set_sk_err(net, RTNLGRP_LINK, err); | ||
| 2398 | return err; | ||
| 2399 | } | ||
| 2400 | |||
| 2401 | static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, | ||
| 2402 | void *arg) | ||
| 2403 | { | ||
| 2404 | struct net *net = sock_net(skb->sk); | ||
| 2405 | struct ifinfomsg *ifm; | ||
| 2406 | struct net_device *dev; | ||
| 2407 | struct nlattr *br_spec, *attr = NULL; | ||
| 2408 | int rem, err = -EOPNOTSUPP; | ||
| 2409 | u16 oflags, flags = 0; | ||
| 2410 | bool have_flags = false; | ||
| 2411 | |||
| 2412 | if (nlmsg_len(nlh) < sizeof(*ifm)) | ||
| 2413 | return -EINVAL; | ||
| 2414 | |||
| 2415 | ifm = nlmsg_data(nlh); | ||
| 2416 | if (ifm->ifi_family != AF_BRIDGE) | ||
| 2417 | return -EPFNOSUPPORT; | ||
| 2418 | |||
| 2419 | dev = __dev_get_by_index(net, ifm->ifi_index); | ||
| 2420 | if (!dev) { | ||
| 2421 | pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); | ||
| 2422 | return -ENODEV; | ||
| 2423 | } | ||
| 2424 | |||
| 2425 | br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); | ||
| 2426 | if (br_spec) { | ||
| 2427 | nla_for_each_nested(attr, br_spec, rem) { | ||
| 2428 | if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { | ||
| 2429 | have_flags = true; | ||
| 2430 | flags = nla_get_u16(attr); | ||
| 2431 | break; | ||
| 2432 | } | ||
| 2433 | } | ||
| 2434 | } | ||
| 2435 | |||
| 2436 | oflags = flags; | ||
| 2437 | |||
| 2438 | if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { | ||
| 2439 | if (!dev->master || | ||
| 2440 | !dev->master->netdev_ops->ndo_bridge_setlink) { | ||
| 2441 | err = -EOPNOTSUPP; | ||
| 2442 | goto out; | ||
| 2443 | } | ||
| 2444 | |||
| 2445 | err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); | ||
| 2446 | if (err) | ||
| 2447 | goto out; | ||
| 2448 | |||
| 2449 | flags &= ~BRIDGE_FLAGS_MASTER; | ||
| 2450 | } | ||
| 2451 | |||
| 2452 | if ((flags & BRIDGE_FLAGS_SELF)) { | ||
| 2453 | if (!dev->netdev_ops->ndo_bridge_setlink) | ||
| 2454 | err = -EOPNOTSUPP; | ||
| 2455 | else | ||
| 2456 | err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); | ||
| 2457 | |||
| 2458 | if (!err) | ||
| 2459 | flags &= ~BRIDGE_FLAGS_SELF; | ||
| 2460 | } | ||
| 2461 | |||
| 2462 | if (have_flags) | ||
| 2463 | memcpy(nla_data(attr), &flags, sizeof(flags)); | ||
| 2464 | /* Generate event to notify upper layer of bridge change */ | ||
| 2465 | if (!err) | ||
| 2466 | err = rtnl_bridge_notify(dev, oflags); | ||
| 2467 | out: | ||
| 2468 | return err; | ||
| 2469 | } | ||
| 2470 | |||
| 2256 | /* Protected by RTNL sempahore. */ | 2471 | /* Protected by RTNL sempahore. */ |
| 2257 | static struct rtattr **rta_buf; | 2472 | static struct rtattr **rta_buf; |
| 2258 | static int rtattr_max; | 2473 | static int rtattr_max; |
| @@ -2283,7 +2498,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 2283 | sz_idx = type>>2; | 2498 | sz_idx = type>>2; |
| 2284 | kind = type&3; | 2499 | kind = type&3; |
| 2285 | 2500 | ||
| 2286 | if (kind != 2 && !capable(CAP_NET_ADMIN)) | 2501 | if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN)) |
| 2287 | return -EPERM; | 2502 | return -EPERM; |
| 2288 | 2503 | ||
| 2289 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { | 2504 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { |
| @@ -2434,5 +2649,8 @@ void __init rtnetlink_init(void) | |||
| 2434 | rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); | 2649 | rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); |
| 2435 | rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); | 2650 | rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); |
| 2436 | rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); | 2651 | rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); |
| 2652 | |||
| 2653 | rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); | ||
| 2654 | rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); | ||
| 2437 | } | 2655 | } |
| 2438 | 2656 | ||
diff --git a/net/core/scm.c b/net/core/scm.c index ab570841a532..57fb1ee6649f 100644 --- a/net/core/scm.c +++ b/net/core/scm.c | |||
| @@ -51,11 +51,11 @@ static __inline__ int scm_check_creds(struct ucred *creds) | |||
| 51 | if (!uid_valid(uid) || !gid_valid(gid)) | 51 | if (!uid_valid(uid) || !gid_valid(gid)) |
| 52 | return -EINVAL; | 52 | return -EINVAL; |
| 53 | 53 | ||
| 54 | if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && | 54 | if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && |
| 55 | ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || | 55 | ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || |
| 56 | uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) && | 56 | uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && |
| 57 | ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || | 57 | ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || |
| 58 | gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) { | 58 | gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { |
| 59 | return 0; | 59 | return 0; |
| 60 | } | 60 | } |
| 61 | return -EPERM; | 61 | return -EPERM; |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3f0636cd76cd..3ab989b0de42 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
| @@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb) | |||
| 519 | 519 | ||
| 520 | uarg = skb_shinfo(skb)->destructor_arg; | 520 | uarg = skb_shinfo(skb)->destructor_arg; |
| 521 | if (uarg->callback) | 521 | if (uarg->callback) |
| 522 | uarg->callback(uarg); | 522 | uarg->callback(uarg, true); |
| 523 | } | 523 | } |
| 524 | 524 | ||
| 525 | if (skb_has_frag_list(skb)) | 525 | if (skb_has_frag_list(skb)) |
| @@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb) | |||
| 635 | EXPORT_SYMBOL(kfree_skb); | 635 | EXPORT_SYMBOL(kfree_skb); |
| 636 | 636 | ||
| 637 | /** | 637 | /** |
| 638 | * skb_tx_error - report an sk_buff xmit error | ||
| 639 | * @skb: buffer that triggered an error | ||
| 640 | * | ||
| 641 | * Report xmit error if a device callback is tracking this skb. | ||
| 642 | * skb must be freed afterwards. | ||
| 643 | */ | ||
| 644 | void skb_tx_error(struct sk_buff *skb) | ||
| 645 | { | ||
| 646 | if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { | ||
| 647 | struct ubuf_info *uarg; | ||
| 648 | |||
| 649 | uarg = skb_shinfo(skb)->destructor_arg; | ||
| 650 | if (uarg->callback) | ||
| 651 | uarg->callback(uarg, false); | ||
| 652 | skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; | ||
| 653 | } | ||
| 654 | } | ||
| 655 | EXPORT_SYMBOL(skb_tx_error); | ||
| 656 | |||
| 657 | /** | ||
| 638 | * consume_skb - free an skbuff | 658 | * consume_skb - free an skbuff |
| 639 | * @skb: buffer to free | 659 | * @skb: buffer to free |
| 640 | * | 660 | * |
| @@ -662,11 +682,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
| 662 | new->transport_header = old->transport_header; | 682 | new->transport_header = old->transport_header; |
| 663 | new->network_header = old->network_header; | 683 | new->network_header = old->network_header; |
| 664 | new->mac_header = old->mac_header; | 684 | new->mac_header = old->mac_header; |
| 685 | new->inner_transport_header = old->inner_transport_header; | ||
| 686 | new->inner_network_header = old->inner_transport_header; | ||
| 665 | skb_dst_copy(new, old); | 687 | skb_dst_copy(new, old); |
| 666 | new->rxhash = old->rxhash; | 688 | new->rxhash = old->rxhash; |
| 667 | new->ooo_okay = old->ooo_okay; | 689 | new->ooo_okay = old->ooo_okay; |
| 668 | new->l4_rxhash = old->l4_rxhash; | 690 | new->l4_rxhash = old->l4_rxhash; |
| 669 | new->no_fcs = old->no_fcs; | 691 | new->no_fcs = old->no_fcs; |
| 692 | new->encapsulation = old->encapsulation; | ||
| 670 | #ifdef CONFIG_XFRM | 693 | #ifdef CONFIG_XFRM |
| 671 | new->sp = secpath_get(old->sp); | 694 | new->sp = secpath_get(old->sp); |
| 672 | #endif | 695 | #endif |
| @@ -797,7 +820,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) | |||
| 797 | for (i = 0; i < num_frags; i++) | 820 | for (i = 0; i < num_frags; i++) |
| 798 | skb_frag_unref(skb, i); | 821 | skb_frag_unref(skb, i); |
| 799 | 822 | ||
| 800 | uarg->callback(uarg); | 823 | uarg->callback(uarg, false); |
| 801 | 824 | ||
| 802 | /* skb frags point to kernel buffers */ | 825 | /* skb frags point to kernel buffers */ |
| 803 | for (i = num_frags - 1; i >= 0; i--) { | 826 | for (i = num_frags - 1; i >= 0; i--) { |
| @@ -872,6 +895,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
| 872 | new->network_header += offset; | 895 | new->network_header += offset; |
| 873 | if (skb_mac_header_was_set(new)) | 896 | if (skb_mac_header_was_set(new)) |
| 874 | new->mac_header += offset; | 897 | new->mac_header += offset; |
| 898 | new->inner_transport_header += offset; | ||
| 899 | new->inner_network_header += offset; | ||
| 875 | #endif | 900 | #endif |
| 876 | skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; | 901 | skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; |
| 877 | skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; | 902 | skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; |
| @@ -1069,6 +1094,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, | |||
| 1069 | skb->network_header += off; | 1094 | skb->network_header += off; |
| 1070 | if (skb_mac_header_was_set(skb)) | 1095 | if (skb_mac_header_was_set(skb)) |
| 1071 | skb->mac_header += off; | 1096 | skb->mac_header += off; |
| 1097 | skb->inner_transport_header += off; | ||
| 1098 | skb->inner_network_header += off; | ||
| 1072 | /* Only adjust this if it actually is csum_start rather than csum */ | 1099 | /* Only adjust this if it actually is csum_start rather than csum */ |
| 1073 | if (skb->ip_summed == CHECKSUM_PARTIAL) | 1100 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
| 1074 | skb->csum_start += nhead; | 1101 | skb->csum_start += nhead; |
| @@ -1168,6 +1195,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, | |||
| 1168 | n->network_header += off; | 1195 | n->network_header += off; |
| 1169 | if (skb_mac_header_was_set(skb)) | 1196 | if (skb_mac_header_was_set(skb)) |
| 1170 | n->mac_header += off; | 1197 | n->mac_header += off; |
| 1198 | n->inner_transport_header += off; | ||
| 1199 | n->inner_network_header += off; | ||
| 1171 | #endif | 1200 | #endif |
| 1172 | 1201 | ||
| 1173 | return n; | 1202 | return n; |
| @@ -2999,7 +3028,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
| 2999 | memcpy(skb_mac_header(nskb), skb_mac_header(p), | 3028 | memcpy(skb_mac_header(nskb), skb_mac_header(p), |
| 3000 | p->data - skb_mac_header(p)); | 3029 | p->data - skb_mac_header(p)); |
| 3001 | 3030 | ||
| 3002 | *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); | ||
| 3003 | skb_shinfo(nskb)->frag_list = p; | 3031 | skb_shinfo(nskb)->frag_list = p; |
| 3004 | skb_shinfo(nskb)->gso_size = pinfo->gso_size; | 3032 | skb_shinfo(nskb)->gso_size = pinfo->gso_size; |
| 3005 | pinfo->gso_size = 0; | 3033 | pinfo->gso_size = 0; |
diff --git a/net/core/sock.c b/net/core/sock.c index 8a146cfcc366..a692ef49c9bb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -505,7 +505,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) | |||
| 505 | } | 505 | } |
| 506 | EXPORT_SYMBOL(sk_dst_check); | 506 | EXPORT_SYMBOL(sk_dst_check); |
| 507 | 507 | ||
| 508 | static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) | 508 | static int sock_setbindtodevice(struct sock *sk, char __user *optval, |
| 509 | int optlen) | ||
| 509 | { | 510 | { |
| 510 | int ret = -ENOPROTOOPT; | 511 | int ret = -ENOPROTOOPT; |
| 511 | #ifdef CONFIG_NETDEVICES | 512 | #ifdef CONFIG_NETDEVICES |
| @@ -515,7 +516,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) | |||
| 515 | 516 | ||
| 516 | /* Sorry... */ | 517 | /* Sorry... */ |
| 517 | ret = -EPERM; | 518 | ret = -EPERM; |
| 518 | if (!capable(CAP_NET_RAW)) | 519 | if (!ns_capable(net->user_ns, CAP_NET_RAW)) |
| 519 | goto out; | 520 | goto out; |
| 520 | 521 | ||
| 521 | ret = -EINVAL; | 522 | ret = -EINVAL; |
| @@ -562,6 +563,59 @@ out: | |||
| 562 | return ret; | 563 | return ret; |
| 563 | } | 564 | } |
| 564 | 565 | ||
| 566 | static int sock_getbindtodevice(struct sock *sk, char __user *optval, | ||
| 567 | int __user *optlen, int len) | ||
| 568 | { | ||
| 569 | int ret = -ENOPROTOOPT; | ||
| 570 | #ifdef CONFIG_NETDEVICES | ||
| 571 | struct net *net = sock_net(sk); | ||
| 572 | struct net_device *dev; | ||
| 573 | char devname[IFNAMSIZ]; | ||
| 574 | unsigned seq; | ||
| 575 | |||
| 576 | if (sk->sk_bound_dev_if == 0) { | ||
| 577 | len = 0; | ||
| 578 | goto zero; | ||
| 579 | } | ||
| 580 | |||
| 581 | ret = -EINVAL; | ||
| 582 | if (len < IFNAMSIZ) | ||
| 583 | goto out; | ||
| 584 | |||
| 585 | retry: | ||
| 586 | seq = read_seqbegin(&devnet_rename_seq); | ||
| 587 | rcu_read_lock(); | ||
| 588 | dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); | ||
| 589 | ret = -ENODEV; | ||
| 590 | if (!dev) { | ||
| 591 | rcu_read_unlock(); | ||
| 592 | goto out; | ||
| 593 | } | ||
| 594 | |||
| 595 | strcpy(devname, dev->name); | ||
| 596 | rcu_read_unlock(); | ||
| 597 | if (read_seqretry(&devnet_rename_seq, seq)) | ||
| 598 | goto retry; | ||
| 599 | |||
| 600 | len = strlen(devname) + 1; | ||
| 601 | |||
| 602 | ret = -EFAULT; | ||
| 603 | if (copy_to_user(optval, devname, len)) | ||
| 604 | goto out; | ||
| 605 | |||
| 606 | zero: | ||
| 607 | ret = -EFAULT; | ||
| 608 | if (put_user(len, optlen)) | ||
| 609 | goto out; | ||
| 610 | |||
| 611 | ret = 0; | ||
| 612 | |||
| 613 | out: | ||
| 614 | #endif | ||
| 615 | |||
| 616 | return ret; | ||
| 617 | } | ||
| 618 | |||
| 565 | static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) | 619 | static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) |
| 566 | { | 620 | { |
| 567 | if (valbool) | 621 | if (valbool) |
| @@ -589,7 +643,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, | |||
| 589 | */ | 643 | */ |
| 590 | 644 | ||
| 591 | if (optname == SO_BINDTODEVICE) | 645 | if (optname == SO_BINDTODEVICE) |
| 592 | return sock_bindtodevice(sk, optval, optlen); | 646 | return sock_setbindtodevice(sk, optval, optlen); |
| 593 | 647 | ||
| 594 | if (optlen < sizeof(int)) | 648 | if (optlen < sizeof(int)) |
| 595 | return -EINVAL; | 649 | return -EINVAL; |
| @@ -696,7 +750,8 @@ set_rcvbuf: | |||
| 696 | break; | 750 | break; |
| 697 | 751 | ||
| 698 | case SO_PRIORITY: | 752 | case SO_PRIORITY: |
| 699 | if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) | 753 | if ((val >= 0 && val <= 6) || |
| 754 | ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) | ||
| 700 | sk->sk_priority = val; | 755 | sk->sk_priority = val; |
| 701 | else | 756 | else |
| 702 | ret = -EPERM; | 757 | ret = -EPERM; |
| @@ -813,7 +868,7 @@ set_rcvbuf: | |||
| 813 | clear_bit(SOCK_PASSSEC, &sock->flags); | 868 | clear_bit(SOCK_PASSSEC, &sock->flags); |
| 814 | break; | 869 | break; |
| 815 | case SO_MARK: | 870 | case SO_MARK: |
| 816 | if (!capable(CAP_NET_ADMIN)) | 871 | if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) |
| 817 | ret = -EPERM; | 872 | ret = -EPERM; |
| 818 | else | 873 | else |
| 819 | sk->sk_mark = val; | 874 | sk->sk_mark = val; |
| @@ -1074,6 +1129,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname, | |||
| 1074 | case SO_NOFCS: | 1129 | case SO_NOFCS: |
| 1075 | v.val = sock_flag(sk, SOCK_NOFCS); | 1130 | v.val = sock_flag(sk, SOCK_NOFCS); |
| 1076 | break; | 1131 | break; |
| 1132 | |||
| 1133 | case SO_BINDTODEVICE: | ||
| 1134 | return sock_getbindtodevice(sk, optval, optlen, len); | ||
| 1135 | |||
| 1136 | case SO_GET_FILTER: | ||
| 1137 | len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); | ||
| 1138 | if (len < 0) | ||
| 1139 | return len; | ||
| 1140 | |||
| 1141 | goto lenout; | ||
| 1142 | |||
| 1077 | default: | 1143 | default: |
| 1078 | return -ENOPROTOOPT; | 1144 | return -ENOPROTOOPT; |
| 1079 | } | 1145 | } |
| @@ -1214,13 +1280,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) | |||
| 1214 | 1280 | ||
| 1215 | #ifdef CONFIG_CGROUPS | 1281 | #ifdef CONFIG_CGROUPS |
| 1216 | #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) | 1282 | #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) |
| 1217 | void sock_update_classid(struct sock *sk) | 1283 | void sock_update_classid(struct sock *sk, struct task_struct *task) |
| 1218 | { | 1284 | { |
| 1219 | u32 classid; | 1285 | u32 classid; |
| 1220 | 1286 | ||
| 1221 | rcu_read_lock(); /* doing current task, which cannot vanish. */ | 1287 | classid = task_cls_classid(task); |
| 1222 | classid = task_cls_classid(current); | ||
| 1223 | rcu_read_unlock(); | ||
| 1224 | if (classid != sk->sk_classid) | 1288 | if (classid != sk->sk_classid) |
| 1225 | sk->sk_classid = classid; | 1289 | sk->sk_classid = classid; |
| 1226 | } | 1290 | } |
| @@ -1263,7 +1327,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, | |||
| 1263 | sock_net_set(sk, get_net(net)); | 1327 | sock_net_set(sk, get_net(net)); |
| 1264 | atomic_set(&sk->sk_wmem_alloc, 1); | 1328 | atomic_set(&sk->sk_wmem_alloc, 1); |
| 1265 | 1329 | ||
| 1266 | sock_update_classid(sk); | 1330 | sock_update_classid(sk, current); |
| 1267 | sock_update_netprioidx(sk, current); | 1331 | sock_update_netprioidx(sk, current); |
| 1268 | } | 1332 | } |
| 1269 | 1333 | ||
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a7c36845b123..d1b08045a9df 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
| @@ -216,6 +216,11 @@ static __net_init int sysctl_core_net_init(struct net *net) | |||
| 216 | goto err_dup; | 216 | goto err_dup; |
| 217 | 217 | ||
| 218 | tbl[0].data = &net->core.sysctl_somaxconn; | 218 | tbl[0].data = &net->core.sysctl_somaxconn; |
| 219 | |||
| 220 | /* Don't export any sysctls to unprivileged users */ | ||
| 221 | if (net->user_ns != &init_user_ns) { | ||
| 222 | tbl[0].procname = NULL; | ||
| 223 | } | ||
| 219 | } | 224 | } |
| 220 | 225 | ||
| 221 | net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); | 226 | net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); |
