Diffstat (limited to 'net/core')
 net/core/Makefile            |   3
 net/core/dev.c               | 588
 net/core/dev_addr_lists.c    | 115
 net/core/dev_ioctl.c         |   2
 net/core/fib_rules.c         |   7
 net/core/filter.c            |  30
 net/core/flow_dissector.c    |  26
 net/core/neighbour.c         | 483
 net/core/net-sysfs.c         |  82
 net/core/net-sysfs.h         |   2
 net/core/netclassid_cgroup.c | 120
 net/core/netpoll.c           |  10
 net/core/netprio_cgroup.c    |  10
 net/core/pktgen.c            |  88
 net/core/rtnetlink.c         | 205
 net/core/skbuff.c            | 526
 net/core/sock.c              |  54
 net/core/stream.c            |   2
 net/core/sysctl_net_core.c   |   3
 19 files changed, 1512 insertions(+), 844 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index b33b996f5dd6..9628c20acff6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
-obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
+obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
+obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ce469e5ec80..b1b0c8d4d7df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -147,6 +147,8 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
+static int netif_rx_internal(struct sk_buff *skb);
+
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
@@ -480,7 +482,7 @@ EXPORT_SYMBOL(dev_add_offload);
  *	and must not be freed until after all the CPU's have gone
  *	through a quiescent state.
  */
-void __dev_remove_offload(struct packet_offload *po)
+static void __dev_remove_offload(struct packet_offload *po)
 {
 	struct list_head *head = &offload_base;
 	struct packet_offload *po1;
@@ -498,7 +500,6 @@ void __dev_remove_offload(struct packet_offload *po)
 out:
 	spin_unlock(&offload_lock);
 }
-EXPORT_SYMBOL(__dev_remove_offload);
 
 /**
  *	dev_remove_offload - remove packet offload handler
@@ -1118,6 +1119,8 @@ rollback:
 
 	write_seqcount_end(&devnet_rename_seq);
 
+	netdev_adjacent_rename_links(dev, oldname);
+
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
 	write_unlock_bh(&dev_base_lock);
@@ -1137,6 +1140,7 @@ rollback:
 			err = ret;
 			write_seqcount_begin(&devnet_rename_seq);
 			memcpy(dev->name, oldname, IFNAMSIZ);
+			memcpy(oldname, newname, IFNAMSIZ);
 			goto rollback;
 		} else {
 			pr_err("%s: name change rollback failed: %d\n",
@@ -1566,14 +1570,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
  *	are as for raw_notifier_call_chain().
  */
 
-int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
-				  struct netdev_notifier_info *info)
+static int call_netdevice_notifiers_info(unsigned long val,
+					 struct net_device *dev,
+					 struct netdev_notifier_info *info)
 {
 	ASSERT_RTNL();
 	netdev_notifier_info_init(info, dev);
 	return raw_notifier_call_chain(&netdev_chain, val, info);
 }
-EXPORT_SYMBOL(call_netdevice_notifiers_info);
 
 /**
  *	call_netdevice_notifiers - call all network notifier blocks
@@ -1699,7 +1703,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 	skb_scrub_packet(skb, true);
 	skb->protocol = eth_type_trans(skb, dev);
 
-	return netif_rx(skb);
+	return netif_rx_internal(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
@@ -2079,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 /**
  *	netif_set_real_num_rx_queues - set actual number of RX queues used
  *	@dev: Network device
@@ -2145,30 +2149,42 @@ void __netif_schedule(struct Qdisc *q)
 }
 EXPORT_SYMBOL(__netif_schedule);
 
-void dev_kfree_skb_irq(struct sk_buff *skb)
+struct dev_kfree_skb_cb {
+	enum skb_free_reason reason;
+};
+
+static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
 {
-	if (atomic_dec_and_test(&skb->users)) {
-		struct softnet_data *sd;
-		unsigned long flags;
+	return (struct dev_kfree_skb_cb *)skb->cb;
+}
+
+void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
+{
+	unsigned long flags;
 
-		local_irq_save(flags);
-		sd = &__get_cpu_var(softnet_data);
-		skb->next = sd->completion_queue;
-		sd->completion_queue = skb;
-		raise_softirq_irqoff(NET_TX_SOFTIRQ);
-		local_irq_restore(flags);
+	if (likely(atomic_read(&skb->users) == 1)) {
+		smp_rmb();
+		atomic_set(&skb->users, 0);
+	} else if (likely(!atomic_dec_and_test(&skb->users))) {
+		return;
 	}
+	get_kfree_skb_cb(skb)->reason = reason;
+	local_irq_save(flags);
+	skb->next = __this_cpu_read(softnet_data.completion_queue);
+	__this_cpu_write(softnet_data.completion_queue, skb);
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_restore(flags);
 }
-EXPORT_SYMBOL(dev_kfree_skb_irq);
+EXPORT_SYMBOL(__dev_kfree_skb_irq);
 
-void dev_kfree_skb_any(struct sk_buff *skb)
+void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
 {
 	if (in_irq() || irqs_disabled())
-		dev_kfree_skb_irq(skb);
+		__dev_kfree_skb_irq(skb, reason);
 	else
 		dev_kfree_skb(skb);
 }
-EXPORT_SYMBOL(dev_kfree_skb_any);
+EXPORT_SYMBOL(__dev_kfree_skb_any);
 
 
 /**
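
The rework above lets the TX-completion softirq distinguish packets that were delivered from packets that were dropped: the reason is stashed in skb->cb and consumed by net_tx_action() further down in this diff. A minimal sketch of how a driver's completion path is expected to use the new API; dev_consume_skb_any() and dev_kfree_skb_any() are the wrappers around __dev_kfree_skb_any() that this series adds in netdevice.h, and the ring structure here is hypothetical:

	/* Illustrative TX-completion handler (sketch, not part of this diff). */
	static void mydrv_clean_tx(struct my_ring *ring, struct sk_buff *skb, bool tx_ok)
	{
		if (tx_ok)
			dev_consume_skb_any(skb);	/* SKB_REASON_CONSUMED: traced as consume_skb */
		else
			dev_kfree_skb_any(skb);		/* SKB_REASON_DROPPED: traced as kfree_skb */
	}
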
@@ -2404,7 +2420,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  *	2. No high memory really exists on this machine.
  */
 
-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
@@ -2442,13 +2458,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 {
 	struct dev_gso_cb *cb;
 
-	do {
-		struct sk_buff *nskb = skb->next;
-
-		skb->next = nskb->next;
-		nskb->next = NULL;
-		kfree_skb(nskb);
-	} while (skb->next);
+	kfree_skb_list(skb->next);
+	skb->next = NULL;
 
 	cb = DEV_GSO_CB(skb);
 	if (cb->destructor)
@@ -2484,34 +2495,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 }
 
 static netdev_features_t harmonize_features(struct sk_buff *skb,
-	netdev_features_t features)
+					    const struct net_device *dev,
+					    netdev_features_t features)
 {
 	if (skb->ip_summed != CHECKSUM_NONE &&
 	    !can_checksum_protocol(features, skb_network_protocol(skb))) {
 		features &= ~NETIF_F_ALL_CSUM;
-	} else if (illegal_highdma(skb->dev, skb)) {
+	} else if (illegal_highdma(dev, skb)) {
 		features &= ~NETIF_F_SG;
 	}
 
 	return features;
 }
 
-netdev_features_t netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
+					 const struct net_device *dev)
 {
 	__be16 protocol = skb->protocol;
-	netdev_features_t features = skb->dev->features;
+	netdev_features_t features = dev->features;
 
-	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+	if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, features);
+		return harmonize_features(skb, dev, features);
 	}
 
-	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+	features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
 		     NETIF_F_HW_VLAN_STAG_TX);
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2519,24 +2532,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_STAG_TX;
 
-	return harmonize_features(skb, features);
-}
-EXPORT_SYMBOL(netif_skb_features);
-
-/*
- * Returns true if either:
- *	1. skb has frag_list and the device doesn't support FRAGLIST, or
- *	2. skb is fragmented and the device does not support SG.
- */
-static inline int skb_needs_linearize(struct sk_buff *skb,
-				      netdev_features_t features)
-{
-	return skb_is_nonlinear(skb) &&
-	       ((skb_has_frag_list(skb) &&
-		 !(features & NETIF_F_FRAGLIST)) ||
-		(skb_shinfo(skb)->nr_frags &&
-		 !(features & NETIF_F_SG)));
+	return harmonize_features(skb, dev, features);
 }
+EXPORT_SYMBOL(netif_skb_dev_features);
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
@@ -2605,8 +2603,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			dev_queue_xmit_nit(skb, dev);
 
 		skb_len = skb->len;
-		rc = ops->ndo_start_xmit(skb, dev);
-
+		trace_net_dev_start_xmit(skb, dev);
+		rc = ops->ndo_start_xmit(skb, dev);
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
@@ -2624,6 +2622,7 @@ gso:
 			dev_queue_xmit_nit(nskb, dev);
 
 		skb_len = nskb->len;
+		trace_net_dev_start_xmit(nskb, dev);
 		rc = ops->ndo_start_xmit(nskb, dev);
 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
 		if (unlikely(rc != NETDEV_TX_OK)) {
@@ -2744,7 +2743,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 static void skb_update_prio(struct sk_buff *skb)
 {
 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
@@ -2781,8 +2780,9 @@ int dev_loopback_xmit(struct sk_buff *skb)
 EXPORT_SYMBOL(dev_loopback_xmit);
 
 /**
- *	dev_queue_xmit - transmit a buffer
+ *	__dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
+ *	@accel_priv: private data used for L2 forwarding offload
  *
  *	Queue a buffer for transmission to a network device. The caller must
  *	have set the device and priority and built the buffer before calling
@@ -2805,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
  *	the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 {
 	struct net_device *dev = skb->dev;
 	struct netdev_queue *txq;
@@ -3014,7 +3014,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	skb_reset_network_header(skb);
-	if (!skb_get_rxhash(skb))
+	if (!skb_get_hash(skb))
 		goto done;
 
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3159,7 +3159,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
 	rcu_read_lock();
 	fl = rcu_dereference(sd->flow_limit);
 	if (fl) {
-		new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+		new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
 		old_flow = fl->history[fl->history_head];
 		fl->history[fl->history_head] = new_flow;
 
@@ -3227,22 +3227,7 @@ enqueue:
 	return NET_RX_DROP;
 }
 
-/**
- *	netif_rx	-	post buffer to the network code
- *	@skb: buffer to post
- *
- *	This function receives a packet from a device driver and queues it for
- *	the upper (protocol) levels to process.  It always succeeds. The buffer
- *	may be dropped during processing for congestion control or by the
- *	protocol layers.
- *
- *	return values:
- *	NET_RX_SUCCESS	(no congestion)
- *	NET_RX_DROP	(packet was dropped)
- *
- */
-
-int netif_rx(struct sk_buff *skb)
+static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
 
@@ -3278,14 +3263,38 @@ int netif_rx(struct sk_buff *skb)
 	}
 	return ret;
 }
+
+/**
+ *	netif_rx	-	post buffer to the network code
+ *	@skb: buffer to post
+ *
+ *	This function receives a packet from a device driver and queues it for
+ *	the upper (protocol) levels to process.  It always succeeds. The buffer
+ *	may be dropped during processing for congestion control or by the
+ *	protocol layers.
+ *
+ *	return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_DROP	(packet was dropped)
+ *
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+	trace_netif_rx_entry(skb);
+
+	return netif_rx_internal(skb);
+}
 EXPORT_SYMBOL(netif_rx);
 
 int netif_rx_ni(struct sk_buff *skb)
 {
 	int err;
 
+	trace_netif_rx_ni_entry(skb);
+
 	preempt_disable();
-	err = netif_rx(skb);
+	err = netif_rx_internal(skb);
 	if (local_softirq_pending())
 		do_softirq();
 	preempt_enable();
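
The split above keeps the new entry tracepoints (netif_rx_entry, netif_rx_ni_entry, and the receive-side equivalents later in this diff) firing exactly once per skb at the public entry points, while internal re-injection paths such as dev_forward_skb() call netif_rx_internal() and stay silent. A hedged sketch of attaching a probe to one of the new events; this assumes the tracepoint symbol is reachable from module context, otherwise the same data is available through the ftrace event files under events/net/:

	/* Sketch: probe for the netif_rx_entry tracepoint added here. */
	#include <linux/module.h>
	#include <linux/skbuff.h>
	#include <trace/events/net.h>

	static void probe_rx_entry(void *data, const struct sk_buff *skb)
	{
		pr_info("netif_rx entry: len=%u\n", skb->len);
	}

	static int __init rxprobe_init(void)
	{
		return register_trace_netif_rx_entry(probe_rx_entry, NULL);
	}

	static void __exit rxprobe_exit(void)
	{
		unregister_trace_netif_rx_entry(probe_rx_entry, NULL);
		tracepoint_synchronize_unregister();
	}

	module_init(rxprobe_init);
	module_exit(rxprobe_exit);
	MODULE_LICENSE("GPL");
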
@@ -3311,7 +3320,10 @@ static void net_tx_action(struct softirq_action *h)
 			clist = clist->next;
 
 			WARN_ON(atomic_read(&skb->users));
-			trace_kfree_skb(skb, net_tx_action);
+			if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
+				trace_consume_skb(skb);
+			else
+				trace_kfree_skb(skb, net_tx_action);
 			__kfree_skb(skb);
 		}
 	}
@@ -3667,22 +3679,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	return ret;
 }
 
-/**
- *	netif_receive_skb - process receive buffer from network
- *	@skb: buffer to process
- *
- *	netif_receive_skb() is the main receive data processing function.
- *	It always succeeds. The buffer may be dropped during processing
- *	for congestion control or by the protocol layers.
- *
- *	This function may only be called from softirq context and interrupts
- *	should be enabled.
- *
- *	Return values (usually ignored):
- *	NET_RX_SUCCESS: no congestion
- *	NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
@@ -3708,6 +3705,28 @@ int netif_receive_skb(struct sk_buff *skb)
 #endif
 	return __netif_receive_skb(skb);
 }
+
+/**
+ *	netif_receive_skb - process receive buffer from network
+ *	@skb: buffer to process
+ *
+ *	netif_receive_skb() is the main receive data processing function.
+ *	It always succeeds. The buffer may be dropped during processing
+ *	for congestion control or by the protocol layers.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	trace_netif_receive_skb_entry(skb);
+
+	return netif_receive_skb_internal(skb);
+}
 EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending
@@ -3757,7 +3776,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 		if (ptype->type != type || !ptype->callbacks.gro_complete)
 			continue;
 
-		err = ptype->callbacks.gro_complete(skb);
+		err = ptype->callbacks.gro_complete(skb, 0);
 		break;
 	}
 	rcu_read_unlock();
@@ -3769,7 +3788,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 	}
 
 out:
-	return netif_receive_skb(skb);
+	return netif_receive_skb_internal(skb);
 }
 
 /* napi->gro_list contains packets ordered by age.
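
The second argument that ->gro_complete() grows here is the offset of the completing protocol's header within the packet; the top-level napi_gro_complete() passes 0 for the outermost header, and encapsulation offloads can hand the inner offset down. A sketch of an offload callback under the new signature (the handler body and registration are illustrative, loosely modeled on the IPv4 offload):

	/* Sketch: gro_complete with the new nhoff parameter. */
	static int my_gro_complete(struct sk_buff *skb, int nhoff)
	{
		struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);

		iph->tot_len = htons(skb->len - nhoff);	/* illustrative fix-up */
		return 0;
	}

	static struct packet_offload my_offload __read_mostly = {
		.type = cpu_to_be16(ETH_P_IP),
		.callbacks = {
			.gro_complete = my_gro_complete,
		},
	};
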
@@ -3805,10 +3824,18 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 	unsigned int maclen = skb->dev->hard_header_len;
+	u32 hash = skb_get_hash_raw(skb);
 
 	for (p = napi->gro_list; p; p = p->next) {
 		unsigned long diffs;
 
+		NAPI_GRO_CB(p)->flush = 0;
+
+		if (hash != skb_get_hash_raw(p)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
 		if (maclen == ETH_HLEN)
@@ -3819,7 +3846,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 				       skb_gro_mac_header(skb),
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
-		NAPI_GRO_CB(p)->flush = 0;
+	}
+}
+
+static void skb_gro_reset_offset(struct sk_buff *skb)
+{
+	const struct skb_shared_info *pinfo = skb_shinfo(skb);
+	const skb_frag_t *frag0 = &pinfo->frags[0];
+
+	NAPI_GRO_CB(skb)->data_offset = 0;
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
+
+	if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
+	    pinfo->nr_frags &&
+	    !PageHighMem(skb_frag_page(frag0))) {
+		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
 	}
 }
 
@@ -3838,7 +3881,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
+	skb_gro_reset_offset(skb);
 	gro_list_prepare(napi, skb);
+	NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -3850,6 +3895,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
+		NAPI_GRO_CB(skb)->udp_mark = 0;
 
 		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 		break;
@@ -3874,10 +3920,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (same_flow)
 		goto ok;
 
-	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
+	if (NAPI_GRO_CB(skb)->flush)
 		goto normal;
 
-	napi->gro_count++;
+	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
+		struct sk_buff *nskb = napi->gro_list;
+
+		/* locate the end of the list to select the 'oldest' flow */
+		while (nskb->next) {
+			pp = &nskb->next;
+			nskb = *pp;
+		}
+		*pp = NULL;
+		nskb->next = NULL;
+		napi_gro_complete(nskb);
+	} else {
+		napi->gro_count++;
+	}
 	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
@@ -3915,12 +3974,39 @@ normal:
 	goto pull;
 }
 
+struct packet_offload *gro_find_receive_by_type(__be16 type)
+{
+	struct list_head *offload_head = &offload_base;
+	struct packet_offload *ptype;
+
+	list_for_each_entry_rcu(ptype, offload_head, list) {
+		if (ptype->type != type || !ptype->callbacks.gro_receive)
+			continue;
+		return ptype;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(gro_find_receive_by_type);
+
+struct packet_offload *gro_find_complete_by_type(__be16 type)
+{
+	struct list_head *offload_head = &offload_base;
+	struct packet_offload *ptype;
+
+	list_for_each_entry_rcu(ptype, offload_head, list) {
+		if (ptype->type != type || !ptype->callbacks.gro_complete)
+			continue;
+		return ptype;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(gro_find_complete_by_type);
 
 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
 	case GRO_NORMAL:
-		if (netif_receive_skb(skb))
+		if (netif_receive_skb_internal(skb))
 			ret = GRO_DROP;
 		break;
 
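
gro_find_receive_by_type() and gro_find_complete_by_type() give encapsulation offloads (the UDP-tunnel GRO work in this cycle) a way to dispatch to the inner protocol's handler without duplicating the offload-list walk. A hedged sketch of the intended call pattern from an outer protocol's gro_receive(), which already runs under rcu_read_lock():

	/* Sketch: handing GRO off to the inner protocol's offload. */
	static struct sk_buff **outer_gro_receive(struct sk_buff **head,
						  struct sk_buff *skb)
	{
		struct packet_offload *ptype;
		__be16 inner_type = htons(ETH_P_TEB);	/* example inner type */

		ptype = gro_find_receive_by_type(inner_type);
		if (!ptype) {
			NAPI_GRO_CB(skb)->flush = 1;	/* no handler: don't merge */
			return NULL;
		}
		return ptype->callbacks.gro_receive(head, skb);
	}
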
@@ -3943,26 +4029,9 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 	return ret;
 }
 
-static void skb_gro_reset_offset(struct sk_buff *skb)
-{
-	const struct skb_shared_info *pinfo = skb_shinfo(skb);
-	const skb_frag_t *frag0 = &pinfo->frags[0];
-
-	NAPI_GRO_CB(skb)->data_offset = 0;
-	NAPI_GRO_CB(skb)->frag0 = NULL;
-	NAPI_GRO_CB(skb)->frag0_len = 0;
-
-	if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
-	    pinfo->nr_frags &&
-	    !PageHighMem(skb_frag_page(frag0))) {
-		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
-		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
-	}
-}
-
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-	skb_gro_reset_offset(skb);
+	trace_napi_gro_receive_entry(skb);
 
 	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
@@ -3986,8 +4055,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 
 	if (!skb) {
 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
-		if (skb)
-			napi->skb = skb;
+		napi->skb = skb;
 	}
 	return skb;
 }
@@ -3998,12 +4066,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 {
 	switch (ret) {
 	case GRO_NORMAL:
-	case GRO_HELD:
-		skb->protocol = eth_type_trans(skb, skb->dev);
-
-		if (ret == GRO_HELD)
-			skb_gro_pull(skb, -ETH_HLEN);
-		else if (netif_receive_skb(skb))
+		if (netif_receive_skb_internal(skb))
 			ret = GRO_DROP;
 		break;
 
@@ -4012,6 +4075,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 		napi_reuse_skb(napi, skb);
 		break;
 
+	case GRO_HELD:
 	case GRO_MERGED:
 		break;
 	}
@@ -4022,36 +4086,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
 	struct sk_buff *skb = napi->skb;
-	struct ethhdr *eth;
-	unsigned int hlen;
-	unsigned int off;
 
 	napi->skb = NULL;
 
-	skb_reset_mac_header(skb);
-	skb_gro_reset_offset(skb);
-
-	off = skb_gro_offset(skb);
-	hlen = off + sizeof(*eth);
-	eth = skb_gro_header_fast(skb, off);
-	if (skb_gro_header_hard(skb, hlen)) {
-		eth = skb_gro_header_slow(skb, hlen, off);
-		if (unlikely(!eth)) {
-			napi_reuse_skb(napi, skb);
-			skb = NULL;
-			goto out;
-		}
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
+		napi_reuse_skb(napi, skb);
+		return NULL;
 	}
+	skb->protocol = eth_type_trans(skb, skb->dev);
 
-	skb_gro_pull(skb, sizeof(*eth));
-
-	/*
-	 * This works because the only protocols we care about don't require
-	 * special handling.  We'll fix it up properly at the end.
-	 */
-	skb->protocol = eth->h_proto;
-
-out:
 	return skb;
 }
 
@@ -4062,12 +4105,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 	if (!skb)
 		return GRO_DROP;
 
+	trace_napi_gro_frags_entry(skb);
+
 	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
 /*
- * net_rps_action sends any pending IPI's for rps.
+ * net_rps_action_and_irq_enable sends any pending IPI's for rps.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */
 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
@@ -4272,17 +4317,10 @@ EXPORT_SYMBOL(netif_napi_add);
 
 void netif_napi_del(struct napi_struct *napi)
 {
-	struct sk_buff *skb, *next;
-
 	list_del_init(&napi->dev_list);
 	napi_free_frags(napi);
 
-	for (skb = napi->gro_list; skb; skb = next) {
-		next = skb->next;
-		skb->next = NULL;
-		kfree_skb(skb);
-	}
-
+	kfree_skb_list(napi->gro_list);
 	napi->gro_list = NULL;
 	napi->gro_count = 0;
 }
@@ -4399,19 +4437,6 @@ struct netdev_adjacent {
 	struct rcu_head rcu;
 };
 
-static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
-						     struct net_device *adj_dev,
-						     struct list_head *adj_list)
-{
-	struct netdev_adjacent *adj;
-
-	list_for_each_entry_rcu(adj, adj_list, list) {
-		if (adj->dev == adj_dev)
-			return adj;
-	}
-	return NULL;
-}
-
 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
 						 struct net_device *adj_dev,
 						 struct list_head *adj_list)
@@ -4450,13 +4475,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
 * Find out if a device is linked to an upper device and return true in case
 * it is. The caller must hold the RTNL lock.
 */
-bool netdev_has_any_upper_dev(struct net_device *dev)
+static bool netdev_has_any_upper_dev(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
 	return !list_empty(&dev->all_adj_list.upper);
 }
-EXPORT_SYMBOL(netdev_has_any_upper_dev);
 
 /**
 *	netdev_master_upper_dev_get - Get master upper device
@@ -4576,6 +4600,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
 
 /**
+ * netdev_lower_get_first_private_rcu - Get the first ->private from the
+ *				       lower neighbour list, RCU
+ *				       variant
+ * @dev: device
+ *
+ * Gets the first netdev_adjacent->private from the dev's lower neighbour
+ * list. The caller must hold RCU read lock.
+ */
+void *netdev_lower_get_first_private_rcu(struct net_device *dev)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_first_or_null_rcu(&dev->adj_list.lower,
+				       struct netdev_adjacent, list);
+	if (lower)
+		return lower->private;
+	return NULL;
+}
+EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
+
+/**
 * netdev_master_upper_dev_get_rcu - Get master upper device
 * @dev: device
 *
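
netdev_lower_get_first_private_rcu() is aimed at master drivers that keep per-link state in the adjacency's ->private and need it on the datapath; bonding's RCU conversion in this cycle is the first consumer. An illustrative caller (struct my_slave and the surrounding driver are hypothetical):

	/* Sketch: transmit via the first lower device of a master. */
	static int my_xmit_through_first_lower(struct sk_buff *skb,
					       struct net_device *master)
	{
		struct my_slave *slave;
		int ret = -ENODEV;

		rcu_read_lock();
		slave = netdev_lower_get_first_private_rcu(master);
		if (slave) {
			skb->dev = slave->dev;
			ret = dev_queue_xmit(skb);
		}
		rcu_read_unlock();
		if (ret == -ENODEV)
			kfree_skb(skb);
		return ret;
	}
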
@@ -4594,13 +4639,36 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 
+static int netdev_adjacent_sysfs_add(struct net_device *dev,
+			      struct net_device *adj_dev,
+			      struct list_head *dev_list)
+{
+	char linkname[IFNAMSIZ+7];
+	sprintf(linkname, dev_list == &dev->adj_list.upper ?
+		"upper_%s" : "lower_%s", adj_dev->name);
+	return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
+				 linkname);
+}
+static void netdev_adjacent_sysfs_del(struct net_device *dev,
+			       char *name,
+			       struct list_head *dev_list)
+{
+	char linkname[IFNAMSIZ+7];
+	sprintf(linkname, dev_list == &dev->adj_list.upper ?
+		"upper_%s" : "lower_%s", name);
+	sysfs_remove_link(&(dev->dev.kobj), linkname);
+}
+
+#define netdev_adjacent_is_neigh_list(dev, dev_list) \
+		(dev_list == &dev->adj_list.upper || \
+		 dev_list == &dev->adj_list.lower)
+
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
 					struct list_head *dev_list,
 					void *private, bool master)
 {
 	struct netdev_adjacent *adj;
-	char linkname[IFNAMSIZ+7];
 	int ret;
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
@@ -4623,16 +4691,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	pr_debug("dev_hold for %s, because of link added from %s to %s\n",
 		 adj_dev->name, dev->name, adj_dev->name);
 
-	if (dev_list == &dev->adj_list.lower) {
-		sprintf(linkname, "lower_%s", adj_dev->name);
-		ret = sysfs_create_link(&(dev->dev.kobj),
-					&(adj_dev->dev.kobj), linkname);
-		if (ret)
-			goto free_adj;
-	} else if (dev_list == &dev->adj_list.upper) {
-		sprintf(linkname, "upper_%s", adj_dev->name);
-		ret = sysfs_create_link(&(dev->dev.kobj),
-					&(adj_dev->dev.kobj), linkname);
+	if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
+		ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
 		if (ret)
 			goto free_adj;
 	}
@@ -4652,14 +4712,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	return 0;
 
 remove_symlinks:
-	if (dev_list == &dev->adj_list.lower) {
-		sprintf(linkname, "lower_%s", adj_dev->name);
-		sysfs_remove_link(&(dev->dev.kobj), linkname);
-	} else if (dev_list == &dev->adj_list.upper) {
-		sprintf(linkname, "upper_%s", adj_dev->name);
-		sysfs_remove_link(&(dev->dev.kobj), linkname);
-	}
-
+	if (netdev_adjacent_is_neigh_list(dev, dev_list))
+		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 free_adj:
 	kfree(adj);
 	dev_put(adj_dev);
@@ -4667,12 +4721,11 @@ free_adj:
 	return ret;
 }
 
-void __netdev_adjacent_dev_remove(struct net_device *dev,
-				  struct net_device *adj_dev,
-				  struct list_head *dev_list)
+static void __netdev_adjacent_dev_remove(struct net_device *dev,
+					 struct net_device *adj_dev,
+					 struct list_head *dev_list)
 {
 	struct netdev_adjacent *adj;
-	char linkname[IFNAMSIZ+7];
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
 
@@ -4692,13 +4745,8 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 	if (adj->master)
 		sysfs_remove_link(&(dev->dev.kobj), "master");
 
-	if (dev_list == &dev->adj_list.lower) {
-		sprintf(linkname, "lower_%s", adj_dev->name);
-		sysfs_remove_link(&(dev->dev.kobj), linkname);
-	} else if (dev_list == &dev->adj_list.upper) {
-		sprintf(linkname, "upper_%s", adj_dev->name);
-		sysfs_remove_link(&(dev->dev.kobj), linkname);
-	}
+	if (netdev_adjacent_is_neigh_list(dev, dev_list))
+		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 
 	list_del_rcu(&adj->list);
 	pr_debug("dev_put for %s, because link removed from %s to %s\n",
@@ -4707,11 +4755,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 	kfree_rcu(adj, rcu);
 }
 
-int __netdev_adjacent_dev_link_lists(struct net_device *dev,
-				     struct net_device *upper_dev,
-				     struct list_head *up_list,
-				     struct list_head *down_list,
-				     void *private, bool master)
+static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
+					    struct net_device *upper_dev,
+					    struct list_head *up_list,
+					    struct list_head *down_list,
+					    void *private, bool master)
 {
 	int ret;
 
@@ -4730,8 +4778,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev,
 	return 0;
 }
 
-int __netdev_adjacent_dev_link(struct net_device *dev,
-			       struct net_device *upper_dev)
+static int __netdev_adjacent_dev_link(struct net_device *dev,
+				      struct net_device *upper_dev)
 {
 	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
 						&dev->all_adj_list.upper,
@@ -4739,26 +4787,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev,
 						NULL, false);
 }
 
-void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
-					struct net_device *upper_dev,
-					struct list_head *up_list,
-					struct list_head *down_list)
+static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
+					       struct net_device *upper_dev,
+					       struct list_head *up_list,
+					       struct list_head *down_list)
 {
 	__netdev_adjacent_dev_remove(dev, upper_dev, up_list);
 	__netdev_adjacent_dev_remove(upper_dev, dev, down_list);
 }
 
-void __netdev_adjacent_dev_unlink(struct net_device *dev,
-				  struct net_device *upper_dev)
+static void __netdev_adjacent_dev_unlink(struct net_device *dev,
+					 struct net_device *upper_dev)
 {
 	__netdev_adjacent_dev_unlink_lists(dev, upper_dev,
 					   &dev->all_adj_list.upper,
 					   &upper_dev->all_adj_list.lower);
 }
 
-int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
-					 struct net_device *upper_dev,
-					 void *private, bool master)
+static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+						struct net_device *upper_dev,
+						void *private, bool master)
 {
 	int ret = __netdev_adjacent_dev_link(dev, upper_dev);
 
@@ -4777,8 +4825,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 	return 0;
 }
 
-void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
-					    struct net_device *upper_dev)
+static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
+						   struct net_device *upper_dev)
 {
 	__netdev_adjacent_dev_unlink(dev, upper_dev);
 	__netdev_adjacent_dev_unlink_lists(dev, upper_dev,
@@ -4967,20 +5015,24 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
-void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
-				       struct net_device *lower_dev)
+void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
 {
-	struct netdev_adjacent *lower;
+	struct netdev_adjacent *iter;
 
-	if (!lower_dev)
-		return NULL;
-	lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
-	if (!lower)
-		return NULL;
+	list_for_each_entry(iter, &dev->adj_list.upper, list) {
+		netdev_adjacent_sysfs_del(iter->dev, oldname,
+					  &iter->dev->adj_list.lower);
+		netdev_adjacent_sysfs_add(iter->dev, dev,
+					  &iter->dev->adj_list.lower);
+	}
 
-	return lower->private;
+	list_for_each_entry(iter, &dev->adj_list.lower, list) {
+		netdev_adjacent_sysfs_del(iter->dev, oldname,
+					  &iter->dev->adj_list.upper);
+		netdev_adjacent_sysfs_add(iter->dev, dev,
+					  &iter->dev->adj_list.upper);
+	}
 }
-EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
 
 void *netdev_lower_dev_get_private(struct net_device *dev,
 				   struct net_device *lower_dev)
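
netdev_adjacent_rename_links(), wired into the rename path in dev.c earlier in this diff, keeps the adjacency symlinks consistent across interface renames. Illustratively, with bond0 stacked on eth0, sysfs carries

	/sys/class/net/bond0/lower_eth0
	/sys/class/net/eth0/upper_bond0

and after something like `ip link set eth0 name lan0`, the stale bond0/lower_eth0 entry is deleted by name (via oldname) and re-created as bond0/lower_lan0; the renamed device's own upper_bond0 link moves with its kobject and keeps its name.
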
@@ -5314,6 +5366,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
 }
 EXPORT_SYMBOL(dev_change_flags);
 
+static int __dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (ops->ndo_change_mtu)
+		return ops->ndo_change_mtu(dev, new_mtu);
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
 /**
 *	dev_set_mtu - Change maximum transfer unit
 *	@dev: device
@@ -5323,8 +5386,7 @@ EXPORT_SYMBOL(dev_change_flags);
 */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-	int err;
+	int err, orig_mtu;
 
 	if (new_mtu == dev->mtu)
 		return 0;
@@ -5336,14 +5398,25 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
-	err = 0;
-	if (ops->ndo_change_mtu)
-		err = ops->ndo_change_mtu(dev, new_mtu);
-	else
-		dev->mtu = new_mtu;
+	err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
+	err = notifier_to_errno(err);
+	if (err)
+		return err;
 
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+	orig_mtu = dev->mtu;
+	err = __dev_set_mtu(dev, new_mtu);
+
+	if (!err) {
+		err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		err = notifier_to_errno(err);
+		if (err) {
+			/* setting mtu back and notifying everyone again,
+			 * so that they have a chance to revert changes.
+			 */
+			__dev_set_mtu(dev, orig_mtu);
+			call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		}
+	}
 	return err;
 }
 EXPORT_SYMBOL(dev_set_mtu);
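
NETDEV_PRECHANGEMTU gives notifier subscribers a veto before anything is touched; previously they only learned of an MTU change after the fact via NETDEV_CHANGEMTU, and the new rollback path above additionally re-notifies with the restored MTU if a post-change subscriber objects. A hedged sketch of a vetoing handler; my_dev_is_locked() is hypothetical, and note that at this point the notifier does not carry the proposed MTU, so a handler can refuse but not inspect the new value:

	/* Sketch: vetoing an MTU change from a netdevice notifier. */
	static int my_netdev_event(struct notifier_block *nb,
				   unsigned long event, void *ptr)
	{
		struct net_device *dev = netdev_notifier_info_to_dev(ptr);

		if (event == NETDEV_PRECHANGEMTU && my_dev_is_locked(dev))
			return notifier_from_errno(-EBUSY);	/* dev_set_mtu() fails with -EBUSY */

		return NOTIFY_DONE;
	}
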
@@ -5697,7 +5770,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, | |||
5697 | } | 5770 | } |
5698 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); | 5771 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); |
5699 | 5772 | ||
5700 | #ifdef CONFIG_RPS | 5773 | #ifdef CONFIG_SYSFS |
5701 | static int netif_alloc_rx_queues(struct net_device *dev) | 5774 | static int netif_alloc_rx_queues(struct net_device *dev) |
5702 | { | 5775 | { |
5703 | unsigned int i, count = dev->num_rx_queues; | 5776 | unsigned int i, count = dev->num_rx_queues; |
@@ -5836,13 +5909,8 @@ int register_netdevice(struct net_device *dev) | |||
5836 | dev->features |= NETIF_F_SOFT_FEATURES; | 5909 | dev->features |= NETIF_F_SOFT_FEATURES; |
5837 | dev->wanted_features = dev->features & dev->hw_features; | 5910 | dev->wanted_features = dev->features & dev->hw_features; |
5838 | 5911 | ||
5839 | /* Turn on no cache copy if HW is doing checksum */ | ||
5840 | if (!(dev->flags & IFF_LOOPBACK)) { | 5912 | if (!(dev->flags & IFF_LOOPBACK)) { |
5841 | dev->hw_features |= NETIF_F_NOCACHE_COPY; | 5913 | dev->hw_features |= NETIF_F_NOCACHE_COPY; |
5842 | if (dev->features & NETIF_F_ALL_CSUM) { | ||
5843 | dev->wanted_features |= NETIF_F_NOCACHE_COPY; | ||
5844 | dev->features |= NETIF_F_NOCACHE_COPY; | ||
5845 | } | ||
5846 | } | 5914 | } |
5847 | 5915 | ||
5848 | /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. | 5916 | /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. |
@@ -6229,7 +6297,7 @@ void netdev_freemem(struct net_device *dev) | |||
6229 | * @rxqs: the number of RX subqueues to allocate | 6297 | * @rxqs: the number of RX subqueues to allocate |
6230 | * | 6298 | * |
6231 | * Allocates a struct net_device with private data area for driver use | 6299 | * Allocates a struct net_device with private data area for driver use |
6232 | * and performs basic initialization. Also allocates subquue structs | 6300 | * and performs basic initialization. Also allocates subqueue structs |
6233 | * for each queue on the device. | 6301 | * for each queue on the device. |
6234 | */ | 6302 | */ |
6235 | struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | 6303 | struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, |
@@ -6247,7 +6315,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
6247 | return NULL; | 6315 | return NULL; |
6248 | } | 6316 | } |
6249 | 6317 | ||
6250 | #ifdef CONFIG_RPS | 6318 | #ifdef CONFIG_SYSFS |
6251 | if (rxqs < 1) { | 6319 | if (rxqs < 1) { |
6252 | pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); | 6320 | pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); |
6253 | return NULL; | 6321 | return NULL; |
@@ -6303,7 +6371,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
6303 | if (netif_alloc_netdev_queues(dev)) | 6371 | if (netif_alloc_netdev_queues(dev)) |
6304 | goto free_all; | 6372 | goto free_all; |
6305 | 6373 | ||
6306 | #ifdef CONFIG_RPS | 6374 | #ifdef CONFIG_SYSFS |
6307 | dev->num_rx_queues = rxqs; | 6375 | dev->num_rx_queues = rxqs; |
6308 | dev->real_num_rx_queues = rxqs; | 6376 | dev->real_num_rx_queues = rxqs; |
6309 | if (netif_alloc_rx_queues(dev)) | 6377 | if (netif_alloc_rx_queues(dev)) |
@@ -6323,7 +6391,7 @@ free_all: | |||
6323 | free_pcpu: | 6391 | free_pcpu: |
6324 | free_percpu(dev->pcpu_refcnt); | 6392 | free_percpu(dev->pcpu_refcnt); |
6325 | netif_free_tx_queues(dev); | 6393 | netif_free_tx_queues(dev); |
6326 | #ifdef CONFIG_RPS | 6394 | #ifdef CONFIG_SYSFS |
6327 | kfree(dev->_rx); | 6395 | kfree(dev->_rx); |
6328 | #endif | 6396 | #endif |
6329 | 6397 | ||
@@ -6348,7 +6416,7 @@ void free_netdev(struct net_device *dev) | |||
6348 | release_net(dev_net(dev)); | 6416 | release_net(dev_net(dev)); |
6349 | 6417 | ||
6350 | netif_free_tx_queues(dev); | 6418 | netif_free_tx_queues(dev); |
6351 | #ifdef CONFIG_RPS | 6419 | #ifdef CONFIG_SYSFS |
6352 | kfree(dev->_rx); | 6420 | kfree(dev->_rx); |
6353 | #endif | 6421 | #endif |
6354 | 6422 | ||
@@ -6618,11 +6686,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
6618 | 6686 | ||
6619 | /* Process offline CPU's input_pkt_queue */ | 6687 | /* Process offline CPU's input_pkt_queue */ |
6620 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { | 6688 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { |
6621 | netif_rx(skb); | 6689 | netif_rx_internal(skb); |
6622 | input_queue_head_incr(oldsd); | 6690 | input_queue_head_incr(oldsd); |
6623 | } | 6691 | } |
6624 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | 6692 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { |
6625 | netif_rx(skb); | 6693 | netif_rx_internal(skb); |
6626 | input_queue_head_incr(oldsd); | 6694 | input_queue_head_incr(oldsd); |
6627 | } | 6695 | } |
6628 | 6696 | ||
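When a CPU goes offline its two backlog queues are drained by re-injecting every skb; switching to netif_rx_internal() (forward-declared at the top of this diff) avoids redoing the public entry point's per-packet preamble for traffic that already entered the stack once. The assumed split, with netif_rx() reduced to a tracing wrapper:

    int netif_rx(struct sk_buff *skb)
    {
            trace_netif_rx_entry(skb);      /* tracepoint stays on the public path */
            return netif_rx_internal(skb);  /* shared enqueue logic */
    }
    EXPORT_SYMBOL(netif_rx);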
@@ -6935,28 +7003,18 @@ static int __init net_dev_init(void) | |||
6935 | for_each_possible_cpu(i) { | 7003 | for_each_possible_cpu(i) { |
6936 | struct softnet_data *sd = &per_cpu(softnet_data, i); | 7004 | struct softnet_data *sd = &per_cpu(softnet_data, i); |
6937 | 7005 | ||
6938 | memset(sd, 0, sizeof(*sd)); | ||
6939 | skb_queue_head_init(&sd->input_pkt_queue); | 7006 | skb_queue_head_init(&sd->input_pkt_queue); |
6940 | skb_queue_head_init(&sd->process_queue); | 7007 | skb_queue_head_init(&sd->process_queue); |
6941 | sd->completion_queue = NULL; | ||
6942 | INIT_LIST_HEAD(&sd->poll_list); | 7008 | INIT_LIST_HEAD(&sd->poll_list); |
6943 | sd->output_queue = NULL; | ||
6944 | sd->output_queue_tailp = &sd->output_queue; | 7009 | sd->output_queue_tailp = &sd->output_queue; |
6945 | #ifdef CONFIG_RPS | 7010 | #ifdef CONFIG_RPS |
6946 | sd->csd.func = rps_trigger_softirq; | 7011 | sd->csd.func = rps_trigger_softirq; |
6947 | sd->csd.info = sd; | 7012 | sd->csd.info = sd; |
6948 | sd->csd.flags = 0; | ||
6949 | sd->cpu = i; | 7013 | sd->cpu = i; |
6950 | #endif | 7014 | #endif |
6951 | 7015 | ||
6952 | sd->backlog.poll = process_backlog; | 7016 | sd->backlog.poll = process_backlog; |
6953 | sd->backlog.weight = weight_p; | 7017 | sd->backlog.weight = weight_p; |
6954 | sd->backlog.gro_list = NULL; | ||
6955 | sd->backlog.gro_count = 0; | ||
6956 | |||
6957 | #ifdef CONFIG_NET_FLOW_LIMIT | ||
6958 | sd->flow_limit = NULL; | ||
6959 | #endif | ||
6960 | } | 7018 | } |
6961 | 7019 | ||
6962 | dev_boot_phase = 0; | 7020 | dev_boot_phase = 0; |
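The dropped initializers in net_dev_init() rely on the fact that statically defined per-CPU data is zero-filled before this initcall runs, so the memset() and every explicit NULL/0 store were redundant. Only fields whose correct starting value is not zero still need code:

    /* redundant (already zero):            still required (non-zero):          */
    /*   sd->completion_queue = NULL;         skb_queue_head_init(&sd->...);    */
    /*   sd->output_queue     = NULL;         INIT_LIST_HEAD(&sd->poll_list);   */
    /*   sd->csd.flags        = 0;            sd->backlog.poll = process_backlog; */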
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index ec40a849fc42..329d5794e7dc 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c | |||
@@ -38,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, | |||
38 | ha->type = addr_type; | 38 | ha->type = addr_type; |
39 | ha->refcount = 1; | 39 | ha->refcount = 1; |
40 | ha->global_use = global; | 40 | ha->global_use = global; |
41 | ha->synced = sync; | 41 | ha->synced = sync ? 1 : 0; |
42 | ha->sync_cnt = 0; | 42 | ha->sync_cnt = 0; |
43 | list_add_tail_rcu(&ha->list, &list->list); | 43 | list_add_tail_rcu(&ha->list, &list->list); |
44 | list->count++; | 44 | list->count++; |
@@ -48,7 +48,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, | |||
48 | 48 | ||
49 | static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, | 49 | static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, |
50 | const unsigned char *addr, int addr_len, | 50 | const unsigned char *addr, int addr_len, |
51 | unsigned char addr_type, bool global, bool sync) | 51 | unsigned char addr_type, bool global, bool sync, |
52 | int sync_count) | ||
52 | { | 53 | { |
53 | struct netdev_hw_addr *ha; | 54 | struct netdev_hw_addr *ha; |
54 | 55 | ||
@@ -66,10 +67,10 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, | |||
66 | ha->global_use = true; | 67 | ha->global_use = true; |
67 | } | 68 | } |
68 | if (sync) { | 69 | if (sync) { |
69 | if (ha->synced) | 70 | if (ha->synced && sync_count) |
70 | return -EEXIST; | 71 | return -EEXIST; |
71 | else | 72 | else |
72 | ha->synced = true; | 73 | ha->synced++; |
73 | } | 74 | } |
74 | ha->refcount++; | 75 | ha->refcount++; |
75 | return 0; | 76 | return 0; |
@@ -84,7 +85,8 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, | |||
84 | const unsigned char *addr, int addr_len, | 85 | const unsigned char *addr, int addr_len, |
85 | unsigned char addr_type) | 86 | unsigned char addr_type) |
86 | { | 87 | { |
87 | return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false); | 88 | return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false, |
89 | 0); | ||
88 | } | 90 | } |
89 | 91 | ||
90 | static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, | 92 | static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, |
@@ -101,7 +103,7 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, | |||
101 | ha->global_use = false; | 103 | ha->global_use = false; |
102 | 104 | ||
103 | if (sync) | 105 | if (sync) |
104 | ha->synced = false; | 106 | ha->synced--; |
105 | 107 | ||
106 | if (--ha->refcount) | 108 | if (--ha->refcount) |
107 | return 0; | 109 | return 0; |
@@ -139,7 +141,7 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, | |||
139 | int err; | 141 | int err; |
140 | 142 | ||
141 | err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, | 143 | err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, |
142 | false, true); | 144 | false, true, ha->sync_cnt); |
143 | if (err && err != -EEXIST) | 145 | if (err && err != -EEXIST) |
144 | return err; | 146 | return err; |
145 | 147 | ||
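Throughout this file, ha->synced changes meaning from a boolean to a reference count, and __hw_addr_add_ex() gains a sync_count argument so it can distinguish a first-time sync from a repeat by the same source list. A condensed sketch of the new rules, combining the add and del hunks above:

    /* add path */
    if (sync) {
            if (ha->synced && sync_count)
                    return -EEXIST;         /* this source already synced the address */
            ha->synced++;                   /* one more synced reference */
    }
    ha->refcount++;

    /* del path (__hw_addr_del_entry) */
    if (sync)
            ha->synced--;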
@@ -186,47 +188,6 @@ static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, | |||
186 | return err; | 188 | return err; |
187 | } | 189 | } |
188 | 190 | ||
189 | int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | ||
190 | struct netdev_hw_addr_list *from_list, | ||
191 | int addr_len, unsigned char addr_type) | ||
192 | { | ||
193 | int err; | ||
194 | struct netdev_hw_addr *ha, *ha2; | ||
195 | unsigned char type; | ||
196 | |||
197 | list_for_each_entry(ha, &from_list->list, list) { | ||
198 | type = addr_type ? addr_type : ha->type; | ||
199 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); | ||
200 | if (err) | ||
201 | goto unroll; | ||
202 | } | ||
203 | return 0; | ||
204 | |||
205 | unroll: | ||
206 | list_for_each_entry(ha2, &from_list->list, list) { | ||
207 | if (ha2 == ha) | ||
208 | break; | ||
209 | type = addr_type ? addr_type : ha2->type; | ||
210 | __hw_addr_del(to_list, ha2->addr, addr_len, type); | ||
211 | } | ||
212 | return err; | ||
213 | } | ||
214 | EXPORT_SYMBOL(__hw_addr_add_multiple); | ||
215 | |||
216 | void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | ||
217 | struct netdev_hw_addr_list *from_list, | ||
218 | int addr_len, unsigned char addr_type) | ||
219 | { | ||
220 | struct netdev_hw_addr *ha; | ||
221 | unsigned char type; | ||
222 | |||
223 | list_for_each_entry(ha, &from_list->list, list) { | ||
224 | type = addr_type ? addr_type : ha->type; | ||
225 | __hw_addr_del(to_list, ha->addr, addr_len, type); | ||
226 | } | ||
227 | } | ||
228 | EXPORT_SYMBOL(__hw_addr_del_multiple); | ||
229 | |||
230 | /* This function only works where there is a strict 1-1 relationship | 191 | /* This function only works where there is a strict 1-1 relationship |
231 | * between source and destination of the sync. If you ever need to | 192 | * between source and destination of the sync. If you ever need to |
232 | * sync addresses to more than 1 destination, you need to use | 193 | * sync addresses to more than 1 destination, you need to use |
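__hw_addr_add_multiple() and __hw_addr_del_multiple() (and their dev_addr_* wrappers further down) are removed rather than converted: with synced now a counter, the sync/unsync API is the supported way to mirror one device's address list onto another, and the bulk helpers appear to have had no remaining in-tree callers (inferred from the clean removal, not stated in the patch). A replacement sketch using dev_uc_sync(), typically issued from the upper device's ndo_set_rx_mode:

    /* keep lower_dev's unicast list in step with dev (sketch) */
    if (dev_uc_sync(lower_dev, dev) < 0)
            netdev_warn(dev, "unicast address sync failed\n");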
@@ -264,7 +225,7 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | |||
264 | } | 225 | } |
265 | EXPORT_SYMBOL(__hw_addr_unsync); | 226 | EXPORT_SYMBOL(__hw_addr_unsync); |
266 | 227 | ||
267 | void __hw_addr_flush(struct netdev_hw_addr_list *list) | 228 | static void __hw_addr_flush(struct netdev_hw_addr_list *list) |
268 | { | 229 | { |
269 | struct netdev_hw_addr *ha, *tmp; | 230 | struct netdev_hw_addr *ha, *tmp; |
270 | 231 | ||
@@ -274,7 +235,6 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list) | |||
274 | } | 235 | } |
275 | list->count = 0; | 236 | list->count = 0; |
276 | } | 237 | } |
277 | EXPORT_SYMBOL(__hw_addr_flush); | ||
278 | 238 | ||
279 | void __hw_addr_init(struct netdev_hw_addr_list *list) | 239 | void __hw_addr_init(struct netdev_hw_addr_list *list) |
280 | { | 240 | { |
@@ -400,59 +360,6 @@ int dev_addr_del(struct net_device *dev, const unsigned char *addr, | |||
400 | } | 360 | } |
401 | EXPORT_SYMBOL(dev_addr_del); | 361 | EXPORT_SYMBOL(dev_addr_del); |
402 | 362 | ||
403 | /** | ||
404 | * dev_addr_add_multiple - Add device addresses from another device | ||
405 | * @to_dev: device to which addresses will be added | ||
406 | * @from_dev: device from which addresses will be added | ||
407 | * @addr_type: address type - 0 means type will be used from from_dev | ||
408 | * | ||
409 | * Add device addresses of the one device to another. | ||
410 | ** | ||
411 | * The caller must hold the rtnl_mutex. | ||
412 | */ | ||
413 | int dev_addr_add_multiple(struct net_device *to_dev, | ||
414 | struct net_device *from_dev, | ||
415 | unsigned char addr_type) | ||
416 | { | ||
417 | int err; | ||
418 | |||
419 | ASSERT_RTNL(); | ||
420 | |||
421 | if (from_dev->addr_len != to_dev->addr_len) | ||
422 | return -EINVAL; | ||
423 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
424 | to_dev->addr_len, addr_type); | ||
425 | if (!err) | ||
426 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
427 | return err; | ||
428 | } | ||
429 | EXPORT_SYMBOL(dev_addr_add_multiple); | ||
430 | |||
431 | /** | ||
432 | * dev_addr_del_multiple - Delete device addresses by another device | ||
433 | * @to_dev: device where the addresses will be deleted | ||
434 | * @from_dev: device supplying the addresses to be deleted | ||
435 | * @addr_type: address type - 0 means type will be used from from_dev | ||
436 | * | ||
437 | * Deletes addresses in to device by the list of addresses in from device. | ||
438 | * | ||
439 | * The caller must hold the rtnl_mutex. | ||
440 | */ | ||
441 | int dev_addr_del_multiple(struct net_device *to_dev, | ||
442 | struct net_device *from_dev, | ||
443 | unsigned char addr_type) | ||
444 | { | ||
445 | ASSERT_RTNL(); | ||
446 | |||
447 | if (from_dev->addr_len != to_dev->addr_len) | ||
448 | return -EINVAL; | ||
449 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
450 | to_dev->addr_len, addr_type); | ||
451 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
452 | return 0; | ||
453 | } | ||
454 | EXPORT_SYMBOL(dev_addr_del_multiple); | ||
455 | |||
456 | /* | 363 | /* |
457 | * Unicast list handling functions | 364 | * Unicast list handling functions |
458 | */ | 365 | */ |
@@ -676,7 +583,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, | |||
676 | 583 | ||
677 | netif_addr_lock_bh(dev); | 584 | netif_addr_lock_bh(dev); |
678 | err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, | 585 | err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, |
679 | NETDEV_HW_ADDR_T_MULTICAST, global, false); | 586 | NETDEV_HW_ADDR_T_MULTICAST, global, false, 0); |
680 | if (!err) | 587 | if (!err) |
681 | __dev_set_rx_mode(dev); | 588 | __dev_set_rx_mode(dev); |
682 | netif_addr_unlock_bh(dev); | 589 | netif_addr_unlock_bh(dev); |
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 5b7d0e1d0664..cf999e09bcd2 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c | |||
@@ -327,6 +327,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
327 | cmd == SIOCBRADDIF || | 327 | cmd == SIOCBRADDIF || |
328 | cmd == SIOCBRDELIF || | 328 | cmd == SIOCBRDELIF || |
329 | cmd == SIOCSHWTSTAMP || | 329 | cmd == SIOCSHWTSTAMP || |
330 | cmd == SIOCGHWTSTAMP || | ||
330 | cmd == SIOCWANDEV) { | 331 | cmd == SIOCWANDEV) { |
331 | err = -EOPNOTSUPP; | 332 | err = -EOPNOTSUPP; |
332 | if (ops->ndo_do_ioctl) { | 333 | if (ops->ndo_do_ioctl) { |
@@ -546,6 +547,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
546 | */ | 547 | */ |
547 | default: | 548 | default: |
548 | if (cmd == SIOCWANDEV || | 549 | if (cmd == SIOCWANDEV || |
550 | cmd == SIOCGHWTSTAMP || | ||
549 | (cmd >= SIOCDEVPRIVATE && | 551 | (cmd >= SIOCDEVPRIVATE && |
550 | cmd <= SIOCDEVPRIVATE + 15)) { | 552 | cmd <= SIOCDEVPRIVATE + 15)) { |
551 | dev_load(net, ifr.ifr_name); | 553 | dev_load(net, ifr.ifr_name); |
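Both hunks wire up the new SIOCGHWTSTAMP, the read-only counterpart to SIOCSHWTSTAMP: the first routes it to the driver's ndo_do_ioctl, the second adds it to the dev_load() list so a plain "get" can autoload the interface's module. Unlike the set ioctl it is not gated on CAP_NET_ADMIN, since it only reports the current timestamping configuration. A hypothetical driver-side handler; foo_hwtstamp_get/set are illustrative names, not from this patch:

    static int foo_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
    {
            switch (cmd) {
            case SIOCSHWTSTAMP:
                    return foo_hwtstamp_set(dev, ifr);  /* apply new config */
            case SIOCGHWTSTAMP:
                    return foo_hwtstamp_get(dev, ifr);  /* copy current config out */
            default:
                    return -EOPNOTSUPP;
            }
    }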
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index f409e0bd35c0..185c341fafbd 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
@@ -745,6 +745,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, | |||
745 | attach_rules(&ops->rules_list, dev); | 745 | attach_rules(&ops->rules_list, dev); |
746 | break; | 746 | break; |
747 | 747 | ||
748 | case NETDEV_CHANGENAME: | ||
749 | list_for_each_entry(ops, &net->rules_ops, list) { | ||
750 | detach_rules(&ops->rules_list, dev); | ||
751 | attach_rules(&ops->rules_list, dev); | ||
752 | } | ||
753 | break; | ||
754 | |||
748 | case NETDEV_UNREGISTER: | 755 | case NETDEV_UNREGISTER: |
749 | list_for_each_entry(ops, &net->rules_ops, list) | 756 | list_for_each_entry(ops, &net->rules_ops, list) |
750 | detach_rules(&ops->rules_list, dev); | 757 | detach_rules(&ops->rules_list, dev); |
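FIB rules can match interfaces by name, but the match is cached as an ifindex when the rule attaches. Before this hunk a rename left rules bound under the old name (or unbound, if the new name should now match); detaching and re-attaching on NETDEV_CHANGENAME re-resolves every rule against dev->name. Annotated form of the added case:

    case NETDEV_CHANGENAME:
            list_for_each_entry(ops, &net->rules_ops, list) {
                    detach_rules(&ops->rules_list, dev);    /* drop bindings made under the old name */
                    attach_rules(&ops->rules_list, dev);    /* re-match iif/oif strings to the new name */
            }
            break;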
diff --git a/net/core/filter.c b/net/core/filter.c index 01b780856db2..ad30d626a5bd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -36,7 +36,6 @@ | |||
36 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
37 | #include <asm/unaligned.h> | 37 | #include <asm/unaligned.h> |
38 | #include <linux/filter.h> | 38 | #include <linux/filter.h> |
39 | #include <linux/reciprocal_div.h> | ||
40 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
41 | #include <linux/seccomp.h> | 40 | #include <linux/seccomp.h> |
42 | #include <linux/if_vlan.h> | 41 | #include <linux/if_vlan.h> |
@@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb, | |||
166 | A /= X; | 165 | A /= X; |
167 | continue; | 166 | continue; |
168 | case BPF_S_ALU_DIV_K: | 167 | case BPF_S_ALU_DIV_K: |
169 | A = reciprocal_divide(A, K); | 168 | A /= K; |
170 | continue; | 169 | continue; |
171 | case BPF_S_ALU_MOD_X: | 170 | case BPF_S_ALU_MOD_X: |
172 | if (X == 0) | 171 | if (X == 0) |
@@ -553,11 +552,6 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) | |||
553 | /* Some instructions need special checks */ | 552 | /* Some instructions need special checks */ |
554 | switch (code) { | 553 | switch (code) { |
555 | case BPF_S_ALU_DIV_K: | 554 | case BPF_S_ALU_DIV_K: |
556 | /* check for division by zero */ | ||
557 | if (ftest->k == 0) | ||
558 | return -EINVAL; | ||
559 | ftest->k = reciprocal_value(ftest->k); | ||
560 | break; | ||
561 | case BPF_S_ALU_MOD_K: | 555 | case BPF_S_ALU_MOD_K: |
562 | /* check for division by zero */ | 556 | /* check for division by zero */ |
563 | if (ftest->k == 0) | 557 | if (ftest->k == 0) |
@@ -853,27 +847,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) | |||
853 | to->code = decodes[code]; | 847 | to->code = decodes[code]; |
854 | to->jt = filt->jt; | 848 | to->jt = filt->jt; |
855 | to->jf = filt->jf; | 849 | to->jf = filt->jf; |
856 | 850 | to->k = filt->k; | |
857 | if (code == BPF_S_ALU_DIV_K) { | ||
858 | /* | ||
859 | * When loaded this rule user gave us X, which was | ||
860 | * translated into R = r(X). Now we calculate the | ||
861 | * RR = r(R) and report it back. If next time this | ||
862 | * value is loaded and RRR = r(RR) is calculated | ||
863 | * then the R == RRR will be true. | ||
864 | * | ||
865 | * One exception. X == 1 translates into R == 0 and | ||
866 | * we can't calculate RR out of it with r(). | ||
867 | */ | ||
868 | |||
869 | if (filt->k == 0) | ||
870 | to->k = 1; | ||
871 | else | ||
872 | to->k = reciprocal_value(filt->k); | ||
873 | |||
874 | BUG_ON(reciprocal_value(to->k) != filt->k); | ||
875 | } else | ||
876 | to->k = filt->k; | ||
877 | } | 851 | } |
878 | 852 | ||
879 | int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) | 853 | int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) |
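Three related BPF changes: the interpreter's BPF_S_ALU_DIV_K becomes a true division, the load-time rewrite of K into a reciprocal constant disappears, and sk_decode_filter() no longer has to reverse that rewrite, so userspace reads back exactly the K it loaded. Division by zero is still rejected at load time: after the deletion the DIV_K label falls through into MOD_K's check, roughly:

    switch (code) {
    case BPF_S_ALU_DIV_K:           /* falls through to the shared check */
    case BPF_S_ALU_MOD_K:
            if (ftest->k == 0)
                    return -EINVAL; /* no divide/modulus by zero */
            break;
    }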
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2fc5beaf5783..e29e810663d7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c | |||
@@ -202,12 +202,12 @@ static __always_inline u32 __flow_hash_1word(u32 a) | |||
202 | } | 202 | } |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * __skb_get_rxhash: calculate a flow hash based on src/dst addresses | 205 | * __skb_get_hash: calculate a flow hash based on src/dst addresses |
206 | * and src/dst port numbers. Sets rxhash in skb to non-zero hash value | 206 | * and src/dst port numbers. Sets rxhash in skb to non-zero hash value |
207 | * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb | 207 | * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb |
208 | * if hash is a canonical 4-tuple hash over transport ports. | 208 | * if hash is a canonical 4-tuple hash over transport ports. |
209 | */ | 209 | */ |
210 | void __skb_get_rxhash(struct sk_buff *skb) | 210 | void __skb_get_hash(struct sk_buff *skb) |
211 | { | 211 | { |
212 | struct flow_keys keys; | 212 | struct flow_keys keys; |
213 | u32 hash; | 213 | u32 hash; |
@@ -234,7 +234,7 @@ void __skb_get_rxhash(struct sk_buff *skb) | |||
234 | 234 | ||
235 | skb->rxhash = hash; | 235 | skb->rxhash = hash; |
236 | } | 236 | } |
237 | EXPORT_SYMBOL(__skb_get_rxhash); | 237 | EXPORT_SYMBOL(__skb_get_hash); |
238 | 238 | ||
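The rename from __skb_get_rxhash() to __skb_get_hash() (and its EXPORT_SYMBOL) reflects that the flow hash is no longer an RX-only concept; the same value feeds TX queue selection and socket steering. A sketch of the inline wrapper this pairs with, assuming the usual lazy-compute pattern in skbuff.h:

    static inline __u32 skb_get_hash(struct sk_buff *skb)
    {
            if (!skb->l4_rxhash)            /* nothing trustworthy cached yet */
                    __skb_get_hash(skb);
            return skb->rxhash;
    }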
239 | /* | 239 | /* |
240 | * Returns a Tx hash based on the given packet descriptor a Tx queues' number | 240 | * Returns a Tx hash based on the given packet descriptor a Tx queues' number |
@@ -323,17 +323,6 @@ u32 __skb_get_poff(const struct sk_buff *skb) | |||
323 | return poff; | 323 | return poff; |
324 | } | 324 | } |
325 | 325 | ||
326 | static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | ||
327 | { | ||
328 | if (unlikely(queue_index >= dev->real_num_tx_queues)) { | ||
329 | net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", | ||
330 | dev->name, queue_index, | ||
331 | dev->real_num_tx_queues); | ||
332 | return 0; | ||
333 | } | ||
334 | return queue_index; | ||
335 | } | ||
336 | |||
337 | static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) | 326 | static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) |
338 | { | 327 | { |
339 | #ifdef CONFIG_XPS | 328 | #ifdef CONFIG_XPS |
@@ -372,7 +361,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) | |||
372 | #endif | 361 | #endif |
373 | } | 362 | } |
374 | 363 | ||
375 | u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) | 364 | static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) |
376 | { | 365 | { |
377 | struct sock *sk = skb->sk; | 366 | struct sock *sk = skb->sk; |
378 | int queue_index = sk_tx_queue_get(sk); | 367 | int queue_index = sk_tx_queue_get(sk); |
@@ -392,7 +381,6 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) | |||
392 | 381 | ||
393 | return queue_index; | 382 | return queue_index; |
394 | } | 383 | } |
395 | EXPORT_SYMBOL(__netdev_pick_tx); | ||
396 | 384 | ||
397 | struct netdev_queue *netdev_pick_tx(struct net_device *dev, | 385 | struct netdev_queue *netdev_pick_tx(struct net_device *dev, |
398 | struct sk_buff *skb, | 386 | struct sk_buff *skb, |
@@ -403,13 +391,13 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, | |||
403 | if (dev->real_num_tx_queues != 1) { | 391 | if (dev->real_num_tx_queues != 1) { |
404 | const struct net_device_ops *ops = dev->netdev_ops; | 392 | const struct net_device_ops *ops = dev->netdev_ops; |
405 | if (ops->ndo_select_queue) | 393 | if (ops->ndo_select_queue) |
406 | queue_index = ops->ndo_select_queue(dev, skb, | 394 | queue_index = ops->ndo_select_queue(dev, skb, accel_priv, |
407 | accel_priv); | 395 | __netdev_pick_tx); |
408 | else | 396 | else |
409 | queue_index = __netdev_pick_tx(dev, skb); | 397 | queue_index = __netdev_pick_tx(dev, skb); |
410 | 398 | ||
411 | if (!accel_priv) | 399 | if (!accel_priv) |
412 | queue_index = dev_cap_txqueue(dev, queue_index); | 400 | queue_index = netdev_cap_txqueue(dev, queue_index); |
413 | } | 401 | } |
414 | 402 | ||
415 | skb_set_queue_mapping(skb, queue_index); | 403 | skb_set_queue_mapping(skb, queue_index); |
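Two queue-selection changes meet here: __netdev_pick_tx() goes static and is instead handed to drivers as a fallback argument of ndo_select_queue, and the local dev_cap_txqueue() clamp is replaced by a shared netdev_cap_txqueue() helper. A driver can now special-case a few flows and defer everything else to the core policy; a sketch with hypothetical foo_* names and the assumed select_queue_fallback_t typedef:

    static u16 foo_select_queue(struct net_device *dev, struct sk_buff *skb,
                                void *accel_priv,
                                select_queue_fallback_t fallback)
    {
            if (foo_is_special_flow(skb))           /* hypothetical predicate */
                    return foo_special_queue(dev, skb);
            return fallback(dev, skb);              /* default: __netdev_pick_tx */
    }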
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 932c6d7cf666..e16129019c66 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -38,6 +38,8 @@ | |||
38 | #include <linux/random.h> | 38 | #include <linux/random.h> |
39 | #include <linux/string.h> | 39 | #include <linux/string.h> |
40 | #include <linux/log2.h> | 40 | #include <linux/log2.h> |
41 | #include <linux/inetdevice.h> | ||
42 | #include <net/addrconf.h> | ||
41 | 43 | ||
42 | #define DEBUG | 44 | #define DEBUG |
43 | #define NEIGH_DEBUG 1 | 45 | #define NEIGH_DEBUG 1 |
@@ -115,7 +117,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) | |||
115 | 117 | ||
116 | unsigned long neigh_rand_reach_time(unsigned long base) | 118 | unsigned long neigh_rand_reach_time(unsigned long base) |
117 | { | 119 | { |
118 | return base ? (net_random() % base) + (base >> 1) : 0; | 120 | return base ? (prandom_u32() % base) + (base >> 1) : 0; |
119 | } | 121 | } |
120 | EXPORT_SYMBOL(neigh_rand_reach_time); | 122 | EXPORT_SYMBOL(neigh_rand_reach_time); |
121 | 123 | ||
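net_random() was by this point only an alias, so the conversion is mechanical; the jitter itself, a value uniform in [base/2, 3*base/2), is unchanged:

    /* assumed alias being retired: #define net_random() prandom_u32() */
    return base ? (prandom_u32() % base) + (base >> 1) : 0;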
@@ -497,7 +499,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, | |||
497 | goto out_neigh_release; | 499 | goto out_neigh_release; |
498 | } | 500 | } |
499 | 501 | ||
500 | n->confirmed = jiffies - (n->parms->base_reachable_time << 1); | 502 | n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); |
501 | 503 | ||
502 | write_lock_bh(&tbl->lock); | 504 | write_lock_bh(&tbl->lock); |
503 | nht = rcu_dereference_protected(tbl->nht, | 505 | nht = rcu_dereference_protected(tbl->nht, |
@@ -764,9 +766,6 @@ static void neigh_periodic_work(struct work_struct *work) | |||
764 | nht = rcu_dereference_protected(tbl->nht, | 766 | nht = rcu_dereference_protected(tbl->nht, |
765 | lockdep_is_held(&tbl->lock)); | 767 | lockdep_is_held(&tbl->lock)); |
766 | 768 | ||
767 | if (atomic_read(&tbl->entries) < tbl->gc_thresh1) | ||
768 | goto out; | ||
769 | |||
770 | /* | 769 | /* |
771 | * periodically recompute ReachableTime from random function | 770 | * periodically recompute ReachableTime from random function |
772 | */ | 771 | */ |
@@ -776,9 +775,12 @@ static void neigh_periodic_work(struct work_struct *work) | |||
776 | tbl->last_rand = jiffies; | 775 | tbl->last_rand = jiffies; |
777 | for (p = &tbl->parms; p; p = p->next) | 776 | for (p = &tbl->parms; p; p = p->next) |
778 | p->reachable_time = | 777 | p->reachable_time = |
779 | neigh_rand_reach_time(p->base_reachable_time); | 778 | neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); |
780 | } | 779 | } |
781 | 780 | ||
781 | if (atomic_read(&tbl->entries) < tbl->gc_thresh1) | ||
782 | goto out; | ||
783 | |||
782 | for (i = 0 ; i < (1 << nht->hash_shift); i++) { | 784 | for (i = 0 ; i < (1 << nht->hash_shift); i++) { |
783 | np = &nht->hash_buckets[i]; | 785 | np = &nht->hash_buckets[i]; |
784 | 786 | ||
@@ -799,7 +801,7 @@ static void neigh_periodic_work(struct work_struct *work) | |||
799 | 801 | ||
800 | if (atomic_read(&n->refcnt) == 1 && | 802 | if (atomic_read(&n->refcnt) == 1 && |
801 | (state == NUD_FAILED || | 803 | (state == NUD_FAILED || |
802 | time_after(jiffies, n->used + n->parms->gc_staletime))) { | 804 | time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { |
803 | *np = n->next; | 805 | *np = n->next; |
804 | n->dead = 1; | 806 | n->dead = 1; |
805 | write_unlock(&n->lock); | 807 | write_unlock(&n->lock); |
@@ -822,12 +824,12 @@ next_elt: | |||
822 | lockdep_is_held(&tbl->lock)); | 824 | lockdep_is_held(&tbl->lock)); |
823 | } | 825 | } |
824 | out: | 826 | out: |
825 | /* Cycle through all hash buckets every base_reachable_time/2 ticks. | 827 | /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks. |
826 | * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 | 828 | * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2 |
827 | * base_reachable_time. | 829 | * BASE_REACHABLE_TIME. |
828 | */ | 830 | */ |
829 | schedule_delayed_work(&tbl->gc_work, | 831 | queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, |
830 | tbl->parms.base_reachable_time >> 1); | 832 | NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); |
831 | write_unlock_bh(&tbl->lock); | 833 | write_unlock_bh(&tbl->lock); |
832 | } | 834 | } |
833 | 835 | ||
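Two independent fixes land in neigh_periodic_work(). First, the gc_thresh1 early-exit moves below the ReachableTime refresh: previously a table that stayed small jumped straight to out: and its parms never got a fresh randomized reachable_time. Second, the work is requeued on system_power_efficient_wq, which (when CONFIG_WQ_POWER_EFFICIENT is enabled) lets the periodic GC run on an already-awake CPU instead of waking an idle one. The reordered skeleton:

    /* refresh reachable_time periodically, regardless of table size */
    if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
            tbl->last_rand = jiffies;
            for (p = &tbl->parms; p; p = p->next)
                    p->reachable_time =
                            neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
    }

    /* only then consider skipping the bucket walk */
    if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
            goto out;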
@@ -835,8 +837,9 @@ static __inline__ int neigh_max_probes(struct neighbour *n) | |||
835 | { | 837 | { |
836 | struct neigh_parms *p = n->parms; | 838 | struct neigh_parms *p = n->parms; |
837 | return (n->nud_state & NUD_PROBE) ? | 839 | return (n->nud_state & NUD_PROBE) ? |
838 | p->ucast_probes : | 840 | NEIGH_VAR(p, UCAST_PROBES) : |
839 | p->ucast_probes + p->app_probes + p->mcast_probes; | 841 | NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + |
842 | NEIGH_VAR(p, MCAST_PROBES); | ||
840 | } | 843 | } |
841 | 844 | ||
842 | static void neigh_invalidate(struct neighbour *neigh) | 845 | static void neigh_invalidate(struct neighbour *neigh) |
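From this point on, the per-neighbour tunables (retrans_time, ucast_probes and friends) are no longer individual struct fields; they are slots in a data[] array inside neigh_parms, read via NEIGH_VAR() and written via NEIGH_VAR_SET(). That indirection is what the sysctl rework at the end of this file builds on. Assumed shape of the accessors (the real definitions live in include/net/neighbour.h):

    #define NEIGH_VAR(p, attr)      ((p)->data[NEIGH_VAR_ ## attr])

    static inline void neigh_var_set(struct neigh_parms *p, int index, int val)
    {
            set_bit(index, p->data_state);  /* mark the knob as overridden */
            p->data[index] = val;
    }
    #define NEIGH_VAR_SET(p, attr, val) \
            neigh_var_set(p, NEIGH_VAR_ ## attr, val)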
@@ -901,12 +904,13 @@ static void neigh_timer_handler(unsigned long arg) | |||
901 | neigh_dbg(2, "neigh %p is still alive\n", neigh); | 904 | neigh_dbg(2, "neigh %p is still alive\n", neigh); |
902 | next = neigh->confirmed + neigh->parms->reachable_time; | 905 | next = neigh->confirmed + neigh->parms->reachable_time; |
903 | } else if (time_before_eq(now, | 906 | } else if (time_before_eq(now, |
904 | neigh->used + neigh->parms->delay_probe_time)) { | 907 | neigh->used + |
908 | NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { | ||
905 | neigh_dbg(2, "neigh %p is delayed\n", neigh); | 909 | neigh_dbg(2, "neigh %p is delayed\n", neigh); |
906 | neigh->nud_state = NUD_DELAY; | 910 | neigh->nud_state = NUD_DELAY; |
907 | neigh->updated = jiffies; | 911 | neigh->updated = jiffies; |
908 | neigh_suspect(neigh); | 912 | neigh_suspect(neigh); |
909 | next = now + neigh->parms->delay_probe_time; | 913 | next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); |
910 | } else { | 914 | } else { |
911 | neigh_dbg(2, "neigh %p is suspected\n", neigh); | 915 | neigh_dbg(2, "neigh %p is suspected\n", neigh); |
912 | neigh->nud_state = NUD_STALE; | 916 | neigh->nud_state = NUD_STALE; |
@@ -916,7 +920,8 @@ static void neigh_timer_handler(unsigned long arg) | |||
916 | } | 920 | } |
917 | } else if (state & NUD_DELAY) { | 921 | } else if (state & NUD_DELAY) { |
918 | if (time_before_eq(now, | 922 | if (time_before_eq(now, |
919 | neigh->confirmed + neigh->parms->delay_probe_time)) { | 923 | neigh->confirmed + |
924 | NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { | ||
920 | neigh_dbg(2, "neigh %p is now reachable\n", neigh); | 925 | neigh_dbg(2, "neigh %p is now reachable\n", neigh); |
921 | neigh->nud_state = NUD_REACHABLE; | 926 | neigh->nud_state = NUD_REACHABLE; |
922 | neigh->updated = jiffies; | 927 | neigh->updated = jiffies; |
@@ -928,11 +933,11 @@ static void neigh_timer_handler(unsigned long arg) | |||
928 | neigh->nud_state = NUD_PROBE; | 933 | neigh->nud_state = NUD_PROBE; |
929 | neigh->updated = jiffies; | 934 | neigh->updated = jiffies; |
930 | atomic_set(&neigh->probes, 0); | 935 | atomic_set(&neigh->probes, 0); |
931 | next = now + neigh->parms->retrans_time; | 936 | next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); |
932 | } | 937 | } |
933 | } else { | 938 | } else { |
934 | /* NUD_PROBE|NUD_INCOMPLETE */ | 939 | /* NUD_PROBE|NUD_INCOMPLETE */ |
935 | next = now + neigh->parms->retrans_time; | 940 | next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); |
936 | } | 941 | } |
937 | 942 | ||
938 | if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && | 943 | if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && |
@@ -973,13 +978,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) | |||
973 | goto out_unlock_bh; | 978 | goto out_unlock_bh; |
974 | 979 | ||
975 | if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { | 980 | if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { |
976 | if (neigh->parms->mcast_probes + neigh->parms->app_probes) { | 981 | if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + |
982 | NEIGH_VAR(neigh->parms, APP_PROBES)) { | ||
977 | unsigned long next, now = jiffies; | 983 | unsigned long next, now = jiffies; |
978 | 984 | ||
979 | atomic_set(&neigh->probes, neigh->parms->ucast_probes); | 985 | atomic_set(&neigh->probes, |
986 | NEIGH_VAR(neigh->parms, UCAST_PROBES)); | ||
980 | neigh->nud_state = NUD_INCOMPLETE; | 987 | neigh->nud_state = NUD_INCOMPLETE; |
981 | neigh->updated = now; | 988 | neigh->updated = now; |
982 | next = now + max(neigh->parms->retrans_time, HZ/2); | 989 | next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), |
990 | HZ/2); | ||
983 | neigh_add_timer(neigh, next); | 991 | neigh_add_timer(neigh, next); |
984 | immediate_probe = true; | 992 | immediate_probe = true; |
985 | } else { | 993 | } else { |
@@ -994,14 +1002,14 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) | |||
994 | neigh_dbg(2, "neigh %p is delayed\n", neigh); | 1002 | neigh_dbg(2, "neigh %p is delayed\n", neigh); |
995 | neigh->nud_state = NUD_DELAY; | 1003 | neigh->nud_state = NUD_DELAY; |
996 | neigh->updated = jiffies; | 1004 | neigh->updated = jiffies; |
997 | neigh_add_timer(neigh, | 1005 | neigh_add_timer(neigh, jiffies + |
998 | jiffies + neigh->parms->delay_probe_time); | 1006 | NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); |
999 | } | 1007 | } |
1000 | 1008 | ||
1001 | if (neigh->nud_state == NUD_INCOMPLETE) { | 1009 | if (neigh->nud_state == NUD_INCOMPLETE) { |
1002 | if (skb) { | 1010 | if (skb) { |
1003 | while (neigh->arp_queue_len_bytes + skb->truesize > | 1011 | while (neigh->arp_queue_len_bytes + skb->truesize > |
1004 | neigh->parms->queue_len_bytes) { | 1012 | NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { |
1005 | struct sk_buff *buff; | 1013 | struct sk_buff *buff; |
1006 | 1014 | ||
1007 | buff = __skb_dequeue(&neigh->arp_queue); | 1015 | buff = __skb_dequeue(&neigh->arp_queue); |
@@ -1171,7 +1179,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |||
1171 | neigh_update_hhs(neigh); | 1179 | neigh_update_hhs(neigh); |
1172 | if (!(new & NUD_CONNECTED)) | 1180 | if (!(new & NUD_CONNECTED)) |
1173 | neigh->confirmed = jiffies - | 1181 | neigh->confirmed = jiffies - |
1174 | (neigh->parms->base_reachable_time << 1); | 1182 | (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1); |
1175 | notify = 1; | 1183 | notify = 1; |
1176 | } | 1184 | } |
1177 | if (new == old) | 1185 | if (new == old) |
@@ -1231,6 +1239,21 @@ out: | |||
1231 | } | 1239 | } |
1232 | EXPORT_SYMBOL(neigh_update); | 1240 | EXPORT_SYMBOL(neigh_update); |
1233 | 1241 | ||
1242 | /* Update the neigh to listen temporarily for probe responses, even if it is | ||
1243 | * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. | ||
1244 | */ | ||
1245 | void __neigh_set_probe_once(struct neighbour *neigh) | ||
1246 | { | ||
1247 | neigh->updated = jiffies; | ||
1248 | if (!(neigh->nud_state & NUD_FAILED)) | ||
1249 | return; | ||
1250 | neigh->nud_state = NUD_PROBE; | ||
1251 | atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); | ||
1252 | neigh_add_timer(neigh, | ||
1253 | jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); | ||
1254 | } | ||
1255 | EXPORT_SYMBOL(__neigh_set_probe_once); | ||
1256 | |||
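__neigh_set_probe_once() gives callers a sanctioned way to re-arm probing on an entry stuck in NUD_FAILED without a full state reset; as the comment says, the neighbour must be write-locked. Expected call pattern (sketch):

    write_lock_bh(&neigh->lock);
    __neigh_set_probe_once(neigh);  /* NUD_FAILED -> NUD_PROBE, one retrans timer */
    write_unlock_bh(&neigh->lock);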
1234 | struct neighbour *neigh_event_ns(struct neigh_table *tbl, | 1257 | struct neighbour *neigh_event_ns(struct neigh_table *tbl, |
1235 | u8 *lladdr, void *saddr, | 1258 | u8 *lladdr, void *saddr, |
1236 | struct net_device *dev) | 1259 | struct net_device *dev) |
@@ -1392,9 +1415,11 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, | |||
1392 | struct sk_buff *skb) | 1415 | struct sk_buff *skb) |
1393 | { | 1416 | { |
1394 | unsigned long now = jiffies; | 1417 | unsigned long now = jiffies; |
1395 | unsigned long sched_next = now + (net_random() % p->proxy_delay); | ||
1396 | 1418 | ||
1397 | if (tbl->proxy_queue.qlen > p->proxy_qlen) { | 1419 | unsigned long sched_next = now + (prandom_u32() % |
1420 | NEIGH_VAR(p, PROXY_DELAY)); | ||
1421 | |||
1422 | if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { | ||
1398 | kfree_skb(skb); | 1423 | kfree_skb(skb); |
1399 | return; | 1424 | return; |
1400 | } | 1425 | } |
@@ -1441,7 +1466,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, | |||
1441 | p->tbl = tbl; | 1466 | p->tbl = tbl; |
1442 | atomic_set(&p->refcnt, 1); | 1467 | atomic_set(&p->refcnt, 1); |
1443 | p->reachable_time = | 1468 | p->reachable_time = |
1444 | neigh_rand_reach_time(p->base_reachable_time); | 1469 | neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); |
1445 | dev_hold(dev); | 1470 | dev_hold(dev); |
1446 | p->dev = dev; | 1471 | p->dev = dev; |
1447 | write_pnet(&p->net, hold_net(net)); | 1472 | write_pnet(&p->net, hold_net(net)); |
@@ -1458,6 +1483,8 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, | |||
1458 | p->next = tbl->parms.next; | 1483 | p->next = tbl->parms.next; |
1459 | tbl->parms.next = p; | 1484 | tbl->parms.next = p; |
1460 | write_unlock_bh(&tbl->lock); | 1485 | write_unlock_bh(&tbl->lock); |
1486 | |||
1487 | neigh_parms_data_state_cleanall(p); | ||
1461 | } | 1488 | } |
1462 | return p; | 1489 | return p; |
1463 | } | 1490 | } |
@@ -1510,7 +1537,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) | |||
1510 | write_pnet(&tbl->parms.net, &init_net); | 1537 | write_pnet(&tbl->parms.net, &init_net); |
1511 | atomic_set(&tbl->parms.refcnt, 1); | 1538 | atomic_set(&tbl->parms.refcnt, 1); |
1512 | tbl->parms.reachable_time = | 1539 | tbl->parms.reachable_time = |
1513 | neigh_rand_reach_time(tbl->parms.base_reachable_time); | 1540 | neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); |
1514 | 1541 | ||
1515 | tbl->stats = alloc_percpu(struct neigh_statistics); | 1542 | tbl->stats = alloc_percpu(struct neigh_statistics); |
1516 | if (!tbl->stats) | 1543 | if (!tbl->stats) |
@@ -1538,7 +1565,8 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) | |||
1538 | 1565 | ||
1539 | rwlock_init(&tbl->lock); | 1566 | rwlock_init(&tbl->lock); |
1540 | INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); | 1567 | INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); |
1541 | schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); | 1568 | queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, |
1569 | tbl->parms.reachable_time); | ||
1542 | setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); | 1570 | setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); |
1543 | skb_queue_head_init_class(&tbl->proxy_queue, | 1571 | skb_queue_head_init_class(&tbl->proxy_queue, |
1544 | &neigh_table_proxy_queue_class); | 1572 | &neigh_table_proxy_queue_class); |
@@ -1778,24 +1806,32 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) | |||
1778 | if ((parms->dev && | 1806 | if ((parms->dev && |
1779 | nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || | 1807 | nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || |
1780 | nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || | 1808 | nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || |
1781 | nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || | 1809 | nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, |
1810 | NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || | ||
1782 | /* approximative value for deprecated QUEUE_LEN (in packets) */ | 1811 | /* approximative value for deprecated QUEUE_LEN (in packets) */ |
1783 | nla_put_u32(skb, NDTPA_QUEUE_LEN, | 1812 | nla_put_u32(skb, NDTPA_QUEUE_LEN, |
1784 | parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) || | 1813 | NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) || |
1785 | nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || | 1814 | nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) || |
1786 | nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || | 1815 | nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) || |
1787 | nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || | 1816 | nla_put_u32(skb, NDTPA_UCAST_PROBES, |
1788 | nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) || | 1817 | NEIGH_VAR(parms, UCAST_PROBES)) || |
1818 | nla_put_u32(skb, NDTPA_MCAST_PROBES, | ||
1819 | NEIGH_VAR(parms, MCAST_PROBES)) || | ||
1789 | nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || | 1820 | nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || |
1790 | nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, | 1821 | nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, |
1791 | parms->base_reachable_time) || | 1822 | NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || |
1792 | nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) || | 1823 | nla_put_msecs(skb, NDTPA_GC_STALETIME, |
1824 | NEIGH_VAR(parms, GC_STALETIME)) || | ||
1793 | nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, | 1825 | nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, |
1794 | parms->delay_probe_time) || | 1826 | NEIGH_VAR(parms, DELAY_PROBE_TIME)) || |
1795 | nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) || | 1827 | nla_put_msecs(skb, NDTPA_RETRANS_TIME, |
1796 | nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) || | 1828 | NEIGH_VAR(parms, RETRANS_TIME)) || |
1797 | nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) || | 1829 | nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, |
1798 | nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime)) | 1830 | NEIGH_VAR(parms, ANYCAST_DELAY)) || |
1831 | nla_put_msecs(skb, NDTPA_PROXY_DELAY, | ||
1832 | NEIGH_VAR(parms, PROXY_DELAY)) || | ||
1833 | nla_put_msecs(skb, NDTPA_LOCKTIME, | ||
1834 | NEIGH_VAR(parms, LOCKTIME))) | ||
1799 | goto nla_put_failure; | 1835 | goto nla_put_failure; |
1800 | return nla_nest_end(skb, nest); | 1836 | return nla_nest_end(skb, nest); |
1801 | 1837 | ||
@@ -2011,44 +2047,57 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
2011 | 2047 | ||
2012 | switch (i) { | 2048 | switch (i) { |
2013 | case NDTPA_QUEUE_LEN: | 2049 | case NDTPA_QUEUE_LEN: |
2014 | p->queue_len_bytes = nla_get_u32(tbp[i]) * | 2050 | NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, |
2015 | SKB_TRUESIZE(ETH_FRAME_LEN); | 2051 | nla_get_u32(tbp[i]) * |
2052 | SKB_TRUESIZE(ETH_FRAME_LEN)); | ||
2016 | break; | 2053 | break; |
2017 | case NDTPA_QUEUE_LENBYTES: | 2054 | case NDTPA_QUEUE_LENBYTES: |
2018 | p->queue_len_bytes = nla_get_u32(tbp[i]); | 2055 | NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, |
2056 | nla_get_u32(tbp[i])); | ||
2019 | break; | 2057 | break; |
2020 | case NDTPA_PROXY_QLEN: | 2058 | case NDTPA_PROXY_QLEN: |
2021 | p->proxy_qlen = nla_get_u32(tbp[i]); | 2059 | NEIGH_VAR_SET(p, PROXY_QLEN, |
2060 | nla_get_u32(tbp[i])); | ||
2022 | break; | 2061 | break; |
2023 | case NDTPA_APP_PROBES: | 2062 | case NDTPA_APP_PROBES: |
2024 | p->app_probes = nla_get_u32(tbp[i]); | 2063 | NEIGH_VAR_SET(p, APP_PROBES, |
2064 | nla_get_u32(tbp[i])); | ||
2025 | break; | 2065 | break; |
2026 | case NDTPA_UCAST_PROBES: | 2066 | case NDTPA_UCAST_PROBES: |
2027 | p->ucast_probes = nla_get_u32(tbp[i]); | 2067 | NEIGH_VAR_SET(p, UCAST_PROBES, |
2068 | nla_get_u32(tbp[i])); | ||
2028 | break; | 2069 | break; |
2029 | case NDTPA_MCAST_PROBES: | 2070 | case NDTPA_MCAST_PROBES: |
2030 | p->mcast_probes = nla_get_u32(tbp[i]); | 2071 | NEIGH_VAR_SET(p, MCAST_PROBES, |
2072 | nla_get_u32(tbp[i])); | ||
2031 | break; | 2073 | break; |
2032 | case NDTPA_BASE_REACHABLE_TIME: | 2074 | case NDTPA_BASE_REACHABLE_TIME: |
2033 | p->base_reachable_time = nla_get_msecs(tbp[i]); | 2075 | NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, |
2076 | nla_get_msecs(tbp[i])); | ||
2034 | break; | 2077 | break; |
2035 | case NDTPA_GC_STALETIME: | 2078 | case NDTPA_GC_STALETIME: |
2036 | p->gc_staletime = nla_get_msecs(tbp[i]); | 2079 | NEIGH_VAR_SET(p, GC_STALETIME, |
2080 | nla_get_msecs(tbp[i])); | ||
2037 | break; | 2081 | break; |
2038 | case NDTPA_DELAY_PROBE_TIME: | 2082 | case NDTPA_DELAY_PROBE_TIME: |
2039 | p->delay_probe_time = nla_get_msecs(tbp[i]); | 2083 | NEIGH_VAR_SET(p, DELAY_PROBE_TIME, |
2084 | nla_get_msecs(tbp[i])); | ||
2040 | break; | 2085 | break; |
2041 | case NDTPA_RETRANS_TIME: | 2086 | case NDTPA_RETRANS_TIME: |
2042 | p->retrans_time = nla_get_msecs(tbp[i]); | 2087 | NEIGH_VAR_SET(p, RETRANS_TIME, |
2088 | nla_get_msecs(tbp[i])); | ||
2043 | break; | 2089 | break; |
2044 | case NDTPA_ANYCAST_DELAY: | 2090 | case NDTPA_ANYCAST_DELAY: |
2045 | p->anycast_delay = nla_get_msecs(tbp[i]); | 2091 | NEIGH_VAR_SET(p, ANYCAST_DELAY, |
2092 | nla_get_msecs(tbp[i])); | ||
2046 | break; | 2093 | break; |
2047 | case NDTPA_PROXY_DELAY: | 2094 | case NDTPA_PROXY_DELAY: |
2048 | p->proxy_delay = nla_get_msecs(tbp[i]); | 2095 | NEIGH_VAR_SET(p, PROXY_DELAY, |
2096 | nla_get_msecs(tbp[i])); | ||
2049 | break; | 2097 | break; |
2050 | case NDTPA_LOCKTIME: | 2098 | case NDTPA_LOCKTIME: |
2051 | p->locktime = nla_get_msecs(tbp[i]); | 2099 | NEIGH_VAR_SET(p, LOCKTIME, |
2100 | nla_get_msecs(tbp[i])); | ||
2052 | break; | 2101 | break; |
2053 | } | 2102 | } |
2054 | } | 2103 | } |
@@ -2789,133 +2838,167 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write, | |||
2789 | return ret; | 2838 | return ret; |
2790 | } | 2839 | } |
2791 | 2840 | ||
2792 | enum { | 2841 | static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, |
2793 | NEIGH_VAR_MCAST_PROBE, | 2842 | int family) |
2794 | NEIGH_VAR_UCAST_PROBE, | 2843 | { |
2795 | NEIGH_VAR_APP_PROBE, | 2844 | switch (family) { |
2796 | NEIGH_VAR_RETRANS_TIME, | 2845 | case AF_INET: |
2797 | NEIGH_VAR_BASE_REACHABLE_TIME, | 2846 | return __in_dev_arp_parms_get_rcu(dev); |
2798 | NEIGH_VAR_DELAY_PROBE_TIME, | 2847 | case AF_INET6: |
2799 | NEIGH_VAR_GC_STALETIME, | 2848 | return __in6_dev_nd_parms_get_rcu(dev); |
2800 | NEIGH_VAR_QUEUE_LEN, | 2849 | } |
2801 | NEIGH_VAR_QUEUE_LEN_BYTES, | 2850 | return NULL; |
2802 | NEIGH_VAR_PROXY_QLEN, | 2851 | } |
2803 | NEIGH_VAR_ANYCAST_DELAY, | 2852 | |
2804 | NEIGH_VAR_PROXY_DELAY, | 2853 | static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, |
2805 | NEIGH_VAR_LOCKTIME, | 2854 | int index) |
2806 | NEIGH_VAR_RETRANS_TIME_MS, | 2855 | { |
2807 | NEIGH_VAR_BASE_REACHABLE_TIME_MS, | 2856 | struct net_device *dev; |
2808 | NEIGH_VAR_GC_INTERVAL, | 2857 | int family = neigh_parms_family(p); |
2809 | NEIGH_VAR_GC_THRESH1, | 2858 | |
2810 | NEIGH_VAR_GC_THRESH2, | 2859 | rcu_read_lock(); |
2811 | NEIGH_VAR_GC_THRESH3, | 2860 | for_each_netdev_rcu(net, dev) { |
2812 | NEIGH_VAR_MAX | 2861 | struct neigh_parms *dst_p = |
2813 | }; | 2862 | neigh_get_dev_parms_rcu(dev, family); |
2863 | |||
2864 | if (dst_p && !test_bit(index, dst_p->data_state)) | ||
2865 | dst_p->data[index] = p->data[index]; | ||
2866 | } | ||
2867 | rcu_read_unlock(); | ||
2868 | } | ||
2869 | |||
2870 | static void neigh_proc_update(struct ctl_table *ctl, int write) | ||
2871 | { | ||
2872 | struct net_device *dev = ctl->extra1; | ||
2873 | struct neigh_parms *p = ctl->extra2; | ||
2874 | struct net *net = neigh_parms_net(p); | ||
2875 | int index = (int *) ctl->data - p->data; | ||
2876 | |||
2877 | if (!write) | ||
2878 | return; | ||
2879 | |||
2880 | set_bit(index, p->data_state); | ||
2881 | if (!dev) /* NULL dev means this is default value */ | ||
2882 | neigh_copy_dflt_parms(net, p, index); | ||
2883 | } | ||
2884 | |||
2885 | static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, | ||
2886 | void __user *buffer, | ||
2887 | size_t *lenp, loff_t *ppos) | ||
2888 | { | ||
2889 | struct ctl_table tmp = *ctl; | ||
2890 | int ret; | ||
2891 | |||
2892 | tmp.extra1 = &zero; | ||
2893 | tmp.extra2 = &int_max; | ||
2894 | |||
2895 | ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); | ||
2896 | neigh_proc_update(ctl, write); | ||
2897 | return ret; | ||
2898 | } | ||
2899 | |||
2900 | int neigh_proc_dointvec(struct ctl_table *ctl, int write, | ||
2901 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2902 | { | ||
2903 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); | ||
2904 | |||
2905 | neigh_proc_update(ctl, write); | ||
2906 | return ret; | ||
2907 | } | ||
2908 | EXPORT_SYMBOL(neigh_proc_dointvec); | ||
2909 | |||
2910 | int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, | ||
2911 | void __user *buffer, | ||
2912 | size_t *lenp, loff_t *ppos) | ||
2913 | { | ||
2914 | int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); | ||
2915 | |||
2916 | neigh_proc_update(ctl, write); | ||
2917 | return ret; | ||
2918 | } | ||
2919 | EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); | ||
2920 | |||
2921 | static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, | ||
2922 | void __user *buffer, | ||
2923 | size_t *lenp, loff_t *ppos) | ||
2924 | { | ||
2925 | int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); | ||
2926 | |||
2927 | neigh_proc_update(ctl, write); | ||
2928 | return ret; | ||
2929 | } | ||
2930 | |||
2931 | int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, | ||
2932 | void __user *buffer, | ||
2933 | size_t *lenp, loff_t *ppos) | ||
2934 | { | ||
2935 | int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); | ||
2936 | |||
2937 | neigh_proc_update(ctl, write); | ||
2938 | return ret; | ||
2939 | } | ||
2940 | EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); | ||
2941 | |||
2942 | static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, | ||
2943 | void __user *buffer, | ||
2944 | size_t *lenp, loff_t *ppos) | ||
2945 | { | ||
2946 | int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); | ||
2947 | |||
2948 | neigh_proc_update(ctl, write); | ||
2949 | return ret; | ||
2950 | } | ||
2951 | |||
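Every handler above follows the same pattern: delegate to the matching generic proc_* helper, then call neigh_proc_update() so a successful write is recorded, marking the knob overridden for a per-device table, or fanning the value out to non-overriding devices for the default table. A future unit type would be wrapped the same way; sketch:

    static int neigh_proc_dointvec_example(struct ctl_table *ctl, int write,
                                           void __user *buffer,
                                           size_t *lenp, loff_t *ppos)
    {
            int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

            neigh_proc_update(ctl, write);  /* record override / propagate default */
            return ret;
    }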
2952 | #define NEIGH_PARMS_DATA_OFFSET(index) \ | ||
2953 | (&((struct neigh_parms *) 0)->data[index]) | ||
2954 | |||
2955 | #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ | ||
2956 | [NEIGH_VAR_ ## attr] = { \ | ||
2957 | .procname = name, \ | ||
2958 | .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ | ||
2959 | .maxlen = sizeof(int), \ | ||
2960 | .mode = mval, \ | ||
2961 | .proc_handler = proc, \ | ||
2962 | } | ||
2963 | |||
2964 | #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ | ||
2965 | NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) | ||
2966 | |||
2967 | #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ | ||
2968 | NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) | ||
2969 | |||
2970 | #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ | ||
2971 | NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) | ||
2972 | |||
2973 | #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \ | ||
2974 | NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies) | ||
2975 | |||
2976 | #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ | ||
2977 | NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) | ||
2978 | |||
2979 | #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ | ||
2980 | NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) | ||
2814 | 2981 | ||
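NEIGH_PARMS_DATA_OFFSET is the classic offsetof trick: indexing data[] through a NULL neigh_parms pointer yields the member's offset encoded as a pointer. The template table therefore stores offsets, and neigh_sysctl_register() (later in this diff) rebases each one onto the real structure:

    /* template entry: .data holds an offset, not an address */
    .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr),

    /* registration: offset + parms pointer = real address */
    t->neigh_vars[i].data += (long) p;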
2815 | static struct neigh_sysctl_table { | 2982 | static struct neigh_sysctl_table { |
2816 | struct ctl_table_header *sysctl_header; | 2983 | struct ctl_table_header *sysctl_header; |
2817 | struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; | 2984 | struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; |
2818 | } neigh_sysctl_template __read_mostly = { | 2985 | } neigh_sysctl_template __read_mostly = { |
2819 | .neigh_vars = { | 2986 | .neigh_vars = { |
2820 | [NEIGH_VAR_MCAST_PROBE] = { | 2987 | NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), |
2821 | .procname = "mcast_solicit", | 2988 | NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), |
2822 | .maxlen = sizeof(int), | 2989 | NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), |
2823 | .mode = 0644, | 2990 | NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), |
2824 | .extra1 = &zero, | 2991 | NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), |
2825 | .extra2 = &int_max, | 2992 | NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), |
2826 | .proc_handler = proc_dointvec_minmax, | 2993 | NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), |
2827 | }, | 2994 | NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), |
2828 | [NEIGH_VAR_UCAST_PROBE] = { | 2995 | NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), |
2829 | .procname = "ucast_solicit", | 2996 | NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), |
2830 | .maxlen = sizeof(int), | 2997 | NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), |
2831 | .mode = 0644, | 2998 | NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), |
2832 | .extra1 = &zero, | 2999 | NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), |
2833 | .extra2 = &int_max, | 3000 | NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), |
2834 | .proc_handler = proc_dointvec_minmax, | 3001 | NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), |
2835 | }, | ||
2836 | [NEIGH_VAR_APP_PROBE] = { | ||
2837 | .procname = "app_solicit", | ||
2838 | .maxlen = sizeof(int), | ||
2839 | .mode = 0644, | ||
2840 | .extra1 = &zero, | ||
2841 | .extra2 = &int_max, | ||
2842 | .proc_handler = proc_dointvec_minmax, | ||
2843 | }, | ||
2844 | [NEIGH_VAR_RETRANS_TIME] = { | ||
2845 | .procname = "retrans_time", | ||
2846 | .maxlen = sizeof(int), | ||
2847 | .mode = 0644, | ||
2848 | .proc_handler = proc_dointvec_userhz_jiffies, | ||
2849 | }, | ||
2850 | [NEIGH_VAR_BASE_REACHABLE_TIME] = { | ||
2851 | .procname = "base_reachable_time", | ||
2852 | .maxlen = sizeof(int), | ||
2853 | .mode = 0644, | ||
2854 | .proc_handler = proc_dointvec_jiffies, | ||
2855 | }, | ||
2856 | [NEIGH_VAR_DELAY_PROBE_TIME] = { | ||
2857 | .procname = "delay_first_probe_time", | ||
2858 | .maxlen = sizeof(int), | ||
2859 | .mode = 0644, | ||
2860 | .proc_handler = proc_dointvec_jiffies, | ||
2861 | }, | ||
2862 | [NEIGH_VAR_GC_STALETIME] = { | ||
2863 | .procname = "gc_stale_time", | ||
2864 | .maxlen = sizeof(int), | ||
2865 | .mode = 0644, | ||
2866 | .proc_handler = proc_dointvec_jiffies, | ||
2867 | }, | ||
2868 | [NEIGH_VAR_QUEUE_LEN] = { | ||
2869 | .procname = "unres_qlen", | ||
2870 | .maxlen = sizeof(int), | ||
2871 | .mode = 0644, | ||
2872 | .proc_handler = proc_unres_qlen, | ||
2873 | }, | ||
2874 | [NEIGH_VAR_QUEUE_LEN_BYTES] = { | ||
2875 | .procname = "unres_qlen_bytes", | ||
2876 | .maxlen = sizeof(int), | ||
2877 | .mode = 0644, | ||
2878 | .extra1 = &zero, | ||
2879 | .proc_handler = proc_dointvec_minmax, | ||
2880 | }, | ||
2881 | [NEIGH_VAR_PROXY_QLEN] = { | ||
2882 | .procname = "proxy_qlen", | ||
2883 | .maxlen = sizeof(int), | ||
2884 | .mode = 0644, | ||
2885 | .extra1 = &zero, | ||
2886 | .extra2 = &int_max, | ||
2887 | .proc_handler = proc_dointvec_minmax, | ||
2888 | }, | ||
2889 | [NEIGH_VAR_ANYCAST_DELAY] = { | ||
2890 | .procname = "anycast_delay", | ||
2891 | .maxlen = sizeof(int), | ||
2892 | .mode = 0644, | ||
2893 | .proc_handler = proc_dointvec_userhz_jiffies, | ||
2894 | }, | ||
2895 | [NEIGH_VAR_PROXY_DELAY] = { | ||
2896 | .procname = "proxy_delay", | ||
2897 | .maxlen = sizeof(int), | ||
2898 | .mode = 0644, | ||
2899 | .proc_handler = proc_dointvec_userhz_jiffies, | ||
2900 | }, | ||
2901 | [NEIGH_VAR_LOCKTIME] = { | ||
2902 | .procname = "locktime", | ||
2903 | .maxlen = sizeof(int), | ||
2904 | .mode = 0644, | ||
2905 | .proc_handler = proc_dointvec_userhz_jiffies, | ||
2906 | }, | ||
2907 | [NEIGH_VAR_RETRANS_TIME_MS] = { | ||
2908 | .procname = "retrans_time_ms", | ||
2909 | .maxlen = sizeof(int), | ||
2910 | .mode = 0644, | ||
2911 | .proc_handler = proc_dointvec_ms_jiffies, | ||
2912 | }, | ||
2913 | [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = { | ||
2914 | .procname = "base_reachable_time_ms", | ||
2915 | .maxlen = sizeof(int), | ||
2916 | .mode = 0644, | ||
2917 | .proc_handler = proc_dointvec_ms_jiffies, | ||
2918 | }, | ||
2919 | [NEIGH_VAR_GC_INTERVAL] = { | 3002 | [NEIGH_VAR_GC_INTERVAL] = { |
2920 | .procname = "gc_interval", | 3003 | .procname = "gc_interval", |
2921 | .maxlen = sizeof(int), | 3004 | .maxlen = sizeof(int), |
@@ -2951,31 +3034,23 @@ static struct neigh_sysctl_table { | |||
2951 | }; | 3034 | }; |
2952 | 3035 | ||
2953 | int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, | 3036 | int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, |
2954 | char *p_name, proc_handler *handler) | 3037 | proc_handler *handler) |
2955 | { | 3038 | { |
3039 | int i; | ||
2956 | struct neigh_sysctl_table *t; | 3040 | struct neigh_sysctl_table *t; |
2957 | const char *dev_name_source = NULL; | 3041 | const char *dev_name_source; |
2958 | char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; | 3042 | char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; |
3043 | char *p_name; | ||
2959 | 3044 | ||
2960 | t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); | 3045 | t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); |
2961 | if (!t) | 3046 | if (!t) |
2962 | goto err; | 3047 | goto err; |
2963 | 3048 | ||
2964 | t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes; | 3049 | for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { |
2965 | t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes; | 3050 | t->neigh_vars[i].data += (long) p; |
2966 | t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes; | 3051 | t->neigh_vars[i].extra1 = dev; |
2967 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time; | 3052 | t->neigh_vars[i].extra2 = p; |
2968 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time; | 3053 | } |
2969 | t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time; | ||
2970 | t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime; | ||
2971 | t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes; | ||
2972 | t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes; | ||
2973 | t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen; | ||
2974 | t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay; | ||
2975 | t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay; | ||
2976 | t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime; | ||
2977 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time; | ||
2978 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time; | ||
2979 | 3054 | ||
2980 | if (dev) { | 3055 | if (dev) { |
2981 | dev_name_source = dev->name; | 3056 | dev_name_source = dev->name; |
@@ -2990,26 +3065,32 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, | |||
2990 | t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; | 3065 | t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; |
2991 | } | 3066 | } |
2992 | 3067 | ||
2993 | |||
2994 | if (handler) { | 3068 | if (handler) { |
2995 | /* RetransTime */ | 3069 | /* RetransTime */ |
2996 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; | 3070 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; |
2997 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev; | ||
2998 | /* ReachableTime */ | 3071 | /* ReachableTime */ |
2999 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; | 3072 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; |
3000 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev; | ||
3001 | /* RetransTime (in milliseconds)*/ | 3073 | /* RetransTime (in milliseconds)*/ |
3002 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; | 3074 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; |
3003 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev; | ||
3004 | /* ReachableTime (in milliseconds) */ | 3075 | /* ReachableTime (in milliseconds) */ |
3005 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; | 3076 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; |
3006 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; | ||
3007 | } | 3077 | } |
3008 | 3078 | ||
3009 | /* Don't export sysctls to unprivileged users */ | 3079 | /* Don't export sysctls to unprivileged users */ |
3010 | if (neigh_parms_net(p)->user_ns != &init_user_ns) | 3080 | if (neigh_parms_net(p)->user_ns != &init_user_ns) |
3011 | t->neigh_vars[0].procname = NULL; | 3081 | t->neigh_vars[0].procname = NULL; |
3012 | 3082 | ||
3083 | switch (neigh_parms_family(p)) { | ||
3084 | case AF_INET: | ||
3085 | p_name = "ipv4"; | ||
3086 | break; | ||
3087 | case AF_INET6: | ||
3088 | p_name = "ipv6"; | ||
3089 | break; | ||
3090 | default: | ||
3091 | BUG(); | ||
3092 | } | ||
3093 | |||
3013 | snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", | 3094 | snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", |
3014 | p_name, dev_name_source); | 3095 | p_name, dev_name_source); |
3015 | t->sysctl_header = | 3096 | t->sysctl_header = |
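
The fourteen per-field assignments collapse into a loop because the template now seeds each .data member with the field's offset inside struct neigh_parms; adding the base pointer p relocates it into the live instance. A minimal standalone sketch of that offset-template pattern, with hypothetical struct and field names (not the kernel's actual template):

/* .data starts as an offset into the per-instance struct and is
 * relocated by adding the instance base address; "params" and the
 * field names are illustrative. */
#include <stddef.h>

struct params {
	int mcast_probes;
	int ucast_probes;
};

struct var {
	const char *name;
	void *data;	/* holds an offset until relocated */
};

static struct var template[2] = {
	{ "mcast_probes", (void *)offsetof(struct params, mcast_probes) },
	{ "ucast_probes", (void *)offsetof(struct params, ucast_probes) },
};

static void relocate(struct var *vars, int n, struct params *p)
{
	int i;

	for (i = 0; i < n; i++)	/* each .data now points into *p */
		vars[i].data = (char *)p + (size_t)vars[i].data;
}
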
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f3edf9635e02..93886246a0b4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -498,17 +498,7 @@ static struct attribute_group wireless_group = { | |||
498 | #define net_class_groups NULL | 498 | #define net_class_groups NULL |
499 | #endif /* CONFIG_SYSFS */ | 499 | #endif /* CONFIG_SYSFS */ |
500 | 500 | ||
501 | #ifdef CONFIG_RPS | 501 | #ifdef CONFIG_SYSFS |
502 | /* | ||
503 | * RX queue sysfs structures and functions. | ||
504 | */ | ||
505 | struct rx_queue_attribute { | ||
506 | struct attribute attr; | ||
507 | ssize_t (*show)(struct netdev_rx_queue *queue, | ||
508 | struct rx_queue_attribute *attr, char *buf); | ||
509 | ssize_t (*store)(struct netdev_rx_queue *queue, | ||
510 | struct rx_queue_attribute *attr, const char *buf, size_t len); | ||
511 | }; | ||
512 | #define to_rx_queue_attr(_attr) container_of(_attr, \ | 502 | #define to_rx_queue_attr(_attr) container_of(_attr, \ |
513 | struct rx_queue_attribute, attr) | 503 | struct rx_queue_attribute, attr) |
514 | 504 | ||
@@ -543,6 +533,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = { | |||
543 | .store = rx_queue_attr_store, | 533 | .store = rx_queue_attr_store, |
544 | }; | 534 | }; |
545 | 535 | ||
536 | #ifdef CONFIG_RPS | ||
546 | static ssize_t show_rps_map(struct netdev_rx_queue *queue, | 537 | static ssize_t show_rps_map(struct netdev_rx_queue *queue, |
547 | struct rx_queue_attribute *attribute, char *buf) | 538 | struct rx_queue_attribute *attribute, char *buf) |
548 | { | 539 | { |
@@ -676,8 +667,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | |||
676 | while ((mask | (mask >> 1)) != mask) | 667 | while ((mask | (mask >> 1)) != mask) |
677 | mask |= (mask >> 1); | 668 | mask |= (mask >> 1); |
678 | /* On 64 bit arches, must check mask fits in table->mask (u32), | 669 | /* On 64 bit arches, must check mask fits in table->mask (u32), |
679 | * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) | 670 | * and on 32bit arches, must check |
680 | * doesnt overflow. | 671 | * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow. |
681 | */ | 672 | */ |
682 | #if BITS_PER_LONG > 32 | 673 | #if BITS_PER_LONG > 32 |
683 | if (mask > (unsigned long)(u32)mask) | 674 | if (mask > (unsigned long)(u32)mask) |
@@ -718,16 +709,20 @@ static struct rx_queue_attribute rps_cpus_attribute = | |||
718 | static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = | 709 | static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = |
719 | __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, | 710 | __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, |
720 | show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); | 711 | show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); |
712 | #endif /* CONFIG_RPS */ | ||
721 | 713 | ||
722 | static struct attribute *rx_queue_default_attrs[] = { | 714 | static struct attribute *rx_queue_default_attrs[] = { |
715 | #ifdef CONFIG_RPS | ||
723 | &rps_cpus_attribute.attr, | 716 | &rps_cpus_attribute.attr, |
724 | &rps_dev_flow_table_cnt_attribute.attr, | 717 | &rps_dev_flow_table_cnt_attribute.attr, |
718 | #endif | ||
725 | NULL | 719 | NULL |
726 | }; | 720 | }; |
727 | 721 | ||
728 | static void rx_queue_release(struct kobject *kobj) | 722 | static void rx_queue_release(struct kobject *kobj) |
729 | { | 723 | { |
730 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | 724 | struct netdev_rx_queue *queue = to_rx_queue(kobj); |
725 | #ifdef CONFIG_RPS | ||
731 | struct rps_map *map; | 726 | struct rps_map *map; |
732 | struct rps_dev_flow_table *flow_table; | 727 | struct rps_dev_flow_table *flow_table; |
733 | 728 | ||
@@ -743,15 +738,29 @@ static void rx_queue_release(struct kobject *kobj) | |||
743 | RCU_INIT_POINTER(queue->rps_flow_table, NULL); | 738 | RCU_INIT_POINTER(queue->rps_flow_table, NULL); |
744 | call_rcu(&flow_table->rcu, rps_dev_flow_table_release); | 739 | call_rcu(&flow_table->rcu, rps_dev_flow_table_release); |
745 | } | 740 | } |
741 | #endif | ||
746 | 742 | ||
747 | memset(kobj, 0, sizeof(*kobj)); | 743 | memset(kobj, 0, sizeof(*kobj)); |
748 | dev_put(queue->dev); | 744 | dev_put(queue->dev); |
749 | } | 745 | } |
750 | 746 | ||
747 | static const void *rx_queue_namespace(struct kobject *kobj) | ||
748 | { | ||
749 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
750 | struct device *dev = &queue->dev->dev; | ||
751 | const void *ns = NULL; | ||
752 | |||
753 | if (dev->class && dev->class->ns_type) | ||
754 | ns = dev->class->namespace(dev); | ||
755 | |||
756 | return ns; | ||
757 | } | ||
758 | |||
751 | static struct kobj_type rx_queue_ktype = { | 759 | static struct kobj_type rx_queue_ktype = { |
752 | .sysfs_ops = &rx_queue_sysfs_ops, | 760 | .sysfs_ops = &rx_queue_sysfs_ops, |
753 | .release = rx_queue_release, | 761 | .release = rx_queue_release, |
754 | .default_attrs = rx_queue_default_attrs, | 762 | .default_attrs = rx_queue_default_attrs, |
763 | .namespace = rx_queue_namespace | ||
755 | }; | 764 | }; |
756 | 765 | ||
757 | static int rx_queue_add_kobject(struct net_device *net, int index) | 766 | static int rx_queue_add_kobject(struct net_device *net, int index) |
@@ -763,25 +772,36 @@ static int rx_queue_add_kobject(struct net_device *net, int index) | |||
763 | kobj->kset = net->queues_kset; | 772 | kobj->kset = net->queues_kset; |
764 | error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, | 773 | error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, |
765 | "rx-%u", index); | 774 | "rx-%u", index); |
766 | if (error) { | 775 | if (error) |
767 | kobject_put(kobj); | 776 | goto exit; |
768 | return error; | 777 | |
778 | if (net->sysfs_rx_queue_group) { | ||
779 | error = sysfs_create_group(kobj, net->sysfs_rx_queue_group); | ||
780 | if (error) | ||
781 | goto exit; | ||
769 | } | 782 | } |
770 | 783 | ||
771 | kobject_uevent(kobj, KOBJ_ADD); | 784 | kobject_uevent(kobj, KOBJ_ADD); |
772 | dev_hold(queue->dev); | 785 | dev_hold(queue->dev); |
773 | 786 | ||
774 | return error; | 787 | return error; |
788 | exit: | ||
789 | kobject_put(kobj); | ||
790 | return error; | ||
775 | } | 791 | } |
776 | #endif /* CONFIG_RPS */ | 792 | #endif /* CONFIG_SYSFS */
777 | 793 | ||
778 | int | 794 | int |
779 | net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | 795 | net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) |
780 | { | 796 | { |
781 | #ifdef CONFIG_RPS | 797 | #ifdef CONFIG_SYSFS |
782 | int i; | 798 | int i; |
783 | int error = 0; | 799 | int error = 0; |
784 | 800 | ||
801 | #ifndef CONFIG_RPS | ||
802 | if (!net->sysfs_rx_queue_group) | ||
803 | return 0; | ||
804 | #endif | ||
785 | for (i = old_num; i < new_num; i++) { | 805 | for (i = old_num; i < new_num; i++) { |
786 | error = rx_queue_add_kobject(net, i); | 806 | error = rx_queue_add_kobject(net, i); |
787 | if (error) { | 807 | if (error) { |
@@ -790,8 +810,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | |||
790 | } | 810 | } |
791 | } | 811 | } |
792 | 812 | ||
793 | while (--i >= new_num) | 813 | while (--i >= new_num) { |
814 | if (net->sysfs_rx_queue_group) | ||
815 | sysfs_remove_group(&net->_rx[i].kobj, | ||
816 | net->sysfs_rx_queue_group); | ||
794 | kobject_put(&net->_rx[i].kobj); | 817 | kobject_put(&net->_rx[i].kobj); |
818 | } | ||
795 | 819 | ||
796 | return error; | 820 | return error; |
797 | #else | 821 | #else |
@@ -1082,10 +1106,23 @@ static void netdev_queue_release(struct kobject *kobj) | |||
1082 | dev_put(queue->dev); | 1106 | dev_put(queue->dev); |
1083 | } | 1107 | } |
1084 | 1108 | ||
1109 | static const void *netdev_queue_namespace(struct kobject *kobj) | ||
1110 | { | ||
1111 | struct netdev_queue *queue = to_netdev_queue(kobj); | ||
1112 | struct device *dev = &queue->dev->dev; | ||
1113 | const void *ns = NULL; | ||
1114 | |||
1115 | if (dev->class && dev->class->ns_type) | ||
1116 | ns = dev->class->namespace(dev); | ||
1117 | |||
1118 | return ns; | ||
1119 | } | ||
1120 | |||
1085 | static struct kobj_type netdev_queue_ktype = { | 1121 | static struct kobj_type netdev_queue_ktype = { |
1086 | .sysfs_ops = &netdev_queue_sysfs_ops, | 1122 | .sysfs_ops = &netdev_queue_sysfs_ops, |
1087 | .release = netdev_queue_release, | 1123 | .release = netdev_queue_release, |
1088 | .default_attrs = netdev_queue_default_attrs, | 1124 | .default_attrs = netdev_queue_default_attrs, |
1125 | .namespace = netdev_queue_namespace, | ||
1089 | }; | 1126 | }; |
1090 | 1127 | ||
1091 | static int netdev_queue_add_kobject(struct net_device *net, int index) | 1128 | static int netdev_queue_add_kobject(struct net_device *net, int index) |
@@ -1155,9 +1192,6 @@ static int register_queue_kobjects(struct net_device *net) | |||
1155 | NULL, &net->dev.kobj); | 1192 | NULL, &net->dev.kobj); |
1156 | if (!net->queues_kset) | 1193 | if (!net->queues_kset) |
1157 | return -ENOMEM; | 1194 | return -ENOMEM; |
1158 | #endif | ||
1159 | |||
1160 | #ifdef CONFIG_RPS | ||
1161 | real_rx = net->real_num_rx_queues; | 1195 | real_rx = net->real_num_rx_queues; |
1162 | #endif | 1196 | #endif |
1163 | real_tx = net->real_num_tx_queues; | 1197 | real_tx = net->real_num_tx_queues; |
@@ -1184,7 +1218,7 @@ static void remove_queue_kobjects(struct net_device *net) | |||
1184 | { | 1218 | { |
1185 | int real_rx = 0, real_tx = 0; | 1219 | int real_rx = 0, real_tx = 0; |
1186 | 1220 | ||
1187 | #ifdef CONFIG_RPS | 1221 | #ifdef CONFIG_SYSFS |
1188 | real_rx = net->real_num_rx_queues; | 1222 | real_rx = net->real_num_rx_queues; |
1189 | #endif | 1223 | #endif |
1190 | real_tx = net->real_num_tx_queues; | 1224 | real_tx = net->real_num_tx_queues; |
@@ -1358,7 +1392,7 @@ void netdev_class_remove_file_ns(struct class_attribute *class_attr, | |||
1358 | } | 1392 | } |
1359 | EXPORT_SYMBOL(netdev_class_remove_file_ns); | 1393 | EXPORT_SYMBOL(netdev_class_remove_file_ns); |
1360 | 1394 | ||
1361 | int netdev_kobject_init(void) | 1395 | int __init netdev_kobject_init(void) |
1362 | { | 1396 | { |
1363 | kobj_ns_type_register(&net_ns_type_operations); | 1397 | kobj_ns_type_register(&net_ns_type_operations); |
1364 | return class_register(&net_class); | 1398 | return class_register(&net_class); |
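
The struct rx_queue_attribute definition moves out of this file (hence the removal above), but the dispatch it enables is unchanged: the generic sysfs_ops entry points use container_of() to recover the typed attribute and its queue before calling the per-attribute handler. A kernel-style sketch of that dispatch, where my_obj/my_attr are hypothetical stand-ins for netdev_rx_queue/rx_queue_attribute:

/* container_of dispatch behind rx_queue_sysfs_ops, with made-up names */
#include <linux/errno.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

struct my_obj {
	struct kobject kobj;
};

struct my_attr {
	struct attribute attr;
	ssize_t (*show)(struct my_obj *obj, struct my_attr *attr, char *buf);
};

static ssize_t my_show(struct kobject *kobj, struct attribute *attr,
		       char *buf)
{
	struct my_attr *a = container_of(attr, struct my_attr, attr);
	struct my_obj *obj = container_of(kobj, struct my_obj, kobj);

	return a->show ? a->show(obj, a, buf) : -EIO;
}

static const struct sysfs_ops my_sysfs_ops = {
	.show = my_show,
};
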
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index bd7751ec1c4d..2745a1b51e03 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef __NET_SYSFS_H__ | 1 | #ifndef __NET_SYSFS_H__ |
2 | #define __NET_SYSFS_H__ | 2 | #define __NET_SYSFS_H__ |
3 | 3 | ||
4 | int netdev_kobject_init(void); | 4 | int __init netdev_kobject_init(void); |
5 | int netdev_register_kobject(struct net_device *); | 5 | int netdev_register_kobject(struct net_device *); |
6 | void netdev_unregister_kobject(struct net_device *); | 6 | void netdev_unregister_kobject(struct net_device *); |
7 | int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); | 7 | int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); |
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c new file mode 100644 index 000000000000..719efd541668 --- /dev/null +++ b/net/core/netclassid_cgroup.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | * net/core/netclassid_cgroup.c Classid Cgroupfs Handling | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/cgroup.h> | ||
15 | #include <linux/fdtable.h> | ||
16 | #include <net/cls_cgroup.h> | ||
17 | #include <net/sock.h> | ||
18 | |||
19 | static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) | ||
20 | { | ||
21 | return css ? container_of(css, struct cgroup_cls_state, css) : NULL; | ||
22 | } | ||
23 | |||
24 | struct cgroup_cls_state *task_cls_state(struct task_struct *p) | ||
25 | { | ||
26 | return css_cls_state(task_css(p, net_cls_subsys_id)); | ||
27 | } | ||
28 | EXPORT_SYMBOL_GPL(task_cls_state); | ||
29 | |||
30 | static struct cgroup_subsys_state * | ||
31 | cgrp_css_alloc(struct cgroup_subsys_state *parent_css) | ||
32 | { | ||
33 | struct cgroup_cls_state *cs; | ||
34 | |||
35 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); | ||
36 | if (!cs) | ||
37 | return ERR_PTR(-ENOMEM); | ||
38 | |||
39 | return &cs->css; | ||
40 | } | ||
41 | |||
42 | static int cgrp_css_online(struct cgroup_subsys_state *css) | ||
43 | { | ||
44 | struct cgroup_cls_state *cs = css_cls_state(css); | ||
45 | struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); | ||
46 | |||
47 | if (parent) | ||
48 | cs->classid = parent->classid; | ||
49 | |||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | static void cgrp_css_free(struct cgroup_subsys_state *css) | ||
54 | { | ||
55 | kfree(css_cls_state(css)); | ||
56 | } | ||
57 | |||
58 | static int update_classid(const void *v, struct file *file, unsigned n) | ||
59 | { | ||
60 | int err; | ||
61 | struct socket *sock = sock_from_file(file, &err); | ||
62 | |||
63 | if (sock) | ||
64 | sock->sk->sk_classid = (u32)(unsigned long)v; | ||
65 | |||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static void cgrp_attach(struct cgroup_subsys_state *css, | ||
70 | struct cgroup_taskset *tset) | ||
71 | { | ||
72 | struct cgroup_cls_state *cs = css_cls_state(css); | ||
73 | void *v = (void *)(unsigned long)cs->classid; | ||
74 | struct task_struct *p; | ||
75 | |||
76 | cgroup_taskset_for_each(p, css, tset) { | ||
77 | task_lock(p); | ||
78 | iterate_fd(p->files, 0, update_classid, v); | ||
79 | task_unlock(p); | ||
80 | } | ||
81 | } | ||
82 | |||
83 | static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) | ||
84 | { | ||
85 | return css_cls_state(css)->classid; | ||
86 | } | ||
87 | |||
88 | static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, | ||
89 | u64 value) | ||
90 | { | ||
91 | css_cls_state(css)->classid = (u32) value; | ||
92 | |||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | static struct cftype ss_files[] = { | ||
97 | { | ||
98 | .name = "classid", | ||
99 | .read_u64 = read_classid, | ||
100 | .write_u64 = write_classid, | ||
101 | }, | ||
102 | { } /* terminate */ | ||
103 | }; | ||
104 | |||
105 | struct cgroup_subsys net_cls_subsys = { | ||
106 | .name = "net_cls", | ||
107 | .css_alloc = cgrp_css_alloc, | ||
108 | .css_online = cgrp_css_online, | ||
109 | .css_free = cgrp_css_free, | ||
110 | .attach = cgrp_attach, | ||
111 | .subsys_id = net_cls_subsys_id, | ||
112 | .base_cftypes = ss_files, | ||
113 | .module = THIS_MODULE, | ||
114 | }; | ||
115 | |||
116 | static int __init init_netclassid_cgroup(void) | ||
117 | { | ||
118 | return cgroup_load_subsys(&net_cls_subsys); | ||
119 | } | ||
120 | __initcall(init_netclassid_cgroup); | ||
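
By convention the value stored in the new net_cls.classid file is a tc-style major:minor handle packed into a u32 as 0xAAAABBBB, which the cgroup tc filter then matches against sk_classid. A hypothetical userspace helper (cgroup path and error handling are illustrative, not part of the patch):

/* Pack major:minor into the 0xAAAABBBB classid format and write it to
 * a cgroup's net_cls.classid file. */
#include <stdio.h>
#include <stdint.h>

static int set_classid(const char *cgroup, uint16_t major, uint16_t minor)
{
	uint32_t classid = ((uint32_t)major << 16) | minor;
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/net_cls.classid", cgroup);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%u\n", classid);
	return fclose(f);
}
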
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 19fe9c717ced..df9e6b1a9759 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -520,8 +520,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) | |||
520 | skb->protocol = eth->h_proto = htons(ETH_P_IP); | 520 | skb->protocol = eth->h_proto = htons(ETH_P_IP); |
521 | } | 521 | } |
522 | 522 | ||
523 | memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); | 523 | ether_addr_copy(eth->h_source, np->dev->dev_addr); |
524 | memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); | 524 | ether_addr_copy(eth->h_dest, np->remote_mac); |
525 | 525 | ||
526 | skb->dev = np->dev; | 526 | skb->dev = np->dev; |
527 | 527 | ||
@@ -742,7 +742,7 @@ static bool pkt_is_ns(struct sk_buff *skb) | |||
742 | struct nd_msg *msg; | 742 | struct nd_msg *msg; |
743 | struct ipv6hdr *hdr; | 743 | struct ipv6hdr *hdr; |
744 | 744 | ||
745 | if (skb->protocol != htons(ETH_P_ARP)) | 745 | if (skb->protocol != htons(ETH_P_IPV6)) |
746 | return false; | 746 | return false; |
747 | if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) | 747 | if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) |
748 | return false; | 748 | return false; |
@@ -948,6 +948,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) | |||
948 | { | 948 | { |
949 | char *cur=opt, *delim; | 949 | char *cur=opt, *delim; |
950 | int ipv6; | 950 | int ipv6; |
951 | bool ipversion_set = false; | ||
951 | 952 | ||
952 | if (*cur != '@') { | 953 | if (*cur != '@') { |
953 | if ((delim = strchr(cur, '@')) == NULL) | 954 | if ((delim = strchr(cur, '@')) == NULL) |
@@ -960,6 +961,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) | |||
960 | cur++; | 961 | cur++; |
961 | 962 | ||
962 | if (*cur != '/') { | 963 | if (*cur != '/') { |
964 | ipversion_set = true; | ||
963 | if ((delim = strchr(cur, '/')) == NULL) | 965 | if ((delim = strchr(cur, '/')) == NULL) |
964 | goto parse_failed; | 966 | goto parse_failed; |
965 | *delim = 0; | 967 | *delim = 0; |
@@ -1002,7 +1004,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) | |||
1002 | ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); | 1004 | ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); |
1003 | if (ipv6 < 0) | 1005 | if (ipv6 < 0) |
1004 | goto parse_failed; | 1006 | goto parse_failed; |
1005 | else if (np->ipv6 != (bool)ipv6) | 1007 | else if (ipversion_set && np->ipv6 != (bool)ipv6) |
1006 | goto parse_failed; | 1008 | goto parse_failed; |
1007 | else | 1009 | else |
1008 | np->ipv6 = (bool)ipv6; | 1010 | np->ipv6 = (bool)ipv6; |
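
The ipversion_set flag matters because np->ipv6 defaults to false: before this fix, a netpoll config that omitted the local address but used an IPv6 remote was rejected, since the unset default was compared against the remote's family. The fixed check, distilled into a standalone predicate with simplified names:

#include <stdbool.h>

/* Only require the local and remote address families to agree when a
 * local address was actually parsed. */
static bool families_ok(bool local_family_set, bool local_is_v6,
			bool remote_is_v6)
{
	return !local_family_set || local_is_v6 == remote_is_v6;
}
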
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 9b7cf6c85f82..9043caedcd08 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c | |||
@@ -30,7 +30,7 @@ | |||
30 | #define PRIOMAP_MIN_SZ 128 | 30 | #define PRIOMAP_MIN_SZ 128 |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Extend @dev->priomap so that it's large enough to accomodate | 33 | * Extend @dev->priomap so that it's large enough to accommodate |
34 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful | 34 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful |
35 | * return. Must be called under rtnl lock. | 35 | * return. Must be called under rtnl lock. |
36 | */ | 36 | */ |
@@ -173,14 +173,14 @@ static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) | |||
173 | return css->cgroup->id; | 173 | return css->cgroup->id; |
174 | } | 174 | } |
175 | 175 | ||
176 | static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft, | 176 | static int read_priomap(struct seq_file *sf, void *v) |
177 | struct cgroup_map_cb *cb) | ||
178 | { | 177 | { |
179 | struct net_device *dev; | 178 | struct net_device *dev; |
180 | 179 | ||
181 | rcu_read_lock(); | 180 | rcu_read_lock(); |
182 | for_each_netdev_rcu(&init_net, dev) | 181 | for_each_netdev_rcu(&init_net, dev) |
183 | cb->fill(cb, dev->name, netprio_prio(css, dev)); | 182 | seq_printf(sf, "%s %u\n", dev->name, |
183 | netprio_prio(seq_css(sf), dev)); | ||
184 | rcu_read_unlock(); | 184 | rcu_read_unlock(); |
185 | return 0; | 185 | return 0; |
186 | } | 186 | } |
@@ -238,7 +238,7 @@ static struct cftype ss_files[] = { | |||
238 | }, | 238 | }, |
239 | { | 239 | { |
240 | .name = "ifpriomap", | 240 | .name = "ifpriomap", |
241 | .read_map = read_priomap, | 241 | .seq_show = read_priomap, |
242 | .write_string = write_priomap, | 242 | .write_string = write_priomap, |
243 | }, | 243 | }, |
244 | { } /* terminate */ | 244 | { } /* terminate */ |
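
The read_map-to-seq_show conversion works because seq_css() recovers the cgroup_subsys_state that the old cgroup_map_cb callback received explicitly. A minimal sketch of the same pattern for a hypothetical single-value file, where my_state and css_to_my_state() are stand-ins:

/* seq_file-based cftype read pattern; names are illustrative */
static int my_counter_show(struct seq_file *sf, void *v)
{
	struct my_state *st = css_to_my_state(seq_css(sf));

	seq_printf(sf, "%llu\n", st->counter);
	return 0;
}
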
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a797fff7f222..fdac61cac1bd 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -389,6 +389,9 @@ struct pktgen_dev { | |||
389 | #ifdef CONFIG_XFRM | 389 | #ifdef CONFIG_XFRM |
390 | __u8 ipsmode; /* IPSEC mode (config) */ | 390 | __u8 ipsmode; /* IPSEC mode (config) */ |
391 | __u8 ipsproto; /* IPSEC type (config) */ | 391 | __u8 ipsproto; /* IPSEC type (config) */ |
392 | __u32 spi; | ||
393 | struct dst_entry dst; | ||
394 | struct dst_ops dstops; | ||
392 | #endif | 395 | #endif |
393 | char result[512]; | 396 | char result[512]; |
394 | }; | 397 | }; |
@@ -654,8 +657,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
654 | } | 657 | } |
655 | 658 | ||
656 | #ifdef CONFIG_XFRM | 659 | #ifdef CONFIG_XFRM |
657 | if (pkt_dev->flags & F_IPSEC_ON) | 660 | if (pkt_dev->flags & F_IPSEC_ON) { |
658 | seq_printf(seq, "IPSEC "); | 661 | seq_printf(seq, "IPSEC "); |
662 | if (pkt_dev->spi) | ||
663 | seq_printf(seq, "spi:%u", pkt_dev->spi); | ||
664 | } | ||
659 | #endif | 665 | #endif |
660 | 666 | ||
661 | if (pkt_dev->flags & F_MACSRC_RND) | 667 | if (pkt_dev->flags & F_MACSRC_RND) |
@@ -1434,7 +1440,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1434 | if (!mac_pton(valstr, pkt_dev->dst_mac)) | 1440 | if (!mac_pton(valstr, pkt_dev->dst_mac)) |
1435 | return -EINVAL; | 1441 | return -EINVAL; |
1436 | /* Set up Dest MAC */ | 1442 | /* Set up Dest MAC */ |
1437 | memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); | 1443 | ether_addr_copy(&pkt_dev->hh[0], pkt_dev->dst_mac); |
1438 | 1444 | ||
1439 | sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); | 1445 | sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); |
1440 | return count; | 1446 | return count; |
@@ -1451,7 +1457,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1451 | if (!mac_pton(valstr, pkt_dev->src_mac)) | 1457 | if (!mac_pton(valstr, pkt_dev->src_mac)) |
1452 | return -EINVAL; | 1458 | return -EINVAL; |
1453 | /* Set up Src MAC */ | 1459 | /* Set up Src MAC */ |
1454 | memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); | 1460 | ether_addr_copy(&pkt_dev->hh[6], pkt_dev->src_mac); |
1455 | 1461 | ||
1456 | sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); | 1462 | sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); |
1457 | return count; | 1463 | return count; |
@@ -1476,7 +1482,18 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1476 | sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); | 1482 | sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); |
1477 | return count; | 1483 | return count; |
1478 | } | 1484 | } |
1485 | #ifdef CONFIG_XFRM | ||
1486 | if (!strcmp(name, "spi")) { | ||
1487 | len = num_arg(&user_buffer[i], 10, &value); | ||
1488 | if (len < 0) | ||
1489 | return len; | ||
1479 | 1490 | ||
1491 | i += len; | ||
1492 | pkt_dev->spi = value; | ||
1493 | sprintf(pg_result, "OK: spi=%u", pkt_dev->spi); | ||
1494 | return count; | ||
1495 | } | ||
1496 | #endif | ||
1480 | if (!strcmp(name, "flowlen")) { | 1497 | if (!strcmp(name, "flowlen")) { |
1481 | len = num_arg(&user_buffer[i], 10, &value); | 1498 | len = num_arg(&user_buffer[i], 10, &value); |
1482 | if (len < 0) | 1499 | if (len < 0) |
@@ -2043,10 +2060,10 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) | |||
2043 | /* Default to the interface's mac if not explicitly set. */ | 2060 | /* Default to the interface's mac if not explicitly set. */ |
2044 | 2061 | ||
2045 | if (is_zero_ether_addr(pkt_dev->src_mac)) | 2062 | if (is_zero_ether_addr(pkt_dev->src_mac)) |
2046 | memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN); | 2063 | ether_addr_copy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr); |
2047 | 2064 | ||
2048 | /* Set up Dest MAC */ | 2065 | /* Set up Dest MAC */ |
2049 | memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); | 2066 | ether_addr_copy(&(pkt_dev->hh[0]), pkt_dev->dst_mac); |
2050 | 2067 | ||
2051 | if (pkt_dev->flags & F_IPV6) { | 2068 | if (pkt_dev->flags & F_IPV6) { |
2052 | int i, set = 0, err = 1; | 2069 | int i, set = 0, err = 1; |
@@ -2233,13 +2250,21 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) | |||
2233 | struct xfrm_state *x = pkt_dev->flows[flow].x; | 2250 | struct xfrm_state *x = pkt_dev->flows[flow].x; |
2234 | struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); | 2251 | struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); |
2235 | if (!x) { | 2252 | if (!x) { |
2236 | /*slow path: we dont already have xfrm_state*/ | 2253 | |
2237 | x = xfrm_stateonly_find(pn->net, DUMMY_MARK, | 2254 | if (pkt_dev->spi) { |
2238 | (xfrm_address_t *)&pkt_dev->cur_daddr, | 2255 | /* We need as quick as possible to find the right SA |
2239 | (xfrm_address_t *)&pkt_dev->cur_saddr, | 2256 | * Searching with minimum criteria to archieve this. |
2240 | AF_INET, | 2257 | */ |
2241 | pkt_dev->ipsmode, | 2258 | x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET); |
2242 | pkt_dev->ipsproto, 0); | 2259 | } else { |
2260 | /* slow path: we don't already have xfrm_state */ | ||
2261 | x = xfrm_stateonly_find(pn->net, DUMMY_MARK, | ||
2262 | (xfrm_address_t *)&pkt_dev->cur_daddr, | ||
2263 | (xfrm_address_t *)&pkt_dev->cur_saddr, | ||
2264 | AF_INET, | ||
2265 | pkt_dev->ipsmode, | ||
2266 | pkt_dev->ipsproto, 0); | ||
2267 | } | ||
2243 | if (x) { | 2268 | if (x) { |
2244 | pkt_dev->flows[flow].x = x; | 2269 | pkt_dev->flows[flow].x = x; |
2245 | set_pkt_overhead(pkt_dev); | 2270 | set_pkt_overhead(pkt_dev); |
@@ -2475,31 +2500,47 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) | |||
2475 | 2500 | ||
2476 | 2501 | ||
2477 | #ifdef CONFIG_XFRM | 2502 | #ifdef CONFIG_XFRM |
2503 | static u32 pktgen_dst_metrics[RTAX_MAX + 1] = { | ||
2504 | |||
2505 | [RTAX_HOPLIMIT] = 0x5, /* Set a static hoplimit */ | ||
2506 | }; | ||
2507 | |||
2478 | static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) | 2508 | static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) |
2479 | { | 2509 | { |
2480 | struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; | 2510 | struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; |
2481 | int err = 0; | 2511 | int err = 0; |
2512 | struct net *net = dev_net(pkt_dev->odev); | ||
2482 | 2513 | ||
2483 | if (!x) | 2514 | if (!x) |
2484 | return 0; | 2515 | return 0; |
2485 | /* XXX: we don't support tunnel mode for now until | 2516 | /* XXX: we don't support tunnel mode for now until
2486 | * we resolve the dst issue */ | 2517 | * we resolve the dst issue */ |
2487 | if (x->props.mode != XFRM_MODE_TRANSPORT) | 2518 | if ((x->props.mode != XFRM_MODE_TRANSPORT) && (pkt_dev->spi == 0)) |
2488 | return 0; | 2519 | return 0; |
2489 | 2520 | ||
2490 | spin_lock(&x->lock); | 2521 | /* But when the user specifies a valid SPI, the transformation
2522 | * supports both transport/tunnel modes and ESP/AH types. | ||
2523 | */ | ||
2524 | if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0)) | ||
2525 | skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF; | ||
2491 | 2526 | ||
2527 | rcu_read_lock_bh(); | ||
2492 | err = x->outer_mode->output(x, skb); | 2528 | err = x->outer_mode->output(x, skb); |
2493 | if (err) | 2529 | rcu_read_unlock_bh(); |
2530 | if (err) { | ||
2531 | XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); | ||
2494 | goto error; | 2532 | goto error; |
2533 | } | ||
2495 | err = x->type->output(x, skb); | 2534 | err = x->type->output(x, skb); |
2496 | if (err) | 2535 | if (err) { |
2536 | XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); | ||
2497 | goto error; | 2537 | goto error; |
2498 | 2538 | } | |
2539 | spin_lock_bh(&x->lock); | ||
2499 | x->curlft.bytes += skb->len; | 2540 | x->curlft.bytes += skb->len; |
2500 | x->curlft.packets++; | 2541 | x->curlft.packets++; |
2542 | spin_unlock_bh(&x->lock); | ||
2501 | error: | 2543 | error: |
2502 | spin_unlock(&x->lock); | ||
2503 | return err; | 2544 | return err; |
2504 | } | 2545 | } |
2505 | 2546 | ||
@@ -3542,6 +3583,17 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) | |||
3542 | #ifdef CONFIG_XFRM | 3583 | #ifdef CONFIG_XFRM |
3543 | pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; | 3584 | pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; |
3544 | pkt_dev->ipsproto = IPPROTO_ESP; | 3585 | pkt_dev->ipsproto = IPPROTO_ESP; |
3586 | |||
3587 | /* xfrm tunnel mode needs an additional dst to extract the outer | ||
3588 | * ip header protocol/ttl/id fields, so create a phony one here | ||
3589 | * instead of looking up a valid rt, which would definitely hurt | ||
3590 | * performance under such circumstances. | ||
3591 | */ | ||
3592 | pkt_dev->dstops.family = AF_INET; | ||
3593 | pkt_dev->dst.dev = pkt_dev->odev; | ||
3594 | dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false); | ||
3595 | pkt_dev->dst.child = &pkt_dev->dst; | ||
3596 | pkt_dev->dst.ops = &pkt_dev->dstops; | ||
3545 | #endif | 3597 | #endif |
3546 | 3598 | ||
3547 | return add_dev_to_thread(t, pkt_dev); | 3599 | return add_dev_to_thread(t, pkt_dev); |
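
The new spi command rides pktgen's existing proc interface, so exercising it is a single write to the per-device file. A hypothetical userspace snippet (device name and SPI value are illustrative):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/pktgen/eth0", "w");

	if (!f)
		return 1;
	fprintf(f, "spi 1000\n");	/* bind this device's flows to the SA with SPI 1000 */
	return fclose(f) ? 1 : 0;
}
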
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf67144d3e3c..120eecc0f5a4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -365,6 +365,22 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops) | |||
365 | } | 365 | } |
366 | EXPORT_SYMBOL_GPL(rtnl_link_unregister); | 366 | EXPORT_SYMBOL_GPL(rtnl_link_unregister); |
367 | 367 | ||
368 | static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) | ||
369 | { | ||
370 | struct net_device *master_dev; | ||
371 | const struct rtnl_link_ops *ops; | ||
372 | |||
373 | master_dev = netdev_master_upper_dev_get((struct net_device *) dev); | ||
374 | if (!master_dev) | ||
375 | return 0; | ||
376 | ops = master_dev->rtnl_link_ops; | ||
377 | if (!ops || !ops->get_slave_size) | ||
378 | return 0; | ||
379 | /* IFLA_INFO_SLAVE_DATA + nested data */ | ||
380 | return nla_total_size(sizeof(struct nlattr)) + | ||
381 | ops->get_slave_size(master_dev, dev); | ||
382 | } | ||
383 | |||
368 | static size_t rtnl_link_get_size(const struct net_device *dev) | 384 | static size_t rtnl_link_get_size(const struct net_device *dev) |
369 | { | 385 | { |
370 | const struct rtnl_link_ops *ops = dev->rtnl_link_ops; | 386 | const struct rtnl_link_ops *ops = dev->rtnl_link_ops; |
@@ -385,6 +401,8 @@ static size_t rtnl_link_get_size(const struct net_device *dev) | |||
385 | /* IFLA_INFO_XSTATS */ | 401 | /* IFLA_INFO_XSTATS */ |
386 | size += nla_total_size(ops->get_xstats_size(dev)); | 402 | size += nla_total_size(ops->get_xstats_size(dev)); |
387 | 403 | ||
404 | size += rtnl_link_get_slave_info_data_size(dev); | ||
405 | |||
388 | return size; | 406 | return size; |
389 | } | 407 | } |
390 | 408 | ||
@@ -403,34 +421,16 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) | |||
403 | } | 421 | } |
404 | 422 | ||
405 | /** | 423 | /** |
406 | * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. | ||
407 | * @ops: struct rtnl_af_ops * to register | ||
408 | * | ||
409 | * The caller must hold the rtnl_mutex. | ||
410 | * | ||
411 | * Returns 0 on success or a negative error code. | ||
412 | */ | ||
413 | int __rtnl_af_register(struct rtnl_af_ops *ops) | ||
414 | { | ||
415 | list_add_tail(&ops->list, &rtnl_af_ops); | ||
416 | return 0; | ||
417 | } | ||
418 | EXPORT_SYMBOL_GPL(__rtnl_af_register); | ||
419 | |||
420 | /** | ||
421 | * rtnl_af_register - Register rtnl_af_ops with rtnetlink. | 424 | * rtnl_af_register - Register rtnl_af_ops with rtnetlink. |
422 | * @ops: struct rtnl_af_ops * to register | 425 | * @ops: struct rtnl_af_ops * to register |
423 | * | 426 | * |
424 | * Returns 0 on success or a negative error code. | 427 | * Returns 0 on success or a negative error code. |
425 | */ | 428 | */ |
426 | int rtnl_af_register(struct rtnl_af_ops *ops) | 429 | void rtnl_af_register(struct rtnl_af_ops *ops) |
427 | { | 430 | { |
428 | int err; | ||
429 | |||
430 | rtnl_lock(); | 431 | rtnl_lock(); |
431 | err = __rtnl_af_register(ops); | 432 | list_add_tail(&ops->list, &rtnl_af_ops); |
432 | rtnl_unlock(); | 433 | rtnl_unlock(); |
433 | return err; | ||
434 | } | 434 | } |
435 | EXPORT_SYMBOL_GPL(rtnl_af_register); | 435 | EXPORT_SYMBOL_GPL(rtnl_af_register); |
436 | 436 | ||
@@ -477,40 +477,100 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev) | |||
477 | return size; | 477 | return size; |
478 | } | 478 | } |
479 | 479 | ||
480 | static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) | 480 | static bool rtnl_have_link_slave_info(const struct net_device *dev) |
481 | { | 481 | { |
482 | const struct rtnl_link_ops *ops = dev->rtnl_link_ops; | 482 | struct net_device *master_dev; |
483 | struct nlattr *linkinfo, *data; | ||
484 | int err = -EMSGSIZE; | ||
485 | 483 | ||
486 | linkinfo = nla_nest_start(skb, IFLA_LINKINFO); | 484 | master_dev = netdev_master_upper_dev_get((struct net_device *) dev); |
487 | if (linkinfo == NULL) | 485 | if (master_dev && master_dev->rtnl_link_ops) |
488 | goto out; | 486 | return true; |
487 | return false; | ||
488 | } | ||
489 | |||
490 | static int rtnl_link_slave_info_fill(struct sk_buff *skb, | ||
491 | const struct net_device *dev) | ||
492 | { | ||
493 | struct net_device *master_dev; | ||
494 | const struct rtnl_link_ops *ops; | ||
495 | struct nlattr *slave_data; | ||
496 | int err; | ||
497 | |||
498 | master_dev = netdev_master_upper_dev_get((struct net_device *) dev); | ||
499 | if (!master_dev) | ||
500 | return 0; | ||
501 | ops = master_dev->rtnl_link_ops; | ||
502 | if (!ops) | ||
503 | return 0; | ||
504 | if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0) | ||
505 | return -EMSGSIZE; | ||
506 | if (ops->fill_slave_info) { | ||
507 | slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA); | ||
508 | if (!slave_data) | ||
509 | return -EMSGSIZE; | ||
510 | err = ops->fill_slave_info(skb, master_dev, dev); | ||
511 | if (err < 0) | ||
512 | goto err_cancel_slave_data; | ||
513 | nla_nest_end(skb, slave_data); | ||
514 | } | ||
515 | return 0; | ||
489 | 516 | ||
517 | err_cancel_slave_data: | ||
518 | nla_nest_cancel(skb, slave_data); | ||
519 | return err; | ||
520 | } | ||
521 | |||
522 | static int rtnl_link_info_fill(struct sk_buff *skb, | ||
523 | const struct net_device *dev) | ||
524 | { | ||
525 | const struct rtnl_link_ops *ops = dev->rtnl_link_ops; | ||
526 | struct nlattr *data; | ||
527 | int err; | ||
528 | |||
529 | if (!ops) | ||
530 | return 0; | ||
490 | if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) | 531 | if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) |
491 | goto err_cancel_link; | 532 | return -EMSGSIZE; |
492 | if (ops->fill_xstats) { | 533 | if (ops->fill_xstats) { |
493 | err = ops->fill_xstats(skb, dev); | 534 | err = ops->fill_xstats(skb, dev); |
494 | if (err < 0) | 535 | if (err < 0) |
495 | goto err_cancel_link; | 536 | return err; |
496 | } | 537 | } |
497 | if (ops->fill_info) { | 538 | if (ops->fill_info) { |
498 | data = nla_nest_start(skb, IFLA_INFO_DATA); | 539 | data = nla_nest_start(skb, IFLA_INFO_DATA); |
499 | if (data == NULL) { | 540 | if (data == NULL) |
500 | err = -EMSGSIZE; | 541 | return -EMSGSIZE; |
501 | goto err_cancel_link; | ||
502 | } | ||
503 | err = ops->fill_info(skb, dev); | 542 | err = ops->fill_info(skb, dev); |
504 | if (err < 0) | 543 | if (err < 0) |
505 | goto err_cancel_data; | 544 | goto err_cancel_data; |
506 | nla_nest_end(skb, data); | 545 | nla_nest_end(skb, data); |
507 | } | 546 | } |
508 | |||
509 | nla_nest_end(skb, linkinfo); | ||
510 | return 0; | 547 | return 0; |
511 | 548 | ||
512 | err_cancel_data: | 549 | err_cancel_data: |
513 | nla_nest_cancel(skb, data); | 550 | nla_nest_cancel(skb, data); |
551 | return err; | ||
552 | } | ||
553 | |||
554 | static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) | ||
555 | { | ||
556 | struct nlattr *linkinfo; | ||
557 | int err = -EMSGSIZE; | ||
558 | |||
559 | linkinfo = nla_nest_start(skb, IFLA_LINKINFO); | ||
560 | if (linkinfo == NULL) | ||
561 | goto out; | ||
562 | |||
563 | err = rtnl_link_info_fill(skb, dev); | ||
564 | if (err < 0) | ||
565 | goto err_cancel_link; | ||
566 | |||
567 | err = rtnl_link_slave_info_fill(skb, dev); | ||
568 | if (err < 0) | ||
569 | goto err_cancel_link; | ||
570 | |||
571 | nla_nest_end(skb, linkinfo); | ||
572 | return 0; | ||
573 | |||
514 | err_cancel_link: | 574 | err_cancel_link: |
515 | nla_nest_cancel(skb, linkinfo); | 575 | nla_nest_cancel(skb, linkinfo); |
516 | out: | 576 | out: |
@@ -1019,7 +1079,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
1019 | if (rtnl_port_fill(skb, dev)) | 1079 | if (rtnl_port_fill(skb, dev)) |
1020 | goto nla_put_failure; | 1080 | goto nla_put_failure; |
1021 | 1081 | ||
1022 | if (dev->rtnl_link_ops) { | 1082 | if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) { |
1023 | if (rtnl_link_fill(skb, dev) < 0) | 1083 | if (rtnl_link_fill(skb, dev) < 0) |
1024 | goto nla_put_failure; | 1084 | goto nla_put_failure; |
1025 | } | 1085 | } |
@@ -1142,6 +1202,8 @@ EXPORT_SYMBOL(ifla_policy); | |||
1142 | static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { | 1202 | static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { |
1143 | [IFLA_INFO_KIND] = { .type = NLA_STRING }, | 1203 | [IFLA_INFO_KIND] = { .type = NLA_STRING }, |
1144 | [IFLA_INFO_DATA] = { .type = NLA_NESTED }, | 1204 | [IFLA_INFO_DATA] = { .type = NLA_NESTED }, |
1205 | [IFLA_INFO_SLAVE_KIND] = { .type = NLA_STRING }, | ||
1206 | [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, | ||
1145 | }; | 1207 | }; |
1146 | 1208 | ||
1147 | static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { | 1209 | static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { |
@@ -1729,7 +1791,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
1729 | { | 1791 | { |
1730 | struct net *net = sock_net(skb->sk); | 1792 | struct net *net = sock_net(skb->sk); |
1731 | const struct rtnl_link_ops *ops; | 1793 | const struct rtnl_link_ops *ops; |
1794 | const struct rtnl_link_ops *m_ops = NULL; | ||
1732 | struct net_device *dev; | 1795 | struct net_device *dev; |
1796 | struct net_device *master_dev = NULL; | ||
1733 | struct ifinfomsg *ifm; | 1797 | struct ifinfomsg *ifm; |
1734 | char kind[MODULE_NAME_LEN]; | 1798 | char kind[MODULE_NAME_LEN]; |
1735 | char ifname[IFNAMSIZ]; | 1799 | char ifname[IFNAMSIZ]; |
@@ -1759,6 +1823,12 @@ replay: | |||
1759 | dev = NULL; | 1823 | dev = NULL; |
1760 | } | 1824 | } |
1761 | 1825 | ||
1826 | if (dev) { | ||
1827 | master_dev = netdev_master_upper_dev_get(dev); | ||
1828 | if (master_dev) | ||
1829 | m_ops = master_dev->rtnl_link_ops; | ||
1830 | } | ||
1831 | |||
1762 | err = validate_linkmsg(dev, tb); | 1832 | err = validate_linkmsg(dev, tb); |
1763 | if (err < 0) | 1833 | if (err < 0) |
1764 | return err; | 1834 | return err; |
@@ -1780,7 +1850,10 @@ replay: | |||
1780 | } | 1850 | } |
1781 | 1851 | ||
1782 | if (1) { | 1852 | if (1) { |
1783 | struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; | 1853 | struct nlattr *attr[ops ? ops->maxtype + 1 : 0]; |
1854 | struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; | ||
1855 | struct nlattr **data = NULL; | ||
1856 | struct nlattr **slave_data = NULL; | ||
1784 | struct net *dest_net; | 1857 | struct net *dest_net; |
1785 | 1858 | ||
1786 | if (ops) { | 1859 | if (ops) { |
@@ -1799,6 +1872,24 @@ replay: | |||
1799 | } | 1872 | } |
1800 | } | 1873 | } |
1801 | 1874 | ||
1875 | if (m_ops) { | ||
1876 | if (m_ops->slave_maxtype && | ||
1877 | linkinfo[IFLA_INFO_SLAVE_DATA]) { | ||
1878 | err = nla_parse_nested(slave_attr, | ||
1879 | m_ops->slave_maxtype, | ||
1880 | linkinfo[IFLA_INFO_SLAVE_DATA], | ||
1881 | m_ops->slave_policy); | ||
1882 | if (err < 0) | ||
1883 | return err; | ||
1884 | slave_data = slave_attr; | ||
1885 | } | ||
1886 | if (m_ops->slave_validate) { | ||
1887 | err = m_ops->slave_validate(tb, slave_data); | ||
1888 | if (err < 0) | ||
1889 | return err; | ||
1890 | } | ||
1891 | } | ||
1892 | |||
1802 | if (dev) { | 1893 | if (dev) { |
1803 | int modified = 0; | 1894 | int modified = 0; |
1804 | 1895 | ||
@@ -1818,6 +1909,17 @@ replay: | |||
1818 | modified = 1; | 1909 | modified = 1; |
1819 | } | 1910 | } |
1820 | 1911 | ||
1912 | if (linkinfo[IFLA_INFO_SLAVE_DATA]) { | ||
1913 | if (!m_ops || !m_ops->slave_changelink) | ||
1914 | return -EOPNOTSUPP; | ||
1915 | |||
1916 | err = m_ops->slave_changelink(master_dev, dev, | ||
1917 | tb, slave_data); | ||
1918 | if (err < 0) | ||
1919 | return err; | ||
1920 | modified = 1; | ||
1921 | } | ||
1922 | |||
1821 | return do_setlink(dev, ifm, tb, ifname, modified); | 1923 | return do_setlink(dev, ifm, tb, ifname, modified); |
1822 | } | 1924 | } |
1823 | 1925 | ||
@@ -1861,16 +1963,21 @@ replay: | |||
1861 | 1963 | ||
1862 | dev->ifindex = ifm->ifi_index; | 1964 | dev->ifindex = ifm->ifi_index; |
1863 | 1965 | ||
1864 | if (ops->newlink) | 1966 | if (ops->newlink) { |
1865 | err = ops->newlink(net, dev, tb, data); | 1967 | err = ops->newlink(net, dev, tb, data); |
1866 | else | 1968 | /* Drivers should call free_netdev() in ->destructor |
1969 | * and unregister it on failure so that device could be | ||
1970 | * finally freed in rtnl_unlock. | ||
1971 | */ | ||
1972 | if (err < 0) | ||
1973 | goto out; | ||
1974 | } else { | ||
1867 | err = register_netdevice(dev); | 1975 | err = register_netdevice(dev); |
1868 | 1976 | if (err < 0) { | |
1869 | if (err < 0) { | 1977 | free_netdev(dev); |
1870 | free_netdev(dev); | 1978 | goto out; |
1871 | goto out; | 1979 | } |
1872 | } | 1980 | } |
1873 | |||
1874 | err = rtnl_configure_link(dev, ifm); | 1981 | err = rtnl_configure_link(dev, ifm); |
1875 | if (err < 0) | 1982 | if (err < 0) |
1876 | unregister_netdevice(dev); | 1983 | unregister_netdevice(dev); |
@@ -2014,12 +2121,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo); | |||
2014 | static int nlmsg_populate_fdb_fill(struct sk_buff *skb, | 2121 | static int nlmsg_populate_fdb_fill(struct sk_buff *skb, |
2015 | struct net_device *dev, | 2122 | struct net_device *dev, |
2016 | u8 *addr, u32 pid, u32 seq, | 2123 | u8 *addr, u32 pid, u32 seq, |
2017 | int type, unsigned int flags) | 2124 | int type, unsigned int flags, |
2125 | int nlflags) | ||
2018 | { | 2126 | { |
2019 | struct nlmsghdr *nlh; | 2127 | struct nlmsghdr *nlh; |
2020 | struct ndmsg *ndm; | 2128 | struct ndmsg *ndm; |
2021 | 2129 | ||
2022 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI); | 2130 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags); |
2023 | if (!nlh) | 2131 | if (!nlh) |
2024 | return -EMSGSIZE; | 2132 | return -EMSGSIZE; |
2025 | 2133 | ||
@@ -2057,7 +2165,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) | |||
2057 | if (!skb) | 2165 | if (!skb) |
2058 | goto errout; | 2166 | goto errout; |
2059 | 2167 | ||
2060 | err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF); | 2168 | err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0); |
2061 | if (err < 0) { | 2169 | if (err < 0) { |
2062 | kfree_skb(skb); | 2170 | kfree_skb(skb); |
2063 | goto errout; | 2171 | goto errout; |
@@ -2282,7 +2390,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, | |||
2282 | 2390 | ||
2283 | err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, | 2391 | err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, |
2284 | portid, seq, | 2392 | portid, seq, |
2285 | RTM_NEWNEIGH, NTF_SELF); | 2393 | RTM_NEWNEIGH, NTF_SELF, |
2394 | NLM_F_MULTI); | ||
2286 | if (err < 0) | 2395 | if (err < 0) |
2287 | return err; | 2396 | return err; |
2288 | skip: | 2397 | skip: |
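
On the driver side, a master device opts into this slave reporting by filling the new slave_* fields of its rtnl_link_ops; rtnl_link_slave_info_fill() and rtnl_newlink() above handle the nesting and parsing. A sketch with hypothetical attributes and helpers (only the ops field names come from this patch; IFLA_MY_* and my_* are made up):

#include <net/netlink.h>
#include <net/rtnetlink.h>

enum { IFLA_MY_SLAVE_UNSPEC, IFLA_MY_SLAVE_STATE, __IFLA_MY_SLAVE_MAX };
#define IFLA_MY_SLAVE_MAX (__IFLA_MY_SLAVE_MAX - 1)

static const struct nla_policy my_slave_policy[IFLA_MY_SLAVE_MAX + 1] = {
	[IFLA_MY_SLAVE_STATE]	= { .type = NLA_U8 },
};

static int my_fill_slave_info(struct sk_buff *skb,
			      const struct net_device *master,
			      const struct net_device *slave)
{
	if (nla_put_u8(skb, IFLA_MY_SLAVE_STATE, 0))	/* state fetch elided */
		return -EMSGSIZE;
	return 0;
}

static size_t my_get_slave_size(const struct net_device *master,
				const struct net_device *slave)
{
	return nla_total_size(sizeof(u8));	/* IFLA_MY_SLAVE_STATE */
}

static struct rtnl_link_ops my_link_ops __read_mostly = {
	.kind		 = "my_master",
	.slave_maxtype	 = IFLA_MY_SLAVE_MAX,
	.slave_policy	 = my_slave_policy,
	.fill_slave_info = my_fill_slave_info,
	.get_slave_size	 = my_get_slave_size,
};
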
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 06e72d3cdf60..869c7afe3b07 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -47,6 +47,8 @@ | |||
47 | #include <linux/in.h> | 47 | #include <linux/in.h> |
48 | #include <linux/inet.h> | 48 | #include <linux/inet.h> |
49 | #include <linux/slab.h> | 49 | #include <linux/slab.h> |
50 | #include <linux/tcp.h> | ||
51 | #include <linux/udp.h> | ||
50 | #include <linux/netdevice.h> | 52 | #include <linux/netdevice.h> |
51 | #ifdef CONFIG_NET_CLS_ACT | 53 | #ifdef CONFIG_NET_CLS_ACT |
52 | #include <net/pkt_sched.h> | 54 | #include <net/pkt_sched.h> |
@@ -65,6 +67,7 @@ | |||
65 | #include <net/dst.h> | 67 | #include <net/dst.h> |
66 | #include <net/sock.h> | 68 | #include <net/sock.h> |
67 | #include <net/checksum.h> | 69 | #include <net/checksum.h> |
70 | #include <net/ip6_checksum.h> | ||
68 | #include <net/xfrm.h> | 71 | #include <net/xfrm.h> |
69 | 72 | ||
70 | #include <asm/uaccess.h> | 73 | #include <asm/uaccess.h> |
@@ -74,36 +77,6 @@ | |||
74 | struct kmem_cache *skbuff_head_cache __read_mostly; | 77 | struct kmem_cache *skbuff_head_cache __read_mostly; |
75 | static struct kmem_cache *skbuff_fclone_cache __read_mostly; | 78 | static struct kmem_cache *skbuff_fclone_cache __read_mostly; |
76 | 79 | ||
77 | static void sock_pipe_buf_release(struct pipe_inode_info *pipe, | ||
78 | struct pipe_buffer *buf) | ||
79 | { | ||
80 | put_page(buf->page); | ||
81 | } | ||
82 | |||
83 | static void sock_pipe_buf_get(struct pipe_inode_info *pipe, | ||
84 | struct pipe_buffer *buf) | ||
85 | { | ||
86 | get_page(buf->page); | ||
87 | } | ||
88 | |||
89 | static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, | ||
90 | struct pipe_buffer *buf) | ||
91 | { | ||
92 | return 1; | ||
93 | } | ||
94 | |||
95 | |||
96 | /* Pipe buffer operations for a socket. */ | ||
97 | static const struct pipe_buf_operations sock_pipe_buf_ops = { | ||
98 | .can_merge = 0, | ||
99 | .map = generic_pipe_buf_map, | ||
100 | .unmap = generic_pipe_buf_unmap, | ||
101 | .confirm = generic_pipe_buf_confirm, | ||
102 | .release = sock_pipe_buf_release, | ||
103 | .steal = sock_pipe_buf_steal, | ||
104 | .get = sock_pipe_buf_get, | ||
105 | }; | ||
106 | |||
107 | /** | 80 | /** |
108 | * skb_panic - private function for out-of-line support | 81 | * skb_panic - private function for out-of-line support |
109 | * @skb: buffer | 82 | * @skb: buffer |
@@ -712,9 +685,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
712 | new->inner_network_header = old->inner_network_header; | 685 | new->inner_network_header = old->inner_network_header; |
713 | new->inner_mac_header = old->inner_mac_header; | 686 | new->inner_mac_header = old->inner_mac_header; |
714 | skb_dst_copy(new, old); | 687 | skb_dst_copy(new, old); |
715 | new->rxhash = old->rxhash; | 688 | skb_copy_hash(new, old); |
716 | new->ooo_okay = old->ooo_okay; | 689 | new->ooo_okay = old->ooo_okay; |
717 | new->l4_rxhash = old->l4_rxhash; | ||
718 | new->no_fcs = old->no_fcs; | 690 | new->no_fcs = old->no_fcs; |
719 | new->encapsulation = old->encapsulation; | 691 | new->encapsulation = old->encapsulation; |
720 | #ifdef CONFIG_XFRM | 692 | #ifdef CONFIG_XFRM |
@@ -735,9 +707,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
735 | new->mark = old->mark; | 707 | new->mark = old->mark; |
736 | new->skb_iif = old->skb_iif; | 708 | new->skb_iif = old->skb_iif; |
737 | __nf_copy(new, old); | 709 | __nf_copy(new, old); |
738 | #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) | ||
739 | new->nf_trace = old->nf_trace; | ||
740 | #endif | ||
741 | #ifdef CONFIG_NET_SCHED | 710 | #ifdef CONFIG_NET_SCHED |
742 | new->tc_index = old->tc_index; | 711 | new->tc_index = old->tc_index; |
743 | #ifdef CONFIG_NET_CLS_ACT | 712 | #ifdef CONFIG_NET_CLS_ACT |
@@ -1830,7 +1799,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, | |||
1830 | .partial = partial, | 1799 | .partial = partial, |
1831 | .nr_pages_max = MAX_SKB_FRAGS, | 1800 | .nr_pages_max = MAX_SKB_FRAGS, |
1832 | .flags = flags, | 1801 | .flags = flags, |
1833 | .ops = &sock_pipe_buf_ops, | 1802 | .ops = &nosteal_pipe_buf_ops, |
1834 | .spd_release = sock_spd_release, | 1803 | .spd_release = sock_spd_release, |
1835 | }; | 1804 | }; |
1836 | struct sk_buff *frag_iter; | 1805 | struct sk_buff *frag_iter; |
@@ -2122,6 +2091,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, | |||
2122 | } | 2091 | } |
2123 | EXPORT_SYMBOL(skb_copy_and_csum_bits); | 2092 | EXPORT_SYMBOL(skb_copy_and_csum_bits); |
2124 | 2093 | ||
2094 | /** | ||
2095 | * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() | ||
2096 | * @from: source buffer | ||
2097 | * | ||
2098 | * Calculates the amount of linear headroom needed in the 'to' skb passed | ||
2099 | * into skb_zerocopy(). | ||
2100 | */ | ||
2101 | unsigned int | ||
2102 | skb_zerocopy_headlen(const struct sk_buff *from) | ||
2103 | { | ||
2104 | unsigned int hlen = 0; | ||
2105 | |||
2106 | if (!from->head_frag || | ||
2107 | skb_headlen(from) < L1_CACHE_BYTES || | ||
2108 | skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) | ||
2109 | hlen = skb_headlen(from); | ||
2110 | |||
2111 | if (skb_has_frag_list(from)) | ||
2112 | hlen = from->len; | ||
2113 | |||
2114 | return hlen; | ||
2115 | } | ||
2116 | EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); | ||
2117 | |||
2118 | /** | ||
2119 | * skb_zerocopy - Zero copy skb to skb | ||
2120 | * @to: destination buffer | ||
2121 | * @from: source buffer | ||
2122 | * @len: number of bytes to copy from source buffer | ||
2123 | * @hlen: size of linear headroom in destination buffer | ||
2124 | * | ||
2125 | * Copies up to `len` bytes from `from` to `to` by creating references | ||
2126 | * to the frags in the source buffer. | ||
2127 | * | ||
2128 | * The `hlen` as calculated by skb_zerocopy_headlen() specifies the | ||
2129 | * headroom in the `to` buffer. | ||
2130 | */ | ||
2131 | void | ||
2132 | skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) | ||
2133 | { | ||
2134 | int i, j = 0; | ||
2135 | int plen = 0; /* length of skb->head fragment */ | ||
2136 | struct page *page; | ||
2137 | unsigned int offset; | ||
2138 | |||
2139 | BUG_ON(!from->head_frag && !hlen); | ||
2140 | |||
2141 | /* don't bother with small payloads */ | ||
2142 | if (len <= skb_tailroom(to)) { | ||
2143 | skb_copy_bits(from, 0, skb_put(to, len), len); | ||
2144 | return; | ||
2145 | } | ||
2146 | |||
2147 | if (hlen) { | ||
2148 | skb_copy_bits(from, 0, skb_put(to, hlen), hlen); | ||
2149 | len -= hlen; | ||
2150 | } else { | ||
2151 | plen = min_t(int, skb_headlen(from), len); | ||
2152 | if (plen) { | ||
2153 | page = virt_to_head_page(from->head); | ||
2154 | offset = from->data - (unsigned char *)page_address(page); | ||
2155 | __skb_fill_page_desc(to, 0, page, offset, plen); | ||
2156 | get_page(page); | ||
2157 | j = 1; | ||
2158 | len -= plen; | ||
2159 | } | ||
2160 | } | ||
2161 | |||
2162 | to->truesize += len + plen; | ||
2163 | to->len += len + plen; | ||
2164 | to->data_len += len + plen; | ||
2165 | |||
2166 | for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { | ||
2167 | if (!len) | ||
2168 | break; | ||
2169 | skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; | ||
2170 | skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); | ||
2171 | len -= skb_shinfo(to)->frags[j].size; | ||
2172 | skb_frag_ref(to, j); | ||
2173 | j++; | ||
2174 | } | ||
2175 | skb_shinfo(to)->nr_frags = j; | ||
2176 | } | ||
2177 | EXPORT_SYMBOL_GPL(skb_zerocopy); | ||
2178 | |||
2125 | void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) | 2179 | void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) |
2126 | { | 2180 | { |
2127 | __wsum csum; | 2181 | __wsum csum; |
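
A typical caller pairs the two new helpers: size the linear headroom with skb_zerocopy_headlen(), allocate, then let skb_zerocopy() reference the source frags instead of copying them. A sketch of that calling pattern (allocation sizing simplified; real callers also reserve room for their own headers, and "clone_payload" is hypothetical):

static struct sk_buff *clone_payload(struct sk_buff *from, int len)
{
	unsigned int hlen = skb_zerocopy_headlen(from);
	struct sk_buff *to = alloc_skb(hlen, GFP_ATOMIC);

	if (!to)
		return NULL;
	skb_zerocopy(to, from, len, hlen);	/* frags are referenced, not copied */
	return to;
}
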
@@ -2784,81 +2838,84 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); | |||
2784 | 2838 | ||
2785 | /** | 2839 | /** |
2786 | * skb_segment - Perform protocol segmentation on skb. | 2840 | * skb_segment - Perform protocol segmentation on skb. |
2787 | * @skb: buffer to segment | 2841 | * @head_skb: buffer to segment |
2788 | * @features: features for the output path (see dev->features) | 2842 | * @features: features for the output path (see dev->features) |
2789 | * | 2843 | * |
2790 | * This function performs segmentation on the given skb. It returns | 2844 | * This function performs segmentation on the given skb. It returns |
2791 | * a pointer to the first in a list of new skbs for the segments. | 2845 | * a pointer to the first in a list of new skbs for the segments. |
2792 | * In case of error it returns ERR_PTR(err). | 2846 | * In case of error it returns ERR_PTR(err). |
2793 | */ | 2847 | */ |
2794 | struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) | 2848 | struct sk_buff *skb_segment(struct sk_buff *head_skb, |
2849 | netdev_features_t features) | ||
2795 | { | 2850 | { |
2796 | struct sk_buff *segs = NULL; | 2851 | struct sk_buff *segs = NULL; |
2797 | struct sk_buff *tail = NULL; | 2852 | struct sk_buff *tail = NULL; |
2798 | struct sk_buff *fskb = skb_shinfo(skb)->frag_list; | 2853 | struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; |
2799 | skb_frag_t *skb_frag = skb_shinfo(skb)->frags; | 2854 | skb_frag_t *frag = skb_shinfo(head_skb)->frags; |
2800 | unsigned int mss = skb_shinfo(skb)->gso_size; | 2855 | unsigned int mss = skb_shinfo(head_skb)->gso_size; |
2801 | unsigned int doffset = skb->data - skb_mac_header(skb); | 2856 | unsigned int doffset = head_skb->data - skb_mac_header(head_skb); |
2857 | struct sk_buff *frag_skb = head_skb; | ||
2802 | unsigned int offset = doffset; | 2858 | unsigned int offset = doffset; |
2803 | unsigned int tnl_hlen = skb_tnl_header_len(skb); | 2859 | unsigned int tnl_hlen = skb_tnl_header_len(head_skb); |
2804 | unsigned int headroom; | 2860 | unsigned int headroom; |
2805 | unsigned int len; | 2861 | unsigned int len; |
2806 | __be16 proto; | 2862 | __be16 proto; |
2807 | bool csum; | 2863 | bool csum; |
2808 | int sg = !!(features & NETIF_F_SG); | 2864 | int sg = !!(features & NETIF_F_SG); |
2809 | int nfrags = skb_shinfo(skb)->nr_frags; | 2865 | int nfrags = skb_shinfo(head_skb)->nr_frags; |
2810 | int err = -ENOMEM; | 2866 | int err = -ENOMEM; |
2811 | int i = 0; | 2867 | int i = 0; |
2812 | int pos; | 2868 | int pos; |
2813 | 2869 | ||
2814 | proto = skb_network_protocol(skb); | 2870 | proto = skb_network_protocol(head_skb); |
2815 | if (unlikely(!proto)) | 2871 | if (unlikely(!proto)) |
2816 | return ERR_PTR(-EINVAL); | 2872 | return ERR_PTR(-EINVAL); |
2817 | 2873 | ||
2818 | csum = !!can_checksum_protocol(features, proto); | 2874 | csum = !!can_checksum_protocol(features, proto); |
2819 | __skb_push(skb, doffset); | 2875 | __skb_push(head_skb, doffset); |
2820 | headroom = skb_headroom(skb); | 2876 | headroom = skb_headroom(head_skb); |
2821 | pos = skb_headlen(skb); | 2877 | pos = skb_headlen(head_skb); |
2822 | 2878 | ||
2823 | do { | 2879 | do { |
2824 | struct sk_buff *nskb; | 2880 | struct sk_buff *nskb; |
2825 | skb_frag_t *frag; | 2881 | skb_frag_t *nskb_frag; |
2826 | int hsize; | 2882 | int hsize; |
2827 | int size; | 2883 | int size; |
2828 | 2884 | ||
2829 | len = skb->len - offset; | 2885 | len = head_skb->len - offset; |
2830 | if (len > mss) | 2886 | if (len > mss) |
2831 | len = mss; | 2887 | len = mss; |
2832 | 2888 | ||
2833 | hsize = skb_headlen(skb) - offset; | 2889 | hsize = skb_headlen(head_skb) - offset; |
2834 | if (hsize < 0) | 2890 | if (hsize < 0) |
2835 | hsize = 0; | 2891 | hsize = 0; |
2836 | if (hsize > len || !sg) | 2892 | if (hsize > len || !sg) |
2837 | hsize = len; | 2893 | hsize = len; |
2838 | 2894 | ||
2839 | if (!hsize && i >= nfrags && skb_headlen(fskb) && | 2895 | if (!hsize && i >= nfrags && skb_headlen(list_skb) && |
2840 | (skb_headlen(fskb) == len || sg)) { | 2896 | (skb_headlen(list_skb) == len || sg)) { |
2841 | BUG_ON(skb_headlen(fskb) > len); | 2897 | BUG_ON(skb_headlen(list_skb) > len); |
2842 | 2898 | ||
2843 | i = 0; | 2899 | i = 0; |
2844 | nfrags = skb_shinfo(fskb)->nr_frags; | 2900 | nfrags = skb_shinfo(list_skb)->nr_frags; |
2845 | skb_frag = skb_shinfo(fskb)->frags; | 2901 | frag = skb_shinfo(list_skb)->frags; |
2846 | pos += skb_headlen(fskb); | 2902 | frag_skb = list_skb; |
2903 | pos += skb_headlen(list_skb); | ||
2847 | 2904 | ||
2848 | while (pos < offset + len) { | 2905 | while (pos < offset + len) { |
2849 | BUG_ON(i >= nfrags); | 2906 | BUG_ON(i >= nfrags); |
2850 | 2907 | ||
2851 | size = skb_frag_size(skb_frag); | 2908 | size = skb_frag_size(frag); |
2852 | if (pos + size > offset + len) | 2909 | if (pos + size > offset + len) |
2853 | break; | 2910 | break; |
2854 | 2911 | ||
2855 | i++; | 2912 | i++; |
2856 | pos += size; | 2913 | pos += size; |
2857 | skb_frag++; | 2914 | frag++; |
2858 | } | 2915 | } |
2859 | 2916 | ||
2860 | nskb = skb_clone(fskb, GFP_ATOMIC); | 2917 | nskb = skb_clone(list_skb, GFP_ATOMIC); |
2861 | fskb = fskb->next; | 2918 | list_skb = list_skb->next; |
2862 | 2919 | ||
2863 | if (unlikely(!nskb)) | 2920 | if (unlikely(!nskb)) |
2864 | goto err; | 2921 | goto err; |
@@ -2879,7 +2936,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) | |||
2879 | __skb_push(nskb, doffset); | 2936 | __skb_push(nskb, doffset); |
2880 | } else { | 2937 | } else { |
2881 | nskb = __alloc_skb(hsize + doffset + headroom, | 2938 | nskb = __alloc_skb(hsize + doffset + headroom, |
2882 | GFP_ATOMIC, skb_alloc_rx_flag(skb), | 2939 | GFP_ATOMIC, skb_alloc_rx_flag(head_skb), |
2883 | NUMA_NO_NODE); | 2940 | NUMA_NO_NODE); |
2884 | 2941 | ||
2885 | if (unlikely(!nskb)) | 2942 | if (unlikely(!nskb)) |
@@ -2895,12 +2952,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) | |||
2895 | segs = nskb; | 2952 | segs = nskb; |
2896 | tail = nskb; | 2953 | tail = nskb; |
2897 | 2954 | ||
2898 | __copy_skb_header(nskb, skb); | 2955 | __copy_skb_header(nskb, head_skb); |
2899 | nskb->mac_len = skb->mac_len; | 2956 | nskb->mac_len = head_skb->mac_len; |
2900 | 2957 | ||
2901 | skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); | 2958 | skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); |
2902 | 2959 | ||
2903 | skb_copy_from_linear_data_offset(skb, -tnl_hlen, | 2960 | skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, |
2904 | nskb->data - tnl_hlen, | 2961 | nskb->data - tnl_hlen, |
2905 | doffset + tnl_hlen); | 2962 | doffset + tnl_hlen); |
2906 | 2963 | ||
@@ -2909,30 +2966,32 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) | |||
2909 | 2966 | ||
2910 | if (!sg) { | 2967 | if (!sg) { |
2911 | nskb->ip_summed = CHECKSUM_NONE; | 2968 | nskb->ip_summed = CHECKSUM_NONE; |
2912 | nskb->csum = skb_copy_and_csum_bits(skb, offset, | 2969 | nskb->csum = skb_copy_and_csum_bits(head_skb, offset, |
2913 | skb_put(nskb, len), | 2970 | skb_put(nskb, len), |
2914 | len, 0); | 2971 | len, 0); |
2915 | continue; | 2972 | continue; |
2916 | } | 2973 | } |
2917 | 2974 | ||
2918 | frag = skb_shinfo(nskb)->frags; | 2975 | nskb_frag = skb_shinfo(nskb)->frags; |
2919 | 2976 | ||
2920 | skb_copy_from_linear_data_offset(skb, offset, | 2977 | skb_copy_from_linear_data_offset(head_skb, offset, |
2921 | skb_put(nskb, hsize), hsize); | 2978 | skb_put(nskb, hsize), hsize); |
2922 | 2979 | ||
2923 | skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; | 2980 | skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & |
2981 | SKBTX_SHARED_FRAG; | ||
2924 | 2982 | ||
2925 | while (pos < offset + len) { | 2983 | while (pos < offset + len) { |
2926 | if (i >= nfrags) { | 2984 | if (i >= nfrags) { |
2927 | BUG_ON(skb_headlen(fskb)); | 2985 | BUG_ON(skb_headlen(list_skb)); |
2928 | 2986 | ||
2929 | i = 0; | 2987 | i = 0; |
2930 | nfrags = skb_shinfo(fskb)->nr_frags; | 2988 | nfrags = skb_shinfo(list_skb)->nr_frags; |
2931 | skb_frag = skb_shinfo(fskb)->frags; | 2989 | frag = skb_shinfo(list_skb)->frags; |
2990 | frag_skb = list_skb; | ||
2932 | 2991 | ||
2933 | BUG_ON(!nfrags); | 2992 | BUG_ON(!nfrags); |
2934 | 2993 | ||
2935 | fskb = fskb->next; | 2994 | list_skb = list_skb->next; |
2936 | } | 2995 | } |
2937 | 2996 | ||
2938 | if (unlikely(skb_shinfo(nskb)->nr_frags >= | 2997 | if (unlikely(skb_shinfo(nskb)->nr_frags >= |
@@ -2943,27 +3002,30 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) | |||
2943 | goto err; | 3002 | goto err; |
2944 | } | 3003 | } |
2945 | 3004 | ||
2946 | *frag = *skb_frag; | 3005 | if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) |
2947 | __skb_frag_ref(frag); | 3006 | goto err; |
2948 | size = skb_frag_size(frag); | 3007 | |
3008 | *nskb_frag = *frag; | ||
3009 | __skb_frag_ref(nskb_frag); | ||
3010 | size = skb_frag_size(nskb_frag); | ||
2949 | 3011 | ||
2950 | if (pos < offset) { | 3012 | if (pos < offset) { |
2951 | frag->page_offset += offset - pos; | 3013 | nskb_frag->page_offset += offset - pos; |
2952 | skb_frag_size_sub(frag, offset - pos); | 3014 | skb_frag_size_sub(nskb_frag, offset - pos); |
2953 | } | 3015 | } |
2954 | 3016 | ||
2955 | skb_shinfo(nskb)->nr_frags++; | 3017 | skb_shinfo(nskb)->nr_frags++; |
2956 | 3018 | ||
2957 | if (pos + size <= offset + len) { | 3019 | if (pos + size <= offset + len) { |
2958 | i++; | 3020 | i++; |
2959 | skb_frag++; | 3021 | frag++; |
2960 | pos += size; | 3022 | pos += size; |
2961 | } else { | 3023 | } else { |
2962 | skb_frag_size_sub(frag, pos + size - (offset + len)); | 3024 | skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); |
2963 | goto skip_fraglist; | 3025 | goto skip_fraglist; |
2964 | } | 3026 | } |
2965 | 3027 | ||
2966 | frag++; | 3028 | nskb_frag++; |
2967 | } | 3029 | } |
2968 | 3030 | ||
2969 | skip_fraglist: | 3031 | skip_fraglist: |
@@ -2977,15 +3039,12 @@ perform_csum_check: | |||
2977 | nskb->len - doffset, 0); | 3039 | nskb->len - doffset, 0); |
2978 | nskb->ip_summed = CHECKSUM_NONE; | 3040 | nskb->ip_summed = CHECKSUM_NONE; |
2979 | } | 3041 | } |
2980 | } while ((offset += len) < skb->len); | 3042 | } while ((offset += len) < head_skb->len); |
2981 | 3043 | ||
2982 | return segs; | 3044 | return segs; |
2983 | 3045 | ||
2984 | err: | 3046 | err: |
2985 | while ((skb = segs)) { | 3047 | kfree_skb_list(segs); |
2986 | segs = skb->next; | ||
2987 | kfree_skb(skb); | ||
2988 | } | ||
2989 | return ERR_PTR(err); | 3048 | return ERR_PTR(err); |
2990 | } | 3049 | } |
2991 | EXPORT_SYMBOL_GPL(skb_segment); | 3050 | EXPORT_SYMBOL_GPL(skb_segment); |
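
Two behavioural changes ride along with the variable renames in skb_segment(): the new skb_orphan_frags() call copies any userspace-pinned (zero-copy) fragments of whichever skb currently owns the frag array before their references are shared into segments, and the error path collapses into kfree_skb_list(). A minimal sketch of the loop that kfree_skb_list() replaces, reconstructed from the removed lines above:

    struct sk_buff *skb;

    /* walk the ->next chain, freeing each partially built segment */
    while ((skb = segs)) {
            segs = skb->next;   /* detach the head before freeing it */
            kfree_skb(skb);
    }
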
@@ -3468,6 +3527,278 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) | |||
3468 | } | 3527 | } |
3469 | EXPORT_SYMBOL_GPL(skb_partial_csum_set); | 3528 | EXPORT_SYMBOL_GPL(skb_partial_csum_set); |
3470 | 3529 | ||
3530 | static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, | ||
3531 | unsigned int max) | ||
3532 | { | ||
3533 | if (skb_headlen(skb) >= len) | ||
3534 | return 0; | ||
3535 | |||
3536 | /* If we need to pull up, then pull up to the max, so we | ||
3537 | * won't need to do it again. | ||
3538 | */ | ||
3539 | if (max > skb->len) | ||
3540 | max = skb->len; | ||
3541 | |||
3542 | if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) | ||
3543 | return -ENOMEM; | ||
3544 | |||
3545 | if (skb_headlen(skb) < len) | ||
3546 | return -EPROTO; | ||
3547 | |||
3548 | return 0; | ||
3549 | } | ||
3550 | |||
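
skb_maybe_pull_tail() is a one-shot linearizer: it succeeds immediately when len bytes are already in the linear area, otherwise it pulls up to max bytes in a single pass so later callers need no further pulls, returning -ENOMEM if the pull fails and -EPROTO if the packet is genuinely shorter than len. A usage sketch, mirroring the IPv4 path below:

    /* Make sure the basic IPv4 header is linear before touching
     * ip_hdr(skb); pull once, up to MAX_IP_HDR_LEN. */
    err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN);
    if (err < 0)
            goto out;
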
3551 | /* This value should be large enough to cover a tagged ethernet header plus | ||
3552 | * maximally sized IP and TCP or UDP headers. | ||
3553 | */ | ||
3554 | #define MAX_IP_HDR_LEN 128 | ||
3555 | |||
3556 | static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) | ||
3557 | { | ||
3558 | unsigned int off; | ||
3559 | bool fragment; | ||
3560 | int err; | ||
3561 | |||
3562 | fragment = false; | ||
3563 | |||
3564 | err = skb_maybe_pull_tail(skb, | ||
3565 | sizeof(struct iphdr), | ||
3566 | MAX_IP_HDR_LEN); | ||
3567 | if (err < 0) | ||
3568 | goto out; | ||
3569 | |||
3570 | if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) | ||
3571 | fragment = true; | ||
3572 | |||
3573 | off = ip_hdrlen(skb); | ||
3574 | |||
3575 | err = -EPROTO; | ||
3576 | |||
3577 | if (fragment) | ||
3578 | goto out; | ||
3579 | |||
3580 | switch (ip_hdr(skb)->protocol) { | ||
3581 | case IPPROTO_TCP: | ||
3582 | err = skb_maybe_pull_tail(skb, | ||
3583 | off + sizeof(struct tcphdr), | ||
3584 | MAX_IP_HDR_LEN); | ||
3585 | if (err < 0) | ||
3586 | goto out; | ||
3587 | |||
3588 | if (!skb_partial_csum_set(skb, off, | ||
3589 | offsetof(struct tcphdr, check))) { | ||
3590 | err = -EPROTO; | ||
3591 | goto out; | ||
3592 | } | ||
3593 | |||
3594 | if (recalculate) | ||
3595 | tcp_hdr(skb)->check = | ||
3596 | ~csum_tcpudp_magic(ip_hdr(skb)->saddr, | ||
3597 | ip_hdr(skb)->daddr, | ||
3598 | skb->len - off, | ||
3599 | IPPROTO_TCP, 0); | ||
3600 | break; | ||
3601 | case IPPROTO_UDP: | ||
3602 | err = skb_maybe_pull_tail(skb, | ||
3603 | off + sizeof(struct udphdr), | ||
3604 | MAX_IP_HDR_LEN); | ||
3605 | if (err < 0) | ||
3606 | goto out; | ||
3607 | |||
3608 | if (!skb_partial_csum_set(skb, off, | ||
3609 | offsetof(struct udphdr, check))) { | ||
3610 | err = -EPROTO; | ||
3611 | goto out; | ||
3612 | } | ||
3613 | |||
3614 | if (recalculate) | ||
3615 | udp_hdr(skb)->check = | ||
3616 | ~csum_tcpudp_magic(ip_hdr(skb)->saddr, | ||
3617 | ip_hdr(skb)->daddr, | ||
3618 | skb->len - off, | ||
3619 | IPPROTO_UDP, 0); | ||
3620 | break; | ||
3621 | default: | ||
3622 | goto out; | ||
3623 | } | ||
3624 | |||
3625 | err = 0; | ||
3626 | |||
3627 | out: | ||
3628 | return err; | ||
3629 | } | ||
3630 | |||
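
For both TCP and UDP the IPv4 path seeds the transport checksum field with the complemented pseudo-header sum (~csum_tcpudp_magic(...)) and records csum_start/csum_offset via skb_partial_csum_set(), leaving the skb in CHECKSUM_PARTIAL state. A hypothetical consumer without hardware offload would then finish the job in software; a sketch (the 'drop' label is assumed):

    if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
            goto drop;      /* could not complete the checksum */
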
3631 | /* This value should be large enough to cover a tagged ethernet header plus | ||
3632 | * an IPv6 header, all options, and a maximal TCP or UDP header. | ||
3633 | */ | ||
3634 | #define MAX_IPV6_HDR_LEN 256 | ||
3635 | |||
3636 | #define OPT_HDR(type, skb, off) \ | ||
3637 | (type *)(skb_network_header(skb) + (off)) | ||
3638 | |||
3639 | static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) | ||
3640 | { | ||
3641 | int err; | ||
3642 | u8 nexthdr; | ||
3643 | unsigned int off; | ||
3644 | unsigned int len; | ||
3645 | bool fragment; | ||
3646 | bool done; | ||
3647 | |||
3648 | fragment = false; | ||
3649 | done = false; | ||
3650 | |||
3651 | off = sizeof(struct ipv6hdr); | ||
3652 | |||
3653 | err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); | ||
3654 | if (err < 0) | ||
3655 | goto out; | ||
3656 | |||
3657 | nexthdr = ipv6_hdr(skb)->nexthdr; | ||
3658 | |||
3659 | len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); | ||
3660 | while (off <= len && !done) { | ||
3661 | switch (nexthdr) { | ||
3662 | case IPPROTO_DSTOPTS: | ||
3663 | case IPPROTO_HOPOPTS: | ||
3664 | case IPPROTO_ROUTING: { | ||
3665 | struct ipv6_opt_hdr *hp; | ||
3666 | |||
3667 | err = skb_maybe_pull_tail(skb, | ||
3668 | off + | ||
3669 | sizeof(struct ipv6_opt_hdr), | ||
3670 | MAX_IPV6_HDR_LEN); | ||
3671 | if (err < 0) | ||
3672 | goto out; | ||
3673 | |||
3674 | hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); | ||
3675 | nexthdr = hp->nexthdr; | ||
3676 | off += ipv6_optlen(hp); | ||
3677 | break; | ||
3678 | } | ||
3679 | case IPPROTO_AH: { | ||
3680 | struct ip_auth_hdr *hp; | ||
3681 | |||
3682 | err = skb_maybe_pull_tail(skb, | ||
3683 | off + | ||
3684 | sizeof(struct ip_auth_hdr), | ||
3685 | MAX_IPV6_HDR_LEN); | ||
3686 | if (err < 0) | ||
3687 | goto out; | ||
3688 | |||
3689 | hp = OPT_HDR(struct ip_auth_hdr, skb, off); | ||
3690 | nexthdr = hp->nexthdr; | ||
3691 | off += ipv6_authlen(hp); | ||
3692 | break; | ||
3693 | } | ||
3694 | case IPPROTO_FRAGMENT: { | ||
3695 | struct frag_hdr *hp; | ||
3696 | |||
3697 | err = skb_maybe_pull_tail(skb, | ||
3698 | off + | ||
3699 | sizeof(struct frag_hdr), | ||
3700 | MAX_IPV6_HDR_LEN); | ||
3701 | if (err < 0) | ||
3702 | goto out; | ||
3703 | |||
3704 | hp = OPT_HDR(struct frag_hdr, skb, off); | ||
3705 | |||
3706 | if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) | ||
3707 | fragment = true; | ||
3708 | |||
3709 | nexthdr = hp->nexthdr; | ||
3710 | off += sizeof(struct frag_hdr); | ||
3711 | break; | ||
3712 | } | ||
3713 | default: | ||
3714 | done = true; | ||
3715 | break; | ||
3716 | } | ||
3717 | } | ||
3718 | |||
3719 | err = -EPROTO; | ||
3720 | |||
3721 | if (!done || fragment) | ||
3722 | goto out; | ||
3723 | |||
3724 | switch (nexthdr) { | ||
3725 | case IPPROTO_TCP: | ||
3726 | err = skb_maybe_pull_tail(skb, | ||
3727 | off + sizeof(struct tcphdr), | ||
3728 | MAX_IPV6_HDR_LEN); | ||
3729 | if (err < 0) | ||
3730 | goto out; | ||
3731 | |||
3732 | if (!skb_partial_csum_set(skb, off, | ||
3733 | offsetof(struct tcphdr, check))) { | ||
3734 | err = -EPROTO; | ||
3735 | goto out; | ||
3736 | } | ||
3737 | |||
3738 | if (recalculate) | ||
3739 | tcp_hdr(skb)->check = | ||
3740 | ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, | ||
3741 | &ipv6_hdr(skb)->daddr, | ||
3742 | skb->len - off, | ||
3743 | IPPROTO_TCP, 0); | ||
3744 | break; | ||
3745 | case IPPROTO_UDP: | ||
3746 | err = skb_maybe_pull_tail(skb, | ||
3747 | off + sizeof(struct udphdr), | ||
3748 | MAX_IPV6_HDR_LEN); | ||
3749 | if (err < 0) | ||
3750 | goto out; | ||
3751 | |||
3752 | if (!skb_partial_csum_set(skb, off, | ||
3753 | offsetof(struct udphdr, check))) { | ||
3754 | err = -EPROTO; | ||
3755 | goto out; | ||
3756 | } | ||
3757 | |||
3758 | if (recalculate) | ||
3759 | udp_hdr(skb)->check = | ||
3760 | ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, | ||
3761 | &ipv6_hdr(skb)->daddr, | ||
3762 | skb->len - off, | ||
3763 | IPPROTO_UDP, 0); | ||
3764 | break; | ||
3765 | default: | ||
3766 | goto out; | ||
3767 | } | ||
3768 | |||
3769 | err = 0; | ||
3770 | |||
3771 | out: | ||
3772 | return err; | ||
3773 | } | ||
3774 | |||
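
The IPv6 variant must first walk the extension-header chain, advancing off by each header's encoded length before it can locate the transport header; fragments and unparseable chains bail out with -EPROTO. A worked example under the standard length encodings (ipv6_optlen() is (hdrlen + 1) * 8 bytes, ipv6_authlen() is (hdrlen + 2) * 4 bytes, and the fragment header is a fixed 8 bytes):

    /* Plain IPv6 (40 bytes) + one hop-by-hop options header with
     * hdrlen == 0, then TCP:
     *   off  = sizeof(struct ipv6hdr);        40
     *   off += ipv6_optlen(hp);               40 + (0 + 1) * 8 = 48
     * The TCP checksum field then sits at off + 16,
     * i.e. 48 + offsetof(struct tcphdr, check). */
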
3775 | /** | ||
3776 | * skb_checksum_setup - set up partial checksum offset | ||
3777 | * @skb: the skb to set up | ||
3778 | * @recalculate: if true the pseudo-header checksum will be recalculated | ||
3779 | */ | ||
3780 | int skb_checksum_setup(struct sk_buff *skb, bool recalculate) | ||
3781 | { | ||
3782 | int err; | ||
3783 | |||
3784 | switch (skb->protocol) { | ||
3785 | case htons(ETH_P_IP): | ||
3786 | err = skb_checksum_setup_ip(skb, recalculate); | ||
3787 | break; | ||
3788 | |||
3789 | case htons(ETH_P_IPV6): | ||
3790 | err = skb_checksum_setup_ipv6(skb, recalculate); | ||
3791 | break; | ||
3792 | |||
3793 | default: | ||
3794 | err = -EPROTO; | ||
3795 | break; | ||
3796 | } | ||
3797 | |||
3798 | return err; | ||
3799 | } | ||
3800 | EXPORT_SYMBOL(skb_checksum_setup); | ||
3801 | |||
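
skb_checksum_setup() itself is just the protocol dispatcher for the two helpers above. A hypothetical guest-RX path in a paravirtual driver, where packets arrive carrying only a partial checksum and need csum_start/csum_offset reconstructed before entering the stack (a sketch, not code from this patch):

    err = skb_checksum_setup(skb, true);
    if (err) {
            kfree_skb(skb);
            return err;
    }
    netif_receive_skb(skb);
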
3471 | void __skb_warn_lro_forwarding(const struct sk_buff *skb) | 3802 | void __skb_warn_lro_forwarding(const struct sk_buff *skb) |
3472 | { | 3803 | { |
3473 | net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", | 3804 | net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", |
@@ -3592,3 +3923,26 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) | |||
3592 | nf_reset_trace(skb); | 3923 | nf_reset_trace(skb); |
3593 | } | 3924 | } |
3594 | EXPORT_SYMBOL_GPL(skb_scrub_packet); | 3925 | EXPORT_SYMBOL_GPL(skb_scrub_packet); |
3926 | |||
3927 | /** | ||
3928 | * skb_gso_transport_seglen - Return length of individual segments of a GSO packet | ||
3929 | * | ||
3930 | * @skb: GSO skb | ||
3931 | * | ||
3932 | * skb_gso_transport_seglen is used to determine the real size of the | ||
3933 | * individual segments, including Layer 4 headers (TCP/UDP). | ||
3934 | * | ||
3935 | * The MAC/L2 or network (IP, IPv6) headers are not accounted for. | ||
3936 | */ | ||
3937 | unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) | ||
3938 | { | ||
3939 | const struct skb_shared_info *shinfo = skb_shinfo(skb); | ||
3940 | unsigned int hdr_len; | ||
3941 | |||
3942 | if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) | ||
3943 | hdr_len = tcp_hdrlen(skb); | ||
3944 | else | ||
3945 | hdr_len = sizeof(struct udphdr); | ||
3946 | return hdr_len + shinfo->gso_size; | ||
3947 | } | ||
3948 | EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); | ||
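
The returned seglen is the transport header plus gso_size; MAC and network headers are deliberately excluded. A worked example, assuming a TCPv4 flow with timestamps on a 1500-byte MTU:

    /* MSS 1448 = 1500 - 20 (IP) - 32 (TCP incl. 12 bytes of options);
     * tcp_hdrlen(skb) == 32, so:
     *   seglen = 32 + 1448 = 1480
     * Non-TCP GSO types fall back to sizeof(struct udphdr) == 8. */
    unsigned int seglen = skb_gso_transport_seglen(skb);
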
diff --git a/net/core/sock.c b/net/core/sock.c index 5393b4b719d7..c0fc6bdad1e3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -925,8 +925,8 @@ set_rcvbuf: | |||
925 | EXPORT_SYMBOL(sock_setsockopt); | 925 | EXPORT_SYMBOL(sock_setsockopt); |
926 | 926 | ||
927 | 927 | ||
928 | void cred_to_ucred(struct pid *pid, const struct cred *cred, | 928 | static void cred_to_ucred(struct pid *pid, const struct cred *cred, |
929 | struct ucred *ucred) | 929 | struct ucred *ucred) |
930 | { | 930 | { |
931 | ucred->pid = pid_vnr(pid); | 931 | ucred->pid = pid_vnr(pid); |
932 | ucred->uid = ucred->gid = -1; | 932 | ucred->uid = ucred->gid = -1; |
@@ -937,7 +937,6 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred, | |||
937 | ucred->gid = from_kgid_munged(current_ns, cred->egid); | 937 | ucred->gid = from_kgid_munged(current_ns, cred->egid); |
938 | } | 938 | } |
939 | } | 939 | } |
940 | EXPORT_SYMBOL_GPL(cred_to_ucred); | ||
941 | 940 | ||
942 | int sock_getsockopt(struct socket *sock, int level, int optname, | 941 | int sock_getsockopt(struct socket *sock, int level, int optname, |
943 | char __user *optval, int __user *optlen) | 942 | char __user *optval, int __user *optlen) |
@@ -1168,6 +1167,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, | |||
1168 | v.val = sock_flag(sk, SOCK_FILTER_LOCKED); | 1167 | v.val = sock_flag(sk, SOCK_FILTER_LOCKED); |
1169 | break; | 1168 | break; |
1170 | 1169 | ||
1170 | case SO_BPF_EXTENSIONS: | ||
1171 | v.val = bpf_tell_extensions(); | ||
1172 | break; | ||
1173 | |||
1171 | case SO_SELECT_ERR_QUEUE: | 1174 | case SO_SELECT_ERR_QUEUE: |
1172 | v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); | 1175 | v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); |
1173 | break; | 1176 | break; |
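
SO_BPF_EXTENSIONS lets userspace ask the kernel which ancillary extensions its classic-BPF interpreter supports, as reported by bpf_tell_extensions(), instead of probing by attaching trial filters. A hypothetical userspace probe (fd is an already-open socket):

    #include <stdio.h>
    #include <sys/socket.h>

    static void probe_bpf_extensions(int fd)
    {
            int val = 0;
            socklen_t len = sizeof(val);

            if (getsockopt(fd, SOL_SOCKET, SO_BPF_EXTENSIONS, &val, &len) == 0)
                    printf("classic BPF extensions: level %d\n", val);
    }
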
@@ -1308,19 +1311,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) | |||
1308 | module_put(owner); | 1311 | module_put(owner); |
1309 | } | 1312 | } |
1310 | 1313 | ||
1311 | #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) | 1314 | #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) |
1312 | void sock_update_classid(struct sock *sk) | ||
1313 | { | ||
1314 | u32 classid; | ||
1315 | |||
1316 | classid = task_cls_classid(current); | ||
1317 | if (classid != sk->sk_classid) | ||
1318 | sk->sk_classid = classid; | ||
1319 | } | ||
1320 | EXPORT_SYMBOL(sock_update_classid); | ||
1321 | #endif | ||
1322 | |||
1323 | #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) | ||
1324 | void sock_update_netprioidx(struct sock *sk) | 1315 | void sock_update_netprioidx(struct sock *sk) |
1325 | { | 1316 | { |
1326 | if (in_interrupt()) | 1317 | if (in_interrupt()) |
@@ -1666,22 +1657,6 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, | |||
1666 | EXPORT_SYMBOL(sock_wmalloc); | 1657 | EXPORT_SYMBOL(sock_wmalloc); |
1667 | 1658 | ||
1668 | /* | 1659 | /* |
1669 | * Allocate a skb from the socket's receive buffer. | ||
1670 | */ | ||
1671 | struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, | ||
1672 | gfp_t priority) | ||
1673 | { | ||
1674 | if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { | ||
1675 | struct sk_buff *skb = alloc_skb(size, priority); | ||
1676 | if (skb) { | ||
1677 | skb_set_owner_r(skb, sk); | ||
1678 | return skb; | ||
1679 | } | ||
1680 | } | ||
1681 | return NULL; | ||
1682 | } | ||
1683 | |||
1684 | /* | ||
1685 | * Allocate a memory block from the socket's option memory buffer. | 1660 | * Allocate a memory block from the socket's option memory buffer. |
1686 | */ | 1661 | */ |
1687 | void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) | 1662 | void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) |
@@ -1800,7 +1775,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, | |||
1800 | while (order) { | 1775 | while (order) { |
1801 | if (npages >= 1 << order) { | 1776 | if (npages >= 1 << order) { |
1802 | page = alloc_pages(sk->sk_allocation | | 1777 | page = alloc_pages(sk->sk_allocation | |
1803 | __GFP_COMP | __GFP_NOWARN, | 1778 | __GFP_COMP | |
1779 | __GFP_NOWARN | | ||
1780 | __GFP_NORETRY, | ||
1804 | order); | 1781 | order); |
1805 | if (page) | 1782 | if (page) |
1806 | goto fill_page; | 1783 | goto fill_page; |
@@ -1865,14 +1842,12 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) | |||
1865 | put_page(pfrag->page); | 1842 | put_page(pfrag->page); |
1866 | } | 1843 | } |
1867 | 1844 | ||
1868 | /* We restrict high order allocations to users that can afford to wait */ | 1845 | order = SKB_FRAG_PAGE_ORDER; |
1869 | order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; | ||
1870 | |||
1871 | do { | 1846 | do { |
1872 | gfp_t gfp = prio; | 1847 | gfp_t gfp = prio; |
1873 | 1848 | ||
1874 | if (order) | 1849 | if (order) |
1875 | gfp |= __GFP_COMP | __GFP_NOWARN; | 1850 | gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; |
1876 | pfrag->page = alloc_pages(gfp, order); | 1851 | pfrag->page = alloc_pages(gfp, order); |
1877 | if (likely(pfrag->page)) { | 1852 | if (likely(pfrag->page)) { |
1878 | pfrag->offset = 0; | 1853 | pfrag->offset = 0; |
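
Both allocation sites now add __GFP_NORETRY so a high-order attempt fails fast instead of triggering heavy reclaim or the OOM killer, which is also why skb_page_frag_refill() can drop its old __GFP_WAIT gate and always start at SKB_FRAG_PAGE_ORDER. A sketch of the opportunistic pattern the two hunks converge on:

    /* Try a compound page cheaply; order-0 remains free to reclaim
     * if the caller's gfp mask allows sleeping. */
    page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY,
                       order);
    if (!page)
            page = alloc_pages(gfp, 0);
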
@@ -2382,10 +2357,13 @@ void release_sock(struct sock *sk) | |||
2382 | if (sk->sk_backlog.tail) | 2357 | if (sk->sk_backlog.tail) |
2383 | __release_sock(sk); | 2358 | __release_sock(sk); |
2384 | 2359 | ||
2360 | /* Warning: release_cb() might need to release sk ownership, | ||
2361 | * i.e. call sock_release_ownership(sk) before us. | ||
2362 | */ | ||
2385 | if (sk->sk_prot->release_cb) | 2363 | if (sk->sk_prot->release_cb) |
2386 | sk->sk_prot->release_cb(sk); | 2364 | sk->sk_prot->release_cb(sk); |
2387 | 2365 | ||
2388 | sk->sk_lock.owned = 0; | 2366 | sock_release_ownership(sk); |
2389 | if (waitqueue_active(&sk->sk_lock.wq)) | 2367 | if (waitqueue_active(&sk->sk_lock.wq)) |
2390 | wake_up(&sk->sk_lock.wq); | 2368 | wake_up(&sk->sk_lock.wq); |
2391 | spin_unlock_bh(&sk->sk_lock.slock); | 2369 | spin_unlock_bh(&sk->sk_lock.slock); |
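
Clearing sk_lock.owned directly is replaced by sock_release_ownership(), so a protocol's release_cb (the comment above warns about exactly this) can legitimately drop ownership itself before release_sock() reaches that point. Presumably the helper just wraps the assignment it replaces; a sketch:

    /* assumed shape; the real definition lives in include/net/sock.h */
    static inline void sock_release_ownership(struct sock *sk)
    {
            sk->sk_lock.owned = 0;
    }
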
diff --git a/net/core/stream.c b/net/core/stream.c index 512f0a24269b..301c05f26060 100644 --- a/net/core/stream.c +++ b/net/core/stream.c | |||
@@ -122,7 +122,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | |||
122 | DEFINE_WAIT(wait); | 122 | DEFINE_WAIT(wait); |
123 | 123 | ||
124 | if (sk_stream_memory_free(sk)) | 124 | if (sk_stream_memory_free(sk)) |
125 | current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; | 125 | current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; |
126 | 126 | ||
127 | while (1) { | 127 | while (1) { |
128 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 128 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
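
net_random() was a thin wrapper around prandom_u32(), so the substitution preserves behaviour: the initial memory-wait timeout stays a small randomized number of jiffies. For example:

    /* With HZ == 1000: HZ / 5 == 200, prandom_u32() % 200 is 0..199,
     * so the wait is 2..201 jiffies (roughly 2 ms to 200 ms). */
    current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
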
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cca444190907..cf9cd13509a7 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -122,7 +122,8 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, | |||
122 | synchronize_rcu(); | 122 | synchronize_rcu(); |
123 | kfree(cur); | 123 | kfree(cur); |
124 | } else if (!cur && cpumask_test_cpu(i, mask)) { | 124 | } else if (!cur && cpumask_test_cpu(i, mask)) { |
125 | cur = kzalloc(len, GFP_KERNEL); | 125 | cur = kzalloc_node(len, GFP_KERNEL, |
126 | cpu_to_node(i)); | ||
126 | if (!cur) { | 127 | if (!cur) { |
127 | /* not unwinding previous changes */ | 128 | /* not unwinding previous changes */ |
128 | ret = -ENOMEM; | 129 | ret = -ENOMEM; |
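
The flow-limit table for CPU i is consulted on that CPU's own receive path, so the NUMA-aware variant places it on the memory node backing CPU i rather than on whichever node happens to run the sysctl write:

    /* zeroed, NUMA-local allocation of the per-CPU table */
    cur = kzalloc_node(len, GFP_KERNEL, cpu_to_node(i));
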