author    Cong Wang <xiyou.wangcong@gmail.com>    2013-01-20 19:39:24 -0500
committer David S. Miller <davem@davemloft.net>   2013-01-21 14:26:17 -0500
commit    441d9d327f1e770f5aa76fd91735851ac6e1e236 (patch)
tree      f7a0a8d206d5745fd196bc9a5c98c2f8d67086ac
parent    cd5f20461de8e3e9b04932e4ba3e37afe39b8898 (diff)
net: move rx and tx hash functions to net/core/flow_dissector.c
__skb_tx_hash() and __skb_get_rxhash() are both used to calculate a hash
value based on some fields of the skb, mostly for selecting queues in
device drivers.

Meanwhile, net/core/dev.c is getting bloated.
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  net/core/dev.c            | 175
-rw-r--r--  net/core/flow_dissector.c | 173
2 files changed, 173 insertions(+), 175 deletions(-)
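The heart of __skb_tx_hash() below is a multiply-shift range reduction:
((u64) hash * qcount) >> 32 treats the 32-bit hash as a fraction in [0, 1)
and scales it into [0, qcount) without an integer division, and qoffset then
shifts the result into the traffic class's slice of queues. A minimal
userspace sketch of that mapping (pick_queue, main, and the queue numbers
are invented for illustration; this is not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Multiply-shift range reduction as used by __skb_tx_hash(): treat
     * the 32-bit hash as a fraction in [0, 1) and scale it into
     * [qoffset, qoffset + qcount), with no integer division needed.
     */
    static uint16_t pick_queue(uint32_t hash, uint16_t qoffset, uint16_t qcount)
    {
        return (uint16_t)(((uint64_t)hash * qcount) >> 32) + qoffset;
    }

    int main(void)
    {
        /* Hypothetical device: 8 TX queues, this traffic class owns 4..7. */
        const uint16_t qoffset = 4, qcount = 4;
        const uint32_t hashes[] = { 0x00000000u, 0x40000000u,
                                    0x80000000u, 0xffffffffu };

        for (int i = 0; i < 4; i++)
            printf("hash=0x%08x -> queue %u\n", (unsigned)hashes[i],
                   pick_queue(hashes[i], qoffset, qcount));
        /* Prints queues 4, 5, 6, 7: the hash space splits evenly. */
        return 0;
    }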
diff --git a/net/core/dev.c b/net/core/dev.c
index b6d2b32933ba..c69cd8721b28 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -134,7 +134,6 @@
 #include <linux/cpu_rmap.h>
 #include <linux/net_tstamp.h>
 #include <linux/static_key.h>
-#include <net/flow_keys.h>
 
 #include "net-sysfs.h"
 
@@ -2636,136 +2635,6 @@ out:
 	return rc;
 }
 
-static u32 hashrnd __read_mostly;
-
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
-		  unsigned int num_tx_queues)
-{
-	u32 hash;
-	u16 qoffset = 0;
-	u16 qcount = num_tx_queues;
-
-	if (skb_rx_queue_recorded(skb)) {
-		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= num_tx_queues))
-			hash -= num_tx_queues;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
-	}
-
-	if (skb->sk && skb->sk->sk_hash)
-		hash = skb->sk->sk_hash;
-	else
-		hash = (__force u16) skb->protocol;
-	hash = jhash_1word(hash, hashrnd);
-
-	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
-}
-EXPORT_SYMBOL(__skb_tx_hash);
-
-static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
-{
-	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
-		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
-				     dev->name, queue_index,
-				     dev->real_num_tx_queues);
-		return 0;
-	}
-	return queue_index;
-}
-
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
-{
-#ifdef CONFIG_XPS
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
-	int queue_index = -1;
-
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
-	if (dev_maps) {
-		map = rcu_dereference(
-		    dev_maps->cpu_map[raw_smp_processor_id()]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else {
-				u32 hash;
-				if (skb->sk && skb->sk->sk_hash)
-					hash = skb->sk->sk_hash;
-				else
-					hash = (__force u16) skb->protocol ^
-					    skb->rxhash;
-				hash = jhash_1word(hash, hashrnd);
-				queue_index = map->queues[
-				    ((u64)hash * map->len) >> 32];
-			}
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
-		}
-	}
-	rcu_read_unlock();
-
-	return queue_index;
-#else
-	return -1;
-#endif
-}
-
-u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
-{
-	struct sock *sk = skb->sk;
-	int queue_index = sk_tx_queue_get(sk);
-
-	if (queue_index < 0 || skb->ooo_okay ||
-	    queue_index >= dev->real_num_tx_queues) {
-		int new_index = get_xps_queue(dev, skb);
-		if (new_index < 0)
-			new_index = skb_tx_hash(dev, skb);
-
-		if (queue_index != new_index && sk) {
-			struct dst_entry *dst =
-				    rcu_dereference_check(sk->sk_dst_cache, 1);
-
-			if (dst && skb_dst(skb) == dst)
-				sk_tx_queue_set(sk, queue_index);
-
-		}
-
-		queue_index = new_index;
-	}
-
-	return queue_index;
-}
-EXPORT_SYMBOL(__netdev_pick_tx);
-
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
-{
-	int queue_index = 0;
-
-	if (dev->real_num_tx_queues != 1) {
-		const struct net_device_ops *ops = dev->netdev_ops;
-		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb);
-		else
-			queue_index = __netdev_pick_tx(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
-	}
-
-	skb_set_queue_mapping(skb, queue_index);
-	return netdev_get_tx_queue(dev, queue_index);
-}
-
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
 	const struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -3015,41 +2884,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
-/*
- * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
- * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
- * if hash is a canonical 4-tuple hash over transport ports.
- */
-void __skb_get_rxhash(struct sk_buff *skb)
-{
-	struct flow_keys keys;
-	u32 hash;
-
-	if (!skb_flow_dissect(skb, &keys))
-		return;
-
-	if (keys.ports)
-		skb->l4_rxhash = 1;
-
-	/* get a consistent hash (same value on both flow directions) */
-	if (((__force u32)keys.dst < (__force u32)keys.src) ||
-	    (((__force u32)keys.dst == (__force u32)keys.src) &&
-	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
-		swap(keys.dst, keys.src);
-		swap(keys.port16[0], keys.port16[1]);
-	}
-
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src,
-			    (__force u32)keys.ports, hashrnd);
-	if (!hash)
-		hash = 1;
-
-	skb->rxhash = hash;
-}
-EXPORT_SYMBOL(__skb_get_rxhash);
-
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -7308,12 +7142,3 @@ out:
 }
 
 subsys_initcall(net_dev_init);
-
-static int __init initialize_hashrnd(void)
-{
-	get_random_bytes(&hashrnd, sizeof(hashrnd));
-	return 0;
-}
-
-late_initcall_sync(initialize_hashrnd);
-
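The removed get_xps_queue() above is the XPS (transmit packet steering)
path: each CPU has an RCU-protected map of candidate TX queues, and the flow
hash indexes into it with the same multiply-shift reduction. A hypothetical
userspace reduction of the idea, with invented names (cpu_queue_map,
xps_pick) and none of the kernel's RCU or skb plumbing:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical per-CPU XPS-style map: the queues this CPU may use. */
    struct cpu_queue_map {
        size_t len;
        const uint16_t *queues;
    };

    /* Pick a queue for 'hash' on 'cpu', or -1 if the map gives no answer,
     * mirroring the shape of get_xps_queue() (minus RCU and skb details).
     */
    static int xps_pick(const struct cpu_queue_map *maps, int cpu, uint32_t hash)
    {
        const struct cpu_queue_map *map = &maps[cpu];

        if (map->len == 0)
            return -1;
        if (map->len == 1)
            return map->queues[0];
        /* Same multiply-shift reduction as __skb_tx_hash(). */
        return map->queues[((uint64_t)hash * map->len) >> 32];
    }

    int main(void)
    {
        static const uint16_t cpu0_queues[] = { 0, 1 };
        static const uint16_t cpu1_queues[] = { 2 };
        struct cpu_queue_map maps[2] = {
            { 2, cpu0_queues },
            { 1, cpu1_queues },
        };
        /* CPU 1 always gets queue 2; CPU 0 splits its two queues by hash. */
        return xps_pick(maps, 1, 0xdeadbeef) == 2 ? 0 : 1;
    }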
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 466820b6e344..9d4c7201400d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -143,3 +143,176 @@ ipv6:
 	return true;
 }
 EXPORT_SYMBOL(skb_flow_dissect);
+
+static u32 hashrnd __read_mostly;
+
+/*
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
+ * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
+ * if hash is a canonical 4-tuple hash over transport ports.
+ */
+void __skb_get_rxhash(struct sk_buff *skb)
+{
+	struct flow_keys keys;
+	u32 hash;
+
+	if (!skb_flow_dissect(skb, &keys))
+		return;
+
+	if (keys.ports)
+		skb->l4_rxhash = 1;
+
+	/* get a consistent hash (same value on both flow directions) */
+	if (((__force u32)keys.dst < (__force u32)keys.src) ||
+	    (((__force u32)keys.dst == (__force u32)keys.src) &&
+	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
+		swap(keys.dst, keys.src);
+		swap(keys.port16[0], keys.port16[1]);
+	}
+
+	hash = jhash_3words((__force u32)keys.dst,
+			    (__force u32)keys.src,
+			    (__force u32)keys.ports, hashrnd);
+	if (!hash)
+		hash = 1;
+
+	skb->rxhash = hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+/*
+ * Returns a Tx hash based on the given packet descriptor and a Tx queues'
+ * number to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues)
+{
+	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
+
+	if (skb_rx_queue_recorded(skb)) {
+		hash = skb_get_rx_queue(skb);
+		while (unlikely(hash >= num_tx_queues))
+			hash -= num_tx_queues;
+		return hash;
+	}
+
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
+	if (skb->sk && skb->sk->sk_hash)
+		hash = skb->sk->sk_hash;
+	else
+		hash = (__force u16) skb->protocol;
+	hash = jhash_1word(hash, hashrnd);
+
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
+				     dev->name, queue_index,
+				     dev->real_num_tx_queues);
+		return 0;
+	}
+	return queue_index;
+}
+
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int queue_index = -1;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		map = rcu_dereference(
+		    dev_maps->cpu_map[raw_smp_processor_id()]);
+		if (map) {
+			if (map->len == 1)
+				queue_index = map->queues[0];
+			else {
+				u32 hash;
+				if (skb->sk && skb->sk->sk_hash)
+					hash = skb->sk->sk_hash;
+				else
+					hash = (__force u16) skb->protocol ^
+					    skb->rxhash;
+				hash = jhash_1word(hash, hashrnd);
+				queue_index = map->queues[
+				    ((u64)hash * map->len) >> 32];
+			}
+			if (unlikely(queue_index >= dev->real_num_tx_queues))
+				queue_index = -1;
+		}
+	}
+	rcu_read_unlock();
+
+	return queue_index;
+#else
+	return -1;
+#endif
+}
+
+u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	int queue_index = sk_tx_queue_get(sk);
+
+	if (queue_index < 0 || skb->ooo_okay ||
+	    queue_index >= dev->real_num_tx_queues) {
+		int new_index = get_xps_queue(dev, skb);
+		if (new_index < 0)
+			new_index = skb_tx_hash(dev, skb);
+
+		if (queue_index != new_index && sk) {
+			struct dst_entry *dst =
+				    rcu_dereference_check(sk->sk_dst_cache, 1);
+
+			if (dst && skb_dst(skb) == dst)
+				sk_tx_queue_set(sk, queue_index);
+
+		}
+
+		queue_index = new_index;
+	}
+
+	return queue_index;
+}
+EXPORT_SYMBOL(__netdev_pick_tx);
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb)
+{
+	int queue_index = 0;
+
+	if (dev->real_num_tx_queues != 1) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+		if (ops->ndo_select_queue)
+			queue_index = ops->ndo_select_queue(dev, skb);
+		else
+			queue_index = __netdev_pick_tx(dev, skb);
+		queue_index = dev_cap_txqueue(dev, queue_index);
+	}
+
+	skb_set_queue_mapping(skb, queue_index);
+	return netdev_get_tx_queue(dev, queue_index);
+}
+
+static int __init initialize_hashrnd(void)
+{
+	get_random_bytes(&hashrnd, sizeof(hashrnd));
+	return 0;
+}
+
+late_initcall_sync(initialize_hashrnd);
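A detail that is easy to miss in the moved __skb_get_rxhash(): the flow keys
are swapped into canonical order before hashing, so both directions of a
connection yield the same rxhash. A self-contained sketch of that
canonicalization, assuming an illustrative struct flow and a stand-in mixer
(mix3) in place of the kernel's jhash_3words():

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative flow key: IPv4 addresses plus TCP/UDP ports. */
    struct flow {
        uint32_t src, dst;
        uint16_t sport, dport;
    };

    /* Stand-in for jhash_3words(): any decent 3-word mixer works here. */
    static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
    {
        uint32_t h = seed ^ 0x9e3779b9u;
        h = (h ^ a) * 0x85ebca6bu;
        h = (h ^ b) * 0xc2b2ae35u;
        h = (h ^ c) * 0x27d4eb2fu;
        return h ^ (h >> 16);
    }

    static uint32_t flow_hash(struct flow k, uint32_t seed)
    {
        /* Canonicalize: order the (addr, port) pairs so A->B and B->A
         * collapse to the same key before hashing, as __skb_get_rxhash()
         * does with swap(keys.dst, keys.src) and swap(keys.port16[...]).
         */
        if (k.dst < k.src || (k.dst == k.src && k.dport < k.sport)) {
            uint32_t ta = k.src; k.src = k.dst; k.dst = ta;
            uint16_t tp = k.sport; k.sport = k.dport; k.dport = tp;
        }
        uint32_t ports = ((uint32_t)k.sport << 16) | k.dport;
        uint32_t h = mix3(k.dst, k.src, ports, seed);
        return h ? h : 1;   /* 0 is reserved for "no valid hash" */
    }

    int main(void)
    {
        struct flow ab = { 0x0a000001, 0x0a000002, 12345, 80 };
        struct flow ba = { 0x0a000002, 0x0a000001, 80, 12345 };

        /* Both directions of the same flow hash identically. */
        printf("%08x %08x\n", (unsigned)flow_hash(ab, 42),
               (unsigned)flow_hash(ba, 42));
        return 0;
    }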