diff options
| -rw-r--r-- | drivers/net/tun.c | 5 | ||||
| -rw-r--r-- | include/linux/netdevice.h | 34 | ||||
| -rw-r--r-- | include/net/sock.h | 24 | ||||
| -rw-r--r-- | net/core/dev.c | 48 | ||||
| -rw-r--r-- | net/core/sysctl_net_core.c | 2 | ||||
| -rw-r--r-- | net/ipv4/af_inet.c | 2 |
6 files changed, 47 insertions, 68 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ad7d3d5f3ee5..857dca47bf80 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c | |||
| @@ -256,7 +256,6 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) | |||
| 256 | { | 256 | { |
| 257 | tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", | 257 | tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", |
| 258 | e->rxhash, e->queue_index); | 258 | e->rxhash, e->queue_index); |
| 259 | sock_rps_reset_flow_hash(e->rps_rxhash); | ||
| 260 | hlist_del_rcu(&e->hash_link); | 259 | hlist_del_rcu(&e->hash_link); |
| 261 | kfree_rcu(e, rcu); | 260 | kfree_rcu(e, rcu); |
| 262 | --tun->flow_count; | 261 | --tun->flow_count; |
| @@ -373,10 +372,8 @@ unlock: | |||
| 373 | */ | 372 | */ |
| 374 | static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) | 373 | static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) |
| 375 | { | 374 | { |
| 376 | if (unlikely(e->rps_rxhash != hash)) { | 375 | if (unlikely(e->rps_rxhash != hash)) |
| 377 | sock_rps_reset_flow_hash(e->rps_rxhash); | ||
| 378 | e->rps_rxhash = hash; | 376 | e->rps_rxhash = hash; |
| 379 | } | ||
| 380 | } | 377 | } |
| 381 | 378 | ||
| 382 | /* We try to identify a flow through its rxhash first. The reason that | 379 | /* We try to identify a flow through its rxhash first. The reason that |
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce784d5018e0..ab3b7cef4638 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
| @@ -644,39 +644,39 @@ struct rps_dev_flow_table { | |||
| 644 | /* | 644 | /* |
| 645 | * The rps_sock_flow_table contains mappings of flows to the last CPU | 645 | * The rps_sock_flow_table contains mappings of flows to the last CPU |
| 646 | * on which they were processed by the application (set in recvmsg). | 646 | * on which they were processed by the application (set in recvmsg). |
| 647 | * Each entry is a 32bit value. Upper part is the high order bits | ||
| 648 | * of flow hash, lower part is cpu number. | ||
| 649 | * rps_cpu_mask is used to partition the space, depending on number of | ||
| 650 | * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 | ||
| 651 | * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, | ||
| 652 | * meaning we use 32-6=26 bits for the hash. | ||
| 647 | */ | 653 | */ |
| 648 | struct rps_sock_flow_table { | 654 | struct rps_sock_flow_table { |
| 649 | unsigned int mask; | 655 | u32 mask; |
| 650 | u16 ents[0]; | 656 | u32 ents[0]; |
| 651 | }; | 657 | }; |
| 652 | #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ | 658 | #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) |
| 653 | ((_num) * sizeof(u16))) | ||
| 654 | 659 | ||
| 655 | #define RPS_NO_CPU 0xffff | 660 | #define RPS_NO_CPU 0xffff |
| 656 | 661 | ||
| 662 | extern u32 rps_cpu_mask; | ||
| 663 | extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; | ||
| 664 | |||
| 657 | static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, | 665 | static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, |
| 658 | u32 hash) | 666 | u32 hash) |
| 659 | { | 667 | { |
| 660 | if (table && hash) { | 668 | if (table && hash) { |
| 661 | unsigned int cpu, index = hash & table->mask; | 669 | unsigned int index = hash & table->mask; |
| 670 | u32 val = hash & ~rps_cpu_mask; | ||
| 662 | 671 | ||
| 663 | /* We only give a hint, preemption can change cpu under us */ | 672 | /* We only give a hint, preemption can change cpu under us */ |
| 664 | cpu = raw_smp_processor_id(); | 673 | val |= raw_smp_processor_id(); |
| 665 | 674 | ||
| 666 | if (table->ents[index] != cpu) | 675 | if (table->ents[index] != val) |
| 667 | table->ents[index] = cpu; | 676 | table->ents[index] = val; |
| 668 | } | 677 | } |
| 669 | } | 678 | } |
| 670 | 679 | ||
| 671 | static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, | ||
| 672 | u32 hash) | ||
| 673 | { | ||
| 674 | if (table && hash) | ||
| 675 | table->ents[hash & table->mask] = RPS_NO_CPU; | ||
| 676 | } | ||
| 677 | |||
| 678 | extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; | ||
| 679 | |||
| 680 | #ifdef CONFIG_RFS_ACCEL | 680 | #ifdef CONFIG_RFS_ACCEL |
| 681 | bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, | 681 | bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, |
| 682 | u16 filter_id); | 682 | u16 filter_id); |
diff --git a/include/net/sock.h b/include/net/sock.h index d28b8fededd6..e13824570b0f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
| @@ -857,18 +857,6 @@ static inline void sock_rps_record_flow_hash(__u32 hash) | |||
| 857 | #endif | 857 | #endif |
| 858 | } | 858 | } |
| 859 | 859 | ||
| 860 | static inline void sock_rps_reset_flow_hash(__u32 hash) | ||
| 861 | { | ||
| 862 | #ifdef CONFIG_RPS | ||
| 863 | struct rps_sock_flow_table *sock_flow_table; | ||
| 864 | |||
| 865 | rcu_read_lock(); | ||
| 866 | sock_flow_table = rcu_dereference(rps_sock_flow_table); | ||
| 867 | rps_reset_sock_flow(sock_flow_table, hash); | ||
| 868 | rcu_read_unlock(); | ||
| 869 | #endif | ||
| 870 | } | ||
| 871 | |||
| 872 | static inline void sock_rps_record_flow(const struct sock *sk) | 860 | static inline void sock_rps_record_flow(const struct sock *sk) |
| 873 | { | 861 | { |
| 874 | #ifdef CONFIG_RPS | 862 | #ifdef CONFIG_RPS |
| @@ -876,28 +864,18 @@ static inline void sock_rps_record_flow(const struct sock *sk) | |||
| 876 | #endif | 864 | #endif |
| 877 | } | 865 | } |
| 878 | 866 | ||
| 879 | static inline void sock_rps_reset_flow(const struct sock *sk) | ||
| 880 | { | ||
| 881 | #ifdef CONFIG_RPS | ||
| 882 | sock_rps_reset_flow_hash(sk->sk_rxhash); | ||
| 883 | #endif | ||
| 884 | } | ||
| 885 | |||
| 886 | static inline void sock_rps_save_rxhash(struct sock *sk, | 867 | static inline void sock_rps_save_rxhash(struct sock *sk, |
| 887 | const struct sk_buff *skb) | 868 | const struct sk_buff *skb) |
| 888 | { | 869 | { |
| 889 | #ifdef CONFIG_RPS | 870 | #ifdef CONFIG_RPS |
| 890 | if (unlikely(sk->sk_rxhash != skb->hash)) { | 871 | if (unlikely(sk->sk_rxhash != skb->hash)) |
| 891 | sock_rps_reset_flow(sk); | ||
| 892 | sk->sk_rxhash = skb->hash; | 872 | sk->sk_rxhash = skb->hash; |
| 893 | } | ||
| 894 | #endif | 873 | #endif |
| 895 | } | 874 | } |
| 896 | 875 | ||
| 897 | static inline void sock_rps_reset_rxhash(struct sock *sk) | 876 | static inline void sock_rps_reset_rxhash(struct sock *sk) |
| 898 | { | 877 | { |
| 899 | #ifdef CONFIG_RPS | 878 | #ifdef CONFIG_RPS |
| 900 | sock_rps_reset_flow(sk); | ||
| 901 | sk->sk_rxhash = 0; | 879 | sk->sk_rxhash = 0; |
| 902 | #endif | 880 | #endif |
| 903 | } | 881 | } |
diff --git a/net/core/dev.c b/net/core/dev.c index a3a96ffc67f4..8be38675e1a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
| @@ -3030,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd, | |||
| 3030 | /* One global table that all flow-based protocols share. */ | 3030 | /* One global table that all flow-based protocols share. */ |
| 3031 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; | 3031 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; |
| 3032 | EXPORT_SYMBOL(rps_sock_flow_table); | 3032 | EXPORT_SYMBOL(rps_sock_flow_table); |
| 3033 | u32 rps_cpu_mask __read_mostly; | ||
| 3034 | EXPORT_SYMBOL(rps_cpu_mask); | ||
| 3033 | 3035 | ||
| 3034 | struct static_key rps_needed __read_mostly; | 3036 | struct static_key rps_needed __read_mostly; |
| 3035 | 3037 | ||
| @@ -3086,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
| 3086 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | 3088 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, |
| 3087 | struct rps_dev_flow **rflowp) | 3089 | struct rps_dev_flow **rflowp) |
| 3088 | { | 3090 | { |
| 3089 | struct netdev_rx_queue *rxqueue; | 3091 | const struct rps_sock_flow_table *sock_flow_table; |
| 3090 | struct rps_map *map; | 3092 | struct netdev_rx_queue *rxqueue = dev->_rx; |
| 3091 | struct rps_dev_flow_table *flow_table; | 3093 | struct rps_dev_flow_table *flow_table; |
| 3092 | struct rps_sock_flow_table *sock_flow_table; | 3094 | struct rps_map *map; |
| 3093 | int cpu = -1; | 3095 | int cpu = -1; |
| 3094 | u16 tcpu; | 3096 | u32 tcpu; |
| 3095 | u32 hash; | 3097 | u32 hash; |
| 3096 | 3098 | ||
| 3097 | if (skb_rx_queue_recorded(skb)) { | 3099 | if (skb_rx_queue_recorded(skb)) { |
| 3098 | u16 index = skb_get_rx_queue(skb); | 3100 | u16 index = skb_get_rx_queue(skb); |
| 3101 | |||
| 3099 | if (unlikely(index >= dev->real_num_rx_queues)) { | 3102 | if (unlikely(index >= dev->real_num_rx_queues)) { |
| 3100 | WARN_ONCE(dev->real_num_rx_queues > 1, | 3103 | WARN_ONCE(dev->real_num_rx_queues > 1, |
| 3101 | "%s received packet on queue %u, but number " | 3104 | "%s received packet on queue %u, but number " |
| @@ -3103,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
| 3103 | dev->name, index, dev->real_num_rx_queues); | 3106 | dev->name, index, dev->real_num_rx_queues); |
| 3104 | goto done; | 3107 | goto done; |
| 3105 | } | 3108 | } |
| 3106 | rxqueue = dev->_rx + index; | 3109 | rxqueue += index; |
| 3107 | } else | 3110 | } |
| 3108 | rxqueue = dev->_rx; | ||
| 3109 | 3111 | ||
| 3112 | /* Avoid computing hash if RFS/RPS is not active for this rxqueue */ | ||
| 3113 | |||
| 3114 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
| 3110 | map = rcu_dereference(rxqueue->rps_map); | 3115 | map = rcu_dereference(rxqueue->rps_map); |
| 3111 | if (map) { | 3116 | if (!flow_table && !map) |
| 3112 | if (map->len == 1 && | ||
| 3113 | !rcu_access_pointer(rxqueue->rps_flow_table)) { | ||
| 3114 | tcpu = map->cpus[0]; | ||
| 3115 | if (cpu_online(tcpu)) | ||
| 3116 | cpu = tcpu; | ||
| 3117 | goto done; | ||
| 3118 | } | ||
| 3119 | } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { | ||
| 3120 | goto done; | 3117 | goto done; |
| 3121 | } | ||
| 3122 | 3118 | ||
| 3123 | skb_reset_network_header(skb); | 3119 | skb_reset_network_header(skb); |
| 3124 | hash = skb_get_hash(skb); | 3120 | hash = skb_get_hash(skb); |
| 3125 | if (!hash) | 3121 | if (!hash) |
| 3126 | goto done; | 3122 | goto done; |
| 3127 | 3123 | ||
| 3128 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
| 3129 | sock_flow_table = rcu_dereference(rps_sock_flow_table); | 3124 | sock_flow_table = rcu_dereference(rps_sock_flow_table); |
| 3130 | if (flow_table && sock_flow_table) { | 3125 | if (flow_table && sock_flow_table) { |
| 3131 | u16 next_cpu; | ||
| 3132 | struct rps_dev_flow *rflow; | 3126 | struct rps_dev_flow *rflow; |
| 3127 | u32 next_cpu; | ||
| 3128 | u32 ident; | ||
| 3129 | |||
| 3130 | /* First check into global flow table if there is a match */ | ||
| 3131 | ident = sock_flow_table->ents[hash & sock_flow_table->mask]; | ||
| 3132 | if ((ident ^ hash) & ~rps_cpu_mask) | ||
| 3133 | goto try_rps; | ||
| 3133 | 3134 | ||
| 3135 | next_cpu = ident & rps_cpu_mask; | ||
| 3136 | |||
| 3137 | /* OK, now we know there is a match, | ||
| 3138 | * we can look at the local (per receive queue) flow table | ||
| 3139 | */ | ||
| 3134 | rflow = &flow_table->flows[hash & flow_table->mask]; | 3140 | rflow = &flow_table->flows[hash & flow_table->mask]; |
| 3135 | tcpu = rflow->cpu; | 3141 | tcpu = rflow->cpu; |
| 3136 | 3142 | ||
| 3137 | next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; | ||
| 3138 | |||
| 3139 | /* | 3143 | /* |
| 3140 | * If the desired CPU (where last recvmsg was done) is | 3144 | * If the desired CPU (where last recvmsg was done) is |
| 3141 | * different from current CPU (one in the rx-queue flow | 3145 | * different from current CPU (one in the rx-queue flow |
| @@ -3162,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
| 3162 | } | 3166 | } |
| 3163 | } | 3167 | } |
| 3164 | 3168 | ||
| 3169 | try_rps: | ||
| 3170 | |||
| 3165 | if (map) { | 3171 | if (map) { |
| 3166 | tcpu = map->cpus[reciprocal_scale(hash, map->len)]; | 3172 | tcpu = map->cpus[reciprocal_scale(hash, map->len)]; |
| 3167 | if (cpu_online(tcpu)) { | 3173 | if (cpu_online(tcpu)) { |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index fde21d19e61b..7a31be5e361f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
| @@ -65,7 +65,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, | |||
| 65 | mutex_unlock(&sock_flow_mutex); | 65 | mutex_unlock(&sock_flow_mutex); |
| 66 | return -ENOMEM; | 66 | return -ENOMEM; |
| 67 | } | 67 | } |
| 68 | 68 | rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; | |
| 69 | sock_table->mask = size - 1; | 69 | sock_table->mask = size - 1; |
| 70 | } else | 70 | } else |
| 71 | sock_table = orig_sock_table; | 71 | sock_table = orig_sock_table; |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a44773c8346c..d2e49baaff63 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
| @@ -395,8 +395,6 @@ int inet_release(struct socket *sock) | |||
| 395 | if (sk) { | 395 | if (sk) { |
| 396 | long timeout; | 396 | long timeout; |
| 397 | 397 | ||
| 398 | sock_rps_reset_flow(sk); | ||
| 399 | |||
| 400 | /* Applications forget to leave groups before exiting */ | 398 | /* Applications forget to leave groups before exiting */ |
| 401 | ip_mc_drop_socket(sk); | 399 | ip_mc_drop_socket(sk); |
| 402 | 400 | ||
