 include/linux/netdevice.h |  32
 include/linux/skbuff.h    |   3
 net/core/dev.c            | 335
 net/core/net-sysfs.c      | 225
 net/core/skbuff.c         |   2
 5 files changed, 538 insertions(+), 59 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c79a88be7c33..de1a52bcb9e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -223,6 +223,7 @@ struct netif_rx_stats {
 	unsigned dropped;
 	unsigned time_squeeze;
 	unsigned cpu_collision;
+	unsigned received_rps;
 };
 
 DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
@@ -530,6 +531,24 @@ struct netdev_queue {
 	unsigned long		tx_dropped;
 } ____cacheline_aligned_in_smp;
 
+/*
+ * This structure holds an RPS map which can be of variable length.  The
+ * map is an array of CPUs.
+ */
+struct rps_map {
+	unsigned int len;
+	struct rcu_head rcu;
+	u16 cpus[0];
+};
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
+
+/* This structure contains an instance of an RX queue. */
+struct netdev_rx_queue {
+	struct rps_map *rps_map;
+	struct kobject kobj;
+	struct netdev_rx_queue *first;
+	atomic_t count;
+} ____cacheline_aligned_in_smp;
 
 /*
  * This structure defines the management hooks for network devices.
@@ -878,6 +897,13 @@ struct net_device {
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
+	struct kset		*queues_kset;
+
+	struct netdev_rx_queue	*_rx;
+
+	/* Number of RX queues allocated at alloc_netdev_mq() time */
+	unsigned int		num_rx_queues;
+
 	struct netdev_queue	rx_queue;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
@@ -1311,14 +1337,16 @@ static inline int unregister_gifconf(unsigned int family)
  */
 struct softnet_data {
 	struct Qdisc		*output_queue;
-	struct sk_buff_head	input_pkt_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
+	/* Elements below can be accessed between CPUs for RPS */
+	struct call_single_data	csd ____cacheline_aligned_in_smp;
+	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
 
-DECLARE_PER_CPU(struct softnet_data,softnet_data);
+DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
 #define HAVE_NETIF_QUEUE
 
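Note on the new structures above: rps_map ends in a zero-length array, so the CPU list is allocated inline right after the header, and RPS_MAP_SIZE(n) is the byte count for a map holding n CPU ids. A minimal sketch of building such a map (the helper name and the CPU ids are illustrative, not part of the patch; a real map is published with rcu_assign_pointer() as store_rps_map() does later in this series):

/* Illustrative only: build an rps_map steering to CPUs 1, 2 and 3. */
static struct rps_map *example_build_map(void)
{
	static const u16 example_cpus[] = { 1, 2, 3 };
	struct rps_map *map;
	unsigned int i;

	map = kzalloc(RPS_MAP_SIZE(ARRAY_SIZE(example_cpus)), GFP_KERNEL);
	if (!map)
		return NULL;

	map->len = ARRAY_SIZE(example_cpus);
	for (i = 0; i < map->len; i++)
		map->cpus[i] = example_cpus[i];	/* stored inline after the header */

	return map;
}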
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 03f816a9b659..def10b064f29 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -300,6 +300,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
+ *	@rxhash: the packet hash computed on receive
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@tc_index: Traffic control index
  *	@tc_verd: traffic control verdict
@@ -375,6 +376,8 @@ struct sk_buff {
 #endif
 #endif
 
+	__u32			rxhash;
+
 	kmemcheck_bitfield_begin(flags2);
 	__u16			queue_mapping:16;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
diff --git a/net/core/dev.c b/net/core/dev.c
index bcc490cc9452..17b168671501 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1931,7 +1931,7 @@ out_kfree_skb:
 	return rc;
 }
 
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
 
 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
@@ -1949,7 +1949,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 	else
 		hash = skb->protocol;
 
-	hash = jhash_1word(hash, skb_tx_hashrnd);
+	hash = jhash_1word(hash, hashrnd);
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
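Both skb_tx_hash() and the RPS code added further down map a 32-bit hash onto n slots with ((u64) hash * n) >> 32 instead of hash % n: since hash < 2^32, the scaled product always lands in [0, n), and no division is needed. A small user-space sketch with illustrative values:

#include <stdint.h>
#include <stdio.h>

/* Map a 32-bit hash onto one of n slots without a modulo. */
static uint16_t pick_slot(uint32_t hash, uint16_t n)
{
	return (uint16_t)(((uint64_t)hash * n) >> 32);
}

int main(void)
{
	printf("%u\n", pick_slot(0x00000001, 8));	/* prints 0 */
	printf("%u\n", pick_slot(0x80000000, 8));	/* prints 4 */
	printf("%u\n", pick_slot(0xffffffff, 8));	/* prints 7 */
	return 0;
}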
@@ -1959,10 +1959,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
 		if (net_ratelimit()) {
-			WARN(1, "%s selects TX queue %d, but "
-			     "real number of TX queues is %d\n",
-			     dev->name, queue_index,
-			     dev->real_num_tx_queues);
+			netdev_warn(dev, "selects TX queue %d, but "
+				    "real number of TX queues is %d\n",
+				    queue_index, dev->real_num_tx_queues);
 		}
 		return 0;
 	}
@@ -2175,6 +2174,172 @@ int weight_p __read_mostly = 64;	/* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6;
+	struct iphdr *ip;
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map;
+	int cpu = -1;
+	u8 ip_proto;
+	u32 addr1, addr2, ports, ihl;
+
+	rcu_read_lock();
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->num_rx_queues)) {
+			if (net_ratelimit()) {
+				netdev_warn(dev, "received packet on queue "
+					"%u, but number of RX queues is %u\n",
+					index, dev->num_rx_queues);
+			}
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	if (!rxqueue->rps_map)
+		goto done;
+
+	if (skb->rxhash)
+		goto got_hash; /* Skip hash computation on packet header */
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		if (!pskb_may_pull(skb, sizeof(*ip)))
+			goto done;
+
+		ip = (struct iphdr *) skb->data;
+		ip_proto = ip->protocol;
+		addr1 = ip->saddr;
+		addr2 = ip->daddr;
+		ihl = ip->ihl;
+		break;
+	case __constant_htons(ETH_P_IPV6):
+		if (!pskb_may_pull(skb, sizeof(*ip6)))
+			goto done;
+
+		ip6 = (struct ipv6hdr *) skb->data;
+		ip_proto = ip6->nexthdr;
+		addr1 = ip6->saddr.s6_addr32[3];
+		addr2 = ip6->daddr.s6_addr32[3];
+		ihl = (40 >> 2);
+		break;
+	default:
+		goto done;
+	}
+	ports = 0;
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_SCTP:
+	case IPPROTO_UDPLITE:
+		if (pskb_may_pull(skb, (ihl * 4) + 4))
+			ports = *((u32 *) (skb->data + (ihl * 4)));
+		break;
+
+	default:
+		break;
+	}
+
+	skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+	if (!skb->rxhash)
+		skb->rxhash = 1;
+
+got_hash:
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+		if (cpu_online(tcpu)) {
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+done:
+	rcu_read_unlock();
+	return cpu;
+}
+
+/*
+ * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
+ * to be sent to kick remote softirq processing.  There are two masks since
+ * the sending of IPIs must be done with interrupts enabled.  The select field
+ * indicates the current mask that enqueue_backlog uses to schedule IPIs.
+ * select is flipped before net_rps_action is called while still under lock,
+ * net_rps_action then uses the non-selected mask to send the IPIs and clears
+ * it without conflicting with enqueue_backlog operation.
+ */
+struct rps_remote_softirq_cpus {
+	cpumask_t mask[2];
+	int select;
+};
+static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
+
+/* Called from hardirq (IPI) context */
+static void trigger_softirq(void *data)
+{
+	struct softnet_data *queue = data;
+	__napi_schedule(&queue->backlog);
+	__get_cpu_var(netdev_rx_stat).received_rps++;
+}
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+{
+	struct softnet_data *queue;
+	unsigned long flags;
+
+	queue = &per_cpu(softnet_data, cpu);
+
+	local_irq_save(flags);
+	__get_cpu_var(netdev_rx_stat).total++;
+
+	spin_lock(&queue->input_pkt_queue.lock);
+	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+		if (queue->input_pkt_queue.qlen) {
+enqueue:
+			__skb_queue_tail(&queue->input_pkt_queue, skb);
+			spin_unlock_irqrestore(&queue->input_pkt_queue.lock,
+			    flags);
+			return NET_RX_SUCCESS;
+		}
+
+		/* Schedule NAPI for backlog device */
+		if (napi_schedule_prep(&queue->backlog)) {
+			if (cpu != smp_processor_id()) {
+				struct rps_remote_softirq_cpus *rcpus =
+				    &__get_cpu_var(rps_remote_softirq_cpus);
+
+				cpu_set(cpu, rcpus->mask[rcpus->select]);
+				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+			} else
+				__napi_schedule(&queue->backlog);
+		}
+		goto enqueue;
+	}
+
+	spin_unlock(&queue->input_pkt_queue.lock);
+
+	__get_cpu_var(netdev_rx_stat).dropped++;
+	local_irq_restore(flags);
+
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
 
 /**
  *	netif_rx	-	post buffer to the network code
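In get_rps_cpu() above, the hash covers the two addresses plus the first four bytes past the IP header (the port pair for TCP, UDP and the other listed protocols), so every packet of a flow gets the same rxhash and is steered to the same CPU; a result of zero is bumped to 1 because rxhash == 0 means "not computed yet". A reduced sketch of that decision with the skb parsing stripped away (hashrnd, jhash_3words() and struct rps_map are the ones the patch uses, the function itself is illustrative):

/* Illustrative only: the per-flow steering decision at the core of RPS. */
static int example_steer(__be32 saddr, __be32 daddr, u32 ports,
			 const struct rps_map *map)
{
	u32 hash = jhash_3words((__force u32)saddr, (__force u32)daddr,
				ports, hashrnd);

	if (!hash)
		hash = 1;	/* 0 is reserved for "no hash computed" */

	/* same multiply-shift slot selection as skb_tx_hash() */
	return map->cpus[((u64)hash * map->len) >> 32];
}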
@@ -2193,8 +2358,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 int netif_rx(struct sk_buff *skb)
 {
-	struct softnet_data *queue;
-	unsigned long flags;
+	int cpu;
 
 	/* if netpoll wants it, pretend we never saw it */
 	if (netpoll_rx(skb))
@@ -2203,31 +2367,11 @@ int netif_rx(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
-	/*
-	 * The code is rearranged so that the path is the most
-	 * short when CPU is congested, but is still operating.
-	 */
-	local_irq_save(flags);
-	queue = &__get_cpu_var(softnet_data);
-
-	__get_cpu_var(netdev_rx_stat).total++;
-	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (queue->input_pkt_queue.qlen) {
-enqueue:
-			__skb_queue_tail(&queue->input_pkt_queue, skb);
-			local_irq_restore(flags);
-			return NET_RX_SUCCESS;
-		}
-
-		napi_schedule(&queue->backlog);
-		goto enqueue;
-	}
-
-	__get_cpu_var(netdev_rx_stat).dropped++;
-	local_irq_restore(flags);
+	cpu = get_rps_cpu(skb->dev, skb);
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
-	kfree_skb(skb);
-	return NET_RX_DROP;
+	return enqueue_to_backlog(skb, cpu);
 }
 EXPORT_SYMBOL(netif_rx);
 
@@ -2464,22 +2608,7 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
-/**
- *	netif_receive_skb - process receive buffer from network
- *	@skb: buffer to process
- *
- *	netif_receive_skb() is the main receive data processing function.
- *	It always succeeds. The buffer may be dropped during processing
- *	for congestion control or by the protocol layers.
- *
- *	This function may only be called from softirq context and interrupts
- *	should be enabled.
- *
- *	Return values (usually ignored):
- *	NET_RX_SUCCESS: no congestion
- *	NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
@@ -2588,6 +2717,33 @@ out:
 	rcu_read_unlock();
 	return ret;
 }
+
+/**
+ *	netif_receive_skb - process receive buffer from network
+ *	@skb: buffer to process
+ *
+ *	netif_receive_skb() is the main receive data processing function.
+ *	It always succeeds. The buffer may be dropped during processing
+ *	for congestion control or by the protocol layers.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	int cpu;
+
+	cpu = get_rps_cpu(skb->dev, skb);
+
+	if (cpu < 0)
+		return __netif_receive_skb(skb);
+	else
+		return enqueue_to_backlog(skb, cpu);
+}
 EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending */
@@ -2914,16 +3070,16 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	do {
 		struct sk_buff *skb;
 
-		local_irq_disable();
+		spin_lock_irq(&queue->input_pkt_queue.lock);
 		skb = __skb_dequeue(&queue->input_pkt_queue);
 		if (!skb) {
 			__napi_complete(napi);
-			local_irq_enable();
+			spin_unlock_irq(&queue->input_pkt_queue.lock);
 			break;
 		}
-		local_irq_enable();
+		spin_unlock_irq(&queue->input_pkt_queue.lock);
 
-		netif_receive_skb(skb);
+		__netif_receive_skb(skb);
 	} while (++work < quota && jiffies == start_time);
 
 	return work;
@@ -3012,6 +3168,22 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
+/*
+ * net_rps_action sends any pending IPI's for rps.  This is only called from
+ * softirq and interrupts must be enabled.
+ */
+static void net_rps_action(cpumask_t *mask)
+{
+	int cpu;
+
+	/* Send pending IPI's to kick RPS processing on remote cpus. */
+	for_each_cpu_mask_nr(cpu, *mask) {
+		struct softnet_data *queue = &per_cpu(softnet_data, cpu);
+		if (cpu_online(cpu))
+			__smp_call_function_single(cpu, &queue->csd, 0);
+	}
+	cpus_clear(*mask);
+}
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -3019,6 +3191,8 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
+	int select;
+	struct rps_remote_softirq_cpus *rcpus;
 
 	local_irq_disable();
 
@@ -3081,8 +3255,14 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
+	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
+	select = rcpus->select;
+	rcpus->select ^= 1;
+
 	local_irq_enable();
 
+	net_rps_action(&rcpus->mask[select]);
+
 #ifdef CONFIG_NET_DMA
 	/*
 	 * There may not be any more sk_buffs coming right now, so push
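The flip above is the consumer half of the two-mask scheme described next to rps_remote_softirq_cpus: enqueue_to_backlog() marks remote CPUs in mask[select] with interrupts disabled, net_rx_action() switches select while interrupts are still off, and only then re-enables interrupts and drains the mask that no producer is writing any more. Condensed into two illustrative helpers (not in the patch, error handling dropped):

/* Producer side, runs with IRQs off (as in enqueue_to_backlog). */
static void example_mark_remote(struct rps_remote_softirq_cpus *rcpus, int cpu)
{
	cpu_set(cpu, rcpus->mask[rcpus->select]);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}

/* Consumer side, at the end of net_rx_action. */
static void example_drain(struct rps_remote_softirq_cpus *rcpus)
{
	int select;

	local_irq_disable();
	select = rcpus->select;
	rcpus->select ^= 1;			/* new marks go to the other mask */
	local_irq_enable();

	net_rps_action(&rcpus->mask[select]);	/* IPIs sent with IRQs enabled */
}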
@@ -3327,10 +3507,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
 {
 	struct netif_rx_stats *s = v;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 		   s->total, s->dropped, s->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   s->cpu_collision);
+		   s->cpu_collision, s->received_rps);
 	return 0;
 }
 
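With this change received_rps becomes a tenth hexadecimal column in /proc/net/softnet_stat, one row per CPU. A small user-space sketch for watching it (only the column position comes from the seq_printf above; everything else is illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int col[10];
	char line[256];
	int cpu = 0;
	FILE *f = fopen("/proc/net/softnet_stat", "r");

	if (!f)
		return 1;

	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "%x %x %x %x %x %x %x %x %x %x",
			   &col[0], &col[1], &col[2], &col[3], &col[4],
			   &col[5], &col[6], &col[7], &col[8], &col[9]) == 10)
			printf("cpu%d: total=%u dropped=%u received_rps=%u\n",
			       cpu, col[0], col[1], col[9]);
		cpu++;
	}
	fclose(f);
	return 0;
}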
@@ -5067,6 +5247,23 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
+	if (!dev->num_rx_queues) {
+		/*
+		 * Allocate a single RX queue if driver never called
+		 * alloc_netdev_mq
+		 */
+
+		dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
+		if (!dev->_rx) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		dev->_rx->first = dev->_rx;
+		atomic_set(&dev->_rx->count, 1);
+		dev->num_rx_queues = 1;
+	}
+
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
@@ -5424,9 +5621,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
 	struct netdev_queue *tx;
+	struct netdev_rx_queue *rx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
+	int i;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
@@ -5452,11 +5651,27 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		goto free_p;
 	}
 
+	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+	if (!rx) {
+		printk(KERN_ERR "alloc_netdev: Unable to allocate "
+		       "rx queues.\n");
+		goto free_tx;
+	}
+
+	atomic_set(&rx->count, queue_count);
+
+	/*
+	 * Set a pointer to first element in the array which holds the
+	 * reference count.
+	 */
+	for (i = 0; i < queue_count; i++)
+		rx[i].first = rx;
+
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
 	if (dev_addr_init(dev))
-		goto free_tx;
+		goto free_rx;
 
 	dev_unicast_init(dev);
 
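The loop above makes every element of the rx array point back at element zero through ->first, and element zero's atomic count is initialised to the number of queues. The matching release side lives in net-sysfs.c further down: each rx-queue kobject release drops one reference, and the single kcalloc()'d array is freed when the last queue goes away. A reduced sketch of that lifetime rule (the helper name is illustrative):

/* Illustrative only: one kfree() for the whole dev->_rx array. */
static void example_release_one(struct netdev_rx_queue *queue)
{
	struct netdev_rx_queue *first = queue->first;	/* element 0 of the array */

	if (atomic_dec_and_test(&first->count))
		kfree(first);				/* last queue frees them all */
}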
@@ -5466,6 +5681,9 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
+	dev->_rx = rx;
+	dev->num_rx_queues = queue_count;
+
 	dev->gso_max_size = GSO_MAX_SIZE;
 
 	netdev_init_queues(dev);
@@ -5480,9 +5698,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	strcpy(dev->name, name);
 	return dev;
 
+free_rx:
+	kfree(rx);
 free_tx:
 	kfree(tx);
-
 free_p:
 	kfree(p);
 	return NULL;
@@ -5985,6 +6204,10 @@ static int __init net_dev_init(void)
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
 
+		queue->csd.func = trigger_softirq;
+		queue->csd.info = queue;
+		queue->csd.flags = 0;
+
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
 		queue->backlog.gro_list = NULL;
@@ -6023,7 +6246,7 @@ subsys_initcall(net_dev_init);
 
 static int __init initialize_hashrnd(void)
 {
-	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+	get_random_bytes(&hashrnd, sizeof(hashrnd));
 	return 0;
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 099c753c4213..7a46343d5ae3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -466,6 +466,216 @@ static struct attribute_group wireless_group = {
 };
 #endif
 
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_rx_queue_attr(_attr) container_of(_attr, \
+    struct rx_queue_attribute, attr)
+
+#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
+
+static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
+    char *buf)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
+    const char *buf, size_t count)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static struct sysfs_ops rx_queue_sysfs_ops = {
+	.show = rx_queue_attr_show,
+	.store = rx_queue_attr_store,
+};
+
+static ssize_t show_rps_map(struct netdev_rx_queue *queue,
+			    struct rx_queue_attribute *attribute, char *buf)
+{
+	struct rps_map *map;
+	cpumask_var_t mask;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	rcu_read_lock();
+	map = rcu_dereference(queue->rps_map);
+	if (map)
+		for (i = 0; i < map->len; i++)
+			cpumask_set_cpu(map->cpus[i], mask);
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		rcu_read_unlock();
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static void rps_map_release(struct rcu_head *rcu)
+{
+	struct rps_map *map = container_of(rcu, struct rps_map, rcu);
+
+	kfree(map);
+}
+
+ssize_t store_rps_map(struct netdev_rx_queue *queue,
+		      struct rx_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct rps_map *old_map, *map;
+	cpumask_var_t mask;
+	int err, cpu, i;
+	static DEFINE_SPINLOCK(rps_map_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	map = kzalloc(max_t(unsigned,
+	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+	    GFP_KERNEL);
+	if (!map) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	i = 0;
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
+		map->cpus[i++] = cpu;
+
+	if (i)
+		map->len = i;
+	else {
+		kfree(map);
+		map = NULL;
+	}
+
+	spin_lock(&rps_map_lock);
+	old_map = queue->rps_map;
+	rcu_assign_pointer(queue->rps_map, map);
+	spin_unlock(&rps_map_lock);
+
+	if (old_map)
+		call_rcu(&old_map->rcu, rps_map_release);
+
+	free_cpumask_var(mask);
+	return len;
+}
+
+static struct rx_queue_attribute rps_cpus_attribute =
+	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+
+static struct attribute *rx_queue_default_attrs[] = {
+	&rps_cpus_attribute.attr,
+	NULL
+};
+
+static void rx_queue_release(struct kobject *kobj)
+{
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+	struct rps_map *map = queue->rps_map;
+	struct netdev_rx_queue *first = queue->first;
+
+	if (map)
+		call_rcu(&map->rcu, rps_map_release);
+
+	if (atomic_dec_and_test(&first->count))
+		kfree(first);
+}
+
+static struct kobj_type rx_queue_ktype = {
+	.sysfs_ops = &rx_queue_sysfs_ops,
+	.release = rx_queue_release,
+	.default_attrs = rx_queue_default_attrs,
+};
+
+static int rx_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_rx_queue *queue = net->_rx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error = 0;
+
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+	    "rx-%u", index);
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
+
+	kobject_uevent(kobj, KOBJ_ADD);
+
+	return error;
+}
+
+static int rx_queue_register_kobjects(struct net_device *net)
+{
+	int i;
+	int error = 0;
+
+	net->queues_kset = kset_create_and_add("queues",
+	    NULL, &net->dev.kobj);
+	if (!net->queues_kset)
+		return -ENOMEM;
+	for (i = 0; i < net->num_rx_queues; i++) {
+		error = rx_queue_add_kobject(net, i);
+		if (error)
+			break;
+	}
+
+	if (error)
+		while (--i >= 0)
+			kobject_put(&net->_rx[i].kobj);
+
+	return error;
+}
+
+static void rx_queue_remove_kobjects(struct net_device *net)
+{
+	int i;
+
+	for (i = 0; i < net->num_rx_queues; i++)
+		kobject_put(&net->_rx[i].kobj);
+	kset_unregister(net->queues_kset);
+}
+
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
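The attribute registered above appears as /sys/class/net/<dev>/queues/rx-<n>/rps_cpus and takes a CPU bitmap, so for example writing "f" to rx-0/rps_cpus steers that queue's packets across CPUs 0-3 (only the online ones are kept). store_rps_map() parses the bitmap, builds a new rps_map and swaps it in under a spinlock, while readers in get_rps_cpu() keep using the old map until an RCU grace period passes. A reduced sketch of just that swap (the file-scope lock is illustrative; in the patch it is a static local of store_rps_map()):

/* Illustrative only: publishing a new map while get_rps_cpu() may be reading. */
static DEFINE_SPINLOCK(example_rps_map_lock);

static void example_publish(struct netdev_rx_queue *queue, struct rps_map *map)
{
	struct rps_map *old_map;

	spin_lock(&example_rps_map_lock);	/* serialise concurrent writers */
	old_map = queue->rps_map;
	rcu_assign_pointer(queue->rps_map, map);
	spin_unlock(&example_rps_map_lock);

	if (old_map)
		call_rcu(&old_map->rcu, rps_map_release);	/* free after readers drain */
}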
@@ -529,6 +739,8 @@ void netdev_unregister_kobject(struct net_device * net)
 	if (!net_eq(dev_net(net), &init_net))
 		return;
 
+	rx_queue_remove_kobjects(net);
+
 	device_del(dev);
 }
 
@@ -537,6 +749,7 @@ int netdev_register_kobject(struct net_device *net)
 {
 	struct device *dev = &(net->dev);
 	const struct attribute_group **groups = net->sysfs_groups;
+	int error = 0;
 
 	dev->class = &net_class;
 	dev->platform_data = net;
@@ -563,7 +776,17 @@ int netdev_register_kobject(struct net_device *net)
 	if (!net_eq(dev_net(net), &init_net))
 		return 0;
 
-	return device_add(dev);
+	error = device_add(dev);
+	if (error)
+		return error;
+
+	error = rx_queue_register_kobjects(net);
+	if (error) {
+		device_del(dev);
+		return error;
+	}
+
+	return error;
 }
 
 int netdev_class_create_file(struct class_attribute *class_attr)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e060c91e..bdea0efdf8cb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
 	skb_dst_set(new, dst_clone(skb_dst(old)));
+	new->rxhash		= old->rxhash;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
@@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(len);
 	C(data_len);
 	C(mac_len);
+	C(rxhash);
 	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 	n->cloned = 1;
 	n->nohdr = 0;