Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c	| 111
1 files changed, 91 insertions, 20 deletions

diff --git a/net/core/dev.c b/net/core/dev.c
index e8041eb76ac1..d7107ac835fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64; /* old backlog weight */
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 #ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
  * rcu_read_lock must be held on entry.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
 {
 	struct ipv6hdr *ip6;
 	struct iphdr *ip;
 	struct netdev_rx_queue *rxqueue;
 	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
 	int cpu = -1;
 	u8 ip_proto;
+	u16 tcpu;
 	u32 addr1, addr2, ports, ihl;
 
 	if (skb_rx_queue_recorded(skb)) {
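A note on the block above: rps_sock_flow_table is the single global "desired CPU" table that the socket layer fills in at recvmsg time, while each receive queue gets its own rps_dev_flow_table recording where a flow's packets are currently being steered. Both types are declared outside this file (this view is limited to net/core/dev.c), so the layout below is a sketch of what get_rps_cpu() relies on, not a copy of the patch; field names follow the code here, exact types and padding are illustrative:

	#define RPS_NO_CPU 0xffff	/* flow entry not yet bound to a CPU */

	struct rps_dev_flow {
		u16 cpu;			/* CPU currently handling this flow */
		unsigned int last_qtail;	/* backlog tail at last enqueue */
	};

	struct rps_dev_flow_table {
		unsigned int mask;		/* entries - 1; entry count is a power of 2 */
		struct rps_dev_flow flows[0];
	};

	struct rps_sock_flow_table {
		unsigned int mask;
		u16 ents[0];			/* desired CPU, indexed by flow hash */
	};

The new **rflowp out-parameter lets callers learn which device flow entry was consulted, so they can hand &rflow->last_qtail down to enqueue_to_backlog().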
@@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 	} else
 		rxqueue = dev->_rx;
 
-	if (!rxqueue->rps_map)
+	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
 		goto done;
 
 	if (skb->rxhash)
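A small but necessary guard change: with flow steering, a queue may carry a rps_flow_table without any static rps_map, so the early exit now fires only when neither steering input is present.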
@@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 		skb->rxhash = 1;
 
 got_hash:
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
+
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0)) {
+			tcpu = rflow->cpu = next_cpu;
+			if (tcpu != RPS_NO_CPU)
+				rflow->last_qtail = per_cpu(softnet_data,
+				    tcpu).input_queue_head;
+		}
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
 	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
-		u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
 		if (cpu_online(tcpu)) {
 			cpu = tcpu;
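The comment above carries the core ordering argument of the patch. A minimal standalone model of the same decision may make it easier to reason about (hypothetical helper, plain C, not kernel code):

	#include <stdbool.h>

	#define RPS_NO_CPU 0xffff

	/*
	 * head is the candidate CPU's input_queue_head (count of packets it
	 * has dequeued so far); last_qtail is the tail position recorded
	 * when this flow last enqueued a packet there.
	 */
	static bool may_move_flow(unsigned int tcpu, unsigned int next_cpu,
				  bool tcpu_online, unsigned int head,
				  unsigned int last_qtail)
	{
		if (tcpu == next_cpu)
			return false;	/* already on the desired CPU */
		if (tcpu == RPS_NO_CPU)
			return true;	/* flow was never placed */
		if (!tcpu_online)
			return true;	/* old CPU is gone anyway */
		/*
		 * Signed subtraction makes the test wraparound-safe: once
		 * head has advanced past last_qtail, every packet of this
		 * flow queued to tcpu has been dequeued, so switching CPUs
		 * cannot reorder the flow.
		 */
		return (int)(head - last_qtail) >= 0;
	}

Note that both counters are free-running unsigned values; only their difference is meaningful, which is why the kernel code casts the subtraction to int instead of comparing the raw values.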
@@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data)
 	__napi_schedule(&queue->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_RPS */
 
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
  */
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
 {
 	struct softnet_data *queue;
 	unsigned long flags;
@@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
 		if (queue->input_pkt_queue.qlen) {
 enqueue:
 			__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+			*qtail = queue->input_queue_head +
+			    queue->input_pkt_queue.qlen;
+#endif
 			rps_unlock(queue);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
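The value stored through *qtail above is a position in a free-running sequence, not an array index: input_queue_head counts every packet ever dequeued from this backlog, so head plus the post-enqueue queue length names the slot just filled. A worked example with made-up numbers: if the target CPU has dequeued 1000 packets (head == 1000) and three skbs sit in input_pkt_queue after this enqueue, *qtail becomes 1003, and get_rps_cpu() will let the flow migrate only once head reaches 1003, that is, once this packet and everything ahead of it has been processed.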
@@ -2355,11 +2408,10 @@ enqueue:
 
 			cpu_set(cpu, rcpus->mask[rcpus->select]);
 			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-		} else
-			__napi_schedule(&queue->backlog);
-#else
-		__napi_schedule(&queue->backlog);
+			goto enqueue;
+		}
 #endif
+		__napi_schedule(&queue->backlog);
 	}
 	goto enqueue;
 }
@@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb)
 
 #ifdef CONFIG_RPS
 	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
 		rcu_read_lock();
-		cpu = get_rps_cpu(skb->dev, skb);
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
 		if (cpu < 0)
 			cpu = smp_processor_id();
-		ret = enqueue_to_backlog(skb, cpu);
+
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
 		rcu_read_unlock();
 	}
 #else
-	ret = enqueue_to_backlog(skb, get_cpu());
-	put_cpu();
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
 #endif
 	return ret;
 }
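netif_rx() is the consumer half of flow steering; the producer half, not visible in this file-limited view, is the socket layer recording which CPU last performed recvmsg for a flow. A sketch of that recording step, assuming it matches the helper the rest of the patch adds (treat the exact body as illustrative):

	/* Called from the socket receive path with the flow's hash. */
	static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
						u32 hash)
	{
		if (table && hash) {
			u16 cpu, index = hash & table->mask;

			/* Only a hint: preemption can move us mid-update. */
			cpu = raw_smp_processor_id();

			/* Avoid dirtying the cache line if unchanged. */
			if (table->ents[index] != cpu)
				table->ents[index] = cpu;
		}
	}

Also note voidflow: rflow starts out pointing at a stack dummy so that enqueue_to_backlog() always has a valid last_qtail to write through; get_rps_cpu() repoints it at a real table entry only on a flow-table hit.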
@@ -2830,14 +2889,22 @@ out:
 int netif_receive_skb(struct sk_buff *skb)
 {
 #ifdef CONFIG_RPS
-	int cpu;
+	struct rps_dev_flow voidflow, *rflow = &voidflow;
+	int cpu, ret;
+
+	rcu_read_lock();
 
-	cpu = get_rps_cpu(skb->dev, skb);
+	cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-	if (cpu < 0)
-		return __netif_receive_skb(skb);
-	else
-		return enqueue_to_backlog(skb, cpu);
+	if (cpu >= 0) {
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+		rcu_read_unlock();
+	} else {
+		rcu_read_unlock();
+		ret = __netif_receive_skb(skb);
+	}
+
+	return ret;
 #else
 	return __netif_receive_skb(skb);
 #endif
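The asymmetric unlock placement above is deliberate rather than stylistic: on the steering path, rflow may point into the RCU-protected rps_flow_table and enqueue_to_backlog() stores through &rflow->last_qtail, so rcu_read_unlock() must come after the enqueue. On the fallback path nothing RCU-protected is still needed, so the lock is dropped before the potentially long-running __netif_receive_skb().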
@@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &queue->input_pkt_queue);
 			kfree_skb(skb);
+			incr_input_queue_head(queue);
 		}
 	rps_unlock(queue);
 }
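incr_input_queue_head() first appears here and is not defined in this file; from its call sites it advances the dequeue counter that last_qtail snapshots are compared against. A plausible definition, assuming (as the #ifdef in enqueue_to_backlog suggests) that the counter only exists under CONFIG_RPS:

	static inline void incr_input_queue_head(struct softnet_data *queue)
	{
	#ifdef CONFIG_RPS
		queue->input_queue_head++;
	#endif
	}

Every path that removes an skb from input_pkt_queue must call it: flushing here, the normal dequeue in process_backlog() below, and the CPU-hotplug drain at the end of this patch. Otherwise flows whose packets were flushed would never pass the drained test in get_rps_cpu() and could never migrate.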
@@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			local_irq_enable();
 			break;
 		}
+		incr_input_queue_head(queue);
 		rps_unlock(queue);
 		local_irq_enable();
 
@@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
+		incr_input_queue_head(oldsd);
+	}
 
 	return NOTIFY_OK;
 }