Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c	111
1 file changed, 91 insertions(+), 20 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index e8041eb76ac1..d7107ac835fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64; /* old backlog weight */
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 #ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
  * rcu_read_lock must be held on entry.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
 {
 	struct ipv6hdr *ip6;
 	struct iphdr *ip;
 	struct netdev_rx_queue *rxqueue;
 	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
 	int cpu = -1;
 	u8 ip_proto;
+	u16 tcpu;
 	u32 addr1, addr2, ports, ihl;
 
 	if (skb_rx_queue_recorded(skb)) {
@@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 	} else
 		rxqueue = dev->_rx;
 
-	if (!rxqueue->rps_map)
+	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
 		goto done;
 
 	if (skb->rxhash)
@@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 	skb->rxhash = 1;
 
 got_hash:
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
+
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in-order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0)) {
+			tcpu = rflow->cpu = next_cpu;
+			if (tcpu != RPS_NO_CPU)
+				rflow->last_qtail = per_cpu(softnet_data,
+				    tcpu).input_queue_head;
+		}
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
 	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
-		u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
 		if (cpu_online(tcpu)) {
 			cpu = tcpu;
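
The condition added above is the core of Receive Flow Steering: a flow is only re-steered to the CPU where the application last read the socket once the previously used CPU has drained every packet of that flow it had queued. A minimal standalone sketch of the same decision, outside kernel context; the names (flow_entry, may_switch_cpu, NO_CPU) are illustrative only, not kernel API:

/* Sketch of the re-steering decision, with illustrative names. */
#include <stdbool.h>
#include <stdint.h>

#define NO_CPU 0xffff

struct flow_entry {
	uint16_t cpu;			/* CPU this flow is currently steered to */
	unsigned int last_qtail;	/* backlog tail when its last packet was queued */
};

static bool may_switch_cpu(const struct flow_entry *f, uint16_t desired_cpu,
			   bool old_cpu_online, unsigned int old_queue_head)
{
	if (f->cpu == desired_cpu)
		return false;	/* already steering to the CPU doing recvmsg */
	if (f->cpu == NO_CPU || !old_cpu_online)
		return true;	/* nothing can be pending on the old CPU */
	/*
	 * The old CPU has dequeued at least up to this flow's last packet,
	 * so switching cannot reorder the flow. The signed subtraction
	 * tolerates counter wraparound, as in the kernel code above.
	 */
	return (int)(old_queue_head - f->last_qtail) >= 0;
}
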
@@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data)
 	__napi_schedule(&queue->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_RPS */
 
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
  */
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
 {
 	struct softnet_data *queue;
 	unsigned long flags;
@@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
 		if (queue->input_pkt_queue.qlen) {
 enqueue:
 			__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+			*qtail = queue->input_queue_head +
+			    queue->input_pkt_queue.qlen;
+#endif
 			rps_unlock(queue);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
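
Recording *qtail as input_queue_head plus the queue length gives each enqueued packet a monotonically increasing sequence number on that CPU's backlog; get_rps_cpu() later compares it against input_queue_head to decide whether re-steering is safe. A worked example with made-up numbers:

/*
 * Illustrative numbers only.
 * At enqueue: input_queue_head = 100, qlen after __skb_queue_tail = 3
 *   => *qtail (stored in rflow->last_qtail) = 103: this flow's newest
 *      packet is the 103rd ever queued on this CPU.
 * Later, in get_rps_cpu():
 *   input_queue_head = 101 -> (int)(101 - 103) = -2 < 0: packets of the
 *     flow may still sit in the old backlog, keep steering to the old CPU.
 *   input_queue_head = 103 -> (int)(103 - 103) = 0 >= 0: everything up to
 *     and including the flow's last packet has been dequeued, safe to move.
 */
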
@@ -2355,11 +2408,10 @@ enqueue:
 
 			cpu_set(cpu, rcpus->mask[rcpus->select]);
 			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-		} else
-			__napi_schedule(&queue->backlog);
-#else
-		__napi_schedule(&queue->backlog);
+			goto enqueue;
+		}
 #endif
+		__napi_schedule(&queue->backlog);
 	}
 	goto enqueue;
 }
@@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb)
 
 #ifdef CONFIG_RPS
 	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
 		rcu_read_lock();
-		cpu = get_rps_cpu(skb->dev, skb);
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
 		if (cpu < 0)
 			cpu = smp_processor_id();
-		ret = enqueue_to_backlog(skb, cpu);
+
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
 		rcu_read_unlock();
 	}
 #else
-	ret = enqueue_to_backlog(skb, get_cpu());
-	put_cpu();
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
 #endif
 	return ret;
 }
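
Note that netif_rx() only consumes the steering tables; nothing in this file writes rps_sock_flow_table. The expectation is that the socket layer records the CPU on which the application last read the flow, so that ents[hash & mask] holds the desired CPU consulted in get_rps_cpu(). A hedged sketch of such a recording helper (the real helper is introduced elsewhere in the RFS patch set; the name here is illustrative):

/* Illustrative only: remember that the flow hashing to 'rxhash' was last
 * consumed on the calling CPU. Field names match those read above. */
static inline void record_sock_flow(struct rps_sock_flow_table *table,
				    u32 rxhash)
{
	if (table && rxhash)
		table->ents[rxhash & table->mask] = raw_smp_processor_id();
}
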
@@ -2830,14 +2889,22 @@ out:
 int netif_receive_skb(struct sk_buff *skb)
 {
 #ifdef CONFIG_RPS
-	int cpu;
+	struct rps_dev_flow voidflow, *rflow = &voidflow;
+	int cpu, ret;
+
+	rcu_read_lock();
 
-	cpu = get_rps_cpu(skb->dev, skb);
+	cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-	if (cpu < 0)
-		return __netif_receive_skb(skb);
-	else
-		return enqueue_to_backlog(skb, cpu);
+	if (cpu >= 0) {
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+		rcu_read_unlock();
+	} else {
+		rcu_read_unlock();
+		ret = __netif_receive_skb(skb);
+	}
+
+	return ret;
 #else
 	return __netif_receive_skb(skb);
 #endif
@@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &queue->input_pkt_queue);
 			kfree_skb(skb);
+			incr_input_queue_head(queue);
 		}
 	rps_unlock(queue);
 }
@@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			local_irq_enable();
 			break;
 		}
+		incr_input_queue_head(queue);
 		rps_unlock(queue);
 		local_irq_enable();
 
@@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
+		incr_input_queue_head(oldsd);
+	}
 
 	return NOTIFY_OK;
 }
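
For reference, the rps_map lookup shared by the old and new code, tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32], spreads the 32-bit hash uniformly over map->len entries with a multiply and shift instead of a modulo. A standalone illustration in plain C, with illustrative values:

#include <stdint.h>
#include <stdio.h>

/* ((u64)hash * len) >> 32 scales hash/2^32 into the range [0, len). */
static uint32_t pick_index(uint32_t hash, uint32_t len)
{
	return (uint32_t)(((uint64_t)hash * len) >> 32);
}

int main(void)
{
	/* With len = 4, each bucket covers a 2^30-wide slice of hash space. */
	printf("%u\n", pick_index(0x00000000u, 4));	/* 0 */
	printf("%u\n", pick_index(0x40000000u, 4));	/* 1 */
	printf("%u\n", pick_index(0xBFFFFFFFu, 4));	/* 2 */
	printf("%u\n", pick_index(0xFFFFFFFFu, 4));	/* 3 */
	return 0;
}
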