author    Changli Gao <xiaosuo@gmail.com>        2010-04-27 18:07:33 -0400
committer David S. Miller <davem@davemloft.net>  2010-04-27 18:11:49 -0400
commit    6e7676c1a76aed6e957611d8d7a9e5592e23aeba (patch)
tree      0cd14260745f755c885466c59182452f637e92e3
parent    c58dc01babfd58ec9e71a6ce080150dc27755d88 (diff)
net: batch skb dequeueing from softnet input_pkt_queue
batch skb dequeueing from softnet input_pkt_queue to reduce potential lock
contention when RPS is enabled.

Note: in the worst case, the number of packets in a softnet_data may be up
to twice netdev_max_backlog.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/linux/netdevice.h |  6
-rw-r--r--  net/core/dev.c            | 57
2 files changed, 43 insertions(+), 20 deletions(-)
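The change works by splicing the shared input_pkt_queue onto a per-CPU
private process_queue in one locked operation, so the RPS lock is taken
once per batch instead of once per packet. Below is a minimal userspace
sketch of that pattern, not kernel code: the queue type and helpers
(pkt_queue, queue_splice_tail_init, consume) are made up for illustration,
with a pthread mutex standing in for rps_lock.

/*
 * Userspace sketch of batch dequeueing: drain the whole shared queue
 * onto a private list under one lock acquisition, then process the
 * batch without touching the lock again.
 */
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct pkt {
	int id;
	struct pkt *next;
};

struct pkt_queue {
	struct pkt *head, *tail;
};

static pthread_mutex_t input_lock = PTHREAD_MUTEX_INITIALIZER;
static struct pkt_queue input_queue;	/* shared with producers */
static struct pkt_queue process_queue;	/* private to the consumer */

static void queue_tail(struct pkt_queue *q, struct pkt *p)
{
	p->next = NULL;
	if (q->tail)
		q->tail->next = p;
	else
		q->head = p;
	q->tail = p;
}

static struct pkt *queue_dequeue(struct pkt_queue *q)
{
	struct pkt *p = q->head;

	if (p) {
		q->head = p->next;
		if (!q->head)
			q->tail = NULL;
	}
	return p;
}

/* Move everything from src to the tail of dst and leave src empty. */
static void queue_splice_tail_init(struct pkt_queue *src,
				   struct pkt_queue *dst)
{
	if (!src->head)
		return;
	if (dst->tail)
		dst->tail->next = src->head;
	else
		dst->head = src->head;
	dst->tail = src->tail;
	src->head = src->tail = NULL;
}

static void consume(void)
{
	struct pkt *p;

	/* One lock round-trip drains the whole shared queue ... */
	pthread_mutex_lock(&input_lock);
	queue_splice_tail_init(&input_queue, &process_queue);
	pthread_mutex_unlock(&input_lock);

	/* ... and processing then runs without holding the lock. */
	while ((p = queue_dequeue(&process_queue)))
		printf("processing packet %d\n", p->id);
}

int main(void)
{
	struct pkt pkts[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

	/* Producer side: normally done under the lock from many CPUs. */
	pthread_mutex_lock(&input_lock);
	for (int i = 0; i < 3; i++)
		queue_tail(&input_queue, &pkts[i]);
	pthread_mutex_unlock(&input_lock);

	consume();
	return 0;
}

The payoff mirrors the commit message: producers still contend on the
lock per packet, but the consumer amortizes its acquisitions across the
whole batch, which is also why the worst-case queue depth grows to
roughly two backlogs (one in each queue).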
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c04ca246395d..40d4c20d034b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1388,6 +1388,7 @@ struct softnet_data {
 	struct Qdisc		**output_queue_tailp;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
+	struct sk_buff_head	process_queue;
 
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
@@ -1402,10 +1403,11 @@ struct softnet_data {
 	struct napi_struct	backlog;
 };
 
-static inline void input_queue_head_incr(struct softnet_data *sd)
+static inline void input_queue_head_add(struct softnet_data *sd,
+					unsigned int len)
 {
 #ifdef CONFIG_RPS
-	sd->input_queue_head++;
+	sd->input_queue_head += len;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 3d314919a2cf..100dcbd29739 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2408,12 +2408,13 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	__get_cpu_var(netdev_rx_stat).total++;
 
 	rps_lock(sd);
-	if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (sd->input_pkt_queue.qlen) {
+	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+		if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
 			__skb_queue_tail(&sd->input_pkt_queue, skb);
 #ifdef CONFIG_RPS
-			*qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
+			*qtail = sd->input_queue_head +
+					skb_queue_len(&sd->input_pkt_queue);
 #endif
 			rps_unlock(sd);
 			local_irq_restore(flags);
@@ -2934,13 +2935,21 @@ static void flush_backlog(void *arg)
 	struct sk_buff *skb, *tmp;
 
 	rps_lock(sd);
-	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
-			input_queue_head_incr(sd);
+			input_queue_head_add(sd, 1);
 		}
+	}
 	rps_unlock(sd);
+
+	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &sd->process_queue);
+			kfree_skb(skb);
+		}
+	}
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -3286,24 +3295,33 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	}
 #endif
 	napi->weight = weight_p;
-	do {
+	local_irq_disable();
+	while (work < quota) {
 		struct sk_buff *skb;
+		unsigned int qlen;
+
+		while ((skb = __skb_dequeue(&sd->process_queue))) {
+			local_irq_enable();
+			__netif_receive_skb(skb);
+			if (++work >= quota)
+				return work;
+			local_irq_disable();
+		}
 
-		local_irq_disable();
 		rps_lock(sd);
-		skb = __skb_dequeue(&sd->input_pkt_queue);
-		if (!skb) {
+		qlen = skb_queue_len(&sd->input_pkt_queue);
+		if (qlen) {
+			input_queue_head_add(sd, qlen);
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
+		}
+		if (qlen < quota - work) {
 			__napi_complete(napi);
-			rps_unlock(sd);
-			local_irq_enable();
-			break;
+			quota = work + qlen;
 		}
-		input_queue_head_incr(sd);
 		rps_unlock(sd);
-		local_irq_enable();
-
-		__netif_receive_skb(skb);
-	} while (++work < quota);
+	}
+	local_irq_enable();
 
 	return work;
 }
@@ -5630,8 +5648,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
-		input_queue_head_incr(oldsd);
+		input_queue_head_add(oldsd, 1);
 	}
+	while ((skb = __skb_dequeue(&oldsd->process_queue)))
+		netif_rx(skb);
 
 	return NOTIFY_OK;
 }
@@ -5850,6 +5870,7 @@ static int __init net_dev_init(void)
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
 		skb_queue_head_init(&sd->input_pkt_queue);
+		skb_queue_head_init(&sd->process_queue);
 		sd->completion_queue = NULL;
 		INIT_LIST_HEAD(&sd->poll_list);
 		sd->output_queue = NULL;