author    Eric Dumazet <edumazet@google.com>	2014-10-03 18:31:07 -0400
committer David S. Miller <davem@davemloft.net>	2014-10-03 18:36:11 -0400
commit    55a93b3ea780908b7d1b3a8cf1976223a9268d78
tree      2c33ad6aaf7a604185bff5659d64895f5cf9a348
parent    6a05880a8b22c6ba2ffdabbceb4635d28abe9072
qdisc: validate skb without holding lock
Validation of an skb can be pretty expensive: GSO segmentation and/or checksum computations.

We can do this without holding the qdisc lock, so that other cpus can queue additional packets.

The trick is that requeued packets were already validated, so we carry a boolean letting sch_direct_xmit() either validate a fresh skb list or directly use an already-validated one.

Tested on a 40Gb NIC (8 TX queues) with 200 concurrent flows on a 48-thread host.

Turning TSO on or off had no effect on throughput, only a few more cpu cycles. Lock contention on the qdisc lock disappeared. The same holds with TX checksum offload disabled.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
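The splice logic introduced by validate_xmit_skb_list() is easiest to see in isolation: each packet is validated on its own, and whatever validation hands back (a single skb, a chain of GSO segments, or nothing) is appended to the tail of the output list. Below is a minimal userspace sketch of that list handling, not part of the patch, using a stand-in struct pkt and a dummy validate() in place of struct sk_buff and validate_xmit_skb().

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for struct sk_buff: a singly linked packet list. */
struct pkt {
	int id;
	struct pkt *next;
};

/* Dummy stand-in for validate_xmit_skb(): may drop a packet (return NULL)
 * or expand it into a short sub-list, as GSO segmentation would.
 */
static struct pkt *validate(struct pkt *p)
{
	if (p->id % 5 == 0) {		/* pretend validation failed */
		free(p);
		return NULL;
	}
	if (p->id % 2 == 0) {		/* pretend GSO split off one extra segment */
		struct pkt *seg = malloc(sizeof(*seg));

		seg->id = p->id + 1000;
		seg->next = NULL;
		p->next = seg;
	}
	return p;
}

/* Mirrors the splice in validate_xmit_skb_list(): validate each packet in
 * isolation, then append the returned sub-list to the tail of the output.
 */
static struct pkt *validate_list(struct pkt *p)
{
	struct pkt *next, *head = NULL, *tail = NULL;

	while (p) {
		next = p->next;
		p->next = NULL;
		p = validate(p);
		if (p) {
			struct pkt *end = p;

			while (end->next)	/* find tail of returned sub-list */
				end = end->next;
			if (!head)
				head = p;
			else
				tail->next = p;
			tail = end;
		}
		p = next;
	}
	return head;
}

int main(void)
{
	struct pkt *head = NULL, *p;
	int i;

	for (i = 1; i <= 6; i++) {	/* build a small input list */
		p = malloc(sizeof(*p));
		p->id = i;
		p->next = head;
		head = p;
	}
	for (p = validate_list(head); p; ) {
		struct pkt *n = p->next;

		printf("validated pkt id=%d\n", p->id);
		free(p);
		p = n;
	}
	return 0;
}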
-rw-r--r--  include/linux/netdevice.h |  2
-rw-r--r--  include/net/pkt_sched.h   |  2
-rw-r--r--  net/core/dev.c            | 29
-rw-r--r--  net/sched/sch_generic.c   | 61
4 files changed, 56 insertions(+), 38 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9b7fbacb6296..910fb17ad148 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2821,7 +2821,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
			  struct netdev_phys_port_id *ppid);
-struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
				    struct netdev_queue *txq, int *ret);
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 8bbe626e9ece..e4b3c828c1c2 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -99,7 +99,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab);
 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
 int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
		    struct net_device *dev, struct netdev_queue *txq,
-		    spinlock_t *root_lock);
+		    spinlock_t *root_lock, bool validate);
 
 void __qdisc_run(struct Qdisc *q);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e55c546717d4..1a90530f83ff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2655,7 +2655,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur
 	return skb;
 }
 
-struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
 {
 	netdev_features_t features;
 
@@ -2720,6 +2720,30 @@ out_null:
 	return NULL;
 }
 
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+{
+	struct sk_buff *next, *head = NULL, *tail;
+
+	while (skb) {
+		next = skb->next;
+		skb->next = NULL;
+		skb = validate_xmit_skb(skb, dev);
+		if (skb) {
+			struct sk_buff *end = skb;
+
+			while (end->next)
+				end = end->next;
+			if (!head)
+				head = skb;
+			else
+				tail->next = skb;
+			tail = end;
+		}
+		skb = next;
+	}
+	return head;
+}
+
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
 	const struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -2786,8 +2810,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 	qdisc_bstats_update(q, skb);
 
-	skb = validate_xmit_skb(skb, dev);
-	if (skb && sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+	if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
 		if (unlikely(contended)) {
 			spin_unlock(&q->busylock);
 			contended = false;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 797ebef73642..2b349a4de3c8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -56,40 +56,34 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 	return 0;
 }
 
-static struct sk_buff *try_bulk_dequeue_skb(struct Qdisc *q,
-					    struct sk_buff *head_skb,
-					    int bytelimit)
+static void try_bulk_dequeue_skb(struct Qdisc *q,
+				 struct sk_buff *skb,
+				 const struct netdev_queue *txq)
 {
-	struct sk_buff *skb, *tail_skb = head_skb;
+	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
 
 	while (bytelimit > 0) {
-		skb = q->dequeue(q);
-		if (!skb)
-			break;
+		struct sk_buff *nskb = q->dequeue(q);
 
-		bytelimit -= skb->len; /* covers GSO len */
-		skb = validate_xmit_skb(skb, qdisc_dev(q));
-		if (!skb)
+		if (!nskb)
 			break;
 
-		while (tail_skb->next) /* GSO list goto tail */
-			tail_skb = tail_skb->next;
-
-		tail_skb->next = skb;
-		tail_skb = skb;
+		bytelimit -= nskb->len; /* covers GSO len */
+		skb->next = nskb;
+		skb = nskb;
 	}
-
-	return head_skb;
+	skb->next = NULL;
 }
 
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
-static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
 {
 	struct sk_buff *skb = q->gso_skb;
 	const struct netdev_queue *txq = q->dev_queue;
 
+	*validate = true;
 	if (unlikely(skb)) {
 		/* check the reason of requeuing without tx lock first */
 		txq = skb_get_tx_queue(txq->dev, skb);
@@ -98,21 +92,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 			q->q.qlen--;
 		} else
 			skb = NULL;
+		/* skb in gso_skb were already validated */
+		*validate = false;
 	} else {
 		if (!(q->flags & TCQ_F_ONETXQUEUE) ||
 		    !netif_xmit_frozen_or_stopped(txq)) {
-			int bytelimit = qdisc_avail_bulklimit(txq);
-
 			skb = q->dequeue(q);
-			if (skb) {
-				bytelimit -= skb->len;
-				skb = validate_xmit_skb(skb, qdisc_dev(q));
-			}
 			if (skb && qdisc_may_bulk(q))
-				skb = try_bulk_dequeue_skb(q, skb, bytelimit);
+				try_bulk_dequeue_skb(q, skb, txq);
 		}
 	}
-
 	return skb;
 }
 
@@ -156,19 +145,24 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
  */
 int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		    struct net_device *dev, struct netdev_queue *txq,
-		    spinlock_t *root_lock)
+		    spinlock_t *root_lock, bool validate)
 {
 	int ret = NETDEV_TX_BUSY;
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_xmit_frozen_or_stopped(txq))
-		skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
+	if (validate)
+		skb = validate_xmit_skb_list(skb, dev);
 
-	HARD_TX_UNLOCK(dev, txq);
+	if (skb) {
+		HARD_TX_LOCK(dev, txq, smp_processor_id());
+		if (!netif_xmit_frozen_or_stopped(txq))
+			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
 
+		HARD_TX_UNLOCK(dev, txq);
+	}
 	spin_lock(root_lock);
 
 	if (dev_xmit_complete(ret)) {
@@ -217,9 +211,10 @@ static inline int qdisc_restart(struct Qdisc *q)
 	struct net_device *dev;
 	spinlock_t *root_lock;
 	struct sk_buff *skb;
+	bool validate;
 
 	/* Dequeue packet */
-	skb = dequeue_skb(q);
+	skb = dequeue_skb(q, &validate);
 	if (unlikely(!skb))
 		return 0;
 
@@ -229,7 +224,7 @@ static inline int qdisc_restart(struct Qdisc *q)
 	dev = qdisc_dev(q);
 	txq = skb_get_tx_queue(dev, skb);
 
-	return sch_direct_xmit(skb, q, dev, txq, root_lock);
+	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
 }
 
 void __qdisc_run(struct Qdisc *q)