author		Krishna Kumar <krkumar2@in.ibm.com>	2009-08-05 21:44:21 -0400
committer	David S. Miller <davem@davemloft.net>	2009-08-06 23:10:18 -0400
commit		bbd8a0d3a3b65d341437f8b99c828fa5cc29c739 (patch)
tree		a4055c65be5ce3f8fd4987a32a38dfab1642ec95
parent		9f519f68cfffba022978634f724944a0b971fec1 (diff)
net: Avoid enqueuing skb for default qdiscs
dev_queue_xmit enqueues an skb and calls qdisc_run, which dequeues the skb
and xmits it. In most cases, the skb that is enqueued is the same one that
is dequeued (unless the queue gets stopped, or multiple CPUs write to the
same queue and end up racing with qdisc_run). For default qdiscs, we can
remove the redundant enqueue/dequeue and simply xmit the skb, since the
default qdisc is work-conserving.

The patch uses a new flag - TCQ_F_CAN_BYPASS - to identify the default fast
queue. The controversial part of the patch is incrementing qlen when an skb
is requeued; this is to avoid checks like the second line below:

+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
>>		   !q->gso_skb &&
+		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {

Results of 2 hours of testing for multiple netperf sessions (1, 2, 4, 8, 12
sessions on a 4-CPU System-X). The BW numbers are aggregate Mb/s across
iterations, tested with this version on System-X boxes with Chelsio 10Gbps
cards:

	----------------------------------
	Size |  ORG BW          NEW BW   |
	----------------------------------
	128K |  156964          159381   |
	256K |  158650          162042   |
	----------------------------------

Changes from ver1:

1. Move sch_direct_xmit declaration from sch_generic.h to pkt_sched.h
2. Update qdisc basic statistics for direct xmit path.
3. Set qlen to zero in qdisc_reset.
4. Changed some function names to more meaningful ones.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
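Before reading the diff, the decision the patch adds can be summarized by the
following minimal userspace sketch. This is an illustration only, not part of
the patch: the toy_* names are hypothetical stand-ins for struct Qdisc and the
driver transmit; the real logic lives in __dev_xmit_skb() in net/core/dev.c
below.

	/*
	 * Illustration only -- a userspace model of the TCQ_F_CAN_BYPASS
	 * fast path. toy_* names are hypothetical stand-ins.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	#define TCQ_F_CAN_BYPASS 8

	struct toy_qdisc {
		unsigned int flags;
		unsigned int qlen;	/* counts a requeued skb too, per this patch */
		bool running;		/* models __QDISC_STATE_RUNNING */
	};

	static void toy_xmit_skb(struct toy_qdisc *q, const char *skb)
	{
		if ((q->flags & TCQ_F_CAN_BYPASS) && q->qlen == 0 && !q->running) {
			/*
			 * Work-conserving qdisc, nothing queued (a requeued skb
			 * would have bumped qlen, so no separate gso_skb check),
			 * and no other CPU is running this qdisc: xmit directly,
			 * skipping the enqueue/dequeue round trip.
			 */
			q->running = true;
			printf("bypass: xmit %s directly\n", skb);
			q->running = false;
		} else {
			q->qlen++;
			printf("slow path: enqueue %s (qlen=%u)\n", skb, q->qlen);
		}
	}

	int main(void)
	{
		struct toy_qdisc q = { .flags = TCQ_F_CAN_BYPASS };

		toy_xmit_skb(&q, "skb-1");	/* empty and idle -> bypass */
		q.qlen = 1;			/* pretend an skb was requeued */
		toy_xmit_skb(&q, "skb-2");	/* pending work -> enqueue path */
		return 0;
	}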
-rw-r--r--	include/net/pkt_sched.h		3
-rw-r--r--	include/net/sch_generic.h	15
-rw-r--r--	net/core/dev.c			48
-rw-r--r--	net/sched/sch_generic.c		93
4 files changed, 108 insertions, 51 deletions
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 82a3191375f5..f911ec7598ef 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -87,6 +87,9 @@ extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 extern void qdisc_put_stab(struct qdisc_size_table *tab);
 extern void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
+extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+			   struct net_device *dev, struct netdev_queue *txq,
+			   spinlock_t *root_lock);
 
 extern void __qdisc_run(struct Qdisc *q);
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 964ffa0d8815..84b3fc2aef0f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -45,6 +45,7 @@ struct Qdisc
 #define TCQ_F_BUILTIN		1
 #define TCQ_F_THROTTLED		2
 #define TCQ_F_INGRESS		4
+#define TCQ_F_CAN_BYPASS	8
 #define TCQ_F_WARN_NONWC	(1 << 16)
 	int			padded;
 	struct Qdisc_ops	*ops;
@@ -182,6 +183,11 @@ struct qdisc_skb_cb {
 	char			data[];
 };
 
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+	return q->q.qlen;
+}
+
 static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
 {
 	return (struct qdisc_skb_cb *)skb->cb;
@@ -387,13 +393,18 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
+static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
+{
+	sch->bstats.bytes += len;
+	sch->bstats.packets++;
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
 	__skb_queue_tail(list, skb);
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	sch->bstats.bytes += qdisc_pkt_len(skb);
-	sch->bstats.packets++;
+	__qdisc_update_bstats(sch, qdisc_pkt_len(skb));
 
 	return NET_XMIT_SUCCESS;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index f01a9c41f112..a0bc087616a4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1786,6 +1786,40 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+				 struct net_device *dev,
+				 struct netdev_queue *txq)
+{
+	spinlock_t *root_lock = qdisc_lock(q);
+	int rc;
+
+	spin_lock(root_lock);
+	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+		kfree_skb(skb);
+		rc = NET_XMIT_DROP;
+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+		/*
+		 * This is a work-conserving queue; there are no old skbs
+		 * waiting to be sent out; and the qdisc is not running -
+		 * xmit the skb directly.
+		 */
+		__qdisc_update_bstats(q, skb->len);
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+			__qdisc_run(q);
+		else
+			clear_bit(__QDISC_STATE_RUNNING, &q->state);
+
+		rc = NET_XMIT_SUCCESS;
+	} else {
+		rc = qdisc_enqueue_root(skb, q);
+		qdisc_run(q);
+	}
+	spin_unlock(root_lock);
+
+	return rc;
+}
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -1859,19 +1893,7 @@ gso:
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		spinlock_t *root_lock = qdisc_lock(q);
-
-		spin_lock(root_lock);
-
-		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			kfree_skb(skb);
-			rc = NET_XMIT_DROP;
-		} else {
-			rc = qdisc_enqueue_root(skb, q);
-			qdisc_run(q);
-		}
-		spin_unlock(root_lock);
-
+		rc = __dev_xmit_skb(skb, q, dev, txq);
 		goto out;
 	}
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 27d03816ec3e..693df7ae33d8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -37,15 +37,11 @@
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
-static inline int qdisc_qlen(struct Qdisc *q)
-{
-	return q->q.qlen;
-}
-
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	q->gso_skb = skb;
 	q->qstats.requeues++;
+	q->q.qlen++;	/* it's still part of the queue */
 	__netif_schedule(q);
 
 	return 0;
@@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+		if (!netif_tx_queue_stopped(txq) &&
+		    !netif_tx_queue_frozen(txq)) {
 			q->gso_skb = NULL;
-		else
+			q->q.qlen--;
+		} else
 			skb = NULL;
 	} else {
 		skb = q->dequeue(q);
@@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 }
 
 /*
- * NOTE: Called under qdisc_lock(q) with locally disabled BH.
- *
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
- * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
- * this queue.
- *
- * netif_tx_lock serializes accesses to device driver.
- *
- * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
- * if one is grabbed, another must be free.
- *
- * Note, that this procedure can be called by a watchdog timer
+ * Transmit one skb, and handle the return status as required. Holding the
+ * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
+ * function.
  *
  * Returns to the caller:
  *	0  - queue is empty or throttled.
  *	>0 - queue is not empty.
- *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+		    struct net_device *dev, struct netdev_queue *txq,
+		    spinlock_t *root_lock)
 {
-	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
-	struct net_device *dev;
-	spinlock_t *root_lock;
-	struct sk_buff *skb;
-
-	/* Dequeue packet */
-	if (unlikely((skb = dequeue_skb(q)) == NULL))
-		return 0;
-
-	root_lock = qdisc_lock(q);
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	dev = qdisc_dev(q);
-	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_tx_queue_stopped(txq) &&
 	    !netif_tx_queue_frozen(txq))
@@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Qdisc *q)
 	return ret;
 }
 
+/*
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
+ *
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
+ *
+ * netif_tx_lock serializes accesses to device driver.
+ *
+ * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
+ * if one is grabbed, another must be free.
+ *
+ * Note, that this procedure can be called by a watchdog timer
+ *
+ * Returns to the caller:
+ *	0  - queue is empty or throttled.
+ *	>0 - queue is not empty.
+ *
+ */
+static inline int qdisc_restart(struct Qdisc *q)
+{
+	struct netdev_queue *txq;
+	struct net_device *dev;
+	spinlock_t *root_lock;
+	struct sk_buff *skb;
+
+	/* Dequeue packet */
+	skb = dequeue_skb(q);
+	if (unlikely(!skb))
+		return 0;
+
+	root_lock = qdisc_lock(q);
+	dev = qdisc_dev(q);
+	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+	return sch_direct_xmit(skb, q, dev, txq, root_lock);
+}
+
 void __qdisc_run(struct Qdisc *q)
 {
 	unsigned long start_time = jiffies;
@@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 
-	kfree_skb(qdisc->gso_skb);
-	qdisc->gso_skb = NULL;
+	if (qdisc->gso_skb) {
+		kfree_skb(qdisc->gso_skb);
+		qdisc->gso_skb = NULL;
+		qdisc->q.qlen = 0;
+	}
 }
 EXPORT_SYMBOL(qdisc_reset);
 
@@ -605,6 +623,9 @@ static void attach_one_default_qdisc(struct net_device *dev,
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
 		}
+
+		/* Can by-pass the queue discipline for default qdisc */
+		qdisc->flags |= TCQ_F_CAN_BYPASS;
 	} else {
 		qdisc = &noqueue_qdisc;
 	}