author     Krishna Kumar <krkumar2@in.ibm.com>      2009-08-05 21:44:21 -0400
committer  David S. Miller <davem@davemloft.net>    2009-08-06 23:10:18 -0400
commit     bbd8a0d3a3b65d341437f8b99c828fa5cc29c739 (patch)
tree       a4055c65be5ce3f8fd4987a32a38dfab1642ec95 /net
parent     9f519f68cfffba022978634f724944a0b971fec1 (diff)
net: Avoid enqueuing skb for default qdiscs
dev_queue_xmit enqueues an skb and calls qdisc_run, which
dequeues the skb and xmits it. In most cases, the skb that
is enqueued is the same one that is dequeued (unless the
queue gets stopped, or multiple cpus write to the same queue
and race with qdisc_run). For default qdiscs, we can remove
the redundant enqueue/dequeue and simply xmit the skb, since
the default qdisc is work-conserving.
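For illustration, here is a minimal user-space sketch of that bypass
decision (a toy model, not the kernel code: toy_qdisc and
try_direct_xmit are made-up names, and the plain bool stands in for
the atomic test_and_set_bit on __QDISC_STATE_RUNNING used by the
real patch):

#include <stdbool.h>
#include <stdio.h>

struct toy_qdisc {
	int  qlen;          /* packets waiting in the queue       */
	bool can_bypass;    /* analogous to TCQ_F_CAN_BYPASS      */
	bool running;       /* analogous to __QDISC_STATE_RUNNING */
};

static bool try_direct_xmit(struct toy_qdisc *q, const char *pkt)
{
	/* Bypass only if allowed, queue is empty and nobody else runs it. */
	if (q->can_bypass && q->qlen == 0 && !q->running) {
		q->running = true;
		printf("direct xmit: %s\n", pkt);   /* driver xmit stand-in */
		q->running = false;
		return true;
	}
	return false;                               /* caller must enqueue  */
}

int main(void)
{
	struct toy_qdisc q = { .qlen = 0, .can_bypass = true, .running = false };

	if (!try_direct_xmit(&q, "skb-1"))
		q.qlen++;                           /* fall back to enqueue */
	return 0;
}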
The patch uses a new flag, TCQ_F_CAN_BYPASS, to identify the
default fast queue. The controversial part of the patch is
incrementing qlen when an skb is requeued - this is to avoid
checks like the second line below:
+ } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
>> !q->gso_skb &&
+ !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
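To see why the requeue accounting makes that check unnecessary, here
is a simplified, hypothetical user-space model (toy_q, toy_requeue and
toy_dequeue are illustrative names only): because the held-back
gso_skb is counted in qlen, the bypass test can stay !qdisc_qlen(q)
without a separate !q->gso_skb check.

#include <assert.h>
#include <stddef.h>

struct toy_q {
	int qlen;
	const char *gso_skb;      /* packet held back after a failed xmit */
};

static void toy_requeue(struct toy_q *q, const char *skb)
{
	q->gso_skb = skb;
	q->qlen++;                /* still counted as queued */
}

static const char *toy_dequeue(struct toy_q *q)
{
	const char *skb = q->gso_skb;

	if (skb) {
		q->gso_skb = NULL;
		q->qlen--;        /* leaves the queue only now */
	}
	return skb;
}

int main(void)
{
	struct toy_q q = { 0, NULL };

	toy_requeue(&q, "skb-1");
	assert(q.qlen == 1);      /* the bypass test "qlen == 0" now fails */
	assert(toy_dequeue(&q) != NULL && q.qlen == 0);
	return 0;
}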
Results from 2 hours of testing with multiple netperf sessions
(1, 2, 4, 8, 12 sessions on a 4-cpu System-X). The BW numbers
are aggregate Mb/s across iterations, tested with this version
on System-X boxes with Chelsio 10gbps cards:
----------------------------------
Size | ORG BW NEW BW |
----------------------------------
128K | 156964 159381 |
256K | 158650 162042 |
----------------------------------
Changes from ver1:
1. Move sch_direct_xmit declaration from sch_generic.h to
pkt_sched.h
2. Update qdisc basic statistics for direct xmit path.
3. Set qlen to zero in qdisc_reset.
4. Changed some function names to more meaningful ones.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--  net/core/dev.c          | 48
-rw-r--r--  net/sched/sch_generic.c | 93
2 files changed, 92 insertions, 49 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index f01a9c41f112..a0bc087616a4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1786,6 +1786,40 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+				 struct net_device *dev,
+				 struct netdev_queue *txq)
+{
+	spinlock_t *root_lock = qdisc_lock(q);
+	int rc;
+
+	spin_lock(root_lock);
+	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+		kfree_skb(skb);
+		rc = NET_XMIT_DROP;
+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+		/*
+		 * This is a work-conserving queue; there are no old skbs
+		 * waiting to be sent out; and the qdisc is not running -
+		 * xmit the skb directly.
+		 */
+		__qdisc_update_bstats(q, skb->len);
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+			__qdisc_run(q);
+		else
+			clear_bit(__QDISC_STATE_RUNNING, &q->state);
+
+		rc = NET_XMIT_SUCCESS;
+	} else {
+		rc = qdisc_enqueue_root(skb, q);
+		qdisc_run(q);
+	}
+	spin_unlock(root_lock);
+
+	return rc;
+}
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -1859,19 +1893,7 @@ gso:
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		spinlock_t *root_lock = qdisc_lock(q);
-
-		spin_lock(root_lock);
-
-		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			kfree_skb(skb);
-			rc = NET_XMIT_DROP;
-		} else {
-			rc = qdisc_enqueue_root(skb, q);
-			qdisc_run(q);
-		}
-		spin_unlock(root_lock);
-
+		rc = __dev_xmit_skb(skb, q, dev, txq);
 		goto out;
 	}
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 27d03816ec3e..693df7ae33d8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -37,15 +37,11 @@
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
-static inline int qdisc_qlen(struct Qdisc *q)
-{
-	return q->q.qlen;
-}
-
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	q->gso_skb = skb;
 	q->qstats.requeues++;
+	q->q.qlen++;	/* it's still part of the queue */
 	__netif_schedule(q);
 
 	return 0;
@@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+		if (!netif_tx_queue_stopped(txq) &&
+		    !netif_tx_queue_frozen(txq)) {
 			q->gso_skb = NULL;
-		else
+			q->q.qlen--;
+		} else
 			skb = NULL;
 	} else {
 		skb = q->dequeue(q);
@@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 }
 
 /*
- * NOTE: Called under qdisc_lock(q) with locally disabled BH.
- *
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
- * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
- * this queue.
- *
- * netif_tx_lock serializes accesses to device driver.
- *
- * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
- * if one is grabbed, another must be free.
- *
- * Note, that this procedure can be called by a watchdog timer
+ * Transmit one skb, and handle the return status as required. Holding the
+ * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
+ * function.
  *
  * Returns to the caller:
  *	0  - queue is empty or throttled.
  *	>0 - queue is not empty.
- *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+		    struct net_device *dev, struct netdev_queue *txq,
+		    spinlock_t *root_lock)
 {
-	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
-	struct net_device *dev;
-	spinlock_t *root_lock;
-	struct sk_buff *skb;
-
-	/* Dequeue packet */
-	if (unlikely((skb = dequeue_skb(q)) == NULL))
-		return 0;
-
-	root_lock = qdisc_lock(q);
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	dev = qdisc_dev(q);
-	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_tx_queue_stopped(txq) &&
 	    !netif_tx_queue_frozen(txq))
@@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Qdisc *q)
 	return ret;
 }
 
+/*
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
+ *
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
+ *
+ * netif_tx_lock serializes accesses to device driver.
+ *
+ * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
+ * if one is grabbed, another must be free.
+ *
+ * Note, that this procedure can be called by a watchdog timer
+ *
+ * Returns to the caller:
+ *	0  - queue is empty or throttled.
+ *	>0 - queue is not empty.
+ *
+ */
+static inline int qdisc_restart(struct Qdisc *q)
+{
+	struct netdev_queue *txq;
+	struct net_device *dev;
+	spinlock_t *root_lock;
+	struct sk_buff *skb;
+
+	/* Dequeue packet */
+	skb = dequeue_skb(q);
+	if (unlikely(!skb))
+		return 0;
+
+	root_lock = qdisc_lock(q);
+	dev = qdisc_dev(q);
+	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+	return sch_direct_xmit(skb, q, dev, txq, root_lock);
+}
+
 void __qdisc_run(struct Qdisc *q)
 {
 	unsigned long start_time = jiffies;
@@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 
-	kfree_skb(qdisc->gso_skb);
-	qdisc->gso_skb = NULL;
+	if (qdisc->gso_skb) {
+		kfree_skb(qdisc->gso_skb);
+		qdisc->gso_skb = NULL;
+		qdisc->q.qlen = 0;
+	}
 }
 EXPORT_SYMBOL(qdisc_reset);
 
@@ -605,6 +623,9 @@ static void attach_one_default_qdisc(struct net_device *dev,
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
 		}
+
+		/* Can by-pass the queue discipline for default qdisc */
+		qdisc->flags |= TCQ_F_CAN_BYPASS;
 	} else {
 		qdisc = &noqueue_qdisc;
 	}