author    Eric Dumazet <eric.dumazet@gmail.com>    2010-06-02 08:09:29 -0400
committer David S. Miller <davem@davemloft.net>    2010-06-02 08:09:29 -0400
commit    79640a4ca6955e3ebdb7038508fa7a0cd7fa5527 (patch)
tree      128a3d1e9d173963070d49cc64ad38d8eb67dd7b
parent    097811bb48c7837db94d7fe5d94f0f4b5e19e78c (diff)
net: add additional lock to qdisc to increase throughput
When many cpus compete for sending frames on a given qdisc, the qdisc
spinlock suffers from very high contention.

The cpu owning the __QDISC_STATE_RUNNING bit has the same priority as the
others to acquire the lock, and cannot dequeue packets fast enough, since it
must wait for this lock for each dequeued packet.

One solution to this problem is to force all cpus to spin on a second lock
before trying to get the main lock, when/if they see __QDISC_STATE_RUNNING
already set. The owning cpu then competes with at most one other cpu for the
main lock, allowing for a higher dequeue rate.

Based on a previous patch from Alexander Duyck. I added the heuristic to
avoid the atomic in the fast path, and put the new lock far away from the
cache line used by the dequeue worker. Also try to release the busylock as
late as possible.

Tests with the following script gave a boost from ~50,000 pps to ~600,000 pps
on a dual quad core machine (E5450 @ 3.00GHz), tg3 driver. (A single netperf
flow can reach ~800,000 pps on this platform.)

for j in `seq 0 3`; do
  for i in `seq 0 7`; do
    netperf -H 192.168.0.1 -t UDP_STREAM -l 60 -N -T $i -- -m 6 &
  done
done

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
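For readers outside the kernel tree, here is a minimal, self-contained userspace
sketch of the same two-lock idea, using POSIX threads instead of kernel spinlocks.
All names (demo_queue, producer, run_queue, busylock_demo.c) are illustrative and
not part of the patch; the real change is in the diff below. Producers that observe
the queue as "running" first serialize on a secondary busylock, so the thread doing
the dequeue work competes with at most one other thread for the main lock each time
it re-acquires it.

/* busylock_demo.c - illustrative sketch only, not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NTHREADS 8
#define NPACKETS 100000

struct demo_queue {
        pthread_mutex_t lock;     /* main lock, like the qdisc root_lock     */
        atomic_bool     running;  /* like __QDISC_STATE_RUNNING              */
        long            enqueued;
        long            dequeued;
        pthread_mutex_t busylock; /* secondary lock, taken only on contention */
};

static struct demo_queue q = {
        .lock     = PTHREAD_MUTEX_INITIALIZER,
        .busylock = PTHREAD_MUTEX_INITIALIZER,
};

/* Drain the queue, re-taking the main lock per "packet", the way the
 * kernel re-takes root_lock around each driver transmit. */
static void run_queue(void)
{
        for (;;) {
                pthread_mutex_lock(&q.lock);
                if (q.dequeued >= q.enqueued) {
                        atomic_store(&q.running, false);
                        pthread_mutex_unlock(&q.lock);
                        return;
                }
                q.dequeued++;           /* stand-in for real TX work */
                pthread_mutex_unlock(&q.lock);
        }
}

static void *producer(void *arg)
{
        (void)arg;
        for (int i = 0; i < NPACKETS; i++) {
                /* Heuristic: take busylock only when a dequeuer is already
                 * running, so the uncontended fast path pays no extra cost. */
                bool contended = atomic_load(&q.running);

                if (contended)
                        pthread_mutex_lock(&q.busylock);

                pthread_mutex_lock(&q.lock);
                q.enqueued++;
                /* Become the dequeue worker if nobody is running yet. */
                bool run = !atomic_exchange(&q.running, true);
                pthread_mutex_unlock(&q.lock);

                /* Release busylock as late as possible, but before the
                 * potentially long dequeue loop. */
                if (contended)
                        pthread_mutex_unlock(&q.busylock);

                if (run)
                        run_queue();
        }
        return NULL;
}

int main(void)
{
        pthread_t tid[NTHREADS];

        for (int i = 0; i < NTHREADS; i++)
                pthread_create(&tid[i], NULL, producer, NULL);
        for (int i = 0; i < NTHREADS; i++)
                pthread_join(tid[i], NULL);

        printf("enqueued=%ld dequeued=%ld\n", q.enqueued, q.dequeued);
        return 0;
}

As in the patch, the secondary lock is only touched when contention has already
been observed, and it is released before the dequeue loop so the worker keeps
the main lock to itself as much as possible.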
-rw-r--r--  include/net/sch_generic.h  |  3
-rw-r--r--  net/core/dev.c             | 29
-rw-r--r--  net/sched/sch_generic.c    |  1
3 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b3591e4a514c..b35301b0c7b6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -80,7 +80,8 @@ struct Qdisc {
 	struct gnet_stats_basic_packed bstats;
 	unsigned long		__state;
 	struct gnet_stats_queue	qstats;
-	struct rcu_head		rcu_head;
+	struct rcu_head      rcu_head;
+	spinlock_t		busylock;
 };
 
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2733226d90b2..ffca5c1066fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2040,8 +2040,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct netdev_queue *txq)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
+	bool contended = qdisc_is_running(q);
 	int rc;
 
+	/*
+	 * Heuristic to force contended enqueues to serialize on a
+	 * separate lock before trying to get qdisc main lock.
+	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
+	 * and dequeue packets faster.
+	 */
+	if (unlikely(contended))
+		spin_lock(&q->busylock);
+
 	spin_lock(root_lock);
 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
 		kfree_skb(skb);
@@ -2056,19 +2066,30 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
-		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
 			__qdisc_run(q);
-		else
+		} else
 			qdisc_run_end(q);
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
-		qdisc_run(q);
+		if (qdisc_run_begin(q)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		}
 	}
 	spin_unlock(root_lock);
-
+	if (unlikely(contended))
+		spin_unlock(&q->busylock);
 	return rc;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 37b86eab6779..d20fcd2a5519 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -561,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
+	spin_lock_init(&sch->busylock);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;