aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-07-16 05:15:04 -0400
committerDavid S. Miller <davem@davemloft.net>2008-07-17 22:21:20 -0400
commit37437bb2e1ae8af470dfcd5b4ff454110894ccaf (patch)
tree1795e78a7648252b0c92c972df12b776a28437d7
parent7698b4fcabcd790efc4f226bada1e7b5870653af (diff)
pkt_sched: Schedule qdiscs instead of netdev_queue.
When we have shared qdiscs, packets come out of the qdiscs for multiple transmit queues. Therefore it doesn't make any sense to schedule the transmit queue when logically we cannot know ahead of time the TX queue of the SKB that the qdisc->dequeue() will give us. Just for sanity I added a BUG check to make sure we never get into a state where the noop_qdisc is scheduled. Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netdevice.h12
-rw-r--r--include/net/pkt_sched.h11
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--net/core/dev.c68
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_cbq.c2
-rw-r--r--net/sched/sch_generic.c30
7 files changed, 55 insertions(+), 73 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9240a95793be..1e839fa01434 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -275,7 +275,6 @@ enum netdev_state_t
275{ 275{
276 __LINK_STATE_START, 276 __LINK_STATE_START,
277 __LINK_STATE_PRESENT, 277 __LINK_STATE_PRESENT,
278 __LINK_STATE_SCHED,
279 __LINK_STATE_NOCARRIER, 278 __LINK_STATE_NOCARRIER,
280 __LINK_STATE_LINKWATCH_PENDING, 279 __LINK_STATE_LINKWATCH_PENDING,
281 __LINK_STATE_DORMANT, 280 __LINK_STATE_DORMANT,
@@ -452,7 +451,6 @@ struct netdev_queue {
452 int xmit_lock_owner; 451 int xmit_lock_owner;
453 struct Qdisc *qdisc_sleeping; 452 struct Qdisc *qdisc_sleeping;
454 struct list_head qdisc_list; 453 struct list_head qdisc_list;
455 struct netdev_queue *next_sched;
456} ____cacheline_aligned_in_smp; 454} ____cacheline_aligned_in_smp;
457 455
458/* 456/*
@@ -969,7 +967,7 @@ static inline int unregister_gifconf(unsigned int family)
969 */ 967 */
970struct softnet_data 968struct softnet_data
971{ 969{
972 struct netdev_queue *output_queue; 970 struct Qdisc *output_queue;
973 struct sk_buff_head input_pkt_queue; 971 struct sk_buff_head input_pkt_queue;
974 struct list_head poll_list; 972 struct list_head poll_list;
975 struct sk_buff *completion_queue; 973 struct sk_buff *completion_queue;
@@ -984,12 +982,12 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data);
984 982
985#define HAVE_NETIF_QUEUE 983#define HAVE_NETIF_QUEUE
986 984
987extern void __netif_schedule(struct netdev_queue *txq); 985extern void __netif_schedule(struct Qdisc *q);
988 986
989static inline void netif_schedule_queue(struct netdev_queue *txq) 987static inline void netif_schedule_queue(struct netdev_queue *txq)
990{ 988{
991 if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) 989 if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
992 __netif_schedule(txq); 990 __netif_schedule(txq->qdisc);
993} 991}
994 992
995static inline void netif_tx_schedule_all(struct net_device *dev) 993static inline void netif_tx_schedule_all(struct net_device *dev)
@@ -1042,7 +1040,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
1042 } 1040 }
1043#endif 1041#endif
1044 if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state)) 1042 if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state))
1045 __netif_schedule(dev_queue); 1043 __netif_schedule(dev_queue->qdisc);
1046} 1044}
1047 1045
1048static inline void netif_wake_queue(struct net_device *dev) 1046static inline void netif_wake_queue(struct net_device *dev)
@@ -1186,7 +1184,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
1186 return; 1184 return;
1187#endif 1185#endif
1188 if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state)) 1186 if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state))
1189 __netif_schedule(txq); 1187 __netif_schedule(txq->qdisc);
1190} 1188}
1191 1189
1192/** 1190/**
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 06a442d85186..e4e30052e4e2 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -84,15 +84,12 @@ extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
84 struct nlattr *tab); 84 struct nlattr *tab);
85extern void qdisc_put_rtab(struct qdisc_rate_table *tab); 85extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
86 86
87extern void __qdisc_run(struct netdev_queue *txq); 87extern void __qdisc_run(struct Qdisc *q);
88 88
89static inline void qdisc_run(struct netdev_queue *txq) 89static inline void qdisc_run(struct Qdisc *q)
90{ 90{
91 struct Qdisc *q = txq->qdisc; 91 if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
92 92 __qdisc_run(q);
93 if (!netif_tx_queue_stopped(txq) &&
94 !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
95 __qdisc_run(txq);
96} 93}
97 94
98extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, 95extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 92417825d387..3cc4b5cd8c6a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -26,6 +26,7 @@ struct qdisc_rate_table
26enum qdisc_state_t 26enum qdisc_state_t
27{ 27{
28 __QDISC_STATE_RUNNING, 28 __QDISC_STATE_RUNNING,
29 __QDISC_STATE_SCHED,
29}; 30};
30 31
31struct Qdisc 32struct Qdisc
@@ -45,6 +46,7 @@ struct Qdisc
45 struct sk_buff *gso_skb; 46 struct sk_buff *gso_skb;
46 struct sk_buff_head q; 47 struct sk_buff_head q;
47 struct netdev_queue *dev_queue; 48 struct netdev_queue *dev_queue;
49 struct Qdisc *next_sched;
48 struct list_head list; 50 struct list_head list;
49 51
50 struct gnet_stats_basic bstats; 52 struct gnet_stats_basic bstats;
diff --git a/net/core/dev.c b/net/core/dev.c
index 467bfb325123..0b909b74f698 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1323,18 +1323,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1323} 1323}
1324 1324
1325 1325
1326void __netif_schedule(struct netdev_queue *txq) 1326void __netif_schedule(struct Qdisc *q)
1327{ 1327{
1328 struct net_device *dev = txq->dev; 1328 BUG_ON(q == &noop_qdisc);
1329 1329
1330 if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { 1330 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
1331 struct softnet_data *sd; 1331 struct softnet_data *sd;
1332 unsigned long flags; 1332 unsigned long flags;
1333 1333
1334 local_irq_save(flags); 1334 local_irq_save(flags);
1335 sd = &__get_cpu_var(softnet_data); 1335 sd = &__get_cpu_var(softnet_data);
1336 txq->next_sched = sd->output_queue; 1336 q->next_sched = sd->output_queue;
1337 sd->output_queue = txq; 1337 sd->output_queue = q;
1338 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1338 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1339 local_irq_restore(flags); 1339 local_irq_restore(flags);
1340 } 1340 }
@@ -1771,37 +1771,23 @@ gso:
1771 rcu_read_lock_bh(); 1771 rcu_read_lock_bh();
1772 1772
1773 txq = dev_pick_tx(dev, skb); 1773 txq = dev_pick_tx(dev, skb);
1774 spin_lock_prefetch(&txq->lock);
1775
1776 /* Updates of qdisc are serialized by queue->lock.
1777 * The struct Qdisc which is pointed to by qdisc is now a
1778 * rcu structure - it may be accessed without acquiring
1779 * a lock (but the structure may be stale.) The freeing of the
1780 * qdisc will be deferred until it's known that there are no
1781 * more references to it.
1782 *
1783 * If the qdisc has an enqueue function, we still need to
1784 * hold the queue->lock before calling it, since queue->lock
1785 * also serializes access to the device queue.
1786 */
1787
1788 q = rcu_dereference(txq->qdisc); 1774 q = rcu_dereference(txq->qdisc);
1775
1789#ifdef CONFIG_NET_CLS_ACT 1776#ifdef CONFIG_NET_CLS_ACT
1790 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); 1777 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1791#endif 1778#endif
1792 if (q->enqueue) { 1779 if (q->enqueue) {
1793 /* Grab device queue */ 1780 spinlock_t *root_lock = qdisc_root_lock(q);
1794 spin_lock(&txq->lock); 1781
1795 q = txq->qdisc; 1782 spin_lock(root_lock);
1796 if (q->enqueue) { 1783
1797 rc = q->enqueue(skb, q); 1784 rc = q->enqueue(skb, q);
1798 qdisc_run(txq); 1785 qdisc_run(q);
1799 spin_unlock(&txq->lock); 1786
1800 1787 spin_unlock(root_lock);
1801 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; 1788
1802 goto out; 1789 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1803 } 1790 goto out;
1804 spin_unlock(&txq->lock);
1805 } 1791 }
1806 1792
1807 /* The device has no queue. Common case for software devices: 1793 /* The device has no queue. Common case for software devices:
@@ -1974,7 +1960,7 @@ static void net_tx_action(struct softirq_action *h)
1974 } 1960 }
1975 1961
1976 if (sd->output_queue) { 1962 if (sd->output_queue) {
1977 struct netdev_queue *head; 1963 struct Qdisc *head;
1978 1964
1979 local_irq_disable(); 1965 local_irq_disable();
1980 head = sd->output_queue; 1966 head = sd->output_queue;
@@ -1982,18 +1968,20 @@ static void net_tx_action(struct softirq_action *h)
1982 local_irq_enable(); 1968 local_irq_enable();
1983 1969
1984 while (head) { 1970 while (head) {
1985 struct netdev_queue *txq = head; 1971 struct Qdisc *q = head;
1986 struct net_device *dev = txq->dev; 1972 spinlock_t *root_lock;
1973
1987 head = head->next_sched; 1974 head = head->next_sched;
1988 1975
1989 smp_mb__before_clear_bit(); 1976 smp_mb__before_clear_bit();
1990 clear_bit(__LINK_STATE_SCHED, &dev->state); 1977 clear_bit(__QDISC_STATE_SCHED, &q->state);
1991 1978
1992 if (spin_trylock(&txq->lock)) { 1979 root_lock = qdisc_root_lock(q);
1993 qdisc_run(txq); 1980 if (spin_trylock(root_lock)) {
1994 spin_unlock(&txq->lock); 1981 qdisc_run(q);
1982 spin_unlock(root_lock);
1995 } else { 1983 } else {
1996 netif_schedule_queue(txq); 1984 __netif_schedule(q);
1997 } 1985 }
1998 } 1986 }
1999 } 1987 }
@@ -4459,7 +4447,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
4459 void *ocpu) 4447 void *ocpu)
4460{ 4448{
4461 struct sk_buff **list_skb; 4449 struct sk_buff **list_skb;
4462 struct netdev_queue **list_net; 4450 struct Qdisc **list_net;
4463 struct sk_buff *skb; 4451 struct sk_buff *skb;
4464 unsigned int cpu, oldcpu = (unsigned long)ocpu; 4452 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4465 struct softnet_data *sd, *oldsd; 4453 struct softnet_data *sd, *oldsd;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 19c244a00839..8e8c5becc348 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -294,11 +294,10 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
294{ 294{
295 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 295 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
296 timer); 296 timer);
297 struct netdev_queue *txq = wd->qdisc->dev_queue;
298 297
299 wd->qdisc->flags &= ~TCQ_F_THROTTLED; 298 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
300 smp_wmb(); 299 smp_wmb();
301 netif_schedule_queue(txq); 300 __netif_schedule(wd->qdisc);
302 301
303 return HRTIMER_NORESTART; 302 return HRTIMER_NORESTART;
304} 303}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 37ae653db683..a3953bbe2d79 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -650,7 +650,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
650 } 650 }
651 651
652 sch->flags &= ~TCQ_F_THROTTLED; 652 sch->flags &= ~TCQ_F_THROTTLED;
653 netif_schedule_queue(sch->dev_queue); 653 __netif_schedule(sch);
654 return HRTIMER_NORESTART; 654 return HRTIMER_NORESTART;
655} 655}
656 656
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 739a8711ab30..dd5c4e70abe4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -72,16 +72,14 @@ static inline int qdisc_qlen(struct Qdisc *q)
72 return q->q.qlen; 72 return q->q.qlen;
73} 73}
74 74
75static inline int dev_requeue_skb(struct sk_buff *skb, 75static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
76 struct netdev_queue *dev_queue,
77 struct Qdisc *q)
78{ 76{
79 if (unlikely(skb->next)) 77 if (unlikely(skb->next))
80 q->gso_skb = skb; 78 q->gso_skb = skb;
81 else 79 else
82 q->ops->requeue(skb, q); 80 q->ops->requeue(skb, q);
83 81
84 netif_schedule_queue(dev_queue); 82 __netif_schedule(q);
85 return 0; 83 return 0;
86} 84}
87 85
@@ -121,7 +119,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
121 * some time. 119 * some time.
122 */ 120 */
123 __get_cpu_var(netdev_rx_stat).cpu_collision++; 121 __get_cpu_var(netdev_rx_stat).cpu_collision++;
124 ret = dev_requeue_skb(skb, dev_queue, q); 122 ret = dev_requeue_skb(skb, q);
125 } 123 }
126 124
127 return ret; 125 return ret;
@@ -146,9 +144,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
146 * >0 - queue is not empty. 144 * >0 - queue is not empty.
147 * 145 *
148 */ 146 */
149static inline int qdisc_restart(struct netdev_queue *txq, 147static inline int qdisc_restart(struct Qdisc *q)
150 struct Qdisc *q)
151{ 148{
149 struct netdev_queue *txq;
152 int ret = NETDEV_TX_BUSY; 150 int ret = NETDEV_TX_BUSY;
153 struct net_device *dev; 151 struct net_device *dev;
154 spinlock_t *root_lock; 152 spinlock_t *root_lock;
@@ -163,7 +161,8 @@ static inline int qdisc_restart(struct netdev_queue *txq,
163 /* And release qdisc */ 161 /* And release qdisc */
164 spin_unlock(root_lock); 162 spin_unlock(root_lock);
165 163
166 dev = txq->dev; 164 dev = qdisc_dev(q);
165 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
167 166
168 HARD_TX_LOCK(dev, txq, smp_processor_id()); 167 HARD_TX_LOCK(dev, txq, smp_processor_id());
169 if (!netif_subqueue_stopped(dev, skb)) 168 if (!netif_subqueue_stopped(dev, skb))
@@ -189,29 +188,28 @@ static inline int qdisc_restart(struct netdev_queue *txq,
189 printk(KERN_WARNING "BUG %s code %d qlen %d\n", 188 printk(KERN_WARNING "BUG %s code %d qlen %d\n",
190 dev->name, ret, q->q.qlen); 189 dev->name, ret, q->q.qlen);
191 190
192 ret = dev_requeue_skb(skb, txq, q); 191 ret = dev_requeue_skb(skb, q);
193 break; 192 break;
194 } 193 }
195 194
195 if (ret && netif_tx_queue_stopped(txq))
196 ret = 0;
197
196 return ret; 198 return ret;
197} 199}
198 200
199void __qdisc_run(struct netdev_queue *txq) 201void __qdisc_run(struct Qdisc *q)
200{ 202{
201 unsigned long start_time = jiffies; 203 unsigned long start_time = jiffies;
202 struct Qdisc *q = txq->qdisc;
203
204 while (qdisc_restart(txq, q)) {
205 if (netif_tx_queue_stopped(txq))
206 break;
207 204
205 while (qdisc_restart(q)) {
208 /* 206 /*
209 * Postpone processing if 207 * Postpone processing if
210 * 1. another process needs the CPU; 208 * 1. another process needs the CPU;
211 * 2. we've been doing it for too long. 209 * 2. we've been doing it for too long.
212 */ 210 */
213 if (need_resched() || jiffies != start_time) { 211 if (need_resched() || jiffies != start_time) {
214 netif_schedule_queue(txq); 212 __netif_schedule(q);
215 break; 213 break;
216 } 214 }
217 } 215 }