[NET]: qdisc_restart - readability changes plus one bug fix.

New changes : - Incorporated Peter Waskiewicz's comments. - Re-added back one warning message (on driver returning wrong value). Previous changes : - Converted to use switch/case code which looks neater. - "if (ret == NETDEV_TX_LOCKED && lockless)" is buggy, and the lockless check should be removed, since driver will return NETDEV_TX_LOCKED only if lockless is true and driver has to do the locking. In the original code as well as the latest code, this code can result in a bug where if LLTX is not set for a driver (lockless == 0) but the driver is written wrongly to do a trylock (despite LLTX being set), the driver returns LOCKED. But since lockless is zero, the packet is requeue'd instead of calling collision code which will issue warning and free up the skb. Instead this skb will be retried with this driver next time, and the same result will ensue. Removing this check will catch these driver bugs instead of hiding the problem. I am keeping this change to readability section since : a. it is confusing to check two things as it is; and b. it is difficult to keep this check in the changed 'switch' code. - Changed some names, like try_get_tx_pkt to dev_dequeue_skb (as that is the work being done and easier to understand) and do_dev_requeue to dev_requeue_skb, merged handle_dev_cpu_collision and tx_islocked to dev_handle_collision (handle_dev_cpu_collision is a small routine with only one caller, so there is no need to have two separate routines which also results in getting rid of two macros, etc. - Removed an XXX comment as it should never fail (I suspect this was related to batch skb WIP, Jamal ?). Converted some functions to original coding style of having the return values and the function name on same line, eg prio2list. Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Krishna Kumar <krkumar2@in.ibm.com> 2007-06-24 22:56:09 -0400
committer: David S. Miller <davem@sunset.davemloft.net> 2007-07-11 01:15:35 -0400
commit: 6c1361a6f285bf3df4b502651c0dd38d0eedc044 (patch)
tree: b0a89bc11d04bd32451b6b0637da6396fa1c8549 /net/sched
parent: 49d66a70cf9fd94057aacd6055334299ab3a5eac (diff)
1 files changed, 86 insertions, 81 deletions
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9461e8ae0529..983c32caf713 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -34,9 +34,6 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
-#define SCHED_TX_DROP -2
-#define SCHED_TX_QUEUE -3
 /* Main transmission queue. */
 /* Modifications to data participating in scheduling must be protected with
@@ -68,41 +65,24 @@ static inline int qdisc_qlen(struct Qdisc *q)
        return q->q.qlen;
 }
-static inline int handle_dev_cpu_collision(struct net_device *dev)
+static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
-{
+                                  struct Qdisc *q)
-        if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
-                if (net_ratelimit())
-                        printk(KERN_WARNING
-                               "Dead loop on netdevice %s, fix it urgently!\n",
-                               dev->name);
-                return SCHED_TX_DROP;
-        }
-        __get_cpu_var(netdev_rx_stat).cpu_collision++;
-        return SCHED_TX_QUEUE;
-}
-static inline int
-do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
 {
        if (unlikely(skb->next))
                dev->gso_skb = skb;
        else
                q->ops->requeue(skb, q);
-        /* XXX: Could netif_schedule fail? Or is the fact we are
-         * requeueing imply the hardware path is closed
-         * and even if we fail, some interupt will wake us
-         */
        netif_schedule(dev);
        return 0;
 }
-static inline struct sk_buff *
+static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
-try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
+                                              struct Qdisc *q)
 {
-        struct sk_buff *skb = dev->gso_skb;
+        struct sk_buff *skb;
-        if (skb)
+        if ((skb = dev->gso_skb))
                dev->gso_skb = NULL;
        else
                skb = q->dequeue(q);
@@ -110,92 +90,117 @@ try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
        return skb;
 }
-static inline int
+static inline int handle_dev_cpu_collision(struct sk_buff *skb,
-tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+                                           struct net_device *dev,
+                                           struct Qdisc *q)
 {
-        int ret = handle_dev_cpu_collision(dev);
+        int ret;
-        if (ret == SCHED_TX_DROP) {
+        if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
+                /*
+                 * Same CPU holding the lock. It may be a transient
+                 * configuration error, when hard_start_xmit() recurses. We
+                 * detect it by checking xmit owner and drop the packet when
+                 * deadloop is detected. Return OK to try the next skb.
+                 */
                kfree_skb(skb);
-                return qdisc_qlen(q);
+                if (net_ratelimit())
+                        printk(KERN_WARNING "Dead loop on netdevice %s, "
+                               "fix it urgently!\n", dev->name);
+                ret = qdisc_qlen(q);
+        } else {
+                /*
+                 * Another cpu is holding lock, requeue & delay xmits for
+                 * some time.
+                 */
+                __get_cpu_var(netdev_rx_stat).cpu_collision++;
+                ret = dev_requeue_skb(skb, dev, q);
        }
-        return do_dev_requeue(skb, dev, q);
+        return ret;
 }
 /*
-   NOTE: Called under dev->queue_lock with locally disabled BH.
+ * NOTE: Called under dev->queue_lock with locally disabled BH.
+ *
-   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+ * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
-   can enter this region at a time.
+ * device at a time. dev->queue_lock serializes queue accesses for
+ * this device AND dev->qdisc pointer itself.
-   dev->queue_lock serializes queue accesses for this device
+ *
-   AND dev->qdisc pointer itself.
+ *  netif_tx_lock serializes accesses to device driver.
+ *
-   netif_tx_lock serializes accesses to device driver.
+ *  dev->queue_lock and netif_tx_lock are mutually exclusive,
+ *  if one is grabbed, another must be free.
-   dev->queue_lock and netif_tx_lock are mutually exclusive,
+ *
-   if one is grabbed, another must be free.
+ * Note, that this procedure can be called by a watchdog timer
+ *
-   Multiple CPUs may contend for the two locks.
+ * Returns to the caller:
+ *                              0  - queue is empty or throttled.
-   Note, that this procedure can be called by a watchdog timer
+ *                              >0 - queue is not empty.
+ *
-   Returns to the caller:
+ */
-   Returns:  0  - queue is empty or throttled.
-            >0  - queue is not empty.
-*/
 static inline int qdisc_restart(struct net_device *dev)
 {
        struct Qdisc *q = dev->qdisc;
-        unsigned lockless = (dev->features & NETIF_F_LLTX);
        struct sk_buff *skb;
+        unsigned lockless;
        int ret;
-        skb = try_get_tx_pkt(dev, q);
+        /* Dequeue packet */
-        if (skb == NULL)
+        if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
                return 0;
-        /* we have a packet to send */
+        /*
-        if (!lockless) {
+         * When the driver has LLTX set, it does its own locking in
-                if (!netif_tx_trylock(dev))
+         * start_xmit. These checks are worth it because even uncongested
-                        return tx_islocked(skb, dev, q);
+         * locks can be quite expensive. The driver can do a trylock, as
+         * is being done here; in case of lock contention it should return
+         * NETDEV_TX_LOCKED and the packet will be requeued.
+         */
+        lockless = (dev->features & NETIF_F_LLTX);
+        if (!lockless && !netif_tx_trylock(dev)) {
+                /* Another CPU grabbed the driver tx lock */
+                return handle_dev_cpu_collision(skb, dev, q);
        }
-        /* all clear .. */
+        /* And release queue */
        spin_unlock(&dev->queue_lock);
        ret = NETDEV_TX_BUSY;
        if (!netif_queue_stopped(dev))
-                /* churn baby churn .. */
                ret = dev_hard_start_xmit(skb, dev);
        if (!lockless)
                netif_tx_unlock(dev);
        spin_lock(&dev->queue_lock);
-        /* we need to refresh q because it may be invalid since
-         * we dropped  dev->queue_lock earlier ...
-         * So dont try to be clever grasshopper
-         */
        q = dev->qdisc;
-        /* most likely result, packet went ok */
-        if (ret == NETDEV_TX_OK)
-                return qdisc_qlen(q);
-        /* only for lockless drivers .. */
-        if (ret == NETDEV_TX_LOCKED && lockless)
-                return tx_islocked(skb, dev, q);
-        if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
+        switch (ret) {
-                printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
+        case NETDEV_TX_OK:
+                /* Driver sent out skb successfully */
+                ret = qdisc_qlen(q);
+                break;
+        case NETDEV_TX_LOCKED:
+                /* Driver try lock failed */
+                ret = handle_dev_cpu_collision(skb, dev, q);
+                break;
+        default:
+                /* Driver returned NETDEV_TX_BUSY - requeue skb */
+                if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
+                        printk(KERN_WARNING "BUG %s code %d qlen %d\n",
+                               dev->name, ret, q->q.qlen);
+                ret = dev_requeue_skb(skb, dev, q);
+                break;
+        }
-        return do_dev_requeue(skb, dev, q);
+        return ret;
 }
 void __qdisc_run(struct net_device *dev)
 {
        do {
author	Krishna Kumar <krkumar2@in.ibm.com>	2007-06-24 22:56:09 -0400
committer	David S. Miller <davem@sunset.davemloft.net>	2007-07-11 01:15:35 -0400
commit	6c1361a6f285bf3df4b502651c0dd38d0eedc044 (patch)
tree	b0a89bc11d04bd32451b6b0637da6396fa1c8549 /net/sched
parent	49d66a70cf9fd94057aacd6055334299ab3a5eac (diff)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 9461e8ae0529..983c32caf713 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c
@@ -34,9 +34,6 @@
34	#include <net/sock.h>	34	#include <net/sock.h>
35	#include <net/pkt_sched.h>	35	#include <net/pkt_sched.h>
36		36
37	#define SCHED_TX_DROP -2
38	#define SCHED_TX_QUEUE -3
39
40	/* Main transmission queue. */	37	/* Main transmission queue. */
41		38
42	/* Modifications to data participating in scheduling must be protected with	39	/* Modifications to data participating in scheduling must be protected with
@@ -68,41 +65,24 @@ static inline int qdisc_qlen(struct Qdisc *q)
68	return q->q.qlen;	65	return q->q.qlen;
69	}	66	}
70		67
71	static inline int handle_dev_cpu_collision(struct net_device *dev)	68	static inline int dev_requeue_skb(struct sk_buff skb, struct net_device dev,
72	{	69	struct Qdisc *q)
73	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
74	if (net_ratelimit())
75	printk(KERN_WARNING
76	"Dead loop on netdevice %s, fix it urgently!\n",
77	dev->name);
78	return SCHED_TX_DROP;
79	}
80	__get_cpu_var(netdev_rx_stat).cpu_collision++;
81	return SCHED_TX_QUEUE;
82	}
83
84	static inline int
85	do_dev_requeue(struct sk_buff skb, struct net_device dev, struct Qdisc *q)
86	{	70	{
87
88	if (unlikely(skb->next))	71	if (unlikely(skb->next))
89	dev->gso_skb = skb;	72	dev->gso_skb = skb;
90	else	73	else
91	q->ops->requeue(skb, q);	74	q->ops->requeue(skb, q);
92	/* XXX: Could netif_schedule fail? Or is the fact we are	75
93	* requeueing imply the hardware path is closed
94	* and even if we fail, some interupt will wake us
95	*/
96	netif_schedule(dev);	76	netif_schedule(dev);
97	return 0;	77	return 0;
98	}	78	}
99		79
100	static inline struct sk_buff *	80	static inline struct sk_buff dev_dequeue_skb(struct net_device dev,
101	try_get_tx_pkt(struct net_device dev, struct Qdisc q)	81	struct Qdisc *q)
102	{	82	{
103	struct sk_buff *skb = dev->gso_skb;	83	struct sk_buff *skb;
104		84
105	if (skb)	85	if ((skb = dev->gso_skb))
106	dev->gso_skb = NULL;	86	dev->gso_skb = NULL;
107	else	87	else
108	skb = q->dequeue(q);	88	skb = q->dequeue(q);
@@ -110,92 +90,117 @@ try_get_tx_pkt(struct net_device dev, struct Qdisc q)
110	return skb;	90	return skb;
111	}	91	}
112		92
113	static inline int	93	static inline int handle_dev_cpu_collision(struct sk_buff *skb,
114	tx_islocked(struct sk_buff skb, struct net_device dev, struct Qdisc *q)	94	struct net_device *dev,
		95	struct Qdisc *q)
115	{	96	{
116	int ret = handle_dev_cpu_collision(dev);	97	int ret;
117		98
118	if (ret == SCHED_TX_DROP) {	99	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
		100	/*
		101	* Same CPU holding the lock. It may be a transient
		102	* configuration error, when hard_start_xmit() recurses. We
		103	* detect it by checking xmit owner and drop the packet when
		104	* deadloop is detected. Return OK to try the next skb.
		105	*/
119	kfree_skb(skb);	106	kfree_skb(skb);
120	return qdisc_qlen(q);	107	if (net_ratelimit())
		108	printk(KERN_WARNING "Dead loop on netdevice %s, "
		109	"fix it urgently!\n", dev->name);
		110	ret = qdisc_qlen(q);
		111	} else {
		112	/*
		113	* Another cpu is holding lock, requeue & delay xmits for
		114	* some time.
		115	*/
		116	__get_cpu_var(netdev_rx_stat).cpu_collision++;
		117	ret = dev_requeue_skb(skb, dev, q);
121	}	118	}
122		119
123	return do_dev_requeue(skb, dev, q);	120	return ret;
124	}	121	}
125		122
126
127	/*	123	/*
128	NOTE: Called under dev->queue_lock with locally disabled BH.	124	* NOTE: Called under dev->queue_lock with locally disabled BH.
129		125	*
130	__LINK_STATE_QDISC_RUNNING guarantees only one CPU	126	* __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
131	can enter this region at a time.	127	* device at a time. dev->queue_lock serializes queue accesses for
132		128	* this device AND dev->qdisc pointer itself.
133	dev->queue_lock serializes queue accesses for this device	129	*
134	AND dev->qdisc pointer itself.	130	* netif_tx_lock serializes accesses to device driver.
135		131	*
136	netif_tx_lock serializes accesses to device driver.	132	* dev->queue_lock and netif_tx_lock are mutually exclusive,
137		133	* if one is grabbed, another must be free.
138	dev->queue_lock and netif_tx_lock are mutually exclusive,	134	*
139	if one is grabbed, another must be free.	135	* Note, that this procedure can be called by a watchdog timer
140		136	*
141	Multiple CPUs may contend for the two locks.	137	* Returns to the caller:
142		138	* 0 - queue is empty or throttled.
143	Note, that this procedure can be called by a watchdog timer	139	* >0 - queue is not empty.
144		140	*
145	Returns to the caller:	141	*/
146	Returns: 0 - queue is empty or throttled.
147	>0 - queue is not empty.
148
149	*/
150
151	static inline int qdisc_restart(struct net_device *dev)	142	static inline int qdisc_restart(struct net_device *dev)
152	{	143	{
153	struct Qdisc *q = dev->qdisc;	144	struct Qdisc *q = dev->qdisc;
154	unsigned lockless = (dev->features & NETIF_F_LLTX);
155	struct sk_buff *skb;	145	struct sk_buff *skb;
		146	unsigned lockless;
156	int ret;	147	int ret;
157		148
158	skb = try_get_tx_pkt(dev, q);	149	/* Dequeue packet */
159	if (skb == NULL)	150	if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
160	return 0;	151	return 0;
161		152
162	/* we have a packet to send */	153	/*
163	if (!lockless) {	154	* When the driver has LLTX set, it does its own locking in
164	if (!netif_tx_trylock(dev))	155	* start_xmit. These checks are worth it because even uncongested
165	return tx_islocked(skb, dev, q);	156	* locks can be quite expensive. The driver can do a trylock, as
		157	* is being done here; in case of lock contention it should return
		158	* NETDEV_TX_LOCKED and the packet will be requeued.
		159	*/
		160	lockless = (dev->features & NETIF_F_LLTX);
		161
		162	if (!lockless && !netif_tx_trylock(dev)) {
		163	/* Another CPU grabbed the driver tx lock */
		164	return handle_dev_cpu_collision(skb, dev, q);
166	}	165	}
167	/* all clear .. */	166
		167	/* And release queue */
168	spin_unlock(&dev->queue_lock);	168	spin_unlock(&dev->queue_lock);
169		169
170	ret = NETDEV_TX_BUSY;	170	ret = NETDEV_TX_BUSY;
171	if (!netif_queue_stopped(dev))	171	if (!netif_queue_stopped(dev))
172	/* churn baby churn .. */
173	ret = dev_hard_start_xmit(skb, dev);	172	ret = dev_hard_start_xmit(skb, dev);
174		173
175	if (!lockless)	174	if (!lockless)
176	netif_tx_unlock(dev);	175	netif_tx_unlock(dev);
177		176
178	spin_lock(&dev->queue_lock);	177	spin_lock(&dev->queue_lock);
179
180	/* we need to refresh q because it may be invalid since
181	* we dropped dev->queue_lock earlier ...
182	* So dont try to be clever grasshopper
183	*/
184	q = dev->qdisc;	178	q = dev->qdisc;
185	/* most likely result, packet went ok */
186	if (ret == NETDEV_TX_OK)
187	return qdisc_qlen(q);
188	/* only for lockless drivers .. */
189	if (ret == NETDEV_TX_LOCKED && lockless)
190	return tx_islocked(skb, dev, q);
191		179
192	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))	180	switch (ret) {
193	printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);	181	case NETDEV_TX_OK:
		182	/* Driver sent out skb successfully */
		183	ret = qdisc_qlen(q);
		184	break;
		185
		186	case NETDEV_TX_LOCKED:
		187	/* Driver try lock failed */
		188	ret = handle_dev_cpu_collision(skb, dev, q);
		189	break;
		190
		191	default:
		192	/* Driver returned NETDEV_TX_BUSY - requeue skb */
		193	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
		194	printk(KERN_WARNING "BUG %s code %d qlen %d\n",
		195	dev->name, ret, q->q.qlen);
		196
		197	ret = dev_requeue_skb(skb, dev, q);
		198	break;
		199	}
194		200
195	return do_dev_requeue(skb, dev, q);	201	return ret;
196	}	202	}
197		203
198
199	void __qdisc_run(struct net_device *dev)	204	void __qdisc_run(struct net_device *dev)
200	{	205	{
201	do {	206	do {