From 6f535763165331bb91277d7519b507fed22034e5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 11 Oct 2007 18:08:29 -0700
Subject: [NET]: Fix NAPI completion handling in some drivers.

In order for the list handling in net_rx_action() to be
correct, drivers must follow certain rules as stated by
this comment in net_rx_action():

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight.  In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */

A few drivers do not do this because they mix the budget checks
with reading hardware state, resulting in crashes like the one
reported by takano@axe-inc.co.jp.

BNX2 and TG3 are taken care of here, SKY2 fix is from Stephen
Hemminger.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c | 52 +++++++++++++++++++++++++++++++--------------------
 drivers/net/sky2.c | 17 +++++++++++++----
 drivers/net/tg3.c  | 55 ++++++++++++++++++++++++++++++++++++------------------
 3 files changed, 82 insertions(+), 42 deletions(-)

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index bbfbdafb4ae5..d68accea380b 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -2633,15 +2633,11 @@ bnx2_has_work(struct bnx2 *bp)
 	return 0;
 }
 
-static int
-bnx2_poll(struct napi_struct *napi, int budget)
+static int bnx2_poll_work(struct bnx2 *bp, int work_done, int budget)
 {
-	struct bnx2 *bp = container_of(napi, struct bnx2, napi);
-	struct net_device *dev = bp->dev;
 	struct status_block *sblk = bp->status_blk;
 	u32 status_attn_bits = sblk->status_attn_bits;
 	u32 status_attn_bits_ack = sblk->status_attn_bits_ack;
-	int work_done = 0;
 
 	if ((status_attn_bits & STATUS_ATTN_EVENTS) !=
 	    (status_attn_bits_ack & STATUS_ATTN_EVENTS)) {
@@ -2660,27 +2656,43 @@ bnx2_poll(struct napi_struct *napi, int budget)
 		bnx2_tx_int(bp);
 
 	if (bp->status_blk->status_rx_quick_consumer_index0 != bp->hw_rx_cons)
-		work_done = bnx2_rx_int(bp, budget);
+		work_done += bnx2_rx_int(bp, budget - work_done);
 
-	bp->last_status_idx = bp->status_blk->status_idx;
-	rmb();
+	return work_done;
+}
+
+static int bnx2_poll(struct napi_struct *napi, int budget)
+{
+	struct bnx2 *bp = container_of(napi, struct bnx2, napi);
+	int work_done = 0;
+
+	while (1) {
+		work_done = bnx2_poll_work(bp, work_done, budget);
 
-	if (!bnx2_has_work(bp)) {
-		netif_rx_complete(dev, napi);
-		if (likely(bp->flags & USING_MSI_FLAG)) {
+		if (unlikely(work_done >= budget))
+			break;
+
+		if (likely(!bnx2_has_work(bp))) {
+			bp->last_status_idx = bp->status_blk->status_idx;
+			rmb();
+
+			netif_rx_complete(bp->dev, napi);
+			if (likely(bp->flags & USING_MSI_FLAG)) {
+				REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
+				       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
+				       bp->last_status_idx);
+				return 0;
+			}
 			REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
 			       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
+			       BNX2_PCICFG_INT_ACK_CMD_MASK_INT |
 			       bp->last_status_idx);
-			return 0;
-		}
-		REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
-		       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
-		       BNX2_PCICFG_INT_ACK_CMD_MASK_INT |
-		       bp->last_status_idx);
 
-		REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
-		       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
-		       bp->last_status_idx);
+			REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
+			       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
+			       bp->last_status_idx);
+			break;
+		}
 	}
 
 	return work_done;
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index fe0e7560d236..4e569fa0f96c 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -2606,7 +2606,7 @@ static int sky2_poll(struct napi_struct *napi, int work_limit)
 {
 	struct sky2_hw *hw = container_of(napi, struct sky2_hw, napi);
 	u32 status = sky2_read32(hw, B0_Y2_SP_EISR);
-	int work_done;
+	int work_done = 0;
 
 	if (unlikely(status & Y2_IS_ERROR))
 		sky2_err_intr(hw, status);
@@ -2617,10 +2617,16 @@ static int sky2_poll(struct napi_struct *napi, int work_limit)
 	if (status & Y2_IS_IRQ_PHY2)
 		sky2_phy_intr(hw, 1);
 
-	work_done = sky2_status_intr(hw, work_limit);
+	for(;;) {
+		work_done += sky2_status_intr(hw, work_limit);
+
+		if (work_done >= work_limit)
+			break;
+
+		/* More work? */
+		if (hw->st_idx != sky2_read16(hw, STAT_PUT_IDX))
+			continue;
 
-	/* More work? */
-	if (hw->st_idx == sky2_read16(hw, STAT_PUT_IDX)) {
 		/* Bug/Errata workaround?
 		 * Need to kick the TX irq moderation timer.
 		 */
@@ -2631,7 +2637,10 @@ static int sky2_poll(struct napi_struct *napi, int work_limit)
 
 		napi_complete(napi);
 		sky2_read32(hw, B0_Y2_SP_LISR);
+		break;
+
 	}
+
 	return work_done;
 }
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index d2b30fb0b318..a402b5c01896 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3555,12 +3555,9 @@ next_pkt_nopost:
 	return received;
 }
 
-static int tg3_poll(struct napi_struct *napi, int budget)
+static int tg3_poll_work(struct tg3 *tp, int work_done, int budget)
 {
-	struct tg3 *tp = container_of(napi, struct tg3, napi);
-	struct net_device *netdev = tp->dev;
 	struct tg3_hw_status *sblk = tp->hw_status;
-	int work_done = 0;
 
 	/* handle link change and other phy events */
 	if (!(tp->tg3_flags &
@@ -3578,11 +3575,8 @@ static int tg3_poll(struct napi_struct *napi, int budget)
 	/* run TX completion thread */
 	if (sblk->idx[0].tx_consumer != tp->tx_cons) {
 		tg3_tx(tp);
-		if (unlikely(tp->tg3_flags & TG3_FLAG_TX_RECOVERY_PENDING)) {
-			netif_rx_complete(netdev, napi);
-			schedule_work(&tp->reset_task);
+		if (unlikely(tp->tg3_flags & TG3_FLAG_TX_RECOVERY_PENDING))
 			return 0;
-		}
 	}
 
 	/* run RX thread, within the bounds set by NAPI.
@@ -3590,21 +3584,46 @@ static int tg3_poll(struct napi_struct *napi, int budget)
 	 * code synchronizes with tg3->napi.poll()
 	 */
 	if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr)
-		work_done = tg3_rx(tp, budget);
+		work_done += tg3_rx(tp, budget - work_done);
 
-	if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) {
-		tp->last_tag = sblk->status_tag;
-		rmb();
-	} else
-		sblk->status &= ~SD_STATUS_UPDATED;
+	return work_done;
+}
 
-	/* if no more work, tell net stack and NIC we're done */
-	if (!tg3_has_work(tp)) {
-		netif_rx_complete(netdev, napi);
-		tg3_restart_ints(tp);
+static int tg3_poll(struct napi_struct *napi, int budget)
+{
+	struct tg3 *tp = container_of(napi, struct tg3, napi);
+	int work_done = 0;
+
+	while (1) {
+		work_done = tg3_poll_work(tp, work_done, budget);
+
+		if (unlikely(tp->tg3_flags & TG3_FLAG_TX_RECOVERY_PENDING))
+			goto tx_recovery;
+
+		if (unlikely(work_done >= budget))
+			break;
+
+		if (likely(!tg3_has_work(tp))) {
+			struct tg3_hw_status *sblk = tp->hw_status;
+
+			if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) {
+				tp->last_tag = sblk->status_tag;
+				rmb();
+			} else
+				sblk->status &= ~SD_STATUS_UPDATED;
+
+			netif_rx_complete(tp->dev, napi);
+			tg3_restart_ints(tp);
+			break;
+		}
 	}
 
 	return work_done;
+
+tx_recovery:
+	netif_rx_complete(tp->dev, napi);
+	schedule_work(&tp->reset_task);
+	return 0;
 }
 
 static void tg3_irq_quiesce(struct tg3 *tp)
-- 
cgit v1.2.2