aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorMichael Chan <mchan@broadcom.com>2006-08-08 00:46:02 -0400
committerDavid S. Miller <davem@davemloft.net>2006-08-08 00:46:02 -0400
commit1b2a720506ccf7c30baaeda5d990c29b31e21726 (patch)
tree5990f56e364077c83563566ed64beae95b855a9f /drivers
parentbd37a088596ccdb2b2dd3299e25e333bca7a9a34 (diff)
[TG3]: Fix tx race condition
Fix a subtle race condition between tg3_start_xmit() and tg3_tx() discovered by Herbert Xu <herbert@gondor.apana.org.au>: CPU0 CPU1 tg3_start_xmit() if (tx_ring_full) { tx_lock tg3_tx() if (!netif_queue_stopped) netif_stop_queue() if (!tx_ring_full) update_tx_ring netif_wake_queue() tx_unlock } Even though tx_ring is updated before the if statement in tg3_tx() in program order, it can be re-ordered by the CPU as shown above. This scenario can cause the tx queue to be stopped forever if tg3_tx() has just freed up the entire tx_ring. The possibility of this happening should be very rare though. The following changes are made: 1. Add memory barrier to fix the above race condition. 2. Eliminate the private tx_lock altogether and rely solely on netif_tx_lock. This eliminates one spinlock in tg3_start_xmit() when the ring is full. 3. Because of 2, use netif_tx_lock in tg3_tx() before calling netif_wake_queue(). 4. Change TX_BUFFS_AVAIL to an inline function with a memory barrier. Herbert and David suggested using the memory barrier instead of volatile. 5. Check for the full wake queue condition before getting netif_tx_lock in tg3_tx(). This reduces the number of unnecessary spinlocks when the tx ring is full in a steady-state condition. 6. Update version to 3.65. Signed-off-by: Michael Chan <mchan@broadcom.com> Acked-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/tg3.c49
-rw-r--r--drivers/net/tg3.h8
2 files changed, 31 insertions, 26 deletions
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 0afbed6753fa..eafabb253f08 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -68,8 +68,8 @@
68 68
69#define DRV_MODULE_NAME "tg3" 69#define DRV_MODULE_NAME "tg3"
70#define PFX DRV_MODULE_NAME ": " 70#define PFX DRV_MODULE_NAME ": "
71#define DRV_MODULE_VERSION "3.64" 71#define DRV_MODULE_VERSION "3.65"
72#define DRV_MODULE_RELDATE "July 31, 2006" 72#define DRV_MODULE_RELDATE "August 07, 2006"
73 73
74#define TG3_DEF_MAC_MODE 0 74#define TG3_DEF_MAC_MODE 0
75#define TG3_DEF_RX_MODE 0 75#define TG3_DEF_RX_MODE 0
@@ -123,9 +123,6 @@
123 TG3_RX_RCB_RING_SIZE(tp)) 123 TG3_RX_RCB_RING_SIZE(tp))
124#define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \ 124#define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \
125 TG3_TX_RING_SIZE) 125 TG3_TX_RING_SIZE)
126#define TX_BUFFS_AVAIL(TP) \
127 ((TP)->tx_pending - \
128 (((TP)->tx_prod - (TP)->tx_cons) & (TG3_TX_RING_SIZE - 1)))
129#define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1)) 126#define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1))
130 127
131#define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64) 128#define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64)
@@ -2987,6 +2984,13 @@ static void tg3_tx_recover(struct tg3 *tp)
2987 spin_unlock(&tp->lock); 2984 spin_unlock(&tp->lock);
2988} 2985}
2989 2986
2987static inline u32 tg3_tx_avail(struct tg3 *tp)
2988{
2989 smp_mb();
2990 return (tp->tx_pending -
2991 ((tp->tx_prod - tp->tx_cons) & (TG3_TX_RING_SIZE - 1)));
2992}
2993
2990/* Tigon3 never reports partial packet sends. So we do not 2994/* Tigon3 never reports partial packet sends. So we do not
2991 * need special logic to handle SKBs that have not had all 2995 * need special logic to handle SKBs that have not had all
2992 * of their frags sent yet, like SunGEM does. 2996 * of their frags sent yet, like SunGEM does.
@@ -3038,12 +3042,20 @@ static void tg3_tx(struct tg3 *tp)
3038 3042
3039 tp->tx_cons = sw_idx; 3043 tp->tx_cons = sw_idx;
3040 3044
3041 if (unlikely(netif_queue_stopped(tp->dev))) { 3045 /* Need to make the tx_cons update visible to tg3_start_xmit()
3042 spin_lock(&tp->tx_lock); 3046 * before checking for netif_queue_stopped(). Without the
3047 * memory barrier, there is a small possibility that tg3_start_xmit()
3048 * will miss it and cause the queue to be stopped forever.
3049 */
3050 smp_mb();
3051
3052 if (unlikely(netif_queue_stopped(tp->dev) &&
3053 (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH))) {
3054 netif_tx_lock(tp->dev);
3043 if (netif_queue_stopped(tp->dev) && 3055 if (netif_queue_stopped(tp->dev) &&
3044 (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH)) 3056 (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH))
3045 netif_wake_queue(tp->dev); 3057 netif_wake_queue(tp->dev);
3046 spin_unlock(&tp->tx_lock); 3058 netif_tx_unlock(tp->dev);
3047 } 3059 }
3048} 3060}
3049 3061
@@ -3795,7 +3807,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
3795 * interrupt. Furthermore, IRQ processing runs lockless so we have 3807 * interrupt. Furthermore, IRQ processing runs lockless so we have
3796 * no IRQ context deadlocks to worry about either. Rejoice! 3808 * no IRQ context deadlocks to worry about either. Rejoice!
3797 */ 3809 */
3798 if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { 3810 if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
3799 if (!netif_queue_stopped(dev)) { 3811 if (!netif_queue_stopped(dev)) {
3800 netif_stop_queue(dev); 3812 netif_stop_queue(dev);
3801 3813
@@ -3891,12 +3903,10 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
3891 tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); 3903 tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry);
3892 3904
3893 tp->tx_prod = entry; 3905 tp->tx_prod = entry;
3894 if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) { 3906 if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) {
3895 spin_lock(&tp->tx_lock);
3896 netif_stop_queue(dev); 3907 netif_stop_queue(dev);
3897 if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH) 3908 if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH)
3898 netif_wake_queue(tp->dev); 3909 netif_wake_queue(tp->dev);
3899 spin_unlock(&tp->tx_lock);
3900 } 3910 }
3901 3911
3902out_unlock: 3912out_unlock:
@@ -3918,7 +3928,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb)
3918 struct sk_buff *segs, *nskb; 3928 struct sk_buff *segs, *nskb;
3919 3929
3920 /* Estimate the number of fragments in the worst case */ 3930 /* Estimate the number of fragments in the worst case */
3921 if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->gso_segs * 3))) { 3931 if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->gso_segs * 3))) {
3922 netif_stop_queue(tp->dev); 3932 netif_stop_queue(tp->dev);
3923 return NETDEV_TX_BUSY; 3933 return NETDEV_TX_BUSY;
3924 } 3934 }
@@ -3958,7 +3968,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
3958 * interrupt. Furthermore, IRQ processing runs lockless so we have 3968 * interrupt. Furthermore, IRQ processing runs lockless so we have
3959 * no IRQ context deadlocks to worry about either. Rejoice! 3969 * no IRQ context deadlocks to worry about either. Rejoice!
3960 */ 3970 */
3961 if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { 3971 if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
3962 if (!netif_queue_stopped(dev)) { 3972 if (!netif_queue_stopped(dev)) {
3963 netif_stop_queue(dev); 3973 netif_stop_queue(dev);
3964 3974
@@ -4108,12 +4118,10 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
4108 tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); 4118 tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry);
4109 4119
4110 tp->tx_prod = entry; 4120 tp->tx_prod = entry;
4111 if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) { 4121 if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) {
4112 spin_lock(&tp->tx_lock);
4113 netif_stop_queue(dev); 4122 netif_stop_queue(dev);
4114 if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH) 4123 if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH)
4115 netif_wake_queue(tp->dev); 4124 netif_wake_queue(tp->dev);
4116 spin_unlock(&tp->tx_lock);
4117 } 4125 }
4118 4126
4119out_unlock: 4127out_unlock:
@@ -11472,7 +11480,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
11472 tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA; 11480 tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA;
11473#endif 11481#endif
11474 spin_lock_init(&tp->lock); 11482 spin_lock_init(&tp->lock);
11475 spin_lock_init(&tp->tx_lock);
11476 spin_lock_init(&tp->indirect_lock); 11483 spin_lock_init(&tp->indirect_lock);
11477 INIT_WORK(&tp->reset_task, tg3_reset_task, tp); 11484 INIT_WORK(&tp->reset_task, tg3_reset_task, tp);
11478 11485
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index ba2c98711c88..3ecf356cfb08 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2079,9 +2079,9 @@ struct tg3 {
2079 * lock: Held during reset, PHY access, timer, and when 2079 * lock: Held during reset, PHY access, timer, and when
2080 * updating tg3_flags and tg3_flags2. 2080 * updating tg3_flags and tg3_flags2.
2081 * 2081 *
2082 * tx_lock: Held during tg3_start_xmit and tg3_tx only 2082 * netif_tx_lock: Held during tg3_start_xmit. tg3_tx holds
2083 * when calling netif_[start|stop]_queue. 2083 * netif_tx_lock when it needs to call
2084 * tg3_start_xmit is protected by netif_tx_lock. 2084 * netif_wake_queue.
2085 * 2085 *
2086 * Both of these locks are to be held with BH safety. 2086 * Both of these locks are to be held with BH safety.
2087 * 2087 *
@@ -2118,8 +2118,6 @@ struct tg3 {
2118 u32 tx_cons; 2118 u32 tx_cons;
2119 u32 tx_pending; 2119 u32 tx_pending;
2120 2120
2121 spinlock_t tx_lock;
2122
2123 struct tg3_tx_buffer_desc *tx_ring; 2121 struct tg3_tx_buffer_desc *tx_ring;
2124 struct tx_ring_info *tx_buffers; 2122 struct tx_ring_info *tx_buffers;
2125 dma_addr_t tx_desc_mapping; 2123 dma_addr_t tx_desc_mapping;