aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
authorJohannes Berg <johannes@sipsolutions.net>2009-07-24 14:13:14 -0400
committerJohn W. Linville <linville@tuxdriver.com>2009-07-27 15:19:34 -0400
commit3995bd9332a51b626237d6671cfeb7235e6c1305 (patch)
tree6e707b7586c30684be54c24720c4c7f590e1691f /drivers/net
parent8bae1b2b13beb4cf4c0f119f97640503c2b74b0f (diff)
iwlwifi: fix TX queue race
I had a problem on 4965 hardware (well, probably other hardware too, but others don't survive my stress testing right now, unfortunately) where the driver was sending invalid commands to the device, but no such thing could be seen from the driver's point of view. I could reproduce this fairly easily by sending multiple TCP streams with iperf on different TIDs, though sometimes a single iperf stream was sufficient. It even happened with a single core, but I have forced preemption turned on. The culprit was a queue overrun, where we advanced the queue's write pointer over the read pointer. After careful analysis I've come to the conclusion that the cause is a race condition between iwlwifi and mac80211. mac80211, of course, checks whether the queue is stopped, before transmitting a frame. This effectively looks like this: lock(queues) if (stopped(queue)) { unlock(queues) return busy; } unlock(queues) ... <-- this place will be important there is some more code here drv_tx(frame) The driver, on the other hand, can stop and start queues, which does lock(queues) mark_running/stopped(queue) unlock(queues) [if marked running: wake up tasklet to send pending frames] Now, however, once the driver starts the queue, mac80211 can see that and end up at the marked place above, at which point for some reason the driver seems to stop the queue again (I don't understand that) and then we end up transmitting while the queue is actually full. Now, this shouldn't actually matter much, but for some reason I've seen it happen multiple times in a row and the queue actually overflows, at which point the queue bites itself in the tail and things go completely wrong. This patch fixes this by just dropping the packet should this have happened, and making the lock in iwlwifi cover everything so iwlwifi can't race against itself (dropping the lock there might make it more likely, but it did seem to happen without that too). Since we can't hold the lock across drv_tx() above, I see no way to fix this in mac80211, but I also don't understand why I haven't seen this before -- maybe I just never stress tested it this badly. With this patch, the device has survived many minutes of simultanously sending two iperf streams on different TIDs with combined throughput of about 60 Mbps. Signed-off-by: Johannes Berg <johannes@sipsolutions.net> Signed-off-by: Reinette Chatre <reinette.chatre@intel.com> Signed-off-by: John W. Linville <linville@tuxdriver.com>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-tx.c12
1 files changed, 6 insertions, 6 deletions
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 9bbeec9427f..5febb318636 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -720,8 +720,6 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
720 goto drop_unlock; 720 goto drop_unlock;
721 } 721 }
722 722
723 spin_unlock_irqrestore(&priv->lock, flags);
724
725 hdr_len = ieee80211_hdrlen(fc); 723 hdr_len = ieee80211_hdrlen(fc);
726 724
727 /* Find (or create) index into station table for destination station */ 725 /* Find (or create) index into station table for destination station */
@@ -729,7 +727,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
729 if (sta_id == IWL_INVALID_STATION) { 727 if (sta_id == IWL_INVALID_STATION) {
730 IWL_DEBUG_DROP(priv, "Dropping - INVALID STATION: %pM\n", 728 IWL_DEBUG_DROP(priv, "Dropping - INVALID STATION: %pM\n",
731 hdr->addr1); 729 hdr->addr1);
732 goto drop; 730 goto drop_unlock;
733 } 731 }
734 732
735 IWL_DEBUG_TX(priv, "station Id %d\n", sta_id); 733 IWL_DEBUG_TX(priv, "station Id %d\n", sta_id);
@@ -750,14 +748,17 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
750 txq_id = priv->stations[sta_id].tid[tid].agg.txq_id; 748 txq_id = priv->stations[sta_id].tid[tid].agg.txq_id;
751 swq_id = iwl_virtual_agg_queue_num(swq_id, txq_id); 749 swq_id = iwl_virtual_agg_queue_num(swq_id, txq_id);
752 } 750 }
753 priv->stations[sta_id].tid[tid].tfds_in_queue++;
754 } 751 }
755 752
756 txq = &priv->txq[txq_id]; 753 txq = &priv->txq[txq_id];
757 q = &txq->q; 754 q = &txq->q;
758 txq->swq_id = swq_id; 755 txq->swq_id = swq_id;
759 756
760 spin_lock_irqsave(&priv->lock, flags); 757 if (unlikely(iwl_queue_space(q) < q->high_mark))
758 goto drop_unlock;
759
760 if (ieee80211_is_data_qos(fc))
761 priv->stations[sta_id].tid[tid].tfds_in_queue++;
761 762
762 /* Set up driver data for this TFD */ 763 /* Set up driver data for this TFD */
763 memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl_tx_info)); 764 memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl_tx_info));
@@ -902,7 +903,6 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
902 903
903drop_unlock: 904drop_unlock:
904 spin_unlock_irqrestore(&priv->lock, flags); 905 spin_unlock_irqrestore(&priv->lock, flags);
905drop:
906 return -1; 906 return -1;
907} 907}
908EXPORT_SYMBOL(iwl_tx_skb); 908EXPORT_SYMBOL(iwl_tx_skb);