about summary refs log tree commit diff stats
path: root/drivers
diff options
context:
space:
mode:
authorMichael Buesch <mb@bu3sch.de>2009-02-19 17:45:43 -0500
committerJohn W. Linville <linville@tuxdriver.com>2009-02-27 14:52:48 -0500
commitbdceeb2dad5c8487ffeb4d0fa949686e4350ec7f (patch)
treea980d4cb770bf47a939206fc9fbb92deb3232938 /drivers
parent8eccb53f1b858c9bd0b745f839174725b76508ec (diff)
b43: Optimize DMA buffers
In the old days we used one slot per frame. But when we changed that to 2, we didn't raise the overall slot count, which effectively halved the number of usable slots. Double the number of TX slots, so we have an effective hardware queue of 128 frames per QoS queue. Also optimize the TX header cache handling. We don't need a cached TX header for slots that will never carry an actual header, so we reduce the memory consumption of the cache by 50%. As a net result we end up with more or less the same memory usage before and after this patch (except a few tiny meta structures), but have twice the number of TX slots available. Signed-off-by: Michael Buesch <mb@bu3sch.de> Signed-off-by: John W. Linville <linville@tuxdriver.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/wireless/b43/dma.c38
-rw-r--r--drivers/net/wireless/b43/dma.h4
2 files changed, 25 insertions, 17 deletions
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index 38246046a42..189b2ec1bac 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -41,6 +41,12 @@
41#include <asm/div64.h> 41#include <asm/div64.h>
42 42
43 43
44/* Required number of TX DMA slots per TX frame.
45 * This currently is 2, because we put the header and the ieee80211 frame
46 * into separate slots. */
47#define TX_SLOTS_PER_FRAME 2
48
49
44/* 32bit DMA ops. */ 50/* 32bit DMA ops. */
45static 51static
46struct b43_dmadesc_generic *op32_idx2desc(struct b43_dmaring *ring, 52struct b43_dmadesc_generic *op32_idx2desc(struct b43_dmaring *ring,
@@ -574,12 +580,11 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
574 return -ENOMEM; 580 return -ENOMEM;
575 dmaaddr = map_descbuffer(ring, skb->data, 581 dmaaddr = map_descbuffer(ring, skb->data,
576 ring->rx_buffersize, 0); 582 ring->rx_buffersize, 0);
577 } 583 if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) {
578 584 b43err(ring->dev->wl, "RX DMA buffer allocation failed\n");
579 if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) { 585 dev_kfree_skb_any(skb);
580 b43err(ring->dev->wl, "RX DMA buffer allocation failed\n"); 586 return -EIO;
581 dev_kfree_skb_any(skb); 587 }
582 return -EIO;
583 } 588 }
584 589
585 meta->skb = skb; 590 meta->skb = skb;
@@ -837,7 +842,7 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
837#endif 842#endif
838 843
839 if (for_tx) { 844 if (for_tx) {
840 ring->txhdr_cache = kcalloc(ring->nr_slots, 845 ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME,
841 b43_txhdr_size(dev), 846 b43_txhdr_size(dev),
842 GFP_KERNEL); 847 GFP_KERNEL);
843 if (!ring->txhdr_cache) 848 if (!ring->txhdr_cache)
@@ -853,7 +858,7 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
853 b43_txhdr_size(dev), 1)) { 858 b43_txhdr_size(dev), 1)) {
854 /* ugh realloc */ 859 /* ugh realloc */
855 kfree(ring->txhdr_cache); 860 kfree(ring->txhdr_cache);
856 ring->txhdr_cache = kcalloc(ring->nr_slots, 861 ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME,
857 b43_txhdr_size(dev), 862 b43_txhdr_size(dev),
858 GFP_KERNEL | GFP_DMA); 863 GFP_KERNEL | GFP_DMA);
859 if (!ring->txhdr_cache) 864 if (!ring->txhdr_cache)
@@ -1144,7 +1149,10 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
1144 u16 cookie; 1149 u16 cookie;
1145 size_t hdrsize = b43_txhdr_size(ring->dev); 1150 size_t hdrsize = b43_txhdr_size(ring->dev);
1146 1151
1147#define SLOTS_PER_PACKET 2 1152 /* Important note: If the number of used DMA slots per TX frame
1153 * is changed here, the TX_SLOTS_PER_FRAME definition at the top of
1154 * the file has to be updated, too!
1155 */
1148 1156
1149 old_top_slot = ring->current_slot; 1157 old_top_slot = ring->current_slot;
1150 old_used_slots = ring->used_slots; 1158 old_used_slots = ring->used_slots;
@@ -1154,7 +1162,7 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
1154 desc = ops->idx2desc(ring, slot, &meta_hdr); 1162 desc = ops->idx2desc(ring, slot, &meta_hdr);
1155 memset(meta_hdr, 0, sizeof(*meta_hdr)); 1163 memset(meta_hdr, 0, sizeof(*meta_hdr));
1156 1164
1157 header = &(ring->txhdr_cache[slot * hdrsize]); 1165 header = &(ring->txhdr_cache[(slot / TX_SLOTS_PER_FRAME) * hdrsize]);
1158 cookie = generate_cookie(ring, slot); 1166 cookie = generate_cookie(ring, slot);
1159 err = b43_generate_txhdr(ring->dev, header, 1167 err = b43_generate_txhdr(ring->dev, header,
1160 skb->data, skb->len, info, cookie); 1168 skb->data, skb->len, info, cookie);
@@ -1308,7 +1316,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
1308 * That would be a mac80211 bug. */ 1316 * That would be a mac80211 bug. */
1309 B43_WARN_ON(ring->stopped); 1317 B43_WARN_ON(ring->stopped);
1310 1318
1311 if (unlikely(free_slots(ring) < SLOTS_PER_PACKET)) { 1319 if (unlikely(free_slots(ring) < TX_SLOTS_PER_FRAME)) {
1312 b43warn(dev->wl, "DMA queue overflow\n"); 1320 b43warn(dev->wl, "DMA queue overflow\n");
1313 err = -ENOSPC; 1321 err = -ENOSPC;
1314 goto out_unlock; 1322 goto out_unlock;
@@ -1332,7 +1340,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
1332 goto out_unlock; 1340 goto out_unlock;
1333 } 1341 }
1334 ring->nr_tx_packets++; 1342 ring->nr_tx_packets++;
1335 if ((free_slots(ring) < SLOTS_PER_PACKET) || 1343 if ((free_slots(ring) < TX_SLOTS_PER_FRAME) ||
1336 should_inject_overflow(ring)) { 1344 should_inject_overflow(ring)) {
1337 /* This TX ring is full. */ 1345 /* This TX ring is full. */
1338 ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb)); 1346 ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
@@ -1416,7 +1424,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
1416 } 1424 }
1417 dev->stats.last_tx = jiffies; 1425 dev->stats.last_tx = jiffies;
1418 if (ring->stopped) { 1426 if (ring->stopped) {
1419 B43_WARN_ON(free_slots(ring) < SLOTS_PER_PACKET); 1427 B43_WARN_ON(free_slots(ring) < TX_SLOTS_PER_FRAME);
1420 ieee80211_wake_queue(dev->wl->hw, ring->queue_prio); 1428 ieee80211_wake_queue(dev->wl->hw, ring->queue_prio);
1421 ring->stopped = 0; 1429 ring->stopped = 0;
1422 if (b43_debug(dev, B43_DBG_DMAVERBOSE)) { 1430 if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
@@ -1439,8 +1447,8 @@ void b43_dma_get_tx_stats(struct b43_wldev *dev,
1439 ring = select_ring_by_priority(dev, i); 1447 ring = select_ring_by_priority(dev, i);
1440 1448
1441 spin_lock_irqsave(&ring->lock, flags); 1449 spin_lock_irqsave(&ring->lock, flags);
1442 stats[i].len = ring->used_slots / SLOTS_PER_PACKET; 1450 stats[i].len = ring->used_slots / TX_SLOTS_PER_FRAME;
1443 stats[i].limit = ring->nr_slots / SLOTS_PER_PACKET; 1451 stats[i].limit = ring->nr_slots / TX_SLOTS_PER_FRAME;
1444 stats[i].count = ring->nr_tx_packets; 1452 stats[i].count = ring->nr_tx_packets;
1445 spin_unlock_irqrestore(&ring->lock, flags); 1453 spin_unlock_irqrestore(&ring->lock, flags);
1446 } 1454 }
diff --git a/drivers/net/wireless/b43/dma.h b/drivers/net/wireless/b43/dma.h
index 4ec24e8f4fd..05dde646d83 100644
--- a/drivers/net/wireless/b43/dma.h
+++ b/drivers/net/wireless/b43/dma.h
@@ -162,7 +162,7 @@ struct b43_dmadesc_generic {
162#define B43_DMA0_RX_FRAMEOFFSET 30 162#define B43_DMA0_RX_FRAMEOFFSET 30
163 163
164/* DMA engine tuning knobs */ 164/* DMA engine tuning knobs */
165#define B43_TXRING_SLOTS 128 165#define B43_TXRING_SLOTS 256
166#define B43_RXRING_SLOTS 64 166#define B43_RXRING_SLOTS 64
167#define B43_DMA0_RX_BUFFERSIZE IEEE80211_MAX_FRAME_LEN 167#define B43_DMA0_RX_BUFFERSIZE IEEE80211_MAX_FRAME_LEN
168 168
@@ -212,7 +212,7 @@ struct b43_dmaring {
212 void *descbase; 212 void *descbase;
213 /* Meta data about all descriptors. */ 213 /* Meta data about all descriptors. */
214 struct b43_dmadesc_meta *meta; 214 struct b43_dmadesc_meta *meta;
215 /* Cache of TX headers for each slot. 215 /* Cache of TX headers for each TX frame.
216 * This is to avoid an allocation on each TX. 216 * This is to avoid an allocation on each TX.
217 * This is NULL for an RX ring. 217 * This is NULL for an RX ring.
218 */ 218 */