aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Johannes Berg <johannes.berg@intel.com> 2012-03-19 12:12:06 -0400
committer Wey-Yi Guy <wey-yi.w.guy@intel.com> 2012-04-18 10:32:28 -0400
commit bf8440e6a6f5fabf7843dbfecb1745e49182fa1c (patch)
tree 7c7e8168f65d4af3bf89617d7727264e81d4fc83 /drivers
parent 682e5f64de0ab5be3fb2de9f66a1da87de48ec09 (diff)
iwlwifi: improve TX cache footprint
Having cmd[], meta[] and skbs[] as separate arrays in the TX queue structure is cache-inefficient, as we need the data for a given entry together. To improve this, create an array of structs with these three members (allocating meta as part of that struct) so that the data we need together is located together, improving the cache footprint.

The downside is that we need to allocate a lot of memory in one chunk, about 10KiB (on 64-bit), which isn't very efficient.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h | 29
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c | 2
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c | 31
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-trans-pcie.c | 65
4 files changed, 52 insertions, 75 deletions
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h
index 7caa875cfa36..c7f8f407bc99 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h
@@ -179,30 +179,33 @@ struct iwl_queue {
179 * space less than this */ 179 * space less than this */
180}; 180};
181 181
182#define TFD_TX_CMD_SLOTS 256
183#define TFD_CMD_SLOTS 32
184
185struct iwl_pcie_tx_queue_entry {
186 struct iwl_device_cmd *cmd;
187 struct sk_buff *skb;
188 struct iwl_cmd_meta meta;
189};
190
182/** 191/**
183 * struct iwl_tx_queue - Tx Queue for DMA 192 * struct iwl_tx_queue - Tx Queue for DMA
184 * @q: generic Rx/Tx queue descriptor 193 * @q: generic Rx/Tx queue descriptor
185 * @bd: base of circular buffer of TFDs 194 * @tfds: transmit frame descriptors (DMA memory)
186 * @cmd: array of command/TX buffer pointers 195 * @entries: transmit entries (driver state)
187 * @meta: array of meta data for each command/tx buffer 196 * @lock: queue lock
188 * @dma_addr_cmd: physical address of cmd/tx buffer array 197 * @stuck_timer: timer that fires if queue gets stuck
189 * @txb: array of per-TFD driver data 198 * @trans_pcie: pointer back to transport (for timer)
190 * lock: queue lock
191 * @time_stamp: time (in jiffies) of last read_ptr change
192 * @need_update: indicates need to update read/write index 199 * @need_update: indicates need to update read/write index
200 * @active: stores if queue is active
193 * 201 *
194 * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame 202 * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame
195 * descriptors) and required locking structures. 203 * descriptors) and required locking structures.
196 */ 204 */
197#define TFD_TX_CMD_SLOTS 256
198#define TFD_CMD_SLOTS 32
199
200struct iwl_tx_queue { 205struct iwl_tx_queue {
201 struct iwl_queue q; 206 struct iwl_queue q;
202 struct iwl_tfd *tfds; 207 struct iwl_tfd *tfds;
203 struct iwl_device_cmd **cmd; 208 struct iwl_pcie_tx_queue_entry *entries;
204 struct iwl_cmd_meta *meta;
205 struct sk_buff **skbs;
206 spinlock_t lock; 209 spinlock_t lock;
207 struct timer_list stuck_timer; 210 struct timer_list stuck_timer;
208 struct iwl_trans_pcie *trans_pcie; 211 struct iwl_trans_pcie *trans_pcie;
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c
index 390490bb7f10..47c1f0a572a9 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c
@@ -425,7 +425,7 @@ static void iwl_rx_handle_rxbuf(struct iwl_trans *trans,
425 cmd_index = get_cmd_index(&txq->q, index); 425 cmd_index = get_cmd_index(&txq->q, index);
426 426
427 if (reclaim) 427 if (reclaim)
428 cmd = txq->cmd[cmd_index]; 428 cmd = txq->entries[cmd_index].cmd;
429 else 429 else
430 cmd = NULL; 430 cmd = NULL;
431 431
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c
index bb0a31418521..1b2aed62c7e3 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c
@@ -58,7 +58,7 @@ void iwl_trans_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
58 u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE; 58 u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;
59 __le16 bc_ent; 59 __le16 bc_ent;
60 struct iwl_tx_cmd *tx_cmd = 60 struct iwl_tx_cmd *tx_cmd =
61 (struct iwl_tx_cmd *) txq->cmd[txq->q.write_ptr]->payload; 61 (void *) txq->entries[txq->q.write_ptr].cmd->payload;
62 62
63 scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; 63 scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
64 64
@@ -221,13 +221,14 @@ void iwlagn_txq_free_tfd(struct iwl_trans *trans, struct iwl_tx_queue *txq,
221 221
222 lockdep_assert_held(&txq->lock); 222 lockdep_assert_held(&txq->lock);
223 223
224 iwlagn_unmap_tfd(trans, &txq->meta[index], &tfd_tmp[index], dma_dir); 224 iwlagn_unmap_tfd(trans, &txq->entries[index].meta,
225 &tfd_tmp[index], dma_dir);
225 226
226 /* free SKB */ 227 /* free SKB */
227 if (txq->skbs) { 228 if (txq->entries) {
228 struct sk_buff *skb; 229 struct sk_buff *skb;
229 230
230 skb = txq->skbs[index]; 231 skb = txq->entries[index].skb;
231 232
232 /* Can be called from irqs-disabled context 233 /* Can be called from irqs-disabled context
233 * If skb is not NULL, it means that the whole queue is being 234 * If skb is not NULL, it means that the whole queue is being
@@ -235,7 +236,7 @@ void iwlagn_txq_free_tfd(struct iwl_trans *trans, struct iwl_tx_queue *txq,
235 */ 236 */
236 if (skb) { 237 if (skb) {
237 iwl_op_mode_free_skb(trans->op_mode, skb); 238 iwl_op_mode_free_skb(trans->op_mode, skb);
238 txq->skbs[index] = NULL; 239 txq->entries[index].skb = NULL;
239 } 240 }
240 } 241 }
241} 242}
@@ -358,7 +359,7 @@ static void iwlagn_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
358 u8 sta_id = 0; 359 u8 sta_id = 0;
359 __le16 bc_ent; 360 __le16 bc_ent;
360 struct iwl_tx_cmd *tx_cmd = 361 struct iwl_tx_cmd *tx_cmd =
361 (struct iwl_tx_cmd *) txq->cmd[txq->q.read_ptr]->payload; 362 (void *)txq->entries[txq->q.read_ptr].cmd->payload;
362 363
363 WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX); 364 WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
364 365
@@ -578,8 +579,8 @@ static int iwl_enqueue_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
578 } 579 }
579 580
580 idx = get_cmd_index(q, q->write_ptr); 581 idx = get_cmd_index(q, q->write_ptr);
581 out_cmd = txq->cmd[idx]; 582 out_cmd = txq->entries[idx].cmd;
582 out_meta = &txq->meta[idx]; 583 out_meta = &txq->entries[idx].meta;
583 584
584 memset(out_meta, 0, sizeof(*out_meta)); /* re-initialize to NULL */ 585 memset(out_meta, 0, sizeof(*out_meta)); /* re-initialize to NULL */
585 if (cmd->flags & CMD_WANT_SKB) 586 if (cmd->flags & CMD_WANT_SKB)
@@ -772,8 +773,8 @@ void iwl_tx_cmd_complete(struct iwl_trans *trans, struct iwl_rx_cmd_buffer *rxb,
772 spin_lock(&txq->lock); 773 spin_lock(&txq->lock);
773 774
774 cmd_index = get_cmd_index(&txq->q, index); 775 cmd_index = get_cmd_index(&txq->q, index);
775 cmd = txq->cmd[cmd_index]; 776 cmd = txq->entries[cmd_index].cmd;
776 meta = &txq->meta[cmd_index]; 777 meta = &txq->entries[cmd_index].meta;
777 778
778 iwlagn_unmap_tfd(trans, meta, &txq->tfds[index], 779 iwlagn_unmap_tfd(trans, meta, &txq->tfds[index],
779 DMA_BIDIRECTIONAL); 780 DMA_BIDIRECTIONAL);
@@ -905,8 +906,8 @@ cancel:
905 * in later, it will possibly set an invalid 906 * in later, it will possibly set an invalid
906 * address (cmd->meta.source). 907 * address (cmd->meta.source).
907 */ 908 */
908 trans_pcie->txq[trans_pcie->cmd_queue].meta[cmd_idx].flags &= 909 trans_pcie->txq[trans_pcie->cmd_queue].
909 ~CMD_WANT_SKB; 910 entries[cmd_idx].meta.flags &= ~CMD_WANT_SKB;
910 } 911 }
911 912
912 if (cmd->resp_pkt) { 913 if (cmd->resp_pkt) {
@@ -961,12 +962,12 @@ int iwl_tx_queue_reclaim(struct iwl_trans *trans, int txq_id, int index,
961 q->read_ptr != index; 962 q->read_ptr != index;
962 q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) { 963 q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
963 964
964 if (WARN_ON_ONCE(txq->skbs[txq->q.read_ptr] == NULL)) 965 if (WARN_ON_ONCE(txq->entries[txq->q.read_ptr].skb == NULL))
965 continue; 966 continue;
966 967
967 __skb_queue_tail(skbs, txq->skbs[txq->q.read_ptr]); 968 __skb_queue_tail(skbs, txq->entries[txq->q.read_ptr].skb);
968 969
969 txq->skbs[txq->q.read_ptr] = NULL; 970 txq->entries[txq->q.read_ptr].skb = NULL;
970 971
971 iwlagn_txq_inval_byte_cnt_tbl(trans, txq); 972 iwlagn_txq_inval_byte_cnt_tbl(trans, txq);
972 973
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
index bc610f94ea2d..e6401e8a8d4c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
@@ -333,7 +333,7 @@ static int iwl_trans_txq_alloc(struct iwl_trans *trans,
333 int i; 333 int i;
334 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); 334 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
335 335
336 if (WARN_ON(txq->meta || txq->cmd || txq->skbs || txq->tfds)) 336 if (WARN_ON(txq->entries || txq->tfds))
337 return -EINVAL; 337 return -EINVAL;
338 338
339 setup_timer(&txq->stuck_timer, iwl_trans_pcie_queue_stuck_timer, 339 setup_timer(&txq->stuck_timer, iwl_trans_pcie_queue_stuck_timer,
@@ -342,35 +342,22 @@ static int iwl_trans_txq_alloc(struct iwl_trans *trans,
342 342
343 txq->q.n_window = slots_num; 343 txq->q.n_window = slots_num;
344 344
345 txq->meta = kcalloc(slots_num, sizeof(txq->meta[0]), GFP_KERNEL); 345 txq->entries = kcalloc(slots_num,
346 txq->cmd = kcalloc(slots_num, sizeof(txq->cmd[0]), GFP_KERNEL); 346 sizeof(struct iwl_pcie_tx_queue_entry),
347 GFP_KERNEL);
347 348
348 if (!txq->meta || !txq->cmd) 349 if (!txq->entries)
349 goto error; 350 goto error;
350 351
351 if (txq_id == trans_pcie->cmd_queue) 352 if (txq_id == trans_pcie->cmd_queue)
352 for (i = 0; i < slots_num; i++) { 353 for (i = 0; i < slots_num; i++) {
353 txq->cmd[i] = kmalloc(sizeof(struct iwl_device_cmd), 354 txq->entries[i].cmd =
354 GFP_KERNEL); 355 kmalloc(sizeof(struct iwl_device_cmd),
355 if (!txq->cmd[i]) 356 GFP_KERNEL);
357 if (!txq->entries[i].cmd)
356 goto error; 358 goto error;
357 } 359 }
358 360
359 /* Alloc driver data array and TFD circular buffer */
360 /* Driver private data, only for Tx (not command) queues,
361 * not shared with device. */
362 if (txq_id != trans_pcie->cmd_queue) {
363 txq->skbs = kcalloc(TFD_QUEUE_SIZE_MAX, sizeof(txq->skbs[0]),
364 GFP_KERNEL);
365 if (!txq->skbs) {
366 IWL_ERR(trans, "kmalloc for auxiliary BD "
367 "structures failed\n");
368 goto error;
369 }
370 } else {
371 txq->skbs = NULL;
372 }
373
374 /* Circular buffer of transmit frame descriptors (TFDs), 361 /* Circular buffer of transmit frame descriptors (TFDs),
375 * shared with device */ 362 * shared with device */
376 txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz, 363 txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
@@ -383,17 +370,11 @@ static int iwl_trans_txq_alloc(struct iwl_trans *trans,
383 370
384 return 0; 371 return 0;
385error: 372error:
386 kfree(txq->skbs); 373 if (txq->entries && txq_id == trans_pcie->cmd_queue)
387 txq->skbs = NULL;
388 /* since txq->cmd has been zeroed,
389 * all non allocated cmd[i] will be NULL */
390 if (txq->cmd && txq_id == trans_pcie->cmd_queue)
391 for (i = 0; i < slots_num; i++) 374 for (i = 0; i < slots_num; i++)
392 kfree(txq->cmd[i]); 375 kfree(txq->entries[i].cmd);
393 kfree(txq->meta); 376 kfree(txq->entries);
394 kfree(txq->cmd); 377 txq->entries = NULL;
395 txq->meta = NULL;
396 txq->cmd = NULL;
397 378
398 return -ENOMEM; 379 return -ENOMEM;
399 380
@@ -405,7 +386,6 @@ static int iwl_trans_txq_init(struct iwl_trans *trans, struct iwl_tx_queue *txq,
405 int ret; 386 int ret;
406 387
407 txq->need_update = 0; 388 txq->need_update = 0;
408 memset(txq->meta, 0, sizeof(txq->meta[0]) * slots_num);
409 389
410 /* TFD_QUEUE_SIZE_MAX must be power-of-two size, otherwise 390 /* TFD_QUEUE_SIZE_MAX must be power-of-two size, otherwise
411 * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */ 391 * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */
@@ -483,7 +463,7 @@ static void iwl_tx_queue_free(struct iwl_trans *trans, int txq_id)
483 463
484 if (txq_id == trans_pcie->cmd_queue) 464 if (txq_id == trans_pcie->cmd_queue)
485 for (i = 0; i < txq->q.n_window; i++) 465 for (i = 0; i < txq->q.n_window; i++)
486 kfree(txq->cmd[i]); 466 kfree(txq->entries[i].cmd);
487 467
488 /* De-alloc circular buffer of TFDs */ 468 /* De-alloc circular buffer of TFDs */
489 if (txq->q.n_bd) { 469 if (txq->q.n_bd) {
@@ -492,15 +472,8 @@ static void iwl_tx_queue_free(struct iwl_trans *trans, int txq_id)
492 memset(&txq->q.dma_addr, 0, sizeof(txq->q.dma_addr)); 472 memset(&txq->q.dma_addr, 0, sizeof(txq->q.dma_addr));
493 } 473 }
494 474
495 /* De-alloc array of per-TFD driver data */ 475 kfree(txq->entries);
496 kfree(txq->skbs); 476 txq->entries = NULL;
497 txq->skbs = NULL;
498
499 /* deallocate arrays */
500 kfree(txq->cmd);
501 kfree(txq->meta);
502 txq->cmd = NULL;
503 txq->meta = NULL;
504 477
505 del_timer_sync(&txq->stuck_timer); 478 del_timer_sync(&txq->stuck_timer);
506 479
@@ -1295,15 +1268,15 @@ static int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
1295 spin_lock(&txq->lock); 1268 spin_lock(&txq->lock);
1296 1269
1297 /* Set up driver data for this TFD */ 1270 /* Set up driver data for this TFD */
1298 txq->skbs[q->write_ptr] = skb; 1271 txq->entries[q->write_ptr].skb = skb;
1299 txq->cmd[q->write_ptr] = dev_cmd; 1272 txq->entries[q->write_ptr].cmd = dev_cmd;
1300 1273
1301 dev_cmd->hdr.cmd = REPLY_TX; 1274 dev_cmd->hdr.cmd = REPLY_TX;
1302 dev_cmd->hdr.sequence = cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) | 1275 dev_cmd->hdr.sequence = cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
1303 INDEX_TO_SEQ(q->write_ptr))); 1276 INDEX_TO_SEQ(q->write_ptr)));
1304 1277
1305 /* Set up first empty entry in queue's array of Tx/cmd buffers */ 1278 /* Set up first empty entry in queue's array of Tx/cmd buffers */
1306 out_meta = &txq->meta[q->write_ptr]; 1279 out_meta = &txq->entries[q->write_ptr].meta;
1307 1280
1308 /* 1281 /*
1309 * Use the first empty entry in this queue's command buffer array 1282 * Use the first empty entry in this queue's command buffer array