author    Johannes Berg <johannes.berg@intel.com>	2013-02-27 07:18:50 -0500
committer Johannes Berg <johannes.berg@intel.com>	2013-02-28 05:49:42 -0500
commit    38c0f334b359953f010e9b921e0b55278d3918f7
tree      0deec3b8af53cc2caab0fbfb65de7a5bf7b3dc1d /drivers/net/wireless/iwlwifi/pcie/tx.c
parent    aed7d9ac1836defe033b561f4306e39014ac56fd
iwlwifi: use coherent DMA memory for command header
Recently, in commit 8a964f44e01ad3bbc208c3e80d931ba91b9ea786 ("iwlwifi:
always copy first 16 bytes of commands"), we fixed the problem that the
hardware writes back to the command, which could overwrite parts of the
data that were still needed and would thus be corrupted.

Investigating this problem more closely, we found that this write-back
isn't really ordered very well with respect to other DMA traffic.
Therefore, it sometimes happened that the write-back occurred after the
command was unmapped again; this is clearly an issue and could corrupt
the next allocation that goes to that spot, or (better) cause IOMMU
faults.

To fix this, allocate coherent memory for the first 16 bytes of each
command, containing the write-back part, and use it for all queues. All
the dynamic DMA mappings then only need to be TO_DEVICE. This ensures
that even when the write-back happens "too late" it can't hit memory
that has been freed or a mapping that no longer exists.

Since the actual command is now no longer modified, we can also remove
CMD_WANT_HCMD and get rid of the DMA sync that was necessary to update
the scratch pointer.

Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
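To make the scheme concrete, here is a rough sketch of the idea in
isolation. This is an illustration, not the driver's exact code:
alloc_cmd_scratchbufs() is a hypothetical wrapper, and the helper body
shown for iwl_pcie_get_scratchbuf_dma() is inferred from how the diff
below uses it; the struct layout follows what the BUILD_BUG_ON()s in
the patch assert (16 bytes, with 'scratch' behind the command header).

/*
 * Sketch only - assumes IWL_HCMD_SCRATCHBUF_SIZE == 16 as stated above.
 * Each command slot gets a 16-byte coherent scratch buffer, laid out so
 * that 'scratch' coincides with the scratch field of struct iwl_tx_cmd
 * as seen behind the command header.
 */
struct iwl_pcie_txq_scratch_buf {
	struct iwl_cmd_header hdr;	/* first bytes of every command */
	u8 buf[8];
	__le32 scratch;			/* hardware write-back lands here */
};

/* hypothetical helper: one coherent allocation covers all slots */
static int alloc_cmd_scratchbufs(struct iwl_trans *trans,
				 struct iwl_txq *txq, int slots_num)
{
	txq->scratchbufs = dma_alloc_coherent(trans->dev,
					      sizeof(*txq->scratchbufs) *
							slots_num,
					      &txq->scratchbufs_dma,
					      GFP_KERNEL);
	return txq->scratchbufs ? 0 : -ENOMEM;
}

/*
 * DMA address of slot idx. The first TB of every TFD points here, so a
 * "late" hardware write-back always hits memory that stays mapped for
 * the queue's lifetime, and all dynamic mappings can be DMA_TO_DEVICE.
 */
static inline dma_addr_t iwl_pcie_get_scratchbuf_dma(struct iwl_txq *txq,
						     size_t idx)
{
	return txq->scratchbufs_dma +
	       sizeof(struct iwl_pcie_txq_scratch_buf) * idx;
}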
Diffstat (limited to 'drivers/net/wireless/iwlwifi/pcie/tx.c')
-rw-r--r--	drivers/net/wireless/iwlwifi/pcie/tx.c	221
1 file changed, 107 insertions(+), 114 deletions(-)
diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c
index ff80a7e55f00..8595c16f74de 100644
--- a/drivers/net/wireless/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/tx.c
@@ -191,12 +191,9 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data)
 	}
 
 	for (i = q->read_ptr; i != q->write_ptr;
-	     i = iwl_queue_inc_wrap(i, q->n_bd)) {
-		struct iwl_tx_cmd *tx_cmd =
-			(struct iwl_tx_cmd *)txq->entries[i].cmd->payload;
+	     i = iwl_queue_inc_wrap(i, q->n_bd))
 		IWL_ERR(trans, "scratch %d = 0x%08x\n", i,
-			get_unaligned_le32(&tx_cmd->scratch));
-	}
+			le32_to_cpu(txq->scratchbufs[i].scratch));
 
 	iwl_op_mode_nic_error(trans->op_mode);
 }
@@ -382,14 +379,8 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 		return;
 	}
 
-	/* Unmap tx_cmd */
-	if (num_tbs)
-		dma_unmap_single(trans->dev,
-				 dma_unmap_addr(meta, mapping),
-				 dma_unmap_len(meta, len),
-				 DMA_BIDIRECTIONAL);
+	/* first TB is never freed - it's the scratchbuf data */
 
-	/* Unmap chunks, if any. */
 	for (i = 1; i < num_tbs; i++)
 		dma_unmap_single(trans->dev, iwl_pcie_tfd_tb_get_addr(tfd, i),
 				 iwl_pcie_tfd_tb_get_len(tfd, i),
@@ -478,6 +469,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX;
+	size_t scratchbuf_sz;
 	int i;
 
 	if (WARN_ON(txq->entries || txq->tfds))
@@ -513,9 +505,25 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 		IWL_ERR(trans, "dma_alloc_coherent(%zd) failed\n", tfd_sz);
 		goto error;
 	}
+
+	BUILD_BUG_ON(IWL_HCMD_SCRATCHBUF_SIZE != sizeof(*txq->scratchbufs));
+	BUILD_BUG_ON(offsetof(struct iwl_pcie_txq_scratch_buf, scratch) !=
+			sizeof(struct iwl_cmd_header) +
+			offsetof(struct iwl_tx_cmd, scratch));
+
+	scratchbuf_sz = sizeof(*txq->scratchbufs) * slots_num;
+
+	txq->scratchbufs = dma_alloc_coherent(trans->dev, scratchbuf_sz,
+					      &txq->scratchbufs_dma,
+					      GFP_KERNEL);
+	if (!txq->scratchbufs)
+		goto err_free_tfds;
+
 	txq->q.id = txq_id;
 
 	return 0;
+err_free_tfds:
+	dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->q.dma_addr);
 error:
 	if (txq->entries && txq_id == trans_pcie->cmd_queue)
 		for (i = 0; i < slots_num; i++)
@@ -600,7 +608,6 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 	if (txq_id == trans_pcie->cmd_queue)
 		for (i = 0; i < txq->q.n_window; i++) {
 			kfree(txq->entries[i].cmd);
-			kfree(txq->entries[i].copy_cmd);
 			kfree(txq->entries[i].free_buf);
 		}
 
@@ -609,6 +616,10 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 		dma_free_coherent(dev, sizeof(struct iwl_tfd) *
 				  txq->q.n_bd, txq->tfds, txq->q.dma_addr);
 		txq->q.dma_addr = 0;
+
+		dma_free_coherent(dev,
+				  sizeof(*txq->scratchbufs) * txq->q.n_window,
+				  txq->scratchbufs, txq->scratchbufs_dma);
 	}
 
 	kfree(txq->entries);
@@ -1142,7 +1153,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	void *dup_buf = NULL;
 	dma_addr_t phys_addr;
 	int idx;
-	u16 copy_size, cmd_size, dma_size;
+	u16 copy_size, cmd_size, scratch_size;
 	bool had_nocopy = false;
 	int i;
 	u32 cmd_pos;
@@ -1162,9 +1173,9 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		if (!cmd->len[i])
 			continue;
 
-		/* need at least IWL_HCMD_MIN_COPY_SIZE copied */
-		if (copy_size < IWL_HCMD_MIN_COPY_SIZE) {
-			int copy = IWL_HCMD_MIN_COPY_SIZE - copy_size;
+		/* need at least IWL_HCMD_SCRATCHBUF_SIZE copied */
+		if (copy_size < IWL_HCMD_SCRATCHBUF_SIZE) {
+			int copy = IWL_HCMD_SCRATCHBUF_SIZE - copy_size;
 
 			if (copy > cmdlen[i])
 				copy = cmdlen[i];
@@ -1256,9 +1267,9 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		if (!cmd->len[i])
 			continue;
 
-		/* need at least IWL_HCMD_MIN_COPY_SIZE copied */
-		if (copy_size < IWL_HCMD_MIN_COPY_SIZE) {
-			copy = IWL_HCMD_MIN_COPY_SIZE - copy_size;
+		/* need at least IWL_HCMD_SCRATCHBUF_SIZE copied */
+		if (copy_size < IWL_HCMD_SCRATCHBUF_SIZE) {
+			copy = IWL_HCMD_SCRATCHBUF_SIZE - copy_size;
 
 			if (copy > cmd->len[i])
 				copy = cmd->len[i];
@@ -1276,48 +1287,36 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		}
 	}
 
-	WARN_ON_ONCE(txq->entries[idx].copy_cmd);
-
-	/*
-	 * since out_cmd will be the source address of the FH, it will write
-	 * the retry count there. So when the user needs to receivce the HCMD
-	 * that corresponds to the response in the response handler, it needs
-	 * to set CMD_WANT_HCMD.
-	 */
-	if (cmd->flags & CMD_WANT_HCMD) {
-		txq->entries[idx].copy_cmd =
-			kmemdup(out_cmd, cmd_pos, GFP_ATOMIC);
-		if (unlikely(!txq->entries[idx].copy_cmd)) {
-			idx = -ENOMEM;
-			goto out;
-		}
-	}
-
 	IWL_DEBUG_HC(trans,
 		     "Sending command %s (#%x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
 		     get_cmd_string(trans_pcie, out_cmd->hdr.cmd),
 		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
 		     cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue);
 
-	/*
-	 * If the entire command is smaller than IWL_HCMD_MIN_COPY_SIZE, we must
-	 * still map at least that many bytes for the hardware to write back to.
-	 * We have enough space, so that's not a problem.
-	 */
-	dma_size = max_t(u16, copy_size, IWL_HCMD_MIN_COPY_SIZE);
+	/* start the TFD with the scratchbuf */
+	scratch_size = min_t(int, copy_size, IWL_HCMD_SCRATCHBUF_SIZE);
+	memcpy(&txq->scratchbufs[q->write_ptr], &out_cmd->hdr, scratch_size);
+	iwl_pcie_txq_build_tfd(trans, txq,
+			       iwl_pcie_get_scratchbuf_dma(txq, q->write_ptr),
+			       scratch_size, 1);
+
+	/* map first command fragment, if any remains */
+	if (copy_size > scratch_size) {
+		phys_addr = dma_map_single(trans->dev,
+					   ((u8 *)&out_cmd->hdr) + scratch_size,
+					   copy_size - scratch_size,
+					   DMA_TO_DEVICE);
+		if (dma_mapping_error(trans->dev, phys_addr)) {
+			iwl_pcie_tfd_unmap(trans, out_meta,
+					   &txq->tfds[q->write_ptr]);
+			idx = -ENOMEM;
+			goto out;
+		}
 
-	phys_addr = dma_map_single(trans->dev, &out_cmd->hdr, dma_size,
-				   DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(trans->dev, phys_addr))) {
-		idx = -ENOMEM;
-		goto out;
+		iwl_pcie_txq_build_tfd(trans, txq, phys_addr,
+				       copy_size - scratch_size, 0);
 	}
 
-	dma_unmap_addr_set(out_meta, mapping, phys_addr);
-	dma_unmap_len_set(out_meta, len, dma_size);
-
-	iwl_pcie_txq_build_tfd(trans, txq, phys_addr, copy_size, 1);
-
 	/* map the remaining (adjusted) nocopy/dup fragments */
 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
 		const void *data = cmddata[i];
@@ -1586,10 +1585,9 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	struct iwl_cmd_meta *out_meta;
 	struct iwl_txq *txq;
 	struct iwl_queue *q;
-	dma_addr_t phys_addr = 0;
-	dma_addr_t txcmd_phys;
-	dma_addr_t scratch_phys;
-	u16 len, firstlen, secondlen;
+	dma_addr_t tb0_phys, tb1_phys, scratch_phys;
+	void *tb1_addr;
+	u16 len, tb1_len, tb2_len;
 	u8 wait_write_ptr = 0;
 	__le16 fc = hdr->frame_control;
 	u8 hdr_len = ieee80211_hdrlen(fc);
@@ -1627,85 +1625,80 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 			cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
 				    INDEX_TO_SEQ(q->write_ptr)));
 
+	tb0_phys = iwl_pcie_get_scratchbuf_dma(txq, q->write_ptr);
+	scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
+		       offsetof(struct iwl_tx_cmd, scratch);
+
+	tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
+	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
+
 	/* Set up first empty entry in queue's array of Tx/cmd buffers */
 	out_meta = &txq->entries[q->write_ptr].meta;
 
 	/*
-	 * Use the first empty entry in this queue's command buffer array
-	 * to contain the Tx command and MAC header concatenated together
-	 * (payload data will be in another buffer).
-	 * Size of this varies, due to varying MAC header length.
-	 * If end is not dword aligned, we'll have 2 extra bytes at the end
-	 * of the MAC header (device reads on dword boundaries).
-	 * We'll tell device about this padding later.
+	 * The second TB (tb1) points to the remainder of the TX command
+	 * and the 802.11 header - dword aligned size
+	 * (This calculation modifies the TX command, so do it before the
+	 * setup of the first TB)
 	 */
-	len = sizeof(struct iwl_tx_cmd) +
-		sizeof(struct iwl_cmd_header) + hdr_len;
-	firstlen = (len + 3) & ~3;
+	len = sizeof(struct iwl_tx_cmd) + sizeof(struct iwl_cmd_header) +
+	      hdr_len - IWL_HCMD_SCRATCHBUF_SIZE;
+	tb1_len = (len + 3) & ~3;
 
 	/* Tell NIC about any 2-byte padding after MAC header */
-	if (firstlen != len)
+	if (tb1_len != len)
 		tx_cmd->tx_flags |= TX_CMD_FLG_MH_PAD_MSK;
 
-	/* Physical address of this Tx command's header (not MAC header!),
-	 * within command buffer array. */
-	txcmd_phys = dma_map_single(trans->dev,
-				    &dev_cmd->hdr, firstlen,
-				    DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(trans->dev, txcmd_phys)))
-		goto out_err;
-	dma_unmap_addr_set(out_meta, mapping, txcmd_phys);
-	dma_unmap_len_set(out_meta, len, firstlen);
+	/* The first TB points to the scratchbuf data - min_copy bytes */
+	memcpy(&txq->scratchbufs[q->write_ptr], &dev_cmd->hdr,
+	       IWL_HCMD_SCRATCHBUF_SIZE);
+	iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
+			       IWL_HCMD_SCRATCHBUF_SIZE, 1);
 
-	if (!ieee80211_has_morefrags(fc)) {
-		txq->need_update = 1;
-	} else {
-		wait_write_ptr = 1;
-		txq->need_update = 0;
-	}
+	/* there must be data left over for TB1 or this code must be changed */
+	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_HCMD_SCRATCHBUF_SIZE);
+
+	/* map the data for TB1 */
+	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_HCMD_SCRATCHBUF_SIZE;
+	tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
+		goto out_err;
+	iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, 0);
 
-	/* Set up TFD's 2nd entry to point directly to remainder of skb,
-	 * if any (802.11 null frames have no payload). */
-	secondlen = skb->len - hdr_len;
-	if (secondlen > 0) {
-		phys_addr = dma_map_single(trans->dev, skb->data + hdr_len,
-					   secondlen, DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(trans->dev, phys_addr))) {
-			dma_unmap_single(trans->dev,
-					 dma_unmap_addr(out_meta, mapping),
-					 dma_unmap_len(out_meta, len),
-					 DMA_BIDIRECTIONAL);
+	/*
+	 * Set up TFD's third entry to point directly to remainder
+	 * of skb, if any (802.11 null frames have no payload).
+	 */
+	tb2_len = skb->len - hdr_len;
+	if (tb2_len > 0) {
+		dma_addr_t tb2_phys = dma_map_single(trans->dev,
+						     skb->data + hdr_len,
+						     tb2_len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) {
+			iwl_pcie_tfd_unmap(trans, out_meta,
+					   &txq->tfds[q->write_ptr]);
 			goto out_err;
 		}
+		iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, 0);
 	}
 
-	/* Attach buffers to TFD */
-	iwl_pcie_txq_build_tfd(trans, txq, txcmd_phys, firstlen, 1);
-	if (secondlen > 0)
-		iwl_pcie_txq_build_tfd(trans, txq, phys_addr, secondlen, 0);
-
-	scratch_phys = txcmd_phys + sizeof(struct iwl_cmd_header) +
-		       offsetof(struct iwl_tx_cmd, scratch);
-
-	/* take back ownership of DMA buffer to enable update */
-	dma_sync_single_for_cpu(trans->dev, txcmd_phys, firstlen,
-				DMA_BIDIRECTIONAL);
-	tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
-	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
-
 	/* Set up entry for this TFD in Tx byte-count array */
 	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
 
-	dma_sync_single_for_device(trans->dev, txcmd_phys, firstlen,
-				   DMA_BIDIRECTIONAL);
-
 	trace_iwlwifi_dev_tx(trans->dev, skb,
 			     &txq->tfds[txq->q.write_ptr],
 			     sizeof(struct iwl_tfd),
-			     &dev_cmd->hdr, firstlen,
-			     skb->data + hdr_len, secondlen);
+			     &dev_cmd->hdr, IWL_HCMD_SCRATCHBUF_SIZE + tb1_len,
+			     skb->data + hdr_len, tb2_len);
 	trace_iwlwifi_dev_tx_data(trans->dev, skb,
-				  skb->data + hdr_len, secondlen);
+				  skb->data + hdr_len, tb2_len);
+
+	if (!ieee80211_has_morefrags(fc)) {
+		txq->need_update = 1;
+	} else {
+		wait_write_ptr = 1;
+		txq->need_update = 0;
+	}
 
 	/* start timer if queue currently empty */
 	if (txq->need_update && q->read_ptr == q->write_ptr &&