author		Johannes Berg <johannes.berg@intel.com>	2013-02-27 07:18:50 -0500
committer	Johannes Berg <johannes.berg@intel.com>	2013-02-28 05:49:42 -0500
commit		38c0f334b359953f010e9b921e0b55278d3918f7
tree		0deec3b8af53cc2caab0fbfb65de7a5bf7b3dc1d
parent		aed7d9ac1836defe033b561f4306e39014ac56fd
iwlwifi: use coherent DMA memory for command header
Recently in commit 8a964f44e01ad3bbc208c3e80d931ba91b9ea786 ("iwlwifi: always copy first 16 bytes of commands") we fixed the problem that the hardware writes back to the command and that could overwrite parts of the data that was still needed and would thus be corrupted.

Investigating this problem more closely we found that this write-back isn't really ordered very well with respect to other DMA traffic. Therefore, it sometimes happened that the write-back occurred after unmapping the command again which is clearly an issue and could corrupt the next allocation that goes to that spot, or (better) cause IOMMU faults.

To fix this, allocate coherent memory for the first 16 bytes of each command, containing the write-back part, and use it for all queues. All the dynamic DMA mappings only need to be TO_DEVICE then. This ensures that even when the write-back happens "too late" it can't hit memory that has been freed or a mapping that doesn't exist any more.

Since now the actual command is no longer modified, we can also remove CMD_WANT_HCMD and get rid of the DMA sync that was necessary to update the scratch pointer.

Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
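The core of the change is the classic split between coherent and streaming DMA: the small region the device may write back to lives in dma_alloc_coherent() memory for the lifetime of the queue, while everything else is mapped streaming, TO_DEVICE only. A minimal sketch of that pattern follows; the names (queue_alloc_writeback, writeback_area, etc.) are illustrative, not the driver's actual code.

#include <linux/dma-mapping.h>

#define WRITEBACK_SIZE 16	/* mirrors IWL_HCMD_SCRATCHBUF_SIZE */

/* one device-writable area per command slot (illustrative) */
struct writeback_area {
	u8 data[WRITEBACK_SIZE];
};

static int queue_alloc_writeback(struct device *dev, int n_slots,
				 struct writeback_area **bufs,
				 dma_addr_t *bufs_dma)
{
	/*
	 * Coherent memory stays mapped for the queue's lifetime, so a
	 * write-back that arrives "too late" lands here harmlessly
	 * instead of in freed memory or an unmapped IOVA.
	 */
	*bufs = dma_alloc_coherent(dev, n_slots * sizeof(**bufs),
				   bufs_dma, GFP_KERNEL);
	return *bufs ? 0 : -ENOMEM;
}

static dma_addr_t writeback_dma(dma_addr_t bufs_dma, int idx)
{
	/* per-slot device address, like iwl_pcie_get_scratchbuf_dma() */
	return bufs_dma + idx * sizeof(struct writeback_area);
}

The remainder of each command is then mapped with dma_map_single(..., DMA_TO_DEVICE), since the device never writes outside the coherent area; that is what lets the patch drop the DMA_BIDIRECTIONAL mappings and the sync calls.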
Diffstat (limited to 'drivers/net')
-rw-r--r--	drivers/net/wireless/iwlwifi/dvm/sta.c		2
-rw-r--r--	drivers/net/wireless/iwlwifi/iwl-trans.h	8
-rw-r--r--	drivers/net/wireless/iwlwifi/pcie/internal.h	34
-rw-r--r--	drivers/net/wireless/iwlwifi/pcie/rx.c		14
-rw-r--r--	drivers/net/wireless/iwlwifi/pcie/tx.c		221
5 files changed, 137 insertions, 142 deletions
diff --git a/drivers/net/wireless/iwlwifi/dvm/sta.c b/drivers/net/wireless/iwlwifi/dvm/sta.c
index 94ef33838bc6..b775769f8322 100644
--- a/drivers/net/wireless/iwlwifi/dvm/sta.c
+++ b/drivers/net/wireless/iwlwifi/dvm/sta.c
@@ -151,7 +151,7 @@ int iwl_send_add_sta(struct iwl_priv *priv,
 		       sta_id, sta->sta.addr, flags & CMD_ASYNC ? "a" : "");
 
 	if (!(flags & CMD_ASYNC)) {
-		cmd.flags |= CMD_WANT_SKB | CMD_WANT_HCMD;
+		cmd.flags |= CMD_WANT_SKB;
 		might_sleep();
 	}
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h
index 058947f213b2..0cac2b7af78b 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/iwlwifi/iwl-trans.h
@@ -186,19 +186,13 @@ struct iwl_rx_packet {
  * @CMD_ASYNC: Return right away and don't want for the response
  * @CMD_WANT_SKB: valid only with CMD_SYNC. The caller needs the buffer of the
  *	response. The caller needs to call iwl_free_resp when done.
- * @CMD_WANT_HCMD: The caller needs to get the HCMD that was sent in the
- *	response handler. Chunks flagged by %IWL_HCMD_DFL_NOCOPY won't be
- *	copied. The pointer passed to the response handler is in the transport
- *	ownership and don't need to be freed by the op_mode. This also means
- *	that the pointer is invalidated after the op_mode's handler returns.
  * @CMD_ON_DEMAND: This command is sent by the test mode pipe.
  */
 enum CMD_MODE {
 	CMD_SYNC = 0,
 	CMD_ASYNC = BIT(0),
 	CMD_WANT_SKB = BIT(1),
-	CMD_WANT_HCMD = BIT(2),
-	CMD_ON_DEMAND = BIT(3),
+	CMD_ON_DEMAND = BIT(2),
 };
 
 #define DEF_CMD_PAYLOAD_SIZE 320
diff --git a/drivers/net/wireless/iwlwifi/pcie/internal.h b/drivers/net/wireless/iwlwifi/pcie/internal.h
index 3d62e8055352..148843e7f34f 100644
--- a/drivers/net/wireless/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/iwlwifi/pcie/internal.h
@@ -137,10 +137,6 @@ static inline int iwl_queue_dec_wrap(int index, int n_bd)
 struct iwl_cmd_meta {
 	/* only for SYNC commands, iff the reply skb is wanted */
 	struct iwl_host_cmd *source;
-
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-	DEFINE_DMA_UNMAP_LEN(len);
-
 	u32 flags;
 };
 
@@ -185,25 +181,36 @@ struct iwl_queue {
 /*
  * The FH will write back to the first TB only, so we need
  * to copy some data into the buffer regardless of whether
- * it should be mapped or not. This indicates how much to
- * copy, even for HCMDs it must be big enough to fit the
- * DRAM scratch from the TX cmd, at least 16 bytes.
+ * it should be mapped or not. This indicates how big the
+ * first TB must be to include the scratch buffer. Since
+ * the scratch is 4 bytes at offset 12, it's 16 now. If we
+ * make it bigger then allocations will be bigger and copy
+ * slower, so that's probably not useful.
  */
-#define IWL_HCMD_MIN_COPY_SIZE	16
+#define IWL_HCMD_SCRATCHBUF_SIZE	16
 
 struct iwl_pcie_txq_entry {
 	struct iwl_device_cmd *cmd;
-	struct iwl_device_cmd *copy_cmd;
 	struct sk_buff *skb;
 	/* buffer to free after command completes */
 	const void *free_buf;
 	struct iwl_cmd_meta meta;
 };
 
+struct iwl_pcie_txq_scratch_buf {
+	struct iwl_cmd_header hdr;
+	u8 buf[8];
+	__le32 scratch;
+};
+
 /**
  * struct iwl_txq - Tx Queue for DMA
  * @q: generic Rx/Tx queue descriptor
  * @tfds: transmit frame descriptors (DMA memory)
+ * @scratchbufs: start of command headers, including scratch buffers, for
+ *	the writeback -- this is DMA memory and an array holding one buffer
+ *	for each command on the queue
+ * @scratchbufs_dma: DMA address for the scratchbufs start
  * @entries: transmit entries (driver state)
  * @lock: queue lock
  * @stuck_timer: timer that fires if queue gets stuck
@@ -217,6 +224,8 @@ struct iwl_pcie_txq_entry {
 struct iwl_txq {
 	struct iwl_queue q;
 	struct iwl_tfd *tfds;
+	struct iwl_pcie_txq_scratch_buf *scratchbufs;
+	dma_addr_t scratchbufs_dma;
 	struct iwl_pcie_txq_entry *entries;
 	spinlock_t lock;
 	struct timer_list stuck_timer;
@@ -225,6 +234,13 @@ struct iwl_txq {
 	u8 active;
 };
 
+static inline dma_addr_t
+iwl_pcie_get_scratchbuf_dma(struct iwl_txq *txq, int idx)
+{
+	return txq->scratchbufs_dma +
+	       sizeof(struct iwl_pcie_txq_scratch_buf) * idx;
+}
+
 /**
  * struct iwl_trans_pcie - PCIe transport specific data
  * @rxq: all the RX queue data
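For reference, the 16-byte figure falls out of the struct layout above. Here is a hedged, userspace sketch of the same invariants that the BUILD_BUG_ON checks in iwl_pcie_txq_alloc() enforce, assuming struct iwl_cmd_header is 4 bytes and the scratch word sits at offset 8 of struct iwl_tx_cmd (example_scratch_buf is a hypothetical stand-in):

#include <stddef.h>
#include <stdint.h>

/* stand-in for struct iwl_pcie_txq_scratch_buf, assumed field sizes */
struct example_scratch_buf {
	uint8_t  hdr[4];	/* struct iwl_cmd_header */
	uint8_t  buf[8];	/* TX command bytes preceding the scratch */
	uint32_t scratch;	/* device write-back word */
};

/* scratch lands at 4 + 8 = 12; total size is 16, hence
 * IWL_HCMD_SCRATCHBUF_SIZE == 16 */
_Static_assert(offsetof(struct example_scratch_buf, scratch) == 12,
	       "scratch must sit at offset 12");
_Static_assert(sizeof(struct example_scratch_buf) == 16,
	       "scratch buffer must be exactly 16 bytes");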
diff --git a/drivers/net/wireless/iwlwifi/pcie/rx.c b/drivers/net/wireless/iwlwifi/pcie/rx.c
index b0ae06d2456f..567e67ad1f61 100644
--- a/drivers/net/wireless/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/rx.c
@@ -637,22 +637,14 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
 	index = SEQ_TO_INDEX(sequence);
 	cmd_index = get_cmd_index(&txq->q, index);
 
-	if (reclaim) {
-		struct iwl_pcie_txq_entry *ent;
-		ent = &txq->entries[cmd_index];
-		cmd = ent->copy_cmd;
-		WARN_ON_ONCE(!cmd && ent->meta.flags & CMD_WANT_HCMD);
-	} else {
+	if (reclaim)
+		cmd = txq->entries[cmd_index].cmd;
+	else
 		cmd = NULL;
-	}
 
 	err = iwl_op_mode_rx(trans->op_mode, &rxcb, cmd);
 
 	if (reclaim) {
-		/* The original command isn't needed any more */
-		kfree(txq->entries[cmd_index].copy_cmd);
-		txq->entries[cmd_index].copy_cmd = NULL;
-		/* nor is the duplicated part of the command */
 		kfree(txq->entries[cmd_index].free_buf);
 		txq->entries[cmd_index].free_buf = NULL;
 	}
diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c
index ff80a7e55f00..8595c16f74de 100644
--- a/drivers/net/wireless/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/tx.c
@@ -191,12 +191,9 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data)
 	}
 
 	for (i = q->read_ptr; i != q->write_ptr;
-	     i = iwl_queue_inc_wrap(i, q->n_bd)) {
-		struct iwl_tx_cmd *tx_cmd =
-			(struct iwl_tx_cmd *)txq->entries[i].cmd->payload;
+	     i = iwl_queue_inc_wrap(i, q->n_bd))
 		IWL_ERR(trans, "scratch %d = 0x%08x\n", i,
-			get_unaligned_le32(&tx_cmd->scratch));
-	}
+			le32_to_cpu(txq->scratchbufs[i].scratch));
 
 	iwl_op_mode_nic_error(trans->op_mode);
 }
@@ -382,14 +379,8 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 		return;
 	}
 
-	/* Unmap tx_cmd */
-	if (num_tbs)
-		dma_unmap_single(trans->dev,
-				 dma_unmap_addr(meta, mapping),
-				 dma_unmap_len(meta, len),
-				 DMA_BIDIRECTIONAL);
+	/* first TB is never freed - it's the scratchbuf data */
 
-	/* Unmap chunks, if any. */
 	for (i = 1; i < num_tbs; i++)
 		dma_unmap_single(trans->dev, iwl_pcie_tfd_tb_get_addr(tfd, i),
 				 iwl_pcie_tfd_tb_get_len(tfd, i),
@@ -478,6 +469,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX;
+	size_t scratchbuf_sz;
 	int i;
 
 	if (WARN_ON(txq->entries || txq->tfds))
@@ -513,9 +505,25 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 		IWL_ERR(trans, "dma_alloc_coherent(%zd) failed\n", tfd_sz);
 		goto error;
 	}
+
+	BUILD_BUG_ON(IWL_HCMD_SCRATCHBUF_SIZE != sizeof(*txq->scratchbufs));
+	BUILD_BUG_ON(offsetof(struct iwl_pcie_txq_scratch_buf, scratch) !=
+			sizeof(struct iwl_cmd_header) +
+			offsetof(struct iwl_tx_cmd, scratch));
+
+	scratchbuf_sz = sizeof(*txq->scratchbufs) * slots_num;
+
+	txq->scratchbufs = dma_alloc_coherent(trans->dev, scratchbuf_sz,
+					      &txq->scratchbufs_dma,
+					      GFP_KERNEL);
+	if (!txq->scratchbufs)
+		goto err_free_tfds;
+
 	txq->q.id = txq_id;
 
 	return 0;
+err_free_tfds:
+	dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->q.dma_addr);
 error:
 	if (txq->entries && txq_id == trans_pcie->cmd_queue)
 		for (i = 0; i < slots_num; i++)
@@ -600,7 +608,6 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 	if (txq_id == trans_pcie->cmd_queue)
 		for (i = 0; i < txq->q.n_window; i++) {
 			kfree(txq->entries[i].cmd);
-			kfree(txq->entries[i].copy_cmd);
 			kfree(txq->entries[i].free_buf);
 		}
 
@@ -609,6 +616,10 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 		dma_free_coherent(dev, sizeof(struct iwl_tfd) *
 				  txq->q.n_bd, txq->tfds, txq->q.dma_addr);
 		txq->q.dma_addr = 0;
+
+		dma_free_coherent(dev,
+				  sizeof(*txq->scratchbufs) * txq->q.n_window,
+				  txq->scratchbufs, txq->scratchbufs_dma);
 	}
 
 	kfree(txq->entries);
@@ -1142,7 +1153,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	void *dup_buf = NULL;
 	dma_addr_t phys_addr;
 	int idx;
-	u16 copy_size, cmd_size, dma_size;
+	u16 copy_size, cmd_size, scratch_size;
 	bool had_nocopy = false;
 	int i;
 	u32 cmd_pos;
@@ -1162,9 +1173,9 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		if (!cmd->len[i])
 			continue;
 
-		/* need at least IWL_HCMD_MIN_COPY_SIZE copied */
-		if (copy_size < IWL_HCMD_MIN_COPY_SIZE) {
-			int copy = IWL_HCMD_MIN_COPY_SIZE - copy_size;
+		/* need at least IWL_HCMD_SCRATCHBUF_SIZE copied */
+		if (copy_size < IWL_HCMD_SCRATCHBUF_SIZE) {
+			int copy = IWL_HCMD_SCRATCHBUF_SIZE - copy_size;
 
 			if (copy > cmdlen[i])
 				copy = cmdlen[i];
@@ -1256,9 +1267,9 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		if (!cmd->len)
 			continue;
 
-		/* need at least IWL_HCMD_MIN_COPY_SIZE copied */
-		if (copy_size < IWL_HCMD_MIN_COPY_SIZE) {
-			copy = IWL_HCMD_MIN_COPY_SIZE - copy_size;
+		/* need at least IWL_HCMD_SCRATCHBUF_SIZE copied */
+		if (copy_size < IWL_HCMD_SCRATCHBUF_SIZE) {
+			copy = IWL_HCMD_SCRATCHBUF_SIZE - copy_size;
 
 			if (copy > cmd->len[i])
 				copy = cmd->len[i];
@@ -1276,48 +1287,36 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		}
 	}
 
-	WARN_ON_ONCE(txq->entries[idx].copy_cmd);
-
-	/*
-	 * since out_cmd will be the source address of the FH, it will write
-	 * the retry count there. So when the user needs to receivce the HCMD
-	 * that corresponds to the response in the response handler, it needs
-	 * to set CMD_WANT_HCMD.
-	 */
-	if (cmd->flags & CMD_WANT_HCMD) {
-		txq->entries[idx].copy_cmd =
-			kmemdup(out_cmd, cmd_pos, GFP_ATOMIC);
-		if (unlikely(!txq->entries[idx].copy_cmd)) {
-			idx = -ENOMEM;
-			goto out;
-		}
-	}
-
 	IWL_DEBUG_HC(trans,
 		     "Sending command %s (#%x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
 		     get_cmd_string(trans_pcie, out_cmd->hdr.cmd),
 		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
 		     cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue);
 
-	/*
-	 * If the entire command is smaller than IWL_HCMD_MIN_COPY_SIZE, we must
-	 * still map at least that many bytes for the hardware to write back to.
-	 * We have enough space, so that's not a problem.
-	 */
-	dma_size = max_t(u16, copy_size, IWL_HCMD_MIN_COPY_SIZE);
+	/* start the TFD with the scratchbuf */
+	scratch_size = min_t(int, copy_size, IWL_HCMD_SCRATCHBUF_SIZE);
+	memcpy(&txq->scratchbufs[q->write_ptr], &out_cmd->hdr, scratch_size);
+	iwl_pcie_txq_build_tfd(trans, txq,
+			       iwl_pcie_get_scratchbuf_dma(txq, q->write_ptr),
+			       scratch_size, 1);
+
+	/* map first command fragment, if any remains */
+	if (copy_size > scratch_size) {
+		phys_addr = dma_map_single(trans->dev,
+					   ((u8 *)&out_cmd->hdr) + scratch_size,
+					   copy_size - scratch_size,
+					   DMA_TO_DEVICE);
+		if (dma_mapping_error(trans->dev, phys_addr)) {
+			iwl_pcie_tfd_unmap(trans, out_meta,
+					   &txq->tfds[q->write_ptr]);
+			idx = -ENOMEM;
+			goto out;
+		}
 
-	phys_addr = dma_map_single(trans->dev, &out_cmd->hdr, dma_size,
-				   DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(trans->dev, phys_addr))) {
-		idx = -ENOMEM;
-		goto out;
+		iwl_pcie_txq_build_tfd(trans, txq, phys_addr,
+				       copy_size - scratch_size, 0);
 	}
 
-	dma_unmap_addr_set(out_meta, mapping, phys_addr);
-	dma_unmap_len_set(out_meta, len, dma_size);
-
-	iwl_pcie_txq_build_tfd(trans, txq, phys_addr, copy_size, 1);
-
 	/* map the remaining (adjusted) nocopy/dup fragments */
 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
 		const void *data = cmddata[i];
@@ -1586,10 +1585,9 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	struct iwl_cmd_meta *out_meta;
 	struct iwl_txq *txq;
 	struct iwl_queue *q;
-	dma_addr_t phys_addr = 0;
-	dma_addr_t txcmd_phys;
-	dma_addr_t scratch_phys;
-	u16 len, firstlen, secondlen;
+	dma_addr_t tb0_phys, tb1_phys, scratch_phys;
+	void *tb1_addr;
+	u16 len, tb1_len, tb2_len;
 	u8 wait_write_ptr = 0;
 	__le16 fc = hdr->frame_control;
 	u8 hdr_len = ieee80211_hdrlen(fc);
@@ -1627,85 +1625,80 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
 			    INDEX_TO_SEQ(q->write_ptr)));
 
+	tb0_phys = iwl_pcie_get_scratchbuf_dma(txq, q->write_ptr);
+	scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
+		       offsetof(struct iwl_tx_cmd, scratch);
+
+	tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
+	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
+
 	/* Set up first empty entry in queue's array of Tx/cmd buffers */
 	out_meta = &txq->entries[q->write_ptr].meta;
 
 	/*
-	 * Use the first empty entry in this queue's command buffer array
-	 * to contain the Tx command and MAC header concatenated together
-	 * (payload data will be in another buffer).
-	 * Size of this varies, due to varying MAC header length.
-	 * If end is not dword aligned, we'll have 2 extra bytes at the end
-	 * of the MAC header (device reads on dword boundaries).
-	 * We'll tell device about this padding later.
+	 * The second TB (tb1) points to the remainder of the TX command
+	 * and the 802.11 header - dword aligned size
+	 * (This calculation modifies the TX command, so do it before the
+	 * setup of the first TB)
 	 */
-	len = sizeof(struct iwl_tx_cmd) +
-		sizeof(struct iwl_cmd_header) + hdr_len;
-	firstlen = (len + 3) & ~3;
+	len = sizeof(struct iwl_tx_cmd) + sizeof(struct iwl_cmd_header) +
+	      hdr_len - IWL_HCMD_SCRATCHBUF_SIZE;
+	tb1_len = (len + 3) & ~3;
 
 	/* Tell NIC about any 2-byte padding after MAC header */
-	if (firstlen != len)
+	if (tb1_len != len)
 		tx_cmd->tx_flags |= TX_CMD_FLG_MH_PAD_MSK;
 
-	/* Physical address of this Tx command's header (not MAC header!),
-	 * within command buffer array. */
-	txcmd_phys = dma_map_single(trans->dev,
-				    &dev_cmd->hdr, firstlen,
-				    DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(trans->dev, txcmd_phys)))
-		goto out_err;
-	dma_unmap_addr_set(out_meta, mapping, txcmd_phys);
-	dma_unmap_len_set(out_meta, len, firstlen);
+	/* The first TB points to the scratchbuf data - min_copy bytes */
+	memcpy(&txq->scratchbufs[q->write_ptr], &dev_cmd->hdr,
+	       IWL_HCMD_SCRATCHBUF_SIZE);
+	iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
+			       IWL_HCMD_SCRATCHBUF_SIZE, 1);
 
-	if (!ieee80211_has_morefrags(fc)) {
-		txq->need_update = 1;
-	} else {
-		wait_write_ptr = 1;
-		txq->need_update = 0;
-	}
+	/* there must be data left over for TB1 or this code must be changed */
+	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_HCMD_SCRATCHBUF_SIZE);
+
+	/* map the data for TB1 */
+	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_HCMD_SCRATCHBUF_SIZE;
+	tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
+		goto out_err;
+	iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, 0);
 
-	/* Set up TFD's 2nd entry to point directly to remainder of skb,
-	 * if any (802.11 null frames have no payload). */
-	secondlen = skb->len - hdr_len;
-	if (secondlen > 0) {
-		phys_addr = dma_map_single(trans->dev, skb->data + hdr_len,
-					   secondlen, DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(trans->dev, phys_addr))) {
-			dma_unmap_single(trans->dev,
-					 dma_unmap_addr(out_meta, mapping),
-					 dma_unmap_len(out_meta, len),
-					 DMA_BIDIRECTIONAL);
+	/*
+	 * Set up TFD's third entry to point directly to remainder
+	 * of skb, if any (802.11 null frames have no payload).
+	 */
+	tb2_len = skb->len - hdr_len;
+	if (tb2_len > 0) {
+		dma_addr_t tb2_phys = dma_map_single(trans->dev,
+						     skb->data + hdr_len,
+						     tb2_len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) {
+			iwl_pcie_tfd_unmap(trans, out_meta,
+					   &txq->tfds[q->write_ptr]);
 			goto out_err;
 		}
+		iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, 0);
 	}
 
-	/* Attach buffers to TFD */
-	iwl_pcie_txq_build_tfd(trans, txq, txcmd_phys, firstlen, 1);
-	if (secondlen > 0)
-		iwl_pcie_txq_build_tfd(trans, txq, phys_addr, secondlen, 0);
-
-	scratch_phys = txcmd_phys + sizeof(struct iwl_cmd_header) +
-		       offsetof(struct iwl_tx_cmd, scratch);
-
-	/* take back ownership of DMA buffer to enable update */
-	dma_sync_single_for_cpu(trans->dev, txcmd_phys, firstlen,
-				DMA_BIDIRECTIONAL);
-	tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
-	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
-
 	/* Set up entry for this TFD in Tx byte-count array */
 	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
 
-	dma_sync_single_for_device(trans->dev, txcmd_phys, firstlen,
-				   DMA_BIDIRECTIONAL);
-
 	trace_iwlwifi_dev_tx(trans->dev, skb,
 			     &txq->tfds[txq->q.write_ptr],
 			     sizeof(struct iwl_tfd),
-			     &dev_cmd->hdr, firstlen,
-			     skb->data + hdr_len, secondlen);
+			     &dev_cmd->hdr, IWL_HCMD_SCRATCHBUF_SIZE + tb1_len,
+			     skb->data + hdr_len, tb2_len);
 	trace_iwlwifi_dev_tx_data(trans->dev, skb,
-				  skb->data + hdr_len, secondlen);
+				  skb->data + hdr_len, tb2_len);
+
+	if (!ieee80211_has_morefrags(fc)) {
+		txq->need_update = 1;
+	} else {
+		wait_write_ptr = 1;
+		txq->need_update = 0;
+	}
 
 	/* start timer if queue currently empty */
 	if (txq->need_update && q->read_ptr == q->write_ptr &&