author		Rajkumar Manoharan <rmanohar@qti.qualcomm.com>	2016-03-22 07:52:18 -0400
committer	Kalle Valo <kvalo@qca.qualcomm.com>	2016-04-04 10:03:21 -0400
commit		128abd09134a5b415fef4373841ea6d3fb7b680f (patch)
tree		f044f94436a670bcd38d9aa22dd9c12258fc872d
parent		24d9ef5eff5057bb6339ed1cf852a2b2a7be324d (diff)
ath10k: reuse copy engine 5 (htt rx) descriptors
Whenever an htt rx indication, i.e. a target-to-host message, is received on the
rx copy engine (CE5), the message is freed after the response is processed and
CE5 is then refilled with new descriptors during post-rx processing. These
memory alloc and free operations can be avoided by reusing the same
descriptors.

During CE pipe allocation the full ring is not initialized, i.e. only n-1
entries are filled, so for CE5 the full ring should be filled in order to
reuse descriptors. Moreover, the CE5 write index is now updated in a single
shot instead of incrementally, which avoids multiple pci_write and ce_ring
accesses. In experiments, this improves CPU usage by ~3% on the IPQ4019
platform.

Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
-rw-r--r--	drivers/net/wireless/ath/ath10k/ce.c	23
-rw-r--r--	drivers/net/wireless/ath/ath10k/ce.h	3
-rw-r--r--	drivers/net/wireless/ath/ath10k/pci.c	63
3 files changed, 84 insertions(+), 5 deletions(-)
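The single-shot write index update described in the commit message comes down to
modular ring arithmetic. The following stand-alone sketch (hypothetical RING_*
names, user-space C rather than driver code, assuming a power-of-two ring size as
CE rings have) shows that advancing the index by N entries with one masked add
gives the same result as N single increments, which is what lets the new helper
post all recycled CE5 buffers with a single register write.

/*
 * Stand-alone model (hypothetical names, not driver code) of the ring-index
 * arithmetic used by the patch. CE rings have a power-of-two number of
 * entries, so an index can be advanced by any count and wrapped with a
 * single mask, in the style of CE_RING_IDX_ADD() added to ce.h below.
 */
#include <stdio.h>

#define RING_NENTRIES                 8                  /* power of two */
#define RING_MASK                     (RING_NENTRIES - 1)
#define RING_IDX_INCR(mask, idx)      (((idx) + 1) & (mask))
#define RING_IDX_ADD(mask, idx, num)  (((idx) + (num)) & (mask))

int main(void)
{
	unsigned int write_index = 6;   /* current hardware write index */
	unsigned int completed = 5;     /* buffers recycled in one pass */
	unsigned int idx;
	unsigned int i;

	/* Incremental style: one index update (and register write) per buffer. */
	idx = write_index;
	for (i = 0; i < completed; i++)
		idx = RING_IDX_INCR(RING_MASK, idx);
	printf("incremental: %u\n", idx);

	/* Single-shot style: advance once, wrap once, write the register once. */
	write_index = RING_IDX_ADD(RING_MASK, write_index, completed);
	printf("single shot: %u\n", write_index);

	return 0;
}

Both styles print the same wrapped index (3 here); in the driver, the batched
form means one ath10k_ce_dest_ring_write_index_set() call per interrupt instead
of one per posted buffer.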
diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c
index d6da404c9fa7..7212802eb327 100644
--- a/drivers/net/wireless/ath/ath10k/ce.c
+++ b/drivers/net/wireless/ath/ath10k/ce.c
@@ -411,7 +411,8 @@ int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
 
 	lockdep_assert_held(&ar_pci->ce_lock);
 
-	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
+	if ((pipe->id != 5) &&
+	    CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
 		return -ENOSPC;
 
 	desc->addr = __cpu_to_le32(paddr);
@@ -425,6 +426,19 @@ int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
 	return 0;
 }
 
+void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries)
+{
+	struct ath10k *ar = pipe->ar;
+	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
+	unsigned int nentries_mask = dest_ring->nentries_mask;
+	unsigned int write_index = dest_ring->write_index;
+	u32 ctrl_addr = pipe->ctrl_addr;
+
+	write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries);
+	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
+	dest_ring->write_index = write_index;
+}
+
 int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
 {
 	struct ath10k *ar = pipe->ar;
@@ -478,8 +492,11 @@ int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
 	*per_transfer_contextp =
 		dest_ring->per_transfer_context[sw_index];
 
-	/* sanity */
-	dest_ring->per_transfer_context[sw_index] = NULL;
+	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
+	 * So update transfer context for all CEs except CE5.
+	 */
+	if (ce_state->id != 5)
+		dest_ring->per_transfer_context[sw_index] = NULL;
 
 	/* Update sw_index */
 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
diff --git a/drivers/net/wireless/ath/ath10k/ce.h b/drivers/net/wireless/ath/ath10k/ce.h
index 68717e5b9d89..25cafcfd6b12 100644
--- a/drivers/net/wireless/ath/ath10k/ce.h
+++ b/drivers/net/wireless/ath/ath10k/ce.h
@@ -166,6 +166,7 @@ int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe);
 int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe);
 int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr);
 int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr);
+void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries);
 
 /* recv flags */
 /* Data is byte-swapped */
@@ -410,6 +411,8 @@ static inline u32 ath10k_ce_base_address(struct ath10k *ar, unsigned int ce_id)
 	(((int)(toidx)-(int)(fromidx)) & (nentries_mask))
 
 #define CE_RING_IDX_INCR(nentries_mask, idx) (((idx) + 1) & (nentries_mask))
+#define CE_RING_IDX_ADD(nentries_mask, idx, num) \
+	(((idx) + (num)) & (nentries_mask))
 
 #define CE_WRAPPER_INTERRUPT_SUMMARY_HOST_MSI_LSB \
 	ar->regs->ce_wrap_intr_sum_host_msi_lsb
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 290a61afde1a..0b305efe6c94 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -809,7 +809,8 @@ static void ath10k_pci_rx_post_pipe(struct ath10k_pci_pipe *pipe)
 	spin_lock_bh(&ar_pci->ce_lock);
 	num = __ath10k_ce_rx_num_free_bufs(ce_pipe);
 	spin_unlock_bh(&ar_pci->ce_lock);
-	while (num--) {
+
+	while (num >= 0) {
 		ret = __ath10k_pci_rx_post_buf(pipe);
 		if (ret) {
 			if (ret == -ENOSPC)
@@ -819,6 +820,7 @@ static void ath10k_pci_rx_post_pipe(struct ath10k_pci_pipe *pipe)
 				  ATH10K_PCI_RX_POST_RETRY_MS);
 			break;
 		}
+		num--;
 	}
 }
 
@@ -1212,6 +1214,63 @@ static void ath10k_pci_process_rx_cb(struct ath10k_ce_pipe *ce_state,
 	ath10k_pci_rx_post_pipe(pipe_info);
 }
 
+static void ath10k_pci_process_htt_rx_cb(struct ath10k_ce_pipe *ce_state,
+					 void (*callback)(struct ath10k *ar,
+							  struct sk_buff *skb))
+{
+	struct ath10k *ar = ce_state->ar;
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k_pci_pipe *pipe_info = &ar_pci->pipe_info[ce_state->id];
+	struct ath10k_ce_pipe *ce_pipe = pipe_info->ce_hdl;
+	struct sk_buff *skb;
+	struct sk_buff_head list;
+	void *transfer_context;
+	unsigned int nbytes, max_nbytes, nentries;
+	int orig_len;
+
+	/* No need to acquire ce_lock for CE5, since this is the only place CE5
+	 * is processed other than init and deinit. Before releasing CE5
+	 * buffers, interrupts are disabled. Thus CE5 access is serialized.
+	 */
+	__skb_queue_head_init(&list);
+	while (ath10k_ce_completed_recv_next_nolock(ce_state, &transfer_context,
+						    &nbytes) == 0) {
+		skb = transfer_context;
+		max_nbytes = skb->len + skb_tailroom(skb);
+
+		if (unlikely(max_nbytes < nbytes)) {
+			ath10k_warn(ar, "rxed more than expected (nbytes %d, max %d)",
+				    nbytes, max_nbytes);
+			continue;
+		}
+
+		dma_sync_single_for_cpu(ar->dev, ATH10K_SKB_RXCB(skb)->paddr,
+					max_nbytes, DMA_FROM_DEVICE);
+		skb_put(skb, nbytes);
+		__skb_queue_tail(&list, skb);
+	}
+
+	nentries = skb_queue_len(&list);
+	while ((skb = __skb_dequeue(&list))) {
+		ath10k_dbg(ar, ATH10K_DBG_PCI, "pci rx ce pipe %d len %d\n",
+			   ce_state->id, skb->len);
+		ath10k_dbg_dump(ar, ATH10K_DBG_PCI_DUMP, NULL, "pci rx: ",
+				skb->data, skb->len);
+
+		orig_len = skb->len;
+		callback(ar, skb);
+		skb_push(skb, orig_len - skb->len);
+		skb_reset_tail_pointer(skb);
+		skb_trim(skb, 0);
+
+		/* let device gain the buffer again */
+		dma_sync_single_for_device(ar->dev, ATH10K_SKB_RXCB(skb)->paddr,
+					   skb->len + skb_tailroom(skb),
+					   DMA_FROM_DEVICE);
+	}
+	ath10k_ce_rx_update_write_idx(ce_pipe, nentries);
+}
+
 /* Called by lower (CE) layer when data is received from the Target. */
 static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state)
 {
@@ -1268,7 +1327,7 @@ static void ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state)
 	 */
 	ath10k_ce_per_engine_service(ce_state->ar, 4);
 
-	ath10k_pci_process_rx_cb(ce_state, ath10k_pci_htt_rx_deliver);
+	ath10k_pci_process_htt_rx_cb(ce_state, ath10k_pci_htt_rx_deliver);
 }
 
 int ath10k_pci_hif_tx_sg(struct ath10k *ar, u8 pipe_id,