author     Jon Mason <jon.mason@intel.com>    2013-02-12 11:52:50 -0500
committer  Jon Mason <jon.mason@intel.com>    2013-09-05 14:04:09 -0400
commit     282a2feeb9bfb1d1dfbad93df206b74eaf80d564 (patch)
tree       9265a6308dd746606dbcb0bac330082f97780be9 /drivers/ntb
parent     ac477afb0431386575ef453f50fa0052c3f0461b (diff)
NTB: Use DMA Engine to Transmit and Receive
Allocate and use a DMA engine channel to transmit and receive data over NTB.
If none is allocated, fall back to using the CPU to transfer data.

Signed-off-by: Jon Mason <jon.mason@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
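For readers unfamiliar with the dmaengine API this patch builds on, the sketch below shows the copy-with-CPU-fallback pattern in isolation. It is a simplified, hypothetical illustration (the names demo_copy() and demo_copy_done() are invented and are not part of the patch; it assumes dmaengine_get() has already been called so dma_find_channel() can return a shared DMA_MEMCPY channel). The patch itself implements this pattern in ntb_async_tx()/ntb_async_rx().

/*
 * Hypothetical sketch of the DMA-or-CPU copy pattern (not from the patch).
 * Assumes dmaengine_get() was already called, so dma_find_channel() may
 * return a shared DMA_MEMCPY channel; error handling is kept minimal.
 */
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/string.h>

static void demo_copy_done(void *arg)
{
	/* DMA completion callback; the patch uses ntb_tx/rx_copy_callback() */
}

static void demo_copy(void *dst, dma_addr_t dst_phys, void *src, size_t len)
{
	struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
	struct dma_async_tx_descriptor *txd;
	struct dma_device *device;
	dma_addr_t src_phys;
	dma_cookie_t cookie;

	if (!chan)
		goto cpu;			/* no engine: copy with the CPU */

	device = chan->device;
	src_phys = dma_map_single(device->dev, src, len, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, src_phys))
		goto cpu;

	txd = device->device_prep_dma_memcpy(chan, dst_phys, src_phys, len,
					     DMA_PREP_INTERRUPT);
	if (!txd)
		goto unmap;

	txd->callback = demo_copy_done;		/* runs asynchronously on completion */
	txd->callback_param = NULL;

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto unmap;

	dma_async_issue_pending(chan);		/* kick the engine */
	return;

unmap:
	dma_unmap_single(device->dev, src_phys, len, DMA_TO_DEVICE);
cpu:
	memcpy(dst, src, len);			/* synchronous CPU fallback */
	demo_copy_done(NULL);
}

The diff below follows the same shape but additionally skips the engine for transfers smaller than the copy_bytes threshold and for buffers that fail is_dma_copy_aligned().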
Diffstat (limited to 'drivers/ntb')
-rw-r--r--  drivers/ntb/ntb_hw.c         17
-rw-r--r--  drivers/ntb/ntb_hw.h          1
-rw-r--r--  drivers/ntb/ntb_transport.c  324
3 files changed, 295 insertions, 47 deletions
diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
index ab34795cf125..0345817a8355 100644
--- a/drivers/ntb/ntb_hw.c
+++ b/drivers/ntb/ntb_hw.c
@@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
 }
 
 /**
+ * ntb_get_mw_base() - get addr for the NTB memory window
+ * @ndev: pointer to ntb_device instance
+ * @mw: memory window number
+ *
+ * This function provides the base address of the memory window specified.
+ *
+ * RETURNS: address, or NULL on error.
+ */
+resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw)
+{
+	if (mw >= ntb_max_mw(ndev))
+		return 0;
+
+	return pci_resource_start(ndev->pdev, MW_TO_BAR(mw));
+}
+
+/**
  * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
  * @ndev: pointer to ntb_device instance
  * @mw: memory window number
diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h
index d838bc13b956..4f42ed18103a 100644
--- a/drivers/ntb/ntb_hw.h
+++ b/drivers/ntb/ntb_hw.h
@@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val);
 int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val);
 int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val);
 int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val);
+resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw);
 void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw);
 u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw);
 void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx);
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index f7380e959656..ae8657259ca0 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -47,6 +47,7 @@
  */
 #include <linux/debugfs.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/export.h>
@@ -68,6 +69,10 @@ static unsigned char max_num_clients;
 module_param(max_num_clients, byte, 0644);
 MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients");
 
+static unsigned int copy_bytes = 1024;
+module_param(copy_bytes, uint, 0644);
+MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA");
+
 struct ntb_queue_entry {
 	/* ntb_queue list reference */
 	struct list_head entry;
@@ -76,6 +81,13 @@ struct ntb_queue_entry {
 	void *buf;
 	unsigned int len;
 	unsigned int flags;
+
+	struct ntb_transport_qp *qp;
+	union {
+		struct ntb_payload_header __iomem *tx_hdr;
+		struct ntb_payload_header *rx_hdr;
+	};
+	unsigned int index;
 };
 
 struct ntb_rx_info {
@@ -86,6 +98,7 @@ struct ntb_transport_qp {
 	struct ntb_transport *transport;
 	struct ntb_device *ndev;
 	void *cb_data;
+	struct dma_chan *dma_chan;
 
 	bool client_ready;
 	bool qp_link;
@@ -99,6 +112,7 @@ struct ntb_transport_qp {
 	struct list_head tx_free_q;
 	spinlock_t ntb_tx_free_q_lock;
 	void __iomem *tx_mw;
+	dma_addr_t tx_mw_phys;
 	unsigned int tx_index;
 	unsigned int tx_max_entry;
 	unsigned int tx_max_frame;
@@ -114,6 +128,7 @@ struct ntb_transport_qp {
 	unsigned int rx_index;
 	unsigned int rx_max_entry;
 	unsigned int rx_max_frame;
+	dma_cookie_t last_cookie;
 
 	void (*event_handler) (void *data, int status);
 	struct delayed_work link_work;
@@ -129,9 +144,14 @@ struct ntb_transport_qp {
 	u64 rx_err_no_buf;
 	u64 rx_err_oflow;
 	u64 rx_err_ver;
+	u64 rx_memcpy;
+	u64 rx_async;
 	u64 tx_bytes;
 	u64 tx_pkts;
 	u64 tx_ring_full;
+	u64 tx_err_no_buf;
+	u64 tx_memcpy;
+	u64 tx_async;
 };
 
 struct ntb_transport_mw {
@@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
 	char *buf;
 	ssize_t ret, out_offset, out_count;
 
-	out_count = 600;
+	out_count = 1000;
 
 	buf = kmalloc(out_count, GFP_KERNEL);
 	if (!buf)
@@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "rx_pkts - \t%llu\n", qp->rx_pkts);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_memcpy - \t%llu\n", qp->rx_memcpy);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_async - \t%llu\n", qp->rx_async);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "rx_ring_empty - %llu\n", qp->rx_ring_empty);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
@@ -415,8 +439,14 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "tx_pkts - \t%llu\n", qp->tx_pkts);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_memcpy - \t%llu\n", qp->tx_memcpy);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_async - \t%llu\n", qp->tx_async);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "tx_ring_full - \t%llu\n", qp->tx_ring_full);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "tx_mw - \t%p\n", qp->tx_mw);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "tx_index - \t%u\n", qp->tx_index);
@@ -488,11 +518,11 @@ static void ntb_transport_setup_qp_mw(struct ntb_transport *nt,
 		num_qps_mw = nt->max_qps / mw_max;
 
 	rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw;
-	qp->remote_rx_info = nt->mw[mw_num].virt_addr +
-			     (qp_num / mw_max * rx_size);
+	qp->rx_buff = nt->mw[mw_num].virt_addr + qp_num / mw_max * rx_size;
 	rx_size -= sizeof(struct ntb_rx_info);
 
-	qp->rx_buff = qp->remote_rx_info + 1;
+	qp->remote_rx_info = qp->rx_buff + rx_size;
+
 	/* Due to housekeeping, there must be atleast 2 buffs */
 	qp->rx_max_frame = min(transport_mtu, rx_size / 2);
 	qp->rx_max_entry = rx_size / qp->rx_max_frame;
@@ -796,12 +826,13 @@ static void ntb_qp_link_work(struct work_struct *work)
 				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
 }
 
-static void ntb_transport_init_queue(struct ntb_transport *nt,
+static int ntb_transport_init_queue(struct ntb_transport *nt,
 				     unsigned int qp_num)
 {
 	struct ntb_transport_qp *qp;
 	unsigned int num_qps_mw, tx_size;
 	u8 mw_num, mw_max;
+	u64 qp_offset;
 
 	mw_max = ntb_max_mw(nt->ndev);
 	mw_num = QP_TO_MW(nt->ndev, qp_num);
@@ -820,11 +851,18 @@ static void ntb_transport_init_queue(struct ntb_transport *nt,
 		num_qps_mw = nt->max_qps / mw_max;
 
 	tx_size = (unsigned int) ntb_get_mw_size(qp->ndev, mw_num) / num_qps_mw;
-	qp->rx_info = ntb_get_mw_vbase(nt->ndev, mw_num) +
-		      (qp_num / mw_max * tx_size);
+	qp_offset = qp_num / mw_max * tx_size;
+	qp->tx_mw = ntb_get_mw_vbase(nt->ndev, mw_num) + qp_offset;
+	if (!qp->tx_mw)
+		return -EINVAL;
+
+	qp->tx_mw_phys = ntb_get_mw_base(qp->ndev, mw_num) + qp_offset;
+	if (!qp->tx_mw_phys)
+		return -EINVAL;
+
 	tx_size -= sizeof(struct ntb_rx_info);
+	qp->rx_info = qp->tx_mw + tx_size;
 
-	qp->tx_mw = qp->rx_info + 1;
 	/* Due to housekeeping, there must be atleast 2 buffs */
 	qp->tx_max_frame = min(transport_mtu, tx_size / 2);
 	qp->tx_max_entry = tx_size / qp->tx_max_frame;
@@ -851,6 +889,8 @@ static void ntb_transport_init_queue(struct ntb_transport *nt,
 	INIT_LIST_HEAD(&qp->rx_pend_q);
 	INIT_LIST_HEAD(&qp->rx_free_q);
 	INIT_LIST_HEAD(&qp->tx_free_q);
+
+	return 0;
 }
 
 int ntb_transport_init(struct pci_dev *pdev)
@@ -889,8 +929,11 @@ int ntb_transport_init(struct pci_dev *pdev)
 
 	nt->qp_bitmap = ((u64) 1 << nt->max_qps) - 1;
 
-	for (i = 0; i < nt->max_qps; i++)
-		ntb_transport_init_queue(nt, i);
+	for (i = 0; i < nt->max_qps; i++) {
+		rc = ntb_transport_init_queue(nt, i);
+		if (rc)
+			goto err3;
+	}
 
 	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
 	INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup);
@@ -956,13 +999,19 @@ void ntb_transport_free(void *transport)
 	kfree(nt);
 }
 
-static void ntb_rx_copy_task(struct ntb_transport_qp *qp,
-			     struct ntb_queue_entry *entry, void *offset)
+static void ntb_rx_copy_callback(void *data)
 {
+	struct ntb_queue_entry *entry = data;
+	struct ntb_transport_qp *qp = entry->qp;
 	void *cb_data = entry->cb_data;
 	unsigned int len = entry->len;
+	struct ntb_payload_header *hdr = entry->rx_hdr;
+
+	/* Ensure that the data is fully copied out before clearing the flag */
+	wmb();
+	hdr->flags = 0;
 
-	memcpy(entry->buf, offset, entry->len);
+	iowrite32(entry->index, &qp->rx_info->entry);
 
 	ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q);
 
@@ -970,6 +1019,86 @@ static void ntb_rx_copy_task(struct ntb_transport_qp *qp,
 		qp->rx_handler(qp, qp->cb_data, cb_data, len);
 }
 
+static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset)
+{
+	void *buf = entry->buf;
+	size_t len = entry->len;
+
+	memcpy(buf, offset, len);
+
+	ntb_rx_copy_callback(entry);
+}
+
+static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
+			 size_t len)
+{
+	struct dma_async_tx_descriptor *txd;
+	struct ntb_transport_qp *qp = entry->qp;
+	struct dma_chan *chan = qp->dma_chan;
+	struct dma_device *device;
+	size_t pay_off, buff_off;
+	dma_addr_t src, dest;
+	dma_cookie_t cookie;
+	void *buf = entry->buf;
+	unsigned long flags;
+
+	entry->len = len;
+
+	if (!chan)
+		goto err;
+
+	if (len < copy_bytes)
+		goto err1;
+
+	device = chan->device;
+	pay_off = (size_t) offset & ~PAGE_MASK;
+	buff_off = (size_t) buf & ~PAGE_MASK;
+
+	if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
+		goto err1;
+
+	dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(device->dev, dest))
+		goto err1;
+
+	src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, src))
+		goto err2;
+
+	flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE |
+		DMA_PREP_INTERRUPT;
+	txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+	if (!txd)
+		goto err3;
+
+	txd->callback = ntb_rx_copy_callback;
+	txd->callback_param = entry;
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie))
+		goto err3;
+
+	qp->last_cookie = cookie;
+
+	qp->rx_async++;
+
+	return;
+
+err3:
+	dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
+err2:
+	dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE);
+err1:
+	/* If the callbacks come out of order, the writing of the index to the
+	 * last completed will be out of order. This may result in the
+	 * receive stalling forever.
+	 */
+	dma_sync_wait(chan, qp->last_cookie);
+err:
+	ntb_memcpy_rx(entry, offset);
+	qp->rx_memcpy++;
+}
+
 static int ntb_process_rxc(struct ntb_transport_qp *qp)
 {
 	struct ntb_payload_header *hdr;
@@ -1008,41 +1137,45 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp)
 	if (hdr->flags & LINK_DOWN_FLAG) {
 		ntb_qp_link_down(qp);
 
-		ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry,
-			     &qp->rx_pend_q);
-		goto out;
+		goto err;
 	}
 
 	dev_dbg(&ntb_query_pdev(qp->ndev)->dev,
 		"rx offset %u, ver %u - %d payload received, buf size %d\n",
 		qp->rx_index, hdr->ver, hdr->len, entry->len);
 
-	if (hdr->len <= entry->len) {
-		entry->len = hdr->len;
-		ntb_rx_copy_task(qp, entry, offset);
-	} else {
-		ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry,
-			     &qp->rx_pend_q);
+	qp->rx_bytes += hdr->len;
+	qp->rx_pkts++;
 
+	if (hdr->len > entry->len) {
 		qp->rx_err_oflow++;
 		dev_dbg(&ntb_query_pdev(qp->ndev)->dev,
 			"RX overflow! Wanted %d got %d\n",
 			hdr->len, entry->len);
+
+		goto err;
 	}
 
-	qp->rx_bytes += hdr->len;
-	qp->rx_pkts++;
+	entry->index = qp->rx_index;
+	entry->rx_hdr = hdr;
+
+	ntb_async_rx(entry, offset, hdr->len);
 
 out:
+	qp->rx_index++;
+	qp->rx_index %= qp->rx_max_entry;
+
+	return 0;
+
+err:
+	ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry,
+		     &qp->rx_pend_q);
 	/* Ensure that the data is fully copied out before clearing the flag */
 	wmb();
 	hdr->flags = 0;
 	iowrite32(qp->rx_index, &qp->rx_info->entry);
 
-	qp->rx_index++;
-	qp->rx_index %= qp->rx_max_entry;
-
-	return 0;
+	goto out;
 }
 
 static void ntb_transport_rx(unsigned long data)
@@ -1058,6 +1191,9 @@ static void ntb_transport_rx(unsigned long data)
 		if (rc)
 			break;
 	}
+
+	if (qp->dma_chan)
+		dma_async_issue_pending(qp->dma_chan);
 }
 
 static void ntb_transport_rxc_db(void *data, int db_num)
@@ -1070,19 +1206,13 @@ static void ntb_transport_rxc_db(void *data, int db_num)
 	tasklet_schedule(&qp->rx_work);
 }
 
-static void ntb_tx_copy_task(struct ntb_transport_qp *qp,
-			     struct ntb_queue_entry *entry,
-			     void __iomem *offset)
+static void ntb_tx_copy_callback(void *data)
 {
-	struct ntb_payload_header __iomem *hdr;
-
-	memcpy_toio(offset, entry->buf, entry->len);
+	struct ntb_queue_entry *entry = data;
+	struct ntb_transport_qp *qp = entry->qp;
+	struct ntb_payload_header __iomem *hdr = entry->tx_hdr;
 
-	hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
-	iowrite32(entry->len, &hdr->len);
-	iowrite32((u32) qp->tx_pkts, &hdr->ver);
-
-	/* Ensure that the data is fully copied out before setting the flag */
+	/* Ensure that the data is fully copied out before setting the flags */
 	wmb();
 	iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
 
@@ -1103,15 +1233,81 @@ static void ntb_tx_copy_task(struct ntb_transport_qp *qp,
 	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
 }
 
-static int ntb_process_tx(struct ntb_transport_qp *qp,
-			  struct ntb_queue_entry *entry)
+static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset)
 {
+	memcpy_toio(offset, entry->buf, entry->len);
+
+	ntb_tx_copy_callback(entry);
+}
+
+static void ntb_async_tx(struct ntb_transport_qp *qp,
+			 struct ntb_queue_entry *entry)
+{
+	struct ntb_payload_header __iomem *hdr;
+	struct dma_async_tx_descriptor *txd;
+	struct dma_chan *chan = qp->dma_chan;
+	struct dma_device *device;
+	size_t dest_off, buff_off;
+	dma_addr_t src, dest;
+	dma_cookie_t cookie;
 	void __iomem *offset;
+	size_t len = entry->len;
+	void *buf = entry->buf;
+	unsigned long flags;
 
 	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
+	hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
+	entry->tx_hdr = hdr;
 
-	dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - offset %p, tx %u, entry len %d flags %x buff %p\n",
-		qp->tx_pkts, offset, qp->tx_index, entry->len, entry->flags,
+	iowrite32(entry->len, &hdr->len);
+	iowrite32((u32) qp->tx_pkts, &hdr->ver);
+
+	if (!chan)
+		goto err;
+
+	if (len < copy_bytes)
+		goto err;
+
+	device = chan->device;
+	dest = qp->tx_mw_phys + qp->tx_max_frame * qp->tx_index;
+	buff_off = (size_t) buf & ~PAGE_MASK;
+	dest_off = (size_t) dest & ~PAGE_MASK;
+
+	if (!is_dma_copy_aligned(device, buff_off, dest_off, len))
+		goto err;
+
+	src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, src))
+		goto err;
+
+	flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT;
+	txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+	if (!txd)
+		goto err1;
+
+	txd->callback = ntb_tx_copy_callback;
+	txd->callback_param = entry;
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie))
+		goto err1;
+
+	dma_async_issue_pending(chan);
+	qp->tx_async++;
+
+	return;
+err1:
+	dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
+err:
+	ntb_memcpy_tx(entry, offset);
+	qp->tx_memcpy++;
+}
+
+static int ntb_process_tx(struct ntb_transport_qp *qp,
+			  struct ntb_queue_entry *entry)
+{
+	dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - tx %u, entry len %d flags %x buff %p\n",
+		qp->tx_pkts, qp->tx_index, entry->len, entry->flags,
 		entry->buf);
 	if (qp->tx_index == qp->remote_rx_info->entry) {
 		qp->tx_ring_full++;
@@ -1127,7 +1323,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp,
 		return 0;
 	}
 
-	ntb_tx_copy_task(qp, entry, offset);
+	ntb_async_tx(qp, entry);
 
 	qp->tx_index++;
 	qp->tx_index %= qp->tx_max_entry;
@@ -1213,11 +1409,18 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev,
 	qp->tx_handler = handlers->tx_handler;
 	qp->event_handler = handlers->event_handler;
 
+	qp->dma_chan = dma_find_channel(DMA_MEMCPY);
+	if (!qp->dma_chan)
+		dev_info(&pdev->dev, "Unable to allocate DMA channel, using CPU instead\n");
+	else
+		dmaengine_get();
+
 	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
 		entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC);
 		if (!entry)
 			goto err1;
 
+		entry->qp = qp;
 		ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry,
 			     &qp->rx_free_q);
 	}
@@ -1227,6 +1430,7 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev,
 		if (!entry)
 			goto err2;
 
+		entry->qp = qp;
 		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
 			     &qp->tx_free_q);
 	}
@@ -1272,11 +1476,26 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
 
 	pdev = ntb_query_pdev(qp->ndev);
 
-	cancel_delayed_work_sync(&qp->link_work);
+	if (qp->dma_chan) {
+		struct dma_chan *chan = qp->dma_chan;
+		/* Putting the dma_chan to NULL will force any new traffic to be
+		 * processed by the CPU instead of the DMA engine
+		 */
+		qp->dma_chan = NULL;
+
+		/* Try to be nice and wait for any queued DMA engine
+		 * transactions to process before smashing it with a rock
+		 */
+		dma_sync_wait(chan, qp->last_cookie);
+		dmaengine_terminate_all(chan);
+		dmaengine_put();
+	}
 
 	ntb_unregister_db_callback(qp->ndev, qp->qp_num);
 	tasklet_disable(&qp->rx_work);
 
+	cancel_delayed_work_sync(&qp->link_work);
+
 	while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q)))
 		kfree(entry);
 
@@ -1382,8 +1601,10 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
 		return -EINVAL;
 
 	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
-	if (!entry)
+	if (!entry) {
+		qp->tx_err_no_buf++;
 		return -ENOMEM;
+	}
 
 	entry->cb_data = cb;
 	entry->buf = data;
@@ -1499,9 +1720,18 @@ EXPORT_SYMBOL_GPL(ntb_transport_qp_num);
  */
 unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp)
 {
+	unsigned int max;
+
 	if (!qp)
 		return 0;
 
-	return qp->tx_max_frame - sizeof(struct ntb_payload_header);
+	if (!qp->dma_chan)
+		return qp->tx_max_frame - sizeof(struct ntb_payload_header);
+
+	/* If DMA engine usage is possible, try to find the max size for that */
+	max = qp->tx_max_frame - sizeof(struct ntb_payload_header);
+	max -= max % (1 << qp->dma_chan->device->copy_align);
+
+	return max;
 }
 EXPORT_SYMBOL_GPL(ntb_transport_max_size);