diff options
author | Jon Mason <jon.mason@intel.com> | 2013-02-12 11:52:50 -0500 |
---|---|---|
committer | Jon Mason <jon.mason@intel.com> | 2013-09-05 14:04:09 -0400 |
commit | 282a2feeb9bfb1d1dfbad93df206b74eaf80d564 (patch) | |
tree | 9265a6308dd746606dbcb0bac330082f97780be9 /drivers/ntb/ntb_transport.c | |
parent | ac477afb0431386575ef453f50fa0052c3f0461b (diff) |
NTB: Use DMA Engine to Transmit and Receive
Allocate and use a DMA engine channel to transmit and receive data over
NTB. If none is allocated, fall back to using the CPU to transfer data.
Signed-off-by: Jon Mason <jon.mason@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Diffstat (limited to 'drivers/ntb/ntb_transport.c')
-rw-r--r-- | drivers/ntb/ntb_transport.c | 324 |
1 files changed, 277 insertions, 47 deletions
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f7380e959656..ae8657259ca0 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c | |||
@@ -47,6 +47,7 @@ | |||
47 | */ | 47 | */ |
48 | #include <linux/debugfs.h> | 48 | #include <linux/debugfs.h> |
49 | #include <linux/delay.h> | 49 | #include <linux/delay.h> |
50 | #include <linux/dmaengine.h> | ||
50 | #include <linux/dma-mapping.h> | 51 | #include <linux/dma-mapping.h> |
51 | #include <linux/errno.h> | 52 | #include <linux/errno.h> |
52 | #include <linux/export.h> | 53 | #include <linux/export.h> |
@@ -68,6 +69,10 @@ static unsigned char max_num_clients; | |||
68 | module_param(max_num_clients, byte, 0644); | 69 | module_param(max_num_clients, byte, 0644); |
69 | MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients"); | 70 | MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients"); |
70 | 71 | ||
72 | static unsigned int copy_bytes = 1024; | ||
73 | module_param(copy_bytes, uint, 0644); | ||
74 | MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA"); | ||
75 | |||
71 | struct ntb_queue_entry { | 76 | struct ntb_queue_entry { |
72 | /* ntb_queue list reference */ | 77 | /* ntb_queue list reference */ |
73 | struct list_head entry; | 78 | struct list_head entry; |
@@ -76,6 +81,13 @@ struct ntb_queue_entry { | |||
76 | void *buf; | 81 | void *buf; |
77 | unsigned int len; | 82 | unsigned int len; |
78 | unsigned int flags; | 83 | unsigned int flags; |
84 | |||
85 | struct ntb_transport_qp *qp; | ||
86 | union { | ||
87 | struct ntb_payload_header __iomem *tx_hdr; | ||
88 | struct ntb_payload_header *rx_hdr; | ||
89 | }; | ||
90 | unsigned int index; | ||
79 | }; | 91 | }; |
80 | 92 | ||
81 | struct ntb_rx_info { | 93 | struct ntb_rx_info { |
@@ -86,6 +98,7 @@ struct ntb_transport_qp { | |||
86 | struct ntb_transport *transport; | 98 | struct ntb_transport *transport; |
87 | struct ntb_device *ndev; | 99 | struct ntb_device *ndev; |
88 | void *cb_data; | 100 | void *cb_data; |
101 | struct dma_chan *dma_chan; | ||
89 | 102 | ||
90 | bool client_ready; | 103 | bool client_ready; |
91 | bool qp_link; | 104 | bool qp_link; |
@@ -99,6 +112,7 @@ struct ntb_transport_qp { | |||
99 | struct list_head tx_free_q; | 112 | struct list_head tx_free_q; |
100 | spinlock_t ntb_tx_free_q_lock; | 113 | spinlock_t ntb_tx_free_q_lock; |
101 | void __iomem *tx_mw; | 114 | void __iomem *tx_mw; |
115 | dma_addr_t tx_mw_phys; | ||
102 | unsigned int tx_index; | 116 | unsigned int tx_index; |
103 | unsigned int tx_max_entry; | 117 | unsigned int tx_max_entry; |
104 | unsigned int tx_max_frame; | 118 | unsigned int tx_max_frame; |
@@ -114,6 +128,7 @@ struct ntb_transport_qp { | |||
114 | unsigned int rx_index; | 128 | unsigned int rx_index; |
115 | unsigned int rx_max_entry; | 129 | unsigned int rx_max_entry; |
116 | unsigned int rx_max_frame; | 130 | unsigned int rx_max_frame; |
131 | dma_cookie_t last_cookie; | ||
117 | 132 | ||
118 | void (*event_handler) (void *data, int status); | 133 | void (*event_handler) (void *data, int status); |
119 | struct delayed_work link_work; | 134 | struct delayed_work link_work; |
@@ -129,9 +144,14 @@ struct ntb_transport_qp { | |||
129 | u64 rx_err_no_buf; | 144 | u64 rx_err_no_buf; |
130 | u64 rx_err_oflow; | 145 | u64 rx_err_oflow; |
131 | u64 rx_err_ver; | 146 | u64 rx_err_ver; |
147 | u64 rx_memcpy; | ||
148 | u64 rx_async; | ||
132 | u64 tx_bytes; | 149 | u64 tx_bytes; |
133 | u64 tx_pkts; | 150 | u64 tx_pkts; |
134 | u64 tx_ring_full; | 151 | u64 tx_ring_full; |
152 | u64 tx_err_no_buf; | ||
153 | u64 tx_memcpy; | ||
154 | u64 tx_async; | ||
135 | }; | 155 | }; |
136 | 156 | ||
137 | struct ntb_transport_mw { | 157 | struct ntb_transport_mw { |
@@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, | |||
381 | char *buf; | 401 | char *buf; |
382 | ssize_t ret, out_offset, out_count; | 402 | ssize_t ret, out_offset, out_count; |
383 | 403 | ||
384 | out_count = 600; | 404 | out_count = 1000; |
385 | 405 | ||
386 | buf = kmalloc(out_count, GFP_KERNEL); | 406 | buf = kmalloc(out_count, GFP_KERNEL); |
387 | if (!buf) | 407 | if (!buf) |
@@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, | |||
396 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 416 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
397 | "rx_pkts - \t%llu\n", qp->rx_pkts); | 417 | "rx_pkts - \t%llu\n", qp->rx_pkts); |
398 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 418 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
419 | "rx_memcpy - \t%llu\n", qp->rx_memcpy); | ||
420 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
421 | "rx_async - \t%llu\n", qp->rx_async); | ||
422 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
399 | "rx_ring_empty - %llu\n", qp->rx_ring_empty); | 423 | "rx_ring_empty - %llu\n", qp->rx_ring_empty); |
400 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 424 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
401 | "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); | 425 | "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); |
@@ -415,8 +439,14 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, | |||
415 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 439 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
416 | "tx_pkts - \t%llu\n", qp->tx_pkts); | 440 | "tx_pkts - \t%llu\n", qp->tx_pkts); |
417 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 441 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
442 | "tx_memcpy - \t%llu\n", qp->tx_memcpy); | ||
443 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
444 | "tx_async - \t%llu\n", qp->tx_async); | ||
445 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
418 | "tx_ring_full - \t%llu\n", qp->tx_ring_full); | 446 | "tx_ring_full - \t%llu\n", qp->tx_ring_full); |
419 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 447 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
448 | "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); | ||
449 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
420 | "tx_mw - \t%p\n", qp->tx_mw); | 450 | "tx_mw - \t%p\n", qp->tx_mw); |
421 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 451 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
422 | "tx_index - \t%u\n", qp->tx_index); | 452 | "tx_index - \t%u\n", qp->tx_index); |
@@ -488,11 +518,11 @@ static void ntb_transport_setup_qp_mw(struct ntb_transport *nt, | |||
488 | num_qps_mw = nt->max_qps / mw_max; | 518 | num_qps_mw = nt->max_qps / mw_max; |
489 | 519 | ||
490 | rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw; | 520 | rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw; |
491 | qp->remote_rx_info = nt->mw[mw_num].virt_addr + | 521 | qp->rx_buff = nt->mw[mw_num].virt_addr + qp_num / mw_max * rx_size; |
492 | (qp_num / mw_max * rx_size); | ||
493 | rx_size -= sizeof(struct ntb_rx_info); | 522 | rx_size -= sizeof(struct ntb_rx_info); |
494 | 523 | ||
495 | qp->rx_buff = qp->remote_rx_info + 1; | 524 | qp->remote_rx_info = qp->rx_buff + rx_size; |
525 | |||
496 | /* Due to housekeeping, there must be at least 2 buffs */ | 526 | /* Due to housekeeping, there must be at least 2 buffs */ |
497 | qp->rx_max_frame = min(transport_mtu, rx_size / 2); | 527 | qp->rx_max_frame = min(transport_mtu, rx_size / 2); |
498 | qp->rx_max_entry = rx_size / qp->rx_max_frame; | 528 | qp->rx_max_entry = rx_size / qp->rx_max_frame; |
@@ -796,12 +826,13 @@ static void ntb_qp_link_work(struct work_struct *work) | |||
796 | msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); | 826 | msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); |
797 | } | 827 | } |
798 | 828 | ||
799 | static void ntb_transport_init_queue(struct ntb_transport *nt, | 829 | static int ntb_transport_init_queue(struct ntb_transport *nt, |
800 | unsigned int qp_num) | 830 | unsigned int qp_num) |
801 | { | 831 | { |
802 | struct ntb_transport_qp *qp; | 832 | struct ntb_transport_qp *qp; |
803 | unsigned int num_qps_mw, tx_size; | 833 | unsigned int num_qps_mw, tx_size; |
804 | u8 mw_num, mw_max; | 834 | u8 mw_num, mw_max; |
835 | u64 qp_offset; | ||
805 | 836 | ||
806 | mw_max = ntb_max_mw(nt->ndev); | 837 | mw_max = ntb_max_mw(nt->ndev); |
807 | mw_num = QP_TO_MW(nt->ndev, qp_num); | 838 | mw_num = QP_TO_MW(nt->ndev, qp_num); |
@@ -820,11 +851,18 @@ static void ntb_transport_init_queue(struct ntb_transport *nt, | |||
820 | num_qps_mw = nt->max_qps / mw_max; | 851 | num_qps_mw = nt->max_qps / mw_max; |
821 | 852 | ||
822 | tx_size = (unsigned int) ntb_get_mw_size(qp->ndev, mw_num) / num_qps_mw; | 853 | tx_size = (unsigned int) ntb_get_mw_size(qp->ndev, mw_num) / num_qps_mw; |
823 | qp->rx_info = ntb_get_mw_vbase(nt->ndev, mw_num) + | 854 | qp_offset = qp_num / mw_max * tx_size; |
824 | (qp_num / mw_max * tx_size); | 855 | qp->tx_mw = ntb_get_mw_vbase(nt->ndev, mw_num) + qp_offset; |
856 | if (!qp->tx_mw) | ||
857 | return -EINVAL; | ||
858 | |||
859 | qp->tx_mw_phys = ntb_get_mw_base(qp->ndev, mw_num) + qp_offset; | ||
860 | if (!qp->tx_mw_phys) | ||
861 | return -EINVAL; | ||
862 | |||
825 | tx_size -= sizeof(struct ntb_rx_info); | 863 | tx_size -= sizeof(struct ntb_rx_info); |
864 | qp->rx_info = qp->tx_mw + tx_size; | ||
826 | 865 | ||
827 | qp->tx_mw = qp->rx_info + 1; | ||
828 | /* Due to housekeeping, there must be at least 2 buffs */ | 866 | /* Due to housekeeping, there must be at least 2 buffs */ |
829 | qp->tx_max_frame = min(transport_mtu, tx_size / 2); | 867 | qp->tx_max_frame = min(transport_mtu, tx_size / 2); |
830 | qp->tx_max_entry = tx_size / qp->tx_max_frame; | 868 | qp->tx_max_entry = tx_size / qp->tx_max_frame; |
@@ -851,6 +889,8 @@ static void ntb_transport_init_queue(struct ntb_transport *nt, | |||
851 | INIT_LIST_HEAD(&qp->rx_pend_q); | 889 | INIT_LIST_HEAD(&qp->rx_pend_q); |
852 | INIT_LIST_HEAD(&qp->rx_free_q); | 890 | INIT_LIST_HEAD(&qp->rx_free_q); |
853 | INIT_LIST_HEAD(&qp->tx_free_q); | 891 | INIT_LIST_HEAD(&qp->tx_free_q); |
892 | |||
893 | return 0; | ||
854 | } | 894 | } |
855 | 895 | ||
856 | int ntb_transport_init(struct pci_dev *pdev) | 896 | int ntb_transport_init(struct pci_dev *pdev) |
@@ -889,8 +929,11 @@ int ntb_transport_init(struct pci_dev *pdev) | |||
889 | 929 | ||
890 | nt->qp_bitmap = ((u64) 1 << nt->max_qps) - 1; | 930 | nt->qp_bitmap = ((u64) 1 << nt->max_qps) - 1; |
891 | 931 | ||
892 | for (i = 0; i < nt->max_qps; i++) | 932 | for (i = 0; i < nt->max_qps; i++) { |
893 | ntb_transport_init_queue(nt, i); | 933 | rc = ntb_transport_init_queue(nt, i); |
934 | if (rc) | ||
935 | goto err3; | ||
936 | } | ||
894 | 937 | ||
895 | INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work); | 938 | INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work); |
896 | INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup); | 939 | INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup); |
@@ -956,13 +999,19 @@ void ntb_transport_free(void *transport) | |||
956 | kfree(nt); | 999 | kfree(nt); |
957 | } | 1000 | } |
958 | 1001 | ||
959 | static void ntb_rx_copy_task(struct ntb_transport_qp *qp, | 1002 | static void ntb_rx_copy_callback(void *data) |
960 | struct ntb_queue_entry *entry, void *offset) | ||
961 | { | 1003 | { |
1004 | struct ntb_queue_entry *entry = data; | ||
1005 | struct ntb_transport_qp *qp = entry->qp; | ||
962 | void *cb_data = entry->cb_data; | 1006 | void *cb_data = entry->cb_data; |
963 | unsigned int len = entry->len; | 1007 | unsigned int len = entry->len; |
1008 | struct ntb_payload_header *hdr = entry->rx_hdr; | ||
1009 | |||
1010 | /* Ensure that the data is fully copied out before clearing the flag */ | ||
1011 | wmb(); | ||
1012 | hdr->flags = 0; | ||
964 | 1013 | ||
965 | memcpy(entry->buf, offset, entry->len); | 1014 | iowrite32(entry->index, &qp->rx_info->entry); |
966 | 1015 | ||
967 | ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); | 1016 | ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); |
968 | 1017 | ||
@@ -970,6 +1019,86 @@ static void ntb_rx_copy_task(struct ntb_transport_qp *qp, | |||
970 | qp->rx_handler(qp, qp->cb_data, cb_data, len); | 1019 | qp->rx_handler(qp, qp->cb_data, cb_data, len); |
971 | } | 1020 | } |
972 | 1021 | ||
1022 | static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) | ||
1023 | { | ||
1024 | void *buf = entry->buf; | ||
1025 | size_t len = entry->len; | ||
1026 | |||
1027 | memcpy(buf, offset, len); | ||
1028 | |||
1029 | ntb_rx_copy_callback(entry); | ||
1030 | } | ||
1031 | |||
1032 | static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, | ||
1033 | size_t len) | ||
1034 | { | ||
1035 | struct dma_async_tx_descriptor *txd; | ||
1036 | struct ntb_transport_qp *qp = entry->qp; | ||
1037 | struct dma_chan *chan = qp->dma_chan; | ||
1038 | struct dma_device *device; | ||
1039 | size_t pay_off, buff_off; | ||
1040 | dma_addr_t src, dest; | ||
1041 | dma_cookie_t cookie; | ||
1042 | void *buf = entry->buf; | ||
1043 | unsigned long flags; | ||
1044 | |||
1045 | entry->len = len; | ||
1046 | |||
1047 | if (!chan) | ||
1048 | goto err; | ||
1049 | |||
1050 | if (len < copy_bytes) | ||
1051 | goto err1; | ||
1052 | |||
1053 | device = chan->device; | ||
1054 | pay_off = (size_t) offset & ~PAGE_MASK; | ||
1055 | buff_off = (size_t) buf & ~PAGE_MASK; | ||
1056 | |||
1057 | if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) | ||
1058 | goto err1; | ||
1059 | |||
1060 | dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE); | ||
1061 | if (dma_mapping_error(device->dev, dest)) | ||
1062 | goto err1; | ||
1063 | |||
1064 | src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE); | ||
1065 | if (dma_mapping_error(device->dev, src)) | ||
1066 | goto err2; | ||
1067 | |||
1068 | flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE | | ||
1069 | DMA_PREP_INTERRUPT; | ||
1070 | txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags); | ||
1071 | if (!txd) | ||
1072 | goto err3; | ||
1073 | |||
1074 | txd->callback = ntb_rx_copy_callback; | ||
1075 | txd->callback_param = entry; | ||
1076 | |||
1077 | cookie = dmaengine_submit(txd); | ||
1078 | if (dma_submit_error(cookie)) | ||
1079 | goto err3; | ||
1080 | |||
1081 | qp->last_cookie = cookie; | ||
1082 | |||
1083 | qp->rx_async++; | ||
1084 | |||
1085 | return; | ||
1086 | |||
1087 | err3: | ||
1088 | dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE); | ||
1089 | err2: | ||
1090 | dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE); | ||
1091 | err1: | ||
1092 | /* If the callbacks come out of order, the writing of the index to the | ||
1093 | * last completed will be out of order. This may result in the | ||
1094 | * receive stalling forever. | ||
1095 | */ | ||
1096 | dma_sync_wait(chan, qp->last_cookie); | ||
1097 | err: | ||
1098 | ntb_memcpy_rx(entry, offset); | ||
1099 | qp->rx_memcpy++; | ||
1100 | } | ||
1101 | |||
973 | static int ntb_process_rxc(struct ntb_transport_qp *qp) | 1102 | static int ntb_process_rxc(struct ntb_transport_qp *qp) |
974 | { | 1103 | { |
975 | struct ntb_payload_header *hdr; | 1104 | struct ntb_payload_header *hdr; |
@@ -1008,41 +1137,45 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) | |||
1008 | if (hdr->flags & LINK_DOWN_FLAG) { | 1137 | if (hdr->flags & LINK_DOWN_FLAG) { |
1009 | ntb_qp_link_down(qp); | 1138 | ntb_qp_link_down(qp); |
1010 | 1139 | ||
1011 | ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, | 1140 | goto err; |
1012 | &qp->rx_pend_q); | ||
1013 | goto out; | ||
1014 | } | 1141 | } |
1015 | 1142 | ||
1016 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, | 1143 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, |
1017 | "rx offset %u, ver %u - %d payload received, buf size %d\n", | 1144 | "rx offset %u, ver %u - %d payload received, buf size %d\n", |
1018 | qp->rx_index, hdr->ver, hdr->len, entry->len); | 1145 | qp->rx_index, hdr->ver, hdr->len, entry->len); |
1019 | 1146 | ||
1020 | if (hdr->len <= entry->len) { | 1147 | qp->rx_bytes += hdr->len; |
1021 | entry->len = hdr->len; | 1148 | qp->rx_pkts++; |
1022 | ntb_rx_copy_task(qp, entry, offset); | ||
1023 | } else { | ||
1024 | ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, | ||
1025 | &qp->rx_pend_q); | ||
1026 | 1149 | ||
1150 | if (hdr->len > entry->len) { | ||
1027 | qp->rx_err_oflow++; | 1151 | qp->rx_err_oflow++; |
1028 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, | 1152 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, |
1029 | "RX overflow! Wanted %d got %d\n", | 1153 | "RX overflow! Wanted %d got %d\n", |
1030 | hdr->len, entry->len); | 1154 | hdr->len, entry->len); |
1155 | |||
1156 | goto err; | ||
1031 | } | 1157 | } |
1032 | 1158 | ||
1033 | qp->rx_bytes += hdr->len; | 1159 | entry->index = qp->rx_index; |
1034 | qp->rx_pkts++; | 1160 | entry->rx_hdr = hdr; |
1161 | |||
1162 | ntb_async_rx(entry, offset, hdr->len); | ||
1035 | 1163 | ||
1036 | out: | 1164 | out: |
1165 | qp->rx_index++; | ||
1166 | qp->rx_index %= qp->rx_max_entry; | ||
1167 | |||
1168 | return 0; | ||
1169 | |||
1170 | err: | ||
1171 | ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, | ||
1172 | &qp->rx_pend_q); | ||
1037 | /* Ensure that the data is fully copied out before clearing the flag */ | 1173 | /* Ensure that the data is fully copied out before clearing the flag */ |
1038 | wmb(); | 1174 | wmb(); |
1039 | hdr->flags = 0; | 1175 | hdr->flags = 0; |
1040 | iowrite32(qp->rx_index, &qp->rx_info->entry); | 1176 | iowrite32(qp->rx_index, &qp->rx_info->entry); |
1041 | 1177 | ||
1042 | qp->rx_index++; | 1178 | goto out; |
1043 | qp->rx_index %= qp->rx_max_entry; | ||
1044 | |||
1045 | return 0; | ||
1046 | } | 1179 | } |
1047 | 1180 | ||
1048 | static void ntb_transport_rx(unsigned long data) | 1181 | static void ntb_transport_rx(unsigned long data) |
@@ -1058,6 +1191,9 @@ static void ntb_transport_rx(unsigned long data) | |||
1058 | if (rc) | 1191 | if (rc) |
1059 | break; | 1192 | break; |
1060 | } | 1193 | } |
1194 | |||
1195 | if (qp->dma_chan) | ||
1196 | dma_async_issue_pending(qp->dma_chan); | ||
1061 | } | 1197 | } |
1062 | 1198 | ||
1063 | static void ntb_transport_rxc_db(void *data, int db_num) | 1199 | static void ntb_transport_rxc_db(void *data, int db_num) |
@@ -1070,19 +1206,13 @@ static void ntb_transport_rxc_db(void *data, int db_num) | |||
1070 | tasklet_schedule(&qp->rx_work); | 1206 | tasklet_schedule(&qp->rx_work); |
1071 | } | 1207 | } |
1072 | 1208 | ||
1073 | static void ntb_tx_copy_task(struct ntb_transport_qp *qp, | 1209 | static void ntb_tx_copy_callback(void *data) |
1074 | struct ntb_queue_entry *entry, | ||
1075 | void __iomem *offset) | ||
1076 | { | 1210 | { |
1077 | struct ntb_payload_header __iomem *hdr; | 1211 | struct ntb_queue_entry *entry = data; |
1078 | 1212 | struct ntb_transport_qp *qp = entry->qp; | |
1079 | memcpy_toio(offset, entry->buf, entry->len); | 1213 | struct ntb_payload_header __iomem *hdr = entry->tx_hdr; |
1080 | 1214 | ||
1081 | hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header); | 1215 | /* Ensure that the data is fully copied out before setting the flags */ |
1082 | iowrite32(entry->len, &hdr->len); | ||
1083 | iowrite32((u32) qp->tx_pkts, &hdr->ver); | ||
1084 | |||
1085 | /* Ensure that the data is fully copied out before setting the flag */ | ||
1086 | wmb(); | 1216 | wmb(); |
1087 | iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); | 1217 | iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); |
1088 | 1218 | ||
@@ -1103,15 +1233,81 @@ static void ntb_tx_copy_task(struct ntb_transport_qp *qp, | |||
1103 | ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q); | 1233 | ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q); |
1104 | } | 1234 | } |
1105 | 1235 | ||
1106 | static int ntb_process_tx(struct ntb_transport_qp *qp, | 1236 | static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) |
1107 | struct ntb_queue_entry *entry) | ||
1108 | { | 1237 | { |
1238 | memcpy_toio(offset, entry->buf, entry->len); | ||
1239 | |||
1240 | ntb_tx_copy_callback(entry); | ||
1241 | } | ||
1242 | |||
1243 | static void ntb_async_tx(struct ntb_transport_qp *qp, | ||
1244 | struct ntb_queue_entry *entry) | ||
1245 | { | ||
1246 | struct ntb_payload_header __iomem *hdr; | ||
1247 | struct dma_async_tx_descriptor *txd; | ||
1248 | struct dma_chan *chan = qp->dma_chan; | ||
1249 | struct dma_device *device; | ||
1250 | size_t dest_off, buff_off; | ||
1251 | dma_addr_t src, dest; | ||
1252 | dma_cookie_t cookie; | ||
1109 | void __iomem *offset; | 1253 | void __iomem *offset; |
1254 | size_t len = entry->len; | ||
1255 | void *buf = entry->buf; | ||
1256 | unsigned long flags; | ||
1110 | 1257 | ||
1111 | offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index; | 1258 | offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index; |
1259 | hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header); | ||
1260 | entry->tx_hdr = hdr; | ||
1112 | 1261 | ||
1113 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - offset %p, tx %u, entry len %d flags %x buff %p\n", | 1262 | iowrite32(entry->len, &hdr->len); |
1114 | qp->tx_pkts, offset, qp->tx_index, entry->len, entry->flags, | 1263 | iowrite32((u32) qp->tx_pkts, &hdr->ver); |
1264 | |||
1265 | if (!chan) | ||
1266 | goto err; | ||
1267 | |||
1268 | if (len < copy_bytes) | ||
1269 | goto err; | ||
1270 | |||
1271 | device = chan->device; | ||
1272 | dest = qp->tx_mw_phys + qp->tx_max_frame * qp->tx_index; | ||
1273 | buff_off = (size_t) buf & ~PAGE_MASK; | ||
1274 | dest_off = (size_t) dest & ~PAGE_MASK; | ||
1275 | |||
1276 | if (!is_dma_copy_aligned(device, buff_off, dest_off, len)) | ||
1277 | goto err; | ||
1278 | |||
1279 | src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE); | ||
1280 | if (dma_mapping_error(device->dev, src)) | ||
1281 | goto err; | ||
1282 | |||
1283 | flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT; | ||
1284 | txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags); | ||
1285 | if (!txd) | ||
1286 | goto err1; | ||
1287 | |||
1288 | txd->callback = ntb_tx_copy_callback; | ||
1289 | txd->callback_param = entry; | ||
1290 | |||
1291 | cookie = dmaengine_submit(txd); | ||
1292 | if (dma_submit_error(cookie)) | ||
1293 | goto err1; | ||
1294 | |||
1295 | dma_async_issue_pending(chan); | ||
1296 | qp->tx_async++; | ||
1297 | |||
1298 | return; | ||
1299 | err1: | ||
1300 | dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE); | ||
1301 | err: | ||
1302 | ntb_memcpy_tx(entry, offset); | ||
1303 | qp->tx_memcpy++; | ||
1304 | } | ||
1305 | |||
1306 | static int ntb_process_tx(struct ntb_transport_qp *qp, | ||
1307 | struct ntb_queue_entry *entry) | ||
1308 | { | ||
1309 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - tx %u, entry len %d flags %x buff %p\n", | ||
1310 | qp->tx_pkts, qp->tx_index, entry->len, entry->flags, | ||
1115 | entry->buf); | 1311 | entry->buf); |
1116 | if (qp->tx_index == qp->remote_rx_info->entry) { | 1312 | if (qp->tx_index == qp->remote_rx_info->entry) { |
1117 | qp->tx_ring_full++; | 1313 | qp->tx_ring_full++; |
@@ -1127,7 +1323,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp, | |||
1127 | return 0; | 1323 | return 0; |
1128 | } | 1324 | } |
1129 | 1325 | ||
1130 | ntb_tx_copy_task(qp, entry, offset); | 1326 | ntb_async_tx(qp, entry); |
1131 | 1327 | ||
1132 | qp->tx_index++; | 1328 | qp->tx_index++; |
1133 | qp->tx_index %= qp->tx_max_entry; | 1329 | qp->tx_index %= qp->tx_max_entry; |
@@ -1213,11 +1409,18 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, | |||
1213 | qp->tx_handler = handlers->tx_handler; | 1409 | qp->tx_handler = handlers->tx_handler; |
1214 | qp->event_handler = handlers->event_handler; | 1410 | qp->event_handler = handlers->event_handler; |
1215 | 1411 | ||
1412 | qp->dma_chan = dma_find_channel(DMA_MEMCPY); | ||
1413 | if (!qp->dma_chan) | ||
1414 | dev_info(&pdev->dev, "Unable to allocate DMA channel, using CPU instead\n"); | ||
1415 | else | ||
1416 | dmaengine_get(); | ||
1417 | |||
1216 | for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { | 1418 | for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { |
1217 | entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC); | 1419 | entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC); |
1218 | if (!entry) | 1420 | if (!entry) |
1219 | goto err1; | 1421 | goto err1; |
1220 | 1422 | ||
1423 | entry->qp = qp; | ||
1221 | ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, | 1424 | ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, |
1222 | &qp->rx_free_q); | 1425 | &qp->rx_free_q); |
1223 | } | 1426 | } |
@@ -1227,6 +1430,7 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, | |||
1227 | if (!entry) | 1430 | if (!entry) |
1228 | goto err2; | 1431 | goto err2; |
1229 | 1432 | ||
1433 | entry->qp = qp; | ||
1230 | ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, | 1434 | ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, |
1231 | &qp->tx_free_q); | 1435 | &qp->tx_free_q); |
1232 | } | 1436 | } |
@@ -1272,11 +1476,26 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) | |||
1272 | 1476 | ||
1273 | pdev = ntb_query_pdev(qp->ndev); | 1477 | pdev = ntb_query_pdev(qp->ndev); |
1274 | 1478 | ||
1275 | cancel_delayed_work_sync(&qp->link_work); | 1479 | if (qp->dma_chan) { |
1480 | struct dma_chan *chan = qp->dma_chan; | ||
1481 | /* Putting the dma_chan to NULL will force any new traffic to be | ||
1483 | * processed by the CPU instead of the DMA engine | |
1483 | */ | ||
1484 | qp->dma_chan = NULL; | ||
1485 | |||
1486 | /* Try to be nice and wait for any queued DMA engine | ||
1487 | * transactions to process before smashing it with a rock | ||
1488 | */ | ||
1489 | dma_sync_wait(chan, qp->last_cookie); | ||
1490 | dmaengine_terminate_all(chan); | ||
1491 | dmaengine_put(); | ||
1492 | } | ||
1276 | 1493 | ||
1277 | ntb_unregister_db_callback(qp->ndev, qp->qp_num); | 1494 | ntb_unregister_db_callback(qp->ndev, qp->qp_num); |
1278 | tasklet_disable(&qp->rx_work); | 1495 | tasklet_disable(&qp->rx_work); |
1279 | 1496 | ||
1497 | cancel_delayed_work_sync(&qp->link_work); | ||
1498 | |||
1280 | while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) | 1499 | while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) |
1281 | kfree(entry); | 1500 | kfree(entry); |
1282 | 1501 | ||
@@ -1382,8 +1601,10 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, | |||
1382 | return -EINVAL; | 1601 | return -EINVAL; |
1383 | 1602 | ||
1384 | entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); | 1603 | entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); |
1385 | if (!entry) | 1604 | if (!entry) { |
1605 | qp->tx_err_no_buf++; | ||
1386 | return -ENOMEM; | 1606 | return -ENOMEM; |
1607 | } | ||
1387 | 1608 | ||
1388 | entry->cb_data = cb; | 1609 | entry->cb_data = cb; |
1389 | entry->buf = data; | 1610 | entry->buf = data; |
@@ -1499,9 +1720,18 @@ EXPORT_SYMBOL_GPL(ntb_transport_qp_num); | |||
1499 | */ | 1720 | */ |
1500 | unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) | 1721 | unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) |
1501 | { | 1722 | { |
1723 | unsigned int max; | ||
1724 | |||
1502 | if (!qp) | 1725 | if (!qp) |
1503 | return 0; | 1726 | return 0; |
1504 | 1727 | ||
1505 | return qp->tx_max_frame - sizeof(struct ntb_payload_header); | 1728 | if (!qp->dma_chan) |
1729 | return qp->tx_max_frame - sizeof(struct ntb_payload_header); | ||
1730 | |||
1731 | /* If DMA engine usage is possible, try to find the max size for that */ | ||
1732 | max = qp->tx_max_frame - sizeof(struct ntb_payload_header); | ||
1733 | max -= max % (1 << qp->dma_chan->device->copy_align); | ||
1734 | |||
1735 | return max; | ||
1506 | } | 1736 | } |
1507 | EXPORT_SYMBOL_GPL(ntb_transport_max_size); | 1737 | EXPORT_SYMBOL_GPL(ntb_transport_max_size); |