diff options
| author | Jon Mason <jon.mason@intel.com> | 2013-02-12 11:52:50 -0500 |
|---|---|---|
| committer | Jon Mason <jon.mason@intel.com> | 2013-09-05 14:04:09 -0400 |
| commit | 282a2feeb9bfb1d1dfbad93df206b74eaf80d564 (patch) | |
| tree | 9265a6308dd746606dbcb0bac330082f97780be9 /drivers/ntb | |
| parent | ac477afb0431386575ef453f50fa0052c3f0461b (diff) | |
NTB: Use DMA Engine to Transmit and Receive
Allocate and use a DMA engine channel to transmit and receive data over
NTB. If none is allocated, fall back to using the CPU to transfer data.
Signed-off-by: Jon Mason <jon.mason@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Diffstat (limited to 'drivers/ntb')
| -rw-r--r-- | drivers/ntb/ntb_hw.c | 17 | ||||
| -rw-r--r-- | drivers/ntb/ntb_hw.h | 1 | ||||
| -rw-r--r-- | drivers/ntb/ntb_transport.c | 324 |
3 files changed, 295 insertions, 47 deletions
diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index ab34795cf125..0345817a8355 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c | |||
| @@ -350,6 +350,23 @@ int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) | |||
| 350 | } | 350 | } |
| 351 | 351 | ||
| 352 | /** | 352 | /** |
| 353 | * ntb_get_mw_base() - get addr for the NTB memory window | ||
| 354 | * @ndev: pointer to ntb_device instance | ||
| 355 | * @mw: memory window number | ||
| 356 | * | ||
| 357 | * This function provides the base address of the memory window specified. | ||
| 358 | * | ||
| 359 | * RETURNS: address, or NULL on error. | ||
| 360 | */ | ||
| 361 | resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) | ||
| 362 | { | ||
| 363 | if (mw >= ntb_max_mw(ndev)) | ||
| 364 | return 0; | ||
| 365 | |||
| 366 | return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); | ||
| 367 | } | ||
| 368 | |||
| 369 | /** | ||
| 353 | * ntb_get_mw_vbase() - get virtual addr for the NTB memory window | 370 | * ntb_get_mw_vbase() - get virtual addr for the NTB memory window |
| 354 | * @ndev: pointer to ntb_device instance | 371 | * @ndev: pointer to ntb_device instance |
| 355 | * @mw: memory window number | 372 | * @mw: memory window number |
diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index d838bc13b956..4f42ed18103a 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h | |||
| @@ -240,6 +240,7 @@ int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); | |||
| 240 | int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); | 240 | int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); |
| 241 | int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); | 241 | int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); |
| 242 | int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); | 242 | int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); |
| 243 | resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); | ||
| 243 | void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); | 244 | void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); |
| 244 | u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); | 245 | u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); |
| 245 | void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); | 246 | void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx); |
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f7380e959656..ae8657259ca0 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | */ | 47 | */ |
| 48 | #include <linux/debugfs.h> | 48 | #include <linux/debugfs.h> |
| 49 | #include <linux/delay.h> | 49 | #include <linux/delay.h> |
| 50 | #include <linux/dmaengine.h> | ||
| 50 | #include <linux/dma-mapping.h> | 51 | #include <linux/dma-mapping.h> |
| 51 | #include <linux/errno.h> | 52 | #include <linux/errno.h> |
| 52 | #include <linux/export.h> | 53 | #include <linux/export.h> |
| @@ -68,6 +69,10 @@ static unsigned char max_num_clients; | |||
| 68 | module_param(max_num_clients, byte, 0644); | 69 | module_param(max_num_clients, byte, 0644); |
| 69 | MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients"); | 70 | MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients"); |
| 70 | 71 | ||
| 72 | static unsigned int copy_bytes = 1024; | ||
| 73 | module_param(copy_bytes, uint, 0644); | ||
| 74 | MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA"); | ||
| 75 | |||
| 71 | struct ntb_queue_entry { | 76 | struct ntb_queue_entry { |
| 72 | /* ntb_queue list reference */ | 77 | /* ntb_queue list reference */ |
| 73 | struct list_head entry; | 78 | struct list_head entry; |
| @@ -76,6 +81,13 @@ struct ntb_queue_entry { | |||
| 76 | void *buf; | 81 | void *buf; |
| 77 | unsigned int len; | 82 | unsigned int len; |
| 78 | unsigned int flags; | 83 | unsigned int flags; |
| 84 | |||
| 85 | struct ntb_transport_qp *qp; | ||
| 86 | union { | ||
| 87 | struct ntb_payload_header __iomem *tx_hdr; | ||
| 88 | struct ntb_payload_header *rx_hdr; | ||
| 89 | }; | ||
| 90 | unsigned int index; | ||
| 79 | }; | 91 | }; |
| 80 | 92 | ||
| 81 | struct ntb_rx_info { | 93 | struct ntb_rx_info { |
| @@ -86,6 +98,7 @@ struct ntb_transport_qp { | |||
| 86 | struct ntb_transport *transport; | 98 | struct ntb_transport *transport; |
| 87 | struct ntb_device *ndev; | 99 | struct ntb_device *ndev; |
| 88 | void *cb_data; | 100 | void *cb_data; |
| 101 | struct dma_chan *dma_chan; | ||
| 89 | 102 | ||
| 90 | bool client_ready; | 103 | bool client_ready; |
| 91 | bool qp_link; | 104 | bool qp_link; |
| @@ -99,6 +112,7 @@ struct ntb_transport_qp { | |||
| 99 | struct list_head tx_free_q; | 112 | struct list_head tx_free_q; |
| 100 | spinlock_t ntb_tx_free_q_lock; | 113 | spinlock_t ntb_tx_free_q_lock; |
| 101 | void __iomem *tx_mw; | 114 | void __iomem *tx_mw; |
| 115 | dma_addr_t tx_mw_phys; | ||
| 102 | unsigned int tx_index; | 116 | unsigned int tx_index; |
| 103 | unsigned int tx_max_entry; | 117 | unsigned int tx_max_entry; |
| 104 | unsigned int tx_max_frame; | 118 | unsigned int tx_max_frame; |
| @@ -114,6 +128,7 @@ struct ntb_transport_qp { | |||
| 114 | unsigned int rx_index; | 128 | unsigned int rx_index; |
| 115 | unsigned int rx_max_entry; | 129 | unsigned int rx_max_entry; |
| 116 | unsigned int rx_max_frame; | 130 | unsigned int rx_max_frame; |
| 131 | dma_cookie_t last_cookie; | ||
| 117 | 132 | ||
| 118 | void (*event_handler) (void *data, int status); | 133 | void (*event_handler) (void *data, int status); |
| 119 | struct delayed_work link_work; | 134 | struct delayed_work link_work; |
| @@ -129,9 +144,14 @@ struct ntb_transport_qp { | |||
| 129 | u64 rx_err_no_buf; | 144 | u64 rx_err_no_buf; |
| 130 | u64 rx_err_oflow; | 145 | u64 rx_err_oflow; |
| 131 | u64 rx_err_ver; | 146 | u64 rx_err_ver; |
| 147 | u64 rx_memcpy; | ||
| 148 | u64 rx_async; | ||
| 132 | u64 tx_bytes; | 149 | u64 tx_bytes; |
| 133 | u64 tx_pkts; | 150 | u64 tx_pkts; |
| 134 | u64 tx_ring_full; | 151 | u64 tx_ring_full; |
| 152 | u64 tx_err_no_buf; | ||
| 153 | u64 tx_memcpy; | ||
| 154 | u64 tx_async; | ||
| 135 | }; | 155 | }; |
| 136 | 156 | ||
| 137 | struct ntb_transport_mw { | 157 | struct ntb_transport_mw { |
| @@ -381,7 +401,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, | |||
| 381 | char *buf; | 401 | char *buf; |
| 382 | ssize_t ret, out_offset, out_count; | 402 | ssize_t ret, out_offset, out_count; |
| 383 | 403 | ||
| 384 | out_count = 600; | 404 | out_count = 1000; |
| 385 | 405 | ||
| 386 | buf = kmalloc(out_count, GFP_KERNEL); | 406 | buf = kmalloc(out_count, GFP_KERNEL); |
| 387 | if (!buf) | 407 | if (!buf) |
| @@ -396,6 +416,10 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, | |||
| 396 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 416 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 397 | "rx_pkts - \t%llu\n", qp->rx_pkts); | 417 | "rx_pkts - \t%llu\n", qp->rx_pkts); |
| 398 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 418 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 419 | "rx_memcpy - \t%llu\n", qp->rx_memcpy); | ||
| 420 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
| 421 | "rx_async - \t%llu\n", qp->rx_async); | ||
| 422 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
| 399 | "rx_ring_empty - %llu\n", qp->rx_ring_empty); | 423 | "rx_ring_empty - %llu\n", qp->rx_ring_empty); |
| 400 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 424 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 401 | "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); | 425 | "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); |
| @@ -415,8 +439,14 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, | |||
| 415 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 439 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 416 | "tx_pkts - \t%llu\n", qp->tx_pkts); | 440 | "tx_pkts - \t%llu\n", qp->tx_pkts); |
| 417 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 441 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 442 | "tx_memcpy - \t%llu\n", qp->tx_memcpy); | ||
| 443 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
| 444 | "tx_async - \t%llu\n", qp->tx_async); | ||
| 445 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
| 418 | "tx_ring_full - \t%llu\n", qp->tx_ring_full); | 446 | "tx_ring_full - \t%llu\n", qp->tx_ring_full); |
| 419 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 447 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 448 | "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); | ||
| 449 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | ||
| 420 | "tx_mw - \t%p\n", qp->tx_mw); | 450 | "tx_mw - \t%p\n", qp->tx_mw); |
| 421 | out_offset += snprintf(buf + out_offset, out_count - out_offset, | 451 | out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 422 | "tx_index - \t%u\n", qp->tx_index); | 452 | "tx_index - \t%u\n", qp->tx_index); |
| @@ -488,11 +518,11 @@ static void ntb_transport_setup_qp_mw(struct ntb_transport *nt, | |||
| 488 | num_qps_mw = nt->max_qps / mw_max; | 518 | num_qps_mw = nt->max_qps / mw_max; |
| 489 | 519 | ||
| 490 | rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw; | 520 | rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw; |
| 491 | qp->remote_rx_info = nt->mw[mw_num].virt_addr + | 521 | qp->rx_buff = nt->mw[mw_num].virt_addr + qp_num / mw_max * rx_size; |
| 492 | (qp_num / mw_max * rx_size); | ||
| 493 | rx_size -= sizeof(struct ntb_rx_info); | 522 | rx_size -= sizeof(struct ntb_rx_info); |
| 494 | 523 | ||
| 495 | qp->rx_buff = qp->remote_rx_info + 1; | 524 | qp->remote_rx_info = qp->rx_buff + rx_size; |
| 525 | |||
| 496 | /* Due to housekeeping, there must be at least 2 buffs */ | 526 | |
| 497 | qp->rx_max_frame = min(transport_mtu, rx_size / 2); | 527 | qp->rx_max_frame = min(transport_mtu, rx_size / 2); |
| 498 | qp->rx_max_entry = rx_size / qp->rx_max_frame; | 528 | qp->rx_max_entry = rx_size / qp->rx_max_frame; |
| @@ -796,12 +826,13 @@ static void ntb_qp_link_work(struct work_struct *work) | |||
| 796 | msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); | 826 | msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); |
| 797 | } | 827 | } |
| 798 | 828 | ||
| 799 | static void ntb_transport_init_queue(struct ntb_transport *nt, | 829 | static int ntb_transport_init_queue(struct ntb_transport *nt, |
| 800 | unsigned int qp_num) | 830 | unsigned int qp_num) |
| 801 | { | 831 | { |
| 802 | struct ntb_transport_qp *qp; | 832 | struct ntb_transport_qp *qp; |
| 803 | unsigned int num_qps_mw, tx_size; | 833 | unsigned int num_qps_mw, tx_size; |
| 804 | u8 mw_num, mw_max; | 834 | u8 mw_num, mw_max; |
| 835 | u64 qp_offset; | ||
| 805 | 836 | ||
| 806 | mw_max = ntb_max_mw(nt->ndev); | 837 | mw_max = ntb_max_mw(nt->ndev); |
| 807 | mw_num = QP_TO_MW(nt->ndev, qp_num); | 838 | mw_num = QP_TO_MW(nt->ndev, qp_num); |
| @@ -820,11 +851,18 @@ static void ntb_transport_init_queue(struct ntb_transport *nt, | |||
| 820 | num_qps_mw = nt->max_qps / mw_max; | 851 | num_qps_mw = nt->max_qps / mw_max; |
| 821 | 852 | ||
| 822 | tx_size = (unsigned int) ntb_get_mw_size(qp->ndev, mw_num) / num_qps_mw; | 853 | tx_size = (unsigned int) ntb_get_mw_size(qp->ndev, mw_num) / num_qps_mw; |
| 823 | qp->rx_info = ntb_get_mw_vbase(nt->ndev, mw_num) + | 854 | qp_offset = qp_num / mw_max * tx_size; |
| 824 | (qp_num / mw_max * tx_size); | 855 | qp->tx_mw = ntb_get_mw_vbase(nt->ndev, mw_num) + qp_offset; |
| 856 | if (!qp->tx_mw) | ||
| 857 | return -EINVAL; | ||
| 858 | |||
| 859 | qp->tx_mw_phys = ntb_get_mw_base(qp->ndev, mw_num) + qp_offset; | ||
| 860 | if (!qp->tx_mw_phys) | ||
| 861 | return -EINVAL; | ||
| 862 | |||
| 825 | tx_size -= sizeof(struct ntb_rx_info); | 863 | tx_size -= sizeof(struct ntb_rx_info); |
| 864 | qp->rx_info = qp->tx_mw + tx_size; | ||
| 826 | 865 | ||
| 827 | qp->tx_mw = qp->rx_info + 1; | ||
| 828 | /* Due to housekeeping, there must be at least 2 buffs */ | 866 | |
| 829 | qp->tx_max_frame = min(transport_mtu, tx_size / 2); | 867 | qp->tx_max_frame = min(transport_mtu, tx_size / 2); |
| 830 | qp->tx_max_entry = tx_size / qp->tx_max_frame; | 868 | qp->tx_max_entry = tx_size / qp->tx_max_frame; |
| @@ -851,6 +889,8 @@ static void ntb_transport_init_queue(struct ntb_transport *nt, | |||
| 851 | INIT_LIST_HEAD(&qp->rx_pend_q); | 889 | INIT_LIST_HEAD(&qp->rx_pend_q); |
| 852 | INIT_LIST_HEAD(&qp->rx_free_q); | 890 | INIT_LIST_HEAD(&qp->rx_free_q); |
| 853 | INIT_LIST_HEAD(&qp->tx_free_q); | 891 | INIT_LIST_HEAD(&qp->tx_free_q); |
| 892 | |||
| 893 | return 0; | ||
| 854 | } | 894 | } |
| 855 | 895 | ||
| 856 | int ntb_transport_init(struct pci_dev *pdev) | 896 | int ntb_transport_init(struct pci_dev *pdev) |
| @@ -889,8 +929,11 @@ int ntb_transport_init(struct pci_dev *pdev) | |||
| 889 | 929 | ||
| 890 | nt->qp_bitmap = ((u64) 1 << nt->max_qps) - 1; | 930 | nt->qp_bitmap = ((u64) 1 << nt->max_qps) - 1; |
| 891 | 931 | ||
| 892 | for (i = 0; i < nt->max_qps; i++) | 932 | for (i = 0; i < nt->max_qps; i++) { |
| 893 | ntb_transport_init_queue(nt, i); | 933 | rc = ntb_transport_init_queue(nt, i); |
| 934 | if (rc) | ||
| 935 | goto err3; | ||
| 936 | } | ||
| 894 | 937 | ||
| 895 | INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work); | 938 | INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work); |
| 896 | INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup); | 939 | INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup); |
| @@ -956,13 +999,19 @@ void ntb_transport_free(void *transport) | |||
| 956 | kfree(nt); | 999 | kfree(nt); |
| 957 | } | 1000 | } |
| 958 | 1001 | ||
| 959 | static void ntb_rx_copy_task(struct ntb_transport_qp *qp, | 1002 | static void ntb_rx_copy_callback(void *data) |
| 960 | struct ntb_queue_entry *entry, void *offset) | ||
| 961 | { | 1003 | { |
| 1004 | struct ntb_queue_entry *entry = data; | ||
| 1005 | struct ntb_transport_qp *qp = entry->qp; | ||
| 962 | void *cb_data = entry->cb_data; | 1006 | void *cb_data = entry->cb_data; |
| 963 | unsigned int len = entry->len; | 1007 | unsigned int len = entry->len; |
| 1008 | struct ntb_payload_header *hdr = entry->rx_hdr; | ||
| 1009 | |||
| 1010 | /* Ensure that the data is fully copied out before clearing the flag */ | ||
| 1011 | wmb(); | ||
| 1012 | hdr->flags = 0; | ||
| 964 | 1013 | ||
| 965 | memcpy(entry->buf, offset, entry->len); | 1014 | iowrite32(entry->index, &qp->rx_info->entry); |
| 966 | 1015 | ||
| 967 | ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); | 1016 | ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); |
| 968 | 1017 | ||
| @@ -970,6 +1019,86 @@ static void ntb_rx_copy_task(struct ntb_transport_qp *qp, | |||
| 970 | qp->rx_handler(qp, qp->cb_data, cb_data, len); | 1019 | qp->rx_handler(qp, qp->cb_data, cb_data, len); |
| 971 | } | 1020 | } |
| 972 | 1021 | ||
| 1022 | static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) | ||
| 1023 | { | ||
| 1024 | void *buf = entry->buf; | ||
| 1025 | size_t len = entry->len; | ||
| 1026 | |||
| 1027 | memcpy(buf, offset, len); | ||
| 1028 | |||
| 1029 | ntb_rx_copy_callback(entry); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, | ||
| 1033 | size_t len) | ||
| 1034 | { | ||
| 1035 | struct dma_async_tx_descriptor *txd; | ||
| 1036 | struct ntb_transport_qp *qp = entry->qp; | ||
| 1037 | struct dma_chan *chan = qp->dma_chan; | ||
| 1038 | struct dma_device *device; | ||
| 1039 | size_t pay_off, buff_off; | ||
| 1040 | dma_addr_t src, dest; | ||
| 1041 | dma_cookie_t cookie; | ||
| 1042 | void *buf = entry->buf; | ||
| 1043 | unsigned long flags; | ||
| 1044 | |||
| 1045 | entry->len = len; | ||
| 1046 | |||
| 1047 | if (!chan) | ||
| 1048 | goto err; | ||
| 1049 | |||
| 1050 | if (len < copy_bytes) | ||
| 1051 | goto err1; | ||
| 1052 | |||
| 1053 | device = chan->device; | ||
| 1054 | pay_off = (size_t) offset & ~PAGE_MASK; | ||
| 1055 | buff_off = (size_t) buf & ~PAGE_MASK; | ||
| 1056 | |||
| 1057 | if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) | ||
| 1058 | goto err1; | ||
| 1059 | |||
| 1060 | dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE); | ||
| 1061 | if (dma_mapping_error(device->dev, dest)) | ||
| 1062 | goto err1; | ||
| 1063 | |||
| 1064 | src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE); | ||
| 1065 | if (dma_mapping_error(device->dev, src)) | ||
| 1066 | goto err2; | ||
| 1067 | |||
| 1068 | flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE | | ||
| 1069 | DMA_PREP_INTERRUPT; | ||
| 1070 | txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags); | ||
| 1071 | if (!txd) | ||
| 1072 | goto err3; | ||
| 1073 | |||
| 1074 | txd->callback = ntb_rx_copy_callback; | ||
| 1075 | txd->callback_param = entry; | ||
| 1076 | |||
| 1077 | cookie = dmaengine_submit(txd); | ||
| 1078 | if (dma_submit_error(cookie)) | ||
| 1079 | goto err3; | ||
| 1080 | |||
| 1081 | qp->last_cookie = cookie; | ||
| 1082 | |||
| 1083 | qp->rx_async++; | ||
| 1084 | |||
| 1085 | return; | ||
| 1086 | |||
| 1087 | err3: | ||
| 1088 | dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE); | ||
| 1089 | err2: | ||
| 1090 | dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE); | ||
| 1091 | err1: | ||
| 1092 | /* If the callbacks come out of order, the writing of the index to the | ||
| 1093 | * last completed will be out of order. This may result in the | ||
| 1094 | * receive stalling forever. | ||
| 1095 | */ | ||
| 1096 | dma_sync_wait(chan, qp->last_cookie); | ||
| 1097 | err: | ||
| 1098 | ntb_memcpy_rx(entry, offset); | ||
| 1099 | qp->rx_memcpy++; | ||
| 1100 | } | ||
| 1101 | |||
| 973 | static int ntb_process_rxc(struct ntb_transport_qp *qp) | 1102 | static int ntb_process_rxc(struct ntb_transport_qp *qp) |
| 974 | { | 1103 | { |
| 975 | struct ntb_payload_header *hdr; | 1104 | struct ntb_payload_header *hdr; |
| @@ -1008,41 +1137,45 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) | |||
| 1008 | if (hdr->flags & LINK_DOWN_FLAG) { | 1137 | if (hdr->flags & LINK_DOWN_FLAG) { |
| 1009 | ntb_qp_link_down(qp); | 1138 | ntb_qp_link_down(qp); |
| 1010 | 1139 | ||
| 1011 | ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, | 1140 | goto err; |
| 1012 | &qp->rx_pend_q); | ||
| 1013 | goto out; | ||
| 1014 | } | 1141 | } |
| 1015 | 1142 | ||
| 1016 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, | 1143 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, |
| 1017 | "rx offset %u, ver %u - %d payload received, buf size %d\n", | 1144 | "rx offset %u, ver %u - %d payload received, buf size %d\n", |
| 1018 | qp->rx_index, hdr->ver, hdr->len, entry->len); | 1145 | qp->rx_index, hdr->ver, hdr->len, entry->len); |
| 1019 | 1146 | ||
| 1020 | if (hdr->len <= entry->len) { | 1147 | qp->rx_bytes += hdr->len; |
| 1021 | entry->len = hdr->len; | 1148 | qp->rx_pkts++; |
| 1022 | ntb_rx_copy_task(qp, entry, offset); | ||
| 1023 | } else { | ||
| 1024 | ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, | ||
| 1025 | &qp->rx_pend_q); | ||
| 1026 | 1149 | ||
| 1150 | if (hdr->len > entry->len) { | ||
| 1027 | qp->rx_err_oflow++; | 1151 | qp->rx_err_oflow++; |
| 1028 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, | 1152 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, |
| 1029 | "RX overflow! Wanted %d got %d\n", | 1153 | "RX overflow! Wanted %d got %d\n", |
| 1030 | hdr->len, entry->len); | 1154 | hdr->len, entry->len); |
| 1155 | |||
| 1156 | goto err; | ||
| 1031 | } | 1157 | } |
| 1032 | 1158 | ||
| 1033 | qp->rx_bytes += hdr->len; | 1159 | entry->index = qp->rx_index; |
| 1034 | qp->rx_pkts++; | 1160 | entry->rx_hdr = hdr; |
| 1161 | |||
| 1162 | ntb_async_rx(entry, offset, hdr->len); | ||
| 1035 | 1163 | ||
| 1036 | out: | 1164 | out: |
| 1165 | qp->rx_index++; | ||
| 1166 | qp->rx_index %= qp->rx_max_entry; | ||
| 1167 | |||
| 1168 | return 0; | ||
| 1169 | |||
| 1170 | err: | ||
| 1171 | ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, | ||
| 1172 | &qp->rx_pend_q); | ||
| 1037 | /* Ensure that the data is fully copied out before clearing the flag */ | 1173 | /* Ensure that the data is fully copied out before clearing the flag */ |
| 1038 | wmb(); | 1174 | wmb(); |
| 1039 | hdr->flags = 0; | 1175 | hdr->flags = 0; |
| 1040 | iowrite32(qp->rx_index, &qp->rx_info->entry); | 1176 | iowrite32(qp->rx_index, &qp->rx_info->entry); |
| 1041 | 1177 | ||
| 1042 | qp->rx_index++; | 1178 | goto out; |
| 1043 | qp->rx_index %= qp->rx_max_entry; | ||
| 1044 | |||
| 1045 | return 0; | ||
| 1046 | } | 1179 | } |
| 1047 | 1180 | ||
| 1048 | static void ntb_transport_rx(unsigned long data) | 1181 | static void ntb_transport_rx(unsigned long data) |
| @@ -1058,6 +1191,9 @@ static void ntb_transport_rx(unsigned long data) | |||
| 1058 | if (rc) | 1191 | if (rc) |
| 1059 | break; | 1192 | break; |
| 1060 | } | 1193 | } |
| 1194 | |||
| 1195 | if (qp->dma_chan) | ||
| 1196 | dma_async_issue_pending(qp->dma_chan); | ||
| 1061 | } | 1197 | } |
| 1062 | 1198 | ||
| 1063 | static void ntb_transport_rxc_db(void *data, int db_num) | 1199 | static void ntb_transport_rxc_db(void *data, int db_num) |
| @@ -1070,19 +1206,13 @@ static void ntb_transport_rxc_db(void *data, int db_num) | |||
| 1070 | tasklet_schedule(&qp->rx_work); | 1206 | tasklet_schedule(&qp->rx_work); |
| 1071 | } | 1207 | } |
| 1072 | 1208 | ||
| 1073 | static void ntb_tx_copy_task(struct ntb_transport_qp *qp, | 1209 | static void ntb_tx_copy_callback(void *data) |
| 1074 | struct ntb_queue_entry *entry, | ||
| 1075 | void __iomem *offset) | ||
| 1076 | { | 1210 | { |
| 1077 | struct ntb_payload_header __iomem *hdr; | 1211 | struct ntb_queue_entry *entry = data; |
| 1078 | 1212 | struct ntb_transport_qp *qp = entry->qp; | |
| 1079 | memcpy_toio(offset, entry->buf, entry->len); | 1213 | struct ntb_payload_header __iomem *hdr = entry->tx_hdr; |
| 1080 | 1214 | ||
| 1081 | hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header); | 1215 | /* Ensure that the data is fully copied out before setting the flags */ |
| 1082 | iowrite32(entry->len, &hdr->len); | ||
| 1083 | iowrite32((u32) qp->tx_pkts, &hdr->ver); | ||
| 1084 | |||
| 1085 | /* Ensure that the data is fully copied out before setting the flag */ | ||
| 1086 | wmb(); | 1216 | wmb(); |
| 1087 | iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); | 1217 | iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); |
| 1088 | 1218 | ||
| @@ -1103,15 +1233,81 @@ static void ntb_tx_copy_task(struct ntb_transport_qp *qp, | |||
| 1103 | ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q); | 1233 | ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q); |
| 1104 | } | 1234 | } |
| 1105 | 1235 | ||
| 1106 | static int ntb_process_tx(struct ntb_transport_qp *qp, | 1236 | static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) |
| 1107 | struct ntb_queue_entry *entry) | ||
| 1108 | { | 1237 | { |
| 1238 | memcpy_toio(offset, entry->buf, entry->len); | ||
| 1239 | |||
| 1240 | ntb_tx_copy_callback(entry); | ||
| 1241 | } | ||
| 1242 | |||
| 1243 | static void ntb_async_tx(struct ntb_transport_qp *qp, | ||
| 1244 | struct ntb_queue_entry *entry) | ||
| 1245 | { | ||
| 1246 | struct ntb_payload_header __iomem *hdr; | ||
| 1247 | struct dma_async_tx_descriptor *txd; | ||
| 1248 | struct dma_chan *chan = qp->dma_chan; | ||
| 1249 | struct dma_device *device; | ||
| 1250 | size_t dest_off, buff_off; | ||
| 1251 | dma_addr_t src, dest; | ||
| 1252 | dma_cookie_t cookie; | ||
| 1109 | void __iomem *offset; | 1253 | void __iomem *offset; |
| 1254 | size_t len = entry->len; | ||
| 1255 | void *buf = entry->buf; | ||
| 1256 | unsigned long flags; | ||
| 1110 | 1257 | ||
| 1111 | offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index; | 1258 | offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index; |
| 1259 | hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header); | ||
| 1260 | entry->tx_hdr = hdr; | ||
| 1112 | 1261 | ||
| 1113 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - offset %p, tx %u, entry len %d flags %x buff %p\n", | 1262 | iowrite32(entry->len, &hdr->len); |
| 1114 | qp->tx_pkts, offset, qp->tx_index, entry->len, entry->flags, | 1263 | iowrite32((u32) qp->tx_pkts, &hdr->ver); |
| 1264 | |||
| 1265 | if (!chan) | ||
| 1266 | goto err; | ||
| 1267 | |||
| 1268 | if (len < copy_bytes) | ||
| 1269 | goto err; | ||
| 1270 | |||
| 1271 | device = chan->device; | ||
| 1272 | dest = qp->tx_mw_phys + qp->tx_max_frame * qp->tx_index; | ||
| 1273 | buff_off = (size_t) buf & ~PAGE_MASK; | ||
| 1274 | dest_off = (size_t) dest & ~PAGE_MASK; | ||
| 1275 | |||
| 1276 | if (!is_dma_copy_aligned(device, buff_off, dest_off, len)) | ||
| 1277 | goto err; | ||
| 1278 | |||
| 1279 | src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE); | ||
| 1280 | if (dma_mapping_error(device->dev, src)) | ||
| 1281 | goto err; | ||
| 1282 | |||
| 1283 | flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT; | ||
| 1284 | txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags); | ||
| 1285 | if (!txd) | ||
| 1286 | goto err1; | ||
| 1287 | |||
| 1288 | txd->callback = ntb_tx_copy_callback; | ||
| 1289 | txd->callback_param = entry; | ||
| 1290 | |||
| 1291 | cookie = dmaengine_submit(txd); | ||
| 1292 | if (dma_submit_error(cookie)) | ||
| 1293 | goto err1; | ||
| 1294 | |||
| 1295 | dma_async_issue_pending(chan); | ||
| 1296 | qp->tx_async++; | ||
| 1297 | |||
| 1298 | return; | ||
| 1299 | err1: | ||
| 1300 | dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE); | ||
| 1301 | err: | ||
| 1302 | ntb_memcpy_tx(entry, offset); | ||
| 1303 | qp->tx_memcpy++; | ||
| 1304 | } | ||
| 1305 | |||
| 1306 | static int ntb_process_tx(struct ntb_transport_qp *qp, | ||
| 1307 | struct ntb_queue_entry *entry) | ||
| 1308 | { | ||
| 1309 | dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - tx %u, entry len %d flags %x buff %p\n", | ||
| 1310 | qp->tx_pkts, qp->tx_index, entry->len, entry->flags, | ||
| 1115 | entry->buf); | 1311 | entry->buf); |
| 1116 | if (qp->tx_index == qp->remote_rx_info->entry) { | 1312 | if (qp->tx_index == qp->remote_rx_info->entry) { |
| 1117 | qp->tx_ring_full++; | 1313 | qp->tx_ring_full++; |
| @@ -1127,7 +1323,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp, | |||
| 1127 | return 0; | 1323 | return 0; |
| 1128 | } | 1324 | } |
| 1129 | 1325 | ||
| 1130 | ntb_tx_copy_task(qp, entry, offset); | 1326 | ntb_async_tx(qp, entry); |
| 1131 | 1327 | ||
| 1132 | qp->tx_index++; | 1328 | qp->tx_index++; |
| 1133 | qp->tx_index %= qp->tx_max_entry; | 1329 | qp->tx_index %= qp->tx_max_entry; |
| @@ -1213,11 +1409,18 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, | |||
| 1213 | qp->tx_handler = handlers->tx_handler; | 1409 | qp->tx_handler = handlers->tx_handler; |
| 1214 | qp->event_handler = handlers->event_handler; | 1410 | qp->event_handler = handlers->event_handler; |
| 1215 | 1411 | ||
| 1412 | qp->dma_chan = dma_find_channel(DMA_MEMCPY); | ||
| 1413 | if (!qp->dma_chan) | ||
| 1414 | dev_info(&pdev->dev, "Unable to allocate DMA channel, using CPU instead\n"); | ||
| 1415 | else | ||
| 1416 | dmaengine_get(); | ||
| 1417 | |||
| 1216 | for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { | 1418 | for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { |
| 1217 | entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC); | 1419 | entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC); |
| 1218 | if (!entry) | 1420 | if (!entry) |
| 1219 | goto err1; | 1421 | goto err1; |
| 1220 | 1422 | ||
| 1423 | entry->qp = qp; | ||
| 1221 | ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, | 1424 | ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, |
| 1222 | &qp->rx_free_q); | 1425 | &qp->rx_free_q); |
| 1223 | } | 1426 | } |
| @@ -1227,6 +1430,7 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, | |||
| 1227 | if (!entry) | 1430 | if (!entry) |
| 1228 | goto err2; | 1431 | goto err2; |
| 1229 | 1432 | ||
| 1433 | entry->qp = qp; | ||
| 1230 | ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, | 1434 | ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, |
| 1231 | &qp->tx_free_q); | 1435 | &qp->tx_free_q); |
| 1232 | } | 1436 | } |
| @@ -1272,11 +1476,26 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) | |||
| 1272 | 1476 | ||
| 1273 | pdev = ntb_query_pdev(qp->ndev); | 1477 | pdev = ntb_query_pdev(qp->ndev); |
| 1274 | 1478 | ||
| 1275 | cancel_delayed_work_sync(&qp->link_work); | 1479 | if (qp->dma_chan) { |
| 1480 | struct dma_chan *chan = qp->dma_chan; | ||
| 1481 | /* Setting the dma_chan to NULL will force any new traffic to be | ||
| 1482 | * processed by the CPU instead of the DMA engine | ||
| 1483 | */ | ||
| 1484 | qp->dma_chan = NULL; | ||
| 1485 | |||
| 1486 | /* Try to be nice and wait for any queued DMA engine | ||
| 1487 | * transactions to process before smashing it with a rock | ||
| 1488 | */ | ||
| 1489 | dma_sync_wait(chan, qp->last_cookie); | ||
| 1490 | dmaengine_terminate_all(chan); | ||
| 1491 | dmaengine_put(); | ||
| 1492 | } | ||
| 1276 | 1493 | ||
| 1277 | ntb_unregister_db_callback(qp->ndev, qp->qp_num); | 1494 | ntb_unregister_db_callback(qp->ndev, qp->qp_num); |
| 1278 | tasklet_disable(&qp->rx_work); | 1495 | tasklet_disable(&qp->rx_work); |
| 1279 | 1496 | ||
| 1497 | cancel_delayed_work_sync(&qp->link_work); | ||
| 1498 | |||
| 1280 | while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) | 1499 | while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) |
| 1281 | kfree(entry); | 1500 | kfree(entry); |
| 1282 | 1501 | ||
| @@ -1382,8 +1601,10 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, | |||
| 1382 | return -EINVAL; | 1601 | return -EINVAL; |
| 1383 | 1602 | ||
| 1384 | entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); | 1603 | entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); |
| 1385 | if (!entry) | 1604 | if (!entry) { |
| 1605 | qp->tx_err_no_buf++; | ||
| 1386 | return -ENOMEM; | 1606 | return -ENOMEM; |
| 1607 | } | ||
| 1387 | 1608 | ||
| 1388 | entry->cb_data = cb; | 1609 | entry->cb_data = cb; |
| 1389 | entry->buf = data; | 1610 | entry->buf = data; |
| @@ -1499,9 +1720,18 @@ EXPORT_SYMBOL_GPL(ntb_transport_qp_num); | |||
| 1499 | */ | 1720 | */ |
| 1500 | unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) | 1721 | unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) |
| 1501 | { | 1722 | { |
| 1723 | unsigned int max; | ||
| 1724 | |||
| 1502 | if (!qp) | 1725 | if (!qp) |
| 1503 | return 0; | 1726 | return 0; |
| 1504 | 1727 | ||
| 1505 | return qp->tx_max_frame - sizeof(struct ntb_payload_header); | 1728 | if (!qp->dma_chan) |
| 1729 | return qp->tx_max_frame - sizeof(struct ntb_payload_header); | ||
| 1730 | |||
| 1731 | /* If DMA engine usage is possible, try to find the max size for that */ | ||
| 1732 | max = qp->tx_max_frame - sizeof(struct ntb_payload_header); | ||
| 1733 | max -= max % (1 << qp->dma_chan->device->copy_align); | ||
| 1734 | |||
| 1735 | return max; | ||
| 1506 | } | 1736 | } |
| 1507 | EXPORT_SYMBOL_GPL(ntb_transport_max_size); | 1737 | EXPORT_SYMBOL_GPL(ntb_transport_max_size); |
